grub-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH v3 1/5] Import libgcrypt 1.10.3


From: Vladimir Serbinenko
Subject: [PATCH v3 1/5] Import libgcrypt 1.10.3
Date: Fri, 24 May 2024 20:30:02 +0300

---
 grub-core/lib/libgcrypt/AUTHORS               |   264 +
 grub-core/lib/libgcrypt/COPYING               |   340 +
 grub-core/lib/libgcrypt/COPYING.LIB           |   510 +
 grub-core/lib/libgcrypt/LICENSES              |   287 +
 grub-core/lib/libgcrypt/README                |   280 +
 grub-core/lib/libgcrypt/README.GIT            |    49 +
 grub-core/lib/libgcrypt/THANKS                |   168 +
 grub-core/lib/libgcrypt/VERSION               |     1 +
 grub-core/lib/libgcrypt/cipher/ChangeLog      |  3990 ---
 grub-core/lib/libgcrypt/cipher/ChangeLog-2011 |    40 +-
 grub-core/lib/libgcrypt/cipher/Makefile.am    |   271 +-
 grub-core/lib/libgcrypt/cipher/Manifest       |    73 -
 grub-core/lib/libgcrypt/cipher/ac.c           |  3301 --
 .../lib/libgcrypt/cipher/arcfour-amd64.S      |   108 +
 grub-core/lib/libgcrypt/cipher/arcfour.c      |    87 +-
 .../lib/libgcrypt/cipher/asm-common-aarch64.h |   108 +
 .../lib/libgcrypt/cipher/asm-common-amd64.h   |   193 +
 .../lib/libgcrypt/cipher/asm-common-s390x.h   |    90 +
 .../lib/libgcrypt/cipher/asm-inline-s390x.h   |   205 +
 .../libgcrypt/cipher/asm-poly1305-aarch64.h   |   245 +
 .../lib/libgcrypt/cipher/asm-poly1305-amd64.h |   171 +
 .../lib/libgcrypt/cipher/asm-poly1305-s390x.h |   140 +
 grub-core/lib/libgcrypt/cipher/bithelp.h      |   111 +-
 grub-core/lib/libgcrypt/cipher/blake2.c       |  1045 +
 .../lib/libgcrypt/cipher/blake2b-amd64-avx2.S |   300 +
 .../lib/libgcrypt/cipher/blake2s-amd64-avx.S  |   278 +
 .../lib/libgcrypt/cipher/blowfish-amd64.S     |   601 +
 grub-core/lib/libgcrypt/cipher/blowfish-arm.S |   743 +
 grub-core/lib/libgcrypt/cipher/blowfish.c     |   847 +-
 grub-core/lib/libgcrypt/cipher/bufhelp.h      |   458 +-
 .../lib/libgcrypt/cipher/camellia-aarch64.S   |   586 +
 .../cipher/camellia-aesni-avx-amd64.S         |  2618 ++
 .../cipher/camellia-aesni-avx2-amd64.S        |    34 +
 .../cipher/camellia-aesni-avx2-amd64.h        |  1794 ++
 grub-core/lib/libgcrypt/cipher/camellia-arm.S |   626 +
 .../lib/libgcrypt/cipher/camellia-glue.c      |  1014 +-
 .../cipher/camellia-vaes-avx2-amd64.S         |    35 +
 grub-core/lib/libgcrypt/cipher/camellia.c     |   172 +-
 grub-core/lib/libgcrypt/cipher/camellia.h     |    14 +
 grub-core/lib/libgcrypt/cipher/cast5-amd64.S  |   663 +
 grub-core/lib/libgcrypt/cipher/cast5-arm.S    |   728 +
 grub-core/lib/libgcrypt/cipher/cast5.c        |   822 +-
 .../lib/libgcrypt/cipher/chacha20-aarch64.S   |   648 +
 .../libgcrypt/cipher/chacha20-amd64-avx2.S    |   601 +
 .../libgcrypt/cipher/chacha20-amd64-ssse3.S   |  1012 +
 .../libgcrypt/cipher/chacha20-armv7-neon.S    |   393 +
 grub-core/lib/libgcrypt/cipher/chacha20-ppc.c |   646 +
 .../lib/libgcrypt/cipher/chacha20-s390x.S     |  1561 +
 grub-core/lib/libgcrypt/cipher/chacha20.c     |  1306 +
 .../lib/libgcrypt/cipher/cipher-aeswrap.c     |   380 +
 grub-core/lib/libgcrypt/cipher/cipher-cbc.c   |   292 +
 grub-core/lib/libgcrypt/cipher/cipher-ccm.c   |   415 +
 grub-core/lib/libgcrypt/cipher/cipher-cfb.c   |   317 +
 grub-core/lib/libgcrypt/cipher/cipher-cmac.c  |   292 +
 grub-core/lib/libgcrypt/cipher/cipher-ctr.c   |   131 +
 grub-core/lib/libgcrypt/cipher/cipher-eax.c   |   289 +
 .../libgcrypt/cipher/cipher-gcm-armv7-neon.S  |   341 +
 .../cipher/cipher-gcm-armv8-aarch32-ce.S      |   588 +
 .../cipher/cipher-gcm-armv8-aarch64-ce.S      |   652 +
 .../cipher/cipher-gcm-intel-pclmul.c          |   914 +
 .../lib/libgcrypt/cipher/cipher-gcm-ppc.c     |   551 +
 .../lib/libgcrypt/cipher/cipher-gcm-siv.c     |   664 +
 grub-core/lib/libgcrypt/cipher/cipher-gcm.c   |  1263 +
 .../lib/libgcrypt/cipher/cipher-internal.h    |   946 +
 grub-core/lib/libgcrypt/cipher/cipher-ocb.c   |   762 +
 grub-core/lib/libgcrypt/cipher/cipher-ofb.c   |   108 +
 .../lib/libgcrypt/cipher/cipher-poly1305.c    |   379 +
 .../lib/libgcrypt/cipher/cipher-selftest.c    |   512 +
 .../lib/libgcrypt/cipher/cipher-selftest.h    |    69 +
 grub-core/lib/libgcrypt/cipher/cipher-siv.c   |   375 +
 grub-core/lib/libgcrypt/cipher/cipher-xts.c   |   189 +
 grub-core/lib/libgcrypt/cipher/cipher.c       |  2621 +-
 .../libgcrypt/cipher/crc-armv8-aarch64-ce.S   |   497 +
 grub-core/lib/libgcrypt/cipher/crc-armv8-ce.c |   229 +
 .../lib/libgcrypt/cipher/crc-intel-pclmul.c   |   939 +
 grub-core/lib/libgcrypt/cipher/crc-ppc.c      |   656 +
 grub-core/lib/libgcrypt/cipher/crc.c          |   188 +-
 grub-core/lib/libgcrypt/cipher/des-amd64.S    |  1111 +
 grub-core/lib/libgcrypt/cipher/des.c          |   365 +-
 grub-core/lib/libgcrypt/cipher/dsa-common.c   |   473 +
 grub-core/lib/libgcrypt/cipher/dsa.c          |  1207 +-
 grub-core/lib/libgcrypt/cipher/ecc-common.h   |   143 +
 grub-core/lib/libgcrypt/cipher/ecc-curves.c   |  1585 +
 grub-core/lib/libgcrypt/cipher/ecc-ecdh.c     |   127 +
 grub-core/lib/libgcrypt/cipher/ecc-ecdsa.c    |   297 +
 grub-core/lib/libgcrypt/cipher/ecc-eddsa.c    |  1079 +
 grub-core/lib/libgcrypt/cipher/ecc-gost.c     |   218 +
 grub-core/lib/libgcrypt/cipher/ecc-misc.c     |   469 +
 grub-core/lib/libgcrypt/cipher/ecc-sm2.c      |   569 +
 grub-core/lib/libgcrypt/cipher/ecc.c          |  2940 +-
 grub-core/lib/libgcrypt/cipher/elgamal.c      |   829 +-
 grub-core/lib/libgcrypt/cipher/gost-s-box.c   |   266 +
 grub-core/lib/libgcrypt/cipher/gost-sb.h      |  2128 ++
 grub-core/lib/libgcrypt/cipher/gost.h         |    34 +
 grub-core/lib/libgcrypt/cipher/gost28147.c    |   553 +
 grub-core/lib/libgcrypt/cipher/gostr3411-94.c |   383 +
 grub-core/lib/libgcrypt/cipher/hash-common.c  |   112 +-
 grub-core/lib/libgcrypt/cipher/hash-common.h  |    35 +-
 grub-core/lib/libgcrypt/cipher/hmac-tests.c   |   732 -
 grub-core/lib/libgcrypt/cipher/idea.c         |    24 +-
 grub-core/lib/libgcrypt/cipher/kdf-internal.h |    43 +
 grub-core/lib/libgcrypt/cipher/kdf.c          |  1074 +-
 .../lib/libgcrypt/cipher/keccak-armv7-neon.S  |   945 +
 grub-core/lib/libgcrypt/cipher/keccak.c       |  1561 +
 .../lib/libgcrypt/cipher/keccak_permute_32.h  |   536 +
 .../lib/libgcrypt/cipher/keccak_permute_64.h  |   385 +
 grub-core/lib/libgcrypt/cipher/mac-cmac.c     |   524 +
 grub-core/lib/libgcrypt/cipher/mac-gmac.c     |   187 +
 grub-core/lib/libgcrypt/cipher/mac-hmac.c     |  1471 +
 grub-core/lib/libgcrypt/cipher/mac-internal.h |   275 +
 grub-core/lib/libgcrypt/cipher/mac-poly1305.c |   364 +
 grub-core/lib/libgcrypt/cipher/mac.c          |   802 +
 grub-core/lib/libgcrypt/cipher/md.c           |  1517 +-
 grub-core/lib/libgcrypt/cipher/md4.c          |   167 +-
 grub-core/lib/libgcrypt/cipher/md5.c          |   173 +-
 .../lib/libgcrypt/cipher/poly1305-internal.h  |    64 +
 .../lib/libgcrypt/cipher/poly1305-s390x.S     |    87 +
 grub-core/lib/libgcrypt/cipher/poly1305.c     |   763 +
 grub-core/lib/libgcrypt/cipher/primegen.c     |   555 +-
 .../lib/libgcrypt/cipher/pubkey-internal.h    |   107 +
 grub-core/lib/libgcrypt/cipher/pubkey-util.c  |  1369 +
 grub-core/lib/libgcrypt/cipher/pubkey.c       |  4315 +--
 grub-core/lib/libgcrypt/cipher/rfc2268.c      |    51 +-
 .../lib/libgcrypt/cipher/rijndael-aarch64.S   |   514 +
 .../lib/libgcrypt/cipher/rijndael-aesni.c     |  4880 +++
 .../lib/libgcrypt/cipher/rijndael-amd64.S     |   477 +
 grub-core/lib/libgcrypt/cipher/rijndael-arm.S |   581 +
 .../cipher/rijndael-armv8-aarch32-ce.S        |  1988 ++
 .../cipher/rijndael-armv8-aarch64-ce.S        |  1921 ++
 .../lib/libgcrypt/cipher/rijndael-armv8-ce.c  |   431 +
 .../lib/libgcrypt/cipher/rijndael-gcm-p10le.s |  1401 +
 .../lib/libgcrypt/cipher/rijndael-internal.h  |   204 +
 .../lib/libgcrypt/cipher/rijndael-p10le.c     |   119 +
 .../lib/libgcrypt/cipher/rijndael-padlock.c   |   110 +
 .../libgcrypt/cipher/rijndael-ppc-common.h    |   342 +
 .../libgcrypt/cipher/rijndael-ppc-functions.h |  2020 ++
 grub-core/lib/libgcrypt/cipher/rijndael-ppc.c |   259 +
 .../lib/libgcrypt/cipher/rijndael-ppc9le.c    |   102 +
 .../lib/libgcrypt/cipher/rijndael-s390x.c     |  1166 +
 .../cipher/rijndael-ssse3-amd64-asm.S         |   874 +
 .../libgcrypt/cipher/rijndael-ssse3-amd64.c   |   743 +
 .../lib/libgcrypt/cipher/rijndael-tables.h    |  1839 +-
 .../cipher/rijndael-vaes-avx2-amd64.S         |  3021 ++
 .../lib/libgcrypt/cipher/rijndael-vaes.c      |   197 +
 grub-core/lib/libgcrypt/cipher/rijndael.c     |  2611 +-
 grub-core/lib/libgcrypt/cipher/rmd160.c       |   588 +-
 grub-core/lib/libgcrypt/cipher/rsa-common.c   |  1115 +
 grub-core/lib/libgcrypt/cipher/rsa.c          |  1733 +-
 .../lib/libgcrypt/cipher/salsa20-amd64.S      |   940 +
 .../lib/libgcrypt/cipher/salsa20-armv7-neon.S |   899 +
 grub-core/lib/libgcrypt/cipher/salsa20.c      |   600 +
 grub-core/lib/libgcrypt/cipher/scrypt.c       |   322 +
 grub-core/lib/libgcrypt/cipher/seed.c         |    28 +-
 .../lib/libgcrypt/cipher/serpent-armv7-neon.S |  1124 +
 .../lib/libgcrypt/cipher/serpent-avx2-amd64.S |  1160 +
 .../lib/libgcrypt/cipher/serpent-sse2-amd64.S |  1211 +
 grub-core/lib/libgcrypt/cipher/serpent.c      |  1800 +-
 .../lib/libgcrypt/cipher/sha1-armv7-neon.S    |   526 +
 .../libgcrypt/cipher/sha1-armv8-aarch32-ce.S  |   220 +
 .../libgcrypt/cipher/sha1-armv8-aarch64-ce.S  |   201 +
 .../lib/libgcrypt/cipher/sha1-avx-amd64.S     |   429 +
 .../libgcrypt/cipher/sha1-avx-bmi2-amd64.S    |   441 +
 .../libgcrypt/cipher/sha1-avx2-bmi2-amd64.S   |   573 +
 .../lib/libgcrypt/cipher/sha1-intel-shaext.c  |   292 +
 .../lib/libgcrypt/cipher/sha1-ssse3-amd64.S   |   437 +
 grub-core/lib/libgcrypt/cipher/sha1.c         |   571 +-
 grub-core/lib/libgcrypt/cipher/sha1.h         |    47 +
 .../cipher/sha256-armv8-aarch32-ce.S          |   231 +
 .../cipher/sha256-armv8-aarch64-ce.S          |   215 +
 .../lib/libgcrypt/cipher/sha256-avx-amd64.S   |   506 +
 .../libgcrypt/cipher/sha256-avx2-bmi2-amd64.S |   527 +
 .../libgcrypt/cipher/sha256-intel-shaext.c    |   363 +
 grub-core/lib/libgcrypt/cipher/sha256-ppc.c   |   795 +
 .../lib/libgcrypt/cipher/sha256-ssse3-amd64.S |   528 +
 grub-core/lib/libgcrypt/cipher/sha256.c       |   775 +-
 grub-core/lib/libgcrypt/cipher/sha512-arm.S   |   464 +
 .../lib/libgcrypt/cipher/sha512-armv7-neon.S  |   452 +
 .../lib/libgcrypt/cipher/sha512-avx-amd64.S   |   461 +
 .../libgcrypt/cipher/sha512-avx2-bmi2-amd64.S |   502 +
 grub-core/lib/libgcrypt/cipher/sha512-ppc.c   |   969 +
 .../lib/libgcrypt/cipher/sha512-ssse3-amd64.S |   467 +
 .../lib/libgcrypt/cipher/sha512-ssse3-i386.c  |   404 +
 grub-core/lib/libgcrypt/cipher/sha512.c       |  1232 +-
 grub-core/lib/libgcrypt/cipher/sm3-aarch64.S  |   657 +
 .../lib/libgcrypt/cipher/sm3-avx-bmi2-amd64.S |   553 +
 grub-core/lib/libgcrypt/cipher/sm3.c          |   537 +
 .../libgcrypt/cipher/sm4-aesni-avx-amd64.S    |   987 +
 .../libgcrypt/cipher/sm4-aesni-avx2-amd64.S   |   851 +
 grub-core/lib/libgcrypt/cipher/sm4.c          |  1251 +
 grub-core/lib/libgcrypt/cipher/stribog.c      |  1362 +
 .../lib/libgcrypt/cipher/test-getrusage.c     |   105 -
 grub-core/lib/libgcrypt/cipher/tiger.c        |   307 +-
 .../lib/libgcrypt/cipher/twofish-aarch64.S    |   321 +
 .../lib/libgcrypt/cipher/twofish-amd64.S      |  1184 +
 grub-core/lib/libgcrypt/cipher/twofish-arm.S  |   363 +
 .../lib/libgcrypt/cipher/twofish-avx2-amd64.S |  1048 +
 grub-core/lib/libgcrypt/cipher/twofish.c      |   893 +-
 .../libgcrypt/cipher/whirlpool-sse2-amd64.S   |   348 +
 grub-core/lib/libgcrypt/cipher/whirlpool.c    |   334 +-
 grub-core/lib/libgcrypt/compat/Makefile.am    |    48 +
 grub-core/lib/libgcrypt/compat/clock.c        |    36 +
 grub-core/lib/libgcrypt/compat/compat.c       |    40 +
 grub-core/lib/libgcrypt/compat/getpid.c       |    29 +
 grub-core/lib/libgcrypt/compat/libcompat.h    |    37 +
 grub-core/lib/libgcrypt/config.h.in           |   823 +
 grub-core/lib/libgcrypt/configure             | 25141 ++++++++++++++++
 grub-core/lib/libgcrypt/configure.ac          |  3394 +++
 grub-core/lib/libgcrypt/mkinstalldirs         |   161 +
 grub-core/lib/libgcrypt/mpi/ChangeLog-2011    |    17 +-
 grub-core/lib/libgcrypt/mpi/Makefile.am       |     9 +-
 grub-core/lib/libgcrypt/mpi/Manifest          |    41 -
 .../mpi/{pentium4/sse2 => aarch64}/distfiles  |     1 +
 .../lib/libgcrypt/mpi/aarch64/mpi-asm-defs.h  |     4 +
 .../lib/libgcrypt/mpi/aarch64/mpih-add1.S     |    74 +
 .../lib/libgcrypt/mpi/aarch64/mpih-mul1.S     |    99 +
 .../lib/libgcrypt/mpi/aarch64/mpih-mul2.S     |   111 +
 .../lib/libgcrypt/mpi/aarch64/mpih-mul3.S     |   124 +
 .../lib/libgcrypt/mpi/aarch64/mpih-sub1.S     |    74 +
 grub-core/lib/libgcrypt/mpi/alpha/README      |     4 +-
 grub-core/lib/libgcrypt/mpi/amd64/distfiles   |     1 +
 grub-core/lib/libgcrypt/mpi/amd64/func_abi.h  |    34 +
 .../lib/libgcrypt/mpi/amd64/mpi-asm-defs.h    |     2 +-
 grub-core/lib/libgcrypt/mpi/amd64/mpih-add1.S |     4 +-
 .../lib/libgcrypt/mpi/amd64/mpih-lshift.S     |    47 +-
 grub-core/lib/libgcrypt/mpi/amd64/mpih-mul1.S |     3 +-
 grub-core/lib/libgcrypt/mpi/amd64/mpih-mul2.S |    46 +-
 grub-core/lib/libgcrypt/mpi/amd64/mpih-mul3.S |     4 +-
 .../lib/libgcrypt/mpi/amd64/mpih-rshift.S     |    49 +-
 grub-core/lib/libgcrypt/mpi/amd64/mpih-sub1.S |     3 +-
 .../lib/libgcrypt/mpi/{i586 => arm}/distfiles |     6 +-
 .../lib/libgcrypt/mpi/arm/mpi-asm-defs.h      |     4 +
 grub-core/lib/libgcrypt/mpi/arm/mpih-add1.S   |    76 +
 grub-core/lib/libgcrypt/mpi/arm/mpih-mul1.S   |    80 +
 grub-core/lib/libgcrypt/mpi/arm/mpih-mul2.S   |    94 +
 grub-core/lib/libgcrypt/mpi/arm/mpih-mul3.S   |   100 +
 grub-core/lib/libgcrypt/mpi/arm/mpih-sub1.S   |    77 +
 .../lib/libgcrypt/mpi/asm-common-aarch64.h    |    26 +
 .../lib/libgcrypt/mpi/asm-common-amd64.h      |    26 +
 grub-core/lib/libgcrypt/mpi/config.links      |   174 +-
 .../{cipher/rmd.h => mpi/ec-ed25519.c}        |    35 +-
 grub-core/lib/libgcrypt/mpi/ec-hw-s390x.c     |   412 +
 grub-core/lib/libgcrypt/mpi/ec-inline.h       |  1065 +
 grub-core/lib/libgcrypt/mpi/ec-internal.h     |    49 +
 grub-core/lib/libgcrypt/mpi/ec-nist.c         |   817 +
 grub-core/lib/libgcrypt/mpi/ec.c              |  2053 +-
 grub-core/lib/libgcrypt/mpi/generic/Manifest  |    29 -
 grub-core/lib/libgcrypt/mpi/generic/distfiles |     1 -
 .../lib/libgcrypt/mpi/generic/mpi-asm-defs.h  |    16 +-
 .../lib/libgcrypt/mpi/generic/mpih-add1.c     |     6 +-
 .../lib/libgcrypt/mpi/generic/mpih-lshift.c   |     2 +-
 .../lib/libgcrypt/mpi/generic/mpih-mul1.c     |     4 +-
 .../lib/libgcrypt/mpi/generic/mpih-mul2.c     |     6 +-
 .../lib/libgcrypt/mpi/generic/mpih-mul3.c     |     4 +-
 .../lib/libgcrypt/mpi/generic/mpih-sub1.c     |     4 +-
 grub-core/lib/libgcrypt/mpi/i386/Manifest     |    28 -
 grub-core/lib/libgcrypt/mpi/i386/distfiles    |     1 -
 grub-core/lib/libgcrypt/mpi/i386/mpih-add1.S  |    47 +-
 .../lib/libgcrypt/mpi/i386/mpih-lshift.S      |    12 +-
 grub-core/lib/libgcrypt/mpi/i386/mpih-mul1.S  |    12 +-
 grub-core/lib/libgcrypt/mpi/i386/mpih-mul2.S  |    12 +-
 grub-core/lib/libgcrypt/mpi/i386/mpih-mul3.S  |    12 +-
 .../lib/libgcrypt/mpi/i386/mpih-rshift.S      |    14 +-
 grub-core/lib/libgcrypt/mpi/i386/mpih-sub1.S  |    47 +-
 grub-core/lib/libgcrypt/mpi/i386/syntax.h     |    32 +-
 grub-core/lib/libgcrypt/mpi/i586/Manifest     |    27 -
 grub-core/lib/libgcrypt/mpi/i586/README       |    26 -
 grub-core/lib/libgcrypt/mpi/i586/mpih-add1.S  |   135 -
 .../lib/libgcrypt/mpi/i586/mpih-lshift.S      |   229 -
 grub-core/lib/libgcrypt/mpi/i586/mpih-mul1.S  |    89 -
 grub-core/lib/libgcrypt/mpi/i586/mpih-mul2.S  |    93 -
 grub-core/lib/libgcrypt/mpi/i586/mpih-mul3.S  |    93 -
 .../lib/libgcrypt/mpi/i586/mpih-rshift.S      |   228 -
 grub-core/lib/libgcrypt/mpi/i586/mpih-sub1.S  |   142 -
 grub-core/lib/libgcrypt/mpi/longlong.h        |   686 +-
 grub-core/lib/libgcrypt/mpi/m68k/Manifest     |    25 -
 grub-core/lib/libgcrypt/mpi/m68k/distfiles    |     1 -
 .../lib/libgcrypt/mpi/m68k/mc68020/Manifest   |    23 -
 .../lib/libgcrypt/mpi/m68k/mc68020/distfiles  |     1 -
 grub-core/lib/libgcrypt/mpi/m68k/syntax.h     |     2 +-
 grub-core/lib/libgcrypt/mpi/mips3/Manifest    |    28 -
 grub-core/lib/libgcrypt/mpi/mips3/README      |     2 +-
 grub-core/lib/libgcrypt/mpi/mips3/distfiles   |     1 -
 grub-core/lib/libgcrypt/mpi/mpi-add.c         |    23 +-
 grub-core/lib/libgcrypt/mpi/mpi-bit.c         |   119 +-
 grub-core/lib/libgcrypt/mpi/mpi-cmp.c         |    41 +-
 grub-core/lib/libgcrypt/mpi/mpi-div.c         |    19 +-
 grub-core/lib/libgcrypt/mpi/mpi-gcd.c         |    11 +-
 grub-core/lib/libgcrypt/mpi/mpi-internal.h    |    54 +-
 grub-core/lib/libgcrypt/mpi/mpi-inv.c         |   312 +-
 grub-core/lib/libgcrypt/mpi/mpi-mod.c         |    16 +-
 grub-core/lib/libgcrypt/mpi/mpi-mpow.c        |     8 +-
 grub-core/lib/libgcrypt/mpi/mpi-mul.c         |    12 +-
 grub-core/lib/libgcrypt/mpi/mpi-pow.c         |   458 +-
 grub-core/lib/libgcrypt/mpi/mpi-scan.c        |   132 +-
 grub-core/lib/libgcrypt/mpi/mpicoder.c        |   763 +-
 grub-core/lib/libgcrypt/mpi/mpih-const-time.c |   240 +
 grub-core/lib/libgcrypt/mpi/mpih-div.c        |    10 +-
 grub-core/lib/libgcrypt/mpi/mpih-mul.c        |    19 +-
 grub-core/lib/libgcrypt/mpi/mpiutil.c         |   481 +-
 grub-core/lib/libgcrypt/mpi/pa7100/Manifest   |    22 -
 grub-core/lib/libgcrypt/mpi/pa7100/distfiles  |     1 -
 grub-core/lib/libgcrypt/mpi/pentium4/README   |   115 -
 .../lib/libgcrypt/mpi/pentium4/distfiles      |     3 -
 .../lib/libgcrypt/mpi/pentium4/mmx/distfiles  |     2 -
 .../libgcrypt/mpi/pentium4/mmx/mpih-lshift.S  |   457 -
 .../libgcrypt/mpi/pentium4/mmx/mpih-rshift.S  |   453 -
 .../libgcrypt/mpi/pentium4/sse2/mpih-add1.S   |    91 -
 .../libgcrypt/mpi/pentium4/sse2/mpih-mul1.S   |    96 -
 .../libgcrypt/mpi/pentium4/sse2/mpih-mul2.S   |   136 -
 .../libgcrypt/mpi/pentium4/sse2/mpih-mul3.S   |   127 -
 .../libgcrypt/mpi/pentium4/sse2/mpih-sub1.S   |   112 -
 grub-core/lib/libgcrypt/mpi/power/Manifest    |    27 -
 grub-core/lib/libgcrypt/mpi/power/distfiles   |     1 -
 .../lib/libgcrypt/mpi/powerpc32/Manifest      |    28 -
 .../lib/libgcrypt/mpi/powerpc32/distfiles     |     1 -
 grub-core/lib/libgcrypt/mpi/sparc32/Manifest  |    24 -
 grub-core/lib/libgcrypt/mpi/sparc32/distfiles |     1 -
 .../lib/libgcrypt/mpi/sparc32v8/Manifest      |    23 -
 .../lib/libgcrypt/mpi/sparc32v8/distfiles     |     1 -
 .../lib/libgcrypt/mpi/supersparc/Manifest     |    21 -
 .../lib/libgcrypt/mpi/supersparc/distfiles    |     1 -
 grub-core/lib/libgcrypt/src/ChangeLog-2011    |    75 +-
 grub-core/lib/libgcrypt/src/Makefile.am       |    87 +-
 grub-core/lib/libgcrypt/src/Manifest          |    58 -
 grub-core/lib/libgcrypt/src/ath.c             |   344 -
 grub-core/lib/libgcrypt/src/ath.h             |   147 -
 grub-core/lib/libgcrypt/src/cipher-proto.h    |   248 +-
 grub-core/lib/libgcrypt/src/cipher.h          |   150 +-
 grub-core/lib/libgcrypt/src/const-time.c      |    88 +
 grub-core/lib/libgcrypt/src/const-time.h      |   167 +
 grub-core/lib/libgcrypt/src/context.c         |   139 +
 grub-core/lib/libgcrypt/src/context.h         |    32 +
 grub-core/lib/libgcrypt/src/dumpsexp.c        |    24 +-
 grub-core/lib/libgcrypt/src/ec-context.h      |   107 +
 grub-core/lib/libgcrypt/src/fips.c            |   802 +-
 grub-core/lib/libgcrypt/src/g10lib.h          |   265 +-
 grub-core/lib/libgcrypt/src/gcrypt-int.h      |   563 +
 grub-core/lib/libgcrypt/src/gcrypt-module.h   |   240 -
 grub-core/lib/libgcrypt/src/gcrypt-testapi.h  |    69 +
 grub-core/lib/libgcrypt/src/gcrypt.h.in       |  1173 +-
 .../lib/libgcrypt/src/gen-note-integrity.sh   |   123 +
 grub-core/lib/libgcrypt/src/global.c          |   890 +-
 grub-core/lib/libgcrypt/src/hmac256.c         |    85 +-
 grub-core/lib/libgcrypt/src/hwf-arm.c         |   393 +
 grub-core/lib/libgcrypt/src/hwf-common.h      |    28 +
 grub-core/lib/libgcrypt/src/hwf-ppc.c         |   247 +
 grub-core/lib/libgcrypt/src/hwf-s390x.c       |   231 +
 grub-core/lib/libgcrypt/src/hwf-x86.c         |   414 +
 grub-core/lib/libgcrypt/src/hwfeatures.c      |   314 +-
 .../lib/libgcrypt/src/libgcrypt-config.in     |    16 +-
 grub-core/lib/libgcrypt/src/libgcrypt.def     |   154 +-
 grub-core/lib/libgcrypt/src/libgcrypt.m4      |    96 +-
 grub-core/lib/libgcrypt/src/libgcrypt.pc.in   |    18 +
 grub-core/lib/libgcrypt/src/libgcrypt.vers    |    76 +-
 grub-core/lib/libgcrypt/src/misc.c            |   345 +-
 grub-core/lib/libgcrypt/src/module.c          |   212 -
 grub-core/lib/libgcrypt/src/mpi.h             |   215 +-
 grub-core/lib/libgcrypt/src/mpicalc.c         |   648 +
 grub-core/lib/libgcrypt/src/secmem.c          |   616 +-
 grub-core/lib/libgcrypt/src/secmem.h          |    11 +-
 grub-core/lib/libgcrypt/src/sexp.c            |  1364 +-
 grub-core/lib/libgcrypt/src/stdmem.c          |    44 +-
 grub-core/lib/libgcrypt/src/stdmem.h          |     4 +-
 grub-core/lib/libgcrypt/src/types.h           |   154 +-
 grub-core/lib/libgcrypt/src/versioninfo.rc.in |     2 +-
 grub-core/lib/libgcrypt/src/visibility.c      |   951 +-
 grub-core/lib/libgcrypt/src/visibility.h      |  1220 +-
 367 files changed, 164876 insertions(+), 31107 deletions(-)
 create mode 100644 grub-core/lib/libgcrypt/AUTHORS
 create mode 100644 grub-core/lib/libgcrypt/COPYING
 create mode 100644 grub-core/lib/libgcrypt/COPYING.LIB
 create mode 100644 grub-core/lib/libgcrypt/LICENSES
 create mode 100644 grub-core/lib/libgcrypt/README
 create mode 100644 grub-core/lib/libgcrypt/README.GIT
 create mode 100644 grub-core/lib/libgcrypt/THANKS
 create mode 100644 grub-core/lib/libgcrypt/VERSION
 delete mode 100644 grub-core/lib/libgcrypt/cipher/ChangeLog
 delete mode 100644 grub-core/lib/libgcrypt/cipher/Manifest
 delete mode 100644 grub-core/lib/libgcrypt/cipher/ac.c
 create mode 100644 grub-core/lib/libgcrypt/cipher/arcfour-amd64.S
 create mode 100644 grub-core/lib/libgcrypt/cipher/asm-common-aarch64.h
 create mode 100644 grub-core/lib/libgcrypt/cipher/asm-common-amd64.h
 create mode 100644 grub-core/lib/libgcrypt/cipher/asm-common-s390x.h
 create mode 100644 grub-core/lib/libgcrypt/cipher/asm-inline-s390x.h
 create mode 100644 grub-core/lib/libgcrypt/cipher/asm-poly1305-aarch64.h
 create mode 100644 grub-core/lib/libgcrypt/cipher/asm-poly1305-amd64.h
 create mode 100644 grub-core/lib/libgcrypt/cipher/asm-poly1305-s390x.h
 create mode 100644 grub-core/lib/libgcrypt/cipher/blake2.c
 create mode 100644 grub-core/lib/libgcrypt/cipher/blake2b-amd64-avx2.S
 create mode 100644 grub-core/lib/libgcrypt/cipher/blake2s-amd64-avx.S
 create mode 100644 grub-core/lib/libgcrypt/cipher/blowfish-amd64.S
 create mode 100644 grub-core/lib/libgcrypt/cipher/blowfish-arm.S
 create mode 100644 grub-core/lib/libgcrypt/cipher/camellia-aarch64.S
 create mode 100644 grub-core/lib/libgcrypt/cipher/camellia-aesni-avx-amd64.S
 create mode 100644 grub-core/lib/libgcrypt/cipher/camellia-aesni-avx2-amd64.S
 create mode 100644 grub-core/lib/libgcrypt/cipher/camellia-aesni-avx2-amd64.h
 create mode 100644 grub-core/lib/libgcrypt/cipher/camellia-arm.S
 create mode 100644 grub-core/lib/libgcrypt/cipher/camellia-vaes-avx2-amd64.S
 create mode 100644 grub-core/lib/libgcrypt/cipher/cast5-amd64.S
 create mode 100644 grub-core/lib/libgcrypt/cipher/cast5-arm.S
 create mode 100644 grub-core/lib/libgcrypt/cipher/chacha20-aarch64.S
 create mode 100644 grub-core/lib/libgcrypt/cipher/chacha20-amd64-avx2.S
 create mode 100644 grub-core/lib/libgcrypt/cipher/chacha20-amd64-ssse3.S
 create mode 100644 grub-core/lib/libgcrypt/cipher/chacha20-armv7-neon.S
 create mode 100644 grub-core/lib/libgcrypt/cipher/chacha20-ppc.c
 create mode 100644 grub-core/lib/libgcrypt/cipher/chacha20-s390x.S
 create mode 100644 grub-core/lib/libgcrypt/cipher/chacha20.c
 create mode 100644 grub-core/lib/libgcrypt/cipher/cipher-aeswrap.c
 create mode 100644 grub-core/lib/libgcrypt/cipher/cipher-cbc.c
 create mode 100644 grub-core/lib/libgcrypt/cipher/cipher-ccm.c
 create mode 100644 grub-core/lib/libgcrypt/cipher/cipher-cfb.c
 create mode 100644 grub-core/lib/libgcrypt/cipher/cipher-cmac.c
 create mode 100644 grub-core/lib/libgcrypt/cipher/cipher-ctr.c
 create mode 100644 grub-core/lib/libgcrypt/cipher/cipher-eax.c
 create mode 100644 grub-core/lib/libgcrypt/cipher/cipher-gcm-armv7-neon.S
 create mode 100644 grub-core/lib/libgcrypt/cipher/cipher-gcm-armv8-aarch32-ce.S
 create mode 100644 grub-core/lib/libgcrypt/cipher/cipher-gcm-armv8-aarch64-ce.S
 create mode 100644 grub-core/lib/libgcrypt/cipher/cipher-gcm-intel-pclmul.c
 create mode 100644 grub-core/lib/libgcrypt/cipher/cipher-gcm-ppc.c
 create mode 100644 grub-core/lib/libgcrypt/cipher/cipher-gcm-siv.c
 create mode 100644 grub-core/lib/libgcrypt/cipher/cipher-gcm.c
 create mode 100644 grub-core/lib/libgcrypt/cipher/cipher-internal.h
 create mode 100644 grub-core/lib/libgcrypt/cipher/cipher-ocb.c
 create mode 100644 grub-core/lib/libgcrypt/cipher/cipher-ofb.c
 create mode 100644 grub-core/lib/libgcrypt/cipher/cipher-poly1305.c
 create mode 100644 grub-core/lib/libgcrypt/cipher/cipher-selftest.c
 create mode 100644 grub-core/lib/libgcrypt/cipher/cipher-selftest.h
 create mode 100644 grub-core/lib/libgcrypt/cipher/cipher-siv.c
 create mode 100644 grub-core/lib/libgcrypt/cipher/cipher-xts.c
 create mode 100644 grub-core/lib/libgcrypt/cipher/crc-armv8-aarch64-ce.S
 create mode 100644 grub-core/lib/libgcrypt/cipher/crc-armv8-ce.c
 create mode 100644 grub-core/lib/libgcrypt/cipher/crc-intel-pclmul.c
 create mode 100644 grub-core/lib/libgcrypt/cipher/crc-ppc.c
 create mode 100644 grub-core/lib/libgcrypt/cipher/des-amd64.S
 create mode 100644 grub-core/lib/libgcrypt/cipher/dsa-common.c
 create mode 100644 grub-core/lib/libgcrypt/cipher/ecc-common.h
 create mode 100644 grub-core/lib/libgcrypt/cipher/ecc-curves.c
 create mode 100644 grub-core/lib/libgcrypt/cipher/ecc-ecdh.c
 create mode 100644 grub-core/lib/libgcrypt/cipher/ecc-ecdsa.c
 create mode 100644 grub-core/lib/libgcrypt/cipher/ecc-eddsa.c
 create mode 100644 grub-core/lib/libgcrypt/cipher/ecc-gost.c
 create mode 100644 grub-core/lib/libgcrypt/cipher/ecc-misc.c
 create mode 100644 grub-core/lib/libgcrypt/cipher/ecc-sm2.c
 create mode 100644 grub-core/lib/libgcrypt/cipher/gost-s-box.c
 create mode 100644 grub-core/lib/libgcrypt/cipher/gost-sb.h
 create mode 100644 grub-core/lib/libgcrypt/cipher/gost.h
 create mode 100644 grub-core/lib/libgcrypt/cipher/gost28147.c
 create mode 100644 grub-core/lib/libgcrypt/cipher/gostr3411-94.c
 delete mode 100644 grub-core/lib/libgcrypt/cipher/hmac-tests.c
 create mode 100644 grub-core/lib/libgcrypt/cipher/kdf-internal.h
 create mode 100644 grub-core/lib/libgcrypt/cipher/keccak-armv7-neon.S
 create mode 100644 grub-core/lib/libgcrypt/cipher/keccak.c
 create mode 100644 grub-core/lib/libgcrypt/cipher/keccak_permute_32.h
 create mode 100644 grub-core/lib/libgcrypt/cipher/keccak_permute_64.h
 create mode 100644 grub-core/lib/libgcrypt/cipher/mac-cmac.c
 create mode 100644 grub-core/lib/libgcrypt/cipher/mac-gmac.c
 create mode 100644 grub-core/lib/libgcrypt/cipher/mac-hmac.c
 create mode 100644 grub-core/lib/libgcrypt/cipher/mac-internal.h
 create mode 100644 grub-core/lib/libgcrypt/cipher/mac-poly1305.c
 create mode 100644 grub-core/lib/libgcrypt/cipher/mac.c
 create mode 100644 grub-core/lib/libgcrypt/cipher/poly1305-internal.h
 create mode 100644 grub-core/lib/libgcrypt/cipher/poly1305-s390x.S
 create mode 100644 grub-core/lib/libgcrypt/cipher/poly1305.c
 create mode 100644 grub-core/lib/libgcrypt/cipher/pubkey-internal.h
 create mode 100644 grub-core/lib/libgcrypt/cipher/pubkey-util.c
 create mode 100644 grub-core/lib/libgcrypt/cipher/rijndael-aarch64.S
 create mode 100644 grub-core/lib/libgcrypt/cipher/rijndael-aesni.c
 create mode 100644 grub-core/lib/libgcrypt/cipher/rijndael-amd64.S
 create mode 100644 grub-core/lib/libgcrypt/cipher/rijndael-arm.S
 create mode 100644 grub-core/lib/libgcrypt/cipher/rijndael-armv8-aarch32-ce.S
 create mode 100644 grub-core/lib/libgcrypt/cipher/rijndael-armv8-aarch64-ce.S
 create mode 100644 grub-core/lib/libgcrypt/cipher/rijndael-armv8-ce.c
 create mode 100644 grub-core/lib/libgcrypt/cipher/rijndael-gcm-p10le.s
 create mode 100644 grub-core/lib/libgcrypt/cipher/rijndael-internal.h
 create mode 100644 grub-core/lib/libgcrypt/cipher/rijndael-p10le.c
 create mode 100644 grub-core/lib/libgcrypt/cipher/rijndael-padlock.c
 create mode 100644 grub-core/lib/libgcrypt/cipher/rijndael-ppc-common.h
 create mode 100644 grub-core/lib/libgcrypt/cipher/rijndael-ppc-functions.h
 create mode 100644 grub-core/lib/libgcrypt/cipher/rijndael-ppc.c
 create mode 100644 grub-core/lib/libgcrypt/cipher/rijndael-ppc9le.c
 create mode 100644 grub-core/lib/libgcrypt/cipher/rijndael-s390x.c
 create mode 100644 grub-core/lib/libgcrypt/cipher/rijndael-ssse3-amd64-asm.S
 create mode 100644 grub-core/lib/libgcrypt/cipher/rijndael-ssse3-amd64.c
 create mode 100644 grub-core/lib/libgcrypt/cipher/rijndael-vaes-avx2-amd64.S
 create mode 100644 grub-core/lib/libgcrypt/cipher/rijndael-vaes.c
 create mode 100644 grub-core/lib/libgcrypt/cipher/rsa-common.c
 create mode 100644 grub-core/lib/libgcrypt/cipher/salsa20-amd64.S
 create mode 100644 grub-core/lib/libgcrypt/cipher/salsa20-armv7-neon.S
 create mode 100644 grub-core/lib/libgcrypt/cipher/salsa20.c
 create mode 100644 grub-core/lib/libgcrypt/cipher/scrypt.c
 create mode 100644 grub-core/lib/libgcrypt/cipher/serpent-armv7-neon.S
 create mode 100644 grub-core/lib/libgcrypt/cipher/serpent-avx2-amd64.S
 create mode 100644 grub-core/lib/libgcrypt/cipher/serpent-sse2-amd64.S
 create mode 100644 grub-core/lib/libgcrypt/cipher/sha1-armv7-neon.S
 create mode 100644 grub-core/lib/libgcrypt/cipher/sha1-armv8-aarch32-ce.S
 create mode 100644 grub-core/lib/libgcrypt/cipher/sha1-armv8-aarch64-ce.S
 create mode 100644 grub-core/lib/libgcrypt/cipher/sha1-avx-amd64.S
 create mode 100644 grub-core/lib/libgcrypt/cipher/sha1-avx-bmi2-amd64.S
 create mode 100644 grub-core/lib/libgcrypt/cipher/sha1-avx2-bmi2-amd64.S
 create mode 100644 grub-core/lib/libgcrypt/cipher/sha1-intel-shaext.c
 create mode 100644 grub-core/lib/libgcrypt/cipher/sha1-ssse3-amd64.S
 create mode 100644 grub-core/lib/libgcrypt/cipher/sha1.h
 create mode 100644 grub-core/lib/libgcrypt/cipher/sha256-armv8-aarch32-ce.S
 create mode 100644 grub-core/lib/libgcrypt/cipher/sha256-armv8-aarch64-ce.S
 create mode 100644 grub-core/lib/libgcrypt/cipher/sha256-avx-amd64.S
 create mode 100644 grub-core/lib/libgcrypt/cipher/sha256-avx2-bmi2-amd64.S
 create mode 100644 grub-core/lib/libgcrypt/cipher/sha256-intel-shaext.c
 create mode 100644 grub-core/lib/libgcrypt/cipher/sha256-ppc.c
 create mode 100644 grub-core/lib/libgcrypt/cipher/sha256-ssse3-amd64.S
 create mode 100644 grub-core/lib/libgcrypt/cipher/sha512-arm.S
 create mode 100644 grub-core/lib/libgcrypt/cipher/sha512-armv7-neon.S
 create mode 100644 grub-core/lib/libgcrypt/cipher/sha512-avx-amd64.S
 create mode 100644 grub-core/lib/libgcrypt/cipher/sha512-avx2-bmi2-amd64.S
 create mode 100644 grub-core/lib/libgcrypt/cipher/sha512-ppc.c
 create mode 100644 grub-core/lib/libgcrypt/cipher/sha512-ssse3-amd64.S
 create mode 100644 grub-core/lib/libgcrypt/cipher/sha512-ssse3-i386.c
 create mode 100644 grub-core/lib/libgcrypt/cipher/sm3-aarch64.S
 create mode 100644 grub-core/lib/libgcrypt/cipher/sm3-avx-bmi2-amd64.S
 create mode 100644 grub-core/lib/libgcrypt/cipher/sm3.c
 create mode 100644 grub-core/lib/libgcrypt/cipher/sm4-aesni-avx-amd64.S
 create mode 100644 grub-core/lib/libgcrypt/cipher/sm4-aesni-avx2-amd64.S
 create mode 100644 grub-core/lib/libgcrypt/cipher/sm4.c
 create mode 100644 grub-core/lib/libgcrypt/cipher/stribog.c
 delete mode 100644 grub-core/lib/libgcrypt/cipher/test-getrusage.c
 create mode 100644 grub-core/lib/libgcrypt/cipher/twofish-aarch64.S
 create mode 100644 grub-core/lib/libgcrypt/cipher/twofish-amd64.S
 create mode 100644 grub-core/lib/libgcrypt/cipher/twofish-arm.S
 create mode 100644 grub-core/lib/libgcrypt/cipher/twofish-avx2-amd64.S
 create mode 100644 grub-core/lib/libgcrypt/cipher/whirlpool-sse2-amd64.S
 create mode 100644 grub-core/lib/libgcrypt/compat/Makefile.am
 create mode 100644 grub-core/lib/libgcrypt/compat/clock.c
 create mode 100644 grub-core/lib/libgcrypt/compat/compat.c
 create mode 100644 grub-core/lib/libgcrypt/compat/getpid.c
 create mode 100644 grub-core/lib/libgcrypt/compat/libcompat.h
 create mode 100644 grub-core/lib/libgcrypt/config.h.in
 create mode 100755 grub-core/lib/libgcrypt/configure
 create mode 100644 grub-core/lib/libgcrypt/configure.ac
 create mode 100755 grub-core/lib/libgcrypt/mkinstalldirs
 delete mode 100644 grub-core/lib/libgcrypt/mpi/Manifest
 rename grub-core/lib/libgcrypt/mpi/{pentium4/sse2 => aarch64}/distfiles (80%)
 create mode 100644 grub-core/lib/libgcrypt/mpi/aarch64/mpi-asm-defs.h
 create mode 100644 grub-core/lib/libgcrypt/mpi/aarch64/mpih-add1.S
 create mode 100644 grub-core/lib/libgcrypt/mpi/aarch64/mpih-mul1.S
 create mode 100644 grub-core/lib/libgcrypt/mpi/aarch64/mpih-mul2.S
 create mode 100644 grub-core/lib/libgcrypt/mpi/aarch64/mpih-mul3.S
 create mode 100644 grub-core/lib/libgcrypt/mpi/aarch64/mpih-sub1.S
 create mode 100644 grub-core/lib/libgcrypt/mpi/amd64/func_abi.h
 rename grub-core/lib/libgcrypt/mpi/{i586 => arm}/distfiles (57%)
 create mode 100644 grub-core/lib/libgcrypt/mpi/arm/mpi-asm-defs.h
 create mode 100644 grub-core/lib/libgcrypt/mpi/arm/mpih-add1.S
 create mode 100644 grub-core/lib/libgcrypt/mpi/arm/mpih-mul1.S
 create mode 100644 grub-core/lib/libgcrypt/mpi/arm/mpih-mul2.S
 create mode 100644 grub-core/lib/libgcrypt/mpi/arm/mpih-mul3.S
 create mode 100644 grub-core/lib/libgcrypt/mpi/arm/mpih-sub1.S
 create mode 100644 grub-core/lib/libgcrypt/mpi/asm-common-aarch64.h
 create mode 100644 grub-core/lib/libgcrypt/mpi/asm-common-amd64.h
 rename grub-core/lib/libgcrypt/{cipher/rmd.h => mpi/ec-ed25519.c} (52%)
 create mode 100644 grub-core/lib/libgcrypt/mpi/ec-hw-s390x.c
 create mode 100644 grub-core/lib/libgcrypt/mpi/ec-inline.h
 create mode 100644 grub-core/lib/libgcrypt/mpi/ec-internal.h
 create mode 100644 grub-core/lib/libgcrypt/mpi/ec-nist.c
 delete mode 100644 grub-core/lib/libgcrypt/mpi/generic/Manifest
 delete mode 100644 grub-core/lib/libgcrypt/mpi/i386/Manifest
 delete mode 100644 grub-core/lib/libgcrypt/mpi/i586/Manifest
 delete mode 100644 grub-core/lib/libgcrypt/mpi/i586/README
 delete mode 100644 grub-core/lib/libgcrypt/mpi/i586/mpih-add1.S
 delete mode 100644 grub-core/lib/libgcrypt/mpi/i586/mpih-lshift.S
 delete mode 100644 grub-core/lib/libgcrypt/mpi/i586/mpih-mul1.S
 delete mode 100644 grub-core/lib/libgcrypt/mpi/i586/mpih-mul2.S
 delete mode 100644 grub-core/lib/libgcrypt/mpi/i586/mpih-mul3.S
 delete mode 100644 grub-core/lib/libgcrypt/mpi/i586/mpih-rshift.S
 delete mode 100644 grub-core/lib/libgcrypt/mpi/i586/mpih-sub1.S
 delete mode 100644 grub-core/lib/libgcrypt/mpi/m68k/Manifest
 delete mode 100644 grub-core/lib/libgcrypt/mpi/m68k/mc68020/Manifest
 delete mode 100644 grub-core/lib/libgcrypt/mpi/mips3/Manifest
 create mode 100644 grub-core/lib/libgcrypt/mpi/mpih-const-time.c
 delete mode 100644 grub-core/lib/libgcrypt/mpi/pa7100/Manifest
 delete mode 100644 grub-core/lib/libgcrypt/mpi/pentium4/README
 delete mode 100644 grub-core/lib/libgcrypt/mpi/pentium4/distfiles
 delete mode 100644 grub-core/lib/libgcrypt/mpi/pentium4/mmx/distfiles
 delete mode 100644 grub-core/lib/libgcrypt/mpi/pentium4/mmx/mpih-lshift.S
 delete mode 100644 grub-core/lib/libgcrypt/mpi/pentium4/mmx/mpih-rshift.S
 delete mode 100644 grub-core/lib/libgcrypt/mpi/pentium4/sse2/mpih-add1.S
 delete mode 100644 grub-core/lib/libgcrypt/mpi/pentium4/sse2/mpih-mul1.S
 delete mode 100644 grub-core/lib/libgcrypt/mpi/pentium4/sse2/mpih-mul2.S
 delete mode 100644 grub-core/lib/libgcrypt/mpi/pentium4/sse2/mpih-mul3.S
 delete mode 100644 grub-core/lib/libgcrypt/mpi/pentium4/sse2/mpih-sub1.S
 delete mode 100644 grub-core/lib/libgcrypt/mpi/power/Manifest
 delete mode 100644 grub-core/lib/libgcrypt/mpi/powerpc32/Manifest
 delete mode 100644 grub-core/lib/libgcrypt/mpi/sparc32/Manifest
 delete mode 100644 grub-core/lib/libgcrypt/mpi/sparc32v8/Manifest
 delete mode 100644 grub-core/lib/libgcrypt/mpi/supersparc/Manifest
 delete mode 100644 grub-core/lib/libgcrypt/src/Manifest
 delete mode 100644 grub-core/lib/libgcrypt/src/ath.c
 delete mode 100644 grub-core/lib/libgcrypt/src/ath.h
 create mode 100644 grub-core/lib/libgcrypt/src/const-time.c
 create mode 100644 grub-core/lib/libgcrypt/src/const-time.h
 create mode 100644 grub-core/lib/libgcrypt/src/context.c
 create mode 100644 grub-core/lib/libgcrypt/src/context.h
 create mode 100644 grub-core/lib/libgcrypt/src/ec-context.h
 create mode 100644 grub-core/lib/libgcrypt/src/gcrypt-int.h
 delete mode 100644 grub-core/lib/libgcrypt/src/gcrypt-module.h
 create mode 100644 grub-core/lib/libgcrypt/src/gcrypt-testapi.h
 create mode 100755 grub-core/lib/libgcrypt/src/gen-note-integrity.sh
 create mode 100644 grub-core/lib/libgcrypt/src/hwf-arm.c
 create mode 100644 grub-core/lib/libgcrypt/src/hwf-common.h
 create mode 100644 grub-core/lib/libgcrypt/src/hwf-ppc.c
 create mode 100644 grub-core/lib/libgcrypt/src/hwf-s390x.c
 create mode 100644 grub-core/lib/libgcrypt/src/hwf-x86.c
 create mode 100644 grub-core/lib/libgcrypt/src/libgcrypt.pc.in
 delete mode 100644 grub-core/lib/libgcrypt/src/module.c
 create mode 100644 grub-core/lib/libgcrypt/src/mpicalc.c

diff --git a/grub-core/lib/libgcrypt/AUTHORS b/grub-core/lib/libgcrypt/AUTHORS
new file mode 100644
index 000000000..bc6182ec1
--- /dev/null
+++ b/grub-core/lib/libgcrypt/AUTHORS
@@ -0,0 +1,264 @@
+Library: Libgcrypt
+Homepage: https://www.gnupg.org/related_software/libgcrypt/
+Download: https://ftp.gnupg.org/ftp/gcrypt/libgcrypt/
+          ftp://ftp.gnupg.org/gcrypt/libgcrypt/
+Repository: git://git.gnupg.org/libgcrypt.git
+Maintainer: Werner Koch <wk@gnupg.org>
+Bug reports: https://bugs.gnupg.org
+Security related bug reports: <security@gnupg.org>
+End-of-life: TBD
+License (library): LGPLv2.1+
+License (manual and tools): GPLv2+
+
+
+Libgcrypt is free software.  See the files COPYING.LIB and COPYING for
+copying conditions, and LICENSES for notices about a few contributions
+that require these additional notices to be distributed.  License
+copyright years may be listed using range notation, e.g., 2000-2013,
+indicating that every year in the range, inclusive, is a copyrightable
+year that would otherwise be listed individually.
+
+
+List of Copyright holders
+=========================
+
+  Copyright (C) 1989,1991-2018 Free Software Foundation, Inc.
+  Copyright (C) 1994 X Consortium
+  Copyright (C) 1996 L. Peter Deutsch
+  Copyright (C) 1997 Werner Koch
+  Copyright (C) 1998 The Internet Society
+  Copyright (C) 1996-1999 Peter Gutmann, Paul Kendall, and Chris Wedgwood
+  Copyright (C) 1996-2006 Peter Gutmann, Matt Thomlinson and Blake Coverett
+  Copyright (C) 2003 Nikos Mavroyanopoulos
+  Copyright (c) 2006 CRYPTOGAMS
+  Copyright (C) 2006-2007 NTT (Nippon Telegraph and Telephone Corporation)
+  Copyright (C) 2012-2023 g10 Code GmbH
+  Copyright (C) 2012 Simon Josefsson, Niels Möller
+  Copyright (c) 2012 Intel Corporation
+  Copyright (C) 2013 Christian Grothoff
+  Copyright (C) 2013-2022 Jussi Kivilinna
+  Copyright (C) 2013-2014 Dmitry Eremin-Solenikov
+  Copyright (C) 2014 Stephan Mueller
+  Copyright (C) 2017 Jia Zhang
+  Copyright (C) 2018 Bundesamt für Sicherheit in der Informationstechnik
+  Copyright (C) 2020 Alibaba Group.
+  Copyright (C) 2020 Tianjia Zhang
+
+
+Authors with a FSF copyright assignment
+=======================================
+
+LIBGCRYPT       Werner Koch    2001-06-07
+Assigns past and future changes.
+Assignment for future changes terminated on 2012-12-04.
+wk@gnupg.org
+Designed and implemented Libgcrypt.
+
+GNUPG  Matthew Skala              1998-08-10
+Disclaims changes.
+mskala@ansuz.sooke.bc.ca
+Wrote cipher/twofish.c.
+
+GNUPG  Natural Resources Canada    1998-08-11
+Disclaims changes by Matthew Skala.
+
+GNUPG  Michael Roth    Germany     1998-09-17
+Assigns changes.
+mroth@nessie.de
+Wrote cipher/des.c.
+Changes and bug fixes all over the place.
+
+GNUPG  Niklas Hernaeus         1998-09-18
+Disclaims changes.
+nh@df.lth.se
+Weak key patches.
+
+GNUPG  Rémi Guyomarch          1999-05-25
+Assigns past and future changes. (g10/compress.c, g10/encr-data.c,
+g10/free-packet.c, g10/mdfilter.c, g10/plaintext.c, util/iobuf.c)
+rguyom@mail.dotcom.fr
+
+ANY     g10 Code GmbH           2001-06-07
+Assignment for future changes terminated on 2012-12-04.
+Code marked with ChangeLog entries of g10 Code employees.
+
+LIBGCRYPT Timo Schulz           2001-08-31
+Assigns past and future changes.
+twoaday@freakmail.de
+
+LIBGCRYPT Simon Josefsson       2002-10-25
+Assigns past and future changes to FSF (cipher/{md4,crc}.c, CTR mode,
+CTS/MAC flags, self test improvements)
+simon@josefsson.org
+
+LIBGCRYPT Moritz Schulte       2003-04-17
+Assigns past and future changes.
+moritz@g10code.com
+
+GNUTLS  Nikolaos Mavrogiannopoulos  2003-11-22
+nmav@gnutls.org
+Original code for cipher/rfc2268.c.
+
+LIBGCRYPT      The Written Word        2005-04-15
+Assigns past and future changes. (new: src/libgcrypt.pc.in,
+src/Makefile.am, src/secmem.c, mpi/hppa1.1/mpih-mul3.S,
+mpi/hppa1.1/udiv-qrnnd.S, mpi/hppa1.1/mpih-mul2.S,
+mpi/hppa1.1/mpih-mul1.S, mpi/Makefile.am, tests/prime.c,
+tests/register.c, tests/ac.c, tests/basic.c, tests/tsexp.c,
+tests/keygen.c, tests/pubkey.c, configure.ac, acinclude.m4)
+
+LIBGCRYPT       Brad Hards       2006-02-09
+Assigns Past and Future Changes
+bradh@frogmouth.net
+(Added OFB mode. Changed cipher/cipher.c, test/basic.c doc/gcrypt.tex.
+ added SHA-224, changed cipher/sha256.c, added HMAC tests.)
+
+LIBGCRYPT       Hye-Shik Chang   2006-09-07
+Assigns Past and Future Changes
+perky@freebsd.org
+(SEED cipher)
+
+LIBGCRYPT       Werner Dittmann  2009-05-20
+Assigns Past and Future Changes
+werner.dittmann@t-online.de
+(mpi/amd64, tests/mpitests.c)
+
+GNUPG           David Shaw
+Assigns past and future changes.
+dshaw@jabberwocky.com
+(cipher/camellia-glue.c and related stuff)
+
+LIBGCRYPT       Andrey Jivsov    2010-12-09
+Assigns Past and Future Changes
+openpgp@brainhub.org
+(cipher/ecc.c and related files)
+
+LIBGCRYPT       Ulrich Müller    2012-02-15
+Assigns Past and Future Changes
+ulm@gentoo.org
+(Changes to cipher/idea.c and related files)
+
+LIBGCRYPT       Vladimir Serbinenko  2012-04-26
+Assigns Past and Future Changes
+phcoder@gmail.com
+(cipher/serpent.c)
+
+
+Authors with a DCO
+==================
+
+Andrei Scherer <andsch@inbox.com>
+2014-08-22:BF7CEF794F9.000003F0andsch@inbox.com:
+
+Christian Aistleitner <christian@quelltextlich.at>
+2013-02-26:20130226110144.GA12678@quelltextlich.at:
+
+Christian Grothoff <christian@grothoff.org>
+2013-03-21:514B5D8A.6040705@grothoff.org:
+
+Clemens Lang <cllang@redhat.com>
+2022-02-10:20220210133844.46581-1-cllang@redhat.com:
+
+Danny Tsen <dtsen@us.ibm.com>
+2021-12-20:OF85D11C2F.7A339D7D-ON002587B1.0042A81E-002587B1.0042B94D@ibm.com
+
+Dmitry Baryshkov <dbaryshkov@gmail.com>
+Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
+2013-07-13:20130713144407.GA27334@fangorn.rup.mentorg.com:
+
+Dmitry Kasatkin <dmitry.kasatkin@intel.com>
+2012-12-14:50CAE2DB.80302@intel.com:
+
+H.J. Lu <hjl.tools@gmail.com>
+2020-01-19:20200119135241.GA4970@gmail.com:
+
+Jia Zhang <qianyue.zj@alibaba-inc.com>
+2017-10-17:59E56E30.9060503@alibaba-inc.com:
+
+Jérémie Courrèges-Anglas <jca@wxcvbn.org>
+2016-05-26:87bn3ssqg0.fsf@ritchie.wxcvbn.org:
+
+Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
+2012-11-15:20121115172331.150537dzb5i6jmy8@www.dalek.fi:
+
+Jussi Kivilinna <jussi.kivilinna@iki.fi>
+2013-05-06:5186720A.4090101@iki.fi:
+
+Markus Teich <markus dot teich at stusta dot mhn dot de>
+2014-10-08:20141008180509.GA2770@trolle:
+
+Martin Storsjö <martin@martin.st>
+2018-03-28:dc1605ce-a47d-34c5-8851-d9569f9ea5d3@martin.st:
+
+Mathias L. Baumann <mathias.baumann at sociomantic.com>
+2017-01-30:07c06d79-0828-b564-d604-fd16c7c86ebe@sociomantic.com:
+
+Milan Broz <gmazyland@gmail.com>
+2014-01-13:52D44CC6.4050707@gmail.com:
+
+Paul Wolneykien <manowar@altlinux.org>
+2019-11-19:20191119204459.312927aa@rigel.localdomain:
+
+Peter Wu <peter@lekensteyn.nl>
+2015-07-22:20150722191325.GA8113@al:
+
+Rafaël Carré <funman@videolan.org>
+2012-04-20:4F91988B.1080502@videolan.org:
+
+Sergey V. <sftp.mtuci@gmail.com>
+2013-11-07:2066221.5IYa7Yq760@darkstar:
+
+Shawn Landden <shawn@git.icu>
+2019-07-09:2794651562684255@iva4-64850291ca1c.qloud-c.yandex.net:
+
+Stephan Mueller <smueller@chronox.de>
+2014-08-22:2008899.25OeoelVVA@myon.chronox.de:
+
+Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
+2020-01-08:dcda0127-2f45-93a3-0736-27259a33bffa@linux.alibaba.com:
+
+Tomáš Mráz <tm@t8m.info>
+2012-04-16:1334571250.5056.52.camel@vespa.frost.loc:
+
+Vitezslav Cizek <vcizek@suse.com>
+2015-11-05:20151105131424.GA32700@kolac.suse.cz:
+
+Werner Koch <wk@gnupg.org> (g10 Code GmbH)
+2012-12-05:87obi8u4h2.fsf@vigenere.g10code.de:
+
+
+More credits
+============
+
+Libgcrypt used to be part of GnuPG but has been taken out into its own
+package on 2000-12-21.
+
+Most of the stuff in mpi has been taken from an old GMP library
+version by Torbjorn Granlund <tege@noisy.tmg.se>.
+
+The files cipher/rndunix.c and cipher/rndw32.c are based on those
+files from Cryptlib.  Copyright Peter Gutmann, Paul Kendall, and Chris
+Wedgwood 1996-1999.
+
+The ECC code cipher/ecc.c was based on code by Sergi Blanch i Torne,
+sergi at calcurco dot org.
+
+The implementation of the Camellia cipher has been been taken from the
+original NTT provided GPL source.
+
+The CAVS testing program tests/cavs_driver.pl is not to be considered
+a part of libgcrypt proper.  We distribute it merely for convenience.
+It has a permissive license and is copyrighted by atsec information
+security corporation.  See the file for details.
+
+The file salsa20.c is based on D.J. Bernstein's public domain code and
+taken from Nettle.  Copyright 2012 Simon Josefsson and Niels Möller.
+
+
+ This file is free software; as a special exception the author gives
+ unlimited permission to copy and/or distribute it, with or without
+ modifications, as long as this notice is preserved.
+
+ This file is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY, to the extent permitted by law; without even the
+ implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
diff --git a/grub-core/lib/libgcrypt/COPYING b/grub-core/lib/libgcrypt/COPYING
new file mode 100644
index 000000000..d60c31a97
--- /dev/null
+++ b/grub-core/lib/libgcrypt/COPYING
@@ -0,0 +1,340 @@
+                   GNU GENERAL PUBLIC LICENSE
+                      Version 2, June 1991
+
+ Copyright (C) 1989, 1991 Free Software Foundation, Inc.
+     59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+                           Preamble
+
+  The licenses for most software are designed to take away your
+freedom to share and change it.  By contrast, the GNU General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users.  This
+General Public License applies to most of the Free Software
+Foundation's software and to any other program whose authors commit to
+using it.  (Some other Free Software Foundation software is covered by
+the GNU Library General Public License instead.)  You can apply it to
+your programs, too.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+this service if you wish), that you receive source code or can get it
+if you want it, that you can change the software or use pieces of it
+in new free programs; and that you know you can do these things.
+
+  To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+  For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have.  You must make sure that they, too, receive or can get the
+source code.  And you must show them these terms so they know their
+rights.
+
+  We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+  Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software.  If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+  Finally, any free program is threatened constantly by software
+patents.  We wish to avoid the danger that redistributors of a free
+program will individually obtain patent licenses, in effect making the
+program proprietary.  To prevent this, we have made it clear that any
+patent must be licensed for everyone's free use or not licensed at all.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.
+
+                   GNU GENERAL PUBLIC LICENSE
+   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+  0. This License applies to any program or other work which contains
+a notice placed by the copyright holder saying it may be distributed
+under the terms of this General Public License.  The "Program", below,
+refers to any such program or work, and a "work based on the Program"
+means either the Program or any derivative work under copyright law:
+that is to say, a work containing the Program or a portion of it,
+either verbatim or with modifications and/or translated into another
+language.  (Hereinafter, translation is included without limitation in
+the term "modification".)  Each licensee is addressed as "you".
+
+Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope.  The act of
+running the Program is not restricted, and the output from the Program
+is covered only if its contents constitute a work based on the
+Program (independent of having been made by running the Program).
+Whether that is true depends on what the Program does.
+
+  1. You may copy and distribute verbatim copies of the Program's
+source code as you receive it, in any medium, provided that you
+conspicuously and appropriately publish on each copy an appropriate
+copyright notice and disclaimer of warranty; keep intact all the
+notices that refer to this License and to the absence of any warranty;
+and give any other recipients of the Program a copy of this License
+along with the Program.
+
+You may charge a fee for the physical act of transferring a copy, and
+you may at your option offer warranty protection in exchange for a fee.
+
+  2. You may modify your copy or copies of the Program or any portion
+of it, thus forming a work based on the Program, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+    a) You must cause the modified files to carry prominent notices
+    stating that you changed the files and the date of any change.
+
+    b) You must cause any work that you distribute or publish, that in
+    whole or in part contains or is derived from the Program or any
+    part thereof, to be licensed as a whole at no charge to all third
+    parties under the terms of this License.
+
+    c) If the modified program normally reads commands interactively
+    when run, you must cause it, when started running for such
+    interactive use in the most ordinary way, to print or display an
+    announcement including an appropriate copyright notice and a
+    notice that there is no warranty (or else, saying that you provide
+    a warranty) and that users may redistribute the program under
+    these conditions, and telling the user how to view a copy of this
+    License.  (Exception: if the Program itself is interactive but
+    does not normally print such an announcement, your work based on
+    the Program is not required to print an announcement.)
+
+These requirements apply to the modified work as a whole.  If
+identifiable sections of that work are not derived from the Program,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works.  But when you
+distribute the same sections as part of a whole which is a work based
+on the Program, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Program.
+
+In addition, mere aggregation of another work not based on the Program
+with the Program (or with a work based on the Program) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+  3. You may copy and distribute the Program (or a work based on it,
+under Section 2) in object code or executable form under the terms of
+Sections 1 and 2 above provided that you also do one of the following:
+
+    a) Accompany it with the complete corresponding machine-readable
+    source code, which must be distributed under the terms of Sections
+    1 and 2 above on a medium customarily used for software interchange; or,
+
+    b) Accompany it with a written offer, valid for at least three
+    years, to give any third party, for a charge no more than your
+    cost of physically performing source distribution, a complete
+    machine-readable copy of the corresponding source code, to be
+    distributed under the terms of Sections 1 and 2 above on a medium
+    customarily used for software interchange; or,
+
+    c) Accompany it with the information you received as to the offer
+    to distribute corresponding source code.  (This alternative is
+    allowed only for noncommercial distribution and only if you
+    received the program in object code or executable form with such
+    an offer, in accord with Subsection b above.)
+
+The source code for a work means the preferred form of the work for
+making modifications to it.  For an executable work, complete source
+code means all the source code for all modules it contains, plus any
+associated interface definition files, plus the scripts used to
+control compilation and installation of the executable.  However, as a
+special exception, the source code distributed need not include
+anything that is normally distributed (in either source or binary
+form) with the major components (compiler, kernel, and so on) of the
+operating system on which the executable runs, unless that component
+itself accompanies the executable.
+
+If distribution of executable or object code is made by offering
+access to copy from a designated place, then offering equivalent
+access to copy the source code from the same place counts as
+distribution of the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+  4. You may not copy, modify, sublicense, or distribute the Program
+except as expressly provided under this License.  Any attempt
+otherwise to copy, modify, sublicense or distribute the Program is
+void, and will automatically terminate your rights under this License.
+However, parties who have received copies, or rights, from you under
+this License will not have their licenses terminated so long as such
+parties remain in full compliance.
+
+  5. You are not required to accept this License, since you have not
+signed it.  However, nothing else grants you permission to modify or
+distribute the Program or its derivative works.  These actions are
+prohibited by law if you do not accept this License.  Therefore, by
+modifying or distributing the Program (or any work based on the
+Program), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Program or works based on it.
+
+  6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the
+original licensor to copy, distribute or modify the Program subject to
+these terms and conditions.  You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties to
+this License.
+
+  7. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Program at all.  For example, if a patent
+license would not permit royalty-free redistribution of the Program by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Program.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system, which is
+implemented by public license practices.  Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+  8. If the distribution and/or use of the Program is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Program under this License
+may add an explicit geographical distribution limitation excluding
+those countries, so that distribution is permitted only in or among
+countries not thus excluded.  In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+  9. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time.  Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number.  If the Program
+specifies a version number of this License which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation.  If the Program does not specify a version number of
+this License, you may choose any version ever published by the Free Software
+Foundation.
+
+  10. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission.  For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this.  Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+                           NO WARRANTY
+
+  11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW.  EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU.  SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+  12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+                    END OF TERMS AND CONDITIONS
+
+           How to Apply These Terms to Your New Programs
+
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+    Gnomovision version 69, Copyright (C) year  name of author
+    Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+    This is free software, and you are welcome to redistribute it
+    under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License.  Of course, the commands you use may
+be called something other than `show w' and `show c'; they could even be
+mouse-clicks or menu items--whatever suits your program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary.  Here is a sample; alter the names:
+
+  Yoyodyne, Inc., hereby disclaims all copyright interest in the program
+  `Gnomovision' (which makes passes at compilers) written by James Hacker.
+
+  <signature of Ty Coon>, 1 April 1989
+  Ty Coon, President of Vice
+
+This General Public License does not permit incorporating your program into
+proprietary programs.  If your program is a subroutine library, you may
+consider it more useful to permit linking proprietary applications with the
+library.  If this is what you want to do, use the GNU Library General
+Public License instead of this License.
diff --git a/grub-core/lib/libgcrypt/COPYING.LIB 
b/grub-core/lib/libgcrypt/COPYING.LIB
new file mode 100644
index 000000000..cf9b6b997
--- /dev/null
+++ b/grub-core/lib/libgcrypt/COPYING.LIB
@@ -0,0 +1,510 @@
+
+                  GNU LESSER GENERAL PUBLIC LICENSE
+                       Version 2.1, February 1999
+
+ Copyright (C) 1991, 1999 Free Software Foundation, Inc.
+     59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+[This is the first released version of the Lesser GPL.  It also counts
+ as the successor of the GNU Library Public License, version 2, hence
+ the version number 2.1.]
+
+                            Preamble
+
+  The licenses for most software are designed to take away your
+freedom to share and change it.  By contrast, the GNU General Public
+Licenses are intended to guarantee your freedom to share and change
+free software--to make sure the software is free for all its users.
+
+  This license, the Lesser General Public License, applies to some
+specially designated software packages--typically libraries--of the
+Free Software Foundation and other authors who decide to use it.  You
+can use it too, but we suggest you first think carefully about whether
+this license or the ordinary General Public License is the better
+strategy to use in any particular case, based on the explanations
+below.
+
+  When we speak of free software, we are referring to freedom of use,
+not price.  Our General Public Licenses are designed to make sure that
+you have the freedom to distribute copies of free software (and charge
+for this service if you wish); that you receive source code or can get
+it if you want it; that you can change the software and use pieces of
+it in new free programs; and that you are informed that you can do
+these things.
+
+  To protect your rights, we need to make restrictions that forbid
+distributors to deny you these rights or to ask you to surrender these
+rights.  These restrictions translate to certain responsibilities for
+you if you distribute copies of the library or if you modify it.
+
+  For example, if you distribute copies of the library, whether gratis
+or for a fee, you must give the recipients all the rights that we gave
+you.  You must make sure that they, too, receive or can get the source
+code.  If you link other code with the library, you must provide
+complete object files to the recipients, so that they can relink them
+with the library after making changes to the library and recompiling
+it.  And you must show them these terms so they know their rights.
+
+  We protect your rights with a two-step method: (1) we copyright the
+library, and (2) we offer you this license, which gives you legal
+permission to copy, distribute and/or modify the library.
+
+  To protect each distributor, we want to make it very clear that
+there is no warranty for the free library.  Also, if the library is
+modified by someone else and passed on, the recipients should know
+that what they have is not the original version, so that the original
+author's reputation will not be affected by problems that might be
+introduced by others.
+^L
+  Finally, software patents pose a constant threat to the existence of
+any free program.  We wish to make sure that a company cannot
+effectively restrict the users of a free program by obtaining a
+restrictive license from a patent holder.  Therefore, we insist that
+any patent license obtained for a version of the library must be
+consistent with the full freedom of use specified in this license.
+
+  Most GNU software, including some libraries, is covered by the
+ordinary GNU General Public License.  This license, the GNU Lesser
+General Public License, applies to certain designated libraries, and
+is quite different from the ordinary General Public License.  We use
+this license for certain libraries in order to permit linking those
+libraries into non-free programs.
+
+  When a program is linked with a library, whether statically or using
+a shared library, the combination of the two is legally speaking a
+combined work, a derivative of the original library.  The ordinary
+General Public License therefore permits such linking only if the
+entire combination fits its criteria of freedom.  The Lesser General
+Public License permits more lax criteria for linking other code with
+the library.
+
+  We call this license the "Lesser" General Public License because it
+does Less to protect the user's freedom than the ordinary General
+Public License.  It also provides other free software developers Less
+of an advantage over competing non-free programs.  These disadvantages
+are the reason we use the ordinary General Public License for many
+libraries.  However, the Lesser license provides advantages in certain
+special circumstances.
+
+  For example, on rare occasions, there may be a special need to
+encourage the widest possible use of a certain library, so that it
+becomes a de-facto standard.  To achieve this, non-free programs must
+be allowed to use the library.  A more frequent case is that a free
+library does the same job as widely used non-free libraries.  In this
+case, there is little to gain by limiting the free library to free
+software only, so we use the Lesser General Public License.
+
+  In other cases, permission to use a particular library in non-free
+programs enables a greater number of people to use a large body of
+free software.  For example, permission to use the GNU C Library in
+non-free programs enables many more people to use the whole GNU
+operating system, as well as its variant, the GNU/Linux operating
+system.
+
+  Although the Lesser General Public License is Less protective of the
+users' freedom, it does ensure that the user of a program that is
+linked with the Library has the freedom and the wherewithal to run
+that program using a modified version of the Library.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.  Pay close attention to the difference between a
+"work based on the library" and a "work that uses the library".  The
+former contains code derived from the library, whereas the latter must
+be combined with the library in order to run.
+^L
+                  GNU LESSER GENERAL PUBLIC LICENSE
+   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+  0. This License Agreement applies to any software library or other
+program which contains a notice placed by the copyright holder or
+other authorized party saying it may be distributed under the terms of
+this Lesser General Public License (also called "this License").
+Each licensee is addressed as "you".
+
+  A "library" means a collection of software functions and/or data
+prepared so as to be conveniently linked with application programs
+(which use some of those functions and data) to form executables.
+
+  The "Library", below, refers to any such software library or work
+which has been distributed under these terms.  A "work based on the
+Library" means either the Library or any derivative work under
+copyright law: that is to say, a work containing the Library or a
+portion of it, either verbatim or with modifications and/or translated
+straightforwardly into another language.  (Hereinafter, translation is
+included without limitation in the term "modification".)
+
+  "Source code" for a work means the preferred form of the work for
+making modifications to it.  For a library, complete source code means
+all the source code for all modules it contains, plus any associated
+interface definition files, plus the scripts used to control
+compilation and installation of the library.
+
+  Activities other than copying, distribution and modification are not
+covered by this License; they are outside its scope.  The act of
+running a program using the Library is not restricted, and output from
+such a program is covered only if its contents constitute a work based
+on the Library (independent of the use of the Library in a tool for
+writing it).  Whether that is true depends on what the Library does
+and what the program that uses the Library does.
+
+  1. You may copy and distribute verbatim copies of the Library's
+complete source code as you receive it, in any medium, provided that
+you conspicuously and appropriately publish on each copy an
+appropriate copyright notice and disclaimer of warranty; keep intact
+all the notices that refer to this License and to the absence of any
+warranty; and distribute a copy of this License along with the
+Library.
+
+  You may charge a fee for the physical act of transferring a copy,
+and you may at your option offer warranty protection in exchange for a
+fee.
+
+  2. You may modify your copy or copies of the Library or any portion
+of it, thus forming a work based on the Library, and copy and
+distribute such modifications or work under the terms of Section 1
+above, provided that you also meet all of these conditions:
+
+    a) The modified work must itself be a software library.
+
+    b) You must cause the files modified to carry prominent notices
+    stating that you changed the files and the date of any change.
+
+    c) You must cause the whole of the work to be licensed at no
+    charge to all third parties under the terms of this License.
+
+    d) If a facility in the modified Library refers to a function or a
+    table of data to be supplied by an application program that uses
+    the facility, other than as an argument passed when the facility
+    is invoked, then you must make a good faith effort to ensure that,
+    in the event an application does not supply such function or
+    table, the facility still operates, and performs whatever part of
+    its purpose remains meaningful.
+
+    (For example, a function in a library to compute square roots has
+    a purpose that is entirely well-defined independent of the
+    application.  Therefore, Subsection 2d requires that any
+    application-supplied function or table used by this function must
+    be optional: if the application does not supply it, the square
+    root function must still compute square roots.)
+
+These requirements apply to the modified work as a whole.  If
+identifiable sections of that work are not derived from the Library,
+and can be reasonably considered independent and separate works in
+themselves, then this License, and its terms, do not apply to those
+sections when you distribute them as separate works.  But when you
+distribute the same sections as part of a whole which is a work based
+on the Library, the distribution of the whole must be on the terms of
+this License, whose permissions for other licensees extend to the
+entire whole, and thus to each and every part regardless of who wrote
+it.
+
+Thus, it is not the intent of this section to claim rights or contest
+your rights to work written entirely by you; rather, the intent is to
+exercise the right to control the distribution of derivative or
+collective works based on the Library.
+
+In addition, mere aggregation of another work not based on the Library
+with the Library (or with a work based on the Library) on a volume of
+a storage or distribution medium does not bring the other work under
+the scope of this License.
+
+  3. You may opt to apply the terms of the ordinary GNU General Public
+License instead of this License to a given copy of the Library.  To do
+this, you must alter all the notices that refer to this License, so
+that they refer to the ordinary GNU General Public License, version 2,
+instead of to this License.  (If a newer version than version 2 of the
+ordinary GNU General Public License has appeared, then you can specify
+that version instead if you wish.)  Do not make any other change in
+these notices.
+^L
+  Once this change is made in a given copy, it is irreversible for
+that copy, so the ordinary GNU General Public License applies to all
+subsequent copies and derivative works made from that copy.
+
+  This option is useful when you wish to copy part of the code of
+the Library into a program that is not a library.
+
+  4. You may copy and distribute the Library (or a portion or
+derivative of it, under Section 2) in object code or executable form
+under the terms of Sections 1 and 2 above provided that you accompany
+it with the complete corresponding machine-readable source code, which
+must be distributed under the terms of Sections 1 and 2 above on a
+medium customarily used for software interchange.
+
+  If distribution of object code is made by offering access to copy
+from a designated place, then offering equivalent access to copy the
+source code from the same place satisfies the requirement to
+distribute the source code, even though third parties are not
+compelled to copy the source along with the object code.
+
+  5. A program that contains no derivative of any portion of the
+Library, but is designed to work with the Library by being compiled or
+linked with it, is called a "work that uses the Library".  Such a
+work, in isolation, is not a derivative work of the Library, and
+therefore falls outside the scope of this License.
+
+  However, linking a "work that uses the Library" with the Library
+creates an executable that is a derivative of the Library (because it
+contains portions of the Library), rather than a "work that uses the
+library".  The executable is therefore covered by this License.
+Section 6 states terms for distribution of such executables.
+
+  When a "work that uses the Library" uses material from a header file
+that is part of the Library, the object code for the work may be a
+derivative work of the Library even though the source code is not.
+Whether this is true is especially significant if the work can be
+linked without the Library, or if the work is itself a library.  The
+threshold for this to be true is not precisely defined by law.
+
+  If such an object file uses only numerical parameters, data
+structure layouts and accessors, and small macros and small inline
+functions (ten lines or less in length), then the use of the object
+file is unrestricted, regardless of whether it is legally a derivative
+work.  (Executables containing this object code plus portions of the
+Library will still fall under Section 6.)
+
+  Otherwise, if the work is a derivative of the Library, you may
+distribute the object code for the work under the terms of Section 6.
+Any executables containing that work also fall under Section 6,
+whether or not they are linked directly with the Library itself.
+^L
+  6. As an exception to the Sections above, you may also combine or
+link a "work that uses the Library" with the Library to produce a
+work containing portions of the Library, and distribute that work
+under terms of your choice, provided that the terms permit
+modification of the work for the customer's own use and reverse
+engineering for debugging such modifications.
+
+  You must give prominent notice with each copy of the work that the
+Library is used in it and that the Library and its use are covered by
+this License.  You must supply a copy of this License.  If the work
+during execution displays copyright notices, you must include the
+copyright notice for the Library among them, as well as a reference
+directing the user to the copy of this License.  Also, you must do one
+of these things:
+
+    a) Accompany the work with the complete corresponding
+    machine-readable source code for the Library including whatever
+    changes were used in the work (which must be distributed under
+    Sections 1 and 2 above); and, if the work is an executable linked
+    with the Library, with the complete machine-readable "work that
+    uses the Library", as object code and/or source code, so that the
+    user can modify the Library and then relink to produce a modified
+    executable containing the modified Library.  (It is understood
+    that the user who changes the contents of definitions files in the
+    Library will not necessarily be able to recompile the application
+    to use the modified definitions.)
+
+    b) Use a suitable shared library mechanism for linking with the
+    Library.  A suitable mechanism is one that (1) uses at run time a
+    copy of the library already present on the user's computer system,
+    rather than copying library functions into the executable, and (2)
+    will operate properly with a modified version of the library, if
+    the user installs one, as long as the modified version is
+    interface-compatible with the version that the work was made with.
+
+    c) Accompany the work with a written offer, valid for at least
+    three years, to give the same user the materials specified in
+    Subsection 6a, above, for a charge no more than the cost of
+    performing this distribution.
+
+    d) If distribution of the work is made by offering access to copy
+    from a designated place, offer equivalent access to copy the above
+    specified materials from the same place.
+
+    e) Verify that the user has already received a copy of these
+    materials or that you have already sent this user a copy.
+
+  For an executable, the required form of the "work that uses the
+Library" must include any data and utility programs needed for
+reproducing the executable from it.  However, as a special exception,
+the materials to be distributed need not include anything that is
+normally distributed (in either source or binary form) with the major
+components (compiler, kernel, and so on) of the operating system on
+which the executable runs, unless that component itself accompanies
+the executable.
+
+  It may happen that this requirement contradicts the license
+restrictions of other proprietary libraries that do not normally
+accompany the operating system.  Such a contradiction means you cannot
+use both them and the Library together in an executable that you
+distribute.
+^L
+  7. You may place library facilities that are a work based on the
+Library side-by-side in a single library together with other library
+facilities not covered by this License, and distribute such a combined
+library, provided that the separate distribution of the work based on
+the Library and of the other library facilities is otherwise
+permitted, and provided that you do these two things:
+
+    a) Accompany the combined library with a copy of the same work
+    based on the Library, uncombined with any other library
+    facilities.  This must be distributed under the terms of the
+    Sections above.
+
+    b) Give prominent notice with the combined library of the fact
+    that part of it is a work based on the Library, and explaining
+    where to find the accompanying uncombined form of the same work.
+
+  8. You may not copy, modify, sublicense, link with, or distribute
+the Library except as expressly provided under this License.  Any
+attempt otherwise to copy, modify, sublicense, link with, or
+distribute the Library is void, and will automatically terminate your
+rights under this License.  However, parties who have received copies,
+or rights, from you under this License will not have their licenses
+terminated so long as such parties remain in full compliance.
+
+  9. You are not required to accept this License, since you have not
+signed it.  However, nothing else grants you permission to modify or
+distribute the Library or its derivative works.  These actions are
+prohibited by law if you do not accept this License.  Therefore, by
+modifying or distributing the Library (or any work based on the
+Library), you indicate your acceptance of this License to do so, and
+all its terms and conditions for copying, distributing or modifying
+the Library or works based on it.
+
+  10. Each time you redistribute the Library (or any work based on the
+Library), the recipient automatically receives a license from the
+original licensor to copy, distribute, link with or modify the Library
+subject to these terms and conditions.  You may not impose any further
+restrictions on the recipients' exercise of the rights granted herein.
+You are not responsible for enforcing compliance by third parties with
+this License.
+^L
+  11. If, as a consequence of a court judgment or allegation of patent
+infringement or for any other reason (not limited to patent issues),
+conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot
+distribute so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you
+may not distribute the Library at all.  For example, if a patent
+license would not permit royalty-free redistribution of the Library by
+all those who receive copies directly or indirectly through you, then
+the only way you could satisfy both it and this License would be to
+refrain entirely from distribution of the Library.
+
+If any portion of this section is held invalid or unenforceable under
+any particular circumstance, the balance of the section is intended to
+apply, and the section as a whole is intended to apply in other
+circumstances.
+
+It is not the purpose of this section to induce you to infringe any
+patents or other property right claims or to contest validity of any
+such claims; this section has the sole purpose of protecting the
+integrity of the free software distribution system which is
+implemented by public license practices.  Many people have made
+generous contributions to the wide range of software distributed
+through that system in reliance on consistent application of that
+system; it is up to the author/donor to decide if he or she is willing
+to distribute software through any other system and a licensee cannot
+impose that choice.
+
+This section is intended to make thoroughly clear what is believed to
+be a consequence of the rest of this License.
+
+  12. If the distribution and/or use of the Library is restricted in
+certain countries either by patents or by copyrighted interfaces, the
+original copyright holder who places the Library under this License
+may add an explicit geographical distribution limitation excluding those
+countries, so that distribution is permitted only in or among
+countries not thus excluded.  In such case, this License incorporates
+the limitation as if written in the body of this License.
+
+  13. The Free Software Foundation may publish revised and/or new
+versions of the Lesser General Public License from time to time.
+Such new versions will be similar in spirit to the present version,
+but may differ in detail to address new problems or concerns.
+
+Each version is given a distinguishing version number.  If the Library
+specifies a version number of this License which applies to it and
+"any later version", you have the option of following the terms and
+conditions either of that version or of any later version published by
+the Free Software Foundation.  If the Library does not specify a
+license version number, you may choose any version ever published by
+the Free Software Foundation.
+^L
+  14. If you wish to incorporate parts of the Library into other free
+programs whose distribution conditions are incompatible with these,
+write to the author to ask for permission.  For software which is
+copyrighted by the Free Software Foundation, write to the Free
+Software Foundation; we sometimes make exceptions for this.  Our
+decision will be guided by the two goals of preserving the free status
+of all derivatives of our free software and of promoting the sharing
+and reuse of software generally.
+
+                            NO WARRANTY
+
+  15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO
+WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW.
+EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR
+OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY
+KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE
+LIBRARY IS WITH YOU.  SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME
+THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+  16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN
+WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY
+AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU
+FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR
+CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE
+LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
+RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
+FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF
+SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+DAMAGES.
+
+                     END OF TERMS AND CONDITIONS
+^L
+           How to Apply These Terms to Your New Libraries
+
+  If you develop a new library, and you want it to be of the greatest
+possible use to the public, we recommend making it free software that
+everyone can redistribute and change.  You can do so by permitting
+redistribution under these terms (or, alternatively, under the terms
+of the ordinary General Public License).
+
+  To apply these terms, attach the following notices to the library.
+It is safest to attach them to the start of each source file to most
+effectively convey the exclusion of warranty; and each file should
+have at least the "copyright" line and a pointer to where the full
+notice is found.
+
+
+    <one line to give the library's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This library is free software; you can redistribute it and/or
+    modify it under the terms of the GNU Lesser General Public
+    License as published by the Free Software Foundation; either
+    version 2.1 of the License, or (at your option) any later version.
+
+    This library is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    Lesser General Public License for more details.
+
+    You should have received a copy of the GNU Lesser General Public
+    License along with this library; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
+
+Also add information on how to contact you by electronic and paper mail.
+
+You should also get your employer (if you work as a programmer) or
+your school, if any, to sign a "copyright disclaimer" for the library,
+if necessary.  Here is a sample; alter the names:
+
+  Yoyodyne, Inc., hereby disclaims all copyright interest in the
+  library `Frob' (a library for tweaking knobs) written by James
+  Random Hacker.
+
+  <signature of Ty Coon>, 1 April 1990
+  Ty Coon, President of Vice
+
+That's all there is to it!
+
+
diff --git a/grub-core/lib/libgcrypt/LICENSES b/grub-core/lib/libgcrypt/LICENSES
new file mode 100644
index 000000000..8be7fb244
--- /dev/null
+++ b/grub-core/lib/libgcrypt/LICENSES
@@ -0,0 +1,287 @@
+Additional license notices for Libgcrypt.                    -*- org -*-
+
+This file contains the copying permission notices for various files in
+the Libgcrypt distribution which are not covered by the GNU Lesser
+General Public License (LGPL) or the GNU General Public License (GPL).
+
+These notices all require that a copy of the notice be included
+in the accompanying documentation and be distributed with binary
+distributions of the code, so be sure to include this file along
+with any binary distributions derived from the GNU C Library.
+
+* BSD_3Clause
+
+  For files:
+  - cipher/sha256-avx-amd64.S
+  - cipher/sha256-avx2-bmi2-amd64.S
+  - cipher/sha256-ssse3-amd64.S
+  - cipher/sha512-avx-amd64.S
+  - cipher/sha512-avx2-bmi2-amd64.S
+  - cipher/sha512-ssse3-amd64.S
+  - cipher/sha512-ssse3-i386.c
+
+#+begin_quote
+  Copyright (c) 2012, Intel Corporation
+
+  All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions are
+  met:
+
+  * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+
+  * Redistributions in binary form must reproduce the above copyright
+    notice, this list of conditions and the following disclaimer in the
+    documentation and/or other materials provided with the
+    distribution.
+
+  * Neither the name of the Intel Corporation nor the names of its
+    contributors may be used to endorse or promote products derived from
+    this software without specific prior written permission.
+
+
+  THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION "AS IS" AND ANY
+  EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+  PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR
+  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#+end_quote
+
+  For files:
+  - random/jitterentropy-base.c
+  - random/jitterentropy-gcd.c
+  - random/jitterentropy-gcd.h
+  - random/jitterentropy-health.c
+  - random/jitterentropy-health.h
+  - random/jitterentropy-noise.c
+  - random/jitterentropy-noise.h
+  - random/jitterentropy-sha3.c
+  - random/jitterentropy-sha3.h
+  - random/jitterentropy-timer.c
+  - random/jitterentropy-timer.h
+  - random/jitterentropy.h
+  - random/rndjent.c (plus common Libgcrypt copyright holders)
+
+#+begin_quote
+ Copyright (C) 2017 - 2021, Stephan Mueller <smueller@chronox.de>
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ 1. Redistributions of source code must retain the above copyright
+    notice, and the entire permission notice in its entirety,
+    including the disclaimer of warranties.
+ 2. Redistributions in binary form must reproduce the above copyright
+    notice, this list of conditions and the following disclaimer in the
+    documentation and/or other materials provided with the distribution.
+ 3. The name of the author may not be used to endorse or promote
+    products derived from this software without specific prior
+    written permission.
+
+ ALTERNATIVELY, this product may be distributed under the terms of
+ the GNU General Public License, in which case the provisions of the GPL2
+ are required INSTEAD OF the above restrictions.  (This clause is
+ necessary due to a potential bad interaction between the GPL and
+ the restrictions contained in a BSD-style copyright.)
+
+ THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF
+ WHICH ARE HEREBY DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE
+ LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+ OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+ USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH
+ DAMAGE.
+#+end_quote
+
+  For files:
+  - cipher/cipher-gcm-ppc.c
+
+#+begin_quote
+ Copyright (c) 2006, CRYPTOGAMS by <appro@openssl.org>
+ All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+       * Redistributions of source code must retain copyright notices,
+         this list of conditions and the following disclaimer.
+
+       * Redistributions in binary form must reproduce the above
+         copyright notice, this list of conditions and the following
+         disclaimer in the documentation and/or other materials
+         provided with the distribution.
+
+       * Neither the name of the CRYPTOGAMS nor the names of its
+         copyright holder and contributors may be used to endorse or
+         promote products derived from this software without specific
+         prior written permission.
+
+ ALTERNATIVELY, provided that this notice is retained in full, this
+ product may be distributed under the terms of the GNU General Public
+ License (GPL), in which case the provisions of the GPL apply INSTEAD OF
+ those given above.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#+end_quote
+
+* X License
+
+  For files:
+  - install.sh
+
+#+begin_quote
+  Copyright (C) 1994 X Consortium
+
+  Permission is hereby granted, free of charge, to any person obtaining a copy
+  of this software and associated documentation files (the "Software"), to
+  deal in the Software without restriction, including without limitation the
+  rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+  sell copies of the Software, and to permit persons to whom the Software is
+  furnished to do so, subject to the following conditions:
+
+  The above copyright notice and this permission notice shall be included in
+  all copies or substantial portions of the Software.
+
+  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+  X CONSORTIUM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+  AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNEC-
+  TION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+  Except as contained in this notice, the name of the X Consortium shall not
+  be used in advertising or otherwise to promote the sale, use or other deal-
+  ings in this Software without prior written authorization from the X Consor-
+  tium.
+#+end_quote
+
+* Public domain
+
+  For files:
+  - cipher/arcfour-amd64.S
+
+#+begin_quote
+ Author: Marc Bevand <bevand_m (at) epita.fr>
+ Licence: I hereby disclaim the copyright on this code and place it
+ in the public domain.
+#+end_quote
+
+* OCB license 1
+
+  For files:
+  - cipher/cipher-ocb.c
+
+#+begin_quote
+  OCB is covered by several patents but may be used freely by most
+  software.  See http://web.cs.ucdavis.edu/~rogaway/ocb/license.htm .
+  In particular license 1 is suitable for Libgcrypt: See
+  http://web.cs.ucdavis.edu/~rogaway/ocb/license1.pdf for the full
+  license document; it basically says:
+
+    License 1 — License for Open-Source Software Implementations of OCB
+                (Jan 9, 2013)
+
+    Under this license, you are authorized to make, use, and
+    distribute open-source software implementations of OCB. This
+    license terminates for you if you sue someone over their
+    open-source software implementation of OCB claiming that you have
+    a patent covering their implementation.
+
+
+
+ License for Open Source Software Implementations of OCB
+ January 9, 2013
+
+ 1 Definitions
+
+ 1.1 “Licensor” means Phillip Rogaway.
+
+ 1.2 “Licensed Patents” means any patent that claims priority to United
+ States Patent Application No. 09/918,615 entitled “Method and Apparatus
+ for Facilitating Efficient Authenticated Encryption,” and any utility,
+ divisional, provisional, continuation, continuations-in-part, reexamination,
+ reissue, or foreign counterpart patents that may issue with respect to the
+ aforesaid patent application. This includes, but is not limited to, United
+ States Patent No. 7,046,802; United States Patent No. 7,200,227; United
+ States Patent No. 7,949,129; United States Patent No. 8,321,675 ; and any
+ patent that issues out of United States Patent Application No. 13/669,114.
+
+ 1.3 “Use” means any practice of any invention claimed in the Licensed Patents.
+
+ 1.4 “Software Implementation” means any practice of any invention
+ claimed in the Licensed Patents that takes the form of software executing on
+ a user-programmable, general-purpose computer or that takes the form of a
+ computer-readable medium storing such software. Software Implementation does
+ not include, for example, application-specific integrated circuits (ASICs),
+ field-programmable gate arrays (FPGAs), embedded systems, or IP cores.
+
+ 1.5 “Open Source Software” means software whose source code is published
+ and made available for inspection and use by anyone because either (a) the
+ source code is subject to a license that permits recipients to copy, modify,
+ and distribute the source code without payment of fees or royalties, or
+ (b) the source code is in the public domain, including code released for
+ public use through a CC0 waiver. All licenses certified by the Open Source
+ Initiative at opensource.org as of January 9, 2013 and all Creative Commons
+ licenses identified on the creativecommons.org website as of January 9,
+ 2013, including the Public License Fallback of the CC0 waiver, satisfy these
+ requirements for the purposes of this license.
+
+ 1.6 “Open Source Software Implementation” means a Software
+ Implementation in which the software implicating the Licensed Patents is
+ Open Source Software. Open Source Software Implementation does not include
+ any Software Implementation in which the software implicating the Licensed
+ Patents is combined, so as to form a larger program, with software that is
+ not Open Source Software.
+
+ 2 License Grant
+
+ 2.1 License. Subject to your compliance with the term s of this license,
+ including the restriction set forth in Section 2.2, Licensor hereby
+ grants to you a perpetual, worldwide, non-exclusive, non-transferable,
+ non-sublicenseable, no-charge, royalty-free, irrevocable license to practice
+ any invention claimed in the Licensed Patents in any Open Source Software
+ Implementation.
+
+ 2.2 Restriction. If you or your affiliates institute patent litigation
+ (including, but not limited to, a cross-claim or counterclaim in a lawsuit)
+ against any entity alleging that any Use authorized by this license
+ infringes another patent, then any rights granted to you under this license
+ automatically terminate as of the date such litigation is filed.
+
+ 3 Disclaimer
+ YOUR USE OF THE LICENSED PATENTS IS AT YOUR OWN RISK AND UNLESS REQUIRED
+ BY APPLICABLE LAW, LICENSOR MAKES NO REPRESENTATIONS OR WARRANTIES OF ANY
+ KIND CONCERNING THE LICENSED PATENTS OR ANY PRODUCT EMBODYING ANY LICENSED
+ PATENT, EXPRESS OR IMPLIED, STATUT ORY OR OTHERWISE, INCLUDING, WITHOUT
+ LIMITATION, WARRANTIES OF TITLE, MERCHANTIBILITY, FITNESS FOR A PARTICULAR
+ PURPOSE, OR NONINFRINGEMENT. IN NO EVENT WILL LICENSOR BE LIABLE FOR ANY
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
+ ARISING FROM OR RELATED TO ANY USE OF THE LICENSED PATENTS, INCLUDING,
+ WITHOUT LIMITATION, DIRECT, INDIRECT, INCIDENTAL, CONSEQUENTIAL, PUNITIVE
+ OR SPECIAL DAMAGES, EVEN IF LICENSOR HAS BEEN ADVISED OF THE POSSIBILITY OF
+ SUCH DAMAGES PRIOR TO SUCH AN OCCURRENCE.
+#+end_quote
diff --git a/grub-core/lib/libgcrypt/README b/grub-core/lib/libgcrypt/README
new file mode 100644
index 000000000..3c174a36f
--- /dev/null
+++ b/grub-core/lib/libgcrypt/README
@@ -0,0 +1,280 @@
+                   Libgcrypt - The GNU Crypto Library
+                  ------------------------------------
+                             Version 1.10
+
+       Copyright (C) 1989,1991-2018 Free Software Foundation, Inc.
+       Copyright (C) 2012-2023 g10 Code GmbH
+       Copyright (C) 2013-2023 Jussi Kivilinna
+
+    Libgcrypt is free software.  See the file AUTHORS for full copying
+    notices, and LICENSES for notices about contributions that require
+    these additional notices to be distributed.
+
+
+    Overview
+    --------
+
+    Libgcrypt is a general purpose crypto library based on the code
+    used in GnuPG.  Libgcrypt depends on the library `libgpg-error',
+    which must be installed correctly before Libgcrypt is to be built.
+    Libgcrypt is distributed under the LGPL, see the section "License"
+    below for details.
+
+
+    Build Instructions
+    ------------------
+
+    The download canonical location for libgcrypt is:
+
+      https://gnupg.org/ftp/gcrypt/libgcrypt/
+
+    To build libgcrypt you need libgpg-error:
+
+      https://gnupg.org/ftp/gcrypt/libgpg-error/
+
+    You should get the latest versions of course.
+
+    After building and installing the libgpg-error package, you may
+    continue with Libgcrypt installation as with allmost all GNU
+    packages, you just have to do
+
+       ./configure
+       make
+       make check
+       make install
+
+    The "make check" is not required but a good idea to see whether
+    the library works as expected.  The check takes some while and
+    prints some benchmarking results.  Before doing "make install" you
+    probably need to become root.
+
+    To build libgcrypt for Microsoft Windows, you need to have the
+    mingw32 cross-building toolchain installed.  Instead of running a
+    plain configure you use
+
+      ./autogen.sh --build-w32
+      make
+      make install
+
+    By default this command sequences expectsd a libgpg-error
+    installed below $HOME/w32root and installs libgcrypt to that
+    directory too.  See the autogen.sh code for details.
+
+    The documentation is available as an Info file (gcrypt.info).  To
+    build documentation in PDF, run this:
+
+      cd doc
+      make pdf
+
+
+
+    Mailing List
+    ------------
+
+    You may want to join the developer's mailing list
+    gcrypt-devel@gnupg.org by sending mail with a subject of
+    "subscribe" to gcrypt-devel-request@gnupg.org.  An archive of this
+    list is available at https://lists.gnupg.org .
+
+
+    Configure options
+    -----------------
+    Here is a list of configure options which are sometimes useful
+    for installation.
+
+     --enable-large-data-tests
+                     With this option a "make check" will take really
+                     long due to extra checks for the hash algorithms.
+
+     --enable-m-guard
+                     Enable the integrated malloc checking code. Please
+                     note that this feature does not work on all CPUs
+                     (e.g. SunOS 5.7 on UltraSparc-2) and might give
+                     you a bus error.
+
+     --disable-asm
+                     Do not use assembler modules.  It is not possible
+                     to use this on some CPU types.
+
+     --enable-ld-version-script
+                     Libgcrypt tries to build a library where internal
+                     symbols are not exported.  This requires support
+                     from ld and is currently enabled for a few OSes.
+                     If you know that your ld supports the so called
+                     ELF version scripts, you can use this option to
+                     force its use.  OTOH, if you get error message
+                     from the linker, you probably want to use this
+                     option to disable the use of version scripts.
+                     Note, that you should never ever use an
+                     undocumented symbol or one which is prefixed with
+                     an underscore.
+
+     --enable-ciphers=list
+     --enable-pubkey-ciphers=list
+     --enable-digests=list
+                     If not otherwise specified, all algorithms
+                     included in the libgcrypt source tree are built.
+                    An exception are algorithms, which depend on
+                    features not provided by the system, like 64bit
+                    data types.  With these switches it is possible
+                     to select exactly those algorithm modules, which
+                    should be built.  The algorithms are to be
+                     separated by spaces, commas or colons.  To view
+                     the list used with the current build the program
+                     tests/version may be used.
+
+     --disable-endian-check
+                     Don't let configure test for the endianness but
+                     try to use the OS provided macros at compile
+                     time.  This is helpful to create OS X fat binaries.
+
+     --enable-random-daemon
+                     Include support for a global random daemon and
+                     build the daemon.  This is an experimental feature.
+
+     --enable-mpi-path=EXTRA_PATH
+                     Prepend EXTRA_PATH to list of CPU specific
+                     optimizations.  For example, if you want to add
+                     optimizations forn a Intel Pentium 4 compatible
+                     CPU, you may use
+                        --enable-mpi-path=pentium4/sse2:pentium4/mmx
+                     Take care: The generated library may crash on
+                     non-compatible CPUs.
+
+     --enable-random=NAME
+                     Force the use of the random gathering module
+                    NAME.  Default is either to use /dev/random or
+                    the auto mode.  Possible values for NAME are:
+                      egd - Use the module which accesses the
+                            Entropy Gathering Daemon. See the webpages
+                            for more information about it.
+                     unix - Use the standard Unix module which does not
+                            have a very good performance.
+                    linux - Use the module which accesses /dev/random.
+                            This is the first choice and the default one
+                            for GNU/Linux or *BSD.
+                      auto - Compile linux, egd and unix in and
+                             automagically select at runtime.
+
+     --enable-hmac-binary-check
+                     Include support to check the binary at runtime
+                     against a HMAC checksum.  This works only in FIPS
+                     mode on systems providing the dladdr function and using
+                     the ELF binary format.
+
+     --with-fips-module-version=version
+                     Specify a string used as a module version for FIPS
+                     certification purposes.
+
+     --disable-padlock-support
+                     Disable support for the PadLock engine of VIA
+                     processors.  The default is to use PadLock if
+                     available.  Try this if you get problems with
+                     assembler code.
+
+     --disable-aesni-support
+                     Disable support for the AES-NI instructions of
+                     newer Intel CPUs.  The default is to use AES-NI
+                     if available.  Try this if you get problems with
+                     assembler code.
+
+     --disable-O-flag-munging
+                     Some code is too complex for some compilers while
+                     in higher optimization modes, thus the compiler
+                     invocation is modified to use a lower
+                     optimization level.  Usually this works very well
+                     but on some platforms these rules break the
+                     invocation.  This option may be used to disable
+                     the feature under the assumption that either good
+                     CFLAGS are given or the compiler can grok the code.
+
+
+
+
+    Build Problems
+    --------------
+
+    If you have a problem with a a certain release, please first check
+    the Release-info URL given in the NEWS file.
+
+    We can't check all assembler files, so if you have problems
+    assembling them (or the program crashes) use --disable-asm with
+    ./configure.  If you opt to delete individual replacement files in
+    hopes of using the remaining ones, be aware that the configure
+    scripts may consider several subdirectories to get all available
+    assembler files; be sure to delete the correct ones.  Never delete
+    udiv-qrnnd.S in any CPU directory, because there may be no C
+    substitute (in mpi/genereic).  Don't forget to delete
+    "config.cache" and run "./config.status --recheck".  We got a few
+    reports about problems using versions of gcc earlier than 2.96
+    along with a non-GNU assembler (as).  If this applies to your
+    platform, you can either upgrade gcc to a more recent version, or
+    use the GNU assembler.
+
+    Some make tools are broken - the best solution is to use GNU's
+    make.  Try gmake or grab the sources from a GNU archive and
+    install them.
+
+    Specific problems on some machines:
+
+      * AArch64 (GCC 11.1 and 11.2)
+
+       Because of the bug in GCC (fixed in 11.3), with the option
+       -O3, vectorization results wrong code for the function
+       buf_eq_const.  Please use -O2 or -fno-tree-loop-vectorize.
+
+      * IBM RS/6000 running AIX
+
+       Due to a change in gcc (since version 2.8) the MPI stuff may
+       not build. In this case try to run configure using:
+           CFLAGS="-g -O2 -mcpu=powerpc" ./configure
+
+      * SVR4.2 (ESIX V4.2 cc)
+
+        Due to problems with the ESIX as(1), you probably want to do:
+            CFLAGS="-O -K pentium" ./configure --disable-asm
+
+      * SunOS 4.1.4
+
+         ./configure ac_cv_sys_symbol_underscore=yes
+
+      * Sparc64 CPUs
+
+        We have reports about failures in the AES module when
+        compiling using gcc (e.g. version 4.1.2) and the option -O3;
+        using -O2 solves the problem.
+
+
+    License
+    -------
+
+    The library is distributed under the terms of the GNU Lesser
+    General Public License (LGPL); see the file COPYING.LIB for the
+    actual terms.
+
+    The helper programs as well as the documentation are distributed
+    under the terms of the GNU General Public License (GPL); see the
+    file COPYING for the actual terms.
+
+    The file LICENSES has notices about contributions that require
+    that these additional notices are distributed.
+
+
+    Contact
+    -------
+
+    See the file AUTHORS.
+
+    Commercial grade support for Libgcrypt is available; for a listing
+    of offers see https://www.gnupg.org/service.html .
+
+    Since 2001 maintenance and development of Libgcrypt is done by
+    g10 Code GmbH and until 2021 mostly financed by donations.
+
+  This file is Free Software; as a special exception the authors gives
+  unlimited permission to copy and/or distribute it, with or without
+  modifications, as long as this notice is preserved. For conditions
+  of the whole package, please see the file COPYING.  This file is
+  distributed in the hope that it will be useful, but WITHOUT ANY
+  WARRANTY, to the extent permitted by law; without even the implied
+  warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
diff --git a/grub-core/lib/libgcrypt/README.GIT 
b/grub-core/lib/libgcrypt/README.GIT
new file mode 100644
index 000000000..ee2c6383f
--- /dev/null
+++ b/grub-core/lib/libgcrypt/README.GIT
@@ -0,0 +1,49 @@
+If you are building from GIT, run the script
+
+./autogen.sh
+
+first, to make sure that you have all the necessary maintainer tools
+are installed and to build the actual configuration files.  If you
+have just checked out from GIT, you should add the option "--force" to
+autogen.sh so that meta data is noticed by autom4te.cache.  Then run
+
+./configure --enable-maintainer-mode
+
+followed by the usual make.
+
+If autogen.sh complains about insufficient versions of the required
+tools, or the tools are not installed, you may use environment
+variables to override the default tool names:
+
+ AUTOMAKE_SUFFIX  is used as a suffix for all tools from the automake
+                  package.  For example
+                     AUTOMAKE_SUFFIX="-1.7" ./autogen.sh
+                  uses "automake-1.7" and "aclocal-1.7.
+ AUTOMAKE_PREFIX  is used as a prefix for all tools from the automake
+                  page and may be combined with AUTOMAKE_SUFFIX. e.g.:
+                    AUTOMAKE_PREFIX=/usr/foo/bin ./autogen.sh
+                  uses "automake" and "aclocal" in the /usr/foo/bin
+                  directory.
+ AUTOCONF_SUFFIX  is used as a suffix for all tools from the automake
+                  package
+ AUTOCONF_PREFIX  is used as a prefix for all tools from the automake
+                  package
+ GETTEXT_SUFFIX   is used as a suffix for all tools from the gettext
+                  package
+ GETTEXT_PREFIX   is used as a prefix for all tools from the gettext
+                  package
+
+It is also possible to use the variable name AUTOMAKE, AUTOCONF,
+ACLOCAL, AUTOHEADER, GETTEXT and MSGMERGE to directly specify the name
+of the programs to run.  It is however better to use the suffix and
+prefix forms as described above because that does not require
+knowledge about the actual tools used by autogen.sh.
+
+
+Please don't use autopoint, libtoolize or autoreconf unless you are
+the current maintainer and want to update the standard configuration
+files.  All those files should be in GIT and only updated manually
+if the maintainer decides that newer versions are required.  The
+maintainer should also make sure that the required version of automake
+et al. are properly indicated at the top of configure.ac and take care
+to copy the files and not merely use symlinks.
diff --git a/grub-core/lib/libgcrypt/THANKS b/grub-core/lib/libgcrypt/THANKS
new file mode 100644
index 000000000..6a44eade0
--- /dev/null
+++ b/grub-core/lib/libgcrypt/THANKS
@@ -0,0 +1,168 @@
+Libgcrypt is based on the GnuPG code.  Here is a list of people, who
+helped in GnuPG and Libgcrypt development.  Please help us to keep it
+complete and free of errors.
+
+Albert Chin                china at thewrittenword com
+Allan Clark               allanc@sco.com
+Anand Kumria              wildfire@progsoc.uts.edu.au
+Andreas Metzler            ametzler at downhill.at.eu.org
+Ariel T Glenn             ariel@columbia.edu
+Aurelien Jarno             aurel32 at debian.org
+Ben Hutchings              ben decadent org uk
+Bodo Moeller              Bodo_Moeller@public.uni-hamburg.de
+Brenno de Winter          brenno@dewinter.com
+Brian Moore               bem@cmc.net
+Brian Warner              warner@lothar.com
+Brieuc Jeunhomme          bbp@via.ecp.fr
+Bryan Fullerton           bryanf@samurai.com
+Caskey L. Dickson         caskey@technocage.com
+Cees van de Griend        cees-list@griend.xs4all.nl
+Charles Levert            charles@comm.polymtl.ca
+Christian Biere            christianbiere@gmx.de
+Christian Grothoff         christian at grothoff org
+Christian von Roques      roques@pond.sub.org
+Christopher Oliver        oliver@fritz.traverse.net
+Christian Recktenwald     chris@citecs.de
+Daiki Ueno                 ueno at unixuser org
+Dan Fandrich               dan at coneharvesters com
+Daniel Eisenbud           eisenbud@cs.swarthmore.edu
+Daniel Koening            dan@mail.isis.de
+David Ellement            ellement@sdd.hp.com
+Detlef Lannert            lannert@lannert.rz.uni-duesseldorf.de
+Dirk Lattermann           dlatt@t-online.de
+Dirk Stoecker              gcrypt@dstoecker.de
+Ed Boraas                 ecxjo@esperanto.org
+Elie De Brauwer            elie@de-brauwer.be
+Enzo Michelangeli         em@MailAndNews.com
+Ernst Molitor             ernst.molitor@uni-bonn.de
+Fabian Keil                fk at fabiankeil de
+Fabio Coatti              cova@felix.unife.it
+Felix von Leitner         leitner@amdiv.de
+Frank Heckenbach          heckenb@mi.uni-erlangen.de
+Frank Stajano             frank.stajano@cl.cam.ac.uk
+Gabriele Monti             psicus78 gmail com
+Gaël Quéri                gqueri@mail.dotcom.fr
+Gregor Riepl               seto-kun@freesurf.ch
+Gerlinde Klaes             gk@u64.de
+Greg Louis                glouis@dynamicro.on.ca
+Greg Troxel               gdt@ir.bbn.com
+Gregory Steuck            steuck@iname.com
+Geoff Keating             geoffk@ozemail.com.au
+Harald Denker             harry@hal.westfalen.de
+Hendrik Buschkamp         buschkamp@rheumanet.org
+Holger Schurig            holger@d.om.org
+Hugh Daniel               hugh@toad.com
+Ian McKellar              imckellar@harvestroad.com.au
+Ian Peters                 itp@ximian.com
+Janusz A. Urbanowicz      alex@bofh.torun.pl
+James Troup               james@nocrew.org
+Jean-loup Gailly          gzip@prep.ai.mit.edu
+Jeff Johnson               jbj@redhat.com
+Jens Bachem               bachem@rrz.uni-koeln.de
+J Horacio MG              homega@ciberia.es
+Joachim Backes            backes@rhrk.uni-kl.de
+Jordi Mallach              jordi@sindominio.net
+John A. Martin            jam@jamux.com
+Johnny Teveßen            j.tevessen@gmx.de
+Jörg Schilling            schilling@fokus.gmd.de
+Jun Kuriyama              kuriyama@sky.rim.or.jp
+Karl Fogel                kfogel@guanabana.onshore.com
+Karsten Thygesen          karthy@kom.auc.dk
+Katsuhiro Kondou          kondou@nec.co.jp
+Kazu Yamamoto             kazu@iijlab.net
+Lars Kellogg-Stedman      lars@bu.edu
+Lee Fisher                 blibbet at gmail dot com
+Marco d'Itri               md@linux.it
+Mark Adler                madler@alumni.caltech.edu
+Mark Elbrecht             snowball3@bigfoot.com
+Markus Friedl             Markus.Friedl@informatik.uni-erlangen.de
+Matthias Urlichs           smurf@smurf.noris.de
+Martin Kahlert            martin.kahlert@provi.de
+Martin Hamilton
+Martin Schulte            schulte@thp.uni-koeln.de
+Matthew Skala             mskala@ansuz.sooke.bc.ca
+Max Kellermann             max@duempel.org
+Max Valianskiy            maxcom@maxcom.ml.org
+Michael Fischer v. Mollard mfvm@gmx.de
+Michael Roth              mroth@nessie.de
+Michael Sobolev           mss@despair.transas.com
+Michele Baldessari        michele@pupazzo.org
+Modestas Vainius          geromanas@mailas.com
+Neil Dunbar                neil.dunbar at pobox.com
+Neil Spring               nspring@cs.washington.edu
+Newton Hammet              newton@hammet.net
+Nicolas Graner            Nicolas.Graner@cri.u-psud.fr
+NIIBE Yutaka              gniibe@chroot.org
+Niklas Hernaeus
+Nikolay Sturm             sturm@sec.informatik.tu-darmstadt.de
+Nikos Mavroyanopoulos      nmav@hellug.gr
+Nimrod Zimerman           zimerman@forfree.at
+N J Doye                  nic@niss.ac.uk
+Oliver Haakert            haakert@hsp.de
+Oskari Jääskeläinen       f33003a@cc.hut.fi
+Paul D. Smith             psmith@baynetworks.com
+Philippe Laliberte        arsphl@oeil.qc.ca
+Peter Gutmann             pgut001@cs.auckland.ac.nz
+QingLong                  qinglong@bolizm.ihep.su
+Rafael Ávila de Espíndola  rafael.espindola@gmail.com
+Rafaël Carré               funman@videolan.org
+Ralf Fassel                ralf@akutech.de
+Ralf Hildebrandt           Ralf.Hildebrandt@innominate.com
+Ralf Schneider             ralf@tapfere-schneiderleins.de
+Ralph Gillen              gillen@theochem.uni-duesseldorf.de
+Rami Lehti                 Rami.Lehti@finland.sun.com
+Randolph Chung             tausq@debian.org
+Randy                     mcclellr@oit.edu
+Rat                       ratinox@peorth.gweep.net
+Reinhard Wobst            R.Wobst@ifw-dresden.de
+Rémi Guyomarch            rguyom@mail.dotcom.fr
+Reuben Sumner             rasumner@wisdom.weizmann.ac.il
+Richard Outerbridge       outer@interlog.com
+Roddy Strachan            roddy@satlink.com.au
+Roland Rosenfeld          roland@spinnaker.rhein.de
+Ross Golder               rossigee@bigfoot.com
+Serge Munhoven            munhoven@mema.ucl.ac.be
+Sergi Blanch i Torné       sergi at calcurco cat
+Simon Josefsson            jas@extundo.com
+SL Baur                   steve@xemacs.org
+Stephan Austermuehle       au@hcsd.de
+Stephan Müller             smueller at atsec com
+Stephane Corthesy          stephane@sente.ch
+Stefan Karrmann           S.Karrmann@gmx.net
+Stefan Keller             dres@cs.tu-berlin.de
+Stefan Krüger              stadtkind2 at gmx de
+Steffen Ullrich           ccrlphr@xensei.com
+Steffen Zahn              zahn@berlin.snafu.de
+Steven Bakker             steven@icoe.att.com
+Susanne Schultz           schultz@hsp.de
+Sven Bjorn
+Szakats Istvan             szaki.ms@gmail.com
+Thiago Jung Bauermann     jungmann@cwb.matrix.com.br
+Thomas Roessler           roessler@guug.de
+Tom Holroyd                tomh@po.crl.go.jp
+Tom Spindler              dogcow@home.merit.edu
+Tom Zerucha               tzeruch@ceddec.com
+Tomas Fasth               tomas.fasth@twinspot.net
+Tommi Komulainen           Tommi.Komulainen@iki.fi
+Thomas Mikkelsen          tbm@image.dk
+Ulf Möller                3umoelle@informatik.uni-hamburg.de
+Umberto Salsi             salsi@icosaedro.it
+Uoti Urpala
+Urko Lusa                 ulusa@euskalnet.net
+Victor Stinner             haypo@inl.fr
+Walter Koch               koch@u32.de
+Werner Koch               wk@gnupg.org
+Wim Vandeputte            wim@kd85.com
+                          nbecker@hns.com
+
+
+ Copyright 1998, 1999, 2000, 2001, 2002, 2003,
+           2009, 2011 Free Software Foundation, Inc.
+
+ This file is free software; as a special exception the author gives
+ unlimited permission to copy and/or distribute it, with or without
+ modifications, as long as this notice is preserved.
+
+ This file is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY, to the extent permitted by law; without even the
+ implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
diff --git a/grub-core/lib/libgcrypt/VERSION b/grub-core/lib/libgcrypt/VERSION
new file mode 100644
index 000000000..587c5f0c7
--- /dev/null
+++ b/grub-core/lib/libgcrypt/VERSION
@@ -0,0 +1 @@
+1.10.3
diff --git a/grub-core/lib/libgcrypt/cipher/ChangeLog 
b/grub-core/lib/libgcrypt/cipher/ChangeLog
deleted file mode 100644
index 1b3694f58..000000000
--- a/grub-core/lib/libgcrypt/cipher/ChangeLog
+++ /dev/null
@@ -1,3990 +0,0 @@
-2010-08-19  Werner Koch  <wk@g10code.com>
-
-       * cipher.c (gcry_cipher_open): Remove double release of the module.
-       Fixes bug#1263.
-
-2010-06-10  Jeff Johnson  <n3npq@mac.com>  (wk)
-
-       * ecc.c (ecc_generate_ext): Parse transient-key flag.
-       (generate_key): Add arg TRANSIENT_KEY and use it to set the random
-       level.
-
-2010-04-12  Brad Hards  <bradh@frogmouth.net>  (wk)
-
-       Spelling fixes.
-
-2010-03-26  Werner Koch  <wk@g10code.com>
-
-       * tiger.c (asn): Unfetter the old TIGER from an OID.
-       (TIGER_CONTEXT): Add field VARIANT.
-       (tiger_init): Factor code out to ...
-       (do_init): New.
-       (tiger1_init, tiger2_init): New.
-       (_gcry_digest_spec_tiger1, _gcry_digest_spec_tiger2): New.
-       * md.c (digest_table): Add TIGER1 and TIGER2 variants.
-
-2009-12-11  Werner Koch  <wk@g10code.com>
-
-       * sha256.c (Cho, Maj, Sum0, Sum1): Turn macros into inline
-       functions.
-       (transform): Partly unroll to interweave the chain variables
-
-       * sha512.c (ROTR, Ch, Maj, Sum0, Sum1): Turn macros into inline
-       functions.
-       (transform): Partly unroll to interweave the chain variables.
-       Suggested by Christian Grothoff.
-
-2009-12-10  Werner Koch  <wk@g10code.com>
-
-       * Makefile.am (o_flag_munging): New.
-       (tiger.o, tiger.lo): Use it.
-
-       * cipher.c (do_ctr_encrypt): Add arg OUTBUFLEN.  Check for
-       suitable value.  Add check for valid inputlen.  Wipe temporary
-       memory.
-       (do_ctr_decrypt): Likewise.
-       (do_cbc_encrypt, do_cbc_decrypt): Add arg OUTBUFLEN.  Check for
-       suitable value.  Move check for valid inputlen to here; change
-       returned error from INV_ARG to INV_LENGTH.
-       (do_ecb_encrypt, do_ecb_decrypt): Ditto.
-       (do_cfb_encrypt, do_cfb_decrypt): Ditto.
-       (do_ofb_encrypt, do_ofb_decrypt): Ditto.
-       (cipher_encrypt, cipher_encrypt): Adjust for above changes.
-       (gcry_cipher_encrypt, gcry_cipher_decrypt): Simplify.
-
-2009-12-09  Werner Koch  <wk@g10code.com>
-
-       * cipher.c (gcry_cipher_open): Allow for GCRY_CIPHER_MODE_AESWRAP.
-       (cipher_encrypt, cipher_decrypt): Ditto.
-       (do_aeswrap_encrypt, do_aeswrap_decrypt): New.
-       (struct gcry_cipher_handle): Add field marks.
-       (cipher_setkey, cipher_setiv): Update marks flags.
-       (cipher_reset): Reset marks.
-       (cipher_encrypt, cipher_decrypt): Add new arg OUTBUFLEN.
-       (gcry_cipher_encrypt, gcry_cipher_decrypt): Pass outbuflen to
-       cipher_encrypt.  Replace GPG_ERR_TOO_SHORT by
-       GPG_ERR_BUFFER_TOO_SHORT.
-
-2009-08-21  Werner Koch  <wk@g10code.com>
-
-       * dsa.c (dsa_generate_ext): Release retfactors array before
-       setting it to NULL.  Reported by Daiko Ueno.
-
-2009-07-02  Werner Koch  <wk@g10code.com>
-
-       * md.c (md_read): Fix incomplete check for NULL.
-       Reported by Fabian Kail.
-
-2009-03-31  Werner Koch  <wk@g10code.com>
-
-       * rsa.c (rsa_check_secret_key): Return GPG_ERR_BAD_SECKEY and not
-       GPG_ERR_PUBKEY_ALGO.
-
-2009-02-16  Werner Koch  <wk@g10code.com>
-
-       * rsa.c (generate_x931): Do not initialize TBL with automatic
-       variables.
-       * whirlpool.c, tiger.c, sha256.c, sha1.c, rmd160.c, md5.c
-       * md4.c, crc.c: Remove memory.h.  This is garbage from gnupg.
-       Reported by Dan Fandrich.
-
-2009-01-22  Werner Koch  <wk@g10code.com>
-
-       * ecc.c (compute_keygrip): Remove superfluous const.
-
-2009-01-06  Werner Koch  <wk@g10code.com>
-
-       * rmd160.c (oid_spec_rmd160): Add TeleTrust identifier.
-
-2008-12-10  Werner Koch  <wk@g10code.com>
-
-       * dsa.c (generate): Add arg DOMAIN and use it if specified.
-       (generate_fips186): Ditto.
-       (dsa_generate_ext): Parse and check the optional "domain"
-       parameter and pass them to the generate functions.
-
-       * rijndael.c (rijndael_names): Add "AES128" and "AES-128".
-       (rijndael192_names): Add "AES-192".
-       (rijndael256_names): Add "AES-256".
-
-2008-12-05  Werner Koch  <wk@g10code.com>
-
-       * dsa.c (generate): Add arg TRANSIENT_KEY and use it to detrmine
-       the RNG quality needed.
-       (dsa_generate_ext): Parse the transient-key flag und pass it to
-       generate.
-
-2008-11-28  Werner Koch  <wk@g10code.com>
-
-       * dsa.c (generate_fips186): Add arg DERIVEPARMS and use the seed
-       value if available.
-
-       * primegen.c (_gcry_generate_fips186_2_prime): Fix inner p loop.
-
-2008-11-26  Werner Koch  <wk@g10code.com>
-
-       * primegen.c (_gcry_generate_fips186_3_prime): New.
-       * dsa.c (generate_fips186): Add arg USE_FIPS186_2.
-       (dsa_generate_ext): Parse new flag use-fips183-2.
-
-2008-11-25  Werner Koch  <wk@g10code.com>
-
-       * dsa.c (generate_fips186): New.
-       (dsa_generate_ext): Use new function if derive-parms are given or
-       if in FIPS mode.
-       * primegen.c (_gcry_generate_fips186_2_prime): New.
-
-2008-11-24  Werner Koch  <wk@g10code.com>
-
-       * pubkey.c (gcry_pk_genkey): Insert code to output extrainfo.
-       (pubkey_generate): Add arg R_EXTRAINFO and pass it to the extended
-       key generation function.
-       * rsa.c (gen_x931_parm_xp, gen_x931_parm_xi): New.
-       (generate_x931): Generate params if not given.
-       (rsa_generate_ext): Parse use-x931 flag.  Return p-q-swapped
-       indicator.
-       * dsa.c (dsa_generate_ext): Put RETFACTORS into R_EXTRAINFO if
-       possible.
-
-       * pubkey.c (gcry_pk_genkey): Remove parsing of almost all
-       parameters and pass the parameter S-expression to pubkey_generate.
-       (pubkey_generate): Simplify by requitring modules to parse the
-       parameters. Remove the special cases for Elgamal and ECC.
-       (sexp_elements_extract_ecc): Add arg EXTRASPEC and use it.  Fix
-       small memory leak.
-       (sexp_to_key): Pass EXTRASPEC to sexp_elements_extract_ecc.
-       (pubkey_table) [USE_ELGAMAL]: Add real extraspec.
-       * rsa.c (rsa_generate_ext): Adjust for new calling convention.
-       * dsa.c (dsa_generate_ext): Ditto.
-       * elgamal.c (_gcry_elg_generate): Ditto. Rename to elg_generate_ext.
-       (elg_generate): New.
-       (_gcry_elg_generate_using_x): Remove after merging code with
-       elg_generate_ext.
-       (_gcry_pubkey_extraspec_elg): New.
-       (_gcry_elg_check_secret_key, _gcry_elg_encrypt, _gcry_elg_sign) 
-       (_gcry_elg_verify, _gcry_elg_get_nbits): Make static and remove
-       _gcry_ prefix.
-       * ecc.c (_gcry_ecc_generate): Rename to ecc_generate_ext and
-       adjust for new calling convention.
-       (_gcry_ecc_get_param): Rename to ecc_get_param and make static.
-       (_gcry_pubkey_extraspec_ecdsa): Add ecc_generate_ext and
-       ecc_get_param.
-       
-2008-11-20  Werner Koch  <wk@g10code.com>
-
-       * pubkey.c (pubkey_generate): Add arg DERIVEPARMS.
-       (gcry_pk_genkey): Parse derive-parms and pass it to above.
-       * rsa.c (generate_x931): New.
-       (rsa_generate_ext): Add arg DERIVEPARMS and call new function in
-       fips mode or if DERIVEPARMS is given.
-       * primegen.c (_gcry_derive_x931_prime, find_x931_prime): New.
-
-2008-11-19  Werner Koch  <wk@g10code.com>
-
-       * rsa.c (rsa_decrypt): Use gcry_create_nonce for blinding.
-       (generate): Rename to generate_std.
-
-2008-11-05  Werner Koch  <wk@g10code.com>
-
-       * md.c (md_open): Use a switch to set the Bsize.
-       (prepare_macpads): Fix long key case for SHA384 and SHA512.
-
-       * cipher.c (gcry_cipher_handle): Add field EXTRASPEC.
-       (gcry_cipher_open): Set it.
-       (gcry_cipher_ctl): Add private control code to disable weak key
-       detection and to return the current input block.
-       * des.c (_tripledes_ctx): Add field FLAGS.
-       (do_tripledes_set_extra_info): New.
-       (_gcry_cipher_extraspec_tripledes): Add new function.
-       (do_tripledes_setkey): Disable weak key detection.
-
-2008-10-24  Werner Koch  <wk@g10code.com>
-
-       * md.c (digest_table): Allow MD5 in fips mode.
-       (md_register_default): Take special action for MD5.
-       (md_enable, gcry_md_hash_buffer): Ditto.
-
-2008-09-30  Werner Koch  <wk@g10code.com>
-
-       * rijndael.c (do_setkey): Properly align "t" and "tk".
-       (prepare_decryption): Properly align "w".  Fixes bug #936.
-
-2008-09-18  Werner Koch  <wk@g10code.com>
-
-       * pubkey.c (gcry_pk_genkey): Parse domain parameter.
-       (pubkey_generate): Add new arg DOMAIN and remove special case for
-       DSA with qbits.
-       * rsa.c (rsa_generate): Add dummy args QBITS, NAME and DOMAIN and
-       rename to rsa_generate_ext.  Change caller.
-       (_gcry_rsa_generate, _gcry_rsa_check_secret_key) 
-       (_gcry_rsa_encrypt, _gcry_rsa_decrypt, _gcry_rsa_sign) 
-       (_gcry_rsa_verify, _gcry_rsa_get_nbits): Make static and remove
-       _gcry_ prefix.
-       (_gcry_pubkey_spec_rsa, _gcry_pubkey_extraspec_rsa): Adjust names.
-       * dsa.c (dsa_generate_ext): New.
-       (_gcry_dsa_generate): Replace code by a call to dsa_generate.
-       (_gcry_dsa_check_secret_key, _gcry_dsa_sign, _gcry_dsa_verify)
-       (_gcry_dsa_get_nbits): Make static and remove _gcry prefix.
-       (_gcry_dsa_generate2): Remove.
-       (_gcry_pubkey_spec_dsa): Adjust to name changes.
-       (_gcry_pubkey_extraspec_rsa): Add dsa_generate_ext.
-
-2008-09-16  Werner Koch  <wk@g10code.com>
-
-       * ecc.c (run_selftests): Add arg EXTENDED.
-
-2008-09-12  Werner Koch  <wk@g10code.com>
-
-       * rsa.c (test_keys): Do a bad case signature check.
-       * dsa.c (test_keys): Do a bad case check.
-
-       * cipher.c (_gcry_cipher_selftest): Add arg EXTENDED and pass it
-       to the called tests.
-       * md.c (_gcry_md_selftest): Ditto.
-       * pubkey.c (_gcry_pk_selftest): Ditto.
-       * rijndael.c (run_selftests): Add arg EXTENDED and pass it to the
-       called tests.
-       (selftest_fips_128): Add arg EXTENDED and run only one test
-       non-extended mode.
-       (selftest_fips_192): Add dummy arg EXTENDED.
-       (selftest_fips_256): Ditto.
-       * hmac-tests.c (_gcry_hmac_selftest): Ditto.
-       (run_selftests): Ditto.
-       (selftests_sha1): Add arg EXTENDED and run only one test
-       non-extended mode.
-       (selftests_sha224, selftests_sha256): Ditto.
-       (selftests_sha384, selftests_sha512): Ditto.
-       * sha1.c (run_selftests): Add arg EXTENDED and pass it to the
-       called test.
-       (selftests_sha1): Add arg EXTENDED and run only one test
-       non-extended mode.
-       * sha256.c (run_selftests): Add arg EXTENDED and pass it to the
-       called tests.
-       (selftests_sha224): Add arg EXTENDED and run only one test
-       non-extended mode.
-       (selftests_sha256): Ditto.
-       * sha512.c (run_selftests): Add arg EXTENDED and pass it to the
-       called tests.
-       (selftests_sha384): Add arg EXTENDED and run only one test
-       non-extended mode.
-       (selftests_sha512): Ditto.
-       * des.c (run_selftests): Add arg EXTENDED and pass it to the
-       called test.
-       (selftest_fips): Add dummy arg EXTENDED.
-       * rsa.c (run_selftests): Add dummy arg EXTENDED.
-
-       * dsa.c (run_selftests): Add dummy arg EXTENDED.
-
-       * rsa.c (extract_a_from_sexp): New.
-       (selftest_encr_1024): Check that the ciphertext does not match the
-       plaintext.
-       (test_keys): Improve tests and return an error status.
-       (generate): Return an error if test_keys fails.
-       * dsa.c (test_keys): Add comments and return an error status.
-       (generate): Return an error if test_keys failed.
-
-2008-09-11  Werner Koch  <wk@g10code.com>
-
-       * rsa.c (_gcry_rsa_decrypt): Return an error instead of calling
-       BUG in case of a practically impossible condition.
-       (sample_secret_key, sample_public_key): New.
-       (selftest_sign_1024, selftest_encr_1024): New.
-       (selftests_rsa): Implement tests.
-       * dsa.c (sample_secret_key, sample_public_key): New.
-       (selftest_sign_1024): New.
-       (selftests_dsa): Implement tests.
-
-2008-09-09  Werner Koch  <wk@g10code.com>
-
-       * hmac-tests.c (selftests_sha1): Add tests.
-       (selftests_sha224, selftests_sha384, selftests_sha512): Make up tests.
-
-       * hash-common.c, hash-common.h: New.
-       * sha1.c (selftests_sha1): Add 3 tests.
-       * sha256.c (selftests_sha256, selftests_sha224): Ditto.
-       * sha512.c (selftests_sha512, selftests_sha384): Ditto.
-
-2008-08-29  Werner Koch  <wk@g10code.com>
-
-       * pubkey.c (gcry_pk_get_keygrip): Remove the special case for RSA
-       and check whether a custom computation function has been setup.
-       * rsa.c (compute_keygrip): New.
-       (_gcry_pubkey_extraspec_rsa): Setup this function.
-       * ecc.c (compute_keygrip): New.
-       (_gcry_pubkey_extraspec_ecdsa): Setup this function.
-
-2008-08-28  Werner Koch  <wk@g10code.com>
-
-       * cipher.c (cipher_decrypt, cipher_encrypt): Return an error if
-       mode NONE is used.
-       (gcry_cipher_open): Allow mode NONE only with a debug flag set and
-       if not in FIPS mode.
-
-2008-08-26  Werner Koch  <wk@g10code.com>
-
-       * pubkey.c (pubkey_generate): Add arg KEYGEN_FLAGS.
-       (gcry_pk_genkey): Implement new parameter "transient-key" and
-       pass it as flags to pubkey_generate.
-       (pubkey_generate): Make use of an ext_generate function.
-       * rsa.c (generate): Add new arg transient_key and pass appropriate
-       args to the prime generator.
-       (_gcry_rsa_generate): Factor all code out to ...
-       (rsa_generate): .. new func with extra arg KEYGEN_FLAGS.
-       (_gcry_pubkey_extraspec_ecdsa): Setup rsa_generate.
-       * primegen.c (_gcry_generate_secret_prime) 
-       (_gcry_generate_public_prime): Add new arg RANDOM_LEVEL.
-
-2008-08-21  Werner Koch  <wk@g10code.com>
-
-       * primegen.c (_gcry_generate_secret_prime)
-       (_gcry_generate_public_prime): Use a constant macro for the random
-       level.
-       
-2008-08-19  Werner Koch  <wk@g10code.com>
-
-       * pubkey.c (sexp_elements_extract_ecc) [!USE_ECC]: Do not allow
-       allow "curve" parameter.
-
-2008-08-15  Werner Koch  <wk@g10code.com>
-
-       * pubkey.c (_gcry_pk_selftest): New.
-       * dsa.c (selftests_dsa, run_selftests): New.
-       * rsa.c (selftests_rsa, run_selftests): New.
-       * ecc.c (selftests_ecdsa, run_selftests): New.
-
-       * md.c (_gcry_md_selftest): New.
-       * sha1.c (run_selftests, selftests_sha1): New.
-       * sha256.c (selftests_sha224, selftests_sha256, run_selftests): New.
-       * sha512.c (selftests_sha384, selftests_sha512, run_selftests): New.
-
-       * des.c (selftest): Remove static variable form selftest.
-       (des_setkey): No on-the-fly self test in fips mode.
-       (tripledes_set3keys): Ditto.
-
-       * cipher.c (_gcry_cipher_setkey, _gcry_cipher_setiv): 
-
-       * dsa.c (generate): Bail out in fips mode if NBITS is less than 1024.
-       * rsa.c (generate): Return an error code if the the requested size
-       is less than 1024 and we are in fpis mode.
-       (_gcry_rsa_generate): Take care of that error code.
-
-       * ecc.c (generate_curve): In fips mode enable only NIST curves.
-
-       * cipher.c (_gcry_cipher_selftest): New.
-
-       * sha512.c (_gcry_digest_extraspec_sha384)
-       (_gcry_digest_extraspec_sha512): New.
-       * sha256.c (_gcry_digest_extraspec_sha224)
-       (_gcry_digest_extraspec_sha256): New.
-       * sha1.c (_gcry_digest_extraspec_sha1): New.
-       * ecc.c (_gcry_pubkey_extraspec_ecdsa): New.
-       * dsa.c (_gcry_pubkey_extraspec_dsa): New.
-       * rsa.c (_gcry_pubkey_extraspec_rsa): New.
-       * rijndael.c (_gcry_cipher_extraspec_aes)
-       (_gcry_cipher_extraspec_aes192, _gcry_cipher_extraspec_aes256): New.
-       * des.c (_gcry_cipher_extraspec_tripledes): New.
-
-       * cipher.c (gcry_cipher_register): Rename to _gcry_cipher_register.
-       Add arg EXTRASPEC.
-       (dummy_extra_spec): New.
-       (cipher_table_entry): Add extraspec field.
-       * md.c (_gcry_md_register): Rename to _gcry_md_register.  Add
-       arg EXTRASPEC.
-       (dummy_extra_spec): New.
-       (digest_table_entry): Add extraspec field.
-       * pubkey.c (gcry_pk_register): Rename to _gcry_pk_register.  Add
-       arg EXTRASPEC.
-       (dummy_extra_spec): New.
-       (pubkey_table_entry): Add extraspec field.
-
-       * ac.c: Let most public functions return GPG_ERR_UNSUPPORTED in
-       fips mode.
-
-       * pubkey.c (pubkey_table_entry): Add field FIPS_ALLOWED and mark
-       appropriate algorithms.
-       (dummy_generate, dummy_check_secret_key, dummy_encrypt) 
-       (dummy_decrypt, dummy_sign, dummy_verify, dummy_get_nbits): Signal
-       a fips error when used.
-       (gcry_pk_register): In fips mode do not allow to register new
-       algorithms.
-
-       * md.c (digest_table): Add field FIPS_ALLOWED and mark appropriate
-       algorithms.
-       (md_register_default): In fips mode register only fips algorithms.
-       (gcry_md_register): In fips mode do not allow to register new
-       algorithms.
-       (gcry_md_get): Signal a fips error if called.
-       (gcry_md_hash_buffer): Do not allow rmd160 when not in fips mode.
-       (md_start_debug): Disable in fips_mode.
-
-       * md.c (gcry_md_register_default): Rename to ..
-       (md_register_default): .. this.
-       (md_digest): Remove this commented fucntion.
-       * pubkey.c (gcry_pk_register_default): Rename to ..
-       (pk_register_default): .. this.
-
-       * cipher.c (cipher_table_entry): Add field FIPS_ALLOWED.
-       (gcry_cipher_register_default): Register only fips approved
-       algorithms.
-       (gcry_cipher_register): Do not allow to register new ciphers.
-       (cipher_setiv): Signal fips error.
-       
-       * cipher (gcry_cipher_register_default): Rename to ..
-       (cipher_register_default): .. this.
-       (REGISTER_DEFAULT_CIPHERS): Adjust for that change.
-
-2008-07-05  Werner Koch  <wk@g10code.com>
-
-       * random-daemon.c, random.h, rndhw.c, rndunix.c, rand-internal.h
-       * random.c, rndegd.c, rndlinux.c, rndw32.c: Move to ../cipher.
-       * Makefile.am: Remove random stuff.
-
-2008-06-24  Szakats Istvan <szaki.ms@gmail.com>  (wk)
-
-       * ac.c (_gcry_ac_key_destroy, _gcry_ac_key_pair_generate): Relase
-       some more memory.
-
-2008-04-22  Werner Koch  <wk@g10code.com>
-
-       * rijndael.c (_gcry_aes_cfb_enc, _gcry_aes_cbc_enc) 
-       (_gcry_aes_cfb_dec, _gcry_aes_cbc_dec): Use Padlock if possible.
-
-2008-04-18  Werner Koch  <wk@g10code.com>
-
-       * sha1.c (transform_aligned): Remove.  That is will obviosuly not
-       work because we need a scratch working area and our internal API
-       does not allow to modify the buffers.
-
-       * rijndael.c: Factor tables out to ..
-       * rijndael-tables.h: .. new.
-
-       * ac.c (ac_data_extract): Make static.
-
-       * camellia.h [HAVE_CONFIG_H]: Include config.h.
-
-       * rndw32.c (registry_poll): Only print the performance data
-       problem warning once.  Suggested by Simon Josefsson.
-
-2008-03-19  Werner Koch  <wk@g10code.com>
-
-       * cipher.c (gcry_cipher_open) [USE_AES]: Init bulk encryption only
-       if requested.  Suggested by Dirk Stoecker.
-
-2008-03-18  Werner Koch  <wk@g10code.com>
-
-       * sha1.c: Include stdint.h.
-       (transform): Add arg NBLOCKS so that we can work on more than one
-       block and avoid updates of the chaining variables.  Changed all
-       callers to use 1.
-       (sha1_write): Replace loop around transform.
-       (transform_aligned) [WORDS_BIGENDIAN]: New.
-       (TRANSFORM): New macro to replace all direct calls of transform.
-
-2008-03-17  Werner Koch  <wk@g10code.com>
-
-       * rijndael.c (_gcry_aes_cfb_dec): New.
-       (do_encrypt): Factor code out to ..
-       (do_encrypt_aligned): .. New.
-       (_gcry_aes_cfb_enc, _gcry_aes_cfb_dec): Use new function.
-       (do_decrypt): Factor code out to ..
-       (do_decrypt_aligned): .. new.
-       (_gcry_aes_cbc_enc, _gcry_aes_cbc_dec): New.
-       * cipher.c (struct gcry_cipher_handle): Put field IV into new
-       union U_IV to enforce proper alignment.  Change all users.
-       (do_cfb_decrypt): Optimize.
-       (do_cbc_encrypt, do_cbc_decrypt): Optimize.
-
-2008-03-15  Werner Koch  <wk@g10code.com>
-
-       * rijndael.c (_gcry_aes_cfb_enc): New.
-       * cipher.c (struct gcry_cipher_handle): Add field ALGO and BULK.
-       (gcry_cipher_open): Set ALGO and BULK.
-       (do_cfb_encrypt): Optimize.
-
-2008-02-18  Werner Koch  <wk@g10code.com>
-
-       * rsa.c (_gcry_rsa_verify) [IS_DEVELOPMENT_VERSION]: Print
-       intermediate results.
-
-2008-01-08  Werner Koch  <wk@g10code.com>
-
-       * random.c (add_randomness): Do not just increment
-       POOL_FILLED_COUNTER but update it by the actual amount of data.
-
-2007-12-13  Werner Koch  <wk@g10code.com>
-
-       * pubkey.c (sexp_data_to_mpi): Support SHA-224.
-
-2007-12-05  Werner Koch  <wk@g10code.com>
-
-       * rijndael.c (USE_PADLOCK): Depend on ENABLE_PADLOCK_SUPPORT.
-       * rndhw.c (USE_PADLOCK): Ditto
-
-       * rsa.c (secret): Fixed condition test for using CRT.  Reported by
-       Dean Scarff.  Fixes bug#864.
-       (_gcry_rsa_check_secret_key): Return an erro if the optional
-       parameters are missing.
-       * pubkey.c (sexp_elements_extract): Add arg ALGO_NAME. Changed all
-       callers to pass NULL. Add hack to allow for optional RSA
-       parameters.
-       (sexp_to_key): Pass algo name to sexp_elements_extract.
-
-2007-12-03  Werner Koch  <wk@g10code.com>
-
-       * random.c (gcry_random_add_bytes): Implement it.
-       * rand-internal.h (RANDOM_ORIGIN_EXTERNAL): New.
-
-2007-11-30  Werner Koch  <wk@g10code.com>
-
-       * rndhw.c: New.
-       * rndlinux.c (_gcry_rndlinux_gather_random): Try to read 50%
-       directly from the hwrng.
-       * random.c (do_fast_random_poll): Also run the hw rng fast poll.
-       (_gcry_random_dump_stats): Tell whether the hw rng failed.
-
-2007-11-29  Werner Koch  <wk@g10code.com>
-
-       * rijndael.c (USE_PADLOCK): Define new macro used for ia32.
-       (RIJNDAEL_context) [USE_PADLOCK]: Add fields USE_PADLOCK and
-       PADLOCK_KEY.
-       (do_setkey) [USE_PADLOCK]: Enable padlock if available for 128 bit
-       AES.
-       (do_padlock) [USE_PADLOCK]: New.
-       (rijndael_encrypt, rijndael_decrypt) [USE_PADLOCK]: Divert to
-       do_padlock.
-       * cipher.c (cipher_context_alignment_t): New.  Use it in this
-       module in place of PROPERLY_ALIGNED_TYPE.
-       (NEED_16BYTE_ALIGNED_CONTEXT): Define macro for ia32.
-       (struct gcry_cipher_handle): Add field HANDLE_OFFSET.
-       (gcry_cipher_open): Take care of increased alignment requirements.
-       (gcry_cipher_close): Ditto.
-
-2007-11-28  Werner Koch  <wk@g10code.com>
-
-       * sha256.c (asn224): Fixed wrong template.  It happened due to a
-       bug in RFC4880.  SHA-224 is not in the stable version of libgcrypt
-       so the consequences are limited to users of this devel version.
-
-2007-10-31  Werner Koch  <wk@g10code.com>
-
-       * ac.c (gcry_ac_data_new): Remove due to the visibility wrapper.
-       (gcry_ac_data_destroy, gcry_ac_data_copy, gcry_ac_data_length) 
-       (gcry_ac_data_set, gcry_ac_data_get_name, gcry_ac_data_get_index) 
-       (gcry_ac_data_to_sexp, gcry_ac_data_from_sexp) 
-       (gcry_ac_data_clear, gcry_ac_io_init, gcry_ac_open) 
-       (gcry_ac_close, gcry_ac_key_init, gcry_ac_key_pair_generate) 
-       (gcry_ac_key_pair_extract, gcry_ac_key_destroy) 
-       (gcry_ac_key_pair_destroy, gcry_ac_key_data_get) 
-       (gcry_ac_key_test, gcry_ac_key_get_nbits, gcry_ac_key_get_grip) 
-       (gcry_ac_data_encrypt, gcry_ac_data_decrypt, gcry_ac_data_sign) 
-       (gcry_ac_data_verify, gcry_ac_data_encode, gcry_ac_data_decode) 
-       (gcry_ac_mpi_to_os, gcry_ac_mpi_to_os_alloc, gcry_ac_os_to_mpi) 
-       (gcry_ac_data_encrypt_scheme, gcry_ac_data_decrypt_scheme) 
-       (gcry_ac_data_sign_scheme, gcry_ac_data_verify_scheme) 
-       (gcry_ac_io_init_va): Ditto.
-       (gcry_ac_id_to_name, gcry_ac_name_to_id): Remove as these
-       deprecated functions are now implemented by visibility.c.
-
-2007-10-26  Werner Koch  <wk@g10code.com>
-
-       * rndw32.c: Disable debug flag.
-
-2007-10-25  Werner Koch  <wk@g10code.com>
-
-       * rndw32.c: Updated from current cryptlib snapshot and modified
-       for our use.  Removed support from pre NT systems.
-       (slow_gatherer_windows95): Remove.
-       (_gcry_rndw32_gather_random): Require an NT platform.
-       (init_system_rng, read_system_rng, read_mbm_data): New.
-       (slow_gatherer_windowsNT): Rename to ...
-       (slow_gatherer): .. this.  Read system RNG and MBM.
-       (registry_poll): New with code factored out from slow_gatherer.
-
-2007-08-23  Werner Koch  <wk@g10code.com>
-
-       * random.c (pool_filled_counter): New.
-       (add_randomness): Use it.
-
-2007-08-22  Werner Koch  <wk@g10code.com>
-
-       * rndw32.c, rndunix.c: Switched to LGPL.
-
-2007-05-30  Werner Koch  <wk@g10code.com>
-
-       * camellia.h, camellia.c: Replace by new LGPL version and adjusted
-       camellia.h.
-
-2007-05-09  Marcus Brinkmann  <marcus@g10code.de>
-
-       * ac.c (_gcry_ac_io_init_va, _gcry_ac_io_write, _gcry_ac_io_read):
-       Adjust users of gcry_ac_io_t because union is not anonymous
-       anymore.
-
-2007-05-02  Werner Koch  <wk@g10code.com>
-
-       * camellia-glue.c (camellia_setkey, camellia_encrypt)
-       (camellia_decrypt): Recalculated used stack size in called
-       functions.
-       * camellia.h: Redefine external symbols.
-
-2007-05-02  David Shaw  <dshaw@jabberwocky.com>
-
-       * Makefile.am, cipher.c: Add Camellia.
-
-       * camellia-glue.c: New.  The necessary glue to interface libgcrypt
-       to the stock NTT Camellia distribution.
-
-       * camellia.h, camellia.c: The stock NTT Camellia distribution
-       (GPL).
-
-2007-04-30  David Shaw  <dshaw@jabberwocky.com>
-
-       * cipher.c: Use #if instead of #ifdef as configure defines the
-       USE_cipher defines as 0 for disabled.
-
-2007-04-30  Werner Koch  <wk@g10code.com>
-
-       * rndegd.c (_gcry_rndegd_set_socket_name): New.
-
-2007-04-30  Marcus Brinkmann  <marcus@g10code.de>
-
-       * ecc.c (ec2os): Fix relocation of short numbers.
-
-       * ecc.c (generate_key): Do not allocate D, which will be allocated
-       by GEN_K.  Remove G.  Fix test if g_x, g_y resp. q_x, q_y are
-       requested.
-       (_gcry_ecc_generate): Release unneeded members of SK.
-       * pubkey.c (sexp_to_key): Release NAME.
-
-2007-04-28  Marcus Brinkmann  <marcus@g10code.de>
-
-       * ac.c (gcry_ac_mpi): Remove member NAME_PROVIDED.
-       (ac_data_mpi_copy, _gcry_ac_data_set, _gcry_ac_data_get_name)
-       (_gcry_ac_data_get_index, ac_data_construct): Adjust handling of
-       NAME accordingly.
-
-2007-04-20  Werner Koch  <wk@g10code.com>
-
-       * ecc.c (domain_parms): Add standard brainpool curves.
-
-2007-04-18  Werner Koch  <wk@g10code.com>
-
-       * ecc.c (generate_curve): Implement alias mechanism.
-
-       * pubkey.c (sexp_elements_extract_ecc): New.
-       (sexp_to_key): Add special case for ecc.
-       (sexp_to_key, sexp_to_sig, sexp_to_enc, gcry_pk_genkey): Replace
-       name_terminated stuff by a call to _gcry_sexp_nth_string.
-       (gcry_pk_get_keygrip): Ditto.
-
-2007-04-16  Werner Koch  <wk@g10code.com>
-
-       * ecc.c (_gcry_ecc_generate): Renamed DUMMY to CURVE and use it.
-
-2007-04-13  Marcus Brinkmann  <marcus@g10code.de>
-
-       * ac.c (ac_data_construct): Cast const away to suppress compiler
-       warning.
-
-       * ecc.c (ecc_generate): Avoid compiler warning for unused argument
-       DUMMY.
-       (ecc_verify): Avoid compiler warning for unused arguments CMP and
-       OPAQUEV.
-
-2007-04-06  Werner Koch  <wk@g10code.com>
-
-       * sha1.c (oid_spec_sha1): Add another oid from X9.62.
-
-2007-03-28  Werner Koch  <wk@g10code.com>
-
-       * pubkey.c (gcry_pk_genkey): Do not issue misc-key-info if it is
-       empty.
-       (gcry_pk_genkey): New parameter "curve".
-
-       * ecc.c: Entirely rewritten with only a few traces of the old
-       code left.
-       (_gcry_ecc_generate): New.
-       (generate_key) New arg NAME.
-       (generate_curve): Ditto.  Return actual number of NBITS.
-
-2007-03-26  Werner Koch  <wk@g10code.com>
-
-       * pubkey.c (gcry_pk_genkey): Increase size of SKEY array and add a
-       runtime bounds check.
-
-2007-03-23  Werner Koch  <wk@g10code.com>
-
-       * ecc.c (ecc_ctx_init, ecc_ctx_free, ecc_mod, ecc_mulm): New.
-       (duplicate_point, sum_points, escalar_mult): Don't use a
-       copy of base->p.  Replaced all mpi_mulm by ecc_mulm so that we can
-       experiment with different algorithms.
-       (generate_key, check_secret_key, sign, verify): Initialize a
-       computation context for use by ecc_mulm.
-
-2007-03-22  Werner Koch  <wk@g10code.com>
-
-       * pubkey.c (pubkey_table): Initialize ECC.
-       * Makefile.am (EXTRA_libcipher_la_SOURCES): Add ecc.c.
-       * ecc.c: New. Heavily reformatted and changed for use in libgcrypt.
-       (point_init): New.
-       (escalar_mult): Make arg R the first arg to be similar to the mpi
-       functions.
-       (duplicate_point): Ditto
-       (sum_points): Ditto
-       (sign, verify): Remove unneeded copy operations.
-       (sum_points): Removed memory leaks and optimized some compares.
-       (verify): Simplified input check.
-
-2007-03-14  Werner Koch  <wk@g10code.com>
-
-       * random.c (MASK_LEVEL): Removed macro as it was used only at one
-       place.  Open coded it there.
-       (gcry_randomize, _gcry_update_random_seed_file)
-       (_gcry_fast_random_poll): Factor lock code out to ..
-       (lock_pool, unlock_pool): .. new.
-       (initialize): Look the pool while allocating. 
-       (read_random_source, do_fast_random_poll): Moved intialization to ...
-       (initialize): .. here.
-       (_gcry_enable_quick_random_gen): No more need for initialization.
-       (is_initialized):  Moved this global flag to ..
-       (initialize): .. here and changed all users to unconditionally call
-       initialize.
-       (add_randomness): Remove initalization here.  It simply can't
-       happen. 
-
-       * random.c (enum random_origins): Moved to ..
-       * rand-internal.h: .. here.
-       * rndunix.c (_gcry_rndunix_gather_random): Use enum in prototype
-       for ORIGIN and renamed REQUESTOR to ORIGIN.
-       * rndegd.c (_gcry_rndegd_gather_random): Ditto.
-       * rndlinux.c (_gcry_rndlinux_gather_random): Ditto.
-       * rndw32.c (_gcry_rndw32_gather_random): Ditto.
-       (_gcry_rndw32_gather_random_fast): Ditto.
-
-2007-03-13  Werner Koch  <wk@g10code.com>
-
-       * random.c (enum random_origins): New.
-       (add_randomness): Renamed arg SOURCE to ORIGIN.
-       (read_random_source): Renamed arg REQUESTOR to ORIGIN.
-       (getfnc_gather_random): Removed static variable because this
-       function is only called one and thus we don't need this
-       optimization.
-       (_gcry_quick_random_gen): Removed and replaced by..
-       (_gcry_enable_quick_random_gen): .. this.  It is onlyu used to
-       enable it and it does not make sense to disable it later. Changed
-       the only one caller too.
-       (get_random_bytes): Removed.
-       (gcry_random_bytes, gcry_random_bytes_secure): Implement in terms
-       of gcry_randomize.
-       * random-daemon.c (_gcry_daemon_get_random_bytes): Removed.
-
-2007-02-23  Werner Koch  <wk@g10code.com>
-
-       * elgamal.c (generate): Removed unused variable TEMP.
-       (test_keys): New arg NODIE.
-       (generate_using_x, _gcry_elg_generate_using_x): New.
-       * pubkey.c (pubkey_generate): New arg XVALUE and direct call to
-       the new elgamal generate fucntion.
-       (gcry_pk_genkey): Parse the new "xvalue" tag.
-
-2007-02-22  Werner Koch  <wk@g10code.com>
-
-       * pubkey.c (sexp_data_to_mpi): Handle dynamically allocated
-       algorithms.  Suggested by Neil Dunbar.  Fixes bug#596.
-
-       * rndw32.c (_gcry_rndw32_gather_random_fast): Make it return void.
-
-       * cipher.c (gcry_cipher_algo_name): Simplified.
-
-       * random.c: Use the daemon only if compiled with USE_RANDOM_DAEMON. 
-
-       * Makefile.am (libcipher_la_SOURCES): Build random-daemon support
-       only if requested.
-
-2007-02-21  Werner Koch  <wk@g10code.com>
-
-       * random.c (rndpool, keypool): Make unsigned.
-       (mix_pool): Change char* variables to unsigned char*.
-       (gcry_randomize): Make arg BUFFER a void*.
-       (gcry_create_nonce): Ditto.
-
-       * rmd160.c (gcry_rmd160_mixblock): Make BUFFER a void*.
-       (_gcry_rmd160_hash_buffer): Make OUTBUF and BUFFER void*.
-       * sha1.c (_gcry_sha1_hash_buffer): Ditto.
-
-       * cipher.c (gcry_cipher_encrypt, cry_cipher_decrypt): Change
-       buffer args to void*.
-       (gcry_cipher_register): Make ALGORITHM_ID a int *.
-
-       * md.c (md_start_debug): Make SUFFIX a const char*.  Use snprintf.
-       (gcry_md_debug): New.
-       (gcry_md_ctl): Changed arg BUFFER from unsigned char*.
-
-       * md.c (md_write): Make INBUF a const void*.
-       (gcry_md_write): Remove needless cast.
-       * crc.c (crc32_write): Make INBUF a const void*
-       (update_crc32, crc24rfc2440_write): Ditto.
-       * sha512.c (sha512_write, transform): Ditto.
-       * sha256.c (sha256_write, transform): Ditto.
-       * rmd160.c (rmd160_write, transform): Ditto.
-       * md5.c (md5_write, transform): Ditto.
-       * md4.c (md4_write, transform): Ditto.
-       * sha1.c (sha1_write, transform): Ditto.
-
-       * tiger.c (tiger_write, transform): Ditto.
-       * whirlpool.c (whirlpool_write, whirlpool_add, transform): Ditto.
-
-       * elgamal.c (elg_names): Change to a const*.
-       * dsa.c (dsa_names): Ditto.
-       * rsa.c (rsa_names): Ditto.
-       * pubkey.c (gcry_pk_lookup_func_name): Make ALIASES a const.
-
-2007-02-20  Werner Koch  <wk@g10code.com>
-
-       * rndlinux.c (open_device): Remove unsused arg MINOR.
-
-2007-01-30  Werner Koch  <wk@g10code.com>
-
-       * sha256.c (oid_spec_sha256): Add alias from pkcs#1.
-       * sha512.c (oid_spec_sha512): Ditto.
-       (oid_spec_sha384): Ditto.
-
-2006-12-18  Werner Koch  <wk@g10code.com>
-
-       * rndlinux.c (set_cloexec_flag): New.
-       (open_device): Set close-on-exit flags.  Suggested by Max
-       Kellermann.  Fixes Debian#403613.
-
-       * Makefile.am (AM_CPPFLAGS, AM_CFLAGS): Splitted and merged
-       Moritz' changes.
-       (INCLUDES): Removed.
-
-2006-11-30  Werner Koch  <wk@g10code.com>
-
-       * serpent.c (byte_swap_32): Remove trailing semicolon.
-
-2006-11-15  Werner Koch  <wk@g10code.com>
-
-       * Makefile.am (INCLUDES): Include ../src/
-
-2006-11-03  Werner Koch  <wk@g10code.com>
-
-       * random.c [HAVE_GETTIMEOFDAY]: Included sys/time.h and not
-       sys/times.h.  Reported by Rafaël Carré.
-
-2006-11-05  Moritz Schulte  <moritz@g10code.com>
-
-       * Makefile.am (AM_CFLAGS): Added -I$(top_builddir)/src so that the
-       new gcrypt.h is used, not the one installed in the system.
-
-2006-10-25  Werner Koch  <wk@g10code.com>
-
-       * primegen.c (prime_generate_internal): Tweaked use of secure
-       memory and entropy use. Safe unused primes from the pool. Allocate
-       at least a pool of 30.
-       (save_pool_prime, get_pool_prime): New.
-
-2006-10-23  Werner Koch  <wk@g10code.com>
-
-       * ac.c (_gcry_ac_data_from_sexp): Reset sexp_tmp for failsafe
-       means.  Release sexp_cur if needed.  Reported by Dirk Stoecker.
-
-       * pubkey.c (pubkeys_registered_lock): Intialized it.  It is not
-        realy needed because this is a mere initialization to 0 anyway.
-        Noted by Victor Stinner.
-
-2006-10-17  Werner Koch  <wk@g10code.com>
-
-       * dsa.c (_gcry_dsa_generate2): New.
-       (generate): New arg QBITS.  Add sanity checks for reasonable qbits
-       and nbits.
-       * pubkey.c (gcry_pk_genkey): Parse an qbits element.
-       (pubkey_generate): New arg QBITS.  Pass it to the DSA generation.
-
-2006-10-05  Werner Koch  <wk@g10code.com>
-
-       * md.c (gcry_md_algo_info) <get_asnoid>: Check that the algo is
-       available.
-
-2006-10-04  David Shaw  <dshaw@jabberwocky.com>  (wk)
- 
-       * tiger.c (round): Rename to tiger_round as gcc 4 has a built-in
-       round function that this conflicts with.
- 
-2006-09-11  Werner Koch  <wk@g10code.com>
-
-       * rndw32.c (slow_gatherer_windowsNT): While adding data use the
-       size of the diskPerformance and not its address. Has been fixed in
-       GnuPG more than a year ago.  Noted by Lee Fisher.
-
-2006-08-30  Werner Koch  <wk@g10code.com>
-
-       * pubkey.c (sexp_data_to_mpi): Need to allow "ripemd160" here as
-       this is the canonical name.
-
-2006-08-29  Hye-Shik Chang <perky@FreeBSD.org>  (wk)
-
-       * seed.c: New.
-
-2006-08-03  Werner Koch  <wk@g10code.com>
-
-       * random-daemon.c (_gcry_daemon_initialize_basics): Don't
-       initialize the socket.  Remove arg SOCKETNAME.
-       (connect_to_socket): Make sure that daemon is set to -1 on error.
-       (call_daemon): Initialize the socket on the first call.
-       (_gcry_daemon_randomize, _gcry_daemon_get_random_bytes) 
-       (_gcry_daemon_create_nonce): New arg SOCKETNAME.
-       * random.c (initialize): Call new daemon initializator.
-       (get_random_bytes, gcry_randomize, gcry_create_nonce): Pass socket
-       name to daemon call and reset allow_daemon on failure.
-
-2006-07-26  Werner Koch  <wk@g10code.com>
-
-       * rmd160.c (_gcry_rmd160_mixblock): Add cast to transform call.
-
-       * blowfish.c (selftest): Cast string to usnigned char*.
-
-       * primegen.c (prime_generate_internal): Cast unsigned/char*
-       mismatch in calling m_out_of_n.
-       (is_prime): Changed COUNT to unsigned int *.
-
-       * ac.c (_gcry_ac_data_copy): Initialize DATA_MPIS.
-
-       * random.c (gcry_create_nonce): Update the pid after a fork.
-       Reported by Uoti Urpala.
-
-2006-07-04  Marcus Brinkmann  <marcus@g10code.de>
-
-       * sha512.c: Fix typo in copyright notice.
-
-2006-06-21  Werner Koch  <wk@g10code.com>
-
-       * rsa.c (_gcry_rsa_generate): Replace xcalloc by calloc.
-       * pubkey.c (gcry_pk_encrypt, gcry_pk_sign): Ditto.
-       (sexp_to_key, sexp_to_sig, sexp_to_enc, gcry_pk_encrypt) 
-       (gcry_pk_sign, gcry_pk_genkey, gcry_pk_get_keygrip): Ditto. 
-       * md.c (md_copy): Ditto.
-       
-2006-04-22  Moritz Schulte  <moritz@g10code.com>
-
-       * random-daemon.c (_gcry_daemon_initialize_basics): New argument:
-       SOCKETNAME.  Passing on to connect_to_socket() if non-NULL.
-       (connect_to_socket, writen, readn, call_daemon): New functions.
-       (_gcry_daemon_randomize, _gcry_daemon_get_random_bytes) 
-       (_gcry_daemon_create_nonce): Call call_daemon().
-       (RANDOM_DAEMON_SOCKET): New symbol.
-       (daemon_socket): New static variable.
-
-       * random.h (_gcry_daemon_initialize_basics): New parameter:
-       SOCKETNAME.
-       (_gcry_set_random_daemon_socket): New declaration.
-
-       * random.c (initialize_basics): Pass DAEMON_SOCKET_NAME to
-       _gcry_daemon_initialize_basics.
-       (_gcry_set_random_daemon_socket): New function, setting
-       DAEMON_SOCKET_NAME.
-
-2006-04-01  Moritz Schulte  <moritz@g10code.com>
-
-       * ac.c (eme_pkcs_v1_5_encode): Use KEY_SIZE directly, no need to
-       call gcry_ac_key_get_nbits.
-       (eme_pkcs_v1_5_decode): Likewise.
-       (ac_es_dencode_prepare_pkcs_v1_5): Fill options_em structure with
-       key_size.
-       (_gcry_ac_data_dump, gcry_ac_data_dump): New functions.
-       (_gcry_ac_data_to_sexp, _gcry_ac_data_from_sexp): More or less
-       rewritten; changed S-Expression format so that it matches the one
-       used in pubkey.c.
-
-2006-03-15  Werner Koch  <wk@g10code.com>
-
-       * random-daemon.c: New.
-       * random.c (_gcry_use_random_daemon): New.
-       (get_random_bytes, gcry_randomize, gcry_create_nonce): Try
-       diverting to the daemon functions.
-
-2006-03-14  Werner Koch  <wk@g10code.com>
-
-       * random.c (lock_seed_file): New.
-       (read_seed_file, _gcry_update_random_seed_file): Use it.
-
-       * random.c (gcry_create_nonce):  Detect a fork and re-seed.
-       (read_pool): Fixed the fork detection; it used to work only for
-       multi-threaded processes.
-
-2006-03-12  Brad Hards  <bradh@frogmouth.net>  (wk)
-
-       * md.c (md_open): Use new variable macpads_Bsize instead of
-       hardwiring the block size.  Changed at all places.
-
-2006-03-10  Brad Hards  <bradh@frogmouth.net>  (wk, patch 2005-04-22)
-
-       * md.c, sha256.c:  Add support for SHA-224.
-       (sha224_init): New.
-       
-2006-01-18  Brad Hards  <bradh@frogmouth.net>  (wk 2006-03-07)
-
-       * cipher.c (cipher_encrypt, cipher_decrypt, do_ofb_encrypt)
-       (do_ofb_decrypt, gcry_cipher_open): Implement Output Feedback Mode.
-
-2005-11-02  Moritz Schulte  <moritz@g10code.com>
-
-       * pubkey.c (gcry_pk_algo_name): Return "?" instead of NULL for
-       unknown algorithm IDs.
-       * cipher.c (cipher_algo_to_string): Likewise.
-
-2005-11-01  Moritz Schulte  <moritz@g10code.com>
-
-       * pubkey.c (gcry_pk_algo_info): Don't forget to break after switch
-       case.
-
-2005-09-19  Werner Koch  <wk@g10code.com>
-
-       * dsa.c (generate): Add preliminary support for 2 and 4 keys.
-       Return an error code if the key size is not supported.
-       (_gcry_dsa_generate): Return an error.
-
-2005-08-22  Werner Koch  <wk@g10code.com>
-
-       * primegen.c (check_prime): New arg RM_ROUNDS.
-       (prime_generate_internal): Call it here with 5 rounds as used
-       before.
-       (gcry_prime_check): But here with 64 rounds.
-       (is_prime): Make sure never to use less than 5 rounds.
-
-2005-04-16  Moritz Schulte  <moritz@g10code.com>
-
-       * ac.c (_gcry_ac_init): New function.
-
-2005-04-12  Moritz Schulte  <moritz@g10code.com>
-
-       * ac.c (_gcry_ac_io_write, _gcry_ac_io_read): Initialize err to
-       make the compiler happy.
-       Always use errno, now that gcry_malloc() is guaranteed to set
-       errno on failure.
-       (_gcry_ac_data_to_sexp): Don't forget to goto out after error in
-       loop.
-       (_gcry_ac_data_to_sexp): Remove unused variable: mpi_list;
-       (_gcry_ac_data_to_sexp): Always deallocate sexp_buffer.
-       (_gcry_ac_data_from_sexp): Don't forget to initialize data_set_new.
-       (_gcry_ac_data_from_sexp): Handle special case, which is
-       necessary, since gcry_sexp_nth() does not distinguish between
-       "element does not exist" and "element is the empty list".
-       (_gcry_ac_io_init_va): Use assert to make sure that mode and type
-       are correct.
-       Use gcry_error_t types where gcry_err_code_t types have been used
-       before.
-
-2005-04-11  Moritz Schulte  <moritz@g10code.com>
-
-       * ac.c (_gcry_ac_data_sign_scheme): Don't forget to initialize
-       buffer.
-
-       * whirlpool.c: New file.
-       * md.c (digest_table): Add whirlpool.
-       * Makefile.am (EXTRA_libcipher_la_SOURCES): Added: whirlpool.c.
-
-2005-03-30  Moritz Schulte  <moritz@g10code.com>
-
-       * ac.c (_gcry_ac_data_from_sexp): Use length of SEXP_CUR, not
-       length of SEXP; do not forget to set SEXP_TMP to NULL after it has
-       been released.
-
-       (struct gcry_ac_mpi): New member: name_provided.
-       (_gcry_ac_data_set): Rename variable `name_final' to `name_cp';
-       remove const qualifier; change code to not cast away const
-       qualifiers; use name_provided member as well.
-       (_gcry_ac_data_set, _gcry_ac_data_get_name): Use name_provided
-       member of named mpi structure.
-
-       (gcry_ac_name_to_id): Do not forget to initialize err.
-       (_gcry_ac_data_get_index): Do not forget to initialize mpi_return;
-       use gcry_free() instead of free(); remove unnecessary cast; rename
-       mpi_return and name_return to mpi_cp and name_cp; adjust code.
-       (ac_data_mpi_copy): Do not cast away const qualifier.
-       (ac_data_values_destroy): Likewise.
-       (ac_data_construct): Likewise.
-
-       (ac_data_mpi_copy): Initialize flags to GCRY_AC_FLAG_DEALLOC.
-       (ac_data_extract): Use GCRY_AC_FLAG_DEALLOC instead of
-       GCRY_AC_FLAG_COPY.
-
-       (_gcry_ac_io_init_va, _gcry_ac_io_init, gcry_ac_io_init)
-       (gcry_ac_io_init_va, _gcry_ac_io_write, _gcry_ac_io_read)
-       (_gcry_ac_io_read_all, _gcry_ac_io_process): New functions.
-       (gry_ac_em_dencode_t): Use gcry_ac_io_t in prototype instead of
-       memroy strings directly; adjust encode/decode functions to use io
-       objects.
-       (emsa_pkcs_v1_5_encode_data_cb): New function ...
-       (emsa_pkcs_v1_5_encode): ... use it here.
-       (ac_data_dencode): Use io objects.
-       (_gcry_ac_data_encode, _gcry_ac_data_decode, gcry_ac_data_encode)
-       (gcry_ac_data_decode): Likewise.
-       (_gcry_ac_data_encrypt_scheme, gcry_ac_data_encrypt_scheme)
-       (_gcry_ac_data_decrypt_scheme, gcry_ac_data_decrypt_scheme)
-       (_gcry_ac_data_sign_scheme, gcry_ac_data_sign_scheme)
-       (_gcry_ac_data_verify_scheme, gcry_ac_data_verify_scheme):
-       Likewise.
-
-2005-03-23  Werner Koch  <wk@g10code.com>
-
-       * rndw32.c (_gcry_rndw32_gather_random_fast): While adding data
-       use the size of the object and not the one of its address.  Bug
-       reported by Sascha Kiefer.
-
-2005-03-19  Moritz Schulte  <moritz@g10code.com>
-
-       * cipher.c (do_cbc_encrypt): Be careful to not overwrite data,
-       which is to be used later on.  This happend, in case CTS is
-       enabled and OUTBUF is equal to INBUF.
-
-2005-02-25  Werner Koch  <wk@g10code.com>
-
-       * pubkey.c (gcry_pk_get_keygrip): Allow for shadowed-private-key.
-
-2005-02-13  Moritz Schulte  <moritz@g10code.com>
-
-       * serpent.c: Updated from 1.2 branch:
-
-       s/u32_t/u32/ and s/byte_t/byte/.  Too match what we have always
-       used and are using in all other files too
-       (serpent_test): Moved prototype out of a fucntion.
-
-2005-02-07  Moritz Schulte  <moritz@g10code.com>
-
-       * ac.c: Major parts rewritten.
-       * pubkey.c (_gcry_pk_get_elements): New function.
-
-2004-12-09  Werner Koch  <wk@g10code.com>
-
-       * serpent.c (serpent_setkey): Moved prototype of serpent_test to
-       outer scope.
-
-2004-09-11  Moritz Schulte  <moritz@g10code.com>
-
-       * pubkey.c (pubkey_table): Added an alias entry for GCRY_PK_ELG_E.
-
-2004-08-23  Moritz Schulte  <moritz@g10code.com>
-
-       * ac.c: Do not include <assert.h>.
-       * rndegd.c: Likewise.
-       * sha1.c: Likewise.
-       * rndunix.c: Likewise.
-       * rndlinux.c: Likewise.
-       * rmd160.c: Likewise.
-       * md5.c: Likewise.
-       * md4.c: Likewise.
-       * cipher.c: Likewise.
-       * crc.c: Likewise.
-       * blowfish.c: Likewise.
-
-       * pubkey.c (dummy_generate, dummy_check_secret_key)
-       (dummy_encrypt, dummy_decrypt, dummy_sign, dummy_verify): Return
-       err code GPG_ERR_NOT_IMPLEMENTED instead of aborting through
-       log_bug().
-       (dummy_get_nbits): Return 0 instead of aborting though log_bug().
-
-2004-08-19  Werner Koch  <wk@g10code.de>
-
-       * pubkey.c (sexp_data_to_mpi): Changed the zero random byte
-       substituting code to actually do clever things.  Thanks to
-       Matthias Urlichs for noting the implementation problem.
-
-2004-08-09  Moritz Schulte  <moritz@g10code.com>
-
-       * pubkey.c (gcry_pk_sign): Fixed memory leak; fix provided by
-       Modestas Vainius.
-
-2004-07-16  Werner Koch  <wk@gnupg.org>
-
-       * rijndael.c (do_encrypt): Fix alignment problem.  Bugs found by
-       Matthias Urlichs.
-       (do_decrypt): Ditto.
-       (keySched, keySched2): Use 2 macros along with unions in the key
-       schedule context.
-
-2004-07-14  Moritz Schulte  <moritz@g10code.com>
-
-       * rsa.c (_gcry_rsa_decrypt): Don't forget to free "a".  Thanks to
-       Nikos Mavroyanopoulos.
-
-2004-05-09  Werner Koch  <wk@gnupg.org>
-
-       * random.c (read_pool): Mix the PID in to better protect after a
-       fork.
-
-2004-07-04  Moritz Schulte  <moritz@g10code.com>
-
-       * serpent.c: Use "u32_t" instead of "unsigned long", do not
-       declare S-Box variables as "register".  Fixes failure on
-       OpenBSD/sparc64, reported by Nikolay Sturm.
-
-2004-05-07  Werner Koch  <wk@gnupg.org>
-
-       * random.c (initialize): Factored out some code to ..
-       (initialize_basics): .. new function.
-       (_gcry_random_initialize): Just call initialize_basics unless the
-       new arg FULL is set to TRUE.
-       (_gcry_fast_random_poll): Don't do anything unless the random
-       system has been really initialized.
-
-2004-05-07  Moritz Schulte  <moritz@g10code.de>
-
-       * ac.c (gcry_ac_open): Do not dereference NULL pointer.  Reported
-       by Umberto Salsi.
-
-2004-02-20  Werner Koch  <wk@gnupg.org>
-
-       * primegen.c (check_prime): New args CB_FUNC and CB_ARG; call them
-       at different stages.  Pass these arguments through all callers.
-
-2004-02-06  Werner Koch  <wk@gnupg.org>
-
-       * des.c: Add a new OID as used by pkcs#12.
-
-       * rfc2268.c: New. Taken from libgcrypt. 
-       * cipher.c: Setup the rfc2268 algorithm.
-
-2004-01-25  Moritz Schulte  <mo@g10code.com>
-
-       * primegen.c (prime_generate_internal): Do not forget to free
-       `q_factor'; fixed by Brieuc Jeunhomme.
-       (prime_generate_internal): Do not forget to free `prime'.
-
-2004-01-14  Moritz Schulte  <mo@g10code.com>
-
-       * ac.c (gcry_ac_data_set): New argument: flags; slightly
-       rewritten.
-       (gcry_ac_data_get_name, gcry_ac_data_get_index): Likewise.
-       (gcry_ac_key_pair_generate): New argument: misc_data; modified
-       order of arguments.
-       (gcry_ac_key_test): New argument: handle.
-       (gcry_ac_key_get_nbits, gcry_ac_key_get_grip): Likewise.
-       Use GCRY_AC_FLAG_NO_BLINDING instead of
-       GCRY_AC_DATA_FLAG_NO_BLINDING.
-       (gcry_ac_mpi): New member: flags.
-       (gcry_ac_data_search, gcry_ac_data_add): Removed functions.
-
-2003-12-22  Werner Koch  <wk@gnupg.org>
-
-       * primegen.c (is_prime): Release A2.
-
-2003-12-19  Werner Koch  <wk@gnupg.org>
-
-       * md.c: Moved a couple of functions down below the data structure
-       definitions.
-       (struct gcry_md_context): New field ACTUAL_HANDLE_SIZE.
-       (md_open): Set it here.
-       (strcut gcry_md_list): New field ACTUAL_STRUCT_SIZE.
-       (md_enable): Set it here.
-       (md_close): Wipe the context memory.
-       secure memory.
-       * cipher.c (struct gcry_cipher_handle): New field ACTUAL_HANDLE_SIZE.
-       (gcry_cipher_open): Set it here.
-       (gcry_cipher_close): Use it to always wipe out the handle data.
-
-       * ac.c (gcry_ac_open): Make sure HANDLE gets initialized even when
-       the function is not successful.
-       (gcry_ac_close): Allow a NULL handle.
-       (gcry_ac_key_destroy, gcry_ac_key_pair_destroy): Ditto.
-       (gcry_ac_key_get_grip): Return INV_OBJ on error.
-
-       * primegen.c (prime_generate_internal): Fixed error code for
-       failed malloc.  Replaced the !err if chain by gotos.
-       (gcry_prime_group_generator): Remove the extra sanity check.
-
-       * md.c: Minor code and comment cleanups.
-
-2003-12-16  Werner Koch  <wk@gnupg.org>
-
-       * primegen.c (gen_prime): Doc fix.  Thanks to Newton Hammet.
-
-2003-12-11  Werner Koch  <wk@gnupg.org>
-
-       * rndunix.c (slow_poll): Don't use #warning but #error.
-
-       * rndegd.c: Changed indentation.
-       (my_make_filename): Removd the var_arg cruft becuase we
-       don't need it here.  Changed caller.  
-
-       * rndlinux.c: Changed indentation.
-       (open_device): Remove the superfluous stat call and clarify
-       comment.
-
-       * rsa.c: Changed indentation.
-       (secret): Use the standard algorithm if p, q and u are not
-       available.
-       (rsa_blind, rsa_unblind): Renamed from _gcry_rsa_blind,
-       _gcry_rsa_unblind and moved more to the top.
-
-       * md4.c: Changed indentation.  Removed unnecessary casts.
-       * md5.c, rmd160.c, sha1.c, tiger.c: Ditto.
-       * rijndael.c, twofish.c: Ditto.
-       * serpent.c: Removed unnecessary casts.
-       * sha256.c, sha512.c: Ditto.
-
-2003-12-09  Werner Koch  <wk@gnupg.org>
-
-       * dsa.c: Unified indentation style.
-       * elgamal.c: Ditto. 
-       * des.c (des_key_schedule): Code beautifications.
-       * blowfish.c: Changed indentation style.
-       * cast5.c (do_cast_setkey): Ditto.
-
-       * pubkey.c (gcry_pk_encrypt): Replaced the chain of if(!err) tests
-       by straightforward gotos. Other cleanups.
-       (gcry_pk_decrypt): Ditto.
-       (gcry_pk_sign): Ditto.
-       (gcry_pk_verify): Ditto.
-       (gcry_pk_genkey): Ditto.  Use strtoul instead of strtol.
-       (gcry_pk_ctl): Use GPG_ERR_INV_ARG to indicate bad arguments.
-
-2003-12-07  Werner Koch  <wk@gnupg.org>
-
-       * pubkey.c (gcry_pk_register_default): Undef the helper macro.
-       (gcry_pk_map_name): Allow NULL for string.
-       (sexp_to_key): Use memcpy and not strncpy.  Use gcry_free and not
-       free.
-       (sexp_to_sig): Ditto.
-       (sexp_to_enc): Ditto.  Replaced the chain of if(!err) tests by
-       straightforward gotos.
-
-2003-12-05  Werner Koch  <wk@gnupg.org>
-
-       * cipher.c: Documentation cleanups.
-       (gcry_cipher_mode_from_oid): Allow NULL for STRING.
-
-2003-12-03  Werner Koch  <wk@gnupg.org>
-
-       * elgamal.c (sign, do_encrypt, gen_k): Make sure that a small K is
-       only used for encryption.
-
-2003-11-18  Werner Koch  <wk@gnupg.org>
-
-       * random.h (rndw32_set_dll_name): Removed unused prototype.
-
-       * Makefile.am (EXTRA_DIST): Added Manifest.
-
-2003-11-11  Werner Koch  <wk@gnupg.org>
-
-       * Manifest: New.
-
-2003-11-04  Werner Koch  <wk@gnupg.org>
-
-       * md.c (gcry_md_hash_buffer): Use shortcut for SHA1
-       * sha1.c (_gcry_sha1_hash_buffer): New.
-
-       * random.c: Reformatted most functions.
-       (mix_pool): Moved the failsafe_digest from global
-       scope to here.
-       (do_fast_random_poll): Use the generic fucntions even if a fast
-       gathering function has been used.
-       (read_pool): Detect a fork and retry.
-       (gcry_randomize, get_random_bytes): Don't distinguish anymore
-       between weak and strong random.
-       (gcry_create_nonce): New.
-
-2003-10-31  Werner Koch  <wk@gnupg.org>
-
-       * rndw32.c (slow_gatherer_windowsNT): Use a plain buffer for the
-       disk performance values and not the W32 API structure.
-
-       * dsa.c (verify): s/exp/ex/ due to shadowing of a builtin.
-       * elgamal.c (verify): Ditto.
-
-       * ac.c (gcry_ac_data_get_index): s/index/idx/
-       (gcry_ac_data_copy_internal): Remove the cast in _gcry_malloc.
-       (gcry_ac_data_add): Must use gcry_realloc instead of realloc.
-       * pubkey.c (sexp_elements_extract): s/index/idx/ as tribute to the
-       forehackers.
-       (gcry_pk_encrypt): Removed shadowed definition of I. Reordered
-       arguments to malloc for clarity.
-       (gcry_pk_sign, gcry_pk_genkey): Ditto.
-       * primegen.c (prime_generate_internal): s/random/randomlevel/.
-
-2003-10-27  Moritz Schulte  <mo@g10code.com>
-
-       * pubkey.c (gcry_pk_encrypt): Don't forget to deallocate pkey.
-
-2003-10-27  Werner Koch  <wk@gnupg.org>
-
-       * random.c (gcry_random_add_bytes): Return if buflen is zero to
-       avoid gcc warning about unsed parameter.
-       (MASK_LEVEL): Simplified; does now work for signed and unsigned
-       w/o warnings.
-
-       * md.c (md_start_debug): Removed the const from SUFFIX, because
-       this function is called from the control fucntion which does not
-       require const.
-
-       Prefixed all (pubkey,digest,cipher}_spec_* globale variables with
-       _gcry_.
-
-       * ac.c (ac_key_identifiers): Made static.
-
-       * random.c (getfnc_gather_random,getfnc_fast_random_poll): Move
-       prototypes to ..
-       * rand-internal.h: .. here 
-       * random.c (getfnc_gather_random): Include rndw32 gatherer.
-       * rndunix.c, rndw32.c, rndegd.c: Include them here.
-       * rndlinux.c (_gcry_rndlinux_gather_random): Prepend the _gcry_
-       prefix.  Changed all callers.
-       * rndegd.c (_gcry_rndegd_gather_random): Likewise.
-       (_gcry_rndegd_connect_socket): Likewise.
-       * rndunix.c (_gcry_rndunix_gather_random): Likewise.
-       (waitpid): Made static.
-       * rndw32.c: Removed the old and unused winseed.dll cruft.
-       (_gcry_rndw32_gather_random_fast): Renamed from
-       gather_random_fast.
-       (_gcry_rndw32_gather_random): Renamed from gather_random.  Note,
-       that the changes 2003-04-08 somehow got lost.
-
-       * sha512.c (sha512_init, sha384_init): Made static.
-
-       * cipher.c (do_ctr_decrypt): Removed "return" from this void
-       function.
-
-2003-10-24  Moritz Schulte  <mo@g10code.com>
-
-       * serpent.c: Fix an issue on big-endian systems.
-
-       * rndw32.c: Removed IS_MODULE -cruft.
-       * rndlinux.c (rndlinux_gather_random): Likewise.
-
-2003-10-10  Werner Koch  <wk@gnupg.org>
-
-       * primegen.c (gen_prime): Bail out if NBITS is less than 16.
-       (prime_generate_internal): Initialize prime variable to suppress
-       compiler warning.  Check pbits, initialize qbits when passed as
-       zero.
-
-       * primegen.c (prime_generate_internal): New arg
-       ALL_FACTORS. Changed all callers.
-       (gcry_prime_generate): Make the factors arg optional. Request
-       all_factors.  Make sure PRIME is set to NULL even on error.
-       (gcry_prime_group_generator): New.
-       (gcry_prime_release_factors): New.
-
-2003-10-06  Werner Koch  <wk@gnupg.org>
-
-       * primegen.c (gen_prime): Assert that NBITS is never zero, it
-       would cause a segv.
-
-2003-09-28  Moritz Schulte  <mo@g10code.com>
-
-       * ac.c: Include "cipher.h".
-
-2003-09-27  Moritz Schulte  <mo@g10code.com>
-
-       * rndegd.c (do_read): Return nread instead of nbytes; thanks to
-       Michael Caerwyn.
-
-2003-09-04  Werner Koch  <wk@gnupg.org>
-
-       * pubkey.c (_gcry_pk_aliased_algo_name): New.
-       * ac.c (gcry_ac_open): Use it here.
-
-       * Makefile.am (EXTRA_libcipher_la_SOURCES): Add serpent.c
-
-2003-09-02  Moritz Schulte  <mo@g10code.com>
-
-       * primegen.c (gcry_prime_check, gcry_prime_generate): New
-       functions.
-       (prime_generate_internal): New function, based on
-       _gcry_generate_elg_prime.
-       (_gcry_generate_elg_prime): Rewritten as a wrapper for
-       prime_generate_internal.
-
-2003-08-28  Werner Koch  <wk@gnupg.org>
-
-       * pubkey.c (gcry_pk_encrypt): Don't include the flags list in the
-       return value.  This does not make sense and breaks any programs
-       parsing the output strictly (e.g. current gpgsm).
-       (gcry_pk_encrypt): If aliases for the algorithm name exists, take
-       the first one instead of the regular name to adhere to SPKI
-       conventions.
-       (gcry_pk_genkey): Ditto.
-       (gcry_pk_sign): Ditto. Removed unused KEY_ALGO_NAME.
-
-2003-08-19  Moritz Schulte  <mo@g10code.com>
-
-       * cipher.c: Add support for Serpent
-       * serpent.c: New file.
-
-2003-08-10  Moritz Schulte  <moritz@g10code.com>
-
-       * rsa.c (_gcry_rsa_blind, _gcry_rsa_unblind): Declare static.
-
-2003-08-09  Timo Schulz  <twoaday@freakmail.de>
-
-       * random.c (getfnc_gather_random): Don't check NAME_OF_DEV_RANDOM
-       two times, but also the NAME_OF_DEV_URANDOM device.
-       
-2003-08-08  Moritz Schulte  <moritz@g10code.com>
-
-       * pubkey.c (sexp_to_enc): Fixed extraction of S-Expression: do not
-       fail if no `flags' sub S-Expression is found.
-
-2003-07-27  Werner Koch  <wk@gnupg.org>
-
-       * md.c (gcry_md_lookup_func_oid): Allow for empty OID lists.
-
-2003-07-23  Moritz Schulte  <moritz@g10code.com>
-
-       * ac.c (gcry_ac_data_construct): New argument: include_flags, only
-       include `flags' S-expression, if include_flags is true.  Adjust
-       callers.  Thanks for triggering a bug caused by `flags'
-       sub-S-expression where they are not expected to Ralf Schneider.
-
-2003-07-21  Moritz Schulte  <moritz@g10code.com>
-
-       * pubkey.c (gcry_pk_lookup_func_name): Use new member name
-       `aliases' instead of `sexp_names'.
-
-       * ac.c (gcry_ac_key_data_get): New function.
-
-       * cipher.c (gcry_cipher_lookup_func_name): Fix return value.
-
-2003-07-20  Moritz Schulte  <moritz@g10code.com>
-
-       * blowfish.c: Adjusted for new gcry_cipher_spec_t structure.
-       * cast5.c: Likewise.
-       * twofish.c: Likewise.
-       * arcfour.c: Likewise.
-       * rijndael.c (rijndael_oids, rijndael192_oids, rijndael256_oids):
-       New variables, adjust for new gcry_cipher_spec_t structure.
-       * des.c (oids_tripledes): New variable, adjust for new
-       gcry_cipher_spec_t structure.
-
-       * md.c (oid_table): Removed.
-
-       * tiger.c (oid_spec_tiger): New variable.
-       (digest_spec_tiger): Adjusted for new gry_md_spec_t structure.
-
-       * sha512.c (oid_spec_sha512): New variable.
-       (digest_spec_sha512): Adjusted for new gry_md_spec_t structure.
-
-       * sha512.c (oid_spec_sha384): New variable.
-       (digest_spec_sha384): Adjusted for new gry_md_spec_t structure.
-
-       * sha256.c (oid_spec_sha256): New variable.
-       (digest_spec_sha256): Adjusted for new gry_md_spec_t structure.
-
-       * sha1.c (oid_spec_sha1): New variable.
-       (digest_spec_sha1): Adjusted for new gry_md_spec_t structure.
-
-       * rmd160.c (oid_spec_rmd160): New variable.
-       (digest_spec_rnd160): Adjusted for new gry_md_spec_t structure.
-
-       * md5.c (oid_spec_md5): New variable.
-       (digest_spec_md5): Adjusted for new gry_md_spec_t structure.
-
-       * md4.c (oid_spec_md4): New variable.
-       (digest_spec_md4): Adjusted for new gry_md_spec_t structure.
-
-       * crc.c (digest_spec_crc32, digest_spec_crc32_rfc1510,
-       digest_spec_crc32_rfc2440): Adjusted for new gry_md_spec_t
-       structure.
-
-2003-07-19  Moritz Schulte  <moritz@g10code.com>
-
-       * md.c (gcry_md_lookup_func_oid): New function.
-       (search_oid): New function, copied from cipher.c.
-       (gcry_md_map_name): Adjust for new search_oid_interface.
-
-       * cipher.c (oid_table): Removed table.
-       (gcry_cipher_lookup_func_oid): New function.
-       (search_oid): Rewritten to use the module functions.
-       (gcry_cipher_map_name): Adjust for new search_oid interface.
-       (gcry_cipher_mode_from_oid): Likewise.
-
-2003-07-18  Werner Koch  <wk@gnupg.org>
-
-       * md.c (gcry_md_hash_buffer): Convert ERR to gpg_error_t in
-       gpg_strerror.
-
-2003-07-14  Moritz Schulte  <moritz@g10code.com>
-
-       * cipher.c (gcry_cipher_lookup_func_name): Also check the cipher
-       name aliases, not just the primary name.
-       (gcry_cipher_map_name): Remove kludge for aliasing Rijndael to
-       AES.
-
-       * arcfour.c, blowfish.c, cast5.c, des.c, twofish.c: Adjust cipher
-       specification structures.
-
-       * rijndael.c (rijndael_names, rijndael192_names,
-       rijndael256_names): New variables, use them in the cipher
-       specifications.
-
-       * rmd160test.c: Removed file.
-
-       * ac.c, arcfour.c, blowfish.c, cast5.c, cipher.c, des.c, dsa.c,
-       elgamal.c, md.c, pubkey.c, random.c, rijndael.c, rsa.c, twofish.c:
-       Used gcry_err* wrappers for libgpg symbols.
-
-       * primegen.c (gen_prime): Correct the order arguments to
-       extra_check.
-
-2003-07-12  Moritz Schulte  <moritz@g10code.com>
-
-       * ac.c: Replaced all public occurences of gpg_error_t with
-       gcry_error_t.
-       * cipher.c: Likewise.
-       * md.c: Likewise.
-       * pubkey.c: Likewise.
-       * random.c: Likewise.
-
-       * cipher.c: Added support for TWOFISH128.
-
-2003-07-08  Moritz Schulte  <moritz@g10code.com>
-
-       * ac.c (gcry_ac_data_copy_internal): New function, based on
-       gcry_ac_data_copy.
-       (gcry_ac_data_copy): Made public, use gcry_ac_data_copy_internal.
-       (gcry_ac_key_init): Use gcry_ac_data_copy_internal.
-
-2003-07-07  Moritz Schulte  <moritz@g10code.com>
-
-       * ac.c (gcry_ac_data_set): Only release old MPI value if it is
-       different from the new value.  Bug reported by Simon Josefsson
-       <jas@extundo.com>.
-
-       * pubkey.c (gcry_pk_list): New function.
-       * md.c (gcry_md_list): New function.
-
-       * ac.c (gcry_ac_key_pair_generate): Fix calculation of format
-       string size.
-
-2003-07-05  Moritz Schulte  <moritz@g10code.com>
-
-       * md.c: Named struct of digest_table `digest_table_entry'.
-       (digest_table_entry): New member: algorithm; filled in.
-       (digest_table_entry): Removed unused member: flags.
-       (gcry_md_register): New argument: algorithm_id, filled in.
-       (gcry_md_register_default): Used algorithm ID from module
-       structure.
-       (gcry_md_map_name): Likewise.
-       (md_enable): Likewise.
-       (md_read): Likewise.
-       (gcry_md_info): Likewise.
-
-       * pubkey.c: Named truct for pubkey_table `pubkey_table_entry'.
-       (pubkey_table_entry): New member: algorithm; filled in.
-       (gcry_pk_register_default): Used algorithm ID from pubkey_table.
-       (gcry_pk_register): New argument: algorithm_id, filled in.
-       (gcry_pk_map_name): Used algorithm ID from module structure.
-       (gcry_pk_decrypt): Likewise.
-       (gcry_pk_encrypt): Likewise.
-       (gcry_pk_verify): Likewise.
-       (gcry_pk_sign): Likewise.
-       (gcry_pk_testkey): Likewise.
-       (gcry_pk_genkey): Likewise.
-       (gcry_pk_get_nbits): Likewise.
-       (sexp_to_key): Removed unused variable: algo.
-       (sexp_to_sig): Likewise.
-
-       * cipher.c: Named struct for cipher_table `cipher_table_entry'.
-       (cipher_table_entry): New member: algorithm; filled in.
-       (gcry_cipher_register_default): Used algorithm ID from
-       cipher_table.
-       (gcry_cipher_register): New argument: algorithm_id, filled in.
-       (gcry_cipher_map_name): Used algorithm ID from module structure.
-
-       * arcfour.c (cipher_spec_arcfour): Removed algorithm ID.
-       * blowfish.c (cipher_spec_blowfish): Likewise.
-       * cast5.c (cipher_spec_cast5): Likewise.
-       * crc.c (digest_spec_crc32): Likewise.
-       * crc.c (digest_spec_crc32_rfc1510): Likewise.
-       * crc.c (digest_spec_crc32_rfc2440): Likewise.
-       * des.c (cipher_spec_des): Likewise.
-       * des.c (cipher_spec_tripledes): Likewise.
-       * dsa.c (pubkey_spec_dsa): Likewise.
-       * elgamal.c (pubkey_spec_elg): Likewise.
-       * md4.c (digest_spec_md4): Likewise.
-       * md5.c (digest_spec_md5): Likewise.
-       * aes.c (cipher_spec_aes): Likewise.
-       * aes.c (cipher_spec_aes192): Likewise.
-       * aes.c (cipher_spec_aes256): Likewise.
-       * rsa.c (pubkey_spec_rsa): Likewise.
-       * sha1.c (digest_spec_sha1): Likewise.
-       * sha256.c (digest_spec_sha256): Likewise.
-       * sha512.c (digest_spec_sha512): Likewise.
-       * tiger.c (digest_spec_tiger): Likewise.
-       * twofish.c (cipher_spec_twofish): Likewise.
-       * twofish.c (cipher_spec_twofish128): Likewise.
-
-       * Makefile.am (EXTRA_libcipher_la_SOURCES): Fix list of source
-       files; reported by Simon Josefsson <jas@extundo.com>.
-
-       * pubkey.c: Replaced all occurences of `id' with `algorithm',
-       since `id' is a keyword in obj-c.
-       * md.c: Likewise.
-       * cipher.c: Likewise.
-
-       * crc.c, md4.c, md5.c, rmd160.c, sha1.c, sha256.c, tiger.c:
-       Replaced all occurences of gcry_digest_spec_t with gcry_md_spec_t.
-
-       * dsa.c, rsa.c, elgamal.c: Replaced all occurencens of
-       gcry_pubkey_spec_t with gcry_pk_spec_t.
-
-       * md.c: Replaced all occurences of gcry_digest_spec_t with
-       gcry_md_spec_t.
-       (gcry_digest_register_default): Renamed to ...
-       (gcry_md_register_default): ... this; adjusted callers.
-       (gcry_digest_lookup_func_name): Renamed to ...
-       (gcry_md_lookup_func_name): ... this; adjusted callers.
-       (gcry_digest_lookup_name): Renamed to ...
-       (gcry_md_lookup_name): ... this; adjusted callers.
-       (gcry_digest_register): Renamed to ...
-       (gcry_md_register): ... this.
-       (gcry_digest_unregister): Renamed to ...
-       (gcry_md_unregister): ... this.
-
-       * pubkey.c (gcry_pubkey_register): Renamed to ...
-       (gcry_pk_register): ... this.
-       (gcry_pubkey_unregister): Renamed to ...
-       (gcry_pk_unregister): ... this.
-       Replaced all occurences of gcry_pubkey_spec_t with gcry_pk_spec_t.
-       (gcry_pubkey_register_default): Renamed to ...
-       (gcry_pk_register_default): ... this; adjusted callers.
-       (gcry_pubkey_lookup_func_name): Renamed to ...
-       (gcry_pk_lookup_func_name): ... this; adjusted callers.
-       (gcry_pubkey_lookup_name): Renamed to ...
-       (gcry_pk_lookup_name): ... this; adjusted callers.
-
-       * md.c (gcry_md_hash_buffer): Fix error checking.  Thanks to Simon
-       Josefsson <jas@extunde.com>.
-
-2003-07-04  Moritz Schulte  <moritz@g10code.com>
-
-       * cipher.c (gcry_cipher_list): New function.
-
-2003-07-01  Moritz Schulte  <moritz@g10code.com>
-
-       * pubkey.c (sexp_to_sig): Accept a `flags' S-expression to be more
-       consistent with sexp_to_enc.
-
-2003-06-30  Moritz Schulte  <moritz@g10code.com>
-
-       * Makefile.am (libcipher_la_SOURCES): Added: ac.c.
-
-       * pubkey.c (_gcry_pk_module_lookup): New function.
-       (_gcry_pk_module_release): New function.
-
-2003-06-29  Moritz Schulte  <moritz@g10code.com>
-
-       * ac.c: New file.
-
-2003-06-26  Werner Koch  <wk@gnupg.org>
-
-       * md.c (gcry_md_hash_buffer): Trigger BUG correcly with new API.
-
-2003-06-19  Werner Koch  <wk@gnupg.org>
-
-       * md.c (gcry_md_is_enabled): Fixed. 
-
-2003-06-18  Werner Koch  <wk@gnupg.org>
-
-       * cipher.c (gcry_cipher_get_algo_keylen): New.
-       (gcry_cipher_get_algo_blklen): New.
-
-2003-06-18  Moritz Schulte  <moritz@g10code.com>
-
-       * arcfour.c, cipher.c, blowfish.c, md.c, cast5.c, pubkey.c, crc.c,
-       des.c, dsa.c, elgamal.c, md4.c, md5.c, random.c, rijndael.c,
-       rmd160.c, rsa.c, sha1.c, sha256.c, sha512.c, tiger.c, twofish.c:
-       Replaced older types GcryDigestSpec, GcryCipherSpec and
-       GcryPubkeySpec with newer types: gcry_digest_spec_t,
-       gcry_cipher_spec_t and gcry_pubkey_spec_t.
-
-       * md.c (gcry_digest_id_new): Removed function.
-       (gcry_digest_register): Removed code for generating a new module
-       ID.
-
-       * pubkey.c (gcry_pubkey_id_new): Removed function.
-       (gcry_pubkey_register): Removed code for generating a new module
-       ID.
-
-       * cipher.c, md.c, pubkey.c: Replace old type GcryModule with newer
-       one: gcry_module_t.
-       (gcry_cipher_id_new): Removed function.
-       (gcry_cipher_register): Removed code for generating a new module
-       ID.
-
-       * cipher.c (gcry_cipher_register): Adjust call to
-       _gcry_module_add.
-       (gcry_cipher_register_default): Likewise.
-       * pubkey.c (gcry_pubkey_register_default): Likewise.
-       (gcry_pubkey_register): Likewise.
-       * md.c (gcry_digest_register_default): Likewise.
-       (gcry_digest_register): Likewise.
-
-       * md.c (gcry_digest_lookup_func_id): Removed function.
-       (gcry_digest_lookup_id): Likewise.
-       (gcry_digest_id_new): Use _gcry_module_lookup_id instead of
-       gcry_digest_lookup_id.
-       (digest_algo_to_string): Likewise.
-       (check_digest_algo): Likewise.
-       (md_enable): Likewise.
-       (md_digest_length): Likewise.
-       (md_asn_oid): Likewise.
-
-       * pubkey.c (gcry_pubkey_lookup_id): Removed function.
-       (gcry_pubkey_lookup_func_id): Likewise.
-       (gcry_pubkey_id_new): Use _gcry_module_lookup_id instead of
-       gcry_pubkey_id_new.
-       (gcry_pk_algo_name): Likewise.
-       (disable_pubkey_algo): Likewise.
-       (check_pubkey_algo): Likewise.
-       (pubkey_get_npkey): Likewise.
-       (pubkey_get_nskey): Likewise.
-       (pubkey_get_nsig): Likewise.
-       (pubkey_get_nenc): Likewise.
-       (pubkey_generate): Likewise.
-       (pubkey_check_secret_key): Likewise.
-       (pubkey_encrypt): Likewise.
-       (pubkey_decrypt): Likewise.
-       (pubkey_sign): Likewise.
-       (pubkey_verify): Likewise.
-       (gcry_pk_algo_info): Likewise.
-
-       * cipher.c (gcry_cipher_lookup_func_id): Removed function.
-       (gcry_cipher_lookup_id): Likewise.
-       (cipher_algo_to_string): use _gcry_module_lookup_id instead of
-       gcry_cipher_lookup_id.
-       (disable_cipher_algo): Likewise.
-       (check_cipher_algo): Likewise.
-       (cipher_get_blocksize): Likewise.
-       (gcry_cipher_open): Likewise.
-       (gcry_cipher_id_new): Likewise.
-
-2003-06-17  Moritz Schulte  <moritz@g10code.com>
-
-       * Makefile.am (GCRYPT_MODULES): Set to @GCRYPT_CIPHERS@,
-       @GCRYPT_PUBKEY_CIPHERS@, @GCRYPT_DIGESTS@ and @GCRYPT_RANDOM@.
-       (libcipher_la_DEPENDENCIES): Set to $(GCRYPT_MODULES).
-       (libcipher_la_LIBADD): Likewise.
-       (AM_CFLAGS): Added: @GPG_ERROR_CFLAGS@.
-       (EXTRA_libcipher_la_SOURCES): Added all conditional sources.
-
-       * md.c (md_open): Use _gcry_fast_random_poll instead of
-       fast_random_poll.
-       * cipher.c (gcry_cipher_open): Likewise.
-
-       * random.h (fast_random_poll): Removed macro.
-
-       * blowfish.c, md4.c, md5.c, rmd160.c, sha1.c, sha256.c, sha512.c,
-       tiger.c: Use Autoconf's WORDS_BIGENDIAN instead of our own
-       BIG_ENDIAN_HOST.
-
-2003-06-16  Moritz Schulte  <moritz@g10code.com>
-
-       * random.c (getfnc_gather_random): Do not special-case
-       USE_ALL_RANDOM_MODULES, make it the default.
-
-       * dsa.c: Replace last occurences of old type names with newer
-       names (i.e. replace MPI with gcry_mpi_t).
-       * elgamal.c: Likewise.
-       * primegen.c: Likewise.
-       * pubkey.c: Likewise.
-       * rsa.c: Likewise.
-
-2003-06-14  Moritz Schulte  <moritz@g10code.com>
-
-       * des.c (des_setkey): Add selftest check.
-       (tripledes_set3keys): Likewise.
-       (do_tripledes_setkey): Remove selftest check.
-       (do_des_setkey): Likewise.
-
-2003-06-11  Moritz Schulte  <moritz@g10code.com>
-
-       * md.c (_gcry_md_init): New function.
-       * cipher.c (_gcry_cipher_init): New function.
-       * pubkey.c (_gcry_pk_init): New function.
-
-2003-06-13  Werner Koch  <wk@gnupg.org>
-
-       * md.c (gcry_md_get_algo): Reverted to old API.  This is a
-       convenience function anyway and error checking is not approriate.
-       (gcry_md_is_secure): New.
-       (gcry_md_is_enabled): New.
-
-2003-06-12  Werner Koch  <wk@gnupg.org>
-
-       * cipher.c (gcry_cipher_open): Make sure HANDLE is set to NULL on
-       error.
-
-2003-06-11  Werner Koch  <wk@gnupg.org>
-
-       * md.c (gcry_md_open): Make sure H receives either NULL or an
-       valid handle.
-       (gcry_md_copy): Swapped arguments so that it is more in lione with
-       md_open and most other API fucntions like memcpy (destination
-       comes first).  Make sure HANDLE is set to NULL on error.
-       
-       * rijndael.c (do_encrypt): Hack to force correct alignment.  It
-       seems not to be not sufficient, though.  We should rework this
-       fucntions and remove all these ugly casts.  Let the compiler
-       optimize or have an assembler implementation.
-
-2003-06-09  Moritz Schulte  <moritz@g10code.com>
-
-       * Makefile.am: Removed rules serpent, since that is not commited
-       yet.
-
-2003-06-08  Moritz Schulte  <moritz@g10code.com>
-
-       * pubkey.c (gcry_pk_encrypt): Improve calculation for size of the
-       format string.
-
-2003-06-07  Moritz Schulte  <moritz@g10code.com>
-
-       * arcfour.c, bithelp.h, blowfish.c, cast5.c, cipher.c, crc.c,
-       des.c, dsa.c, elgamal.c, md4.c, md5.c, md.c, primegen.c, pubkey.c,
-       rand-internal.h, random.c, random.h, rijndael.c, rmd160.c,
-       rmd160test.c, rmd.h, rndeged.c, rndlinux.c, rndunix.c, rndw32.c,
-       rsa.c, sha1.c, sha256.c, sha512.c, tiger.c, twofish.c: Edited all
-       preprocessor instructions to remove whitespace before the '#'.
-       This is not required by C89, but there are some compilers out
-       there that don't like it.  Replaced any occurence of the now
-       deprecated type names with the new ones.
-       
-2003-06-04  Moritz Schulte  <moritz@g10code.com>
-
-       * pubkey.c (gcry_pk_encrypt): Construct an arg_list and use
-       gcry_sexp_build_array instead of gcry_sexp_build.
-       (gcry_pk_sign): Likewise.
-       (gcry_pk_genkey): Likewise.
-
-2003-06-01  Moritz Schulte  <moritz@g10code.com>
-
-       * dsa.c (_gcry_dsa_generate): Do not check wether the algorithm ID
-       does indeed belong to DSA.
-       (_gcry_dsa_sign): Likewise.
-       (_gcry_dsa_verify): Likewise.
-       (_gcry_dsa_get_nbits): Likewise.
-
-       * elgamal.c (_gcry_elg_check_secret_key): Do not check wether the
-       algorithm ID does indeed belong to ElGamal.
-       (_gcry_elg_encrypt): Likewise.
-       (_gcry_elg_decrypt): Likewise.
-       (_gcry_elg_sign): Likewise.
-       (_gcry_elg_verify): Likewise.
-       (_gcry_elg_get_nbits): Likewise.
-       (_gcry_elg_generate): Likewise.
-
-       * rsa.c (_gcry_rsa_generate): Do not check wether the algorithm ID
-       does indeed belong to RSA.
-       (_gcry_rsa_encrypt): Likewise.
-       (_gcry_rsa_decrypt): Likewise.
-       (_gcry_rsa_sign): Likewise.
-       (_gcry_rsa_verify): Likewise.
-       (_gcry_rsa_get_nbits): Likewise.
-
-2003-05-30  Moritz Schulte  <moritz@g10code.com>
-
-       * md.c (md_get_algo): Return zero in case to algorithm is enabled.
-
-       * md.c (gcry_md_info): Adjusted for new no-errno-API.
-       (md_final): Likewise.
-       (gcry_md_get_algo): Likewise.
-       * pubkey.c (gcry_pk_get_keygrip): Likewise.
-       (gcry_pk_ctl): Likewise.
-       (gcry_pk_algo_info): Likewise.
-       * des.c (selftest): Likewise.
-
-2003-05-29  Moritz Schulte  <moritz@g10code.com>
-
-       * md.c (md_enable): Do not forget to release module on error.
-       (gcry_md_open): Adjusted for new no-errno-API.
-       (md_open): Likewise.
-       (md_copy): Likewise.
-       (gcry_md_copy): Likewise.
-       (gcry_md_setkey): Likewise.
-       (gcry_md_algo_info): Likewise.
-
-       * cipher.c (gcry_cipher_open): Adjusted for new no-errno-API and
-       also fixed a locking bug.
-       (gcry_cipher_encrypt): Adjusted for new no-errno-API.
-       (gcry_cipher_decrypt): Likewise.
-       (gcry_cipher_ctl): Likewise.
-       (gcry_cipher_info): Likewise.
-       (gcry_cipher_algo_info): Likewise.
-
-2003-05-28  Moritz Schulte  <moritz@g10code.com>
-
-       * md.c (md_enable): Adjusted for libgpg-error.
-       (gcry_md_enable): Likewise.
-       (gcry_digest_register_default): Likewise.
-       (gcry_digest_register): Likewise.
-       (check_digest_algo): Likewise.
-       (prepare_macpads): Likewise.
-       (gcry_md_setkey): Likewise.
-       (gcry_md_ctl): Likewise.
-       (gcry_md_get): Likewise.
-       (gcry_md_algo_info): Likewise.
-       (gcry_md_info): Likewise.
-       * dsa.c (_gcry_dsa_generate): Likewise.
-       (_gcry_dsa_check_secret_key): Likewise.
-       (_gcry_dsa_sign): Likewie.
-       (_gcry_dsa_verify): Likewise.
-       * twofish.c (do_twofish_setkey): Likewise.
-       (twofish_setkey): Likewise.
-       * cipher.c (gcry_cipher_register): Likewise.
-
-2003-05-25  Moritz Schulte  <moritz@g10code.com>
-
-       * rijndael.c (do_setkey): Adjusted for libgpg-error.
-       (rijndael_setkey): Likewise.
-       * random.c (gcry_random_add_bytes): Likewise.
-       * elgamal.c (_gcry_elg_generate): Likewise.
-       (_gcry_elg_check_secret_key): Likewise.
-       (_gcry_elg_encrypt): Likewise.
-       (_gcry_elg_decrypt): Likewise.
-       (_gcry_elg_sign): Likewise.
-       (_gcry_elg_verify): Likewise.
-       * rsa.c (_gcry_rsa_generate): Likewise.
-       (_gcry_rsa_check_secret_key): Likewise.
-       (_gcry_rsa_encrypt): Likewise.
-       (_gcry_rsa_decrypt): Likewise.
-       (_gcry_rsa_sign): Likewise.
-       (_gcry_rsa_verify): Likewise.
-       * pubkey.c (dummy_generate, dummy_check_secret_key, dummy_encrypt,
-       dummy_decrypt, dummy_sign, dummy_verify): Likewise.
-       (gcry_pubkey_register): Likewise.
-       (check_pubkey_algo): Likewise.
-       (pubkey_generate): Likewise.
-       (pubkey_check_secret_key): Likewise.
-       (pubkey_encrypt): Likewise.
-       (pubkey_decrypt): Likewise.
-       (pubkey_sign): Likewise.
-       (pubkey_verify): Likewise.
-       (sexp_elements_extract): Likewise.
-       (sexp_to_key): Likewise.
-       (sexp_to_sig): Likewise.
-       (sexp_to_enc): Likewise.
-       (sexp_data_to_mpi): Likewise.
-       (gcry_pk_encrypt): Likewise.
-       (gcry_pk_decrypt): Likewise.
-       (gcry_pk_sign): Likewise.
-       (gcry_pk_verify): Likewise.
-       (gcry_pk_testkey): Likewise.
-       (gcry_pk_genkey): Likewise.
-       (gcry_pk_ctl): Likewise.
-       * cipher.c (dummy_setkey): Likewise.
-       (check_cipher_algo): Likewise.
-       (gcry_cipher_open): Likewise.
-       (cipher_setkey): Likewise.
-       (gcry_cipher_ctl): Likewise.
-       (cipher_encrypt): Likewise.
-       (gcry_cipher_encrypt): Likewise.
-       (cipher_decrypt): Likewise.
-       (gcry_cipher_decrypt): Likewise.
-       (gcry_cipher_info): Likewise.
-       (gcry_cipher_algo_info): Likewise.
-       * cast5.c (cast_setkey): Likewise.
-       (do_cast_setkey): Likewise.
-       * arcfour.c (arcfour_setkey): Likewise.
-       (do_arcfour_setkey): Likewise.
-       * blowfish.c (do_bf_setkey): Likewise.
-       (bf_setkey): Likewise.
-       * des.c (do_des_setkey): Likewise.
-       (do_tripledes_setkey): Likewise.
-
-2003-05-22  Moritz Schulte  <moritz@g10code.com>
-
-       * tiger.c: Merged code ussing the U64_C macro from GnuPG.
-
-       * sha512.c: Likewise.
-
-2003-05-17  Moritz Schulte  <moritz@g10code.com>
-
-       * pubkey.c (gcry_pk_genkey): Fix type: acquire a lock, instead of
-       releasing it.
-
-2003-05-11  Moritz Schulte  <moritz@g10code.com>
-
-       * pubkey.c (gcry_pk_testkey): Call REGISTER_DEFAULT_CIPHERS.
-       (gcry_pk_ctl): Likewise.
-
-2003-04-27  Moritz Schulte  <moritz@g10code.com>
-
-       * pubkey.c (gcry_pk_genkey): Release sexp after extracted data has
-       been used.
-
-       * md.c (gcry_md_get_algo_dlen): Simplified, simply call
-       md_digest_length to do the job.
-
-       * des.c (do_des_setkey): Check for selftest failure not only
-       during initialization.
-       (do_tripledes_setkey): Include check for selftest failure.
-
-       * pubkey.c (gcry_pubkey_register_default): New macro
-       `pubkey_use_dummy', use it.
-
-       * elgamal.c (elg_names): New variable.
-       (pubkey_spec_elg): Include elg_names.
-
-       * dsa.c (dsa_names): New variable.
-       (pubkey_spec_dsa): Include dsa_names.
-
-       * rsa.c (rsa_names): New variable.
-       (pubkey_spec_rsa): Include rsa_names.
-
-       * pubkey.c (gcry_pubkey_lookup_func_name): Compare name also with
-       the names listed in `sexp_names'.
-
-2003-04-24  Moritz Schulte  <moritz@g10code.com>
-
-       * pubkey.c (sexp_to_key): New variables: module, pubkey.  Adjusted
-       to new module interface.
-       (sexp_to_key): Changend type of argument `retalgo' from `int *' to
-       `GcryModule **'.  Adjusted all callers.  Removed argument:
-       r_algotblidx.
-       (sexp_to_sig): Changend type of argument `retalgo' from `int *' to
-       `GcryModule **'.  Adjusted all callers.
-       (sexp_to_enc): Likewise.
-
-       (pubkey_get_npkey, pubkey_get_nskey, pubkey_get_nsig,
-       pubkey_get_nenc): Use strlen to find out the number.
-
-       * rsa.c: Adjust pubkey_spec_rsa to new internal interface.
-       * dsa.c: Likewise.
-       * elgamal.c: Likewise.
-
-2003-04-17  Moritz Schulte  <moritz@g10code.com>
-
-       * pubkey.c (sexp_elements_extract): New function.
-       * pubkey.c (sexp_to_key): Removed variable `idx', added `err', use
-       sexp_elements_extract.
-       (sexp_to_sig): Likewise.
-       (sexp_to_enc): Likewise.
-
-       * pubkey.c: Terminate list correctly.
-       * md.c: Include sha512/sha384 in digest_table.
-
-2003-04-16  Moritz Schulte  <moritz@g10code.com>
-
-       * Makefile.am: Include support for sha512.c.
-
-       * sha512.c: New file, merged from GnuPG, with few modifications
-       for libgcrypt.
-
-       * rand-internal.h: Removed declarations for constructor functions.
-
-       * md.c (md_copy): Call _gcry_module_use for incrementing the usage
-       counter of the digest modules.
-
-       * rsa.c: Do not include "rsa.h".
-       * dsa.c: Do not include "dsa.h".
-       * elgamal.c: Do not include "elgamal.h".
-       * des.c: Do not include "des.h".
-       * cast5.c: Do not include "cast5.h".
-       * blowfish.c: Do not include "blowfish.h".
-       * arcfour.c: Do not include "arcfour.h".
-
-       * Makefile.am (libcipher_la_DEPENDENCIES): Removed.
-       (libcipher_la_LIBADD): Removed.
-       Use Automake conditionals for conditional compilation.
-
-2003-04-13  Moritz Schulte  <moritz@g10code.com>
-
-       * cipher.c (gcry_cipher_open): Call REGISTER_DEFAULT_CIPHERS.
-
-       * md.c (gcry_md_list): New member: module.
-       (md_enable): New variable: module, changed use of module and
-       digest.
-       (md_enable): Initialize member: module.
-       (md_close): Call _gcry_module_release.
-
-       * cipher.c (gcry_cipher_open): New variable: module, changed use of
-       module and cipher.
-       (struct gcry_cipher_handle): New member: module.
-       (gcry_cipher_open): Initialize member: module.
-       (gcry_cipher_close): Call _gcry_module_release.
-
-2003-04-09  Moritz Schulte  <moritz@g10code.com>
-       
-       * cipher.c: Include "ath.h".
-       * md.c: Likewise.
-       * pubkey.c: Likewise.
-
-       * cipher.c (ciphers_registered_lock): New variable.
-       * md.c (digests_registered_lock): New variable.
-       * pubkey.c (pubkeys_registered_lock): New variable.
-
-       * rndlinux.c (gnupgext_version, func_table): Removed definitions.
-       (gnupgext_enum_func): Removed function.
-       (_gcry_rndlinux_constructor): Removed function.
-
-       * rndegd.c (gnupgext_version, func_table): Removed definitions.
-       (gnupgext_enum_func): Removed function.
-       (_gcry_rndegd_constructor): Removed function.
-
-       * rndunix.c (gnupgext_version, func_table): Removed definitions.
-       (gnupgext_enum_func): Removed function.
-       (_gcry_rndunix_constructor): Removed function.
-
-       * rndw32.c (gnupgext_version, func_table): Removed definitions.
-       (gnupgext_enum_func): Removed function.
-       (_gcry_rndw32_constructor): Removed function.
-
-       * rndegd.c (rndegd_connect_socket): Simplify code for creating the
-       egd socket address.
-       (rndegd_connect_socket): Call log_fatal use instead of
-       g10_log_fatal.
-       (egd_gather_random): Renamed to ...
-       (rndegd_gather_random): ... here.
-
-2003-04-08  Moritz Schulte  <moritz@g10code.com>
-
-       * rndlinux.c: Do not include "dynload.h".
-       * rndunix.c: Likewise.
-       * rndw32.c: Likewise.
-
-       * rndegd.c (rndegd_connect_socket): Factored out from ...
-       (egd_gather_random): here; call it.
-       (egd_socket): New variable.
-       (egd_gather_random): Initialize fd with egd_socket, do not declare
-       fd static.
-       (do_read): Merged few changes from GnuPG. FIXME - not finished?
-       Do not include "dynload.h".
-
-       * rndw32.c (gather_random): Renamed to rndw32_gather_random, do
-       not declare static.
-       (gather_random_fast): Renamed to rndw32_gather_random_fast, do not
-       declare static.
-
-       * rndunix.c (gather_random): Renamed to rndunix_gather_random, do
-       not declare static.
-       * rndegd.c (gather_random): Renamed to rndegd_gather_random, do
-       not declare static.
-       * rndlinux.c (gather_random): Renamed to rndlinux_gather_random,
-       do not declare static.
-
-2003-04-07  Moritz Schulte  <moritz@g10code.com>
-
-       * Makefile.am (libcipher_la_SOURCES): Removed construct.c.
-       (libcipher_la_SOURCES): Added sha1.c, sha256.c, rmd160.c, md4.c,
-       md5.c, tiger.c and crc.c
-       (EXTRA_PROGRAMS): Removed sha1, sha256, rmd160, md4, md5, tiger
-       and crc.  Removed definitions: EXTRA_md4_SOURCES,
-       EXTRA_md5_SOURCES, EXTRA_rmd160_SOURCES, EXTRA_sha1_SOURCES,
-       EXTRA_sha256_SOURCES, EXTRA_tiger_SOURCES and EXTRA_crc_SOURCES,
-       BUILT_SOURCES, DISTCLEANFILES.
-
-       * pubkey.c: Do not include "elgamal.h", "dsa.h" and "rsa.h".
-
-       * Makefile.am (libcipher_la_SOURCES): Removed rsa.h, elgamal.h,
-       dsa.h, des.h, cast5.h, arcfour.h and blowfish.h.
-
-       * rsa.h: Removed file.
-       * elgamal.h: Removed file.
-       * dsa.h: Removed file.
-       * des.h: Removed file.
-       * cast5.h: Removed file.
-       * arcfour.h: Removed file.
-       * blowfish.h: Removed file.
-
-       * Makefile.am (libcipher_la_SOURCES): Removed dynload.c and
-       dynload.h.
-
-       * rsa.c (pubkey_spec_rsa): New variable.
-       * dsa.c (pubkey_spec_rsa): New variable.
-       * elgamal.c (pubkey_spec_elg): New variable.
-       
-       * rsa.c (_gcry_rsa_get_info): Removed function.
-       * elgamal.c (_gcry_elg_get_info): Removed function.
-       * dsa.c (_gcry_dsa_get_info): Removed function.
-
-       * tiger.c (tiger_get_info): Removed function.
-       (gnupgext_version, func_table): Removed definitions.
-       (gnupgext_enum_func): Removed function.
-       (_gcry_tiger_constructor): Removed function.
-       
-       * sha1.c (sha1_get_info): Removed function.
-       (gnupgext_version, func_table): Removed definitions.
-       (gnupgext_enum_func): Removed function.
-       (_gcry_sha1_constructor): Removed function.
-
-       * sha256.c (sha256_get_info): Removed function.
-       (gnupgext_version, func_table): Removed definitions.
-       (gnupgext_enum_func): Removed function.
-       (_gcry_sha256_constructor): Removed function.
-
-       * rmd160.c (rmd160_get_info): Removed function.
-       (gnupgext_version, func_table): Removed definitions.
-       (gnupgext_enum_func): Removed function.
-       (_gcry_rmd160_constructor): Removed function.
-
-       * md5.c (md5_get_info): Removed function.
-       (gnupgext_version, func_table): Removed definitions.
-       (gnupgext_enum_func): Removed function.
-       (_gcry_md5_constructor): Removed function.
-
-       * md4.c (md4_get_info): Removed function.
-       (gnupgext_version, func_table): Removed definitions.
-       (gnupgext_enum_func): Removed function.
-       (_gcry_md4_constructor): Removed function.
-
-       * crc.c (crc_get_info): Removed function.
-
-       * arcfour.c (do_arcfour_setkey): Changed type of context argument
-       to `void *', added local variable for cast, adjusted callers.
-       (arcfour_setkey): Likewise.
-       (encrypt_stream): Likewise.
-       * cast5.c (cast_setkey): Likewise.
-       (encrypt_block): Likewise.
-       * rijndael.c (rijndael_setkey): Likewise.
-       (rijndael_encrypt): Likewise.
-       (rijndael_decrypt): Likewise.
-       * twofish.c (twofish_setkey): Likewise.
-       (twofish_encrypt): Likewise.
-       (twofish_decrypt): Likewise.
-       * des.c (do_des_setkey): Likewise.
-       (do_des_encrypt): Likewise.
-       (do_des_encrypt): Likewise.
-       (do_tripledes_encrypt): Likewise.
-       (do_tripledes_encrypt): Likewise.
-       * blowfish.c (bf_setkey: Likewise.
-       (encrypt_block): Likewise.
-       (decrypt_block): Likewise.
-       
-       * arcfour.c (encrypt_stream): Likewise.
-
-       * rijndael.c (gnupgext_version, func_table): Removed definitions.
-       (gnupgext_enum_func) Removed function.  
-       
-       * twofish.c (gnupgext_version, func_table): Removed definitions.
-       (gnupgext_enum_func) Removed function.  
-
-       * cast5.c (CIPHER_ALGO_CAST5): Removed.
-
-       * blowfish.c (FNCCAST_SETKEY, FNCCAST_CRYPT): Removed macros.
-       (CIPHER_ALGO_BLOWFISH): Removed symbol.
-       * cast5.c (FNCCAST_SETKEY, FNCCAST_CRYPT): Likewise.
-       * des.c (selftest_failed): Removed.
-       (initialized): New variable.
-       (do_des_setkey): Run selftest, if not yet done.
-       (FNCCAST_SETKEY, FNCCAST_CRYPT): Removed macros.
-
-       * arcfour.c (_gcry_arcfour_get_info): Removed function.
-       * blowfish.c (_gcry_blowfish_get_info): Removed function.
-       * cast5.c (_gcry_cast5_get_info): Removed function.
-       * des.c (_gcry_des_get_info): Removed function.
-       * rijndael.c (_gcry_rijndael_get_info): Removed function.
-       * twofish.c (_gcry_twofish_get_info): Removed function.
-
-       * arcfour.c (cipher_spec_arcfour): New variable.
-       * twofish.c (cipher_spec_twofish, cipher_spec_twofish128): New
-       variables.
-       * rijndael.c (cipher_spec_aes, cipher_spec_aes192,
-       cipher_spec256): New variables.
-       * des.c (cipher_spec_des, cipher_spec_tripledes): New variables.
-       * cast5.c (cipher_spec_cast5): New variable.
-       * blowfish.c (cipher_spec_blowfish): Likewise.
-       
-       * twofish.c: Do not include "dynload.h".
-       * rijndael.c: Likewise.
-       * des.c: Likewise.
-       * cast5.c: Likewise.
-       * blowfish.c: Likewise.
-       * cipher.c: Likewise.
-       * crc.c: Likewise.
-       * md4.c: Likewise.
-       * md5.c: Likewise.
-       * md.c: Likewise.
-       * pubkey.c: Likewise.
-       * rijndael.c: Likewise.
-       * sha1.c: Likewise.
-       * sha256.c: Likewise.
-
-       * arcfour.c: Include "cipher.h".
-       * twofish.c: Likewise.
-       * rijndael.c: Likewise.
-       * des.c: Likewise.
-       * cast5.c: Likewise.
-       * blowfish.c: Likewise.
-
-       * twofish.c (twofish_setkey): Declared argument `key' const.
-       (twofish_encrypt): Declared argument `inbuf' const.
-       (twofish_decrypt): Likewise.
-
-       * rijndael.c (rijndael_setkey): Declared argument `key' const.
-       (rijndael_encrypt): Declared argument `inbuf' const.
-       (rijndael_decrypt): Likewise.
-
-       * des.c (do_des_setkey): Declared argument `key' const.
-       (do_tripledes_setkey): Likewise.
-       (do_des_encrypt): Declared argument `inbuf' const.
-       (do_des_decrypt): Likewise.
-       (do_tripledes_encrypt): Likewise.
-       (do_tripledes_decrypt): Likewise.
-
-       * cast5.c (encrypt_block): Declared argument `inbuf' const.
-       (decrypt_block): Likewise.
-       (cast_setkey): Declared argument `key' const.
-
-       * blowfish.c (do_bf_setkey): Declared argument `key' const.
-       (encrypt_block): Declared argument `inbuf' const.
-       (encrypt_block): Likewise.
-
-       
-
-       * cipher.c: Remove CIPHER_ALGO_DUMMY related code.
-       Removed struct cipher_table_s.
-       Changed definition of cipher_table.
-       Removed definition of disabled_algos.
-       (ciphers_registered, default_ciphers_registered): New variables.
-       (REGISTER_DEFAULT_CIPHERS): New macro.
-       (dummy_setkey): Declared argument `key' const.
-       (dummy_encrypt_block): Declared argument `inbuf' const.
-       (dummy_encrypt_block): Likewise.
-       (dummy_encrypt_stream): Likewise.
-       (dummy_encrypt_stream): Likewise.
-       (dummy_setkey): Use `unsigned char' instead of `byte'.
-       (dummy_encrypt_block): Likewise.
-       (dummy_decrypt_block): Likewise.
-       (dummy_encrypt_stream): Likewise.
-       (dummy_decrypt_stream): Likewise.
-       (gcry_cipher_register_default): New function.
-       (gcry_cipher_lookup_func_id): New function.
-       (gcry_cipher_lookup_func_name): New function.
-       (gcry_cipher_lookup_id): New function.
-       (gcry_cipher_lookup_name): New function.
-       (gcry_cipher_id_new): New function.
-       (gcry_cipher_register): New function.
-       (gcry_cipher_unregister): New function.
-       (setup_cipher_table): Removed function.
-       (load_cipher_modules): Removed function.
-       (gcry_cipher_map_name): Adjusted to use new module management.
-       (cipher_algo_to_string): Likewise.
-       (disable_cipher_algo): Likewise.
-       (check_cipher_algo): Likewise.
-       (cipher_get_keylen): Likewise.
-       (cipher_get_blocksize): Likewise.
-       (gcry_cipher_open): Likewise.
-       (struct gcry_cipher_handle): Replaced members algo, algo_index,
-       blocksize, setkey, encrypt, decrypt, stencrypt, stdecrypt with one
-       member: cipher.
-       (gcry_cipher_open): Adjusted code for new handle structure.
-       (cipher_setkey): Likewise.
-       (cipher_setiv): Likewise.
-       (cipher_reset): Likewise.
-       (do_ecb_encrypt): Likewise.
-       (do_ecb_decrypt): Likewise.
-       (do_cbc_encrypt): Likewise.
-       (do_cbc_decrypt): Likewise.
-       (do_cfb_encrypt): Likewise.
-       (do_cfb_decrypt): Likewise.
-       (do_ctr_encrypt): Likewise.
-       (cipher_encrypt): Likewise.
-       (gcry_cipher_encrypt): Likewise.
-       (cipher_decrypt): Likewise.
-       (gcry_cipher_decrypt): Likewise.
-       (cipher_sync): Likewise.
-       (gcry_cipher_ctl): Likewise.
-
-       * pubkey.c: Removed struct pubkey_table_s.
-       Changed definition of pubkey_table.
-       Removed definition of disabled_algos.
-       (pubkeys_registered, default_pubkeys_registered): New variables.
-       (REGISTER_DEFAULT_PUBKEYS): New macro.
-       (setup_pubkey_table): Removed function.
-       (load_pubkey_modules): Removed function.
-       (gcry_pubkey_register_default): New function.
-       (gcry_pubkey_lookup_func_id): New function.
-       (gcry_pubkey_lookup_func_name): New function.
-       (gcry_pubkey_lookup_id): New function.
-       (gcry_pubkey_lookup_name): New function.
-       (gcry_pubkey_id_new): New function.
-       (gcry_pubkey_register): New function.
-       (gcry_pubkey_unregister): New function.
-       (gcry_pk_map_name): Adjusted to use new module management.
-       (gcry_pk_algo_name): Likewise.
-       (disable_pubkey_algo): Likewise.
-       (check_pubkey_algo): Likewise.
-       (pubkey_get_npkey): Likewise.
-       (pubkey_get_nskey): Likewise.
-       (pubkey_get_nsig): Likewise.
-       (pubkey_get_nenc): Likewise.
-       (pubkey_generate): Likewise.
-       (pubkey_check_secret_key): Likewise.
-       (pubkey_encrypt): Likewise.
-       (pubkey_decrypt): Likewise.
-       (pubkey_sign): Likewise.
-       (pubkey_verify): Likewise.
-       (gcry_pk_get_nbits): Likewise.
-       (gcry_pk_algo_info): Likewise.
-
-       * md.c: Removed struct md_digest_list_s.
-       (digest_list): Changed definition.
-       (digests_registered, default_digests_registered): New variables.
-       (REGISTER_DEFAULT_DIGESTS): New macro.
-       (new_list_item): Removed function.
-       (setup_md_table): Removed function.
-       (load_digest_module): Removed function.
-       (gcry_digest_register_default): New function.
-       (gcry_digest_lookup_func_id): New function.
-       (gcry_digest_lookup_func_name): New function.
-       (gcry_digest_lookup_id): New function.
-       (gcry_digest_lookup_name): New function.
-       (gcry_digest_id_new): New function.
-       (gcry_digest_register): New function.
-       (gcry_digest_unregister): New function.
-       (GcryDigestEntry): New type.
-       (struct gcry_md_context): Adjusted type of `list'.
-       (gcry_md_map_name): Adjusted to use new module management.
-       (digest_algo_to_string): Likewise.
-       (check_digest_algo): Likewise.
-       (md_enable): Likewise.
-       (md_digest_length): Likewise.
-       (md_asn_oid): Likewise.
-
-2003-04-07  Moritz Schulte  <moritz@g10code.com>
-
-       * pubkey.c: Replaced PUBKEY_ALGO_DSA with GCRY_PK_DSA,
-       PUBKEY_ALGO_RSA with GCRY_PK_RSA and PUBKEY_ALGO_ELGAMAL with
-       GCRY_PK_ELG.
-
-       * dsa.c: Replaced PUBKEY_ALGO_DSA with GCRY_PK_DSA.
-
-2003-04-01  Moritz Schulte  <moritz@g10code.com>
-
-       * des.c: Removed checks for GCRY_CIPHER_3DES and GCRY_CIPHER_DES.
-
-2003-03-31  Moritz Schulte  <moritz@g10code.com>
-
-       * tiger.c (tiger_get_info): Do not declare static.
-       * sha256.c (sha256_get_info): Likewise.
-       * sha1.c (sha1_get_info): Likewise.
-       * rmd160.c (rmd160_get_info): Likewise.
-       * md5.c (md5_get_info): Likewise.
-       * md4.c (md4_get_info): Likewise.
-       * crc.c (crc_get_info): Likewise.
-
-       * md.c (load_digest_module): Call setup_md_table during
-       initialization.
-       (new_list_item): Link new element into digest_list.
-
-       * cipher.c (do_ctr_decrypt): Made do_ctr_encrypt act as a wrapper
-       for do_ctr_encrypt, since these functions are identical.
-
-2003-03-30  Simon Josefsson  <jas@extundo.com>
-
-       * cipher.c (struct gcry_cipher_handle): Add counter field.
-       (gcry_cipher_open): Add CTR.
-       (cipher_reset): Clear counter field.
-       (do_ctr_encrypt, do_ctr_decrypt): New functions.
-       (cipher_encrypt, cipher_decrypt): Call CTR functions.
-       (gcry_cipher_ctl): Add SET_CTR to set counter.
-
-2003-03-30  Moritz Schulte  <moritz@g10code.com>
-
-       * rsa.c (_gcry_rsa_blind): New function.
-       (_gcry_rsa_unblind): New function.
-       (_gcry_rsa_decrypt): Use _gcry_rsa_blind and _gcry_rsa_decrypt.
-
-2003-03-26  Moritz Schulte  <moritz@g10code.com>
-
-       * dynload.c (_gcry_enum_gnupgext_pubkeys): Adjust `encrypt' and
-       `decrypt' function arguments.
-       (_gcry_enum_gnupgext_pubkeys): Likewise.
-       * dynload.h: Likewise.
-       
-       * pubkey.c (dummy_decrypt): Add argument: int flags.
-       (dummy_encrypt): Likewise.
-
-       * elgamal.c (_gcry_elg_encrypt): Add argument: int flags.
-       (_gcry_elg_decrypt): Likewise.
-
-       * rsa.c (_gcry_rsa_encrypt): Add argument: int flags.
-       (_gcry_rsa_decrypt): Likewise.
-
-       * pubkey.c: Add `flags' argument to members `encrypt' and
-       `decrypt' of struct `pubkey_table_s'.
-
-       * rsa.h: Add `flags' argument to function declarations.
-       * elgamal.h: Likewise.
-
-       * pubkey.c (sexp_data_to_mpi): New variable: int parsed_flags.
-       (sexp_data_to_mpi): Set `parsed_flags'.
-       (sexp_data_to_mpi): New argument: int *flags.
-       (gcry_pk_encrypt): New variable: int flags.
-       (gcry_pk_encrypt): Pass `flags' to pubkey_encrypt.
-       (pubkey_encrypt): New variable: int flags.
-       (pubkey_encrypt): Pass `flags' to pubkey encrypt function.
-       (pubkey_decrypt): Likewise.
-       (pubkey_decrypt): Pass `flags' to pubkey encrypt function.
-       (gcry_pk_encrypt): Include `flags' s-exp in return list.
-       (sexp_to_enc): New argument: int *flags.
-       (gcry_pk_decrypt): New variable: int flags.
-       (gcry_pk_decrypt): Pass `flags' to pubkey_decrypt.
-       (sexp_to_enc): New variable: int parsed_flags.
-       (sexp_to_enc): Set `parsed_flags'.
-
-2003-03-22  Simon Josefsson  <jas@extundo.com>
-
-       * cipher.c (gcry_cipher_open, do_cbc_encrypt)
-       (gcry_cipher_encrypt): Support GCRY_CIPHER_CBC_MAC.
-       (gcry_cipher_ctl): Support GCRYCTL_SET_CBC_MAC.
-
-2003-03-19  Werner Koch  <wk@gnupg.org>
-
-       * primegen.c (gen_prime): New args EXTRA_CHECK and EXTRA_CHECK_ARG
-       to allow for a user callback.  Changed all callers.
-       (_gcry_generate_secret_prime)
-       (_gcry_generate_public_prime): Ditto, pass them to gen_prime.
-       * rsa.c (check_exponent): New.
-       (generate): Use a callback to ensure that a given exponent is
-       actually generated.
-
-2003-03-12  Moritz Schulte  <moritz@g10code.com>
-
-       * primegen.c: Initialize `no_of_small_prime_numbers' statically.
-       (gen_prime): Remove calculation of `no_of_small_prime_numbers'.
-
-2003-03-03  Moritz Schulte  <moritz@g10code.com>
-
-       * md.c (gcry_md_ctl): Rewritten to use same style like the other
-       functions dispatchers.
-
-2003-03-02  Moritz Schulte  <moritz@g10code.com>
-
-       * cipher.c (struct gcry_cipher_handle): New member: algo_index.
-       (gcry_cipher_open): Allocate memory for two cipher contexts.
-       Initialize algo_index.
-       (cipher_setkey): Duplicate context into reserved memory.
-       (cipher_reset): New function, which resets the context and clear
-       the IV.
-       (gcry_cipher_ctl): Call cipher_reset.
-
-2003-02-23  Moritz Schulte  <moritz@g10code.com>
-
-       * cipher.c: Remove (bogus) `digitp' macro definition.
-       * md.c: Likewise.
-
-       * blowfish.c (burn_stack): Removed.
-       * arcfour.c (burn_stack): Likewise.
-       * cast5.c (burn_stack): Likewise.
-       * des.c (burn_stack): Likewise.
-       * md4.c (burn_stack): Likewise.
-       * md5.c (burn_stack): Likewise.
-       * random.c (burn_stack): Likewise.
-       * rijndael.c (burn_stack): Likewise.
-       * rmd160.c (burn_stack): Likewise.
-       * sha1.c (burn_stack): Likewise.
-       * sha256.c (burn_stack): Likewise.
-       * tiger.c (burn_stack): Likewise.
-       * twofish.c (burn_stack): Likewise.
-
-       * blowfish.c: Changed all occurences of burn_stack to
-       _gcry_burn_stack.
-       * arcfour.c: Likewise.
-       * cast5.c: Likewise.
-       * des.c: Likewise.
-       * md4.c: Likewise.
-       * md5.c: Likewise.
-       * random.c: Likewise.
-       * rijndael.c: Likewise.
-       * rmd160.c: Likewise.
-       * sha1.c: Likewise.
-       * sha256.c: Likewise.
-       * tiger.c: Likewise.
-       * twofish.c: Likewise.
-
-       * arcfour.c (_gcry_arcfour_get_info): Use GCRY_CIPHER_ARCFOUR
-       instead of hard-coded value `301'.
-
-2003-01-24  Werner Koch  <wk@gnupg.org>
-
-       * random.c (_gcry_register_random_progress): New.
-       (_gcry_random_progress): New.
-
-       * rndlinux.c (gather_random): Call the random progress function. 
-
-2003-01-23  Werner Koch  <wk@gnupg.org>
-
-       * rsa.c (generate): New arg USE_E to request a specific public
-       exponent.
-       (_gcry_rsa_generate): Ditto.
-       * elgamal.c (_gcry_elg_generate): Must add an dummy argument
-       instead of USE_E.
-       * dsa.c (_gcry_dsa_generate): Ditto.
-       * pubkey.c (dummy_generate): Ditto.
-       (pubkey_generate): Add USE_E arg and pass it down.
-       (gcry_pk_genkey): Detect "rsa-use-e" parameter and pass it to generate.
-
-       * pubkey.c (sexp_to_enc): New arg RET_MODERN.
-       (gcry_pk_decrypt): Make use of it to return a real S-expression.
-       Return better error codes.
-       (gcry_pk_verify): Return better error codes.
-
-2003-01-21  Werner Koch  <wk@gnupg.org>
-
-       * random.c (gcry_random_add_bytes): Add QUALITY argument, let
-       function return an error code and disable its core for now.
-
-2003-01-21  Timo Schulz  <twoaday@freakmail.de>
-
-       * random.c (gcry_random_add_bytes): New. Function to add external
-       random to the pool.
-       
-2003-01-20  Simon Josefsson  <jas@extundo.com>
-
-       * crc.c: New.
-       * Makefile.am (EXTRA_PROGRAMS, EXTRA_crc_SOURCES): Add crc.c.
-       * md.c (gcry_md_get_algo_dlen): Add values for CRC.
-
-2003-01-20  Werner Koch  <wk@gnupg.org>
-
-       * sha256.c: New.
-       * bithelp.h (ror): New.
-       * Makfile.am: Add sha256.c.
-       * md.c (oid_table): Add values for SHA256 et al.
-       (gcry_md_get_algo_dlen): Likewise
-
-2003-01-20  Werner Koch  <wk@gnupg.org>
-
-       * pubkey.c (gcry_pk_get_keygrip): Implemented keygrips for DSA
-       and ElGamal.
-
-2003-01-17  Werner Koch  <wk@gnupg.org>
-
-       * cipher.c (gcry_cipher_encrypt): Reworked so that the output will
-       never contain the plaintext even if the caller did not checked the
-       return value.
-
-       * md.c (gcry_md_get_algo): Changed error code to GCRYERR_GENERAL
-       because we don't have an invalid md algo but no algorithm enabled.
-
-       * pubkey.c (gcry_pk_genkey): Changed error code for bounds check
-       of table parameters to GCRYERR_INTERNAL.
-
-       * md.c (gcry_md_open): Partly reverted Timo's change from
-       2002-10-10 by removing the check for the algorithm.  An algorithm
-       of 0 is allowed and anyway we should not double check it or check
-       it using a different function.  Also fixed the flags check.
-
-       * pubkey.c (gcry_pk_encrypt): Make sure that R_CIPH points to NULL
-       on error.
-       (gcry_pk_decrypt): Ditto for R_PLAIN.
-       (gcry_pk_sign): Ditto for R_SIG.
-       (gcry_pk_genkey): Ditto for R_KEY.
-
-2003-01-16  Werner Koch  <wk@gnupg.org>
-
-       * md.c (gcry_md_write): Changed 2nd argument type to void*.
-       (gcry_md_hash_buffer): Changed type of boths buffers to void*.
-       (gcry_md_setkey): Changed 2nd argument type to void*.
-
-2003-01-15  Werner Koch  <wk@gnupg.org>
-
-       * pubkey.c (sexp_data_to_mpi): New.  This handles pkcs1 padding.
-       (gcry_pk_sign, gcry_pk_verify): Use it here.
-       (gcry_pk_encrypt): And here.
-       (pubkey_verify): Add debug code.
-       (sexp_to_enc): Handle flags in the input and return the pkcs1 flag
-       in a new parameter.
-       (gcry_pk_decrypt): Prepare for future pkcs1 handling.
-
-2002-12-19  Werner Koch  <wk@gnupg.org>
-
-       * random.c (_gcry_random_initialize): New.
-
-2002-12-16  Werner Koch  <wk@gnupg.org>
-
-       * cipher.c: Added a Teletrust specific OID for 3DES.
-
-2002-12-12  Werner Koch  <wk@gnupg.org>
-
-       * md.c: Added another oddball OIW OID (sha-1WithRSAEncryption).
-
-2002-11-23  Werner Koch  <wk@gnupg.org>
-
-       * md.c (load_digest_module): Enlarged checked_algos bitmap.
-       * md4.c (func_table):  Fixed entry for md4. 
-       Both by Simon Josephson.
-       (transform): Copy data to get the alignment straight. Tested only
-       on i386.
-
-2002-11-10  Simon Josefsson  <jas@extundo.com>
-
-       * cipher.c (gcry_cipher_open): Don't reject CTS flag.
-       (do_cbc_encrypt, do_cbc_decrypt, cipher_encrypt) 
-       (gcry_cipher_encrypt, cipher_decrypt)
-       (gcry_cipher_decrypt): Support CTS flag.
-       (gcry_cipher_ctl): Toggle CTS flag.
-
-2002-11-10  Werner Koch  <wk@gnupg.org>
-
-       * md4.c: New. By Simon Josefsson.
-       * Makefile.am (EXTRA_PROGRAMS): Add md4.c. 
-       * md.c (oid_table,gcry_md_get_algo_dlen): MD4 support. 
-
-2002-10-14  Werner Koch  <wk@gnupg.org>
-
-       * arcfour.c (do_encrypt_stream): Don't use increment op when
-       assigning to the same variable.
-
-2002-10-10  Timo Schulz  <ts@winpt.org>
-
-       * pubkey.c (gcry_pk_genkey): Check boundaries.
-       
-       * md.c (gcry_md_open): Check that algo is available and only
-       valid flag values are used.
-       (gcry_md_get_algo): Add error handling.
-       
-2002-09-26  Werner Koch  <wk@gnupg.org>
-
-       * md.c: Include an OID for TIGER.
-       * tiger.c (tiger_get_info): Use a regular OID.
-
-2002-09-17  Werner Koch  <wk@gnupg.org>
-
-       * random.c: Replaced mutex.h by the new ath.h.  Changed all calls.
-
-2002-09-16  Werner Koch  <wk@gnupg.org>
-
-       * arcfour.c (do_encrypt_stream): Use register modifier and modulo.
-       According to Nikos Mavroyanopoulos this increases perfromace on
-       i386 system noticable.  And I always tought gcc is clever enough.
-       * md5.c (transform): Use register modifier.
-       * rmd160.c (transform): Ditto.
-       * sha1.c (transform): Ditto.  We hope that there are 6 free registers.
-       * random.c (gcry_randomize): Rewrote to avoid malloc calls.
-
-       * rndlinux.c (gather_random): Replaced remaining fprintfs by log_*.
-       * arcfour.c (do_arcfour_setkey): Ditto.
-       * twofish.c (do_twofish_setkey): Ditto.
-       * rndegd.c (gather_random): Ditto.
-       * rijndael.c (do_setkey): Ditto.
-       * random.c (_gcry_random_dump_stats): Ditto. 
-       * primegen.c (_gcry_generate_elg_prime): Ditto.
-       * des.c (_gcry_des_get_info): Ditto.
-       * cast5.c (do_cast_setkey): Ditto.
-       * blowfish.c (do_bf_setkey): Ditto.
-
-2002-08-26  Werner Koch  <wk@gnupg.org>
-
-       * des.c (weak_keys): Fixed one entry in the table and compared
-       all entries against the literature.
-       (selftest): Checksum the weak key table.
-
-2002-08-21  Werner Koch  <wk@gnupg.org>
-
-       * pubkey.c: Enable keygrip calculation for "openpgp-rsa".
-
-2002-08-17  Werner Koch  <wk@gnupg.org>
-
-       * cipher.c (setup_cipher_table): Don't overwrite the DES entry
-       with the entry for DUMMY.
-
-2002-08-14  Werner Koch  <wk@gnupg.org>
-
-       * des.c (do_des_setkey,do_des_encrypt, do_des_decrypt): New.
-       (_gcry_des_get_info): Support plain old DES.
-       * cipher.c (setup_cipher_table): Put DES into the table.
-
-2002-07-25  Werner Koch  <wk@gnupg.org>
-
-       * rndunix.c (_gcry_rndunix_constructor): Prefixed with _gcry_.
-       Noted by Stephan Austermuehle.
-
-2002-07-08  Timo Schulz  <ts@winpt.org>
-
-       * rndw32.c: Replaced the m_ memory functions with the real 
-       gcry_ functions. Renamed all g10_ prefixed functions to log_.
-       
-2002-06-12  Werner Koch  <wk@gnupg.org>
-
-       * rsa.c (generate): Use e = 65537 for now.
-
-2002-06-11  Werner Koch  <wk@gnupg.org>
-
-       * pubkey.c (gcry_pk_get_keygrip): Allow a "protected-private-key".
-
-2002-06-05  Timo Schulz  <ts@winpt.org>
-
-       * cipher.c (gcry_cipher_encrypt, gcry_cipher_decrypt):
-       Check that the input size is a multiple of the blocksize.
-       
-2002-05-23  Werner Koch  <wk@gnupg.org>
-
-       * md.c (oid_table): Add an rsadsi OID for MD5.
-
-2002-05-21  Werner Koch  <wk@gnupg.org>
-
-       * primegen.c, elgamal.c, dsa.c (progress): Do not print anything
-       by default.  Pass an extra identifying string to the callback and
-       reserved 2 argumenst for current and total counters.  Changed the
-       register function prototype.
-
-2002-05-17  Werner Koch  <wk@gnupg.org>
-
-       * rndegd.c (rndegd_constructor): Fixed name of register function
-       and prefixed the function name with _gcry_.
-       * rndw32.c (rndw32_constructor): Ditto.
-       * tiger.c (tiger_constructor): Ditto.
-
-       * Makefile.am: Removed all dynamic loading stuff.
-       * dynload.c: Ditto. Now only used for the constructor system.
-
-2002-05-15  Werner Koch  <wk@gnupg.org>
-
-       * random.c (gcry_random_bytes,gcry_random_bytes_secure)
-       (gcry_randomize): Make sure we are initialized.
-
-2002-05-14  Werner Koch  <wk@gnupg.org>
-
-       Changed license of most files to the LGPL.
-
-2002-05-02  Werner Koch  <wk@gnupg.org>
-
-       * random.c (_gcry_fast_random_poll): Initialize the module so the
-       mutex can be used.
-
-       * primegen.c (small_prime_numbers): Moved table from smallprime.c
-       * smallprime.c: File removed.
-
-       * des.c (leftkey_swap, rightkey_swap, working_memcmp): Made static.
-
-       * cipher.c (gcry_cipher_map_name): Map "RIJNDAEL" to "AES".
-       * rijndael.c (rijndael_get_info): We do only support a 128 bit
-       blocksize so it makes sense to change the algorithm strings to
-       AES.
-
-       * tiger.c (tiger_final): Removed superfluous token pasting operators.
-       * md5.c (md5_final): Ditto.
-
-2002-04-30  Werner Koch  <wk@gnupg.org>
-
-       * cipher.c: Fixed list of copyright years.
-
-2002-03-18  Werner Koch  <wk@gnupg.org>
-
-       * random.c (initialize): Initialize the new pool lock mutex.
-       (_gcry_fast_random_poll): Add locking and moved main
-       code out to...
-       (do_fast_random_poll): new function.
-       (read_pool): Use the new function here.
-       (get_random_bytes): Add locking.
-       (_gcry_update_random_seed_file): Ditto.
-
-2002-03-11  Werner Koch  <wk@gnupg.org>
-
-       * md.c: Add rsaSignatureWithripemd160 to OID table.
-
-2002-02-20  Werner Koch  <wk@gnupg.org>
-
-       * sha1.c: Removed a left over comment note.  The code has been
-       rewritten from scratch in 1998.  Thanks to Niels Möller for
-       reporting this misleading comment.
-
-2002-02-18  Werner Koch  <wk@gnupg.org>
-
-       * rndunix.c (rndunix_constructor): Use the the new prefixed
-       function name.  Reported by Jordi Mallach.
-
-2002-02-10  Werner Koch  <wk@gnupg.org>
-
-       * random.c (mix_pool): Carry an extra failsafe_digest buffer
-       around to make the function more robust.
-
-2002-02-08  Werner Koch  <wk@gnupg.org>
-
-       * random.c (add_randomness): Xor new data into the pool and not
-       just copy it.  This avoids any choosen input attacks which are not
-       serious in our setting because an outsider won't be able to mix
-       data in and even then we keep going with a PRNG.  Thanks to Stefan
-       Keller for pointing this out.
-
-2002-01-04  Werner Koch  <wk@gnupg.org>
-
-       * pubkey.c (gcry_pk_genkey): Do not release skey - it is static.
-
-       * primegen.c (gen_prime): Of course we should use set_bit
-       and not set_highbit to set the second high bit.
-
-2001-12-18  Werner Koch  <wk@gnupg.org>
-
-       * rsa.c (generate): Loop until we find the exact modulus size.
-       Changed the exponent to 41.
-       (rsa_get_info): s/usage/r_usage/ to avoid shadow warnings.
-       * primegen.c (gen_prime): Set 2 high order bits for secret primes.
-
-       * Makefile.am (DISTCLEANFILES): Include construct.c.
-
-2001-12-17  Werner Koch  <wk@gnupg.org>
-
-       * pubkey.c (gcry_pk_get_keygrip): New - experimental.
-
-2001-12-11  Werner Koch  <wk@gnupg.org>
-
-       * cipher.c: Added OIDs for AES.
-       (gcry_cipher_mode_from_oid): New.
-       (gcry_cipher_map_name): Moved OID search code to ..
-       (search_oid): .. new function.
-
-2001-12-10  Werner Koch  <wk@gnupg.org>
-
-       * pubkey.c (gcry_pk_encrypt): Find the signature algorithm by name
-       and not by number.
-       
-       * pubkey.c (gcry_pk_encrypt,gcry_pk_decrypt,gcry_pk_sign)
-       (gcry_pk_verify,gcry_pk_testkey, gcry_pk_genkey)
-       (gcry_pk_get_nbits): Release the arrays.  Noted by Nikos
-       Mavroyanopoulos.
-
-2001-12-06  Werner Koch  <wk@gnupg.org>
-
-       * cipher.c (gcry_cipher_map_name): Look also for OIDs prefixed
-       with "oid."  or "OID.".
-
-2001-12-05  Werner Koch  <wk@gnupg.org>
-
-       * pubkey.c (algo_info_table): Fixed entry for openpgp-rsa. 
-
-2001-11-24  Werner Koch  <wk@gnupg.org>
-
-       * pubkey.c: Added the rsaEncryption OID to the tables.
-       (sexp_to_key): Add an arg to return the index of the algorithm,
-       changed all callers.
-       (gcry_pk_sign): Find the signature algorithm by name and not by
-       number.
-       (gcry_pk_get_nbits): Fixed so that we can now really pass a secret
-       key to get the result.
-       
-       * md.c (gcry_md_map_name): Look also for OIDs prefixed with "oid."
-       or "OID." so that an OID string can be used as an S-Exp token.
-
-2001-11-20  Werner Koch  <wk@gnupg.org>
-
-       * md.c (gcry_md_map_name): Lookup by OID if the the name begins
-       with a digit.
-       (oid_table): New.
-       
-2001-11-16  Werner Koch  <wk@gnupg.org>
-
-       * md.c (gcry_md_info): New operator GCRYCTL_IS_ALGO_ENABLED.
-
-2001-11-07  Werner Koch  <wk@gnupg.org>
-
-       * md.c (gcry_md_hash_buffer): Close the handle which was left open
-       for algorithms other than rmd160.
-
-2001-08-08  Werner Koch  <wk@gnupg.org>
-
-       * rndw32.c (gather_random): Use toolhelp in addition to the NT
-       gatherer for Windows2000.  Suggested by Sami Tolvanen.
-
-       * random.c (read_pool): Fixed length check, this used to be one
-       byte to strict.  Made an assert out of it because the caller has
-       already made sure that only poolsize bytes are requested.
-       Reported by Marcus Brinkmann.
-
-2001-08-03  Werner Koch  <wk@gnupg.org>
-
-       * cipher.c (cipher_encrypt, cipher_decrypt): Prepare to return
-       errors. We have to change the interface to all ciphers to make
-       this really work but we should do so to prepare for hardware
-       encryption modules.
-       (gcry_cipher_encrypt, gcry_cipher_decrypt): Return the error and
-       set lasterr. 
-       (gcry_cipher_ctl): Make sure that errors from setkey are returned.
-
-2001-08-02  Werner Koch  <wk@gnupg.org>
-
-       * rndlinux.c (gather_random): casted a size_t arg to int so that
-       the format string is correct.  Casting is okay here and avoids
-       translation changes. 
-
-       * random.c (fast_random_poll): Do not check the return code of
-       getrusage.
-
-       * rndunix.c: Add a signal.h header to avoid warnings on Solaris 7
-       and 8.
-
-       * tiger.c (print_abc,print_data): Removed.
-
-       * rijndael.c, des.c, blowfish.c, twofish.c, cast5.c, arcfour.c
-       (burn_stack): New.  Add wrappers for most functions to be able to
-       call burn_stack after the function invocation. This methods seems
-       to be the most portable way to zeroise the stack used. It does
-       only work on stack frame based machines but it is highly portable
-       and has no side effects.  Just setting the automatic variables at
-       the end of a function to zero does not work well because the
-       compiler will optimize them away - marking them as volatile would
-       be bad for performance.
-       * md5.c, sha1.c, rmd160.c, tiger.c (burn_stack): Likewise.
-       * random.c (burn_stack): New.
-       (mix_pool): Use it here to burn the stack of the mixblock function.
-
-       * primegen.c (_gcry_generate_elg_prime): Freed q at 3 places.
-       Thanks to Tommi Komulainen.
-
-       * arcfour.c (arcfour_setkey): Check the minimim keylength against
-       bytes and not bits.
-       (selftest): Must reset the key before decryption. 
-
-2001-05-31  Werner Koch  <wk@gnupg.org>
-
-       * sha1.c (sha1_init): Made static.
-
-        Changed all g10_ prefixed function names as well as some mpi_
-       function names to cope with the introduced naming changes.
-       
-       * md.c (prepare_macpads): Made key const.
-
-2001-05-28  Werner Koch  <wk@gnupg.org>
-
-       * rndegd.c (gather_random): Removed the use of tty_printf.
-
-2001-03-29  Werner Koch  <wk@gnupg.org>
-
-       * md5.c (md5_final): Fixed calculation of hashed length.  Thanks
-       to disastry@saiknes.lv for pointing out that it was horrible wrong
-       for more than 512MB of input.
-       * sha1.c (sha1_final): Ditto.
-       * rmd160.c (rmd160_final): Ditto.
-       * tiger.c (tiger_final): Ditto.
-
-       * blowfish.c (encrypt,do_encrypt): Changed name to do_encrypt to
-       avoid name clashes with an encrypt function in stdlib.h of
-       Dynix/PIX.  Thanks to Gene Carter.
-       * elgamal.c (encrypt,do_encrypt): Ditto.
-
-       * twofish.c (gnupgext_enum_func): Use only when when compiled as a
-       module.
-       * rijndael.c (gnupgext_enum_func): Ditto.
-
-       * tiger.c (tiger_get_info): Return "TIGER192" and not just
-       "TIGER".  By Edwin Woudt.
-       
-       * random.c: Always include time.h - standard requirement.  Thanks
-       to James Troup.
-
-       * rndw32.c: Fixes to the macros.
-
-2001-01-11  Werner Koch  <wk@gnupg.org>
-
-       * cipher.c (cipher_encrypt,gcry_cipher_encrypt): Use blocksize and
-       not 8.
-
-2000-12-19  Werner Koch  <wk@gnupg.org>
-
-       Major change:
-       Removed all GnuPG stuff and renamed this piece of software
-       to gcrypt. 
-
-2000-11-14  Werner Koch  <wk@gnupg.org>
-
-       * dsa.c (test_keys): Replaced mpi_alloc by gcry_mpi_new and
-       mpi_free by gcry_mpi_release.
-       * elgamal.c (test_keys,generate): Ditto, also for mpi_alloc_secure.
-       * rsa.c (test_keys,generate,rsa_verify): Ditto.
-       * primegen.c (generate_elg_prime): Ditto.
-       (gen_prime): Ditto and removed nlimbs.
-
-       * rsa.c (generate): Allocate 2 more vars in secure memory.
-
-       * Makefile.am (OMIT_DEPENDENCIES): Hack to work around dependency
-       problems.
-
-2000-10-09  Werner Koch  <wk@gnupg.org>
-
-       * arcfour.c, arcfour.h: New.
-       * cipher.c (cipher_encrypt, cipher_decrypt): Add stream mode.
-       (setup_cipher_table): Add Arcfour.
-       (gcry_cipher_open): Kludge to allow stream mode.
-
-Wed Oct  4 13:16:18 CEST 2000  Werner Koch  <wk@openit.de>
-
-        * sha1.c (transform): Use rol() macro.  Actually this is not needed
-        for a newer gcc but there are still aoter compilers.
-
-        * rsa.c (test_keys): Use new random function. 
-
-        * md.c (gcry_md_setkey): New function to overcome problems with
-        const conflics.  
-        (gcry_md_ctl): Pass set key to the new functions.
-
-        * rijndael.c: New.
-        * cipher.c: Add Rijndael support.
-
-Mon Sep 18 16:35:45 CEST 2000  Werner Koch  <wk@openit.de>
-
-        * rndlinux.c (open_device): Loose random device checking.
-        By Nils Ellmenreich.
-
-        * random.c (fast_random_poll): Check ENOSYS for getrusage.
-        * rndunix.c:  Add 2 sources for QNX. By Sam Roberts.
-
-        * pubkey.c (gcry_pk_algo_info): Add GCRYCTL_GET_ALGO_USAGE.
-
-        * rsa.c: Changed the comment about the patent.
-        (secret): Speed up by using the CRT.  For a 2k keys this
-        is about 3 times faster.
-        (stronger_key_check): New but unused code to check the secret key.
-        * Makefile.am: Included rsa.[ch].
-        * pubkey.c: Enabled RSA support.
-        (pubkey_get_npkey): Removed RSA workaround.
-
-Mon Jul 31 10:04:47 CEST 2000  Werner Koch  <wk@openit.de>
-
-  * pubkey.c: Replaced all gcry_sexp_{car,cdr}_{data,mpi} by the new
-  gcry_sexp_nth_{data,mpi} functions.
-
-Tue Jul 25 17:44:15 CEST 2000  Werner Koch  <wk@openit.de>
-
-  * pubkey.c (exp_to_key,sexp_to_sig,sexp_to_enc,gcry_pk_encrypt,
-    gcry_pk_decrypt,gcry_pk_sign,gcry_pk_genkey): Changed to work with
-    the new S-Exp interface.
-
-Mon Jul 17 16:35:47 CEST 2000  Werner Koch  <wk@>
-
-  * random.c (gather_faked): Replaced make_timestamp by time(2) again.
-
-Fri Jul 14 19:38:23 CEST 2000  Werner Koch  <wk@>
-
-  * md.c (gcry_md_ctl): Support GCRYCTL_{START,STOP}_DUMP.
-
-  * Makefile.am: Never compile mingw32 as module.
-
-  * Makefile.am: Tweaked module build and removed libtool
-
-  * Makefile.am:  Replaced -O1 by -O. Suggested by Alec Habig.
-
-  * elgamal.c (sign): Removed inactive code.
-
-  * rsa.c, rsa.h: New based on the old module version (only in CVS for now).
-  * pubkey.c (setup_pubkey_table): Added commented support for RSA.
-
-  * rndunix.c (waitpid): New. For UTS 2.1.  All by Dave Dykstra.
-  (my_popen): Do the FD_CLOEXEC only if it is available
-  (start_gatherer): Cope with missing _SC_OPEN_MAX
-
-  * rndunix.c: Add some more headers for QNX. By Sam Roberts.
-
-  * rndegd.c (gather_random): Shortcut level 0.
-  * rndunix.c (gather_random): Ditto.
-  * rndw32.c (gather_random): Ditto.
-
-  * rndw32.c: Replaced with code from Cryptlib and commented the old stuff.
-  * rndw32.c: Add some debuging code enabled by an environment variable.
-
-  * random.c (read_seed_file): Binary open for DOSish system
-  (update_random_seed_file): Ditto.
-  * random.c [MINGW32]: Include process.h for getpid.
-  * random.c (fast_random_poll): Add clock_gettime() as fallback for
-  system which support this POSIX.4 fucntion. By Sam Roberts.
-
-  * random.c (read_seed_file): Removed the S_ISLNK test becuase it
-  is already covered by !S_ISREG and is not defined in Unixware.
-  Reported by Dave Dykstra.
-  (update_random_seed_file): Silently ignore update request when pool
-  is not filled.
-
-  * random.c (read_seed_file): New.
-  (set_random_seed_file): New.
-  (read_pool): Try to read the seeding file.
-  (update_random_seed_file): New.
-
-  (read_pool): Do an initial extra seeding when level 2 quality random
-  is requested the first time. This requestes at least POOLSIZE/2 bytes
-  of entropy.  Compined with the seeding file this should make normal
-  random bytes cheaper and increase the quality of the random bytes
-  used for key generation.
-
-  * random.c (read_pool): Print a more friendly error message in
-  cases when too much random is requested in one call.
-
-  * random.c (fast_random_poll): Check whether RUSAGE_SELF is defined;
-  this is not the case for some ESIX and Unixware, although they have
-  getrusage().
-
-  * primegen.c (generate_elg_prime): All primes are now generated with
-  the lowest random quality level.  Because they are public anyway we
-  don't need stronger random and by this we do not drain the systems
-  entropy so much.
-
-  * primegen.c (register_primegen_progress): New.
-  * dsa.c (register_pk_dsa_progress): New.
-  * elgamal.c (register_pk_elg_progress): New.
-
-  * elgamal.c (wiener_map): New.
-  (gen_k): Use a much smaller k.
-  (generate): Calculate the qbits using the wiener map and
-  choose an x at a size comparable to the one choosen in gen_k
-
-  * rmd160.c (rmd160_get_info): Moved casting to the left side due to a
-  problem with UTS4.3. Suggested by Dave Dykstra.
-  * sha1.c (sha1_get_info): Ditto.
-  * tiger.c (tiger_get_info): Ditto.
-  * md5.c (md5_get_info): Ditto
-  * des.c (des_get_info): Ditto.
-  * blowfish.c (blowfish_get_info): Ditto.
-  * cast5.c (cast5_get_info): Ditto.
-  * twofish.c (twofish_get_info): Ditto.
-
-Fri Mar 24 11:25:45 CET 2000  Werner Koch  <wk@openit.de>
-
-       * md.c (md_open): Add hmac arg and allocate space for the pads.
-       (md_finalize): Add HMAC support.
-       (md_copy): Ditto.
-       (md_close): Ditto.
-       (gcry_md_reset): Ditto.
-       (gcry_md_ctl): Ditto.
-       (prepare_macpdas): New.
-
-Mon Mar 13 19:22:46 CET 2000  Werner Koch  <wk@openit.de>
-
-       * md.c (gcry_md_hash_buffer): Add support for the other algorithms.
-
-Mon Jan 31 16:37:34 CET 2000  Werner Koch  <wk@gnupg.de>
-
-       * genprime.c (generate_elg_prime): Fixed returned factors which never
-       worked for non-DSA keys.
-
-Thu Jan 27 18:00:44 CET 2000  Werner Koch  <wk@gnupg.de>
-
-       * pubkey.c (sexp_to_key): Fixed mem leaks in case of errors.
-
-Mon Jan 24 22:24:38 CET 2000  Werner Koch  <wk@gnupg.de>
-
-       * pubkey.c (gcry_pk_decrypt): Implemented.
-       (gcry_pk_encrypt): Implemented.
-       (gcry_pk_testkey): New.
-       (gcry_pk_genkey): New.
-       (pubkey_decrypt): Made static.
-       (pubkey_encrypt): Ditto.
-       (pubkey_check_secret_key): Ditto.
-       (pubkey_generate): Ditto.
-
-Mon Jan 24 13:04:28 CET 2000  Werner Koch  <wk@gnupg.de>
-
-       * pubkey.c (pubkey_nbits): Removed and replaced by ...
-       (gcry_pk_get_nbits): this new one.
-
-Wed Dec  8 21:58:32 CET 1999  Werner Koch  <wk@gnupg.de>
-
-       * dsa.c: s/mpi_powm/gcry_mpi_powm/g
-       * elgamal.c: Ditto.
-       * primegen.c: Ditto.
-
-       * : Replaced g10_opt_verbose by g10_log_verbosity().
-
-       * Makefile.am (INCLUDES): removed intl, add ../gcrypt
-
-Fri Nov 19 17:15:20 CET 1999  Werner Koch  <wk@gnupg.de>
-
-       * dynload.c (cmp_filenames): New to replaced compare_filename() in
-       module.
-       (register_cipher_extension): Removed the tilde expansion stuff.
-       * rndeg.c (my_make_filename): New.
-
-       * : Replaced header util.h by g10lib.h
-
-       * random.c (gather_faked): Replaced make_timestamp by time(2).
-       Disabled wrning printed with tty_printf.
-       * rndlinux.c (gather_random): Always use fprintf instead of tty_xxx;
-       this should be replaced by a callback function.
-
-       * primegen.c (gen_prime): Use gcry_mpi_randomize.
-       (is_prime): Ditto.
-       * elgamal.c (test_keys): Ditto.
-       * dsa.c (test_keys): Ditto.
-
-       * cipher.c (gcry_cipher_close): Die on invalid handle.
-
-Mon Nov 15 21:36:02 CET 1999  Werner Koch  <wk@gnupg.de>
-
-       * elgamal.c (gen_k): Use the new random API.
-       (generate): Ditto.
-       * dsa.c (gen_k): Ditto.
-       (generate): Ditto.
-
-Sat Nov 13 17:44:23 CET 1999  Werner Koch  <wk@gnupg.de>
-
-       * pubkey.c (disable_pubkey_algo): Made static.
-       (gcry_pk_ctl): New.
-
-       * random.c (get_random_bits): Renamed to ...
-       (get_random_bytes): ... this and made static.
-       (gcry_random_bytes): New.
-       (gcry_random_bytes_secure): New.
-       (randomize_buffer): Renamed to ...
-       (gcry_randomize): ...this.
-
-       * md.c (gcry_md_hash_buffer): New.
-
-       * pubkey.c (gcry_pk_algo_info): 4 new commands.
-       (pubkey_get_npkey): Made static.
-       (pubkey_get_nskey): Made static.
-       (pubkey_get_nsig): Made static.
-       (pubkey_get_nenc): Made static.
-
-       * pubkey.c: Removed all G10ERR_xxx.
-       * cipher.c: Changed all GCRYERR_INV_ALGO to GCRYERR_INV_CIPHER_ALGO.
-       * md.c: Changed all GCRYERR_INV_ALGO to GCRYERR_INV_MD_ALGO.
-       * cast5.c (cast_setkey): Changed errocodes to GCRYERR_xxx.
-       * blowfish.c: Ditto.
-       * des.c: Ditto.
-       * twofish.c: Ditto.
-       * dsa.c: Ditto.
-       * elgamal.c: Ditto.
-
-       * g10c.c: Removed
-
-       * cipher.c (gcry_cipher_open): Replaced alloc functions and return NULL
-       if we are out of core.
-       * dynload.c: Replaced all memory allocation functions.
-       * md.c: Ditto.
-       * primegen.c: Ditto.
-       * pubkey.c: Ditto.
-       * random.c: Ditto.
-       * rndw32.c: Ditto.
-       * elgamal.c: Ditto.
-       * dsa.c: Ditto.
-
-Tue Oct 26 14:10:21 CEST 1999  Werner Koch  <wk@gnupg.de>
-
-       * elgamal.c (sign): Hugh found strange code here. Replaced by BUG().
-
-       * cipher.c: Merged with gcrypt/symapi.c.
-
-       * pubkey.c (string_to_pubkey_algo): Renamed function to ...
-       (gcry_pk_map_name): ... this.
-       (pubkey_algo_to_string): Renamed function to ...
-       (gcry_pk_algo_name): ... this.
-       (gcry_pk_algo_info): New.
-       * pubkey.c: Merged with gcrypt/pkapi.c.
-
-       * md.c (md_reset): Clear finalized; thanks to Ulf Moeller for
-       fixing this bug.
-
-       * md.c: Merged with gcrypt/mdapi.c
-
-Wed Sep 15 14:39:59 CEST 1999  Michael Roth <mroth@nessie.de>
-
-       * des.c: Various speed improvements: One bit pre rotation
-         trick after initial permutation (Richard Outerbridge).
-         Finished test of SSLeay Tripple-DES patterns.
-
-Wed Sep 15 16:22:17 CEST 1999  Werner Koch  <wk@isil.d.shuttle.de>
-
-       * rndw32.c: New.
-
-Mon Sep 13 10:51:29 CEST 1999  Werner Koch  <wk@isil.d.shuttle.de>
-
-       * bithelp.h: New.
-       * rmd160.h, sha1.h, md5.h: Use the rol macro from bithelp.h
-
-Tue Sep  7 16:23:36 CEST 1999  Werner Koch  <wk@isil.d.shuttle.de>
-
-       * Makefile.am: Fixed seds for latest egcc. By Ollivier Robert.
-
-Mon Sep  6 19:59:08 CEST 1999  Werner Koch  <wk@isil.d.shuttle.de>
-
-       * des.c (selftest): Add some testpattern
-
-Mon Aug 30 20:38:33 CEST 1999  Werner Koch  <wk@isil.d.shuttle.de>
-
-       * cipher.c (do_cbc_encrypt): Fixed serious bug occuring when not using
-       in place encryption. Pointed out by Frank Stajano.
-
-Mon Jul 26 09:34:46 CEST 1999  Werner Koch  <wk@isil.d.shuttle.de>
-
-       * md5.c (md5_final): Fix for a SCO cpp bug.
-
-Thu Jul 15 10:15:35 CEST 1999  Werner Koch  <wk@isil.d.shuttle.de>
-
-       * elgamal.c (elg_check_secret_key,elg_encrypt
-       elg_decrypt,elg_sign,elg_verify): Sanity check on the args.
-       * dsa.c (dsa_check_secret_key,dsa_sign,dsa_verify): Ditto.
-
-       * pubkey.c (disable_pubkey_algo): New.
-       (check_pubkey_algo2): Look at disabled algo table.
-       * cipher.c (disable_cipher_algo): New.
-       (check_cipher_algo): Look at disabled algo table.
-
-Wed Jul  7 13:08:40 CEST 1999  Werner Koch  <wk@isil.d.shuttle.de>
-
-       * Makefile.am: Support for libtool.
-
-Fri Jul  2 11:45:54 CEST 1999  Werner Koch  <wk@isil.d.shuttle.de>
-
-       * dsa.c (gen_k): Changed algorithm to consume less random bytes
-       * elgamal.c (gen_k): Ditto.
-
-       * random.c (random_dump_stats): New.
-
-Thu Jul  1 12:47:31 CEST 1999  Werner Koch  <wk@isil.d.shuttle.de>
-
-       * primegen.c, elgamal.c, dsa.c (progess): New and replaced all
-       fputc with a call to this function.
-
-Sat Jun 26 12:15:59 CEST 1999  Werner Koch  <wk@isil.d.shuttle.de>
-
-       * rndegd.c (do_write): s/ssize_t/int/ due to SunOS 4.1 probs.
-
-       * cipher.c (do_cbc_encrypt, do_cbc_decrypt): New.
-
-       * dynload.c (HAVE_DL_SHL_LOAD): Map hpux API to dlopen (Dave Dykstra).
-       * Makefile.am (install-exec-hook): Removed.
-
-Sun May 23 14:20:22 CEST 1999  Werner Koch  <wk@isil.d.shuttle.de>
-
-       * cipher.c (setup_cipher_table): Enable Twofish
-
-       * random.c (fast_random_poll): Disable use of times() for mingw32.
-
-Mon May 17 21:54:43 CEST 1999  Werner Koch  <wk@isil.d.shuttle.de>
-
-       * dynload.c (register_internal_cipher_extension): Minor init fix.
-
-Tue May  4 15:47:53 CEST 1999  Werner Koch  <wk@isil.d.shuttle.de>
-
-       * primegen.c (gen_prime): Readded the Fermat test. Fixed the bug
-       that we didn't correct for step when passing the prime to the
-       Rabin-Miller test which led to bad performance (Stefan Keller).
-       (check_prime): Add a first Fermat test.
-
-Sun Apr 18 10:11:28 CEST 1999  Werner Koch  <wk@isil.d.shuttle.de>
-
-       * cipher.c (cipher_setiv): Add ivlen arg, changed all callers.
-
-       * random.c (randomize_buffer): alway use secure memory because
-       we can't use m_is_secure() on a statically allocated buffer.
-
-       * twofish.c: Replaced some macros by a loop to reduce text size.
-       * Makefile.am (twofish): No more need for sed editing.
-
-Fri Apr  9 12:26:25 CEST 1999  Werner Koch  <wk@isil.d.shuttle.de>
-
-       * cipher.c (cipher_open): Reversed the changes for AUTO_CFB.
-
-       * blowfish.c: Dropped the Blowfish 160 mode.
-       * cipher.c (cipher_open): Ditto.
-       (setup_cipher_table): Ditto.  And removed support of twofish128
-
-Wed Apr  7 20:51:39 CEST 1999  Werner Koch  <wk@isil.d.shuttle.de>
-
-       * random.c (get_random_bits): Can now handle requests > POOLSIZE
-
-       * cipher.c (cipher_open): Now uses standard CFB for automode if
-       the blocksize is gt 8 (according to rfc2440).
-
-       * twofish.c: Applied Matthew Skala's patches for 256 bit key.
-
-Tue Apr  6 19:58:12 CEST 1999  Werner Koch  <wk@isil.d.shuttle.de>
-
-       * random.c (get_random_bits): Can now handle requests > POOLSIZE
-
-       * cipher.c (cipher_open): Now uses standard CFB for automode if
-       the blocksize is gt 8 (according to rfc2440).
-
-Sat Mar 20 11:44:21 CET 1999  Werner Koch  <wk@isil.d.shuttle.de>
-
-       * rndlinux.c (tty_printf) [IS_MODULE]: Removed.
-
-       * rndegd.c (gather_random): Some fixes.
-
-Wed Mar 17 13:09:03 CET 1999  Werner Koch  <wk@isil.d.shuttle.de>
-
-       * rndegd.c (do_read): New.
-       (gather_random): Changed the implementation.
-
-Mon Mar  8 20:47:17 CET 1999  Werner Koch  <wk@isil.d.shuttle.de>
-
-       * dynload.c (DLSYM_NEEDS_UNDERSCORE): Renamed.
-
-Fri Feb 26 17:55:41 CET 1999  Werner Koch  <wk@isil.d.shuttle.de>
-
-       * md.c: Nearly a total rewrote.
-
-Wed Feb 24 11:07:27 CET 1999  Werner Koch  <wk@isil.d.shuttle.de>
-
-       * cipher.c (context): Fixed alignment
-       * md.c: Ditto.
-
-       * rndegd.c: New
-
-Mon Feb 22 20:04:00 CET 1999  Werner Koch  <wk@isil.d.shuttle.de>
-
-       * rndegd.c: New.
-
-Wed Feb 10 17:15:39 CET 1999  Werner Koch  <wk@isil.d.shuttle.de>
-
-       * Makefile.am: Modules are now figured out by configure
-       * construct.c: New. Generated by configure. Changed all modules
-       to work with that.
-       * sha1.h: Removed.
-       * md5.h: Removed.
-
-       * twofish.c: Changed interface to allow Twofish/256
-
-       * rndunix.c (start_gatherer): Die on SIGPIPE.
-
-Wed Jan 20 18:59:49 CET 1999  Werner Koch  <wk@isil.d.shuttle.de>
-
-       * rndunix.c (gather_random): Fix to avoid infinite loop.
-
-Sun Jan 17 11:04:33 CET 1999  Werner Koch  <wk@isil.d.shuttle.de>
-
-       * des.c (is_weak_key): Replace system memcmp due to bugs
-       in SunOS's memcmp.
-       (des_get_info): Return error on failed selftest.
-       * twofish.c (twofish_setkey): Return error on failed selftest or
-       invalid keylength.
-       * cast5.c (cast_setkey): Ditto.
-       * blowfish.c (bf_setkey): Return error on failed selftest.
-
-Tue Jan 12 11:17:18 CET 1999  Werner Koch  <wk@isil.d.shuttle.de>
-
-       * random.c (random_is_faked): New.
-
-       * tiger.c: Only compile if we have the u64 type
-
-Sat Jan  9 16:02:23 CET 1999  Werner Koch  <wk@isil.d.shuttle.de>
-
-       * rndunix.c (gather_random): check for setuid.
-
-       * Makefile.am: Add a way to staically link random modules
-
-Thu Jan  7 18:00:58 CET 1999  Werner Koch  <wk@isil.d.shuttle.de>
-
-       * md.c (md_stop_debug): Do a flush first.
-       (md_open): size of buffer now depends on the secure parameter
-
-Sun Jan  3 15:28:44 CET 1999  Werner Koch  <wk@isil.d.shuttle.de>
-
-       * rndunix.c (start_gatherer): Fixed stupid ==/= bug
-
-1998-12-31  Geoff Keating  <geoffk@ozemail.com.au>
-
-       * des.c (is_weak_key): Rewrite loop end condition.
-
-Tue Dec 29 14:41:47 CET 1998  Werner Koch  <wk@isil.d.shuttle.de>
-
-       * random.c: add unistd.h for getpid().
-       (RAND_MAX): Fallback value for Sun.
-
-Wed Dec 23 17:12:24 CET 1998  Werner Koch  <wk@isil.d.shuttle.de>
-
-       * md.c (md_copy): Reset debug.
-
-Mon Dec 14 21:18:49 CET 1998  Werner Koch  <wk@isil.d.shuttle.de>
-
-       * random.c (read_random_source): Changed the interface to the
-       random gathering function.
-       (gather_faked): Use new interface.
-       * dynload.c (dynload_getfnc_fast_random_poll): Ditto.
-       (dynload_getfnc_gather_random): Ditto.
-       * rndlinux.c (gather_random): Ditto.
-       * rndunix.c (gather_random): Ditto.
-
-Sat Dec 12 18:40:32 CET 1998  Werner Koch  <wk@isil.d.shuttle.de>
-
-       * dynload.c (SYMBOL_VERSION): New to cope with system which needs
-       underscores.
-
-       * rndunix.c: Rewrote large parts
-
-Thu Dec 10 20:15:36 CET 1998  Werner Koch  <wk@isil.d.shuttle.de>
-
-       * dynload.c (load_extension): increased needed verbosity level.
-
-       * random.c (fast_random_poll): Fallback to a default fast random
-       poll function.
-       (read_random_source): Always use the faked entroy gatherer if no
-       gather module is available.
-       * rndlinux.c (fast_poll): Removed.
-       * rndunix.c (fast_poll): Removed.
-
-
-Wed Nov 25 12:33:41 1998  Werner Koch  (wk@isil.d.shuttle.de)
-
-       * rand-*.c: Removed.
-       * rndlinux.c : New.
-       * rndunix.c : New.
-       * random.c : Restructured the interface to the gather modules.
-       (intialize): Call constructor functions
-       (read_radnom_source): Moved to here.
-       * dynload.c (dynload_getfnc_gather_random): New.
-       (dynload_getfnc_fast_random_poll): New.
-       (register_internal_cipher_extension): New.
-       (register_cipher_extension): Support of internal modules.
-
-Sun Nov  8 17:44:36 1998  Werner Koch  (wk@isil.d.shuttle.de)
-
-       * rand-unix.c (read_random_source): Removed the assert.
-
-Mon Oct 19 18:34:30 1998  me,,,  (wk@tobold)
-
-       * pubkey.c: Hack to allow us to give some info about RSA keys back.
-
-Thu Oct 15 11:47:57 1998  Werner Koch  (wk@isil.d.shuttle.de)
-
-       * dynload.c: Support for DLD
-
-Wed Oct 14 12:13:07 1998  Werner Koch  (wk@isil.d.shuttle.de)
-
-       * rand-unix.c: Now uses names from configure for /dev/random.
-
-1998-10-10  SL Baur  <steve@altair.xemacs.org>
-
-       * Makefile.am: fix sed -O substitutions to catch -O6, etc.
-
-Tue Oct  6 10:06:32 1998  Werner Koch  (wk@isil.d.shuttle.de)
-
-       * rand-unix.c (HAVE_GETTIMEOFDAY): Fixed (was ..GETTIMEOFTIME :-)
-       * rand-dummy.c (HAVE_GETTIMEOFDAY): Ditto.
-
-Mon Sep 28 13:23:09 1998  Werner Koch  (wk@isil.d.shuttle.de)
-
-       * md.c (md_digest): New.
-       (md_reset): New.
-
-Wed Sep 23 12:27:02 1998  Werner Koch  (wk@isil.d.shuttle.de)
-
-       * tiger.c (TIGER_CONTEXT): moved "buf", so that it is 64 bit aligned.
-
-Mon Sep 21 06:22:53 1998  Werner Koch  (wk@(none))
-
-       * des.c: Some patches from Michael.
-
-Thu Sep 17 19:00:06 1998  Werner Koch  (wk@(none))
-
-       * des.c : New file from Michael Roth <mroth@nessie.de>
-
-Mon Sep 14 11:10:55 1998  Werner Koch  (wk@(none))
-
-       * blowfish.c (bf_setkey): Niklas Hernaeus patch to detect weak keys.
-
-Mon Sep 14 09:19:25 1998  Werner Koch  (wk@(none))
-
-       * dynload.c (RTLD_NOW): Now defined to 1 if it is undefined.
-
-Mon Sep  7 17:04:33 1998  Werner Koch  (wk@(none))
-
-       * Makefile.am: Fixes to allow a different build directory
-
-Thu Aug  6 17:25:38 1998  Werner Koch,mobil,,, (wk@tobold)
-
-       * random.c (get_random_byte): Removed and changed all callers
-       to use get_random_bits()
-
-Mon Jul 27 10:30:22 1998  Werner Koch  (wk@(none))
-
-       * cipher.c : Support for other blocksizes
-       (cipher_get_blocksize): New.
-       * twofish.c: New.
-       * Makefile.am: Add twofish module.
-
-Mon Jul 13 21:30:52 1998  Werner Koch  (wk@isil.d.shuttle.de)
-
-       * random.c (read_pool): Simple alloc if secure_alloc is not set.
-       (get_random_bits): Ditto.
-
-Thu Jul  9 13:01:14 1998  Werner Koch  (wk@isil.d.shuttle.de)
-
-       * dynload.c (load_extension): Function now nbails out if
-       the program is run setuid.
-
-Wed Jul  8 18:58:23 1998  Werner Koch  (wk@isil.d.shuttle.de)
-
-       * rmd160.c (rmd160_hash_buffer): New.
-
-Thu Jul  2 10:50:30 1998  Werner Koch  (wk@isil.d.shuttle.de)
-
-       * cipher.c (cipher_open): algos >=100 use standard CFB
-
-Thu Jun 25 11:18:25 1998  Werner Koch  (wk@isil.d.shuttle.de)
-
-       * Makefile.am: Support for extensions
-
-Thu Jun 18 12:09:38 1998  Werner Koch  (wk@isil.d.shuttle.de)
-
-       * random.c (mix_pool): simpler handling for level 0
-
-Mon Jun 15 14:40:48 1998  Werner Koch  (wk@isil.d.shuttle.de)
-
-       * tiger.c: Removed from dist, will reappear as dynload module
-
-Sat Jun 13 14:16:57 1998  Werner Koch  (wk@isil.d.shuttle.de)
-
-       * pubkey.c: Major changes to allow extensions. Changed the inteface
-       of all public key ciphers and added the ability to load extensions
-       on demand.
-
-       * misc.c: Removed.
-
-Wed Jun 10 07:52:08 1998  Werner Koch,mobil,,, (wk@tobold)
-
-       * dynload.c: New.
-       * cipher.c: Major changes to allow extensions.
-
-Mon Jun  8 22:43:00 1998  Werner Koch  (wk@isil.d.shuttle.de)
-
-       * cipher.c: Major internal chnages to support extensions.
-       * blowfish.c (blowfish_get_info): New and made all internal
-       functions static, changed heder.
-       * cast5.c (cast5_get_info): Likewise.
-
-Mon Jun  8 12:27:52 1998  Werner Koch  (wk@isil.d.shuttle.de)
-
-       * tiger.c (transform): Fix for big endian
-
-       * cipher.c (do_cfb_decrypt): Big endian fix.
-
-Fri May 22 07:30:39 1998  Werner Koch  (wk@isil.d.shuttle.de)
-
-       * md.c (md_get_oid): Add a new one for TIGER.
-
-Thu May 21 13:24:52 1998  Werner Koch  (wk@isil.d.shuttle.de)
-
-       * cipher.c: Add support for a dummy cipher
-
-Thu May 14 15:40:36 1998  Werner Koch  (wk@isil.d.shuttle.de)
-
-       * rmd160.c (transform): fixed sigbus - I should better
-       add Christian von Roques's new implemenation of rmd160_write.
-
-Fri May  8 18:07:44 1998  Werner Koch  (wk@isil.d.shuttle.de)
-
-       * rand-internal.h, rand-unix.c, rand-w32.c, rand_dummy.c: New
-       * random.c: Moved system specific functions to rand-****.c
-
-Fri May  8 14:01:17 1998  Werner Koch  (wk@isil.d.shuttle.de)
-
-       * random.c (fast_random_poll): add call to gethrtime.
-
-Tue May  5 21:28:55 1998  Werner Koch  (wk@isil.d.shuttle.de)
-
-       * elgamal.c (elg_generate): choosing x was not correct, could
-       yield 6 bytes which are not from the random pool, tsss, tsss..
-
-Tue May  5 14:09:06 1998  Werner Koch  (wk@isil.d.shuttle.de)
-
-       * primegen.c (generate_elg_prime): Add arg mode, changed all
-       callers and implemented mode 1.
-
-Mon Apr 27 14:41:58 1998  Werner Koch  (wk@isil.d.shuttle.de)
-
-       * cipher.c (cipher_get_keylen): New.
-
-Sun Apr 26 14:44:52 1998  Werner Koch  (wk@isil.d.shuttle.de)
-
-       * tiger.c, tiger.h: New.
-
-Wed Apr  8 14:57:11 1998  Werner Koch  (wk@isil.d.shuttle.de)
-
-       * misc.c (check_pubkey_algo2): New.
-
-Tue Apr  7 18:46:49 1998  Werner Koch  (wk@isil.d.shuttle.de)
-
-       * cipher.c: New
-       * misc.c (check_cipher_algo): Moved to cipher.c
-       * cast5.c: Moved many functions to cipher.c
-       * blowfish.c: Likewise.
-
-Sat Apr  4 19:52:08 1998  Werner Koch  (wk@isil.d.shuttle.de)
-
-       * cast5.c: Implemented and tested.
-
-Wed Apr  1 16:38:27 1998  Werner Koch  (wk@isil.d.shuttle.de)
-
-       * elgamal.c (elg_generate): Faster generation of x in some cases.
-
-Thu Mar 19 13:54:48 1998  Werner Koch  (wk@isil.d.shuttle.de)
-
-       * blowfish.c (blowfish_decode_cfb): changed XOR operation
-       (blowfish_encode_cfb): Ditto.
-
-Thu Mar 12 14:04:05 1998  Werner Koch  (wk@isil.d.shuttle.de)
-
-       * sha1.c (transform): Rewrote
-
-       * blowfish.c (encrypt): Unrolled for rounds == 16
-       (decrypt): Ditto.
-
-Tue Mar 10 16:32:08 1998  Werner Koch  (wk@isil.d.shuttle.de)
-
-       * rmd160.c (transform): Unrolled the loop.
-
-Tue Mar 10 13:05:14 1998  Werner Koch  (wk@isil.d.shuttle.de)
-
-       * random.c (read_pool): Add pool_balance stuff.
-       (get_random_bits): New.
-
-       * elgamal.c (elg_generate): Now uses get_random_bits to generate x.
-
-
-Tue Mar 10 11:33:51 1998  Werner Koch  (wk@isil.d.shuttle.de)
-
-       * md.c (md_digest_length): New.
-
-Tue Mar 10 11:27:41 1998  Werner Koch  (wk@isil.d.shuttle.de)
-
-       * dsa.c (dsa_verify): Works.
-
-Mon Mar  9 12:59:08 1998  Werner Koch  (wk@isil.d.shuttle.de)
-
-       * dsa.c, dsa.h: Removed some unused code.
-
-Wed Mar  4 10:39:22 1998  Werner Koch  (wk@isil.d.shuttle.de)
-
-       * md.c (md_open): Add call to fast_random_poll.
-       blowfish.c (blowfish_setkey): Ditto.
-
-Tue Mar  3 13:32:54 1998  Werner Koch  (wk@isil.d.shuttle.de)
-
-       * rmd160.c (rmd160_mixblock): New.
-       * random.c: Restructured to start with a new RNG implementation.
-       * random.h: New.
-
-Mon Mar  2 19:21:46 1998  Werner Koch  (wk@isil.d.shuttle.de)
-
-       * gost.c, gost.h: Removed because they did only contain trash.
-
-Sun Mar  1 16:42:29 1998  Werner Koch  (wk@isil.d.shuttle.de)
-
-       * random.c (fill_buffer): removed error message if n == -1.
-
-Fri Feb 27 16:39:34 1998  Werner Koch  (wk@isil.d.shuttle.de)
-
-       * md.c (md_enable): No init if called twice.
-
-Thu Feb 26 07:57:02 1998  Werner Koch  (wk@isil.d.shuttle.de)
-
-       * primegen.c (generate_elg_prime): Changed the progress printing.
-       (gen_prime): Ditto.
-
-Tue Feb 24 12:28:42 1998  Werner Koch  (wk@isil.d.shuttle.de)
-
-       * md5.c, md.5 : Replaced by a modified version of md5.c from
-       GNU textutils 1.22.
-
-Wed Feb 18 14:08:30 1998  Werner Koch  (wk@isil.d.shuttle.de)
-
-       * md.c, md.h : New debugging support
-
-Mon Feb 16 10:08:47 1998  Werner Koch  (wk@isil.d.shuttle.de)
-
-       * misc.c (cipher_algo_to_string): New
-       (pubkey_algo_to_string): New.
-       (digest_algo_to_string): New.
-
-
- Copyright 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006
-          2007, 2008, 2009, 2010 Free Software Foundation, Inc.
-
- This file is free software; as a special exception the author gives
- unlimited permission to copy and/or distribute it, with or without
- modifications, as long as this notice is preserved.
-
- This file is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY, to the extent permitted by law; without even the
- implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
diff --git a/grub-core/lib/libgcrypt/cipher/ChangeLog-2011 
b/grub-core/lib/libgcrypt/cipher/ChangeLog-2011
index 05516c99e..1ce6bd1e6 100644
--- a/grub-core/lib/libgcrypt/cipher/ChangeLog-2011
+++ b/grub-core/lib/libgcrypt/cipher/ChangeLog-2011
@@ -1,9 +1,37 @@
 2011-12-01  Werner Koch  <wk@g10code.com>
 
-        NB: ChangeLog files are no longer manually maintained.  Starting
-        on December 1st, 2011 we put change information only in the GIT
-        commit log, and generate a top-level ChangeLog file from logs at
-        "make dist".  See doc/HACKING for details.
+       NB: ChangeLog files are no longer manually maintained.  Starting
+       on December 1st, 2011 we put change information only in the GIT
+       commit log, and generate a top-level ChangeLog file from logs at
+       "make dist".  See doc/HACKING for details.
+
+2011-09-16  Werner Koch  <wk@g10code.com>
+
+       * primegen.c (_gcry_primegen_init): New.
+
+2011-09-15  Werner Koch  <wk@g10code.com>
+
+       * cipher-cbc.c, cipher-cfb.c, cipher-ofb.c, cipher-ctr.c: New.
+       * cipher-aeswrap.c: New.
+       * cipher-internal.h: New.
+       * cipher.c (cipher_context_alignment_t, struct gcry_cipher_handle)
+       (CTX_MAGIC_NORMAL, CTX_MAGIC_SECURE, NEED_16BYTE_ALIGNED_CONTEXT)
+       (MAX_BLOCKSIZE): Move to cipher-internal.h.
+       (do_aeswrap_encrypt, do_aeswrap_encrypt)
+       (do_cbc_encrypt, do_cbc_decrypt, do_ctr_encrypt, do_ctr_decrypt)
+       (do_ofb_encrypt, do_ofb_decrypt, do_ctr_encrypt): Move to the
+       respective new cipher-foo.c files.
+       (do_ctr_decrypt): Remove.
+
+2011-09-15  Werner Koch  <wk@g10code.com>
+
+       * pubkey.c (gcry_pk_list): Remove.
+       (gcry_pk_unregister): Remove.
+       * md.c (gcry_md_list): Remove.
+       (gcry_md_unregister): Remove.
+       * cipher.c (gcry_cipher_list): Remove.
+       (gcry_cipher_unregister): Remove.
+       * ac.c: Remove.
 
 2011-06-29  Werner Koch  <wk@g10code.com>
 
@@ -4245,3 +4273,7 @@ Mon Feb 16 10:08:47 1998  Werner Koch  
(wk@isil.d.shuttle.de)
  This file is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY, to the extent permitted by law; without even the
  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+Local Variables:
+buffer-read-only: t
+End:
diff --git a/grub-core/lib/libgcrypt/cipher/Makefile.am 
b/grub-core/lib/libgcrypt/cipher/Makefile.am
index 76cdc96ad..c3d642b2a 100644
--- a/grub-core/lib/libgcrypt/cipher/Makefile.am
+++ b/grub-core/lib/libgcrypt/cipher/Makefile.am
@@ -19,65 +19,258 @@
 
 # Process this file with automake to produce Makefile.in
 
-EXTRA_DIST = Manifest
-
 # Need to include ../src in addition to top_srcdir because gcrypt.h is
 # a built header.
-AM_CPPFLAGS = -I../src -I$(top_srcdir)/src
+AM_CPPFLAGS = -I../src -I$(top_srcdir)/src -I../mpi -I$(top_srcdir)/mpi
 AM_CFLAGS = $(GPG_ERROR_CFLAGS)
 
+AM_CCASFLAGS = $(NOEXECSTACK_FLAGS)
+
+EXTRA_DIST = gost-s-box.c
+
+CLEANFILES = gost-s-box
+DISTCLEANFILES = gost-sb.h
 
 noinst_LTLIBRARIES = libcipher.la
 
-GCRYPT_MODULES = @GCRYPT_CIPHERS@ @GCRYPT_PUBKEY_CIPHERS@ @GCRYPT_DIGESTS@
+GCRYPT_MODULES = @GCRYPT_CIPHERS@ @GCRYPT_PUBKEY_CIPHERS@ \
+                 @GCRYPT_DIGESTS@ @GCRYPT_KDFS@
 
 libcipher_la_DEPENDENCIES = $(GCRYPT_MODULES)
 libcipher_la_LIBADD = $(GCRYPT_MODULES)
 
 libcipher_la_SOURCES = \
-cipher.c pubkey.c ac.c md.c kdf.c \
-hmac-tests.c \
-bithelp.h  \
-primegen.c  \
-hash-common.c hash-common.h \
-rmd.h
+       cipher.c cipher-internal.h \
+       cipher-cbc.c \
+       cipher-cfb.c \
+       cipher-ofb.c \
+       cipher-ctr.c \
+       cipher-aeswrap.c \
+       cipher-ccm.c \
+       cipher-cmac.c \
+       cipher-gcm.c \
+       cipher-poly1305.c \
+       cipher-ocb.c \
+       cipher-xts.c \
+       cipher-eax.c \
+       cipher-siv.c \
+       cipher-gcm-siv.c \
+       cipher-selftest.c cipher-selftest.h \
+       pubkey.c pubkey-internal.h pubkey-util.c \
+       md.c \
+       mac.c mac-internal.h \
+       mac-hmac.c mac-cmac.c mac-gmac.c mac-poly1305.c \
+       poly1305.c poly1305-internal.h \
+       kdf.c kdf-internal.h \
+       bithelp.h  \
+       bufhelp.h  \
+       primegen.c  \
+       hash-common.c hash-common.h \
+       dsa-common.c rsa-common.c \
+       sha1.h
 
 EXTRA_libcipher_la_SOURCES = \
-arcfour.c \
-blowfish.c \
-cast5.c \
-crc.c \
-des.c \
-dsa.c \
-elgamal.c \
-ecc.c \
-idea.c \
-md4.c \
-md5.c \
-rijndael.c rijndael-tables.h \
-rmd160.c \
-rsa.c \
-seed.c \
-serpent.c \
-sha1.c \
-sha256.c \
-sha512.c \
-tiger.c \
-whirlpool.c \
-twofish.c \
-rfc2268.c \
-camellia.c camellia.h camellia-glue.c
+       asm-common-aarch64.h \
+       asm-common-amd64.h \
+       asm-common-s390x.h \
+       asm-inline-s390x.h \
+       asm-poly1305-aarch64.h \
+       asm-poly1305-amd64.h \
+       asm-poly1305-s390x.h \
+       arcfour.c arcfour-amd64.S \
+       blowfish.c blowfish-amd64.S blowfish-arm.S \
+       cast5.c cast5-amd64.S cast5-arm.S \
+       chacha20.c chacha20-amd64-ssse3.S chacha20-amd64-avx2.S \
+       chacha20-armv7-neon.S chacha20-aarch64.S \
+       chacha20-ppc.c chacha20-s390x.S \
+       cipher-gcm-ppc.c cipher-gcm-intel-pclmul.c cipher-gcm-armv7-neon.S \
+       cipher-gcm-armv8-aarch32-ce.S cipher-gcm-armv8-aarch64-ce.S \
+       crc.c crc-intel-pclmul.c crc-armv8-ce.c \
+       crc-armv8-aarch64-ce.S \
+       crc-ppc.c \
+       des.c des-amd64.S \
+       dsa.c \
+       elgamal.c \
+       ecc.c ecc-curves.c ecc-misc.c ecc-common.h \
+       ecc-ecdh.c ecc-ecdsa.c ecc-eddsa.c ecc-gost.c ecc-sm2.c \
+       idea.c \
+       gost28147.c gost.h \
+       gostr3411-94.c \
+       md4.c \
+       md5.c \
+       poly1305-s390x.S \
+       rijndael.c rijndael-internal.h rijndael-tables.h   \
+       rijndael-aesni.c rijndael-padlock.c                \
+       rijndael-amd64.S rijndael-arm.S                    \
+       rijndael-ssse3-amd64.c rijndael-ssse3-amd64-asm.S  \
+       rijndael-vaes.c rijndael-vaes-avx2-amd64.S         \
+       rijndael-armv8-ce.c rijndael-armv8-aarch32-ce.S    \
+       rijndael-armv8-aarch64-ce.S rijndael-aarch64.S     \
+       rijndael-ppc.c rijndael-ppc9le.c                   \
+       rijndael-p10le.c rijndael-gcm-p10le.s             \
+       rijndael-ppc-common.h rijndael-ppc-functions.h     \
+       rijndael-s390x.c                                   \
+       rmd160.c \
+       rsa.c \
+       salsa20.c salsa20-amd64.S salsa20-armv7-neon.S \
+       scrypt.c \
+       seed.c \
+       serpent.c serpent-sse2-amd64.S \
+       sm4.c sm4-aesni-avx-amd64.S sm4-aesni-avx2-amd64.S \
+       serpent-avx2-amd64.S serpent-armv7-neon.S \
+       sha1.c sha1-ssse3-amd64.S sha1-avx-amd64.S sha1-avx-bmi2-amd64.S \
+       sha1-avx2-bmi2-amd64.S sha1-armv7-neon.S sha1-armv8-aarch32-ce.S \
+       sha1-armv8-aarch64-ce.S sha1-intel-shaext.c \
+       sha256.c sha256-ssse3-amd64.S sha256-avx-amd64.S \
+       sha256-avx2-bmi2-amd64.S \
+       sha256-armv8-aarch32-ce.S sha256-armv8-aarch64-ce.S \
+       sha256-intel-shaext.c sha256-ppc.c \
+       sha512.c sha512-ssse3-amd64.S sha512-avx-amd64.S \
+       sha512-avx2-bmi2-amd64.S \
+       sha512-armv7-neon.S sha512-arm.S \
+       sha512-ppc.c sha512-ssse3-i386.c \
+       sm3.c sm3-avx-bmi2-amd64.S sm3-aarch64.S \
+       keccak.c keccak_permute_32.h keccak_permute_64.h keccak-armv7-neon.S \
+       stribog.c \
+       tiger.c \
+       whirlpool.c whirlpool-sse2-amd64.S \
+       twofish.c twofish-amd64.S twofish-arm.S twofish-aarch64.S \
+       twofish-avx2-amd64.S \
+       rfc2268.c \
+       camellia.c camellia.h camellia-glue.c camellia-aesni-avx-amd64.S \
+       camellia-aesni-avx2-amd64.h camellia-vaes-avx2-amd64.S \
+       camellia-aesni-avx2-amd64.S camellia-arm.S camellia-aarch64.S \
+       blake2.c \
+       blake2b-amd64-avx2.S blake2s-amd64-avx.S
+
+gost28147.lo: gost-sb.h
+gost-sb.h: gost-s-box
+       ./gost-s-box $@
+
+gost-s-box: gost-s-box.c
+       $(CC_FOR_BUILD) $(CFLAGS_FOR_BUILD) $(LDFLAGS_FOR_BUILD) \
+           $(CPPFLAGS_FOR_BUILD) -o $@ $(srcdir)/gost-s-box.c
+
 
 if ENABLE_O_FLAG_MUNGING
-o_flag_munging = sed -e 's/-O\([2-9s][2-9s]*\)/-O1/' -e 's/-Ofast/-O1/g'
+o_flag_munging = sed -e 's/-O\([2-9sgz][2-9sgz]*\)/-O1/' -e 's/-Ofast/-O1/g'
 else
 o_flag_munging = cat
 endif
 
 
 # We need to lower the optimization for this module.
-tiger.o: $(srcdir)/tiger.c
-       `echo $(COMPILE) -c $(srcdir)/tiger.c | $(o_flag_munging) `
+tiger.o: $(srcdir)/tiger.c Makefile
+       `echo $(COMPILE) -c $< | $(o_flag_munging) `
+
+tiger.lo: $(srcdir)/tiger.c Makefile
+       `echo $(LTCOMPILE) -c $< | $(o_flag_munging) `
+
+
+# We need to disable instrumentation for these modules as they use cc as
+# thin assembly front-end and do not tolerate in-between function calls
+# inserted by compiler as those functions may clobber the XMM registers.
+if ENABLE_INSTRUMENTATION_MUNGING
+instrumentation_munging = sed \
+       -e 's/-fsanitize[=,\-][=,a-z,A-Z,0-9,\,,\-]*//g' \
+       -e 's/-fprofile[=,\-][=,a-z,A-Z,0-9,\,,\-]*//g' \
+       -e 's/-fcoverage[=,\-][=,a-z,A-Z,0-9,\,,\-]*//g'
+else
+instrumentation_munging = cat
+endif
+
+rijndael-aesni.o: $(srcdir)/rijndael-aesni.c Makefile
+       `echo $(COMPILE) -c $< | $(instrumentation_munging) `
+
+rijndael-aesni.lo: $(srcdir)/rijndael-aesni.c Makefile
+       `echo $(LTCOMPILE) -c $< | $(instrumentation_munging) `
+
+rijndael-ssse3-amd64.o: $(srcdir)/rijndael-ssse3-amd64.c Makefile
+       `echo $(COMPILE) -c $< | $(instrumentation_munging) `
+
+rijndael-ssse3-amd64.lo: $(srcdir)/rijndael-ssse3-amd64.c Makefile
+       `echo $(LTCOMPILE) -c $< | $(instrumentation_munging) `
+
+cipher-gcm-intel-pclmul.o: $(srcdir)/cipher-gcm-intel-pclmul.c Makefile
+       `echo $(COMPILE) -c $< | $(instrumentation_munging) `
+
+cipher-gcm-intel-pclmul.lo: $(srcdir)/cipher-gcm-intel-pclmul.c Makefile
+       `echo $(LTCOMPILE) -c $< | $(instrumentation_munging) `
+
+sha1-intel-shaext.o: $(srcdir)/sha1-intel-shaext.c Makefile
+       `echo $(COMPILE) -c $< | $(instrumentation_munging) `
+
+sha1-intel-shaext.lo: $(srcdir)/sha1-intel-shaext.c Makefile
+       `echo $(LTCOMPILE) -c $< | $(instrumentation_munging) `
+
+sha256-intel-shaext.o: $(srcdir)/sha256-intel-shaext.c Makefile
+       `echo $(COMPILE) -c $< | $(instrumentation_munging) `
+
+sha256-intel-shaext.lo: $(srcdir)/sha256-intel-shaext.c Makefile
+       `echo $(LTCOMPILE) -c $< | $(instrumentation_munging) `
+
+sha256-ssse3-i386.o: $(srcdir)/sha256-ssse3-i386.c Makefile
+       `echo $(COMPILE) -c $< | $(instrumentation_munging) `
+
+sha256-ssse3-i386.lo: $(srcdir)/sha256-ssse3-i386.c Makefile
+       `echo $(LTCOMPILE) -c $< | $(instrumentation_munging) `
+
+crc-intel-pclmul.o: $(srcdir)/crc-intel-pclmul.c Makefile
+       `echo $(COMPILE) -c $< | $(instrumentation_munging) `
+
+crc-intel-pclmul.lo: $(srcdir)/crc-intel-pclmul.c Makefile
+       `echo $(LTCOMPILE) -c $< | $(instrumentation_munging) `
+
+if ENABLE_PPC_VCRYPTO_EXTRA_CFLAGS
+ppc_vcrypto_cflags = -maltivec -mvsx -mcrypto
+else
+ppc_vcrypto_cflags =
+endif
+
+rijndael-ppc.o: $(srcdir)/rijndael-ppc.c Makefile
+       `echo $(COMPILE) $(ppc_vcrypto_cflags) -c $< | 
$(instrumentation_munging) `
+
+rijndael-ppc.lo: $(srcdir)/rijndael-ppc.c Makefile
+       `echo $(LTCOMPILE) $(ppc_vcrypto_cflags) -c $< | 
$(instrumentation_munging) `
+
+rijndael-ppc9le.o: $(srcdir)/rijndael-ppc9le.c Makefile
+       `echo $(COMPILE) $(ppc_vcrypto_cflags) -c $< | 
$(instrumentation_munging) `
+
+rijndael-ppc9le.lo: $(srcdir)/rijndael-ppc9le.c Makefile
+       `echo $(LTCOMPILE) $(ppc_vcrypto_cflags) -c $< | 
$(instrumentation_munging) `
+
+rijndael-p10le.o: $(srcdir)/rijndael-p10le.c Makefile
+       `echo $(COMPILE) $(ppc_vcrypto_cflags) -c $< | 
$(instrumentation_munging) `
+
+rijndael-p10le.lo: $(srcdir)/rijndael-p10le.c Makefile
+       `echo $(LTCOMPILE) $(ppc_vcrypto_cflags) -c $< | 
$(instrumentation_munging) `
+
+sha256-ppc.o: $(srcdir)/sha256-ppc.c Makefile
+       `echo $(COMPILE) $(ppc_vcrypto_cflags) -c $< | 
$(instrumentation_munging) `
+
+sha256-ppc.lo: $(srcdir)/sha256-ppc.c Makefile
+       `echo $(LTCOMPILE) $(ppc_vcrypto_cflags) -c $< | 
$(instrumentation_munging) `
+
+sha512-ppc.o: $(srcdir)/sha512-ppc.c Makefile
+       `echo $(COMPILE) $(ppc_vcrypto_cflags) -c $< | 
$(instrumentation_munging) `
+
+sha512-ppc.lo: $(srcdir)/sha512-ppc.c Makefile
+       `echo $(LTCOMPILE) $(ppc_vcrypto_cflags) -c $< | 
$(instrumentation_munging) `
+
+chacha20-ppc.o: $(srcdir)/chacha20-ppc.c Makefile
+       `echo $(COMPILE) $(ppc_vcrypto_cflags) -c $< | 
$(instrumentation_munging) `
+
+chacha20-ppc.lo: $(srcdir)/chacha20-ppc.c Makefile
+       `echo $(LTCOMPILE) $(ppc_vcrypto_cflags) -c $< | 
$(instrumentation_munging) `
+
+crc-ppc.o: $(srcdir)/crc-ppc.c Makefile
+       `echo $(COMPILE) $(ppc_vcrypto_cflags) -c $< | 
$(instrumentation_munging) `
+
+crc-ppc.lo: $(srcdir)/crc-ppc.c Makefile
+       `echo $(LTCOMPILE) $(ppc_vcrypto_cflags) -c $< | 
$(instrumentation_munging) `
+
+cipher-gcm-ppc.o: $(srcdir)/cipher-gcm-ppc.c Makefile
+       `echo $(COMPILE) $(ppc_vcrypto_cflags) -c $< | 
$(instrumentation_munging) `
 
-tiger.lo: $(srcdir)/tiger.c
-       `echo $(LTCOMPILE) -c $(srcdir)/tiger.c | $(o_flag_munging) `
+cipher-gcm-ppc.lo: $(srcdir)/cipher-gcm-ppc.c Makefile
+       `echo $(LTCOMPILE) $(ppc_vcrypto_cflags) -c $< | 
$(instrumentation_munging) `
diff --git a/grub-core/lib/libgcrypt/cipher/Manifest 
b/grub-core/lib/libgcrypt/cipher/Manifest
deleted file mode 100644
index 0cd64f71f..000000000
--- a/grub-core/lib/libgcrypt/cipher/Manifest
+++ /dev/null
@@ -1,73 +0,0 @@
-# Manifest - checksums of the cipher directory
-# Copyright 2003 Free Software Foundation, Inc.
-#
-# This file is part of Libgcrypt.
-#
-# Libgcrypt is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser general Public License as
-# published by the Free Software Foundation; either version 2.1 of
-# the License, or (at your option) any later version.
-#
-# Libgcrypt is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
-
-# Checksums for all source files in this directory. Format is
-# filename, blanks, base-64 part of an OpenPGP detached signature
-# without the header lines.  Blank lines and lines beginning with a
-# hash mark are ignored.  A tool to process this file is available by
-# cvs -d :pserver:anoncvs@cvs.gnupg.org:/cvs/wk co misc-scripts/manifest-tool
-#
-# The special entry "$names$" holds a signature over all sorted
-# filenames excluding itself.
-
-
-# Algorithm API
-cipher.c 
iQCVAwUAQDzrVjEAnp832S/7AQIPDgP+OVJ/YNWY5m7c09EBbPAzL/WsGoj6wrBNMmkRlMOqTHeh+OOtjuFHt1f9uhfM2Nzl7sJ5+h4ryZKLEZmQPRMTZTnAqkvGdsrJWJnigUA9QwYdV0ONqC9C63gpuG465gO9TZVOqlQu/FTxSRuTQYUulkaBNG71n8nZEOusBVwV2YA==58xH
-pubkey.c 
iQCVAwUAP9XQ3jEAnp832S/7AQJ5UgQAyHfEBvPVJ8wTRg8c7ixS2GiVmIgwIo5tvQaiQJTPWASevvYrB+2Z2qa9cATyu50ACjLzbaquGBgPzjJV3dU/qttT1gCqRuN/LCNvXFe5qnIZezejc3RAadFNTw/pOTHq0wxD1Keg66ruei9R36Nba59pEQIWIBXTfubRft2hMYk==E09t
-ac.c 
iQCVAwUAQDzsOzEAnp832S/7AQJCBQP/WI6EV/dsR4rmha6RVhvkjZo17kQ8z6pIl5J3cXOvqEkIFeD2HYu3HHrWST5l7yXlffhpDkVHkfMih4ruK76q6Fm0dxZ98pO4C/dVtgimlvvcy/wOQjpzsE0fYAe1BYdg81LJ09X33vW5x6C29lunfKROO2tPlV5i8ffeoFvmMF8==j26g
-md.c 
iQCVAwUAP+NFGjEAnp832S/7AQJs8wP/Qdk0EAKsyr3O1/pmOSN8AG4rPKbd6KDTzvoBPAN4upFwKYY4hWwvy12Q3YU9DmECrzZkRCXHR7mljVQKs6B7CRZJKjFKmOELpcJDtKvu40vTs1bOH4k9iJYZpGgRA83nkQ+ELAcphAbCA+KIpVr2K4mCJAB0FhpC2uOQ50JHAko==BeF6
-primegen.c 
iQCVAwUAQDzsoDEAnp832S/7AQKYRwP/TqAQBm1rHTnF0HYE05PqXfWlOqa6EosqVpaOcs/OIW6PaqX0xH1UlrukK7jNOjK3xC4o1qNQ1UKzz2dvQaq1bMvNNizeavxAh10SJZc0hIc/ofc83IbjLh8SZVWQ67JxjsUd3DOXmSmhPZ+Pqd7cUIiw8fDoF+I9EZqy3COu1wY==1ebT
-
-# Algorithm implementations
-arcfour.c 
iQCVAwUAP9XR/TEAnp832S/7AQJcRwP6AlvYEx++fpT4mIYo0xRDqKEQeqMQvbaRhIg2eV74JxItpHa3q5YsYIl+n1yUz5g35JRWWXSWmAZBwO5wLKsHii4kRUhgrKWnSoQZoPpl49L5+N3R58ON3S0ru5lsBiEJEze3xplf2vqwrH9v1QHVD+gU7UTlfNqrIJoOUXN+1O4==Tq+x
-blowfish.c 
iQCVAwUAP9XTETEAnp832S/7AQJaEgQAgiqqfuO+zQtscgTB0rvOzVymIKjRKjYhFuLjVuc79G4z1RCAffvIn/YM2d7kt+Z/QF7zjcTAOgETCQL1XokpX2zz9HPAMi2tlDY5zsDufTNqj0n4WBL9nM7w6XAvsiwP1B3bqCTv9SjJV4KbxJ58vw1yQE+sqW74R/QIHFvC7mU==wZnX
-cast5.c 
iQCVAwUAP9XT6DEAnp832S/7AQJ3xgP/ehLjEN3GELGudbqeo91Xd+PqitHrkuBbtRIYX7Udd/fyXLN+h8rMJVyIQX2m+mpxbBxudVU3x8/DNT8B0ZHAwK6qqJmEBLLhEYPgIuF76i9LMrP1KqUPhAwRZ2OppjIIugBQ+rP74aD4eLyd/aKQHNuXML8QGWR6KwQShohXM5I==/BRh
-crc.c 
iQCVAwUAP7ouejEAnp832S/7AQIgwQQApg5Nm63tH5DQkbN+zPzMO9Ygoj3ukxfFTyTBPYSXYKMiTjEbESegaU40uN8jnz2vprcIQWcgZfzO4+opEJMcI35aPwzEk0vKOp0S/PrBLUY2rJfnDVkX5XgJFZa2Q7LLe826UEBzTVYW924utiCCe8oOaOEWVNpg1mqdknu3M9o==kz5D
-des.c 
iQCVAwUAQCN2oDEAnp832S/7AQL/jwP6Auoq6nZCDBjpgc9tDzuIRwa9DqyuM3gX94uvgEpUwdHszb2bG43dz03kVmcYxtj1MzXbyCeCZOwox0b2SKmLgxIbrNP6yGbzVdTj6592gDYuf/ZXmc1ZNJ1DDldcPQ0n9fXUipUPwyPaNWo3mSZaNcMKSWWzdK0J6ciG6nk7SWI==9k/t
-dsa.c 
iQCVAwUAP9XZHDEAnp832S/7AQLBRgP/XrBzTEYx5ccMj1MMb6sg37liEHdIyyy49zjvt6jUqxj4RuwVEN8S6v3u4q/QyJkHAi1E0EkREgENlyHW6PKWhYbcrd0vPIAN15yjnl2yqtrCrJImexUCoqJJewK0E4JOicGbabTil8MZjk+mbhEPnjJBqOkyP1w0i31pEDgE/8M==pC8s
-elgamal.c 
iQCVAwUAP9XbYzEAnp832S/7AQLXagQA3HrvspZfbTGgmUH0IqLQTJ0exUPxJv5DET2TvoIy62trDmMN6lTAj5P+a7jQ8udcu0w+mR2vXUHcxUpNA2PxLaMwGzNSY4zRDNe9r3SFTDrFm6m4y9Ko2e8XtEA+WF6P/XLpck4Jn7vMEDmVGPwkNd22kXFFE8dBGwG6i5Hk1Mk==oBUs
-md4.c 
iQCVAwUAP9h50DEAnp832S/7AQJhHgQAzNA/B6MWFDlCtPkIVaW8RpP1Eg0ZNMsy0s7SJkopOCBlu6CwXUOKe+8ppcSxhjYKh4i4uQr/QtfipYlBjzKJGnrafoF/NugXNCOHSTGT11TvK7mCiBuUMVgvZGAlOJImk6eTTfUjRrMfaXM/SWl8bdJ4ZpzdjEyVh89r7I5JrGk==x2UD
-md5.c 
iQCVAwUAP9h7LzEAnp832S/7AQJUGQP/c0cbf6WZXCzmjufHxiE9FAQBzTsA0WtaNqdFcHl7fhmikGtknlaED8n5a7eYd/C481UQW6Wgq/oZdsvgoPWPhG3fOCy2CFP9cZVXITuMSf0ucyZTFUJNO15fnZ+nDfsUv+JPdv1aSeRinAUtfAcSKfkSyR9BCPZvkx+tgU6cphU==Zv+h
-rijndael.c 
iQCVAwUAP9h9cTEAnp832S/7AQKF1AP+P2L/tPqDJRDg+/fwbOk8Ts0MNxnvvYEm3gE73TKuLt1S+B2+jkrZcKNvM5VGPnVMJbnS0lmIK04nmedHCOftGTOwhGulZAHHIaKGystT3Jql4iPws/JMgAjE7Fyxh5WZMtB9yEljKBpJ5XNqhrMvvxcHpnyP3+YzIXNwzk34V+c==dJ5k
-rmd160.c 
iQCVAwUAP9h+bTEAnp832S/7AQK1OgP+PNKF6Nzi6X93easVlksdLqKEsArCAw2QjGWDGyxTnbiJM55qAl9JxR1mn3V+oOL7izLLwTt6EYK9evhzfcxY5N5Mni85RAcsLPsuAfQDEzjI6GUWHtQUKPbM+BaorzfhQjYFSZyvum/dZYJ/WfiwwwhqqIKyVU2ZFSqA38YGC/c==9jdA
-rsa.c 
iQCVAwUAP9iHIzEAnp832S/7AQKAYwQAuWtnMte54QHN+Hij9t4sGuypXogajOb1vQQwGgS0fKsaBZsuSP2amze4o5diIvsQTsFQ4CzjvqoCVuBDoHM3xkSD8wGDizgvtCamAxkdbF7wmzldKFn8SpJqlVwWQMP6kk1IjXHEuYb4IDWGTbVMhfEu+eOlU8+PSK4IhZqNvt4==/3hp
-serpent.c 
iQCVAwUAP9h/VzEAnp832S/7AQLyCwP/d1zbmb7l/PriZNa9/Z7mo01XFe5MnAqCfIwhl9GjeaMszcoS37jECNq5nLvrTTFIIJpm3rvBePwiCG4Wwx1I18HCxaP198pcSaR+BLOJ3Aj52EZPrxtqlDKuFr38ZOP5giyUqUYVYGVdrz4kRMNWAZQK53GeJnGhXCnhxojLEgA==ck46
-sha1.c 
iQCVAwUAP9iATTEAnp832S/7AQKcSwQAwAs/HnNqho3lU1ZUgCPNt5P2/Brm6W21+wWWGKJkSrra/c4NYVKJGDDwlsFE0b9ln1uZt7bHReFkKXK3JnrKTmNVcx/Cy64iCMRNMhaM72Mqy7wWx5yHBAmMBxzFGnNQKbmeY52zeGih5HsNLSibc2pPuOViWo2JPJ5Ci/wIwl8==/wtO
-sha256.c 
iQCVAwUAP9iAtzEAnp832S/7AQJD2QP/UqvL0hhjG1wEFbGrdkV9tba1sMDXdnnK6X7HdLuRpVAgNiQiFf8JDmntd/dZ2Q71p4Uae2ctqve4WoEijPUZPjACnpuZfx0SEQL0lQBkwxzJp7lz9ujVtwQ2cM/aYexJkXcWgGcloJNLM3JbWPGIJnuYbr/IwJ6RQF9vgj0357o==UWO1
-sha512.c 
iQCVAwUAP9iBTDEAnp832S/7AQIPBAQA28CJSUQLiW0s2x9u8/OH2eKnxPjA4sZmb50WP7920Lem66P31C3BrOqwfBot4RLhjL+zh/+Uc4s3HPwApZuj9E4BxNMlqLv+Tqk++DAbdaOeYT4jeUt+mlhQQ6mH/RDsy32rZsNsGQ2bUGxazZmfG++PL3JyhawqCy00SUDr/o0==H+0X
-tiger.c 
iQCVAwUAP9iCfjEAnp832S/7AQKufwP/fryv3MqSOYY+90325DH7X3/CtekxeooN0scGsHX0fxBakWSMecTNrj33KPddLS46gU/S89zIc2N/Bw/7EVIAXVFA3/3Ip+OrFOuIMO4Py1sCdB8o2Y+5ygv8iXLcsXIq1O0av79i9g774V3uaXa2qN9ZnXe0AEhcy8FHJ2i/wro==5XVB
-twofish.c 
iQCVAwUAP9iD6TEAnp832S/7AQKUnQP/Rq8FaYeHTG7HbZuqAs9pbPitzjDbkdZddmInWR7NmevBkKvhsJALjVooc0KGQfo2lAAmy3Xi/4QQN8VPn51DVjDIgf7x+DQh/9TFJHMccxI9asUgi4+TNnmMqLU1k3N8S2PjyZ1sjeC8B79fKPpwCzj72WkqPkzZw3l2jArr+dU==NdJT
-rfc2268.c 
iQCVAwUAQCN+3jEAnp832S/7AQLv1gQA1hJh29hAjKi4uLSGxXvJ6cyYmPdmevdKrbLnuHZWtHe4xvCgy/nTdEojEpxgLp/hL/ogasuWRC1W16Wiz9ryxf7YR0uhZWayO/bQNagpfU5MIkJTLuKqqgpwYumCSQfOugXVAqcgEzj+13eeyJaFVrzwrNa67sh84nmbjOjNjvE==0zBq
-
-# Random number related
-random.c 
iQCVAwUAP7nsITEAnp832S/7AQK4SAQAtvfUgrtGOQ2PlxGMla0qJLPHjJacMwgq0ecusiI79elPdDsFfCCk6dK1Ug2kFbNm22nCGHNcUquqbX7noi7ZVQnmPBQXzyLNZd7GmrawRZfdlRerTUDBpSnR8V8ui/5+YYp627E7kKGC0hPSgqXFql6oBMIfno0LZwFJTjIevRY==L419
-random.h 
iQCVAwUAP7ovKDEAnp832S/7AQJ3bQQAjnPebnyTC7sphAv2I7uIz+yPgw1ZfbVhLv+OiWDlO9ish+fRyyMpy+HELBOgZjJdgRegqhlZC6qyns5arM/VglYi+PzvdLO3hIqHE/YFfpIFPz8wBrcmlqrYyd3CsGqcYsfjocXNttCBLeSWmoJ09ltKQH8yzJf3oAgN6X1yuc4==eNoU
-rand-internal.h 
iQCVAwUAP7ouvDEAnp832S/7AQLYnAQAhdI7ERoJVCkV8GiV7MjaUxv1WIL7iZ+jIOvVhv4fNyhCGCGoEtTjkyput/lj7Nsh3FXEqRhypGGrCLf47x/gua5n+BwffogxVyUDqiOyyGhNTPpe3fQcNBvbPCtco8yMK4GJO5G3BqzlPyN+BMeogLymyV6Sm1mvh5LZDyAFbfQ==tZSE
-rndlinux.c 
iQCVAwUAP9iPYTEAnp832S/7AQL6/AP/ZDrbOkVuB9qJ7sKeX1MImZEsz3mi0xPovJzaBtBU7a0idcUKrWYOvQFWRlLUeq0iCT6+h2l5bniP7q7hepzlKa+VPY9VWaQthqeJm2l5LN6QQ5PyMfBq04QuBncw9BJnCGmEyTLt3RxIXBAPdxmiVxtcRIFUqCBtQvoUXGLvemw==t37k
-rndegd.c 
iQCVAwUAP9iPRDEAnp832S/7AQImBQP/WHKg+hKXcm1pQvilzML0jZpwK5PAMM4uBnnPJNIXWOYBO6I/Xg9d/tPLg8NlmmtyQCo2Eu0ybDSt+8mu+dWveAys+0LTi0MIqeP9BMzCKz8dnWH6+S8huLXwTF3m0IrqM0JLb6b71GK9SOq6sWQ22yW5vf61hXP8kH9dhIaoMZs==FaHV
-rndunix.c 
iQCVAwUAP9iQlzEAnp832S/7AQL/KgQA29GnvcD4Xb5qjDMBgW9THEE4+4lfex/6k+Fh0IT61OLJsWVLJ7bJpRntburw4uQm4Tf7CO8vaiDFDYhKKrzXeOF1fmdpcL8hA+fNp9I/MUOc4e9kN9+YJ9wikVa0SZj1OBfhzgcFLd1xOtulkr3ii52HLF9vhrxzkgVwvD10Bi8==2cML
-rndw32.c 
iQCVAwUAP9iRKDEAnp832S/7AQIuaAQA3AJr3WqnxNDsWCIdvehf8Suotthj+laX8nJsvDfFhXPKcXDpsg0wTTXSnnKgyED53+uYiMDnVRsxeWAyhKwvx1MjjlaSMMjzbH6isWTH8FaWpLgrxEkXoPeNqYf5FXpdUkcUxGX2RkQeuX/cIfiHLNE9CV0usaF2jysjBX2iERY==EEnO
-
-# Helper
-bithelp.h 
iQCVAwUAP7ouPTEAnp832S/7AQKXggQAqjcgvihIF3WclOgw1JV2rbARw4ISIDRMFqdaNCqBRx6BwEz3UGsEIlz6+iR1sS/reqN61WvtjLb+D0+tujAkGrgQJhFLG85WtG2tB5UVoI3am1fpkwiRm+bR4rv0rGk0BYk81bC7+l4KrK9o5lVp4lCsrorlUKsd48lNmBHyAXM==mDDN
-rmd.h 
iQCVAwUAP7oumjEAnp832S/7AQJiJQP/V4bJwjZaYndJzV+KRnIDbl1koHuw+ZK5heMYVu8Qk4ylqv//BGyeRa3jZCcfPHI35q6HilCs2VBm8hiBMjHSqY/VPn2ZQ0yg/lt6qEvl7YjsLmyMICvjG+ncszHoq9pRvnF3vTnM18sPIioXLk8fskuM0XOCNBs0ARBAQjY9UGI==olUN
-
-# Configuration
-Makefile.am 
iQCVAwUAQCN33TEAnp832S/7AQKFJAQAz7BDkC814q+QiuE/jnutJHR5qlgbrm3ikGbQwdRzYUscst4bCCWy3uKL/sIPGLg+JQXtF5FnsQy3s4D9BOYhp72cA9ktYK65hhi4pNm/JQ0lXkZMNfk8Go5lNzKezlWwHvkMwRXR0Fep0wPdyeaKW5BfaW2ABvgep6Bp+hHEbyg==zSyi
-$names$ 
iQCVAwUAQCN3EDEAnp832S/7AQJXLAP8DvHTpm5DkTF35EmzeKpi9ie59AZcZanD19ir/e/7+PaQxr2riuLHDGwFKTju+dcvvBsqrygXOC378GXVWzIF2OZwS4EdDcJ+pgojo9UpsqpKsJHouY4Ugx5cQialxba462kUn8hcihSBnMyc4LzbJ5WQ4puQuqy544d2x94+2ms==G4Ls
diff --git a/grub-core/lib/libgcrypt/cipher/ac.c 
b/grub-core/lib/libgcrypt/cipher/ac.c
deleted file mode 100644
index 63f6fcd11..000000000
--- a/grub-core/lib/libgcrypt/cipher/ac.c
+++ /dev/null
@@ -1,3301 +0,0 @@
-/* ac.c - Alternative interface for asymmetric cryptography.
-   Copyright (C) 2003, 2004, 2005, 2006
-                 2007, 2008  Free Software Foundation, Inc.
-
-   This file is part of Libgcrypt.
-
-   Libgcrypt is free software; you can redistribute it and/or modify
-   it under the terms of the GNU Lesser general Public License as
-   published by the Free Software Foundation; either version 2.1 of
-   the License, or (at your option) any later version.
-
-   Libgcrypt is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with this program; if not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <config.h>
-#include <errno.h>
-#include <stdlib.h>
-#include <string.h>
-#include <stdio.h>
-#include <stddef.h>
-
-#include "g10lib.h"
-#include "cipher.h"
-#include "mpi.h"
-
-
-
-/* At the moment the ac interface is a wrapper around the pk
-   interface, but this might change somewhen in the future, depending
-   on how many people prefer the ac interface.  */
-
-/* Mapping of flag numbers to the according strings as it is expected
-   for S-expressions.  */
-static struct number_string
-{
-  int number;
-  const char *string;
-} ac_flags[] =
-  {
-    { GCRY_AC_FLAG_NO_BLINDING, "no-blinding" },
-  };
-
-/* The positions in this list correspond to the values contained in
-   the gcry_ac_key_type_t enumeration list.  */
-static const char *ac_key_identifiers[] =
-  {
-    "private-key",
-    "public-key"
-  };
-
-/* These specifications are needed for key-pair generation; the caller
-   is allowed to pass additional, algorithm-specific `specs' to
-   gcry_ac_key_pair_generate.  This list is used for decoding the
-   provided values according to the selected algorithm.  */
-struct gcry_ac_key_generate_spec
-{
-  int algorithm;               /* Algorithm for which this flag is
-                                  relevant.  */
-  const char *name;            /* Name of this flag.  */
-  size_t offset;               /* Offset in the cipher-specific spec
-                                  structure at which the MPI value
-                                  associated with this flag is to be
-                                  found.  */
-} ac_key_generate_specs[] =
-  {
-    { GCRY_AC_RSA, "rsa-use-e", offsetof (gcry_ac_key_spec_rsa_t, e) },
-    { 0 }
-  };
-
-/* Handle structure.  */
-struct gcry_ac_handle
-{
-  int algorithm;               /* Algorithm ID associated with this
-                                  handle.  */
-  const char *algorithm_name;  /* Name of the algorithm.  */
-  unsigned int flags;          /* Flags, not used yet.  */
-  gcry_module_t module;                /* Reference to the algorithm
-                                  module.  */
-};
-
-/* A named MPI value.  */
-typedef struct gcry_ac_mpi
-{
-  char *name;                  /* Self-maintained copy of name.  */
-  gcry_mpi_t mpi;              /* MPI value.         */
-  unsigned int flags;          /* Flags.             */
-} gcry_ac_mpi_t;
-
-/* A data set, that is simply a list of named MPI values.  */
-struct gcry_ac_data
-{
-  gcry_ac_mpi_t *data;         /* List of named values.      */
-  unsigned int data_n;         /* Number of values in DATA.  */
-};
-
-/* A single key.  */
-struct gcry_ac_key
-{
-  gcry_ac_data_t data;         /* Data in native ac structure.  */
-  gcry_ac_key_type_t type;     /* Type of the key.              */
-};
-
-/* A key pair.  */
-struct gcry_ac_key_pair
-{
-  gcry_ac_key_t public;
-  gcry_ac_key_t secret;
-};
-
-
-
-/*
- * Functions for working with data sets.
- */
-
-/* Creates a new, empty data set and store it in DATA.  */
-gcry_error_t
-_gcry_ac_data_new (gcry_ac_data_t *data)
-{
-  gcry_ac_data_t data_new;
-  gcry_error_t err;
-
-  if (fips_mode ())
-    return gpg_error (GPG_ERR_NOT_SUPPORTED);
-
-  data_new = gcry_malloc (sizeof (*data_new));
-  if (! data_new)
-    {
-      err = gcry_error_from_errno (errno);
-      goto out;
-    }
-
-  data_new->data = NULL;
-  data_new->data_n = 0;
-  *data = data_new;
-  err = 0;
-
- out:
-
-  return err;
-}
-
-/* Destroys all the entries in DATA, but not DATA itself.  */
-static void
-ac_data_values_destroy (gcry_ac_data_t data)
-{
-  unsigned int i;
-
-  for (i = 0; i < data->data_n; i++)
-    if (data->data[i].flags & GCRY_AC_FLAG_DEALLOC)
-      {
-       gcry_mpi_release (data->data[i].mpi);
-       gcry_free (data->data[i].name);
-      }
-}
-
-/* Destroys the data set DATA.  */
-void
-_gcry_ac_data_destroy (gcry_ac_data_t data)
-{
-  if (data)
-    {
-      ac_data_values_destroy (data);
-      gcry_free (data->data);
-      gcry_free (data);
-    }
-}
-
-/* This function creates a copy of the array of named MPIs DATA_MPIS,
-   which is of length DATA_MPIS_N; the copy is stored in
-   DATA_MPIS_CP.  */
-static gcry_error_t
-ac_data_mpi_copy (gcry_ac_mpi_t *data_mpis, unsigned int data_mpis_n,
-                 gcry_ac_mpi_t **data_mpis_cp)
-{
-  gcry_ac_mpi_t *data_mpis_new;
-  gcry_error_t err;
-  unsigned int i;
-  gcry_mpi_t mpi;
-  char *label;
-
-  data_mpis_new = gcry_calloc (data_mpis_n, sizeof (*data_mpis_new));
-  if (! data_mpis_new)
-    {
-      err = gcry_error_from_errno (errno);
-      goto out;
-    }
-  memset (data_mpis_new, 0, sizeof (*data_mpis_new) * data_mpis_n);
-
-  err = 0;
-  for (i = 0; i < data_mpis_n; i++)
-    {
-      /* Copy values.  */
-
-      label = gcry_strdup (data_mpis[i].name);
-      mpi = gcry_mpi_copy (data_mpis[i].mpi);
-      if (! (label && mpi))
-       {
-         err = gcry_error_from_errno (errno);
-         gcry_mpi_release (mpi);
-         gcry_free (label);
-         break;
-       }
-
-      data_mpis_new[i].flags = GCRY_AC_FLAG_DEALLOC;
-      data_mpis_new[i].name = label;
-      data_mpis_new[i].mpi = mpi;
-    }
-  if (err)
-    goto out;
-
-  *data_mpis_cp = data_mpis_new;
-  err = 0;
-
- out:
-
-  if (err)
-    if (data_mpis_new)
-      {
-       for (i = 0; i < data_mpis_n; i++)
-         {
-           gcry_mpi_release (data_mpis_new[i].mpi);
-           gcry_free (data_mpis_new[i].name);
-         }
-       gcry_free (data_mpis_new);
-      }
-
-  return err;
-}
-
-/* Create a copy of the data set DATA and store it in DATA_CP.  */
-gcry_error_t
-_gcry_ac_data_copy (gcry_ac_data_t *data_cp, gcry_ac_data_t data)
-{
-  gcry_ac_mpi_t *data_mpis = NULL;
-  gcry_ac_data_t data_new;
-  gcry_error_t err;
-
-  if (fips_mode ())
-    return gpg_error (GPG_ERR_NOT_SUPPORTED);
-
-  /* Allocate data set.  */
-  data_new = gcry_malloc (sizeof (*data_new));
-  if (! data_new)
-    {
-      err = gcry_error_from_errno (errno);
-      goto out;
-    }
-
-  err = ac_data_mpi_copy (data->data, data->data_n, &data_mpis);
-  if (err)
-    goto out;
-
-  data_new->data_n = data->data_n;
-  data_new->data = data_mpis;
-  *data_cp = data_new;
-
- out:
-
-  if (err)
-    gcry_free (data_new);
-
-  return err;
-}
-
-/* Returns the number of named MPI values inside of the data set
-   DATA.  */
-unsigned int
-_gcry_ac_data_length (gcry_ac_data_t data)
-{
-  return data->data_n;
-}
-
-
-/* Add the value MPI to DATA with the label NAME.  If FLAGS contains
-   GCRY_AC_FLAG_COPY, the data set will contain copies of NAME
-   and MPI.  If FLAGS contains GCRY_AC_FLAG_DEALLOC or
-   GCRY_AC_FLAG_COPY, the values contained in the data set will
-   be deallocated when they are to be removed from the data set.  */
-gcry_error_t
-_gcry_ac_data_set (gcry_ac_data_t data, unsigned int flags,
-                  const char *name, gcry_mpi_t mpi)
-{
-  gcry_error_t err;
-  gcry_mpi_t mpi_cp;
-  char *name_cp;
-  unsigned int i;
-
-  name_cp = NULL;
-  mpi_cp = NULL;
-
-  if (fips_mode ())
-    return gpg_error (GPG_ERR_NOT_SUPPORTED);
-
-  if (flags & ~(GCRY_AC_FLAG_DEALLOC | GCRY_AC_FLAG_COPY))
-    {
-      err = gcry_error (GPG_ERR_INV_ARG);
-      goto out;
-    }
-
-  if (flags & GCRY_AC_FLAG_COPY)
-    {
-      /* Create copies.  */
-
-      flags |= GCRY_AC_FLAG_DEALLOC;
-      name_cp = gcry_strdup (name);
-      mpi_cp = gcry_mpi_copy (mpi);
-      if (! (name_cp && mpi_cp))
-       {
-         err = gcry_error_from_errno (errno);
-         goto out;
-       }
-    }
-
-  /* Search for existing entry.  */
-  for (i = 0; i < data->data_n; i++)
-    if (! strcmp (name, data->data[i].name))
-      break;
-  if (i < data->data_n)
-    {
-      /* An entry for NAME does already exist.  */
-      if (data->data[i].flags & GCRY_AC_FLAG_DEALLOC)
-       {
-         gcry_mpi_release (data->data[i].mpi);
-         gcry_free (data->data[i].name);
-       }
-    }
-  else
-    {
-      /* Create a new entry.  */
-
-      gcry_ac_mpi_t *ac_mpis;
-
-      ac_mpis = gcry_realloc (data->data,
-                             sizeof (*data->data) * (data->data_n + 1));
-      if (! ac_mpis)
-       {
-         err = gcry_error_from_errno (errno);
-         goto out;
-       }
-
-      if (data->data != ac_mpis)
-       data->data = ac_mpis;
-      data->data_n++;
-    }
-
-  data->data[i].name = name_cp ? name_cp : ((char *) name);
-  data->data[i].mpi = mpi_cp ? mpi_cp : mpi;
-  data->data[i].flags = flags;
-  err = 0;
-
- out:
-
-  if (err)
-    {
-      gcry_mpi_release (mpi_cp);
-      gcry_free (name_cp);
-    }
-
-  return err;
-}
-
-/* Stores the value labelled with NAME found in the data set DATA in
-   MPI.  The returned MPI value will be released in case
-   gcry_ac_data_set is used to associate the label NAME with a
-   different MPI value.  */
-gcry_error_t
-_gcry_ac_data_get_name (gcry_ac_data_t data, unsigned int flags,
-                       const char *name, gcry_mpi_t *mpi)
-{
-  gcry_mpi_t mpi_return;
-  gcry_error_t err;
-  unsigned int i;
-
-  if (fips_mode ())
-    return gpg_error (GPG_ERR_NOT_SUPPORTED);
-
-  if (flags & ~(GCRY_AC_FLAG_COPY))
-    {
-      err = gcry_error (GPG_ERR_INV_ARG);
-      goto out;
-    }
-
-  for (i = 0; i < data->data_n; i++)
-    if (! strcmp (name, data->data[i].name))
-      break;
-  if (i == data->data_n)
-    {
-      err = gcry_error (GPG_ERR_NOT_FOUND);
-      goto out;
-    }
-
-  if (flags & GCRY_AC_FLAG_COPY)
-    {
-      mpi_return = gcry_mpi_copy (data->data[i].mpi);
-      if (! mpi_return)
-       {
-         err = gcry_error_from_errno (errno); /* FIXME? */
-         goto out;
-       }
-    }
-  else
-    mpi_return = data->data[i].mpi;
-
-  *mpi = mpi_return;
-  err = 0;
-
- out:
-
-  return err;
-}
-
-/* Stores in NAME and MPI the named MPI value contained in the data
-   set DATA with the index IDX.  NAME or MPI may be NULL.  The
-   returned MPI value will be released in case gcry_ac_data_set is
-   used to associate the label NAME with a different MPI value.  */
-gcry_error_t
-_gcry_ac_data_get_index (gcry_ac_data_t data, unsigned int flags,
-                        unsigned int idx,
-                        const char **name, gcry_mpi_t *mpi)
-{
-  gcry_error_t err;
-  gcry_mpi_t mpi_cp;
-  char *name_cp;
-
-  name_cp = NULL;
-  mpi_cp = NULL;
-
-  if (fips_mode ())
-    return gpg_error (GPG_ERR_NOT_SUPPORTED);
-
-  if (flags & ~(GCRY_AC_FLAG_COPY))
-    {
-      err = gcry_error (GPG_ERR_INV_ARG);
-      goto out;
-    }
-
-  if (idx >= data->data_n)
-    {
-      err = gcry_error (GPG_ERR_INV_ARG);
-      goto out;
-    }
-
-  if (flags & GCRY_AC_FLAG_COPY)
-    {
-      /* Return copies to the user.  */
-      if (name)
-       {
-         name_cp = gcry_strdup (data->data[idx].name);
-         if (! name_cp)
-           {
-             err = gcry_error_from_errno (errno);
-             goto out;
-           }
-       }
-      if (mpi)
-       {
-         mpi_cp = gcry_mpi_copy (data->data[idx].mpi);
-         if (! mpi_cp)
-           {
-             err = gcry_error_from_errno (errno);
-             goto out;
-           }
-       }
-    }
-
-  if (name)
-    *name = name_cp ? name_cp : data->data[idx].name;
-  if (mpi)
-    *mpi = mpi_cp ? mpi_cp : data->data[idx].mpi;
-  err = 0;
-
- out:
-
-  if (err)
-    {
-      gcry_mpi_release (mpi_cp);
-      gcry_free (name_cp);
-    }
-
-  return err;
-}
-
-/* Convert the data set DATA into a new S-Expression, which is to be
-   stored in SEXP, according to the identifiers contained in
-   IDENTIFIERS.  */
-gcry_error_t
-_gcry_ac_data_to_sexp (gcry_ac_data_t data, gcry_sexp_t *sexp,
-                      const char **identifiers)
-{
-  gcry_sexp_t sexp_new;
-  gcry_error_t err;
-  char *sexp_buffer;
-  size_t sexp_buffer_n;
-  size_t identifiers_n;
-  const char *label;
-  gcry_mpi_t mpi;
-  void **arg_list;
-  size_t data_n;
-  unsigned int i;
-
-  sexp_buffer_n = 1;
-  sexp_buffer = NULL;
-  arg_list = NULL;
-  err = 0;
-
-  if (fips_mode ())
-    return gpg_error (GPG_ERR_NOT_SUPPORTED);
-
-  /* Calculate size of S-expression representation.  */
-
-  i = 0;
-  if (identifiers)
-    while (identifiers[i])
-      {
-       /* For each identifier, we add "(<IDENTIFIER>)".  */
-       sexp_buffer_n += 1 + strlen (identifiers[i]) + 1;
-       i++;
-      }
-  identifiers_n = i;
-
-  if (! identifiers_n)
-    /* If there are NO identifiers, we still add surrounding braces so
-       that we have a list of named MPI value lists.  Otherwise it
-       wouldn't be too much fun to process these lists.  */
-    sexp_buffer_n += 2;
-
-  data_n = _gcry_ac_data_length (data);
-  for (i = 0; i < data_n; i++)
-    {
-      err = gcry_ac_data_get_index (data, 0, i, &label, NULL);
-      if (err)
-       break;
-      /* For each MPI we add "(<LABEL> %m)".  */
-      sexp_buffer_n += 1 + strlen (label) + 4;
-    }
-  if (err)
-    goto out;
-
-  /* Allocate buffer.  */
-
-  sexp_buffer = gcry_malloc (sexp_buffer_n);
-  if (! sexp_buffer)
-    {
-      err = gcry_error_from_errno (errno);
-      goto out;
-    }
-
-  /* Fill buffer.  */
-
-  *sexp_buffer = 0;
-  sexp_buffer_n = 0;
-
-  /* Add identifiers: (<IDENTIFIER0>(<IDENTIFIER1>...)).  */
-  if (identifiers_n)
-    {
-      /* Add nested identifier lists as usual.  */
-      for (i = 0; i < identifiers_n; i++)
-       sexp_buffer_n += sprintf (sexp_buffer + sexp_buffer_n, "(%s",
-                                 identifiers[i]);
-    }
-  else
-    {
-      /* Add special list.  */
-      sexp_buffer_n += sprintf (sexp_buffer + sexp_buffer_n, "(");
-    }
-
-  /* Add MPI list.  */
-  arg_list = gcry_calloc (data_n + 1, sizeof (*arg_list));
-  if (! arg_list)
-    {
-      err = gcry_error_from_errno (errno);
-      goto out;
-    }
-  for (i = 0; i < data_n; i++)
-    {
-      err = gcry_ac_data_get_index (data, 0, i, &label, &mpi);
-      if (err)
-       break;
-      sexp_buffer_n += sprintf (sexp_buffer + sexp_buffer_n,
-                               "(%s %%m)", label);
-      arg_list[i] = &data->data[i].mpi;
-    }
-  if (err)
-    goto out;
-
-  if (identifiers_n)
-    {
-      /* Add closing braces for identifier lists as usual.  */
-      for (i = 0; i < identifiers_n; i++)
-       sexp_buffer_n += sprintf (sexp_buffer + sexp_buffer_n, ")");
-    }
-  else
-    {
-      /* Add closing braces for special list.  */
-      sexp_buffer_n += sprintf (sexp_buffer + sexp_buffer_n, ")");
-    }
-
-  /* Construct.  */
-  err = gcry_sexp_build_array (&sexp_new, NULL, sexp_buffer, arg_list);
-  if (err)
-    goto out;
-
-  *sexp = sexp_new;
-
- out:
-
-  gcry_free (sexp_buffer);
-  gcry_free (arg_list);
-
-  return err;
-}
-
-/* Create a new data set, which is to be stored in DATA_SET, from the
-   S-Expression SEXP, according to the identifiers contained in
-   IDENTIFIERS.  */
-gcry_error_t
-_gcry_ac_data_from_sexp (gcry_ac_data_t *data_set, gcry_sexp_t sexp,
-                        const char **identifiers)
-{
-  gcry_ac_data_t data_set_new;
-  gcry_error_t err;
-  gcry_sexp_t sexp_cur;
-  gcry_sexp_t sexp_tmp;
-  gcry_mpi_t mpi;
-  char *string;
-  const char *data;
-  size_t data_n;
-  size_t sexp_n;
-  unsigned int i;
-  int skip_name;
-
-  data_set_new = NULL;
-  sexp_cur = sexp;
-  sexp_tmp = NULL;
-  string = NULL;
-  mpi = NULL;
-  err = 0;
-
-  if (fips_mode ())
-    return gpg_error (GPG_ERR_NOT_SUPPORTED);
-
-  /* Process S-expression/identifiers.  */
-
-  if (identifiers)
-    {
-      for (i = 0; identifiers[i]; i++)
-       {
-         /* Next identifier.  Extract first data item from
-            SEXP_CUR.  */
-         data = gcry_sexp_nth_data (sexp_cur, 0, &data_n);
-
-         if (! ((data_n == strlen (identifiers[i]))
-                && (! strncmp (data, identifiers[i], data_n))))
-           {
-             /* Identifier mismatch -> error.  */
-             err = gcry_error (GPG_ERR_INV_SEXP);
-             break;
-           }
-
-         /* Identifier matches.  Now we have to distinguish two
-            cases:
-
-            (i)  we are at the last identifier:
-            leave loop
-
-            (ii) we are not at the last identifier:
-            extract next element, which is supposed to be a
-            sublist.  */
-
-         if (! identifiers[i + 1])
-           /* Last identifier.  */
-           break;
-         else
-           {
-             /* Not the last identifier, extract next sublist.  */
-
-             sexp_tmp = gcry_sexp_nth (sexp_cur, 1);
-             if (! sexp_tmp)
-               {
-                 /* Missing sublist.  */
-                 err = gcry_error (GPG_ERR_INV_SEXP);
-                 break;
-               }
-
-             /* Release old SEXP_CUR, in case it is not equal to the
-                original SEXP.  */
-             if (sexp_cur != sexp)
-               gcry_sexp_release (sexp_cur);
-
-             /* Make SEXP_CUR point to the new current sublist.  */
-             sexp_cur = sexp_tmp;
-              sexp_tmp = NULL;
-           }
-       }
-      if (err)
-       goto out;
-
-      if (i)
-        {
-          /* We have at least one identifier in the list, this means
-             the the list of named MPI values is prefixed, this means
-             that we need to skip the first item (the list name), when
-             processing the MPI values.  */
-          skip_name = 1;
-        }
-      else
-        {
-          /* Since there is no identifiers list, the list of named MPI
-             values is not prefixed with a list name, therefore the
-             offset to use is zero.  */
-          skip_name = 0;
-        }
-    }
-  else
-    /* Since there is no identifiers list, the list of named MPI
-       values is not prefixed with a list name, therefore the offset
-       to use is zero.  */
-    skip_name = 0;
-
-  /* Create data set from S-expression data.  */
-
-  err = gcry_ac_data_new (&data_set_new);
-  if (err)
-    goto out;
-
-  /* Figure out amount of named MPIs in SEXP_CUR.  */
-  if (sexp_cur)
-    sexp_n = gcry_sexp_length (sexp_cur) - skip_name;
-  else
-    sexp_n = 0;
-
-  /* Extracte the named MPIs sequentially.  */
-  for (i = 0; i < sexp_n; i++)
-    {
-      /* Store next S-Expression pair, which is supposed to consist of
-        a name and an MPI value, in SEXP_TMP.  */
-
-      sexp_tmp = gcry_sexp_nth (sexp_cur, i + skip_name);
-      if (! sexp_tmp)
-       {
-         err = gcry_error (GPG_ERR_INV_SEXP);
-         break;
-       }
-
-      /* Extract name from current S-Expression pair.  */
-      data = gcry_sexp_nth_data (sexp_tmp, 0, &data_n);
-      string = gcry_malloc (data_n + 1);
-      if (! string)
-       {
-         err = gcry_error_from_errno (errno);
-         break;
-       }
-      memcpy (string, data, data_n);
-      string[data_n] = 0;
-
-      /* Extract MPI value.  */
-      mpi = gcry_sexp_nth_mpi (sexp_tmp, 1, 0);
-      if (! mpi)
-       {
-         err = gcry_error (GPG_ERR_INV_SEXP); /* FIXME? */
-         break;
-       }
-
-      /* Store named MPI in data_set_new.  */
-      err = gcry_ac_data_set (data_set_new, GCRY_AC_FLAG_DEALLOC, string, mpi);
-      if (err)
-       break;
-
-/*       gcry_free (string); */
-      string = NULL;
-/*       gcry_mpi_release (mpi); */
-      mpi = NULL;
-
-      gcry_sexp_release (sexp_tmp);
-      sexp_tmp = NULL;
-    }
-  if (err)
-    goto out;
-
-  *data_set = data_set_new;
-
- out:
-
-  if (sexp_cur != sexp)
-    gcry_sexp_release (sexp_cur);
-  gcry_sexp_release (sexp_tmp);
-  gcry_mpi_release (mpi);
-  gcry_free (string);
-
-  if (err)
-    gcry_ac_data_destroy (data_set_new);
-
-  return err;
-}
-
-
-static void
-_gcry_ac_data_dump (const char *prefix, gcry_ac_data_t data)
-{
-  unsigned char *mpi_buffer;
-  size_t mpi_buffer_n;
-  unsigned int data_n;
-  gcry_error_t err;
-  const char *name;
-  gcry_mpi_t mpi;
-  unsigned int i;
-
-  if (! data)
-    return;
-
-  if (fips_mode ())
-    return;
-
-  mpi_buffer = NULL;
-
-  data_n = _gcry_ac_data_length (data);
-  for (i = 0; i < data_n; i++)
-    {
-      err = gcry_ac_data_get_index (data, 0, i, &name, &mpi);
-      if (err)
-       {
-         log_error ("failed to dump data set");
-         break;
-       }
-
-      err = gcry_mpi_aprint (GCRYMPI_FMT_HEX, &mpi_buffer, &mpi_buffer_n, mpi);
-      if (err)
-       {
-         log_error ("failed to dump data set");
-         break;
-       }
-
-      log_printf ("%s%s%s: %s\n",
-                 prefix ? prefix : "",
-                 prefix ? ": " : ""
-                 , name, mpi_buffer);
-
-      gcry_free (mpi_buffer);
-      mpi_buffer = NULL;
-    }
-
-  gcry_free (mpi_buffer);
-}
-
-/* Dump the named MPI values contained in the data set DATA to
-   Libgcrypt's logging stream.  */
-void
-gcry_ac_data_dump (const char *prefix, gcry_ac_data_t data)
-{
-  _gcry_ac_data_dump (prefix, data);
-}
-
-/* Destroys any values contained in the data set DATA.  */
-void
-_gcry_ac_data_clear (gcry_ac_data_t data)
-{
-  ac_data_values_destroy (data);
-  gcry_free (data->data);
-  data->data = NULL;
-  data->data_n = 0;
-}
-
-
-
-/*
- * Implementation of `ac io' objects.
- */
-
-/* Initialize AC_IO according to MODE, TYPE and the variable list of
-   arguments AP.  The list of variable arguments to specify depends on
-   the given TYPE.  */
-void
-_gcry_ac_io_init_va (gcry_ac_io_t *ac_io,
-                    gcry_ac_io_mode_t mode, gcry_ac_io_type_t type, va_list ap)
-{
-  memset (ac_io, 0, sizeof (*ac_io));
-
-  if (fips_mode ())
-    return;
-
-  gcry_assert ((mode == GCRY_AC_IO_READABLE) || (mode == GCRY_AC_IO_WRITABLE));
-  gcry_assert ((type == GCRY_AC_IO_STRING) || (type == GCRY_AC_IO_STRING));
-
-  ac_io->mode = mode;
-  ac_io->type = type;
-
-  switch (mode)
-    {
-    case GCRY_AC_IO_READABLE:
-      switch (type)
-       {
-       case GCRY_AC_IO_STRING:
-         ac_io->io.readable.string.data = va_arg (ap, unsigned char *);
-         ac_io->io.readable.string.data_n = va_arg (ap, size_t);
-         break;
-
-       case GCRY_AC_IO_CALLBACK:
-         ac_io->io.readable.callback.cb = va_arg (ap, gcry_ac_data_read_cb_t);
-         ac_io->io.readable.callback.opaque = va_arg (ap, void *);
-         break;
-       }
-      break;
-    case GCRY_AC_IO_WRITABLE:
-      switch (type)
-       {
-       case GCRY_AC_IO_STRING:
-         ac_io->io.writable.string.data = va_arg (ap, unsigned char **);
-         ac_io->io.writable.string.data_n = va_arg (ap, size_t *);
-         break;
-
-       case GCRY_AC_IO_CALLBACK:
-         ac_io->io.writable.callback.cb = va_arg (ap, gcry_ac_data_write_cb_t);
-         ac_io->io.writable.callback.opaque = va_arg (ap, void *);
-         break;
-       }
-      break;
-    }
-}
-
-/* Initialize AC_IO according to MODE, TYPE and the variable list of
-   arguments.  The list of variable arguments to specify depends on
-   the given TYPE. */
-void
-_gcry_ac_io_init (gcry_ac_io_t *ac_io,
-                 gcry_ac_io_mode_t mode, gcry_ac_io_type_t type, ...)
-{
-  va_list ap;
-
-  va_start (ap, type);
-  _gcry_ac_io_init_va (ac_io, mode, type, ap);
-  va_end (ap);
-}
-
-
-/* Write to the IO object AC_IO BUFFER_N bytes from BUFFER.  Return
-   zero on success or error code.  */
-static gcry_error_t
-_gcry_ac_io_write (gcry_ac_io_t *ac_io, unsigned char *buffer, size_t buffer_n)
-{
-  gcry_error_t err;
-
-  gcry_assert (ac_io->mode == GCRY_AC_IO_WRITABLE);
-  err = 0;
-
-  switch (ac_io->type)
-    {
-    case GCRY_AC_IO_STRING:
-      {
-       unsigned char *p;
-
-       if (*ac_io->io.writable.string.data)
-         {
-           p = gcry_realloc (*ac_io->io.writable.string.data,
-                             *ac_io->io.writable.string.data_n + buffer_n);
-           if (! p)
-             err = gcry_error_from_errno (errno);
-           else
-             {
-               if (*ac_io->io.writable.string.data != p)
-                 *ac_io->io.writable.string.data = p;
-               memcpy (p + *ac_io->io.writable.string.data_n, buffer, 
buffer_n);
-               *ac_io->io.writable.string.data_n += buffer_n;
-             }
-         }
-       else
-         {
-           if (gcry_is_secure (buffer))
-             p = gcry_malloc_secure (buffer_n);
-           else
-             p = gcry_malloc (buffer_n);
-           if (! p)
-             err = gcry_error_from_errno (errno);
-           else
-             {
-               memcpy (p, buffer, buffer_n);
-               *ac_io->io.writable.string.data = p;
-               *ac_io->io.writable.string.data_n = buffer_n;
-             }
-         }
-      }
-      break;
-
-    case GCRY_AC_IO_CALLBACK:
-      err = (*ac_io->io.writable.callback.cb) 
(ac_io->io.writable.callback.opaque,
-                                              buffer, buffer_n);
-      break;
-    }
-
-  return err;
-}
-
-/* Read *BUFFER_N bytes from the IO object AC_IO into BUFFER; NREAD
-   bytes have already been read from the object; on success, store the
-   amount of bytes read in *BUFFER_N; zero bytes read means EOF.
-   Return zero on success or error code.  */
-static gcry_error_t
-_gcry_ac_io_read (gcry_ac_io_t *ac_io,
-                 unsigned int nread, unsigned char *buffer, size_t *buffer_n)
-{
-  gcry_error_t err;
-
-  gcry_assert (ac_io->mode == GCRY_AC_IO_READABLE);
-  err = 0;
-
-  switch (ac_io->type)
-    {
-    case GCRY_AC_IO_STRING:
-      {
-       size_t bytes_available;
-       size_t bytes_to_read;
-       size_t bytes_wanted;
-
-       bytes_available = ac_io->io.readable.string.data_n - nread;
-       bytes_wanted = *buffer_n;
-
-       if (bytes_wanted > bytes_available)
-         bytes_to_read = bytes_available;
-       else
-         bytes_to_read = bytes_wanted;
-
-       memcpy (buffer, ac_io->io.readable.string.data + nread, bytes_to_read);
-       *buffer_n = bytes_to_read;
-       err = 0;
-       break;
-      }
-
-    case GCRY_AC_IO_CALLBACK:
-      err = (*ac_io->io.readable.callback.cb)
-       (ac_io->io.readable.callback.opaque, buffer, buffer_n);
-      break;
-    }
-
-  return err;
-}
-
-/* Read all data available from the IO object AC_IO into newly
-   allocated memory, storing an appropriate pointer in *BUFFER and the
-   amount of bytes read in *BUFFER_N.  Return zero on success or error
-   code.  */
-static gcry_error_t
-_gcry_ac_io_read_all (gcry_ac_io_t *ac_io, unsigned char **buffer, size_t 
*buffer_n)
-{
-  unsigned char *buffer_new;
-  size_t buffer_new_n;
-  unsigned char buf[BUFSIZ];
-  size_t buf_n;
-  unsigned char *p;
-  gcry_error_t err;
-
-  buffer_new = NULL;
-  buffer_new_n = 0;
-
-  while (1)
-    {
-      buf_n = sizeof (buf);
-      err = _gcry_ac_io_read (ac_io, buffer_new_n, buf, &buf_n);
-      if (err)
-       break;
-
-      if (buf_n)
-       {
-         p = gcry_realloc (buffer_new, buffer_new_n + buf_n);
-         if (! p)
-           {
-             err = gcry_error_from_errno (errno);
-             break;
-           }
-
-         if (buffer_new != p)
-           buffer_new = p;
-
-         memcpy (buffer_new + buffer_new_n, buf, buf_n);
-         buffer_new_n += buf_n;
-       }
-      else
-       break;
-    }
-  if (err)
-    goto out;
-
-  *buffer_n = buffer_new_n;
-  *buffer = buffer_new;
-
- out:
-
-  if (err)
-    gcry_free (buffer_new);
-
-  return err;
-}
-
-/* Read data chunks from the IO object AC_IO until EOF, feeding them
-   to the callback function CB.  Return zero on success or error
-   code.  */
-static gcry_error_t
-_gcry_ac_io_process (gcry_ac_io_t *ac_io,
-                    gcry_ac_data_write_cb_t cb, void *opaque)
-{
-  unsigned char buffer[BUFSIZ];
-  unsigned int nread;
-  size_t buffer_n;
-  gcry_error_t err;
-
-  nread = 0;
-
-  while (1)
-    {
-      buffer_n = sizeof (buffer);
-      err = _gcry_ac_io_read (ac_io, nread, buffer, &buffer_n);
-      if (err)
-       break;
-      if (buffer_n)
-       {
-         err = (*cb) (opaque, buffer, buffer_n);
-         if (err)
-           break;
-         nread += buffer_n;
-       }
-      else
-       break;
-    }
-
-  return err;
-}
-
-
-
-/*
- * Functions for converting data between the native ac and the
- * S-expression structure used by the pk interface.
- */
-
-/* Extract the S-Expression DATA_SEXP into DATA under the control of
-   TYPE and NAME.  This function assumes that S-Expressions are of the
-   following structure:
-
-   (IDENTIFIER [...]
-   (ALGORITHM <list of named MPI values>)) */
-static gcry_error_t
-ac_data_extract (const char *identifier, const char *algorithm,
-                gcry_sexp_t sexp, gcry_ac_data_t *data)
-{
-  gcry_error_t err;
-  gcry_sexp_t value_sexp;
-  gcry_sexp_t data_sexp;
-  size_t data_sexp_n;
-  gcry_mpi_t value_mpi;
-  char *value_name;
-  const char *data_raw;
-  size_t data_raw_n;
-  gcry_ac_data_t data_new;
-  unsigned int i;
-
-  value_sexp = NULL;
-  data_sexp = NULL;
-  value_name = NULL;
-  value_mpi = NULL;
-  data_new = NULL;
-
-  /* Verify that the S-expression contains the correct identifier.  */
-  data_raw = gcry_sexp_nth_data (sexp, 0, &data_raw_n);
-  if ((! data_raw) || strncmp (identifier, data_raw, data_raw_n))
-    {
-      err = gcry_error (GPG_ERR_INV_SEXP);
-      goto out;
-    }
-
-  /* Extract inner S-expression.  */
-  data_sexp = gcry_sexp_find_token (sexp, algorithm, 0);
-  if (! data_sexp)
-    {
-      err = gcry_error (GPG_ERR_INV_SEXP);
-      goto out;
-    }
-
-  /* Count data elements.  */
-  data_sexp_n = gcry_sexp_length (data_sexp);
-  data_sexp_n--;
-
-  /* Allocate new data set.  */
-  err = _gcry_ac_data_new (&data_new);
-  if (err)
-    goto out;
-
-  /* Iterate through list of data elements and add them to the data
-     set.  */
-  for (i = 0; i < data_sexp_n; i++)
-    {
-      /* Get the S-expression of the named MPI, that contains the name
-        and the MPI value.  */
-      value_sexp = gcry_sexp_nth (data_sexp, i + 1);
-      if (! value_sexp)
-       {
-         err = gcry_error (GPG_ERR_INV_SEXP);
-         break;
-       }
-
-      /* Extract the name.  */
-      data_raw = gcry_sexp_nth_data (value_sexp, 0, &data_raw_n);
-      if (! data_raw)
-       {
-         err = gcry_error (GPG_ERR_INV_SEXP);
-         break;
-       }
-
-      /* Extract the MPI value.  */
-      value_mpi = gcry_sexp_nth_mpi (value_sexp, 1, GCRYMPI_FMT_USG);
-      if (! value_mpi)
-       {
-         err = gcry_error (GPG_ERR_INTERNAL); /* FIXME? */
-         break;
-       }
-
-      /* Duplicate the name.  */
-      value_name = gcry_malloc (data_raw_n + 1);
-      if (! value_name)
-       {
-         err = gcry_error_from_errno (errno);
-         break;
-       }
-      strncpy (value_name, data_raw, data_raw_n);
-      value_name[data_raw_n] = 0;
-
-      err = _gcry_ac_data_set (data_new, GCRY_AC_FLAG_DEALLOC, value_name, 
value_mpi);
-      if (err)
-       break;
-
-      gcry_sexp_release (value_sexp);
-      value_sexp = NULL;
-      value_name = NULL;
-      value_mpi = NULL;
-    }
-  if (err)
-    goto out;
-
-  /* Copy out.  */
-  *data = data_new;
-
- out:
-
-  /* Deallocate resources.  */
-  if (err)
-    {
-      _gcry_ac_data_destroy (data_new);
-      gcry_mpi_release (value_mpi);
-      gcry_free (value_name);
-      gcry_sexp_release (value_sexp);
-    }
-  gcry_sexp_release (data_sexp);
-
-  return err;
-}
-
-/* Construct an S-expression from the DATA and store it in
-   DATA_SEXP. The S-expression will be of the following structure:
-
-   (IDENTIFIER [(flags [...])]
-   (ALGORITHM <list of named MPI values>))  */
-static gcry_error_t
-ac_data_construct (const char *identifier, int include_flags,
-                  unsigned int flags, const char *algorithm,
-                  gcry_ac_data_t data, gcry_sexp_t *sexp)
-{
-  unsigned int data_length;
-  gcry_sexp_t sexp_new;
-  gcry_error_t err;
-  size_t sexp_format_n;
-  char *sexp_format;
-  void **arg_list;
-  unsigned int i;
-
-  arg_list = NULL;
-  sexp_new = NULL;
-  sexp_format = NULL;
-
-  /* We build a list of arguments to pass to
-     gcry_sexp_build_array().  */
-  data_length = _gcry_ac_data_length (data);
-  arg_list = gcry_calloc (data_length, sizeof (*arg_list) * 2);
-  if (! arg_list)
-    {
-      err = gcry_error_from_errno (errno);
-      goto out;
-    }
-
-  /* Fill list with MPIs.  */
-  for (i = 0; i < data_length; i++)
-    {
-      char **nameaddr  = &data->data[i].name;
-
-      arg_list[(i * 2) + 0] = nameaddr;
-      arg_list[(i * 2) + 1] = &data->data[i].mpi;
-    }
-
-  /* Calculate size of format string.  */
-  sexp_format_n = (3
-                  + (include_flags ? 7 : 0)
-                  + (algorithm ? (2 + strlen (algorithm)) : 0)
-                  + strlen (identifier));
-
-  for (i = 0; i < data_length; i++)
-    /* Per-element sizes.  */
-    sexp_format_n += 6;
-
-  if (include_flags)
-    /* Add flags.  */
-    for (i = 0; i < DIM (ac_flags); i++)
-      if (flags & ac_flags[i].number)
-       sexp_format_n += strlen (ac_flags[i].string) + 1;
-
-  /* Done.  */
-  sexp_format = gcry_malloc (sexp_format_n);
-  if (! sexp_format)
-    {
-      err = gcry_error_from_errno (errno);
-      goto out;
-    }
-
-  /* Construct the format string.  */
-
-  *sexp_format = 0;
-  strcat (sexp_format, "(");
-  strcat (sexp_format, identifier);
-  if (include_flags)
-    {
-      strcat (sexp_format, "(flags");
-      for (i = 0; i < DIM (ac_flags); i++)
-       if (flags & ac_flags[i].number)
-         {
-           strcat (sexp_format, " ");
-           strcat (sexp_format, ac_flags[i].string);
-         }
-      strcat (sexp_format, ")");
-    }
-  if (algorithm)
-    {
-      strcat (sexp_format, "(");
-      strcat (sexp_format, algorithm);
-    }
-  for (i = 0; i < data_length; i++)
-    strcat (sexp_format, "(%s%m)");
-  if (algorithm)
-    strcat (sexp_format, ")");
-  strcat (sexp_format, ")");
-
-  /* Create final S-expression.  */
-  err = gcry_sexp_build_array (&sexp_new, NULL, sexp_format, arg_list);
-  if (err)
-    goto out;
-
-  *sexp = sexp_new;
-
- out:
-
-  /* Deallocate resources.  */
-  gcry_free (sexp_format);
-  gcry_free (arg_list);
-  if (err)
-    gcry_sexp_release (sexp_new);
-
-  return err;
-}
-
-
-
-/*
- * Handle management.
- */
-
-/* Creates a new handle for the algorithm ALGORITHM and stores it in
-   HANDLE.  FLAGS is not used yet.  */
-gcry_error_t
-_gcry_ac_open (gcry_ac_handle_t *handle,
-              gcry_ac_id_t algorithm, unsigned int flags)
-{
-  gcry_ac_handle_t handle_new;
-  const char *algorithm_name;
-  gcry_module_t module;
-  gcry_error_t err;
-
-  *handle = NULL;
-  module = NULL;
-
-  if (fips_mode ())
-    return gpg_error (GPG_ERR_NOT_SUPPORTED);
-
-  /* Get name.  */
-  algorithm_name = _gcry_pk_aliased_algo_name (algorithm);
-  if (! algorithm_name)
-    {
-      err = gcry_error (GPG_ERR_PUBKEY_ALGO);
-      goto out;
-    }
-
-  /* Acquire reference to the pubkey module.  */
-  err = _gcry_pk_module_lookup (algorithm, &module);
-  if (err)
-    goto out;
-
-  /* Allocate.  */
-  handle_new = gcry_malloc (sizeof (*handle_new));
-  if (! handle_new)
-    {
-      err = gcry_error_from_errno (errno);
-      goto out;
-    }
-
-  /* Done.  */
-  handle_new->algorithm = algorithm;
-  handle_new->algorithm_name = algorithm_name;
-  handle_new->flags = flags;
-  handle_new->module = module;
-  *handle = handle_new;
-
- out:
-
-  /* Deallocate resources.  */
-  if (err)
-    _gcry_pk_module_release (module);
-
-  return err;
-}
-
-
-/* Destroys the handle HANDLE.  */
-void
-_gcry_ac_close (gcry_ac_handle_t handle)
-{
-  /* Release reference to pubkey module.  */
-  if (handle)
-    {
-      _gcry_pk_module_release (handle->module);
-      gcry_free (handle);
-    }
-}
-
-
-
-/*
- * Key management.
- */
-
-/* Initialize a key from a given data set.  */
-/* FIXME/Damn: the argument HANDLE is not only unnecessary, it is
-   completely WRONG here.  */
-gcry_error_t
-_gcry_ac_key_init (gcry_ac_key_t *key, gcry_ac_handle_t handle,
-                  gcry_ac_key_type_t type, gcry_ac_data_t data)
-{
-  gcry_ac_data_t data_new;
-  gcry_ac_key_t key_new;
-  gcry_error_t err;
-
-  (void)handle;
-
-  if (fips_mode ())
-    return gpg_error (GPG_ERR_NOT_SUPPORTED);
-
-  /* Allocate.  */
-  key_new = gcry_malloc (sizeof (*key_new));
-  if (! key_new)
-    {
-      err = gcry_error_from_errno (errno);
-      goto out;
-    }
-
-  /* Copy data set.  */
-  err = _gcry_ac_data_copy (&data_new, data);
-  if (err)
-    goto out;
-
-  /* Done.  */
-  key_new->data = data_new;
-  key_new->type = type;
-  *key = key_new;
-
- out:
-
-  if (err)
-    /* Deallocate resources.  */
-    gcry_free (key_new);
-
-  return err;
-}
-
-
-/* Generates a new key pair via the handle HANDLE of NBITS bits and
-   stores it in KEY_PAIR.  In case non-standard settings are wanted, a
-   pointer to a structure of type gcry_ac_key_spec_<algorithm>_t,
-   matching the selected algorithm, can be given as KEY_SPEC.
-   MISC_DATA is not used yet.  */
-gcry_error_t
-_gcry_ac_key_pair_generate (gcry_ac_handle_t handle, unsigned int nbits,
-                           void *key_spec,
-                           gcry_ac_key_pair_t *key_pair,
-                           gcry_mpi_t **misc_data)
-{
-  gcry_sexp_t genkey_sexp_request;
-  gcry_sexp_t genkey_sexp_reply;
-  gcry_ac_data_t key_data_secret;
-  gcry_ac_data_t key_data_public;
-  gcry_ac_key_pair_t key_pair_new;
-  gcry_ac_key_t key_secret;
-  gcry_ac_key_t key_public;
-  gcry_sexp_t key_sexp;
-  gcry_error_t err;
-  char *genkey_format;
-  size_t genkey_format_n;
-  void **arg_list;
-  size_t arg_list_n;
-  unsigned int i;
-  unsigned int j;
-
-  (void)misc_data;
-
-  if (fips_mode ())
-    return gpg_error (GPG_ERR_NOT_SUPPORTED);
-
-  key_data_secret = NULL;
-  key_data_public = NULL;
-  key_secret = NULL;
-  key_public = NULL;
-  genkey_format = NULL;
-  arg_list = NULL;
-  genkey_sexp_request = NULL;
-  genkey_sexp_reply = NULL;
-  key_sexp = NULL;
-
-  /* Allocate key pair.  */
-  key_pair_new = gcry_malloc (sizeof (struct gcry_ac_key_pair));
-  if (! key_pair_new)
-    {
-      err = gcry_error_from_errno (errno);
-      goto out;
-    }
-
-  /* Allocate keys.  */
-  key_secret = gcry_malloc (sizeof (*key_secret));
-  if (! key_secret)
-    {
-      err = gcry_error_from_errno (errno);
-      goto out;
-    }
-  key_public = gcry_malloc (sizeof (*key_public));
-  if (! key_public)
-    {
-      err = gcry_error_from_errno (errno);
-      goto out;
-    }
-
-  /* Calculate size of the format string, that is used for creating
-     the request S-expression.  */
-  genkey_format_n = 22;
-
-  /* Respect any relevant algorithm specific commands.  */
-  if (key_spec)
-    for (i = 0; i < DIM (ac_key_generate_specs); i++)
-      if (handle->algorithm == ac_key_generate_specs[i].algorithm)
-       genkey_format_n += 6;
-
-  /* Create format string.  */
-  genkey_format = gcry_malloc (genkey_format_n);
-  if (! genkey_format)
-    {
-      err = gcry_error_from_errno (errno);
-      goto out;
-    }
-
-  /* Fill format string.  */
-  *genkey_format = 0;
-  strcat (genkey_format, "(genkey(%s(nbits%d)");
-  if (key_spec)
-    for (i = 0; i < DIM (ac_key_generate_specs); i++)
-      if (handle->algorithm == ac_key_generate_specs[i].algorithm)
-       strcat (genkey_format, "(%s%m)");
-  strcat (genkey_format, "))");
-
-  /* Build list of argument pointers, the algorithm name and the nbits
-     are always needed.  */
-  arg_list_n = 2;
-
-  /* Now the algorithm specific arguments.  */
-  if (key_spec)
-    for (i = 0; i < DIM (ac_key_generate_specs); i++)
-      if (handle->algorithm == ac_key_generate_specs[i].algorithm)
-       arg_list_n += 2;
-
-  /* Allocate list.  */
-  arg_list = gcry_calloc (arg_list_n, sizeof (*arg_list));
-  if (! arg_list)
-    {
-      err = gcry_error_from_errno (errno);
-      goto out;
-    }
-
-  arg_list[0] = (void *) &handle->algorithm_name;
-  arg_list[1] = (void *) &nbits;
-  if (key_spec)
-    for (j = 2, i = 0; i < DIM (ac_key_generate_specs); i++)
-      if (handle->algorithm == ac_key_generate_specs[i].algorithm)
-       {
-         /* Add name of this specification flag and the
-            according member of the spec strucuture.  */
-         arg_list[j++] = (void *)(&ac_key_generate_specs[i].name);
-         arg_list[j++] = (void *)
-           (((char *) key_spec)
-            + ac_key_generate_specs[i].offset);
-         /* FIXME: above seems to suck.  */
-       }
-
-  /* Construct final request S-expression.  */
-  err = gcry_sexp_build_array (&genkey_sexp_request,
-                              NULL, genkey_format, arg_list);
-  if (err)
-    goto out;
-
-  /* Perform genkey operation.  */
-  err = gcry_pk_genkey (&genkey_sexp_reply, genkey_sexp_request);
-  if (err)
-    goto out;
-
-  key_sexp = gcry_sexp_find_token (genkey_sexp_reply, "private-key", 0);
-  if (! key_sexp)
-    {
-      err = gcry_error (GPG_ERR_INTERNAL);
-      goto out;
-    }
-  err = ac_data_extract ("private-key", handle->algorithm_name,
-                        key_sexp, &key_data_secret);
-  if (err)
-    goto out;
-
-  gcry_sexp_release (key_sexp);
-  key_sexp = gcry_sexp_find_token (genkey_sexp_reply, "public-key", 0);
-  if (! key_sexp)
-    {
-      err = gcry_error (GPG_ERR_INTERNAL);
-      goto out;
-    }
-  err = ac_data_extract ("public-key", handle->algorithm_name,
-                        key_sexp, &key_data_public);
-  if (err)
-    goto out;
-
-  /* Done.  */
-
-  key_secret->type = GCRY_AC_KEY_SECRET;
-  key_secret->data = key_data_secret;
-  key_public->type = GCRY_AC_KEY_PUBLIC;
-  key_public->data = key_data_public;
-  key_pair_new->secret = key_secret;
-  key_pair_new->public = key_public;
-  *key_pair = key_pair_new;
-
- out:
-
-  /* Deallocate resources.  */
-
-  gcry_free (genkey_format);
-  gcry_free (arg_list);
-  gcry_sexp_release (genkey_sexp_request);
-  gcry_sexp_release (genkey_sexp_reply);
-  gcry_sexp_release (key_sexp);
-  if (err)
-    {
-      _gcry_ac_data_destroy (key_data_secret);
-      _gcry_ac_data_destroy (key_data_public);
-      gcry_free (key_secret);
-      gcry_free (key_public);
-      gcry_free (key_pair_new);
-    }
-
-  return err;
-}
-
-/* Returns the key of type WHICH out of the key pair KEY_PAIR.  */
-gcry_ac_key_t
-_gcry_ac_key_pair_extract (gcry_ac_key_pair_t key_pair,
-                           gcry_ac_key_type_t which)
-{
-  gcry_ac_key_t key;
-
-  if (fips_mode ())
-    return NULL;
-
-  switch (which)
-    {
-    case GCRY_AC_KEY_SECRET:
-      key = key_pair->secret;
-      break;
-
-    case GCRY_AC_KEY_PUBLIC:
-      key = key_pair->public;
-      break;
-
-    default:
-      key = NULL;
-      break;
-    }
-
-  return key;
-}
-
-/* Destroys the key KEY.  */
-void
-_gcry_ac_key_destroy (gcry_ac_key_t key)
-{
-  unsigned int i;
-
-  if (key)
-    {
-      if (key->data)
-        {
-          for (i = 0; i < key->data->data_n; i++)
-            {
-              if (key->data->data[i].mpi)
-                gcry_mpi_release (key->data->data[i].mpi);
-              if (key->data->data[i].name)
-                gcry_free (key->data->data[i].name);
-            }
-          gcry_free (key->data->data);
-          gcry_free (key->data);
-        }
-      gcry_free (key);
-    }
-}
-
-/* Destroys the key pair KEY_PAIR.  */
-void
-_gcry_ac_key_pair_destroy (gcry_ac_key_pair_t key_pair)
-{
-  if (key_pair)
-    {
-      gcry_ac_key_destroy (key_pair->secret);
-      gcry_ac_key_destroy (key_pair->public);
-      gcry_free (key_pair);
-    }
-}
-
-/* Returns the data set contained in the key KEY.  */
-gcry_ac_data_t
-_gcry_ac_key_data_get (gcry_ac_key_t key)
-{
-  if (fips_mode ())
-    return NULL;
-  return key->data;
-}
-
-/* Verifies that the key KEY is sane via HANDLE.  */
-gcry_error_t
-_gcry_ac_key_test (gcry_ac_handle_t handle, gcry_ac_key_t key)
-{
-  gcry_sexp_t key_sexp;
-  gcry_error_t err;
-
-  if (fips_mode ())
-    return gpg_error (GPG_ERR_NOT_SUPPORTED);
-
-  key_sexp = NULL;
-  err = ac_data_construct (ac_key_identifiers[key->type], 0, 0,
-                          handle->algorithm_name, key->data, &key_sexp);
-  if (err)
-    goto out;
-
-  err = gcry_pk_testkey (key_sexp);
-
- out:
-
-  gcry_sexp_release (key_sexp);
-
-  return gcry_error (err);
-}
-
-/* Stores the number of bits of the key KEY in NBITS via HANDLE.  */
-gcry_error_t
-_gcry_ac_key_get_nbits (gcry_ac_handle_t handle,
-                       gcry_ac_key_t key, unsigned int *nbits)
-{
-  gcry_sexp_t key_sexp;
-  gcry_error_t err;
-  unsigned int n;
-
-  if (fips_mode ())
-    return gpg_error (GPG_ERR_NOT_SUPPORTED);
-
-  key_sexp = NULL;
-
-  err = ac_data_construct (ac_key_identifiers[key->type],
-                          0, 0, handle->algorithm_name, key->data, &key_sexp);
-  if (err)
-    goto out;
-
-  n = gcry_pk_get_nbits (key_sexp);
-  if (! n)
-    {
-      err = gcry_error (GPG_ERR_PUBKEY_ALGO);
-      goto out;
-    }
-
-  *nbits = n;
-
- out:
-
-  gcry_sexp_release (key_sexp);
-
-  return err;
-}
-
-/* Writes the 20 byte long key grip of the key KEY to KEY_GRIP via
-   HANDLE.  */
-gcry_error_t
-_gcry_ac_key_get_grip (gcry_ac_handle_t handle,
-                      gcry_ac_key_t key, unsigned char *key_grip)
-{
-  gcry_sexp_t key_sexp;
-  gcry_error_t err;
-  unsigned char *ret;
-
-  if (fips_mode ())
-    return gpg_error (GPG_ERR_NOT_SUPPORTED);
-
-  key_sexp = NULL;
-  err = ac_data_construct (ac_key_identifiers[key->type], 0, 0,
-                          handle->algorithm_name, key->data, &key_sexp);
-  if (err)
-    goto out;
-
-  ret = gcry_pk_get_keygrip (key_sexp, key_grip);
-  if (! ret)
-    {
-      err = gcry_error (GPG_ERR_INV_OBJ);
-      goto out;
-    }
-
-  err = 0;
-
- out:
-
-  gcry_sexp_release (key_sexp);
-
-  return err;
-}
-
-
-
-
-/*
- * Functions performing cryptographic operations.
- */
-
-/* Encrypts the plain text MPI value DATA_PLAIN with the key public
-   KEY under the control of the flags FLAGS and stores the resulting
-   data set into DATA_ENCRYPTED.  */
-gcry_error_t
-_gcry_ac_data_encrypt (gcry_ac_handle_t handle,
-                      unsigned int flags,
-                      gcry_ac_key_t key,
-                      gcry_mpi_t data_plain,
-                      gcry_ac_data_t *data_encrypted)
-{
-  gcry_ac_data_t data_encrypted_new;
-  gcry_ac_data_t data_value;
-  gcry_sexp_t sexp_request;
-  gcry_sexp_t sexp_reply;
-  gcry_sexp_t sexp_key;
-  gcry_error_t err;
-
-  if (fips_mode ())
-    return gpg_error (GPG_ERR_NOT_SUPPORTED);
-
-  data_encrypted_new = NULL;
-  sexp_request = NULL;
-  sexp_reply = NULL;
-  data_value = NULL;
-  sexp_key = NULL;
-
-  if (key->type != GCRY_AC_KEY_PUBLIC)
-    {
-      err = gcry_error (GPG_ERR_WRONG_KEY_USAGE);
-      goto out;
-    }
-
-  err = ac_data_construct (ac_key_identifiers[key->type], 0, 0,
-                          handle->algorithm_name, key->data, &sexp_key);
-  if (err)
-    goto out;
-
-  err = _gcry_ac_data_new (&data_value);
-  if (err)
-    goto out;
-
-  err = _gcry_ac_data_set (data_value, 0, "value", data_plain);
-  if (err)
-    goto out;
-
-  err = ac_data_construct ("data", 1, flags, handle->algorithm_name,
-                          data_value, &sexp_request);
-  if (err)
-    goto out;
-
-  /* FIXME: error vs. errcode? */
-
-  err = gcry_pk_encrypt (&sexp_reply, sexp_request, sexp_key);
-  if (err)
-    goto out;
-
-  /* Extract data.  */
-  err = ac_data_extract ("enc-val", handle->algorithm_name,
-                        sexp_reply, &data_encrypted_new);
-  if (err)
-    goto out;
-
-  *data_encrypted = data_encrypted_new;
-
- out:
-
-  /* Deallocate resources.  */
-
-  gcry_sexp_release (sexp_request);
-  gcry_sexp_release (sexp_reply);
-  gcry_sexp_release (sexp_key);
-  _gcry_ac_data_destroy (data_value);
-
-  return err;
-}
-
-/* Decrypts the encrypted data contained in the data set
-   DATA_ENCRYPTED with the secret key KEY under the control of the
-   flags FLAGS and stores the resulting plain text MPI value in
-   DATA_PLAIN.  */
-gcry_error_t
-_gcry_ac_data_decrypt (gcry_ac_handle_t handle,
-                      unsigned int flags,
-                      gcry_ac_key_t key,
-                      gcry_mpi_t *data_plain,
-                      gcry_ac_data_t data_encrypted)
-{
-  gcry_mpi_t data_decrypted;
-  gcry_sexp_t sexp_request;
-  gcry_sexp_t sexp_reply;
-  gcry_sexp_t sexp_value;
-  gcry_sexp_t sexp_key;
-  gcry_error_t err;
-
-  if (fips_mode ())
-    return gpg_error (GPG_ERR_NOT_SUPPORTED);
-
-  sexp_request = NULL;
-  sexp_reply = NULL;
-  sexp_value = NULL;
-  sexp_key = NULL;
-
-  if (key->type != GCRY_AC_KEY_SECRET)
-    {
-      err = gcry_error (GPG_ERR_WRONG_KEY_USAGE);
-      goto out;
-    }
-
-  err = ac_data_construct (ac_key_identifiers[key->type], 0, 0,
-                          handle->algorithm_name, key->data, &sexp_key);
-  if (err)
-    goto out;
-
-  /* Create S-expression from data.  */
-  err = ac_data_construct ("enc-val", 1, flags, handle->algorithm_name,
-                          data_encrypted, &sexp_request);
-  if (err)
-    goto out;
-
-  /* Decrypt.  */
-  err = gcry_pk_decrypt (&sexp_reply, sexp_request, sexp_key);
-  if (err)
-    goto out;
-
-  /* Extract plain text. */
-  sexp_value = gcry_sexp_find_token (sexp_reply, "value", 0);
-  if (! sexp_value)
-    {
-      /* FIXME?  */
-      err = gcry_error (GPG_ERR_GENERAL);
-      goto out;
-    }
-
-  data_decrypted = gcry_sexp_nth_mpi (sexp_value, 1, GCRYMPI_FMT_USG);
-  if (! data_decrypted)
-    {
-      err = gcry_error (GPG_ERR_GENERAL);
-      goto out;
-    }
-
-  *data_plain = data_decrypted;
-
- out:
-
-  /* Deallocate resources.  */
-  gcry_sexp_release (sexp_request);
-  gcry_sexp_release (sexp_reply);
-  gcry_sexp_release (sexp_value);
-  gcry_sexp_release (sexp_key);
-
-  return gcry_error (err);
-
-}
-
-/* Signs the data contained in DATA with the secret key KEY and stores
-   the resulting signature data set in DATA_SIGNATURE.  */
-gcry_error_t
-_gcry_ac_data_sign (gcry_ac_handle_t handle,
-                   gcry_ac_key_t key,
-                   gcry_mpi_t data,
-                   gcry_ac_data_t *data_signature)
-{
-  gcry_ac_data_t data_signed;
-  gcry_ac_data_t data_value;
-  gcry_sexp_t sexp_request;
-  gcry_sexp_t sexp_reply;
-  gcry_sexp_t sexp_key;
-  gcry_error_t err;
-
-  if (fips_mode ())
-    return gpg_error (GPG_ERR_NOT_SUPPORTED);
-
-  data_signed = NULL;
-  data_value = NULL;
-  sexp_request = NULL;
-  sexp_reply = NULL;
-  sexp_key = NULL;
-
-  if (key->type != GCRY_AC_KEY_SECRET)
-    {
-      err = gcry_error (GPG_ERR_WRONG_KEY_USAGE);
-      goto out;
-    }
-
-  err = ac_data_construct (ac_key_identifiers[key->type], 0, 0,
-                          handle->algorithm_name, key->data, &sexp_key);
-  if (err)
-    goto out;
-
-  err = _gcry_ac_data_new (&data_value);
-  if (err)
-    goto out;
-
-  err = _gcry_ac_data_set (data_value, 0, "value", data);
-  if (err)
-    goto out;
-
-  /* Create S-expression holding the data.  */
-  err = ac_data_construct ("data", 1, 0, NULL, data_value, &sexp_request);
-  if (err)
-    goto out;
-
-  /* Sign.  */
-  err = gcry_pk_sign (&sexp_reply, sexp_request, sexp_key);
-  if (err)
-    goto out;
-
-  /* Extract data.  */
-  err = ac_data_extract ("sig-val", handle->algorithm_name,
-                        sexp_reply, &data_signed);
-  if (err)
-    goto out;
-
-  /* Done.  */
-  *data_signature = data_signed;
-
- out:
-
-  gcry_sexp_release (sexp_request);
-  gcry_sexp_release (sexp_reply);
-  gcry_sexp_release (sexp_key);
-  _gcry_ac_data_destroy (data_value);
-
-  return gcry_error (err);
-}
-
-
-/* Verifies that the signature contained in the data set
-   DATA_SIGNATURE is indeed the result of signing the data contained
-   in DATA with the secret key belonging to the public key KEY.  */
-gcry_error_t
-_gcry_ac_data_verify (gcry_ac_handle_t handle,
-                     gcry_ac_key_t key,
-                     gcry_mpi_t data,
-                     gcry_ac_data_t data_signature)
-{
-  gcry_sexp_t sexp_signature;
-  gcry_ac_data_t data_value;
-  gcry_sexp_t sexp_data;
-  gcry_sexp_t sexp_key;
-  gcry_error_t err;
-
-  if (fips_mode ())
-    return gpg_error (GPG_ERR_NOT_SUPPORTED);
-
-  sexp_signature = NULL;
-  data_value = NULL;
-  sexp_data = NULL;
-  sexp_key = NULL;
-
-  err = ac_data_construct ("public-key", 0, 0,
-                          handle->algorithm_name, key->data, &sexp_key);
-  if (err)
-    goto out;
-
-  if (key->type != GCRY_AC_KEY_PUBLIC)
-    {
-      err = gcry_error (GPG_ERR_WRONG_KEY_USAGE);
-      goto out;
-    }
-
-  /* Construct S-expression holding the signature data.  */
-  err = ac_data_construct ("sig-val", 1, 0, handle->algorithm_name,
-                          data_signature, &sexp_signature);
-  if (err)
-    goto out;
-
-  err = _gcry_ac_data_new (&data_value);
-  if (err)
-    goto out;
-
-  err = _gcry_ac_data_set (data_value, 0, "value", data);
-  if (err)
-    goto out;
-
-  /* Construct S-expression holding the data.  */
-  err = ac_data_construct ("data", 1, 0, NULL, data_value, &sexp_data);
-  if (err)
-    goto out;
-
-  /* Verify signature.  */
-  err = gcry_pk_verify (sexp_signature, sexp_data, sexp_key);
-
- out:
-
-  gcry_sexp_release (sexp_signature);
-  gcry_sexp_release (sexp_data);
-  gcry_sexp_release (sexp_key);
-  _gcry_ac_data_destroy (data_value);
-
-  return gcry_error (err);
-}
-
-
-
-
-/*
- * Implementation of encoding methods (em).
- */
-
-/* Type for functions that encode or decode (hence the name) a
-   message.  */
-typedef gcry_error_t (*gcry_ac_em_dencode_t) (unsigned int flags,
-                                                void *options,
-                                                gcry_ac_io_t *ac_io_read,
-                                                gcry_ac_io_t *ac_io_write);
-
-/* Fill the buffer BUFFER which is BUFFER_N bytes long with non-zero
-   random bytes of random level LEVEL.  */
-static void
-em_randomize_nonzero (unsigned char *buffer, size_t buffer_n,
-                     gcry_random_level_t level)
-{
-  unsigned char *buffer_rand;
-  unsigned int buffer_rand_n;
-  unsigned int zeros;
-  unsigned int i;
-  unsigned int j;
-
-  for (i = 0; i < buffer_n; i++)
-    buffer[i] = 0;
-
-  do
-    {
-      /* Count zeros.  */
-      for (i = zeros = 0; i < buffer_n; i++)
-       if (! buffer[i])
-         zeros++;
-
-      if (zeros)
-       {
-         /* Get random bytes.  */
-         buffer_rand_n = zeros + (zeros / 128);
-         buffer_rand = gcry_random_bytes_secure (buffer_rand_n, level);
-
-         /* Substitute zeros with non-zero random bytes.  */
-         for (i = j = 0; zeros && (i < buffer_n) && (j < buffer_rand_n); i++)
-           if (! buffer[i])
-             {
-               while ((j < buffer_rand_n) && (! buffer_rand[j]))
-                 j++;
-               if (j < buffer_rand_n)
-                 {
-                   buffer[i] = buffer_rand[j++];
-                   zeros--;
-                 }
-               else
-                 break;
-             }
-         gcry_free (buffer_rand);
-       }
-    }
-  while (zeros);
-}
-
-/* Encode a message according to the Encoding Method for Encryption
-   `PKCS-V1_5' (EME-PKCS-V1_5).  */
-static gcry_error_t
-eme_pkcs_v1_5_encode (unsigned int flags, void *opts,
-                     gcry_ac_io_t *ac_io_read,
-                     gcry_ac_io_t *ac_io_write)
-{
-  gcry_ac_eme_pkcs_v1_5_t *options;
-  gcry_error_t err;
-  unsigned char *buffer;
-  unsigned char *ps;
-  unsigned char *m;
-  size_t m_n;
-  unsigned int ps_n;
-  unsigned int k;
-
-  (void)flags;
-
-  options = opts;
-  buffer = NULL;
-  m = NULL;
-
-  err = _gcry_ac_io_read_all (ac_io_read, &m, &m_n);
-  if (err)
-    goto out;
-
-  /* Figure out key length in bytes.  */
-  k = options->key_size / 8;
-
-  if (m_n > k - 11)
-    {
-      /* Key is too short for message.  */
-      err = gcry_error (GPG_ERR_TOO_SHORT);
-      goto out;
-    }
-
-  /* According to this encoding method, the first byte of the encoded
-     message is zero.  This byte will be lost anyway, when the encoded
-     message is to be converted into an MPI, that's why we skip
-     it.  */
-
-  /* Allocate buffer.  */
-  buffer = gcry_malloc (k - 1);
-  if (! buffer)
-    {
-      err = gcry_error_from_errno (errno);
-      goto out;
-    }
-
-  /* Generate an octet string PS of length k - mLen - 3 consisting
-     of pseudorandomly generated nonzero octets.  The length of PS
-     will be at least eight octets.  */
-  ps_n = k - m_n - 3;
-  ps = buffer + 1;
-  em_randomize_nonzero (ps, ps_n, GCRY_STRONG_RANDOM);
-
-  /* Concatenate PS, the message M, and other padding to form an
-     encoded message EM of length k octets as:
-
-     EM = 0x00 || 0x02 || PS || 0x00 || M.  */
-
-  buffer[0] = 0x02;
-  buffer[ps_n + 1] = 0x00;
-  memcpy (buffer + ps_n + 2, m, m_n);
-
-  err = _gcry_ac_io_write (ac_io_write, buffer, k - 1);
-
- out:
-
-  gcry_free (buffer);
-  gcry_free (m);
-
-  return err;
-}
-
-/* Decode a message according to the Encoding Method for Encryption
-   `PKCS-V1_5' (EME-PKCS-V1_5).  */
-static gcry_error_t
-eme_pkcs_v1_5_decode (unsigned int flags, void *opts,
-                     gcry_ac_io_t *ac_io_read,
-                     gcry_ac_io_t *ac_io_write)
-{
-  gcry_ac_eme_pkcs_v1_5_t *options;
-  unsigned char *buffer;
-  unsigned char *em;
-  size_t em_n;
-  gcry_error_t err;
-  unsigned int i;
-  unsigned int k;
-
-  (void)flags;
-
-  options = opts;
-  buffer = NULL;
-  em = NULL;
-
-  err = _gcry_ac_io_read_all (ac_io_read, &em, &em_n);
-  if (err)
-    goto out;
-
-  /* Figure out key size.  */
-  k = options->key_size / 8;
-
-  /* Search for zero byte.  */
-  for (i = 0; (i < em_n) && em[i]; i++);
-
-  /* According to this encoding method, the first byte of the encoded
-     message should be zero.  This byte is lost.  */
-
-  if (! ((em_n >= 10)
-        && (em_n == (k - 1))
-        && (em[0] == 0x02)
-        && (i < em_n)
-        && ((i - 1) >= 8)))
-    {
-      err = gcry_error (GPG_ERR_DECRYPT_FAILED);
-      goto out;
-    }
-
-  i++;
-  buffer = gcry_malloc (em_n - i);
-  if (! buffer)
-    {
-      err = gcry_error_from_errno (errno);
-      goto out;
-    }
-
-  memcpy (buffer, em + i, em_n - i);
-  err = _gcry_ac_io_write (ac_io_write, buffer, em_n - i);
-
- out:
-
-  gcry_free (buffer);
-  gcry_free (em);
-
-  return err;
-}
-
-static gcry_error_t
-emsa_pkcs_v1_5_encode_data_cb (void *opaque,
-                              unsigned char *buffer, size_t buffer_n)
-{
-  gcry_md_hd_t md_handle;
-
-  md_handle = opaque;
-  gcry_md_write (md_handle, buffer, buffer_n);
-
-  return 0;
-}
-
-
-/* Encode a message according to the Encoding Method for Signatures
-   with Appendix `PKCS-V1_5' (EMSA-PKCS-V1_5).  */
-static gcry_error_t
-emsa_pkcs_v1_5_encode (unsigned int flags, void *opts,
-                      gcry_ac_io_t *ac_io_read,
-                      gcry_ac_io_t *ac_io_write)
-{
-  gcry_ac_emsa_pkcs_v1_5_t *options;
-  gcry_error_t err;
-  gcry_md_hd_t md;
-  unsigned char *t;
-  size_t t_n;
-  unsigned char *h;
-  size_t h_n;
-  unsigned char *ps;
-  size_t ps_n;
-  unsigned char *buffer;
-  size_t buffer_n;
-  unsigned char asn[100];      /* FIXME, always enough?  */
-  size_t asn_n;
-  unsigned int i;
-
-  (void)flags;
-
-  options = opts;
-  buffer = NULL;
-  md = NULL;
-  ps = NULL;
-  t = NULL;
-
-  /* Create hashing handle and get the necessary information.  */
-  err = gcry_md_open (&md, options->md, 0);
-  if (err)
-    goto out;
-
-  asn_n = DIM (asn);
-  err = gcry_md_algo_info (options->md, GCRYCTL_GET_ASNOID, asn, &asn_n);
-  if (err)
-    goto out;
-
-  h_n = gcry_md_get_algo_dlen (options->md);
-
-  err = _gcry_ac_io_process (ac_io_read, emsa_pkcs_v1_5_encode_data_cb, md);
-  if (err)
-    goto out;
-
-  h = gcry_md_read (md, 0);
-
-  /* Encode the algorithm ID for the hash function and the hash value
-     into an ASN.1 value of type DigestInfo with the Distinguished
-     Encoding Rules (DER), where the type DigestInfo has the syntax:
-
-     DigestInfo ::== SEQUENCE {
-     digestAlgorithm AlgorithmIdentifier,
-     digest OCTET STRING
-     }
-
-     The first field identifies the hash function and the second
-     contains the hash value.  Let T be the DER encoding of the
-     DigestInfo value and let tLen be the length in octets of T.  */
-
-  t_n = asn_n + h_n;
-  t = gcry_malloc (t_n);
-  if (! t)
-    {
-      err = gcry_error_from_errno (errno);
-      goto out;
-    }
-
-  for (i = 0; i < asn_n; i++)
-    t[i] = asn[i];
-  for (i = 0; i < h_n; i++)
-    t[asn_n + i] = h[i];
-
-  /* If emLen < tLen + 11, output "intended encoded message length
-     too short" and stop.  */
-  if (options->em_n < t_n + 11)
-    {
-      err = gcry_error (GPG_ERR_TOO_SHORT);
-      goto out;
-    }
-
-  /* Generate an octet string PS consisting of emLen - tLen - 3 octets
-     with hexadecimal value 0xFF.  The length of PS will be at least 8
-     octets.  */
-  ps_n = options->em_n - t_n - 3;
-  ps = gcry_malloc (ps_n);
-  if (! ps)
-    {
-      err = gcry_error_from_errno (errno);
-      goto out;
-    }
-  for (i = 0; i < ps_n; i++)
-    ps[i] = 0xFF;
-
-  /* Concatenate PS, the DER encoding T, and other padding to form the
-     encoded message EM as:
-
-     EM = 0x00 || 0x01 || PS || 0x00 || T.  */
-
-  buffer_n = ps_n + t_n + 3;
-  buffer = gcry_malloc (buffer_n);
-  if (! buffer)
-    {
-      err = gcry_error_from_errno (errno);
-      goto out;
-    }
-
-  buffer[0] = 0x00;
-  buffer[1] = 0x01;
-  for (i = 0; i < ps_n; i++)
-    buffer[2 + i] = ps[i];
-  buffer[2 + ps_n] = 0x00;
-  for (i = 0; i < t_n; i++)
-    buffer[3 + ps_n + i] = t[i];
-
-  err = _gcry_ac_io_write (ac_io_write, buffer, buffer_n);
-
- out:
-
-  gcry_md_close (md);
-
-  gcry_free (buffer);
-  gcry_free (ps);
-  gcry_free (t);
-
-  return err;
-}
-
-/* `Actions' for data_dencode().  */
-typedef enum dencode_action
-  {
-    DATA_ENCODE,
-    DATA_DECODE,
-  }
-dencode_action_t;
-
-/* Encode or decode a message according to the the encoding method
-   METHOD; ACTION specifies whether the message that is contained in
-   BUFFER_IN and of length BUFFER_IN_N should be encoded or decoded.
-   The resulting message will be stored in a newly allocated buffer in
-   BUFFER_OUT and BUFFER_OUT_N.  */
-static gcry_error_t
-ac_data_dencode (gcry_ac_em_t method, dencode_action_t action,
-                unsigned int flags, void *options,
-                gcry_ac_io_t *ac_io_read,
-                gcry_ac_io_t *ac_io_write)
-{
-  struct
-  {
-    gcry_ac_em_t method;
-    gcry_ac_em_dencode_t encode;
-    gcry_ac_em_dencode_t decode;
-  } methods[] =
-    {
-      { GCRY_AC_EME_PKCS_V1_5,
-       eme_pkcs_v1_5_encode, eme_pkcs_v1_5_decode },
-      { GCRY_AC_EMSA_PKCS_V1_5,
-       emsa_pkcs_v1_5_encode, NULL },
-    };
-  size_t methods_n;
-  gcry_error_t err;
-  unsigned int i;
-
-  methods_n = sizeof (methods) / sizeof (*methods);
-
-  for (i = 0; i < methods_n; i++)
-    if (methods[i].method == method)
-      break;
-  if (i == methods_n)
-    {
-      err = gcry_error (GPG_ERR_NOT_FOUND);    /* FIXME? */
-      goto out;
-    }
-
-  err = 0;
-  switch (action)
-    {
-    case DATA_ENCODE:
-      if (methods[i].encode)
-       /* FIXME? */
-       err = (*methods[i].encode) (flags, options, ac_io_read, ac_io_write);
-      break;
-
-    case DATA_DECODE:
-      if (methods[i].decode)
-       /* FIXME? */
-       err = (*methods[i].decode) (flags, options, ac_io_read, ac_io_write);
-      break;
-
-    default:
-      err = gcry_error (GPG_ERR_INV_ARG);
-      break;
-    }
-
- out:
-
-  return err;
-}
-
-/* Encode a message according to the encoding method METHOD.  OPTIONS
-   must be a pointer to a method-specific structure
-   (gcry_ac_em*_t).  */
-gcry_error_t
-_gcry_ac_data_encode (gcry_ac_em_t method,
-                     unsigned int flags, void *options,
-                     gcry_ac_io_t *ac_io_read,
-                     gcry_ac_io_t *ac_io_write)
-{
-  if (fips_mode ())
-    return gpg_error (GPG_ERR_NOT_SUPPORTED);
-
-  return ac_data_dencode (method, DATA_ENCODE, flags, options,
-                         ac_io_read, ac_io_write);
-}
-
-/* Dencode a message according to the encoding method METHOD.  OPTIONS
-   must be a pointer to a method-specific structure
-   (gcry_ac_em*_t).  */
-gcry_error_t
-_gcry_ac_data_decode (gcry_ac_em_t method,
-                     unsigned int flags, void *options,
-                     gcry_ac_io_t *ac_io_read,
-                     gcry_ac_io_t *ac_io_write)
-{
-  if (fips_mode ())
-    return gpg_error (GPG_ERR_NOT_SUPPORTED);
-
-  return ac_data_dencode (method, DATA_DECODE, flags, options,
-                         ac_io_read, ac_io_write);
-}
-
-/* Convert an MPI into an octet string.  */
-void
-_gcry_ac_mpi_to_os (gcry_mpi_t mpi, unsigned char *os, size_t os_n)
-{
-  unsigned long digit;
-  gcry_mpi_t base;
-  unsigned int i;
-  unsigned int n;
-  gcry_mpi_t m;
-  gcry_mpi_t d;
-
-  if (fips_mode ())
-    return;
-
-  base = gcry_mpi_new (0);
-  gcry_mpi_set_ui (base, 256);
-
-  n = 0;
-  m = gcry_mpi_copy (mpi);
-  while (gcry_mpi_cmp_ui (m, 0))
-    {
-      n++;
-      gcry_mpi_div (m, NULL, m, base, 0);
-    }
-
-  gcry_mpi_set (m, mpi);
-  d = gcry_mpi_new (0);
-  for (i = 0; (i < n) && (i < os_n); i++)
-    {
-      gcry_mpi_mod (d, m, base);
-      _gcry_mpi_get_ui (d, &digit);
-      gcry_mpi_div (m, NULL, m, base, 0);
-      os[os_n - i - 1] = (digit & 0xFF);
-    }
-
-  for (; i < os_n; i++)
-    os[os_n - i - 1] = 0;
-
-  gcry_mpi_release (base);
-  gcry_mpi_release (d);
-  gcry_mpi_release (m);
-}
-
-/* Convert an MPI into an newly allocated octet string.  */
-gcry_error_t
-_gcry_ac_mpi_to_os_alloc (gcry_mpi_t mpi, unsigned char **os, size_t *os_n)
-{
-  unsigned char *buffer;
-  size_t buffer_n;
-  gcry_error_t err;
-  unsigned int nbits;
-
-  if (fips_mode ())
-    return gpg_error (GPG_ERR_NOT_SUPPORTED);
-
-  nbits = gcry_mpi_get_nbits (mpi);
-  buffer_n = (nbits + 7) / 8;
-  buffer = gcry_malloc (buffer_n);
-  if (! buffer)
-    {
-      err = gcry_error_from_errno (errno);
-      goto out;
-    }
-
-  _gcry_ac_mpi_to_os (mpi, buffer, buffer_n);
-  *os = buffer;
-  *os_n = buffer_n;
-  err = 0;
-
- out:
-
-  return err;
-}
-
-
-/* Convert an octet string into an MPI.  */
-void
-_gcry_ac_os_to_mpi (gcry_mpi_t mpi, unsigned char *os, size_t os_n)
-{
-  unsigned int i;
-  gcry_mpi_t xi;
-  gcry_mpi_t x;
-  gcry_mpi_t a;
-
-  if (fips_mode ())
-    return;
-
-  a = gcry_mpi_new (0);
-  gcry_mpi_set_ui (a, 1);
-  x = gcry_mpi_new (0);
-  gcry_mpi_set_ui (x, 0);
-  xi = gcry_mpi_new (0);
-
-  for (i = 0; i < os_n; i++)
-    {
-      gcry_mpi_mul_ui (xi, a, os[os_n - i - 1]);
-      gcry_mpi_add (x, x, xi);
-      gcry_mpi_mul_ui (a, a, 256);
-    }
-
-  gcry_mpi_release (xi);
-  gcry_mpi_release (a);
-
-  gcry_mpi_set (mpi, x);
-  gcry_mpi_release (x);                /* FIXME: correct? */
-}
-
-
-
-/*
- * Implementation of Encryption Schemes (ES) and Signature Schemes
- * with Appendix (SSA).
- */
-
-/* Schemes consist of two things: encoding methods and cryptographic
-   primitives.
-
-   Since encoding methods are accessible through a common API with
-   method-specific options passed as an anonymous struct, schemes have
-   to provide functions that construct this method-specific structure;
-   this is what the functions of type `gcry_ac_dencode_prepare_t' are
-   there for.  */
-
-typedef gcry_error_t (*gcry_ac_dencode_prepare_t) (gcry_ac_handle_t handle,
-                                                  gcry_ac_key_t key,
-                                                  void *opts,
-                                                  void *opts_em);
-
-/* The `dencode_prepare' function for ES-PKCS-V1_5.  */
-static gcry_error_t
-ac_es_dencode_prepare_pkcs_v1_5 (gcry_ac_handle_t handle, gcry_ac_key_t key,
-                                void *opts, void *opts_em)
-{
-  gcry_ac_eme_pkcs_v1_5_t *options_em;
-  unsigned int nbits;
-  gcry_error_t err;
-
-  (void)opts;
-
-  err = _gcry_ac_key_get_nbits (handle, key, &nbits);
-  if (err)
-    goto out;
-
-  options_em = opts_em;
-  options_em->key_size = nbits;
-
- out:
-
-  return err;
-}
-
-/* The `dencode_prepare' function for SSA-PKCS-V1_5.  */
-static gcry_error_t
-ac_ssa_dencode_prepare_pkcs_v1_5 (gcry_ac_handle_t handle, gcry_ac_key_t key,
-                                 void *opts, void *opts_em)
-{
-  gcry_ac_emsa_pkcs_v1_5_t *options_em;
-  gcry_ac_ssa_pkcs_v1_5_t *options;
-  gcry_error_t err;
-  unsigned int k;
-
-  options_em = opts_em;
-  options = opts;
-
-  err = _gcry_ac_key_get_nbits (handle, key, &k);
-  if (err)
-    goto out;
-
-  k = (k + 7) / 8;
-  options_em->md = options->md;
-  options_em->em_n = k;
-
- out:
-
-  return err;
-}
-
-/* Type holding the information about each supported
-   Encryption/Signature Scheme.  */
-typedef struct ac_scheme
-{
-  gcry_ac_scheme_t scheme;
-  gcry_ac_em_t scheme_encoding;
-  gcry_ac_dencode_prepare_t dencode_prepare;
-  size_t options_em_n;
-} ac_scheme_t;
-
-/* List of supported Schemes.  */
-static ac_scheme_t ac_schemes[] =
-  {
-    { GCRY_AC_ES_PKCS_V1_5, GCRY_AC_EME_PKCS_V1_5,
-      ac_es_dencode_prepare_pkcs_v1_5,
-      sizeof (gcry_ac_eme_pkcs_v1_5_t) },
-    { GCRY_AC_SSA_PKCS_V1_5, GCRY_AC_EMSA_PKCS_V1_5,
-      ac_ssa_dencode_prepare_pkcs_v1_5,
-      sizeof (gcry_ac_emsa_pkcs_v1_5_t) }
-  };
-
-/* Lookup a scheme by it's ID.  */
-static ac_scheme_t *
-ac_scheme_get (gcry_ac_scheme_t scheme)
-{
-  ac_scheme_t *ac_scheme;
-  unsigned int i;
-
-  for (i = 0; i < DIM (ac_schemes); i++)
-    if (scheme == ac_schemes[i].scheme)
-      break;
-  if (i == DIM (ac_schemes))
-    ac_scheme = NULL;
-  else
-    ac_scheme = ac_schemes + i;
-
-  return ac_scheme;
-}
-
-/* Prepares the encoding/decoding by creating an according option
-   structure.  */
-static gcry_error_t
-ac_dencode_prepare (gcry_ac_handle_t handle, gcry_ac_key_t key, void *opts,
-                   ac_scheme_t scheme, void **opts_em)
-{
-  gcry_error_t err;
-  void *options_em;
-
-  options_em = gcry_malloc (scheme.options_em_n);
-  if (! options_em)
-    {
-      err = gcry_error_from_errno (errno);
-      goto out;
-    }
-
-  err = (*scheme.dencode_prepare) (handle, key, opts, options_em);
-  if (err)
-    goto out;
-
-  *opts_em = options_em;
-
- out:
-
-  if (err)
-    free (options_em);
-
-  return err;
-}
-
-/* Convert a data set into a single MPI; currently, this is only
-   supported for data sets containing a single MPI.  */
-static gcry_error_t
-ac_data_set_to_mpi (gcry_ac_data_t data, gcry_mpi_t *mpi)
-{
-  gcry_error_t err;
-  gcry_mpi_t mpi_new;
-  unsigned int elems;
-
-  elems = _gcry_ac_data_length (data);
-
-  if (elems != 1)
-    {
-      /* FIXME: I guess, we should be more flexible in this respect by
-        allowing the actual encryption/signature schemes to implement
-        this conversion mechanism.  */
-      err = gcry_error (GPG_ERR_CONFLICT);
-      goto out;
-    }
-
-  err = _gcry_ac_data_get_index (data, GCRY_AC_FLAG_COPY, 0, NULL, &mpi_new);
-  if (err)
-    goto out;
-
-  *mpi = mpi_new;
-
- out:
-
-  return err;
-}
-
-/* Encrypts the plain text message contained in M, which is of size
-   M_N, with the public key KEY_PUBLIC according to the Encryption
-   Scheme SCHEME_ID.  HANDLE is used for accessing the low-level
-   cryptographic primitives.  If OPTS is not NULL, it has to be an
-   anonymous structure specific to the chosen scheme (gcry_ac_es_*_t).
-   The encrypted message will be stored in C and C_N.  */
-gcry_error_t
-_gcry_ac_data_encrypt_scheme (gcry_ac_handle_t handle,
-                             gcry_ac_scheme_t scheme_id,
-                             unsigned int flags, void *opts,
-                             gcry_ac_key_t key,
-                             gcry_ac_io_t *io_message,
-                             gcry_ac_io_t *io_cipher)
-{
-  gcry_error_t err;
-  gcry_ac_io_t io_em;
-  unsigned char *em;
-  size_t em_n;
-  gcry_mpi_t mpi_plain;
-  gcry_ac_data_t data_encrypted;
-  gcry_mpi_t mpi_encrypted;
-  unsigned char *buffer;
-  size_t buffer_n;
-  void *opts_em;
-  ac_scheme_t *scheme;
-
-  (void)flags;
-
-  if (fips_mode ())
-    return gpg_error (GPG_ERR_NOT_SUPPORTED);
-
-  data_encrypted = NULL;
-  mpi_encrypted = NULL;
-  mpi_plain = NULL;
-  opts_em = NULL;
-  buffer = NULL;
-  em = NULL;
-
-  scheme = ac_scheme_get (scheme_id);
-  if (! scheme)
-    {
-      err = gcry_error (GPG_ERR_NO_ENCRYPTION_SCHEME);
-      goto out;
-    }
-
-  if (key->type != GCRY_AC_KEY_PUBLIC)
-    {
-      err = gcry_error (GPG_ERR_WRONG_KEY_USAGE);
-      goto out;
-    }
-
-  err = ac_dencode_prepare (handle, key, opts, *scheme, &opts_em);
-  if (err)
-    goto out;
-
-  _gcry_ac_io_init (&io_em, GCRY_AC_IO_WRITABLE,
-                   GCRY_AC_IO_STRING, &em, &em_n);
-
-  err = _gcry_ac_data_encode (scheme->scheme_encoding, 0, opts_em,
-                             io_message, &io_em);
-  if (err)
-    goto out;
-
-  mpi_plain = gcry_mpi_snew (0);
-  gcry_ac_os_to_mpi (mpi_plain, em, em_n);
-
-  err = _gcry_ac_data_encrypt (handle, 0, key, mpi_plain, &data_encrypted);
-  if (err)
-    goto out;
-
-  err = ac_data_set_to_mpi (data_encrypted, &mpi_encrypted);
-  if (err)
-    goto out;
-
-  err = _gcry_ac_mpi_to_os_alloc (mpi_encrypted, &buffer, &buffer_n);
-  if (err)
-    goto out;
-
-  err = _gcry_ac_io_write (io_cipher, buffer, buffer_n);
-
- out:
-
-  gcry_ac_data_destroy (data_encrypted);
-  gcry_mpi_release (mpi_encrypted);
-  gcry_mpi_release (mpi_plain);
-  gcry_free (opts_em);
-  gcry_free (buffer);
-  gcry_free (em);
-
-  return err;
-}
-
-/* Decryptes the cipher message contained in C, which is of size C_N,
-   with the secret key KEY_SECRET according to the Encryption Scheme
-   SCHEME_ID.  Handle is used for accessing the low-level
-   cryptographic primitives.  If OPTS is not NULL, it has to be an
-   anonymous structure specific to the chosen scheme (gcry_ac_es_*_t).
-   The decrypted message will be stored in M and M_N.  */
-gcry_error_t
-_gcry_ac_data_decrypt_scheme (gcry_ac_handle_t handle,
-                             gcry_ac_scheme_t scheme_id,
-                             unsigned int flags, void *opts,
-                             gcry_ac_key_t key,
-                             gcry_ac_io_t *io_cipher,
-                             gcry_ac_io_t *io_message)
-{
-  gcry_ac_io_t io_em;
-  gcry_error_t err;
-  gcry_ac_data_t data_encrypted;
-  unsigned char *em;
-  size_t em_n;
-  gcry_mpi_t mpi_encrypted;
-  gcry_mpi_t mpi_decrypted;
-  void *opts_em;
-  ac_scheme_t *scheme;
-  char *elements_enc;
-  size_t elements_enc_n;
-  unsigned char *c;
-  size_t c_n;
-
-  (void)flags;
-
-  if (fips_mode ())
-    return gpg_error (GPG_ERR_NOT_SUPPORTED);
-
-  data_encrypted = NULL;
-  mpi_encrypted = NULL;
-  mpi_decrypted = NULL;
-  elements_enc = NULL;
-  opts_em = NULL;
-  em = NULL;
-  c = NULL;
-
-  scheme = ac_scheme_get (scheme_id);
-  if (! scheme)
-    {
-      err = gcry_error (GPG_ERR_NO_ENCRYPTION_SCHEME);
-      goto out;
-    }
-
-  if (key->type != GCRY_AC_KEY_SECRET)
-    {
-      err = gcry_error (GPG_ERR_WRONG_KEY_USAGE);
-      goto out;
-    }
-
-  err = _gcry_ac_io_read_all (io_cipher, &c, &c_n);
-  if (err)
-    goto out;
-
-  mpi_encrypted = gcry_mpi_snew (0);
-  gcry_ac_os_to_mpi (mpi_encrypted, c, c_n);
-
-  err = _gcry_pk_get_elements (handle->algorithm, &elements_enc, NULL);
-  if (err)
-    goto out;
-
-  elements_enc_n = strlen (elements_enc);
-  if (elements_enc_n != 1)
-    {
-      /* FIXME? */
-      err = gcry_error (GPG_ERR_CONFLICT);
-      goto out;
-    }
-
-  err = _gcry_ac_data_new (&data_encrypted);
-  if (err)
-    goto out;
-
-  err = _gcry_ac_data_set (data_encrypted, GCRY_AC_FLAG_COPY | 
GCRY_AC_FLAG_DEALLOC,
-                          elements_enc, mpi_encrypted);
-  if (err)
-    goto out;
-
-  err = _gcry_ac_data_decrypt (handle, 0, key, &mpi_decrypted, data_encrypted);
-  if (err)
-    goto out;
-
-  err = _gcry_ac_mpi_to_os_alloc (mpi_decrypted, &em, &em_n);
-  if (err)
-    goto out;
-
-  err = ac_dencode_prepare (handle, key, opts, *scheme, &opts_em);
-  if (err)
-    goto out;
-
-  _gcry_ac_io_init (&io_em, GCRY_AC_IO_READABLE,
-                   GCRY_AC_IO_STRING, em, em_n);
-
-  err = _gcry_ac_data_decode (scheme->scheme_encoding, 0, opts_em,
-                             &io_em, io_message);
-  if (err)
-    goto out;
-
- out:
-
-  _gcry_ac_data_destroy (data_encrypted);
-  gcry_mpi_release (mpi_encrypted);
-  gcry_mpi_release (mpi_decrypted);
-  free (elements_enc);
-  gcry_free (opts_em);
-  gcry_free (em);
-  gcry_free (c);
-
-  return err;
-}
-
-
-/* Signs the message contained in M, which is of size M_N, with the
-   secret key KEY according to the Signature Scheme SCHEME_ID.  Handle
-   is used for accessing the low-level cryptographic primitives.  If
-   OPTS is not NULL, it has to be an anonymous structure specific to
-   the chosen scheme (gcry_ac_ssa_*_t).  The signed message will be
-   stored in S and S_N.  */
-gcry_error_t
-_gcry_ac_data_sign_scheme (gcry_ac_handle_t handle,
-                          gcry_ac_scheme_t scheme_id,
-                          unsigned int flags, void *opts,
-                          gcry_ac_key_t key,
-                          gcry_ac_io_t *io_message,
-                          gcry_ac_io_t *io_signature)
-{
-  gcry_ac_io_t io_em;
-  gcry_error_t err;
-  gcry_ac_data_t data_signed;
-  unsigned char *em;
-  size_t em_n;
-  gcry_mpi_t mpi;
-  void *opts_em;
-  unsigned char *buffer;
-  size_t buffer_n;
-  gcry_mpi_t mpi_signed;
-  ac_scheme_t *scheme;
-
-  (void)flags;
-
-  if (fips_mode ())
-    return gpg_error (GPG_ERR_NOT_SUPPORTED);
-
-  data_signed = NULL;
-  mpi_signed = NULL;
-  opts_em = NULL;
-  buffer = NULL;
-  mpi = NULL;
-  em = NULL;
-
-  if (key->type != GCRY_AC_KEY_SECRET)
-    {
-      err = gcry_error (GPG_ERR_WRONG_KEY_USAGE);
-      goto out;
-    }
-
-  scheme = ac_scheme_get (scheme_id);
-  if (! scheme)
-    {
-      /* FIXME: adjust api of scheme_get in respect to err codes.  */
-      err = gcry_error (GPG_ERR_NO_SIGNATURE_SCHEME);
-      goto out;
-    }
-
-  err = ac_dencode_prepare (handle, key, opts, *scheme, &opts_em);
-  if (err)
-    goto out;
-
-  _gcry_ac_io_init (&io_em, GCRY_AC_IO_WRITABLE,
-                   GCRY_AC_IO_STRING, &em, &em_n);
-
-  err = _gcry_ac_data_encode (scheme->scheme_encoding, 0, opts_em,
-                             io_message, &io_em);
-  if (err)
-    goto out;
-
-  mpi = gcry_mpi_new (0);
-  _gcry_ac_os_to_mpi (mpi, em, em_n);
-
-  err = _gcry_ac_data_sign (handle, key, mpi, &data_signed);
-  if (err)
-    goto out;
-
-  err = ac_data_set_to_mpi (data_signed, &mpi_signed);
-  if (err)
-    goto out;
-
-  err = _gcry_ac_mpi_to_os_alloc (mpi_signed, &buffer, &buffer_n);
-  if (err)
-    goto out;
-
-  err = _gcry_ac_io_write (io_signature, buffer, buffer_n);
-
- out:
-
-  _gcry_ac_data_destroy (data_signed);
-  gcry_mpi_release (mpi_signed);
-  gcry_mpi_release (mpi);
-  gcry_free (opts_em);
-  gcry_free (buffer);
-  gcry_free (em);
-
-  return err;
-}
-
-/* Verifies that the signature contained in S, which is of length S_N,
-   is indeed the result of signing the message contained in M, which
-   is of size M_N, with the secret key belonging to the public key
-   KEY_PUBLIC.  If OPTS is not NULL, it has to be an anonymous
-   structure (gcry_ac_ssa_*_t) specific to the Signature Scheme, whose
-   ID is contained in SCHEME_ID.  */
-gcry_error_t
-_gcry_ac_data_verify_scheme (gcry_ac_handle_t handle,
-                            gcry_ac_scheme_t scheme_id,
-                            unsigned int flags, void *opts,
-                            gcry_ac_key_t key,
-                            gcry_ac_io_t *io_message,
-                            gcry_ac_io_t *io_signature)
-{
-  gcry_ac_io_t io_em;
-  gcry_error_t err;
-  gcry_ac_data_t data_signed;
-  unsigned char *em;
-  size_t em_n;
-  void *opts_em;
-  gcry_mpi_t mpi_signature;
-  gcry_mpi_t mpi_data;
-  ac_scheme_t *scheme;
-  char *elements_sig;
-  size_t elements_sig_n;
-  unsigned char *s;
-  size_t s_n;
-
-  (void)flags;
-
-  if (fips_mode ())
-    return gpg_error (GPG_ERR_NOT_SUPPORTED);
-
-  mpi_signature = NULL;
-  elements_sig = NULL;
-  data_signed = NULL;
-  mpi_data = NULL;
-  opts_em = NULL;
-  em = NULL;
-  s = NULL;
-
-  if (key->type != GCRY_AC_KEY_PUBLIC)
-    {
-      err = gcry_error (GPG_ERR_WRONG_KEY_USAGE);
-      goto out;
-    }
-
-  scheme = ac_scheme_get (scheme_id);
-  if (! scheme)
-    {
-      err = gcry_error (GPG_ERR_NO_SIGNATURE_SCHEME);
-      goto out;
-    }
-
-  err = ac_dencode_prepare (handle, key, opts, *scheme, &opts_em);
-  if (err)
-    goto out;
-
-  _gcry_ac_io_init (&io_em, GCRY_AC_IO_WRITABLE,
-                   GCRY_AC_IO_STRING, &em, &em_n);
-
-  err = _gcry_ac_data_encode (scheme->scheme_encoding, 0, opts_em,
-                             io_message, &io_em);
-  if (err)
-    goto out;
-
-  mpi_data = gcry_mpi_new (0);
-  _gcry_ac_os_to_mpi (mpi_data, em, em_n);
-
-  err = _gcry_ac_io_read_all (io_signature, &s, &s_n);
-  if (err)
-    goto out;
-
-  mpi_signature = gcry_mpi_new (0);
-  _gcry_ac_os_to_mpi (mpi_signature, s, s_n);
-
-  err = _gcry_pk_get_elements (handle->algorithm, NULL, &elements_sig);
-  if (err)
-    goto out;
-
-  elements_sig_n = strlen (elements_sig);
-  if (elements_sig_n != 1)
-    {
-      /* FIXME? */
-      err = gcry_error (GPG_ERR_CONFLICT);
-      goto out;
-    }
-
-  err = _gcry_ac_data_new (&data_signed);
-  if (err)
-    goto out;
-
-  err = _gcry_ac_data_set (data_signed, GCRY_AC_FLAG_COPY | 
GCRY_AC_FLAG_DEALLOC,
-                          elements_sig, mpi_signature);
-  if (err)
-    goto out;
-
-  gcry_mpi_release (mpi_signature);
-  mpi_signature = NULL;
-
-  err = _gcry_ac_data_verify (handle, key, mpi_data, data_signed);
-
- out:
-
-  _gcry_ac_data_destroy (data_signed);
-  gcry_mpi_release (mpi_signature);
-  gcry_mpi_release (mpi_data);
-  free (elements_sig);
-  gcry_free (opts_em);
-  gcry_free (em);
-  gcry_free (s);
-
-  return err;
-}
-
-
-/*
- * General functions.
- */
-
-gcry_err_code_t
-_gcry_ac_init (void)
-{
-  if (fips_mode ())
-    return GPG_ERR_NOT_SUPPORTED;
-
-  return 0;
-}
diff --git a/grub-core/lib/libgcrypt/cipher/arcfour-amd64.S 
b/grub-core/lib/libgcrypt/cipher/arcfour-amd64.S
new file mode 100644
index 000000000..2abd90a7e
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/arcfour-amd64.S
@@ -0,0 +1,108 @@
+/*
+** RC4 implementation optimized for AMD64.
+**
+** Author: Marc Bevand <bevand_m (at) epita.fr>
+** Licence: I hereby disclaim the copyright on this code and place it
+** in the public domain.
+**
+** The throughput achieved by this code is about 320 MBytes/sec, on
+** a 1.8 GHz AMD Opteron (rev C0) processor.
+**
+** 2013/12/20 <jussi.kivilinna@iki.fi>:
+**  - Integrated to libgcrypt
+**  - 4.18 cycles/byte on Intel i5-4570
+*/
+
+#ifdef __x86_64__
+#include <config.h>
+#if defined(USE_ARCFOUR) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || 
\
+    defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+
+#include "asm-common-amd64.h"
+
+.text
+.align 16
+.globl _gcry_arcfour_amd64
+ELF(.type _gcry_arcfour_amd64,@function)
+_gcry_arcfour_amd64:
+       CFI_STARTPROC()
+       ENTER_SYSV_FUNC_PARAMS_0_4
+       push    %rbp
+       CFI_PUSH(%rbp)
+       push    %rbx
+       CFI_PUSH(%rbx)
+       mov     %rdi,           %rbp    # key = ARG(key)
+       mov     %rsi,           %rbx    # rbx = ARG(len)
+       mov     %rdx,           %rsi    # in = ARG(in)
+       mov     %rcx,           %rdi    # out = ARG(out)
+       mov     (4*256)(%rbp),  %ecx    # x = key->x
+       mov     (4*256+4)(%rbp),%edx    # y = key->y
+       inc     %rcx                    # x++
+       and     $255,           %rcx    # x &= 0xff
+       lea     -8(%rbx,%rsi),  %rbx    # rbx = in+len-8
+       mov     %rbx,           %r9     # tmp = in+len-8
+       mov     (%rbp,%rcx,4),  %eax    # tx = d[x]
+       cmp     %rsi,           %rbx    # cmp in with in+len-8
+       jl      .Lend                   # jump if (in+len-8 < in)
+
+.Lstart:
+       add     $8,             %rsi            # increment in
+       add     $8,             %rdi            # increment out
+
+       # generate the next 8 bytes of the rc4 stream into %r8
+       mov     $8,             %r11            # byte counter
+1:     add     %al,            %dl             # y += tx
+       mov     (%rbp,%rdx,4),  %ebx            # ty = d[y]
+       mov     %ebx,           (%rbp,%rcx,4)   # d[x] = ty
+       add     %al,            %bl             # val = ty + tx
+       mov     %eax,           (%rbp,%rdx,4)   # d[y] = tx
+       inc     %cl                             # x++           (NEXT ROUND)
+       mov     (%rbp,%rcx,4),  %eax            # tx = d[x]     (NEXT ROUND)
+       shl     $8,             %r8
+       movb    (%rbp,%rbx,4),  %r8b            # val = d[val]
+       dec     %r11b
+       jnz 1b
+
+       # xor 8 bytes
+       bswap   %r8
+       xor     -8(%rsi),       %r8
+       cmp     %r9,            %rsi            # cmp in+len-8 with in
+       mov     %r8,            -8(%rdi)
+       jle     .Lstart                         # jump if (in <= in+len-8)
+
+.Lend:
+       add     $8,             %r9             # tmp = in+len
+
+       # handle the last bytes, one by one
+1:     cmp     %rsi,           %r9             # cmp in with in+len
+       jle     .Lfinished                      # jump if (in+len <= in)
+       add     %al,            %dl             # y += tx
+       mov     (%rbp,%rdx,4),  %ebx            # ty = d[y]
+       mov     %ebx,           (%rbp,%rcx,4)   # d[x] = ty
+       add     %al,            %bl             # val = ty + tx
+       mov     %eax,           (%rbp,%rdx,4)   # d[y] = tx
+       inc     %cl                             # x++           (NEXT ROUND)
+       mov     (%rbp,%rcx,4),  %eax            # tx = d[x]     (NEXT ROUND)
+       movb    (%rbp,%rbx,4),  %r8b            # val = d[val]
+       xor     (%rsi),         %r8b            # xor 1 byte
+       movb    %r8b,           (%rdi)
+       inc     %rsi                            # in++
+       inc     %rdi                            # out++
+       jmp 1b
+
+.Lfinished:
+       dec     %rcx                            # x--
+       movb    %cl,            (4*256)(%rbp)   # key->y = y
+       movb    %dl,            (4*256+4)(%rbp) # key->x = x
+       pop     %rbx
+       CFI_POP(%rbx)
+       pop     %rbp
+       CFI_POP(%rbp)
+       EXIT_SYSV_FUNC
+       ret_spec_stop
+       CFI_ENDPROC()
+.L__gcry_arcfour_amd64_end:
+ELF(.size _gcry_arcfour_amd64,.L__gcry_arcfour_amd64_end-_gcry_arcfour_amd64)
+
+#endif
+#endif
diff --git a/grub-core/lib/libgcrypt/cipher/arcfour.c 
b/grub-core/lib/libgcrypt/cipher/arcfour.c
index 6ef07fb20..353de00bd 100644
--- a/grub-core/lib/libgcrypt/cipher/arcfour.c
+++ b/grub-core/lib/libgcrypt/cipher/arcfour.c
@@ -30,18 +30,70 @@
 #include "types.h"
 #include "g10lib.h"
 #include "cipher.h"
+#include "cipher-internal.h"
+
+/* USE_AMD64_ASM indicates whether to use AMD64 assembly code. */
+#undef USE_AMD64_ASM
+#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+    defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+# define USE_AMD64_ASM 1
+#endif
 
 static const char *selftest(void);
 
+#ifdef USE_AMD64_ASM
+
+typedef struct {
+    u32 sbox[256];
+    u32 idx_i, idx_j;
+} ARCFOUR_context;
+
+void _gcry_arcfour_amd64(void *key, size_t len, const byte *indata,
+                        byte *outdata);
+
+static void
+encrypt_stream (void *context,
+                byte *outbuf, const byte *inbuf, size_t length)
+{
+  _gcry_arcfour_amd64 (context, length, inbuf, outbuf );
+}
+
+#else /*!USE_AMD64_ASM*/
+
 typedef struct {
-    int idx_i, idx_j;
     byte sbox[256];
+    int idx_i, idx_j;
 } ARCFOUR_context;
 
 static void
 do_encrypt_stream( ARCFOUR_context *ctx,
-                  byte *outbuf, const byte *inbuf, unsigned int length )
+                  byte *outbuf, const byte *inbuf, size_t length )
 {
+#ifndef __i386__
+  register unsigned int i = ctx->idx_i;
+  register byte j = ctx->idx_j;
+  register byte *sbox = ctx->sbox;
+  register byte t, u;
+
+  while ( length-- )
+    {
+      i++;
+      t = sbox[(byte)i];
+      j += t;
+      u = sbox[j];
+      sbox[(byte)i] = u;
+      u += t;
+      sbox[j] = t;
+      *outbuf++ = sbox[u] ^ *inbuf++;
+    }
+
+  ctx->idx_i = (byte)i;
+  ctx->idx_j = (byte)j;
+#else /*__i386__*/
+  /* Old implementation of arcfour is faster on i386 than the version above.
+   * This is because version above increases register pressure which on i386
+   * would push some of the variables to memory/stack.  Therefore keep this
+   * version for i386 to avoid regressing performance.  */
   register int i = ctx->idx_i;
   register int j = ctx->idx_j;
   register byte *sbox = ctx->sbox;
@@ -59,17 +111,20 @@ do_encrypt_stream( ARCFOUR_context *ctx,
 
   ctx->idx_i = i;
   ctx->idx_j = j;
+#endif
 }
 
 static void
 encrypt_stream (void *context,
-                byte *outbuf, const byte *inbuf, unsigned int length)
+                byte *outbuf, const byte *inbuf, size_t length)
 {
   ARCFOUR_context *ctx = (ARCFOUR_context *) context;
   do_encrypt_stream (ctx, outbuf, inbuf, length );
   _gcry_burn_stack (64);
 }
 
+#endif /*!USE_AMD64_ASM*/
+
 
 static gcry_err_code_t
 do_arcfour_setkey (void *context, const byte *key, unsigned int keylen)
@@ -96,27 +151,32 @@ do_arcfour_setkey (void *context, const byte *key, 
unsigned int keylen)
   ctx->idx_i = ctx->idx_j = 0;
   for (i=0; i < 256; i++ )
     ctx->sbox[i] = i;
-  for (i=0; i < 256; i++ )
-    karr[i] = key[i%keylen];
+  for (i=j=0; i < 256; i++,j++ )
+    {
+      if (j >= keylen)
+        j = 0;
+      karr[i] = key[j];
+    }
   for (i=j=0; i < 256; i++ )
     {
       int t;
-      j = (j + ctx->sbox[i] + karr[i]) % 256;
+      j = (j + ctx->sbox[i] + karr[i]) & 255;
       t = ctx->sbox[i];
       ctx->sbox[i] = ctx->sbox[j];
       ctx->sbox[j] = t;
     }
-  memset( karr, 0, 256 );
+  wipememory( karr, sizeof(karr) );
 
   return GPG_ERR_NO_ERROR;
 }
 
 static gcry_err_code_t
-arcfour_setkey ( void *context, const byte *key, unsigned int keylen )
+arcfour_setkey ( void *context, const byte *key, unsigned int keylen,
+                 cipher_bulk_ops_t *bulk_ops )
 {
   ARCFOUR_context *ctx = (ARCFOUR_context *) context;
   gcry_err_code_t rc = do_arcfour_setkey (ctx, key, keylen );
-  _gcry_burn_stack (300);
+  (void)bulk_ops;
   return rc;
 }
 
@@ -129,18 +189,18 @@ selftest(void)
 
   /* Test vector from Cryptlib labeled there: "from the
      State/Commerce Department". */
-  static byte key_1[] =
+  static const byte key_1[] =
     { 0x61, 0x8A, 0x63, 0xD2, 0xFB };
-  static byte plaintext_1[] =
+  static const byte plaintext_1[] =
     { 0xDC, 0xEE, 0x4C, 0xF9, 0x2C };
   static const byte ciphertext_1[] =
     { 0xF1, 0x38, 0x29, 0xC9, 0xDE };
 
-  arcfour_setkey( &ctx, key_1, sizeof(key_1));
+  arcfour_setkey( &ctx, key_1, sizeof(key_1), NULL);
   encrypt_stream( &ctx, scratch, plaintext_1, sizeof(plaintext_1));
   if ( memcmp (scratch, ciphertext_1, sizeof (ciphertext_1)))
     return "Arcfour encryption test 1 failed.";
-  arcfour_setkey( &ctx, key_1, sizeof(key_1));
+  arcfour_setkey( &ctx, key_1, sizeof(key_1), NULL);
   encrypt_stream(&ctx, scratch, scratch, sizeof(plaintext_1)); /* decrypt */
   if ( memcmp (scratch, plaintext_1, sizeof (plaintext_1)))
     return "Arcfour decryption test 1 failed.";
@@ -150,6 +210,7 @@ selftest(void)
 
 gcry_cipher_spec_t _gcry_cipher_spec_arcfour =
   {
+    GCRY_CIPHER_ARCFOUR, {0, 0},
     "ARCFOUR", NULL, NULL, 1, 128, sizeof (ARCFOUR_context),
     arcfour_setkey, NULL, NULL, encrypt_stream, encrypt_stream,
   };
diff --git a/grub-core/lib/libgcrypt/cipher/asm-common-aarch64.h 
b/grub-core/lib/libgcrypt/cipher/asm-common-aarch64.h
new file mode 100644
index 000000000..451539e82
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/asm-common-aarch64.h
@@ -0,0 +1,108 @@
+/* asm-common-aarch64.h  -  Common macros for AArch64 assembly
+ *
+ * Copyright (C) 2018 Martin Storsjö <martin@martin.st>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef GCRY_ASM_COMMON_AARCH64_H
+#define GCRY_ASM_COMMON_AARCH64_H
+
+#include <config.h>
+
+#ifdef HAVE_GCC_ASM_ELF_DIRECTIVES
+# define ELF(...) __VA_ARGS__
+#else
+# define ELF(...) /*_*/
+#endif
+
+#ifdef __APPLE__
+#define GET_DATA_POINTER(reg, name) \
+       adrp    reg, name@GOTPAGE ; \
+       add     reg, reg, name@GOTPAGEOFF ;
+#elif defined(_WIN32)
+#define GET_DATA_POINTER(reg, name) \
+       adrp    reg, name ; \
+       add     reg, reg, #:lo12:name ;
+#else
+#define GET_DATA_POINTER(reg, name) \
+       adrp    reg, :got:name ; \
+       ldr     reg, [reg, #:got_lo12:name] ;
+#endif
+
+#ifdef HAVE_GCC_ASM_CFI_DIRECTIVES
+/* CFI directives to emit DWARF stack unwinding information. */
+# define CFI_STARTPROC()            .cfi_startproc
+# define CFI_ENDPROC()              .cfi_endproc
+# define CFI_REMEMBER_STATE()       .cfi_remember_state
+# define CFI_RESTORE_STATE()        .cfi_restore_state
+# define CFI_ADJUST_CFA_OFFSET(off) .cfi_adjust_cfa_offset off
+# define CFI_REL_OFFSET(reg,off)    .cfi_rel_offset reg, off
+# define CFI_DEF_CFA_REGISTER(reg)  .cfi_def_cfa_register reg
+# define CFI_REGISTER(ro,rn)        .cfi_register ro, rn
+# define CFI_RESTORE(reg)           .cfi_restore reg
+
+/* CFA expressions are used for pointing CFA and registers to
+ * SP relative offsets. */
+# define DW_REGNO_SP 31
+
+/* Fixed length encoding used for integers for now. */
+# define DW_SLEB128_7BIT(value) \
+       0x00|((value) & 0x7f)
+# define DW_SLEB128_28BIT(value) \
+       0x80|((value)&0x7f), \
+       0x80|(((value)>>7)&0x7f), \
+       0x80|(((value)>>14)&0x7f), \
+       0x00|(((value)>>21)&0x7f)
+
+# define CFI_CFA_ON_STACK(rsp_offs,cfa_depth) \
+       .cfi_escape \
+         0x0f, /* DW_CFA_def_cfa_expression */ \
+           DW_SLEB128_7BIT(11), /* length */ \
+         0x8f, /* DW_OP_breg31, rsp + constant */ \
+           DW_SLEB128_28BIT(rsp_offs), \
+         0x06, /* DW_OP_deref */ \
+         0x23, /* DW_OP_plus_constu */ \
+           DW_SLEB128_28BIT((cfa_depth)+8)
+
+# define CFI_REG_ON_STACK(regno,rsp_offs) \
+       .cfi_escape \
+         0x10, /* DW_CFA_expression */ \
+           DW_SLEB128_7BIT(regno), \
+           DW_SLEB128_7BIT(5), /* length */ \
+         0x8f, /* DW_OP_breg31, rsp + constant */ \
+           DW_SLEB128_28BIT(rsp_offs)
+
+#else
+# define CFI_STARTPROC()
+# define CFI_ENDPROC()
+# define CFI_REMEMBER_STATE()
+# define CFI_RESTORE_STATE()
+# define CFI_ADJUST_CFA_OFFSET(off)
+# define CFI_REL_OFFSET(reg,off)
+# define CFI_DEF_CFA_REGISTER(reg)
+# define CFI_REGISTER(ro,rn)
+# define CFI_RESTORE(reg)
+
+# define CFI_CFA_ON_STACK(rsp_offs,cfa_depth)
+# define CFI_REG_ON_STACK(reg,rsp_offs)
+#endif
+
+/* 'ret' instruction replacement for straight-line speculation mitigation */
+#define ret_spec_stop \
+       ret; dsb sy; isb;
+
+#endif /* GCRY_ASM_COMMON_AARCH64_H */
diff --git a/grub-core/lib/libgcrypt/cipher/asm-common-amd64.h 
b/grub-core/lib/libgcrypt/cipher/asm-common-amd64.h
new file mode 100644
index 000000000..97912b1b1
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/asm-common-amd64.h
@@ -0,0 +1,193 @@
+/* asm-common-amd64.h  -  Common macros for AMD64 assembly
+ *
+ * Copyright (C) 2018 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef GCRY_ASM_COMMON_AMD64_H
+#define GCRY_ASM_COMMON_AMD64_H
+
+#include <config.h>
+
+#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
+# define ELF(...) __VA_ARGS__
+#else
+# define ELF(...) /*_*/
+#endif
+
+#ifdef __PIC__
+#  define rRIP (%rip)
+#else
+#  define rRIP
+#endif
+
+#ifdef __PIC__
+#  define RIP %rip
+#else
+#  define RIP
+#endif
+
+#ifdef __PIC__
+#  define ADD_RIP +rip
+#else
+#  define ADD_RIP
+#endif
+
+#if defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS) || !defined(__PIC__)
+#  define GET_EXTERN_POINTER(name, reg) movabsq $name, reg
+#else
+#  ifdef __code_model_large__
+#    define GET_EXTERN_POINTER(name, reg) \
+              pushq %r15; \
+              pushq %r14; \
+           1: leaq 1b(%rip), reg; \
+              movabsq $_GLOBAL_OFFSET_TABLE_-1b, %r14; \
+              movabsq $name@GOT, %r15; \
+              addq %r14, reg; \
+              popq %r14; \
+              movq (reg, %r15), reg; \
+              popq %r15;
+#  else
+#    define GET_EXTERN_POINTER(name, reg) movq name@GOTPCREL(%rip), reg
+#  endif
+#endif
+
+#ifdef HAVE_GCC_ASM_CFI_DIRECTIVES
+/* CFI directives to emit DWARF stack unwinding information. */
+# define CFI_STARTPROC()            .cfi_startproc
+# define CFI_ENDPROC()              .cfi_endproc
+# define CFI_REMEMBER_STATE()       .cfi_remember_state
+# define CFI_RESTORE_STATE()        .cfi_restore_state
+# define CFI_ADJUST_CFA_OFFSET(off) .cfi_adjust_cfa_offset off
+# define CFI_REL_OFFSET(reg,off)    .cfi_rel_offset reg, off
+# define CFI_DEF_CFA_REGISTER(reg)  .cfi_def_cfa_register reg
+# define CFI_REGISTER(ro,rn)        .cfi_register ro, rn
+# define CFI_RESTORE(reg)           .cfi_restore reg
+
+# define CFI_PUSH(reg) \
+       CFI_ADJUST_CFA_OFFSET(8); CFI_REL_OFFSET(reg, 0)
+# define CFI_POP(reg) \
+       CFI_ADJUST_CFA_OFFSET(-8); CFI_RESTORE(reg)
+# define CFI_POP_TMP_REG() \
+       CFI_ADJUST_CFA_OFFSET(-8);
+# define CFI_LEAVE() \
+       CFI_ADJUST_CFA_OFFSET(-8); CFI_DEF_CFA_REGISTER(%rsp)
+
+/* CFA expressions are used for pointing CFA and registers to
+ * %rsp relative offsets. */
+# define DW_REGNO_rax 0
+# define DW_REGNO_rdx 1
+# define DW_REGNO_rcx 2
+# define DW_REGNO_rbx 3
+# define DW_REGNO_rsi 4
+# define DW_REGNO_rdi 5
+# define DW_REGNO_rbp 6
+# define DW_REGNO_rsp 7
+# define DW_REGNO_r8  8
+# define DW_REGNO_r9  9
+# define DW_REGNO_r10 10
+# define DW_REGNO_r11 11
+# define DW_REGNO_r12 12
+# define DW_REGNO_r13 13
+# define DW_REGNO_r14 14
+# define DW_REGNO_r15 15
+
+# define DW_REGNO(reg) DW_REGNO_ ## reg
+
+/* Fixed length encoding used for integers for now. */
+# define DW_SLEB128_7BIT(value) \
+       0x00|((value) & 0x7f)
+# define DW_SLEB128_28BIT(value) \
+       0x80|((value)&0x7f), \
+       0x80|(((value)>>7)&0x7f), \
+       0x80|(((value)>>14)&0x7f), \
+       0x00|(((value)>>21)&0x7f)
+
+# define CFI_CFA_ON_STACK(rsp_offs,cfa_depth) \
+       .cfi_escape \
+         0x0f, /* DW_CFA_def_cfa_expression */ \
+           DW_SLEB128_7BIT(11), /* length */ \
+         0x77, /* DW_OP_breg7, rsp + constant */ \
+           DW_SLEB128_28BIT(rsp_offs), \
+         0x06, /* DW_OP_deref */ \
+         0x23, /* DW_OP_plus_constu */ \
+           DW_SLEB128_28BIT((cfa_depth)+8)
+
+# define CFI_REG_ON_STACK(reg,rsp_offs) \
+       .cfi_escape \
+         0x10, /* DW_CFA_expression */ \
+           DW_SLEB128_7BIT(DW_REGNO(reg)), \
+           DW_SLEB128_7BIT(5), /* length */ \
+         0x77, /* DW_OP_breg7, rsp + constant */ \
+           DW_SLEB128_28BIT(rsp_offs)
+
+#else
+# define CFI_STARTPROC()
+# define CFI_ENDPROC()
+# define CFI_REMEMBER_STATE()
+# define CFI_RESTORE_STATE()
+# define CFI_ADJUST_CFA_OFFSET(off)
+# define CFI_REL_OFFSET(reg,off)
+# define CFI_DEF_CFA_REGISTER(reg)
+# define CFI_REGISTER(ro,rn)
+# define CFI_RESTORE(reg)
+
+# define CFI_PUSH(reg)
+# define CFI_POP(reg)
+# define CFI_POP_TMP_REG()
+# define CFI_LEAVE()
+
+# define CFI_CFA_ON_STACK(rsp_offs,cfa_depth)
+# define CFI_REG_ON_STACK(reg,rsp_offs)
+#endif
+
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+# define ENTER_SYSV_FUNC_PARAMS_0_4 \
+       pushq %rdi; \
+       CFI_PUSH(%rdi); \
+       pushq %rsi; \
+       CFI_PUSH(%rsi); \
+       movq %rcx, %rdi; \
+       movq %rdx, %rsi; \
+       movq %r8, %rdx; \
+       movq %r9, %rcx; \
+
+# define ENTER_SYSV_FUNC_PARAMS_5 \
+       ENTER_SYSV_FUNC_PARAMS_0_4; \
+       movq 0x38(%rsp), %r8;
+
+# define ENTER_SYSV_FUNC_PARAMS_6 \
+       ENTER_SYSV_FUNC_PARAMS_5; \
+       movq 0x40(%rsp), %r9;
+
+# define EXIT_SYSV_FUNC \
+       popq %rsi; \
+       CFI_POP(%rsi); \
+       popq %rdi; \
+       CFI_POP(%rdi);
+#else
+# define ENTER_SYSV_FUNC_PARAMS_0_4
+# define ENTER_SYSV_FUNC_PARAMS_5
+# define ENTER_SYSV_FUNC_PARAMS_6
+# define EXIT_SYSV_FUNC
+#endif
+
+/* 'ret' instruction replacement for straight-line speculation mitigation */
+#define ret_spec_stop \
+       ret; int3;
+
+#endif /* GCRY_ASM_COMMON_AMD64_H */
diff --git a/grub-core/lib/libgcrypt/cipher/asm-common-s390x.h 
b/grub-core/lib/libgcrypt/cipher/asm-common-s390x.h
new file mode 100644
index 000000000..b3a996cd6
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/asm-common-s390x.h
@@ -0,0 +1,90 @@
+/* asm-common-s390x.h  -  Common macros for zSeries assembly
+ *
+ * Copyright (C) 2020 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef GCRY_ASM_COMMON_S390X_H
+#define GCRY_ASM_COMMON_S390X_H
+
+#include <config.h>
+
+#ifdef HAVE_GCC_ASM_ELF_DIRECTIVES
+# define ELF(...) __VA_ARGS__
+#else
+# define ELF(...) /*_*/
+#endif
+
+#ifdef HAVE_GCC_ASM_CFI_DIRECTIVES
+/* CFI directives to emit DWARF stack unwinding information. */
+# define CFI_STARTPROC()            .cfi_startproc
+# define CFI_ENDPROC()              .cfi_endproc
+# define CFI_REMEMBER_STATE()       .cfi_remember_state
+# define CFI_RESTORE_STATE()        .cfi_restore_state
+# define CFI_ADJUST_CFA_OFFSET(off) .cfi_adjust_cfa_offset off
+# define CFI_REL_OFFSET(reg,off)    .cfi_rel_offset reg, off
+# define CFI_DEF_CFA_REGISTER(reg)  .cfi_def_cfa_register reg
+# define CFI_REGISTER(ro,rn)        .cfi_register ro, rn
+# define CFI_RESTORE(reg)           .cfi_restore reg
+
+/* CFA expressions are used for pointing CFA and registers to
+ * SP relative offsets. */
+# define DW_REGNO_SP 15
+
+/* Fixed length encoding used for integers for now. */
+# define DW_SLEB128_7BIT(value) \
+       0x00|((value) & 0x7f)
+# define DW_SLEB128_28BIT(value) \
+       0x80|((value)&0x7f), \
+       0x80|(((value)>>7)&0x7f), \
+       0x80|(((value)>>14)&0x7f), \
+       0x00|(((value)>>21)&0x7f)
+
+# define CFI_CFA_ON_STACK(rsp_offs,cfa_depth) \
+       .cfi_escape \
+         0x0f, /* DW_CFA_def_cfa_expression */ \
+           DW_SLEB128_7BIT(11), /* length */ \
+         0x7f, /* DW_OP_breg15, rsp + constant */ \
+           DW_SLEB128_28BIT(rsp_offs), \
+         0x06, /* DW_OP_deref */ \
+         0x23, /* DW_OP_plus_constu */ \
+           DW_SLEB128_28BIT((cfa_depth)+160)
+
+# define CFI_REG_ON_STACK(regno,rsp_offs) \
+       .cfi_escape \
+         0x10, /* DW_CFA_expression */ \
+           DW_SLEB128_7BIT(regno), \
+           DW_SLEB128_7BIT(5), /* length */ \
+         0x7f, /* DW_OP_breg15, rsp + constant */ \
+           DW_SLEB128_28BIT(rsp_offs)
+
+#else
+# define CFI_STARTPROC()
+# define CFI_ENDPROC()
+# define CFI_REMEMBER_STATE()
+# define CFI_RESTORE_STATE()
+# define CFI_ADJUST_CFA_OFFSET(off)
+# define CFI_REL_OFFSET(reg,off)
+# define CFI_DEF_CFA_REGISTER(reg)
+# define CFI_REGISTER(ro,rn)
+# define CFI_RESTORE(reg)
+
+# define CFI_CFA_ON_STACK(rsp_offs,cfa_depth)
+# define CFI_REG_ON_STACK(reg,rsp_offs)
+#endif
+
+#endif /* GCRY_ASM_COMMON_AMD64_H */
diff --git a/grub-core/lib/libgcrypt/cipher/asm-inline-s390x.h 
b/grub-core/lib/libgcrypt/cipher/asm-inline-s390x.h
new file mode 100644
index 000000000..001cb965f
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/asm-inline-s390x.h
@@ -0,0 +1,205 @@
+/* asm-inline-s390x.h  -  Common macros for zSeries inline assembly
+ *
+ * Copyright (C) 2020 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef GCRY_ASM_INLINE_S390X_H
+#define GCRY_ASM_INLINE_S390X_H
+
+#include <config.h>
+
+#define ALWAYS_INLINE inline __attribute__((always_inline))
+
+typedef unsigned int u128_t __attribute__ ((mode (TI)));
+
+enum kmxx_functions_e
+{
+  KM_FUNCTION_AES_128 = 18,
+  KM_FUNCTION_AES_192 = 19,
+  KM_FUNCTION_AES_256 = 20,
+  KM_FUNCTION_XTS_AES_128 = 50,
+  KM_FUNCTION_XTS_AES_256 = 52,
+
+  KMID_FUNCTION_SHA1 = 1,
+  KMID_FUNCTION_SHA256 = 2,
+  KMID_FUNCTION_SHA512 = 3,
+  KMID_FUNCTION_SHA3_224 = 32,
+  KMID_FUNCTION_SHA3_256 = 33,
+  KMID_FUNCTION_SHA3_384 = 34,
+  KMID_FUNCTION_SHA3_512 = 35,
+  KMID_FUNCTION_SHAKE128 = 36,
+  KMID_FUNCTION_SHAKE256 = 37,
+  KMID_FUNCTION_GHASH = 65,
+
+  PCC_FUNCTION_NIST_P256 = 64,
+  PCC_FUNCTION_NIST_P384 = 65,
+  PCC_FUNCTION_NIST_P521 = 66,
+  PCC_FUNCTION_ED25519 = 72,
+  PCC_FUNCTION_ED448 = 73,
+  PCC_FUNCTION_X25519 = 80,
+  PCC_FUNCTION_X448 = 81
+};
+
+enum kmxx_function_flags_e
+{
+  KM_ENCRYPT  = 0 << 7,
+  KM_DECRYPT  = 1 << 7,
+
+  KMF_LCFB_16 = 16 << 24,
+
+  KMA_LPC     = 1 << 8,
+  KMA_LAAD    = 1 << 9,
+  KMA_HS      = 1 << 10,
+
+  KLMD_PADDING_STATE = 1 << 8,
+};
+
+static ALWAYS_INLINE u128_t km_function_to_mask(enum kmxx_functions_e func)
+{
+  return (u128_t)1 << (127 - func);
+}
+
+static inline u128_t kimd_query(void)
+{
+  static u128_t function_codes = 0;
+  static int initialized = 0;
+  register unsigned long reg0 asm("0") = 0;
+  register void *reg1 asm("1") = &function_codes;
+  u128_t r1;
+
+  if (initialized)
+    return function_codes;
+
+  asm volatile ("0: .insn rre,0xb93e << 16, 0, %[r1]\n\t"
+               "   brc 1,0b\n\t"
+               : [r1] "=a" (r1)
+               : [reg0] "r" (reg0), [reg1] "r" (reg1)
+               : "cc", "memory");
+
+  initialized = 1;
+  return function_codes;
+}
+
+static inline u128_t klmd_query(void)
+{
+  static u128_t function_codes = 0;
+  static int initialized = 0;
+  register unsigned long reg0 asm("0") = 0;
+  register void *reg1 asm("1") = &function_codes;
+  u128_t r1;
+
+  if (initialized)
+    return function_codes;
+
+  asm volatile ("0: .insn rre,0xb93f << 16, 0, %[r1]\n\t"
+               "   brc 1,0b\n\t"
+               : [r1] "=a" (r1)
+               : [reg0] "r" (reg0), [reg1] "r" (reg1)
+               : "cc", "memory");
+
+  initialized = 1;
+  return function_codes;
+}
+
+static inline u128_t pcc_query(void)
+{
+  static u128_t function_codes = 0;
+  static int initialized = 0;
+  register unsigned long reg0 asm("0") = 0;
+  register void *reg1 asm("1") = &function_codes;
+
+  if (initialized)
+    return function_codes;
+
+  asm volatile ("0: .insn rre,0xb92c << 16, 0, 0\n\t"
+               "   brc 1,0b\n\t"
+               :
+               : [reg0] "r" (reg0), [reg1] "r" (reg1)
+               : "cc", "memory");
+
+  initialized = 1;
+  return function_codes;
+}
+
+static ALWAYS_INLINE void
+kimd_execute(unsigned int func, void *param_block, const void *src,
+            size_t src_len)
+{
+  register unsigned long reg0 asm("0") = func;
+  register byte *reg1 asm("1") = param_block;
+  u128_t r1 = ((u128_t)(uintptr_t)src << 64) | (u64)src_len;
+
+  asm volatile ("0: .insn rre,0xb93e << 16, 0, %[r1]\n\t"
+               "   brc 1,0b\n\t"
+               : [r1] "+a" (r1)
+               : [func] "r" (reg0), [param_ptr] "r" (reg1)
+               : "cc", "memory");
+}
+
+static ALWAYS_INLINE void
+klmd_execute(unsigned int func, void *param_block, const void *src,
+            size_t src_len)
+{
+  register unsigned long reg0 asm("0") = func;
+  register byte *reg1 asm("1") = param_block;
+  u128_t r1 = ((u128_t)(uintptr_t)src << 64) | (u64)src_len;
+
+  asm volatile ("0: .insn rre,0xb93f << 16, 0, %[r1]\n\t"
+               "   brc 1,0b\n\t"
+               : [func] "+r" (reg0), [r1] "+a" (r1)
+               : [param_ptr] "r" (reg1)
+               : "cc", "memory");
+}
+
+static ALWAYS_INLINE void
+klmd_shake_execute(unsigned int func, void *param_block, void *dst,
+                  size_t dst_len, const void *src, size_t src_len)
+{
+  register unsigned long reg0 asm("0") = func;
+  register byte *reg1 asm("1") = param_block;
+  u128_t r1 = ((u128_t)(uintptr_t)dst << 64) | (u64)dst_len;
+  u128_t r2 = ((u128_t)(uintptr_t)src << 64) | (u64)src_len;
+
+  asm volatile ("0: .insn rre,0xb93f << 16, %[r1], %[r2]\n\t"
+               "   brc 1,0b\n\t"
+               : [func] "+r" (reg0), [r1] "+a" (r1), [r2] "+a" (r2)
+               : [param_ptr] "r" (reg1)
+               : "cc", "memory");
+}
+
+static ALWAYS_INLINE unsigned int
+pcc_scalar_multiply(unsigned int func, void *param_block)
+{
+  register unsigned long reg0 asm("0") = func;
+  register byte *reg1 asm("1") = param_block;
+  register unsigned long error = 0;
+
+  asm volatile ("0: .insn rre,0xb92c << 16, 0, 0\n\t"
+               "   brc 1,0b\n\t"
+               "   brc 7,1f\n\t"
+               "   j 2f\n\t"
+               "1: lhi %[error], 1\n\t"
+               "2:\n\t"
+               : [func] "+r" (reg0), [error] "+r" (error)
+               : [param_ptr] "r" (reg1)
+               : "cc", "memory");
+
+  return error;
+}
+
+#endif /* GCRY_ASM_INLINE_S390X_H */
diff --git a/grub-core/lib/libgcrypt/cipher/asm-poly1305-aarch64.h 
b/grub-core/lib/libgcrypt/cipher/asm-poly1305-aarch64.h
new file mode 100644
index 000000000..2f05aae2a
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/asm-poly1305-aarch64.h
@@ -0,0 +1,245 @@
+/* asm-common-aarch64.h  -  Poly1305 macros for ARMv8/AArch64 assembly
+ *
+ * Copyright (C) 2019 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef GCRY_ASM_POLY1305_AARCH64_H
+#define GCRY_ASM_POLY1305_AARCH64_H
+
+#include "asm-common-aarch64.h"
+
+#ifdef __AARCH64EL__
+  #define le_to_host(reg) /*_*/
+#else
+  #define le_to_host(reg) rev reg, reg;
+#endif
+
+/**********************************************************************
+  poly1305 for stitched chacha20-poly1305 Aarch64 implementations
+ **********************************************************************/
+
+#define POLY_RSTATE    x8
+#define POLY_RSRC      x9
+
+#define POLY_R_H0      x10
+#define POLY_R_H1      x11
+#define POLY_R_H2      x12
+#define POLY_R_H2d     w12
+#define POLY_R_R0      x13
+#define POLY_R_R1      x14
+#define POLY_R_R1_MUL5 x15
+#define POLY_R_X0_HI   x16
+#define POLY_R_X0_LO   x17
+#define POLY_R_X1_HI   x19
+#define POLY_R_X1_LO   x20
+#define POLY_R_ONE     x21
+#define POLY_R_ONEd    w21
+
+#define POLY_TMP0      x22
+#define POLY_TMP1      x23
+#define POLY_TMP2      x24
+#define POLY_TMP3      x25
+
+#define POLY_CHACHA_ROUND x26
+
+#define POLY_S_R0      (4 * 4 + 0 * 8)
+#define POLY_S_R1      (4 * 4 + 1 * 8)
+#define POLY_S_H0      (4 * 4 + 2 * 8 + 0 * 8)
+#define POLY_S_H1      (4 * 4 + 2 * 8 + 1 * 8)
+#define POLY_S_H2d     (4 * 4 + 2 * 8 + 2 * 8)
+
+#define POLY1305_PUSH_REGS() \
+       stp x19, x20, [sp, #-16]!; \
+       CFI_ADJUST_CFA_OFFSET(16); \
+       CFI_REG_ON_STACK(19, 0); \
+       CFI_REG_ON_STACK(20, 8); \
+       stp x21, x22, [sp, #-16]!; \
+       CFI_ADJUST_CFA_OFFSET(16); \
+       CFI_REG_ON_STACK(21, 0); \
+       CFI_REG_ON_STACK(22, 8); \
+       stp x23, x24, [sp, #-16]!; \
+       CFI_ADJUST_CFA_OFFSET(16); \
+       CFI_REG_ON_STACK(23, 0); \
+       CFI_REG_ON_STACK(24, 8); \
+       stp x25, x26, [sp, #-16]!; \
+       CFI_ADJUST_CFA_OFFSET(16); \
+       CFI_REG_ON_STACK(25, 0); \
+       CFI_REG_ON_STACK(26, 8);
+
+#define POLY1305_POP_REGS() \
+       ldp x25, x26, [sp], #16; \
+       CFI_ADJUST_CFA_OFFSET(-16); \
+       CFI_RESTORE(x25); \
+       CFI_RESTORE(x26); \
+       ldp x23, x24, [sp], #16; \
+       CFI_ADJUST_CFA_OFFSET(-16); \
+       CFI_RESTORE(x23); \
+       CFI_RESTORE(x24); \
+       ldp x21, x22, [sp], #16; \
+       CFI_ADJUST_CFA_OFFSET(-16); \
+       CFI_RESTORE(x21); \
+       CFI_RESTORE(x22); \
+       ldp x19, x20, [sp], #16; \
+       CFI_ADJUST_CFA_OFFSET(-16); \
+       CFI_RESTORE(x19); \
+       CFI_RESTORE(x20);
+
+#define POLY1305_LOAD_STATE() \
+       ldr POLY_R_R1, [POLY_RSTATE, #(POLY_S_R1)]; \
+       ldr POLY_R_H0, [POLY_RSTATE, #(POLY_S_H0)];  \
+       ldr POLY_R_H1, [POLY_RSTATE, #(POLY_S_H1)]; \
+       ldr POLY_R_H2d, [POLY_RSTATE, #(POLY_S_H2d)]; \
+       ldr POLY_R_R0, [POLY_RSTATE, #(POLY_S_R0)]; \
+       add POLY_R_R1_MUL5, POLY_R_R1, POLY_R_R1, lsr #2; \
+       mov POLY_R_ONE, #1;
+
+#define POLY1305_STORE_STATE() \
+       str POLY_R_H0, [POLY_RSTATE, #(POLY_S_H0)]; \
+       str POLY_R_H1, [POLY_RSTATE, #(POLY_S_H1)]; \
+       str POLY_R_H2d, [POLY_RSTATE, #(POLY_S_H2d)];
+
+#define POLY1305_BLOCK_PART1(src_offset) \
+       /* a = h + m */ \
+       ldr POLY_TMP0, [POLY_RSRC, #((src_offset) + 0 * 8)];
+#define POLY1305_BLOCK_PART2(src_offset) \
+       ldr POLY_TMP1, [POLY_RSRC, #((src_offset) + 1 * 8)];
+#define POLY1305_BLOCK_PART3() \
+       le_to_host(POLY_TMP0);
+#define POLY1305_BLOCK_PART4() \
+       le_to_host(POLY_TMP1);
+#define POLY1305_BLOCK_PART5() \
+       adds POLY_R_H0, POLY_R_H0, POLY_TMP0;
+#define POLY1305_BLOCK_PART6() \
+       adcs POLY_R_H1, POLY_R_H1, POLY_TMP1;
+#define POLY1305_BLOCK_PART7() \
+       adc POLY_R_H2d, POLY_R_H2d, POLY_R_ONEd;
+
+#define POLY1305_BLOCK_PART8() \
+       /* h = a * r (partial mod 2^130-5): */ \
+       mul POLY_R_X1_LO, POLY_R_H0, POLY_R_R1;   /* lo: h0 * r1 */
+#define POLY1305_BLOCK_PART9() \
+       mul POLY_TMP0, POLY_R_H1, POLY_R_R0;      /* lo: h1 * r0 */
+#define POLY1305_BLOCK_PART10() \
+       mul POLY_R_X0_LO, POLY_R_H0, POLY_R_R0;   /* lo: h0 * r0 */
+#define POLY1305_BLOCK_PART11() \
+       umulh POLY_R_X1_HI, POLY_R_H0, POLY_R_R1; /* hi: h0 * r1 */
+#define POLY1305_BLOCK_PART12() \
+       adds POLY_R_X1_LO, POLY_R_X1_LO, POLY_TMP0;
+#define POLY1305_BLOCK_PART13() \
+       umulh POLY_TMP1, POLY_R_H1, POLY_R_R0;    /* hi: h1 * r0 */
+#define POLY1305_BLOCK_PART14() \
+       mul POLY_TMP2, POLY_R_H1, POLY_R_R1_MUL5;   /* lo: h1 * r1 mod 2^130-5 
*/
+#define POLY1305_BLOCK_PART15() \
+       umulh POLY_R_X0_HI, POLY_R_H0, POLY_R_R0; /* hi: h0 * r0 */
+#define POLY1305_BLOCK_PART16() \
+       adc POLY_R_X1_HI, POLY_R_X1_HI, POLY_TMP1;
+#define POLY1305_BLOCK_PART17() \
+       umulh POLY_TMP3, POLY_R_H1, POLY_R_R1_MUL5; /* hi: h1 * r1 mod 2^130-5 
*/
+#define POLY1305_BLOCK_PART18() \
+       adds POLY_R_X0_LO, POLY_R_X0_LO, POLY_TMP2;
+#define POLY1305_BLOCK_PART19() \
+       mul POLY_R_H1, POLY_R_H2, POLY_R_R1_MUL5; /* h2 * r1 mod 2^130-5 */
+#define POLY1305_BLOCK_PART20() \
+       adc POLY_R_X0_HI, POLY_R_X0_HI, POLY_TMP3;
+#define POLY1305_BLOCK_PART21() \
+       mul POLY_R_H2, POLY_R_H2, POLY_R_R0;      /* h2 * r0 */
+#define POLY1305_BLOCK_PART22() \
+       adds POLY_R_H1, POLY_R_H1, POLY_R_X1_LO;
+#define POLY1305_BLOCK_PART23() \
+       adc POLY_R_H0, POLY_R_H2, POLY_R_X1_HI;
+
+#define POLY1305_BLOCK_PART24() \
+       /* carry propagation */ \
+       and POLY_R_H2, POLY_R_H0, #3;
+#define POLY1305_BLOCK_PART25() \
+       lsr POLY_R_H0, POLY_R_H0, #2;
+#define POLY1305_BLOCK_PART26() \
+       add POLY_R_H0, POLY_R_H0, POLY_R_H0, lsl #2;
+#define POLY1305_BLOCK_PART27() \
+       adds POLY_R_H0, POLY_R_H0, POLY_R_X0_LO;
+#define POLY1305_BLOCK_PART28() \
+       adcs POLY_R_H1, POLY_R_H1, POLY_R_X0_HI;
+#define POLY1305_BLOCK_PART29() \
+       adc POLY_R_H2d, POLY_R_H2d, wzr;
+
+//#define TESTING_POLY1305_ASM
+#ifdef TESTING_POLY1305_ASM
+/* for testing only. */
+.align 3
+.globl _gcry_poly1305_aarch64_blocks1
+ELF(.type _gcry_poly1305_aarch64_blocks1,%function;)
+_gcry_poly1305_aarch64_blocks1:
+       /* input:
+        *      x0: poly1305-state
+        *      x1: src
+        *      x2: nblks
+        */
+       CFI_STARTPROC()
+       POLY1305_PUSH_REGS();
+
+       mov POLY_RSTATE, x0;
+       mov POLY_RSRC, x1;
+
+       POLY1305_LOAD_STATE();
+
+.L_gcry_poly1305_aarch64_loop1:
+       POLY1305_BLOCK_PART1(0 * 16);
+       POLY1305_BLOCK_PART2(0 * 16);
+       add POLY_RSRC, POLY_RSRC, #16;
+       POLY1305_BLOCK_PART3();
+       POLY1305_BLOCK_PART4();
+       POLY1305_BLOCK_PART5();
+       POLY1305_BLOCK_PART6();
+       POLY1305_BLOCK_PART7();
+       POLY1305_BLOCK_PART8();
+       POLY1305_BLOCK_PART9();
+       POLY1305_BLOCK_PART10();
+       POLY1305_BLOCK_PART11();
+       POLY1305_BLOCK_PART12();
+       POLY1305_BLOCK_PART13();
+       POLY1305_BLOCK_PART14();
+       POLY1305_BLOCK_PART15();
+       POLY1305_BLOCK_PART16();
+       POLY1305_BLOCK_PART17();
+       POLY1305_BLOCK_PART18();
+       POLY1305_BLOCK_PART19();
+       POLY1305_BLOCK_PART20();
+       POLY1305_BLOCK_PART21();
+       POLY1305_BLOCK_PART22();
+       POLY1305_BLOCK_PART23();
+       POLY1305_BLOCK_PART24();
+       POLY1305_BLOCK_PART25();
+       POLY1305_BLOCK_PART26();
+       POLY1305_BLOCK_PART27();
+       POLY1305_BLOCK_PART28();
+       POLY1305_BLOCK_PART29();
+
+       subs x2, x2, #1;
+       b.ne .L_gcry_poly1305_aarch64_loop1;
+
+       POLY1305_STORE_STATE();
+
+       mov x0, #0;
+
+       POLY1305_POP_REGS();
+       ret_spec_stop;
+       CFI_ENDPROC()
+ELF(.size _gcry_poly1305_aarch64_blocks1, .-_gcry_poly1305_aarch64_blocks1;)
+#endif
+
+#endif /* GCRY_ASM_POLY1305_AARCH64_H */
diff --git a/grub-core/lib/libgcrypt/cipher/asm-poly1305-amd64.h 
b/grub-core/lib/libgcrypt/cipher/asm-poly1305-amd64.h
new file mode 100644
index 000000000..3f99ea3e1
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/asm-poly1305-amd64.h
@@ -0,0 +1,171 @@
+/* asm-common-amd64.h  -  Poly1305 macros for AMD64 assembly
+ *
+ * Copyright (C) 2019 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef GCRY_ASM_POLY1305_AMD64_H
+#define GCRY_ASM_POLY1305_AMD64_H
+
+#include "asm-common-amd64.h"
+
+/**********************************************************************
+  poly1305 for stitched chacha20-poly1305 AMD64 implementations
+ **********************************************************************/
+
+#define POLY_RSTATE    %r8
+#define POLY_RSRC      %r9
+
+#define POLY_R_H0      %rbx
+#define POLY_R_H1      %rcx
+#define POLY_R_H2      %r10
+#define POLY_R_H2d     %r10d
+#define POLY_R_R0      %r11
+#define POLY_R_R1_MUL5 %r12
+#define POLY_R_X0_HI   %r13
+#define POLY_R_X0_LO   %r14
+#define POLY_R_X1_HI   %r15
+#define POLY_R_X1_LO   %rsi
+
+#define POLY_S_R0      (4 * 4 + 0 * 8)(POLY_RSTATE)
+#define POLY_S_R1      (4 * 4 + 1 * 8)(POLY_RSTATE)
+#define POLY_S_H0      (4 * 4 + 2 * 8 + 0 * 8)(POLY_RSTATE)
+#define POLY_S_H1      (4 * 4 + 2 * 8 + 1 * 8)(POLY_RSTATE)
+#define POLY_S_H2d     (4 * 4 + 2 * 8 + 2 * 8)(POLY_RSTATE)
+
+#define POLY1305_LOAD_STATE() \
+       movq POLY_S_H0, POLY_R_H0; \
+       movq POLY_S_H1, POLY_R_H1; \
+       movl POLY_S_H2d, POLY_R_H2d; \
+       movq POLY_S_R0, POLY_R_R0; \
+       movq POLY_S_R1, POLY_R_R1_MUL5; \
+       shrq $2, POLY_R_R1_MUL5; \
+       addq POLY_S_R1, POLY_R_R1_MUL5;
+
+#define POLY1305_STORE_STATE() \
+       movq POLY_R_H0, POLY_S_H0; \
+       movq POLY_R_H1, POLY_S_H1; \
+       movl POLY_R_H2d, POLY_S_H2d;
+
+/* a = h + m */
+#define POLY1305_BLOCK_PART1(src_offset) \
+       addq ((src_offset) + 0 * 8)(POLY_RSRC), POLY_R_H0; \
+       adcq ((src_offset) + 1 * 8)(POLY_RSRC), POLY_R_H1; \
+       adcl $1, POLY_R_H2d; \
+       \
+       /* h = a * r (partial mod 2^130-5): */ \
+       \
+       /* h0 * r1 */ \
+       movq POLY_R_H0, %rax; \
+       mulq POLY_S_R1; \
+       movq %rax, POLY_R_X1_LO; \
+       movq %rdx, POLY_R_X1_HI;
+
+#define POLY1305_BLOCK_PART2() \
+       \
+       /* h0 * r0 */ \
+       movq POLY_R_H0, %rax; \
+       mulq POLY_R_R0; \
+       movq %rax, POLY_R_X0_LO; \
+       movq %rdx, POLY_R_X0_HI;
+
+#define POLY1305_BLOCK_PART3() \
+       \
+       /* h1 * r0 */ \
+       movq POLY_R_H1, %rax; \
+       mulq POLY_R_R0; \
+       addq %rax, POLY_R_X1_LO; \
+       adcq %rdx, POLY_R_X1_HI; \
+       \
+       /* h1 * r1 mod 2^130-5 */ \
+       movq POLY_R_R1_MUL5, %rax; \
+       mulq POLY_R_H1;
+
+#define POLY1305_BLOCK_PART4() \
+       movq POLY_R_H2, POLY_R_H1; \
+       imulq POLY_R_R1_MUL5, POLY_R_H1; /* h2 * r1 mod 2^130-5 */ \
+       addq %rax, POLY_R_X0_LO; \
+       adcq %rdx, POLY_R_X0_HI; \
+       imulq POLY_R_R0, POLY_R_H2;      /* h2 * r0 */ \
+       addq POLY_R_X1_LO, POLY_R_H1; \
+       adcq POLY_R_X1_HI, POLY_R_H2;
+
+#define POLY1305_BLOCK_PART5() \
+       \
+       /* carry propagation */ \
+       movq POLY_R_H2, POLY_R_H0; \
+       andl $3, POLY_R_H2d; \
+       shrq $2, POLY_R_H0; \
+       leaq (POLY_R_H0, POLY_R_H0, 4), POLY_R_H0; \
+       addq POLY_R_X0_LO, POLY_R_H0; \
+       adcq POLY_R_X0_HI, POLY_R_H1; \
+       adcl $0, POLY_R_H2d;
+
+#ifdef TESTING_POLY1305_ASM
+/* for testing only, mixed C/asm poly1305.c is marginally faster (~2%). */
+.align 8
+.globl _gcry_poly1305_amd64_ssse3_blocks1
+ELF(.type _gcry_poly1305_amd64_ssse3_blocks1,@function;)
+
+_gcry_poly1305_amd64_ssse3_blocks1:
+       /* input:
+        *      %rdi: poly1305-state
+        *      %rsi: src
+        *      %rdx: nblks
+        */
+       pushq %rbp;
+       movq %rsp, %rbp;
+
+       subq $(10 * 8), %rsp;
+       movq %rbx, (1 * 8)(%rsp);
+       movq %r12, (2 * 8)(%rsp);
+       movq %r13, (3 * 8)(%rsp);
+       movq %r14, (4 * 8)(%rsp);
+       movq %r15, (5 * 8)(%rsp);
+
+       movq %rdx, (8 * 8)(%rsp); # NBLKS
+
+       movq %rdi, POLY_RSTATE;
+       movq %rsi, POLY_RSRC;
+
+       POLY1305_LOAD_STATE();
+
+.L_poly1:
+       POLY1305_BLOCK_PART1(0 * 16);
+       POLY1305_BLOCK_PART2();
+       POLY1305_BLOCK_PART3();
+       POLY1305_BLOCK_PART4();
+       POLY1305_BLOCK_PART5();
+
+       subq $1, (8 * 8)(%rsp); # NBLKS
+       leaq (16)(POLY_RSRC), POLY_RSRC;
+       jnz .L_poly1;
+
+       POLY1305_STORE_STATE();
+
+       movq (1 * 8)(%rsp), %rbx;
+       movq (2 * 8)(%rsp), %r12;
+       movq (3 * 8)(%rsp), %r13;
+       movq (4 * 8)(%rsp), %r14;
+       movq (5 * 8)(%rsp), %r15;
+
+       xorl %eax, %eax;
+       leave
+       ret;
+#endif
+
+#endif /* GCRY_ASM_POLY1305_AMD64_H */
diff --git a/grub-core/lib/libgcrypt/cipher/asm-poly1305-s390x.h 
b/grub-core/lib/libgcrypt/cipher/asm-poly1305-s390x.h
new file mode 100644
index 000000000..113ab9491
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/asm-poly1305-s390x.h
@@ -0,0 +1,140 @@
+/* asm-common-amd64.h  -  Poly1305 macros for zSeries assembly
+ *
+ * Copyright (C) 2020 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef GCRY_ASM_POLY1305_S390X_H
+#define GCRY_ASM_POLY1305_S390X_H
+
+#include "asm-common-s390x.h"
+
+/**********************************************************************
+  poly1305 for stitched chacha20-poly1305
+ **********************************************************************/
+
+#define POLY_RSTATE       %r1
+#define POLY_RSRC         %r14
+
+#define POLY_R_H0_TMP_HI  %r6  // even-
+#define POLY_R_H0         %r7  //      odd pair
+#define POLY_R_H1_TMP_HI  %r8  // even-
+#define POLY_R_H1         %r9  //      odd pair
+#define POLY_R_H2         %r10
+#define POLY_R_R0         %r11
+#define POLY_R_R1         %r12
+#define POLY_R_R1_MUL5    %r13
+#define POLY_R_X0_HI      %r2  // even-
+#define POLY_R_X0_LO      %r3  //      odd pair
+#define POLY_R_X1_HI      %r4  // even-
+#define POLY_R_X1_LO      %r5  //      odd pair
+
+#define POLY_S_R0      (4 * 4 + 0 * 8)(POLY_RSTATE)
+#define POLY_S_R1      (4 * 4 + 1 * 8)(POLY_RSTATE)
+#define POLY_S_H0      (4 * 4 + 2 * 8 + 0 * 8)(POLY_RSTATE)
+#define POLY_S_H1      (4 * 4 + 2 * 8 + 1 * 8)(POLY_RSTATE)
+#define POLY_S_H2d     (4 * 4 + 2 * 8 + 2 * 8)(POLY_RSTATE)
+
+#define INC_POLY1305_SRC(a) \
+       aghi POLY_RSRC, (a);
+
+#define POLY1305_LOAD_STATE() \
+       lg POLY_R_H0, POLY_S_H0; \
+       lg POLY_R_H1, POLY_S_H1; \
+       llgf POLY_R_H2, POLY_S_H2d; \
+       rllg POLY_R_H0, POLY_R_H0, 32; \
+       rllg POLY_R_H1, POLY_R_H1, 32; \
+       lg POLY_R_R0, POLY_S_R0; \
+       lg POLY_R_R1, POLY_S_R1; \
+       rllg POLY_R_R0, POLY_R_R0, 32; \
+       rllg POLY_R_R1, POLY_R_R1, 32; \
+       srlg POLY_R_R1_MUL5, POLY_R_R1, 2; \
+       algr POLY_R_R1_MUL5, POLY_R_R1;
+
+#define POLY1305_STORE_STATE() \
+       rllg POLY_R_H0, POLY_R_H0, 32; \
+       rllg POLY_R_H1, POLY_R_H1, 32; \
+       stg POLY_R_H0, POLY_S_H0; \
+       stg POLY_R_H1, POLY_S_H1; \
+       st POLY_R_H2, POLY_S_H2d;
+
+/* a = h + m */
+#define POLY1305_BLOCK_PART1_HB(src_offset, high_pad) \
+       lrvg POLY_R_X0_HI, ((src_offset) + 1 * 8)(POLY_RSRC); \
+       lrvg POLY_R_X0_LO, ((src_offset) + 0 * 8)(POLY_RSRC); \
+       lghi POLY_R_H1_TMP_HI, (high_pad);
+
+#define POLY1305_BLOCK_PART1(src_offset) \
+       POLY1305_BLOCK_PART1_HB(src_offset, 1);
+
+#define POLY1305_BLOCK_PART2() \
+       algr POLY_R_H0, POLY_R_X0_LO; \
+       alcgr POLY_R_H1, POLY_R_X0_HI; \
+       alcgr POLY_R_H2, POLY_R_H1_TMP_HI; \
+       lgr POLY_R_X1_LO, POLY_R_H0; \
+       lgr POLY_R_X0_LO, POLY_R_H0;
+
+#define POLY1305_BLOCK_PART3() \
+       /* h = a * r (partial mod 2^130-5): */ \
+       \
+       /* h0 * r1 */ \
+       mlgr POLY_R_X1_HI, POLY_R_R1; \
+       \
+       /* h1 * r0 */ \
+       lgr POLY_R_H0, POLY_R_H1; \
+       mlgr POLY_R_H0_TMP_HI, POLY_R_R0; \
+       \
+       /* h1 * r1 mod 2^130-5 */ \
+       mlgr POLY_R_H1_TMP_HI, POLY_R_R1_MUL5;
+
+#define POLY1305_BLOCK_PART4() \
+       \
+       /* h0 * r0 */ \
+       mlgr POLY_R_X0_HI, POLY_R_R0; \
+       \
+       algr POLY_R_X1_LO, POLY_R_H0; \
+       alcgr POLY_R_X1_HI, POLY_R_H0_TMP_HI; \
+       \
+       lgr POLY_R_H0_TMP_HI, POLY_R_H2; \
+       msgr POLY_R_H0_TMP_HI, POLY_R_R1_MUL5; /* h2 * r1 mod 2^130-5 */ \
+       msgr POLY_R_H2, POLY_R_R0;             /* h2 * r0 */
+
+#define POLY1305_BLOCK_PART5() \
+       \
+       algr POLY_R_X0_LO, POLY_R_H1; \
+       alcgr POLY_R_X0_HI, POLY_R_H1_TMP_HI;
+
+#define POLY1305_BLOCK_PART6() \
+       \
+       algrk POLY_R_H1, POLY_R_H0_TMP_HI, POLY_R_X1_LO; \
+       alcgr POLY_R_H2, POLY_R_X1_HI;
+
+#define POLY1305_BLOCK_PART7() \
+       \
+       /* carry propagation */ \
+       srlg POLY_R_H0, POLY_R_H2, 2; \
+       risbgn POLY_R_X1_LO, POLY_R_H2, 0, 0x80 | 61, 0; \
+       lghi POLY_R_H1_TMP_HI, 0; \
+       agr POLY_R_H0, POLY_R_X1_LO; \
+       risbgn POLY_R_H2, POLY_R_H2, 62, 0x80 | 63, 0;
+
+#define POLY1305_BLOCK_PART8() \
+       algr POLY_R_H0, POLY_R_X0_LO; \
+       alcgr POLY_R_H1, POLY_R_X0_HI; \
+       alcgr POLY_R_H2, POLY_R_H1_TMP_HI;
+
+#endif /* GCRY_ASM_POLY1305_AMD64_H */
diff --git a/grub-core/lib/libgcrypt/cipher/bithelp.h 
b/grub-core/lib/libgcrypt/cipher/bithelp.h
index 150532433..7793ce7ca 100644
--- a/grub-core/lib/libgcrypt/cipher/bithelp.h
+++ b/grub-core/lib/libgcrypt/cipher/bithelp.h
@@ -4,7 +4,7 @@
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser general Public License as
+ * it under the terms of the GNU Lesser General Public License as
  * published by the Free Software Foundation; either version 2.1 of
  * the License, or (at your option) any later version.
  *
@@ -14,41 +14,110 @@
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
-#ifndef G10_BITHELP_H
-#define G10_BITHELP_H
+#ifndef GCRYPT_BITHELP_H
+#define GCRYPT_BITHELP_H
+
+#include "types.h"
 
 
 /****************
  * Rotate the 32 bit unsigned integer X by N bits left/right
  */
-#if defined(__GNUC__) && defined(__i386__)
+static inline u32 rol(u32 x, int n)
+{
+       return ( (x << (n&(32-1))) | (x >> ((32-n)&(32-1))) );
+}
+
+static inline u32 ror(u32 x, int n)
+{
+       return ( (x >> (n&(32-1))) | (x << ((32-n)&(32-1))) );
+}
+
+static inline u64 rol64(u64 x, int n)
+{
+  return ( (x << (n&(64-1))) | (x >> ((64-n)&(64-1))) );
+}
+
+/* Byte swap for 32-bit and 64-bit integers.  If available, use compiler
+   provided helpers.  */
+#ifdef HAVE_BUILTIN_BSWAP32
+# define _gcry_bswap32 __builtin_bswap32
+#else
 static inline u32
-rol( u32 x, int n)
+_gcry_bswap32(u32 x)
+{
+       return ((rol(x, 8) & 0x00ff00ffL) | (ror(x, 8) & 0xff00ff00L));
+}
+#endif
+
+#ifdef HAVE_BUILTIN_BSWAP64
+# define _gcry_bswap64 __builtin_bswap64
+#else
+static inline u64
+_gcry_bswap64(u64 x)
 {
-       __asm__("roll %%cl,%0"
-               :"=r" (x)
-               :"0" (x),"c" (n));
-       return x;
+       return ((u64)_gcry_bswap32(x) << 32) | (_gcry_bswap32(x >> 32));
 }
+#endif
+
+/* Endian dependent byte swap operations.  */
+#ifdef WORDS_BIGENDIAN
+# define le_bswap32(x) _gcry_bswap32(x)
+# define be_bswap32(x) ((u32)(x))
+# define le_bswap64(x) _gcry_bswap64(x)
+# define be_bswap64(x) ((u64)(x))
 #else
-#define rol(x,n) ( ((x) << (n)) | ((x) >> (32-(n))) )
+# define le_bswap32(x) ((u32)(x))
+# define be_bswap32(x) _gcry_bswap32(x)
+# define le_bswap64(x) ((u64)(x))
+# define be_bswap64(x) _gcry_bswap64(x)
 #endif
 
-#if defined(__GNUC__) && defined(__i386__)
-static inline u32
-ror(u32 x, int n)
+
+/* Count trailing zero bits in an unsigend int.  We return an int
+   because that is what gcc's builtin does.  Returns the number of
+   bits in X if X is 0. */
+static inline int
+_gcry_ctz (unsigned int x)
 {
-       __asm__("rorl %%cl,%0"
-               :"=r" (x)
-               :"0" (x),"c" (n));
-       return x;
+#if defined (HAVE_BUILTIN_CTZ)
+  return x ? __builtin_ctz (x) : 8 * sizeof (x);
+#else
+  /* See
+   * http://graphics.stanford.edu/~seander/bithacks.html#ZerosOnRightModLookup
+   */
+  static const unsigned char mod37[] =
+    {
+      sizeof (unsigned int)*8,
+          0,  1, 26,  2, 23, 27,  0,  3, 16, 24, 30, 28, 11,  0, 13,
+      4,  7, 17,  0, 25, 22, 31, 15, 29, 10, 12,  6,  0, 21, 14,  9,
+      5, 20,  8, 19, 18
+    };
+  return (int)mod37[(-x & x) % 37];
+#endif
 }
+
+
+/* Count trailing zero bits in an u64.  We return an int because that
+   is what gcc's builtin does.  Returns the number of bits in X if X
+   is 0.  */
+static inline int
+_gcry_ctz64(u64 x)
+{
+#if defined (HAVE_BUILTIN_CTZL) && SIZEOF_UNSIGNED_LONG >= 8
+  return x ? __builtin_ctzl (x) : 8 * sizeof (x);
+#elif defined (HAVE_BUILTIN_CTZ) && SIZEOF_UNSIGNED_INT >= 8
+#warning hello
+  return x ? __builtin_ctz (x) : 8 * sizeof (x);
 #else
-#define ror(x,n) ( ((x) >> (n)) | ((x) << (32-(n))) )
+  if ((x & 0xffffffff))
+    return _gcry_ctz (x);
+  else
+    return 32 + _gcry_ctz (x >> 32);
 #endif
+}
 
 
-#endif /*G10_BITHELP_H*/
+#endif /*GCRYPT_BITHELP_H*/
diff --git a/grub-core/lib/libgcrypt/cipher/blake2.c 
b/grub-core/lib/libgcrypt/cipher/blake2.c
new file mode 100644
index 000000000..d7f9a7e46
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/blake2.c
@@ -0,0 +1,1045 @@
+/* blake2.c - BLAKE2b and BLAKE2s hash functions (RFC 7693)
+ * Copyright (C) 2017  Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser general Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/* The code is based on public-domain/CC0 BLAKE2 reference implementation
+ * by Samual Neves, at https://github.com/BLAKE2/BLAKE2/tree/master/ref
+ * Copyright 2012, Samuel Neves <sneves@dei.uc.pt>
+ */
+
+#include <config.h>
+#include <string.h>
+#include "g10lib.h"
+#include "bithelp.h"
+#include "bufhelp.h"
+#include "cipher.h"
+#include "hash-common.h"
+
+/* USE_AVX indicates whether to compile with Intel AVX code. */
+#undef USE_AVX
+#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX) && \
+    (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+# define USE_AVX 1
+#endif
+
+/* USE_AVX2 indicates whether to compile with Intel AVX2 code. */
+#undef USE_AVX2
+#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX2) && \
+    (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+# define USE_AVX2 1
+#endif
+
+/* AMD64 assembly implementations use SystemV ABI, ABI conversion and 
additional
+ * stack to store XMM6-XMM15 needed on Win64. */
+#undef ASM_FUNC_ABI
+#undef ASM_EXTRA_STACK
+#if defined(USE_AVX2) && defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)
+# define ASM_FUNC_ABI __attribute__((sysv_abi))
+# define ASM_EXTRA_STACK (10 * 16)
+#else
+# define ASM_FUNC_ABI
+# define ASM_EXTRA_STACK 0
+#endif
+
+#define BLAKE2B_BLOCKBYTES 128
+#define BLAKE2B_OUTBYTES 64
+#define BLAKE2B_KEYBYTES 64
+
+#define BLAKE2S_BLOCKBYTES 64
+#define BLAKE2S_OUTBYTES 32
+#define BLAKE2S_KEYBYTES 32
+
+typedef struct
+{
+  u64 h[8];
+  u64 t[2];
+  u64 f[2];
+} BLAKE2B_STATE;
+
+struct blake2b_param_s
+{
+  byte digest_length;
+  byte key_length;
+  byte fanout;
+  byte depth;
+  byte leaf_length[4];
+  byte node_offset[4];
+  byte xof_length[4];
+  byte node_depth;
+  byte inner_length;
+  byte reserved[14];
+  byte salt[16];
+  byte personal[16];
+};
+
+typedef struct BLAKE2B_CONTEXT_S
+{
+  BLAKE2B_STATE state;
+  byte buf[BLAKE2B_BLOCKBYTES];
+  size_t buflen;
+  size_t outlen;
+#ifdef USE_AVX2
+  unsigned int use_avx2:1;
+#endif
+} BLAKE2B_CONTEXT;
+
+typedef struct
+{
+  u32 h[8];
+  u32 t[2];
+  u32 f[2];
+} BLAKE2S_STATE;
+
+struct blake2s_param_s
+{
+  byte digest_length;
+  byte key_length;
+  byte fanout;
+  byte depth;
+  byte leaf_length[4];
+  byte node_offset[4];
+  byte xof_length[2];
+  byte node_depth;
+  byte inner_length;
+  /* byte reserved[0]; */
+  byte salt[8];
+  byte personal[8];
+};
+
+typedef struct BLAKE2S_CONTEXT_S
+{
+  BLAKE2S_STATE state;
+  byte buf[BLAKE2S_BLOCKBYTES];
+  size_t buflen;
+  size_t outlen;
+#ifdef USE_AVX
+  unsigned int use_avx:1;
+#endif
+} BLAKE2S_CONTEXT;
+
+typedef unsigned int (*blake2_transform_t)(void *S, const void *inblk,
+                                          size_t nblks);
+
+
+static const u64 blake2b_IV[8] =
+{
+  U64_C(0x6a09e667f3bcc908), U64_C(0xbb67ae8584caa73b),
+  U64_C(0x3c6ef372fe94f82b), U64_C(0xa54ff53a5f1d36f1),
+  U64_C(0x510e527fade682d1), U64_C(0x9b05688c2b3e6c1f),
+  U64_C(0x1f83d9abfb41bd6b), U64_C(0x5be0cd19137e2179)
+};
+
+static const u32 blake2s_IV[8] =
+{
+  0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL, 0xA54FF53AUL,
+  0x510E527FUL, 0x9B05688CUL, 0x1F83D9ABUL, 0x5BE0CD19UL
+};
+
+static byte zero_block[BLAKE2B_BLOCKBYTES] = { 0, };
+
+
+static void blake2_write(void *S, const void *inbuf, size_t inlen,
+                        byte *tmpbuf, size_t *tmpbuflen, size_t blkbytes,
+                        blake2_transform_t transform_fn)
+{
+  const byte* in = inbuf;
+  unsigned int burn = 0;
+
+  if (inlen > 0)
+    {
+      size_t left = *tmpbuflen;
+      size_t fill = blkbytes - left;
+      size_t nblks;
+
+      if (inlen > fill)
+       {
+         if (fill > 0)
+           buf_cpy (tmpbuf + left, in, fill); /* Fill buffer */
+         left = 0;
+
+         burn = transform_fn (S, tmpbuf, 1); /* Increment counter + Compress */
+
+         in += fill;
+         inlen -= fill;
+
+         nblks = inlen / blkbytes - !(inlen % blkbytes);
+         if (nblks)
+           {
+             burn = transform_fn(S, in, nblks);
+             in += blkbytes * nblks;
+             inlen -= blkbytes * nblks;
+           }
+       }
+
+      gcry_assert (inlen > 0);
+
+      buf_cpy (tmpbuf + left, in, inlen);
+      *tmpbuflen = left + inlen;
+    }
+
+  if (burn)
+    _gcry_burn_stack (burn);
+
+  return;
+}
+
+
+static inline void blake2b_set_lastblock(BLAKE2B_STATE *S)
+{
+  S->f[0] = U64_C(0xffffffffffffffff);
+}
+
+static inline int blake2b_is_lastblock(const BLAKE2B_STATE *S)
+{
+  return S->f[0] != 0;
+}
+
+static inline void blake2b_increment_counter(BLAKE2B_STATE *S, const int inc)
+{
+  S->t[0] += (u64)inc;
+  S->t[1] += (S->t[0] < (u64)inc) - (inc < 0);
+}
+
+static inline u64 rotr64(u64 x, u64 n)
+{
+  return ((x >> (n & 63)) | (x << ((64 - n) & 63)));
+}
+
+static unsigned int blake2b_transform_generic(BLAKE2B_STATE *S,
+                                              const void *inblks,
+                                              size_t nblks)
+{
+  static const byte blake2b_sigma[12][16] =
+  {
+    {  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15 },
+    { 14, 10,  4,  8,  9, 15, 13,  6,  1, 12,  0,  2, 11,  7,  5,  3 },
+    { 11,  8, 12,  0,  5,  2, 15, 13, 10, 14,  3,  6,  7,  1,  9,  4 },
+    {  7,  9,  3,  1, 13, 12, 11, 14,  2,  6,  5, 10,  4,  0, 15,  8 },
+    {  9,  0,  5,  7,  2,  4, 10, 15, 14,  1, 11, 12,  6,  8,  3, 13 },
+    {  2, 12,  6, 10,  0, 11,  8,  3,  4, 13,  7,  5, 15, 14,  1,  9 },
+    { 12,  5,  1, 15, 14, 13,  4, 10,  0,  7,  6,  3,  9,  2,  8, 11 },
+    { 13, 11,  7, 14, 12,  1,  3,  9,  5,  0, 15,  4,  8,  6,  2, 10 },
+    {  6, 15, 14,  9, 11,  3,  0,  8, 12,  2, 13,  7,  1,  4, 10,  5 },
+    { 10,  2,  8,  4,  7,  6,  1,  5, 15, 11,  9, 14,  3, 12, 13 , 0 },
+    {  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15 },
+    { 14, 10,  4,  8,  9, 15, 13,  6,  1, 12,  0,  2, 11,  7,  5,  3 }
+  };
+  const byte* in = inblks;
+  u64 m[16];
+  u64 v[16];
+
+  while (nblks--)
+    {
+      /* Increment counter */
+      blake2b_increment_counter (S, BLAKE2B_BLOCKBYTES);
+
+      /* Compress */
+      m[0] = buf_get_le64 (in + 0 * sizeof(m[0]));
+      m[1] = buf_get_le64 (in + 1 * sizeof(m[0]));
+      m[2] = buf_get_le64 (in + 2 * sizeof(m[0]));
+      m[3] = buf_get_le64 (in + 3 * sizeof(m[0]));
+      m[4] = buf_get_le64 (in + 4 * sizeof(m[0]));
+      m[5] = buf_get_le64 (in + 5 * sizeof(m[0]));
+      m[6] = buf_get_le64 (in + 6 * sizeof(m[0]));
+      m[7] = buf_get_le64 (in + 7 * sizeof(m[0]));
+      m[8] = buf_get_le64 (in + 8 * sizeof(m[0]));
+      m[9] = buf_get_le64 (in + 9 * sizeof(m[0]));
+      m[10] = buf_get_le64 (in + 10 * sizeof(m[0]));
+      m[11] = buf_get_le64 (in + 11 * sizeof(m[0]));
+      m[12] = buf_get_le64 (in + 12 * sizeof(m[0]));
+      m[13] = buf_get_le64 (in + 13 * sizeof(m[0]));
+      m[14] = buf_get_le64 (in + 14 * sizeof(m[0]));
+      m[15] = buf_get_le64 (in + 15 * sizeof(m[0]));
+
+      v[ 0] = S->h[0];
+      v[ 1] = S->h[1];
+      v[ 2] = S->h[2];
+      v[ 3] = S->h[3];
+      v[ 4] = S->h[4];
+      v[ 5] = S->h[5];
+      v[ 6] = S->h[6];
+      v[ 7] = S->h[7];
+      v[ 8] = blake2b_IV[0];
+      v[ 9] = blake2b_IV[1];
+      v[10] = blake2b_IV[2];
+      v[11] = blake2b_IV[3];
+      v[12] = blake2b_IV[4] ^ S->t[0];
+      v[13] = blake2b_IV[5] ^ S->t[1];
+      v[14] = blake2b_IV[6] ^ S->f[0];
+      v[15] = blake2b_IV[7] ^ S->f[1];
+
+#define G(r,i,a,b,c,d)                      \
+  do {                                      \
+    a = a + b + m[blake2b_sigma[r][2*i+0]]; \
+    d = rotr64(d ^ a, 32);                  \
+    c = c + d;                              \
+    b = rotr64(b ^ c, 24);                  \
+    a = a + b + m[blake2b_sigma[r][2*i+1]]; \
+    d = rotr64(d ^ a, 16);                  \
+    c = c + d;                              \
+    b = rotr64(b ^ c, 63);                  \
+  } while(0)
+
+#define ROUND(r)                    \
+  do {                              \
+    G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \
+    G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \
+    G(r,2,v[ 2],v[ 6],v[10],v[14]); \
+    G(r,3,v[ 3],v[ 7],v[11],v[15]); \
+    G(r,4,v[ 0],v[ 5],v[10],v[15]); \
+    G(r,5,v[ 1],v[ 6],v[11],v[12]); \
+    G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \
+    G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \
+  } while(0)
+
+      ROUND(0);
+      ROUND(1);
+      ROUND(2);
+      ROUND(3);
+      ROUND(4);
+      ROUND(5);
+      ROUND(6);
+      ROUND(7);
+      ROUND(8);
+      ROUND(9);
+      ROUND(10);
+      ROUND(11);
+
+#undef G
+#undef ROUND
+
+      S->h[0] = S->h[0] ^ v[0] ^ v[0 + 8];
+      S->h[1] = S->h[1] ^ v[1] ^ v[1 + 8];
+      S->h[2] = S->h[2] ^ v[2] ^ v[2 + 8];
+      S->h[3] = S->h[3] ^ v[3] ^ v[3 + 8];
+      S->h[4] = S->h[4] ^ v[4] ^ v[4 + 8];
+      S->h[5] = S->h[5] ^ v[5] ^ v[5 + 8];
+      S->h[6] = S->h[6] ^ v[6] ^ v[6 + 8];
+      S->h[7] = S->h[7] ^ v[7] ^ v[7 + 8];
+
+      in += BLAKE2B_BLOCKBYTES;
+    }
+
+  return sizeof(void *) * 4 + sizeof(u64) * 16 * 2;
+}
+
+#ifdef USE_AVX2
+unsigned int _gcry_blake2b_transform_amd64_avx2(BLAKE2B_STATE *S,
+                                                const void *inblks,
+                                                size_t nblks) ASM_FUNC_ABI;
+#endif
+
+static unsigned int blake2b_transform(void *ctx, const void *inblks,
+                                      size_t nblks)
+{
+  BLAKE2B_CONTEXT *c = ctx;
+  unsigned int nburn;
+
+  if (0)
+    {}
+#ifdef USE_AVX2
+  if (c->use_avx2)
+    nburn = _gcry_blake2b_transform_amd64_avx2(&c->state, inblks, nblks);
+#endif
+  else
+    nburn = blake2b_transform_generic(&c->state, inblks, nblks);
+
+  if (nburn)
+    nburn += ASM_EXTRA_STACK;
+
+  return nburn;
+}
+
+static void blake2b_final(void *ctx)
+{
+  BLAKE2B_CONTEXT *c = ctx;
+  BLAKE2B_STATE *S = &c->state;
+  unsigned int burn;
+  size_t i;
+
+  gcry_assert (sizeof(c->buf) >= c->outlen);
+  if (blake2b_is_lastblock(S))
+    return;
+
+  if (c->buflen < BLAKE2B_BLOCKBYTES)
+    memset (c->buf + c->buflen, 0, BLAKE2B_BLOCKBYTES - c->buflen); /* Padding 
*/
+  blake2b_set_lastblock (S);
+  blake2b_increment_counter (S, (int)c->buflen - BLAKE2B_BLOCKBYTES);
+  burn = blake2b_transform (ctx, c->buf, 1);
+
+  /* Output full hash to buffer */
+  for (i = 0; i < 8; ++i)
+    buf_put_le64 (c->buf + sizeof(S->h[i]) * i, S->h[i]);
+
+  /* Zero out extra buffer bytes. */
+  if (c->outlen < sizeof(c->buf))
+    memset (c->buf + c->outlen, 0, sizeof(c->buf) - c->outlen);
+
+  if (burn)
+    _gcry_burn_stack (burn);
+}
+
+static byte *blake2b_read(void *ctx)
+{
+  BLAKE2B_CONTEXT *c = ctx;
+  return c->buf;
+}
+
+static void blake2b_write(void *ctx, const void *inbuf, size_t inlen)
+{
+  BLAKE2B_CONTEXT *c = ctx;
+  BLAKE2B_STATE *S = &c->state;
+  blake2_write(S, inbuf, inlen, c->buf, &c->buflen, BLAKE2B_BLOCKBYTES,
+              blake2b_transform);
+}
+
+static inline void blake2b_init_param(BLAKE2B_STATE *S,
+                                     const struct blake2b_param_s *P)
+{
+  const byte *p = (const byte *)P;
+  size_t i;
+
+  /* init xors IV with input parameter block */
+
+  /* IV XOR ParamBlock */
+  for (i = 0; i < 8; ++i)
+    S->h[i] = blake2b_IV[i] ^ buf_get_le64(p + sizeof(S->h[i]) * i);
+}
+
+static inline gcry_err_code_t blake2b_init(BLAKE2B_CONTEXT *ctx,
+                                          const byte *key, size_t keylen)
+{
+  struct blake2b_param_s P[1] = { { 0, } };
+  BLAKE2B_STATE *S = &ctx->state;
+
+  if (!ctx->outlen || ctx->outlen > BLAKE2B_OUTBYTES)
+    return GPG_ERR_INV_ARG;
+  if (sizeof(P[0]) != sizeof(u64) * 8)
+    return GPG_ERR_INTERNAL;
+  if (keylen && (!key || keylen > BLAKE2B_KEYBYTES))
+    return GPG_ERR_INV_KEYLEN;
+
+  P->digest_length = ctx->outlen;
+  P->key_length = keylen;
+  P->fanout = 1;
+  P->depth = 1;
+
+  blake2b_init_param (S, P);
+  wipememory (P, sizeof(P));
+
+  if (key)
+    {
+      blake2b_write (ctx, key, keylen);
+      blake2b_write (ctx, zero_block, BLAKE2B_BLOCKBYTES - keylen);
+    }
+
+  return 0;
+}
+
+static gcry_err_code_t blake2b_init_ctx(void *ctx, unsigned int flags,
+                                       const byte *key, size_t keylen,
+                                       unsigned int dbits)
+{
+  BLAKE2B_CONTEXT *c = ctx;
+  unsigned int features = _gcry_get_hw_features ();
+
+  (void)features;
+  (void)flags;
+
+  memset (c, 0, sizeof (*c));
+
+#ifdef USE_AVX2
+  c->use_avx2 = !!(features & HWF_INTEL_AVX2);
+#endif
+
+  c->outlen = dbits / 8;
+  c->buflen = 0;
+  return blake2b_init(c, key, keylen);
+}
+
+/* Variable-length Hash Function H'.  */
+gcry_err_code_t
+blake2b_vl_hash (const void *in, size_t inlen, size_t outputlen, void *output)
+{
+  gcry_err_code_t ec;
+  BLAKE2B_CONTEXT ctx;
+  unsigned char buf[4];
+
+  ec = blake2b_init_ctx (&ctx, 0, NULL, 0,
+                         (outputlen < 64 ? outputlen: 64)*8);
+  if (ec)
+    return ec;
+
+  buf_put_le32 (buf, outputlen);
+  blake2b_write (&ctx, buf, 4);
+  blake2b_write (&ctx, in, inlen);
+  blake2b_final (&ctx);
+
+  if (outputlen <= 64)
+    memcpy (output, ctx.buf, outputlen);
+  else
+    {
+      int r = (outputlen-1)/32 - 1;
+      unsigned int remained = outputlen - 32*r;
+      int i;
+      unsigned char d[64];
+
+      i = 0;
+      while (1)
+        {
+          memcpy (d, ctx.buf, 64);
+          memcpy ((unsigned char *)output+i*32, d, 32);
+
+          if (++i >= r)
+            break;
+
+          ec = blake2b_init_ctx (&ctx, 0, NULL, 0, 64*8);
+          if (ec)
+            return ec;
+
+          blake2b_write (&ctx, d, 64);
+          blake2b_final (&ctx);
+        }
+
+      ec = blake2b_init_ctx (&ctx, 0, NULL, 0, remained*8);
+      if (ec)
+        return ec;
+
+      blake2b_write (&ctx, d, 64);
+      blake2b_final (&ctx);
+
+      memcpy ((unsigned char *)output+r*32, ctx.buf, remained);
+    }
+
+  wipememory (buf, sizeof (buf));
+  wipememory (&ctx, sizeof (ctx));
+  return 0;
+}
+
+static inline void blake2s_set_lastblock(BLAKE2S_STATE *S)
+{
+  S->f[0] = 0xFFFFFFFFUL;
+}
+
+static inline int blake2s_is_lastblock(BLAKE2S_STATE *S)
+{
+  return S->f[0] != 0;
+}
+
+static inline void blake2s_increment_counter(BLAKE2S_STATE *S, const int inc)
+{
+  S->t[0] += (u32)inc;
+  S->t[1] += (S->t[0] < (u32)inc) - (inc < 0);
+}
+
+static unsigned int blake2s_transform_generic(BLAKE2S_STATE *S,
+                                              const void *inblks,
+                                              size_t nblks)
+{
+  static const byte blake2s_sigma[10][16] =
+  {
+    {  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15 },
+    { 14, 10,  4,  8,  9, 15, 13,  6,  1, 12,  0,  2, 11,  7,  5,  3 },
+    { 11,  8, 12,  0,  5,  2, 15, 13, 10, 14,  3,  6,  7,  1,  9,  4 },
+    {  7,  9,  3,  1, 13, 12, 11, 14,  2,  6,  5, 10,  4,  0, 15,  8 },
+    {  9,  0,  5,  7,  2,  4, 10, 15, 14,  1, 11, 12,  6,  8,  3, 13 },
+    {  2, 12,  6, 10,  0, 11,  8,  3,  4, 13,  7,  5, 15, 14,  1,  9 },
+    { 12,  5,  1, 15, 14, 13,  4, 10,  0,  7,  6,  3,  9,  2,  8, 11 },
+    { 13, 11,  7, 14, 12,  1,  3,  9,  5,  0, 15,  4,  8,  6,  2, 10 },
+    {  6, 15, 14,  9, 11,  3,  0,  8, 12,  2, 13,  7,  1,  4, 10,  5 },
+    { 10,  2,  8,  4,  7,  6,  1,  5, 15, 11,  9, 14,  3, 12, 13 , 0 },
+  };
+  unsigned int burn = 0;
+  const byte* in = inblks;
+  u32 m[16];
+  u32 v[16];
+
+  while (nblks--)
+    {
+      /* Increment counter */
+      blake2s_increment_counter (S, BLAKE2S_BLOCKBYTES);
+
+      /* Compress */
+      m[0] = buf_get_le32 (in + 0 * sizeof(m[0]));
+      m[1] = buf_get_le32 (in + 1 * sizeof(m[0]));
+      m[2] = buf_get_le32 (in + 2 * sizeof(m[0]));
+      m[3] = buf_get_le32 (in + 3 * sizeof(m[0]));
+      m[4] = buf_get_le32 (in + 4 * sizeof(m[0]));
+      m[5] = buf_get_le32 (in + 5 * sizeof(m[0]));
+      m[6] = buf_get_le32 (in + 6 * sizeof(m[0]));
+      m[7] = buf_get_le32 (in + 7 * sizeof(m[0]));
+      m[8] = buf_get_le32 (in + 8 * sizeof(m[0]));
+      m[9] = buf_get_le32 (in + 9 * sizeof(m[0]));
+      m[10] = buf_get_le32 (in + 10 * sizeof(m[0]));
+      m[11] = buf_get_le32 (in + 11 * sizeof(m[0]));
+      m[12] = buf_get_le32 (in + 12 * sizeof(m[0]));
+      m[13] = buf_get_le32 (in + 13 * sizeof(m[0]));
+      m[14] = buf_get_le32 (in + 14 * sizeof(m[0]));
+      m[15] = buf_get_le32 (in + 15 * sizeof(m[0]));
+
+      v[ 0] = S->h[0];
+      v[ 1] = S->h[1];
+      v[ 2] = S->h[2];
+      v[ 3] = S->h[3];
+      v[ 4] = S->h[4];
+      v[ 5] = S->h[5];
+      v[ 6] = S->h[6];
+      v[ 7] = S->h[7];
+      v[ 8] = blake2s_IV[0];
+      v[ 9] = blake2s_IV[1];
+      v[10] = blake2s_IV[2];
+      v[11] = blake2s_IV[3];
+      v[12] = S->t[0] ^ blake2s_IV[4];
+      v[13] = S->t[1] ^ blake2s_IV[5];
+      v[14] = S->f[0] ^ blake2s_IV[6];
+      v[15] = S->f[1] ^ blake2s_IV[7];
+
+#define G(r,i,a,b,c,d)                      \
+  do {                                      \
+    a = a + b + m[blake2s_sigma[r][2*i+0]]; \
+    d = ror(d ^ a, 16);                     \
+    c = c + d;                              \
+    b = ror(b ^ c, 12);                     \
+    a = a + b + m[blake2s_sigma[r][2*i+1]]; \
+    d = ror(d ^ a, 8);                      \
+    c = c + d;                              \
+    b = ror(b ^ c, 7);                      \
+  } while(0)
+
+#define ROUND(r)                    \
+  do {                              \
+    G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \
+    G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \
+    G(r,2,v[ 2],v[ 6],v[10],v[14]); \
+    G(r,3,v[ 3],v[ 7],v[11],v[15]); \
+    G(r,4,v[ 0],v[ 5],v[10],v[15]); \
+    G(r,5,v[ 1],v[ 6],v[11],v[12]); \
+    G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \
+    G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \
+  } while(0)
+
+      ROUND(0);
+      ROUND(1);
+      ROUND(2);
+      ROUND(3);
+      ROUND(4);
+      ROUND(5);
+      ROUND(6);
+      ROUND(7);
+      ROUND(8);
+      ROUND(9);
+
+#undef G
+#undef ROUND
+
+      S->h[0] = S->h[0] ^ v[0] ^ v[0 + 8];
+      S->h[1] = S->h[1] ^ v[1] ^ v[1 + 8];
+      S->h[2] = S->h[2] ^ v[2] ^ v[2 + 8];
+      S->h[3] = S->h[3] ^ v[3] ^ v[3 + 8];
+      S->h[4] = S->h[4] ^ v[4] ^ v[4 + 8];
+      S->h[5] = S->h[5] ^ v[5] ^ v[5 + 8];
+      S->h[6] = S->h[6] ^ v[6] ^ v[6 + 8];
+      S->h[7] = S->h[7] ^ v[7] ^ v[7 + 8];
+
+      in += BLAKE2S_BLOCKBYTES;
+    }
+
+  return burn;
+}
+
+#ifdef USE_AVX
+unsigned int _gcry_blake2s_transform_amd64_avx(BLAKE2S_STATE *S,
+                                               const void *inblks,
+                                               size_t nblks) ASM_FUNC_ABI;
+#endif
+
+static unsigned int blake2s_transform(void *ctx, const void *inblks,
+                                      size_t nblks)
+{
+  BLAKE2S_CONTEXT *c = ctx;
+  unsigned int nburn;
+
+  if (0)
+    {}
+#ifdef USE_AVX
+  if (c->use_avx)
+    nburn = _gcry_blake2s_transform_amd64_avx(&c->state, inblks, nblks);
+#endif
+  else
+    nburn = blake2s_transform_generic(&c->state, inblks, nblks);
+
+  if (nburn)
+    nburn += ASM_EXTRA_STACK;
+
+  return nburn;
+}
+
+static void blake2s_final(void *ctx)
+{
+  BLAKE2S_CONTEXT *c = ctx;
+  BLAKE2S_STATE *S = &c->state;
+  unsigned int burn;
+  size_t i;
+
+  gcry_assert (sizeof(c->buf) >= c->outlen);
+  if (blake2s_is_lastblock(S))
+    return;
+
+  if (c->buflen < BLAKE2S_BLOCKBYTES)
+    memset (c->buf + c->buflen, 0, BLAKE2S_BLOCKBYTES - c->buflen); /* Padding 
*/
+  blake2s_set_lastblock (S);
+  blake2s_increment_counter (S, (int)c->buflen - BLAKE2S_BLOCKBYTES);
+  burn = blake2s_transform (ctx, c->buf, 1);
+
+  /* Output full hash to buffer */
+  for (i = 0; i < 8; ++i)
+    buf_put_le32 (c->buf + sizeof(S->h[i]) * i, S->h[i]);
+
+  /* Zero out extra buffer bytes. */
+  if (c->outlen < sizeof(c->buf))
+    memset (c->buf + c->outlen, 0, sizeof(c->buf) - c->outlen);
+
+  if (burn)
+    _gcry_burn_stack (burn);
+}
+
+static byte *blake2s_read(void *ctx)
+{
+  BLAKE2S_CONTEXT *c = ctx;
+  return c->buf;
+}
+
+static void blake2s_write(void *ctx, const void *inbuf, size_t inlen)
+{
+  BLAKE2S_CONTEXT *c = ctx;
+  BLAKE2S_STATE *S = &c->state;
+  blake2_write(S, inbuf, inlen, c->buf, &c->buflen, BLAKE2S_BLOCKBYTES,
+              blake2s_transform);
+}
+
+static inline void blake2s_init_param(BLAKE2S_STATE *S,
+                                     const struct blake2s_param_s *P)
+{
+  const byte *p = (const byte *)P;
+  size_t i;
+
+  /* init2 xors IV with input parameter block */
+
+  /* IV XOR ParamBlock */
+  for (i = 0; i < 8; ++i)
+    S->h[i] ^= blake2s_IV[i] ^ buf_get_le32(&p[i * 4]);
+}
+
+static inline gcry_err_code_t blake2s_init(BLAKE2S_CONTEXT *ctx,
+                                          const byte *key, size_t keylen)
+{
+  struct blake2s_param_s P[1] = { { 0, } };
+  BLAKE2S_STATE *S = &ctx->state;
+
+  if (!ctx->outlen || ctx->outlen > BLAKE2S_OUTBYTES)
+    return GPG_ERR_INV_ARG;
+  if (sizeof(P[0]) != sizeof(u32) * 8)
+    return GPG_ERR_INTERNAL;
+  if (keylen && (!key || keylen > BLAKE2S_KEYBYTES))
+    return GPG_ERR_INV_KEYLEN;
+
+  P->digest_length = ctx->outlen;
+  P->key_length = keylen;
+  P->fanout = 1;
+  P->depth = 1;
+
+  blake2s_init_param (S, P);
+  wipememory (P, sizeof(P));
+
+  if (key)
+    {
+      blake2s_write (ctx, key, keylen);
+      blake2s_write (ctx, zero_block, BLAKE2S_BLOCKBYTES - keylen);
+    }
+
+  return 0;
+}
+
+static gcry_err_code_t blake2s_init_ctx(void *ctx, unsigned int flags,
+                                       const byte *key, size_t keylen,
+                                       unsigned int dbits)
+{
+  BLAKE2S_CONTEXT *c = ctx;
+  unsigned int features = _gcry_get_hw_features ();
+
+  (void)features;
+  (void)flags;
+
+  memset (c, 0, sizeof (*c));
+
+#ifdef USE_AVX
+  c->use_avx = !!(features & HWF_INTEL_AVX);
+#endif
+
+  c->outlen = dbits / 8;
+  c->buflen = 0;
+  return blake2s_init(c, key, keylen);
+}
+
+/* Selftests from "RFC 7693, Appendix E. BLAKE2b and BLAKE2s Self-Test
+ * Module C Source". */
+static void selftest_seq(byte *out, size_t len, u32 seed)
+{
+  size_t i;
+  u32 t, a, b;
+
+  a = 0xDEAD4BAD * seed;
+  b = 1;
+
+  for (i = 0; i < len; i++)
+    {
+      t = a + b;
+      a = b;
+      b = t;
+      out[i] = (t >> 24) & 0xFF;
+    }
+}
+
+static gpg_err_code_t
+selftests_blake2b (int algo, int extended, selftest_report_func_t report)
+{
+  static const byte blake2b_res[32] =
+  {
+    0xC2, 0x3A, 0x78, 0x00, 0xD9, 0x81, 0x23, 0xBD,
+    0x10, 0xF5, 0x06, 0xC6, 0x1E, 0x29, 0xDA, 0x56,
+    0x03, 0xD7, 0x63, 0xB8, 0xBB, 0xAD, 0x2E, 0x73,
+    0x7F, 0x5E, 0x76, 0x5A, 0x7B, 0xCC, 0xD4, 0x75
+  };
+  static const size_t b2b_md_len[4] = { 20, 32, 48, 64 };
+  static const size_t b2b_in_len[6] = { 0, 3, 128, 129, 255, 1024 };
+  size_t i, j, outlen, inlen;
+  byte in[1024], key[64];
+  BLAKE2B_CONTEXT ctx;
+  BLAKE2B_CONTEXT ctx2;
+  const char *what;
+  const char *errtxt;
+
+  (void)extended;
+
+  what = "rfc7693 BLAKE2b selftest";
+
+  /* 256-bit hash for testing */
+  if (blake2b_init_ctx(&ctx, 0, NULL, 0, 32 * 8))
+    {
+      errtxt = "init failed";
+      goto failed;
+    }
+
+  for (i = 0; i < 4; i++)
+    {
+      outlen = b2b_md_len[i];
+      for (j = 0; j < 6; j++)
+       {
+         inlen = b2b_in_len[j];
+
+         selftest_seq(in, inlen, inlen); /* unkeyed hash */
+         blake2b_init_ctx(&ctx2, 0, NULL, 0, outlen * 8);
+         blake2b_write(&ctx2, in, inlen);
+         blake2b_final(&ctx2);
+         blake2b_write(&ctx, ctx2.buf, outlen); /* hash the hash */
+
+         selftest_seq(key, outlen, outlen); /* keyed hash */
+         blake2b_init_ctx(&ctx2, 0, key, outlen, outlen * 8);
+         blake2b_write(&ctx2, in, inlen);
+         blake2b_final(&ctx2);
+         blake2b_write(&ctx, ctx2.buf, outlen); /* hash the hash */
+       }
+    }
+
+  /* compute and compare the hash of hashes */
+  blake2b_final(&ctx);
+  for (i = 0; i < 32; i++)
+    {
+      if (ctx.buf[i] != blake2b_res[i])
+       {
+         errtxt = "digest mismatch";
+         goto failed;
+       }
+    }
+
+  return 0;
+
+failed:
+  if (report)
+    report ("digest", algo, what, errtxt);
+  return GPG_ERR_SELFTEST_FAILED;
+}
+
+static gpg_err_code_t
+selftests_blake2s (int algo, int extended, selftest_report_func_t report)
+{
+  static const byte blake2s_res[32] =
+  {
+    0x6A, 0x41, 0x1F, 0x08, 0xCE, 0x25, 0xAD, 0xCD,
+    0xFB, 0x02, 0xAB, 0xA6, 0x41, 0x45, 0x1C, 0xEC,
+    0x53, 0xC5, 0x98, 0xB2, 0x4F, 0x4F, 0xC7, 0x87,
+    0xFB, 0xDC, 0x88, 0x79, 0x7F, 0x4C, 0x1D, 0xFE
+  };
+  static const size_t b2s_md_len[4] = { 16, 20, 28, 32 };
+  static const size_t b2s_in_len[6] = { 0, 3, 64, 65, 255, 1024 };
+  size_t i, j, outlen, inlen;
+  byte in[1024], key[32];
+  BLAKE2S_CONTEXT ctx;
+  BLAKE2S_CONTEXT ctx2;
+  const char *what;
+  const char *errtxt;
+
+  (void)extended;
+
+  what = "rfc7693 BLAKE2s selftest";
+
+  /* 256-bit hash for testing */
+  if (blake2s_init_ctx(&ctx, 0, NULL, 0, 32 * 8))
+    {
+      errtxt = "init failed";
+      goto failed;
+    }
+
+  for (i = 0; i < 4; i++)
+    {
+      outlen = b2s_md_len[i];
+      for (j = 0; j < 6; j++)
+       {
+         inlen = b2s_in_len[j];
+
+         selftest_seq(in, inlen, inlen); /* unkeyed hash */
+         blake2s_init_ctx(&ctx2, 0, NULL, 0, outlen * 8);
+         blake2s_write(&ctx2, in, inlen);
+         blake2s_final(&ctx2);
+         blake2s_write(&ctx, ctx2.buf, outlen); /* hash the hash */
+
+         selftest_seq(key, outlen, outlen); /* keyed hash */
+         blake2s_init_ctx(&ctx2, 0, key, outlen, outlen * 8);
+         blake2s_write(&ctx2, in, inlen);
+         blake2s_final(&ctx2);
+         blake2s_write(&ctx, ctx2.buf, outlen); /* hash the hash */
+       }
+    }
+
+  /* compute and compare the hash of hashes */
+  blake2s_final(&ctx);
+  for (i = 0; i < 32; i++)
+    {
+      if (ctx.buf[i] != blake2s_res[i])
+       {
+         errtxt = "digest mismatch";
+         goto failed;
+       }
+    }
+
+  return 0;
+
+failed:
+  if (report)
+    report ("digest", algo, what, errtxt);
+  return GPG_ERR_SELFTEST_FAILED;
+}
+
+
+gcry_err_code_t _gcry_blake2_init_with_key(void *ctx, unsigned int flags,
+                                          const unsigned char *key,
+                                          size_t keylen, int algo)
+{
+  gcry_err_code_t rc;
+  switch (algo)
+    {
+    case GCRY_MD_BLAKE2B_512:
+      rc = blake2b_init_ctx (ctx, flags, key, keylen, 512);
+      break;
+    case GCRY_MD_BLAKE2B_384:
+      rc = blake2b_init_ctx (ctx, flags, key, keylen, 384);
+      break;
+    case GCRY_MD_BLAKE2B_256:
+      rc = blake2b_init_ctx (ctx, flags, key, keylen, 256);
+      break;
+    case GCRY_MD_BLAKE2B_160:
+      rc = blake2b_init_ctx (ctx, flags, key, keylen, 160);
+      break;
+    case GCRY_MD_BLAKE2S_256:
+      rc = blake2s_init_ctx (ctx, flags, key, keylen, 256);
+      break;
+    case GCRY_MD_BLAKE2S_224:
+      rc = blake2s_init_ctx (ctx, flags, key, keylen, 224);
+      break;
+    case GCRY_MD_BLAKE2S_160:
+      rc = blake2s_init_ctx (ctx, flags, key, keylen, 160);
+      break;
+    case GCRY_MD_BLAKE2S_128:
+      rc = blake2s_init_ctx (ctx, flags, key, keylen, 128);
+      break;
+    default:
+      rc = GPG_ERR_DIGEST_ALGO;
+      break;
+    }
+
+  return rc;
+}
+
+
+#define DEFINE_BLAKE2_VARIANT(bs, BS, dbits, oid_branch) \
+  static void blake2##bs##_##dbits##_init(void *ctx, unsigned int flags) \
+  { \
+    int err = blake2##bs##_init_ctx (ctx, flags, NULL, 0, dbits); \
+    gcry_assert (err == 0); \
+  } \
+  static void \
+  _gcry_blake2##bs##_##dbits##_hash_buffers(void *outbuf, size_t nbytes, \
+        const gcry_buffer_t *iov, int iovcnt) \
+  { \
+    BLAKE2##BS##_CONTEXT hd; \
+    (void)nbytes; \
+    blake2##bs##_##dbits##_init (&hd, 0); \
+    for (;iovcnt > 0; iov++, iovcnt--) \
+      blake2##bs##_write (&hd, (const char*)iov[0].data + iov[0].off, \
+                          iov[0].len); \
+    blake2##bs##_final (&hd); \
+    memcpy (outbuf, blake2##bs##_read (&hd), dbits / 8); \
+  } \
+  static const byte blake2##bs##_##dbits##_asn[] = { 0x30 }; \
+  static const gcry_md_oid_spec_t oid_spec_blake2##bs##_##dbits[] = \
+    { \
+      { " 1.3.6.1.4.1.1722.12.2." oid_branch }, \
+      { NULL } \
+    }; \
+  const gcry_md_spec_t _gcry_digest_spec_blake2##bs##_##dbits = \
+    { \
+      GCRY_MD_BLAKE2##BS##_##dbits, {0, 0}, \
+      "BLAKE2" #BS "_" #dbits, blake2##bs##_##dbits##_asn, \
+      DIM (blake2##bs##_##dbits##_asn), oid_spec_blake2##bs##_##dbits, \
+      dbits / 8, blake2##bs##_##dbits##_init, blake2##bs##_write, \
+      blake2##bs##_final, blake2##bs##_read, NULL, \
+      _gcry_blake2##bs##_##dbits##_hash_buffers, \
+      sizeof (BLAKE2##BS##_CONTEXT), selftests_blake2##bs \
+    };
+
+DEFINE_BLAKE2_VARIANT(b, B, 512, "1.16")
+DEFINE_BLAKE2_VARIANT(b, B, 384, "1.12")
+DEFINE_BLAKE2_VARIANT(b, B, 256, "1.8")
+DEFINE_BLAKE2_VARIANT(b, B, 160, "1.5")
+
+DEFINE_BLAKE2_VARIANT(s, S, 256, "2.8")
+DEFINE_BLAKE2_VARIANT(s, S, 224, "2.7")
+DEFINE_BLAKE2_VARIANT(s, S, 160, "2.5")
+DEFINE_BLAKE2_VARIANT(s, S, 128, "2.4")
diff --git a/grub-core/lib/libgcrypt/cipher/blake2b-amd64-avx2.S 
b/grub-core/lib/libgcrypt/cipher/blake2b-amd64-avx2.S
new file mode 100644
index 000000000..3601b65f3
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/blake2b-amd64-avx2.S
@@ -0,0 +1,300 @@
+/* blake2b-amd64-avx2.S  -  AVX2 implementation of BLAKE2b
+ *
+ * Copyright (C) 2018 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/* The code is based on public-domain/CC0 BLAKE2 reference implementation
+ * by Samual Neves, at https://github.com/BLAKE2/BLAKE2/tree/master/sse
+ * Copyright 2012, Samuel Neves <sneves@dei.uc.pt>
+ */
+
+#ifdef __x86_64
+#include <config.h>
+#if defined(HAVE_GCC_INLINE_ASM_AVX2) && \
+   (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+    defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+
+#include "asm-common-amd64.h"
+
+.text
+
+/* register macros */
+#define RSTATE  %rdi
+#define RINBLKS %rsi
+#define RNBLKS  %rdx
+#define RIV     %rcx
+
+/* state structure */
+#define STATE_H 0
+#define STATE_T (STATE_H + 8 * 8)
+#define STATE_F (STATE_T + 2 * 8)
+
+/* vector registers */
+#define ROW1  %ymm0
+#define ROW2  %ymm1
+#define ROW3  %ymm2
+#define ROW4  %ymm3
+#define TMP1  %ymm4
+#define TMP1x %xmm4
+#define R16   %ymm5
+#define R24   %ymm6
+
+#define MA1   %ymm8
+#define MA2   %ymm9
+#define MA3   %ymm10
+#define MA4   %ymm11
+#define MA1x  %xmm8
+#define MA2x  %xmm9
+#define MA3x  %xmm10
+#define MA4x  %xmm11
+
+#define MB1   %ymm12
+#define MB2   %ymm13
+#define MB3   %ymm14
+#define MB4   %ymm15
+#define MB1x  %xmm12
+#define MB2x  %xmm13
+#define MB3x  %xmm14
+#define MB4x  %xmm15
+
+/**********************************************************************
+  blake2b/AVX2
+ **********************************************************************/
+
+#define GATHER_MSG(m1, m2, m3, m4, m1x, m2x, m3x, m4x, \
+                   s0, s1, s2, s3, s4, s5, s6, s7, s8, \
+                   s9, s10, s11, s12, s13, s14, s15) \
+        vmovq (s0)*8(RINBLKS), m1x; \
+        vmovq (s4)*8(RINBLKS), TMP1x; \
+        vpinsrq $1, (s2)*8(RINBLKS), m1x, m1x; \
+        vpinsrq $1, (s6)*8(RINBLKS), TMP1x, TMP1x; \
+        vinserti128 $1, TMP1x, m1, m1; \
+          vmovq (s1)*8(RINBLKS), m2x; \
+          vmovq (s5)*8(RINBLKS), TMP1x; \
+          vpinsrq $1, (s3)*8(RINBLKS), m2x, m2x; \
+          vpinsrq $1, (s7)*8(RINBLKS), TMP1x, TMP1x; \
+          vinserti128 $1, TMP1x, m2, m2; \
+            vmovq (s8)*8(RINBLKS), m3x; \
+            vmovq (s12)*8(RINBLKS), TMP1x; \
+            vpinsrq $1, (s10)*8(RINBLKS), m3x, m3x; \
+            vpinsrq $1, (s14)*8(RINBLKS), TMP1x, TMP1x; \
+            vinserti128 $1, TMP1x, m3, m3; \
+              vmovq (s9)*8(RINBLKS), m4x; \
+              vmovq (s13)*8(RINBLKS), TMP1x; \
+              vpinsrq $1, (s11)*8(RINBLKS), m4x, m4x; \
+              vpinsrq $1, (s15)*8(RINBLKS), TMP1x, TMP1x; \
+              vinserti128 $1, TMP1x, m4, m4;
+
+#define LOAD_MSG_0(m1, m2, m3, m4, m1x, m2x, m3x, m4x) \
+        GATHER_MSG(m1, m2, m3, m4, m1x, m2x, m3x, m4x, \
+                    0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 
15)
+#define LOAD_MSG_1(m1, m2, m3, m4, m1x, m2x, m3x, m4x) \
+        GATHER_MSG(m1, m2, m3, m4, m1x, m2x, m3x, m4x, \
+                   14, 10,  4,  8,  9, 15, 13,  6,  1, 12,  0,  2, 11,  7,  5, 
 3)
+#define LOAD_MSG_2(m1, m2, m3, m4, m1x, m2x, m3x, m4x) \
+        GATHER_MSG(m1, m2, m3, m4, m1x, m2x, m3x, m4x, \
+                   11,  8, 12,  0,  5,  2, 15, 13, 10, 14,  3,  6,  7,  1,  9, 
 4)
+#define LOAD_MSG_3(m1, m2, m3, m4, m1x, m2x, m3x, m4x) \
+        GATHER_MSG(m1, m2, m3, m4, m1x, m2x, m3x, m4x, \
+                    7,  9,  3,  1, 13, 12, 11, 14,  2,  6,  5, 10,  4,  0, 15, 
 8)
+#define LOAD_MSG_4(m1, m2, m3, m4, m1x, m2x, m3x, m4x) \
+        GATHER_MSG(m1, m2, m3, m4, m1x, m2x, m3x, m4x, \
+                    9,  0,  5,  7,  2,  4, 10, 15, 14,  1, 11, 12,  6,  8,  3, 
13)
+#define LOAD_MSG_5(m1, m2, m3, m4, m1x, m2x, m3x, m4x) \
+        GATHER_MSG(m1, m2, m3, m4, m1x, m2x, m3x, m4x, \
+                    2, 12,  6, 10,  0, 11,  8,  3,  4, 13,  7,  5, 15, 14,  1, 
 9)
+#define LOAD_MSG_6(m1, m2, m3, m4, m1x, m2x, m3x, m4x) \
+        GATHER_MSG(m1, m2, m3, m4, m1x, m2x, m3x, m4x, \
+                   12,  5,  1, 15, 14, 13,  4, 10,  0,  7,  6,  3,  9,  2,  8, 
11)
+#define LOAD_MSG_7(m1, m2, m3, m4, m1x, m2x, m3x, m4x) \
+        GATHER_MSG(m1, m2, m3, m4, m1x, m2x, m3x, m4x, \
+                   13, 11,  7, 14, 12,  1,  3,  9,  5,  0, 15,  4,  8,  6,  2, 
10)
+#define LOAD_MSG_8(m1, m2, m3, m4, m1x, m2x, m3x, m4x) \
+        GATHER_MSG(m1, m2, m3, m4, m1x, m2x, m3x, m4x, \
+                    6, 15, 14,  9, 11,  3,  0,  8, 12,  2, 13,  7,  1,  4, 10, 
 5)
+#define LOAD_MSG_9(m1, m2, m3, m4, m1x, m2x, m3x, m4x) \
+        GATHER_MSG(m1, m2, m3, m4, m1x, m2x, m3x, m4x, \
+                   10,  2,  8,  4,  7,  6,  1,  5, 15, 11,  9, 14,  3, 12, 13 
, 0)
+#define LOAD_MSG_10(m1, m2, m3, m4, m1x, m2x, m3x, m4x) \
+        LOAD_MSG_0(m1, m2, m3, m4, m1x, m2x, m3x, m4x)
+#define LOAD_MSG_11(m1, m2, m3, m4, m1x, m2x, m3x, m4x) \
+        LOAD_MSG_1(m1, m2, m3, m4, m1x, m2x, m3x, m4x)
+
+#define LOAD_MSG(r, m1, m2, m3, m4) \
+        LOAD_MSG_##r(m1, m2, m3, m4, m1##x, m2##x, m3##x, m4##x)
+
+#define ROR_32(in, out) vpshufd $0xb1, in, out;
+
+#define ROR_24(in, out) vpshufb R24, in, out;
+
+#define ROR_16(in, out) vpshufb R16, in, out;
+
+#define ROR_63(in, out) \
+        vpsrlq $63, in, TMP1; \
+        vpaddq in, in, out; \
+        vpxor  TMP1, out, out;
+
+#define G(r1, r2, r3, r4, m, ROR_A, ROR_B) \
+        vpaddq m, r1, r1; \
+        vpaddq r2, r1, r1; \
+        vpxor r1, r4, r4; \
+        ROR_A(r4, r4); \
+        vpaddq r4, r3, r3; \
+        vpxor r3, r2, r2; \
+        ROR_B(r2, r2);
+
+#define G1(r1, r2, r3, r4, m) \
+        G(r1, r2, r3, r4, m, ROR_32, ROR_24);
+
+#define G2(r1, r2, r3, r4, m) \
+        G(r1, r2, r3, r4, m, ROR_16, ROR_63);
+
+#define MM_SHUFFLE(z,y,x,w) \
+        (((z) << 6) | ((y) << 4) | ((x) << 2) | (w))
+
+#define DIAGONALIZE(r1, r2, r3, r4) \
+        vpermq $MM_SHUFFLE(0,3,2,1), r2, r2; \
+        vpermq $MM_SHUFFLE(1,0,3,2), r3, r3; \
+        vpermq $MM_SHUFFLE(2,1,0,3), r4, r4;
+
+#define UNDIAGONALIZE(r1, r2, r3, r4) \
+        vpermq $MM_SHUFFLE(2,1,0,3), r2, r2; \
+        vpermq $MM_SHUFFLE(1,0,3,2), r3, r3; \
+        vpermq $MM_SHUFFLE(0,3,2,1), r4, r4;
+
+#define ROUND(r, m1, m2, m3, m4) \
+        G1(ROW1, ROW2, ROW3, ROW4, m1); \
+        G2(ROW1, ROW2, ROW3, ROW4, m2); \
+        DIAGONALIZE(ROW1, ROW2, ROW3, ROW4); \
+        G1(ROW1, ROW2, ROW3, ROW4, m3); \
+        G2(ROW1, ROW2, ROW3, ROW4, m4); \
+        UNDIAGONALIZE(ROW1, ROW2, ROW3, ROW4);
+
+blake2b_data:
+.align 32
+.Liv:
+        .quad 0x6a09e667f3bcc908, 0xbb67ae8584caa73b
+        .quad 0x3c6ef372fe94f82b, 0xa54ff53a5f1d36f1
+        .quad 0x510e527fade682d1, 0x9b05688c2b3e6c1f
+        .quad 0x1f83d9abfb41bd6b, 0x5be0cd19137e2179
+.Lshuf_ror16:
+        .byte 2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9
+.Lshuf_ror24:
+        .byte 3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10
+
+.align 64
+.globl _gcry_blake2b_transform_amd64_avx2
+ELF(.type _gcry_blake2b_transform_amd64_avx2,@function;)
+
+_gcry_blake2b_transform_amd64_avx2:
+        /* input:
+         *     %rdi: state
+         *     %rsi: blks
+         *     %rdx: num_blks
+         */
+        CFI_STARTPROC();
+
+        vzeroupper;
+
+        addq $128, (STATE_T + 0)(RSTATE);
+        adcq $0, (STATE_T + 8)(RSTATE);
+
+        vbroadcasti128 .Lshuf_ror16 rRIP, R16;
+        vbroadcasti128 .Lshuf_ror24 rRIP, R24;
+
+        vmovdqa .Liv+(0 * 8) rRIP, ROW3;
+        vmovdqa .Liv+(4 * 8) rRIP, ROW4;
+
+        vmovdqu (STATE_H + 0 * 8)(RSTATE), ROW1;
+        vmovdqu (STATE_H + 4 * 8)(RSTATE), ROW2;
+
+        vpxor (STATE_T)(RSTATE), ROW4, ROW4;
+
+        LOAD_MSG(0, MA1, MA2, MA3, MA4);
+        LOAD_MSG(1, MB1, MB2, MB3, MB4);
+
+.Loop:
+        ROUND(0, MA1, MA2, MA3, MA4);
+                                      LOAD_MSG(2, MA1, MA2, MA3, MA4);
+        ROUND(1, MB1, MB2, MB3, MB4);
+                                      LOAD_MSG(3, MB1, MB2, MB3, MB4);
+        ROUND(2, MA1, MA2, MA3, MA4);
+                                      LOAD_MSG(4, MA1, MA2, MA3, MA4);
+        ROUND(3, MB1, MB2, MB3, MB4);
+                                      LOAD_MSG(5, MB1, MB2, MB3, MB4);
+        ROUND(4, MA1, MA2, MA3, MA4);
+                                      LOAD_MSG(6, MA1, MA2, MA3, MA4);
+        ROUND(5, MB1, MB2, MB3, MB4);
+                                      LOAD_MSG(7, MB1, MB2, MB3, MB4);
+        ROUND(6, MA1, MA2, MA3, MA4);
+                                      LOAD_MSG(8, MA1, MA2, MA3, MA4);
+        ROUND(7, MB1, MB2, MB3, MB4);
+                                      LOAD_MSG(9, MB1, MB2, MB3, MB4);
+        ROUND(8, MA1, MA2, MA3, MA4);
+                                      LOAD_MSG(10, MA1, MA2, MA3, MA4);
+        ROUND(9, MB1, MB2, MB3, MB4);
+                                      LOAD_MSG(11, MB1, MB2, MB3, MB4);
+        sub $1, RNBLKS;
+        jz .Loop_end;
+
+        lea 128(RINBLKS), RINBLKS;
+        addq $128, (STATE_T + 0)(RSTATE);
+        adcq $0, (STATE_T + 8)(RSTATE);
+
+        ROUND(10, MA1, MA2, MA3, MA4);
+                                      LOAD_MSG(0, MA1, MA2, MA3, MA4);
+        ROUND(11, MB1, MB2, MB3, MB4);
+                                      LOAD_MSG(1, MB1, MB2, MB3, MB4);
+
+        vpxor ROW3, ROW1, ROW1;
+        vpxor ROW4, ROW2, ROW2;
+
+        vmovdqa .Liv+(0 * 8) rRIP, ROW3;
+        vmovdqa .Liv+(4 * 8) rRIP, ROW4;
+
+        vpxor (STATE_H + 0 * 8)(RSTATE), ROW1, ROW1;
+        vpxor (STATE_H + 4 * 8)(RSTATE), ROW2, ROW2;
+
+        vmovdqu ROW1, (STATE_H + 0 * 8)(RSTATE);
+        vmovdqu ROW2, (STATE_H + 4 * 8)(RSTATE);
+
+        vpxor (STATE_T)(RSTATE), ROW4, ROW4;
+
+        jmp .Loop;
+
+.Loop_end:
+        ROUND(10, MA1, MA2, MA3, MA4);
+        ROUND(11, MB1, MB2, MB3, MB4);
+
+        vpxor ROW3, ROW1, ROW1;
+        vpxor ROW4, ROW2, ROW2;
+        vpxor (STATE_H + 0 * 8)(RSTATE), ROW1, ROW1;
+        vpxor (STATE_H + 4 * 8)(RSTATE), ROW2, ROW2;
+
+        vmovdqu ROW1, (STATE_H + 0 * 8)(RSTATE);
+        vmovdqu ROW2, (STATE_H + 4 * 8)(RSTATE);
+
+        xor %eax, %eax;
+        vzeroall;
+        ret_spec_stop;
+        CFI_ENDPROC();
+ELF(.size _gcry_blake2b_transform_amd64_avx2,
+    .-_gcry_blake2b_transform_amd64_avx2;)
+
+#endif /*defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS)*/
+#endif /*__x86_64*/
diff --git a/grub-core/lib/libgcrypt/cipher/blake2s-amd64-avx.S 
b/grub-core/lib/libgcrypt/cipher/blake2s-amd64-avx.S
new file mode 100644
index 000000000..5094b4c1d
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/blake2s-amd64-avx.S
@@ -0,0 +1,278 @@
+/* blake2s-amd64-avx.S  -  AVX implementation of BLAKE2s
+ *
+ * Copyright (C) 2018 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/* The code is based on public-domain/CC0 BLAKE2 reference implementation
+ * by Samual Neves, at https://github.com/BLAKE2/BLAKE2/tree/master/sse
+ * Copyright 2012, Samuel Neves <sneves@dei.uc.pt>
+ */
+
+#ifdef __x86_64
+#include <config.h>
+#if defined(HAVE_GCC_INLINE_ASM_AVX) && \
+   (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+    defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+
+#include "asm-common-amd64.h"
+
+.text
+
+/* register macros */
+#define RSTATE  %rdi
+#define RINBLKS %rsi
+#define RNBLKS  %rdx
+#define RIV     %rcx
+
+/* state structure */
+#define STATE_H 0
+#define STATE_T (STATE_H + 8 * 4)
+#define STATE_F (STATE_T + 2 * 4)
+
+/* vector registers */
+#define ROW1  %xmm0
+#define ROW2  %xmm1
+#define ROW3  %xmm2
+#define ROW4  %xmm3
+#define TMP1  %xmm4
+#define TMP1x %xmm4
+#define R16   %xmm5
+#define R8    %xmm6
+
+#define MA1   %xmm8
+#define MA2   %xmm9
+#define MA3   %xmm10
+#define MA4   %xmm11
+
+#define MB1   %xmm12
+#define MB2   %xmm13
+#define MB3   %xmm14
+#define MB4   %xmm15
+
+/**********************************************************************
+  blake2s/AVX
+ **********************************************************************/
+
+#define GATHER_MSG(m1, m2, m3, m4, \
+                   s0, s1, s2, s3, s4, s5, s6, s7, s8, \
+                   s9, s10, s11, s12, s13, s14, s15) \
+        vmovd (s0)*4(RINBLKS), m1; \
+          vmovd (s1)*4(RINBLKS), m2; \
+            vmovd (s8)*4(RINBLKS), m3; \
+              vmovd (s9)*4(RINBLKS), m4; \
+        vpinsrd $1, (s2)*4(RINBLKS), m1, m1; \
+          vpinsrd $1, (s3)*4(RINBLKS), m2, m2; \
+            vpinsrd $1, (s10)*4(RINBLKS), m3, m3; \
+              vpinsrd $1, (s11)*4(RINBLKS), m4, m4; \
+        vpinsrd $2, (s4)*4(RINBLKS), m1, m1; \
+          vpinsrd $2, (s5)*4(RINBLKS), m2, m2; \
+            vpinsrd $2, (s12)*4(RINBLKS), m3, m3; \
+              vpinsrd $2, (s13)*4(RINBLKS), m4, m4; \
+        vpinsrd $3, (s6)*4(RINBLKS), m1, m1; \
+          vpinsrd $3, (s7)*4(RINBLKS), m2, m2; \
+            vpinsrd $3, (s14)*4(RINBLKS), m3, m3; \
+              vpinsrd $3, (s15)*4(RINBLKS), m4, m4;
+
+#define LOAD_MSG_0(m1, m2, m3, m4) \
+        GATHER_MSG(m1, m2, m3, m4, \
+                    0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 
15)
+#define LOAD_MSG_1(m1, m2, m3, m4) \
+        GATHER_MSG(m1, m2, m3, m4, \
+                   14, 10,  4,  8,  9, 15, 13,  6,  1, 12,  0,  2, 11,  7,  5, 
 3)
+#define LOAD_MSG_2(m1, m2, m3, m4) \
+        GATHER_MSG(m1, m2, m3, m4, \
+                   11,  8, 12,  0,  5,  2, 15, 13, 10, 14,  3,  6,  7,  1,  9, 
 4)
+#define LOAD_MSG_3(m1, m2, m3, m4) \
+        GATHER_MSG(m1, m2, m3, m4, \
+                    7,  9,  3,  1, 13, 12, 11, 14,  2,  6,  5, 10,  4,  0, 15, 
 8)
+#define LOAD_MSG_4(m1, m2, m3, m4) \
+        GATHER_MSG(m1, m2, m3, m4, \
+                    9,  0,  5,  7,  2,  4, 10, 15, 14,  1, 11, 12,  6,  8,  3, 
13)
+#define LOAD_MSG_5(m1, m2, m3, m4) \
+        GATHER_MSG(m1, m2, m3, m4, \
+                    2, 12,  6, 10,  0, 11,  8,  3,  4, 13,  7,  5, 15, 14,  1, 
 9)
+#define LOAD_MSG_6(m1, m2, m3, m4) \
+        GATHER_MSG(m1, m2, m3, m4, \
+                   12,  5,  1, 15, 14, 13,  4, 10,  0,  7,  6,  3,  9,  2,  8, 
11)
+#define LOAD_MSG_7(m1, m2, m3, m4) \
+        GATHER_MSG(m1, m2, m3, m4, \
+                   13, 11,  7, 14, 12,  1,  3,  9,  5,  0, 15,  4,  8,  6,  2, 
10)
+#define LOAD_MSG_8(m1, m2, m3, m4) \
+        GATHER_MSG(m1, m2, m3, m4, \
+                    6, 15, 14,  9, 11,  3,  0,  8, 12,  2, 13,  7,  1,  4, 10, 
 5)
+#define LOAD_MSG_9(m1, m2, m3, m4) \
+        GATHER_MSG(m1, m2, m3, m4, \
+                   10,  2,  8,  4,  7,  6,  1,  5, 15, 11,  9, 14,  3, 12, 13 
, 0)
+
+#define LOAD_MSG(r, m1, m2, m3, m4) LOAD_MSG_##r(m1, m2, m3, m4)
+
+#define ROR_16(in, out) vpshufb R16, in, out;
+
+#define ROR_8(in, out)  vpshufb R8, in, out;
+
+#define ROR_12(in, out) \
+        vpsrld $12, in, TMP1; \
+        vpslld $(32 - 12), in, out; \
+        vpxor TMP1, out, out;
+
+#define ROR_7(in, out) \
+        vpsrld $7, in, TMP1; \
+        vpslld $(32 - 7), in, out; \
+        vpxor TMP1, out, out;
+
+#define G(r1, r2, r3, r4, m, ROR_A, ROR_B) \
+        vpaddd m, r1, r1; \
+        vpaddd r2, r1, r1; \
+        vpxor r1, r4, r4; \
+        ROR_A(r4, r4); \
+        vpaddd r4, r3, r3; \
+        vpxor r3, r2, r2; \
+        ROR_B(r2, r2);
+
+#define G1(r1, r2, r3, r4, m) \
+        G(r1, r2, r3, r4, m, ROR_16, ROR_12);
+
+#define G2(r1, r2, r3, r4, m) \
+        G(r1, r2, r3, r4, m, ROR_8, ROR_7);
+
+#define MM_SHUFFLE(z,y,x,w) \
+        (((z) << 6) | ((y) << 4) | ((x) << 2) | (w))
+
+#define DIAGONALIZE(r1, r2, r3, r4) \
+        vpshufd $MM_SHUFFLE(0,3,2,1), r2, r2; \
+        vpshufd $MM_SHUFFLE(1,0,3,2), r3, r3; \
+        vpshufd $MM_SHUFFLE(2,1,0,3), r4, r4;
+
+#define UNDIAGONALIZE(r1, r2, r3, r4) \
+        vpshufd $MM_SHUFFLE(2,1,0,3), r2, r2; \
+        vpshufd $MM_SHUFFLE(1,0,3,2), r3, r3; \
+        vpshufd $MM_SHUFFLE(0,3,2,1), r4, r4;
+
+#define ROUND(r, m1, m2, m3, m4) \
+        G1(ROW1, ROW2, ROW3, ROW4, m1); \
+        G2(ROW1, ROW2, ROW3, ROW4, m2); \
+        DIAGONALIZE(ROW1, ROW2, ROW3, ROW4); \
+        G1(ROW1, ROW2, ROW3, ROW4, m3); \
+        G2(ROW1, ROW2, ROW3, ROW4, m4); \
+        UNDIAGONALIZE(ROW1, ROW2, ROW3, ROW4);
+
+blake2s_data:
+.align 16
+.Liv:
+        .long 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A
+        .long 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19
+.Lshuf_ror16:
+        .byte 2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
+.Lshuf_ror8:
+        .byte 1,2,3,0,5,6,7,4,9,10,11,8,13,14,15,12
+
+.align 64
+.globl _gcry_blake2s_transform_amd64_avx
+ELF(.type _gcry_blake2s_transform_amd64_avx,@function;)
+
+_gcry_blake2s_transform_amd64_avx:
+        /* input:
+         *     %rdi: state
+         *     %rsi: blks
+         *     %rdx: num_blks
+         */
+        CFI_STARTPROC();
+
+        vzeroupper;
+
+        addq $64, (STATE_T + 0)(RSTATE);
+
+        vmovdqa .Lshuf_ror16 rRIP, R16;
+        vmovdqa .Lshuf_ror8 rRIP, R8;
+
+        vmovdqa .Liv+(0 * 4) rRIP, ROW3;
+        vmovdqa .Liv+(4 * 4) rRIP, ROW4;
+
+        vmovdqu (STATE_H + 0 * 4)(RSTATE), ROW1;
+        vmovdqu (STATE_H + 4 * 4)(RSTATE), ROW2;
+
+        vpxor (STATE_T)(RSTATE), ROW4, ROW4;
+
+        LOAD_MSG(0, MA1, MA2, MA3, MA4);
+        LOAD_MSG(1, MB1, MB2, MB3, MB4);
+
+.Loop:
+        ROUND(0, MA1, MA2, MA3, MA4);
+                                      LOAD_MSG(2, MA1, MA2, MA3, MA4);
+        ROUND(1, MB1, MB2, MB3, MB4);
+                                      LOAD_MSG(3, MB1, MB2, MB3, MB4);
+        ROUND(2, MA1, MA2, MA3, MA4);
+                                      LOAD_MSG(4, MA1, MA2, MA3, MA4);
+        ROUND(3, MB1, MB2, MB3, MB4);
+                                      LOAD_MSG(5, MB1, MB2, MB3, MB4);
+        ROUND(4, MA1, MA2, MA3, MA4);
+                                      LOAD_MSG(6, MA1, MA2, MA3, MA4);
+        ROUND(5, MB1, MB2, MB3, MB4);
+                                      LOAD_MSG(7, MB1, MB2, MB3, MB4);
+        ROUND(6, MA1, MA2, MA3, MA4);
+                                      LOAD_MSG(8, MA1, MA2, MA3, MA4);
+        ROUND(7, MB1, MB2, MB3, MB4);
+                                      LOAD_MSG(9, MB1, MB2, MB3, MB4);
+        sub $1, RNBLKS;
+        jz .Loop_end;
+
+        lea 64(RINBLKS), RINBLKS;
+        addq $64, (STATE_T + 0)(RSTATE);
+
+        ROUND(8, MA1, MA2, MA3, MA4);
+                                      LOAD_MSG(0, MA1, MA2, MA3, MA4);
+        ROUND(9, MB1, MB2, MB3, MB4);
+                                      LOAD_MSG(1, MB1, MB2, MB3, MB4);
+
+        vpxor ROW3, ROW1, ROW1;
+        vpxor ROW4, ROW2, ROW2;
+
+        vmovdqa .Liv+(0 * 4) rRIP, ROW3;
+        vmovdqa .Liv+(4 * 4) rRIP, ROW4;
+
+        vpxor (STATE_H + 0 * 4)(RSTATE), ROW1, ROW1;
+        vpxor (STATE_H + 4 * 4)(RSTATE), ROW2, ROW2;
+
+        vmovdqu ROW1, (STATE_H + 0 * 4)(RSTATE);
+        vmovdqu ROW2, (STATE_H + 4 * 4)(RSTATE);
+
+        vpxor (STATE_T)(RSTATE), ROW4, ROW4;
+
+        jmp .Loop;
+
+.Loop_end:
+        ROUND(8, MA1, MA2, MA3, MA4);
+        ROUND(9, MB1, MB2, MB3, MB4);
+
+        vpxor ROW3, ROW1, ROW1;
+        vpxor ROW4, ROW2, ROW2;
+        vpxor (STATE_H + 0 * 4)(RSTATE), ROW1, ROW1;
+        vpxor (STATE_H + 4 * 4)(RSTATE), ROW2, ROW2;
+
+        vmovdqu ROW1, (STATE_H + 0 * 4)(RSTATE);
+        vmovdqu ROW2, (STATE_H + 4 * 4)(RSTATE);
+
+        xor %eax, %eax;
+        vzeroall;
+        ret_spec_stop;
+        CFI_ENDPROC();
+ELF(.size _gcry_blake2s_transform_amd64_avx,
+    .-_gcry_blake2s_transform_amd64_avx;)
+
+#endif /*defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS)*/
+#endif /*__x86_64*/
diff --git a/grub-core/lib/libgcrypt/cipher/blowfish-amd64.S 
b/grub-core/lib/libgcrypt/cipher/blowfish-amd64.S
new file mode 100644
index 000000000..2b4ffa1a3
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/blowfish-amd64.S
@@ -0,0 +1,601 @@
+/* blowfish-amd64.S  -  AMD64 assembly implementation of Blowfish cipher
+ *
+ * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifdef __x86_64
+#include <config.h>
+#if defined(USE_BLOWFISH) && \
+    (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+
+#include "asm-common-amd64.h"
+
+.text
+
+/* structure of BLOWFISH_context: */
+#define s0     0
+#define s1     ((s0) + 256 * 4)
+#define s2     ((s1) + 256 * 4)
+#define s3     ((s2) + 256 * 4)
+#define p      ((s3) + 256 * 4)
+
+/* register macros */
+#define CTX %rdi
+#define RIO %rsi
+
+#define RX0 %rax
+#define RX1 %rbx
+#define RX2 %rcx
+#define RX3 %rdx
+
+#define RX0d %eax
+#define RX1d %ebx
+#define RX2d %ecx
+#define RX3d %edx
+
+#define RX0bl %al
+#define RX1bl %bl
+#define RX2bl %cl
+#define RX3bl %dl
+
+#define RX0bh %ah
+#define RX1bh %bh
+#define RX2bh %ch
+#define RX3bh %dh
+
+#define RT0 %rbp
+#define RT1 %rsi
+#define RT2 %r8
+#define RT3 %r9
+
+#define RT0d %ebp
+#define RT1d %esi
+#define RT2d %r8d
+#define RT3d %r9d
+
+#define RKEY %r10
+
+/***********************************************************************
+ * 1-way blowfish
+ ***********************************************************************/
+#define F() \
+       movzbl RX0bh,           RT1d; \
+       movzbl RX0bl,           RT3d; \
+       rorq $16,               RX0; \
+       movzbl RX0bh,           RT0d; \
+       movzbl RX0bl,           RT2d; \
+       rorq $16,               RX0; \
+       movl s0(CTX,RT0,4),     RT0d; \
+       addl s1(CTX,RT2,4),     RT0d; \
+       xorl s2(CTX,RT1,4),     RT0d; \
+       addl s3(CTX,RT3,4),     RT0d; \
+       xorq RT0,               RX0;
+
+#define load_roundkey_enc(n) \
+       movq p+4*(n)(CTX),      RX3;
+
+#define add_roundkey_enc() \
+       xorq RX3,               RX0;
+
+#define round_enc(n) \
+       add_roundkey_enc(); \
+       load_roundkey_enc(n); \
+       \
+       F(); \
+       F();
+
+#define load_roundkey_dec(n) \
+       movq p+4*(n-1)(CTX),    RX3; \
+       rorq $32,               RX3;
+
+#define add_roundkey_dec() \
+       xorq RX3,               RX0;
+
+#define round_dec(n) \
+       add_roundkey_dec(); \
+       load_roundkey_dec(n); \
+       \
+       F(); \
+       F();
+
+#define read_block() \
+       movq (RIO),             RX0; \
+       rorq $32,               RX0; \
+       bswapq                  RX0;
+
+#define write_block() \
+       bswapq                  RX0; \
+       movq RX0,               (RIO);
+
+.align 8
+ELF(.type   __blowfish_enc_blk1,@function;)
+
+__blowfish_enc_blk1:
+       /* input:
+        *      %rdi: ctx, CTX
+        *      RX0: input plaintext block
+        * output:
+        *      RX0: output plaintext block
+        */
+       CFI_STARTPROC();
+       movq %rbp, %r11;
+       CFI_REGISTER(%rbp, %r11);
+
+       load_roundkey_enc(0);
+       round_enc(2);
+       round_enc(4);
+       round_enc(6);
+       round_enc(8);
+       round_enc(10);
+       round_enc(12);
+       round_enc(14);
+       round_enc(16);
+       add_roundkey_enc();
+
+       movq %r11, %rbp;
+       CFI_RESTORE(%rbp)
+
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size __blowfish_enc_blk1,.-__blowfish_enc_blk1;)
+
+.align 8
+.globl  _gcry_blowfish_amd64_do_encrypt
+ELF(.type   _gcry_blowfish_amd64_do_encrypt,@function;)
+
+_gcry_blowfish_amd64_do_encrypt:
+       /* input:
+        *      %rdi: ctx, CTX
+        *      %rsi: u32 *ret_xl
+        *      %rdx: u32 *ret_xr
+        */
+       CFI_STARTPROC();
+       ENTER_SYSV_FUNC_PARAMS_0_4
+
+       movl (%rdx), RX0d;
+       shlq $32, RX0;
+       movl (%rsi), RT3d;
+       movq %rdx, %r10;
+       orq RT3, RX0;
+       movq %rsi, RX2;
+
+       call __blowfish_enc_blk1;
+
+       movl RX0d, (%r10);
+       shrq $32, RX0;
+       movl RX0d, (RX2);
+
+       EXIT_SYSV_FUNC
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size _gcry_blowfish_amd64_do_encrypt,.-_gcry_blowfish_amd64_do_encrypt;)
+
+.align 8
+.globl  _gcry_blowfish_amd64_encrypt_block
+ELF(.type   _gcry_blowfish_amd64_encrypt_block,@function;)
+
+_gcry_blowfish_amd64_encrypt_block:
+       /* input:
+        *      %rdi: ctx, CTX
+        *      %rsi: dst
+        *      %rdx: src
+        */
+       CFI_STARTPROC();
+       ENTER_SYSV_FUNC_PARAMS_0_4
+
+       movq %rsi, %r10;
+
+       movq %rdx, RIO;
+       read_block();
+
+       call __blowfish_enc_blk1;
+
+       movq %r10, RIO;
+       write_block();
+
+       EXIT_SYSV_FUNC
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size 
_gcry_blowfish_amd64_encrypt_block,.-_gcry_blowfish_amd64_encrypt_block;)
+
+.align 8
+.globl  _gcry_blowfish_amd64_decrypt_block
+ELF(.type   _gcry_blowfish_amd64_decrypt_block,@function;)
+
+_gcry_blowfish_amd64_decrypt_block:
+       /* input:
+        *      %rdi: ctx, CTX
+        *      %rsi: dst
+        *      %rdx: src
+        */
+       CFI_STARTPROC();
+       ENTER_SYSV_FUNC_PARAMS_0_4
+
+       movq %rbp, %r11;
+       CFI_REGISTER(%rbp, %r11);
+
+       movq %rsi, %r10;
+       movq %rdx, RIO;
+
+       read_block();
+
+       load_roundkey_dec(17);
+       round_dec(15);
+       round_dec(13);
+       round_dec(11);
+       round_dec(9);
+       round_dec(7);
+       round_dec(5);
+       round_dec(3);
+       round_dec(1);
+       add_roundkey_dec();
+
+       movq %r10, RIO;
+       write_block();
+
+       movq %r11, %rbp;
+       CFI_RESTORE(%rbp);
+
+       EXIT_SYSV_FUNC
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size 
_gcry_blowfish_amd64_decrypt_block,.-_gcry_blowfish_amd64_decrypt_block;)
+
+/**********************************************************************
+  4-way blowfish, four blocks parallel
+ **********************************************************************/
+#define F4(x) \
+       movzbl x ## bh,         RT1d; \
+       movzbl x ## bl,         RT3d; \
+       rorq $16,               x; \
+       movzbl x ## bh,         RT0d; \
+       movzbl x ## bl,         RT2d; \
+       rorq $16,               x; \
+       movl s0(CTX,RT0,4),     RT0d; \
+       addl s1(CTX,RT2,4),     RT0d; \
+       xorl s2(CTX,RT1,4),     RT0d; \
+       addl s3(CTX,RT3,4),     RT0d; \
+       xorq RT0,               x;
+
+#define add_preloaded_roundkey4() \
+       xorq RKEY,              RX0; \
+       xorq RKEY,              RX1; \
+       xorq RKEY,              RX2; \
+       xorq RKEY,              RX3;
+
+#define preload_roundkey_enc(n) \
+       movq p+4*(n)(CTX),      RKEY;
+
+#define add_roundkey_enc4(n) \
+       add_preloaded_roundkey4(); \
+       preload_roundkey_enc(n + 2);
+
+#define round_enc4(n) \
+       add_roundkey_enc4(n); \
+       \
+       F4(RX0); \
+       F4(RX1); \
+       F4(RX2); \
+       F4(RX3); \
+       \
+       F4(RX0); \
+       F4(RX1); \
+       F4(RX2); \
+       F4(RX3);
+
+#define preload_roundkey_dec(n) \
+       movq p+4*((n)-1)(CTX),  RKEY; \
+       rorq $32,               RKEY;
+
+#define add_roundkey_dec4(n) \
+       add_preloaded_roundkey4(); \
+       preload_roundkey_dec(n - 2);
+
+#define round_dec4(n) \
+       add_roundkey_dec4(n); \
+       \
+       F4(RX0); \
+       F4(RX1); \
+       F4(RX2); \
+       F4(RX3); \
+       \
+       F4(RX0); \
+       F4(RX1); \
+       F4(RX2); \
+       F4(RX3);
+
+#define inbswap_block4() \
+       rorq $32,               RX0; \
+       bswapq                  RX0; \
+       rorq $32,               RX1; \
+       bswapq                  RX1; \
+       rorq $32,               RX2; \
+       bswapq                  RX2; \
+       rorq $32,               RX3; \
+       bswapq                  RX3;
+
+#define inctrswap_block4() \
+       rorq $32,               RX0; \
+       rorq $32,               RX1; \
+       rorq $32,               RX2; \
+       rorq $32,               RX3;
+
+#define outbswap_block4() \
+       bswapq                  RX0; \
+       bswapq                  RX1; \
+       bswapq                  RX2; \
+       bswapq                  RX3;
+
+.align 8
+ELF(.type   __blowfish_enc_blk4,@function;)
+
+__blowfish_enc_blk4:
+       /* input:
+        *      %rdi: ctx, CTX
+        *      RX0,RX1,RX2,RX3: four input inbswapped plaintext blocks
+        * output:
+        *      RX0,RX1,RX2,RX3: four output ciphertext blocks
+        */
+       CFI_STARTPROC();
+       preload_roundkey_enc(0);
+
+       round_enc4(0);
+       round_enc4(2);
+       round_enc4(4);
+       round_enc4(6);
+       round_enc4(8);
+       round_enc4(10);
+       round_enc4(12);
+       round_enc4(14);
+       add_preloaded_roundkey4();
+
+       outbswap_block4();
+
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size __blowfish_enc_blk4,.-__blowfish_enc_blk4;)
+
+.align 8
+ELF(.type   __blowfish_dec_blk4,@function;)
+
+__blowfish_dec_blk4:
+       /* input:
+        *      %rdi: ctx, CTX
+        *      RX0,RX1,RX2,RX3: four input ciphertext blocks
+        * output:
+        *      RX0,RX1,RX2,RX3: four output plaintext blocks
+        */
+       CFI_STARTPROC();
+       preload_roundkey_dec(17);
+
+       inbswap_block4();
+
+       round_dec4(17);
+       round_dec4(15);
+       round_dec4(13);
+       round_dec4(11);
+       round_dec4(9);
+       round_dec4(7);
+       round_dec4(5);
+       round_dec4(3);
+       add_preloaded_roundkey4();
+
+       outbswap_block4();
+
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size __blowfish_dec_blk4,.-__blowfish_dec_blk4;)
+
+.align 8
+.globl  _gcry_blowfish_amd64_ctr_enc
+ELF(.type   _gcry_blowfish_amd64_ctr_enc,@function;)
+_gcry_blowfish_amd64_ctr_enc:
+       /* input:
+        *      %rdi: ctx, CTX
+        *      %rsi: dst (4 blocks)
+        *      %rdx: src (4 blocks)
+        *      %rcx: iv (big endian, 64bit)
+        */
+       CFI_STARTPROC();
+       ENTER_SYSV_FUNC_PARAMS_0_4
+
+       pushq %rbp;
+       CFI_PUSH(%rbp);
+       pushq %rbx;
+       CFI_PUSH(%rbx);
+       pushq %r12;
+       CFI_PUSH(%r12);
+       pushq %r13;
+       CFI_PUSH(%r13);
+
+       /* %r11-%r13 are not used by __blowfish_enc_blk4 */
+       movq %rcx, %r13; /*iv*/
+       movq %rdx, %r12; /*src*/
+       movq %rsi, %r11; /*dst*/
+
+       /* load IV and byteswap */
+       movq (%r13), RT0;
+       bswapq RT0;
+       movq RT0, RX0;
+
+       /* construct IVs */
+       leaq 1(RT0), RX1;
+       leaq 2(RT0), RX2;
+       leaq 3(RT0), RX3;
+       leaq 4(RT0), RT0;
+       bswapq RT0;
+
+       inctrswap_block4();
+
+       /* store new IV */
+       movq RT0, (%r13);
+
+       call __blowfish_enc_blk4;
+
+       /* XOR key-stream with plaintext */
+       xorq 0 * 8(%r12), RX0;
+       xorq 1 * 8(%r12), RX1;
+       xorq 2 * 8(%r12), RX2;
+       xorq 3 * 8(%r12), RX3;
+       movq RX0, 0 * 8(%r11);
+       movq RX1, 1 * 8(%r11);
+       movq RX2, 2 * 8(%r11);
+       movq RX3, 3 * 8(%r11);
+
+       popq %r13;
+       CFI_POP(%r13);
+       popq %r12;
+       CFI_POP(%r12);
+       popq %rbx;
+       CFI_POP(%rbx);
+       popq %rbp;
+       CFI_POP(%rbp);
+
+       EXIT_SYSV_FUNC
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size _gcry_blowfish_amd64_ctr_enc,.-_gcry_blowfish_amd64_ctr_enc;)
+
+.align 8
+.globl  _gcry_blowfish_amd64_cbc_dec
+ELF(.type   _gcry_blowfish_amd64_cbc_dec,@function;)
+_gcry_blowfish_amd64_cbc_dec:
+       /* input:
+        *      %rdi: ctx, CTX
+        *      %rsi: dst (4 blocks)
+        *      %rdx: src (4 blocks)
+        *      %rcx: iv (64bit)
+        */
+       CFI_STARTPROC();
+       ENTER_SYSV_FUNC_PARAMS_0_4
+
+       pushq %rbp;
+       CFI_PUSH(%rbp);
+       pushq %rbx;
+       CFI_PUSH(%rbx);
+       pushq %r12;
+       CFI_PUSH(%r12);
+       pushq %r13;
+       CFI_PUSH(%r13);
+
+       /* %r11-%r13 are not used by __blowfish_dec_blk4 */
+       movq %rsi, %r11; /*dst*/
+       movq %rdx, %r12; /*src*/
+       movq %rcx, %r13; /*iv*/
+
+       /* load input */
+       movq 0 * 8(%r12), RX0;
+       movq 1 * 8(%r12), RX1;
+       movq 2 * 8(%r12), RX2;
+       movq 3 * 8(%r12), RX3;
+
+       call __blowfish_dec_blk4;
+
+       movq 3 * 8(%r12), RT0;
+       xorq      (%r13), RX0;
+       xorq 0 * 8(%r12), RX1;
+       xorq 1 * 8(%r12), RX2;
+       xorq 2 * 8(%r12), RX3;
+       movq RT0, (%r13); /* store new IV */
+
+       movq RX0, 0 * 8(%r11);
+       movq RX1, 1 * 8(%r11);
+       movq RX2, 2 * 8(%r11);
+       movq RX3, 3 * 8(%r11);
+
+       popq %r13;
+       CFI_POP(%r13);
+       popq %r12;
+       CFI_POP(%r12);
+       popq %rbx;
+       CFI_POP(%rbx);
+       popq %rbp;
+       CFI_POP(%rbp);
+
+       EXIT_SYSV_FUNC
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size _gcry_blowfish_amd64_cbc_dec,.-_gcry_blowfish_amd64_cbc_dec;)
+
+.align 8
+.globl  _gcry_blowfish_amd64_cfb_dec
+ELF(.type   _gcry_blowfish_amd64_cfb_dec,@function;)
+_gcry_blowfish_amd64_cfb_dec:
+       /* input:
+        *      %rdi: ctx, CTX
+        *      %rsi: dst (4 blocks)
+        *      %rdx: src (4 blocks)
+        *      %rcx: iv (64bit)
+        */
+       CFI_STARTPROC();
+       ENTER_SYSV_FUNC_PARAMS_0_4
+
+       pushq %rbp;
+       CFI_PUSH(%rbp);
+       pushq %rbx;
+       CFI_PUSH(%rbx);
+       pushq %r12;
+       CFI_PUSH(%r12);
+       pushq %r13;
+       CFI_PUSH(%r13);
+
+       /* %r11-%r13 are not used by __blowfish_enc_blk4 */
+       movq %rcx, %r13; /*iv*/
+       movq %rdx, %r12; /*src*/
+       movq %rsi, %r11; /*dst*/
+
+       /* Load input */
+       movq (%r13), RX0;
+       movq 0 * 8(%r12), RX1;
+       movq 1 * 8(%r12), RX2;
+       movq 2 * 8(%r12), RX3;
+
+       inbswap_block4();
+
+       /* Update IV */
+       movq 3 * 8(%r12), RT0;
+       movq RT0, (%r13);
+
+       call __blowfish_enc_blk4;
+
+       xorq 0 * 8(%r12), RX0;
+       xorq 1 * 8(%r12), RX1;
+       xorq 2 * 8(%r12), RX2;
+       xorq 3 * 8(%r12), RX3;
+       movq RX0, 0 * 8(%r11);
+       movq RX1, 1 * 8(%r11);
+       movq RX2, 2 * 8(%r11);
+       movq RX3, 3 * 8(%r11);
+
+       popq %r13;
+       CFI_POP(%r13);
+       popq %r12;
+       CFI_POP(%r12);
+       popq %rbx;
+       CFI_POP(%rbx);
+       popq %rbp;
+       CFI_POP(%rbp);
+
+       EXIT_SYSV_FUNC
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size _gcry_blowfish_amd64_cfb_dec,.-_gcry_blowfish_amd64_cfb_dec;)
+
+#endif /*defined(USE_BLOWFISH)*/
+#endif /*__x86_64*/
diff --git a/grub-core/lib/libgcrypt/cipher/blowfish-arm.S 
b/grub-core/lib/libgcrypt/cipher/blowfish-arm.S
new file mode 100644
index 000000000..b30aa31f1
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/blowfish-arm.S
@@ -0,0 +1,743 @@
+/* blowfish-arm.S  -  ARM assembly implementation of Blowfish cipher
+ *
+ * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+
+#if defined(__ARMEL__)
+#ifdef HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS
+
+.text
+
+.syntax unified
+.arm
+
+/* structure of crypto context */
+#define s0     0
+#define s1     (s0 + (1 * 256) * 4)
+#define s2     (s0 + (2 * 256) * 4)
+#define s3     (s0 + (3 * 256) * 4)
+#define p      (s3 + (1 * 256) * 4)
+
+/* register macros */
+#define CTXs0 %r0
+#define CTXs1 %r9
+#define CTXs2 %r8
+#define CTXs3 %r10
+#define RMASK %lr
+#define RKEYL %r2
+#define RKEYR %ip
+
+#define RL0 %r3
+#define RR0 %r4
+
+#define RL1 %r9
+#define RR1 %r10
+
+#define RT0 %r11
+#define RT1 %r7
+#define RT2 %r5
+#define RT3 %r6
+
+/* helper macros */
+#define ldr_unaligned_le(rout, rsrc, offs, rtmp) \
+       ldrb rout, [rsrc, #((offs) + 0)]; \
+       ldrb rtmp, [rsrc, #((offs) + 1)]; \
+       orr rout, rout, rtmp, lsl #8; \
+       ldrb rtmp, [rsrc, #((offs) + 2)]; \
+       orr rout, rout, rtmp, lsl #16; \
+       ldrb rtmp, [rsrc, #((offs) + 3)]; \
+       orr rout, rout, rtmp, lsl #24;
+
+#define str_unaligned_le(rin, rdst, offs, rtmp0, rtmp1) \
+       mov rtmp0, rin, lsr #8; \
+       strb rin, [rdst, #((offs) + 0)]; \
+       mov rtmp1, rin, lsr #16; \
+       strb rtmp0, [rdst, #((offs) + 1)]; \
+       mov rtmp0, rin, lsr #24; \
+       strb rtmp1, [rdst, #((offs) + 2)]; \
+       strb rtmp0, [rdst, #((offs) + 3)];
+
+#define ldr_unaligned_be(rout, rsrc, offs, rtmp) \
+       ldrb rout, [rsrc, #((offs) + 3)]; \
+       ldrb rtmp, [rsrc, #((offs) + 2)]; \
+       orr rout, rout, rtmp, lsl #8; \
+       ldrb rtmp, [rsrc, #((offs) + 1)]; \
+       orr rout, rout, rtmp, lsl #16; \
+       ldrb rtmp, [rsrc, #((offs) + 0)]; \
+       orr rout, rout, rtmp, lsl #24;
+
+#define str_unaligned_be(rin, rdst, offs, rtmp0, rtmp1) \
+       mov rtmp0, rin, lsr #8; \
+       strb rin, [rdst, #((offs) + 3)]; \
+       mov rtmp1, rin, lsr #16; \
+       strb rtmp0, [rdst, #((offs) + 2)]; \
+       mov rtmp0, rin, lsr #24; \
+       strb rtmp1, [rdst, #((offs) + 1)]; \
+       strb rtmp0, [rdst, #((offs) + 0)];
+
+#ifdef __ARMEL__
+       #define ldr_unaligned_host ldr_unaligned_le
+       #define str_unaligned_host str_unaligned_le
+
+       /* bswap on little-endian */
+#ifdef HAVE_ARM_ARCH_V6
+       #define host_to_be(reg, rtmp) \
+               rev reg, reg;
+       #define be_to_host(reg, rtmp) \
+               rev reg, reg;
+#else
+       #define host_to_be(reg, rtmp) \
+               eor     rtmp, reg, reg, ror #16; \
+               mov     rtmp, rtmp, lsr #8; \
+               bic     rtmp, rtmp, #65280; \
+               eor     reg, rtmp, reg, ror #8;
+       #define be_to_host(reg, rtmp) \
+               eor     rtmp, reg, reg, ror #16; \
+               mov     rtmp, rtmp, lsr #8; \
+               bic     rtmp, rtmp, #65280; \
+               eor     reg, rtmp, reg, ror #8;
+#endif
+#else
+       #define ldr_unaligned_host ldr_unaligned_be
+       #define str_unaligned_host str_unaligned_be
+
+       /* nop on big-endian */
+       #define host_to_be(reg, rtmp) /*_*/
+       #define be_to_host(reg, rtmp) /*_*/
+#endif
+
+#define host_to_host(x, y) /*_*/
+
+/***********************************************************************
+ * 1-way blowfish
+ ***********************************************************************/
+#define F(l, r) \
+       and RT0, RMASK, l, lsr#(24 - 2); \
+       and RT1, RMASK, l, lsr#(16 - 2); \
+       ldr RT0, [CTXs0, RT0]; \
+       and RT2, RMASK, l, lsr#(8 - 2); \
+       ldr RT1, [CTXs1, RT1]; \
+       and RT3, RMASK, l, lsl#2; \
+       ldr RT2, [CTXs2, RT2]; \
+       add RT0, RT1; \
+       ldr RT3, [CTXs3, RT3]; \
+       eor RT0, RT2; \
+       add RT0, RT3; \
+       eor r, RT0;
+
+#define load_roundkey_enc(n) \
+       ldr RKEYL, [CTXs2, #((p - s2) + (4 * (n) + 0))]; \
+       ldr RKEYR, [CTXs2, #((p - s2) + (4 * (n) + 4))];
+
+#define add_roundkey_enc() \
+       eor RL0, RKEYL; \
+       eor RR0, RKEYR;
+
+#define round_enc(n) \
+       add_roundkey_enc(); \
+       load_roundkey_enc(n); \
+       \
+       F(RL0, RR0); \
+       F(RR0, RL0);
+
+#define load_roundkey_dec(n) \
+       ldr RKEYL, [CTXs2, #((p - s2) + (4 * ((n) - 1) + 4))]; \
+       ldr RKEYR, [CTXs2, #((p - s2) + (4 * ((n) - 1) + 0))];
+
+#define add_roundkey_dec() \
+       eor RL0, RKEYL; \
+       eor RR0, RKEYR;
+
+#define round_dec(n) \
+       add_roundkey_dec(); \
+       load_roundkey_dec(n); \
+       \
+       F(RL0, RR0); \
+       F(RR0, RL0);
+
+#define read_block_aligned(rin, offs, l0, r0, convert, rtmp) \
+       ldr l0, [rin, #((offs) + 0)]; \
+       ldr r0, [rin, #((offs) + 4)]; \
+       convert(l0, rtmp); \
+       convert(r0, rtmp);
+
+#define write_block_aligned(rout, offs, l0, r0, convert, rtmp) \
+       convert(l0, rtmp); \
+       convert(r0, rtmp); \
+       str l0, [rout, #((offs) + 0)]; \
+       str r0, [rout, #((offs) + 4)];
+
+#ifdef __ARM_FEATURE_UNALIGNED
+       /* unaligned word reads allowed */
+       #define read_block(rin, offs, l0, r0, rtmp0) \
+               read_block_aligned(rin, offs, l0, r0, host_to_be, rtmp0)
+
+       #define write_block(rout, offs, r0, l0, rtmp0, rtmp1) \
+               write_block_aligned(rout, offs, r0, l0, be_to_host, rtmp0)
+
+       #define read_block_host(rin, offs, l0, r0, rtmp0) \
+               read_block_aligned(rin, offs, l0, r0, host_to_host, rtmp0)
+
+       #define write_block_host(rout, offs, r0, l0, rtmp0, rtmp1) \
+               write_block_aligned(rout, offs, r0, l0, host_to_host, rtmp0)
+#else
+       /* need to handle unaligned reads by byte reads */
+       #define read_block(rin, offs, l0, r0, rtmp0) \
+               tst rin, #3; \
+               beq 1f; \
+                       ldr_unaligned_be(l0, rin, (offs) + 0, rtmp0); \
+                       ldr_unaligned_be(r0, rin, (offs) + 4, rtmp0); \
+                       b 2f; \
+               1:;\
+                       read_block_aligned(rin, offs, l0, r0, host_to_be, 
rtmp0); \
+               2:;
+
+       #define write_block(rout, offs, l0, r0, rtmp0, rtmp1) \
+               tst rout, #3; \
+               beq 1f; \
+                       str_unaligned_be(l0, rout, (offs) + 0, rtmp0, rtmp1); \
+                       str_unaligned_be(r0, rout, (offs) + 4, rtmp0, rtmp1); \
+                       b 2f; \
+               1:;\
+                       write_block_aligned(rout, offs, l0, r0, be_to_host, 
rtmp0); \
+               2:;
+
+       #define read_block_host(rin, offs, l0, r0, rtmp0) \
+               tst rin, #3; \
+               beq 1f; \
+                       ldr_unaligned_host(l0, rin, (offs) + 0, rtmp0); \
+                       ldr_unaligned_host(r0, rin, (offs) + 4, rtmp0); \
+                       b 2f; \
+               1:;\
+                       read_block_aligned(rin, offs, l0, r0, host_to_host, 
rtmp0); \
+               2:;
+
+       #define write_block_host(rout, offs, l0, r0, rtmp0, rtmp1) \
+               tst rout, #3; \
+               beq 1f; \
+                       str_unaligned_host(l0, rout, (offs) + 0, rtmp0, rtmp1); 
\
+                       str_unaligned_host(r0, rout, (offs) + 4, rtmp0, rtmp1); 
\
+                       b 2f; \
+               1:;\
+                       write_block_aligned(rout, offs, l0, r0, host_to_host); \
+               2:;
+#endif
+
+.align 3
+.type  __blowfish_enc_blk1,%function;
+
+__blowfish_enc_blk1:
+       /* input:
+        *      preloaded: CTX
+        *      [RL0, RR0]: src
+        * output:
+        *      [RR0, RL0]: dst
+        */
+       push {%lr};
+
+       add CTXs1, CTXs0, #(s1 - s0);
+       add CTXs2, CTXs0, #(s2 - s0);
+       mov RMASK, #(0xff << 2); /* byte mask */
+       add CTXs3, CTXs1, #(s3 - s1);
+
+       load_roundkey_enc(0);
+       round_enc(2);
+       round_enc(4);
+       round_enc(6);
+       round_enc(8);
+       round_enc(10);
+       round_enc(12);
+       round_enc(14);
+       round_enc(16);
+       add_roundkey_enc();
+
+       pop {%pc};
+.size __blowfish_enc_blk1,.-__blowfish_enc_blk1;
+
+.align 8
+.globl  _gcry_blowfish_arm_do_encrypt
+.type   _gcry_blowfish_arm_do_encrypt,%function;
+
+_gcry_blowfish_arm_do_encrypt:
+       /* input:
+        *      %r0: ctx, CTX
+        *      %r1: u32 *ret_xl
+        *      %r2: u32 *ret_xr
+        */
+       push {%r2, %r4-%r11, %ip, %lr};
+
+       ldr RL0, [%r1];
+       ldr RR0, [%r2];
+
+       bl __blowfish_enc_blk1;
+
+       pop {%r2};
+       str RR0, [%r1];
+       str RL0, [%r2];
+
+       pop {%r4-%r11, %ip, %pc};
+.size _gcry_blowfish_arm_do_encrypt,.-_gcry_blowfish_arm_do_encrypt;
+
+.align 3
+.globl _gcry_blowfish_arm_encrypt_block
+.type   _gcry_blowfish_arm_encrypt_block,%function;
+
+_gcry_blowfish_arm_encrypt_block:
+       /* input:
+        *      %r0: ctx, CTX
+        *      %r1: dst
+        *      %r2: src
+        */
+       push {%r4-%r11, %ip, %lr};
+
+       read_block(%r2, 0, RL0, RR0, RT0);
+
+       bl __blowfish_enc_blk1;
+
+       write_block(%r1, 0, RR0, RL0, RT0, RT1);
+
+       pop {%r4-%r11, %ip, %pc};
+.size _gcry_blowfish_arm_encrypt_block,.-_gcry_blowfish_arm_encrypt_block;
+
+.align 3
+.globl _gcry_blowfish_arm_decrypt_block
+.type   _gcry_blowfish_arm_decrypt_block,%function;
+
+_gcry_blowfish_arm_decrypt_block:
+       /* input:
+        *      %r0: ctx, CTX
+        *      %r1: dst
+        *      %r2: src
+        */
+       push {%r4-%r11, %ip, %lr};
+
+       add CTXs1, CTXs0, #(s1 - s0);
+       add CTXs2, CTXs0, #(s2 - s0);
+       mov RMASK, #(0xff << 2); /* byte mask */
+       add CTXs3, CTXs1, #(s3 - s1);
+
+       read_block(%r2, 0, RL0, RR0, RT0);
+
+       load_roundkey_dec(17);
+       round_dec(15);
+       round_dec(13);
+       round_dec(11);
+       round_dec(9);
+       round_dec(7);
+       round_dec(5);
+       round_dec(3);
+       round_dec(1);
+       add_roundkey_dec();
+
+       write_block(%r1, 0, RR0, RL0, RT0, RT1);
+
+       pop {%r4-%r11, %ip, %pc};
+.size _gcry_blowfish_arm_decrypt_block,.-_gcry_blowfish_arm_decrypt_block;
+
+/***********************************************************************
+ * 2-way blowfish
+ ***********************************************************************/
+#define F2(n, l0, r0, l1, r1, set_nextk, dec) \
+       \
+       and RT0, RMASK, l0, lsr#(24 - 2); \
+       and RT1, RMASK, l0, lsr#(16 - 2); \
+       and RT2, RMASK, l0, lsr#(8 - 2); \
+       add RT1, #(s1 - s0); \
+       \
+       ldr RT0, [CTXs0, RT0]; \
+       and RT3, RMASK, l0, lsl#2; \
+       ldr RT1, [CTXs0, RT1]; \
+       add RT3, #(s3 - s2); \
+       ldr RT2, [CTXs2, RT2]; \
+       add RT0, RT1; \
+       ldr RT3, [CTXs2, RT3]; \
+       \
+       and RT1, RMASK, l1, lsr#(24 - 2); \
+       eor RT0, RT2; \
+       and RT2, RMASK, l1, lsr#(16 - 2); \
+       add RT0, RT3; \
+       add RT2, #(s1 - s0); \
+       and RT3, RMASK, l1, lsr#(8 - 2); \
+       eor r0, RT0; \
+       \
+       ldr RT1, [CTXs0, RT1]; \
+       and RT0, RMASK, l1, lsl#2; \
+       ldr RT2, [CTXs0, RT2]; \
+       add RT0, #(s3 - s2); \
+       ldr RT3, [CTXs2, RT3]; \
+       add RT1, RT2; \
+       ldr RT0, [CTXs2, RT0]; \
+       \
+       and RT2, RMASK, r0, lsr#(24 - 2); \
+       eor RT1, RT3; \
+       and RT3, RMASK, r0, lsr#(16 - 2); \
+       add RT1, RT0; \
+       add RT3, #(s1 - s0); \
+       and RT0, RMASK, r0, lsr#(8 - 2); \
+       eor r1, RT1; \
+       \
+       ldr RT2, [CTXs0, RT2]; \
+       and RT1, RMASK, r0, lsl#2; \
+       ldr RT3, [CTXs0, RT3]; \
+       add RT1, #(s3 - s2); \
+       ldr RT0, [CTXs2, RT0]; \
+       add RT2, RT3; \
+       ldr RT1, [CTXs2, RT1]; \
+       \
+       and RT3, RMASK, r1, lsr#(24 - 2); \
+       eor RT2, RT0; \
+       and RT0, RMASK, r1, lsr#(16 - 2); \
+       add RT2, RT1; \
+       add RT0, #(s1 - s0); \
+       and RT1, RMASK, r1, lsr#(8 - 2); \
+       eor l0, RT2; \
+       \
+       ldr RT3, [CTXs0, RT3]; \
+       and RT2, RMASK, r1, lsl#2; \
+       ldr RT0, [CTXs0, RT0]; \
+       add RT2, #(s3 - s2); \
+       ldr RT1, [CTXs2, RT1]; \
+       eor l1, RKEYL; \
+       ldr RT2, [CTXs2, RT2]; \
+       \
+       eor r0, RKEYR; \
+       add RT3, RT0; \
+       eor r1, RKEYR; \
+       eor RT3, RT1; \
+       eor l0, RKEYL; \
+       add RT3, RT2; \
+       set_nextk(RKEYL, (p - s2) + (4 * (n) + ((dec) * 4))); \
+       eor l1, RT3; \
+       set_nextk(RKEYR, (p - s2) + (4 * (n) + (!(dec) * 4)));
+
+#define load_n_add_roundkey_enc2(n) \
+       load_roundkey_enc(n); \
+       eor RL0, RKEYL; \
+       eor RR0, RKEYR; \
+       eor RL1, RKEYL; \
+       eor RR1, RKEYR; \
+       load_roundkey_enc((n) + 2);
+
+#define next_key(reg, offs) \
+       ldr reg, [CTXs2, #(offs)];
+
+#define dummy(x, y) /* do nothing */
+
+#define round_enc2(n, load_next_key) \
+       F2((n) + 2, RL0, RR0, RL1, RR1, load_next_key, 0);
+
+#define load_n_add_roundkey_dec2(n) \
+       load_roundkey_dec(n); \
+       eor RL0, RKEYL; \
+       eor RR0, RKEYR; \
+       eor RL1, RKEYL; \
+       eor RR1, RKEYR; \
+       load_roundkey_dec((n) - 2);
+
+#define round_dec2(n, load_next_key) \
+       F2((n) - 3, RL0, RR0, RL1, RR1, load_next_key, 1);
+
+#define read_block2_aligned(rin, l0, r0, l1, r1, convert, rtmp) \
+       ldr l0, [rin, #(0)]; \
+       ldr r0, [rin, #(4)]; \
+       convert(l0, rtmp); \
+       ldr l1, [rin, #(8)]; \
+       convert(r0, rtmp); \
+       ldr r1, [rin, #(12)]; \
+       convert(l1, rtmp); \
+       convert(r1, rtmp);
+
+#define write_block2_aligned(rout, l0, r0, l1, r1, convert, rtmp) \
+       convert(l0, rtmp); \
+       convert(r0, rtmp); \
+       convert(l1, rtmp); \
+       str l0, [rout, #(0)]; \
+       convert(r1, rtmp); \
+       str r0, [rout, #(4)]; \
+       str l1, [rout, #(8)]; \
+       str r1, [rout, #(12)];
+
+#ifdef __ARM_FEATURE_UNALIGNED
+       /* unaligned word reads allowed */
+       #define read_block2(rin, l0, r0, l1, r1, rtmp0) \
+               read_block2_aligned(rin, l0, r0, l1, r1, host_to_be, rtmp0)
+
+       #define write_block2(rout, l0, r0, l1, r1, rtmp0, rtmp1) \
+               write_block2_aligned(rout, l0, r0, l1, r1, be_to_host, rtmp0)
+
+       #define read_block2_host(rin, l0, r0, l1, r1, rtmp0) \
+               read_block2_aligned(rin, l0, r0, l1, r1, host_to_host, rtmp0)
+
+       #define write_block2_host(rout, l0, r0, l1, r1, rtmp0, rtmp1) \
+               write_block2_aligned(rout, l0, r0, l1, r1, host_to_host, rtmp0)
+#else
+       /* need to handle unaligned reads by byte reads */
+       #define read_block2(rin, l0, r0, l1, r1, rtmp0) \
+               tst rin, #3; \
+               beq 1f; \
+                       ldr_unaligned_be(l0, rin, 0, rtmp0); \
+                       ldr_unaligned_be(r0, rin, 4, rtmp0); \
+                       ldr_unaligned_be(l1, rin, 8, rtmp0); \
+                       ldr_unaligned_be(r1, rin, 12, rtmp0); \
+                       b 2f; \
+               1:;\
+                       read_block2_aligned(rin, l0, r0, l1, r1, host_to_be, 
rtmp0); \
+               2:;
+
+       #define write_block2(rout, l0, r0, l1, r1, rtmp0, rtmp1) \
+               tst rout, #3; \
+               beq 1f; \
+                       str_unaligned_be(l0, rout, 0, rtmp0, rtmp1); \
+                       str_unaligned_be(r0, rout, 4, rtmp0, rtmp1); \
+                       str_unaligned_be(l1, rout, 8, rtmp0, rtmp1); \
+                       str_unaligned_be(r1, rout, 12, rtmp0, rtmp1); \
+                       b 2f; \
+               1:;\
+                       write_block2_aligned(rout, l0, r0, l1, r1, be_to_host, 
rtmp0); \
+               2:;
+
+       #define read_block2_host(rin, l0, r0, l1, r1, rtmp0) \
+               tst rin, #3; \
+               beq 1f; \
+                       ldr_unaligned_host(l0, rin, 0, rtmp0); \
+                       ldr_unaligned_host(r0, rin, 4, rtmp0); \
+                       ldr_unaligned_host(l1, rin, 8, rtmp0); \
+                       ldr_unaligned_host(r1, rin, 12, rtmp0); \
+                       b 2f; \
+               1:;\
+                       read_block2_aligned(rin, l0, r0, l1, r1, host_to_host, 
rtmp0); \
+               2:;
+
+       #define write_block2_host(rout, l0, r0, l1, r1, rtmp0, rtmp1) \
+               tst rout, #3; \
+               beq 1f; \
+                       str_unaligned_host(l0, rout, 0, rtmp0, rtmp1); \
+                       str_unaligned_host(r0, rout, 4, rtmp0, rtmp1); \
+                       str_unaligned_host(l1, rout, 8, rtmp0, rtmp1); \
+                       str_unaligned_host(r1, rout, 12, rtmp0, rtmp1); \
+                       b 2f; \
+               1:;\
+                       write_block2_aligned(rout, l0, r0, l1, r1, 
host_to_host, rtmp0); \
+               2:;
+#endif
+
+.align 3
+.type  _gcry_blowfish_arm_enc_blk2,%function;
+
+_gcry_blowfish_arm_enc_blk2:
+       /* input:
+        *      preloaded: CTX
+        *      [RL0, RR0], [RL1, RR1]: src
+        * output:
+        *      [RR0, RL0], [RR1, RL1]: dst
+        */
+       push {RT0,%lr};
+
+       add CTXs2, CTXs0, #(s2 - s0);
+       mov RMASK, #(0xff << 2); /* byte mask */
+
+       load_n_add_roundkey_enc2(0);
+       round_enc2(2, next_key);
+       round_enc2(4, next_key);
+       round_enc2(6, next_key);
+       round_enc2(8, next_key);
+       round_enc2(10, next_key);
+       round_enc2(12, next_key);
+       round_enc2(14, next_key);
+       round_enc2(16, dummy);
+
+       host_to_be(RR0, RT0);
+       host_to_be(RL0, RT0);
+       host_to_be(RR1, RT0);
+       host_to_be(RL1, RT0);
+
+       pop {RT0,%pc};
+.size _gcry_blowfish_arm_enc_blk2,.-_gcry_blowfish_arm_enc_blk2;
+
+.align 3
+.globl _gcry_blowfish_arm_cfb_dec;
+.type  _gcry_blowfish_arm_cfb_dec,%function;
+
+_gcry_blowfish_arm_cfb_dec:
+       /* input:
+        *      %r0: CTX
+        *      %r1: dst (2 blocks)
+        *      %r2: src (2 blocks)
+        *      %r3: iv (64bit)
+        */
+       push {%r2, %r4-%r11, %ip, %lr};
+
+       mov %lr, %r3;
+
+       /* Load input (iv/%r3 is aligned, src/%r2 might not be) */
+       ldm %r3, {RL0, RR0};
+       host_to_be(RL0, RT0);
+       host_to_be(RR0, RT0);
+       read_block(%r2, 0, RL1, RR1, RT0);
+
+       /* Update IV, load src[1] and save to iv[0] */
+       read_block_host(%r2, 8, %r5, %r6, RT0);
+       stm %lr, {%r5, %r6};
+
+       bl _gcry_blowfish_arm_enc_blk2;
+       /* result in RR0:RL0, RR1:RL1 = %r4:%r3, %r10:%r9 */
+
+       /* %r1: dst, %r0: %src */
+       pop {%r0};
+
+       /* dst = src ^ result */
+       read_block2_host(%r0, %r5, %r6, %r7, %r8, %lr);
+       eor %r5, %r4;
+       eor %r6, %r3;
+       eor %r7, %r10;
+       eor %r8, %r9;
+       write_block2_host(%r1, %r5, %r6, %r7, %r8, %r9, %r10);
+
+       pop {%r4-%r11, %ip, %pc};
+.ltorg
+.size _gcry_blowfish_arm_cfb_dec,.-_gcry_blowfish_arm_cfb_dec;
+
+.align 3
+.globl _gcry_blowfish_arm_ctr_enc;
+.type  _gcry_blowfish_arm_ctr_enc,%function;
+
+_gcry_blowfish_arm_ctr_enc:
+       /* input:
+        *      %r0: CTX
+        *      %r1: dst (2 blocks)
+        *      %r2: src (2 blocks)
+        *      %r3: iv (64bit, big-endian)
+        */
+       push {%r2, %r4-%r11, %ip, %lr};
+
+       mov %lr, %r3;
+
+       /* Load IV (big => host endian) */
+       read_block_aligned(%lr, 0, RL0, RR0, be_to_host, RT0);
+
+       /* Construct IVs */
+       adds RR1, RR0, #1; /* +1 */
+       adc RL1, RL0, #0;
+       adds %r6, RR1, #1; /* +2 */
+       adc %r5, RL1, #0;
+
+       /* Store new IV (host => big-endian) */
+       write_block_aligned(%lr, 0, %r5, %r6, host_to_be, RT0);
+
+       bl _gcry_blowfish_arm_enc_blk2;
+       /* result in RR0:RL0, RR1:RL1 = %r4:%r3, %r10:%r9 */
+
+       /* %r1: dst, %r0: %src */
+       pop {%r0};
+
+       /* XOR key-stream with plaintext */
+       read_block2_host(%r0, %r5, %r6, %r7, %r8, %lr);
+       eor %r5, %r4;
+       eor %r6, %r3;
+       eor %r7, %r10;
+       eor %r8, %r9;
+       write_block2_host(%r1, %r5, %r6, %r7, %r8, %r9, %r10);
+
+       pop {%r4-%r11, %ip, %pc};
+.ltorg
+.size _gcry_blowfish_arm_ctr_enc,.-_gcry_blowfish_arm_ctr_enc;
+
+.align 3
+.type  _gcry_blowfish_arm_dec_blk2,%function;
+
+_gcry_blowfish_arm_dec_blk2:
+       /* input:
+        *      preloaded: CTX
+        *      [RL0, RR0], [RL1, RR1]: src
+        * output:
+        *      [RR0, RL0], [RR1, RL1]: dst
+        */
+       add CTXs2, CTXs0, #(s2 - s0);
+       mov RMASK, #(0xff << 2); /* byte mask */
+
+       load_n_add_roundkey_dec2(17);
+       round_dec2(15, next_key);
+       round_dec2(13, next_key);
+       round_dec2(11, next_key);
+       round_dec2(9, next_key);
+       round_dec2(7, next_key);
+       round_dec2(5, next_key);
+       round_dec2(3, next_key);
+       round_dec2(1, dummy);
+
+       host_to_be(RR0, RT0);
+       host_to_be(RL0, RT0);
+       host_to_be(RR1, RT0);
+       host_to_be(RL1, RT0);
+
+       b .Ldec_cbc_tail;
+.ltorg
+.size _gcry_blowfish_arm_dec_blk2,.-_gcry_blowfish_arm_dec_blk2;
+
+.align 3
+.globl _gcry_blowfish_arm_cbc_dec;
+.type  _gcry_blowfish_arm_cbc_dec,%function;
+
+_gcry_blowfish_arm_cbc_dec:
+       /* input:
+        *      %r0: CTX
+        *      %r1: dst (2 blocks)
+        *      %r2: src (2 blocks)
+        *      %r3: iv (64bit)
+        */
+       push {%r2-%r11, %ip, %lr};
+
+       read_block2(%r2, RL0, RR0, RL1, RR1, RT0);
+
+       /* dec_blk2 is only used by cbc_dec, jump directly in/out instead
+        * of function call. */
+       b _gcry_blowfish_arm_dec_blk2;
+.Ldec_cbc_tail:
+       /* result in RR0:RL0, RR1:RL1 = %r4:%r3, %r10:%r9 */
+
+       /* %r0: %src, %r1: dst, %r2: iv */
+       pop {%r0, %r2};
+
+       /* load IV+1 (src[0]) to %r7:%r8. Might be unaligned. */
+       read_block_host(%r0, 0, %r7, %r8, %r5);
+       /* load IV (iv[0]) to %r5:%r6. 'iv' is aligned. */
+       ldm %r2, {%r5, %r6};
+
+       /* out[1] ^= IV+1 */
+       eor %r10, %r7;
+       eor %r9, %r8;
+       /* out[0] ^= IV */
+       eor %r4, %r5;
+       eor %r3, %r6;
+
+       /* load IV+2 (src[1]) to %r7:%r8. Might be unaligned. */
+       read_block_host(%r0, 8, %r7, %r8, %r5);
+       /* store IV+2 to iv[0] (aligned). */
+       stm %r2, {%r7, %r8};
+
+       /* store result to dst[0-3]. Might be unaligned. */
+       write_block2_host(%r1, %r4, %r3, %r10, %r9, %r5, %r6);
+
+       pop {%r4-%r11, %ip, %pc};
+.ltorg
+.size _gcry_blowfish_arm_cbc_dec,.-_gcry_blowfish_arm_cbc_dec;
+
+#endif /*HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS*/
+#endif /*__ARM_ARCH >= 6*/
diff --git a/grub-core/lib/libgcrypt/cipher/blowfish.c 
b/grub-core/lib/libgcrypt/cipher/blowfish.c
index b4d2b9c9a..7b001306c 100644
--- a/grub-core/lib/libgcrypt/cipher/blowfish.c
+++ b/grub-core/lib/libgcrypt/cipher/blowfish.c
@@ -36,21 +36,42 @@
 #include "types.h"
 #include "g10lib.h"
 #include "cipher.h"
+#include "bufhelp.h"
+#include "cipher-internal.h"
+#include "cipher-selftest.h"
 
 #define BLOWFISH_BLOCKSIZE 8
-#define BLOWFISH_ROUNDS 16
+#define BLOWFISH_KEY_MIN_BITS 8
+#define BLOWFISH_KEY_MAX_BITS 576
+
+
+/* USE_AMD64_ASM indicates whether to use AMD64 assembly code. */
+#undef USE_AMD64_ASM
+#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+    defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+# define USE_AMD64_ASM 1
+#endif
+
+/* USE_ARM_ASM indicates whether to use ARM assembly code. */
+#undef USE_ARM_ASM
+#if defined(__ARMEL__)
+# if defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS)
+#  define USE_ARM_ASM 1
+# endif
+#endif
 
 typedef struct {
     u32 s0[256];
     u32 s1[256];
     u32 s2[256];
     u32 s3[256];
-    u32 p[BLOWFISH_ROUNDS+2];
+    u32 p[16+2];
 } BLOWFISH_context;
 
-static gcry_err_code_t bf_setkey (void *c, const byte *key, unsigned keylen);
-static void encrypt_block (void *bc, byte *outbuf, const byte *inbuf);
-static void decrypt_block (void *bc, byte *outbuf, const byte *inbuf);
+static gcry_err_code_t bf_setkey (void *c, const byte *key, unsigned keylen,
+                                  cipher_bulk_ops_t *bulk_ops);
+static unsigned int encrypt_block (void *bc, byte *outbuf, const byte *inbuf);
+static unsigned int decrypt_block (void *bc, byte *outbuf, const byte *inbuf);
 
 
 /* precomputed S boxes */
@@ -234,49 +255,157 @@ static const u32 ks3[256] = {
     0x01C36AE4,0xD6EBE1F9,0x90D4F869,0xA65CDEA0,0x3F09252D,0xC208E69F,
     0xB74E6132,0xCE77E25B,0x578FDFE3,0x3AC372E6 };
 
-static const u32 ps[BLOWFISH_ROUNDS+2] = {
+static const u32 ps[16+2] = {
     0x243F6A88,0x85A308D3,0x13198A2E,0x03707344,0xA4093822,0x299F31D0,
     0x082EFA98,0xEC4E6C89,0x452821E6,0x38D01377,0xBE5466CF,0x34E90C6C,
     0xC0AC29B7,0xC97C50DD,0x3F84D5B5,0xB5470917,0x9216D5D9,0x8979FB1B };
 
 
+#ifdef USE_AMD64_ASM
+
+/* Assembly implementations of Blowfish. */
+extern void _gcry_blowfish_amd64_do_encrypt(BLOWFISH_context *c, u32 *ret_xl,
+                                           u32 *ret_xr);
+
+extern void _gcry_blowfish_amd64_encrypt_block(BLOWFISH_context *c, byte *out,
+                                              const byte *in);
+
+extern void _gcry_blowfish_amd64_decrypt_block(BLOWFISH_context *c, byte *out,
+                                              const byte *in);
+
+/* These assembly implementations process four blocks in parallel. */
+extern void _gcry_blowfish_amd64_ctr_enc(BLOWFISH_context *ctx, byte *out,
+                                        const byte *in, byte *ctr);
+
+extern void _gcry_blowfish_amd64_cbc_dec(BLOWFISH_context *ctx, byte *out,
+                                        const byte *in, byte *iv);
+
+extern void _gcry_blowfish_amd64_cfb_dec(BLOWFISH_context *ctx, byte *out,
+                                        const byte *in, byte *iv);
+
+static void
+do_encrypt ( BLOWFISH_context *bc, u32 *ret_xl, u32 *ret_xr )
+{
+  _gcry_blowfish_amd64_do_encrypt (bc, ret_xl, ret_xr);
+}
 
-#if BLOWFISH_ROUNDS != 16
-static inline u32
-function_F( BLOWFISH_context *bc, u32 x )
+static void
+do_encrypt_block (BLOWFISH_context *context, byte *outbuf, const byte *inbuf)
 {
-    u16 a, b, c, d;
+  _gcry_blowfish_amd64_encrypt_block (context, outbuf, inbuf);
+}
 
-#ifdef WORDS_BIGENDIAN
-    a = ((byte*)&x)[0];
-    b = ((byte*)&x)[1];
-    c = ((byte*)&x)[2];
-    d = ((byte*)&x)[3];
-#else
-    a = ((byte*)&x)[3];
-    b = ((byte*)&x)[2];
-    c = ((byte*)&x)[1];
-    d = ((byte*)&x)[0];
-#endif
+static void
+do_decrypt_block (BLOWFISH_context *context, byte *outbuf, const byte *inbuf)
+{
+  _gcry_blowfish_amd64_decrypt_block (context, outbuf, inbuf);
+}
 
-    return ((bc->s0[a] + bc->s1[b]) ^ bc->s2[c] ) + bc->s3[d];
+static inline void
+blowfish_amd64_ctr_enc(BLOWFISH_context *ctx, byte *out, const byte *in,
+                       byte *ctr)
+{
+  _gcry_blowfish_amd64_ctr_enc(ctx, out, in, ctr);
 }
-#endif
 
-#ifdef WORDS_BIGENDIAN
-#define F(x) ((( s0[((byte*)&x)[0]] + s1[((byte*)&x)[1]])       \
-                  ^ s2[((byte*)&x)[2]]) + s3[((byte*)&x)[3]] )
-#else
-#define F(x) ((( s0[((byte*)&x)[3]] + s1[((byte*)&x)[2]])       \
-                  ^ s2[((byte*)&x)[1]]) + s3[((byte*)&x)[0]] )
-#endif
-#define R(l,r,i)  do { l ^= p[i]; r ^= F(l); } while(0)
+static inline void
+blowfish_amd64_cbc_dec(BLOWFISH_context *ctx, byte *out, const byte *in,
+                       byte *iv)
+{
+  _gcry_blowfish_amd64_cbc_dec(ctx, out, in, iv);
+}
+
+static inline void
+blowfish_amd64_cfb_dec(BLOWFISH_context *ctx, byte *out, const byte *in,
+                       byte *iv)
+{
+  _gcry_blowfish_amd64_cfb_dec(ctx, out, in, iv);
+}
+
+static unsigned int
+encrypt_block (void *context , byte *outbuf, const byte *inbuf)
+{
+  BLOWFISH_context *c = (BLOWFISH_context *) context;
+  do_encrypt_block (c, outbuf, inbuf);
+  return /*burn_stack*/ (2*8);
+}
+
+static unsigned int
+decrypt_block (void *context, byte *outbuf, const byte *inbuf)
+{
+  BLOWFISH_context *c = (BLOWFISH_context *) context;
+  do_decrypt_block (c, outbuf, inbuf);
+  return /*burn_stack*/ (2*8);
+}
+
+#elif defined(USE_ARM_ASM)
+
+/* Assembly implementations of Blowfish. */
+extern void _gcry_blowfish_arm_do_encrypt(BLOWFISH_context *c, u32 *ret_xl,
+                                           u32 *ret_xr);
+
+extern void _gcry_blowfish_arm_encrypt_block(BLOWFISH_context *c, byte *out,
+                                              const byte *in);
+
+extern void _gcry_blowfish_arm_decrypt_block(BLOWFISH_context *c, byte *out,
+                                              const byte *in);
+
+/* These assembly implementations process two blocks in parallel. */
+extern void _gcry_blowfish_arm_ctr_enc(BLOWFISH_context *ctx, byte *out,
+                                        const byte *in, byte *ctr);
+
+extern void _gcry_blowfish_arm_cbc_dec(BLOWFISH_context *ctx, byte *out,
+                                        const byte *in, byte *iv);
+
+extern void _gcry_blowfish_arm_cfb_dec(BLOWFISH_context *ctx, byte *out,
+                                        const byte *in, byte *iv);
+
+static void
+do_encrypt ( BLOWFISH_context *bc, u32 *ret_xl, u32 *ret_xr )
+{
+  _gcry_blowfish_arm_do_encrypt (bc, ret_xl, ret_xr);
+}
+
+static void
+do_encrypt_block (BLOWFISH_context *context, byte *outbuf, const byte *inbuf)
+{
+  _gcry_blowfish_arm_encrypt_block (context, outbuf, inbuf);
+}
+
+static void
+do_decrypt_block (BLOWFISH_context *context, byte *outbuf, const byte *inbuf)
+{
+  _gcry_blowfish_arm_decrypt_block (context, outbuf, inbuf);
+}
+
+static unsigned int
+encrypt_block (void *context , byte *outbuf, const byte *inbuf)
+{
+  BLOWFISH_context *c = (BLOWFISH_context *) context;
+  do_encrypt_block (c, outbuf, inbuf);
+  return /*burn_stack*/ (10*4);
+}
+
+static unsigned int
+decrypt_block (void *context, byte *outbuf, const byte *inbuf)
+{
+  BLOWFISH_context *c = (BLOWFISH_context *) context;
+  do_decrypt_block (c, outbuf, inbuf);
+  return /*burn_stack*/ (10*4);
+}
+
+#else /*USE_ARM_ASM*/
+
+
+#define F(x) ((( s0[(x)>>24] + s1[((x)>>16)&0xff])      \
+                  ^ s2[((x)>>8)&0xff]) + s3[(x)&0xff] )
+#define R(l,r,i) do { l ^= p[i]; r ^= F(l); } while(0)
+#define R3(l,r,i) do { R(l##0,r##0,i);R(l##1,r##1,i);R(l##2,r##2,i);} while(0)
 
 
 static void
 do_encrypt ( BLOWFISH_context *bc, u32 *ret_xl, u32 *ret_xr )
 {
-#if BLOWFISH_ROUNDS == 16
   u32 xl, xr, *s0, *s1, *s2, *s3, *p;
 
   xl = *ret_xl;
@@ -287,16 +416,16 @@ do_encrypt ( BLOWFISH_context *bc, u32 *ret_xl, u32 
*ret_xr )
   s2 = bc->s2;
   s3 = bc->s3;
 
-  R( xl, xr,   0);
-  R( xr, xl,   1);
-  R( xl, xr,   2);
-  R( xr, xl,   3);
-  R( xl, xr,   4);
-  R( xr, xl,   5);
-  R( xl, xr,   6);
-  R( xr, xl,   7);
-  R( xl, xr,   8);
-  R( xr, xl,   9);
+  R( xl, xr,  0);
+  R( xr, xl,  1);
+  R( xl, xr,  2);
+  R( xr, xl,  3);
+  R( xl, xr,  4);
+  R( xr, xl,  5);
+  R( xl, xr,  6);
+  R( xr, xl,  7);
+  R( xl, xr,  8);
+  R( xr, xl,  9);
   R( xl, xr, 10);
   R( xr, xl, 11);
   R( xl, xr, 12);
@@ -304,45 +433,67 @@ do_encrypt ( BLOWFISH_context *bc, u32 *ret_xl, u32 
*ret_xr )
   R( xl, xr, 14);
   R( xr, xl, 15);
 
-  xl ^= p[BLOWFISH_ROUNDS];
-  xr ^= p[BLOWFISH_ROUNDS+1];
+  xl ^= p[16];
+  xr ^= p[16+1];
 
   *ret_xl = xr;
   *ret_xr = xl;
+}
 
-#else
-  u32 xl, xr, temp, *p;
-  int i;
 
-  xl = *ret_xl;
-  xr = *ret_xr;
+static void
+do_encrypt_3 ( BLOWFISH_context *bc, byte *dst, const byte *src )
+{
+  u32 xl0, xr0, xl1, xr1, xl2, xr2, *s0, *s1, *s2, *s3, *p;
+
+  xl0 = buf_get_be32(src + 0);
+  xr0 = buf_get_be32(src + 4);
+  xl1 = buf_get_be32(src + 8);
+  xr1 = buf_get_be32(src + 12);
+  xl2 = buf_get_be32(src + 16);
+  xr2 = buf_get_be32(src + 20);
   p = bc->p;
+  s0 = bc->s0;
+  s1 = bc->s1;
+  s2 = bc->s2;
+  s3 = bc->s3;
 
-  for(i=0; i < BLOWFISH_ROUNDS; i++ )
-    {
-      xl ^= p[i];
-      xr ^= function_F(bc, xl);
-      temp = xl;
-      xl = xr;
-      xr = temp;
-    }
-  temp = xl;
-  xl = xr;
-  xr = temp;
-
-  xr ^= p[BLOWFISH_ROUNDS];
-  xl ^= p[BLOWFISH_ROUNDS+1];
-
-  *ret_xl = xl;
-  *ret_xr = xr;
-#endif
+  R3( xl, xr,  0);
+  R3( xr, xl,  1);
+  R3( xl, xr,  2);
+  R3( xr, xl,  3);
+  R3( xl, xr,  4);
+  R3( xr, xl,  5);
+  R3( xl, xr,  6);
+  R3( xr, xl,  7);
+  R3( xl, xr,  8);
+  R3( xr, xl,  9);
+  R3( xl, xr, 10);
+  R3( xr, xl, 11);
+  R3( xl, xr, 12);
+  R3( xr, xl, 13);
+  R3( xl, xr, 14);
+  R3( xr, xl, 15);
+
+  xl0 ^= p[16];
+  xr0 ^= p[16+1];
+  xl1 ^= p[16];
+  xr1 ^= p[16+1];
+  xl2 ^= p[16];
+  xr2 ^= p[16+1];
+
+  buf_put_be32(dst + 0, xr0);
+  buf_put_be32(dst + 4, xl0);
+  buf_put_be32(dst + 8, xr1);
+  buf_put_be32(dst + 12, xl1);
+  buf_put_be32(dst + 16, xr2);
+  buf_put_be32(dst + 20, xl2);
 }
 
 
 static void
 decrypt ( BLOWFISH_context *bc, u32 *ret_xl, u32 *ret_xr )
 {
-#if BLOWFISH_ROUNDS == 16
   u32 xl, xr, *s0, *s1, *s2, *s3, *p;
 
   xl = *ret_xl;
@@ -361,77 +512,94 @@ decrypt ( BLOWFISH_context *bc, u32 *ret_xl, u32 *ret_xr )
   R( xr, xl, 12);
   R( xl, xr, 11);
   R( xr, xl, 10);
-  R( xl, xr,   9);
-  R( xr, xl,   8);
-  R( xl, xr,   7);
-  R( xr, xl,   6);
-  R( xl, xr,   5);
-  R( xr, xl,   4);
-  R( xl, xr,   3);
-  R( xr, xl,   2);
+  R( xl, xr,  9);
+  R( xr, xl,  8);
+  R( xl, xr,  7);
+  R( xr, xl,  6);
+  R( xl, xr,  5);
+  R( xr, xl,  4);
+  R( xl, xr,  3);
+  R( xr, xl,  2);
 
   xl ^= p[1];
   xr ^= p[0];
 
   *ret_xl = xr;
   *ret_xr = xl;
+}
 
-#else
-  u32 xl, xr, temp, *p;
-  int i;
 
-  xl = *ret_xl;
-  xr = *ret_xr;
+static void
+do_decrypt_3 ( BLOWFISH_context *bc, byte *dst, const byte *src )
+{
+  u32 xl0, xr0, xl1, xr1, xl2, xr2, *s0, *s1, *s2, *s3, *p;
+
+  xl0 = buf_get_be32(src + 0);
+  xr0 = buf_get_be32(src + 4);
+  xl1 = buf_get_be32(src + 8);
+  xr1 = buf_get_be32(src + 12);
+  xl2 = buf_get_be32(src + 16);
+  xr2 = buf_get_be32(src + 20);
   p = bc->p;
+  s0 = bc->s0;
+  s1 = bc->s1;
+  s2 = bc->s2;
+  s3 = bc->s3;
 
-  for (i=BLOWFISH_ROUNDS+1; i > 1; i-- )
-    {
-      xl ^= p[i];
-      xr ^= function_F(bc, xl);
-      temp = xl;
-      xl = xr;
-      xr = temp;
-    }
-
-  temp = xl;
-  xl = xr;
-  xr = temp;
-
-  xr ^= p[1];
-  xl ^= p[0];
-
-  *ret_xl = xl;
-  *ret_xr = xr;
-#endif
+  R3( xl, xr, 17);
+  R3( xr, xl, 16);
+  R3( xl, xr, 15);
+  R3( xr, xl, 14);
+  R3( xl, xr, 13);
+  R3( xr, xl, 12);
+  R3( xl, xr, 11);
+  R3( xr, xl, 10);
+  R3( xl, xr,  9);
+  R3( xr, xl,  8);
+  R3( xl, xr,  7);
+  R3( xr, xl,  6);
+  R3( xl, xr,  5);
+  R3( xr, xl,  4);
+  R3( xl, xr,  3);
+  R3( xr, xl,  2);
+
+  xl0 ^= p[1];
+  xr0 ^= p[0];
+  xl1 ^= p[1];
+  xr1 ^= p[0];
+  xl2 ^= p[1];
+  xr2 ^= p[0];
+
+  buf_put_be32(dst + 0, xr0);
+  buf_put_be32(dst + 4, xl0);
+  buf_put_be32(dst + 8, xr1);
+  buf_put_be32(dst + 12, xl1);
+  buf_put_be32(dst + 16, xr2);
+  buf_put_be32(dst + 20, xl2);
 }
 
 #undef F
 #undef R
+#undef R3
 
 static void
 do_encrypt_block ( BLOWFISH_context *bc, byte *outbuf, const byte *inbuf )
 {
   u32 d1, d2;
 
-  d1 = inbuf[0] << 24 | inbuf[1] << 16 | inbuf[2] << 8 | inbuf[3];
-  d2 = inbuf[4] << 24 | inbuf[5] << 16 | inbuf[6] << 8 | inbuf[7];
+  d1 = buf_get_be32(inbuf);
+  d2 = buf_get_be32(inbuf + 4);
   do_encrypt( bc, &d1, &d2 );
-  outbuf[0] = (d1 >> 24) & 0xff;
-  outbuf[1] = (d1 >> 16) & 0xff;
-  outbuf[2] = (d1 >>   8) & 0xff;
-  outbuf[3] =  d1         & 0xff;
-  outbuf[4] = (d2 >> 24) & 0xff;
-  outbuf[5] = (d2 >> 16) & 0xff;
-  outbuf[6] = (d2 >>   8) & 0xff;
-  outbuf[7] =  d2         & 0xff;
+  buf_put_be32(outbuf, d1);
+  buf_put_be32(outbuf + 4, d2);
 }
 
-static void
+static unsigned int
 encrypt_block (void *context, byte *outbuf, const byte *inbuf)
 {
   BLOWFISH_context *bc = (BLOWFISH_context *) context;
   do_encrypt_block (bc, outbuf, inbuf);
-  _gcry_burn_stack (64);
+  return /*burn_stack*/ (64);
 }
 
 
@@ -440,25 +608,293 @@ do_decrypt_block (BLOWFISH_context *bc, byte *outbuf, 
const byte *inbuf)
 {
   u32 d1, d2;
 
-  d1 = inbuf[0] << 24 | inbuf[1] << 16 | inbuf[2] << 8 | inbuf[3];
-  d2 = inbuf[4] << 24 | inbuf[5] << 16 | inbuf[6] << 8 | inbuf[7];
+  d1 = buf_get_be32(inbuf);
+  d2 = buf_get_be32(inbuf + 4);
   decrypt( bc, &d1, &d2 );
-  outbuf[0] = (d1 >> 24) & 0xff;
-  outbuf[1] = (d1 >> 16) & 0xff;
-  outbuf[2] = (d1 >>   8) & 0xff;
-  outbuf[3] =  d1         & 0xff;
-  outbuf[4] = (d2 >> 24) & 0xff;
-  outbuf[5] = (d2 >> 16) & 0xff;
-  outbuf[6] = (d2 >>   8) & 0xff;
-  outbuf[7] =  d2         & 0xff;
+  buf_put_be32(outbuf, d1);
+  buf_put_be32(outbuf + 4, d2);
 }
 
-static void
+static unsigned int
 decrypt_block (void *context, byte *outbuf, const byte *inbuf)
 {
   BLOWFISH_context *bc = (BLOWFISH_context *) context;
   do_decrypt_block (bc, outbuf, inbuf);
-  _gcry_burn_stack (64);
+  return /*burn_stack*/ (64);
+}
+
+#endif /*!USE_AMD64_ASM&&!USE_ARM_ASM*/
+
+
+/* Bulk encryption of complete blocks in CTR mode.  This function is only
+   intended for the bulk encryption feature of cipher.c.  CTR is expected to be
+   of size BLOWFISH_BLOCKSIZE. */
+static void
+_gcry_blowfish_ctr_enc(void *context, unsigned char *ctr, void *outbuf_arg,
+                      const void *inbuf_arg, size_t nblocks)
+{
+  BLOWFISH_context *ctx = context;
+  unsigned char *outbuf = outbuf_arg;
+  const unsigned char *inbuf = inbuf_arg;
+  unsigned char tmpbuf[BLOWFISH_BLOCKSIZE * 3];
+  int burn_stack_depth = (64) + 4 * BLOWFISH_BLOCKSIZE;
+
+#ifdef USE_AMD64_ASM
+  {
+    if (nblocks >= 4)
+      burn_stack_depth += 5 * sizeof(void*);
+
+    /* Process data in 4 block chunks. */
+    while (nblocks >= 4)
+      {
+        blowfish_amd64_ctr_enc(ctx, outbuf, inbuf, ctr);
+
+        nblocks -= 4;
+        outbuf += 4 * BLOWFISH_BLOCKSIZE;
+        inbuf  += 4 * BLOWFISH_BLOCKSIZE;
+      }
+
+    /* Use generic code to handle smaller chunks... */
+  }
+#elif defined(USE_ARM_ASM)
+  {
+    /* Process data in 2 block chunks. */
+    while (nblocks >= 2)
+      {
+        _gcry_blowfish_arm_ctr_enc(ctx, outbuf, inbuf, ctr);
+
+        nblocks -= 2;
+        outbuf += 2 * BLOWFISH_BLOCKSIZE;
+        inbuf  += 2 * BLOWFISH_BLOCKSIZE;
+      }
+
+    /* Use generic code to handle smaller chunks... */
+  }
+#endif
+
+#if !defined(USE_AMD64_ASM) && !defined(USE_ARM_ASM)
+  for ( ;nblocks >= 3; nblocks -= 3)
+    {
+      /* Prepare the counter blocks. */
+      cipher_block_cpy (tmpbuf + 0, ctr, BLOWFISH_BLOCKSIZE);
+      cipher_block_cpy (tmpbuf + 8, ctr, BLOWFISH_BLOCKSIZE);
+      cipher_block_cpy (tmpbuf + 16, ctr, BLOWFISH_BLOCKSIZE);
+      cipher_block_add (tmpbuf + 8, 1, BLOWFISH_BLOCKSIZE);
+      cipher_block_add (tmpbuf + 16, 2, BLOWFISH_BLOCKSIZE);
+      cipher_block_add (ctr, 3, BLOWFISH_BLOCKSIZE);
+      /* Encrypt the counter. */
+      do_encrypt_3(ctx, tmpbuf, tmpbuf);
+      /* XOR the input with the encrypted counter and store in output.  */
+      buf_xor(outbuf, tmpbuf, inbuf, BLOWFISH_BLOCKSIZE * 3);
+      outbuf += BLOWFISH_BLOCKSIZE * 3;
+      inbuf  += BLOWFISH_BLOCKSIZE * 3;
+    }
+#endif
+
+  for ( ;nblocks; nblocks-- )
+    {
+      /* Encrypt the counter. */
+      do_encrypt_block(ctx, tmpbuf, ctr);
+      /* XOR the input with the encrypted counter and store in output.  */
+      cipher_block_xor(outbuf, tmpbuf, inbuf, BLOWFISH_BLOCKSIZE);
+      outbuf += BLOWFISH_BLOCKSIZE;
+      inbuf  += BLOWFISH_BLOCKSIZE;
+      /* Increment the counter.  */
+      cipher_block_add (ctr, 1, BLOWFISH_BLOCKSIZE);
+    }
+
+  wipememory(tmpbuf, sizeof(tmpbuf));
+  _gcry_burn_stack(burn_stack_depth);
+}
+
+
+/* Bulk decryption of complete blocks in CBC mode.  This function is only
+   intended for the bulk encryption feature of cipher.c. */
+static void
+_gcry_blowfish_cbc_dec(void *context, unsigned char *iv, void *outbuf_arg,
+                      const void *inbuf_arg, size_t nblocks)
+{
+  BLOWFISH_context *ctx = context;
+  unsigned char *outbuf = outbuf_arg;
+  const unsigned char *inbuf = inbuf_arg;
+  unsigned char savebuf[BLOWFISH_BLOCKSIZE * 3];
+  int burn_stack_depth = (64) + 4 * BLOWFISH_BLOCKSIZE;
+
+#ifdef USE_AMD64_ASM
+  {
+    if (nblocks >= 4)
+      burn_stack_depth += 5 * sizeof(void*);
+
+    /* Process data in 4 block chunks. */
+    while (nblocks >= 4)
+      {
+        blowfish_amd64_cbc_dec(ctx, outbuf, inbuf, iv);
+
+        nblocks -= 4;
+        outbuf += 4 * BLOWFISH_BLOCKSIZE;
+        inbuf  += 4 * BLOWFISH_BLOCKSIZE;
+      }
+
+    /* Use generic code to handle smaller chunks... */
+  }
+#elif defined(USE_ARM_ASM)
+  {
+    /* Process data in 2 block chunks. */
+    while (nblocks >= 2)
+      {
+        _gcry_blowfish_arm_cbc_dec(ctx, outbuf, inbuf, iv);
+
+        nblocks -= 2;
+        outbuf += 2 * BLOWFISH_BLOCKSIZE;
+        inbuf  += 2 * BLOWFISH_BLOCKSIZE;
+      }
+
+    /* Use generic code to handle smaller chunks... */
+  }
+#endif
+
+#if !defined(USE_AMD64_ASM) && !defined(USE_ARM_ASM)
+  for ( ;nblocks >= 3; nblocks -= 3)
+    {
+      /* INBUF is needed later and it may be identical to OUTBUF, so store
+         the intermediate result to SAVEBUF.  */
+      do_decrypt_3 (ctx, savebuf, inbuf);
+
+      cipher_block_xor_1 (savebuf + 0, iv, BLOWFISH_BLOCKSIZE);
+      cipher_block_xor_1 (savebuf + 8, inbuf, BLOWFISH_BLOCKSIZE * 2);
+      cipher_block_cpy (iv, inbuf + 16, BLOWFISH_BLOCKSIZE);
+      buf_cpy (outbuf, savebuf, BLOWFISH_BLOCKSIZE * 3);
+      inbuf += BLOWFISH_BLOCKSIZE * 3;
+      outbuf += BLOWFISH_BLOCKSIZE * 3;
+    }
+#endif
+
+  for ( ;nblocks; nblocks-- )
+    {
+      /* INBUF is needed later and it may be identical to OUTBUF, so store
+         the intermediate result to SAVEBUF.  */
+      do_decrypt_block (ctx, savebuf, inbuf);
+
+      cipher_block_xor_n_copy_2(outbuf, savebuf, iv, inbuf, 
BLOWFISH_BLOCKSIZE);
+      inbuf += BLOWFISH_BLOCKSIZE;
+      outbuf += BLOWFISH_BLOCKSIZE;
+    }
+
+  wipememory(savebuf, sizeof(savebuf));
+  _gcry_burn_stack(burn_stack_depth);
+}
+
+
+/* Bulk decryption of complete blocks in CFB mode.  This function is only
+   intended for the bulk encryption feature of cipher.c. */
+static void
+_gcry_blowfish_cfb_dec(void *context, unsigned char *iv, void *outbuf_arg,
+                      const void *inbuf_arg, size_t nblocks)
+{
+  BLOWFISH_context *ctx = context;
+  unsigned char *outbuf = outbuf_arg;
+  const unsigned char *inbuf = inbuf_arg;
+  unsigned char tmpbuf[BLOWFISH_BLOCKSIZE * 3];
+  int burn_stack_depth = (64) + 4 * BLOWFISH_BLOCKSIZE;
+
+#ifdef USE_AMD64_ASM
+  {
+    if (nblocks >= 4)
+      burn_stack_depth += 5 * sizeof(void*);
+
+    /* Process data in 4 block chunks. */
+    while (nblocks >= 4)
+      {
+        blowfish_amd64_cfb_dec(ctx, outbuf, inbuf, iv);
+
+        nblocks -= 4;
+        outbuf += 4 * BLOWFISH_BLOCKSIZE;
+        inbuf  += 4 * BLOWFISH_BLOCKSIZE;
+      }
+
+    /* Use generic code to handle smaller chunks... */
+  }
+#elif defined(USE_ARM_ASM)
+  {
+    /* Process data in 2 block chunks. */
+    while (nblocks >= 2)
+      {
+        _gcry_blowfish_arm_cfb_dec(ctx, outbuf, inbuf, iv);
+
+        nblocks -= 2;
+        outbuf += 2 * BLOWFISH_BLOCKSIZE;
+        inbuf  += 2 * BLOWFISH_BLOCKSIZE;
+      }
+
+    /* Use generic code to handle smaller chunks... */
+  }
+#endif
+
+#if !defined(USE_AMD64_ASM) && !defined(USE_ARM_ASM)
+  for ( ;nblocks >= 3; nblocks -= 3 )
+    {
+      cipher_block_cpy (tmpbuf + 0, iv, BLOWFISH_BLOCKSIZE);
+      cipher_block_cpy (tmpbuf + 8, inbuf + 0, BLOWFISH_BLOCKSIZE * 2);
+      cipher_block_cpy (iv, inbuf + 16, BLOWFISH_BLOCKSIZE);
+      do_encrypt_3 (ctx, tmpbuf, tmpbuf);
+      buf_xor (outbuf, inbuf, tmpbuf, BLOWFISH_BLOCKSIZE * 3);
+      outbuf += BLOWFISH_BLOCKSIZE * 3;
+      inbuf  += BLOWFISH_BLOCKSIZE * 3;
+    }
+#endif
+
+  for ( ;nblocks; nblocks-- )
+    {
+      do_encrypt_block(ctx, iv, iv);
+      cipher_block_xor_n_copy(outbuf, iv, inbuf, BLOWFISH_BLOCKSIZE);
+      outbuf += BLOWFISH_BLOCKSIZE;
+      inbuf  += BLOWFISH_BLOCKSIZE;
+    }
+
+  wipememory(tmpbuf, sizeof(tmpbuf));
+  _gcry_burn_stack(burn_stack_depth);
+}
+
+
+/* Run the self-tests for BLOWFISH-CTR, tests IV increment of bulk CTR
+   encryption.  Returns NULL on success. */
+static const char *
+selftest_ctr (void)
+{
+  const int nblocks = 4+1;
+  const int blocksize = BLOWFISH_BLOCKSIZE;
+  const int context_size = sizeof(BLOWFISH_context);
+
+  return _gcry_selftest_helper_ctr("BLOWFISH", &bf_setkey,
+           &encrypt_block, nblocks, blocksize, context_size);
+}
+
+
+/* Run the self-tests for BLOWFISH-CBC, tests bulk CBC decryption.
+   Returns NULL on success. */
+static const char *
+selftest_cbc (void)
+{
+  const int nblocks = 4+2;
+  const int blocksize = BLOWFISH_BLOCKSIZE;
+  const int context_size = sizeof(BLOWFISH_context);
+
+  return _gcry_selftest_helper_cbc("BLOWFISH", &bf_setkey,
+           &encrypt_block, nblocks, blocksize, context_size);
+}
+
+
+/* Run the self-tests for BLOWFISH-CFB, tests bulk CBC decryption.
+   Returns NULL on success. */
+static const char *
+selftest_cfb (void)
+{
+  const int nblocks = 4+2;
+  const int blocksize = BLOWFISH_BLOCKSIZE;
+  const int context_size = sizeof(BLOWFISH_context);
+
+  return _gcry_selftest_helper_cfb("BLOWFISH", &bf_setkey,
+           &encrypt_block, nblocks, blocksize, context_size);
 }
 
 
@@ -466,14 +902,20 @@ static const char*
 selftest(void)
 {
   BLOWFISH_context c;
+  cipher_bulk_ops_t bulk_ops;
   byte plain[] = "BLOWFISH";
   byte buffer[8];
-  byte plain3[] = { 0xFE, 0xDC, 0xBA, 0x98, 0x76, 0x54, 0x32, 0x10 };
-  byte key3[] = { 0x41, 0x79, 0x6E, 0xA0, 0x52, 0x61, 0x6E, 0xE4 };
-  byte cipher3[] = { 0xE1, 0x13, 0xF4, 0x10, 0x2C, 0xFC, 0xCE, 0x43 };
+  static const byte plain3[] =
+    { 0xFE, 0xDC, 0xBA, 0x98, 0x76, 0x54, 0x32, 0x10 };
+  static const byte key3[] =
+    { 0x41, 0x79, 0x6E, 0xA0, 0x52, 0x61, 0x6E, 0xE4 };
+  static const byte cipher3[] =
+    { 0xE1, 0x13, 0xF4, 0x10, 0x2C, 0xFC, 0xCE, 0x43 };
+  const char *r;
 
   bf_setkey( (void *) &c,
-             (const unsigned char*)"abcdefghijklmnopqrstuvwxyz", 26 );
+             (const unsigned char*)"abcdefghijklmnopqrstuvwxyz", 26,
+             &bulk_ops );
   encrypt_block( (void *) &c, buffer, plain );
   if( memcmp( buffer, "\x32\x4E\xD0\xFE\xF4\x13\xA2\x03", 8 ) )
     return "Blowfish selftest failed (1).";
@@ -481,22 +923,88 @@ selftest(void)
   if( memcmp( buffer, plain, 8 ) )
     return "Blowfish selftest failed (2).";
 
-  bf_setkey( (void *) &c, key3, 8 );
+  bf_setkey( (void *) &c, key3, 8, &bulk_ops );
   encrypt_block( (void *) &c, buffer, plain3 );
   if( memcmp( buffer, cipher3, 8 ) )
     return "Blowfish selftest failed (3).";
   decrypt_block( (void *) &c, buffer, buffer );
   if( memcmp( buffer, plain3, 8 ) )
     return "Blowfish selftest failed (4).";
+
+  if ( (r = selftest_cbc ()) )
+    return r;
+
+  if ( (r = selftest_cfb ()) )
+    return r;
+
+  if ( (r = selftest_ctr ()) )
+    return r;
+
   return NULL;
 }
 
 
+struct hashset_elem {
+  u32 val;
+  short nidx;
+  char used;
+};
+
+static inline byte
+val_to_hidx(u32 val)
+{
+  /* bf sboxes are quite random already. */
+  return (val >> 24) ^ (val >> 16)  ^ (val >> 8) ^ val;
+}
+
+static inline int
+add_val(struct hashset_elem hset[256], u32 val, int *midx,
+       struct hashset_elem *mpool)
+{
+  struct hashset_elem *elem;
+  byte hidx;
+
+  hidx = val_to_hidx(val);
+  elem = &hset[hidx];
+
+  /* Check if first is in use. */
+  if (elem->used == 0)
+    {
+      elem->val = val;
+      elem->nidx = -1;
+      elem->used = 1;
+      return 0;
+    }
+
+  /* Check if first matches. */
+  if (elem->val == val)
+    return 1;
+
+  for (; elem->nidx >= 0; elem = &mpool[elem->nidx])
+    {
+      /* Check if elem matches. */
+      if (elem->val == val)
+        return 1;
+    }
+
+  elem->nidx = (*midx)++;
+  elem = &mpool[elem->nidx];
+
+  elem->val = val;
+  elem->nidx = -1;
+  elem->used = 1;
+
+  return 0;
+}
 
 static gcry_err_code_t
 do_bf_setkey (BLOWFISH_context *c, const byte *key, unsigned keylen)
 {
-  int i, j;
+  struct hashset_elem mempool[4 * 255]; /* Enough entries for the worst case. 
*/
+  struct hashset_elem hset[4][256];
+  int memidx = 0;
+  int weak = 0;
+  int i, j, ret;
   u32 data, datal, datar;
   static int initialized;
   static const char *selftest_failed;
@@ -511,7 +1019,13 @@ do_bf_setkey (BLOWFISH_context *c, const byte *key, 
unsigned keylen)
   if( selftest_failed )
     return GPG_ERR_SELFTEST_FAILED;
 
-  for(i=0; i < BLOWFISH_ROUNDS+2; i++ )
+  if (keylen < BLOWFISH_KEY_MIN_BITS / 8 ||
+      keylen > BLOWFISH_KEY_MAX_BITS / 8)
+    return GPG_ERR_INV_KEYLEN;
+
+  memset(hset, 0, sizeof(hset));
+
+  for(i=0; i < 16+2; i++ )
     c->p[i] = ps[i];
   for(i=0; i < 256; i++ )
     {
@@ -521,25 +1035,18 @@ do_bf_setkey (BLOWFISH_context *c, const byte *key, 
unsigned keylen)
       c->s3[i] = ks3[i];
     }
 
-  for(i=j=0; i < BLOWFISH_ROUNDS+2; i++ )
+  for(i=j=0; i < 16+2; i++ )
     {
-#ifdef WORDS_BIGENDIAN
-      ((byte*)&data)[0] = key[j];
-      ((byte*)&data)[1] = key[(j+1)%keylen];
-      ((byte*)&data)[2] = key[(j+2)%keylen];
-      ((byte*)&data)[3] = key[(j+3)%keylen];
-#else
-      ((byte*)&data)[3] = key[j];
-      ((byte*)&data)[2] = key[(j+1)%keylen];
-      ((byte*)&data)[1] = key[(j+2)%keylen];
-      ((byte*)&data)[0] = key[(j+3)%keylen];
-#endif
+      data = ((u32)key[j] << 24) |
+             ((u32)key[(j+1)%keylen] << 16) |
+             ((u32)key[(j+2)%keylen] << 8) |
+             ((u32)key[(j+3)%keylen]);
       c->p[i] ^= data;
       j = (j+4) % keylen;
     }
 
   datal = datar = 0;
-  for(i=0; i < BLOWFISH_ROUNDS+2; i += 2 )
+  for(i=0; i < 16+2; i += 2 )
     {
       do_encrypt( c, &datal, &datar );
       c->p[i]   = datal;
@@ -550,55 +1057,85 @@ do_bf_setkey (BLOWFISH_context *c, const byte *key, 
unsigned keylen)
       do_encrypt( c, &datal, &datar );
       c->s0[i]   = datal;
       c->s0[i+1] = datar;
+
+      /* Add values to hashset, detect duplicates (weak keys). */
+      ret = add_val (hset[0], datal, &memidx, mempool);
+      weak = ret ? 1 : weak;
+      ret = add_val (hset[0], datar, &memidx, mempool);
+      weak = ret ? 1 : weak;
     }
   for(i=0; i < 256; i += 2 )
     {
       do_encrypt( c, &datal, &datar );
       c->s1[i]   = datal;
       c->s1[i+1] = datar;
+
+      /* Add values to hashset, detect duplicates (weak keys). */
+      ret = add_val (hset[1], datal, &memidx, mempool);
+      weak = ret ? 1 : weak;
+      ret = add_val (hset[1], datar, &memidx, mempool);
+      weak = ret ? 1 : weak;
     }
   for(i=0; i < 256; i += 2 )
     {
       do_encrypt( c, &datal, &datar );
       c->s2[i]   = datal;
       c->s2[i+1] = datar;
+
+      /* Add values to hashset, detect duplicates (weak keys). */
+      ret = add_val (hset[2], datal, &memidx, mempool);
+      weak = ret ? 1 : weak;
+      ret = add_val (hset[2], datar, &memidx, mempool);
+      weak = ret ? 1 : weak;
     }
   for(i=0; i < 256; i += 2 )
     {
       do_encrypt( c, &datal, &datar );
       c->s3[i]   = datal;
       c->s3[i+1] = datar;
+
+      /* Add values to hashset, detect duplicates (weak keys). */
+      ret = add_val (hset[3], datal, &memidx, mempool);
+      weak = ret ? 1 : weak;
+      ret = add_val (hset[3], datar, &memidx, mempool);
+      weak = ret ? 1 : weak;
     }
 
+  /* Clear stack. */
+  wipememory(hset, sizeof(hset));
+  wipememory(mempool, sizeof(mempool[0]) * memidx);
+
+  _gcry_burn_stack (64);
 
   /* Check for weak key.  A weak key is a key in which a value in
      the P-array (here c) occurs more than once per table.  */
-  for(i=0; i < 255; i++ )
-    {
-      for( j=i+1; j < 256; j++)
-        {
-          if( (c->s0[i] == c->s0[j]) || (c->s1[i] == c->s1[j]) ||
-              (c->s2[i] == c->s2[j]) || (c->s3[i] == c->s3[j]) )
-            return GPG_ERR_WEAK_KEY;
-        }
-    }
+  if (weak)
+    return GPG_ERR_WEAK_KEY;
 
   return GPG_ERR_NO_ERROR;
 }
 
 
 static gcry_err_code_t
-bf_setkey (void *context, const byte *key, unsigned keylen)
+bf_setkey (void *context, const byte *key, unsigned keylen,
+           cipher_bulk_ops_t *bulk_ops)
 {
   BLOWFISH_context *c = (BLOWFISH_context *) context;
   gcry_err_code_t rc = do_bf_setkey (c, key, keylen);
-  _gcry_burn_stack (64);
+
+  /* Setup bulk encryption routines.  */
+  memset (bulk_ops, 0, sizeof(*bulk_ops));
+  bulk_ops->cfb_dec = _gcry_blowfish_cfb_dec;
+  bulk_ops->cbc_dec = _gcry_blowfish_cbc_dec;
+  bulk_ops->ctr_enc = _gcry_blowfish_ctr_enc;
+
   return rc;
 }
 
 
 gcry_cipher_spec_t _gcry_cipher_spec_blowfish =
   {
+    GCRY_CIPHER_BLOWFISH, {0, 0},
     "BLOWFISH", NULL, NULL, BLOWFISH_BLOCKSIZE, 128,
     sizeof (BLOWFISH_context),
     bf_setkey, encrypt_block, decrypt_block
diff --git a/grub-core/lib/libgcrypt/cipher/bufhelp.h 
b/grub-core/lib/libgcrypt/cipher/bufhelp.h
index df3559472..6dcc622a1 100644
--- a/grub-core/lib/libgcrypt/cipher/bufhelp.h
+++ b/grub-core/lib/libgcrypt/cipher/bufhelp.h
@@ -1,5 +1,5 @@
 /* bufhelp.h  -  Some buffer manipulation helpers
- * Copyright (C) 2012 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
+ * Copyright (C) 2012-2017 Jussi Kivilinna <jussi.kivilinna@iki.fi>
  *
  * This file is part of Libgcrypt.
  *
@@ -20,269 +20,23 @@
 #define GCRYPT_BUFHELP_H
 
 
+#include "g10lib.h"
 #include "bithelp.h"
+#include "const-time.h"
 
 
-#undef BUFHELP_FAST_UNALIGNED_ACCESS
+#undef BUFHELP_UNALIGNED_ACCESS
 #if defined(HAVE_GCC_ATTRIBUTE_PACKED) && \
     defined(HAVE_GCC_ATTRIBUTE_ALIGNED) && \
-    (defined(__i386__) || defined(__x86_64__) || \
-     (defined(__arm__) && defined(__ARM_FEATURE_UNALIGNED)) || \
-     defined(__aarch64__))
-/* These architectures are able of unaligned memory accesses and can
-   handle those fast.
+    defined(HAVE_GCC_ATTRIBUTE_MAY_ALIAS)
+/* Compiler is supports attributes needed for automatically issuing unaligned
+   memory access instructions.
  */
-# define BUFHELP_FAST_UNALIGNED_ACCESS 1
+# define BUFHELP_UNALIGNED_ACCESS 1
 #endif
 
 
-#ifdef BUFHELP_FAST_UNALIGNED_ACCESS
-/* Define type with one-byte alignment on architectures with fast unaligned
-   memory accesses.
- */
-typedef struct bufhelp_int_s
-{
-  uintptr_t a;
-} __attribute__((packed, aligned(1))) bufhelp_int_t;
-#else
-/* Define type with default alignment for other architectures (unaligned
-   accessed handled in per byte loops).
- */
-typedef struct bufhelp_int_s
-{
-  uintptr_t a;
-} bufhelp_int_t;
-#endif
-
-
-/* Optimized function for small buffer copying */
-static inline void
-buf_cpy(void *_dst, const void *_src, size_t len)
-{
-#if __GNUC__ >= 4 && (defined(__x86_64__) || defined(__i386__))
-  /* For AMD64 and i386, memcpy is faster.  */
-  memcpy(_dst, _src, len);
-#else
-  byte *dst = _dst;
-  const byte *src = _src;
-  bufhelp_int_t *ldst;
-  const bufhelp_int_t *lsrc;
-#ifndef BUFHELP_FAST_UNALIGNED_ACCESS
-  const unsigned int longmask = sizeof(bufhelp_int_t) - 1;
-
-  /* Skip fast processing if buffers are unaligned.  */
-  if (((uintptr_t)dst | (uintptr_t)src) & longmask)
-    goto do_bytes;
-#endif
-
-  ldst = (bufhelp_int_t *)(void *)dst;
-  lsrc = (const bufhelp_int_t *)(const void *)src;
-
-  for (; len >= sizeof(bufhelp_int_t); len -= sizeof(bufhelp_int_t))
-    (ldst++)->a = (lsrc++)->a;
-
-  dst = (byte *)ldst;
-  src = (const byte *)lsrc;
-
-#ifndef BUFHELP_FAST_UNALIGNED_ACCESS
-do_bytes:
-#endif
-  /* Handle tail.  */
-  for (; len; len--)
-    *dst++ = *src++;
-#endif /*__GNUC__ >= 4 && (__x86_64__ || __i386__)*/
-}
-
-
-/* Optimized function for buffer xoring */
-static inline void
-buf_xor(void *_dst, const void *_src1, const void *_src2, size_t len)
-{
-  byte *dst = _dst;
-  const byte *src1 = _src1;
-  const byte *src2 = _src2;
-  bufhelp_int_t *ldst;
-  const bufhelp_int_t *lsrc1, *lsrc2;
-#ifndef BUFHELP_FAST_UNALIGNED_ACCESS
-  const unsigned int longmask = sizeof(bufhelp_int_t) - 1;
-
-  /* Skip fast processing if buffers are unaligned.  */
-  if (((uintptr_t)dst | (uintptr_t)src1 | (uintptr_t)src2) & longmask)
-    goto do_bytes;
-#endif
-
-  ldst = (bufhelp_int_t *)(void *)dst;
-  lsrc1 = (const bufhelp_int_t *)(const void *)src1;
-  lsrc2 = (const bufhelp_int_t *)(const void *)src2;
-
-  for (; len >= sizeof(bufhelp_int_t); len -= sizeof(bufhelp_int_t))
-    (ldst++)->a = (lsrc1++)->a ^ (lsrc2++)->a;
-
-  dst = (byte *)ldst;
-  src1 = (const byte *)lsrc1;
-  src2 = (const byte *)lsrc2;
-
-#ifndef BUFHELP_FAST_UNALIGNED_ACCESS
-do_bytes:
-#endif
-  /* Handle tail.  */
-  for (; len; len--)
-    *dst++ = *src1++ ^ *src2++;
-}
-
-
-/* Optimized function for in-place buffer xoring. */
-static inline void
-buf_xor_1(void *_dst, const void *_src, size_t len)
-{
-  byte *dst = _dst;
-  const byte *src = _src;
-  bufhelp_int_t *ldst;
-  const bufhelp_int_t *lsrc;
-#ifndef BUFHELP_FAST_UNALIGNED_ACCESS
-  const unsigned int longmask = sizeof(bufhelp_int_t) - 1;
-
-  /* Skip fast processing if buffers are unaligned.  */
-  if (((uintptr_t)dst | (uintptr_t)src) & longmask)
-    goto do_bytes;
-#endif
-
-  ldst = (bufhelp_int_t *)(void *)dst;
-  lsrc = (const bufhelp_int_t *)(const void *)src;
-
-  for (; len >= sizeof(bufhelp_int_t); len -= sizeof(bufhelp_int_t))
-    (ldst++)->a ^= (lsrc++)->a;
-
-  dst = (byte *)ldst;
-  src = (const byte *)lsrc;
-
-#ifndef BUFHELP_FAST_UNALIGNED_ACCESS
-do_bytes:
-#endif
-  /* Handle tail.  */
-  for (; len; len--)
-    *dst++ ^= *src++;
-}
-
-
-/* Optimized function for buffer xoring with two destination buffers.  Used
-   mainly by CFB mode encryption.  */
-static inline void
-buf_xor_2dst(void *_dst1, void *_dst2, const void *_src, size_t len)
-{
-  byte *dst1 = _dst1;
-  byte *dst2 = _dst2;
-  const byte *src = _src;
-  bufhelp_int_t *ldst1, *ldst2;
-  const bufhelp_int_t *lsrc;
-#ifndef BUFHELP_FAST_UNALIGNED_ACCESS
-  const unsigned int longmask = sizeof(bufhelp_int_t) - 1;
-
-  /* Skip fast processing if buffers are unaligned.  */
-  if (((uintptr_t)src | (uintptr_t)dst1 | (uintptr_t)dst2) & longmask)
-    goto do_bytes;
-#endif
-
-  ldst1 = (bufhelp_int_t *)(void *)dst1;
-  ldst2 = (bufhelp_int_t *)(void *)dst2;
-  lsrc = (const bufhelp_int_t *)(const void *)src;
-
-  for (; len >= sizeof(bufhelp_int_t); len -= sizeof(bufhelp_int_t))
-    (ldst1++)->a = ((ldst2++)->a ^= (lsrc++)->a);
-
-  dst1 = (byte *)ldst1;
-  dst2 = (byte *)ldst2;
-  src = (const byte *)lsrc;
-
-#ifndef BUFHELP_FAST_UNALIGNED_ACCESS
-do_bytes:
-#endif
-  /* Handle tail.  */
-  for (; len; len--)
-    *dst1++ = (*dst2++ ^= *src++);
-}
-
-
-/* Optimized function for combined buffer xoring and copying.  Used by mainly
-   CBC mode decryption.  */
-static inline void
-buf_xor_n_copy_2(void *_dst_xor, const void *_src_xor, void *_srcdst_cpy,
-                const void *_src_cpy, size_t len)
-{
-  byte *dst_xor = _dst_xor;
-  byte *srcdst_cpy = _srcdst_cpy;
-  const byte *src_xor = _src_xor;
-  const byte *src_cpy = _src_cpy;
-  byte temp;
-  bufhelp_int_t *ldst_xor, *lsrcdst_cpy;
-  const bufhelp_int_t *lsrc_cpy, *lsrc_xor;
-  uintptr_t ltemp;
-#ifndef BUFHELP_FAST_UNALIGNED_ACCESS
-  const unsigned int longmask = sizeof(bufhelp_int_t) - 1;
-
-  /* Skip fast processing if buffers are unaligned.  */
-  if (((uintptr_t)src_cpy | (uintptr_t)src_xor | (uintptr_t)dst_xor |
-       (uintptr_t)srcdst_cpy) & longmask)
-    goto do_bytes;
-#endif
-
-  ldst_xor = (bufhelp_int_t *)(void *)dst_xor;
-  lsrc_xor = (const bufhelp_int_t *)(void *)src_xor;
-  lsrcdst_cpy = (bufhelp_int_t *)(void *)srcdst_cpy;
-  lsrc_cpy = (const bufhelp_int_t *)(const void *)src_cpy;
-
-  for (; len >= sizeof(bufhelp_int_t); len -= sizeof(bufhelp_int_t))
-    {
-      ltemp = (lsrc_cpy++)->a;
-      (ldst_xor++)->a = (lsrcdst_cpy)->a ^ (lsrc_xor++)->a;
-      (lsrcdst_cpy++)->a = ltemp;
-    }
-
-  dst_xor = (byte *)ldst_xor;
-  src_xor = (const byte *)lsrc_xor;
-  srcdst_cpy = (byte *)lsrcdst_cpy;
-  src_cpy = (const byte *)lsrc_cpy;
-
-#ifndef BUFHELP_FAST_UNALIGNED_ACCESS
-do_bytes:
-#endif
-  /* Handle tail.  */
-  for (; len; len--)
-    {
-      temp = *src_cpy++;
-      *dst_xor++ = *srcdst_cpy ^ *src_xor++;
-      *srcdst_cpy++ = temp;
-    }
-}
-
-
-/* Optimized function for combined buffer xoring and copying.  Used by mainly
-   CFB mode decryption.  */
-static inline void
-buf_xor_n_copy(void *_dst_xor, void *_srcdst_cpy, const void *_src, size_t len)
-{
-  buf_xor_n_copy_2(_dst_xor, _src, _srcdst_cpy, _src, len);
-}
-
-
-/* Constant-time compare of two buffers.  Returns 1 if buffers are equal,
-   and 0 if buffers differ.  */
-static inline int
-buf_eq_const(const void *_a, const void *_b, size_t len)
-{
-  const byte *a = _a;
-  const byte *b = _b;
-  size_t diff, i;
-
-  /* Constant-time compare. */
-  for (i = 0, diff = 0; i < len; i++)
-    diff -= !!(a[i] - b[i]);
-
-  return !diff;
-}
-
-
-#ifndef BUFHELP_FAST_UNALIGNED_ACCESS
+#ifndef BUFHELP_UNALIGNED_ACCESS
 
 /* Functions for loading and storing unaligned u32 values of different
    endianness.  */
@@ -365,12 +119,12 @@ static inline void buf_put_le64(void *_buf, u64 val)
   out[0] = val;
 }
 
-#else /*BUFHELP_FAST_UNALIGNED_ACCESS*/
+#else /*BUFHELP_UNALIGNED_ACCESS*/
 
 typedef struct bufhelp_u32_s
 {
   u32 a;
-} __attribute__((packed, aligned(1))) bufhelp_u32_t;
+} __attribute__((packed, aligned(1), may_alias)) bufhelp_u32_t;
 
 /* Functions for loading and storing unaligned u32 values of different
    endianness.  */
@@ -400,7 +154,7 @@ static inline void buf_put_le32(void *_buf, u32 val)
 typedef struct bufhelp_u64_s
 {
   u64 a;
-} __attribute__((packed, aligned(1))) bufhelp_u64_t;
+} __attribute__((packed, aligned(1), may_alias)) bufhelp_u64_t;
 
 /* Functions for loading and storing unaligned u64 values of different
    endianness.  */
@@ -426,7 +180,193 @@ static inline void buf_put_le64(void *_buf, u64 val)
   out->a = le_bswap64(val);
 }
 
+#endif /*BUFHELP_UNALIGNED_ACCESS*/
+
+
+/* Host-endian get/put macros */
+#ifdef WORDS_BIGENDIAN
+# define buf_get_he32 buf_get_be32
+# define buf_put_he32 buf_put_be32
+# define buf_get_he64 buf_get_be64
+# define buf_put_he64 buf_put_be64
+#else
+# define buf_get_he32 buf_get_le32
+# define buf_put_he32 buf_put_le32
+# define buf_get_he64 buf_get_le64
+# define buf_put_he64 buf_put_le64
+#endif
+
+
+
+/* Optimized function for small buffer copying */
+static inline void
+buf_cpy(void *_dst, const void *_src, size_t len)
+{
+  byte *dst = _dst;
+  const byte *src = _src;
+
+#if __GNUC__ >= 4
+  if (!__builtin_constant_p (len))
+    {
+      if (UNLIKELY(len == 0))
+       return;
+      memcpy(_dst, _src, len);
+      return;
+    }
+#endif
+
+  while (len >= sizeof(u64))
+    {
+      buf_put_he64(dst, buf_get_he64(src));
+      dst += sizeof(u64);
+      src += sizeof(u64);
+      len -= sizeof(u64);
+    }
+
+  if (len >= sizeof(u32))
+    {
+      buf_put_he32(dst, buf_get_he32(src));
+      dst += sizeof(u32);
+      src += sizeof(u32);
+      len -= sizeof(u32);
+    }
+
+  /* Handle tail.  */
+  for (; len; len--)
+    *dst++ = *src++;
+}
+
+
+/* Optimized function for buffer xoring */
+static inline void
+buf_xor(void *_dst, const void *_src1, const void *_src2, size_t len)
+{
+  byte *dst = _dst;
+  const byte *src1 = _src1;
+  const byte *src2 = _src2;
+
+  while (len >= sizeof(u64))
+    {
+      buf_put_he64(dst, buf_get_he64(src1) ^ buf_get_he64(src2));
+      dst += sizeof(u64);
+      src1 += sizeof(u64);
+      src2 += sizeof(u64);
+      len -= sizeof(u64);
+    }
+
+  if (len > sizeof(u32))
+    {
+      buf_put_he32(dst, buf_get_he32(src1) ^ buf_get_he32(src2));
+      dst += sizeof(u32);
+      src1 += sizeof(u32);
+      src2 += sizeof(u32);
+      len -= sizeof(u32);
+    }
+
+  /* Handle tail.  */
+  for (; len; len--)
+    *dst++ = *src1++ ^ *src2++;
+}
+
+
+/* Optimized function for buffer xoring with two destination buffers.  Used
+   mainly by CFB mode encryption.  */
+static inline void
+buf_xor_2dst(void *_dst1, void *_dst2, const void *_src, size_t len)
+{
+  byte *dst1 = _dst1;
+  byte *dst2 = _dst2;
+  const byte *src = _src;
+
+  while (len >= sizeof(u64))
+    {
+      u64 temp = buf_get_he64(dst2) ^ buf_get_he64(src);
+      buf_put_he64(dst2, temp);
+      buf_put_he64(dst1, temp);
+      dst2 += sizeof(u64);
+      dst1 += sizeof(u64);
+      src += sizeof(u64);
+      len -= sizeof(u64);
+    }
+
+  if (len >= sizeof(u32))
+    {
+      u32 temp = buf_get_he32(dst2) ^ buf_get_he32(src);
+      buf_put_he32(dst2, temp);
+      buf_put_he32(dst1, temp);
+      dst2 += sizeof(u32);
+      dst1 += sizeof(u32);
+      src += sizeof(u32);
+      len -= sizeof(u32);
+    }
+
+  /* Handle tail.  */
+  for (; len; len--)
+    *dst1++ = (*dst2++ ^= *src++);
+}
+
+
+/* Optimized function for combined buffer xoring and copying.  Used by mainly
+   CBC mode decryption.  */
+static inline void
+buf_xor_n_copy_2(void *_dst_xor, const void *_src_xor, void *_srcdst_cpy,
+                const void *_src_cpy, size_t len)
+{
+  byte *dst_xor = _dst_xor;
+  byte *srcdst_cpy = _srcdst_cpy;
+  const byte *src_xor = _src_xor;
+  const byte *src_cpy = _src_cpy;
+
+  while (len >= sizeof(u64))
+    {
+      u64 temp = buf_get_he64(src_cpy);
+      buf_put_he64(dst_xor, buf_get_he64(srcdst_cpy) ^ buf_get_he64(src_xor));
+      buf_put_he64(srcdst_cpy, temp);
+      dst_xor += sizeof(u64);
+      srcdst_cpy += sizeof(u64);
+      src_xor += sizeof(u64);
+      src_cpy += sizeof(u64);
+      len -= sizeof(u64);
+    }
+
+  if (len >= sizeof(u32))
+    {
+      u32 temp = buf_get_he32(src_cpy);
+      buf_put_he32(dst_xor, buf_get_he32(srcdst_cpy) ^ buf_get_he32(src_xor));
+      buf_put_he32(srcdst_cpy, temp);
+      dst_xor += sizeof(u32);
+      srcdst_cpy += sizeof(u32);
+      src_xor += sizeof(u32);
+      src_cpy += sizeof(u32);
+      len -= sizeof(u32);
+    }
+
+  /* Handle tail.  */
+  for (; len; len--)
+    {
+      byte temp = *src_cpy++;
+      *dst_xor++ = *srcdst_cpy ^ *src_xor++;
+      *srcdst_cpy++ = temp;
+    }
+}
+
+
+/* Optimized function for combined buffer xoring and copying.  Used by mainly
+   CFB mode decryption.  */
+static inline void
+buf_xor_n_copy(void *_dst_xor, void *_srcdst_cpy, const void *_src, size_t len)
+{
+  buf_xor_n_copy_2(_dst_xor, _src, _srcdst_cpy, _src, len);
+}
+
+
+/* Constant-time compare of two buffers.  Returns 1 if buffers are equal,
+   and 0 if buffers differ.  */
+static inline int
+buf_eq_const(const void *a, const void *b, size_t len)
+{
+  return ct_memequal (a, b, len);
+}
 
-#endif /*BUFHELP_FAST_UNALIGNED_ACCESS*/
 
 #endif /*GCRYPT_BUFHELP_H*/
diff --git a/grub-core/lib/libgcrypt/cipher/camellia-aarch64.S 
b/grub-core/lib/libgcrypt/cipher/camellia-aarch64.S
new file mode 100644
index 000000000..30b568d39
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/camellia-aarch64.S
@@ -0,0 +1,586 @@
+/* camellia-aarch64.S  -  ARMv8/AArch64 assembly implementation of Camellia
+ *                        cipher
+ *
+ * Copyright (C) 2016 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "asm-common-aarch64.h"
+
+#if defined(__AARCH64EL__)
+#ifdef HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS
+
+.text
+
+/* struct camellia_ctx: */
+#define key_table 0
+
+/* register macros */
+#define CTX x0
+#define RDST x1
+#define RSRC x2
+#define RKEYBITS w3
+
+#define RTAB1 x4
+#define RTAB2 x5
+#define RTAB3 x6
+#define RTAB4 x7
+#define RMASK w8
+
+#define IL w9
+#define IR w10
+
+#define xIL x9
+#define xIR x10
+
+#define XL w11
+#define XR w12
+#define YL w13
+#define YR w14
+
+#define RT0 w15
+#define RT1 w16
+#define RT2 w17
+#define RT3 w19
+
+#define xRT0 x15
+#define xRT1 x16
+#define xRT2 x17
+#define xRT3 x19
+
+#ifdef __AARCH64EL__
+  #define host_to_be(reg, rtmp) \
+         rev reg, reg;
+  #define be_to_host(reg, rtmp) \
+         rev reg, reg;
+#else
+  /* nop on big-endian */
+  #define host_to_be(reg, rtmp) /*_*/
+  #define be_to_host(reg, rtmp) /*_*/
+#endif
+
+#define ldr_input_aligned_be(rin, a, b, c, d, rtmp) \
+       ldr a, [rin, #0]; \
+       ldr b, [rin, #4]; \
+       be_to_host(a, rtmp); \
+       ldr c, [rin, #8]; \
+       be_to_host(b, rtmp); \
+       ldr d, [rin, #12]; \
+       be_to_host(c, rtmp); \
+       be_to_host(d, rtmp);
+
+#define str_output_aligned_be(rout, a, b, c, d, rtmp) \
+       be_to_host(a, rtmp); \
+       be_to_host(b, rtmp); \
+       str a, [rout, #0]; \
+       be_to_host(c, rtmp); \
+       str b, [rout, #4]; \
+       be_to_host(d, rtmp); \
+       str c, [rout, #8]; \
+       str d, [rout, #12];
+
+/* unaligned word reads/writes allowed */
+#define ldr_input_be(rin, ra, rb, rc, rd, rtmp) \
+       ldr_input_aligned_be(rin, ra, rb, rc, rd, rtmp)
+
+#define str_output_be(rout, ra, rb, rc, rd, rtmp0, rtmp1) \
+       str_output_aligned_be(rout, ra, rb, rc, rd, rtmp0)
+
+/**********************************************************************
+  1-way camellia
+ **********************************************************************/
+#define roundsm(xl, xr, kl, kr, yl, yr) \
+       ldr RT2, [CTX, #(key_table + ((kl) * 4))]; \
+       and  IR, RMASK, xr, lsl#(4);      /*sp1110*/ \
+       ldr RT3, [CTX, #(key_table + ((kr) * 4))]; \
+       and  IL, RMASK, xl, lsr#(24 - 4); /*sp1110*/ \
+       and RT0, RMASK, xr, lsr#(16 - 4); /*sp3033*/ \
+       ldr  IR, [RTAB1,  xIR]; \
+       and RT1, RMASK, xl, lsr#(8 - 4);  /*sp3033*/ \
+       eor yl, yl, RT2; \
+       ldr  IL, [RTAB1,  xIL]; \
+       eor yr, yr, RT3; \
+       \
+       ldr RT0, [RTAB3, xRT0]; \
+       ldr RT1, [RTAB3, xRT1]; \
+       \
+       and RT2, RMASK, xr, lsr#(24 - 4); /*sp0222*/ \
+       and RT3, RMASK, xl, lsr#(16 - 4); /*sp0222*/ \
+       \
+       eor IR, IR, RT0; \
+       eor IL, IL, RT1; \
+       \
+       ldr RT2, [RTAB2, xRT2]; \
+       and RT0, RMASK, xr, lsr#(8 - 4);  /*sp4404*/ \
+       ldr RT3, [RTAB2, xRT3]; \
+       and RT1, RMASK, xl, lsl#(4);      /*sp4404*/ \
+       \
+       ldr RT0, [RTAB4, xRT0]; \
+       ldr RT1, [RTAB4, xRT1]; \
+       \
+       eor IR, IR, RT2; \
+       eor IL, IL, RT3; \
+       eor IR, IR, RT0; \
+       eor IL, IL, RT1; \
+       \
+       eor IR, IR, IL; \
+       eor yr, yr, IL, ror#8; \
+       eor yl, yl, IR; \
+       eor yr, yr, IR;
+
+#define enc_rounds(n) \
+       roundsm(XL, XR, ((n) + 2) * 2 + 0, ((n) + 2) * 2 + 1, YL, YR); \
+       roundsm(YL, YR, ((n) + 3) * 2 + 0, ((n) + 3) * 2 + 1, XL, XR); \
+       roundsm(XL, XR, ((n) + 4) * 2 + 0, ((n) + 4) * 2 + 1, YL, YR); \
+       roundsm(YL, YR, ((n) + 5) * 2 + 0, ((n) + 5) * 2 + 1, XL, XR); \
+       roundsm(XL, XR, ((n) + 6) * 2 + 0, ((n) + 6) * 2 + 1, YL, YR); \
+       roundsm(YL, YR, ((n) + 7) * 2 + 0, ((n) + 7) * 2 + 1, XL, XR);
+
+#define dec_rounds(n) \
+       roundsm(XL, XR, ((n) + 7) * 2 + 0, ((n) + 7) * 2 + 1, YL, YR); \
+       roundsm(YL, YR, ((n) + 6) * 2 + 0, ((n) + 6) * 2 + 1, XL, XR); \
+       roundsm(XL, XR, ((n) + 5) * 2 + 0, ((n) + 5) * 2 + 1, YL, YR); \
+       roundsm(YL, YR, ((n) + 4) * 2 + 0, ((n) + 4) * 2 + 1, XL, XR); \
+       roundsm(XL, XR, ((n) + 3) * 2 + 0, ((n) + 3) * 2 + 1, YL, YR); \
+       roundsm(YL, YR, ((n) + 2) * 2 + 0, ((n) + 2) * 2 + 1, XL, XR);
+
+/* perform FL and FL⁻¹ */
+#define fls(ll, lr, rl, rr, kll, klr, krl, krr) \
+       ldr RT0, [CTX, #(key_table + ((kll) * 4))]; \
+       ldr RT2, [CTX, #(key_table + ((krr) * 4))]; \
+       and RT0, RT0, ll; \
+       ldr RT3, [CTX, #(key_table + ((krl) * 4))]; \
+       orr RT2, RT2, rr; \
+       ldr RT1, [CTX, #(key_table + ((klr) * 4))]; \
+       eor rl, rl, RT2; \
+       eor lr, lr, RT0, ror#31; \
+       and RT3, RT3, rl; \
+       orr RT1, RT1, lr; \
+       eor ll, ll, RT1; \
+       eor rr, rr, RT3, ror#31;
+
+#define enc_fls(n) \
+       fls(XL, XR, YL, YR, \
+           (n) * 2 + 0, (n) * 2 + 1, \
+           (n) * 2 + 2, (n) * 2 + 3);
+
+#define dec_fls(n) \
+       fls(XL, XR, YL, YR, \
+           (n) * 2 + 2, (n) * 2 + 3, \
+           (n) * 2 + 0, (n) * 2 + 1);
+
+#define inpack(n) \
+       ldr_input_be(RSRC, XL, XR, YL, YR, RT0); \
+       ldr RT0, [CTX, #(key_table + ((n) * 8) + 0)]; \
+       ldr RT1, [CTX, #(key_table + ((n) * 8) + 4)]; \
+       eor XL, XL, RT0; \
+       eor XR, XR, RT1;
+
+#define outunpack(n) \
+       ldr RT0, [CTX, #(key_table + ((n) * 8) + 0)]; \
+       ldr RT1, [CTX, #(key_table + ((n) * 8) + 4)]; \
+       eor YL, YL, RT0; \
+       eor YR, YR, RT1; \
+       str_output_be(RDST, YL, YR, XL, XR, RT0, RT1);
+
+.globl _gcry_camellia_arm_encrypt_block
+ELF(.type   _gcry_camellia_arm_encrypt_block,@function;)
+
+_gcry_camellia_arm_encrypt_block:
+       CFI_STARTPROC()
+       stp x19, x30, [sp, #-16]!
+       CFI_ADJUST_CFA_OFFSET(16)
+       CFI_REG_ON_STACK(19, 0)
+       CFI_REG_ON_STACK(30, 8)
+
+       /* input:
+        *      x0: keytable
+        *      x1: dst
+        *      x2: src
+        *      w3: keybitlen
+        */
+
+       adr RTAB1,  _gcry_camellia_arm_tables;
+       mov RMASK, #(0xff<<4); /* byte mask */
+       add RTAB2, RTAB1, #(1 * 4);
+       add RTAB3, RTAB1, #(2 * 4);
+       add RTAB4, RTAB1, #(3 * 4);
+
+       inpack(0);
+
+       enc_rounds(0);
+       enc_fls(8);
+       enc_rounds(8);
+       enc_fls(16);
+       enc_rounds(16);
+
+       cmp RKEYBITS, #(16 * 8);
+       bne .Lenc_256;
+
+       outunpack(24);
+
+       CFI_REMEMBER_STATE()
+       ldp x19, x30, [sp], #16
+       CFI_ADJUST_CFA_OFFSET(-16)
+       CFI_RESTORE(x19)
+       CFI_RESTORE(x30)
+       ret_spec_stop;
+       CFI_RESTORE_STATE()
+.ltorg
+
+.Lenc_256:
+       enc_fls(24);
+       enc_rounds(24);
+
+       outunpack(32);
+
+       ldp x19, x30, [sp], #16
+       CFI_ADJUST_CFA_OFFSET(-16)
+       CFI_RESTORE(x19)
+       CFI_RESTORE(x30)
+       ret_spec_stop;
+       CFI_ENDPROC()
+.ltorg
+ELF(.size _gcry_camellia_arm_encrypt_block,.-_gcry_camellia_arm_encrypt_block;)
+
+.globl _gcry_camellia_arm_decrypt_block
+ELF(.type   _gcry_camellia_arm_decrypt_block,@function;)
+
+_gcry_camellia_arm_decrypt_block:
+       CFI_STARTPROC()
+       stp x19, x30, [sp, #-16]!
+       CFI_ADJUST_CFA_OFFSET(16)
+       CFI_REG_ON_STACK(19, 0)
+       CFI_REG_ON_STACK(30, 8)
+
+       /* input:
+        *      x0: keytable
+        *      x1: dst
+        *      x2: src
+        *      w3: keybitlen
+        */
+
+       adr RTAB1,  _gcry_camellia_arm_tables;
+       mov RMASK, #(0xff<<4); /* byte mask */
+       add RTAB2, RTAB1, #(1 * 4);
+       add RTAB3, RTAB1, #(2 * 4);
+       add RTAB4, RTAB1, #(3 * 4);
+
+       cmp RKEYBITS, #(16 * 8);
+       bne .Ldec_256;
+
+       inpack(24);
+
+.Ldec_128:
+       dec_rounds(16);
+       dec_fls(16);
+       dec_rounds(8);
+       dec_fls(8);
+       dec_rounds(0);
+
+       outunpack(0);
+
+       CFI_REMEMBER_STATE()
+       ldp x19, x30, [sp], #16
+       CFI_ADJUST_CFA_OFFSET(-16)
+       CFI_RESTORE(x19)
+       CFI_RESTORE(x30)
+       ret_spec_stop;
+       CFI_RESTORE_STATE()
+.ltorg
+
+.Ldec_256:
+       inpack(32);
+       dec_rounds(24);
+       dec_fls(24);
+
+       b .Ldec_128;
+       CFI_ENDPROC()
+.ltorg
+ELF(.size _gcry_camellia_arm_decrypt_block,.-_gcry_camellia_arm_decrypt_block;)
+
+/* Encryption/Decryption tables */
+ELF(.type  _gcry_camellia_arm_tables,@object;)
+.balign 32
+_gcry_camellia_arm_tables:
+.Lcamellia_sp1110:
+.long 0x70707000
+.Lcamellia_sp0222:
+            .long 0x00e0e0e0
+.Lcamellia_sp3033:
+                        .long 0x38003838
+.Lcamellia_sp4404:
+                                    .long 0x70700070
+.long 0x82828200, 0x00050505, 0x41004141, 0x2c2c002c
+.long 0x2c2c2c00, 0x00585858, 0x16001616, 0xb3b300b3
+.long 0xececec00, 0x00d9d9d9, 0x76007676, 0xc0c000c0
+.long 0xb3b3b300, 0x00676767, 0xd900d9d9, 0xe4e400e4
+.long 0x27272700, 0x004e4e4e, 0x93009393, 0x57570057
+.long 0xc0c0c000, 0x00818181, 0x60006060, 0xeaea00ea
+.long 0xe5e5e500, 0x00cbcbcb, 0xf200f2f2, 0xaeae00ae
+.long 0xe4e4e400, 0x00c9c9c9, 0x72007272, 0x23230023
+.long 0x85858500, 0x000b0b0b, 0xc200c2c2, 0x6b6b006b
+.long 0x57575700, 0x00aeaeae, 0xab00abab, 0x45450045
+.long 0x35353500, 0x006a6a6a, 0x9a009a9a, 0xa5a500a5
+.long 0xeaeaea00, 0x00d5d5d5, 0x75007575, 0xeded00ed
+.long 0x0c0c0c00, 0x00181818, 0x06000606, 0x4f4f004f
+.long 0xaeaeae00, 0x005d5d5d, 0x57005757, 0x1d1d001d
+.long 0x41414100, 0x00828282, 0xa000a0a0, 0x92920092
+.long 0x23232300, 0x00464646, 0x91009191, 0x86860086
+.long 0xefefef00, 0x00dfdfdf, 0xf700f7f7, 0xafaf00af
+.long 0x6b6b6b00, 0x00d6d6d6, 0xb500b5b5, 0x7c7c007c
+.long 0x93939300, 0x00272727, 0xc900c9c9, 0x1f1f001f
+.long 0x45454500, 0x008a8a8a, 0xa200a2a2, 0x3e3e003e
+.long 0x19191900, 0x00323232, 0x8c008c8c, 0xdcdc00dc
+.long 0xa5a5a500, 0x004b4b4b, 0xd200d2d2, 0x5e5e005e
+.long 0x21212100, 0x00424242, 0x90009090, 0x0b0b000b
+.long 0xededed00, 0x00dbdbdb, 0xf600f6f6, 0xa6a600a6
+.long 0x0e0e0e00, 0x001c1c1c, 0x07000707, 0x39390039
+.long 0x4f4f4f00, 0x009e9e9e, 0xa700a7a7, 0xd5d500d5
+.long 0x4e4e4e00, 0x009c9c9c, 0x27002727, 0x5d5d005d
+.long 0x1d1d1d00, 0x003a3a3a, 0x8e008e8e, 0xd9d900d9
+.long 0x65656500, 0x00cacaca, 0xb200b2b2, 0x5a5a005a
+.long 0x92929200, 0x00252525, 0x49004949, 0x51510051
+.long 0xbdbdbd00, 0x007b7b7b, 0xde00dede, 0x6c6c006c
+.long 0x86868600, 0x000d0d0d, 0x43004343, 0x8b8b008b
+.long 0xb8b8b800, 0x00717171, 0x5c005c5c, 0x9a9a009a
+.long 0xafafaf00, 0x005f5f5f, 0xd700d7d7, 0xfbfb00fb
+.long 0x8f8f8f00, 0x001f1f1f, 0xc700c7c7, 0xb0b000b0
+.long 0x7c7c7c00, 0x00f8f8f8, 0x3e003e3e, 0x74740074
+.long 0xebebeb00, 0x00d7d7d7, 0xf500f5f5, 0x2b2b002b
+.long 0x1f1f1f00, 0x003e3e3e, 0x8f008f8f, 0xf0f000f0
+.long 0xcecece00, 0x009d9d9d, 0x67006767, 0x84840084
+.long 0x3e3e3e00, 0x007c7c7c, 0x1f001f1f, 0xdfdf00df
+.long 0x30303000, 0x00606060, 0x18001818, 0xcbcb00cb
+.long 0xdcdcdc00, 0x00b9b9b9, 0x6e006e6e, 0x34340034
+.long 0x5f5f5f00, 0x00bebebe, 0xaf00afaf, 0x76760076
+.long 0x5e5e5e00, 0x00bcbcbc, 0x2f002f2f, 0x6d6d006d
+.long 0xc5c5c500, 0x008b8b8b, 0xe200e2e2, 0xa9a900a9
+.long 0x0b0b0b00, 0x00161616, 0x85008585, 0xd1d100d1
+.long 0x1a1a1a00, 0x00343434, 0x0d000d0d, 0x04040004
+.long 0xa6a6a600, 0x004d4d4d, 0x53005353, 0x14140014
+.long 0xe1e1e100, 0x00c3c3c3, 0xf000f0f0, 0x3a3a003a
+.long 0x39393900, 0x00727272, 0x9c009c9c, 0xdede00de
+.long 0xcacaca00, 0x00959595, 0x65006565, 0x11110011
+.long 0xd5d5d500, 0x00ababab, 0xea00eaea, 0x32320032
+.long 0x47474700, 0x008e8e8e, 0xa300a3a3, 0x9c9c009c
+.long 0x5d5d5d00, 0x00bababa, 0xae00aeae, 0x53530053
+.long 0x3d3d3d00, 0x007a7a7a, 0x9e009e9e, 0xf2f200f2
+.long 0xd9d9d900, 0x00b3b3b3, 0xec00ecec, 0xfefe00fe
+.long 0x01010100, 0x00020202, 0x80008080, 0xcfcf00cf
+.long 0x5a5a5a00, 0x00b4b4b4, 0x2d002d2d, 0xc3c300c3
+.long 0xd6d6d600, 0x00adadad, 0x6b006b6b, 0x7a7a007a
+.long 0x51515100, 0x00a2a2a2, 0xa800a8a8, 0x24240024
+.long 0x56565600, 0x00acacac, 0x2b002b2b, 0xe8e800e8
+.long 0x6c6c6c00, 0x00d8d8d8, 0x36003636, 0x60600060
+.long 0x4d4d4d00, 0x009a9a9a, 0xa600a6a6, 0x69690069
+.long 0x8b8b8b00, 0x00171717, 0xc500c5c5, 0xaaaa00aa
+.long 0x0d0d0d00, 0x001a1a1a, 0x86008686, 0xa0a000a0
+.long 0x9a9a9a00, 0x00353535, 0x4d004d4d, 0xa1a100a1
+.long 0x66666600, 0x00cccccc, 0x33003333, 0x62620062
+.long 0xfbfbfb00, 0x00f7f7f7, 0xfd00fdfd, 0x54540054
+.long 0xcccccc00, 0x00999999, 0x66006666, 0x1e1e001e
+.long 0xb0b0b000, 0x00616161, 0x58005858, 0xe0e000e0
+.long 0x2d2d2d00, 0x005a5a5a, 0x96009696, 0x64640064
+.long 0x74747400, 0x00e8e8e8, 0x3a003a3a, 0x10100010
+.long 0x12121200, 0x00242424, 0x09000909, 0x00000000
+.long 0x2b2b2b00, 0x00565656, 0x95009595, 0xa3a300a3
+.long 0x20202000, 0x00404040, 0x10001010, 0x75750075
+.long 0xf0f0f000, 0x00e1e1e1, 0x78007878, 0x8a8a008a
+.long 0xb1b1b100, 0x00636363, 0xd800d8d8, 0xe6e600e6
+.long 0x84848400, 0x00090909, 0x42004242, 0x09090009
+.long 0x99999900, 0x00333333, 0xcc00cccc, 0xdddd00dd
+.long 0xdfdfdf00, 0x00bfbfbf, 0xef00efef, 0x87870087
+.long 0x4c4c4c00, 0x00989898, 0x26002626, 0x83830083
+.long 0xcbcbcb00, 0x00979797, 0xe500e5e5, 0xcdcd00cd
+.long 0xc2c2c200, 0x00858585, 0x61006161, 0x90900090
+.long 0x34343400, 0x00686868, 0x1a001a1a, 0x73730073
+.long 0x7e7e7e00, 0x00fcfcfc, 0x3f003f3f, 0xf6f600f6
+.long 0x76767600, 0x00ececec, 0x3b003b3b, 0x9d9d009d
+.long 0x05050500, 0x000a0a0a, 0x82008282, 0xbfbf00bf
+.long 0x6d6d6d00, 0x00dadada, 0xb600b6b6, 0x52520052
+.long 0xb7b7b700, 0x006f6f6f, 0xdb00dbdb, 0xd8d800d8
+.long 0xa9a9a900, 0x00535353, 0xd400d4d4, 0xc8c800c8
+.long 0x31313100, 0x00626262, 0x98009898, 0xc6c600c6
+.long 0xd1d1d100, 0x00a3a3a3, 0xe800e8e8, 0x81810081
+.long 0x17171700, 0x002e2e2e, 0x8b008b8b, 0x6f6f006f
+.long 0x04040400, 0x00080808, 0x02000202, 0x13130013
+.long 0xd7d7d700, 0x00afafaf, 0xeb00ebeb, 0x63630063
+.long 0x14141400, 0x00282828, 0x0a000a0a, 0xe9e900e9
+.long 0x58585800, 0x00b0b0b0, 0x2c002c2c, 0xa7a700a7
+.long 0x3a3a3a00, 0x00747474, 0x1d001d1d, 0x9f9f009f
+.long 0x61616100, 0x00c2c2c2, 0xb000b0b0, 0xbcbc00bc
+.long 0xdedede00, 0x00bdbdbd, 0x6f006f6f, 0x29290029
+.long 0x1b1b1b00, 0x00363636, 0x8d008d8d, 0xf9f900f9
+.long 0x11111100, 0x00222222, 0x88008888, 0x2f2f002f
+.long 0x1c1c1c00, 0x00383838, 0x0e000e0e, 0xb4b400b4
+.long 0x32323200, 0x00646464, 0x19001919, 0x78780078
+.long 0x0f0f0f00, 0x001e1e1e, 0x87008787, 0x06060006
+.long 0x9c9c9c00, 0x00393939, 0x4e004e4e, 0xe7e700e7
+.long 0x16161600, 0x002c2c2c, 0x0b000b0b, 0x71710071
+.long 0x53535300, 0x00a6a6a6, 0xa900a9a9, 0xd4d400d4
+.long 0x18181800, 0x00303030, 0x0c000c0c, 0xabab00ab
+.long 0xf2f2f200, 0x00e5e5e5, 0x79007979, 0x88880088
+.long 0x22222200, 0x00444444, 0x11001111, 0x8d8d008d
+.long 0xfefefe00, 0x00fdfdfd, 0x7f007f7f, 0x72720072
+.long 0x44444400, 0x00888888, 0x22002222, 0xb9b900b9
+.long 0xcfcfcf00, 0x009f9f9f, 0xe700e7e7, 0xf8f800f8
+.long 0xb2b2b200, 0x00656565, 0x59005959, 0xacac00ac
+.long 0xc3c3c300, 0x00878787, 0xe100e1e1, 0x36360036
+.long 0xb5b5b500, 0x006b6b6b, 0xda00dada, 0x2a2a002a
+.long 0x7a7a7a00, 0x00f4f4f4, 0x3d003d3d, 0x3c3c003c
+.long 0x91919100, 0x00232323, 0xc800c8c8, 0xf1f100f1
+.long 0x24242400, 0x00484848, 0x12001212, 0x40400040
+.long 0x08080800, 0x00101010, 0x04000404, 0xd3d300d3
+.long 0xe8e8e800, 0x00d1d1d1, 0x74007474, 0xbbbb00bb
+.long 0xa8a8a800, 0x00515151, 0x54005454, 0x43430043
+.long 0x60606000, 0x00c0c0c0, 0x30003030, 0x15150015
+.long 0xfcfcfc00, 0x00f9f9f9, 0x7e007e7e, 0xadad00ad
+.long 0x69696900, 0x00d2d2d2, 0xb400b4b4, 0x77770077
+.long 0x50505000, 0x00a0a0a0, 0x28002828, 0x80800080
+.long 0xaaaaaa00, 0x00555555, 0x55005555, 0x82820082
+.long 0xd0d0d000, 0x00a1a1a1, 0x68006868, 0xecec00ec
+.long 0xa0a0a000, 0x00414141, 0x50005050, 0x27270027
+.long 0x7d7d7d00, 0x00fafafa, 0xbe00bebe, 0xe5e500e5
+.long 0xa1a1a100, 0x00434343, 0xd000d0d0, 0x85850085
+.long 0x89898900, 0x00131313, 0xc400c4c4, 0x35350035
+.long 0x62626200, 0x00c4c4c4, 0x31003131, 0x0c0c000c
+.long 0x97979700, 0x002f2f2f, 0xcb00cbcb, 0x41410041
+.long 0x54545400, 0x00a8a8a8, 0x2a002a2a, 0xefef00ef
+.long 0x5b5b5b00, 0x00b6b6b6, 0xad00adad, 0x93930093
+.long 0x1e1e1e00, 0x003c3c3c, 0x0f000f0f, 0x19190019
+.long 0x95959500, 0x002b2b2b, 0xca00caca, 0x21210021
+.long 0xe0e0e000, 0x00c1c1c1, 0x70007070, 0x0e0e000e
+.long 0xffffff00, 0x00ffffff, 0xff00ffff, 0x4e4e004e
+.long 0x64646400, 0x00c8c8c8, 0x32003232, 0x65650065
+.long 0xd2d2d200, 0x00a5a5a5, 0x69006969, 0xbdbd00bd
+.long 0x10101000, 0x00202020, 0x08000808, 0xb8b800b8
+.long 0xc4c4c400, 0x00898989, 0x62006262, 0x8f8f008f
+.long 0x00000000, 0x00000000, 0x00000000, 0xebeb00eb
+.long 0x48484800, 0x00909090, 0x24002424, 0xcece00ce
+.long 0xa3a3a300, 0x00474747, 0xd100d1d1, 0x30300030
+.long 0xf7f7f700, 0x00efefef, 0xfb00fbfb, 0x5f5f005f
+.long 0x75757500, 0x00eaeaea, 0xba00baba, 0xc5c500c5
+.long 0xdbdbdb00, 0x00b7b7b7, 0xed00eded, 0x1a1a001a
+.long 0x8a8a8a00, 0x00151515, 0x45004545, 0xe1e100e1
+.long 0x03030300, 0x00060606, 0x81008181, 0xcaca00ca
+.long 0xe6e6e600, 0x00cdcdcd, 0x73007373, 0x47470047
+.long 0xdadada00, 0x00b5b5b5, 0x6d006d6d, 0x3d3d003d
+.long 0x09090900, 0x00121212, 0x84008484, 0x01010001
+.long 0x3f3f3f00, 0x007e7e7e, 0x9f009f9f, 0xd6d600d6
+.long 0xdddddd00, 0x00bbbbbb, 0xee00eeee, 0x56560056
+.long 0x94949400, 0x00292929, 0x4a004a4a, 0x4d4d004d
+.long 0x87878700, 0x000f0f0f, 0xc300c3c3, 0x0d0d000d
+.long 0x5c5c5c00, 0x00b8b8b8, 0x2e002e2e, 0x66660066
+.long 0x83838300, 0x00070707, 0xc100c1c1, 0xcccc00cc
+.long 0x02020200, 0x00040404, 0x01000101, 0x2d2d002d
+.long 0xcdcdcd00, 0x009b9b9b, 0xe600e6e6, 0x12120012
+.long 0x4a4a4a00, 0x00949494, 0x25002525, 0x20200020
+.long 0x90909000, 0x00212121, 0x48004848, 0xb1b100b1
+.long 0x33333300, 0x00666666, 0x99009999, 0x99990099
+.long 0x73737300, 0x00e6e6e6, 0xb900b9b9, 0x4c4c004c
+.long 0x67676700, 0x00cecece, 0xb300b3b3, 0xc2c200c2
+.long 0xf6f6f600, 0x00ededed, 0x7b007b7b, 0x7e7e007e
+.long 0xf3f3f300, 0x00e7e7e7, 0xf900f9f9, 0x05050005
+.long 0x9d9d9d00, 0x003b3b3b, 0xce00cece, 0xb7b700b7
+.long 0x7f7f7f00, 0x00fefefe, 0xbf00bfbf, 0x31310031
+.long 0xbfbfbf00, 0x007f7f7f, 0xdf00dfdf, 0x17170017
+.long 0xe2e2e200, 0x00c5c5c5, 0x71007171, 0xd7d700d7
+.long 0x52525200, 0x00a4a4a4, 0x29002929, 0x58580058
+.long 0x9b9b9b00, 0x00373737, 0xcd00cdcd, 0x61610061
+.long 0xd8d8d800, 0x00b1b1b1, 0x6c006c6c, 0x1b1b001b
+.long 0x26262600, 0x004c4c4c, 0x13001313, 0x1c1c001c
+.long 0xc8c8c800, 0x00919191, 0x64006464, 0x0f0f000f
+.long 0x37373700, 0x006e6e6e, 0x9b009b9b, 0x16160016
+.long 0xc6c6c600, 0x008d8d8d, 0x63006363, 0x18180018
+.long 0x3b3b3b00, 0x00767676, 0x9d009d9d, 0x22220022
+.long 0x81818100, 0x00030303, 0xc000c0c0, 0x44440044
+.long 0x96969600, 0x002d2d2d, 0x4b004b4b, 0xb2b200b2
+.long 0x6f6f6f00, 0x00dedede, 0xb700b7b7, 0xb5b500b5
+.long 0x4b4b4b00, 0x00969696, 0xa500a5a5, 0x91910091
+.long 0x13131300, 0x00262626, 0x89008989, 0x08080008
+.long 0xbebebe00, 0x007d7d7d, 0x5f005f5f, 0xa8a800a8
+.long 0x63636300, 0x00c6c6c6, 0xb100b1b1, 0xfcfc00fc
+.long 0x2e2e2e00, 0x005c5c5c, 0x17001717, 0x50500050
+.long 0xe9e9e900, 0x00d3d3d3, 0xf400f4f4, 0xd0d000d0
+.long 0x79797900, 0x00f2f2f2, 0xbc00bcbc, 0x7d7d007d
+.long 0xa7a7a700, 0x004f4f4f, 0xd300d3d3, 0x89890089
+.long 0x8c8c8c00, 0x00191919, 0x46004646, 0x97970097
+.long 0x9f9f9f00, 0x003f3f3f, 0xcf00cfcf, 0x5b5b005b
+.long 0x6e6e6e00, 0x00dcdcdc, 0x37003737, 0x95950095
+.long 0xbcbcbc00, 0x00797979, 0x5e005e5e, 0xffff00ff
+.long 0x8e8e8e00, 0x001d1d1d, 0x47004747, 0xd2d200d2
+.long 0x29292900, 0x00525252, 0x94009494, 0xc4c400c4
+.long 0xf5f5f500, 0x00ebebeb, 0xfa00fafa, 0x48480048
+.long 0xf9f9f900, 0x00f3f3f3, 0xfc00fcfc, 0xf7f700f7
+.long 0xb6b6b600, 0x006d6d6d, 0x5b005b5b, 0xdbdb00db
+.long 0x2f2f2f00, 0x005e5e5e, 0x97009797, 0x03030003
+.long 0xfdfdfd00, 0x00fbfbfb, 0xfe00fefe, 0xdada00da
+.long 0xb4b4b400, 0x00696969, 0x5a005a5a, 0x3f3f003f
+.long 0x59595900, 0x00b2b2b2, 0xac00acac, 0x94940094
+.long 0x78787800, 0x00f0f0f0, 0x3c003c3c, 0x5c5c005c
+.long 0x98989800, 0x00313131, 0x4c004c4c, 0x02020002
+.long 0x06060600, 0x000c0c0c, 0x03000303, 0x4a4a004a
+.long 0x6a6a6a00, 0x00d4d4d4, 0x35003535, 0x33330033
+.long 0xe7e7e700, 0x00cfcfcf, 0xf300f3f3, 0x67670067
+.long 0x46464600, 0x008c8c8c, 0x23002323, 0xf3f300f3
+.long 0x71717100, 0x00e2e2e2, 0xb800b8b8, 0x7f7f007f
+.long 0xbababa00, 0x00757575, 0x5d005d5d, 0xe2e200e2
+.long 0xd4d4d400, 0x00a9a9a9, 0x6a006a6a, 0x9b9b009b
+.long 0x25252500, 0x004a4a4a, 0x92009292, 0x26260026
+.long 0xababab00, 0x00575757, 0xd500d5d5, 0x37370037
+.long 0x42424200, 0x00848484, 0x21002121, 0x3b3b003b
+.long 0x88888800, 0x00111111, 0x44004444, 0x96960096
+.long 0xa2a2a200, 0x00454545, 0x51005151, 0x4b4b004b
+.long 0x8d8d8d00, 0x001b1b1b, 0xc600c6c6, 0xbebe00be
+.long 0xfafafa00, 0x00f5f5f5, 0x7d007d7d, 0x2e2e002e
+.long 0x72727200, 0x00e4e4e4, 0x39003939, 0x79790079
+.long 0x07070700, 0x000e0e0e, 0x83008383, 0x8c8c008c
+.long 0xb9b9b900, 0x00737373, 0xdc00dcdc, 0x6e6e006e
+.long 0x55555500, 0x00aaaaaa, 0xaa00aaaa, 0x8e8e008e
+.long 0xf8f8f800, 0x00f1f1f1, 0x7c007c7c, 0xf5f500f5
+.long 0xeeeeee00, 0x00dddddd, 0x77007777, 0xb6b600b6
+.long 0xacacac00, 0x00595959, 0x56005656, 0xfdfd00fd
+.long 0x0a0a0a00, 0x00141414, 0x05000505, 0x59590059
+.long 0x36363600, 0x006c6c6c, 0x1b001b1b, 0x98980098
+.long 0x49494900, 0x00929292, 0xa400a4a4, 0x6a6a006a
+.long 0x2a2a2a00, 0x00545454, 0x15001515, 0x46460046
+.long 0x68686800, 0x00d0d0d0, 0x34003434, 0xbaba00ba
+.long 0x3c3c3c00, 0x00787878, 0x1e001e1e, 0x25250025
+.long 0x38383800, 0x00707070, 0x1c001c1c, 0x42420042
+.long 0xf1f1f100, 0x00e3e3e3, 0xf800f8f8, 0xa2a200a2
+.long 0xa4a4a400, 0x00494949, 0x52005252, 0xfafa00fa
+.long 0x40404000, 0x00808080, 0x20002020, 0x07070007
+.long 0x28282800, 0x00505050, 0x14001414, 0x55550055
+.long 0xd3d3d300, 0x00a7a7a7, 0xe900e9e9, 0xeeee00ee
+.long 0x7b7b7b00, 0x00f6f6f6, 0xbd00bdbd, 0x0a0a000a
+.long 0xbbbbbb00, 0x00777777, 0xdd00dddd, 0x49490049
+.long 0xc9c9c900, 0x00939393, 0xe400e4e4, 0x68680068
+.long 0x43434300, 0x00868686, 0xa100a1a1, 0x38380038
+.long 0xc1c1c100, 0x00838383, 0xe000e0e0, 0xa4a400a4
+.long 0x15151500, 0x002a2a2a, 0x8a008a8a, 0x28280028
+.long 0xe3e3e300, 0x00c7c7c7, 0xf100f1f1, 0x7b7b007b
+.long 0xadadad00, 0x005b5b5b, 0xd600d6d6, 0xc9c900c9
+.long 0xf4f4f400, 0x00e9e9e9, 0x7a007a7a, 0xc1c100c1
+.long 0x77777700, 0x00eeeeee, 0xbb00bbbb, 0xe3e300e3
+.long 0xc7c7c700, 0x008f8f8f, 0xe300e3e3, 0xf4f400f4
+.long 0x80808000, 0x00010101, 0x40004040, 0xc7c700c7
+.long 0x9e9e9e00, 0x003d3d3d, 0x4f004f4f, 0x9e9e009e
+ELF(.size _gcry_camellia_arm_tables,.-_gcry_camellia_arm_tables;)
+
+#endif /*HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS*/
+#endif /*__AARCH64EL__*/
diff --git a/grub-core/lib/libgcrypt/cipher/camellia-aesni-avx-amd64.S 
b/grub-core/lib/libgcrypt/cipher/camellia-aesni-avx-amd64.S
new file mode 100644
index 000000000..5c304e574
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/camellia-aesni-avx-amd64.S
@@ -0,0 +1,2618 @@
+/* camellia-avx-aesni-amd64.S  -  AES-NI/AVX implementation of Camellia cipher
+ *
+ * Copyright (C) 2013-2015,2020 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+
+#ifdef __x86_64
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
+    defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX_SUPPORT)
+
+#include "asm-common-amd64.h"
+
+#define CAMELLIA_TABLE_BYTE_LEN 272
+
+/* struct CAMELLIA_context: */
+#define key_table 0
+#define key_bitlength CAMELLIA_TABLE_BYTE_LEN
+
+/* register macros */
+#define CTX %rdi
+
+/**********************************************************************
+  helper macros
+ **********************************************************************/
+#define filter_8bit(x, lo_t, hi_t, mask4bit, tmp0) \
+       vpand x, mask4bit, tmp0; \
+       vpandn x, mask4bit, x; \
+       vpsrld $4, x, x; \
+       \
+       vpshufb tmp0, lo_t, tmp0; \
+       vpshufb x, hi_t, x; \
+       vpxor tmp0, x, x;
+
+/**********************************************************************
+  16-way camellia
+ **********************************************************************/
+
+/*
+ * IN:
+ *   x0..x7: byte-sliced AB state
+ *   mem_cd: register pointer storing CD state
+ *   key: index for key material
+ * OUT:
+ *   x0..x7: new byte-sliced CD state
+ */
+#define roundsm16(x0, x1, x2, x3, x4, x5, x6, x7, t0, t1, t2, t3, t4, t5, t6, \
+                 t7, mem_cd, key) \
+       /* \
+        * S-function with AES subbytes \
+        */ \
+       vmovdqa .Linv_shift_row rRIP, t4; \
+       vbroadcastss .L0f0f0f0f rRIP, t7; \
+       vmovdqa .Lpre_tf_lo_s1 rRIP, t0; \
+       vmovdqa .Lpre_tf_hi_s1 rRIP, t1; \
+       \
+       /* AES inverse shift rows */ \
+       vpshufb t4, x0, x0; \
+       vpshufb t4, x7, x7; \
+       vpshufb t4, x1, x1; \
+       vpshufb t4, x4, x4; \
+       vpshufb t4, x2, x2; \
+       vpshufb t4, x5, x5; \
+       vpshufb t4, x3, x3; \
+       vpshufb t4, x6, x6; \
+       \
+       /* prefilter sboxes 1, 2 and 3 */ \
+       vmovdqa .Lpre_tf_lo_s4 rRIP, t2; \
+       vmovdqa .Lpre_tf_hi_s4 rRIP, t3; \
+       filter_8bit(x0, t0, t1, t7, t6); \
+       filter_8bit(x7, t0, t1, t7, t6); \
+       filter_8bit(x1, t0, t1, t7, t6); \
+       filter_8bit(x4, t0, t1, t7, t6); \
+       filter_8bit(x2, t0, t1, t7, t6); \
+       filter_8bit(x5, t0, t1, t7, t6); \
+       \
+       /* prefilter sbox 4 */ \
+       vpxor t4, t4, t4; \
+       filter_8bit(x3, t2, t3, t7, t6); \
+       filter_8bit(x6, t2, t3, t7, t6); \
+       \
+       /* AES subbytes + AES shift rows */ \
+       vmovdqa .Lpost_tf_lo_s1 rRIP, t0; \
+       vmovdqa .Lpost_tf_hi_s1 rRIP, t1; \
+       vaesenclast t4, x0, x0; \
+       vaesenclast t4, x7, x7; \
+       vaesenclast t4, x1, x1; \
+       vaesenclast t4, x4, x4; \
+       vaesenclast t4, x2, x2; \
+       vaesenclast t4, x5, x5; \
+       vaesenclast t4, x3, x3; \
+       vaesenclast t4, x6, x6; \
+       \
+       /* postfilter sboxes 1 and 4 */ \
+       vmovdqa .Lpost_tf_lo_s3 rRIP, t2; \
+       vmovdqa .Lpost_tf_hi_s3 rRIP, t3; \
+       filter_8bit(x0, t0, t1, t7, t6); \
+       filter_8bit(x7, t0, t1, t7, t6); \
+       filter_8bit(x3, t0, t1, t7, t6); \
+       filter_8bit(x6, t0, t1, t7, t6); \
+       \
+       /* postfilter sbox 3 */ \
+       vmovdqa .Lpost_tf_lo_s2 rRIP, t4; \
+       vmovdqa .Lpost_tf_hi_s2 rRIP, t5; \
+       filter_8bit(x2, t2, t3, t7, t6); \
+       filter_8bit(x5, t2, t3, t7, t6); \
+       \
+       vpxor t6, t6, t6; \
+       vmovq key, t0; \
+       \
+       /* postfilter sbox 2 */ \
+       filter_8bit(x1, t4, t5, t7, t2); \
+       filter_8bit(x4, t4, t5, t7, t2); \
+       \
+       vpsrldq $5, t0, t5; \
+       vpsrldq $1, t0, t1; \
+       vpsrldq $2, t0, t2; \
+       vpsrldq $3, t0, t3; \
+       vpsrldq $4, t0, t4; \
+       vpshufb t6, t0, t0; \
+       vpshufb t6, t1, t1; \
+       vpshufb t6, t2, t2; \
+       vpshufb t6, t3, t3; \
+       vpshufb t6, t4, t4; \
+       vpsrldq $2, t5, t7; \
+       vpshufb t6, t7, t7; \
+       \
+       /* P-function */ \
+       vpxor x5, x0, x0; \
+       vpxor x6, x1, x1; \
+       vpxor x7, x2, x2; \
+       vpxor x4, x3, x3; \
+       \
+       vpxor x2, x4, x4; \
+       vpxor x3, x5, x5; \
+       vpxor x0, x6, x6; \
+       vpxor x1, x7, x7; \
+       \
+       vpxor x7, x0, x0; \
+       vpxor x4, x1, x1; \
+       vpxor x5, x2, x2; \
+       vpxor x6, x3, x3; \
+       \
+       vpxor x3, x4, x4; \
+       vpxor x0, x5, x5; \
+       vpxor x1, x6, x6; \
+       vpxor x2, x7, x7; /* note: high and low parts swapped */ \
+       \
+       /* Add key material and result to CD (x becomes new CD) */ \
+       \
+       vpxor t3, x4, x4; \
+       vpxor 0 * 16(mem_cd), x4, x4; \
+       \
+       vpxor t2, x5, x5; \
+       vpxor 1 * 16(mem_cd), x5, x5; \
+       \
+       vpsrldq $1, t5, t3; \
+       vpshufb t6, t5, t5; \
+       vpshufb t6, t3, t6; \
+       \
+       vpxor t1, x6, x6; \
+       vpxor 2 * 16(mem_cd), x6, x6; \
+       \
+       vpxor t0, x7, x7; \
+       vpxor 3 * 16(mem_cd), x7, x7; \
+       \
+       vpxor t7, x0, x0; \
+       vpxor 4 * 16(mem_cd), x0, x0; \
+       \
+       vpxor t6, x1, x1; \
+       vpxor 5 * 16(mem_cd), x1, x1; \
+       \
+       vpxor t5, x2, x2; \
+       vpxor 6 * 16(mem_cd), x2, x2; \
+       \
+       vpxor t4, x3, x3; \
+       vpxor 7 * 16(mem_cd), x3, x3;
+
+/*
+ * IN/OUT:
+ *  x0..x7: byte-sliced AB state preloaded
+ *  mem_ab: byte-sliced AB state in memory
+ *  mem_cb: byte-sliced CD state in memory
+ */
+#define two_roundsm16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
+                     y6, y7, mem_ab, mem_cd, i, dir, store_ab) \
+       roundsm16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
+                 y6, y7, mem_cd, (key_table + (i) * 8)(CTX)); \
+       \
+       vmovdqu x4, 0 * 16(mem_cd); \
+       vmovdqu x5, 1 * 16(mem_cd); \
+       vmovdqu x6, 2 * 16(mem_cd); \
+       vmovdqu x7, 3 * 16(mem_cd); \
+       vmovdqu x0, 4 * 16(mem_cd); \
+       vmovdqu x1, 5 * 16(mem_cd); \
+       vmovdqu x2, 6 * 16(mem_cd); \
+       vmovdqu x3, 7 * 16(mem_cd); \
+       \
+       roundsm16(x4, x5, x6, x7, x0, x1, x2, x3, y0, y1, y2, y3, y4, y5, \
+                 y6, y7, mem_ab, (key_table + ((i) + (dir)) * 8)(CTX)); \
+       \
+       store_ab(x0, x1, x2, x3, x4, x5, x6, x7, mem_ab);
+
+#define dummy_store(x0, x1, x2, x3, x4, x5, x6, x7, mem_ab) /* do nothing */
+
+#define store_ab_state(x0, x1, x2, x3, x4, x5, x6, x7, mem_ab) \
+       /* Store new AB state */ \
+       vmovdqu x0, 0 * 16(mem_ab); \
+       vmovdqu x1, 1 * 16(mem_ab); \
+       vmovdqu x2, 2 * 16(mem_ab); \
+       vmovdqu x3, 3 * 16(mem_ab); \
+       vmovdqu x4, 4 * 16(mem_ab); \
+       vmovdqu x5, 5 * 16(mem_ab); \
+       vmovdqu x6, 6 * 16(mem_ab); \
+       vmovdqu x7, 7 * 16(mem_ab);
+
+#define enc_rounds16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
+                     y6, y7, mem_ab, mem_cd, i) \
+       two_roundsm16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
+                     y6, y7, mem_ab, mem_cd, (i) + 2, 1, store_ab_state); \
+       two_roundsm16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
+                     y6, y7, mem_ab, mem_cd, (i) + 4, 1, store_ab_state); \
+       two_roundsm16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
+                     y6, y7, mem_ab, mem_cd, (i) + 6, 1, dummy_store);
+
+#define dec_rounds16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
+                     y6, y7, mem_ab, mem_cd, i) \
+       two_roundsm16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
+                     y6, y7, mem_ab, mem_cd, (i) + 7, -1, store_ab_state); \
+       two_roundsm16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
+                     y6, y7, mem_ab, mem_cd, (i) + 5, -1, store_ab_state); \
+       two_roundsm16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
+                     y6, y7, mem_ab, mem_cd, (i) + 3, -1, dummy_store);
+
+/*
+ * IN:
+ *  v0..3: byte-sliced 32-bit integers
+ * OUT:
+ *  v0..3: (IN <<< 1)
+ */
+#define rol32_1_16(v0, v1, v2, v3, t0, t1, t2, zero) \
+       vpcmpgtb v0, zero, t0; \
+       vpaddb v0, v0, v0; \
+       vpabsb t0, t0; \
+       \
+       vpcmpgtb v1, zero, t1; \
+       vpaddb v1, v1, v1; \
+       vpabsb t1, t1; \
+       \
+       vpcmpgtb v2, zero, t2; \
+       vpaddb v2, v2, v2; \
+       vpabsb t2, t2; \
+       \
+       vpor t0, v1, v1; \
+       \
+       vpcmpgtb v3, zero, t0; \
+       vpaddb v3, v3, v3; \
+       vpabsb t0, t0; \
+       \
+       vpor t1, v2, v2; \
+       vpor t2, v3, v3; \
+       vpor t0, v0, v0;
+
+/*
+ * IN:
+ *   r: byte-sliced AB state in memory
+ *   l: byte-sliced CD state in memory
+ * OUT:
+ *   x0..x7: new byte-sliced CD state
+ */
+#define fls16(l, l0, l1, l2, l3, l4, l5, l6, l7, r, t0, t1, t2, t3, tt0, \
+             tt1, tt2, tt3, kll, klr, krl, krr) \
+       /* \
+        * t0 = kll; \
+        * t0 &= ll; \
+        * lr ^= rol32(t0, 1); \
+        */ \
+       vpxor tt0, tt0, tt0; \
+       vmovd kll, t0; \
+       vpshufb tt0, t0, t3; \
+       vpsrldq $1, t0, t0; \
+       vpshufb tt0, t0, t2; \
+       vpsrldq $1, t0, t0; \
+       vpshufb tt0, t0, t1; \
+       vpsrldq $1, t0, t0; \
+       vpshufb tt0, t0, t0; \
+       \
+       vpand l0, t0, t0; \
+       vpand l1, t1, t1; \
+       vpand l2, t2, t2; \
+       vpand l3, t3, t3; \
+       \
+       rol32_1_16(t3, t2, t1, t0, tt1, tt2, tt3, tt0); \
+       \
+       vpxor l4, t0, l4; \
+       vmovdqu l4, 4 * 16(l); \
+       vpxor l5, t1, l5; \
+       vmovdqu l5, 5 * 16(l); \
+       vpxor l6, t2, l6; \
+       vmovdqu l6, 6 * 16(l); \
+       vpxor l7, t3, l7; \
+       vmovdqu l7, 7 * 16(l); \
+       \
+       /* \
+        * t2 = krr; \
+        * t2 |= rr; \
+        * rl ^= t2; \
+        */ \
+       \
+       vmovd krr, t0; \
+       vpshufb tt0, t0, t3; \
+       vpsrldq $1, t0, t0; \
+       vpshufb tt0, t0, t2; \
+       vpsrldq $1, t0, t0; \
+       vpshufb tt0, t0, t1; \
+       vpsrldq $1, t0, t0; \
+       vpshufb tt0, t0, t0; \
+       \
+       vpor 4 * 16(r), t0, t0; \
+       vpor 5 * 16(r), t1, t1; \
+       vpor 6 * 16(r), t2, t2; \
+       vpor 7 * 16(r), t3, t3; \
+       \
+       vpxor 0 * 16(r), t0, t0; \
+       vpxor 1 * 16(r), t1, t1; \
+       vpxor 2 * 16(r), t2, t2; \
+       vpxor 3 * 16(r), t3, t3; \
+       vmovdqu t0, 0 * 16(r); \
+       vmovdqu t1, 1 * 16(r); \
+       vmovdqu t2, 2 * 16(r); \
+       vmovdqu t3, 3 * 16(r); \
+       \
+       /* \
+        * t2 = krl; \
+        * t2 &= rl; \
+        * rr ^= rol32(t2, 1); \
+        */ \
+       vmovd krl, t0; \
+       vpshufb tt0, t0, t3; \
+       vpsrldq $1, t0, t0; \
+       vpshufb tt0, t0, t2; \
+       vpsrldq $1, t0, t0; \
+       vpshufb tt0, t0, t1; \
+       vpsrldq $1, t0, t0; \
+       vpshufb tt0, t0, t0; \
+       \
+       vpand 0 * 16(r), t0, t0; \
+       vpand 1 * 16(r), t1, t1; \
+       vpand 2 * 16(r), t2, t2; \
+       vpand 3 * 16(r), t3, t3; \
+       \
+       rol32_1_16(t3, t2, t1, t0, tt1, tt2, tt3, tt0); \
+       \
+       vpxor 4 * 16(r), t0, t0; \
+       vpxor 5 * 16(r), t1, t1; \
+       vpxor 6 * 16(r), t2, t2; \
+       vpxor 7 * 16(r), t3, t3; \
+       vmovdqu t0, 4 * 16(r); \
+       vmovdqu t1, 5 * 16(r); \
+       vmovdqu t2, 6 * 16(r); \
+       vmovdqu t3, 7 * 16(r); \
+       \
+       /* \
+        * t0 = klr; \
+        * t0 |= lr; \
+        * ll ^= t0; \
+        */ \
+       \
+       vmovd klr, t0; \
+       vpshufb tt0, t0, t3; \
+       vpsrldq $1, t0, t0; \
+       vpshufb tt0, t0, t2; \
+       vpsrldq $1, t0, t0; \
+       vpshufb tt0, t0, t1; \
+       vpsrldq $1, t0, t0; \
+       vpshufb tt0, t0, t0; \
+       \
+       vpor l4, t0, t0; \
+       vpor l5, t1, t1; \
+       vpor l6, t2, t2; \
+       vpor l7, t3, t3; \
+       \
+       vpxor l0, t0, l0; \
+       vmovdqu l0, 0 * 16(l); \
+       vpxor l1, t1, l1; \
+       vmovdqu l1, 1 * 16(l); \
+       vpxor l2, t2, l2; \
+       vmovdqu l2, 2 * 16(l); \
+       vpxor l3, t3, l3; \
+       vmovdqu l3, 3 * 16(l);
+
+#define transpose_4x4(x0, x1, x2, x3, t1, t2) \
+       vpunpckhdq x1, x0, t2; \
+       vpunpckldq x1, x0, x0; \
+       \
+       vpunpckldq x3, x2, t1; \
+       vpunpckhdq x3, x2, x2; \
+       \
+       vpunpckhqdq t1, x0, x1; \
+       vpunpcklqdq t1, x0, x0; \
+       \
+       vpunpckhqdq x2, t2, x3; \
+       vpunpcklqdq x2, t2, x2;
+
+#define byteslice_16x16b_fast(a0, b0, c0, d0, a1, b1, c1, d1, a2, b2, c2, d2, \
+                             a3, b3, c3, d3, st0, st1) \
+       vmovdqu d2, st0; \
+       vmovdqu d3, st1; \
+       transpose_4x4(a0, a1, a2, a3, d2, d3); \
+       transpose_4x4(b0, b1, b2, b3, d2, d3); \
+       vmovdqu st0, d2; \
+       vmovdqu st1, d3; \
+       \
+       vmovdqu a0, st0; \
+       vmovdqu a1, st1; \
+       transpose_4x4(c0, c1, c2, c3, a0, a1); \
+       transpose_4x4(d0, d1, d2, d3, a0, a1); \
+       \
+       vmovdqu .Lshufb_16x16b rRIP, a0; \
+       vmovdqu st1, a1; \
+       vpshufb a0, a2, a2; \
+       vpshufb a0, a3, a3; \
+       vpshufb a0, b0, b0; \
+       vpshufb a0, b1, b1; \
+       vpshufb a0, b2, b2; \
+       vpshufb a0, b3, b3; \
+       vpshufb a0, a1, a1; \
+       vpshufb a0, c0, c0; \
+       vpshufb a0, c1, c1; \
+       vpshufb a0, c2, c2; \
+       vpshufb a0, c3, c3; \
+       vpshufb a0, d0, d0; \
+       vpshufb a0, d1, d1; \
+       vpshufb a0, d2, d2; \
+       vpshufb a0, d3, d3; \
+       vmovdqu d3, st1; \
+       vmovdqu st0, d3; \
+       vpshufb a0, d3, a0; \
+       vmovdqu d2, st0; \
+       \
+       transpose_4x4(a0, b0, c0, d0, d2, d3); \
+       transpose_4x4(a1, b1, c1, d1, d2, d3); \
+       vmovdqu st0, d2; \
+       vmovdqu st1, d3; \
+       \
+       vmovdqu b0, st0; \
+       vmovdqu b1, st1; \
+       transpose_4x4(a2, b2, c2, d2, b0, b1); \
+       transpose_4x4(a3, b3, c3, d3, b0, b1); \
+       vmovdqu st0, b0; \
+       vmovdqu st1, b1; \
+       /* does not adjust output bytes inside vectors */
+
+#define transpose_8x8b(a, b, c, d, e, f, g, h, t0, t1, t2, t3, t4) \
+       vpunpcklbw a, b, t0; \
+       vpunpckhbw a, b, b; \
+       \
+       vpunpcklbw c, d, t1; \
+       vpunpckhbw c, d, d; \
+       \
+       vpunpcklbw e, f, t2; \
+       vpunpckhbw e, f, f; \
+       \
+       vpunpcklbw g, h, t3; \
+       vpunpckhbw g, h, h; \
+       \
+       vpunpcklwd t0, t1, g; \
+       vpunpckhwd t0, t1, t0; \
+       \
+       vpunpcklwd b, d, t1; \
+       vpunpckhwd b, d, e; \
+       \
+       vpunpcklwd t2, t3, c; \
+       vpunpckhwd t2, t3, t2; \
+       \
+       vpunpcklwd f, h, t3; \
+       vpunpckhwd f, h, b; \
+       \
+       vpunpcklwd e, b, t4; \
+       vpunpckhwd e, b, b; \
+       \
+       vpunpcklwd t1, t3, e; \
+       vpunpckhwd t1, t3, f; \
+       \
+       vmovdqa .Ltranspose_8x8_shuf rRIP, t3; \
+       \
+       vpunpcklwd g, c, d; \
+       vpunpckhwd g, c, c; \
+       \
+       vpunpcklwd t0, t2, t1; \
+       vpunpckhwd t0, t2, h; \
+       \
+       vpunpckhqdq b, h, a; \
+       vpshufb t3, a, a; \
+       vpunpcklqdq b, h, b; \
+       vpshufb t3, b, b; \
+       \
+       vpunpckhqdq e, d, g; \
+       vpshufb t3, g, g; \
+       vpunpcklqdq e, d, h; \
+       vpshufb t3, h, h; \
+       \
+       vpunpckhqdq f, c, e; \
+       vpshufb t3, e, e; \
+       vpunpcklqdq f, c, f; \
+       vpshufb t3, f, f; \
+       \
+       vpunpckhqdq t4, t1, c; \
+       vpshufb t3, c, c; \
+       vpunpcklqdq t4, t1, d; \
+       vpshufb t3, d, d;
+
+/* load blocks to registers and apply pre-whitening */
+#define inpack16_pre(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
+                    y6, y7, rio, key) \
+       vmovq key, x0; \
+       vpshufb .Lpack_bswap rRIP, x0, x0; \
+       \
+       vpxor 0 * 16(rio), x0, y7; \
+       vpxor 1 * 16(rio), x0, y6; \
+       vpxor 2 * 16(rio), x0, y5; \
+       vpxor 3 * 16(rio), x0, y4; \
+       vpxor 4 * 16(rio), x0, y3; \
+       vpxor 5 * 16(rio), x0, y2; \
+       vpxor 6 * 16(rio), x0, y1; \
+       vpxor 7 * 16(rio), x0, y0; \
+       vpxor 8 * 16(rio), x0, x7; \
+       vpxor 9 * 16(rio), x0, x6; \
+       vpxor 10 * 16(rio), x0, x5; \
+       vpxor 11 * 16(rio), x0, x4; \
+       vpxor 12 * 16(rio), x0, x3; \
+       vpxor 13 * 16(rio), x0, x2; \
+       vpxor 14 * 16(rio), x0, x1; \
+       vpxor 15 * 16(rio), x0, x0;
+
+/* byteslice pre-whitened blocks and store to temporary memory */
+#define inpack16_post(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
+                     y6, y7, mem_ab, mem_cd) \
+       byteslice_16x16b_fast(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, \
+                             y4, y5, y6, y7, (mem_ab), (mem_cd)); \
+       \
+       vmovdqu x0, 0 * 16(mem_ab); \
+       vmovdqu x1, 1 * 16(mem_ab); \
+       vmovdqu x2, 2 * 16(mem_ab); \
+       vmovdqu x3, 3 * 16(mem_ab); \
+       vmovdqu x4, 4 * 16(mem_ab); \
+       vmovdqu x5, 5 * 16(mem_ab); \
+       vmovdqu x6, 6 * 16(mem_ab); \
+       vmovdqu x7, 7 * 16(mem_ab); \
+       vmovdqu y0, 0 * 16(mem_cd); \
+       vmovdqu y1, 1 * 16(mem_cd); \
+       vmovdqu y2, 2 * 16(mem_cd); \
+       vmovdqu y3, 3 * 16(mem_cd); \
+       vmovdqu y4, 4 * 16(mem_cd); \
+       vmovdqu y5, 5 * 16(mem_cd); \
+       vmovdqu y6, 6 * 16(mem_cd); \
+       vmovdqu y7, 7 * 16(mem_cd);
+
+/* de-byteslice, apply post-whitening and store blocks */
+#define outunpack16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, \
+                   y5, y6, y7, key, stack_tmp0, stack_tmp1) \
+       byteslice_16x16b_fast(y0, y4, x0, x4, y1, y5, x1, x5, y2, y6, x2, x6, \
+                             y3, y7, x3, x7, stack_tmp0, stack_tmp1); \
+       \
+       vmovdqu x0, stack_tmp0; \
+       \
+       vmovq key, x0; \
+       vpshufb .Lpack_bswap rRIP, x0, x0; \
+       \
+       vpxor x0, y7, y7; \
+       vpxor x0, y6, y6; \
+       vpxor x0, y5, y5; \
+       vpxor x0, y4, y4; \
+       vpxor x0, y3, y3; \
+       vpxor x0, y2, y2; \
+       vpxor x0, y1, y1; \
+       vpxor x0, y0, y0; \
+       vpxor x0, x7, x7; \
+       vpxor x0, x6, x6; \
+       vpxor x0, x5, x5; \
+       vpxor x0, x4, x4; \
+       vpxor x0, x3, x3; \
+       vpxor x0, x2, x2; \
+       vpxor x0, x1, x1; \
+       vpxor stack_tmp0, x0, x0;
+
+#define write_output(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
+                    y6, y7, rio) \
+       vmovdqu x0, 0 * 16(rio); \
+       vmovdqu x1, 1 * 16(rio); \
+       vmovdqu x2, 2 * 16(rio); \
+       vmovdqu x3, 3 * 16(rio); \
+       vmovdqu x4, 4 * 16(rio); \
+       vmovdqu x5, 5 * 16(rio); \
+       vmovdqu x6, 6 * 16(rio); \
+       vmovdqu x7, 7 * 16(rio); \
+       vmovdqu y0, 8 * 16(rio); \
+       vmovdqu y1, 9 * 16(rio); \
+       vmovdqu y2, 10 * 16(rio); \
+       vmovdqu y3, 11 * 16(rio); \
+       vmovdqu y4, 12 * 16(rio); \
+       vmovdqu y5, 13 * 16(rio); \
+       vmovdqu y6, 14 * 16(rio); \
+       vmovdqu y7, 15 * 16(rio);
+
+.text
+.align 16
+
+#define SHUFB_BYTES(idx) \
+       0 + (idx), 4 + (idx), 8 + (idx), 12 + (idx)
+
+.Lshufb_16x16b:
+       .byte SHUFB_BYTES(0), SHUFB_BYTES(1), SHUFB_BYTES(2), SHUFB_BYTES(3);
+
+.Lpack_bswap:
+       .long 0x00010203
+       .long 0x04050607
+       .long 0x80808080
+       .long 0x80808080
+
+/* For CTR-mode IV byteswap */
+.Lbswap128_mask:
+       .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
+
+/*
+ * pre-SubByte transform
+ *
+ * pre-lookup for sbox1, sbox2, sbox3:
+ *   swap_bitendianness(
+ *       isom_map_camellia_to_aes(
+ *           camellia_f(
+ *               swap_bitendianess(in)
+ *           )
+ *       )
+ *   )
+ *
+ * (note: '⊕ 0xc5' inside camellia_f())
+ */
+.Lpre_tf_lo_s1:
+       .byte 0x45, 0xe8, 0x40, 0xed, 0x2e, 0x83, 0x2b, 0x86
+       .byte 0x4b, 0xe6, 0x4e, 0xe3, 0x20, 0x8d, 0x25, 0x88
+.Lpre_tf_hi_s1:
+       .byte 0x00, 0x51, 0xf1, 0xa0, 0x8a, 0xdb, 0x7b, 0x2a
+       .byte 0x09, 0x58, 0xf8, 0xa9, 0x83, 0xd2, 0x72, 0x23
+
+/*
+ * pre-SubByte transform
+ *
+ * pre-lookup for sbox4:
+ *   swap_bitendianness(
+ *       isom_map_camellia_to_aes(
+ *           camellia_f(
+ *               swap_bitendianess(in <<< 1)
+ *           )
+ *       )
+ *   )
+ *
+ * (note: '⊕ 0xc5' inside camellia_f())
+ */
+.Lpre_tf_lo_s4:
+       .byte 0x45, 0x40, 0x2e, 0x2b, 0x4b, 0x4e, 0x20, 0x25
+       .byte 0x14, 0x11, 0x7f, 0x7a, 0x1a, 0x1f, 0x71, 0x74
+.Lpre_tf_hi_s4:
+       .byte 0x00, 0xf1, 0x8a, 0x7b, 0x09, 0xf8, 0x83, 0x72
+       .byte 0xad, 0x5c, 0x27, 0xd6, 0xa4, 0x55, 0x2e, 0xdf
+
+/*
+ * post-SubByte transform
+ *
+ * post-lookup for sbox1, sbox4:
+ *  swap_bitendianness(
+ *      camellia_h(
+ *          isom_map_aes_to_camellia(
+ *              swap_bitendianness(
+ *                  aes_inverse_affine_transform(in)
+ *              )
+ *          )
+ *      )
+ *  )
+ *
+ * (note: '⊕ 0x6e' inside camellia_h())
+ */
+.Lpost_tf_lo_s1:
+       .byte 0x3c, 0xcc, 0xcf, 0x3f, 0x32, 0xc2, 0xc1, 0x31
+       .byte 0xdc, 0x2c, 0x2f, 0xdf, 0xd2, 0x22, 0x21, 0xd1
+.Lpost_tf_hi_s1:
+       .byte 0x00, 0xf9, 0x86, 0x7f, 0xd7, 0x2e, 0x51, 0xa8
+       .byte 0xa4, 0x5d, 0x22, 0xdb, 0x73, 0x8a, 0xf5, 0x0c
+
+/*
+ * post-SubByte transform
+ *
+ * post-lookup for sbox2:
+ *  swap_bitendianness(
+ *      camellia_h(
+ *          isom_map_aes_to_camellia(
+ *              swap_bitendianness(
+ *                  aes_inverse_affine_transform(in)
+ *              )
+ *          )
+ *      )
+ *  ) <<< 1
+ *
+ * (note: '⊕ 0x6e' inside camellia_h())
+ */
+.Lpost_tf_lo_s2:
+       .byte 0x78, 0x99, 0x9f, 0x7e, 0x64, 0x85, 0x83, 0x62
+       .byte 0xb9, 0x58, 0x5e, 0xbf, 0xa5, 0x44, 0x42, 0xa3
+.Lpost_tf_hi_s2:
+       .byte 0x00, 0xf3, 0x0d, 0xfe, 0xaf, 0x5c, 0xa2, 0x51
+       .byte 0x49, 0xba, 0x44, 0xb7, 0xe6, 0x15, 0xeb, 0x18
+
+/*
+ * post-SubByte transform
+ *
+ * post-lookup for sbox3:
+ *  swap_bitendianness(
+ *      camellia_h(
+ *          isom_map_aes_to_camellia(
+ *              swap_bitendianness(
+ *                  aes_inverse_affine_transform(in)
+ *              )
+ *          )
+ *      )
+ *  ) >>> 1
+ *
+ * (note: '⊕ 0x6e' inside camellia_h())
+ */
+.Lpost_tf_lo_s3:
+       .byte 0x1e, 0x66, 0xe7, 0x9f, 0x19, 0x61, 0xe0, 0x98
+       .byte 0x6e, 0x16, 0x97, 0xef, 0x69, 0x11, 0x90, 0xe8
+.Lpost_tf_hi_s3:
+       .byte 0x00, 0xfc, 0x43, 0xbf, 0xeb, 0x17, 0xa8, 0x54
+       .byte 0x52, 0xae, 0x11, 0xed, 0xb9, 0x45, 0xfa, 0x06
+
+/* For isolating SubBytes from AESENCLAST, inverse shift row */
+.Linv_shift_row:
+       .byte 0x00, 0x0d, 0x0a, 0x07, 0x04, 0x01, 0x0e, 0x0b
+       .byte 0x08, 0x05, 0x02, 0x0f, 0x0c, 0x09, 0x06, 0x03
+
+/* shuffle mask for 8x8 byte transpose */
+.Ltranspose_8x8_shuf:
+       .byte 0, 1, 4, 5, 2, 3, 6, 7, 8+0, 8+1, 8+4, 8+5, 8+2, 8+3, 8+6, 8+7
+
+.align 4
+/* 4-bit mask */
+.L0f0f0f0f:
+       .long 0x0f0f0f0f
+
+
+.align 8
+ELF(.type   __camellia_enc_blk16,@function;)
+
+__camellia_enc_blk16:
+       /* input:
+        *      %rdi: ctx, CTX
+        *      %rax: temporary storage, 256 bytes
+        *      %r8d: 24 for 16 byte key, 32 for larger
+        *      %xmm0..%xmm15: 16 plaintext blocks
+        * output:
+        *      %xmm0..%xmm15: 16 encrypted blocks, order swapped:
+        *       7, 8, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8
+        */
+       CFI_STARTPROC();
+
+       leaq 8 * 16(%rax), %rcx;
+
+       leaq (-8 * 8)(CTX, %r8, 8), %r8;
+
+       inpack16_post(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
+                     %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
+                     %xmm15, %rax, %rcx);
+
+.align 8
+.Lenc_loop:
+       enc_rounds16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
+                    %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
+                    %xmm15, %rax, %rcx, 0);
+
+       cmpq %r8, CTX;
+       je .Lenc_done;
+       leaq (8 * 8)(CTX), CTX;
+
+       fls16(%rax, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
+             %rcx, %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
+             %xmm15,
+             ((key_table) + 0)(CTX),
+             ((key_table) + 4)(CTX),
+             ((key_table) + 8)(CTX),
+             ((key_table) + 12)(CTX));
+       jmp .Lenc_loop;
+
+.align 8
+.Lenc_done:
+       /* load CD for output */
+       vmovdqu 0 * 16(%rcx), %xmm8;
+       vmovdqu 1 * 16(%rcx), %xmm9;
+       vmovdqu 2 * 16(%rcx), %xmm10;
+       vmovdqu 3 * 16(%rcx), %xmm11;
+       vmovdqu 4 * 16(%rcx), %xmm12;
+       vmovdqu 5 * 16(%rcx), %xmm13;
+       vmovdqu 6 * 16(%rcx), %xmm14;
+       vmovdqu 7 * 16(%rcx), %xmm15;
+
+       outunpack16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
+                   %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
+                   %xmm15, ((key_table) + 8 * 8)(%r8), (%rax), 1 * 16(%rax));
+
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size __camellia_enc_blk16,.-__camellia_enc_blk16;)
+
+.align 8
+ELF(.type   __camellia_dec_blk16,@function;)
+
+__camellia_dec_blk16:
+       /* input:
+        *      %rdi: ctx, CTX
+        *      %rax: temporary storage, 256 bytes
+        *      %r8d: 24 for 16 byte key, 32 for larger
+        *      %xmm0..%xmm15: 16 encrypted blocks
+        * output:
+        *      %xmm0..%xmm15: 16 plaintext blocks, order swapped:
+        *       7, 8, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8
+        */
+       CFI_STARTPROC();
+
+       movq %r8, %rcx;
+       movq CTX, %r8
+       leaq (-8 * 8)(CTX, %rcx, 8), CTX;
+
+       leaq 8 * 16(%rax), %rcx;
+
+       inpack16_post(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
+                     %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
+                     %xmm15, %rax, %rcx);
+
+.align 8
+.Ldec_loop:
+       dec_rounds16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
+                    %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
+                    %xmm15, %rax, %rcx, 0);
+
+       cmpq %r8, CTX;
+       je .Ldec_done;
+
+       fls16(%rax, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
+             %rcx, %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
+             %xmm15,
+             ((key_table) + 8)(CTX),
+             ((key_table) + 12)(CTX),
+             ((key_table) + 0)(CTX),
+             ((key_table) + 4)(CTX));
+
+       leaq (-8 * 8)(CTX), CTX;
+       jmp .Ldec_loop;
+
+.align 8
+.Ldec_done:
+       /* load CD for output */
+       vmovdqu 0 * 16(%rcx), %xmm8;
+       vmovdqu 1 * 16(%rcx), %xmm9;
+       vmovdqu 2 * 16(%rcx), %xmm10;
+       vmovdqu 3 * 16(%rcx), %xmm11;
+       vmovdqu 4 * 16(%rcx), %xmm12;
+       vmovdqu 5 * 16(%rcx), %xmm13;
+       vmovdqu 6 * 16(%rcx), %xmm14;
+       vmovdqu 7 * 16(%rcx), %xmm15;
+
+       outunpack16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
+                   %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
+                   %xmm15, (key_table)(CTX), (%rax), 1 * 16(%rax));
+
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size __camellia_dec_blk16,.-__camellia_dec_blk16;)
+
+#define inc_le128(x, minus_one, tmp) \
+       vpcmpeqq minus_one, x, tmp; \
+       vpsubq minus_one, x, x; \
+       vpslldq $8, tmp, tmp; \
+       vpsubq tmp, x, x;
+
+.align 8
+.globl _gcry_camellia_aesni_avx_ctr_enc
+ELF(.type   _gcry_camellia_aesni_avx_ctr_enc,@function;)
+
+_gcry_camellia_aesni_avx_ctr_enc:
+       /* input:
+        *      %rdi: ctx, CTX
+        *      %rsi: dst (16 blocks)
+        *      %rdx: src (16 blocks)
+        *      %rcx: iv (big endian, 128bit)
+        */
+       CFI_STARTPROC();
+
+       pushq %rbp;
+       CFI_PUSH(%rbp);
+       movq %rsp, %rbp;
+       CFI_DEF_CFA_REGISTER(%rbp);
+
+       vzeroupper;
+
+       cmpl $128, key_bitlength(CTX);
+       movl $32, %r8d;
+       movl $24, %eax;
+       cmovel %eax, %r8d; /* max */
+
+       subq $(16 * 16), %rsp;
+       andq $~31, %rsp;
+       movq %rsp, %rax;
+
+       vmovdqa .Lbswap128_mask rRIP, %xmm14;
+
+       /* load IV and byteswap */
+       vmovdqu (%rcx), %xmm15;
+       vmovdqu %xmm15, 15 * 16(%rax);
+       vpshufb %xmm14, %xmm15, %xmm0; /* be => le */
+
+       vpcmpeqd %xmm15, %xmm15, %xmm15;
+       vpsrldq $8, %xmm15, %xmm15; /* low: -1, high: 0 */
+
+       /* construct IVs */
+       inc_le128(%xmm0, %xmm15, %xmm13);
+       vpshufb %xmm14, %xmm0, %xmm13;
+       vmovdqu %xmm13, 14 * 16(%rax);
+       inc_le128(%xmm0, %xmm15, %xmm13);
+       vpshufb %xmm14, %xmm0, %xmm13;
+       vmovdqu %xmm13, 13 * 16(%rax);
+       inc_le128(%xmm0, %xmm15, %xmm13);
+       vpshufb %xmm14, %xmm0, %xmm12;
+       inc_le128(%xmm0, %xmm15, %xmm13);
+       vpshufb %xmm14, %xmm0, %xmm11;
+       inc_le128(%xmm0, %xmm15, %xmm13);
+       vpshufb %xmm14, %xmm0, %xmm10;
+       inc_le128(%xmm0, %xmm15, %xmm13);
+       vpshufb %xmm14, %xmm0, %xmm9;
+       inc_le128(%xmm0, %xmm15, %xmm13);
+       vpshufb %xmm14, %xmm0, %xmm8;
+       inc_le128(%xmm0, %xmm15, %xmm13);
+       vpshufb %xmm14, %xmm0, %xmm7;
+       inc_le128(%xmm0, %xmm15, %xmm13);
+       vpshufb %xmm14, %xmm0, %xmm6;
+       inc_le128(%xmm0, %xmm15, %xmm13);
+       vpshufb %xmm14, %xmm0, %xmm5;
+       inc_le128(%xmm0, %xmm15, %xmm13);
+       vpshufb %xmm14, %xmm0, %xmm4;
+       inc_le128(%xmm0, %xmm15, %xmm13);
+       vpshufb %xmm14, %xmm0, %xmm3;
+       inc_le128(%xmm0, %xmm15, %xmm13);
+       vpshufb %xmm14, %xmm0, %xmm2;
+       inc_le128(%xmm0, %xmm15, %xmm13);
+       vpshufb %xmm14, %xmm0, %xmm1;
+       inc_le128(%xmm0, %xmm15, %xmm13);
+       vmovdqa %xmm0, %xmm13;
+       vpshufb %xmm14, %xmm0, %xmm0;
+       inc_le128(%xmm13, %xmm15, %xmm14);
+       vpshufb .Lbswap128_mask rRIP, %xmm13, %xmm13; /* le => be */
+       vmovdqu %xmm13, (%rcx);
+
+       /* inpack16_pre: */
+       vmovq (key_table)(CTX), %xmm15;
+       vpshufb .Lpack_bswap rRIP, %xmm15, %xmm15;
+       vpxor %xmm0, %xmm15, %xmm0;
+       vpxor %xmm1, %xmm15, %xmm1;
+       vpxor %xmm2, %xmm15, %xmm2;
+       vpxor %xmm3, %xmm15, %xmm3;
+       vpxor %xmm4, %xmm15, %xmm4;
+       vpxor %xmm5, %xmm15, %xmm5;
+       vpxor %xmm6, %xmm15, %xmm6;
+       vpxor %xmm7, %xmm15, %xmm7;
+       vpxor %xmm8, %xmm15, %xmm8;
+       vpxor %xmm9, %xmm15, %xmm9;
+       vpxor %xmm10, %xmm15, %xmm10;
+       vpxor %xmm11, %xmm15, %xmm11;
+       vpxor %xmm12, %xmm15, %xmm12;
+       vpxor 13 * 16(%rax), %xmm15, %xmm13;
+       vpxor 14 * 16(%rax), %xmm15, %xmm14;
+       vpxor 15 * 16(%rax), %xmm15, %xmm15;
+
+       call __camellia_enc_blk16;
+
+       vpxor 0 * 16(%rdx), %xmm7, %xmm7;
+       vpxor 1 * 16(%rdx), %xmm6, %xmm6;
+       vpxor 2 * 16(%rdx), %xmm5, %xmm5;
+       vpxor 3 * 16(%rdx), %xmm4, %xmm4;
+       vpxor 4 * 16(%rdx), %xmm3, %xmm3;
+       vpxor 5 * 16(%rdx), %xmm2, %xmm2;
+       vpxor 6 * 16(%rdx), %xmm1, %xmm1;
+       vpxor 7 * 16(%rdx), %xmm0, %xmm0;
+       vpxor 8 * 16(%rdx), %xmm15, %xmm15;
+       vpxor 9 * 16(%rdx), %xmm14, %xmm14;
+       vpxor 10 * 16(%rdx), %xmm13, %xmm13;
+       vpxor 11 * 16(%rdx), %xmm12, %xmm12;
+       vpxor 12 * 16(%rdx), %xmm11, %xmm11;
+       vpxor 13 * 16(%rdx), %xmm10, %xmm10;
+       vpxor 14 * 16(%rdx), %xmm9, %xmm9;
+       vpxor 15 * 16(%rdx), %xmm8, %xmm8;
+
+       write_output(%xmm7, %xmm6, %xmm5, %xmm4, %xmm3, %xmm2, %xmm1, %xmm0,
+                    %xmm15, %xmm14, %xmm13, %xmm12, %xmm11, %xmm10, %xmm9,
+                    %xmm8, %rsi);
+
+       vzeroall;
+
+       leave;
+       CFI_LEAVE();
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size _gcry_camellia_aesni_avx_ctr_enc,.-_gcry_camellia_aesni_avx_ctr_enc;)
+
+.align 8
+.globl _gcry_camellia_aesni_avx_cbc_dec
+ELF(.type   _gcry_camellia_aesni_avx_cbc_dec,@function;)
+
+_gcry_camellia_aesni_avx_cbc_dec:
+       /* input:
+        *      %rdi: ctx, CTX
+        *      %rsi: dst (16 blocks)
+        *      %rdx: src (16 blocks)
+        *      %rcx: iv
+        */
+       CFI_STARTPROC();
+
+       pushq %rbp;
+       CFI_PUSH(%rbp);
+       movq %rsp, %rbp;
+       CFI_DEF_CFA_REGISTER(%rbp);
+
+       vzeroupper;
+
+       movq %rcx, %r9;
+
+       cmpl $128, key_bitlength(CTX);
+       movl $32, %r8d;
+       movl $24, %eax;
+       cmovel %eax, %r8d; /* max */
+
+       inpack16_pre(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
+                    %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
+                    %xmm15, %rdx, (key_table)(CTX, %r8, 8));
+
+       subq $(16 * 16), %rsp;
+       andq $~31, %rsp;
+       movq %rsp, %rax;
+
+       call __camellia_dec_blk16;
+
+       /* XOR output with IV */
+       vpxor (%r9), %xmm7, %xmm7;
+       vpxor (0 * 16)(%rdx), %xmm6, %xmm6;
+       vpxor (1 * 16)(%rdx), %xmm5, %xmm5;
+       vpxor (2 * 16)(%rdx), %xmm4, %xmm4;
+       vpxor (3 * 16)(%rdx), %xmm3, %xmm3;
+       vpxor (4 * 16)(%rdx), %xmm2, %xmm2;
+       vpxor (5 * 16)(%rdx), %xmm1, %xmm1;
+       vpxor (6 * 16)(%rdx), %xmm0, %xmm0;
+       vpxor (7 * 16)(%rdx), %xmm15, %xmm15;
+       vpxor (8 * 16)(%rdx), %xmm14, %xmm14;
+       vpxor (9 * 16)(%rdx), %xmm13, %xmm13;
+       vpxor (10 * 16)(%rdx), %xmm12, %xmm12;
+       vpxor (11 * 16)(%rdx), %xmm11, %xmm11;
+       vpxor (12 * 16)(%rdx), %xmm10, %xmm10;
+       vpxor (13 * 16)(%rdx), %xmm9, %xmm9;
+       vpxor (14 * 16)(%rdx), %xmm8, %xmm8;
+       movq (15 * 16 + 0)(%rdx), %r10;
+       movq (15 * 16 + 8)(%rdx), %r11;
+
+       write_output(%xmm7, %xmm6, %xmm5, %xmm4, %xmm3, %xmm2, %xmm1, %xmm0,
+                    %xmm15, %xmm14, %xmm13, %xmm12, %xmm11, %xmm10, %xmm9,
+                    %xmm8, %rsi);
+
+       /* store new IV */
+       movq %r10, (0)(%r9);
+       movq %r11, (8)(%r9);
+
+       vzeroall;
+
+       leave;
+       CFI_LEAVE();
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size _gcry_camellia_aesni_avx_cbc_dec,.-_gcry_camellia_aesni_avx_cbc_dec;)
+
+.align 8
+.globl _gcry_camellia_aesni_avx_cfb_dec
+ELF(.type   _gcry_camellia_aesni_avx_cfb_dec,@function;)
+
+_gcry_camellia_aesni_avx_cfb_dec:
+       /* input:
+        *      %rdi: ctx, CTX
+        *      %rsi: dst (16 blocks)
+        *      %rdx: src (16 blocks)
+        *      %rcx: iv
+        */
+       CFI_STARTPROC();
+
+       pushq %rbp;
+       CFI_PUSH(%rbp);
+       movq %rsp, %rbp;
+       CFI_DEF_CFA_REGISTER(%rbp);
+
+       vzeroupper;
+
+       cmpl $128, key_bitlength(CTX);
+       movl $32, %r8d;
+       movl $24, %eax;
+       cmovel %eax, %r8d; /* max */
+
+       subq $(16 * 16), %rsp;
+       andq $~31, %rsp;
+       movq %rsp, %rax;
+
+       /* inpack16_pre: */
+       vmovq (key_table)(CTX), %xmm0;
+       vpshufb .Lpack_bswap rRIP, %xmm0, %xmm0;
+       vpxor (%rcx), %xmm0, %xmm15;
+       vmovdqu 15 * 16(%rdx), %xmm1;
+       vmovdqu %xmm1, (%rcx); /* store new IV */
+       vpxor 0 * 16(%rdx), %xmm0, %xmm14;
+       vpxor 1 * 16(%rdx), %xmm0, %xmm13;
+       vpxor 2 * 16(%rdx), %xmm0, %xmm12;
+       vpxor 3 * 16(%rdx), %xmm0, %xmm11;
+       vpxor 4 * 16(%rdx), %xmm0, %xmm10;
+       vpxor 5 * 16(%rdx), %xmm0, %xmm9;
+       vpxor 6 * 16(%rdx), %xmm0, %xmm8;
+       vpxor 7 * 16(%rdx), %xmm0, %xmm7;
+       vpxor 8 * 16(%rdx), %xmm0, %xmm6;
+       vpxor 9 * 16(%rdx), %xmm0, %xmm5;
+       vpxor 10 * 16(%rdx), %xmm0, %xmm4;
+       vpxor 11 * 16(%rdx), %xmm0, %xmm3;
+       vpxor 12 * 16(%rdx), %xmm0, %xmm2;
+       vpxor 13 * 16(%rdx), %xmm0, %xmm1;
+       vpxor 14 * 16(%rdx), %xmm0, %xmm0;
+
+       call __camellia_enc_blk16;
+
+       vpxor 0 * 16(%rdx), %xmm7, %xmm7;
+       vpxor 1 * 16(%rdx), %xmm6, %xmm6;
+       vpxor 2 * 16(%rdx), %xmm5, %xmm5;
+       vpxor 3 * 16(%rdx), %xmm4, %xmm4;
+       vpxor 4 * 16(%rdx), %xmm3, %xmm3;
+       vpxor 5 * 16(%rdx), %xmm2, %xmm2;
+       vpxor 6 * 16(%rdx), %xmm1, %xmm1;
+       vpxor 7 * 16(%rdx), %xmm0, %xmm0;
+       vpxor 8 * 16(%rdx), %xmm15, %xmm15;
+       vpxor 9 * 16(%rdx), %xmm14, %xmm14;
+       vpxor 10 * 16(%rdx), %xmm13, %xmm13;
+       vpxor 11 * 16(%rdx), %xmm12, %xmm12;
+       vpxor 12 * 16(%rdx), %xmm11, %xmm11;
+       vpxor 13 * 16(%rdx), %xmm10, %xmm10;
+       vpxor 14 * 16(%rdx), %xmm9, %xmm9;
+       vpxor 15 * 16(%rdx), %xmm8, %xmm8;
+
+       write_output(%xmm7, %xmm6, %xmm5, %xmm4, %xmm3, %xmm2, %xmm1, %xmm0,
+                    %xmm15, %xmm14, %xmm13, %xmm12, %xmm11, %xmm10, %xmm9,
+                    %xmm8, %rsi);
+
+       vzeroall;
+
+       leave;
+       CFI_LEAVE();
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size _gcry_camellia_aesni_avx_cfb_dec,.-_gcry_camellia_aesni_avx_cfb_dec;)
+
+.align 8
+.globl _gcry_camellia_aesni_avx_ocb_enc
+ELF(.type   _gcry_camellia_aesni_avx_ocb_enc,@function;)
+
+_gcry_camellia_aesni_avx_ocb_enc:
+       /* input:
+        *      %rdi: ctx, CTX
+        *      %rsi: dst (16 blocks)
+        *      %rdx: src (16 blocks)
+        *      %rcx: offset
+        *      %r8 : checksum
+        *      %r9 : L pointers (void *L[16])
+        */
+       CFI_STARTPROC();
+
+       pushq %rbp;
+       CFI_PUSH(%rbp);
+       movq %rsp, %rbp;
+       CFI_DEF_CFA_REGISTER(%rbp);
+
+       vzeroupper;
+
+       subq $(16 * 16 + 4 * 8), %rsp;
+       andq $~31, %rsp;
+       movq %rsp, %rax;
+
+       movq %r10, (16 * 16 + 0 * 8)(%rsp);
+       movq %r11, (16 * 16 + 1 * 8)(%rsp);
+       movq %r12, (16 * 16 + 2 * 8)(%rsp);
+       movq %r13, (16 * 16 + 3 * 8)(%rsp);
+       CFI_REG_ON_STACK(r10, 16 * 16 + 0 * 8);
+       CFI_REG_ON_STACK(r11, 16 * 16 + 1 * 8);
+       CFI_REG_ON_STACK(r12, 16 * 16 + 2 * 8);
+       CFI_REG_ON_STACK(r13, 16 * 16 + 3 * 8);
+
+       vmovdqu (%rcx), %xmm14;
+       vmovdqu (%r8), %xmm15;
+
+       /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+       /* Checksum_i = Checksum_{i-1} xor P_i  */
+       /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i)  */
+
+#define OCB_INPUT(n, lreg, xreg) \
+         vmovdqu (n * 16)(%rdx), xreg; \
+         vpxor (lreg), %xmm14, %xmm14; \
+         vpxor xreg, %xmm15, %xmm15; \
+         vpxor xreg, %xmm14, xreg; \
+         vmovdqu %xmm14, (n * 16)(%rsi);
+       movq (0 * 8)(%r9), %r10;
+       movq (1 * 8)(%r9), %r11;
+       movq (2 * 8)(%r9), %r12;
+       movq (3 * 8)(%r9), %r13;
+       OCB_INPUT(0, %r10, %xmm0);
+       vmovdqu %xmm0, (15 * 16)(%rax);
+       OCB_INPUT(1, %r11, %xmm0);
+       vmovdqu %xmm0, (14 * 16)(%rax);
+       OCB_INPUT(2, %r12, %xmm13);
+       OCB_INPUT(3, %r13, %xmm12);
+       movq (4 * 8)(%r9), %r10;
+       movq (5 * 8)(%r9), %r11;
+       movq (6 * 8)(%r9), %r12;
+       movq (7 * 8)(%r9), %r13;
+       OCB_INPUT(4, %r10, %xmm11);
+       OCB_INPUT(5, %r11, %xmm10);
+       OCB_INPUT(6, %r12, %xmm9);
+       OCB_INPUT(7, %r13, %xmm8);
+       movq (8 * 8)(%r9), %r10;
+       movq (9 * 8)(%r9), %r11;
+       movq (10 * 8)(%r9), %r12;
+       movq (11 * 8)(%r9), %r13;
+       OCB_INPUT(8, %r10, %xmm7);
+       OCB_INPUT(9, %r11, %xmm6);
+       OCB_INPUT(10, %r12, %xmm5);
+       OCB_INPUT(11, %r13, %xmm4);
+       movq (12 * 8)(%r9), %r10;
+       movq (13 * 8)(%r9), %r11;
+       movq (14 * 8)(%r9), %r12;
+       movq (15 * 8)(%r9), %r13;
+       OCB_INPUT(12, %r10, %xmm3);
+       OCB_INPUT(13, %r11, %xmm2);
+       OCB_INPUT(14, %r12, %xmm1);
+       OCB_INPUT(15, %r13, %xmm0);
+#undef OCB_INPUT
+
+       vmovdqu %xmm14, (%rcx);
+       vmovdqu %xmm15, (%r8);
+
+       cmpl $128, key_bitlength(CTX);
+       movl $32, %r8d;
+       movl $24, %r10d;
+       cmovel %r10d, %r8d; /* max */
+
+       /* inpack16_pre: */
+       vmovq (key_table)(CTX), %xmm15;
+       vpshufb .Lpack_bswap rRIP, %xmm15, %xmm15;
+       vpxor %xmm0, %xmm15, %xmm0;
+       vpxor %xmm1, %xmm15, %xmm1;
+       vpxor %xmm2, %xmm15, %xmm2;
+       vpxor %xmm3, %xmm15, %xmm3;
+       vpxor %xmm4, %xmm15, %xmm4;
+       vpxor %xmm5, %xmm15, %xmm5;
+       vpxor %xmm6, %xmm15, %xmm6;
+       vpxor %xmm7, %xmm15, %xmm7;
+       vpxor %xmm8, %xmm15, %xmm8;
+       vpxor %xmm9, %xmm15, %xmm9;
+       vpxor %xmm10, %xmm15, %xmm10;
+       vpxor %xmm11, %xmm15, %xmm11;
+       vpxor %xmm12, %xmm15, %xmm12;
+       vpxor %xmm13, %xmm15, %xmm13;
+       vpxor 14 * 16(%rax), %xmm15, %xmm14;
+       vpxor 15 * 16(%rax), %xmm15, %xmm15;
+
+       call __camellia_enc_blk16;
+
+       vpxor 0 * 16(%rsi), %xmm7, %xmm7;
+       vpxor 1 * 16(%rsi), %xmm6, %xmm6;
+       vpxor 2 * 16(%rsi), %xmm5, %xmm5;
+       vpxor 3 * 16(%rsi), %xmm4, %xmm4;
+       vpxor 4 * 16(%rsi), %xmm3, %xmm3;
+       vpxor 5 * 16(%rsi), %xmm2, %xmm2;
+       vpxor 6 * 16(%rsi), %xmm1, %xmm1;
+       vpxor 7 * 16(%rsi), %xmm0, %xmm0;
+       vpxor 8 * 16(%rsi), %xmm15, %xmm15;
+       vpxor 9 * 16(%rsi), %xmm14, %xmm14;
+       vpxor 10 * 16(%rsi), %xmm13, %xmm13;
+       vpxor 11 * 16(%rsi), %xmm12, %xmm12;
+       vpxor 12 * 16(%rsi), %xmm11, %xmm11;
+       vpxor 13 * 16(%rsi), %xmm10, %xmm10;
+       vpxor 14 * 16(%rsi), %xmm9, %xmm9;
+       vpxor 15 * 16(%rsi), %xmm8, %xmm8;
+
+       write_output(%xmm7, %xmm6, %xmm5, %xmm4, %xmm3, %xmm2, %xmm1, %xmm0,
+                    %xmm15, %xmm14, %xmm13, %xmm12, %xmm11, %xmm10, %xmm9,
+                    %xmm8, %rsi);
+
+       vzeroall;
+
+       movq (16 * 16 + 0 * 8)(%rsp), %r10;
+       movq (16 * 16 + 1 * 8)(%rsp), %r11;
+       movq (16 * 16 + 2 * 8)(%rsp), %r12;
+       movq (16 * 16 + 3 * 8)(%rsp), %r13;
+       CFI_RESTORE(%r10);
+       CFI_RESTORE(%r11);
+       CFI_RESTORE(%r12);
+       CFI_RESTORE(%r13);
+
+       leave;
+       CFI_LEAVE();
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size _gcry_camellia_aesni_avx_ocb_enc,.-_gcry_camellia_aesni_avx_ocb_enc;)
+
+.align 8
+.globl _gcry_camellia_aesni_avx_ocb_dec
+ELF(.type   _gcry_camellia_aesni_avx_ocb_dec,@function;)
+
+_gcry_camellia_aesni_avx_ocb_dec:
+       /* input:
+        *      %rdi: ctx, CTX
+        *      %rsi: dst (16 blocks)
+        *      %rdx: src (16 blocks)
+        *      %rcx: offset
+        *      %r8 : checksum
+        *      %r9 : L pointers (void *L[16])
+        */
+       CFI_STARTPROC();
+
+       pushq %rbp;
+       CFI_PUSH(%rbp);
+       movq %rsp, %rbp;
+       CFI_DEF_CFA_REGISTER(%rbp);
+
+       vzeroupper;
+
+       subq $(16 * 16 + 4 * 8), %rsp;
+       andq $~31, %rsp;
+       movq %rsp, %rax;
+
+       movq %r10, (16 * 16 + 0 * 8)(%rsp);
+       movq %r11, (16 * 16 + 1 * 8)(%rsp);
+       movq %r12, (16 * 16 + 2 * 8)(%rsp);
+       movq %r13, (16 * 16 + 3 * 8)(%rsp);
+       CFI_REG_ON_STACK(r10, 16 * 16 + 0 * 8);
+       CFI_REG_ON_STACK(r11, 16 * 16 + 1 * 8);
+       CFI_REG_ON_STACK(r12, 16 * 16 + 2 * 8);
+       CFI_REG_ON_STACK(r13, 16 * 16 + 3 * 8);
+
+       vmovdqu (%rcx), %xmm15;
+
+       /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+       /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i)  */
+
+#define OCB_INPUT(n, lreg, xreg) \
+         vmovdqu (n * 16)(%rdx), xreg; \
+         vpxor (lreg), %xmm15, %xmm15; \
+         vpxor xreg, %xmm15, xreg; \
+         vmovdqu %xmm15, (n * 16)(%rsi);
+       movq (0 * 8)(%r9), %r10;
+       movq (1 * 8)(%r9), %r11;
+       movq (2 * 8)(%r9), %r12;
+       movq (3 * 8)(%r9), %r13;
+       OCB_INPUT(0, %r10, %xmm0);
+       vmovdqu %xmm0, (15 * 16)(%rax);
+       OCB_INPUT(1, %r11, %xmm14);
+       OCB_INPUT(2, %r12, %xmm13);
+       OCB_INPUT(3, %r13, %xmm12);
+       movq (4 * 8)(%r9), %r10;
+       movq (5 * 8)(%r9), %r11;
+       movq (6 * 8)(%r9), %r12;
+       movq (7 * 8)(%r9), %r13;
+       OCB_INPUT(4, %r10, %xmm11);
+       OCB_INPUT(5, %r11, %xmm10);
+       OCB_INPUT(6, %r12, %xmm9);
+       OCB_INPUT(7, %r13, %xmm8);
+       movq (8 * 8)(%r9), %r10;
+       movq (9 * 8)(%r9), %r11;
+       movq (10 * 8)(%r9), %r12;
+       movq (11 * 8)(%r9), %r13;
+       OCB_INPUT(8, %r10, %xmm7);
+       OCB_INPUT(9, %r11, %xmm6);
+       OCB_INPUT(10, %r12, %xmm5);
+       OCB_INPUT(11, %r13, %xmm4);
+       movq (12 * 8)(%r9), %r10;
+       movq (13 * 8)(%r9), %r11;
+       movq (14 * 8)(%r9), %r12;
+       movq (15 * 8)(%r9), %r13;
+       OCB_INPUT(12, %r10, %xmm3);
+       OCB_INPUT(13, %r11, %xmm2);
+       OCB_INPUT(14, %r12, %xmm1);
+       OCB_INPUT(15, %r13, %xmm0);
+#undef OCB_INPUT
+
+       vmovdqu %xmm15, (%rcx);
+
+       movq %r8, %r10;
+
+       cmpl $128, key_bitlength(CTX);
+       movl $32, %r8d;
+       movl $24, %r9d;
+       cmovel %r9d, %r8d; /* max */
+
+       /* inpack16_pre: */
+       vmovq (key_table)(CTX, %r8, 8), %xmm15;
+       vpshufb .Lpack_bswap rRIP, %xmm15, %xmm15;
+       vpxor %xmm0, %xmm15, %xmm0;
+       vpxor %xmm1, %xmm15, %xmm1;
+       vpxor %xmm2, %xmm15, %xmm2;
+       vpxor %xmm3, %xmm15, %xmm3;
+       vpxor %xmm4, %xmm15, %xmm4;
+       vpxor %xmm5, %xmm15, %xmm5;
+       vpxor %xmm6, %xmm15, %xmm6;
+       vpxor %xmm7, %xmm15, %xmm7;
+       vpxor %xmm8, %xmm15, %xmm8;
+       vpxor %xmm9, %xmm15, %xmm9;
+       vpxor %xmm10, %xmm15, %xmm10;
+       vpxor %xmm11, %xmm15, %xmm11;
+       vpxor %xmm12, %xmm15, %xmm12;
+       vpxor %xmm13, %xmm15, %xmm13;
+       vpxor %xmm14, %xmm15, %xmm14;
+       vpxor 15 * 16(%rax), %xmm15, %xmm15;
+
+       call __camellia_dec_blk16;
+
+       vpxor 0 * 16(%rsi), %xmm7, %xmm7;
+       vpxor 1 * 16(%rsi), %xmm6, %xmm6;
+       vpxor 2 * 16(%rsi), %xmm5, %xmm5;
+       vpxor 3 * 16(%rsi), %xmm4, %xmm4;
+       vpxor 4 * 16(%rsi), %xmm3, %xmm3;
+       vpxor 5 * 16(%rsi), %xmm2, %xmm2;
+       vpxor 6 * 16(%rsi), %xmm1, %xmm1;
+       vpxor 7 * 16(%rsi), %xmm0, %xmm0;
+       vmovdqu %xmm7, (7 * 16)(%rax);
+       vpxor 8 * 16(%rsi), %xmm15, %xmm15;
+       vpxor 9 * 16(%rsi), %xmm14, %xmm14;
+       vpxor 10 * 16(%rsi), %xmm13, %xmm13;
+       vpxor 11 * 16(%rsi), %xmm12, %xmm12;
+       vpxor 12 * 16(%rsi), %xmm11, %xmm11;
+       vpxor 13 * 16(%rsi), %xmm10, %xmm10;
+       vpxor 14 * 16(%rsi), %xmm9, %xmm9;
+       vpxor 15 * 16(%rsi), %xmm8, %xmm8;
+
+       /* Checksum_i = Checksum_{i-1} xor P_i  */
+
+       vpxor (%r10), %xmm7, %xmm7;
+       vpxor %xmm6, %xmm7, %xmm7;
+       vpxor %xmm5, %xmm7, %xmm7;
+       vpxor %xmm4, %xmm7, %xmm7;
+       vpxor %xmm3, %xmm7, %xmm7;
+       vpxor %xmm2, %xmm7, %xmm7;
+       vpxor %xmm1, %xmm7, %xmm7;
+       vpxor %xmm0, %xmm7, %xmm7;
+       vpxor %xmm15, %xmm7, %xmm7;
+       vpxor %xmm14, %xmm7, %xmm7;
+       vpxor %xmm13, %xmm7, %xmm7;
+       vpxor %xmm12, %xmm7, %xmm7;
+       vpxor %xmm11, %xmm7, %xmm7;
+       vpxor %xmm10, %xmm7, %xmm7;
+       vpxor %xmm9, %xmm7, %xmm7;
+       vpxor %xmm8, %xmm7, %xmm7;
+       vmovdqu %xmm7, (%r10);
+       vmovdqu (7 * 16)(%rax), %xmm7;
+
+       write_output(%xmm7, %xmm6, %xmm5, %xmm4, %xmm3, %xmm2, %xmm1, %xmm0,
+                    %xmm15, %xmm14, %xmm13, %xmm12, %xmm11, %xmm10, %xmm9,
+                    %xmm8, %rsi);
+
+       vzeroall;
+
+       movq (16 * 16 + 0 * 8)(%rsp), %r10;
+       movq (16 * 16 + 1 * 8)(%rsp), %r11;
+       movq (16 * 16 + 2 * 8)(%rsp), %r12;
+       movq (16 * 16 + 3 * 8)(%rsp), %r13;
+       CFI_RESTORE(%r10);
+       CFI_RESTORE(%r11);
+       CFI_RESTORE(%r12);
+       CFI_RESTORE(%r13);
+
+       leave;
+       CFI_LEAVE();
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size _gcry_camellia_aesni_avx_ocb_dec,.-_gcry_camellia_aesni_avx_ocb_dec;)
+
+.align 8
+.globl _gcry_camellia_aesni_avx_ocb_auth
+ELF(.type   _gcry_camellia_aesni_avx_ocb_auth,@function;)
+
+_gcry_camellia_aesni_avx_ocb_auth:
+       /* input:
+        *      %rdi: ctx, CTX
+        *      %rsi: abuf (16 blocks)
+        *      %rdx: offset
+        *      %rcx: checksum
+        *      %r8 : L pointers (void *L[16])
+        */
+       CFI_STARTPROC();
+
+       pushq %rbp;
+       CFI_PUSH(%rbp);
+       movq %rsp, %rbp;
+       CFI_DEF_CFA_REGISTER(%rbp);
+
+       vzeroupper;
+
+       subq $(16 * 16 + 4 * 8), %rsp;
+       andq $~31, %rsp;
+       movq %rsp, %rax;
+
+       movq %r10, (16 * 16 + 0 * 8)(%rsp);
+       movq %r11, (16 * 16 + 1 * 8)(%rsp);
+       movq %r12, (16 * 16 + 2 * 8)(%rsp);
+       movq %r13, (16 * 16 + 3 * 8)(%rsp);
+       CFI_REG_ON_STACK(r10, 16 * 16 + 0 * 8);
+       CFI_REG_ON_STACK(r11, 16 * 16 + 1 * 8);
+       CFI_REG_ON_STACK(r12, 16 * 16 + 2 * 8);
+       CFI_REG_ON_STACK(r13, 16 * 16 + 3 * 8);
+
+       vmovdqu (%rdx), %xmm15;
+
+       /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+       /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i)  */
+
+#define OCB_INPUT(n, lreg, xreg) \
+         vmovdqu (n * 16)(%rsi), xreg; \
+         vpxor (lreg), %xmm15, %xmm15; \
+         vpxor xreg, %xmm15, xreg;
+
+       movq (0 * 8)(%r8), %r10;
+       movq (1 * 8)(%r8), %r11;
+       movq (2 * 8)(%r8), %r12;
+       movq (3 * 8)(%r8), %r13;
+       OCB_INPUT(0, %r10, %xmm0);
+       vmovdqu %xmm0, (15 * 16)(%rax);
+       OCB_INPUT(1, %r11, %xmm14);
+       OCB_INPUT(2, %r12, %xmm13);
+       OCB_INPUT(3, %r13, %xmm12);
+       movq (4 * 8)(%r8), %r10;
+       movq (5 * 8)(%r8), %r11;
+       movq (6 * 8)(%r8), %r12;
+       movq (7 * 8)(%r8), %r13;
+       OCB_INPUT(4, %r10, %xmm11);
+       OCB_INPUT(5, %r11, %xmm10);
+       OCB_INPUT(6, %r12, %xmm9);
+       OCB_INPUT(7, %r13, %xmm8);
+       movq (8 * 8)(%r8), %r10;
+       movq (9 * 8)(%r8), %r11;
+       movq (10 * 8)(%r8), %r12;
+       movq (11 * 8)(%r8), %r13;
+       OCB_INPUT(8, %r10, %xmm7);
+       OCB_INPUT(9, %r11, %xmm6);
+       OCB_INPUT(10, %r12, %xmm5);
+       OCB_INPUT(11, %r13, %xmm4);
+       movq (12 * 8)(%r8), %r10;
+       movq (13 * 8)(%r8), %r11;
+       movq (14 * 8)(%r8), %r12;
+       movq (15 * 8)(%r8), %r13;
+       OCB_INPUT(12, %r10, %xmm3);
+       OCB_INPUT(13, %r11, %xmm2);
+       OCB_INPUT(14, %r12, %xmm1);
+       OCB_INPUT(15, %r13, %xmm0);
+#undef OCB_INPUT
+
+       cmpl $128, key_bitlength(CTX);
+       movl $32, %r8d;
+       movl $24, %r10d;
+       cmovel %r10d, %r8d; /* max */
+
+       vmovdqu %xmm15, (%rdx);
+
+       movq %rcx, %r10;
+
+       /* inpack16_pre: */
+       vmovq (key_table)(CTX), %xmm15;
+       vpshufb .Lpack_bswap rRIP, %xmm15, %xmm15;
+       vpxor %xmm0, %xmm15, %xmm0;
+       vpxor %xmm1, %xmm15, %xmm1;
+       vpxor %xmm2, %xmm15, %xmm2;
+       vpxor %xmm3, %xmm15, %xmm3;
+       vpxor %xmm4, %xmm15, %xmm4;
+       vpxor %xmm5, %xmm15, %xmm5;
+       vpxor %xmm6, %xmm15, %xmm6;
+       vpxor %xmm7, %xmm15, %xmm7;
+       vpxor %xmm8, %xmm15, %xmm8;
+       vpxor %xmm9, %xmm15, %xmm9;
+       vpxor %xmm10, %xmm15, %xmm10;
+       vpxor %xmm11, %xmm15, %xmm11;
+       vpxor %xmm12, %xmm15, %xmm12;
+       vpxor %xmm13, %xmm15, %xmm13;
+       vpxor %xmm14, %xmm15, %xmm14;
+       vpxor 15 * 16(%rax), %xmm15, %xmm15;
+
+       call __camellia_enc_blk16;
+
+       vpxor %xmm7, %xmm6, %xmm6;
+       vpxor %xmm5, %xmm4, %xmm4;
+       vpxor %xmm3, %xmm2, %xmm2;
+       vpxor %xmm1, %xmm0, %xmm0;
+       vpxor %xmm15, %xmm14, %xmm14;
+       vpxor %xmm13, %xmm12, %xmm12;
+       vpxor %xmm11, %xmm10, %xmm10;
+       vpxor %xmm9, %xmm8, %xmm8;
+
+       vpxor %xmm6, %xmm4, %xmm4;
+       vpxor %xmm2, %xmm0, %xmm0;
+       vpxor %xmm14, %xmm12, %xmm12;
+       vpxor %xmm10, %xmm8, %xmm8;
+
+       vpxor %xmm4, %xmm0, %xmm0;
+       vpxor %xmm12, %xmm8, %xmm8;
+
+       vpxor %xmm0, %xmm8, %xmm0;
+       vpxor (%r10), %xmm0, %xmm0;
+       vmovdqu %xmm0, (%r10);
+
+       vzeroall;
+
+       movq (16 * 16 + 0 * 8)(%rsp), %r10;
+       movq (16 * 16 + 1 * 8)(%rsp), %r11;
+       movq (16 * 16 + 2 * 8)(%rsp), %r12;
+       movq (16 * 16 + 3 * 8)(%rsp), %r13;
+       CFI_RESTORE(%r10);
+       CFI_RESTORE(%r11);
+       CFI_RESTORE(%r12);
+       CFI_RESTORE(%r13);
+
+       leave;
+       CFI_LEAVE();
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size 
_gcry_camellia_aesni_avx_ocb_auth,.-_gcry_camellia_aesni_avx_ocb_auth;)
+
+/*
+ * IN:
+ *  ab: 64-bit AB state
+ *  cd: 64-bit CD state
+ */
+#define camellia_f(ab, x, t0, t1, t2, t3, t4, inv_shift_row, sbox4mask, \
+                  _0f0f0f0fmask, pre_s1lo_mask, pre_s1hi_mask, key) \
+       vmovq key, t0; \
+       vpxor x, x, t3; \
+       \
+       vpxor ab, t0, x; \
+       \
+       /* \
+        * S-function with AES subbytes \
+        */ \
+       \
+       /* input rotation for sbox4 (<<< 1) */ \
+       vpand x, sbox4mask, t0; \
+       vpandn x, sbox4mask, x; \
+       vpaddw t0, t0, t1; \
+       vpsrlw $7, t0, t0; \
+       vpor t0, t1, t0; \
+       vpand sbox4mask, t0, t0; \
+       vpor t0, x, x; \
+       \
+       vmovdqa .Lpost_tf_lo_s1 rRIP, t0; \
+       vmovdqa .Lpost_tf_hi_s1 rRIP, t1; \
+       \
+       /* prefilter sboxes */ \
+       filter_8bit(x, pre_s1lo_mask, pre_s1hi_mask, _0f0f0f0fmask, t2); \
+       \
+       /* AES subbytes + AES shift rows + AES inv shift rows */ \
+       vaesenclast t3, x, x; \
+       \
+       /* postfilter sboxes */ \
+       filter_8bit(x, t0, t1, _0f0f0f0fmask, t2); \
+       \
+       /* output rotation for sbox2 (<<< 1) */ \
+       /* output rotation for sbox3 (>>> 1) */ \
+       vpshufb inv_shift_row, x, t1; \
+       vpshufb .Lsp0044440444044404mask rRIP, x, t4; \
+       vpshufb .Lsp1110111010011110mask rRIP, x, x; \
+       vpaddb t1, t1, t2; \
+       vpsrlw $7, t1, t0; \
+       vpsllw $7, t1, t3; \
+       vpor t0, t2, t0; \
+       vpsrlw $1, t1, t1; \
+       vpshufb .Lsp0222022222000222mask rRIP, t0, t0; \
+       vpor t1, t3, t1; \
+       \
+       vpxor x, t4, t4; \
+       vpshufb .Lsp3033303303303033mask rRIP, t1, t1; \
+       vpxor t4, t0, t0; \
+       vpxor t1, t0, t0; \
+       vpsrldq $8, t0, x; \
+       vpxor t0, x, x;
+
+#define vec_rol128(in, out, nrol, t0) \
+       vpshufd $0x4e, in, out; \
+       vpsllq $(nrol), in, t0; \
+       vpsrlq $(64-(nrol)), out, out; \
+       vpaddd t0, out, out;
+
+#define vec_ror128(in, out, nror, t0) \
+       vpshufd $0x4e, in, out; \
+       vpsrlq $(nror), in, t0; \
+       vpsllq $(64-(nror)), out, out; \
+       vpaddd t0, out, out;
+
+
+.align 16
+.Linv_shift_row_and_unpcklbw:
+       .byte 0x00, 0xff, 0x0d, 0xff, 0x0a, 0xff, 0x07, 0xff
+       .byte 0x04, 0xff, 0x01, 0xff, 0x0e, 0xff, 0x0b, 0xff
+.Lsp0044440444044404mask:
+       .long 0xffff0404, 0x0404ff04;
+       .long 0x0d0dff0d, 0x0d0dff0d;
+.Lsp1110111010011110mask:
+       .long 0x000000ff, 0x000000ff;
+       .long 0x0bffff0b, 0x0b0b0bff;
+.Lsp0222022222000222mask:
+       .long 0xff060606, 0xff060606;
+       .long 0x0c0cffff, 0xff0c0c0c;
+.Lsp3033303303303033mask:
+       .long 0x04ff0404, 0x04ff0404;
+       .long 0xff0a0aff, 0x0aff0a0a;
+.Lsbox4_input_mask:
+       .byte 0x00, 0xff, 0x00, 0x00, 0xff, 0x00, 0x00, 0x00;
+.Lsigma1:
+       .long 0x3BCC908B, 0xA09E667F;
+.Lsigma2:
+       .long 0x4CAA73B2, 0xB67AE858;
+.Lsigma3:
+       .long 0xE94F82BE, 0xC6EF372F;
+.Lsigma4:
+       .long 0xF1D36F1C, 0x54FF53A5;
+.Lsigma5:
+       .long 0xDE682D1D, 0x10E527FA;
+.Lsigma6:
+       .long 0xB3E6C1FD, 0xB05688C2;
+
+
+.align 8
+ELF(.type  __camellia_avx_setup128,@function;)
+__camellia_avx_setup128:
+       /* input:
+        *      %rdi: ctx, CTX; subkey storage at key_table(CTX)
+        *      %xmm0: key
+        */
+       CFI_STARTPROC();
+
+#define cmll_sub(n, ctx) (key_table+((n)*8))(ctx)
+#define KL128 %xmm0
+#define KA128 %xmm2
+
+       vpshufb .Lbswap128_mask rRIP, KL128, KL128;
+
+       vmovdqa .Linv_shift_row_and_unpcklbw rRIP, %xmm11;
+       vmovq .Lsbox4_input_mask rRIP, %xmm12;
+       vbroadcastss .L0f0f0f0f rRIP, %xmm13;
+       vmovdqa .Lpre_tf_lo_s1 rRIP, %xmm14;
+       vmovdqa .Lpre_tf_hi_s1 rRIP, %xmm15;
+
+       /*
+        * Generate KA
+        */
+       vpsrldq $8, KL128, %xmm2;
+       vmovdqa KL128, %xmm3;
+       vpslldq $8, %xmm3, %xmm3;
+       vpsrldq $8, %xmm3, %xmm3;
+
+       camellia_f(%xmm2, %xmm4, %xmm1,
+                  %xmm5, %xmm6, %xmm7, %xmm8,
+                  %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma1 rRIP);
+       vpxor %xmm4, %xmm3, %xmm3;
+       camellia_f(%xmm3, %xmm2, %xmm1,
+                  %xmm5, %xmm6, %xmm7, %xmm8,
+                  %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma2 rRIP);
+       camellia_f(%xmm2, %xmm3, %xmm1,
+                  %xmm5, %xmm6, %xmm7, %xmm8,
+                  %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma3 rRIP);
+       vpxor %xmm4, %xmm3, %xmm3;
+       camellia_f(%xmm3, %xmm4, %xmm1,
+                  %xmm5, %xmm6, %xmm7, %xmm8,
+                  %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma4 rRIP);
+
+       vpslldq $8, %xmm3, %xmm3;
+       vpxor %xmm4, %xmm2, %xmm2;
+       vpsrldq $8, %xmm3, %xmm3;
+       vpslldq $8, %xmm2, KA128;
+       vpor %xmm3, KA128, KA128;
+
+        /*
+         * Generate subkeys
+         */
+       vmovdqu KA128, cmll_sub(24, CTX);
+       vec_rol128(KL128, %xmm3, 15, %xmm15);
+       vec_rol128(KA128, %xmm4, 15, %xmm15);
+       vec_rol128(KA128, %xmm5, 30, %xmm15);
+       vec_rol128(KL128, %xmm6, 45, %xmm15);
+       vec_rol128(KA128, %xmm7, 45, %xmm15);
+       vec_rol128(KL128, %xmm8, 60, %xmm15);
+       vec_rol128(KA128, %xmm9, 60, %xmm15);
+       vec_ror128(KL128, %xmm10, 128-77, %xmm15);
+
+       /* absorb kw2 to other subkeys */
+       vpslldq $8, KL128, %xmm15;
+       vpsrldq $8, %xmm15, %xmm15;
+       vpxor %xmm15, KA128, KA128;
+       vpxor %xmm15, %xmm3, %xmm3;
+       vpxor %xmm15, %xmm4, %xmm4;
+
+       /* subl(1) ^= subr(1) & ~subr(9); */
+       vpandn %xmm15, %xmm5, %xmm13;
+       vpslldq $12, %xmm13, %xmm13;
+       vpsrldq $8, %xmm13, %xmm13;
+       vpxor %xmm13, %xmm15, %xmm15;
+       /* dw = subl(1) & subl(9), subr(1) ^= CAMELLIA_RL1(dw); */
+       vpand %xmm15, %xmm5, %xmm14;
+       vpslld $1, %xmm14, %xmm11;
+       vpsrld $31, %xmm14, %xmm14;
+       vpaddd %xmm11, %xmm14, %xmm14;
+       vpslldq $8, %xmm14, %xmm14;
+       vpsrldq $12, %xmm14, %xmm14;
+       vpxor %xmm14, %xmm15, %xmm15;
+
+       vpxor %xmm15, %xmm6, %xmm6;
+       vpxor %xmm15, %xmm8, %xmm8;
+       vpxor %xmm15, %xmm9, %xmm9;
+
+       /* subl(1) ^= subr(1) & ~subr(17); */
+       vpandn %xmm15, %xmm10, %xmm13;
+       vpslldq $12, %xmm13, %xmm13;
+       vpsrldq $8, %xmm13, %xmm13;
+       vpxor %xmm13, %xmm15, %xmm15;
+       /* dw = subl(1) & subl(17), subr(1) ^= CAMELLIA_RL1(dw); */
+       vpand %xmm15, %xmm10, %xmm14;
+       vpslld $1, %xmm14, %xmm11;
+       vpsrld $31, %xmm14, %xmm14;
+       vpaddd %xmm11, %xmm14, %xmm14;
+       vpslldq $8, %xmm14, %xmm14;
+       vpsrldq $12, %xmm14, %xmm14;
+       vpxor %xmm14, %xmm15, %xmm15;
+
+       vpshufd $0x1b, KL128, KL128;
+       vpshufd $0x1b, KA128, KA128;
+       vpshufd $0x1b, %xmm3, %xmm3;
+       vpshufd $0x1b, %xmm4, %xmm4;
+       vpshufd $0x1b, %xmm5, %xmm5;
+       vpshufd $0x1b, %xmm6, %xmm6;
+       vpshufd $0x1b, %xmm7, %xmm7;
+       vpshufd $0x1b, %xmm8, %xmm8;
+       vpshufd $0x1b, %xmm9, %xmm9;
+       vpshufd $0x1b, %xmm10, %xmm10;
+
+       vmovdqu KL128, cmll_sub(0, CTX);
+       vpshufd $0x1b, KL128, KL128;
+       vmovdqu KA128, cmll_sub(2, CTX);
+       vmovdqu %xmm3, cmll_sub(4, CTX);
+       vmovdqu %xmm4, cmll_sub(6, CTX);
+       vmovdqu %xmm5, cmll_sub(8, CTX);
+       vmovdqu %xmm6, cmll_sub(10, CTX);
+       vpsrldq $8, %xmm8, %xmm8;
+       vmovq %xmm7, cmll_sub(12, CTX);
+       vmovq %xmm8, cmll_sub(13, CTX);
+       vmovdqu %xmm9, cmll_sub(14, CTX);
+       vmovdqu %xmm10, cmll_sub(16, CTX);
+
+       vmovdqu cmll_sub(24, CTX), KA128;
+
+       vec_ror128(KL128, %xmm3, 128 - 94, %xmm7);
+       vec_ror128(KA128, %xmm4, 128 - 94, %xmm7);
+       vec_ror128(KL128, %xmm5, 128 - 111, %xmm7);
+       vec_ror128(KA128, %xmm6, 128 - 111, %xmm7);
+
+       vpxor %xmm15, %xmm3, %xmm3;
+       vpxor %xmm15, %xmm4, %xmm4;
+       vpxor %xmm15, %xmm5, %xmm5;
+       vpslldq $8, %xmm15, %xmm15;
+       vpxor %xmm15, %xmm6, %xmm6;
+
+       /* absorb kw4 to other subkeys */
+       vpslldq $8, %xmm6, %xmm15;
+       vpxor %xmm15, %xmm5, %xmm5;
+       vpxor %xmm15, %xmm4, %xmm4;
+       vpxor %xmm15, %xmm3, %xmm3;
+
+       /* subl(25) ^= subr(25) & ~subr(16); */
+       vpshufd $0x1b, cmll_sub(16, CTX), %xmm10;
+       vpandn %xmm15, %xmm10, %xmm13;
+       vpslldq $4, %xmm13, %xmm13;
+       vpxor %xmm13, %xmm15, %xmm15;
+       /* dw = subl(25) & subl(16), subr(25) ^= CAMELLIA_RL1(dw); */
+       vpand %xmm15, %xmm10, %xmm14;
+       vpslld $1, %xmm14, %xmm11;
+       vpsrld $31, %xmm14, %xmm14;
+       vpaddd %xmm11, %xmm14, %xmm14;
+       vpsrldq $12, %xmm14, %xmm14;
+       vpslldq $8, %xmm14, %xmm14;
+       vpxor %xmm14, %xmm15, %xmm15;
+
+       vpshufd $0x1b, %xmm3, %xmm3;
+       vpshufd $0x1b, %xmm4, %xmm4;
+       vpshufd $0x1b, %xmm5, %xmm5;
+       vpshufd $0x1b, %xmm6, %xmm6;
+
+       vmovdqu %xmm3, cmll_sub(18, CTX);
+       vmovdqu %xmm4, cmll_sub(20, CTX);
+       vmovdqu %xmm5, cmll_sub(22, CTX);
+       vmovdqu %xmm6, cmll_sub(24, CTX);
+
+       vpshufd $0x1b, cmll_sub(14, CTX), %xmm3;
+       vpshufd $0x1b, cmll_sub(12, CTX), %xmm4;
+       vpshufd $0x1b, cmll_sub(10, CTX), %xmm5;
+       vpshufd $0x1b, cmll_sub(8, CTX), %xmm6;
+
+       vpxor %xmm15, %xmm3, %xmm3;
+       vpxor %xmm15, %xmm4, %xmm4;
+       vpxor %xmm15, %xmm5, %xmm5;
+
+       /* subl(25) ^= subr(25) & ~subr(8); */
+       vpandn %xmm15, %xmm6, %xmm13;
+       vpslldq $4, %xmm13, %xmm13;
+       vpxor %xmm13, %xmm15, %xmm15;
+       /* dw = subl(25) & subl(8), subr(25) ^= CAMELLIA_RL1(dw); */
+       vpand %xmm15, %xmm6, %xmm14;
+       vpslld $1, %xmm14, %xmm11;
+       vpsrld $31, %xmm14, %xmm14;
+       vpaddd %xmm11, %xmm14, %xmm14;
+       vpsrldq $12, %xmm14, %xmm14;
+       vpslldq $8, %xmm14, %xmm14;
+       vpxor %xmm14, %xmm15, %xmm15;
+
+       vpshufd $0x1b, %xmm3, %xmm3;
+       vpshufd $0x1b, %xmm4, %xmm4;
+       vpshufd $0x1b, %xmm5, %xmm5;
+
+       vmovdqu %xmm3, cmll_sub(14, CTX);
+       vmovdqu %xmm4, cmll_sub(12, CTX);
+       vmovdqu %xmm5, cmll_sub(10, CTX);
+
+       vpshufd $0x1b, cmll_sub(6, CTX), %xmm6;
+       vpshufd $0x1b, cmll_sub(4, CTX), %xmm4;
+       vpshufd $0x1b, cmll_sub(2, CTX), %xmm2;
+       vpshufd $0x1b, cmll_sub(0, CTX), %xmm0;
+
+       vpxor %xmm15, %xmm6, %xmm6;
+       vpxor %xmm15, %xmm4, %xmm4;
+       vpxor %xmm15, %xmm2, %xmm2;
+       vpxor %xmm15, %xmm0, %xmm0;
+
+       vpshufd $0x1b, %xmm6, %xmm6;
+       vpshufd $0x1b, %xmm4, %xmm4;
+       vpshufd $0x1b, %xmm2, %xmm2;
+       vpshufd $0x1b, %xmm0, %xmm0;
+
+       vpsrldq $8, %xmm2, %xmm3;
+       vpsrldq $8, %xmm4, %xmm5;
+       vpsrldq $8, %xmm6, %xmm7;
+
+        /*
+        * key XOR is end of F-function.
+        */
+       vpxor %xmm2, %xmm0, %xmm0;
+       vpxor %xmm4, %xmm2, %xmm2;
+
+       vmovq %xmm0, cmll_sub(0, CTX);
+       vmovq %xmm3, cmll_sub(2, CTX);
+       vpxor %xmm5, %xmm3, %xmm3;
+       vpxor %xmm6, %xmm4, %xmm4;
+       vpxor %xmm7, %xmm5, %xmm5;
+       vmovq %xmm2, cmll_sub(3, CTX);
+       vmovq %xmm3, cmll_sub(4, CTX);
+       vmovq %xmm4, cmll_sub(5, CTX);
+       vmovq %xmm5, cmll_sub(6, CTX);
+
+       vmovq cmll_sub(7, CTX), %xmm7;
+       vmovq cmll_sub(8, CTX), %xmm8;
+       vmovq cmll_sub(9, CTX), %xmm9;
+       vmovq cmll_sub(10, CTX), %xmm10;
+       /* tl = subl(10) ^ (subr(10) & ~subr(8)); */
+       vpandn %xmm10, %xmm8, %xmm15;
+       vpsrldq $4, %xmm15, %xmm15;
+       vpxor %xmm15, %xmm10, %xmm0;
+       /* dw = tl & subl(8), tr = subr(10) ^ CAMELLIA_RL1(dw); */
+       vpand %xmm8, %xmm0, %xmm15;
+       vpslld $1, %xmm15, %xmm14;
+       vpsrld $31, %xmm15, %xmm15;
+       vpaddd %xmm14, %xmm15, %xmm15;
+       vpslldq $12, %xmm15, %xmm15;
+       vpsrldq $8, %xmm15, %xmm15;
+       vpxor %xmm15, %xmm0, %xmm0;
+
+       vpxor %xmm0, %xmm6, %xmm6;
+       vmovq %xmm6, cmll_sub(7, CTX);
+
+       vmovq cmll_sub(11, CTX), %xmm11;
+       vmovq cmll_sub(12, CTX), %xmm12;
+       vmovq cmll_sub(13, CTX), %xmm13;
+       vmovq cmll_sub(14, CTX), %xmm14;
+       vmovq cmll_sub(15, CTX), %xmm15;
+       /* tl = subl(7) ^ (subr(7) & ~subr(9)); */
+       vpandn %xmm7, %xmm9, %xmm1;
+       vpsrldq $4, %xmm1, %xmm1;
+       vpxor %xmm1, %xmm7, %xmm0;
+       /* dw = tl & subl(9), tr = subr(7) ^ CAMELLIA_RL1(dw); */
+       vpand %xmm9, %xmm0, %xmm1;
+       vpslld $1, %xmm1, %xmm2;
+       vpsrld $31, %xmm1, %xmm1;
+       vpaddd %xmm2, %xmm1, %xmm1;
+       vpslldq $12, %xmm1, %xmm1;
+       vpsrldq $8, %xmm1, %xmm1;
+       vpxor %xmm1, %xmm0, %xmm0;
+
+       vpxor %xmm11, %xmm0, %xmm0;
+       vpxor %xmm12, %xmm10, %xmm10;
+       vpxor %xmm13, %xmm11, %xmm11;
+       vpxor %xmm14, %xmm12, %xmm12;
+       vpxor %xmm15, %xmm13, %xmm13;
+       vmovq %xmm0, cmll_sub(10, CTX);
+       vmovq %xmm10, cmll_sub(11, CTX);
+       vmovq %xmm11, cmll_sub(12, CTX);
+       vmovq %xmm12, cmll_sub(13, CTX);
+       vmovq %xmm13, cmll_sub(14, CTX);
+
+       vmovq cmll_sub(16, CTX), %xmm6;
+       vmovq cmll_sub(17, CTX), %xmm7;
+       vmovq cmll_sub(18, CTX), %xmm8;
+       vmovq cmll_sub(19, CTX), %xmm9;
+       vmovq cmll_sub(20, CTX), %xmm10;
+       /* tl = subl(18) ^ (subr(18) & ~subr(16)); */
+       vpandn %xmm8, %xmm6, %xmm1;
+       vpsrldq $4, %xmm1, %xmm1;
+       vpxor %xmm1, %xmm8, %xmm0;
+       /* dw = tl & subl(16), tr = subr(18) ^ CAMELLIA_RL1(dw); */
+       vpand %xmm6, %xmm0, %xmm1;
+       vpslld $1, %xmm1, %xmm2;
+       vpsrld $31, %xmm1, %xmm1;
+       vpaddd %xmm2, %xmm1, %xmm1;
+       vpslldq $12, %xmm1, %xmm1;
+       vpsrldq $8, %xmm1, %xmm1;
+       vpxor %xmm1, %xmm0, %xmm0;
+
+       vpxor %xmm14, %xmm0, %xmm0;
+       vmovq %xmm0, cmll_sub(15, CTX);
+
+       /* tl = subl(15) ^ (subr(15) & ~subr(17)); */
+       vpandn %xmm15, %xmm7, %xmm1;
+       vpsrldq $4, %xmm1, %xmm1;
+       vpxor %xmm1, %xmm15, %xmm0;
+       /* dw = tl & subl(17), tr = subr(15) ^ CAMELLIA_RL1(dw); */
+       vpand %xmm7, %xmm0, %xmm1;
+       vpslld $1, %xmm1, %xmm2;
+       vpsrld $31, %xmm1, %xmm1;
+       vpaddd %xmm2, %xmm1, %xmm1;
+       vpslldq $12, %xmm1, %xmm1;
+       vpsrldq $8, %xmm1, %xmm1;
+       vpxor %xmm1, %xmm0, %xmm0;
+
+       vmovq cmll_sub(21, CTX), %xmm1;
+       vmovq cmll_sub(22, CTX), %xmm2;
+       vmovq cmll_sub(23, CTX), %xmm3;
+       vmovq cmll_sub(24, CTX), %xmm4;
+
+       vpxor %xmm9, %xmm0, %xmm0;
+       vpxor %xmm10, %xmm8, %xmm8;
+       vpxor %xmm1, %xmm9, %xmm9;
+       vpxor %xmm2, %xmm10, %xmm10;
+       vpxor %xmm3, %xmm1, %xmm1;
+       vpxor %xmm4, %xmm3, %xmm3;
+
+       vmovq %xmm0, cmll_sub(18, CTX);
+       vmovq %xmm8, cmll_sub(19, CTX);
+       vmovq %xmm9, cmll_sub(20, CTX);
+       vmovq %xmm10, cmll_sub(21, CTX);
+       vmovq %xmm1, cmll_sub(22, CTX);
+       vmovq %xmm2, cmll_sub(23, CTX);
+       vmovq %xmm3, cmll_sub(24, CTX);
+
+       /* kw2 and kw4 are unused now. */
+       movq $0, cmll_sub(1, CTX);
+       movq $0, cmll_sub(25, CTX);
+
+       vzeroall;
+
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size __camellia_avx_setup128,.-__camellia_avx_setup128;)
+
+.align 8
+ELF(.type  __camellia_avx_setup256,@function;)
+
+__camellia_avx_setup256:
+       /* input:
+        *      %rdi: ctx, CTX; subkey storage at key_table(CTX)
+        *      %xmm0 & %xmm1: key
+        */
+       CFI_STARTPROC();
+
+#define KL128 %xmm0
+#define KR128 %xmm1
+#define KA128 %xmm2
+#define KB128 %xmm3
+
+       vpshufb .Lbswap128_mask rRIP, KL128, KL128;
+       vpshufb .Lbswap128_mask rRIP, KR128, KR128;
+
+       vmovdqa .Linv_shift_row_and_unpcklbw rRIP, %xmm11;
+       vmovq .Lsbox4_input_mask rRIP, %xmm12;
+       vbroadcastss .L0f0f0f0f rRIP, %xmm13;
+       vmovdqa .Lpre_tf_lo_s1 rRIP, %xmm14;
+       vmovdqa .Lpre_tf_hi_s1 rRIP, %xmm15;
+
+       /*
+        * Generate KA
+        */
+       vpxor KL128, KR128, %xmm3;
+       vpsrldq $8, KR128, %xmm6;
+       vpsrldq $8, %xmm3, %xmm2;
+       vpslldq $8, %xmm3, %xmm3;
+       vpsrldq $8, %xmm3, %xmm3;
+
+       camellia_f(%xmm2, %xmm4, %xmm5,
+                  %xmm7, %xmm8, %xmm9, %xmm10,
+                  %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma1 rRIP);
+       vpxor %xmm4, %xmm3, %xmm3;
+       camellia_f(%xmm3, %xmm2, %xmm5,
+                  %xmm7, %xmm8, %xmm9, %xmm10,
+                  %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma2 rRIP);
+       vpxor %xmm6, %xmm2, %xmm2;
+       camellia_f(%xmm2, %xmm3, %xmm5,
+                  %xmm7, %xmm8, %xmm9, %xmm10,
+                  %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma3 rRIP);
+       vpxor %xmm4, %xmm3, %xmm3;
+       vpxor KR128, %xmm3, %xmm3;
+       camellia_f(%xmm3, %xmm4, %xmm5,
+                  %xmm7, %xmm8, %xmm9, %xmm10,
+                  %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma4 rRIP);
+
+       vpslldq $8, %xmm3, %xmm3;
+       vpxor %xmm4, %xmm2, %xmm2;
+       vpsrldq $8, %xmm3, %xmm3;
+       vpslldq $8, %xmm2, KA128;
+       vpor %xmm3, KA128, KA128;
+
+       /*
+        * Generate KB
+        */
+       vpxor KA128, KR128, %xmm3;
+       vpsrldq $8, %xmm3, %xmm4;
+       vpslldq $8, %xmm3, %xmm3;
+       vpsrldq $8, %xmm3, %xmm3;
+
+       camellia_f(%xmm4, %xmm5, %xmm6,
+                  %xmm7, %xmm8, %xmm9, %xmm10,
+                  %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma5 rRIP);
+       vpxor %xmm5, %xmm3, %xmm3;
+
+       camellia_f(%xmm3, %xmm5, %xmm6,
+                  %xmm7, %xmm8, %xmm9, %xmm10,
+                  %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma6 rRIP);
+       vpslldq $8, %xmm3, %xmm3;
+       vpxor %xmm5, %xmm4, %xmm4;
+       vpsrldq $8, %xmm3, %xmm3;
+       vpslldq $8, %xmm4, %xmm4;
+       vpor %xmm3, %xmm4, KB128;
+
+        /*
+         * Generate subkeys
+         */
+       vmovdqu KB128, cmll_sub(32, CTX);
+       vec_rol128(KR128, %xmm4, 15, %xmm15);
+       vec_rol128(KA128, %xmm5, 15, %xmm15);
+       vec_rol128(KR128, %xmm6, 30, %xmm15);
+       vec_rol128(KB128, %xmm7, 30, %xmm15);
+       vec_rol128(KL128, %xmm8, 45, %xmm15);
+       vec_rol128(KA128, %xmm9, 45, %xmm15);
+       vec_rol128(KL128, %xmm10, 60, %xmm15);
+       vec_rol128(KR128, %xmm11, 60, %xmm15);
+       vec_rol128(KB128, %xmm12, 60, %xmm15);
+
+       /* absorb kw2 to other subkeys */
+       vpslldq $8, KL128, %xmm15;
+       vpsrldq $8, %xmm15, %xmm15;
+       vpxor %xmm15, KB128, KB128;
+       vpxor %xmm15, %xmm4, %xmm4;
+       vpxor %xmm15, %xmm5, %xmm5;
+
+       /* subl(1) ^= subr(1) & ~subr(9); */
+       vpandn %xmm15, %xmm6, %xmm13;
+       vpslldq $12, %xmm13, %xmm13;
+       vpsrldq $8, %xmm13, %xmm13;
+       vpxor %xmm13, %xmm15, %xmm15;
+       /* dw = subl(1) & subl(9), subr(1) ^= CAMELLIA_RL1(dw); */
+       vpand %xmm15, %xmm6, %xmm14;
+       vpslld $1, %xmm14, %xmm13;
+       vpsrld $31, %xmm14, %xmm14;
+       vpaddd %xmm13, %xmm14, %xmm14;
+       vpslldq $8, %xmm14, %xmm14;
+       vpsrldq $12, %xmm14, %xmm14;
+       vpxor %xmm14, %xmm15, %xmm15;
+
+       vpxor %xmm15, %xmm7, %xmm7;
+       vpxor %xmm15, %xmm8, %xmm8;
+       vpxor %xmm15, %xmm9, %xmm9;
+
+       vpshufd $0x1b, KL128, KL128;
+       vpshufd $0x1b, KB128, KB128;
+       vpshufd $0x1b, %xmm4, %xmm4;
+       vpshufd $0x1b, %xmm5, %xmm5;
+       vpshufd $0x1b, %xmm6, %xmm6;
+       vpshufd $0x1b, %xmm7, %xmm7;
+       vpshufd $0x1b, %xmm8, %xmm8;
+       vpshufd $0x1b, %xmm9, %xmm9;
+
+       vmovdqu KL128, cmll_sub(0, CTX);
+       vpshufd $0x1b, KL128, KL128;
+       vmovdqu KB128, cmll_sub(2, CTX);
+       vmovdqu %xmm4, cmll_sub(4, CTX);
+       vmovdqu %xmm5, cmll_sub(6, CTX);
+       vmovdqu %xmm6, cmll_sub(8, CTX);
+       vmovdqu %xmm7, cmll_sub(10, CTX);
+       vmovdqu %xmm8, cmll_sub(12, CTX);
+       vmovdqu %xmm9, cmll_sub(14, CTX);
+
+       vmovdqu cmll_sub(32, CTX), KB128;
+
+       /* subl(1) ^= subr(1) & ~subr(17); */
+       vpandn %xmm15, %xmm10, %xmm13;
+       vpslldq $12, %xmm13, %xmm13;
+       vpsrldq $8, %xmm13, %xmm13;
+       vpxor %xmm13, %xmm15, %xmm15;
+       /* dw = subl(1) & subl(17), subr(1) ^= CAMELLIA_RL1(dw); */
+       vpand %xmm15, %xmm10, %xmm14;
+       vpslld $1, %xmm14, %xmm13;
+       vpsrld $31, %xmm14, %xmm14;
+       vpaddd %xmm13, %xmm14, %xmm14;
+       vpslldq $8, %xmm14, %xmm14;
+       vpsrldq $12, %xmm14, %xmm14;
+       vpxor %xmm14, %xmm15, %xmm15;
+
+       vpxor %xmm15, %xmm11, %xmm11;
+       vpxor %xmm15, %xmm12, %xmm12;
+
+       vec_ror128(KL128, %xmm4, 128-77, %xmm14);
+       vec_ror128(KA128, %xmm5, 128-77, %xmm14);
+       vec_ror128(KR128, %xmm6, 128-94, %xmm14);
+       vec_ror128(KA128, %xmm7, 128-94, %xmm14);
+       vec_ror128(KL128, %xmm8, 128-111, %xmm14);
+       vec_ror128(KB128, %xmm9, 128-111, %xmm14);
+
+       vpxor %xmm15, %xmm4, %xmm4;
+
+       vpshufd $0x1b, %xmm10, %xmm10;
+       vpshufd $0x1b, %xmm11, %xmm11;
+       vpshufd $0x1b, %xmm12, %xmm12;
+       vpshufd $0x1b, %xmm4, %xmm4;
+
+       vmovdqu %xmm10, cmll_sub(16, CTX);
+       vmovdqu %xmm11, cmll_sub(18, CTX);
+       vmovdqu %xmm12, cmll_sub(20, CTX);
+       vmovdqu %xmm4, cmll_sub(22, CTX);
+
+       /* subl(1) ^= subr(1) & ~subr(25); */
+       vpandn %xmm15, %xmm5, %xmm13;
+       vpslldq $12, %xmm13, %xmm13;
+       vpsrldq $8, %xmm13, %xmm13;
+       vpxor %xmm13, %xmm15, %xmm15;
+       /* dw = subl(1) & subl(25), subr(1) ^= CAMELLIA_RL1(dw); */
+       vpand %xmm15, %xmm5, %xmm14;
+       vpslld $1, %xmm14, %xmm13;
+       vpsrld $31, %xmm14, %xmm14;
+       vpaddd %xmm13, %xmm14, %xmm14;
+       vpslldq $8, %xmm14, %xmm14;
+       vpsrldq $12, %xmm14, %xmm14;
+       vpxor %xmm14, %xmm15, %xmm15;
+
+       vpxor %xmm15, %xmm6, %xmm6;
+       vpxor %xmm15, %xmm7, %xmm7;
+       vpxor %xmm15, %xmm8, %xmm8;
+       vpslldq $8, %xmm15, %xmm15;
+       vpxor %xmm15, %xmm9, %xmm9;
+
+       /* absorb kw4 to other subkeys */
+       vpslldq $8, %xmm9, %xmm15;
+       vpxor %xmm15, %xmm8, %xmm8;
+       vpxor %xmm15, %xmm7, %xmm7;
+       vpxor %xmm15, %xmm6, %xmm6;
+
+       /* subl(33) ^= subr(33) & ~subr(24); */
+       vpandn %xmm15, %xmm5, %xmm14;
+       vpslldq $4, %xmm14, %xmm14;
+       vpxor %xmm14, %xmm15, %xmm15;
+       /* dw = subl(33) & subl(24), subr(33) ^= CAMELLIA_RL1(dw); */
+       vpand %xmm15, %xmm5, %xmm14;
+       vpslld $1, %xmm14, %xmm13;
+       vpsrld $31, %xmm14, %xmm14;
+       vpaddd %xmm13, %xmm14, %xmm14;
+       vpsrldq $12, %xmm14, %xmm14;
+       vpslldq $8, %xmm14, %xmm14;
+       vpxor %xmm14, %xmm15, %xmm15;
+
+       vpshufd $0x1b, %xmm5, %xmm5;
+       vpshufd $0x1b, %xmm6, %xmm6;
+       vpshufd $0x1b, %xmm7, %xmm7;
+       vpshufd $0x1b, %xmm8, %xmm8;
+       vpshufd $0x1b, %xmm9, %xmm9;
+
+       vmovdqu %xmm5, cmll_sub(24, CTX);
+       vmovdqu %xmm6, cmll_sub(26, CTX);
+       vmovdqu %xmm7, cmll_sub(28, CTX);
+       vmovdqu %xmm8, cmll_sub(30, CTX);
+       vmovdqu %xmm9, cmll_sub(32, CTX);
+
+       vpshufd $0x1b, cmll_sub(22, CTX), %xmm0;
+       vpshufd $0x1b, cmll_sub(20, CTX), %xmm1;
+       vpshufd $0x1b, cmll_sub(18, CTX), %xmm2;
+       vpshufd $0x1b, cmll_sub(16, CTX), %xmm3;
+       vpshufd $0x1b, cmll_sub(14, CTX), %xmm4;
+       vpshufd $0x1b, cmll_sub(12, CTX), %xmm5;
+       vpshufd $0x1b, cmll_sub(10, CTX), %xmm6;
+       vpshufd $0x1b, cmll_sub(8, CTX), %xmm7;
+
+       vpxor %xmm15, %xmm0, %xmm0;
+       vpxor %xmm15, %xmm1, %xmm1;
+       vpxor %xmm15, %xmm2, %xmm2;
+
+       /* subl(33) ^= subr(33) & ~subr(24); */
+       vpandn %xmm15, %xmm3, %xmm14;
+       vpslldq $4, %xmm14, %xmm14;
+       vpxor %xmm14, %xmm15, %xmm15;
+       /* dw = subl(33) & subl(24), subr(33) ^= CAMELLIA_RL1(dw); */
+       vpand %xmm15, %xmm3, %xmm14;
+       vpslld $1, %xmm14, %xmm13;
+       vpsrld $31, %xmm14, %xmm14;
+       vpaddd %xmm13, %xmm14, %xmm14;
+       vpsrldq $12, %xmm14, %xmm14;
+       vpslldq $8, %xmm14, %xmm14;
+       vpxor %xmm14, %xmm15, %xmm15;
+
+       vpxor %xmm15, %xmm4, %xmm4;
+       vpxor %xmm15, %xmm5, %xmm5;
+       vpxor %xmm15, %xmm6, %xmm6;
+
+       vpshufd $0x1b, %xmm0, %xmm0;
+       vpshufd $0x1b, %xmm1, %xmm1;
+       vpshufd $0x1b, %xmm2, %xmm2;
+       vpshufd $0x1b, %xmm4, %xmm4;
+       vpshufd $0x1b, %xmm5, %xmm5;
+       vpshufd $0x1b, %xmm6, %xmm6;
+
+       vmovdqu %xmm0, cmll_sub(22, CTX);
+       vmovdqu %xmm1, cmll_sub(20, CTX);
+       vmovdqu %xmm2, cmll_sub(18, CTX);
+       vmovdqu %xmm4, cmll_sub(14, CTX);
+       vmovdqu %xmm5, cmll_sub(12, CTX);
+       vmovdqu %xmm6, cmll_sub(10, CTX);
+
+       vpshufd $0x1b, cmll_sub(6, CTX), %xmm6;
+       vpshufd $0x1b, cmll_sub(4, CTX), %xmm4;
+       vpshufd $0x1b, cmll_sub(2, CTX), %xmm2;
+       vpshufd $0x1b, cmll_sub(0, CTX), %xmm0;
+
+       /* subl(33) ^= subr(33) & ~subr(24); */
+       vpandn %xmm15, %xmm7, %xmm14;
+       vpslldq $4, %xmm14, %xmm14;
+       vpxor %xmm14, %xmm15, %xmm15;
+       /* dw = subl(33) & subl(24), subr(33) ^= CAMELLIA_RL1(dw); */
+       vpand %xmm15, %xmm7, %xmm14;
+       vpslld $1, %xmm14, %xmm13;
+       vpsrld $31, %xmm14, %xmm14;
+       vpaddd %xmm13, %xmm14, %xmm14;
+       vpsrldq $12, %xmm14, %xmm14;
+       vpslldq $8, %xmm14, %xmm14;
+       vpxor %xmm14, %xmm15, %xmm15;
+
+       vpxor %xmm15, %xmm6, %xmm6;
+       vpxor %xmm15, %xmm4, %xmm4;
+       vpxor %xmm15, %xmm2, %xmm2;
+       vpxor %xmm15, %xmm0, %xmm0;
+
+       vpshufd $0x1b, %xmm6, %xmm6;
+       vpshufd $0x1b, %xmm4, %xmm4;
+       vpshufd $0x1b, %xmm2, %xmm2;
+       vpshufd $0x1b, %xmm0, %xmm0;
+
+       vpsrldq $8, %xmm2, %xmm3;
+       vpsrldq $8, %xmm4, %xmm5;
+       vpsrldq $8, %xmm6, %xmm7;
+
+        /*
+        * key XOR is end of F-function.
+        */
+       vpxor %xmm2, %xmm0, %xmm0;
+       vpxor %xmm4, %xmm2, %xmm2;
+
+       vmovq %xmm0, cmll_sub(0, CTX);
+       vmovq %xmm3, cmll_sub(2, CTX);
+       vpxor %xmm5, %xmm3, %xmm3;
+       vpxor %xmm6, %xmm4, %xmm4;
+       vpxor %xmm7, %xmm5, %xmm5;
+       vmovq %xmm2, cmll_sub(3, CTX);
+       vmovq %xmm3, cmll_sub(4, CTX);
+       vmovq %xmm4, cmll_sub(5, CTX);
+       vmovq %xmm5, cmll_sub(6, CTX);
+
+       vmovq cmll_sub(7, CTX), %xmm7;
+       vmovq cmll_sub(8, CTX), %xmm8;
+       vmovq cmll_sub(9, CTX), %xmm9;
+       vmovq cmll_sub(10, CTX), %xmm10;
+       /* tl = subl(10) ^ (subr(10) & ~subr(8)); */
+       vpandn %xmm10, %xmm8, %xmm15;
+       vpsrldq $4, %xmm15, %xmm15;
+       vpxor %xmm15, %xmm10, %xmm0;
+       /* dw = tl & subl(8), tr = subr(10) ^ CAMELLIA_RL1(dw); */
+       vpand %xmm8, %xmm0, %xmm15;
+       vpslld $1, %xmm15, %xmm14;
+       vpsrld $31, %xmm15, %xmm15;
+       vpaddd %xmm14, %xmm15, %xmm15;
+       vpslldq $12, %xmm15, %xmm15;
+       vpsrldq $8, %xmm15, %xmm15;
+       vpxor %xmm15, %xmm0, %xmm0;
+
+       vpxor %xmm0, %xmm6, %xmm6;
+       vmovq %xmm6, cmll_sub(7, CTX);
+
+       vmovq cmll_sub(11, CTX), %xmm11;
+       vmovq cmll_sub(12, CTX), %xmm12;
+       vmovq cmll_sub(13, CTX), %xmm13;
+       vmovq cmll_sub(14, CTX), %xmm14;
+       vmovq cmll_sub(15, CTX), %xmm15;
+       /* tl = subl(7) ^ (subr(7) & ~subr(9)); */
+       vpandn %xmm7, %xmm9, %xmm1;
+       vpsrldq $4, %xmm1, %xmm1;
+       vpxor %xmm1, %xmm7, %xmm0;
+       /* dw = tl & subl(9), tr = subr(7) ^ CAMELLIA_RL1(dw); */
+       vpand %xmm9, %xmm0, %xmm1;
+       vpslld $1, %xmm1, %xmm2;
+       vpsrld $31, %xmm1, %xmm1;
+       vpaddd %xmm2, %xmm1, %xmm1;
+       vpslldq $12, %xmm1, %xmm1;
+       vpsrldq $8, %xmm1, %xmm1;
+       vpxor %xmm1, %xmm0, %xmm0;
+
+       vpxor %xmm11, %xmm0, %xmm0;
+       vpxor %xmm12, %xmm10, %xmm10;
+       vpxor %xmm13, %xmm11, %xmm11;
+       vpxor %xmm14, %xmm12, %xmm12;
+       vpxor %xmm15, %xmm13, %xmm13;
+       vmovq %xmm0, cmll_sub(10, CTX);
+       vmovq %xmm10, cmll_sub(11, CTX);
+       vmovq %xmm11, cmll_sub(12, CTX);
+       vmovq %xmm12, cmll_sub(13, CTX);
+       vmovq %xmm13, cmll_sub(14, CTX);
+
+       vmovq cmll_sub(16, CTX), %xmm6;
+       vmovq cmll_sub(17, CTX), %xmm7;
+       vmovq cmll_sub(18, CTX), %xmm8;
+       vmovq cmll_sub(19, CTX), %xmm9;
+       vmovq cmll_sub(20, CTX), %xmm10;
+       /* tl = subl(18) ^ (subr(18) & ~subr(16)); */
+       vpandn %xmm8, %xmm6, %xmm1;
+       vpsrldq $4, %xmm1, %xmm1;
+       vpxor %xmm1, %xmm8, %xmm0;
+       /* dw = tl & subl(16), tr = subr(18) ^ CAMELLIA_RL1(dw); */
+       vpand %xmm6, %xmm0, %xmm1;
+       vpslld $1, %xmm1, %xmm2;
+       vpsrld $31, %xmm1, %xmm1;
+       vpaddd %xmm2, %xmm1, %xmm1;
+       vpslldq $12, %xmm1, %xmm1;
+       vpsrldq $8, %xmm1, %xmm1;
+       vpxor %xmm1, %xmm0, %xmm0;
+
+       vpxor %xmm14, %xmm0, %xmm0;
+       vmovq %xmm0, cmll_sub(15, CTX);
+
+       /* tl = subl(15) ^ (subr(15) & ~subr(17)); */
+       vpandn %xmm15, %xmm7, %xmm1;
+       vpsrldq $4, %xmm1, %xmm1;
+       vpxor %xmm1, %xmm15, %xmm0;
+       /* dw = tl & subl(17), tr = subr(15) ^ CAMELLIA_RL1(dw); */
+       vpand %xmm7, %xmm0, %xmm1;
+       vpslld $1, %xmm1, %xmm2;
+       vpsrld $31, %xmm1, %xmm1;
+       vpaddd %xmm2, %xmm1, %xmm1;
+       vpslldq $12, %xmm1, %xmm1;
+       vpsrldq $8, %xmm1, %xmm1;
+       vpxor %xmm1, %xmm0, %xmm0;
+
+       vmovq cmll_sub(21, CTX), %xmm1;
+       vmovq cmll_sub(22, CTX), %xmm2;
+       vmovq cmll_sub(23, CTX), %xmm3;
+       vmovq cmll_sub(24, CTX), %xmm4;
+
+       vpxor %xmm9, %xmm0, %xmm0;
+       vpxor %xmm10, %xmm8, %xmm8;
+       vpxor %xmm1, %xmm9, %xmm9;
+       vpxor %xmm2, %xmm10, %xmm10;
+       vpxor %xmm3, %xmm1, %xmm1;
+
+       vmovq %xmm0, cmll_sub(18, CTX);
+       vmovq %xmm8, cmll_sub(19, CTX);
+       vmovq %xmm9, cmll_sub(20, CTX);
+       vmovq %xmm10, cmll_sub(21, CTX);
+       vmovq %xmm1, cmll_sub(22, CTX);
+
+       vmovq cmll_sub(25, CTX), %xmm5;
+       vmovq cmll_sub(26, CTX), %xmm6;
+       vmovq cmll_sub(27, CTX), %xmm7;
+       vmovq cmll_sub(28, CTX), %xmm8;
+       vmovq cmll_sub(29, CTX), %xmm9;
+       vmovq cmll_sub(30, CTX), %xmm10;
+       vmovq cmll_sub(31, CTX), %xmm11;
+       vmovq cmll_sub(32, CTX), %xmm12;
+
+       /* tl = subl(26) ^ (subr(26) & ~subr(24)); */
+       vpandn %xmm6, %xmm4, %xmm15;
+       vpsrldq $4, %xmm15, %xmm15;
+       vpxor %xmm15, %xmm6, %xmm0;
+       /* dw = tl & subl(26), tr = subr(24) ^ CAMELLIA_RL1(dw); */
+       vpand %xmm4, %xmm0, %xmm15;
+       vpslld $1, %xmm15, %xmm14;
+       vpsrld $31, %xmm15, %xmm15;
+       vpaddd %xmm14, %xmm15, %xmm15;
+       vpslldq $12, %xmm15, %xmm15;
+       vpsrldq $8, %xmm15, %xmm15;
+       vpxor %xmm15, %xmm0, %xmm0;
+
+       vpxor %xmm0, %xmm2, %xmm2;
+       vmovq %xmm2, cmll_sub(23, CTX);
+
+       /* tl = subl(23) ^ (subr(23) &  ~subr(25)); */
+       vpandn %xmm3, %xmm5, %xmm15;
+       vpsrldq $4, %xmm15, %xmm15;
+       vpxor %xmm15, %xmm3, %xmm0;
+       /* dw = tl & subl(26), tr = subr(24) ^ CAMELLIA_RL1(dw); */
+       vpand %xmm5, %xmm0, %xmm15;
+       vpslld $1, %xmm15, %xmm14;
+       vpsrld $31, %xmm15, %xmm15;
+       vpaddd %xmm14, %xmm15, %xmm15;
+       vpslldq $12, %xmm15, %xmm15;
+       vpsrldq $8, %xmm15, %xmm15;
+       vpxor %xmm15, %xmm0, %xmm0;
+
+       vpxor %xmm7, %xmm0, %xmm0;
+       vpxor %xmm8, %xmm6, %xmm6;
+       vpxor %xmm9, %xmm7, %xmm7;
+       vpxor %xmm10, %xmm8, %xmm8;
+       vpxor %xmm11, %xmm9, %xmm9;
+       vpxor %xmm12, %xmm11, %xmm11;
+
+       vmovq %xmm0, cmll_sub(26, CTX);
+       vmovq %xmm6, cmll_sub(27, CTX);
+       vmovq %xmm7, cmll_sub(28, CTX);
+       vmovq %xmm8, cmll_sub(29, CTX);
+       vmovq %xmm9, cmll_sub(30, CTX);
+       vmovq %xmm10, cmll_sub(31, CTX);
+       vmovq %xmm11, cmll_sub(32, CTX);
+
+       /* kw2 and kw4 are unused now. */
+       movq $0, cmll_sub(1, CTX);
+       movq $0, cmll_sub(33, CTX);
+
+       vzeroall;
+
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size __camellia_avx_setup256,.-__camellia_avx_setup256;)
+
+.align 8
+.globl _gcry_camellia_aesni_avx_keygen
+ELF(.type  _gcry_camellia_aesni_avx_keygen,@function;)
+
+_gcry_camellia_aesni_avx_keygen:
+       /* input:
+        *      %rdi: ctx, CTX
+        *      %rsi: key
+        *      %rdx: keylen
+        */
+       CFI_STARTPROC();
+
+       vzeroupper;
+
+       vmovdqu (%rsi), %xmm0;
+       cmpl $24, %edx;
+       jb __camellia_avx_setup128;
+       je .Lprepare_key192;
+
+       vmovdqu 16(%rsi), %xmm1;
+       jmp __camellia_avx_setup256;
+
+.Lprepare_key192:
+       vpcmpeqd %xmm2, %xmm2, %xmm2;
+       vmovq 16(%rsi), %xmm1;
+
+       vpxor %xmm1, %xmm2, %xmm2;
+       vpslldq $8, %xmm2, %xmm2;
+       vpor %xmm2, %xmm1, %xmm1;
+
+       jmp __camellia_avx_setup256;
+       CFI_ENDPROC();
+ELF(.size _gcry_camellia_aesni_avx_keygen,.-_gcry_camellia_aesni_avx_keygen;)
+
+#endif /*defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX_SUPPORT)*/
+#endif /*__x86_64*/
diff --git a/grub-core/lib/libgcrypt/cipher/camellia-aesni-avx2-amd64.S 
b/grub-core/lib/libgcrypt/cipher/camellia-aesni-avx2-amd64.S
new file mode 100644
index 000000000..5102d1912
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/camellia-aesni-avx2-amd64.S
@@ -0,0 +1,34 @@
+/* camellia-aesni-avx2-amd64.S  -  AES-NI/AVX2 implementation of Camellia 
cipher
+ *
+ * Copyright (C) 2021 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+
+#ifdef __x86_64
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
+    defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX2_SUPPORT)
+
+#undef CAMELLIA_VAES_BUILD
+#define FUNC_NAME(func) _gcry_camellia_aesni_avx2_ ## func
+
+#include "camellia-aesni-avx2-amd64.h"
+
+#endif /* defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX2_SUPPORT) */
+#endif /* __x86_64 */
diff --git a/grub-core/lib/libgcrypt/cipher/camellia-aesni-avx2-amd64.h 
b/grub-core/lib/libgcrypt/cipher/camellia-aesni-avx2-amd64.h
new file mode 100644
index 000000000..e93c40b89
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/camellia-aesni-avx2-amd64.h
@@ -0,0 +1,1794 @@
+/* camellia-aesni-avx2-amd64.h - AES-NI/VAES/AVX2 implementation of Camellia
+ *
+ * Copyright (C) 2013-2015,2020-2021 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef GCRY_CAMELLIA_AESNI_AVX2_AMD64_H
+#define GCRY_CAMELLIA_AESNI_AVX2_AMD64_H
+
+#include "asm-common-amd64.h"
+
+#define CAMELLIA_TABLE_BYTE_LEN 272
+
+/* struct CAMELLIA_context: */
+#define key_table 0
+#define key_bitlength CAMELLIA_TABLE_BYTE_LEN
+
+/* register macros */
+#define CTX %rdi
+#define RIO %r8
+
+/**********************************************************************
+  helper macros
+ **********************************************************************/
+#define filter_8bit(x, lo_t, hi_t, mask4bit, tmp0) \
+       vpand x, mask4bit, tmp0; \
+       vpandn x, mask4bit, x; \
+       vpsrld $4, x, x; \
+       \
+       vpshufb tmp0, lo_t, tmp0; \
+       vpshufb x, hi_t, x; \
+       vpxor tmp0, x, x;
+
+#define ymm0_x xmm0
+#define ymm1_x xmm1
+#define ymm2_x xmm2
+#define ymm3_x xmm3
+#define ymm4_x xmm4
+#define ymm5_x xmm5
+#define ymm6_x xmm6
+#define ymm7_x xmm7
+#define ymm8_x xmm8
+#define ymm9_x xmm9
+#define ymm10_x xmm10
+#define ymm11_x xmm11
+#define ymm12_x xmm12
+#define ymm13_x xmm13
+#define ymm14_x xmm14
+#define ymm15_x xmm15
+
+#ifdef CAMELLIA_VAES_BUILD
+# define IF_AESNI(...)
+# define IF_VAES(...) __VA_ARGS__
+#else
+# define IF_AESNI(...) __VA_ARGS__
+# define IF_VAES(...)
+#endif
+
+/**********************************************************************
+  32-way camellia
+ **********************************************************************/
+
+/*
+ * IN:
+ *   x0..x7: byte-sliced AB state
+ *   mem_cd: register pointer storing CD state
+ *   key: index for key material
+ * OUT:
+ *   x0..x7: new byte-sliced CD state
+ */
+
+#define roundsm32(x0, x1, x2, x3, x4, x5, x6, x7, t0, t1, t2, t3, t4, t5, \
+                 t6, t7, mem_cd, key) \
+       /* \
+        * S-function with AES subbytes \
+        */ \
+       vbroadcasti128 .Linv_shift_row rRIP, t4; \
+       vpbroadcastd .L0f0f0f0f rRIP, t7; \
+       vbroadcasti128 .Lpre_tf_lo_s1 rRIP, t5; \
+       vbroadcasti128 .Lpre_tf_hi_s1 rRIP, t6; \
+       vbroadcasti128 .Lpre_tf_lo_s4 rRIP, t2; \
+       vbroadcasti128 .Lpre_tf_hi_s4 rRIP, t3; \
+       \
+       /* AES inverse shift rows */ \
+       vpshufb t4, x0, x0; \
+       vpshufb t4, x7, x7; \
+       vpshufb t4, x3, x3; \
+       vpshufb t4, x6, x6; \
+       vpshufb t4, x2, x2; \
+       vpshufb t4, x5, x5; \
+       vpshufb t4, x1, x1; \
+       vpshufb t4, x4, x4; \
+       \
+       /* prefilter sboxes 1, 2 and 3 */ \
+       /* prefilter sbox 4 */ \
+       filter_8bit(x0, t5, t6, t7, t4); \
+       filter_8bit(x7, t5, t6, t7, t4); \
+       IF_AESNI(vextracti128 $1, x0, t0##_x); \
+       IF_AESNI(vextracti128 $1, x7, t1##_x); \
+       filter_8bit(x3, t2, t3, t7, t4); \
+       filter_8bit(x6, t2, t3, t7, t4); \
+       IF_AESNI(vextracti128 $1, x3, t3##_x); \
+       IF_AESNI(vextracti128 $1, x6, t2##_x); \
+       filter_8bit(x2, t5, t6, t7, t4); \
+       filter_8bit(x5, t5, t6, t7, t4); \
+       filter_8bit(x1, t5, t6, t7, t4); \
+       filter_8bit(x4, t5, t6, t7, t4); \
+       \
+       vpxor t4##_x, t4##_x, t4##_x; \
+       \
+       /* AES subbytes + AES shift rows */ \
+       IF_AESNI(vextracti128 $1, x2, t6##_x; \
+                vextracti128 $1, x5, t5##_x; \
+                vaesenclast t4##_x, x0##_x, x0##_x; \
+                vaesenclast t4##_x, t0##_x, t0##_x; \
+                vaesenclast t4##_x, x7##_x, x7##_x; \
+                vaesenclast t4##_x, t1##_x, t1##_x; \
+                vaesenclast t4##_x, x3##_x, x3##_x; \
+                vaesenclast t4##_x, t3##_x, t3##_x; \
+                vaesenclast t4##_x, x6##_x, x6##_x; \
+                vaesenclast t4##_x, t2##_x, t2##_x; \
+                vinserti128 $1, t0##_x, x0, x0; \
+                vinserti128 $1, t1##_x, x7, x7; \
+                vinserti128 $1, t3##_x, x3, x3; \
+                vinserti128 $1, t2##_x, x6, x6; \
+                vextracti128 $1, x1, t3##_x; \
+                vextracti128 $1, x4, t2##_x); \
+       vbroadcasti128 .Lpost_tf_lo_s1 rRIP, t0; \
+       vbroadcasti128 .Lpost_tf_hi_s1 rRIP, t1; \
+       IF_AESNI(vaesenclast t4##_x, x2##_x, x2##_x; \
+                vaesenclast t4##_x, t6##_x, t6##_x; \
+                vaesenclast t4##_x, x5##_x, x5##_x; \
+                vaesenclast t4##_x, t5##_x, t5##_x; \
+                vaesenclast t4##_x, x1##_x, x1##_x; \
+                vaesenclast t4##_x, t3##_x, t3##_x; \
+                vaesenclast t4##_x, x4##_x, x4##_x; \
+                vaesenclast t4##_x, t2##_x, t2##_x; \
+                vinserti128 $1, t6##_x, x2, x2; \
+                vinserti128 $1, t5##_x, x5, x5; \
+                vinserti128 $1, t3##_x, x1, x1; \
+                vinserti128 $1, t2##_x, x4, x4); \
+       IF_VAES(vaesenclast t4, x0, x0; \
+               vaesenclast t4, x7, x7; \
+               vaesenclast t4, x3, x3; \
+               vaesenclast t4, x6, x6; \
+               vaesenclast t4, x2, x2; \
+               vaesenclast t4, x5, x5; \
+               vaesenclast t4, x1, x1; \
+               vaesenclast t4, x4, x4); \
+       \
+       /* postfilter sboxes 1 and 4 */ \
+       vbroadcasti128 .Lpost_tf_lo_s3 rRIP, t2; \
+       vbroadcasti128 .Lpost_tf_hi_s3 rRIP, t3; \
+       filter_8bit(x0, t0, t1, t7, t4); \
+       filter_8bit(x7, t0, t1, t7, t4); \
+       filter_8bit(x3, t0, t1, t7, t6); \
+       filter_8bit(x6, t0, t1, t7, t6); \
+       \
+       /* postfilter sbox 3 */ \
+       vbroadcasti128 .Lpost_tf_lo_s2 rRIP, t4; \
+       vbroadcasti128 .Lpost_tf_hi_s2 rRIP, t5; \
+       filter_8bit(x2, t2, t3, t7, t6); \
+       filter_8bit(x5, t2, t3, t7, t6); \
+       \
+       vpbroadcastq key, t0; /* higher 64-bit duplicate ignored */ \
+       \
+       /* postfilter sbox 2 */ \
+       filter_8bit(x1, t4, t5, t7, t2); \
+       filter_8bit(x4, t4, t5, t7, t2); \
+       vpxor t7, t7, t7; \
+       \
+       vpsrldq $1, t0, t1; \
+       vpsrldq $2, t0, t2; \
+       vpshufb t7, t1, t1; \
+       vpsrldq $3, t0, t3; \
+       \
+       /* P-function */ \
+       vpxor x5, x0, x0; \
+       vpxor x6, x1, x1; \
+       vpxor x7, x2, x2; \
+       vpxor x4, x3, x3; \
+       \
+       vpshufb t7, t2, t2; \
+       vpsrldq $4, t0, t4; \
+       vpshufb t7, t3, t3; \
+       vpsrldq $5, t0, t5; \
+       vpshufb t7, t4, t4; \
+       \
+       vpxor x2, x4, x4; \
+       vpxor x3, x5, x5; \
+       vpxor x0, x6, x6; \
+       vpxor x1, x7, x7; \
+       \
+       vpsrldq $6, t0, t6; \
+       vpshufb t7, t5, t5; \
+       vpshufb t7, t6, t6; \
+       \
+       vpxor x7, x0, x0; \
+       vpxor x4, x1, x1; \
+       vpxor x5, x2, x2; \
+       vpxor x6, x3, x3; \
+       \
+       vpxor x3, x4, x4; \
+       vpxor x0, x5, x5; \
+       vpxor x1, x6, x6; \
+       vpxor x2, x7, x7; /* note: high and low parts swapped */ \
+       \
+       /* Add key material and result to CD (x becomes new CD) */ \
+       \
+       vpxor t6, x1, x1; \
+       vpxor 5 * 32(mem_cd), x1, x1; \
+       \
+       vpsrldq $7, t0, t6; \
+       vpshufb t7, t0, t0; \
+       vpshufb t7, t6, t7; \
+       \
+       vpxor t7, x0, x0; \
+       vpxor 4 * 32(mem_cd), x0, x0; \
+       \
+       vpxor t5, x2, x2; \
+       vpxor 6 * 32(mem_cd), x2, x2; \
+       \
+       vpxor t4, x3, x3; \
+       vpxor 7 * 32(mem_cd), x3, x3; \
+       \
+       vpxor t3, x4, x4; \
+       vpxor 0 * 32(mem_cd), x4, x4; \
+       \
+       vpxor t2, x5, x5; \
+       vpxor 1 * 32(mem_cd), x5, x5; \
+       \
+       vpxor t1, x6, x6; \
+       vpxor 2 * 32(mem_cd), x6, x6; \
+       \
+       vpxor t0, x7, x7; \
+       vpxor 3 * 32(mem_cd), x7, x7;
+
+/*
+ * IN/OUT:
+ *  x0..x7: byte-sliced AB state preloaded
+ *  mem_ab: byte-sliced AB state in memory
+ *  mem_cb: byte-sliced CD state in memory
+ */
+#define two_roundsm32(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
+                     y6, y7, mem_ab, mem_cd, i, dir, store_ab) \
+       roundsm32(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
+                 y6, y7, mem_cd, (key_table + (i) * 8)(CTX)); \
+       \
+       vmovdqu x0, 4 * 32(mem_cd); \
+       vmovdqu x1, 5 * 32(mem_cd); \
+       vmovdqu x2, 6 * 32(mem_cd); \
+       vmovdqu x3, 7 * 32(mem_cd); \
+       vmovdqu x4, 0 * 32(mem_cd); \
+       vmovdqu x5, 1 * 32(mem_cd); \
+       vmovdqu x6, 2 * 32(mem_cd); \
+       vmovdqu x7, 3 * 32(mem_cd); \
+       \
+       roundsm32(x4, x5, x6, x7, x0, x1, x2, x3, y0, y1, y2, y3, y4, y5, \
+                 y6, y7, mem_ab, (key_table + ((i) + (dir)) * 8)(CTX)); \
+       \
+       store_ab(x0, x1, x2, x3, x4, x5, x6, x7, mem_ab);
+
+#define dummy_store(x0, x1, x2, x3, x4, x5, x6, x7, mem_ab) /* do nothing */
+
+#define store_ab_state(x0, x1, x2, x3, x4, x5, x6, x7, mem_ab) \
+       /* Store new AB state */ \
+       vmovdqu x4, 4 * 32(mem_ab); \
+       vmovdqu x5, 5 * 32(mem_ab); \
+       vmovdqu x6, 6 * 32(mem_ab); \
+       vmovdqu x7, 7 * 32(mem_ab); \
+       vmovdqu x0, 0 * 32(mem_ab); \
+       vmovdqu x1, 1 * 32(mem_ab); \
+       vmovdqu x2, 2 * 32(mem_ab); \
+       vmovdqu x3, 3 * 32(mem_ab);
+
+#define enc_rounds32(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
+                     y6, y7, mem_ab, mem_cd, i) \
+       two_roundsm32(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
+                     y6, y7, mem_ab, mem_cd, (i) + 2, 1, store_ab_state); \
+       two_roundsm32(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
+                     y6, y7, mem_ab, mem_cd, (i) + 4, 1, store_ab_state); \
+       two_roundsm32(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
+                     y6, y7, mem_ab, mem_cd, (i) + 6, 1, dummy_store);
+
+#define dec_rounds32(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
+                     y6, y7, mem_ab, mem_cd, i) \
+       two_roundsm32(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
+                     y6, y7, mem_ab, mem_cd, (i) + 7, -1, store_ab_state); \
+       two_roundsm32(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
+                     y6, y7, mem_ab, mem_cd, (i) + 5, -1, store_ab_state); \
+       two_roundsm32(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
+                     y6, y7, mem_ab, mem_cd, (i) + 3, -1, dummy_store);
+
+/*
+ * IN:
+ *  v0..3: byte-sliced 32-bit integers
+ * OUT:
+ *  v0..3: (IN <<< 1)
+ */
+#define rol32_1_32(v0, v1, v2, v3, t0, t1, t2, zero) \
+       vpcmpgtb v0, zero, t0; \
+       vpaddb v0, v0, v0; \
+       vpabsb t0, t0; \
+       \
+       vpcmpgtb v1, zero, t1; \
+       vpaddb v1, v1, v1; \
+       vpabsb t1, t1; \
+       \
+       vpcmpgtb v2, zero, t2; \
+       vpaddb v2, v2, v2; \
+       vpabsb t2, t2; \
+       \
+       vpor t0, v1, v1; \
+       \
+       vpcmpgtb v3, zero, t0; \
+       vpaddb v3, v3, v3; \
+       vpabsb t0, t0; \
+       \
+       vpor t1, v2, v2; \
+       vpor t2, v3, v3; \
+       vpor t0, v0, v0;
+
+/*
+ * IN:
+ *   r: byte-sliced AB state in memory
+ *   l: byte-sliced CD state in memory
+ * OUT:
+ *   x0..x7: new byte-sliced CD state
+ */
+#define fls32(l, l0, l1, l2, l3, l4, l5, l6, l7, r, t0, t1, t2, t3, tt0, \
+             tt1, tt2, tt3, kll, klr, krl, krr) \
+       /* \
+        * t0 = kll; \
+        * t0 &= ll; \
+        * lr ^= rol32(t0, 1); \
+        */ \
+       vpbroadcastd kll, t0; /* only lowest 32-bit used */ \
+       vpxor tt0, tt0, tt0; \
+       vpshufb tt0, t0, t3; \
+       vpsrldq $1, t0, t0; \
+       vpshufb tt0, t0, t2; \
+       vpsrldq $1, t0, t0; \
+       vpshufb tt0, t0, t1; \
+       vpsrldq $1, t0, t0; \
+       vpshufb tt0, t0, t0; \
+       \
+       vpand l0, t0, t0; \
+       vpand l1, t1, t1; \
+       vpand l2, t2, t2; \
+       vpand l3, t3, t3; \
+       \
+       rol32_1_32(t3, t2, t1, t0, tt1, tt2, tt3, tt0); \
+       \
+       vpxor l4, t0, l4; \
+       vpbroadcastd krr, t0; /* only lowest 32-bit used */ \
+       vmovdqu l4, 4 * 32(l); \
+       vpxor l5, t1, l5; \
+       vmovdqu l5, 5 * 32(l); \
+       vpxor l6, t2, l6; \
+       vmovdqu l6, 6 * 32(l); \
+       vpxor l7, t3, l7; \
+       vmovdqu l7, 7 * 32(l); \
+       \
+       /* \
+        * t2 = krr; \
+        * t2 |= rr; \
+        * rl ^= t2; \
+        */ \
+       \
+       vpshufb tt0, t0, t3; \
+       vpsrldq $1, t0, t0; \
+       vpshufb tt0, t0, t2; \
+       vpsrldq $1, t0, t0; \
+       vpshufb tt0, t0, t1; \
+       vpsrldq $1, t0, t0; \
+       vpshufb tt0, t0, t0; \
+       \
+       vpor 4 * 32(r), t0, t0; \
+       vpor 5 * 32(r), t1, t1; \
+       vpor 6 * 32(r), t2, t2; \
+       vpor 7 * 32(r), t3, t3; \
+       \
+       vpxor 0 * 32(r), t0, t0; \
+       vpxor 1 * 32(r), t1, t1; \
+       vpxor 2 * 32(r), t2, t2; \
+       vpxor 3 * 32(r), t3, t3; \
+       vmovdqu t0, 0 * 32(r); \
+       vpbroadcastd krl, t0; /* only lowest 32-bit used */ \
+       vmovdqu t1, 1 * 32(r); \
+       vmovdqu t2, 2 * 32(r); \
+       vmovdqu t3, 3 * 32(r); \
+       \
+       /* \
+        * t2 = krl; \
+        * t2 &= rl; \
+        * rr ^= rol32(t2, 1); \
+        */ \
+       vpshufb tt0, t0, t3; \
+       vpsrldq $1, t0, t0; \
+       vpshufb tt0, t0, t2; \
+       vpsrldq $1, t0, t0; \
+       vpshufb tt0, t0, t1; \
+       vpsrldq $1, t0, t0; \
+       vpshufb tt0, t0, t0; \
+       \
+       vpand 0 * 32(r), t0, t0; \
+       vpand 1 * 32(r), t1, t1; \
+       vpand 2 * 32(r), t2, t2; \
+       vpand 3 * 32(r), t3, t3; \
+       \
+       rol32_1_32(t3, t2, t1, t0, tt1, tt2, tt3, tt0); \
+       \
+       vpxor 4 * 32(r), t0, t0; \
+       vpxor 5 * 32(r), t1, t1; \
+       vpxor 6 * 32(r), t2, t2; \
+       vpxor 7 * 32(r), t3, t3; \
+       vmovdqu t0, 4 * 32(r); \
+       vpbroadcastd klr, t0; /* only lowest 32-bit used */ \
+       vmovdqu t1, 5 * 32(r); \
+       vmovdqu t2, 6 * 32(r); \
+       vmovdqu t3, 7 * 32(r); \
+       \
+       /* \
+        * t0 = klr; \
+        * t0 |= lr; \
+        * ll ^= t0; \
+        */ \
+       \
+       vpshufb tt0, t0, t3; \
+       vpsrldq $1, t0, t0; \
+       vpshufb tt0, t0, t2; \
+       vpsrldq $1, t0, t0; \
+       vpshufb tt0, t0, t1; \
+       vpsrldq $1, t0, t0; \
+       vpshufb tt0, t0, t0; \
+       \
+       vpor l4, t0, t0; \
+       vpor l5, t1, t1; \
+       vpor l6, t2, t2; \
+       vpor l7, t3, t3; \
+       \
+       vpxor l0, t0, l0; \
+       vmovdqu l0, 0 * 32(l); \
+       vpxor l1, t1, l1; \
+       vmovdqu l1, 1 * 32(l); \
+       vpxor l2, t2, l2; \
+       vmovdqu l2, 2 * 32(l); \
+       vpxor l3, t3, l3; \
+       vmovdqu l3, 3 * 32(l);
+
+#define transpose_4x4(x0, x1, x2, x3, t1, t2) \
+       vpunpckhdq x1, x0, t2; \
+       vpunpckldq x1, x0, x0; \
+       \
+       vpunpckldq x3, x2, t1; \
+       vpunpckhdq x3, x2, x2; \
+       \
+       vpunpckhqdq t1, x0, x1; \
+       vpunpcklqdq t1, x0, x0; \
+       \
+       vpunpckhqdq x2, t2, x3; \
+       vpunpcklqdq x2, t2, x2;
+
+#define byteslice_16x16b_fast(a0, b0, c0, d0, a1, b1, c1, d1, a2, b2, c2, d2, \
+                             a3, b3, c3, d3, st0, st1) \
+       vmovdqu d2, st0; \
+       vmovdqu d3, st1; \
+       transpose_4x4(a0, a1, a2, a3, d2, d3); \
+       transpose_4x4(b0, b1, b2, b3, d2, d3); \
+       vmovdqu st0, d2; \
+       vmovdqu st1, d3; \
+       \
+       vmovdqu a0, st0; \
+       vmovdqu a1, st1; \
+       transpose_4x4(c0, c1, c2, c3, a0, a1); \
+       transpose_4x4(d0, d1, d2, d3, a0, a1); \
+       \
+       vbroadcasti128 .Lshufb_16x16b rRIP, a0; \
+       vmovdqu st1, a1; \
+       vpshufb a0, a2, a2; \
+       vpshufb a0, a3, a3; \
+       vpshufb a0, b0, b0; \
+       vpshufb a0, b1, b1; \
+       vpshufb a0, b2, b2; \
+       vpshufb a0, b3, b3; \
+       vpshufb a0, a1, a1; \
+       vpshufb a0, c0, c0; \
+       vpshufb a0, c1, c1; \
+       vpshufb a0, c2, c2; \
+       vpshufb a0, c3, c3; \
+       vpshufb a0, d0, d0; \
+       vpshufb a0, d1, d1; \
+       vpshufb a0, d2, d2; \
+       vpshufb a0, d3, d3; \
+       vmovdqu d3, st1; \
+       vmovdqu st0, d3; \
+       vpshufb a0, d3, a0; \
+       vmovdqu d2, st0; \
+       \
+       transpose_4x4(a0, b0, c0, d0, d2, d3); \
+       transpose_4x4(a1, b1, c1, d1, d2, d3); \
+       vmovdqu st0, d2; \
+       vmovdqu st1, d3; \
+       \
+       vmovdqu b0, st0; \
+       vmovdqu b1, st1; \
+       transpose_4x4(a2, b2, c2, d2, b0, b1); \
+       transpose_4x4(a3, b3, c3, d3, b0, b1); \
+       vmovdqu st0, b0; \
+       vmovdqu st1, b1; \
+       /* does not adjust output bytes inside vectors */
+
+/* load blocks to registers and apply pre-whitening */
+#define inpack32_pre(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
+                    y6, y7, rio, key) \
+       vpbroadcastq key, x0; \
+       vpshufb .Lpack_bswap rRIP, x0, x0; \
+       \
+       vpxor 0 * 32(rio), x0, y7; \
+       vpxor 1 * 32(rio), x0, y6; \
+       vpxor 2 * 32(rio), x0, y5; \
+       vpxor 3 * 32(rio), x0, y4; \
+       vpxor 4 * 32(rio), x0, y3; \
+       vpxor 5 * 32(rio), x0, y2; \
+       vpxor 6 * 32(rio), x0, y1; \
+       vpxor 7 * 32(rio), x0, y0; \
+       vpxor 8 * 32(rio), x0, x7; \
+       vpxor 9 * 32(rio), x0, x6; \
+       vpxor 10 * 32(rio), x0, x5; \
+       vpxor 11 * 32(rio), x0, x4; \
+       vpxor 12 * 32(rio), x0, x3; \
+       vpxor 13 * 32(rio), x0, x2; \
+       vpxor 14 * 32(rio), x0, x1; \
+       vpxor 15 * 32(rio), x0, x0;
+
+/* byteslice pre-whitened blocks and store to temporary memory */
+#define inpack32_post(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
+                     y6, y7, mem_ab, mem_cd) \
+       byteslice_16x16b_fast(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, \
+                             y4, y5, y6, y7, (mem_ab), (mem_cd)); \
+       \
+       vmovdqu x0, 0 * 32(mem_ab); \
+       vmovdqu x1, 1 * 32(mem_ab); \
+       vmovdqu x2, 2 * 32(mem_ab); \
+       vmovdqu x3, 3 * 32(mem_ab); \
+       vmovdqu x4, 4 * 32(mem_ab); \
+       vmovdqu x5, 5 * 32(mem_ab); \
+       vmovdqu x6, 6 * 32(mem_ab); \
+       vmovdqu x7, 7 * 32(mem_ab); \
+       vmovdqu y0, 0 * 32(mem_cd); \
+       vmovdqu y1, 1 * 32(mem_cd); \
+       vmovdqu y2, 2 * 32(mem_cd); \
+       vmovdqu y3, 3 * 32(mem_cd); \
+       vmovdqu y4, 4 * 32(mem_cd); \
+       vmovdqu y5, 5 * 32(mem_cd); \
+       vmovdqu y6, 6 * 32(mem_cd); \
+       vmovdqu y7, 7 * 32(mem_cd);
+
+/* de-byteslice, apply post-whitening and store blocks */
+#define outunpack32(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, \
+                   y5, y6, y7, key, stack_tmp0, stack_tmp1) \
+       byteslice_16x16b_fast(y0, y4, x0, x4, y1, y5, x1, x5, y2, y6, x2, x6, \
+                             y3, y7, x3, x7, stack_tmp0, stack_tmp1); \
+       \
+       vmovdqu x0, stack_tmp0; \
+       \
+       vpbroadcastq key, x0; \
+       vpshufb .Lpack_bswap rRIP, x0, x0; \
+       \
+       vpxor x0, y7, y7; \
+       vpxor x0, y6, y6; \
+       vpxor x0, y5, y5; \
+       vpxor x0, y4, y4; \
+       vpxor x0, y3, y3; \
+       vpxor x0, y2, y2; \
+       vpxor x0, y1, y1; \
+       vpxor x0, y0, y0; \
+       vpxor x0, x7, x7; \
+       vpxor x0, x6, x6; \
+       vpxor x0, x5, x5; \
+       vpxor x0, x4, x4; \
+       vpxor x0, x3, x3; \
+       vpxor x0, x2, x2; \
+       vpxor x0, x1, x1; \
+       vpxor stack_tmp0, x0, x0;
+
+#define write_output(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
+                    y6, y7, rio) \
+       vmovdqu x0, 0 * 32(rio); \
+       vmovdqu x1, 1 * 32(rio); \
+       vmovdqu x2, 2 * 32(rio); \
+       vmovdqu x3, 3 * 32(rio); \
+       vmovdqu x4, 4 * 32(rio); \
+       vmovdqu x5, 5 * 32(rio); \
+       vmovdqu x6, 6 * 32(rio); \
+       vmovdqu x7, 7 * 32(rio); \
+       vmovdqu y0, 8 * 32(rio); \
+       vmovdqu y1, 9 * 32(rio); \
+       vmovdqu y2, 10 * 32(rio); \
+       vmovdqu y3, 11 * 32(rio); \
+       vmovdqu y4, 12 * 32(rio); \
+       vmovdqu y5, 13 * 32(rio); \
+       vmovdqu y6, 14 * 32(rio); \
+       vmovdqu y7, 15 * 32(rio);
+
+.text
+.align 32
+
+#define SHUFB_BYTES(idx) \
+       0 + (idx), 4 + (idx), 8 + (idx), 12 + (idx)
+
+.Lshufb_16x16b:
+       .byte SHUFB_BYTES(0), SHUFB_BYTES(1), SHUFB_BYTES(2), SHUFB_BYTES(3)
+       .byte SHUFB_BYTES(0), SHUFB_BYTES(1), SHUFB_BYTES(2), SHUFB_BYTES(3)
+
+.Lpack_bswap:
+       .long 0x00010203, 0x04050607, 0x80808080, 0x80808080
+       .long 0x00010203, 0x04050607, 0x80808080, 0x80808080
+
+/* For CTR-mode IV byteswap */
+.Lbswap128_mask:
+       .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
+
+/*
+ * pre-SubByte transform
+ *
+ * pre-lookup for sbox1, sbox2, sbox3:
+ *   swap_bitendianness(
+ *       isom_map_camellia_to_aes(
+ *           camellia_f(
+ *               swap_bitendianess(in)
+ *           )
+ *       )
+ *   )
+ *
+ * (note: '⊕ 0xc5' inside camellia_f())
+ */
+.Lpre_tf_lo_s1:
+       .byte 0x45, 0xe8, 0x40, 0xed, 0x2e, 0x83, 0x2b, 0x86
+       .byte 0x4b, 0xe6, 0x4e, 0xe3, 0x20, 0x8d, 0x25, 0x88
+.Lpre_tf_hi_s1:
+       .byte 0x00, 0x51, 0xf1, 0xa0, 0x8a, 0xdb, 0x7b, 0x2a
+       .byte 0x09, 0x58, 0xf8, 0xa9, 0x83, 0xd2, 0x72, 0x23
+
+/*
+ * pre-SubByte transform
+ *
+ * pre-lookup for sbox4:
+ *   swap_bitendianness(
+ *       isom_map_camellia_to_aes(
+ *           camellia_f(
+ *               swap_bitendianess(in <<< 1)
+ *           )
+ *       )
+ *   )
+ *
+ * (note: '⊕ 0xc5' inside camellia_f())
+ */
+.Lpre_tf_lo_s4:
+       .byte 0x45, 0x40, 0x2e, 0x2b, 0x4b, 0x4e, 0x20, 0x25
+       .byte 0x14, 0x11, 0x7f, 0x7a, 0x1a, 0x1f, 0x71, 0x74
+.Lpre_tf_hi_s4:
+       .byte 0x00, 0xf1, 0x8a, 0x7b, 0x09, 0xf8, 0x83, 0x72
+       .byte 0xad, 0x5c, 0x27, 0xd6, 0xa4, 0x55, 0x2e, 0xdf
+
+/*
+ * post-SubByte transform
+ *
+ * post-lookup for sbox1, sbox4:
+ *  swap_bitendianness(
+ *      camellia_h(
+ *          isom_map_aes_to_camellia(
+ *              swap_bitendianness(
+ *                  aes_inverse_affine_transform(in)
+ *              )
+ *          )
+ *      )
+ *  )
+ *
+ * (note: '⊕ 0x6e' inside camellia_h())
+ */
+.Lpost_tf_lo_s1:
+       .byte 0x3c, 0xcc, 0xcf, 0x3f, 0x32, 0xc2, 0xc1, 0x31
+       .byte 0xdc, 0x2c, 0x2f, 0xdf, 0xd2, 0x22, 0x21, 0xd1
+.Lpost_tf_hi_s1:
+       .byte 0x00, 0xf9, 0x86, 0x7f, 0xd7, 0x2e, 0x51, 0xa8
+       .byte 0xa4, 0x5d, 0x22, 0xdb, 0x73, 0x8a, 0xf5, 0x0c
+
+/*
+ * post-SubByte transform
+ *
+ * post-lookup for sbox2:
+ *  swap_bitendianness(
+ *      camellia_h(
+ *          isom_map_aes_to_camellia(
+ *              swap_bitendianness(
+ *                  aes_inverse_affine_transform(in)
+ *              )
+ *          )
+ *      )
+ *  ) <<< 1
+ *
+ * (note: '⊕ 0x6e' inside camellia_h())
+ */
+.Lpost_tf_lo_s2:
+       .byte 0x78, 0x99, 0x9f, 0x7e, 0x64, 0x85, 0x83, 0x62
+       .byte 0xb9, 0x58, 0x5e, 0xbf, 0xa5, 0x44, 0x42, 0xa3
+.Lpost_tf_hi_s2:
+       .byte 0x00, 0xf3, 0x0d, 0xfe, 0xaf, 0x5c, 0xa2, 0x51
+       .byte 0x49, 0xba, 0x44, 0xb7, 0xe6, 0x15, 0xeb, 0x18
+
+/*
+ * post-SubByte transform
+ *
+ * post-lookup for sbox3:
+ *  swap_bitendianness(
+ *      camellia_h(
+ *          isom_map_aes_to_camellia(
+ *              swap_bitendianness(
+ *                  aes_inverse_affine_transform(in)
+ *              )
+ *          )
+ *      )
+ *  ) >>> 1
+ *
+ * (note: '⊕ 0x6e' inside camellia_h())
+ */
+.Lpost_tf_lo_s3:
+       .byte 0x1e, 0x66, 0xe7, 0x9f, 0x19, 0x61, 0xe0, 0x98
+       .byte 0x6e, 0x16, 0x97, 0xef, 0x69, 0x11, 0x90, 0xe8
+.Lpost_tf_hi_s3:
+       .byte 0x00, 0xfc, 0x43, 0xbf, 0xeb, 0x17, 0xa8, 0x54
+       .byte 0x52, 0xae, 0x11, 0xed, 0xb9, 0x45, 0xfa, 0x06
+
+/* For isolating SubBytes from AESENCLAST, inverse shift row */
+.Linv_shift_row:
+       .byte 0x00, 0x0d, 0x0a, 0x07, 0x04, 0x01, 0x0e, 0x0b
+       .byte 0x08, 0x05, 0x02, 0x0f, 0x0c, 0x09, 0x06, 0x03
+
+.align 4
+/* 4-bit mask */
+.L0f0f0f0f:
+       .long 0x0f0f0f0f
+
+
+.align 8
+ELF(.type   __camellia_enc_blk32,@function;)
+
+__camellia_enc_blk32:
+       /* input:
+        *      %rdi: ctx, CTX
+        *      %rax: temporary storage, 512 bytes
+        *      %r8d: 24 for 16 byte key, 32 for larger
+        *      %ymm0..%ymm15: 32 plaintext blocks
+        * output:
+        *      %ymm0..%ymm15: 32 encrypted blocks, order swapped:
+        *       7, 8, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8
+        */
+       CFI_STARTPROC();
+
+       leaq 8 * 32(%rax), %rcx;
+
+       leaq (-8 * 8)(CTX, %r8, 8), %r8;
+
+       inpack32_post(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
+                     %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
+                     %ymm15, %rax, %rcx);
+
+.align 8
+.Lenc_loop:
+       enc_rounds32(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
+                    %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
+                    %ymm15, %rax, %rcx, 0);
+
+       cmpq %r8, CTX;
+       je .Lenc_done;
+       leaq (8 * 8)(CTX), CTX;
+
+       fls32(%rax, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
+             %rcx, %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
+             %ymm15,
+             ((key_table) + 0)(CTX),
+             ((key_table) + 4)(CTX),
+             ((key_table) + 8)(CTX),
+             ((key_table) + 12)(CTX));
+       jmp .Lenc_loop;
+
+.align 8
+.Lenc_done:
+       /* load CD for output */
+       vmovdqu 0 * 32(%rcx), %ymm8;
+       vmovdqu 1 * 32(%rcx), %ymm9;
+       vmovdqu 2 * 32(%rcx), %ymm10;
+       vmovdqu 3 * 32(%rcx), %ymm11;
+       vmovdqu 4 * 32(%rcx), %ymm12;
+       vmovdqu 5 * 32(%rcx), %ymm13;
+       vmovdqu 6 * 32(%rcx), %ymm14;
+       vmovdqu 7 * 32(%rcx), %ymm15;
+
+       outunpack32(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
+                   %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
+                   %ymm15, ((key_table) + 8 * 8)(%r8), (%rax), 1 * 32(%rax));
+
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size __camellia_enc_blk32,.-__camellia_enc_blk32;)
+
+.align 8
+ELF(.type   __camellia_dec_blk32,@function;)
+
+__camellia_dec_blk32:
+       /* input:
+        *      %rdi: ctx, CTX
+        *      %rax: temporary storage, 512 bytes
+        *      %r8d: 24 for 16 byte key, 32 for larger
+        *      %ymm0..%ymm15: 16 encrypted blocks
+        * output:
+        *      %ymm0..%ymm15: 16 plaintext blocks, order swapped:
+        *       7, 8, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8
+        */
+       CFI_STARTPROC();
+
+       movq %r8, %rcx;
+       movq CTX, %r8
+       leaq (-8 * 8)(CTX, %rcx, 8), CTX;
+
+       leaq 8 * 32(%rax), %rcx;
+
+       inpack32_post(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
+                     %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
+                     %ymm15, %rax, %rcx);
+
+.align 8
+.Ldec_loop:
+       dec_rounds32(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
+                    %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
+                    %ymm15, %rax, %rcx, 0);
+
+       cmpq %r8, CTX;
+       je .Ldec_done;
+
+       fls32(%rax, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
+             %rcx, %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
+             %ymm15,
+             ((key_table) + 8)(CTX),
+             ((key_table) + 12)(CTX),
+             ((key_table) + 0)(CTX),
+             ((key_table) + 4)(CTX));
+
+       leaq (-8 * 8)(CTX), CTX;
+       jmp .Ldec_loop;
+
+.align 8
+.Ldec_done:
+       /* load CD for output */
+       vmovdqu 0 * 32(%rcx), %ymm8;
+       vmovdqu 1 * 32(%rcx), %ymm9;
+       vmovdqu 2 * 32(%rcx), %ymm10;
+       vmovdqu 3 * 32(%rcx), %ymm11;
+       vmovdqu 4 * 32(%rcx), %ymm12;
+       vmovdqu 5 * 32(%rcx), %ymm13;
+       vmovdqu 6 * 32(%rcx), %ymm14;
+       vmovdqu 7 * 32(%rcx), %ymm15;
+
+       outunpack32(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
+                   %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
+                   %ymm15, (key_table)(CTX), (%rax), 1 * 32(%rax));
+
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size __camellia_dec_blk32,.-__camellia_dec_blk32;)
+
+#define inc_le128(x, minus_one, tmp) \
+       vpcmpeqq minus_one, x, tmp; \
+       vpsubq minus_one, x, x; \
+       vpslldq $8, tmp, tmp; \
+       vpsubq tmp, x, x;
+
+.align 8
+.globl FUNC_NAME(ctr_enc)
+ELF(.type   FUNC_NAME(ctr_enc),@function;)
+
+FUNC_NAME(ctr_enc):
+       /* input:
+        *      %rdi: ctx, CTX
+        *      %rsi: dst (32 blocks)
+        *      %rdx: src (32 blocks)
+        *      %rcx: iv (big endian, 128bit)
+        */
+       CFI_STARTPROC();
+
+       pushq %rbp;
+       CFI_PUSH(%rbp);
+       movq %rsp, %rbp;
+       CFI_DEF_CFA_REGISTER(%rbp);
+
+       movq 8(%rcx), %r11;
+       bswapq %r11;
+
+       vzeroupper;
+
+       cmpl $128, key_bitlength(CTX);
+       movl $32, %r8d;
+       movl $24, %eax;
+       cmovel %eax, %r8d; /* max */
+
+       subq $(16 * 32), %rsp;
+       andq $~63, %rsp;
+       movq %rsp, %rax;
+
+       vpcmpeqd %ymm15, %ymm15, %ymm15;
+       vpsrldq $8, %ymm15, %ymm15; /* ab: -1:0 ; cd: -1:0 */
+
+       /* load IV and byteswap */
+       vmovdqu (%rcx), %xmm0;
+       vpshufb .Lbswap128_mask rRIP, %xmm0, %xmm0;
+       vmovdqa %xmm0, %xmm1;
+       inc_le128(%xmm0, %xmm15, %xmm14);
+       vbroadcasti128 .Lbswap128_mask rRIP, %ymm14;
+       vinserti128 $1, %xmm0, %ymm1, %ymm0;
+       vpshufb %ymm14, %ymm0, %ymm13;
+       vmovdqu %ymm13, 15 * 32(%rax);
+
+       /* check need for handling 64-bit overflow and carry */
+       cmpq $(0xffffffffffffffff - 32), %r11;
+       ja .Lload_ctr_carry;
+
+       /* construct IVs */
+       vpaddq %ymm15, %ymm15, %ymm15; /* ab: -2:0 ; cd: -2:0 */
+       vpsubq %ymm15, %ymm0, %ymm0;
+       vpshufb %ymm14, %ymm0, %ymm13;
+       vmovdqu %ymm13, 14 * 32(%rax);
+       vpsubq %ymm15, %ymm0, %ymm0;
+       vpshufb %ymm14, %ymm0, %ymm13;
+       vmovdqu %ymm13, 13 * 32(%rax);
+       vpsubq %ymm15, %ymm0, %ymm0;
+       vpshufb %ymm14, %ymm0, %ymm12;
+       vpsubq %ymm15, %ymm0, %ymm0;
+       vpshufb %ymm14, %ymm0, %ymm11;
+       vpsubq %ymm15, %ymm0, %ymm0;
+       vpshufb %ymm14, %ymm0, %ymm10;
+       vpsubq %ymm15, %ymm0, %ymm0;
+       vpshufb %ymm14, %ymm0, %ymm9;
+       vpsubq %ymm15, %ymm0, %ymm0;
+       vpshufb %ymm14, %ymm0, %ymm8;
+       vpsubq %ymm15, %ymm0, %ymm0;
+       vpshufb %ymm14, %ymm0, %ymm7;
+       vpsubq %ymm15, %ymm0, %ymm0;
+       vpshufb %ymm14, %ymm0, %ymm6;
+       vpsubq %ymm15, %ymm0, %ymm0;
+       vpshufb %ymm14, %ymm0, %ymm5;
+       vpsubq %ymm15, %ymm0, %ymm0;
+       vpshufb %ymm14, %ymm0, %ymm4;
+       vpsubq %ymm15, %ymm0, %ymm0;
+       vpshufb %ymm14, %ymm0, %ymm3;
+       vpsubq %ymm15, %ymm0, %ymm0;
+       vpshufb %ymm14, %ymm0, %ymm2;
+       vpsubq %ymm15, %ymm0, %ymm0;
+       vpshufb %ymm14, %ymm0, %ymm1;
+       vpsubq %ymm15, %ymm0, %ymm0;  /* +30 ; +31 */
+       vpsubq %xmm15, %xmm0, %xmm13; /* +32 */
+       vpshufb %ymm14, %ymm0, %ymm0;
+       vpshufb %xmm14, %xmm13, %xmm13;
+       vmovdqu %xmm13, (%rcx);
+
+       jmp .Lload_ctr_done;
+
+.align 4
+.Lload_ctr_carry:
+       /* construct IVs */
+       inc_le128(%ymm0, %ymm15, %ymm13); /* ab: le1 ; cd: le2 */
+       inc_le128(%ymm0, %ymm15, %ymm13); /* ab: le2 ; cd: le3 */
+       vpshufb %ymm14, %ymm0, %ymm13;
+       vmovdqu %ymm13, 14 * 32(%rax);
+       inc_le128(%ymm0, %ymm15, %ymm13);
+       inc_le128(%ymm0, %ymm15, %ymm13);
+       vpshufb %ymm14, %ymm0, %ymm13;
+       vmovdqu %ymm13, 13 * 32(%rax);
+       inc_le128(%ymm0, %ymm15, %ymm13);
+       inc_le128(%ymm0, %ymm15, %ymm13);
+       vpshufb %ymm14, %ymm0, %ymm12;
+       inc_le128(%ymm0, %ymm15, %ymm13);
+       inc_le128(%ymm0, %ymm15, %ymm13);
+       vpshufb %ymm14, %ymm0, %ymm11;
+       inc_le128(%ymm0, %ymm15, %ymm13);
+       inc_le128(%ymm0, %ymm15, %ymm13);
+       vpshufb %ymm14, %ymm0, %ymm10;
+       inc_le128(%ymm0, %ymm15, %ymm13);
+       inc_le128(%ymm0, %ymm15, %ymm13);
+       vpshufb %ymm14, %ymm0, %ymm9;
+       inc_le128(%ymm0, %ymm15, %ymm13);
+       inc_le128(%ymm0, %ymm15, %ymm13);
+       vpshufb %ymm14, %ymm0, %ymm8;
+       inc_le128(%ymm0, %ymm15, %ymm13);
+       inc_le128(%ymm0, %ymm15, %ymm13);
+       vpshufb %ymm14, %ymm0, %ymm7;
+       inc_le128(%ymm0, %ymm15, %ymm13);
+       inc_le128(%ymm0, %ymm15, %ymm13);
+       vpshufb %ymm14, %ymm0, %ymm6;
+       inc_le128(%ymm0, %ymm15, %ymm13);
+       inc_le128(%ymm0, %ymm15, %ymm13);
+       vpshufb %ymm14, %ymm0, %ymm5;
+       inc_le128(%ymm0, %ymm15, %ymm13);
+       inc_le128(%ymm0, %ymm15, %ymm13);
+       vpshufb %ymm14, %ymm0, %ymm4;
+       inc_le128(%ymm0, %ymm15, %ymm13);
+       inc_le128(%ymm0, %ymm15, %ymm13);
+       vpshufb %ymm14, %ymm0, %ymm3;
+       inc_le128(%ymm0, %ymm15, %ymm13);
+       inc_le128(%ymm0, %ymm15, %ymm13);
+       vpshufb %ymm14, %ymm0, %ymm2;
+       inc_le128(%ymm0, %ymm15, %ymm13);
+       inc_le128(%ymm0, %ymm15, %ymm13);
+       vpshufb %ymm14, %ymm0, %ymm1;
+       inc_le128(%ymm0, %ymm15, %ymm13);
+       inc_le128(%ymm0, %ymm15, %ymm13);
+       vextracti128 $1, %ymm0, %xmm13;
+       vpshufb %ymm14, %ymm0, %ymm0;
+       inc_le128(%xmm13, %xmm15, %xmm14);
+       vpshufb .Lbswap128_mask rRIP, %xmm13, %xmm13;
+       vmovdqu %xmm13, (%rcx);
+
+.align 4
+.Lload_ctr_done:
+       /* inpack16_pre: */
+       vpbroadcastq (key_table)(CTX), %ymm15;
+       vpshufb .Lpack_bswap rRIP, %ymm15, %ymm15;
+       vpxor %ymm0, %ymm15, %ymm0;
+       vpxor %ymm1, %ymm15, %ymm1;
+       vpxor %ymm2, %ymm15, %ymm2;
+       vpxor %ymm3, %ymm15, %ymm3;
+       vpxor %ymm4, %ymm15, %ymm4;
+       vpxor %ymm5, %ymm15, %ymm5;
+       vpxor %ymm6, %ymm15, %ymm6;
+       vpxor %ymm7, %ymm15, %ymm7;
+       vpxor %ymm8, %ymm15, %ymm8;
+       vpxor %ymm9, %ymm15, %ymm9;
+       vpxor %ymm10, %ymm15, %ymm10;
+       vpxor %ymm11, %ymm15, %ymm11;
+       vpxor %ymm12, %ymm15, %ymm12;
+       vpxor 13 * 32(%rax), %ymm15, %ymm13;
+       vpxor 14 * 32(%rax), %ymm15, %ymm14;
+       vpxor 15 * 32(%rax), %ymm15, %ymm15;
+
+       call __camellia_enc_blk32;
+
+       vpxor 0 * 32(%rdx), %ymm7, %ymm7;
+       vpxor 1 * 32(%rdx), %ymm6, %ymm6;
+       vpxor 2 * 32(%rdx), %ymm5, %ymm5;
+       vpxor 3 * 32(%rdx), %ymm4, %ymm4;
+       vpxor 4 * 32(%rdx), %ymm3, %ymm3;
+       vpxor 5 * 32(%rdx), %ymm2, %ymm2;
+       vpxor 6 * 32(%rdx), %ymm1, %ymm1;
+       vpxor 7 * 32(%rdx), %ymm0, %ymm0;
+       vpxor 8 * 32(%rdx), %ymm15, %ymm15;
+       vpxor 9 * 32(%rdx), %ymm14, %ymm14;
+       vpxor 10 * 32(%rdx), %ymm13, %ymm13;
+       vpxor 11 * 32(%rdx), %ymm12, %ymm12;
+       vpxor 12 * 32(%rdx), %ymm11, %ymm11;
+       vpxor 13 * 32(%rdx), %ymm10, %ymm10;
+       vpxor 14 * 32(%rdx), %ymm9, %ymm9;
+       vpxor 15 * 32(%rdx), %ymm8, %ymm8;
+       leaq 32 * 16(%rdx), %rdx;
+
+       write_output(%ymm7, %ymm6, %ymm5, %ymm4, %ymm3, %ymm2, %ymm1, %ymm0,
+                    %ymm15, %ymm14, %ymm13, %ymm12, %ymm11, %ymm10, %ymm9,
+                    %ymm8, %rsi);
+
+       vzeroall;
+
+       leave;
+       CFI_LEAVE();
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size FUNC_NAME(ctr_enc),.-FUNC_NAME(ctr_enc);)
+
+.align 8
+.globl FUNC_NAME(cbc_dec)
+ELF(.type   FUNC_NAME(cbc_dec),@function;)
+
+FUNC_NAME(cbc_dec):
+       /* input:
+        *      %rdi: ctx, CTX
+        *      %rsi: dst (32 blocks)
+        *      %rdx: src (32 blocks)
+        *      %rcx: iv
+        */
+       CFI_STARTPROC();
+
+       pushq %rbp;
+       CFI_PUSH(%rbp);
+       movq %rsp, %rbp;
+       CFI_DEF_CFA_REGISTER(%rbp);
+
+       vzeroupper;
+
+       movq %rcx, %r9;
+
+       cmpl $128, key_bitlength(CTX);
+       movl $32, %r8d;
+       movl $24, %eax;
+       cmovel %eax, %r8d; /* max */
+
+       subq $(16 * 32), %rsp;
+       andq $~63, %rsp;
+       movq %rsp, %rax;
+
+       inpack32_pre(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
+                    %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
+                    %ymm15, %rdx, (key_table)(CTX, %r8, 8));
+
+       call __camellia_dec_blk32;
+
+       /* XOR output with IV */
+       vmovdqu %ymm8, (%rax);
+       vmovdqu (%r9), %xmm8;
+       vinserti128 $1, (%rdx), %ymm8, %ymm8;
+       vpxor %ymm8, %ymm7, %ymm7;
+       vmovdqu (%rax), %ymm8;
+       vpxor (0 * 32 + 16)(%rdx), %ymm6, %ymm6;
+       vpxor (1 * 32 + 16)(%rdx), %ymm5, %ymm5;
+       vpxor (2 * 32 + 16)(%rdx), %ymm4, %ymm4;
+       vpxor (3 * 32 + 16)(%rdx), %ymm3, %ymm3;
+       vpxor (4 * 32 + 16)(%rdx), %ymm2, %ymm2;
+       vpxor (5 * 32 + 16)(%rdx), %ymm1, %ymm1;
+       vpxor (6 * 32 + 16)(%rdx), %ymm0, %ymm0;
+       vpxor (7 * 32 + 16)(%rdx), %ymm15, %ymm15;
+       vpxor (8 * 32 + 16)(%rdx), %ymm14, %ymm14;
+       vpxor (9 * 32 + 16)(%rdx), %ymm13, %ymm13;
+       vpxor (10 * 32 + 16)(%rdx), %ymm12, %ymm12;
+       vpxor (11 * 32 + 16)(%rdx), %ymm11, %ymm11;
+       vpxor (12 * 32 + 16)(%rdx), %ymm10, %ymm10;
+       vpxor (13 * 32 + 16)(%rdx), %ymm9, %ymm9;
+       vpxor (14 * 32 + 16)(%rdx), %ymm8, %ymm8;
+       movq (15 * 32 + 16 + 0)(%rdx), %rax;
+       movq (15 * 32 + 16 + 8)(%rdx), %rcx;
+
+       write_output(%ymm7, %ymm6, %ymm5, %ymm4, %ymm3, %ymm2, %ymm1, %ymm0,
+                    %ymm15, %ymm14, %ymm13, %ymm12, %ymm11, %ymm10, %ymm9,
+                    %ymm8, %rsi);
+
+       /* store new IV */
+       movq %rax, (0)(%r9);
+       movq %rcx, (8)(%r9);
+
+       vzeroall;
+
+       leave;
+       CFI_LEAVE();
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size FUNC_NAME(cbc_dec),.-FUNC_NAME(cbc_dec);)
+
+.align 8
+.globl FUNC_NAME(cfb_dec)
+ELF(.type   FUNC_NAME(cfb_dec),@function;)
+
+FUNC_NAME(cfb_dec):
+       /* input:
+        *      %rdi: ctx, CTX
+        *      %rsi: dst (32 blocks)
+        *      %rdx: src (32 blocks)
+        *      %rcx: iv
+        */
+       CFI_STARTPROC();
+
+       pushq %rbp;
+       CFI_PUSH(%rbp);
+       movq %rsp, %rbp;
+       CFI_DEF_CFA_REGISTER(%rbp);
+
+       vzeroupper;
+
+       cmpl $128, key_bitlength(CTX);
+       movl $32, %r8d;
+       movl $24, %eax;
+       cmovel %eax, %r8d; /* max */
+
+       subq $(16 * 32), %rsp;
+       andq $~63, %rsp;
+       movq %rsp, %rax;
+
+       /* inpack16_pre: */
+       vpbroadcastq (key_table)(CTX), %ymm0;
+       vpshufb .Lpack_bswap rRIP, %ymm0, %ymm0;
+       vmovdqu (%rcx), %xmm15;
+       vinserti128 $1, (%rdx), %ymm15, %ymm15;
+       vpxor %ymm15, %ymm0, %ymm15;
+       vmovdqu (15 * 32 + 16)(%rdx), %xmm1;
+       vmovdqu %xmm1, (%rcx); /* store new IV */
+       vpxor (0 * 32 + 16)(%rdx), %ymm0, %ymm14;
+       vpxor (1 * 32 + 16)(%rdx), %ymm0, %ymm13;
+       vpxor (2 * 32 + 16)(%rdx), %ymm0, %ymm12;
+       vpxor (3 * 32 + 16)(%rdx), %ymm0, %ymm11;
+       vpxor (4 * 32 + 16)(%rdx), %ymm0, %ymm10;
+       vpxor (5 * 32 + 16)(%rdx), %ymm0, %ymm9;
+       vpxor (6 * 32 + 16)(%rdx), %ymm0, %ymm8;
+       vpxor (7 * 32 + 16)(%rdx), %ymm0, %ymm7;
+       vpxor (8 * 32 + 16)(%rdx), %ymm0, %ymm6;
+       vpxor (9 * 32 + 16)(%rdx), %ymm0, %ymm5;
+       vpxor (10 * 32 + 16)(%rdx), %ymm0, %ymm4;
+       vpxor (11 * 32 + 16)(%rdx), %ymm0, %ymm3;
+       vpxor (12 * 32 + 16)(%rdx), %ymm0, %ymm2;
+       vpxor (13 * 32 + 16)(%rdx), %ymm0, %ymm1;
+       vpxor (14 * 32 + 16)(%rdx), %ymm0, %ymm0;
+
+       call __camellia_enc_blk32;
+
+       vpxor 0 * 32(%rdx), %ymm7, %ymm7;
+       vpxor 1 * 32(%rdx), %ymm6, %ymm6;
+       vpxor 2 * 32(%rdx), %ymm5, %ymm5;
+       vpxor 3 * 32(%rdx), %ymm4, %ymm4;
+       vpxor 4 * 32(%rdx), %ymm3, %ymm3;
+       vpxor 5 * 32(%rdx), %ymm2, %ymm2;
+       vpxor 6 * 32(%rdx), %ymm1, %ymm1;
+       vpxor 7 * 32(%rdx), %ymm0, %ymm0;
+       vpxor 8 * 32(%rdx), %ymm15, %ymm15;
+       vpxor 9 * 32(%rdx), %ymm14, %ymm14;
+       vpxor 10 * 32(%rdx), %ymm13, %ymm13;
+       vpxor 11 * 32(%rdx), %ymm12, %ymm12;
+       vpxor 12 * 32(%rdx), %ymm11, %ymm11;
+       vpxor 13 * 32(%rdx), %ymm10, %ymm10;
+       vpxor 14 * 32(%rdx), %ymm9, %ymm9;
+       vpxor 15 * 32(%rdx), %ymm8, %ymm8;
+
+       write_output(%ymm7, %ymm6, %ymm5, %ymm4, %ymm3, %ymm2, %ymm1, %ymm0,
+                    %ymm15, %ymm14, %ymm13, %ymm12, %ymm11, %ymm10, %ymm9,
+                    %ymm8, %rsi);
+
+       vzeroall;
+
+       leave;
+       CFI_LEAVE();
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size FUNC_NAME(cfb_dec),.-FUNC_NAME(cfb_dec);)
+
+.align 8
+.globl FUNC_NAME(ocb_enc)
+ELF(.type   FUNC_NAME(ocb_enc),@function;)
+
+FUNC_NAME(ocb_enc):
+       /* input:
+        *      %rdi: ctx, CTX
+        *      %rsi: dst (32 blocks)
+        *      %rdx: src (32 blocks)
+        *      %rcx: offset
+        *      %r8 : checksum
+        *      %r9 : L pointers (void *L[32])
+        */
+       CFI_STARTPROC();
+
+       pushq %rbp;
+       CFI_PUSH(%rbp);
+       movq %rsp, %rbp;
+       CFI_DEF_CFA_REGISTER(%rbp);
+
+       vzeroupper;
+
+       subq $(16 * 32 + 4 * 8), %rsp;
+       andq $~63, %rsp;
+       movq %rsp, %rax;
+
+       movq %r10, (16 * 32 + 0 * 8)(%rsp);
+       movq %r11, (16 * 32 + 1 * 8)(%rsp);
+       movq %r12, (16 * 32 + 2 * 8)(%rsp);
+       movq %r13, (16 * 32 + 3 * 8)(%rsp);
+       CFI_REG_ON_STACK(r10, 16 * 32 + 0 * 8);
+       CFI_REG_ON_STACK(r11, 16 * 32 + 1 * 8);
+       CFI_REG_ON_STACK(r12, 16 * 32 + 2 * 8);
+       CFI_REG_ON_STACK(r13, 16 * 32 + 3 * 8);
+
+       vmovdqu (%rcx), %xmm14;
+       vmovdqu (%r8), %xmm13;
+
+       /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+       /* Checksum_i = Checksum_{i-1} xor P_i  */
+       /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i)  */
+
+#define OCB_INPUT(n, l0reg, l1reg, yreg) \
+         vmovdqu (n * 32)(%rdx), yreg; \
+         vpxor (l0reg), %xmm14, %xmm15; \
+         vpxor (l1reg), %xmm15, %xmm14; \
+         vinserti128 $1, %xmm14, %ymm15, %ymm15; \
+         vpxor yreg, %ymm13, %ymm13; \
+         vpxor yreg, %ymm15, yreg; \
+         vmovdqu %ymm15, (n * 32)(%rsi);
+
+       movq (0 * 8)(%r9), %r10;
+       movq (1 * 8)(%r9), %r11;
+       movq (2 * 8)(%r9), %r12;
+       movq (3 * 8)(%r9), %r13;
+       OCB_INPUT(0, %r10, %r11, %ymm0);
+       vmovdqu %ymm0, (15 * 32)(%rax);
+       OCB_INPUT(1, %r12, %r13, %ymm0);
+       vmovdqu %ymm0, (14 * 32)(%rax);
+       movq (4 * 8)(%r9), %r10;
+       movq (5 * 8)(%r9), %r11;
+       movq (6 * 8)(%r9), %r12;
+       movq (7 * 8)(%r9), %r13;
+       OCB_INPUT(2, %r10, %r11, %ymm0);
+       vmovdqu %ymm0, (13 * 32)(%rax);
+       OCB_INPUT(3, %r12, %r13, %ymm12);
+       movq (8 * 8)(%r9), %r10;
+       movq (9 * 8)(%r9), %r11;
+       movq (10 * 8)(%r9), %r12;
+       movq (11 * 8)(%r9), %r13;
+       OCB_INPUT(4, %r10, %r11, %ymm11);
+       OCB_INPUT(5, %r12, %r13, %ymm10);
+       movq (12 * 8)(%r9), %r10;
+       movq (13 * 8)(%r9), %r11;
+       movq (14 * 8)(%r9), %r12;
+       movq (15 * 8)(%r9), %r13;
+       OCB_INPUT(6, %r10, %r11, %ymm9);
+       OCB_INPUT(7, %r12, %r13, %ymm8);
+       movq (16 * 8)(%r9), %r10;
+       movq (17 * 8)(%r9), %r11;
+       movq (18 * 8)(%r9), %r12;
+       movq (19 * 8)(%r9), %r13;
+       OCB_INPUT(8, %r10, %r11, %ymm7);
+       OCB_INPUT(9, %r12, %r13, %ymm6);
+       movq (20 * 8)(%r9), %r10;
+       movq (21 * 8)(%r9), %r11;
+       movq (22 * 8)(%r9), %r12;
+       movq (23 * 8)(%r9), %r13;
+       OCB_INPUT(10, %r10, %r11, %ymm5);
+       OCB_INPUT(11, %r12, %r13, %ymm4);
+       movq (24 * 8)(%r9), %r10;
+       movq (25 * 8)(%r9), %r11;
+       movq (26 * 8)(%r9), %r12;
+       movq (27 * 8)(%r9), %r13;
+       OCB_INPUT(12, %r10, %r11, %ymm3);
+       OCB_INPUT(13, %r12, %r13, %ymm2);
+       movq (28 * 8)(%r9), %r10;
+       movq (29 * 8)(%r9), %r11;
+       movq (30 * 8)(%r9), %r12;
+       movq (31 * 8)(%r9), %r13;
+       OCB_INPUT(14, %r10, %r11, %ymm1);
+       OCB_INPUT(15, %r12, %r13, %ymm0);
+#undef OCB_INPUT
+
+       vextracti128 $1, %ymm13, %xmm15;
+       vmovdqu %xmm14, (%rcx);
+       vpxor %xmm13, %xmm15, %xmm15;
+       vmovdqu %xmm15, (%r8);
+
+       cmpl $128, key_bitlength(CTX);
+       movl $32, %r8d;
+       movl $24, %r10d;
+       cmovel %r10d, %r8d; /* max */
+
+       /* inpack16_pre: */
+       vpbroadcastq (key_table)(CTX), %ymm15;
+       vpshufb .Lpack_bswap rRIP, %ymm15, %ymm15;
+       vpxor %ymm0, %ymm15, %ymm0;
+       vpxor %ymm1, %ymm15, %ymm1;
+       vpxor %ymm2, %ymm15, %ymm2;
+       vpxor %ymm3, %ymm15, %ymm3;
+       vpxor %ymm4, %ymm15, %ymm4;
+       vpxor %ymm5, %ymm15, %ymm5;
+       vpxor %ymm6, %ymm15, %ymm6;
+       vpxor %ymm7, %ymm15, %ymm7;
+       vpxor %ymm8, %ymm15, %ymm8;
+       vpxor %ymm9, %ymm15, %ymm9;
+       vpxor %ymm10, %ymm15, %ymm10;
+       vpxor %ymm11, %ymm15, %ymm11;
+       vpxor %ymm12, %ymm15, %ymm12;
+       vpxor 13 * 32(%rax), %ymm15, %ymm13;
+       vpxor 14 * 32(%rax), %ymm15, %ymm14;
+       vpxor 15 * 32(%rax), %ymm15, %ymm15;
+
+       call __camellia_enc_blk32;
+
+       vpxor 0 * 32(%rsi), %ymm7, %ymm7;
+       vpxor 1 * 32(%rsi), %ymm6, %ymm6;
+       vpxor 2 * 32(%rsi), %ymm5, %ymm5;
+       vpxor 3 * 32(%rsi), %ymm4, %ymm4;
+       vpxor 4 * 32(%rsi), %ymm3, %ymm3;
+       vpxor 5 * 32(%rsi), %ymm2, %ymm2;
+       vpxor 6 * 32(%rsi), %ymm1, %ymm1;
+       vpxor 7 * 32(%rsi), %ymm0, %ymm0;
+       vpxor 8 * 32(%rsi), %ymm15, %ymm15;
+       vpxor 9 * 32(%rsi), %ymm14, %ymm14;
+       vpxor 10 * 32(%rsi), %ymm13, %ymm13;
+       vpxor 11 * 32(%rsi), %ymm12, %ymm12;
+       vpxor 12 * 32(%rsi), %ymm11, %ymm11;
+       vpxor 13 * 32(%rsi), %ymm10, %ymm10;
+       vpxor 14 * 32(%rsi), %ymm9, %ymm9;
+       vpxor 15 * 32(%rsi), %ymm8, %ymm8;
+
+       write_output(%ymm7, %ymm6, %ymm5, %ymm4, %ymm3, %ymm2, %ymm1, %ymm0,
+                    %ymm15, %ymm14, %ymm13, %ymm12, %ymm11, %ymm10, %ymm9,
+                    %ymm8, %rsi);
+
+       vzeroall;
+
+       movq (16 * 32 + 0 * 8)(%rsp), %r10;
+       movq (16 * 32 + 1 * 8)(%rsp), %r11;
+       movq (16 * 32 + 2 * 8)(%rsp), %r12;
+       movq (16 * 32 + 3 * 8)(%rsp), %r13;
+       CFI_RESTORE(%r10);
+       CFI_RESTORE(%r11);
+       CFI_RESTORE(%r12);
+       CFI_RESTORE(%r13);
+
+       leave;
+       CFI_LEAVE();
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size FUNC_NAME(ocb_enc),.-FUNC_NAME(ocb_enc);)
+
+.align 8
+.globl FUNC_NAME(ocb_dec)
+ELF(.type   FUNC_NAME(ocb_dec),@function;)
+
+FUNC_NAME(ocb_dec):
+       /* input:
+        *      %rdi: ctx, CTX
+        *      %rsi: dst (32 blocks)
+        *      %rdx: src (32 blocks)
+        *      %rcx: offset
+        *      %r8 : checksum
+        *      %r9 : L pointers (void *L[32])
+        */
+       CFI_STARTPROC();
+
+       pushq %rbp;
+       CFI_PUSH(%rbp);
+       movq %rsp, %rbp;
+       CFI_DEF_CFA_REGISTER(%rbp);
+
+       vzeroupper;
+
+       subq $(16 * 32 + 4 * 8), %rsp;
+       andq $~63, %rsp;
+       movq %rsp, %rax;
+
+       movq %r10, (16 * 32 + 0 * 8)(%rsp);
+       movq %r11, (16 * 32 + 1 * 8)(%rsp);
+       movq %r12, (16 * 32 + 2 * 8)(%rsp);
+       movq %r13, (16 * 32 + 3 * 8)(%rsp);
+       CFI_REG_ON_STACK(r10, 16 * 32 + 0 * 8);
+       CFI_REG_ON_STACK(r11, 16 * 32 + 1 * 8);
+       CFI_REG_ON_STACK(r12, 16 * 32 + 2 * 8);
+       CFI_REG_ON_STACK(r13, 16 * 32 + 3 * 8);
+
+       vmovdqu (%rcx), %xmm14;
+
+       /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+       /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i)  */
+
+#define OCB_INPUT(n, l0reg, l1reg, yreg) \
+         vmovdqu (n * 32)(%rdx), yreg; \
+         vpxor (l0reg), %xmm14, %xmm15; \
+         vpxor (l1reg), %xmm15, %xmm14; \
+         vinserti128 $1, %xmm14, %ymm15, %ymm15; \
+         vpxor yreg, %ymm15, yreg; \
+         vmovdqu %ymm15, (n * 32)(%rsi);
+
+       movq (0 * 8)(%r9), %r10;
+       movq (1 * 8)(%r9), %r11;
+       movq (2 * 8)(%r9), %r12;
+       movq (3 * 8)(%r9), %r13;
+       OCB_INPUT(0, %r10, %r11, %ymm0);
+       vmovdqu %ymm0, (15 * 32)(%rax);
+       OCB_INPUT(1, %r12, %r13, %ymm0);
+       vmovdqu %ymm0, (14 * 32)(%rax);
+       movq (4 * 8)(%r9), %r10;
+       movq (5 * 8)(%r9), %r11;
+       movq (6 * 8)(%r9), %r12;
+       movq (7 * 8)(%r9), %r13;
+       OCB_INPUT(2, %r10, %r11, %ymm13);
+       OCB_INPUT(3, %r12, %r13, %ymm12);
+       movq (8 * 8)(%r9), %r10;
+       movq (9 * 8)(%r9), %r11;
+       movq (10 * 8)(%r9), %r12;
+       movq (11 * 8)(%r9), %r13;
+       OCB_INPUT(4, %r10, %r11, %ymm11);
+       OCB_INPUT(5, %r12, %r13, %ymm10);
+       movq (12 * 8)(%r9), %r10;
+       movq (13 * 8)(%r9), %r11;
+       movq (14 * 8)(%r9), %r12;
+       movq (15 * 8)(%r9), %r13;
+       OCB_INPUT(6, %r10, %r11, %ymm9);
+       OCB_INPUT(7, %r12, %r13, %ymm8);
+       movq (16 * 8)(%r9), %r10;
+       movq (17 * 8)(%r9), %r11;
+       movq (18 * 8)(%r9), %r12;
+       movq (19 * 8)(%r9), %r13;
+       OCB_INPUT(8, %r10, %r11, %ymm7);
+       OCB_INPUT(9, %r12, %r13, %ymm6);
+       movq (20 * 8)(%r9), %r10;
+       movq (21 * 8)(%r9), %r11;
+       movq (22 * 8)(%r9), %r12;
+       movq (23 * 8)(%r9), %r13;
+       OCB_INPUT(10, %r10, %r11, %ymm5);
+       OCB_INPUT(11, %r12, %r13, %ymm4);
+       movq (24 * 8)(%r9), %r10;
+       movq (25 * 8)(%r9), %r11;
+       movq (26 * 8)(%r9), %r12;
+       movq (27 * 8)(%r9), %r13;
+       OCB_INPUT(12, %r10, %r11, %ymm3);
+       OCB_INPUT(13, %r12, %r13, %ymm2);
+       movq (28 * 8)(%r9), %r10;
+       movq (29 * 8)(%r9), %r11;
+       movq (30 * 8)(%r9), %r12;
+       movq (31 * 8)(%r9), %r13;
+       OCB_INPUT(14, %r10, %r11, %ymm1);
+       OCB_INPUT(15, %r12, %r13, %ymm0);
+#undef OCB_INPUT
+
+       vmovdqu %xmm14, (%rcx);
+
+       movq %r8, %r10;
+
+       cmpl $128, key_bitlength(CTX);
+       movl $32, %r8d;
+       movl $24, %r9d;
+       cmovel %r9d, %r8d; /* max */
+
+       /* inpack16_pre: */
+       vpbroadcastq (key_table)(CTX, %r8, 8), %ymm15;
+       vpshufb .Lpack_bswap rRIP, %ymm15, %ymm15;
+       vpxor %ymm0, %ymm15, %ymm0;
+       vpxor %ymm1, %ymm15, %ymm1;
+       vpxor %ymm2, %ymm15, %ymm2;
+       vpxor %ymm3, %ymm15, %ymm3;
+       vpxor %ymm4, %ymm15, %ymm4;
+       vpxor %ymm5, %ymm15, %ymm5;
+       vpxor %ymm6, %ymm15, %ymm6;
+       vpxor %ymm7, %ymm15, %ymm7;
+       vpxor %ymm8, %ymm15, %ymm8;
+       vpxor %ymm9, %ymm15, %ymm9;
+       vpxor %ymm10, %ymm15, %ymm10;
+       vpxor %ymm11, %ymm15, %ymm11;
+       vpxor %ymm12, %ymm15, %ymm12;
+       vpxor %ymm13, %ymm15, %ymm13;
+       vpxor 14 * 32(%rax), %ymm15, %ymm14;
+       vpxor 15 * 32(%rax), %ymm15, %ymm15;
+
+       call __camellia_dec_blk32;
+
+       vpxor 0 * 32(%rsi), %ymm7, %ymm7;
+       vpxor 1 * 32(%rsi), %ymm6, %ymm6;
+       vpxor 2 * 32(%rsi), %ymm5, %ymm5;
+       vpxor 3 * 32(%rsi), %ymm4, %ymm4;
+       vpxor 4 * 32(%rsi), %ymm3, %ymm3;
+       vpxor 5 * 32(%rsi), %ymm2, %ymm2;
+       vpxor 6 * 32(%rsi), %ymm1, %ymm1;
+       vpxor 7 * 32(%rsi), %ymm0, %ymm0;
+       vmovdqu %ymm7, (7 * 32)(%rax);
+       vmovdqu %ymm6, (6 * 32)(%rax);
+       vpxor 8 * 32(%rsi), %ymm15, %ymm15;
+       vpxor 9 * 32(%rsi), %ymm14, %ymm14;
+       vpxor 10 * 32(%rsi), %ymm13, %ymm13;
+       vpxor 11 * 32(%rsi), %ymm12, %ymm12;
+       vpxor 12 * 32(%rsi), %ymm11, %ymm11;
+       vpxor 13 * 32(%rsi), %ymm10, %ymm10;
+       vpxor 14 * 32(%rsi), %ymm9, %ymm9;
+       vpxor 15 * 32(%rsi), %ymm8, %ymm8;
+
+       /* Checksum_i = Checksum_{i-1} xor P_i  */
+
+       vpxor %ymm5, %ymm7, %ymm7;
+       vpxor %ymm4, %ymm6, %ymm6;
+       vpxor %ymm3, %ymm7, %ymm7;
+       vpxor %ymm2, %ymm6, %ymm6;
+       vpxor %ymm1, %ymm7, %ymm7;
+       vpxor %ymm0, %ymm6, %ymm6;
+       vpxor %ymm15, %ymm7, %ymm7;
+       vpxor %ymm14, %ymm6, %ymm6;
+       vpxor %ymm13, %ymm7, %ymm7;
+       vpxor %ymm12, %ymm6, %ymm6;
+       vpxor %ymm11, %ymm7, %ymm7;
+       vpxor %ymm10, %ymm6, %ymm6;
+       vpxor %ymm9, %ymm7, %ymm7;
+       vpxor %ymm8, %ymm6, %ymm6;
+       vpxor %ymm7, %ymm6, %ymm7;
+
+       vextracti128 $1, %ymm7, %xmm6;
+       vpxor %xmm6, %xmm7, %xmm7;
+       vpxor (%r10), %xmm7, %xmm7;
+       vmovdqu %xmm7, (%r10);
+
+       vmovdqu 7 * 32(%rax), %ymm7;
+       vmovdqu 6 * 32(%rax), %ymm6;
+
+       write_output(%ymm7, %ymm6, %ymm5, %ymm4, %ymm3, %ymm2, %ymm1, %ymm0,
+                    %ymm15, %ymm14, %ymm13, %ymm12, %ymm11, %ymm10, %ymm9,
+                    %ymm8, %rsi);
+
+       vzeroall;
+
+       movq (16 * 32 + 0 * 8)(%rsp), %r10;
+       movq (16 * 32 + 1 * 8)(%rsp), %r11;
+       movq (16 * 32 + 2 * 8)(%rsp), %r12;
+       movq (16 * 32 + 3 * 8)(%rsp), %r13;
+       CFI_RESTORE(%r10);
+       CFI_RESTORE(%r11);
+       CFI_RESTORE(%r12);
+       CFI_RESTORE(%r13);
+
+       leave;
+       CFI_LEAVE();
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size FUNC_NAME(ocb_dec),.-FUNC_NAME(ocb_dec);)
+
+.align 8
+.globl FUNC_NAME(ocb_auth)
+ELF(.type   FUNC_NAME(ocb_auth),@function;)
+
+FUNC_NAME(ocb_auth):
+       /* input:
+        *      %rdi: ctx, CTX
+        *      %rsi: abuf (16 blocks)
+        *      %rdx: offset
+        *      %rcx: checksum
+        *      %r8 : L pointers (void *L[16])
+        */
+       CFI_STARTPROC();
+
+       pushq %rbp;
+       CFI_PUSH(%rbp);
+       movq %rsp, %rbp;
+       CFI_DEF_CFA_REGISTER(%rbp);
+
+       vzeroupper;
+
+       subq $(16 * 32 + 4 * 8), %rsp;
+       andq $~63, %rsp;
+       movq %rsp, %rax;
+
+       movq %r10, (16 * 32 + 0 * 8)(%rsp);
+       movq %r11, (16 * 32 + 1 * 8)(%rsp);
+       movq %r12, (16 * 32 + 2 * 8)(%rsp);
+       movq %r13, (16 * 32 + 3 * 8)(%rsp);
+       CFI_REG_ON_STACK(r10, 16 * 32 + 0 * 8);
+       CFI_REG_ON_STACK(r11, 16 * 32 + 1 * 8);
+       CFI_REG_ON_STACK(r12, 16 * 32 + 2 * 8);
+       CFI_REG_ON_STACK(r13, 16 * 32 + 3 * 8);
+
+       vmovdqu (%rdx), %xmm14;
+
+       /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+       /* Checksum_i = Checksum_{i-1} xor P_i  */
+       /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i)  */
+
+#define OCB_INPUT(n, l0reg, l1reg, yreg) \
+         vmovdqu (n * 32)(%rsi), yreg; \
+         vpxor (l0reg), %xmm14, %xmm15; \
+         vpxor (l1reg), %xmm15, %xmm14; \
+         vinserti128 $1, %xmm14, %ymm15, %ymm15; \
+         vpxor yreg, %ymm15, yreg;
+
+       movq (0 * 8)(%r8), %r10;
+       movq (1 * 8)(%r8), %r11;
+       movq (2 * 8)(%r8), %r12;
+       movq (3 * 8)(%r8), %r13;
+       OCB_INPUT(0, %r10, %r11, %ymm0);
+       vmovdqu %ymm0, (15 * 32)(%rax);
+       OCB_INPUT(1, %r12, %r13, %ymm0);
+       vmovdqu %ymm0, (14 * 32)(%rax);
+       movq (4 * 8)(%r8), %r10;
+       movq (5 * 8)(%r8), %r11;
+       movq (6 * 8)(%r8), %r12;
+       movq (7 * 8)(%r8), %r13;
+       OCB_INPUT(2, %r10, %r11, %ymm13);
+       OCB_INPUT(3, %r12, %r13, %ymm12);
+       movq (8 * 8)(%r8), %r10;
+       movq (9 * 8)(%r8), %r11;
+       movq (10 * 8)(%r8), %r12;
+       movq (11 * 8)(%r8), %r13;
+       OCB_INPUT(4, %r10, %r11, %ymm11);
+       OCB_INPUT(5, %r12, %r13, %ymm10);
+       movq (12 * 8)(%r8), %r10;
+       movq (13 * 8)(%r8), %r11;
+       movq (14 * 8)(%r8), %r12;
+       movq (15 * 8)(%r8), %r13;
+       OCB_INPUT(6, %r10, %r11, %ymm9);
+       OCB_INPUT(7, %r12, %r13, %ymm8);
+       movq (16 * 8)(%r8), %r10;
+       movq (17 * 8)(%r8), %r11;
+       movq (18 * 8)(%r8), %r12;
+       movq (19 * 8)(%r8), %r13;
+       OCB_INPUT(8, %r10, %r11, %ymm7);
+       OCB_INPUT(9, %r12, %r13, %ymm6);
+       movq (20 * 8)(%r8), %r10;
+       movq (21 * 8)(%r8), %r11;
+       movq (22 * 8)(%r8), %r12;
+       movq (23 * 8)(%r8), %r13;
+       OCB_INPUT(10, %r10, %r11, %ymm5);
+       OCB_INPUT(11, %r12, %r13, %ymm4);
+       movq (24 * 8)(%r8), %r10;
+       movq (25 * 8)(%r8), %r11;
+       movq (26 * 8)(%r8), %r12;
+       movq (27 * 8)(%r8), %r13;
+       OCB_INPUT(12, %r10, %r11, %ymm3);
+       OCB_INPUT(13, %r12, %r13, %ymm2);
+       movq (28 * 8)(%r8), %r10;
+       movq (29 * 8)(%r8), %r11;
+       movq (30 * 8)(%r8), %r12;
+       movq (31 * 8)(%r8), %r13;
+       OCB_INPUT(14, %r10, %r11, %ymm1);
+       OCB_INPUT(15, %r12, %r13, %ymm0);
+#undef OCB_INPUT
+
+       vmovdqu %xmm14, (%rdx);
+
+       cmpl $128, key_bitlength(CTX);
+       movl $32, %r8d;
+       movl $24, %r10d;
+       cmovel %r10d, %r8d; /* max */
+
+       movq %rcx, %r10;
+
+       /* inpack16_pre: */
+       vpbroadcastq (key_table)(CTX), %ymm15;
+       vpshufb .Lpack_bswap rRIP, %ymm15, %ymm15;
+       vpxor %ymm0, %ymm15, %ymm0;
+       vpxor %ymm1, %ymm15, %ymm1;
+       vpxor %ymm2, %ymm15, %ymm2;
+       vpxor %ymm3, %ymm15, %ymm3;
+       vpxor %ymm4, %ymm15, %ymm4;
+       vpxor %ymm5, %ymm15, %ymm5;
+       vpxor %ymm6, %ymm15, %ymm6;
+       vpxor %ymm7, %ymm15, %ymm7;
+       vpxor %ymm8, %ymm15, %ymm8;
+       vpxor %ymm9, %ymm15, %ymm9;
+       vpxor %ymm10, %ymm15, %ymm10;
+       vpxor %ymm11, %ymm15, %ymm11;
+       vpxor %ymm12, %ymm15, %ymm12;
+       vpxor %ymm13, %ymm15, %ymm13;
+       vpxor 14 * 32(%rax), %ymm15, %ymm14;
+       vpxor 15 * 32(%rax), %ymm15, %ymm15;
+
+       call __camellia_enc_blk32;
+
+       vpxor %ymm7, %ymm6, %ymm6;
+       vpxor %ymm5, %ymm4, %ymm4;
+       vpxor %ymm3, %ymm2, %ymm2;
+       vpxor %ymm1, %ymm0, %ymm0;
+       vpxor %ymm15, %ymm14, %ymm14;
+       vpxor %ymm13, %ymm12, %ymm12;
+       vpxor %ymm11, %ymm10, %ymm10;
+       vpxor %ymm9, %ymm8, %ymm8;
+
+       vpxor %ymm6, %ymm4, %ymm4;
+       vpxor %ymm2, %ymm0, %ymm0;
+       vpxor %ymm14, %ymm12, %ymm12;
+       vpxor %ymm10, %ymm8, %ymm8;
+
+       vpxor %ymm4, %ymm0, %ymm0;
+       vpxor %ymm12, %ymm8, %ymm8;
+
+       vpxor %ymm0, %ymm8, %ymm0;
+
+       vextracti128 $1, %ymm0, %xmm1;
+       vpxor (%r10), %xmm0, %xmm0;
+       vpxor %xmm0, %xmm1, %xmm0;
+       vmovdqu %xmm0, (%r10);
+
+       vzeroall;
+
+       movq (16 * 32 + 0 * 8)(%rsp), %r10;
+       movq (16 * 32 + 1 * 8)(%rsp), %r11;
+       movq (16 * 32 + 2 * 8)(%rsp), %r12;
+       movq (16 * 32 + 3 * 8)(%rsp), %r13;
+       CFI_RESTORE(%r10);
+       CFI_RESTORE(%r11);
+       CFI_RESTORE(%r12);
+       CFI_RESTORE(%r13);
+
+       leave;
+       CFI_LEAVE();
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size FUNC_NAME(ocb_auth),.-FUNC_NAME(ocb_auth);)
+
+#endif /* GCRY_CAMELLIA_AESNI_AVX2_AMD64_H */
diff --git a/grub-core/lib/libgcrypt/cipher/camellia-arm.S 
b/grub-core/lib/libgcrypt/cipher/camellia-arm.S
new file mode 100644
index 000000000..a3d87d110
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/camellia-arm.S
@@ -0,0 +1,626 @@
+/* camellia-arm.S  -  ARM assembly implementation of Camellia cipher
+ *
+ * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+
+#if defined(__ARMEL__)
+#ifdef HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS
+
+.text
+
+.syntax unified
+.arm
+
+#ifdef __PIC__
+#  define GET_DATA_POINTER(reg, name, rtmp) \
+               ldr reg, 1f; \
+               ldr rtmp, 2f; \
+               b 3f; \
+       1:      .word _GLOBAL_OFFSET_TABLE_-(3f+8); \
+       2:      .word name(GOT); \
+       3:      add reg, pc, reg; \
+               ldr reg, [reg, rtmp];
+#else
+#  define GET_DATA_POINTER(reg, name, rtmp) ldr reg, =name
+#endif
+
+/* struct camellia_ctx: */
+#define key_table 0
+
+/* register macros */
+#define CTX %r0
+#define RTAB1 %ip
+#define RTAB3 %r1
+#define RMASK %lr
+
+#define IL %r2
+#define IR %r3
+
+#define XL %r4
+#define XR %r5
+#define YL %r6
+#define YR %r7
+
+#define RT0 %r8
+#define RT1 %r9
+#define RT2 %r10
+#define RT3 %r11
+
+/* helper macros */
+#define ldr_unaligned_be(rout, rsrc, offs, rtmp) \
+       ldrb rout, [rsrc, #((offs) + 3)]; \
+       ldrb rtmp, [rsrc, #((offs) + 2)]; \
+       orr rout, rout, rtmp, lsl #8; \
+       ldrb rtmp, [rsrc, #((offs) + 1)]; \
+       orr rout, rout, rtmp, lsl #16; \
+       ldrb rtmp, [rsrc, #((offs) + 0)]; \
+       orr rout, rout, rtmp, lsl #24;
+
+#define str_unaligned_be(rin, rdst, offs, rtmp0, rtmp1) \
+       mov rtmp0, rin, lsr #8; \
+       strb rin, [rdst, #((offs) + 3)]; \
+       mov rtmp1, rin, lsr #16; \
+       strb rtmp0, [rdst, #((offs) + 2)]; \
+       mov rtmp0, rin, lsr #24; \
+       strb rtmp1, [rdst, #((offs) + 1)]; \
+       strb rtmp0, [rdst, #((offs) + 0)];
+
+#ifdef __ARMEL__
+#ifdef HAVE_ARM_ARCH_V6
+       #define host_to_be(reg, rtmp) \
+               rev reg, reg;
+       #define be_to_host(reg, rtmp) \
+               rev reg, reg;
+#else
+       #define host_to_be(reg, rtmp) \
+               eor     rtmp, reg, reg, ror #16; \
+               mov     rtmp, rtmp, lsr #8; \
+               bic     rtmp, rtmp, #65280; \
+               eor     reg, rtmp, reg, ror #8;
+       #define be_to_host(reg, rtmp) \
+               eor     rtmp, reg, reg, ror #16; \
+               mov     rtmp, rtmp, lsr #8; \
+               bic     rtmp, rtmp, #65280; \
+               eor     reg, rtmp, reg, ror #8;
+#endif
+#else
+       /* nop on big-endian */
+       #define host_to_be(reg, rtmp) /*_*/
+       #define be_to_host(reg, rtmp) /*_*/
+#endif
+
+#define ldr_input_aligned_be(rin, a, b, c, d, rtmp) \
+       ldr a, [rin, #0]; \
+       ldr b, [rin, #4]; \
+       be_to_host(a, rtmp); \
+       ldr c, [rin, #8]; \
+       be_to_host(b, rtmp); \
+       ldr d, [rin, #12]; \
+       be_to_host(c, rtmp); \
+       be_to_host(d, rtmp);
+
+#define str_output_aligned_be(rout, a, b, c, d, rtmp) \
+       be_to_host(a, rtmp); \
+       be_to_host(b, rtmp); \
+       str a, [rout, #0]; \
+       be_to_host(c, rtmp); \
+       str b, [rout, #4]; \
+       be_to_host(d, rtmp); \
+       str c, [rout, #8]; \
+       str d, [rout, #12];
+
+#ifdef __ARM_FEATURE_UNALIGNED
+       /* unaligned word reads/writes allowed */
+       #define ldr_input_be(rin, ra, rb, rc, rd, rtmp) \
+               ldr_input_aligned_be(rin, ra, rb, rc, rd, rtmp)
+
+       #define str_output_be(rout, ra, rb, rc, rd, rtmp0, rtmp1) \
+               str_output_aligned_be(rout, ra, rb, rc, rd, rtmp0)
+#else
+       /* need to handle unaligned reads/writes by byte reads */
+       #define ldr_input_be(rin, ra, rb, rc, rd, rtmp0) \
+               tst rin, #3; \
+               beq 1f; \
+                       ldr_unaligned_be(ra, rin, 0, rtmp0); \
+                       ldr_unaligned_be(rb, rin, 4, rtmp0); \
+                       ldr_unaligned_be(rc, rin, 8, rtmp0); \
+                       ldr_unaligned_be(rd, rin, 12, rtmp0); \
+                       b 2f; \
+               1:;\
+                       ldr_input_aligned_be(rin, ra, rb, rc, rd, rtmp0); \
+               2:;
+
+       #define str_output_be(rout, ra, rb, rc, rd, rtmp0, rtmp1) \
+               tst rout, #3; \
+               beq 1f; \
+                       str_unaligned_be(ra, rout, 0, rtmp0, rtmp1); \
+                       str_unaligned_be(rb, rout, 4, rtmp0, rtmp1); \
+                       str_unaligned_be(rc, rout, 8, rtmp0, rtmp1); \
+                       str_unaligned_be(rd, rout, 12, rtmp0, rtmp1); \
+                       b 2f; \
+               1:;\
+                       str_output_aligned_be(rout, ra, rb, rc, rd, rtmp0); \
+               2:;
+#endif
+
+/**********************************************************************
+  1-way camellia
+ **********************************************************************/
+#define roundsm(xl, xr, kl, kr, yl, yr) \
+       ldr RT2, [CTX, #(key_table + ((kl) * 4))]; \
+       and  IR, RMASK, xr, lsl#(4);      /*sp1110*/ \
+       ldr RT3, [CTX, #(key_table + ((kr) * 4))]; \
+       and  IL, RMASK, xl, lsr#(24 - 4); /*sp1110*/ \
+       and RT0, RMASK, xr, lsr#(16 - 4); /*sp3033*/ \
+       ldr  IR, [RTAB1,  IR]; \
+       and RT1, RMASK, xl, lsr#(8 - 4);  /*sp3033*/ \
+       eor yl, RT2; \
+       ldr  IL, [RTAB1,  IL]; \
+       eor yr, RT3; \
+       \
+       ldr RT0, [RTAB3, RT0]; \
+       add RTAB1, #4; \
+       ldr RT1, [RTAB3, RT1]; \
+       add RTAB3, #4; \
+       \
+       and RT2, RMASK, xr, lsr#(24 - 4); /*sp0222*/ \
+       and RT3, RMASK, xl, lsr#(16 - 4); /*sp0222*/ \
+       \
+       eor IR, RT0; \
+       eor IL, RT1; \
+       \
+       ldr RT2, [RTAB1, RT2]; \
+       and RT0, RMASK, xr, lsr#(8 - 4);  /*sp4404*/ \
+       ldr RT3, [RTAB1, RT3]; \
+       and RT1, RMASK, xl, lsl#(4);      /*sp4404*/ \
+       \
+       ldr RT0, [RTAB3, RT0]; \
+       sub RTAB1, #4; \
+       ldr RT1, [RTAB3, RT1]; \
+       sub RTAB3, #4; \
+       \
+       eor IR, RT2; \
+       eor IL, RT3; \
+       eor IR, RT0; \
+       eor IL, RT1; \
+       \
+       eor IR, IL; \
+       eor yr, yr, IL, ror#8; \
+       eor yl, IR; \
+       eor yr, IR;
+
+#define enc_rounds(n) \
+       roundsm(XL, XR, ((n) + 2) * 2 + 0, ((n) + 2) * 2 + 1, YL, YR); \
+       roundsm(YL, YR, ((n) + 3) * 2 + 0, ((n) + 3) * 2 + 1, XL, XR); \
+       roundsm(XL, XR, ((n) + 4) * 2 + 0, ((n) + 4) * 2 + 1, YL, YR); \
+       roundsm(YL, YR, ((n) + 5) * 2 + 0, ((n) + 5) * 2 + 1, XL, XR); \
+       roundsm(XL, XR, ((n) + 6) * 2 + 0, ((n) + 6) * 2 + 1, YL, YR); \
+       roundsm(YL, YR, ((n) + 7) * 2 + 0, ((n) + 7) * 2 + 1, XL, XR);
+
+#define dec_rounds(n) \
+       roundsm(XL, XR, ((n) + 7) * 2 + 0, ((n) + 7) * 2 + 1, YL, YR); \
+       roundsm(YL, YR, ((n) + 6) * 2 + 0, ((n) + 6) * 2 + 1, XL, XR); \
+       roundsm(XL, XR, ((n) + 5) * 2 + 0, ((n) + 5) * 2 + 1, YL, YR); \
+       roundsm(YL, YR, ((n) + 4) * 2 + 0, ((n) + 4) * 2 + 1, XL, XR); \
+       roundsm(XL, XR, ((n) + 3) * 2 + 0, ((n) + 3) * 2 + 1, YL, YR); \
+       roundsm(YL, YR, ((n) + 2) * 2 + 0, ((n) + 2) * 2 + 1, XL, XR);
+
+/* perform FL and FL⁻¹ */
+#define fls(ll, lr, rl, rr, kll, klr, krl, krr) \
+       ldr RT0, [CTX, #(key_table + ((kll) * 4))]; \
+       ldr RT2, [CTX, #(key_table + ((krr) * 4))]; \
+       and RT0, ll; \
+       ldr RT3, [CTX, #(key_table + ((krl) * 4))]; \
+       orr RT2, rr; \
+       ldr RT1, [CTX, #(key_table + ((klr) * 4))]; \
+       eor rl, RT2; \
+       eor lr, lr, RT0, ror#31; \
+       and RT3, rl; \
+       orr RT1, lr; \
+       eor ll, RT1; \
+       eor rr, rr, RT3, ror#31;
+
+#define enc_fls(n) \
+       fls(XL, XR, YL, YR, \
+           (n) * 2 + 0, (n) * 2 + 1, \
+           (n) * 2 + 2, (n) * 2 + 3);
+
+#define dec_fls(n) \
+       fls(XL, XR, YL, YR, \
+           (n) * 2 + 2, (n) * 2 + 3, \
+           (n) * 2 + 0, (n) * 2 + 1);
+
+#define inpack(n) \
+       ldr_input_be(%r2, XL, XR, YL, YR, RT0); \
+       ldr RT0, [CTX, #(key_table + ((n) * 8) + 0)]; \
+       ldr RT1, [CTX, #(key_table + ((n) * 8) + 4)]; \
+       eor XL, RT0; \
+       eor XR, RT1;
+
+#define outunpack(n) \
+       ldr RT0, [CTX, #(key_table + ((n) * 8) + 0)]; \
+       ldr RT1, [CTX, #(key_table + ((n) * 8) + 4)]; \
+       eor YL, RT0; \
+       eor YR, RT1; \
+       str_output_be(%r1, YL, YR, XL, XR, RT0, RT1);
+
+.align 3
+.globl _gcry_camellia_arm_encrypt_block
+.type   _gcry_camellia_arm_encrypt_block,%function;
+
+_gcry_camellia_arm_encrypt_block:
+       /* input:
+        *      %r0: keytable
+        *      %r1: dst
+        *      %r2: src
+        *      %r3: keybitlen
+        */
+       push {%r1, %r4-%r11, %ip, %lr};
+
+       GET_DATA_POINTER(RTAB1, .Lcamellia_sp1110, RTAB3);
+       mov RMASK, #0xff;
+       add RTAB3, RTAB1, #(2 * 4);
+       push {%r3};
+       mov RMASK, RMASK, lsl#4 /* byte mask */
+
+       inpack(0);
+
+       enc_rounds(0);
+       enc_fls(8);
+       enc_rounds(8);
+       enc_fls(16);
+       enc_rounds(16);
+
+       pop {RT0};
+       cmp RT0, #(16 * 8);
+       bne .Lenc_256;
+
+       pop {%r1};
+       outunpack(24);
+
+       pop {%r4-%r11, %ip, %pc};
+.ltorg
+
+.Lenc_256:
+       enc_fls(24);
+       enc_rounds(24);
+
+       pop {%r1};
+       outunpack(32);
+
+       pop {%r4-%r11, %ip, %pc};
+.ltorg
+.size _gcry_camellia_arm_encrypt_block,.-_gcry_camellia_arm_encrypt_block;
+
+.align 3
+.globl _gcry_camellia_arm_decrypt_block
+.type   _gcry_camellia_arm_decrypt_block,%function;
+
+_gcry_camellia_arm_decrypt_block:
+       /* input:
+        *      %r0: keytable
+        *      %r1: dst
+        *      %r2: src
+        *      %r3: keybitlen
+        */
+       push {%r1, %r4-%r11, %ip, %lr};
+
+       GET_DATA_POINTER(RTAB1, .Lcamellia_sp1110, RTAB3);
+       mov RMASK, #0xff;
+       add RTAB3, RTAB1, #(2 * 4);
+       mov RMASK, RMASK, lsl#4 /* byte mask */
+
+       cmp %r3, #(16 * 8);
+       bne .Ldec_256;
+
+       inpack(24);
+
+.Ldec_128:
+       dec_rounds(16);
+       dec_fls(16);
+       dec_rounds(8);
+       dec_fls(8);
+       dec_rounds(0);
+
+       pop {%r1};
+       outunpack(0);
+
+       pop {%r4-%r11, %ip, %pc};
+.ltorg
+
+.Ldec_256:
+       inpack(32);
+       dec_rounds(24);
+       dec_fls(24);
+
+       b .Ldec_128;
+.ltorg
+.size _gcry_camellia_arm_decrypt_block,.-_gcry_camellia_arm_decrypt_block;
+
+.data
+
+/* Encryption/Decryption tables */
+.align 5
+.Lcamellia_sp1110:
+.long 0x70707000
+.Lcamellia_sp0222:
+            .long 0x00e0e0e0
+.Lcamellia_sp3033:
+                        .long 0x38003838
+.Lcamellia_sp4404:
+                                    .long 0x70700070
+.long 0x82828200, 0x00050505, 0x41004141, 0x2c2c002c
+.long 0x2c2c2c00, 0x00585858, 0x16001616, 0xb3b300b3
+.long 0xececec00, 0x00d9d9d9, 0x76007676, 0xc0c000c0
+.long 0xb3b3b300, 0x00676767, 0xd900d9d9, 0xe4e400e4
+.long 0x27272700, 0x004e4e4e, 0x93009393, 0x57570057
+.long 0xc0c0c000, 0x00818181, 0x60006060, 0xeaea00ea
+.long 0xe5e5e500, 0x00cbcbcb, 0xf200f2f2, 0xaeae00ae
+.long 0xe4e4e400, 0x00c9c9c9, 0x72007272, 0x23230023
+.long 0x85858500, 0x000b0b0b, 0xc200c2c2, 0x6b6b006b
+.long 0x57575700, 0x00aeaeae, 0xab00abab, 0x45450045
+.long 0x35353500, 0x006a6a6a, 0x9a009a9a, 0xa5a500a5
+.long 0xeaeaea00, 0x00d5d5d5, 0x75007575, 0xeded00ed
+.long 0x0c0c0c00, 0x00181818, 0x06000606, 0x4f4f004f
+.long 0xaeaeae00, 0x005d5d5d, 0x57005757, 0x1d1d001d
+.long 0x41414100, 0x00828282, 0xa000a0a0, 0x92920092
+.long 0x23232300, 0x00464646, 0x91009191, 0x86860086
+.long 0xefefef00, 0x00dfdfdf, 0xf700f7f7, 0xafaf00af
+.long 0x6b6b6b00, 0x00d6d6d6, 0xb500b5b5, 0x7c7c007c
+.long 0x93939300, 0x00272727, 0xc900c9c9, 0x1f1f001f
+.long 0x45454500, 0x008a8a8a, 0xa200a2a2, 0x3e3e003e
+.long 0x19191900, 0x00323232, 0x8c008c8c, 0xdcdc00dc
+.long 0xa5a5a500, 0x004b4b4b, 0xd200d2d2, 0x5e5e005e
+.long 0x21212100, 0x00424242, 0x90009090, 0x0b0b000b
+.long 0xededed00, 0x00dbdbdb, 0xf600f6f6, 0xa6a600a6
+.long 0x0e0e0e00, 0x001c1c1c, 0x07000707, 0x39390039
+.long 0x4f4f4f00, 0x009e9e9e, 0xa700a7a7, 0xd5d500d5
+.long 0x4e4e4e00, 0x009c9c9c, 0x27002727, 0x5d5d005d
+.long 0x1d1d1d00, 0x003a3a3a, 0x8e008e8e, 0xd9d900d9
+.long 0x65656500, 0x00cacaca, 0xb200b2b2, 0x5a5a005a
+.long 0x92929200, 0x00252525, 0x49004949, 0x51510051
+.long 0xbdbdbd00, 0x007b7b7b, 0xde00dede, 0x6c6c006c
+.long 0x86868600, 0x000d0d0d, 0x43004343, 0x8b8b008b
+.long 0xb8b8b800, 0x00717171, 0x5c005c5c, 0x9a9a009a
+.long 0xafafaf00, 0x005f5f5f, 0xd700d7d7, 0xfbfb00fb
+.long 0x8f8f8f00, 0x001f1f1f, 0xc700c7c7, 0xb0b000b0
+.long 0x7c7c7c00, 0x00f8f8f8, 0x3e003e3e, 0x74740074
+.long 0xebebeb00, 0x00d7d7d7, 0xf500f5f5, 0x2b2b002b
+.long 0x1f1f1f00, 0x003e3e3e, 0x8f008f8f, 0xf0f000f0
+.long 0xcecece00, 0x009d9d9d, 0x67006767, 0x84840084
+.long 0x3e3e3e00, 0x007c7c7c, 0x1f001f1f, 0xdfdf00df
+.long 0x30303000, 0x00606060, 0x18001818, 0xcbcb00cb
+.long 0xdcdcdc00, 0x00b9b9b9, 0x6e006e6e, 0x34340034
+.long 0x5f5f5f00, 0x00bebebe, 0xaf00afaf, 0x76760076
+.long 0x5e5e5e00, 0x00bcbcbc, 0x2f002f2f, 0x6d6d006d
+.long 0xc5c5c500, 0x008b8b8b, 0xe200e2e2, 0xa9a900a9
+.long 0x0b0b0b00, 0x00161616, 0x85008585, 0xd1d100d1
+.long 0x1a1a1a00, 0x00343434, 0x0d000d0d, 0x04040004
+.long 0xa6a6a600, 0x004d4d4d, 0x53005353, 0x14140014
+.long 0xe1e1e100, 0x00c3c3c3, 0xf000f0f0, 0x3a3a003a
+.long 0x39393900, 0x00727272, 0x9c009c9c, 0xdede00de
+.long 0xcacaca00, 0x00959595, 0x65006565, 0x11110011
+.long 0xd5d5d500, 0x00ababab, 0xea00eaea, 0x32320032
+.long 0x47474700, 0x008e8e8e, 0xa300a3a3, 0x9c9c009c
+.long 0x5d5d5d00, 0x00bababa, 0xae00aeae, 0x53530053
+.long 0x3d3d3d00, 0x007a7a7a, 0x9e009e9e, 0xf2f200f2
+.long 0xd9d9d900, 0x00b3b3b3, 0xec00ecec, 0xfefe00fe
+.long 0x01010100, 0x00020202, 0x80008080, 0xcfcf00cf
+.long 0x5a5a5a00, 0x00b4b4b4, 0x2d002d2d, 0xc3c300c3
+.long 0xd6d6d600, 0x00adadad, 0x6b006b6b, 0x7a7a007a
+.long 0x51515100, 0x00a2a2a2, 0xa800a8a8, 0x24240024
+.long 0x56565600, 0x00acacac, 0x2b002b2b, 0xe8e800e8
+.long 0x6c6c6c00, 0x00d8d8d8, 0x36003636, 0x60600060
+.long 0x4d4d4d00, 0x009a9a9a, 0xa600a6a6, 0x69690069
+.long 0x8b8b8b00, 0x00171717, 0xc500c5c5, 0xaaaa00aa
+.long 0x0d0d0d00, 0x001a1a1a, 0x86008686, 0xa0a000a0
+.long 0x9a9a9a00, 0x00353535, 0x4d004d4d, 0xa1a100a1
+.long 0x66666600, 0x00cccccc, 0x33003333, 0x62620062
+.long 0xfbfbfb00, 0x00f7f7f7, 0xfd00fdfd, 0x54540054
+.long 0xcccccc00, 0x00999999, 0x66006666, 0x1e1e001e
+.long 0xb0b0b000, 0x00616161, 0x58005858, 0xe0e000e0
+.long 0x2d2d2d00, 0x005a5a5a, 0x96009696, 0x64640064
+.long 0x74747400, 0x00e8e8e8, 0x3a003a3a, 0x10100010
+.long 0x12121200, 0x00242424, 0x09000909, 0x00000000
+.long 0x2b2b2b00, 0x00565656, 0x95009595, 0xa3a300a3
+.long 0x20202000, 0x00404040, 0x10001010, 0x75750075
+.long 0xf0f0f000, 0x00e1e1e1, 0x78007878, 0x8a8a008a
+.long 0xb1b1b100, 0x00636363, 0xd800d8d8, 0xe6e600e6
+.long 0x84848400, 0x00090909, 0x42004242, 0x09090009
+.long 0x99999900, 0x00333333, 0xcc00cccc, 0xdddd00dd
+.long 0xdfdfdf00, 0x00bfbfbf, 0xef00efef, 0x87870087
+.long 0x4c4c4c00, 0x00989898, 0x26002626, 0x83830083
+.long 0xcbcbcb00, 0x00979797, 0xe500e5e5, 0xcdcd00cd
+.long 0xc2c2c200, 0x00858585, 0x61006161, 0x90900090
+.long 0x34343400, 0x00686868, 0x1a001a1a, 0x73730073
+.long 0x7e7e7e00, 0x00fcfcfc, 0x3f003f3f, 0xf6f600f6
+.long 0x76767600, 0x00ececec, 0x3b003b3b, 0x9d9d009d
+.long 0x05050500, 0x000a0a0a, 0x82008282, 0xbfbf00bf
+.long 0x6d6d6d00, 0x00dadada, 0xb600b6b6, 0x52520052
+.long 0xb7b7b700, 0x006f6f6f, 0xdb00dbdb, 0xd8d800d8
+.long 0xa9a9a900, 0x00535353, 0xd400d4d4, 0xc8c800c8
+.long 0x31313100, 0x00626262, 0x98009898, 0xc6c600c6
+.long 0xd1d1d100, 0x00a3a3a3, 0xe800e8e8, 0x81810081
+.long 0x17171700, 0x002e2e2e, 0x8b008b8b, 0x6f6f006f
+.long 0x04040400, 0x00080808, 0x02000202, 0x13130013
+.long 0xd7d7d700, 0x00afafaf, 0xeb00ebeb, 0x63630063
+.long 0x14141400, 0x00282828, 0x0a000a0a, 0xe9e900e9
+.long 0x58585800, 0x00b0b0b0, 0x2c002c2c, 0xa7a700a7
+.long 0x3a3a3a00, 0x00747474, 0x1d001d1d, 0x9f9f009f
+.long 0x61616100, 0x00c2c2c2, 0xb000b0b0, 0xbcbc00bc
+.long 0xdedede00, 0x00bdbdbd, 0x6f006f6f, 0x29290029
+.long 0x1b1b1b00, 0x00363636, 0x8d008d8d, 0xf9f900f9
+.long 0x11111100, 0x00222222, 0x88008888, 0x2f2f002f
+.long 0x1c1c1c00, 0x00383838, 0x0e000e0e, 0xb4b400b4
+.long 0x32323200, 0x00646464, 0x19001919, 0x78780078
+.long 0x0f0f0f00, 0x001e1e1e, 0x87008787, 0x06060006
+.long 0x9c9c9c00, 0x00393939, 0x4e004e4e, 0xe7e700e7
+.long 0x16161600, 0x002c2c2c, 0x0b000b0b, 0x71710071
+.long 0x53535300, 0x00a6a6a6, 0xa900a9a9, 0xd4d400d4
+.long 0x18181800, 0x00303030, 0x0c000c0c, 0xabab00ab
+.long 0xf2f2f200, 0x00e5e5e5, 0x79007979, 0x88880088
+.long 0x22222200, 0x00444444, 0x11001111, 0x8d8d008d
+.long 0xfefefe00, 0x00fdfdfd, 0x7f007f7f, 0x72720072
+.long 0x44444400, 0x00888888, 0x22002222, 0xb9b900b9
+.long 0xcfcfcf00, 0x009f9f9f, 0xe700e7e7, 0xf8f800f8
+.long 0xb2b2b200, 0x00656565, 0x59005959, 0xacac00ac
+.long 0xc3c3c300, 0x00878787, 0xe100e1e1, 0x36360036
+.long 0xb5b5b500, 0x006b6b6b, 0xda00dada, 0x2a2a002a
+.long 0x7a7a7a00, 0x00f4f4f4, 0x3d003d3d, 0x3c3c003c
+.long 0x91919100, 0x00232323, 0xc800c8c8, 0xf1f100f1
+.long 0x24242400, 0x00484848, 0x12001212, 0x40400040
+.long 0x08080800, 0x00101010, 0x04000404, 0xd3d300d3
+.long 0xe8e8e800, 0x00d1d1d1, 0x74007474, 0xbbbb00bb
+.long 0xa8a8a800, 0x00515151, 0x54005454, 0x43430043
+.long 0x60606000, 0x00c0c0c0, 0x30003030, 0x15150015
+.long 0xfcfcfc00, 0x00f9f9f9, 0x7e007e7e, 0xadad00ad
+.long 0x69696900, 0x00d2d2d2, 0xb400b4b4, 0x77770077
+.long 0x50505000, 0x00a0a0a0, 0x28002828, 0x80800080
+.long 0xaaaaaa00, 0x00555555, 0x55005555, 0x82820082
+.long 0xd0d0d000, 0x00a1a1a1, 0x68006868, 0xecec00ec
+.long 0xa0a0a000, 0x00414141, 0x50005050, 0x27270027
+.long 0x7d7d7d00, 0x00fafafa, 0xbe00bebe, 0xe5e500e5
+.long 0xa1a1a100, 0x00434343, 0xd000d0d0, 0x85850085
+.long 0x89898900, 0x00131313, 0xc400c4c4, 0x35350035
+.long 0x62626200, 0x00c4c4c4, 0x31003131, 0x0c0c000c
+.long 0x97979700, 0x002f2f2f, 0xcb00cbcb, 0x41410041
+.long 0x54545400, 0x00a8a8a8, 0x2a002a2a, 0xefef00ef
+.long 0x5b5b5b00, 0x00b6b6b6, 0xad00adad, 0x93930093
+.long 0x1e1e1e00, 0x003c3c3c, 0x0f000f0f, 0x19190019
+.long 0x95959500, 0x002b2b2b, 0xca00caca, 0x21210021
+.long 0xe0e0e000, 0x00c1c1c1, 0x70007070, 0x0e0e000e
+.long 0xffffff00, 0x00ffffff, 0xff00ffff, 0x4e4e004e
+.long 0x64646400, 0x00c8c8c8, 0x32003232, 0x65650065
+.long 0xd2d2d200, 0x00a5a5a5, 0x69006969, 0xbdbd00bd
+.long 0x10101000, 0x00202020, 0x08000808, 0xb8b800b8
+.long 0xc4c4c400, 0x00898989, 0x62006262, 0x8f8f008f
+.long 0x00000000, 0x00000000, 0x00000000, 0xebeb00eb
+.long 0x48484800, 0x00909090, 0x24002424, 0xcece00ce
+.long 0xa3a3a300, 0x00474747, 0xd100d1d1, 0x30300030
+.long 0xf7f7f700, 0x00efefef, 0xfb00fbfb, 0x5f5f005f
+.long 0x75757500, 0x00eaeaea, 0xba00baba, 0xc5c500c5
+.long 0xdbdbdb00, 0x00b7b7b7, 0xed00eded, 0x1a1a001a
+.long 0x8a8a8a00, 0x00151515, 0x45004545, 0xe1e100e1
+.long 0x03030300, 0x00060606, 0x81008181, 0xcaca00ca
+.long 0xe6e6e600, 0x00cdcdcd, 0x73007373, 0x47470047
+.long 0xdadada00, 0x00b5b5b5, 0x6d006d6d, 0x3d3d003d
+.long 0x09090900, 0x00121212, 0x84008484, 0x01010001
+.long 0x3f3f3f00, 0x007e7e7e, 0x9f009f9f, 0xd6d600d6
+.long 0xdddddd00, 0x00bbbbbb, 0xee00eeee, 0x56560056
+.long 0x94949400, 0x00292929, 0x4a004a4a, 0x4d4d004d
+.long 0x87878700, 0x000f0f0f, 0xc300c3c3, 0x0d0d000d
+.long 0x5c5c5c00, 0x00b8b8b8, 0x2e002e2e, 0x66660066
+.long 0x83838300, 0x00070707, 0xc100c1c1, 0xcccc00cc
+.long 0x02020200, 0x00040404, 0x01000101, 0x2d2d002d
+.long 0xcdcdcd00, 0x009b9b9b, 0xe600e6e6, 0x12120012
+.long 0x4a4a4a00, 0x00949494, 0x25002525, 0x20200020
+.long 0x90909000, 0x00212121, 0x48004848, 0xb1b100b1
+.long 0x33333300, 0x00666666, 0x99009999, 0x99990099
+.long 0x73737300, 0x00e6e6e6, 0xb900b9b9, 0x4c4c004c
+.long 0x67676700, 0x00cecece, 0xb300b3b3, 0xc2c200c2
+.long 0xf6f6f600, 0x00ededed, 0x7b007b7b, 0x7e7e007e
+.long 0xf3f3f300, 0x00e7e7e7, 0xf900f9f9, 0x05050005
+.long 0x9d9d9d00, 0x003b3b3b, 0xce00cece, 0xb7b700b7
+.long 0x7f7f7f00, 0x00fefefe, 0xbf00bfbf, 0x31310031
+.long 0xbfbfbf00, 0x007f7f7f, 0xdf00dfdf, 0x17170017
+.long 0xe2e2e200, 0x00c5c5c5, 0x71007171, 0xd7d700d7
+.long 0x52525200, 0x00a4a4a4, 0x29002929, 0x58580058
+.long 0x9b9b9b00, 0x00373737, 0xcd00cdcd, 0x61610061
+.long 0xd8d8d800, 0x00b1b1b1, 0x6c006c6c, 0x1b1b001b
+.long 0x26262600, 0x004c4c4c, 0x13001313, 0x1c1c001c
+.long 0xc8c8c800, 0x00919191, 0x64006464, 0x0f0f000f
+.long 0x37373700, 0x006e6e6e, 0x9b009b9b, 0x16160016
+.long 0xc6c6c600, 0x008d8d8d, 0x63006363, 0x18180018
+.long 0x3b3b3b00, 0x00767676, 0x9d009d9d, 0x22220022
+.long 0x81818100, 0x00030303, 0xc000c0c0, 0x44440044
+.long 0x96969600, 0x002d2d2d, 0x4b004b4b, 0xb2b200b2
+.long 0x6f6f6f00, 0x00dedede, 0xb700b7b7, 0xb5b500b5
+.long 0x4b4b4b00, 0x00969696, 0xa500a5a5, 0x91910091
+.long 0x13131300, 0x00262626, 0x89008989, 0x08080008
+.long 0xbebebe00, 0x007d7d7d, 0x5f005f5f, 0xa8a800a8
+.long 0x63636300, 0x00c6c6c6, 0xb100b1b1, 0xfcfc00fc
+.long 0x2e2e2e00, 0x005c5c5c, 0x17001717, 0x50500050
+.long 0xe9e9e900, 0x00d3d3d3, 0xf400f4f4, 0xd0d000d0
+.long 0x79797900, 0x00f2f2f2, 0xbc00bcbc, 0x7d7d007d
+.long 0xa7a7a700, 0x004f4f4f, 0xd300d3d3, 0x89890089
+.long 0x8c8c8c00, 0x00191919, 0x46004646, 0x97970097
+.long 0x9f9f9f00, 0x003f3f3f, 0xcf00cfcf, 0x5b5b005b
+.long 0x6e6e6e00, 0x00dcdcdc, 0x37003737, 0x95950095
+.long 0xbcbcbc00, 0x00797979, 0x5e005e5e, 0xffff00ff
+.long 0x8e8e8e00, 0x001d1d1d, 0x47004747, 0xd2d200d2
+.long 0x29292900, 0x00525252, 0x94009494, 0xc4c400c4
+.long 0xf5f5f500, 0x00ebebeb, 0xfa00fafa, 0x48480048
+.long 0xf9f9f900, 0x00f3f3f3, 0xfc00fcfc, 0xf7f700f7
+.long 0xb6b6b600, 0x006d6d6d, 0x5b005b5b, 0xdbdb00db
+.long 0x2f2f2f00, 0x005e5e5e, 0x97009797, 0x03030003
+.long 0xfdfdfd00, 0x00fbfbfb, 0xfe00fefe, 0xdada00da
+.long 0xb4b4b400, 0x00696969, 0x5a005a5a, 0x3f3f003f
+.long 0x59595900, 0x00b2b2b2, 0xac00acac, 0x94940094
+.long 0x78787800, 0x00f0f0f0, 0x3c003c3c, 0x5c5c005c
+.long 0x98989800, 0x00313131, 0x4c004c4c, 0x02020002
+.long 0x06060600, 0x000c0c0c, 0x03000303, 0x4a4a004a
+.long 0x6a6a6a00, 0x00d4d4d4, 0x35003535, 0x33330033
+.long 0xe7e7e700, 0x00cfcfcf, 0xf300f3f3, 0x67670067
+.long 0x46464600, 0x008c8c8c, 0x23002323, 0xf3f300f3
+.long 0x71717100, 0x00e2e2e2, 0xb800b8b8, 0x7f7f007f
+.long 0xbababa00, 0x00757575, 0x5d005d5d, 0xe2e200e2
+.long 0xd4d4d400, 0x00a9a9a9, 0x6a006a6a, 0x9b9b009b
+.long 0x25252500, 0x004a4a4a, 0x92009292, 0x26260026
+.long 0xababab00, 0x00575757, 0xd500d5d5, 0x37370037
+.long 0x42424200, 0x00848484, 0x21002121, 0x3b3b003b
+.long 0x88888800, 0x00111111, 0x44004444, 0x96960096
+.long 0xa2a2a200, 0x00454545, 0x51005151, 0x4b4b004b
+.long 0x8d8d8d00, 0x001b1b1b, 0xc600c6c6, 0xbebe00be
+.long 0xfafafa00, 0x00f5f5f5, 0x7d007d7d, 0x2e2e002e
+.long 0x72727200, 0x00e4e4e4, 0x39003939, 0x79790079
+.long 0x07070700, 0x000e0e0e, 0x83008383, 0x8c8c008c
+.long 0xb9b9b900, 0x00737373, 0xdc00dcdc, 0x6e6e006e
+.long 0x55555500, 0x00aaaaaa, 0xaa00aaaa, 0x8e8e008e
+.long 0xf8f8f800, 0x00f1f1f1, 0x7c007c7c, 0xf5f500f5
+.long 0xeeeeee00, 0x00dddddd, 0x77007777, 0xb6b600b6
+.long 0xacacac00, 0x00595959, 0x56005656, 0xfdfd00fd
+.long 0x0a0a0a00, 0x00141414, 0x05000505, 0x59590059
+.long 0x36363600, 0x006c6c6c, 0x1b001b1b, 0x98980098
+.long 0x49494900, 0x00929292, 0xa400a4a4, 0x6a6a006a
+.long 0x2a2a2a00, 0x00545454, 0x15001515, 0x46460046
+.long 0x68686800, 0x00d0d0d0, 0x34003434, 0xbaba00ba
+.long 0x3c3c3c00, 0x00787878, 0x1e001e1e, 0x25250025
+.long 0x38383800, 0x00707070, 0x1c001c1c, 0x42420042
+.long 0xf1f1f100, 0x00e3e3e3, 0xf800f8f8, 0xa2a200a2
+.long 0xa4a4a400, 0x00494949, 0x52005252, 0xfafa00fa
+.long 0x40404000, 0x00808080, 0x20002020, 0x07070007
+.long 0x28282800, 0x00505050, 0x14001414, 0x55550055
+.long 0xd3d3d300, 0x00a7a7a7, 0xe900e9e9, 0xeeee00ee
+.long 0x7b7b7b00, 0x00f6f6f6, 0xbd00bdbd, 0x0a0a000a
+.long 0xbbbbbb00, 0x00777777, 0xdd00dddd, 0x49490049
+.long 0xc9c9c900, 0x00939393, 0xe400e4e4, 0x68680068
+.long 0x43434300, 0x00868686, 0xa100a1a1, 0x38380038
+.long 0xc1c1c100, 0x00838383, 0xe000e0e0, 0xa4a400a4
+.long 0x15151500, 0x002a2a2a, 0x8a008a8a, 0x28280028
+.long 0xe3e3e300, 0x00c7c7c7, 0xf100f1f1, 0x7b7b007b
+.long 0xadadad00, 0x005b5b5b, 0xd600d6d6, 0xc9c900c9
+.long 0xf4f4f400, 0x00e9e9e9, 0x7a007a7a, 0xc1c100c1
+.long 0x77777700, 0x00eeeeee, 0xbb00bbbb, 0xe3e300e3
+.long 0xc7c7c700, 0x008f8f8f, 0xe300e3e3, 0xf4f400f4
+.long 0x80808000, 0x00010101, 0x40004040, 0xc7c700c7
+.long 0x9e9e9e00, 0x003d3d3d, 0x4f004f4f, 0x9e9e009e
+
+#endif /*HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS*/
+#endif /*__ARM_ARCH >= 6*/
diff --git a/grub-core/lib/libgcrypt/cipher/camellia-glue.c 
b/grub-core/lib/libgcrypt/cipher/camellia-glue.c
index a26362177..72c02d774 100644
--- a/grub-core/lib/libgcrypt/cipher/camellia-glue.c
+++ b/grub-core/lib/libgcrypt/cipher/camellia-glue.c
@@ -62,21 +62,219 @@
 #include "g10lib.h"
 #include "cipher.h"
 #include "camellia.h"
+#include "bufhelp.h"
+#include "cipher-internal.h"
+#include "cipher-selftest.h"
+
+/* Helper macro to force alignment to 16 bytes.  */
+#ifdef HAVE_GCC_ATTRIBUTE_ALIGNED
+# define ATTR_ALIGNED_16  __attribute__ ((aligned (16)))
+#else
+# define ATTR_ALIGNED_16
+#endif
+
+/* USE_AESNI inidicates whether to compile with Intel AES-NI/AVX code. */
+#undef USE_AESNI_AVX
+#if defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX_SUPPORT)
+# if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || 
\
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+#  define USE_AESNI_AVX 1
+# endif
+#endif
+
+/* USE_AESNI_AVX2 inidicates whether to compile with Intel AES-NI/AVX2 code. */
+#undef USE_AESNI_AVX2
+#if defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX2_SUPPORT)
+# if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || 
\
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+#  define USE_AESNI_AVX2 1
+# endif
+#endif
+
+/* USE_VAES_AVX2 inidicates whether to compile with Intel VAES/AVX2 code. */
+#undef USE_VAES_AVX2
+#if defined(USE_AESNI_AVX2) && defined(HAVE_GCC_INLINE_ASM_VAES_VPCLMUL)
+# define USE_VAES_AVX2 1
+#endif
 
 typedef struct
 {
-  int keybitlength;
   KEY_TABLE_TYPE keytable;
+  int keybitlength;
+#ifdef USE_AESNI_AVX
+  unsigned int use_aesni_avx:1;        /* AES-NI/AVX implementation shall be 
used.  */
+#endif /*USE_AESNI_AVX*/
+#ifdef USE_AESNI_AVX2
+  unsigned int use_aesni_avx2:1;/* AES-NI/AVX2 implementation shall be used.  
*/
+  unsigned int use_vaes_avx2:1; /* VAES/AVX2 implementation shall be used.  */
+#endif /*USE_AESNI_AVX2*/
 } CAMELLIA_context;
 
+/* Assembly implementations use SystemV ABI, ABI conversion and additional
+ * stack to store XMM6-XMM15 needed on Win64. */
+#undef ASM_FUNC_ABI
+#undef ASM_EXTRA_STACK
+#if defined(USE_AESNI_AVX) || defined(USE_AESNI_AVX2)
+# ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+#  define ASM_FUNC_ABI __attribute__((sysv_abi))
+#  define ASM_EXTRA_STACK (10 * 16)
+# else
+#  define ASM_FUNC_ABI
+#  define ASM_EXTRA_STACK 0
+# endif
+#endif
+
+#ifdef USE_AESNI_AVX
+/* Assembler implementations of Camellia using AES-NI and AVX.  Process data
+   in 16 block same time.
+ */
+extern void _gcry_camellia_aesni_avx_ctr_enc(CAMELLIA_context *ctx,
+                                            unsigned char *out,
+                                            const unsigned char *in,
+                                            unsigned char *ctr) ASM_FUNC_ABI;
+
+extern void _gcry_camellia_aesni_avx_cbc_dec(CAMELLIA_context *ctx,
+                                            unsigned char *out,
+                                            const unsigned char *in,
+                                            unsigned char *iv) ASM_FUNC_ABI;
+
+extern void _gcry_camellia_aesni_avx_cfb_dec(CAMELLIA_context *ctx,
+                                            unsigned char *out,
+                                            const unsigned char *in,
+                                            unsigned char *iv) ASM_FUNC_ABI;
+
+extern void _gcry_camellia_aesni_avx_ocb_enc(CAMELLIA_context *ctx,
+                                            unsigned char *out,
+                                            const unsigned char *in,
+                                            unsigned char *offset,
+                                            unsigned char *checksum,
+                                            const u64 Ls[16]) ASM_FUNC_ABI;
+
+extern void _gcry_camellia_aesni_avx_ocb_dec(CAMELLIA_context *ctx,
+                                            unsigned char *out,
+                                            const unsigned char *in,
+                                            unsigned char *offset,
+                                            unsigned char *checksum,
+                                            const u64 Ls[16]) ASM_FUNC_ABI;
+
+extern void _gcry_camellia_aesni_avx_ocb_auth(CAMELLIA_context *ctx,
+                                            const unsigned char *abuf,
+                                            unsigned char *offset,
+                                            unsigned char *checksum,
+                                            const u64 Ls[16]) ASM_FUNC_ABI;
+
+extern void _gcry_camellia_aesni_avx_keygen(CAMELLIA_context *ctx,
+                                           const unsigned char *key,
+                                           unsigned int keylen) ASM_FUNC_ABI;
+#endif
+
+#ifdef USE_AESNI_AVX2
+/* Assembler implementations of Camellia using AES-NI and AVX2.  Process data
+   in 32 block same time.
+ */
+extern void _gcry_camellia_aesni_avx2_ctr_enc(CAMELLIA_context *ctx,
+                                             unsigned char *out,
+                                             const unsigned char *in,
+                                             unsigned char *ctr) ASM_FUNC_ABI;
+
+extern void _gcry_camellia_aesni_avx2_cbc_dec(CAMELLIA_context *ctx,
+                                             unsigned char *out,
+                                             const unsigned char *in,
+                                             unsigned char *iv) ASM_FUNC_ABI;
+
+extern void _gcry_camellia_aesni_avx2_cfb_dec(CAMELLIA_context *ctx,
+                                             unsigned char *out,
+                                             const unsigned char *in,
+                                             unsigned char *iv) ASM_FUNC_ABI;
+
+extern void _gcry_camellia_aesni_avx2_ocb_enc(CAMELLIA_context *ctx,
+                                             unsigned char *out,
+                                             const unsigned char *in,
+                                             unsigned char *offset,
+                                             unsigned char *checksum,
+                                             const u64 Ls[32]) ASM_FUNC_ABI;
+
+extern void _gcry_camellia_aesni_avx2_ocb_dec(CAMELLIA_context *ctx,
+                                             unsigned char *out,
+                                             const unsigned char *in,
+                                             unsigned char *offset,
+                                             unsigned char *checksum,
+                                             const u64 Ls[32]) ASM_FUNC_ABI;
+
+extern void _gcry_camellia_aesni_avx2_ocb_auth(CAMELLIA_context *ctx,
+                                              const unsigned char *abuf,
+                                              unsigned char *offset,
+                                              unsigned char *checksum,
+                                              const u64 Ls[32]) ASM_FUNC_ABI;
+#endif
+
+#ifdef USE_VAES_AVX2
+/* Assembler implementations of Camellia using VAES and AVX2.  Process data
+   in 32 block same time.
+ */
+extern void _gcry_camellia_vaes_avx2_ctr_enc(CAMELLIA_context *ctx,
+                                            unsigned char *out,
+                                            const unsigned char *in,
+                                            unsigned char *ctr) ASM_FUNC_ABI;
+
+extern void _gcry_camellia_vaes_avx2_cbc_dec(CAMELLIA_context *ctx,
+                                            unsigned char *out,
+                                            const unsigned char *in,
+                                            unsigned char *iv) ASM_FUNC_ABI;
+
+extern void _gcry_camellia_vaes_avx2_cfb_dec(CAMELLIA_context *ctx,
+                                            unsigned char *out,
+                                            const unsigned char *in,
+                                            unsigned char *iv) ASM_FUNC_ABI;
+
+extern void _gcry_camellia_vaes_avx2_ocb_enc(CAMELLIA_context *ctx,
+                                            unsigned char *out,
+                                            const unsigned char *in,
+                                            unsigned char *offset,
+                                            unsigned char *checksum,
+                                            const u64 Ls[32]) ASM_FUNC_ABI;
+
+extern void _gcry_camellia_vaes_avx2_ocb_dec(CAMELLIA_context *ctx,
+                                            unsigned char *out,
+                                            const unsigned char *in,
+                                            unsigned char *offset,
+                                            unsigned char *checksum,
+                                            const u64 Ls[32]) ASM_FUNC_ABI;
+
+extern void _gcry_camellia_vaes_avx2_ocb_auth(CAMELLIA_context *ctx,
+                                             const unsigned char *abuf,
+                                             unsigned char *offset,
+                                             unsigned char *checksum,
+                                             const u64 Ls[32]) ASM_FUNC_ABI;
+#endif
+
 static const char *selftest(void);
 
+static void _gcry_camellia_ctr_enc (void *context, unsigned char *ctr,
+                                   void *outbuf_arg, const void *inbuf_arg,
+                                   size_t nblocks);
+static void _gcry_camellia_cbc_dec (void *context, unsigned char *iv,
+                                   void *outbuf_arg, const void *inbuf_arg,
+                                   size_t nblocks);
+static void _gcry_camellia_cfb_dec (void *context, unsigned char *iv,
+                                   void *outbuf_arg, const void *inbuf_arg,
+                                   size_t nblocks);
+static size_t _gcry_camellia_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
+                                       const void *inbuf_arg, size_t nblocks,
+                                       int encrypt);
+static size_t _gcry_camellia_ocb_auth (gcry_cipher_hd_t c, const void 
*abuf_arg,
+                                      size_t nblocks);
+
 static gcry_err_code_t
-camellia_setkey(void *c, const byte *key, unsigned keylen)
+camellia_setkey(void *c, const byte *key, unsigned keylen,
+                cipher_bulk_ops_t *bulk_ops)
 {
   CAMELLIA_context *ctx=c;
   static int initialized=0;
   static const char *selftest_failed=NULL;
+#if defined(USE_AESNI_AVX) || defined(USE_AESNI_AVX2) || defined(USE_VAES_AVX2)
+  unsigned int hwf = _gcry_get_hw_features ();
+#endif
 
   if(keylen!=16 && keylen!=24 && keylen!=32)
     return GPG_ERR_INV_KEYLEN;
@@ -92,44 +290,774 @@ camellia_setkey(void *c, const byte *key, unsigned keylen)
   if(selftest_failed)
     return GPG_ERR_SELFTEST_FAILED;
 
+#ifdef USE_AESNI_AVX
+  ctx->use_aesni_avx = (hwf & HWF_INTEL_AESNI) && (hwf & HWF_INTEL_AVX);
+#endif
+#ifdef USE_AESNI_AVX2
+  ctx->use_aesni_avx2 = (hwf & HWF_INTEL_AESNI) && (hwf & HWF_INTEL_AVX2);
+  ctx->use_vaes_avx2 = 0;
+#endif
+#ifdef USE_VAES_AVX2
+  ctx->use_vaes_avx2 = (hwf & HWF_INTEL_VAES_VPCLMUL) && (hwf & 
HWF_INTEL_AVX2);
+#endif
+
   ctx->keybitlength=keylen*8;
-  Camellia_Ekeygen(ctx->keybitlength,key,ctx->keytable);
-  _gcry_burn_stack
-    ((19+34+34)*sizeof(u32)+2*sizeof(void*) /* camellia_setup256 */
-     +(4+32)*sizeof(u32)+2*sizeof(void*)    /* camellia_setup192 */
-     +0+sizeof(int)+2*sizeof(void*)         /* Camellia_Ekeygen */
-     +3*2*sizeof(void*)                     /* Function calls.  */
-     );
+
+  /* Setup bulk encryption routines.  */
+  memset (bulk_ops, 0, sizeof(*bulk_ops));
+  bulk_ops->cbc_dec = _gcry_camellia_cbc_dec;
+  bulk_ops->cfb_dec = _gcry_camellia_cfb_dec;
+  bulk_ops->ctr_enc = _gcry_camellia_ctr_enc;
+  bulk_ops->ocb_crypt = _gcry_camellia_ocb_crypt;
+  bulk_ops->ocb_auth  = _gcry_camellia_ocb_auth;
+
+  if (0)
+    { }
+#ifdef USE_AESNI_AVX
+  else if (ctx->use_aesni_avx)
+    _gcry_camellia_aesni_avx_keygen(ctx, key, keylen);
+  else
+#endif
+    {
+      Camellia_Ekeygen(ctx->keybitlength,key,ctx->keytable);
+      _gcry_burn_stack
+        ((19+34+34)*sizeof(u32)+2*sizeof(void*) /* camellia_setup256 */
+         +(4+32)*sizeof(u32)+2*sizeof(void*)    /* camellia_setup192 */
+         +0+sizeof(int)+2*sizeof(void*)         /* Camellia_Ekeygen */
+         +3*2*sizeof(void*)                     /* Function calls.  */
+         );
+    }
 
   return 0;
 }
 
-static void
+#ifdef USE_ARM_ASM
+
+/* Assembly implementations of Camellia. */
+extern void _gcry_camellia_arm_encrypt_block(const KEY_TABLE_TYPE keyTable,
+                                              byte *outbuf, const byte *inbuf,
+                                              const int keybits);
+
+extern void _gcry_camellia_arm_decrypt_block(const KEY_TABLE_TYPE keyTable,
+                                              byte *outbuf, const byte *inbuf,
+                                              const int keybits);
+
+static void Camellia_EncryptBlock(const int keyBitLength,
+                                 const unsigned char *plaintext,
+                                 const KEY_TABLE_TYPE keyTable,
+                                 unsigned char *cipherText)
+{
+  _gcry_camellia_arm_encrypt_block(keyTable, cipherText, plaintext,
+                                    keyBitLength);
+}
+
+static void Camellia_DecryptBlock(const int keyBitLength,
+                                 const unsigned char *cipherText,
+                                 const KEY_TABLE_TYPE keyTable,
+                                 unsigned char *plaintext)
+{
+  _gcry_camellia_arm_decrypt_block(keyTable, plaintext, cipherText,
+                                    keyBitLength);
+}
+
+#ifdef __aarch64__
+#  define CAMELLIA_encrypt_stack_burn_size (0)
+#  define CAMELLIA_decrypt_stack_burn_size (0)
+#else
+#  define CAMELLIA_encrypt_stack_burn_size (15*4)
+#  define CAMELLIA_decrypt_stack_burn_size (15*4)
+#endif
+
+static unsigned int
+camellia_encrypt(void *c, byte *outbuf, const byte *inbuf)
+{
+  CAMELLIA_context *ctx = c;
+  Camellia_EncryptBlock(ctx->keybitlength,inbuf,ctx->keytable,outbuf);
+  return /*burn_stack*/ (CAMELLIA_encrypt_stack_burn_size);
+}
+
+static unsigned int
+camellia_decrypt(void *c, byte *outbuf, const byte *inbuf)
+{
+  CAMELLIA_context *ctx=c;
+  Camellia_DecryptBlock(ctx->keybitlength,inbuf,ctx->keytable,outbuf);
+  return /*burn_stack*/ (CAMELLIA_decrypt_stack_burn_size);
+}
+
+#else /*USE_ARM_ASM*/
+
+static unsigned int
 camellia_encrypt(void *c, byte *outbuf, const byte *inbuf)
 {
   CAMELLIA_context *ctx=c;
 
   Camellia_EncryptBlock(ctx->keybitlength,inbuf,ctx->keytable,outbuf);
-  _gcry_burn_stack
-    (sizeof(int)+2*sizeof(unsigned char *)+sizeof(KEY_TABLE_TYPE)
-     +4*sizeof(u32)
-     +2*sizeof(u32*)+4*sizeof(u32)
-     +2*2*sizeof(void*) /* Function calls.  */
-    );
+
+#define CAMELLIA_encrypt_stack_burn_size \
+  (sizeof(int)+2*sizeof(unsigned char *)+sizeof(void*/*KEY_TABLE_TYPE*/) \
+     +4*sizeof(u32)+4*sizeof(u32) \
+     +2*sizeof(u32*)+4*sizeof(u32) \
+     +2*2*sizeof(void*) /* Function calls.  */ \
+    )
+
+  return /*burn_stack*/ (CAMELLIA_encrypt_stack_burn_size);
 }
 
-static void
+static unsigned int
 camellia_decrypt(void *c, byte *outbuf, const byte *inbuf)
 {
   CAMELLIA_context *ctx=c;
 
   Camellia_DecryptBlock(ctx->keybitlength,inbuf,ctx->keytable,outbuf);
-  _gcry_burn_stack
-    (sizeof(int)+2*sizeof(unsigned char *)+sizeof(KEY_TABLE_TYPE)
-     +4*sizeof(u32)
-     +2*sizeof(u32*)+4*sizeof(u32)
-     +2*2*sizeof(void*) /* Function calls.  */
-    );
+
+#define CAMELLIA_decrypt_stack_burn_size \
+    (sizeof(int)+2*sizeof(unsigned char *)+sizeof(void*/*KEY_TABLE_TYPE*/) \
+     +4*sizeof(u32)+4*sizeof(u32) \
+     +2*sizeof(u32*)+4*sizeof(u32) \
+     +2*2*sizeof(void*) /* Function calls.  */ \
+    )
+
+  return /*burn_stack*/ (CAMELLIA_decrypt_stack_burn_size);
+}
+
+#endif /*!USE_ARM_ASM*/
+
+/* Bulk encryption of complete blocks in CTR mode.  This function is only
+   intended for the bulk encryption feature of cipher.c.  CTR is expected to be
+   of size CAMELLIA_BLOCK_SIZE. */
+static void
+_gcry_camellia_ctr_enc(void *context, unsigned char *ctr,
+                       void *outbuf_arg, const void *inbuf_arg,
+                       size_t nblocks)
+{
+  CAMELLIA_context *ctx = context;
+  unsigned char *outbuf = outbuf_arg;
+  const unsigned char *inbuf = inbuf_arg;
+  unsigned char tmpbuf[CAMELLIA_BLOCK_SIZE];
+  int burn_stack_depth = CAMELLIA_encrypt_stack_burn_size;
+
+#ifdef USE_AESNI_AVX2
+  if (ctx->use_aesni_avx2)
+    {
+      int did_use_aesni_avx2 = 0;
+#ifdef USE_VAES_AVX2
+      int use_vaes = ctx->use_vaes_avx2;
+#endif
+
+      /* Process data in 32 block chunks. */
+      while (nblocks >= 32)
+        {
+#ifdef USE_VAES_AVX2
+          if (use_vaes)
+            _gcry_camellia_vaes_avx2_ctr_enc(ctx, outbuf, inbuf, ctr);
+          else
+#endif
+            _gcry_camellia_aesni_avx2_ctr_enc(ctx, outbuf, inbuf, ctr);
+
+          nblocks -= 32;
+          outbuf += 32 * CAMELLIA_BLOCK_SIZE;
+          inbuf  += 32 * CAMELLIA_BLOCK_SIZE;
+          did_use_aesni_avx2 = 1;
+        }
+
+      if (did_use_aesni_avx2)
+        {
+          int avx2_burn_stack_depth = 32 * CAMELLIA_BLOCK_SIZE + 16 +
+                                        2 * sizeof(void *) + ASM_EXTRA_STACK;
+
+          if (burn_stack_depth < avx2_burn_stack_depth)
+            burn_stack_depth = avx2_burn_stack_depth;
+        }
+
+      /* Use generic code to handle smaller chunks... */
+      /* TODO: use caching instead? */
+    }
+#endif
+
+#ifdef USE_AESNI_AVX
+  if (ctx->use_aesni_avx)
+    {
+      int did_use_aesni_avx = 0;
+
+      /* Process data in 16 block chunks. */
+      while (nblocks >= 16)
+        {
+          _gcry_camellia_aesni_avx_ctr_enc(ctx, outbuf, inbuf, ctr);
+
+          nblocks -= 16;
+          outbuf += 16 * CAMELLIA_BLOCK_SIZE;
+          inbuf  += 16 * CAMELLIA_BLOCK_SIZE;
+          did_use_aesni_avx = 1;
+        }
+
+      if (did_use_aesni_avx)
+        {
+          int avx_burn_stack_depth = 16 * CAMELLIA_BLOCK_SIZE +
+                                       2 * sizeof(void *) + ASM_EXTRA_STACK;
+
+          if (burn_stack_depth < avx_burn_stack_depth)
+            burn_stack_depth = avx_burn_stack_depth;
+        }
+
+      /* Use generic code to handle smaller chunks... */
+      /* TODO: use caching instead? */
+    }
+#endif
+
+  for ( ;nblocks; nblocks-- )
+    {
+      /* Encrypt the counter. */
+      Camellia_EncryptBlock(ctx->keybitlength, ctr, ctx->keytable, tmpbuf);
+      /* XOR the input with the encrypted counter and store in output.  */
+      cipher_block_xor(outbuf, tmpbuf, inbuf, CAMELLIA_BLOCK_SIZE);
+      outbuf += CAMELLIA_BLOCK_SIZE;
+      inbuf  += CAMELLIA_BLOCK_SIZE;
+      /* Increment the counter.  */
+      cipher_block_add(ctr, 1, CAMELLIA_BLOCK_SIZE);
+    }
+
+  wipememory(tmpbuf, sizeof(tmpbuf));
+  _gcry_burn_stack(burn_stack_depth);
+}
+
+/* Bulk decryption of complete blocks in CBC mode.  This function is only
+   intended for the bulk encryption feature of cipher.c. */
+static void
+_gcry_camellia_cbc_dec(void *context, unsigned char *iv,
+                       void *outbuf_arg, const void *inbuf_arg,
+                       size_t nblocks)
+{
+  CAMELLIA_context *ctx = context;
+  unsigned char *outbuf = outbuf_arg;
+  const unsigned char *inbuf = inbuf_arg;
+  unsigned char savebuf[CAMELLIA_BLOCK_SIZE];
+  int burn_stack_depth = CAMELLIA_decrypt_stack_burn_size;
+
+#ifdef USE_AESNI_AVX2
+  if (ctx->use_aesni_avx2)
+    {
+      int did_use_aesni_avx2 = 0;
+#ifdef USE_VAES_AVX2
+      int use_vaes = ctx->use_vaes_avx2;
+#endif
+
+      /* Process data in 32 block chunks. */
+      while (nblocks >= 32)
+        {
+#ifdef USE_VAES_AVX2
+          if (use_vaes)
+            _gcry_camellia_vaes_avx2_cbc_dec(ctx, outbuf, inbuf, iv);
+          else
+#endif
+            _gcry_camellia_aesni_avx2_cbc_dec(ctx, outbuf, inbuf, iv);
+
+          nblocks -= 32;
+          outbuf += 32 * CAMELLIA_BLOCK_SIZE;
+          inbuf  += 32 * CAMELLIA_BLOCK_SIZE;
+          did_use_aesni_avx2 = 1;
+        }
+
+      if (did_use_aesni_avx2)
+        {
+          int avx2_burn_stack_depth = 32 * CAMELLIA_BLOCK_SIZE + 16 +
+                                        2 * sizeof(void *) + ASM_EXTRA_STACK;;
+
+          if (burn_stack_depth < avx2_burn_stack_depth)
+            burn_stack_depth = avx2_burn_stack_depth;
+        }
+
+      /* Use generic code to handle smaller chunks... */
+    }
+#endif
+
+#ifdef USE_AESNI_AVX
+  if (ctx->use_aesni_avx)
+    {
+      int did_use_aesni_avx = 0;
+
+      /* Process data in 16 block chunks. */
+      while (nblocks >= 16)
+        {
+          _gcry_camellia_aesni_avx_cbc_dec(ctx, outbuf, inbuf, iv);
+
+          nblocks -= 16;
+          outbuf += 16 * CAMELLIA_BLOCK_SIZE;
+          inbuf  += 16 * CAMELLIA_BLOCK_SIZE;
+          did_use_aesni_avx = 1;
+        }
+
+      if (did_use_aesni_avx)
+        {
+          int avx_burn_stack_depth = 16 * CAMELLIA_BLOCK_SIZE +
+                                       2 * sizeof(void *) + ASM_EXTRA_STACK;
+
+          if (burn_stack_depth < avx_burn_stack_depth)
+            burn_stack_depth = avx_burn_stack_depth;
+        }
+
+      /* Use generic code to handle smaller chunks... */
+    }
+#endif
+
+  for ( ;nblocks; nblocks-- )
+    {
+      /* INBUF is needed later and it may be identical to OUTBUF, so store
+         the intermediate result to SAVEBUF.  */
+      Camellia_DecryptBlock(ctx->keybitlength, inbuf, ctx->keytable, savebuf);
+
+      cipher_block_xor_n_copy_2(outbuf, savebuf, iv, inbuf,
+                                CAMELLIA_BLOCK_SIZE);
+      inbuf += CAMELLIA_BLOCK_SIZE;
+      outbuf += CAMELLIA_BLOCK_SIZE;
+    }
+
+  wipememory(savebuf, sizeof(savebuf));
+  _gcry_burn_stack(burn_stack_depth);
+}
+
+/* Bulk decryption of complete blocks in CFB mode.  This function is only
+   intended for the bulk encryption feature of cipher.c. */
+static void
+_gcry_camellia_cfb_dec(void *context, unsigned char *iv,
+                       void *outbuf_arg, const void *inbuf_arg,
+                       size_t nblocks)
+{
+  CAMELLIA_context *ctx = context;
+  unsigned char *outbuf = outbuf_arg;
+  const unsigned char *inbuf = inbuf_arg;
+  int burn_stack_depth = CAMELLIA_decrypt_stack_burn_size;
+
+#ifdef USE_AESNI_AVX2
+  if (ctx->use_aesni_avx2)
+    {
+      int did_use_aesni_avx2 = 0;
+#ifdef USE_VAES_AVX2
+      int use_vaes = ctx->use_vaes_avx2;
+#endif
+
+      /* Process data in 32 block chunks. */
+      while (nblocks >= 32)
+        {
+#ifdef USE_VAES_AVX2
+          if (use_vaes)
+            _gcry_camellia_vaes_avx2_cfb_dec(ctx, outbuf, inbuf, iv);
+          else
+#endif
+            _gcry_camellia_aesni_avx2_cfb_dec(ctx, outbuf, inbuf, iv);
+
+          nblocks -= 32;
+          outbuf += 32 * CAMELLIA_BLOCK_SIZE;
+          inbuf  += 32 * CAMELLIA_BLOCK_SIZE;
+          did_use_aesni_avx2 = 1;
+        }
+
+      if (did_use_aesni_avx2)
+        {
+          int avx2_burn_stack_depth = 32 * CAMELLIA_BLOCK_SIZE + 16 +
+                                        2 * sizeof(void *) + ASM_EXTRA_STACK;
+
+          if (burn_stack_depth < avx2_burn_stack_depth)
+            burn_stack_depth = avx2_burn_stack_depth;
+        }
+
+      /* Use generic code to handle smaller chunks... */
+    }
+#endif
+
+#ifdef USE_AESNI_AVX
+  if (ctx->use_aesni_avx)
+    {
+      int did_use_aesni_avx = 0;
+
+      /* Process data in 16 block chunks. */
+      while (nblocks >= 16)
+        {
+          _gcry_camellia_aesni_avx_cfb_dec(ctx, outbuf, inbuf, iv);
+
+          nblocks -= 16;
+          outbuf += 16 * CAMELLIA_BLOCK_SIZE;
+          inbuf  += 16 * CAMELLIA_BLOCK_SIZE;
+          did_use_aesni_avx = 1;
+        }
+
+      if (did_use_aesni_avx)
+        {
+          int avx_burn_stack_depth = 16 * CAMELLIA_BLOCK_SIZE +
+                                       2 * sizeof(void *) + ASM_EXTRA_STACK;
+
+          if (burn_stack_depth < avx_burn_stack_depth)
+            burn_stack_depth = avx_burn_stack_depth;
+        }
+
+      /* Use generic code to handle smaller chunks... */
+    }
+#endif
+
+  for ( ;nblocks; nblocks-- )
+    {
+      Camellia_EncryptBlock(ctx->keybitlength, iv, ctx->keytable, iv);
+      cipher_block_xor_n_copy(outbuf, iv, inbuf, CAMELLIA_BLOCK_SIZE);
+      outbuf += CAMELLIA_BLOCK_SIZE;
+      inbuf  += CAMELLIA_BLOCK_SIZE;
+    }
+
+  _gcry_burn_stack(burn_stack_depth);
+}
+
+/* Bulk encryption/decryption of complete blocks in OCB mode. */
+static size_t
+_gcry_camellia_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
+                         const void *inbuf_arg, size_t nblocks, int encrypt)
+{
+#if defined(USE_AESNI_AVX) || defined(USE_AESNI_AVX2)
+  CAMELLIA_context *ctx = (void *)&c->context.c;
+  unsigned char *outbuf = outbuf_arg;
+  const unsigned char *inbuf = inbuf_arg;
+  int burn_stack_depth;
+  u64 blkn = c->u_mode.ocb.data_nblocks;
+
+  burn_stack_depth = encrypt ? CAMELLIA_encrypt_stack_burn_size :
+                             CAMELLIA_decrypt_stack_burn_size;
+#else
+  (void)c;
+  (void)outbuf_arg;
+  (void)inbuf_arg;
+  (void)encrypt;
+#endif
+
+#ifdef USE_AESNI_AVX2
+  if (ctx->use_aesni_avx2)
+    {
+      int did_use_aesni_avx2 = 0;
+#ifdef USE_VAES_AVX2
+      int encrypt_use_vaes = encrypt && ctx->use_vaes_avx2;
+      int decrypt_use_vaes = !encrypt && ctx->use_vaes_avx2;
+#endif
+      u64 Ls[32];
+      unsigned int n = 32 - (blkn % 32);
+      u64 *l;
+      int i;
+
+      if (nblocks >= 32)
+       {
+         for (i = 0; i < 32; i += 8)
+           {
+             /* Use u64 to store pointers for x32 support (assembly function
+              * assumes 64-bit pointers). */
+             Ls[(i + 0 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+             Ls[(i + 1 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
+             Ls[(i + 2 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+             Ls[(i + 3 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[2];
+             Ls[(i + 4 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+             Ls[(i + 5 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
+             Ls[(i + 6 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+           }
+
+         Ls[(7 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
+         Ls[(15 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[4];
+         Ls[(23 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
+         l = &Ls[(31 + n) % 32];
+
+         /* Process data in 32 block chunks. */
+         while (nblocks >= 32)
+           {
+             blkn += 32;
+             *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 32);
+
+             if (0) {}
+#ifdef USE_VAES_AVX2
+             else if (encrypt_use_vaes)
+               _gcry_camellia_vaes_avx2_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv,
+                                                 c->u_ctr.ctr, Ls);
+             else if (decrypt_use_vaes)
+               _gcry_camellia_vaes_avx2_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv,
+                                                 c->u_ctr.ctr, Ls);
+#endif
+             else if (encrypt)
+               _gcry_camellia_aesni_avx2_ocb_enc(ctx, outbuf, inbuf, 
c->u_iv.iv,
+                                                 c->u_ctr.ctr, Ls);
+             else
+               _gcry_camellia_aesni_avx2_ocb_dec(ctx, outbuf, inbuf, 
c->u_iv.iv,
+                                                 c->u_ctr.ctr, Ls);
+
+             nblocks -= 32;
+             outbuf += 32 * CAMELLIA_BLOCK_SIZE;
+             inbuf  += 32 * CAMELLIA_BLOCK_SIZE;
+             did_use_aesni_avx2 = 1;
+           }
+       }
+
+      if (did_use_aesni_avx2)
+       {
+         int avx2_burn_stack_depth = 32 * CAMELLIA_BLOCK_SIZE +
+                                     2 * sizeof(void *) + ASM_EXTRA_STACK;
+
+         if (burn_stack_depth < avx2_burn_stack_depth)
+           burn_stack_depth = avx2_burn_stack_depth;
+       }
+
+      /* Use generic code to handle smaller chunks... */
+    }
+#endif
+
+#ifdef USE_AESNI_AVX
+  if (ctx->use_aesni_avx)
+    {
+      int did_use_aesni_avx = 0;
+      u64 Ls[16];
+      unsigned int n = 16 - (blkn % 16);
+      u64 *l;
+      int i;
+
+      if (nblocks >= 16)
+       {
+         for (i = 0; i < 16; i += 8)
+           {
+             /* Use u64 to store pointers for x32 support (assembly function
+              * assumes 64-bit pointers). */
+             Ls[(i + 0 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+             Ls[(i + 1 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
+             Ls[(i + 2 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+             Ls[(i + 3 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[2];
+             Ls[(i + 4 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+             Ls[(i + 5 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
+             Ls[(i + 6 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+           }
+
+         Ls[(7 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
+         l = &Ls[(15 + n) % 16];
+
+         /* Process data in 16 block chunks. */
+         while (nblocks >= 16)
+           {
+             blkn += 16;
+             *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 16);
+
+             if (encrypt)
+               _gcry_camellia_aesni_avx_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv,
+                                               c->u_ctr.ctr, Ls);
+             else
+               _gcry_camellia_aesni_avx_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv,
+                                               c->u_ctr.ctr, Ls);
+
+             nblocks -= 16;
+             outbuf += 16 * CAMELLIA_BLOCK_SIZE;
+             inbuf  += 16 * CAMELLIA_BLOCK_SIZE;
+             did_use_aesni_avx = 1;
+           }
+       }
+
+      if (did_use_aesni_avx)
+       {
+         int avx_burn_stack_depth = 16 * CAMELLIA_BLOCK_SIZE +
+                                     2 * sizeof(void *) + ASM_EXTRA_STACK;
+
+         if (burn_stack_depth < avx_burn_stack_depth)
+           burn_stack_depth = avx_burn_stack_depth;
+       }
+
+      /* Use generic code to handle smaller chunks... */
+    }
+#endif
+
+#if defined(USE_AESNI_AVX) || defined(USE_AESNI_AVX2)
+  c->u_mode.ocb.data_nblocks = blkn;
+
+  if (burn_stack_depth)
+    _gcry_burn_stack (burn_stack_depth + 4 * sizeof(void *));
+#endif
+
+  return nblocks;
+}
+
+/* Bulk authentication of complete blocks in OCB mode. */
+static size_t
+_gcry_camellia_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
+                        size_t nblocks)
+{
+#if defined(USE_AESNI_AVX) || defined(USE_AESNI_AVX2)
+  CAMELLIA_context *ctx = (void *)&c->context.c;
+  const unsigned char *abuf = abuf_arg;
+  int burn_stack_depth;
+  u64 blkn = c->u_mode.ocb.aad_nblocks;
+
+  burn_stack_depth = CAMELLIA_encrypt_stack_burn_size;
+#else
+  (void)c;
+  (void)abuf_arg;
+#endif
+
+#ifdef USE_AESNI_AVX2
+  if (ctx->use_aesni_avx2)
+    {
+      int did_use_aesni_avx2 = 0;
+#ifdef USE_VAES_AVX2
+      int use_vaes = ctx->use_vaes_avx2;
+#endif
+      u64 Ls[32];
+      unsigned int n = 32 - (blkn % 32);
+      u64 *l;
+      int i;
+
+      if (nblocks >= 32)
+       {
+         for (i = 0; i < 32; i += 8)
+           {
+             /* Use u64 to store pointers for x32 support (assembly function
+              * assumes 64-bit pointers). */
+             Ls[(i + 0 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+             Ls[(i + 1 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
+             Ls[(i + 2 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+             Ls[(i + 3 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[2];
+             Ls[(i + 4 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+             Ls[(i + 5 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
+             Ls[(i + 6 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+           }
+
+         Ls[(7 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
+         Ls[(15 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[4];
+         Ls[(23 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
+         l = &Ls[(31 + n) % 32];
+
+         /* Process data in 32 block chunks. */
+         while (nblocks >= 32)
+           {
+             blkn += 32;
+             *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 32);
+
+#ifdef USE_VAES_AVX2
+              if (use_vaes)
+                _gcry_camellia_vaes_avx2_ocb_auth(ctx, abuf,
+                                                  c->u_mode.ocb.aad_offset,
+                                                  c->u_mode.ocb.aad_sum, Ls);
+              else
+#endif
+                _gcry_camellia_aesni_avx2_ocb_auth(ctx, abuf,
+                                                   c->u_mode.ocb.aad_offset,
+                                                   c->u_mode.ocb.aad_sum, Ls);
+
+             nblocks -= 32;
+             abuf += 32 * CAMELLIA_BLOCK_SIZE;
+             did_use_aesni_avx2 = 1;
+           }
+       }
+
+      if (did_use_aesni_avx2)
+       {
+         int avx2_burn_stack_depth = 32 * CAMELLIA_BLOCK_SIZE +
+                                     2 * sizeof(void *) + ASM_EXTRA_STACK;
+
+         if (burn_stack_depth < avx2_burn_stack_depth)
+           burn_stack_depth = avx2_burn_stack_depth;
+       }
+
+      /* Use generic code to handle smaller chunks... */
+    }
+#endif
+
+#ifdef USE_AESNI_AVX
+  if (ctx->use_aesni_avx)
+    {
+      int did_use_aesni_avx = 0;
+      u64 Ls[16];
+      unsigned int n = 16 - (blkn % 16);
+      u64 *l;
+      int i;
+
+      if (nblocks >= 16)
+       {
+         for (i = 0; i < 16; i += 8)
+           {
+             /* Use u64 to store pointers for x32 support (assembly function
+              * assumes 64-bit pointers). */
+             Ls[(i + 0 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+             Ls[(i + 1 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
+             Ls[(i + 2 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+             Ls[(i + 3 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[2];
+             Ls[(i + 4 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+             Ls[(i + 5 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
+             Ls[(i + 6 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+           }
+
+         Ls[(7 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
+         l = &Ls[(15 + n) % 16];
+
+         /* Process data in 16 block chunks. */
+         while (nblocks >= 16)
+           {
+             blkn += 16;
+             *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 16);
+
+             _gcry_camellia_aesni_avx_ocb_auth(ctx, abuf,
+                                               c->u_mode.ocb.aad_offset,
+                                               c->u_mode.ocb.aad_sum, Ls);
+
+             nblocks -= 16;
+             abuf += 16 * CAMELLIA_BLOCK_SIZE;
+             did_use_aesni_avx = 1;
+           }
+       }
+
+      if (did_use_aesni_avx)
+       {
+         int avx_burn_stack_depth = 16 * CAMELLIA_BLOCK_SIZE +
+                                     2 * sizeof(void *) + ASM_EXTRA_STACK;
+
+         if (burn_stack_depth < avx_burn_stack_depth)
+           burn_stack_depth = avx_burn_stack_depth;
+       }
+
+      /* Use generic code to handle smaller chunks... */
+    }
+#endif
+
+#if defined(USE_AESNI_AVX) || defined(USE_AESNI_AVX2)
+  c->u_mode.ocb.aad_nblocks = blkn;
+
+  if (burn_stack_depth)
+    _gcry_burn_stack (burn_stack_depth + 4 * sizeof(void *));
+#endif
+
+  return nblocks;
+}
+
+/* Run the self-tests for CAMELLIA-CTR-128, tests IV increment of bulk CTR
+   encryption.  Returns NULL on success. */
+static const char*
+selftest_ctr_128 (void)
+{
+  const int nblocks = 32+16+1;
+  const int blocksize = CAMELLIA_BLOCK_SIZE;
+  const int context_size = sizeof(CAMELLIA_context);
+
+  return _gcry_selftest_helper_ctr("CAMELLIA", &camellia_setkey,
+           &camellia_encrypt, nblocks, blocksize, context_size);
+}
+
+/* Run the self-tests for CAMELLIA-CBC-128, tests bulk CBC decryption.
+   Returns NULL on success. */
+static const char*
+selftest_cbc_128 (void)
+{
+  const int nblocks = 32+16+2;
+  const int blocksize = CAMELLIA_BLOCK_SIZE;
+  const int context_size = sizeof(CAMELLIA_context);
+
+  return _gcry_selftest_helper_cbc("CAMELLIA", &camellia_setkey,
+           &camellia_encrypt, nblocks, blocksize, context_size);
+}
+
+/* Run the self-tests for CAMELLIA-CFB-128, tests bulk CFB decryption.
+   Returns NULL on success. */
+static const char*
+selftest_cfb_128 (void)
+{
+  const int nblocks = 32+16+2;
+  const int blocksize = CAMELLIA_BLOCK_SIZE;
+  const int context_size = sizeof(CAMELLIA_context);
+
+  return _gcry_selftest_helper_cfb("CAMELLIA", &camellia_setkey,
+           &camellia_encrypt, nblocks, blocksize, context_size);
 }
 
 static const char *
@@ -137,46 +1065,48 @@ selftest(void)
 {
   CAMELLIA_context ctx;
   byte scratch[16];
+  cipher_bulk_ops_t bulk_ops;
+  const char *r;
 
   /* These test vectors are from RFC-3713 */
-  const byte plaintext[]=
+  static const byte plaintext[]=
     {
       0x01,0x23,0x45,0x67,0x89,0xab,0xcd,0xef,
       0xfe,0xdc,0xba,0x98,0x76,0x54,0x32,0x10
     };
-  const byte key_128[]=
+  static const byte key_128[]=
     {
       0x01,0x23,0x45,0x67,0x89,0xab,0xcd,0xef,
       0xfe,0xdc,0xba,0x98,0x76,0x54,0x32,0x10
     };
-  const byte ciphertext_128[]=
+  static const byte ciphertext_128[]=
     {
       0x67,0x67,0x31,0x38,0x54,0x96,0x69,0x73,
       0x08,0x57,0x06,0x56,0x48,0xea,0xbe,0x43
     };
-  const byte key_192[]=
+  static const byte key_192[]=
     {
       0x01,0x23,0x45,0x67,0x89,0xab,0xcd,0xef,0xfe,0xdc,0xba,0x98,
       0x76,0x54,0x32,0x10,0x00,0x11,0x22,0x33,0x44,0x55,0x66,0x77
     };
-  const byte ciphertext_192[]=
+  static const byte ciphertext_192[]=
     {
       0xb4,0x99,0x34,0x01,0xb3,0xe9,0x96,0xf8,
       0x4e,0xe5,0xce,0xe7,0xd7,0x9b,0x09,0xb9
     };
-  const byte key_256[]=
+  static const byte key_256[]=
     {
       0x01,0x23,0x45,0x67,0x89,0xab,0xcd,0xef,0xfe,0xdc,0xba,
       0x98,0x76,0x54,0x32,0x10,0x00,0x11,0x22,0x33,0x44,0x55,
       0x66,0x77,0x88,0x99,0xaa,0xbb,0xcc,0xdd,0xee,0xff
     };
-  const byte ciphertext_256[]=
+  static const byte ciphertext_256[]=
     {
       0x9a,0xcc,0x23,0x7d,0xff,0x16,0xd7,0x6c,
       0x20,0xef,0x7c,0x91,0x9e,0x3a,0x75,0x09
     };
 
-  camellia_setkey(&ctx,key_128,sizeof(key_128));
+  camellia_setkey(&ctx,key_128,sizeof(key_128),&bulk_ops);
   camellia_encrypt(&ctx,scratch,plaintext);
   if(memcmp(scratch,ciphertext_128,sizeof(ciphertext_128))!=0)
     return "CAMELLIA-128 test encryption failed.";
@@ -184,7 +1114,7 @@ selftest(void)
   if(memcmp(scratch,plaintext,sizeof(plaintext))!=0)
     return "CAMELLIA-128 test decryption failed.";
 
-  camellia_setkey(&ctx,key_192,sizeof(key_192));
+  camellia_setkey(&ctx,key_192,sizeof(key_192),&bulk_ops);
   camellia_encrypt(&ctx,scratch,plaintext);
   if(memcmp(scratch,ciphertext_192,sizeof(ciphertext_192))!=0)
     return "CAMELLIA-192 test encryption failed.";
@@ -192,7 +1122,7 @@ selftest(void)
   if(memcmp(scratch,plaintext,sizeof(plaintext))!=0)
     return "CAMELLIA-192 test decryption failed.";
 
-  camellia_setkey(&ctx,key_256,sizeof(key_256));
+  camellia_setkey(&ctx,key_256,sizeof(key_256),&bulk_ops);
   camellia_encrypt(&ctx,scratch,plaintext);
   if(memcmp(scratch,ciphertext_256,sizeof(ciphertext_256))!=0)
     return "CAMELLIA-256 test encryption failed.";
@@ -200,6 +1130,15 @@ selftest(void)
   if(memcmp(scratch,plaintext,sizeof(plaintext))!=0)
     return "CAMELLIA-256 test decryption failed.";
 
+  if ( (r = selftest_ctr_128 ()) )
+    return r;
+
+  if ( (r = selftest_cbc_128 ()) )
+    return r;
+
+  if ( (r = selftest_cfb_128 ()) )
+    return r;
+
   return NULL;
 }
 
@@ -207,7 +1146,7 @@ selftest(void)
    <http://info.isl.ntt.co.jp/crypt/eng/camellia/specifications_oid.html>,
    retrieved May 1, 2007. */
 
-static gcry_cipher_oid_spec_t camellia128_oids[] =
+static const gcry_cipher_oid_spec_t camellia128_oids[] =
   {
     {"1.2.392.200011.61.1.1.1.2", GCRY_CIPHER_MODE_CBC},
     {"0.3.4401.5.3.1.9.1", GCRY_CIPHER_MODE_ECB},
@@ -216,7 +1155,7 @@ static gcry_cipher_oid_spec_t camellia128_oids[] =
     { NULL }
   };
 
-static gcry_cipher_oid_spec_t camellia192_oids[] =
+static const gcry_cipher_oid_spec_t camellia192_oids[] =
   {
     {"1.2.392.200011.61.1.1.1.3", GCRY_CIPHER_MODE_CBC},
     {"0.3.4401.5.3.1.9.21", GCRY_CIPHER_MODE_ECB},
@@ -225,7 +1164,7 @@ static gcry_cipher_oid_spec_t camellia192_oids[] =
     { NULL }
   };
 
-static gcry_cipher_oid_spec_t camellia256_oids[] =
+static const gcry_cipher_oid_spec_t camellia256_oids[] =
   {
     {"1.2.392.200011.61.1.1.1.4", GCRY_CIPHER_MODE_CBC},
     {"0.3.4401.5.3.1.9.41", GCRY_CIPHER_MODE_ECB},
@@ -236,18 +1175,21 @@ static gcry_cipher_oid_spec_t camellia256_oids[] =
 
 gcry_cipher_spec_t _gcry_cipher_spec_camellia128 =
   {
+    GCRY_CIPHER_CAMELLIA128, {0, 0},
     "CAMELLIA128",NULL,camellia128_oids,CAMELLIA_BLOCK_SIZE,128,
     sizeof(CAMELLIA_context),camellia_setkey,camellia_encrypt,camellia_decrypt
   };
 
 gcry_cipher_spec_t _gcry_cipher_spec_camellia192 =
   {
+    GCRY_CIPHER_CAMELLIA192, {0, 0},
     "CAMELLIA192",NULL,camellia192_oids,CAMELLIA_BLOCK_SIZE,192,
     sizeof(CAMELLIA_context),camellia_setkey,camellia_encrypt,camellia_decrypt
   };
 
 gcry_cipher_spec_t _gcry_cipher_spec_camellia256 =
   {
+    GCRY_CIPHER_CAMELLIA256, {0, 0},
     "CAMELLIA256",NULL,camellia256_oids,CAMELLIA_BLOCK_SIZE,256,
     sizeof(CAMELLIA_context),camellia_setkey,camellia_encrypt,camellia_decrypt
   };
diff --git a/grub-core/lib/libgcrypt/cipher/camellia-vaes-avx2-amd64.S 
b/grub-core/lib/libgcrypt/cipher/camellia-vaes-avx2-amd64.S
new file mode 100644
index 000000000..e6e0c78ec
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/camellia-vaes-avx2-amd64.S
@@ -0,0 +1,35 @@
+/* camellia-vaes-avx2-amd64.S  -  VAES/AVX2 implementation of Camellia cipher
+ *
+ * Copyright (C) 2021 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+
+#ifdef __x86_64
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
+    defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX2_SUPPORT) && \
+    defined(HAVE_GCC_INLINE_ASM_VAES_VPCLMUL)
+
+#define CAMELLIA_VAES_BUILD 1
+#define FUNC_NAME(func) _gcry_camellia_vaes_avx2_ ## func
+
+#include "camellia-aesni-avx2-amd64.h"
+
+#endif /* defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX2_SUPPORT) */
+#endif /* __x86_64 */
diff --git a/grub-core/lib/libgcrypt/cipher/camellia.c 
b/grub-core/lib/libgcrypt/cipher/camellia.c
index 2e28bce2a..e7085a7ec 100644
--- a/grub-core/lib/libgcrypt/cipher/camellia.c
+++ b/grub-core/lib/libgcrypt/cipher/camellia.c
@@ -14,8 +14,7 @@
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  
USA
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 /*
@@ -23,14 +22,15 @@
  *  http://info.isl.ntt.co.jp/crypt/eng/camellia/specifications.html
  */
 
+#include <config.h>
 #include <string.h>
 #include <stdlib.h>
 
+#include "types.h"
+#include "bufhelp.h"
 #include "camellia.h"
 
-/* u32 must be 32bit word */
-typedef unsigned int u32;
-typedef unsigned char u8;
+typedef byte u8;
 
 /* key constants */
 
@@ -60,17 +60,8 @@ typedef unsigned char u8;
 
 #else /* not MS-VC */
 
-# define GETU32(pt)                            \
-    (((u32)(pt)[0] << 24)                      \
-     ^ ((u32)(pt)[1] << 16)                    \
-     ^ ((u32)(pt)[2] <<  8)                    \
-     ^ ((u32)(pt)[3]))
-
-# define PUTU32(ct, st)  {                     \
-       (ct)[0] = (u8)((st) >> 24);             \
-       (ct)[1] = (u8)((st) >> 16);             \
-       (ct)[2] = (u8)((st) >>  8);             \
-       (ct)[3] = (u8)(st); }
+# define GETU32(pt) buf_get_be32(pt)
+# define PUTU32(ct, st) buf_put_be32(ct, st)
 
 #endif
 
@@ -151,6 +142,8 @@ typedef unsigned char u8;
 
 #define CAMELLIA_ROUNDSM(xl, xr, kl, kr, yl, yr, il, ir, t0, t1)       \
     do {                                                               \
+       yl ^= kl;                                                       \
+       yr ^= kr;                                                       \
        ir = CAMELLIA_SP1110(xr & 0xff)                                 \
            ^ CAMELLIA_SP0222((xr >> 24) & 0xff)                        \
            ^ CAMELLIA_SP3033((xr >> 16) & 0xff)                        \
@@ -159,8 +152,6 @@ typedef unsigned char u8;
            ^ CAMELLIA_SP0222((xl >> 16) & 0xff)                        \
            ^ CAMELLIA_SP3033((xl >> 8) & 0xff)                         \
            ^ CAMELLIA_SP4404(xl & 0xff);                               \
-       il ^= kl;                                                       \
-       ir ^= kr;                                                       \
        ir ^= il;                                                       \
        il = CAMELLIA_RR8(il);                                          \
        il ^= ir;                                                       \
@@ -614,44 +605,6 @@ void camellia_setup128(const unsigned char *key, u32 
*subkey)
     CamelliaSubkeyL(24) = subl(24) ^ subl(23);
     CamelliaSubkeyR(24) = subr(24) ^ subr(23);
 
-    /* apply the inverse of the last half of P-function */
-    dw = CamelliaSubkeyL(2) ^ CamelliaSubkeyR(2), dw = CAMELLIA_RL8(dw);
-    CamelliaSubkeyR(2) = CamelliaSubkeyL(2) ^ dw, CamelliaSubkeyL(2) = dw;
-    dw = CamelliaSubkeyL(3) ^ CamelliaSubkeyR(3), dw = CAMELLIA_RL8(dw);
-    CamelliaSubkeyR(3) = CamelliaSubkeyL(3) ^ dw, CamelliaSubkeyL(3) = dw;
-    dw = CamelliaSubkeyL(4) ^ CamelliaSubkeyR(4), dw = CAMELLIA_RL8(dw);
-    CamelliaSubkeyR(4) = CamelliaSubkeyL(4) ^ dw, CamelliaSubkeyL(4) = dw;
-    dw = CamelliaSubkeyL(5) ^ CamelliaSubkeyR(5), dw = CAMELLIA_RL8(dw);
-    CamelliaSubkeyR(5) = CamelliaSubkeyL(5) ^ dw, CamelliaSubkeyL(5) = dw;
-    dw = CamelliaSubkeyL(6) ^ CamelliaSubkeyR(6), dw = CAMELLIA_RL8(dw);
-    CamelliaSubkeyR(6) = CamelliaSubkeyL(6) ^ dw, CamelliaSubkeyL(6) = dw;
-    dw = CamelliaSubkeyL(7) ^ CamelliaSubkeyR(7), dw = CAMELLIA_RL8(dw);
-    CamelliaSubkeyR(7) = CamelliaSubkeyL(7) ^ dw, CamelliaSubkeyL(7) = dw;
-    dw = CamelliaSubkeyL(10) ^ CamelliaSubkeyR(10), dw = CAMELLIA_RL8(dw);
-    CamelliaSubkeyR(10) = CamelliaSubkeyL(10) ^ dw, CamelliaSubkeyL(10) = dw;
-    dw = CamelliaSubkeyL(11) ^ CamelliaSubkeyR(11), dw = CAMELLIA_RL8(dw);
-    CamelliaSubkeyR(11) = CamelliaSubkeyL(11) ^ dw, CamelliaSubkeyL(11) = dw;
-    dw = CamelliaSubkeyL(12) ^ CamelliaSubkeyR(12), dw = CAMELLIA_RL8(dw);
-    CamelliaSubkeyR(12) = CamelliaSubkeyL(12) ^ dw, CamelliaSubkeyL(12) = dw;
-    dw = CamelliaSubkeyL(13) ^ CamelliaSubkeyR(13), dw = CAMELLIA_RL8(dw);
-    CamelliaSubkeyR(13) = CamelliaSubkeyL(13) ^ dw, CamelliaSubkeyL(13) = dw;
-    dw = CamelliaSubkeyL(14) ^ CamelliaSubkeyR(14), dw = CAMELLIA_RL8(dw);
-    CamelliaSubkeyR(14) = CamelliaSubkeyL(14) ^ dw, CamelliaSubkeyL(14) = dw;
-    dw = CamelliaSubkeyL(15) ^ CamelliaSubkeyR(15), dw = CAMELLIA_RL8(dw);
-    CamelliaSubkeyR(15) = CamelliaSubkeyL(15) ^ dw, CamelliaSubkeyL(15) = dw;
-    dw = CamelliaSubkeyL(18) ^ CamelliaSubkeyR(18), dw = CAMELLIA_RL8(dw);
-    CamelliaSubkeyR(18) = CamelliaSubkeyL(18) ^ dw, CamelliaSubkeyL(18) = dw;
-    dw = CamelliaSubkeyL(19) ^ CamelliaSubkeyR(19), dw = CAMELLIA_RL8(dw);
-    CamelliaSubkeyR(19) = CamelliaSubkeyL(19) ^ dw, CamelliaSubkeyL(19) = dw;
-    dw = CamelliaSubkeyL(20) ^ CamelliaSubkeyR(20), dw = CAMELLIA_RL8(dw);
-    CamelliaSubkeyR(20) = CamelliaSubkeyL(20) ^ dw, CamelliaSubkeyL(20) = dw;
-    dw = CamelliaSubkeyL(21) ^ CamelliaSubkeyR(21), dw = CAMELLIA_RL8(dw);
-    CamelliaSubkeyR(21) = CamelliaSubkeyL(21) ^ dw, CamelliaSubkeyL(21) = dw;
-    dw = CamelliaSubkeyL(22) ^ CamelliaSubkeyR(22), dw = CAMELLIA_RL8(dw);
-    CamelliaSubkeyR(22) = CamelliaSubkeyL(22) ^ dw, CamelliaSubkeyL(22) = dw;
-    dw = CamelliaSubkeyL(23) ^ CamelliaSubkeyR(23), dw = CAMELLIA_RL8(dw);
-    CamelliaSubkeyR(23) = CamelliaSubkeyL(23) ^ dw, CamelliaSubkeyL(23) = dw;
-
     return;
 }
 
@@ -888,56 +841,6 @@ void camellia_setup256(const unsigned char *key, u32 
*subkey)
     CamelliaSubkeyL(32) = subl(32) ^ subl(31);
     CamelliaSubkeyR(32) = subr(32) ^ subr(31);
 
-    /* apply the inverse of the last half of P-function */
-    dw = CamelliaSubkeyL(2) ^ CamelliaSubkeyR(2), dw = CAMELLIA_RL8(dw);
-    CamelliaSubkeyR(2) = CamelliaSubkeyL(2) ^ dw, CamelliaSubkeyL(2) = dw;
-    dw = CamelliaSubkeyL(3) ^ CamelliaSubkeyR(3), dw = CAMELLIA_RL8(dw);
-    CamelliaSubkeyR(3) = CamelliaSubkeyL(3) ^ dw, CamelliaSubkeyL(3) = dw;
-    dw = CamelliaSubkeyL(4) ^ CamelliaSubkeyR(4), dw = CAMELLIA_RL8(dw);
-    CamelliaSubkeyR(4) = CamelliaSubkeyL(4) ^ dw, CamelliaSubkeyL(4) = dw;
-    dw = CamelliaSubkeyL(5) ^ CamelliaSubkeyR(5), dw = CAMELLIA_RL8(dw);
-    CamelliaSubkeyR(5) = CamelliaSubkeyL(5) ^ dw, CamelliaSubkeyL(5) = dw;
-    dw = CamelliaSubkeyL(6) ^ CamelliaSubkeyR(6), dw = CAMELLIA_RL8(dw);
-    CamelliaSubkeyR(6) = CamelliaSubkeyL(6) ^ dw, CamelliaSubkeyL(6) = dw;
-    dw = CamelliaSubkeyL(7) ^ CamelliaSubkeyR(7), dw = CAMELLIA_RL8(dw);
-    CamelliaSubkeyR(7) = CamelliaSubkeyL(7) ^ dw, CamelliaSubkeyL(7) = dw;
-    dw = CamelliaSubkeyL(10) ^ CamelliaSubkeyR(10), dw = CAMELLIA_RL8(dw);
-    CamelliaSubkeyR(10) = CamelliaSubkeyL(10) ^ dw, CamelliaSubkeyL(10) = dw;
-    dw = CamelliaSubkeyL(11) ^ CamelliaSubkeyR(11), dw = CAMELLIA_RL8(dw);
-    CamelliaSubkeyR(11) = CamelliaSubkeyL(11) ^ dw, CamelliaSubkeyL(11) = dw;
-    dw = CamelliaSubkeyL(12) ^ CamelliaSubkeyR(12), dw = CAMELLIA_RL8(dw);
-    CamelliaSubkeyR(12) = CamelliaSubkeyL(12) ^ dw, CamelliaSubkeyL(12) = dw;
-    dw = CamelliaSubkeyL(13) ^ CamelliaSubkeyR(13), dw = CAMELLIA_RL8(dw);
-    CamelliaSubkeyR(13) = CamelliaSubkeyL(13) ^ dw, CamelliaSubkeyL(13) = dw;
-    dw = CamelliaSubkeyL(14) ^ CamelliaSubkeyR(14), dw = CAMELLIA_RL8(dw);
-    CamelliaSubkeyR(14) = CamelliaSubkeyL(14) ^ dw, CamelliaSubkeyL(14) = dw;
-    dw = CamelliaSubkeyL(15) ^ CamelliaSubkeyR(15), dw = CAMELLIA_RL8(dw);
-    CamelliaSubkeyR(15) = CamelliaSubkeyL(15) ^ dw, CamelliaSubkeyL(15) = dw;
-    dw = CamelliaSubkeyL(18) ^ CamelliaSubkeyR(18), dw = CAMELLIA_RL8(dw);
-    CamelliaSubkeyR(18) = CamelliaSubkeyL(18) ^ dw, CamelliaSubkeyL(18) = dw;
-    dw = CamelliaSubkeyL(19) ^ CamelliaSubkeyR(19), dw = CAMELLIA_RL8(dw);
-    CamelliaSubkeyR(19) = CamelliaSubkeyL(19) ^ dw, CamelliaSubkeyL(19) = dw;
-    dw = CamelliaSubkeyL(20) ^ CamelliaSubkeyR(20), dw = CAMELLIA_RL8(dw);
-    CamelliaSubkeyR(20) = CamelliaSubkeyL(20) ^ dw, CamelliaSubkeyL(20) = dw;
-    dw = CamelliaSubkeyL(21) ^ CamelliaSubkeyR(21), dw = CAMELLIA_RL8(dw);
-    CamelliaSubkeyR(21) = CamelliaSubkeyL(21) ^ dw, CamelliaSubkeyL(21) = dw;
-    dw = CamelliaSubkeyL(22) ^ CamelliaSubkeyR(22), dw = CAMELLIA_RL8(dw);
-    CamelliaSubkeyR(22) = CamelliaSubkeyL(22) ^ dw, CamelliaSubkeyL(22) = dw;
-    dw = CamelliaSubkeyL(23) ^ CamelliaSubkeyR(23), dw = CAMELLIA_RL8(dw);
-    CamelliaSubkeyR(23) = CamelliaSubkeyL(23) ^ dw, CamelliaSubkeyL(23) = dw;
-    dw = CamelliaSubkeyL(26) ^ CamelliaSubkeyR(26), dw = CAMELLIA_RL8(dw);
-    CamelliaSubkeyR(26) = CamelliaSubkeyL(26) ^ dw, CamelliaSubkeyL(26) = dw;
-    dw = CamelliaSubkeyL(27) ^ CamelliaSubkeyR(27), dw = CAMELLIA_RL8(dw);
-    CamelliaSubkeyR(27) = CamelliaSubkeyL(27) ^ dw, CamelliaSubkeyL(27) = dw;
-    dw = CamelliaSubkeyL(28) ^ CamelliaSubkeyR(28), dw = CAMELLIA_RL8(dw);
-    CamelliaSubkeyR(28) = CamelliaSubkeyL(28) ^ dw, CamelliaSubkeyL(28) = dw;
-    dw = CamelliaSubkeyL(29) ^ CamelliaSubkeyR(29), dw = CAMELLIA_RL8(dw);
-    CamelliaSubkeyR(29) = CamelliaSubkeyL(29) ^ dw, CamelliaSubkeyL(29) = dw;
-    dw = CamelliaSubkeyL(30) ^ CamelliaSubkeyR(30), dw = CAMELLIA_RL8(dw);
-    CamelliaSubkeyR(30) = CamelliaSubkeyL(30) ^ dw, CamelliaSubkeyL(30) = dw;
-    dw = CamelliaSubkeyL(31) ^ CamelliaSubkeyR(31), dw = CAMELLIA_RL8(dw);
-    CamelliaSubkeyR(31) = CamelliaSubkeyL(31) ^ dw,CamelliaSubkeyL(31) = dw;
-
     return;
 }
 
@@ -958,14 +861,21 @@ void camellia_setup192(const unsigned char *key, u32 
*subkey)
 }
 
 
+#ifndef USE_ARM_ASM
 /**
  * Stuff related to camellia encryption/decryption
  *
  * "io" must be 4byte aligned and big-endian data.
  */
-void camellia_encrypt128(const u32 *subkey, u32 *io)
+void camellia_encrypt128(const u32 *subkey, u32 *blocks)
 {
     u32 il, ir, t0, t1;
+    u32 io[4];
+
+    io[0] = blocks[0];
+    io[1] = blocks[1];
+    io[2] = blocks[2];
+    io[3] = blocks[3];
 
     /* pre whitening but absorb kw2*/
     io[0] ^= CamelliaSubkeyL(0);
@@ -1050,12 +960,23 @@ void camellia_encrypt128(const u32 *subkey, u32 *io)
     io[2] = t0;
     io[3] = t1;
 
+    blocks[0] = io[0];
+    blocks[1] = io[1];
+    blocks[2] = io[2];
+    blocks[3] = io[3];
+
     return;
 }
 
-void camellia_decrypt128(const u32 *subkey, u32 *io)
+void camellia_decrypt128(const u32 *subkey, u32 *blocks)
 {
     u32 il,ir,t0,t1;               /* temporary valiables */
+    u32 io[4];
+
+    io[0] = blocks[0];
+    io[1] = blocks[1];
+    io[2] = blocks[2];
+    io[3] = blocks[3];
 
     /* pre whitening but absorb kw2*/
     io[0] ^= CamelliaSubkeyL(24);
@@ -1140,15 +1061,26 @@ void camellia_decrypt128(const u32 *subkey, u32 *io)
     io[2] = t0;
     io[3] = t1;
 
+    blocks[0] = io[0];
+    blocks[1] = io[1];
+    blocks[2] = io[2];
+    blocks[3] = io[3];
+
     return;
 }
 
 /**
  * stuff for 192 and 256bit encryption/decryption
  */
-void camellia_encrypt256(const u32 *subkey, u32 *io)
+void camellia_encrypt256(const u32 *subkey, u32 *blocks)
 {
     u32 il,ir,t0,t1;           /* temporary valiables */
+    u32 io[4];
+
+    io[0] = blocks[0];
+    io[1] = blocks[1];
+    io[2] = blocks[2];
+    io[3] = blocks[3];
 
     /* pre whitening but absorb kw2*/
     io[0] ^= CamelliaSubkeyL(0);
@@ -1257,12 +1189,23 @@ void camellia_encrypt256(const u32 *subkey, u32 *io)
     io[2] = t0;
     io[3] = t1;
 
+    blocks[0] = io[0];
+    blocks[1] = io[1];
+    blocks[2] = io[2];
+    blocks[3] = io[3];
+
     return;
 }
 
-void camellia_decrypt256(const u32 *subkey, u32 *io)
+void camellia_decrypt256(const u32 *subkey, u32 *blocks)
 {
     u32 il,ir,t0,t1;           /* temporary valiables */
+    u32 io[4];
+
+    io[0] = blocks[0];
+    io[1] = blocks[1];
+    io[2] = blocks[2];
+    io[3] = blocks[3];
 
     /* pre whitening but absorb kw2*/
     io[0] ^= CamelliaSubkeyL(32);
@@ -1371,8 +1314,15 @@ void camellia_decrypt256(const u32 *subkey, u32 *io)
     io[2] = t0;
     io[3] = t1;
 
+    blocks[0] = io[0];
+    blocks[1] = io[1];
+    blocks[2] = io[2];
+    blocks[3] = io[3];
+
     return;
 }
+#endif /*!USE_ARM_ASM*/
+
 
 /***
  *
@@ -1399,6 +1349,7 @@ void Camellia_Ekeygen(const int keyBitLength,
 }
 
 
+#ifndef USE_ARM_ASM
 void Camellia_EncryptBlock(const int keyBitLength,
                           const unsigned char *plaintext,
                           const KEY_TABLE_TYPE keyTable,
@@ -1459,3 +1410,4 @@ void Camellia_DecryptBlock(const int keyBitLength,
     PUTU32(plaintext + 8, tmp[2]);
     PUTU32(plaintext + 12, tmp[3]);
 }
+#endif /*!USE_ARM_ASM*/
diff --git a/grub-core/lib/libgcrypt/cipher/camellia.h 
b/grub-core/lib/libgcrypt/cipher/camellia.h
index cccf786ca..d7a1e6f4a 100644
--- a/grub-core/lib/libgcrypt/cipher/camellia.h
+++ b/grub-core/lib/libgcrypt/cipher/camellia.h
@@ -30,6 +30,18 @@
  */
 #ifdef HAVE_CONFIG_H
 #include <config.h>
+/* USE_ARM_ASM indicates whether to use ARM assembly code. */
+# undef USE_ARM_ASM
+# if defined(__ARMEL__)
+#  ifdef HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS
+#   define USE_ARM_ASM 1
+#  endif
+# endif
+# if defined(__AARCH64EL__)
+#  ifdef HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS
+#   define USE_ARM_ASM 1
+#  endif
+# endif
 #endif
 #ifdef CAMELLIA_EXT_SYM_PREFIX
 #define CAMELLIA_PREFIX1(x,y) x ## y
@@ -63,6 +75,7 @@ void Camellia_Ekeygen(const int keyBitLength,
                      const unsigned char *rawKey,
                      KEY_TABLE_TYPE keyTable);
 
+#ifndef USE_ARM_ASM
 void Camellia_EncryptBlock(const int keyBitLength,
                           const unsigned char *plaintext,
                           const KEY_TABLE_TYPE keyTable,
@@ -72,6 +85,7 @@ void Camellia_DecryptBlock(const int keyBitLength,
                           const unsigned char *cipherText,
                           const KEY_TABLE_TYPE keyTable,
                           unsigned char *plaintext);
+#endif /*!USE_ARM_ASM*/
 
 
 #ifdef  __cplusplus
diff --git a/grub-core/lib/libgcrypt/cipher/cast5-amd64.S 
b/grub-core/lib/libgcrypt/cipher/cast5-amd64.S
new file mode 100644
index 000000000..a804654c1
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/cast5-amd64.S
@@ -0,0 +1,663 @@
+/* cast5-amd64.S  -  AMD64 assembly implementation of CAST5 cipher
+ *
+ * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifdef __x86_64
+#include <config.h>
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && defined(USE_CAST5)
+
+#include "asm-common-amd64.h"
+
+.text
+
+.extern _gcry_cast5_s1to4;
+
+#define s1 0
+#define s2 (s1 + (4 * 256))
+#define s3 (s2 + (4 * 256))
+#define s4 (s3 + (4 * 256))
+
+/* structure of CAST5_context: */
+#define Km 0
+#define Kr (Km + (16 * 4))
+
+/* register macros */
+#define CTX %rdi
+#define RIO %rsi
+#define RTAB %r8
+
+#define RLR0 %r9
+#define RLR1 %r10
+#define RLR2 %r11
+#define RLR3 %r12
+
+#define RLR0d %r9d
+#define RLR1d %r10d
+#define RLR2d %r11d
+#define RLR3d %r12d
+
+#define RX0 %rax
+#define RX1 %rbx
+#define RX2 %rdx
+
+#define RX0d %eax
+#define RX1d %ebx
+#define RX2d %edx
+
+#define RX0bl %al
+#define RX1bl %bl
+#define RX2bl %dl
+
+#define RX0bh %ah
+#define RX1bh %bh
+#define RX2bh %dh
+
+#define RKR %rcx
+#define RKRd %ecx
+#define RKRbl %cl
+
+#define RT0 %rbp
+#define RT1 %rsi
+
+#define RT0d %ebp
+#define RT1d %esi
+
+#define RKM0d %r13d
+#define RKM1d %r14d
+
+/***********************************************************************
+ * 1-way cast5
+ ***********************************************************************/
+#define dummy(x)
+
+#define shr_kr(none) \
+       shrq $8,                        RKR;
+
+#define F(km, load_next_kr, op0, op1, op2, op3) \
+       op0 ## l RLR0d,                 km ## d; \
+       roll RKRbl,                     km ## d; \
+       rorq $32,                       RLR0; \
+       movzbl km ## bh,                RT0d; \
+       movzbl km ## bl,                RT1d; \
+       roll $16,                       km ## d; \
+       movl s1(RTAB,RT0,4),            RT0d; \
+       op1 ## l s2(RTAB,RT1,4),        RT0d; \
+       load_next_kr(kr_next); \
+       movzbl km ## bh,                RT1d; \
+       movzbl km ## bl,                km ## d; \
+       op2 ## l s3(RTAB,RT1,4),        RT0d; \
+       op3 ## l s4(RTAB,km,4),         RT0d; \
+       xorq RT0,                       RLR0;
+
+#define F1(km, load_next_kr) \
+       F(##km, load_next_kr, add, xor, sub, add)
+#define F2(km, load_next_kr) \
+       F(##km, load_next_kr, xor, sub, add, xor)
+#define F3(km, load_next_kr) \
+       F(##km, load_next_kr, sub, add, xor, sub)
+
+#define get_round_km(n, km) \
+       movl Km+4*(n)(CTX),             km;
+
+#define get_round_kr_enc(n) \
+       movq $0x1010101010101010,       RKR; \
+       \
+       /* merge rorl rk and rorl $16 */ \
+       xorq Kr+(n)(CTX),               RKR;
+
+#define get_round_kr_dec(n) \
+       movq $0x1010101010101010,       RKR; \
+       \
+       /* merge rorl rk and rorl $16 */ \
+       xorq Kr+(n - 7)(CTX),           RKR; \
+       bswapq                          RKR;
+
+#define round_enc(n, FA, FB, fn1, fn2) \
+       get_round_km(n + 1, RX2d); \
+       FA(RX0, fn1); \
+       get_round_km(n + 2, RX0d); \
+       FB(RX2, fn2);
+
+#define round_enc_last(n, FXA, FXB) \
+       get_round_km(n + 1, RX2d); \
+       \
+       FXA(RX0, shr_kr); \
+       FXB(RX2, dummy);
+
+#define round_enc_1(n, FA, FB) \
+       round_enc(n, FA, FB, shr_kr, shr_kr)
+
+#define round_enc_2(n, FA, FB) \
+       round_enc(n, FA, FB, shr_kr, dummy)
+
+#define round_dec(n, FA, FB, fn1, fn2) \
+       get_round_km(n - 1, RX2d); \
+       FA(RX0, fn1); \
+       get_round_km(n - 2, RX0d); \
+       FB(RX2, fn2);
+
+#define round_dec_last(n, FXA, FXB) \
+       get_round_km(n - 1, RX2d); \
+       FXA(RX0, shr_kr); \
+       FXB(RX2, dummy);
+
+#define round_dec_1(n, FA, FB) \
+       round_dec(n, FA, FB, shr_kr, shr_kr)
+
+#define round_dec_2(n, FA, FB) \
+       round_dec(n, FA, FB, shr_kr, dummy)
+
+#define read_block() \
+       movq (RIO),             RLR0; \
+       bswapq                  RLR0;
+
+#define write_block() \
+       bswapq                  RLR0; \
+       rorq $32,               RLR0; \
+       movq RLR0,              (RIO);
+
+.align 8
+.globl _gcry_cast5_amd64_encrypt_block
+ELF(.type   _gcry_cast5_amd64_encrypt_block,@function;)
+
+_gcry_cast5_amd64_encrypt_block:
+       /* input:
+        *      %rdi: ctx, CTX
+        *      %rsi: dst
+        *      %rdx: src
+        */
+       CFI_STARTPROC();
+       ENTER_SYSV_FUNC_PARAMS_0_4
+
+       pushq %rbp;
+       CFI_PUSH(%rbp);
+       pushq %rbx;
+       CFI_PUSH(%rbx);
+
+       movq %rsi, %r10;
+
+       GET_EXTERN_POINTER(_gcry_cast5_s1to4, RTAB);
+
+       movq %rdx, RIO;
+       read_block();
+
+       get_round_km(0, RX0d);
+       get_round_kr_enc(0);
+       round_enc_1(0, F1, F2);
+       round_enc_1(2, F3, F1);
+       round_enc_1(4, F2, F3);
+       round_enc_2(6, F1, F2);
+       get_round_kr_enc(8);
+       round_enc_1(8, F3, F1);
+       round_enc_1(10, F2, F3);
+       round_enc_1(12, F1, F2);
+       round_enc_last(14, F3, F1);
+
+       movq %r10, RIO;
+       write_block();
+
+       popq %rbx;
+       CFI_POP(%rbx);
+       popq %rbp;
+       CFI_POP(%rbp);
+
+       EXIT_SYSV_FUNC
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size _gcry_cast5_amd64_encrypt_block,.-_gcry_cast5_amd64_encrypt_block;)
+
+.align 8
+.globl _gcry_cast5_amd64_decrypt_block
+ELF(.type   _gcry_cast5_amd64_decrypt_block,@function;)
+
+_gcry_cast5_amd64_decrypt_block:
+       /* input:
+        *      %rdi: ctx, CTX
+        *      %rsi: dst
+        *      %rdx: src
+        */
+       CFI_STARTPROC();
+       ENTER_SYSV_FUNC_PARAMS_0_4
+
+       pushq %rbp;
+       CFI_PUSH(%rbp);
+       pushq %rbx;
+       CFI_PUSH(%rbx);
+
+       movq %rsi, %r10;
+
+       GET_EXTERN_POINTER(_gcry_cast5_s1to4, RTAB);
+
+       movq %rdx, RIO;
+       read_block();
+
+       get_round_km(15, RX0d);
+       get_round_kr_dec(15);
+       round_dec_1(15, F1, F3);
+       round_dec_1(13, F2, F1);
+       round_dec_1(11, F3, F2);
+       round_dec_2(9, F1, F3);
+       get_round_kr_dec(7);
+       round_dec_1(7, F2, F1);
+       round_dec_1(5, F3, F2);
+       round_dec_1(3, F1, F3);
+       round_dec_last(1, F2, F1);
+
+       movq %r10, RIO;
+       write_block();
+
+       popq %rbx;
+       CFI_POP(%rbx);
+       popq %rbp;
+       CFI_POP(%rbp);
+
+       EXIT_SYSV_FUNC
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size _gcry_cast5_amd64_decrypt_block,.-_gcry_cast5_amd64_decrypt_block;)
+
+/**********************************************************************
+  4-way cast5, four blocks parallel
+ **********************************************************************/
+#define F_tail(rlr, rx, op1, op2, op3) \
+       movzbl rx ## bh,                RT0d; \
+       movzbl rx ## bl,                RT1d; \
+       roll $16,                       rx ## d; \
+       movl s1(RTAB,RT0,4),            RT0d; \
+       op1 ## l s2(RTAB,RT1,4),        RT0d; \
+       movzbl rx ## bh,                RT1d; \
+       movzbl rx ## bl,                rx ## d; \
+       op2 ## l s3(RTAB,RT1,4),        RT0d; \
+       op3 ## l s4(RTAB,rx,4),         RT0d; \
+       xorq RT0,                       rlr;
+
+#define F4(km, load_next_kr, op0, op1, op2, op3) \
+       movl km,                        RX0d; \
+       op0 ## l RLR0d,                 RX0d; \
+       roll RKRbl,                     RX0d; \
+       rorq $32,                       RLR0; \
+       \
+       movl km,                        RX1d; \
+       op0 ## l RLR1d,                 RX1d; \
+       roll RKRbl,                     RX1d; \
+       rorq $32,                       RLR1; \
+       \
+       movl km,                        RX2d; \
+       op0 ## l RLR2d,                 RX2d; \
+       roll RKRbl,                     RX2d; \
+       rorq $32,                       RLR2; \
+       \
+       F_tail(RLR0, RX0, op1, op2, op3); \
+       F_tail(RLR1, RX1, op1, op2, op3); \
+       F_tail(RLR2, RX2, op1, op2, op3); \
+       \
+       movl km,                        RX0d; \
+       op0 ## l RLR3d,                 RX0d; \
+       roll RKRbl,                     RX0d; \
+       load_next_kr();                 \
+       rorq $32,                       RLR3; \
+       \
+       F_tail(RLR3, RX0, op1, op2, op3);
+
+#define F4_1(km, load_next_kr) \
+       F4(km, load_next_kr, add, xor, sub, add)
+#define F4_2(km, load_next_kr) \
+       F4(km, load_next_kr, xor, sub, add, xor)
+#define F4_3(km, load_next_kr) \
+       F4(km, load_next_kr, sub, add, xor, sub)
+
+#define round_enc4(n, FA, FB, fn1, fn2) \
+       get_round_km(n + 1, RKM1d); \
+       FA(RKM0d, fn1); \
+       get_round_km(n + 2, RKM0d); \
+       FB(RKM1d, fn2);
+
+#define round_enc_last4(n, FXA, FXB) \
+       get_round_km(n + 1, RKM1d); \
+       FXA(RKM0d, shr_kr); \
+       FXB(RKM1d, dummy);
+
+#define round_enc4_1(n, FA, FB) \
+       round_enc4(n, FA, FB, shr_kr, shr_kr);
+
+#define round_enc4_2(n, FA, FB) \
+       round_enc4(n, FA, FB, shr_kr, dummy);
+
+#define round_dec4(n, FA, FB, fn1, fn2) \
+       get_round_km(n - 1, RKM1d); \
+       FA(RKM0d, fn1); \
+       get_round_km(n - 2, RKM0d); \
+       FB(RKM1d, fn2);
+
+#define round_dec_last4(n, FXA, FXB) \
+       get_round_km(n - 1, RKM1d); \
+       FXA(RKM0d, shr_kr); \
+       FXB(RKM1d, dummy);
+
+#define round_dec4_1(n, FA, FB) \
+       round_dec4(n, FA, FB, shr_kr, shr_kr);
+
+#define round_dec4_2(n, FA, FB) \
+       round_dec4(n, FA, FB, shr_kr, dummy);
+
+#define inbswap_block4(a, b, c, d) \
+       bswapq                  a; \
+       bswapq                  b; \
+       bswapq                  c; \
+       bswapq                  d;
+
+#define outbswap_block4(a, b, c, d) \
+       bswapq                  a; \
+       bswapq                  b; \
+       bswapq                  c; \
+       bswapq                  d; \
+       rorq $32,               a; \
+       rorq $32,               b; \
+       rorq $32,               c; \
+       rorq $32,               d;
+
+.align 8
+ELF(.type   __cast5_enc_blk4,@function;)
+
+__cast5_enc_blk4:
+       /* input:
+        *      %rdi: ctx, CTX
+        *      RLR0,RLR1,RLR2,RLR3: four input plaintext blocks
+        * output:
+        *      RLR0,RLR1,RLR2,RLR3: four output ciphertext blocks
+        */
+       CFI_STARTPROC();
+       GET_EXTERN_POINTER(_gcry_cast5_s1to4, RTAB);
+
+       get_round_km(0, RKM0d);
+       get_round_kr_enc(0);
+       round_enc4_1(0, F4_1, F4_2);
+       round_enc4_1(2, F4_3, F4_1);
+       round_enc4_1(4, F4_2, F4_3);
+       round_enc4_2(6, F4_1, F4_2);
+       get_round_kr_enc(8);
+       round_enc4_1(8, F4_3, F4_1);
+       round_enc4_1(10, F4_2, F4_3);
+       round_enc4_1(12, F4_1, F4_2);
+       round_enc_last4(14, F4_3, F4_1);
+
+       outbswap_block4(RLR0, RLR1, RLR2, RLR3);
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size __cast5_enc_blk4,.-__cast5_enc_blk4;)
+
+.align 8
+ELF(.type   __cast5_dec_blk4,@function;)
+
+__cast5_dec_blk4:
+       /* input:
+        *      %rdi: ctx, CTX
+        *      RLR0,RLR1,RLR2,RLR3: four input ciphertext blocks
+        * output:
+        *      RLR0,RLR1,RLR2,RLR3: four output plaintext blocks
+        */
+       CFI_STARTPROC();
+       GET_EXTERN_POINTER(_gcry_cast5_s1to4, RTAB);
+
+       inbswap_block4(RLR0, RLR1, RLR2, RLR3);
+
+       get_round_km(15, RKM0d);
+       get_round_kr_dec(15);
+       round_dec4_1(15, F4_1, F4_3);
+       round_dec4_1(13, F4_2, F4_1);
+       round_dec4_1(11, F4_3, F4_2);
+       round_dec4_2(9, F4_1, F4_3);
+       get_round_kr_dec(7);
+       round_dec4_1(7, F4_2, F4_1);
+       round_dec4_1(5, F4_3, F4_2);
+       round_dec4_1(3, F4_1, F4_3);
+       round_dec_last4(1, F4_2, F4_1);
+
+       outbswap_block4(RLR0, RLR1, RLR2, RLR3);
+       CFI_ENDPROC();
+       ret_spec_stop;
+ELF(.size __cast5_dec_blk4,.-__cast5_dec_blk4;)
+
+.align 8
+.globl _gcry_cast5_amd64_ctr_enc
+ELF(.type   _gcry_cast5_amd64_ctr_enc,@function;)
+_gcry_cast5_amd64_ctr_enc:
+       /* input:
+        *      %rdi: ctx, CTX
+        *      %rsi: dst (4 blocks)
+        *      %rdx: src (4 blocks)
+        *      %rcx: iv (big endian, 64bit)
+        */
+       CFI_STARTPROC();
+       ENTER_SYSV_FUNC_PARAMS_0_4
+
+       pushq %rbp;
+       CFI_PUSH(%rbp);
+       pushq %rbx;
+       CFI_PUSH(%rbx);
+       pushq %r12;
+       CFI_PUSH(%r12);
+       pushq %r13;
+       CFI_PUSH(%r13);
+       pushq %r14;
+       CFI_PUSH(%r14);
+
+       pushq %rsi;
+       CFI_PUSH(%rsi);
+       pushq %rdx;
+       CFI_PUSH(%rdx);
+
+       /* load IV and byteswap */
+       movq (%rcx), RX0;
+       bswapq RX0;
+       movq RX0, RLR0;
+
+       /* construct IVs */
+       leaq 1(RX0), RLR1;
+       leaq 2(RX0), RLR2;
+       leaq 3(RX0), RLR3;
+       leaq 4(RX0), RX0;
+       bswapq RX0;
+
+       /* store new IV */
+       movq RX0, (%rcx);
+
+       call __cast5_enc_blk4;
+
+       popq %r14; /*src*/
+       CFI_POP_TMP_REG();
+       popq %r13; /*dst*/
+       CFI_POP_TMP_REG();
+
+       /* XOR key-stream with plaintext */
+       xorq 0 * 8(%r14), RLR0;
+       xorq 1 * 8(%r14), RLR1;
+       xorq 2 * 8(%r14), RLR2;
+       xorq 3 * 8(%r14), RLR3;
+       movq RLR0, 0 * 8(%r13);
+       movq RLR1, 1 * 8(%r13);
+       movq RLR2, 2 * 8(%r13);
+       movq RLR3, 3 * 8(%r13);
+
+       popq %r14;
+       CFI_POP(%r14);
+       popq %r13;
+       CFI_POP(%r13);
+       popq %r12;
+       CFI_POP(%r12);
+       popq %rbx;
+       CFI_POP(%rbx);
+       popq %rbp;
+       CFI_POP(%rbp);
+
+       EXIT_SYSV_FUNC
+       ret_spec_stop
+       CFI_ENDPROC();
+ELF(.size _gcry_cast5_amd64_ctr_enc,.-_gcry_cast5_amd64_ctr_enc;)
+
+.align 8
+.globl _gcry_cast5_amd64_cbc_dec
+ELF(.type   _gcry_cast5_amd64_cbc_dec,@function;)
+_gcry_cast5_amd64_cbc_dec:
+       /* input:
+        *      %rdi: ctx, CTX
+        *      %rsi: dst (4 blocks)
+        *      %rdx: src (4 blocks)
+        *      %rcx: iv (64bit)
+        */
+       CFI_STARTPROC();
+       ENTER_SYSV_FUNC_PARAMS_0_4
+
+       pushq %rbp;
+       CFI_PUSH(%rbp);
+       pushq %rbx;
+       CFI_PUSH(%rbx);
+       pushq %r12;
+       CFI_PUSH(%r12);
+       pushq %r13;
+       CFI_PUSH(%r13);
+       pushq %r14;
+       CFI_PUSH(%r14);
+
+       pushq %rcx;
+       CFI_PUSH(%rcx);
+       pushq %rsi;
+       CFI_PUSH(%rsi);
+       pushq %rdx;
+       CFI_PUSH(%rdx);
+
+       /* load input */
+       movq 0 * 8(%rdx), RLR0;
+       movq 1 * 8(%rdx), RLR1;
+       movq 2 * 8(%rdx), RLR2;
+       movq 3 * 8(%rdx), RLR3;
+
+       call __cast5_dec_blk4;
+
+       popq RX0; /*src*/
+       CFI_POP_TMP_REG();
+       popq RX1; /*dst*/
+       CFI_POP_TMP_REG();
+       popq RX2; /*iv*/
+       CFI_POP_TMP_REG();
+
+       movq 3 * 8(RX0), %r14;
+       xorq      (RX2), RLR0;
+       xorq 0 * 8(RX0), RLR1;
+       xorq 1 * 8(RX0), RLR2;
+       xorq 2 * 8(RX0), RLR3;
+       movq %r14, (RX2); /* store new IV */
+
+       movq RLR0, 0 * 8(RX1);
+       movq RLR1, 1 * 8(RX1);
+       movq RLR2, 2 * 8(RX1);
+       movq RLR3, 3 * 8(RX1);
+
+       popq %r14;
+       CFI_POP(%r14);
+       popq %r13;
+       CFI_POP(%r13);
+       popq %r12;
+       CFI_POP(%r12);
+       popq %rbx;
+       CFI_POP(%rbx);
+       popq %rbp;
+       CFI_POP(%rbp);
+
+       EXIT_SYSV_FUNC
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size _gcry_cast5_amd64_cbc_dec,.-_gcry_cast5_amd64_cbc_dec;)
+
+.align 8
+.globl _gcry_cast5_amd64_cfb_dec
+ELF(.type   _gcry_cast5_amd64_cfb_dec,@function;)
+_gcry_cast5_amd64_cfb_dec:
+       /* input:
+        *      %rdi: ctx, CTX
+        *      %rsi: dst (4 blocks)
+        *      %rdx: src (4 blocks)
+        *      %rcx: iv (64bit)
+        */
+       CFI_STARTPROC();
+       ENTER_SYSV_FUNC_PARAMS_0_4
+
+       pushq %rbp;
+       CFI_PUSH(%rbp);
+       pushq %rbx;
+       CFI_PUSH(%rbx);
+       pushq %r12;
+       CFI_PUSH(%r12);
+       pushq %r13;
+       CFI_PUSH(%r13);
+       pushq %r14;
+       CFI_PUSH(%r14);
+
+       pushq %rsi;
+       CFI_PUSH(%rsi);
+       pushq %rdx;
+       CFI_PUSH(%rdx);
+
+       /* Load input */
+       movq (%rcx), RLR0;
+       movq 0 * 8(%rdx), RLR1;
+       movq 1 * 8(%rdx), RLR2;
+       movq 2 * 8(%rdx), RLR3;
+
+       inbswap_block4(RLR0, RLR1, RLR2, RLR3);
+
+       /* Update IV */
+       movq 3 * 8(%rdx), %rdx;
+       movq %rdx, (%rcx);
+
+       call __cast5_enc_blk4;
+
+       popq %rdx; /*src*/
+       CFI_POP_TMP_REG();
+       popq %rcx; /*dst*/
+       CFI_POP_TMP_REG();
+
+       xorq 0 * 8(%rdx), RLR0;
+       xorq 1 * 8(%rdx), RLR1;
+       xorq 2 * 8(%rdx), RLR2;
+       xorq 3 * 8(%rdx), RLR3;
+       movq RLR0, 0 * 8(%rcx);
+       movq RLR1, 1 * 8(%rcx);
+       movq RLR2, 2 * 8(%rcx);
+       movq RLR3, 3 * 8(%rcx);
+
+       popq %r14;
+       CFI_POP(%r14);
+       popq %r13;
+       CFI_POP(%r13);
+       popq %r12;
+       CFI_POP(%r12);
+       popq %rbx;
+       CFI_POP(%rbx);
+       popq %rbp;
+       CFI_POP(%rbp);
+
+       EXIT_SYSV_FUNC
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size _gcry_cast5_amd64_cfb_dec,.-_gcry_cast5_amd64_cfb_dec;)
+
+#endif /*defined(USE_CAST5)*/
+#endif /*__x86_64*/
diff --git a/grub-core/lib/libgcrypt/cipher/cast5-arm.S 
b/grub-core/lib/libgcrypt/cipher/cast5-arm.S
new file mode 100644
index 000000000..76ddd2e33
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/cast5-arm.S
@@ -0,0 +1,728 @@
+/* cast5-arm.S  -  ARM assembly implementation of CAST5 cipher
+ *
+ * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+
+#if defined(__ARMEL__)
+#ifdef HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS
+
+.text
+
+.syntax unified
+.arm
+
+.extern _gcry_cast5_s1to4;
+
+#ifdef __PIC__
+#  define GET_DATA_POINTER(reg, name, rtmp) \
+               ldr reg, 1f; \
+               ldr rtmp, 2f; \
+               b 3f; \
+       1:      .word _GLOBAL_OFFSET_TABLE_-(3f+8); \
+       2:      .word name(GOT); \
+       3:      add reg, pc, reg; \
+               ldr reg, [reg, rtmp];
+#else
+#  define GET_DATA_POINTER(reg, name, rtmp) ldr reg, =name
+#endif
+
+/* structure of crypto context */
+#define Km 0
+#define Kr (Km + (16 * 4))
+#define Kr_arm_enc (Kr + (16))
+#define Kr_arm_dec (Kr_arm_enc + (16))
+
+/* register macros */
+#define CTX %r0
+#define Rs1 %r7
+#define Rs2 %r8
+#define Rs3 %r9
+#define Rs4 %r10
+#define RMASK %r11
+#define RKM %r1
+#define RKR %r2
+
+#define RL0 %r3
+#define RR0 %r4
+
+#define RL1 %r9
+#define RR1 %r10
+
+#define RT0 %lr
+#define RT1 %ip
+#define RT2 %r5
+#define RT3 %r6
+
+/* helper macros */
+#define ldr_unaligned_le(rout, rsrc, offs, rtmp) \
+       ldrb rout, [rsrc, #((offs) + 0)]; \
+       ldrb rtmp, [rsrc, #((offs) + 1)]; \
+       orr rout, rout, rtmp, lsl #8; \
+       ldrb rtmp, [rsrc, #((offs) + 2)]; \
+       orr rout, rout, rtmp, lsl #16; \
+       ldrb rtmp, [rsrc, #((offs) + 3)]; \
+       orr rout, rout, rtmp, lsl #24;
+
+#define str_unaligned_le(rin, rdst, offs, rtmp0, rtmp1) \
+       mov rtmp0, rin, lsr #8; \
+       strb rin, [rdst, #((offs) + 0)]; \
+       mov rtmp1, rin, lsr #16; \
+       strb rtmp0, [rdst, #((offs) + 1)]; \
+       mov rtmp0, rin, lsr #24; \
+       strb rtmp1, [rdst, #((offs) + 2)]; \
+       strb rtmp0, [rdst, #((offs) + 3)];
+
+#define ldr_unaligned_be(rout, rsrc, offs, rtmp) \
+       ldrb rout, [rsrc, #((offs) + 3)]; \
+       ldrb rtmp, [rsrc, #((offs) + 2)]; \
+       orr rout, rout, rtmp, lsl #8; \
+       ldrb rtmp, [rsrc, #((offs) + 1)]; \
+       orr rout, rout, rtmp, lsl #16; \
+       ldrb rtmp, [rsrc, #((offs) + 0)]; \
+       orr rout, rout, rtmp, lsl #24;
+
+#define str_unaligned_be(rin, rdst, offs, rtmp0, rtmp1) \
+       mov rtmp0, rin, lsr #8; \
+       strb rin, [rdst, #((offs) + 3)]; \
+       mov rtmp1, rin, lsr #16; \
+       strb rtmp0, [rdst, #((offs) + 2)]; \
+       mov rtmp0, rin, lsr #24; \
+       strb rtmp1, [rdst, #((offs) + 1)]; \
+       strb rtmp0, [rdst, #((offs) + 0)];
+
+#ifdef __ARMEL__
+       #define ldr_unaligned_host ldr_unaligned_le
+       #define str_unaligned_host str_unaligned_le
+
+       /* bswap on little-endian */
+#ifdef HAVE_ARM_ARCH_V6
+       #define host_to_be(reg, rtmp) \
+               rev reg, reg;
+       #define be_to_host(reg, rtmp) \
+               rev reg, reg;
+#else
+       #define host_to_be(reg, rtmp) \
+               eor     rtmp, reg, reg, ror #16; \
+               mov     rtmp, rtmp, lsr #8; \
+               bic     rtmp, rtmp, #65280; \
+               eor     reg, rtmp, reg, ror #8;
+       #define be_to_host(reg, rtmp) \
+               eor     rtmp, reg, reg, ror #16; \
+               mov     rtmp, rtmp, lsr #8; \
+               bic     rtmp, rtmp, #65280; \
+               eor     reg, rtmp, reg, ror #8;
+#endif
+#else
+       #define ldr_unaligned_host ldr_unaligned_be
+       #define str_unaligned_host str_unaligned_be
+
+       /* nop on big-endian */
+       #define host_to_be(reg, rtmp) /*_*/
+       #define be_to_host(reg, rtmp) /*_*/
+#endif
+
+#define host_to_host(x, y) /*_*/
+
+/**********************************************************************
+  1-way cast5
+ **********************************************************************/
+
+#define dummy(n) /*_*/
+
+#define load_kr(n) \
+       ldr RKR, [CTX, #(Kr_arm_enc + (n))]; /* Kr[n] */
+
+#define load_dec_kr(n) \
+       ldr RKR, [CTX, #(Kr_arm_dec + (n) - 3)]; /* Kr[n] */
+
+#define load_km(n) \
+       ldr RKM, [CTX, #(Km + (n) * 4)]; /* Km[n] */
+
+#define shift_kr(dummy) \
+       mov RKR, RKR, lsr #8;
+
+#define F(n, rl, rr, op1, op2, op3, op4, dec, loadkm, shiftkr, loadkr) \
+       op1 RKM, rr; \
+       mov RKM, RKM, ror RKR; \
+       \
+       and RT0, RMASK, RKM, ror #(24); \
+       and RT1, RMASK, RKM, lsr #(16); \
+       and RT2, RMASK, RKM, lsr #(8); \
+       ldr RT0, [Rs1, RT0]; \
+       and RT3, RMASK, RKM; \
+       ldr RT1, [Rs2, RT1]; \
+       shiftkr(RKR); \
+       \
+       ldr RT2, [Rs3, RT2]; \
+       \
+       op2 RT0, RT1; \
+       ldr RT3, [Rs4, RT3]; \
+       op3 RT0, RT2; \
+       loadkm((n) + (1 - ((dec) * 2))); \
+       op4 RT0, RT3; \
+       loadkr((n) + (1 - ((dec) * 2))); \
+       eor rl, RT0;
+
+#define F1(n, rl, rr, dec, loadkm, shiftkr, loadkr) \
+       F(n, rl, rr, add, eor, sub, add, dec, loadkm, shiftkr, loadkr)
+#define F2(n, rl, rr, dec, loadkm, shiftkr, loadkr) \
+       F(n, rl, rr, eor, sub, add, eor, dec, loadkm, shiftkr, loadkr)
+#define F3(n, rl, rr, dec, loadkm, shiftkr, loadkr) \
+       F(n, rl, rr, sub, add, eor, sub, dec, loadkm, shiftkr, loadkr)
+
+#define enc_round(n, Fx, rl, rr, loadkm, shiftkr, loadkr) \
+       Fx(n, rl, rr, 0, loadkm, shiftkr, loadkr)
+
+#define dec_round(n, Fx, rl, rr, loadkm, shiftkr, loadkr) \
+       Fx(n, rl, rr, 1, loadkm, shiftkr, loadkr)
+
+#define read_block_aligned(rin, offs, l0, r0, convert, rtmp) \
+       ldr l0, [rin, #((offs) + 0)]; \
+       ldr r0, [rin, #((offs) + 4)]; \
+       convert(l0, rtmp); \
+       convert(r0, rtmp);
+
+#define write_block_aligned(rout, offs, l0, r0, convert, rtmp) \
+       convert(l0, rtmp); \
+       convert(r0, rtmp); \
+       str l0, [rout, #((offs) + 0)]; \
+       str r0, [rout, #((offs) + 4)];
+
+#ifdef __ARM_FEATURE_UNALIGNED
+       /* unaligned word reads allowed */
+       #define read_block(rin, offs, l0, r0, rtmp0) \
+               read_block_aligned(rin, offs, l0, r0, host_to_be, rtmp0)
+
+       #define write_block(rout, offs, r0, l0, rtmp0, rtmp1) \
+               write_block_aligned(rout, offs, r0, l0, be_to_host, rtmp0)
+
+       #define read_block_host(rin, offs, l0, r0, rtmp0) \
+               read_block_aligned(rin, offs, l0, r0, host_to_host, rtmp0)
+
+       #define write_block_host(rout, offs, r0, l0, rtmp0, rtmp1) \
+               write_block_aligned(rout, offs, r0, l0, host_to_host, rtmp0)
+#else
+       /* need to handle unaligned reads by byte reads */
+       #define read_block(rin, offs, l0, r0, rtmp0) \
+               tst rin, #3; \
+               beq 1f; \
+                       ldr_unaligned_be(l0, rin, (offs) + 0, rtmp0); \
+                       ldr_unaligned_be(r0, rin, (offs) + 4, rtmp0); \
+                       b 2f; \
+               1:;\
+                       read_block_aligned(rin, offs, l0, r0, host_to_be, 
rtmp0); \
+               2:;
+
+       #define write_block(rout, offs, l0, r0, rtmp0, rtmp1) \
+               tst rout, #3; \
+               beq 1f; \
+                       str_unaligned_be(l0, rout, (offs) + 0, rtmp0, rtmp1); \
+                       str_unaligned_be(r0, rout, (offs) + 4, rtmp0, rtmp1); \
+                       b 2f; \
+               1:;\
+                       write_block_aligned(rout, offs, l0, r0, be_to_host, 
rtmp0); \
+               2:;
+
+       #define read_block_host(rin, offs, l0, r0, rtmp0) \
+               tst rin, #3; \
+               beq 1f; \
+                       ldr_unaligned_host(l0, rin, (offs) + 0, rtmp0); \
+                       ldr_unaligned_host(r0, rin, (offs) + 4, rtmp0); \
+                       b 2f; \
+               1:;\
+                       read_block_aligned(rin, offs, l0, r0, host_to_host, 
rtmp0); \
+               2:;
+
+       #define write_block_host(rout, offs, l0, r0, rtmp0, rtmp1) \
+               tst rout, #3; \
+               beq 1f; \
+                       str_unaligned_host(l0, rout, (offs) + 0, rtmp0, rtmp1); 
\
+                       str_unaligned_host(r0, rout, (offs) + 4, rtmp0, rtmp1); 
\
+                       b 2f; \
+               1:;\
+                       write_block_aligned(rout, offs, l0, r0, host_to_host, 
rtmp0); \
+               2:;
+#endif
+
+.align 3
+.globl _gcry_cast5_arm_encrypt_block
+.type  _gcry_cast5_arm_encrypt_block,%function;
+
+_gcry_cast5_arm_encrypt_block:
+       /* input:
+        *      %r0: CTX
+        *      %r1: dst
+        *      %r2: src
+        */
+       push {%r1, %r4-%r11, %ip, %lr};
+
+       GET_DATA_POINTER(Rs1, _gcry_cast5_s1to4, Rs2);
+       mov RMASK, #(0xff << 2);
+       add Rs2, Rs1, #(0x100*4);
+       add Rs3, Rs1, #(0x100*4*2);
+       add Rs4, Rs1, #(0x100*4*3);
+
+       read_block(%r2, 0, RL0, RR0, RT0);
+
+       load_km(0);
+       load_kr(0);
+       enc_round(0, F1, RL0, RR0, load_km, shift_kr, dummy);
+       enc_round(1, F2, RR0, RL0, load_km, shift_kr, dummy);
+       enc_round(2, F3, RL0, RR0, load_km, shift_kr, dummy);
+       enc_round(3, F1, RR0, RL0, load_km, dummy, load_kr);
+       enc_round(4, F2, RL0, RR0, load_km, shift_kr, dummy);
+       enc_round(5, F3, RR0, RL0, load_km, shift_kr, dummy);
+       enc_round(6, F1, RL0, RR0, load_km, shift_kr, dummy);
+       enc_round(7, F2, RR0, RL0, load_km, dummy, load_kr);
+       enc_round(8, F3, RL0, RR0, load_km, shift_kr, dummy);
+       enc_round(9, F1, RR0, RL0, load_km, shift_kr, dummy);
+       enc_round(10, F2, RL0, RR0, load_km, shift_kr, dummy);
+       enc_round(11, F3, RR0, RL0, load_km, dummy, load_kr);
+       enc_round(12, F1, RL0, RR0, load_km, shift_kr, dummy);
+       enc_round(13, F2, RR0, RL0, load_km, shift_kr, dummy);
+       enc_round(14, F3, RL0, RR0, load_km, shift_kr, dummy);
+       enc_round(15, F1, RR0, RL0, dummy, dummy, dummy);
+
+       ldr %r1, [%sp], #4;
+       write_block(%r1, 0, RR0, RL0, RT0, RT1);
+
+       pop {%r4-%r11, %ip, %pc};
+.ltorg
+.size _gcry_cast5_arm_encrypt_block,.-_gcry_cast5_arm_encrypt_block;
+
+.align 3
+.globl _gcry_cast5_arm_decrypt_block
+.type  _gcry_cast5_arm_decrypt_block,%function;
+
+_gcry_cast5_arm_decrypt_block:
+       /* input:
+        *      %r0: CTX
+        *      %r1: dst
+        *      %r2: src
+        */
+       push {%r1, %r4-%r11, %ip, %lr};
+
+       GET_DATA_POINTER(Rs1, _gcry_cast5_s1to4, Rs2);
+       mov RMASK, #(0xff << 2);
+       add Rs2, Rs1, #(0x100 * 4);
+       add Rs3, Rs1, #(0x100 * 4 * 2);
+       add Rs4, Rs1, #(0x100 * 4 * 3);
+
+       read_block(%r2, 0, RL0, RR0, RT0);
+
+       load_km(15);
+       load_dec_kr(15);
+       dec_round(15, F1, RL0, RR0, load_km, shift_kr, dummy);
+       dec_round(14, F3, RR0, RL0, load_km, shift_kr, dummy);
+       dec_round(13, F2, RL0, RR0, load_km, shift_kr, dummy);
+       dec_round(12, F1, RR0, RL0, load_km, dummy, load_dec_kr);
+       dec_round(11, F3, RL0, RR0, load_km, shift_kr, dummy);
+       dec_round(10, F2, RR0, RL0, load_km, shift_kr, dummy);
+       dec_round(9, F1, RL0, RR0, load_km, shift_kr, dummy);
+       dec_round(8, F3, RR0, RL0, load_km, dummy, load_dec_kr);
+       dec_round(7, F2, RL0, RR0, load_km, shift_kr, dummy);
+       dec_round(6, F1, RR0, RL0, load_km, shift_kr, dummy);
+       dec_round(5, F3, RL0, RR0, load_km, shift_kr, dummy);
+       dec_round(4, F2, RR0, RL0, load_km, dummy, load_dec_kr);
+       dec_round(3, F1, RL0, RR0, load_km, shift_kr, dummy);
+       dec_round(2, F3, RR0, RL0, load_km, shift_kr, dummy);
+       dec_round(1, F2, RL0, RR0, load_km, shift_kr, dummy);
+       dec_round(0, F1, RR0, RL0, dummy, dummy, dummy);
+
+       ldr %r1, [%sp], #4;
+       write_block(%r1, 0, RR0, RL0, RT0, RT1);
+
+       pop {%r4-%r11, %ip, %pc};
+.ltorg
+.size _gcry_cast5_arm_decrypt_block,.-_gcry_cast5_arm_decrypt_block;
+
+/**********************************************************************
+  2-way cast5
+ **********************************************************************/
+
+#define F_2w(n, rl0, rr0, rl1, rr1, op1, op2, op3, op4, dec, loadkm, shiftkr, \
+            loadkr) \
+       op1 RT3, RKM, rr0; \
+       op1 RKM, RKM, rr1; \
+       mov RT3, RT3, ror RKR; \
+       mov RKM, RKM, ror RKR; \
+       \
+       and RT0, RMASK, RT3, ror #(24); \
+       and RT1, RMASK, RT3, lsr #(16); \
+       and RT2, RMASK, RT3, lsr #(8); \
+       and RT3, RMASK, RT3; \
+       \
+       ldr RT0, [Rs1, RT0]; \
+       add RT2, #(0x100 * 4); \
+       ldr RT1, [Rs2, RT1]; \
+       add RT3, #(0x100 * 4 * 2); \
+       \
+       ldr RT2, [Rs2, RT2]; \
+       \
+       op2 RT0, RT1; \
+       ldr RT3, [Rs2, RT3]; \
+       and RT1, RMASK, RKM, ror #(24); \
+       op3 RT0, RT2; \
+       and RT2, RMASK, RKM, lsr #(16); \
+       op4 RT0, RT3; \
+       and RT3, RMASK, RKM, lsr #(8); \
+       eor rl0, RT0; \
+       add RT3, #(0x100 * 4); \
+       ldr RT1, [Rs1, RT1]; \
+       and RT0, RMASK, RKM; \
+       ldr RT2, [Rs2, RT2]; \
+       add RT0, #(0x100 * 4 * 2); \
+       \
+       ldr RT3, [Rs2, RT3]; \
+       \
+       op2 RT1, RT2; \
+       ldr RT0, [Rs2, RT0]; \
+       op3 RT1, RT3; \
+       loadkm((n) + (1 - ((dec) * 2))); \
+       op4 RT1, RT0; \
+       loadkr((n) + (1 - ((dec) * 2))); \
+       shiftkr(RKR); \
+       eor rl1, RT1;
+
+#define F1_2w(n, rl0, rr0, rl1, rr1, dec, loadkm, shiftkr, loadkr) \
+       F_2w(n, rl0, rr0, rl1, rr1, add, eor, sub, add, dec, \
+            loadkm, shiftkr, loadkr)
+#define F2_2w(n, rl0, rr0, rl1, rr1, dec, loadkm, shiftkr, loadkr) \
+       F_2w(n, rl0, rr0, rl1, rr1, eor, sub, add, eor, dec, \
+            loadkm, shiftkr, loadkr)
+#define F3_2w(n, rl0, rr0, rl1, rr1, dec, loadkm, shiftkr, loadkr) \
+       F_2w(n, rl0, rr0, rl1, rr1, sub, add, eor, sub, dec, \
+            loadkm, shiftkr, loadkr)
+
+#define enc_round2(n, Fx, rl, rr, loadkm, shiftkr, loadkr) \
+       Fx##_2w(n, rl##0, rr##0, rl##1, rr##1, 0, loadkm, shiftkr, loadkr)
+
+#define dec_round2(n, Fx, rl, rr, loadkm, shiftkr, loadkr) \
+       Fx##_2w(n, rl##0, rr##0, rl##1, rr##1, 1, loadkm, shiftkr, loadkr)
+
+#define read_block2_aligned(rin, l0, r0, l1, r1, convert, rtmp) \
+       ldr l0, [rin, #(0)]; \
+       ldr r0, [rin, #(4)]; \
+       convert(l0, rtmp); \
+       ldr l1, [rin, #(8)]; \
+       convert(r0, rtmp); \
+       ldr r1, [rin, #(12)]; \
+       convert(l1, rtmp); \
+       convert(r1, rtmp);
+
+#define write_block2_aligned(rout, l0, r0, l1, r1, convert, rtmp) \
+       convert(l0, rtmp); \
+       convert(r0, rtmp); \
+       convert(l1, rtmp); \
+       str l0, [rout, #(0)]; \
+       convert(r1, rtmp); \
+       str r0, [rout, #(4)]; \
+       str l1, [rout, #(8)]; \
+       str r1, [rout, #(12)];
+
+#ifdef __ARM_FEATURE_UNALIGNED
+       /* unaligned word reads allowed */
+       #define read_block2(rin, l0, r0, l1, r1, rtmp0) \
+               read_block2_aligned(rin, l0, r0, l1, r1, host_to_be, rtmp0)
+
+       #define write_block2(rout, l0, r0, l1, r1, rtmp0, rtmp1) \
+               write_block2_aligned(rout, l0, r0, l1, r1, be_to_host, rtmp0)
+
+       #define read_block2_host(rin, l0, r0, l1, r1, rtmp0) \
+               read_block2_aligned(rin, l0, r0, l1, r1, host_to_host, rtmp0)
+
+       #define write_block2_host(rout, l0, r0, l1, r1, rtmp0, rtmp1) \
+               write_block2_aligned(rout, l0, r0, l1, r1, host_to_host, rtmp0)
+#else
+       /* need to handle unaligned reads by byte reads */
+       #define read_block2(rin, l0, r0, l1, r1, rtmp0) \
+               tst rin, #3; \
+               beq 1f; \
+                       ldr_unaligned_be(l0, rin, 0, rtmp0); \
+                       ldr_unaligned_be(r0, rin, 4, rtmp0); \
+                       ldr_unaligned_be(l1, rin, 8, rtmp0); \
+                       ldr_unaligned_be(r1, rin, 12, rtmp0); \
+                       b 2f; \
+               1:;\
+                       read_block2_aligned(rin, l0, r0, l1, r1, host_to_be, 
rtmp0); \
+               2:;
+
+       #define write_block2(rout, l0, r0, l1, r1, rtmp0, rtmp1) \
+               tst rout, #3; \
+               beq 1f; \
+                       str_unaligned_be(l0, rout, 0, rtmp0, rtmp1); \
+                       str_unaligned_be(r0, rout, 4, rtmp0, rtmp1); \
+                       str_unaligned_be(l1, rout, 8, rtmp0, rtmp1); \
+                       str_unaligned_be(r1, rout, 12, rtmp0, rtmp1); \
+                       b 2f; \
+               1:;\
+                       write_block2_aligned(rout, l0, r0, l1, r1, be_to_host, 
rtmp0); \
+               2:;
+
+       #define read_block2_host(rin, l0, r0, l1, r1, rtmp0) \
+               tst rin, #3; \
+               beq 1f; \
+                       ldr_unaligned_host(l0, rin, 0, rtmp0); \
+                       ldr_unaligned_host(r0, rin, 4, rtmp0); \
+                       ldr_unaligned_host(l1, rin, 8, rtmp0); \
+                       ldr_unaligned_host(r1, rin, 12, rtmp0); \
+                       b 2f; \
+               1:;\
+                       read_block2_aligned(rin, l0, r0, l1, r1, host_to_host, 
rtmp0); \
+               2:;
+
+       #define write_block2_host(rout, l0, r0, l1, r1, rtmp0, rtmp1) \
+               tst rout, #3; \
+               beq 1f; \
+                       str_unaligned_host(l0, rout, 0, rtmp0, rtmp1); \
+                       str_unaligned_host(r0, rout, 4, rtmp0, rtmp1); \
+                       str_unaligned_host(l1, rout, 8, rtmp0, rtmp1); \
+                       str_unaligned_host(r1, rout, 12, rtmp0, rtmp1); \
+                       b 2f; \
+               1:;\
+                       write_block2_aligned(rout, l0, r0, l1, r1, 
host_to_host, rtmp0); \
+               2:;
+#endif
+
+.align 3
+.type  _gcry_cast5_arm_enc_blk2,%function;
+
+_gcry_cast5_arm_enc_blk2:
+       /* input:
+        *      preloaded: CTX
+        *      [RL0, RR0], [RL1, RR1]: src
+        * output:
+        *      [RR0, RL0], [RR1, RL1]: dst
+        */
+       push {%lr};
+
+       GET_DATA_POINTER(Rs1, _gcry_cast5_s1to4, Rs2);
+       mov RMASK, #(0xff << 2);
+       add Rs2, Rs1, #(0x100 * 4);
+
+       load_km(0);
+       load_kr(0);
+       enc_round2(0, F1, RL, RR, load_km, shift_kr, dummy);
+       enc_round2(1, F2, RR, RL, load_km, shift_kr, dummy);
+       enc_round2(2, F3, RL, RR, load_km, shift_kr, dummy);
+       enc_round2(3, F1, RR, RL, load_km, dummy, load_kr);
+       enc_round2(4, F2, RL, RR, load_km, shift_kr, dummy);
+       enc_round2(5, F3, RR, RL, load_km, shift_kr, dummy);
+       enc_round2(6, F1, RL, RR, load_km, shift_kr, dummy);
+       enc_round2(7, F2, RR, RL, load_km, dummy, load_kr);
+       enc_round2(8, F3, RL, RR, load_km, shift_kr, dummy);
+       enc_round2(9, F1, RR, RL, load_km, shift_kr, dummy);
+       enc_round2(10, F2, RL, RR, load_km, shift_kr, dummy);
+       enc_round2(11, F3, RR, RL, load_km, dummy, load_kr);
+       enc_round2(12, F1, RL, RR, load_km, shift_kr, dummy);
+       enc_round2(13, F2, RR, RL, load_km, shift_kr, dummy);
+       enc_round2(14, F3, RL, RR, load_km, shift_kr, dummy);
+       enc_round2(15, F1, RR, RL, dummy, dummy, dummy);
+
+       host_to_be(RR0, RT0);
+       host_to_be(RL0, RT0);
+       host_to_be(RR1, RT0);
+       host_to_be(RL1, RT0);
+
+       pop {%pc};
+.ltorg
+.size _gcry_cast5_arm_enc_blk2,.-_gcry_cast5_arm_enc_blk2;
+
+.align 3
+.globl _gcry_cast5_arm_cfb_dec;
+.type  _gcry_cast5_arm_cfb_dec,%function;
+
+_gcry_cast5_arm_cfb_dec:
+       /* input:
+        *      %r0: CTX
+        *      %r1: dst (2 blocks)
+        *      %r2: src (2 blocks)
+        *      %r3: iv (64bit)
+        */
+       push {%r1, %r2, %r4-%r11, %ip, %lr};
+
+       mov %lr, %r3;
+
+       /* Load input (iv/%r3 is aligned, src/%r2 might not be) */
+       ldm %r3, {RL0, RR0};
+       host_to_be(RL0, RT1);
+       host_to_be(RR0, RT1);
+       read_block(%r2, 0, RL1, RR1, %ip);
+
+       /* Update IV, load src[1] and save to iv[0] */
+       read_block_host(%r2, 8, %r5, %r6, %r7);
+       stm %lr, {%r5, %r6};
+
+       bl _gcry_cast5_arm_enc_blk2;
+       /* result in RR0:RL0, RR1:RL1 = %r4:%r3, %r10:%r9 */
+
+       /* %r0: dst, %r1: %src */
+       pop {%r0, %r1};
+
+       /* dst = src ^ result */
+       read_block2_host(%r1, %r5, %r6, %r7, %r8, %lr);
+       eor %r5, %r4;
+       eor %r6, %r3;
+       eor %r7, %r10;
+       eor %r8, %r9;
+       write_block2_host(%r0, %r5, %r6, %r7, %r8, %r1, %r2);
+
+       pop {%r4-%r11, %ip, %pc};
+.ltorg
+.size _gcry_cast5_arm_cfb_dec,.-_gcry_cast5_arm_cfb_dec;
+
+.align 3
+.globl _gcry_cast5_arm_ctr_enc;
+.type  _gcry_cast5_arm_ctr_enc,%function;
+
+_gcry_cast5_arm_ctr_enc:
+       /* input:
+        *      %r0: CTX
+        *      %r1: dst (2 blocks)
+        *      %r2: src (2 blocks)
+        *      %r3: iv (64bit, big-endian)
+        */
+       push {%r1, %r2, %r4-%r11, %ip, %lr};
+
+       mov %lr, %r3;
+
+       /* Load IV (big => host endian) */
+       read_block_aligned(%lr, 0, RL0, RR0, be_to_host, RT1);
+
+       /* Construct IVs */
+       adds RR1, RR0, #1; /* +1 */
+       adc RL1, RL0, #0;
+       adds %r6, RR1, #1; /* +2 */
+       adc %r5, RL1, #0;
+
+       /* Store new IV (host => big-endian) */
+       write_block_aligned(%lr, 0, %r5, %r6, host_to_be, RT1);
+
+       bl _gcry_cast5_arm_enc_blk2;
+       /* result in RR0:RL0, RR1:RL1 = %r4:%r3, %r10:%r9 */
+
+       /* %r0: dst, %r1: %src */
+       pop {%r0, %r1};
+
+       /* XOR key-stream with plaintext */
+       read_block2_host(%r1, %r5, %r6, %r7, %r8, %lr);
+       eor %r5, %r4;
+       eor %r6, %r3;
+       eor %r7, %r10;
+       eor %r8, %r9;
+       write_block2_host(%r0, %r5, %r6, %r7, %r8, %r1, %r2);
+
+       pop {%r4-%r11, %ip, %pc};
+.ltorg
+.size _gcry_cast5_arm_ctr_enc,.-_gcry_cast5_arm_ctr_enc;
+
+.align 3
+.type  _gcry_cast5_arm_dec_blk2,%function;
+
+_gcry_cast5_arm_dec_blk2:
+       /* input:
+        *      preloaded: CTX
+        *      [RL0, RR0], [RL1, RR1]: src
+        * output:
+        *      [RR0, RL0], [RR1, RL1]: dst
+        */
+
+       GET_DATA_POINTER(Rs1, _gcry_cast5_s1to4, Rs2);
+       mov RMASK, #(0xff << 2);
+       add Rs2, Rs1, #(0x100 * 4);
+
+       load_km(15);
+       load_dec_kr(15);
+       dec_round2(15, F1, RL, RR, load_km, shift_kr, dummy);
+       dec_round2(14, F3, RR, RL, load_km, shift_kr, dummy);
+       dec_round2(13, F2, RL, RR, load_km, shift_kr, dummy);
+       dec_round2(12, F1, RR, RL, load_km, dummy, load_dec_kr);
+       dec_round2(11, F3, RL, RR, load_km, shift_kr, dummy);
+       dec_round2(10, F2, RR, RL, load_km, shift_kr, dummy);
+       dec_round2(9, F1, RL, RR, load_km, shift_kr, dummy);
+       dec_round2(8, F3, RR, RL, load_km, dummy, load_dec_kr);
+       dec_round2(7, F2, RL, RR, load_km, shift_kr, dummy);
+       dec_round2(6, F1, RR, RL, load_km, shift_kr, dummy);
+       dec_round2(5, F3, RL, RR, load_km, shift_kr, dummy);
+       dec_round2(4, F2, RR, RL, load_km, dummy, load_dec_kr);
+       dec_round2(3, F1, RL, RR, load_km, shift_kr, dummy);
+       dec_round2(2, F3, RR, RL, load_km, shift_kr, dummy);
+       dec_round2(1, F2, RL, RR, load_km, shift_kr, dummy);
+       dec_round2(0, F1, RR, RL, dummy, dummy, dummy);
+
+       host_to_be(RR0, RT0);
+       host_to_be(RL0, RT0);
+       host_to_be(RR1, RT0);
+       host_to_be(RL1, RT0);
+
+       b .Ldec_cbc_tail;
+.ltorg
+.size _gcry_cast5_arm_dec_blk2,.-_gcry_cast5_arm_dec_blk2;
+
+.align 3
+.globl _gcry_cast5_arm_cbc_dec;
+.type  _gcry_cast5_arm_cbc_dec,%function;
+
+_gcry_cast5_arm_cbc_dec:
+       /* input:
+        *      %r0: CTX
+        *      %r1: dst (2 blocks)
+        *      %r2: src (2 blocks)
+        *      %r3: iv (64bit)
+        */
+       push {%r1-%r11, %ip, %lr};
+
+       read_block2(%r2, RL0, RR0, RL1, RR1, RT0);
+
+       /* dec_blk2 is only used by cbc_dec, jump directly in/out instead
+        * of function call. */
+       b _gcry_cast5_arm_dec_blk2;
+.Ldec_cbc_tail:
+       /* result in RR0:RL0, RR1:RL1 = %r4:%r3, %r10:%r9 */
+
+       /* %r0: dst, %r1: %src, %r2: iv */
+       pop {%r0-%r2};
+
+       /* load IV+1 (src[0]) to %r7:%r8. Might be unaligned. */
+       read_block_host(%r1, 0, %r7, %r8, %r5);
+       /* load IV (iv[0]) to %r5:%r6. 'iv' is aligned. */
+       ldm %r2, {%r5, %r6};
+
+       /* out[1] ^= IV+1 */
+       eor %r10, %r7;
+       eor %r9, %r8;
+       /* out[0] ^= IV */
+       eor %r4, %r5;
+       eor %r3, %r6;
+
+       /* load IV+2 (src[1]) to %r7:%r8. Might be unaligned. */
+       read_block_host(%r1, 8, %r7, %r8, %r5);
+       /* store IV+2 to iv[0] (aligned). */
+       stm %r2, {%r7, %r8};
+
+       /* store result to dst[0-3]. Might be unaligned. */
+       write_block2_host(%r0, %r4, %r3, %r10, %r9, %r5, %r6);
+
+       pop {%r4-%r11, %ip, %pc};
+.ltorg
+.size _gcry_cast5_arm_cbc_dec,.-_gcry_cast5_arm_cbc_dec;
+
+#endif /*HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS*/
+#endif /*__ARM_ARCH >= 6*/
diff --git a/grub-core/lib/libgcrypt/cipher/cast5.c 
b/grub-core/lib/libgcrypt/cipher/cast5.c
index 9905f5cb9..837ea0fe5 100644
--- a/grub-core/lib/libgcrypt/cipher/cast5.c
+++ b/grub-core/lib/libgcrypt/cipher/cast5.c
@@ -42,22 +42,50 @@
 #include "g10lib.h"
 #include "types.h"
 #include "cipher.h"
+#include "bithelp.h"
+#include "bufhelp.h"
+#include "cipher-internal.h"
+#include "cipher-selftest.h"
+
+/* USE_AMD64_ASM indicates whether to use AMD64 assembly code. */
+#undef USE_AMD64_ASM
+#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+    defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+# define USE_AMD64_ASM 1
+#endif
+
+/* USE_ARM_ASM indicates whether to use ARM assembly code. */
+#undef USE_ARM_ASM
+#if defined(__ARMEL__)
+# ifdef HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS
+#  define USE_ARM_ASM 1
+# endif
+#endif
 
 #define CAST5_BLOCKSIZE 8
 
 typedef struct {
     u32  Km[16];
     byte Kr[16];
+#ifdef USE_ARM_ASM
+    u32 Kr_arm_enc[16 / sizeof(u32)];
+    u32 Kr_arm_dec[16 / sizeof(u32)];
+#endif
 } CAST5_context;
 
-static gcry_err_code_t cast_setkey (void *c, const byte *key, unsigned keylen);
-static void encrypt_block (void *c, byte *outbuf, const byte *inbuf);
-static void decrypt_block (void *c, byte *outbuf, const byte *inbuf);
+static gcry_err_code_t cast_setkey (void *c, const byte *key, unsigned keylen,
+                                    cipher_bulk_ops_t *bulk_ops);
+static unsigned int encrypt_block (void *c, byte *outbuf, const byte *inbuf);
+static unsigned int decrypt_block (void *c, byte *outbuf, const byte *inbuf);
 
 
 
+#define s1 _gcry_cast5_s1to4[0]
+#define s2 _gcry_cast5_s1to4[1]
+#define s3 _gcry_cast5_s1to4[2]
+#define s4 _gcry_cast5_s1to4[3]
 
-static const u32 s1[256] = {
+const u32 _gcry_cast5_s1to4[4][256] = { {
 0x30fb40d4, 0x9fa0ff0b, 0x6beccd2f, 0x3f258c7a, 0x1e213f2f, 0x9c004dd3, 
0x6003e540, 0xcf9fc949,
 0xbfd4af27, 0x88bbbdb5, 0xe2034090, 0x98d09675, 0x6e63a0e0, 0x15c361d2, 
0xc2e7661d, 0x22d4ff8e,
 0x28683b6f, 0xc07fd059, 0xff2379c8, 0x775f50e2, 0x43c340d3, 0xdf2f8656, 
0x887ca41a, 0xa2d2bd2d,
@@ -90,8 +118,7 @@ static const u32 s1[256] = {
 0x474d6ad7, 0x7c0c5e5c, 0xd1231959, 0x381b7298, 0xf5d2f4db, 0xab838653, 
0x6e2f1e23, 0x83719c9e,
 0xbd91e046, 0x9a56456e, 0xdc39200c, 0x20c8c571, 0x962bda1c, 0xe1e696ff, 
0xb141ab08, 0x7cca89b9,
 0x1a69e783, 0x02cc4843, 0xa2f7c579, 0x429ef47d, 0x427b169c, 0x5ac9f049, 
0xdd8f0f00, 0x5c8165bf
-};
-static const u32 s2[256] = {
+}, {
 0x1f201094, 0xef0ba75b, 0x69e3cf7e, 0x393f4380, 0xfe61cf7a, 0xeec5207a, 
0x55889c94, 0x72fc0651,
 0xada7ef79, 0x4e1d7235, 0xd55a63ce, 0xde0436ba, 0x99c430ef, 0x5f0c0794, 
0x18dcdb7d, 0xa1d6eff3,
 0xa0b52f7b, 0x59e83605, 0xee15b094, 0xe9ffd909, 0xdc440086, 0xef944459, 
0xba83ccb3, 0xe0c3cdfb,
@@ -124,8 +151,7 @@ static const u32 s2[256] = {
 0xb284600c, 0xd835731d, 0xdcb1c647, 0xac4c56ea, 0x3ebd81b3, 0x230eabb0, 
0x6438bc87, 0xf0b5b1fa,
 0x8f5ea2b3, 0xfc184642, 0x0a036b7a, 0x4fb089bd, 0x649da589, 0xa345415e, 
0x5c038323, 0x3e5d3bb9,
 0x43d79572, 0x7e6dd07c, 0x06dfdf1e, 0x6c6cc4ef, 0x7160a539, 0x73bfbe70, 
0x83877605, 0x4523ecf1
-};
-static const u32 s3[256] = {
+}, {
 0x8defc240, 0x25fa5d9f, 0xeb903dbf, 0xe810c907, 0x47607fff, 0x369fe44b, 
0x8c1fc644, 0xaececa90,
 0xbeb1f9bf, 0xeefbcaea, 0xe8cf1950, 0x51df07ae, 0x920e8806, 0xf0ad0548, 
0xe13c8d83, 0x927010d5,
 0x11107d9f, 0x07647db9, 0xb2e3e4d4, 0x3d4f285e, 0xb9afa820, 0xfade82e0, 
0xa067268b, 0x8272792e,
@@ -158,8 +184,7 @@ static const u32 s3[256] = {
 0x5727c148, 0x2be98a1d, 0x8ab41738, 0x20e1be24, 0xaf96da0f, 0x68458425, 
0x99833be5, 0x600d457d,
 0x282f9350, 0x8334b362, 0xd91d1120, 0x2b6d8da0, 0x642b1e31, 0x9c305a00, 
0x52bce688, 0x1b03588a,
 0xf7baefd5, 0x4142ed9c, 0xa4315c11, 0x83323ec5, 0xdfef4636, 0xa133c501, 
0xe9d3531c, 0xee353783
-};
-static const u32 s4[256] = {
+}, {
 0x9db30420, 0x1fb6e9de, 0xa7be7bef, 0xd273a298, 0x4a4f7bdb, 0x64ad8c57, 
0x85510443, 0xfa020ed1,
 0x7e287aff, 0xe60fb663, 0x095f35a1, 0x79ebf120, 0xfd059d43, 0x6497b7b1, 
0xf3641f63, 0x241e4adf,
 0x28147f5f, 0x4fa2b8cd, 0xc9430040, 0x0cc32220, 0xfdd30b30, 0xc0a5374f, 
0x1d2d00d9, 0x24147b15,
@@ -192,7 +217,7 @@ static const u32 s4[256] = {
 0xb5676e69, 0x9bd3ddda, 0xdf7e052f, 0xdb25701c, 0x1b5e51ee, 0xf65324e6, 
0x6afce36c, 0x0316cc04,
 0x8644213e, 0xb7dc59d0, 0x7965291f, 0xccd6fd43, 0x41823979, 0x932bcdf6, 
0xb657c34d, 0x4edfd282,
 0x7ae5290c, 0x3cb9536b, 0x851e20fe, 0x9833557e, 0x13ecf0b0, 0xd3ffb372, 
0x3f85c5c1, 0x0aef7ed2
-};
+} };
 static const u32 s5[256] = {
 0x7ec90c04, 0x2c6e74b9, 0x9b0e66df, 0xa6337911, 0xb86a7fff, 0x1dd358f5, 
0x44dd9d44, 0x1731167f,
 0x08fbf1fa, 0xe7f511cc, 0xd2051b00, 0x735aba00, 0x2ab722d8, 0x386381cb, 
0xacf6243a, 0x69befd7a,
@@ -331,24 +356,125 @@ static const u32 s8[256] = {
 };
 
 
-#if defined(__GNUC__) && defined(__i386__)
-static inline u32
-rol(int n, u32 x)
+#ifdef USE_AMD64_ASM
+
+/* Assembly implementations of CAST5. */
+extern void _gcry_cast5_amd64_encrypt_block(CAST5_context *c, byte *outbuf,
+                                           const byte *inbuf);
+
+extern void _gcry_cast5_amd64_decrypt_block(CAST5_context *c, byte *outbuf,
+                                           const byte *inbuf);
+
+/* These assembly implementations process four blocks in parallel. */
+extern void _gcry_cast5_amd64_ctr_enc(CAST5_context *ctx, byte *out,
+                                     const byte *in, byte *ctr);
+
+extern void _gcry_cast5_amd64_cbc_dec(CAST5_context *ctx, byte *out,
+                                     const byte *in, byte *iv);
+
+extern void _gcry_cast5_amd64_cfb_dec(CAST5_context *ctx, byte *out,
+                                     const byte *in, byte *iv);
+
+static void
+do_encrypt_block (CAST5_context *context, byte *outbuf, const byte *inbuf)
 {
-       __asm__("roll %%cl,%0"
-               :"=r" (x)
-               :"0" (x),"c" (n));
-       return x;
+  _gcry_cast5_amd64_encrypt_block (context, outbuf, inbuf);
 }
-#else
-#define rol(n,x) ( ((x) << (n)) | ((x) >> (32-(n))) )
-#endif
 
-#define F1(D,m,r)  (  (I = ((m) + (D))), (I=rol((r),I)),   \
+static void
+do_decrypt_block (CAST5_context *context, byte *outbuf, const byte *inbuf)
+{
+  _gcry_cast5_amd64_decrypt_block (context, outbuf, inbuf);
+}
+
+static void
+cast5_amd64_ctr_enc(CAST5_context *ctx, byte *out, const byte *in, byte *ctr)
+{
+  _gcry_cast5_amd64_ctr_enc (ctx, out, in, ctr);
+}
+
+static void
+cast5_amd64_cbc_dec(CAST5_context *ctx, byte *out, const byte *in, byte *iv)
+{
+  _gcry_cast5_amd64_cbc_dec (ctx, out, in, iv);
+}
+
+static void
+cast5_amd64_cfb_dec(CAST5_context *ctx, byte *out, const byte *in, byte *iv)
+{
+  _gcry_cast5_amd64_cfb_dec (ctx, out, in, iv);
+}
+
+static unsigned int
+encrypt_block (void *context , byte *outbuf, const byte *inbuf)
+{
+  CAST5_context *c = (CAST5_context *) context;
+  do_encrypt_block (c, outbuf, inbuf);
+  return /*burn_stack*/ (2*8);
+}
+
+static unsigned int
+decrypt_block (void *context, byte *outbuf, const byte *inbuf)
+{
+  CAST5_context *c = (CAST5_context *) context;
+  do_decrypt_block (c, outbuf, inbuf);
+  return /*burn_stack*/ (2*8);
+}
+
+#elif defined(USE_ARM_ASM)
+
+/* ARM assembly implementations of CAST5. */
+extern void _gcry_cast5_arm_encrypt_block(CAST5_context *c, byte *outbuf,
+                                           const byte *inbuf);
+
+extern void _gcry_cast5_arm_decrypt_block(CAST5_context *c, byte *outbuf,
+                                           const byte *inbuf);
+
+/* These assembly implementations process two blocks in parallel. */
+extern void _gcry_cast5_arm_ctr_enc(CAST5_context *ctx, byte *out,
+                                     const byte *in, byte *ctr);
+
+extern void _gcry_cast5_arm_cbc_dec(CAST5_context *ctx, byte *out,
+                                     const byte *in, byte *iv);
+
+extern void _gcry_cast5_arm_cfb_dec(CAST5_context *ctx, byte *out,
+                                     const byte *in, byte *iv);
+
+static void
+do_encrypt_block (CAST5_context *context, byte *outbuf, const byte *inbuf)
+{
+  _gcry_cast5_arm_encrypt_block (context, outbuf, inbuf);
+}
+
+static void
+do_decrypt_block (CAST5_context *context, byte *outbuf, const byte *inbuf)
+{
+  _gcry_cast5_arm_decrypt_block (context, outbuf, inbuf);
+}
+
+static unsigned int
+encrypt_block (void *context , byte *outbuf, const byte *inbuf)
+{
+  CAST5_context *c = (CAST5_context *) context;
+  do_encrypt_block (c, outbuf, inbuf);
+  return /*burn_stack*/ (10*4);
+}
+
+static unsigned int
+decrypt_block (void *context, byte *outbuf, const byte *inbuf)
+{
+  CAST5_context *c = (CAST5_context *) context;
+  do_decrypt_block (c, outbuf, inbuf);
+  return /*burn_stack*/ (10*4);
+}
+
+#else /*USE_ARM_ASM*/
+
+#define F1(D,m,r)  (  (I = ((m) + (D))), (I=rol(I,(r))),   \
     (((s1[I >> 24] ^ s2[(I>>16)&0xff]) - s3[(I>>8)&0xff]) + s4[I&0xff]) )
-#define F2(D,m,r)  (  (I = ((m) ^ (D))), (I=rol((r),I)),   \
+#define F2(D,m,r)  (  (I = ((m) ^ (D))), (I=rol(I,(r))),   \
     (((s1[I >> 24] - s2[(I>>16)&0xff]) + s3[(I>>8)&0xff]) ^ s4[I&0xff]) )
-#define F3(D,m,r)  (  (I = ((m) - (D))), (I=rol((r),I)),   \
+#define F3(D,m,r)  (  (I = ((m) - (D))), (I=rol(I,(r))),   \
     (((s1[I >> 24] + s2[(I>>16)&0xff]) ^ s3[(I>>8)&0xff]) - s4[I&0xff]) )
 
 static void
@@ -357,16 +483,16 @@ do_encrypt_block( CAST5_context *c, byte *outbuf, const 
byte *inbuf )
     u32 l, r, t;
     u32 I;   /* used by the Fx macros */
     u32 *Km;
-    byte *Kr;
+    u32 Kr;
 
     Km = c->Km;
-    Kr = c->Kr;
+    Kr = buf_get_le32(c->Kr + 0);
 
     /* (L0,R0) <-- (m1...m64). (Split the plaintext into left and
      * right 32-bit halves L0 = m1...m32 and R0 = m33...m64.)
      */
-    l = inbuf[0] << 24 | inbuf[1] << 16 | inbuf[2] << 8 | inbuf[3];
-    r = inbuf[4] << 24 | inbuf[5] << 16 | inbuf[6] << 8 | inbuf[7];
+    l = buf_get_be32(inbuf + 0);
+    r = buf_get_be32(inbuf + 4);
 
     /* (16 rounds) for i from 1 to 16, compute Li and Ri as follows:
      * Li = Ri-1;
@@ -376,41 +502,126 @@ do_encrypt_block( CAST5_context *c, byte *outbuf, const 
byte *inbuf )
      * Rounds 3, 6, 9, 12, and 15 use f function Type 3.
      */
 
-    t = l; l = r; r = t ^ F1(r, Km[ 0], Kr[ 0]);
-    t = l; l = r; r = t ^ F2(r, Km[ 1], Kr[ 1]);
-    t = l; l = r; r = t ^ F3(r, Km[ 2], Kr[ 2]);
-    t = l; l = r; r = t ^ F1(r, Km[ 3], Kr[ 3]);
-    t = l; l = r; r = t ^ F2(r, Km[ 4], Kr[ 4]);
-    t = l; l = r; r = t ^ F3(r, Km[ 5], Kr[ 5]);
-    t = l; l = r; r = t ^ F1(r, Km[ 6], Kr[ 6]);
-    t = l; l = r; r = t ^ F2(r, Km[ 7], Kr[ 7]);
-    t = l; l = r; r = t ^ F3(r, Km[ 8], Kr[ 8]);
-    t = l; l = r; r = t ^ F1(r, Km[ 9], Kr[ 9]);
-    t = l; l = r; r = t ^ F2(r, Km[10], Kr[10]);
-    t = l; l = r; r = t ^ F3(r, Km[11], Kr[11]);
-    t = l; l = r; r = t ^ F1(r, Km[12], Kr[12]);
-    t = l; l = r; r = t ^ F2(r, Km[13], Kr[13]);
-    t = l; l = r; r = t ^ F3(r, Km[14], Kr[14]);
-    t = l; l = r; r = t ^ F1(r, Km[15], Kr[15]);
+    t = l; l = r; r = t ^ F1(r, Km[ 0], Kr & 31); Kr >>= 8;
+    t = l; l = r; r = t ^ F2(r, Km[ 1], Kr & 31); Kr >>= 8;
+    t = l; l = r; r = t ^ F3(r, Km[ 2], Kr & 31); Kr >>= 8;
+    t = l; l = r; r = t ^ F1(r, Km[ 3], Kr & 31); Kr = buf_get_le32(c->Kr + 4);
+    t = l; l = r; r = t ^ F2(r, Km[ 4], Kr & 31); Kr >>= 8;
+    t = l; l = r; r = t ^ F3(r, Km[ 5], Kr & 31); Kr >>= 8;
+    t = l; l = r; r = t ^ F1(r, Km[ 6], Kr & 31); Kr >>= 8;
+    t = l; l = r; r = t ^ F2(r, Km[ 7], Kr & 31); Kr = buf_get_le32(c->Kr + 8);
+    t = l; l = r; r = t ^ F3(r, Km[ 8], Kr & 31); Kr >>= 8;
+    t = l; l = r; r = t ^ F1(r, Km[ 9], Kr & 31); Kr >>= 8;
+    t = l; l = r; r = t ^ F2(r, Km[10], Kr & 31); Kr >>= 8;
+    t = l; l = r; r = t ^ F3(r, Km[11], Kr & 31); Kr = buf_get_le32(c->Kr + 
12);
+    t = l; l = r; r = t ^ F1(r, Km[12], Kr & 31); Kr >>= 8;
+    t = l; l = r; r = t ^ F2(r, Km[13], Kr & 31); Kr >>= 8;
+    t = l; l = r; r = t ^ F3(r, Km[14], Kr & 31); Kr >>= 8;
+    t = l; l = r; r = t ^ F1(r, Km[15], Kr & 31);
 
     /* c1...c64 <-- (R16,L16). (Exchange final blocks L16, R16 and
      * concatenate to form the ciphertext.) */
-    outbuf[0] = (r >> 24) & 0xff;
-    outbuf[1] = (r >> 16) & 0xff;
-    outbuf[2] = (r >>  8) & 0xff;
-    outbuf[3] =  r       & 0xff;
-    outbuf[4] = (l >> 24) & 0xff;
-    outbuf[5] = (l >> 16) & 0xff;
-    outbuf[6] = (l >>  8) & 0xff;
-    outbuf[7] =  l       & 0xff;
+    buf_put_be32(outbuf + 0, r);
+    buf_put_be32(outbuf + 4, l);
 }
 
-static void
+static unsigned int
 encrypt_block (void *context , byte *outbuf, const byte *inbuf)
 {
   CAST5_context *c = (CAST5_context *) context;
   do_encrypt_block (c, outbuf, inbuf);
-  _gcry_burn_stack (20+4*sizeof(void*));
+  return /*burn_stack*/ (20+4*sizeof(void*));
+}
+
+
+static void
+do_encrypt_block_3( CAST5_context *c, byte *outbuf, const byte *inbuf )
+{
+    u32 l0, r0, t0, l1, r1, t1, l2, r2, t2;
+    u32 I;   /* used by the Fx macros */
+    u32 *Km;
+    u32 Kr;
+
+    Km = c->Km;
+    Kr = buf_get_le32(c->Kr + 0);
+
+    l0 = buf_get_be32(inbuf + 0);
+    r0 = buf_get_be32(inbuf + 4);
+    l1 = buf_get_be32(inbuf + 8);
+    r1 = buf_get_be32(inbuf + 12);
+    l2 = buf_get_be32(inbuf + 16);
+    r2 = buf_get_be32(inbuf + 20);
+
+    t0 = l0; l0 = r0; r0 = t0 ^ F1(r0, Km[ 0], Kr & 31);
+           t1 = l1; l1 = r1; r1 = t1 ^ F1(r1, Km[ 0], Kr & 31);
+                   t2 = l2; l2 = r2; r2 = t2 ^ F1(r2, Km[ 0], Kr & 31);
+    Kr >>= 8;
+    t0 = l0; l0 = r0; r0 = t0 ^ F2(r0, Km[ 1], Kr & 31);
+           t1 = l1; l1 = r1; r1 = t1 ^ F2(r1, Km[ 1], Kr & 31);
+                   t2 = l2; l2 = r2; r2 = t2 ^ F2(r2, Km[ 1], Kr & 31);
+    Kr >>= 8;
+    t0 = l0; l0 = r0; r0 = t0 ^ F3(r0, Km[ 2], Kr & 31);
+           t1 = l1; l1 = r1; r1 = t1 ^ F3(r1, Km[ 2], Kr & 31);
+                   t2 = l2; l2 = r2; r2 = t2 ^ F3(r2, Km[ 2], Kr & 31);
+    Kr >>= 8;
+    t0 = l0; l0 = r0; r0 = t0 ^ F1(r0, Km[ 3], Kr & 31);
+           t1 = l1; l1 = r1; r1 = t1 ^ F1(r1, Km[ 3], Kr & 31);
+                   t2 = l2; l2 = r2; r2 = t2 ^ F1(r2, Km[ 3], Kr & 31);
+    Kr = buf_get_le32(c->Kr + 4);
+    t0 = l0; l0 = r0; r0 = t0 ^ F2(r0, Km[ 4], Kr & 31);
+           t1 = l1; l1 = r1; r1 = t1 ^ F2(r1, Km[ 4], Kr & 31);
+                   t2 = l2; l2 = r2; r2 = t2 ^ F2(r2, Km[ 4], Kr & 31);
+    Kr >>= 8;
+    t0 = l0; l0 = r0; r0 = t0 ^ F3(r0, Km[ 5], Kr & 31);
+           t1 = l1; l1 = r1; r1 = t1 ^ F3(r1, Km[ 5], Kr & 31);
+                   t2 = l2; l2 = r2; r2 = t2 ^ F3(r2, Km[ 5], Kr & 31);
+    Kr >>= 8;
+    t0 = l0; l0 = r0; r0 = t0 ^ F1(r0, Km[ 6], Kr & 31);
+           t1 = l1; l1 = r1; r1 = t1 ^ F1(r1, Km[ 6], Kr & 31);
+                   t2 = l2; l2 = r2; r2 = t2 ^ F1(r2, Km[ 6], Kr & 31);
+    Kr >>= 8;
+    t0 = l0; l0 = r0; r0 = t0 ^ F2(r0, Km[ 7], Kr & 31);
+           t1 = l1; l1 = r1; r1 = t1 ^ F2(r1, Km[ 7], Kr & 31);
+                   t2 = l2; l2 = r2; r2 = t2 ^ F2(r2, Km[ 7], Kr & 31);
+    Kr = buf_get_le32(c->Kr + 8);
+    t0 = l0; l0 = r0; r0 = t0 ^ F3(r0, Km[ 8], Kr & 31);
+           t1 = l1; l1 = r1; r1 = t1 ^ F3(r1, Km[ 8], Kr & 31);
+                   t2 = l2; l2 = r2; r2 = t2 ^ F3(r2, Km[ 8], Kr & 31);
+    Kr >>= 8;
+    t0 = l0; l0 = r0; r0 = t0 ^ F1(r0, Km[ 9], Kr & 31);
+           t1 = l1; l1 = r1; r1 = t1 ^ F1(r1, Km[ 9], Kr & 31);
+                   t2 = l2; l2 = r2; r2 = t2 ^ F1(r2, Km[ 9], Kr & 31);
+    Kr >>= 8;
+    t0 = l0; l0 = r0; r0 = t0 ^ F2(r0, Km[10], Kr & 31);
+           t1 = l1; l1 = r1; r1 = t1 ^ F2(r1, Km[10], Kr & 31);
+                   t2 = l2; l2 = r2; r2 = t2 ^ F2(r2, Km[10], Kr & 31);
+    Kr >>= 8;
+    t0 = l0; l0 = r0; r0 = t0 ^ F3(r0, Km[11], Kr & 31);
+           t1 = l1; l1 = r1; r1 = t1 ^ F3(r1, Km[11], Kr & 31);
+                   t2 = l2; l2 = r2; r2 = t2 ^ F3(r2, Km[11], Kr & 31);
+    Kr = buf_get_le32(c->Kr + 12);
+    t0 = l0; l0 = r0; r0 = t0 ^ F1(r0, Km[12], Kr & 31);
+           t1 = l1; l1 = r1; r1 = t1 ^ F1(r1, Km[12], Kr & 31);
+                   t2 = l2; l2 = r2; r2 = t2 ^ F1(r2, Km[12], Kr & 31);
+    Kr >>= 8;
+    t0 = l0; l0 = r0; r0 = t0 ^ F2(r0, Km[13], Kr & 31);
+           t1 = l1; l1 = r1; r1 = t1 ^ F2(r1, Km[13], Kr & 31);
+                   t2 = l2; l2 = r2; r2 = t2 ^ F2(r2, Km[13], Kr & 31);
+    Kr >>= 8;
+    t0 = l0; l0 = r0; r0 = t0 ^ F3(r0, Km[14], Kr & 31);
+           t1 = l1; l1 = r1; r1 = t1 ^ F3(r1, Km[14], Kr & 31);
+                   t2 = l2; l2 = r2; r2 = t2 ^ F3(r2, Km[14], Kr & 31);
+    Kr >>= 8;
+    t0 = l0; l0 = r0; r0 = t0 ^ F1(r0, Km[15], Kr & 31);
+           t1 = l1; l1 = r1; r1 = t1 ^ F1(r1, Km[15], Kr & 31);
+                   t2 = l2; l2 = r2; r2 = t2 ^ F1(r2, Km[15], Kr & 31);
+
+    buf_put_be32(outbuf + 0, r0);
+    buf_put_be32(outbuf + 4, l0);
+    buf_put_be32(outbuf + 8, r1);
+    buf_put_be32(outbuf + 12, l1);
+    buf_put_be32(outbuf + 16, r2);
+    buf_put_be32(outbuf + 20, l2);
 }
 
 
@@ -420,47 +631,405 @@ do_decrypt_block (CAST5_context *c, byte *outbuf, const 
byte *inbuf )
     u32 l, r, t;
     u32 I;
     u32 *Km;
-    byte *Kr;
+    u32 Kr;
 
     Km = c->Km;
-    Kr = c->Kr;
-
-    l = inbuf[0] << 24 | inbuf[1] << 16 | inbuf[2] << 8 | inbuf[3];
-    r = inbuf[4] << 24 | inbuf[5] << 16 | inbuf[6] << 8 | inbuf[7];
-
-    t = l; l = r; r = t ^ F1(r, Km[15], Kr[15]);
-    t = l; l = r; r = t ^ F3(r, Km[14], Kr[14]);
-    t = l; l = r; r = t ^ F2(r, Km[13], Kr[13]);
-    t = l; l = r; r = t ^ F1(r, Km[12], Kr[12]);
-    t = l; l = r; r = t ^ F3(r, Km[11], Kr[11]);
-    t = l; l = r; r = t ^ F2(r, Km[10], Kr[10]);
-    t = l; l = r; r = t ^ F1(r, Km[ 9], Kr[ 9]);
-    t = l; l = r; r = t ^ F3(r, Km[ 8], Kr[ 8]);
-    t = l; l = r; r = t ^ F2(r, Km[ 7], Kr[ 7]);
-    t = l; l = r; r = t ^ F1(r, Km[ 6], Kr[ 6]);
-    t = l; l = r; r = t ^ F3(r, Km[ 5], Kr[ 5]);
-    t = l; l = r; r = t ^ F2(r, Km[ 4], Kr[ 4]);
-    t = l; l = r; r = t ^ F1(r, Km[ 3], Kr[ 3]);
-    t = l; l = r; r = t ^ F3(r, Km[ 2], Kr[ 2]);
-    t = l; l = r; r = t ^ F2(r, Km[ 1], Kr[ 1]);
-    t = l; l = r; r = t ^ F1(r, Km[ 0], Kr[ 0]);
-
-    outbuf[0] = (r >> 24) & 0xff;
-    outbuf[1] = (r >> 16) & 0xff;
-    outbuf[2] = (r >>  8) & 0xff;
-    outbuf[3] =  r       & 0xff;
-    outbuf[4] = (l >> 24) & 0xff;
-    outbuf[5] = (l >> 16) & 0xff;
-    outbuf[6] = (l >>  8) & 0xff;
-    outbuf[7] =  l       & 0xff;
+    Kr = buf_get_be32(c->Kr + 12);
+
+    l = buf_get_be32(inbuf + 0);
+    r = buf_get_be32(inbuf + 4);
+
+    t = l; l = r; r = t ^ F1(r, Km[15], Kr & 31); Kr >>= 8;
+    t = l; l = r; r = t ^ F3(r, Km[14], Kr & 31); Kr >>= 8;
+    t = l; l = r; r = t ^ F2(r, Km[13], Kr & 31); Kr >>= 8;
+    t = l; l = r; r = t ^ F1(r, Km[12], Kr & 31); Kr = buf_get_be32(c->Kr + 8);
+    t = l; l = r; r = t ^ F3(r, Km[11], Kr & 31); Kr >>= 8;
+    t = l; l = r; r = t ^ F2(r, Km[10], Kr & 31); Kr >>= 8;
+    t = l; l = r; r = t ^ F1(r, Km[ 9], Kr & 31); Kr >>= 8;
+    t = l; l = r; r = t ^ F3(r, Km[ 8], Kr & 31); Kr = buf_get_be32(c->Kr + 4);
+    t = l; l = r; r = t ^ F2(r, Km[ 7], Kr & 31); Kr >>= 8;
+    t = l; l = r; r = t ^ F1(r, Km[ 6], Kr & 31); Kr >>= 8;
+    t = l; l = r; r = t ^ F3(r, Km[ 5], Kr & 31); Kr >>= 8;
+    t = l; l = r; r = t ^ F2(r, Km[ 4], Kr & 31); Kr = buf_get_be32(c->Kr + 0);
+    t = l; l = r; r = t ^ F1(r, Km[ 3], Kr & 31); Kr >>= 8;
+    t = l; l = r; r = t ^ F3(r, Km[ 2], Kr & 31); Kr >>= 8;
+    t = l; l = r; r = t ^ F2(r, Km[ 1], Kr & 31); Kr >>= 8;
+    t = l; l = r; r = t ^ F1(r, Km[ 0], Kr & 31);
+
+    buf_put_be32(outbuf + 0, r);
+    buf_put_be32(outbuf + 4, l);
 }
 
-static void
+static unsigned int
 decrypt_block (void *context, byte *outbuf, const byte *inbuf)
 {
   CAST5_context *c = (CAST5_context *) context;
   do_decrypt_block (c, outbuf, inbuf);
-  _gcry_burn_stack (20+4*sizeof(void*));
+  return /*burn_stack*/ (20+4*sizeof(void*));
+}
+
+
+static void
+do_decrypt_block_3 (CAST5_context *c, byte *outbuf, const byte *inbuf )
+{
+    u32 l0, r0, t0, l1, r1, t1, l2, r2, t2;
+    u32 I;
+    u32 *Km;
+    u32 Kr;
+
+    Km = c->Km;
+    Kr = buf_get_be32(c->Kr + 12);
+
+    l0 = buf_get_be32(inbuf + 0);
+    r0 = buf_get_be32(inbuf + 4);
+    l1 = buf_get_be32(inbuf + 8);
+    r1 = buf_get_be32(inbuf + 12);
+    l2 = buf_get_be32(inbuf + 16);
+    r2 = buf_get_be32(inbuf + 20);
+
+    t0 = l0; l0 = r0; r0 = t0 ^ F1(r0, Km[15], Kr & 31);
+           t1 = l1; l1 = r1; r1 = t1 ^ F1(r1, Km[15], Kr & 31);
+                   t2 = l2; l2 = r2; r2 = t2 ^ F1(r2, Km[15], Kr & 31);
+    Kr >>= 8;
+    t0 = l0; l0 = r0; r0 = t0 ^ F3(r0, Km[14], Kr & 31);
+           t1 = l1; l1 = r1; r1 = t1 ^ F3(r1, Km[14], Kr & 31);
+                   t2 = l2; l2 = r2; r2 = t2 ^ F3(r2, Km[14], Kr & 31);
+    Kr >>= 8;
+    t0 = l0; l0 = r0; r0 = t0 ^ F2(r0, Km[13], Kr & 31);
+           t1 = l1; l1 = r1; r1 = t1 ^ F2(r1, Km[13], Kr & 31);
+                   t2 = l2; l2 = r2; r2 = t2 ^ F2(r2, Km[13], Kr & 31);
+    Kr >>= 8;
+    t0 = l0; l0 = r0; r0 = t0 ^ F1(r0, Km[12], Kr & 31);
+           t1 = l1; l1 = r1; r1 = t1 ^ F1(r1, Km[12], Kr & 31);
+                   t2 = l2; l2 = r2; r2 = t2 ^ F1(r2, Km[12], Kr & 31);
+    Kr = buf_get_be32(c->Kr + 8);
+    t0 = l0; l0 = r0; r0 = t0 ^ F3(r0, Km[11], Kr & 31);
+           t1 = l1; l1 = r1; r1 = t1 ^ F3(r1, Km[11], Kr & 31);
+                   t2 = l2; l2 = r2; r2 = t2 ^ F3(r2, Km[11], Kr & 31);
+    Kr >>= 8;
+    t0 = l0; l0 = r0; r0 = t0 ^ F2(r0, Km[10], Kr & 31);
+           t1 = l1; l1 = r1; r1 = t1 ^ F2(r1, Km[10], Kr & 31);
+                   t2 = l2; l2 = r2; r2 = t2 ^ F2(r2, Km[10], Kr & 31);
+    Kr >>= 8;
+    t0 = l0; l0 = r0; r0 = t0 ^ F1(r0, Km[ 9], Kr & 31);
+           t1 = l1; l1 = r1; r1 = t1 ^ F1(r1, Km[ 9], Kr & 31);
+                   t2 = l2; l2 = r2; r2 = t2 ^ F1(r2, Km[ 9], Kr & 31);
+    Kr >>= 8;
+    t0 = l0; l0 = r0; r0 = t0 ^ F3(r0, Km[ 8], Kr & 31);
+           t1 = l1; l1 = r1; r1 = t1 ^ F3(r1, Km[ 8], Kr & 31);
+                   t2 = l2; l2 = r2; r2 = t2 ^ F3(r2, Km[ 8], Kr & 31);
+    Kr = buf_get_be32(c->Kr + 4);
+    t0 = l0; l0 = r0; r0 = t0 ^ F2(r0, Km[ 7], Kr & 31);
+           t1 = l1; l1 = r1; r1 = t1 ^ F2(r1, Km[ 7], Kr & 31);
+                   t2 = l2; l2 = r2; r2 = t2 ^ F2(r2, Km[ 7], Kr & 31);
+    Kr >>= 8;
+    t0 = l0; l0 = r0; r0 = t0 ^ F1(r0, Km[ 6], Kr & 31);
+           t1 = l1; l1 = r1; r1 = t1 ^ F1(r1, Km[ 6], Kr & 31);
+                   t2 = l2; l2 = r2; r2 = t2 ^ F1(r2, Km[ 6], Kr & 31);
+    Kr >>= 8;
+    t0 = l0; l0 = r0; r0 = t0 ^ F3(r0, Km[ 5], Kr & 31);
+           t1 = l1; l1 = r1; r1 = t1 ^ F3(r1, Km[ 5], Kr & 31);
+                   t2 = l2; l2 = r2; r2 = t2 ^ F3(r2, Km[ 5], Kr & 31);
+    Kr >>= 8;
+    t0 = l0; l0 = r0; r0 = t0 ^ F2(r0, Km[ 4], Kr & 31);
+           t1 = l1; l1 = r1; r1 = t1 ^ F2(r1, Km[ 4], Kr & 31);
+                   t2 = l2; l2 = r2; r2 = t2 ^ F2(r2, Km[ 4], Kr & 31);
+    Kr = buf_get_be32(c->Kr + 0);
+    t0 = l0; l0 = r0; r0 = t0 ^ F1(r0, Km[ 3], Kr & 31);
+           t1 = l1; l1 = r1; r1 = t1 ^ F1(r1, Km[ 3], Kr & 31);
+                   t2 = l2; l2 = r2; r2 = t2 ^ F1(r2, Km[ 3], Kr & 31);
+    Kr >>= 8;
+    t0 = l0; l0 = r0; r0 = t0 ^ F3(r0, Km[ 2], Kr & 31);
+           t1 = l1; l1 = r1; r1 = t1 ^ F3(r1, Km[ 2], Kr & 31);
+                   t2 = l2; l2 = r2; r2 = t2 ^ F3(r2, Km[ 2], Kr & 31);
+    Kr >>= 8;
+    t0 = l0; l0 = r0; r0 = t0 ^ F2(r0, Km[ 1], Kr & 31);
+           t1 = l1; l1 = r1; r1 = t1 ^ F2(r1, Km[ 1], Kr & 31);
+                   t2 = l2; l2 = r2; r2 = t2 ^ F2(r2, Km[ 1], Kr & 31);
+    Kr >>= 8;
+    t0 = l0; l0 = r0; r0 = t0 ^ F1(r0, Km[ 0], Kr & 31);
+           t1 = l1; l1 = r1; r1 = t1 ^ F1(r1, Km[ 0], Kr & 31);
+                   t2 = l2; l2 = r2; r2 = t2 ^ F1(r2, Km[ 0], Kr & 31);
+
+    buf_put_be32(outbuf + 0, r0);
+    buf_put_be32(outbuf + 4, l0);
+    buf_put_be32(outbuf + 8, r1);
+    buf_put_be32(outbuf + 12, l1);
+    buf_put_be32(outbuf + 16, r2);
+    buf_put_be32(outbuf + 20, l2);
+}
+
+#endif /*!USE_ARM_ASM*/
+
+
+/* Bulk encryption of complete blocks in CTR mode.  This function is only
+   intended for the bulk encryption feature of cipher.c.  CTR is expected to be
+   of size CAST5_BLOCKSIZE. */
+static void
+_gcry_cast5_ctr_enc(void *context, unsigned char *ctr, void *outbuf_arg,
+                   const void *inbuf_arg, size_t nblocks)
+{
+  CAST5_context *ctx = context;
+  unsigned char *outbuf = outbuf_arg;
+  const unsigned char *inbuf = inbuf_arg;
+  unsigned char tmpbuf[CAST5_BLOCKSIZE * 3];
+  int burn_stack_depth = (20 + 4 * sizeof(void*)) + 4 * CAST5_BLOCKSIZE;
+
+#ifdef USE_AMD64_ASM
+  {
+    if (nblocks >= 4)
+      burn_stack_depth += 8 * sizeof(void*);
+
+    /* Process data in 4 block chunks. */
+    while (nblocks >= 4)
+      {
+        cast5_amd64_ctr_enc(ctx, outbuf, inbuf, ctr);
+
+        nblocks -= 4;
+        outbuf += 4 * CAST5_BLOCKSIZE;
+        inbuf  += 4 * CAST5_BLOCKSIZE;
+      }
+
+    /* Use generic code to handle smaller chunks... */
+  }
+#elif defined(USE_ARM_ASM)
+  {
+    /* Process data in 2 block chunks. */
+    while (nblocks >= 2)
+      {
+        _gcry_cast5_arm_ctr_enc(ctx, outbuf, inbuf, ctr);
+
+        nblocks -= 2;
+        outbuf += 2 * CAST5_BLOCKSIZE;
+        inbuf  += 2 * CAST5_BLOCKSIZE;
+      }
+
+    /* Use generic code to handle smaller chunks... */
+  }
+#endif
+
+#if !defined(USE_AMD64_ASM) && !defined(USE_ARM_ASM)
+  for ( ;nblocks >= 3; nblocks -= 3)
+    {
+      /* Prepare the counter blocks. */
+      cipher_block_cpy (tmpbuf + 0, ctr, CAST5_BLOCKSIZE);
+      cipher_block_cpy (tmpbuf + 8, ctr, CAST5_BLOCKSIZE);
+      cipher_block_cpy (tmpbuf + 16, ctr, CAST5_BLOCKSIZE);
+      cipher_block_add (tmpbuf + 8, 1, CAST5_BLOCKSIZE);
+      cipher_block_add (tmpbuf + 16, 2, CAST5_BLOCKSIZE);
+      cipher_block_add (ctr, 3, CAST5_BLOCKSIZE);
+      /* Encrypt the counter. */
+      do_encrypt_block_3(ctx, tmpbuf, tmpbuf);
+      /* XOR the input with the encrypted counter and store in output.  */
+      buf_xor(outbuf, tmpbuf, inbuf, CAST5_BLOCKSIZE * 3);
+      outbuf += CAST5_BLOCKSIZE * 3;
+      inbuf  += CAST5_BLOCKSIZE * 3;
+    }
+#endif
+
+  for ( ;nblocks; nblocks-- )
+    {
+      /* Encrypt the counter. */
+      do_encrypt_block(ctx, tmpbuf, ctr);
+      /* XOR the input with the encrypted counter and store in output.  */
+      cipher_block_xor(outbuf, tmpbuf, inbuf, CAST5_BLOCKSIZE);
+      outbuf += CAST5_BLOCKSIZE;
+      inbuf  += CAST5_BLOCKSIZE;
+      /* Increment the counter.  */
+      cipher_block_add (ctr, 1, CAST5_BLOCKSIZE);
+    }
+
+  wipememory(tmpbuf, sizeof(tmpbuf));
+  _gcry_burn_stack(burn_stack_depth);
+}
+
+
+/* Bulk decryption of complete blocks in CBC mode.  This function is only
+   intended for the bulk encryption feature of cipher.c. */
+static void
+_gcry_cast5_cbc_dec(void *context, unsigned char *iv, void *outbuf_arg,
+                   const void *inbuf_arg, size_t nblocks)
+{
+  CAST5_context *ctx = context;
+  unsigned char *outbuf = outbuf_arg;
+  const unsigned char *inbuf = inbuf_arg;
+  unsigned char savebuf[CAST5_BLOCKSIZE * 3];
+  int burn_stack_depth = (20 + 4 * sizeof(void*)) + 4 * CAST5_BLOCKSIZE;
+
+#ifdef USE_AMD64_ASM
+  {
+    if (nblocks >= 4)
+      burn_stack_depth += 8 * sizeof(void*);
+
+    /* Process data in 4 block chunks. */
+    while (nblocks >= 4)
+      {
+        cast5_amd64_cbc_dec(ctx, outbuf, inbuf, iv);
+
+        nblocks -= 4;
+        outbuf += 4 * CAST5_BLOCKSIZE;
+        inbuf  += 4 * CAST5_BLOCKSIZE;
+      }
+
+    /* Use generic code to handle smaller chunks... */
+  }
+#elif defined(USE_ARM_ASM)
+  {
+    /* Process data in 2 block chunks. */
+    while (nblocks >= 2)
+      {
+        _gcry_cast5_arm_cbc_dec(ctx, outbuf, inbuf, iv);
+
+        nblocks -= 2;
+        outbuf += 2 * CAST5_BLOCKSIZE;
+        inbuf  += 2 * CAST5_BLOCKSIZE;
+      }
+
+    /* Use generic code to handle smaller chunks... */
+  }
+#endif
+
+#if !defined(USE_AMD64_ASM) && !defined(USE_ARM_ASM)
+  for ( ;nblocks >= 3; nblocks -= 3)
+    {
+      /* INBUF is needed later and it may be identical to OUTBUF, so store
+         the intermediate result to SAVEBUF.  */
+      do_decrypt_block_3 (ctx, savebuf, inbuf);
+
+      cipher_block_xor_1 (savebuf + 0, iv, CAST5_BLOCKSIZE);
+      cipher_block_xor_1 (savebuf + 8, inbuf, CAST5_BLOCKSIZE * 2);
+      cipher_block_cpy (iv, inbuf + 16, CAST5_BLOCKSIZE);
+      buf_cpy (outbuf, savebuf, CAST5_BLOCKSIZE * 3);
+      inbuf += CAST5_BLOCKSIZE * 3;
+      outbuf += CAST5_BLOCKSIZE * 3;
+    }
+#endif
+
+  for ( ;nblocks; nblocks-- )
+    {
+      /* INBUF is needed later and it may be identical to OUTBUF, so store
+         the intermediate result to SAVEBUF.  */
+      do_decrypt_block (ctx, savebuf, inbuf);
+
+      cipher_block_xor_n_copy_2(outbuf, savebuf, iv, inbuf, CAST5_BLOCKSIZE);
+      inbuf += CAST5_BLOCKSIZE;
+      outbuf += CAST5_BLOCKSIZE;
+    }
+
+  wipememory(savebuf, sizeof(savebuf));
+  _gcry_burn_stack(burn_stack_depth);
+}
+
+/* Bulk decryption of complete blocks in CFB mode.  This function is only
+   intended for the bulk encryption feature of cipher.c. */
+static void
+_gcry_cast5_cfb_dec(void *context, unsigned char *iv, void *outbuf_arg,
+                   const void *inbuf_arg, size_t nblocks)
+{
+  CAST5_context *ctx = context;
+  unsigned char *outbuf = outbuf_arg;
+  const unsigned char *inbuf = inbuf_arg;
+  unsigned char tmpbuf[CAST5_BLOCKSIZE * 3];
+  int burn_stack_depth = (20 + 4 * sizeof(void*)) + 4 * CAST5_BLOCKSIZE;
+
+#ifdef USE_AMD64_ASM
+  {
+    if (nblocks >= 4)
+      burn_stack_depth += 8 * sizeof(void*);
+
+    /* Process data in 4 block chunks. */
+    while (nblocks >= 4)
+      {
+        cast5_amd64_cfb_dec(ctx, outbuf, inbuf, iv);
+
+        nblocks -= 4;
+        outbuf += 4 * CAST5_BLOCKSIZE;
+        inbuf  += 4 * CAST5_BLOCKSIZE;
+      }
+
+    /* Use generic code to handle smaller chunks... */
+  }
+#elif defined(USE_ARM_ASM)
+  {
+    /* Process data in 2 block chunks. */
+    while (nblocks >= 2)
+      {
+        _gcry_cast5_arm_cfb_dec(ctx, outbuf, inbuf, iv);
+
+        nblocks -= 2;
+        outbuf += 2 * CAST5_BLOCKSIZE;
+        inbuf  += 2 * CAST5_BLOCKSIZE;
+      }
+
+    /* Use generic code to handle smaller chunks... */
+  }
+#endif
+
+#if !defined(USE_AMD64_ASM) && !defined(USE_ARM_ASM)
+  for ( ;nblocks >= 3; nblocks -= 3 )
+    {
+      cipher_block_cpy (tmpbuf + 0, iv, CAST5_BLOCKSIZE);
+      cipher_block_cpy (tmpbuf + 8, inbuf + 0, CAST5_BLOCKSIZE * 2);
+      cipher_block_cpy (iv, inbuf + 16, CAST5_BLOCKSIZE);
+      do_encrypt_block_3 (ctx, tmpbuf, tmpbuf);
+      buf_xor (outbuf, inbuf, tmpbuf, CAST5_BLOCKSIZE * 3);
+      outbuf += CAST5_BLOCKSIZE * 3;
+      inbuf  += CAST5_BLOCKSIZE * 3;
+    }
+#endif
+
+  for ( ;nblocks; nblocks-- )
+    {
+      do_encrypt_block(ctx, iv, iv);
+      cipher_block_xor_n_copy(outbuf, iv, inbuf, CAST5_BLOCKSIZE);
+      outbuf += CAST5_BLOCKSIZE;
+      inbuf  += CAST5_BLOCKSIZE;
+    }
+
+  wipememory(tmpbuf, sizeof(tmpbuf));
+  _gcry_burn_stack(burn_stack_depth);
+}
+
+
+/* Run the self-tests for CAST5-CTR, tests IV increment of bulk CTR
+   encryption.  Returns NULL on success. */
+static const char *
+selftest_ctr (void)
+{
+  const int nblocks = 4+1;
+  const int blocksize = CAST5_BLOCKSIZE;
+  const int context_size = sizeof(CAST5_context);
+
+  return _gcry_selftest_helper_ctr("CAST5", &cast_setkey,
+           &encrypt_block, nblocks, blocksize, context_size);
+}
+
+
+/* Run the self-tests for CAST5-CBC, tests bulk CBC decryption.
+   Returns NULL on success. */
+static const char *
+selftest_cbc (void)
+{
+  const int nblocks = 4+2;
+  const int blocksize = CAST5_BLOCKSIZE;
+  const int context_size = sizeof(CAST5_context);
+
+  return _gcry_selftest_helper_cbc("CAST5", &cast_setkey,
+           &encrypt_block, nblocks, blocksize, context_size);
+}
+
+
+/* Run the self-tests for CAST5-CFB, tests bulk CBC decryption.
+   Returns NULL on success. */
+static const char *
+selftest_cfb (void)
+{
+  const int nblocks = 4+2;
+  const int blocksize = CAST5_BLOCKSIZE;
+  const int context_size = sizeof(CAST5_context);
+
+  return _gcry_selftest_helper_cfb("CAST5", &cast_setkey,
+           &encrypt_block, nblocks, blocksize, context_size);
 }
 
 
@@ -468,13 +1037,18 @@ static const char*
 selftest(void)
 {
     CAST5_context c;
-    byte key[16]  = { 0x01, 0x23, 0x45, 0x67, 0x12, 0x34, 0x56, 0x78,
+    cipher_bulk_ops_t bulk_ops;
+    static const byte key[16] =
+                    { 0x01, 0x23, 0x45, 0x67, 0x12, 0x34, 0x56, 0x78,
                      0x23, 0x45, 0x67, 0x89, 0x34, 0x56, 0x78, 0x9A  };
-    byte plain[8] = { 0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF };
-    byte cipher[8]= { 0x23, 0x8B, 0x4F, 0xE5, 0x84, 0x7E, 0x44, 0xB2 };
+    static const byte plain[8] =
+                    { 0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF };
+    static const byte cipher[8] =
+                    { 0x23, 0x8B, 0x4F, 0xE5, 0x84, 0x7E, 0x44, 0xB2 };
     byte buffer[8];
+    const char *r;
 
-    cast_setkey( &c, key, 16 );
+    cast_setkey( &c, key, 16, &bulk_ops );
     encrypt_block( &c, buffer, plain );
     if( memcmp( buffer, cipher, 8 ) )
        return "1";
@@ -495,10 +1069,10 @@ selftest(void)
                        0x80,0xAC,0x05,0xB8,0xE8,0x3D,0x69,0x6E };
 
        for(i=0; i < 1000000; i++ ) {
-           cast_setkey( &c, b0, 16 );
+           cast_setkey( &c, b0, 16, &bulk_ops );
            encrypt_block( &c, a0, a0 );
            encrypt_block( &c, a0+8, a0+8 );
-           cast_setkey( &c, a0, 16 );
+           cast_setkey( &c, a0, 16, &bulk_ops );
            encrypt_block( &c, b0, b0 );
            encrypt_block( &c, b0+8, b0+8 );
        }
@@ -507,6 +1081,16 @@ selftest(void)
 
     }
 #endif
+
+    if ( (r = selftest_cbc ()) )
+      return r;
+
+    if ( (r = selftest_cfb ()) )
+      return r;
+
+    if ( (r = selftest_ctr ()) )
+      return r;
+
     return NULL;
 }
 
@@ -582,10 +1166,10 @@ do_cast_setkey( CAST5_context *c, const byte *key, 
unsigned keylen )
   if( keylen != 16 )
     return GPG_ERR_INV_KEYLEN;
 
-  x[0] = key[0]  << 24 | key[1]  << 16 | key[2]  << 8 | key[3];
-  x[1] = key[4]  << 24 | key[5]  << 16 | key[6]  << 8 | key[7];
-  x[2] = key[8]  << 24 | key[9]  << 16 | key[10] << 8 | key[11];
-  x[3] = key[12] << 24 | key[13] << 16 | key[14] << 8 | key[15];
+  x[0] = buf_get_be32(key + 0);
+  x[1] = buf_get_be32(key + 4);
+  x[2] = buf_get_be32(key + 8);
+  x[3] = buf_get_be32(key + 12);
 
   key_schedule( x, z, k );
   for(i=0; i < 16; i++ )
@@ -594,9 +1178,35 @@ do_cast_setkey( CAST5_context *c, const byte *key, 
unsigned keylen )
   for(i=0; i < 16; i++ )
     c->Kr[i] = k[i] & 0x1f;
 
-  memset(&x,0, sizeof x);
-  memset(&z,0, sizeof z);
-  memset(&k,0, sizeof k);
+#ifdef USE_ARM_ASM
+  for (i = 0; i < 4; i++)
+    {
+      byte Kr_arm[4];
+
+      /* Convert rotate left to rotate right and add shift left
+       * by 2.  */
+      Kr_arm[0] = ((32 - c->Kr[4 * i + 0]) - 2) & 0x1f;
+      Kr_arm[1] = ((32 - c->Kr[4 * i + 1]) - 2) & 0x1f;
+      Kr_arm[2] = ((32 - c->Kr[4 * i + 2]) - 2) & 0x1f;
+      Kr_arm[3] = ((32 - c->Kr[4 * i + 3]) - 2) & 0x1f;
+
+      /* Endian friendly store.  */
+      c->Kr_arm_enc[i] = Kr_arm[0] |
+                        (Kr_arm[1] << 8) |
+                        (Kr_arm[2] << 16) |
+                        (Kr_arm[3] << 24);
+      c->Kr_arm_dec[i] = Kr_arm[3] |
+                        (Kr_arm[2] << 8) |
+                        (Kr_arm[1] << 16) |
+                        (Kr_arm[0] << 24);
+
+      wipememory(Kr_arm, sizeof(Kr_arm));
+    }
+#endif
+
+  wipememory(x, sizeof x);
+  wipememory(z, sizeof z);
+  wipememory(k, sizeof k);
 
 #undef xi
 #undef zi
@@ -604,17 +1214,25 @@ do_cast_setkey( CAST5_context *c, const byte *key, 
unsigned keylen )
 }
 
 static gcry_err_code_t
-cast_setkey (void *context, const byte *key, unsigned keylen )
+cast_setkey (void *context, const byte *key, unsigned keylen,
+             cipher_bulk_ops_t *bulk_ops)
 {
   CAST5_context *c = (CAST5_context *) context;
   gcry_err_code_t rc = do_cast_setkey (c, key, keylen);
-  _gcry_burn_stack (96+7*sizeof(void*));
+
+  /* Setup bulk encryption routines.  */
+  memset (bulk_ops, 0, sizeof(*bulk_ops));
+  bulk_ops->cfb_dec = _gcry_cast5_cfb_dec;
+  bulk_ops->cbc_dec = _gcry_cast5_cbc_dec;
+  bulk_ops->ctr_enc = _gcry_cast5_ctr_enc;
+
   return rc;
 }
 
 
 gcry_cipher_spec_t _gcry_cipher_spec_cast5 =
   {
+    GCRY_CIPHER_CAST5, {0, 0},
     "CAST5", NULL, NULL, CAST5_BLOCKSIZE, 128, sizeof (CAST5_context),
     cast_setkey, encrypt_block, decrypt_block
   };
diff --git a/grub-core/lib/libgcrypt/cipher/chacha20-aarch64.S 
b/grub-core/lib/libgcrypt/cipher/chacha20-aarch64.S
new file mode 100644
index 000000000..2a980b95c
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/chacha20-aarch64.S
@@ -0,0 +1,648 @@
+/* chacha20-aarch64.S - ARMv8/AArch64 accelerated chacha20 blocks function
+ *
+ * Copyright (C) 2017-2019 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * Based on D. J. Bernstein reference implementation at
+ * http://cr.yp.to/chacha.html:
+ *
+ * chacha-regs.c version 20080118
+ * D. J. Bernstein
+ * Public domain.
+ */
+
+#include "asm-common-aarch64.h"
+
+#if defined(__AARCH64EL__) && \
+    defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) && \
+    defined(HAVE_GCC_INLINE_ASM_AARCH64_NEON) && \
+    defined(USE_CHACHA20)
+
+.cpu generic+simd
+
+.text
+
+#include "asm-poly1305-aarch64.h"
+
+/* register macros */
+#define INPUT     x0
+#define DST       x1
+#define SRC       x2
+#define NBLKS     x3
+#define ROUND     x4
+#define INPUT_CTR x5
+#define INPUT_POS x6
+#define CTR       x7
+
+/* vector registers */
+#define X0 v16
+#define X1 v17
+#define X2 v18
+#define X3 v19
+#define X4 v20
+#define X5 v21
+#define X6 v22
+#define X7 v23
+#define X8 v24
+#define X9 v25
+#define X10 v26
+#define X11 v27
+#define X12 v28
+#define X13 v29
+#define X14 v30
+#define X15 v31
+
+#define VCTR    v0
+#define VTMP0   v1
+#define VTMP1   v2
+#define VTMP2   v3
+#define VTMP3   v4
+#define X12_TMP v5
+#define X13_TMP v6
+#define ROT8    v7
+
+/**********************************************************************
+  helper macros
+ **********************************************************************/
+
+#define _(...) __VA_ARGS__
+
+#define vpunpckldq(s1, s2, dst) \
+       zip1 dst.4s, s2.4s, s1.4s;
+
+#define vpunpckhdq(s1, s2, dst) \
+       zip2 dst.4s, s2.4s, s1.4s;
+
+#define vpunpcklqdq(s1, s2, dst) \
+       zip1 dst.2d, s2.2d, s1.2d;
+
+#define vpunpckhqdq(s1, s2, dst) \
+       zip2 dst.2d, s2.2d, s1.2d;
+
+/* 4x4 32-bit integer matrix transpose */
+#define transpose_4x4(x0, x1, x2, x3, t1, t2, t3) \
+       vpunpckhdq(x1, x0, t2); \
+       vpunpckldq(x1, x0, x0); \
+       \
+       vpunpckldq(x3, x2, t1); \
+       vpunpckhdq(x3, x2, x2); \
+       \
+       vpunpckhqdq(t1, x0, x1); \
+       vpunpcklqdq(t1, x0, x0); \
+       \
+       vpunpckhqdq(x2, t2, x3); \
+       vpunpcklqdq(x2, t2, x2);
+
+#define clear(x) \
+       movi x.16b, #0;
+
+/**********************************************************************
+  4-way chacha20
+ **********************************************************************/
+
+#define XOR(d,s1,s2) \
+       eor d.16b, s2.16b, s1.16b;
+
+#define PLUS(ds,s) \
+       add ds.4s, ds.4s, s.4s;
+
+#define ROTATE4(dst1,dst2,dst3,dst4,c,src1,src2,src3,src4,iop1,iop2,iop3) \
+       shl dst1.4s, src1.4s, #(c);             \
+       shl dst2.4s, src2.4s, #(c);             \
+       iop1;                                   \
+       shl dst3.4s, src3.4s, #(c);             \
+       shl dst4.4s, src4.4s, #(c);             \
+       iop2;                                   \
+       sri dst1.4s, src1.4s, #(32 - (c));      \
+       sri dst2.4s, src2.4s, #(32 - (c));      \
+       iop3;                                   \
+       sri dst3.4s, src3.4s, #(32 - (c));      \
+       sri dst4.4s, src4.4s, #(32 - (c));
+
+#define ROTATE4_8(dst1,dst2,dst3,dst4,src1,src2,src3,src4,iop1,iop2,iop3) \
+       tbl dst1.16b, {src1.16b}, ROT8.16b;     \
+       iop1;                                   \
+       tbl dst2.16b, {src2.16b}, ROT8.16b;     \
+       iop2;                                   \
+       tbl dst3.16b, {src3.16b}, ROT8.16b;     \
+       iop3;                                   \
+       tbl dst4.16b, {src4.16b}, ROT8.16b;
+
+#define ROTATE4_16(dst1,dst2,dst3,dst4,src1,src2,src3,src4,iop1) \
+       rev32 dst1.8h, src1.8h;                 \
+       rev32 dst2.8h, src2.8h;                 \
+       iop1;                                   \
+       rev32 dst3.8h, src3.8h;                 \
+       rev32 dst4.8h, src4.8h;
+
+#define 
QUARTERROUND4(a1,b1,c1,d1,a2,b2,c2,d2,a3,b3,c3,d3,a4,b4,c4,d4,ign,tmp1,tmp2,tmp3,tmp4,\
+                     
iop1,iop2,iop3,iop4,iop5,iop6,iop7,iop8,iop9,iop10,iop11,iop12,iop13,iop14,\
+                     
iop15,iop16,iop17,iop18,iop19,iop20,iop21,iop22,iop23,iop24,iop25,iop26,\
+                     iop27,iop28,iop29) \
+       PLUS(a1,b1); PLUS(a2,b2); iop1;                                         
\
+       PLUS(a3,b3); PLUS(a4,b4); iop2;                                         
\
+           XOR(tmp1,d1,a1); XOR(tmp2,d2,a2); iop3;                             
\
+           XOR(tmp3,d3,a3); XOR(tmp4,d4,a4); iop4;                             
\
+               ROTATE4_16(d1, d2, d3, d4, tmp1, tmp2, tmp3, tmp4, _(iop5));    
\
+               iop6;                                                           
\
+       PLUS(c1,d1); PLUS(c2,d2); iop7;                                         
\
+       PLUS(c3,d3); PLUS(c4,d4); iop8;                                         
\
+           XOR(tmp1,b1,c1); XOR(tmp2,b2,c2); iop9;                             
\
+           XOR(tmp3,b3,c3); XOR(tmp4,b4,c4); iop10;                            
\
+               ROTATE4(b1, b2, b3, b4, 12, tmp1, tmp2, tmp3, tmp4,             
\
+                       _(iop11), _(iop12), _(iop13)); iop14;                   
\
+       PLUS(a1,b1); PLUS(a2,b2); iop15;                                        
\
+       PLUS(a3,b3); PLUS(a4,b4); iop16;                                        
\
+           XOR(tmp1,d1,a1); XOR(tmp2,d2,a2); iop17;                            
\
+           XOR(tmp3,d3,a3); XOR(tmp4,d4,a4); iop18;                            
\
+               ROTATE4_8(d1, d2, d3, d4, tmp1, tmp2, tmp3, tmp4,               
\
+                         _(iop19), _(iop20), _(iop21)); iop22;                 
\
+       PLUS(c1,d1); PLUS(c2,d2); iop23;                                        
\
+       PLUS(c3,d3); PLUS(c4,d4); iop24;                                        
\
+           XOR(tmp1,b1,c1); XOR(tmp2,b2,c2); iop25;                            
\
+           XOR(tmp3,b3,c3); XOR(tmp4,b4,c4); iop26;                            
\
+               ROTATE4(b1, b2, b3, b4, 7, tmp1, tmp2, tmp3, tmp4,              
\
+                       _(iop27), _(iop28), _(iop29));
+
+.align 4
+.globl _gcry_chacha20_aarch64_blocks4_data_inc_counter
+_gcry_chacha20_aarch64_blocks4_data_inc_counter:
+       .long 0,1,2,3
+
+.align 4
+.globl _gcry_chacha20_aarch64_blocks4_data_rot8
+_gcry_chacha20_aarch64_blocks4_data_rot8:
+       .byte 3,0,1,2
+       .byte 7,4,5,6
+       .byte 11,8,9,10
+       .byte 15,12,13,14
+
+.align 3
+.globl _gcry_chacha20_aarch64_blocks4
+ELF(.type _gcry_chacha20_aarch64_blocks4,%function;)
+
+_gcry_chacha20_aarch64_blocks4:
+       /* input:
+        *      x0: input
+        *      x1: dst
+        *      x2: src
+        *      x3: nblks (multiple of 4)
+        */
+       CFI_STARTPROC()
+
+       GET_DATA_POINTER(CTR, _gcry_chacha20_aarch64_blocks4_data_rot8);
+       add INPUT_CTR, INPUT, #(12*4);
+       ld1 {ROT8.16b}, [CTR];
+       GET_DATA_POINTER(CTR, _gcry_chacha20_aarch64_blocks4_data_inc_counter);
+       mov INPUT_POS, INPUT;
+       ld1 {VCTR.16b}, [CTR];
+
+.Loop4:
+       /* Construct counter vectors X12 and X13 */
+
+       ld1 {X15.16b}, [INPUT_CTR];
+       mov ROUND, #20;
+       ld1 {VTMP1.16b-VTMP3.16b}, [INPUT_POS];
+
+       dup X12.4s, X15.s[0];
+       dup X13.4s, X15.s[1];
+       ldr CTR, [INPUT_CTR];
+       add X12.4s, X12.4s, VCTR.4s;
+       dup X0.4s, VTMP1.s[0];
+       dup X1.4s, VTMP1.s[1];
+       dup X2.4s, VTMP1.s[2];
+       dup X3.4s, VTMP1.s[3];
+       dup X14.4s, X15.s[2];
+       cmhi VTMP0.4s, VCTR.4s, X12.4s;
+       dup X15.4s, X15.s[3];
+       add CTR, CTR, #4; /* Update counter */
+       dup X4.4s, VTMP2.s[0];
+       dup X5.4s, VTMP2.s[1];
+       dup X6.4s, VTMP2.s[2];
+       dup X7.4s, VTMP2.s[3];
+       sub X13.4s, X13.4s, VTMP0.4s;
+       dup X8.4s, VTMP3.s[0];
+       dup X9.4s, VTMP3.s[1];
+       dup X10.4s, VTMP3.s[2];
+       dup X11.4s, VTMP3.s[3];
+       mov X12_TMP.16b, X12.16b;
+       mov X13_TMP.16b, X13.16b;
+       str CTR, [INPUT_CTR];
+
+.Lround2:
+       subs ROUND, ROUND, #2
+       QUARTERROUND4(X0, X4,  X8, X12,   X1, X5,  X9, X13,
+                     X2, X6, X10, X14,   X3, X7, X11, X15,
+                     tmp:=,VTMP0,VTMP1,VTMP2,VTMP3,
+                     ,,,,,,,,,,,,,,,,,,,,,,,,,,,,)
+       QUARTERROUND4(X0, X5, X10, X15,   X1, X6, X11, X12,
+                     X2, X7,  X8, X13,   X3, X4,  X9, X14,
+                     tmp:=,VTMP0,VTMP1,VTMP2,VTMP3,
+                     ,,,,,,,,,,,,,,,,,,,,,,,,,,,,)
+       b.ne .Lround2;
+
+       ld1 {VTMP0.16b, VTMP1.16b}, [INPUT_POS], #32;
+
+       PLUS(X12, X12_TMP);        /* INPUT + 12 * 4 + counter */
+       PLUS(X13, X13_TMP);        /* INPUT + 13 * 4 + counter */
+
+       dup VTMP2.4s, VTMP0.s[0]; /* INPUT + 0 * 4 */
+       dup VTMP3.4s, VTMP0.s[1]; /* INPUT + 1 * 4 */
+       dup X12_TMP.4s, VTMP0.s[2]; /* INPUT + 2 * 4 */
+       dup X13_TMP.4s, VTMP0.s[3]; /* INPUT + 3 * 4 */
+       PLUS(X0, VTMP2);
+       PLUS(X1, VTMP3);
+       PLUS(X2, X12_TMP);
+       PLUS(X3, X13_TMP);
+
+       dup VTMP2.4s, VTMP1.s[0]; /* INPUT + 4 * 4 */
+       dup VTMP3.4s, VTMP1.s[1]; /* INPUT + 5 * 4 */
+       dup X12_TMP.4s, VTMP1.s[2]; /* INPUT + 6 * 4 */
+       dup X13_TMP.4s, VTMP1.s[3]; /* INPUT + 7 * 4 */
+       ld1 {VTMP0.16b, VTMP1.16b}, [INPUT_POS];
+       mov INPUT_POS, INPUT;
+       PLUS(X4, VTMP2);
+       PLUS(X5, VTMP3);
+       PLUS(X6, X12_TMP);
+       PLUS(X7, X13_TMP);
+
+       dup VTMP2.4s, VTMP0.s[0]; /* INPUT + 8 * 4 */
+       dup VTMP3.4s, VTMP0.s[1]; /* INPUT + 9 * 4 */
+       dup X12_TMP.4s, VTMP0.s[2]; /* INPUT + 10 * 4 */
+       dup X13_TMP.4s, VTMP0.s[3]; /* INPUT + 11 * 4 */
+       dup VTMP0.4s, VTMP1.s[2]; /* INPUT + 14 * 4 */
+       dup VTMP1.4s, VTMP1.s[3]; /* INPUT + 15 * 4 */
+       PLUS(X8, VTMP2);
+       PLUS(X9, VTMP3);
+       PLUS(X10, X12_TMP);
+       PLUS(X11, X13_TMP);
+       PLUS(X14, VTMP0);
+       PLUS(X15, VTMP1);
+
+       transpose_4x4(X0, X1, X2, X3, VTMP0, VTMP1, VTMP2);
+       transpose_4x4(X4, X5, X6, X7, VTMP0, VTMP1, VTMP2);
+       transpose_4x4(X8, X9, X10, X11, VTMP0, VTMP1, VTMP2);
+       transpose_4x4(X12, X13, X14, X15, VTMP0, VTMP1, VTMP2);
+
+       subs NBLKS, NBLKS, #4;
+
+       ld1 {VTMP0.16b-VTMP3.16b}, [SRC], #64;
+       ld1 {X12_TMP.16b-X13_TMP.16b}, [SRC], #32;
+       eor VTMP0.16b, X0.16b, VTMP0.16b;
+       eor VTMP1.16b, X4.16b, VTMP1.16b;
+       eor VTMP2.16b, X8.16b, VTMP2.16b;
+       eor VTMP3.16b, X12.16b, VTMP3.16b;
+       eor X12_TMP.16b, X1.16b, X12_TMP.16b;
+       eor X13_TMP.16b, X5.16b, X13_TMP.16b;
+       st1 {VTMP0.16b-VTMP3.16b}, [DST], #64;
+       ld1 {VTMP0.16b-VTMP3.16b}, [SRC], #64;
+       st1 {X12_TMP.16b-X13_TMP.16b}, [DST], #32;
+       ld1 {X12_TMP.16b-X13_TMP.16b}, [SRC], #32;
+       eor VTMP0.16b, X9.16b, VTMP0.16b;
+       eor VTMP1.16b, X13.16b, VTMP1.16b;
+       eor VTMP2.16b, X2.16b, VTMP2.16b;
+       eor VTMP3.16b, X6.16b, VTMP3.16b;
+       eor X12_TMP.16b, X10.16b, X12_TMP.16b;
+       eor X13_TMP.16b, X14.16b, X13_TMP.16b;
+       st1 {VTMP0.16b-VTMP3.16b}, [DST], #64;
+       ld1 {VTMP0.16b-VTMP3.16b}, [SRC], #64;
+       st1 {X12_TMP.16b-X13_TMP.16b}, [DST], #32;
+       eor VTMP0.16b, X3.16b, VTMP0.16b;
+       eor VTMP1.16b, X7.16b, VTMP1.16b;
+       eor VTMP2.16b, X11.16b, VTMP2.16b;
+       eor VTMP3.16b, X15.16b, VTMP3.16b;
+       st1 {VTMP0.16b-VTMP3.16b}, [DST], #64;
+
+       b.ne .Loop4;
+
+       /* clear the used vector registers and stack */
+       clear(VTMP0);
+       clear(VTMP1);
+       clear(VTMP2);
+       clear(VTMP3);
+       clear(X12_TMP);
+       clear(X13_TMP);
+       clear(X0);
+       clear(X1);
+       clear(X2);
+       clear(X3);
+       clear(X4);
+       clear(X5);
+       clear(X6);
+       clear(X7);
+       clear(X8);
+       clear(X9);
+       clear(X10);
+       clear(X11);
+       clear(X12);
+       clear(X13);
+       clear(X14);
+       clear(X15);
+
+       eor x0, x0, x0
+       ret_spec_stop
+       CFI_ENDPROC()
+ELF(.size _gcry_chacha20_aarch64_blocks4, .-_gcry_chacha20_aarch64_blocks4;)
+
+/**********************************************************************
+  4-way stitched chacha20-poly1305
+ **********************************************************************/
+
+.align 3
+.globl _gcry_chacha20_poly1305_aarch64_blocks4
+ELF(.type _gcry_chacha20_poly1305_aarch64_blocks4,%function;)
+
+_gcry_chacha20_poly1305_aarch64_blocks4:
+       /* input:
+        *      x0: input
+        *      x1: dst
+        *      x2: src
+        *      x3: nblks (multiple of 4)
+        *      x4: poly1305-state
+        *      x5: poly1305-src
+        */
+       CFI_STARTPROC()
+       POLY1305_PUSH_REGS()
+
+       mov POLY_RSTATE, x4;
+       mov POLY_RSRC, x5;
+
+       GET_DATA_POINTER(CTR, _gcry_chacha20_aarch64_blocks4_data_rot8);
+       add INPUT_CTR, INPUT, #(12*4);
+       ld1 {ROT8.16b}, [CTR];
+       GET_DATA_POINTER(CTR, _gcry_chacha20_aarch64_blocks4_data_inc_counter);
+       mov INPUT_POS, INPUT;
+       ld1 {VCTR.16b}, [CTR];
+
+       POLY1305_LOAD_STATE()
+
+.Loop_poly4:
+       /* Construct counter vectors X12 and X13 */
+
+       ld1 {X15.16b}, [INPUT_CTR];
+       ld1 {VTMP1.16b-VTMP3.16b}, [INPUT_POS];
+
+       dup X12.4s, X15.s[0];
+       dup X13.4s, X15.s[1];
+       ldr CTR, [INPUT_CTR];
+       add X12.4s, X12.4s, VCTR.4s;
+       dup X0.4s, VTMP1.s[0];
+       dup X1.4s, VTMP1.s[1];
+       dup X2.4s, VTMP1.s[2];
+       dup X3.4s, VTMP1.s[3];
+       dup X14.4s, X15.s[2];
+       cmhi VTMP0.4s, VCTR.4s, X12.4s;
+       dup X15.4s, X15.s[3];
+       add CTR, CTR, #4; /* Update counter */
+       dup X4.4s, VTMP2.s[0];
+       dup X5.4s, VTMP2.s[1];
+       dup X6.4s, VTMP2.s[2];
+       dup X7.4s, VTMP2.s[3];
+       sub X13.4s, X13.4s, VTMP0.4s;
+       dup X8.4s, VTMP3.s[0];
+       dup X9.4s, VTMP3.s[1];
+       dup X10.4s, VTMP3.s[2];
+       dup X11.4s, VTMP3.s[3];
+       mov X12_TMP.16b, X12.16b;
+       mov X13_TMP.16b, X13.16b;
+       str CTR, [INPUT_CTR];
+
+       mov ROUND, #20
+.Lround4_with_poly1305_outer:
+       mov POLY_CHACHA_ROUND, #6;
+.Lround4_with_poly1305_inner1:
+                     POLY1305_BLOCK_PART1(0 * 16)
+       QUARTERROUND4(X0, X4,  X8, X12,   X1, X5,  X9, X13,
+                     X2, X6, X10, X14,   X3, X7, X11, X15,
+                     tmp:=,VTMP0,VTMP1,VTMP2,VTMP3,
+                     POLY1305_BLOCK_PART2(0 * 16),
+                     POLY1305_BLOCK_PART3(),
+                     POLY1305_BLOCK_PART4(),
+                     POLY1305_BLOCK_PART5(),
+                     POLY1305_BLOCK_PART6(),
+                     POLY1305_BLOCK_PART7(),
+                     POLY1305_BLOCK_PART8(),
+                     POLY1305_BLOCK_PART9(),
+                     POLY1305_BLOCK_PART10(),
+                     POLY1305_BLOCK_PART11(),
+                     POLY1305_BLOCK_PART12(),
+                     POLY1305_BLOCK_PART13(),
+                     POLY1305_BLOCK_PART14(),
+                     POLY1305_BLOCK_PART15(),
+                     POLY1305_BLOCK_PART16(),
+                     POLY1305_BLOCK_PART17(),
+                     POLY1305_BLOCK_PART18(),
+                     POLY1305_BLOCK_PART19(),
+                     POLY1305_BLOCK_PART20(),
+                     POLY1305_BLOCK_PART21(),
+                     POLY1305_BLOCK_PART22(),
+                     POLY1305_BLOCK_PART23(),
+                     POLY1305_BLOCK_PART24(),
+                     POLY1305_BLOCK_PART25(),
+                     POLY1305_BLOCK_PART26(),
+                     POLY1305_BLOCK_PART27(),
+                     POLY1305_BLOCK_PART28(),
+                     POLY1305_BLOCK_PART29(),
+                     POLY1305_BLOCK_PART1(1 * 16))
+                     POLY1305_BLOCK_PART2(1 * 16)
+       QUARTERROUND4(X0, X5, X10, X15,   X1, X6, X11, X12,
+                     X2, X7,  X8, X13,   X3, X4,  X9, X14,
+                     tmp:=,VTMP0,VTMP1,VTMP2,VTMP3,
+                     _(add POLY_RSRC, POLY_RSRC, #(2*16)),
+                     POLY1305_BLOCK_PART3(),
+                     POLY1305_BLOCK_PART4(),
+                     POLY1305_BLOCK_PART5(),
+                     POLY1305_BLOCK_PART6(),
+                     POLY1305_BLOCK_PART7(),
+                     POLY1305_BLOCK_PART8(),
+                     POLY1305_BLOCK_PART9(),
+                     POLY1305_BLOCK_PART10(),
+                     POLY1305_BLOCK_PART11(),
+                     POLY1305_BLOCK_PART12(),
+                     POLY1305_BLOCK_PART13(),
+                     POLY1305_BLOCK_PART14(),
+                     POLY1305_BLOCK_PART15(),
+                     POLY1305_BLOCK_PART16(),
+                     POLY1305_BLOCK_PART17(),
+                     POLY1305_BLOCK_PART18(),
+                     POLY1305_BLOCK_PART19(),
+                     POLY1305_BLOCK_PART20(),
+                     POLY1305_BLOCK_PART21(),
+                     POLY1305_BLOCK_PART22(),
+                     POLY1305_BLOCK_PART23(),
+                     POLY1305_BLOCK_PART24(),
+                     POLY1305_BLOCK_PART25(),
+                     POLY1305_BLOCK_PART26(),
+                     POLY1305_BLOCK_PART27(),
+                     POLY1305_BLOCK_PART28(),
+                     POLY1305_BLOCK_PART29(),
+                     _(subs POLY_CHACHA_ROUND, POLY_CHACHA_ROUND, #2));
+       b.ne .Lround4_with_poly1305_inner1;
+
+       mov POLY_CHACHA_ROUND, #4;
+.Lround4_with_poly1305_inner2:
+                     POLY1305_BLOCK_PART1(0 * 16)
+       QUARTERROUND4(X0, X4,  X8, X12,   X1, X5,  X9, X13,
+                     X2, X6, X10, X14,   X3, X7, X11, X15,
+                     tmp:=,VTMP0,VTMP1,VTMP2,VTMP3,,
+                     POLY1305_BLOCK_PART2(0 * 16),,
+                     _(add POLY_RSRC, POLY_RSRC, #(1*16)),,
+                     POLY1305_BLOCK_PART3(),,
+                     POLY1305_BLOCK_PART4(),,
+                     POLY1305_BLOCK_PART5(),,
+                     POLY1305_BLOCK_PART6(),,
+                     POLY1305_BLOCK_PART7(),,
+                     POLY1305_BLOCK_PART8(),,
+                     POLY1305_BLOCK_PART9(),,
+                     POLY1305_BLOCK_PART10(),,
+                     POLY1305_BLOCK_PART11(),,
+                     POLY1305_BLOCK_PART12(),,
+                     POLY1305_BLOCK_PART13(),,
+                     POLY1305_BLOCK_PART14(),)
+                     POLY1305_BLOCK_PART15()
+       QUARTERROUND4(X0, X5, X10, X15,   X1, X6, X11, X12,
+                     X2, X7,  X8, X13,   X3, X4,  X9, X14,
+                     tmp:=,VTMP0,VTMP1,VTMP2,VTMP3,
+                     POLY1305_BLOCK_PART16(),,
+                     POLY1305_BLOCK_PART17(),,
+                     POLY1305_BLOCK_PART18(),,
+                     POLY1305_BLOCK_PART19(),,
+                     POLY1305_BLOCK_PART20(),,
+                     POLY1305_BLOCK_PART21(),,
+                     POLY1305_BLOCK_PART22(),,
+                     POLY1305_BLOCK_PART23(),,
+                     POLY1305_BLOCK_PART24(),,
+                     POLY1305_BLOCK_PART25(),,
+                     POLY1305_BLOCK_PART26(),,
+                     POLY1305_BLOCK_PART27(),,
+                     POLY1305_BLOCK_PART28(),,
+                     POLY1305_BLOCK_PART29(),
+                     _(subs POLY_CHACHA_ROUND, POLY_CHACHA_ROUND, #2),)
+       b.ne .Lround4_with_poly1305_inner2;
+
+       subs ROUND, ROUND, #10
+       b.ne .Lround4_with_poly1305_outer;
+
+       ld1 {VTMP0.16b, VTMP1.16b}, [INPUT_POS], #32;
+
+       PLUS(X12, X12_TMP);        /* INPUT + 12 * 4 + counter */
+       PLUS(X13, X13_TMP);        /* INPUT + 13 * 4 + counter */
+
+       dup VTMP2.4s, VTMP0.s[0]; /* INPUT + 0 * 4 */
+       dup VTMP3.4s, VTMP0.s[1]; /* INPUT + 1 * 4 */
+       dup X12_TMP.4s, VTMP0.s[2]; /* INPUT + 2 * 4 */
+       dup X13_TMP.4s, VTMP0.s[3]; /* INPUT + 3 * 4 */
+       PLUS(X0, VTMP2);
+       PLUS(X1, VTMP3);
+       PLUS(X2, X12_TMP);
+       PLUS(X3, X13_TMP);
+
+       dup VTMP2.4s, VTMP1.s[0]; /* INPUT + 4 * 4 */
+       dup VTMP3.4s, VTMP1.s[1]; /* INPUT + 5 * 4 */
+       dup X12_TMP.4s, VTMP1.s[2]; /* INPUT + 6 * 4 */
+       dup X13_TMP.4s, VTMP1.s[3]; /* INPUT + 7 * 4 */
+       ld1 {VTMP0.16b, VTMP1.16b}, [INPUT_POS];
+       mov INPUT_POS, INPUT;
+       PLUS(X4, VTMP2);
+       PLUS(X5, VTMP3);
+       PLUS(X6, X12_TMP);
+       PLUS(X7, X13_TMP);
+
+       dup VTMP2.4s, VTMP0.s[0]; /* INPUT + 8 * 4 */
+       dup VTMP3.4s, VTMP0.s[1]; /* INPUT + 9 * 4 */
+       dup X12_TMP.4s, VTMP0.s[2]; /* INPUT + 10 * 4 */
+       dup X13_TMP.4s, VTMP0.s[3]; /* INPUT + 11 * 4 */
+       dup VTMP0.4s, VTMP1.s[2]; /* INPUT + 14 * 4 */
+       dup VTMP1.4s, VTMP1.s[3]; /* INPUT + 15 * 4 */
+       PLUS(X8, VTMP2);
+       PLUS(X9, VTMP3);
+       PLUS(X10, X12_TMP);
+       PLUS(X11, X13_TMP);
+       PLUS(X14, VTMP0);
+       PLUS(X15, VTMP1);
+
+       transpose_4x4(X0, X1, X2, X3, VTMP0, VTMP1, VTMP2);
+       transpose_4x4(X4, X5, X6, X7, VTMP0, VTMP1, VTMP2);
+       transpose_4x4(X8, X9, X10, X11, VTMP0, VTMP1, VTMP2);
+       transpose_4x4(X12, X13, X14, X15, VTMP0, VTMP1, VTMP2);
+
+       subs NBLKS, NBLKS, #4;
+
+       ld1 {VTMP0.16b-VTMP3.16b}, [SRC], #64;
+       ld1 {X12_TMP.16b-X13_TMP.16b}, [SRC], #32;
+       eor VTMP0.16b, X0.16b, VTMP0.16b;
+       eor VTMP1.16b, X4.16b, VTMP1.16b;
+       eor VTMP2.16b, X8.16b, VTMP2.16b;
+       eor VTMP3.16b, X12.16b, VTMP3.16b;
+       eor X12_TMP.16b, X1.16b, X12_TMP.16b;
+       eor X13_TMP.16b, X5.16b, X13_TMP.16b;
+       st1 {VTMP0.16b-VTMP3.16b}, [DST], #64;
+       ld1 {VTMP0.16b-VTMP3.16b}, [SRC], #64;
+       st1 {X12_TMP.16b-X13_TMP.16b}, [DST], #32;
+       ld1 {X12_TMP.16b-X13_TMP.16b}, [SRC], #32;
+       eor VTMP0.16b, X9.16b, VTMP0.16b;
+       eor VTMP1.16b, X13.16b, VTMP1.16b;
+       eor VTMP2.16b, X2.16b, VTMP2.16b;
+       eor VTMP3.16b, X6.16b, VTMP3.16b;
+       eor X12_TMP.16b, X10.16b, X12_TMP.16b;
+       eor X13_TMP.16b, X14.16b, X13_TMP.16b;
+       st1 {VTMP0.16b-VTMP3.16b}, [DST], #64;
+       ld1 {VTMP0.16b-VTMP3.16b}, [SRC], #64;
+       st1 {X12_TMP.16b-X13_TMP.16b}, [DST], #32;
+       eor VTMP0.16b, X3.16b, VTMP0.16b;
+       eor VTMP1.16b, X7.16b, VTMP1.16b;
+       eor VTMP2.16b, X11.16b, VTMP2.16b;
+       eor VTMP3.16b, X15.16b, VTMP3.16b;
+       st1 {VTMP0.16b-VTMP3.16b}, [DST], #64;
+
+       b.ne .Loop_poly4;
+
+       POLY1305_STORE_STATE()
+
+       /* clear the used vector registers and stack */
+       clear(VTMP0);
+       clear(VTMP1);
+       clear(VTMP2);
+       clear(VTMP3);
+       clear(X12_TMP);
+       clear(X13_TMP);
+       clear(X0);
+       clear(X1);
+       clear(X2);
+       clear(X3);
+       clear(X4);
+       clear(X5);
+       clear(X6);
+       clear(X7);
+       clear(X8);
+       clear(X9);
+       clear(X10);
+       clear(X11);
+       clear(X12);
+       clear(X13);
+       clear(X14);
+       clear(X15);
+
+       eor x0, x0, x0
+       POLY1305_POP_REGS()
+       ret_spec_stop
+       CFI_ENDPROC()
+ELF(.size _gcry_chacha20_poly1305_aarch64_blocks4, 
.-_gcry_chacha20_poly1305_aarch64_blocks4;)
+
+#endif
diff --git a/grub-core/lib/libgcrypt/cipher/chacha20-amd64-avx2.S 
b/grub-core/lib/libgcrypt/cipher/chacha20-amd64-avx2.S
new file mode 100644
index 000000000..9f2a036aa
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/chacha20-amd64-avx2.S
@@ -0,0 +1,601 @@
+/* chacha20-amd64-avx2.S  -  AVX2 implementation of ChaCha20 cipher
+ *
+ * Copyright (C) 2017-2019 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * Based on D. J. Bernstein reference implementation at
+ * http://cr.yp.to/chacha.html:
+ *
+ * chacha-regs.c version 20080118
+ * D. J. Bernstein
+ * Public domain.
+ */
+
+#ifdef __x86_64
+#include <config.h>
+#if defined(HAVE_GCC_INLINE_ASM_AVX2) && \
+   (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+    defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+
+.text
+
+#include "asm-common-amd64.h"
+#include "asm-poly1305-amd64.h"
+
+/* register macros */
+#define INPUT %rdi
+#define DST   %rsi
+#define SRC   %rdx
+#define NBLKS %rcx
+#define ROUND %eax
+
+/* stack structure */
+#define STACK_VEC_X12 (32)
+#define STACK_VEC_X13 (32 + STACK_VEC_X12)
+#define STACK_TMP     (32 + STACK_VEC_X13)
+#define STACK_TMP1    (32 + STACK_TMP)
+
+#define STACK_MAX     (32 + STACK_TMP1)
+
+/* vector registers */
+#define X0 %ymm0
+#define X1 %ymm1
+#define X2 %ymm2
+#define X3 %ymm3
+#define X4 %ymm4
+#define X5 %ymm5
+#define X6 %ymm6
+#define X7 %ymm7
+#define X8 %ymm8
+#define X9 %ymm9
+#define X10 %ymm10
+#define X11 %ymm11
+#define X12 %ymm12
+#define X13 %ymm13
+#define X14 %ymm14
+#define X15 %ymm15
+
+#define X0h %xmm0
+#define X1h %xmm1
+#define X2h %xmm2
+#define X3h %xmm3
+#define X4h %xmm4
+#define X5h %xmm5
+#define X6h %xmm6
+#define X7h %xmm7
+#define X8h %xmm8
+#define X9h %xmm9
+#define X10h %xmm10
+#define X11h %xmm11
+#define X12h %xmm12
+#define X13h %xmm13
+#define X14h %xmm14
+#define X15h %xmm15
+
+/**********************************************************************
+  helper macros
+ **********************************************************************/
+
+/* 4x4 32-bit integer matrix transpose */
+#define transpose_4x4(x0,x1,x2,x3,t1,t2) \
+       vpunpckhdq x1, x0, t2; \
+       vpunpckldq x1, x0, x0; \
+       \
+       vpunpckldq x3, x2, t1; \
+       vpunpckhdq x3, x2, x2; \
+       \
+       vpunpckhqdq t1, x0, x1; \
+       vpunpcklqdq t1, x0, x0; \
+       \
+       vpunpckhqdq x2, t2, x3; \
+       vpunpcklqdq x2, t2, x2;
+
+/* 2x2 128-bit matrix transpose */
+#define transpose_16byte_2x2(x0,x1,t1) \
+       vmovdqa    x0, t1; \
+       vperm2i128 $0x20, x1, x0, x0; \
+       vperm2i128 $0x31, x1, t1, x1;
+
+/* xor register with unaligned src and save to unaligned dst */
+#define xor_src_dst(dst, src, offset, xreg) \
+       vpxor offset(src), xreg, xreg; \
+       vmovdqu xreg, offset(dst);
+
+/**********************************************************************
+  8-way chacha20
+ **********************************************************************/
+
+#define ROTATE2(v1,v2,c,tmp)   \
+       vpsrld $(32 - (c)), v1, tmp;    \
+       vpslld $(c), v1, v1;            \
+       vpaddb tmp, v1, v1;             \
+       vpsrld $(32 - (c)), v2, tmp;    \
+       vpslld $(c), v2, v2;            \
+       vpaddb tmp, v2, v2;
+
+#define ROTATE_SHUF_2(v1,v2,shuf)      \
+       vpshufb shuf, v1, v1;           \
+       vpshufb shuf, v2, v2;
+
+#define XOR(ds,s) \
+       vpxor s, ds, ds;
+
+#define PLUS(ds,s) \
+       vpaddd s, ds, ds;
+
+#define QUARTERROUND2(a1,b1,c1,d1,a2,b2,c2,d2,ign,tmp1,\
+                     interleave_op1,interleave_op2,\
+                     interleave_op3,interleave_op4)            \
+       vbroadcasti128 .Lshuf_rol16 rRIP, tmp1;                 \
+               interleave_op1;                                 \
+       PLUS(a1,b1); PLUS(a2,b2); XOR(d1,a1); XOR(d2,a2);       \
+           ROTATE_SHUF_2(d1, d2, tmp1);                        \
+               interleave_op2;                                 \
+       PLUS(c1,d1); PLUS(c2,d2); XOR(b1,c1); XOR(b2,c2);       \
+           ROTATE2(b1, b2, 12, tmp1);                          \
+       vbroadcasti128 .Lshuf_rol8 rRIP, tmp1;                  \
+               interleave_op3;                                 \
+       PLUS(a1,b1); PLUS(a2,b2); XOR(d1,a1); XOR(d2,a2);       \
+           ROTATE_SHUF_2(d1, d2, tmp1);                        \
+               interleave_op4;                                 \
+       PLUS(c1,d1); PLUS(c2,d2); XOR(b1,c1); XOR(b2,c2);       \
+           ROTATE2(b1, b2,  7, tmp1);
+
+.align 32
+chacha20_data:
+.Lshuf_rol16:
+       .byte 2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
+.Lshuf_rol8:
+       .byte 3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
+.Linc_counter:
+       .byte 0,1,2,3,4,5,6,7
+.Lunsigned_cmp:
+       .long 0x80000000
+
+.align 8
+.globl _gcry_chacha20_amd64_avx2_blocks8
+ELF(.type _gcry_chacha20_amd64_avx2_blocks8,@function;)
+
+_gcry_chacha20_amd64_avx2_blocks8:
+       /* input:
+        *      %rdi: input
+        *      %rsi: dst
+        *      %rdx: src
+        *      %rcx: nblks (multiple of 8)
+        */
+       CFI_STARTPROC();
+
+       vzeroupper;
+
+       pushq %rbp;
+       CFI_PUSH(%rbp);
+       movq %rsp, %rbp;
+       CFI_DEF_CFA_REGISTER(%rbp);
+
+       subq $STACK_MAX, %rsp;
+       andq $~31, %rsp;
+
+.Loop8:
+       mov $20, ROUND;
+
+       /* Construct counter vectors X12 and X13 */
+       vpmovzxbd .Linc_counter rRIP, X0;
+       vpbroadcastd .Lunsigned_cmp rRIP, X2;
+       vpbroadcastd (12 * 4)(INPUT), X12;
+       vpbroadcastd (13 * 4)(INPUT), X13;
+       vpaddd X0, X12, X12;
+       vpxor X2, X0, X0;
+       vpxor X2, X12, X1;
+       vpcmpgtd X1, X0, X0;
+       vpsubd X0, X13, X13;
+       vmovdqa X12, (STACK_VEC_X12)(%rsp);
+       vmovdqa X13, (STACK_VEC_X13)(%rsp);
+
+       /* Load vectors */
+       vpbroadcastd (0 * 4)(INPUT), X0;
+       vpbroadcastd (1 * 4)(INPUT), X1;
+       vpbroadcastd (2 * 4)(INPUT), X2;
+       vpbroadcastd (3 * 4)(INPUT), X3;
+       vpbroadcastd (4 * 4)(INPUT), X4;
+       vpbroadcastd (5 * 4)(INPUT), X5;
+       vpbroadcastd (6 * 4)(INPUT), X6;
+       vpbroadcastd (7 * 4)(INPUT), X7;
+       vpbroadcastd (8 * 4)(INPUT), X8;
+       vpbroadcastd (9 * 4)(INPUT), X9;
+       vpbroadcastd (10 * 4)(INPUT), X10;
+       vpbroadcastd (11 * 4)(INPUT), X11;
+       vpbroadcastd (14 * 4)(INPUT), X14;
+       vpbroadcastd (15 * 4)(INPUT), X15;
+       vmovdqa X15, (STACK_TMP)(%rsp);
+
+.Lround2:
+       QUARTERROUND2(X0, X4,  X8, X12,   X1, X5,  X9, X13, tmp:=,X15,,,,)
+       vmovdqa (STACK_TMP)(%rsp), X15;
+       vmovdqa X8, (STACK_TMP)(%rsp);
+       QUARTERROUND2(X2, X6, X10, X14,   X3, X7, X11, X15, tmp:=,X8,,,,)
+       QUARTERROUND2(X0, X5, X10, X15,   X1, X6, X11, X12, tmp:=,X8,,,,)
+       vmovdqa (STACK_TMP)(%rsp), X8;
+       vmovdqa X15, (STACK_TMP)(%rsp);
+       QUARTERROUND2(X2, X7,  X8, X13,   X3, X4,  X9, X14, tmp:=,X15,,,,)
+       sub $2, ROUND;
+       jnz .Lround2;
+
+       vmovdqa X8, (STACK_TMP1)(%rsp);
+
+       /* tmp := X15 */
+       vpbroadcastd (0 * 4)(INPUT), X15;
+       PLUS(X0, X15);
+       vpbroadcastd (1 * 4)(INPUT), X15;
+       PLUS(X1, X15);
+       vpbroadcastd (2 * 4)(INPUT), X15;
+       PLUS(X2, X15);
+       vpbroadcastd (3 * 4)(INPUT), X15;
+       PLUS(X3, X15);
+       vpbroadcastd (4 * 4)(INPUT), X15;
+       PLUS(X4, X15);
+       vpbroadcastd (5 * 4)(INPUT), X15;
+       PLUS(X5, X15);
+       vpbroadcastd (6 * 4)(INPUT), X15;
+       PLUS(X6, X15);
+       vpbroadcastd (7 * 4)(INPUT), X15;
+       PLUS(X7, X15);
+       transpose_4x4(X0, X1, X2, X3, X8, X15);
+       transpose_4x4(X4, X5, X6, X7, X8, X15);
+       vmovdqa (STACK_TMP1)(%rsp), X8;
+       transpose_16byte_2x2(X0, X4, X15);
+       transpose_16byte_2x2(X1, X5, X15);
+       transpose_16byte_2x2(X2, X6, X15);
+       transpose_16byte_2x2(X3, X7, X15);
+       vmovdqa (STACK_TMP)(%rsp), X15;
+       xor_src_dst(DST, SRC, (64 * 0 + 16 * 0), X0);
+       xor_src_dst(DST, SRC, (64 * 1 + 16 * 0), X1);
+       vpbroadcastd (8 * 4)(INPUT), X0;
+       PLUS(X8, X0);
+       vpbroadcastd (9 * 4)(INPUT), X0;
+       PLUS(X9, X0);
+       vpbroadcastd (10 * 4)(INPUT), X0;
+       PLUS(X10, X0);
+       vpbroadcastd (11 * 4)(INPUT), X0;
+       PLUS(X11, X0);
+       vmovdqa (STACK_VEC_X12)(%rsp), X0;
+       PLUS(X12, X0);
+       vmovdqa (STACK_VEC_X13)(%rsp), X0;
+       PLUS(X13, X0);
+       vpbroadcastd (14 * 4)(INPUT), X0;
+       PLUS(X14, X0);
+       vpbroadcastd (15 * 4)(INPUT), X0;
+       PLUS(X15, X0);
+       xor_src_dst(DST, SRC, (64 * 2 + 16 * 0), X2);
+       xor_src_dst(DST, SRC, (64 * 3 + 16 * 0), X3);
+
+       /* Update counter */
+       addq $8, (12 * 4)(INPUT);
+
+       transpose_4x4(X8, X9, X10, X11, X0, X1);
+       transpose_4x4(X12, X13, X14, X15, X0, X1);
+       xor_src_dst(DST, SRC, (64 * 4 + 16 * 0), X4);
+       xor_src_dst(DST, SRC, (64 * 5 + 16 * 0), X5);
+       transpose_16byte_2x2(X8, X12, X0);
+       transpose_16byte_2x2(X9, X13, X0);
+       transpose_16byte_2x2(X10, X14, X0);
+       transpose_16byte_2x2(X11, X15, X0);
+       xor_src_dst(DST, SRC, (64 * 6 + 16 * 0), X6);
+       xor_src_dst(DST, SRC, (64 * 7 + 16 * 0), X7);
+       xor_src_dst(DST, SRC, (64 * 0 + 16 * 2), X8);
+       xor_src_dst(DST, SRC, (64 * 1 + 16 * 2), X9);
+       xor_src_dst(DST, SRC, (64 * 2 + 16 * 2), X10);
+       xor_src_dst(DST, SRC, (64 * 3 + 16 * 2), X11);
+       xor_src_dst(DST, SRC, (64 * 4 + 16 * 2), X12);
+       xor_src_dst(DST, SRC, (64 * 5 + 16 * 2), X13);
+       xor_src_dst(DST, SRC, (64 * 6 + 16 * 2), X14);
+       xor_src_dst(DST, SRC, (64 * 7 + 16 * 2), X15);
+
+       sub $8, NBLKS;
+       lea (8 * 64)(DST), DST;
+       lea (8 * 64)(SRC), SRC;
+       jnz .Loop8;
+
+       /* clear the used vector registers and stack */
+       vpxor X0, X0, X0;
+       vmovdqa X0, (STACK_VEC_X12)(%rsp);
+       vmovdqa X0, (STACK_VEC_X13)(%rsp);
+       vmovdqa X0, (STACK_TMP)(%rsp);
+       vmovdqa X0, (STACK_TMP1)(%rsp);
+       vzeroall;
+
+       /* eax zeroed by round loop. */
+       leave;
+       CFI_LEAVE();
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size _gcry_chacha20_amd64_avx2_blocks8,
+         .-_gcry_chacha20_amd64_avx2_blocks8;)
+
+/**********************************************************************
+  8-way stitched chacha20-poly1305
+ **********************************************************************/
+
+#define _ /*_*/
+
+.align 8
+.globl _gcry_chacha20_poly1305_amd64_avx2_blocks8
+ELF(.type _gcry_chacha20_poly1305_amd64_avx2_blocks8,@function;)
+
+_gcry_chacha20_poly1305_amd64_avx2_blocks8:
+       /* input:
+        *      %rdi: input
+        *      %rsi: dst
+        *      %rdx: src
+        *      %rcx: nblks (multiple of 8)
+        *      %r9: poly1305-state
+        *      %r8: poly1305-src
+        */
+       CFI_STARTPROC();
+
+       pushq %rbp;
+       CFI_PUSH(%rbp);
+       movq %rsp, %rbp;
+       CFI_DEF_CFA_REGISTER(%rbp);
+
+       vzeroupper;
+
+       subq $(9 * 8) + STACK_MAX + 32, %rsp;
+       andq $~31, %rsp;
+
+       movq %rbx, (STACK_MAX + 0 * 8)(%rsp);
+       movq %r12, (STACK_MAX + 1 * 8)(%rsp);
+       movq %r13, (STACK_MAX + 2 * 8)(%rsp);
+       movq %r14, (STACK_MAX + 3 * 8)(%rsp);
+       movq %r15, (STACK_MAX + 4 * 8)(%rsp);
+       CFI_REG_ON_STACK(rbx, STACK_MAX + 0 * 8);
+       CFI_REG_ON_STACK(r12, STACK_MAX + 1 * 8);
+       CFI_REG_ON_STACK(r13, STACK_MAX + 2 * 8);
+       CFI_REG_ON_STACK(r14, STACK_MAX + 3 * 8);
+       CFI_REG_ON_STACK(r15, STACK_MAX + 4 * 8);
+
+       movq %rdx, (STACK_MAX + 5 * 8)(%rsp); # SRC
+       movq %rsi, (STACK_MAX + 6 * 8)(%rsp); # DST
+       movq %rcx, (STACK_MAX + 7 * 8)(%rsp); # NBLKS
+
+       /* Load state */
+       POLY1305_LOAD_STATE();
+
+.Loop_poly8:
+
+       /* Construct counter vectors X12 and X13 */
+       vpmovzxbd .Linc_counter rRIP, X0;
+       vpbroadcastd .Lunsigned_cmp rRIP, X2;
+       vpbroadcastd (12 * 4)(INPUT), X12;
+       vpbroadcastd (13 * 4)(INPUT), X13;
+       vpaddd X0, X12, X12;
+       vpxor X2, X0, X0;
+       vpxor X2, X12, X1;
+       vpcmpgtd X1, X0, X0;
+       vpsubd X0, X13, X13;
+       vmovdqa X12, (STACK_VEC_X12)(%rsp);
+       vmovdqa X13, (STACK_VEC_X13)(%rsp);
+
+       /* Load vectors */
+       vpbroadcastd (0 * 4)(INPUT), X0;
+       vpbroadcastd (1 * 4)(INPUT), X1;
+       vpbroadcastd (2 * 4)(INPUT), X2;
+       vpbroadcastd (3 * 4)(INPUT), X3;
+       vpbroadcastd (4 * 4)(INPUT), X4;
+       vpbroadcastd (5 * 4)(INPUT), X5;
+       vpbroadcastd (6 * 4)(INPUT), X6;
+       vpbroadcastd (7 * 4)(INPUT), X7;
+       vpbroadcastd (8 * 4)(INPUT), X8;
+       vpbroadcastd (9 * 4)(INPUT), X9;
+       vpbroadcastd (10 * 4)(INPUT), X10;
+       vpbroadcastd (11 * 4)(INPUT), X11;
+       vpbroadcastd (14 * 4)(INPUT), X14;
+       vpbroadcastd (15 * 4)(INPUT), X15;
+       vmovdqa X15, (STACK_TMP)(%rsp);
+
+       /* Process eight ChaCha20 blocks and 32 Poly1305 blocks. */
+
+       movl $20, (STACK_MAX + 8 * 8 + 4)(%rsp);
+.Lround8_with_poly1305_outer:
+       movl $6, (STACK_MAX + 8 * 8)(%rsp);
+.Lround8_with_poly1305_inner1:
+       /* rounds 0-5 & 10-15 */
+                     POLY1305_BLOCK_PART1(0 * 16)
+       QUARTERROUND2(X0, X4,  X8, X12,   X1, X5,  X9, X13, tmp:=,X15,
+                     POLY1305_BLOCK_PART2(),
+                     POLY1305_BLOCK_PART3(),
+                     POLY1305_BLOCK_PART4(),
+                     POLY1305_BLOCK_PART5())
+       vmovdqa (STACK_TMP)(%rsp), X15;
+       vmovdqa X8, (STACK_TMP)(%rsp);
+                     POLY1305_BLOCK_PART1(1 * 16)
+       QUARTERROUND2(X2, X6, X10, X14,   X3, X7, X11, X15, tmp:=,X8,
+                     POLY1305_BLOCK_PART2(),
+                     POLY1305_BLOCK_PART3(),
+                     POLY1305_BLOCK_PART4(),
+                     POLY1305_BLOCK_PART5())
+                     POLY1305_BLOCK_PART1(2 * 16)
+       QUARTERROUND2(X0, X5, X10, X15,   X1, X6, X11, X12, tmp:=,X8,
+                     POLY1305_BLOCK_PART2(),
+                     POLY1305_BLOCK_PART3(),
+                     POLY1305_BLOCK_PART4(),
+                     POLY1305_BLOCK_PART5())
+       vmovdqa (STACK_TMP)(%rsp), X8;
+       vmovdqa X15, (STACK_TMP)(%rsp);
+                     POLY1305_BLOCK_PART1(3 * 16)
+                     lea (4 * 16)(POLY_RSRC), POLY_RSRC;
+       QUARTERROUND2(X2, X7,  X8, X13,   X3, X4,  X9, X14, tmp:=,X15,
+                     POLY1305_BLOCK_PART2(),
+                     POLY1305_BLOCK_PART3(),
+                     POLY1305_BLOCK_PART4(),
+                     POLY1305_BLOCK_PART5())
+
+       subl $2, (STACK_MAX + 8 * 8)(%rsp);
+       jnz .Lround8_with_poly1305_inner1;
+
+       movl $4, (STACK_MAX + 8 * 8)(%rsp);
+.Lround8_with_poly1305_inner2:
+       /* rounds 6-9 & 16-19 */
+                     POLY1305_BLOCK_PART1(0 * 16)
+       QUARTERROUND2(X0, X4,  X8, X12,   X1, X5,  X9, X13, tmp:=,X15,
+                     POLY1305_BLOCK_PART2(),
+                     _,
+                     POLY1305_BLOCK_PART3(),
+                     _)
+       vmovdqa (STACK_TMP)(%rsp), X15;
+       vmovdqa X8, (STACK_TMP)(%rsp);
+       QUARTERROUND2(X2, X6, X10, X14,   X3, X7, X11, X15, tmp:=,X8,
+                     _,
+                     POLY1305_BLOCK_PART4(),
+                     _,
+                     POLY1305_BLOCK_PART5())
+                     POLY1305_BLOCK_PART1(1 * 16);
+                     lea (2 * 16)(POLY_RSRC), POLY_RSRC;
+       QUARTERROUND2(X0, X5, X10, X15,   X1, X6, X11, X12, tmp:=,X8,
+                     _,
+                     POLY1305_BLOCK_PART2(),
+                     _,
+                     POLY1305_BLOCK_PART3())
+       vmovdqa (STACK_TMP)(%rsp), X8;
+       vmovdqa X15, (STACK_TMP)(%rsp);
+       QUARTERROUND2(X2, X7,  X8, X13,   X3, X4,  X9, X14, tmp:=,X15,
+                     POLY1305_BLOCK_PART4(),
+                     _,
+                     POLY1305_BLOCK_PART5(),
+                     _)
+
+       subl $2, (STACK_MAX + 8 * 8)(%rsp);
+       jnz .Lround8_with_poly1305_inner2;
+
+       subl $10, (STACK_MAX + 8 * 8 + 4)(%rsp);
+       jnz .Lround8_with_poly1305_outer;
+
+       movq (STACK_MAX + 5 * 8)(%rsp), SRC;
+       movq (STACK_MAX + 6 * 8)(%rsp), DST;
+
+       vmovdqa X8, (STACK_TMP1)(%rsp);
+
+       /* tmp := X15 */
+       vpbroadcastd (0 * 4)(INPUT), X15;
+       PLUS(X0, X15);
+       vpbroadcastd (1 * 4)(INPUT), X15;
+       PLUS(X1, X15);
+       vpbroadcastd (2 * 4)(INPUT), X15;
+       PLUS(X2, X15);
+       vpbroadcastd (3 * 4)(INPUT), X15;
+       PLUS(X3, X15);
+       vpbroadcastd (4 * 4)(INPUT), X15;
+       PLUS(X4, X15);
+       vpbroadcastd (5 * 4)(INPUT), X15;
+       PLUS(X5, X15);
+       vpbroadcastd (6 * 4)(INPUT), X15;
+       PLUS(X6, X15);
+       vpbroadcastd (7 * 4)(INPUT), X15;
+       PLUS(X7, X15);
+       transpose_4x4(X0, X1, X2, X3, X8, X15);
+       transpose_4x4(X4, X5, X6, X7, X8, X15);
+       vmovdqa (STACK_TMP1)(%rsp), X8;
+       transpose_16byte_2x2(X0, X4, X15);
+       transpose_16byte_2x2(X1, X5, X15);
+       transpose_16byte_2x2(X2, X6, X15);
+       transpose_16byte_2x2(X3, X7, X15);
+       vmovdqa (STACK_TMP)(%rsp), X15;
+       xor_src_dst(DST, SRC, (64 * 0 + 16 * 0), X0);
+       xor_src_dst(DST, SRC, (64 * 1 + 16 * 0), X1);
+       vpbroadcastd (8 * 4)(INPUT), X0;
+       PLUS(X8, X0);
+       vpbroadcastd (9 * 4)(INPUT), X0;
+       PLUS(X9, X0);
+       vpbroadcastd (10 * 4)(INPUT), X0;
+       PLUS(X10, X0);
+       vpbroadcastd (11 * 4)(INPUT), X0;
+       PLUS(X11, X0);
+       vmovdqa (STACK_VEC_X12)(%rsp), X0;
+       PLUS(X12, X0);
+       vmovdqa (STACK_VEC_X13)(%rsp), X0;
+       PLUS(X13, X0);
+       vpbroadcastd (14 * 4)(INPUT), X0;
+       PLUS(X14, X0);
+       vpbroadcastd (15 * 4)(INPUT), X0;
+       PLUS(X15, X0);
+       xor_src_dst(DST, SRC, (64 * 2 + 16 * 0), X2);
+       xor_src_dst(DST, SRC, (64 * 3 + 16 * 0), X3);
+
+       /* Update counter */
+       addq $8, (12 * 4)(INPUT);
+
+       transpose_4x4(X8, X9, X10, X11, X0, X1);
+       transpose_4x4(X12, X13, X14, X15, X0, X1);
+       xor_src_dst(DST, SRC, (64 * 4 + 16 * 0), X4);
+       xor_src_dst(DST, SRC, (64 * 5 + 16 * 0), X5);
+       transpose_16byte_2x2(X8, X12, X0);
+       transpose_16byte_2x2(X9, X13, X0);
+       transpose_16byte_2x2(X10, X14, X0);
+       transpose_16byte_2x2(X11, X15, X0);
+       xor_src_dst(DST, SRC, (64 * 6 + 16 * 0), X6);
+       xor_src_dst(DST, SRC, (64 * 7 + 16 * 0), X7);
+       xor_src_dst(DST, SRC, (64 * 0 + 16 * 2), X8);
+       xor_src_dst(DST, SRC, (64 * 1 + 16 * 2), X9);
+       xor_src_dst(DST, SRC, (64 * 2 + 16 * 2), X10);
+       xor_src_dst(DST, SRC, (64 * 3 + 16 * 2), X11);
+       xor_src_dst(DST, SRC, (64 * 4 + 16 * 2), X12);
+       xor_src_dst(DST, SRC, (64 * 5 + 16 * 2), X13);
+       xor_src_dst(DST, SRC, (64 * 6 + 16 * 2), X14);
+       xor_src_dst(DST, SRC, (64 * 7 + 16 * 2), X15);
+
+       subq $8, (STACK_MAX + 7 * 8)(%rsp); # NBLKS
+
+       lea (8 * 64)(DST), DST;
+       lea (8 * 64)(SRC), SRC;
+       movq SRC, (STACK_MAX + 5 * 8)(%rsp);
+       movq DST, (STACK_MAX + 6 * 8)(%rsp);
+
+       jnz .Loop_poly8;
+
+       /* Store state */
+       POLY1305_STORE_STATE();
+
+       /* clear the used vector registers and stack */
+       vpxor X0, X0, X0;
+       vmovdqa X0, (STACK_VEC_X12)(%rsp);
+       vmovdqa X0, (STACK_VEC_X13)(%rsp);
+       vmovdqa X0, (STACK_TMP)(%rsp);
+       vmovdqa X0, (STACK_TMP1)(%rsp);
+       vzeroall;
+
+       movq (STACK_MAX + 0 * 8)(%rsp), %rbx;
+       movq (STACK_MAX + 1 * 8)(%rsp), %r12;
+       movq (STACK_MAX + 2 * 8)(%rsp), %r13;
+       movq (STACK_MAX + 3 * 8)(%rsp), %r14;
+       movq (STACK_MAX + 4 * 8)(%rsp), %r15;
+       CFI_RESTORE(%rbx);
+       CFI_RESTORE(%r12);
+       CFI_RESTORE(%r13);
+       CFI_RESTORE(%r14);
+       CFI_RESTORE(%r15);
+
+       xorl %eax, %eax;
+       leave;
+       CFI_LEAVE();
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size _gcry_chacha20_poly1305_amd64_avx2_blocks8,
+         .-_gcry_chacha20_poly1305_amd64_avx2_blocks8;)
+
+#endif /*defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS)*/
+#endif /*__x86_64*/
diff --git a/grub-core/lib/libgcrypt/cipher/chacha20-amd64-ssse3.S 
b/grub-core/lib/libgcrypt/cipher/chacha20-amd64-ssse3.S
new file mode 100644
index 000000000..6c7379787
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/chacha20-amd64-ssse3.S
@@ -0,0 +1,1012 @@
+/* chacha20-amd64-ssse3.S  -  SSSE3 implementation of ChaCha20 cipher
+ *
+ * Copyright (C) 2017-2019 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * Based on D. J. Bernstein reference implementation at
+ * http://cr.yp.to/chacha.html:
+ *
+ * chacha-regs.c version 20080118
+ * D. J. Bernstein
+ * Public domain.
+ */
+
+#ifdef __x86_64
+#include <config.h>
+#if defined(HAVE_GCC_INLINE_ASM_SSSE3) && \
+   (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+    defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+
+.text
+
+#include "asm-common-amd64.h"
+#include "asm-poly1305-amd64.h"
+
+/* register macros */
+#define INPUT %rdi
+#define DST   %rsi
+#define SRC   %rdx
+#define NBLKS %rcx
+#define ROUND %eax
+
+/* stack structure */
+#define STACK_VEC_X12 (16)
+#define STACK_VEC_X13 (16 + STACK_VEC_X12)
+#define STACK_TMP     (16 + STACK_VEC_X13)
+#define STACK_TMP1    (16 + STACK_TMP)
+#define STACK_TMP2    (16 + STACK_TMP1)
+
+#define STACK_MAX     (16 + STACK_TMP2)
+
+/* vector registers */
+#define X0 %xmm0
+#define X1 %xmm1
+#define X2 %xmm2
+#define X3 %xmm3
+#define X4 %xmm4
+#define X5 %xmm5
+#define X6 %xmm6
+#define X7 %xmm7
+#define X8 %xmm8
+#define X9 %xmm9
+#define X10 %xmm10
+#define X11 %xmm11
+#define X12 %xmm12
+#define X13 %xmm13
+#define X14 %xmm14
+#define X15 %xmm15
+
+/**********************************************************************
+  helper macros
+ **********************************************************************/
+
+/* 4x4 32-bit integer matrix transpose */
+#define transpose_4x4(x0, x1, x2, x3, t1, t2, t3) \
+       movdqa    x0, t2; \
+       punpckhdq x1, t2; \
+       punpckldq x1, x0; \
+       \
+       movdqa    x2, t1; \
+       punpckldq x3, t1; \
+       punpckhdq x3, x2; \
+       \
+       movdqa     x0, x1; \
+       punpckhqdq t1, x1; \
+       punpcklqdq t1, x0; \
+       \
+       movdqa     t2, x3; \
+       punpckhqdq x2, x3; \
+       punpcklqdq x2, t2; \
+       movdqa     t2, x2;
+
+/* fill xmm register with 32-bit value from memory */
+#define pbroadcastd(mem32, xreg) \
+       movd mem32, xreg; \
+       pshufd $0, xreg, xreg;
+
+/* xor with unaligned memory operand */
+#define pxor_u(umem128, xreg, t) \
+       movdqu umem128, t; \
+       pxor t, xreg;
+
+/* xor register with unaligned src and save to unaligned dst */
+#define xor_src_dst(dst, src, offset, xreg, t) \
+       pxor_u(offset(src), xreg, t); \
+       movdqu xreg, offset(dst);
+
+#define clear(x) pxor x,x;
+
+/**********************************************************************
+  4-way chacha20
+ **********************************************************************/
+
+#define ROTATE2(v1,v2,c,tmp1,tmp2)     \
+       movdqa v1, tmp1;                \
+       movdqa v2, tmp2;                \
+       psrld $(32 - (c)), v1;          \
+       pslld $(c), tmp1;               \
+       paddb tmp1, v1;                 \
+       psrld $(32 - (c)), v2;          \
+       pslld $(c), tmp2;               \
+       paddb tmp2, v2;
+
+#define ROTATE_SHUF_2(v1,v2,shuf)      \
+       pshufb shuf, v1;                \
+       pshufb shuf, v2;
+
+#define XOR(ds,s) \
+       pxor s, ds;
+
+#define PLUS(ds,s) \
+       paddd s, ds;
+
+#define QUARTERROUND2(a1,b1,c1,d1,a2,b2,c2,d2,ign,tmp1,tmp2,\
+                     interleave_op1,interleave_op2)            \
+       movdqa .Lshuf_rol16 rRIP, tmp1;                         \
+               interleave_op1;                                 \
+       PLUS(a1,b1); PLUS(a2,b2); XOR(d1,a1); XOR(d2,a2);       \
+           ROTATE_SHUF_2(d1, d2, tmp1);                        \
+       PLUS(c1,d1); PLUS(c2,d2); XOR(b1,c1); XOR(b2,c2);       \
+           ROTATE2(b1, b2, 12, tmp1, tmp2);                    \
+       movdqa .Lshuf_rol8 rRIP, tmp1;                          \
+               interleave_op2;                                 \
+       PLUS(a1,b1); PLUS(a2,b2); XOR(d1,a1); XOR(d2,a2);       \
+           ROTATE_SHUF_2(d1, d2, tmp1);                        \
+       PLUS(c1,d1); PLUS(c2,d2); XOR(b1,c1); XOR(b2,c2);       \
+           ROTATE2(b1, b2,  7, tmp1, tmp2);
+
+chacha20_data:
+.align 16
+.Lshuf_rol16:
+       .byte 2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
+.Lshuf_rol8:
+       .byte 3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
+.Lcounter1:
+       .long 1,0,0,0
+.Linc_counter:
+       .long 0,1,2,3
+.Lunsigned_cmp:
+       .long 0x80000000,0x80000000,0x80000000,0x80000000
+
+.align 8
+.globl _gcry_chacha20_amd64_ssse3_blocks4
+ELF(.type _gcry_chacha20_amd64_ssse3_blocks4,@function;)
+
+_gcry_chacha20_amd64_ssse3_blocks4:
+       /* input:
+        *      %rdi: input
+        *      %rsi: dst
+        *      %rdx: src
+        *      %rcx: nblks (multiple of 4)
+        */
+       CFI_STARTPROC();
+
+       pushq %rbp;
+       CFI_PUSH(%rbp);
+       movq %rsp, %rbp;
+       CFI_DEF_CFA_REGISTER(%rbp);
+
+       subq $STACK_MAX, %rsp;
+       andq $~15, %rsp;
+
+.Loop4:
+       mov $20, ROUND;
+
+       /* Construct counter vectors X12 and X13 */
+       movdqa .Linc_counter rRIP, X0;
+       movdqa .Lunsigned_cmp rRIP, X2;
+       pbroadcastd((12 * 4)(INPUT), X12);
+       pbroadcastd((13 * 4)(INPUT), X13);
+       paddd X0, X12;
+       movdqa X12, X1;
+       pxor X2, X0;
+       pxor X2, X1;
+       pcmpgtd X1, X0;
+       psubd X0, X13;
+       movdqa X12, (STACK_VEC_X12)(%rsp);
+       movdqa X13, (STACK_VEC_X13)(%rsp);
+
+       /* Load vectors */
+       pbroadcastd((0 * 4)(INPUT), X0);
+       pbroadcastd((1 * 4)(INPUT), X1);
+       pbroadcastd((2 * 4)(INPUT), X2);
+       pbroadcastd((3 * 4)(INPUT), X3);
+       pbroadcastd((4 * 4)(INPUT), X4);
+       pbroadcastd((5 * 4)(INPUT), X5);
+       pbroadcastd((6 * 4)(INPUT), X6);
+       pbroadcastd((7 * 4)(INPUT), X7);
+       pbroadcastd((8 * 4)(INPUT), X8);
+       pbroadcastd((9 * 4)(INPUT), X9);
+       pbroadcastd((10 * 4)(INPUT), X10);
+       pbroadcastd((11 * 4)(INPUT), X11);
+       pbroadcastd((14 * 4)(INPUT), X14);
+       pbroadcastd((15 * 4)(INPUT), X15);
+       movdqa X11, (STACK_TMP)(%rsp);
+       movdqa X15, (STACK_TMP1)(%rsp);
+
+.Lround2_4:
+       QUARTERROUND2(X0, X4,  X8, X12,   X1, X5,  X9, X13, tmp:=,X11,X15,,)
+       movdqa (STACK_TMP)(%rsp), X11;
+       movdqa (STACK_TMP1)(%rsp), X15;
+       movdqa X8, (STACK_TMP)(%rsp);
+       movdqa X9, (STACK_TMP1)(%rsp);
+       QUARTERROUND2(X2, X6, X10, X14,   X3, X7, X11, X15, tmp:=,X8,X9,,)
+       QUARTERROUND2(X0, X5, X10, X15,   X1, X6, X11, X12, tmp:=,X8,X9,,)
+       movdqa (STACK_TMP)(%rsp), X8;
+       movdqa (STACK_TMP1)(%rsp), X9;
+       movdqa X11, (STACK_TMP)(%rsp);
+       movdqa X15, (STACK_TMP1)(%rsp);
+       QUARTERROUND2(X2, X7,  X8, X13,   X3, X4,  X9, X14, tmp:=,X11,X15,,)
+       sub $2, ROUND;
+       jnz .Lround2_4;
+
+       /* tmp := X15 */
+       movdqa (STACK_TMP)(%rsp), X11;
+       pbroadcastd((0 * 4)(INPUT), X15);
+       PLUS(X0, X15);
+       pbroadcastd((1 * 4)(INPUT), X15);
+       PLUS(X1, X15);
+       pbroadcastd((2 * 4)(INPUT), X15);
+       PLUS(X2, X15);
+       pbroadcastd((3 * 4)(INPUT), X15);
+       PLUS(X3, X15);
+       pbroadcastd((4 * 4)(INPUT), X15);
+       PLUS(X4, X15);
+       pbroadcastd((5 * 4)(INPUT), X15);
+       PLUS(X5, X15);
+       pbroadcastd((6 * 4)(INPUT), X15);
+       PLUS(X6, X15);
+       pbroadcastd((7 * 4)(INPUT), X15);
+       PLUS(X7, X15);
+       pbroadcastd((8 * 4)(INPUT), X15);
+       PLUS(X8, X15);
+       pbroadcastd((9 * 4)(INPUT), X15);
+       PLUS(X9, X15);
+       pbroadcastd((10 * 4)(INPUT), X15);
+       PLUS(X10, X15);
+       pbroadcastd((11 * 4)(INPUT), X15);
+       PLUS(X11, X15);
+       movdqa (STACK_VEC_X12)(%rsp), X15;
+       PLUS(X12, X15);
+       movdqa (STACK_VEC_X13)(%rsp), X15;
+       PLUS(X13, X15);
+       movdqa X13, (STACK_TMP)(%rsp);
+       pbroadcastd((14 * 4)(INPUT), X15);
+       PLUS(X14, X15);
+       movdqa (STACK_TMP1)(%rsp), X15;
+       movdqa X14, (STACK_TMP1)(%rsp);
+       pbroadcastd((15 * 4)(INPUT), X13);
+       PLUS(X15, X13);
+       movdqa X15, (STACK_TMP2)(%rsp);
+
+       /* Update counter */
+       addq $4, (12 * 4)(INPUT);
+
+       transpose_4x4(X0, X1, X2, X3, X13, X14, X15);
+       xor_src_dst(DST, SRC, (64 * 0 + 16 * 0), X0, X15);
+       xor_src_dst(DST, SRC, (64 * 1 + 16 * 0), X1, X15);
+       xor_src_dst(DST, SRC, (64 * 2 + 16 * 0), X2, X15);
+       xor_src_dst(DST, SRC, (64 * 3 + 16 * 0), X3, X15);
+       transpose_4x4(X4, X5, X6, X7, X0, X1, X2);
+       movdqa (STACK_TMP)(%rsp), X13;
+       movdqa (STACK_TMP1)(%rsp), X14;
+       movdqa (STACK_TMP2)(%rsp), X15;
+       xor_src_dst(DST, SRC, (64 * 0 + 16 * 1), X4, X0);
+       xor_src_dst(DST, SRC, (64 * 1 + 16 * 1), X5, X0);
+       xor_src_dst(DST, SRC, (64 * 2 + 16 * 1), X6, X0);
+       xor_src_dst(DST, SRC, (64 * 3 + 16 * 1), X7, X0);
+       transpose_4x4(X8, X9, X10, X11, X0, X1, X2);
+       xor_src_dst(DST, SRC, (64 * 0 + 16 * 2), X8, X0);
+       xor_src_dst(DST, SRC, (64 * 1 + 16 * 2), X9, X0);
+       xor_src_dst(DST, SRC, (64 * 2 + 16 * 2), X10, X0);
+       xor_src_dst(DST, SRC, (64 * 3 + 16 * 2), X11, X0);
+       transpose_4x4(X12, X13, X14, X15, X0, X1, X2);
+       xor_src_dst(DST, SRC, (64 * 0 + 16 * 3), X12, X0);
+       xor_src_dst(DST, SRC, (64 * 1 + 16 * 3), X13, X0);
+       xor_src_dst(DST, SRC, (64 * 2 + 16 * 3), X14, X0);
+       xor_src_dst(DST, SRC, (64 * 3 + 16 * 3), X15, X0);
+
+       sub $4, NBLKS;
+       lea (4 * 64)(DST), DST;
+       lea (4 * 64)(SRC), SRC;
+       jnz .Loop4;
+
+       /* clear the used vector registers and stack */
+       clear(X0);
+       movdqa X0, (STACK_VEC_X12)(%rsp);
+       movdqa X0, (STACK_VEC_X13)(%rsp);
+       movdqa X0, (STACK_TMP)(%rsp);
+       movdqa X0, (STACK_TMP1)(%rsp);
+       movdqa X0, (STACK_TMP2)(%rsp);
+       clear(X1);
+       clear(X2);
+       clear(X3);
+       clear(X4);
+       clear(X5);
+       clear(X6);
+       clear(X7);
+       clear(X8);
+       clear(X9);
+       clear(X10);
+       clear(X11);
+       clear(X12);
+       clear(X13);
+       clear(X14);
+       clear(X15);
+
+       /* eax zeroed by round loop. */
+       leave;
+       CFI_LEAVE();
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size _gcry_chacha20_amd64_ssse3_blocks4,
+         .-_gcry_chacha20_amd64_ssse3_blocks4;)
+
+/**********************************************************************
+  2-way && 1-way chacha20
+ **********************************************************************/
+
+#define ROTATE_SHUF(v1,shuf)           \
+       pshufb shuf, v1;
+
+#define ROTATE(v1,c,tmp1)              \
+       movdqa v1, tmp1;                \
+       psrld $(32 - (c)), v1;          \
+       pslld $(c), tmp1;               \
+       paddb tmp1, v1;
+
+#define WORD_SHUF(v1,shuf)             \
+       pshufd $shuf, v1, v1;
+
+#define QUARTERROUND4(x0,x1,x2,x3,shuf_rol8,shuf_rol16,tmp1,shuf_x1,\
+                     shuf_x2,shuf_x3) \
+       PLUS(x0, x1); XOR(x3, x0); ROTATE_SHUF(x3, shuf_rol16); \
+       PLUS(x2, x3); XOR(x1, x2); ROTATE(x1, 12, tmp1); \
+       PLUS(x0, x1); XOR(x3, x0); ROTATE_SHUF(x3, shuf_rol8); \
+       PLUS(x2, x3); \
+         WORD_SHUF(x3, shuf_x3); \
+                     XOR(x1, x2); \
+         WORD_SHUF(x2, shuf_x2); \
+                                  ROTATE(x1, 7, tmp1); \
+         WORD_SHUF(x1, shuf_x1);
+
+.align 8
+.globl _gcry_chacha20_amd64_ssse3_blocks1
+ELF(.type _gcry_chacha20_amd64_ssse3_blocks1,@function;)
+
+_gcry_chacha20_amd64_ssse3_blocks1:
+       /* input:
+        *      %rdi: input
+        *      %rsi: dst
+        *      %rdx: src
+        *      %rcx: nblks
+        */
+       CFI_STARTPROC();
+
+       /* Load constants */
+       movdqa .Lcounter1 rRIP, X4;
+       movdqa .Lshuf_rol8 rRIP, X5;
+       movdqa .Lshuf_rol16 rRIP, X6;
+
+       /* Load state */
+       movdqu (0 * 4)(INPUT), X10;
+       movdqu (4 * 4)(INPUT), X11;
+       movdqu (8 * 4)(INPUT), X12;
+       movdqu (12 * 4)(INPUT), X13;
+
+       cmp $2, NBLKS;
+       jb .Loop1;
+
+       mov $20, ROUND;
+
+       movdqa X10, X0;
+       movdqa X11, X1;
+       movdqa X12, X2;
+       movdqa X13, X3;
+
+       movdqa X10, X8;
+       movdqa X11, X9;
+       movdqa X12, X14;
+       movdqa X13, X15;
+       paddq X4, X15;
+
+.Lround2_2:
+       QUARTERROUND4(X0, X1, X2,  X3,  X5, X6, X7, 0x39, 0x4e, 0x93);
+       QUARTERROUND4(X8, X9, X14, X15, X5, X6, X7, 0x39, 0x4e, 0x93);
+       QUARTERROUND4(X0, X1, X2,  X3,  X5, X6, X7, 0x93, 0x4e, 0x39);
+       QUARTERROUND4(X8, X9, X14, X15, X5, X6, X7, 0x93, 0x4e, 0x39);
+       sub $2, ROUND;
+       jnz .Lround2_2;
+
+       PLUS(X0, X10);
+       PLUS(X1, X11);
+       PLUS(X2, X12);
+       PLUS(X3, X13);
+
+       /* Update counter */
+       paddq X4, X13;
+
+       PLUS(X8, X10);
+       PLUS(X9, X11);
+       PLUS(X14, X12);
+       PLUS(X15, X13);
+
+       /* Update counter */
+       paddq X4, X13;
+
+       xor_src_dst(DST, SRC, 0 * 4, X0, X7);
+       xor_src_dst(DST, SRC, 4 * 4, X1, X7);
+       xor_src_dst(DST, SRC, 8 * 4, X2, X7);
+       xor_src_dst(DST, SRC, 12 * 4, X3, X7);
+       xor_src_dst(DST, SRC, 16 * 4, X8, X7);
+       xor_src_dst(DST, SRC, 20 * 4, X9, X7);
+       xor_src_dst(DST, SRC, 24 * 4, X14, X7);
+       xor_src_dst(DST, SRC, 28 * 4, X15, X7);
+
+       lea (2 * 64)(DST), DST;
+       lea (2 * 64)(SRC), SRC;
+
+       clear(X8);
+       clear(X9);
+       clear(X14);
+       clear(X15);
+
+       sub $2, NBLKS;
+       jz .Ldone1;
+
+.Loop1:
+       mov $20, ROUND;
+
+       movdqa X10, X0;
+       movdqa X11, X1;
+       movdqa X12, X2;
+       movdqa X13, X3;
+
+.Lround2_1:
+       QUARTERROUND4(X0, X1, X2, X3, X5, X6, X7, 0x39, 0x4e, 0x93);
+       QUARTERROUND4(X0, X1, X2, X3, X5, X6, X7, 0x93, 0x4e, 0x39);
+       sub $2, ROUND;
+       jnz .Lround2_1;
+
+       PLUS(X0, X10);
+       PLUS(X1, X11);
+       PLUS(X2, X12);
+       PLUS(X3, X13);
+
+       /* Update counter */
+       paddq X4, X13;
+
+       xor_src_dst(DST, SRC, 0 * 4, X0, X7);
+       xor_src_dst(DST, SRC, 4 * 4, X1, X7);
+       xor_src_dst(DST, SRC, 8 * 4, X2, X7);
+       xor_src_dst(DST, SRC, 12 * 4, X3, X7);
+
+       lea (64)(DST), DST;
+       lea (64)(SRC), SRC;
+
+       sub $1, NBLKS;
+       jnz .Loop1;
+
+.Ldone1:
+       /* Store counter */
+       movdqu X13, (12 * 4)(INPUT);
+
+       /* clear the used vector registers */
+       clear(X0);
+       clear(X1);
+       clear(X2);
+       clear(X3);
+       clear(X4);
+       clear(X5);
+       clear(X6);
+       clear(X7);
+       clear(X10);
+       clear(X11);
+       clear(X12);
+       clear(X13);
+
+       /* eax zeroed by round loop. */
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size _gcry_chacha20_amd64_ssse3_blocks1,
+         .-_gcry_chacha20_amd64_ssse3_blocks1;)
+
+/**********************************************************************
+  4-way stitched chacha20-poly1305
+ **********************************************************************/
+
+#define _ /*_*/
+
+.align 8
+.globl _gcry_chacha20_poly1305_amd64_ssse3_blocks4
+ELF(.type _gcry_chacha20_poly1305_amd64_ssse3_blocks4,@function;)
+
+_gcry_chacha20_poly1305_amd64_ssse3_blocks4:
+       /* input:
+        *      %rdi: input
+        *      %rsi: dst
+        *      %rdx: src
+        *      %rcx: nblks (multiple of 4)
+        *      %r9: poly1305-state
+        *      %r8: poly1305-src
+        */
+       CFI_STARTPROC();
+
+       pushq %rbp;
+       CFI_PUSH(%rbp);
+       movq %rsp, %rbp;
+       CFI_DEF_CFA_REGISTER(%rbp);
+
+       subq $(9 * 8) + STACK_MAX + 16, %rsp;
+       andq $~15, %rsp;
+
+       movq %rbx, (STACK_MAX + 0 * 8)(%rsp);
+       movq %r12, (STACK_MAX + 1 * 8)(%rsp);
+       movq %r13, (STACK_MAX + 2 * 8)(%rsp);
+       movq %r14, (STACK_MAX + 3 * 8)(%rsp);
+       movq %r15, (STACK_MAX + 4 * 8)(%rsp);
+       CFI_REG_ON_STACK(rbx, STACK_MAX + 0 * 8);
+       CFI_REG_ON_STACK(r12, STACK_MAX + 1 * 8);
+       CFI_REG_ON_STACK(r13, STACK_MAX + 2 * 8);
+       CFI_REG_ON_STACK(r14, STACK_MAX + 3 * 8);
+       CFI_REG_ON_STACK(r15, STACK_MAX + 4 * 8);
+
+       movq %rdx, (STACK_MAX + 5 * 8)(%rsp); # SRC
+       movq %rsi, (STACK_MAX + 6 * 8)(%rsp); # DST
+       movq %rcx, (STACK_MAX + 7 * 8)(%rsp); # NBLKS
+
+       /* Load state */
+       POLY1305_LOAD_STATE();
+
+.Loop_poly4:
+
+       /* Construct counter vectors X12 and X13 */
+       movdqa .Linc_counter rRIP, X0;
+       movdqa .Lunsigned_cmp rRIP, X2;
+       pbroadcastd((12 * 4)(INPUT), X12);
+       pbroadcastd((13 * 4)(INPUT), X13);
+       paddd X0, X12;
+       movdqa X12, X1;
+       pxor X2, X0;
+       pxor X2, X1;
+       pcmpgtd X1, X0;
+       psubd X0, X13;
+       movdqa X12, (STACK_VEC_X12)(%rsp);
+       movdqa X13, (STACK_VEC_X13)(%rsp);
+
+       /* Load vectors */
+       pbroadcastd((0 * 4)(INPUT), X0);
+       pbroadcastd((1 * 4)(INPUT), X1);
+       pbroadcastd((2 * 4)(INPUT), X2);
+       pbroadcastd((3 * 4)(INPUT), X3);
+       pbroadcastd((4 * 4)(INPUT), X4);
+       pbroadcastd((5 * 4)(INPUT), X5);
+       pbroadcastd((6 * 4)(INPUT), X6);
+       pbroadcastd((7 * 4)(INPUT), X7);
+       pbroadcastd((8 * 4)(INPUT), X8);
+       pbroadcastd((9 * 4)(INPUT), X9);
+       pbroadcastd((10 * 4)(INPUT), X10);
+       pbroadcastd((11 * 4)(INPUT), X11);
+       pbroadcastd((14 * 4)(INPUT), X14);
+       pbroadcastd((15 * 4)(INPUT), X15);
+       movdqa X11, (STACK_TMP)(%rsp);
+       movdqa X15, (STACK_TMP1)(%rsp);
+
+       /* Process four ChaCha20 blocks and sixteen Poly1305 blocks. */
+
+       movl $20, (STACK_MAX + 8 * 8 + 4)(%rsp);
+.Lround4_with_poly1305_outer:
+       movl $6, (STACK_MAX + 8 * 8)(%rsp);
+.Lround4_with_poly1305_inner1:
+       /* rounds 0-5 & 10-15 */
+                     POLY1305_BLOCK_PART1(0 * 16)
+       QUARTERROUND2(X0, X4,  X8, X12,   X1, X5,  X9, X13, tmp:=,X11,X15,
+                     POLY1305_BLOCK_PART2(),
+                     POLY1305_BLOCK_PART3())
+       movdqa (STACK_TMP)(%rsp), X11;
+       movdqa (STACK_TMP1)(%rsp), X15;
+       movdqa X8, (STACK_TMP)(%rsp);
+       movdqa X9, (STACK_TMP1)(%rsp);
+       QUARTERROUND2(X2, X6, X10, X14,   X3, X7, X11, X15, tmp:=,X8,X9,
+                     POLY1305_BLOCK_PART4(),
+                     POLY1305_BLOCK_PART5())
+                     POLY1305_BLOCK_PART1(1 * 16)
+                     lea (2 * 16)(POLY_RSRC), POLY_RSRC;
+       QUARTERROUND2(X0, X5, X10, X15,   X1, X6, X11, X12, tmp:=,X8,X9,
+                     POLY1305_BLOCK_PART2(),
+                     POLY1305_BLOCK_PART3())
+       movdqa (STACK_TMP)(%rsp), X8;
+       movdqa (STACK_TMP1)(%rsp), X9;
+       movdqa X11, (STACK_TMP)(%rsp);
+       movdqa X15, (STACK_TMP1)(%rsp);
+       QUARTERROUND2(X2, X7,  X8, X13,   X3, X4,  X9, X14, tmp:=,X11,X15,
+                     POLY1305_BLOCK_PART4(),
+                     POLY1305_BLOCK_PART5())
+
+       subl $2, (STACK_MAX + 8 * 8)(%rsp);
+       jnz .Lround4_with_poly1305_inner1;
+
+       movl $4, (STACK_MAX + 8 * 8)(%rsp);
+.Lround4_with_poly1305_inner2:
+       /* rounds 6-9 & 16-19 */
+                     POLY1305_BLOCK_PART1(0 * 16)
+                     lea (1 * 16)(POLY_RSRC), POLY_RSRC;
+       QUARTERROUND2(X0, X4,  X8, X12,   X1, X5,  X9, X13, tmp:=,X11,X15,
+                     POLY1305_BLOCK_PART2(),
+                     _)
+       movdqa (STACK_TMP)(%rsp), X11;
+       movdqa (STACK_TMP1)(%rsp), X15;
+       movdqa X8, (STACK_TMP)(%rsp);
+       movdqa X9, (STACK_TMP1)(%rsp);
+       QUARTERROUND2(X2, X6, X10, X14,   X3, X7, X11, X15, tmp:=,X8,X9,
+                     POLY1305_BLOCK_PART3(),
+                     _)
+       QUARTERROUND2(X0, X5, X10, X15,   X1, X6, X11, X12, tmp:=,X8,X9,
+                     POLY1305_BLOCK_PART4(),
+                     _)
+       movdqa (STACK_TMP)(%rsp), X8;
+       movdqa (STACK_TMP1)(%rsp), X9;
+       movdqa X11, (STACK_TMP)(%rsp);
+       movdqa X15, (STACK_TMP1)(%rsp);
+       QUARTERROUND2(X2, X7,  X8, X13,   X3, X4,  X9, X14, tmp:=,X11,X15,
+                     POLY1305_BLOCK_PART5(),
+                     _)
+
+       subl $2, (STACK_MAX + 8 * 8)(%rsp);
+       jnz .Lround4_with_poly1305_inner2;
+
+       subl $10, (STACK_MAX + 8 * 8 + 4)(%rsp);
+       jnz .Lround4_with_poly1305_outer;
+
+       /* tmp := X15 */
+       movdqa (STACK_TMP)(%rsp), X11;
+       pbroadcastd((0 * 4)(INPUT), X15);
+       PLUS(X0, X15);
+       pbroadcastd((1 * 4)(INPUT), X15);
+       PLUS(X1, X15);
+       pbroadcastd((2 * 4)(INPUT), X15);
+       PLUS(X2, X15);
+       pbroadcastd((3 * 4)(INPUT), X15);
+       PLUS(X3, X15);
+       pbroadcastd((4 * 4)(INPUT), X15);
+       PLUS(X4, X15);
+       pbroadcastd((5 * 4)(INPUT), X15);
+       PLUS(X5, X15);
+       pbroadcastd((6 * 4)(INPUT), X15);
+       PLUS(X6, X15);
+       pbroadcastd((7 * 4)(INPUT), X15);
+       PLUS(X7, X15);
+       pbroadcastd((8 * 4)(INPUT), X15);
+       PLUS(X8, X15);
+       pbroadcastd((9 * 4)(INPUT), X15);
+       PLUS(X9, X15);
+       pbroadcastd((10 * 4)(INPUT), X15);
+       PLUS(X10, X15);
+       pbroadcastd((11 * 4)(INPUT), X15);
+       PLUS(X11, X15);
+       movdqa (STACK_VEC_X12)(%rsp), X15;
+       PLUS(X12, X15);
+       movdqa (STACK_VEC_X13)(%rsp), X15;
+       PLUS(X13, X15);
+       movdqa X13, (STACK_TMP)(%rsp);
+       pbroadcastd((14 * 4)(INPUT), X15);
+       PLUS(X14, X15);
+       movdqa (STACK_TMP1)(%rsp), X15;
+       movdqa X14, (STACK_TMP1)(%rsp);
+       pbroadcastd((15 * 4)(INPUT), X13);
+       PLUS(X15, X13);
+       movdqa X15, (STACK_TMP2)(%rsp);
+
+       /* Update counter */
+       addq $4, (12 * 4)(INPUT);
+
+       movq (STACK_MAX + 5 * 8)(%rsp), SRC;
+       movq (STACK_MAX + 6 * 8)(%rsp), DST;
+
+       transpose_4x4(X0, X1, X2, X3, X13, X14, X15);
+       xor_src_dst(DST, SRC, (64 * 0 + 16 * 0), X0, X15);
+       xor_src_dst(DST, SRC, (64 * 1 + 16 * 0), X1, X15);
+       xor_src_dst(DST, SRC, (64 * 2 + 16 * 0), X2, X15);
+       xor_src_dst(DST, SRC, (64 * 3 + 16 * 0), X3, X15);
+       transpose_4x4(X4, X5, X6, X7, X0, X1, X2);
+       movdqa (STACK_TMP)(%rsp), X13;
+       movdqa (STACK_TMP1)(%rsp), X14;
+       movdqa (STACK_TMP2)(%rsp), X15;
+       xor_src_dst(DST, SRC, (64 * 0 + 16 * 1), X4, X0);
+       xor_src_dst(DST, SRC, (64 * 1 + 16 * 1), X5, X0);
+       xor_src_dst(DST, SRC, (64 * 2 + 16 * 1), X6, X0);
+       xor_src_dst(DST, SRC, (64 * 3 + 16 * 1), X7, X0);
+       transpose_4x4(X8, X9, X10, X11, X0, X1, X2);
+       xor_src_dst(DST, SRC, (64 * 0 + 16 * 2), X8, X0);
+       xor_src_dst(DST, SRC, (64 * 1 + 16 * 2), X9, X0);
+       xor_src_dst(DST, SRC, (64 * 2 + 16 * 2), X10, X0);
+       xor_src_dst(DST, SRC, (64 * 3 + 16 * 2), X11, X0);
+       transpose_4x4(X12, X13, X14, X15, X0, X1, X2);
+       xor_src_dst(DST, SRC, (64 * 0 + 16 * 3), X12, X0);
+       xor_src_dst(DST, SRC, (64 * 1 + 16 * 3), X13, X0);
+       xor_src_dst(DST, SRC, (64 * 2 + 16 * 3), X14, X0);
+       xor_src_dst(DST, SRC, (64 * 3 + 16 * 3), X15, X0);
+
+       subq $4, (STACK_MAX + 7 * 8)(%rsp); # NBLKS
+
+       lea (4 * 64)(DST), DST;
+       lea (4 * 64)(SRC), SRC;
+       movq SRC, (STACK_MAX + 5 * 8)(%rsp);
+       movq DST, (STACK_MAX + 6 * 8)(%rsp);
+
+       jnz .Loop_poly4;
+
+       /* Store state */
+       POLY1305_STORE_STATE();
+
+       /* clear the used vector registers and stack */
+       clear(X0);
+       movdqa X0, (STACK_VEC_X12)(%rsp);
+       movdqa X0, (STACK_VEC_X13)(%rsp);
+       movdqa X0, (STACK_TMP)(%rsp);
+       movdqa X0, (STACK_TMP1)(%rsp);
+       movdqa X0, (STACK_TMP2)(%rsp);
+       clear(X1);
+       clear(X2);
+       clear(X3);
+       clear(X4);
+       clear(X5);
+       clear(X6);
+       clear(X7);
+       clear(X8);
+       clear(X9);
+       clear(X10);
+       clear(X11);
+       clear(X12);
+       clear(X13);
+       clear(X14);
+       clear(X15);
+
+       movq (STACK_MAX + 0 * 8)(%rsp), %rbx;
+       movq (STACK_MAX + 1 * 8)(%rsp), %r12;
+       movq (STACK_MAX + 2 * 8)(%rsp), %r13;
+       movq (STACK_MAX + 3 * 8)(%rsp), %r14;
+       movq (STACK_MAX + 4 * 8)(%rsp), %r15;
+       CFI_RESTORE(%rbx);
+       CFI_RESTORE(%r12);
+       CFI_RESTORE(%r13);
+       CFI_RESTORE(%r14);
+       CFI_RESTORE(%r15);
+
+       xorl %eax, %eax;
+       leave;
+       CFI_LEAVE();
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size _gcry_chacha20_poly1305_amd64_ssse3_blocks4,
+         .-_gcry_chacha20_poly1305_amd64_ssse3_blocks4;)
+
+/**********************************************************************
+  2-way && 1-way stitched chacha20-poly1305
+ **********************************************************************/
+
+.align 8
+.globl _gcry_chacha20_poly1305_amd64_ssse3_blocks1
+ELF(.type _gcry_chacha20_poly1305_amd64_ssse3_blocks1,@function;)
+
+_gcry_chacha20_poly1305_amd64_ssse3_blocks1:
+       /* input:
+        *      %rdi: chacha20-state
+        *      %rsi: dst
+        *      %rdx: src
+        *      %rcx: nblks
+        *      %r9: poly1305-state
+        *      %r8: poly1305-src
+        */
+       CFI_STARTPROC();
+
+       pushq %rbp;
+       CFI_PUSH(%rbp);
+       movq %rsp, %rbp;
+       CFI_DEF_CFA_REGISTER(%rbp);
+
+       subq $(9 * 8), %rsp;
+       movq %rbx, (0 * 8)(%rsp);
+       movq %r12, (1 * 8)(%rsp);
+       movq %r13, (2 * 8)(%rsp);
+       movq %r14, (3 * 8)(%rsp);
+       movq %r15, (4 * 8)(%rsp);
+       CFI_REG_ON_STACK(rbx, 0 * 8);
+       CFI_REG_ON_STACK(r12, 1 * 8);
+       CFI_REG_ON_STACK(r13, 2 * 8);
+       CFI_REG_ON_STACK(r14, 3 * 8);
+       CFI_REG_ON_STACK(r15, 4 * 8);
+
+       movq %rdx, (5 * 8)(%rsp); # SRC
+       movq %rsi, (6 * 8)(%rsp); # DST
+       movq %rcx, (7 * 8)(%rsp); # NBLKS
+
+       /* Load constants */
+       movdqa .Lcounter1 rRIP, X4;
+       movdqa .Lshuf_rol8 rRIP, X5;
+       movdqa .Lshuf_rol16 rRIP, X6;
+
+       /* Load state */
+       movdqu (0 * 4)(INPUT), X10;
+       movdqu (4 * 4)(INPUT), X11;
+       movdqu (8 * 4)(INPUT), X12;
+       movdqu (12 * 4)(INPUT), X13;
+
+       POLY1305_LOAD_STATE();
+
+       cmpq $2, (7 * 8)(%rsp); #NBLKS
+       jb .Loop_poly1;
+
+       movdqa X10, X0;
+       movdqa X11, X1;
+       movdqa X12, X2;
+       movdqa X13, X3;
+
+       movdqa X10, X8;
+       movdqa X11, X9;
+       movdqa X12, X14;
+       movdqa X13, X15;
+       paddq X4, X15;
+
+       /* Process two ChaCha20 blocks and eight Poly1305 blocks. */
+
+       movl $20, (8 * 8 + 4)(%rsp);
+.Lround2_with_poly1305_outer:
+       movl $8, (8 * 8)(%rsp);
+.Lround2_with_poly1305_inner:
+       POLY1305_BLOCK_PART1(0 * 16);
+         QUARTERROUND4(X0, X1, X2,  X3,  X5, X6, X7, 0x39, 0x4e, 0x93);
+       lea (1 * 16)(POLY_RSRC), POLY_RSRC;
+       POLY1305_BLOCK_PART2();
+         QUARTERROUND4(X8, X9, X14, X15, X5, X6, X7, 0x39, 0x4e, 0x93);
+       POLY1305_BLOCK_PART3();
+         QUARTERROUND4(X0, X1, X2,  X3,  X5, X6, X7, 0x93, 0x4e, 0x39);
+       POLY1305_BLOCK_PART4();
+         QUARTERROUND4(X8, X9, X14, X15, X5, X6, X7, 0x93, 0x4e, 0x39);
+       POLY1305_BLOCK_PART5();
+
+       subl $2, (8 * 8)(%rsp);
+       jnz .Lround2_with_poly1305_inner;
+
+         QUARTERROUND4(X0, X1, X2,  X3,  X5, X6, X7, 0x39, 0x4e, 0x93);
+         QUARTERROUND4(X8, X9, X14, X15, X5, X6, X7, 0x39, 0x4e, 0x93);
+         QUARTERROUND4(X0, X1, X2,  X3,  X5, X6, X7, 0x93, 0x4e, 0x39);
+         QUARTERROUND4(X8, X9, X14, X15, X5, X6, X7, 0x93, 0x4e, 0x39);
+
+       subl $10, (8 * 8 + 4)(%rsp);
+       jnz .Lround2_with_poly1305_outer;
+
+       movq (5 * 8)(%rsp), SRC;
+       movq (6 * 8)(%rsp), DST;
+
+       PLUS(X0, X10);
+       PLUS(X1, X11);
+       PLUS(X2, X12);
+       PLUS(X3, X13);
+
+       /* Update counter */
+       paddq X4, X13;
+
+       PLUS(X8, X10);
+       PLUS(X9, X11);
+       PLUS(X14, X12);
+       PLUS(X15, X13);
+
+       /* Update counter */
+       paddq X4, X13;
+
+       xor_src_dst(DST, SRC, 0 * 4, X0, X7);
+       xor_src_dst(DST, SRC, 4 * 4, X1, X7);
+       xor_src_dst(DST, SRC, 8 * 4, X2, X7);
+       xor_src_dst(DST, SRC, 12 * 4, X3, X7);
+       xor_src_dst(DST, SRC, 16 * 4, X8, X7);
+       xor_src_dst(DST, SRC, 20 * 4, X9, X7);
+       xor_src_dst(DST, SRC, 24 * 4, X14, X7);
+       xor_src_dst(DST, SRC, 28 * 4, X15, X7);
+
+       clear(X8);
+       clear(X9);
+       clear(X14);
+       clear(X15);
+
+       subq $2, (7 * 8)(%rsp); # NBLKS
+       lea (2 * 64)(SRC), SRC;
+       lea (2 * 64)(DST), DST;
+       movq SRC, (5 * 8)(%rsp);
+       movq DST, (6 * 8)(%rsp);
+       jz .Ldone_poly1;
+
+.Loop_poly1:
+       movdqa X10, X0;
+       movdqa X11, X1;
+       movdqa X12, X2;
+       movdqa X13, X3;
+
+       /* Process one ChaCha20 block and four Poly1305 blocks. */
+
+       movl $20, (8 * 8 + 4)(%rsp);
+.Lround1_with_poly1305_outer:
+       movl $8, (8 * 8)(%rsp);
+.Lround1_with_poly1305_inner:
+       POLY1305_BLOCK_PART1(0 * 16);
+         QUARTERROUND4(X0, X1, X2, X3, X5, X6, X7, 0x39, 0x4e, 0x93);
+       POLY1305_BLOCK_PART2();
+         QUARTERROUND4(X0, X1, X2, X3, X5, X6, X7, 0x93, 0x4e, 0x39);
+       lea (1 * 16)(POLY_RSRC), POLY_RSRC;
+
+       POLY1305_BLOCK_PART3();
+         QUARTERROUND4(X0, X1, X2, X3, X5, X6, X7, 0x39, 0x4e, 0x93);
+       POLY1305_BLOCK_PART4();
+         QUARTERROUND4(X0, X1, X2, X3, X5, X6, X7, 0x93, 0x4e, 0x39);
+       POLY1305_BLOCK_PART5();
+
+       subl $4, (8 * 8)(%rsp);
+       jnz .Lround1_with_poly1305_inner;
+
+         QUARTERROUND4(X0, X1, X2, X3, X5, X6, X7, 0x39, 0x4e, 0x93);
+         QUARTERROUND4(X0, X1, X2, X3, X5, X6, X7, 0x93, 0x4e, 0x39);
+
+       subl $10, (8 * 8 + 4)(%rsp);
+       jnz .Lround1_with_poly1305_outer;
+
+       movq (5 * 8)(%rsp), SRC;
+       movq (6 * 8)(%rsp), DST;
+
+       PLUS(X0, X10);
+       PLUS(X1, X11);
+       PLUS(X2, X12);
+       PLUS(X3, X13);
+
+       /* Update counter */
+       paddq X4, X13;
+
+       xor_src_dst(DST, SRC, 0 * 4, X0, X7);
+       xor_src_dst(DST, SRC, 4 * 4, X1, X7);
+       xor_src_dst(DST, SRC, 8 * 4, X2, X7);
+       xor_src_dst(DST, SRC, 12 * 4, X3, X7);
+
+       subq $1, (7 * 8)(%rsp); # NBLKS
+       lea (64)(SRC), SRC;
+       lea (64)(DST), DST;
+       movq SRC, (5 * 8)(%rsp);
+       movq DST, (6 * 8)(%rsp);
+
+       jnz .Loop_poly1;
+
+.Ldone_poly1:
+       /* Store state */
+       POLY1305_STORE_STATE();
+
+       movdqu X13, (12 * 4)(INPUT);
+
+       /* clear the used vector registers */
+       clear(X0);
+       clear(X1);
+       clear(X2);
+       clear(X3);
+       clear(X4);
+       clear(X5);
+       clear(X6);
+       clear(X7);
+       clear(X10);
+       clear(X11);
+       clear(X12);
+       clear(X13);
+
+       movq (0 * 8)(%rsp), %rbx;
+       movq (1 * 8)(%rsp), %r12;
+       movq (2 * 8)(%rsp), %r13;
+       movq (3 * 8)(%rsp), %r14;
+       movq (4 * 8)(%rsp), %r15;
+       CFI_RESTORE(%rbx);
+       CFI_RESTORE(%r12);
+       CFI_RESTORE(%r13);
+       CFI_RESTORE(%r14);
+       CFI_RESTORE(%r15);
+
+       xorl %eax, %eax;
+       leave;
+       CFI_LEAVE();
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size _gcry_chacha20_poly1305_amd64_ssse3_blocks1,
+         .-_gcry_chacha20_poly1305_amd64_ssse3_blocks1;)
+
+#endif /*defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS)*/
+#endif /*__x86_64*/
diff --git a/grub-core/lib/libgcrypt/cipher/chacha20-armv7-neon.S 
b/grub-core/lib/libgcrypt/cipher/chacha20-armv7-neon.S
new file mode 100644
index 000000000..a862be4ec
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/chacha20-armv7-neon.S
@@ -0,0 +1,393 @@
+/* chacha20-armv7-neon.S  -  ARMv7 NEON implementation of ChaCha20 cipher
+ *
+ * Copyright (C) 2017,2018 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * Based on D. J. Bernstein reference implementation at
+ * http://cr.yp.to/chacha.html:
+ *
+ * chacha-regs.c version 20080118
+ * D. J. Bernstein
+ * Public domain.
+ */
+
+#include <config.h>
+
+#if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) && \
+    defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) && \
+    defined(HAVE_GCC_INLINE_ASM_NEON)
+
+.syntax unified
+.fpu neon
+.arm
+
+.text
+
+#ifdef __PIC__
+#  define GET_DATA_POINTER(reg, name, rtmp) \
+               ldr reg, 1f; \
+               ldr rtmp, 2f; \
+               b 3f; \
+       1:      .word _GLOBAL_OFFSET_TABLE_-(3f+8); \
+       2:      .word name(GOT); \
+       3:      add reg, pc, reg; \
+               ldr reg, [reg, rtmp];
+#else
+#  define GET_DATA_POINTER(reg, name, rtmp) ldr reg, =name
+#endif
+
+/* register macros */
+#define INPUT r0
+#define DST   r1
+#define SRC   r2
+#define NBLKS r3
+#define ROUND r4
+
+/* stack structure */
+#define STACK_VEC_X12 (16)
+#define STACK_VEC_X13 (STACK_VEC_X12 + 16)
+#define STACK_TMP     (STACK_VEC_X13 + 16)
+#define STACK_TMP1    (16 + STACK_TMP)
+#define STACK_TMP2    (16 + STACK_TMP1)
+
+#define STACK_MAX     (16 + STACK_TMP2)
+
+/* vector registers */
+#define X0 q0
+#define X1 q1
+#define X2 q2
+#define X3 q3
+#define X4 q4
+#define X5 q5
+#define X6 q6
+#define X7 q7
+#define X8 q8
+#define X9 q9
+#define X10 q10
+#define X11 q11
+#define X12 q12
+#define X13 q13
+#define X14 q14
+#define X15 q15
+
+#define X0l d0
+#define X1l d2
+#define X2l d4
+#define X3l d6
+#define X4l d8
+#define X5l d10
+#define X6l d12
+#define X7l d14
+#define X8l d16
+#define X9l d18
+#define X10l d20
+#define X11l d22
+#define X12l d24
+#define X13l d26
+#define X14l d28
+#define X15l d30
+
+#define X0h d1
+#define X1h d3
+#define X2h d5
+#define X3h d7
+#define X4h d9
+#define X5h d11
+#define X6h d13
+#define X7h d15
+#define X8h d17
+#define X9h d19
+#define X10h d21
+#define X11h d23
+#define X12h d25
+#define X13h d27
+#define X14h d29
+#define X15h d31
+
+/**********************************************************************
+  helper macros
+ **********************************************************************/
+
+/* 4x4 32-bit integer matrix transpose */
+#define transpose_4x4_part1(_q0, _q1, _q2, _q3)        \
+       vtrn.32 _q0, _q1;                       \
+       vtrn.32 _q2, _q3;
+#define transpose_4x4_part2(_q0, _q1, _q2, _q3)        \
+       vswp _q0##h, _q2##l;                    \
+       vswp _q1##h, _q3##l;
+
+#define clear(x) vmov.i8 x, #0;
+
+/**********************************************************************
+  4-way chacha20
+ **********************************************************************/
+
+#define ROTATE2(dst1,dst2,c,src1,src2)         \
+       vshl.u32 dst1, src1, #(c);              \
+       vshl.u32 dst2, src2, #(c);              \
+       vsri.u32 dst1, src1, #(32 - (c));       \
+       vsri.u32 dst2, src2, #(32 - (c));
+
+#define ROTATE2_16(dst1,dst2,src1,src2)                \
+       vrev32.16 dst1, src1;                   \
+       vrev32.16 dst2, src2;
+
+#define XOR(d,s1,s2) \
+       veor d, s2, s1;
+
+#define PLUS(ds,s) \
+       vadd.u32 ds, ds, s;
+
+#define QUARTERROUND2(a1,b1,c1,d1,a2,b2,c2,d2,ign,tmp1,tmp2)           \
+       PLUS(a1,b1); PLUS(a2,b2); XOR(tmp1,d1,a1); XOR(tmp2,d2,a2);     \
+           ROTATE2_16(d1, d2, tmp1, tmp2);                             \
+       PLUS(c1,d1); PLUS(c2,d2); XOR(tmp1,b1,c1); XOR(tmp2,b2,c2);     \
+           ROTATE2(b1, b2, 12, tmp1, tmp2);                            \
+       PLUS(a1,b1); PLUS(a2,b2); XOR(tmp1,d1,a1); XOR(tmp2,d2,a2);     \
+           ROTATE2(d1, d2,  8, tmp1, tmp2);                            \
+       PLUS(c1,d1); PLUS(c2,d2); XOR(tmp1,b1,c1); XOR(tmp2,b2,c2);     \
+           ROTATE2(b1, b2,  7, tmp1, tmp2);
+
+chacha20_data:
+.align 4
+.Linc_counter:
+       .long 0,1,2,3
+
+.align 3
+.globl _gcry_chacha20_armv7_neon_blocks4
+.type _gcry_chacha20_armv7_neon_blocks4,%function;
+
+_gcry_chacha20_armv7_neon_blocks4:
+       /* input:
+        *      r0: input
+        *      r1: dst
+        *      r2: src
+        *      r3: nblks (multiple of 4)
+        */
+
+       vpush {q4-q7};
+       push {r4-r12,lr};
+
+       mov r12, sp
+
+       mov r6, sp;
+       sub r6, r6, #(STACK_MAX);
+       and r6, r6, #(~15);
+       mov sp, r6;
+       GET_DATA_POINTER(r9, .Linc_counter, lr);
+       add lr, INPUT, #(12*4);
+       add r8, sp, #STACK_VEC_X12;
+
+.Loop4:
+       mov ROUND, #20;
+
+       /* Construct counter vectors X12 and X13 */
+
+       vld1.8 {X15}, [lr];
+       mov lr, INPUT;
+       vld1.8 {X8}, [r9];
+       vdup.32 X12, X15l[0];
+       vdup.32 X13, X15l[1];
+       vld1.8 {X3}, [lr]!;
+       vadd.u32 X12, X12, X8;
+       vdup.32 X0, X3l[0];
+       vdup.32 X1, X3l[1];
+       vdup.32 X2, X3h[0];
+       vcgt.u32 X8, X8, X12;
+       vdup.32 X3, X3h[1];
+       vdup.32 X14, X15h[0];
+       vdup.32 X15, X15h[1];
+       vsub.u32 X13, X13, X8;
+       vld1.8 {X7}, [lr]!;
+       vld1.8 {X11}, [lr];
+       vst1.8 {X12, X13}, [r8];
+       vdup.32 X4, X7l[0];
+       vdup.32 X5, X7l[1];
+       vdup.32 X6, X7h[0];
+       vdup.32 X7, X7h[1];
+       vdup.32 X8, X11l[0];
+       vdup.32 X9, X11l[1];
+       vdup.32 X10, X11h[0];
+       vdup.32 X11, X11h[1];
+
+       add r7, sp, #STACK_TMP2;
+       add r6, sp, #STACK_TMP1;
+       add r5, sp, #STACK_TMP;
+       vst1.8 {X15}, [r6];
+       vst1.8 {X11}, [r5];
+
+       mov lr, INPUT;
+.Lround2:
+       subs ROUND, ROUND, #2
+       QUARTERROUND2(X0, X4,  X8, X12,   X1, X5,  X9, X13, tmp:=,X11,X15)
+       vld1.8 {X11}, [r5];
+       vld1.8 {X15}, [r6];
+       vst1.8 {X8}, [r5];
+       vst1.8 {X9}, [r6];
+       QUARTERROUND2(X2, X6, X10, X14,   X3, X7, X11, X15, tmp:=,X8,X9)
+       QUARTERROUND2(X0, X5, X10, X15,   X1, X6, X11, X12, tmp:=,X8,X9)
+       vld1.8 {X8}, [r5];
+       vld1.8 {X9}, [r6];
+       vst1.8 {X11}, [r5];
+       vst1.8 {X15}, [r6];
+       QUARTERROUND2(X2, X7,  X8, X13,   X3, X4,  X9, X14, tmp:=,X11,X15)
+       bne .Lround2;
+
+       vld1.8 {X11}, [lr]!;
+       vst1.8 {X14}, [r7];
+
+       vdup.32 X14, X11l[0]; /* INPUT + 0 * 4 */
+       vdup.32 X15, X11l[1]; /* INPUT + 1 * 4 */
+       PLUS(X0, X14);
+       PLUS(X1, X15);
+       vdup.32 X14, X11h[0]; /* INPUT + 2 * 4 */
+       vdup.32 X15, X11h[1]; /* INPUT + 3 * 4 */
+       PLUS(X2, X14);
+       PLUS(X3, X15);
+
+       vld1.8 {X11}, [r5];
+       vld1.8 {X15}, [r6];
+       vst1.8 {X0}, [r5];
+       vld1.8 {X0}, [lr]!;
+       vst1.8 {X1}, [r6];
+
+       vdup.32 X14, X0l[0]; /* INPUT + 4 * 4 */
+       vdup.32  X1, X0l[1]; /* INPUT + 5 * 4 */
+       PLUS(X4, X14);
+       PLUS(X5, X1);
+       vdup.32 X14, X0h[0]; /* INPUT + 6 * 4 */
+       vdup.32  X1, X0h[1]; /* INPUT + 7 * 4 */
+       PLUS(X6, X14);
+       PLUS(X7, X1);
+
+       vld1.8 {X0}, [lr]!;
+
+       vdup.32 X14, X0l[0]; /* INPUT + 8 * 4 */
+       vdup.32  X1, X0l[1]; /* INPUT + 9 * 4 */
+       PLUS(X8, X14);
+       PLUS(X9, X1);
+       vdup.32 X14, X0h[0]; /* INPUT + 10 * 4 */
+       vdup.32  X1, X0h[1]; /* INPUT + 11 * 4 */
+       PLUS(X10, X14);
+       PLUS(X11, X1);
+
+       vld1.8 {X0}, [lr];
+       add lr, INPUT, #(12*4)
+       vld1.8 {X14}, [r7];
+
+       vdup.32 X1, X0h[0]; /* INPUT + 10 * 4 */
+       ldm lr, {r10, r11}; /* Update counter */
+       vdup.32 X0, X0h[1]; /* INPUT + 11 * 4 */
+       PLUS(X14, X1);
+       PLUS(X15, X0);
+       adds r10, r10, #4;  /* Update counter */
+       vld1.8 {X0, X1}, [r8];
+
+       PLUS(X12, X0);
+       vld1.8 {X0}, [r5];
+       PLUS(X13, X1);
+       adc r11, r11, #0;   /* Update counter */
+
+       vld1.8 {X1}, [r6];
+       stm lr, {r10, r11}; /* Update counter */
+       transpose_4x4_part1(X0, X1, X2, X3);
+       transpose_4x4_part1(X4, X5, X6, X7);
+       transpose_4x4_part1(X8, X9, X10, X11);
+       transpose_4x4_part1(X12, X13, X14, X15);
+       transpose_4x4_part2(X0, X1, X2, X3);
+       transpose_4x4_part2(X4, X5, X6, X7);
+       transpose_4x4_part2(X8, X9, X10, X11);
+       transpose_4x4_part2(X12, X13, X14, X15);
+
+       subs NBLKS, NBLKS, #4;
+
+       vst1.8 {X10}, [r5];
+       add lr, INPUT, #(12*4)
+       vst1.8 {X11}, [r6];
+       vld1.8 {X10, X11}, [SRC]!;
+       veor X10, X0, X10;
+       vld1.8 {X0}, [SRC]!;
+       veor X11, X4, X11;
+       vld1.8 {X4}, [SRC]!;
+       vst1.8 {X10, X11}, [DST]!;
+       vld1.8 {X10, X11}, [SRC]!;
+       veor X0, X8, X0;
+       veor X4, X12, X4;
+       veor X10, X1, X10;
+       veor X11, X5, X11;
+       vst1.8 {X0}, [DST]!;
+       vld1.8 {X0, X1}, [SRC]!;
+       vst1.8 {X4}, [DST]!;
+       vld1.8 {X4, X5}, [SRC]!;
+       vst1.8 {X10, X11}, [DST]!;
+       vld1.8 {X10}, [r5];
+       vld1.8 {X11}, [r6];
+       veor X0, X9, X0;
+       vld1.8 {X8, X9}, [SRC]!;
+       veor X1, X13, X1;
+       vld1.8 {X12, X13}, [SRC]!;
+       veor X4, X2, X4;
+       veor X5, X6, X5;
+       vst1.8 {X0, X1}, [DST]!;
+       vld1.8 {X0, X1}, [SRC]!;
+       vst1.8 {X4, X5}, [DST]!;
+       veor X8, X10, X8;
+       veor X9, X14, X9;
+       veor X12, X3, X12;
+       veor X13, X7, X13;
+       veor X0, X11, X0;
+       veor X1, X15, X1;
+       vst1.8 {X8, X9}, [DST]!;
+       vst1.8 {X12, X13}, [DST]!;
+       vst1.8 {X0, X1}, [DST]!;
+
+       bne .Loop4;
+
+       /* clear the used vector registers and stack */
+       clear(X0);
+       vst1.8 {X0}, [r5];
+       vst1.8 {X0}, [r6];
+       vst1.8 {X0}, [r7];
+       vst1.8 {X0}, [r8]!;
+       vst1.8 {X0}, [r8];
+
+       mov sp, r12
+       clear(X1);
+       clear(X2);
+       clear(X3);
+       clear(X4);
+       clear(X5);
+       clear(X6);
+       clear(X7);
+       clear(X8);
+       clear(X9);
+       clear(X10);
+       clear(X11);
+       clear(X12);
+       clear(X13);
+       clear(X14);
+       clear(X15);
+
+       pop {r4-r12,lr}
+       vpop {q4-q7}
+       eor r0, r0, r0
+       bx lr
+.size _gcry_chacha20_armv7_neon_blocks4, .-_gcry_chacha20_armv7_neon_blocks4;
+
+#endif
diff --git a/grub-core/lib/libgcrypt/cipher/chacha20-ppc.c 
b/grub-core/lib/libgcrypt/cipher/chacha20-ppc.c
new file mode 100644
index 000000000..4a21b837d
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/chacha20-ppc.c
@@ -0,0 +1,646 @@
+/* chacha20-ppc.c - PowerPC vector implementation of ChaCha20
+ * Copyright (C) 2019 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+
+#if defined(ENABLE_PPC_CRYPTO_SUPPORT) && \
+    defined(HAVE_COMPATIBLE_CC_PPC_ALTIVEC) && \
+    defined(HAVE_GCC_INLINE_ASM_PPC_ALTIVEC) && \
+    defined(USE_CHACHA20) && \
+    __GNUC__ >= 4
+
+#include <altivec.h>
+#include "bufhelp.h"
+#include "poly1305-internal.h"
+
+#include "mpi-internal.h"
+#include "longlong.h"
+
+
+typedef vector unsigned char vector16x_u8;
+typedef vector unsigned int vector4x_u32;
+typedef vector unsigned long long vector2x_u64;
+
+
+#define ALWAYS_INLINE inline __attribute__((always_inline))
+#define NO_INLINE __attribute__((noinline))
+#define NO_INSTRUMENT_FUNCTION __attribute__((no_instrument_function))
+
+#define ASM_FUNC_ATTR          NO_INSTRUMENT_FUNCTION
+#define ASM_FUNC_ATTR_INLINE   ASM_FUNC_ATTR ALWAYS_INLINE
+#define ASM_FUNC_ATTR_NOINLINE ASM_FUNC_ATTR NO_INLINE
+
+
+#ifdef WORDS_BIGENDIAN
+static const vector16x_u8 le_bswap_const =
+  { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 };
+#endif
+
+
+static ASM_FUNC_ATTR_INLINE vector4x_u32
+vec_rol_elems(vector4x_u32 v, unsigned int idx)
+{
+#ifndef WORDS_BIGENDIAN
+  return vec_sld (v, v, (16 - (4 * idx)) & 15);
+#else
+  return vec_sld (v, v, (4 * idx) & 15);
+#endif
+}
+
+
+static ASM_FUNC_ATTR_INLINE vector4x_u32
+vec_load_le(unsigned long offset, const unsigned char *ptr)
+{
+  vector4x_u32 vec;
+  vec = vec_vsx_ld (offset, (const u32 *)ptr);
+#ifdef WORDS_BIGENDIAN
+  vec = (vector4x_u32)vec_perm((vector16x_u8)vec, (vector16x_u8)vec,
+                              le_bswap_const);
+#endif
+  return vec;
+}
+
+
+static ASM_FUNC_ATTR_INLINE void
+vec_store_le(vector4x_u32 vec, unsigned long offset, unsigned char *ptr)
+{
+#ifdef WORDS_BIGENDIAN
+  vec = (vector4x_u32)vec_perm((vector16x_u8)vec, (vector16x_u8)vec,
+                              le_bswap_const);
+#endif
+  vec_vsx_st (vec, offset, (u32 *)ptr);
+}
+
+
+static ASM_FUNC_ATTR_INLINE vector4x_u32
+vec_add_ctr_u64(vector4x_u32 v, vector4x_u32 a)
+{
+#ifdef WORDS_BIGENDIAN
+  static const vector16x_u8 swap32 =
+    { 4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11 };
+  vector2x_u64 vec, add, sum;
+
+  vec = (vector2x_u64)vec_perm((vector16x_u8)v, (vector16x_u8)v, swap32);
+  add = (vector2x_u64)vec_perm((vector16x_u8)a, (vector16x_u8)a, swap32);
+  sum = vec + add;
+  return (vector4x_u32)vec_perm((vector16x_u8)sum, (vector16x_u8)sum, swap32);
+#else
+  return (vector4x_u32)((vector2x_u64)(v) + (vector2x_u64)(a));
+#endif
+}
+
+
+/**********************************************************************
+  2-way && 1-way chacha20
+ **********************************************************************/
+
+#define ROTATE(v1,rolv)                        \
+       __asm__ ("vrlw %0,%1,%2\n\t" : "=v" (v1) : "v" (v1), "v" (rolv))
+
+#define WORD_ROL(v1,c)                 \
+       ((v1) = vec_rol_elems((v1), (c)))
+
+#define XOR(ds,s) \
+       ((ds) ^= (s))
+
+#define PLUS(ds,s) \
+       ((ds) += (s))
+
+#define QUARTERROUND4(x0,x1,x2,x3,rol_x1,rol_x2,rol_x3) \
+       PLUS(x0, x1); XOR(x3, x0); ROTATE(x3, rotate_16); \
+       PLUS(x2, x3); XOR(x1, x2); ROTATE(x1, rotate_12); \
+       PLUS(x0, x1); XOR(x3, x0); ROTATE(x3, rotate_8); \
+       PLUS(x2, x3); \
+         WORD_ROL(x3, rol_x3); \
+                     XOR(x1, x2); \
+         WORD_ROL(x2, rol_x2); \
+                                  ROTATE(x1, rotate_7); \
+         WORD_ROL(x1, rol_x1);
+
+#define ADD_U64(v,a) \
+       (v = vec_add_ctr_u64(v, a))
+
+unsigned int ASM_FUNC_ATTR
+_gcry_chacha20_ppc8_blocks1(u32 *state, byte *dst, const byte *src,
+                           size_t nblks)
+{
+  vector4x_u32 counter_1 = { 1, 0, 0, 0 };
+  vector4x_u32 rotate_16 = { 16, 16, 16, 16 };
+  vector4x_u32 rotate_12 = { 12, 12, 12, 12 };
+  vector4x_u32 rotate_8 = { 8, 8, 8, 8 };
+  vector4x_u32 rotate_7 = { 7, 7, 7, 7 };
+  vector4x_u32 state0, state1, state2, state3;
+  vector4x_u32 v0, v1, v2, v3;
+  vector4x_u32 v4, v5, v6, v7;
+  int i;
+
+  /* force preload of constants to vector registers */
+  __asm__ ("": "+v" (counter_1) :: "memory");
+  __asm__ ("": "+v" (rotate_16) :: "memory");
+  __asm__ ("": "+v" (rotate_12) :: "memory");
+  __asm__ ("": "+v" (rotate_8) :: "memory");
+  __asm__ ("": "+v" (rotate_7) :: "memory");
+
+  state0 = vec_vsx_ld(0 * 16, state);
+  state1 = vec_vsx_ld(1 * 16, state);
+  state2 = vec_vsx_ld(2 * 16, state);
+  state3 = vec_vsx_ld(3 * 16, state);
+
+  while (nblks >= 2)
+    {
+      v0 = state0;
+      v1 = state1;
+      v2 = state2;
+      v3 = state3;
+
+      v4 = state0;
+      v5 = state1;
+      v6 = state2;
+      v7 = state3;
+      ADD_U64(v7, counter_1);
+
+      for (i = 20; i > 0; i -= 2)
+       {
+         QUARTERROUND4(v0, v1, v2, v3, 1, 2, 3);
+         QUARTERROUND4(v4, v5, v6, v7, 1, 2, 3);
+         QUARTERROUND4(v0, v1, v2, v3, 3, 2, 1);
+         QUARTERROUND4(v4, v5, v6, v7, 3, 2, 1);
+       }
+
+      v0 += state0;
+      v1 += state1;
+      v2 += state2;
+      v3 += state3;
+      ADD_U64(state3, counter_1); /* update counter */
+      v4 += state0;
+      v5 += state1;
+      v6 += state2;
+      v7 += state3;
+      ADD_U64(state3, counter_1); /* update counter */
+
+      v0 ^= vec_load_le(0 * 16, src);
+      v1 ^= vec_load_le(1 * 16, src);
+      v2 ^= vec_load_le(2 * 16, src);
+      v3 ^= vec_load_le(3 * 16, src);
+      vec_store_le(v0, 0 * 16, dst);
+      vec_store_le(v1, 1 * 16, dst);
+      vec_store_le(v2, 2 * 16, dst);
+      vec_store_le(v3, 3 * 16, dst);
+      src += 64;
+      dst += 64;
+      v4 ^= vec_load_le(0 * 16, src);
+      v5 ^= vec_load_le(1 * 16, src);
+      v6 ^= vec_load_le(2 * 16, src);
+      v7 ^= vec_load_le(3 * 16, src);
+      vec_store_le(v4, 0 * 16, dst);
+      vec_store_le(v5, 1 * 16, dst);
+      vec_store_le(v6, 2 * 16, dst);
+      vec_store_le(v7, 3 * 16, dst);
+      src += 64;
+      dst += 64;
+
+      nblks -= 2;
+    }
+
+  while (nblks)
+    {
+      v0 = state0;
+      v1 = state1;
+      v2 = state2;
+      v3 = state3;
+
+      for (i = 20; i > 0; i -= 2)
+       {
+         QUARTERROUND4(v0, v1, v2, v3, 1, 2, 3);
+         QUARTERROUND4(v0, v1, v2, v3, 3, 2, 1);
+       }
+
+      v0 += state0;
+      v1 += state1;
+      v2 += state2;
+      v3 += state3;
+      ADD_U64(state3, counter_1); /* update counter */
+
+      v0 ^= vec_load_le(0 * 16, src);
+      v1 ^= vec_load_le(1 * 16, src);
+      v2 ^= vec_load_le(2 * 16, src);
+      v3 ^= vec_load_le(3 * 16, src);
+      vec_store_le(v0, 0 * 16, dst);
+      vec_store_le(v1, 1 * 16, dst);
+      vec_store_le(v2, 2 * 16, dst);
+      vec_store_le(v3, 3 * 16, dst);
+      src += 64;
+      dst += 64;
+
+      nblks--;
+    }
+
+  vec_vsx_st(state3, 3 * 16, state); /* store counter */
+
+  return 0;
+}
+
+
+/**********************************************************************
+  4-way chacha20
+ **********************************************************************/
+
+/* 4x4 32-bit integer matrix transpose */
+#define transpose_4x4(x0, x1, x2, x3) ({ \
+       vector4x_u32 t1 = vec_mergeh(x0, x2); \
+       vector4x_u32 t2 = vec_mergel(x0, x2); \
+       vector4x_u32 t3 = vec_mergeh(x1, x3); \
+       x3 = vec_mergel(x1, x3); \
+       x0 = vec_mergeh(t1, t3); \
+       x1 = vec_mergel(t1, t3); \
+       x2 = vec_mergeh(t2, x3); \
+       x3 = vec_mergel(t2, x3); \
+      })
+
+#define QUARTERROUND2(a1,b1,c1,d1,a2,b2,c2,d2)                 \
+       PLUS(a1,b1); PLUS(a2,b2); XOR(d1,a1); XOR(d2,a2);       \
+           ROTATE(d1, rotate_16); ROTATE(d2, rotate_16);       \
+       PLUS(c1,d1); PLUS(c2,d2); XOR(b1,c1); XOR(b2,c2);       \
+           ROTATE(b1, rotate_12); ROTATE(b2, rotate_12);       \
+       PLUS(a1,b1); PLUS(a2,b2); XOR(d1,a1); XOR(d2,a2);       \
+           ROTATE(d1, rotate_8); ROTATE(d2, rotate_8);         \
+       PLUS(c1,d1); PLUS(c2,d2); XOR(b1,c1); XOR(b2,c2);       \
+           ROTATE(b1, rotate_7); ROTATE(b2, rotate_7);
+
+unsigned int ASM_FUNC_ATTR
+_gcry_chacha20_ppc8_blocks4(u32 *state, byte *dst, const byte *src,
+                           size_t nblks)
+{
+  vector4x_u32 counters_0123 = { 0, 1, 2, 3 };
+  vector4x_u32 counter_4 = { 4, 0, 0, 0 };
+  vector4x_u32 rotate_16 = { 16, 16, 16, 16 };
+  vector4x_u32 rotate_12 = { 12, 12, 12, 12 };
+  vector4x_u32 rotate_8 = { 8, 8, 8, 8 };
+  vector4x_u32 rotate_7 = { 7, 7, 7, 7 };
+  vector4x_u32 state0, state1, state2, state3;
+  vector4x_u32 v0, v1, v2, v3, v4, v5, v6, v7;
+  vector4x_u32 v8, v9, v10, v11, v12, v13, v14, v15;
+  vector4x_u32 tmp;
+  int i;
+
+  /* force preload of constants to vector registers */
+  __asm__ ("": "+v" (counters_0123) :: "memory");
+  __asm__ ("": "+v" (counter_4) :: "memory");
+  __asm__ ("": "+v" (rotate_16) :: "memory");
+  __asm__ ("": "+v" (rotate_12) :: "memory");
+  __asm__ ("": "+v" (rotate_8) :: "memory");
+  __asm__ ("": "+v" (rotate_7) :: "memory");
+
+  state0 = vec_vsx_ld(0 * 16, state);
+  state1 = vec_vsx_ld(1 * 16, state);
+  state2 = vec_vsx_ld(2 * 16, state);
+  state3 = vec_vsx_ld(3 * 16, state);
+
+  do
+    {
+      v0 = vec_splat(state0, 0);
+      v1 = vec_splat(state0, 1);
+      v2 = vec_splat(state0, 2);
+      v3 = vec_splat(state0, 3);
+      v4 = vec_splat(state1, 0);
+      v5 = vec_splat(state1, 1);
+      v6 = vec_splat(state1, 2);
+      v7 = vec_splat(state1, 3);
+      v8 = vec_splat(state2, 0);
+      v9 = vec_splat(state2, 1);
+      v10 = vec_splat(state2, 2);
+      v11 = vec_splat(state2, 3);
+      v12 = vec_splat(state3, 0);
+      v13 = vec_splat(state3, 1);
+      v14 = vec_splat(state3, 2);
+      v15 = vec_splat(state3, 3);
+
+      v12 += counters_0123;
+      v13 -= vec_cmplt(v12, counters_0123);
+
+      for (i = 20; i > 0; i -= 2)
+       {
+         QUARTERROUND2(v0, v4,  v8, v12,   v1, v5,  v9, v13)
+         QUARTERROUND2(v2, v6, v10, v14,   v3, v7, v11, v15)
+         QUARTERROUND2(v0, v5, v10, v15,   v1, v6, v11, v12)
+         QUARTERROUND2(v2, v7,  v8, v13,   v3, v4,  v9, v14)
+       }
+
+      v0 += vec_splat(state0, 0);
+      v1 += vec_splat(state0, 1);
+      v2 += vec_splat(state0, 2);
+      v3 += vec_splat(state0, 3);
+      v4 += vec_splat(state1, 0);
+      v5 += vec_splat(state1, 1);
+      v6 += vec_splat(state1, 2);
+      v7 += vec_splat(state1, 3);
+      v8 += vec_splat(state2, 0);
+      v9 += vec_splat(state2, 1);
+      v10 += vec_splat(state2, 2);
+      v11 += vec_splat(state2, 3);
+      tmp = vec_splat(state3, 0);
+      tmp += counters_0123;
+      v12 += tmp;
+      v13 += vec_splat(state3, 1) - vec_cmplt(tmp, counters_0123);
+      v14 += vec_splat(state3, 2);
+      v15 += vec_splat(state3, 3);
+      ADD_U64(state3, counter_4); /* update counter */
+
+      transpose_4x4(v0, v1, v2, v3);
+      transpose_4x4(v4, v5, v6, v7);
+      transpose_4x4(v8, v9, v10, v11);
+      transpose_4x4(v12, v13, v14, v15);
+
+      v0 ^= vec_load_le((64 * 0 + 16 * 0), src);
+      v1 ^= vec_load_le((64 * 1 + 16 * 0), src);
+      v2 ^= vec_load_le((64 * 2 + 16 * 0), src);
+      v3 ^= vec_load_le((64 * 3 + 16 * 0), src);
+
+      v4 ^= vec_load_le((64 * 0 + 16 * 1), src);
+      v5 ^= vec_load_le((64 * 1 + 16 * 1), src);
+      v6 ^= vec_load_le((64 * 2 + 16 * 1), src);
+      v7 ^= vec_load_le((64 * 3 + 16 * 1), src);
+
+      v8 ^= vec_load_le((64 * 0 + 16 * 2), src);
+      v9 ^= vec_load_le((64 * 1 + 16 * 2), src);
+      v10 ^= vec_load_le((64 * 2 + 16 * 2), src);
+      v11 ^= vec_load_le((64 * 3 + 16 * 2), src);
+
+      v12 ^= vec_load_le((64 * 0 + 16 * 3), src);
+      v13 ^= vec_load_le((64 * 1 + 16 * 3), src);
+      v14 ^= vec_load_le((64 * 2 + 16 * 3), src);
+      v15 ^= vec_load_le((64 * 3 + 16 * 3), src);
+
+      vec_store_le(v0, (64 * 0 + 16 * 0), dst);
+      vec_store_le(v1, (64 * 1 + 16 * 0), dst);
+      vec_store_le(v2, (64 * 2 + 16 * 0), dst);
+      vec_store_le(v3, (64 * 3 + 16 * 0), dst);
+
+      vec_store_le(v4, (64 * 0 + 16 * 1), dst);
+      vec_store_le(v5, (64 * 1 + 16 * 1), dst);
+      vec_store_le(v6, (64 * 2 + 16 * 1), dst);
+      vec_store_le(v7, (64 * 3 + 16 * 1), dst);
+
+      vec_store_le(v8, (64 * 0 + 16 * 2), dst);
+      vec_store_le(v9, (64 * 1 + 16 * 2), dst);
+      vec_store_le(v10, (64 * 2 + 16 * 2), dst);
+      vec_store_le(v11, (64 * 3 + 16 * 2), dst);
+
+      vec_store_le(v12, (64 * 0 + 16 * 3), dst);
+      vec_store_le(v13, (64 * 1 + 16 * 3), dst);
+      vec_store_le(v14, (64 * 2 + 16 * 3), dst);
+      vec_store_le(v15, (64 * 3 + 16 * 3), dst);
+
+      src += 4*64;
+      dst += 4*64;
+
+      nblks -= 4;
+    }
+  while (nblks);
+
+  vec_vsx_st(state3, 3 * 16, state); /* store counter */
+
+  return 0;
+}
+
+
+#if SIZEOF_UNSIGNED_LONG == 8
+
+/**********************************************************************
+  4-way stitched chacha20-poly1305
+ **********************************************************************/
+
+#define ADD_1305_64(A2, A1, A0, B2, B1, B0) \
+      __asm__ ("addc %0, %3, %0\n" \
+              "adde %1, %4, %1\n" \
+              "adde %2, %5, %2\n" \
+              : "+r" (A0), "+r" (A1), "+r" (A2) \
+              : "r" (B0), "r" (B1), "r" (B2) \
+              : "cc" )
+
+#define MUL_MOD_1305_64_PART1(H2, H1, H0, R1, R0, R1_MULT5) do { \
+    /* x = a * r (partial mod 2^130-5) */ \
+    umul_ppmm(x0_hi, x0_lo, H0, R0);  /* h0 * r0 */ \
+    umul_ppmm(x1_hi, x1_lo, H0, R1);  /* h0 * r1 */ \
+    \
+    umul_ppmm(t0_hi, t0_lo, H1, R1_MULT5); /* h1 * r1 mod 2^130-5 */ \
+  } while (0)
+
+#define MUL_MOD_1305_64_PART2(H2, H1, H0, R1, R0, R1_MULT5) do { \
+    add_ssaaaa(x0_hi, x0_lo, x0_hi, x0_lo, t0_hi, t0_lo); \
+    umul_ppmm(t1_hi, t1_lo, H1, R0);       /* h1 * r0 */ \
+    add_ssaaaa(x1_hi, x1_lo, x1_hi, x1_lo, t1_hi, t1_lo); \
+    \
+    t1_lo = H2 * R1_MULT5; /* h2 * r1 mod 2^130-5 */ \
+    t1_hi = H2 * R0;       /* h2 * r0 */ \
+    add_ssaaaa(H0, H1, x1_hi, x1_lo, t1_hi, t1_lo); \
+    \
+    /* carry propagation */ \
+    H2 = H0 & 3; \
+    H0 = (H0 >> 2) * 5; /* msb mod 2^130-5 */ \
+    ADD_1305_64(H2, H1, H0, (u64)0, x0_hi, x0_lo); \
+  } while (0)
+
+#define POLY1305_BLOCK_PART1(in_pos) do { \
+    m0 = buf_get_le64(poly1305_src + (in_pos) + 0); \
+    m1 = buf_get_le64(poly1305_src + (in_pos) + 8); \
+    /* a = h + m */ \
+    ADD_1305_64(h2, h1, h0, m2, m1, m0); \
+    /* h = a * r (partial mod 2^130-5) */ \
+    MUL_MOD_1305_64_PART1(h2, h1, h0, r1, r0, r1_mult5); \
+  } while (0)
+
+#define POLY1305_BLOCK_PART2(in_pos) do { \
+    MUL_MOD_1305_64_PART2(h2, h1, h0, r1, r0, r1_mult5); \
+  } while (0)
+
+unsigned int ASM_FUNC_ATTR
+_gcry_chacha20_poly1305_ppc8_blocks4(u32 *state, byte *dst, const byte *src,
+                                    size_t nblks, POLY1305_STATE *st,
+                                    const byte *poly1305_src)
+{
+  vector4x_u32 counters_0123 = { 0, 1, 2, 3 };
+  vector4x_u32 counter_4 = { 4, 0, 0, 0 };
+  vector4x_u32 rotate_16 = { 16, 16, 16, 16 };
+  vector4x_u32 rotate_12 = { 12, 12, 12, 12 };
+  vector4x_u32 rotate_8 = { 8, 8, 8, 8 };
+  vector4x_u32 rotate_7 = { 7, 7, 7, 7 };
+  vector4x_u32 state0, state1, state2, state3;
+  vector4x_u32 v0, v1, v2, v3, v4, v5, v6, v7;
+  vector4x_u32 v8, v9, v10, v11, v12, v13, v14, v15;
+  vector4x_u32 tmp;
+  u64 r0, r1, r1_mult5;
+  u64 h0, h1, h2;
+  u64 m0, m1, m2;
+  u64 x0_lo, x0_hi, x1_lo, x1_hi;
+  u64 t0_lo, t0_hi, t1_lo, t1_hi;
+  unsigned int i, o;
+
+  /* load poly1305 state */
+  m2 = 1;
+  h0 = st->h[0] + ((u64)st->h[1] << 32);
+  h1 = st->h[2] + ((u64)st->h[3] << 32);
+  h2 = st->h[4];
+  r0 = st->r[0] + ((u64)st->r[1] << 32);
+  r1 = st->r[2] + ((u64)st->r[3] << 32);
+  r1_mult5 = (r1 >> 2) + r1;
+
+  /* force preload of constants to vector registers */
+  __asm__ ("": "+v" (counters_0123) :: "memory");
+  __asm__ ("": "+v" (counter_4) :: "memory");
+  __asm__ ("": "+v" (rotate_16) :: "memory");
+  __asm__ ("": "+v" (rotate_12) :: "memory");
+  __asm__ ("": "+v" (rotate_8) :: "memory");
+  __asm__ ("": "+v" (rotate_7) :: "memory");
+
+  state0 = vec_vsx_ld(0 * 16, state);
+  state1 = vec_vsx_ld(1 * 16, state);
+  state2 = vec_vsx_ld(2 * 16, state);
+  state3 = vec_vsx_ld(3 * 16, state);
+
+  do
+    {
+      v0 = vec_splat(state0, 0);
+      v1 = vec_splat(state0, 1);
+      v2 = vec_splat(state0, 2);
+      v3 = vec_splat(state0, 3);
+      v4 = vec_splat(state1, 0);
+      v5 = vec_splat(state1, 1);
+      v6 = vec_splat(state1, 2);
+      v7 = vec_splat(state1, 3);
+      v8 = vec_splat(state2, 0);
+      v9 = vec_splat(state2, 1);
+      v10 = vec_splat(state2, 2);
+      v11 = vec_splat(state2, 3);
+      v12 = vec_splat(state3, 0);
+      v13 = vec_splat(state3, 1);
+      v14 = vec_splat(state3, 2);
+      v15 = vec_splat(state3, 3);
+
+      v12 += counters_0123;
+      v13 -= vec_cmplt(v12, counters_0123);
+
+      for (o = 20; o; o -= 10)
+       {
+         for (i = 8; i; i -= 2)
+           {
+             POLY1305_BLOCK_PART1(0 * 16);
+             QUARTERROUND2(v0, v4,  v8, v12,   v1, v5,  v9, v13)
+             POLY1305_BLOCK_PART2();
+             QUARTERROUND2(v2, v6, v10, v14,   v3, v7, v11, v15)
+             POLY1305_BLOCK_PART1(1 * 16);
+             poly1305_src += 2 * 16;
+             QUARTERROUND2(v0, v5, v10, v15,   v1, v6, v11, v12)
+             POLY1305_BLOCK_PART2();
+             QUARTERROUND2(v2, v7,  v8, v13,   v3, v4,  v9, v14)
+           }
+
+         QUARTERROUND2(v0, v4,  v8, v12,   v1, v5,  v9, v13)
+         QUARTERROUND2(v2, v6, v10, v14,   v3, v7, v11, v15)
+         QUARTERROUND2(v0, v5, v10, v15,   v1, v6, v11, v12)
+         QUARTERROUND2(v2, v7,  v8, v13,   v3, v4,  v9, v14)
+       }
+
+      v0 += vec_splat(state0, 0);
+      v1 += vec_splat(state0, 1);
+      v2 += vec_splat(state0, 2);
+      v3 += vec_splat(state0, 3);
+      v4 += vec_splat(state1, 0);
+      v5 += vec_splat(state1, 1);
+      v6 += vec_splat(state1, 2);
+      v7 += vec_splat(state1, 3);
+      v8 += vec_splat(state2, 0);
+      v9 += vec_splat(state2, 1);
+      v10 += vec_splat(state2, 2);
+      v11 += vec_splat(state2, 3);
+      tmp = vec_splat(state3, 0);
+      tmp += counters_0123;
+      v12 += tmp;
+      v13 += vec_splat(state3, 1) - vec_cmplt(tmp, counters_0123);
+      v14 += vec_splat(state3, 2);
+      v15 += vec_splat(state3, 3);
+      ADD_U64(state3, counter_4); /* update counter */
+
+      transpose_4x4(v0, v1, v2, v3);
+      transpose_4x4(v4, v5, v6, v7);
+      transpose_4x4(v8, v9, v10, v11);
+      transpose_4x4(v12, v13, v14, v15);
+
+      v0 ^= vec_load_le((64 * 0 + 16 * 0), src);
+      v1 ^= vec_load_le((64 * 1 + 16 * 0), src);
+      v2 ^= vec_load_le((64 * 2 + 16 * 0), src);
+      v3 ^= vec_load_le((64 * 3 + 16 * 0), src);
+
+      v4 ^= vec_load_le((64 * 0 + 16 * 1), src);
+      v5 ^= vec_load_le((64 * 1 + 16 * 1), src);
+      v6 ^= vec_load_le((64 * 2 + 16 * 1), src);
+      v7 ^= vec_load_le((64 * 3 + 16 * 1), src);
+
+      v8 ^= vec_load_le((64 * 0 + 16 * 2), src);
+      v9 ^= vec_load_le((64 * 1 + 16 * 2), src);
+      v10 ^= vec_load_le((64 * 2 + 16 * 2), src);
+      v11 ^= vec_load_le((64 * 3 + 16 * 2), src);
+
+      v12 ^= vec_load_le((64 * 0 + 16 * 3), src);
+      v13 ^= vec_load_le((64 * 1 + 16 * 3), src);
+      v14 ^= vec_load_le((64 * 2 + 16 * 3), src);
+      v15 ^= vec_load_le((64 * 3 + 16 * 3), src);
+
+      vec_store_le(v0, (64 * 0 + 16 * 0), dst);
+      vec_store_le(v1, (64 * 1 + 16 * 0), dst);
+      vec_store_le(v2, (64 * 2 + 16 * 0), dst);
+      vec_store_le(v3, (64 * 3 + 16 * 0), dst);
+
+      vec_store_le(v4, (64 * 0 + 16 * 1), dst);
+      vec_store_le(v5, (64 * 1 + 16 * 1), dst);
+      vec_store_le(v6, (64 * 2 + 16 * 1), dst);
+      vec_store_le(v7, (64 * 3 + 16 * 1), dst);
+
+      vec_store_le(v8, (64 * 0 + 16 * 2), dst);
+      vec_store_le(v9, (64 * 1 + 16 * 2), dst);
+      vec_store_le(v10, (64 * 2 + 16 * 2), dst);
+      vec_store_le(v11, (64 * 3 + 16 * 2), dst);
+
+      vec_store_le(v12, (64 * 0 + 16 * 3), dst);
+      vec_store_le(v13, (64 * 1 + 16 * 3), dst);
+      vec_store_le(v14, (64 * 2 + 16 * 3), dst);
+      vec_store_le(v15, (64 * 3 + 16 * 3), dst);
+
+      src += 4*64;
+      dst += 4*64;
+
+      nblks -= 4;
+    }
+  while (nblks);
+
+  vec_vsx_st(state3, 3 * 16, state); /* store counter */
+
+  /* store poly1305 state */
+  st->h[0] = h0;
+  st->h[1] = h0 >> 32;
+  st->h[2] = h1;
+  st->h[3] = h1 >> 32;
+  st->h[4] = h2;
+
+  return 0;
+}
+
+#endif /* SIZEOF_UNSIGNED_LONG == 8 */
+
+#endif /* ENABLE_PPC_CRYPTO_SUPPORT */
diff --git a/grub-core/lib/libgcrypt/cipher/chacha20-s390x.S 
b/grub-core/lib/libgcrypt/cipher/chacha20-s390x.S
new file mode 100644
index 000000000..9b1d59c6a
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/chacha20-s390x.S
@@ -0,0 +1,1561 @@
+/* chacha20-s390x.S  -  zSeries implementation of ChaCha20 cipher
+ *
+ * Copyright (C) 2020 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#if defined (__s390x__) && __GNUC__ >= 4 && __ARCH__ >= 9
+#include <config.h>
+#if defined(HAVE_GCC_INLINE_ASM_S390X_VX)
+
+#include "asm-common-s390x.h"
+#include "asm-poly1305-s390x.h"
+
+.machine "z13+vx"
+.text
+
+.balign 16
+.Lconsts:
+.Lwordswap:
+       .byte 12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3
+.Lbswap128:
+       .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
+.Lbswap32:
+       .byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
+.Lone:
+       .long 0, 0, 0, 1
+.Ladd_counter_0123:
+       .long 0, 1, 2, 3
+.Ladd_counter_4567:
+       .long 4, 5, 6, 7
+
+/* register macros */
+#define INPUT %r2
+#define DST   %r3
+#define SRC   %r4
+#define NBLKS %r0
+#define ROUND %r1
+
+/* stack structure */
+
+#define STACK_FRAME_STD    (8 * 16 + 8 * 4)
+#define STACK_FRAME_F8_F15 (8 * 8)
+#define STACK_FRAME_Y0_Y15 (16 * 16)
+#define STACK_FRAME_CTR    (4 * 16)
+#define STACK_FRAME_PARAMS (6 * 8)
+
+#define STACK_MAX   (STACK_FRAME_STD + STACK_FRAME_F8_F15 + \
+                    STACK_FRAME_Y0_Y15 + STACK_FRAME_CTR + \
+                    STACK_FRAME_PARAMS)
+
+#define STACK_F8     (STACK_MAX - STACK_FRAME_F8_F15)
+#define STACK_F9     (STACK_F8 + 8)
+#define STACK_F10    (STACK_F9 + 8)
+#define STACK_F11    (STACK_F10 + 8)
+#define STACK_F12    (STACK_F11 + 8)
+#define STACK_F13    (STACK_F12 + 8)
+#define STACK_F14    (STACK_F13 + 8)
+#define STACK_F15    (STACK_F14 + 8)
+#define STACK_Y0_Y15 (STACK_F8 - STACK_FRAME_Y0_Y15)
+#define STACK_CTR    (STACK_Y0_Y15 - STACK_FRAME_CTR)
+#define STACK_INPUT  (STACK_CTR - STACK_FRAME_PARAMS)
+#define STACK_DST    (STACK_INPUT + 8)
+#define STACK_SRC    (STACK_DST + 8)
+#define STACK_NBLKS  (STACK_SRC + 8)
+#define STACK_POCTX  (STACK_NBLKS + 8)
+#define STACK_POSRC  (STACK_POCTX + 8)
+
+#define STACK_G0_H3  STACK_Y0_Y15
+
+/* vector registers */
+#define A0 %v0
+#define A1 %v1
+#define A2 %v2
+#define A3 %v3
+
+#define B0 %v4
+#define B1 %v5
+#define B2 %v6
+#define B3 %v7
+
+#define C0 %v8
+#define C1 %v9
+#define C2 %v10
+#define C3 %v11
+
+#define D0 %v12
+#define D1 %v13
+#define D2 %v14
+#define D3 %v15
+
+#define E0 %v16
+#define E1 %v17
+#define E2 %v18
+#define E3 %v19
+
+#define F0 %v20
+#define F1 %v21
+#define F2 %v22
+#define F3 %v23
+
+#define G0 %v24
+#define G1 %v25
+#define G2 %v26
+#define G3 %v27
+
+#define H0 %v28
+#define H1 %v29
+#define H2 %v30
+#define H3 %v31
+
+#define IO0 E0
+#define IO1 E1
+#define IO2 E2
+#define IO3 E3
+#define IO4 F0
+#define IO5 F1
+#define IO6 F2
+#define IO7 F3
+
+#define S0 G0
+#define S1 G1
+#define S2 G2
+#define S3 G3
+
+#define TMP0 H0
+#define TMP1 H1
+#define TMP2 H2
+#define TMP3 H3
+
+#define X0 A0
+#define X1 A1
+#define X2 A2
+#define X3 A3
+#define X4 B0
+#define X5 B1
+#define X6 B2
+#define X7 B3
+#define X8 C0
+#define X9 C1
+#define X10 C2
+#define X11 C3
+#define X12 D0
+#define X13 D1
+#define X14 D2
+#define X15 D3
+
+#define Y0 E0
+#define Y1 E1
+#define Y2 E2
+#define Y3 E3
+#define Y4 F0
+#define Y5 F1
+#define Y6 F2
+#define Y7 F3
+#define Y8 G0
+#define Y9 G1
+#define Y10 G2
+#define Y11 G3
+#define Y12 H0
+#define Y13 H1
+#define Y14 H2
+#define Y15 H3
+
+/**********************************************************************
+  helper macros
+ **********************************************************************/
+
+#define _ /*_*/
+
+#define CLEAR(x,...) vzero x;
+
+#define START_STACK(last_r) \
+       lgr %r0, %r15; \
+       lghi %r1, ~15; \
+       stmg %r6, last_r, 6 * 8(%r15); \
+       aghi %r0, -STACK_MAX; \
+       ngr %r0, %r1; \
+       lgr %r1, %r15; \
+       CFI_DEF_CFA_REGISTER(1); \
+       lgr %r15, %r0; \
+       stg %r1, 0(%r15); \
+       CFI_CFA_ON_STACK(0, 0); \
+       std %f8, STACK_F8(%r15); \
+       std %f9, STACK_F9(%r15); \
+       std %f10, STACK_F10(%r15); \
+       std %f11, STACK_F11(%r15); \
+       std %f12, STACK_F12(%r15); \
+       std %f13, STACK_F13(%r15); \
+       std %f14, STACK_F14(%r15); \
+       std %f15, STACK_F15(%r15);
+
+#define END_STACK(last_r) \
+       lg %r1, 0(%r15); \
+       ld %f8, STACK_F8(%r15); \
+       ld %f9, STACK_F9(%r15); \
+       ld %f10, STACK_F10(%r15); \
+       ld %f11, STACK_F11(%r15); \
+       ld %f12, STACK_F12(%r15); \
+       ld %f13, STACK_F13(%r15); \
+       ld %f14, STACK_F14(%r15); \
+       ld %f15, STACK_F15(%r15); \
+       lmg %r6, last_r, 6 * 8(%r1); \
+       lgr %r15, %r1; \
+       CFI_DEF_CFA_REGISTER(DW_REGNO_SP);
+
+#define PLUS(dst,src) \
+       vaf dst, dst, src;
+
+#define XOR(dst,src) \
+       vx dst, dst, src;
+
+#define ROTATE(v1,c) \
+       verllf v1, v1, (c)(0);
+
+#define WORD_ROTATE(v1,s) \
+       vsldb v1, v1, v1, ((s) * 4);
+
+#define DST_1(OPER, I, J) \
+       OPER(A##I, J);
+
+#define DST_2(OPER, I, J) \
+       OPER(A##I, J); OPER(B##I, J);
+
+#define DST_4(OPER, I, J) \
+       OPER(A##I, J); OPER(B##I, J); OPER(C##I, J); OPER(D##I, J);
+
+#define DST_8(OPER, I, J) \
+       OPER(A##I, J); OPER(B##I, J); OPER(C##I, J); OPER(D##I, J); \
+       OPER(E##I, J); OPER(F##I, J); OPER(G##I, J); OPER(H##I, J);
+
+#define DST_SRC_1(OPER, I, J) \
+       OPER(A##I, A##J);
+
+#define DST_SRC_2(OPER, I, J) \
+       OPER(A##I, A##J); OPER(B##I, B##J);
+
+#define DST_SRC_4(OPER, I, J) \
+       OPER(A##I, A##J); OPER(B##I, B##J); OPER(C##I, C##J); \
+       OPER(D##I, D##J);
+
+#define DST_SRC_8(OPER, I, J) \
+       OPER(A##I, A##J); OPER(B##I, B##J); OPER(C##I, C##J); \
+       OPER(D##I, D##J); OPER(E##I, E##J); OPER(F##I, F##J); \
+       OPER(G##I, G##J); OPER(H##I, H##J);
+
+/**********************************************************************
+  round macros
+ **********************************************************************/
+
+#define QUARTERROUND4_POLY(wrot_1,wrot_2,wrot_3,op1,op2) \
+       op1; DST_SRC_1(PLUS, 0, 1); DST_SRC_1(XOR, 3, 0); DST_1(ROTATE, 3, 16); 
\
+       DST_SRC_1(PLUS, 2, 3); DST_SRC_1(XOR, 1, 2); DST_1(ROTATE, 1, 12); \
+       DST_SRC_1(PLUS, 0, 1); DST_SRC_1(XOR, 3, 0); DST_1(ROTATE, 3, 8); \
+       op2; DST_SRC_1(PLUS, 2, 3); DST_SRC_1(XOR, 1, 2); DST_1(ROTATE, 1, 7); \
+       DST_1(WORD_ROTATE, 3, wrot_3); \
+       DST_1(WORD_ROTATE, 2, wrot_2); \
+       DST_1(WORD_ROTATE, 1, wrot_1);
+
+#define QUARTERROUND4(wrot_1,wrot_2,wrot_3) \
+       QUARTERROUND4_POLY(wrot_1,wrot_2,wrot_3,,)
+
+#define QUARTERROUND4_2_POLY(wrot_1,wrot_2,wrot_3,op1,op2,op3,op4) \
+       op1; DST_SRC_2(PLUS, 0, 1); DST_SRC_2(XOR, 3, 0); DST_2(ROTATE, 3, 16); 
\
+       DST_SRC_2(PLUS, 2, 3); op2; DST_SRC_2(XOR, 1, 2); DST_2(ROTATE, 1, 12); 
\
+       DST_SRC_2(PLUS, 0, 1); DST_SRC_2(XOR, 3, 0); op3; DST_2(ROTATE, 3, 8); \
+       DST_SRC_2(PLUS, 2, 3); DST_SRC_2(XOR, 1, 2); DST_2(ROTATE, 1, 7); op4; \
+       DST_2(WORD_ROTATE, 3, wrot_3); \
+       DST_2(WORD_ROTATE, 2, wrot_2); \
+       DST_2(WORD_ROTATE, 1, wrot_1);
+
+#define QUARTERROUND4_2(wrot_1,wrot_2,wrot_3) \
+       QUARTERROUND4_2_POLY(wrot_1,wrot_2,wrot_3,,,,)
+
+#define QUARTERROUND4_4_POLY(wrot_1,wrot_2,wrot_3,op1,op2,op3,op4,op5,op6) \
+       DST_SRC_4(PLUS, 0, 1); DST_SRC_4(XOR, 3, 0); op1; DST_4(ROTATE, 3, 16); 
\
+       DST_SRC_4(PLUS, 2, 3); op2; DST_SRC_4(XOR, 1, 2); DST_4(ROTATE, 1, 12); 
\
+       op3; DST_SRC_4(PLUS, 0, 1); DST_SRC_4(XOR, 3, 0); op4; DST_4(ROTATE, 3, 
8); \
+       DST_SRC_4(PLUS, 2, 3); op5; DST_SRC_4(XOR, 1, 2); DST_4(ROTATE, 1, 7); \
+       op6; \
+       DST_4(WORD_ROTATE, 3, wrot_3); \
+       DST_4(WORD_ROTATE, 2, wrot_2); \
+       DST_4(WORD_ROTATE, 1, wrot_1);
+
+#define QUARTERROUND4_4(wrot_1,wrot_2,wrot_3) \
+       QUARTERROUND4_4_POLY(wrot_1,wrot_2,wrot_3,,,,,,)
+
+/**********************************************************************
+  4-way && 2-way && 1-way chacha20 ("horizontal")
+ **********************************************************************/
+
+.balign 8
+.globl _gcry_chacha20_s390x_vx_blocks4_2_1
+ELF(.type _gcry_chacha20_s390x_vx_blocks4_2_1,@function;)
+
+_gcry_chacha20_s390x_vx_blocks4_2_1:
+       /* input:
+        *      %r2: input
+        *      %r3: dst
+        *      %r4: src
+        *      %r5: nblks
+        */
+       CFI_STARTPROC();
+
+       START_STACK(%r7);
+       lgr NBLKS, %r5;
+
+       /* Load constants. */
+       larl %r7, .Lconsts;
+       vl TMP0, (.Lwordswap - .Lconsts)(%r7);
+       vl TMP1, (.Lone - .Lconsts)(%r7);
+       vl TMP2, (.Lbswap128 - .Lconsts)(%r7);
+
+       /* Load state. */
+       vlm S0, S3, 0(INPUT);
+       vperm S0, S0, S0, TMP0;
+       vperm S1, S1, S1, TMP0;
+       vperm S2, S2, S2, TMP0;
+       vperm S3, S3, S3, TMP0;
+
+       clgijl NBLKS, 4, .Lloop2;
+
+.balign 4
+.Lloop4:
+       /* Process four chacha20 blocks. */
+       vlr TMP3, S3;
+       lghi ROUND, (20 / 2);
+       vlr A0, S0;
+       vlr A1, S1;
+       vlr A2, S2;
+       vlr A3, TMP3;
+       vag TMP3, TMP3, TMP1;
+       vlr B0, S0;
+       vlr B1, S1;
+       vlr B2, S2;
+       vlr B3, TMP3;
+       vag TMP3, TMP3, TMP1;
+       vlr C0, S0;
+       vlr C1, S1;
+       vlr C2, S2;
+       vlr C3, TMP3;
+       vlr D0, S0;
+       vlr D1, S1;
+       vlr D2, S2;
+       vag D3, TMP3, TMP1;
+
+       slgfi NBLKS, 4;
+
+.balign 4
+.Lround2_4:
+       QUARTERROUND4_4(3, 2, 1);
+       QUARTERROUND4_4(1, 2, 3);
+       brctg ROUND, .Lround2_4;
+
+       vlm IO0, IO7, 0(SRC);
+
+       PLUS(A0, S0);
+       PLUS(A1, S1);
+       PLUS(A2, S2);
+       PLUS(A3, S3);
+       vag S3, S3, TMP1; /* Update counter. */
+       PLUS(B0, S0);
+       PLUS(B1, S1);
+       PLUS(B2, S2);
+       PLUS(B3, S3);
+       vag S3, S3, TMP1; /* Update counter. */
+       vperm A0, A0, A0, TMP2;
+       vperm A1, A1, A1, TMP2;
+       vperm A2, A2, A2, TMP2;
+       vperm A3, A3, A3, TMP2;
+       vperm B0, B0, B0, TMP2;
+       vperm B1, B1, B1, TMP2;
+       vperm B2, B2, B2, TMP2;
+       vperm B3, B3, B3, TMP2;
+       PLUS(C0, S0);
+       PLUS(C1, S1);
+       PLUS(C2, S2);
+       PLUS(C3, S3);
+       vag S3, S3, TMP1; /* Update counter. */
+       PLUS(D0, S0);
+       PLUS(D1, S1);
+       PLUS(D2, S2);
+       PLUS(D3, S3);
+       vag S3, S3, TMP1; /* Update counter. */
+       vperm C0, C0, C0, TMP2;
+       vperm C1, C1, C1, TMP2;
+       vperm C2, C2, C2, TMP2;
+       vperm C3, C3, C3, TMP2;
+       vperm D0, D0, D0, TMP2;
+       vperm D1, D1, D1, TMP2;
+       vperm D2, D2, D2, TMP2;
+       vperm D3, D3, D3, TMP2;
+
+       XOR(IO0, A0);
+       XOR(IO1, A1);
+       XOR(IO2, A2);
+       XOR(IO3, A3);
+       XOR(IO4, B0);
+       XOR(IO5, B1);
+       XOR(IO6, B2);
+       XOR(IO7, B3);
+       vlm A0, B3, 128(SRC);
+       vstm IO0, IO7, 0(DST);
+       XOR(A0, C0);
+       XOR(A1, C1);
+       XOR(A2, C2);
+       XOR(A3, C3);
+       XOR(B0, D0);
+       XOR(B1, D1);
+       XOR(B2, D2);
+       XOR(B3, D3);
+       vstm A0, B3, 128(DST);
+
+       aghi SRC, 256;
+       aghi DST, 256;
+
+       clgijhe NBLKS, 4, .Lloop4;
+
+       CLEAR(C0);
+       CLEAR(C1);
+       CLEAR(C2);
+       CLEAR(C3);
+       CLEAR(D0);
+       CLEAR(D1);
+       CLEAR(D2);
+       CLEAR(D3);
+
+.balign 4
+.Lloop2:
+       clgijl NBLKS, 2, .Lloop1;
+
+       /* Process two chacha20 blocks. */
+       lghi ROUND, (20 / 2);
+       vlr A0, S0;
+       vlr A1, S1;
+       vlr A2, S2;
+       vlr A3, S3;
+       vlr B0, S0;
+       vlr B1, S1;
+       vlr B2, S2;
+       vag B3, S3, TMP1;
+
+       slgfi NBLKS, 2;
+
+.balign 4
+.Lround2_2:
+       QUARTERROUND4_2(3, 2, 1);
+       QUARTERROUND4_2(1, 2, 3);
+       brctg ROUND, .Lround2_2;
+
+       vlm IO0, IO7, 0(SRC);
+
+       PLUS(A0, S0);
+       PLUS(A1, S1);
+       PLUS(A2, S2);
+       PLUS(A3, S3);
+       vag S3, S3, TMP1; /* Update counter. */
+       PLUS(B0, S0);
+       PLUS(B1, S1);
+       PLUS(B2, S2);
+       PLUS(B3, S3);
+       vag S3, S3, TMP1; /* Update counter. */
+       vperm A0, A0, A0, TMP2;
+       vperm A1, A1, A1, TMP2;
+       vperm A2, A2, A2, TMP2;
+       vperm A3, A3, A3, TMP2;
+       vperm B0, B0, B0, TMP2;
+       vperm B1, B1, B1, TMP2;
+       vperm B2, B2, B2, TMP2;
+       vperm B3, B3, B3, TMP2;
+
+       XOR(IO0, A0);
+       XOR(IO1, A1);
+       XOR(IO2, A2);
+       XOR(IO3, A3);
+       XOR(IO4, B0);
+       XOR(IO5, B1);
+       XOR(IO6, B2);
+       XOR(IO7, B3);
+       vstm IO0, IO7, 0(DST);
+
+       aghi SRC, 128;
+       aghi DST, 128;
+
+       clgijhe NBLKS, 2, .Lloop2;
+
+       CLEAR(B0);
+       CLEAR(B1);
+       CLEAR(B2);
+       CLEAR(B3);
+
+.balign 4
+.Lloop1:
+       clgijl NBLKS, 1, .Ldone;
+
+       /* Process one chacha20 block.*/
+       lghi ROUND, (20 / 2);
+       vlr A0, S0;
+       vlr A1, S1;
+       vlr A2, S2;
+       vlr A3, S3;
+
+       slgfi NBLKS, 1;
+
+.balign 4
+.Lround2_1:
+       QUARTERROUND4(3, 2, 1);
+       QUARTERROUND4(1, 2, 3);
+       brct ROUND, .Lround2_1;
+
+       vlm IO0, IO3, 0(SRC);
+
+       PLUS(A0, S0);
+       PLUS(A1, S1);
+       PLUS(A2, S2);
+       PLUS(A3, S3);
+       vag S3, S3, TMP1; /* Update counter. */
+
+       vperm A0, A0, A0, TMP2;
+       vperm A1, A1, A1, TMP2;
+       vperm A2, A2, A2, TMP2;
+       vperm A3, A3, A3, TMP2;
+       XOR(IO0, A0);
+       XOR(IO1, A1);
+       XOR(IO2, A2);
+       XOR(IO3, A3);
+       vstm IO0, IO3, 0(DST);
+
+       aghi SRC, 64;
+       aghi DST, 64;
+
+       clgijhe NBLKS, 1, .Lloop1;
+
+.balign 4
+.Ldone:
+       /* Store counter. */
+       vperm S3, S3, S3, TMP0;
+       vst S3, (48)(INPUT);
+
+       /* Clear the used vector registers. */
+       CLEAR(A0);
+       CLEAR(A1);
+       CLEAR(A2);
+       CLEAR(A3);
+       CLEAR(IO0);
+       CLEAR(IO1);
+       CLEAR(IO2);
+       CLEAR(IO3);
+       CLEAR(IO4);
+       CLEAR(IO5);
+       CLEAR(IO6);
+       CLEAR(IO7);
+       CLEAR(TMP0);
+       CLEAR(TMP1);
+       CLEAR(TMP2);
+
+       END_STACK(%r7);
+       xgr %r2, %r2;
+       br %r14;
+       CFI_ENDPROC();
+ELF(.size _gcry_chacha20_s390x_vx_blocks4_2_1,
+    .-_gcry_chacha20_s390x_vx_blocks4_2_1;)
+
+/**********************************************************************
+  4-way && 2-way && 1-way stitched chacha20-poly1305 ("horizontal")
+ **********************************************************************/
+
+.balign 8
+.globl _gcry_chacha20_poly1305_s390x_vx_blocks4_2_1
+ELF(.type _gcry_chacha20_poly1305_s390x_vx_blocks4_2_1,@function;)
+
+_gcry_chacha20_poly1305_s390x_vx_blocks4_2_1:
+       /* input:
+        *       %r2: input
+        *       %r3: dst
+        *       %r4: src
+        *       %r5: nblks
+        *       %r6: poly1305 state
+        * 160(%r15): poly1305 src
+        */
+       CFI_STARTPROC();
+
+       START_STACK(%r14);
+       lgr NBLKS, %r5;
+
+       /* Load constants. */
+       larl %r8, .Lconsts;
+       vl TMP0, (.Lwordswap - .Lconsts)(%r8);
+       vl TMP1, (.Lone - .Lconsts)(%r8);
+       vl TMP2, (.Lbswap128 - .Lconsts)(%r8);
+
+       /* Load state. */
+       vlm S0, S3, 0(INPUT);
+       vperm S0, S0, S0, TMP0;
+       vperm S1, S1, S1, TMP0;
+       vperm S2, S2, S2, TMP0;
+       vperm S3, S3, S3, TMP0;
+
+       /* Store parameters to stack. */
+       stmg %r2, %r6, STACK_INPUT(%r15);
+
+       lgr POLY_RSTATE, %r6;
+       lgr NBLKS, %r5;
+
+       lg POLY_RSRC, 0(%r15);
+       lg POLY_RSRC, 160(POLY_RSRC);
+       stg POLY_RSRC, STACK_POSRC(%r15);
+
+       /* Load poly1305 state */
+       POLY1305_LOAD_STATE();
+
+       clgijl NBLKS, 4, .Lloop2_poly;
+
+.balign 4
+.Lloop4_poly:
+       /* Process four chacha20 blocks and 16 poly1305 blocks. */
+       vlr TMP3, S3;
+       lghi ROUND, (20 / 4);
+       vlr A0, S0;
+       vlr A1, S1;
+       vlr A2, S2;
+       vlr A3, TMP3;
+       vag TMP3, TMP3, TMP1;
+       vlr B0, S0;
+       vlr B1, S1;
+       vlr B2, S2;
+       vlr B3, TMP3;
+       vag TMP3, TMP3, TMP1;
+       vlr C0, S0;
+       vlr C1, S1;
+       vlr C2, S2;
+       vlr C3, TMP3;
+       vlr D0, S0;
+       vlr D1, S1;
+       vlr D2, S2;
+       vag D3, TMP3, TMP1;
+
+       slgfi NBLKS, 4;
+
+.balign 4
+.Lround4_4_poly:
+       /* Total 15 poly1305 blocks processed by this loop. */
+       QUARTERROUND4_4_POLY(3, 2, 1,
+                            POLY1305_BLOCK_PART1(0 * 16),
+                            POLY1305_BLOCK_PART2(),
+                            POLY1305_BLOCK_PART3(),
+                            POLY1305_BLOCK_PART4(),
+                            POLY1305_BLOCK_PART5(),
+                            POLY1305_BLOCK_PART6());
+       QUARTERROUND4_4_POLY(1, 2, 3,
+                            POLY1305_BLOCK_PART7(),
+                            POLY1305_BLOCK_PART8(),
+                            POLY1305_BLOCK_PART1(1 * 16),
+                            POLY1305_BLOCK_PART2(),
+                            POLY1305_BLOCK_PART3(),
+                            POLY1305_BLOCK_PART4());
+       QUARTERROUND4_4_POLY(3, 2, 1,
+                            POLY1305_BLOCK_PART5(),
+                            POLY1305_BLOCK_PART6(),
+                            POLY1305_BLOCK_PART7(),
+                            POLY1305_BLOCK_PART8(),
+                            POLY1305_BLOCK_PART1(2 * 16);
+                             INC_POLY1305_SRC(3 * 16),
+                            POLY1305_BLOCK_PART2());
+       QUARTERROUND4_4_POLY(1, 2, 3,
+                            POLY1305_BLOCK_PART3(),
+                            POLY1305_BLOCK_PART4(),
+                            POLY1305_BLOCK_PART5(),
+                            POLY1305_BLOCK_PART6(),
+                            POLY1305_BLOCK_PART7(),
+                            POLY1305_BLOCK_PART8());
+       brctg ROUND, .Lround4_4_poly;
+
+       POLY1305_BLOCK_PART1(0 * 16);
+       INC_POLY1305_SRC(1 * 16);
+       stg POLY_RSRC, STACK_POSRC(%r15);
+
+       lg %r14, STACK_SRC(%r15);
+       vlm IO0, IO7, 0(%r14);
+
+       PLUS(A0, S0);
+       PLUS(A1, S1);
+       PLUS(A2, S2);
+       PLUS(A3, S3);
+       vag S3, S3, TMP1; /* Update counter. */
+       POLY1305_BLOCK_PART2();
+       PLUS(B0, S0);
+       PLUS(B1, S1);
+       PLUS(B2, S2);
+       PLUS(B3, S3);
+       vag S3, S3, TMP1; /* Update counter. */
+       POLY1305_BLOCK_PART3();
+       vperm A0, A0, A0, TMP2;
+       vperm A1, A1, A1, TMP2;
+       vperm A2, A2, A2, TMP2;
+       vperm A3, A3, A3, TMP2;
+       vperm B0, B0, B0, TMP2;
+       vperm B1, B1, B1, TMP2;
+       vperm B2, B2, B2, TMP2;
+       vperm B3, B3, B3, TMP2;
+       POLY1305_BLOCK_PART4();
+       PLUS(C0, S0);
+       PLUS(C1, S1);
+       PLUS(C2, S2);
+       PLUS(C3, S3);
+       vag S3, S3, TMP1; /* Update counter. */
+       PLUS(D0, S0);
+       PLUS(D1, S1);
+       PLUS(D2, S2);
+       PLUS(D3, S3);
+       vag S3, S3, TMP1; /* Update counter. */
+       POLY1305_BLOCK_PART5();
+       vperm C0, C0, C0, TMP2;
+       vperm C1, C1, C1, TMP2;
+       vperm C2, C2, C2, TMP2;
+       vperm C3, C3, C3, TMP2;
+       vperm D0, D0, D0, TMP2;
+       vperm D1, D1, D1, TMP2;
+       vperm D2, D2, D2, TMP2;
+       vperm D3, D3, D3, TMP2;
+
+       POLY1305_BLOCK_PART6();
+       XOR(IO0, A0);
+       XOR(IO1, A1);
+       XOR(IO2, A2);
+       XOR(IO3, A3);
+       XOR(IO4, B0);
+       XOR(IO5, B1);
+       XOR(IO6, B2);
+       XOR(IO7, B3);
+       vlm A0, B3, 128(%r14);
+       aghi %r14, 256;
+       stg %r14, STACK_SRC(%r15);
+
+       lg %r14, STACK_DST(%r15);
+       POLY1305_BLOCK_PART7();
+       vstm IO0, IO7, 0(%r14);
+       XOR(A0, C0);
+       XOR(A1, C1);
+       XOR(A2, C2);
+       XOR(A3, C3);
+       XOR(B0, D0);
+       XOR(B1, D1);
+       XOR(B2, D2);
+       XOR(B3, D3);
+       POLY1305_BLOCK_PART8();
+       vstm A0, B3, 128(%r14);
+       aghi %r14, 256;
+       stg %r14, STACK_DST(%r15);
+
+       lg POLY_RSRC, STACK_POSRC(%r15);
+
+       clgijhe NBLKS, 4, .Lloop4_poly;
+
+       CLEAR(C0);
+       CLEAR(C1);
+       CLEAR(C2);
+       CLEAR(C3);
+       CLEAR(D0);
+       CLEAR(D1);
+       CLEAR(D2);
+       CLEAR(D3);
+
+.balign 4
+.Lloop2_poly:
+       clgijl NBLKS, 2, .Lloop1_poly;
+
+       /* Process two chacha20 and eight poly1305 blocks. */
+       lghi ROUND, ((20 - 4) / 2);
+       vlr A0, S0;
+       vlr A1, S1;
+       vlr A2, S2;
+       vlr A3, S3;
+       vlr B0, S0;
+       vlr B1, S1;
+       vlr B2, S2;
+       vag B3, S3, TMP1;
+
+       slgfi NBLKS, 2;
+
+.balign 4
+.Lround4_2_poly:
+       /* Total eight poly1305 blocks processed by this loop. */
+       QUARTERROUND4_2_POLY(3, 2, 1,
+                            POLY1305_BLOCK_PART1(0 * 16),
+                            POLY1305_BLOCK_PART2(),
+                            POLY1305_BLOCK_PART3(),
+                            POLY1305_BLOCK_PART4());
+                            INC_POLY1305_SRC(1 * 16);
+       QUARTERROUND4_2_POLY(1, 2, 3,
+                            POLY1305_BLOCK_PART5(),
+                            POLY1305_BLOCK_PART6(),
+                            POLY1305_BLOCK_PART7(),
+                            POLY1305_BLOCK_PART8());
+       brctg ROUND, .Lround4_2_poly;
+
+       stg POLY_RSRC, STACK_POSRC(%r15);
+       lg %r14, STACK_SRC(%r15);
+
+       QUARTERROUND4_2(3, 2, 1);
+       QUARTERROUND4_2(1, 2, 3);
+       QUARTERROUND4_2(3, 2, 1);
+       QUARTERROUND4_2(1, 2, 3);
+
+       vlm IO0, IO7, 0(%r14);
+       aghi %r14, 128;
+       stg %r14, STACK_SRC(%r15);
+
+       PLUS(A0, S0);
+       PLUS(A1, S1);
+       PLUS(A2, S2);
+       PLUS(A3, S3);
+       vag S3, S3, TMP1; /* Update counter. */
+       PLUS(B0, S0);
+       PLUS(B1, S1);
+       PLUS(B2, S2);
+       PLUS(B3, S3);
+       vag S3, S3, TMP1; /* Update counter. */
+       vperm A0, A0, A0, TMP2;
+       vperm A1, A1, A1, TMP2;
+       vperm A2, A2, A2, TMP2;
+       vperm A3, A3, A3, TMP2;
+       vperm B0, B0, B0, TMP2;
+       vperm B1, B1, B1, TMP2;
+       vperm B2, B2, B2, TMP2;
+       vperm B3, B3, B3, TMP2;
+
+       lg %r14, STACK_DST(%r15);
+       XOR(IO0, A0);
+       XOR(IO1, A1);
+       XOR(IO2, A2);
+       XOR(IO3, A3);
+       XOR(IO4, B0);
+       XOR(IO5, B1);
+       XOR(IO6, B2);
+       XOR(IO7, B3);
+       vstm IO0, IO7, 0(%r14);
+       aghi %r14, 128;
+       stg %r14, STACK_DST(%r15);
+
+       lg POLY_RSRC, STACK_POSRC(%r15);
+
+       clgijhe NBLKS, 2, .Lloop2_poly;
+
+       CLEAR(B0);
+       CLEAR(B1);
+       CLEAR(B2);
+       CLEAR(B3);
+
+.balign 4
+.Lloop1_poly:
+       clgijl NBLKS, 1, .Ldone_poly;
+
+       /* Process one chacha20 block and four poly1305 blocks.*/
+       lghi ROUND, ((20 - 4) / 4);
+       vlr A0, S0;
+       vlr A1, S1;
+       vlr A2, S2;
+       vlr A3, S3;
+
+       slgfi NBLKS, 1;
+
+.balign 4
+.Lround4_1_poly:
+       /* Total four poly1305 blocks processed by this loop. */
+       QUARTERROUND4_POLY(3, 2, 1,
+                          POLY1305_BLOCK_PART1(0 * 16),
+                          POLY1305_BLOCK_PART2());
+                          INC_POLY1305_SRC(1 * 16);
+       QUARTERROUND4_POLY(1, 2, 3,
+                          POLY1305_BLOCK_PART3(),
+                          POLY1305_BLOCK_PART4());
+       QUARTERROUND4_POLY(3, 2, 1,
+                          POLY1305_BLOCK_PART5(),
+                          POLY1305_BLOCK_PART6());
+       QUARTERROUND4_POLY(1, 2, 3,
+                          POLY1305_BLOCK_PART7(),
+                          POLY1305_BLOCK_PART8());
+       brct ROUND, .Lround4_1_poly;
+
+       stg POLY_RSRC, STACK_POSRC(%r15);
+       lg %r14, STACK_SRC(%r15);
+
+       QUARTERROUND4(3, 2, 1);
+       QUARTERROUND4(1, 2, 3);
+       QUARTERROUND4(3, 2, 1);
+       QUARTERROUND4(1, 2, 3);
+
+       vlm IO0, IO3, 0(%r14);
+       aghi %r14, 64;
+       stg %r14, STACK_SRC(%r15);
+
+       PLUS(A0, S0);
+       PLUS(A1, S1);
+       PLUS(A2, S2);
+       PLUS(A3, S3);
+       vag S3, S3, TMP1; /* Update counter. */
+
+       lg %r14, STACK_DST(%r15);
+       vperm A0, A0, A0, TMP2;
+       vperm A1, A1, A1, TMP2;
+       vperm A2, A2, A2, TMP2;
+       vperm A3, A3, A3, TMP2;
+       XOR(IO0, A0);
+       XOR(IO1, A1);
+       XOR(IO2, A2);
+       XOR(IO3, A3);
+       vstm IO0, IO3, 0(%r14);
+       aghi %r14, 64;
+       stg %r14, STACK_DST(%r15);
+
+       lg POLY_RSRC, STACK_POSRC(%r15);
+
+       clgijhe NBLKS, 1, .Lloop1_poly;
+
+.balign 4
+.Ldone_poly:
+       /* Store poly1305 state */
+       lg POLY_RSTATE, STACK_POCTX(%r15);
+       POLY1305_STORE_STATE();
+
+       /* Store counter. */
+       lg INPUT, STACK_INPUT(%r15);
+       vperm S3, S3, S3, TMP0;
+       vst S3, (48)(INPUT);
+
+       /* Clear the used vector registers. */
+       CLEAR(A0);
+       CLEAR(A1);
+       CLEAR(A2);
+       CLEAR(A3);
+       CLEAR(IO0);
+       CLEAR(IO1);
+       CLEAR(IO2);
+       CLEAR(IO3);
+       CLEAR(IO4);
+       CLEAR(IO5);
+       CLEAR(IO6);
+       CLEAR(IO7);
+       CLEAR(TMP0);
+       CLEAR(TMP1);
+       CLEAR(TMP2);
+
+       END_STACK(%r14);
+       xgr %r2, %r2;
+       br %r14;
+       CFI_ENDPROC();
+ELF(.size _gcry_chacha20_poly1305_s390x_vx_blocks4_2_1,
+    .-_gcry_chacha20_poly1305_s390x_vx_blocks4_2_1;)
+
+/**********************************************************************
+  8-way chacha20 ("vertical")
+ **********************************************************************/
+
+#define QUARTERROUND4_V8_POLY(x0,x1,x2,x3,x4,x5,x6,x7,\
+                             x8,x9,x10,x11,x12,x13,x14,x15,\
+                             y0,y1,y2,y3,y4,y5,y6,y7,\
+                             y8,y9,y10,y11,y12,y13,y14,y15,\
+                             op1,op2,op3,op4,op5,op6,op7,op8,\
+                             op9,op10,op11,op12) \
+       op1;                                                    \
+       PLUS(x0, x1); PLUS(x4, x5);                             \
+       PLUS(x8, x9); PLUS(x12, x13);                           \
+       PLUS(y0, y1); PLUS(y4, y5);                             \
+       PLUS(y8, y9); PLUS(y12, y13);                           \
+           op2;                                                \
+           XOR(x3, x0);  XOR(x7, x4);                          \
+           XOR(x11, x8); XOR(x15, x12);                        \
+           XOR(y3, y0);  XOR(y7, y4);                          \
+           XOR(y11, y8); XOR(y15, y12);                        \
+               op3;                                            \
+               ROTATE(x3, 16); ROTATE(x7, 16);                 \
+               ROTATE(x11, 16); ROTATE(x15, 16);               \
+               ROTATE(y3, 16); ROTATE(y7, 16);                 \
+               ROTATE(y11, 16); ROTATE(y15, 16);               \
+       op4;                                                    \
+       PLUS(x2, x3); PLUS(x6, x7);                             \
+       PLUS(x10, x11); PLUS(x14, x15);                         \
+       PLUS(y2, y3); PLUS(y6, y7);                             \
+       PLUS(y10, y11); PLUS(y14, y15);                         \
+           op5;                                                \
+           XOR(x1, x2); XOR(x5, x6);                           \
+           XOR(x9, x10); XOR(x13, x14);                        \
+           XOR(y1, y2); XOR(y5, y6);                           \
+           XOR(y9, y10); XOR(y13, y14);                        \
+               op6;                                            \
+               ROTATE(x1,12); ROTATE(x5,12);                   \
+               ROTATE(x9,12); ROTATE(x13,12);                  \
+               ROTATE(y1,12); ROTATE(y5,12);                   \
+               ROTATE(y9,12); ROTATE(y13,12);                  \
+       op7;                                                    \
+       PLUS(x0, x1); PLUS(x4, x5);                             \
+       PLUS(x8, x9); PLUS(x12, x13);                           \
+       PLUS(y0, y1); PLUS(y4, y5);                             \
+       PLUS(y8, y9); PLUS(y12, y13);                           \
+           op8;                                                \
+           XOR(x3, x0); XOR(x7, x4);                           \
+           XOR(x11, x8); XOR(x15, x12);                        \
+           XOR(y3, y0); XOR(y7, y4);                           \
+           XOR(y11, y8); XOR(y15, y12);                        \
+               op9;                                            \
+               ROTATE(x3,8); ROTATE(x7,8);                     \
+               ROTATE(x11,8); ROTATE(x15,8);                   \
+               ROTATE(y3,8); ROTATE(y7,8);                     \
+               ROTATE(y11,8); ROTATE(y15,8);                   \
+       op10;                                                   \
+       PLUS(x2, x3); PLUS(x6, x7);                             \
+       PLUS(x10, x11); PLUS(x14, x15);                         \
+       PLUS(y2, y3); PLUS(y6, y7);                             \
+       PLUS(y10, y11); PLUS(y14, y15);                         \
+           op11;                                               \
+           XOR(x1, x2); XOR(x5, x6);                           \
+           XOR(x9, x10); XOR(x13, x14);                        \
+           XOR(y1, y2); XOR(y5, y6);                           \
+           XOR(y9, y10); XOR(y13, y14);                        \
+               op12;                                           \
+               ROTATE(x1,7); ROTATE(x5,7);                     \
+               ROTATE(x9,7); ROTATE(x13,7);                    \
+               ROTATE(y1,7); ROTATE(y5,7);                     \
+               ROTATE(y9,7); ROTATE(y13,7);
+
+#define 
QUARTERROUND4_V8(x0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15,\
+                        y0,y1,y2,y3,y4,y5,y6,y7,y8,y9,y10,y11,y12,y13,y14,y15) 
\
+       QUARTERROUND4_V8_POLY(x0,x1,x2,x3,x4,x5,x6,x7,\
+                             x8,x9,x10,x11,x12,x13,x14,x15,\
+                             y0,y1,y2,y3,y4,y5,y6,y7,\
+                             y8,y9,y10,y11,y12,y13,y14,y15,\
+                             ,,,,,,,,,,,)
+
+#define TRANSPOSE_4X4_2(v0,v1,v2,v3,va,vb,vc,vd,tmp0,tmp1,tmp2,tmpa,tmpb,tmpc) 
\
+         vmrhf tmp0, v0, v1;                                   \
+         vmrhf tmp1, v2, v3;                                   \
+         vmrlf tmp2, v0, v1;                                   \
+         vmrlf   v3, v2, v3;                                   \
+         vmrhf tmpa, va, vb;                                   \
+         vmrhf tmpb, vc, vd;                                   \
+         vmrlf tmpc, va, vb;                                   \
+         vmrlf   vd, vc, vd;                                   \
+         vpdi v0, tmp0, tmp1, 0;                               \
+         vpdi v1, tmp0, tmp1, 5;                               \
+         vpdi v2, tmp2,   v3, 0;                               \
+         vpdi v3, tmp2,   v3, 5;                               \
+         vpdi va, tmpa, tmpb, 0;                               \
+         vpdi vb, tmpa, tmpb, 5;                               \
+         vpdi vc, tmpc,   vd, 0;                               \
+         vpdi vd, tmpc,   vd, 5;
+
+.balign 8
+.globl _gcry_chacha20_s390x_vx_blocks8
+ELF(.type _gcry_chacha20_s390x_vx_blocks8,@function;)
+
+_gcry_chacha20_s390x_vx_blocks8:
+       /* input:
+        *      %r2: input
+        *      %r3: dst
+        *      %r4: src
+        *      %r5: nblks (multiple of 8)
+        */
+       CFI_STARTPROC();
+
+       START_STACK(%r8);
+       lgr NBLKS, %r5;
+
+       larl %r7, .Lconsts;
+
+       /* Load counter. */
+       lg %r8, (12 * 4)(INPUT);
+       rllg %r8, %r8, 32;
+
+.balign 4
+       /* Process eight chacha20 blocks per loop. */
+.Lloop8:
+       vlm Y0, Y3, 0(INPUT);
+
+       slgfi NBLKS, 8;
+       lghi ROUND, (20 / 2);
+
+       /* Construct counter vectors X12/X13 & Y12/Y13. */
+       vl X4, (.Ladd_counter_0123 - .Lconsts)(%r7);
+       vl Y4, (.Ladd_counter_4567 - .Lconsts)(%r7);
+       vrepf Y12, Y3, 0;
+       vrepf Y13, Y3, 1;
+       vaccf X5, Y12, X4;
+       vaccf Y5, Y12, Y4;
+       vaf X12, Y12, X4;
+       vaf Y12, Y12, Y4;
+       vaf X13, Y13, X5;
+       vaf Y13, Y13, Y5;
+
+       vrepf X0, Y0, 0;
+       vrepf X1, Y0, 1;
+       vrepf X2, Y0, 2;
+       vrepf X3, Y0, 3;
+       vrepf X4, Y1, 0;
+       vrepf X5, Y1, 1;
+       vrepf X6, Y1, 2;
+       vrepf X7, Y1, 3;
+       vrepf X8, Y2, 0;
+       vrepf X9, Y2, 1;
+       vrepf X10, Y2, 2;
+       vrepf X11, Y2, 3;
+       vrepf X14, Y3, 2;
+       vrepf X15, Y3, 3;
+
+       /* Store counters for blocks 0-7. */
+       vstm X12, X13, (STACK_CTR + 0 * 16)(%r15);
+       vstm Y12, Y13, (STACK_CTR + 2 * 16)(%r15);
+
+       vlr Y0, X0;
+       vlr Y1, X1;
+       vlr Y2, X2;
+       vlr Y3, X3;
+       vlr Y4, X4;
+       vlr Y5, X5;
+       vlr Y6, X6;
+       vlr Y7, X7;
+       vlr Y8, X8;
+       vlr Y9, X9;
+       vlr Y10, X10;
+       vlr Y11, X11;
+       vlr Y14, X14;
+       vlr Y15, X15;
+
+       /* Update and store counter. */
+       agfi %r8, 8;
+       rllg %r5, %r8, 32;
+       stg %r5, (12 * 4)(INPUT);
+
+.balign 4
+.Lround2_8:
+       QUARTERROUND4_V8(X0, X4,  X8, X12,   X1, X5,  X9, X13,
+                        X2, X6, X10, X14,   X3, X7, X11, X15,
+                        Y0, Y4,  Y8, Y12,   Y1, Y5,  Y9, Y13,
+                        Y2, Y6, Y10, Y14,   Y3, Y7, Y11, Y15);
+       QUARTERROUND4_V8(X0, X5, X10, X15,   X1, X6, X11, X12,
+                        X2, X7,  X8, X13,   X3, X4,  X9, X14,
+                        Y0, Y5, Y10, Y15,   Y1, Y6, Y11, Y12,
+                        Y2, Y7,  Y8, Y13,   Y3, Y4,  Y9, Y14);
+       brctg ROUND, .Lround2_8;
+
+       /* Store blocks 4-7. */
+       vstm Y0, Y15, STACK_Y0_Y15(%r15);
+
+       /* Load counters for blocks 0-3. */
+       vlm Y0, Y1, (STACK_CTR + 0 * 16)(%r15);
+
+       lghi ROUND, 1;
+       j .Lfirst_output_4blks_8;
+
+.balign 4
+.Lsecond_output_4blks_8:
+       /* Load blocks 4-7. */
+       vlm X0, X15, STACK_Y0_Y15(%r15);
+
+       /* Load counters for blocks 4-7. */
+       vlm Y0, Y1, (STACK_CTR + 2 * 16)(%r15);
+
+       lghi ROUND, 0;
+
+.balign 4
+       /* Output four chacha20 blocks per loop. */
+.Lfirst_output_4blks_8:
+       vlm Y12, Y15, 0(INPUT);
+       PLUS(X12, Y0);
+       PLUS(X13, Y1);
+       vrepf Y0, Y12, 0;
+       vrepf Y1, Y12, 1;
+       vrepf Y2, Y12, 2;
+       vrepf Y3, Y12, 3;
+       vrepf Y4, Y13, 0;
+       vrepf Y5, Y13, 1;
+       vrepf Y6, Y13, 2;
+       vrepf Y7, Y13, 3;
+       vrepf Y8, Y14, 0;
+       vrepf Y9, Y14, 1;
+       vrepf Y10, Y14, 2;
+       vrepf Y11, Y14, 3;
+       vrepf Y14, Y15, 2;
+       vrepf Y15, Y15, 3;
+       PLUS(X0, Y0);
+       PLUS(X1, Y1);
+       PLUS(X2, Y2);
+       PLUS(X3, Y3);
+       PLUS(X4, Y4);
+       PLUS(X5, Y5);
+       PLUS(X6, Y6);
+       PLUS(X7, Y7);
+       PLUS(X8, Y8);
+       PLUS(X9, Y9);
+       PLUS(X10, Y10);
+       PLUS(X11, Y11);
+       PLUS(X14, Y14);
+       PLUS(X15, Y15);
+
+       vl Y15, (.Lbswap32 - .Lconsts)(%r7);
+       TRANSPOSE_4X4_2(X0, X1, X2, X3, X4, X5, X6, X7,
+                       Y9, Y10, Y11, Y12, Y13, Y14);
+       TRANSPOSE_4X4_2(X8, X9, X10, X11, X12, X13, X14, X15,
+                       Y9, Y10, Y11, Y12, Y13, Y14);
+
+       vlm Y0, Y14, 0(SRC);
+       vperm X0, X0, X0, Y15;
+       vperm X1, X1, X1, Y15;
+       vperm X2, X2, X2, Y15;
+       vperm X3, X3, X3, Y15;
+       vperm X4, X4, X4, Y15;
+       vperm X5, X5, X5, Y15;
+       vperm X6, X6, X6, Y15;
+       vperm X7, X7, X7, Y15;
+       vperm X8, X8, X8, Y15;
+       vperm X9, X9, X9, Y15;
+       vperm X10, X10, X10, Y15;
+       vperm X11, X11, X11, Y15;
+       vperm X12, X12, X12, Y15;
+       vperm X13, X13, X13, Y15;
+       vperm X14, X14, X14, Y15;
+       vperm X15, X15, X15, Y15;
+       vl Y15, (15 * 16)(SRC);
+
+       XOR(Y0, X0);
+       XOR(Y1, X4);
+       XOR(Y2, X8);
+       XOR(Y3, X12);
+       XOR(Y4, X1);
+       XOR(Y5, X5);
+       XOR(Y6, X9);
+       XOR(Y7, X13);
+       XOR(Y8, X2);
+       XOR(Y9, X6);
+       XOR(Y10, X10);
+       XOR(Y11, X14);
+       XOR(Y12, X3);
+       XOR(Y13, X7);
+       XOR(Y14, X11);
+       XOR(Y15, X15);
+       vstm Y0, Y15, 0(DST);
+
+       aghi SRC, 256;
+       aghi DST, 256;
+
+       clgije ROUND, 1, .Lsecond_output_4blks_8;
+
+       clgijhe NBLKS, 8, .Lloop8;
+
+       /* Clear the used vector registers. */
+       DST_8(CLEAR, 0, _);
+       DST_8(CLEAR, 1, _);
+       DST_8(CLEAR, 2, _);
+       DST_8(CLEAR, 3, _);
+
+       /* Clear sensitive data in stack. */
+       vlm Y0, Y15, STACK_Y0_Y15(%r15);
+       vlm Y0, Y3, STACK_CTR(%r15);
+
+       END_STACK(%r8);
+       xgr %r2, %r2;
+       br %r14;
+       CFI_ENDPROC();
+ELF(.size _gcry_chacha20_s390x_vx_blocks8,
+    .-_gcry_chacha20_s390x_vx_blocks8;)
+
+/**********************************************************************
+  8-way stitched chacha20-poly1305 ("vertical")
+ **********************************************************************/
+
+.balign 8
+.globl _gcry_chacha20_poly1305_s390x_vx_blocks8
+ELF(.type _gcry_chacha20_poly1305_s390x_vx_blocks8,@function;)
+
+_gcry_chacha20_poly1305_s390x_vx_blocks8:
+       /* input:
+        *       %r2: input
+        *       %r3: dst
+        *       %r4: src
+        *       %r5: nblks (multiple of 8)
+        *       %r6: poly1305 state
+        * 160(%r15): poly1305 src
+        */
+       CFI_STARTPROC();
+
+       START_STACK(%r14);
+
+       /* Store parameters to stack. */
+       stmg %r2, %r6, STACK_INPUT(%r15);
+
+       lgr POLY_RSTATE, %r6;
+       lgr NBLKS, %r5;
+
+       lg POLY_RSRC, 0(%r15);
+       lg POLY_RSRC, 160(POLY_RSRC);
+       stg POLY_RSRC, STACK_POSRC(%r15);
+
+       /* Load poly1305 state */
+       POLY1305_LOAD_STATE();
+
+.balign 4
+       /* Process eight chacha20 blocks and 32 poly1305 blocks per loop. */
+.Lloop8_poly:
+       lg INPUT, STACK_INPUT(%r15);
+       larl %r8, .Lconsts;
+
+       vlm Y0, Y3, 0(INPUT);
+
+       slgfi NBLKS, 8;
+       lghi ROUND, (20 / 2);
+
+       /* Construct counter vectors X12/X13 & Y12/Y13. */
+       vl X4, (.Ladd_counter_0123 - .Lconsts)(%r8);
+       vl Y4, (.Ladd_counter_4567 - .Lconsts)(%r8);
+       lg %r8, (12 * 4)(INPUT); /* Update counter. */
+       vrepf Y12, Y3, 0;
+       vrepf Y13, Y3, 1;
+       vaccf X5, Y12, X4;
+       vaccf Y5, Y12, Y4;
+       vaf X12, Y12, X4;
+       vaf Y12, Y12, Y4;
+       vaf X13, Y13, X5;
+       vaf Y13, Y13, Y5;
+       rllg %r8, %r8, 32;
+
+       vrepf X0, Y0, 0;
+       vrepf X1, Y0, 1;
+       vrepf X2, Y0, 2;
+       vrepf X3, Y0, 3;
+       vrepf X4, Y1, 0;
+       vrepf X5, Y1, 1;
+       vrepf X6, Y1, 2;
+       vrepf X7, Y1, 3;
+       vrepf X8, Y2, 0;
+       vrepf X9, Y2, 1;
+       vrepf X10, Y2, 2;
+       vrepf X11, Y2, 3;
+       vrepf X14, Y3, 2;
+       vrepf X15, Y3, 3;
+       agfi %r8, 8;
+
+       /* Store counters for blocks 0-7. */
+       vstm X12, X13, (STACK_CTR + 0 * 16)(%r15);
+       vstm Y12, Y13, (STACK_CTR + 2 * 16)(%r15);
+       rllg %r8, %r8, 32;
+
+       vlr Y0, X0;
+       vlr Y1, X1;
+       vlr Y2, X2;
+       vlr Y3, X3;
+       vlr Y4, X4;
+       vlr Y5, X5;
+       vlr Y6, X6;
+       vlr Y7, X7;
+       vlr Y8, X8;
+       vlr Y9, X9;
+       vlr Y10, X10;
+       vlr Y11, X11;
+       vlr Y14, X14;
+       vlr Y15, X15;
+       stg %r8, (12 * 4)(INPUT);
+
+.balign 4
+.Lround2_8_poly:
+       /* Total 30 poly1305 blocks processed by this loop. */
+       QUARTERROUND4_V8_POLY(X0, X4,  X8, X12,   X1, X5,  X9, X13,
+                             X2, X6, X10, X14,   X3, X7, X11, X15,
+                             Y0, Y4,  Y8, Y12,   Y1, Y5,  Y9, Y13,
+                             Y2, Y6, Y10, Y14,   Y3, Y7, Y11, Y15,
+                             POLY1305_BLOCK_PART1(0 * 16),
+                             POLY1305_BLOCK_PART2(),
+                             POLY1305_BLOCK_PART3(),
+                             POLY1305_BLOCK_PART4(),
+                             POLY1305_BLOCK_PART5(),
+                             POLY1305_BLOCK_PART6(),
+                             POLY1305_BLOCK_PART7(),
+                             POLY1305_BLOCK_PART8(),
+                             POLY1305_BLOCK_PART1(1 * 16),
+                             POLY1305_BLOCK_PART2(),
+                             POLY1305_BLOCK_PART3(),
+                             POLY1305_BLOCK_PART4());
+       QUARTERROUND4_V8_POLY(X0, X5, X10, X15,   X1, X6, X11, X12,
+                             X2, X7,  X8, X13,   X3, X4,  X9, X14,
+                             Y0, Y5, Y10, Y15,   Y1, Y6, Y11, Y12,
+                             Y2, Y7,  Y8, Y13,   Y3, Y4,  Y9, Y14,
+                             POLY1305_BLOCK_PART5(),
+                             POLY1305_BLOCK_PART6(),
+                             POLY1305_BLOCK_PART7(),
+                             POLY1305_BLOCK_PART8(),
+                             POLY1305_BLOCK_PART1(2 * 16);
+                               INC_POLY1305_SRC(3 * 16),
+                             POLY1305_BLOCK_PART2(),
+                             POLY1305_BLOCK_PART3(),
+                             POLY1305_BLOCK_PART4(),
+                             POLY1305_BLOCK_PART5(),
+                             POLY1305_BLOCK_PART6(),
+                             POLY1305_BLOCK_PART7(),
+                             POLY1305_BLOCK_PART8());
+       brctg ROUND, .Lround2_8_poly;
+
+       POLY1305_BLOCK_PART1(0 * 16);
+
+       /* Store blocks 4-7. */
+       vstm Y0, Y15, STACK_Y0_Y15(%r15);
+
+       /* Load counters for blocks 0-3. */
+       vlm Y0, Y1, (STACK_CTR + 0 * 16)(%r15);
+
+       stg POLY_RSRC, STACK_POSRC(%r15); /* %r14 used for INPUT/SRC/DST 
pointer. */
+
+       lghi ROUND, 1;
+       j .Lfirst_output_4blks_8_poly;
+
+.balign 4
+.Lsecond_output_4blks_8_poly:
+
+       POLY1305_BLOCK_PART1(1 * 16);
+
+       /* Load blocks 4-7. */
+       vlm X0, X15, STACK_Y0_Y15(%r15);
+
+       /* Load counters for blocks 4-7. */
+       vlm Y0, Y1, (STACK_CTR + 2 * 16)(%r15);
+
+       INC_POLY1305_SRC(2 * 16);
+       stg POLY_RSRC, STACK_POSRC(%r15); /* %r14 used for INPUT/SRC/DST 
pointer. */
+
+       lghi ROUND, 0;
+
+.balign 4
+       /* Output four chacha20 blocks and one poly1305 block per loop. */
+.Lfirst_output_4blks_8_poly:
+       lg %r14, STACK_INPUT(%r15);
+       vlm Y12, Y15, 0(%r14);
+       POLY1305_BLOCK_PART2();
+       PLUS(X12, Y0);
+       PLUS(X13, Y1);
+       vrepf Y0, Y12, 0;
+       vrepf Y1, Y12, 1;
+       vrepf Y2, Y12, 2;
+       vrepf Y3, Y12, 3;
+       vrepf Y4, Y13, 0;
+       vrepf Y5, Y13, 1;
+       vrepf Y6, Y13, 2;
+       vrepf Y7, Y13, 3;
+       vrepf Y8, Y14, 0;
+       vrepf Y9, Y14, 1;
+       vrepf Y10, Y14, 2;
+       vrepf Y11, Y14, 3;
+       vrepf Y14, Y15, 2;
+       vrepf Y15, Y15, 3;
+       POLY1305_BLOCK_PART3();
+       PLUS(X0, Y0);
+       PLUS(X1, Y1);
+       PLUS(X2, Y2);
+       PLUS(X3, Y3);
+       PLUS(X4, Y4);
+       PLUS(X5, Y5);
+       PLUS(X6, Y6);
+       PLUS(X7, Y7);
+       PLUS(X8, Y8);
+       PLUS(X9, Y9);
+       PLUS(X10, Y10);
+       PLUS(X11, Y11);
+       PLUS(X14, Y14);
+       PLUS(X15, Y15);
+       POLY1305_BLOCK_PART4();
+
+       larl %r14, .Lconsts;
+       vl Y15, (.Lbswap32 - .Lconsts)(%r14);
+       TRANSPOSE_4X4_2(X0, X1, X2, X3, X4, X5, X6, X7,
+                       Y9, Y10, Y11, Y12, Y13, Y14);
+       lg %r14, STACK_SRC(%r15);
+       POLY1305_BLOCK_PART5();
+       TRANSPOSE_4X4_2(X8, X9, X10, X11, X12, X13, X14, X15,
+                       Y9, Y10, Y11, Y12, Y13, Y14);
+
+       vlm Y0, Y14, 0(%r14);
+       POLY1305_BLOCK_PART6();
+       vperm X0, X0, X0, Y15;
+       vperm X1, X1, X1, Y15;
+       vperm X2, X2, X2, Y15;
+       vperm X3, X3, X3, Y15;
+       vperm X4, X4, X4, Y15;
+       vperm X5, X5, X5, Y15;
+       vperm X6, X6, X6, Y15;
+       vperm X7, X7, X7, Y15;
+       vperm X8, X8, X8, Y15;
+       vperm X9, X9, X9, Y15;
+       vperm X10, X10, X10, Y15;
+       vperm X11, X11, X11, Y15;
+       vperm X12, X12, X12, Y15;
+       vperm X13, X13, X13, Y15;
+       vperm X14, X14, X14, Y15;
+       vperm X15, X15, X15, Y15;
+       vl Y15, (15 * 16)(%r14);
+       POLY1305_BLOCK_PART7();
+
+       aghi %r14, 256;
+       stg %r14, STACK_SRC(%r15);
+       lg %r14, STACK_DST(%r15);
+
+       XOR(Y0, X0);
+       XOR(Y1, X4);
+       XOR(Y2, X8);
+       XOR(Y3, X12);
+       XOR(Y4, X1);
+       XOR(Y5, X5);
+       XOR(Y6, X9);
+       XOR(Y7, X13);
+       XOR(Y8, X2);
+       XOR(Y9, X6);
+       XOR(Y10, X10);
+       XOR(Y11, X14);
+       XOR(Y12, X3);
+       XOR(Y13, X7);
+       XOR(Y14, X11);
+       XOR(Y15, X15);
+       POLY1305_BLOCK_PART8();
+       vstm Y0, Y15, 0(%r14);
+
+       aghi %r14, 256;
+       stg %r14, STACK_DST(%r15);
+
+       lg POLY_RSRC, STACK_POSRC(%r15);
+
+       clgije ROUND, 1, .Lsecond_output_4blks_8_poly;
+
+       clgijhe NBLKS, 8, .Lloop8_poly;
+
+       /* Store poly1305 state */
+       lg POLY_RSTATE, STACK_POCTX(%r15);
+       POLY1305_STORE_STATE();
+
+       /* Clear the used vector registers */
+       DST_8(CLEAR, 0, _);
+       DST_8(CLEAR, 1, _);
+       DST_8(CLEAR, 2, _);
+       DST_8(CLEAR, 3, _);
+
+       /* Clear sensitive data in stack. */
+       vlm Y0, Y15, STACK_Y0_Y15(%r15);
+       vlm Y0, Y3, STACK_CTR(%r15);
+
+       END_STACK(%r14);
+       xgr %r2, %r2;
+       br %r14;
+       CFI_ENDPROC();
+ELF(.size _gcry_chacha20_poly1305_s390x_vx_blocks8,
+    .-_gcry_chacha20_poly1305_s390x_vx_blocks8;)
+
+#endif /*HAVE_GCC_INLINE_ASM_S390X_VX*/
+#endif /*__s390x__*/
diff --git a/grub-core/lib/libgcrypt/cipher/chacha20.c 
b/grub-core/lib/libgcrypt/cipher/chacha20.c
new file mode 100644
index 000000000..497594a0b
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/chacha20.c
@@ -0,0 +1,1306 @@
+/* chacha20.c  -  Bernstein's ChaCha20 cipher
+ * Copyright (C) 2014,2017-2019 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser general Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * For a description of the algorithm, see:
+ *   http://cr.yp.to/chacha.html
+ */
+
+/*
+ * Based on D. J. Bernstein reference implementation at
+ * http://cr.yp.to/chacha.html:
+ *
+ * chacha-regs.c version 20080118
+ * D. J. Bernstein
+ * Public domain.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "types.h"
+#include "g10lib.h"
+#include "cipher.h"
+#include "cipher-internal.h"
+#include "bufhelp.h"
+
+
+#define CHACHA20_MIN_KEY_SIZE 16        /* Bytes.  */
+#define CHACHA20_MAX_KEY_SIZE 32        /* Bytes.  */
+#define CHACHA20_BLOCK_SIZE   64        /* Bytes.  */
+#define CHACHA20_MIN_IV_SIZE   8        /* Bytes.  */
+#define CHACHA20_MAX_IV_SIZE  12        /* Bytes.  */
+#define CHACHA20_CTR_SIZE     16        /* Bytes.  */
+
+
+/* USE_SSSE3 indicates whether to compile with Intel SSSE3 code. */
+#undef USE_SSSE3
+#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_SSSE3) && \
+   (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+    defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+# define USE_SSSE3 1
+#endif
+
+/* USE_AVX2 indicates whether to compile with Intel AVX2 code. */
+#undef USE_AVX2
+#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX2) && \
+    (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+# define USE_AVX2 1
+#endif
+
+/* USE_ARMV7_NEON indicates whether to enable ARMv7 NEON assembly code. */
+#undef USE_ARMV7_NEON
+#ifdef ENABLE_NEON_SUPPORT
+# if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) \
+     && defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) \
+     && defined(HAVE_GCC_INLINE_ASM_NEON)
+#  define USE_ARMV7_NEON 1
+# endif
+#endif
+
+/* USE_AARCH64_SIMD indicates whether to enable ARMv8 SIMD assembly
+ * code. */
+#undef USE_AARCH64_SIMD
+#ifdef ENABLE_NEON_SUPPORT
+# if defined(__AARCH64EL__) \
+       && defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) \
+       && defined(HAVE_GCC_INLINE_ASM_AARCH64_NEON)
+#  define USE_AARCH64_SIMD 1
+# endif
+#endif
+
+/* USE_PPC_VEC indicates whether to enable PowerPC vector
+ * accelerated code. */
+#undef USE_PPC_VEC
+#ifdef ENABLE_PPC_CRYPTO_SUPPORT
+# if defined(HAVE_COMPATIBLE_CC_PPC_ALTIVEC) && \
+     defined(HAVE_GCC_INLINE_ASM_PPC_ALTIVEC)
+#  if __GNUC__ >= 4
+#   define USE_PPC_VEC 1
+#  endif
+# endif
+#endif
+
+/* USE_S390X_VX indicates whether to enable zSeries code. */
+#undef USE_S390X_VX
+#if defined (__s390x__) && __GNUC__ >= 4 && __ARCH__ >= 9
+# if defined(HAVE_GCC_INLINE_ASM_S390X_VX)
+#  define USE_S390X_VX 1
+# endif /* USE_S390X_VX */
+#endif
+
+/* Assembly implementations use SystemV ABI, ABI conversion and additional
+ * stack to store XMM6-XMM15 needed on Win64. */
+#undef ASM_FUNC_ABI
+#undef ASM_EXTRA_STACK
+#if defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)
+# define ASM_FUNC_ABI __attribute__((sysv_abi))
+#else
+# define ASM_FUNC_ABI
+#endif
+
+
+typedef struct CHACHA20_context_s
+{
+  u32 input[16];
+  unsigned char pad[CHACHA20_BLOCK_SIZE];
+  unsigned int unused; /* bytes in the pad.  */
+  unsigned int use_ssse3:1;
+  unsigned int use_avx2:1;
+  unsigned int use_neon:1;
+  unsigned int use_ppc:1;
+  unsigned int use_s390x:1;
+} CHACHA20_context_t;
+
+
+#ifdef USE_SSSE3
+
+unsigned int _gcry_chacha20_amd64_ssse3_blocks4(u32 *state, byte *dst,
+                                               const byte *src,
+                                               size_t nblks) ASM_FUNC_ABI;
+
+unsigned int _gcry_chacha20_amd64_ssse3_blocks1(u32 *state, byte *dst,
+                                               const byte *src,
+                                               size_t nblks) ASM_FUNC_ABI;
+
+unsigned int _gcry_chacha20_poly1305_amd64_ssse3_blocks4(
+               u32 *state, byte *dst, const byte *src, size_t nblks,
+               void *poly1305_state, const byte *poly1305_src) ASM_FUNC_ABI;
+
+unsigned int _gcry_chacha20_poly1305_amd64_ssse3_blocks1(
+               u32 *state, byte *dst, const byte *src, size_t nblks,
+               void *poly1305_state, const byte *poly1305_src) ASM_FUNC_ABI;
+
+#endif /* USE_SSSE3 */
+
+#ifdef USE_AVX2
+
+unsigned int _gcry_chacha20_amd64_avx2_blocks8(u32 *state, byte *dst,
+                                              const byte *src,
+                                              size_t nblks) ASM_FUNC_ABI;
+
+unsigned int _gcry_chacha20_poly1305_amd64_avx2_blocks8(
+               u32 *state, byte *dst, const byte *src, size_t nblks,
+               void *poly1305_state, const byte *poly1305_src) ASM_FUNC_ABI;
+
+#endif /* USE_AVX2 */
+
+#ifdef USE_PPC_VEC
+
+unsigned int _gcry_chacha20_ppc8_blocks4(u32 *state, byte *dst,
+                                        const byte *src,
+                                        size_t nblks);
+
+unsigned int _gcry_chacha20_ppc8_blocks1(u32 *state, byte *dst,
+                                        const byte *src,
+                                        size_t nblks);
+
+#undef USE_PPC_VEC_POLY1305
+#if SIZEOF_UNSIGNED_LONG == 8
+#define USE_PPC_VEC_POLY1305 1
+unsigned int _gcry_chacha20_poly1305_ppc8_blocks4(
+               u32 *state, byte *dst, const byte *src, size_t nblks,
+               POLY1305_STATE *st, const byte *poly1305_src);
+#endif /* SIZEOF_UNSIGNED_LONG == 8 */
+
+#endif /* USE_PPC_VEC */
+
+#ifdef USE_S390X_VX
+
+unsigned int _gcry_chacha20_s390x_vx_blocks8(u32 *state, byte *dst,
+                                            const byte *src, size_t nblks);
+
+unsigned int _gcry_chacha20_s390x_vx_blocks4_2_1(u32 *state, byte *dst,
+                                                const byte *src, size_t nblks);
+
+#undef USE_S390X_VX_POLY1305
+#if SIZEOF_UNSIGNED_LONG == 8
+#define USE_S390X_VX_POLY1305 1
+unsigned int _gcry_chacha20_poly1305_s390x_vx_blocks8(
+               u32 *state, byte *dst, const byte *src, size_t nblks,
+               POLY1305_STATE *st, const byte *poly1305_src);
+
+unsigned int _gcry_chacha20_poly1305_s390x_vx_blocks4_2_1(
+               u32 *state, byte *dst, const byte *src, size_t nblks,
+               POLY1305_STATE *st, const byte *poly1305_src);
+#endif /* SIZEOF_UNSIGNED_LONG == 8 */
+
+#endif /* USE_S390X_VX */
+
+#ifdef USE_ARMV7_NEON
+
+unsigned int _gcry_chacha20_armv7_neon_blocks4(u32 *state, byte *dst,
+                                              const byte *src,
+                                              size_t nblks);
+
+#endif /* USE_ARMV7_NEON */
+
+#ifdef USE_AARCH64_SIMD
+
+unsigned int _gcry_chacha20_aarch64_blocks4(u32 *state, byte *dst,
+                                           const byte *src, size_t nblks);
+
+unsigned int _gcry_chacha20_poly1305_aarch64_blocks4(
+               u32 *state, byte *dst, const byte *src, size_t nblks,
+               void *poly1305_state, const byte *poly1305_src);
+
+#endif /* USE_AARCH64_SIMD */
+
+
+static const char *selftest (void);
+
+
+#define ROTATE(v,c)    (rol(v,c))
+#define XOR(v,w)       ((v) ^ (w))
+#define PLUS(v,w)      ((u32)((v) + (w)))
+#define PLUSONE(v)     (PLUS((v),1))
+
+#define QUARTERROUND(a,b,c,d) \
+  a = PLUS(a,b); d = ROTATE(XOR(d,a),16); \
+  c = PLUS(c,d); b = ROTATE(XOR(b,c),12); \
+  a = PLUS(a,b); d = ROTATE(XOR(d,a), 8); \
+  c = PLUS(c,d); b = ROTATE(XOR(b,c), 7);
+
+#define BUF_XOR_LE32(dst, src, offset, x) \
+  buf_put_le32((dst) + (offset), buf_get_le32((src) + (offset)) ^ (x))
+
+static unsigned int
+do_chacha20_blocks (u32 *input, byte *dst, const byte *src, size_t nblks)
+{
+  u32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
+  unsigned int i;
+
+  while (nblks)
+    {
+      x0 = input[0];
+      x1 = input[1];
+      x2 = input[2];
+      x3 = input[3];
+      x4 = input[4];
+      x5 = input[5];
+      x6 = input[6];
+      x7 = input[7];
+      x8 = input[8];
+      x9 = input[9];
+      x10 = input[10];
+      x11 = input[11];
+      x12 = input[12];
+      x13 = input[13];
+      x14 = input[14];
+      x15 = input[15];
+
+      for (i = 20; i > 0; i -= 2)
+       {
+         QUARTERROUND(x0, x4,  x8, x12)
+         QUARTERROUND(x1, x5,  x9, x13)
+         QUARTERROUND(x2, x6, x10, x14)
+         QUARTERROUND(x3, x7, x11, x15)
+         QUARTERROUND(x0, x5, x10, x15)
+         QUARTERROUND(x1, x6, x11, x12)
+         QUARTERROUND(x2, x7,  x8, x13)
+         QUARTERROUND(x3, x4,  x9, x14)
+       }
+
+      x0 = PLUS(x0, input[0]);
+      x1 = PLUS(x1, input[1]);
+      x2 = PLUS(x2, input[2]);
+      x3 = PLUS(x3, input[3]);
+      x4 = PLUS(x4, input[4]);
+      x5 = PLUS(x5, input[5]);
+      x6 = PLUS(x6, input[6]);
+      x7 = PLUS(x7, input[7]);
+      x8 = PLUS(x8, input[8]);
+      x9 = PLUS(x9, input[9]);
+      x10 = PLUS(x10, input[10]);
+      x11 = PLUS(x11, input[11]);
+      x12 = PLUS(x12, input[12]);
+      x13 = PLUS(x13, input[13]);
+      x14 = PLUS(x14, input[14]);
+      x15 = PLUS(x15, input[15]);
+
+      input[12] = PLUSONE(input[12]);
+      input[13] = PLUS(input[13], !input[12]);
+
+      BUF_XOR_LE32(dst, src, 0, x0);
+      BUF_XOR_LE32(dst, src, 4, x1);
+      BUF_XOR_LE32(dst, src, 8, x2);
+      BUF_XOR_LE32(dst, src, 12, x3);
+      BUF_XOR_LE32(dst, src, 16, x4);
+      BUF_XOR_LE32(dst, src, 20, x5);
+      BUF_XOR_LE32(dst, src, 24, x6);
+      BUF_XOR_LE32(dst, src, 28, x7);
+      BUF_XOR_LE32(dst, src, 32, x8);
+      BUF_XOR_LE32(dst, src, 36, x9);
+      BUF_XOR_LE32(dst, src, 40, x10);
+      BUF_XOR_LE32(dst, src, 44, x11);
+      BUF_XOR_LE32(dst, src, 48, x12);
+      BUF_XOR_LE32(dst, src, 52, x13);
+      BUF_XOR_LE32(dst, src, 56, x14);
+      BUF_XOR_LE32(dst, src, 60, x15);
+
+      src += CHACHA20_BLOCK_SIZE;
+      dst += CHACHA20_BLOCK_SIZE;
+      nblks--;
+    }
+
+  /* burn_stack */
+  return (17 * sizeof(u32) + 6 * sizeof(void *));
+}
+
+
+static unsigned int
+chacha20_blocks (CHACHA20_context_t *ctx, byte *dst, const byte *src,
+                size_t nblks)
+{
+#ifdef USE_SSSE3
+  if (ctx->use_ssse3)
+    {
+      return _gcry_chacha20_amd64_ssse3_blocks1(ctx->input, dst, src, nblks);
+    }
+#endif
+
+#ifdef USE_PPC_VEC
+  if (ctx->use_ppc)
+    {
+      return _gcry_chacha20_ppc8_blocks1(ctx->input, dst, src, nblks);
+    }
+#endif
+
+#ifdef USE_S390X_VX
+  if (ctx->use_s390x)
+    {
+      return _gcry_chacha20_s390x_vx_blocks4_2_1(ctx->input, dst, src, nblks);
+    }
+#endif
+
+  return do_chacha20_blocks (ctx->input, dst, src, nblks);
+}
+
+
+static void
+chacha20_keysetup (CHACHA20_context_t *ctx, const byte *key,
+                   unsigned int keylen)
+{
+  static const char sigma[16] = "expand 32-byte k";
+  static const char tau[16] = "expand 16-byte k";
+  const char *constants;
+
+  ctx->input[4] = buf_get_le32(key + 0);
+  ctx->input[5] = buf_get_le32(key + 4);
+  ctx->input[6] = buf_get_le32(key + 8);
+  ctx->input[7] = buf_get_le32(key + 12);
+  if (keylen == CHACHA20_MAX_KEY_SIZE) /* 256 bits */
+    {
+      key += 16;
+      constants = sigma;
+    }
+  else /* 128 bits */
+    {
+      constants = tau;
+    }
+  ctx->input[8] = buf_get_le32(key + 0);
+  ctx->input[9] = buf_get_le32(key + 4);
+  ctx->input[10] = buf_get_le32(key + 8);
+  ctx->input[11] = buf_get_le32(key + 12);
+  ctx->input[0] = buf_get_le32(constants + 0);
+  ctx->input[1] = buf_get_le32(constants + 4);
+  ctx->input[2] = buf_get_le32(constants + 8);
+  ctx->input[3] = buf_get_le32(constants + 12);
+}
+
+
+static void
+chacha20_ivsetup (CHACHA20_context_t * ctx, const byte *iv, size_t ivlen)
+{
+  if (ivlen == CHACHA20_CTR_SIZE)
+    {
+      ctx->input[12] = buf_get_le32 (iv + 0);
+      ctx->input[13] = buf_get_le32 (iv + 4);
+      ctx->input[14] = buf_get_le32 (iv + 8);
+      ctx->input[15] = buf_get_le32 (iv + 12);
+    }
+  else if (ivlen == CHACHA20_MAX_IV_SIZE)
+    {
+      ctx->input[12] = 0;
+      ctx->input[13] = buf_get_le32 (iv + 0);
+      ctx->input[14] = buf_get_le32 (iv + 4);
+      ctx->input[15] = buf_get_le32 (iv + 8);
+    }
+  else if (ivlen == CHACHA20_MIN_IV_SIZE)
+    {
+      ctx->input[12] = 0;
+      ctx->input[13] = 0;
+      ctx->input[14] = buf_get_le32 (iv + 0);
+      ctx->input[15] = buf_get_le32 (iv + 4);
+    }
+  else
+    {
+      ctx->input[12] = 0;
+      ctx->input[13] = 0;
+      ctx->input[14] = 0;
+      ctx->input[15] = 0;
+    }
+}
+
+
+static void
+chacha20_setiv (void *context, const byte *iv, size_t ivlen)
+{
+  CHACHA20_context_t *ctx = (CHACHA20_context_t *) context;
+
+  /* draft-nir-cfrg-chacha20-poly1305-02 defines 96-bit and 64-bit nonce. */
+  if (iv && ivlen != CHACHA20_MAX_IV_SIZE && ivlen != CHACHA20_MIN_IV_SIZE
+      && ivlen != CHACHA20_CTR_SIZE)
+    log_info ("WARNING: chacha20_setiv: bad ivlen=%u\n", (u32) ivlen);
+
+  if (iv && (ivlen == CHACHA20_MAX_IV_SIZE || ivlen == CHACHA20_MIN_IV_SIZE
+             || ivlen == CHACHA20_CTR_SIZE))
+    chacha20_ivsetup (ctx, iv, ivlen);
+  else
+    chacha20_ivsetup (ctx, NULL, 0);
+
+  /* Reset the unused pad bytes counter.  */
+  ctx->unused = 0;
+}
+
+
+static gcry_err_code_t
+chacha20_do_setkey (CHACHA20_context_t *ctx,
+                    const byte *key, unsigned int keylen)
+{
+  static int initialized;
+  static const char *selftest_failed;
+  unsigned int features = _gcry_get_hw_features ();
+
+  if (!initialized)
+    {
+      initialized = 1;
+      selftest_failed = selftest ();
+      if (selftest_failed)
+        log_error ("CHACHA20 selftest failed (%s)\n", selftest_failed);
+    }
+  if (selftest_failed)
+    return GPG_ERR_SELFTEST_FAILED;
+
+  if (keylen != CHACHA20_MAX_KEY_SIZE && keylen != CHACHA20_MIN_KEY_SIZE)
+    return GPG_ERR_INV_KEYLEN;
+
+#ifdef USE_SSSE3
+  ctx->use_ssse3 = (features & HWF_INTEL_SSSE3) != 0;
+#endif
+#ifdef USE_AVX2
+  ctx->use_avx2 = (features & HWF_INTEL_AVX2) != 0;
+#endif
+#ifdef USE_ARMV7_NEON
+  ctx->use_neon = (features & HWF_ARM_NEON) != 0;
+#endif
+#ifdef USE_AARCH64_SIMD
+  ctx->use_neon = (features & HWF_ARM_NEON) != 0;
+#endif
+#ifdef USE_PPC_VEC
+  ctx->use_ppc = (features & HWF_PPC_ARCH_2_07) != 0;
+#endif
+#ifdef USE_S390X_VX
+  ctx->use_s390x = (features & HWF_S390X_VX) != 0;
+#endif
+
+  (void)features;
+
+  chacha20_keysetup (ctx, key, keylen);
+
+  /* We default to a zero nonce.  */
+  chacha20_setiv (ctx, NULL, 0);
+
+  return 0;
+}
+
+
+static gcry_err_code_t
+chacha20_setkey (void *context, const byte *key, unsigned int keylen,
+                 cipher_bulk_ops_t *bulk_ops)
+{
+  CHACHA20_context_t *ctx = (CHACHA20_context_t *) context;
+  gcry_err_code_t rc = chacha20_do_setkey (ctx, key, keylen);
+  (void)bulk_ops;
+  _gcry_burn_stack (4 + sizeof (void *) + 4 * sizeof (void *));
+  return rc;
+}
+
+
+static unsigned int
+do_chacha20_encrypt_stream_tail (CHACHA20_context_t *ctx, byte *outbuf,
+                                const byte *inbuf, size_t length)
+{
+  static const unsigned char zero_pad[CHACHA20_BLOCK_SIZE] = { 0, };
+  unsigned int nburn, burn = 0;
+
+#ifdef USE_AVX2
+  if (ctx->use_avx2 && length >= CHACHA20_BLOCK_SIZE * 8)
+    {
+      size_t nblocks = length / CHACHA20_BLOCK_SIZE;
+      nblocks -= nblocks % 8;
+      nburn = _gcry_chacha20_amd64_avx2_blocks8(ctx->input, outbuf, inbuf,
+                                               nblocks);
+      burn = nburn > burn ? nburn : burn;
+      length -= nblocks * CHACHA20_BLOCK_SIZE;
+      outbuf += nblocks * CHACHA20_BLOCK_SIZE;
+      inbuf  += nblocks * CHACHA20_BLOCK_SIZE;
+    }
+#endif
+
+#ifdef USE_SSSE3
+  if (ctx->use_ssse3 && length >= CHACHA20_BLOCK_SIZE * 4)
+    {
+      size_t nblocks = length / CHACHA20_BLOCK_SIZE;
+      nblocks -= nblocks % 4;
+      nburn = _gcry_chacha20_amd64_ssse3_blocks4(ctx->input, outbuf, inbuf,
+                                                nblocks);
+      burn = nburn > burn ? nburn : burn;
+      length -= nblocks * CHACHA20_BLOCK_SIZE;
+      outbuf += nblocks * CHACHA20_BLOCK_SIZE;
+      inbuf  += nblocks * CHACHA20_BLOCK_SIZE;
+    }
+#endif
+
+#ifdef USE_ARMV7_NEON
+  if (ctx->use_neon && length >= CHACHA20_BLOCK_SIZE * 4)
+    {
+      size_t nblocks = length / CHACHA20_BLOCK_SIZE;
+      nblocks -= nblocks % 4;
+      nburn = _gcry_chacha20_armv7_neon_blocks4(ctx->input, outbuf, inbuf,
+                                               nblocks);
+      burn = nburn > burn ? nburn : burn;
+      length -= nblocks * CHACHA20_BLOCK_SIZE;
+      outbuf += nblocks * CHACHA20_BLOCK_SIZE;
+      inbuf  += nblocks * CHACHA20_BLOCK_SIZE;
+    }
+#endif
+
+#ifdef USE_AARCH64_SIMD
+  if (ctx->use_neon && length >= CHACHA20_BLOCK_SIZE * 4)
+    {
+      size_t nblocks = length / CHACHA20_BLOCK_SIZE;
+      nblocks -= nblocks % 4;
+      nburn = _gcry_chacha20_aarch64_blocks4(ctx->input, outbuf, inbuf,
+                                            nblocks);
+      burn = nburn > burn ? nburn : burn;
+      length -= nblocks * CHACHA20_BLOCK_SIZE;
+      outbuf += nblocks * CHACHA20_BLOCK_SIZE;
+      inbuf  += nblocks * CHACHA20_BLOCK_SIZE;
+    }
+#endif
+
+#ifdef USE_PPC_VEC
+  if (ctx->use_ppc && length >= CHACHA20_BLOCK_SIZE * 4)
+    {
+      size_t nblocks = length / CHACHA20_BLOCK_SIZE;
+      nblocks -= nblocks % 4;
+      nburn = _gcry_chacha20_ppc8_blocks4(ctx->input, outbuf, inbuf, nblocks);
+      burn = nburn > burn ? nburn : burn;
+      length -= nblocks * CHACHA20_BLOCK_SIZE;
+      outbuf += nblocks * CHACHA20_BLOCK_SIZE;
+      inbuf  += nblocks * CHACHA20_BLOCK_SIZE;
+    }
+#endif
+
+#ifdef USE_S390X_VX
+  if (ctx->use_s390x && length >= CHACHA20_BLOCK_SIZE * 8)
+    {
+      size_t nblocks = length / CHACHA20_BLOCK_SIZE;
+      nblocks -= nblocks % 8;
+      nburn = _gcry_chacha20_s390x_vx_blocks8(ctx->input, outbuf, inbuf,
+                                             nblocks);
+      burn = nburn > burn ? nburn : burn;
+      length -= nblocks * CHACHA20_BLOCK_SIZE;
+      outbuf += nblocks * CHACHA20_BLOCK_SIZE;
+      inbuf  += nblocks * CHACHA20_BLOCK_SIZE;
+    }
+#endif
+
+  if (length >= CHACHA20_BLOCK_SIZE)
+    {
+      size_t nblocks = length / CHACHA20_BLOCK_SIZE;
+      nburn = chacha20_blocks(ctx, outbuf, inbuf, nblocks);
+      burn = nburn > burn ? nburn : burn;
+      length -= nblocks * CHACHA20_BLOCK_SIZE;
+      outbuf += nblocks * CHACHA20_BLOCK_SIZE;
+      inbuf  += nblocks * CHACHA20_BLOCK_SIZE;
+    }
+
+  if (length > 0)
+    {
+      nburn = chacha20_blocks(ctx, ctx->pad, zero_pad, 1);
+      burn = nburn > burn ? nburn : burn;
+
+      buf_xor (outbuf, inbuf, ctx->pad, length);
+      ctx->unused = CHACHA20_BLOCK_SIZE - length;
+    }
+
+  if (burn)
+    burn += 5 * sizeof(void *);
+
+  return burn;
+}
+
+
+static void
+chacha20_encrypt_stream (void *context, byte *outbuf, const byte *inbuf,
+                         size_t length)
+{
+  CHACHA20_context_t *ctx = (CHACHA20_context_t *) context;
+  unsigned int nburn, burn = 0;
+
+  if (!length)
+    return;
+
+  if (ctx->unused)
+    {
+      unsigned char *p = ctx->pad;
+      size_t n;
+
+      gcry_assert (ctx->unused < CHACHA20_BLOCK_SIZE);
+
+      n = ctx->unused;
+      if (n > length)
+        n = length;
+
+      buf_xor (outbuf, inbuf, p + CHACHA20_BLOCK_SIZE - ctx->unused, n);
+      length -= n;
+      outbuf += n;
+      inbuf += n;
+      ctx->unused -= n;
+
+      if (!length)
+        return;
+      gcry_assert (!ctx->unused);
+    }
+
+  nburn = do_chacha20_encrypt_stream_tail (ctx, outbuf, inbuf, length);
+  burn = nburn > burn ? nburn : burn;
+
+  if (burn)
+    _gcry_burn_stack (burn);
+}
+
+
+gcry_err_code_t
+_gcry_chacha20_poly1305_encrypt(gcry_cipher_hd_t c, byte *outbuf,
+                               const byte *inbuf, size_t length)
+{
+  CHACHA20_context_t *ctx = (void *) &c->context.c;
+  unsigned int nburn, burn = 0;
+  byte *authptr = NULL;
+
+  if (!length)
+    return 0;
+
+  if (ctx->unused)
+    {
+      unsigned char *p = ctx->pad;
+      size_t n;
+
+      gcry_assert (ctx->unused < CHACHA20_BLOCK_SIZE);
+
+      n = ctx->unused;
+      if (n > length)
+        n = length;
+
+      buf_xor (outbuf, inbuf, p + CHACHA20_BLOCK_SIZE - ctx->unused, n);
+      nburn = _gcry_poly1305_update_burn (&c->u_mode.poly1305.ctx, outbuf, n);
+      burn = nburn > burn ? nburn : burn;
+      length -= n;
+      outbuf += n;
+      inbuf += n;
+      ctx->unused -= n;
+
+      if (!length)
+       {
+         if (burn)
+           _gcry_burn_stack (burn);
+
+         return 0;
+       }
+      gcry_assert (!ctx->unused);
+    }
+
+  gcry_assert (c->u_mode.poly1305.ctx.leftover == 0);
+
+  if (0)
+    { }
+#ifdef USE_AVX2
+  else if (ctx->use_avx2 && length >= CHACHA20_BLOCK_SIZE * 8)
+    {
+      nburn = _gcry_chacha20_amd64_avx2_blocks8(ctx->input, outbuf, inbuf, 8);
+      burn = nburn > burn ? nburn : burn;
+
+      authptr = outbuf;
+      length -= 8 * CHACHA20_BLOCK_SIZE;
+      outbuf += 8 * CHACHA20_BLOCK_SIZE;
+      inbuf  += 8 * CHACHA20_BLOCK_SIZE;
+    }
+#endif
+#ifdef USE_SSSE3
+  else if (ctx->use_ssse3 && length >= CHACHA20_BLOCK_SIZE * 4)
+    {
+      nburn = _gcry_chacha20_amd64_ssse3_blocks4(ctx->input, outbuf, inbuf, 4);
+      burn = nburn > burn ? nburn : burn;
+
+      authptr = outbuf;
+      length -= 4 * CHACHA20_BLOCK_SIZE;
+      outbuf += 4 * CHACHA20_BLOCK_SIZE;
+      inbuf  += 4 * CHACHA20_BLOCK_SIZE;
+    }
+  else if (ctx->use_ssse3 && length >= CHACHA20_BLOCK_SIZE * 2)
+    {
+      nburn = _gcry_chacha20_amd64_ssse3_blocks1(ctx->input, outbuf, inbuf, 2);
+      burn = nburn > burn ? nburn : burn;
+
+      authptr = outbuf;
+      length -= 2 * CHACHA20_BLOCK_SIZE;
+      outbuf += 2 * CHACHA20_BLOCK_SIZE;
+      inbuf  += 2 * CHACHA20_BLOCK_SIZE;
+    }
+  else if (ctx->use_ssse3 && length >= CHACHA20_BLOCK_SIZE)
+    {
+      nburn = _gcry_chacha20_amd64_ssse3_blocks1(ctx->input, outbuf, inbuf, 1);
+      burn = nburn > burn ? nburn : burn;
+
+      authptr = outbuf;
+      length -= 1 * CHACHA20_BLOCK_SIZE;
+      outbuf += 1 * CHACHA20_BLOCK_SIZE;
+      inbuf  += 1 * CHACHA20_BLOCK_SIZE;
+    }
+#endif
+#ifdef USE_AARCH64_SIMD
+  else if (ctx->use_neon && length >= CHACHA20_BLOCK_SIZE * 4)
+    {
+      nburn = _gcry_chacha20_aarch64_blocks4(ctx->input, outbuf, inbuf, 4);
+      burn = nburn > burn ? nburn : burn;
+
+      authptr = outbuf;
+      length -= 4 * CHACHA20_BLOCK_SIZE;
+      outbuf += 4 * CHACHA20_BLOCK_SIZE;
+      inbuf  += 4 * CHACHA20_BLOCK_SIZE;
+    }
+#endif
+#ifdef USE_PPC_VEC_POLY1305
+  else if (ctx->use_ppc && length >= CHACHA20_BLOCK_SIZE * 4)
+    {
+      nburn = _gcry_chacha20_ppc8_blocks4(ctx->input, outbuf, inbuf, 4);
+      burn = nburn > burn ? nburn : burn;
+
+      authptr = outbuf;
+      length -= 4 * CHACHA20_BLOCK_SIZE;
+      outbuf += 4 * CHACHA20_BLOCK_SIZE;
+      inbuf  += 4 * CHACHA20_BLOCK_SIZE;
+    }
+#endif
+#ifdef USE_S390X_VX_POLY1305
+  else if (ctx->use_s390x && length >= 2 * CHACHA20_BLOCK_SIZE * 8)
+    {
+      nburn = _gcry_chacha20_s390x_vx_blocks8(ctx->input, outbuf, inbuf, 8);
+      burn = nburn > burn ? nburn : burn;
+
+      authptr = outbuf;
+      length -= 8 * CHACHA20_BLOCK_SIZE;
+      outbuf += 8 * CHACHA20_BLOCK_SIZE;
+      inbuf  += 8 * CHACHA20_BLOCK_SIZE;
+    }
+  else if (ctx->use_s390x && length >= CHACHA20_BLOCK_SIZE * 4)
+    {
+      nburn = _gcry_chacha20_s390x_vx_blocks4_2_1(ctx->input, outbuf, inbuf, 
4);
+      burn = nburn > burn ? nburn : burn;
+
+      authptr = outbuf;
+      length -= 4 * CHACHA20_BLOCK_SIZE;
+      outbuf += 4 * CHACHA20_BLOCK_SIZE;
+      inbuf  += 4 * CHACHA20_BLOCK_SIZE;
+    }
+  else if (ctx->use_s390x && length >= CHACHA20_BLOCK_SIZE * 2)
+    {
+      nburn = _gcry_chacha20_s390x_vx_blocks4_2_1(ctx->input, outbuf, inbuf, 
2);
+      burn = nburn > burn ? nburn : burn;
+
+      authptr = outbuf;
+      length -= 2 * CHACHA20_BLOCK_SIZE;
+      outbuf += 2 * CHACHA20_BLOCK_SIZE;
+      inbuf  += 2 * CHACHA20_BLOCK_SIZE;
+    }
+  else if (ctx->use_s390x && length >= CHACHA20_BLOCK_SIZE)
+    {
+      nburn = _gcry_chacha20_s390x_vx_blocks4_2_1(ctx->input, outbuf, inbuf, 
1);
+      burn = nburn > burn ? nburn : burn;
+
+      authptr = outbuf;
+      length -= 1 * CHACHA20_BLOCK_SIZE;
+      outbuf += 1 * CHACHA20_BLOCK_SIZE;
+      inbuf  += 1 * CHACHA20_BLOCK_SIZE;
+    }
+#endif
+
+  if (authptr)
+    {
+      size_t authoffset = outbuf - authptr;
+
+#ifdef USE_AVX2
+      if (ctx->use_avx2 &&
+         length >= 8 * CHACHA20_BLOCK_SIZE &&
+         authoffset >= 8 * CHACHA20_BLOCK_SIZE)
+       {
+         size_t nblocks = length / CHACHA20_BLOCK_SIZE;
+         nblocks -= nblocks % 8;
+
+         nburn = _gcry_chacha20_poly1305_amd64_avx2_blocks8(
+                     ctx->input, outbuf, inbuf, nblocks,
+                     &c->u_mode.poly1305.ctx.state, authptr);
+         burn = nburn > burn ? nburn : burn;
+
+         length  -= nblocks * CHACHA20_BLOCK_SIZE;
+         outbuf  += nblocks * CHACHA20_BLOCK_SIZE;
+         inbuf   += nblocks * CHACHA20_BLOCK_SIZE;
+         authptr += nblocks * CHACHA20_BLOCK_SIZE;
+       }
+#endif
+
+#ifdef USE_SSSE3
+      if (ctx->use_ssse3)
+       {
+         if (length >= 4 * CHACHA20_BLOCK_SIZE &&
+             authoffset >= 4 * CHACHA20_BLOCK_SIZE)
+           {
+             size_t nblocks = length / CHACHA20_BLOCK_SIZE;
+             nblocks -= nblocks % 4;
+
+             nburn = _gcry_chacha20_poly1305_amd64_ssse3_blocks4(
+                         ctx->input, outbuf, inbuf, nblocks,
+                         &c->u_mode.poly1305.ctx.state, authptr);
+             burn = nburn > burn ? nburn : burn;
+
+             length  -= nblocks * CHACHA20_BLOCK_SIZE;
+             outbuf  += nblocks * CHACHA20_BLOCK_SIZE;
+             inbuf   += nblocks * CHACHA20_BLOCK_SIZE;
+             authptr += nblocks * CHACHA20_BLOCK_SIZE;
+           }
+
+         if (length >= CHACHA20_BLOCK_SIZE &&
+             authoffset >= CHACHA20_BLOCK_SIZE)
+           {
+             size_t nblocks = length / CHACHA20_BLOCK_SIZE;
+
+             nburn = _gcry_chacha20_poly1305_amd64_ssse3_blocks1(
+                         ctx->input, outbuf, inbuf, nblocks,
+                         &c->u_mode.poly1305.ctx.state, authptr);
+             burn = nburn > burn ? nburn : burn;
+
+             length  -= nblocks * CHACHA20_BLOCK_SIZE;
+             outbuf  += nblocks * CHACHA20_BLOCK_SIZE;
+             inbuf   += nblocks * CHACHA20_BLOCK_SIZE;
+             authptr += nblocks * CHACHA20_BLOCK_SIZE;
+           }
+       }
+#endif
+
+#ifdef USE_AARCH64_SIMD
+      if (ctx->use_neon &&
+         length >= 4 * CHACHA20_BLOCK_SIZE &&
+         authoffset >= 4 * CHACHA20_BLOCK_SIZE)
+       {
+         size_t nblocks = length / CHACHA20_BLOCK_SIZE;
+         nblocks -= nblocks % 4;
+
+         nburn = _gcry_chacha20_poly1305_aarch64_blocks4(
+                     ctx->input, outbuf, inbuf, nblocks,
+                     &c->u_mode.poly1305.ctx.state, authptr);
+         burn = nburn > burn ? nburn : burn;
+
+         length  -= nblocks * CHACHA20_BLOCK_SIZE;
+         outbuf  += nblocks * CHACHA20_BLOCK_SIZE;
+         inbuf   += nblocks * CHACHA20_BLOCK_SIZE;
+         authptr += nblocks * CHACHA20_BLOCK_SIZE;
+       }
+#endif
+
+#ifdef USE_PPC_VEC_POLY1305
+      if (ctx->use_ppc &&
+         length >= 4 * CHACHA20_BLOCK_SIZE &&
+         authoffset >= 4 * CHACHA20_BLOCK_SIZE)
+       {
+         size_t nblocks = length / CHACHA20_BLOCK_SIZE;
+         nblocks -= nblocks % 4;
+
+         nburn = _gcry_chacha20_poly1305_ppc8_blocks4(
+                     ctx->input, outbuf, inbuf, nblocks,
+                     &c->u_mode.poly1305.ctx.state, authptr);
+         burn = nburn > burn ? nburn : burn;
+
+         length  -= nblocks * CHACHA20_BLOCK_SIZE;
+         outbuf  += nblocks * CHACHA20_BLOCK_SIZE;
+         inbuf   += nblocks * CHACHA20_BLOCK_SIZE;
+         authptr += nblocks * CHACHA20_BLOCK_SIZE;
+       }
+#endif
+
+#ifdef USE_S390X_VX_POLY1305
+      if (ctx->use_s390x)
+       {
+         if (length >= 8 * CHACHA20_BLOCK_SIZE &&
+             authoffset >= 8 * CHACHA20_BLOCK_SIZE)
+           {
+             size_t nblocks = length / CHACHA20_BLOCK_SIZE;
+             nblocks -= nblocks % 8;
+
+             burn = _gcry_chacha20_poly1305_s390x_vx_blocks8(
+                         ctx->input, outbuf, inbuf, nblocks,
+                         &c->u_mode.poly1305.ctx.state, authptr);
+             burn = nburn > burn ? nburn : burn;
+
+             length  -= nblocks * CHACHA20_BLOCK_SIZE;
+             outbuf  += nblocks * CHACHA20_BLOCK_SIZE;
+             inbuf   += nblocks * CHACHA20_BLOCK_SIZE;
+             authptr += nblocks * CHACHA20_BLOCK_SIZE;
+           }
+
+         if (length >= CHACHA20_BLOCK_SIZE &&
+             authoffset >= CHACHA20_BLOCK_SIZE)
+           {
+             size_t nblocks = length / CHACHA20_BLOCK_SIZE;
+
+             burn = _gcry_chacha20_poly1305_s390x_vx_blocks4_2_1(
+                         ctx->input, outbuf, inbuf, nblocks,
+                         &c->u_mode.poly1305.ctx.state, authptr);
+             burn = nburn > burn ? nburn : burn;
+
+             length  -= nblocks * CHACHA20_BLOCK_SIZE;
+             outbuf  += nblocks * CHACHA20_BLOCK_SIZE;
+             inbuf   += nblocks * CHACHA20_BLOCK_SIZE;
+             authptr += nblocks * CHACHA20_BLOCK_SIZE;
+           }
+       }
+#endif
+
+      if (authoffset > 0)
+       {
+         _gcry_poly1305_update (&c->u_mode.poly1305.ctx, authptr, authoffset);
+         authptr += authoffset;
+         authoffset = 0;
+       }
+
+      gcry_assert(authptr == outbuf);
+    }
+
+  while (length)
+    {
+      size_t currlen = length;
+
+      /* Since checksumming is done after encryption, process input in 24KiB
+       * chunks to keep data loaded in L1 cache for checksumming. */
+      if (currlen > 24 * 1024)
+       currlen = 24 * 1024;
+
+      nburn = do_chacha20_encrypt_stream_tail (ctx, outbuf, inbuf, currlen);
+      burn = nburn > burn ? nburn : burn;
+
+      nburn = _gcry_poly1305_update_burn (&c->u_mode.poly1305.ctx, outbuf,
+                                         currlen);
+      burn = nburn > burn ? nburn : burn;
+
+      outbuf += currlen;
+      inbuf += currlen;
+      length -= currlen;
+    }
+
+  if (burn)
+    _gcry_burn_stack (burn);
+
+  return 0;
+}
+
+
+gcry_err_code_t
+_gcry_chacha20_poly1305_decrypt(gcry_cipher_hd_t c, byte *outbuf,
+                               const byte *inbuf, size_t length)
+{
+  CHACHA20_context_t *ctx = (void *) &c->context.c;
+  unsigned int nburn, burn = 0;
+
+  if (!length)
+    return 0;
+
+  if (ctx->unused)
+    {
+      unsigned char *p = ctx->pad;
+      size_t n;
+
+      gcry_assert (ctx->unused < CHACHA20_BLOCK_SIZE);
+
+      n = ctx->unused;
+      if (n > length)
+        n = length;
+
+      nburn = _gcry_poly1305_update_burn (&c->u_mode.poly1305.ctx, inbuf, n);
+      burn = nburn > burn ? nburn : burn;
+      buf_xor (outbuf, inbuf, p + CHACHA20_BLOCK_SIZE - ctx->unused, n);
+      length -= n;
+      outbuf += n;
+      inbuf += n;
+      ctx->unused -= n;
+
+      if (!length)
+       {
+         if (burn)
+           _gcry_burn_stack (burn);
+
+         return 0;
+       }
+      gcry_assert (!ctx->unused);
+    }
+
+  gcry_assert (c->u_mode.poly1305.ctx.leftover == 0);
+
+#ifdef USE_AVX2
+  if (ctx->use_avx2 && length >= 8 * CHACHA20_BLOCK_SIZE)
+    {
+      size_t nblocks = length / CHACHA20_BLOCK_SIZE;
+      nblocks -= nblocks % 8;
+
+      nburn = _gcry_chacha20_poly1305_amd64_avx2_blocks8(
+                       ctx->input, outbuf, inbuf, nblocks,
+                       &c->u_mode.poly1305.ctx.state, inbuf);
+      burn = nburn > burn ? nburn : burn;
+
+      length -= nblocks * CHACHA20_BLOCK_SIZE;
+      outbuf += nblocks * CHACHA20_BLOCK_SIZE;
+      inbuf  += nblocks * CHACHA20_BLOCK_SIZE;
+    }
+#endif
+
+#ifdef USE_SSSE3
+  if (ctx->use_ssse3)
+    {
+      if (length >= 4 * CHACHA20_BLOCK_SIZE)
+       {
+         size_t nblocks = length / CHACHA20_BLOCK_SIZE;
+         nblocks -= nblocks % 4;
+
+         nburn = _gcry_chacha20_poly1305_amd64_ssse3_blocks4(
+                           ctx->input, outbuf, inbuf, nblocks,
+                           &c->u_mode.poly1305.ctx.state, inbuf);
+         burn = nburn > burn ? nburn : burn;
+
+         length -= nblocks * CHACHA20_BLOCK_SIZE;
+         outbuf += nblocks * CHACHA20_BLOCK_SIZE;
+         inbuf  += nblocks * CHACHA20_BLOCK_SIZE;
+       }
+
+      if (length >= CHACHA20_BLOCK_SIZE)
+       {
+         size_t nblocks = length / CHACHA20_BLOCK_SIZE;
+
+         nburn = _gcry_chacha20_poly1305_amd64_ssse3_blocks1(
+                           ctx->input, outbuf, inbuf, nblocks,
+                           &c->u_mode.poly1305.ctx.state, inbuf);
+         burn = nburn > burn ? nburn : burn;
+
+         length -= nblocks * CHACHA20_BLOCK_SIZE;
+         outbuf += nblocks * CHACHA20_BLOCK_SIZE;
+         inbuf  += nblocks * CHACHA20_BLOCK_SIZE;
+       }
+    }
+#endif
+
+#ifdef USE_AARCH64_SIMD
+  if (ctx->use_neon && length >= 4 * CHACHA20_BLOCK_SIZE)
+    {
+      size_t nblocks = length / CHACHA20_BLOCK_SIZE;
+      nblocks -= nblocks % 4;
+
+      nburn = _gcry_chacha20_poly1305_aarch64_blocks4(
+                       ctx->input, outbuf, inbuf, nblocks,
+                       &c->u_mode.poly1305.ctx.state, inbuf);
+      burn = nburn > burn ? nburn : burn;
+
+      length -= nblocks * CHACHA20_BLOCK_SIZE;
+      outbuf += nblocks * CHACHA20_BLOCK_SIZE;
+      inbuf  += nblocks * CHACHA20_BLOCK_SIZE;
+    }
+#endif
+
+#ifdef USE_PPC_VEC_POLY1305
+  if (ctx->use_ppc && length >= 4 * CHACHA20_BLOCK_SIZE)
+    {
+      size_t nblocks = length / CHACHA20_BLOCK_SIZE;
+      nblocks -= nblocks % 4;
+
+      nburn = _gcry_chacha20_poly1305_ppc8_blocks4(
+                       ctx->input, outbuf, inbuf, nblocks,
+                       &c->u_mode.poly1305.ctx.state, inbuf);
+      burn = nburn > burn ? nburn : burn;
+
+      length -= nblocks * CHACHA20_BLOCK_SIZE;
+      outbuf += nblocks * CHACHA20_BLOCK_SIZE;
+      inbuf  += nblocks * CHACHA20_BLOCK_SIZE;
+    }
+#endif
+
+#ifdef USE_S390X_VX_POLY1305
+  if (ctx->use_s390x)
+    {
+      if (length >= 8 * CHACHA20_BLOCK_SIZE)
+       {
+         size_t nblocks = length / CHACHA20_BLOCK_SIZE;
+         nblocks -= nblocks % 8;
+
+         nburn = _gcry_chacha20_poly1305_s390x_vx_blocks8(
+                           ctx->input, outbuf, inbuf, nblocks,
+                           &c->u_mode.poly1305.ctx.state, inbuf);
+         burn = nburn > burn ? nburn : burn;
+
+         length -= nblocks * CHACHA20_BLOCK_SIZE;
+         outbuf += nblocks * CHACHA20_BLOCK_SIZE;
+         inbuf  += nblocks * CHACHA20_BLOCK_SIZE;
+       }
+
+      if (length >= CHACHA20_BLOCK_SIZE)
+       {
+         size_t nblocks = length / CHACHA20_BLOCK_SIZE;
+
+         nburn = _gcry_chacha20_poly1305_s390x_vx_blocks4_2_1(
+                           ctx->input, outbuf, inbuf, nblocks,
+                           &c->u_mode.poly1305.ctx.state, inbuf);
+         burn = nburn > burn ? nburn : burn;
+
+         length -= nblocks * CHACHA20_BLOCK_SIZE;
+         outbuf += nblocks * CHACHA20_BLOCK_SIZE;
+         inbuf  += nblocks * CHACHA20_BLOCK_SIZE;
+       }
+    }
+#endif
+
+  while (length)
+    {
+      size_t currlen = length;
+
+      /* Since checksumming is done before decryption, process input in 24KiB
+       * chunks to keep data loaded in L1 cache for decryption. */
+      if (currlen > 24 * 1024)
+       currlen = 24 * 1024;
+
+      nburn = _gcry_poly1305_update_burn (&c->u_mode.poly1305.ctx, inbuf,
+                                         currlen);
+      burn = nburn > burn ? nburn : burn;
+
+      nburn = do_chacha20_encrypt_stream_tail (ctx, outbuf, inbuf, currlen);
+      burn = nburn > burn ? nburn : burn;
+
+      outbuf += currlen;
+      inbuf += currlen;
+      length -= currlen;
+    }
+
+  if (burn)
+    _gcry_burn_stack (burn);
+
+  return 0;
+}
+
+
+static const char *
+selftest (void)
+{
+  byte ctxbuf[sizeof(CHACHA20_context_t) + 15];
+  CHACHA20_context_t *ctx;
+  byte scratch[127 + 1];
+  byte buf[512 + 64 + 4];
+  int i;
+
+  /* From draft-strombergson-chacha-test-vectors */
+  static byte key_1[] = {
+    0xc4, 0x6e, 0xc1, 0xb1, 0x8c, 0xe8, 0xa8, 0x78,
+    0x72, 0x5a, 0x37, 0xe7, 0x80, 0xdf, 0xb7, 0x35,
+    0x1f, 0x68, 0xed, 0x2e, 0x19, 0x4c, 0x79, 0xfb,
+    0xc6, 0xae, 0xbe, 0xe1, 0xa6, 0x67, 0x97, 0x5d
+  };
+  static const byte nonce_1[] =
+    { 0x1a, 0xda, 0x31, 0xd5, 0xcf, 0x68, 0x82, 0x21 };
+  static const byte plaintext_1[127] = {
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  };
+  static const byte ciphertext_1[127] = {
+    0xf6, 0x3a, 0x89, 0xb7, 0x5c, 0x22, 0x71, 0xf9,
+    0x36, 0x88, 0x16, 0x54, 0x2b, 0xa5, 0x2f, 0x06,
+    0xed, 0x49, 0x24, 0x17, 0x92, 0x30, 0x2b, 0x00,
+    0xb5, 0xe8, 0xf8, 0x0a, 0xe9, 0xa4, 0x73, 0xaf,
+    0xc2, 0x5b, 0x21, 0x8f, 0x51, 0x9a, 0xf0, 0xfd,
+    0xd4, 0x06, 0x36, 0x2e, 0x8d, 0x69, 0xde, 0x7f,
+    0x54, 0xc6, 0x04, 0xa6, 0xe0, 0x0f, 0x35, 0x3f,
+    0x11, 0x0f, 0x77, 0x1b, 0xdc, 0xa8, 0xab, 0x92,
+    0xe5, 0xfb, 0xc3, 0x4e, 0x60, 0xa1, 0xd9, 0xa9,
+    0xdb, 0x17, 0x34, 0x5b, 0x0a, 0x40, 0x27, 0x36,
+    0x85, 0x3b, 0xf9, 0x10, 0xb0, 0x60, 0xbd, 0xf1,
+    0xf8, 0x97, 0xb6, 0x29, 0x0f, 0x01, 0xd1, 0x38,
+    0xae, 0x2c, 0x4c, 0x90, 0x22, 0x5b, 0xa9, 0xea,
+    0x14, 0xd5, 0x18, 0xf5, 0x59, 0x29, 0xde, 0xa0,
+    0x98, 0xca, 0x7a, 0x6c, 0xcf, 0xe6, 0x12, 0x27,
+    0x05, 0x3c, 0x84, 0xe4, 0x9a, 0x4a, 0x33
+  };
+
+  /* 16-byte alignment required for amd64 implementation. */
+  ctx = (CHACHA20_context_t *)((uintptr_t)(ctxbuf + 15) & ~(uintptr_t)15);
+
+  chacha20_setkey (ctx, key_1, sizeof key_1, NULL);
+  chacha20_setiv (ctx, nonce_1, sizeof nonce_1);
+  scratch[sizeof (scratch) - 1] = 0;
+  chacha20_encrypt_stream (ctx, scratch, plaintext_1, sizeof plaintext_1);
+  if (memcmp (scratch, ciphertext_1, sizeof ciphertext_1))
+    return "ChaCha20 encryption test 1 failed.";
+  if (scratch[sizeof (scratch) - 1])
+    return "ChaCha20 wrote too much.";
+  chacha20_setkey (ctx, key_1, sizeof (key_1), NULL);
+  chacha20_setiv (ctx, nonce_1, sizeof nonce_1);
+  chacha20_encrypt_stream (ctx, scratch, scratch, sizeof plaintext_1);
+  if (memcmp (scratch, plaintext_1, sizeof plaintext_1))
+    return "ChaCha20 decryption test 1 failed.";
+
+  for (i = 0; i < sizeof buf; i++)
+    buf[i] = i;
+  chacha20_setkey (ctx, key_1, sizeof key_1, NULL);
+  chacha20_setiv (ctx, nonce_1, sizeof nonce_1);
+  /*encrypt */
+  chacha20_encrypt_stream (ctx, buf, buf, sizeof buf);
+  /*decrypt */
+  chacha20_setkey (ctx, key_1, sizeof key_1, NULL);
+  chacha20_setiv (ctx, nonce_1, sizeof nonce_1);
+  chacha20_encrypt_stream (ctx, buf, buf, 1);
+  chacha20_encrypt_stream (ctx, buf + 1, buf + 1, (sizeof buf) - 1 - 1);
+  chacha20_encrypt_stream (ctx, buf + (sizeof buf) - 1,
+                           buf + (sizeof buf) - 1, 1);
+  for (i = 0; i < sizeof buf; i++)
+    if (buf[i] != (byte) i)
+      return "ChaCha20 encryption test 2 failed.";
+
+  chacha20_setkey (ctx, key_1, sizeof key_1, NULL);
+  chacha20_setiv (ctx, nonce_1, sizeof nonce_1);
+  /* encrypt */
+  for (i = 0; i < sizeof buf; i++)
+    chacha20_encrypt_stream (ctx, &buf[i], &buf[i], 1);
+  /* decrypt */
+  chacha20_setkey (ctx, key_1, sizeof key_1, NULL);
+  chacha20_setiv (ctx, nonce_1, sizeof nonce_1);
+  chacha20_encrypt_stream (ctx, buf, buf, sizeof buf);
+  for (i = 0; i < sizeof buf; i++)
+    if (buf[i] != (byte) i)
+      return "ChaCha20 encryption test 3 failed.";
+
+  return NULL;
+}
+
+
+gcry_cipher_spec_t _gcry_cipher_spec_chacha20 = {
+  GCRY_CIPHER_CHACHA20,
+  {0, 0},                       /* flags */
+  "CHACHA20",                   /* name */
+  NULL,                         /* aliases */
+  NULL,                         /* oids */
+  1,                            /* blocksize in bytes. */
+  CHACHA20_MAX_KEY_SIZE * 8,    /* standard key length in bits. */
+  sizeof (CHACHA20_context_t),
+  chacha20_setkey,
+  NULL,
+  NULL,
+  chacha20_encrypt_stream,
+  chacha20_encrypt_stream,
+  NULL,
+  NULL,
+  chacha20_setiv
+};
diff --git a/grub-core/lib/libgcrypt/cipher/cipher-aeswrap.c 
b/grub-core/lib/libgcrypt/cipher/cipher-aeswrap.c
new file mode 100644
index 000000000..76c64fb57
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/cipher-aeswrap.c
@@ -0,0 +1,380 @@
+/* cipher-aeswrap.c  - Generic AESWRAP mode implementation
+ * Copyright (C) 2009, 2011 Free Software Foundation, Inc.
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser general Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "g10lib.h"
+#include "cipher.h"
+#include "bufhelp.h"
+#include "./cipher-internal.h"
+
+/* Perform the wrap algorithm W as specified by NIST SP 800-38F.
+   Cipher block size must be 128-bit.  */
+static gcry_err_code_t
+wrap (gcry_cipher_hd_t c, byte *outbuf, size_t inbuflen)
+{
+  int j, x;
+  size_t n, i;
+  unsigned char *r, *a, *b;
+  unsigned char t[8];
+  unsigned int burn, nburn;
+
+#if MAX_BLOCKSIZE < 8
+#error Invalid block size
+#endif
+  /* We require a cipher with a 128 bit block length.  */
+  if (c->spec->blocksize != 16)
+    return GPG_ERR_INV_LENGTH;
+
+  /* Input data must be multiple of 64 bits.  */
+  if (inbuflen % 8)
+    return GPG_ERR_INV_ARG;
+
+  n = inbuflen / 8;
+
+  /* We need at least three 64 bit blocks.  */
+  if (n < 3)
+    return GPG_ERR_INV_ARG;
+
+  burn = 0;
+
+  r = outbuf;
+  a = outbuf;  /* We store A directly in OUTBUF.  */
+  b = c->u_ctr.ctr;  /* B is also used to concatenate stuff.  */
+
+  memset (t, 0, sizeof t); /* t := 0.  */
+
+  for (j = 0; j <= 5; j++)
+    {
+      for (i = 1; i < n; i++)
+        {
+          /* B := CIPH_k( A | R[i] ) */
+          memcpy (b, a, 8);
+          memcpy (b+8, r+i*8, 8);
+          nburn = c->spec->encrypt (&c->context.c, b, b);
+          burn = nburn > burn ? nburn : burn;
+          /* t := t + 1  */
+          for (x = 7; x >= 0; x--)
+            if (++t[x])
+              break;
+          /* A := MSB_64(B) ^ t */
+          cipher_block_xor (a, b, t, 8);
+          /* R[i] := LSB_64(B) */
+          memcpy (r+i*8, b+8, 8);
+        }
+   }
+
+  if (burn > 0)
+    _gcry_burn_stack (burn + 4 * sizeof(void *));
+
+  return 0;
+}
+
+
+/* Perform the Key Wrap algorithm as specified by RFC3394.  We
+   implement this as a mode usable with any cipher algorithm of
+   blocksize 128.  */
+gcry_err_code_t
+_gcry_cipher_keywrap_encrypt (gcry_cipher_hd_t c,
+                              byte *outbuf, size_t outbuflen,
+                              const byte *inbuf, size_t inbuflen)
+{
+  gcry_err_code_t err;
+  unsigned char *r = outbuf;
+
+  /* We require a cipher with a 128 bit block length.  */
+  if (c->spec->blocksize != 16)
+    return GPG_ERR_INV_LENGTH;
+
+  /* The output buffer must be able to hold the input data plus one
+     additional block.  */
+  if (outbuflen < inbuflen + 8)
+    return GPG_ERR_BUFFER_TOO_SHORT;
+  /* Input data must be multiple of 64 bits.  */
+  if (inbuflen % 8)
+    return GPG_ERR_INV_ARG;
+
+  /* We need at least two 64 bit blocks.  */
+  if ((inbuflen / 8) < 2)
+    return GPG_ERR_INV_ARG;
+
+  /* Copy the inbuf to the outbuf. */
+  memmove (r+8, inbuf, inbuflen);
+
+  /* If an IV has been set we use that IV as the Alternative Initial
+     Value; if it has not been set we use the standard value.  */
+  if (c->marks.iv)
+    memcpy (r, c->u_iv.iv, 8);
+  else
+    memset (r, 0xa6, 8);
+
+  err = wrap (c, r, inbuflen + 8);
+
+  return err;
+}
+
+
+static const unsigned char icv2[] = { 0xA6, 0x59, 0x59, 0xA6 };
+
+/* Perform the Key Wrap algorithm as specified by RFC5649.  */
+gcry_err_code_t
+_gcry_cipher_keywrap_encrypt_padding (gcry_cipher_hd_t c,
+                                      byte *outbuf, size_t outbuflen,
+                                      const byte *inbuf, size_t inbuflen)
+{
+  gcry_err_code_t err;
+  unsigned char *r = outbuf;
+  unsigned int padlen;
+
+  /* We require a cipher with a 128 bit block length.  */
+  if (c->spec->blocksize != 16)
+    return GPG_ERR_INV_LENGTH;
+
+  /* The output buffer must be able to hold the input data plus one
+     additional block and padding.  */
+  if (outbuflen < ((inbuflen + 7)/8)*8 + 8)
+    return GPG_ERR_BUFFER_TOO_SHORT;
+
+  if (inbuflen % 8)
+    padlen = 8 - (inbuflen % 8);
+  else
+    padlen = 0;
+
+  memcpy (r, icv2, 4);
+  r[4] = ((inbuflen >> 24) & 0xff);
+  r[5] = ((inbuflen >> 16) & 0xff);
+  r[6] = ((inbuflen >> 8) & 0xff);
+  r[7] = (inbuflen & 0xff);
+  memcpy (r+8, inbuf, inbuflen);
+  if (padlen)
+    memset (r+8+inbuflen, 0, padlen);
+
+  if (inbuflen <= 8)
+    {
+      unsigned int burn;
+
+      burn = c->spec->encrypt (&c->context.c, r, r);
+      if (burn > 0)
+        _gcry_burn_stack (burn + 4 * sizeof(void *));
+      err = 0;
+    }
+  else
+    err = wrap (c, r, ((inbuflen + 7)/8)*8 + 8);
+
+  return err;
+}
+
+
+/* Perform the unwrap algorithm W^-1 as specified by NIST SP 800-38F.
+   Cipher block size must be 128-bit.  */
+static gcry_err_code_t
+unwrap (gcry_cipher_hd_t c, byte *outbuf, const byte *inbuf, size_t inbuflen)
+{
+  int j, x;
+  size_t n, i;
+  unsigned char *r, *a, *b;
+  unsigned char t[8];
+  unsigned int burn, nburn;
+
+#if MAX_BLOCKSIZE < 8
+#error Invalid block size
+#endif
+  /* We require a cipher with a 128 bit block length.  */
+  if (c->spec->blocksize != 16)
+    return GPG_ERR_INV_LENGTH;
+
+  /* Input data must be multiple of 64 bits.  */
+  if (inbuflen % 8)
+    return GPG_ERR_INV_ARG;
+
+  n = inbuflen / 8;
+
+  /* We need at least three 64 bit blocks.  */
+  if (n < 3)
+    return GPG_ERR_INV_ARG;
+
+  burn = 0;
+
+  r = outbuf;
+  a = c->lastiv;  /* We use c->LASTIV as buffer for A.  */
+  b = c->u_ctr.ctr;     /* B is also used to concatenate stuff.  */
+
+  /* Copy the inbuf to the outbuf and save A. */
+  memcpy (a, inbuf, 8);
+  memmove (r, inbuf+8, inbuflen-8);
+  n--; /* Reduce to actual number of data blocks.  */
+
+  /* t := 6 * n  */
+  i = n * 6;  /* The range is valid because: n = inbuflen / 8 - 1.  */
+  for (x=0; x < 8 && x < sizeof (i); x++)
+    t[7-x] = i >> (8*x);
+  for (; x < 8; x++)
+    t[7-x] = 0;
+
+  for (j = 5; j >= 0; j--)
+    {
+      for (i = n; i >= 1; i--)
+        {
+          /* B := CIPH_k^-1( (A ^ t)| R[i] ) */
+          cipher_block_xor (b, a, t, 8);
+          memcpy (b+8, r+(i-1)*8, 8);
+          nburn = c->spec->decrypt (&c->context.c, b, b);
+          burn = nburn > burn ? nburn : burn;
+          /* t := t - 1  */
+          for (x = 7; x >= 0; x--)
+            if (--t[x] != 0xff)
+              break;
+          /* A := MSB_64(B) */
+          memcpy (a, b, 8);
+          /* R[i] := LSB_64(B) */
+          memcpy (r+(i-1)*8, b+8, 8);
+        }
+   }
+  wipememory (b, 16);  /* Clear scratch area.  */
+
+  if (burn > 0)
+    _gcry_burn_stack (burn + 4 * sizeof(void *));
+
+  return 0;
+}
+
+
+/* Perform the Key Unwrap algorithm as specified by RFC3394 and
+   RFC5649.  */
+gcry_err_code_t
+_gcry_cipher_keywrap_decrypt_auto (gcry_cipher_hd_t c,
+                                   byte *outbuf, size_t outbuflen,
+                                   const byte *inbuf, size_t inbuflen)
+{
+  gcry_err_code_t err;
+
+  /* We require a cipher with a 128 bit block length.  */
+  if (c->spec->blocksize != 16)
+    return GPG_ERR_INV_LENGTH;
+
+  /* The output buffer must be able to hold the input data minus one
+     additional block.  Fixme: The caller has more restrictive checks
+     - we may want to fix them for this mode.  */
+  if (outbuflen + 8 < inbuflen)
+    return GPG_ERR_BUFFER_TOO_SHORT;
+  /* Input data must be multiple of 64 bits.  */
+  if (inbuflen % 8)
+    return GPG_ERR_INV_ARG;
+
+  if (inbuflen == 16)
+    {
+      unsigned int burn;
+      unsigned char t[16];
+
+      if (!(c->flags & GCRY_CIPHER_EXTENDED))
+        return GPG_ERR_BUFFER_TOO_SHORT;
+
+      burn = c->spec->decrypt (&c->context.c, t, inbuf);
+      if (burn > 0)
+        _gcry_burn_stack (burn + 4 * sizeof(void *));
+
+      if (memcmp (t, icv2, 4))
+        err = GPG_ERR_CHECKSUM;
+      else
+        {
+          unsigned int plen = (t[4]<<24) | (t[5]<<16) | (t[6]<<8) | t[7];
+
+          err = 0;
+          if (plen > 8)
+            err = GPG_ERR_CHECKSUM;
+          else if (plen)
+            {
+              int i;
+
+              for (i = 0; i < 16 - (8+plen); i++)
+                if (t[8+plen+i])
+                  {
+                    err = GPG_ERR_CHECKSUM;
+                    break;
+                  }
+              if (!err)
+                {
+                  memcpy (outbuf, t+8, 8);
+                  memcpy (c->u_mode.wrap.plen, t+4, 4);
+                }
+            }
+        }
+    }
+  else
+    {
+      /* We need at least three 64 bit blocks.  */
+      if ((inbuflen / 8) < 3)
+        return GPG_ERR_INV_ARG;
+
+      err = unwrap (c, outbuf, inbuf, inbuflen);
+      if (!err)
+        {
+          unsigned char *a;
+
+          a = c->lastiv;  /* We use c->LASTIV as buffer for A.  */
+
+          /* If an IV has been set we compare against this Alternative Initial
+             Value; if it has not been set we compare against the standard IV. 
 */
+          if (c->marks.iv && !memcmp (a, c->u_iv.iv, 8))
+            memset (c->u_mode.wrap.plen, 0, 4);
+          else if (!memcmp (a, icv2, 4)) /* It's a packet wrapped by KWP.  */
+            {
+              unsigned int plen = (a[4]<<24) | (a[5]<<16) | (a[6]<<8) | a[7];
+              int padlen = inbuflen - 8 - plen;
+
+              if (!(c->flags & GCRY_CIPHER_EXTENDED))
+                err = GPG_ERR_CHECKSUM;
+              else if (padlen < 0 || padlen > 7)
+                err = GPG_ERR_CHECKSUM;
+              else if (padlen)
+                {
+                  int i;
+
+                  for (i = 0; i < padlen; i++)
+                    if (outbuf[plen+i])
+                      {
+                        err = GPG_ERR_CHECKSUM;
+                        break;
+                      }
+                }
+              if (!err)
+                memcpy (c->u_mode.wrap.plen, a+4, 4);
+            }
+          else                  /* It's a packet wrapped by KW.  */
+            {
+              int i;
+
+              for (i = 0; i < 8; i++)
+                if (a[i] != 0xa6)
+                  {
+                    err = GPG_ERR_CHECKSUM;
+                    break;
+                  }
+              if (!err)
+                memset (c->u_mode.wrap.plen, 0, 4);
+            }
+        }
+    }
+
+  return err;
+}
diff --git a/grub-core/lib/libgcrypt/cipher/cipher-cbc.c 
b/grub-core/lib/libgcrypt/cipher/cipher-cbc.c
new file mode 100644
index 000000000..d4df1e72a
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/cipher-cbc.c
@@ -0,0 +1,292 @@
+/* cipher-cbc.c  - Generic CBC mode implementation
+ * Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003
+ *               2005, 2007, 2008, 2009, 2011 Free Software Foundation, Inc.
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser general Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "g10lib.h"
+#include "cipher.h"
+#include "./cipher-internal.h"
+#include "bufhelp.h"
+
+
+
+static inline unsigned int
+cbc_encrypt_inner(gcry_cipher_hd_t c, unsigned char *outbuf,
+                  const unsigned char *inbuf, size_t nblocks, size_t blocksize,
+                  int is_cbc_cmac)
+{
+
+  unsigned int burn, nburn;
+  size_t n;
+
+  burn = 0;
+
+  if (c->bulk.cbc_enc)
+    {
+      c->bulk.cbc_enc (&c->context.c, c->u_iv.iv, outbuf, inbuf, nblocks,
+                       is_cbc_cmac);
+    }
+  else
+    {
+      gcry_cipher_encrypt_t enc_fn = c->spec->encrypt;
+      unsigned char *ivp;
+
+      ivp = c->u_iv.iv;
+
+      for (n=0; n < nblocks; n++ )
+        {
+          cipher_block_xor (outbuf, inbuf, ivp, blocksize);
+          nburn = enc_fn ( &c->context.c, outbuf, outbuf );
+          burn = nburn > burn ? nburn : burn;
+          ivp = outbuf;
+          inbuf += blocksize;
+          if (!is_cbc_cmac)
+            outbuf += blocksize;
+        }
+
+      if (ivp != c->u_iv.iv)
+        cipher_block_cpy (c->u_iv.iv, ivp, blocksize);
+    }
+
+  return burn;
+}
+
+
+gcry_err_code_t
+_gcry_cipher_cbc_encrypt (gcry_cipher_hd_t c,
+                          unsigned char *outbuf, size_t outbuflen,
+                          const unsigned char *inbuf, size_t inbuflen)
+{
+  size_t blocksize_shift = _gcry_blocksize_shift(c);
+  size_t blocksize = 1 << blocksize_shift;
+  size_t blocksize_mask = blocksize - 1;
+  size_t nblocks = inbuflen >> blocksize_shift;
+  int is_cbc_cmac = !!(c->flags & GCRY_CIPHER_CBC_MAC);
+  unsigned int burn;
+
+  if (outbuflen < (is_cbc_cmac ? blocksize : inbuflen))
+    return GPG_ERR_BUFFER_TOO_SHORT;
+
+  if (inbuflen & blocksize_mask)
+    return GPG_ERR_INV_LENGTH;
+
+  burn = cbc_encrypt_inner(c, outbuf, inbuf, nblocks, blocksize, is_cbc_cmac);
+
+  if (burn > 0)
+    _gcry_burn_stack (burn + 4 * sizeof(void *));
+
+  return 0;
+}
+
+
+gcry_err_code_t
+_gcry_cipher_cbc_cts_encrypt (gcry_cipher_hd_t c,
+                              unsigned char *outbuf, size_t outbuflen,
+                              const unsigned char *inbuf, size_t inbuflen)
+{
+  size_t blocksize_shift = _gcry_blocksize_shift(c);
+  size_t blocksize = 1 << blocksize_shift;
+  size_t blocksize_mask = blocksize - 1;
+  gcry_cipher_encrypt_t enc_fn = c->spec->encrypt;
+  size_t nblocks = inbuflen >> blocksize_shift;
+  unsigned int burn, nburn;
+  unsigned char *ivp;
+  int i;
+
+  if (outbuflen < inbuflen)
+    return GPG_ERR_BUFFER_TOO_SHORT;
+
+  if ((inbuflen & blocksize_mask) && !(inbuflen > blocksize))
+    return GPG_ERR_INV_LENGTH;
+
+  burn = 0;
+
+  if (inbuflen > blocksize)
+    {
+      if ((inbuflen & blocksize_mask) == 0)
+       nblocks--;
+    }
+
+  burn = cbc_encrypt_inner(c, outbuf, inbuf, nblocks, blocksize, 0);
+  inbuf += nblocks << blocksize_shift;
+  outbuf += nblocks << blocksize_shift;
+
+  if (inbuflen > blocksize)
+    {
+      /* We have to be careful here, since outbuf might be equal to
+         inbuf.  */
+      size_t restbytes;
+      unsigned char b;
+
+      if ((inbuflen & blocksize_mask) == 0)
+        restbytes = blocksize;
+      else
+        restbytes = inbuflen & blocksize_mask;
+
+      outbuf -= blocksize;
+      for (ivp = c->u_iv.iv, i = 0; i < restbytes; i++)
+        {
+          b = inbuf[i];
+          outbuf[blocksize + i] = outbuf[i];
+          outbuf[i] = b ^ *ivp++;
+        }
+      for (; i < blocksize; i++)
+        outbuf[i] = 0 ^ *ivp++;
+
+      nburn = enc_fn (&c->context.c, outbuf, outbuf);
+      burn = nburn > burn ? nburn : burn;
+      cipher_block_cpy (c->u_iv.iv, outbuf, blocksize);
+    }
+
+  if (burn > 0)
+    _gcry_burn_stack (burn + 4 * sizeof(void *));
+
+  return 0;
+}
+
+
+static inline unsigned int
+cbc_decrypt_inner(gcry_cipher_hd_t c, unsigned char *outbuf,
+                  const unsigned char *inbuf, size_t nblocks, size_t blocksize)
+{
+  unsigned int burn, nburn;
+  size_t n;
+
+  burn = 0;
+
+  if (c->bulk.cbc_dec)
+    {
+      c->bulk.cbc_dec (&c->context.c, c->u_iv.iv, outbuf, inbuf, nblocks);
+    }
+  else
+    {
+      gcry_cipher_decrypt_t dec_fn = c->spec->decrypt;
+
+      for (n = 0; n < nblocks; n++)
+        {
+          /* Because outbuf and inbuf might be the same, we must not overwrite
+             the original ciphertext block.  We use LASTIV as intermediate
+             storage here because it is not used otherwise.  */
+          nburn = dec_fn ( &c->context.c, c->lastiv, inbuf );
+          burn = nburn > burn ? nburn : burn;
+          cipher_block_xor_n_copy_2 (outbuf, c->lastiv, c->u_iv.iv, inbuf,
+                                     blocksize);
+          inbuf  += blocksize;
+          outbuf += blocksize;
+        }
+    }
+
+  return burn;
+}
+
+
+gcry_err_code_t
+_gcry_cipher_cbc_decrypt (gcry_cipher_hd_t c,
+                          unsigned char *outbuf, size_t outbuflen,
+                          const unsigned char *inbuf, size_t inbuflen)
+{
+  size_t blocksize_shift = _gcry_blocksize_shift(c);
+  size_t blocksize = 1 << blocksize_shift;
+  size_t blocksize_mask = blocksize - 1;
+  size_t nblocks = inbuflen >> blocksize_shift;
+  unsigned int burn;
+
+  if (outbuflen < inbuflen)
+    return GPG_ERR_BUFFER_TOO_SHORT;
+
+  if (inbuflen & blocksize_mask)
+    return GPG_ERR_INV_LENGTH;
+
+  burn = cbc_decrypt_inner(c, outbuf, inbuf, nblocks, blocksize);
+
+  if (burn > 0)
+    _gcry_burn_stack (burn + 4 * sizeof(void *));
+
+  return 0;
+}
+
+
+gcry_err_code_t
+_gcry_cipher_cbc_cts_decrypt (gcry_cipher_hd_t c,
+                              unsigned char *outbuf, size_t outbuflen,
+                              const unsigned char *inbuf, size_t inbuflen)
+{
+  size_t blocksize_shift = _gcry_blocksize_shift(c);
+  size_t blocksize = 1 << blocksize_shift;
+  size_t blocksize_mask = blocksize - 1;
+  gcry_cipher_decrypt_t dec_fn = c->spec->decrypt;
+  size_t nblocks = inbuflen >> blocksize_shift;
+  unsigned int burn, nburn;
+  int i;
+
+  if (outbuflen < inbuflen)
+    return GPG_ERR_BUFFER_TOO_SHORT;
+
+  if ((inbuflen & blocksize_mask) && !(inbuflen > blocksize))
+    return GPG_ERR_INV_LENGTH;
+
+  burn = 0;
+
+  if (inbuflen > blocksize)
+    {
+      nblocks--;
+      if ((inbuflen & blocksize_mask) == 0)
+       nblocks--;
+      cipher_block_cpy (c->lastiv, c->u_iv.iv, blocksize);
+    }
+
+  burn = cbc_decrypt_inner(c, outbuf, inbuf, nblocks, blocksize);
+  inbuf  += nblocks << blocksize_shift;
+  outbuf += nblocks << blocksize_shift;
+
+  if (inbuflen > blocksize)
+    {
+      size_t restbytes;
+
+      if ((inbuflen & blocksize_mask) == 0)
+        restbytes = blocksize;
+      else
+        restbytes = inbuflen & blocksize_mask;
+
+      cipher_block_cpy (c->lastiv, c->u_iv.iv, blocksize ); /* Save Cn-2. */
+      buf_cpy (c->u_iv.iv, inbuf + blocksize, restbytes ); /* Save Cn. */
+
+      nburn = dec_fn ( &c->context.c, outbuf, inbuf );
+      burn = nburn > burn ? nburn : burn;
+      buf_xor(outbuf, outbuf, c->u_iv.iv, restbytes);
+
+      buf_cpy (outbuf + blocksize, outbuf, restbytes);
+      for(i=restbytes; i < blocksize; i++)
+        c->u_iv.iv[i] = outbuf[i];
+      nburn = dec_fn (&c->context.c, outbuf, c->u_iv.iv);
+      burn = nburn > burn ? nburn : burn;
+      cipher_block_xor(outbuf, outbuf, c->lastiv, blocksize);
+      /* c->lastiv is now really lastlastiv, does this matter? */
+    }
+
+  if (burn > 0)
+    _gcry_burn_stack (burn + 4 * sizeof(void *));
+
+  return 0;
+}
diff --git a/grub-core/lib/libgcrypt/cipher/cipher-ccm.c 
b/grub-core/lib/libgcrypt/cipher/cipher-ccm.c
new file mode 100644
index 000000000..dcb268d08
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/cipher-ccm.c
@@ -0,0 +1,415 @@
+/* cipher-ccm.c - CTR mode with CBC-MAC mode implementation
+ * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser general Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "g10lib.h"
+#include "cipher.h"
+#include "bufhelp.h"
+#include "./cipher-internal.h"
+
+
+#define set_burn(burn, nburn) do { \
+  unsigned int __nburn = (nburn); \
+  (burn) = (burn) > __nburn ? (burn) : __nburn; } while (0)
+
+
+static unsigned int
+do_cbc_mac (gcry_cipher_hd_t c, const unsigned char *inbuf, size_t inlen,
+            int do_padding)
+{
+  const unsigned int blocksize = 16;
+  gcry_cipher_encrypt_t enc_fn = c->spec->encrypt;
+  unsigned char tmp[blocksize];
+  unsigned int burn = 0;
+  unsigned int unused = c->u_mode.ccm.mac_unused;
+  size_t nblocks;
+  size_t n;
+
+  if (inlen == 0 && (unused == 0 || !do_padding))
+    return 0;
+
+  do
+    {
+      if (inlen + unused < blocksize || unused > 0)
+        {
+         n = (inlen > blocksize - unused) ? blocksize - unused : inlen;
+
+         buf_cpy (&c->u_mode.ccm.macbuf[unused], inbuf, n);
+         unused += n;
+         inlen -= n;
+         inbuf += n;
+        }
+      if (!inlen)
+        {
+          if (!do_padding)
+            break;
+
+         n = blocksize - unused;
+         if (n > 0)
+           {
+             memset (&c->u_mode.ccm.macbuf[unused], 0, n);
+             unused = blocksize;
+           }
+        }
+
+      if (unused > 0)
+        {
+          /* Process one block from macbuf.  */
+          cipher_block_xor(c->u_iv.iv, c->u_iv.iv, c->u_mode.ccm.macbuf,
+                           blocksize);
+          set_burn (burn, enc_fn ( &c->context.c, c->u_iv.iv, c->u_iv.iv ));
+
+          unused = 0;
+        }
+
+      if (c->bulk.cbc_enc)
+        {
+          nblocks = inlen / blocksize;
+          c->bulk.cbc_enc (&c->context.c, c->u_iv.iv, tmp, inbuf, nblocks, 1);
+          inbuf += nblocks * blocksize;
+          inlen -= nblocks * blocksize;
+
+          wipememory (tmp, sizeof(tmp));
+        }
+      else
+        {
+          while (inlen >= blocksize)
+            {
+              cipher_block_xor(c->u_iv.iv, c->u_iv.iv, inbuf, blocksize);
+
+              set_burn (burn, enc_fn ( &c->context.c, c->u_iv.iv, c->u_iv.iv 
));
+
+              inlen -= blocksize;
+              inbuf += blocksize;
+            }
+        }
+    }
+  while (inlen > 0);
+
+  c->u_mode.ccm.mac_unused = unused;
+
+  if (burn)
+    burn += 4 * sizeof(void *);
+
+  return burn;
+}
+
+
+gcry_err_code_t
+_gcry_cipher_ccm_set_nonce (gcry_cipher_hd_t c, const unsigned char *nonce,
+                            size_t noncelen)
+{
+  unsigned int marks_key;
+  size_t L = 15 - noncelen;
+  size_t L_;
+
+  L_ = L - 1;
+
+  if (!nonce)
+    return GPG_ERR_INV_ARG;
+  /* Length field must be 2, 3, ..., or 8. */
+  if (L < 2 || L > 8)
+    return GPG_ERR_INV_LENGTH;
+
+  /* Reset state */
+  marks_key = c->marks.key;
+  memset (&c->u_mode, 0, sizeof(c->u_mode));
+  memset (&c->marks, 0, sizeof(c->marks));
+  memset (&c->u_iv, 0, sizeof(c->u_iv));
+  memset (&c->u_ctr, 0, sizeof(c->u_ctr));
+  memset (c->lastiv, 0, sizeof(c->lastiv));
+  c->unused = 0;
+  c->marks.key = marks_key;
+
+  /* Setup CTR */
+  c->u_ctr.ctr[0] = L_;
+  memcpy (&c->u_ctr.ctr[1], nonce, noncelen);
+  memset (&c->u_ctr.ctr[1 + noncelen], 0, L);
+
+  /* Setup IV */
+  c->u_iv.iv[0] = L_;
+  memcpy (&c->u_iv.iv[1], nonce, noncelen);
+  /* Add (8 * M_ + 64 * flags) to iv[0] and set iv[noncelen + 1 ... 15] later
+     in set_aad.  */
+  memset (&c->u_iv.iv[1 + noncelen], 0, L);
+
+  c->u_mode.ccm.nonce = 1;
+
+  return GPG_ERR_NO_ERROR;
+}
+
+
+gcry_err_code_t
+_gcry_cipher_ccm_set_lengths (gcry_cipher_hd_t c, u64 encryptlen, u64 aadlen,
+                              u64 taglen)
+{
+  unsigned int burn = 0;
+  unsigned char b0[16];
+  size_t noncelen = 15 - (c->u_iv.iv[0] + 1);
+  u64 M = taglen;
+  u64 M_;
+  int i;
+
+  M_ = (M - 2) / 2;
+
+  /* Authentication field must be 4, 6, 8, 10, 12, 14 or 16. */
+  if ((M_ * 2 + 2) != M || M < 4 || M > 16)
+    return GPG_ERR_INV_LENGTH;
+  if (!c->u_mode.ccm.nonce || c->marks.tag)
+    return GPG_ERR_INV_STATE;
+  if (c->u_mode.ccm.lengths)
+    return GPG_ERR_INV_STATE;
+
+  c->u_mode.ccm.authlen = taglen;
+  c->u_mode.ccm.encryptlen = encryptlen;
+  c->u_mode.ccm.aadlen = aadlen;
+
+  /* Complete IV setup.  */
+  c->u_iv.iv[0] += (aadlen > 0) * 64 + M_ * 8;
+  for (i = 16 - 1; i >= 1 + noncelen; i--)
+    {
+      c->u_iv.iv[i] = encryptlen & 0xff;
+      encryptlen >>= 8;
+    }
+
+  memcpy (b0, c->u_iv.iv, 16);
+  memset (c->u_iv.iv, 0, 16);
+
+  set_burn (burn, do_cbc_mac (c, b0, 16, 0));
+
+  if (aadlen == 0)
+    {
+      /* Do nothing.  */
+    }
+  else if (aadlen > 0 && aadlen <= (unsigned int)0xfeff)
+    {
+      b0[0] = (aadlen >> 8) & 0xff;
+      b0[1] = aadlen & 0xff;
+      set_burn (burn, do_cbc_mac (c, b0, 2, 0));
+    }
+  else if (aadlen > 0xfeff && aadlen <= (unsigned int)0xffffffff)
+    {
+      b0[0] = 0xff;
+      b0[1] = 0xfe;
+      buf_put_be32(&b0[2], aadlen);
+      set_burn (burn, do_cbc_mac (c, b0, 6, 0));
+    }
+  else if (aadlen > (unsigned int)0xffffffff)
+    {
+      b0[0] = 0xff;
+      b0[1] = 0xff;
+      buf_put_be64(&b0[2], aadlen);
+      set_burn (burn, do_cbc_mac (c, b0, 10, 0));
+    }
+
+  /* Generate S_0 and increase counter.  */
+  set_burn (burn, c->spec->encrypt ( &c->context.c, c->u_mode.ccm.s0,
+                                     c->u_ctr.ctr ));
+  c->u_ctr.ctr[15]++;
+
+  if (burn)
+    _gcry_burn_stack (burn + sizeof(void *) * 5);
+
+  c->u_mode.ccm.lengths = 1;
+
+  return GPG_ERR_NO_ERROR;
+}
+
+
+gcry_err_code_t
+_gcry_cipher_ccm_authenticate (gcry_cipher_hd_t c, const unsigned char *abuf,
+                               size_t abuflen)
+{
+  unsigned int burn;
+
+  if (abuflen > 0 && !abuf)
+    return GPG_ERR_INV_ARG;
+  if (!c->u_mode.ccm.nonce || !c->u_mode.ccm.lengths || c->marks.tag)
+    return GPG_ERR_INV_STATE;
+  if (abuflen > c->u_mode.ccm.aadlen)
+    return GPG_ERR_INV_LENGTH;
+
+  c->u_mode.ccm.aadlen -= abuflen;
+  burn = do_cbc_mac (c, abuf, abuflen, c->u_mode.ccm.aadlen == 0);
+
+  if (burn)
+    _gcry_burn_stack (burn + sizeof(void *) * 5);
+
+  return GPG_ERR_NO_ERROR;
+}
+
+
+gcry_err_code_t
+_gcry_cipher_ccm_tag (gcry_cipher_hd_t c, unsigned char *outbuf,
+                     size_t outbuflen, int check)
+{
+  unsigned int burn;
+
+  if (!outbuf || outbuflen == 0)
+    return GPG_ERR_INV_ARG;
+  /* Tag length must be same as initial authlen.  */
+  if (c->u_mode.ccm.authlen != outbuflen)
+    return GPG_ERR_INV_LENGTH;
+  if (!c->u_mode.ccm.nonce || !c->u_mode.ccm.lengths || c->u_mode.ccm.aadlen > 
0)
+    return GPG_ERR_INV_STATE;
+  /* Initial encrypt length must match with length of actual data processed.  
*/
+  if (c->u_mode.ccm.encryptlen > 0)
+    return GPG_ERR_UNFINISHED;
+
+  if (!c->marks.tag)
+    {
+      burn = do_cbc_mac (c, NULL, 0, 1); /* Perform final padding.  */
+
+      /* Add S_0 */
+      cipher_block_xor (c->u_iv.iv, c->u_iv.iv, c->u_mode.ccm.s0, 16);
+
+      wipememory (c->u_ctr.ctr, 16);
+      wipememory (c->u_mode.ccm.s0, 16);
+      wipememory (c->u_mode.ccm.macbuf, 16);
+
+      if (burn)
+        _gcry_burn_stack (burn + sizeof(void *) * 5);
+
+      c->marks.tag = 1;
+    }
+
+  if (!check)
+    {
+      memcpy (outbuf, c->u_iv.iv, outbuflen);
+      return GPG_ERR_NO_ERROR;
+    }
+  else
+    {
+      return buf_eq_const(outbuf, c->u_iv.iv, outbuflen) ?
+             GPG_ERR_NO_ERROR : GPG_ERR_CHECKSUM;
+    }
+}
+
+
+gcry_err_code_t
+_gcry_cipher_ccm_get_tag (gcry_cipher_hd_t c, unsigned char *outtag,
+                         size_t taglen)
+{
+  return _gcry_cipher_ccm_tag (c, outtag, taglen, 0);
+}
+
+
+gcry_err_code_t
+_gcry_cipher_ccm_check_tag (gcry_cipher_hd_t c, const unsigned char *intag,
+                           size_t taglen)
+{
+  return _gcry_cipher_ccm_tag (c, (unsigned char *)intag, taglen, 1);
+}
+
+
+gcry_err_code_t
+_gcry_cipher_ccm_encrypt (gcry_cipher_hd_t c, unsigned char *outbuf,
+                          size_t outbuflen, const unsigned char *inbuf,
+                          size_t inbuflen)
+{
+  gcry_err_code_t err = 0;
+  unsigned int burn = 0;
+  unsigned int nburn;
+
+  if (outbuflen < inbuflen)
+    return GPG_ERR_BUFFER_TOO_SHORT;
+  if (!c->u_mode.ccm.nonce || c->marks.tag || !c->u_mode.ccm.lengths ||
+      c->u_mode.ccm.aadlen > 0)
+    return GPG_ERR_INV_STATE;
+  if (inbuflen > c->u_mode.ccm.encryptlen)
+    return GPG_ERR_INV_LENGTH;
+
+  while (inbuflen)
+    {
+      size_t currlen = inbuflen;
+
+      /* Since checksumming is done before encryption, process input in 24KiB
+       * chunks to keep data loaded in L1 cache for encryption. */
+      if (currlen > 24 * 1024)
+       currlen = 24 * 1024;
+
+      c->u_mode.ccm.encryptlen -= currlen;
+      nburn = do_cbc_mac (c, inbuf, currlen, 0);
+      burn = nburn > burn ? nburn : burn;
+
+      err = _gcry_cipher_ctr_encrypt (c, outbuf, outbuflen, inbuf, currlen);
+      if (err)
+       break;
+
+      outbuf += currlen;
+      inbuf += currlen;
+      outbuflen -= currlen;
+      inbuflen -= currlen;
+    }
+
+  if (burn)
+    _gcry_burn_stack (burn + sizeof(void *) * 5);
+  return err;
+}
+
+
+gcry_err_code_t
+_gcry_cipher_ccm_decrypt (gcry_cipher_hd_t c, unsigned char *outbuf,
+                          size_t outbuflen, const unsigned char *inbuf,
+                          size_t inbuflen)
+{
+  gcry_err_code_t err = 0;
+  unsigned int burn = 0;
+  unsigned int nburn;
+
+  if (outbuflen < inbuflen)
+    return GPG_ERR_BUFFER_TOO_SHORT;
+  if (!c->u_mode.ccm.nonce || c->marks.tag || !c->u_mode.ccm.lengths ||
+      c->u_mode.ccm.aadlen > 0)
+    return GPG_ERR_INV_STATE;
+  if (inbuflen > c->u_mode.ccm.encryptlen)
+    return GPG_ERR_INV_LENGTH;
+
+  while (inbuflen)
+    {
+      size_t currlen = inbuflen;
+
+      /* Since checksumming is done after decryption, process input in 24KiB
+       * chunks to keep data loaded in L1 cache for checksumming. */
+      if (currlen > 24 * 1024)
+       currlen = 24 * 1024;
+
+      err = _gcry_cipher_ctr_encrypt (c, outbuf, outbuflen, inbuf, currlen);
+      if (err)
+       break;
+
+      c->u_mode.ccm.encryptlen -= currlen;
+      nburn = do_cbc_mac (c, outbuf, currlen, 0);
+      burn = nburn > burn ? nburn : burn;
+
+      outbuf += currlen;
+      inbuf += currlen;
+      outbuflen -= currlen;
+      inbuflen -= currlen;
+    }
+
+  if (burn)
+    _gcry_burn_stack (burn + sizeof(void *) * 5);
+  return err;
+}
diff --git a/grub-core/lib/libgcrypt/cipher/cipher-cfb.c 
b/grub-core/lib/libgcrypt/cipher/cipher-cfb.c
new file mode 100644
index 000000000..012c6c13c
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/cipher-cfb.c
@@ -0,0 +1,317 @@
+/* cipher-cfb.c  - Generic CFB mode implementation
+ * Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003
+ *               2005, 2007, 2008, 2009, 2011 Free Software Foundation, Inc.
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser general Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "g10lib.h"
+#include "cipher.h"
+#include "bufhelp.h"
+#include "./cipher-internal.h"
+
+
+gcry_err_code_t
+_gcry_cipher_cfb_encrypt (gcry_cipher_hd_t c,
+                          unsigned char *outbuf, size_t outbuflen,
+                          const unsigned char *inbuf, size_t inbuflen)
+{
+  unsigned char *ivp;
+  gcry_cipher_encrypt_t enc_fn = c->spec->encrypt;
+  size_t blocksize_shift = _gcry_blocksize_shift(c);
+  size_t blocksize = 1 << blocksize_shift;
+  size_t blocksize_x_2 = blocksize + blocksize;
+  unsigned int burn, nburn;
+
+  if (outbuflen < inbuflen)
+    return GPG_ERR_BUFFER_TOO_SHORT;
+
+  if ( inbuflen <= c->unused )
+    {
+      /* Short enough to be encoded by the remaining XOR mask. */
+      /* XOR the input with the IV and store input into IV. */
+      ivp = c->u_iv.iv + blocksize - c->unused;
+      buf_xor_2dst(outbuf, ivp, inbuf, inbuflen);
+      c->unused -= inbuflen;
+      return 0;
+    }
+
+  burn = 0;
+
+  if ( c->unused )
+    {
+      /* XOR the input with the IV and store input into IV */
+      inbuflen -= c->unused;
+      ivp = c->u_iv.iv + blocksize - c->unused;
+      buf_xor_2dst(outbuf, ivp, inbuf, c->unused);
+      outbuf += c->unused;
+      inbuf += c->unused;
+      c->unused = 0;
+    }
+
+  /* Now we can process complete blocks.  We use a loop as long as we
+     have at least 2 blocks and use conditions for the rest.  This
+     also allows to use a bulk encryption function if available.  */
+  if (inbuflen >= blocksize_x_2 && c->bulk.cfb_enc)
+    {
+      size_t nblocks = inbuflen >> blocksize_shift;
+      c->bulk.cfb_enc (&c->context.c, c->u_iv.iv, outbuf, inbuf, nblocks);
+      outbuf += nblocks << blocksize_shift;
+      inbuf  += nblocks << blocksize_shift;
+      inbuflen -= nblocks << blocksize_shift;
+    }
+  else
+    {
+      while ( inbuflen >= blocksize_x_2 )
+        {
+          /* Encrypt the IV. */
+          nburn = enc_fn ( &c->context.c, c->u_iv.iv, c->u_iv.iv );
+          burn = nburn > burn ? nburn : burn;
+          /* XOR the input with the IV and store input into IV.  */
+          cipher_block_xor_2dst(outbuf, c->u_iv.iv, inbuf, blocksize);
+          outbuf += blocksize;
+          inbuf += blocksize;
+          inbuflen -= blocksize;
+        }
+    }
+
+  if ( inbuflen >= blocksize )
+    {
+      /* Save the current IV and then encrypt the IV. */
+      cipher_block_cpy( c->lastiv, c->u_iv.iv, blocksize );
+      nburn = enc_fn ( &c->context.c, c->u_iv.iv, c->u_iv.iv );
+      burn = nburn > burn ? nburn : burn;
+      /* XOR the input with the IV and store input into IV */
+      cipher_block_xor_2dst(outbuf, c->u_iv.iv, inbuf, blocksize);
+      outbuf += blocksize;
+      inbuf += blocksize;
+      inbuflen -= blocksize;
+    }
+  if ( inbuflen )
+    {
+      /* Save the current IV and then encrypt the IV. */
+      cipher_block_cpy( c->lastiv, c->u_iv.iv, blocksize );
+      nburn = enc_fn ( &c->context.c, c->u_iv.iv, c->u_iv.iv );
+      burn = nburn > burn ? nburn : burn;
+      c->unused = blocksize;
+      /* Apply the XOR. */
+      c->unused -= inbuflen;
+      buf_xor_2dst(outbuf, c->u_iv.iv, inbuf, inbuflen);
+      outbuf += inbuflen;
+      inbuf += inbuflen;
+      inbuflen = 0;
+    }
+
+  if (burn > 0)
+    _gcry_burn_stack (burn + 4 * sizeof(void *));
+
+  return 0;
+}
+
+
+gcry_err_code_t
+_gcry_cipher_cfb_decrypt (gcry_cipher_hd_t c,
+                          unsigned char *outbuf, size_t outbuflen,
+                          const unsigned char *inbuf, size_t inbuflen)
+{
+  unsigned char *ivp;
+  gcry_cipher_encrypt_t enc_fn = c->spec->encrypt;
+  size_t blocksize_shift = _gcry_blocksize_shift(c);
+  size_t blocksize = 1 << blocksize_shift;
+  size_t blocksize_x_2 = blocksize + blocksize;
+  unsigned int burn, nburn;
+
+  if (outbuflen < inbuflen)
+    return GPG_ERR_BUFFER_TOO_SHORT;
+
+  if (inbuflen <= c->unused)
+    {
+      /* Short enough to be encoded by the remaining XOR mask. */
+      /* XOR the input with the IV and store input into IV. */
+      ivp = c->u_iv.iv + blocksize - c->unused;
+      buf_xor_n_copy(outbuf, ivp, inbuf, inbuflen);
+      c->unused -= inbuflen;
+      return 0;
+    }
+
+  burn = 0;
+
+  if (c->unused)
+    {
+      /* XOR the input with the IV and store input into IV. */
+      inbuflen -= c->unused;
+      ivp = c->u_iv.iv + blocksize - c->unused;
+      buf_xor_n_copy(outbuf, ivp, inbuf, c->unused);
+      outbuf += c->unused;
+      inbuf += c->unused;
+      c->unused = 0;
+    }
+
+  /* Now we can process complete blocks.  We use a loop as long as we
+     have at least 2 blocks and use conditions for the rest.  This
+     also allows to use a bulk encryption function if available.  */
+  if (inbuflen >= blocksize_x_2 && c->bulk.cfb_dec)
+    {
+      size_t nblocks = inbuflen >> blocksize_shift;
+      c->bulk.cfb_dec (&c->context.c, c->u_iv.iv, outbuf, inbuf, nblocks);
+      outbuf += nblocks << blocksize_shift;
+      inbuf  += nblocks << blocksize_shift;
+      inbuflen -= nblocks << blocksize_shift;
+    }
+  else
+    {
+      while (inbuflen >= blocksize_x_2 )
+        {
+          /* Encrypt the IV. */
+          nburn = enc_fn ( &c->context.c, c->u_iv.iv, c->u_iv.iv );
+          burn = nburn > burn ? nburn : burn;
+          /* XOR the input with the IV and store input into IV. */
+          cipher_block_xor_n_copy(outbuf, c->u_iv.iv, inbuf, blocksize);
+          outbuf += blocksize;
+          inbuf += blocksize;
+          inbuflen -= blocksize;
+        }
+    }
+
+  if (inbuflen >= blocksize )
+    {
+      /* Save the current IV and then encrypt the IV. */
+      cipher_block_cpy ( c->lastiv, c->u_iv.iv, blocksize);
+      nburn = enc_fn ( &c->context.c, c->u_iv.iv, c->u_iv.iv );
+      burn = nburn > burn ? nburn : burn;
+      /* XOR the input with the IV and store input into IV */
+      cipher_block_xor_n_copy(outbuf, c->u_iv.iv, inbuf, blocksize);
+      outbuf += blocksize;
+      inbuf += blocksize;
+      inbuflen -= blocksize;
+    }
+
+  if (inbuflen)
+    {
+      /* Save the current IV and then encrypt the IV. */
+      cipher_block_cpy ( c->lastiv, c->u_iv.iv, blocksize );
+      nburn = enc_fn ( &c->context.c, c->u_iv.iv, c->u_iv.iv );
+      burn = nburn > burn ? nburn : burn;
+      c->unused = blocksize;
+      /* Apply the XOR. */
+      c->unused -= inbuflen;
+      buf_xor_n_copy(outbuf, c->u_iv.iv, inbuf, inbuflen);
+      outbuf += inbuflen;
+      inbuf += inbuflen;
+      inbuflen = 0;
+    }
+
+  if (burn > 0)
+    _gcry_burn_stack (burn + 4 * sizeof(void *));
+
+  return 0;
+}
+
+
+gcry_err_code_t
+_gcry_cipher_cfb8_encrypt (gcry_cipher_hd_t c,
+                          unsigned char *outbuf, size_t outbuflen,
+                          const unsigned char *inbuf, size_t inbuflen)
+{
+  gcry_cipher_encrypt_t enc_fn = c->spec->encrypt;
+  size_t blocksize = c->spec->blocksize;
+  unsigned int burn, nburn;
+
+  if (outbuflen < inbuflen)
+    return GPG_ERR_BUFFER_TOO_SHORT;
+
+  burn = 0;
+
+  while ( inbuflen > 0)
+    {
+      int i;
+
+      /* Encrypt the IV. */
+      nburn = enc_fn ( &c->context.c, c->lastiv, c->u_iv.iv );
+      burn = nburn > burn ? nburn : burn;
+
+      outbuf[0] = c->lastiv[0] ^ inbuf[0];
+
+      /* Bitshift iv by 8 bit to the left */
+      for (i = 0; i < blocksize-1; i++)
+        c->u_iv.iv[i] = c->u_iv.iv[i+1];
+
+      /* append cipher text to iv */
+      c->u_iv.iv[blocksize-1] = outbuf[0];
+
+      outbuf += 1;
+      inbuf += 1;
+      inbuflen -= 1;
+    }
+
+  if (burn > 0)
+    _gcry_burn_stack (burn + 4 * sizeof(void *));
+
+  return 0;
+}
+
+
+gcry_err_code_t
+_gcry_cipher_cfb8_decrypt (gcry_cipher_hd_t c,
+                          unsigned char *outbuf, size_t outbuflen,
+                          const unsigned char *inbuf, size_t inbuflen)
+{
+  gcry_cipher_encrypt_t enc_fn = c->spec->encrypt;
+  size_t blocksize = c->spec->blocksize;
+  unsigned int burn, nburn;
+  unsigned char appendee;
+
+  if (outbuflen < inbuflen)
+    return GPG_ERR_BUFFER_TOO_SHORT;
+
+  burn = 0;
+
+  while (inbuflen > 0)
+    {
+      int i;
+
+      /* Encrypt the IV. */
+      nburn = enc_fn ( &c->context.c, c->lastiv, c->u_iv.iv );
+      burn = nburn > burn ? nburn : burn;
+
+      /* inbuf might == outbuf, make sure we keep the value
+         so we can append it later */
+      appendee = inbuf[0];
+
+      outbuf[0] = inbuf[0] ^ c->lastiv[0];
+
+      /* Bitshift iv by 8 bit to the left */
+      for (i = 0; i < blocksize-1; i++)
+        c->u_iv.iv[i] = c->u_iv.iv[i+1];
+
+      c->u_iv.iv[blocksize-1] = appendee;
+
+      outbuf += 1;
+      inbuf += 1;
+      inbuflen -= 1;
+    }
+
+  if (burn > 0)
+    _gcry_burn_stack (burn + 4 * sizeof(void *));
+
+  return 0;
+}
diff --git a/grub-core/lib/libgcrypt/cipher/cipher-cmac.c 
b/grub-core/lib/libgcrypt/cipher/cipher-cmac.c
new file mode 100644
index 000000000..4efd1e19b
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/cipher-cmac.c
@@ -0,0 +1,292 @@
+/* cmac.c - CMAC, Cipher-based MAC.
+ * Copyright (C) 2013,2018 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "g10lib.h"
+#include "cipher.h"
+#include "cipher-internal.h"
+#include "bufhelp.h"
+
+
+#define set_burn(burn, nburn) do { \
+  unsigned int __nburn = (nburn); \
+  (burn) = (burn) > __nburn ? (burn) : __nburn; } while (0)
+
+
+gcry_err_code_t
+_gcry_cmac_write (gcry_cipher_hd_t c, gcry_cmac_context_t *ctx,
+                 const byte * inbuf, size_t inlen)
+{
+  gcry_cipher_encrypt_t enc_fn = c->spec->encrypt;
+  size_t blocksize_shift = _gcry_blocksize_shift(c);
+  size_t blocksize = 1 << blocksize_shift;
+  byte outbuf[MAX_BLOCKSIZE];
+  unsigned int burn = 0;
+  unsigned int nblocks;
+  size_t n;
+
+  if (ctx->tag)
+    return GPG_ERR_INV_STATE;
+
+  if (!inbuf)
+    return GPG_ERR_INV_ARG;
+
+  if (inlen == 0)
+    return 0;
+
+  /* Last block is needed for cmac_final.  */
+  if (ctx->mac_unused + inlen <= blocksize)
+    {
+      buf_cpy (&ctx->macbuf[ctx->mac_unused], inbuf, inlen);
+      ctx->mac_unused += inlen;
+      inbuf += inlen;
+      inlen -= inlen;
+
+      return 0;
+    }
+
+  if (ctx->mac_unused)
+    {
+      n = inlen;
+      if (n > blocksize - ctx->mac_unused)
+       n = blocksize - ctx->mac_unused;
+
+      buf_cpy (&ctx->macbuf[ctx->mac_unused], inbuf, n);
+      ctx->mac_unused += n;
+      inbuf += n;
+      inlen -= n;
+
+      cipher_block_xor (ctx->u_iv.iv, ctx->u_iv.iv, ctx->macbuf, blocksize);
+      set_burn (burn, enc_fn (&c->context.c, ctx->u_iv.iv, ctx->u_iv.iv));
+
+      ctx->mac_unused = 0;
+    }
+
+  if (c->bulk.cbc_enc && inlen > blocksize)
+    {
+      nblocks = inlen >> blocksize_shift;
+      nblocks -= ((nblocks << blocksize_shift) == inlen);
+
+      c->bulk.cbc_enc (&c->context.c, ctx->u_iv.iv, outbuf, inbuf, nblocks, 1);
+      inbuf += nblocks << blocksize_shift;
+      inlen -= nblocks << blocksize_shift;
+
+      wipememory (outbuf, sizeof (outbuf));
+    }
+  else
+    while (inlen > blocksize)
+      {
+        cipher_block_xor (ctx->u_iv.iv, ctx->u_iv.iv, inbuf, blocksize);
+        set_burn (burn, enc_fn (&c->context.c, ctx->u_iv.iv, ctx->u_iv.iv));
+        inlen -= blocksize;
+        inbuf += blocksize;
+      }
+
+  /* Make sure that last block is passed to cmac_final.  */
+  if (inlen == 0)
+    BUG ();
+
+  n = inlen;
+  if (n > blocksize - ctx->mac_unused)
+    n = blocksize - ctx->mac_unused;
+
+  buf_cpy (&ctx->macbuf[ctx->mac_unused], inbuf, n);
+  ctx->mac_unused += n;
+  inbuf += n;
+  inlen -= n;
+
+  if (burn)
+    _gcry_burn_stack (burn + 4 * sizeof (void *));
+
+  return 0;
+}
+
+
+gcry_err_code_t
+_gcry_cmac_generate_subkeys (gcry_cipher_hd_t c, gcry_cmac_context_t *ctx)
+{
+  const unsigned int blocksize = c->spec->blocksize;
+  byte rb, carry, t, bi;
+  unsigned int burn;
+  int i, j;
+  union
+  {
+    size_t _aligned;
+    byte buf[MAX_BLOCKSIZE];
+  } u;
+
+  /* Tell compiler that we require a cipher with a 64bit or 128 bit block
+   * length, to allow better optimization of this function.  */
+  if (blocksize > 16 || blocksize < 8 || blocksize & (8 - 1))
+    return GPG_ERR_INV_CIPHER_MODE;
+
+  if (MAX_BLOCKSIZE < blocksize)
+    BUG ();
+
+  /* encrypt zero block */
+  memset (u.buf, 0, blocksize);
+  burn = c->spec->encrypt (&c->context.c, u.buf, u.buf);
+
+  /* Currently supported blocksizes are 16 and 8. */
+  rb = blocksize == 16 ? 0x87 : 0x1B /* blocksize == 8 */ ;
+
+  for (j = 0; j < 2; j++)
+    {
+      /* Generate subkeys K1 and K2 */
+      carry = 0;
+      for (i = blocksize - 1; i >= 0; i--)
+        {
+          bi = u.buf[i];
+          t = carry | (bi << 1);
+          carry = bi >> 7;
+          u.buf[i] = t & 0xff;
+          ctx->subkeys[j][i] = u.buf[i];
+        }
+      u.buf[blocksize - 1] ^= carry ? rb : 0;
+      ctx->subkeys[j][blocksize - 1] = u.buf[blocksize - 1];
+    }
+
+  wipememory (&u, sizeof (u));
+  if (burn)
+    _gcry_burn_stack (burn + 4 * sizeof (void *));
+
+  return 0;
+}
+
+
+gcry_err_code_t
+_gcry_cmac_final (gcry_cipher_hd_t c, gcry_cmac_context_t *ctx)
+{
+  const unsigned int blocksize = c->spec->blocksize;
+  unsigned int count = ctx->mac_unused;
+  unsigned int burn;
+  byte *subkey;
+
+  /* Tell compiler that we require a cipher with a 64bit or 128 bit block
+   * length, to allow better optimization of this function.  */
+  if (blocksize > 16 || blocksize < 8 || blocksize & (8 - 1))
+    return GPG_ERR_INV_CIPHER_MODE;
+
+  if (count == blocksize)
+    subkey = ctx->subkeys[0];        /* K1 */
+  else
+    {
+      subkey = ctx->subkeys[1];      /* K2 */
+      ctx->macbuf[count++] = 0x80;
+      while (count < blocksize)
+        ctx->macbuf[count++] = 0;
+    }
+
+  cipher_block_xor (ctx->macbuf, ctx->macbuf, subkey, blocksize);
+
+  cipher_block_xor (ctx->u_iv.iv, ctx->u_iv.iv, ctx->macbuf, blocksize);
+  burn = c->spec->encrypt (&c->context.c, ctx->u_iv.iv, ctx->u_iv.iv);
+  if (burn)
+    _gcry_burn_stack (burn + 4 * sizeof (void *));
+
+  ctx->mac_unused = 0;
+
+  return 0;
+}
+
+
+static gcry_err_code_t
+cmac_tag (gcry_cipher_hd_t c, gcry_cmac_context_t *ctx,
+         unsigned char *tag, size_t taglen, int check)
+{
+  gcry_err_code_t ret;
+
+  if (!tag || taglen == 0 || taglen > c->spec->blocksize)
+    return GPG_ERR_INV_ARG;
+
+  if (!ctx->tag)
+    {
+      ret = _gcry_cmac_final (c, ctx);
+      if (ret != 0)
+       return ret;
+
+      ctx->tag = 1;
+    }
+
+  if (!check)
+    {
+      memcpy (tag, ctx->u_iv.iv, taglen);
+      return GPG_ERR_NO_ERROR;
+    }
+  else
+    {
+      return buf_eq_const (tag, ctx->u_iv.iv, taglen) ?
+        GPG_ERR_NO_ERROR : GPG_ERR_CHECKSUM;
+    }
+}
+
+
+void
+_gcry_cmac_reset (gcry_cmac_context_t *ctx)
+{
+  char tmp_buf[sizeof(ctx->subkeys)];
+
+  /* Only keep subkeys when reseting context. */
+
+  buf_cpy (tmp_buf, ctx->subkeys, sizeof(ctx->subkeys));
+  memset (ctx, 0, sizeof(*ctx));
+  buf_cpy (ctx->subkeys, tmp_buf, sizeof(ctx->subkeys));
+  wipememory (tmp_buf, sizeof(tmp_buf));
+}
+
+
+gcry_err_code_t
+_gcry_cipher_cmac_authenticate (gcry_cipher_hd_t c,
+                                const unsigned char *abuf, size_t abuflen)
+{
+  if (abuflen > 0 && !abuf)
+    return GPG_ERR_INV_ARG;
+  /* To support new blocksize, update cmac_generate_subkeys() then add new
+     blocksize here. */
+  if (c->spec->blocksize != 16 && c->spec->blocksize != 8)
+    return GPG_ERR_INV_CIPHER_MODE;
+
+  return _gcry_cmac_write (c, &c->u_mode.cmac, abuf, abuflen);
+}
+
+
+gcry_err_code_t
+_gcry_cipher_cmac_get_tag (gcry_cipher_hd_t c,
+                           unsigned char *outtag, size_t taglen)
+{
+  return cmac_tag (c, &c->u_mode.cmac, outtag, taglen, 0);
+}
+
+
+gcry_err_code_t
+_gcry_cipher_cmac_check_tag (gcry_cipher_hd_t c,
+                             const unsigned char *intag, size_t taglen)
+{
+  return cmac_tag (c, &c->u_mode.cmac, (unsigned char *) intag, taglen, 1);
+}
+
+gcry_err_code_t
+_gcry_cipher_cmac_set_subkeys (gcry_cipher_hd_t c)
+{
+  return _gcry_cmac_generate_subkeys (c, &c->u_mode.cmac);
+}
diff --git a/grub-core/lib/libgcrypt/cipher/cipher-ctr.c 
b/grub-core/lib/libgcrypt/cipher/cipher-ctr.c
new file mode 100644
index 000000000..d66c56877
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/cipher-ctr.c
@@ -0,0 +1,131 @@
+/* cipher-ctr.c  - Generic CTR mode implementation
+ * Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003
+ *               2005, 2007, 2008, 2009, 2011 Free Software Foundation, Inc.
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser general Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "g10lib.h"
+#include "cipher.h"
+#include "bufhelp.h"
+#include "./cipher-internal.h"
+
+
+gcry_err_code_t
+_gcry_cipher_ctr_encrypt_ctx (gcry_cipher_hd_t c,
+                             unsigned char *outbuf, size_t outbuflen,
+                             const unsigned char *inbuf, size_t inbuflen,
+                             void *algo_context)
+{
+  size_t n;
+  int i;
+  gcry_cipher_encrypt_t enc_fn = c->spec->encrypt;
+  size_t blocksize_shift = _gcry_blocksize_shift(c);
+  size_t blocksize = 1 << blocksize_shift;
+  size_t nblocks;
+  unsigned int burn, nburn;
+
+  if (outbuflen < inbuflen)
+    return GPG_ERR_BUFFER_TOO_SHORT;
+
+  burn = 0;
+
+  /* First process a left over encrypted counter.  */
+  if (c->unused)
+    {
+      gcry_assert (c->unused < blocksize);
+      i = blocksize - c->unused;
+      n = c->unused > inbuflen ? inbuflen : c->unused;
+      buf_xor(outbuf, inbuf, &c->lastiv[i], n);
+      c->unused -= n;
+      inbuf  += n;
+      outbuf += n;
+      inbuflen -= n;
+    }
+
+  /* Use a bulk method if available.  */
+  nblocks = inbuflen >> blocksize_shift;
+  if (nblocks && c->bulk.ctr_enc)
+    {
+      c->bulk.ctr_enc (algo_context, c->u_ctr.ctr, outbuf, inbuf, nblocks);
+      inbuf  += nblocks << blocksize_shift;
+      outbuf += nblocks << blocksize_shift;
+      inbuflen -= nblocks << blocksize_shift;
+    }
+
+  /* If we don't have a bulk method use the standard method.  We also
+     use this method for the a remaining partial block.  */
+  if (inbuflen)
+    {
+      unsigned char tmp[MAX_BLOCKSIZE];
+
+      n = blocksize;
+      do
+        {
+          nburn = enc_fn (algo_context, tmp, c->u_ctr.ctr);
+          burn = nburn > burn ? nburn : burn;
+
+         cipher_block_add(c->u_ctr.ctr, 1, blocksize);
+
+          if (inbuflen < blocksize)
+            break;
+          cipher_block_xor(outbuf, inbuf, tmp, blocksize);
+
+          inbuflen -= n;
+          outbuf += n;
+          inbuf += n;
+        }
+      while (inbuflen);
+
+      if (inbuflen)
+        {
+          n = inbuflen;
+          buf_xor(outbuf, inbuf, tmp, inbuflen);
+
+          inbuflen -= n;
+          outbuf += n;
+          inbuf += n;
+        }
+
+      /* Save the unused bytes of the counter.  */
+      c->unused = blocksize - n;
+      if (c->unused)
+        buf_cpy (c->lastiv+n, tmp+n, c->unused);
+
+      wipememory (tmp, sizeof tmp);
+    }
+
+  if (burn > 0)
+    _gcry_burn_stack (burn + 4 * sizeof(void *));
+
+  return 0;
+}
+
+
+gcry_err_code_t
+_gcry_cipher_ctr_encrypt (gcry_cipher_hd_t c,
+                         unsigned char *outbuf, size_t outbuflen,
+                         const unsigned char *inbuf, size_t inbuflen)
+{
+  return _gcry_cipher_ctr_encrypt_ctx (c, outbuf, outbuflen, inbuf, inbuflen,
+                                      &c->context.c);
+}
diff --git a/grub-core/lib/libgcrypt/cipher/cipher-eax.c 
b/grub-core/lib/libgcrypt/cipher/cipher-eax.c
new file mode 100644
index 000000000..08f815a9e
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/cipher-eax.c
@@ -0,0 +1,289 @@
+/* cipher-eax.c  -  EAX implementation
+ * Copyright (C) 2018 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser general Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "g10lib.h"
+#include "cipher.h"
+#include "bufhelp.h"
+#include "./cipher-internal.h"
+
+
+gcry_err_code_t
+_gcry_cipher_eax_encrypt (gcry_cipher_hd_t c,
+                          byte *outbuf, size_t outbuflen,
+                          const byte *inbuf, size_t inbuflen)
+{
+  gcry_err_code_t err;
+
+  if (outbuflen < inbuflen)
+    return GPG_ERR_BUFFER_TOO_SHORT;
+  if (c->marks.tag)
+    return GPG_ERR_INV_STATE;
+
+  if (!c->marks.iv)
+    {
+      err = _gcry_cipher_eax_set_nonce (c, NULL, 0);
+      if (err != 0)
+       return err;
+    }
+
+  while (inbuflen)
+    {
+      size_t currlen = inbuflen;
+
+      /* Since checksumming is done after encryption, process input in 24KiB
+       * chunks to keep data loaded in L1 cache for checksumming. */
+      if (currlen > 24 * 1024)
+       currlen = 24 * 1024;
+
+      err = _gcry_cipher_ctr_encrypt (c, outbuf, outbuflen, inbuf, currlen);
+      if (err != 0)
+       return err;
+
+      err = _gcry_cmac_write (c, &c->u_mode.eax.cmac_ciphertext, outbuf,
+                             currlen);
+      if (err != 0)
+       return err;
+
+      outbuf += currlen;
+      inbuf += currlen;
+      outbuflen -= currlen;
+      inbuflen -= currlen;
+    }
+
+  return 0;
+}
+
+
+gcry_err_code_t
+_gcry_cipher_eax_decrypt (gcry_cipher_hd_t c,
+                          byte *outbuf, size_t outbuflen,
+                          const byte *inbuf, size_t inbuflen)
+{
+  gcry_err_code_t err;
+
+  if (outbuflen < inbuflen)
+    return GPG_ERR_BUFFER_TOO_SHORT;
+  if (c->marks.tag)
+    return GPG_ERR_INV_STATE;
+
+  if (!c->marks.iv)
+    {
+      err = _gcry_cipher_eax_set_nonce (c, NULL, 0);
+      if (err != 0)
+       return err;
+    }
+
+  while (inbuflen)
+    {
+      size_t currlen = inbuflen;
+
+      /* Since checksumming is done before decryption, process input in 24KiB
+       * chunks to keep data loaded in L1 cache for decryption. */
+      if (currlen > 24 * 1024)
+       currlen = 24 * 1024;
+
+      err = _gcry_cmac_write (c, &c->u_mode.eax.cmac_ciphertext, inbuf,
+                             currlen);
+      if (err != 0)
+       return err;
+
+      err = _gcry_cipher_ctr_encrypt (c, outbuf, outbuflen, inbuf, currlen);
+      if (err != 0)
+       return err;
+
+      outbuf += currlen;
+      inbuf += currlen;
+      outbuflen -= currlen;
+      inbuflen -= currlen;
+    }
+
+  return 0;
+}
+
+
+gcry_err_code_t
+_gcry_cipher_eax_authenticate (gcry_cipher_hd_t c,
+                               const byte * aadbuf, size_t aadbuflen)
+{
+  gcry_err_code_t err;
+
+  if (c->marks.tag)
+    return GPG_ERR_INV_STATE;
+
+  if (!c->marks.iv)
+    {
+      err = _gcry_cipher_eax_set_nonce (c, NULL, 0);
+      if (err != 0)
+       return err;
+    }
+
+  return _gcry_cmac_write (c, &c->u_mode.eax.cmac_header, aadbuf, aadbuflen);
+}
+
+
+gcry_err_code_t
+_gcry_cipher_eax_setkey (gcry_cipher_hd_t c)
+{
+  gcry_err_code_t err;
+
+  err = _gcry_cmac_generate_subkeys (c, &c->u_mode.eax.cmac_header);
+  if (err != 0)
+    return err;
+
+  buf_cpy (c->u_mode.eax.cmac_ciphertext.subkeys,
+          c->u_mode.eax.cmac_header.subkeys,
+          sizeof(c->u_mode.eax.cmac_header.subkeys));
+
+  return 0;
+}
+
+
+gcry_err_code_t
+_gcry_cipher_eax_set_nonce (gcry_cipher_hd_t c, const byte *nonce,
+                           size_t noncelen)
+{
+  gcry_cmac_context_t nonce_cmac;
+  unsigned char initbuf[MAX_BLOCKSIZE];
+  gcry_err_code_t err;
+
+  c->marks.iv = 0;
+  c->marks.tag = 0;
+
+  _gcry_cmac_reset (&c->u_mode.eax.cmac_header);
+  _gcry_cmac_reset (&c->u_mode.eax.cmac_ciphertext);
+
+  /* Calculate nonce CMAC */
+
+  memset(&nonce_cmac, 0, sizeof(nonce_cmac));
+  memset(&initbuf, 0, sizeof(initbuf));
+
+  buf_cpy (&nonce_cmac.subkeys, c->u_mode.eax.cmac_header.subkeys,
+          sizeof(c->u_mode.eax.cmac_header.subkeys));
+
+  err = _gcry_cmac_write (c, &nonce_cmac, initbuf, c->spec->blocksize);
+  if (err != 0)
+    return err;
+
+  if (noncelen != 0)
+    {
+      err = _gcry_cmac_write (c, &nonce_cmac, nonce, noncelen);
+      if (err != 0)
+        return err;
+    }
+
+  err = _gcry_cmac_final (c, &nonce_cmac);
+  if (err != 0)
+    return err;
+
+  cipher_block_cpy (c->u_iv.iv, nonce_cmac.u_iv.iv, MAX_BLOCKSIZE);
+  cipher_block_cpy (c->u_ctr.ctr, nonce_cmac.u_iv.iv, MAX_BLOCKSIZE);
+
+  wipememory (&nonce_cmac, sizeof(nonce_cmac));
+
+  /* Prepare header CMAC */
+
+  initbuf[c->spec->blocksize - 1] = 1;
+  err = _gcry_cmac_write (c, &c->u_mode.eax.cmac_header, initbuf,
+                         c->spec->blocksize);
+  if (err != 0)
+    return err;
+
+  /* Prepare ciphertext CMAC */
+
+  initbuf[c->spec->blocksize - 1] = 2;
+  err = _gcry_cmac_write (c, &c->u_mode.eax.cmac_ciphertext, initbuf,
+                         c->spec->blocksize);
+  if (err != 0)
+    return err;
+
+  c->marks.iv = 1;
+  c->marks.tag = 0;
+
+  return 0;
+}
+
+
+static gcry_err_code_t
+_gcry_cipher_eax_tag (gcry_cipher_hd_t c,
+                      byte *outbuf, size_t outbuflen, int check)
+{
+  gcry_err_code_t err;
+
+  if (!c->marks.tag)
+    {
+      err = _gcry_cmac_final (c, &c->u_mode.eax.cmac_header);
+      if (err != 0)
+       return err;
+
+      err = _gcry_cmac_final (c, &c->u_mode.eax.cmac_ciphertext);
+      if (err != 0)
+       return err;
+
+      cipher_block_xor_1 (c->u_iv.iv, c->u_mode.eax.cmac_header.u_iv.iv,
+                          MAX_BLOCKSIZE);
+      cipher_block_xor_1 (c->u_iv.iv, c->u_mode.eax.cmac_ciphertext.u_iv.iv,
+                          MAX_BLOCKSIZE);
+
+      _gcry_cmac_reset (&c->u_mode.eax.cmac_header);
+      _gcry_cmac_reset (&c->u_mode.eax.cmac_ciphertext);
+
+      c->marks.tag = 1;
+    }
+
+  if (!check)
+    {
+      if (outbuflen > c->spec->blocksize)
+        outbuflen = c->spec->blocksize;
+
+      /* NB: We already checked that OUTBUF is large enough to hold
+       * the result or has valid truncated length.  */
+      memcpy (outbuf, c->u_iv.iv, outbuflen);
+    }
+  else
+    {
+      /* OUTBUFLEN gives the length of the user supplied tag in OUTBUF
+       * and thus we need to compare its length first.  */
+      if (!(outbuflen <= c->spec->blocksize)
+          || !buf_eq_const (outbuf, c->u_iv.iv, outbuflen))
+        return GPG_ERR_CHECKSUM;
+    }
+
+  return 0;
+}
+
+
+gcry_err_code_t
+_gcry_cipher_eax_get_tag (gcry_cipher_hd_t c, unsigned char *outtag,
+                          size_t taglen)
+{
+  return _gcry_cipher_eax_tag (c, outtag, taglen, 0);
+}
+
+gcry_err_code_t
+_gcry_cipher_eax_check_tag (gcry_cipher_hd_t c, const unsigned char *intag,
+                            size_t taglen)
+{
+  return _gcry_cipher_eax_tag (c, (unsigned char *) intag, taglen, 1);
+}
diff --git a/grub-core/lib/libgcrypt/cipher/cipher-gcm-armv7-neon.S 
b/grub-core/lib/libgcrypt/cipher/cipher-gcm-armv7-neon.S
new file mode 100644
index 000000000..16502b4ad
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/cipher-gcm-armv7-neon.S
@@ -0,0 +1,341 @@
+/* cipher-gcm-armv7-neon.S - ARM/NEON accelerated GHASH
+ * Copyright (C) 2019 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+
+#if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) && \
+    defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) && \
+    defined(HAVE_GCC_INLINE_ASM_NEON)
+
+.syntax unified
+.fpu neon
+.arm
+
+.text
+
+#ifdef __PIC__
+#  define GET_DATA_POINTER(reg, name, rtmp) \
+               ldr reg, 1f; \
+               ldr rtmp, 2f; \
+               b 3f; \
+       1:      .word _GLOBAL_OFFSET_TABLE_-(3f+8); \
+       2:      .word name(GOT); \
+       3:      add reg, pc, reg; \
+               ldr reg, [reg, rtmp];
+#else
+#  define GET_DATA_POINTER(reg, name, rtmp) ldr reg, =name
+#endif
+
+
+/* Constants */
+
+.align 4
+gcry_gcm_reduction_constant:
+.Lrconst64:
+  .quad 0xc200000000000000
+
+/* Register macros */
+
+#define rhash q0
+#define rhash_l d0
+#define rhash_h d1
+
+#define rh1 q1
+#define rh1_l d2
+#define rh1_h d3
+
+#define rbuf q2
+#define rbuf_l d4
+#define rbuf_h d5
+
+#define rbuf1 q3
+#define rbuf1_l d6
+#define rbuf1_h d7
+
+#define t0q q4
+#define t0l d8
+#define t0h d9
+
+#define t1q q5
+#define t1l d10
+#define t1h d11
+
+#define t2q q6
+#define t2l d12
+#define t2h d13
+
+#define t3q q7
+#define t3l d14
+#define t3h d15
+
+/* q8 */
+#define k16 d16
+#define k32 d17
+
+/* q9 */
+#define k48 d18
+
+#define k0 q10
+
+#define rr0 q11
+#define rr0_l d22
+#define rr0_h d23
+
+#define rr1 q12
+#define rr1_l d24
+#define rr1_h d25
+
+#define rt0 q13
+#define rt0_l d26
+#define rt0_h d27
+
+#define rt1 q14
+#define rt1_l d28
+#define rt1_h d29
+
+#define rrconst q15
+#define rrconst_l d30
+#define rrconst_h d31
+
+/* Macro for 64x64=>128 carry-less multiplication using vmull.p8 instruction.
+ *
+ * From "Câmara, D.; Gouvêa, C. P. L.; López, J. & Dahab, R. Fast Software
+ * Polynomial Multiplication on ARM Processors using the NEON Engine. The
+ * Second International Workshop on Modern Cryptography and Security
+ * Engineering — MoCrySEn, 2013". */
+
+#define vmull_p64(rq, rl, rh, ad, bd) \
+       vext.8 t0l, ad, ad, $1; \
+       vmull.p8 t0q, t0l, bd; \
+       vext.8 rl, bd, bd, $1; \
+       vmull.p8 rq, ad, rl; \
+       vext.8 t1l, ad, ad, $2; \
+       vmull.p8 t1q, t1l, bd; \
+       vext.8 t3l, bd, bd, $2; \
+       vmull.p8 t3q, ad, t3l; \
+       vext.8 t2l, ad, ad, $3; \
+       vmull.p8 t2q, t2l, bd; \
+       veor t0q, t0q, rq; \
+       vext.8 rl, bd, bd, $3; \
+       vmull.p8 rq, ad, rl; \
+       veor t1q, t1q, t3q; \
+       vext.8 t3l, bd, bd, $4; \
+       vmull.p8 t3q, ad, t3l; \
+       veor t0l, t0l, t0h; \
+       vand t0h, t0h, k48; \
+       veor t1l, t1l, t1h; \
+       vand t1h, t1h, k32; \
+       veor t2q, t2q, rq; \
+       veor t0l, t0l, t0h; \
+       veor t1l, t1l, t1h; \
+       veor t2l, t2l, t2h; \
+       vand t2h, t2h, k16; \
+       veor t3l, t3l, t3h; \
+       vmov.i64 t3h, $0; \
+       vext.8 t0q, t0q, t0q, $15; \
+       veor t2l, t2l, t2h; \
+       vext.8 t1q, t1q, t1q, $14; \
+       vmull.p8 rq, ad, bd; \
+       vext.8 t2q, t2q, t2q, $13; \
+       vext.8 t3q, t3q, t3q, $12; \
+       veor t0q, t0q, t1q; \
+       veor t2q, t2q, t3q; \
+       veor rq, rq, t0q; \
+       veor rq, rq, t2q;
+
+/* GHASH macros.
+ *
+ * See "Gouvêa, C. P. L. & López, J. Implementing GCM on ARMv8. Topics in
+ * Cryptology — CT-RSA 2015" for details.
+ */
+
+/* Input: 'a' and 'b', Output: 'r0:r1' (low 128-bits in r0, high in r1)
+ *  Note: 'r1' may be 'a' or 'b', 'r0' must not be either 'a' or 'b'.
+ */
+#define PMUL_128x128(r0, r1, a, b, t1, t2, interleave_op) \
+        veor t1##_h, b##_l, b##_h; \
+        veor t1##_l, a##_l, a##_h; \
+        vmull_p64( r0, r0##_l, r0##_h, a##_l, b##_l ); \
+        vmull_p64( r1, r1##_l, r1##_h, a##_h, b##_h ); \
+        vmull_p64( t2, t2##_h, t2##_l, t1##_h, t1##_l ); \
+        interleave_op; \
+        veor t2, r0; \
+        veor t2, r1; \
+        veor r0##_h, t2##_l; \
+        veor r1##_l, t2##_h;
+
+/* Reduction using Xor and Shift.
+ * Input: 'r0:r1', Output: 'a'
+ *
+ * See "Shay Gueron, Michael E. Kounavis. Intel Carry-Less Multiplication
+ * Instruction and its Usage for Computing the GCM Mode" for details.
+ */
+#define REDUCTION(a, r0, r1, t, interleave_op) \
+        vshl.u32 t0q, r0, #31; \
+        vshl.u32 t1q, r0, #30; \
+        vshl.u32 t2q, r0, #25; \
+        veor t0q, t0q, t1q; \
+        veor t0q, t0q, t2q; \
+        vext.8 t, t0q, k0, #4; \
+        vext.8 t0q, k0, t0q, #(16-12); \
+        veor r0, r0, t0q; \
+        interleave_op; \
+        vshr.u32 t0q, r0, #1; \
+        vshr.u32 t1q, r0, #2; \
+        vshr.u32 t2q, r0, #7; \
+        veor t0q, t0q, t1q; \
+        veor t0q, t0q, t2q; \
+        veor t0q, t0q, t; \
+        veor r0, r0, t0q; \
+        veor a, r0, r1;
+
+#define _(...) __VA_ARGS__
+#define __ _()
+
+/* Other functional macros */
+
+#define CLEAR_REG(reg) vmov.i8 reg, #0;
+
+
+/*
+ * unsigned int _gcry_ghash_armv7_neon (void *gcm_key, byte *result,
+ *                                      const byte *buf, size_t nblocks);
+ */
+.align 3
+.globl _gcry_ghash_armv7_neon
+.type  _gcry_ghash_armv7_neon,%function;
+_gcry_ghash_armv7_neon:
+  /* input:
+   *    r0: gcm_key
+   *    r1: result/hash
+   *    r2: buf
+   *    r3: nblocks
+   */
+  push {r4-r6, lr}
+
+  cmp r3, #0
+  beq .Ldo_nothing
+
+  vpush {q4-q7}
+
+  vld1.64 {rhash}, [r1]
+  vld1.64 {rh1}, [r0]
+
+  vrev64.8 rhash, rhash /* byte-swap */
+
+  vmov.i64 k0, #0x0
+  vmov.i64 k16, #0xffff
+  vmov.i64 k32, #0xffffffff
+  vmov.i64 k48, #0xffffffffffff
+
+  vext.8 rhash, rhash, rhash, #8
+
+  /* Handle remaining blocks. */
+
+  vld1.64 {rbuf}, [r2]!
+  subs r3, r3, #1
+
+  vrev64.8 rbuf, rbuf /* byte-swap */
+  vext.8 rbuf, rbuf, rbuf, #8
+
+  veor rhash, rhash, rbuf
+
+  beq .Lend
+
+.Loop:
+  vld1.64 {rbuf}, [r2]!
+  PMUL_128x128(rr0, rr1, rhash, rh1, rt0, rt1, _(vrev64.8 rbuf, rbuf))
+  REDUCTION(rhash, rr0, rr1, rt0, _(vext.8 rbuf, rbuf, rbuf, #8))
+  subs r3, r3, #1
+  veor rhash, rhash, rbuf
+
+  bne .Loop
+
+.Lend:
+  PMUL_128x128(rr0, rr1, rhash, rh1, rt0, rt1, _(CLEAR_REG(rbuf)))
+  REDUCTION(rhash, rr0, rr1, rt0, _(CLEAR_REG(rh1)))
+
+.Ldone:
+  CLEAR_REG(rr1)
+  vrev64.8 rhash, rhash /* byte-swap */
+  CLEAR_REG(rt0)
+  CLEAR_REG(rr0)
+  vext.8 rhash, rhash, rhash, #8
+  CLEAR_REG(rt1)
+  CLEAR_REG(t0q)
+  CLEAR_REG(t1q)
+  CLEAR_REG(t2q)
+  CLEAR_REG(t3q)
+  vst1.64 {rhash}, [r1]
+  CLEAR_REG(rhash)
+
+  vpop {q4-q7}
+
+.Ldo_nothing:
+  mov r0, #0
+  pop {r4-r6, pc}
+.size _gcry_ghash_armv7_neon,.-_gcry_ghash_armv7_neon;
+
+
+/*
+ * void _gcry_ghash_armv7_neon (void *gcm_key);
+ */
+.align 3
+.globl _gcry_ghash_setup_armv7_neon
+.type  _gcry_ghash_setup_armv7_neon,%function;
+_gcry_ghash_setup_armv7_neon:
+  /* input:
+   *   r0: gcm_key
+   */
+
+  vpush {q4-q7}
+
+  GET_DATA_POINTER(r2, .Lrconst64, r3)
+
+  vld1.64 {rrconst_h}, [r2]
+
+#define GCM_LSH_1(r_out, ia, ib, const_d, oa, ob, ma) \
+        /* H <<< 1 */ \
+        vshr.s64 ma, ib, #63; \
+        vshr.u64 oa, ib, #63; \
+        vshr.u64 ob, ia, #63; \
+        vand ma, const_d; \
+        vshl.u64 ib, ib, #1; \
+        vshl.u64 ia, ia, #1; \
+        vorr ob, ib; \
+        vorr oa, ia; \
+        veor ob, ma; \
+        vst1.64 {oa, ob}, [r_out]
+
+  vld1.64 {rhash}, [r0]
+  vrev64.8 rhash, rhash /* byte-swap */
+  vext.8 rhash, rhash, rhash, #8
+
+  vmov rbuf1, rhash
+  GCM_LSH_1(r0, rhash_l, rhash_h, rrconst_h, rh1_l, rh1_h, rt1_l) /* H<<<1 */
+
+  CLEAR_REG(rh1)
+  CLEAR_REG(rhash)
+  CLEAR_REG(rbuf1)
+  CLEAR_REG(rrconst)
+  vpop {q4-q7}
+  bx lr
+.size _gcry_ghash_setup_armv7_neon,.-_gcry_ghash_setup_armv7_neon;
+
+#endif
diff --git a/grub-core/lib/libgcrypt/cipher/cipher-gcm-armv8-aarch32-ce.S 
b/grub-core/lib/libgcrypt/cipher/cipher-gcm-armv8-aarch32-ce.S
new file mode 100644
index 000000000..00c547de4
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/cipher-gcm-armv8-aarch32-ce.S
@@ -0,0 +1,588 @@
+/* cipher-gcm-armv8-aarch32-ce.S - ARM/CE accelerated GHASH
+ * Copyright (C) 2016 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+
+#if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) && \
+    defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) && \
+    defined(HAVE_GCC_INLINE_ASM_AARCH32_CRYPTO)
+
+.syntax unified
+.arch armv8-a
+.fpu crypto-neon-fp-armv8
+.arm
+
+.text
+
+#ifdef __PIC__
+#  define GET_DATA_POINTER(reg, name, rtmp) \
+               ldr reg, 1f; \
+               ldr rtmp, 2f; \
+               b 3f; \
+       1:      .word _GLOBAL_OFFSET_TABLE_-(3f+8); \
+       2:      .word name(GOT); \
+       3:      add reg, pc, reg; \
+               ldr reg, [reg, rtmp];
+#else
+#  define GET_DATA_POINTER(reg, name, rtmp) ldr reg, =name
+#endif
+
+
+/* Constants */
+
+.align 4
+gcry_gcm_reduction_constant:
+.Lrconst64:
+  .quad 0xc200000000000000
+
+
+/* Register macros */
+
+#define rhash q0
+#define rhash_l d0
+#define rhash_h d1
+
+#define rh1 q1
+#define rh1_l d2
+#define rh1_h d3
+
+#define rbuf q2
+#define rbuf_l d4
+#define rbuf_h d5
+
+#define rbuf1 q3
+#define rbuf1_l d6
+#define rbuf1_h d7
+
+#define rbuf2 q4
+#define rbuf2_l d8
+#define rbuf2_h d9
+
+#define rbuf3 q5
+#define rbuf3_l d10
+#define rbuf3_h d11
+
+#define rh2 q6
+#define rh2_l d12
+#define rh2_h d13
+
+#define rh3 q7
+#define rh3_l d14
+#define rh3_h d15
+
+#define rh4 q8
+#define rh4_l d16
+#define rh4_h d17
+
+#define rr2 q9
+#define rr2_l d18
+#define rr2_h d19
+
+#define rr3 q10
+#define rr3_l d20
+#define rr3_h d21
+
+#define rr0 q11
+#define rr0_l d22
+#define rr0_h d23
+
+#define rr1 q12
+#define rr1_l d24
+#define rr1_h d25
+
+#define rt0 q13
+#define rt0_l d26
+#define rt0_h d27
+
+#define rt1 q14
+#define rt1_l d28
+#define rt1_h d29
+
+#define rrconst q15
+#define rrconst_l d30
+#define rrconst_h d31
+
+/* GHASH macros */
+
+/* See "Gouvêa, C. P. L. & López, J. Implementing GCM on ARMv8. Topics in
+ * Cryptology — CT-RSA 2015" for details.
+ */
+
+/* Input: 'a' and 'b', Output: 'r0:r1' (low 128-bits in r0, high in r1)
+ *  Note: 'r1' may be 'a' or 'b', 'r0' must not be either 'a' or 'b'.
+ */
+#define PMUL_128x128(r0, r1, a, b, t, interleave_op) \
+        veor t##_h, b##_l, b##_h; \
+        veor t##_l, a##_l, a##_h; \
+        vmull.p64 r0, a##_l, b##_l; \
+        vmull.p64 r1, a##_h, b##_h; \
+        vmull.p64 t, t##_h, t##_l; \
+        interleave_op; \
+        veor t, r0; \
+        veor t, r1; \
+        veor r0##_h, t##_l; \
+        veor r1##_l, t##_h;
+
+/* Input: 'aA' and 'bA', Output: 'r0A:r1A' (low 128-bits in r0A, high in r1A)
+ *  Note: 'r1A' may be 'aA' or 'bA', 'r0A' must not be either 'aA' or 'bA'.
+ * Input: 'aB' and 'bB', Output: 'r0B:r1B' (low 128-bits in r0B, high in r1B)
+ *  Note: 'r1B' may be 'aB' or 'bB', 'r0B' must not be either 'aB' or 'bB'.
+ */
+#define PMUL_128x128_2(r0A, r1A, aA, bA, r0B, r1B, aB, bB, tA, tB, 
interleave_op) \
+        veor tA##_h, bA##_l, bA##_h; \
+        veor tA##_l, aA##_l, aA##_h; \
+          veor tB##_h, bB##_l, bB##_h; \
+          veor tB##_l, aB##_l, aB##_h; \
+        vmull.p64 r0A, aA##_l, bA##_l; \
+        vmull.p64 r1A, aA##_h, bA##_h; \
+        vmull.p64 tA, tA##_h, tA##_l; \
+          vmull.p64 r0B, aB##_l, bB##_l; \
+          vmull.p64 r1B, aB##_h, bB##_h; \
+          vmull.p64 tB, tB##_h, tB##_l; \
+        interleave_op; \
+        veor tA, r0A; \
+        veor tA, r1A; \
+          veor tB, r0B; \
+          veor tB, r1B; \
+        veor r0A##_h, tA##_l; \
+        veor r1A##_l, tA##_h; \
+          veor r0B##_h, tB##_l; \
+          veor r1B##_l, tB##_h; \
+
+/* Input: 'r0:r1', Output: 'a' */
+#define REDUCTION(a, r0, r1, rconst, t, interleave_op) \
+        vmull.p64 t, r0##_l, rconst; \
+        veor r0##_h, t##_l; \
+        veor r1##_l, t##_h; \
+        interleave_op; \
+        vmull.p64 t, r0##_h, rconst; \
+        veor r1, t; \
+        veor a, r0, r1;
+
+#define _(...) __VA_ARGS__
+#define __ _()
+
+/* Other functional macros */
+
+#define CLEAR_REG(reg) vmov.i8 reg, #0;
+
+
+/*
+ * unsigned int _gcry_ghash_armv8_ce_pmull (void *gcm_key, byte *result,
+ *                                          const byte *buf, size_t nblocks,
+ *                                          void *gcm_table);
+ */
+.align 3
+.globl _gcry_ghash_armv8_ce_pmull
+.type  _gcry_ghash_armv8_ce_pmull,%function;
+_gcry_ghash_armv8_ce_pmull:
+  /* input:
+   *    r0: gcm_key
+   *    r1: result/hash
+   *    r2: buf
+   *    r3: nblocks
+   *    %st+0: gcm_table
+   */
+  push {r4-r6, lr}
+
+  cmp r3, #0
+  beq .Ldo_nothing
+
+  GET_DATA_POINTER(r4, .Lrconst64, lr)
+
+  vld1.64 {rhash}, [r1]
+  vld1.64 {rh1}, [r0]
+
+  vrev64.8 rhash, rhash /* byte-swap */
+  vld1.64 {rrconst_h}, [r4]
+  vext.8 rhash, rhash, rhash, #8
+
+  cmp r3, #4
+  blo .Less_than_4
+
+  /* Bulk processing of 4 blocks per loop iteration. */
+
+  ldr r5, [sp, #(4*4)];
+  add r6, r5, #32
+
+  vpush {q4-q7}
+
+  vld1.64 {rh2-rh3}, [r5]
+  vld1.64 {rh4}, [r6]
+
+  vld1.64 {rbuf-rbuf1}, [r2]!
+  sub r3, r3, #4
+  vld1.64 {rbuf2-rbuf3}, [r2]!
+
+  cmp r3, #4
+  vrev64.8 rbuf, rbuf /* byte-swap */
+  vrev64.8 rbuf1, rbuf1 /* byte-swap */
+  vrev64.8 rbuf2, rbuf2 /* byte-swap */
+  vrev64.8 rbuf3, rbuf3 /* byte-swap */
+
+  vext.8 rbuf, rbuf, rbuf, #8
+  vext.8 rbuf1, rbuf1, rbuf1, #8
+  vext.8 rbuf2, rbuf2, rbuf2, #8
+  vext.8 rbuf3, rbuf3, rbuf3, #8
+  veor rhash, rhash, rbuf /* in0 ^ hash */
+
+  blo .Lend_4
+
+.Loop_4:
+  /* (in0 ^ hash) * H⁴ => rr2:rr3 */
+  /* (in1) * H³ => rr0:rr1 */
+  PMUL_128x128_2(rr0, rr1, rbuf1, rh3, rr2, rr3, rhash, rh4, rt1, rt0, __)
+
+  vld1.64 {rbuf-rbuf1}, [r2]!
+  sub r3, r3, #4
+  veor rr0, rr0, rr2
+  veor rr1, rr1, rr3
+
+  /* (in2) * H² => rr2:rr3 */
+  /* (in3) * H¹ => rhash:rbuf3 */
+  PMUL_128x128_2(rr2, rr3, rbuf2, rh2, rhash, rbuf3, rbuf3, rh1, rt0, rt1,
+                 _(vrev64.8 rbuf, rbuf))
+
+  vld1.64 {rbuf2}, [r2]!
+
+  vrev64.8 rbuf1, rbuf1
+  veor rr0, rr0, rr2
+  veor rr1, rr1, rr3
+
+  cmp r3, #4
+  vext.8 rbuf, rbuf, rbuf, #8
+  vext.8 rbuf1, rbuf1, rbuf1, #8
+
+  veor rr0, rr0, rhash
+  veor rr1, rr1, rbuf3
+
+  vld1.64 {rbuf3}, [r2]!
+
+  REDUCTION(rhash, rr0, rr1, rrconst_h, rt1,
+            _(vrev64.8 rbuf2, rbuf2;
+              vrev64.8 rbuf3, rbuf3))
+
+  vext.8 rbuf2, rbuf2, rbuf2, #8
+  vext.8 rbuf3, rbuf3, rbuf3, #8
+  veor rhash, rhash, rbuf /* in0 ^ hash */
+
+  bhs .Loop_4
+
+.Lend_4:
+  /* (in0 ^ hash) * H⁴ => rr2:rr3 */
+  /* (in1) * H³ => rr0:rr1 */
+  PMUL_128x128_2(rr0, rr1, rbuf1, rh3, rr2, rr3, rhash, rh4, rt1, rt0, __)
+
+  /* (in2) * H² => rhash:rbuf */
+  /* (in3) * H¹ => rbuf1:rbuf2 */
+  PMUL_128x128_2(rhash, rbuf, rbuf2, rh2, rbuf1, rbuf2, rbuf3, rh1, rt0, rt1,
+                 _(veor rr0, rr0, rr2;
+                   veor rr1, rr1, rr3))
+
+  veor rr0, rr0, rhash
+  veor rr1, rr1, rbuf
+
+  veor rr0, rr0, rbuf1
+  veor rr1, rr1, rbuf2
+
+  REDUCTION(rhash, rr0, rr1, rrconst_h, rt1,
+            _(CLEAR_REG(rr2);
+              CLEAR_REG(rr3);
+              CLEAR_REG(rbuf1);
+              CLEAR_REG(rbuf2);
+              CLEAR_REG(rbuf3);
+              CLEAR_REG(rh2);
+              CLEAR_REG(rh3);
+              CLEAR_REG(rh4)))
+
+  vpop {q4-q7}
+
+  cmp r3, #0
+  beq .Ldone
+
+.Less_than_4:
+  /* Handle remaining blocks. */
+
+  vld1.64 {rbuf}, [r2]!
+  subs r3, r3, #1
+
+  vrev64.8 rbuf, rbuf /* byte-swap */
+  vext.8 rbuf, rbuf, rbuf, #8
+
+  veor rhash, rhash, rbuf
+
+  beq .Lend
+
+.Loop:
+  vld1.64 {rbuf}, [r2]!
+  subs r3, r3, #1
+  PMUL_128x128(rr0, rr1, rhash, rh1, rt0, _(vrev64.8 rbuf, rbuf))
+  REDUCTION(rhash, rr0, rr1, rrconst_h, rt0, _(vext.8 rbuf, rbuf, rbuf, #8))
+  veor rhash, rhash, rbuf
+
+  bne .Loop
+
+.Lend:
+  PMUL_128x128(rr0, rr1, rhash, rh1, rt0, _(CLEAR_REG(rbuf)))
+  REDUCTION(rhash, rr0, rr1, rrconst_h, rt0, _(CLEAR_REG(rh1)))
+
+.Ldone:
+  CLEAR_REG(rr1)
+  vrev64.8 rhash, rhash /* byte-swap */
+  CLEAR_REG(rt0)
+  CLEAR_REG(rr0)
+  vext.8 rhash, rhash, rhash, #8
+  CLEAR_REG(rt1)
+  vst1.64 {rhash}, [r1]
+  CLEAR_REG(rhash)
+
+.Ldo_nothing:
+  mov r0, #0
+  pop {r4-r6, pc}
+.size _gcry_ghash_armv8_ce_pmull,.-_gcry_ghash_armv8_ce_pmull;
+
+
+/*
+ * unsigned int _gcry_polyval_armv8_ce_pmull (void *gcm_key, byte *result,
+ *                                            const byte *buf, size_t nblocks,
+ *                                            void *gcm_table);
+ */
+.align 3
+.globl _gcry_polyval_armv8_ce_pmull
+.type  _gcry_polyval_armv8_ce_pmull,%function;
+_gcry_polyval_armv8_ce_pmull:
+  /* input:
+   *    r0: gcm_key
+   *    r1: result/hash
+   *    r2: buf
+   *    r3: nblocks
+   *    %st+0: gcm_table
+   */
+  push {r4-r6, lr}
+
+  cmp r3, #0
+  beq .Lpolyval_do_nothing
+
+  GET_DATA_POINTER(r4, .Lrconst64, lr)
+
+  vld1.64 {rhash}, [r1]
+  vld1.64 {rh1}, [r0]
+
+  vrev64.8 rhash, rhash /* byte-swap */
+  vld1.64 {rrconst_h}, [r4]
+  vext.8 rhash, rhash, rhash, #8
+
+  cmp r3, #4
+  blo .Lpolyval_less_than_4
+
+  /* Bulk processing of 4 blocks per loop iteration. */
+
+  ldr r5, [sp, #(4*4)];
+  add r6, r5, #32
+
+  vpush {q4-q7}
+
+  vld1.64 {rh2-rh3}, [r5]
+  vld1.64 {rh4}, [r6]
+
+  vld1.64 {rbuf-rbuf1}, [r2]!
+  sub r3, r3, #4
+  vld1.64 {rbuf2-rbuf3}, [r2]!
+
+  cmp r3, #4
+  veor rhash, rhash, rbuf /* in0 ^ hash */
+
+  blo .Lpolyval_end_4
+
+.Lpolyval_loop_4:
+  /* (in0 ^ hash) * H⁴ => rr2:rr3 */
+  /* (in1) * H³ => rr0:rr1 */
+  PMUL_128x128_2(rr0, rr1, rbuf1, rh3, rr2, rr3, rhash, rh4, rt1, rt0, __)
+
+  vld1.64 {rbuf-rbuf1}, [r2]!
+  sub r3, r3, #4
+  veor rr0, rr0, rr2
+  veor rr1, rr1, rr3
+
+  /* (in2) * H² => rr2:rr3 */
+  /* (in3) * H¹ => rhash:rbuf3 */
+  PMUL_128x128_2(rr2, rr3, rbuf2, rh2, rhash, rbuf3, rbuf3, rh1, rt0, rt1, __)
+
+  vld1.64 {rbuf2}, [r2]!
+
+  veor rr0, rr0, rr2
+  veor rr1, rr1, rr3
+
+  cmp r3, #4
+
+  veor rr0, rr0, rhash
+  veor rr1, rr1, rbuf3
+
+  vld1.64 {rbuf3}, [r2]!
+
+  REDUCTION(rhash, rr0, rr1, rrconst_h, rt1, __)
+
+  veor rhash, rhash, rbuf /* in0 ^ hash */
+
+  bhs .Lpolyval_loop_4
+
+.Lpolyval_end_4:
+  /* (in0 ^ hash) * H⁴ => rr2:rr3 */
+  /* (in1) * H³ => rr0:rr1 */
+  PMUL_128x128_2(rr0, rr1, rbuf1, rh3, rr2, rr3, rhash, rh4, rt1, rt0, __)
+
+  /* (in2) * H² => rhash:rbuf */
+  /* (in3) * H¹ => rbuf1:rbuf2 */
+  PMUL_128x128_2(rhash, rbuf, rbuf2, rh2, rbuf1, rbuf2, rbuf3, rh1, rt0, rt1,
+                 _(veor rr0, rr0, rr2;
+                   veor rr1, rr1, rr3))
+
+  veor rr0, rr0, rhash
+  veor rr1, rr1, rbuf
+
+  veor rr0, rr0, rbuf1
+  veor rr1, rr1, rbuf2
+
+  REDUCTION(rhash, rr0, rr1, rrconst_h, rt1,
+            _(CLEAR_REG(rr2);
+              CLEAR_REG(rr3);
+              CLEAR_REG(rbuf1);
+              CLEAR_REG(rbuf2);
+              CLEAR_REG(rbuf3);
+              CLEAR_REG(rh2);
+              CLEAR_REG(rh3);
+              CLEAR_REG(rh4)))
+
+  vpop {q4-q7}
+
+  cmp r3, #0
+  beq .Lpolyval_done
+
+.Lpolyval_less_than_4:
+  /* Handle remaining blocks. */
+
+  vld1.64 {rbuf}, [r2]!
+  subs r3, r3, #1
+
+  veor rhash, rhash, rbuf
+
+  beq .Lpolyval_end
+
+.Lpolyval_loop:
+  vld1.64 {rbuf}, [r2]!
+  subs r3, r3, #1
+  PMUL_128x128(rr0, rr1, rhash, rh1, rt0, __)
+  REDUCTION(rhash, rr0, rr1, rrconst_h, rt0, __)
+  veor rhash, rhash, rbuf
+
+  bne .Lpolyval_loop
+
+.Lpolyval_end:
+  PMUL_128x128(rr0, rr1, rhash, rh1, rt0, _(CLEAR_REG(rbuf)))
+  REDUCTION(rhash, rr0, rr1, rrconst_h, rt0, _(CLEAR_REG(rh1)))
+
+.Lpolyval_done:
+  CLEAR_REG(rr1)
+  vrev64.8 rhash, rhash /* byte-swap */
+  CLEAR_REG(rt0)
+  CLEAR_REG(rr0)
+  vext.8 rhash, rhash, rhash, #8
+  CLEAR_REG(rt1)
+  vst1.64 {rhash}, [r1]
+  CLEAR_REG(rhash)
+
+.Lpolyval_do_nothing:
+  mov r0, #0
+  pop {r4-r6, pc}
+.size _gcry_polyval_armv8_ce_pmull,.-_gcry_polyval_armv8_ce_pmull;
+
+
+/*
+ * void _gcry_ghash_setup_armv8_ce_pmull (void *gcm_key, void *gcm_table);
+ */
+.align 3
+.globl _gcry_ghash_setup_armv8_ce_pmull
+.type  _gcry_ghash_setup_armv8_ce_pmull,%function;
+_gcry_ghash_setup_armv8_ce_pmull:
+  /* input:
+   *   r0: gcm_key
+   *   r1: gcm_table
+   */
+
+  vpush {q4-q7}
+
+  GET_DATA_POINTER(r2, .Lrconst64, r3)
+
+  vld1.64 {rrconst_h}, [r2]
+
+#define GCM_LSH_1(r_out, ia, ib, const_d, oa, ob, ma) \
+        /* H <<< 1 */ \
+        vshr.s64 ma, ib, #63; \
+        vshr.u64 oa, ib, #63; \
+        vshr.u64 ob, ia, #63; \
+        vand ma, const_d; \
+        vshl.u64 ib, ib, #1; \
+        vshl.u64 ia, ia, #1; \
+        vorr ob, ib; \
+        vorr oa, ia; \
+        veor ob, ma; \
+        vst1.64 {oa, ob}, [r_out]
+
+  vld1.64 {rhash}, [r0]
+  vrev64.8 rhash, rhash /* byte-swap */
+  vext.8 rhash, rhash, rhash, #8
+
+  vmov rbuf1, rhash
+  GCM_LSH_1(r0, rhash_l, rhash_h, rrconst_h, rh1_l, rh1_h, rt1_l) /* H<<<1 */
+
+  /* H² */
+  PMUL_128x128(rr0, rr1, rbuf1, rh1, rt0, __)
+  REDUCTION(rh2, rr0, rr1, rrconst_h, rt0, __)
+  vmov rhash, rh2
+  GCM_LSH_1(r1, rh2_l, rh2_h, rrconst_h, rbuf1_l, rbuf1_h, rt1_l) /* H²<<<1 */
+  add r1, r1, #16
+
+  /* H³ */
+  PMUL_128x128(rr0, rr1, rhash, rh1, rt1, __)
+  REDUCTION(rh3, rr0, rr1, rrconst_h, rt1, __)
+
+  /* H⁴ */
+  PMUL_128x128(rr0, rr1, rhash, rbuf1, rt0, __)
+  REDUCTION(rh4, rr0, rr1, rrconst_h, rt0, __)
+
+  GCM_LSH_1(r1, rh3_l, rh3_h, rrconst_h, rt0_l, rt0_h, rt1_l) /* H³<<<1 */
+  add r1, r1, #16
+  GCM_LSH_1(r1, rh4_l, rh4_h, rrconst_h, rt0_l, rt0_h, rt1_l) /* H⁴<<<1 */
+
+  CLEAR_REG(rt0)
+  CLEAR_REG(rt1)
+  CLEAR_REG(rr1)
+  CLEAR_REG(rr0)
+  CLEAR_REG(rh1)
+  CLEAR_REG(rh2)
+  CLEAR_REG(rh3)
+  CLEAR_REG(rh4)
+  CLEAR_REG(rhash)
+  CLEAR_REG(rbuf1)
+  CLEAR_REG(rrconst)
+  vpop {q4-q7}
+  bx lr
+.size _gcry_ghash_setup_armv8_ce_pmull,.-_gcry_ghash_setup_armv8_ce_pmull;
+
+#endif
diff --git a/grub-core/lib/libgcrypt/cipher/cipher-gcm-armv8-aarch64-ce.S 
b/grub-core/lib/libgcrypt/cipher/cipher-gcm-armv8-aarch64-ce.S
new file mode 100644
index 000000000..e6714249f
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/cipher-gcm-armv8-aarch64-ce.S
@@ -0,0 +1,652 @@
+/* cipher-gcm-armv8-aarch64-ce.S - ARM/CE accelerated GHASH
+ * Copyright (C) 2016 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "asm-common-aarch64.h"
+
+#if defined(__AARCH64EL__) && \
+    defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) && \
+    defined(HAVE_GCC_INLINE_ASM_AARCH64_CRYPTO)
+
+.cpu generic+simd+crypto
+
+.text
+
+
+/* Constants */
+
+.align 4
+gcry_gcm_reduction_constant:
+.Lrconst:
+  .quad 0x87
+
+
+/* Register macros */
+
+#define rhash   v0
+#define rr0     v1
+#define rr1     v2
+#define rbuf    v3
+#define rbuf1   v4
+#define rbuf2   v5
+#define rbuf3   v6
+#define rbuf4   v7
+#define rbuf5   v8
+#define rr2     v9
+#define rr3     v10
+#define rr4     v11
+#define rr5     v12
+#define rr6     v13
+#define rr7     v14
+#define rr8     v15
+#define rr9     v16
+
+#define rrconst v18
+#define rh1     v19
+#define rh2     v20
+#define rh3     v21
+#define rh4     v22
+#define rh5     v23
+#define rh6     v24
+#define t0      v25
+#define t1      v26
+#define t2      v27
+#define t3      v28
+#define t4      v29
+#define t5      v30
+#define vZZ     v31
+
+/* GHASH macros */
+
+/* See "Gouvêa, C. P. L. & López, J. Implementing GCM on ARMv8. Topics in
+ * Cryptology — CT-RSA 2015" for details.
+ */
+
+/* Input: 'a' and 'b', Output: 'r0:r1' (low 128-bits in r0, high in r1) */
+#define PMUL_128x128(r0, r1, a, b, T0, T1, interleave_op) \
+       ext T0.16b, b.16b, b.16b, #8; \
+       pmull r0.1q, a.1d, b.1d; \
+       pmull2 r1.1q, a.2d, b.2d; \
+       pmull T1.1q, a.1d, T0.1d; \
+       pmull2 T0.1q, a.2d, T0.2d; \
+       interleave_op; \
+       eor T0.16b, T0.16b, T1.16b; \
+       ext T1.16b, vZZ.16b, T0.16b, #8; \
+       ext T0.16b, T0.16b, vZZ.16b, #8; \
+       eor r0.16b, r0.16b, T1.16b; \
+       eor r1.16b, r1.16b, T0.16b;
+
+/* Input: 'aA' and 'bA', Output: 'r0A:r1A' (low 128-bits in r0A, high in r1A)
+ * Input: 'aB' and 'bB', Output: 'r0B:r1B' (low 128-bits in r0B, high in r1B)
+ * Input: 'aC' and 'bC', Output: 'r0C:r1C' (low 128-bits in r0C, high in r1C)
+ */
+#define PMUL_128x128_3(r0A, r1A, aA, bA, t0A, t1A, \
+                       r0B, r1B, aB, bB, t0B, t1B, \
+                       r0C, r1C, aC, bC, t0C, t1C,  interleave_op) \
+        ext t0A.16b, bA.16b, bA.16b, #8; \
+        pmull r0A.1q, aA.1d, bA.1d; \
+        pmull2 r1A.1q, aA.2d, bA.2d; \
+          ext t0B.16b, bB.16b, bB.16b, #8; \
+          pmull r0B.1q, aB.1d, bB.1d; \
+          pmull2 r1B.1q, aB.2d, bB.2d; \
+            ext t0C.16b, bC.16b, bC.16b, #8; \
+            pmull r0C.1q, aC.1d, bC.1d; \
+            pmull2 r1C.1q, aC.2d, bC.2d; \
+        pmull t1A.1q, aA.1d, t0A.1d; \
+        pmull2 t0A.1q, aA.2d, t0A.2d; \
+          pmull t1B.1q, aB.1d, t0B.1d; \
+          pmull2 t0B.1q, aB.2d, t0B.2d; \
+            pmull t1C.1q, aC.1d, t0C.1d; \
+            pmull2 t0C.1q, aC.2d, t0C.2d; \
+        eor t0A.16b, t0A.16b, t1A.16b; \
+          eor t0B.16b, t0B.16b, t1B.16b; \
+            eor t0C.16b, t0C.16b, t1C.16b; \
+              interleave_op; \
+        ext t1A.16b, vZZ.16b, t0A.16b, #8; \
+        ext t0A.16b, t0A.16b, vZZ.16b, #8; \
+          ext t1B.16b, vZZ.16b, t0B.16b, #8; \
+          ext t0B.16b, t0B.16b, vZZ.16b, #8; \
+            ext t1C.16b, vZZ.16b, t0C.16b, #8; \
+            ext t0C.16b, t0C.16b, vZZ.16b, #8; \
+        eor r0A.16b, r0A.16b, t1A.16b; \
+        eor r1A.16b, r1A.16b, t0A.16b; \
+          eor r0B.16b, r0B.16b, t1B.16b; \
+          eor r1B.16b, r1B.16b, t0B.16b; \
+            eor r0C.16b, r0C.16b, t1C.16b; \
+            eor r1C.16b, r1C.16b, t0C.16b; \
+
+/* Input: 'r0:r1', Output: 'a' */
+#define REDUCTION(a, r0, r1, rconst, T0, T1, interleave_op1, interleave_op2, \
+                  interleave_op3) \
+        pmull2 T0.1q, r1.2d, rconst.2d; \
+        interleave_op1; \
+        ext T1.16b, T0.16b, vZZ.16b, #8; \
+        ext T0.16b, vZZ.16b, T0.16b, #8; \
+        interleave_op2; \
+        eor r1.16b, r1.16b, T1.16b; \
+        eor r0.16b, r0.16b, T0.16b; \
+        pmull T0.1q, r1.1d, rconst.1d; \
+        interleave_op3; \
+        eor a.16b, r0.16b, T0.16b;
+
+/* Other functional macros */
+
+#define _(...) __VA_ARGS__
+#define __ _()
+
+#define CLEAR_REG(reg) movi reg.16b, #0;
+
+#define VPUSH_ABI \
+        stp d8, d9, [sp, #-16]!; \
+        CFI_ADJUST_CFA_OFFSET(16); \
+        stp d10, d11, [sp, #-16]!; \
+        CFI_ADJUST_CFA_OFFSET(16); \
+        stp d12, d13, [sp, #-16]!; \
+        CFI_ADJUST_CFA_OFFSET(16); \
+        stp d14, d15, [sp, #-16]!; \
+        CFI_ADJUST_CFA_OFFSET(16);
+
+#define VPOP_ABI \
+        ldp d14, d15, [sp], #16; \
+        CFI_ADJUST_CFA_OFFSET(-16); \
+        ldp d12, d13, [sp], #16; \
+        CFI_ADJUST_CFA_OFFSET(-16); \
+        ldp d10, d11, [sp], #16; \
+        CFI_ADJUST_CFA_OFFSET(-16); \
+        ldp d8, d9, [sp], #16; \
+        CFI_ADJUST_CFA_OFFSET(-16);
+
+/*
+ * unsigned int _gcry_ghash_armv8_ce_pmull (void *gcm_key, byte *result,
+ *                                          const byte *buf, size_t nblocks,
+ *                                          void *gcm_table);
+ */
+.align 3
+.globl _gcry_ghash_armv8_ce_pmull
+ELF(.type  _gcry_ghash_armv8_ce_pmull,%function;)
+_gcry_ghash_armv8_ce_pmull:
+  /* input:
+   *    x0: gcm_key
+   *    x1: result/hash
+   *    x2: buf
+   *    x3: nblocks
+   *    x4: gcm_table
+   */
+  CFI_STARTPROC();
+
+  cbz x3, .Ldo_nothing;
+
+  GET_DATA_POINTER(x5, .Lrconst)
+
+  eor vZZ.16b, vZZ.16b, vZZ.16b
+  ld1 {rhash.16b}, [x1]
+  ld1 {rh1.16b}, [x0]
+
+  rbit rhash.16b, rhash.16b /* bit-swap */
+  ld1r {rrconst.2d}, [x5]
+
+  cmp x3, #6
+  b.lo .Less_than_6
+
+  add x6, x4, #64
+  VPUSH_ABI
+
+  ld1 {rh2.16b-rh5.16b}, [x4]
+  ld1 {rh6.16b}, [x6]
+
+  sub x3, x3, #6
+
+  ld1 {rbuf.16b-rbuf2.16b}, [x2], #(3*16)
+  ld1 {rbuf3.16b-rbuf5.16b}, [x2], #(3*16)
+
+  rbit rbuf.16b, rbuf.16b /* bit-swap */
+  rbit rbuf1.16b, rbuf1.16b /* bit-swap */
+  rbit rbuf2.16b, rbuf2.16b /* bit-swap */
+  rbit rbuf3.16b, rbuf3.16b /* bit-swap */
+  rbit rbuf4.16b, rbuf4.16b /* bit-swap */
+  rbit rbuf5.16b, rbuf5.16b /* bit-swap */
+  eor rhash.16b, rhash.16b, rbuf.16b
+
+  cmp x3, #6
+  b.lo .Lend_6
+
+.Loop_6:
+
+  /* (in1) * H⁵ => rr0:rr1 */
+  /* (in2) * H⁴ => rr2:rr3 */
+  /* (in0 ^ hash) * H⁶ => rr4:rr5 */
+  PMUL_128x128_3(rr0, rr1, rbuf1, rh5, t0, t1,
+                 rr2, rr3, rbuf2, rh4, t2, t3,
+                 rr4, rr5, rhash, rh6, t4, t5,
+                 _(sub x3, x3, #6))
+
+  ld1 {rbuf.16b-rbuf2.16b}, [x2], #(3*16)
+  cmp x3, #6
+
+  eor rr0.16b, rr0.16b, rr2.16b
+  eor rr1.16b, rr1.16b, rr3.16b
+
+  /* (in3) * H³ => rr2:rr3 */
+  /* (in4) * H² => rr6:rr7 */
+  /* (in5) * H¹ => rr8:rr9 */
+  PMUL_128x128_3(rr2, rr3, rbuf3, rh3, t0, t1,
+                 rr6, rr7, rbuf4, rh2, t2, t3,
+                 rr8, rr9, rbuf5, rh1, t4, t5,
+                 _(eor rr0.16b, rr0.16b, rr4.16b;
+                   eor rr1.16b, rr1.16b, rr5.16b))
+
+  eor rr0.16b, rr0.16b, rr2.16b
+  eor rr1.16b, rr1.16b, rr3.16b
+  rbit rbuf.16b, rbuf.16b
+  eor rr0.16b, rr0.16b, rr6.16b
+  eor rr1.16b, rr1.16b, rr7.16b
+  rbit rbuf1.16b, rbuf1.16b
+  eor rr0.16b, rr0.16b, rr8.16b
+  eor rr1.16b, rr1.16b, rr9.16b
+  ld1 {rbuf3.16b-rbuf5.16b}, [x2], #(3*16)
+
+  REDUCTION(rhash, rr0, rr1, rrconst, t0, t1,
+            _(rbit rbuf2.16b, rbuf2.16b),
+            _(rbit rbuf3.16b, rbuf3.16b),
+            _(rbit rbuf4.16b, rbuf4.16b))
+
+  rbit rbuf5.16b, rbuf5.16b
+  eor rhash.16b, rhash.16b, rbuf.16b
+
+  b.hs .Loop_6
+
+.Lend_6:
+
+  /* (in1) * H⁵ => rr0:rr1 */
+  /* (in0 ^ hash) * H⁶ => rr2:rr3 */
+  /* (in2) * H⁴ => rr4:rr5 */
+  PMUL_128x128_3(rr0, rr1, rbuf1, rh5, t0, t1,
+                 rr2, rr3, rhash, rh6, t2, t3,
+                 rr4, rr5, rbuf2, rh4, t4, t5,
+                 __)
+  eor rr0.16b, rr0.16b, rr2.16b
+  eor rr1.16b, rr1.16b, rr3.16b
+  eor rr0.16b, rr0.16b, rr4.16b
+  eor rr1.16b, rr1.16b, rr5.16b
+
+  /* (in3) * H³ => rhash:rbuf */
+  /* (in4) * H² => rr6:rr7 */
+  /* (in5) * H¹ => rr8:rr9 */
+  PMUL_128x128_3(rhash, rbuf, rbuf3, rh3, t0, t1,
+                 rr6, rr7, rbuf4, rh2, t2, t3,
+                 rr8, rr9, rbuf5, rh1, t4, t5,
+                 _(CLEAR_REG(rh4);
+                   CLEAR_REG(rh5);
+                   CLEAR_REG(rh6)))
+  eor rr0.16b, rr0.16b, rhash.16b
+  eor rr1.16b, rr1.16b, rbuf.16b
+  eor rr0.16b, rr0.16b, rr6.16b
+  eor rr1.16b, rr1.16b, rr7.16b
+  eor rr0.16b, rr0.16b, rr8.16b
+  eor rr1.16b, rr1.16b, rr9.16b
+
+  REDUCTION(rhash, rr0, rr1, rrconst, t0, t1,
+            _(CLEAR_REG(rh2);
+              CLEAR_REG(rh3);
+              CLEAR_REG(rr2);
+              CLEAR_REG(rbuf2);
+              CLEAR_REG(rbuf3)),
+            _(CLEAR_REG(rr3);
+              CLEAR_REG(rr4);
+              CLEAR_REG(rr5);
+              CLEAR_REG(rr6);
+              CLEAR_REG(rr7)),
+            _(CLEAR_REG(rr8);
+              CLEAR_REG(rr9);
+              CLEAR_REG(rbuf1);
+              CLEAR_REG(rbuf2)))
+
+  CLEAR_REG(rbuf4)
+  CLEAR_REG(rbuf5)
+  CLEAR_REG(t2)
+  CLEAR_REG(t3)
+  CLEAR_REG(t4)
+  CLEAR_REG(t5)
+
+  VPOP_ABI
+
+  cbz x3, .Ldone
+
+.Less_than_6:
+  /* Handle remaining blocks. */
+
+  ld1 {rbuf.16b}, [x2], #16
+  sub x3, x3, #1
+
+  rbit rbuf.16b, rbuf.16b /* bit-swap */
+
+  eor rhash.16b, rhash.16b, rbuf.16b
+
+  cbz x3, .Lend
+
+.Loop:
+  PMUL_128x128(rr0, rr1, rh1, rhash, t0, t1, _(ld1 {rbuf.16b}, [x2], #16))
+  REDUCTION(rhash, rr0, rr1, rrconst, t0, t1,
+            _(sub x3, x3, #1),
+            _(rbit rbuf.16b, rbuf.16b),
+            __)
+  eor rhash.16b, rhash.16b, rbuf.16b
+
+  cbnz x3, .Loop
+
+.Lend:
+  PMUL_128x128(rr0, rr1, rh1, rhash, t0, t1, _(CLEAR_REG(rbuf)))
+  REDUCTION(rhash, rr0, rr1, rrconst, t0, t1, __, _(CLEAR_REG(rh1)), __)
+
+.Ldone:
+  CLEAR_REG(rr1)
+  CLEAR_REG(rr0)
+  rbit rhash.16b, rhash.16b /* bit-swap */
+  CLEAR_REG(t0)
+  CLEAR_REG(t1)
+
+  st1 {rhash.2d}, [x1]
+  CLEAR_REG(rhash)
+
+.Ldo_nothing:
+  mov x0, #0
+  ret_spec_stop
+  CFI_ENDPROC()
+ELF(.size _gcry_ghash_armv8_ce_pmull,.-_gcry_ghash_armv8_ce_pmull;)
+
+
+/*
+ * unsigned int _gcry_polyval_armv8_ce_pmull (void *gcm_key, byte *result,
+ *                                            const byte *buf, size_t nblocks,
+ *                                            void *gcm_table);
+ */
+.align 3
+.globl _gcry_polyval_armv8_ce_pmull
+ELF(.type  _gcry_polyval_armv8_ce_pmull,%function;)
+_gcry_polyval_armv8_ce_pmull:
+  /* input:
+   *    x0: gcm_key
+   *    x1: result/hash
+   *    x2: buf
+   *    x3: nblocks
+   *    x4: gcm_table
+   */
+  CFI_STARTPROC();
+
+  cbz x3, .Lpolyval_do_nothing;
+
+  GET_DATA_POINTER(x5, .Lrconst)
+
+  eor vZZ.16b, vZZ.16b, vZZ.16b
+  ld1 {rhash.16b}, [x1]
+  ld1 {rh1.16b}, [x0]
+
+  rbit rhash.16b, rhash.16b /* bit-swap */
+  ld1r {rrconst.2d}, [x5]
+
+  cmp x3, #6
+  b.lo .Lpolyval_less_than_6
+
+  add x6, x4, #64
+  VPUSH_ABI
+
+  ld1 {rh2.16b-rh5.16b}, [x4]
+  ld1 {rh6.16b}, [x6]
+
+  sub x3, x3, #6
+
+  ld1 {rbuf.16b-rbuf2.16b}, [x2], #(3*16)
+  ld1 {rbuf3.16b-rbuf5.16b}, [x2], #(3*16)
+  rev64 rbuf.16b, rbuf.16b /* byte-swap */
+  rev64 rbuf1.16b, rbuf1.16b /* byte-swap */
+  rev64 rbuf2.16b, rbuf2.16b /* byte-swap */
+  rev64 rbuf3.16b, rbuf3.16b /* byte-swap */
+  rev64 rbuf4.16b, rbuf4.16b /* byte-swap */
+  rev64 rbuf5.16b, rbuf5.16b /* byte-swap */
+  ext rbuf.16b, rbuf.16b, rbuf.16b, #8 /* byte-swap */
+  ext rbuf1.16b, rbuf1.16b, rbuf1.16b, #8 /* byte-swap */
+  ext rbuf2.16b, rbuf2.16b, rbuf2.16b, #8 /* byte-swap */
+  ext rbuf3.16b, rbuf3.16b, rbuf3.16b, #8 /* byte-swap */
+  ext rbuf4.16b, rbuf4.16b, rbuf4.16b, #8 /* byte-swap */
+  ext rbuf5.16b, rbuf5.16b, rbuf5.16b, #8 /* byte-swap */
+  rbit rbuf.16b, rbuf.16b /* bit-swap */
+  rbit rbuf1.16b, rbuf1.16b /* bit-swap */
+  rbit rbuf2.16b, rbuf2.16b /* bit-swap */
+  rbit rbuf3.16b, rbuf3.16b /* bit-swap */
+  rbit rbuf4.16b, rbuf4.16b /* bit-swap */
+  rbit rbuf5.16b, rbuf5.16b /* bit-swap */
+  eor rhash.16b, rhash.16b, rbuf.16b
+
+  cmp x3, #6
+  b.lo .Lpolyval_end_6
+
+.Lpolyval_loop_6:
+
+  /* (in1) * H⁵ => rr0:rr1 */
+  /* (in2) * H⁴ => rr2:rr3 */
+  /* (in0 ^ hash) * H⁶ => rr4:rr5 */
+  PMUL_128x128_3(rr0, rr1, rbuf1, rh5, t0, t1,
+                 rr2, rr3, rbuf2, rh4, t2, t3,
+                 rr4, rr5, rhash, rh6, t4, t5,
+                 _(sub x3, x3, #6))
+
+  ld1 {rbuf.16b-rbuf2.16b}, [x2], #(3*16)
+  cmp x3, #6
+
+  eor rr0.16b, rr0.16b, rr2.16b
+  eor rr1.16b, rr1.16b, rr3.16b
+
+  /* (in3) * H³ => rr2:rr3 */
+  /* (in4) * H² => rr6:rr7 */
+  /* (in5) * H¹ => rr8:rr9 */
+  PMUL_128x128_3(rr2, rr3, rbuf3, rh3, t0, t1,
+                 rr6, rr7, rbuf4, rh2, t2, t3,
+                 rr8, rr9, rbuf5, rh1, t4, t5,
+                 _(eor rr0.16b, rr0.16b, rr4.16b;
+                   eor rr1.16b, rr1.16b, rr5.16b))
+
+  rev64 rbuf.16b, rbuf.16b /* byte-swap */
+  rev64 rbuf1.16b, rbuf1.16b /* byte-swap */
+  rev64 rbuf2.16b, rbuf2.16b /* byte-swap */
+  ext rbuf.16b, rbuf.16b, rbuf.16b, #8 /* byte-swap */
+  ext rbuf1.16b, rbuf1.16b, rbuf1.16b, #8 /* byte-swap */
+  ext rbuf2.16b, rbuf2.16b, rbuf2.16b, #8 /* byte-swap */
+
+  eor rr0.16b, rr0.16b, rr2.16b
+  eor rr1.16b, rr1.16b, rr3.16b
+  rbit rbuf.16b, rbuf.16b /* bit-swap */
+  eor rr0.16b, rr0.16b, rr6.16b
+  eor rr1.16b, rr1.16b, rr7.16b
+  rbit rbuf1.16b, rbuf1.16b /* bit-swap */
+  eor rr0.16b, rr0.16b, rr8.16b
+  eor rr1.16b, rr1.16b, rr9.16b
+  ld1 {rbuf3.16b-rbuf5.16b}, [x2], #(3*16)
+
+  REDUCTION(rhash, rr0, rr1, rrconst, t0, t1,
+            _(rbit rbuf2.16b, rbuf2.16b), /* bit-swap */
+            _(rev64 rbuf3.16b, rbuf3.16b), /* byte-swap */
+            _(rev64 rbuf4.16b, rbuf4.16b)) /* byte-swap */
+
+  rev64 rbuf5.16b, rbuf5.16b /* byte-swap */
+  ext rbuf3.16b, rbuf3.16b, rbuf3.16b, #8 /* byte-swap */
+
+  eor rhash.16b, rhash.16b, rbuf.16b
+
+  ext rbuf4.16b, rbuf4.16b, rbuf4.16b, #8 /* byte-swap */
+  ext rbuf5.16b, rbuf5.16b, rbuf5.16b, #8 /* byte-swap */
+  rbit rbuf3.16b, rbuf3.16b /* bit-swap */
+  rbit rbuf4.16b, rbuf4.16b /* bit-swap */
+  rbit rbuf5.16b, rbuf5.16b /* bit-swap */
+
+  b.hs .Lpolyval_loop_6
+
+.Lpolyval_end_6:
+
+  /* (in1) * H⁵ => rr0:rr1 */
+  /* (in0 ^ hash) * H⁶ => rr2:rr3 */
+  /* (in2) * H⁴ => rr4:rr5 */
+  PMUL_128x128_3(rr0, rr1, rbuf1, rh5, t0, t1,
+                 rr2, rr3, rhash, rh6, t2, t3,
+                 rr4, rr5, rbuf2, rh4, t4, t5,
+                 __)
+  eor rr0.16b, rr0.16b, rr2.16b
+  eor rr1.16b, rr1.16b, rr3.16b
+  eor rr0.16b, rr0.16b, rr4.16b
+  eor rr1.16b, rr1.16b, rr5.16b
+
+  /* (in3) * H³ => rhash:rbuf */
+  /* (in4) * H² => rr6:rr7 */
+  /* (in5) * H¹ => rr8:rr9 */
+  PMUL_128x128_3(rhash, rbuf, rbuf3, rh3, t0, t1,
+                 rr6, rr7, rbuf4, rh2, t2, t3,
+                 rr8, rr9, rbuf5, rh1, t4, t5,
+                 _(CLEAR_REG(rh4);
+                   CLEAR_REG(rh5);
+                   CLEAR_REG(rh6)))
+  eor rr0.16b, rr0.16b, rhash.16b
+  eor rr1.16b, rr1.16b, rbuf.16b
+  eor rr0.16b, rr0.16b, rr6.16b
+  eor rr1.16b, rr1.16b, rr7.16b
+  eor rr0.16b, rr0.16b, rr8.16b
+  eor rr1.16b, rr1.16b, rr9.16b
+
+  REDUCTION(rhash, rr0, rr1, rrconst, t0, t1,
+            _(CLEAR_REG(rh2);
+              CLEAR_REG(rh3);
+              CLEAR_REG(rr2);
+              CLEAR_REG(rbuf2);
+              CLEAR_REG(rbuf3)),
+            _(CLEAR_REG(rr3);
+              CLEAR_REG(rr4);
+              CLEAR_REG(rr5);
+              CLEAR_REG(rr6);
+              CLEAR_REG(rr7)),
+            _(CLEAR_REG(rr8);
+              CLEAR_REG(rr9);
+              CLEAR_REG(rbuf1);
+              CLEAR_REG(rbuf2)))
+
+  CLEAR_REG(rbuf4)
+  CLEAR_REG(rbuf5)
+  CLEAR_REG(t2)
+  CLEAR_REG(t3)
+  CLEAR_REG(t4)
+  CLEAR_REG(t5)
+
+  VPOP_ABI
+
+  cbz x3, .Lpolyval_done
+
+.Lpolyval_less_than_6:
+  /* Handle remaining blocks. */
+
+  ld1 {rbuf.16b}, [x2], #16
+  sub x3, x3, #1
+
+  rev64 rbuf.16b, rbuf.16b /* byte-swap */
+  ext rbuf.16b, rbuf.16b, rbuf.16b, #8 /* byte-swap */
+  rbit rbuf.16b, rbuf.16b /* bit-swap */
+
+  eor rhash.16b, rhash.16b, rbuf.16b
+
+  cbz x3, .Lpolyval_end
+
+.Lpolyval_loop:
+  PMUL_128x128(rr0, rr1, rh1, rhash, t0, t1, _(ld1 {rbuf.16b}, [x2], #16))
+  REDUCTION(rhash, rr0, rr1, rrconst, t0, t1,
+            _(sub x3, x3, #1;
+              rev64 rbuf.16b, rbuf.16b), /* byte-swap */
+            _(ext rbuf.16b, rbuf.16b, rbuf.16b, #8), /* byte-swap */
+            _(rbit rbuf.16b, rbuf.16b)) /* bit-swap */
+  eor rhash.16b, rhash.16b, rbuf.16b
+
+  cbnz x3, .Lpolyval_loop
+
+.Lpolyval_end:
+  PMUL_128x128(rr0, rr1, rh1, rhash, t0, t1, _(CLEAR_REG(rbuf)))
+  REDUCTION(rhash, rr0, rr1, rrconst, t0, t1, __, _(CLEAR_REG(rh1)), __)
+
+.Lpolyval_done:
+  CLEAR_REG(rr1)
+  CLEAR_REG(rr0)
+  rbit rhash.16b, rhash.16b /* bit-swap */
+  CLEAR_REG(t0)
+  CLEAR_REG(t1)
+
+  st1 {rhash.2d}, [x1]
+  CLEAR_REG(rhash)
+
+.Lpolyval_do_nothing:
+  mov x0, #0
+  ret_spec_stop
+  CFI_ENDPROC()
+ELF(.size _gcry_polyval_armv8_ce_pmull,.-_gcry_polyval_armv8_ce_pmull;)
+
+
+/*
+ * void _gcry_ghash_setup_armv8_ce_pmull (void *gcm_key, void *gcm_table);
+ */
+.align 3
+.globl _gcry_ghash_setup_armv8_ce_pmull
+ELF(.type  _gcry_ghash_setup_armv8_ce_pmull,%function;)
+_gcry_ghash_setup_armv8_ce_pmull:
+  /* input:
+   *   x0: gcm_key
+   *   x1: gcm_table
+   */
+  CFI_STARTPROC()
+
+  GET_DATA_POINTER(x2, .Lrconst)
+
+  eor vZZ.16b, vZZ.16b, vZZ.16b
+
+  /* H¹ */
+  ld1 {rh1.16b}, [x0]
+  rbit rh1.16b, rh1.16b
+  st1 {rh1.16b}, [x0]
+
+  ld1r {rrconst.2d}, [x2]
+
+  /* H² */
+  PMUL_128x128(rr0, rr1, rh1, rh1, t0, t1, __)
+  REDUCTION(rh2, rr0, rr1, rrconst, t0, t1, __, __, __)
+
+  /* H³ */
+  PMUL_128x128(rr0, rr1, rh2, rh1, t0, t1, __)
+  REDUCTION(rh3, rr0, rr1, rrconst, t0, t1, __, __, __)
+
+  /* H⁴ */
+  PMUL_128x128(rr0, rr1, rh2, rh2, t0, t1, __)
+  REDUCTION(rh4, rr0, rr1, rrconst, t0, t1, __, __, __)
+
+  /* H⁵ */
+  PMUL_128x128(rr0, rr1, rh2, rh3, t0, t1, __)
+  REDUCTION(rh5, rr0, rr1, rrconst, t0, t1, __, __, __)
+
+  /* H⁶ */
+  PMUL_128x128(rr0, rr1, rh3, rh3, t0, t1, __)
+  REDUCTION(rh6, rr0, rr1, rrconst, t0, t1, __, __, __)
+
+  st1 {rh2.16b-rh4.16b}, [x1], #(3*16)
+  st1 {rh5.16b-rh6.16b}, [x1]
+
+  ret_spec_stop
+  CFI_ENDPROC()
+ELF(.size _gcry_ghash_setup_armv8_ce_pmull,.-_gcry_ghash_setup_armv8_ce_pmull;)
+
+#endif
diff --git a/grub-core/lib/libgcrypt/cipher/cipher-gcm-intel-pclmul.c 
b/grub-core/lib/libgcrypt/cipher/cipher-gcm-intel-pclmul.c
new file mode 100644
index 000000000..daf807d0a
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/cipher-gcm-intel-pclmul.c
@@ -0,0 +1,914 @@
+/* cipher-gcm-intel-pclmul.c  -  Intel PCLMUL accelerated Galois Counter Mode
+ *                               implementation
+ * Copyright (C) 2013-2014,2019 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser general Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "g10lib.h"
+#include "cipher.h"
+#include "bufhelp.h"
+#include "./cipher-internal.h"
+
+
+#ifdef GCM_USE_INTEL_PCLMUL
+
+
+#if _GCRY_GCC_VERSION >= 40400 /* 4.4 */
+/* Prevent compiler from issuing SSE instructions between asm blocks. */
+#  pragma GCC target("no-sse")
+#endif
+#if __clang__
+#  pragma clang attribute push (__attribute__((target("no-sse"))), apply_to = 
function)
+#endif
+
+
+#define ALWAYS_INLINE inline __attribute__((always_inline))
+#define NO_INSTRUMENT_FUNCTION __attribute__((no_instrument_function))
+
+#define ASM_FUNC_ATTR        NO_INSTRUMENT_FUNCTION
+#define ASM_FUNC_ATTR_INLINE ASM_FUNC_ATTR ALWAYS_INLINE
+
+
+/*
+ Intel PCLMUL ghash based on white paper:
+  "Intel® Carry-Less Multiplication Instruction and its Usage for Computing the
+   GCM Mode - Rev 2.01"; Shay Gueron, Michael E. Kounavis.
+ */
+static ASM_FUNC_ATTR_INLINE void reduction(void)
+{
+  /* input: <xmm1:xmm3> */
+
+  asm volatile (/* first phase of the reduction */
+                "movdqa %%xmm3, %%xmm6\n\t"
+                "movdqa %%xmm3, %%xmm5\n\t"
+                "psllq $1, %%xmm6\n\t"  /* packed right shifting << 63 */
+                "pxor %%xmm3, %%xmm6\n\t"
+                "psllq $57, %%xmm5\n\t"  /* packed right shifting << 57 */
+                "psllq $62, %%xmm6\n\t"  /* packed right shifting << 62 */
+                "pxor %%xmm5, %%xmm6\n\t" /* xor the shifted versions */
+                "pshufd $0x6a, %%xmm6, %%xmm5\n\t"
+                "pshufd $0xae, %%xmm6, %%xmm6\n\t"
+                "pxor %%xmm5, %%xmm3\n\t" /* first phase of the reduction
+                                             complete */
+
+                /* second phase of the reduction */
+                "pxor %%xmm3, %%xmm1\n\t" /* xor the shifted versions */
+                "psrlq $1, %%xmm3\n\t"    /* packed left shifting >> 1 */
+                "pxor %%xmm3, %%xmm6\n\t"
+                "psrlq $1, %%xmm3\n\t"    /* packed left shifting >> 2 */
+                "pxor %%xmm3, %%xmm1\n\t"
+                "psrlq $5, %%xmm3\n\t"    /* packed left shifting >> 7 */
+                "pxor %%xmm3, %%xmm6\n\t"
+                "pxor %%xmm6, %%xmm1\n\t" /* the result is in xmm1 */
+                ::: "memory" );
+}
+
+static ASM_FUNC_ATTR_INLINE void gfmul_pclmul(void)
+{
+  /* Input: XMM0 and XMM1, Output: XMM1. Input XMM0 stays unmodified.
+     Input must be converted to little-endian.
+   */
+  asm volatile (/* gfmul, xmm0 has operator a and xmm1 has operator b. */
+                "pshufd $78, %%xmm0, %%xmm2\n\t"
+                "pshufd $78, %%xmm1, %%xmm4\n\t"
+                "pxor %%xmm0, %%xmm2\n\t" /* xmm2 holds a0+a1 */
+                "pxor %%xmm1, %%xmm4\n\t" /* xmm4 holds b0+b1 */
+
+                "movdqa %%xmm0, %%xmm3\n\t"
+                "pclmulqdq $0, %%xmm1, %%xmm3\n\t"  /* xmm3 holds a0*b0 */
+                "pclmulqdq $17, %%xmm0, %%xmm1\n\t" /* xmm6 holds a1*b1 */
+                "movdqa %%xmm3, %%xmm5\n\t"
+                "pclmulqdq $0, %%xmm2, %%xmm4\n\t"  /* xmm4 holds 
(a0+a1)*(b0+b1) */
+
+                "pxor %%xmm1, %%xmm5\n\t" /* xmm5 holds a0*b0+a1*b1 */
+                "pxor %%xmm5, %%xmm4\n\t" /* xmm4 holds 
a0*b0+a1*b1+(a0+a1)*(b0+b1) */
+                "movdqa %%xmm4, %%xmm5\n\t"
+                "psrldq $8, %%xmm4\n\t"
+                "pslldq $8, %%xmm5\n\t"
+                "pxor %%xmm5, %%xmm3\n\t"
+                "pxor %%xmm4, %%xmm1\n\t" /* <xmm1:xmm3> holds the result of 
the
+                                             carry-less multiplication of xmm0
+                                             by xmm1 */
+                ::: "memory" );
+
+  reduction();
+}
+
+#define GFMUL_AGGR4_ASM_1(be_to_le)                                            
\
+    /* perform clmul and merge results... */                                   
\
+    "movdqu 2*16(%[h_table]), %%xmm2\n\t" /* Load H4 */                        
\
+    "movdqu 0*16(%[buf]), %%xmm5\n\t"                                          
\
+    be_to_le("pshufb %[be_mask], %%xmm5\n\t") /* be => le */                   
\
+    "pxor %%xmm5, %%xmm1\n\t"                                                  
\
+                                                                               
\
+    "pshufd $78, %%xmm2, %%xmm5\n\t"                                           
\
+    "pshufd $78, %%xmm1, %%xmm4\n\t"                                           
\
+    "pxor %%xmm2, %%xmm5\n\t" /* xmm5 holds 4:a0+a1 */                         
\
+    "pxor %%xmm1, %%xmm4\n\t" /* xmm4 holds 4:b0+b1 */                         
\
+    "movdqa %%xmm2, %%xmm3\n\t"                                                
\
+    "pclmulqdq $0, %%xmm1, %%xmm3\n\t"   /* xmm3 holds 4:a0*b0 */              
\
+    "pclmulqdq $17, %%xmm2, %%xmm1\n\t"  /* xmm1 holds 4:a1*b1 */              
\
+    "pclmulqdq $0, %%xmm5, %%xmm4\n\t"   /* xmm4 holds 4:(a0+a1)*(b0+b1) */    
\
+                                                                               
\
+    "movdqu 1*16(%[h_table]), %%xmm5\n\t" /* Load H3 */                        
\
+    "movdqu 1*16(%[buf]), %%xmm2\n\t"                                          
\
+    be_to_le("pshufb %[be_mask], %%xmm2\n\t") /* be => le */                   
\
+                                                                               
\
+    "pshufd $78, %%xmm5, %%xmm0\n\t"                                           
\
+    "pshufd $78, %%xmm2, %%xmm7\n\t"                                           
\
+    "pxor %%xmm5, %%xmm0\n\t" /* xmm0 holds 3:a0+a1 */                         
\
+    "pxor %%xmm2, %%xmm7\n\t" /* xmm7 holds 3:b0+b1 */                         
\
+    "movdqa %%xmm5, %%xmm6\n\t"                                                
\
+    "pclmulqdq $0, %%xmm2, %%xmm6\n\t"  /* xmm6 holds 3:a0*b0 */               
\
+    "pclmulqdq $17, %%xmm5, %%xmm2\n\t" /* xmm2 holds 3:a1*b1 */               
\
+    "pclmulqdq $0, %%xmm0, %%xmm7\n\t" /* xmm7 holds 3:(a0+a1)*(b0+b1) */      
\
+                                                                               
\
+    "movdqu 2*16(%[buf]), %%xmm5\n\t"                                          
\
+    be_to_le("pshufb %[be_mask], %%xmm5\n\t") /* be => le */                   
\
+                                                                               
\
+    "pxor %%xmm6, %%xmm3\n\t" /* xmm3 holds 3+4:a0*b0 */                       
\
+    "pxor %%xmm2, %%xmm1\n\t" /* xmm1 holds 3+4:a1*b1 */                       
\
+    "pxor %%xmm7, %%xmm4\n\t" /* xmm4 holds 3+4:(a0+a1)*(b0+b1) */             
\
+                                                                               
\
+    "movdqu 0*16(%[h_table]), %%xmm2\n\t" /* Load H2 */                        
\
+                                                                               
\
+    "pshufd $78, %%xmm2, %%xmm0\n\t"                                           
\
+    "pshufd $78, %%xmm5, %%xmm7\n\t"                                           
\
+    "pxor %%xmm2, %%xmm0\n\t" /* xmm0 holds 2:a0+a1 */                         
\
+    "pxor %%xmm5, %%xmm7\n\t" /* xmm7 holds 2:b0+b1 */                         
\
+    "movdqa %%xmm2, %%xmm6\n\t"                                                
\
+    "pclmulqdq $0, %%xmm5, %%xmm6\n\t"  /* xmm6 holds 2:a0*b0 */               
\
+    "pclmulqdq $17, %%xmm2, %%xmm5\n\t" /* xmm5 holds 2:a1*b1 */               
\
+    "pclmulqdq $0, %%xmm0, %%xmm7\n\t" /* xmm7 holds 2:(a0+a1)*(b0+b1) */      
\
+                                                                               
\
+    "movdqu 3*16(%[buf]), %%xmm2\n\t"                                          
\
+    be_to_le("pshufb %[be_mask], %%xmm2\n\t") /* be => le */                   
\
+                                                                               
\
+    "pxor %%xmm6, %%xmm3\n\t" /* xmm3 holds 2+3+4:a0*b0 */                     
\
+    "pxor %%xmm5, %%xmm1\n\t" /* xmm1 holds 2+3+4:a1*b1 */                     
\
+    "pxor %%xmm7, %%xmm4\n\t" /* xmm4 holds 2+3+4:(a0+a1)*(b0+b1) */
+
+#define GFMUL_AGGR4_ASM_2()                                                    
\
+    "movdqu %[h_1], %%xmm5\n\t" /* Load H1 */                                  
\
+                                                                               
\
+    "pshufd $78, %%xmm5, %%xmm0\n\t"                                           
\
+    "pshufd $78, %%xmm2, %%xmm7\n\t"                                           
\
+    "pxor %%xmm5, %%xmm0\n\t" /* xmm0 holds 1:a0+a1 */                         
\
+    "pxor %%xmm2, %%xmm7\n\t" /* xmm7 holds 1:b0+b1 */                         
\
+    "movdqa %%xmm5, %%xmm6\n\t"                                                
\
+    "pclmulqdq $0, %%xmm2, %%xmm6\n\t"  /* xmm6 holds 1:a0*b0 */               
\
+    "pclmulqdq $17, %%xmm5, %%xmm2\n\t" /* xmm2 holds 1:a1*b1 */               
\
+    "pclmulqdq $0, %%xmm0, %%xmm7\n\t" /* xmm7 holds 1:(a0+a1)*(b0+b1) */      
\
+                                                                               
\
+    "pxor %%xmm6, %%xmm3\n\t" /* xmm3 holds 1+2+3+4:a0*b0 */                   
\
+    "pxor %%xmm2, %%xmm1\n\t" /* xmm1 holds 1+2+3+4:a1*b1 */                   
\
+    "pxor %%xmm7, %%xmm4\n\t" /* xmm4 holds 1+2+3+4:(a0+a1)*(b0+b1) */         
\
+                                                                               
\
+    /* aggregated reduction... */                                              
\
+    "movdqa %%xmm3, %%xmm5\n\t"                                                
\
+    "pxor %%xmm1, %%xmm5\n\t" /* xmm5 holds a0*b0+a1*b1 */                     
\
+    "pxor %%xmm5, %%xmm4\n\t" /* xmm4 holds a0*b0+a1*b1+(a0+a1)*(b0+b1) */     
\
+    "movdqa %%xmm4, %%xmm5\n\t"                                                
\
+    "psrldq $8, %%xmm4\n\t"                                                    
\
+    "pslldq $8, %%xmm5\n\t"                                                    
\
+    "pxor %%xmm5, %%xmm3\n\t"                                                  
\
+    "pxor %%xmm4, %%xmm1\n\t" /* <xmm1:xmm3> holds the result of the           
\
+                                  carry-less multiplication of xmm0            
\
+                                  by xmm1 */
+
+#define be_to_le(...) __VA_ARGS__
+#define le_to_le(...) /*_*/
+
+static ASM_FUNC_ATTR_INLINE void
+gfmul_pclmul_aggr4(const void *buf, const void *h_1, const void *h_table,
+                  const unsigned char *be_mask)
+{
+  /* Input:
+      Hash: XMM1
+     Output:
+      Hash: XMM1
+   */
+  asm volatile (GFMUL_AGGR4_ASM_1(be_to_le)
+                :
+                : [buf] "r" (buf),
+                  [h_table] "r" (h_table),
+                  [be_mask] "m" (*be_mask)
+                : "memory" );
+
+  asm volatile (GFMUL_AGGR4_ASM_2()
+                :
+                : [h_1] "m" (*(const unsigned char *)h_1)
+                : "memory" );
+
+  reduction();
+}
+
+static ASM_FUNC_ATTR_INLINE void
+gfmul_pclmul_aggr4_le(const void *buf, const void *h_1, const void *h_table)
+{
+  /* Input:
+      Hash: XMM1
+     Output:
+      Hash: XMM1
+   */
+  asm volatile (GFMUL_AGGR4_ASM_1(le_to_le)
+                :
+                : [buf] "r" (buf),
+                  [h_table] "r" (h_table)
+                : "memory" );
+
+  asm volatile (GFMUL_AGGR4_ASM_2()
+                :
+                : [h_1] "m" (*(const unsigned char *)h_1)
+                : "memory" );
+
+  reduction();
+}
+
+#ifdef __x86_64__
+
+#define GFMUL_AGGR8_ASM(be_to_le)                                              
\
+    /* Load H6, H7, H8. */                                                     
\
+    "movdqu 6*16(%[h_table]), %%xmm10\n\t"                                     
\
+    "movdqu 5*16(%[h_table]), %%xmm9\n\t"                                      
\
+    "movdqu 4*16(%[h_table]), %%xmm8\n\t"                                      
\
+                                                                               
\
+    /* perform clmul and merge results... */                                   
\
+    "movdqu 0*16(%[buf]), %%xmm5\n\t"                                          
\
+    "movdqu 1*16(%[buf]), %%xmm2\n\t"                                          
\
+    be_to_le("pshufb %%xmm15, %%xmm5\n\t") /* be => le */                      
\
+    be_to_le("pshufb %%xmm15, %%xmm2\n\t") /* be => le */                      
\
+    "pxor %%xmm5, %%xmm1\n\t"                                                  
\
+                                                                               
\
+    "pshufd $78, %%xmm10, %%xmm5\n\t"                                          
\
+    "pshufd $78, %%xmm1, %%xmm4\n\t"                                           
\
+    "pxor %%xmm10, %%xmm5\n\t" /* xmm5 holds 8:a0+a1 */                        
\
+    "pxor %%xmm1, %%xmm4\n\t"  /* xmm4 holds 8:b0+b1 */                        
\
+    "movdqa %%xmm10, %%xmm3\n\t"                                               
\
+    "pclmulqdq $0, %%xmm1, %%xmm3\n\t"   /* xmm3 holds 8:a0*b0 */              
\
+    "pclmulqdq $17, %%xmm10, %%xmm1\n\t" /* xmm1 holds 8:a1*b1 */              
\
+    "pclmulqdq $0, %%xmm5, %%xmm4\n\t"   /* xmm4 holds 8:(a0+a1)*(b0+b1) */    
\
+                                                                               
\
+    "pshufd $78, %%xmm9, %%xmm11\n\t"                                          
\
+    "pshufd $78, %%xmm2, %%xmm7\n\t"                                           
\
+    "pxor %%xmm9, %%xmm11\n\t" /* xmm11 holds 7:a0+a1 */                       
\
+    "pxor %%xmm2, %%xmm7\n\t"  /* xmm7 holds 7:b0+b1 */                        
\
+    "movdqa %%xmm9, %%xmm6\n\t"                                                
\
+    "pclmulqdq $0, %%xmm2, %%xmm6\n\t"  /* xmm6 holds 7:a0*b0 */               
\
+    "pclmulqdq $17, %%xmm9, %%xmm2\n\t" /* xmm2 holds 7:a1*b1 */               
\
+    "pclmulqdq $0, %%xmm11, %%xmm7\n\t" /* xmm7 holds 7:(a0+a1)*(b0+b1) */     
\
+                                                                               
\
+    "pxor %%xmm6, %%xmm3\n\t" /* xmm3 holds 7+8:a0*b0 */                       
\
+    "pxor %%xmm2, %%xmm1\n\t" /* xmm1 holds 7+8:a1*b1 */                       
\
+    "pxor %%xmm7, %%xmm4\n\t" /* xmm4 holds 7+8:(a0+a1)*(b0+b1) */             
\
+                                                                               
\
+    "movdqu 2*16(%[buf]), %%xmm5\n\t"                                          
\
+    "movdqu 3*16(%[buf]), %%xmm2\n\t"                                          
\
+    be_to_le("pshufb %%xmm15, %%xmm5\n\t") /* be => le */                      
\
+    be_to_le("pshufb %%xmm15, %%xmm2\n\t") /* be => le */                      
\
+                                                                               
\
+    "pshufd $78, %%xmm8, %%xmm11\n\t"                                          
\
+    "pshufd $78, %%xmm5, %%xmm7\n\t"                                           
\
+    "pxor %%xmm8, %%xmm11\n\t" /* xmm11 holds 6:a0+a1 */                       
\
+    "pxor %%xmm5, %%xmm7\n\t"  /* xmm7 holds 6:b0+b1 */                        
\
+    "movdqa %%xmm8, %%xmm6\n\t"                                                
\
+    "pclmulqdq $0, %%xmm5, %%xmm6\n\t"  /* xmm6 holds 6:a0*b0 */               
\
+    "pclmulqdq $17, %%xmm8, %%xmm5\n\t" /* xmm5 holds 6:a1*b1 */               
\
+    "pclmulqdq $0, %%xmm11, %%xmm7\n\t" /* xmm7 holds 6:(a0+a1)*(b0+b1) */     
\
+                                                                               
\
+    /* Load H3, H4, H5. */                                                     
\
+    "movdqu 3*16(%[h_table]), %%xmm10\n\t"                                     
\
+    "movdqu 2*16(%[h_table]), %%xmm9\n\t"                                      
\
+    "movdqu 1*16(%[h_table]), %%xmm8\n\t"                                      
\
+                                                                               
\
+    "pxor %%xmm6, %%xmm3\n\t" /* xmm3 holds 6+7+8:a0*b0 */                     
\
+    "pxor %%xmm5, %%xmm1\n\t" /* xmm1 holds 6+7+8:a1*b1 */                     
\
+    "pxor %%xmm7, %%xmm4\n\t" /* xmm4 holds 6+7+8:(a0+a1)*(b0+b1) */           
\
+                                                                               
\
+    "pshufd $78, %%xmm10, %%xmm11\n\t"                                         
\
+    "pshufd $78, %%xmm2, %%xmm7\n\t"                                           
\
+    "pxor %%xmm10, %%xmm11\n\t" /* xmm11 holds 5:a0+a1 */                      
\
+    "pxor %%xmm2, %%xmm7\n\t"   /* xmm7 holds 5:b0+b1 */                       
\
+    "movdqa %%xmm10, %%xmm6\n\t"                                               
\
+    "pclmulqdq $0, %%xmm2, %%xmm6\n\t"   /* xmm6 holds 5:a0*b0 */              
\
+    "pclmulqdq $17, %%xmm10, %%xmm2\n\t" /* xmm2 holds 5:a1*b1 */              
\
+    "pclmulqdq $0, %%xmm11, %%xmm7\n\t"  /* xmm7 holds 5:(a0+a1)*(b0+b1) */    
\
+                                                                               
\
+    "pxor %%xmm6, %%xmm3\n\t" /* xmm3 holds 5+6+7+8:a0*b0 */                   
\
+    "pxor %%xmm2, %%xmm1\n\t" /* xmm1 holds 5+6+7+8:a1*b1 */                   
\
+    "pxor %%xmm7, %%xmm4\n\t" /* xmm4 holds 5+6+7+8:(a0+a1)*(b0+b1) */         
\
+                                                                               
\
+    "movdqu 4*16(%[buf]), %%xmm5\n\t"                                          
\
+    "movdqu 5*16(%[buf]), %%xmm2\n\t"                                          
\
+    be_to_le("pshufb %%xmm15, %%xmm5\n\t") /* be => le */                      
\
+    be_to_le("pshufb %%xmm15, %%xmm2\n\t") /* be => le */                      
\
+                                                                               
\
+    "pshufd $78, %%xmm9, %%xmm11\n\t"                                          
\
+    "pshufd $78, %%xmm5, %%xmm7\n\t"                                           
\
+    "pxor %%xmm9, %%xmm11\n\t" /* xmm11 holds 4:a0+a1 */                       
\
+    "pxor %%xmm5, %%xmm7\n\t"  /* xmm7 holds 4:b0+b1 */                        
\
+    "movdqa %%xmm9, %%xmm6\n\t"                                                
\
+    "pclmulqdq $0, %%xmm5, %%xmm6\n\t"  /* xmm6 holds 4:a0*b0 */               
\
+    "pclmulqdq $17, %%xmm9, %%xmm5\n\t" /* xmm5 holds 4:a1*b1 */               
\
+    "pclmulqdq $0, %%xmm11, %%xmm7\n\t" /* xmm7 holds 4:(a0+a1)*(b0+b1) */     
\
+                                                                               
\
+    "pxor %%xmm6, %%xmm3\n\t" /* xmm3 holds 4+5+6+7+8:a0*b0 */                 
\
+    "pxor %%xmm5, %%xmm1\n\t" /* xmm1 holds 4+5+6+7+8:a1*b1 */                 
\
+    "pxor %%xmm7, %%xmm4\n\t" /* xmm4 holds 4+5+6+7+8:(a0+a1)*(b0+b1) */       
\
+                                                                               
\
+    "pshufd $78, %%xmm8, %%xmm11\n\t"                                          
\
+    "pshufd $78, %%xmm2, %%xmm7\n\t"                                           
\
+    "pxor %%xmm8, %%xmm11\n\t" /* xmm11 holds 3:a0+a1 */                       
\
+    "pxor %%xmm2, %%xmm7\n\t"  /* xmm7 holds 3:b0+b1 */                        
\
+    "movdqa %%xmm8, %%xmm6\n\t"                                                
\
+    "pclmulqdq $0, %%xmm2, %%xmm6\n\t"  /* xmm6 holds 3:a0*b0 */               
\
+    "pclmulqdq $17, %%xmm8, %%xmm2\n\t" /* xmm2 holds 3:a1*b1 */               
\
+    "pclmulqdq $0, %%xmm11, %%xmm7\n\t" /* xmm7 holds 3:(a0+a1)*(b0+b1) */     
\
+                                                                               
\
+    "movdqu 0*16(%[h_table]), %%xmm8\n\t" /* Load H2 */                        
\
+                                                                               
\
+    "pxor %%xmm6, %%xmm3\n\t" /* xmm3 holds 3+4+5+6+7+8:a0*b0 */               
\
+    "pxor %%xmm2, %%xmm1\n\t" /* xmm1 holds 3+4+5+6+7+8:a1*b1 */               
\
+    "pxor %%xmm7, %%xmm4\n\t" /* xmm4 holds 3+4+5+6+7+8:(a0+a1)*(b0+b1) */     
\
+                                                                               
\
+    "movdqu 6*16(%[buf]), %%xmm5\n\t"                                          
\
+    "movdqu 7*16(%[buf]), %%xmm2\n\t"                                          
\
+    be_to_le("pshufb %%xmm15, %%xmm5\n\t") /* be => le */                      
\
+    be_to_le("pshufb %%xmm15, %%xmm2\n\t") /* be => le */                      
\
+                                                                               
\
+    "pshufd $78, %%xmm8, %%xmm11\n\t"                                          
\
+    "pshufd $78, %%xmm5, %%xmm7\n\t"                                           
\
+    "pxor %%xmm8, %%xmm11\n\t"  /* xmm11 holds 4:a0+a1 */                      
\
+    "pxor %%xmm5, %%xmm7\n\t"   /* xmm7 holds 4:b0+b1 */                       
\
+    "movdqa %%xmm8, %%xmm6\n\t"                                                
\
+    "pclmulqdq $0, %%xmm5, %%xmm6\n\t"   /* xmm6 holds 4:a0*b0 */              
\
+    "pclmulqdq $17, %%xmm8, %%xmm5\n\t"  /* xmm5 holds 4:a1*b1 */              
\
+    "pclmulqdq $0, %%xmm11, %%xmm7\n\t"  /* xmm7 holds 4:(a0+a1)*(b0+b1) */    
\
+                                                                               
\
+    "pxor %%xmm6, %%xmm3\n\t" /* xmm3 holds 2+3+4+5+6+7+8:a0*b0 */             
\
+    "pxor %%xmm5, %%xmm1\n\t" /* xmm1 holds 2+3+4+5+6+7+8:a1*b1 */             
\
+    "pxor %%xmm7, %%xmm4\n\t" /* xmm4 holds 2+3+4+5+6+7+8:(a0+a1)*(b0+b1) */   
\
+                                                                               
\
+    "pshufd $78, %%xmm0, %%xmm11\n\t"                                          
\
+    "pshufd $78, %%xmm2, %%xmm7\n\t"                                           
\
+    "pxor %%xmm0, %%xmm11\n\t" /* xmm11 holds 3:a0+a1 */                       
\
+    "pxor %%xmm2, %%xmm7\n\t"  /* xmm7 holds 3:b0+b1 */                        
\
+    "movdqa %%xmm0, %%xmm6\n\t"                                                
\
+    "pclmulqdq $0, %%xmm2, %%xmm6\n\t"  /* xmm6 holds 3:a0*b0 */               
\
+    "pclmulqdq $17, %%xmm0, %%xmm2\n\t" /* xmm2 holds 3:a1*b1 */               
\
+    "pclmulqdq $0, %%xmm11, %%xmm7\n\t" /* xmm7 holds 3:(a0+a1)*(b0+b1) */     
\
+                                                                               
\
+    "pxor %%xmm6, %%xmm3\n\t" /* xmm3 holds 1+2+3+3+4+5+6+7+8:a0*b0 */         
\
+    "pxor %%xmm2, %%xmm1\n\t" /* xmm1 holds 1+2+3+3+4+5+6+7+8:a1*b1 */         
\
+    "pxor %%xmm7, %%xmm4\n\t"/* xmm4 holds 1+2+3+3+4+5+6+7+8:(a0+a1)*(b0+b1) 
*/\
+                                                                               
\
+    /* aggregated reduction... */                                              
\
+    "movdqa %%xmm3, %%xmm5\n\t"                                                
\
+    "pxor %%xmm1, %%xmm5\n\t" /* xmm5 holds a0*b0+a1*b1 */                     
\
+    "pxor %%xmm5, %%xmm4\n\t" /* xmm4 holds a0*b0+a1*b1+(a0+a1)*(b0+b1) */     
\
+    "movdqa %%xmm4, %%xmm5\n\t"                                                
\
+    "psrldq $8, %%xmm4\n\t"                                                    
\
+    "pslldq $8, %%xmm5\n\t"                                                    
\
+    "pxor %%xmm5, %%xmm3\n\t"                                                  
\
+    "pxor %%xmm4, %%xmm1\n\t" /* <xmm1:xmm3> holds the result of the           
\
+                                  carry-less multiplication of xmm0            
\
+                                  by xmm1 */
+
+static ASM_FUNC_ATTR_INLINE void
+gfmul_pclmul_aggr8(const void *buf, const void *h_table)
+{
+  /* Input:
+      H¹: XMM0
+      bemask: XMM15
+      Hash: XMM1
+     Output:
+      Hash: XMM1
+     Inputs XMM0 and XMM15 stays unmodified.
+   */
+  asm volatile (GFMUL_AGGR8_ASM(be_to_le)
+                :
+                : [buf] "r" (buf),
+                  [h_table] "r" (h_table)
+                : "memory" );
+
+  reduction();
+}
+
+static ASM_FUNC_ATTR_INLINE void
+gfmul_pclmul_aggr8_le(const void *buf, const void *h_table)
+{
+  /* Input:
+      H¹: XMM0
+      Hash: XMM1
+     Output:
+      Hash: XMM1
+     Inputs XMM0 and XMM15 stays unmodified.
+   */
+  asm volatile (GFMUL_AGGR8_ASM(le_to_le)
+                :
+                : [buf] "r" (buf),
+                  [h_table] "r" (h_table)
+                : "memory" );
+
+  reduction();
+}
+#endif
+
+static ASM_FUNC_ATTR_INLINE void gcm_lsh(void *h, unsigned int hoffs)
+{
+  static const u64 pconst[2] __attribute__ ((aligned (16))) =
+    { U64_C(0x0000000000000001), U64_C(0xc200000000000000) };
+
+  asm volatile ("movdqu (%[h]), %%xmm2\n\t"
+                "pshufd $0xff, %%xmm2, %%xmm3\n\t"
+                "movdqa %%xmm2, %%xmm4\n\t"
+                "psrad $31, %%xmm3\n\t"
+                "pslldq $8, %%xmm4\n\t"
+                "pand %[pconst], %%xmm3\n\t"
+                "paddq %%xmm2, %%xmm2\n\t"
+                "psrlq $63, %%xmm4\n\t"
+                "pxor %%xmm3, %%xmm2\n\t"
+                "pxor %%xmm4, %%xmm2\n\t"
+                "movdqu %%xmm2, (%[h])\n\t"
+                :
+                : [pconst] "m" (*pconst),
+                  [h] "r" ((byte *)h + hoffs)
+                : "memory" );
+}
+
+void ASM_FUNC_ATTR
+_gcry_ghash_setup_intel_pclmul (gcry_cipher_hd_t c)
+{
+  static const unsigned char be_mask[16] __attribute__ ((aligned (16))) =
+    { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
+#if defined(__x86_64__) && defined(__WIN64__)
+  char win64tmp[10 * 16];
+
+  /* XMM6-XMM15 need to be restored after use. */
+  asm volatile ("movdqu %%xmm6,  0*16(%0)\n\t"
+                "movdqu %%xmm7,  1*16(%0)\n\t"
+                "movdqu %%xmm8,  2*16(%0)\n\t"
+                "movdqu %%xmm9,  3*16(%0)\n\t"
+                "movdqu %%xmm10, 4*16(%0)\n\t"
+                "movdqu %%xmm11, 5*16(%0)\n\t"
+                "movdqu %%xmm12, 6*16(%0)\n\t"
+                "movdqu %%xmm13, 7*16(%0)\n\t"
+                "movdqu %%xmm14, 8*16(%0)\n\t"
+                "movdqu %%xmm15, 9*16(%0)\n\t"
+                :
+                : "r" (win64tmp)
+                : "memory" );
+#endif
+
+  /* Swap endianness of hsub. */
+  asm volatile ("movdqu (%[key]), %%xmm0\n\t"
+                "pshufb %[be_mask], %%xmm0\n\t"
+                "movdqu %%xmm0, (%[key])\n\t"
+                :
+                : [key] "r" (c->u_mode.gcm.u_ghash_key.key),
+                  [be_mask] "m" (*be_mask)
+                : "memory");
+
+  gcm_lsh(c->u_mode.gcm.u_ghash_key.key, 0); /* H <<< 1 */
+
+  asm volatile ("movdqa %%xmm0, %%xmm1\n\t"
+                "movdqu (%[key]), %%xmm0\n\t" /* load H <<< 1 */
+                :
+                : [key] "r" (c->u_mode.gcm.u_ghash_key.key)
+                : "memory");
+
+  gfmul_pclmul (); /* H<<<1•H => H² */
+
+  asm volatile ("movdqu %%xmm1, 0*16(%[h_table])\n\t"
+                "movdqa %%xmm1, %%xmm7\n\t"
+                :
+                : [h_table] "r" (c->u_mode.gcm.gcm_table)
+                : "memory");
+
+  gcm_lsh(c->u_mode.gcm.gcm_table, 0 * 16); /* H² <<< 1 */
+  gfmul_pclmul (); /* H<<<1•H² => H³ */
+
+  asm volatile ("movdqa %%xmm7, %%xmm0\n\t"
+                "movdqu %%xmm1, 1*16(%[h_table])\n\t"
+                "movdqu 0*16(%[h_table]), %%xmm1\n\t" /* load H² <<< 1 */
+                :
+                : [h_table] "r" (c->u_mode.gcm.gcm_table)
+                : "memory");
+
+  gfmul_pclmul (); /* H²<<<1•H² => H⁴ */
+
+  asm volatile ("movdqu %%xmm1, 2*16(%[h_table])\n\t"
+                "movdqa %%xmm1, %%xmm0\n\t"
+                "movdqu (%[key]), %%xmm1\n\t" /* load H <<< 1 */
+                :
+                : [h_table] "r" (c->u_mode.gcm.gcm_table),
+                  [key] "r" (c->u_mode.gcm.u_ghash_key.key)
+                : "memory");
+
+  gcm_lsh(c->u_mode.gcm.gcm_table, 1 * 16); /* H³ <<< 1 */
+  gcm_lsh(c->u_mode.gcm.gcm_table, 2 * 16); /* H⁴ <<< 1 */
+
+#ifdef __x86_64__
+  gfmul_pclmul (); /* H<<<1•H⁴ => H⁵ */
+
+  asm volatile ("movdqu %%xmm1, 3*16(%[h_table])\n\t"
+                "movdqu 0*16(%[h_table]), %%xmm1\n\t" /* load H² <<< 1 */
+                :
+                : [h_table] "r" (c->u_mode.gcm.gcm_table)
+                : "memory");
+
+  gfmul_pclmul (); /* H²<<<1•H⁴ => H⁶ */
+
+  asm volatile ("movdqu %%xmm1, 4*16(%[h_table])\n\t"
+                "movdqu 1*16(%[h_table]), %%xmm1\n\t" /* load H³ <<< 1 */
+                :
+                : [h_table] "r" (c->u_mode.gcm.gcm_table)
+                : "memory");
+
+  gfmul_pclmul (); /* H³<<<1•H⁴ => H⁷ */
+
+  asm volatile ("movdqu %%xmm1, 5*16(%[h_table])\n\t"
+                "movdqu 2*16(%[h_table]), %%xmm1\n\t" /* load H⁴ <<< 1 */
+                :
+                : [h_table] "r" (c->u_mode.gcm.gcm_table)
+                : "memory");
+
+  gfmul_pclmul (); /* H³<<<1•H⁴ => H⁸ */
+
+  asm volatile ("movdqu %%xmm1, 6*16(%[h_table])\n\t"
+                :
+                : [h_table] "r" (c->u_mode.gcm.gcm_table)
+                : "memory");
+
+  gcm_lsh(c->u_mode.gcm.gcm_table, 3 * 16); /* H⁵ <<< 1 */
+  gcm_lsh(c->u_mode.gcm.gcm_table, 4 * 16); /* H⁶ <<< 1 */
+  gcm_lsh(c->u_mode.gcm.gcm_table, 5 * 16); /* H⁷ <<< 1 */
+  gcm_lsh(c->u_mode.gcm.gcm_table, 6 * 16); /* H⁸ <<< 1 */
+
+#ifdef __WIN64__
+  /* Clear/restore used registers. */
+  asm volatile( "pxor %%xmm0, %%xmm0\n\t"
+                "pxor %%xmm1, %%xmm1\n\t"
+                "pxor %%xmm2, %%xmm2\n\t"
+                "pxor %%xmm3, %%xmm3\n\t"
+                "pxor %%xmm4, %%xmm4\n\t"
+                "pxor %%xmm5, %%xmm5\n\t"
+                "movdqu 0*16(%0), %%xmm6\n\t"
+                "movdqu 1*16(%0), %%xmm7\n\t"
+                "movdqu 2*16(%0), %%xmm8\n\t"
+                "movdqu 3*16(%0), %%xmm9\n\t"
+                "movdqu 4*16(%0), %%xmm10\n\t"
+                "movdqu 5*16(%0), %%xmm11\n\t"
+                "movdqu 6*16(%0), %%xmm12\n\t"
+                "movdqu 7*16(%0), %%xmm13\n\t"
+                "movdqu 8*16(%0), %%xmm14\n\t"
+                "movdqu 9*16(%0), %%xmm15\n\t"
+                :
+                : "r" (win64tmp)
+                : "memory" );
+#else
+  /* Clear used registers. */
+  asm volatile( "pxor %%xmm0, %%xmm0\n\t"
+                "pxor %%xmm1, %%xmm1\n\t"
+                "pxor %%xmm2, %%xmm2\n\t"
+                "pxor %%xmm3, %%xmm3\n\t"
+                "pxor %%xmm4, %%xmm4\n\t"
+                "pxor %%xmm5, %%xmm5\n\t"
+                "pxor %%xmm6, %%xmm6\n\t"
+                "pxor %%xmm7, %%xmm7\n\t"
+                "pxor %%xmm8, %%xmm8\n\t"
+                "pxor %%xmm9, %%xmm9\n\t"
+                "pxor %%xmm10, %%xmm10\n\t"
+                "pxor %%xmm11, %%xmm11\n\t"
+                "pxor %%xmm12, %%xmm12\n\t"
+                "pxor %%xmm13, %%xmm13\n\t"
+                "pxor %%xmm14, %%xmm14\n\t"
+                "pxor %%xmm15, %%xmm15\n\t"
+                ::: "memory" );
+#endif
+#endif
+}
+
+
+unsigned int ASM_FUNC_ATTR
+_gcry_ghash_intel_pclmul (gcry_cipher_hd_t c, byte *result, const byte *buf,
+                          size_t nblocks)
+{
+  static const unsigned char be_mask[16] __attribute__ ((aligned (16))) =
+    { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
+  const unsigned int blocksize = GCRY_GCM_BLOCK_LEN;
+#if defined(__x86_64__) && defined(__WIN64__)
+  char win64tmp[10 * 16];
+#endif
+
+  if (nblocks == 0)
+    return 0;
+
+#if defined(__x86_64__) && defined(__WIN64__)
+  /* XMM6-XMM15 need to be restored after use. */
+  asm volatile ("movdqu %%xmm6,  0*16(%0)\n\t"
+                "movdqu %%xmm7,  1*16(%0)\n\t"
+                "movdqu %%xmm8,  2*16(%0)\n\t"
+                "movdqu %%xmm9,  3*16(%0)\n\t"
+                "movdqu %%xmm10, 4*16(%0)\n\t"
+                "movdqu %%xmm11, 5*16(%0)\n\t"
+                "movdqu %%xmm12, 6*16(%0)\n\t"
+                "movdqu %%xmm13, 7*16(%0)\n\t"
+                "movdqu %%xmm14, 8*16(%0)\n\t"
+                "movdqu %%xmm15, 9*16(%0)\n\t"
+                :
+                : "r" (win64tmp)
+                : "memory" );
+#endif
+
+  /* Preload hash. */
+  asm volatile ("movdqa %[be_mask], %%xmm7\n\t"
+                "movdqu %[hash], %%xmm1\n\t"
+                "pshufb %%xmm7, %%xmm1\n\t" /* be => le */
+                :
+                : [hash] "m" (*result),
+                  [be_mask] "m" (*be_mask)
+                : "memory" );
+
+#ifdef __x86_64__
+  if (nblocks >= 8)
+    {
+      /* Preload H1. */
+      asm volatile ("movdqa %%xmm7, %%xmm15\n\t"
+                    "movdqa %[h_1], %%xmm0\n\t"
+                    :
+                    : [h_1] "m" (*c->u_mode.gcm.u_ghash_key.key)
+                    : "memory" );
+
+      while (nblocks >= 8)
+        {
+          gfmul_pclmul_aggr8 (buf, c->u_mode.gcm.gcm_table);
+
+          buf += 8 * blocksize;
+          nblocks -= 8;
+        }
+#ifndef __WIN64__
+      /* Clear used x86-64/XMM registers. */
+      asm volatile( "pxor %%xmm8, %%xmm8\n\t"
+                    "pxor %%xmm9, %%xmm9\n\t"
+                    "pxor %%xmm10, %%xmm10\n\t"
+                    "pxor %%xmm11, %%xmm11\n\t"
+                    "pxor %%xmm12, %%xmm12\n\t"
+                    "pxor %%xmm13, %%xmm13\n\t"
+                    "pxor %%xmm14, %%xmm14\n\t"
+                    "pxor %%xmm15, %%xmm15\n\t"
+                    ::: "memory" );
+#endif
+    }
+#endif
+
+  while (nblocks >= 4)
+    {
+      gfmul_pclmul_aggr4 (buf, c->u_mode.gcm.u_ghash_key.key,
+                          c->u_mode.gcm.gcm_table, be_mask);
+
+      buf += 4 * blocksize;
+      nblocks -= 4;
+    }
+
+  if (nblocks)
+    {
+      /* Preload H1. */
+      asm volatile ("movdqa %[h_1], %%xmm0\n\t"
+                    :
+                    : [h_1] "m" (*c->u_mode.gcm.u_ghash_key.key)
+                    : "memory" );
+
+      while (nblocks)
+        {
+          asm volatile ("movdqu %[buf], %%xmm2\n\t"
+                        "pshufb %[be_mask], %%xmm2\n\t" /* be => le */
+                        "pxor %%xmm2, %%xmm1\n\t"
+                        :
+                        : [buf] "m" (*buf), [be_mask] "m" (*be_mask)
+                        : "memory" );
+
+          gfmul_pclmul ();
+
+          buf += blocksize;
+          nblocks--;
+        }
+    }
+
+  /* Store hash. */
+  asm volatile ("pshufb %[be_mask], %%xmm1\n\t" /* be => le */
+                "movdqu %%xmm1, %[hash]\n\t"
+                : [hash] "=m" (*result)
+                : [be_mask] "m" (*be_mask)
+                : "memory" );
+
+#if defined(__x86_64__) && defined(__WIN64__)
+  /* Clear/restore used registers. */
+  asm volatile( "pxor %%xmm0, %%xmm0\n\t"
+                "pxor %%xmm1, %%xmm1\n\t"
+                "pxor %%xmm2, %%xmm2\n\t"
+                "pxor %%xmm3, %%xmm3\n\t"
+                "pxor %%xmm4, %%xmm4\n\t"
+                "pxor %%xmm5, %%xmm5\n\t"
+                "movdqu 0*16(%0), %%xmm6\n\t"
+                "movdqu 1*16(%0), %%xmm7\n\t"
+                "movdqu 2*16(%0), %%xmm8\n\t"
+                "movdqu 3*16(%0), %%xmm9\n\t"
+                "movdqu 4*16(%0), %%xmm10\n\t"
+                "movdqu 5*16(%0), %%xmm11\n\t"
+                "movdqu 6*16(%0), %%xmm12\n\t"
+                "movdqu 7*16(%0), %%xmm13\n\t"
+                "movdqu 8*16(%0), %%xmm14\n\t"
+                "movdqu 9*16(%0), %%xmm15\n\t"
+                :
+                : "r" (win64tmp)
+                : "memory" );
+#else
+  /* Clear used registers. */
+  asm volatile( "pxor %%xmm0, %%xmm0\n\t"
+                "pxor %%xmm1, %%xmm1\n\t"
+                "pxor %%xmm2, %%xmm2\n\t"
+                "pxor %%xmm3, %%xmm3\n\t"
+                "pxor %%xmm4, %%xmm4\n\t"
+                "pxor %%xmm5, %%xmm5\n\t"
+                "pxor %%xmm6, %%xmm6\n\t"
+                "pxor %%xmm7, %%xmm7\n\t"
+                ::: "memory" );
+#endif
+
+  return 0;
+}
+
+unsigned int ASM_FUNC_ATTR
+_gcry_polyval_intel_pclmul (gcry_cipher_hd_t c, byte *result, const byte *buf,
+                            size_t nblocks)
+{
+  static const unsigned char be_mask[16] __attribute__ ((aligned (16))) =
+    { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
+  const unsigned int blocksize = GCRY_GCM_BLOCK_LEN;
+#if defined(__x86_64__) && defined(__WIN64__)
+  char win64tmp[10 * 16];
+#endif
+
+  if (nblocks == 0)
+    return 0;
+
+#if defined(__x86_64__) && defined(__WIN64__)
+  /* XMM6-XMM15 need to be restored after use. */
+  asm volatile ("movdqu %%xmm6,  0*16(%0)\n\t"
+                "movdqu %%xmm7,  1*16(%0)\n\t"
+                "movdqu %%xmm8,  2*16(%0)\n\t"
+                "movdqu %%xmm9,  3*16(%0)\n\t"
+                "movdqu %%xmm10, 4*16(%0)\n\t"
+                "movdqu %%xmm11, 5*16(%0)\n\t"
+                "movdqu %%xmm12, 6*16(%0)\n\t"
+                "movdqu %%xmm13, 7*16(%0)\n\t"
+                "movdqu %%xmm14, 8*16(%0)\n\t"
+                "movdqu %%xmm15, 9*16(%0)\n\t"
+                :
+                : "r" (win64tmp)
+                : "memory" );
+#endif
+
+  /* Preload hash. */
+  asm volatile ("pxor %%xmm7, %%xmm7\n\t"
+                "movdqu %[hash], %%xmm1\n\t"
+                "pshufb %[be_mask], %%xmm1\n\t" /* be => le */
+                :
+                : [hash] "m" (*result),
+                  [be_mask] "m" (*be_mask)
+                : "memory" );
+
+#ifdef __x86_64__
+  if (nblocks >= 8)
+    {
+      /* Preload H1. */
+      asm volatile ("pxor %%xmm15, %%xmm15\n\t"
+                    "movdqa %[h_1], %%xmm0\n\t"
+                    :
+                    : [h_1] "m" (*c->u_mode.gcm.u_ghash_key.key)
+                    : "memory" );
+
+      while (nblocks >= 8)
+        {
+          gfmul_pclmul_aggr8_le (buf, c->u_mode.gcm.gcm_table);
+
+          buf += 8 * blocksize;
+          nblocks -= 8;
+        }
+#ifndef __WIN64__
+      /* Clear used x86-64/XMM registers. */
+      asm volatile( "pxor %%xmm8, %%xmm8\n\t"
+                    "pxor %%xmm9, %%xmm9\n\t"
+                    "pxor %%xmm10, %%xmm10\n\t"
+                    "pxor %%xmm11, %%xmm11\n\t"
+                    "pxor %%xmm12, %%xmm12\n\t"
+                    "pxor %%xmm13, %%xmm13\n\t"
+                    "pxor %%xmm14, %%xmm14\n\t"
+                    "pxor %%xmm15, %%xmm15\n\t"
+                    ::: "memory" );
+#endif
+    }
+#endif
+
+  while (nblocks >= 4)
+    {
+      gfmul_pclmul_aggr4_le (buf, c->u_mode.gcm.u_ghash_key.key,
+                             c->u_mode.gcm.gcm_table);
+
+      buf += 4 * blocksize;
+      nblocks -= 4;
+    }
+
+  if (nblocks)
+    {
+      /* Preload H1. */
+      asm volatile ("movdqa %[h_1], %%xmm0\n\t"
+                    :
+                    : [h_1] "m" (*c->u_mode.gcm.u_ghash_key.key)
+                    : "memory" );
+
+      while (nblocks)
+        {
+          asm volatile ("movdqu %[buf], %%xmm2\n\t"
+                        "pxor %%xmm2, %%xmm1\n\t"
+                        :
+                        : [buf] "m" (*buf)
+                        : "memory" );
+
+          gfmul_pclmul ();
+
+          buf += blocksize;
+          nblocks--;
+        }
+    }
+
+  /* Store hash. */
+  asm volatile ("pshufb %[be_mask], %%xmm1\n\t" /* be => le */
+                "movdqu %%xmm1, %[hash]\n\t"
+                : [hash] "=m" (*result)
+                : [be_mask] "m" (*be_mask)
+                : "memory" );
+
+#if defined(__x86_64__) && defined(__WIN64__)
+  /* Clear/restore used registers. */
+  asm volatile( "pxor %%xmm0, %%xmm0\n\t"
+                "pxor %%xmm1, %%xmm1\n\t"
+                "pxor %%xmm2, %%xmm2\n\t"
+                "pxor %%xmm3, %%xmm3\n\t"
+                "pxor %%xmm4, %%xmm4\n\t"
+                "pxor %%xmm5, %%xmm5\n\t"
+                "movdqu 0*16(%0), %%xmm6\n\t"
+                "movdqu 1*16(%0), %%xmm7\n\t"
+                "movdqu 2*16(%0), %%xmm8\n\t"
+                "movdqu 3*16(%0), %%xmm9\n\t"
+                "movdqu 4*16(%0), %%xmm10\n\t"
+                "movdqu 5*16(%0), %%xmm11\n\t"
+                "movdqu 6*16(%0), %%xmm12\n\t"
+                "movdqu 7*16(%0), %%xmm13\n\t"
+                "movdqu 8*16(%0), %%xmm14\n\t"
+                "movdqu 9*16(%0), %%xmm15\n\t"
+                :
+                : "r" (win64tmp)
+                : "memory" );
+#else
+  /* Clear used registers. */
+  asm volatile( "pxor %%xmm0, %%xmm0\n\t"
+                "pxor %%xmm1, %%xmm1\n\t"
+                "pxor %%xmm2, %%xmm2\n\t"
+                "pxor %%xmm3, %%xmm3\n\t"
+                "pxor %%xmm4, %%xmm4\n\t"
+                "pxor %%xmm5, %%xmm5\n\t"
+                "pxor %%xmm6, %%xmm6\n\t"
+                "pxor %%xmm7, %%xmm7\n\t"
+                ::: "memory" );
+#endif
+
+  return 0;
+}
+
+#if __clang__
+#  pragma clang attribute pop
+#endif
+
+#endif /* GCM_USE_INTEL_PCLMUL */
diff --git a/grub-core/lib/libgcrypt/cipher/cipher-gcm-ppc.c 
b/grub-core/lib/libgcrypt/cipher/cipher-gcm-ppc.c
new file mode 100644
index 000000000..4f75e95cf
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/cipher-gcm-ppc.c
@@ -0,0 +1,551 @@
+/* cipher-gcm-ppc.c  -  Power 8 vpmsum accelerated Galois Counter Mode
+ *                      implementation
+ * Copyright (C) 2019 Shawn Landden <shawn@git.icu>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser general Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Based on GHASH implementation by Andy Polyakov from CRYPTOGAMS
+ * distribution (ppc/ghashp8-ppc.pl). Specifically, it uses his register
+ * allocation (which then defers to your compiler's register allocation),
+ * instead of re-implementing Gerald Estrin's Scheme of parallelized
+ * multiplication of polynomials, as I did not understand this algorithm at
+ * the time.
+ *
+ * Original copyright license follows:
+ *
+ *  Copyright (c) 2006, CRYPTOGAMS by <appro@openssl.org>
+ *  All rights reserved.
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions
+ *  are met:
+ *
+ *        * Redistributions of source code must retain copyright notices,
+ *          this list of conditions and the following disclaimer.
+ *
+ *        * Redistributions in binary form must reproduce the above
+ *          copyright notice, this list of conditions and the following
+ *          disclaimer in the documentation and/or other materials
+ *          provided with the distribution.
+ *
+ *        * Neither the name of the CRYPTOGAMS nor the names of its
+ *          copyright holder and contributors may be used to endorse or
+ *          promote products derived from this software without specific
+ *          prior written permission.
+ *
+ *  ALTERNATIVELY, provided that this notice is retained in full, this
+ *  product may be distributed under the terms of the GNU General Public
+ *  License (GPL), in which case the provisions of the GPL apply INSTEAD OF
+ *  those given above.
+ *
+ *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
+ *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * SPDX-License-Identifier: (BSD-3-Clause OR GPL-2.0-only)
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <stdint.h>
+
+#include "g10lib.h"
+#include "cipher.h"
+#include "bufhelp.h"
+#include "./cipher-internal.h"
+
+#ifdef GCM_USE_PPC_VPMSUM
+
+#include <altivec.h>
+
+#define ALWAYS_INLINE inline __attribute__((always_inline))
+#define NO_INSTRUMENT_FUNCTION __attribute__((no_instrument_function))
+
+#define ASM_FUNC_ATTR        NO_INSTRUMENT_FUNCTION
+#define ASM_FUNC_ATTR_INLINE ASM_FUNC_ATTR ALWAYS_INLINE
+
+#define ALIGNED_16 __attribute__ ((aligned (16)))
+
+typedef vector unsigned char vector16x_u8;
+typedef vector signed char vector16x_s8;
+typedef vector unsigned long long vector2x_u64;
+typedef vector unsigned long long block;
+
+static ASM_FUNC_ATTR_INLINE block
+asm_xor(block a, block b)
+{
+  block r;
+  __asm__ volatile ("xxlxor %x0, %x1, %x2"
+                   : "=wa" (r)
+                   : "wa" (a), "wa" (b));
+  return r;
+}
+
+static ASM_FUNC_ATTR_INLINE block
+asm_vpmsumd(block a, block b)
+{
+  block r;
+  __asm__ volatile ("vpmsumd %0, %1, %2"
+                   : "=v" (r)
+                   : "v" (a), "v" (b));
+  return r;
+}
+
+static ASM_FUNC_ATTR_INLINE block
+asm_swap_u64(block a)
+{
+  block r;
+  __asm__ volatile ("xxswapd %x0, %x1"
+                   : "=wa" (r)
+                   : "wa" (a));
+  return r;
+}
+
+static ASM_FUNC_ATTR_INLINE block
+asm_mergelo(block l, block r)
+{
+  block ret;
+  __asm__ volatile ("xxmrgld %x0, %x1, %x2\n\t"
+                   : "=wa" (ret)
+                   : "wa" (l), "wa" (r));
+  return ret;
+}
+
+static ASM_FUNC_ATTR_INLINE block
+asm_mergehi(block l, block r)
+{
+  block ret;
+  __asm__ volatile ("xxmrghd %x0, %x1, %x2\n\t"
+                   : "=wa" (ret)
+                   : "wa" (l), "wa" (r));
+  return ret;
+}
+
+static ASM_FUNC_ATTR_INLINE block
+asm_rot_block_left(block a)
+{
+  block r;
+  block zero = { 0, 0 };
+  __asm__ volatile ("xxmrgld %x0, %x1, %x2"
+                   : "=wa" (r)
+                   : "wa" (a), "wa" (zero));
+  return r;
+}
+
+static ASM_FUNC_ATTR_INLINE block
+asm_rot_block_right(block a)
+{
+  block r;
+  block zero = { 0, 0 };
+  __asm__ volatile ("xxsldwi %x0, %x2, %x1, 2"
+                   : "=wa" (r)
+                   : "wa" (a), "wa" (zero));
+  return r;
+}
+
+/* vsl is a slightly strange function in the way the shift is passed... */
+static ASM_FUNC_ATTR_INLINE block
+asm_ashl_128(block a, vector16x_u8 shift)
+{
+  block r;
+  __asm__ volatile ("vsl %0, %1, %2"
+                   : "=v" (r)
+                   : "v" (a), "v" (shift));
+  return r;
+}
+
+#define STORE_TABLE(gcm_table, slot, vec) \
+  vec_store_he (((block)vec), slot * 16, (unsigned char *)(gcm_table));
+
+static ASM_FUNC_ATTR_INLINE void
+vec_store_he(block vec, unsigned long offset, unsigned char *ptr)
+{
+  /* GCC vec_vsx_ld is generating two instructions on little-endian. Use
+   * lxvd2x directly instead. */
+#if __GNUC__ >= 4
+  if (__builtin_constant_p (offset) && offset == 0)
+    __asm__ volatile ("stxvd2x %x0, 0, %1\n\t"
+                   :
+                   : "wa" (vec), "r" ((uintptr_t)ptr)
+                   : "memory", "r0");
+  else
+#endif
+    __asm__ volatile ("stxvd2x %x0, %1, %2\n\t"
+                     :
+                     : "wa" (vec), "r" (offset), "r" ((uintptr_t)ptr)
+                     : "memory", "r0");
+}
+
+#define VEC_LOAD_BE(in_ptr, bswap_const) \
+  vec_be_swap(vec_load_he (0, (const unsigned char *)(in_ptr)), bswap_const)
+
+static ASM_FUNC_ATTR_INLINE block
+vec_load_he(unsigned long offset, const unsigned char *ptr)
+{
+  block vec;
+  /* GCC vec_vsx_ld is generating two instructions on little-endian. Use
+   * lxvd2x directly instead. */
+#if __GNUC__ >= 4
+  if (__builtin_constant_p (offset) && offset == 0)
+    __asm__ volatile ("lxvd2x %x0, 0, %1\n\t"
+                   : "=wa" (vec)
+                   : "r" ((uintptr_t)ptr)
+                   : "memory", "r0");
+  else
+#endif
+    __asm__ volatile ("lxvd2x %x0, %1, %2\n\t"
+                     : "=wa" (vec)
+                     : "r" (offset), "r" ((uintptr_t)ptr)
+                     : "memory", "r0");
+  return vec;
+}
+
+static ASM_FUNC_ATTR_INLINE block
+vec_be_swap(block vec, vector16x_u8 be_bswap_const)
+{
+#ifndef WORDS_BIGENDIAN
+  __asm__ volatile ("vperm %0, %1, %1, %2\n\t"
+                   : "=v" (vec)
+                   : "v" (vec), "v" (be_bswap_const));
+#else
+  (void)be_bswap_const;
+#endif
+  return vec;
+}
+
+static ASM_FUNC_ATTR_INLINE block
+vec_dup_byte_elem(block vec, int idx)
+{
+#ifndef WORDS_BIGENDIAN
+  return (block)vec_splat((vector16x_s8)vec, idx);
+#else
+  return (block)vec_splat((vector16x_s8)vec, (15 - idx) & 15);
+#endif
+}
+
+/* Power ghash based on papers:
+   "The Galois/Counter Mode of Operation (GCM)"; David A. McGrew, John Viega
+   "Intel® Carry-Less Multiplication Instruction and its Usage for Computing
+    the GCM Mode - Rev 2.01"; Shay Gueron, Michael E. Kounavis.
+
+   After saving the magic c2 constant and pre-formatted version of the key,
+   we pre-process the key for parallel hashing. This takes advantage of the
+   identity of addition over a galois field being identital to XOR, and thus
+   can be parellized (S 2.2, page 3). We multiply and add (galois field
+   versions) the key over multiple iterations and save the result. This can
+   later be galois added (XORed) with parallel processed input (Estrin's
+   Scheme).
+
+   The ghash "key" is a salt. */
+void ASM_FUNC_ATTR
+_gcry_ghash_setup_ppc_vpmsum (void *gcm_table_arg, void *gcm_key)
+{
+  static const vector16x_u8 bswap_const ALIGNED_16 =
+    { ~7, ~6, ~5, ~4, ~3, ~2, ~1, ~0, ~15, ~14, ~13, ~12, ~11, ~10, ~9, ~8 };
+  static const byte c2[16] ALIGNED_16 =
+    { 0xc2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 };
+  static const vector16x_u8 one ALIGNED_16 =
+    { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 };
+  uint64_t *gcm_table = gcm_table_arg;
+  block T0, T1, T2;
+  block C2, H, H1, H1l, H1h, H2, H2l, H2h;
+  block H3l, H3, H3h, H4l, H4, H4h, T3, T4;
+  vector16x_s8 most_sig_of_H, t7, carry;
+
+  H = VEC_LOAD_BE(gcm_key, bswap_const);
+  C2 = VEC_LOAD_BE(c2, bswap_const);
+  most_sig_of_H = (vector16x_s8)vec_dup_byte_elem(H, 15);
+  t7 = vec_splat_s8(7);
+  carry = most_sig_of_H >> t7;
+  carry &= (vector16x_s8)C2; /* only interested in certain carries. */
+  H1 = asm_ashl_128(H, one);
+  H1 ^= (block)carry; /* complete the <<< 1 */
+
+  T1 = asm_swap_u64 (H1);
+  H1l = asm_rot_block_right (T1);
+  H1h = asm_rot_block_left (T1);
+  C2 = asm_rot_block_right (C2);
+
+  STORE_TABLE (gcm_table, 0, C2);
+  STORE_TABLE (gcm_table, 1, H1l);
+  STORE_TABLE (gcm_table, 2, T1);
+  STORE_TABLE (gcm_table, 3, H1h);
+
+  /* pre-process coefficients for Gerald Estrin's scheme for parallel
+   * multiplication of polynomials
+   */
+  H2l = asm_vpmsumd (H1l, H1); /* do not need to mask in
+                                  because 0 * anything -> 0 */
+  H2 = asm_vpmsumd (T1, H1);
+  H2h = asm_vpmsumd (H1h, H1);
+
+  /* reduce 1 */
+  T0 = asm_vpmsumd (H2l, C2);
+
+  H2l ^= asm_rot_block_left (H2);
+  H2h ^= asm_rot_block_right (H2);
+  H2l = asm_swap_u64 (H2l);
+  H2l ^= T0;
+  /* reduce 2 */
+  T0 = asm_swap_u64 (H2l);
+  H2l = asm_vpmsumd (H2l, C2);
+  H2 = H2l ^ H2h ^ T0;
+
+  T2 = asm_swap_u64 (H2);
+  H2l = asm_rot_block_right (T2);
+  H2h = asm_rot_block_left (T2);
+
+  STORE_TABLE (gcm_table, 4, H2l);
+  STORE_TABLE (gcm_table, 5, T2);
+  STORE_TABLE (gcm_table, 6, H2h);
+
+  H3l = asm_vpmsumd (H2l, H1);
+  H4l = asm_vpmsumd (H2l, H2);
+  H3 = asm_vpmsumd (T2, H1);
+  H4 = asm_vpmsumd (T2, H2);
+  H3h = asm_vpmsumd (H2h, H1);
+  H4h = asm_vpmsumd (H2h, H2);
+
+  T3 = asm_vpmsumd (H3l, C2);
+  T4 = asm_vpmsumd (H4l, C2);
+
+  H3l ^= asm_rot_block_left (H3);
+  H3h ^= asm_rot_block_right (H3);
+  H4l ^= asm_rot_block_left (H4);
+  H4h ^= asm_rot_block_right (H4);
+
+  H3 = asm_swap_u64 (H3l);
+  H4 = asm_swap_u64 (H4l);
+
+  H3 ^= T3;
+  H4 ^= T4;
+
+  /* We could have also b64 switched reduce and reduce2, however as we are
+     using the unrotated H and H2 above to vpmsum, this is marginally better. 
*/
+  T3 = asm_swap_u64 (H3);
+  T4 = asm_swap_u64 (H4);
+
+  H3 = asm_vpmsumd (H3, C2);
+  H4 = asm_vpmsumd (H4, C2);
+
+  T3 ^= H3h;
+  T4 ^= H4h;
+  H3 ^= T3;
+  H4 ^= T4;
+  H3 = asm_swap_u64 (H3);
+  H4 = asm_swap_u64 (H4);
+
+  H3l = asm_rot_block_right (H3);
+  H3h = asm_rot_block_left (H3);
+  H4l = asm_rot_block_right (H4);
+  H4h = asm_rot_block_left (H4);
+
+  STORE_TABLE (gcm_table, 7, H3l);
+  STORE_TABLE (gcm_table, 8, H3);
+  STORE_TABLE (gcm_table, 9, H3h);
+  STORE_TABLE (gcm_table, 10, H4l);
+  STORE_TABLE (gcm_table, 11, H4);
+  STORE_TABLE (gcm_table, 12, H4h);
+}
+
+unsigned int ASM_FUNC_ATTR
+_gcry_ghash_ppc_vpmsum (byte *result, void *gcm_table,
+                       const byte *buf, const size_t nblocks)
+{
+  static const vector16x_u8 bswap_const ALIGNED_16 =
+    { ~7, ~6, ~5, ~4, ~3, ~2, ~1, ~0, ~15, ~14, ~13, ~12, ~11, ~10, ~9, ~8 };
+  block c2, H0l, H0m, H0h, H4l, H4m, H4h, H2m, H3l, H3m, H3h, Hl;
+  block Hm, Hh, in, in0, in1, in2, in3, Hm_right, Hl_rotate, cur;
+  size_t blocks_remaining = nblocks;
+  size_t not_multiple_of_four;
+  block t0;
+
+  cur = vec_be_swap (vec_load_he (0, result), bswap_const);
+
+  c2 = vec_load_he (0, gcm_table);
+  H0l = vec_load_he (16, gcm_table);
+  H0m = vec_load_he (32, gcm_table);
+  H0h = vec_load_he (48, gcm_table);
+
+  for (not_multiple_of_four = nblocks % 4; not_multiple_of_four;
+       not_multiple_of_four--)
+    {
+      in = vec_be_swap (vec_load_he (0, buf), bswap_const);
+      buf += 16;
+      blocks_remaining--;
+      cur ^= in;
+
+      Hl = asm_vpmsumd (cur, H0l);
+      Hm = asm_vpmsumd (cur, H0m);
+      Hh = asm_vpmsumd (cur, H0h);
+
+      t0 = asm_vpmsumd (Hl, c2);
+
+      Hl ^= asm_rot_block_left (Hm);
+
+      Hm_right = asm_rot_block_right (Hm);
+      Hh ^= Hm_right;
+      Hl_rotate = asm_swap_u64 (Hl);
+      Hl_rotate ^= t0;
+      Hl = asm_swap_u64 (Hl_rotate);
+      Hl_rotate = asm_vpmsumd (Hl_rotate, c2);
+      Hl ^= Hh;
+      Hl ^= Hl_rotate;
+
+      cur = Hl;
+  }
+
+  if (blocks_remaining > 0)
+    {
+      block Xl, Xm, Xh, Xl1, Xm1, Xh1, Xm2, Xl3, Xm3, Xh3, Xl_rotate;
+      block H21l, H21h, merge_l, merge_h;
+      block t1, t2;
+
+      H2m = vec_load_he (48 + 32, gcm_table);
+      H3l = vec_load_he (48 * 2 + 16, gcm_table);
+      H3m = vec_load_he (48 * 2 + 32, gcm_table);
+      H3h = vec_load_he (48 * 2 + 48, gcm_table);
+      H4l = vec_load_he (48 * 3 + 16, gcm_table);
+      H4m = vec_load_he (48 * 3 + 32, gcm_table);
+      H4h = vec_load_he (48 * 3 + 48, gcm_table);
+
+      in0 = vec_load_he (0, buf);
+      in1 = vec_load_he (16, buf);
+      in2 = vec_load_he (32, buf);
+      in3 = vec_load_he (48, buf);
+      in0 = vec_be_swap(in0, bswap_const);
+      in1 = vec_be_swap(in1, bswap_const);
+      in2 = vec_be_swap(in2, bswap_const);
+      in3 = vec_be_swap(in3, bswap_const);
+
+      Xh = asm_xor (in0, cur);
+
+      Xl1 = asm_vpmsumd (in1, H3l);
+      Xm1 = asm_vpmsumd (in1, H3m);
+      Xh1 = asm_vpmsumd (in1, H3h);
+
+      H21l = asm_mergehi (H2m, H0m);
+      H21h = asm_mergelo (H2m, H0m);
+      merge_l = asm_mergelo (in2, in3);
+      merge_h = asm_mergehi (in2, in3);
+
+      Xm2 = asm_vpmsumd (in2, H2m);
+      Xl3 = asm_vpmsumd (merge_l, H21l);
+      Xm3 = asm_vpmsumd (in3, H0m);
+      Xh3 = asm_vpmsumd (merge_h, H21h);
+
+      Xm2 = asm_xor (Xm2, Xm1);
+      Xl3 = asm_xor (Xl3, Xl1);
+      Xm3 = asm_xor (Xm3, Xm2);
+      Xh3 = asm_xor (Xh3, Xh1);
+
+      /* Gerald Estrin's scheme for parallel multiplication of polynomials */
+      while (1)
+        {
+         buf += 64;
+         blocks_remaining -= 4;
+         if (!blocks_remaining)
+           break;
+
+         in0 = vec_load_he (0, buf);
+         in1 = vec_load_he (16, buf);
+         in2 = vec_load_he (32, buf);
+         in3 = vec_load_he (48, buf);
+         in1 = vec_be_swap(in1, bswap_const);
+         in2 = vec_be_swap(in2, bswap_const);
+         in3 = vec_be_swap(in3, bswap_const);
+         in0 = vec_be_swap(in0, bswap_const);
+
+         Xl = asm_vpmsumd (Xh, H4l);
+         Xm = asm_vpmsumd (Xh, H4m);
+         Xh = asm_vpmsumd (Xh, H4h);
+         Xl1 = asm_vpmsumd (in1, H3l);
+         Xm1 = asm_vpmsumd (in1, H3m);
+         Xh1 = asm_vpmsumd (in1, H3h);
+
+         Xl = asm_xor (Xl, Xl3);
+         Xm = asm_xor (Xm, Xm3);
+         Xh = asm_xor (Xh, Xh3);
+         merge_l = asm_mergelo (in2, in3);
+         merge_h = asm_mergehi (in2, in3);
+
+         t0 = asm_vpmsumd (Xl, c2);
+         Xl3 = asm_vpmsumd (merge_l, H21l);
+         Xh3 = asm_vpmsumd (merge_h, H21h);
+
+         t1 = asm_rot_block_left (Xm);
+         t2 = asm_rot_block_right (Xm);
+         Xl = asm_xor(Xl, t1);
+         Xh = asm_xor(Xh, t2);
+
+         Xl = asm_swap_u64 (Xl);
+         Xl = asm_xor(Xl, t0);
+
+         Xl_rotate = asm_swap_u64 (Xl);
+         Xm2 = asm_vpmsumd (in2, H2m);
+         Xm3 = asm_vpmsumd (in3, H0m);
+         Xl = asm_vpmsumd (Xl, c2);
+
+         Xl3 = asm_xor (Xl3, Xl1);
+         Xh3 = asm_xor (Xh3, Xh1);
+         Xh = asm_xor (Xh, in0);
+         Xm2 = asm_xor (Xm2, Xm1);
+         Xh = asm_xor (Xh, Xl_rotate);
+         Xm3 = asm_xor (Xm3, Xm2);
+         Xh = asm_xor (Xh, Xl);
+       }
+
+      Xl = asm_vpmsumd (Xh, H4l);
+      Xm = asm_vpmsumd (Xh, H4m);
+      Xh = asm_vpmsumd (Xh, H4h);
+
+      Xl = asm_xor (Xl, Xl3);
+      Xm = asm_xor (Xm, Xm3);
+
+      t0 = asm_vpmsumd (Xl, c2);
+
+      Xh = asm_xor (Xh, Xh3);
+      t1 = asm_rot_block_left (Xm);
+      t2 = asm_rot_block_right (Xm);
+      Xl = asm_xor (Xl, t1);
+      Xh = asm_xor (Xh, t2);
+
+      Xl = asm_swap_u64 (Xl);
+      Xl = asm_xor (Xl, t0);
+
+      Xl_rotate = asm_swap_u64 (Xl);
+      Xl = asm_vpmsumd (Xl, c2);
+      Xh = asm_xor (Xh, Xl_rotate);
+      cur = asm_xor (Xh, Xl);
+    }
+
+  vec_store_he (vec_be_swap (cur, bswap_const), 0, result);
+
+  return 0;
+}
+
+#endif /* GCM_USE_PPC_VPMSUM */
diff --git a/grub-core/lib/libgcrypt/cipher/cipher-gcm-siv.c 
b/grub-core/lib/libgcrypt/cipher/cipher-gcm-siv.c
new file mode 100644
index 000000000..9ebc00366
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/cipher-gcm-siv.c
@@ -0,0 +1,664 @@
+/* cipher-gcm-siv.c  - GCM-SIV implementation (RFC 8452)
+ * Copyright (C) 2021 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser general Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "g10lib.h"
+#include "cipher.h"
+#include "bufhelp.h"
+#include "./cipher-internal.h"
+
+
+#define GCM_SIV_NONCE_LENGTH (96 / 8)
+
+
+static inline void
+mulx_ghash (byte *a)
+{
+  u64 t[2], mask;
+
+  t[0] = buf_get_be64(a + 0);
+  t[1] = buf_get_be64(a + 8);
+  mask = -(t[1] & 1) & 0xe1;
+  mask <<= 56;
+
+  buf_put_be64(a + 8, (t[1] >> 1) ^ (t[0] << 63));
+  buf_put_be64(a + 0, (t[0] >> 1) ^ mask);
+}
+
+
+static inline void
+gcm_siv_bytecounter_add (u32 ctr[2], size_t add)
+{
+  if (sizeof(add) > sizeof(u32))
+    {
+      u32 high_add = ((add >> 31) >> 1) & 0xffffffff;
+      ctr[1] += high_add;
+    }
+
+  ctr[0] += add;
+  if (ctr[0] >= add)
+    return;
+  ++ctr[1];
+}
+
+
+static inline int
+gcm_siv_check_len (u32 ctr[2])
+{
+  /* len(plaintext/aadlen) <= 2^39-256 bits == 2^36-32 bytes == 2^32-2 blocks 
*/
+  if (ctr[1] > 0xfU)
+    return 0;
+  if (ctr[1] < 0xfU)
+    return 1;
+
+  if (ctr[0] <= 0xffffffe0U)
+    return 1;
+
+  return 0;
+}
+
+
+static void
+polyval_set_key (gcry_cipher_hd_t c, const byte *auth_key)
+{
+  cipher_block_bswap (c->u_mode.gcm.u_ghash_key.key, auth_key,
+                     GCRY_SIV_BLOCK_LEN);
+  mulx_ghash (c->u_mode.gcm.u_ghash_key.key);
+  _gcry_cipher_gcm_setupM (c);
+}
+
+
+static void
+do_polyval_buf(gcry_cipher_hd_t c, byte *hash, const byte *buf,
+              size_t buflen, int do_padding)
+{
+  unsigned int blocksize = GCRY_SIV_BLOCK_LEN;
+  unsigned int unused = c->u_mode.gcm.mac_unused;
+  ghash_fn_t ghash_fn = c->u_mode.gcm.ghash_fn;
+  ghash_fn_t polyval_fn = c->u_mode.gcm.polyval_fn;
+  byte tmp_blocks[16][GCRY_SIV_BLOCK_LEN];
+  size_t nblocks, n;
+  unsigned int burn = 0, nburn;
+  unsigned int num_blks_used = 0;
+
+  if (buflen == 0 && (unused == 0 || !do_padding))
+    return;
+
+  do
+    {
+      if (buflen > 0 && (buflen + unused < blocksize || unused > 0))
+        {
+          n = blocksize - unused;
+          n = n < buflen ? n : buflen;
+
+          buf_cpy (&c->u_mode.gcm.macbuf[unused], buf, n);
+
+          unused += n;
+          buf += n;
+          buflen -= n;
+        }
+      if (!buflen)
+        {
+          if (!do_padding && unused < blocksize)
+           {
+             break;
+           }
+
+         n = blocksize - unused;
+         if (n > 0)
+           {
+             memset (&c->u_mode.gcm.macbuf[unused], 0, n);
+             unused = blocksize;
+           }
+        }
+
+      if (unused > 0)
+        {
+          gcry_assert (unused == blocksize);
+
+          /* Process one block from macbuf.  */
+          if (polyval_fn)
+            {
+              nburn = polyval_fn (c, hash, c->u_mode.gcm.macbuf, 1);
+            }
+          else
+            {
+              cipher_block_bswap (c->u_mode.gcm.macbuf, c->u_mode.gcm.macbuf,
+                                  blocksize);
+              nburn = ghash_fn (c, hash, c->u_mode.gcm.macbuf, 1);
+            }
+
+          burn = nburn > burn ? nburn : burn;
+          unused = 0;
+        }
+
+      nblocks = buflen / blocksize;
+
+      while (nblocks)
+        {
+          if (polyval_fn)
+            {
+              n = nblocks;
+              nburn = polyval_fn (c, hash, buf, n);
+            }
+          else
+            {
+              for (n = 0; n < (nblocks > 16 ? 16 : nblocks); n++)
+                cipher_block_bswap (tmp_blocks[n], buf + n * blocksize,
+                                    blocksize);
+
+              num_blks_used = n > num_blks_used ? n : num_blks_used;
+
+              nburn = ghash_fn (c, hash, tmp_blocks[0], n);
+            }
+
+          burn = nburn > burn ? nburn : burn;
+          buf += n * blocksize;
+          buflen -= n * blocksize;
+          nblocks -= n;
+        }
+    }
+  while (buflen > 0);
+
+  c->u_mode.gcm.mac_unused = unused;
+
+  if (num_blks_used)
+    wipememory (tmp_blocks, num_blks_used * blocksize);
+  if (burn)
+    _gcry_burn_stack (burn);
+}
+
+
+static void
+do_ctr_le32 (gcry_cipher_hd_t c, byte *outbuf, const byte *inbuf,
+            size_t inbuflen)
+{
+  gcry_cipher_encrypt_t enc_fn = c->spec->encrypt;
+  unsigned char tmp[GCRY_SIV_BLOCK_LEN];
+  unsigned int burn = 0, nburn;
+  size_t nblocks;
+
+  if (inbuflen == 0)
+    return;
+
+  /* Use a bulk method if available.  */
+  nblocks = inbuflen / GCRY_SIV_BLOCK_LEN;
+  if (nblocks && c->bulk.ctr32le_enc)
+    {
+      c->bulk.ctr32le_enc (c->context.c, c->u_ctr.ctr, outbuf, inbuf, nblocks);
+      inbuf  += nblocks * GCRY_SIV_BLOCK_LEN;
+      outbuf += nblocks * GCRY_SIV_BLOCK_LEN;
+      inbuflen -= nblocks * GCRY_SIV_BLOCK_LEN;
+    }
+
+  do
+    {
+      nburn = enc_fn (c->context.c, tmp, c->u_ctr.ctr);
+      burn = nburn > burn ? nburn : burn;
+
+      buf_put_le32(c->u_ctr.ctr, buf_get_le32(c->u_ctr.ctr) + 1);
+
+      if (inbuflen < GCRY_SIV_BLOCK_LEN)
+       break;
+      cipher_block_xor(outbuf, inbuf, tmp, GCRY_SIV_BLOCK_LEN);
+
+      inbuflen -= GCRY_SIV_BLOCK_LEN;
+      outbuf += GCRY_SIV_BLOCK_LEN;
+      inbuf += GCRY_SIV_BLOCK_LEN;
+    }
+  while (inbuflen);
+
+  if (inbuflen)
+    {
+      buf_xor(outbuf, inbuf, tmp, inbuflen);
+
+      outbuf += inbuflen;
+      inbuf += inbuflen;
+      inbuflen -= inbuflen;
+    }
+
+  wipememory (tmp, sizeof(tmp));
+
+  if (burn > 0)
+    _gcry_burn_stack (burn + 4 * sizeof(void *));
+}
+
+
+static int
+gcm_siv_selftest (gcry_cipher_hd_t c)
+{
+  static const byte in1[GCRY_SIV_BLOCK_LEN] =
+      "\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00";
+  static const byte out1[GCRY_SIV_BLOCK_LEN] =
+      "\x00\x80\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00";
+  static const byte in2[GCRY_SIV_BLOCK_LEN] =
+      "\x9c\x98\xc0\x4d\xf9\x38\x7d\xed\x82\x81\x75\xa9\x2b\xa6\x52\xd8";
+  static const byte out2[GCRY_SIV_BLOCK_LEN] =
+      "\x4e\x4c\x60\x26\xfc\x9c\x3e\xf6\xc1\x40\xba\xd4\x95\xd3\x29\x6c";
+  static const byte polyval_key[GCRY_SIV_BLOCK_LEN] =
+      "\x25\x62\x93\x47\x58\x92\x42\x76\x1d\x31\xf8\x26\xba\x4b\x75\x7b";
+  static const byte ghash_key[GCRY_SIV_BLOCK_LEN] =
+      "\xdc\xba\xa5\xdd\x13\x7c\x18\x8e\xbb\x21\x49\x2c\x23\xc9\xb1\x12";
+  static const byte polyval_data[GCRY_SIV_BLOCK_LEN * 2] =
+      "\x4f\x4f\x95\x66\x8c\x83\xdf\xb6\x40\x17\x62\xbb\x2d\x01\xa2\x62"
+      "\xd1\xa2\x4d\xdd\x27\x21\xd0\x06\xbb\xe4\x5f\x20\xd3\xc9\xf3\x62";
+  static const byte polyval_tag[GCRY_SIV_BLOCK_LEN] =
+      "\xf7\xa3\xb4\x7b\x84\x61\x19\xfa\xe5\xb7\x86\x6c\xf5\xe5\xb7\x7e";
+  byte tmp[GCRY_SIV_BLOCK_LEN];
+
+  /* Test mulx_ghash */
+  memcpy (tmp, in1, GCRY_SIV_BLOCK_LEN);
+  mulx_ghash (tmp);
+  if (memcmp (tmp, out1, GCRY_SIV_BLOCK_LEN) != 0)
+    return -1;
+
+  memcpy (tmp, in2, GCRY_SIV_BLOCK_LEN);
+  mulx_ghash (tmp);
+  if (memcmp (tmp, out2, GCRY_SIV_BLOCK_LEN) != 0)
+    return -1;
+
+  /* Test GHASH key generation */
+  memcpy (tmp, polyval_key, GCRY_SIV_BLOCK_LEN);
+  cipher_block_bswap (tmp, tmp, GCRY_SIV_BLOCK_LEN);
+  mulx_ghash (tmp);
+  if (memcmp (tmp, ghash_key, GCRY_SIV_BLOCK_LEN) != 0)
+    return -1;
+
+  /* Test POLYVAL */
+  memset (&c->u_mode.gcm, 0, sizeof(c->u_mode.gcm));
+  polyval_set_key (c, polyval_key);
+  memset (&tmp, 0, sizeof(tmp));
+  do_polyval_buf (c, tmp, polyval_data, GCRY_SIV_BLOCK_LEN * 2, 1);
+  cipher_block_bswap (tmp, tmp, GCRY_SIV_BLOCK_LEN);
+  if (memcmp (tmp, polyval_tag, GCRY_SIV_BLOCK_LEN) != 0)
+    return -1;
+
+  return 0;
+}
+
+
+gcry_err_code_t
+_gcry_cipher_gcm_siv_setkey (gcry_cipher_hd_t c, unsigned int keylen)
+{
+  static int done;
+
+  if (keylen != 16 && keylen != 32)
+    return GPG_ERR_INV_KEYLEN;
+
+  if (!done)
+    {
+      if (gcm_siv_selftest (c))
+       return GPG_ERR_SELFTEST_FAILED;
+
+      done = 1;
+    }
+
+  c->marks.iv = 0;
+  c->marks.tag = 0;
+  memset (&c->u_mode.gcm, 0, sizeof(c->u_mode.gcm));
+  c->u_mode.gcm.siv_keylen = keylen;
+  return 0;
+}
+
+
+gcry_err_code_t
+_gcry_cipher_gcm_siv_set_nonce (gcry_cipher_hd_t c, const byte *iv,
+                               size_t ivlen)
+{
+  byte auth_key[GCRY_SIV_BLOCK_LEN];
+  byte tmp_in[GCRY_SIV_BLOCK_LEN];
+  byte tmp[GCRY_SIV_BLOCK_LEN];
+  byte enc_key[32];
+  gcry_err_code_t err;
+
+  if (c->spec->blocksize != GCRY_SIV_BLOCK_LEN)
+    return GPG_ERR_CIPHER_ALGO;
+  if (ivlen != GCM_SIV_NONCE_LENGTH)
+    return GPG_ERR_INV_ARG;
+  if (c->u_mode.gcm.siv_keylen == 0)
+    return GPG_ERR_INV_STATE;
+  if (c->marks.iv)
+    {
+      /* If nonce is already set, use cipher_reset or setkey first to reset
+       * cipher state. */
+      return GPG_ERR_INV_STATE;
+    }
+
+  memset (c->u_mode.gcm.aadlen, 0, sizeof(c->u_mode.gcm.aadlen));
+  memset (c->u_mode.gcm.datalen, 0, sizeof(c->u_mode.gcm.datalen));
+  memset (c->u_mode.gcm.u_tag.tag, 0, sizeof(c->u_mode.gcm.u_tag.tag));
+  c->u_mode.gcm.datalen_over_limits = 0;
+  c->u_mode.gcm.ghash_data_finalized = 0;
+  c->u_mode.gcm.ghash_aad_finalized = 0;
+
+  memset (c->u_iv.iv, 0, GCRY_SIV_BLOCK_LEN);
+  memcpy (c->u_iv.iv, iv, ivlen);
+  memcpy (tmp_in + 4, iv, ivlen);
+
+  /* Derive message authentication key */
+  buf_put_le32(tmp_in, 0);
+  c->spec->encrypt (&c->context.c, tmp, tmp_in);
+  memcpy (auth_key + 0, tmp, 8);
+
+  buf_put_le32(tmp_in, 1);
+  c->spec->encrypt (&c->context.c, tmp, tmp_in);
+  memcpy (auth_key + 8, tmp, 8);
+
+  polyval_set_key (c, auth_key);
+  wipememory (auth_key, sizeof(auth_key));
+
+  /* Derive message encryption key */
+  buf_put_le32(tmp_in, 2);
+  c->spec->encrypt (&c->context.c, tmp, tmp_in);
+  memcpy (enc_key + 0, tmp, 8);
+
+  buf_put_le32(tmp_in, 3);
+  c->spec->encrypt (&c->context.c, tmp, tmp_in);
+  memcpy (enc_key + 8, tmp, 8);
+
+  if (c->u_mode.gcm.siv_keylen >= 24)
+    {
+      buf_put_le32(tmp_in, 4);
+      c->spec->encrypt (&c->context.c, tmp, tmp_in);
+      memcpy (enc_key + 16, tmp, 8);
+    }
+
+  if (c->u_mode.gcm.siv_keylen >= 32)
+    {
+      buf_put_le32(tmp_in, 5);
+      c->spec->encrypt (&c->context.c, tmp, tmp_in);
+      memcpy (enc_key + 24, tmp, 8);
+    }
+
+  wipememory (tmp, sizeof(tmp));
+  wipememory (tmp_in, sizeof(tmp_in));
+
+  err = c->spec->setkey (&c->context.c, enc_key, c->u_mode.gcm.siv_keylen,
+                        &c->bulk);
+  wipememory (enc_key, sizeof(enc_key));
+  if (err)
+    return err;
+
+  c->marks.iv = 1;
+  return 0;
+}
+
+
+gcry_err_code_t
+_gcry_cipher_gcm_siv_authenticate (gcry_cipher_hd_t c,
+                                  const byte *aadbuf, size_t aadbuflen)
+{
+  if (c->spec->blocksize != GCRY_SIV_BLOCK_LEN)
+    return GPG_ERR_CIPHER_ALGO;
+  if (c->u_mode.gcm.datalen_over_limits)
+    return GPG_ERR_INV_LENGTH;
+  if (c->marks.tag
+      || !c->marks.iv
+      || c->u_mode.gcm.ghash_aad_finalized
+      || c->u_mode.gcm.ghash_data_finalized
+      || !c->u_mode.gcm.ghash_fn)
+    return GPG_ERR_INV_STATE;
+
+  gcm_siv_bytecounter_add (c->u_mode.gcm.aadlen, aadbuflen);
+  if (!gcm_siv_check_len (c->u_mode.gcm.aadlen))
+    {
+      c->u_mode.gcm.datalen_over_limits = 1;
+      return GPG_ERR_INV_LENGTH;
+    }
+
+  do_polyval_buf (c, c->u_mode.gcm.u_tag.tag, aadbuf, aadbuflen, 0);
+
+  return 0;
+}
+
+
+gcry_err_code_t
+_gcry_cipher_gcm_siv_encrypt (gcry_cipher_hd_t c,
+                             byte *outbuf, size_t outbuflen,
+                             const byte *inbuf, size_t inbuflen)
+{
+  u32 bitlengths[2][2];
+
+  if (c->spec->blocksize != GCRY_SIV_BLOCK_LEN)
+    return GPG_ERR_CIPHER_ALGO;
+  if (outbuflen < inbuflen)
+    return GPG_ERR_BUFFER_TOO_SHORT;
+  if (c->u_mode.gcm.datalen_over_limits)
+    return GPG_ERR_INV_LENGTH;
+  if (c->marks.tag
+      || !c->marks.iv
+      || c->u_mode.gcm.ghash_data_finalized
+      || !c->u_mode.gcm.ghash_fn)
+    return GPG_ERR_INV_STATE;
+
+  if (!c->u_mode.gcm.ghash_aad_finalized)
+    {
+      /* Start of encryption marks end of AAD stream. */
+      do_polyval_buf(c, c->u_mode.gcm.u_tag.tag, NULL, 0, 1);
+      c->u_mode.gcm.ghash_aad_finalized = 1;
+    }
+
+  gcm_siv_bytecounter_add (c->u_mode.gcm.datalen, inbuflen);
+  if (!gcm_siv_check_len (c->u_mode.gcm.datalen))
+    {
+      c->u_mode.gcm.datalen_over_limits = 1;
+      return GPG_ERR_INV_LENGTH;
+    }
+
+  /* Plaintext and padding to POLYVAL. */
+  do_polyval_buf (c, c->u_mode.gcm.u_tag.tag, inbuf, inbuflen, 1);
+  c->u_mode.gcm.ghash_data_finalized = 1;
+
+  /* aad length */
+  bitlengths[0][0] = le_bswap32(c->u_mode.gcm.aadlen[0] << 3);
+  bitlengths[0][1] = le_bswap32((c->u_mode.gcm.aadlen[0] >> 29) |
+                                (c->u_mode.gcm.aadlen[1] << 3));
+  /* data length */
+  bitlengths[1][0] = le_bswap32(c->u_mode.gcm.datalen[0] << 3);
+  bitlengths[1][1] = le_bswap32((c->u_mode.gcm.datalen[0] >> 29) |
+                                (c->u_mode.gcm.datalen[1] << 3));
+
+  /* Length block to POLYVAL. */
+  do_polyval_buf(c, c->u_mode.gcm.u_tag.tag, (byte *)bitlengths,
+                GCRY_SIV_BLOCK_LEN, 1);
+  wipememory (bitlengths, sizeof(bitlengths));
+
+  /* Prepare tag and counter. */
+  cipher_block_bswap (c->u_mode.gcm.u_tag.tag, c->u_mode.gcm.u_tag.tag,
+                     GCRY_SIV_BLOCK_LEN);
+  cipher_block_xor (c->u_mode.gcm.tagiv, c->u_iv.iv, c->u_mode.gcm.u_tag.tag,
+                   GCRY_SIV_BLOCK_LEN);
+  c->u_mode.gcm.tagiv[GCRY_SIV_BLOCK_LEN - 1] &= 0x7f;
+  c->spec->encrypt (&c->context.c, c->u_mode.gcm.tagiv, c->u_mode.gcm.tagiv);
+  c->marks.tag = 1;
+  memcpy (c->u_ctr.ctr, c->u_mode.gcm.tagiv, GCRY_SIV_BLOCK_LEN);
+  c->u_ctr.ctr[GCRY_SIV_BLOCK_LEN - 1] |= 0x80;
+
+  /* Encrypt data */
+  do_ctr_le32 (c, outbuf, inbuf, inbuflen);
+  return 0;
+}
+
+
+gcry_err_code_t
+_gcry_cipher_gcm_siv_set_decryption_tag (gcry_cipher_hd_t c,
+                                        const byte *tag, size_t taglen)
+{
+  if (taglen != GCRY_SIV_BLOCK_LEN)
+    return GPG_ERR_INV_ARG;
+  if (c->spec->blocksize != GCRY_SIV_BLOCK_LEN)
+    return GPG_ERR_CIPHER_ALGO;
+  if (c->marks.tag)
+    return GPG_ERR_INV_STATE;
+
+  memcpy (c->u_mode.gcm.tagiv, tag, GCRY_SIV_BLOCK_LEN);
+  c->marks.tag = 1;
+
+  return 0;
+}
+
+
+gcry_err_code_t
+_gcry_cipher_gcm_siv_decrypt (gcry_cipher_hd_t c,
+                             byte *outbuf, size_t outbuflen,
+                             const byte *inbuf, size_t inbuflen)
+{
+  byte expected_tag[GCRY_SIV_BLOCK_LEN];
+  u32 bitlengths[2][2];
+  gcry_err_code_t rc = 0;
+
+  if (c->spec->blocksize != GCRY_SIV_BLOCK_LEN)
+    return GPG_ERR_CIPHER_ALGO;
+  if (outbuflen < inbuflen)
+    return GPG_ERR_BUFFER_TOO_SHORT;
+  if (c->u_mode.gcm.datalen_over_limits)
+    return GPG_ERR_INV_LENGTH;
+  if (!c->marks.tag
+      || !c->marks.iv
+      || c->u_mode.gcm.ghash_data_finalized
+      || !c->u_mode.gcm.ghash_fn)
+    return GPG_ERR_INV_STATE;
+
+  if (!c->u_mode.gcm.ghash_aad_finalized)
+    {
+      /* Start of encryption marks end of AAD stream. */
+      do_polyval_buf(c, c->u_mode.gcm.u_tag.tag, NULL, 0, 1);
+      c->u_mode.gcm.ghash_aad_finalized = 1;
+    }
+
+  gcm_siv_bytecounter_add (c->u_mode.gcm.datalen, inbuflen);
+  if (!gcm_siv_check_len (c->u_mode.gcm.datalen))
+    {
+      c->u_mode.gcm.datalen_over_limits = 1;
+      return GPG_ERR_INV_LENGTH;
+    }
+
+  /* Prepare counter. */
+  memcpy (c->u_ctr.ctr, c->u_mode.gcm.tagiv, GCRY_SIV_BLOCK_LEN);
+  c->u_ctr.ctr[GCRY_SIV_BLOCK_LEN - 1] |= 0x80;
+
+  /* Decrypt data. */
+  do_ctr_le32 (c, outbuf, inbuf, inbuflen);
+
+  /* Plaintext and padding to POLYVAL. */
+  do_polyval_buf (c, c->u_mode.gcm.u_tag.tag, outbuf, inbuflen, 1);
+  c->u_mode.gcm.ghash_data_finalized = 1;
+
+  /* aad length */
+  bitlengths[0][0] = le_bswap32(c->u_mode.gcm.aadlen[0] << 3);
+  bitlengths[0][1] = le_bswap32((c->u_mode.gcm.aadlen[0] >> 29) |
+                                (c->u_mode.gcm.aadlen[1] << 3));
+  /* data length */
+  bitlengths[1][0] = le_bswap32(c->u_mode.gcm.datalen[0] << 3);
+  bitlengths[1][1] = le_bswap32((c->u_mode.gcm.datalen[0] >> 29) |
+                                (c->u_mode.gcm.datalen[1] << 3));
+
+  /* Length block to POLYVAL. */
+  do_polyval_buf(c, c->u_mode.gcm.u_tag.tag, (byte *)bitlengths,
+                GCRY_SIV_BLOCK_LEN, 1);
+  wipememory (bitlengths, sizeof(bitlengths));
+
+  /* Prepare tag. */
+  cipher_block_bswap (c->u_mode.gcm.u_tag.tag, c->u_mode.gcm.u_tag.tag,
+                     GCRY_SIV_BLOCK_LEN);
+  cipher_block_xor (expected_tag, c->u_iv.iv, c->u_mode.gcm.u_tag.tag,
+                   GCRY_SIV_BLOCK_LEN);
+  expected_tag[GCRY_SIV_BLOCK_LEN - 1] &= 0x7f;
+  c->spec->encrypt (&c->context.c, expected_tag, expected_tag);
+
+  if (!buf_eq_const(c->u_mode.gcm.tagiv, expected_tag, GCRY_SIV_BLOCK_LEN))
+    {
+      wipememory (outbuf, inbuflen);
+      rc = GPG_ERR_CHECKSUM;
+    }
+
+  wipememory (expected_tag, sizeof(expected_tag));
+  return rc;
+}
+
+
+static gcry_err_code_t
+_gcry_cipher_gcm_siv_tag (gcry_cipher_hd_t c,
+                         byte * outbuf, size_t outbuflen, int check)
+{
+  gcry_err_code_t err;
+
+  if (!c->marks.tag)
+    {
+      if (!c->u_mode.gcm.ghash_fn)
+        return GPG_ERR_INV_STATE;
+
+      if (!c->marks.tag)
+        {
+          /* Finalize GCM-SIV with zero-length plaintext. */
+          err = _gcry_cipher_gcm_siv_encrypt (c, NULL, 0, NULL, 0);
+          if (err != 0)
+            return err;
+        }
+    }
+
+  if (c->u_mode.gcm.datalen_over_limits)
+    return GPG_ERR_INV_LENGTH;
+  if (!c->u_mode.gcm.ghash_data_finalized)
+    return GPG_ERR_INV_STATE;
+  if (!c->marks.tag)
+    return GPG_ERR_INV_STATE;
+
+  if (!check)
+    {
+      if (outbuflen > GCRY_SIV_BLOCK_LEN)
+        outbuflen = GCRY_SIV_BLOCK_LEN;
+
+      /* NB: We already checked that OUTBUF is large enough to hold
+       * the result or has valid truncated length.  */
+      memcpy (outbuf, c->u_mode.gcm.tagiv, outbuflen);
+    }
+  else
+    {
+      /* OUTBUFLEN gives the length of the user supplied tag in OUTBUF
+       * and thus we need to compare its length first.  */
+      if (outbuflen != GCRY_SIV_BLOCK_LEN
+          || !buf_eq_const (outbuf, c->u_mode.gcm.tagiv, outbuflen))
+        return GPG_ERR_CHECKSUM;
+    }
+
+  return 0;
+}
+
+
+gcry_err_code_t
+_gcry_cipher_gcm_siv_get_tag (gcry_cipher_hd_t c, unsigned char *outtag,
+                             size_t taglen)
+{
+  return _gcry_cipher_gcm_siv_tag (c, outtag, taglen, 0);
+}
+
+
+gcry_err_code_t
+_gcry_cipher_gcm_siv_check_tag (gcry_cipher_hd_t c,
+                                  const unsigned char *intag,
+                                  size_t taglen)
+{
+  return _gcry_cipher_gcm_siv_tag (c, (unsigned char *)intag, taglen, 1);
+}
diff --git a/grub-core/lib/libgcrypt/cipher/cipher-gcm.c 
b/grub-core/lib/libgcrypt/cipher/cipher-gcm.c
new file mode 100644
index 000000000..fc79986e5
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/cipher-gcm.c
@@ -0,0 +1,1263 @@
+/* cipher-gcm.c  - Generic Galois Counter Mode implementation
+ * Copyright (C) 2013 Dmitry Eremin-Solenikov
+ * Copyright (C) 2013, 2018-2019 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser general Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "g10lib.h"
+#include "cipher.h"
+#include "bufhelp.h"
+#include "./cipher-internal.h"
+
+
+/* Helper macro to force alignment to 16 or 64 bytes.  */
+#ifdef HAVE_GCC_ATTRIBUTE_ALIGNED
+# define ATTR_ALIGNED_64  __attribute__ ((aligned (64)))
+#else
+# define ATTR_ALIGNED_64
+#endif
+
+
+#ifdef GCM_USE_INTEL_PCLMUL
+extern void _gcry_ghash_setup_intel_pclmul (gcry_cipher_hd_t c);
+
+extern unsigned int _gcry_ghash_intel_pclmul (gcry_cipher_hd_t c, byte *result,
+                                              const byte *buf, size_t nblocks);
+
+extern unsigned int _gcry_polyval_intel_pclmul (gcry_cipher_hd_t c,
+                                                byte *result,
+                                                const byte *buf,
+                                                size_t nblocks);
+#endif
+
+#ifdef GCM_USE_ARM_PMULL
+extern void _gcry_ghash_setup_armv8_ce_pmull (void *gcm_key, void *gcm_table);
+
+extern unsigned int _gcry_ghash_armv8_ce_pmull (void *gcm_key, byte *result,
+                                                const byte *buf, size_t 
nblocks,
+                                                void *gcm_table);
+
+extern unsigned int _gcry_polyval_armv8_ce_pmull (void *gcm_key, byte *result,
+                                                  const byte *buf,
+                                                  size_t nblocks,
+                                                  void *gcm_table);
+
+static void
+ghash_setup_armv8_ce_pmull (gcry_cipher_hd_t c)
+{
+  _gcry_ghash_setup_armv8_ce_pmull(c->u_mode.gcm.u_ghash_key.key,
+                                   c->u_mode.gcm.gcm_table);
+}
+
+static unsigned int
+ghash_armv8_ce_pmull (gcry_cipher_hd_t c, byte *result, const byte *buf,
+                      size_t nblocks)
+{
+  return _gcry_ghash_armv8_ce_pmull(c->u_mode.gcm.u_ghash_key.key, result, buf,
+                                    nblocks, c->u_mode.gcm.gcm_table);
+}
+
+static unsigned int
+polyval_armv8_ce_pmull (gcry_cipher_hd_t c, byte *result, const byte *buf,
+                        size_t nblocks)
+{
+  return _gcry_polyval_armv8_ce_pmull(c->u_mode.gcm.u_ghash_key.key, result,
+                                      buf, nblocks, c->u_mode.gcm.gcm_table);
+}
+#endif /* GCM_USE_ARM_PMULL */
+
+#ifdef GCM_USE_ARM_NEON
+extern void _gcry_ghash_setup_armv7_neon (void *gcm_key);
+
+extern unsigned int _gcry_ghash_armv7_neon (void *gcm_key, byte *result,
+                                           const byte *buf, size_t nblocks);
+
+static void
+ghash_setup_armv7_neon (gcry_cipher_hd_t c)
+{
+  _gcry_ghash_setup_armv7_neon(c->u_mode.gcm.u_ghash_key.key);
+}
+
+static unsigned int
+ghash_armv7_neon (gcry_cipher_hd_t c, byte *result, const byte *buf,
+                 size_t nblocks)
+{
+  return _gcry_ghash_armv7_neon(c->u_mode.gcm.u_ghash_key.key, result, buf,
+                               nblocks);
+}
+#endif /* GCM_USE_ARM_NEON */
+
+#ifdef GCM_USE_S390X_CRYPTO
+#include "asm-inline-s390x.h"
+
+static unsigned int
+ghash_s390x_kimd (gcry_cipher_hd_t c, byte *result, const byte *buf,
+                 size_t nblocks)
+{
+  u128_t params[2];
+
+  memcpy (&params[0], result, 16);
+  memcpy (&params[1], c->u_mode.gcm.u_ghash_key.key, 16);
+
+  kimd_execute (KMID_FUNCTION_GHASH, &params, buf, nblocks * 16);
+
+  memcpy (result, &params[0], 16);
+  wipememory (params, sizeof(params));
+  return 0;
+}
+#endif /* GCM_USE_S390X_CRYPTO*/
+
+#ifdef GCM_USE_PPC_VPMSUM
+extern void _gcry_ghash_setup_ppc_vpmsum (void *gcm_table, void *gcm_key);
+
+/* result is 128-bits */
+extern unsigned int _gcry_ghash_ppc_vpmsum (byte *result, void *gcm_table,
+                                           const byte *buf, size_t nblocks);
+
+static void
+ghash_setup_ppc_vpmsum (gcry_cipher_hd_t c)
+{
+  _gcry_ghash_setup_ppc_vpmsum(c->u_mode.gcm.gcm_table,
+                              c->u_mode.gcm.u_ghash_key.key);
+}
+
+static unsigned int
+ghash_ppc_vpmsum (gcry_cipher_hd_t c, byte *result, const byte *buf,
+                 size_t nblocks)
+{
+  return _gcry_ghash_ppc_vpmsum(result, c->u_mode.gcm.gcm_table, buf,
+                               nblocks);
+}
+#endif /* GCM_USE_PPC_VPMSUM */
+
+#ifdef GCM_USE_TABLES
+static struct
+{
+  volatile u32 counter_head;
+  u32 cacheline_align[64 / 4 - 1];
+  u16 R[256];
+  volatile u32 counter_tail;
+} gcm_table ATTR_ALIGNED_64 =
+  {
+    0,
+    { 0, },
+    {
+      0x0000, 0x01c2, 0x0384, 0x0246, 0x0708, 0x06ca, 0x048c, 0x054e,
+      0x0e10, 0x0fd2, 0x0d94, 0x0c56, 0x0918, 0x08da, 0x0a9c, 0x0b5e,
+      0x1c20, 0x1de2, 0x1fa4, 0x1e66, 0x1b28, 0x1aea, 0x18ac, 0x196e,
+      0x1230, 0x13f2, 0x11b4, 0x1076, 0x1538, 0x14fa, 0x16bc, 0x177e,
+      0x3840, 0x3982, 0x3bc4, 0x3a06, 0x3f48, 0x3e8a, 0x3ccc, 0x3d0e,
+      0x3650, 0x3792, 0x35d4, 0x3416, 0x3158, 0x309a, 0x32dc, 0x331e,
+      0x2460, 0x25a2, 0x27e4, 0x2626, 0x2368, 0x22aa, 0x20ec, 0x212e,
+      0x2a70, 0x2bb2, 0x29f4, 0x2836, 0x2d78, 0x2cba, 0x2efc, 0x2f3e,
+      0x7080, 0x7142, 0x7304, 0x72c6, 0x7788, 0x764a, 0x740c, 0x75ce,
+      0x7e90, 0x7f52, 0x7d14, 0x7cd6, 0x7998, 0x785a, 0x7a1c, 0x7bde,
+      0x6ca0, 0x6d62, 0x6f24, 0x6ee6, 0x6ba8, 0x6a6a, 0x682c, 0x69ee,
+      0x62b0, 0x6372, 0x6134, 0x60f6, 0x65b8, 0x647a, 0x663c, 0x67fe,
+      0x48c0, 0x4902, 0x4b44, 0x4a86, 0x4fc8, 0x4e0a, 0x4c4c, 0x4d8e,
+      0x46d0, 0x4712, 0x4554, 0x4496, 0x41d8, 0x401a, 0x425c, 0x439e,
+      0x54e0, 0x5522, 0x5764, 0x56a6, 0x53e8, 0x522a, 0x506c, 0x51ae,
+      0x5af0, 0x5b32, 0x5974, 0x58b6, 0x5df8, 0x5c3a, 0x5e7c, 0x5fbe,
+      0xe100, 0xe0c2, 0xe284, 0xe346, 0xe608, 0xe7ca, 0xe58c, 0xe44e,
+      0xef10, 0xeed2, 0xec94, 0xed56, 0xe818, 0xe9da, 0xeb9c, 0xea5e,
+      0xfd20, 0xfce2, 0xfea4, 0xff66, 0xfa28, 0xfbea, 0xf9ac, 0xf86e,
+      0xf330, 0xf2f2, 0xf0b4, 0xf176, 0xf438, 0xf5fa, 0xf7bc, 0xf67e,
+      0xd940, 0xd882, 0xdac4, 0xdb06, 0xde48, 0xdf8a, 0xddcc, 0xdc0e,
+      0xd750, 0xd692, 0xd4d4, 0xd516, 0xd058, 0xd19a, 0xd3dc, 0xd21e,
+      0xc560, 0xc4a2, 0xc6e4, 0xc726, 0xc268, 0xc3aa, 0xc1ec, 0xc02e,
+      0xcb70, 0xcab2, 0xc8f4, 0xc936, 0xcc78, 0xcdba, 0xcffc, 0xce3e,
+      0x9180, 0x9042, 0x9204, 0x93c6, 0x9688, 0x974a, 0x950c, 0x94ce,
+      0x9f90, 0x9e52, 0x9c14, 0x9dd6, 0x9898, 0x995a, 0x9b1c, 0x9ade,
+      0x8da0, 0x8c62, 0x8e24, 0x8fe6, 0x8aa8, 0x8b6a, 0x892c, 0x88ee,
+      0x83b0, 0x8272, 0x8034, 0x81f6, 0x84b8, 0x857a, 0x873c, 0x86fe,
+      0xa9c0, 0xa802, 0xaa44, 0xab86, 0xaec8, 0xaf0a, 0xad4c, 0xac8e,
+      0xa7d0, 0xa612, 0xa454, 0xa596, 0xa0d8, 0xa11a, 0xa35c, 0xa29e,
+      0xb5e0, 0xb422, 0xb664, 0xb7a6, 0xb2e8, 0xb32a, 0xb16c, 0xb0ae,
+      0xbbf0, 0xba32, 0xb874, 0xb9b6, 0xbcf8, 0xbd3a, 0xbf7c, 0xbebe,
+    },
+    0
+  };
+
+#define gcmR gcm_table.R
+
+static inline
+void prefetch_table(const void *tab, size_t len)
+{
+  const volatile byte *vtab = tab;
+  size_t i;
+
+  for (i = 0; len - i >= 8 * 32; i += 8 * 32)
+    {
+      (void)vtab[i + 0 * 32];
+      (void)vtab[i + 1 * 32];
+      (void)vtab[i + 2 * 32];
+      (void)vtab[i + 3 * 32];
+      (void)vtab[i + 4 * 32];
+      (void)vtab[i + 5 * 32];
+      (void)vtab[i + 6 * 32];
+      (void)vtab[i + 7 * 32];
+    }
+  for (; i < len; i += 32)
+    {
+      (void)vtab[i];
+    }
+
+  (void)vtab[len - 1];
+}
+
+static inline void
+do_prefetch_tables (const void *gcmM, size_t gcmM_size)
+{
+  /* Modify counters to trigger copy-on-write and unsharing if physical pages
+   * of look-up table are shared between processes.  Modifying counters also
+   * causes checksums for pages to change and hint same-page merging algorithm
+   * that these pages are frequently changing.  */
+  gcm_table.counter_head++;
+  gcm_table.counter_tail++;
+
+  /* Prefetch look-up tables to cache.  */
+  prefetch_table(gcmM, gcmM_size);
+  prefetch_table(&gcm_table, sizeof(gcm_table));
+}
+
+#ifdef GCM_TABLES_USE_U64
+static void
+bshift (u64 * b0, u64 * b1)
+{
+  u64 t[2], mask;
+
+  t[0] = *b0;
+  t[1] = *b1;
+  mask = -(t[1] & 1) & 0xe1;
+  mask <<= 56;
+
+  *b1 = (t[1] >> 1) ^ (t[0] << 63);
+  *b0 = (t[0] >> 1) ^ mask;
+}
+
+static void
+do_fillM (unsigned char *h, u64 *M)
+{
+  int i, j;
+
+  M[0 + 0] = 0;
+  M[0 + 16] = 0;
+
+  M[8 + 0] = buf_get_be64 (h + 0);
+  M[8 + 16] = buf_get_be64 (h + 8);
+
+  for (i = 4; i > 0; i /= 2)
+    {
+      M[i + 0] = M[2 * i + 0];
+      M[i + 16] = M[2 * i + 16];
+
+      bshift (&M[i], &M[i + 16]);
+    }
+
+  for (i = 2; i < 16; i *= 2)
+    for (j = 1; j < i; j++)
+      {
+        M[(i + j) + 0] = M[i + 0] ^ M[j + 0];
+        M[(i + j) + 16] = M[i + 16] ^ M[j + 16];
+      }
+
+  for (i = 0; i < 16; i++)
+    {
+      M[i + 32] = (M[i + 0] >> 4) ^ ((u64) gcmR[(M[i + 16] & 0xf) << 4] << 48);
+      M[i + 48] = (M[i + 16] >> 4) ^ (M[i + 0] << 60);
+    }
+}
+
+static inline unsigned int
+do_ghash (unsigned char *result, const unsigned char *buf, const u64 *gcmM)
+{
+  u64 V[2];
+  u64 tmp[2];
+  const u64 *M;
+  u64 T;
+  u32 A;
+  int i;
+
+  cipher_block_xor (V, result, buf, 16);
+  V[0] = be_bswap64 (V[0]);
+  V[1] = be_bswap64 (V[1]);
+
+  /* First round can be manually tweaked based on fact that 'tmp' is zero. */
+  M = &gcmM[(V[1] & 0xf) + 32];
+  V[1] >>= 4;
+  tmp[0] = M[0];
+  tmp[1] = M[16];
+  tmp[0] ^= gcmM[(V[1] & 0xf) + 0];
+  tmp[1] ^= gcmM[(V[1] & 0xf) + 16];
+  V[1] >>= 4;
+
+  i = 6;
+  while (1)
+    {
+      M = &gcmM[(V[1] & 0xf) + 32];
+      V[1] >>= 4;
+
+      A = tmp[1] & 0xff;
+      T = tmp[0];
+      tmp[0] = (T >> 8) ^ ((u64) gcmR[A] << 48) ^ gcmM[(V[1] & 0xf) + 0];
+      tmp[1] = (T << 56) ^ (tmp[1] >> 8) ^ gcmM[(V[1] & 0xf) + 16];
+
+      tmp[0] ^= M[0];
+      tmp[1] ^= M[16];
+
+      if (i == 0)
+        break;
+
+      V[1] >>= 4;
+      --i;
+    }
+
+  i = 7;
+  while (1)
+    {
+      M = &gcmM[(V[0] & 0xf) + 32];
+      V[0] >>= 4;
+
+      A = tmp[1] & 0xff;
+      T = tmp[0];
+      tmp[0] = (T >> 8) ^ ((u64) gcmR[A] << 48) ^ gcmM[(V[0] & 0xf) + 0];
+      tmp[1] = (T << 56) ^ (tmp[1] >> 8) ^ gcmM[(V[0] & 0xf) + 16];
+
+      tmp[0] ^= M[0];
+      tmp[1] ^= M[16];
+
+      if (i == 0)
+        break;
+
+      V[0] >>= 4;
+      --i;
+    }
+
+  buf_put_be64 (result + 0, tmp[0]);
+  buf_put_be64 (result + 8, tmp[1]);
+
+  return (sizeof(V) + sizeof(T) + sizeof(tmp) +
+          sizeof(int)*2 + sizeof(void*)*5);
+}
+
+#else /*!GCM_TABLES_USE_U64*/
+
+static void
+bshift (u32 * M, int i)
+{
+  u32 t[4], mask;
+
+  t[0] = M[i * 4 + 0];
+  t[1] = M[i * 4 + 1];
+  t[2] = M[i * 4 + 2];
+  t[3] = M[i * 4 + 3];
+  mask = -(t[3] & 1) & 0xe1;
+
+  M[i * 4 + 3] = (t[3] >> 1) ^ (t[2] << 31);
+  M[i * 4 + 2] = (t[2] >> 1) ^ (t[1] << 31);
+  M[i * 4 + 1] = (t[1] >> 1) ^ (t[0] << 31);
+  M[i * 4 + 0] = (t[0] >> 1) ^ (mask << 24);
+}
+
+static void
+do_fillM (unsigned char *h, u32 *M)
+{
+  int i, j;
+
+  M[0 * 4 + 0] = 0;
+  M[0 * 4 + 1] = 0;
+  M[0 * 4 + 2] = 0;
+  M[0 * 4 + 3] = 0;
+
+  M[8 * 4 + 0] = buf_get_be32 (h + 0);
+  M[8 * 4 + 1] = buf_get_be32 (h + 4);
+  M[8 * 4 + 2] = buf_get_be32 (h + 8);
+  M[8 * 4 + 3] = buf_get_be32 (h + 12);
+
+  for (i = 4; i > 0; i /= 2)
+    {
+      M[i * 4 + 0] = M[2 * i * 4 + 0];
+      M[i * 4 + 1] = M[2 * i * 4 + 1];
+      M[i * 4 + 2] = M[2 * i * 4 + 2];
+      M[i * 4 + 3] = M[2 * i * 4 + 3];
+
+      bshift (M, i);
+    }
+
+  for (i = 2; i < 16; i *= 2)
+    for (j = 1; j < i; j++)
+      {
+        M[(i + j) * 4 + 0] = M[i * 4 + 0] ^ M[j * 4 + 0];
+        M[(i + j) * 4 + 1] = M[i * 4 + 1] ^ M[j * 4 + 1];
+        M[(i + j) * 4 + 2] = M[i * 4 + 2] ^ M[j * 4 + 2];
+        M[(i + j) * 4 + 3] = M[i * 4 + 3] ^ M[j * 4 + 3];
+      }
+
+  for (i = 0; i < 4 * 16; i += 4)
+    {
+      M[i + 0 + 64] = (M[i + 0] >> 4)
+                      ^ ((u64) gcmR[(M[i + 3] << 4) & 0xf0] << 16);
+      M[i + 1 + 64] = (M[i + 1] >> 4) ^ (M[i + 0] << 28);
+      M[i + 2 + 64] = (M[i + 2] >> 4) ^ (M[i + 1] << 28);
+      M[i + 3 + 64] = (M[i + 3] >> 4) ^ (M[i + 2] << 28);
+    }
+}
+
+static inline unsigned int
+do_ghash (unsigned char *result, const unsigned char *buf, const u32 *gcmM)
+{
+  byte V[16];
+  u32 tmp[4];
+  u32 v;
+  const u32 *M, *m;
+  u32 T[3];
+  int i;
+
+  cipher_block_xor (V, result, buf, 16); /* V is big-endian */
+
+  /* First round can be manually tweaked based on fact that 'tmp' is zero. */
+  i = 15;
+
+  v = V[i];
+  M = &gcmM[(v & 0xf) * 4 + 64];
+  v = (v & 0xf0) >> 4;
+  m = &gcmM[v * 4];
+  v = V[--i];
+
+  tmp[0] = M[0] ^ m[0];
+  tmp[1] = M[1] ^ m[1];
+  tmp[2] = M[2] ^ m[2];
+  tmp[3] = M[3] ^ m[3];
+
+  while (1)
+    {
+      M = &gcmM[(v & 0xf) * 4 + 64];
+      v = (v & 0xf0) >> 4;
+      m = &gcmM[v * 4];
+
+      T[0] = tmp[0];
+      T[1] = tmp[1];
+      T[2] = tmp[2];
+      tmp[0] = (T[0] >> 8) ^ ((u32) gcmR[tmp[3] & 0xff] << 16) ^ m[0];
+      tmp[1] = (T[0] << 24) ^ (tmp[1] >> 8) ^ m[1];
+      tmp[2] = (T[1] << 24) ^ (tmp[2] >> 8) ^ m[2];
+      tmp[3] = (T[2] << 24) ^ (tmp[3] >> 8) ^ m[3];
+
+      tmp[0] ^= M[0];
+      tmp[1] ^= M[1];
+      tmp[2] ^= M[2];
+      tmp[3] ^= M[3];
+
+      if (i == 0)
+        break;
+
+      v = V[--i];
+    }
+
+  buf_put_be32 (result + 0, tmp[0]);
+  buf_put_be32 (result + 4, tmp[1]);
+  buf_put_be32 (result + 8, tmp[2]);
+  buf_put_be32 (result + 12, tmp[3]);
+
+  return (sizeof(V) + sizeof(T) + sizeof(tmp) +
+          sizeof(int)*2 + sizeof(void*)*6);
+}
+#endif /*!GCM_TABLES_USE_U64*/
+
+#define fillM(c) \
+  do_fillM (c->u_mode.gcm.u_ghash_key.key, c->u_mode.gcm.gcm_table)
+#define GHASH(c, result, buf) do_ghash (result, buf, c->u_mode.gcm.gcm_table)
+#define prefetch_tables(c) \
+  do_prefetch_tables(c->u_mode.gcm.gcm_table, sizeof(c->u_mode.gcm.gcm_table))
+
+#else
+
+static unsigned long
+bshift (unsigned long *b)
+{
+  unsigned long c;
+  int i;
+  c = b[3] & 1;
+  for (i = 3; i > 0; i--)
+    {
+      b[i] = (b[i] >> 1) | (b[i - 1] << 31);
+    }
+  b[i] >>= 1;
+  return c;
+}
+
+static unsigned int
+do_ghash (unsigned char *hsub, unsigned char *result, const unsigned char *buf)
+{
+  unsigned long V[4];
+  int i, j;
+  byte *p;
+
+#ifdef WORDS_BIGENDIAN
+  p = result;
+#else
+  unsigned long T[4];
+
+  cipher_block_xor (V, result, buf, 16);
+  for (i = 0; i < 4; i++)
+    {
+      V[i] = (V[i] & 0x00ff00ff) << 8 | (V[i] & 0xff00ff00) >> 8;
+      V[i] = (V[i] & 0x0000ffff) << 16 | (V[i] & 0xffff0000) >> 16;
+    }
+  p = (byte *) T;
+#endif
+
+  memset (p, 0, 16);
+
+  for (i = 0; i < 16; i++)
+    {
+      for (j = 0x80; j; j >>= 1)
+        {
+          if (hsub[i] & j)
+            cipher_block_xor (p, p, V, 16);
+          if (bshift (V))
+            V[0] ^= 0xe1000000;
+        }
+    }
+#ifndef WORDS_BIGENDIAN
+  for (i = 0, p = (byte *) T; i < 16; i += 4, p += 4)
+    {
+      result[i + 0] = p[3];
+      result[i + 1] = p[2];
+      result[i + 2] = p[1];
+      result[i + 3] = p[0];
+    }
+#endif
+
+  return (sizeof(V) + sizeof(T) + sizeof(int)*2 + sizeof(void*)*5);
+}
+
+#define fillM(c) do { } while (0)
+#define GHASH(c, result, buf) do_ghash (c->u_mode.gcm.u_ghash_key.key, result, 
buf)
+#define prefetch_tables(c) do {} while (0)
+
+#endif /* !GCM_USE_TABLES */
+
+
+static unsigned int
+ghash_internal (gcry_cipher_hd_t c, byte *result, const byte *buf,
+                size_t nblocks)
+{
+  const unsigned int blocksize = GCRY_GCM_BLOCK_LEN;
+  unsigned int burn = 0;
+
+  prefetch_tables (c);
+
+  while (nblocks)
+    {
+      burn = GHASH (c, result, buf);
+      buf += blocksize;
+      nblocks--;
+    }
+
+  return burn + (burn ? 5*sizeof(void*) : 0);
+}
+
+
+static void
+setupM (gcry_cipher_hd_t c)
+{
+  unsigned int features = _gcry_get_hw_features ();
+
+  c->u_mode.gcm.ghash_fn = NULL;
+  c->u_mode.gcm.polyval_fn = NULL;
+
+  if (0)
+    {
+      (void)features;
+    }
+#ifdef GCM_USE_INTEL_PCLMUL
+  else if (features & HWF_INTEL_PCLMUL)
+    {
+      c->u_mode.gcm.ghash_fn = _gcry_ghash_intel_pclmul;
+      c->u_mode.gcm.polyval_fn = _gcry_polyval_intel_pclmul;
+      _gcry_ghash_setup_intel_pclmul (c);
+    }
+#endif
+#ifdef GCM_USE_ARM_PMULL
+  else if (features & HWF_ARM_PMULL)
+    {
+      c->u_mode.gcm.ghash_fn = ghash_armv8_ce_pmull;
+      c->u_mode.gcm.polyval_fn = polyval_armv8_ce_pmull;
+      ghash_setup_armv8_ce_pmull (c);
+    }
+#endif
+#ifdef GCM_USE_ARM_NEON
+  else if (features & HWF_ARM_NEON)
+    {
+      c->u_mode.gcm.ghash_fn = ghash_armv7_neon;
+      ghash_setup_armv7_neon (c);
+    }
+#endif
+#ifdef GCM_USE_PPC_VPMSUM
+  else if (features & HWF_PPC_VCRYPTO)
+    {
+      c->u_mode.gcm.ghash_fn = ghash_ppc_vpmsum;
+      ghash_setup_ppc_vpmsum (c);
+    }
+#endif
+#ifdef GCM_USE_S390X_CRYPTO
+  else if (features & HWF_S390X_MSA)
+    {
+      if (kimd_query () & km_function_to_mask (KMID_FUNCTION_GHASH))
+       {
+         c->u_mode.gcm.ghash_fn = ghash_s390x_kimd;
+       }
+    }
+#endif
+
+  if (c->u_mode.gcm.ghash_fn == NULL)
+    {
+      c->u_mode.gcm.ghash_fn = ghash_internal;
+      fillM (c);
+    }
+}
+
+
+static inline void
+gcm_bytecounter_add (u32 ctr[2], size_t add)
+{
+  if (sizeof(add) > sizeof(u32))
+    {
+      u32 high_add = ((add >> 31) >> 1) & 0xffffffff;
+      ctr[1] += high_add;
+    }
+
+  ctr[0] += add;
+  if (ctr[0] >= add)
+    return;
+  ++ctr[1];
+}
+
+
+static inline u32
+gcm_add32_be128 (byte *ctr, unsigned int add)
+{
+  /* 'ctr' must be aligned to four bytes. */
+  const unsigned int blocksize = GCRY_GCM_BLOCK_LEN;
+  u32 *pval = (u32 *)(void *)(ctr + blocksize - sizeof(u32));
+  u32 val;
+
+  val = be_bswap32(*pval) + add;
+  *pval = be_bswap32(val);
+
+  return val; /* return result as host-endian value */
+}
+
+
+static inline int
+gcm_check_datalen (u32 ctr[2])
+{
+  /* len(plaintext) <= 2^39-256 bits == 2^36-32 bytes == 2^32-2 blocks */
+  if (ctr[1] > 0xfU)
+    return 0;
+  if (ctr[1] < 0xfU)
+    return 1;
+
+  if (ctr[0] <= 0xffffffe0U)
+    return 1;
+
+  return 0;
+}
+
+
+static inline int
+gcm_check_aadlen_or_ivlen (u32 ctr[2])
+{
+  /* len(aad/iv) <= 2^64-1 bits ~= 2^61-1 bytes */
+  if (ctr[1] > 0x1fffffffU)
+    return 0;
+  if (ctr[1] < 0x1fffffffU)
+    return 1;
+
+  if (ctr[0] <= 0xffffffffU)
+    return 1;
+
+  return 0;
+}
+
+
+static void
+do_ghash_buf(gcry_cipher_hd_t c, byte *hash, const byte *buf,
+             size_t buflen, int do_padding)
+{
+  unsigned int blocksize = GCRY_GCM_BLOCK_LEN;
+  unsigned int unused = c->u_mode.gcm.mac_unused;
+  ghash_fn_t ghash_fn = c->u_mode.gcm.ghash_fn;
+  size_t nblocks, n;
+  unsigned int burn = 0;
+
+  if (buflen == 0 && (unused == 0 || !do_padding))
+    return;
+
+  do
+    {
+      if (buflen > 0 && (buflen + unused < blocksize || unused > 0))
+        {
+          n = blocksize - unused;
+          n = n < buflen ? n : buflen;
+
+          buf_cpy (&c->u_mode.gcm.macbuf[unused], buf, n);
+
+          unused += n;
+          buf += n;
+          buflen -= n;
+        }
+      if (!buflen)
+        {
+          if (!do_padding && unused < blocksize)
+           {
+             break;
+           }
+
+         n = blocksize - unused;
+         if (n > 0)
+           {
+             memset (&c->u_mode.gcm.macbuf[unused], 0, n);
+             unused = blocksize;
+           }
+        }
+
+      if (unused > 0)
+        {
+          gcry_assert (unused == blocksize);
+
+          /* Process one block from macbuf.  */
+          burn = ghash_fn (c, hash, c->u_mode.gcm.macbuf, 1);
+          unused = 0;
+        }
+
+      nblocks = buflen / blocksize;
+
+      if (nblocks)
+        {
+          burn = ghash_fn (c, hash, buf, nblocks);
+          buf += blocksize * nblocks;
+          buflen -= blocksize * nblocks;
+        }
+    }
+  while (buflen > 0);
+
+  c->u_mode.gcm.mac_unused = unused;
+
+  if (burn)
+    _gcry_burn_stack (burn);
+}
+
+
+static gcry_err_code_t
+gcm_ctr_encrypt (gcry_cipher_hd_t c, byte *outbuf, size_t outbuflen,
+                 const byte *inbuf, size_t inbuflen)
+{
+  gcry_err_code_t err = 0;
+
+  while (inbuflen)
+    {
+      u32 nblocks_to_overflow;
+      u32 num_ctr_increments;
+      u32 curr_ctr_low;
+      size_t currlen = inbuflen;
+      byte ctr_copy[GCRY_GCM_BLOCK_LEN];
+      int fix_ctr = 0;
+
+      /* GCM CTR increments only least significant 32-bits, without carry
+       * to upper 96-bits of counter.  Using generic CTR implementation
+       * directly would carry 32-bit overflow to upper 96-bit.  Detect
+       * if input length is long enough to cause overflow, and limit
+       * input length so that CTR overflow happen but updated CTR value is
+       * not used to encrypt further input.  After overflow, upper 96 bits
+       * of CTR are restored to cancel out modification done by generic CTR
+       * encryption. */
+
+      if (inbuflen > c->unused)
+        {
+          curr_ctr_low = gcm_add32_be128 (c->u_ctr.ctr, 0);
+
+          /* Number of CTR increments this inbuflen would cause. */
+          num_ctr_increments = (inbuflen - c->unused) / GCRY_GCM_BLOCK_LEN +
+                               !!((inbuflen - c->unused) % GCRY_GCM_BLOCK_LEN);
+
+          if ((u32)(num_ctr_increments + curr_ctr_low) < curr_ctr_low)
+            {
+              nblocks_to_overflow = 0xffffffffU - curr_ctr_low + 1;
+              currlen = nblocks_to_overflow * GCRY_GCM_BLOCK_LEN + c->unused;
+              if (currlen > inbuflen)
+                {
+                  currlen = inbuflen;
+                }
+
+              fix_ctr = 1;
+              cipher_block_cpy(ctr_copy, c->u_ctr.ctr, GCRY_GCM_BLOCK_LEN);
+            }
+        }
+
+      err = _gcry_cipher_ctr_encrypt(c, outbuf, outbuflen, inbuf, currlen);
+      if (err != 0)
+        return err;
+
+      if (fix_ctr)
+        {
+          /* Lower 32-bits of CTR should now be zero. */
+          gcry_assert(gcm_add32_be128 (c->u_ctr.ctr, 0) == 0);
+
+          /* Restore upper part of CTR. */
+          buf_cpy(c->u_ctr.ctr, ctr_copy, GCRY_GCM_BLOCK_LEN - sizeof(u32));
+
+          wipememory(ctr_copy, sizeof(ctr_copy));
+        }
+
+      inbuflen -= currlen;
+      inbuf += currlen;
+      outbuflen -= currlen;
+      outbuf += currlen;
+    }
+
+  return err;
+}
+
+
+static gcry_err_code_t
+gcm_crypt_inner (gcry_cipher_hd_t c, byte *outbuf, size_t outbuflen,
+                const byte *inbuf, size_t inbuflen, int encrypt)
+{
+  gcry_err_code_t err;
+
+  while (inbuflen)
+    {
+      size_t currlen = inbuflen;
+
+      /* Use a bulk method if available.  */
+      if (c->bulk.gcm_crypt)
+       {
+         /* Bulk method requires that there is no cached data. */
+         if (inbuflen >= GCRY_GCM_BLOCK_LEN && c->u_mode.gcm.mac_unused == 0)
+           {
+             size_t nblks = inbuflen / GCRY_GCM_BLOCK_LEN;
+             size_t nleft;
+             size_t ndone;
+
+             nleft = c->bulk.gcm_crypt (c, outbuf, inbuf, nblks, encrypt);
+             ndone = nblks - nleft;
+
+             inbuf += ndone * GCRY_GCM_BLOCK_LEN;
+             outbuf += ndone * GCRY_GCM_BLOCK_LEN;
+             inbuflen -= ndone * GCRY_GCM_BLOCK_LEN;
+             outbuflen -= ndone * GCRY_GCM_BLOCK_LEN;
+
+             if (inbuflen == 0)
+               break;
+
+             currlen = inbuflen;
+           }
+         else if (c->u_mode.gcm.mac_unused > 0
+                  && inbuflen >= GCRY_GCM_BLOCK_LEN
+                         + (16 - c->u_mode.gcm.mac_unused))
+           {
+             /* Handle just enough data so that cache is depleted, and on
+              * next loop iteration use bulk method. */
+             currlen = 16 - c->u_mode.gcm.mac_unused;
+
+             gcry_assert(currlen);
+           }
+       }
+
+      /* Since checksumming is done after/before encryption/decryption,
+       * process input in 24KiB chunks to keep data loaded in L1 cache for
+       * checksumming/decryption. */
+      if (currlen > 24 * 1024)
+       currlen = 24 * 1024;
+
+      if (!encrypt)
+       do_ghash_buf(c, c->u_mode.gcm.u_tag.tag, inbuf, currlen, 0);
+
+      err = gcm_ctr_encrypt(c, outbuf, outbuflen, inbuf, currlen);
+      if (err != 0)
+       return err;
+
+      if (encrypt)
+       do_ghash_buf(c, c->u_mode.gcm.u_tag.tag, outbuf, currlen, 0);
+
+      outbuf += currlen;
+      inbuf += currlen;
+      outbuflen -= currlen;
+      inbuflen -= currlen;
+    }
+
+  return 0;
+}
+
+
+gcry_err_code_t
+_gcry_cipher_gcm_encrypt (gcry_cipher_hd_t c,
+                          byte *outbuf, size_t outbuflen,
+                          const byte *inbuf, size_t inbuflen)
+{
+  static const unsigned char zerobuf[MAX_BLOCKSIZE];
+
+  if (c->spec->blocksize != GCRY_GCM_BLOCK_LEN)
+    return GPG_ERR_CIPHER_ALGO;
+  if (outbuflen < inbuflen)
+    return GPG_ERR_BUFFER_TOO_SHORT;
+  if (c->u_mode.gcm.datalen_over_limits)
+    return GPG_ERR_INV_LENGTH;
+  if (c->marks.tag
+      || c->u_mode.gcm.ghash_data_finalized
+      || !c->u_mode.gcm.ghash_fn)
+    return GPG_ERR_INV_STATE;
+
+  if (!c->marks.iv)
+    _gcry_cipher_gcm_setiv (c, zerobuf, GCRY_GCM_BLOCK_LEN);
+
+  if (c->u_mode.gcm.disallow_encryption_because_of_setiv_in_fips_mode)
+    return GPG_ERR_INV_STATE;
+
+  if (!c->u_mode.gcm.ghash_aad_finalized)
+    {
+      /* Start of encryption marks end of AAD stream. */
+      do_ghash_buf(c, c->u_mode.gcm.u_tag.tag, NULL, 0, 1);
+      c->u_mode.gcm.ghash_aad_finalized = 1;
+    }
+
+  gcm_bytecounter_add(c->u_mode.gcm.datalen, inbuflen);
+  if (!gcm_check_datalen(c->u_mode.gcm.datalen))
+    {
+      c->u_mode.gcm.datalen_over_limits = 1;
+      return GPG_ERR_INV_LENGTH;
+    }
+
+  return gcm_crypt_inner (c, outbuf, outbuflen, inbuf, inbuflen, 1);
+}
+
+
+gcry_err_code_t
+_gcry_cipher_gcm_decrypt (gcry_cipher_hd_t c,
+                          byte *outbuf, size_t outbuflen,
+                          const byte *inbuf, size_t inbuflen)
+{
+  static const unsigned char zerobuf[MAX_BLOCKSIZE];
+
+  if (c->spec->blocksize != GCRY_GCM_BLOCK_LEN)
+    return GPG_ERR_CIPHER_ALGO;
+  if (outbuflen < inbuflen)
+    return GPG_ERR_BUFFER_TOO_SHORT;
+  if (c->u_mode.gcm.datalen_over_limits)
+    return GPG_ERR_INV_LENGTH;
+  if (c->marks.tag
+      || c->u_mode.gcm.ghash_data_finalized
+      || !c->u_mode.gcm.ghash_fn)
+    return GPG_ERR_INV_STATE;
+
+  if (!c->marks.iv)
+    _gcry_cipher_gcm_setiv (c, zerobuf, GCRY_GCM_BLOCK_LEN);
+
+  if (!c->u_mode.gcm.ghash_aad_finalized)
+    {
+      /* Start of decryption marks end of AAD stream. */
+      do_ghash_buf(c, c->u_mode.gcm.u_tag.tag, NULL, 0, 1);
+      c->u_mode.gcm.ghash_aad_finalized = 1;
+    }
+
+  gcm_bytecounter_add(c->u_mode.gcm.datalen, inbuflen);
+  if (!gcm_check_datalen(c->u_mode.gcm.datalen))
+    {
+      c->u_mode.gcm.datalen_over_limits = 1;
+      return GPG_ERR_INV_LENGTH;
+    }
+
+  return gcm_crypt_inner (c, outbuf, outbuflen, inbuf, inbuflen, 0);
+}
+
+
+gcry_err_code_t
+_gcry_cipher_gcm_authenticate (gcry_cipher_hd_t c,
+                               const byte * aadbuf, size_t aadbuflen)
+{
+  static const unsigned char zerobuf[MAX_BLOCKSIZE];
+
+  if (c->spec->blocksize != GCRY_GCM_BLOCK_LEN)
+    return GPG_ERR_CIPHER_ALGO;
+  if (c->u_mode.gcm.datalen_over_limits)
+    return GPG_ERR_INV_LENGTH;
+  if (c->marks.tag
+      || c->u_mode.gcm.ghash_aad_finalized
+      || c->u_mode.gcm.ghash_data_finalized
+      || !c->u_mode.gcm.ghash_fn)
+    return GPG_ERR_INV_STATE;
+
+  if (!c->marks.iv)
+    _gcry_cipher_gcm_setiv (c, zerobuf, GCRY_GCM_BLOCK_LEN);
+
+  gcm_bytecounter_add(c->u_mode.gcm.aadlen, aadbuflen);
+  if (!gcm_check_aadlen_or_ivlen(c->u_mode.gcm.aadlen))
+    {
+      c->u_mode.gcm.datalen_over_limits = 1;
+      return GPG_ERR_INV_LENGTH;
+    }
+
+  do_ghash_buf(c, c->u_mode.gcm.u_tag.tag, aadbuf, aadbuflen, 0);
+
+  return 0;
+}
+
+
+void
+_gcry_cipher_gcm_setupM (gcry_cipher_hd_t c)
+{
+  setupM (c);
+}
+
+
+void
+_gcry_cipher_gcm_setkey (gcry_cipher_hd_t c)
+{
+  memset (c->u_mode.gcm.u_ghash_key.key, 0, GCRY_GCM_BLOCK_LEN);
+
+  c->spec->encrypt (&c->context.c, c->u_mode.gcm.u_ghash_key.key,
+                    c->u_mode.gcm.u_ghash_key.key);
+  setupM (c);
+}
+
+
+static gcry_err_code_t
+_gcry_cipher_gcm_initiv (gcry_cipher_hd_t c, const byte *iv, size_t ivlen)
+{
+  memset (c->u_mode.gcm.aadlen, 0, sizeof(c->u_mode.gcm.aadlen));
+  memset (c->u_mode.gcm.datalen, 0, sizeof(c->u_mode.gcm.datalen));
+  memset (c->u_mode.gcm.u_tag.tag, 0, GCRY_GCM_BLOCK_LEN);
+  c->u_mode.gcm.datalen_over_limits = 0;
+  c->u_mode.gcm.ghash_data_finalized = 0;
+  c->u_mode.gcm.ghash_aad_finalized = 0;
+
+  if (ivlen == 0)
+    return GPG_ERR_INV_LENGTH;
+
+  if (ivlen != GCRY_GCM_BLOCK_LEN - 4)
+    {
+      u32 iv_bytes[2] = {0, 0};
+      u32 bitlengths[2][2];
+
+      if (!c->u_mode.gcm.ghash_fn)
+        return GPG_ERR_INV_STATE;
+
+      memset(c->u_ctr.ctr, 0, GCRY_GCM_BLOCK_LEN);
+
+      gcm_bytecounter_add(iv_bytes, ivlen);
+      if (!gcm_check_aadlen_or_ivlen(iv_bytes))
+        {
+          c->u_mode.gcm.datalen_over_limits = 1;
+          return GPG_ERR_INV_LENGTH;
+        }
+
+      do_ghash_buf(c, c->u_ctr.ctr, iv, ivlen, 1);
+
+      /* iv length, 64-bit */
+      bitlengths[1][1] = be_bswap32(iv_bytes[0] << 3);
+      bitlengths[1][0] = be_bswap32((iv_bytes[0] >> 29) |
+                                    (iv_bytes[1] << 3));
+      /* zeros, 64-bit */
+      bitlengths[0][1] = 0;
+      bitlengths[0][0] = 0;
+
+      do_ghash_buf(c, c->u_ctr.ctr, (byte*)bitlengths, GCRY_GCM_BLOCK_LEN, 1);
+
+      wipememory (iv_bytes, sizeof iv_bytes);
+      wipememory (bitlengths, sizeof bitlengths);
+    }
+  else
+    {
+      /* 96-bit IV is handled differently. */
+      memcpy (c->u_ctr.ctr, iv, ivlen);
+      c->u_ctr.ctr[12] = c->u_ctr.ctr[13] = c->u_ctr.ctr[14] = 0;
+      c->u_ctr.ctr[15] = 1;
+    }
+
+  c->spec->encrypt (&c->context.c, c->u_mode.gcm.tagiv, c->u_ctr.ctr);
+
+  gcm_add32_be128 (c->u_ctr.ctr, 1);
+
+  c->unused = 0;
+  c->marks.iv = 1;
+  c->marks.tag = 0;
+
+  return 0;
+}
+
+
+gcry_err_code_t
+_gcry_cipher_gcm_setiv (gcry_cipher_hd_t c, const byte *iv, size_t ivlen)
+{
+  c->marks.iv = 0;
+  c->marks.tag = 0;
+  c->u_mode.gcm.disallow_encryption_because_of_setiv_in_fips_mode = 0;
+
+  if (fips_mode ())
+    {
+      /* Direct invocation of GCM setiv in FIPS mode disables encryption. */
+      c->u_mode.gcm.disallow_encryption_because_of_setiv_in_fips_mode = 1;
+    }
+
+  return _gcry_cipher_gcm_initiv (c, iv, ivlen);
+}
+
+
+#if 0 && TODO
+void
+_gcry_cipher_gcm_geniv (gcry_cipher_hd_t c,
+                        byte *ivout, size_t ivoutlen, const byte *nonce,
+                        size_t noncelen)
+{
+  /* nonce:    user provided part (might be null) */
+  /* noncelen: check if proper length (if nonce not null) */
+  /* ivout:    iv used to initialize gcm, output to user */
+  /* ivoutlen: check correct size */
+  byte iv[IVLEN];
+
+  if (!ivout)
+    return GPG_ERR_INV_ARG;
+  if (ivoutlen != IVLEN)
+    return GPG_ERR_INV_LENGTH;
+  if (nonce != NULL && !is_nonce_ok_len(noncelen))
+    return GPG_ERR_INV_ARG;
+
+  gcm_generate_iv(iv, nonce, noncelen);
+
+  c->marks.iv = 0;
+  c->marks.tag = 0;
+  c->u_mode.gcm.disallow_encryption_because_of_setiv_in_fips_mode = 0;
+
+  _gcry_cipher_gcm_initiv (c, iv, IVLEN);
+
+  buf_cpy(ivout, iv, IVLEN);
+  wipememory(iv, sizeof(iv));
+}
+#endif
+
+
+static int
+is_tag_length_valid(size_t taglen)
+{
+  switch (taglen)
+    {
+    /* Allowed tag lengths from NIST SP 800-38D.  */
+    case 128 / 8: /* GCRY_GCM_BLOCK_LEN */
+    case 120 / 8:
+    case 112 / 8:
+    case 104 / 8:
+    case 96 / 8:
+    case 64 / 8:
+    case 32 / 8:
+      return 1;
+
+    default:
+      return 0;
+    }
+}
+
+static gcry_err_code_t
+_gcry_cipher_gcm_tag (gcry_cipher_hd_t c,
+                      byte * outbuf, size_t outbuflen, int check)
+{
+  if (!(is_tag_length_valid (outbuflen) || outbuflen >= GCRY_GCM_BLOCK_LEN))
+    return GPG_ERR_INV_LENGTH;
+  if (c->u_mode.gcm.datalen_over_limits)
+    return GPG_ERR_INV_LENGTH;
+
+  if (!c->marks.tag)
+    {
+      u32 bitlengths[2][2];
+
+      if (!c->u_mode.gcm.ghash_fn)
+        return GPG_ERR_INV_STATE;
+
+      /* aad length */
+      bitlengths[0][1] = be_bswap32(c->u_mode.gcm.aadlen[0] << 3);
+      bitlengths[0][0] = be_bswap32((c->u_mode.gcm.aadlen[0] >> 29) |
+                                    (c->u_mode.gcm.aadlen[1] << 3));
+      /* data length */
+      bitlengths[1][1] = be_bswap32(c->u_mode.gcm.datalen[0] << 3);
+      bitlengths[1][0] = be_bswap32((c->u_mode.gcm.datalen[0] >> 29) |
+                                    (c->u_mode.gcm.datalen[1] << 3));
+
+      /* Finalize data-stream. */
+      do_ghash_buf(c, c->u_mode.gcm.u_tag.tag, NULL, 0, 1);
+      c->u_mode.gcm.ghash_aad_finalized = 1;
+      c->u_mode.gcm.ghash_data_finalized = 1;
+
+      /* Add bitlengths to tag. */
+      do_ghash_buf(c, c->u_mode.gcm.u_tag.tag, (byte*)bitlengths,
+                   GCRY_GCM_BLOCK_LEN, 1);
+      cipher_block_xor (c->u_mode.gcm.u_tag.tag, c->u_mode.gcm.tagiv,
+                        c->u_mode.gcm.u_tag.tag, GCRY_GCM_BLOCK_LEN);
+      c->marks.tag = 1;
+
+      wipememory (bitlengths, sizeof (bitlengths));
+      wipememory (c->u_mode.gcm.macbuf, GCRY_GCM_BLOCK_LEN);
+      wipememory (c->u_mode.gcm.tagiv, GCRY_GCM_BLOCK_LEN);
+      wipememory (c->u_mode.gcm.aadlen, sizeof (c->u_mode.gcm.aadlen));
+      wipememory (c->u_mode.gcm.datalen, sizeof (c->u_mode.gcm.datalen));
+    }
+
+  if (!check)
+    {
+      if (outbuflen > GCRY_GCM_BLOCK_LEN)
+        outbuflen = GCRY_GCM_BLOCK_LEN;
+
+      /* NB: We already checked that OUTBUF is large enough to hold
+       * the result or has valid truncated length.  */
+      memcpy (outbuf, c->u_mode.gcm.u_tag.tag, outbuflen);
+    }
+  else
+    {
+      /* OUTBUFLEN gives the length of the user supplied tag in OUTBUF
+       * and thus we need to compare its length first.  */
+      if (!is_tag_length_valid (outbuflen)
+          || !buf_eq_const (outbuf, c->u_mode.gcm.u_tag.tag, outbuflen))
+        return GPG_ERR_CHECKSUM;
+    }
+
+  return 0;
+}
+
+
+gcry_err_code_t
+_gcry_cipher_gcm_get_tag (gcry_cipher_hd_t c, unsigned char *outtag,
+                          size_t taglen)
+{
+  /* Outputting authentication tag is part of encryption. */
+  if (c->u_mode.gcm.disallow_encryption_because_of_setiv_in_fips_mode)
+    return GPG_ERR_INV_STATE;
+
+  return _gcry_cipher_gcm_tag (c, outtag, taglen, 0);
+}
+
+gcry_err_code_t
+_gcry_cipher_gcm_check_tag (gcry_cipher_hd_t c, const unsigned char *intag,
+                            size_t taglen)
+{
+  return _gcry_cipher_gcm_tag (c, (unsigned char *) intag, taglen, 1);
+}
diff --git a/grub-core/lib/libgcrypt/cipher/cipher-internal.h 
b/grub-core/lib/libgcrypt/cipher/cipher-internal.h
new file mode 100644
index 000000000..c8a1097ad
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/cipher-internal.h
@@ -0,0 +1,946 @@
+/* cipher-internal.h  - Internal defs for cipher.c
+ * Copyright (C) 2011 Free Software Foundation, Inc.
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser general Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef G10_CIPHER_INTERNAL_H
+#define G10_CIPHER_INTERNAL_H
+
+#include "./poly1305-internal.h"
+
+
+/* The maximum supported size of a block in bytes.  */
+#define MAX_BLOCKSIZE 16
+
+/* The length for an OCB block.  Although OCB supports any block
+   length it does not make sense to use a 64 bit blocklen (and cipher)
+   because this reduces the security margin to an unacceptable state.
+   Thus we require a cipher with 128 bit blocklength.  */
+#define OCB_BLOCK_LEN  (128/8)
+
+/* The size of the pre-computed L table for OCB.  This takes the same
+   size as the table used for GCM and thus we don't save anything by
+   not using such a table.  */
+#define OCB_L_TABLE_SIZE 16
+
+
+/* Check the above constants.  */
+#if OCB_BLOCK_LEN > MAX_BLOCKSIZE
+# error OCB_BLOCKLEN > MAX_BLOCKSIZE
+#endif
+
+
+
+/* Magic values for the context structure.  */
+#define CTX_MAGIC_NORMAL 0x24091964
+#define CTX_MAGIC_SECURE 0x46919042
+
+/* Try to use 16 byte aligned cipher context for better performance.
+   We use the aligned attribute, thus it is only possible to implement
+   this with gcc.  */
+#undef NEED_16BYTE_ALIGNED_CONTEXT
+#ifdef HAVE_GCC_ATTRIBUTE_ALIGNED
+# define NEED_16BYTE_ALIGNED_CONTEXT 1
+#endif
+
+/* Undef this symbol to trade GCM speed for 256 bytes of memory per context */
+#define GCM_USE_TABLES 1
+
+
+/* GCM_USE_INTEL_PCLMUL indicates whether to compile GCM with Intel PCLMUL
+   code.  */
+#undef GCM_USE_INTEL_PCLMUL
+#if defined(ENABLE_PCLMUL_SUPPORT) && defined(GCM_USE_TABLES)
+# if ((defined(__i386__) && SIZEOF_UNSIGNED_LONG == 4) || defined(__x86_64__))
+#  if __GNUC__ >= 4
+#   define GCM_USE_INTEL_PCLMUL 1
+#  endif
+# endif
+#endif /* GCM_USE_INTEL_PCLMUL */
+
+/* GCM_USE_ARM_PMULL indicates whether to compile GCM with ARMv8 PMULL code. */
+#undef GCM_USE_ARM_PMULL
+#if defined(ENABLE_ARM_CRYPTO_SUPPORT) && defined(GCM_USE_TABLES)
+# if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) \
+     && defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) \
+     && defined(HAVE_GCC_INLINE_ASM_AARCH32_CRYPTO)
+#  define GCM_USE_ARM_PMULL 1
+# elif defined(__AARCH64EL__) && \
+    defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) && \
+    defined(HAVE_GCC_INLINE_ASM_AARCH64_CRYPTO)
+#  define GCM_USE_ARM_PMULL 1
+# endif
+#endif /* GCM_USE_ARM_PMULL */
+
+/* GCM_USE_ARM_NEON indicates whether to compile GCM with ARMv7 NEON code. */
+#undef GCM_USE_ARM_NEON
+#if defined(GCM_USE_TABLES)
+#if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) && \
+    defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) && \
+    defined(HAVE_GCC_INLINE_ASM_NEON)
+#  define GCM_USE_ARM_NEON 1
+#endif
+#endif /* GCM_USE_ARM_NEON */
+
+/* GCM_USE_S390X_CRYPTO indicates whether to enable zSeries code. */
+#undef GCM_USE_S390X_CRYPTO
+#if defined(HAVE_GCC_INLINE_ASM_S390X)
+# define GCM_USE_S390X_CRYPTO 1
+#endif /* GCM_USE_S390X_CRYPTO */
+
+/* GCM_USE_PPC_VPMSUM indicates whether to compile GCM with PPC Power 8
+ * polynomial multiplication instruction. */
+#undef GCM_USE_PPC_VPMSUM
+#if defined(GCM_USE_TABLES)
+#if defined(ENABLE_PPC_CRYPTO_SUPPORT) && defined(__powerpc64__) && \
+    defined(HAVE_COMPATIBLE_CC_PPC_ALTIVEC) && \
+    defined(HAVE_GCC_INLINE_ASM_PPC_ALTIVEC) && __GNUC__ >= 4
+#  define GCM_USE_PPC_VPMSUM 1
+#  define NEED_16BYTE_ALIGNED_CONTEXT 1 /* this also aligns gcm_table */
+#endif
+#endif /* GCM_USE_PPC_VPMSUM */
+
+typedef unsigned int (*ghash_fn_t) (gcry_cipher_hd_t c, byte *result,
+                                    const byte *buf, size_t nblocks);
+
+
+/* A structure with function pointers for mode operations. */
+typedef struct cipher_mode_ops
+{
+  gcry_err_code_t (*encrypt)(gcry_cipher_hd_t c, unsigned char *outbuf,
+                            size_t outbuflen, const unsigned char *inbuf,
+                            size_t inbuflen);
+  gcry_err_code_t (*decrypt)(gcry_cipher_hd_t c, unsigned char *outbuf,
+                            size_t outbuflen, const unsigned char *inbuf,
+                            size_t inbuflen);
+  gcry_err_code_t (*setiv)(gcry_cipher_hd_t c, const unsigned char *iv,
+                          size_t ivlen);
+
+  gcry_err_code_t (*authenticate)(gcry_cipher_hd_t c,
+                                 const unsigned char *abuf, size_t abuflen);
+  gcry_err_code_t (*get_tag)(gcry_cipher_hd_t c, unsigned char *outtag,
+                            size_t taglen);
+  gcry_err_code_t (*check_tag)(gcry_cipher_hd_t c, const unsigned char *intag,
+                              size_t taglen);
+} cipher_mode_ops_t;
+
+
+/* A structure with function pointers for bulk operations.  The cipher
+   algorithm setkey function initializes them when bulk operations are
+   available and the actual encryption routines use them if they are
+   not NULL.  */
+typedef struct cipher_bulk_ops
+{
+  void (*cfb_enc)(void *context, unsigned char *iv, void *outbuf_arg,
+                 const void *inbuf_arg, size_t nblocks);
+  void (*cfb_dec)(void *context, unsigned char *iv, void *outbuf_arg,
+                 const void *inbuf_arg, size_t nblocks);
+  void (*cbc_enc)(void *context, unsigned char *iv, void *outbuf_arg,
+                 const void *inbuf_arg, size_t nblocks, int cbc_mac);
+  void (*cbc_dec)(void *context, unsigned char *iv, void *outbuf_arg,
+                 const void *inbuf_arg, size_t nblocks);
+  void (*ofb_enc)(void *context, unsigned char *iv, void *outbuf_arg,
+                 const void *inbuf_arg, size_t nblocks);
+  void (*ctr_enc)(void *context, unsigned char *iv, void *outbuf_arg,
+                 const void *inbuf_arg, size_t nblocks);
+  void (*ctr32le_enc)(void *context, unsigned char *iv, void *outbuf_arg,
+                     const void *inbuf_arg, size_t nblocks);
+  size_t (*ocb_crypt)(gcry_cipher_hd_t c, void *outbuf_arg,
+                     const void *inbuf_arg, size_t nblocks, int encrypt);
+  size_t (*ocb_auth)(gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks);
+  void (*xts_crypt)(void *context, unsigned char *tweak, void *outbuf_arg,
+                   const void *inbuf_arg, size_t nblocks, int encrypt);
+  size_t (*gcm_crypt)(gcry_cipher_hd_t c, void *outbuf_arg,
+                     const void *inbuf_arg, size_t nblocks, int encrypt);
+} cipher_bulk_ops_t;
+
+
+/* A VIA processor with the Padlock engine as well as the Intel AES_NI
+   instructions require an alignment of most data on a 16 byte
+   boundary.  Because we trick out the compiler while allocating the
+   context, the align attribute as used in rijndael.c does not work on
+   its own.  Thus we need to make sure that the entire context
+   structure is a aligned on that boundary.  We achieve this by
+   defining a new type and use that instead of our usual alignment
+   type.  */
+typedef union
+{
+  PROPERLY_ALIGNED_TYPE foo;
+#ifdef NEED_16BYTE_ALIGNED_CONTEXT
+  char bar[16] __attribute__ ((aligned (16)));
+#endif
+  char c[1];
+} cipher_context_alignment_t;
+
+
+/* Storage structure for CMAC, for CMAC and EAX modes. */
+typedef struct {
+  /* The initialization vector. Also contains tag after finalization. */
+  union {
+    cipher_context_alignment_t iv_align;
+    unsigned char iv[MAX_BLOCKSIZE];
+  } u_iv;
+
+  /* Subkeys for tag creation, not cleared by gcry_cipher_reset. */
+  unsigned char subkeys[2][MAX_BLOCKSIZE];
+
+  /* Space to save partial input lengths for MAC. */
+  unsigned char macbuf[MAX_BLOCKSIZE];
+
+  int mac_unused;  /* Number of unprocessed bytes in MACBUF. */
+  unsigned int tag:1; /* Set to 1 if tag has been finalized.  */
+} gcry_cmac_context_t;
+
+
+/* The handle structure.  */
+struct gcry_cipher_handle
+{
+  int magic;
+  size_t actual_handle_size;     /* Allocated size of this handle. */
+  size_t handle_offset;          /* Offset to the malloced block.  */
+  gcry_cipher_spec_t *spec;
+
+  /* The algorithm id.  This is a hack required because the module
+     interface does not easily allow to retrieve this value. */
+  int algo;
+
+  /* A structure with function pointers for mode operations. */
+  cipher_mode_ops_t mode_ops;
+
+  /* A structure with function pointers for bulk operations.  Due to
+     limitations of the module system (we don't want to change the
+     API) we need to keep these function pointers here.  */
+  cipher_bulk_ops_t bulk;
+
+  int mode;
+  unsigned int flags;
+
+  struct {
+    unsigned int key:1; /* Set to 1 if a key has been set.  */
+    unsigned int iv:1;  /* Set to 1 if a IV has been set.  */
+    unsigned int tag:1; /* Set to 1 if a tag is finalized. */
+    unsigned int finalize:1; /* Next encrypt/decrypt has the final data.  */
+    unsigned int allow_weak_key:1; /* Set to 1 if weak keys are allowed. */
+  } marks;
+
+  /* The initialization vector.  For best performance we make sure
+     that it is properly aligned.  In particular some implementations
+     of bulk operations expect an 16 byte aligned IV.  IV is also used
+     to store CBC-MAC in CCM mode; counter IV is stored in U_CTR.  For
+     OCB mode it is used for the offset value.  */
+  union {
+    cipher_context_alignment_t iv_align;
+    unsigned char iv[MAX_BLOCKSIZE];
+  } u_iv;
+
+  /* The counter for CTR mode.  This field is also used by AESWRAP and
+     thus we can't use the U_IV union.  For OCB mode it is used for
+     the checksum.  */
+  union {
+    cipher_context_alignment_t iv_align;
+    unsigned char ctr[MAX_BLOCKSIZE];
+  } u_ctr;
+
+  /* Space to save an IV or CTR for chaining operations.  */
+  unsigned char lastiv[MAX_BLOCKSIZE];
+  int unused;  /* Number of unused bytes in LASTIV. */
+
+  union {
+    /* Mode specific storage for CCM mode. */
+    struct {
+      u64 encryptlen;
+      u64 aadlen;
+      unsigned int authlen;
+
+      /* Space to save partial input lengths for MAC. */
+      unsigned char macbuf[GCRY_CCM_BLOCK_LEN];
+      int mac_unused;  /* Number of unprocessed bytes in MACBUF. */
+
+      unsigned char s0[GCRY_CCM_BLOCK_LEN];
+
+      unsigned int nonce:1; /* Set to 1 if nonce has been set.  */
+      unsigned int lengths:1; /* Set to 1 if CCM length parameters has been
+                                 processed.  */
+    } ccm;
+
+    /* Mode specific storage for Poly1305 mode. */
+    struct {
+      /* byte counter for AAD. */
+      u32 aadcount[2];
+
+      /* byte counter for data. */
+      u32 datacount[2];
+
+      unsigned int aad_finalized:1;
+      unsigned int bytecount_over_limits:1;
+
+      poly1305_context_t ctx;
+    } poly1305;
+
+    /* Mode specific storage for CMAC mode. */
+    gcry_cmac_context_t cmac;
+
+    /* Mode specific storage for EAX mode. */
+    struct {
+      /* CMAC for header (AAD). */
+      gcry_cmac_context_t cmac_header;
+
+      /* CMAC for ciphertext. */
+      gcry_cmac_context_t cmac_ciphertext;
+    } eax;
+
+    /* Mode specific storage for GCM mode and GCM-SIV mode. */
+    struct {
+      /* The interim tag for GCM mode.  */
+      union {
+        cipher_context_alignment_t iv_align;
+        unsigned char tag[MAX_BLOCKSIZE];
+      } u_tag;
+
+      /* Space to save partial input lengths for MAC. */
+      unsigned char macbuf[GCRY_CCM_BLOCK_LEN];
+      int mac_unused;  /* Number of unprocessed bytes in MACBUF. */
+
+      /* byte counters for GCM */
+      u32 aadlen[2];
+      u32 datalen[2];
+
+      /* encrypted tag counter */
+      unsigned char tagiv[MAX_BLOCKSIZE];
+
+      unsigned int ghash_data_finalized:1;
+      unsigned int ghash_aad_finalized:1;
+
+      unsigned int datalen_over_limits:1;
+      unsigned int disallow_encryption_because_of_setiv_in_fips_mode:1;
+
+      /* --- Following members are not cleared in gcry_cipher_reset --- */
+
+      /* GHASH multiplier from key.  */
+      union {
+        cipher_context_alignment_t iv_align;
+        unsigned char key[MAX_BLOCKSIZE];
+      } u_ghash_key;
+
+      /* Pre-calculated table for GCM. */
+#ifdef GCM_USE_TABLES
+ #if (SIZEOF_UNSIGNED_LONG == 8 || defined(__x86_64__))
+      #define GCM_TABLES_USE_U64 1
+      u64 gcm_table[4 * 16];
+ #else
+      #undef GCM_TABLES_USE_U64
+      u32 gcm_table[8 * 16];
+ #endif
+#endif
+
+      /* GHASH implementation in use. */
+      ghash_fn_t ghash_fn;
+
+      /* POLYVAL implementation in use (GCM-SIV). */
+      ghash_fn_t polyval_fn;
+
+      /* Key length used for GCM-SIV key generating key. */
+      unsigned int siv_keylen;
+    } gcm;
+
+    /* Mode specific storage for OCB mode. */
+    struct {
+      /* --- Following members are not cleared in gcry_cipher_reset --- */
+
+      /* Helper variables and pre-computed table of L values.  */
+      unsigned char L_star[OCB_BLOCK_LEN];
+      unsigned char L_dollar[OCB_BLOCK_LEN];
+      unsigned char L0L1[OCB_BLOCK_LEN];
+      unsigned char L[OCB_L_TABLE_SIZE][OCB_BLOCK_LEN];
+
+      /* --- Following members are cleared in gcry_cipher_reset --- */
+
+      /* The tag is valid if marks.tag has been set.  */
+      unsigned char tag[OCB_BLOCK_LEN];
+
+      /* A buffer to hold the offset for the AAD processing.  */
+      unsigned char aad_offset[OCB_BLOCK_LEN];
+
+      /* A buffer to hold the current sum of AAD processing.  We can't
+         use tag here because tag may already hold the preprocessed
+         checksum of the data.  */
+      unsigned char aad_sum[OCB_BLOCK_LEN];
+
+      /* A buffer to store AAD data not yet processed.  */
+      unsigned char aad_leftover[OCB_BLOCK_LEN];
+
+      /* Number of data/aad blocks processed so far.  */
+      u64 data_nblocks;
+      u64 aad_nblocks;
+
+      /* Number of valid bytes in AAD_LEFTOVER.  */
+      unsigned char aad_nleftover;
+
+      /* Length of the tag.  Fixed for now but may eventually be
+         specified using a set of gcry_cipher_flags.  */
+      unsigned char taglen;
+
+      /* Flags indicating that the final data/aad block has been
+         processed.  */
+      unsigned int data_finalized:1;
+      unsigned int aad_finalized:1;
+    } ocb;
+
+    /* Mode specific storage for XTS mode. */
+    struct {
+      /* Pointer to tweak cipher context, allocated after actual
+       * cipher context. */
+      char *tweak_context;
+    } xts;
+
+    /* Mode specific storage for SIV mode. */
+    struct {
+      /* Tag used for decryption. */
+      unsigned char dec_tag[GCRY_SIV_BLOCK_LEN];
+
+      /* S2V state. */
+      unsigned char s2v_d[GCRY_SIV_BLOCK_LEN];
+
+      /* Number of AAD elements processed. */
+      unsigned int aad_count:8;
+
+      /* Flags for SIV state. */
+      unsigned int dec_tag_set:1;
+
+      /* --- Following members are not cleared in gcry_cipher_reset --- */
+
+      /* S2V CMAC state. */
+      gcry_cmac_context_t s2v_cmac;
+      unsigned char s2v_zero_block[GCRY_SIV_BLOCK_LEN];
+
+      /* Pointer to CTR cipher context, allocated after actual
+       * cipher context. */
+      char *ctr_context;
+    } siv;
+
+    /* Mode specific storage for WRAP mode. */
+    struct {
+      unsigned char plen[4];
+    } wrap;
+  } u_mode;
+
+  /* What follows are two contexts of the cipher in use.  The first
+     one needs to be aligned well enough for the cipher operation
+     whereas the second one is a copy created by cipher_setkey and
+     used by cipher_reset.  That second copy has no need for proper
+     aligment because it is only accessed by memcpy.  */
+  cipher_context_alignment_t context;
+};
+
+
+/*-- cipher-cbc.c --*/
+gcry_err_code_t _gcry_cipher_cbc_encrypt
+/*           */ (gcry_cipher_hd_t c,
+                 unsigned char *outbuf, size_t outbuflen,
+                 const unsigned char *inbuf, size_t inbuflen);
+gcry_err_code_t _gcry_cipher_cbc_decrypt
+/*           */ (gcry_cipher_hd_t c,
+                 unsigned char *outbuf, size_t outbuflen,
+                 const unsigned char *inbuf, size_t inbuflen);
+gcry_err_code_t _gcry_cipher_cbc_cts_encrypt
+/*           */ (gcry_cipher_hd_t c,
+                 unsigned char *outbuf, size_t outbuflen,
+                 const unsigned char *inbuf, size_t inbuflen);
+gcry_err_code_t _gcry_cipher_cbc_cts_decrypt
+/*           */ (gcry_cipher_hd_t c,
+                 unsigned char *outbuf, size_t outbuflen,
+                 const unsigned char *inbuf, size_t inbuflen);
+
+/*-- cipher-cfb.c --*/
+gcry_err_code_t _gcry_cipher_cfb_encrypt
+/*           */ (gcry_cipher_hd_t c,
+                 unsigned char *outbuf, size_t outbuflen,
+                 const unsigned char *inbuf, size_t inbuflen);
+gcry_err_code_t _gcry_cipher_cfb_decrypt
+/*           */ (gcry_cipher_hd_t c,
+                 unsigned char *outbuf, size_t outbuflen,
+                 const unsigned char *inbuf, size_t inbuflen);
+gcry_err_code_t _gcry_cipher_cfb8_encrypt
+/*           */ (gcry_cipher_hd_t c,
+                 unsigned char *outbuf, size_t outbuflen,
+                 const unsigned char *inbuf, size_t inbuflen);
+gcry_err_code_t _gcry_cipher_cfb8_decrypt
+/*           */ (gcry_cipher_hd_t c,
+                 unsigned char *outbuf, size_t outbuflen,
+                 const unsigned char *inbuf, size_t inbuflen);
+
+
+/*-- cipher-ofb.c --*/
+gcry_err_code_t _gcry_cipher_ofb_encrypt
+/*           */ (gcry_cipher_hd_t c,
+                 unsigned char *outbuf, size_t outbuflen,
+                 const unsigned char *inbuf, size_t inbuflen);
+
+/*-- cipher-ctr.c --*/
+gcry_err_code_t _gcry_cipher_ctr_encrypt_ctx
+/*           */ (gcry_cipher_hd_t c,
+                unsigned char *outbuf, size_t outbuflen,
+                const unsigned char *inbuf, size_t inbuflen,
+                void *algo_context);
+gcry_err_code_t _gcry_cipher_ctr_encrypt
+/*           */ (gcry_cipher_hd_t c,
+                 unsigned char *outbuf, size_t outbuflen,
+                 const unsigned char *inbuf, size_t inbuflen);
+
+
+/*-- cipher-aeswrap.c --*/
+gcry_err_code_t _gcry_cipher_keywrap_encrypt
+/*           */   (gcry_cipher_hd_t c,
+                   byte *outbuf, size_t outbuflen,
+                   const byte *inbuf, size_t inbuflen);
+gcry_err_code_t _gcry_cipher_keywrap_encrypt_padding
+/*           */   (gcry_cipher_hd_t c,
+                   byte *outbuf, size_t outbuflen,
+                   const byte *inbuf, size_t inbuflen);
+gcry_err_code_t _gcry_cipher_keywrap_decrypt_auto
+/*           */   (gcry_cipher_hd_t c,
+                   byte *outbuf, size_t outbuflen,
+                   const byte *inbuf, size_t inbuflen);
+
+
+/*-- cipher-ccm.c --*/
+gcry_err_code_t _gcry_cipher_ccm_encrypt
+/*           */ (gcry_cipher_hd_t c,
+                 unsigned char *outbuf, size_t outbuflen,
+                 const unsigned char *inbuf, size_t inbuflen);
+gcry_err_code_t _gcry_cipher_ccm_decrypt
+/*           */ (gcry_cipher_hd_t c,
+                 unsigned char *outbuf, size_t outbuflen,
+                 const unsigned char *inbuf, size_t inbuflen);
+gcry_err_code_t _gcry_cipher_ccm_set_nonce
+/*           */ (gcry_cipher_hd_t c, const unsigned char *nonce,
+                 size_t noncelen);
+gcry_err_code_t _gcry_cipher_ccm_authenticate
+/*           */ (gcry_cipher_hd_t c, const unsigned char *abuf, size_t 
abuflen);
+gcry_err_code_t _gcry_cipher_ccm_set_lengths
+/*           */ (gcry_cipher_hd_t c, u64 encryptedlen, u64 aadlen, u64 taglen);
+gcry_err_code_t _gcry_cipher_ccm_get_tag
+/*           */ (gcry_cipher_hd_t c,
+                 unsigned char *outtag, size_t taglen);
+gcry_err_code_t _gcry_cipher_ccm_check_tag
+/*           */ (gcry_cipher_hd_t c,
+                 const unsigned char *intag, size_t taglen);
+
+
+/*-- cipher-cmac.c --*/
+gcry_err_code_t _gcry_cmac_generate_subkeys
+/*           */ (gcry_cipher_hd_t c, gcry_cmac_context_t *ctx);
+gcry_err_code_t _gcry_cmac_write
+/*           */ (gcry_cipher_hd_t c, gcry_cmac_context_t *ctx,
+                const byte * inbuf, size_t inlen);
+gcry_err_code_t _gcry_cmac_final
+/*           */ (gcry_cipher_hd_t c, gcry_cmac_context_t *ctx);
+void _gcry_cmac_reset (gcry_cmac_context_t *ctx);
+
+
+/*-- cipher-eax.c --*/
+gcry_err_code_t _gcry_cipher_eax_encrypt
+/*           */   (gcry_cipher_hd_t c,
+                   unsigned char *outbuf, size_t outbuflen,
+                   const unsigned char *inbuf, size_t inbuflen);
+gcry_err_code_t _gcry_cipher_eax_decrypt
+/*           */   (gcry_cipher_hd_t c,
+                   unsigned char *outbuf, size_t outbuflen,
+                   const unsigned char *inbuf, size_t inbuflen);
+gcry_err_code_t _gcry_cipher_eax_set_nonce
+/*           */   (gcry_cipher_hd_t c,
+                   const unsigned char *nonce, size_t noncelen);
+gcry_err_code_t _gcry_cipher_eax_authenticate
+/*           */   (gcry_cipher_hd_t c,
+                   const unsigned char *aadbuf, size_t aadbuflen);
+gcry_err_code_t _gcry_cipher_eax_get_tag
+/*           */   (gcry_cipher_hd_t c,
+                   unsigned char *outtag, size_t taglen);
+gcry_err_code_t _gcry_cipher_eax_check_tag
+/*           */   (gcry_cipher_hd_t c,
+                   const unsigned char *intag, size_t taglen);
+gcry_err_code_t _gcry_cipher_eax_setkey
+/*           */   (gcry_cipher_hd_t c);
+
+
+/*-- cipher-gcm.c --*/
+gcry_err_code_t _gcry_cipher_gcm_encrypt
+/*           */   (gcry_cipher_hd_t c,
+                   unsigned char *outbuf, size_t outbuflen,
+                   const unsigned char *inbuf, size_t inbuflen);
+gcry_err_code_t _gcry_cipher_gcm_decrypt
+/*           */   (gcry_cipher_hd_t c,
+                   unsigned char *outbuf, size_t outbuflen,
+                   const unsigned char *inbuf, size_t inbuflen);
+gcry_err_code_t _gcry_cipher_gcm_setiv
+/*           */   (gcry_cipher_hd_t c,
+                   const unsigned char *iv, size_t ivlen);
+gcry_err_code_t _gcry_cipher_gcm_authenticate
+/*           */   (gcry_cipher_hd_t c,
+                   const unsigned char *aadbuf, size_t aadbuflen);
+gcry_err_code_t _gcry_cipher_gcm_get_tag
+/*           */   (gcry_cipher_hd_t c,
+                   unsigned char *outtag, size_t taglen);
+gcry_err_code_t _gcry_cipher_gcm_check_tag
+/*           */   (gcry_cipher_hd_t c,
+                   const unsigned char *intag, size_t taglen);
+void _gcry_cipher_gcm_setkey
+/*           */   (gcry_cipher_hd_t c);
+void _gcry_cipher_gcm_setupM
+/*           */   (gcry_cipher_hd_t c);
+
+
+/*-- cipher-poly1305.c --*/
+gcry_err_code_t _gcry_cipher_poly1305_encrypt
+/*           */   (gcry_cipher_hd_t c,
+                   unsigned char *outbuf, size_t outbuflen,
+                   const unsigned char *inbuf, size_t inbuflen);
+gcry_err_code_t _gcry_cipher_poly1305_decrypt
+/*           */   (gcry_cipher_hd_t c,
+                   unsigned char *outbuf, size_t outbuflen,
+                   const unsigned char *inbuf, size_t inbuflen);
+gcry_err_code_t _gcry_cipher_poly1305_setiv
+/*           */   (gcry_cipher_hd_t c,
+                   const unsigned char *iv, size_t ivlen);
+gcry_err_code_t _gcry_cipher_poly1305_authenticate
+/*           */   (gcry_cipher_hd_t c,
+                   const unsigned char *aadbuf, size_t aadbuflen);
+gcry_err_code_t _gcry_cipher_poly1305_get_tag
+/*           */   (gcry_cipher_hd_t c,
+                   unsigned char *outtag, size_t taglen);
+gcry_err_code_t _gcry_cipher_poly1305_check_tag
+/*           */   (gcry_cipher_hd_t c,
+                   const unsigned char *intag, size_t taglen);
+void _gcry_cipher_poly1305_setkey
+/*           */   (gcry_cipher_hd_t c);
+
+
+/*-- chacha20.c --*/
+gcry_err_code_t _gcry_chacha20_poly1305_encrypt
+/*           */   (gcry_cipher_hd_t c, byte *outbuf, const byte *inbuf,
+                  size_t length);
+gcry_err_code_t _gcry_chacha20_poly1305_decrypt
+/*           */   (gcry_cipher_hd_t c, byte *outbuf, const byte *inbuf,
+                  size_t length);
+
+
+/*-- cipher-ocb.c --*/
+gcry_err_code_t _gcry_cipher_ocb_encrypt
+/*           */ (gcry_cipher_hd_t c,
+                 unsigned char *outbuf, size_t outbuflen,
+                 const unsigned char *inbuf, size_t inbuflen);
+gcry_err_code_t _gcry_cipher_ocb_decrypt
+/*           */ (gcry_cipher_hd_t c,
+                 unsigned char *outbuf, size_t outbuflen,
+                 const unsigned char *inbuf, size_t inbuflen);
+gcry_err_code_t _gcry_cipher_ocb_set_nonce
+/*           */ (gcry_cipher_hd_t c, const unsigned char *nonce,
+                 size_t noncelen);
+gcry_err_code_t _gcry_cipher_ocb_authenticate
+/*           */ (gcry_cipher_hd_t c, const unsigned char *abuf, size_t 
abuflen);
+gcry_err_code_t _gcry_cipher_ocb_get_tag
+/*           */ (gcry_cipher_hd_t c,
+                 unsigned char *outtag, size_t taglen);
+gcry_err_code_t _gcry_cipher_ocb_check_tag
+/*           */ (gcry_cipher_hd_t c,
+                 const unsigned char *intag, size_t taglen);
+void _gcry_cipher_ocb_setkey
+/*           */ (gcry_cipher_hd_t c);
+
+
+/*-- cipher-xts.c --*/
+gcry_err_code_t _gcry_cipher_xts_encrypt
+/*           */ (gcry_cipher_hd_t c, unsigned char *outbuf, size_t outbuflen,
+                const unsigned char *inbuf, size_t inbuflen);
+gcry_err_code_t _gcry_cipher_xts_decrypt
+/*           */ (gcry_cipher_hd_t c, unsigned char *outbuf, size_t outbuflen,
+                const unsigned char *inbuf, size_t inbuflen);
+
+
+/*-- cipher-siv.c --*/
+gcry_err_code_t _gcry_cipher_siv_encrypt
+/*           */ (gcry_cipher_hd_t c,
+                 unsigned char *outbuf, size_t outbuflen,
+                 const unsigned char *inbuf, size_t inbuflen);
+gcry_err_code_t _gcry_cipher_siv_decrypt
+/*           */ (gcry_cipher_hd_t c,
+                 unsigned char *outbuf, size_t outbuflen,
+                 const unsigned char *inbuf, size_t inbuflen);
+gcry_err_code_t _gcry_cipher_siv_set_nonce
+/*           */ (gcry_cipher_hd_t c, const unsigned char *nonce,
+                 size_t noncelen);
+gcry_err_code_t _gcry_cipher_siv_authenticate
+/*           */ (gcry_cipher_hd_t c, const unsigned char *abuf, size_t 
abuflen);
+gcry_err_code_t _gcry_cipher_siv_set_decryption_tag
+/*           */ (gcry_cipher_hd_t c, const byte *tag, size_t taglen);
+gcry_err_code_t _gcry_cipher_siv_get_tag
+/*           */ (gcry_cipher_hd_t c,
+                 unsigned char *outtag, size_t taglen);
+gcry_err_code_t _gcry_cipher_siv_check_tag
+/*           */ (gcry_cipher_hd_t c,
+                 const unsigned char *intag, size_t taglen);
+gcry_err_code_t _gcry_cipher_siv_setkey
+/*           */ (gcry_cipher_hd_t c,
+                 const unsigned char *ctrkey, size_t ctrkeylen);
+
+
+/*-- cipher-gcm-siv.c --*/
+gcry_err_code_t _gcry_cipher_gcm_siv_encrypt
+/*           */ (gcry_cipher_hd_t c,
+                 unsigned char *outbuf, size_t outbuflen,
+                 const unsigned char *inbuf, size_t inbuflen);
+gcry_err_code_t _gcry_cipher_gcm_siv_decrypt
+/*           */ (gcry_cipher_hd_t c,
+                 unsigned char *outbuf, size_t outbuflen,
+                 const unsigned char *inbuf, size_t inbuflen);
+gcry_err_code_t _gcry_cipher_gcm_siv_set_nonce
+/*           */ (gcry_cipher_hd_t c, const unsigned char *nonce,
+                 size_t noncelen);
+gcry_err_code_t _gcry_cipher_gcm_siv_authenticate
+/*           */ (gcry_cipher_hd_t c, const unsigned char *abuf, size_t 
abuflen);
+gcry_err_code_t _gcry_cipher_gcm_siv_set_decryption_tag
+/*           */ (gcry_cipher_hd_t c, const byte *tag, size_t taglen);
+gcry_err_code_t _gcry_cipher_gcm_siv_get_tag
+/*           */ (gcry_cipher_hd_t c,
+                 unsigned char *outtag, size_t taglen);
+gcry_err_code_t _gcry_cipher_gcm_siv_check_tag
+/*           */ (gcry_cipher_hd_t c,
+                 const unsigned char *intag, size_t taglen);
+gcry_err_code_t _gcry_cipher_gcm_siv_setkey
+/*           */ (gcry_cipher_hd_t c, unsigned int keylen);
+
+
+/* Return the L-value for block N.  Note: 'cipher_ocb.c' ensures that N
+ * will never be multiple of 65536 (1 << OCB_L_TABLE_SIZE), thus N can
+ * be directly passed to _gcry_ctz() function and resulting index will
+ * never overflow the table.  */
+static inline const unsigned char *
+ocb_get_l (gcry_cipher_hd_t c, u64 n)
+{
+  unsigned long ntz;
+
+#if ((defined(__i386__) || defined(__x86_64__)) && __GNUC__ >= 4)
+  /* Assumes that N != 0. */
+  asm ("rep;bsfl %k[low], %k[ntz]\n\t"
+        : [ntz] "=r" (ntz)
+        : [low] "r" ((unsigned long)n)
+        : "cc");
+#else
+  ntz = _gcry_ctz (n);
+#endif
+
+  return c->u_mode.ocb.L[ntz];
+}
+
+
+/* Return bit-shift of blocksize. */
+static inline unsigned int _gcry_blocksize_shift(gcry_cipher_hd_t c)
+{
+  /* Only blocksizes 8 and 16 are used. Return value in such way
+   * that compiler can optimize calling functions based on this.  */
+  return c->spec->blocksize == 8 ? 3 : 4;
+}
+
+
+/* Optimized function for adding value to cipher block. */
+static inline void
+cipher_block_add(void *_dstsrc, unsigned int add, size_t blocksize)
+{
+  byte *dstsrc = _dstsrc;
+  u64 s[2];
+
+  if (blocksize == 8)
+    {
+      buf_put_be64(dstsrc + 0, buf_get_be64(dstsrc + 0) + add);
+    }
+  else /* blocksize == 16 */
+    {
+      s[0] = buf_get_be64(dstsrc + 8);
+      s[1] = buf_get_be64(dstsrc + 0);
+      s[0] += add;
+      s[1] += (s[0] < add);
+      buf_put_be64(dstsrc + 8, s[0]);
+      buf_put_be64(dstsrc + 0, s[1]);
+    }
+}
+
+
+/* Optimized function for cipher block copying */
+static inline void
+cipher_block_cpy(void *_dst, const void *_src, size_t blocksize)
+{
+  byte *dst = _dst;
+  const byte *src = _src;
+  u64 s[2];
+
+  if (blocksize == 8)
+    {
+      buf_put_he64(dst + 0, buf_get_he64(src + 0));
+    }
+  else /* blocksize == 16 */
+    {
+      s[0] = buf_get_he64(src + 0);
+      s[1] = buf_get_he64(src + 8);
+      buf_put_he64(dst + 0, s[0]);
+      buf_put_he64(dst + 8, s[1]);
+    }
+}
+
+
+/* Optimized function for cipher block xoring */
+static inline void
+cipher_block_xor(void *_dst, const void *_src1, const void *_src2,
+                 size_t blocksize)
+{
+  byte *dst = _dst;
+  const byte *src1 = _src1;
+  const byte *src2 = _src2;
+  u64 s1[2];
+  u64 s2[2];
+
+  if (blocksize == 8)
+    {
+      buf_put_he64(dst + 0, buf_get_he64(src1 + 0) ^ buf_get_he64(src2 + 0));
+    }
+  else /* blocksize == 16 */
+    {
+      s1[0] = buf_get_he64(src1 + 0);
+      s1[1] = buf_get_he64(src1 + 8);
+      s2[0] = buf_get_he64(src2 + 0);
+      s2[1] = buf_get_he64(src2 + 8);
+      buf_put_he64(dst + 0, s1[0] ^ s2[0]);
+      buf_put_he64(dst + 8, s1[1] ^ s2[1]);
+    }
+}
+
+
+/* Optimized function for in-place cipher block xoring */
+static inline void
+cipher_block_xor_1(void *_dst, const void *_src, size_t blocksize)
+{
+  cipher_block_xor (_dst, _dst, _src, blocksize);
+}
+
+
+/* Optimized function for cipher block xoring with two destination cipher
+   blocks.  Used mainly by CFB mode encryption.  */
+static inline void
+cipher_block_xor_2dst(void *_dst1, void *_dst2, const void *_src,
+                      size_t blocksize)
+{
+  byte *dst1 = _dst1;
+  byte *dst2 = _dst2;
+  const byte *src = _src;
+  u64 d2[2];
+  u64 s[2];
+
+  if (blocksize == 8)
+    {
+      d2[0] = buf_get_he64(dst2 + 0) ^ buf_get_he64(src + 0);
+      buf_put_he64(dst2 + 0, d2[0]);
+      buf_put_he64(dst1 + 0, d2[0]);
+    }
+  else /* blocksize == 16 */
+    {
+      s[0] = buf_get_he64(src + 0);
+      s[1] = buf_get_he64(src + 8);
+      d2[0] = buf_get_he64(dst2 + 0);
+      d2[1] = buf_get_he64(dst2 + 8);
+      d2[0] = d2[0] ^ s[0];
+      d2[1] = d2[1] ^ s[1];
+      buf_put_he64(dst2 + 0, d2[0]);
+      buf_put_he64(dst2 + 8, d2[1]);
+      buf_put_he64(dst1 + 0, d2[0]);
+      buf_put_he64(dst1 + 8, d2[1]);
+    }
+}
+
+
+/* Optimized function for combined cipher block xoring and copying.
+   Used by mainly CBC mode decryption.  */
+static inline void
+cipher_block_xor_n_copy_2(void *_dst_xor, const void *_src_xor,
+                          void *_srcdst_cpy, const void *_src_cpy,
+                          size_t blocksize)
+{
+  byte *dst_xor = _dst_xor;
+  byte *srcdst_cpy = _srcdst_cpy;
+  const byte *src_xor = _src_xor;
+  const byte *src_cpy = _src_cpy;
+  u64 sc[2];
+  u64 sx[2];
+  u64 sdc[2];
+
+  if (blocksize == 8)
+    {
+      sc[0] = buf_get_he64(src_cpy + 0);
+      buf_put_he64(dst_xor + 0,
+                   buf_get_he64(srcdst_cpy + 0) ^ buf_get_he64(src_xor + 0));
+      buf_put_he64(srcdst_cpy + 0, sc[0]);
+    }
+  else /* blocksize == 16 */
+    {
+      sc[0] = buf_get_he64(src_cpy + 0);
+      sc[1] = buf_get_he64(src_cpy + 8);
+      sx[0] = buf_get_he64(src_xor + 0);
+      sx[1] = buf_get_he64(src_xor + 8);
+      sdc[0] = buf_get_he64(srcdst_cpy + 0);
+      sdc[1] = buf_get_he64(srcdst_cpy + 8);
+      sx[0] ^= sdc[0];
+      sx[1] ^= sdc[1];
+      buf_put_he64(dst_xor + 0, sx[0]);
+      buf_put_he64(dst_xor + 8, sx[1]);
+      buf_put_he64(srcdst_cpy + 0, sc[0]);
+      buf_put_he64(srcdst_cpy + 8, sc[1]);
+    }
+}
+
+
+/* Optimized function for combined cipher block byte-swapping.  */
+static inline void
+cipher_block_bswap (void *_dst_bswap, const void *_src_bswap,
+                    size_t blocksize)
+{
+  byte *dst_bswap = _dst_bswap;
+  const byte *src_bswap = _src_bswap;
+  u64 t[2];
+
+  if (blocksize == 8)
+    {
+      buf_put_le64(dst_bswap, buf_get_be64(src_bswap));
+    }
+  else
+    {
+      t[0] = buf_get_be64(src_bswap + 0);
+      t[1] = buf_get_be64(src_bswap + 8);
+      buf_put_le64(dst_bswap + 8, t[0]);
+      buf_put_le64(dst_bswap + 0, t[1]);
+    }
+}
+
+
+/* Optimized function for combined cipher block xoring and copying.
+   Used by mainly CFB mode decryption.  */
+static inline void
+cipher_block_xor_n_copy(void *_dst_xor, void *_srcdst_cpy, const void *_src,
+                        size_t blocksize)
+{
+  cipher_block_xor_n_copy_2(_dst_xor, _src, _srcdst_cpy, _src, blocksize);
+}
+
+
+#endif /*G10_CIPHER_INTERNAL_H*/
diff --git a/grub-core/lib/libgcrypt/cipher/cipher-ocb.c 
b/grub-core/lib/libgcrypt/cipher/cipher-ocb.c
new file mode 100644
index 000000000..bfafa4c86
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/cipher-ocb.c
@@ -0,0 +1,762 @@
+/* cipher-ocb.c -  OCB cipher mode
+ * Copyright (C) 2015, 2016 g10 Code GmbH
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser general Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * OCB is covered by several patents but may be used freely by most
+ * software.  See http://web.cs.ucdavis.edu/~rogaway/ocb/license.htm .
+ * In particular license 1 is suitable for Libgcrypt: See
+ * http://web.cs.ucdavis.edu/~rogaway/ocb/license1.pdf for the full
+ * license document; it basically says:
+ *
+ *   License 1 — License for Open-Source Software Implementations of OCB
+ *               (Jan 9, 2013)
+ *
+ *   Under this license, you are authorized to make, use, and
+ *   distribute open-source software implementations of OCB. This
+ *   license terminates for you if you sue someone over their
+ *   open-source software implementation of OCB claiming that you have
+ *   a patent covering their implementation.
+ */
+
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "g10lib.h"
+#include "cipher.h"
+#include "bufhelp.h"
+#include "./cipher-internal.h"
+
+
+/* Double the OCB_BLOCK_LEN sized block B in-place.  */
+static inline void
+double_block (u64 b[2])
+{
+  u64 l_0, l, r;
+
+  l = b[1];
+  r = b[0];
+
+  l_0 = -(l >> 63);
+  l = (l + l) ^ (r >> 63);
+  r = (r + r) ^ (l_0 & 135);
+
+  b[1] = l;
+  b[0] = r;
+}
+
+
+/* Copy OCB_BLOCK_LEN from buffer S starting at bit offset BITOFF to
+ * buffer D.  */
+static void
+bit_copy (unsigned char d[16], const unsigned char s[24], unsigned int bitoff)
+{
+  u64 s0l, s1l, s1r, s2r;
+  unsigned int shift;
+  unsigned int byteoff;
+
+  byteoff = bitoff / 8;
+  shift = bitoff % 8;
+
+  s0l = buf_get_be64 (s + byteoff + 0);
+  s1l = buf_get_be64 (s + byteoff + 8);
+  s1r = shift ? s1l : 0;
+  s2r = shift ? buf_get_be64 (s + 16) << (8 * byteoff) : 0;
+
+  buf_put_be64 (d + 0, (s0l << shift) | (s1r >> ((64 - shift) & 63)));
+  buf_put_be64 (d + 8, (s1l << shift) | (s2r >> ((64 - shift) & 63)));
+}
+
+
+/* Get L_big value for block N, where N is multiple of 65536. */
+static void
+ocb_get_L_big (gcry_cipher_hd_t c, u64 n, unsigned char *l_buf)
+{
+  int ntz = _gcry_ctz64 (n);
+  u64 L[2];
+
+  gcry_assert(ntz >= OCB_L_TABLE_SIZE);
+
+  L[1] = buf_get_be64 (c->u_mode.ocb.L[OCB_L_TABLE_SIZE - 1]);
+  L[0] = buf_get_be64 (c->u_mode.ocb.L[OCB_L_TABLE_SIZE - 1] + 8);
+
+  for (ntz -= OCB_L_TABLE_SIZE - 1; ntz; ntz--)
+    double_block (L);
+
+  buf_put_be64 (l_buf + 0, L[1]);
+  buf_put_be64 (l_buf + 8, L[0]);
+}
+
+
+/* Called after key has been set. Sets up L table. */
+void
+_gcry_cipher_ocb_setkey (gcry_cipher_hd_t c)
+{
+  unsigned char ktop[OCB_BLOCK_LEN];
+  unsigned int burn = 0;
+  unsigned int nburn;
+  u64 L[2];
+  int i;
+
+  /* L_star = E(zero_128) */
+  memset (ktop, 0, OCB_BLOCK_LEN);
+  nburn = c->spec->encrypt (&c->context.c, c->u_mode.ocb.L_star, ktop);
+  burn = nburn > burn ? nburn : burn;
+  /* L_dollar = double(L_star)  */
+  L[1] = buf_get_be64 (c->u_mode.ocb.L_star);
+  L[0] = buf_get_be64 (c->u_mode.ocb.L_star + 8);
+  double_block (L);
+  buf_put_be64 (c->u_mode.ocb.L_dollar + 0, L[1]);
+  buf_put_be64 (c->u_mode.ocb.L_dollar + 8, L[0]);
+  /* L_0 = double(L_dollar), ...  */
+  double_block (L);
+  buf_put_be64 (c->u_mode.ocb.L[0] + 0, L[1]);
+  buf_put_be64 (c->u_mode.ocb.L[0] + 8, L[0]);
+  for (i = 1; i < OCB_L_TABLE_SIZE; i++)
+    {
+      double_block (L);
+      buf_put_be64 (c->u_mode.ocb.L[i] + 0, L[1]);
+      buf_put_be64 (c->u_mode.ocb.L[i] + 8, L[0]);
+    }
+  /* Precalculated offset L0+L1 */
+  cipher_block_xor (c->u_mode.ocb.L0L1,
+                   c->u_mode.ocb.L[0], c->u_mode.ocb.L[1], OCB_BLOCK_LEN);
+
+  /* Cleanup */
+  wipememory (ktop, sizeof ktop);
+  if (burn > 0)
+    _gcry_burn_stack (burn + 4*sizeof(void*));
+}
+
+
+/* Set the nonce for OCB.  This requires that the key has been set.
+   Using it again resets start a new encryption cycle using the same
+   key.  */
+gcry_err_code_t
+_gcry_cipher_ocb_set_nonce (gcry_cipher_hd_t c, const unsigned char *nonce,
+                            size_t noncelen)
+{
+  unsigned char ktop[OCB_BLOCK_LEN];
+  unsigned char stretch[OCB_BLOCK_LEN + 8];
+  unsigned int bottom;
+  unsigned int burn = 0;
+  unsigned int nburn;
+
+  /* Check args.  */
+  if (!c->marks.key)
+    return GPG_ERR_INV_STATE;  /* Key must have been set first.  */
+  switch (c->u_mode.ocb.taglen)
+    {
+    case 8:
+    case 12:
+    case 16:
+      break;
+    default:
+      return GPG_ERR_BUG; /* Invalid tag length. */
+    }
+
+  if (c->spec->blocksize != OCB_BLOCK_LEN)
+    return GPG_ERR_CIPHER_ALGO;
+  if (!nonce)
+    return GPG_ERR_INV_ARG;
+  /* 120 bit is the allowed maximum.  In addition we impose a minimum
+     of 64 bit.  */
+  if (noncelen > (120/8) || noncelen < (64/8) || noncelen >= OCB_BLOCK_LEN)
+    return GPG_ERR_INV_LENGTH;
+
+  /* Prepare the nonce.  */
+  memset (ktop, 0, OCB_BLOCK_LEN);
+  buf_cpy (ktop + (OCB_BLOCK_LEN - noncelen), nonce, noncelen);
+  ktop[0] = ((c->u_mode.ocb.taglen * 8) % 128) << 1;
+  ktop[OCB_BLOCK_LEN - noncelen - 1] |= 1;
+  bottom = ktop[OCB_BLOCK_LEN - 1] & 0x3f;
+  ktop[OCB_BLOCK_LEN - 1] &= 0xc0; /* Zero the bottom bits.  */
+  nburn = c->spec->encrypt (&c->context.c, ktop, ktop);
+  burn = nburn > burn ? nburn : burn;
+  /* Stretch = Ktop || (Ktop[1..64] xor Ktop[9..72]) */
+  cipher_block_cpy (stretch, ktop, OCB_BLOCK_LEN);
+  cipher_block_xor (stretch + OCB_BLOCK_LEN, ktop, ktop + 1, 8);
+  /* Offset_0 = Stretch[1+bottom..128+bottom]
+     (We use the IV field to store the offset) */
+  bit_copy (c->u_iv.iv, stretch, bottom);
+  c->marks.iv = 1;
+
+  /* Checksum_0 = zeros(128)
+     (We use the CTR field to store the checksum) */
+  memset (c->u_ctr.ctr, 0, OCB_BLOCK_LEN);
+
+  /* Clear AAD buffer.  */
+  memset (c->u_mode.ocb.aad_offset, 0, OCB_BLOCK_LEN);
+  memset (c->u_mode.ocb.aad_sum, 0, OCB_BLOCK_LEN);
+
+  /* Setup other values.  */
+  memset (c->lastiv, 0, sizeof(c->lastiv));
+  c->unused = 0;
+  c->marks.tag = 0;
+  c->marks.finalize = 0;
+  c->u_mode.ocb.data_nblocks = 0;
+  c->u_mode.ocb.aad_nblocks = 0;
+  c->u_mode.ocb.aad_nleftover = 0;
+  c->u_mode.ocb.data_finalized = 0;
+  c->u_mode.ocb.aad_finalized = 0;
+
+  /* log_printhex ("L_*       ", c->u_mode.ocb.L_star, OCB_BLOCK_LEN); */
+  /* log_printhex ("L_$       ", c->u_mode.ocb.L_dollar, OCB_BLOCK_LEN); */
+  /* log_printhex ("L_0       ", c->u_mode.ocb.L[0], OCB_BLOCK_LEN); */
+  /* log_printhex ("L_1       ", c->u_mode.ocb.L[1], OCB_BLOCK_LEN); */
+  /* log_debug (   "bottom    : %u (decimal)\n", bottom); */
+  /* log_printhex ("Ktop      ", ktop, OCB_BLOCK_LEN); */
+  /* log_printhex ("Stretch   ", stretch, sizeof stretch); */
+  /* log_printhex ("Offset_0  ", c->u_iv.iv, OCB_BLOCK_LEN); */
+
+  /* Cleanup */
+  wipememory (ktop, sizeof ktop);
+  wipememory (stretch, sizeof stretch);
+  if (burn > 0)
+    _gcry_burn_stack (burn + 4*sizeof(void*));
+
+  return 0;
+}
+
+
+/* Process additional authentication data.  This implementation allows
+   to add additional authentication data at any time before the final
+   gcry_cipher_gettag.  */
+gcry_err_code_t
+_gcry_cipher_ocb_authenticate (gcry_cipher_hd_t c, const unsigned char *abuf,
+                               size_t abuflen)
+{
+  const size_t table_maxblks = 1 << OCB_L_TABLE_SIZE;
+  const u32 table_size_mask = ((1 << OCB_L_TABLE_SIZE) - 1);
+  unsigned char l_tmp[OCB_BLOCK_LEN];
+  unsigned int burn = 0;
+  unsigned int nburn;
+  size_t n;
+
+  /* Check that a nonce and thus a key has been set and that we have
+     not yet computed the tag.  We also return an error if the aad has
+     been finalized (i.e. a short block has been processed).  */
+  if (!c->marks.iv || c->marks.tag || c->u_mode.ocb.aad_finalized)
+    return GPG_ERR_INV_STATE;
+
+  /* Check correct usage and arguments.  */
+  if (c->spec->blocksize != OCB_BLOCK_LEN)
+    return GPG_ERR_CIPHER_ALGO;
+
+  /* Process remaining data from the last call first.  */
+  if (c->u_mode.ocb.aad_nleftover)
+    {
+      n = abuflen;
+      if (n > OCB_BLOCK_LEN - c->u_mode.ocb.aad_nleftover)
+       n = OCB_BLOCK_LEN - c->u_mode.ocb.aad_nleftover;
+
+      buf_cpy (&c->u_mode.ocb.aad_leftover[c->u_mode.ocb.aad_nleftover],
+              abuf, n);
+      c->u_mode.ocb.aad_nleftover += n;
+      abuf += n;
+      abuflen -= n;
+
+      if (c->u_mode.ocb.aad_nleftover == OCB_BLOCK_LEN)
+        {
+          c->u_mode.ocb.aad_nblocks++;
+
+          if ((c->u_mode.ocb.aad_nblocks % table_maxblks) == 0)
+            {
+              /* Table overflow, L needs to be generated. */
+              ocb_get_L_big(c, c->u_mode.ocb.aad_nblocks + 1, l_tmp);
+            }
+          else
+            {
+              cipher_block_cpy (l_tmp, ocb_get_l (c, 
c->u_mode.ocb.aad_nblocks),
+                                OCB_BLOCK_LEN);
+            }
+
+          /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+          cipher_block_xor_1 (c->u_mode.ocb.aad_offset, l_tmp, OCB_BLOCK_LEN);
+          /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i)  */
+          cipher_block_xor (l_tmp, c->u_mode.ocb.aad_offset,
+                            c->u_mode.ocb.aad_leftover, OCB_BLOCK_LEN);
+          nburn = c->spec->encrypt (&c->context.c, l_tmp, l_tmp);
+          burn = nburn > burn ? nburn : burn;
+          cipher_block_xor_1 (c->u_mode.ocb.aad_sum, l_tmp, OCB_BLOCK_LEN);
+
+          c->u_mode.ocb.aad_nleftover = 0;
+        }
+    }
+
+  if (!abuflen)
+    {
+      if (burn > 0)
+        _gcry_burn_stack (burn + 4*sizeof(void*));
+
+      return 0;
+    }
+
+  /* Full blocks handling. */
+  while (abuflen >= OCB_BLOCK_LEN)
+    {
+      size_t nblks = abuflen / OCB_BLOCK_LEN;
+      size_t nmaxblks;
+
+      /* Check how many blocks to process till table overflow. */
+      nmaxblks = (c->u_mode.ocb.aad_nblocks + 1) % table_maxblks;
+      nmaxblks = (table_maxblks - nmaxblks) % table_maxblks;
+
+      if (nmaxblks == 0)
+        {
+          /* Table overflow, generate L and process one block. */
+          c->u_mode.ocb.aad_nblocks++;
+          ocb_get_L_big(c, c->u_mode.ocb.aad_nblocks, l_tmp);
+
+          /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+          cipher_block_xor_1 (c->u_mode.ocb.aad_offset, l_tmp, OCB_BLOCK_LEN);
+          /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i)  */
+          cipher_block_xor (l_tmp, c->u_mode.ocb.aad_offset, abuf,
+                            OCB_BLOCK_LEN);
+          nburn = c->spec->encrypt (&c->context.c, l_tmp, l_tmp);
+          burn = nburn > burn ? nburn : burn;
+          cipher_block_xor_1 (c->u_mode.ocb.aad_sum, l_tmp, OCB_BLOCK_LEN);
+
+          abuf += OCB_BLOCK_LEN;
+          abuflen -= OCB_BLOCK_LEN;
+          nblks--;
+
+          /* With overflow handled, retry loop again. Next overflow will
+           * happen after 65535 blocks. */
+          continue;
+        }
+
+      nblks = nblks < nmaxblks ? nblks : nmaxblks;
+
+      /* Use a bulk method if available.  */
+      if (nblks && c->bulk.ocb_auth)
+        {
+          size_t nleft;
+          size_t ndone;
+
+          nleft = c->bulk.ocb_auth (c, abuf, nblks);
+          ndone = nblks - nleft;
+
+          abuf += ndone * OCB_BLOCK_LEN;
+          abuflen -= ndone * OCB_BLOCK_LEN;
+          nblks = nleft;
+        }
+
+      /* Hash all full blocks.  */
+      while (nblks)
+        {
+          c->u_mode.ocb.aad_nblocks++;
+
+          gcry_assert(c->u_mode.ocb.aad_nblocks & table_size_mask);
+
+          /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+          cipher_block_xor_1 (c->u_mode.ocb.aad_offset,
+                              ocb_get_l (c, c->u_mode.ocb.aad_nblocks),
+                              OCB_BLOCK_LEN);
+          /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i)  */
+          cipher_block_xor (l_tmp, c->u_mode.ocb.aad_offset, abuf,
+                            OCB_BLOCK_LEN);
+          nburn = c->spec->encrypt (&c->context.c, l_tmp, l_tmp);
+          burn = nburn > burn ? nburn : burn;
+          cipher_block_xor_1 (c->u_mode.ocb.aad_sum, l_tmp, OCB_BLOCK_LEN);
+
+          abuf += OCB_BLOCK_LEN;
+          abuflen -= OCB_BLOCK_LEN;
+          nblks--;
+        }
+    }
+
+  /* Store away the remaining data.  */
+  if (abuflen)
+    {
+      n = abuflen;
+      if (n > OCB_BLOCK_LEN - c->u_mode.ocb.aad_nleftover)
+       n = OCB_BLOCK_LEN - c->u_mode.ocb.aad_nleftover;
+
+      buf_cpy (&c->u_mode.ocb.aad_leftover[c->u_mode.ocb.aad_nleftover],
+              abuf, n);
+      c->u_mode.ocb.aad_nleftover += n;
+      abuf += n;
+      abuflen -= n;
+    }
+
+  gcry_assert (!abuflen);
+
+  if (burn > 0)
+    _gcry_burn_stack (burn + 4*sizeof(void*));
+
+  return 0;
+}
+
+
+/* Hash final partial AAD block.  */
+static void
+ocb_aad_finalize (gcry_cipher_hd_t c)
+{
+  unsigned char l_tmp[OCB_BLOCK_LEN];
+  unsigned int burn = 0;
+  unsigned int nburn;
+
+  /* Check that a nonce and thus a key has been set and that we have
+     not yet computed the tag.  We also skip this if the aad has been
+     finalized.  */
+  if (!c->marks.iv || c->marks.tag || c->u_mode.ocb.aad_finalized)
+    return;
+  if (c->spec->blocksize != OCB_BLOCK_LEN)
+    return;  /* Ooops.  */
+
+  /* Hash final partial block if any.  */
+  if (c->u_mode.ocb.aad_nleftover)
+    {
+      /* Offset_* = Offset_m xor L_*  */
+      cipher_block_xor_1 (c->u_mode.ocb.aad_offset,
+                          c->u_mode.ocb.L_star, OCB_BLOCK_LEN);
+      /* CipherInput = (A_* || 1 || zeros(127-bitlen(A_*))) xor Offset_*  */
+      buf_cpy (l_tmp, c->u_mode.ocb.aad_leftover, c->u_mode.ocb.aad_nleftover);
+      memset (l_tmp + c->u_mode.ocb.aad_nleftover, 0,
+              OCB_BLOCK_LEN - c->u_mode.ocb.aad_nleftover);
+      l_tmp[c->u_mode.ocb.aad_nleftover] = 0x80;
+      cipher_block_xor_1 (l_tmp, c->u_mode.ocb.aad_offset, OCB_BLOCK_LEN);
+      /* Sum = Sum_m xor ENCIPHER(K, CipherInput)  */
+      nburn = c->spec->encrypt (&c->context.c, l_tmp, l_tmp);
+      burn = nburn > burn ? nburn : burn;
+      cipher_block_xor_1 (c->u_mode.ocb.aad_sum, l_tmp, OCB_BLOCK_LEN);
+
+      c->u_mode.ocb.aad_nleftover = 0;
+    }
+
+  /* Mark AAD as finalized so that gcry_cipher_ocb_authenticate can
+   * return an erro when called again.  */
+  c->u_mode.ocb.aad_finalized = 1;
+
+  if (burn > 0)
+    _gcry_burn_stack (burn + 4*sizeof(void*));
+}
+
+
+
+/* Checksumming for encrypt and decrypt.  */
+static void
+ocb_checksum (unsigned char *chksum, const unsigned char *plainbuf,
+              size_t nblks)
+{
+  while (nblks > 0)
+    {
+      /* Checksum_i = Checksum_{i-1} xor P_i  */
+      cipher_block_xor_1(chksum, plainbuf, OCB_BLOCK_LEN);
+
+      plainbuf += OCB_BLOCK_LEN;
+      nblks--;
+    }
+}
+
+
+/* Common code for encrypt and decrypt.  */
+static gcry_err_code_t
+ocb_crypt (gcry_cipher_hd_t c, int encrypt,
+           unsigned char *outbuf, size_t outbuflen,
+           const unsigned char *inbuf, size_t inbuflen)
+{
+  const size_t table_maxblks = 1 << OCB_L_TABLE_SIZE;
+  const u32 table_size_mask = ((1 << OCB_L_TABLE_SIZE) - 1);
+  unsigned char l_tmp[OCB_BLOCK_LEN];
+  unsigned int burn = 0;
+  unsigned int nburn;
+  gcry_cipher_encrypt_t crypt_fn =
+      encrypt ? c->spec->encrypt : c->spec->decrypt;
+
+  /* Check that a nonce and thus a key has been set and that we are
+     not yet in end of data state. */
+  if (!c->marks.iv || c->u_mode.ocb.data_finalized)
+    return GPG_ERR_INV_STATE;
+
+  /* Check correct usage and arguments.  */
+  if (c->spec->blocksize != OCB_BLOCK_LEN)
+    return GPG_ERR_CIPHER_ALGO;
+  if (outbuflen < inbuflen)
+    return GPG_ERR_BUFFER_TOO_SHORT;
+  if (c->marks.finalize)
+    ; /* Allow arbitarty length. */
+  else if ((inbuflen % OCB_BLOCK_LEN))
+    return GPG_ERR_INV_LENGTH;  /* We support only full blocks for now.  */
+
+  /* Full blocks handling. */
+  while (inbuflen >= OCB_BLOCK_LEN)
+    {
+      size_t nblks = inbuflen / OCB_BLOCK_LEN;
+      size_t nmaxblks;
+
+      /* Check how many blocks to process till table overflow. */
+      nmaxblks = (c->u_mode.ocb.data_nblocks + 1) % table_maxblks;
+      nmaxblks = (table_maxblks - nmaxblks) % table_maxblks;
+
+      if (nmaxblks == 0)
+        {
+          /* Table overflow, generate L and process one block. */
+          c->u_mode.ocb.data_nblocks++;
+          ocb_get_L_big(c, c->u_mode.ocb.data_nblocks, l_tmp);
+
+          if (encrypt)
+            {
+              /* Checksum_i = Checksum_{i-1} xor P_i  */
+              ocb_checksum (c->u_ctr.ctr, inbuf, 1);
+            }
+
+          /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+          cipher_block_xor_1 (c->u_iv.iv, l_tmp, OCB_BLOCK_LEN);
+          /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i)  */
+          cipher_block_xor (outbuf, c->u_iv.iv, inbuf, OCB_BLOCK_LEN);
+          nburn = crypt_fn (&c->context.c, outbuf, outbuf);
+          burn = nburn > burn ? nburn : burn;
+          cipher_block_xor_1 (outbuf, c->u_iv.iv, OCB_BLOCK_LEN);
+
+          if (!encrypt)
+            {
+              /* Checksum_i = Checksum_{i-1} xor P_i  */
+              ocb_checksum (c->u_ctr.ctr, outbuf, 1);
+            }
+
+          inbuf += OCB_BLOCK_LEN;
+          inbuflen -= OCB_BLOCK_LEN;
+          outbuf += OCB_BLOCK_LEN;
+          outbuflen =- OCB_BLOCK_LEN;
+          nblks--;
+
+          /* With overflow handled, retry loop again. Next overflow will
+           * happen after 65535 blocks. */
+          continue;
+        }
+
+      nblks = nblks < nmaxblks ? nblks : nmaxblks;
+
+      /* Since checksum xoring is done before/after encryption/decryption,
+       process input in 24KiB chunks to keep data loaded in L1 cache for
+       checksumming. */
+      if (nblks > 24 * 1024 / OCB_BLOCK_LEN)
+       nblks = 24 * 1024 / OCB_BLOCK_LEN;
+
+      /* Use a bulk method if available.  */
+      if (nblks && c->bulk.ocb_crypt)
+        {
+          size_t nleft;
+          size_t ndone;
+
+          nleft = c->bulk.ocb_crypt (c, outbuf, inbuf, nblks, encrypt);
+          ndone = nblks - nleft;
+
+          inbuf += ndone * OCB_BLOCK_LEN;
+          outbuf += ndone * OCB_BLOCK_LEN;
+          inbuflen -= ndone * OCB_BLOCK_LEN;
+          outbuflen -= ndone * OCB_BLOCK_LEN;
+          nblks = nleft;
+        }
+
+      if (nblks)
+        {
+          size_t nblks_chksum = nblks;
+
+          if (encrypt)
+            {
+              /* Checksum_i = Checksum_{i-1} xor P_i  */
+              ocb_checksum (c->u_ctr.ctr, inbuf, nblks_chksum);
+            }
+
+          /* Encrypt all full blocks.  */
+          while (nblks)
+            {
+              c->u_mode.ocb.data_nblocks++;
+
+              gcry_assert(c->u_mode.ocb.data_nblocks & table_size_mask);
+
+              /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+              cipher_block_xor_1 (c->u_iv.iv,
+                                  ocb_get_l (c, c->u_mode.ocb.data_nblocks),
+                                  OCB_BLOCK_LEN);
+              /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i)  */
+              cipher_block_xor (outbuf, c->u_iv.iv, inbuf, OCB_BLOCK_LEN);
+              nburn = crypt_fn (&c->context.c, outbuf, outbuf);
+              burn = nburn > burn ? nburn : burn;
+              cipher_block_xor_1 (outbuf, c->u_iv.iv, OCB_BLOCK_LEN);
+
+              inbuf += OCB_BLOCK_LEN;
+              inbuflen -= OCB_BLOCK_LEN;
+              outbuf += OCB_BLOCK_LEN;
+              outbuflen =- OCB_BLOCK_LEN;
+              nblks--;
+            }
+
+          if (!encrypt)
+            {
+              /* Checksum_i = Checksum_{i-1} xor P_i  */
+              ocb_checksum (c->u_ctr.ctr,
+                            outbuf - nblks_chksum * OCB_BLOCK_LEN,
+                            nblks_chksum);
+            }
+        }
+    }
+
+  /* Encrypt final partial block.  Note that we expect INBUFLEN to be
+     shorter than OCB_BLOCK_LEN (see above).  */
+  if (inbuflen)
+    {
+      unsigned char pad[OCB_BLOCK_LEN];
+
+      /* Offset_* = Offset_m xor L_*  */
+      cipher_block_xor_1 (c->u_iv.iv, c->u_mode.ocb.L_star, OCB_BLOCK_LEN);
+      /* Pad = ENCIPHER(K, Offset_*) */
+      nburn = c->spec->encrypt (&c->context.c, pad, c->u_iv.iv);
+      burn = nburn > burn ? nburn : burn;
+
+      if (encrypt)
+        {
+          /* Checksum_* = Checksum_m xor (P_* || 1 || zeros(127-bitlen(P_*))) 
*/
+          /* Note that INBUFLEN is less than OCB_BLOCK_LEN.  */
+          buf_cpy (l_tmp, inbuf, inbuflen);
+          memset (l_tmp + inbuflen, 0, OCB_BLOCK_LEN - inbuflen);
+          l_tmp[inbuflen] = 0x80;
+          cipher_block_xor_1 (c->u_ctr.ctr, l_tmp, OCB_BLOCK_LEN);
+          /* C_* = P_* xor Pad[1..bitlen(P_*)] */
+          buf_xor (outbuf, inbuf, pad, inbuflen);
+        }
+      else
+        {
+          /* P_* = C_* xor Pad[1..bitlen(C_*)] */
+          /* Checksum_* = Checksum_m xor (P_* || 1 || zeros(127-bitlen(P_*))) 
*/
+          cipher_block_cpy (l_tmp, pad, OCB_BLOCK_LEN);
+          buf_cpy (l_tmp, inbuf, inbuflen);
+          cipher_block_xor_1 (l_tmp, pad, OCB_BLOCK_LEN);
+          l_tmp[inbuflen] = 0x80;
+          buf_cpy (outbuf, l_tmp, inbuflen);
+
+          cipher_block_xor_1 (c->u_ctr.ctr, l_tmp, OCB_BLOCK_LEN);
+        }
+    }
+
+  /* Compute the tag if the finalize flag has been set.  */
+  if (c->marks.finalize)
+    {
+      /* Tag = ENCIPHER(K, Checksum xor Offset xor L_$) xor HASH(K,A) */
+      cipher_block_xor (c->u_mode.ocb.tag, c->u_ctr.ctr, c->u_iv.iv,
+                        OCB_BLOCK_LEN);
+      cipher_block_xor_1 (c->u_mode.ocb.tag, c->u_mode.ocb.L_dollar,
+                          OCB_BLOCK_LEN);
+      nburn = c->spec->encrypt (&c->context.c,
+                                c->u_mode.ocb.tag, c->u_mode.ocb.tag);
+      burn = nburn > burn ? nburn : burn;
+
+      c->u_mode.ocb.data_finalized = 1;
+      /* Note that the the final part of the tag computation is done
+         by _gcry_cipher_ocb_get_tag.  */
+    }
+
+  if (burn > 0)
+    _gcry_burn_stack (burn + 4*sizeof(void*));
+
+  return 0;
+}
+
+
+/* Encrypt (INBUF,INBUFLEN) in OCB mode to OUTBUF.  OUTBUFLEN gives
+   the allocated size of OUTBUF.  This function accepts only multiples
+   of a full block unless gcry_cipher_final has been called in which
+   case the next block may have any length.  */
+gcry_err_code_t
+_gcry_cipher_ocb_encrypt (gcry_cipher_hd_t c,
+                          unsigned char *outbuf, size_t outbuflen,
+                          const unsigned char *inbuf, size_t inbuflen)
+
+{
+  return ocb_crypt (c, 1, outbuf, outbuflen, inbuf, inbuflen);
+}
+
+
+/* Decrypt (INBUF,INBUFLEN) in OCB mode to OUTBUF.  OUTBUFLEN gives
+   the allocated size of OUTBUF.  This function accepts only multiples
+   of a full block unless gcry_cipher_final has been called in which
+   case the next block may have any length.  */
+gcry_err_code_t
+_gcry_cipher_ocb_decrypt (gcry_cipher_hd_t c,
+                          unsigned char *outbuf, size_t outbuflen,
+                          const unsigned char *inbuf, size_t inbuflen)
+{
+  return ocb_crypt (c, 0, outbuf, outbuflen, inbuf, inbuflen);
+}
+
+
+/* Compute the tag.  The last data operation has already done some
+   part of it.  To allow adding AAD even after having done all data,
+   we finish the tag computation only here.  */
+static void
+compute_tag_if_needed (gcry_cipher_hd_t c)
+{
+  if (!c->marks.tag)
+    {
+      ocb_aad_finalize (c);
+      cipher_block_xor_1 (c->u_mode.ocb.tag, c->u_mode.ocb.aad_sum,
+                          OCB_BLOCK_LEN);
+      c->marks.tag = 1;
+    }
+}
+
+
+/* Copy the already computed tag to OUTTAG.  OUTTAGSIZE is the
+   allocated size of OUTTAG; the function returns an error if that is
+   too short to hold the tag.  */
+gcry_err_code_t
+_gcry_cipher_ocb_get_tag (gcry_cipher_hd_t c,
+                          unsigned char *outtag, size_t outtagsize)
+{
+  if (c->u_mode.ocb.taglen > outtagsize)
+    return GPG_ERR_BUFFER_TOO_SHORT;
+  if (!c->u_mode.ocb.data_finalized)
+    return GPG_ERR_INV_STATE; /* Data has not yet been finalized.  */
+
+  compute_tag_if_needed (c);
+
+  memcpy (outtag, c->u_mode.ocb.tag, c->u_mode.ocb.taglen);
+
+  return 0;
+}
+
+
+/* Check that the tag (INTAG,TAGLEN) matches the computed tag for the
+   handle C.  */
+gcry_err_code_t
+_gcry_cipher_ocb_check_tag (gcry_cipher_hd_t c, const unsigned char *intag,
+                           size_t taglen)
+{
+  size_t n;
+
+  if (!c->u_mode.ocb.data_finalized)
+    return GPG_ERR_INV_STATE; /* Data has not yet been finalized.  */
+
+  compute_tag_if_needed (c);
+
+  n = c->u_mode.ocb.taglen;
+  if (taglen < n)
+    n = taglen;
+
+  if (!buf_eq_const (intag, c->u_mode.ocb.tag, n)
+      || c->u_mode.ocb.taglen != taglen)
+    return GPG_ERR_CHECKSUM;
+
+  return 0;
+}
diff --git a/grub-core/lib/libgcrypt/cipher/cipher-ofb.c 
b/grub-core/lib/libgcrypt/cipher/cipher-ofb.c
new file mode 100644
index 000000000..09db397e6
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/cipher-ofb.c
@@ -0,0 +1,108 @@
+/* cipher-ofb.c  - Generic OFB mode implementation
+ * Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003
+ *               2005, 2007, 2008, 2009, 2011 Free Software Foundation, Inc.
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser general Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "g10lib.h"
+#include "cipher.h"
+#include "bufhelp.h"
+#include "./cipher-internal.h"
+
+
+gcry_err_code_t
+_gcry_cipher_ofb_encrypt (gcry_cipher_hd_t c,
+                          unsigned char *outbuf, size_t outbuflen,
+                          const unsigned char *inbuf, size_t inbuflen)
+{
+  unsigned char *ivp;
+  gcry_cipher_encrypt_t enc_fn = c->spec->encrypt;
+  size_t blocksize_shift = _gcry_blocksize_shift(c);
+  size_t blocksize = 1 << blocksize_shift;
+  unsigned int burn, nburn;
+
+  if (outbuflen < inbuflen)
+    return GPG_ERR_BUFFER_TOO_SHORT;
+
+  if ( inbuflen <= c->unused )
+    {
+      /* Short enough to be encoded by the remaining XOR mask. */
+      /* XOR the input with the IV */
+      ivp = c->u_iv.iv + blocksize - c->unused;
+      buf_xor(outbuf, ivp, inbuf, inbuflen);
+      c->unused -= inbuflen;
+      return 0;
+    }
+
+  burn = 0;
+
+  if( c->unused )
+    {
+      inbuflen -= c->unused;
+      ivp = c->u_iv.iv + blocksize - c->unused;
+      buf_xor(outbuf, ivp, inbuf, c->unused);
+      outbuf += c->unused;
+      inbuf += c->unused;
+      c->unused = 0;
+    }
+
+  /* Now we can process complete blocks. */
+  if (c->bulk.ofb_enc)
+    {
+      size_t nblocks = inbuflen >> blocksize_shift;
+      c->bulk.ofb_enc (&c->context.c, c->u_iv.iv, outbuf, inbuf, nblocks);
+      outbuf += nblocks << blocksize_shift;
+      inbuf  += nblocks << blocksize_shift;
+      inbuflen -= nblocks << blocksize_shift;
+    }
+  else
+    {
+      while ( inbuflen >= blocksize )
+       {
+         /* Encrypt the IV (and save the current one). */
+         nburn = enc_fn ( &c->context.c, c->u_iv.iv, c->u_iv.iv );
+         burn = nburn > burn ? nburn : burn;
+         cipher_block_xor(outbuf, c->u_iv.iv, inbuf, blocksize);
+         outbuf += blocksize;
+         inbuf += blocksize;
+         inbuflen -= blocksize;
+       }
+    }
+
+  if ( inbuflen )
+    { /* process the remaining bytes */
+      nburn = enc_fn ( &c->context.c, c->u_iv.iv, c->u_iv.iv );
+      burn = nburn > burn ? nburn : burn;
+      c->unused = blocksize;
+      c->unused -= inbuflen;
+      buf_xor(outbuf, c->u_iv.iv, inbuf, inbuflen);
+      outbuf += inbuflen;
+      inbuf += inbuflen;
+      inbuflen = 0;
+    }
+
+  if (burn > 0)
+    _gcry_burn_stack (burn + 4 * sizeof(void *));
+
+  return 0;
+}
diff --git a/grub-core/lib/libgcrypt/cipher/cipher-poly1305.c 
b/grub-core/lib/libgcrypt/cipher/cipher-poly1305.c
new file mode 100644
index 000000000..9abd8b0ce
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/cipher-poly1305.c
@@ -0,0 +1,379 @@
+/* cipher-poly1305.c  -  Poly1305 based AEAD cipher mode, RFC-8439
+ * Copyright (C) 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser general Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "g10lib.h"
+#include "cipher.h"
+#include "bufhelp.h"
+#include "./cipher-internal.h"
+#include "./poly1305-internal.h"
+
+
+static inline int
+poly1305_bytecounter_add (u32 ctr[2], size_t add)
+{
+  int overflow = 0;
+
+  if (sizeof(add) > sizeof(u32))
+    {
+      u32 high_add = ((add >> 31) >> 1) & 0xffffffff;
+      ctr[1] += high_add;
+      if (ctr[1] < high_add)
+        overflow = 1;
+    }
+
+  ctr[0] += add;
+  if (ctr[0] >= add)
+    return overflow;
+
+  ctr[1] += 1;
+  return (ctr[1] < 1) || overflow;
+}
+
+
+static void
+poly1305_fill_bytecounts (gcry_cipher_hd_t c)
+{
+  u32 lenbuf[4];
+
+  lenbuf[0] = le_bswap32(c->u_mode.poly1305.aadcount[0]);
+  lenbuf[1] = le_bswap32(c->u_mode.poly1305.aadcount[1]);
+  lenbuf[2] = le_bswap32(c->u_mode.poly1305.datacount[0]);
+  lenbuf[3] = le_bswap32(c->u_mode.poly1305.datacount[1]);
+  _gcry_poly1305_update (&c->u_mode.poly1305.ctx, (byte*)lenbuf,
+                        sizeof(lenbuf));
+
+  wipememory(lenbuf, sizeof(lenbuf));
+}
+
+
+static void
+poly1305_do_padding (gcry_cipher_hd_t c, u32 ctr[2])
+{
+  static const byte zero_padding_buf[15] = {};
+  u32 padding_count;
+
+  /* Padding to 16 byte boundary. */
+  if (ctr[0] % 16 > 0)
+    {
+      padding_count = 16 - ctr[0] % 16;
+
+      _gcry_poly1305_update (&c->u_mode.poly1305.ctx, zero_padding_buf,
+                            padding_count);
+    }
+}
+
+
+static void
+poly1305_aad_finish (gcry_cipher_hd_t c)
+{
+  /* After AAD, feed padding bytes so we get 16 byte alignment. */
+  poly1305_do_padding (c, c->u_mode.poly1305.aadcount);
+
+  /* Start of encryption marks end of AAD stream. */
+  c->u_mode.poly1305.aad_finalized = 1;
+
+  c->u_mode.poly1305.datacount[0] = 0;
+  c->u_mode.poly1305.datacount[1] = 0;
+}
+
+
+static gcry_err_code_t
+poly1305_set_zeroiv (gcry_cipher_hd_t c)
+{
+  byte zero[8] = { 0, };
+
+  return _gcry_cipher_poly1305_setiv (c, zero, sizeof(zero));
+}
+
+
+gcry_err_code_t
+_gcry_cipher_poly1305_authenticate (gcry_cipher_hd_t c,
+                                   const byte * aadbuf, size_t aadbuflen)
+{
+  if (c->u_mode.poly1305.bytecount_over_limits)
+    return GPG_ERR_INV_LENGTH;
+  if (c->u_mode.poly1305.aad_finalized)
+    return GPG_ERR_INV_STATE;
+  if (c->marks.tag)
+    return GPG_ERR_INV_STATE;
+
+  if (!c->marks.iv)
+    poly1305_set_zeroiv(c);
+
+  if (poly1305_bytecounter_add(c->u_mode.poly1305.aadcount, aadbuflen))
+    {
+      c->u_mode.poly1305.bytecount_over_limits = 1;
+      return GPG_ERR_INV_LENGTH;
+    }
+
+  _gcry_poly1305_update (&c->u_mode.poly1305.ctx, aadbuf, aadbuflen);
+
+  return 0;
+}
+
+
+gcry_err_code_t
+_gcry_cipher_poly1305_encrypt (gcry_cipher_hd_t c,
+                              byte *outbuf, size_t outbuflen,
+                              const byte *inbuf, size_t inbuflen)
+{
+  gcry_err_code_t err;
+
+  if (outbuflen < inbuflen)
+    return GPG_ERR_BUFFER_TOO_SHORT;
+  if (c->marks.tag)
+    return GPG_ERR_INV_STATE;
+  if (c->u_mode.poly1305.bytecount_over_limits)
+    return GPG_ERR_INV_LENGTH;
+
+  if (!c->marks.iv)
+    {
+      err = poly1305_set_zeroiv(c);
+      if (err)
+        return err;
+    }
+
+  if (!c->u_mode.poly1305.aad_finalized)
+    poly1305_aad_finish(c);
+
+  if (poly1305_bytecounter_add(c->u_mode.poly1305.datacount, inbuflen))
+    {
+      c->u_mode.poly1305.bytecount_over_limits = 1;
+      return GPG_ERR_INV_LENGTH;
+    }
+
+#ifdef USE_CHACHA20
+  if (LIKELY(inbuflen > 0) && LIKELY(c->spec->algo == GCRY_CIPHER_CHACHA20))
+    {
+      return _gcry_chacha20_poly1305_encrypt (c, outbuf, inbuf, inbuflen);
+    }
+#endif
+
+  while (inbuflen)
+    {
+      size_t currlen = inbuflen;
+
+      /* Since checksumming is done after encryption, process input in 24KiB
+       * chunks to keep data loaded in L1 cache for checksumming. */
+      if (currlen > 24 * 1024)
+       currlen = 24 * 1024;
+
+      c->spec->stencrypt(&c->context.c, outbuf, (byte*)inbuf, currlen);
+
+      _gcry_poly1305_update (&c->u_mode.poly1305.ctx, outbuf, currlen);
+
+      outbuf += currlen;
+      inbuf += currlen;
+      outbuflen -= currlen;
+      inbuflen -= currlen;
+    }
+
+  return 0;
+}
+
+
+gcry_err_code_t
+_gcry_cipher_poly1305_decrypt (gcry_cipher_hd_t c,
+                              byte *outbuf, size_t outbuflen,
+                              const byte *inbuf, size_t inbuflen)
+{
+  gcry_err_code_t err;
+
+  if (outbuflen < inbuflen)
+    return GPG_ERR_BUFFER_TOO_SHORT;
+  if (c->marks.tag)
+    return GPG_ERR_INV_STATE;
+  if (c->u_mode.poly1305.bytecount_over_limits)
+    return GPG_ERR_INV_LENGTH;
+
+  if (!c->marks.iv)
+    {
+      err = poly1305_set_zeroiv(c);
+      if (err)
+        return err;
+    }
+
+  if (!c->u_mode.poly1305.aad_finalized)
+    poly1305_aad_finish(c);
+
+  if (poly1305_bytecounter_add(c->u_mode.poly1305.datacount, inbuflen))
+    {
+      c->u_mode.poly1305.bytecount_over_limits = 1;
+      return GPG_ERR_INV_LENGTH;
+    }
+
+#ifdef USE_CHACHA20
+  if (LIKELY(inbuflen > 0) && LIKELY(c->spec->algo == GCRY_CIPHER_CHACHA20))
+    {
+      return _gcry_chacha20_poly1305_decrypt (c, outbuf, inbuf, inbuflen);
+    }
+#endif
+
+  while (inbuflen)
+    {
+      size_t currlen = inbuflen;
+
+      /* Since checksumming is done before decryption, process input in 24KiB
+       * chunks to keep data loaded in L1 cache for decryption. */
+      if (currlen > 24 * 1024)
+       currlen = 24 * 1024;
+
+      _gcry_poly1305_update (&c->u_mode.poly1305.ctx, inbuf, currlen);
+
+      c->spec->stdecrypt(&c->context.c, outbuf, (byte*)inbuf, currlen);
+
+      outbuf += currlen;
+      inbuf += currlen;
+      outbuflen -= currlen;
+      inbuflen -= currlen;
+    }
+
+  return 0;
+}
+
+
+static gcry_err_code_t
+_gcry_cipher_poly1305_tag (gcry_cipher_hd_t c,
+                          byte * outbuf, size_t outbuflen, int check)
+{
+  gcry_err_code_t err;
+
+  if (outbuflen < POLY1305_TAGLEN)
+    return GPG_ERR_BUFFER_TOO_SHORT;
+  if (c->u_mode.poly1305.bytecount_over_limits)
+    return GPG_ERR_INV_LENGTH;
+
+  if (!c->marks.iv)
+    {
+      err = poly1305_set_zeroiv(c);
+      if (err)
+        return err;
+    }
+
+  if (!c->u_mode.poly1305.aad_finalized)
+    poly1305_aad_finish(c);
+
+  if (!c->marks.tag)
+    {
+      /* After data, feed padding bytes so we get 16 byte alignment. */
+      poly1305_do_padding (c, c->u_mode.poly1305.datacount);
+
+      /* Write byte counts to poly1305. */
+      poly1305_fill_bytecounts(c);
+
+      _gcry_poly1305_finish(&c->u_mode.poly1305.ctx, c->u_iv.iv);
+
+      c->marks.tag = 1;
+    }
+
+  if (!check)
+    {
+      memcpy (outbuf, c->u_iv.iv, POLY1305_TAGLEN);
+    }
+  else
+    {
+      /* OUTBUFLEN gives the length of the user supplied tag in OUTBUF
+       * and thus we need to compare its length first.  */
+      if (outbuflen != POLY1305_TAGLEN
+          || !buf_eq_const (outbuf, c->u_iv.iv, POLY1305_TAGLEN))
+        return GPG_ERR_CHECKSUM;
+    }
+
+  return 0;
+}
+
+
+gcry_err_code_t
+_gcry_cipher_poly1305_get_tag (gcry_cipher_hd_t c, unsigned char *outtag,
+                          size_t taglen)
+{
+  return _gcry_cipher_poly1305_tag (c, outtag, taglen, 0);
+}
+
+gcry_err_code_t
+_gcry_cipher_poly1305_check_tag (gcry_cipher_hd_t c, const unsigned char 
*intag,
+                            size_t taglen)
+{
+  return _gcry_cipher_poly1305_tag (c, (unsigned char *) intag, taglen, 1);
+}
+
+
+void
+_gcry_cipher_poly1305_setkey (gcry_cipher_hd_t c)
+{
+  c->u_mode.poly1305.aadcount[0] = 0;
+  c->u_mode.poly1305.aadcount[1] = 0;
+
+  c->u_mode.poly1305.datacount[0] = 0;
+  c->u_mode.poly1305.datacount[1] = 0;
+
+  c->u_mode.poly1305.bytecount_over_limits = 0;
+  c->u_mode.poly1305.aad_finalized = 0;
+  c->marks.tag = 0;
+  c->marks.iv = 0;
+}
+
+
+gcry_err_code_t
+_gcry_cipher_poly1305_setiv (gcry_cipher_hd_t c, const byte *iv, size_t ivlen)
+{
+  byte tmpbuf[64]; /* size of ChaCha20 block */
+  gcry_err_code_t err;
+
+  /* IV must be 96-bits */
+  if (!iv && ivlen != (96 / 8))
+    return GPG_ERR_INV_ARG;
+
+  memset(&c->u_mode.poly1305.ctx, 0, sizeof(c->u_mode.poly1305.ctx));
+
+  c->u_mode.poly1305.aadcount[0] = 0;
+  c->u_mode.poly1305.aadcount[1] = 0;
+
+  c->u_mode.poly1305.datacount[0] = 0;
+  c->u_mode.poly1305.datacount[1] = 0;
+
+  c->u_mode.poly1305.bytecount_over_limits = 0;
+  c->u_mode.poly1305.aad_finalized = 0;
+  c->marks.tag = 0;
+  c->marks.iv = 0;
+
+  /* Set up IV for stream cipher. */
+  c->spec->setiv (&c->context.c, iv, ivlen);
+
+  /* Get the first block from ChaCha20. */
+  memset(tmpbuf, 0, sizeof(tmpbuf));
+  c->spec->stencrypt(&c->context.c, tmpbuf, tmpbuf, sizeof(tmpbuf));
+
+  /* Use the first 32-bytes as Poly1305 key. */
+  err = _gcry_poly1305_init (&c->u_mode.poly1305.ctx, tmpbuf, POLY1305_KEYLEN);
+
+  wipememory(tmpbuf, sizeof(tmpbuf));
+
+  if (err)
+    return err;
+
+  c->marks.iv = 1;
+  return 0;
+}
diff --git a/grub-core/lib/libgcrypt/cipher/cipher-selftest.c 
b/grub-core/lib/libgcrypt/cipher/cipher-selftest.c
new file mode 100644
index 000000000..d7f38a426
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/cipher-selftest.c
@@ -0,0 +1,512 @@
+/* cipher-selftest.c - Helper functions for bulk encryption selftests.
+ * Copyright (C) 2013,2020 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser general Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#ifdef HAVE_SYSLOG
+# include <syslog.h>
+#endif /*HAVE_SYSLOG*/
+
+#include "types.h"
+#include "g10lib.h"
+#include "cipher.h"
+#include "bufhelp.h"
+#include "cipher-selftest.h"
+#include "cipher-internal.h"
+
+#ifdef HAVE_STDINT_H
+# include <stdint.h> /* uintptr_t */
+#elif defined(HAVE_INTTYPES_H)
+# include <inttypes.h>
+#else
+/* In this case, uintptr_t is provided by config.h. */
+#endif
+
+/* Helper macro to force alignment to 16 bytes.  */
+#ifdef HAVE_GCC_ATTRIBUTE_ALIGNED
+# define ATTR_ALIGNED_16  __attribute__ ((aligned (16)))
+#else
+# define ATTR_ALIGNED_16
+#endif
+
+
+/* Return an allocated buffers of size CONTEXT_SIZE with an alignment
+   of 16.  The caller must free that buffer using the address returned
+   at R_MEM.  Returns NULL and sets ERRNO on failure.  */
+void *
+_gcry_cipher_selftest_alloc_ctx (const int context_size, unsigned char **r_mem)
+{
+  int offs;
+  unsigned int ctx_aligned_size, memsize;
+
+  ctx_aligned_size = context_size + 15;
+  ctx_aligned_size -= ctx_aligned_size & 0xf;
+
+  memsize = ctx_aligned_size + 16;
+
+  *r_mem = xtrycalloc (1, memsize);
+  if (!*r_mem)
+    return NULL;
+
+  offs = (16 - ((uintptr_t)*r_mem & 15)) & 15;
+  return (void*)(*r_mem + offs);
+}
+
+
+/* Run the self-tests for <block cipher>-CBC-<block size>, tests bulk CBC
+   decryption.  Returns NULL on success. */
+const char *
+_gcry_selftest_helper_cbc (const char *cipher, gcry_cipher_setkey_t 
setkey_func,
+                          gcry_cipher_encrypt_t encrypt_one,
+                          const int nblocks, const int blocksize,
+                          const int context_size)
+{
+  cipher_bulk_ops_t bulk_ops = { 0, };
+  int i, offs;
+  unsigned char *ctx, *plaintext, *plaintext2, *ciphertext, *iv, *iv2, *mem;
+  unsigned int ctx_aligned_size, memsize;
+
+  static const unsigned char key[16] ATTR_ALIGNED_16 = {
+      0x66,0x9A,0x00,0x7F,0xC7,0x6A,0x45,0x9F,
+      0x98,0xBA,0xF9,0x17,0xFE,0xDF,0x95,0x22
+    };
+
+  /* Allocate buffers, align first two elements to 16 bytes and latter to
+     block size.  */
+  ctx_aligned_size = context_size + 15;
+  ctx_aligned_size -= ctx_aligned_size & 0xf;
+
+  memsize = ctx_aligned_size + (blocksize * 2) + (blocksize * nblocks * 3) + 
16;
+
+  mem = xtrycalloc (1, memsize);
+  if (!mem)
+    return "failed to allocate memory";
+
+  offs = (16 - ((uintptr_t)mem & 15)) & 15;
+  ctx = (void*)(mem + offs);
+  iv = ctx + ctx_aligned_size;
+  iv2 = iv + blocksize;
+  plaintext = iv2 + blocksize;
+  plaintext2 = plaintext + nblocks * blocksize;
+  ciphertext = plaintext2 + nblocks * blocksize;
+
+  /* Initialize ctx */
+  if (setkey_func (ctx, key, sizeof(key), &bulk_ops) != GPG_ERR_NO_ERROR)
+   {
+     xfree(mem);
+     return "setkey failed";
+   }
+
+  /* Test single block code path */
+  memset (iv, 0x4e, blocksize);
+  memset (iv2, 0x4e, blocksize);
+  for (i = 0; i < blocksize; i++)
+    plaintext[i] = i;
+
+  /* CBC manually.  */
+  buf_xor (ciphertext, iv, plaintext, blocksize);
+  encrypt_one (ctx, ciphertext, ciphertext);
+  memcpy (iv, ciphertext, blocksize);
+
+  /* CBC decrypt.  */
+  bulk_ops.cbc_dec (ctx, iv2, plaintext2, ciphertext, 1);
+  if (memcmp (plaintext2, plaintext, blocksize))
+    {
+      xfree (mem);
+#ifdef HAVE_SYSLOG
+      syslog (LOG_USER|LOG_WARNING, "Libgcrypt warning: "
+              "%s-CBC-%d test failed (plaintext mismatch)", cipher,
+             blocksize * 8);
+#else
+      (void)cipher; /* Not used.  */
+#endif
+      return "selftest for CBC failed - see syslog for details";
+    }
+
+  if (memcmp (iv2, iv, blocksize))
+    {
+      xfree (mem);
+#ifdef HAVE_SYSLOG
+      syslog (LOG_USER|LOG_WARNING, "Libgcrypt warning: "
+              "%s-CBC-%d test failed (IV mismatch)", cipher, blocksize * 8);
+#endif
+      return "selftest for CBC failed - see syslog for details";
+    }
+
+  /* Test parallelized code paths */
+  memset (iv, 0x5f, blocksize);
+  memset (iv2, 0x5f, blocksize);
+
+  for (i = 0; i < nblocks * blocksize; i++)
+    plaintext[i] = i;
+
+  /* Create CBC ciphertext manually.  */
+  for (i = 0; i < nblocks * blocksize; i+=blocksize)
+    {
+      buf_xor (&ciphertext[i], iv, &plaintext[i], blocksize);
+      encrypt_one (ctx, &ciphertext[i], &ciphertext[i]);
+      memcpy (iv, &ciphertext[i], blocksize);
+    }
+
+  /* Decrypt using bulk CBC and compare result.  */
+  bulk_ops.cbc_dec (ctx, iv2, plaintext2, ciphertext, nblocks);
+
+  if (memcmp (plaintext2, plaintext, nblocks * blocksize))
+    {
+      xfree (mem);
+#ifdef HAVE_SYSLOG
+      syslog (LOG_USER|LOG_WARNING, "Libgcrypt warning: "
+              "%s-CBC-%d test failed (plaintext mismatch, parallel path)",
+             cipher, blocksize * 8);
+#endif
+      return "selftest for CBC failed - see syslog for details";
+    }
+  if (memcmp (iv2, iv, blocksize))
+    {
+      xfree (mem);
+#ifdef HAVE_SYSLOG
+      syslog (LOG_USER|LOG_WARNING, "Libgcrypt warning: "
+              "%s-CBC-%d test failed (IV mismatch, parallel path)",
+             cipher, blocksize * 8);
+#endif
+      return "selftest for CBC failed - see syslog for details";
+    }
+
+  xfree (mem);
+  return NULL;
+}
+
+/* Run the self-tests for <block cipher>-CFB-<block size>, tests bulk CFB
+   decryption.  Returns NULL on success. */
+const char *
+_gcry_selftest_helper_cfb (const char *cipher, gcry_cipher_setkey_t 
setkey_func,
+                          gcry_cipher_encrypt_t encrypt_one,
+                          const int nblocks, const int blocksize,
+                          const int context_size)
+{
+  cipher_bulk_ops_t bulk_ops = { 0, };
+  int i, offs;
+  unsigned char *ctx, *plaintext, *plaintext2, *ciphertext, *iv, *iv2, *mem;
+  unsigned int ctx_aligned_size, memsize;
+
+  static const unsigned char key[16] ATTR_ALIGNED_16 = {
+      0x11,0x9A,0x00,0x7F,0xC7,0x6A,0x45,0x9F,
+      0x98,0xBA,0xF9,0x17,0xFE,0xDF,0x95,0x33
+    };
+
+  /* Allocate buffers, align first two elements to 16 bytes and latter to
+     block size.  */
+  ctx_aligned_size = context_size + 15;
+  ctx_aligned_size -= ctx_aligned_size & 0xf;
+
+  memsize = ctx_aligned_size + (blocksize * 2) + (blocksize * nblocks * 3) + 
16;
+
+  mem = xtrycalloc (1, memsize);
+  if (!mem)
+    return "failed to allocate memory";
+
+  offs = (16 - ((uintptr_t)mem & 15)) & 15;
+  ctx = (void*)(mem + offs);
+  iv = ctx + ctx_aligned_size;
+  iv2 = iv + blocksize;
+  plaintext = iv2 + blocksize;
+  plaintext2 = plaintext + nblocks * blocksize;
+  ciphertext = plaintext2 + nblocks * blocksize;
+
+  /* Initialize ctx */
+  if (setkey_func (ctx, key, sizeof(key), &bulk_ops) != GPG_ERR_NO_ERROR)
+   {
+     xfree(mem);
+     return "setkey failed";
+   }
+
+  /* Test single block code path */
+  memset(iv, 0xd3, blocksize);
+  memset(iv2, 0xd3, blocksize);
+  for (i = 0; i < blocksize; i++)
+    plaintext[i] = i;
+
+  /* CFB manually.  */
+  encrypt_one (ctx, ciphertext, iv);
+  buf_xor_2dst (iv, ciphertext, plaintext, blocksize);
+
+  /* CFB decrypt.  */
+  bulk_ops.cfb_dec (ctx, iv2, plaintext2, ciphertext, 1);
+  if (memcmp(plaintext2, plaintext, blocksize))
+    {
+      xfree(mem);
+#ifdef HAVE_SYSLOG
+      syslog (LOG_USER|LOG_WARNING, "Libgcrypt warning: "
+              "%s-CFB-%d test failed (plaintext mismatch)", cipher,
+             blocksize * 8);
+#else
+      (void)cipher; /* Not used.  */
+#endif
+      return "selftest for CFB failed - see syslog for details";
+    }
+
+  if (memcmp(iv2, iv, blocksize))
+    {
+      xfree(mem);
+#ifdef HAVE_SYSLOG
+      syslog (LOG_USER|LOG_WARNING, "Libgcrypt warning: "
+              "%s-CFB-%d test failed (IV mismatch)", cipher, blocksize * 8);
+#endif
+      return "selftest for CFB failed - see syslog for details";
+    }
+
+  /* Test parallelized code paths */
+  memset(iv, 0xe6, blocksize);
+  memset(iv2, 0xe6, blocksize);
+
+  for (i = 0; i < nblocks * blocksize; i++)
+    plaintext[i] = i;
+
+  /* Create CFB ciphertext manually.  */
+  for (i = 0; i < nblocks * blocksize; i+=blocksize)
+    {
+      encrypt_one (ctx, &ciphertext[i], iv);
+      buf_xor_2dst (iv, &ciphertext[i], &plaintext[i], blocksize);
+    }
+
+  /* Decrypt using bulk CBC and compare result.  */
+  bulk_ops.cfb_dec (ctx, iv2, plaintext2, ciphertext, nblocks);
+
+  if (memcmp(plaintext2, plaintext, nblocks * blocksize))
+    {
+      xfree(mem);
+#ifdef HAVE_SYSLOG
+      syslog (LOG_USER|LOG_WARNING, "Libgcrypt warning: "
+              "%s-CFB-%d test failed (plaintext mismatch, parallel path)",
+              cipher, blocksize * 8);
+#endif
+      return "selftest for CFB failed - see syslog for details";
+    }
+  if (memcmp(iv2, iv, blocksize))
+    {
+      xfree(mem);
+#ifdef HAVE_SYSLOG
+      syslog (LOG_USER|LOG_WARNING, "Libgcrypt warning: "
+              "%s-CFB-%d test failed (IV mismatch, parallel path)", cipher,
+             blocksize * 8);
+#endif
+      return "selftest for CFB failed - see syslog for details";
+    }
+
+  xfree(mem);
+  return NULL;
+}
+
+/* Run the self-tests for <block cipher>-CTR-<block size>, tests IV increment
+   of bulk CTR encryption.  Returns NULL on success. */
+const char *
+_gcry_selftest_helper_ctr (const char *cipher, gcry_cipher_setkey_t 
setkey_func,
+                          gcry_cipher_encrypt_t encrypt_one,
+                          const int nblocks, const int blocksize,
+                          const int context_size)
+{
+  cipher_bulk_ops_t bulk_ops = { 0, };
+  int i, j, offs, diff;
+  unsigned char *ctx, *plaintext, *plaintext2, *ciphertext, *ciphertext2,
+                *iv, *iv2, *mem;
+  unsigned int ctx_aligned_size, memsize;
+
+  static const unsigned char key[16] ATTR_ALIGNED_16 = {
+      0x06,0x9A,0x00,0x7F,0xC7,0x6A,0x45,0x9F,
+      0x98,0xBA,0xF9,0x17,0xFE,0xDF,0x95,0x21
+    };
+
+  /* Allocate buffers, align first two elements to 16 bytes and latter to
+     block size.  */
+  ctx_aligned_size = context_size + 15;
+  ctx_aligned_size -= ctx_aligned_size & 0xf;
+
+  memsize = ctx_aligned_size + (blocksize * 2) + (blocksize * nblocks * 4) + 
16;
+
+  mem = xtrycalloc (1, memsize);
+  if (!mem)
+    return "failed to allocate memory";
+
+  offs = (16 - ((uintptr_t)mem & 15)) & 15;
+  ctx = (void*)(mem + offs);
+  iv = ctx + ctx_aligned_size;
+  iv2 = iv + blocksize;
+  plaintext = iv2 + blocksize;
+  plaintext2 = plaintext + nblocks * blocksize;
+  ciphertext = plaintext2 + nblocks * blocksize;
+  ciphertext2 = ciphertext + nblocks * blocksize;
+
+  /* Initialize ctx */
+  if (setkey_func (ctx, key, sizeof(key), &bulk_ops) != GPG_ERR_NO_ERROR)
+   {
+     xfree(mem);
+     return "setkey failed";
+   }
+
+  /* Test single block code path */
+  memset (iv, 0xff, blocksize);
+  for (i = 0; i < blocksize; i++)
+    plaintext[i] = i;
+
+  /* CTR manually.  */
+  encrypt_one (ctx, ciphertext, iv);
+  for (i = 0; i < blocksize; i++)
+    ciphertext[i] ^= plaintext[i];
+  for (i = blocksize; i > 0; i--)
+    {
+      iv[i-1]++;
+      if (iv[i-1])
+        break;
+    }
+
+  memset (iv2, 0xff, blocksize);
+  bulk_ops.ctr_enc (ctx, iv2, plaintext2, ciphertext, 1);
+
+  if (memcmp (plaintext2, plaintext, blocksize))
+    {
+      xfree (mem);
+#ifdef HAVE_SYSLOG
+      syslog (LOG_USER|LOG_WARNING, "Libgcrypt warning: "
+              "%s-CTR-%d test failed (plaintext mismatch)", cipher,
+             blocksize * 8);
+#else
+      (void)cipher; /* Not used.  */
+#endif
+      return "selftest for CTR failed - see syslog for details";
+    }
+
+  if (memcmp (iv2, iv, blocksize))
+    {
+      xfree (mem);
+#ifdef HAVE_SYSLOG
+      syslog (LOG_USER|LOG_WARNING, "Libgcrypt warning: "
+              "%s-CTR-%d test failed (IV mismatch)", cipher,
+             blocksize * 8);
+#endif
+      return "selftest for CTR failed - see syslog for details";
+    }
+
+  /* Test bulk encryption with typical IV. */
+  memset(iv, 0x57, blocksize-4);
+  iv[blocksize-1] = 1;
+  iv[blocksize-2] = 0;
+  iv[blocksize-3] = 0;
+  iv[blocksize-4] = 0;
+  memset(iv2, 0x57, blocksize-4);
+  iv2[blocksize-1] = 1;
+  iv2[blocksize-2] = 0;
+  iv2[blocksize-3] = 0;
+  iv2[blocksize-4] = 0;
+
+  for (i = 0; i < blocksize * nblocks; i++)
+    plaintext2[i] = plaintext[i] = i;
+
+  /* Create CTR ciphertext manually.  */
+  for (i = 0; i < blocksize * nblocks; i+=blocksize)
+    {
+      encrypt_one (ctx, &ciphertext[i], iv);
+      for (j = 0; j < blocksize; j++)
+        ciphertext[i+j] ^= plaintext[i+j];
+      for (j = blocksize; j > 0; j--)
+        {
+          iv[j-1]++;
+          if (iv[j-1])
+            break;
+        }
+    }
+
+  bulk_ops.ctr_enc (ctx, iv2, ciphertext2, plaintext2, nblocks);
+
+  if (memcmp (ciphertext2, ciphertext, blocksize * nblocks))
+    {
+      xfree (mem);
+#ifdef HAVE_SYSLOG
+      syslog (LOG_USER|LOG_WARNING, "Libgcrypt warning: "
+              "%s-CTR-%d test failed (ciphertext mismatch, bulk)", cipher,
+              blocksize * 8);
+#endif
+      return "selftest for CTR failed - see syslog for details";
+    }
+  if (memcmp(iv2, iv, blocksize))
+    {
+      xfree (mem);
+#ifdef HAVE_SYSLOG
+      syslog (LOG_USER|LOG_WARNING, "Libgcrypt warning: "
+              "%s-CTR-%d test failed (IV mismatch, bulk)", cipher,
+              blocksize * 8);
+#endif
+      return "selftest for CTR failed - see syslog for details";
+    }
+
+  /* Test parallelized code paths (check counter overflow handling) */
+  for (diff = 0; diff < nblocks; diff++) {
+    memset(iv, 0xff, blocksize);
+    iv[blocksize-1] -= diff;
+    iv[0] = iv[1] = 0;
+    iv[2] = 0x07;
+
+    for (i = 0; i < blocksize * nblocks; i++)
+      plaintext[i] = i;
+
+    /* Create CTR ciphertext manually.  */
+    for (i = 0; i < blocksize * nblocks; i+=blocksize)
+      {
+        encrypt_one (ctx, &ciphertext[i], iv);
+        for (j = 0; j < blocksize; j++)
+          ciphertext[i+j] ^= plaintext[i+j];
+        for (j = blocksize; j > 0; j--)
+          {
+            iv[j-1]++;
+            if (iv[j-1])
+              break;
+          }
+      }
+
+    /* Decrypt using bulk CTR and compare result.  */
+    memset(iv2, 0xff, blocksize);
+    iv2[blocksize-1] -= diff;
+    iv2[0] = iv2[1] = 0;
+    iv2[2] = 0x07;
+
+    bulk_ops.ctr_enc (ctx, iv2, plaintext2, ciphertext, nblocks);
+
+    if (memcmp (plaintext2, plaintext, blocksize * nblocks))
+      {
+        xfree (mem);
+#ifdef HAVE_SYSLOG
+        syslog (LOG_USER|LOG_WARNING, "Libgcrypt warning: "
+                "%s-CTR-%d test failed (plaintext mismatch, diff: %d)", cipher,
+               blocksize * 8, diff);
+#endif
+        return "selftest for CTR failed - see syslog for details";
+      }
+    if (memcmp(iv2, iv, blocksize))
+      {
+        xfree (mem);
+#ifdef HAVE_SYSLOG
+        syslog (LOG_USER|LOG_WARNING, "Libgcrypt warning: "
+                "%s-CTR-%d test failed (IV mismatch, diff: %d)", cipher,
+               blocksize * 8, diff);
+#endif
+        return "selftest for CTR failed - see syslog for details";
+      }
+  }
+
+  xfree (mem);
+  return NULL;
+}
diff --git a/grub-core/lib/libgcrypt/cipher/cipher-selftest.h 
b/grub-core/lib/libgcrypt/cipher/cipher-selftest.h
new file mode 100644
index 000000000..c3090ad12
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/cipher-selftest.h
@@ -0,0 +1,69 @@
+/* cipher-selftest.h - Helper functions for bulk encryption selftests.
+ * Copyright (C) 2013,2020 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser general Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef G10_SELFTEST_HELP_H
+#define G10_SELFTEST_HELP_H
+
+#include <config.h>
+#include "types.h"
+#include "g10lib.h"
+#include "cipher.h"
+
+typedef void (*gcry_cipher_bulk_cbc_dec_t)(void *context, unsigned char *iv,
+                                          void *outbuf_arg,
+                                          const void *inbuf_arg,
+                                          size_t nblocks);
+
+typedef void (*gcry_cipher_bulk_cfb_dec_t)(void *context, unsigned char *iv,
+                                          void *outbuf_arg,
+                                          const void *inbuf_arg,
+                                          size_t nblocks);
+
+typedef void (*gcry_cipher_bulk_ctr_enc_t)(void *context, unsigned char *iv,
+                                          void *outbuf_arg,
+                                          const void *inbuf_arg,
+                                          size_t nblocks);
+
+/* Helper function to allocate an aligned context for selftests.  */
+void *_gcry_cipher_selftest_alloc_ctx (const int context_size,
+                                       unsigned char **r_mem);
+
+
+/* Helper function for bulk CBC decryption selftest */
+const char *
+_gcry_selftest_helper_cbc (const char *cipher, gcry_cipher_setkey_t setkey,
+                          gcry_cipher_encrypt_t encrypt_one,
+                          const int nblocks, const int blocksize,
+                          const int context_size);
+
+/* Helper function for bulk CFB decryption selftest */
+const char *
+_gcry_selftest_helper_cfb (const char *cipher, gcry_cipher_setkey_t setkey,
+                          gcry_cipher_encrypt_t encrypt_one,
+                          const int nblocks, const int blocksize,
+                          const int context_size);
+
+/* Helper function for bulk CTR encryption selftest */
+const char *
+_gcry_selftest_helper_ctr (const char *cipher, gcry_cipher_setkey_t setkey,
+                          gcry_cipher_encrypt_t encrypt_one,
+                          const int nblocks, const int blocksize,
+                          const int context_size);
+
+#endif /*G10_SELFTEST_HELP_H*/
diff --git a/grub-core/lib/libgcrypt/cipher/cipher-siv.c 
b/grub-core/lib/libgcrypt/cipher/cipher-siv.c
new file mode 100644
index 000000000..160beb485
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/cipher-siv.c
@@ -0,0 +1,375 @@
+/* cipher-siv.c  -  SIV implementation (RFC 5297)
+ * Copyright (C) 2021 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser general Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "g10lib.h"
+#include "cipher.h"
+#include "bufhelp.h"
+#include "./cipher-internal.h"
+
+
+static inline void
+s2v_double (unsigned char *d)
+{
+  u64 hi, lo, mask;
+
+  hi = buf_get_be64(d + 0);
+  lo = buf_get_be64(d + 8);
+
+  mask = -(hi >> 63);
+  hi = (hi << 1) ^ (lo >> 63);
+  lo = (lo << 1) ^ (mask & 0x87);
+
+  buf_put_be64(d + 0, hi);
+  buf_put_be64(d + 8, lo);
+}
+
+
+static void
+s2v_pad (unsigned char *out, const byte *in, size_t inlen)
+{
+  static const unsigned char padding[GCRY_SIV_BLOCK_LEN] = { 0x80 };
+
+  gcry_assert(inlen < GCRY_SIV_BLOCK_LEN);
+
+  buf_cpy (out, in, inlen);
+  buf_cpy (out + inlen, padding, GCRY_SIV_BLOCK_LEN - inlen);
+}
+
+
+gcry_err_code_t
+_gcry_cipher_siv_setkey (gcry_cipher_hd_t c,
+                        const unsigned char *ctrkey, size_t ctrkeylen)
+{
+  static const unsigned char zero[GCRY_SIV_BLOCK_LEN] = { 0 };
+  gcry_err_code_t err;
+
+  if (c->spec->blocksize != GCRY_SIV_BLOCK_LEN)
+    return GPG_ERR_CIPHER_ALGO;
+
+  c->u_mode.siv.aad_count = 0;
+  c->u_mode.siv.dec_tag_set = 0;
+  c->marks.tag = 0;
+  c->marks.iv = 0;
+
+  /* Set CTR mode key. */
+  err = c->spec->setkey (c->u_mode.siv.ctr_context, ctrkey, ctrkeylen,
+                        &c->bulk);
+  if (err != 0)
+    return err;
+
+  /* Initialize S2V. */
+  memset (&c->u_mode.siv.s2v_cmac, 0, sizeof(c->u_mode.siv.s2v_cmac));
+  err = _gcry_cmac_generate_subkeys (c, &c->u_mode.siv.s2v_cmac);
+  if (err != 0)
+    return err;
+
+  err = _gcry_cmac_write (c, &c->u_mode.siv.s2v_cmac, zero, 
GCRY_SIV_BLOCK_LEN);
+  if (err != 0)
+    return err;
+
+  err = _gcry_cmac_final (c, &c->u_mode.siv.s2v_cmac);
+  if (err != 0)
+    return err;
+
+  memcpy (c->u_mode.siv.s2v_zero_block, c->u_mode.siv.s2v_cmac.u_iv.iv,
+         GCRY_SIV_BLOCK_LEN);
+  memcpy (c->u_mode.siv.s2v_d, c->u_mode.siv.s2v_zero_block,
+         GCRY_SIV_BLOCK_LEN);
+
+  return 0;
+}
+
+
+gcry_err_code_t
+_gcry_cipher_siv_authenticate (gcry_cipher_hd_t c,
+                               const byte *aadbuf, size_t aadbuflen)
+{
+  gcry_err_code_t err;
+
+  if (c->spec->blocksize != GCRY_SIV_BLOCK_LEN)
+    return GPG_ERR_CIPHER_ALGO;
+  if (c->marks.tag)
+    return GPG_ERR_INV_STATE;
+  if (c->marks.iv)
+    return GPG_ERR_INV_STATE;
+
+  if (c->u_mode.siv.aad_count >= 126)
+    return GPG_ERR_INV_STATE; /* Too many AD vector components. */
+
+  c->u_mode.siv.aad_count++;
+
+  _gcry_cmac_reset (&c->u_mode.siv.s2v_cmac);
+
+  err = _gcry_cmac_write (c, &c->u_mode.siv.s2v_cmac, aadbuf, aadbuflen);
+  if (err != 0)
+    return err;
+
+  err = _gcry_cmac_final (c, &c->u_mode.siv.s2v_cmac);
+  if (err != 0)
+    return err;
+
+  s2v_double (c->u_mode.siv.s2v_d);
+  cipher_block_xor_1 (c->u_mode.siv.s2v_d, c->u_mode.siv.s2v_cmac.u_iv.iv,
+                     GCRY_SIV_BLOCK_LEN);
+
+  return 0;
+}
+
+
+gcry_err_code_t
+_gcry_cipher_siv_set_nonce (gcry_cipher_hd_t c, const byte *nonce,
+                           size_t noncelen)
+{
+  gcry_err_code_t err;
+
+  err = _gcry_cipher_siv_authenticate (c, nonce, noncelen);
+  if (err)
+    return err;
+
+  /* Nonce is the last AD before plaintext. */
+  c->marks.iv = 1;
+
+  return 0;
+}
+
+
+static gcry_err_code_t
+s2v_plaintext (gcry_cipher_hd_t c, const byte *plain, size_t plainlen)
+{
+  gcry_err_code_t err;
+
+  if (c->u_mode.siv.aad_count >= 127)
+    return GPG_ERR_INV_STATE; /* Too many AD vector components. */
+
+  _gcry_cmac_reset (&c->u_mode.siv.s2v_cmac);
+
+  if (plainlen >= GCRY_SIV_BLOCK_LEN)
+    {
+      err = _gcry_cmac_write (c, &c->u_mode.siv.s2v_cmac, plain,
+                             plainlen - GCRY_SIV_BLOCK_LEN);
+      if (err)
+        return err;
+
+      cipher_block_xor_1 (c->u_mode.siv.s2v_d,
+                         plain + plainlen - GCRY_SIV_BLOCK_LEN,
+                         GCRY_SIV_BLOCK_LEN);
+
+      err = _gcry_cmac_write (c, &c->u_mode.siv.s2v_cmac, c->u_mode.siv.s2v_d,
+                             GCRY_SIV_BLOCK_LEN);
+      if (err)
+        return err;
+    }
+  else
+    {
+      unsigned char pad_sn[GCRY_SIV_BLOCK_LEN];
+
+      s2v_double (c->u_mode.siv.s2v_d);
+      s2v_pad (pad_sn, plain, plainlen);
+      cipher_block_xor_1 (pad_sn, c->u_mode.siv.s2v_d, GCRY_SIV_BLOCK_LEN);
+
+      err = _gcry_cmac_write (c, &c->u_mode.siv.s2v_cmac, pad_sn,
+                             GCRY_SIV_BLOCK_LEN);
+      wipememory (pad_sn, sizeof(pad_sn));
+      if (err)
+        return err;
+    }
+
+  c->u_mode.siv.aad_count++;
+
+  return _gcry_cmac_final (c, &c->u_mode.siv.s2v_cmac);
+}
+
+
+gcry_err_code_t
+_gcry_cipher_siv_encrypt (gcry_cipher_hd_t c,
+                          byte *outbuf, size_t outbuflen,
+                          const byte *inbuf, size_t inbuflen)
+{
+  gcry_err_code_t err;
+  u64 q_lo;
+
+  if (c->spec->blocksize != GCRY_SIV_BLOCK_LEN)
+    return GPG_ERR_CIPHER_ALGO;
+  if (outbuflen < inbuflen)
+    return GPG_ERR_BUFFER_TOO_SHORT;
+  if (c->marks.tag)
+    return GPG_ERR_INV_STATE;
+  if (c->u_mode.siv.dec_tag_set)
+    return GPG_ERR_INV_STATE;
+
+  /* Pass plaintext to S2V. */
+  err = s2v_plaintext (c, inbuf, inbuflen);
+  if (err != 0)
+    return err;
+
+  /* Clear 31th and 63th bits. */
+  memcpy (c->u_ctr.ctr, c->u_mode.siv.s2v_cmac.u_iv.iv, GCRY_SIV_BLOCK_LEN);
+  q_lo = buf_get_be64(c->u_ctr.ctr + 8);
+  q_lo &= ~((u64)1 << 31);
+  q_lo &= ~((u64)1 << 63);
+  buf_put_be64(c->u_ctr.ctr + 8, q_lo);
+
+  /* Encrypt plaintext. */
+  err = _gcry_cipher_ctr_encrypt_ctx(c, outbuf, outbuflen, inbuf, inbuflen,
+                                    c->u_mode.siv.ctr_context);
+  if (err != 0)
+    return err;
+
+  c->marks.tag = 1;
+
+  return 0;
+}
+
+
+gcry_err_code_t
+_gcry_cipher_siv_set_decryption_tag (gcry_cipher_hd_t c,
+                                    const byte *tag, size_t taglen)
+{
+  if (taglen != GCRY_SIV_BLOCK_LEN)
+    return GPG_ERR_INV_ARG;
+  if (c->spec->blocksize != GCRY_SIV_BLOCK_LEN)
+    return GPG_ERR_CIPHER_ALGO;
+  if (c->marks.tag)
+    return GPG_ERR_INV_STATE;
+
+  memcpy (&c->u_mode.siv.dec_tag, tag, GCRY_SIV_BLOCK_LEN);
+  c->u_mode.siv.dec_tag_set = 1;
+
+  return 0;
+}
+
+
+gcry_err_code_t
+_gcry_cipher_siv_decrypt (gcry_cipher_hd_t c,
+                          byte *outbuf, size_t outbuflen,
+                          const byte *inbuf, size_t inbuflen)
+{
+  gcry_err_code_t err;
+  u64 q_lo;
+
+  if (c->spec->blocksize != GCRY_SIV_BLOCK_LEN)
+    return GPG_ERR_CIPHER_ALGO;
+  if (outbuflen < inbuflen)
+    return GPG_ERR_BUFFER_TOO_SHORT;
+  if (c->marks.tag)
+    return GPG_ERR_INV_STATE;
+  if (!c->u_mode.siv.dec_tag_set)
+    return GPG_ERR_INV_STATE;
+
+  /* Clear 31th and 63th bits. */
+  memcpy (c->u_ctr.ctr, c->u_mode.siv.dec_tag, GCRY_SIV_BLOCK_LEN);
+  q_lo = buf_get_be64(c->u_ctr.ctr + 8);
+  q_lo &= ~((u64)1 << 31);
+  q_lo &= ~((u64)1 << 63);
+  buf_put_be64(c->u_ctr.ctr + 8, q_lo);
+
+  /* Decrypt ciphertext. */
+  err = _gcry_cipher_ctr_encrypt_ctx(c, outbuf, outbuflen, inbuf, inbuflen,
+                                    c->u_mode.siv.ctr_context);
+  if (err != 0)
+    return err;
+
+  /* Pass plaintext to S2V. */
+  err = s2v_plaintext (c, outbuf, inbuflen);
+  if (err != 0)
+    return err;
+
+  c->marks.tag = 1;
+
+  if (!buf_eq_const(c->u_mode.siv.s2v_cmac.u_iv.iv, c->u_mode.siv.dec_tag,
+                   GCRY_SIV_BLOCK_LEN))
+    {
+      wipememory (outbuf, inbuflen);
+      return GPG_ERR_CHECKSUM;
+    }
+
+  return 0;
+}
+
+
+gcry_err_code_t
+_gcry_cipher_siv_get_tag (gcry_cipher_hd_t c, unsigned char *outbuf,
+                          size_t outbuflen)
+{
+  gcry_err_code_t err;
+
+  if (c->spec->blocksize != GCRY_SIV_BLOCK_LEN)
+    return GPG_ERR_CIPHER_ALGO;
+  if (c->u_mode.siv.dec_tag_set)
+    return GPG_ERR_INV_STATE;
+
+  if (!c->marks.tag)
+    {
+      /* Finalize SIV with zero-length plaintext. */
+      err = s2v_plaintext (c, NULL, 0);
+      if (err != 0)
+        return err;
+
+      c->marks.tag = 1;
+    }
+
+  if (outbuflen > GCRY_SIV_BLOCK_LEN)
+    outbuflen = GCRY_SIV_BLOCK_LEN;
+
+  /* We already checked that OUTBUF is large enough to hold
+   * the result or has valid truncated length.  */
+  memcpy (outbuf, c->u_mode.siv.s2v_cmac.u_iv.iv, outbuflen);
+
+  return 0;
+}
+
+
+gcry_err_code_t
+_gcry_cipher_siv_check_tag (gcry_cipher_hd_t c, const unsigned char *intag,
+                            size_t taglen)
+{
+  gcry_err_code_t err;
+  size_t n;
+
+  if (c->spec->blocksize != GCRY_SIV_BLOCK_LEN)
+    return GPG_ERR_CIPHER_ALGO;
+
+  if (!c->marks.tag)
+    {
+      /* Finalize SIV with zero-length plaintext. */
+      err = s2v_plaintext (c, NULL, 0);
+      if (err != 0)
+        return err;
+
+      c->marks.tag = 1;
+    }
+
+  n = GCRY_SIV_BLOCK_LEN;
+  if (taglen < n)
+    n = taglen;
+
+  if (!buf_eq_const(c->u_mode.siv.s2v_cmac.u_iv.iv, intag, n)
+      || GCRY_SIV_BLOCK_LEN != taglen)
+    {
+      return GPG_ERR_CHECKSUM;
+    }
+
+  return 0;
+}
diff --git a/grub-core/lib/libgcrypt/cipher/cipher-xts.c 
b/grub-core/lib/libgcrypt/cipher/cipher-xts.c
new file mode 100644
index 000000000..0522a271a
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/cipher-xts.c
@@ -0,0 +1,189 @@
+/* cipher-xts.c  - XTS mode implementation
+ * Copyright (C) 2017 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser general Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "g10lib.h"
+#include "cipher.h"
+#include "bufhelp.h"
+#include "./cipher-internal.h"
+
+
+static inline void xts_gfmul_byA (unsigned char *out, const unsigned char *in)
+{
+  u64 hi = buf_get_le64 (in + 8);
+  u64 lo = buf_get_le64 (in + 0);
+  u64 carry = -(hi >> 63) & 0x87;
+
+  hi = (hi << 1) + (lo >> 63);
+  lo = (lo << 1) ^ carry;
+
+  buf_put_le64 (out + 8, hi);
+  buf_put_le64 (out + 0, lo);
+}
+
+
+static inline void xts_inc128 (unsigned char *seqno)
+{
+  u64 lo = buf_get_le64 (seqno + 0);
+  u64 hi = buf_get_le64 (seqno + 8);
+
+  hi += !(++lo);
+
+  buf_put_le64 (seqno + 0, lo);
+  buf_put_le64 (seqno + 8, hi);
+}
+
+
+gcry_err_code_t
+_gcry_cipher_xts_crypt (gcry_cipher_hd_t c,
+                       unsigned char *outbuf, size_t outbuflen,
+                       const unsigned char *inbuf, size_t inbuflen,
+                       int encrypt)
+{
+  gcry_cipher_encrypt_t tweak_fn = c->spec->encrypt;
+  gcry_cipher_encrypt_t crypt_fn =
+    encrypt ? c->spec->encrypt : c->spec->decrypt;
+  union
+  {
+    cipher_context_alignment_t xcx;
+    byte x1[GCRY_XTS_BLOCK_LEN];
+    u64 x64[GCRY_XTS_BLOCK_LEN / sizeof(u64)];
+  } tmp;
+  unsigned int burn, nburn;
+  size_t nblocks;
+
+  if (c->spec->blocksize != GCRY_XTS_BLOCK_LEN)
+    return GPG_ERR_CIPHER_ALGO;
+  if (outbuflen < inbuflen)
+    return GPG_ERR_BUFFER_TOO_SHORT;
+  if (inbuflen < GCRY_XTS_BLOCK_LEN)
+    return GPG_ERR_BUFFER_TOO_SHORT;
+
+  /* Data-unit max length: 2^20 blocks. */
+  if (inbuflen > GCRY_XTS_BLOCK_LEN << 20)
+    return GPG_ERR_INV_LENGTH;
+
+  nblocks = inbuflen / GCRY_XTS_BLOCK_LEN;
+  nblocks -= !encrypt && (inbuflen % GCRY_XTS_BLOCK_LEN) != 0;
+
+  /* Generate first tweak value.  */
+  burn = tweak_fn (c->u_mode.xts.tweak_context, c->u_ctr.ctr, c->u_iv.iv);
+
+  /* Use a bulk method if available.  */
+  if (nblocks && c->bulk.xts_crypt)
+    {
+      c->bulk.xts_crypt (&c->context.c, c->u_ctr.ctr, outbuf, inbuf, nblocks,
+                        encrypt);
+      inbuf  += nblocks * GCRY_XTS_BLOCK_LEN;
+      outbuf += nblocks * GCRY_XTS_BLOCK_LEN;
+      inbuflen -= nblocks * GCRY_XTS_BLOCK_LEN;
+      nblocks = 0;
+    }
+
+  /* If we don't have a bulk method use the standard method.  We also
+    use this method for the a remaining partial block.  */
+
+  while (nblocks)
+    {
+      /* Xor-Encrypt/Decrypt-Xor block. */
+      cipher_block_xor (tmp.x64, inbuf, c->u_ctr.ctr, GCRY_XTS_BLOCK_LEN);
+      nburn = crypt_fn (&c->context.c, tmp.x1, tmp.x1);
+      burn = nburn > burn ? nburn : burn;
+      cipher_block_xor (outbuf, tmp.x64, c->u_ctr.ctr, GCRY_XTS_BLOCK_LEN);
+
+      outbuf += GCRY_XTS_BLOCK_LEN;
+      inbuf += GCRY_XTS_BLOCK_LEN;
+      inbuflen -= GCRY_XTS_BLOCK_LEN;
+      nblocks--;
+
+      /* Generate next tweak. */
+      xts_gfmul_byA (c->u_ctr.ctr, c->u_ctr.ctr);
+    }
+
+  /* Handle remaining data with ciphertext stealing. */
+  if (inbuflen)
+    {
+      if (!encrypt)
+       {
+         gcry_assert (inbuflen > GCRY_XTS_BLOCK_LEN);
+         gcry_assert (inbuflen < GCRY_XTS_BLOCK_LEN * 2);
+
+         /* Generate last tweak. */
+         xts_gfmul_byA (tmp.x1, c->u_ctr.ctr);
+
+         /* Decrypt last block first. */
+         cipher_block_xor (outbuf, inbuf, tmp.x64, GCRY_XTS_BLOCK_LEN);
+         nburn = crypt_fn (&c->context.c, outbuf, outbuf);
+         burn = nburn > burn ? nburn : burn;
+         cipher_block_xor (outbuf, outbuf, tmp.x64, GCRY_XTS_BLOCK_LEN);
+
+         inbuflen -= GCRY_XTS_BLOCK_LEN;
+         inbuf += GCRY_XTS_BLOCK_LEN;
+         outbuf += GCRY_XTS_BLOCK_LEN;
+       }
+
+      gcry_assert (inbuflen < GCRY_XTS_BLOCK_LEN);
+      outbuf -= GCRY_XTS_BLOCK_LEN;
+
+      /* Steal ciphertext from previous block. */
+      cipher_block_cpy (tmp.x64, outbuf, GCRY_XTS_BLOCK_LEN);
+      buf_cpy (tmp.x64, inbuf, inbuflen);
+      buf_cpy (outbuf + GCRY_XTS_BLOCK_LEN, outbuf, inbuflen);
+
+      /* Decrypt/Encrypt last block. */
+      cipher_block_xor (tmp.x64, tmp.x64, c->u_ctr.ctr, GCRY_XTS_BLOCK_LEN);
+      nburn = crypt_fn (&c->context.c, tmp.x1, tmp.x1);
+      burn = nburn > burn ? nburn : burn;
+      cipher_block_xor (outbuf, tmp.x64, c->u_ctr.ctr, GCRY_XTS_BLOCK_LEN);
+    }
+
+  /* Auto-increment data-unit sequence number */
+  xts_inc128 (c->u_iv.iv);
+
+  wipememory (&tmp, sizeof(tmp));
+  wipememory (c->u_ctr.ctr, sizeof(c->u_ctr.ctr));
+
+  if (burn > 0)
+    _gcry_burn_stack (burn + 4 * sizeof(void *));
+
+  return 0;
+}
+
+
+gcry_err_code_t
+_gcry_cipher_xts_encrypt (gcry_cipher_hd_t c,
+                          unsigned char *outbuf, size_t outbuflen,
+                          const unsigned char *inbuf, size_t inbuflen)
+{
+  return _gcry_cipher_xts_crypt (c, outbuf, outbuflen, inbuf, inbuflen, 1);
+}
+
+
+gcry_err_code_t
+_gcry_cipher_xts_decrypt (gcry_cipher_hd_t c,
+                          unsigned char *outbuf, size_t outbuflen,
+                          const unsigned char *inbuf, size_t inbuflen)
+{
+  return _gcry_cipher_xts_crypt (c, outbuf, outbuflen, inbuf, inbuflen, 0);
+}
diff --git a/grub-core/lib/libgcrypt/cipher/cipher.c 
b/grub-core/lib/libgcrypt/cipher/cipher.c
index 9852d6a5a..d1443a621 100644
--- a/grub-core/lib/libgcrypt/cipher/cipher.c
+++ b/grub-core/lib/libgcrypt/cipher/cipher.c
@@ -1,6 +1,7 @@
 /* cipher.c  - cipher dispatcher
  * Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003
  *               2005, 2007, 2008, 2009, 2011 Free Software Foundation, Inc.
+ * Copyright (C) 2013 g10 Code GmbH
  *
  * This file is part of Libgcrypt.
  *
@@ -25,483 +26,330 @@
 #include <errno.h>
 
 #include "g10lib.h"
+#include "../src/gcrypt-testapi.h"
 #include "cipher.h"
-#include "ath.h"
-
-#define MAX_BLOCKSIZE 16
-#define TABLE_SIZE 14
-#define CTX_MAGIC_NORMAL 0x24091964
-#define CTX_MAGIC_SECURE 0x46919042
-
-/* Try to use 16 byte aligned cipher context for better performance.
-   We use the aligned attribute, thus it is only possible to implement
-   this with gcc.  */
-#undef NEED_16BYTE_ALIGNED_CONTEXT
-#if defined (__GNUC__)
-# define NEED_16BYTE_ALIGNED_CONTEXT 1
-#endif
+#include "./cipher-internal.h"
 
-/* A dummy extraspec so that we do not need to tests the extraspec
-   field from the module specification against NULL and instead
-   directly test the respective fields of extraspecs.  */
-static cipher_extra_spec_t dummy_extra_spec;
 
 /* This is the list of the default ciphers, which are included in
    libgcrypt.  */
-static struct cipher_table_entry
-{
-  gcry_cipher_spec_t *cipher;
-  cipher_extra_spec_t *extraspec;
-  unsigned int algorithm;
-  int fips_allowed;
-} cipher_table[] =
+static gcry_cipher_spec_t * const cipher_list[] =
   {
 #if USE_BLOWFISH
-    { &_gcry_cipher_spec_blowfish,
-      &dummy_extra_spec,                  GCRY_CIPHER_BLOWFISH },
+     &_gcry_cipher_spec_blowfish,
 #endif
 #if USE_DES
-    { &_gcry_cipher_spec_des,
-      &dummy_extra_spec,                  GCRY_CIPHER_DES },
-    { &_gcry_cipher_spec_tripledes,
-      &_gcry_cipher_extraspec_tripledes,  GCRY_CIPHER_3DES, 1 },
+     &_gcry_cipher_spec_des,
+     &_gcry_cipher_spec_tripledes,
 #endif
 #if USE_ARCFOUR
-    { &_gcry_cipher_spec_arcfour,
-      &dummy_extra_spec,                  GCRY_CIPHER_ARCFOUR },
+     &_gcry_cipher_spec_arcfour,
 #endif
 #if USE_CAST5
-    { &_gcry_cipher_spec_cast5,
-      &dummy_extra_spec,                  GCRY_CIPHER_CAST5 },
+     &_gcry_cipher_spec_cast5,
 #endif
 #if USE_AES
-    { &_gcry_cipher_spec_aes,
-      &_gcry_cipher_extraspec_aes,        GCRY_CIPHER_AES,    1 },
-    { &_gcry_cipher_spec_aes192,
-      &_gcry_cipher_extraspec_aes192,     GCRY_CIPHER_AES192, 1 },
-    { &_gcry_cipher_spec_aes256,
-      &_gcry_cipher_extraspec_aes256,     GCRY_CIPHER_AES256, 1 },
+     &_gcry_cipher_spec_aes,
+     &_gcry_cipher_spec_aes192,
+     &_gcry_cipher_spec_aes256,
 #endif
 #if USE_TWOFISH
-    { &_gcry_cipher_spec_twofish,
-      &dummy_extra_spec,                  GCRY_CIPHER_TWOFISH },
-    { &_gcry_cipher_spec_twofish128,
-      &dummy_extra_spec,                  GCRY_CIPHER_TWOFISH128 },
+     &_gcry_cipher_spec_twofish,
+     &_gcry_cipher_spec_twofish128,
 #endif
 #if USE_SERPENT
-    { &_gcry_cipher_spec_serpent128,
-      &dummy_extra_spec,                  GCRY_CIPHER_SERPENT128 },
-    { &_gcry_cipher_spec_serpent192,
-      &dummy_extra_spec,                  GCRY_CIPHER_SERPENT192 },
-    { &_gcry_cipher_spec_serpent256,
-      &dummy_extra_spec,                  GCRY_CIPHER_SERPENT256 },
+     &_gcry_cipher_spec_serpent128,
+     &_gcry_cipher_spec_serpent192,
+     &_gcry_cipher_spec_serpent256,
 #endif
 #if USE_RFC2268
-    { &_gcry_cipher_spec_rfc2268_40,
-      &dummy_extra_spec,                  GCRY_CIPHER_RFC2268_40 },
+     &_gcry_cipher_spec_rfc2268_40,
+     &_gcry_cipher_spec_rfc2268_128,
 #endif
 #if USE_SEED
-    { &_gcry_cipher_spec_seed,
-      &dummy_extra_spec,                  GCRY_CIPHER_SEED },
+     &_gcry_cipher_spec_seed,
 #endif
 #if USE_CAMELLIA
-    { &_gcry_cipher_spec_camellia128,
-      &dummy_extra_spec,                  GCRY_CIPHER_CAMELLIA128 },
-    { &_gcry_cipher_spec_camellia192,
-      &dummy_extra_spec,                  GCRY_CIPHER_CAMELLIA192 },
-    { &_gcry_cipher_spec_camellia256,
-      &dummy_extra_spec,                  GCRY_CIPHER_CAMELLIA256 },
+     &_gcry_cipher_spec_camellia128,
+     &_gcry_cipher_spec_camellia192,
+     &_gcry_cipher_spec_camellia256,
+#endif
+#if USE_IDEA
+     &_gcry_cipher_spec_idea,
+#endif
+#if USE_SALSA20
+     &_gcry_cipher_spec_salsa20,
+     &_gcry_cipher_spec_salsa20r12,
 #endif
-#ifdef USE_IDEA
-    { &_gcry_cipher_spec_idea,
-      &dummy_extra_spec,                  GCRY_CIPHER_IDEA },
+#if USE_GOST28147
+     &_gcry_cipher_spec_gost28147,
+     &_gcry_cipher_spec_gost28147_mesh,
 #endif
-    { NULL                    }
+#if USE_CHACHA20
+     &_gcry_cipher_spec_chacha20,
+#endif
+#if USE_SM4
+     &_gcry_cipher_spec_sm4,
+#endif
+    NULL
   };
 
-/* List of registered ciphers.  */
-static gcry_module_t ciphers_registered;
-
-/* This is the lock protecting CIPHERS_REGISTERED.  */
-static ath_mutex_t ciphers_registered_lock = ATH_MUTEX_INITIALIZER;
-
-/* Flag to check whether the default ciphers have already been
-   registered.  */
-static int default_ciphers_registered;
-
-/* Convenient macro for registering the default ciphers.  */
-#define REGISTER_DEFAULT_CIPHERS                   \
-  do                                               \
-    {                                              \
-      ath_mutex_lock (&ciphers_registered_lock);   \
-      if (! default_ciphers_registered)            \
-        {                                          \
-          cipher_register_default ();              \
-          default_ciphers_registered = 1;          \
-        }                                          \
-      ath_mutex_unlock (&ciphers_registered_lock); \
-    }                                              \
-  while (0)
-
-
-/* A VIA processor with the Padlock engine as well as the Intel AES_NI
-   instructions require an alignment of most data on a 16 byte
-   boundary.  Because we trick out the compiler while allocating the
-   context, the align attribute as used in rijndael.c does not work on
-   its own.  Thus we need to make sure that the entire context
-   structure is a aligned on that boundary.  We achieve this by
-   defining a new type and use that instead of our usual alignment
-   type.  */
-typedef union
-{
-  PROPERLY_ALIGNED_TYPE foo;
-#ifdef NEED_16BYTE_ALIGNED_CONTEXT
-  char bar[16] __attribute__ ((aligned (16)));
+/* Cipher implementations starting with index 0 (enum gcry_cipher_algos) */
+static gcry_cipher_spec_t * const cipher_list_algo0[] =
+  {
+    NULL, /* GCRY_CIPHER_NONE */
+#if USE_IDEA
+    &_gcry_cipher_spec_idea,
+#else
+    NULL,
+#endif
+#if USE_DES
+    &_gcry_cipher_spec_tripledes,
+#else
+    NULL,
+#endif
+#if USE_CAST5
+    &_gcry_cipher_spec_cast5,
+#else
+    NULL,
+#endif
+#if USE_BLOWFISH
+    &_gcry_cipher_spec_blowfish,
+#else
+    NULL,
+#endif
+    NULL, /* GCRY_CIPHER_SAFER_SK128 */
+    NULL, /* GCRY_CIPHER_DES_SK */
+#if USE_AES
+    &_gcry_cipher_spec_aes,
+    &_gcry_cipher_spec_aes192,
+    &_gcry_cipher_spec_aes256,
+#else
+    NULL,
+    NULL,
+    NULL,
+#endif
+#if USE_TWOFISH
+    &_gcry_cipher_spec_twofish
+#else
+    NULL
 #endif
-  char c[1];
-} cipher_context_alignment_t;
+  };
 
+/* Cipher implementations starting with index 301 (enum gcry_cipher_algos) */
+static gcry_cipher_spec_t * const cipher_list_algo301[] =
+  {
+#if USE_ARCFOUR
+    &_gcry_cipher_spec_arcfour,
+#else
+    NULL,
+#endif
+#if USE_DES
+    &_gcry_cipher_spec_des,
+#else
+    NULL,
+#endif
+#if USE_TWOFISH
+    &_gcry_cipher_spec_twofish128,
+#else
+    NULL,
+#endif
+#if USE_SERPENT
+    &_gcry_cipher_spec_serpent128,
+    &_gcry_cipher_spec_serpent192,
+    &_gcry_cipher_spec_serpent256,
+#else
+    NULL,
+    NULL,
+    NULL,
+#endif
+#if USE_RFC2268
+    &_gcry_cipher_spec_rfc2268_40,
+    &_gcry_cipher_spec_rfc2268_128,
+#else
+    NULL,
+    NULL,
+#endif
+#if USE_SEED
+    &_gcry_cipher_spec_seed,
+#else
+    NULL,
+#endif
+#if USE_CAMELLIA
+    &_gcry_cipher_spec_camellia128,
+    &_gcry_cipher_spec_camellia192,
+    &_gcry_cipher_spec_camellia256,
+#else
+    NULL,
+    NULL,
+    NULL,
+#endif
+#if USE_SALSA20
+    &_gcry_cipher_spec_salsa20,
+    &_gcry_cipher_spec_salsa20r12,
+#else
+    NULL,
+    NULL,
+#endif
+#if USE_GOST28147
+    &_gcry_cipher_spec_gost28147,
+#else
+    NULL,
+#endif
+#if USE_CHACHA20
+    &_gcry_cipher_spec_chacha20,
+#else
+    NULL,
+#endif
+#if USE_GOST28147
+    &_gcry_cipher_spec_gost28147_mesh,
+#else
+    NULL,
+#endif
+#if USE_SM4
+     &_gcry_cipher_spec_sm4,
+#else
+    NULL,
+#endif
+  };
 
-/* The handle structure.  */
-struct gcry_cipher_handle
-{
-  int magic;
-  size_t actual_handle_size;     /* Allocated size of this handle. */
-  size_t handle_offset;          /* Offset to the malloced block.  */
-  gcry_cipher_spec_t *cipher;
-  cipher_extra_spec_t *extraspec;
-  gcry_module_t module;
-
-  /* The algorithm id.  This is a hack required because the module
-     interface does not easily allow to retrieve this value. */
-  int algo;
-
-  /* A structure with function pointers for bulk operations.  Due to
-     limitations of the module system (we don't want to change the
-     API) we need to keep these function pointers here.  The cipher
-     open function intializes them and the actual encryption routines
-     use them if they are not NULL.  */
-  struct {
-    void (*cfb_enc)(void *context, unsigned char *iv,
-                    void *outbuf_arg, const void *inbuf_arg,
-                    unsigned int nblocks);
-    void (*cfb_dec)(void *context, unsigned char *iv,
-                    void *outbuf_arg, const void *inbuf_arg,
-                    unsigned int nblocks);
-    void (*cbc_enc)(void *context, unsigned char *iv,
-                    void *outbuf_arg, const void *inbuf_arg,
-                    unsigned int nblocks, int cbc_mac);
-    void (*cbc_dec)(void *context, unsigned char *iv,
-                    void *outbuf_arg, const void *inbuf_arg,
-                    unsigned int nblocks);
-    void (*ctr_enc)(void *context, unsigned char *iv,
-                    void *outbuf_arg, const void *inbuf_arg,
-                    unsigned int nblocks);
-  } bulk;
-
-
-  int mode;
-  unsigned int flags;
-
-  struct {
-    unsigned int key:1; /* Set to 1 if a key has been set.  */
-    unsigned int iv:1;  /* Set to 1 if a IV has been set.  */
-  } marks;
-
-  /* The initialization vector.  For best performance we make sure
-     that it is properly aligned.  In particular some implementations
-     of bulk operations expect an 16 byte aligned IV.  */
-  union {
-    cipher_context_alignment_t iv_align;
-    unsigned char iv[MAX_BLOCKSIZE];
-  } u_iv;
-
-  /* The counter for CTR mode.  This field is also used by AESWRAP and
-     thus we can't use the U_IV union.  */
-  union {
-    cipher_context_alignment_t iv_align;
-    unsigned char ctr[MAX_BLOCKSIZE];
-  } u_ctr;
-
-  /* Space to save an IV or CTR for chaining operations.  */
-  unsigned char lastiv[MAX_BLOCKSIZE];
-  int unused;  /* Number of unused bytes in LASTIV. */
-
-  /* What follows are two contexts of the cipher in use.  The first
-     one needs to be aligned well enough for the cipher operation
-     whereas the second one is a copy created by cipher_setkey and
-     used by cipher_reset.  That second copy has no need for proper
-     aligment because it is only accessed by memcpy.  */
-  cipher_context_alignment_t context;
-};
 
+static void _gcry_cipher_setup_mode_ops(gcry_cipher_hd_t c, int mode);
 
 
-/* These dummy functions are used in case a cipher implementation
-   refuses to provide it's own functions.  */
-
-static gcry_err_code_t
-dummy_setkey (void *c, const unsigned char *key, unsigned int keylen)
-{
-  (void)c;
-  (void)key;
-  (void)keylen;
-  return GPG_ERR_NO_ERROR;
-}
-
-static void
-dummy_encrypt_block (void *c,
-                    unsigned char *outbuf, const unsigned char *inbuf)
-{
-  (void)c;
-  (void)outbuf;
-  (void)inbuf;
-  BUG();
-}
-
-static void
-dummy_decrypt_block (void *c,
-                    unsigned char *outbuf, const unsigned char *inbuf)
-{
-  (void)c;
-  (void)outbuf;
-  (void)inbuf;
-  BUG();
-}
-
-static void
-dummy_encrypt_stream (void *c,
-                     unsigned char *outbuf, const unsigned char *inbuf,
-                     unsigned int n)
+static int
+map_algo (int algo)
 {
-  (void)c;
-  (void)outbuf;
-  (void)inbuf;
-  (void)n;
-  BUG();
+  return algo;
 }
 
-static void
-dummy_decrypt_stream (void *c,
-                     unsigned char *outbuf, const unsigned char *inbuf,
-                     unsigned int n)
-{
-  (void)c;
-  (void)outbuf;
-  (void)inbuf;
-  (void)n;
-  BUG();
-}
 
-
-/* Internal function.  Register all the ciphers included in
-   CIPHER_TABLE.  Note, that this function gets only used by the macro
-   REGISTER_DEFAULT_CIPHERS which protects it using a mutex. */
-static void
-cipher_register_default (void)
+/* Return the spec structure for the cipher algorithm ALGO.  For
+   an unknown algorithm NULL is returned.  */
+static gcry_cipher_spec_t *
+spec_from_algo (int algo)
 {
-  gcry_err_code_t err = GPG_ERR_NO_ERROR;
-  int i;
-
-  for (i = 0; !err && cipher_table[i].cipher; i++)
-    {
-      if (! cipher_table[i].cipher->setkey)
-       cipher_table[i].cipher->setkey = dummy_setkey;
-      if (! cipher_table[i].cipher->encrypt)
-       cipher_table[i].cipher->encrypt = dummy_encrypt_block;
-      if (! cipher_table[i].cipher->decrypt)
-       cipher_table[i].cipher->decrypt = dummy_decrypt_block;
-      if (! cipher_table[i].cipher->stencrypt)
-       cipher_table[i].cipher->stencrypt = dummy_encrypt_stream;
-      if (! cipher_table[i].cipher->stdecrypt)
-       cipher_table[i].cipher->stdecrypt = dummy_decrypt_stream;
-
-      if ( fips_mode () && !cipher_table[i].fips_allowed )
-        continue;
-
-      err = _gcry_module_add (&ciphers_registered,
-                             cipher_table[i].algorithm,
-                             (void *) cipher_table[i].cipher,
-                             (void *) cipher_table[i].extraspec,
-                             NULL);
-    }
+  gcry_cipher_spec_t *spec = NULL;
 
-  if (err)
-    BUG ();
-}
+  algo = map_algo (algo);
 
-/* Internal callback function.  Used via _gcry_module_lookup.  */
-static int
-gcry_cipher_lookup_func_name (void *spec, void *data)
-{
-  gcry_cipher_spec_t *cipher = (gcry_cipher_spec_t *) spec;
-  char *name = (char *) data;
-  const char **aliases = cipher->aliases;
-  int i, ret = ! stricmp (name, cipher->name);
+  if (algo >= 0 && algo < DIM(cipher_list_algo0))
+    spec = cipher_list_algo0[algo];
+  else if (algo >= 301 && algo < 301 + DIM(cipher_list_algo301))
+    spec = cipher_list_algo301[algo - 301];
 
-  if (aliases)
-    for (i = 0; aliases[i] && (! ret); i++)
-      ret = ! stricmp (name, aliases[i]);
+  if (spec)
+    gcry_assert (spec->algo == algo);
 
-  return ret;
+  return spec;
 }
 
-/* Internal callback function.  Used via _gcry_module_lookup.  */
-static int
-gcry_cipher_lookup_func_oid (void *spec, void *data)
-{
-  gcry_cipher_spec_t *cipher = (gcry_cipher_spec_t *) spec;
-  char *oid = (char *) data;
-  gcry_cipher_oid_spec_t *oid_specs = cipher->oids;
-  int ret = 0, i;
-
-  if (oid_specs)
-    for (i = 0; oid_specs[i].oid && (! ret); i++)
-      if (! stricmp (oid, oid_specs[i].oid))
-       ret = 1;
 
-  return ret;
-}
-
-/* Internal function.  Lookup a cipher entry by it's name.  */
-static gcry_module_t
-gcry_cipher_lookup_name (const char *name)
+/* Lookup a cipher's spec by its name.  */
+static gcry_cipher_spec_t *
+spec_from_name (const char *name)
 {
-  gcry_module_t cipher;
+  gcry_cipher_spec_t *spec;
+  int idx;
+  const char **aliases;
 
-  cipher = _gcry_module_lookup (ciphers_registered, (void *) name,
-                               gcry_cipher_lookup_func_name);
+  for (idx=0; (spec = cipher_list[idx]); idx++)
+    {
+      if (!stricmp (name, spec->name))
+        return spec;
+      if (spec->aliases)
+        {
+          for (aliases = spec->aliases; *aliases; aliases++)
+            if (!stricmp (name, *aliases))
+              return spec;
+        }
+    }
 
-  return cipher;
+  return NULL;
 }
 
-/* Internal function.  Lookup a cipher entry by it's oid.  */
-static gcry_module_t
-gcry_cipher_lookup_oid (const char *oid)
-{
-  gcry_module_t cipher;
-
-  cipher = _gcry_module_lookup (ciphers_registered, (void *) oid,
-                               gcry_cipher_lookup_func_oid);
 
-  return cipher;
-}
-
-/* Register a new cipher module whose specification can be found in
-   CIPHER.  On success, a new algorithm ID is stored in ALGORITHM_ID
-   and a pointer representhing this module is stored in MODULE.  */
-gcry_error_t
-_gcry_cipher_register (gcry_cipher_spec_t *cipher,
-                       cipher_extra_spec_t *extraspec,
-                       int *algorithm_id,
-                       gcry_module_t *module)
+/* Lookup a cipher's spec by its OID.  */
+static gcry_cipher_spec_t *
+spec_from_oid (const char *oid)
 {
-  gcry_err_code_t err = 0;
-  gcry_module_t mod;
-
-  /* We do not support module loading in fips mode.  */
-  if (fips_mode ())
-    return gpg_error (GPG_ERR_NOT_SUPPORTED);
-
-  ath_mutex_lock (&ciphers_registered_lock);
-  err = _gcry_module_add (&ciphers_registered, 0,
-                         (void *)cipher,
-                         (void *)(extraspec? extraspec : &dummy_extra_spec),
-                          &mod);
-  ath_mutex_unlock (&ciphers_registered_lock);
+  gcry_cipher_spec_t *spec;
+  const gcry_cipher_oid_spec_t *oid_specs;
+  int idx, j;
 
-  if (! err)
+  for (idx=0; (spec = cipher_list[idx]); idx++)
     {
-      *module = mod;
-      *algorithm_id = mod->mod_id;
+      oid_specs = spec->oids;
+      if (oid_specs)
+        {
+          for (j = 0; oid_specs[j].oid; j++)
+            if (!stricmp (oid, oid_specs[j].oid))
+              return spec;
+        }
     }
 
-  return gcry_error (err);
+  return NULL;
 }
 
-/* Unregister the cipher identified by MODULE, which must have been
-   registered with gcry_cipher_register.  */
-void
-gcry_cipher_unregister (gcry_module_t module)
-{
-  ath_mutex_lock (&ciphers_registered_lock);
-  _gcry_module_release (module);
-  ath_mutex_unlock (&ciphers_registered_lock);
-}
 
-/* Locate the OID in the oid table and return the index or -1 when not
-   found.  An opitonal "oid." or "OID." prefix in OID is ignored, the
-   OID is expected to be in standard IETF dotted notation.  The
-   internal algorithm number is returned in ALGORITHM unless it
-   ispassed as NULL.  A pointer to the specification of the module
-   implementing this algorithm is return in OID_SPEC unless passed as
-   NULL.*/
-static int
-search_oid (const char *oid, int *algorithm, gcry_cipher_oid_spec_t *oid_spec)
+/* Locate the OID in the oid table and return the spec or NULL if not
+   found.  An optional "oid." or "OID." prefix in OID is ignored, the
+   OID is expected to be in standard IETF dotted notation.  A pointer
+   to the OID specification of the module implementing this algorithm
+   is return in OID_SPEC unless passed as NULL.*/
+static gcry_cipher_spec_t *
+search_oid (const char *oid, gcry_cipher_oid_spec_t *oid_spec)
 {
-  gcry_module_t module;
-  int ret = 0;
+  gcry_cipher_spec_t *spec;
+  int i;
 
-  if (oid && ((! strncmp (oid, "oid.", 4))
-             || (! strncmp (oid, "OID.", 4))))
+  if (!oid)
+    return NULL;
+
+  if (!strncmp (oid, "oid.", 4) || !strncmp (oid, "OID.", 4))
     oid += 4;
 
-  module = gcry_cipher_lookup_oid (oid);
-  if (module)
+  spec = spec_from_oid (oid);
+  if (spec && spec->oids)
     {
-      gcry_cipher_spec_t *cipher = module->spec;
-      int i;
-
-      for (i = 0; cipher->oids[i].oid && !ret; i++)
-       if (! stricmp (oid, cipher->oids[i].oid))
+      for (i = 0; spec->oids[i].oid; i++)
+       if (!stricmp (oid, spec->oids[i].oid))
          {
-           if (algorithm)
-             *algorithm = module->mod_id;
            if (oid_spec)
-             *oid_spec = cipher->oids[i];
-           ret = 1;
+             *oid_spec = spec->oids[i];
+            return spec;
          }
-      _gcry_module_release (module);
     }
 
-  return ret;
+  return NULL;
 }
 
+
 /* Map STRING to the cipher algorithm identifier.  Returns the
    algorithm ID of the cipher for the given name or 0 if the name is
    not known.  It is valid to pass NULL for STRING which results in a
    return value of 0. */
 int
-gcry_cipher_map_name (const char *string)
+_gcry_cipher_map_name (const char *string)
 {
-  gcry_module_t cipher;
-  int ret, algorithm = 0;
+  gcry_cipher_spec_t *spec;
 
-  if (! string)
+  if (!string)
     return 0;
 
-  REGISTER_DEFAULT_CIPHERS;
-
   /* If the string starts with a digit (optionally prefixed with
      either "OID." or "oid."), we first look into our table of ASN.1
      object identifiers to figure out the algorithm */
 
-  ath_mutex_lock (&ciphers_registered_lock);
-
-  ret = search_oid (string, &algorithm, NULL);
-  if (! ret)
-    {
-      cipher = gcry_cipher_lookup_name (string);
-      if (cipher)
-       {
-         algorithm = cipher->mod_id;
-         _gcry_module_release (cipher);
-       }
-    }
+  spec = search_oid (string, NULL);
+  if (spec)
+    return spec->algo;
 
-  ath_mutex_unlock (&ciphers_registered_lock);
+  spec = spec_from_name (string);
+  if (spec)
+    return spec->algo;
 
-  return algorithm;
+  return 0;
 }
 
 
@@ -510,80 +358,48 @@ gcry_cipher_map_name (const char *string)
    with that OID or 0 if no mode is known.  Passing NULL for string
    yields a return value of 0. */
 int
-gcry_cipher_mode_from_oid (const char *string)
+_gcry_cipher_mode_from_oid (const char *string)
 {
+  gcry_cipher_spec_t *spec;
   gcry_cipher_oid_spec_t oid_spec;
-  int ret = 0, mode = 0;
 
   if (!string)
     return 0;
 
-  ath_mutex_lock (&ciphers_registered_lock);
-  ret = search_oid (string, NULL, &oid_spec);
-  if (ret)
-    mode = oid_spec.mode;
-  ath_mutex_unlock (&ciphers_registered_lock);
+  spec = search_oid (string, &oid_spec);
+  if (spec)
+    return oid_spec.mode;
 
-  return mode;
+  return 0;
 }
 
 
-/* Map the cipher algorithm whose ID is contained in ALGORITHM to a
-   string representation of the algorithm name.  For unknown algorithm
-   IDs this function returns "?".  */
-static const char *
-cipher_algo_to_string (int algorithm)
-{
-  gcry_module_t cipher;
-  const char *name;
-
-  REGISTER_DEFAULT_CIPHERS;
-
-  ath_mutex_lock (&ciphers_registered_lock);
-  cipher = _gcry_module_lookup_id (ciphers_registered, algorithm);
-  if (cipher)
-    {
-      name = ((gcry_cipher_spec_t *) cipher->spec)->name;
-      _gcry_module_release (cipher);
-    }
-  else
-    name = "?";
-  ath_mutex_unlock (&ciphers_registered_lock);
-
-  return name;
-}
-
 /* Map the cipher algorithm identifier ALGORITHM to a string
    representing this algorithm.  This string is the default name as
-   used by Libgcrypt.  An pointer to an empty string is returned for
-   an unknown algorithm.  NULL is never returned. */
+   used by Libgcrypt.  A "?" is returned for an unknown algorithm.
+   NULL is never returned. */
 const char *
-gcry_cipher_algo_name (int algorithm)
+_gcry_cipher_algo_name (int algorithm)
 {
-  return cipher_algo_to_string (algorithm);
+  gcry_cipher_spec_t *spec;
+
+  spec = spec_from_algo (algorithm);
+  return spec? spec->name : "?";
 }
 
 
 /* Flag the cipher algorithm with the identifier ALGORITHM as
    disabled.  There is no error return, the function does nothing for
-   unknown algorithms.  Disabled algorithms are vitually not available
-   in Libgcrypt. */
+   unknown algorithms.  Disabled algorithms are virtually not
+   available in Libgcrypt.  This is not thread safe and should thus be
+   called early. */
 static void
-disable_cipher_algo (int algorithm)
+disable_cipher_algo (int algo)
 {
-  gcry_module_t cipher;
+  gcry_cipher_spec_t *spec = spec_from_algo (algo);
 
-  REGISTER_DEFAULT_CIPHERS;
-
-  ath_mutex_lock (&ciphers_registered_lock);
-  cipher = _gcry_module_lookup_id (ciphers_registered, algorithm);
-  if (cipher)
-    {
-      if (! (cipher->flags & FLAG_MODULE_DISABLED))
-       cipher->flags |= FLAG_MODULE_DISABLED;
-      _gcry_module_release (cipher);
-    }
-  ath_mutex_unlock (&ciphers_registered_lock);
+  if (spec)
+    spec->flags.disabled = 1;
 }
 
 
@@ -593,24 +409,13 @@ disable_cipher_algo (int algorithm)
 static gcry_err_code_t
 check_cipher_algo (int algorithm)
 {
-  gcry_err_code_t err = GPG_ERR_NO_ERROR;
-  gcry_module_t cipher;
-
-  REGISTER_DEFAULT_CIPHERS;
+  gcry_cipher_spec_t *spec;
 
-  ath_mutex_lock (&ciphers_registered_lock);
-  cipher = _gcry_module_lookup_id (ciphers_registered, algorithm);
-  if (cipher)
-    {
-      if (cipher->flags & FLAG_MODULE_DISABLED)
-       err = GPG_ERR_CIPHER_ALGO;
-      _gcry_module_release (cipher);
-    }
-  else
-    err = GPG_ERR_CIPHER_ALGO;
-  ath_mutex_unlock (&ciphers_registered_lock);
+  spec = spec_from_algo (algorithm);
+  if (spec && !spec->flags.disabled && (spec->flags.fips || !fips_mode ()))
+    return 0;
 
-  return err;
+  return GPG_ERR_CIPHER_ALGO;
 }
 
 
@@ -619,45 +424,36 @@ check_cipher_algo (int algorithm)
 static unsigned int
 cipher_get_keylen (int algorithm)
 {
-  gcry_module_t cipher;
+  gcry_cipher_spec_t *spec;
   unsigned len = 0;
 
-  REGISTER_DEFAULT_CIPHERS;
-
-  ath_mutex_lock (&ciphers_registered_lock);
-  cipher = _gcry_module_lookup_id (ciphers_registered, algorithm);
-  if (cipher)
+  spec = spec_from_algo (algorithm);
+  if (spec)
     {
-      len = ((gcry_cipher_spec_t *) cipher->spec)->keylen;
+      len = spec->keylen;
       if (!len)
        log_bug ("cipher %d w/o key length\n", algorithm);
-      _gcry_module_release (cipher);
     }
-  ath_mutex_unlock (&ciphers_registered_lock);
 
   return len;
 }
 
+
 /* Return the block length of the cipher algorithm with the identifier
    ALGORITHM.  This function return 0 for an invalid algorithm.  */
 static unsigned int
 cipher_get_blocksize (int algorithm)
 {
-  gcry_module_t cipher;
+  gcry_cipher_spec_t *spec;
   unsigned len = 0;
 
-  REGISTER_DEFAULT_CIPHERS;
-
-  ath_mutex_lock (&ciphers_registered_lock);
-  cipher = _gcry_module_lookup_id (ciphers_registered, algorithm);
-  if (cipher)
+  spec = spec_from_algo (algorithm);
+  if (spec)
     {
-      len = ((gcry_cipher_spec_t *) cipher->spec)->blocksize;
-      if (! len)
-         log_bug ("cipher %d w/o blocksize\n", algorithm);
-      _gcry_module_release (cipher);
+      len = spec->blocksize;
+      if (!len)
+        log_bug ("cipher %d w/o blocksize\n", algorithm);
     }
-  ath_mutex_unlock (&ciphers_registered_lock);
 
   return len;
 }
@@ -677,45 +473,46 @@ cipher_get_blocksize (int algorithm)
 
    Values for these flags may be combined using OR.
  */
-gcry_error_t
-gcry_cipher_open (gcry_cipher_hd_t *handle,
-                 int algo, int mode, unsigned int flags)
+gcry_err_code_t
+_gcry_cipher_open (gcry_cipher_hd_t *handle,
+                   int algo, int mode, unsigned int flags)
+{
+  gcry_err_code_t rc;
+  gcry_cipher_hd_t h = NULL;
+
+  if (mode >= GCRY_CIPHER_MODE_INTERNAL)
+    rc = GPG_ERR_INV_CIPHER_MODE;
+  else
+    rc = _gcry_cipher_open_internal (&h, algo, mode, flags);
+
+  *handle = rc ? NULL : h;
+
+  return rc;
+}
+
+
+gcry_err_code_t
+_gcry_cipher_open_internal (gcry_cipher_hd_t *handle,
+                           int algo, int mode, unsigned int flags)
 {
   int secure = (flags & GCRY_CIPHER_SECURE);
-  gcry_cipher_spec_t *cipher = NULL;
-  cipher_extra_spec_t *extraspec = NULL;
-  gcry_module_t module = NULL;
+  gcry_cipher_spec_t *spec;
   gcry_cipher_hd_t h = NULL;
-  gcry_err_code_t err = 0;
+  gcry_err_code_t err;
 
   /* If the application missed to call the random poll function, we do
      it here to ensure that it is used once in a while. */
   _gcry_fast_random_poll ();
 
-  REGISTER_DEFAULT_CIPHERS;
-
-  /* Fetch the according module and check whether the cipher is marked
-     available for use.  */
-  ath_mutex_lock (&ciphers_registered_lock);
-  module = _gcry_module_lookup_id (ciphers_registered, algo);
-  if (module)
-    {
-      /* Found module.  */
-
-      if (module->flags & FLAG_MODULE_DISABLED)
-       {
-         /* Not available for use.  */
-         err = GPG_ERR_CIPHER_ALGO;
-       }
-      else
-        {
-          cipher = (gcry_cipher_spec_t *) module->spec;
-          extraspec = module->extraspec;
-        }
-    }
-  else
+  spec = spec_from_algo (algo);
+  if (!spec)
     err = GPG_ERR_CIPHER_ALGO;
-  ath_mutex_unlock (&ciphers_registered_lock);
+  else if (spec->flags.disabled)
+    err = GPG_ERR_CIPHER_ALGO;
+  else if (!spec->flags.fips && fips_mode ())
+    err = GPG_ERR_CIPHER_ALGO;
+  else
+    err = 0;
 
   /* check flags */
   if ((! err)
@@ -723,8 +520,9 @@ gcry_cipher_open (gcry_cipher_hd_t *handle,
                     | GCRY_CIPHER_SECURE
                     | GCRY_CIPHER_ENABLE_SYNC
                     | GCRY_CIPHER_CBC_CTS
-                    | GCRY_CIPHER_CBC_MAC))
-         || (flags & GCRY_CIPHER_CBC_CTS & GCRY_CIPHER_CBC_MAC)))
+                    | GCRY_CIPHER_CBC_MAC
+                     | GCRY_CIPHER_EXTENDED))
+         || ((flags & GCRY_CIPHER_CBC_CTS) && (flags & GCRY_CIPHER_CBC_MAC))))
     err = GPG_ERR_CIPHER_ALGO;
 
   /* check that a valid mode has been requested */
@@ -734,17 +532,65 @@ gcry_cipher_open (gcry_cipher_hd_t *handle,
       case GCRY_CIPHER_MODE_ECB:
       case GCRY_CIPHER_MODE_CBC:
       case GCRY_CIPHER_MODE_CFB:
+      case GCRY_CIPHER_MODE_CFB8:
       case GCRY_CIPHER_MODE_OFB:
       case GCRY_CIPHER_MODE_CTR:
       case GCRY_CIPHER_MODE_AESWRAP:
-       if ((cipher->encrypt == dummy_encrypt_block)
-           || (cipher->decrypt == dummy_decrypt_block))
+      case GCRY_CIPHER_MODE_CMAC:
+      case GCRY_CIPHER_MODE_EAX:
+       if (!spec->encrypt || !spec->decrypt)
+         err = GPG_ERR_INV_CIPHER_MODE;
+       break;
+
+      case GCRY_CIPHER_MODE_CCM:
+       if (!spec->encrypt || !spec->decrypt)
+         err = GPG_ERR_INV_CIPHER_MODE;
+       else if (spec->blocksize != GCRY_CCM_BLOCK_LEN)
+         err = GPG_ERR_INV_CIPHER_MODE;
+       break;
+
+      case GCRY_CIPHER_MODE_XTS:
+       if (!spec->encrypt || !spec->decrypt)
+         err = GPG_ERR_INV_CIPHER_MODE;
+       else if (spec->blocksize != GCRY_XTS_BLOCK_LEN)
+         err = GPG_ERR_INV_CIPHER_MODE;
+       break;
+
+      case GCRY_CIPHER_MODE_GCM:
+       if (!spec->encrypt || !spec->decrypt)
+         err = GPG_ERR_INV_CIPHER_MODE;
+       else if (spec->blocksize != GCRY_GCM_BLOCK_LEN)
+         err = GPG_ERR_INV_CIPHER_MODE;
+       break;
+
+      case GCRY_CIPHER_MODE_SIV:
+      case GCRY_CIPHER_MODE_GCM_SIV:
+       if (!spec->encrypt || !spec->decrypt)
+         err = GPG_ERR_INV_CIPHER_MODE;
+       else if (spec->blocksize != GCRY_SIV_BLOCK_LEN)
+         err = GPG_ERR_INV_CIPHER_MODE;
+       break;
+
+      case GCRY_CIPHER_MODE_POLY1305:
+       if (!spec->stencrypt || !spec->stdecrypt || !spec->setiv)
+         err = GPG_ERR_INV_CIPHER_MODE;
+       else if (spec->algo != GCRY_CIPHER_CHACHA20)
+         err = GPG_ERR_INV_CIPHER_MODE;
+       break;
+
+      case GCRY_CIPHER_MODE_OCB:
+        /* Note that our implementation allows only for 128 bit block
+           length algorithms.  Lower block lengths would be possible
+           but we do not implement them because they limit the
+           security too much.  */
+       if (!spec->encrypt || !spec->decrypt)
+         err = GPG_ERR_INV_CIPHER_MODE;
+       else if (spec->blocksize != GCRY_OCB_BLOCK_LEN)
          err = GPG_ERR_INV_CIPHER_MODE;
        break;
 
       case GCRY_CIPHER_MODE_STREAM:
-       if ((cipher->stencrypt == dummy_encrypt_stream)
-           || (cipher->stdecrypt == dummy_decrypt_stream))
+       if (!spec->stencrypt || !spec->stdecrypt)
          err = GPG_ERR_INV_CIPHER_MODE;
        break;
 
@@ -769,30 +615,44 @@ gcry_cipher_open (gcry_cipher_hd_t *handle,
   if (! err)
     {
       size_t size = (sizeof (*h)
-                     + 2 * cipher->contextsize
+                     + 2 * spec->contextsize
                      - sizeof (cipher_context_alignment_t)
 #ifdef NEED_16BYTE_ALIGNED_CONTEXT
                      + 15  /* Space for leading alignment gap.  */
 #endif /*NEED_16BYTE_ALIGNED_CONTEXT*/
                      );
 
+      /* Space needed per mode.  */
+      switch (mode)
+       {
+       case GCRY_CIPHER_MODE_XTS:
+       case GCRY_CIPHER_MODE_SIV:
+         /* Additional cipher context for tweak. */
+         size += 2 * spec->contextsize + 15;
+         break;
+
+       default:
+         break;
+       }
+
       if (secure)
-       h = gcry_calloc_secure (1, size);
+       h = xtrycalloc_secure (1, size);
       else
-       h = gcry_calloc (1, size);
+       h = xtrycalloc (1, size);
 
       if (! h)
        err = gpg_err_code_from_syserror ();
       else
        {
           size_t off = 0;
+         char *tc;
 
 #ifdef NEED_16BYTE_ALIGNED_CONTEXT
-          if ( ((unsigned long)h & 0x0f) )
+          if ( ((uintptr_t)h & 0x0f) )
             {
               /* The malloced block is not aligned on a 16 byte
                  boundary.  Correct for this.  */
-              off = 16 - ((unsigned long)h & 0x0f);
+              off = 16 - ((uintptr_t)h & 0x0f);
               h = (void*)((char*)h + off);
             }
 #endif /*NEED_16BYTE_ALIGNED_CONTEXT*/
@@ -800,57 +660,51 @@ gcry_cipher_open (gcry_cipher_hd_t *handle,
          h->magic = secure ? CTX_MAGIC_SECURE : CTX_MAGIC_NORMAL;
           h->actual_handle_size = size - off;
           h->handle_offset = off;
-         h->cipher = cipher;
-         h->extraspec = extraspec;
-         h->module = module;
+         h->spec = spec;
           h->algo = algo;
          h->mode = mode;
          h->flags = flags;
 
-          /* Setup bulk encryption routines.  */
-          switch (algo)
+          /* Setup mode routines. */
+          _gcry_cipher_setup_mode_ops(h, mode);
+
+          /* Setup defaults depending on the mode.  */
+          switch (mode)
             {
-#ifdef USE_AES
-            case GCRY_CIPHER_AES128:
-            case GCRY_CIPHER_AES192:
-            case GCRY_CIPHER_AES256:
-              h->bulk.cfb_enc = _gcry_aes_cfb_enc;
-              h->bulk.cfb_dec = _gcry_aes_cfb_dec;
-              h->bulk.cbc_enc = _gcry_aes_cbc_enc;
-              h->bulk.cbc_dec = _gcry_aes_cbc_dec;
-              h->bulk.ctr_enc = _gcry_aes_ctr_enc;
+            case GCRY_CIPHER_MODE_OCB:
+              h->u_mode.ocb.taglen = 16; /* Bytes.  */
               break;
-#endif /*USE_AES*/
+
+           case GCRY_CIPHER_MODE_XTS:
+             tc = h->context.c + spec->contextsize * 2;
+             tc += (16 - (uintptr_t)tc % 16) % 16;
+             h->u_mode.xts.tweak_context = tc;
+             break;
+
+           case GCRY_CIPHER_MODE_SIV:
+             tc = h->context.c + spec->contextsize * 2;
+             tc += (16 - (uintptr_t)tc % 16) % 16;
+             h->u_mode.siv.ctr_context = tc;
+             break;
 
             default:
               break;
             }
-       }
+        }
     }
 
   /* Done.  */
 
-  if (err)
-    {
-      if (module)
-       {
-         /* Release module.  */
-         ath_mutex_lock (&ciphers_registered_lock);
-         _gcry_module_release (module);
-         ath_mutex_unlock (&ciphers_registered_lock);
-       }
-    }
-
   *handle = err ? NULL : h;
 
-  return gcry_error (err);
+  return err;
 }
 
 
 /* Release all resources associated with the cipher handle H. H may be
    NULL in which case this is a no-operation. */
 void
-gcry_cipher_close (gcry_cipher_hd_t h)
+_gcry_cipher_close (gcry_cipher_hd_t h)
 {
   size_t off;
 
@@ -864,11 +718,6 @@ gcry_cipher_close (gcry_cipher_hd_t h)
   else
     h->magic = 0;
 
-  /* Release module.  */
-  ath_mutex_lock (&ciphers_registered_lock);
-  _gcry_module_release (h->module);
-  ath_mutex_unlock (&ciphers_registered_lock);
-
   /* We always want to wipe out the memory even when the context has
      been allocated in secure memory.  The user might have disabled
      secure memory or is using his own implementation which does not
@@ -878,55 +727,148 @@ gcry_cipher_close (gcry_cipher_hd_t h)
   off = h->handle_offset;
   wipememory (h, h->actual_handle_size);
 
-  gcry_free ((char*)h - off);
+  xfree ((char*)h - off);
 }
 
 
 /* Set the key to be used for the encryption context C to KEY with
    length KEYLEN.  The length should match the required length. */
-static gcry_error_t
-cipher_setkey (gcry_cipher_hd_t c, byte *key, unsigned int keylen)
+static gcry_err_code_t
+cipher_setkey (gcry_cipher_hd_t c, byte *key, size_t keylen)
 {
-  gcry_err_code_t ret;
+  gcry_err_code_t rc;
+
+  if (c->mode == GCRY_CIPHER_MODE_XTS)
+    {
+      /* XTS uses two keys. */
+      if (keylen % 2)
+       return GPG_ERR_INV_KEYLEN;
+      keylen /= 2;
+
+      if (fips_mode ())
+       {
+         /* Reject key if subkeys Key_1 and Key_2 are equal.
+            See "Implementation Guidance for FIPS 140-2, A.9 XTS-AES
+            Key Generation Requirements" for details.  */
+         if (buf_eq_const (key, key + keylen, keylen))
+           return GPG_ERR_WEAK_KEY;
+       }
+    }
+  else if (c->mode == GCRY_CIPHER_MODE_SIV)
+    {
+      /* SIV uses two keys. */
+      if (keylen % 2)
+       return GPG_ERR_INV_KEYLEN;
+      keylen /= 2;
+    }
 
-  ret = (*c->cipher->setkey) (&c->context.c, key, keylen);
-  if (!ret)
+  rc = c->spec->setkey (&c->context.c, key, keylen, &c->bulk);
+  if (!rc || (c->marks.allow_weak_key && rc == GPG_ERR_WEAK_KEY))
     {
       /* Duplicate initial context.  */
-      memcpy ((void *) ((char *) &c->context.c + c->cipher->contextsize),
+      memcpy ((void *) ((char *) &c->context.c + c->spec->contextsize),
               (void *) &c->context.c,
-              c->cipher->contextsize);
+              c->spec->contextsize);
       c->marks.key = 1;
+
+      switch (c->mode)
+        {
+        case GCRY_CIPHER_MODE_CMAC:
+          rc = _gcry_cipher_cmac_set_subkeys (c);
+          break;
+
+        case GCRY_CIPHER_MODE_EAX:
+          rc = _gcry_cipher_eax_setkey (c);
+          break;
+
+        case GCRY_CIPHER_MODE_GCM:
+          _gcry_cipher_gcm_setkey (c);
+          break;
+
+        case GCRY_CIPHER_MODE_GCM_SIV:
+          rc = _gcry_cipher_gcm_siv_setkey (c, keylen);
+          if (rc)
+           c->marks.key = 0;
+          break;
+
+        case GCRY_CIPHER_MODE_OCB:
+          _gcry_cipher_ocb_setkey (c);
+          break;
+
+        case GCRY_CIPHER_MODE_POLY1305:
+          _gcry_cipher_poly1305_setkey (c);
+          break;
+
+       case GCRY_CIPHER_MODE_XTS:
+         /* Setup tweak cipher with second part of XTS key. */
+         rc = c->spec->setkey (c->u_mode.xts.tweak_context, key + keylen,
+                               keylen, &c->bulk);
+         if (!rc || (c->marks.allow_weak_key && rc == GPG_ERR_WEAK_KEY))
+           {
+             /* Duplicate initial tweak context.  */
+             memcpy (c->u_mode.xts.tweak_context + c->spec->contextsize,
+                     c->u_mode.xts.tweak_context, c->spec->contextsize);
+           }
+         else
+           c->marks.key = 0;
+         break;
+
+        case GCRY_CIPHER_MODE_SIV:
+         /* Setup CTR cipher with second part of SIV key. */
+          rc = _gcry_cipher_siv_setkey (c, key + keylen, keylen);
+         if (!rc || (c->marks.allow_weak_key && rc == GPG_ERR_WEAK_KEY))
+           {
+             /* Duplicate initial CTR context.  */
+             memcpy (c->u_mode.siv.ctr_context + c->spec->contextsize,
+                     c->u_mode.siv.ctr_context, c->spec->contextsize);
+           }
+         else
+           c->marks.key = 0;
+          break;
+
+        default:
+          break;
+        }
     }
   else
     c->marks.key = 0;
 
-  return gcry_error (ret);
+  return rc;
 }
 
 
 /* Set the IV to be used for the encryption context C to IV with
    length IVLEN.  The length should match the required length. */
-static void
-cipher_setiv( gcry_cipher_hd_t c, const byte *iv, unsigned ivlen )
+static gcry_err_code_t
+cipher_setiv (gcry_cipher_hd_t c, const byte *iv, size_t ivlen)
 {
-  memset (c->u_iv.iv, 0, c->cipher->blocksize);
+  /* If the cipher has its own IV handler, we use only this one.  This
+     is currently used for stream ciphers requiring a nonce.  */
+  if (c->spec->setiv)
+    {
+      c->spec->setiv (&c->context.c, iv, ivlen);
+      return 0;
+    }
+
+  memset (c->u_iv.iv, 0, c->spec->blocksize);
   if (iv)
     {
-      if (ivlen != c->cipher->blocksize)
+      if (ivlen != c->spec->blocksize)
         {
           log_info ("WARNING: cipher_setiv: ivlen=%u blklen=%u\n",
-                    ivlen, (unsigned int)c->cipher->blocksize);
+                    (unsigned int)ivlen, (unsigned int)c->spec->blocksize);
           fips_signal_error ("IV length does not match blocklength");
         }
-      if (ivlen > c->cipher->blocksize)
-        ivlen = c->cipher->blocksize;
+      if (ivlen > c->spec->blocksize)
+        ivlen = c->spec->blocksize;
       memcpy (c->u_iv.iv, iv, ivlen);
       c->marks.iv = 1;
     }
   else
       c->marks.iv = 0;
   c->unused = 0;
+
+  return 0;
 }
 
 
@@ -935,1008 +877,655 @@ cipher_setiv( gcry_cipher_hd_t c, const byte *iv, 
unsigned ivlen )
 static void
 cipher_reset (gcry_cipher_hd_t c)
 {
+  unsigned int marks_key, marks_allow_weak_key;
+
+  marks_key = c->marks.key;
+  marks_allow_weak_key = c->marks.allow_weak_key;
+
   memcpy (&c->context.c,
-         (char *) &c->context.c + c->cipher->contextsize,
-         c->cipher->contextsize);
+         (char *) &c->context.c + c->spec->contextsize,
+         c->spec->contextsize);
   memset (&c->marks, 0, sizeof c->marks);
-  memset (c->u_iv.iv, 0, c->cipher->blocksize);
-  memset (c->lastiv, 0, c->cipher->blocksize);
-  memset (c->u_ctr.ctr, 0, c->cipher->blocksize);
-}
+  memset (c->u_iv.iv, 0, c->spec->blocksize);
+  memset (c->lastiv, 0, c->spec->blocksize);
+  memset (c->u_ctr.ctr, 0, c->spec->blocksize);
+  c->unused = 0;
 
+  c->marks.key = marks_key;
+  c->marks.allow_weak_key = marks_allow_weak_key;
 
-
-static gcry_err_code_t
-do_ecb_encrypt (gcry_cipher_hd_t c,
-                unsigned char *outbuf, unsigned int outbuflen,
-                const unsigned char *inbuf, unsigned int inbuflen)
-{
-  unsigned int blocksize = c->cipher->blocksize;
-  unsigned int n, nblocks;
+  switch (c->mode)
+    {
+    case GCRY_CIPHER_MODE_CMAC:
+      _gcry_cmac_reset(&c->u_mode.cmac);
+      break;
 
-  if (outbuflen < inbuflen)
-    return GPG_ERR_BUFFER_TOO_SHORT;
-  if ((inbuflen % blocksize))
-    return GPG_ERR_INV_LENGTH;
+    case GCRY_CIPHER_MODE_EAX:
+      _gcry_cmac_reset(&c->u_mode.eax.cmac_header);
+      _gcry_cmac_reset(&c->u_mode.eax.cmac_ciphertext);
+      break;
 
-  nblocks = inbuflen / c->cipher->blocksize;
+    case GCRY_CIPHER_MODE_GCM:
+    case GCRY_CIPHER_MODE_GCM_SIV:
+      /* Only clear head of u_mode, keep ghash_key and gcm_table. */
+      {
+        byte *u_mode_pos = (void *)&c->u_mode;
+        byte *ghash_key_pos = c->u_mode.gcm.u_ghash_key.key;
+        size_t u_mode_head_length = ghash_key_pos - u_mode_pos;
 
-  for (n=0; n < nblocks; n++ )
-    {
-      c->cipher->encrypt (&c->context.c, outbuf, (byte*)/*arggg*/inbuf);
-      inbuf  += blocksize;
-      outbuf += blocksize;
-    }
-  return 0;
-}
+        memset (&c->u_mode, 0, u_mode_head_length);
+      }
+      break;
 
-static gcry_err_code_t
-do_ecb_decrypt (gcry_cipher_hd_t c,
-                unsigned char *outbuf, unsigned int outbuflen,
-                const unsigned char *inbuf, unsigned int inbuflen)
-{
-  unsigned int blocksize = c->cipher->blocksize;
-  unsigned int n, nblocks;
-
-  if (outbuflen < inbuflen)
-    return GPG_ERR_BUFFER_TOO_SHORT;
-  if ((inbuflen % blocksize))
-    return GPG_ERR_INV_LENGTH;
-  nblocks = inbuflen / c->cipher->blocksize;
-
-  for (n=0; n < nblocks; n++ )
-    {
-      c->cipher->decrypt (&c->context.c, outbuf, (byte*)/*arggg*/inbuf );
-      inbuf  += blocksize;
-      outbuf += blocksize;
-    }
-
-  return 0;
-}
+    case GCRY_CIPHER_MODE_POLY1305:
+      memset (&c->u_mode.poly1305, 0, sizeof c->u_mode.poly1305);
+      break;
 
+    case GCRY_CIPHER_MODE_CCM:
+      memset (&c->u_mode.ccm, 0, sizeof c->u_mode.ccm);
+      break;
 
-static gcry_err_code_t
-do_cbc_encrypt (gcry_cipher_hd_t c,
-                unsigned char *outbuf, unsigned int outbuflen,
-                const unsigned char *inbuf, unsigned int inbuflen)
-{
-  unsigned int n;
-  unsigned char *ivp;
-  int i;
-  size_t blocksize = c->cipher->blocksize;
-  unsigned nblocks = inbuflen / blocksize;
+    case GCRY_CIPHER_MODE_OCB:
+      {
+       const size_t table_maxblks = 1 << OCB_L_TABLE_SIZE;
+       byte *u_mode_head_pos = (void *)&c->u_mode.ocb;
+       byte *u_mode_tail_pos = (void *)&c->u_mode.ocb.tag;
+       size_t u_mode_head_length = u_mode_tail_pos - u_mode_head_pos;
+       size_t u_mode_tail_length = sizeof(c->u_mode.ocb) - u_mode_head_length;
 
-  if (outbuflen < ((c->flags & GCRY_CIPHER_CBC_MAC)? blocksize : inbuflen))
-    return GPG_ERR_BUFFER_TOO_SHORT;
+       if (c->u_mode.ocb.aad_nblocks < table_maxblks)
+         {
+           /* Precalculated L-values are still ok after reset, no need
+            * to clear. */
+           memset (u_mode_tail_pos, 0, u_mode_tail_length);
+         }
+       else
+         {
+           /* Reinitialize L table. */
+           memset (&c->u_mode.ocb, 0, sizeof(c->u_mode.ocb));
+           _gcry_cipher_ocb_setkey (c);
+         }
 
-  if ((inbuflen % c->cipher->blocksize)
-      && !(inbuflen > c->cipher->blocksize
-           && (c->flags & GCRY_CIPHER_CBC_CTS)))
-    return GPG_ERR_INV_LENGTH;
+       /* Setup default taglen.  */
+       c->u_mode.ocb.taglen = 16;
+      }
+      break;
 
-  if ((c->flags & GCRY_CIPHER_CBC_CTS) && inbuflen > blocksize)
-    {
-      if ((inbuflen % blocksize) == 0)
-       nblocks--;
-    }
+    case GCRY_CIPHER_MODE_XTS:
+      memcpy (c->u_mode.xts.tweak_context,
+             c->u_mode.xts.tweak_context + c->spec->contextsize,
+             c->spec->contextsize);
+      break;
 
-  if (c->bulk.cbc_enc)
-    {
-      c->bulk.cbc_enc (&c->context.c, c->u_iv.iv, outbuf, inbuf, nblocks,
-                       (c->flags & GCRY_CIPHER_CBC_MAC));
-      inbuf  += nblocks * blocksize;
-      if (!(c->flags & GCRY_CIPHER_CBC_MAC))
-        outbuf += nblocks * blocksize;
-    }
-  else
-    {
-      for (n=0; n < nblocks; n++ )
-        {
-          for (ivp=c->u_iv.iv,i=0; i < blocksize; i++ )
-            outbuf[i] = inbuf[i] ^ *ivp++;
-          c->cipher->encrypt ( &c->context.c, outbuf, outbuf );
-          memcpy (c->u_iv.iv, outbuf, blocksize );
-          inbuf  += blocksize;
-          if (!(c->flags & GCRY_CIPHER_CBC_MAC))
-            outbuf += blocksize;
-        }
-    }
+    case GCRY_CIPHER_MODE_SIV:
+      /* Only clear head of u_mode, keep s2v_cmac and ctr_context. */
+      {
+        byte *u_mode_pos = (void *)&c->u_mode;
+        byte *tail_pos = (void *)&c->u_mode.siv.s2v_cmac;
+        size_t u_mode_head_length = tail_pos - u_mode_pos;
 
-  if ((c->flags & GCRY_CIPHER_CBC_CTS) && inbuflen > blocksize)
-    {
-      /* We have to be careful here, since outbuf might be equal to
-         inbuf.  */
-      int restbytes;
-      unsigned char b;
+        memset (&c->u_mode, 0, u_mode_head_length);
 
-      if ((inbuflen % blocksize) == 0)
-        restbytes = blocksize;
-      else
-        restbytes = inbuflen % blocksize;
+       memcpy (c->u_mode.siv.ctr_context,
+               c->u_mode.siv.ctr_context + c->spec->contextsize,
+               c->spec->contextsize);
 
-      outbuf -= blocksize;
-      for (ivp = c->u_iv.iv, i = 0; i < restbytes; i++)
-        {
-          b = inbuf[i];
-          outbuf[blocksize + i] = outbuf[i];
-          outbuf[i] = b ^ *ivp++;
-        }
-      for (; i < blocksize; i++)
-        outbuf[i] = 0 ^ *ivp++;
+       memcpy (c->u_mode.siv.s2v_d, c->u_mode.siv.s2v_zero_block,
+               GCRY_SIV_BLOCK_LEN);
+      }
+      break;
 
-      c->cipher->encrypt (&c->context.c, outbuf, outbuf);
-      memcpy (c->u_iv.iv, outbuf, blocksize);
+    default:
+      break; /* u_mode unused by other modes. */
     }
-
-  return 0;
 }
 
 
+
 static gcry_err_code_t
-do_cbc_decrypt (gcry_cipher_hd_t c,
-                unsigned char *outbuf, unsigned int outbuflen,
-                const unsigned char *inbuf, unsigned int inbuflen)
+do_ecb_crypt (gcry_cipher_hd_t c,
+              unsigned char *outbuf, size_t outbuflen,
+              const unsigned char *inbuf, size_t inbuflen,
+              gcry_cipher_encrypt_t crypt_fn)
 {
-  unsigned int n;
-  unsigned char *ivp;
-  int i;
-  size_t blocksize = c->cipher->blocksize;
-  unsigned int nblocks = inbuflen / blocksize;
+  unsigned int blocksize = c->spec->blocksize;
+  size_t n, nblocks;
+  unsigned int burn, nburn;
 
   if (outbuflen < inbuflen)
     return GPG_ERR_BUFFER_TOO_SHORT;
-
-  if ((inbuflen % c->cipher->blocksize)
-      && !(inbuflen > c->cipher->blocksize
-           && (c->flags & GCRY_CIPHER_CBC_CTS)))
+  if ((inbuflen % blocksize))
     return GPG_ERR_INV_LENGTH;
 
-  if ((c->flags & GCRY_CIPHER_CBC_CTS) && inbuflen > blocksize)
-    {
-      nblocks--;
-      if ((inbuflen % blocksize) == 0)
-       nblocks--;
-      memcpy (c->lastiv, c->u_iv.iv, blocksize);
-    }
+  nblocks = inbuflen / blocksize;
+  burn = 0;
 
-  if (c->bulk.cbc_dec)
-    {
-      c->bulk.cbc_dec (&c->context.c, c->u_iv.iv, outbuf, inbuf, nblocks);
-      inbuf  += nblocks * blocksize;
-      outbuf += nblocks * blocksize;
-    }
-  else
+  for (n=0; n < nblocks; n++ )
     {
-      for (n=0; n < nblocks; n++ )
-        {
-          /* Because outbuf and inbuf might be the same, we have to
-           * save the original ciphertext block.  We use LASTIV for
-           * this here because it is not used otherwise. */
-          memcpy (c->lastiv, inbuf, blocksize);
-          c->cipher->decrypt ( &c->context.c, outbuf, inbuf );
-          for (ivp=c->u_iv.iv,i=0; i < blocksize; i++ )
-           outbuf[i] ^= *ivp++;
-          memcpy(c->u_iv.iv, c->lastiv, blocksize );
-          inbuf  += c->cipher->blocksize;
-          outbuf += c->cipher->blocksize;
-        }
+      nburn = crypt_fn (&c->context.c, outbuf, inbuf);
+      burn = nburn > burn ? nburn : burn;
+      inbuf  += blocksize;
+      outbuf += blocksize;
     }
 
-  if ((c->flags & GCRY_CIPHER_CBC_CTS) && inbuflen > blocksize)
-    {
-      int restbytes;
-
-      if ((inbuflen % blocksize) == 0)
-        restbytes = blocksize;
-      else
-        restbytes = inbuflen % blocksize;
-
-      memcpy (c->lastiv, c->u_iv.iv, blocksize );         /* Save Cn-2. */
-      memcpy (c->u_iv.iv, inbuf + blocksize, restbytes ); /* Save Cn. */
-
-      c->cipher->decrypt ( &c->context.c, outbuf, inbuf );
-      for (ivp=c->u_iv.iv,i=0; i < restbytes; i++ )
-        outbuf[i] ^= *ivp++;
-
-      memcpy(outbuf + blocksize, outbuf, restbytes);
-      for(i=restbytes; i < blocksize; i++)
-        c->u_iv.iv[i] = outbuf[i];
-      c->cipher->decrypt (&c->context.c, outbuf, c->u_iv.iv);
-      for(ivp=c->lastiv,i=0; i < blocksize; i++ )
-        outbuf[i] ^= *ivp++;
-      /* c->lastiv is now really lastlastiv, does this matter? */
-    }
+  if (burn > 0)
+    _gcry_burn_stack (burn + 4 * sizeof(void *));
 
   return 0;
 }
 
-
 static gcry_err_code_t
-do_cfb_encrypt (gcry_cipher_hd_t c,
-                unsigned char *outbuf, unsigned int outbuflen,
-                const unsigned char *inbuf, unsigned int inbuflen)
+do_ecb_encrypt (gcry_cipher_hd_t c,
+                unsigned char *outbuf, size_t outbuflen,
+                const unsigned char *inbuf, size_t inbuflen)
 {
-  unsigned char *ivp;
-  size_t blocksize = c->cipher->blocksize;
-  size_t blocksize_x_2 = blocksize + blocksize;
+  return do_ecb_crypt (c, outbuf, outbuflen, inbuf, inbuflen, 
c->spec->encrypt);
+}
 
-  if (outbuflen < inbuflen)
-    return GPG_ERR_BUFFER_TOO_SHORT;
+static gcry_err_code_t
+do_ecb_decrypt (gcry_cipher_hd_t c,
+                unsigned char *outbuf, size_t outbuflen,
+                const unsigned char *inbuf, size_t inbuflen)
+{
+  return do_ecb_crypt (c, outbuf, outbuflen, inbuf, inbuflen, 
c->spec->decrypt);
+}
 
-  if ( inbuflen <= c->unused )
-    {
-      /* Short enough to be encoded by the remaining XOR mask. */
-      /* XOR the input with the IV and store input into IV. */
-      for (ivp=c->u_iv.iv+c->cipher->blocksize - c->unused;
-           inbuflen;
-           inbuflen--, c->unused-- )
-        *outbuf++ = (*ivp++ ^= *inbuf++);
-      return 0;
-    }
 
-  if ( c->unused )
-    {
-      /* XOR the input with the IV and store input into IV */
-      inbuflen -= c->unused;
-      for(ivp=c->u_iv.iv+blocksize - c->unused; c->unused; c->unused-- )
-        *outbuf++ = (*ivp++ ^= *inbuf++);
-    }
-
-  /* Now we can process complete blocks.  We use a loop as long as we
-     have at least 2 blocks and use conditions for the rest.  This
-     also allows to use a bulk encryption function if available.  */
-  if (inbuflen >= blocksize_x_2 && c->bulk.cfb_enc)
-    {
-      unsigned int nblocks = inbuflen / blocksize;
-      c->bulk.cfb_enc (&c->context.c, c->u_iv.iv, outbuf, inbuf, nblocks);
-      outbuf += nblocks * blocksize;
-      inbuf  += nblocks * blocksize;
-      inbuflen -= nblocks * blocksize;
-    }
-  else
-    {
-      while ( inbuflen >= blocksize_x_2 )
-        {
-          int i;
-          /* Encrypt the IV. */
-          c->cipher->encrypt ( &c->context.c, c->u_iv.iv, c->u_iv.iv );
-          /* XOR the input with the IV and store input into IV.  */
-          for(ivp=c->u_iv.iv,i=0; i < blocksize; i++ )
-            *outbuf++ = (*ivp++ ^= *inbuf++);
-          inbuflen -= blocksize;
-        }
-    }
+static gcry_err_code_t
+do_stream_encrypt (gcry_cipher_hd_t c,
+                unsigned char *outbuf, size_t outbuflen,
+                const unsigned char *inbuf, size_t inbuflen)
+{
+  (void)outbuflen;
+  c->spec->stencrypt (&c->context.c, outbuf, (void *)inbuf, inbuflen);
+  return 0;
+}
 
-  if ( inbuflen >= blocksize )
-    {
-      int i;
-      /* Save the current IV and then encrypt the IV. */
-      memcpy( c->lastiv, c->u_iv.iv, blocksize );
-      c->cipher->encrypt ( &c->context.c, c->u_iv.iv, c->u_iv.iv );
-      /* XOR the input with the IV and store input into IV */
-      for(ivp=c->u_iv.iv,i=0; i < blocksize; i++ )
-        *outbuf++ = (*ivp++ ^= *inbuf++);
-      inbuflen -= blocksize;
-    }
-  if ( inbuflen )
-    {
-      /* Save the current IV and then encrypt the IV. */
-      memcpy( c->lastiv, c->u_iv.iv, blocksize );
-      c->cipher->encrypt ( &c->context.c, c->u_iv.iv, c->u_iv.iv );
-      c->unused = blocksize;
-      /* Apply the XOR. */
-      c->unused -= inbuflen;
-      for(ivp=c->u_iv.iv; inbuflen; inbuflen-- )
-        *outbuf++ = (*ivp++ ^= *inbuf++);
-    }
+static gcry_err_code_t
+do_stream_decrypt (gcry_cipher_hd_t c,
+                unsigned char *outbuf, size_t outbuflen,
+                const unsigned char *inbuf, size_t inbuflen)
+{
+  (void)outbuflen;
+  c->spec->stdecrypt (&c->context.c, outbuf, (void *)inbuf, inbuflen);
   return 0;
 }
 
 
 static gcry_err_code_t
-do_cfb_decrypt (gcry_cipher_hd_t c,
-                unsigned char *outbuf, unsigned int outbuflen,
-                const unsigned char *inbuf, unsigned int inbuflen)
+do_encrypt_none_unknown (gcry_cipher_hd_t c, byte *outbuf, size_t outbuflen,
+                         const byte *inbuf, size_t inbuflen)
 {
-  unsigned char *ivp;
-  unsigned long temp;
-  int i;
-  size_t blocksize = c->cipher->blocksize;
-  size_t blocksize_x_2 = blocksize + blocksize;
+  gcry_err_code_t rc;
 
-  if (outbuflen < inbuflen)
-    return GPG_ERR_BUFFER_TOO_SHORT;
+  (void)outbuflen;
 
-  if (inbuflen <= c->unused)
+  switch (c->mode)
     {
-      /* Short enough to be encoded by the remaining XOR mask. */
-      /* XOR the input with the IV and store input into IV. */
-      for (ivp=c->u_iv.iv+blocksize - c->unused;
-           inbuflen;
-           inbuflen--, c->unused--)
-        {
-          temp = *inbuf++;
-          *outbuf++ = *ivp ^ temp;
-          *ivp++ = temp;
-        }
-      return 0;
-    }
+    case GCRY_CIPHER_MODE_CMAC:
+      rc = GPG_ERR_INV_CIPHER_MODE;
+      break;
 
-  if (c->unused)
-    {
-      /* XOR the input with the IV and store input into IV. */
-      inbuflen -= c->unused;
-      for (ivp=c->u_iv.iv+blocksize - c->unused; c->unused; c->unused-- )
+    case GCRY_CIPHER_MODE_NONE:
+      if (fips_mode () || !_gcry_get_debug_flag (0))
         {
-          temp = *inbuf++;
-          *outbuf++ = *ivp ^ temp;
-          *ivp++ = temp;
+          fips_signal_error ("cipher mode NONE used");
+          rc = GPG_ERR_INV_CIPHER_MODE;
         }
-    }
-
-  /* Now we can process complete blocks.  We use a loop as long as we
-     have at least 2 blocks and use conditions for the rest.  This
-     also allows to use a bulk encryption function if available.  */
-  if (inbuflen >= blocksize_x_2 && c->bulk.cfb_dec)
-    {
-      unsigned int nblocks = inbuflen / blocksize;
-      c->bulk.cfb_dec (&c->context.c, c->u_iv.iv, outbuf, inbuf, nblocks);
-      outbuf += nblocks * blocksize;
-      inbuf  += nblocks * blocksize;
-      inbuflen -= nblocks * blocksize;
-    }
-  else
-    {
-      while (inbuflen >= blocksize_x_2 )
+      else
         {
-          /* Encrypt the IV. */
-          c->cipher->encrypt ( &c->context.c, c->u_iv.iv, c->u_iv.iv );
-          /* XOR the input with the IV and store input into IV. */
-          for (ivp=c->u_iv.iv,i=0; i < blocksize; i++ )
-            {
-              temp = *inbuf++;
-              *outbuf++ = *ivp ^ temp;
-              *ivp++ = temp;
-            }
-          inbuflen -= blocksize;
+          if (inbuf != outbuf)
+            memmove (outbuf, inbuf, inbuflen);
+          rc = 0;
         }
-    }
+      break;
 
-  if (inbuflen >= blocksize )
-    {
-      /* Save the current IV and then encrypt the IV. */
-      memcpy ( c->lastiv, c->u_iv.iv, blocksize);
-      c->cipher->encrypt ( &c->context.c, c->u_iv.iv, c->u_iv.iv );
-      /* XOR the input with the IV and store input into IV */
-      for (ivp=c->u_iv.iv,i=0; i < blocksize; i++ )
-        {
-          temp = *inbuf++;
-          *outbuf++ = *ivp ^ temp;
-          *ivp++ = temp;
-        }
-      inbuflen -= blocksize;
+    default:
+      log_fatal ("cipher_encrypt: invalid mode %d\n", c->mode );
+      rc = GPG_ERR_INV_CIPHER_MODE;
+      break;
     }
 
-  if (inbuflen)
-    {
-      /* Save the current IV and then encrypt the IV. */
-      memcpy ( c->lastiv, c->u_iv.iv, blocksize );
-      c->cipher->encrypt ( &c->context.c, c->u_iv.iv, c->u_iv.iv );
-      c->unused = blocksize;
-      /* Apply the XOR. */
-      c->unused -= inbuflen;
-      for (ivp=c->u_iv.iv; inbuflen; inbuflen-- )
-        {
-          temp = *inbuf++;
-          *outbuf++ = *ivp ^ temp;
-          *ivp++ = temp;
-        }
-    }
-  return 0;
+  return rc;
 }
 
-
 static gcry_err_code_t
-do_ofb_encrypt (gcry_cipher_hd_t c,
-                unsigned char *outbuf, unsigned int outbuflen,
-                const unsigned char *inbuf, unsigned int inbuflen)
+do_decrypt_none_unknown (gcry_cipher_hd_t c, byte *outbuf, size_t outbuflen,
+                         const byte *inbuf, size_t inbuflen)
 {
-  unsigned char *ivp;
-  size_t blocksize = c->cipher->blocksize;
+  gcry_err_code_t rc;
 
-  if (outbuflen < inbuflen)
-    return GPG_ERR_BUFFER_TOO_SHORT;
+  (void)outbuflen;
 
-  if ( inbuflen <= c->unused )
+  switch (c->mode)
     {
-      /* Short enough to be encoded by the remaining XOR mask. */
-      /* XOR the input with the IV */
-      for (ivp=c->u_iv.iv+c->cipher->blocksize - c->unused;
-           inbuflen;
-           inbuflen--, c->unused-- )
-        *outbuf++ = (*ivp++ ^ *inbuf++);
-      return 0;
-    }
+    case GCRY_CIPHER_MODE_CMAC:
+      rc = GPG_ERR_INV_CIPHER_MODE;
+      break;
 
-  if( c->unused )
-    {
-      inbuflen -= c->unused;
-      for(ivp=c->u_iv.iv+blocksize - c->unused; c->unused; c->unused-- )
-        *outbuf++ = (*ivp++ ^ *inbuf++);
-    }
+    case GCRY_CIPHER_MODE_NONE:
+      if (fips_mode () || !_gcry_get_debug_flag (0))
+        {
+          fips_signal_error ("cipher mode NONE used");
+          rc = GPG_ERR_INV_CIPHER_MODE;
+        }
+      else
+        {
+          if (inbuf != outbuf)
+            memmove (outbuf, inbuf, inbuflen);
+          rc = 0;
+        }
+      break;
 
-  /* Now we can process complete blocks. */
-  while ( inbuflen >= blocksize )
-    {
-      int i;
-      /* Encrypt the IV (and save the current one). */
-      memcpy( c->lastiv, c->u_iv.iv, blocksize );
-      c->cipher->encrypt ( &c->context.c, c->u_iv.iv, c->u_iv.iv );
-
-      for (ivp=c->u_iv.iv,i=0; i < blocksize; i++ )
-        *outbuf++ = (*ivp++ ^ *inbuf++);
-      inbuflen -= blocksize;
-    }
-  if ( inbuflen )
-    { /* process the remaining bytes */
-      memcpy( c->lastiv, c->u_iv.iv, blocksize );
-      c->cipher->encrypt ( &c->context.c, c->u_iv.iv, c->u_iv.iv );
-      c->unused = blocksize;
-      c->unused -= inbuflen;
-      for(ivp=c->u_iv.iv; inbuflen; inbuflen-- )
-        *outbuf++ = (*ivp++ ^ *inbuf++);
+    default:
+      log_fatal ("cipher_decrypt: invalid mode %d\n", c->mode );
+      rc = GPG_ERR_INV_CIPHER_MODE;
+      break;
     }
-  return 0;
+
+  return rc;
 }
 
-static gcry_err_code_t
-do_ofb_decrypt (gcry_cipher_hd_t c,
-                unsigned char *outbuf, unsigned int outbuflen,
-                const unsigned char *inbuf, unsigned int inbuflen)
-{
-  unsigned char *ivp;
-  size_t blocksize = c->cipher->blocksize;
 
-  if (outbuflen < inbuflen)
-    return GPG_ERR_BUFFER_TOO_SHORT;
+/****************
+ * Encrypt IN and write it to OUT.  If IN is NULL, in-place encryption has
+ * been requested.
+ */
+gcry_err_code_t
+_gcry_cipher_encrypt (gcry_cipher_hd_t h, void *out, size_t outsize,
+                      const void *in, size_t inlen)
+{
+  gcry_err_code_t rc;
 
-  if( inbuflen <= c->unused )
+  if (!in)  /* Caller requested in-place encryption.  */
     {
-      /* Short enough to be encoded by the remaining XOR mask. */
-      for (ivp=c->u_iv.iv+blocksize - c->unused; inbuflen; 
inbuflen--,c->unused--)
-        *outbuf++ = *ivp++ ^ *inbuf++;
-      return 0;
+      in = out;
+      inlen = outsize;
     }
 
-  if ( c->unused )
+  if (h->mode != GCRY_CIPHER_MODE_NONE && !h->marks.key)
     {
-      inbuflen -= c->unused;
-      for (ivp=c->u_iv.iv+blocksize - c->unused; c->unused; c->unused-- )
-        *outbuf++ = *ivp++ ^ *inbuf++;
+      log_error ("cipher_encrypt: key not set\n");
+      return GPG_ERR_MISSING_KEY;
     }
 
-  /* Now we can process complete blocks. */
-  while ( inbuflen >= blocksize )
-    {
-      int i;
-      /* Encrypt the IV (and save the current one). */
-      memcpy( c->lastiv, c->u_iv.iv, blocksize );
-      c->cipher->encrypt ( &c->context.c, c->u_iv.iv, c->u_iv.iv );
-      for (ivp=c->u_iv.iv,i=0; i < blocksize; i++ )
-        *outbuf++ = *ivp++ ^ *inbuf++;
-      inbuflen -= blocksize;
-    }
-  if ( inbuflen )
-    { /* Process the remaining bytes. */
-      /* Encrypt the IV (and save the current one). */
-      memcpy( c->lastiv, c->u_iv.iv, blocksize );
-      c->cipher->encrypt ( &c->context.c, c->u_iv.iv, c->u_iv.iv );
-      c->unused = blocksize;
-      c->unused -= inbuflen;
-      for (ivp=c->u_iv.iv; inbuflen; inbuflen-- )
-        *outbuf++ = *ivp++ ^ *inbuf++;
-    }
-  return 0;
-}
+  rc = h->mode_ops.encrypt (h, out, outsize, in, inlen);
 
+  /* Failsafe: Make sure that the plaintext will never make it into
+     OUT if the encryption returned an error.  */
+  if (rc && out)
+    memset (out, 0x42, outsize);
 
-static gcry_err_code_t
-do_ctr_encrypt (gcry_cipher_hd_t c,
-                unsigned char *outbuf, unsigned int outbuflen,
-                const unsigned char *inbuf, unsigned int inbuflen)
-{
-  unsigned int n;
-  int i;
-  unsigned int blocksize = c->cipher->blocksize;
-  unsigned int nblocks;
+  return rc;
+}
 
-  if (outbuflen < inbuflen)
-    return GPG_ERR_BUFFER_TOO_SHORT;
 
-  /* First process a left over encrypted counter.  */
-  if (c->unused)
+/****************
+ * Decrypt IN and write it to OUT.  If IN is NULL, in-place encryption has
+ * been requested.
+ */
+gcry_err_code_t
+_gcry_cipher_decrypt (gcry_cipher_hd_t h, void *out, size_t outsize,
+                      const void *in, size_t inlen)
+{
+  if (!in) /* Caller requested in-place encryption. */
     {
-      gcry_assert (c->unused < blocksize);
-      i = blocksize - c->unused;
-      for (n=0; c->unused && n < inbuflen; c->unused--, n++, i++)
-        {
-          /* XOR input with encrypted counter and store in output.  */
-          outbuf[n] = inbuf[n] ^ c->lastiv[i];
-        }
-      inbuf  += n;
-      outbuf += n;
-      inbuflen -= n;
+      in = out;
+      inlen = outsize;
     }
 
-
-  /* Use a bulk method if available.  */
-  nblocks = inbuflen / blocksize;
-  if (nblocks && c->bulk.ctr_enc)
+  if (h->mode != GCRY_CIPHER_MODE_NONE && !h->marks.key)
     {
-      c->bulk.ctr_enc (&c->context.c, c->u_ctr.ctr, outbuf, inbuf, nblocks);
-      inbuf  += nblocks * blocksize;
-      outbuf += nblocks * blocksize;
-      inbuflen -= nblocks * blocksize;
+      log_error ("cipher_decrypt: key not set\n");
+      return GPG_ERR_MISSING_KEY;
     }
 
-  /* If we don't have a bulk method use the standard method.  We also
-     use this method for the a remaining partial block.  */
-  if (inbuflen)
-    {
-      unsigned char tmp[MAX_BLOCKSIZE];
-
-      for (n=0; n < inbuflen; n++)
-        {
-          if ((n % blocksize) == 0)
-            {
-              c->cipher->encrypt (&c->context.c, tmp, c->u_ctr.ctr);
-
-              for (i = blocksize; i > 0; i--)
-                {
-                  c->u_ctr.ctr[i-1]++;
-                  if (c->u_ctr.ctr[i-1] != 0)
-                    break;
-                }
-            }
-
-          /* XOR input with encrypted counter and store in output.  */
-          outbuf[n] = inbuf[n] ^ tmp[n % blocksize];
-        }
+  return h->mode_ops.decrypt (h, out, outsize, in, inlen);
+}
 
-      /* Save the unused bytes of the counter.  */
-      n %= blocksize;
-      c->unused = (blocksize - n) % blocksize;
-      if (c->unused)
-        memcpy (c->lastiv+n, tmp+n, c->unused);
 
-      wipememory (tmp, sizeof tmp);
+/****************
+ * Used for PGP's somewhat strange CFB mode. Only works if
+ * the corresponding flag is set.
+ */
+static void
+cipher_sync (gcry_cipher_hd_t c)
+{
+  if ((c->flags & GCRY_CIPHER_ENABLE_SYNC) && c->unused)
+    {
+      memmove (c->u_iv.iv + c->unused,
+               c->u_iv.iv, c->spec->blocksize - c->unused);
+      memcpy (c->u_iv.iv,
+              c->lastiv + c->spec->blocksize - c->unused, c->unused);
+      c->unused = 0;
     }
-
-  return 0;
 }
 
-static gcry_err_code_t
-do_ctr_decrypt (gcry_cipher_hd_t c,
-                unsigned char *outbuf, unsigned int outbuflen,
-                const unsigned char *inbuf, unsigned int inbuflen)
+
+gcry_err_code_t
+_gcry_cipher_setkey (gcry_cipher_hd_t hd, const void *key, size_t keylen)
 {
-  return do_ctr_encrypt (c, outbuf, outbuflen, inbuf, inbuflen);
+  return cipher_setkey (hd, (void*)key, keylen);
 }
 
 
-/* Perform the AES-Wrap algorithm as specified by RFC3394.  We
-   implement this as a mode usable with any cipher algorithm of
-   blocksize 128.  */
-static gcry_err_code_t
-do_aeswrap_encrypt (gcry_cipher_hd_t c, byte *outbuf, unsigned int outbuflen,
-                    const byte *inbuf, unsigned int inbuflen )
+gcry_err_code_t
+_gcry_cipher_setiv (gcry_cipher_hd_t hd, const void *iv, size_t ivlen)
 {
-  int j, x;
-  unsigned int n, i;
-  unsigned char *r, *a, *b;
-  unsigned char t[8];
-
-#if MAX_BLOCKSIZE < 8
-#error Invalid block size
-#endif
-  /* We require a cipher with a 128 bit block length.  */
-  if (c->cipher->blocksize != 16)
-    return GPG_ERR_INV_LENGTH;
-
-  /* The output buffer must be able to hold the input data plus one
-     additional block.  */
-  if (outbuflen < inbuflen + 8)
-    return GPG_ERR_BUFFER_TOO_SHORT;
-  /* Input data must be multiple of 64 bits.  */
-  if (inbuflen % 8)
-    return GPG_ERR_INV_ARG;
+  return hd->mode_ops.setiv (hd, iv, ivlen);
+}
 
-  n = inbuflen / 8;
 
-  /* We need at least two 64 bit blocks.  */
-  if (n < 2)
+/* Set counter for CTR mode.  (CTR,CTRLEN) must denote a buffer of
+   block size length, or (NULL,0) to set the CTR to the all-zero
+   block. */
+gpg_err_code_t
+_gcry_cipher_setctr (gcry_cipher_hd_t hd, const void *ctr, size_t ctrlen)
+{
+  if (ctr && ctrlen == hd->spec->blocksize)
+    {
+      memcpy (hd->u_ctr.ctr, ctr, hd->spec->blocksize);
+      hd->unused = 0;
+    }
+  else if (!ctr || !ctrlen)
+    {
+      memset (hd->u_ctr.ctr, 0, hd->spec->blocksize);
+      hd->unused = 0;
+    }
+  else
     return GPG_ERR_INV_ARG;
 
-  r = outbuf;
-  a = outbuf;  /* We store A directly in OUTBUF.  */
-  b = c->u_ctr.ctr;  /* B is also used to concatenate stuff.  */
+  return 0;
+}
 
-  /* If an IV has been set we use that IV as the Alternative Initial
-     Value; if it has not been set we use the standard value.  */
-  if (c->marks.iv)
-    memcpy (a, c->u_iv.iv, 8);
+gpg_err_code_t
+_gcry_cipher_getctr (gcry_cipher_hd_t hd, void *ctr, size_t ctrlen)
+{
+  if (ctr && ctrlen == hd->spec->blocksize)
+    memcpy (ctr, hd->u_ctr.ctr, hd->spec->blocksize);
   else
-    memset (a, 0xa6, 8);
-
-  /* Copy the inbuf to the outbuf. */
-  memmove (r+8, inbuf, inbuflen);
-
-  memset (t, 0, sizeof t); /* t := 0.  */
-
-  for (j = 0; j <= 5; j++)
-    {
-      for (i = 1; i <= n; i++)
-        {
-          /* B := AES_k( A | R[i] ) */
-          memcpy (b, a, 8);
-          memcpy (b+8, r+i*8, 8);
-          c->cipher->encrypt (&c->context.c, b, b);
-          /* t := t + 1  */
-         for (x = 7; x >= 0; x--)
-           {
-             t[x]++;
-             if (t[x])
-               break;
-           }
-          /* A := MSB_64(B) ^ t */
-          for (x=0; x < 8; x++)
-            a[x] = b[x] ^ t[x];
-          /* R[i] := LSB_64(B) */
-          memcpy (r+i*8, b+8, 8);
-        }
-   }
+    return GPG_ERR_INV_ARG;
 
   return 0;
 }
 
-/* Perform the AES-Unwrap algorithm as specified by RFC3394.  We
-   implement this as a mode usable with any cipher algorithm of
-   blocksize 128.  */
-static gcry_err_code_t
-do_aeswrap_decrypt (gcry_cipher_hd_t c, byte *outbuf, unsigned int outbuflen,
-                    const byte *inbuf, unsigned int inbuflen)
+
+gcry_err_code_t
+_gcry_cipher_authenticate (gcry_cipher_hd_t hd, const void *abuf,
+                           size_t abuflen)
 {
-  int j, x;
-  unsigned int n, i;
-  unsigned char *r, *a, *b;
-  unsigned char t[8];
+  gcry_err_code_t rc;
 
-#if MAX_BLOCKSIZE < 8
-#error Invalid block size
-#endif
-  /* We require a cipher with a 128 bit block length.  */
-  if (c->cipher->blocksize != 16)
-    return GPG_ERR_INV_LENGTH;
+  if (hd->mode_ops.authenticate)
+    {
+      rc = hd->mode_ops.authenticate (hd, abuf, abuflen);
+    }
+  else
+    {
+      log_error ("gcry_cipher_authenticate: invalid mode %d\n", hd->mode);
+      rc = GPG_ERR_INV_CIPHER_MODE;
+    }
 
-  /* The output buffer must be able to hold the input data minus one
-     additional block.  Fixme: The caller has more restrictive checks
-     - we may want to fix them for this mode.  */
-  if (outbuflen + 8  < inbuflen)
-    return GPG_ERR_BUFFER_TOO_SHORT;
-  /* Input data must be multiple of 64 bits.  */
-  if (inbuflen % 8)
-    return GPG_ERR_INV_ARG;
+  return rc;
+}
 
-  n = inbuflen / 8;
 
-  /* We need at least three 64 bit blocks.  */
-  if (n < 3)
-    return GPG_ERR_INV_ARG;
+gcry_err_code_t
+_gcry_cipher_gettag (gcry_cipher_hd_t hd, void *outtag, size_t taglen)
+{
+  gcry_err_code_t rc;
 
-  r = outbuf;
-  a = c->lastiv;  /* We use c->LASTIV as buffer for A.  */
-  b = c->u_ctr.ctr;     /* B is also used to concatenate stuff.  */
+  if (hd->mode_ops.get_tag)
+    {
+      rc = hd->mode_ops.get_tag (hd, outtag, taglen);
+    }
+  else
+    {
+      log_error ("gcry_cipher_gettag: invalid mode %d\n", hd->mode);
+      rc = GPG_ERR_INV_CIPHER_MODE;
+    }
 
-  /* Copy the inbuf to the outbuf and save A. */
-  memcpy (a, inbuf, 8);
-  memmove (r, inbuf+8, inbuflen-8);
-  n--; /* Reduce to actual number of data blocks.  */
+  return rc;
+}
 
-  /* t := 6 * n  */
-  i = n * 6;  /* The range is valid because: n = inbuflen / 8 - 1.  */
-  for (x=0; x < 8 && x < sizeof (i); x++)
-    t[7-x] = i >> (8*x);
-  for (; x < 8; x++)
-    t[7-x] = 0;
 
-  for (j = 5; j >= 0; j--)
-    {
-      for (i = n; i >= 1; i--)
-        {
-          /* B := AES_k^1( (A ^ t)| R[i] ) */
-          for (x = 0; x < 8; x++)
-            b[x] = a[x] ^ t[x];
-          memcpy (b+8, r+(i-1)*8, 8);
-          c->cipher->decrypt (&c->context.c, b, b);
-          /* t := t - 1  */
-         for (x = 7; x >= 0; x--)
-           {
-             t[x]--;
-             if (t[x] != 0xff)
-               break;
-           }
-          /* A := MSB_64(B) */
-          memcpy (a, b, 8);
-          /* R[i] := LSB_64(B) */
-          memcpy (r+(i-1)*8, b+8, 8);
-        }
-   }
+gcry_err_code_t
+_gcry_cipher_checktag (gcry_cipher_hd_t hd, const void *intag, size_t taglen)
+{
+  gcry_err_code_t rc;
 
-  /* If an IV has been set we compare against this Alternative Initial
-     Value; if it has not been set we compare against the standard IV.  */
-  if (c->marks.iv)
-    j = memcmp (a, c->u_iv.iv, 8);
+  if (hd->mode_ops.check_tag)
+    {
+      rc = hd->mode_ops.check_tag (hd, intag, taglen);
+    }
   else
     {
-      for (j=0, x=0; x < 8; x++)
-        if (a[x] != 0xa6)
-          {
-            j=1;
-            break;
-          }
+      log_error ("gcry_cipher_checktag: invalid mode %d\n", hd->mode);
+      rc = GPG_ERR_INV_CIPHER_MODE;
     }
-  return j? GPG_ERR_CHECKSUM : 0;
+
+  return rc;
 }
 
 
-/****************
- * Encrypt INBUF to OUTBUF with the mode selected at open.
- * inbuf and outbuf may overlap or be the same.
- * Depending on the mode some constraints apply to INBUFLEN.
- */
-static gcry_err_code_t
-cipher_encrypt (gcry_cipher_hd_t c, byte *outbuf, unsigned int outbuflen,
-               const byte *inbuf, unsigned int inbuflen)
-{
-  gcry_err_code_t rc;
 
-  switch (c->mode)
+static void
+_gcry_cipher_setup_mode_ops(gcry_cipher_hd_t c, int mode)
+{
+  /* Setup encryption and decryption routines. */
+  switch (mode)
     {
+    case GCRY_CIPHER_MODE_STREAM:
+      c->mode_ops.encrypt = do_stream_encrypt;
+      c->mode_ops.decrypt = do_stream_decrypt;
+      break;
+
     case GCRY_CIPHER_MODE_ECB:
-      rc = do_ecb_encrypt (c, outbuf, outbuflen, inbuf, inbuflen);
+      c->mode_ops.encrypt = do_ecb_encrypt;
+      c->mode_ops.decrypt = do_ecb_decrypt;
       break;
 
     case GCRY_CIPHER_MODE_CBC:
-      rc = do_cbc_encrypt (c, outbuf, outbuflen, inbuf, inbuflen);
+      if (!(c->flags & GCRY_CIPHER_CBC_CTS))
+        {
+          c->mode_ops.encrypt = _gcry_cipher_cbc_encrypt;
+          c->mode_ops.decrypt = _gcry_cipher_cbc_decrypt;
+        }
+      else
+        {
+          c->mode_ops.encrypt = _gcry_cipher_cbc_cts_encrypt;
+          c->mode_ops.decrypt = _gcry_cipher_cbc_cts_decrypt;
+        }
       break;
 
     case GCRY_CIPHER_MODE_CFB:
-      rc = do_cfb_encrypt (c, outbuf, outbuflen, inbuf, inbuflen);
+      c->mode_ops.encrypt = _gcry_cipher_cfb_encrypt;
+      c->mode_ops.decrypt = _gcry_cipher_cfb_decrypt;
+      break;
+
+    case GCRY_CIPHER_MODE_CFB8:
+      c->mode_ops.encrypt = _gcry_cipher_cfb8_encrypt;
+      c->mode_ops.decrypt = _gcry_cipher_cfb8_decrypt;
       break;
 
     case GCRY_CIPHER_MODE_OFB:
-      rc = do_ofb_encrypt (c, outbuf, outbuflen, inbuf, inbuflen);
+      c->mode_ops.encrypt = _gcry_cipher_ofb_encrypt;
+      c->mode_ops.decrypt = _gcry_cipher_ofb_encrypt;
       break;
 
     case GCRY_CIPHER_MODE_CTR:
-      rc = do_ctr_encrypt (c, outbuf, outbuflen, inbuf, inbuflen);
+      c->mode_ops.encrypt = _gcry_cipher_ctr_encrypt;
+      c->mode_ops.decrypt = _gcry_cipher_ctr_encrypt;
       break;
 
     case GCRY_CIPHER_MODE_AESWRAP:
-      rc = do_aeswrap_encrypt (c, outbuf, outbuflen, inbuf, inbuflen);
+      c->mode_ops.decrypt = _gcry_cipher_keywrap_decrypt_auto;
+      if (!(c->flags & GCRY_CIPHER_EXTENDED))
+        c->mode_ops.encrypt = _gcry_cipher_keywrap_encrypt;
+      else
+        c->mode_ops.encrypt = _gcry_cipher_keywrap_encrypt_padding;
       break;
 
-    case GCRY_CIPHER_MODE_STREAM:
-      c->cipher->stencrypt (&c->context.c,
-                            outbuf, (byte*)/*arggg*/inbuf, inbuflen);
-      rc = 0;
+    case GCRY_CIPHER_MODE_CCM:
+      c->mode_ops.encrypt = _gcry_cipher_ccm_encrypt;
+      c->mode_ops.decrypt = _gcry_cipher_ccm_decrypt;
       break;
 
-    case GCRY_CIPHER_MODE_NONE:
-      if (fips_mode () || !_gcry_get_debug_flag (0))
-        {
-          fips_signal_error ("cipher mode NONE used");
-          rc = GPG_ERR_INV_CIPHER_MODE;
-        }
-      else
-        {
-          if (inbuf != outbuf)
-            memmove (outbuf, inbuf, inbuflen);
-          rc = 0;
-        }
+    case GCRY_CIPHER_MODE_EAX:
+      c->mode_ops.encrypt = _gcry_cipher_eax_encrypt;
+      c->mode_ops.decrypt = _gcry_cipher_eax_decrypt;
       break;
 
-    default:
-      log_fatal ("cipher_encrypt: invalid mode %d\n", c->mode );
-      rc = GPG_ERR_INV_CIPHER_MODE;
+    case GCRY_CIPHER_MODE_GCM:
+      c->mode_ops.encrypt = _gcry_cipher_gcm_encrypt;
+      c->mode_ops.decrypt = _gcry_cipher_gcm_decrypt;
       break;
-    }
-
-  return rc;
-}
-
-
-/****************
- * Encrypt IN and write it to OUT.  If IN is NULL, in-place encryption has
- * been requested.
- */
-gcry_error_t
-gcry_cipher_encrypt (gcry_cipher_hd_t h, void *out, size_t outsize,
-                     const void *in, size_t inlen)
-{
-  gcry_err_code_t err;
 
-  if (!in)  /* Caller requested in-place encryption.  */
-    err = cipher_encrypt (h, out, outsize, out, outsize);
-  else
-    err = cipher_encrypt (h, out, outsize, in, inlen);
-
-  /* Failsafe: Make sure that the plaintext will never make it into
-     OUT if the encryption returned an error.  */
-  if (err && out)
-    memset (out, 0x42, outsize);
+    case GCRY_CIPHER_MODE_POLY1305:
+      c->mode_ops.encrypt = _gcry_cipher_poly1305_encrypt;
+      c->mode_ops.decrypt = _gcry_cipher_poly1305_decrypt;
+      break;
 
-  return gcry_error (err);
-}
+    case GCRY_CIPHER_MODE_OCB:
+      c->mode_ops.encrypt = _gcry_cipher_ocb_encrypt;
+      c->mode_ops.decrypt = _gcry_cipher_ocb_decrypt;
+      break;
 
+    case GCRY_CIPHER_MODE_XTS:
+      c->mode_ops.encrypt = _gcry_cipher_xts_encrypt;
+      c->mode_ops.decrypt = _gcry_cipher_xts_decrypt;
+      break;
 
+    case GCRY_CIPHER_MODE_SIV:
+      c->mode_ops.encrypt = _gcry_cipher_siv_encrypt;
+      c->mode_ops.decrypt = _gcry_cipher_siv_decrypt;
+      break;
 
-/****************
- * Decrypt INBUF to OUTBUF with the mode selected at open.
- * inbuf and outbuf may overlap or be the same.
- * Depending on the mode some some contraints apply to INBUFLEN.
- */
-static gcry_err_code_t
-cipher_decrypt (gcry_cipher_hd_t c, byte *outbuf, unsigned int outbuflen,
-                const byte *inbuf, unsigned int inbuflen)
-{
-  gcry_err_code_t rc;
+    case GCRY_CIPHER_MODE_GCM_SIV:
+      c->mode_ops.encrypt = _gcry_cipher_gcm_siv_encrypt;
+      c->mode_ops.decrypt = _gcry_cipher_gcm_siv_decrypt;
+      break;
 
-  switch (c->mode)
-    {
-    case GCRY_CIPHER_MODE_ECB:
-      rc = do_ecb_decrypt (c, outbuf, outbuflen, inbuf, inbuflen);
+    default:
+      c->mode_ops.encrypt = do_encrypt_none_unknown;
+      c->mode_ops.decrypt = do_decrypt_none_unknown;
       break;
+    }
 
-    case GCRY_CIPHER_MODE_CBC:
-      rc = do_cbc_decrypt (c, outbuf, outbuflen, inbuf, inbuflen);
+  /* Setup IV setting routine. */
+  switch (mode)
+    {
+    case GCRY_CIPHER_MODE_CCM:
+      c->mode_ops.setiv = _gcry_cipher_ccm_set_nonce;
       break;
 
-    case GCRY_CIPHER_MODE_CFB:
-      rc = do_cfb_decrypt (c, outbuf, outbuflen, inbuf, inbuflen);
+    case GCRY_CIPHER_MODE_EAX:
+      c->mode_ops.setiv = _gcry_cipher_eax_set_nonce;
       break;
 
-    case GCRY_CIPHER_MODE_OFB:
-      rc = do_ofb_decrypt (c, outbuf, outbuflen, inbuf, inbuflen);
+    case GCRY_CIPHER_MODE_GCM:
+      c->mode_ops.setiv =  _gcry_cipher_gcm_setiv;
       break;
 
-    case GCRY_CIPHER_MODE_CTR:
-      rc = do_ctr_decrypt (c, outbuf, outbuflen, inbuf, inbuflen);
+    case GCRY_CIPHER_MODE_POLY1305:
+      c->mode_ops.setiv = _gcry_cipher_poly1305_setiv;
       break;
 
-    case GCRY_CIPHER_MODE_AESWRAP:
-      rc = do_aeswrap_decrypt (c, outbuf, outbuflen, inbuf, inbuflen);
+    case GCRY_CIPHER_MODE_OCB:
+      c->mode_ops.setiv = _gcry_cipher_ocb_set_nonce;
       break;
 
-    case GCRY_CIPHER_MODE_STREAM:
-      c->cipher->stdecrypt (&c->context.c,
-                            outbuf, (byte*)/*arggg*/inbuf, inbuflen);
-      rc = 0;
+    case GCRY_CIPHER_MODE_SIV:
+      c->mode_ops.setiv = _gcry_cipher_siv_set_nonce;
       break;
 
-    case GCRY_CIPHER_MODE_NONE:
-      if (fips_mode () || !_gcry_get_debug_flag (0))
-        {
-          fips_signal_error ("cipher mode NONE used");
-          rc = GPG_ERR_INV_CIPHER_MODE;
-        }
-      else
-        {
-          if (inbuf != outbuf)
-            memmove (outbuf, inbuf, inbuflen);
-          rc = 0;
-        }
+    case GCRY_CIPHER_MODE_GCM_SIV:
+      c->mode_ops.setiv = _gcry_cipher_gcm_siv_set_nonce;
       break;
 
     default:
-      log_fatal ("cipher_decrypt: invalid mode %d\n", c->mode );
-      rc = GPG_ERR_INV_CIPHER_MODE;
+      c->mode_ops.setiv = cipher_setiv;
       break;
     }
 
-  return rc;
-}
-
-
-gcry_error_t
-gcry_cipher_decrypt (gcry_cipher_hd_t h, void *out, size_t outsize,
-                    const void *in, size_t inlen)
-{
-  gcry_err_code_t err;
-
-  if (!in) /* Caller requested in-place encryption. */
-    err = cipher_decrypt (h, out, outsize, out, outsize);
-  else
-    err = cipher_decrypt (h, out, outsize, in, inlen);
 
-  return gcry_error (err);
-}
+  /* Setup authentication routines for AEAD modes. */
+  switch (mode)
+    {
+    case GCRY_CIPHER_MODE_CCM:
+      c->mode_ops.authenticate = _gcry_cipher_ccm_authenticate;
+      c->mode_ops.get_tag      = _gcry_cipher_ccm_get_tag;
+      c->mode_ops.check_tag    = _gcry_cipher_ccm_check_tag;
+      break;
 
+    case GCRY_CIPHER_MODE_CMAC:
+      c->mode_ops.authenticate = _gcry_cipher_cmac_authenticate;
+      c->mode_ops.get_tag      = _gcry_cipher_cmac_get_tag;
+      c->mode_ops.check_tag    = _gcry_cipher_cmac_check_tag;
+      break;
 
+    case GCRY_CIPHER_MODE_EAX:
+      c->mode_ops.authenticate = _gcry_cipher_eax_authenticate;
+      c->mode_ops.get_tag      = _gcry_cipher_eax_get_tag;
+      c->mode_ops.check_tag    = _gcry_cipher_eax_check_tag;
+      break;
 
-/****************
- * Used for PGP's somewhat strange CFB mode. Only works if
- * the corresponding flag is set.
- */
-static void
-cipher_sync (gcry_cipher_hd_t c)
-{
-  if ((c->flags & GCRY_CIPHER_ENABLE_SYNC) && c->unused)
-    {
-      memmove (c->u_iv.iv + c->unused,
-               c->u_iv.iv, c->cipher->blocksize - c->unused);
-      memcpy (c->u_iv.iv,
-              c->lastiv + c->cipher->blocksize - c->unused, c->unused);
-      c->unused = 0;
-    }
-}
+    case GCRY_CIPHER_MODE_GCM:
+      c->mode_ops.authenticate = _gcry_cipher_gcm_authenticate;
+      c->mode_ops.get_tag      = _gcry_cipher_gcm_get_tag;
+      c->mode_ops.check_tag    = _gcry_cipher_gcm_check_tag;
+      break;
 
+    case GCRY_CIPHER_MODE_POLY1305:
+      c->mode_ops.authenticate = _gcry_cipher_poly1305_authenticate;
+      c->mode_ops.get_tag      = _gcry_cipher_poly1305_get_tag;
+      c->mode_ops.check_tag    = _gcry_cipher_poly1305_check_tag;
+      break;
 
-gcry_error_t
-_gcry_cipher_setkey (gcry_cipher_hd_t hd, const void *key, size_t keylen)
-{
-  return cipher_setkey (hd, (void*)key, keylen);
-}
+    case GCRY_CIPHER_MODE_OCB:
+      c->mode_ops.authenticate = _gcry_cipher_ocb_authenticate;
+      c->mode_ops.get_tag      = _gcry_cipher_ocb_get_tag;
+      c->mode_ops.check_tag    = _gcry_cipher_ocb_check_tag;
+      break;
 
+    case GCRY_CIPHER_MODE_SIV:
+      c->mode_ops.authenticate = _gcry_cipher_siv_authenticate;
+      c->mode_ops.get_tag      = _gcry_cipher_siv_get_tag;
+      c->mode_ops.check_tag    = _gcry_cipher_siv_check_tag;
+      break;
 
-gcry_error_t
-_gcry_cipher_setiv (gcry_cipher_hd_t hd, const void *iv, size_t ivlen)
-{
-  cipher_setiv (hd, iv, ivlen);
-  return 0;
-}
+    case GCRY_CIPHER_MODE_GCM_SIV:
+      c->mode_ops.authenticate = _gcry_cipher_gcm_siv_authenticate;
+      c->mode_ops.get_tag      = _gcry_cipher_gcm_siv_get_tag;
+      c->mode_ops.check_tag    = _gcry_cipher_gcm_siv_check_tag;
+      break;
 
-/* Set counter for CTR mode.  (CTR,CTRLEN) must denote a buffer of
-   block size length, or (NULL,0) to set the CTR to the all-zero
-   block. */
-gpg_error_t
-_gcry_cipher_setctr (gcry_cipher_hd_t hd, const void *ctr, size_t ctrlen)
-{
-  if (ctr && ctrlen == hd->cipher->blocksize)
-    {
-      memcpy (hd->u_ctr.ctr, ctr, hd->cipher->blocksize);
-      hd->unused = 0;
-    }
-  else if (!ctr || !ctrlen)
-    {
-      memset (hd->u_ctr.ctr, 0, hd->cipher->blocksize);
-      hd->unused = 0;
+    default:
+      c->mode_ops.authenticate = NULL;
+      c->mode_ops.get_tag      = NULL;
+      c->mode_ops.check_tag    = NULL;
+      break;
     }
-  else
-    return gpg_error (GPG_ERR_INV_ARG);
-  return 0;
 }
 
 
-gcry_error_t
-gcry_cipher_ctl( gcry_cipher_hd_t h, int cmd, void *buffer, size_t buflen)
+gcry_err_code_t
+_gcry_cipher_ctl (gcry_cipher_hd_t h, int cmd, void *buffer, size_t buflen)
 {
-  gcry_err_code_t rc = GPG_ERR_NO_ERROR;
+  gcry_err_code_t rc = 0;
 
   switch (cmd)
     {
-    case GCRYCTL_SET_KEY:  /* Deprecated; use gcry_cipher_setkey.  */
-      rc = cipher_setkey( h, buffer, buflen );
-      break;
-
-    case GCRYCTL_SET_IV:   /* Deprecated; use gcry_cipher_setiv.  */
-      cipher_setiv( h, buffer, buflen );
-      break;
-
     case GCRYCTL_RESET:
       cipher_reset (h);
       break;
 
+    case GCRYCTL_FINALIZE:
+      if (!h || buffer || buflen)
+       return GPG_ERR_INV_ARG;
+      h->marks.finalize = 1;
+      break;
+
     case GCRYCTL_CFB_SYNC:
       cipher_sync( h );
       break;
@@ -1961,33 +1550,90 @@ gcry_cipher_ctl( gcry_cipher_hd_t h, int cmd, void 
*buffer, size_t buflen)
        h->flags &= ~GCRY_CIPHER_CBC_MAC;
       break;
 
+    case GCRYCTL_SET_CCM_LENGTHS:
+      {
+        u64 params[3];
+        size_t encryptedlen;
+        size_t aadlen;
+        size_t authtaglen;
+
+        if (h->mode != GCRY_CIPHER_MODE_CCM)
+          return GPG_ERR_INV_CIPHER_MODE;
+
+        if (!buffer || buflen != 3 * sizeof(u64))
+          return GPG_ERR_INV_ARG;
+
+        /* This command is used to pass additional length parameters needed
+           by CCM mode to initialize CBC-MAC.  */
+        memcpy (params, buffer, sizeof(params));
+        encryptedlen = params[0];
+        aadlen = params[1];
+        authtaglen = params[2];
+
+        rc = _gcry_cipher_ccm_set_lengths (h, encryptedlen, aadlen, 
authtaglen);
+      }
+      break;
+
+    case GCRYCTL_SET_DECRYPTION_TAG:
+      {
+        if (!buffer)
+          return GPG_ERR_INV_ARG;
+
+        if (h->mode == GCRY_CIPHER_MODE_SIV)
+          rc = _gcry_cipher_siv_set_decryption_tag (h, buffer, buflen);
+        else if (h->mode == GCRY_CIPHER_MODE_GCM_SIV)
+          rc = _gcry_cipher_gcm_siv_set_decryption_tag (h, buffer, buflen);
+        else
+          rc = GPG_ERR_INV_CIPHER_MODE;
+      }
+      break;
+
+    case GCRYCTL_SET_TAGLEN:
+      if (!h || !buffer || buflen != sizeof(int) )
+       return GPG_ERR_INV_ARG;
+      switch (h->mode)
+        {
+        case GCRY_CIPHER_MODE_OCB:
+          switch (*(int*)buffer)
+            {
+            case 8: case 12: case 16:
+              h->u_mode.ocb.taglen = *(int*)buffer;
+              break;
+            default:
+              rc = GPG_ERR_INV_LENGTH; /* Invalid tag length. */
+              break;
+            }
+          break;
+
+        default:
+          rc =GPG_ERR_INV_CIPHER_MODE;
+          break;
+        }
+      break;
+
     case GCRYCTL_DISABLE_ALGO:
       /* This command expects NULL for H and BUFFER to point to an
          integer with the algo number.  */
       if( h || !buffer || buflen != sizeof(int) )
-       return gcry_error (GPG_ERR_CIPHER_ALGO);
+       return GPG_ERR_CIPHER_ALGO;
       disable_cipher_algo( *(int*)buffer );
       break;
 
-    case GCRYCTL_SET_CTR: /* Deprecated; use gcry_cipher_setctr.  */
-      rc = gpg_err_code (_gcry_cipher_setctr (h, buffer, buflen));
-      break;
-
-    case 61:  /* Disable weak key detection (private).  */
-      if (h->extraspec->set_extra_info)
-        rc = h->extraspec->set_extra_info
+    case PRIV_CIPHERCTL_DISABLE_WEAK_KEY:  /* (private)  */
+      if (h->spec->set_extra_info)
+        rc = h->spec->set_extra_info
           (&h->context.c, CIPHER_INFO_NO_WEAK_KEY, NULL, 0);
       else
         rc = GPG_ERR_NOT_SUPPORTED;
       break;
 
-    case 62: /* Return current input vector (private).  */
+    case PRIV_CIPHERCTL_GET_INPUT_VECTOR: /* (private)  */
       /* This is the input block as used in CFB and OFB mode which has
          initially been set as IV.  The returned format is:
            1 byte  Actual length of the block in bytes.
            n byte  The block.
          If the provided buffer is too short, an error is returned. */
-      if (buflen < (1 + h->cipher->blocksize))
+      if (buflen < (1 + h->spec->blocksize))
         rc = GPG_ERR_TOO_SHORT;
       else
         {
@@ -1996,47 +1642,130 @@ gcry_cipher_ctl( gcry_cipher_hd_t h, int cmd, void 
*buffer, size_t buflen)
           int n = h->unused;
 
           if (!n)
-            n = h->cipher->blocksize;
-          gcry_assert (n <= h->cipher->blocksize);
+            n = h->spec->blocksize;
+          gcry_assert (n <= h->spec->blocksize);
           *dst++ = n;
-          ivp = h->u_iv.iv + h->cipher->blocksize - n;
+          ivp = h->u_iv.iv + h->spec->blocksize - n;
           while (n--)
             *dst++ = *ivp++;
         }
       break;
 
+    case GCRYCTL_SET_SBOX:
+      if (h->spec->set_extra_info)
+        rc = h->spec->set_extra_info
+          (&h->context.c, GCRYCTL_SET_SBOX, buffer, buflen);
+      else
+        rc = GPG_ERR_NOT_SUPPORTED;
+      break;
+
+    case GCRYCTL_SET_ALLOW_WEAK_KEY:
+      /* Expecting BUFFER to be NULL and buflen to be on/off flag (0 or 1). */
+      if (!h || buffer || buflen > 1)
+       return GPG_ERR_CIPHER_ALGO;
+      h->marks.allow_weak_key = buflen ? 1 : 0;
+      break;
+
     default:
       rc = GPG_ERR_INV_OP;
     }
 
-  return gcry_error (rc);
+  return rc;
 }
 
 
 /* Return information about the cipher handle H.  CMD is the kind of
-   information requested.  BUFFER and NBYTES are reserved for now.
-
-   There are no values for CMD yet defined.
-
-   The function always returns GPG_ERR_INV_OP.
-
+ * information requested.
+ *
+ * CMD may be one of:
+ *
+ *  GCRYCTL_GET_TAGLEN:
+ *      Return the length of the tag for an AE algorithm mode.  An
+ *      error is returned for modes which do not support a tag.
+ *      BUFFER must be given as NULL.  On success the result is stored
+ *      at NBYTES.  The taglen is returned in bytes.
+ *
+ *  GCRYCTL_GET_KEYLEN:
+ *      Return the length of the key wrapped for AES-WRAP mode.  The
+ *      length is encoded in big-endian 4 bytes, when the key is
+ *      unwrapped with KWP.  Return 00 00 00 00, when the key is
+ *      unwrapped with KW.
+ *
+ * The function returns 0 on success or an error code.
  */
-gcry_error_t
-gcry_cipher_info (gcry_cipher_hd_t h, int cmd, void *buffer, size_t *nbytes)
+gcry_err_code_t
+_gcry_cipher_info (gcry_cipher_hd_t h, int cmd, void *buffer, size_t *nbytes)
 {
-  gcry_err_code_t err = GPG_ERR_NO_ERROR;
-
-  (void)h;
-  (void)buffer;
-  (void)nbytes;
+  gcry_err_code_t rc = 0;
 
   switch (cmd)
     {
+    case GCRYCTL_GET_TAGLEN:
+      if (!h || buffer || !nbytes)
+       rc = GPG_ERR_INV_ARG;
+      else
+       {
+          switch (h->mode)
+            {
+            case GCRY_CIPHER_MODE_OCB:
+              *nbytes = h->u_mode.ocb.taglen;
+              break;
+
+            case GCRY_CIPHER_MODE_CCM:
+              *nbytes = h->u_mode.ccm.authlen;
+              break;
+
+            case GCRY_CIPHER_MODE_EAX:
+              *nbytes = h->spec->blocksize;
+              break;
+
+            case GCRY_CIPHER_MODE_GCM:
+              *nbytes = GCRY_GCM_BLOCK_LEN;
+              break;
+
+            case GCRY_CIPHER_MODE_POLY1305:
+              *nbytes = POLY1305_TAGLEN;
+              break;
+
+            case GCRY_CIPHER_MODE_SIV:
+              *nbytes = GCRY_SIV_BLOCK_LEN;
+              break;
+
+            case GCRY_CIPHER_MODE_GCM_SIV:
+              *nbytes = GCRY_SIV_BLOCK_LEN;
+              break;
+
+            default:
+              rc = GPG_ERR_INV_CIPHER_MODE;
+              break;
+            }
+        }
+      break;
+
+    case GCRYCTL_GET_KEYLEN:
+      if (!h || !buffer || !nbytes)
+       rc = GPG_ERR_INV_ARG;
+      else
+        {
+          switch (h->mode)
+            {
+            case GCRY_CIPHER_MODE_AESWRAP:
+              *nbytes = 4;
+              memcpy (buffer, h->u_mode.wrap.plen, 4);
+              break;
+
+            default:
+              rc = GPG_ERR_INV_CIPHER_MODE;
+              break;
+            }
+        }
+      break;
+
     default:
-      err = GPG_ERR_INV_OP;
+      rc = GPG_ERR_INV_OP;
     }
 
-  return gcry_error (err);
+  return rc;
 }
 
 /* Return information about the given cipher algorithm ALGO.
@@ -2063,17 +1792,17 @@ gcry_cipher_info (gcry_cipher_hd_t h, int cmd, void 
*buffer, size_t *nbytes)
    and thereby detecting whether a error occurred or not (i.e. while
    checking the block size)
  */
-gcry_error_t
-gcry_cipher_algo_info (int algo, int what, void *buffer, size_t *nbytes)
+gcry_err_code_t
+_gcry_cipher_algo_info (int algo, int what, void *buffer, size_t *nbytes)
 {
-  gcry_err_code_t err = GPG_ERR_NO_ERROR;
+  gcry_err_code_t rc = 0;
   unsigned int ui;
 
   switch (what)
     {
     case GCRYCTL_GET_KEYLEN:
       if (buffer || (! nbytes))
-       err = GPG_ERR_CIPHER_ALGO;
+       rc = GPG_ERR_CIPHER_ALGO;
       else
        {
          ui = cipher_get_keylen (algo);
@@ -2081,37 +1810,39 @@ gcry_cipher_algo_info (int algo, int what, void 
*buffer, size_t *nbytes)
            *nbytes = (size_t) ui / 8;
          else
            /* The only reason for an error is an invalid algo.  */
-           err = GPG_ERR_CIPHER_ALGO;
+           rc = GPG_ERR_CIPHER_ALGO;
        }
       break;
 
     case GCRYCTL_GET_BLKLEN:
       if (buffer || (! nbytes))
-       err = GPG_ERR_CIPHER_ALGO;
+       rc = GPG_ERR_CIPHER_ALGO;
       else
        {
          ui = cipher_get_blocksize (algo);
          if ((ui > 0) && (ui < 10000))
            *nbytes = ui;
          else
-           /* The only reason is an invalid algo or a strange
-              blocksize.  */
-           err = GPG_ERR_CIPHER_ALGO;
+            {
+              /* The only reason is an invalid algo or a strange
+                 blocksize.  */
+              rc = GPG_ERR_CIPHER_ALGO;
+            }
        }
       break;
 
     case GCRYCTL_TEST_ALGO:
       if (buffer || nbytes)
-       err = GPG_ERR_INV_ARG;
+       rc = GPG_ERR_INV_ARG;
       else
-       err = check_cipher_algo (algo);
+       rc = check_cipher_algo (algo);
       break;
 
       default:
-       err = GPG_ERR_INV_OP;
+       rc = GPG_ERR_INV_OP;
     }
 
-  return gcry_error (err);
+  return rc;
 }
 
 
@@ -2124,15 +1855,16 @@ gcry_cipher_algo_info (int algo, int what, void 
*buffer, size_t *nbytes)
    gcry_cipher_algo_info because it allows for proper type
    checking.  */
 size_t
-gcry_cipher_get_algo_keylen (int algo)
+_gcry_cipher_get_algo_keylen (int algo)
 {
   size_t n;
 
-  if (gcry_cipher_algo_info (algo, GCRYCTL_GET_KEYLEN, NULL, &n))
+  if (_gcry_cipher_algo_info (algo, GCRYCTL_GET_KEYLEN, NULL, &n))
     n = 0;
   return n;
 }
 
+
 /* This functions returns the blocklength of the algorithm ALGO
    counted in octets.  On error 0 is returned.
 
@@ -2140,42 +1872,21 @@ gcry_cipher_get_algo_keylen (int algo)
    gcry_cipher_algo_info because it allows for proper type
    checking.  */
 size_t
-gcry_cipher_get_algo_blklen (int algo)
+_gcry_cipher_get_algo_blklen (int algo)
 {
   size_t n;
 
-  if (gcry_cipher_algo_info( algo, GCRYCTL_GET_BLKLEN, NULL, &n))
+  if (_gcry_cipher_algo_info( algo, GCRYCTL_GET_BLKLEN, NULL, &n))
     n = 0;
   return n;
 }
 
+
 /* Explicitly initialize this module.  */
 gcry_err_code_t
 _gcry_cipher_init (void)
 {
-  gcry_err_code_t err = GPG_ERR_NO_ERROR;
-
-  REGISTER_DEFAULT_CIPHERS;
-
-  return err;
-}
-
-/* Get a list consisting of the IDs of the loaded cipher modules.  If
-   LIST is zero, write the number of loaded cipher modules to
-   LIST_LENGTH and return.  If LIST is non-zero, the first
-   *LIST_LENGTH algorithm IDs are stored in LIST, which must be of
-   according size.  In case there are less cipher modules than
-   *LIST_LENGTH, *LIST_LENGTH is updated to the correct number.  */
-gcry_error_t
-gcry_cipher_list (int *list, int *list_length)
-{
-  gcry_err_code_t err = GPG_ERR_NO_ERROR;
-
-  ath_mutex_lock (&ciphers_registered_lock);
-  err = _gcry_module_list (ciphers_registered, list, list_length);
-  ath_mutex_unlock (&ciphers_registered_lock);
-
-  return err;
+  return 0;
 }
 
 
@@ -2184,34 +1895,24 @@ gcry_cipher_list (int *list, int *list_length)
 gpg_error_t
 _gcry_cipher_selftest (int algo, int extended, selftest_report_func_t report)
 {
-  gcry_module_t module = NULL;
-  cipher_extra_spec_t *extraspec = NULL;
   gcry_err_code_t ec = 0;
+  gcry_cipher_spec_t *spec;
 
-  REGISTER_DEFAULT_CIPHERS;
-
-  ath_mutex_lock (&ciphers_registered_lock);
-  module = _gcry_module_lookup_id (ciphers_registered, algo);
-  if (module && !(module->flags & FLAG_MODULE_DISABLED))
-    extraspec = module->extraspec;
-  ath_mutex_unlock (&ciphers_registered_lock);
-  if (extraspec && extraspec->selftest)
-    ec = extraspec->selftest (algo, extended, report);
+  spec = spec_from_algo (algo);
+  if (spec && !spec->flags.disabled
+      && (spec->flags.fips || !fips_mode ())
+      && spec->selftest)
+    ec = spec->selftest (algo, extended, report);
   else
     {
       ec = GPG_ERR_CIPHER_ALGO;
       if (report)
         report ("cipher", algo, "module",
-                module && !(module->flags & FLAG_MODULE_DISABLED)?
+                spec && !spec->flags.disabled
+                && (spec->flags.fips || !fips_mode ())?
                 "no selftest available" :
-                module? "algorithm disabled" : "algorithm not found");
+                spec? "algorithm disabled" : "algorithm not found");
     }
 
-  if (module)
-    {
-      ath_mutex_lock (&ciphers_registered_lock);
-      _gcry_module_release (module);
-      ath_mutex_unlock (&ciphers_registered_lock);
-    }
   return gpg_error (ec);
 }
diff --git a/grub-core/lib/libgcrypt/cipher/crc-armv8-aarch64-ce.S 
b/grub-core/lib/libgcrypt/cipher/crc-armv8-aarch64-ce.S
new file mode 100644
index 000000000..7ac884af3
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/crc-armv8-aarch64-ce.S
@@ -0,0 +1,497 @@
+/* crc-armv8-aarch64-ce.S - ARMv8/CE PMULL accelerated CRC implementation
+ * Copyright (C) 2019 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "asm-common-aarch64.h"
+
+#if defined(__AARCH64EL__) && \
+    defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) && \
+    defined(HAVE_GCC_INLINE_ASM_AARCH64_CRYPTO)
+
+.cpu generic+simd+crypto
+
+.text
+
+
+/* Structure of crc32_consts_s */
+
+#define consts_k(idx)    ((idx) * 8)
+#define consts_my_p(idx) (consts_k(6) + (idx) * 8)
+
+/* Constants */
+
+.align 6
+.Lcrc32_constants:
+.Lcrc32_partial_fold_input_mask:
+  .byte 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+  .byte 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+.Lcrc32_refl_shuf_shift:
+  .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+  .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+  .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
+  .byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
+.Lcrc32_shuf_shift:
+  .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+  .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+.Lcrc32_bswap_shuf:
+  .byte 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08
+  .byte 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00
+  .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+  .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+
+
+/*
+ * void _gcry_crc32r_armv8_ce_bulk (u32 *pcrc, const byte *inbuf, size_t inlen,
+ *                                  const struct crc32_consts_s *consts);
+ */
+.align 3
+.globl _gcry_crc32r_armv8_ce_bulk
+ELF(.type  _gcry_crc32r_armv8_ce_bulk,%function;)
+_gcry_crc32r_armv8_ce_bulk:
+  /* input:
+   *    x0: pcrc
+   *    x1: inbuf
+   *    x2: inlen
+   *    x3: consts
+   */
+  CFI_STARTPROC()
+
+  GET_DATA_POINTER(x7, .Lcrc32_constants)
+  add x9, x3, #consts_k(5 - 1)
+  cmp x2, #128
+
+  b.lo .Lcrc32r_fold_by_one_setup
+
+  eor v4.16b, v4.16b, v4.16b
+  add x4, x3, #consts_k(1 - 1)
+  ld1 {v4.s}[0], [x0]             /* load pcrc */
+  ld1 {v0.16b-v3.16b}, [x1], #64  /* load 64 bytes of input */
+  sub x2, x2, #64
+  ld1 {v6.16b}, [x4]
+  eor v0.16b, v0.16b, v4.16b
+
+  add x4, x3, #consts_k(3 - 1)
+  add x5, x3, #consts_my_p(0)
+
+.Lcrc32r_fold_by_four:
+
+  /* Fold by 4. */
+  ld1 {v16.16b-v19.16b}, [x1], #64 /* load 64 bytes of input */
+  sub x2, x2, #64
+  pmull v20.1q, v0.1d, v6.1d
+  pmull v21.1q, v1.1d, v6.1d
+  pmull v22.1q, v2.1d, v6.1d
+  pmull v23.1q, v3.1d, v6.1d
+  cmp x2, #64
+  pmull2 v24.1q, v0.2d, v6.2d
+  pmull2 v25.1q, v1.2d, v6.2d
+  pmull2 v26.1q, v2.2d, v6.2d
+  pmull2 v27.1q, v3.2d, v6.2d
+  eor v0.16b, v20.16b, v16.16b
+  eor v1.16b, v21.16b, v17.16b
+  eor v2.16b, v22.16b, v18.16b
+  eor v3.16b, v23.16b, v19.16b
+  eor v0.16b, v0.16b, v24.16b
+  eor v1.16b, v1.16b, v25.16b
+  eor v2.16b, v2.16b, v26.16b
+  eor v3.16b, v3.16b, v27.16b
+  b.hs .Lcrc32r_fold_by_four
+
+  ld1 {v6.16b}, [x4]
+  ld1 {v5.16b}, [x5]
+
+  cmp x2, #16
+
+  /* Fold 4 to 1. */
+
+  pmull v16.1q, v0.1d, v6.1d
+  pmull2 v4.1q, v0.2d, v6.2d
+  eor v0.16b, v16.16b, v1.16b
+  eor v0.16b, v0.16b, v4.16b
+
+  pmull v16.1q, v0.1d, v6.1d
+  pmull2 v4.1q, v0.2d, v6.2d
+  eor v0.16b, v16.16b, v2.16b
+  eor v0.16b, v0.16b, v4.16b
+
+  pmull v16.1q, v0.1d, v6.1d
+  pmull2 v4.1q, v0.2d, v6.2d
+  eor v0.16b, v16.16b, v3.16b
+  eor v0.16b, v0.16b, v4.16b
+
+  b.lo .Lcrc32r_fold_by_one_done
+  b .Lcrc32r_fold_by_one
+
+.Lcrc32r_fold_by_one_setup:
+
+  eor v1.16b, v1.16b, v1.16b
+  add x4, x3, #consts_k(3 - 1)
+  add x5, x3, #consts_my_p(0)
+  sub x2, x2, #16
+  ld1 {v1.s}[0], [x0]             /* load pcrc */
+  ld1 {v0.16b}, [x1], #16         /* load 16 bytes of input */
+  cmp x2, #16
+  ld1 {v6.16b}, [x4]              /* load k3k4 */
+  ld1 {v5.16b}, [x5]              /* load my_p */
+  eor v0.16b, v0.16b, v1.16b
+  b.lo .Lcrc32r_fold_by_one_done
+
+.Lcrc32r_fold_by_one:
+  sub x2, x2, #16
+  ld1 {v2.16b}, [x1], #16         /* load 16 bytes of input */
+  pmull v3.1q, v0.1d, v6.1d
+  pmull2 v1.1q, v0.2d, v6.2d
+  cmp x2, #16
+  eor v0.16b, v3.16b, v2.16b
+  eor v0.16b, v0.16b, v1.16b
+
+  b.hs .Lcrc32r_fold_by_one
+
+.Lcrc32r_fold_by_one_done:
+
+  cmp x2, #0
+  b.eq .Lcrc32r_final_fold
+
+  /* Partial fold. */
+
+  add x4, x7, #.Lcrc32_refl_shuf_shift - .Lcrc32_constants
+  add x5, x7, #.Lcrc32_refl_shuf_shift - .Lcrc32_constants + 16
+  add x6, x7, #.Lcrc32_partial_fold_input_mask - .Lcrc32_constants
+  sub x8, x2, #16
+  add x4, x4, x2
+  add x5, x5, x2
+  add x6, x6, x2
+  add x8, x1, x8
+
+  /* Load last input and add padding zeros. */
+  ld1 {v4.16b}, [x4]
+  eor x2, x2, x2
+  ld1 {v3.16b}, [x5]
+  ld1 {v2.16b}, [x6]
+  tbl v30.16b, {v0.16b}, v4.16b
+  ld1 {v4.16b}, [x8]
+  tbl v1.16b, {v0.16b}, v3.16b
+
+  pmull v0.1q, v30.1d, v6.1d
+  and v2.16b, v2.16b, v4.16b
+  pmull2 v31.1q, v30.2d, v6.2d
+  orr v2.16b, v2.16b, v1.16b
+  eor v0.16b, v0.16b, v31.16b
+  eor v0.16b, v0.16b, v2.16b
+
+.Lcrc32r_final_fold:
+
+  /* Final fold. */
+
+  eor v2.16b, v2.16b, v2.16b      /* zero reg */
+  ld1 {v7.16b}, [x9]
+
+  /* reduce 128-bits to 96-bits */
+  ext v6.16b, v6.16b, v6.16b, #8  /* swap high and low parts */
+  mov v1.16b, v0.16b
+  pmull v0.1q, v0.1d, v6.1d
+  ext v6.16b, v5.16b, v5.16b, #8  /* swap high and low parts */
+  ext v1.16b, v1.16b, v2.16b, #8  /* high to low, high zeroed */
+  eor v3.16b, v0.16b, v1.16b
+
+  /* reduce 96-bits to 64-bits */
+  eor v1.16b, v1.16b, v1.16b
+  ext v0.16b, v3.16b, v2.16b, #4  /* [00][00][x2][x1] */
+  mov v1.s[0], v3.s[0]            /* [00][00][00][x0] */
+  eor v3.16b, v3.16b, v3.16b
+  pmull v1.1q, v1.1d, v7.1d       /* [00][00][xx][xx] */
+  eor v0.16b, v0.16b, v1.16b      /* top 64-bit are zero */
+
+  /* barrett reduction */
+  mov v3.s[1], v0.s[0]            /* [00][00][x1][00] */
+  ext v0.16b, v2.16b, v0.16b, #12 /* [??][x1][??][00] */
+  pmull v1.1q, v3.1d, v5.1d       /* [00][xx][xx][00] */
+  pmull v1.1q, v1.1d, v6.1d       /* [00][xx][xx][00] */
+  eor v0.16b, v0.16b, v1.16b
+
+  /* store CRC */
+  st1 {v0.s}[2], [x0]
+
+  ret_spec_stop
+  CFI_ENDPROC()
+ELF(.size _gcry_crc32r_armv8_ce_bulk,.-_gcry_crc32r_armv8_ce_bulk;)
+
+/*
+ * void _gcry_crc32r_armv8_ce_reduction_4 (u32 *pcrc, u32 data, u32 crc,
+ *                                         const struct crc32_consts_s 
*consts);
+ */
+.align 3
+.globl _gcry_crc32r_armv8_ce_reduction_4
+ELF(.type  _gcry_crc32r_armv8_ce_reduction_4,%function;)
+_gcry_crc32r_armv8_ce_reduction_4:
+  /* input:
+   *    w0: data
+   *    w1: crc
+   *    x2: crc32 constants
+   */
+  CFI_STARTPROC()
+
+  eor v0.16b, v0.16b, v0.16b
+  add x2, x2, #consts_my_p(0)
+  eor v1.16b, v1.16b, v1.16b
+  ld1 {v5.16b}, [x2]
+
+  mov v0.s[0], w0
+  pmull v0.1q, v0.1d, v5.1d     /* [00][00][xx][xx] */
+  mov v1.s[1], w1
+  mov v0.s[2], v0.s[0]          /* [00][x0][x1][x0] */
+  pmull2 v0.1q, v0.2d, v5.2d    /* [00][00][xx][xx] */
+  eor v0.16b, v0.16b, v1.16b
+
+  mov w0, v0.s[1]
+
+  ret_spec_stop
+  CFI_ENDPROC()
+ELF(.size 
_gcry_crc32r_armv8_ce_reduction_4,.-_gcry_crc32r_armv8_ce_reduction_4;)
+
+/*
+ * void _gcry_crc32_armv8_ce_bulk (u32 *pcrc, const byte *inbuf, size_t inlen,
+ *                                 const struct crc32_consts_s *consts);
+ */
+.align 3
+.globl _gcry_crc32_armv8_ce_bulk
+ELF(.type  _gcry_crc32_armv8_ce_bulk,%function;)
+_gcry_crc32_armv8_ce_bulk:
+  /* input:
+   *    x0: pcrc
+   *    x1: inbuf
+   *    x2: inlen
+   *    x3: consts
+   */
+  CFI_STARTPROC()
+
+  GET_DATA_POINTER(x7, .Lcrc32_constants)
+  add x4, x7, #.Lcrc32_bswap_shuf - .Lcrc32_constants
+  cmp x2, #128
+  ld1 {v7.16b}, [x4]
+
+  b.lo .Lcrc32_fold_by_one_setup
+
+  eor v4.16b, v4.16b, v4.16b
+  add x4, x3, #consts_k(1 - 1)
+  ld1 {v4.s}[0], [x0]            /* load pcrc */
+  ld1 {v0.16b-v3.16b}, [x1], #64 /* load 64 bytes of input */
+  sub x2, x2, #64
+  ld1 {v6.16b}, [x4]
+  eor v0.16b, v0.16b, v4.16b
+  ext v4.16b, v6.16b, v6.16b, #8
+  tbl v0.16b, { v0.16b }, v7.16b /* byte swap */
+  tbl v1.16b, { v1.16b }, v7.16b /* byte swap */
+  tbl v2.16b, { v2.16b }, v7.16b /* byte swap */
+  tbl v3.16b, { v3.16b }, v7.16b /* byte swap */
+
+  add x4, x3, #consts_k(3 - 1)
+  add x5, x3, #consts_my_p(0)
+
+.Lcrc32_fold_by_four:
+
+  /* Fold by 4. */
+  ld1 {v16.16b-v19.16b}, [x1], #64 /* load 64 bytes of input */
+  sub x2, x2, #64
+  tbl v16.16b, { v16.16b }, v7.16b /* byte swap */
+  tbl v17.16b, { v17.16b }, v7.16b /* byte swap */
+  tbl v18.16b, { v18.16b }, v7.16b /* byte swap */
+  tbl v19.16b, { v19.16b }, v7.16b /* byte swap */
+  cmp x2, #64
+  pmull2 v20.1q, v0.2d, v4.2d
+  pmull2 v21.1q, v1.2d, v4.2d
+  pmull2 v22.1q, v2.2d, v4.2d
+  pmull2 v23.1q, v3.2d, v4.2d
+  pmull v24.1q, v0.1d, v4.1d
+  pmull v25.1q, v1.1d, v4.1d
+  pmull v26.1q, v2.1d, v4.1d
+  pmull v27.1q, v3.1d, v4.1d
+  eor v0.16b, v20.16b, v16.16b
+  eor v1.16b, v21.16b, v17.16b
+  eor v2.16b, v22.16b, v18.16b
+  eor v3.16b, v23.16b, v19.16b
+  eor v0.16b, v0.16b, v24.16b
+  eor v1.16b, v1.16b, v25.16b
+  eor v2.16b, v2.16b, v26.16b
+  eor v3.16b, v3.16b, v27.16b
+  b.hs .Lcrc32_fold_by_four
+
+  ld1 {v6.16b}, [x4]
+  ld1 {v5.16b}, [x5]
+  ext v6.16b, v6.16b, v6.16b, #8
+  ext v5.16b, v5.16b, v5.16b, #8
+
+  cmp x2, #16
+
+  /* Fold 4 to 1. */
+
+  pmull2 v16.1q, v0.2d, v6.2d
+  pmull v4.1q, v0.1d, v6.1d
+  eor v0.16b, v16.16b, v1.16b
+  eor v0.16b, v0.16b, v4.16b
+
+  pmull2 v16.1q, v0.2d, v6.2d
+  pmull v4.1q, v0.1d, v6.1d
+  eor v0.16b, v16.16b, v2.16b
+  eor v0.16b, v0.16b, v4.16b
+
+  pmull2 v16.1q, v0.2d, v6.2d
+  pmull v4.1q, v0.1d, v6.1d
+  eor v0.16b, v16.16b, v3.16b
+  eor v0.16b, v0.16b, v4.16b
+
+  b.lo .Lcrc32_fold_by_one_done
+  b .Lcrc32_fold_by_one
+
+.Lcrc32_fold_by_one_setup:
+
+  eor v1.16b, v1.16b, v1.16b
+  add x4, x3, #consts_k(3 - 1)
+  add x5, x3, #consts_my_p(0)
+  ld1 {v1.s}[0], [x0]            /* load pcrc */
+  sub x2, x2, #16
+  ld1 {v0.16b}, [x1], #16        /* load 16 bytes of input */
+  ld1 {v6.16b}, [x4]             /* load k3k4 */
+  ld1 {v5.16b}, [x5]             /* load my_p */
+  eor v0.16b, v0.16b, v1.16b
+  cmp x2, #16
+  ext v6.16b, v6.16b, v6.16b, #8 /* swap high and low parts */
+  ext v5.16b, v5.16b, v5.16b, #8 /* swap high and low parts */
+  tbl v0.16b, { v0.16b }, v7.16b /* byte swap */
+  b.lo .Lcrc32_fold_by_one_done
+
+.Lcrc32_fold_by_one:
+  sub x2, x2, #16
+  ld1 {v2.16b}, [x1], #16        /* load 16 bytes of input */
+  pmull2 v3.1q, v0.2d, v6.2d
+  tbl v2.16b, { v2.16b }, v7.16b /* byte swap */
+  pmull v1.1q, v0.1d, v6.1d
+  cmp x2, #16
+  eor v0.16b, v3.16b, v2.16b
+  eor v0.16b, v0.16b, v1.16b
+
+  b.hs .Lcrc32_fold_by_one
+
+.Lcrc32_fold_by_one_done:
+
+  cmp x2, #0
+  b.eq .Lcrc32_final_fold
+
+  /* Partial fold. */
+
+  add x4, x7, #.Lcrc32_refl_shuf_shift - .Lcrc32_constants + 32
+  add x5, x7, #.Lcrc32_shuf_shift - .Lcrc32_constants + 16
+  add x6, x7, #.Lcrc32_partial_fold_input_mask - .Lcrc32_constants
+  sub x8, x2, #16
+  sub x4, x4, x2
+  add x5, x5, x2
+  add x6, x6, x2
+  add x8, x1, x8
+
+  /* Load last input and add padding zeros. */
+  ld1 {v4.16b}, [x4]
+  eor x2, x2, x2
+  ld1 {v3.16b}, [x5]
+  ld1 {v2.16b}, [x6]
+  tbl v30.16b, {v0.16b}, v4.16b
+  ld1 {v4.16b}, [x8]
+  tbl v1.16b, {v0.16b}, v3.16b
+  and v2.16b, v2.16b, v4.16b
+
+  pmull2 v0.1q, v30.2d, v6.2d
+  orr v2.16b, v2.16b, v1.16b
+  pmull v1.1q, v30.1d, v6.1d
+  tbl v2.16b, {v2.16b}, v7.16b   /* byte swap */
+  eor v0.16b, v0.16b, v1.16b
+  eor v0.16b, v0.16b, v2.16b
+
+.Lcrc32_final_fold:
+
+  /* Final fold. */
+
+  eor v2.16b, v2.16b, v2.16b     /* zero reg */
+
+  /* reduce 128-bits to 96-bits */
+  add x4, x3, #consts_k(4)
+  ext v3.16b, v6.16b, v6.16b, #8 /* swap high and low parts */
+  eor v6.16b, v6.16b, v6.16b
+  mov v1.16b, v0.16b
+  pmull2 v0.1q, v0.2d, v3.2d
+  ld1 {v6.d}[1], [x4]            /* load k4 */
+  ext v1.16b, v2.16b, v1.16b, #8 /* low to high, low zeroed */
+  eor v3.16b, v0.16b, v1.16b     /* bottom 32-bit are zero */
+
+  /* reduce 96-bits to 64-bits */
+  eor v0.16b, v0.16b, v0.16b
+  eor v1.16b, v1.16b, v1.16b
+  mov v0.s[1], v3.s[1]           /* [00][00][x1][00] */
+  mov v1.s[2], v3.s[3]           /* [00][x3][00][00] */
+  mov v0.s[2], v3.s[2]           /* [00][x2][x1][00] */
+  eor v3.16b, v3.16b, v3.16b
+  pmull2 v1.1q, v1.2d, v6.2d     /* [00][xx][xx][00] */
+  eor v0.16b, v0.16b, v1.16b     /* top and bottom 32-bit are zero */
+
+  /* barrett reduction */
+  mov v3.s[0], v0.s[1]           /* [00][00][00][x1] */
+  pmull2 v0.1q, v0.2d, v5.2d     /* [00][xx][xx][xx] */
+  ext v0.16b, v0.16b, v2.16b, #4 /* [00][00][xx][xx] */
+  pmull v0.1q, v0.1d, v5.1d
+  eor v0.16b, v0.16b, v3.16b
+
+  /* store CRC in input endian */
+  rev32 v0.8b, v0.8b             /* byte swap */
+  st1 {v0.s}[0], [x0]
+
+  ret_spec_stop
+  CFI_ENDPROC()
+ELF(.size _gcry_crc32_armv8_ce_bulk,.-_gcry_crc32_armv8_ce_bulk;)
+
+/*
+ * void _gcry_crc32_armv8_ce_reduction_4 (u32 *pcrc, u32 data, u32 crc,
+ *                                        const struct crc32_consts_s *consts);
+ */
+.align 3
+.globl _gcry_crc32_armv8_ce_reduction_4
+ELF(.type  _gcry_crc32_armv8_ce_reduction_4,%function;)
+_gcry_crc32_armv8_ce_reduction_4:
+  /* input:
+   *    w0: data
+   *    w1: crc
+   *    x2: crc32 constants
+   */
+  CFI_STARTPROC()
+
+  eor v0.16b, v0.16b, v0.16b
+  add x2, x2, #consts_my_p(0)
+  eor v1.16b, v1.16b, v1.16b
+  ld1 {v5.16b}, [x2]
+
+  mov v0.s[1], w0
+  pmull v0.1q, v0.1d, v5.1d     /* [00][xx][xx][00] */
+  mov v1.s[0], w1
+  pmull2 v0.1q, v0.2d, v5.2d    /* [00][00][xx][xx] */
+  eor v0.16b, v0.16b, v1.16b
+
+  rev32 v0.8b, v0.8b            /* Return in input endian */
+  mov w0, v0.s[0]
+
+  ret_spec_stop
+  CFI_ENDPROC()
+ELF(.size _gcry_crc32_armv8_ce_reduction_4,.-_gcry_crc32_armv8_ce_reduction_4;)
+
+#endif
diff --git a/grub-core/lib/libgcrypt/cipher/crc-armv8-ce.c 
b/grub-core/lib/libgcrypt/cipher/crc-armv8-ce.c
new file mode 100644
index 000000000..17e555482
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/crc-armv8-ce.c
@@ -0,0 +1,229 @@
+/* crc-armv8-ce.c - ARMv8-CE PMULL accelerated CRC implementation
+ * Copyright (C) 2019 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "g10lib.h"
+
+#include "bithelp.h"
+#include "bufhelp.h"
+
+
+#if defined(ENABLE_ARM_CRYPTO_SUPPORT) && defined(__AARCH64EL__) && \
+    defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) && \
+    defined(HAVE_GCC_INLINE_ASM_AARCH64_CRYPTO)
+
+
+#define ALIGNED_16 __attribute__ ((aligned (16)))
+
+
+struct u16_unaligned_s
+{
+  u16 a;
+} __attribute__((packed, aligned (1), may_alias));
+
+struct u32_unaligned_s
+{
+  u32 a;
+} __attribute__((packed, aligned (1), may_alias));
+
+
+/* Constants structure for generic reflected/non-reflected CRC32 PMULL
+ * functions. */
+struct crc32_consts_s
+{
+  /* k: { x^(32*17), x^(32*15), x^(32*5), x^(32*3), x^(32*2), 0 } mod P(x) */
+  u64 k[6];
+  /* my_p: { floor(x^64 / P(x)), P(x) } */
+  u64 my_p[2];
+};
+
+/* PMULL constants for CRC32 and CRC32RFC1510. */
+static const struct crc32_consts_s crc32_consts ALIGNED_16 =
+{
+  { /* k[6] = reverse_33bits( x^(32*y) mod P(x) ) */
+    U64_C(0x154442bd4), U64_C(0x1c6e41596), /* y = { 17, 15 } */
+    U64_C(0x1751997d0), U64_C(0x0ccaa009e), /* y = { 5, 3 } */
+    U64_C(0x163cd6124), 0                   /* y = 2 */
+  },
+  { /* my_p[2] = reverse_33bits ( { floor(x^64 / P(x)), P(x) } ) */
+    U64_C(0x1f7011641), U64_C(0x1db710641)
+  }
+};
+
+/* PMULL constants for CRC24RFC2440 (polynomial multiplied with x⁸). */
+static const struct crc32_consts_s crc24rfc2440_consts ALIGNED_16 =
+{
+  { /* k[6] = x^(32*y) mod P(x) << 32*/
+    U64_C(0x08289a00) << 32, U64_C(0x74b44a00) << 32, /* y = { 17, 15 } */
+    U64_C(0xc4b14d00) << 32, U64_C(0xfd7e0c00) << 32, /* y = { 5, 3 } */
+    U64_C(0xd9fe8c00) << 32, 0                        /* y = 2 */
+  },
+  { /* my_p[2] = { floor(x^64 / P(x)), P(x) } */
+    U64_C(0x1f845fe24), U64_C(0x1864cfb00)
+  }
+};
+
+
+u32 _gcry_crc32r_armv8_ce_reduction_4 (u32 data, u32 crc,
+                                      const struct crc32_consts_s *consts);
+void _gcry_crc32r_armv8_ce_bulk (u32 *pcrc, const byte *inbuf, size_t inlen,
+                                 const struct crc32_consts_s *consts);
+
+u32 _gcry_crc32_armv8_ce_reduction_4 (u32 data, u32 crc,
+                                     const struct crc32_consts_s *consts);
+void _gcry_crc32_armv8_ce_bulk (u32 *pcrc, const byte *inbuf, size_t inlen,
+                                const struct crc32_consts_s *consts);
+
+
+static inline void
+crc32r_less_than_16 (u32 *pcrc, const byte *inbuf, size_t inlen,
+                    const struct crc32_consts_s *consts)
+{
+  u32 crc = *pcrc;
+  u32 data;
+
+  while (inlen >= 4)
+    {
+      data = ((const struct u32_unaligned_s *)inbuf)->a;
+      data ^= crc;
+
+      inlen -= 4;
+      inbuf += 4;
+
+      crc = _gcry_crc32r_armv8_ce_reduction_4 (data, 0, consts);
+    }
+
+  switch (inlen)
+    {
+    case 0:
+      break;
+    case 1:
+      data = inbuf[0];
+      data ^= crc;
+      data <<= 24;
+      crc >>= 8;
+      crc = _gcry_crc32r_armv8_ce_reduction_4 (data, crc, consts);
+      break;
+    case 2:
+      data = ((const struct u16_unaligned_s *)inbuf)->a;
+      data ^= crc;
+      data <<= 16;
+      crc >>= 16;
+      crc = _gcry_crc32r_armv8_ce_reduction_4 (data, crc, consts);
+      break;
+    case 3:
+      data = ((const struct u16_unaligned_s *)inbuf)->a;
+      data |= inbuf[2] << 16;
+      data ^= crc;
+      data <<= 8;
+      crc >>= 24;
+      crc = _gcry_crc32r_armv8_ce_reduction_4 (data, crc, consts);
+      break;
+    }
+
+  *pcrc = crc;
+}
+
+static inline void
+crc32_less_than_16 (u32 *pcrc, const byte *inbuf, size_t inlen,
+                   const struct crc32_consts_s *consts)
+{
+  u32 crc = *pcrc;
+  u32 data;
+
+  while (inlen >= 4)
+    {
+      data = ((const struct u32_unaligned_s *)inbuf)->a;
+      data ^= crc;
+      data = _gcry_bswap32(data);
+
+      inlen -= 4;
+      inbuf += 4;
+
+      crc = _gcry_crc32_armv8_ce_reduction_4 (data, 0, consts);
+    }
+
+  switch (inlen)
+    {
+    case 0:
+      break;
+    case 1:
+      data = inbuf[0];
+      data ^= crc;
+      data = data & 0xffU;
+      crc = _gcry_bswap32(crc >> 8);
+      crc = _gcry_crc32_armv8_ce_reduction_4 (data, crc, consts);
+      break;
+    case 2:
+      data = ((const struct u16_unaligned_s *)inbuf)->a;
+      data ^= crc;
+      data = _gcry_bswap32(data << 16);
+      crc = _gcry_bswap32(crc >> 16);
+      crc = _gcry_crc32_armv8_ce_reduction_4 (data, crc, consts);
+      break;
+    case 3:
+      data = ((const struct u16_unaligned_s *)inbuf)->a;
+      data |= inbuf[2] << 16;
+      data ^= crc;
+      data = _gcry_bswap32(data << 8);
+      crc = crc & 0xff000000U;
+      crc = _gcry_crc32_armv8_ce_reduction_4 (data, crc, consts);
+      break;
+    }
+
+  *pcrc = crc;
+}
+
+void
+_gcry_crc32_armv8_ce_pmull (u32 *pcrc, const byte *inbuf, size_t inlen)
+{
+  const struct crc32_consts_s *consts = &crc32_consts;
+
+  if (!inlen)
+    return;
+
+  if (inlen >= 16)
+    _gcry_crc32r_armv8_ce_bulk (pcrc, inbuf, inlen, consts);
+  else
+    crc32r_less_than_16 (pcrc, inbuf, inlen, consts);
+}
+
+void
+_gcry_crc24rfc2440_armv8_ce_pmull (u32 *pcrc, const byte *inbuf, size_t inlen)
+{
+  const struct crc32_consts_s *consts = &crc24rfc2440_consts;
+
+  if (!inlen)
+    return;
+
+  /* Note: *pcrc in input endian. */
+
+  if (inlen >= 16)
+    _gcry_crc32_armv8_ce_bulk (pcrc, inbuf, inlen, consts);
+  else
+    crc32_less_than_16 (pcrc, inbuf, inlen, consts);
+}
+
+#endif
diff --git a/grub-core/lib/libgcrypt/cipher/crc-intel-pclmul.c 
b/grub-core/lib/libgcrypt/cipher/crc-intel-pclmul.c
new file mode 100644
index 000000000..8c8b1915a
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/crc-intel-pclmul.c
@@ -0,0 +1,939 @@
+/* crc-intel-pclmul.c - Intel PCLMUL accelerated CRC implementation
+ * Copyright (C) 2016 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "g10lib.h"
+
+#include "bithelp.h"
+#include "bufhelp.h"
+
+
+#if defined(ENABLE_PCLMUL_SUPPORT) && defined(ENABLE_SSE41_SUPPORT) && \
+    __GNUC__ >= 4 &&                                                   \
+    ((defined(__i386__) && SIZEOF_UNSIGNED_LONG == 4) || defined(__x86_64__))
+
+
+#if _GCRY_GCC_VERSION >= 40400 /* 4.4 */
+/* Prevent compiler from issuing SSE instructions between asm blocks. */
+#  pragma GCC target("no-sse")
+#endif
+#if __clang__
+#  pragma clang attribute push (__attribute__((target("no-sse"))), apply_to = 
function)
+#endif
+
+
+#define ALWAYS_INLINE inline __attribute__((always_inline))
+#define NO_INSTRUMENT_FUNCTION __attribute__((no_instrument_function))
+
+#define ASM_FUNC_ATTR        NO_INSTRUMENT_FUNCTION
+#define ASM_FUNC_ATTR_INLINE ASM_FUNC_ATTR ALWAYS_INLINE
+
+
+#define ALIGNED_16 __attribute__ ((aligned (16)))
+
+
+struct u16_unaligned_s
+{
+  u16 a;
+} __attribute__((packed, aligned (1), may_alias));
+
+
+/* Constants structure for generic reflected/non-reflected CRC32 CLMUL
+ * functions. */
+struct crc32_consts_s
+{
+  /* k: { x^(32*17), x^(32*15), x^(32*5), x^(32*3), x^(32*2), 0 } mod P(x) */
+  u64 k[6];
+  /* my_p: { floor(x^64 / P(x)), P(x) } */
+  u64 my_p[2];
+};
+
+
+/* CLMUL constants for CRC32 and CRC32RFC1510. */
+static const struct crc32_consts_s crc32_consts ALIGNED_16 =
+{
+  { /* k[6] = reverse_33bits( x^(32*y) mod P(x) ) */
+    U64_C(0x154442bd4), U64_C(0x1c6e41596), /* y = { 17, 15 } */
+    U64_C(0x1751997d0), U64_C(0x0ccaa009e), /* y = { 5, 3 } */
+    U64_C(0x163cd6124), 0                   /* y = 2 */
+  },
+  { /* my_p[2] = reverse_33bits ( { floor(x^64 / P(x)), P(x) } ) */
+    U64_C(0x1f7011641), U64_C(0x1db710641)
+  }
+};
+
+/* CLMUL constants for CRC24RFC2440 (polynomial multiplied with x⁸). */
+static const struct crc32_consts_s crc24rfc2440_consts ALIGNED_16 =
+{
+  { /* k[6] = x^(32*y) mod P(x) << 32*/
+    U64_C(0x08289a00) << 32, U64_C(0x74b44a00) << 32, /* y = { 17, 15 } */
+    U64_C(0xc4b14d00) << 32, U64_C(0xfd7e0c00) << 32, /* y = { 5, 3 } */
+    U64_C(0xd9fe8c00) << 32, 0                        /* y = 2 */
+  },
+  { /* my_p[2] = { floor(x^64 / P(x)), P(x) } */
+    U64_C(0x1f845fe24), U64_C(0x1864cfb00)
+  }
+};
+
+/* Common constants for CRC32 algorithms. */
+static const byte crc32_refl_shuf_shift[3 * 16] ALIGNED_16 =
+  {
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+    0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+  };
+static const byte crc32_shuf_shift[3 * 16] ALIGNED_16 =
+  {
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+    0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08,
+    0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+  };
+static const byte *crc32_bswap_shuf = &crc32_shuf_shift[16];
+static const byte crc32_partial_fold_input_mask[16 + 16] ALIGNED_16 =
+  {
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+  };
+static const u64 crc32_merge9to15_shuf[15 - 9 + 1][2] ALIGNED_16 =
+  {
+    { U64_C(0x0706050403020100), U64_C(0xffffffffffffff0f) }, /* 9 */
+    { U64_C(0x0706050403020100), U64_C(0xffffffffffff0f0e) },
+    { U64_C(0x0706050403020100), U64_C(0xffffffffff0f0e0d) },
+    { U64_C(0x0706050403020100), U64_C(0xffffffff0f0e0d0c) },
+    { U64_C(0x0706050403020100), U64_C(0xffffff0f0e0d0c0b) },
+    { U64_C(0x0706050403020100), U64_C(0xffff0f0e0d0c0b0a) },
+    { U64_C(0x0706050403020100), U64_C(0xff0f0e0d0c0b0a09) }, /* 15 */
+  };
+static const u64 crc32_merge5to7_shuf[7 - 5 + 1][2] ALIGNED_16 =
+  {
+    { U64_C(0xffffff0703020100), U64_C(0xffffffffffffffff) }, /* 5 */
+    { U64_C(0xffff070603020100), U64_C(0xffffffffffffffff) },
+    { U64_C(0xff07060503020100), U64_C(0xffffffffffffffff) }, /* 7 */
+  };
+
+/* PCLMUL functions for reflected CRC32. */
+static ASM_FUNC_ATTR_INLINE void
+crc32_reflected_bulk (u32 *pcrc, const byte *inbuf, size_t inlen,
+                     const struct crc32_consts_s *consts)
+{
+  if (inlen >= 8 * 16)
+    {
+      asm volatile ("movd %[crc], %%xmm4\n\t"
+                   "movdqu %[inbuf_0], %%xmm0\n\t"
+                   "movdqu %[inbuf_1], %%xmm1\n\t"
+                   "movdqu %[inbuf_2], %%xmm2\n\t"
+                   "movdqu %[inbuf_3], %%xmm3\n\t"
+                   "pxor %%xmm4, %%xmm0\n\t"
+                   :
+                   : [inbuf_0] "m" (inbuf[0 * 16]),
+                     [inbuf_1] "m" (inbuf[1 * 16]),
+                     [inbuf_2] "m" (inbuf[2 * 16]),
+                     [inbuf_3] "m" (inbuf[3 * 16]),
+                     [crc] "m" (*pcrc)
+                   );
+
+      inbuf += 4 * 16;
+      inlen -= 4 * 16;
+
+      asm volatile ("movdqa %[k1k2], %%xmm4\n\t"
+                   :
+                   : [k1k2] "m" (consts->k[1 - 1])
+                   );
+
+      /* Fold by 4. */
+      while (inlen >= 4 * 16)
+       {
+         asm volatile ("movdqu %[inbuf_0], %%xmm5\n\t"
+                       "movdqa %%xmm0, %%xmm6\n\t"
+                       "pclmulqdq $0x00, %%xmm4, %%xmm0\n\t"
+                       "pclmulqdq $0x11, %%xmm4, %%xmm6\n\t"
+                       "pxor %%xmm5, %%xmm0\n\t"
+                       "pxor %%xmm6, %%xmm0\n\t"
+
+                       "movdqu %[inbuf_1], %%xmm5\n\t"
+                       "movdqa %%xmm1, %%xmm6\n\t"
+                       "pclmulqdq $0x00, %%xmm4, %%xmm1\n\t"
+                       "pclmulqdq $0x11, %%xmm4, %%xmm6\n\t"
+                       "pxor %%xmm5, %%xmm1\n\t"
+                       "pxor %%xmm6, %%xmm1\n\t"
+
+                       "movdqu %[inbuf_2], %%xmm5\n\t"
+                       "movdqa %%xmm2, %%xmm6\n\t"
+                       "pclmulqdq $0x00, %%xmm4, %%xmm2\n\t"
+                       "pclmulqdq $0x11, %%xmm4, %%xmm6\n\t"
+                       "pxor %%xmm5, %%xmm2\n\t"
+                       "pxor %%xmm6, %%xmm2\n\t"
+
+                       "movdqu %[inbuf_3], %%xmm5\n\t"
+                       "movdqa %%xmm3, %%xmm6\n\t"
+                       "pclmulqdq $0x00, %%xmm4, %%xmm3\n\t"
+                       "pclmulqdq $0x11, %%xmm4, %%xmm6\n\t"
+                       "pxor %%xmm5, %%xmm3\n\t"
+                       "pxor %%xmm6, %%xmm3\n\t"
+                       :
+                       : [inbuf_0] "m" (inbuf[0 * 16]),
+                         [inbuf_1] "m" (inbuf[1 * 16]),
+                         [inbuf_2] "m" (inbuf[2 * 16]),
+                         [inbuf_3] "m" (inbuf[3 * 16])
+                       );
+
+         inbuf += 4 * 16;
+         inlen -= 4 * 16;
+       }
+
+      asm volatile ("movdqa %[k3k4], %%xmm6\n\t"
+                   "movdqa %[my_p], %%xmm5\n\t"
+                   :
+                   : [k3k4] "m" (consts->k[3 - 1]),
+                     [my_p] "m" (consts->my_p[0])
+                   );
+
+      /* Fold 4 to 1. */
+
+      asm volatile ("movdqa %%xmm0, %%xmm4\n\t"
+                   "pclmulqdq $0x00, %%xmm6, %%xmm0\n\t"
+                   "pclmulqdq $0x11, %%xmm6, %%xmm4\n\t"
+                   "pxor %%xmm1, %%xmm0\n\t"
+                   "pxor %%xmm4, %%xmm0\n\t"
+
+                   "movdqa %%xmm0, %%xmm4\n\t"
+                   "pclmulqdq $0x00, %%xmm6, %%xmm0\n\t"
+                   "pclmulqdq $0x11, %%xmm6, %%xmm4\n\t"
+                   "pxor %%xmm2, %%xmm0\n\t"
+                   "pxor %%xmm4, %%xmm0\n\t"
+
+                   "movdqa %%xmm0, %%xmm4\n\t"
+                   "pclmulqdq $0x00, %%xmm6, %%xmm0\n\t"
+                   "pclmulqdq $0x11, %%xmm6, %%xmm4\n\t"
+                   "pxor %%xmm3, %%xmm0\n\t"
+                   "pxor %%xmm4, %%xmm0\n\t"
+                   :
+                   :
+                   );
+    }
+  else
+    {
+      asm volatile ("movd %[crc], %%xmm1\n\t"
+                   "movdqu %[inbuf], %%xmm0\n\t"
+                   "movdqa %[k3k4], %%xmm6\n\t"
+                   "pxor %%xmm1, %%xmm0\n\t"
+                   "movdqa %[my_p], %%xmm5\n\t"
+                   :
+                   : [inbuf] "m" (*inbuf),
+                     [crc] "m" (*pcrc),
+                     [k3k4] "m" (consts->k[3 - 1]),
+                     [my_p] "m" (consts->my_p[0])
+                   );
+
+      inbuf += 16;
+      inlen -= 16;
+    }
+
+  /* Fold by 1. */
+  if (inlen >= 16)
+    {
+      while (inlen >= 16)
+       {
+         /* Load next block to XMM2. Fold XMM0 to XMM0:XMM1. */
+         asm volatile ("movdqu %[inbuf], %%xmm2\n\t"
+                       "movdqa %%xmm0, %%xmm1\n\t"
+                       "pclmulqdq $0x00, %%xmm6, %%xmm0\n\t"
+                       "pclmulqdq $0x11, %%xmm6, %%xmm1\n\t"
+                       "pxor %%xmm2, %%xmm0\n\t"
+                       "pxor %%xmm1, %%xmm0\n\t"
+                       :
+                       : [inbuf] "m" (*inbuf)
+                       );
+
+         inbuf += 16;
+         inlen -= 16;
+       }
+    }
+
+  /* Partial fold. */
+  if (inlen)
+    {
+      /* Load last input and add padding zeros. */
+      asm volatile ("movdqu %[shr_shuf], %%xmm3\n\t"
+                   "movdqu %[shl_shuf], %%xmm4\n\t"
+                   "movdqu %[mask], %%xmm2\n\t"
+
+                   "movdqa %%xmm0, %%xmm1\n\t"
+                   "pshufb %%xmm4, %%xmm0\n\t"
+                   "movdqu %[inbuf], %%xmm4\n\t"
+                   "pshufb %%xmm3, %%xmm1\n\t"
+                   "pand %%xmm4, %%xmm2\n\t"
+                   "por %%xmm1, %%xmm2\n\t"
+
+                   "movdqa %%xmm0, %%xmm1\n\t"
+                   "pclmulqdq $0x00, %%xmm6, %%xmm0\n\t"
+                   "pclmulqdq $0x11, %%xmm6, %%xmm1\n\t"
+                   "pxor %%xmm2, %%xmm0\n\t"
+                   "pxor %%xmm1, %%xmm0\n\t"
+                   :
+                   : [inbuf] "m" (*(inbuf - 16 + inlen)),
+                     [mask] "m" (crc32_partial_fold_input_mask[inlen]),
+                     [shl_shuf] "m" (crc32_refl_shuf_shift[inlen]),
+                     [shr_shuf] "m" (crc32_refl_shuf_shift[inlen + 16])
+                   );
+
+      inbuf += inlen;
+      inlen -= inlen;
+    }
+
+  /* Final fold. */
+  asm volatile (/* reduce 128-bits to 96-bits */
+               "movdqa %%xmm0, %%xmm1\n\t"
+               "pclmulqdq $0x10, %%xmm6, %%xmm0\n\t"
+               "psrldq $8, %%xmm1\n\t"
+               "pxor %%xmm1, %%xmm0\n\t"
+
+               /* reduce 96-bits to 64-bits */
+               "pshufd $0xfc, %%xmm0, %%xmm1\n\t" /* [00][00][00][x] */
+               "pshufd $0xf9, %%xmm0, %%xmm0\n\t" /* [00][00][x>>64][x>>32] */
+               "pclmulqdq $0x00, %[k5], %%xmm1\n\t" /* [00][00][xx][xx] */
+               "pxor %%xmm1, %%xmm0\n\t" /* top 64-bit are zero */
+
+               /* barrett reduction */
+               "pshufd $0xf3, %%xmm0, %%xmm1\n\t" /* [00][00][x>>32][00] */
+               "pslldq $4, %%xmm0\n\t" /* [??][x>>32][??][??] */
+               "pclmulqdq $0x00, %%xmm5, %%xmm1\n\t" /* [00][xx][xx][00] */
+               "pclmulqdq $0x10, %%xmm5, %%xmm1\n\t" /* [00][xx][xx][00] */
+               "pxor %%xmm1, %%xmm0\n\t"
+
+               /* store CRC */
+               "pextrd $2, %%xmm0, %[out]\n\t"
+               : [out] "=m" (*pcrc)
+               : [k5] "m" (consts->k[5 - 1])
+               );
+}
+
+static ASM_FUNC_ATTR_INLINE void
+crc32_reflected_less_than_16 (u32 *pcrc, const byte *inbuf, size_t inlen,
+                             const struct crc32_consts_s *consts)
+{
+  if (inlen < 4)
+    {
+      u32 crc = *pcrc;
+      u32 data;
+
+      asm volatile ("movdqa %[my_p], %%xmm5\n\t"
+                   :
+                   : [my_p] "m" (consts->my_p[0])
+                   );
+
+      if (inlen == 1)
+       {
+         data = inbuf[0];
+         data ^= crc;
+         data <<= 24;
+         crc >>= 8;
+       }
+      else if (inlen == 2)
+       {
+         data = ((const struct u16_unaligned_s *)inbuf)->a;
+         data ^= crc;
+         data <<= 16;
+         crc >>= 16;
+       }
+      else
+       {
+         data = ((const struct u16_unaligned_s *)inbuf)->a;
+         data |= inbuf[2] << 16;
+         data ^= crc;
+         data <<= 8;
+         crc >>= 24;
+       }
+
+      /* Barrett reduction */
+      asm volatile ("movd %[in], %%xmm0\n\t"
+                   "movd %[crc], %%xmm1\n\t"
+
+                   "pclmulqdq $0x00, %%xmm5, %%xmm0\n\t" /* [00][00][xx][xx] */
+                   "psllq $32, %%xmm1\n\t"
+                   "pshufd $0xfc, %%xmm0, %%xmm0\n\t" /* [00][00][00][x] */
+                   "pclmulqdq $0x10, %%xmm5, %%xmm0\n\t" /* [00][00][xx][xx] */
+                   "pxor %%xmm1, %%xmm0\n\t"
+
+                   "pextrd $1, %%xmm0, %[out]\n\t"
+                   : [out] "=m" (*pcrc)
+                   : [in] "rm" (data),
+                     [crc] "rm" (crc)
+                   );
+    }
+  else if (inlen == 4)
+    {
+      /* Barrett reduction */
+      asm volatile ("movd %[crc], %%xmm1\n\t"
+                   "movd %[in], %%xmm0\n\t"
+                   "movdqa %[my_p], %%xmm5\n\t"
+                   "pxor %%xmm1, %%xmm0\n\t"
+
+                   "pclmulqdq $0x00, %%xmm5, %%xmm0\n\t" /* [00][00][xx][xx] */
+                   "pshufd $0xfc, %%xmm0, %%xmm0\n\t" /* [00][00][00][x] */
+                   "pclmulqdq $0x10, %%xmm5, %%xmm0\n\t" /* [00][00][xx][xx] */
+
+                   "pextrd $1, %%xmm0, %[out]\n\t"
+                   : [out] "=m" (*pcrc)
+                   : [in] "m" (*inbuf),
+                     [crc] "m" (*pcrc),
+                     [my_p] "m" (consts->my_p[0])
+                   );
+    }
+  else
+    {
+      asm volatile ("movdqu %[shuf], %%xmm4\n\t"
+                   "movd %[crc], %%xmm1\n\t"
+                   "movdqa %[my_p], %%xmm5\n\t"
+                   "movdqa %[k3k4], %%xmm6\n\t"
+                   :
+                   : [shuf] "m" (crc32_refl_shuf_shift[inlen]),
+                     [crc] "m" (*pcrc),
+                     [my_p] "m" (consts->my_p[0]),
+                     [k3k4] "m" (consts->k[3 - 1])
+                   );
+
+      if (inlen >= 8)
+       {
+         asm volatile ("movq %[inbuf], %%xmm0\n\t"
+                       :
+                       : [inbuf] "m" (*inbuf)
+                       );
+         if (inlen > 8)
+           {
+             asm volatile (/*"pinsrq $1, %[inbuf_tail], %%xmm0\n\t"*/
+                           "movq %[inbuf_tail], %%xmm2\n\t"
+                           "punpcklqdq %%xmm2, %%xmm0\n\t"
+                           "pshufb %[merge_shuf], %%xmm0\n\t"
+                           :
+                           : [inbuf_tail] "m" (inbuf[inlen - 8]),
+                             [merge_shuf] "m"
+                               (*crc32_merge9to15_shuf[inlen - 9])
+                           );
+           }
+       }
+      else
+       {
+         asm volatile ("movd %[inbuf], %%xmm0\n\t"
+                       "pinsrd $1, %[inbuf_tail], %%xmm0\n\t"
+                       "pshufb %[merge_shuf], %%xmm0\n\t"
+                       :
+                       : [inbuf] "m" (*inbuf),
+                         [inbuf_tail] "m" (inbuf[inlen - 4]),
+                         [merge_shuf] "m"
+                           (*crc32_merge5to7_shuf[inlen - 5])
+                       );
+       }
+
+      /* Final fold. */
+      asm volatile ("pxor %%xmm1, %%xmm0\n\t"
+                   "pshufb %%xmm4, %%xmm0\n\t"
+
+                   /* reduce 128-bits to 96-bits */
+                   "movdqa %%xmm0, %%xmm1\n\t"
+                   "pclmulqdq $0x10, %%xmm6, %%xmm0\n\t"
+                   "psrldq $8, %%xmm1\n\t"
+                   "pxor %%xmm1, %%xmm0\n\t" /* top 32-bit are zero */
+
+                   /* reduce 96-bits to 64-bits */
+                   "pshufd $0xfc, %%xmm0, %%xmm1\n\t" /* [00][00][00][x] */
+                   "pshufd $0xf9, %%xmm0, %%xmm0\n\t" /* 
[00][00][x>>64][x>>32] */
+                   "pclmulqdq $0x00, %[k5], %%xmm1\n\t" /* [00][00][xx][xx] */
+                   "pxor %%xmm1, %%xmm0\n\t" /* top 64-bit are zero */
+
+                   /* barrett reduction */
+                   "pshufd $0xf3, %%xmm0, %%xmm1\n\t" /* [00][00][x>>32][00] */
+                   "pslldq $4, %%xmm0\n\t" /* [??][x>>32][??][??] */
+                   "pclmulqdq $0x00, %%xmm5, %%xmm1\n\t" /* [00][xx][xx][00] */
+                   "pclmulqdq $0x10, %%xmm5, %%xmm1\n\t" /* [00][xx][xx][00] */
+                   "pxor %%xmm1, %%xmm0\n\t"
+
+                   /* store CRC */
+                   "pextrd $2, %%xmm0, %[out]\n\t"
+                   : [out] "=m" (*pcrc)
+                   : [k5] "m" (consts->k[5 - 1])
+                   );
+    }
+}
+
+/* PCLMUL functions for non-reflected CRC32. */
+static ASM_FUNC_ATTR_INLINE void
+crc32_bulk (u32 *pcrc, const byte *inbuf, size_t inlen,
+           const struct crc32_consts_s *consts)
+{
+  asm volatile ("movdqa %[bswap], %%xmm7\n\t"
+               :
+               : [bswap] "m" (*crc32_bswap_shuf)
+               );
+
+  if (inlen >= 8 * 16)
+    {
+      asm volatile ("movd %[crc], %%xmm4\n\t"
+                   "movdqu %[inbuf_0], %%xmm0\n\t"
+                   "movdqu %[inbuf_1], %%xmm1\n\t"
+                   "movdqu %[inbuf_2], %%xmm2\n\t"
+                   "pxor %%xmm4, %%xmm0\n\t"
+                   "movdqu %[inbuf_3], %%xmm3\n\t"
+                   "pshufb %%xmm7, %%xmm0\n\t"
+                   "pshufb %%xmm7, %%xmm1\n\t"
+                   "pshufb %%xmm7, %%xmm2\n\t"
+                   "pshufb %%xmm7, %%xmm3\n\t"
+                   :
+                   : [inbuf_0] "m" (inbuf[0 * 16]),
+                     [inbuf_1] "m" (inbuf[1 * 16]),
+                     [inbuf_2] "m" (inbuf[2 * 16]),
+                     [inbuf_3] "m" (inbuf[3 * 16]),
+                     [crc] "m" (*pcrc)
+                   );
+
+      inbuf += 4 * 16;
+      inlen -= 4 * 16;
+
+      asm volatile ("movdqa %[k1k2], %%xmm4\n\t"
+                   :
+                   : [k1k2] "m" (consts->k[1 - 1])
+                   );
+
+      /* Fold by 4. */
+      while (inlen >= 4 * 16)
+       {
+         asm volatile ("movdqu %[inbuf_0], %%xmm5\n\t"
+                       "movdqa %%xmm0, %%xmm6\n\t"
+                       "pshufb %%xmm7, %%xmm5\n\t"
+                       "pclmulqdq $0x01, %%xmm4, %%xmm0\n\t"
+                       "pclmulqdq $0x10, %%xmm4, %%xmm6\n\t"
+                       "pxor %%xmm5, %%xmm0\n\t"
+                       "pxor %%xmm6, %%xmm0\n\t"
+
+                       "movdqu %[inbuf_1], %%xmm5\n\t"
+                       "movdqa %%xmm1, %%xmm6\n\t"
+                       "pshufb %%xmm7, %%xmm5\n\t"
+                       "pclmulqdq $0x01, %%xmm4, %%xmm1\n\t"
+                       "pclmulqdq $0x10, %%xmm4, %%xmm6\n\t"
+                       "pxor %%xmm5, %%xmm1\n\t"
+                       "pxor %%xmm6, %%xmm1\n\t"
+
+                       "movdqu %[inbuf_2], %%xmm5\n\t"
+                       "movdqa %%xmm2, %%xmm6\n\t"
+                       "pshufb %%xmm7, %%xmm5\n\t"
+                       "pclmulqdq $0x01, %%xmm4, %%xmm2\n\t"
+                       "pclmulqdq $0x10, %%xmm4, %%xmm6\n\t"
+                       "pxor %%xmm5, %%xmm2\n\t"
+                       "pxor %%xmm6, %%xmm2\n\t"
+
+                       "movdqu %[inbuf_3], %%xmm5\n\t"
+                       "movdqa %%xmm3, %%xmm6\n\t"
+                       "pshufb %%xmm7, %%xmm5\n\t"
+                       "pclmulqdq $0x01, %%xmm4, %%xmm3\n\t"
+                       "pclmulqdq $0x10, %%xmm4, %%xmm6\n\t"
+                       "pxor %%xmm5, %%xmm3\n\t"
+                       "pxor %%xmm6, %%xmm3\n\t"
+                       :
+                       : [inbuf_0] "m" (inbuf[0 * 16]),
+                         [inbuf_1] "m" (inbuf[1 * 16]),
+                         [inbuf_2] "m" (inbuf[2 * 16]),
+                         [inbuf_3] "m" (inbuf[3 * 16])
+                       );
+
+         inbuf += 4 * 16;
+         inlen -= 4 * 16;
+       }
+
+      asm volatile ("movdqa %[k3k4], %%xmm6\n\t"
+                   "movdqa %[my_p], %%xmm5\n\t"
+                   :
+                   : [k3k4] "m" (consts->k[3 - 1]),
+                     [my_p] "m" (consts->my_p[0])
+                   );
+
+      /* Fold 4 to 1. */
+
+      asm volatile ("movdqa %%xmm0, %%xmm4\n\t"
+                   "pclmulqdq $0x01, %%xmm6, %%xmm0\n\t"
+                   "pclmulqdq $0x10, %%xmm6, %%xmm4\n\t"
+                   "pxor %%xmm1, %%xmm0\n\t"
+                   "pxor %%xmm4, %%xmm0\n\t"
+
+                   "movdqa %%xmm0, %%xmm4\n\t"
+                   "pclmulqdq $0x01, %%xmm6, %%xmm0\n\t"
+                   "pclmulqdq $0x10, %%xmm6, %%xmm4\n\t"
+                   "pxor %%xmm2, %%xmm0\n\t"
+                   "pxor %%xmm4, %%xmm0\n\t"
+
+                   "movdqa %%xmm0, %%xmm4\n\t"
+                   "pclmulqdq $0x01, %%xmm6, %%xmm0\n\t"
+                   "pclmulqdq $0x10, %%xmm6, %%xmm4\n\t"
+                   "pxor %%xmm3, %%xmm0\n\t"
+                   "pxor %%xmm4, %%xmm0\n\t"
+                   :
+                   :
+                   );
+    }
+  else
+    {
+      asm volatile ("movd %[crc], %%xmm1\n\t"
+                   "movdqu %[inbuf], %%xmm0\n\t"
+                   "movdqa %[k3k4], %%xmm6\n\t"
+                   "pxor %%xmm1, %%xmm0\n\t"
+                   "movdqa %[my_p], %%xmm5\n\t"
+                   "pshufb %%xmm7, %%xmm0\n\t"
+                   :
+                   : [inbuf] "m" (*inbuf),
+                     [crc] "m" (*pcrc),
+                     [k3k4] "m" (consts->k[3 - 1]),
+                     [my_p] "m" (consts->my_p[0])
+                   );
+
+      inbuf += 16;
+      inlen -= 16;
+    }
+
+  /* Fold by 1. */
+  if (inlen >= 16)
+    {
+      while (inlen >= 16)
+       {
+         /* Load next block to XMM2. Fold XMM0 to XMM0:XMM1. */
+         asm volatile ("movdqu %[inbuf], %%xmm2\n\t"
+                       "movdqa %%xmm0, %%xmm1\n\t"
+                       "pclmulqdq $0x01, %%xmm6, %%xmm0\n\t"
+                       "pshufb %%xmm7, %%xmm2\n\t"
+                       "pclmulqdq $0x10, %%xmm6, %%xmm1\n\t"
+                       "pxor %%xmm2, %%xmm0\n\t"
+                       "pxor %%xmm1, %%xmm0\n\t"
+                       :
+                       : [inbuf] "m" (*inbuf)
+                       );
+
+         inbuf += 16;
+         inlen -= 16;
+       }
+    }
+
+  /* Partial fold. */
+  if (inlen)
+    {
+      /* Load last input and add padding zeros. */
+      asm volatile ("movdqu %[shl_shuf], %%xmm4\n\t"
+                   "movdqu %[shr_shuf], %%xmm3\n\t"
+                   "movdqu %[mask], %%xmm2\n\t"
+
+                   "movdqa %%xmm0, %%xmm1\n\t"
+                   "pshufb %%xmm4, %%xmm0\n\t"
+                   "movdqu %[inbuf], %%xmm4\n\t"
+                   "pshufb %%xmm3, %%xmm1\n\t"
+                   "pand %%xmm4, %%xmm2\n\t"
+                   "por %%xmm1, %%xmm2\n\t"
+
+                   "pshufb %%xmm7, %%xmm2\n\t"
+
+                   "movdqa %%xmm0, %%xmm1\n\t"
+                   "pclmulqdq $0x01, %%xmm6, %%xmm0\n\t"
+                   "pclmulqdq $0x10, %%xmm6, %%xmm1\n\t"
+                   "pxor %%xmm2, %%xmm0\n\t"
+                   "pxor %%xmm1, %%xmm0\n\t"
+                   :
+                   : [inbuf] "m" (*(inbuf - 16 + inlen)),
+                     [mask] "m" (crc32_partial_fold_input_mask[inlen]),
+                     [shl_shuf] "m" (crc32_refl_shuf_shift[32 - inlen]),
+                     [shr_shuf] "m" (crc32_shuf_shift[inlen + 16])
+                   );
+
+      inbuf += inlen;
+      inlen -= inlen;
+    }
+
+  /* Final fold. */
+  asm volatile (/* reduce 128-bits to 96-bits */
+               "movdqa %%xmm0, %%xmm1\n\t"
+               "pclmulqdq $0x11, %%xmm6, %%xmm0\n\t"
+               "pslldq $8, %%xmm1\n\t"
+               "pxor %%xmm1, %%xmm0\n\t" /* bottom 32-bit are zero */
+
+               /* reduce 96-bits to 64-bits */
+               "pshufd $0x30, %%xmm0, %%xmm1\n\t" /* [00][x>>96][00][00] */
+               "pshufd $0x24, %%xmm0, %%xmm0\n\t" /* [00][xx][xx][00] */
+               "pclmulqdq $0x01, %[k5], %%xmm1\n\t" /* [00][xx][xx][00] */
+               "pxor %%xmm1, %%xmm0\n\t" /* top and bottom 32-bit are zero */
+
+               /* barrett reduction */
+               "pshufd $0x01, %%xmm0, %%xmm1\n\t" /* [00][00][00][x>>32] */
+               "pclmulqdq $0x01, %%xmm5, %%xmm0\n\t" /* [00][xx][xx][xx] */
+               "psrldq $4, %%xmm0\n\t" /* [00][00][xx][xx] */
+               "pclmulqdq $0x10, %%xmm5, %%xmm0\n\t"
+               "pxor %%xmm1, %%xmm0\n\t"
+
+               /* store CRC in input endian */
+               "movd %%xmm0, %%eax\n\t"
+               "bswapl %%eax\n\t"
+               "movl %%eax, %[out]\n\t"
+               : [out] "=m" (*pcrc)
+               : [k5] "m" (consts->k[5 - 1])
+               : "eax" );
+}
+
+static ASM_FUNC_ATTR_INLINE void
+crc32_less_than_16 (u32 *pcrc, const byte *inbuf, size_t inlen,
+                   const struct crc32_consts_s *consts)
+{
+  if (inlen < 4)
+    {
+      u32 crc = *pcrc;
+      u32 data;
+
+      asm volatile ("movdqa %[my_p], %%xmm5\n\t"
+                   :
+                   : [my_p] "m" (consts->my_p[0])
+                   );
+
+      if (inlen == 1)
+       {
+         data = inbuf[0];
+         data ^= crc;
+         data = _gcry_bswap32(data << 24);
+         crc = _gcry_bswap32(crc >> 8);
+       }
+      else if (inlen == 2)
+       {
+         data = ((const struct u16_unaligned_s *)inbuf)->a;
+         data ^= crc;
+         data = _gcry_bswap32(data << 16);
+         crc = _gcry_bswap32(crc >> 16);
+       }
+      else
+       {
+         data = ((const struct u16_unaligned_s *)inbuf)->a;
+         data |= inbuf[2] << 16;
+         data ^= crc;
+         data = _gcry_bswap32(data << 8);
+         crc = _gcry_bswap32(crc >> 24);
+       }
+
+      /* Barrett reduction */
+      asm volatile ("movd %[in], %%xmm0\n\t"
+                   "psllq $32, %%xmm0\n\t" /* [00][00][xx][00] */
+                   "movd %[crc], %%xmm1\n\t"
+
+                   "pclmulqdq $0x00, %%xmm5, %%xmm0\n\t" /* [00][xx][xx][00] */
+                   "pclmulqdq $0x11, %%xmm5, %%xmm0\n\t" /* [00][00][xx][xx] */
+                   "pxor %%xmm1, %%xmm0\n\t"
+
+                   /* store CRC in input endian */
+                   "movd %%xmm0, %%eax\n\t"
+                   "bswapl %%eax\n\t"
+                   "movl %%eax, %[out]\n\t"
+                   : [out] "=m" (*pcrc)
+                   : [in] "r" (data),
+                     [crc] "r" (crc)
+                   : "eax" );
+    }
+  else if (inlen == 4)
+    {
+      /* Barrett reduction */
+      asm volatile ("movd %[crc], %%xmm0\n\t"
+                   "movd %[in], %%xmm1\n\t"
+                   "movdqa %[my_p], %%xmm5\n\t"
+                   :
+                   : [in] "m" (*inbuf),
+                     [crc] "m" (*pcrc),
+                     [my_p] "m" (consts->my_p[0])
+                   : "cc" );
+
+      asm volatile ("pxor %%xmm1, %%xmm0\n\t"
+                   "pshufb %[bswap], %%xmm0\n\t" /* [xx][00][00][00] */
+
+                   "pclmulqdq $0x01, %%xmm5, %%xmm0\n\t" /* [00][xx][xx][00] */
+                   "pclmulqdq $0x11, %%xmm5, %%xmm0\n\t" /* [00][00][xx][xx] */
+                   :
+                   : [bswap] "m" (*crc32_bswap_shuf)
+                   : "cc" );
+
+      asm volatile (/* store CRC in input endian */
+                   "movd %%xmm0, %%eax\n\t"
+                   "bswapl %%eax\n\t"
+                   "movl %%eax, %[out]\n\t"
+                   : [out] "=m" (*pcrc)
+                   :
+                   : "eax", "cc" );
+    }
+  else
+    {
+      asm volatile ("movdqu %[shuf], %%xmm7\n\t"
+                   "movd %[crc], %%xmm1\n\t"
+                   "movdqa %[my_p], %%xmm5\n\t"
+                   "movdqa %[k3k4], %%xmm6\n\t"
+                   :
+                   : [shuf] "m" (crc32_shuf_shift[32 - inlen]),
+                     [crc] "m" (*pcrc),
+                     [my_p] "m" (consts->my_p[0]),
+                     [k3k4] "m" (consts->k[3 - 1])
+                   );
+
+      if (inlen >= 8)
+       {
+         asm volatile ("movq %[inbuf], %%xmm0\n\t"
+                       :
+                       : [inbuf] "m" (*inbuf)
+                       );
+         if (inlen > 8)
+           {
+             asm volatile (/*"pinsrq $1, %[inbuf_tail], %%xmm0\n\t"*/
+                           "movq %[inbuf_tail], %%xmm2\n\t"
+                           "punpcklqdq %%xmm2, %%xmm0\n\t"
+                           "pshufb %[merge_shuf], %%xmm0\n\t"
+                           :
+                           : [inbuf_tail] "m" (inbuf[inlen - 8]),
+                             [merge_shuf] "m"
+                               (*crc32_merge9to15_shuf[inlen - 9])
+                           );
+           }
+       }
+      else
+       {
+         asm volatile ("movd %[inbuf], %%xmm0\n\t"
+                       "pinsrd $1, %[inbuf_tail], %%xmm0\n\t"
+                       "pshufb %[merge_shuf], %%xmm0\n\t"
+                       :
+                       : [inbuf] "m" (*inbuf),
+                         [inbuf_tail] "m" (inbuf[inlen - 4]),
+                         [merge_shuf] "m"
+                           (*crc32_merge5to7_shuf[inlen - 5])
+                       );
+       }
+
+      /* Final fold. */
+      asm volatile ("pxor %%xmm1, %%xmm0\n\t"
+                   "pshufb %%xmm7, %%xmm0\n\t"
+
+                   /* reduce 128-bits to 96-bits */
+                   "movdqa %%xmm0, %%xmm1\n\t"
+                   "pclmulqdq $0x11, %%xmm6, %%xmm0\n\t"
+                   "pslldq $8, %%xmm1\n\t"
+                   "pxor %%xmm1, %%xmm0\n\t" /* bottom 32-bit are zero */
+
+                   /* reduce 96-bits to 64-bits */
+                   "pshufd $0x30, %%xmm0, %%xmm1\n\t" /* [00][x>>96][00][00] */
+                   "pshufd $0x24, %%xmm0, %%xmm0\n\t" /* [00][xx][xx][00] */
+                   "pclmulqdq $0x01, %[k5], %%xmm1\n\t" /* [00][xx][xx][00] */
+                   "pxor %%xmm1, %%xmm0\n\t" /* top and bottom 32-bit are zero 
*/
+
+                   /* barrett reduction */
+                   "pshufd $0x01, %%xmm0, %%xmm1\n\t" /* [00][00][00][x>>32] */
+                   "pclmulqdq $0x01, %%xmm5, %%xmm0\n\t" /* [00][xx][xx][xx] */
+                   "psrldq $4, %%xmm0\n\t" /* [00][00][xx][xx] */
+                   "pclmulqdq $0x10, %%xmm5, %%xmm0\n\t"
+                   "pxor %%xmm1, %%xmm0\n\t"
+
+                   /* store CRC in input endian */
+                   "movd %%xmm0, %%eax\n\t"
+                   "bswapl %%eax\n\t"
+                   "movl %%eax, %[out]\n\t"
+                   : [out] "=m" (*pcrc)
+                   : [k5] "m" (consts->k[5 - 1])
+                   : "eax" );
+    }
+}
+
+void ASM_FUNC_ATTR
+_gcry_crc32_intel_pclmul (u32 *pcrc, const byte *inbuf, size_t inlen)
+{
+  const struct crc32_consts_s *consts = &crc32_consts;
+#if defined(__x86_64__) && defined(__WIN64__)
+  char win64tmp[2 * 16];
+
+  /* XMM6-XMM7 need to be restored after use. */
+  asm volatile ("movdqu %%xmm6, 0*16(%0)\n\t"
+                "movdqu %%xmm7, 1*16(%0)\n\t"
+                :
+                : "r" (win64tmp)
+                : "memory");
+#endif
+
+  if (!inlen)
+    return;
+
+  if (inlen >= 16)
+    crc32_reflected_bulk(pcrc, inbuf, inlen, consts);
+  else
+    crc32_reflected_less_than_16(pcrc, inbuf, inlen, consts);
+
+#if defined(__x86_64__) && defined(__WIN64__)
+  /* Restore used registers. */
+  asm volatile("movdqu 0*16(%0), %%xmm6\n\t"
+               "movdqu 1*16(%0), %%xmm7\n\t"
+               :
+               : "r" (win64tmp)
+               : "memory");
+#endif
+}
+
+void ASM_FUNC_ATTR
+_gcry_crc24rfc2440_intel_pclmul (u32 *pcrc, const byte *inbuf, size_t inlen)
+{
+  const struct crc32_consts_s *consts = &crc24rfc2440_consts;
+#if defined(__x86_64__) && defined(__WIN64__)
+  char win64tmp[2 * 16];
+
+  /* XMM6-XMM7 need to be restored after use. */
+  asm volatile ("movdqu %%xmm6, 0*16(%0)\n\t"
+                "movdqu %%xmm7, 1*16(%0)\n\t"
+                :
+                : "r" (win64tmp)
+                : "memory");
+#endif
+
+  if (!inlen)
+    return;
+
+  /* Note: *pcrc in input endian. */
+
+  if (inlen >= 16)
+    crc32_bulk(pcrc, inbuf, inlen, consts);
+  else
+    crc32_less_than_16(pcrc, inbuf, inlen, consts);
+
+#if defined(__x86_64__) && defined(__WIN64__)
+  /* Restore used registers. */
+  asm volatile("movdqu 0*16(%0), %%xmm6\n\t"
+               "movdqu 1*16(%0), %%xmm7\n\t"
+               :
+               : "r" (win64tmp)
+               : "memory");
+#endif
+}
+
+#if __clang__
+#  pragma clang attribute pop
+#endif
+
+#endif /* USE_INTEL_PCLMUL */
diff --git a/grub-core/lib/libgcrypt/cipher/crc-ppc.c 
b/grub-core/lib/libgcrypt/cipher/crc-ppc.c
new file mode 100644
index 000000000..b9a40130c
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/crc-ppc.c
@@ -0,0 +1,656 @@
+/* crc-ppc.c - POWER8 vpmsum accelerated CRC implementation
+ * Copyright (C) 2019-2020 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "g10lib.h"
+
+#include "bithelp.h"
+#include "bufhelp.h"
+
+
+#if defined(ENABLE_PPC_CRYPTO_SUPPORT) && \
+    defined(HAVE_COMPATIBLE_CC_PPC_ALTIVEC) && \
+    defined(HAVE_GCC_INLINE_ASM_PPC_ALTIVEC) && \
+    __GNUC__ >= 4
+
+#include <altivec.h>
+#include "bufhelp.h"
+
+
+#define ALWAYS_INLINE inline __attribute__((always_inline))
+#define NO_INLINE __attribute__((noinline))
+#define NO_INSTRUMENT_FUNCTION __attribute__((no_instrument_function))
+
+#define ASM_FUNC_ATTR          NO_INSTRUMENT_FUNCTION
+#define ASM_FUNC_ATTR_INLINE   ASM_FUNC_ATTR ALWAYS_INLINE
+#define ASM_FUNC_ATTR_NOINLINE ASM_FUNC_ATTR NO_INLINE
+
+#define ALIGNED_64 __attribute__ ((aligned (64)))
+
+
+typedef vector unsigned char vector16x_u8;
+typedef vector unsigned int vector4x_u32;
+typedef vector unsigned long long vector2x_u64;
+
+
+/* Constants structure for generic reflected/non-reflected CRC32 PMULL
+ * functions. */
+struct crc32_consts_s
+{
+  /* k: { x^(32*17), x^(32*15), x^(32*5), x^(32*3), x^(32*2), 0 } mod P(x) */
+  unsigned long long k[6];
+  /* my_p: { floor(x^64 / P(x)), P(x) } */
+  unsigned long long my_p[2];
+};
+
+/* PMULL constants for CRC32 and CRC32RFC1510. */
+static const struct crc32_consts_s crc32_consts ALIGNED_64 =
+{
+  { /* k[6] = reverse_33bits( x^(32*y) mod P(x) ) */
+    U64_C(0x154442bd4), U64_C(0x1c6e41596), /* y = { 17, 15 } */
+    U64_C(0x1751997d0), U64_C(0x0ccaa009e), /* y = { 5, 3 } */
+    U64_C(0x163cd6124), 0                   /* y = 2 */
+  },
+  { /* my_p[2] = reverse_33bits ( { floor(x^64 / P(x)), P(x) } ) */
+    U64_C(0x1f7011641), U64_C(0x1db710641)
+  }
+};
+
+/* PMULL constants for CRC24RFC2440 (polynomial multiplied with x⁸). */
+static const struct crc32_consts_s crc24rfc2440_consts ALIGNED_64 =
+{
+  { /* k[6] = x^(32*y) mod P(x) << 32*/
+    U64_C(0x08289a00) << 32, U64_C(0x74b44a00) << 32, /* y = { 17, 15 } */
+    U64_C(0xc4b14d00) << 32, U64_C(0xfd7e0c00) << 32, /* y = { 5, 3 } */
+    U64_C(0xd9fe8c00) << 32, 0                        /* y = 2 */
+  },
+  { /* my_p[2] = { floor(x^64 / P(x)), P(x) } */
+    U64_C(0x1f845fe24), U64_C(0x1864cfb00)
+  }
+};
+
+
+static ASM_FUNC_ATTR_INLINE vector2x_u64
+asm_vpmsumd(vector2x_u64 a, vector2x_u64 b)
+{
+  __asm__("vpmsumd %0, %1, %2"
+         : "=v" (a)
+         : "v" (a), "v" (b));
+  return a;
+}
+
+
+static ASM_FUNC_ATTR_INLINE vector2x_u64
+asm_swap_u64(vector2x_u64 a)
+{
+  __asm__("xxswapd %x0, %x1"
+         : "=wa" (a)
+         : "wa" (a));
+  return a;
+}
+
+
+static ASM_FUNC_ATTR_INLINE vector4x_u32
+vec_sld_u32(vector4x_u32 a, vector4x_u32 b, unsigned int idx)
+{
+  return vec_sld (a, b, (4 * idx) & 15);
+}
+
+
+static const byte crc32_partial_fold_input_mask[16 + 16] ALIGNED_64 =
+  {
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+  };
+static const byte crc32_shuf_shift[3 * 16] ALIGNED_64 =
+  {
+    0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,
+    0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,
+    0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08,
+    0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00,
+    0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,
+    0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,
+  };
+static const byte crc32_refl_shuf_shift[3 * 16] ALIGNED_64 =
+  {
+    0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,
+    0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,
+    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+    0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+    0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,
+    0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,
+  };
+static const vector16x_u8 bswap_const ALIGNED_64 =
+  { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
+
+
+#define CRC_VEC_SWAP(v) ({ vector2x_u64 __vecu64 = (v); \
+                           vec_perm(__vecu64, __vecu64, bswap_const); })
+
+#ifdef WORDS_BIGENDIAN
+# define CRC_VEC_U64_DEF(lo, hi) { (hi), (lo) }
+# define CRC_VEC_U64_LOAD(offs, ptr) \
+         asm_swap_u64(asm_vec_u64_load(offs, ptr))
+# define CRC_VEC_U64_LOAD_LE(offs, ptr) \
+         CRC_VEC_SWAP(asm_vec_u64_load(offs, ptr))
+# define CRC_VEC_U64_LOAD_BE(offs, ptr) \
+         asm_vec_u64_load(offs, ptr)
+# define CRC_VEC_SWAP_TO_LE(v) CRC_VEC_SWAP(v)
+# define CRC_VEC_SWAP_TO_BE(v) (v)
+# define VEC_U64_LO 1
+# define VEC_U64_HI 0
+
+static ASM_FUNC_ATTR_INLINE vector2x_u64
+asm_vec_u64_load(unsigned long offset, const void *ptr)
+{
+  vector2x_u64 vecu64;
+#if __GNUC__ >= 4
+  if (__builtin_constant_p (offset) && offset == 0)
+    __asm__ volatile ("lxvd2x %x0,0,%1\n\t"
+                     : "=wa" (vecu64)
+                     : "r" ((uintptr_t)ptr)
+                     : "memory");
+  else
+#endif
+    __asm__ volatile ("lxvd2x %x0,%1,%2\n\t"
+                     : "=wa" (vecu64)
+                     : "r" (offset), "r" ((uintptr_t)ptr)
+                     : "memory", "r0");
+  return vecu64;
+}
+#else
+# define CRC_VEC_U64_DEF(lo, hi) { (lo), (hi) }
+# define CRC_VEC_U64_LOAD(offs, ptr) asm_vec_u64_load_le(offs, ptr)
+# define CRC_VEC_U64_LOAD_LE(offs, ptr) asm_vec_u64_load_le(offs, ptr)
+# define CRC_VEC_U64_LOAD_BE(offs, ptr) asm_vec_u64_load_be(offs, ptr)
+# define CRC_VEC_SWAP_TO_LE(v) (v)
+# define CRC_VEC_SWAP_TO_BE(v) CRC_VEC_SWAP(v)
+# define VEC_U64_LO 0
+# define VEC_U64_HI 1
+
+static ASM_FUNC_ATTR_INLINE vector2x_u64
+asm_vec_u64_load_le(unsigned long offset, const void *ptr)
+{
+  vector2x_u64 vecu64;
+#if __GNUC__ >= 4
+  if (__builtin_constant_p (offset) && offset == 0)
+    __asm__ volatile ("lxvd2x %x0,0,%1\n\t"
+                     : "=wa" (vecu64)
+                     : "r" ((uintptr_t)ptr)
+                     : "memory");
+  else
+#endif
+    __asm__ volatile ("lxvd2x %x0,%1,%2\n\t"
+                     : "=wa" (vecu64)
+                     : "r" (offset), "r" ((uintptr_t)ptr)
+                     : "memory", "r0");
+  return asm_swap_u64(vecu64);
+}
+
+static ASM_FUNC_ATTR_INLINE vector2x_u64
+asm_vec_u64_load_be(unsigned int offset, const void *ptr)
+{
+  static const vector16x_u8 vec_load_le_const =
+    { ~7, ~6, ~5, ~4, ~3, ~2, ~1, ~0, ~15, ~14, ~13, ~12, ~11, ~10, ~9, ~8 };
+  vector2x_u64 vecu64;
+
+#if __GNUC__ >= 4
+  if (__builtin_constant_p (offset) && offset == 0)
+    __asm__ ("lxvd2x %%vs32,0,%1\n\t"
+            "vperm %0,%%v0,%%v0,%2\n\t"
+            : "=v" (vecu64)
+            : "r" ((uintptr_t)(ptr)), "v" (vec_load_le_const)
+            : "memory", "v0");
+#endif
+  else
+    __asm__ ("lxvd2x %%vs32,%1,%2\n\t"
+            "vperm %0,%%v0,%%v0,%3\n\t"
+            : "=v" (vecu64)
+            : "r" (offset), "r" ((uintptr_t)(ptr)),
+              "v" (vec_load_le_const)
+            : "memory", "r0", "v0");
+
+  return vecu64;
+}
+#endif
+
+
+static ASM_FUNC_ATTR_INLINE void
+crc32r_ppc8_ce_bulk (u32 *pcrc, const byte *inbuf, size_t inlen,
+                    const struct crc32_consts_s *consts)
+{
+  vector4x_u32 zero = { 0, 0, 0, 0 };
+  vector2x_u64 low_64bit_mask = CRC_VEC_U64_DEF((u64)-1, 0);
+  vector2x_u64 low_32bit_mask = CRC_VEC_U64_DEF((u32)-1, 0);
+  vector2x_u64 my_p = CRC_VEC_U64_LOAD(0, &consts->my_p[0]);
+  vector2x_u64 k1k2 = CRC_VEC_U64_LOAD(0, &consts->k[1 - 1]);
+  vector2x_u64 k3k4 = CRC_VEC_U64_LOAD(0, &consts->k[3 - 1]);
+  vector2x_u64 k4lo = CRC_VEC_U64_DEF(k3k4[VEC_U64_HI], 0);
+  vector2x_u64 k5lo = CRC_VEC_U64_LOAD(0, &consts->k[5 - 1]);
+  vector2x_u64 crc = CRC_VEC_U64_DEF(*pcrc, 0);
+  vector2x_u64 crc0, crc1, crc2, crc3;
+  vector2x_u64 v0;
+
+  if (inlen >= 8 * 16)
+    {
+      crc0 = CRC_VEC_U64_LOAD_LE(0 * 16, inbuf);
+      crc0 ^= crc;
+      crc1 = CRC_VEC_U64_LOAD_LE(1 * 16, inbuf);
+      crc2 = CRC_VEC_U64_LOAD_LE(2 * 16, inbuf);
+      crc3 = CRC_VEC_U64_LOAD_LE(3 * 16, inbuf);
+
+      inbuf += 4 * 16;
+      inlen -= 4 * 16;
+
+      /* Fold by 4. */
+      while (inlen >= 4 * 16)
+       {
+         v0 = CRC_VEC_U64_LOAD_LE(0 * 16, inbuf);
+         crc0 = asm_vpmsumd(crc0, k1k2) ^ v0;
+
+         v0 = CRC_VEC_U64_LOAD_LE(1 * 16, inbuf);
+         crc1 = asm_vpmsumd(crc1, k1k2) ^ v0;
+
+         v0 = CRC_VEC_U64_LOAD_LE(2 * 16, inbuf);
+         crc2 = asm_vpmsumd(crc2, k1k2) ^ v0;
+
+         v0 = CRC_VEC_U64_LOAD_LE(3 * 16, inbuf);
+         crc3 = asm_vpmsumd(crc3, k1k2) ^ v0;
+
+         inbuf += 4 * 16;
+         inlen -= 4 * 16;
+       }
+
+      /* Fold 4 to 1. */
+      crc1 ^= asm_vpmsumd(crc0, k3k4);
+      crc2 ^= asm_vpmsumd(crc1, k3k4);
+      crc3 ^= asm_vpmsumd(crc2, k3k4);
+      crc = crc3;
+    }
+  else
+    {
+      v0 = CRC_VEC_U64_LOAD_LE(0, inbuf);
+      crc ^= v0;
+
+      inbuf += 16;
+      inlen -= 16;
+    }
+
+  /* Fold by 1. */
+  while (inlen >= 16)
+    {
+      v0 = CRC_VEC_U64_LOAD_LE(0, inbuf);
+      crc = asm_vpmsumd(k3k4, crc);
+      crc ^= v0;
+
+      inbuf += 16;
+      inlen -= 16;
+    }
+
+  /* Partial fold. */
+  if (inlen)
+    {
+      /* Load last input and add padding zeros. */
+      vector2x_u64 mask = CRC_VEC_U64_LOAD_LE(inlen, 
crc32_partial_fold_input_mask);
+      vector2x_u64 shl_shuf = CRC_VEC_U64_LOAD_LE(inlen, 
crc32_refl_shuf_shift);
+      vector2x_u64 shr_shuf = CRC_VEC_U64_LOAD_LE(inlen + 16, 
crc32_refl_shuf_shift);
+
+      v0 = CRC_VEC_U64_LOAD_LE(inlen - 16, inbuf);
+      v0 &= mask;
+
+      crc = CRC_VEC_SWAP_TO_LE(crc);
+      v0 |= (vector2x_u64)vec_perm((vector16x_u8)crc, (vector16x_u8)zero,
+                                  (vector16x_u8)shr_shuf);
+      crc = (vector2x_u64)vec_perm((vector16x_u8)crc, (vector16x_u8)zero,
+                                  (vector16x_u8)shl_shuf);
+      crc = asm_vpmsumd(k3k4, crc);
+      crc ^= v0;
+
+      inbuf += inlen;
+      inlen -= inlen;
+    }
+
+  /* Final fold. */
+
+  /* reduce 128-bits to 96-bits */
+  v0 = asm_swap_u64(crc);
+  v0 &= low_64bit_mask;
+  crc = asm_vpmsumd(k4lo, crc);
+  crc ^= v0;
+
+  /* reduce 96-bits to 64-bits */
+  v0 = (vector2x_u64)vec_sld_u32((vector4x_u32)crc,
+                                (vector4x_u32)crc, 3);  /* [x0][x3][x2][x1] */
+  v0 &= low_64bit_mask;                                  /* [00][00][x2][x1] */
+  crc = crc & low_32bit_mask;                            /* [00][00][00][x0] */
+  crc = v0 ^ asm_vpmsumd(k5lo, crc);                     /* [00][00][xx][xx] */
+
+  /* barrett reduction */
+  v0 = crc << 32;                                        /* [00][00][x0][00] */
+  v0 = asm_vpmsumd(my_p, v0);
+  v0 = asm_swap_u64(v0);
+  v0 = asm_vpmsumd(my_p, v0);
+  crc = (vector2x_u64)vec_sld_u32((vector4x_u32)crc,
+                                 zero, 1);              /* [00][x1][x0][00] */
+  crc ^= v0;
+
+  *pcrc = (u32)crc[VEC_U64_HI];
+}
+
+
+static ASM_FUNC_ATTR_INLINE u32
+crc32r_ppc8_ce_reduction_4 (u32 data, u32 crc,
+                           const struct crc32_consts_s *consts)
+{
+  vector4x_u32 zero = { 0, 0, 0, 0 };
+  vector2x_u64 my_p = CRC_VEC_U64_LOAD(0, &consts->my_p[0]);
+  vector2x_u64 v0 = CRC_VEC_U64_DEF((u64)data, 0);
+  v0 = asm_vpmsumd(v0, my_p);                          /* [00][00][xx][xx] */
+  v0 = (vector2x_u64)vec_sld_u32((vector4x_u32)v0,
+                                zero, 3);             /* [x0][00][00][00] */
+  v0 = (vector2x_u64)vec_sld_u32((vector4x_u32)v0,
+                                (vector4x_u32)v0, 3); /* [00][x0][00][00] */
+  v0 = asm_vpmsumd(v0, my_p);                          /* [00][00][xx][xx] */
+  return (v0[VEC_U64_LO] >> 32) ^ crc;
+}
+
+
+static ASM_FUNC_ATTR_INLINE void
+crc32r_less_than_16 (u32 *pcrc, const byte *inbuf, size_t inlen,
+                    const struct crc32_consts_s *consts)
+{
+  u32 crc = *pcrc;
+  u32 data;
+
+  while (inlen >= 4)
+    {
+      data = buf_get_le32(inbuf);
+      data ^= crc;
+
+      inlen -= 4;
+      inbuf += 4;
+
+      crc = crc32r_ppc8_ce_reduction_4 (data, 0, consts);
+    }
+
+  switch (inlen)
+    {
+    case 0:
+      break;
+    case 1:
+      data = inbuf[0];
+      data ^= crc;
+      data <<= 24;
+      crc >>= 8;
+      crc = crc32r_ppc8_ce_reduction_4 (data, crc, consts);
+      break;
+    case 2:
+      data = inbuf[0] << 0;
+      data |= inbuf[1] << 8;
+      data ^= crc;
+      data <<= 16;
+      crc >>= 16;
+      crc = crc32r_ppc8_ce_reduction_4 (data, crc, consts);
+      break;
+    case 3:
+      data = inbuf[0] << 0;
+      data |= inbuf[1] << 8;
+      data |= inbuf[2] << 16;
+      data ^= crc;
+      data <<= 8;
+      crc >>= 24;
+      crc = crc32r_ppc8_ce_reduction_4 (data, crc, consts);
+      break;
+    }
+
+  *pcrc = crc;
+}
+
+
+static ASM_FUNC_ATTR_INLINE void
+crc32_ppc8_ce_bulk (u32 *pcrc, const byte *inbuf, size_t inlen,
+                   const struct crc32_consts_s *consts)
+{
+  vector4x_u32 zero = { 0, 0, 0, 0 };
+  vector2x_u64 low_96bit_mask = CRC_VEC_U64_DEF(~0, ~((u64)(u32)-1 << 32));
+  vector2x_u64 p_my = asm_swap_u64(CRC_VEC_U64_LOAD(0, &consts->my_p[0]));
+  vector2x_u64 p_my_lo, p_my_hi;
+  vector2x_u64 k2k1 = asm_swap_u64(CRC_VEC_U64_LOAD(0, &consts->k[1 - 1]));
+  vector2x_u64 k4k3 = asm_swap_u64(CRC_VEC_U64_LOAD(0, &consts->k[3 - 1]));
+  vector2x_u64 k4hi = CRC_VEC_U64_DEF(0, consts->k[4 - 1]);
+  vector2x_u64 k5hi = CRC_VEC_U64_DEF(0, consts->k[5 - 1]);
+  vector2x_u64 crc = CRC_VEC_U64_DEF(0, _gcry_bswap64(*pcrc));
+  vector2x_u64 crc0, crc1, crc2, crc3;
+  vector2x_u64 v0;
+
+  if (inlen >= 8 * 16)
+    {
+      crc0 = CRC_VEC_U64_LOAD_BE(0 * 16, inbuf);
+      crc0 ^= crc;
+      crc1 = CRC_VEC_U64_LOAD_BE(1 * 16, inbuf);
+      crc2 = CRC_VEC_U64_LOAD_BE(2 * 16, inbuf);
+      crc3 = CRC_VEC_U64_LOAD_BE(3 * 16, inbuf);
+
+      inbuf += 4 * 16;
+      inlen -= 4 * 16;
+
+      /* Fold by 4. */
+      while (inlen >= 4 * 16)
+       {
+         v0 = CRC_VEC_U64_LOAD_BE(0 * 16, inbuf);
+         crc0 = asm_vpmsumd(crc0, k2k1) ^ v0;
+
+         v0 = CRC_VEC_U64_LOAD_BE(1 * 16, inbuf);
+         crc1 = asm_vpmsumd(crc1, k2k1) ^ v0;
+
+         v0 = CRC_VEC_U64_LOAD_BE(2 * 16, inbuf);
+         crc2 = asm_vpmsumd(crc2, k2k1) ^ v0;
+
+         v0 = CRC_VEC_U64_LOAD_BE(3 * 16, inbuf);
+         crc3 = asm_vpmsumd(crc3, k2k1) ^ v0;
+
+         inbuf += 4 * 16;
+         inlen -= 4 * 16;
+       }
+
+      /* Fold 4 to 1. */
+      crc1 ^= asm_vpmsumd(crc0, k4k3);
+      crc2 ^= asm_vpmsumd(crc1, k4k3);
+      crc3 ^= asm_vpmsumd(crc2, k4k3);
+      crc = crc3;
+    }
+  else
+    {
+      v0 = CRC_VEC_U64_LOAD_BE(0, inbuf);
+      crc ^= v0;
+
+      inbuf += 16;
+      inlen -= 16;
+    }
+
+  /* Fold by 1. */
+  while (inlen >= 16)
+    {
+      v0 = CRC_VEC_U64_LOAD_BE(0, inbuf);
+      crc = asm_vpmsumd(k4k3, crc);
+      crc ^= v0;
+
+      inbuf += 16;
+      inlen -= 16;
+    }
+
+  /* Partial fold. */
+  if (inlen)
+    {
+      /* Load last input and add padding zeros. */
+      vector2x_u64 mask = CRC_VEC_U64_LOAD_LE(inlen, 
crc32_partial_fold_input_mask);
+      vector2x_u64 shl_shuf = CRC_VEC_U64_LOAD_LE(32 - inlen, 
crc32_refl_shuf_shift);
+      vector2x_u64 shr_shuf = CRC_VEC_U64_LOAD_LE(inlen + 16, 
crc32_shuf_shift);
+
+      v0 = CRC_VEC_U64_LOAD_LE(inlen - 16, inbuf);
+      v0 &= mask;
+
+      crc = CRC_VEC_SWAP_TO_LE(crc);
+      crc2 = (vector2x_u64)vec_perm((vector16x_u8)crc, (vector16x_u8)zero,
+                                   (vector16x_u8)shr_shuf);
+      v0 |= crc2;
+      v0 = CRC_VEC_SWAP(v0);
+      crc = (vector2x_u64)vec_perm((vector16x_u8)crc, (vector16x_u8)zero,
+                                  (vector16x_u8)shl_shuf);
+      crc = asm_vpmsumd(k4k3, crc);
+      crc ^= v0;
+
+      inbuf += inlen;
+      inlen -= inlen;
+    }
+
+  /* Final fold. */
+
+  /* reduce 128-bits to 96-bits */
+  v0 = (vector2x_u64)vec_sld_u32((vector4x_u32)crc,
+                                (vector4x_u32)zero, 2);
+  crc = asm_vpmsumd(k4hi, crc);
+  crc ^= v0; /* bottom 32-bit are zero */
+
+  /* reduce 96-bits to 64-bits */
+  v0 = crc & low_96bit_mask;    /* [00][x2][x1][00] */
+  crc >>= 32;                   /* [00][x3][00][x0] */
+  crc = asm_vpmsumd(k5hi, crc); /* [00][xx][xx][00] */
+  crc ^= v0;                    /* top and bottom 32-bit are zero */
+
+  /* barrett reduction */
+  p_my_hi = p_my;
+  p_my_lo = p_my;
+  p_my_hi[VEC_U64_LO] = 0;
+  p_my_lo[VEC_U64_HI] = 0;
+  v0 = crc >> 32;                                        /* [00][00][00][x1] */
+  crc = asm_vpmsumd(p_my_hi, crc);                       /* [00][xx][xx][xx] */
+  crc = (vector2x_u64)vec_sld_u32((vector4x_u32)crc,
+                                 (vector4x_u32)crc, 3); /* [x0][00][x2][x1] */
+  crc = asm_vpmsumd(p_my_lo, crc);                       /* [00][xx][xx][xx] */
+  crc ^= v0;
+
+  *pcrc = _gcry_bswap32(crc[VEC_U64_LO]);
+}
+
+
+static ASM_FUNC_ATTR_INLINE u32
+crc32_ppc8_ce_reduction_4 (u32 data, u32 crc,
+                          const struct crc32_consts_s *consts)
+{
+  vector2x_u64 my_p = CRC_VEC_U64_LOAD(0, &consts->my_p[0]);
+  vector2x_u64 v0 = CRC_VEC_U64_DEF((u64)data << 32, 0);
+  v0 = asm_vpmsumd(v0, my_p); /* [00][x1][x0][00] */
+  v0[VEC_U64_LO] = 0;         /* [00][x1][00][00] */
+  v0 = asm_vpmsumd(v0, my_p); /* [00][00][xx][xx] */
+  return _gcry_bswap32(v0[VEC_U64_LO]) ^ crc;
+}
+
+
+static ASM_FUNC_ATTR_INLINE void
+crc32_less_than_16 (u32 *pcrc, const byte *inbuf, size_t inlen,
+                   const struct crc32_consts_s *consts)
+{
+  u32 crc = *pcrc;
+  u32 data;
+
+  while (inlen >= 4)
+    {
+      data = buf_get_le32(inbuf);
+      data ^= crc;
+      data = _gcry_bswap32(data);
+
+      inlen -= 4;
+      inbuf += 4;
+
+      crc = crc32_ppc8_ce_reduction_4 (data, 0, consts);
+    }
+
+  switch (inlen)
+    {
+    case 0:
+      break;
+    case 1:
+      data = inbuf[0];
+      data ^= crc;
+      data = data & 0xffU;
+      crc = crc >> 8;
+      crc = crc32_ppc8_ce_reduction_4 (data, crc, consts);
+      break;
+    case 2:
+      data = inbuf[0] << 0;
+      data |= inbuf[1] << 8;
+      data ^= crc;
+      data = _gcry_bswap32(data << 16);
+      crc = crc >> 16;
+      crc = crc32_ppc8_ce_reduction_4 (data, crc, consts);
+      break;
+    case 3:
+      data = inbuf[0] << 0;
+      data |= inbuf[1] << 8;
+      data |= inbuf[2] << 16;
+      data ^= crc;
+      data = _gcry_bswap32(data << 8);
+      crc = crc >> 24;
+      crc = crc32_ppc8_ce_reduction_4 (data, crc, consts);
+      break;
+    }
+
+  *pcrc = crc;
+}
+
+void ASM_FUNC_ATTR
+_gcry_crc32_ppc8_vpmsum (u32 *pcrc, const byte *inbuf, size_t inlen)
+{
+  const struct crc32_consts_s *consts = &crc32_consts;
+
+  if (!inlen)
+    return;
+
+  if (inlen >= 16)
+    crc32r_ppc8_ce_bulk (pcrc, inbuf, inlen, consts);
+  else
+    crc32r_less_than_16 (pcrc, inbuf, inlen, consts);
+}
+
+void ASM_FUNC_ATTR
+_gcry_crc24rfc2440_ppc8_vpmsum (u32 *pcrc, const byte *inbuf, size_t inlen)
+{
+  const struct crc32_consts_s *consts = &crc24rfc2440_consts;
+
+  if (!inlen)
+    return;
+
+  /* Note: *pcrc in input endian. */
+
+  if (inlen >= 16)
+    crc32_ppc8_ce_bulk (pcrc, inbuf, inlen, consts);
+  else
+    crc32_less_than_16 (pcrc, inbuf, inlen, consts);
+}
+
+#endif
diff --git a/grub-core/lib/libgcrypt/cipher/crc.c 
b/grub-core/lib/libgcrypt/cipher/crc.c
index 28454f8ab..b38869ecc 100644
--- a/grub-core/lib/libgcrypt/cipher/crc.c
+++ b/grub-core/lib/libgcrypt/cipher/crc.c
@@ -31,14 +31,79 @@
 #include "bufhelp.h"
 
 
+/* USE_INTEL_PCLMUL indicates whether to compile CRC with Intel PCLMUL/SSE4.1
+ * code.  */
+#undef USE_INTEL_PCLMUL
+#if defined(ENABLE_PCLMUL_SUPPORT) && defined(ENABLE_SSE41_SUPPORT)
+# if ((defined(__i386__) && SIZEOF_UNSIGNED_LONG == 4) || defined(__x86_64__))
+#  if __GNUC__ >= 4
+#   define USE_INTEL_PCLMUL 1
+#  endif
+# endif
+#endif /* USE_INTEL_PCLMUL */
+
+/* USE_ARM_PMULL indicates whether to compile GCM with ARMv8 PMULL code. */
+#undef USE_ARM_PMULL
+#if defined(ENABLE_ARM_CRYPTO_SUPPORT)
+# if defined(__AARCH64EL__) && \
+    defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) && \
+    defined(HAVE_GCC_INLINE_ASM_AARCH64_CRYPTO)
+#  define USE_ARM_PMULL 1
+# endif
+#endif /* USE_ARM_PMULL */
+
+/* USE_PPC_VPMSUM indicates whether to enable PowerPC vector
+ * accelerated code. */
+#undef USE_PPC_VPMSUM
+#ifdef ENABLE_PPC_CRYPTO_SUPPORT
+# if defined(HAVE_COMPATIBLE_CC_PPC_ALTIVEC) && \
+     defined(HAVE_GCC_INLINE_ASM_PPC_ALTIVEC)
+#  if __GNUC__ >= 4
+#   define USE_PPC_VPMSUM 1
+#  endif
+# endif
+#endif /* USE_PPC_VPMSUM */
+
+
 typedef struct
 {
   u32 CRC;
+#ifdef USE_INTEL_PCLMUL
+  unsigned int use_pclmul:1;           /* Intel PCLMUL shall be used.  */
+#endif
+#ifdef USE_ARM_PMULL
+  unsigned int use_pmull:1;            /* ARMv8 PMULL shall be used. */
+#endif
+#ifdef USE_PPC_VPMSUM
+  unsigned int use_vpmsum:1;           /* POWER vpmsum shall be used. */
+#endif
   byte buf[4];
 }
 CRC_CONTEXT;
 
 
+#ifdef USE_INTEL_PCLMUL
+/*-- crc-intel-pclmul.c --*/
+void _gcry_crc32_intel_pclmul (u32 *pcrc, const byte *inbuf, size_t inlen);
+void _gcry_crc24rfc2440_intel_pclmul (u32 *pcrc, const byte *inbuf,
+                                     size_t inlen);
+#endif
+
+#ifdef USE_ARM_PMULL
+/*-- crc-armv8-ce.c --*/
+void _gcry_crc32_armv8_ce_pmull (u32 *pcrc, const byte *inbuf, size_t inlen);
+void _gcry_crc24rfc2440_armv8_ce_pmull (u32 *pcrc, const byte *inbuf,
+                                       size_t inlen);
+#endif
+
+#ifdef USE_PPC_VPMSUM
+/*-- crc-ppc.c --*/
+void _gcry_crc32_ppc8_vpmsum (u32 *pcrc, const byte *inbuf, size_t inlen);
+void _gcry_crc24rfc2440_ppc8_vpmsum (u32 *pcrc, const byte *inbuf,
+                                    size_t inlen);
+#endif
+
+
 /*
  * Code generated by universal_crc by Danjel McGougan
  *
@@ -335,9 +400,24 @@ crc32_next4 (u32 crc, u32 data)
 }
 
 static void
-crc32_init (void *context)
+crc32_init (void *context, unsigned int flags)
 {
   CRC_CONTEXT *ctx = (CRC_CONTEXT *) context;
+  u32 hwf = _gcry_get_hw_features ();
+
+#ifdef USE_INTEL_PCLMUL
+  ctx->use_pclmul = (hwf & HWF_INTEL_SSE4_1) && (hwf & HWF_INTEL_PCLMUL);
+#endif
+#ifdef USE_ARM_PMULL
+  ctx->use_pmull = (hwf & HWF_ARM_NEON) && (hwf & HWF_ARM_PMULL);
+#endif
+#ifdef USE_PPC_VPMSUM
+  ctx->use_vpmsum = !!(hwf & HWF_PPC_ARCH_2_07);
+#endif
+
+  (void)flags;
+  (void)hwf;
+
   ctx->CRC = 0 ^ 0xffffffffL;
 }
 
@@ -348,6 +428,28 @@ crc32_write (void *context, const void *inbuf_arg, size_t 
inlen)
   const byte *inbuf = inbuf_arg;
   u32 crc;
 
+#ifdef USE_INTEL_PCLMUL
+  if (ctx->use_pclmul)
+    {
+      _gcry_crc32_intel_pclmul(&ctx->CRC, inbuf, inlen);
+      return;
+    }
+#endif
+#ifdef USE_ARM_PMULL
+  if (ctx->use_pmull)
+    {
+      _gcry_crc32_armv8_ce_pmull(&ctx->CRC, inbuf, inlen);
+      return;
+    }
+#endif
+#ifdef USE_PPC_VPMSUM
+  if (ctx->use_vpmsum)
+    {
+      _gcry_crc32_ppc8_vpmsum(&ctx->CRC, inbuf, inlen);
+      return;
+    }
+#endif
+
   if (!inbuf || !inlen)
     return;
 
@@ -397,9 +499,24 @@ crc32_final (void *context)
 /* CRC of the string "123456789" is 0x2dfd2d88 */
 
 static void
-crc32rfc1510_init (void *context)
+crc32rfc1510_init (void *context, unsigned int flags)
 {
   CRC_CONTEXT *ctx = (CRC_CONTEXT *) context;
+  u32 hwf = _gcry_get_hw_features ();
+
+#ifdef USE_INTEL_PCLMUL
+  ctx->use_pclmul = (hwf & HWF_INTEL_SSE4_1) && (hwf & HWF_INTEL_PCLMUL);
+#endif
+#ifdef USE_ARM_PMULL
+  ctx->use_pmull = (hwf & HWF_ARM_NEON) && (hwf & HWF_ARM_PMULL);
+#endif
+#ifdef USE_PPC_VPMSUM
+  ctx->use_vpmsum = !!(hwf & HWF_PPC_ARCH_2_07);
+#endif
+
+  (void)flags;
+  (void)hwf;
+
   ctx->CRC = 0;
 }
 
@@ -688,7 +805,8 @@ static const u32 crc24_table[1024] =
 static inline
 u32 crc24_init (void)
 {
-  return 0xce04b7;
+  /* Transformed to 32-bit CRC by multiplied by x⁸ and then byte swapped. */
+  return 0xce04b7; /* _gcry_bswap(0xb704ce << 8) */
 }
 
 static inline
@@ -707,7 +825,7 @@ u32 crc24_next4 (u32 crc, u32 data)
   crc = crc24_table[(crc & 0xff) + 0x300] ^
         crc24_table[((crc >> 8) & 0xff) + 0x200] ^
         crc24_table[((crc >> 16) & 0xff) + 0x100] ^
-        crc24_table[(crc >> 24) & 0xff];
+        crc24_table[(data >> 24) & 0xff];
   return crc;
 }
 
@@ -718,9 +836,24 @@ u32 crc24_final (u32 crc)
 }
 
 static void
-crc24rfc2440_init (void *context)
+crc24rfc2440_init (void *context, unsigned int flags)
 {
   CRC_CONTEXT *ctx = (CRC_CONTEXT *) context;
+  u32 hwf = _gcry_get_hw_features ();
+
+#ifdef USE_INTEL_PCLMUL
+  ctx->use_pclmul = (hwf & HWF_INTEL_SSE4_1) && (hwf & HWF_INTEL_PCLMUL);
+#endif
+#ifdef USE_ARM_PMULL
+  ctx->use_pmull = (hwf & HWF_ARM_NEON) && (hwf & HWF_ARM_PMULL);
+#endif
+#ifdef USE_PPC_VPMSUM
+  ctx->use_vpmsum = !!(hwf & HWF_PPC_ARCH_2_07);
+#endif
+
+  (void)hwf;
+  (void)flags;
+
   ctx->CRC = crc24_init();
 }
 
@@ -731,6 +864,28 @@ crc24rfc2440_write (void *context, const void *inbuf_arg, 
size_t inlen)
   CRC_CONTEXT *ctx = (CRC_CONTEXT *) context;
   u32 crc;
 
+#ifdef USE_INTEL_PCLMUL
+  if (ctx->use_pclmul)
+    {
+      _gcry_crc24rfc2440_intel_pclmul(&ctx->CRC, inbuf, inlen);
+      return;
+    }
+#endif
+#ifdef USE_ARM_PMULL
+  if (ctx->use_pmull)
+    {
+      _gcry_crc24rfc2440_armv8_ce_pmull(&ctx->CRC, inbuf, inlen);
+      return;
+    }
+#endif
+#ifdef USE_PPC_VPMSUM
+  if (ctx->use_vpmsum)
+    {
+      _gcry_crc24rfc2440_ppc8_vpmsum(&ctx->CRC, inbuf, inlen);
+      return;
+    }
+#endif
+
   if (!inbuf || !inlen)
     return;
 
@@ -769,25 +924,32 @@ crc24rfc2440_final (void *context)
   buf_put_le32 (ctx->buf, ctx->CRC);
 }
 
-gcry_md_spec_t _gcry_digest_spec_crc32 =
+/* We allow the CRC algorithms even in FIPS mode because they are
+   actually no cryptographic primitives.  */
+
+const gcry_md_spec_t _gcry_digest_spec_crc32 =
   {
+    GCRY_MD_CRC32, {0, 1},
     "CRC32", NULL, 0, NULL, 4,
-    crc32_init, crc32_write, crc32_final, crc32_read,
+    crc32_init, crc32_write, crc32_final, crc32_read, NULL,
+    NULL,
     sizeof (CRC_CONTEXT)
   };
 
-gcry_md_spec_t _gcry_digest_spec_crc32_rfc1510 =
+const gcry_md_spec_t _gcry_digest_spec_crc32_rfc1510 =
   {
+    GCRY_MD_CRC32_RFC1510, {0, 1},
     "CRC32RFC1510", NULL, 0, NULL, 4,
-    crc32rfc1510_init, crc32_write,
-    crc32rfc1510_final, crc32_read,
+    crc32rfc1510_init, crc32_write, crc32rfc1510_final, crc32_read, NULL,
+    NULL,
     sizeof (CRC_CONTEXT)
   };
 
-gcry_md_spec_t _gcry_digest_spec_crc24_rfc2440 =
+const gcry_md_spec_t _gcry_digest_spec_crc24_rfc2440 =
   {
+    GCRY_MD_CRC24_RFC2440, {0, 1},
     "CRC24RFC2440", NULL, 0, NULL, 3,
-    crc24rfc2440_init, crc24rfc2440_write,
-    crc24rfc2440_final, crc32_read,
+    crc24rfc2440_init, crc24rfc2440_write, crc24rfc2440_final, crc32_read, 
NULL,
+    NULL,
     sizeof (CRC_CONTEXT)
   };
diff --git a/grub-core/lib/libgcrypt/cipher/des-amd64.S 
b/grub-core/lib/libgcrypt/cipher/des-amd64.S
new file mode 100644
index 000000000..c1bf9f29e
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/des-amd64.S
@@ -0,0 +1,1111 @@
+/* des-amd64.S  -  AMD64 assembly implementation of 3DES cipher
+ *
+ * Copyright (C) 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifdef __x86_64
+#include <config.h>
+#if defined(USE_DES) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+    defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+
+#include "asm-common-amd64.h"
+
+.text
+
+#define s1 0
+#define s2 ((s1) + (64*8))
+#define s3 ((s2) + (64*8))
+#define s4 ((s3) + (64*8))
+#define s5 ((s4) + (64*8))
+#define s6 ((s5) + (64*8))
+#define s7 ((s6) + (64*8))
+#define s8 ((s7) + (64*8))
+
+/* register macros */
+#define CTX %rdi
+#define SBOXES %rbp
+
+#define RL0 %r8
+#define RL1 %r9
+#define RL2 %r10
+
+#define RL0d %r8d
+#define RL1d %r9d
+#define RL2d %r10d
+
+#define RR0 %r11
+#define RR1 %r12
+#define RR2 %r13
+
+#define RR0d %r11d
+#define RR1d %r12d
+#define RR2d %r13d
+
+#define RW0 %rax
+#define RW1 %rbx
+#define RW2 %rcx
+
+#define RW0d %eax
+#define RW1d %ebx
+#define RW2d %ecx
+
+#define RW0bl %al
+#define RW1bl %bl
+#define RW2bl %cl
+
+#define RW0bh %ah
+#define RW1bh %bh
+#define RW2bh %ch
+
+#define RT0 %r15
+#define RT1 %rsi
+#define RT2 %r14
+#define RT3 %rdx
+
+#define RT0d %r15d
+#define RT1d %esi
+#define RT2d %r14d
+#define RT3d %edx
+
+/***********************************************************************
+ * 1-way 3DES
+ ***********************************************************************/
+#define do_permutation(a, b, offset, mask) \
+       movl a, RT0d; \
+       shrl $(offset), RT0d; \
+       xorl b, RT0d; \
+       andl $(mask), RT0d; \
+       xorl RT0d, b; \
+       shll $(offset), RT0d; \
+       xorl RT0d, a;
+
+#define expand_to_64bits(val, mask) \
+       movl val##d, RT0d; \
+       rorl $4, RT0d; \
+       shlq $32, RT0; \
+       orq RT0, val; \
+       andq mask, val;
+
+#define compress_to_64bits(val) \
+       movq val, RT0; \
+       shrq $32, RT0; \
+       roll $4, RT0d; \
+       orl RT0d, val##d;
+
+#define initial_permutation(left, right) \
+       do_permutation(left##d, right##d,  4, 0x0f0f0f0f); \
+       do_permutation(left##d, right##d, 16, 0x0000ffff); \
+       do_permutation(right##d, left##d,  2, 0x33333333); \
+       do_permutation(right##d, left##d,  8, 0x00ff00ff); \
+       movabs $0x3f3f3f3f3f3f3f3f, RT3; \
+       movl left##d, RW0d; \
+       roll $1, right##d; \
+       xorl right##d, RW0d; \
+       andl $0xaaaaaaaa, RW0d; \
+       xorl RW0d, left##d; \
+       xorl RW0d, right##d; \
+       roll $1, left##d; \
+       expand_to_64bits(right, RT3); \
+       expand_to_64bits(left, RT3);
+
+#define final_permutation(left, right) \
+       compress_to_64bits(right); \
+       compress_to_64bits(left); \
+       movl right##d, RW0d; \
+       rorl $1, left##d; \
+       xorl left##d, RW0d; \
+       andl $0xaaaaaaaa, RW0d; \
+       xorl RW0d, right##d; \
+       xorl RW0d, left##d; \
+       rorl $1, right##d; \
+       do_permutation(right##d, left##d,  8, 0x00ff00ff); \
+       do_permutation(right##d, left##d,  2, 0x33333333); \
+       do_permutation(left##d, right##d, 16, 0x0000ffff); \
+       do_permutation(left##d, right##d,  4, 0x0f0f0f0f);
+
+#define round1(n, from, to, load_next_key) \
+       xorq from, RW0; \
+       \
+       movzbl RW0bl, RT0d; \
+       movzbl RW0bh, RT1d; \
+       shrq $16, RW0; \
+       movzbl RW0bl, RT2d; \
+       movzbl RW0bh, RT3d; \
+       shrq $16, RW0; \
+       movq s8(SBOXES, RT0, 8), RT0; \
+       xorq s6(SBOXES, RT1, 8), to; \
+       movzbl RW0bl, RL1d; \
+       movzbl RW0bh, RT1d; \
+       shrl $16, RW0d; \
+       xorq s4(SBOXES, RT2, 8), RT0; \
+       xorq s2(SBOXES, RT3, 8), to; \
+       movzbl RW0bl, RT2d; \
+       movzbl RW0bh, RT3d; \
+       xorq s7(SBOXES, RL1, 8), RT0; \
+       xorq s5(SBOXES, RT1, 8), to; \
+       xorq s3(SBOXES, RT2, 8), RT0; \
+       load_next_key(n, RW0); \
+       xorq RT0, to; \
+       xorq s1(SBOXES, RT3, 8), to; \
+
+#define load_next_key(n, RWx) \
+       movq (((n) + 1) * 8)(CTX), RWx;
+
+#define dummy2(a, b) /*_*/
+
+#define read_block(io, left, right) \
+       movl    (io), left##d; \
+       movl   4(io), right##d; \
+       bswapl left##d; \
+       bswapl right##d;
+
+#define write_block(io, left, right) \
+       bswapl left##d; \
+       bswapl right##d; \
+       movl   left##d,   (io); \
+       movl   right##d, 4(io);
+
+.align 8
+.globl _gcry_3des_amd64_crypt_block
+ELF(.type  _gcry_3des_amd64_crypt_block,@function;)
+
+_gcry_3des_amd64_crypt_block:
+       /* input:
+        *      %rdi: round keys, CTX
+        *      %rsi: dst
+        *      %rdx: src
+        */
+       CFI_STARTPROC();
+       ENTER_SYSV_FUNC_PARAMS_0_4
+
+       pushq %rbp;
+       CFI_PUSH(%rbp);
+       pushq %rbx;
+       CFI_PUSH(%rbx);
+       pushq %r12;
+       CFI_PUSH(%r12);
+       pushq %r13;
+       CFI_PUSH(%r13);
+       pushq %r14;
+       CFI_PUSH(%r14);
+       pushq %r15;
+       CFI_PUSH(%r15);
+       pushq %rsi; /*dst*/
+       CFI_PUSH(%rsi);
+
+       leaq .L_s1 rRIP, SBOXES;
+
+       read_block(%rdx, RL0, RR0);
+       initial_permutation(RL0, RR0);
+
+       movq (CTX), RW0;
+
+       round1(0, RR0, RL0, load_next_key);
+       round1(1, RL0, RR0, load_next_key);
+       round1(2, RR0, RL0, load_next_key);
+       round1(3, RL0, RR0, load_next_key);
+       round1(4, RR0, RL0, load_next_key);
+       round1(5, RL0, RR0, load_next_key);
+       round1(6, RR0, RL0, load_next_key);
+       round1(7, RL0, RR0, load_next_key);
+       round1(8, RR0, RL0, load_next_key);
+       round1(9, RL0, RR0, load_next_key);
+       round1(10, RR0, RL0, load_next_key);
+       round1(11, RL0, RR0, load_next_key);
+       round1(12, RR0, RL0, load_next_key);
+       round1(13, RL0, RR0, load_next_key);
+       round1(14, RR0, RL0, load_next_key);
+       round1(15, RL0, RR0, load_next_key);
+
+       round1(16+0, RL0, RR0, load_next_key);
+       round1(16+1, RR0, RL0, load_next_key);
+       round1(16+2, RL0, RR0, load_next_key);
+       round1(16+3, RR0, RL0, load_next_key);
+       round1(16+4, RL0, RR0, load_next_key);
+       round1(16+5, RR0, RL0, load_next_key);
+       round1(16+6, RL0, RR0, load_next_key);
+       round1(16+7, RR0, RL0, load_next_key);
+       round1(16+8, RL0, RR0, load_next_key);
+       round1(16+9, RR0, RL0, load_next_key);
+       round1(16+10, RL0, RR0, load_next_key);
+       round1(16+11, RR0, RL0, load_next_key);
+       round1(16+12, RL0, RR0, load_next_key);
+       round1(16+13, RR0, RL0, load_next_key);
+       round1(16+14, RL0, RR0, load_next_key);
+       round1(16+15, RR0, RL0, load_next_key);
+
+       round1(32+0, RR0, RL0, load_next_key);
+       round1(32+1, RL0, RR0, load_next_key);
+       round1(32+2, RR0, RL0, load_next_key);
+       round1(32+3, RL0, RR0, load_next_key);
+       round1(32+4, RR0, RL0, load_next_key);
+       round1(32+5, RL0, RR0, load_next_key);
+       round1(32+6, RR0, RL0, load_next_key);
+       round1(32+7, RL0, RR0, load_next_key);
+       round1(32+8, RR0, RL0, load_next_key);
+       round1(32+9, RL0, RR0, load_next_key);
+       round1(32+10, RR0, RL0, load_next_key);
+       round1(32+11, RL0, RR0, load_next_key);
+       round1(32+12, RR0, RL0, load_next_key);
+       round1(32+13, RL0, RR0, load_next_key);
+       round1(32+14, RR0, RL0, load_next_key);
+       round1(32+15, RL0, RR0, dummy2);
+
+       popq RW2; /*dst*/
+       CFI_POP_TMP_REG();
+       final_permutation(RR0, RL0);
+       write_block(RW2, RR0, RL0);
+
+       popq %r15;
+       CFI_POP(%r15);
+       popq %r14;
+       CFI_POP(%r14);
+       popq %r13;
+       CFI_POP(%r13);
+       popq %r12;
+       CFI_POP(%r12);
+       popq %rbx;
+       CFI_POP(%rbx);
+       popq %rbp;
+       CFI_POP(%rbp);
+
+       EXIT_SYSV_FUNC
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size _gcry_3des_amd64_crypt_block,.-_gcry_3des_amd64_crypt_block;)
+
+/***********************************************************************
+ * 3-way 3DES
+ ***********************************************************************/
+#define expand_to_64bits(val, mask) \
+       movl val##d, RT0d; \
+       rorl $4, RT0d; \
+       shlq $32, RT0; \
+       orq RT0, val; \
+       andq mask, val;
+
+#define compress_to_64bits(val) \
+       movq val, RT0; \
+       shrq $32, RT0; \
+       roll $4, RT0d; \
+       orl RT0d, val##d;
+
+#define initial_permutation3(left, right) \
+       do_permutation(left##0d, right##0d,  4, 0x0f0f0f0f); \
+       do_permutation(left##0d, right##0d, 16, 0x0000ffff); \
+         do_permutation(left##1d, right##1d,  4, 0x0f0f0f0f); \
+         do_permutation(left##1d, right##1d, 16, 0x0000ffff); \
+           do_permutation(left##2d, right##2d,  4, 0x0f0f0f0f); \
+           do_permutation(left##2d, right##2d, 16, 0x0000ffff); \
+           \
+       do_permutation(right##0d, left##0d,  2, 0x33333333); \
+       do_permutation(right##0d, left##0d,  8, 0x00ff00ff); \
+         do_permutation(right##1d, left##1d,  2, 0x33333333); \
+         do_permutation(right##1d, left##1d,  8, 0x00ff00ff); \
+           do_permutation(right##2d, left##2d,  2, 0x33333333); \
+           do_permutation(right##2d, left##2d,  8, 0x00ff00ff); \
+           \
+       movabs $0x3f3f3f3f3f3f3f3f, RT3; \
+           \
+       movl left##0d, RW0d; \
+       roll $1, right##0d; \
+       xorl right##0d, RW0d; \
+       andl $0xaaaaaaaa, RW0d; \
+       xorl RW0d, left##0d; \
+       xorl RW0d, right##0d; \
+       roll $1, left##0d; \
+       expand_to_64bits(right##0, RT3); \
+       expand_to_64bits(left##0, RT3); \
+         movl left##1d, RW1d; \
+         roll $1, right##1d; \
+         xorl right##1d, RW1d; \
+         andl $0xaaaaaaaa, RW1d; \
+         xorl RW1d, left##1d; \
+         xorl RW1d, right##1d; \
+         roll $1, left##1d; \
+         expand_to_64bits(right##1, RT3); \
+         expand_to_64bits(left##1, RT3); \
+           movl left##2d, RW2d; \
+           roll $1, right##2d; \
+           xorl right##2d, RW2d; \
+           andl $0xaaaaaaaa, RW2d; \
+           xorl RW2d, left##2d; \
+           xorl RW2d, right##2d; \
+           roll $1, left##2d; \
+           expand_to_64bits(right##2, RT3); \
+           expand_to_64bits(left##2, RT3);
+
+#define final_permutation3(left, right) \
+       compress_to_64bits(right##0); \
+       compress_to_64bits(left##0); \
+       movl right##0d, RW0d; \
+       rorl $1, left##0d; \
+       xorl left##0d, RW0d; \
+       andl $0xaaaaaaaa, RW0d; \
+       xorl RW0d, right##0d; \
+       xorl RW0d, left##0d; \
+       rorl $1, right##0d; \
+         compress_to_64bits(right##1); \
+         compress_to_64bits(left##1); \
+         movl right##1d, RW1d; \
+         rorl $1, left##1d; \
+         xorl left##1d, RW1d; \
+         andl $0xaaaaaaaa, RW1d; \
+         xorl RW1d, right##1d; \
+         xorl RW1d, left##1d; \
+         rorl $1, right##1d; \
+           compress_to_64bits(right##2); \
+           compress_to_64bits(left##2); \
+           movl right##2d, RW2d; \
+           rorl $1, left##2d; \
+           xorl left##2d, RW2d; \
+           andl $0xaaaaaaaa, RW2d; \
+           xorl RW2d, right##2d; \
+           xorl RW2d, left##2d; \
+           rorl $1, right##2d; \
+           \
+       do_permutation(right##0d, left##0d,  8, 0x00ff00ff); \
+       do_permutation(right##0d, left##0d,  2, 0x33333333); \
+         do_permutation(right##1d, left##1d,  8, 0x00ff00ff); \
+         do_permutation(right##1d, left##1d,  2, 0x33333333); \
+           do_permutation(right##2d, left##2d,  8, 0x00ff00ff); \
+           do_permutation(right##2d, left##2d,  2, 0x33333333); \
+           \
+       do_permutation(left##0d, right##0d, 16, 0x0000ffff); \
+       do_permutation(left##0d, right##0d,  4, 0x0f0f0f0f); \
+         do_permutation(left##1d, right##1d, 16, 0x0000ffff); \
+         do_permutation(left##1d, right##1d,  4, 0x0f0f0f0f); \
+           do_permutation(left##2d, right##2d, 16, 0x0000ffff); \
+           do_permutation(left##2d, right##2d,  4, 0x0f0f0f0f);
+
+#define round3(n, from, to, load_next_key, do_movq) \
+       xorq from##0, RW0; \
+       movzbl RW0bl, RT3d; \
+       movzbl RW0bh, RT1d; \
+       shrq $16, RW0; \
+       xorq s8(SBOXES, RT3, 8), to##0; \
+       xorq s6(SBOXES, RT1, 8), to##0; \
+       movzbl RW0bl, RT3d; \
+       movzbl RW0bh, RT1d; \
+       shrq $16, RW0; \
+       xorq s4(SBOXES, RT3, 8), to##0; \
+       xorq s2(SBOXES, RT1, 8), to##0; \
+       movzbl RW0bl, RT3d; \
+       movzbl RW0bh, RT1d; \
+       shrl $16, RW0d; \
+       xorq s7(SBOXES, RT3, 8), to##0; \
+       xorq s5(SBOXES, RT1, 8), to##0; \
+       movzbl RW0bl, RT3d; \
+       movzbl RW0bh, RT1d; \
+       load_next_key(n, RW0); \
+       xorq s3(SBOXES, RT3, 8), to##0; \
+       xorq s1(SBOXES, RT1, 8), to##0; \
+               xorq from##1, RW1; \
+               movzbl RW1bl, RT3d; \
+               movzbl RW1bh, RT1d; \
+               shrq $16, RW1; \
+               xorq s8(SBOXES, RT3, 8), to##1; \
+               xorq s6(SBOXES, RT1, 8), to##1; \
+               movzbl RW1bl, RT3d; \
+               movzbl RW1bh, RT1d; \
+               shrq $16, RW1; \
+               xorq s4(SBOXES, RT3, 8), to##1; \
+               xorq s2(SBOXES, RT1, 8), to##1; \
+               movzbl RW1bl, RT3d; \
+               movzbl RW1bh, RT1d; \
+               shrl $16, RW1d; \
+               xorq s7(SBOXES, RT3, 8), to##1; \
+               xorq s5(SBOXES, RT1, 8), to##1; \
+               movzbl RW1bl, RT3d; \
+               movzbl RW1bh, RT1d; \
+               do_movq(RW0, RW1); \
+               xorq s3(SBOXES, RT3, 8), to##1; \
+               xorq s1(SBOXES, RT1, 8), to##1; \
+                       xorq from##2, RW2; \
+                       movzbl RW2bl, RT3d; \
+                       movzbl RW2bh, RT1d; \
+                       shrq $16, RW2; \
+                       xorq s8(SBOXES, RT3, 8), to##2; \
+                       xorq s6(SBOXES, RT1, 8), to##2; \
+                       movzbl RW2bl, RT3d; \
+                       movzbl RW2bh, RT1d; \
+                       shrq $16, RW2; \
+                       xorq s4(SBOXES, RT3, 8), to##2; \
+                       xorq s2(SBOXES, RT1, 8), to##2; \
+                       movzbl RW2bl, RT3d; \
+                       movzbl RW2bh, RT1d; \
+                       shrl $16, RW2d; \
+                       xorq s7(SBOXES, RT3, 8), to##2; \
+                       xorq s5(SBOXES, RT1, 8), to##2; \
+                       movzbl RW2bl, RT3d; \
+                       movzbl RW2bh, RT1d; \
+                       do_movq(RW0, RW2); \
+                       xorq s3(SBOXES, RT3, 8), to##2; \
+                       xorq s1(SBOXES, RT1, 8), to##2;
+
+#define __movq(src, dst) \
+       movq src, dst;
+
+#define read_block(io, left, right) \
+       movl    (io), left##d; \
+       movl   4(io), right##d; \
+       bswapl left##d; \
+       bswapl right##d;
+
+#define write_block(io, left, right) \
+       bswapl left##d; \
+       bswapl right##d; \
+       movl   left##d,   (io); \
+       movl   right##d, 4(io);
+
+.align 8
+ELF(.type  _gcry_3des_amd64_crypt_blk3,@function;)
+_gcry_3des_amd64_crypt_blk3:
+       /* input:
+        *  %rdi: round keys, CTX
+        *  RL0d, RR0d, RL1d, RR1d, RL2d, RR2d: 3 input blocks
+        *  RR0d, RL0d, RR1d, RL1d, RR2d, RL2d: 3 output blocks
+        */
+       CFI_STARTPROC();
+
+       leaq .L_s1 rRIP, SBOXES;
+
+       initial_permutation3(RL, RR);
+
+       movq 0(CTX), RW0;
+       movq RW0, RW1;
+       movq RW0, RW2;
+
+       round3(0, RR, RL, load_next_key, __movq);
+       round3(1, RL, RR, load_next_key, __movq);
+       round3(2, RR, RL, load_next_key, __movq);
+       round3(3, RL, RR, load_next_key, __movq);
+       round3(4, RR, RL, load_next_key, __movq);
+       round3(5, RL, RR, load_next_key, __movq);
+       round3(6, RR, RL, load_next_key, __movq);
+       round3(7, RL, RR, load_next_key, __movq);
+       round3(8, RR, RL, load_next_key, __movq);
+       round3(9, RL, RR, load_next_key, __movq);
+       round3(10, RR, RL, load_next_key, __movq);
+       round3(11, RL, RR, load_next_key, __movq);
+       round3(12, RR, RL, load_next_key, __movq);
+       round3(13, RL, RR, load_next_key, __movq);
+       round3(14, RR, RL, load_next_key, __movq);
+       round3(15, RL, RR, load_next_key, __movq);
+
+       round3(16+0, RL, RR, load_next_key, __movq);
+       round3(16+1, RR, RL, load_next_key, __movq);
+       round3(16+2, RL, RR, load_next_key, __movq);
+       round3(16+3, RR, RL, load_next_key, __movq);
+       round3(16+4, RL, RR, load_next_key, __movq);
+       round3(16+5, RR, RL, load_next_key, __movq);
+       round3(16+6, RL, RR, load_next_key, __movq);
+       round3(16+7, RR, RL, load_next_key, __movq);
+       round3(16+8, RL, RR, load_next_key, __movq);
+       round3(16+9, RR, RL, load_next_key, __movq);
+       round3(16+10, RL, RR, load_next_key, __movq);
+       round3(16+11, RR, RL, load_next_key, __movq);
+       round3(16+12, RL, RR, load_next_key, __movq);
+       round3(16+13, RR, RL, load_next_key, __movq);
+       round3(16+14, RL, RR, load_next_key, __movq);
+       round3(16+15, RR, RL, load_next_key, __movq);
+
+       round3(32+0, RR, RL, load_next_key, __movq);
+       round3(32+1, RL, RR, load_next_key, __movq);
+       round3(32+2, RR, RL, load_next_key, __movq);
+       round3(32+3, RL, RR, load_next_key, __movq);
+       round3(32+4, RR, RL, load_next_key, __movq);
+       round3(32+5, RL, RR, load_next_key, __movq);
+       round3(32+6, RR, RL, load_next_key, __movq);
+       round3(32+7, RL, RR, load_next_key, __movq);
+       round3(32+8, RR, RL, load_next_key, __movq);
+       round3(32+9, RL, RR, load_next_key, __movq);
+       round3(32+10, RR, RL, load_next_key, __movq);
+       round3(32+11, RL, RR, load_next_key, __movq);
+       round3(32+12, RR, RL, load_next_key, __movq);
+       round3(32+13, RL, RR, load_next_key, __movq);
+       round3(32+14, RR, RL, load_next_key, __movq);
+       round3(32+15, RL, RR, dummy2, dummy2);
+
+       final_permutation3(RR, RL);
+
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size _gcry_3des_amd64_crypt_blk3,.-_gcry_3des_amd64_crypt_blk3;)
+
+.align 8
+.globl  _gcry_3des_amd64_cbc_dec
+ELF(.type   _gcry_3des_amd64_cbc_dec,@function;)
+_gcry_3des_amd64_cbc_dec:
+       /* input:
+        *      %rdi: ctx, CTX
+        *      %rsi: dst (3 blocks)
+        *      %rdx: src (3 blocks)
+        *      %rcx: iv (64bit)
+        */
+       CFI_STARTPROC();
+       ENTER_SYSV_FUNC_PARAMS_0_4
+
+       pushq %rbp;
+       CFI_PUSH(%rbp);
+       pushq %rbx;
+       CFI_PUSH(%rbx);
+       pushq %r12;
+       CFI_PUSH(%r12);
+       pushq %r13;
+       CFI_PUSH(%r13);
+       pushq %r14;
+       CFI_PUSH(%r14);
+       pushq %r15;
+       CFI_PUSH(%r15);
+
+       pushq %rsi; /*dst*/
+       CFI_PUSH(%rsi);
+       pushq %rdx; /*src*/
+       CFI_PUSH(%rdx);
+       pushq %rcx; /*iv*/
+       CFI_PUSH(%rcx);
+
+       /* load input */
+       movl 0 * 4(%rdx), RL0d;
+       movl 1 * 4(%rdx), RR0d;
+       movl 2 * 4(%rdx), RL1d;
+       movl 3 * 4(%rdx), RR1d;
+       movl 4 * 4(%rdx), RL2d;
+       movl 5 * 4(%rdx), RR2d;
+
+       bswapl RL0d;
+       bswapl RR0d;
+       bswapl RL1d;
+       bswapl RR1d;
+       bswapl RL2d;
+       bswapl RR2d;
+
+       call _gcry_3des_amd64_crypt_blk3;
+
+       popq %rcx; /*iv*/
+       CFI_POP_TMP_REG();
+       popq %rdx; /*src*/
+       CFI_POP_TMP_REG();
+       popq %rsi; /*dst*/
+       CFI_POP_TMP_REG();
+
+       bswapl RR0d;
+       bswapl RL0d;
+       bswapl RR1d;
+       bswapl RL1d;
+       bswapl RR2d;
+       bswapl RL2d;
+
+       movq 2 * 8(%rdx), RT0;
+       xorl 0 * 4(%rcx), RR0d;
+       xorl 1 * 4(%rcx), RL0d;
+       xorl 0 * 4(%rdx), RR1d;
+       xorl 1 * 4(%rdx), RL1d;
+       xorl 2 * 4(%rdx), RR2d;
+       xorl 3 * 4(%rdx), RL2d;
+       movq RT0, (%rcx); /* store new IV */
+
+       movl RR0d, 0 * 4(%rsi);
+       movl RL0d, 1 * 4(%rsi);
+       movl RR1d, 2 * 4(%rsi);
+       movl RL1d, 3 * 4(%rsi);
+       movl RR2d, 4 * 4(%rsi);
+       movl RL2d, 5 * 4(%rsi);
+
+       popq %r15;
+       CFI_POP(%r15);
+       popq %r14;
+       CFI_POP(%r14);
+       popq %r13;
+       CFI_POP(%r13);
+       popq %r12;
+       CFI_POP(%r12);
+       popq %rbx;
+       CFI_POP(%rbx);
+       popq %rbp;
+       CFI_POP(%rbp);
+
+       EXIT_SYSV_FUNC
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size _gcry_3des_amd64_cbc_dec,.-_gcry_3des_amd64_cbc_dec;)
+
+.align 8
+.globl  _gcry_3des_amd64_ctr_enc
+ELF(.type   _gcry_3des_amd64_ctr_enc,@function;)
+_gcry_3des_amd64_ctr_enc:
+       /* input:
+        *      %rdi: ctx, CTX
+        *      %rsi: dst (3 blocks)
+        *      %rdx: src (3 blocks)
+        *      %rcx: iv (64bit)
+        */
+       CFI_STARTPROC();
+       ENTER_SYSV_FUNC_PARAMS_0_4
+
+       pushq %rbp;
+       CFI_PUSH(%rbp);
+       pushq %rbx;
+       CFI_PUSH(%rbx);
+       pushq %r12;
+       CFI_PUSH(%r12);
+       pushq %r13;
+       CFI_PUSH(%r13);
+       pushq %r14;
+       CFI_PUSH(%r14);
+       pushq %r15;
+       CFI_PUSH(%r15);
+
+       pushq %rsi; /*dst*/
+       CFI_PUSH(%rsi);
+       pushq %rdx; /*src*/
+       CFI_PUSH(%rdx);
+       movq %rcx, RW2;
+
+       /* load IV and byteswap */
+       movq (RW2), RT0;
+       bswapq RT0;
+       movq RT0, RR0;
+
+       /* construct IVs */
+       leaq 1(RT0), RR1;
+       leaq 2(RT0), RR2;
+       leaq 3(RT0), RT0;
+       movq RR0, RL0;
+       movq RR1, RL1;
+       movq RR2, RL2;
+       bswapq RT0;
+       shrq $32, RL0;
+       shrq $32, RL1;
+       shrq $32, RL2;
+
+       /* store new IV */
+       movq RT0, (RW2);
+
+       call _gcry_3des_amd64_crypt_blk3;
+
+       popq %rdx; /*src*/
+       CFI_POP_TMP_REG();
+       popq %rsi; /*dst*/
+       CFI_POP_TMP_REG();
+
+       bswapl RR0d;
+       bswapl RL0d;
+       bswapl RR1d;
+       bswapl RL1d;
+       bswapl RR2d;
+       bswapl RL2d;
+
+       xorl 0 * 4(%rdx), RR0d;
+       xorl 1 * 4(%rdx), RL0d;
+       xorl 2 * 4(%rdx), RR1d;
+       xorl 3 * 4(%rdx), RL1d;
+       xorl 4 * 4(%rdx), RR2d;
+       xorl 5 * 4(%rdx), RL2d;
+
+       movl RR0d, 0 * 4(%rsi);
+       movl RL0d, 1 * 4(%rsi);
+       movl RR1d, 2 * 4(%rsi);
+       movl RL1d, 3 * 4(%rsi);
+       movl RR2d, 4 * 4(%rsi);
+       movl RL2d, 5 * 4(%rsi);
+
+       popq %r15;
+       CFI_POP(%r15);
+       popq %r14;
+       CFI_POP(%r14);
+       popq %r13;
+       CFI_POP(%r13);
+       popq %r12;
+       CFI_POP(%r12);
+       popq %rbx;
+       CFI_POP(%rbx);
+       popq %rbp;
+       CFI_POP(%rbp);
+
+       EXIT_SYSV_FUNC
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size _gcry_3des_amd64_cbc_dec,.-_gcry_3des_amd64_cbc_dec;)
+
+.align 8
+.globl  _gcry_3des_amd64_cfb_dec
+ELF(.type   _gcry_3des_amd64_cfb_dec,@function;)
+_gcry_3des_amd64_cfb_dec:
+       /* input:
+        *      %rdi: ctx, CTX
+        *      %rsi: dst (3 blocks)
+        *      %rdx: src (3 blocks)
+        *      %rcx: iv (64bit)
+        */
+       CFI_STARTPROC();
+       ENTER_SYSV_FUNC_PARAMS_0_4
+
+       pushq %rbp;
+       CFI_PUSH(%rbp);
+       pushq %rbx;
+       CFI_PUSH(%rbx);
+       pushq %r12;
+       CFI_PUSH(%r12);
+       pushq %r13;
+       CFI_PUSH(%r13);
+       pushq %r14;
+       CFI_PUSH(%r14);
+       pushq %r15;
+       CFI_PUSH(%r15);
+
+       pushq %rsi; /*dst*/
+       CFI_PUSH(%rsi);
+       pushq %rdx; /*src*/
+       CFI_PUSH(%rdx);
+       movq %rcx, RW2;
+
+       /* Load input */
+       movl 0 * 4(RW2), RL0d;
+       movl 1 * 4(RW2), RR0d;
+       movl 0 * 4(%rdx), RL1d;
+       movl 1 * 4(%rdx), RR1d;
+       movl 2 * 4(%rdx), RL2d;
+       movl 3 * 4(%rdx), RR2d;
+
+       bswapl RL0d;
+       bswapl RR0d;
+       bswapl RL1d;
+       bswapl RR1d;
+       bswapl RL2d;
+       bswapl RR2d;
+
+       /* Update IV */
+       movq 4 * 4(%rdx), RW0;
+       movq RW0, (RW2);
+
+       call _gcry_3des_amd64_crypt_blk3;
+
+       popq %rdx; /*src*/
+       CFI_POP_TMP_REG();
+       popq %rsi; /*dst*/
+       CFI_POP_TMP_REG();
+
+       bswapl RR0d;
+       bswapl RL0d;
+       bswapl RR1d;
+       bswapl RL1d;
+       bswapl RR2d;
+       bswapl RL2d;
+
+       xorl 0 * 4(%rdx), RR0d;
+       xorl 1 * 4(%rdx), RL0d;
+       xorl 2 * 4(%rdx), RR1d;
+       xorl 3 * 4(%rdx), RL1d;
+       xorl 4 * 4(%rdx), RR2d;
+       xorl 5 * 4(%rdx), RL2d;
+
+       movl RR0d, 0 * 4(%rsi);
+       movl RL0d, 1 * 4(%rsi);
+       movl RR1d, 2 * 4(%rsi);
+       movl RL1d, 3 * 4(%rsi);
+       movl RR2d, 4 * 4(%rsi);
+       movl RL2d, 5 * 4(%rsi);
+
+       popq %r15;
+       CFI_POP(%r15);
+       popq %r14;
+       CFI_POP(%r14);
+       popq %r13;
+       CFI_POP(%r13);
+       popq %r12;
+       CFI_POP(%r12);
+       popq %rbx;
+       CFI_POP(%rbx);
+       popq %rbp;
+       CFI_POP(%rbp);
+
+       EXIT_SYSV_FUNC
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size _gcry_3des_amd64_cfb_dec,.-_gcry_3des_amd64_cfb_dec;)
+
+.align 16
+.L_s1:
+       .quad 0x0010100001010400, 0x0000000000000000
+       .quad 0x0000100000010000, 0x0010100001010404
+       .quad 0x0010100001010004, 0x0000100000010404
+       .quad 0x0000000000000004, 0x0000100000010000
+       .quad 0x0000000000000400, 0x0010100001010400
+       .quad 0x0010100001010404, 0x0000000000000400
+       .quad 0x0010000001000404, 0x0010100001010004
+       .quad 0x0010000001000000, 0x0000000000000004
+       .quad 0x0000000000000404, 0x0010000001000400
+       .quad 0x0010000001000400, 0x0000100000010400
+       .quad 0x0000100000010400, 0x0010100001010000
+       .quad 0x0010100001010000, 0x0010000001000404
+       .quad 0x0000100000010004, 0x0010000001000004
+       .quad 0x0010000001000004, 0x0000100000010004
+       .quad 0x0000000000000000, 0x0000000000000404
+       .quad 0x0000100000010404, 0x0010000001000000
+       .quad 0x0000100000010000, 0x0010100001010404
+       .quad 0x0000000000000004, 0x0010100001010000
+       .quad 0x0010100001010400, 0x0010000001000000
+       .quad 0x0010000001000000, 0x0000000000000400
+       .quad 0x0010100001010004, 0x0000100000010000
+       .quad 0x0000100000010400, 0x0010000001000004
+       .quad 0x0000000000000400, 0x0000000000000004
+       .quad 0x0010000001000404, 0x0000100000010404
+       .quad 0x0010100001010404, 0x0000100000010004
+       .quad 0x0010100001010000, 0x0010000001000404
+       .quad 0x0010000001000004, 0x0000000000000404
+       .quad 0x0000100000010404, 0x0010100001010400
+       .quad 0x0000000000000404, 0x0010000001000400
+       .quad 0x0010000001000400, 0x0000000000000000
+       .quad 0x0000100000010004, 0x0000100000010400
+       .quad 0x0000000000000000, 0x0010100001010004
+.L_s2:
+       .quad 0x0801080200100020, 0x0800080000000000
+       .quad 0x0000080000000000, 0x0001080200100020
+       .quad 0x0001000000100000, 0x0000000200000020
+       .quad 0x0801000200100020, 0x0800080200000020
+       .quad 0x0800000200000020, 0x0801080200100020
+       .quad 0x0801080000100000, 0x0800000000000000
+       .quad 0x0800080000000000, 0x0001000000100000
+       .quad 0x0000000200000020, 0x0801000200100020
+       .quad 0x0001080000100000, 0x0001000200100020
+       .quad 0x0800080200000020, 0x0000000000000000
+       .quad 0x0800000000000000, 0x0000080000000000
+       .quad 0x0001080200100020, 0x0801000000100000
+       .quad 0x0001000200100020, 0x0800000200000020
+       .quad 0x0000000000000000, 0x0001080000100000
+       .quad 0x0000080200000020, 0x0801080000100000
+       .quad 0x0801000000100000, 0x0000080200000020
+       .quad 0x0000000000000000, 0x0001080200100020
+       .quad 0x0801000200100020, 0x0001000000100000
+       .quad 0x0800080200000020, 0x0801000000100000
+       .quad 0x0801080000100000, 0x0000080000000000
+       .quad 0x0801000000100000, 0x0800080000000000
+       .quad 0x0000000200000020, 0x0801080200100020
+       .quad 0x0001080200100020, 0x0000000200000020
+       .quad 0x0000080000000000, 0x0800000000000000
+       .quad 0x0000080200000020, 0x0801080000100000
+       .quad 0x0001000000100000, 0x0800000200000020
+       .quad 0x0001000200100020, 0x0800080200000020
+       .quad 0x0800000200000020, 0x0001000200100020
+       .quad 0x0001080000100000, 0x0000000000000000
+       .quad 0x0800080000000000, 0x0000080200000020
+       .quad 0x0800000000000000, 0x0801000200100020
+       .quad 0x0801080200100020, 0x0001080000100000
+.L_s3:
+       .quad 0x0000002000000208, 0x0000202008020200
+       .quad 0x0000000000000000, 0x0000200008020008
+       .quad 0x0000002008000200, 0x0000000000000000
+       .quad 0x0000202000020208, 0x0000002008000200
+       .quad 0x0000200000020008, 0x0000000008000008
+       .quad 0x0000000008000008, 0x0000200000020000
+       .quad 0x0000202008020208, 0x0000200000020008
+       .quad 0x0000200008020000, 0x0000002000000208
+       .quad 0x0000000008000000, 0x0000000000000008
+       .quad 0x0000202008020200, 0x0000002000000200
+       .quad 0x0000202000020200, 0x0000200008020000
+       .quad 0x0000200008020008, 0x0000202000020208
+       .quad 0x0000002008000208, 0x0000202000020200
+       .quad 0x0000200000020000, 0x0000002008000208
+       .quad 0x0000000000000008, 0x0000202008020208
+       .quad 0x0000002000000200, 0x0000000008000000
+       .quad 0x0000202008020200, 0x0000000008000000
+       .quad 0x0000200000020008, 0x0000002000000208
+       .quad 0x0000200000020000, 0x0000202008020200
+       .quad 0x0000002008000200, 0x0000000000000000
+       .quad 0x0000002000000200, 0x0000200000020008
+       .quad 0x0000202008020208, 0x0000002008000200
+       .quad 0x0000000008000008, 0x0000002000000200
+       .quad 0x0000000000000000, 0x0000200008020008
+       .quad 0x0000002008000208, 0x0000200000020000
+       .quad 0x0000000008000000, 0x0000202008020208
+       .quad 0x0000000000000008, 0x0000202000020208
+       .quad 0x0000202000020200, 0x0000000008000008
+       .quad 0x0000200008020000, 0x0000002008000208
+       .quad 0x0000002000000208, 0x0000200008020000
+       .quad 0x0000202000020208, 0x0000000000000008
+       .quad 0x0000200008020008, 0x0000202000020200
+.L_s4:
+       .quad 0x1008020000002001, 0x1000020800002001
+       .quad 0x1000020800002001, 0x0000000800000000
+       .quad 0x0008020800002000, 0x1008000800000001
+       .quad 0x1008000000000001, 0x1000020000002001
+       .quad 0x0000000000000000, 0x0008020000002000
+       .quad 0x0008020000002000, 0x1008020800002001
+       .quad 0x1000000800000001, 0x0000000000000000
+       .quad 0x0008000800000000, 0x1008000000000001
+       .quad 0x1000000000000001, 0x0000020000002000
+       .quad 0x0008000000000000, 0x1008020000002001
+       .quad 0x0000000800000000, 0x0008000000000000
+       .quad 0x1000020000002001, 0x0000020800002000
+       .quad 0x1008000800000001, 0x1000000000000001
+       .quad 0x0000020800002000, 0x0008000800000000
+       .quad 0x0000020000002000, 0x0008020800002000
+       .quad 0x1008020800002001, 0x1000000800000001
+       .quad 0x0008000800000000, 0x1008000000000001
+       .quad 0x0008020000002000, 0x1008020800002001
+       .quad 0x1000000800000001, 0x0000000000000000
+       .quad 0x0000000000000000, 0x0008020000002000
+       .quad 0x0000020800002000, 0x0008000800000000
+       .quad 0x1008000800000001, 0x1000000000000001
+       .quad 0x1008020000002001, 0x1000020800002001
+       .quad 0x1000020800002001, 0x0000000800000000
+       .quad 0x1008020800002001, 0x1000000800000001
+       .quad 0x1000000000000001, 0x0000020000002000
+       .quad 0x1008000000000001, 0x1000020000002001
+       .quad 0x0008020800002000, 0x1008000800000001
+       .quad 0x1000020000002001, 0x0000020800002000
+       .quad 0x0008000000000000, 0x1008020000002001
+       .quad 0x0000000800000000, 0x0008000000000000
+       .quad 0x0000020000002000, 0x0008020800002000
+.L_s5:
+       .quad 0x0000001000000100, 0x0020001002080100
+       .quad 0x0020000002080000, 0x0420001002000100
+       .quad 0x0000000000080000, 0x0000001000000100
+       .quad 0x0400000000000000, 0x0020000002080000
+       .quad 0x0400001000080100, 0x0000000000080000
+       .quad 0x0020001002000100, 0x0400001000080100
+       .quad 0x0420001002000100, 0x0420000002080000
+       .quad 0x0000001000080100, 0x0400000000000000
+       .quad 0x0020000002000000, 0x0400000000080000
+       .quad 0x0400000000080000, 0x0000000000000000
+       .quad 0x0400001000000100, 0x0420001002080100
+       .quad 0x0420001002080100, 0x0020001002000100
+       .quad 0x0420000002080000, 0x0400001000000100
+       .quad 0x0000000000000000, 0x0420000002000000
+       .quad 0x0020001002080100, 0x0020000002000000
+       .quad 0x0420000002000000, 0x0000001000080100
+       .quad 0x0000000000080000, 0x0420001002000100
+       .quad 0x0000001000000100, 0x0020000002000000
+       .quad 0x0400000000000000, 0x0020000002080000
+       .quad 0x0420001002000100, 0x0400001000080100
+       .quad 0x0020001002000100, 0x0400000000000000
+       .quad 0x0420000002080000, 0x0020001002080100
+       .quad 0x0400001000080100, 0x0000001000000100
+       .quad 0x0020000002000000, 0x0420000002080000
+       .quad 0x0420001002080100, 0x0000001000080100
+       .quad 0x0420000002000000, 0x0420001002080100
+       .quad 0x0020000002080000, 0x0000000000000000
+       .quad 0x0400000000080000, 0x0420000002000000
+       .quad 0x0000001000080100, 0x0020001002000100
+       .quad 0x0400001000000100, 0x0000000000080000
+       .quad 0x0000000000000000, 0x0400000000080000
+       .quad 0x0020001002080100, 0x0400001000000100
+.L_s6:
+       .quad 0x0200000120000010, 0x0204000020000000
+       .quad 0x0000040000000000, 0x0204040120000010
+       .quad 0x0204000020000000, 0x0000000100000010
+       .quad 0x0204040120000010, 0x0004000000000000
+       .quad 0x0200040020000000, 0x0004040100000010
+       .quad 0x0004000000000000, 0x0200000120000010
+       .quad 0x0004000100000010, 0x0200040020000000
+       .quad 0x0200000020000000, 0x0000040100000010
+       .quad 0x0000000000000000, 0x0004000100000010
+       .quad 0x0200040120000010, 0x0000040000000000
+       .quad 0x0004040000000000, 0x0200040120000010
+       .quad 0x0000000100000010, 0x0204000120000010
+       .quad 0x0204000120000010, 0x0000000000000000
+       .quad 0x0004040100000010, 0x0204040020000000
+       .quad 0x0000040100000010, 0x0004040000000000
+       .quad 0x0204040020000000, 0x0200000020000000
+       .quad 0x0200040020000000, 0x0000000100000010
+       .quad 0x0204000120000010, 0x0004040000000000
+       .quad 0x0204040120000010, 0x0004000000000000
+       .quad 0x0000040100000010, 0x0200000120000010
+       .quad 0x0004000000000000, 0x0200040020000000
+       .quad 0x0200000020000000, 0x0000040100000010
+       .quad 0x0200000120000010, 0x0204040120000010
+       .quad 0x0004040000000000, 0x0204000020000000
+       .quad 0x0004040100000010, 0x0204040020000000
+       .quad 0x0000000000000000, 0x0204000120000010
+       .quad 0x0000000100000010, 0x0000040000000000
+       .quad 0x0204000020000000, 0x0004040100000010
+       .quad 0x0000040000000000, 0x0004000100000010
+       .quad 0x0200040120000010, 0x0000000000000000
+       .quad 0x0204040020000000, 0x0200000020000000
+       .quad 0x0004000100000010, 0x0200040120000010
+.L_s7:
+       .quad 0x0002000000200000, 0x2002000004200002
+       .quad 0x2000000004000802, 0x0000000000000000
+       .quad 0x0000000000000800, 0x2000000004000802
+       .quad 0x2002000000200802, 0x0002000004200800
+       .quad 0x2002000004200802, 0x0002000000200000
+       .quad 0x0000000000000000, 0x2000000004000002
+       .quad 0x2000000000000002, 0x0000000004000000
+       .quad 0x2002000004200002, 0x2000000000000802
+       .quad 0x0000000004000800, 0x2002000000200802
+       .quad 0x2002000000200002, 0x0000000004000800
+       .quad 0x2000000004000002, 0x0002000004200000
+       .quad 0x0002000004200800, 0x2002000000200002
+       .quad 0x0002000004200000, 0x0000000000000800
+       .quad 0x2000000000000802, 0x2002000004200802
+       .quad 0x0002000000200800, 0x2000000000000002
+       .quad 0x0000000004000000, 0x0002000000200800
+       .quad 0x0000000004000000, 0x0002000000200800
+       .quad 0x0002000000200000, 0x2000000004000802
+       .quad 0x2000000004000802, 0x2002000004200002
+       .quad 0x2002000004200002, 0x2000000000000002
+       .quad 0x2002000000200002, 0x0000000004000000
+       .quad 0x0000000004000800, 0x0002000000200000
+       .quad 0x0002000004200800, 0x2000000000000802
+       .quad 0x2002000000200802, 0x0002000004200800
+       .quad 0x2000000000000802, 0x2000000004000002
+       .quad 0x2002000004200802, 0x0002000004200000
+       .quad 0x0002000000200800, 0x0000000000000000
+       .quad 0x2000000000000002, 0x2002000004200802
+       .quad 0x0000000000000000, 0x2002000000200802
+       .quad 0x0002000004200000, 0x0000000000000800
+       .quad 0x2000000004000002, 0x0000000004000800
+       .quad 0x0000000000000800, 0x2002000000200002
+.L_s8:
+       .quad 0x0100010410001000, 0x0000010000001000
+       .quad 0x0000000000040000, 0x0100010410041000
+       .quad 0x0100000010000000, 0x0100010410001000
+       .quad 0x0000000400000000, 0x0100000010000000
+       .quad 0x0000000400040000, 0x0100000010040000
+       .quad 0x0100010410041000, 0x0000010000041000
+       .quad 0x0100010010041000, 0x0000010400041000
+       .quad 0x0000010000001000, 0x0000000400000000
+       .quad 0x0100000010040000, 0x0100000410000000
+       .quad 0x0100010010001000, 0x0000010400001000
+       .quad 0x0000010000041000, 0x0000000400040000
+       .quad 0x0100000410040000, 0x0100010010041000
+       .quad 0x0000010400001000, 0x0000000000000000
+       .quad 0x0000000000000000, 0x0100000410040000
+       .quad 0x0100000410000000, 0x0100010010001000
+       .quad 0x0000010400041000, 0x0000000000040000
+       .quad 0x0000010400041000, 0x0000000000040000
+       .quad 0x0100010010041000, 0x0000010000001000
+       .quad 0x0000000400000000, 0x0100000410040000
+       .quad 0x0000010000001000, 0x0000010400041000
+       .quad 0x0100010010001000, 0x0000000400000000
+       .quad 0x0100000410000000, 0x0100000010040000
+       .quad 0x0100000410040000, 0x0100000010000000
+       .quad 0x0000000000040000, 0x0100010410001000
+       .quad 0x0000000000000000, 0x0100010410041000
+       .quad 0x0000000400040000, 0x0100000410000000
+       .quad 0x0100000010040000, 0x0100010010001000
+       .quad 0x0100010410001000, 0x0000000000000000
+       .quad 0x0100010410041000, 0x0000010000041000
+       .quad 0x0000010000041000, 0x0000010400001000
+       .quad 0x0000010400001000, 0x0000000400040000
+       .quad 0x0100000010000000, 0x0100010010041000
+
+#endif
+#endif
diff --git a/grub-core/lib/libgcrypt/cipher/des.c 
b/grub-core/lib/libgcrypt/cipher/des.c
index 96b06ae36..51116fcfc 100644
--- a/grub-core/lib/libgcrypt/cipher/des.c
+++ b/grub-core/lib/libgcrypt/cipher/des.c
@@ -49,7 +49,7 @@
  * encrypt or decrypt data in 64bit blocks in Electronic Codebook Mode.
  *
  * (In the examples below the slashes at the beginning and ending of comments
- * are omited.)
+ * are omitted.)
  *
  * DES Example
  * -----------
@@ -68,7 +68,7 @@
  *     * Encrypt the plaintext *
  *     des_ecb_encrypt(context, plaintext, ciphertext);
  *
- *     * To recover the orginal plaintext from ciphertext use: *
+ *     * To recover the original plaintext from ciphertext use: *
  *     des_ecb_decrypt(context, ciphertext, recoverd);
  *
  *
@@ -118,17 +118,40 @@
 #include "types.h"             /* for byte and u32 typedefs */
 #include "g10lib.h"
 #include "cipher.h"
+#include "bufhelp.h"
+#include "cipher-internal.h"
+#include "cipher-selftest.h"
+
+
+#define DES_BLOCKSIZE 8
+
+
+/* USE_AMD64_ASM indicates whether to use AMD64 assembly code. */
+#undef USE_AMD64_ASM
+#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+    defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+# define USE_AMD64_ASM 1
+#endif
+
+/* Helper macro to force alignment to 16 bytes.  */
+#ifdef HAVE_GCC_ATTRIBUTE_ALIGNED
+# define ATTR_ALIGNED_16  __attribute__ ((aligned (16)))
+#else
+# define ATTR_ALIGNED_16
+#endif
 
 #if defined(__GNUC__) && defined(__GNU_LIBRARY__)
-#define working_memcmp memcmp
+# define working_memcmp memcmp
 #else
 /*
  * According to the SunOS man page, memcmp returns indeterminate sign
  * depending on whether characters are signed or not.
  */
 static int
-working_memcmp( const char *a, const char *b, size_t n )
+working_memcmp( const void *_a, const void *_b, size_t n )
 {
+    const char *a = _a;
+    const char *b = _b;
     for( ; n; n--, a++, b++ )
        if( *a != *b )
            return (int)(*(byte*)a) - (int)(*(byte*)b);
@@ -170,6 +193,13 @@ static int tripledes_ecb_crypt (struct _tripledes_ctx *,
                                 const byte *, byte *, int);
 static int is_weak_key ( const byte *key );
 static const char *selftest (void);
+static unsigned int do_tripledes_encrypt(void *context, byte *outbuf,
+                                        const byte *inbuf );
+static unsigned int do_tripledes_decrypt(void *context, byte *outbuf,
+                                        const byte *inbuf );
+static gcry_err_code_t do_tripledes_setkey(void *context, const byte *key,
+                                           unsigned keylen,
+                                           cipher_bulk_ops_t *bulk_ops);
 
 static int initialized;
 
@@ -455,14 +485,12 @@ static unsigned char weak_keys_chksum[20] = {
  * Macros to convert 8 bytes from/to 32bit words.
  */
 #define READ_64BIT_DATA(data, left, right)                                \
-    left  = (data[0] << 24) | (data[1] << 16) | (data[2] << 8) | data[3];  \
-    right = (data[4] << 24) | (data[5] << 16) | (data[6] << 8) | data[7];
+    left = buf_get_be32(data + 0);                                        \
+    right = buf_get_be32(data + 4);
 
 #define WRITE_64BIT_DATA(data, left, right)                               \
-    data[0] = (left >> 24) &0xff; data[1] = (left >> 16) &0xff;           \
-    data[2] = (left >> 8) &0xff; data[3] = left &0xff;                    \
-    data[4] = (right >> 24) &0xff; data[5] = (right >> 16) &0xff;         \
-    data[6] = (right >> 8) &0xff; data[7] = right &0xff;
+    buf_put_be32(data + 0, left);                                         \
+    buf_put_be32(data + 4, right);
 
 /*
  * Handy macros for encryption and decryption of data
@@ -728,6 +756,65 @@ tripledes_set3keys (struct _tripledes_ctx *ctx,
 
 
 
+#ifdef USE_AMD64_ASM
+
+/* Assembly implementation of triple-DES. */
+extern void _gcry_3des_amd64_crypt_block(const void *keys, byte *out,
+                                         const byte *in);
+
+/* These assembly implementations process three blocks in parallel. */
+extern void _gcry_3des_amd64_ctr_enc(const void *keys, byte *out,
+                                     const byte *in, byte *ctr);
+
+extern void _gcry_3des_amd64_cbc_dec(const void *keys, byte *out,
+                                     const byte *in, byte *iv);
+
+extern void _gcry_3des_amd64_cfb_dec(const void *keys, byte *out,
+                                     const byte *in, byte *iv);
+
+#define TRIPLEDES_ECB_BURN_STACK (8 * sizeof(void *))
+
+
+/*
+ * Electronic Codebook Mode Triple-DES encryption/decryption of data
+ * according to 'mode'.  Sometimes this mode is named 'EDE' mode
+ * (Encryption-Decryption-Encryption).
+ */
+static inline int
+tripledes_ecb_crypt (struct _tripledes_ctx *ctx, const byte * from,
+                     byte * to, int mode)
+{
+  u32 *keys;
+
+  keys = mode ? ctx->decrypt_subkeys : ctx->encrypt_subkeys;
+
+  _gcry_3des_amd64_crypt_block(keys, to, from);
+
+  return 0;
+}
+
+static inline void
+tripledes_amd64_ctr_enc(const void *keys, byte *out, const byte *in, byte *ctr)
+{
+  _gcry_3des_amd64_ctr_enc(keys, out, in, ctr);
+}
+
+static inline void
+tripledes_amd64_cbc_dec(const void *keys, byte *out, const byte *in, byte *iv)
+{
+  _gcry_3des_amd64_cbc_dec(keys, out, in, iv);
+}
+
+static inline void
+tripledes_amd64_cfb_dec(const void *keys, byte *out, const byte *in, byte *iv)
+{
+  _gcry_3des_amd64_cfb_dec(keys, out, in, iv);
+}
+
+#else /*USE_AMD64_ASM*/
+
+#define TRIPLEDES_ECB_BURN_STACK 32
+
 /*
  * Electronic Codebook Mode Triple-DES encryption/decryption of data
  * according to 'mode'.  Sometimes this mode is named 'EDE' mode
@@ -778,8 +865,152 @@ tripledes_ecb_crypt (struct _tripledes_ctx *ctx, const 
byte * from,
   return 0;
 }
 
+#endif /*!USE_AMD64_ASM*/
+
+
+
+/* Bulk encryption of complete blocks in CTR mode.  This function is only
+   intended for the bulk encryption feature of cipher.c.  CTR is expected to be
+   of size DES_BLOCKSIZE. */
+static void
+_gcry_3des_ctr_enc(void *context, unsigned char *ctr, void *outbuf_arg,
+                   const void *inbuf_arg, size_t nblocks)
+{
+  struct _tripledes_ctx *ctx = context;
+  unsigned char *outbuf = outbuf_arg;
+  const unsigned char *inbuf = inbuf_arg;
+  unsigned char tmpbuf[DES_BLOCKSIZE];
+  int burn_stack_depth = TRIPLEDES_ECB_BURN_STACK;
 
+#ifdef USE_AMD64_ASM
+  {
+    int asm_burn_depth = 9 * sizeof(void *);
 
+    if (nblocks >= 3 && burn_stack_depth < asm_burn_depth)
+      burn_stack_depth = asm_burn_depth;
+
+    /* Process data in 3 block chunks. */
+    while (nblocks >= 3)
+      {
+        tripledes_amd64_ctr_enc(ctx->encrypt_subkeys, outbuf, inbuf, ctr);
+
+        nblocks -= 3;
+        outbuf += 3 * DES_BLOCKSIZE;
+        inbuf  += 3 * DES_BLOCKSIZE;
+      }
+
+    /* Use generic code to handle smaller chunks... */
+  }
+#endif
+
+  for ( ;nblocks; nblocks-- )
+    {
+      /* Encrypt the counter. */
+      tripledes_ecb_encrypt (ctx, ctr, tmpbuf);
+      /* XOR the input with the encrypted counter and store in output.  */
+      cipher_block_xor(outbuf, tmpbuf, inbuf, DES_BLOCKSIZE);
+      outbuf += DES_BLOCKSIZE;
+      inbuf  += DES_BLOCKSIZE;
+      /* Increment the counter.  */
+      cipher_block_add(ctr, 1, DES_BLOCKSIZE);
+    }
+
+  wipememory(tmpbuf, sizeof(tmpbuf));
+  _gcry_burn_stack(burn_stack_depth);
+}
+
+
+/* Bulk decryption of complete blocks in CBC mode.  This function is only
+   intended for the bulk encryption feature of cipher.c. */
+static void
+_gcry_3des_cbc_dec(void *context, unsigned char *iv, void *outbuf_arg,
+                   const void *inbuf_arg, size_t nblocks)
+{
+  struct _tripledes_ctx *ctx = context;
+  unsigned char *outbuf = outbuf_arg;
+  const unsigned char *inbuf = inbuf_arg;
+  unsigned char savebuf[DES_BLOCKSIZE];
+  int burn_stack_depth = TRIPLEDES_ECB_BURN_STACK;
+
+#ifdef USE_AMD64_ASM
+  {
+    int asm_burn_depth = 10 * sizeof(void *);
+
+    if (nblocks >= 3 && burn_stack_depth < asm_burn_depth)
+      burn_stack_depth = asm_burn_depth;
+
+    /* Process data in 3 block chunks. */
+    while (nblocks >= 3)
+      {
+        tripledes_amd64_cbc_dec(ctx->decrypt_subkeys, outbuf, inbuf, iv);
+
+        nblocks -= 3;
+        outbuf += 3 * DES_BLOCKSIZE;
+        inbuf  += 3 * DES_BLOCKSIZE;
+      }
+
+    /* Use generic code to handle smaller chunks... */
+  }
+#endif
+
+  for ( ;nblocks; nblocks-- )
+    {
+      /* INBUF is needed later and it may be identical to OUTBUF, so store
+         the intermediate result to SAVEBUF.  */
+      tripledes_ecb_decrypt (ctx, inbuf, savebuf);
+
+      cipher_block_xor_n_copy_2(outbuf, savebuf, iv, inbuf, DES_BLOCKSIZE);
+      inbuf += DES_BLOCKSIZE;
+      outbuf += DES_BLOCKSIZE;
+    }
+
+  wipememory(savebuf, sizeof(savebuf));
+  _gcry_burn_stack(burn_stack_depth);
+}
+
+
+/* Bulk decryption of complete blocks in CFB mode.  This function is only
+   intended for the bulk encryption feature of cipher.c. */
+static void
+_gcry_3des_cfb_dec(void *context, unsigned char *iv, void *outbuf_arg,
+                  const void *inbuf_arg, size_t nblocks)
+{
+  struct _tripledes_ctx *ctx = context;
+  unsigned char *outbuf = outbuf_arg;
+  const unsigned char *inbuf = inbuf_arg;
+  int burn_stack_depth = TRIPLEDES_ECB_BURN_STACK;
+
+#ifdef USE_AMD64_ASM
+  {
+    int asm_burn_depth = 9 * sizeof(void *);
+
+    if (nblocks >= 3 && burn_stack_depth < asm_burn_depth)
+      burn_stack_depth = asm_burn_depth;
+
+    /* Process data in 3 block chunks. */
+    while (nblocks >= 3)
+      {
+        tripledes_amd64_cfb_dec(ctx->encrypt_subkeys, outbuf, inbuf, iv);
+
+        nblocks -= 3;
+        outbuf += 3 * DES_BLOCKSIZE;
+        inbuf  += 3 * DES_BLOCKSIZE;
+      }
+
+    /* Use generic code to handle smaller chunks... */
+  }
+#endif
+
+  for ( ;nblocks; nblocks-- )
+    {
+      tripledes_ecb_encrypt (ctx, iv, iv);
+      cipher_block_xor_n_copy(outbuf, iv, inbuf, DES_BLOCKSIZE);
+      outbuf += DES_BLOCKSIZE;
+      inbuf  += DES_BLOCKSIZE;
+    }
+
+  _gcry_burn_stack(burn_stack_depth);
+}
 
 
 /*
@@ -816,6 +1047,65 @@ is_weak_key ( const byte *key )
 }
 
 
+/* Alternative setkey for selftests; need larger key than default. */
+static gcry_err_code_t
+bulk_selftest_setkey (void *context, const byte *__key, unsigned __keylen,
+                      cipher_bulk_ops_t *bulk_ops)
+{
+  static const unsigned char key[24] ATTR_ALIGNED_16 = {
+      0x66,0x9A,0x00,0x7F,0xC7,0x6A,0x45,0x9F,
+      0x98,0xBA,0xF9,0x17,0xFE,0xDF,0x95,0x22,
+      0x18,0x2A,0x39,0x47,0x5E,0x6F,0x75,0x82
+    };
+
+  (void)__key;
+  (void)__keylen;
+
+  return do_tripledes_setkey(context, key, sizeof(key), bulk_ops);
+}
+
+
+/* Run the self-tests for DES-CTR, tests IV increment of bulk CTR
+   encryption.  Returns NULL on success. */
+static const char *
+selftest_ctr (void)
+{
+  const int nblocks = 3+1;
+  const int blocksize = DES_BLOCKSIZE;
+  const int context_size = sizeof(struct _tripledes_ctx);
+
+  return _gcry_selftest_helper_ctr("3DES", &bulk_selftest_setkey,
+           &do_tripledes_encrypt, nblocks, blocksize, context_size);
+}
+
+
+/* Run the self-tests for DES-CBC, tests bulk CBC decryption.
+   Returns NULL on success. */
+static const char *
+selftest_cbc (void)
+{
+  const int nblocks = 3+2;
+  const int blocksize = DES_BLOCKSIZE;
+  const int context_size = sizeof(struct _tripledes_ctx);
+
+  return _gcry_selftest_helper_cbc("3DES", &bulk_selftest_setkey,
+           &do_tripledes_encrypt, nblocks, blocksize, context_size);
+}
+
+
+/* Run the self-tests for DES-CFB, tests bulk CBC decryption.
+   Returns NULL on success. */
+static const char *
+selftest_cfb (void)
+{
+  const int nblocks = 3+2;
+  const int blocksize = DES_BLOCKSIZE;
+  const int context_size = sizeof(struct _tripledes_ctx);
+
+  return _gcry_selftest_helper_cfb("3DES", &bulk_selftest_setkey,
+           &do_tripledes_encrypt, nblocks, blocksize, context_size);
+}
+
 
 /*
  * Performs a selftest of this DES/Triple-DES implementation.
@@ -825,6 +1115,8 @@ is_weak_key ( const byte *key )
 static const char *
 selftest (void)
 {
+  const char *r;
+
   /*
    * Check if 'u32' is really 32 bits wide. This DES / 3DES implementation
    * need this.
@@ -894,7 +1186,8 @@ selftest (void)
    * thanks to Jeroen C. van Gelderen.
    */
   {
-    struct { byte key[24]; byte plain[8]; byte cipher[8]; } testdata[] = {
+    static const struct { byte key[24]; byte plain[8]; byte cipher[8]; }
+      testdata[] = {
       { { 0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,
           0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,
           0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01  },
@@ -1003,18 +1296,34 @@ selftest (void)
         return "DES weak key detection failed";
   }
 
+  if ( (r = selftest_cbc ()) )
+    return r;
+
+  if ( (r = selftest_cfb ()) )
+    return r;
+
+  if ( (r = selftest_ctr ()) )
+    return r;
+
   return 0;
 }
 
 
 static gcry_err_code_t
-do_tripledes_setkey ( void *context, const byte *key, unsigned keylen )
+do_tripledes_setkey ( void *context, const byte *key, unsigned keylen,
+                      cipher_bulk_ops_t *bulk_ops )
 {
   struct _tripledes_ctx *ctx = (struct _tripledes_ctx *) context;
 
   if( keylen != 24 )
     return GPG_ERR_INV_KEYLEN;
 
+  /* Setup bulk encryption routines.  */
+  memset (bulk_ops, 0, sizeof(*bulk_ops));
+  bulk_ops->cbc_dec =  _gcry_3des_cbc_dec;
+  bulk_ops->cfb_dec =  _gcry_3des_cfb_dec;
+  bulk_ops->ctr_enc =  _gcry_3des_ctr_enc;
+
   tripledes_set3keys ( ctx, key, key+8, key+16);
 
   if (ctx->flags.no_weak_key)
@@ -1054,28 +1363,31 @@ do_tripledes_set_extra_info (void *context, int what,
 }
 
 
-static void
+static unsigned int
 do_tripledes_encrypt( void *context, byte *outbuf, const byte *inbuf )
 {
   struct _tripledes_ctx *ctx = (struct _tripledes_ctx *) context;
 
   tripledes_ecb_encrypt ( ctx, inbuf, outbuf );
-  _gcry_burn_stack (32);
+  return /*burn_stack*/ TRIPLEDES_ECB_BURN_STACK;
 }
 
-static void
+static unsigned int
 do_tripledes_decrypt( void *context, byte *outbuf, const byte *inbuf )
 {
   struct _tripledes_ctx *ctx = (struct _tripledes_ctx *) context;
   tripledes_ecb_decrypt ( ctx, inbuf, outbuf );
-  _gcry_burn_stack (32);
+  return /*burn_stack*/ TRIPLEDES_ECB_BURN_STACK;
 }
 
 static gcry_err_code_t
-do_des_setkey (void *context, const byte *key, unsigned keylen)
+do_des_setkey (void *context, const byte *key, unsigned keylen,
+               cipher_bulk_ops_t *bulk_ops)
 {
   struct _des_ctx *ctx = (struct _des_ctx *) context;
 
+  (void)bulk_ops;
+
   if (keylen != 8)
     return GPG_ERR_INV_KEYLEN;
 
@@ -1091,22 +1403,22 @@ do_des_setkey (void *context, const byte *key, unsigned 
keylen)
 }
 
 
-static void
+static unsigned int
 do_des_encrypt( void *context, byte *outbuf, const byte *inbuf )
 {
   struct _des_ctx *ctx = (struct _des_ctx *) context;
 
   des_ecb_encrypt ( ctx, inbuf, outbuf );
-  _gcry_burn_stack (32);
+  return /*burn_stack*/ (32);
 }
 
-static void
+static unsigned int
 do_des_decrypt( void *context, byte *outbuf, const byte *inbuf )
 {
   struct _des_ctx *ctx = (struct _des_ctx *) context;
 
   des_ecb_decrypt ( ctx, inbuf, outbuf );
-  _gcry_burn_stack (32);
+  return /*burn_stack*/ (32);
 }
 
 
@@ -1169,11 +1481,12 @@ run_selftests (int algo, int extended, 
selftest_report_func_t report)
 
 gcry_cipher_spec_t _gcry_cipher_spec_des =
   {
+    GCRY_CIPHER_DES, {0, 0},
     "DES", NULL, NULL, 8, 64, sizeof (struct _des_ctx),
     do_des_setkey, do_des_encrypt, do_des_decrypt
   };
 
-static gcry_cipher_oid_spec_t oids_tripledes[] =
+static const gcry_cipher_oid_spec_t oids_tripledes[] =
   {
     { "1.2.840.113549.3.7", GCRY_CIPHER_MODE_CBC },
     /* Teletrust specific OID for 3DES. */
@@ -1185,12 +1498,10 @@ static gcry_cipher_oid_spec_t oids_tripledes[] =
 
 gcry_cipher_spec_t _gcry_cipher_spec_tripledes =
   {
+    GCRY_CIPHER_3DES, {0, 0},
     "3DES", NULL, oids_tripledes, 8, 192, sizeof (struct _tripledes_ctx),
-    do_tripledes_setkey, do_tripledes_encrypt, do_tripledes_decrypt
-  };
-
-cipher_extra_spec_t _gcry_cipher_extraspec_tripledes =
-  {
+    do_tripledes_setkey, do_tripledes_encrypt, do_tripledes_decrypt,
+    NULL, NULL,
     run_selftests,
     do_tripledes_set_extra_info
   };
diff --git a/grub-core/lib/libgcrypt/cipher/dsa-common.c 
b/grub-core/lib/libgcrypt/cipher/dsa-common.c
new file mode 100644
index 000000000..7000903a4
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/dsa-common.c
@@ -0,0 +1,473 @@
+/* dsa-common.c - Common code for DSA
+ * Copyright (C) 1998, 1999 Free Software Foundation, Inc.
+ * Copyright (C) 2013  g10 Code GmbH
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "g10lib.h"
+#include "mpi.h"
+#include "cipher.h"
+#include "pubkey-internal.h"
+
+
+/*
+ * Modify K, so that computation time difference can be small,
+ * by making K large enough.
+ *
+ * Originally, (EC)DSA computation requires k where 0 < k < q.  Here,
+ * we add q (the order), to keep k in a range: q < k < 2*q (or,
+ * addming more q, to keep k in a range: 2*q < k < 3*q), so that
+ * timing difference of the EC multiply (or exponentiation) operation
+ * can be small.  The result of (EC)DSA computation is same.
+ */
+void
+_gcry_dsa_modify_k (gcry_mpi_t k, gcry_mpi_t q, int qbits)
+{
+  gcry_mpi_t k1 = mpi_new (qbits+2);
+
+  mpi_resize (k, (qbits+2+BITS_PER_MPI_LIMB-1) / BITS_PER_MPI_LIMB);
+  k->nlimbs = k->alloced;
+  mpi_add (k, k, q);
+  mpi_add (k1, k, q);
+  mpi_set_cond (k, k1, !mpi_test_bit (k, qbits));
+
+  mpi_free (k1);
+}
+
+/*
+ * Generate a random secret exponent K less than Q.
+ * Note that ECDSA uses this code also to generate D.
+ */
+gcry_mpi_t
+_gcry_dsa_gen_k (gcry_mpi_t q, int security_level)
+{
+  gcry_mpi_t k        = mpi_alloc_secure (mpi_get_nlimbs (q));
+  unsigned int nbits  = mpi_get_nbits (q);
+  unsigned int nbytes = (nbits+7)/8;
+  char *rndbuf = NULL;
+
+  /* To learn why we don't use mpi_mod to get the requested bit size,
+     read the paper: "The Insecurity of the Digital Signature
+     Algorithm with Partially Known Nonces" by Nguyen and Shparlinski.
+     Journal of Cryptology, New York. Vol 15, nr 3 (2003)  */
+
+  if (DBG_CIPHER)
+    log_debug ("choosing a random k of %u bits at seclevel %d\n",
+               nbits, security_level);
+  for (;;)
+    {
+      if ( !rndbuf || nbits < 32 )
+        {
+          xfree (rndbuf);
+          rndbuf = _gcry_random_bytes_secure (nbytes, security_level);
+       }
+      else
+        { /* Change only some of the higher bits.  We could improve
+            this by directly requesting more memory at the first call
+            to get_random_bytes() and use these extra bytes here.
+            However the required management code is more complex and
+            thus we better use this simple method.  */
+          char *pp = _gcry_random_bytes_secure (4, security_level);
+          memcpy (rndbuf, pp, 4);
+          xfree (pp);
+       }
+      _gcry_mpi_set_buffer (k, rndbuf, nbytes, 0);
+
+      /* Make sure we have the requested number of bits.  This code
+         looks a bit funny but it is easy to understand if you
+         consider that mpi_set_highbit clears all higher bits.  We
+         don't have a clear_highbit, thus we first set the high bit
+         and then clear it again.  */
+      if (mpi_test_bit (k, nbits-1))
+        mpi_set_highbit (k, nbits-1);
+      else
+        {
+          mpi_set_highbit (k, nbits-1);
+          mpi_clear_bit (k, nbits-1);
+       }
+
+      if (!(mpi_cmp (k, q) < 0))    /* check: k < q */
+        {
+          if (DBG_CIPHER)
+            log_debug ("\tk too large - again\n");
+          continue; /* no  */
+        }
+      if (!(mpi_cmp_ui (k, 0) > 0)) /* check: k > 0 */
+        {
+          if (DBG_CIPHER)
+            log_debug ("\tk is zero - again\n");
+          continue; /* no */
+        }
+      break;   /* okay */
+    }
+  xfree (rndbuf);
+
+  return k;
+}
+
+
+/* Turn VALUE into an octet string and store it in an allocated buffer
+   at R_FRAME.  If the resulting octet string is shorter than NBYTES
+   the result will be left padded with zeroes.  If VALUE does not fit
+   into NBYTES an error code is returned.  */
+static gpg_err_code_t
+int2octets (unsigned char **r_frame, gcry_mpi_t value, size_t nbytes)
+{
+  gpg_err_code_t rc;
+  size_t nframe, noff, n;
+  unsigned char *frame;
+
+  rc = _gcry_mpi_print (GCRYMPI_FMT_USG, NULL, 0, &nframe, value);
+  if (rc)
+    return rc;
+  if (nframe > nbytes)
+    return GPG_ERR_TOO_LARGE; /* Value too long to fit into NBYTES.  */
+
+  noff = (nframe < nbytes)? nbytes - nframe : 0;
+  n = nframe + noff;
+  frame = mpi_is_secure (value)? xtrymalloc_secure (n) : xtrymalloc (n);
+  if (!frame)
+    return gpg_err_code_from_syserror ();
+  if (noff)
+    memset (frame, 0, noff);
+  nframe += noff;
+  rc = _gcry_mpi_print (GCRYMPI_FMT_USG, frame+noff, nframe-noff, NULL, value);
+  if (rc)
+    {
+      xfree (frame);
+      return rc;
+    }
+
+  *r_frame = frame;
+  return 0;
+}
+
+
+/* Connert the bit string BITS of length NBITS into an octet string
+   with a length of (QBITS+7)/8 bytes.  On success store the result at
+   R_FRAME.  */
+static gpg_err_code_t
+bits2octets (unsigned char **r_frame,
+             const void *bits, unsigned int nbits,
+             gcry_mpi_t q, unsigned int qbits)
+{
+  gpg_err_code_t rc;
+  gcry_mpi_t z1;
+
+  /* z1 = bits2int (b) */
+  rc = _gcry_mpi_scan (&z1, GCRYMPI_FMT_USG, bits, (nbits+7)/8, NULL);
+  if (rc)
+    return rc;
+  if (nbits > qbits)
+    mpi_rshift (z1, z1, nbits - qbits);
+
+  /* z2 - z1 mod q */
+  if (mpi_cmp (z1, q) >= 0)
+    mpi_sub (z1, z1, q);
+
+  /* Convert to an octet string.  */
+  rc = int2octets (r_frame, z1, (qbits+7)/8);
+
+  mpi_free (z1);
+  return rc;
+}
+
+
+/*
+ * Generate a deterministic secret exponent K less than DSA_Q.  H1 is
+ * the to be signed digest with a length of HLEN bytes.  HALGO is the
+ * algorithm used to create the hash.  On success the value for K is
+ * stored at R_K.
+ */
+gpg_err_code_t
+_gcry_dsa_gen_rfc6979_k (gcry_mpi_t *r_k,
+                         gcry_mpi_t dsa_q, gcry_mpi_t dsa_x,
+                         const unsigned char *h1, unsigned int hlen,
+                         int halgo, unsigned int extraloops)
+{
+  gpg_err_code_t rc;
+  unsigned char *V = NULL;
+  unsigned char *K = NULL;
+  unsigned char *x_buf = NULL;
+  unsigned char *h1_buf = NULL;
+  gcry_md_hd_t hd = NULL;
+  unsigned char *t = NULL;
+  gcry_mpi_t k = NULL;
+  unsigned int tbits, qbits;
+  int i;
+
+  qbits = mpi_get_nbits (dsa_q);
+
+  if (!qbits || !h1 || !hlen)
+    return GPG_ERR_EINVAL;
+
+  if (_gcry_md_get_algo_dlen (halgo) != hlen)
+    return GPG_ERR_DIGEST_ALGO;
+
+  /* Step b:  V = 0x01 0x01 0x01 ... 0x01 */
+  V = xtrymalloc (hlen);
+  if (!V)
+    {
+      rc = gpg_err_code_from_syserror ();
+      goto leave;
+    }
+  for (i=0; i < hlen; i++)
+    V[i] = 1;
+
+  /* Step c:  K = 0x00 0x00 0x00 ... 0x00 */
+  K = xtrycalloc (1, hlen);
+  if (!K)
+    {
+      rc = gpg_err_code_from_syserror ();
+      goto leave;
+    }
+
+  rc = int2octets (&x_buf, dsa_x, (qbits+7)/8);
+  if (rc)
+    goto leave;
+
+  rc = bits2octets (&h1_buf, h1, hlen*8, dsa_q, qbits);
+  if (rc)
+    goto leave;
+
+  /* Create a handle to compute the HMACs.  */
+  rc = _gcry_md_open (&hd, halgo, (GCRY_MD_FLAG_SECURE | GCRY_MD_FLAG_HMAC));
+  if (rc)
+    goto leave;
+
+  /* Step d:  K = HMAC_K(V || 0x00 || int2octets(x) || bits2octets(h1) */
+  rc = _gcry_md_setkey (hd, K, hlen);
+  if (rc)
+    goto leave;
+  _gcry_md_write (hd, V, hlen);
+  _gcry_md_write (hd, "", 1);
+  _gcry_md_write (hd, x_buf, (qbits+7)/8);
+  _gcry_md_write (hd, h1_buf, (qbits+7)/8);
+  memcpy (K, _gcry_md_read (hd, 0), hlen);
+
+  /* Step e:  V = HMAC_K(V) */
+  rc = _gcry_md_setkey (hd, K, hlen);
+  if (rc)
+    goto leave;
+  _gcry_md_write (hd, V, hlen);
+  memcpy (V, _gcry_md_read (hd, 0), hlen);
+
+  /* Step f:  K = HMAC_K(V || 0x01 || int2octets(x) || bits2octets(h1) */
+  rc = _gcry_md_setkey (hd, K, hlen);
+  if (rc)
+    goto leave;
+  _gcry_md_write (hd, V, hlen);
+  _gcry_md_write (hd, "\x01", 1);
+  _gcry_md_write (hd, x_buf, (qbits+7)/8);
+  _gcry_md_write (hd, h1_buf, (qbits+7)/8);
+  memcpy (K, _gcry_md_read (hd, 0), hlen);
+
+  /* Step g:  V = HMAC_K(V) */
+  rc = _gcry_md_setkey (hd, K, hlen);
+  if (rc)
+    goto leave;
+  _gcry_md_write (hd, V, hlen);
+  memcpy (V, _gcry_md_read (hd, 0), hlen);
+
+  /* Step h. */
+  t = xtrymalloc_secure ((qbits+7)/8+hlen);
+  if (!t)
+    {
+      rc = gpg_err_code_from_syserror ();
+      goto leave;
+    }
+
+ again:
+  for (tbits = 0; tbits < qbits;)
+    {
+      /* V = HMAC_K(V) */
+      rc = _gcry_md_setkey (hd, K, hlen);
+      if (rc)
+        goto leave;
+      _gcry_md_write (hd, V, hlen);
+      memcpy (V, _gcry_md_read (hd, 0), hlen);
+
+      /* T = T || V */
+      memcpy (t+(tbits+7)/8, V, hlen);
+      tbits += 8*hlen;
+    }
+
+  /* k = bits2int (T) */
+  mpi_free (k);
+  k = NULL;
+  rc = _gcry_mpi_scan (&k, GCRYMPI_FMT_USG, t, (tbits+7)/8, NULL);
+  if (rc)
+    goto leave;
+  if (tbits > qbits)
+    mpi_rshift (k, k, tbits - qbits);
+
+  /* Check: k < q and k > 1 */
+  if (!(mpi_cmp (k, dsa_q) < 0 && mpi_cmp_ui (k, 0) > 0))
+    {
+      /* K = HMAC_K(V || 0x00) */
+      rc = _gcry_md_setkey (hd, K, hlen);
+      if (rc)
+        goto leave;
+      _gcry_md_write (hd, V, hlen);
+      _gcry_md_write (hd, "", 1);
+      memcpy (K, _gcry_md_read (hd, 0), hlen);
+
+      /* V = HMAC_K(V) */
+      rc = _gcry_md_setkey (hd, K, hlen);
+      if (rc)
+        goto leave;
+      _gcry_md_write (hd, V, hlen);
+      memcpy (V, _gcry_md_read (hd, 0), hlen);
+
+      goto again;
+    }
+
+  /* The caller may have requested that we introduce some extra loops.
+     This is for example useful if the caller wants another value for
+     K because the last returned one yielded an R of 0.  Because this
+     is very unlikely we implement it in a straightforward way.  */
+  if (extraloops)
+    {
+      extraloops--;
+
+      /* K = HMAC_K(V || 0x00) */
+      rc = _gcry_md_setkey (hd, K, hlen);
+      if (rc)
+        goto leave;
+      _gcry_md_write (hd, V, hlen);
+      _gcry_md_write (hd, "", 1);
+      memcpy (K, _gcry_md_read (hd, 0), hlen);
+
+      /* V = HMAC_K(V) */
+      rc = _gcry_md_setkey (hd, K, hlen);
+      if (rc)
+        goto leave;
+      _gcry_md_write (hd, V, hlen);
+      memcpy (V, _gcry_md_read (hd, 0), hlen);
+
+      goto again;
+    }
+
+  /* log_mpidump ("  k", k); */
+
+ leave:
+  xfree (t);
+  _gcry_md_close (hd);
+  xfree (h1_buf);
+  xfree (x_buf);
+  xfree (K);
+  xfree (V);
+
+  if (rc)
+    mpi_free (k);
+  else
+    *r_k = k;
+  return rc;
+}
+
+
+
+/*
+ * For DSA/ECDSA, as prehash function, compute hash with HASHALGO for
+ * INPUT.  Result hash value is returned in R_HASH as an opaque MPI.
+ * Returns error code.
+ */
+gpg_err_code_t
+_gcry_dsa_compute_hash (gcry_mpi_t *r_hash, gcry_mpi_t input, int hashalgo)
+{
+  gpg_err_code_t rc = 0;
+  size_t hlen;
+  void *hashbuf;
+  void *abuf;
+  unsigned int abits;
+  unsigned int n;
+
+  hlen = _gcry_md_get_algo_dlen (hashalgo);
+  hashbuf = xtrymalloc (hlen);
+  if (!hashbuf)
+    {
+      rc = gpg_err_code_from_syserror ();
+      return rc;
+    }
+
+  if (mpi_is_opaque (input))
+    {
+      abuf = mpi_get_opaque (input, &abits);
+      n = (abits+7)/8;
+      _gcry_md_hash_buffer (hashalgo, hashbuf, abuf, n);
+    }
+  else
+    {
+      abits = mpi_get_nbits (input);
+      n = (abits+7)/8;
+      abuf = xtrymalloc (n);
+      if (!abuf)
+        {
+          rc = gpg_err_code_from_syserror ();
+          xfree (hashbuf);
+          return rc;
+        }
+      _gcry_mpi_to_octet_string (NULL, abuf, input, n);
+      _gcry_md_hash_buffer (hashalgo, hashbuf, abuf, n);
+      xfree (abuf);
+    }
+
+  *r_hash = mpi_set_opaque (NULL, hashbuf, hlen*8);
+  if (!*r_hash)
+    rc = GPG_ERR_INV_OBJ;
+
+  return rc;
+}
+
+
+/*
+ * Truncate opaque hash value to qbits for DSA.
+ * Non-opaque input is not truncated, in hope that user
+ * knows what is passed. It is not possible to correctly
+ * trucate non-opaque inputs.
+ */
+gpg_err_code_t
+_gcry_dsa_normalize_hash (gcry_mpi_t input,
+                          gcry_mpi_t *out,
+                          unsigned int qbits)
+{
+  gpg_err_code_t rc = 0;
+  const void *abuf;
+  unsigned int abits;
+  gcry_mpi_t hash;
+
+  if (mpi_is_opaque (input))
+    {
+      abuf = mpi_get_opaque (input, &abits);
+      rc = _gcry_mpi_scan (&hash, GCRYMPI_FMT_USG, abuf, (abits+7)/8, NULL);
+      if (rc)
+        return rc;
+      if (abits > qbits)
+        mpi_rshift (hash, hash, abits - qbits);
+    }
+  else
+    hash = input;
+
+  *out = hash;
+
+  return rc;
+}
diff --git a/grub-core/lib/libgcrypt/cipher/dsa.c 
b/grub-core/lib/libgcrypt/cipher/dsa.c
index 883a815f2..e559f9f5c 100644
--- a/grub-core/lib/libgcrypt/cipher/dsa.c
+++ b/grub-core/lib/libgcrypt/cipher/dsa.c
@@ -1,6 +1,7 @@
 /* dsa.c - DSA signature algorithm
  * Copyright (C) 1998, 2000, 2001, 2002, 2003,
  *               2006, 2008  Free Software Foundation, Inc.
+ * Copyright (C) 2013 g10 Code GmbH.
  *
  * This file is part of Libgcrypt.
  *
@@ -26,6 +27,8 @@
 #include "g10lib.h"
 #include "mpi.h"
 #include "cipher.h"
+#include "pubkey-internal.h"
+
 
 typedef struct
 {
@@ -55,8 +58,18 @@ typedef struct
 } dsa_domain_t;
 
 
-/* A sample 1024 bit DSA key used for the selftests.  */
-static const char sample_secret_key[] =
+static const char *dsa_names[] =
+  {
+    "dsa",
+    "openpgp-dsa",
+    NULL,
+  };
+
+
+/* A sample 1024 bit DSA key used for the selftests.  Not anymore
+ * used, kept only for reference.  */
+#if 0
+static const char sample_secret_key_1024[] =
 "(private-key"
 " (dsa"
 "  (p #00AD7C0025BA1A15F775F3F2D673718391D00456978D347B33D7B49E7F32EDAB"
@@ -74,7 +87,7 @@ static const char sample_secret_key[] =
 "      42CAA7DC289F0C5A9D155F02D3D551DB741A81695B74D4C8F477F9C7838EB0FB#)"
 "  (x #11D54E4ADBD3034160F2CED4B7CD292A4EBF3EC0#)))";
 /* A sample 1024 bit DSA key used for the selftests (public only).  */
-static const char sample_public_key[] =
+static const char sample_public_key_1024[] =
 "(public-key"
 " (dsa"
 "  (p #00AD7C0025BA1A15F775F3F2D673718391D00456978D347B33D7B49E7F32EDAB"
@@ -90,11 +103,28 @@ static const char sample_public_key[] =
 "      A1816A724C34F87330FC9E187C5D66897A04535CC2AC9164A7150ABFA8179827"
 "      6E45831AB811EEE848EBB24D9F5F2883B6E5DDC4C659DEF944DCFD80BF4D0A20"
 "      42CAA7DC289F0C5A9D155F02D3D551DB741A81695B74D4C8F477F9C7838EB0FB#)))";
+#endif /*0*/
 
+/* 2048 DSA key from RFC 6979 A.2.2 */
+static const char sample_public_key_2048[] =
+"(public-key"
+" (dsa"
+"  (p 
#9DB6FB5951B66BB6FE1E140F1D2CE5502374161FD6538DF1648218642F0B5C48C8F7A41AADFA187324B87674FA1822B00F1ECF8136943D7C55757264E5A1A44FFE012E9936E00C1D3E9310B01C7D179805D3058B2A9F4BB6F9716BFE6117C6B5B3CC4D9BE341104AD4A80AD6C94E005F4B993E14F091EB51743BF33050C38DE235567E1B34C3D6A5C0CEAA1A0F368213C3D19843D0B4B09DCB9FC72D39C8DE41F1BF14D4BB4563CA28371621CAD3324B6A2D392145BEBFAC748805236F5CA2FE92B871CD8F9C36D3292B5509CA8CAA77A2ADFC7BFD77DDA6F71125A7456FEA153E433256A2261C6A06ED3693797E7995FAD5AABBCFBE3EDA2741E375404AE25B#)"
+"  (q #F2C3119374CE76C9356990B465374A17F23F9ED35089BD969F61C6DDE9998C1F#)"
+"  (g 
#5C7FF6B06F8F143FE8288433493E4769C4D988ACE5BE25A0E24809670716C613D7B0CEE6932F8FAA7C44D2CB24523DA53FBE4F6EC3595892D1AA58C4328A06C46A15662E7EAA703A1DECF8BBB2D05DBE2EB956C142A338661D10461C0D135472085057F3494309FFA73C611F78B32ADBB5740C361C9F35BE90997DB2014E2EF5AA61782F52ABEB8BD6432C4DD097BC5423B285DAFB60DC364E8161F4A2A35ACA3A10B1C4D203CC76A470A33AFDCBDD92959859ABD8B56E1725252D78EAC66E71BA9AE3F1DD2487199874393CD4D832186800654760E1E34C09E4D155179F9EC0DC4473F996BDCE6EED1CABED8B6F116F7AD9CF505DF0F998E34AB27514B0FFE7#)"
+"  (y 
#667098C654426C78D7F8201EAC6C203EF030D43605032C2F1FA937E5237DBD949F34A0A2564FE126DC8B715C5141802CE0979C8246463C40E6B6BDAA2513FA611728716C2E4FD53BC95B89E69949D96512E873B9C8F8DFD499CC312882561ADECB31F658E934C0C197F2C4D96B05CBAD67381E7B768891E4DA3843D24D94CDFB5126E9B8BF21E8358EE0E0A30EF13FD6A664C0DCE3731F7FB49A4845A4FD8254687972A2D382599C9BAC4E0ED7998193078913032558134976410B89D2C171D123AC35FD977219597AA7D15C1A9A428E59194F75C721EBCBCFAE44696A499AFA74E04299F132026601638CB87AB79190D4A0986315DA8EEC6561C938996BEADF#)))";
+
+static const char sample_secret_key_2048[] =
+"(private-key"
+" (dsa"
+"  (p 
#9DB6FB5951B66BB6FE1E140F1D2CE5502374161FD6538DF1648218642F0B5C48C8F7A41AADFA187324B87674FA1822B00F1ECF8136943D7C55757264E5A1A44FFE012E9936E00C1D3E9310B01C7D179805D3058B2A9F4BB6F9716BFE6117C6B5B3CC4D9BE341104AD4A80AD6C94E005F4B993E14F091EB51743BF33050C38DE235567E1B34C3D6A5C0CEAA1A0F368213C3D19843D0B4B09DCB9FC72D39C8DE41F1BF14D4BB4563CA28371621CAD3324B6A2D392145BEBFAC748805236F5CA2FE92B871CD8F9C36D3292B5509CA8CAA77A2ADFC7BFD77DDA6F71125A7456FEA153E433256A2261C6A06ED3693797E7995FAD5AABBCFBE3EDA2741E375404AE25B#)"
+"  (q #F2C3119374CE76C9356990B465374A17F23F9ED35089BD969F61C6DDE9998C1F#)"
+"  (g 
#5C7FF6B06F8F143FE8288433493E4769C4D988ACE5BE25A0E24809670716C613D7B0CEE6932F8FAA7C44D2CB24523DA53FBE4F6EC3595892D1AA58C4328A06C46A15662E7EAA703A1DECF8BBB2D05DBE2EB956C142A338661D10461C0D135472085057F3494309FFA73C611F78B32ADBB5740C361C9F35BE90997DB2014E2EF5AA61782F52ABEB8BD6432C4DD097BC5423B285DAFB60DC364E8161F4A2A35ACA3A10B1C4D203CC76A470A33AFDCBDD92959859ABD8B56E1725252D78EAC66E71BA9AE3F1DD2487199874393CD4D832186800654760E1E34C09E4D155179F9EC0DC4473F996BDCE6EED1CABED8B6F116F7AD9CF505DF0F998E34AB27514B0FFE7#)"
+"  (y 
#667098C654426C78D7F8201EAC6C203EF030D43605032C2F1FA937E5237DBD949F34A0A2564FE126DC8B715C5141802CE0979C8246463C40E6B6BDAA2513FA611728716C2E4FD53BC95B89E69949D96512E873B9C8F8DFD499CC312882561ADECB31F658E934C0C197F2C4D96B05CBAD67381E7B768891E4DA3843D24D94CDFB5126E9B8BF21E8358EE0E0A30EF13FD6A664C0DCE3731F7FB49A4845A4FD8254687972A2D382599C9BAC4E0ED7998193078913032558134976410B89D2C171D123AC35FD977219597AA7D15C1A9A428E59194F75C721EBCBCFAE44696A499AFA74E04299F132026601638CB87AB79190D4A0986315DA8EEC6561C938996BEADF#)"
+"  (x #69C7548C21D0DFEA6B9A51C9EAD4E27C33D3B3F180316E5BCAB92C933F0E4DBC#)))";
 
 
 
-static gcry_mpi_t gen_k (gcry_mpi_t q);
 static int test_keys (DSA_secret_key *sk, unsigned int qbits);
 static int check_secret_key (DSA_secret_key *sk);
 static gpg_err_code_t generate (DSA_secret_key *sk,
@@ -103,15 +133,29 @@ static gpg_err_code_t generate (DSA_secret_key *sk,
                                 int transient_key,
                                 dsa_domain_t *domain,
                                 gcry_mpi_t **ret_factors);
-static void sign (gcry_mpi_t r, gcry_mpi_t s, gcry_mpi_t input,
-                  DSA_secret_key *skey);
-static int verify (gcry_mpi_t r, gcry_mpi_t s, gcry_mpi_t input,
-                   DSA_public_key *pkey);
+static gpg_err_code_t sign (gcry_mpi_t r, gcry_mpi_t s,
+                            gcry_mpi_t input, gcry_mpi_t k,
+                            DSA_secret_key *skey, int flags, int hashalgo);
+static gpg_err_code_t verify (gcry_mpi_t r, gcry_mpi_t s, gcry_mpi_t input,
+                              DSA_public_key *pkey, int flags, int hashalgo);
+static unsigned int dsa_get_nbits (gcry_sexp_t parms);
+
 
 static void (*progress_cb) (void *,const char *, int, int, int );
 static void *progress_cb_data;
 
 
+/* Check the DSA key length is acceptable for key generation or usage */
+static gpg_err_code_t
+dsa_check_keysize (unsigned int nbits)
+{
+  if (fips_mode () && nbits < 2048)
+    return GPG_ERR_INV_VALUE;
+
+  return 0;
+}
+
+
 void
 _gcry_register_pk_dsa_progress (void (*cb) (void *, const char *,
                                             int, int, int),
@@ -130,90 +174,15 @@ progress (int c)
 }
 
 
-/*
- * Generate a random secret exponent k less than q.
- */
-static gcry_mpi_t
-gen_k( gcry_mpi_t q )
-{
-  gcry_mpi_t k = mpi_alloc_secure( mpi_get_nlimbs(q) );
-  unsigned int nbits = mpi_get_nbits(q);
-  unsigned int nbytes = (nbits+7)/8;
-  char *rndbuf = NULL;
-
-  /* To learn why we don't use mpi_mod to get the requested bit size,
-     read the paper: "The Insecurity of the Digital Signature
-     Algorithm with Partially Known Nonces" by Nguyen and Shparlinski.
-     Journal of Cryptology, New York. Vol 15, nr 3 (2003)  */
-
-  if ( DBG_CIPHER )
-    log_debug("choosing a random k ");
-  for (;;)
-    {
-      if( DBG_CIPHER )
-        progress('.');
-
-      if ( !rndbuf || nbits < 32 )
-        {
-          gcry_free(rndbuf);
-          rndbuf = gcry_random_bytes_secure( (nbits+7)/8, GCRY_STRONG_RANDOM );
-       }
-      else
-        { /* Change only some of the higher bits.  We could improve
-            this by directly requesting more memory at the first call
-            to get_random_bytes() and use these extra bytes here.
-            However the required management code is more complex and
-            thus we better use this simple method.  */
-          char *pp = gcry_random_bytes_secure( 4, GCRY_STRONG_RANDOM );
-          memcpy( rndbuf,pp, 4 );
-          gcry_free(pp);
-       }
-      _gcry_mpi_set_buffer( k, rndbuf, nbytes, 0 );
-
-      /* Make sure we have the requested number of bits.  This code
-         looks a bit funny but it is easy to understand if you
-         consider that mpi_set_highbit clears all higher bits.  We
-         don't have a clear_highbit, thus we first set the high bit
-         and then clear it again.  */
-      if ( mpi_test_bit( k, nbits-1 ) )
-        mpi_set_highbit( k, nbits-1 );
-      else
-        {
-          mpi_set_highbit( k, nbits-1 );
-          mpi_clear_bit( k, nbits-1 );
-       }
-
-      if( !(mpi_cmp( k, q ) < 0) ) /* check: k < q */
-        {
-          if( DBG_CIPHER )
-            progress('+');
-          continue; /* no  */
-        }
-      if( !(mpi_cmp_ui( k, 0 ) > 0) )  /* check: k > 0 */
-        {
-          if( DBG_CIPHER )
-            progress('-');
-          continue; /* no */
-        }
-      break;   /* okay */
-    }
-  gcry_free(rndbuf);
-  if( DBG_CIPHER )
-    progress('\n');
-
-  return k;
-}
-
-
 /* Check that a freshly generated key actually works.  Returns 0 on success. */
 static int
 test_keys (DSA_secret_key *sk, unsigned int qbits)
 {
   int result = -1;  /* Default to failure.  */
   DSA_public_key pk;
-  gcry_mpi_t data  = gcry_mpi_new (qbits);
-  gcry_mpi_t sig_a = gcry_mpi_new (qbits);
-  gcry_mpi_t sig_b = gcry_mpi_new (qbits);
+  gcry_mpi_t data  = mpi_new (qbits);
+  gcry_mpi_t sig_a = mpi_new (qbits);
+  gcry_mpi_t sig_b = mpi_new (qbits);
 
   /* Put the relevant parameters into a public key structure.  */
   pk.p = sk->p;
@@ -222,26 +191,26 @@ test_keys (DSA_secret_key *sk, unsigned int qbits)
   pk.y = sk->y;
 
   /* Create a random plaintext.  */
-  gcry_mpi_randomize (data, qbits, GCRY_WEAK_RANDOM);
+  _gcry_mpi_randomize (data, qbits, GCRY_WEAK_RANDOM);
 
   /* Sign DATA using the secret key.  */
-  sign (sig_a, sig_b, data, sk);
+  sign (sig_a, sig_b, data, NULL, sk, 0, 0);
 
   /* Verify the signature using the public key.  */
-  if ( !verify (sig_a, sig_b, data, &pk) )
+  if ( verify (sig_a, sig_b, data, &pk, 0, 0) )
     goto leave; /* Signature does not match.  */
 
   /* Modify the data and check that the signing fails.  */
-  gcry_mpi_add_ui (data, data, 1);
-  if ( verify (sig_a, sig_b, data, &pk) )
+  mpi_add_ui (data, data, 1);
+  if ( !verify (sig_a, sig_b, data, &pk, 0, 0) )
     goto leave; /* Signature matches but should not.  */
 
   result = 0; /* The test succeeded.  */
 
  leave:
-  gcry_mpi_release (sig_b);
-  gcry_mpi_release (sig_a);
-  gcry_mpi_release (data);
+  _gcry_mpi_release (sig_b);
+  _gcry_mpi_release (sig_a);
+  _gcry_mpi_release (data);
   return result;
 }
 
@@ -259,6 +228,7 @@ static gpg_err_code_t
 generate (DSA_secret_key *sk, unsigned int nbits, unsigned int qbits,
           int transient_key, dsa_domain_t *domain, gcry_mpi_t **ret_factors )
 {
+  gpg_err_code_t rc;
   gcry_mpi_t p;    /* the prime */
   gcry_mpi_t q;    /* the 160 bit prime factor */
   gcry_mpi_t g;    /* the generator */
@@ -288,14 +258,6 @@ generate (DSA_secret_key *sk, unsigned int nbits, unsigned 
int qbits,
   if (nbits < 2*qbits || nbits > 15360)
     return GPG_ERR_INV_VALUE;
 
-  if (fips_mode ())
-    {
-      if (nbits < 1024)
-        return GPG_ERR_INV_VALUE;
-      if (transient_key)
-        return GPG_ERR_INV_VALUE;
-    }
-
   if (domain->p && domain->q && domain->g)
     {
       /* Domain parameters are given; use them.  */
@@ -310,7 +272,10 @@ generate (DSA_secret_key *sk, unsigned int nbits, unsigned 
int qbits,
   else
     {
       /* Generate new domain parameters.  */
-      p = _gcry_generate_elg_prime (1, nbits, qbits, NULL, ret_factors);
+      rc = _gcry_generate_elg_prime (1, nbits, qbits, NULL, &p, ret_factors);
+      if (rc)
+        return rc;
+
       /* Get q out of factors.  */
       q = mpi_copy ((*ret_factors)[0]);
       gcry_assert (mpi_get_nbits (q) == qbits);
@@ -326,18 +291,25 @@ generate (DSA_secret_key *sk, unsigned int nbits, 
unsigned int qbits,
         {
           mpi_add_ui (h, h, 1);
           /* g = h^e mod p */
-          gcry_mpi_powm (g, h, e, p);
+          mpi_powm (g, h, e, p);
         }
       while (!mpi_cmp_ui (g, 1));  /* Continue until g != 1. */
     }
 
   /* Select a random number X with the property:
    *    0 < x < q-1
+   *
+   * FIXME: Why do we use the requirement x < q-1 ? It should be
+   * sufficient to test for x < q.  FIPS-186-3 check x < q-1 but it
+   * does not check for 0 < x because it makes sure that Q is unsigned
+   * and finally adds one to the result so that 0 will never be
+   * returned.  We should replace the code below with _gcry_dsa_gen_k.
+   *
    * This must be a very good random number because this is the secret
    * part.  The random quality depends on the transient_key flag.  */
   random_level = transient_key ? GCRY_STRONG_RANDOM : GCRY_VERY_STRONG_RANDOM;
   if (DBG_CIPHER)
-    log_debug("choosing a random x%s", transient_key? " (transient-key)":"");
+    log_debug("choosing a random x%s\n", transient_key? " (transient-key)":"");
   gcry_assert( qbits >= 160 );
   x = mpi_alloc_secure( mpi_get_nlimbs(q) );
   mpi_sub_ui( h, q, 1 );  /* put q-1 into h */
@@ -347,25 +319,25 @@ generate (DSA_secret_key *sk, unsigned int nbits, 
unsigned int qbits,
       if( DBG_CIPHER )
         progress('.');
       if( !rndbuf )
-        rndbuf = gcry_random_bytes_secure ((qbits+7)/8, random_level);
+        rndbuf = _gcry_random_bytes_secure ((qbits+7)/8, random_level);
       else
         { /* Change only some of the higher bits (= 2 bytes)*/
-          char *r = gcry_random_bytes_secure (2, random_level);
+          char *r = _gcry_random_bytes_secure (2, random_level);
           memcpy(rndbuf, r, 2 );
-          gcry_free(r);
+          xfree(r);
         }
 
       _gcry_mpi_set_buffer( x, rndbuf, (qbits+7)/8, 0 );
       mpi_clear_highbit( x, qbits+1 );
     }
   while ( !( mpi_cmp_ui( x, 0 )>0 && mpi_cmp( x, h )<0 ) );
-  gcry_free(rndbuf);
+  xfree(rndbuf);
   mpi_free( e );
   mpi_free( h );
 
   /* y = g^x mod p */
   y = mpi_alloc( mpi_get_nlimbs(p) );
-  gcry_mpi_powm( y, g, x, p );
+  mpi_powm (y, g, x, p);
 
   if( DBG_CIPHER )
     {
@@ -387,11 +359,11 @@ generate (DSA_secret_key *sk, unsigned int nbits, 
unsigned int qbits,
   /* Now we can test our keys (this should never fail!). */
   if ( test_keys (sk, qbits) )
     {
-      gcry_mpi_release (sk->p); sk->p = NULL;
-      gcry_mpi_release (sk->q); sk->q = NULL;
-      gcry_mpi_release (sk->g); sk->g = NULL;
-      gcry_mpi_release (sk->y); sk->y = NULL;
-      gcry_mpi_release (sk->x); sk->x = NULL;
+      _gcry_mpi_release (sk->p); sk->p = NULL;
+      _gcry_mpi_release (sk->q); sk->q = NULL;
+      _gcry_mpi_release (sk->g); sk->g = NULL;
+      _gcry_mpi_release (sk->y); sk->y = NULL;
+      _gcry_mpi_release (sk->x); sk->x = NULL;
       fips_signal_error ("self-test after key generation failed");
       return GPG_ERR_SELFTEST_FAILED;
     }
@@ -425,6 +397,8 @@ generate_fips186 (DSA_secret_key *sk, unsigned int nbits, 
unsigned int qbits,
   gcry_mpi_t value_x = NULL; /* The secret exponent. */
   gcry_mpi_t value_h = NULL; /* Helper.  */
   gcry_mpi_t value_e = NULL; /* Helper.  */
+  gcry_mpi_t value_c = NULL; /* helper for x */
+  gcry_mpi_t value_qm2 = NULL; /* q - 2 */
 
   /* Preset return values.  */
   *r_counter = 0;
@@ -445,8 +419,8 @@ generate_fips186 (DSA_secret_key *sk, unsigned int nbits, 
unsigned int qbits,
 
   /* Check that QBITS and NBITS match the standard.  Note that FIPS
      186-3 uses N for QBITS and L for NBITS.  */
-  if (nbits == 1024 && qbits == 160)
-    ;
+  if (nbits == 1024 && qbits == 160 && use_fips186_2)
+    ; /* Allowed in FIPS 186-2 mode.  */
   else if (nbits == 2048 && qbits == 224)
     ;
   else if (nbits == 2048 && qbits == 256)
@@ -456,6 +430,10 @@ generate_fips186 (DSA_secret_key *sk, unsigned int nbits, 
unsigned int qbits,
   else
     return GPG_ERR_INV_VALUE;
 
+  ec = dsa_check_keysize (nbits);
+  if (ec)
+    return ec;
+
   if (domain->p && domain->q && domain->g)
     {
       /* Domain parameters are given; use them.  */
@@ -474,33 +452,33 @@ generate_fips186 (DSA_secret_key *sk, unsigned int nbits, 
unsigned int qbits,
       /* Get an initial seed value.  */
       if (deriveparms)
         {
-          initial_seed.sexp = gcry_sexp_find_token (deriveparms, "seed", 0);
+          initial_seed.sexp = sexp_find_token (deriveparms, "seed", 0);
           if (initial_seed.sexp)
-            initial_seed.seed = gcry_sexp_nth_data (initial_seed.sexp, 1,
-                                                    &initial_seed.seedlen);
+            initial_seed.seed = sexp_nth_data (initial_seed.sexp, 1,
+                                               &initial_seed.seedlen);
         }
 
-      /* Fixme: Enable 186-3 after it has been approved and after fixing
-         the generation function.  */
-      /*   if (use_fips186_2) */
-      (void)use_fips186_2;
-      ec = _gcry_generate_fips186_2_prime (nbits, qbits,
-                                           initial_seed.seed,
-                                           initial_seed.seedlen,
-                                           &prime_q, &prime_p,
-                                           r_counter,
-                                           r_seed, r_seedlen);
-      /*   else */
-      /*     ec = _gcry_generate_fips186_3_prime (nbits, qbits, NULL, 0, */
-      /*                                          &prime_q, &prime_p, */
-      /*                                          r_counter, */
-      /*                                          r_seed, r_seedlen, NULL); */
-      gcry_sexp_release (initial_seed.sexp);
+      if (use_fips186_2)
+        ec = _gcry_generate_fips186_2_prime (nbits, qbits,
+                                             initial_seed.seed,
+                                             initial_seed.seedlen,
+                                             &prime_q, &prime_p,
+                                             r_counter,
+                                             r_seed, r_seedlen);
+      else
+        ec = _gcry_generate_fips186_3_prime (nbits, qbits,
+                                             initial_seed.seed,
+                                             initial_seed.seedlen,
+                                             &prime_q, &prime_p,
+                                             r_counter,
+                                             r_seed, r_seedlen, NULL);
+      sexp_release (initial_seed.sexp);
       if (ec)
         goto leave;
 
       /* Find a generator g (h and e are helpers).
-         e = (p-1)/q */
+       *    e = (p-1)/q
+       */
       value_e = mpi_alloc_like (prime_p);
       mpi_sub_ui (value_e, prime_p, 1);
       mpi_fdiv_q (value_e, value_e, prime_q );
@@ -515,21 +493,28 @@ generate_fips186 (DSA_secret_key *sk, unsigned int nbits, 
unsigned int qbits,
       while (!mpi_cmp_ui (value_g, 1));  /* Continue until g != 1.  */
     }
 
+  value_c = mpi_snew (qbits);
+  value_x = mpi_snew (qbits);
+  value_qm2 = mpi_snew (qbits);
+  mpi_sub_ui (value_qm2, prime_q, 2);
 
-  /* Select a random number x with:  0 < x < q  */
-  value_x = gcry_mpi_snew (qbits);
+  /* FIPS 186-4 B.1.2 steps 4-6 */
   do
     {
       if( DBG_CIPHER )
         progress('.');
-      gcry_mpi_randomize (value_x, qbits, GCRY_VERY_STRONG_RANDOM);
-      mpi_clear_highbit (value_x, qbits+1);
+      _gcry_mpi_randomize (value_c, qbits, GCRY_VERY_STRONG_RANDOM);
+      mpi_clear_highbit (value_c, qbits+1);
     }
-  while (!(mpi_cmp_ui (value_x, 0) > 0 && mpi_cmp (value_x, prime_q) < 0));
+  while (!(mpi_cmp_ui (value_c, 0) > 0 && mpi_cmp (value_c, value_qm2) < 0));
+  /* while (mpi_cmp (value_c, value_qm2) > 0); */
+
+  /* x = c + 1 */
+  mpi_add_ui(value_x, value_c, 1);
 
   /* y = g^x mod p */
   value_y = mpi_alloc_like (prime_p);
-  gcry_mpi_powm (value_y, value_g, value_x, prime_p);
+  mpi_powm (value_y, value_g, value_x, prime_p);
 
   if (DBG_CIPHER)
     {
@@ -551,22 +536,24 @@ generate_fips186 (DSA_secret_key *sk, unsigned int nbits, 
unsigned int qbits,
   *r_h = value_h; value_h = NULL;
 
  leave:
-  gcry_mpi_release (prime_p);
-  gcry_mpi_release (prime_q);
-  gcry_mpi_release (value_g);
-  gcry_mpi_release (value_y);
-  gcry_mpi_release (value_x);
-  gcry_mpi_release (value_h);
-  gcry_mpi_release (value_e);
+  _gcry_mpi_release (prime_p);
+  _gcry_mpi_release (prime_q);
+  _gcry_mpi_release (value_g);
+  _gcry_mpi_release (value_y);
+  _gcry_mpi_release (value_x);
+  _gcry_mpi_release (value_h);
+  _gcry_mpi_release (value_e);
+  _gcry_mpi_release (value_c);
+  _gcry_mpi_release (value_qm2);
 
   /* As a last step test this keys (this should never fail of course). */
   if (!ec && test_keys (sk, qbits) )
     {
-      gcry_mpi_release (sk->p); sk->p = NULL;
-      gcry_mpi_release (sk->q); sk->q = NULL;
-      gcry_mpi_release (sk->g); sk->g = NULL;
-      gcry_mpi_release (sk->y); sk->y = NULL;
-      gcry_mpi_release (sk->x); sk->x = NULL;
+      _gcry_mpi_release (sk->p); sk->p = NULL;
+      _gcry_mpi_release (sk->q); sk->q = NULL;
+      _gcry_mpi_release (sk->g); sk->g = NULL;
+      _gcry_mpi_release (sk->y); sk->y = NULL;
+      _gcry_mpi_release (sk->x); sk->x = NULL;
       fips_signal_error ("self-test after key generation failed");
       ec = GPG_ERR_SELFTEST_FAILED;
     }
@@ -574,9 +561,9 @@ generate_fips186 (DSA_secret_key *sk, unsigned int nbits, 
unsigned int qbits,
   if (ec)
     {
       *r_counter = 0;
-      gcry_free (*r_seed); *r_seed = NULL;
+      xfree (*r_seed); *r_seed = NULL;
       *r_seedlen = 0;
-      gcry_mpi_release (*r_h); *r_h = NULL;
+      _gcry_mpi_release (*r_h); *r_h = NULL;
     }
 
   return ec;
@@ -594,7 +581,7 @@ check_secret_key( DSA_secret_key *sk )
   int rc;
   gcry_mpi_t y = mpi_alloc( mpi_get_nlimbs(sk->y) );
 
-  gcry_mpi_powm( y, sk->g, sk->x, sk->p );
+  mpi_powm( y, sk->g, sk->x, sk->p );
   rc = !mpi_cmp( y, sk->y );
   mpi_free( y );
   return rc;
@@ -603,53 +590,157 @@ check_secret_key( DSA_secret_key *sk )
 
 
 /*
-   Make a DSA signature from HASH and put it into r and s.
+   Make a DSA signature from INPUT and put it into r and s.
+
+   INPUT may either be a plain MPI or an opaque MPI which is then
+   internally converted to a plain MPI.  FLAGS and HASHALGO may both
+   be 0 for standard operation mode.
+
+   The random value, K_SUPPLIED, may be supplied externally.  If not,
+   it is generated internally.
+
+   The return value is 0 on success or an error code.  Note that for
+   backward compatibility the function will not return any error if
+   FLAGS and HASHALGO are both 0 and INPUT is a plain MPI.
  */
-static void
-sign(gcry_mpi_t r, gcry_mpi_t s, gcry_mpi_t hash, DSA_secret_key *skey )
+static gpg_err_code_t
+sign (gcry_mpi_t r, gcry_mpi_t s, gcry_mpi_t input, gcry_mpi_t k_supplied,
+      DSA_secret_key *skey, int flags, int hashalgo)
 {
+  gpg_err_code_t rc;
+  gcry_mpi_t hash;
   gcry_mpi_t k;
   gcry_mpi_t kinv;
   gcry_mpi_t tmp;
+  const void *abuf;
+  unsigned int abits, qbits;
+  int extraloops = 0;
+  gcry_mpi_t hash_computed_internally = NULL;
 
-  /* Select a random k with 0 < k < q */
-  k = gen_k( skey->q );
+  qbits = mpi_get_nbits (skey->q);
 
-  /* r = (a^k mod p) mod q */
-  gcry_mpi_powm( r, skey->g, k, skey->p );
-  mpi_fdiv_r( r, r, skey->q );
+  if ((flags & PUBKEY_FLAG_PREHASH))
+    {
+      rc = _gcry_dsa_compute_hash (&hash_computed_internally, input, hashalgo);
+      if (rc)
+        return rc;
+      input = hash_computed_internally;
+    }
+
+  /* Convert the INPUT into an MPI.  */
+  rc = _gcry_dsa_normalize_hash (input, &hash, qbits);
+  if (rc)
+    {
+      mpi_free (hash_computed_internally);
+      return rc;
+    }
+
+ again:
+  if (k_supplied)
+    k = k_supplied;
+  /* Create the K value.  */
+  else if ((flags & PUBKEY_FLAG_RFC6979) && hashalgo)
+    {
+      /* Use Pornin's method for deterministic DSA.  If this flag is
+         set, it is expected that HASH is an opaque MPI with the to be
+         signed hash.  That hash is also used as h1 from 3.2.a.  */
+      if (!mpi_is_opaque (input))
+        {
+          rc = GPG_ERR_CONFLICT;
+          goto leave;
+        }
+
+      abuf = mpi_get_opaque (input, &abits);
+      rc = _gcry_dsa_gen_rfc6979_k (&k, skey->q, skey->x,
+                                    abuf, (abits+7)/8, hashalgo, extraloops);
+      if (rc)
+        goto leave;
+    }
+  else
+    {
+      /* Select a random k with 0 < k < q */
+      k = _gcry_dsa_gen_k (skey->q, GCRY_STRONG_RANDOM);
+    }
 
   /* kinv = k^(-1) mod q */
   kinv = mpi_alloc( mpi_get_nlimbs(k) );
   mpi_invm(kinv, k, skey->q );
 
+  _gcry_dsa_modify_k (k, skey->q, qbits);
+
+  /* r = (a^k mod p) mod q */
+  mpi_powm( r, skey->g, k, skey->p );
+  mpi_fdiv_r( r, r, skey->q );
+
   /* s = (kinv * ( hash + x * r)) mod q */
   tmp = mpi_alloc( mpi_get_nlimbs(skey->p) );
   mpi_mul( tmp, skey->x, r );
   mpi_add( tmp, tmp, hash );
   mpi_mulm( s , kinv, tmp, skey->q );
 
-  mpi_free(k);
+  if (!k_supplied)
+    mpi_free(k);
   mpi_free(kinv);
   mpi_free(tmp);
+
+  if (!mpi_cmp_ui (r, 0))
+    {
+      if (k_supplied)
+        {
+          rc = GPG_ERR_INV_VALUE;
+          goto leave;
+        }
+
+      /* This is a highly unlikely code path.  */
+      extraloops++;
+      goto again;
+    }
+
+  rc = 0;
+
+ leave:
+  if (hash != input)
+    mpi_free (hash);
+  mpi_free (hash_computed_internally);
+
+  return rc;
 }
 
 
 /*
    Returns true if the signature composed from R and S is valid.
  */
-static int
-verify (gcry_mpi_t r, gcry_mpi_t s, gcry_mpi_t hash, DSA_public_key *pkey )
+static gpg_err_code_t
+verify (gcry_mpi_t r, gcry_mpi_t s, gcry_mpi_t input, DSA_public_key *pkey,
+        int flags, int hashalgo)
 {
-  int rc;
+  gpg_err_code_t rc = 0;
   gcry_mpi_t w, u1, u2, v;
   gcry_mpi_t base[3];
   gcry_mpi_t ex[3];
+  gcry_mpi_t hash;
+  unsigned int nbits;
+  gcry_mpi_t hash_computed_internally = NULL;
 
   if( !(mpi_cmp_ui( r, 0 ) > 0 && mpi_cmp( r, pkey->q ) < 0) )
-    return 0; /* assertion     0 < r < q  failed */
+    return GPG_ERR_BAD_SIGNATURE; /* Assertion 0 < r < n  failed.  */
   if( !(mpi_cmp_ui( s, 0 ) > 0 && mpi_cmp( s, pkey->q ) < 0) )
-    return 0; /* assertion     0 < s < q  failed */
+    return GPG_ERR_BAD_SIGNATURE; /* Assertion 0 < s < n  failed.  */
+
+  nbits = mpi_get_nbits (pkey->q);
+  if ((flags & PUBKEY_FLAG_PREHASH))
+    {
+      rc = _gcry_dsa_compute_hash (&hash_computed_internally, input, hashalgo);
+      if (rc)
+        return rc;
+      input = hash_computed_internally;
+    }
+  rc = _gcry_dsa_normalize_hash (input, &hash, nbits);
+  if (rc)
+    {
+      mpi_free (hash_computed_internally);
+      return rc;
+    }
 
   w  = mpi_alloc( mpi_get_nlimbs(pkey->q) );
   u1 = mpi_alloc( mpi_get_nlimbs(pkey->q) );
@@ -672,12 +763,26 @@ verify (gcry_mpi_t r, gcry_mpi_t s, gcry_mpi_t hash, 
DSA_public_key *pkey )
   mpi_mulpowm( v, base, ex, pkey->p );
   mpi_fdiv_r( v, v, pkey->q );
 
-  rc = !mpi_cmp( v, r );
+  if (mpi_cmp( v, r ))
+    {
+      if (DBG_CIPHER)
+        {
+          log_mpidump ("     i", input);
+          log_mpidump ("     h", hash);
+          log_mpidump ("     v", v);
+          log_mpidump ("     r", r);
+          log_mpidump ("     s", s);
+        }
+      rc = GPG_ERR_BAD_SIGNATURE;
+    }
 
   mpi_free(w);
   mpi_free(u1);
   mpi_free(u2);
   mpi_free(v);
+  if (hash != input)
+    mpi_free (hash);
+  mpi_free (hash_computed_internally);
 
   return rc;
 }
@@ -688,349 +793,456 @@ verify (gcry_mpi_t r, gcry_mpi_t s, gcry_mpi_t hash, 
DSA_public_key *pkey )
  *********************************************/
 
 static gcry_err_code_t
-dsa_generate_ext (int algo, unsigned int nbits, unsigned long evalue,
-                  const gcry_sexp_t genparms,
-                  gcry_mpi_t *skey, gcry_mpi_t **retfactors,
-                  gcry_sexp_t *r_extrainfo)
+dsa_generate (const gcry_sexp_t genparms, gcry_sexp_t *r_skey)
 {
-  gpg_err_code_t ec;
+  gpg_err_code_t rc;
+  unsigned int nbits;
+  gcry_sexp_t domainsexp;
   DSA_secret_key sk;
   gcry_sexp_t l1;
   unsigned int qbits = 0;
   gcry_sexp_t deriveparms = NULL;
   gcry_sexp_t seedinfo = NULL;
-  int transient_key = 0;
-  int use_fips186_2 = 0;
-  int use_fips186 = 0;
+  gcry_sexp_t misc_info = NULL;
+  int flags = 0;
   dsa_domain_t domain;
+  gcry_mpi_t *factors = NULL;
 
-  (void)algo;    /* No need to check it.  */
-  (void)evalue;  /* Not required for DSA. */
-
+  memset (&sk, 0, sizeof sk);
   memset (&domain, 0, sizeof domain);
 
-  if (genparms)
+  rc = _gcry_pk_util_get_nbits (genparms, &nbits);
+  if (rc)
+    return rc;
+
+  /* Parse the optional flags list.  */
+  l1 = sexp_find_token (genparms, "flags", 0);
+  if (l1)
     {
-      gcry_sexp_t domainsexp;
+      rc = _gcry_pk_util_parse_flaglist (l1, &flags, NULL);
+      sexp_release (l1);
+      if (rc)
+        return rc;\
+    }
 
-      /* Parse the optional qbits element.  */
-      l1 = gcry_sexp_find_token (genparms, "qbits", 0);
-      if (l1)
-        {
-          char buf[50];
-          const char *s;
-          size_t n;
+  /* Parse the optional qbits element.  */
+  l1 = sexp_find_token (genparms, "qbits", 0);
+  if (l1)
+    {
+      char buf[50];
+      const char *s;
+      size_t n;
 
-          s = gcry_sexp_nth_data (l1, 1, &n);
-          if (!s || n >= DIM (buf) - 1 )
-            {
-              gcry_sexp_release (l1);
-              return GPG_ERR_INV_OBJ; /* No value or value too large.  */
-            }
-          memcpy (buf, s, n);
-          buf[n] = 0;
-          qbits = (unsigned int)strtoul (buf, NULL, 0);
-          gcry_sexp_release (l1);
+      s = sexp_nth_data (l1, 1, &n);
+      if (!s || n >= DIM (buf) - 1 )
+        {
+          sexp_release (l1);
+          return GPG_ERR_INV_OBJ; /* No value or value too large.  */
         }
+      memcpy (buf, s, n);
+      buf[n] = 0;
+      qbits = (unsigned int)strtoul (buf, NULL, 0);
+      sexp_release (l1);
+    }
 
-      /* Parse the optional transient-key flag.  */
-      l1 = gcry_sexp_find_token (genparms, "transient-key", 0);
+  /* Parse the optional transient-key flag.  */
+  if (!(flags & PUBKEY_FLAG_TRANSIENT_KEY))
+    {
+      l1 = sexp_find_token (genparms, "transient-key", 0);
       if (l1)
         {
-          transient_key = 1;
-          gcry_sexp_release (l1);
+          flags |= PUBKEY_FLAG_TRANSIENT_KEY;
+          sexp_release (l1);
         }
+    }
 
-      /* Get the optional derive parameters.  */
-      deriveparms = gcry_sexp_find_token (genparms, "derive-parms", 0);
+  /* Get the optional derive parameters.  */
+  deriveparms = sexp_find_token (genparms, "derive-parms", 0);
 
-      /* Parse the optional "use-fips186" flags.  */
-      l1 = gcry_sexp_find_token (genparms, "use-fips186", 0);
+  /* Parse the optional "use-fips186" flags.  */
+  if (!(flags & PUBKEY_FLAG_USE_FIPS186))
+    {
+      l1 = sexp_find_token (genparms, "use-fips186", 0);
       if (l1)
         {
-          use_fips186 = 1;
-          gcry_sexp_release (l1);
+          flags |= PUBKEY_FLAG_USE_FIPS186;
+          sexp_release (l1);
         }
-      l1 = gcry_sexp_find_token (genparms, "use-fips186-2", 0);
+    }
+  if (!(flags & PUBKEY_FLAG_USE_FIPS186_2))
+    {
+      l1 = sexp_find_token (genparms, "use-fips186-2", 0);
       if (l1)
         {
-          use_fips186_2 = 1;
-          gcry_sexp_release (l1);
+          flags |= PUBKEY_FLAG_USE_FIPS186_2;
+          sexp_release (l1);
         }
+    }
 
-      /* Check whether domain parameters are given.  */
-      domainsexp = gcry_sexp_find_token (genparms, "domain", 0);
-      if (domainsexp)
+  /* Check whether domain parameters are given.  */
+  domainsexp = sexp_find_token (genparms, "domain", 0);
+  if (domainsexp)
+    {
+      /* DERIVEPARMS can't be used together with domain parameters.
+         NBITS abnd QBITS may not be specified because there values
+         are derived from the domain parameters.  */
+      if (deriveparms || qbits || nbits)
         {
-          /* DERIVEPARMS can't be used together with domain
-             parameters.  NBITS abnd QBITS may not be specified
-             because there values are derived from the domain
-             parameters.  */
-          if (deriveparms || qbits || nbits)
-            {
-              gcry_sexp_release (domainsexp);
-              gcry_sexp_release (deriveparms);
-              return GPG_ERR_INV_VALUE;
-            }
-
-          /* Put all domain parameters into the domain object.  */
-          l1 = gcry_sexp_find_token (domainsexp, "p", 0);
-          domain.p = gcry_sexp_nth_mpi (l1, 1, GCRYMPI_FMT_USG);
-          gcry_sexp_release (l1);
-          l1 = gcry_sexp_find_token (domainsexp, "q", 0);
-          domain.q = gcry_sexp_nth_mpi (l1, 1, GCRYMPI_FMT_USG);
-          gcry_sexp_release (l1);
-          l1 = gcry_sexp_find_token (domainsexp, "g", 0);
-          domain.g = gcry_sexp_nth_mpi (l1, 1, GCRYMPI_FMT_USG);
-          gcry_sexp_release (l1);
-          gcry_sexp_release (domainsexp);
-
-          /* Check that all domain parameters are available.  */
-          if (!domain.p || !domain.q || !domain.g)
-            {
-              gcry_mpi_release (domain.p);
-              gcry_mpi_release (domain.q);
-              gcry_mpi_release (domain.g);
-              gcry_sexp_release (deriveparms);
-              return GPG_ERR_MISSING_VALUE;
-            }
+          sexp_release (domainsexp);
+          sexp_release (deriveparms);
+          return GPG_ERR_INV_VALUE;
+        }
 
-          /* Get NBITS and QBITS from the domain parameters.  */
-          nbits = mpi_get_nbits (domain.p);
-          qbits = mpi_get_nbits (domain.q);
+      /* Put all domain parameters into the domain object.  */
+      l1 = sexp_find_token (domainsexp, "p", 0);
+      domain.p = sexp_nth_mpi (l1, 1, GCRYMPI_FMT_USG);
+      sexp_release (l1);
+      l1 = sexp_find_token (domainsexp, "q", 0);
+      domain.q = sexp_nth_mpi (l1, 1, GCRYMPI_FMT_USG);
+      sexp_release (l1);
+      l1 = sexp_find_token (domainsexp, "g", 0);
+      domain.g = sexp_nth_mpi (l1, 1, GCRYMPI_FMT_USG);
+      sexp_release (l1);
+      sexp_release (domainsexp);
+
+      /* Check that all domain parameters are available.  */
+      if (!domain.p || !domain.q || !domain.g)
+        {
+          _gcry_mpi_release (domain.p);
+          _gcry_mpi_release (domain.q);
+          _gcry_mpi_release (domain.g);
+          sexp_release (deriveparms);
+          return GPG_ERR_MISSING_VALUE;
         }
+
+      /* Get NBITS and QBITS from the domain parameters.  */
+      nbits = mpi_get_nbits (domain.p);
+      qbits = mpi_get_nbits (domain.q);
     }
 
-  if (deriveparms || use_fips186 || use_fips186_2 || fips_mode ())
+  if (deriveparms
+      || (flags & PUBKEY_FLAG_USE_FIPS186)
+      || (flags & PUBKEY_FLAG_USE_FIPS186_2)
+      || fips_mode ())
     {
       int counter;
       void *seed;
       size_t seedlen;
       gcry_mpi_t h_value;
 
-      ec = generate_fips186 (&sk, nbits, qbits, deriveparms, use_fips186_2,
+      rc = generate_fips186 (&sk, nbits, qbits, deriveparms,
+                             !!(flags & PUBKEY_FLAG_USE_FIPS186_2),
                              &domain,
                              &counter, &seed, &seedlen, &h_value);
-      gcry_sexp_release (deriveparms);
-      if (!ec && h_value)
+      if (!rc && h_value)
         {
           /* Format the seed-values unless domain parameters are used
              for which a H_VALUE of NULL is an indication.  */
-          ec = gpg_err_code (gcry_sexp_build
-                             (&seedinfo, NULL,
-                              "(seed-values(counter %d)(seed %b)(h %m))",
-                              counter, (int)seedlen, seed, h_value));
-          if (ec)
-            {
-              gcry_mpi_release (sk.p); sk.p = NULL;
-              gcry_mpi_release (sk.q); sk.q = NULL;
-              gcry_mpi_release (sk.g); sk.g = NULL;
-              gcry_mpi_release (sk.y); sk.y = NULL;
-              gcry_mpi_release (sk.x); sk.x = NULL;
-            }
-          gcry_free (seed);
-          gcry_mpi_release (h_value);
+          rc = sexp_build (&seedinfo, NULL,
+                           "(seed-values(counter %d)(seed %b)(h %m))",
+                           counter, (int)seedlen, seed, h_value);
+          xfree (seed);
+          _gcry_mpi_release (h_value);
         }
     }
   else
     {
-      ec = generate (&sk, nbits, qbits, transient_key, &domain, retfactors);
+      rc = generate (&sk, nbits, qbits,
+                     !!(flags & PUBKEY_FLAG_TRANSIENT_KEY),
+                     &domain, &factors);
     }
 
-  gcry_mpi_release (domain.p);
-  gcry_mpi_release (domain.q);
-  gcry_mpi_release (domain.g);
-
-  if (!ec)
+  if (!rc)
     {
-      skey[0] = sk.p;
-      skey[1] = sk.q;
-      skey[2] = sk.g;
-      skey[3] = sk.y;
-      skey[4] = sk.x;
-
-      if (!r_extrainfo)
-        {
-          /* Old style interface - return the factors - if any - at
-             retfactors.  */
-        }
-      else if (!*retfactors && !seedinfo)
-        {
-          /* No factors and no seedinfo, thus there is nothing to return.  */
-          *r_extrainfo = NULL;
-        }
+      /* Put the factors into MISC_INFO.  Note that the factors are
+         not confidential thus we can store them in standard memory.  */
+      int nfactors, i, j;
+      char *p;
+      char *format = NULL;
+      void **arg_list = NULL;
+
+      for (nfactors=0; factors && factors[nfactors]; nfactors++)
+        ;
+      /* Allocate space for the format string:
+         "(misc-key-info%S(pm1-factors%m))"
+         with one "%m" for each factor and construct it.  */
+      format = xtrymalloc (50 + 2*nfactors);
+      if (!format)
+        rc = gpg_err_code_from_syserror ();
       else
         {
-          /* Put the factors into extrainfo and set retfactors to NULL
-             to make use of the new interface.  Note that the factors
-             are not confidential thus we can store them in standard
-             memory.  */
-          int nfactors, i, j;
-          char *p;
-          char *format = NULL;
-          void **arg_list = NULL;
-
-          for (nfactors=0; *retfactors && (*retfactors)[nfactors]; nfactors++)
-            ;
-          /* Allocate space for the format string:
-               "(misc-key-info%S(pm1-factors%m))"
-             with one "%m" for each factor and construct it.  */
-          format = gcry_malloc (50 + 2*nfactors);
-          if (!format)
-            ec = gpg_err_code_from_syserror ();
-          else
+          p = stpcpy (format, "(misc-key-info");
+          if (seedinfo)
+            p = stpcpy (p, "%S");
+          if (nfactors)
             {
-              p = stpcpy (format, "(misc-key-info");
-              if (seedinfo)
-                p = stpcpy (p, "%S");
-              if (nfactors)
-                {
-                  p = stpcpy (p, "(pm1-factors");
-                  for (i=0; i < nfactors; i++)
-                    p = stpcpy (p, "%m");
-                  p = stpcpy (p, ")");
-                }
+              p = stpcpy (p, "(pm1-factors");
+              for (i=0; i < nfactors; i++)
+                p = stpcpy (p, "%m");
               p = stpcpy (p, ")");
-
-              /* Allocate space for the list of factors plus one for
-                 an S-expression plus an extra NULL entry for safety
-                 and fill it with the factors.  */
-              arg_list = gcry_calloc (nfactors+1+1, sizeof *arg_list);
-              if (!arg_list)
-                ec = gpg_err_code_from_syserror ();
-              else
-                {
-                  i = 0;
-                  if (seedinfo)
-                    arg_list[i++] = &seedinfo;
-                  for (j=0; j < nfactors; j++)
-                    arg_list[i++] = (*retfactors) + j;
-                  arg_list[i] = NULL;
-
-                  ec = gpg_err_code (gcry_sexp_build_array
-                                     (r_extrainfo, NULL, format, arg_list));
-                }
-            }
-
-          gcry_free (arg_list);
-          gcry_free (format);
-          for (i=0; i < nfactors; i++)
-            {
-              gcry_mpi_release ((*retfactors)[i]);
-              (*retfactors)[i] = NULL;
             }
-          gcry_free (*retfactors);
-          *retfactors = NULL;
-          if (ec)
+          p = stpcpy (p, ")");
+
+          /* Allocate space for the list of factors plus one for the
+             seedinfo s-exp plus an extra NULL entry for safety and
+             fill it with the factors.  */
+          arg_list = xtrycalloc (nfactors+1+1, sizeof *arg_list);
+          if (!arg_list)
+            rc = gpg_err_code_from_syserror ();
+          else
             {
-              for (i=0; i < 5; i++)
-                {
-                  gcry_mpi_release (skey[i]);
-                  skey[i] = NULL;
-                }
+              i = 0;
+              if (seedinfo)
+                arg_list[i++] = &seedinfo;
+              for (j=0; j < nfactors; j++)
+                arg_list[i++] = factors + j;
+              arg_list[i] = NULL;
+
+              rc = sexp_build_array (&misc_info, NULL, format, arg_list);
             }
         }
-    }
-
-  gcry_sexp_release (seedinfo);
-  return ec;
-}
 
+      xfree (arg_list);
+      xfree (format);
+    }
 
-static gcry_err_code_t
-dsa_generate (int algo, unsigned int nbits, unsigned long evalue,
-              gcry_mpi_t *skey, gcry_mpi_t **retfactors)
-{
-  (void)evalue;
-  return dsa_generate_ext (algo, nbits, 0, NULL, skey, retfactors, NULL);
+  if (!rc)
+    rc = sexp_build (r_skey, NULL,
+                     "(key-data"
+                     " (public-key"
+                     "  (dsa(p%m)(q%m)(g%m)(y%m)))"
+                     " (private-key"
+                     "  (dsa(p%m)(q%m)(g%m)(y%m)(x%m)))"
+                     " %S)",
+                     sk.p, sk.q, sk.g, sk.y,
+                     sk.p, sk.q, sk.g, sk.y, sk.x,
+                     misc_info);
+
+
+  _gcry_mpi_release (sk.p);
+  _gcry_mpi_release (sk.q);
+  _gcry_mpi_release (sk.g);
+  _gcry_mpi_release (sk.y);
+  _gcry_mpi_release (sk.x);
+
+  _gcry_mpi_release (domain.p);
+  _gcry_mpi_release (domain.q);
+  _gcry_mpi_release (domain.g);
+
+  sexp_release (seedinfo);
+  sexp_release (misc_info);
+  sexp_release (deriveparms);
+  if (factors)
+    {
+      gcry_mpi_t *mp;
+      for (mp = factors; *mp; mp++)
+        mpi_free (*mp);
+      xfree (factors);
+    }
+  return rc;
 }
 
 
 
 static gcry_err_code_t
-dsa_check_secret_key (int algo, gcry_mpi_t *skey)
+dsa_check_secret_key (gcry_sexp_t keyparms)
 {
-  gcry_err_code_t err = GPG_ERR_NO_ERROR;
-  DSA_secret_key sk;
+  gcry_err_code_t rc;
+  DSA_secret_key sk = {NULL, NULL, NULL, NULL, NULL};
 
-  (void)algo;
+  rc = _gcry_sexp_extract_param (keyparms, NULL, "pqgyx",
+                                  &sk.p, &sk.q, &sk.g, &sk.y, &sk.x,
+                                  NULL);
+  if (rc)
+    goto leave;
 
-  if ((! skey[0]) || (! skey[1]) || (! skey[2]) || (! skey[3]) || (! skey[4]))
-    err = GPG_ERR_BAD_MPI;
-  else
-    {
-      sk.p = skey[0];
-      sk.q = skey[1];
-      sk.g = skey[2];
-      sk.y = skey[3];
-      sk.x = skey[4];
-      if (! check_secret_key (&sk))
-       err = GPG_ERR_BAD_SECKEY;
-    }
+  if (!check_secret_key (&sk))
+    rc = GPG_ERR_BAD_SECKEY;
 
-  return err;
+ leave:
+  _gcry_mpi_release (sk.p);
+  _gcry_mpi_release (sk.q);
+  _gcry_mpi_release (sk.g);
+  _gcry_mpi_release (sk.y);
+  _gcry_mpi_release (sk.x);
+  if (DBG_CIPHER)
+    log_debug ("dsa_testkey    => %s\n", gpg_strerror (rc));
+  return rc;
 }
 
 
 static gcry_err_code_t
-dsa_sign (int algo, gcry_mpi_t *resarr, gcry_mpi_t data, gcry_mpi_t *skey)
+dsa_sign (gcry_sexp_t *r_sig, gcry_sexp_t s_data, gcry_sexp_t keyparms)
 {
-  gcry_err_code_t err = GPG_ERR_NO_ERROR;
-  DSA_secret_key sk;
-
-  (void)algo;
+  gcry_err_code_t rc;
+  struct pk_encoding_ctx ctx;
+  gcry_mpi_t data = NULL;
+  gcry_mpi_t k = NULL;
+  DSA_secret_key sk = {NULL, NULL, NULL, NULL, NULL};
+  gcry_mpi_t sig_r = NULL;
+  gcry_mpi_t sig_s = NULL;
+  unsigned int nbits = dsa_get_nbits (keyparms);
+
+  rc = dsa_check_keysize (nbits);
+  if (rc)
+    return rc;
+
+  _gcry_pk_util_init_encoding_ctx (&ctx, PUBKEY_OP_SIGN, nbits);
+
+  /* Extract the data.  */
+  rc = _gcry_pk_util_data_to_mpi (s_data, &data, &ctx);
+  if (rc)
+    goto leave;
+  if (DBG_CIPHER)
+    log_mpidump ("dsa_sign   data", data);
+
+  if (ctx.label)
+    rc = _gcry_mpi_scan (&k, GCRYMPI_FMT_USG, ctx.label, ctx.labellen, NULL);
+  if (rc)
+    goto leave;
+
+  /* Extract the key.  */
+  rc = _gcry_sexp_extract_param (keyparms, NULL, "pqgyx",
+                                 &sk.p, &sk.q, &sk.g, &sk.y, &sk.x, NULL);
+  if (rc)
+    goto leave;
+  if (DBG_CIPHER)
+    {
+      log_mpidump ("dsa_sign      p", sk.p);
+      log_mpidump ("dsa_sign      q", sk.q);
+      log_mpidump ("dsa_sign      g", sk.g);
+      log_mpidump ("dsa_sign      y", sk.y);
+      if (!fips_mode ())
+        log_mpidump ("dsa_sign      x", sk.x);
+    }
 
-  if ((! data)
-      || (! skey[0]) || (! skey[1]) || (! skey[2])
-      || (! skey[3]) || (! skey[4]))
-    err = GPG_ERR_BAD_MPI;
-  else
+  sig_r = mpi_new (0);
+  sig_s = mpi_new (0);
+  rc = sign (sig_r, sig_s, data, k, &sk, ctx.flags, ctx.hash_algo);
+  if (rc)
+    goto leave;
+  if (DBG_CIPHER)
     {
-      sk.p = skey[0];
-      sk.q = skey[1];
-      sk.g = skey[2];
-      sk.y = skey[3];
-      sk.x = skey[4];
-      resarr[0] = mpi_alloc (mpi_get_nlimbs (sk.p));
-      resarr[1] = mpi_alloc (mpi_get_nlimbs (sk.p));
-      sign (resarr[0], resarr[1], data, &sk);
+      log_mpidump ("dsa_sign  sig_r", sig_r);
+      log_mpidump ("dsa_sign  sig_s", sig_s);
     }
-  return err;
+  rc = sexp_build (r_sig, NULL, "(sig-val(dsa(r%M)(s%M)))", sig_r, sig_s);
+
+ leave:
+  _gcry_mpi_release (sig_r);
+  _gcry_mpi_release (sig_s);
+  _gcry_mpi_release (sk.p);
+  _gcry_mpi_release (sk.q);
+  _gcry_mpi_release (sk.g);
+  _gcry_mpi_release (sk.y);
+  _gcry_mpi_release (sk.x);
+  _gcry_mpi_release (data);
+  _gcry_mpi_release (k);
+  _gcry_pk_util_free_encoding_ctx (&ctx);
+  if (DBG_CIPHER)
+    log_debug ("dsa_sign      => %s\n", gpg_strerror (rc));
+  return rc;
 }
 
+
 static gcry_err_code_t
-dsa_verify (int algo, gcry_mpi_t hash, gcry_mpi_t *data, gcry_mpi_t *pkey,
-            int (*cmp) (void *, gcry_mpi_t), void *opaquev)
+dsa_verify (gcry_sexp_t s_sig, gcry_sexp_t s_data, gcry_sexp_t s_keyparms)
 {
-  gcry_err_code_t err = GPG_ERR_NO_ERROR;
-  DSA_public_key pk;
-
-  (void)algo;
-  (void)cmp;
-  (void)opaquev;
+  gcry_err_code_t rc;
+  struct pk_encoding_ctx ctx;
+  gcry_sexp_t l1 = NULL;
+  gcry_mpi_t sig_r = NULL;
+  gcry_mpi_t sig_s = NULL;
+  gcry_mpi_t data = NULL;
+  DSA_public_key pk = { NULL, NULL, NULL, NULL };
+  unsigned int nbits = dsa_get_nbits (s_keyparms);
+
+  rc = dsa_check_keysize (nbits);
+  if (rc)
+    return rc;
+
+  _gcry_pk_util_init_encoding_ctx (&ctx, PUBKEY_OP_VERIFY, nbits);
+
+  /* Extract the data.  */
+  rc = _gcry_pk_util_data_to_mpi (s_data, &data, &ctx);
+  if (rc)
+    goto leave;
+  if (DBG_CIPHER)
+    log_mpidump ("dsa_verify data", data);
+
+  /* Extract the signature value.  */
+  rc = _gcry_pk_util_preparse_sigval (s_sig, dsa_names, &l1, NULL);
+  if (rc)
+    goto leave;
+  rc = _gcry_sexp_extract_param (l1, NULL, "rs", &sig_r, &sig_s, NULL);
+  if (rc)
+    goto leave;
+  if (DBG_CIPHER)
+    {
+      log_mpidump ("dsa_verify  s_r", sig_r);
+      log_mpidump ("dsa_verify  s_s", sig_s);
+    }
 
-  if ((! data[0]) || (! data[1]) || (! hash)
-      || (! pkey[0]) || (! pkey[1]) || (! pkey[2]) || (! pkey[3]))
-    err = GPG_ERR_BAD_MPI;
-  else
+  /* Extract the key.  */
+  rc = _gcry_sexp_extract_param (s_keyparms, NULL, "pqgy",
+                                 &pk.p, &pk.q, &pk.g, &pk.y, NULL);
+  if (rc)
+    goto leave;
+  if (DBG_CIPHER)
     {
-      pk.p = pkey[0];
-      pk.q = pkey[1];
-      pk.g = pkey[2];
-      pk.y = pkey[3];
-      if (! verify (data[0], data[1], hash, &pk))
-       err = GPG_ERR_BAD_SIGNATURE;
+      log_mpidump ("dsa_verify    p", pk.p);
+      log_mpidump ("dsa_verify    q", pk.q);
+      log_mpidump ("dsa_verify    g", pk.g);
+      log_mpidump ("dsa_verify    y", pk.y);
     }
-  return err;
+
+  /* Verify the signature.  */
+  rc = verify (sig_r, sig_s, data, &pk, ctx.flags, ctx.hash_algo);
+
+ leave:
+  _gcry_mpi_release (pk.p);
+  _gcry_mpi_release (pk.q);
+  _gcry_mpi_release (pk.g);
+  _gcry_mpi_release (pk.y);
+  _gcry_mpi_release (data);
+  _gcry_mpi_release (sig_r);
+  _gcry_mpi_release (sig_s);
+  sexp_release (l1);
+  _gcry_pk_util_free_encoding_ctx (&ctx);
+  if (DBG_CIPHER)
+    log_debug ("dsa_verify    => %s\n", rc?gpg_strerror (rc):"Good");
+  return rc;
 }
 
 
+/* Return the number of bits for the key described by PARMS.  On error
+ * 0 is returned.  The format of PARMS starts with the algorithm name;
+ * for example:
+ *
+ *   (dsa
+ *     (p <mpi>)
+ *     (q <mpi>)
+ *     (g <mpi>)
+ *     (y <mpi>))
+ *
+ * More parameters may be given but we only need P here.
+ */
 static unsigned int
-dsa_get_nbits (int algo, gcry_mpi_t *pkey)
+dsa_get_nbits (gcry_sexp_t parms)
 {
-  (void)algo;
-
-  return mpi_get_nbits (pkey[0]);
+  gcry_sexp_t l1;
+  gcry_mpi_t p;
+  unsigned int nbits;
+
+  l1 = sexp_find_token (parms, "p", 1);
+  if (!l1)
+    return 0; /* Parameter P not found.  */
+
+  p = sexp_nth_mpi (l1, 1, GCRYMPI_FMT_USG);
+  sexp_release (l1);
+  nbits = p? mpi_get_nbits (p) : 0;
+  _gcry_mpi_release (p);
+  return nbits;
 }
 
 
@@ -1040,45 +1252,102 @@ dsa_get_nbits (int algo, gcry_mpi_t *pkey)
  */
 
 static const char *
-selftest_sign_1024 (gcry_sexp_t pkey, gcry_sexp_t skey)
+selftest_sign (gcry_sexp_t pkey, gcry_sexp_t skey)
 {
+  /* Sample data from RFC 6979 section A.2.2, hash is of message "sample" */
   static const char sample_data[] =
-    "(data (flags raw)"
-    " (value #a0b1c2d3e4f500102030405060708090a1b2c3d4#))";
+    "(data (flags rfc6979 prehash)"
+    " (hash-algo sha256)"
+    " (value 6:sample))";
   static const char sample_data_bad[] =
-    "(data (flags raw)"
-    " (value #a0b1c2d3e4f510102030405060708090a1b2c3d4#))";
+    "(data (flags rfc6979)"
+    " (hash sha256 
#bf2bdbe1aa9b6ec1e2ade1d694f41fc71a831d0268e9891562113d8a62add1bf#))";
+  static const char signature_r[] =
+    "eace8bdbbe353c432a795d9ec556c6d021f7a03f42c36e9bc87e4ac7932cc809";
+  static const char signature_s[] =
+    "7081e175455f9247b812b74583e9e94f9ea79bd640dc962533b0680793a38d53";
 
   const char *errtxt = NULL;
   gcry_error_t err;
   gcry_sexp_t data = NULL;
   gcry_sexp_t data_bad = NULL;
   gcry_sexp_t sig = NULL;
-
-  err = gcry_sexp_sscan (&data, NULL,
-                         sample_data, strlen (sample_data));
+  gcry_sexp_t l1 = NULL;
+  gcry_sexp_t l2 = NULL;
+  gcry_mpi_t r = NULL;
+  gcry_mpi_t s = NULL;
+  gcry_mpi_t calculated_r = NULL;
+  gcry_mpi_t calculated_s = NULL;
+  int cmp;
+
+  err = sexp_sscan (&data, NULL, sample_data, strlen (sample_data));
   if (!err)
-    err = gcry_sexp_sscan (&data_bad, NULL,
-                           sample_data_bad, strlen (sample_data_bad));
+    err = sexp_sscan (&data_bad, NULL,
+                      sample_data_bad, strlen (sample_data_bad));
+  if (!err)
+    err = _gcry_mpi_scan (&r, GCRYMPI_FMT_HEX, signature_r, 0, NULL);
+  if (!err)
+    err = _gcry_mpi_scan (&s, GCRYMPI_FMT_HEX, signature_s, 0, NULL);
+
   if (err)
     {
       errtxt = "converting data failed";
       goto leave;
     }
 
-  err = gcry_pk_sign (&sig, data, skey);
+  err = _gcry_pk_sign (&sig, data, skey);
   if (err)
     {
       errtxt = "signing failed";
       goto leave;
     }
-  err = gcry_pk_verify (sig, data, pkey);
+
+  /* check against known signature */
+  errtxt = "signature validity failed";
+  l1 = _gcry_sexp_find_token (sig, "sig-val", 0);
+  if (!l1)
+    goto leave;
+  l2 = _gcry_sexp_find_token (l1, "dsa", 0);
+  if (!l2)
+    goto leave;
+
+  sexp_release (l1);
+  l1 = l2;
+
+  l2 = _gcry_sexp_find_token (l1, "r", 0);
+  if (!l2)
+    goto leave;
+  calculated_r = _gcry_sexp_nth_mpi (l2, 1, GCRYMPI_FMT_USG);
+  if (!calculated_r)
+    goto leave;
+
+  sexp_release (l2);
+  l2 = _gcry_sexp_find_token (l1, "s", 0);
+  if (!l2)
+    goto leave;
+  calculated_s = _gcry_sexp_nth_mpi (l2, 1, GCRYMPI_FMT_USG);
+  if (!calculated_s)
+    goto leave;
+
+  errtxt = "known sig check failed";
+
+  cmp = _gcry_mpi_cmp (r, calculated_r);
+  if (cmp)
+    goto leave;
+  cmp = _gcry_mpi_cmp (s, calculated_s);
+  if (cmp)
+    goto leave;
+
+  errtxt = NULL;
+
+
+  err = _gcry_pk_verify (sig, data, pkey);
   if (err)
     {
       errtxt = "verify failed";
       goto leave;
     }
-  err = gcry_pk_verify (sig, data_bad, pkey);
+  err = _gcry_pk_verify (sig, data_bad, pkey);
   if (gcry_err_code (err) != GPG_ERR_BAD_SIGNATURE)
     {
       errtxt = "bad signature not detected";
@@ -1087,15 +1356,21 @@ selftest_sign_1024 (gcry_sexp_t pkey, gcry_sexp_t skey)
 
 
  leave:
-  gcry_sexp_release (sig);
-  gcry_sexp_release (data_bad);
-  gcry_sexp_release (data);
+  _gcry_mpi_release (calculated_s);
+  _gcry_mpi_release (calculated_r);
+  _gcry_mpi_release (s);
+  _gcry_mpi_release (r);
+  sexp_release (l2);
+  sexp_release (l1);
+  sexp_release (sig);
+  sexp_release (data_bad);
+  sexp_release (data);
   return errtxt;
 }
 
 
 static gpg_err_code_t
-selftests_dsa (selftest_report_func_t report)
+selftests_dsa_2048 (selftest_report_func_t report)
 {
   const char *what;
   const char *errtxt;
@@ -1105,37 +1380,36 @@ selftests_dsa (selftest_report_func_t report)
 
   /* Convert the S-expressions into the internal representation.  */
   what = "convert";
-  err = gcry_sexp_sscan (&skey, NULL,
-                         sample_secret_key, strlen (sample_secret_key));
+  err = sexp_sscan (&skey, NULL, sample_secret_key_2048, strlen 
(sample_secret_key_2048));
   if (!err)
-    err = gcry_sexp_sscan (&pkey, NULL,
-                           sample_public_key, strlen (sample_public_key));
+    err = sexp_sscan (&pkey, NULL,
+                      sample_public_key_2048, strlen (sample_public_key_2048));
   if (err)
     {
-      errtxt = gcry_strerror (err);
+      errtxt = _gcry_strerror (err);
       goto failed;
     }
 
   what = "key consistency";
-  err = gcry_pk_testkey (skey);
+  err = _gcry_pk_testkey (skey);
   if (err)
     {
-      errtxt = gcry_strerror (err);
+      errtxt = _gcry_strerror (err);
       goto failed;
     }
 
   what = "sign";
-  errtxt = selftest_sign_1024 (pkey, skey);
+  errtxt = selftest_sign (pkey, skey);
   if (errtxt)
     goto failed;
 
-  gcry_sexp_release (pkey);
-  gcry_sexp_release (skey);
+  sexp_release (pkey);
+  sexp_release (skey);
   return 0; /* Succeeded. */
 
  failed:
-  gcry_sexp_release (pkey);
-  gcry_sexp_release (skey);
+  sexp_release (pkey);
+  sexp_release (skey);
   if (report)
     report ("pubkey", GCRY_PK_DSA, what, errtxt);
   return GPG_ERR_SELFTEST_FAILED;
@@ -1153,7 +1427,7 @@ run_selftests (int algo, int extended, 
selftest_report_func_t report)
   switch (algo)
     {
     case GCRY_PK_DSA:
-      ec = selftests_dsa (report);
+      ec = selftests_dsa_2048 (report);
       break;
     default:
       ec = GPG_ERR_PUBKEY_ALGO;
@@ -1164,30 +1438,19 @@ run_selftests (int algo, int extended, 
selftest_report_func_t report)
 }
 
 
-
 
-static const char *dsa_names[] =
-  {
-    "dsa",
-    "openpgp-dsa",
-    NULL,
-  };
-
 gcry_pk_spec_t _gcry_pubkey_spec_dsa =
   {
+    GCRY_PK_DSA, { 0, 0 },
+    GCRY_PK_USAGE_SIGN,
     "DSA", dsa_names,
     "pqgy", "pqgyx", "", "rs", "pqgy",
-    GCRY_PK_USAGE_SIGN,
     dsa_generate,
     dsa_check_secret_key,
     NULL,
     NULL,
     dsa_sign,
     dsa_verify,
-    dsa_get_nbits
-  };
-pk_extra_spec_t _gcry_pubkey_extraspec_dsa =
-  {
-    run_selftests,
-    dsa_generate_ext
+    dsa_get_nbits,
+    run_selftests
   };
diff --git a/grub-core/lib/libgcrypt/cipher/ecc-common.h 
b/grub-core/lib/libgcrypt/cipher/ecc-common.h
new file mode 100644
index 000000000..01fb9042c
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/ecc-common.h
@@ -0,0 +1,143 @@
+/* ecc-common.h - Declarations of common ECC code
+ * Copyright (C) 2013 g10 Code GmbH
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef GCRY_ECC_COMMON_H
+#define GCRY_ECC_COMMON_H
+
+
+/* Definition of a curve.  */
+typedef struct
+{
+  enum gcry_mpi_ec_models model;/* The model descrinbing this curve.  */
+  enum ecc_dialects dialect;    /* The dialect used with the curve.   */
+  gcry_mpi_t p;         /* Prime specifying the field GF(p).  */
+  gcry_mpi_t a;         /* First coefficient of the Weierstrass equation.  */
+  gcry_mpi_t b;         /* Second coefficient of the Weierstrass equation.
+                           or d as used by Twisted Edwards curves.  */
+  mpi_point_struct G;   /* Base point (generator).  */
+  gcry_mpi_t n;         /* Order of G.  */
+  unsigned int h;       /* Cofactor.  */
+  const char *name;     /* Name of the curve or NULL.  */
+} elliptic_curve_t;
+
+
+
+/* Set the value from S into D.  */
+static inline void
+point_set (mpi_point_t d, mpi_point_t s)
+{
+  mpi_set (d->x, s->x);
+  mpi_set (d->y, s->y);
+  mpi_set (d->z, s->z);
+}
+
+#define point_init(a)  _gcry_mpi_point_init ((a))
+#define point_free(a)  _gcry_mpi_point_free_parts ((a))
+
+
+/*-- ecc-curves.c --*/
+gpg_err_code_t _gcry_ecc_fill_in_curve (unsigned int nbits,
+                                        const char *name,
+                                        elliptic_curve_t *curve,
+                                        unsigned int *r_nbits);
+gpg_err_code_t _gcry_ecc_update_curve_param (const char *name,
+                                             enum gcry_mpi_ec_models *model,
+                                             enum ecc_dialects *dialect,
+                                             gcry_mpi_t *p, gcry_mpi_t *a,
+                                             gcry_mpi_t *b, gcry_mpi_t *g,
+                                             gcry_mpi_t *n);
+
+const char *_gcry_ecc_get_curve (gcry_sexp_t keyparms,
+                                 int iterator,
+                                 unsigned int *r_nbits);
+gcry_sexp_t _gcry_ecc_get_param_sexp (const char *name);
+
+/*-- ecc-misc.c --*/
+void _gcry_ecc_curve_free (elliptic_curve_t *E);
+elliptic_curve_t _gcry_ecc_curve_copy (elliptic_curve_t E);
+const char *_gcry_ecc_model2str (enum gcry_mpi_ec_models model);
+const char *_gcry_ecc_dialect2str (enum ecc_dialects dialect);
+unsigned char *_gcry_ecc_ec2os_buf (gcry_mpi_t x, gcry_mpi_t y, gcry_mpi_t p,
+                                    unsigned int *r_length);
+gcry_mpi_t   _gcry_ecc_ec2os (gcry_mpi_t x, gcry_mpi_t y, gcry_mpi_t p);
+
+mpi_point_t  _gcry_ecc_compute_public (mpi_point_t Q, mpi_ec_t ec);
+gpg_err_code_t _gcry_ecc_mont_encodepoint (gcry_mpi_t x, unsigned int nbits,
+                                           int with_prefix,
+                                           unsigned char **r_buffer,
+                                           unsigned int *r_buflen);
+
+
+/*-- ecc.c --*/
+
+/*-- ecc-ecdsa.c --*/
+gpg_err_code_t _gcry_ecc_ecdsa_sign (gcry_mpi_t input, gcry_mpi_t k, mpi_ec_t 
ec,
+                                     gcry_mpi_t r, gcry_mpi_t s,
+                                     int flags, int hashalgo);
+gpg_err_code_t _gcry_ecc_ecdsa_verify (gcry_mpi_t input, mpi_ec_t ec,
+                                       gcry_mpi_t r, gcry_mpi_t s,
+                                       int flags, int hashalgo);
+
+/*-- ecc-eddsa.c --*/
+gpg_err_code_t _gcry_ecc_eddsa_recover_x (gcry_mpi_t x, gcry_mpi_t y, int sign,
+                                          mpi_ec_t ec);
+gpg_err_code_t _gcry_ecc_eddsa_encodepoint (mpi_point_t point, mpi_ec_t ctx,
+                                            gcry_mpi_t x, gcry_mpi_t y,
+                                            int with_prefix,
+                                            unsigned char **r_buffer,
+                                            unsigned int *r_buflen);
+gpg_err_code_t _gcry_ecc_eddsa_ensure_compact (gcry_mpi_t value,
+                                               unsigned int nbits);
+
+
+gpg_err_code_t _gcry_ecc_eddsa_compute_h_d (unsigned char **r_digest,
+                                            mpi_ec_t ec);
+
+gpg_err_code_t _gcry_ecc_eddsa_genkey (mpi_ec_t ec, int flags);
+gpg_err_code_t _gcry_ecc_eddsa_sign (gcry_mpi_t input,
+                                     mpi_ec_t ec,
+                                     gcry_mpi_t r_r, gcry_mpi_t s,
+                                     struct pk_encoding_ctx *ctx);
+gpg_err_code_t _gcry_ecc_eddsa_verify (gcry_mpi_t input,
+                                       mpi_ec_t ec,
+                                       gcry_mpi_t r, gcry_mpi_t s,
+                                       struct pk_encoding_ctx *ctx);
+void reverse_buffer (unsigned char *buffer, unsigned int length);
+
+
+/*-- ecc-gost.c --*/
+gpg_err_code_t _gcry_ecc_gost_sign (gcry_mpi_t input, mpi_ec_t ec,
+                                    gcry_mpi_t r, gcry_mpi_t s);
+gpg_err_code_t _gcry_ecc_gost_verify (gcry_mpi_t input, mpi_ec_t ec,
+                                      gcry_mpi_t r, gcry_mpi_t s);
+
+
+/*-- ecc-sm2.c --*/
+gpg_err_code_t _gcry_ecc_sm2_encrypt (gcry_sexp_t *r_ciph,
+                                      gcry_mpi_t input, mpi_ec_t ec);
+gpg_err_code_t _gcry_ecc_sm2_decrypt (gcry_sexp_t *r_plain,
+                                      gcry_sexp_t data_list, mpi_ec_t ec);
+gpg_err_code_t _gcry_ecc_sm2_sign (gcry_mpi_t input, mpi_ec_t ec,
+                                   gcry_mpi_t r, gcry_mpi_t s,
+                                   int flags, int hashalgo);
+gpg_err_code_t _gcry_ecc_sm2_verify (gcry_mpi_t input, mpi_ec_t ec,
+                                     gcry_mpi_t r, gcry_mpi_t s);
+
+
+#endif /*GCRY_ECC_COMMON_H*/
diff --git a/grub-core/lib/libgcrypt/cipher/ecc-curves.c 
b/grub-core/lib/libgcrypt/cipher/ecc-curves.c
new file mode 100644
index 000000000..7c86e12cc
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/ecc-curves.c
@@ -0,0 +1,1585 @@
+/* ecc-curves.c  -  Elliptic Curve parameter mangement
+ * Copyright (C) 2007, 2008, 2010, 2011 Free Software Foundation, Inc.
+ * Copyright (C) 2013 g10 Code GmbH
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "g10lib.h"
+#include "mpi.h"
+#include "mpi-internal.h"
+#include "cipher.h"
+#include "context.h"
+#include "ec-context.h"
+#include "pubkey-internal.h"
+#include "ecc-common.h"
+
+
+static gpg_err_code_t
+point_from_keyparam (gcry_mpi_point_t *r_a,
+                     gcry_sexp_t keyparam, const char *name, mpi_ec_t ec);
+
+/* This tables defines aliases for curve names.  */
+static const struct
+{
+  const char *name;  /* Our name.  */
+  const char *other; /* Other name. */
+} curve_aliases[] =
+  {
+    { "Ed25519",    "1.3.6.1.4.1.11591.15.1" }, /* OpenPGP */
+    { "Ed25519",    "1.3.101.112" },         /* rfc8410 */
+
+    { "Curve25519", "1.3.6.1.4.1.3029.1.5.1" }, /* OpenPGP */
+    { "Curve25519", "1.3.101.110" },         /* rfc8410 */
+    { "Curve25519", "X25519" },              /* rfc8410 */
+
+    { "Ed448",      "1.3.101.113" },         /* rfc8410 */
+    { "X448",       "1.3.101.111" },         /* rfc8410 */
+
+    { "NIST P-192", "1.2.840.10045.3.1.1" }, /* X9.62 OID  */
+    { "NIST P-192", "prime192v1" },          /* X9.62 name.  */
+    { "NIST P-192", "secp192r1"  },          /* SECP name.  */
+    { "NIST P-192", "nistp192"   },          /* rfc5656.  */
+
+    { "NIST P-224", "secp224r1" },
+    { "NIST P-224", "1.3.132.0.33" },        /* SECP OID.  */
+    { "NIST P-224", "nistp224"   },          /* rfc5656.  */
+
+    { "NIST P-256", "1.2.840.10045.3.1.7" }, /* From NIST SP 800-78-1.  */
+    { "NIST P-256", "prime256v1" },
+    { "NIST P-256", "secp256r1"  },
+    { "NIST P-256", "nistp256"   },          /* rfc5656.  */
+
+    { "NIST P-384", "secp384r1" },
+    { "NIST P-384", "1.3.132.0.34" },
+    { "NIST P-384", "nistp384"   },          /* rfc5656.  */
+
+    { "NIST P-521", "secp521r1" },
+    { "NIST P-521", "1.3.132.0.35" },
+    { "NIST P-521", "nistp521"   },          /* rfc5656.  */
+
+    { "brainpoolP160r1", "1.3.36.3.3.2.8.1.1.1" },
+    { "brainpoolP192r1", "1.3.36.3.3.2.8.1.1.3" },
+    { "brainpoolP224r1", "1.3.36.3.3.2.8.1.1.5" },
+    { "brainpoolP256r1", "1.3.36.3.3.2.8.1.1.7" },
+    { "brainpoolP320r1", "1.3.36.3.3.2.8.1.1.9" },
+    { "brainpoolP384r1", "1.3.36.3.3.2.8.1.1.11"},
+    { "brainpoolP512r1", "1.3.36.3.3.2.8.1.1.13"},
+
+    { "GOST2001-test", "1.2.643.2.2.35.0" },
+    { "GOST2001-CryptoPro-A", "1.2.643.2.2.35.1" },
+    { "GOST2001-CryptoPro-B", "1.2.643.2.2.35.2" },
+    { "GOST2001-CryptoPro-C", "1.2.643.2.2.35.3" },
+    { "GOST2001-CryptoPro-A", "GOST2001-CryptoPro-XchA" },
+    { "GOST2001-CryptoPro-C", "GOST2001-CryptoPro-XchB" },
+    { "GOST2001-CryptoPro-A", "1.2.643.2.2.36.0" },
+    { "GOST2001-CryptoPro-C", "1.2.643.2.2.36.1" },
+
+    { "GOST2012-256-tc26-A", "1.2.643.7.1.2.1.1.1" },
+    { "GOST2001-CryptoPro-A", "1.2.643.7.1.2.1.1.2" },
+    { "GOST2001-CryptoPro-A", "GOST2012-256-tc26-B" },
+    { "GOST2001-CryptoPro-B", "1.2.643.7.1.2.1.1.3" },
+    { "GOST2001-CryptoPro-B", "GOST2012-256-tc26-C" },
+    { "GOST2001-CryptoPro-C", "1.2.643.7.1.2.1.1.4" },
+    { "GOST2001-CryptoPro-C", "GOST2012-256-tc26-D" },
+
+    { "GOST2012-512-test", "GOST2012-test" },
+    { "GOST2012-512-test", "1.2.643.7.1.2.1.2.0" },
+    { "GOST2012-512-tc26-A", "GOST2012-tc26-A" },
+    { "GOST2012-512-tc26-B", "GOST2012-tc26-B" },
+    { "GOST2012-512-tc26-A", "1.2.643.7.1.2.1.2.1" },
+    { "GOST2012-512-tc26-B", "1.2.643.7.1.2.1.2.2" },
+    { "GOST2012-512-tc26-C", "1.2.643.7.1.2.1.2.3" },
+
+    { "secp256k1", "1.3.132.0.10" },
+
+    { "sm2p256v1", "1.2.156.10197.1.301" },
+
+    { NULL, NULL}
+  };
+
+
+typedef struct
+{
+  const char *desc;           /* Description of the curve.  */
+  unsigned int nbits;         /* Number of bits.  */
+  unsigned int fips:1;        /* True if this is a FIPS140-3 approved curve. */
+
+  /* The model describing this curve.  This is mainly used to select
+     the group equation. */
+  enum gcry_mpi_ec_models model;
+
+  /* The actual ECC dialect used.  This is used for curve specific
+     optimizations and to select encodings etc. */
+  enum ecc_dialects dialect;
+
+  const char *p;              /* The prime defining the field.  */
+  const char *a, *b;          /* The coefficients.  For Twisted Edwards
+                                 Curves b is used for d.  For Montgomery
+                                 Curves (a,b) has ((A-2)/4,B^-1).  */
+  const char *n;              /* The order of the base point.  */
+  const char *g_x, *g_y;      /* Base point.  */
+  unsigned int h;             /* Cofactor.  */
+} ecc_domain_parms_t;
+
+
+/* This static table defines all available curves.  */
+static const ecc_domain_parms_t domain_parms[] =
+  {
+    {
+      /* (-x^2 + y^2 = 1 + dx^2y^2) */
+      "Ed25519", 255, 0,
+      MPI_EC_EDWARDS, ECC_DIALECT_ED25519,
+      "0x7FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFED",
+      "-0x01",
+      "-0x2DFC9311D490018C7338BF8688861767FF8FF5B2BEBE27548A14B235ECA6874A",
+      "0x1000000000000000000000000000000014DEF9DEA2F79CD65812631A5CF5D3ED",
+      "0x216936D3CD6E53FEC0A4E231FDD6DC5C692CC7609525A7B2C9562D608F25D51A",
+      "0x6666666666666666666666666666666666666666666666666666666666666658",
+      8
+    },
+    {
+      /* (y^2 = x^3 + 486662*x^2 + x) */
+      "Curve25519", 255, 0,
+      MPI_EC_MONTGOMERY, ECC_DIALECT_STANDARD,
+      "0x7FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFED",
+      "0x01DB41",
+      "0x01",
+      "0x1000000000000000000000000000000014DEF9DEA2F79CD65812631A5CF5D3ED",
+      "0x0000000000000000000000000000000000000000000000000000000000000009",
+      "0x20AE19A1B8A086B4E01EDD2C7748D14C923D4D7E6D7C61B229E9C5A27ECED3D9",
+      8
+      /* Note: As per RFC-7748 errata eid4730 the g_y value should be
+       * "0x5F51E65E475F794B1FE122D388B72EB36DC2B28192839E4DD6163A5D81312C14"
+       * but that breaks the keygrip.  The new value is recovered in
+       * the function _gcry_ecc_fill_in_curve.  See bug #4712.
+       */
+    },
+    {
+      /* (x^2 + y^2 = 1 + dx^2y^2) */
+      "Ed448", 448, 0,
+      MPI_EC_EDWARDS, ECC_DIALECT_SAFECURVE,
+      "0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFE"
+      "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF",
+      "0x01",
+      "0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFE"
+      "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF6756",
+      "0x3FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF"
+      "7CCA23E9C44EDB49AED63690216CC2728DC58F552378C292AB5844F3",
+      "0x4F1970C66BED0DED221D15A622BF36DA9E146570470F1767EA6DE324"
+      "A3D3A46412AE1AF72AB66511433B80E18B00938E2626A82BC70CC05E",
+      "0x693F46716EB6BC248876203756C9C7624BEA73736CA3984087789C1E"
+      "05A0C2D73AD3FF1CE67C39C4FDBD132C4ED7C8AD9808795BF230FA14",
+      4,
+    },
+    {
+      /* (y^2 = x^3 + 156326*x^2 + x) */
+      "X448", 448, 0,
+      MPI_EC_MONTGOMERY, ECC_DIALECT_SAFECURVE,
+      "0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFE"
+      "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF",
+      "0x98A9",
+      "0x01",
+      "0x3FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF"
+      "7CCA23E9C44EDB49AED63690216CC2728DC58F552378C292AB5844F3",
+      "0x00000000000000000000000000000000000000000000000000000000"
+      "00000000000000000000000000000000000000000000000000000005",
+      "0x7D235D1295F5B1F66C98AB6E58326FCECBAE5D34F55545D060F75DC2"
+      "8DF3F6EDB8027E2346430D211312C4B150677AF76FD7223D457B5B1A",
+      4,
+    },
+#if 0 /* No real specs yet found.  */
+    {
+      /* x^2 + y^2 = 1 + 3617x^2y^2 mod 2^414 - 17 */
+      "Curve3617",
+      "0x3FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF"
+      "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEF",
+      MPI_EC_EDWARDS, 0,
+      "0x01",
+      "0x0e21",
+      "0x07FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEB3CC92414CF"
+      "706022B36F1C0338AD63CF181B0E71A5E106AF79",
+      "0x1A334905141443300218C0631C326E5FCD46369F44C03EC7F57FF35498A4AB4D"
+      "6D6BA111301A73FAA8537C64C4FD3812F3CBC595",
+      "0x22",
+      8
+    },
+#endif /*0*/
+    {
+      "NIST P-192", 192, 0,
+      MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD,
+      "0xfffffffffffffffffffffffffffffffeffffffffffffffff",
+      "0xfffffffffffffffffffffffffffffffefffffffffffffffc",
+      "0x64210519e59c80e70fa7e9ab72243049feb8deecc146b9b1",
+      "0xffffffffffffffffffffffff99def836146bc9b1b4d22831",
+
+      "0x188da80eb03090f67cbf20eb43a18800f4ff0afd82ff1012",
+      "0x07192b95ffc8da78631011ed6b24cdd573f977a11e794811",
+      1
+    },
+    {
+      "NIST P-224", 224, 1,
+      MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD,
+      "0xffffffffffffffffffffffffffffffff000000000000000000000001",
+      "0xfffffffffffffffffffffffffffffffefffffffffffffffffffffffe",
+      "0xb4050a850c04b3abf54132565044b0b7d7bfd8ba270b39432355ffb4",
+      "0xffffffffffffffffffffffffffff16a2e0b8f03e13dd29455c5c2a3d" ,
+
+      "0xb70e0cbd6bb4bf7f321390b94a03c1d356c21122343280d6115c1d21",
+      "0xbd376388b5f723fb4c22dfe6cd4375a05a07476444d5819985007e34",
+      1
+    },
+    {
+      "NIST P-256", 256, 1,
+      MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD,
+      "0xffffffff00000001000000000000000000000000ffffffffffffffffffffffff",
+      "0xffffffff00000001000000000000000000000000fffffffffffffffffffffffc",
+      "0x5ac635d8aa3a93e7b3ebbd55769886bc651d06b0cc53b0f63bce3c3e27d2604b",
+      "0xffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc632551",
+
+      "0x6b17d1f2e12c4247f8bce6e563a440f277037d812deb33a0f4a13945d898c296",
+      "0x4fe342e2fe1a7f9b8ee7eb4a7c0f9e162bce33576b315ececbb6406837bf51f5",
+      1
+    },
+    {
+      "NIST P-384", 384, 1,
+      MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD,
+      "0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffe"
+      "ffffffff0000000000000000ffffffff",
+      "0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffe"
+      "ffffffff0000000000000000fffffffc",
+      "0xb3312fa7e23ee7e4988e056be3f82d19181d9c6efe8141120314088f5013875a"
+      "c656398d8a2ed19d2a85c8edd3ec2aef",
+      "0xffffffffffffffffffffffffffffffffffffffffffffffffc7634d81f4372ddf"
+      "581a0db248b0a77aecec196accc52973",
+
+      "0xaa87ca22be8b05378eb1c71ef320ad746e1d3b628ba79b9859f741e082542a38"
+      "5502f25dbf55296c3a545e3872760ab7",
+      "0x3617de4a96262c6f5d9e98bf9292dc29f8f41dbd289a147ce9da3113b5f0b8c0"
+      "0a60b1ce1d7e819d7a431d7c90ea0e5f",
+      1
+    },
+    {
+      "NIST P-521", 521, 1,
+      MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD,
+      "0x01ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"
+      "ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff",
+      "0x01ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"
+      "fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffc",
+      "0x051953eb9618e1c9a1f929a21a0b68540eea2da725b99b315f3b8b489918ef10"
+      "9e156193951ec7e937b1652c0bd3bb1bf073573df883d2c34f1ef451fd46b503f00",
+      "0x01ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"
+      "fffa51868783bf2f966b7fcc0148f709a5d03bb5c9b8899c47aebb6fb71e91386409",
+
+      "0x00c6858e06b70404e9cd9e3ecb662395b4429c648139053fb521f828af606b4d"
+      "3dbaa14b5e77efe75928fe1dc127a2ffa8de3348b3c1856a429bf97e7e31c2e5bd66",
+      "0x011839296a789a3bc0045c8a5fb42c7d1bd998f54449579b446817afbd17273e"
+      "662c97ee72995ef42640c550b9013fad0761353c7086a272c24088be94769fd16650",
+      1
+    },
+
+    { "brainpoolP160r1", 160, 0,
+      MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD,
+      "0xe95e4a5f737059dc60dfc7ad95b3d8139515620f",
+      "0x340e7be2a280eb74e2be61bada745d97e8f7c300",
+      "0x1e589a8595423412134faa2dbdec95c8d8675e58",
+      "0xe95e4a5f737059dc60df5991d45029409e60fc09",
+      "0xbed5af16ea3f6a4f62938c4631eb5af7bdbcdbc3",
+      "0x1667cb477a1a8ec338f94741669c976316da6321",
+      1
+    },
+
+    { "brainpoolP192r1", 192, 0,
+      MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD,
+      "0xc302f41d932a36cda7a3463093d18db78fce476de1a86297",
+      "0x6a91174076b1e0e19c39c031fe8685c1cae040e5c69a28ef",
+      "0x469a28ef7c28cca3dc721d044f4496bcca7ef4146fbf25c9",
+      "0xc302f41d932a36cda7a3462f9e9e916b5be8f1029ac4acc1",
+      "0xc0a0647eaab6a48753b033c56cb0f0900a2f5c4853375fd6",
+      "0x14b690866abd5bb88b5f4828c1490002e6773fa2fa299b8f",
+      1
+    },
+
+    { "brainpoolP224r1", 224, 0,
+      MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD,
+      "0xd7c134aa264366862a18302575d1d787b09f075797da89f57ec8c0ff",
+      "0x68a5e62ca9ce6c1c299803a6c1530b514e182ad8b0042a59cad29f43",
+      "0x2580f63ccfe44138870713b1a92369e33e2135d266dbb372386c400b",
+      "0xd7c134aa264366862a18302575d0fb98d116bc4b6ddebca3a5a7939f",
+      "0x0d9029ad2c7e5cf4340823b2a87dc68c9e4ce3174c1e6efdee12c07d",
+      "0x58aa56f772c0726f24c6b89e4ecdac24354b9e99caa3f6d3761402cd",
+      1
+    },
+
+    { "brainpoolP256r1", 256, 0,
+      MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD,
+      "0xa9fb57dba1eea9bc3e660a909d838d726e3bf623d52620282013481d1f6e5377",
+      "0x7d5a0975fc2c3057eef67530417affe7fb8055c126dc5c6ce94a4b44f330b5d9",
+      "0x26dc5c6ce94a4b44f330b5d9bbd77cbf958416295cf7e1ce6bccdc18ff8c07b6",
+      "0xa9fb57dba1eea9bc3e660a909d838d718c397aa3b561a6f7901e0e82974856a7",
+      "0x8bd2aeb9cb7e57cb2c4b482ffc81b7afb9de27e1e3bd23c23a4453bd9ace3262",
+      "0x547ef835c3dac4fd97f8461a14611dc9c27745132ded8e545c1d54c72f046997",
+      1
+    },
+
+    { "brainpoolP320r1", 320, 0,
+      MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD,
+      "0xd35e472036bc4fb7e13c785ed201e065f98fcfa6f6f40def4f92b9ec7893ec28"
+      "fcd412b1f1b32e27",
+      "0x3ee30b568fbab0f883ccebd46d3f3bb8a2a73513f5eb79da66190eb085ffa9f4"
+      "92f375a97d860eb4",
+      "0x520883949dfdbc42d3ad198640688a6fe13f41349554b49acc31dccd88453981"
+      "6f5eb4ac8fb1f1a6",
+      "0xd35e472036bc4fb7e13c785ed201e065f98fcfa5b68f12a32d482ec7ee8658e9"
+      "8691555b44c59311",
+      "0x43bd7e9afb53d8b85289bcc48ee5bfe6f20137d10a087eb6e7871e2a10a599c7"
+      "10af8d0d39e20611",
+      "0x14fdd05545ec1cc8ab4093247f77275e0743ffed117182eaa9c77877aaac6ac7"
+      "d35245d1692e8ee1",
+      1
+    },
+
+    { "brainpoolP384r1", 384, 0,
+      MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD,
+      "0x8cb91e82a3386d280f5d6f7e50e641df152f7109ed5456b412b1da197fb71123"
+      "acd3a729901d1a71874700133107ec53",
+      "0x7bc382c63d8c150c3c72080ace05afa0c2bea28e4fb22787139165efba91f90f"
+      "8aa5814a503ad4eb04a8c7dd22ce2826",
+      "0x04a8c7dd22ce28268b39b55416f0447c2fb77de107dcd2a62e880ea53eeb62d5"
+      "7cb4390295dbc9943ab78696fa504c11",
+      "0x8cb91e82a3386d280f5d6f7e50e641df152f7109ed5456b31f166e6cac0425a7"
+      "cf3ab6af6b7fc3103b883202e9046565",
+      "0x1d1c64f068cf45ffa2a63a81b7c13f6b8847a3e77ef14fe3db7fcafe0cbd10e8"
+      "e826e03436d646aaef87b2e247d4af1e",
+      "0x8abe1d7520f9c2a45cb1eb8e95cfd55262b70b29feec5864e19c054ff9912928"
+      "0e4646217791811142820341263c5315",
+      1
+    },
+
+    { "brainpoolP512r1", 512, 0,
+      MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD,
+      "0xaadd9db8dbe9c48b3fd4e6ae33c9fc07cb308db3b3c9d20ed6639cca70330871"
+      "7d4d9b009bc66842aecda12ae6a380e62881ff2f2d82c68528aa6056583a48f3",
+      "0x7830a3318b603b89e2327145ac234cc594cbdd8d3df91610a83441caea9863bc"
+      "2ded5d5aa8253aa10a2ef1c98b9ac8b57f1117a72bf2c7b9e7c1ac4d77fc94ca",
+      "0x3df91610a83441caea9863bc2ded5d5aa8253aa10a2ef1c98b9ac8b57f1117a7"
+      "2bf2c7b9e7c1ac4d77fc94cadc083e67984050b75ebae5dd2809bd638016f723",
+      "0xaadd9db8dbe9c48b3fd4e6ae33c9fc07cb308db3b3c9d20ed6639cca70330870"
+      "553e5c414ca92619418661197fac10471db1d381085ddaddb58796829ca90069",
+      "0x81aee4bdd82ed9645a21322e9c4c6a9385ed9f70b5d916c1b43b62eef4d0098e"
+      "ff3b1f78e2d0d48d50d1687b93b97d5f7c6d5047406a5e688b352209bcb9f822",
+      "0x7dde385d566332ecc0eabfa9cf7822fdf209f70024a57b1aa000c55b881f8111"
+      "b2dcde494a5f485e5bca4bd88a2763aed1ca2b2fa8f0540678cd1e0f3ad80892",
+      1
+    },
+    {
+      "GOST2001-test", 256, 0,
+      MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD,
+      "0x8000000000000000000000000000000000000000000000000000000000000431",
+      "0x0000000000000000000000000000000000000000000000000000000000000007",
+      "0x5fbff498aa938ce739b8e022fbafef40563f6e6a3472fc2a514c0ce9dae23b7e",
+      "0x8000000000000000000000000000000150fe8a1892976154c59cfc193accf5b3",
+
+      "0x0000000000000000000000000000000000000000000000000000000000000002",
+      "0x08e2a8a0e65147d4bd6316030e16d19c85c97f0a9ca267122b96abbcea7e8fc8",
+      1
+    },
+    {
+      "GOST2001-CryptoPro-A", 256, 0,
+      MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD,
+      "0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffd97",
+      "0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffd94",
+      "0x00000000000000000000000000000000000000000000000000000000000000a6",
+      "0xffffffffffffffffffffffffffffffff6c611070995ad10045841b09b761b893",
+      "0x0000000000000000000000000000000000000000000000000000000000000001",
+      "0x8d91e471e0989cda27df505a453f2b7635294f2ddf23e3b122acc99c9e9f1e14",
+      1
+    },
+    {
+      "GOST2001-CryptoPro-B", 256, 0,
+      MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD,
+      "0x8000000000000000000000000000000000000000000000000000000000000c99",
+      "0x8000000000000000000000000000000000000000000000000000000000000c96",
+      "0x3e1af419a269a5f866a7d3c25c3df80ae979259373ff2b182f49d4ce7e1bbc8b",
+      "0x800000000000000000000000000000015f700cfff1a624e5e497161bcc8a198f",
+      "0x0000000000000000000000000000000000000000000000000000000000000001",
+      "0x3fa8124359f96680b83d1c3eb2c070e5c545c9858d03ecfb744bf8d717717efc",
+      1
+    },
+    {
+      "GOST2001-CryptoPro-C", 256, 0,
+      MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD,
+      "0x9b9f605f5a858107ab1ec85e6b41c8aacf846e86789051d37998f7b9022d759b",
+      "0x9b9f605f5a858107ab1ec85e6b41c8aacf846e86789051d37998f7b9022d7598",
+      "0x000000000000000000000000000000000000000000000000000000000000805a",
+      "0x9b9f605f5a858107ab1ec85e6b41c8aa582ca3511eddfb74f02f3a6598980bb9",
+      "0x0000000000000000000000000000000000000000000000000000000000000000",
+      "0x41ece55743711a8c3cbf3783cd08c0ee4d4dc440d4641a8f366e550dfdb3bb67",
+      1
+    },
+    {
+      "GOST2012-256-A", 256, 0,
+      MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD,
+      "0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffd97",
+      "0xc2173f1513981673af4892c23035a27ce25e2013bf95aa33b22c656f277e7335",
+      "0x295f9bae7428ed9ccc20e7c359a9d41a22fccd9108e17bf7ba9337a6f8ae9513",
+      "0x400000000000000000000000000000000fd8cddfc87b6635c115af556c360c67",
+      "0x91e38443a5e82c0d880923425712b2bb658b9196932e02c78b2582fe742daa28",
+      "0x32879423ab1a0375895786c4bb46e9565fde0b5344766740af268adb32322e5c",
+      4
+    },
+    {
+      "GOST2012-512-test", 511, 0,
+      MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD,
+      "0x4531acd1fe0023c7550d267b6b2fee80922b14b2ffb90f04d4eb7c09b5d2d15d"
+      "f1d852741af4704a0458047e80e4546d35b8336fac224dd81664bbf528be6373",
+      "0x0000000000000000000000000000000000000000000000000000000000000007",
+      "0x1cff0806a31116da29d8cfa54e57eb748bc5f377e49400fdd788b649eca1ac4"
+      "361834013b2ad7322480a89ca58e0cf74bc9e540c2add6897fad0a3084f302adc",
+      "0x4531acd1fe0023c7550d267b6b2fee80922b14b2ffb90f04d4eb7c09b5d2d15d"
+      "a82f2d7ecb1dbac719905c5eecc423f1d86e25edbe23c595d644aaf187e6e6df",
+
+      "0x24d19cc64572ee30f396bf6ebbfd7a6c5213b3b3d7057cc825f91093a68cd762"
+      "fd60611262cd838dc6b60aa7eee804e28bc849977fac33b4b530f1b120248a9a",
+      "0x2bb312a43bd2ce6e0d020613c857acddcfbf061e91e5f2c3f32447c259f39b2"
+      "c83ab156d77f1496bf7eb3351e1ee4e43dc1a18b91b24640b6dbb92cb1add371e",
+      1
+    },
+    {
+      "GOST2012-512-tc26-A", 512, 0,
+      MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD,
+      "0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"
+        "fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffdc7",
+      "0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"
+        "fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffdc4",
+      "0xe8c2505dedfc86ddc1bd0b2b6667f1da34b82574761cb0e879bd081cfd0b6265"
+        "ee3cb090f30d27614cb4574010da90dd862ef9d4ebee4761503190785a71c760",
+      "0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"
+        "27e69532f48d89116ff22b8d4e0560609b4b38abfad2b85dcacdb1411f10b275",
+      "0x0000000000000000000000000000000000000000000000000000000000000000"
+        "0000000000000000000000000000000000000000000000000000000000000003",
+      "0x7503cfe87a836ae3a61b8816e25450e6ce5e1c93acf1abc1778064fdcbefa921"
+        "df1626be4fd036e93d75e6a50e3a41e98028fe5fc235f5b889a589cb5215f2a4",
+      1
+    },
+    {
+      "GOST2012-512-tc26-B", 512, 0,
+      MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD,
+      "0x8000000000000000000000000000000000000000000000000000000000000000"
+        "000000000000000000000000000000000000000000000000000000000000006f",
+      "0x8000000000000000000000000000000000000000000000000000000000000000"
+        "000000000000000000000000000000000000000000000000000000000000006c",
+      "0x687d1b459dc841457e3e06cf6f5e2517b97c7d614af138bcbf85dc806c4b289f"
+        "3e965d2db1416d217f8b276fad1ab69c50f78bee1fa3106efb8ccbc7c5140116",
+      "0x8000000000000000000000000000000000000000000000000000000000000001"
+        "49a1ec142565a545acfdb77bd9d40cfa8b996712101bea0ec6346c54374f25bd",
+      "0x0000000000000000000000000000000000000000000000000000000000000000"
+        "0000000000000000000000000000000000000000000000000000000000000002",
+      "0x1a8f7eda389b094c2c071e3647a8940f3c123b697578c213be6dd9e6c8ec7335"
+        "dcb228fd1edf4a39152cbcaaf8c0398828041055f94ceeec7e21340780fe41bd",
+      1
+    },
+    {
+      "GOST2012-512-tc26-C", 512, 0,
+      MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD,
+      "0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"
+        "fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffdc7",
+      "0xdc9203e514a721875485a529d2c722fb187bc8980eb866644de41c68e1430645"
+        "46e861c0e2c9edd92ade71f46fcf50ff2ad97f951fda9f2a2eb6546f39689bd3",
+      "0xb4c4ee28cebc6c2c8ac12952cf37f16ac7efb6a9f69f4b57ffda2e4f0de5ade0"
+        "38cbc2fff719d2c18de0284b8bfef3b52b8cc7a5f5bf0a3c8d2319a5312557e1",
+      "0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"
+        "c98cdba46506ab004c33a9ff5147502cc8eda9e7a769a12694623cef47f023ed",
+      "0xe2e31edfc23de7bdebe241ce593ef5de2295b7a9cbaef021d385f7074cea043a"
+        "a27272a7ae602bf2a7b9033db9ed3610c6fb85487eae97aac5bc7928c1950148",
+      "0xf5ce40d95b5eb899abbccff5911cb8577939804d6527378b8c108c3d2090ff9be"
+        "18e2d33e3021ed2ef32d85822423b6304f726aa854bae07d0396e9a9addc40f",
+      4
+    },
+
+    {
+      "secp256k1", 256, 0,
+      MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD,
+      "0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFC2F",
+      "0x0000000000000000000000000000000000000000000000000000000000000000",
+      "0x0000000000000000000000000000000000000000000000000000000000000007",
+      "0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEBAAEDCE6AF48A03BBFD25E8CD0364141",
+      "0x79BE667EF9DCBBAC55A06295CE870B07029BFCDB2DCE28D959F2815B16F81798",
+      "0x483ADA7726A3C4655DA4FBFC0E1108A8FD17B448A68554199C47D08FFB10D4B8",
+      1
+    },
+
+    {
+      "sm2p256v1", 256, 0,
+      MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD,
+      "0xfffffffeffffffffffffffffffffffffffffffff00000000ffffffffffffffff",
+      "0xfffffffeffffffffffffffffffffffffffffffff00000000fffffffffffffffc",
+      "0x28e9fa9e9d9f5e344d5a9e4bcf6509a7f39789f515ab8f92ddbcbd414d940e93",
+      "0xfffffffeffffffffffffffffffffffff7203df6b21c6052b53bbf40939d54123",
+      "0x32c4ae2c1f1981195f9904466a39c9948fe30bbff2660be1715a4589334c74c7",
+      "0xbc3736a2f4f6779c59bdcee36b692153d0a9877cc62a474002df32e52139f0a0",
+      1
+    },
+
+    { NULL, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL }
+  };
+
+
+
+
+/* Return a copy of POINT.  */
+static gcry_mpi_point_t
+point_copy (gcry_mpi_point_t point)
+{
+  gcry_mpi_point_t newpoint;
+
+  if (point)
+    {
+      newpoint = mpi_point_new (0);
+      point_set (newpoint, point);
+    }
+  else
+    newpoint = NULL;
+  return newpoint;
+}
+
+
+/* Helper to scan a hex string. */
+static gcry_mpi_t
+scanval (const char *string)
+{
+  gpg_err_code_t rc;
+  gcry_mpi_t val;
+
+  rc = _gcry_mpi_scan (&val, GCRYMPI_FMT_HEX, string, 0, NULL);
+  if (rc)
+    log_fatal ("scanning ECC parameter failed: %s\n", gpg_strerror (rc));
+  return val;
+}
+
+
+/* Return the index of the domain_parms table for a curve with NAME.
+   Return -1 if not found.  */
+static int
+find_domain_parms_idx (const char *name)
+{
+  int idx, aliasno;
+
+  /* First check our native curves.  */
+  for (idx = 0; domain_parms[idx].desc; idx++)
+    if (!strcmp (name, domain_parms[idx].desc))
+      return idx;
+
+  /* If not found consult the alias table.  */
+  if (!domain_parms[idx].desc)
+    {
+      for (aliasno = 0; curve_aliases[aliasno].name; aliasno++)
+        if (!strcmp (name, curve_aliases[aliasno].other))
+          break;
+      if (curve_aliases[aliasno].name)
+        {
+          for (idx = 0; domain_parms[idx].desc; idx++)
+            if (!strcmp (curve_aliases[aliasno].name, domain_parms[idx].desc))
+              return idx;
+        }
+    }
+
+  return -1;
+}
+
+
+/* Generate the crypto system setup.  This function takes the NAME of
+   a curve or the desired number of bits and stores at R_CURVE the
+   parameters of the named curve or those of a suitable curve.  If
+   R_NBITS is not NULL, the chosen number of bits is stored there.
+   NULL may be given for R_CURVE, if the value is not required and for
+   example only a quick test for availability is desired.  Note that
+   the curve fields should be initialized to zero because fields which
+   are not NULL are skipped.  */
+gpg_err_code_t
+_gcry_ecc_fill_in_curve (unsigned int nbits, const char *name,
+                         elliptic_curve_t *curve, unsigned int *r_nbits)
+{
+  int idx;
+  const char *resname = NULL; /* Set to a found curve name.  */
+
+  if (name)
+    idx = find_domain_parms_idx (name);
+  else
+    {
+      for (idx = 0; domain_parms[idx].desc; idx++)
+        if (nbits == domain_parms[idx].nbits
+            && domain_parms[idx].model == MPI_EC_WEIERSTRASS)
+          break;
+      if (!domain_parms[idx].desc)
+        idx = -1;
+    }
+  if (idx < 0)
+    return GPG_ERR_UNKNOWN_CURVE;
+
+  resname = domain_parms[idx].desc;
+
+  /* In fips mode we only support NIST curves.  Note that it is
+     possible to bypass this check by specifying the curve parameters
+     directly.  */
+  if (fips_mode () && !domain_parms[idx].fips )
+    return GPG_ERR_NOT_SUPPORTED;
+
+  switch (domain_parms[idx].model)
+    {
+    case MPI_EC_WEIERSTRASS:
+    case MPI_EC_EDWARDS:
+    case MPI_EC_MONTGOMERY:
+      break;
+    default:
+      return GPG_ERR_BUG;
+    }
+
+
+  if (r_nbits)
+    *r_nbits = domain_parms[idx].nbits;
+
+  if (curve)
+    {
+      curve->model = domain_parms[idx].model;
+      curve->dialect = domain_parms[idx].dialect;
+      if (!curve->p)
+        curve->p = scanval (domain_parms[idx].p);
+      if (!curve->a)
+        {
+          curve->a = scanval (domain_parms[idx].a);
+          if (curve->a->sign)
+            {
+              mpi_resize (curve->a, curve->p->nlimbs);
+              _gcry_mpih_sub_n (curve->a->d, curve->p->d,
+                                curve->a->d, curve->p->nlimbs);
+              curve->a->nlimbs = curve->p->nlimbs;
+              curve->a->sign = 0;
+            }
+        }
+      if (!curve->b)
+        {
+          curve->b = scanval (domain_parms[idx].b);
+          if (curve->b->sign)
+            {
+              mpi_resize (curve->b, curve->p->nlimbs);
+              _gcry_mpih_sub_n (curve->b->d, curve->p->d,
+                                curve->b->d, curve->p->nlimbs);
+              curve->b->nlimbs = curve->p->nlimbs;
+              curve->b->sign = 0;
+            }
+        }
+      if (!curve->n)
+        curve->n = scanval (domain_parms[idx].n);
+      if (!curve->G.x)
+        curve->G.x = scanval (domain_parms[idx].g_x);
+      if (!curve->G.y)
+        curve->G.y = scanval (domain_parms[idx].g_y);
+      curve->h = domain_parms[idx].h;
+
+      /*
+       * In the constants of domain_parms, we defined Curve25519
+       * domain parameters as the ones in RFC-7748 before the errata
+       * (eid4730).  To keep the computation having exact same values,
+       * we recover the new value of g_y, here.
+       */
+      if (!strcmp (resname, "Curve25519"))
+        mpi_sub (curve->G.y, curve->p, curve->G.y);
+
+      if (!curve->G.z)
+        curve->G.z = mpi_alloc_set_ui (1);
+      if (!curve->name)
+        curve->name = resname;
+    }
+
+  return 0;
+}
+
+
+/* Give the name of the curve NAME, store the curve parameters into P,
+   A, B, G, and N if they point to NULL value.  Note that G is
+   returned in standard uncompressed format.  Also update MODEL and
+   DIALECT if they are not NULL. */
+gpg_err_code_t
+_gcry_ecc_update_curve_param (const char *name,
+                              enum gcry_mpi_ec_models *model,
+                              enum ecc_dialects *dialect,
+                              gcry_mpi_t *p, gcry_mpi_t *a, gcry_mpi_t *b,
+                              gcry_mpi_t *g, gcry_mpi_t *n)
+{
+  int idx;
+
+  idx = find_domain_parms_idx (name);
+  if (idx < 0)
+    return GPG_ERR_UNKNOWN_CURVE;
+
+  if (g)
+    {
+      char *buf;
+      size_t len;
+
+      len = 4;
+      len += strlen (domain_parms[idx].g_x+2);
+      len += strlen (domain_parms[idx].g_y+2);
+      len++;
+      buf = xtrymalloc (len);
+      if (!buf)
+        return gpg_err_code_from_syserror ();
+      strcpy (stpcpy (stpcpy (buf, "0x04"), domain_parms[idx].g_x+2),
+              domain_parms[idx].g_y+2);
+      _gcry_mpi_release (*g);
+      *g = scanval (buf);
+      xfree (buf);
+    }
+  if (model)
+    *model = domain_parms[idx].model;
+  if (dialect)
+    *dialect = domain_parms[idx].dialect;
+  if (p)
+    {
+      _gcry_mpi_release (*p);
+      *p = scanval (domain_parms[idx].p);
+    }
+  if (a)
+    {
+      _gcry_mpi_release (*a);
+      *a = scanval (domain_parms[idx].a);
+    }
+  if (b)
+    {
+      _gcry_mpi_release (*b);
+      *b = scanval (domain_parms[idx].b);
+    }
+  if (n)
+    {
+      _gcry_mpi_release (*n);
+      *n = scanval (domain_parms[idx].n);
+    }
+  return 0;
+}
+
+
+/* Return the name matching the parameters in PKEY.  This works only
+   with curves described by the Weierstrass equation. */
+const char *
+_gcry_ecc_get_curve (gcry_sexp_t keyparms, int iterator, unsigned int *r_nbits)
+{
+  gpg_err_code_t rc;
+  const char *result = NULL;
+  elliptic_curve_t E;
+  gcry_mpi_point_t G = NULL;
+  gcry_mpi_t tmp = NULL;
+  int idx;
+
+  memset (&E, 0, sizeof E);
+
+  if (r_nbits)
+    *r_nbits = 0;
+
+  if (!keyparms)
+    {
+      idx = iterator;
+      if (idx >= 0 && idx < DIM (domain_parms))
+        {
+          result = domain_parms[idx].desc;
+          if (r_nbits)
+            *r_nbits = domain_parms[idx].nbits;
+        }
+      return result;
+    }
+
+
+  /*
+   * Extract the curve parameters..
+   */
+  rc = gpg_err_code (sexp_extract_param (keyparms, NULL, "pabn",
+                                         &E.p, &E.a, &E.b, &E.n, NULL));
+  if (rc == GPG_ERR_NO_OBJ)
+    {
+      /* This might be the second use case of checking whether a
+         specific curve given by name is supported.  */
+      gcry_sexp_t l1;
+      char *name;
+
+      l1 = sexp_find_token (keyparms, "curve", 5);
+      if (!l1)
+        goto leave;  /* No curve name parameter.  */
+
+      name = sexp_nth_string (l1, 1);
+      sexp_release (l1);
+      if (!name)
+        goto leave;  /* Name missing or out of core. */
+
+      idx = find_domain_parms_idx (name);
+      xfree (name);
+      if (idx >= 0)  /* Curve found.  */
+        {
+          result = domain_parms[idx].desc;
+          if (r_nbits)
+            *r_nbits = domain_parms[idx].nbits;
+        }
+      return result;
+    }
+
+  if (rc)
+    goto leave;
+
+  rc = point_from_keyparam (&G, keyparms, "g", NULL);
+  if (rc)
+    goto leave;
+
+  _gcry_mpi_point_init (&E.G);
+  _gcry_mpi_point_set (&E.G, G->x, G->y, G->z);
+
+  for (idx = 0; domain_parms[idx].desc; idx++)
+    {
+      mpi_free (tmp);
+      tmp = scanval (domain_parms[idx].p);
+      if (mpi_cmp (tmp, E.p))
+        continue;
+
+      mpi_free (tmp);
+      tmp = scanval (domain_parms[idx].a);
+      if (tmp->sign)
+        {
+          if (!mpi_cmpabs (tmp, E.a))
+            /* For backward compatibility to <= libgcrypt 1.8, we
+               allow this match to support existing keys in SEXP.  */
+            ;
+          else
+            {
+              mpi_resize (tmp, E.p->nlimbs);
+              _gcry_mpih_sub_n (tmp->d, E.p->d,
+                                tmp->d, E.p->nlimbs);
+              tmp->nlimbs = E.p->nlimbs;
+              tmp->sign = 0;
+              if (mpi_cmp (tmp, E.a))
+                continue;
+            }
+        }
+      else if (mpi_cmp (tmp, E.a))
+        continue;
+
+      mpi_free (tmp);
+      tmp = scanval (domain_parms[idx].b);
+      if (tmp->sign)
+        {
+          if (!mpi_cmpabs (tmp, E.b))
+            /* Same for backward compatibility, see above.  */
+            ;
+          else
+            {
+              mpi_resize (tmp, E.p->nlimbs);
+              _gcry_mpih_sub_n (tmp->d, E.p->d,
+                                tmp->d, E.p->nlimbs);
+              tmp->nlimbs = E.p->nlimbs;
+              tmp->sign = 0;
+              if (mpi_cmp (tmp, E.b))
+                continue;
+            }
+        }
+      else if (mpi_cmp (tmp, E.b))
+        continue;
+
+      mpi_free (tmp);
+      tmp = scanval (domain_parms[idx].n);
+      if (mpi_cmp (tmp, E.n))
+        continue;
+
+      mpi_free (tmp);
+      tmp = scanval (domain_parms[idx].g_x);
+      if (mpi_cmp (tmp, E.G.x))
+        continue;
+
+      mpi_free (tmp);
+      tmp = scanval (domain_parms[idx].g_y);
+      if (mpi_cmp (tmp, E.G.y))
+        continue;
+
+      result = domain_parms[idx].desc;
+      if (r_nbits)
+        *r_nbits = domain_parms[idx].nbits;
+      break;
+    }
+
+ leave:
+  _gcry_mpi_point_release (G);
+  _gcry_mpi_release (tmp);
+  _gcry_mpi_release (E.p);
+  _gcry_mpi_release (E.a);
+  _gcry_mpi_release (E.b);
+  _gcry_mpi_point_free_parts (&E.G);
+  _gcry_mpi_release (E.n);
+  return result;
+}
+
+
+/* Helper to extract an MPI from key parameters.  */
+static gpg_err_code_t
+mpi_from_keyparam (gcry_mpi_t *r_a, gcry_sexp_t keyparam, const char *name,
+                   int opaque)
+{
+  gcry_err_code_t ec = 0;
+  gcry_sexp_t l1;
+
+  l1 = sexp_find_token (keyparam, name, 0);
+  if (l1)
+    {
+      *r_a = sexp_nth_mpi (l1, 1, opaque? GCRYMPI_FMT_OPAQUE : 
GCRYMPI_FMT_USG);
+      sexp_release (l1);
+      if (!*r_a)
+        ec = GPG_ERR_INV_OBJ;
+    }
+  return ec;
+}
+
+/* Helper to extract a point from key parameters.  If no parameter
+   with NAME is found, the functions tries to find a non-encoded point
+   by appending ".x", ".y" and ".z" to NAME.  ".z" is in this case
+   optional and defaults to 1.  EC is the context which at this point
+   may not be fully initialized. */
+static gpg_err_code_t
+point_from_keyparam (gcry_mpi_point_t *r_a,
+                     gcry_sexp_t keyparam, const char *name, mpi_ec_t ec)
+{
+  gcry_err_code_t rc;
+  gcry_sexp_t l1;
+  gcry_mpi_point_t point;
+
+  l1 = sexp_find_token (keyparam, name, 0);
+  if (l1)
+    {
+      gcry_mpi_t a;
+
+      a = sexp_nth_mpi (l1, 1, GCRYMPI_FMT_OPAQUE);
+      sexp_release (l1);
+      if (!a)
+        return GPG_ERR_INV_OBJ;
+
+      point = mpi_point_new (0);
+      rc = _gcry_mpi_ec_decode_point (point, a, ec);
+      mpi_free (a);
+      if (rc)
+        {
+          mpi_point_release (point);
+          return rc;
+        }
+    }
+  else
+    {
+      char *tmpname;
+      gcry_mpi_t x = NULL;
+      gcry_mpi_t y = NULL;
+      gcry_mpi_t z = NULL;
+
+      tmpname = xtrymalloc (strlen (name) + 2 + 1);
+      if (!tmpname)
+        return gpg_err_code_from_syserror ();
+      strcpy (stpcpy (tmpname, name), ".x");
+      rc = mpi_from_keyparam (&x, keyparam, tmpname, 0);
+      if (rc)
+        {
+          xfree (tmpname);
+          return rc;
+        }
+      strcpy (stpcpy (tmpname, name), ".y");
+      rc = mpi_from_keyparam (&y, keyparam, tmpname, 0);
+      if (rc)
+        {
+          mpi_free (x);
+          xfree (tmpname);
+          return rc;
+        }
+      strcpy (stpcpy (tmpname, name), ".z");
+      rc = mpi_from_keyparam (&z, keyparam, tmpname, 0);
+      if (rc)
+        {
+          mpi_free (y);
+          mpi_free (x);
+          xfree (tmpname);
+          return rc;
+        }
+      if (!z)
+        z = mpi_set_ui (NULL, 1);
+      if (x && y)
+        point = mpi_point_snatch_set (NULL, x, y, z);
+      else
+        {
+          mpi_free (x);
+          mpi_free (y);
+          mpi_free (z);
+          point = NULL;
+        }
+      xfree (tmpname);
+    }
+
+  if (point)
+    *r_a = point;
+  return 0;
+}
+
+
+
+static gpg_err_code_t
+mpi_ec_get_elliptic_curve (elliptic_curve_t *E, int *r_flags,
+                           gcry_sexp_t keyparam, const char *curvename)
+{
+  gpg_err_code_t errc;
+  unsigned int nbits;
+  gcry_sexp_t l1;
+
+  errc = _gcry_pk_util_get_nbits (keyparam, &nbits);
+  if (errc)
+    return errc;
+
+  E->model = MPI_EC_WEIERSTRASS;
+  E->dialect = ECC_DIALECT_STANDARD;
+  E->h = 1;
+
+  if (keyparam)
+    {
+      /* Parse an optional flags list.  */
+      l1 = sexp_find_token (keyparam, "flags", 0);
+      if (l1)
+        {
+          int flags = 0;
+
+          errc = _gcry_pk_util_parse_flaglist (l1, &flags, NULL);
+          sexp_release (l1);
+          l1 = NULL;
+          if (errc)
+            goto leave;
+
+          *r_flags |= flags;
+        }
+
+      /* Parse the deprecated optional transient-key flag.  */
+      l1 = sexp_find_token (keyparam, "transient-key", 0);
+      if (l1)
+        {
+          *r_flags |= PUBKEY_FLAG_TRANSIENT_KEY;
+          sexp_release (l1);
+        }
+
+      /* Check whether a curve name was given.  */
+      l1 = sexp_find_token (keyparam, "curve", 5);
+
+      /* If we don't have a curve name or if override parameters have
+         explicitly been requested, parse them.  */
+      if (!l1 || (*r_flags & PUBKEY_FLAG_PARAM))
+        {
+          gcry_mpi_point_t G = NULL;
+          gcry_mpi_t cofactor = NULL;
+
+          errc = mpi_from_keyparam (&E->p, keyparam, "p", 0);
+          if (errc)
+            goto leave;
+          errc = mpi_from_keyparam (&E->a, keyparam, "a", 0);
+          if (errc)
+            goto leave;
+          errc = mpi_from_keyparam (&E->b, keyparam, "b", 0);
+          if (errc)
+            goto leave;
+          errc = point_from_keyparam (&G, keyparam, "g", NULL);
+          if (errc)
+            goto leave;
+          if (G)
+            {
+              _gcry_mpi_point_init (&E->G);
+              mpi_point_set (&E->G, G->x, G->y, G->z);
+              mpi_point_set (G, NULL, NULL, NULL);
+              mpi_point_release (G);
+            }
+          errc = mpi_from_keyparam (&E->n, keyparam, "n", 0);
+          if (errc)
+            goto leave;
+          errc = mpi_from_keyparam (&cofactor, keyparam, "h", 0);
+          if (errc)
+            goto leave;
+          if (cofactor)
+            {
+              mpi_get_ui (&E->h, cofactor);
+              mpi_free (cofactor);
+            }
+        }
+    }
+  else
+    l1 = NULL; /* No curvename.  */
+
+  /* Check whether a curve parameter is available and use that to fill
+     in missing values.  If no curve parameter is available try an
+     optional provided curvename.  If only the curvename has been
+     given use that one. */
+  if (l1 || curvename || nbits)
+    {
+      char *name;
+
+      if (l1)
+        {
+          name = sexp_nth_string (l1, 1);
+          sexp_release (l1);
+          if (!name)
+            {
+              errc = GPG_ERR_INV_OBJ; /* Name missing or out of core. */
+              goto leave;
+            }
+        }
+      else
+        name = NULL;
+
+      errc = _gcry_ecc_fill_in_curve (nbits, name? name : curvename, E, NULL);
+      xfree (name);
+      if (errc)
+        goto leave;
+    }
+
+ leave:
+  return errc;
+}
+
+static gpg_err_code_t
+mpi_ec_setup_elliptic_curve (mpi_ec_t ec, int flags,
+                             elliptic_curve_t *E, gcry_sexp_t keyparam)
+{
+  gpg_err_code_t errc = 0;
+
+  ec->G = mpi_point_snatch_set (NULL, E->G.x, E->G.y, E->G.z);
+  E->G.x = NULL;
+  E->G.y = NULL;
+  E->G.z = NULL;
+  ec->n = E->n;
+  E->n = NULL;
+  ec->h = E->h;
+  ec->name = E->name;
+
+  /* Now that we know the curve name we can look for the public key
+     Q.  point_from_keyparam needs to know the curve parameters so
+     that it is able to use the correct decompression.  Parsing
+     the private key D could have been done earlier but it is less
+     surprising if we do it here as well.  */
+  if (keyparam)
+    {
+      int is_opaque_bytes = ((ec->dialect == ECC_DIALECT_ED25519
+                              && (flags & PUBKEY_FLAG_EDDSA))
+                             || (ec->dialect == ECC_DIALECT_SAFECURVE));
+
+      errc = point_from_keyparam (&ec->Q, keyparam, "q", ec);
+      if (errc)
+        return errc;
+      errc = mpi_from_keyparam (&ec->d, keyparam, "d", is_opaque_bytes);
+
+      /* Size of opaque bytes should match size of P.  */
+      if (!errc && ec->d && is_opaque_bytes)
+        {
+          unsigned int n = mpi_get_nbits (ec->d);
+          unsigned int len;
+
+          len = (ec->nbits+7)/8;
+          /* EdDSA requires additional bit for sign.  */
+          if ((ec->nbits%8) == 0 && ec->model == MPI_EC_EDWARDS)
+            len++;
+
+          if ((n+7)/8 != len)
+            {
+              if (ec->dialect == ECC_DIALECT_ED25519)
+                {
+                  /*
+                   * GnuPG (<= 2.2) or OpenPGP implementations with no
+                   * SOS support may remove zeros at the beginning.
+                   * Recover those zeros.
+                   */
+                  /*
+                   * Also, GnuPG (<= 2.2) may add additional zero at
+                   * the beginning, when private key is moved from
+                   * OpenPGP to gpg-agent.  Remove such a zero-prefix.
+                   */
+                  const unsigned char *buf;
+                  unsigned char *value;
+
+                  buf = mpi_get_opaque (ec->d, &n);
+                  if (!buf)
+                    return GPG_ERR_INV_OBJ;
+
+                  value = xtrymalloc_secure (len);
+                  if (!value)
+                    return gpg_err_code_from_syserror ();
+
+                  if ((n+7)/8 < len)
+                    /* Recover zeros.  */
+                    {
+                      memset (value, 0, len - (n+7)/8);
+                      memcpy (value + len - (n+7)/8, buf, (n+7)/8);
+                    }
+                  else if ((n+7)/8 == len + 1)
+                    /* Remove a zero.  */
+                    memcpy (value, buf+1, len);
+                  else
+                    {
+                      xfree (value);
+                      return GPG_ERR_INV_OBJ;
+                    }
+
+                  mpi_set_opaque (ec->d, value, len*8);
+                }
+              else
+                {
+                  if (DBG_CIPHER)
+                    log_debug ("scalar size (%d) != prime size (%d)",
+                               (n+7)/8, len);
+
+                  errc = GPG_ERR_INV_OBJ;
+                }
+            }
+        }
+    }
+
+  return errc;
+}
+
+gpg_err_code_t
+_gcry_mpi_ec_internal_new (mpi_ec_t *r_ec, int *r_flags, const char *name_op,
+                           gcry_sexp_t keyparam, const char *curvename)
+{
+  gpg_err_code_t errc;
+  elliptic_curve_t E;
+  mpi_ec_t ec;
+
+  *r_ec = NULL;
+
+  memset (&E, 0, sizeof E);
+  errc = mpi_ec_get_elliptic_curve (&E, r_flags, keyparam, curvename);
+  if (errc)
+    goto leave;
+
+  ec = _gcry_mpi_ec_p_internal_new (E.model, E.dialect, *r_flags,
+                                    E.p, E.a, E.b);
+  if (!ec)
+    goto leave;
+
+  errc = mpi_ec_setup_elliptic_curve (ec, *r_flags, &E, keyparam);
+  if (errc)
+    {
+      _gcry_mpi_ec_free (ec);
+      goto leave;
+    }
+  else
+    *r_ec = ec;
+
+  if (!errc && DBG_CIPHER)
+    {
+      gcry_mpi_t mpi_q = NULL;
+      gcry_sexp_t l1;
+      char msg[80];
+
+      l1 = sexp_find_token (keyparam, "q", 0);
+      if (l1)
+        {
+          mpi_q = sexp_nth_mpi (l1, 1, GCRYMPI_FMT_OPAQUE);
+          sexp_release (l1);
+        }
+
+      log_debug ("%s info: %s/%s%s\n", name_op,
+                 _gcry_ecc_model2str (ec->model),
+                 _gcry_ecc_dialect2str (ec->dialect),
+                 (*r_flags & PUBKEY_FLAG_EDDSA)? "+EdDSA" : "");
+      if (ec->name)
+        log_debug  ("%s name: %s\n", name_op, ec->name);
+      snprintf (msg, sizeof msg, "%s    p", name_op);
+      log_printmpi (msg, ec->p);
+      snprintf (msg, sizeof msg, "%s    a", name_op);
+      log_printmpi (msg, ec->a);
+      snprintf (msg, sizeof msg, "%s    b", name_op);
+      log_printmpi (msg, ec->b);
+      snprintf (msg, sizeof msg, "%s  g", name_op);
+      log_printpnt (msg, ec->G, NULL);
+      snprintf (msg, sizeof msg, "%s    n", name_op);
+      log_printmpi (msg, ec->n);
+      log_debug ("%s    h:+%02x\n", name_op, ec->h);
+      if (mpi_q)
+        {
+          snprintf (msg, sizeof msg, "%s    q", name_op);
+          log_printmpi (msg, mpi_q);
+          mpi_free (mpi_q);
+        }
+      if (!fips_mode () && ec->d)
+        {
+          snprintf (msg, sizeof msg, "%s    d", name_op);
+          log_printmpi (msg, ec->d);
+        }
+    }
+
+ leave:
+  _gcry_ecc_curve_free (&E);
+  return errc;
+}
+
+/* This function creates a new context for elliptic curve operations.
+   Either KEYPARAM or CURVENAME must be given.  If both are given and
+   KEYPARAM has no curve parameter, CURVENAME is used to add missing
+   parameters.  On success 0 is returned and the new context stored at
+   R_CTX.  On error NULL is stored at R_CTX and an error code is
+   returned.  The context needs to be released using
+   gcry_ctx_release.  */
+gpg_err_code_t
+_gcry_mpi_ec_new (gcry_ctx_t *r_ctx,
+                  gcry_sexp_t keyparam, const char *curvename)
+{
+  gpg_err_code_t errc;
+  elliptic_curve_t E;
+  gcry_ctx_t ctx = NULL;
+  int flags = 0;
+  mpi_ec_t ec;
+
+  *r_ctx = NULL;
+
+  memset (&E, 0, sizeof E);
+  errc = mpi_ec_get_elliptic_curve (&E, &flags, keyparam, curvename);
+  if (errc)
+    goto leave;
+
+  errc = _gcry_mpi_ec_p_new (&ctx, E.model, E.dialect, flags, E.p, E.a, E.b);
+  if (errc)
+    goto leave;
+
+  ec = _gcry_ctx_get_pointer (ctx, CONTEXT_TYPE_EC);
+  errc = mpi_ec_setup_elliptic_curve (ec, flags, &E, keyparam);
+  if (errc)
+    goto leave;
+
+  *r_ctx = ctx;
+  ctx = NULL;
+
+ leave:
+  _gcry_ecc_curve_free (&E);
+  _gcry_ctx_release (ctx);
+  return errc;
+}
+
+
+/* Return the parameters of the curve NAME as an S-expression.  */
+gcry_sexp_t
+_gcry_ecc_get_param_sexp (const char *name)
+{
+  elliptic_curve_t E;
+  gcry_mpi_t pkey[5];
+  gcry_sexp_t result;
+
+  memset (&E, 0, sizeof E);
+  if (_gcry_ecc_fill_in_curve (0, name, &E, NULL))
+    return NULL;
+
+  pkey[0] = E.p;
+  pkey[1] = E.a;
+  pkey[2] = E.b;
+  pkey[3] = _gcry_ecc_ec2os (E.G.x, E.G.y, E.p);
+  pkey[4] = E.n;
+
+  if (sexp_build (&result, NULL,
+                  "(public-key(ecc(p%m)(a%m)(b%m)(g%m)(n%m)(h%u)))",
+                  pkey[0], pkey[1], pkey[2], pkey[3], pkey[4], E.h))
+    result = NULL;
+
+  _gcry_ecc_curve_free (&E);
+  _gcry_mpi_release (pkey[3]);
+
+  return result;
+}
+
+
+/* Return an MPI (or opaque MPI) described by NAME and the context EC.
+   If COPY is true a copy is returned, if not a const MPI may be
+   returned.  In any case mpi_free must be used.  */
+gcry_mpi_t
+_gcry_ecc_get_mpi (const char *name, mpi_ec_t ec, int copy)
+{
+  if (!*name)
+    return NULL;
+
+  if (!strcmp (name, "p") && ec->p)
+    return mpi_is_const (ec->p) && !copy? ec->p : mpi_copy (ec->p);
+  if (!strcmp (name, "a") && ec->a)
+    return mpi_is_const (ec->a) && !copy? ec->a : mpi_copy (ec->a);
+  if (!strcmp (name, "b") && ec->b)
+    return mpi_is_const (ec->b) && !copy? ec->b : mpi_copy (ec->b);
+  if (!strcmp (name, "n") && ec->n)
+    return mpi_is_const (ec->n) && !copy? ec->n : mpi_copy (ec->n);
+  if (!strcmp (name, "h"))
+    {
+      gcry_mpi_t h = _gcry_mpi_get_const (ec->h);
+
+      return !copy? h : mpi_set (NULL, h);
+    }
+  if (!strcmp (name, "d") && ec->d)
+    return mpi_is_const (ec->d) && !copy? ec->d : mpi_copy (ec->d);
+
+  /* Return a requested point coordinate.  */
+  if (!strcmp (name, "g.x") && ec->G && ec->G->x)
+    return mpi_is_const (ec->G->x) && !copy? ec->G->x : mpi_copy (ec->G->x);
+  if (!strcmp (name, "g.y") && ec->G && ec->G->y)
+    return mpi_is_const (ec->G->y) && !copy? ec->G->y : mpi_copy (ec->G->y);
+  if (!strcmp (name, "q.x") && ec->Q && ec->Q->x)
+    return mpi_is_const (ec->Q->x) && !copy? ec->Q->x : mpi_copy (ec->Q->x);
+  if (!strcmp (name, "q.y") && ec->Q && ec->Q->y)
+    return mpi_is_const (ec->Q->y) && !copy? ec->Q->y : mpi_copy (ec->Q->y);
+
+  /* If the base point has been requested, return it in standard
+     encoding.  */
+  if (!strcmp (name, "g") && ec->G)
+    return _gcry_mpi_ec_ec2os (ec->G, ec);
+
+  /* If the public key has been requested, return it by default in
+     standard uncompressed encoding or if requested in other
+     encodings.  */
+  if (*name == 'q' && (!name[1] || name[1] == '@'))
+    {
+      /* If only the private key is given, compute the public key.  */
+      if (!ec->Q)
+        ec->Q = _gcry_ecc_compute_public (NULL, ec);
+
+      if (!ec->Q)
+        return NULL;
+
+      if (name[1] != '@')
+        return _gcry_mpi_ec_ec2os (ec->Q, ec);
+
+      if (!strcmp (name+2, "eddsa") && ec->model == MPI_EC_EDWARDS)
+        {
+          unsigned char *encpk;
+          unsigned int encpklen;
+
+          if (!_gcry_ecc_eddsa_encodepoint (ec->Q, ec, NULL, NULL, 0,
+                                            &encpk, &encpklen))
+            return mpi_set_opaque (NULL, encpk, encpklen*8);
+        }
+    }
+
+  return NULL;
+}
+
+
+/* Return a point described by NAME and the context EC.  */
+gcry_mpi_point_t
+_gcry_ecc_get_point (const char *name, mpi_ec_t ec)
+{
+  if (!strcmp (name, "g") && ec->G)
+    return point_copy (ec->G);
+  if (!strcmp (name, "q"))
+    {
+      /* If only the private key is given, compute the public key.  */
+      if (!ec->Q)
+        ec->Q = _gcry_ecc_compute_public (NULL, ec);
+
+      if (ec->Q)
+        return point_copy (ec->Q);
+    }
+
+  return NULL;
+}
+
+
+/* Store the MPI NEWVALUE into the context EC under NAME. */
+gpg_err_code_t
+_gcry_ecc_set_mpi (const char *name, gcry_mpi_t newvalue, mpi_ec_t ec)
+{
+  gpg_err_code_t rc = 0;
+
+  if (!*name)
+    ;
+  else if (!strcmp (name, "p"))
+    {
+      mpi_free (ec->p);
+      ec->p = mpi_copy (newvalue);
+      _gcry_mpi_ec_get_reset (ec);
+    }
+  else if (!strcmp (name, "a"))
+    {
+      mpi_free (ec->a);
+      ec->a = mpi_copy (newvalue);
+      _gcry_mpi_ec_get_reset (ec);
+    }
+  else if (!strcmp (name, "b"))
+    {
+      mpi_free (ec->b);
+      ec->b = mpi_copy (newvalue);
+    }
+  else if (!strcmp (name, "n"))
+    {
+      mpi_free (ec->n);
+      ec->n = mpi_copy (newvalue);
+    }
+  else if (!strcmp (name, "h"))
+    {
+      mpi_get_ui (&ec->h, newvalue);
+    }
+  else if (*name == 'q' && (!name[1] || name[1] == '@'))
+    {
+      if (newvalue)
+        {
+          if (!ec->Q)
+            ec->Q = mpi_point_new (0);
+          rc = _gcry_mpi_ec_decode_point (ec->Q, newvalue, ec);
+        }
+      if (rc || !newvalue)
+        {
+          _gcry_mpi_point_release (ec->Q);
+          ec->Q = NULL;
+        }
+      /* Note: We assume that Q matches d and thus do not reset d.  */
+    }
+  else if (!strcmp (name, "d"))
+    {
+      mpi_free (ec->d);
+      ec->d = mpi_copy (newvalue);
+      if (ec->d)
+        {
+          /* We need to reset the public key because it may not
+             anymore match.  */
+          _gcry_mpi_point_release (ec->Q);
+          ec->Q = NULL;
+        }
+    }
+  else
+   rc = GPG_ERR_UNKNOWN_NAME;
+
+  return rc;
+}
+
+
+/* Store the point NEWVALUE into the context EC under NAME.  */
+gpg_err_code_t
+_gcry_ecc_set_point (const char *name, gcry_mpi_point_t newvalue, mpi_ec_t ec)
+{
+  if (!strcmp (name, "g"))
+    {
+      _gcry_mpi_point_release (ec->G);
+      ec->G = point_copy (newvalue);
+    }
+  else if (!strcmp (name, "q"))
+    {
+      _gcry_mpi_point_release (ec->Q);
+      ec->Q = point_copy (newvalue);
+    }
+  else
+    return GPG_ERR_UNKNOWN_NAME;
+
+  return 0;
+}
diff --git a/grub-core/lib/libgcrypt/cipher/ecc-ecdh.c 
b/grub-core/lib/libgcrypt/cipher/ecc-ecdh.c
new file mode 100644
index 000000000..d6b8991af
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/ecc-ecdh.c
@@ -0,0 +1,127 @@
+/* ecc-ecdh.c  -  Elliptic Curve Diffie-Hellman key agreement
+ * Copyright (C) 2019 g10 Code GmbH
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1+
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "g10lib.h"
+#include "mpi.h"
+#include "cipher.h"
+#include "context.h"
+#include "ec-context.h"
+#include "ecc-common.h"
+
+#define ECC_CURVE25519_BYTES 32
+#define ECC_CURVE448_BYTES   56
+
+static gpg_err_code_t
+prepare_ec (mpi_ec_t *r_ec, const char *name)
+{
+  int flags = 0;
+
+  if (!strcmp (name, "Curve25519"))
+    flags = PUBKEY_FLAG_DJB_TWEAK;
+
+  return _gcry_mpi_ec_internal_new (r_ec, &flags, "ecc_mul_point", NULL, name);
+}
+
+unsigned int
+_gcry_ecc_get_algo_keylen (int curveid)
+{
+  unsigned int len = 0;
+
+  if (curveid == GCRY_ECC_CURVE25519)
+    len = ECC_CURVE25519_BYTES;
+  else if (curveid == GCRY_ECC_CURVE448)
+    len = ECC_CURVE448_BYTES;
+
+  return len;
+}
+
+gpg_error_t
+_gcry_ecc_mul_point (int curveid, unsigned char *result,
+                     const unsigned char *scalar, const unsigned char *point)
+{
+  unsigned int nbits;
+  unsigned int nbytes;
+  const char *curve;
+  gpg_err_code_t err;
+  gcry_mpi_t mpi_k;
+  mpi_ec_t ec;
+  mpi_point_struct Q;
+  gcry_mpi_t x;
+  unsigned int len;
+  unsigned char *buf;
+
+  if (curveid == GCRY_ECC_CURVE25519)
+    curve = "Curve25519";
+  else if (curveid == GCRY_ECC_CURVE448)
+    curve = "X448";
+  else
+    return gpg_error (GPG_ERR_UNKNOWN_CURVE);
+
+  err = prepare_ec (&ec, curve);
+  if (err)
+    return err;
+
+  nbits = ec->nbits;
+  nbytes = (nbits + 7)/8;
+
+  mpi_k = _gcry_mpi_set_opaque_copy (NULL, scalar, nbytes*8);
+  x = mpi_new (nbits);
+  point_init (&Q);
+
+  if (point)
+    {
+      gcry_mpi_t mpi_u = _gcry_mpi_set_opaque_copy (NULL, point, nbytes*8);
+      mpi_point_struct P;
+
+      point_init (&P);
+      err = _gcry_ecc_mont_decodepoint (mpi_u, ec, &P);
+      _gcry_mpi_release (mpi_u);
+      if (err)
+        goto leave;
+      _gcry_mpi_ec_mul_point (&Q, mpi_k, &P, ec);
+      point_free (&P);
+    }
+  else
+    _gcry_mpi_ec_mul_point (&Q, mpi_k, ec->G, ec);
+
+  _gcry_mpi_ec_get_affine (x, NULL, &Q, ec);
+
+  buf = _gcry_mpi_get_buffer (x, nbytes, &len, NULL);
+  if (!buf)
+    err = gpg_error_from_syserror ();
+  else
+    {
+      memcpy (result, buf, nbytes);
+      xfree (buf);
+    }
+
+ leave:
+  _gcry_mpi_release (x);
+  point_free (&Q);
+  _gcry_mpi_release (mpi_k);
+  _gcry_mpi_ec_free (ec);
+  return err;
+}
diff --git a/grub-core/lib/libgcrypt/cipher/ecc-ecdsa.c 
b/grub-core/lib/libgcrypt/cipher/ecc-ecdsa.c
new file mode 100644
index 000000000..3f3ef97b2
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/ecc-ecdsa.c
@@ -0,0 +1,297 @@
+/* ecc-ecdsa.c  -  Elliptic Curve ECDSA signatures
+ * Copyright (C) 2007, 2008, 2010, 2011 Free Software Foundation, Inc.
+ * Copyright (C) 2013 g10 Code GmbH
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "g10lib.h"
+#include "mpi.h"
+#include "cipher.h"
+#include "context.h"
+#include "ec-context.h"
+#include "pubkey-internal.h"
+#include "ecc-common.h"
+
+
+/* Compute an ECDSA signature.
+ * Return the signature struct (r,s) from the message hash.  The caller
+ * must have allocated R and S.
+ */
+gpg_err_code_t
+_gcry_ecc_ecdsa_sign (gcry_mpi_t input, gcry_mpi_t k_supplied, mpi_ec_t ec,
+                      gcry_mpi_t r, gcry_mpi_t s,
+                      int flags, int hashalgo)
+{
+  gpg_err_code_t rc = 0;
+  int extraloops = 0;
+  gcry_mpi_t k, dr, sum, k_1, x;
+  mpi_point_struct I;
+  gcry_mpi_t hash;
+  const void *abuf;
+  unsigned int abits, qbits;
+  gcry_mpi_t b;                /* Random number needed for blinding.  */
+  gcry_mpi_t bi;               /* multiplicative inverse of B.        */
+  gcry_mpi_t hash_computed_internally = NULL;
+
+  if (DBG_CIPHER)
+    log_mpidump ("ecdsa sign hash  ", input );
+
+  qbits = mpi_get_nbits (ec->n);
+
+  if ((flags & PUBKEY_FLAG_PREHASH))
+    {
+      rc = _gcry_dsa_compute_hash (&hash_computed_internally, input, hashalgo);
+      if (rc)
+        return rc;
+      input = hash_computed_internally;
+    }
+
+  /* Convert the INPUT into an MPI if needed.  */
+  rc = _gcry_dsa_normalize_hash (input, &hash, qbits);
+
+  if (rc)
+    {
+      mpi_free (hash_computed_internally);
+      return rc;
+    }
+
+  b  = mpi_snew (qbits);
+  bi = mpi_snew (qbits);
+  do
+    {
+      _gcry_mpi_randomize (b, qbits, GCRY_WEAK_RANDOM);
+      mpi_mod (b, b, ec->n);
+    }
+  while (!mpi_invm (bi, b, ec->n));
+
+  k = NULL;
+  dr = mpi_alloc (0);
+  sum = mpi_alloc (0);
+  k_1 = mpi_alloc (0);
+  x = mpi_alloc (0);
+  point_init (&I);
+
+  /* Two loops to avoid R or S are zero.  This is more of a joke than
+     a real demand because the probability of them being zero is less
+     than any hardware failure.  Some specs however require it.  */
+  while (1)
+    {
+      while (1)
+        {
+          if (k_supplied)
+            k = k_supplied;
+          else
+            {
+              mpi_free (k);
+              k = NULL;
+              if ((flags & PUBKEY_FLAG_RFC6979) && hashalgo)
+                {
+                  /* Use Pornin's method for deterministic DSA.  If this
+                     flag is set, it is expected that HASH is an opaque
+                     MPI with the to be signed hash.  That hash is also
+                     used as h1 from 3.2.a.  */
+                  if (!mpi_is_opaque (input))
+                    {
+                      rc = GPG_ERR_CONFLICT;
+                      goto leave;
+                    }
+
+                  abuf = mpi_get_opaque (input, &abits);
+                  rc = _gcry_dsa_gen_rfc6979_k (&k, ec->n, ec->d,
+                                                abuf, (abits+7)/8,
+                                                hashalgo, extraloops);
+                  if (rc)
+                    goto leave;
+                  extraloops++;
+                }
+              else
+                k = _gcry_dsa_gen_k (ec->n, GCRY_STRONG_RANDOM);
+            }
+
+          mpi_invm (k_1, k, ec->n);     /* k_1 = k^(-1) mod n  */
+
+          _gcry_dsa_modify_k (k, ec->n, qbits);
+
+          _gcry_mpi_ec_mul_point (&I, k, ec->G, ec);
+          if (_gcry_mpi_ec_get_affine (x, NULL, &I, ec))
+            {
+              if (DBG_CIPHER)
+                log_debug ("ecc sign: Failed to get affine coordinates\n");
+              rc = GPG_ERR_BAD_SIGNATURE;
+              goto leave;
+            }
+          mpi_mod (r, x, ec->n);  /* r = x mod n */
+
+          if (mpi_cmp_ui (r, 0))
+            break;
+
+          if (k_supplied)
+            {
+              rc = GPG_ERR_INV_VALUE;
+              goto leave;
+            }
+        }
+
+      /* Computation of dr, sum, and s are blinded with b.  */
+      mpi_mulm (dr, b, ec->d, ec->n);
+      mpi_mulm (dr, dr, r, ec->n);      /* dr = d*r mod n */
+      mpi_mulm (sum, b, hash, ec->n);
+      mpi_addm (sum, sum, dr, ec->n);   /* sum = hash + (d*r) mod n */
+      mpi_mulm (s, k_1, sum, ec->n);    /* s = k^(-1)*(hash+(d*r)) mod n */
+      /* Undo blinding by b^-1 */
+      mpi_mulm (s, bi, s, ec->n);
+      if (mpi_cmp_ui (s, 0))
+        break;
+
+      if (k_supplied)
+        {
+          rc = GPG_ERR_INV_VALUE;
+          break;
+        }
+    }
+
+  if (DBG_CIPHER)
+    {
+      log_mpidump ("ecdsa sign result r ", r);
+      log_mpidump ("ecdsa sign result s ", s);
+    }
+
+ leave:
+  mpi_free (b);
+  mpi_free (bi);
+  point_free (&I);
+  mpi_free (x);
+  mpi_free (k_1);
+  mpi_free (sum);
+  mpi_free (dr);
+  if (!k_supplied)
+    mpi_free (k);
+
+  if (hash != input)
+    mpi_free (hash);
+  mpi_free (hash_computed_internally);
+
+  return rc;
+}
+
+
+/* Verify an ECDSA signature.
+ * Check if R and S verifies INPUT.
+ */
+gpg_err_code_t
+_gcry_ecc_ecdsa_verify (gcry_mpi_t input, mpi_ec_t ec,
+                        gcry_mpi_t r, gcry_mpi_t s, int flags, int hashalgo)
+{
+  gpg_err_code_t err = 0;
+  gcry_mpi_t hash, h, h1, h2, x;
+  mpi_point_struct Q, Q1, Q2;
+  unsigned int nbits;
+  gcry_mpi_t hash_computed_internally = NULL;
+
+  if (!_gcry_mpi_ec_curve_point (ec->Q, ec))
+    return GPG_ERR_BROKEN_PUBKEY;
+
+  if( !(mpi_cmp_ui (r, 0) > 0 && mpi_cmp (r, ec->n) < 0) )
+    return GPG_ERR_BAD_SIGNATURE; /* Assertion 0 < r < n  failed.  */
+  if( !(mpi_cmp_ui (s, 0) > 0 && mpi_cmp (s, ec->n) < 0) )
+    return GPG_ERR_BAD_SIGNATURE; /* Assertion 0 < s < n  failed.  */
+
+  nbits = mpi_get_nbits (ec->n);
+  if ((flags & PUBKEY_FLAG_PREHASH))
+    {
+      err = _gcry_dsa_compute_hash (&hash_computed_internally, input,
+                                    hashalgo);
+      if (err)
+        return err;
+      input = hash_computed_internally;
+    }
+
+  err = _gcry_dsa_normalize_hash (input, &hash, nbits);
+  if (err)
+    {
+      mpi_free (hash_computed_internally);
+      return err;
+    }
+
+  h  = mpi_alloc (0);
+  h1 = mpi_alloc (0);
+  h2 = mpi_alloc (0);
+  x = mpi_alloc (0);
+  point_init (&Q);
+  point_init (&Q1);
+  point_init (&Q2);
+
+  /* h  = s^(-1) (mod n) */
+  mpi_invm (h, s, ec->n);
+  /* h1 = hash * s^(-1) (mod n) */
+  mpi_mulm (h1, hash, h, ec->n);
+  /* Q1 = [ hash * s^(-1) ]G  */
+  _gcry_mpi_ec_mul_point (&Q1, h1, ec->G, ec);
+  /* h2 = r * s^(-1) (mod n) */
+  mpi_mulm (h2, r, h, ec->n);
+  /* Q2 = [ r * s^(-1) ]Q */
+  _gcry_mpi_ec_mul_point (&Q2, h2, ec->Q, ec);
+  /* Q  = ([hash * s^(-1)]G) + ([r * s^(-1)]Q) */
+  _gcry_mpi_ec_add_points (&Q, &Q1, &Q2, ec);
+
+  if (!mpi_cmp_ui (Q.z, 0))
+    {
+      if (DBG_CIPHER)
+          log_debug ("ecc verify: Rejected\n");
+      err = GPG_ERR_BAD_SIGNATURE;
+      goto leave;
+    }
+  if (_gcry_mpi_ec_get_affine (x, NULL, &Q, ec))
+    {
+      if (DBG_CIPHER)
+        log_debug ("ecc verify: Failed to get affine coordinates\n");
+      err = GPG_ERR_BAD_SIGNATURE;
+      goto leave;
+    }
+  mpi_mod (x, x, ec->n); /* x = x mod E_n */
+  if (mpi_cmp (x, r))   /* x != r */
+    {
+      if (DBG_CIPHER)
+        {
+          log_mpidump ("     x", x);
+          log_mpidump ("     r", r);
+          log_mpidump ("     s", s);
+        }
+      err = GPG_ERR_BAD_SIGNATURE;
+      goto leave;
+    }
+
+ leave:
+  point_free (&Q2);
+  point_free (&Q1);
+  point_free (&Q);
+  mpi_free (x);
+  mpi_free (h2);
+  mpi_free (h1);
+  mpi_free (h);
+  if (hash != input)
+    mpi_free (hash);
+  mpi_free (hash_computed_internally);
+
+  return err;
+}
diff --git a/grub-core/lib/libgcrypt/cipher/ecc-eddsa.c 
b/grub-core/lib/libgcrypt/cipher/ecc-eddsa.c
new file mode 100644
index 000000000..ce79b48ef
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/ecc-eddsa.c
@@ -0,0 +1,1079 @@
+/* ecc-eddsa.c  -  Elliptic Curve EdDSA signatures
+ * Copyright (C) 2013, 2014 g10 Code GmbH
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "g10lib.h"
+#include "mpi.h"
+#include "cipher.h"
+#include "context.h"
+#include "ec-context.h"
+#include "ecc-common.h"
+
+
+
+void
+reverse_buffer (unsigned char *buffer, unsigned int length)
+{
+  unsigned int tmp, i;
+
+  for (i=0; i < length/2; i++)
+    {
+      tmp = buffer[i];
+      buffer[i] = buffer[length-1-i];
+      buffer[length-1-i] = tmp;
+    }
+}
+
+
+/* Helper to scan a hex string. */
+static gcry_mpi_t
+scanval (const char *string)
+{
+  gpg_err_code_t rc;
+  gcry_mpi_t val;
+
+  rc = _gcry_mpi_scan (&val, GCRYMPI_FMT_HEX, string, 0, NULL);
+  if (rc)
+    log_fatal ("scanning ECC parameter failed: %s\n", gpg_strerror (rc));
+  return val;
+}
+
+
+
+/* Encode MPI using the EdDSA scheme.  MINLEN specifies the required
+   length of the buffer in bytes.  On success 0 is returned an a
+   malloced buffer with the encoded point is stored at R_BUFFER; the
+   length of this buffer is stored at R_BUFLEN.  */
+static gpg_err_code_t
+eddsa_encodempi (gcry_mpi_t mpi, unsigned int nbits,
+                 unsigned char **r_buffer, unsigned int *r_buflen)
+{
+  unsigned char *rawmpi;
+  unsigned int rawmpilen;
+  unsigned int minlen = (nbits%8) == 0 ? (nbits/8 + 1): (nbits+7)/8;
+
+  rawmpi = _gcry_mpi_get_buffer (mpi, minlen, &rawmpilen, NULL);
+  if (!rawmpi)
+    return gpg_err_code_from_syserror ();
+
+  *r_buffer = rawmpi;
+  *r_buflen = rawmpilen;
+  return 0;
+}
+
+
+/* Encode (X,Y) using the EdDSA scheme.  NBITS is the number of bits
+   of the field of the curve.  If WITH_PREFIX is set the returned
+   buffer is prefixed with a 0x40 byte.  On success 0 is returned and
+   a malloced buffer with the encoded point is stored at R_BUFFER; the
+   length of this buffer is stored at R_BUFLEN.  */
+static gpg_err_code_t
+eddsa_encode_x_y (gcry_mpi_t x, gcry_mpi_t y, unsigned int nbits,
+                  int with_prefix,
+                  unsigned char **r_buffer, unsigned int *r_buflen)
+{
+  unsigned char *rawmpi;
+  unsigned int rawmpilen;
+  int off = with_prefix? 1:0;
+  unsigned int minlen = (nbits%8) == 0 ? (nbits/8 + 1): (nbits+7)/8;
+
+  rawmpi = _gcry_mpi_get_buffer_extra (y, minlen, off?-1:0, &rawmpilen, NULL);
+  if (!rawmpi)
+    return gpg_err_code_from_syserror ();
+  if (mpi_test_bit (x, 0) && rawmpilen)
+    rawmpi[off + rawmpilen - 1] |= 0x80;  /* Set sign bit.  */
+  if (off)
+    rawmpi[0] = 0x40;
+
+  *r_buffer = rawmpi;
+  *r_buflen = rawmpilen + off;
+  return 0;
+}
+
+/* Encode POINT using the EdDSA scheme.  X and Y are either scratch
+   variables supplied by the caller or NULL.  CTX is the usual
+   context.  If WITH_PREFIX is set the returned buffer is prefixed
+   with a 0x40 byte.  On success 0 is returned and a malloced buffer
+   with the encoded point is stored at R_BUFFER; the length of this
+   buffer is stored at R_BUFLEN.  */
+gpg_err_code_t
+_gcry_ecc_eddsa_encodepoint (mpi_point_t point, mpi_ec_t ec,
+                             gcry_mpi_t x_in, gcry_mpi_t y_in,
+                             int with_prefix,
+                             unsigned char **r_buffer, unsigned int *r_buflen)
+{
+  gpg_err_code_t rc;
+  gcry_mpi_t x, y;
+
+  x = x_in? x_in : mpi_new (0);
+  y = y_in? y_in : mpi_new (0);
+
+  if (_gcry_mpi_ec_get_affine (x, y, point, ec))
+    {
+      log_error ("eddsa_encodepoint: Failed to get affine coordinates\n");
+      rc = GPG_ERR_INTERNAL;
+    }
+  else
+    rc = eddsa_encode_x_y (x, y, ec->nbits, with_prefix, r_buffer, r_buflen);
+
+  if (!x_in)
+    mpi_free (x);
+  if (!y_in)
+    mpi_free (y);
+  return rc;
+}
+
+
+/* Make sure that the opaque MPI VALUE is in compact EdDSA format.
+   This function updates MPI if needed.  */
+gpg_err_code_t
+_gcry_ecc_eddsa_ensure_compact (gcry_mpi_t value, unsigned int nbits)
+{
+  gpg_err_code_t rc;
+  const unsigned char *buf;
+  unsigned int rawmpilen;
+  gcry_mpi_t x, y;
+  unsigned char *enc;
+  unsigned int enclen;
+
+  if (!mpi_is_opaque (value))
+    return GPG_ERR_INV_OBJ;
+  buf = mpi_get_opaque (value, &rawmpilen);
+  if (!buf)
+    return GPG_ERR_INV_OBJ;
+  rawmpilen = (rawmpilen + 7)/8;
+
+  if (rawmpilen > 1 && (rawmpilen%2))
+    {
+      if (buf[0] == 0x04)
+        {
+          /* Buffer is in SEC1 uncompressed format.  Extract y and
+             compress.  */
+          rc = _gcry_mpi_scan (&x, GCRYMPI_FMT_USG,
+                               buf+1, (rawmpilen-1)/2, NULL);
+          if (rc)
+            return rc;
+          rc = _gcry_mpi_scan (&y, GCRYMPI_FMT_USG,
+                               buf+1+(rawmpilen-1)/2, (rawmpilen-1)/2, NULL);
+          if (rc)
+            {
+              mpi_free (x);
+              return rc;
+            }
+
+          rc = eddsa_encode_x_y (x, y, nbits, 0, &enc, &enclen);
+          mpi_free (x);
+          mpi_free (y);
+          if (rc)
+            return rc;
+
+          mpi_set_opaque (value, enc, 8*enclen);
+        }
+      else if (buf[0] == 0x40)
+        {
+          /* Buffer is compressed but with our SEC1 alike compression
+             indicator.  Remove that byte.  FIXME: We should write and
+             use a function to manipulate an opaque MPI in place. */
+          if (!_gcry_mpi_set_opaque_copy (value, buf + 1, (rawmpilen - 1)*8))
+            return gpg_err_code_from_syserror ();
+        }
+    }
+
+  return 0;
+}
+
+
+static gpg_err_code_t
+ecc_ed448_recover_x (gcry_mpi_t x, gcry_mpi_t y, int x_0, mpi_ec_t ec)
+{
+  gpg_err_code_t rc = 0;
+  gcry_mpi_t u, v, u3, v3, t;
+  static gcry_mpi_t p34; /* Hard coded (P-3)/4 */
+
+  if (mpi_cmp (y, ec->p) >= 0)
+    rc = GPG_ERR_INV_OBJ;
+
+  if (!p34)
+    p34 = scanval ("3FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF"
+                   "BFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF");
+
+  u   = mpi_new (0);
+  v   = mpi_new (0);
+  u3  = mpi_new (0);
+  v3  = mpi_new (0);
+  t   = mpi_new (0);
+
+  /* Compute u and v */
+  /* u = y^2    */
+  mpi_mulm (u, y, y, ec->p);
+  /* v = b*y^2   */
+  mpi_mulm (v, ec->b, u, ec->p);
+  /* u = y^2-1  */
+  mpi_sub_ui (u, u, 1);
+  /* v = b*y^2-1 */
+  mpi_sub_ui (v, v, 1);
+
+  /* Compute sqrt(u/v) */
+  /* u3 = u^3 */
+  mpi_powm (u3, u, mpi_const (MPI_C_THREE), ec->p);
+  mpi_powm (v3, v, mpi_const (MPI_C_THREE), ec->p);
+  /* t = u^4 * u * v3 = u^5 * v^3 */
+  mpi_powm (t, u, mpi_const (MPI_C_FOUR), ec->p);
+  mpi_mulm (t, t, u, ec->p);
+  mpi_mulm (t, t, v3, ec->p);
+  /* t = t^((p-3)/4) = (u^5 * v^3)^((p-3)/4)  */
+  mpi_powm (t, t, p34, ec->p);
+  /* x = t * u^3 * v = (u^3 * v) * (u^5 * v^3)^((p-3)/4) */
+  mpi_mulm (t, t, u3, ec->p);
+  mpi_mulm (x, t, v, ec->p);
+
+  /* t = v * x^2  */
+  mpi_mulm (t, x, x, ec->p);
+  mpi_mulm (t, t, v, ec->p);
+
+  if (mpi_cmp (t, u) != 0)
+    rc = GPG_ERR_INV_OBJ;
+  else
+    {
+      if (!mpi_cmp_ui (x, 0) && x_0)
+        rc = GPG_ERR_INV_OBJ;
+
+      /* Choose the desired square root according to parity */
+      if (mpi_test_bit (x, 0) != !!x_0)
+        mpi_sub (x, ec->p, x);
+    }
+
+  mpi_free (t);
+  mpi_free (u3);
+  mpi_free (v3);
+  mpi_free (v);
+  mpi_free (u);
+
+  return rc;
+}
+
+
+/* Recover X from Y and SIGN (which actually is a parity bit).  */
+gpg_err_code_t
+_gcry_ecc_eddsa_recover_x (gcry_mpi_t x, gcry_mpi_t y, int sign, mpi_ec_t ec)
+{
+  gpg_err_code_t rc = 0;
+  gcry_mpi_t u, v, v3, t;
+  static gcry_mpi_t p58, seven;
+
+  /*
+   * This routine is actually curve specific.  Now, only supports
+   * Ed25519 and Ed448.
+   */
+
+  if (ec->dialect != ECC_DIALECT_ED25519)
+    /* For now, it's only Ed448.  */
+    return ecc_ed448_recover_x (x, y, sign, ec);
+
+  /* It's Ed25519.  */
+
+  if (!p58)
+    p58 = scanval ("0FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF"
+                   "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFD");
+  if (!seven)
+    seven = mpi_set_ui (NULL, 7);
+
+  u   = mpi_new (0);
+  v   = mpi_new (0);
+  v3  = mpi_new (0);
+  t   = mpi_new (0);
+
+  /* Compute u and v */
+  /* u = y^2    */
+  mpi_mulm (u, y, y, ec->p);
+  /* v = b*y^2   */
+  mpi_mulm (v, ec->b, u, ec->p);
+  /* u = y^2-1  */
+  mpi_sub_ui (u, u, 1);
+  /* v = b*y^2+1 */
+  mpi_add_ui (v, v, 1);
+
+  /* Compute sqrt(u/v) */
+  /* v3 = v^3 */
+  mpi_powm (v3, v, mpi_const (MPI_C_THREE), ec->p);
+  /* t = v3 * v3 * u * v = u * v^7 */
+  mpi_powm (t, v, seven, ec->p);
+  mpi_mulm (t, t, u, ec->p);
+  /* t = t^((p-5)/8) = (u * v^7)^((p-5)/8)  */
+  mpi_powm (t, t, p58, ec->p);
+  /* x = t * u * v^3 = (u * v^3) * (u * v^7)^((p-5)/8) */
+  mpi_mulm (t, t, u, ec->p);
+  mpi_mulm (x, t, v3, ec->p);
+
+  /* Adjust if needed.  */
+  /* t = v * x^2  */
+  mpi_mulm (t, x, x, ec->p);
+  mpi_mulm (t, t, v, ec->p);
+  /* -t == u ? x = x * sqrt(-1) */
+  mpi_sub (t, ec->p, t);
+  if (!mpi_cmp (t, u))
+    {
+      static gcry_mpi_t m1;  /* Fixme: this is not thread-safe.  */
+      if (!m1)
+        m1 = scanval ("2B8324804FC1DF0B2B4D00993DFBD7A7"
+                      "2F431806AD2FE478C4EE1B274A0EA0B0");
+      mpi_mulm (x, x, m1, ec->p);
+      /* t = v * x^2  */
+      mpi_mulm (t, x, x, ec->p);
+      mpi_mulm (t, t, v, ec->p);
+      /* -t == u ? x = x * sqrt(-1) */
+      mpi_sub (t, ec->p, t);
+      if (!mpi_cmp (t, u))
+        rc = GPG_ERR_INV_OBJ;
+    }
+
+  /* Choose the desired square root according to parity */
+  if (mpi_test_bit (x, 0) != !!sign)
+    mpi_sub (x, ec->p, x);
+
+  mpi_free (t);
+  mpi_free (v3);
+  mpi_free (v);
+  mpi_free (u);
+
+  return rc;
+}
+
+
+/* Decode the EdDSA style encoded PK and set it into RESULT.  CTX is
+   the usual curve context.  If R_ENCPK is not NULL, the encoded PK is
+   stored at that address; this is a new copy to be released by the
+   caller.  In contrast to the supplied PK, this is not an MPI and
+   thus guaranteed to be properly padded.  R_ENCPKLEN receives the
+   length of that encoded key.  */
+gpg_err_code_t
+_gcry_ecc_eddsa_decodepoint (gcry_mpi_t pk, mpi_ec_t ctx, mpi_point_t result,
+                             unsigned char **r_encpk, unsigned int *r_encpklen)
+{
+  gpg_err_code_t rc;
+  unsigned char *rawmpi;
+  unsigned int rawmpilen;
+  int sign;
+
+  if (mpi_is_opaque (pk))
+    {
+      const unsigned char *buf;
+      unsigned int len;
+
+      len = (ctx->nbits%8) == 0 ? (ctx->nbits/8 + 1): (ctx->nbits+7)/8;
+
+      buf = mpi_get_opaque (pk, &rawmpilen);
+      if (!buf)
+        return GPG_ERR_INV_OBJ;
+      rawmpilen = (rawmpilen + 7)/8;
+
+      if (!(rawmpilen == len
+            || rawmpilen == len + 1
+            || rawmpilen == len * 2 + 1))
+        return GPG_ERR_INV_OBJ;
+
+      /* Handle compression prefixes.  The size of the buffer will be
+         odd in this case.  */
+      if (rawmpilen > 1 && (rawmpilen == len + 1 || rawmpilen == len * 2 + 1))
+        {
+          /* First check whether the public key has been given in
+             standard uncompressed format (SEC1).  No need to recover
+             x in this case.  */
+          if (buf[0] == 0x04)
+            {
+              gcry_mpi_t x, y;
+
+              rc = _gcry_mpi_scan (&x, GCRYMPI_FMT_USG,
+                                   buf+1, (rawmpilen-1)/2, NULL);
+              if (rc)
+                return rc;
+              rc = _gcry_mpi_scan (&y, GCRYMPI_FMT_USG,
+                                   buf+1+(rawmpilen-1)/2, 
(rawmpilen-1)/2,NULL);
+              if (rc)
+                {
+                  mpi_free (x);
+                  return rc;
+                }
+
+              if (r_encpk)
+                {
+                  rc = eddsa_encode_x_y (x, y, ctx->nbits, 0,
+                                         r_encpk, r_encpklen);
+                  if (rc)
+                    {
+                      mpi_free (x);
+                      mpi_free (y);
+                      return rc;
+                    }
+                }
+              mpi_snatch (result->x, x);
+              mpi_snatch (result->y, y);
+              mpi_set_ui (result->z, 1);
+              return 0;
+            }
+
+          /* Check whether the public key has been prefixed with a 0x40
+             byte to explicitly indicate compressed format using a SEC1
+             alike prefix byte.  This is a Libgcrypt extension.  */
+          if (buf[0] == 0x40)
+            {
+              rawmpilen--;
+              buf++;
+            }
+        }
+
+      /* EdDSA compressed point.  */
+      rawmpi = xtrymalloc (rawmpilen);
+      if (!rawmpi)
+        return gpg_err_code_from_syserror ();
+      memcpy (rawmpi, buf, rawmpilen);
+      reverse_buffer (rawmpi, rawmpilen);
+    }
+  else
+    {
+      /* Note: Without using an opaque MPI it is not reliable possible
+         to find out whether the public key has been given in
+         uncompressed format.  Thus we expect native EdDSA format.  */
+      rawmpi = _gcry_mpi_get_buffer (pk, (ctx->nbits+7)/8, &rawmpilen, NULL);
+      if (!rawmpi)
+        return gpg_err_code_from_syserror ();
+    }
+
+  if (rawmpilen)
+    {
+      sign = !!(rawmpi[0] & 0x80);
+      rawmpi[0] &= 0x7f;
+    }
+  else
+    sign = 0;
+  _gcry_mpi_set_buffer (result->y, rawmpi, rawmpilen, 0);
+  if (r_encpk)
+    {
+      /* Revert to little endian.  */
+      if (sign && rawmpilen)
+        rawmpi[0] |= 0x80;
+      reverse_buffer (rawmpi, rawmpilen);
+      *r_encpk = rawmpi;
+      if (r_encpklen)
+        *r_encpklen = rawmpilen;
+    }
+  else
+    xfree (rawmpi);
+
+  rc = _gcry_ecc_eddsa_recover_x (result->x, result->y, sign, ctx);
+  mpi_set_ui (result->z, 1);
+
+  return rc;
+}
+
+
+/* Compute the A value as used by EdDSA.  The caller needs to provide
+   the context EC and the actual secret D as an MPI.  The function
+   returns a newly allocated 64 byte buffer at r_digest; the first 32
+   bytes represent the A value.  NULL is returned on error and NULL
+   stored at R_DIGEST.  */
+gpg_err_code_t
+_gcry_ecc_eddsa_compute_h_d (unsigned char **r_digest, mpi_ec_t ec)
+{
+  gpg_err_code_t rc;
+  unsigned char *rawmpi = NULL;
+  unsigned int rawmpilen;
+  unsigned char *digest;
+  int hashalgo, b, digestlen;
+  gcry_buffer_t hvec[2];
+
+  *r_digest = NULL;
+
+  b = (ec->nbits+7)/8;
+
+  /*
+   * Choice of hashalgo is curve specific.
+   * For now, it's determine by the bit size of the field.
+   */
+  if (ec->nbits == 255)
+    {
+      hashalgo = GCRY_MD_SHA512;
+      digestlen = 64;
+    }
+  else if (ec->nbits == 448)
+    {
+      b++;
+      hashalgo = GCRY_MD_SHAKE256;
+      digestlen = 2 * b;
+    }
+  else
+    return GPG_ERR_NOT_IMPLEMENTED;
+
+  /* Note that we clear DIGEST so we can use it as input to left pad
+     the key with zeroes for hashing.  */
+  digest = xtrycalloc_secure (2, b);
+  if (!digest)
+    return gpg_err_code_from_syserror ();
+
+  rawmpi = _gcry_mpi_get_buffer (ec->d, 0, &rawmpilen, NULL);
+  if (!rawmpi)
+    {
+      xfree (digest);
+      return gpg_err_code_from_syserror ();
+    }
+
+  memset (hvec, 0, sizeof hvec);
+
+  hvec[0].data = digest;
+  hvec[0].len = (hashalgo == GCRY_MD_SHA512 && b > rawmpilen)
+                 ? b - rawmpilen : 0;
+  hvec[1].data = rawmpi;
+  hvec[1].len = rawmpilen;
+  rc = _gcry_md_hash_buffers_extract (hashalgo, 0, digest, digestlen, hvec, 2);
+
+  xfree (rawmpi);
+  if (rc)
+    {
+      xfree (digest);
+      return rc;
+    }
+
+  /* Compute the A value.  */
+  reverse_buffer (digest, b);  /* Only the first half of the hash.  */
+
+  /* Field specific handling of clearing/setting bits. */
+  if (ec->nbits == 255)
+    {
+      digest[0]   = (digest[0] & 0x7f) | 0x40;
+      digest[31] &= 0xf8;
+    }
+  else
+    {
+      digest[0]   = 0;
+      digest[1]  |= 0x80;
+      digest[56] &= 0xfc;
+    }
+
+  *r_digest = digest;
+  return 0;
+}
+
+
+/**
+ * _gcry_ecc_eddsa_genkey - EdDSA version of the key generation.
+ *
+ * @ec: Elliptic curve computation context.
+ * @flags: Flags controlling aspects of the creation.
+ *
+ * Return: An error code.
+ *
+ * The only @flags bit used by this function is %PUBKEY_FLAG_TRANSIENT
+ * to use a faster RNG.
+ */
+gpg_err_code_t
+_gcry_ecc_eddsa_genkey (mpi_ec_t ec, int flags)
+{
+  gpg_err_code_t rc;
+  int b;
+  gcry_mpi_t a, x, y;
+  mpi_point_struct Q;
+  gcry_random_level_t random_level;
+  char *dbuf;
+  size_t dlen;
+  unsigned char *hash_d = NULL;
+
+  if ((flags & PUBKEY_FLAG_TRANSIENT_KEY))
+    random_level = GCRY_STRONG_RANDOM;
+  else
+    random_level = GCRY_VERY_STRONG_RANDOM;
+
+  b = (ec->nbits+7)/8;
+
+  if (ec->nbits == 255)
+    ;
+  else if (ec->nbits == 448)
+    b++;
+  else
+    return GPG_ERR_NOT_IMPLEMENTED;
+
+  dlen = b;
+
+  a = mpi_snew (0);
+  x = mpi_new (0);
+  y = mpi_new (0);
+
+  /* Generate a secret.  */
+  dbuf = _gcry_random_bytes_secure (dlen, random_level);
+  ec->d = _gcry_mpi_set_opaque (NULL, dbuf, dlen*8);
+  rc = _gcry_ecc_eddsa_compute_h_d (&hash_d, ec);
+  if (rc)
+    goto leave;
+
+  _gcry_mpi_set_buffer (a, hash_d, b, 0);
+  xfree (hash_d);
+  /* log_printmpi ("ecgen         a", a); */
+
+  /* Compute Q.  */
+  point_init (&Q);
+  _gcry_mpi_ec_mul_point (&Q, a, ec->G, ec);
+  if (DBG_CIPHER)
+    log_printpnt ("ecgen      pk", &Q, ec);
+
+  ec->Q = mpi_point_snatch_set (NULL, Q.x, Q.y, Q.z);
+  Q.x = NULL;
+  Q.y = NULL;
+  Q.x = NULL;
+
+ leave:
+  _gcry_mpi_release (a);
+  _gcry_mpi_release (x);
+  _gcry_mpi_release (y);
+  return rc;
+}
+
+
+/* Compute an EdDSA signature. See:
+ *   [ed25519] 23pp. (PDF) Daniel J. Bernstein, Niels Duif, Tanja
+ *   Lange, Peter Schwabe, Bo-Yin Yang. High-speed high-security
+ *   signatures.  Journal of Cryptographic Engineering 2 (2012), 77-89.
+ *   Document ID: a1a62a2f76d23f65d622484ddd09caf8.
+ *   URL: http://cr.yp.to/papers.html#ed25519. Date: 2011.09.26.
+ *
+ * Despite that this function requires the specification of a hash
+ * algorithm, we only support what has been specified by the paper.
+ * This may change in the future.
+ *
+ * Return the signature struct (r,s) from the message hash.  The caller
+ * must have allocated R_R and S.
+ */
+
+/* String to be used with Ed448 */
+#define DOM25519     "SigEd25519 no Ed25519 collisions"
+#define DOM25519_LEN 32
+#define DOM448       "SigEd448"
+#define DOM448_LEN   8
+
+gpg_err_code_t
+_gcry_ecc_eddsa_sign (gcry_mpi_t input, mpi_ec_t ec,
+                      gcry_mpi_t r_r, gcry_mpi_t s,
+                      struct pk_encoding_ctx *ctx)
+{
+  int rc;
+  unsigned int tmp;
+  unsigned char *digest = NULL;
+  const void *mbuf;
+  size_t mlen;
+  unsigned char *rawmpi = NULL;
+  unsigned int rawmpilen = 0;
+  unsigned char *encpk = NULL; /* Encoded public key.  */
+  unsigned int encpklen = 0;
+  mpi_point_struct I;          /* Intermediate value.  */
+  gcry_mpi_t a, x, y, r;
+  const char *dom;
+  int domlen, digestlen;
+  int b, i;
+  unsigned char x_olen[2];
+  unsigned char prehashed_msg[64];
+  gcry_buffer_t hvec[6];
+  gcry_buffer_t hvec2[1];
+
+  b = (ec->nbits+7)/8;
+
+  if (ec->nbits == 255)
+    {
+      dom = DOM25519;
+      domlen = DOM25519_LEN;
+      digestlen = 64;
+    }
+  else if (ec->nbits == 448)
+    {
+      b++;
+      dom = DOM448;
+      domlen = DOM448_LEN;
+      digestlen = 2 * b;
+    }
+  else
+    return GPG_ERR_NOT_IMPLEMENTED;
+
+  if (!mpi_is_opaque (input))
+    return GPG_ERR_INV_DATA;
+
+  /* Initialize some helpers.  */
+  point_init (&I);
+  a = mpi_snew (0);
+  x = mpi_new (0);
+  y = mpi_new (0);
+  r = mpi_snew (0);
+
+  rc = _gcry_ecc_eddsa_compute_h_d (&digest, ec);
+  if (rc)
+    goto leave;
+  _gcry_mpi_set_buffer (a, digest, b, 0);
+
+  /* Compute the public key if it's not available (only secret part).  */
+  if (ec->Q == NULL)
+    {
+      mpi_point_struct Q;
+
+      point_init (&Q);
+      _gcry_mpi_ec_mul_point (&Q, a, ec->G, ec);
+      ec->Q = mpi_point_snatch_set (NULL, Q.x, Q.y, Q.z);
+    }
+  rc = _gcry_ecc_eddsa_encodepoint (ec->Q, ec, x, y, 0, &encpk, &encpklen);
+  if (rc)
+    goto leave;
+  if (DBG_CIPHER)
+    log_printhex ("  e_pk", encpk, encpklen);
+
+  /* Compute R.  */
+  mbuf = mpi_get_opaque (input, &tmp);
+  mlen = (tmp +7)/8;
+  if (DBG_CIPHER)
+    log_printhex ("     m", mbuf, mlen);
+
+  memset (hvec, 0, sizeof hvec);
+  i = 0;
+
+  if ((ctx->flags & PUBKEY_FLAG_PREHASH) || ctx->labellen || ec->nbits == 448)
+    {
+      hvec[i].data = (void *)dom;
+      hvec[i].len  = domlen;
+      i++;
+      x_olen[0] = !!(ctx->flags & PUBKEY_FLAG_PREHASH);
+      x_olen[1] = ctx->labellen;
+      hvec[i].data = x_olen;
+      hvec[i].len  = 2;
+      i++;
+      if (ctx->labellen)
+       {
+         hvec[i].data = ctx->label;
+         hvec[i].len  = ctx->labellen;
+         i++;
+       }
+    }
+
+  hvec[i].data = digest;
+  hvec[i].off  = b;
+  hvec[i].len  = b;
+  i++;
+  if ((ctx->flags & PUBKEY_FLAG_PREHASH))
+    {
+      memset (hvec2, 0, sizeof hvec2);
+
+      hvec2[0].data = (char*)mbuf;
+      hvec2[0].len  = mlen;
+
+      _gcry_md_hash_buffers_extract (ctx->hash_algo, 0, prehashed_msg, 64,
+                                    hvec2, 1);
+      hvec[i].data = (char*)prehashed_msg;
+      hvec[i].len  = 64;
+    }
+  else
+    {
+      hvec[i].data = (char*)mbuf;
+      hvec[i].len  = mlen;
+    }
+  i++;
+
+  rc = _gcry_md_hash_buffers_extract (ctx->hash_algo, 0, digest, digestlen,
+                                     hvec, i);
+  if (rc)
+    goto leave;
+  reverse_buffer (digest, digestlen);
+  if (DBG_CIPHER)
+    log_printhex ("     r", digest, digestlen);
+  _gcry_mpi_set_buffer (r, digest, digestlen, 0);
+  mpi_mod (r, r, ec->n);
+  _gcry_mpi_ec_mul_point (&I, r, ec->G, ec);
+  if (DBG_CIPHER)
+    log_printpnt ("   r", &I, ec);
+
+  /* Convert R into affine coordinates and apply encoding.  */
+  rc = _gcry_ecc_eddsa_encodepoint (&I, ec, x, y, 0, &rawmpi, &rawmpilen);
+  if (rc)
+    goto leave;
+  if (DBG_CIPHER)
+    log_printhex ("   e_r", rawmpi, rawmpilen);
+
+  memset (hvec, 0, sizeof hvec);
+  i = 0;
+
+  if ((ctx->flags & PUBKEY_FLAG_PREHASH) || ctx->labellen || ec->nbits == 448)
+    {
+      hvec[i].data = (void *)dom;
+      hvec[i].len  = domlen;
+      i++;
+      x_olen[0] = !!(ctx->flags & PUBKEY_FLAG_PREHASH);
+      x_olen[1] = ctx->labellen;
+      hvec[i].data = x_olen;
+      hvec[i].len  = 2;
+      i++;
+      if (ctx->labellen)
+       {
+         hvec[i].data = ctx->label;
+         hvec[i].len  = ctx->labellen;
+         i++;
+       }
+    }
+
+  /* S = r + a * H(dom2(F,C)+encodepoint(R)+encodepoint(pk)+m) mod n  */
+  hvec[i].data = rawmpi;  /* (this is R) */
+  hvec[i].len  = rawmpilen;
+  i++;
+  hvec[i].data = encpk;
+  hvec[i].len  = encpklen;
+  i++;
+  if ((ctx->flags & PUBKEY_FLAG_PREHASH))
+    {
+      hvec[i].data = (char*)prehashed_msg;
+      hvec[i].len  = 64;
+    }
+  else
+    {
+      hvec[i].data = (char*)mbuf;
+      hvec[i].len  = mlen;
+    }
+  i++;
+
+  rc = _gcry_md_hash_buffers_extract (ctx->hash_algo, 0, digest, digestlen,
+                                     hvec, i);
+  if (rc)
+    goto leave;
+
+  /* No more need for RAWMPI thus we now transfer it to R_R.  */
+  mpi_set_opaque (r_r, rawmpi, rawmpilen*8);
+  rawmpi = NULL;
+
+  reverse_buffer (digest, digestlen);
+  if (DBG_CIPHER)
+    log_printhex (" H(R+)", digest, digestlen);
+  _gcry_mpi_set_buffer (s, digest, digestlen, 0);
+  mpi_mulm (s, s, a, ec->n);
+  mpi_addm (s, s, r, ec->n);
+  rc = eddsa_encodempi (s, ec->nbits, &rawmpi, &rawmpilen);
+  if (rc)
+    goto leave;
+  if (DBG_CIPHER)
+    log_printhex ("   e_s", rawmpi, rawmpilen);
+  mpi_set_opaque (s, rawmpi, rawmpilen*8);
+  rawmpi = NULL;
+
+  rc = 0;
+
+ leave:
+  _gcry_mpi_release (a);
+  _gcry_mpi_release (x);
+  _gcry_mpi_release (y);
+  _gcry_mpi_release (r);
+  xfree (digest);
+  point_free (&I);
+  xfree (encpk);
+  xfree (rawmpi);
+  return rc;
+}
+
+
+/* Verify an EdDSA signature.  See sign_eddsa for the reference.
+ * Check if R_IN and S_IN verifies INPUT.
+ */
+gpg_err_code_t
+_gcry_ecc_eddsa_verify (gcry_mpi_t input, mpi_ec_t ec,
+                        gcry_mpi_t r_in, gcry_mpi_t s_in,
+                        struct pk_encoding_ctx *ctx)
+{
+  int rc;
+  int b;
+  unsigned int tmp;
+  unsigned char *encpk = NULL; /* Encoded public key.  */
+  unsigned int encpklen = 0;
+  const void *mbuf, *rbuf;
+  unsigned char *tbuf = NULL;
+  size_t mlen, rlen;
+  unsigned int tlen;
+  unsigned char digest[114];
+  gcry_mpi_t h, s;
+  mpi_point_struct Ia, Ib;
+  const char *dom;
+  int domlen, digestlen;
+  int i;
+  unsigned char x_olen[2];
+  unsigned char prehashed_msg[64];
+  gcry_buffer_t hvec[6];
+  gcry_buffer_t hvec2[1];
+
+  if (!mpi_is_opaque (input) || !mpi_is_opaque (r_in) || !mpi_is_opaque (s_in))
+    return GPG_ERR_INV_DATA;
+
+  b = (ec->nbits+7)/8;
+
+  if (ec->nbits == 255)
+    {
+      dom = DOM25519;
+      domlen = DOM25519_LEN;
+      digestlen = 64;
+    }
+  else if (ec->nbits == 448)
+    {
+      b++;
+      dom = DOM448;
+      domlen = DOM448_LEN;
+      digestlen = 2 * b;
+    }
+  else
+    return GPG_ERR_NOT_IMPLEMENTED;
+
+  point_init (&Ia);
+  point_init (&Ib);
+  h = mpi_new (0);
+  s = mpi_new (0);
+
+  /* Encode and check the public key.  */
+  rc = _gcry_ecc_eddsa_encodepoint (ec->Q, ec, NULL, NULL, 0,
+                                    &encpk, &encpklen);
+  if (rc)
+    goto leave;
+  if (!_gcry_mpi_ec_curve_point (ec->Q, ec))
+    {
+      rc = GPG_ERR_BROKEN_PUBKEY;
+      goto leave;
+    }
+  if (DBG_CIPHER)
+    log_printhex ("  e_pk", encpk, encpklen);
+  if (encpklen != b)
+    {
+      rc = GPG_ERR_INV_LENGTH;
+      goto leave;
+    }
+
+  /* Convert the other input parameters.  */
+  mbuf = mpi_get_opaque (input, &tmp);
+  mlen = (tmp +7)/8;
+  if (DBG_CIPHER)
+    log_printhex ("     m", mbuf, mlen);
+  rbuf = mpi_get_opaque (r_in, &tmp);
+  rlen = (tmp +7)/8;
+  if (DBG_CIPHER)
+    log_printhex ("     r", rbuf, rlen);
+  if (rlen != b)
+    {
+      rc = GPG_ERR_INV_LENGTH;
+      goto leave;
+    }
+
+  memset (hvec, 0, sizeof hvec);
+  i = 0;
+
+  /* h = H(dom2(F,C)+encodepoint(R)+encodepoint(pk)+m)  */
+  if ((ctx->flags & PUBKEY_FLAG_PREHASH) || ctx->labellen || ec->nbits == 448)
+    {
+      hvec[i].data = (void *)dom;
+      hvec[i].len  = domlen;
+      i++;
+      x_olen[0] = !!(ctx->flags & PUBKEY_FLAG_PREHASH);
+      x_olen[1] = ctx->labellen;
+      hvec[i].data = x_olen;
+      hvec[i].len  = 2;
+      i++;
+      if (ctx->labellen)
+       {
+         hvec[i].data = ctx->label;
+         hvec[i].len  = ctx->labellen;
+         i++;
+       }
+    }
+
+  hvec[i].data = (char*)rbuf;
+  hvec[i].len  = rlen;
+  i++;
+  hvec[i].data = encpk;
+  hvec[i].len  = encpklen;
+  i++;
+  if ((ctx->flags & PUBKEY_FLAG_PREHASH))
+    {
+      memset (hvec2, 0, sizeof hvec2);
+
+      hvec2[0].data = (char*)mbuf;
+      hvec2[0].len  = mlen;
+
+      _gcry_md_hash_buffers_extract (ctx->hash_algo, 0, prehashed_msg, 64,
+                                     hvec2, 1);
+      hvec[i].data = (char*)prehashed_msg;
+      hvec[i].len  = 64;
+    }
+  else
+    {
+      hvec[i].data = (char*)mbuf;
+      hvec[i].len  = mlen;
+    }
+  i++;
+
+  rc = _gcry_md_hash_buffers_extract (ctx->hash_algo, 0, digest, digestlen,
+                                     hvec, i);
+  if (rc)
+    goto leave;
+  reverse_buffer (digest, digestlen);
+  if (DBG_CIPHER)
+    log_printhex (" H(R+)", digest, digestlen);
+  _gcry_mpi_set_buffer (h, digest, digestlen, 0);
+
+  /* According to the paper the best way for verification is:
+         encodepoint(sG - h·Q) = encodepoint(r)
+     because we don't need to decode R. */
+  {
+    void *sbuf;
+    unsigned int slen;
+
+    sbuf = _gcry_mpi_get_opaque_copy (s_in, &tmp);
+    slen = (tmp +7)/8;
+    reverse_buffer (sbuf, slen);
+    if (DBG_CIPHER)
+      log_printhex ("     s", sbuf, slen);
+    _gcry_mpi_set_buffer (s, sbuf, slen, 0);
+    xfree (sbuf);
+    if (slen != b)
+      {
+        rc = GPG_ERR_INV_LENGTH;
+        goto leave;
+      }
+  }
+
+  _gcry_mpi_ec_mul_point (&Ia, s, ec->G, ec);
+  _gcry_mpi_ec_mul_point (&Ib, h, ec->Q, ec);
+  _gcry_mpi_sub (Ib.x, ec->p, Ib.x);
+  _gcry_mpi_ec_add_points (&Ia, &Ia, &Ib, ec);
+  rc = _gcry_ecc_eddsa_encodepoint (&Ia, ec, s, h, 0, &tbuf, &tlen);
+  if (rc)
+    goto leave;
+  if (tlen != rlen || memcmp (tbuf, rbuf, tlen))
+    {
+      rc = GPG_ERR_BAD_SIGNATURE;
+      goto leave;
+    }
+
+  rc = 0;
+
+ leave:
+  xfree (encpk);
+  xfree (tbuf);
+  _gcry_mpi_release (s);
+  _gcry_mpi_release (h);
+  point_free (&Ia);
+  point_free (&Ib);
+  return rc;
+}
diff --git a/grub-core/lib/libgcrypt/cipher/ecc-gost.c 
b/grub-core/lib/libgcrypt/cipher/ecc-gost.c
new file mode 100644
index 000000000..36230f8a3
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/ecc-gost.c
@@ -0,0 +1,218 @@
+/* ecc-gots.c  -  Elliptic Curve GOST signatures
+ * Copyright (C) 2007, 2008, 2010, 2011 Free Software Foundation, Inc.
+ * Copyright (C) 2013 Dmitry Eremin-Solenikov
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "g10lib.h"
+#include "mpi.h"
+#include "cipher.h"
+#include "context.h"
+#include "ec-context.h"
+#include "ecc-common.h"
+#include "pubkey-internal.h"
+
+
+/* Compute an GOST R 34.10-01/-12 signature.
+ * Return the signature struct (r,s) from the message hash.  The caller
+ * must have allocated R and S.
+ */
+gpg_err_code_t
+_gcry_ecc_gost_sign (gcry_mpi_t input, mpi_ec_t ec,
+                     gcry_mpi_t r, gcry_mpi_t s)
+{
+  gpg_err_code_t rc = 0;
+  gcry_mpi_t k, dr, sum, ke, x, e;
+  mpi_point_struct I;
+  gcry_mpi_t hash;
+  unsigned int qbits;
+
+  if (DBG_CIPHER)
+    log_mpidump ("gost sign hash  ", input );
+
+  qbits = mpi_get_nbits (ec->n);
+
+  /* Convert the INPUT into an MPI if needed.  */
+  rc = _gcry_dsa_normalize_hash (input, &hash, qbits);
+  if (rc)
+    return rc;
+
+  k = NULL;
+  dr = mpi_alloc (0);
+  sum = mpi_alloc (0);
+  ke = mpi_alloc (0);
+  e = mpi_alloc (0);
+  x = mpi_alloc (0);
+  point_init (&I);
+
+  mpi_mod (e, input, ec->n); /* e = hash mod n */
+
+  if (!mpi_cmp_ui (e, 0))
+    mpi_set_ui (e, 1);
+
+  /* Two loops to avoid R or S are zero.  This is more of a joke than
+     a real demand because the probability of them being zero is less
+     than any hardware failure.  Some specs however require it.  */
+  do
+    {
+      do
+        {
+          mpi_free (k);
+          k = _gcry_dsa_gen_k (ec->n, GCRY_STRONG_RANDOM);
+
+          _gcry_dsa_modify_k (k, ec->n, qbits);
+
+          _gcry_mpi_ec_mul_point (&I, k, ec->G, ec);
+          if (_gcry_mpi_ec_get_affine (x, NULL, &I, ec))
+            {
+              if (DBG_CIPHER)
+                log_debug ("ecc sign: Failed to get affine coordinates\n");
+              rc = GPG_ERR_BAD_SIGNATURE;
+              goto leave;
+            }
+          mpi_mod (r, x, ec->n);  /* r = x mod n */
+        }
+      while (!mpi_cmp_ui (r, 0));
+      mpi_mulm (dr, ec->d, r, ec->n); /* dr = d*r mod n  */
+      mpi_mulm (ke, k, e, ec->n); /* ke = k*e mod n */
+      mpi_addm (s, ke, dr, ec->n); /* sum = (k*e+ d*r) mod n  */
+    }
+  while (!mpi_cmp_ui (s, 0));
+
+  if (DBG_CIPHER)
+    {
+      log_mpidump ("gost sign result r ", r);
+      log_mpidump ("gost sign result s ", s);
+    }
+
+ leave:
+  point_free (&I);
+  mpi_free (x);
+  mpi_free (e);
+  mpi_free (ke);
+  mpi_free (sum);
+  mpi_free (dr);
+  mpi_free (k);
+
+  if (hash != input)
+    mpi_free (hash);
+
+  return rc;
+}
+
+
+/* Verify a GOST R 34.10-01/-12 signature.
+ * Check if R and S verifies INPUT.
+ */
+gpg_err_code_t
+_gcry_ecc_gost_verify (gcry_mpi_t input, mpi_ec_t ec,
+                       gcry_mpi_t r, gcry_mpi_t s)
+{
+  gpg_err_code_t err = 0;
+  gcry_mpi_t e, x, z1, z2, v, rv, zero;
+  mpi_point_struct Q, Q1, Q2;
+
+  if (!_gcry_mpi_ec_curve_point (ec->Q, ec))
+    return GPG_ERR_BROKEN_PUBKEY;
+
+  if( !(mpi_cmp_ui (r, 0) > 0 && mpi_cmp (r, ec->n) < 0) )
+    return GPG_ERR_BAD_SIGNATURE; /* Assertion 0 < r < n  failed.  */
+  if( !(mpi_cmp_ui (s, 0) > 0 && mpi_cmp (s, ec->n) < 0) )
+    return GPG_ERR_BAD_SIGNATURE; /* Assertion 0 < s < n  failed.  */
+
+  x = mpi_alloc (0);
+  e = mpi_alloc (0);
+  z1 = mpi_alloc (0);
+  z2 = mpi_alloc (0);
+  v = mpi_alloc (0);
+  rv = mpi_alloc (0);
+  zero = mpi_alloc (0);
+
+  point_init (&Q);
+  point_init (&Q1);
+  point_init (&Q2);
+
+  mpi_mod (e, input, ec->n); /* e = hash mod n */
+  if (!mpi_cmp_ui (e, 0))
+    mpi_set_ui (e, 1);
+  mpi_invm (v, e, ec->n); /* v = e^(-1) (mod n) */
+  mpi_mulm (z1, s, v, ec->n); /* z1 = s*v (mod n) */
+  mpi_mulm (rv, r, v, ec->n); /* rv = r*v (mod n) */
+  mpi_subm (z2, zero, rv, ec->n); /* z2 = -r*v (mod n) */
+
+  _gcry_mpi_ec_mul_point (&Q1, z1, ec->G, ec);
+/*   log_mpidump ("Q1.x", Q1.x); */
+/*   log_mpidump ("Q1.y", Q1.y); */
+/*   log_mpidump ("Q1.z", Q1.z); */
+  _gcry_mpi_ec_mul_point (&Q2, z2, ec->Q, ec);
+/*   log_mpidump ("Q2.x", Q2.x); */
+/*   log_mpidump ("Q2.y", Q2.y); */
+/*   log_mpidump ("Q2.z", Q2.z); */
+  _gcry_mpi_ec_add_points (&Q, &Q1, &Q2, ec);
+/*   log_mpidump (" Q.x", Q.x); */
+/*   log_mpidump (" Q.y", Q.y); */
+/*   log_mpidump (" Q.z", Q.z); */
+
+  if (!mpi_cmp_ui (Q.z, 0))
+    {
+      if (DBG_CIPHER)
+          log_debug ("ecc verify: Rejected\n");
+      err = GPG_ERR_BAD_SIGNATURE;
+      goto leave;
+    }
+  if (_gcry_mpi_ec_get_affine (x, NULL, &Q, ec))
+    {
+      if (DBG_CIPHER)
+        log_debug ("ecc verify: Failed to get affine coordinates\n");
+      err = GPG_ERR_BAD_SIGNATURE;
+      goto leave;
+    }
+  mpi_mod (x, x, ec->n); /* x = x mod E_n */
+  if (mpi_cmp (x, r))   /* x != r */
+    {
+      if (DBG_CIPHER)
+        {
+          log_mpidump ("     x", x);
+          log_mpidump ("     r", r);
+          log_mpidump ("     s", s);
+          log_debug ("ecc verify: Not verified\n");
+        }
+      err = GPG_ERR_BAD_SIGNATURE;
+      goto leave;
+    }
+  if (DBG_CIPHER)
+    log_debug ("ecc verify: Accepted\n");
+
+ leave:
+  point_free (&Q2);
+  point_free (&Q1);
+  point_free (&Q);
+  mpi_free (zero);
+  mpi_free (rv);
+  mpi_free (v);
+  mpi_free (z2);
+  mpi_free (z1);
+  mpi_free (x);
+  mpi_free (e);
+  return err;
+}
diff --git a/grub-core/lib/libgcrypt/cipher/ecc-misc.c 
b/grub-core/lib/libgcrypt/cipher/ecc-misc.c
new file mode 100644
index 000000000..6796ba2c0
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/ecc-misc.c
@@ -0,0 +1,469 @@
+/* ecc-misc.c  -  Elliptic Curve miscellaneous functions
+ * Copyright (C) 2007, 2008, 2010, 2011 Free Software Foundation, Inc.
+ * Copyright (C) 2013 g10 Code GmbH
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "g10lib.h"
+#include "mpi.h"
+#include "cipher.h"
+#include "context.h"
+#include "ec-context.h"
+#include "ecc-common.h"
+
+
+/*
+ * Release a curve object.
+ */
+void
+_gcry_ecc_curve_free (elliptic_curve_t *E)
+{
+  mpi_free (E->p); E->p = NULL;
+  mpi_free (E->a); E->a = NULL;
+  mpi_free (E->b);  E->b = NULL;
+  _gcry_mpi_point_free_parts (&E->G);
+  mpi_free (E->n);  E->n = NULL;
+}
+
+
+/*
+ * Return a copy of a curve object.
+ */
+elliptic_curve_t
+_gcry_ecc_curve_copy (elliptic_curve_t E)
+{
+  elliptic_curve_t R;
+
+  R.model = E.model;
+  R.dialect = E.dialect;
+  R.name = E.name;
+  R.p = mpi_copy (E.p);
+  R.a = mpi_copy (E.a);
+  R.b = mpi_copy (E.b);
+  _gcry_mpi_point_init (&R.G);
+  point_set (&R.G, &E.G);
+  R.n = mpi_copy (E.n);
+  R.h = E.h;
+
+  return R;
+}
+
+
+/*
+ * Return a description of the curve model.
+ */
+const char *
+_gcry_ecc_model2str (enum gcry_mpi_ec_models model)
+{
+  const char *str = "?";
+  switch (model)
+    {
+    case MPI_EC_WEIERSTRASS:    str = "Weierstrass"; break;
+    case MPI_EC_MONTGOMERY:     str = "Montgomery";  break;
+    case MPI_EC_EDWARDS:        str = "Edwards"; break;
+    }
+  return str;
+}
+
+
+/*
+ * Return a description of the curve dialect.
+ */
+const char *
+_gcry_ecc_dialect2str (enum ecc_dialects dialect)
+{
+  const char *str = "?";
+  switch (dialect)
+    {
+    case ECC_DIALECT_STANDARD:  str = "Standard"; break;
+    case ECC_DIALECT_ED25519:   str = "Ed25519"; break;
+    case ECC_DIALECT_SAFECURVE: str = "SafeCurve"; break;
+    }
+  return str;
+}
+
+
+/* Return an uncompressed point (X,Y) in P as a malloced buffer with
+ * its byte length stored at R_LENGTH.  May not be used for sensitive
+ * data. */
+unsigned char *
+_gcry_ecc_ec2os_buf (gcry_mpi_t x, gcry_mpi_t y, gcry_mpi_t p,
+                     unsigned int *r_length)
+{
+  gpg_err_code_t rc;
+  int pbytes = (mpi_get_nbits (p)+7)/8;
+  size_t n;
+  unsigned char *buf, *ptr;
+
+  buf = xmalloc ( 1 + 2*pbytes );
+  *buf = 04; /* Uncompressed point.  */
+  ptr = buf+1;
+  rc = _gcry_mpi_print (GCRYMPI_FMT_USG, ptr, pbytes, &n, x);
+  if (rc)
+    log_fatal ("mpi_print failed: %s\n", gpg_strerror (rc));
+  if (n < pbytes)
+    {
+      memmove (ptr+(pbytes-n), ptr, n);
+      memset (ptr, 0, (pbytes-n));
+    }
+  ptr += pbytes;
+  rc = _gcry_mpi_print (GCRYMPI_FMT_USG, ptr, pbytes, &n, y);
+  if (rc)
+    log_fatal ("mpi_print failed: %s\n", gpg_strerror (rc));
+  if (n < pbytes)
+    {
+      memmove (ptr+(pbytes-n), ptr, n);
+      memset (ptr, 0, (pbytes-n));
+    }
+
+  *r_length = 1 + 2*pbytes;
+  return buf;
+}
+
+
+gcry_mpi_t
+_gcry_ecc_ec2os (gcry_mpi_t x, gcry_mpi_t y, gcry_mpi_t p)
+{
+  unsigned char *buf;
+  unsigned int buflen;
+
+  buf = _gcry_ecc_ec2os_buf (x, y, p, &buflen);
+  return mpi_set_opaque (NULL, buf, 8*buflen);
+}
+
+/* Convert POINT into affine coordinates using the context CTX and
+   return a newly allocated MPI.  If the conversion is not possible
+   NULL is returned.  This function won't print an error message.  */
+gcry_mpi_t
+_gcry_mpi_ec_ec2os (gcry_mpi_point_t point, mpi_ec_t ec)
+{
+  gcry_mpi_t g_x, g_y, result;
+
+  g_x = mpi_new (0);
+  g_y = mpi_new (0);
+  if (_gcry_mpi_ec_get_affine (g_x, g_y, point, ec))
+    result = NULL;
+  else
+    result = _gcry_ecc_ec2os (g_x, g_y, ec->p);
+  mpi_free (g_x);
+  mpi_free (g_y);
+
+  return result;
+}
+
+
+/* Decode octet string in VALUE into RESULT, in the format defined by SEC 1.
+   RESULT must have been initialized and is set on success to the
+   point given by VALUE.  */
+gpg_err_code_t
+_gcry_ecc_sec_decodepoint  (gcry_mpi_t value, mpi_ec_t ec, mpi_point_t result)
+{
+  gpg_err_code_t rc;
+  size_t n;
+  const unsigned char *buf;
+  unsigned char *buf_memory;
+  gcry_mpi_t x, y;
+
+  if (mpi_is_opaque (value))
+    {
+      unsigned int nbits;
+
+      buf = mpi_get_opaque (value, &nbits);
+      if (!buf)
+        return GPG_ERR_INV_OBJ;
+      n = (nbits + 7)/8;
+      buf_memory = NULL;
+    }
+  else
+    {
+      n = (mpi_get_nbits (value)+7)/8;
+      buf_memory = xmalloc (n);
+      rc = _gcry_mpi_print (GCRYMPI_FMT_USG, buf_memory, n, &n, value);
+      if (rc)
+        {
+          xfree (buf_memory);
+          return rc;
+        }
+      buf = buf_memory;
+    }
+
+  if (n < 1)
+    {
+      xfree (buf_memory);
+      return GPG_ERR_INV_OBJ;
+    }
+
+  if (*buf == 2 || *buf == 3)
+    {
+      gcry_mpi_t x3;
+      gcry_mpi_t t;
+      gcry_mpi_t p1_4;
+      int y_bit = (*buf == 3);
+
+      if (!mpi_test_bit (ec->p, 1))
+        {
+          xfree (buf_memory);
+          return GPG_ERR_NOT_IMPLEMENTED; /* No support for point compression. 
 */
+        }
+
+      n = n - 1;
+      rc = _gcry_mpi_scan (&x, GCRYMPI_FMT_USG, buf+1, n, NULL);
+      xfree (buf_memory);
+      if (rc)
+        return rc;
+
+      /*
+       * Recover Y.  The Weierstrass curve: y^2 = x^3 + a*x + b
+       */
+
+      x3 = mpi_new (0);
+      t = mpi_new (0);
+      p1_4 = mpi_new (0);
+      y = mpi_new (0);
+
+      /* Compute right hand side.  */
+      mpi_powm (x3, x, mpi_const (MPI_C_THREE), ec->p);
+      mpi_mul (t, ec->a, x);
+      mpi_mod (t, t, ec->p);
+      mpi_add (t, t, ec->b);
+      mpi_mod (t, t, ec->p);
+      mpi_add (t, t, x3);
+      mpi_mod (t, t, ec->p);
+
+      /*
+       * When p mod 4 = 3, modular square root of A can be computed by
+       * A^((p+1)/4) mod p
+       */
+
+      /* Compute (p+1)/4 into p1_4 */
+      mpi_rshift (p1_4, ec->p, 2);
+      _gcry_mpi_add_ui (p1_4, p1_4, 1);
+
+      mpi_powm (y, t, p1_4, ec->p);
+
+      if (y_bit != mpi_test_bit (y, 0))
+        mpi_sub (y, ec->p, y);
+
+      mpi_free (p1_4);
+      mpi_free (t);
+      mpi_free (x3);
+    }
+  else if (*buf == 4)
+    {
+      if ( ((n-1)%2) )
+        {
+          xfree (buf_memory);
+          return GPG_ERR_INV_OBJ;
+        }
+      n = (n-1)/2;
+      rc = _gcry_mpi_scan (&x, GCRYMPI_FMT_USG, buf+1, n, NULL);
+      if (rc)
+        {
+          xfree (buf_memory);
+          return rc;
+        }
+      rc = _gcry_mpi_scan (&y, GCRYMPI_FMT_USG, buf+1+n, n, NULL);
+      xfree (buf_memory);
+      if (rc)
+        {
+          mpi_free (x);
+          return rc;
+        }
+    }
+  else
+    {
+      xfree (buf_memory);
+      return GPG_ERR_INV_OBJ;
+    }
+
+  mpi_set (result->x, x);
+  mpi_set (result->y, y);
+  mpi_set_ui (result->z, 1);
+
+  mpi_free (x);
+  mpi_free (y);
+
+  return 0;
+}
+
+
+/* Compute the public key from the the context EC.  Obviously a
+   requirement is that the secret key is available in EC.  On success
+   Q is returned; on error NULL.  If Q is NULL a newly allocated point
+   is returned.  If G or D are given they override the values taken
+   from EC. */
+mpi_point_t
+_gcry_ecc_compute_public (mpi_point_t Q, mpi_ec_t ec)
+{
+  if (!ec->d || !ec->G || !ec->p || !ec->a)
+    return NULL;
+  if (ec->model == MPI_EC_EDWARDS && !ec->b)
+    return NULL;
+
+  if ((ec->dialect == ECC_DIALECT_ED25519 && (ec->flags & PUBKEY_FLAG_EDDSA))
+      || (ec->model == MPI_EC_EDWARDS && ec->dialect == ECC_DIALECT_SAFECURVE))
+    {
+      gcry_mpi_t a;
+      unsigned char *digest;
+      int b;
+
+      b = (ec->nbits+7)/8;
+      if (ec->nbits == 255)
+        ;
+      else if (ec->nbits == 448)
+        b++;
+      else
+        return NULL;            /* Not implemented.  */
+
+      if (_gcry_ecc_eddsa_compute_h_d (&digest, ec))
+        return NULL;
+
+      a = mpi_snew (0);
+      _gcry_mpi_set_buffer (a, digest, b, 0);
+      xfree (digest);
+
+      /* And finally the public key.  */
+      if (!Q)
+        Q = mpi_point_new (0);
+      if (Q)
+        _gcry_mpi_ec_mul_point (Q, a, ec->G, ec);
+      mpi_free (a);
+    }
+  else
+    {
+      if (!Q)
+        Q = mpi_point_new (0);
+      if (Q)
+        _gcry_mpi_ec_mul_point (Q, ec->d, ec->G, ec);
+    }
+
+  return Q;
+}
+
+
+gpg_err_code_t
+_gcry_ecc_mont_encodepoint (gcry_mpi_t x, unsigned int nbits,
+                            int with_prefix,
+                            unsigned char **r_buffer, unsigned int *r_buflen)
+{
+  unsigned char *rawmpi;
+  unsigned int rawmpilen;
+
+  rawmpi = _gcry_mpi_get_buffer_extra (x, (nbits+7)/8,
+                                       with_prefix? -1 : 0, &rawmpilen, NULL);
+  if (rawmpi == NULL)
+    return gpg_err_code_from_syserror ();
+
+  if (with_prefix)
+    {
+      rawmpi[0] = 0x40;
+      rawmpilen++;
+    }
+
+  *r_buffer = rawmpi;
+  *r_buflen = rawmpilen;
+  return 0;
+}
+
+
+gpg_err_code_t
+_gcry_ecc_mont_decodepoint (gcry_mpi_t pk, mpi_ec_t ec, mpi_point_t result)
+{
+  unsigned char *rawmpi;
+  unsigned int rawmpilen;
+  unsigned int nbytes = (ec->nbits+7)/8;
+
+  /*
+   * It is not reliable to assume that the first byte of 0x40
+   * means the prefix.
+   *
+   * For newer implementation, it is reliable since we always put
+   * 0x40 for x-only coordinate.
+   *
+   * For data by older implementation (non-released development
+   * version in 2015), there is no 0x40 prefix added.
+   *
+   * So, it is possible to have shorter length of data when it was
+   * handled as MPI, removing preceding zeros.
+   *
+   * Besides, when data was parsed as MPI, we might have 0x00
+   * prefix (when the MSB in the first byte is set).
+   */
+
+  if (mpi_is_opaque (pk))
+    {
+      const unsigned char *buf;
+      unsigned char *p;
+
+      buf = mpi_get_opaque (pk, &rawmpilen);
+      if (!buf)
+        return GPG_ERR_INV_OBJ;
+      rawmpilen = (rawmpilen + 7)/8;
+
+      if (rawmpilen == nbytes + 1
+          && (buf[0] == 0x00 || buf[0] == 0x40))
+        {
+          rawmpilen--;
+          buf++;
+        }
+      else if (rawmpilen > nbytes)
+        return GPG_ERR_INV_OBJ;
+
+      rawmpi = xtrymalloc (nbytes);
+      if (!rawmpi)
+        return gpg_err_code_from_syserror ();
+
+      p = rawmpi + rawmpilen;
+      while (p > rawmpi)
+        *--p = *buf++;
+
+      if (rawmpilen < nbytes)
+        memset (rawmpi + nbytes - rawmpilen, 0, nbytes - rawmpilen);
+    }
+  else
+    {
+      rawmpi = _gcry_mpi_get_buffer (pk, nbytes, &rawmpilen, NULL);
+      if (!rawmpi)
+        return gpg_err_code_from_syserror ();
+      if (rawmpilen > nbytes + BYTES_PER_MPI_LIMB)
+        {
+          xfree (rawmpi);
+          return GPG_ERR_INV_OBJ;
+        }
+      /*
+       * When we have the prefix (0x40 or 0x00), it comes at the end,
+       * since it is taken by _gcry_mpi_get_buffer with little endian.
+       * Just setting RAWMPILEN to NBYTES is enough in this case.
+       * Othewise, RAWMPILEN is NBYTES already.
+       */
+      rawmpilen = nbytes;
+    }
+
+  if ((ec->nbits % 8))
+    rawmpi[0] &= (1 << (ec->nbits % 8)) - 1;
+  _gcry_mpi_set_buffer (result->x, rawmpi, rawmpilen, 0);
+  xfree (rawmpi);
+  mpi_set_ui (result->z, 1);
+
+  return 0;
+}
diff --git a/grub-core/lib/libgcrypt/cipher/ecc-sm2.c 
b/grub-core/lib/libgcrypt/cipher/ecc-sm2.c
new file mode 100644
index 000000000..c52629fd3
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/ecc-sm2.c
@@ -0,0 +1,569 @@
+/* ecc-sm2.c  -  Elliptic Curve SM2 implementation
+ * Copyright (C) 2020 Tianjia Zhang
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "g10lib.h"
+#include "bithelp.h"
+#include "mpi.h"
+#include "cipher.h"
+#include "context.h"
+#include "ec-context.h"
+#include "pubkey-internal.h"
+#include "ecc-common.h"
+
+#define MPI_NBYTES(m)   ((mpi_get_nbits(m) + 7) / 8)
+
+
+/* Key derivation function from X9.63/SECG */
+static gpg_err_code_t
+kdf_x9_63 (int algo, const void *in, size_t inlen, void *out, size_t outlen)
+{
+  gpg_err_code_t rc;
+  gcry_md_hd_t hd;
+  int mdlen;
+  u32 counter = 1;
+  u32 counter_be;
+  unsigned char *dgst;
+  unsigned char *pout = out;
+  size_t rlen = outlen;
+  size_t len;
+
+  rc = _gcry_md_open (&hd, algo, 0);
+  if (rc)
+    return rc;
+
+  mdlen = _gcry_md_get_algo_dlen (algo);
+
+  while (rlen > 0)
+    {
+      counter_be = be_bswap32 (counter);   /* cpu_to_be32 */
+      counter++;
+
+      _gcry_md_write (hd, in, inlen);
+      _gcry_md_write (hd, &counter_be, sizeof(counter_be));
+
+      dgst = _gcry_md_read (hd, algo);
+      if (dgst == NULL)
+        {
+          rc = GPG_ERR_DIGEST_ALGO;
+          break;
+        }
+
+      len = mdlen < rlen ? mdlen : rlen;  /* min(mdlen, rlen) */
+      memcpy (pout, dgst, len);
+      rlen -= len;
+      pout += len;
+
+      _gcry_md_reset (hd);
+    }
+
+  _gcry_md_close (hd);
+  return rc;
+}
+
+
+/* _gcry_ecc_sm2_encrypt description:
+ *   input:
+ *     data[0] : octet string
+ *   output: A new S-expression with the parameters:
+ *     a: c1 : generated ephemeral public key (kG)
+ *     b: c3 : Hash(x2 || IN || y2)
+ *     c: c2 : cipher
+ *
+ * sm2_decrypt description:
+ *   in contrast to encrypt
+ */
+gpg_err_code_t
+_gcry_ecc_sm2_encrypt (gcry_sexp_t *r_ciph, gcry_mpi_t input, mpi_ec_t ec)
+{
+  gpg_err_code_t rc;
+  const int algo = GCRY_MD_SM3;
+  gcry_md_hd_t md = NULL;
+  int mdlen;
+  unsigned char *dgst;
+  gcry_mpi_t k = NULL;
+  mpi_point_struct kG, kP;
+  gcry_mpi_t x1, y1;
+  gcry_mpi_t x2, y2;
+  gcry_mpi_t x2y2 = NULL;
+  unsigned char *in = NULL;
+  unsigned int inlen;
+  unsigned char *raw;
+  unsigned int rawlen;
+  unsigned char *cipher = NULL;
+  int i;
+
+  point_init (&kG);
+  point_init (&kP);
+  x1 = mpi_new (0);
+  y1 = mpi_new (0);
+  x2 = mpi_new (0);
+  y2 = mpi_new (0);
+
+  in = _gcry_mpi_get_buffer (input, 0, &inlen, NULL);
+  if (!in)
+    {
+      rc = gpg_err_code_from_syserror ();
+      goto leave;
+    }
+
+  cipher = xtrymalloc (inlen);
+  if (!cipher)
+    {
+      rc = gpg_err_code_from_syserror ();
+      goto leave;
+    }
+
+  /* rand k in [1, n-1] */
+  k = _gcry_dsa_gen_k (ec->n, GCRY_VERY_STRONG_RANDOM);
+
+  /* [k]G = (x1, y1) */
+  _gcry_mpi_ec_mul_point (&kG, k, ec->G, ec);
+  if (_gcry_mpi_ec_get_affine (x1, y1, &kG, ec))
+    {
+      if (DBG_CIPHER)
+        log_debug ("Bad check: kG can not be a Point at Infinity!\n");
+      rc = GPG_ERR_INV_DATA;
+      goto leave;
+    }
+
+  /* [k]P = (x2, y2) */
+  _gcry_mpi_ec_mul_point (&kP, k, ec->Q, ec);
+  if (_gcry_mpi_ec_get_affine (x2, y2, &kP, ec))
+    {
+      rc = GPG_ERR_INV_DATA;
+      goto leave;
+    }
+
+  /* t = KDF(x2 || y2, klen) */
+  x2y2 = _gcry_mpi_ec_ec2os (&kP, ec);
+  raw = mpi_get_opaque (x2y2, &rawlen);
+  rawlen = (rawlen + 7) / 8;
+
+  /* skip the prefix '0x04' */
+  raw += 1;
+  rawlen -= 1;
+  rc = kdf_x9_63 (algo, raw, rawlen, cipher, inlen);
+  if (rc)
+    goto leave;
+
+  /* cipher = t xor in */
+  for (i = 0; i < inlen; i++)
+    cipher[i] ^= in[i];
+
+  /* hash(x2 || IN || y2) */
+  mdlen = _gcry_md_get_algo_dlen (algo);
+  rc = _gcry_md_open (&md, algo, 0);
+  if (rc)
+    goto leave;
+  _gcry_md_write (md, raw, MPI_NBYTES(x2));
+  _gcry_md_write (md, in, inlen);
+  _gcry_md_write (md, raw + MPI_NBYTES(x2), MPI_NBYTES(y2));
+  dgst = _gcry_md_read (md, algo);
+  if (dgst == NULL)
+    {
+      rc = GPG_ERR_DIGEST_ALGO;
+      goto leave;
+    }
+
+  if (!rc)
+    {
+      gcry_mpi_t c1;
+      gcry_mpi_t c3;
+      gcry_mpi_t c2;
+
+      c3 = mpi_new (0);
+      c2 = mpi_new (0);
+
+      c1 = _gcry_ecc_ec2os (x1, y1, ec->p);
+      _gcry_mpi_set_opaque_copy (c3, dgst, mdlen * 8);
+      _gcry_mpi_set_opaque_copy (c2, cipher, inlen * 8);
+
+      rc = sexp_build (r_ciph, NULL,
+                       "(enc-val(flags sm2)(sm2(a%M)(b%M)(c%M)))",
+                       c1, c3, c2);
+
+      mpi_free (c1);
+      mpi_free (c3);
+      mpi_free (c2);
+    }
+
+leave:
+  _gcry_md_close (md);
+  mpi_free (x2y2);
+  mpi_free (k);
+
+  point_free (&kG);
+  point_free (&kP);
+  mpi_free (x1);
+  mpi_free (y1);
+  mpi_free (x2);
+  mpi_free (y2);
+
+  xfree (cipher);
+  xfree (in);
+
+  return rc;
+}
+
+
+gpg_err_code_t
+_gcry_ecc_sm2_decrypt (gcry_sexp_t *r_plain, gcry_sexp_t data_list, mpi_ec_t 
ec)
+{
+  gpg_err_code_t rc;
+  gcry_mpi_t data_c1 = NULL;
+  gcry_mpi_t data_c3 = NULL;
+  gcry_mpi_t data_c2 = NULL;
+
+  /*
+   * Extract the data.
+   */
+  rc = sexp_extract_param (data_list, NULL, "/a/b/c",
+                           &data_c1, &data_c3, &data_c2, NULL);
+  if (rc)
+    goto leave;
+  if (DBG_CIPHER)
+    {
+      log_printmpi ("ecc_decrypt  d_c1", data_c1);
+      log_printmpi ("ecc_decrypt  d_c3", data_c3);
+      log_printmpi ("ecc_decrypt  d_c2", data_c2);
+    }
+
+  {
+    const int algo = GCRY_MD_SM3;
+    gcry_md_hd_t md = NULL;
+    int mdlen;
+    unsigned char *dgst;
+    mpi_point_struct c1;
+    mpi_point_struct kP;
+    gcry_mpi_t x2, y2;
+    gcry_mpi_t x2y2 = NULL;
+    unsigned char *in = NULL;
+    unsigned int inlen;
+    unsigned char *plain = NULL;
+    unsigned char *raw;
+    unsigned int rawlen;
+    unsigned char *c3 = NULL;
+    unsigned int c3_len;
+    int i;
+
+    point_init (&c1);
+    point_init (&kP);
+    x2 = mpi_new (0);
+    y2 = mpi_new (0);
+
+    in = mpi_get_opaque (data_c2, &inlen);
+    inlen = (inlen + 7) / 8;
+    plain = xtrymalloc (inlen);
+    if (!plain)
+      {
+        rc = gpg_err_code_from_syserror ();
+        goto leave_main;
+      }
+
+    rc = _gcry_ecc_sec_decodepoint (data_c1, ec, &c1);
+    if (rc)
+      goto leave_main;
+
+    if (!_gcry_mpi_ec_curve_point (&c1, ec))
+      {
+        rc = GPG_ERR_INV_DATA;
+        goto leave_main;
+      }
+
+    /* [d]C1 = (x2, y2), C1 = [k]G */
+    _gcry_mpi_ec_mul_point (&kP, ec->d, &c1, ec);
+    if (_gcry_mpi_ec_get_affine (x2, y2, &kP, ec))
+      {
+        rc = GPG_ERR_INV_DATA;
+        goto leave_main;
+      }
+
+    /* t = KDF(x2 || y2, inlen) */
+    x2y2 = _gcry_mpi_ec_ec2os (&kP, ec);
+    raw = mpi_get_opaque (x2y2, &rawlen);
+    rawlen = (rawlen + 7) / 8;
+    /* skip the prefix '0x04' */
+    raw += 1;
+    rawlen -= 1;
+    rc = kdf_x9_63 (algo, raw, rawlen, plain, inlen);
+    if (rc)
+      goto leave_main;
+
+    /* plain = C2 xor t */
+    for (i = 0; i < inlen; i++)
+      plain[i] ^= in[i];
+
+    /* Hash(x2 || IN || y2) == C3 */
+    mdlen = _gcry_md_get_algo_dlen (algo);
+    rc = _gcry_md_open (&md, algo, 0);
+    if (rc)
+      goto leave_main;
+    _gcry_md_write (md, raw, MPI_NBYTES(x2));
+    _gcry_md_write (md, plain, inlen);
+    _gcry_md_write (md, raw + MPI_NBYTES(x2), MPI_NBYTES(y2));
+    dgst = _gcry_md_read (md, algo);
+    if (dgst == NULL)
+      {
+        memset (plain, 0, inlen);
+        rc = GPG_ERR_DIGEST_ALGO;
+        goto leave_main;
+      }
+    c3 = mpi_get_opaque (data_c3, &c3_len);
+    c3_len = (c3_len + 7) / 8;
+    if (c3_len != mdlen || memcmp (dgst, c3, c3_len) != 0)
+      {
+        memset (plain, 0, inlen);
+        rc = GPG_ERR_INV_DATA;
+        goto leave_main;
+      }
+
+    if (!rc)
+      {
+        gcry_mpi_t r;
+
+        r = mpi_new (inlen * 8);
+        _gcry_mpi_set_buffer (r, plain, inlen, 0);
+
+        rc = sexp_build (r_plain, NULL, "(value %m)", r);
+
+        mpi_free (r);
+      }
+
+  leave_main:
+    _gcry_md_close (md);
+    mpi_free (x2y2);
+    xfree (plain);
+
+    point_free (&c1);
+    point_free (&kP);
+    mpi_free (x2);
+    mpi_free (y2);
+  }
+
+ leave:
+  _gcry_mpi_release (data_c1);
+  _gcry_mpi_release (data_c3);
+  _gcry_mpi_release (data_c2);
+
+  return rc;
+}
+
+
+/* Compute an SM2 signature.
+ * Return the signature struct (r,s) from the message hash.  The caller
+ * must have allocated R and S.
+ */
+gpg_err_code_t
+_gcry_ecc_sm2_sign (gcry_mpi_t input, mpi_ec_t ec,
+                    gcry_mpi_t r, gcry_mpi_t s,
+                    int flags, int hashalgo)
+{
+  gpg_err_code_t rc = 0;
+  int extraloops = 0;
+  gcry_mpi_t hash;
+  const void *abuf;
+  unsigned int abits, qbits;
+  gcry_mpi_t tmp = NULL;
+  gcry_mpi_t k = NULL;
+  gcry_mpi_t rk = NULL;
+  mpi_point_struct kG;
+  gcry_mpi_t x1;
+
+  if (DBG_CIPHER)
+    log_mpidump ("sm2 sign hash  ", input);
+
+  qbits = mpi_get_nbits (ec->n);
+
+  /* Convert the INPUT into an MPI if needed.  */
+  rc = _gcry_dsa_normalize_hash (input, &hash, qbits);
+  if (rc)
+    return rc;
+
+  point_init (&kG);
+  x1 = mpi_new (0);
+  rk = mpi_new (0);
+  tmp = mpi_new (0);
+
+  for (;;)
+    {
+      /* rand k in [1, n-1] */
+      if ((flags & PUBKEY_FLAG_RFC6979) && hashalgo)
+        {
+          /* Use Pornin's method for deterministic DSA.  If this
+             flag is set, it is expected that HASH is an opaque
+             MPI with the to be signed hash.  That hash is also
+             used as h1 from 3.2.a.  */
+          if (!mpi_is_opaque (input))
+            {
+              rc = GPG_ERR_CONFLICT;
+              goto leave;
+            }
+
+          abuf = mpi_get_opaque (input, &abits);
+          rc = _gcry_dsa_gen_rfc6979_k (&k, ec->n, ec->d,
+                                        abuf, (abits+7)/8,
+                                        hashalgo, extraloops);
+          if (rc)
+            goto leave;
+          extraloops++;
+        }
+      else
+        k = _gcry_dsa_gen_k (ec->n, GCRY_VERY_STRONG_RANDOM);
+
+      _gcry_dsa_modify_k (k, ec->n, qbits);
+
+      /* [k]G = (x1, y1) */
+      _gcry_mpi_ec_mul_point (&kG, k, ec->G, ec);
+      if (_gcry_mpi_ec_get_affine (x1, NULL, &kG, ec))
+        {
+          rc = GPG_ERR_INV_DATA;
+          goto leave;
+        }
+
+      /* r = (e + x1) % n */
+      mpi_addm (r, hash, x1, ec->n);
+
+      /* r != 0 && r + k != n */
+      if (mpi_cmp_ui (r, 0) == 0)
+        continue;
+      mpi_add (rk, r, k);
+      if (mpi_cmp (rk, ec->n) == 0)
+        continue;
+
+      /* s = ((d + 1)^-1 * (k - rd)) % n */
+      mpi_addm (s, ec->d, GCRYMPI_CONST_ONE, ec->n);
+      mpi_invm (s, s, ec->n);
+      mpi_mulm (tmp, r, ec->d, ec->n);
+      mpi_subm (tmp, k, tmp, ec->n);
+      mpi_mulm (s, s, tmp, ec->n);
+
+      /* s != 0 */
+      if (mpi_cmp_ui (s, 0) == 0)
+        continue;
+
+      break;    /* Okay */
+    }
+
+  if (DBG_CIPHER)
+    {
+      log_mpidump ("sm2 sign result r ", r);
+      log_mpidump ("sm2 sign result s ", s);
+    }
+
+leave:
+  point_free (&kG);
+  mpi_free (k);
+  mpi_free (x1);
+  mpi_free (rk);
+  mpi_free (tmp);
+
+  if (hash != input)
+    mpi_free (hash);
+
+  return rc;
+}
+
+
+/* Verify an SM2 signature.
+ * Check if R and S verifies INPUT.
+ */
+gpg_err_code_t
+_gcry_ecc_sm2_verify (gcry_mpi_t input, mpi_ec_t ec,
+                      gcry_mpi_t r, gcry_mpi_t s)
+{
+  gpg_err_code_t err = 0;
+  gcry_mpi_t hash = NULL;
+  gcry_mpi_t t = NULL;
+  mpi_point_struct sG, tP;
+  gcry_mpi_t x1, y1;
+  unsigned int nbits;
+
+  if (!_gcry_mpi_ec_curve_point (ec->Q, ec))
+    return GPG_ERR_BROKEN_PUBKEY;
+
+  /* r, s within [1, n-1] */
+  if (mpi_cmp_ui (r, 1) < 0 || mpi_cmp (r, ec->n) > 0)
+    return GPG_ERR_BAD_SIGNATURE;
+  if (mpi_cmp_ui (s, 1) < 0 || mpi_cmp (s, ec->n) > 0)
+    return GPG_ERR_BAD_SIGNATURE;
+
+  nbits = mpi_get_nbits (ec->n);
+  err = _gcry_dsa_normalize_hash (input, &hash, nbits);
+  if (err)
+    return err;
+
+  point_init (&sG);
+  point_init (&tP);
+  x1 = mpi_new (0);
+  y1 = mpi_new (0);
+  t = mpi_new (0);
+
+  /* t = (r + s) % n, t != 0 */
+  mpi_addm (t, r, s, ec->n);
+  if (mpi_cmp_ui (t, 0) == 0)
+    {
+      err = GPG_ERR_BAD_SIGNATURE;
+      goto leave;
+    }
+
+  /* sG + tP = (x1, y1) */
+  _gcry_mpi_ec_mul_point (&sG, s, ec->G, ec);
+  _gcry_mpi_ec_mul_point (&tP, t, ec->Q, ec);
+  _gcry_mpi_ec_add_points (&sG, &sG, &tP, ec);
+  if (_gcry_mpi_ec_get_affine (x1, y1, &sG, ec))
+    {
+      err = GPG_ERR_INV_DATA;
+      goto leave;
+    }
+
+  /* R = (e + x1) % n */
+  mpi_addm (t, hash, x1, ec->n);
+
+  /* check R == r */
+  if (mpi_cmp (t, r))
+    {
+      if (DBG_CIPHER)
+        {
+          log_mpidump ("     R", t);
+          log_mpidump ("     r", r);
+          log_mpidump ("     s", s);
+        }
+      err = GPG_ERR_BAD_SIGNATURE;
+      goto leave;
+    }
+  if (DBG_CIPHER)
+    log_debug ("sm2 verify: Accepted\n");
+
+ leave:
+  point_free (&sG);
+  point_free (&tP);
+  mpi_free (x1);
+  mpi_free (y1);
+  mpi_free (t);
+  if (hash != input)
+    mpi_free (hash);
+
+  return err;
+}
diff --git a/grub-core/lib/libgcrypt/cipher/ecc.c 
b/grub-core/lib/libgcrypt/cipher/ecc.c
index b8487dc13..cf3fc6618 100644
--- a/grub-core/lib/libgcrypt/cipher/ecc.c
+++ b/grub-core/lib/libgcrypt/cipher/ecc.c
@@ -1,22 +1,22 @@
 /* ecc.c  -  Elliptic Curve Cryptography
-   Copyright (C) 2007, 2008, 2010, 2011 Free Software Foundation, Inc.
-
-   This file is part of Libgcrypt.
-
-   Libgcrypt is free software; you can redistribute it and/or modify
-   it under the terms of the GNU Lesser General Public License as
-   published by the Free Software Foundation; either version 2.1 of
-   the License, or (at your option) any later version.
-
-   Libgcrypt is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with this program; if not, write to the Free Software
-   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
-   USA.  */
+ * Copyright (C) 2007, 2008, 2010, 2011 Free Software Foundation, Inc.
+ * Copyright (C) 2013, 2015 g10 Code GmbH
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
 
 /* This code is originally based on the Patch 0.1.6 for the gnupg
    1.4.x branch as retrieved on 2007-03-21 from
@@ -30,24 +30,20 @@
       Ramiro Moreno Chiral
       Mikael Mylnikov (mmr)
   For use in Libgcrypt the code has been heavily modified and cleaned
-  up. In fact there is not much left of the orginally code except for
+  up. In fact there is not much left of the originally code except for
   some variable names and the text book implementaion of the sign and
   verification algorithms.  The arithmetic functions have entirely
   been rewritten and moved to mpi/ec.c.
 
-  ECDH encrypt and decrypt code written by Andrey Jivsov,
+  ECDH encrypt and decrypt code written by Andrey Jivsov.
 */
 
 
 /* TODO:
 
-  - If we support point compression we need to uncompress before
-    computing the keygrip
-
   - In mpi/ec.c we use mpi_powm for x^2 mod p: Either implement a
     special case in mpi_powm or check whether mpi_mulm is faster.
 
-  - Decide whether we should hide the mpi_point_t definition.
 */
 
 
@@ -55,231 +51,46 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <errno.h>
 
 #include "g10lib.h"
 #include "mpi.h"
 #include "cipher.h"
-
-/* Definition of a curve.  */
-typedef struct
-{
-  gcry_mpi_t p;   /* Prime specifying the field GF(p).  */
-  gcry_mpi_t a;   /* First coefficient of the Weierstrass equation.  */
-  gcry_mpi_t b;   /* Second coefficient of the Weierstrass equation.  */
-  mpi_point_t G;  /* Base point (generator).  */
-  gcry_mpi_t n;   /* Order of G.  */
-  const char *name;  /* Name of curve or NULL.  */
-} elliptic_curve_t;
-
-
-typedef struct
-{
-  elliptic_curve_t E;
-  mpi_point_t Q;  /* Q = [d]G  */
-} ECC_public_key;
-
-typedef struct
-{
-  elliptic_curve_t E;
-  mpi_point_t Q;
-  gcry_mpi_t d;
-} ECC_secret_key;
+#include "context.h"
+#include "ec-context.h"
+#include "pubkey-internal.h"
+#include "ecc-common.h"
 
 
-/* This tables defines aliases for curve names.  */
-static const struct
-{
-  const char *name;  /* Our name.  */
-  const char *other; /* Other name. */
-} curve_aliases[] =
+static const char *ecc_names[] =
   {
-    { "NIST P-192", "1.2.840.10045.3.1.1" }, /* X9.62 OID  */
-    { "NIST P-192", "prime192v1" },          /* X9.62 name.  */
-    { "NIST P-192", "secp192r1"  },          /* SECP name.  */
-
-    { "NIST P-224", "secp224r1" },
-    { "NIST P-224", "1.3.132.0.33" },        /* SECP OID.  */
-
-    { "NIST P-256", "1.2.840.10045.3.1.7" }, /* From NIST SP 800-78-1.  */
-    { "NIST P-256", "prime256v1" },
-    { "NIST P-256", "secp256r1"  },
-
-    { "NIST P-384", "secp384r1" },
-    { "NIST P-384", "1.3.132.0.34" },
-
-    { "NIST P-521", "secp521r1" },
-    { "NIST P-521", "1.3.132.0.35" },
+    "ecc",
+    "ecdsa",
+    "ecdh",
+    "eddsa",
+    "gost",
+    "sm2",
+    NULL,
+  };
 
-    { "brainpoolP160r1", "1.3.36.3.3.2.8.1.1.1" },
-    { "brainpoolP192r1", "1.3.36.3.3.2.8.1.1.3" },
-    { "brainpoolP224r1", "1.3.36.3.3.2.8.1.1.5" },
-    { "brainpoolP256r1", "1.3.36.3.3.2.8.1.1.7" },
-    { "brainpoolP320r1", "1.3.36.3.3.2.8.1.1.9" },
-    { "brainpoolP384r1", "1.3.36.3.3.2.8.1.1.11"},
-    { "brainpoolP512r1", "1.3.36.3.3.2.8.1.1.13"},
 
-    { NULL, NULL}
-  };
+/* Sample NIST P-256 key from RFC 6979 A.2.5 */
+static const char sample_public_key_secp256[] =
+  "(public-key"
+  " (ecc"
+  "  (curve secp256r1)"
+  "  (q #04"
+  /**/  "60FED4BA255A9D31C961EB74C6356D68C049B8923B61FA6CE669622E60F29FB6"
+  /**/  "7903FE1008B8BC99A41AE9E95628BC64F2F1B20C2D7E9F5177A3C294D4462299#)))";
 
-typedef struct   {
-  const char *desc;           /* Description of the curve.  */
-  unsigned int nbits;         /* Number of bits.  */
-  unsigned int fips:1;        /* True if this is a FIPS140-2 approved curve. */
-  const char  *p;             /* Order of the prime field.  */
-  const char *a, *b;          /* The coefficients. */
-  const char *n;              /* The order of the base point.  */
-  const char *g_x, *g_y;      /* Base point.  */
-} ecc_domain_parms_t;
-
-/* This static table defines all available curves.  */
-static const ecc_domain_parms_t domain_parms[] =
-  {
-    {
-      "NIST P-192", 192, 1,
-      "0xfffffffffffffffffffffffffffffffeffffffffffffffff",
-      "0xfffffffffffffffffffffffffffffffefffffffffffffffc",
-      "0x64210519e59c80e70fa7e9ab72243049feb8deecc146b9b1",
-      "0xffffffffffffffffffffffff99def836146bc9b1b4d22831",
-
-      "0x188da80eb03090f67cbf20eb43a18800f4ff0afd82ff1012",
-      "0x07192b95ffc8da78631011ed6b24cdd573f977a11e794811"
-    },
-    {
-      "NIST P-224", 224, 1,
-      "0xffffffffffffffffffffffffffffffff000000000000000000000001",
-      "0xfffffffffffffffffffffffffffffffefffffffffffffffffffffffe",
-      "0xb4050a850c04b3abf54132565044b0b7d7bfd8ba270b39432355ffb4",
-      "0xffffffffffffffffffffffffffff16a2e0b8f03e13dd29455c5c2a3d" ,
-
-      "0xb70e0cbd6bb4bf7f321390b94a03c1d356c21122343280d6115c1d21",
-      "0xbd376388b5f723fb4c22dfe6cd4375a05a07476444d5819985007e34"
-    },
-    {
-      "NIST P-256", 256, 1,
-      "0xffffffff00000001000000000000000000000000ffffffffffffffffffffffff",
-      "0xffffffff00000001000000000000000000000000fffffffffffffffffffffffc",
-      "0x5ac635d8aa3a93e7b3ebbd55769886bc651d06b0cc53b0f63bce3c3e27d2604b",
-      "0xffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc632551",
-
-      "0x6b17d1f2e12c4247f8bce6e563a440f277037d812deb33a0f4a13945d898c296",
-      "0x4fe342e2fe1a7f9b8ee7eb4a7c0f9e162bce33576b315ececbb6406837bf51f5"
-    },
-    {
-      "NIST P-384", 384, 1,
-      "0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffe"
-      "ffffffff0000000000000000ffffffff",
-      "0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffe"
-      "ffffffff0000000000000000fffffffc",
-      "0xb3312fa7e23ee7e4988e056be3f82d19181d9c6efe8141120314088f5013875a"
-      "c656398d8a2ed19d2a85c8edd3ec2aef",
-      "0xffffffffffffffffffffffffffffffffffffffffffffffffc7634d81f4372ddf"
-      "581a0db248b0a77aecec196accc52973",
-
-      "0xaa87ca22be8b05378eb1c71ef320ad746e1d3b628ba79b9859f741e082542a38"
-      "5502f25dbf55296c3a545e3872760ab7",
-      "0x3617de4a96262c6f5d9e98bf9292dc29f8f41dbd289a147ce9da3113b5f0b8c0"
-      "0a60b1ce1d7e819d7a431d7c90ea0e5f"
-    },
-    {
-      "NIST P-521", 521, 1,
-      "0x01ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"
-      "ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff",
-      "0x01ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"
-      "fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffc",
-      "0x051953eb9618e1c9a1f929a21a0b68540eea2da725b99b315f3b8b489918ef10"
-      "9e156193951ec7e937b1652c0bd3bb1bf073573df883d2c34f1ef451fd46b503f00",
-      "0x1fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"
-      "ffa51868783bf2f966b7fcc0148f709a5d03bb5c9b8899c47aebb6fb71e91386409",
-
-      "0xc6858e06b70404e9cd9e3ecb662395b4429c648139053fb521f828af606b4d3d"
-      "baa14b5e77efe75928fe1dc127a2ffa8de3348b3c1856a429bf97e7e31c2e5bd66",
-      "0x11839296a789a3bc0045c8a5fb42c7d1bd998f54449579b446817afbd17273e6"
-      "62c97ee72995ef42640c550b9013fad0761353c7086a272c24088be94769fd16650"
-    },
-
-    { "brainpoolP160r1", 160, 0,
-      "0xe95e4a5f737059dc60dfc7ad95b3d8139515620f",
-      "0x340e7be2a280eb74e2be61bada745d97e8f7c300",
-      "0x1e589a8595423412134faa2dbdec95c8d8675e58",
-      "0xe95e4a5f737059dc60df5991d45029409e60fc09",
-      "0xbed5af16ea3f6a4f62938c4631eb5af7bdbcdbc3",
-      "0x1667cb477a1a8ec338f94741669c976316da6321"
-    },
-
-    { "brainpoolP192r1", 192, 0,
-      "0xc302f41d932a36cda7a3463093d18db78fce476de1a86297",
-      "0x6a91174076b1e0e19c39c031fe8685c1cae040e5c69a28ef",
-      "0x469a28ef7c28cca3dc721d044f4496bcca7ef4146fbf25c9",
-      "0xc302f41d932a36cda7a3462f9e9e916b5be8f1029ac4acc1",
-      "0xc0a0647eaab6a48753b033c56cb0f0900a2f5c4853375fd6",
-      "0x14b690866abd5bb88b5f4828c1490002e6773fa2fa299b8f"
-    },
-
-    { "brainpoolP224r1", 224, 0,
-      "0xd7c134aa264366862a18302575d1d787b09f075797da89f57ec8c0ff",
-      "0x68a5e62ca9ce6c1c299803a6c1530b514e182ad8b0042a59cad29f43",
-      "0x2580f63ccfe44138870713b1a92369e33e2135d266dbb372386c400b",
-      "0xd7c134aa264366862a18302575d0fb98d116bc4b6ddebca3a5a7939f",
-      "0x0d9029ad2c7e5cf4340823b2a87dc68c9e4ce3174c1e6efdee12c07d",
-      "0x58aa56f772c0726f24c6b89e4ecdac24354b9e99caa3f6d3761402cd"
-    },
-
-    { "brainpoolP256r1", 256, 0,
-      "0xa9fb57dba1eea9bc3e660a909d838d726e3bf623d52620282013481d1f6e5377",
-      "0x7d5a0975fc2c3057eef67530417affe7fb8055c126dc5c6ce94a4b44f330b5d9",
-      "0x26dc5c6ce94a4b44f330b5d9bbd77cbf958416295cf7e1ce6bccdc18ff8c07b6",
-      "0xa9fb57dba1eea9bc3e660a909d838d718c397aa3b561a6f7901e0e82974856a7",
-      "0x8bd2aeb9cb7e57cb2c4b482ffc81b7afb9de27e1e3bd23c23a4453bd9ace3262",
-      "0x547ef835c3dac4fd97f8461a14611dc9c27745132ded8e545c1d54c72f046997"
-    },
-
-    { "brainpoolP320r1", 320, 0,
-      "0xd35e472036bc4fb7e13c785ed201e065f98fcfa6f6f40def4f92b9ec7893ec28"
-      "fcd412b1f1b32e27",
-      "0x3ee30b568fbab0f883ccebd46d3f3bb8a2a73513f5eb79da66190eb085ffa9f4"
-      "92f375a97d860eb4",
-      "0x520883949dfdbc42d3ad198640688a6fe13f41349554b49acc31dccd88453981"
-      "6f5eb4ac8fb1f1a6",
-      "0xd35e472036bc4fb7e13c785ed201e065f98fcfa5b68f12a32d482ec7ee8658e9"
-      "8691555b44c59311",
-      "0x43bd7e9afb53d8b85289bcc48ee5bfe6f20137d10a087eb6e7871e2a10a599c7"
-      "10af8d0d39e20611",
-      "0x14fdd05545ec1cc8ab4093247f77275e0743ffed117182eaa9c77877aaac6ac7"
-      "d35245d1692e8ee1"
-    },
-
-    { "brainpoolP384r1", 384, 0,
-      "0x8cb91e82a3386d280f5d6f7e50e641df152f7109ed5456b412b1da197fb71123"
-      "acd3a729901d1a71874700133107ec53",
-      "0x7bc382c63d8c150c3c72080ace05afa0c2bea28e4fb22787139165efba91f90f"
-      "8aa5814a503ad4eb04a8c7dd22ce2826",
-      "0x04a8c7dd22ce28268b39b55416f0447c2fb77de107dcd2a62e880ea53eeb62d5"
-      "7cb4390295dbc9943ab78696fa504c11",
-      "0x8cb91e82a3386d280f5d6f7e50e641df152f7109ed5456b31f166e6cac0425a7"
-      "cf3ab6af6b7fc3103b883202e9046565",
-      "0x1d1c64f068cf45ffa2a63a81b7c13f6b8847a3e77ef14fe3db7fcafe0cbd10e8"
-      "e826e03436d646aaef87b2e247d4af1e",
-      "0x8abe1d7520f9c2a45cb1eb8e95cfd55262b70b29feec5864e19c054ff9912928"
-      "0e4646217791811142820341263c5315"
-    },
-
-    { "brainpoolP512r1", 512, 0,
-      "0xaadd9db8dbe9c48b3fd4e6ae33c9fc07cb308db3b3c9d20ed6639cca70330871"
-      "7d4d9b009bc66842aecda12ae6a380e62881ff2f2d82c68528aa6056583a48f3",
-      "0x7830a3318b603b89e2327145ac234cc594cbdd8d3df91610a83441caea9863bc"
-      "2ded5d5aa8253aa10a2ef1c98b9ac8b57f1117a72bf2c7b9e7c1ac4d77fc94ca",
-      "0x3df91610a83441caea9863bc2ded5d5aa8253aa10a2ef1c98b9ac8b57f1117a7"
-      "2bf2c7b9e7c1ac4d77fc94cadc083e67984050b75ebae5dd2809bd638016f723",
-      "0xaadd9db8dbe9c48b3fd4e6ae33c9fc07cb308db3b3c9d20ed6639cca70330870"
-      "553e5c414ca92619418661197fac10471db1d381085ddaddb58796829ca90069",
-      "0x81aee4bdd82ed9645a21322e9c4c6a9385ed9f70b5d916c1b43b62eef4d0098e"
-      "ff3b1f78e2d0d48d50d1687b93b97d5f7c6d5047406a5e688b352209bcb9f822",
-      "0x7dde385d566332ecc0eabfa9cf7822fdf209f70024a57b1aa000c55b881f8111"
-      "b2dcde494a5f485e5bca4bd88a2763aed1ca2b2fa8f0540678cd1e0f3ad80892"
-    },
-
-    { NULL, 0, 0, NULL, NULL, NULL, NULL }
-  };
+static const char sample_secret_key_secp256[] =
+  "(private-key"
+  " (ecc"
+  "  (curve secp256r1)"
+  "  (d #C9AFA9D845BA75166B5C215767B1D6934E50C3DB36E89B127B8A622B120F6721#)"
+  "  (q #04"
+  /**/  "60FED4BA255A9D31C961EB74C6356D68C049B8923B61FA6CE669622E60F29FB6"
+  /**/  "7903FE1008B8BC99A41AE9E95628BC64F2F1B20C2D7E9F5177A3C294D4462299#)))";
 
 
 /* Registered progress function and its callback value. */
@@ -287,22 +98,12 @@ static void (*progress_cb) (void *, const char*, int, int, 
int);
 static void *progress_cb_data;
 
 
-#define point_init(a)  _gcry_mpi_ec_point_init ((a))
-#define point_free(a)  _gcry_mpi_ec_point_free ((a))
-
-
 
 /* Local prototypes. */
-static gcry_mpi_t gen_k (gcry_mpi_t p, int security_level);
-static void test_keys (ECC_secret_key * sk, unsigned int nbits);
-static int check_secret_key (ECC_secret_key * sk);
-static gpg_err_code_t sign (gcry_mpi_t input, ECC_secret_key *skey,
-                            gcry_mpi_t r, gcry_mpi_t s);
-static gpg_err_code_t verify (gcry_mpi_t input, ECC_public_key *pkey,
-                              gcry_mpi_t r, gcry_mpi_t s);
-
-
-static gcry_mpi_t gen_y_2 (gcry_mpi_t x, elliptic_curve_t * base);
+static void test_keys (mpi_ec_t ec, unsigned int nbits);
+static int test_keys_fips (gcry_sexp_t skey);
+static void test_ecdh_only_keys (mpi_ec_t ec, unsigned int nbits, int flags);
+static unsigned int ecc_get_nbits (gcry_sexp_t parms);
 
 
 
@@ -325,260 +126,138 @@ _gcry_register_pk_ecc_progress (void (*cb) (void *, 
const char *,
 
 
 
-
-/* Set the value from S into D.  */
-static void
-point_set (mpi_point_t *d, mpi_point_t *s)
-{
-  mpi_set (d->x, s->x);
-  mpi_set (d->y, s->y);
-  mpi_set (d->z, s->z);
-}
-
-
-/*
- * Release a curve object.
- */
-static void
-curve_free (elliptic_curve_t *E)
-{
-  mpi_free (E->p); E->p = NULL;
-  mpi_free (E->a); E->a = NULL;
-  mpi_free (E->b);  E->b = NULL;
-  point_free (&E->G);
-  mpi_free (E->n);  E->n = NULL;
-}
-
-
-/*
- * Return a copy of a curve object.
- */
-static elliptic_curve_t
-curve_copy (elliptic_curve_t E)
-{
-  elliptic_curve_t R;
-
-  R.p = mpi_copy (E.p);
-  R.a = mpi_copy (E.a);
-  R.b = mpi_copy (E.b);
-  point_init (&R.G);
-  point_set (&R.G, &E.G);
-  R.n = mpi_copy (E.n);
-
-  return R;
-}
-
-
-/* Helper to scan a hex string. */
-static gcry_mpi_t
-scanval (const char *string)
-{
-  gpg_error_t err;
-  gcry_mpi_t val;
-
-  err = gcry_mpi_scan (&val, GCRYMPI_FMT_HEX, string, 0, NULL);
-  if (err)
-    log_fatal ("scanning ECC parameter failed: %s\n", gpg_strerror (err));
-  return val;
-}
-
-
-
-
-
-/****************
- * Solve the right side of the equation that defines a curve.
+/**
+ * nist_generate_key - Standard version of the ECC key generation.
+ * @ec: Elliptic curve computation context.
+ * @flags: Flags controlling aspects of the creation.
+ * @r_x: On success this receives an allocated MPI with the affine
+ *       x-coordinate of the poblic key.  On error NULL is stored.
+ * @r_y: Ditto for the y-coordinate.
+ *
+ * Return: An error code.
+ *
+ * The @flags bits used by this function are %PUBKEY_FLAG_TRANSIENT to
+ * use a faster RNG, and %PUBKEY_FLAG_NO_KEYTEST to skip the assertion
+ * that the key works as expected.
+ *
+ * FIXME: Check whether N is needed.
  */
-static gcry_mpi_t
-gen_y_2 (gcry_mpi_t x, elliptic_curve_t *base)
-{
-  gcry_mpi_t three, x_3, axb, y;
-
-  three = mpi_alloc_set_ui (3);
-  x_3 = mpi_new (0);
-  axb = mpi_new (0);
-  y   = mpi_new (0);
-
-  mpi_powm (x_3, x, three, base->p);
-  mpi_mulm (axb, base->a, x, base->p);
-  mpi_addm (axb, axb, base->b, base->p);
-  mpi_addm (y, x_3, axb, base->p);
-
-  mpi_free (x_3);
-  mpi_free (axb);
-  mpi_free (three);
-  return y; /* The quadratic value of the coordinate if it exist. */
-}
-
-
-/* Generate a random secret scalar k with an order of p
-
-   At the beginning this was identical to the code is in elgamal.c.
-   Later imporved by mmr.   Further simplified by wk.  */
-static gcry_mpi_t
-gen_k (gcry_mpi_t p, int security_level)
+static gpg_err_code_t
+nist_generate_key (mpi_ec_t ec, int flags,
+                   gcry_mpi_t *r_x, gcry_mpi_t *r_y)
 {
-  gcry_mpi_t k;
-  unsigned int nbits;
-
-  nbits = mpi_get_nbits (p);
-  k = mpi_snew (nbits);
-  if (DBG_CIPHER)
-    log_debug ("choosing a random k of %u bits at seclevel %d\n",
-               nbits, security_level);
-
-  gcry_mpi_randomize (k, nbits, security_level);
-
-  mpi_mod (k, k, p);  /*  k = k mod p  */
-
-  return k;
-}
+  mpi_point_struct Q;
+  gcry_random_level_t random_level;
+  gcry_mpi_t x, y;
+  const unsigned int pbits = ec->nbits;
 
+  point_init (&Q);
 
-/* Generate the crypto system setup.  This function takes the NAME of
-   a curve or the desired number of bits and stores at R_CURVE the
-   parameters of the named curve or those of a suitable curve.  The
-   chosen number of bits is stored on R_NBITS.  */
-static gpg_err_code_t
-fill_in_curve (unsigned int nbits, const char *name,
-               elliptic_curve_t *curve, unsigned int *r_nbits)
-{
-  int idx, aliasno;
-  const char *resname = NULL; /* Set to a found curve name.  */
+  if ((flags & PUBKEY_FLAG_TRANSIENT_KEY))
+    random_level = GCRY_STRONG_RANDOM;
+  else
+    random_level = GCRY_VERY_STRONG_RANDOM;
 
-  if (name)
+  /* Generate a secret.  */
+  if (ec->dialect == ECC_DIALECT_ED25519
+      || ec->dialect == ECC_DIALECT_SAFECURVE
+      || (flags & PUBKEY_FLAG_DJB_TWEAK))
     {
-      /* First check our native curves.  */
-      for (idx = 0; domain_parms[idx].desc; idx++)
-        if (!strcmp (name, domain_parms[idx].desc))
-          {
-            resname = domain_parms[idx].desc;
-            break;
-          }
-      /* If not found consult the alias table.  */
-      if (!domain_parms[idx].desc)
+      char *rndbuf;
+      int len = (pbits+7)/8;
+
+      rndbuf = _gcry_random_bytes_secure (len, random_level);
+      if (ec->dialect == ECC_DIALECT_SAFECURVE)
+        ec->d = mpi_set_opaque (NULL, rndbuf, len*8);
+      else
         {
-          for (aliasno = 0; curve_aliases[aliasno].name; aliasno++)
-            if (!strcmp (name, curve_aliases[aliasno].other))
-              break;
-          if (curve_aliases[aliasno].name)
-            {
-              for (idx = 0; domain_parms[idx].desc; idx++)
-                if (!strcmp (curve_aliases[aliasno].name,
-                             domain_parms[idx].desc))
-                  {
-                    resname = domain_parms[idx].desc;
-                    break;
-                  }
-            }
+          ec->d = mpi_snew (pbits);
+          if ((pbits % 8))
+            rndbuf[0] &= (1 << (pbits % 8)) - 1;
+          rndbuf[0] |= (1 << ((pbits + 7) % 8));
+          rndbuf[len-1] &= (256 - ec->h);
+          _gcry_mpi_set_buffer (ec->d, rndbuf, len, 0);
+          xfree (rndbuf);
         }
     }
   else
-    {
-      for (idx = 0; domain_parms[idx].desc; idx++)
-        if (nbits == domain_parms[idx].nbits)
-          break;
-    }
-  if (!domain_parms[idx].desc)
-    return GPG_ERR_INV_VALUE;
-
-  /* In fips mode we only support NIST curves.  Note that it is
-     possible to bypass this check by specifying the curve parameters
-     directly.  */
-  if (fips_mode () && !domain_parms[idx].fips )
-    return GPG_ERR_NOT_SUPPORTED;
-
-  *r_nbits = domain_parms[idx].nbits;
-  curve->p = scanval (domain_parms[idx].p);
-  curve->a = scanval (domain_parms[idx].a);
-  curve->b = scanval (domain_parms[idx].b);
-  curve->n = scanval (domain_parms[idx].n);
-  curve->G.x = scanval (domain_parms[idx].g_x);
-  curve->G.y = scanval (domain_parms[idx].g_y);
-  curve->G.z = mpi_alloc_set_ui (1);
-  curve->name = resname;
+    ec->d = _gcry_dsa_gen_k (ec->n, random_level);
 
-  return 0;
-}
+  /* Compute Q.  */
+  _gcry_mpi_ec_mul_point (&Q, ec->d, ec->G, ec);
 
+  x = mpi_new (pbits);
+  if (r_y == NULL)
+    y = NULL;
+  else
+    y = mpi_new (pbits);
+  if (_gcry_mpi_ec_get_affine (x, y, &Q, ec))
+    log_fatal ("ecgen: Failed to get affine coordinates for %s\n", "Q");
+
+  /* We want the Q=(x,y) be a "compliant key" in terms of the
+   * http://tools.ietf.org/html/draft-jivsov-ecc-compact, which simply
+   * means that we choose either Q=(x,y) or -Q=(x,p-y) such that we
+   * end up with the min(y,p-y) as the y coordinate.  Such a public
+   * key allows the most efficient compression: y can simply be
+   * dropped because we know that it's a minimum of the two
+   * possibilities without any loss of security.  Note that we don't
+   * do that for Ed25519 so that we do not violate the special
+   * construction of the secret key.  */
+  if (r_y == NULL || ec->dialect == ECC_DIALECT_ED25519)
+    ec->Q = mpi_point_set (NULL, Q.x, Q.y, Q.z);
+  else
+    {
+      gcry_mpi_t negative;
 
-/*
- * First obtain the setup.  Over the finite field randomize an scalar
- * secret value, and calculate the public point.
- */
-static gpg_err_code_t
-generate_key (ECC_secret_key *sk, unsigned int nbits, const char *name,
-              int transient_key,
-              gcry_mpi_t g_x, gcry_mpi_t g_y,
-              gcry_mpi_t q_x, gcry_mpi_t q_y,
-              const char **r_usedcurve)
-{
-  gpg_err_code_t err;
-  elliptic_curve_t E;
-  gcry_mpi_t d;
-  mpi_point_t Q;
-  mpi_ec_t ctx;
-  gcry_random_level_t random_level;
+      negative = mpi_new (pbits);
 
-  *r_usedcurve = NULL;
+      if (ec->model == MPI_EC_WEIERSTRASS)
+        mpi_sub (negative, ec->p, y);      /* negative = p - y */
+      else
+        mpi_sub (negative, ec->p, x);      /* negative = p - x */
 
-  err = fill_in_curve (nbits, name, &E, &nbits);
-  if (err)
-    return err;
+      if (mpi_cmp (negative, y) < 0)   /* p - y < p */
+        {
+          /* We need to end up with -Q; this assures that new Q's y is
+             the smallest one */
+          if (ec->model == MPI_EC_WEIERSTRASS)
+            {
+              mpi_free (y);
+              y = negative;
+            }
+          else
+            {
+              mpi_free (x);
+              x = negative;
+            }
+          mpi_sub (ec->d, ec->n, ec->d);   /* d = order - d */
+          ec->Q = mpi_point_set (NULL, x, y, mpi_const (MPI_C_ONE));
 
-  if (DBG_CIPHER)
-    {
-      log_mpidump ("ecgen curve  p", E.p);
-      log_mpidump ("ecgen curve  a", E.a);
-      log_mpidump ("ecgen curve  b", E.b);
-      log_mpidump ("ecgen curve  n", E.n);
-      log_mpidump ("ecgen curve Gx", E.G.x);
-      log_mpidump ("ecgen curve Gy", E.G.y);
-      log_mpidump ("ecgen curve Gz", E.G.z);
-      if (E.name)
-        log_debug   ("ecgen curve used: %s\n", E.name);
+          if (DBG_CIPHER)
+            log_debug ("ecgen converted Q to a compliant point\n");
+        }
+      else /* p - y >= p */
+        {
+          /* No change is needed exactly 50% of the time: just copy. */
+          mpi_free (negative);
+          ec->Q = mpi_point_set (NULL, Q.x, Q.y, Q.z);
+          if (DBG_CIPHER)
+            log_debug ("ecgen didn't need to convert Q to a compliant 
point\n");
+        }
     }
 
-  random_level = transient_key ? GCRY_STRONG_RANDOM : GCRY_VERY_STRONG_RANDOM;
-  d = gen_k (E.n, random_level);
-
-  /* Compute Q.  */
-  point_init (&Q);
-  ctx = _gcry_mpi_ec_init (E.p, E.a);
-  _gcry_mpi_ec_mul_point (&Q, d, &E.G, ctx);
-
-  /* Copy the stuff to the key structures. */
-  sk->E.p = mpi_copy (E.p);
-  sk->E.a = mpi_copy (E.a);
-  sk->E.b = mpi_copy (E.b);
-  point_init (&sk->E.G);
-  point_set (&sk->E.G, &E.G);
-  sk->E.n = mpi_copy (E.n);
-  point_init (&sk->Q);
-  point_set (&sk->Q, &Q);
-  sk->d    = mpi_copy (d);
-  /* We also return copies of G and Q in affine coordinates if
-     requested.  */
-  if (g_x && g_y)
-    {
-      if (_gcry_mpi_ec_get_affine (g_x, g_y, &sk->E.G, ctx))
-        log_fatal ("ecgen: Failed to get affine coordinates\n");
-    }
-  if (q_x && q_y)
-    {
-      if (_gcry_mpi_ec_get_affine (q_x, q_y, &sk->Q, ctx))
-        log_fatal ("ecgen: Failed to get affine coordinates\n");
-    }
-  _gcry_mpi_ec_free (ctx);
+  *r_x = x;
+  if (r_y)
+    *r_y = y;
 
   point_free (&Q);
-  mpi_free (d);
-
-  *r_usedcurve = E.name;
-  curve_free (&E);
-
   /* Now we can test our keys (this should never fail!).  */
-  test_keys (sk, nbits - 64);
+  if ((flags & PUBKEY_FLAG_NO_KEYTEST))
+    ; /* User requested to skip the test.  */
+  else if (ec->model == MPI_EC_MONTGOMERY)
+    test_ecdh_only_keys (ec, ec->nbits - 63, flags);
+  else if (!fips_mode ())
+    test_keys (ec, ec->nbits - 64);
 
   return 0;
 }
@@ -591,11 +270,10 @@ generate_key (ECC_secret_key *sk, unsigned int nbits, 
const char *name,
  * Second, test with the sign and verify functions.
  */
 static void
-test_keys (ECC_secret_key *sk, unsigned int nbits)
+test_keys (mpi_ec_t ec, unsigned int nbits)
 {
-  ECC_public_key pk;
   gcry_mpi_t test = mpi_new (nbits);
-  mpi_point_t R_;
+  mpi_point_struct R_;
   gcry_mpi_t c = mpi_new (nbits);
   gcry_mpi_t out = mpi_new (nbits);
   gcry_mpi_t r = mpi_new (nbits);
@@ -606,16 +284,12 @@ test_keys (ECC_secret_key *sk, unsigned int nbits)
 
   point_init (&R_);
 
-  pk.E = curve_copy (sk->E);
-  point_init (&pk.Q);
-  point_set (&pk.Q, &sk->Q);
+  _gcry_mpi_randomize (test, nbits, GCRY_WEAK_RANDOM);
 
-  gcry_mpi_randomize (test, nbits, GCRY_WEAK_RANDOM);
-
-  if (sign (test, sk, r, s) )
+  if (_gcry_ecc_ecdsa_sign (test, NULL, ec, r, s, 0, 0) )
     log_fatal ("ECDSA operation: sign failed\n");
 
-  if (verify (test, &pk, r, s))
+  if (_gcry_ecc_ecdsa_verify (test, ec, r, s, 0, 0))
     {
       log_fatal ("ECDSA operation: sign, verify failed\n");
     }
@@ -623,9 +297,6 @@ test_keys (ECC_secret_key *sk, unsigned int nbits)
   if (DBG_CIPHER)
     log_debug ("ECDSA operation: sign, verify ok.\n");
 
-  point_free (&pk.Q);
-  curve_free (&pk.E);
-
   point_free (&R_);
   mpi_free (s);
   mpi_free (r);
@@ -634,243 +305,248 @@ test_keys (ECC_secret_key *sk, unsigned int nbits)
   mpi_free (test);
 }
 
-
-/*
- * To check the validity of the value, recalculate the correspondence
- * between the public value and the secret one.
- */
+/* We should get here only with the NIST curves as they are the only ones
+ * having the fips bit set in ecc_domain_parms_t struct so this is slightly
+ * simpler than the whole ecc_generate function */
 static int
-check_secret_key (ECC_secret_key * sk)
+test_keys_fips (gcry_sexp_t skey)
 {
-  int rc = 1;
-  mpi_point_t Q;
-  gcry_mpi_t y_2, y2;
-  mpi_ec_t ctx = NULL;
-
-  point_init (&Q);
-
-  /* ?primarity test of 'p' */
-  /*  (...) //!! */
-  /* G in E(F_p) */
-  y_2 = gen_y_2 (sk->E.G.x, &sk->E);   /*  y^2=x^3+a*x+b */
-  y2 = mpi_alloc (0);
-  mpi_mulm (y2, sk->E.G.y, sk->E.G.y, sk->E.p);      /*  y^2=y*y */
-  if (mpi_cmp (y_2, y2))
-    {
-      if (DBG_CIPHER)
-        log_debug ("Bad check: Point 'G' does not belong to curve 'E'!\n");
-      goto leave;
-    }
-  /* G != PaI */
-  if (!mpi_cmp_ui (sk->E.G.z, 0))
+  int result = -1; /* Default to failure */
+  gcry_md_hd_t hd = NULL;
+  const char *data_tmpl = "(data (flags rfc6979) (hash %s %b))";
+  gcry_sexp_t sig = NULL;
+  char plaintext[128];
+  int rc;
+
+  /* Create a random plaintext.  */
+  _gcry_randomize (plaintext, sizeof plaintext, GCRY_WEAK_RANDOM);
+
+  /* Open MD context and feed the random data in */
+  rc = _gcry_md_open (&hd, GCRY_MD_SHA256, 0);
+  if (rc)
     {
-      if (DBG_CIPHER)
-        log_debug ("Bad check: 'G' cannot be Point at Infinity!\n");
+      log_error ("ECDSA operation: failed to initialize MD context: %s\n", 
gpg_strerror (rc));
       goto leave;
     }
+  _gcry_md_write (hd, plaintext, sizeof(plaintext));
 
-  ctx = _gcry_mpi_ec_init (sk->E.p, sk->E.a);
-
-  _gcry_mpi_ec_mul_point (&Q, sk->E.n, &sk->E.G, ctx);
-  if (mpi_cmp_ui (Q.z, 0))
+  /* Sign the data */
+  rc = _gcry_pk_sign_md (&sig, data_tmpl, hd, skey, NULL);
+  if (rc)
     {
-      if (DBG_CIPHER)
-        log_debug ("check_secret_key: E is not a curve of order n\n");
+      log_error ("ECDSA operation: signing failed: %s\n", gpg_strerror (rc));
       goto leave;
     }
-  /* pubkey cannot be PaI */
-  if (!mpi_cmp_ui (sk->Q.z, 0))
+
+  /* Verify this signature.  */
+  rc = _gcry_pk_verify_md (sig, data_tmpl, hd, skey, NULL);
+  if (rc)
     {
-      if (DBG_CIPHER)
-        log_debug ("Bad check: Q can not be a Point at Infinity!\n");
+      log_error ("ECDSA operation: verification failed: %s\n", gpg_strerror 
(rc));
       goto leave;
     }
-  /* pubkey = [d]G over E */
-  _gcry_mpi_ec_mul_point (&Q, sk->d, &sk->E.G, ctx);
-  if ((Q.x == sk->Q.x) && (Q.y == sk->Q.y) && (Q.z == sk->Q.z))
+
+  /* Modify the data and check that the signing fails.  */
+  _gcry_md_reset(hd);
+  plaintext[sizeof plaintext / 2] ^= 1;
+  _gcry_md_write (hd, plaintext, sizeof(plaintext));
+  rc = _gcry_pk_verify_md (sig, data_tmpl, hd, skey, NULL);
+  if (rc != GPG_ERR_BAD_SIGNATURE)
     {
-      if (DBG_CIPHER)
-        log_debug
-          ("Bad check: There is NO correspondence between 'd' and 'Q'!\n");
+      log_error ("ECDSA operation: signature verification worked on modified 
data\n");
       goto leave;
     }
-  rc = 0; /* Okay.  */
 
- leave:
-  _gcry_mpi_ec_free (ctx);
-  mpi_free (y2);
-  mpi_free (y_2);
-  point_free (&Q);
-  return rc;
+  result = 0;
+leave:
+  _gcry_md_close (hd);
+  sexp_release (sig);
+  return result;
 }
 
 
-/*
- * Return the signature struct (r,s) from the message hash.  The caller
- * must have allocated R and S.
- */
-static gpg_err_code_t
-sign (gcry_mpi_t input, ECC_secret_key *skey, gcry_mpi_t r, gcry_mpi_t s)
+static void
+test_ecdh_only_keys (mpi_ec_t ec, unsigned int nbits, int flags)
 {
-  gpg_err_code_t err = 0;
-  gcry_mpi_t k, dr, sum, k_1, x;
-  mpi_point_t I;
-  mpi_ec_t ctx;
+  gcry_mpi_t test;
+  mpi_point_struct R_;
+  gcry_mpi_t x0, x1;
 
   if (DBG_CIPHER)
-    log_mpidump ("ecdsa sign hash  ", input );
+    log_debug ("Testing ECDH only key.\n");
 
-  k = NULL;
-  dr = mpi_alloc (0);
-  sum = mpi_alloc (0);
-  k_1 = mpi_alloc (0);
-  x = mpi_alloc (0);
-  point_init (&I);
-
-  mpi_set_ui (s, 0);
-  mpi_set_ui (r, 0);
-
-  ctx = _gcry_mpi_ec_init (skey->E.p, skey->E.a);
+  point_init (&R_);
 
-  while (!mpi_cmp_ui (s, 0)) /* s == 0 */
+  if (ec->dialect == ECC_DIALECT_SAFECURVE || (flags & PUBKEY_FLAG_DJB_TWEAK))
     {
-      while (!mpi_cmp_ui (r, 0)) /* r == 0 */
+      char *rndbuf;
+      const unsigned int pbits = ec->nbits;
+      int len = (pbits+7)/8;
+
+      rndbuf = _gcry_random_bytes (len, GCRY_WEAK_RANDOM);
+      if (ec->dialect == ECC_DIALECT_SAFECURVE)
+        test = mpi_set_opaque (NULL, rndbuf, len*8);
+      else
         {
-          /* Note, that we are guaranteed to enter this loop at least
-             once because r has been intialized to 0.  We can't use a
-             do_while because we want to keep the value of R even if S
-             has to be recomputed.  */
-          mpi_free (k);
-          k = gen_k (skey->E.n, GCRY_STRONG_RANDOM);
-          _gcry_mpi_ec_mul_point (&I, k, &skey->E.G, ctx);
-          if (_gcry_mpi_ec_get_affine (x, NULL, &I, ctx))
-            {
-              if (DBG_CIPHER)
-                log_debug ("ecc sign: Failed to get affine coordinates\n");
-              err = GPG_ERR_BAD_SIGNATURE;
-              goto leave;
-            }
-          mpi_mod (r, x, skey->E.n);  /* r = x mod n */
+          test = mpi_new (pbits);
+          if ((pbits % 8))
+            rndbuf[0] &= (1 << (pbits % 8)) - 1;
+          rndbuf[0] |= (1 << ((pbits + 7) % 8));
+          rndbuf[len-1] &= (256 - ec->h);
+          _gcry_mpi_set_buffer (test, rndbuf, len, 0);
+          xfree (rndbuf);
         }
-      mpi_mulm (dr, skey->d, r, skey->E.n); /* dr = d*r mod n  */
-      mpi_addm (sum, input, dr, skey->E.n); /* sum = hash + (d*r) mod n  */
-      mpi_invm (k_1, k, skey->E.n);         /* k_1 = k^(-1) mod n  */
-      mpi_mulm (s, k_1, sum, skey->E.n);    /* s = k^(-1)*(hash+(d*r)) mod n */
+    }
+  else
+    {
+      test = mpi_new (nbits);
+      _gcry_mpi_randomize (test, nbits, GCRY_WEAK_RANDOM);
     }
 
-  if (DBG_CIPHER)
+  x0 = mpi_new (0);
+  x1 = mpi_new (0);
+
+  /* R_ = hkQ  <=>  R_ = hkdG  */
+  _gcry_mpi_ec_mul_point (&R_, test, ec->Q, ec);
+  if (ec->dialect == ECC_DIALECT_STANDARD && !(flags & PUBKEY_FLAG_DJB_TWEAK))
+    _gcry_mpi_ec_mul_point (&R_, _gcry_mpi_get_const (ec->h), &R_, ec);
+  if (_gcry_mpi_ec_get_affine (x0, NULL, &R_, ec))
+    log_fatal ("ecdh: Failed to get affine coordinates for hkQ\n");
+
+  _gcry_mpi_ec_mul_point (&R_, test, ec->G, ec);
+  _gcry_mpi_ec_mul_point (&R_, ec->d, &R_, ec);
+  /* R_ = hdkG */
+  if (ec->dialect == ECC_DIALECT_STANDARD && !(flags & PUBKEY_FLAG_DJB_TWEAK))
+    _gcry_mpi_ec_mul_point (&R_, _gcry_mpi_get_const (ec->h), &R_, ec);
+
+  if (_gcry_mpi_ec_get_affine (x1, NULL, &R_, ec))
+    log_fatal ("ecdh: Failed to get affine coordinates for hdkG\n");
+
+  if (mpi_cmp (x0, x1))
     {
-      log_mpidump ("ecdsa sign result r ", r);
-      log_mpidump ("ecdsa sign result s ", s);
+      log_fatal ("ECDH test failed.\n");
     }
 
- leave:
-  _gcry_mpi_ec_free (ctx);
-  point_free (&I);
-  mpi_free (x);
-  mpi_free (k_1);
-  mpi_free (sum);
-  mpi_free (dr);
-  mpi_free (k);
-
-  return err;
+  mpi_free (x0);
+  mpi_free (x1);
+
+  point_free (&R_);
+  mpi_free (test);
 }
 
 
 /*
- * Check if R and S verifies INPUT.
+ * To check the validity of the value, recalculate the correspondence
+ * between the public value and the secret one.
  */
-static gpg_err_code_t
-verify (gcry_mpi_t input, ECC_public_key *pkey, gcry_mpi_t r, gcry_mpi_t s)
+static int
+check_secret_key (mpi_ec_t ec, int flags)
 {
-  gpg_err_code_t err = 0;
-  gcry_mpi_t h, h1, h2, x, y;
-  mpi_point_t Q, Q1, Q2;
-  mpi_ec_t ctx;
-
-  if( !(mpi_cmp_ui (r, 0) > 0 && mpi_cmp (r, pkey->E.n) < 0) )
-    return GPG_ERR_BAD_SIGNATURE; /* Assertion 0 < r < n  failed.  */
-  if( !(mpi_cmp_ui (s, 0) > 0 && mpi_cmp (s, pkey->E.n) < 0) )
-    return GPG_ERR_BAD_SIGNATURE; /* Assertion 0 < s < n  failed.  */
-
-  h  = mpi_alloc (0);
-  h1 = mpi_alloc (0);
-  h2 = mpi_alloc (0);
-  x = mpi_alloc (0);
-  y = mpi_alloc (0);
+  int rc = 1;
+  mpi_point_struct Q;
+  gcry_mpi_t x1, y1;
+  gcry_mpi_t x2 = NULL;
+  gcry_mpi_t y2 = NULL;
+
   point_init (&Q);
-  point_init (&Q1);
-  point_init (&Q2);
-
-  ctx = _gcry_mpi_ec_init (pkey->E.p, pkey->E.a);
-
-  /* h  = s^(-1) (mod n) */
-  mpi_invm (h, s, pkey->E.n);
-/*   log_mpidump ("   h", h); */
-  /* h1 = hash * s^(-1) (mod n) */
-  mpi_mulm (h1, input, h, pkey->E.n);
-/*   log_mpidump ("  h1", h1); */
-  /* Q1 = [ hash * s^(-1) ]G  */
-  _gcry_mpi_ec_mul_point (&Q1, h1, &pkey->E.G, ctx);
-/*   log_mpidump ("Q1.x", Q1.x); */
-/*   log_mpidump ("Q1.y", Q1.y); */
-/*   log_mpidump ("Q1.z", Q1.z); */
-  /* h2 = r * s^(-1) (mod n) */
-  mpi_mulm (h2, r, h, pkey->E.n);
-/*   log_mpidump ("  h2", h2); */
-  /* Q2 = [ r * s^(-1) ]Q */
-  _gcry_mpi_ec_mul_point (&Q2, h2, &pkey->Q, ctx);
-/*   log_mpidump ("Q2.x", Q2.x); */
-/*   log_mpidump ("Q2.y", Q2.y); */
-/*   log_mpidump ("Q2.z", Q2.z); */
-  /* Q  = ([hash * s^(-1)]G) + ([r * s^(-1)]Q) */
-  _gcry_mpi_ec_add_points (&Q, &Q1, &Q2, ctx);
-/*   log_mpidump (" Q.x", Q.x); */
-/*   log_mpidump (" Q.y", Q.y); */
-/*   log_mpidump (" Q.z", Q.z); */
-
-  if (!mpi_cmp_ui (Q.z, 0))
+  x1 = mpi_new (0);
+  if (ec->model == MPI_EC_MONTGOMERY)
+    y1 = NULL;
+  else
+    y1 = mpi_new (0);
+
+  /* G in E(F_p) */
+  if (!_gcry_mpi_ec_curve_point (ec->G, ec))
+    {
+      if (DBG_CIPHER)
+        log_debug ("Bad check: Point 'G' does not belong to curve 'E'!\n");
+      goto leave;
+    }
+
+  /* G != PaI */
+  if (!mpi_cmp_ui (ec->G->z, 0))
+    {
+      if (DBG_CIPHER)
+        log_debug ("Bad check: 'G' cannot be Point at Infinity!\n");
+      goto leave;
+    }
+
+  /* Check order of curve.  */
+  if (ec->dialect == ECC_DIALECT_STANDARD && !(flags & PUBKEY_FLAG_DJB_TWEAK))
+    {
+      _gcry_mpi_ec_mul_point (&Q, ec->n, ec->G, ec);
+      if (mpi_cmp_ui (Q.z, 0))
+        {
+          if (DBG_CIPHER)
+            log_debug ("check_secret_key: E is not a curve of order n\n");
+          goto leave;
+        }
+    }
+
+  /* Pubkey cannot be PaI */
+  if (!mpi_cmp_ui (ec->Q->z, 0))
     {
       if (DBG_CIPHER)
-          log_debug ("ecc verify: Rejected\n");
-      err = GPG_ERR_BAD_SIGNATURE;
+        log_debug ("Bad check: Q can not be a Point at Infinity!\n");
       goto leave;
     }
-  if (_gcry_mpi_ec_get_affine (x, y, &Q, ctx))
+
+  /* pubkey = [d]G over E */
+  if (!_gcry_ecc_compute_public (&Q, ec))
     {
       if (DBG_CIPHER)
-        log_debug ("ecc verify: Failed to get affine coordinates\n");
-      err = GPG_ERR_BAD_SIGNATURE;
+        log_debug ("Bad check: computation of dG failed\n");
       goto leave;
     }
-  mpi_mod (x, x, pkey->E.n); /* x = x mod E_n */
-  if (mpi_cmp (x, r))   /* x != r */
+  if (_gcry_mpi_ec_get_affine (x1, y1, &Q, ec))
     {
       if (DBG_CIPHER)
+        log_debug ("Bad check: Q can not be a Point at Infinity!\n");
+      goto leave;
+    }
+
+  if ((flags & PUBKEY_FLAG_EDDSA)
+      || (ec->model == MPI_EC_EDWARDS && ec->dialect == ECC_DIALECT_SAFECURVE))
+    ; /* Fixme: EdDSA is special.  */
+  else if (!mpi_cmp_ui (ec->Q->z, 1))
+    {
+      /* Fast path if Q is already in affine coordinates.  */
+      if (mpi_cmp (x1, ec->Q->x) || (y1 && mpi_cmp (y1, ec->Q->y)))
         {
-          log_mpidump ("     x", x);
-          log_mpidump ("     y", y);
-          log_mpidump ("     r", r);
-          log_mpidump ("     s", s);
-          log_debug ("ecc verify: Not verified\n");
+          if (DBG_CIPHER)
+            log_debug
+              ("Bad check: There is NO correspondence between 'd' and 'Q'!\n");
+          goto leave;
         }
-      err = GPG_ERR_BAD_SIGNATURE;
-      goto leave;
     }
-  if (DBG_CIPHER)
-    log_debug ("ecc verify: Accepted\n");
+  else
+    {
+      x2 = mpi_new (0);
+      y2 = mpi_new (0);
+      if (_gcry_mpi_ec_get_affine (x2, y2, ec->Q, ec))
+        {
+          if (DBG_CIPHER)
+            log_debug ("Bad check: Q can not be a Point at Infinity!\n");
+          goto leave;
+        }
+
+      if (mpi_cmp (x1, x2) || mpi_cmp (y1, y2))
+        {
+          if (DBG_CIPHER)
+            log_debug
+              ("Bad check: There is NO correspondence between 'd' and 'Q'!\n");
+          goto leave;
+        }
+    }
+  rc = 0; /* Okay.  */
 
  leave:
-  _gcry_mpi_ec_free (ctx);
-  point_free (&Q2);
-  point_free (&Q1);
+  mpi_free (x2);
+  mpi_free (x1);
+  mpi_free (y1);
+  mpi_free (y2);
   point_free (&Q);
-  mpi_free (y);
-  mpi_free (x);
-  mpi_free (h2);
-  mpi_free (h1);
-  mpi_free (h);
-  return err;
+  return rc;
 }
 
 
@@ -878,482 +554,410 @@ verify (gcry_mpi_t input, ECC_public_key *pkey, 
gcry_mpi_t r, gcry_mpi_t s)
 /*********************************************
  **************  interface  ******************
  *********************************************/
-static gcry_mpi_t
-ec2os (gcry_mpi_t x, gcry_mpi_t y, gcry_mpi_t p)
+
+static gcry_err_code_t
+ecc_generate (const gcry_sexp_t genparms, gcry_sexp_t *r_skey)
 {
-  gpg_error_t err;
-  int pbytes = (mpi_get_nbits (p)+7)/8;
-  size_t n;
-  unsigned char *buf, *ptr;
-  gcry_mpi_t result;
-
-  buf = gcry_xmalloc ( 1 + 2*pbytes );
-  *buf = 04; /* Uncompressed point.  */
-  ptr = buf+1;
-  err = gcry_mpi_print (GCRYMPI_FMT_USG, ptr, pbytes, &n, x);
-  if (err)
-    log_fatal ("mpi_print failed: %s\n", gpg_strerror (err));
-  if (n < pbytes)
+  gpg_err_code_t rc;
+  gcry_mpi_t Gx = NULL;
+  gcry_mpi_t Gy = NULL;
+  gcry_mpi_t Qx = NULL;
+  gcry_mpi_t Qy = NULL;
+  mpi_ec_t ec = NULL;
+  gcry_sexp_t curve_info = NULL;
+  gcry_sexp_t curve_flags = NULL;
+  gcry_mpi_t base = NULL;
+  gcry_mpi_t public = NULL;
+  int flags = 0;
+
+  rc = _gcry_mpi_ec_internal_new (&ec, &flags, "ecgen curve", genparms, NULL);
+  if (rc)
+    goto leave;
+
+  if ((flags & PUBKEY_FLAG_EDDSA)
+      || (ec->model == MPI_EC_EDWARDS && ec->dialect == ECC_DIALECT_SAFECURVE))
+    rc = _gcry_ecc_eddsa_genkey (ec, flags);
+  else if (ec->model == MPI_EC_MONTGOMERY)
+    rc = nist_generate_key (ec, flags, &Qx, NULL);
+  else
+    rc = nist_generate_key (ec, flags, &Qx, &Qy);
+  if (rc)
+    goto leave;
+
+  /* Copy data to the result.  */
+  Gx = mpi_new (0);
+  Gy = mpi_new (0);
+  if (ec->model != MPI_EC_MONTGOMERY)
     {
-      memmove (ptr+(pbytes-n), ptr, n);
-      memset (ptr, 0, (pbytes-n));
+      if (_gcry_mpi_ec_get_affine (Gx, Gy, ec->G, ec))
+        log_fatal ("ecgen: Failed to get affine coordinates for %s\n", "G");
+      base = _gcry_ecc_ec2os (Gx, Gy, ec->p);
     }
-  ptr += pbytes;
-  err = gcry_mpi_print (GCRYMPI_FMT_USG, ptr, pbytes, &n, y);
-  if (err)
-    log_fatal ("mpi_print failed: %s\n", gpg_strerror (err));
-  if (n < pbytes)
+  if (((ec->dialect == ECC_DIALECT_SAFECURVE && ec->model == MPI_EC_EDWARDS)
+       || ec->dialect == ECC_DIALECT_ED25519 || ec->model == MPI_EC_MONTGOMERY)
+      && !(flags & PUBKEY_FLAG_NOCOMP))
     {
-      memmove (ptr+(pbytes-n), ptr, n);
-      memset (ptr, 0, (pbytes-n));
+      unsigned char *encpk;
+      unsigned int encpklen;
+
+      if (ec->model == MPI_EC_MONTGOMERY)
+        rc = _gcry_ecc_mont_encodepoint (Qx, ec->nbits,
+                                         ec->dialect != ECC_DIALECT_SAFECURVE,
+                                         &encpk, &encpklen);
+      else
+        /* (Gx and Gy are used as scratch variables)  */
+        rc = _gcry_ecc_eddsa_encodepoint (ec->Q, ec, Gx, Gy,
+                                          (ec->dialect != ECC_DIALECT_SAFECURVE
+                                           && !!(flags & PUBKEY_FLAG_COMP)),
+                                          &encpk, &encpklen);
+      if (rc)
+        goto leave;
+      public = mpi_new (0);
+      mpi_set_opaque (public, encpk, encpklen*8);
     }
-
-  err = gcry_mpi_scan (&result, GCRYMPI_FMT_USG, buf, 1+2*pbytes, NULL);
-  if (err)
-    log_fatal ("mpi_scan failed: %s\n", gpg_strerror (err));
-  gcry_free (buf);
-
-  return result;
-}
-
-
-/* RESULT must have been initialized and is set on success to the
-   point given by VALUE.  */
-static gcry_error_t
-os2ec (mpi_point_t *result, gcry_mpi_t value)
-{
-  gcry_error_t err;
-  size_t n;
-  unsigned char *buf;
-  gcry_mpi_t x, y;
-
-  n = (mpi_get_nbits (value)+7)/8;
-  buf = gcry_xmalloc (n);
-  err = gcry_mpi_print (GCRYMPI_FMT_USG, buf, n, &n, value);
-  if (err)
+  else
     {
-      gcry_free (buf);
-      return err;
+      if (!Qx)
+        {
+          /* This is the case for a key from _gcry_ecc_eddsa_generate
+             with no compression.  */
+          Qx = mpi_new (0);
+          Qy = mpi_new (0);
+          if (_gcry_mpi_ec_get_affine (Qx, Qy, ec->Q, ec))
+            log_fatal ("ecgen: Failed to get affine coordinates for %s\n", 
"Q");
+        }
+      public = _gcry_ecc_ec2os (Qx, Qy, ec->p);
     }
-  if (n < 1)
+  if (ec->name)
     {
-      gcry_free (buf);
-      return GPG_ERR_INV_OBJ;
+      rc = sexp_build (&curve_info, NULL, "(curve %s)", ec->name);
+      if (rc)
+        goto leave;
     }
-  if (*buf != 4)
-    {
-      gcry_free (buf);
-      return GPG_ERR_NOT_IMPLEMENTED; /* No support for point compression.  */
-    }
-  if ( ((n-1)%2) )
+
+  if ((flags & PUBKEY_FLAG_PARAM) || (flags & PUBKEY_FLAG_EDDSA)
+      || (flags & PUBKEY_FLAG_DJB_TWEAK))
     {
-      gcry_free (buf);
-      return GPG_ERR_INV_OBJ;
+      rc = sexp_build
+        (&curve_flags, NULL,
+         ((flags & PUBKEY_FLAG_PARAM) && (flags & PUBKEY_FLAG_EDDSA))?
+         "(flags param eddsa)" :
+         ((flags & PUBKEY_FLAG_PARAM) && (flags & PUBKEY_FLAG_DJB_TWEAK))?
+         "(flags param djb-tweak)" :
+         ((flags & PUBKEY_FLAG_PARAM))?
+         "(flags param)" : ((flags & PUBKEY_FLAG_EDDSA))?
+         "(flags eddsa)" : "(flags djb-tweak)" );
+      if (rc)
+        goto leave;
     }
-  n = (n-1)/2;
-  err = gcry_mpi_scan (&x, GCRYMPI_FMT_USG, buf+1, n, NULL);
-  if (err)
+
+  if ((flags & PUBKEY_FLAG_PARAM) && ec->name)
+    rc = sexp_build (r_skey, NULL,
+                     "(key-data"
+                     " (public-key"
+                     "  (ecc%S%S(p%m)(a%m)(b%m)(g%m)(n%m)(h%u)(q%m)))"
+                     " (private-key"
+                     "  (ecc%S%S(p%m)(a%m)(b%m)(g%m)(n%m)(h%u)(q%m)(d%m)))"
+                     " )",
+                     curve_info, curve_flags,
+                     ec->p, ec->a, ec->b, base, ec->n, ec->h, public,
+                     curve_info, curve_flags,
+                     ec->p, ec->a, ec->b, base, ec->n, ec->h, public,
+                     ec->d);
+  else
+    rc = sexp_build (r_skey, NULL,
+                     "(key-data"
+                     " (public-key"
+                     "  (ecc%S%S(q%m)))"
+                     " (private-key"
+                     "  (ecc%S%S(q%m)(d%m)))"
+                     " )",
+                     curve_info, curve_flags,
+                     public,
+                     curve_info, curve_flags,
+                     public, ec->d);
+  if (rc)
+    goto leave;
+
+  if (DBG_CIPHER)
     {
-      gcry_free (buf);
-      return err;
+      log_printmpi ("ecgen result  p", ec->p);
+      log_printmpi ("ecgen result  a", ec->a);
+      log_printmpi ("ecgen result  b", ec->b);
+      log_printmpi ("ecgen result  G", base);
+      log_printmpi ("ecgen result  n", ec->n);
+      log_debug    ("ecgen result  h:+%02x\n", ec->h);
+      log_printmpi ("ecgen result  Q", public);
+      log_printmpi ("ecgen result  d", ec->d);
+      if ((flags & PUBKEY_FLAG_EDDSA))
+        log_debug ("ecgen result  using Ed25519+EdDSA\n");
     }
-  err = gcry_mpi_scan (&y, GCRYMPI_FMT_USG, buf+1+n, n, NULL);
-  gcry_free (buf);
-  if (err)
+
+  if (fips_mode () && test_keys_fips (*r_skey))
     {
-      mpi_free (x);
-      return err;
+      sexp_release (*r_skey);
+      *r_skey = NULL;
+      fips_signal_error ("self-test after key generation failed");
+      rc = GPG_ERR_SELFTEST_FAILED;
     }
 
-  mpi_set (result->x, x);
-  mpi_set (result->y, y);
-  mpi_set_ui (result->z, 1);
-
-  mpi_free (x);
-  mpi_free (y);
-
-  return 0;
+ leave:
+  mpi_free (public);
+  mpi_free (base);
+  mpi_free (Gx);
+  mpi_free (Gy);
+  mpi_free (Qx);
+  mpi_free (Qy);
+  _gcry_mpi_ec_free (ec);
+  sexp_release (curve_flags);
+  sexp_release (curve_info);
+  return rc;
 }
 
 
-/* Extended version of ecc_generate.  */
 static gcry_err_code_t
-ecc_generate_ext (int algo, unsigned int nbits, unsigned long evalue,
-                  const gcry_sexp_t genparms,
-                  gcry_mpi_t *skey, gcry_mpi_t **retfactors,
-                  gcry_sexp_t *r_extrainfo)
+ecc_check_secret_key (gcry_sexp_t keyparms)
 {
-  gpg_err_code_t ec;
-  ECC_secret_key sk;
-  gcry_mpi_t g_x, g_y, q_x, q_y;
-  char *curve_name = NULL;
-  gcry_sexp_t l1;
-  int transient_key = 0;
-  const char *usedcurve = NULL;
-
-  (void)algo;
-  (void)evalue;
-
-  if (genparms)
+  gcry_err_code_t rc;
+  int flags = 0;
+  mpi_ec_t ec = NULL;
+
+  /*
+   * Extract the key.
+   */
+  rc = _gcry_mpi_ec_internal_new (&ec, &flags, "ecc_testkey", keyparms, NULL);
+  if (rc)
+    goto leave;
+  if (!ec->p || !ec->a || !ec->b || !ec->G || !ec->n || !ec->Q || !ec->d)
     {
-      /* Parse the optional "curve" parameter. */
-      l1 = gcry_sexp_find_token (genparms, "curve", 0);
-      if (l1)
-        {
-          curve_name = _gcry_sexp_nth_string (l1, 1);
-          gcry_sexp_release (l1);
-          if (!curve_name)
-            return GPG_ERR_INV_OBJ; /* No curve name or value too large. */
-        }
-
-      /* Parse the optional transient-key flag.  */
-      l1 = gcry_sexp_find_token (genparms, "transient-key", 0);
-      if (l1)
-        {
-          transient_key = 1;
-          gcry_sexp_release (l1);
-        }
+      rc = GPG_ERR_NO_OBJ;
+      goto leave;
     }
 
-  /* NBITS is required if no curve name has been given.  */
-  if (!nbits && !curve_name)
-    return GPG_ERR_NO_OBJ; /* No NBITS parameter. */
-
-  g_x = mpi_new (0);
-  g_y = mpi_new (0);
-  q_x = mpi_new (0);
-  q_y = mpi_new (0);
-  ec = generate_key (&sk, nbits, curve_name, transient_key, g_x, g_y, q_x, q_y,
-                     &usedcurve);
-  gcry_free (curve_name);
-  if (ec)
-    return ec;
-  if (usedcurve)  /* Fixme: No error return checking.  */
-    gcry_sexp_build (r_extrainfo, NULL, "(curve %s)", usedcurve);
-
-  skey[0] = sk.E.p;
-  skey[1] = sk.E.a;
-  skey[2] = sk.E.b;
-  skey[3] = ec2os (g_x, g_y, sk.E.p);
-  skey[4] = sk.E.n;
-  skey[5] = ec2os (q_x, q_y, sk.E.p);
-  skey[6] = sk.d;
-
-  mpi_free (g_x);
-  mpi_free (g_y);
-  mpi_free (q_x);
-  mpi_free (q_y);
-
-  point_free (&sk.E.G);
-  point_free (&sk.Q);
-
-  /* Make an empty list of factors.  */
-  *retfactors = gcry_calloc ( 1, sizeof **retfactors );
-  if (!*retfactors)
-    return gpg_err_code_from_syserror ();  /* Fixme: relase mem?  */
+  if (check_secret_key (ec, flags))
+    rc = GPG_ERR_BAD_SECKEY;
 
+ leave:
+  _gcry_mpi_ec_free (ec);
   if (DBG_CIPHER)
-    {
-      log_mpidump ("ecgen result p", skey[0]);
-      log_mpidump ("ecgen result a", skey[1]);
-      log_mpidump ("ecgen result b", skey[2]);
-      log_mpidump ("ecgen result G", skey[3]);
-      log_mpidump ("ecgen result n", skey[4]);
-      log_mpidump ("ecgen result Q", skey[5]);
-      log_mpidump ("ecgen result d", skey[6]);
-    }
-
-  return 0;
-}
-
-
-static gcry_err_code_t
-ecc_generate (int algo, unsigned int nbits, unsigned long evalue,
-              gcry_mpi_t *skey, gcry_mpi_t **retfactors)
-{
-  (void)evalue;
-  return ecc_generate_ext (algo, nbits, 0, NULL, skey, retfactors, NULL);
+    log_debug ("ecc_testkey    => %s\n", gpg_strerror (rc));
+  return rc;
 }
 
 
-/* Return the parameters of the curve NAME in an MPI array.  */
 static gcry_err_code_t
-ecc_get_param (const char *name, gcry_mpi_t *pkey)
-{
-  gpg_err_code_t err;
-  unsigned int nbits;
-  elliptic_curve_t E;
-  mpi_ec_t ctx;
-  gcry_mpi_t g_x, g_y;
-
-  err = fill_in_curve (0, name, &E, &nbits);
-  if (err)
-    return err;
-
-  g_x = mpi_new (0);
-  g_y = mpi_new (0);
-  ctx = _gcry_mpi_ec_init (E.p, E.a);
-  if (_gcry_mpi_ec_get_affine (g_x, g_y, &E.G, ctx))
-    log_fatal ("ecc get param: Failed to get affine coordinates\n");
-  _gcry_mpi_ec_free (ctx);
-  point_free (&E.G);
-
-  pkey[0] = E.p;
-  pkey[1] = E.a;
-  pkey[2] = E.b;
-  pkey[3] = ec2os (g_x, g_y, E.p);
-  pkey[4] = E.n;
-  pkey[5] = NULL;
-
-  mpi_free (g_x);
-  mpi_free (g_y);
-
-  return 0;
-}
-
-
-/* Return the parameters of the curve NAME as an S-expression.  */
-static gcry_sexp_t
-ecc_get_param_sexp (const char *name)
+ecc_sign (gcry_sexp_t *r_sig, gcry_sexp_t s_data, gcry_sexp_t keyparms)
 {
-  gcry_mpi_t pkey[6];
-  gcry_sexp_t result;
-  int i;
-
-  if (ecc_get_param (name, pkey))
-    return NULL;
-
-  if (gcry_sexp_build (&result, NULL,
-                       "(public-key(ecc(p%m)(a%m)(b%m)(g%m)(n%m)))",
-                       pkey[0], pkey[1], pkey[2], pkey[3], pkey[4]))
-    result = NULL;
-
-  for (i=0; pkey[i]; i++)
-    gcry_mpi_release (pkey[i]);
-
-  return result;
-}
-
+  gcry_err_code_t rc;
+  struct pk_encoding_ctx ctx;
+  gcry_mpi_t data = NULL;
+  gcry_mpi_t k = NULL;
+  gcry_mpi_t sig_r = NULL;
+  gcry_mpi_t sig_s = NULL;
+  mpi_ec_t ec = NULL;
+  int flags = 0;
+
+  _gcry_pk_util_init_encoding_ctx (&ctx, PUBKEY_OP_SIGN, 0);
+
+  /*
+   * Extract the key.
+   */
+  rc = _gcry_mpi_ec_internal_new (&ec, &flags, "ecc_sign", keyparms, NULL);
+  if (rc)
+    goto leave;
+  if (!ec->p || !ec->a || !ec->b || !ec->G || !ec->n || !ec->d)
+    {
+      rc = GPG_ERR_NO_OBJ;
+      goto leave;
+    }
 
-/* Return the name matching the parameters in PKEY.  */
-static const char *
-ecc_get_curve (gcry_mpi_t *pkey, int iterator, unsigned int *r_nbits)
-{
-  gpg_err_code_t err;
-  elliptic_curve_t E;
-  int idx;
-  gcry_mpi_t tmp;
-  const char *result = NULL;
+  ctx.flags |= flags;
+  if (ec->model == MPI_EC_EDWARDS && ec->dialect == ECC_DIALECT_SAFECURVE)
+    ctx.flags |= PUBKEY_FLAG_EDDSA;
+  /* Clear hash algo for EdDSA.  */
+  if ((ctx.flags & PUBKEY_FLAG_EDDSA))
+    ctx.hash_algo = GCRY_MD_NONE;
+
+  /* Extract the data.  */
+  rc = _gcry_pk_util_data_to_mpi (s_data, &data, &ctx);
+  if (rc)
+    goto leave;
+  if (DBG_CIPHER)
+    log_mpidump ("ecc_sign   data", data);
 
-  if (r_nbits)
-    *r_nbits = 0;
+  if (ctx.label)
+    rc = _gcry_mpi_scan (&k, GCRYMPI_FMT_USG, ctx.label, ctx.labellen, NULL);
+  if (rc)
+    goto leave;
 
-  if (!pkey)
+  /* Hash algo is determined by curve in EdDSA.  Fill it if not specified.  */
+  if ((ctx.flags & PUBKEY_FLAG_EDDSA) && !ctx.hash_algo)
     {
-      idx = iterator;
-      if (idx >= 0 && idx < DIM (domain_parms))
-        {
-          result = domain_parms[idx].desc;
-          if (r_nbits)
-            *r_nbits = domain_parms[idx].nbits;
-        }
-      return result;
+      if (ec->dialect == ECC_DIALECT_ED25519)
+        ctx.hash_algo = GCRY_MD_SHA512;
+      else if (ec->dialect == ECC_DIALECT_SAFECURVE)
+        ctx.hash_algo = GCRY_MD_SHAKE256;
     }
 
-  if (!pkey[0] || !pkey[1] || !pkey[2] || !pkey[3] || !pkey[4])
-    return NULL;
-
-  E.p = pkey[0];
-  E.a = pkey[1];
-  E.b = pkey[2];
-  point_init (&E.G);
-  err = os2ec (&E.G, pkey[3]);
-  if (err)
+  sig_r = mpi_new (0);
+  sig_s = mpi_new (0);
+  if ((ctx.flags & PUBKEY_FLAG_EDDSA))
     {
-      point_free (&E.G);
-      return NULL;
+      /* EdDSA requires the public key.  */
+      rc = _gcry_ecc_eddsa_sign (data, ec, sig_r, sig_s, &ctx);
+      if (!rc)
+        rc = sexp_build (r_sig, NULL,
+                         "(sig-val(eddsa(r%M)(s%M)))", sig_r, sig_s);
     }
-  E.n = pkey[4];
-
-  for (idx = 0; domain_parms[idx].desc; idx++)
+  else if ((ctx.flags & PUBKEY_FLAG_GOST))
     {
-      tmp = scanval (domain_parms[idx].p);
-      if (!mpi_cmp (tmp, E.p))
-        {
-          mpi_free (tmp);
-          tmp = scanval (domain_parms[idx].a);
-          if (!mpi_cmp (tmp, E.a))
-            {
-              mpi_free (tmp);
-              tmp = scanval (domain_parms[idx].b);
-              if (!mpi_cmp (tmp, E.b))
-                {
-                  mpi_free (tmp);
-                  tmp = scanval (domain_parms[idx].n);
-                  if (!mpi_cmp (tmp, E.n))
-                    {
-                      mpi_free (tmp);
-                      tmp = scanval (domain_parms[idx].g_x);
-                      if (!mpi_cmp (tmp, E.G.x))
-                        {
-                          mpi_free (tmp);
-                          tmp = scanval (domain_parms[idx].g_y);
-                          if (!mpi_cmp (tmp, E.G.y))
-                            {
-                              result = domain_parms[idx].desc;
-                              if (r_nbits)
-                                *r_nbits = domain_parms[idx].nbits;
-                              break;
-                            }
-                        }
-                    }
-                }
-            }
-        }
-      mpi_free (tmp);
+      rc = _gcry_ecc_gost_sign (data, ec, sig_r, sig_s);
+      if (!rc)
+        rc = sexp_build (r_sig, NULL,
+                         "(sig-val(gost(r%M)(s%M)))", sig_r, sig_s);
+    }
+  else if ((ctx.flags & PUBKEY_FLAG_SM2))
+    {
+      rc = _gcry_ecc_sm2_sign (data, ec, sig_r, sig_s,
+                               ctx.flags, ctx.hash_algo);
+      if (!rc)
+        rc = sexp_build (r_sig, NULL,
+                         "(sig-val(sm2(r%M)(s%M)))", sig_r, sig_s);
+    }
+  else
+    {
+      rc = _gcry_ecc_ecdsa_sign (data, k, ec, sig_r, sig_s,
+                                 ctx.flags, ctx.hash_algo);
+      if (!rc)
+        rc = sexp_build (r_sig, NULL,
+                         "(sig-val(ecdsa(r%M)(s%M)))", sig_r, sig_s);
     }
 
-  point_free (&E.G);
-
-  return result;
+ leave:
+  _gcry_mpi_release (sig_r);
+  _gcry_mpi_release (sig_s);
+  _gcry_mpi_release (data);
+  _gcry_mpi_release (k);
+  _gcry_mpi_ec_free (ec);
+  _gcry_pk_util_free_encoding_ctx (&ctx);
+  if (DBG_CIPHER)
+    log_debug ("ecc_sign      => %s\n", gpg_strerror (rc));
+  return rc;
 }
 
 
 static gcry_err_code_t
-ecc_check_secret_key (int algo, gcry_mpi_t *skey)
+ecc_verify (gcry_sexp_t s_sig, gcry_sexp_t s_data, gcry_sexp_t s_keyparms)
 {
-  gpg_err_code_t err;
-  ECC_secret_key sk;
-
-  (void)algo;
-
-  /* FIXME:  This check looks a bit fishy:  Now long is the array?  */
-  if (!skey[0] || !skey[1] || !skey[2] || !skey[3] || !skey[4] || !skey[5]
-      || !skey[6])
-    return GPG_ERR_BAD_MPI;
-
-  sk.E.p = skey[0];
-  sk.E.a = skey[1];
-  sk.E.b = skey[2];
-  point_init (&sk.E.G);
-  err = os2ec (&sk.E.G, skey[3]);
-  if (err)
+  gcry_err_code_t rc;
+  struct pk_encoding_ctx ctx;
+  gcry_sexp_t l1 = NULL;
+  gcry_mpi_t sig_r = NULL;
+  gcry_mpi_t sig_s = NULL;
+  gcry_mpi_t data = NULL;
+  int sigflags;
+  mpi_ec_t ec = NULL;
+  int flags = 0;
+
+  _gcry_pk_util_init_encoding_ctx (&ctx, PUBKEY_OP_VERIFY,
+                                   ecc_get_nbits (s_keyparms));
+
+  /*
+   * Extract the key.
+   */
+  rc = _gcry_mpi_ec_internal_new (&ec, &flags, "ecc_verify",
+                                  s_keyparms, NULL);
+  if (rc)
+    goto leave;
+  if (!ec->p || !ec->a || !ec->b || !ec->G || !ec->n || !ec->Q)
     {
-      point_free (&sk.E.G);
-      return err;
+      rc = GPG_ERR_NO_OBJ;
+      goto leave;
     }
-  sk.E.n = skey[4];
-  point_init (&sk.Q);
-  err = os2ec (&sk.Q, skey[5]);
-  if (err)
+
+  if (ec->model == MPI_EC_MONTGOMERY)
     {
-      point_free (&sk.E.G);
-      point_free (&sk.Q);
-      return err;
+      if (DBG_CIPHER)
+        log_debug ("ecc_verify: Can't use a Montgomery curve\n");
+      rc = GPG_ERR_INTERNAL;
+      goto leave;
     }
 
-  sk.d = skey[6];
+  ctx.flags |= flags;
+  if (ec->model == MPI_EC_EDWARDS && ec->dialect == ECC_DIALECT_SAFECURVE)
+    ctx.flags |= PUBKEY_FLAG_EDDSA;
+  /* Clear hash algo for EdDSA.  */
+  if ((ctx.flags & PUBKEY_FLAG_EDDSA))
+    ctx.hash_algo = GCRY_MD_NONE;
+
+  /* Extract the data.  */
+  rc = _gcry_pk_util_data_to_mpi (s_data, &data, &ctx);
+  if (rc)
+    goto leave;
+  if (DBG_CIPHER)
+    log_mpidump ("ecc_verify data", data);
 
-  if (check_secret_key (&sk))
+  /* Hash algo is determined by curve in EdDSA.  Fill it if not specified.  */
+  if ((ctx.flags & PUBKEY_FLAG_EDDSA) && !ctx.hash_algo)
     {
-      point_free (&sk.E.G);
-      point_free (&sk.Q);
-      return GPG_ERR_BAD_SECKEY;
+      if (ec->dialect == ECC_DIALECT_ED25519)
+        ctx.hash_algo = GCRY_MD_SHA512;
+      else if (ec->dialect == ECC_DIALECT_SAFECURVE)
+        ctx.hash_algo = GCRY_MD_SHAKE256;
     }
-  point_free (&sk.E.G);
-  point_free (&sk.Q);
-  return 0;
-}
-
-
-static gcry_err_code_t
-ecc_sign (int algo, gcry_mpi_t *resarr, gcry_mpi_t data, gcry_mpi_t *skey)
-{
-  gpg_err_code_t err;
-  ECC_secret_key sk;
-
-  (void)algo;
 
-  if (!data || !skey[0] || !skey[1] || !skey[2] || !skey[3] || !skey[4]
-      || !skey[5] || !skey[6] )
-    return GPG_ERR_BAD_MPI;
-
-  sk.E.p = skey[0];
-  sk.E.a = skey[1];
-  sk.E.b = skey[2];
-  point_init (&sk.E.G);
-  err = os2ec (&sk.E.G, skey[3]);
-  if (err)
+  /*
+   * Extract the signature value.
+   */
+  rc = _gcry_pk_util_preparse_sigval (s_sig, ecc_names, &l1, &sigflags);
+  if (rc)
+    goto leave;
+  rc = sexp_extract_param (l1, NULL, (sigflags & PUBKEY_FLAG_EDDSA)? 
"/rs":"rs",
+                           &sig_r, &sig_s, NULL);
+  if (rc)
+    goto leave;
+  if (DBG_CIPHER)
     {
-      point_free (&sk.E.G);
-      return err;
+      log_mpidump ("ecc_verify  s_r", sig_r);
+      log_mpidump ("ecc_verify  s_s", sig_s);
     }
-  sk.E.n = skey[4];
-  point_init (&sk.Q);
-  err = os2ec (&sk.Q, skey[5]);
-  if (err)
+  if ((ctx.flags & PUBKEY_FLAG_EDDSA) ^ (sigflags & PUBKEY_FLAG_EDDSA))
     {
-      point_free (&sk.E.G);
-      point_free (&sk.Q);
-      return err;
+      rc = GPG_ERR_CONFLICT; /* Inconsistent use of flag/algoname.  */
+      goto leave;
     }
-  sk.d = skey[6];
 
-  resarr[0] = mpi_alloc (mpi_get_nlimbs (sk.E.p));
-  resarr[1] = mpi_alloc (mpi_get_nlimbs (sk.E.p));
-  err = sign (data, &sk, resarr[0], resarr[1]);
-  if (err)
+  /*
+   * Verify the signature.
+   */
+  if ((sigflags & PUBKEY_FLAG_EDDSA))
     {
-      mpi_free (resarr[0]);
-      mpi_free (resarr[1]);
-      resarr[0] = NULL; /* Mark array as released.  */
+      rc = _gcry_ecc_eddsa_verify (data, ec, sig_r, sig_s, &ctx);
     }
-  point_free (&sk.E.G);
-  point_free (&sk.Q);
-  return err;
-}
-
-
-static gcry_err_code_t
-ecc_verify (int algo, gcry_mpi_t hash, gcry_mpi_t *data, gcry_mpi_t *pkey,
-            int (*cmp)(void *, gcry_mpi_t), void *opaquev)
-{
-  gpg_err_code_t err;
-  ECC_public_key pk;
-
-  (void)algo;
-  (void)cmp;
-  (void)opaquev;
-
-  if (!data[0] || !data[1] || !hash || !pkey[0] || !pkey[1] || !pkey[2]
-      || !pkey[3] || !pkey[4] || !pkey[5] )
-    return GPG_ERR_BAD_MPI;
-
-  pk.E.p = pkey[0];
-  pk.E.a = pkey[1];
-  pk.E.b = pkey[2];
-  point_init (&pk.E.G);
-  err = os2ec (&pk.E.G, pkey[3]);
-  if (err)
+  else if ((sigflags & PUBKEY_FLAG_GOST))
     {
-      point_free (&pk.E.G);
-      return err;
+      rc = _gcry_ecc_gost_verify (data, ec, sig_r, sig_s);
     }
-  pk.E.n = pkey[4];
-  point_init (&pk.Q);
-  err = os2ec (&pk.Q, pkey[5]);
-  if (err)
+  else if ((sigflags & PUBKEY_FLAG_SM2))
     {
-      point_free (&pk.E.G);
-      point_free (&pk.Q);
-      return err;
+      rc = _gcry_ecc_sm2_verify (data, ec, sig_r, sig_s);
+    }
+  else
+    {
+      rc = _gcry_ecc_ecdsa_verify (data, ec, sig_r, sig_s,
+                                   ctx.flags, ctx.hash_algo);
     }
 
-  err = verify (hash, &pk, data[0], data[1]);
-
-  point_free (&pk.E.G);
-  point_free (&pk.Q);
-  return err;
+ leave:
+  _gcry_mpi_release (data);
+  _gcry_mpi_release (sig_r);
+  _gcry_mpi_release (sig_s);
+  _gcry_mpi_ec_free (ec);
+  sexp_release (l1);
+  _gcry_pk_util_free_encoding_ctx (&ctx);
+  if (DBG_CIPHER)
+    log_debug ("ecc_verify    => %s\n", rc?gpg_strerror (rc):"Good");
+  return rc;
 }
 
 
@@ -1375,9 +979,9 @@ ecc_verify (int algo, gcry_mpi_t hash, gcry_mpi_t *data, 
gcry_mpi_t *pkey,
  * ecc_encrypt_raw description:
  *   input:
  *     data[0] : private scalar (k)
- *   output:
- *     result[0] : shared point (kdG)
- *     result[1] : generated ephemeral public key (kG)
+ *   output: A new S-expression with the parameters:
+ *     s : shared point (kdG)
+ *     e : generated ephemeral public key (kG)
  *
  * ecc_decrypt_raw description:
  *   input:
@@ -1386,93 +990,168 @@ ecc_verify (int algo, gcry_mpi_t hash, gcry_mpi_t 
*data, gcry_mpi_t *pkey,
  *     result[0] : shared point (kdG)
  */
 static gcry_err_code_t
-ecc_encrypt_raw (int algo, gcry_mpi_t *resarr, gcry_mpi_t k,
-                 gcry_mpi_t *pkey, int flags)
+ecc_encrypt_raw (gcry_sexp_t *r_ciph, gcry_sexp_t s_data, gcry_sexp_t keyparms)
 {
-  ECC_public_key pk;
-  mpi_ec_t ctx;
-  gcry_mpi_t result[2];
-  int err;
-
-  (void)algo;
-  (void)flags;
-
-  if (!k
-      || !pkey[0] || !pkey[1] || !pkey[2] || !pkey[3] || !pkey[4] || !pkey[5])
-    return GPG_ERR_BAD_MPI;
-
-  pk.E.p = pkey[0];
-  pk.E.a = pkey[1];
-  pk.E.b = pkey[2];
-  point_init (&pk.E.G);
-  err = os2ec (&pk.E.G, pkey[3]);
-  if (err)
+  unsigned int nbits;
+  gcry_err_code_t rc;
+  struct pk_encoding_ctx ctx;
+  gcry_mpi_t mpi_s = NULL;
+  gcry_mpi_t mpi_e = NULL;
+  gcry_mpi_t data = NULL;
+  mpi_ec_t ec = NULL;
+  int flags = 0;
+  int no_error_on_infinity;
+
+  _gcry_pk_util_init_encoding_ctx (&ctx, PUBKEY_OP_ENCRYPT,
+                                   (nbits = ecc_get_nbits (keyparms)));
+
+  /*
+   * Extract the key.
+   */
+  rc = _gcry_mpi_ec_internal_new (&ec, &flags, "ecc_encrypt", keyparms, NULL);
+  if (rc)
+    goto leave;
+
+  if (ec->dialect == ECC_DIALECT_SAFECURVE)
     {
-      point_free (&pk.E.G);
-      return err;
+      ctx.flags |= PUBKEY_FLAG_RAW_FLAG;
+      no_error_on_infinity = 1;
     }
-  pk.E.n = pkey[4];
-  point_init (&pk.Q);
-  err = os2ec (&pk.Q, pkey[5]);
-  if (err)
+  else if ((flags & PUBKEY_FLAG_DJB_TWEAK))
+    no_error_on_infinity = 1;
+  else
+    no_error_on_infinity = 0;
+
+  /*
+   * Extract the data.
+   */
+  rc = _gcry_pk_util_data_to_mpi (s_data, &data, &ctx);
+  if (rc)
+    goto leave;
+
+  /*
+   * Tweak the scalar bits by cofactor and number of bits of the field.
+   * It assumes the cofactor is a power of 2.
+   */
+  if ((flags & PUBKEY_FLAG_DJB_TWEAK))
     {
-      point_free (&pk.E.G);
-      point_free (&pk.Q);
-      return err;
+      int i;
+
+      for (i = 0; (ec->h & (1 << i)) == 0; i++)
+        mpi_clear_bit (data, i);
+      mpi_set_highbit (data, ec->nbits - 1);
     }
+  if (DBG_CIPHER)
+    log_mpidump ("ecc_encrypt data", data);
 
-  ctx = _gcry_mpi_ec_init (pk.E.p, pk.E.a);
+  if (!ec->p || !ec->a || !ec->b || !ec->G || !ec->n || !ec->Q)
+    {
+      rc = GPG_ERR_NO_OBJ;
+      goto leave;
+    }
+
+  if ((ctx.flags & PUBKEY_FLAG_SM2))
+    {
+      /* All encryption will be done, return it.  */
+      rc = _gcry_ecc_sm2_encrypt (r_ciph, data, ec);
+      goto leave;
+    }
 
   /* The following is false: assert( mpi_cmp_ui( R.x, 1 )==0 );, so */
   {
-    mpi_point_t R;     /* Result that we return.  */
+    mpi_point_struct R;  /* Result that we return.  */
     gcry_mpi_t x, y;
+    unsigned char *rawmpi;
+    unsigned int rawmpilen;
 
+    rc = 0;
     x = mpi_new (0);
-    y = mpi_new (0);
+    if (ec->model == MPI_EC_MONTGOMERY)
+      y = NULL;
+    else
+      y = mpi_new (0);
 
     point_init (&R);
 
     /* R = kQ  <=>  R = kdG  */
-    _gcry_mpi_ec_mul_point (&R, k, &pk.Q, ctx);
-
-    if (_gcry_mpi_ec_get_affine (x, y, &R, ctx))
-      log_fatal ("ecdh: Failed to get affine coordinates for kdG\n");
+    _gcry_mpi_ec_mul_point (&R, data, ec->Q, ec);
 
-    result[0] = ec2os (x, y, pk.E.p);
+    if (_gcry_mpi_ec_get_affine (x, y, &R, ec))
+      {
+        /*
+         * Here, X is 0.  In the X25519 computation on Curve25519, X0
+         * function maps infinity to zero.  So, when PUBKEY_FLAG_DJB_TWEAK
+         * is enabled, return the result of 0 not raising an error.
+         *
+         * This is a corner case.  It never occurs with properly
+         * generated public keys, but it might happen with blindly
+         * imported public key which might not follow the key
+         * generation procedure.
+         */
+        if (!no_error_on_infinity)
+          { /* It's not for X25519, then, the input data was simply wrong.  */
+            rc = GPG_ERR_INV_DATA;
+            goto leave_main;
+          }
+      }
+    if (y)
+      mpi_s = _gcry_ecc_ec2os (x, y, ec->p);
+    else
+      {
+        rc = _gcry_ecc_mont_encodepoint (x, nbits,
+                                         ec->dialect != ECC_DIALECT_SAFECURVE,
+                                         &rawmpi, &rawmpilen);
+        if (rc)
+          goto leave_main;
+        mpi_s = mpi_new (0);
+        mpi_set_opaque (mpi_s, rawmpi, rawmpilen*8);
+      }
 
     /* R = kG */
-    _gcry_mpi_ec_mul_point (&R, k, &pk.E.G, ctx);
-
-    if (_gcry_mpi_ec_get_affine (x, y, &R, ctx))
-      log_fatal ("ecdh: Failed to get affine coordinates for kG\n");
+    _gcry_mpi_ec_mul_point (&R, data, ec->G, ec);
 
-    result[1] = ec2os (x, y, pk.E.p);
+    if (_gcry_mpi_ec_get_affine (x, y, &R, ec))
+      {
+        rc = GPG_ERR_INV_DATA;
+        goto leave_main;
+      }
+    if (y)
+      mpi_e = _gcry_ecc_ec2os (x, y, ec->p);
+    else
+      {
+        rc = _gcry_ecc_mont_encodepoint (x, nbits,
+                                         ec->dialect != ECC_DIALECT_SAFECURVE,
+                                         &rawmpi, &rawmpilen);
+        if (!rc)
+          {
+            mpi_e = mpi_new (0);
+            mpi_set_opaque (mpi_e, rawmpi, rawmpilen*8);
+          }
+      }
 
+  leave_main:
     mpi_free (x);
     mpi_free (y);
-
     point_free (&R);
+    if (rc)
+      goto leave;
   }
 
-  _gcry_mpi_ec_free (ctx);
-  point_free (&pk.E.G);
-  point_free (&pk.Q);
+  if (!rc)
+    rc = sexp_build (r_ciph, NULL, "(enc-val(ecdh(s%m)(e%m)))", mpi_s, mpi_e);
 
-  if (!result[0] || !result[1])
-    {
-      mpi_free (result[0]);
-      mpi_free (result[1]);
-      return GPG_ERR_ENOMEM;
-    }
-
-  /* Success.  */
-  resarr[0] = result[0];
-  resarr[1] = result[1];
-
-  return 0;
+ leave:
+  _gcry_mpi_release (data);
+  _gcry_mpi_release (mpi_s);
+  _gcry_mpi_release (mpi_e);
+  _gcry_mpi_ec_free (ec);
+  _gcry_pk_util_free_encoding_ctx (&ctx);
+  if (DBG_CIPHER)
+    log_debug ("ecc_encrypt    => %s\n", gpg_strerror (rc));
+  return rc;
 }
 
+
 /*  input:
  *     data[0] : a point kG (ephemeral public key)
  *   output:
@@ -1481,168 +1160,307 @@ ecc_encrypt_raw (int algo, gcry_mpi_t *resarr, 
gcry_mpi_t k,
  *  see ecc_encrypt_raw for details.
  */
 static gcry_err_code_t
-ecc_decrypt_raw (int algo, gcry_mpi_t *result, gcry_mpi_t *data,
-                 gcry_mpi_t *skey, int flags)
+ecc_decrypt_raw (gcry_sexp_t *r_plain, gcry_sexp_t s_data, gcry_sexp_t 
keyparms)
 {
-  ECC_secret_key sk;
-  mpi_point_t R;       /* Result that we return.  */
-  mpi_point_t kG;
-  mpi_ec_t ctx;
-  gcry_mpi_t r;
-  int err;
+  unsigned int nbits;
+  gpg_err_code_t rc;
+  struct pk_encoding_ctx ctx;
+  gcry_sexp_t l1 = NULL;
+  gcry_mpi_t data_e = NULL;
+  mpi_ec_t ec = NULL;
+  mpi_point_struct kG;
+  mpi_point_struct R;
+  gcry_mpi_t r = NULL;
+  int flags = 0;
+  int enable_specific_point_validation;
+
+  point_init (&kG);
+  point_init (&R);
 
-  (void)algo;
-  (void)flags;
+  _gcry_pk_util_init_encoding_ctx (&ctx, PUBKEY_OP_DECRYPT,
+                                   (nbits = ecc_get_nbits (keyparms)));
 
-  *result = NULL;
+  /*
+   * Extract the key.
+   */
+  rc = _gcry_mpi_ec_internal_new (&ec, &flags, "ecc_decrypt", keyparms, NULL);
+  if (rc)
+    goto leave;
 
-  if (!data || !data[0]
-      || !skey[0] || !skey[1] || !skey[2] || !skey[3] || !skey[4]
-      || !skey[5] || !skey[6] )
-    return GPG_ERR_BAD_MPI;
+  if (!ec->p || !ec->a || !ec->b || !ec->G || !ec->n || !ec->d)
+    {
+      rc = GPG_ERR_NO_OBJ;
+      goto leave;
+    }
 
-  point_init (&kG);
-  err = os2ec (&kG, data[0]);
-  if (err)
+  /*
+   * Extract the data.
+   */
+  rc = _gcry_pk_util_preparse_encval (s_data, ecc_names, &l1, &ctx);
+  if (rc)
+    goto leave;
+  if ((ctx.flags & PUBKEY_FLAG_SM2))
     {
-      point_free (&kG);
-      return err;
+      /* All decryption will be done, return it.  */
+      rc = _gcry_ecc_sm2_decrypt (r_plain, l1, ec);
+      goto leave;
+    }
+  else
+    {
+      rc = sexp_extract_param (l1, NULL, "/e", &data_e, NULL);
+      if (rc)
+        goto leave;
+      if (DBG_CIPHER)
+        log_printmpi ("ecc_decrypt  d_e", data_e);
     }
 
+  if (ec->dialect == ECC_DIALECT_SAFECURVE || (flags & PUBKEY_FLAG_DJB_TWEAK))
+    enable_specific_point_validation = 1;
+  else
+    enable_specific_point_validation = 0;
 
-  sk.E.p = skey[0];
-  sk.E.a = skey[1];
-  sk.E.b = skey[2];
-  point_init (&sk.E.G);
-  err = os2ec (&sk.E.G, skey[3]);
-  if (err)
+  /*
+   * Compute the plaintext.
+   */
+  if (ec->model == MPI_EC_MONTGOMERY)
+    rc = _gcry_ecc_mont_decodepoint (data_e, ec, &kG);
+  else
+    rc = _gcry_ecc_sec_decodepoint (data_e, ec, &kG);
+  if (rc)
+    goto leave;
+
+  if (DBG_CIPHER)
+    log_printpnt ("ecc_decrypt    kG", &kG, NULL);
+
+  if (enable_specific_point_validation)
     {
-      point_free (&kG);
-      point_free (&sk.E.G);
-      return err;
+      /* For X25519, by its definition, validation should not be done.  */
+      /* (Instead, we do output check.)
+       *
+       * However, to mitigate secret key leak from our implementation,
+       * we also do input validation here.  For constant-time
+       * implementation, we can remove this input validation.
+       */
+      if (_gcry_mpi_ec_bad_point (&kG, ec))
+        {
+          rc = GPG_ERR_INV_DATA;
+          goto leave;
+        }
     }
-  sk.E.n = skey[4];
-  point_init (&sk.Q);
-  err = os2ec (&sk.Q, skey[5]);
-  if (err)
+  else if (!_gcry_mpi_ec_curve_point (&kG, ec))
     {
-      point_free (&kG);
-      point_free (&sk.E.G);
-      point_free (&sk.Q);
-      return err;
+      rc = GPG_ERR_INV_DATA;
+      goto leave;
     }
-  sk.d = skey[6];
-
-  ctx = _gcry_mpi_ec_init (sk.E.p, sk.E.a);
 
   /* R = dkG */
-  point_init (&R);
-  _gcry_mpi_ec_mul_point (&R, sk.d, &kG, ctx);
-
-  point_free (&kG);
+  _gcry_mpi_ec_mul_point (&R, ec->d, &kG, ec);
 
   /* The following is false: assert( mpi_cmp_ui( R.x, 1 )==0 );, so:  */
   {
     gcry_mpi_t x, y;
 
     x = mpi_new (0);
-    y = mpi_new (0);
+    if (ec->model == MPI_EC_MONTGOMERY)
+      y = NULL;
+    else
+      y = mpi_new (0);
 
-    if (_gcry_mpi_ec_get_affine (x, y, &R, ctx))
-      log_fatal ("ecdh: Failed to get affine coordinates\n");
+    if (_gcry_mpi_ec_get_affine (x, y, &R, ec))
+      {
+        rc = GPG_ERR_INV_DATA;
+        goto leave;
+        /*
+         * Note for X25519.
+         *
+         * By the definition of X25519, this is the case where X25519
+         * returns 0, mapping infinity to zero.  However, we
+         * deliberately let it return an error.
+         *
+         * For X25519 ECDH, comming here means that it might be
+         * decrypted by anyone with the shared secret of 0 (the result
+         * of this function could be always 0 by other scalar values,
+         * other than the private key of D).
+         *
+         * So, it looks like an encrypted message but it can be
+         * decrypted by anyone, or at least something wrong
+         * happens.  Recipient should not proceed as if it were
+         * properly encrypted message.
+         *
+         * This handling is needed for our major usage of GnuPG,
+         * where it does the One-Pass Diffie-Hellman method,
+         * C(1, 1, ECC CDH), with an ephemeral key.
+         */
+      }
 
-    r = ec2os (x, y, sk.E.p);
+    if (y)
+      r = _gcry_ecc_ec2os (x, y, ec->p);
+    else
+      {
+
+        unsigned char *rawmpi;
+        unsigned int rawmpilen;
+
+        rc = _gcry_ecc_mont_encodepoint (x, nbits,
+                                         ec->dialect != ECC_DIALECT_SAFECURVE,
+                                         &rawmpi, &rawmpilen);
+        if (rc)
+          goto leave;
+
+        r = mpi_new (0);
+        mpi_set_opaque (r, rawmpi, rawmpilen*8);
+      }
+    if (!r)
+      rc = gpg_err_code_from_syserror ();
+    else
+      rc = 0;
     mpi_free (x);
     mpi_free (y);
   }
+  if (DBG_CIPHER)
+    log_printmpi ("ecc_decrypt  res", r);
+
+  if (!rc)
+    rc = sexp_build (r_plain, NULL, "(value %m)", r);
 
+ leave:
   point_free (&R);
-  _gcry_mpi_ec_free (ctx);
   point_free (&kG);
-  point_free (&sk.E.G);
-  point_free (&sk.Q);
-
-  if (!r)
-    return GPG_ERR_ENOMEM;
-
-  /* Success.  */
-
-  *result = r;
-
-  return 0;
+  _gcry_mpi_release (r);
+  _gcry_mpi_release (data_e);
+  sexp_release (l1);
+  _gcry_mpi_ec_free (ec);
+  _gcry_pk_util_free_encoding_ctx (&ctx);
+  if (DBG_CIPHER)
+    log_debug ("ecc_decrypt    => %s\n", gpg_strerror (rc));
+  return rc;
 }
 
 
+/* Return the number of bits for the key described by PARMS.  On error
+ * 0 is returned.  The format of PARMS starts with the algorithm name;
+ * for example:
+ *
+ *   (ecc
+ *     (curve <name>)
+ *     (p <mpi>)
+ *     (a <mpi>)
+ *     (b <mpi>)
+ *     (g <mpi>)
+ *     (n <mpi>)
+ *     (q <mpi>))
+ *
+ * More parameters may be given. Either P or CURVE is needed.
+ */
 static unsigned int
-ecc_get_nbits (int algo, gcry_mpi_t *pkey)
+ecc_get_nbits (gcry_sexp_t parms)
 {
-  (void)algo;
+  gcry_sexp_t l1;
+  gcry_mpi_t p;
+  unsigned int nbits = 0;
+  char *curve;
+
+  l1 = sexp_find_token (parms, "p", 1);
+  if (!l1)
+    { /* Parameter P not found - check whether we have "curve".  */
+      l1 = sexp_find_token (parms, "curve", 5);
+      if (!l1)
+        return 0; /* Neither P nor CURVE found.  */
+
+      curve = sexp_nth_string (l1, 1);
+      sexp_release (l1);
+      if (!curve)
+        return 0;  /* No curve name given (or out of core). */
 
-  return mpi_get_nbits (pkey[0]);
+      if (_gcry_ecc_fill_in_curve (0, curve, NULL, &nbits))
+        nbits = 0;
+      xfree (curve);
+    }
+  else
+    {
+      p = sexp_nth_mpi (l1, 1, GCRYMPI_FMT_USG);
+      sexp_release (l1);
+      if (p)
+        {
+          nbits = mpi_get_nbits (p);
+          _gcry_mpi_release (p);
+        }
+    }
+  return nbits;
 }
 
 
 /* See rsa.c for a description of this function.  */
 static gpg_err_code_t
-compute_keygrip (gcry_md_hd_t md, gcry_sexp_t keyparam)
+compute_keygrip (gcry_md_hd_t md, gcry_sexp_t keyparms)
 {
 #define N_COMPONENTS 6
-  static const char names[N_COMPONENTS+1] = "pabgnq";
-  gpg_err_code_t ec = 0;
+  static const char names[N_COMPONENTS] = "pabgnq";
+  gpg_err_code_t rc;
   gcry_sexp_t l1;
   gcry_mpi_t values[N_COMPONENTS];
   int idx;
-
-  /* Clear the values for easier error cleanup.  */
+  char *curvename = NULL;
+  int flags = 0;
+  enum gcry_mpi_ec_models model = 0;
+  enum ecc_dialects dialect = 0;
+  const unsigned char *raw;
+  unsigned int n;
+  int maybe_uncompress;
+
+  /* Clear the values first.  */
   for (idx=0; idx < N_COMPONENTS; idx++)
     values[idx] = NULL;
 
-  /* Fill values with all provided parameters.  */
-  for (idx=0; idx < N_COMPONENTS; idx++)
+
+  /* Look for flags. */
+  l1 = sexp_find_token (keyparms, "flags", 0);
+  if (l1)
     {
-      l1 = gcry_sexp_find_token (keyparam, names+idx, 1);
-      if (l1)
-        {
-          values[idx] = gcry_sexp_nth_mpi (l1, 1, GCRYMPI_FMT_USG);
-         gcry_sexp_release (l1);
-         if (!values[idx])
-            {
-              ec = GPG_ERR_INV_OBJ;
-              goto leave;
-            }
-       }
+      rc = _gcry_pk_util_parse_flaglist (l1, &flags, NULL);
+      if (rc)
+        goto leave;
     }
 
+  /* Extract the parameters.  */
+  if ((flags & PUBKEY_FLAG_PARAM))
+    rc = sexp_extract_param (keyparms, NULL, "p?a?b?g?n?/q",
+                             &values[0], &values[1], &values[2],
+                             &values[3], &values[4], &values[5],
+                             NULL);
+  else
+    rc = sexp_extract_param (keyparms, NULL, "/q", &values[5], NULL);
+  if (rc)
+    goto leave;
+
   /* Check whether a curve parameter is available and use that to fill
      in missing values.  */
-  l1 = gcry_sexp_find_token (keyparam, "curve", 5);
+  sexp_release (l1);
+  l1 = sexp_find_token (keyparms, "curve", 5);
   if (l1)
     {
-      char *curve;
-      gcry_mpi_t tmpvalues[N_COMPONENTS];
-
-      for (idx = 0; idx < N_COMPONENTS; idx++)
-        tmpvalues[idx] = NULL;
-
-      curve = _gcry_sexp_nth_string (l1, 1);
-      gcry_sexp_release (l1);
-      if (!curve)
+      curvename = sexp_nth_string (l1, 1);
+      if (curvename)
         {
-          ec = GPG_ERR_INV_OBJ; /* Name missing or out of core. */
-          goto leave;
+          rc = _gcry_ecc_update_curve_param (curvename,
+                                             &model, &dialect,
+                                             &values[0], &values[1], 
&values[2],
+                                             &values[3], &values[4]);
+          if (rc)
+            goto leave;
         }
-      ec = ecc_get_param (curve, tmpvalues);
-      gcry_free (curve);
-      if (ec)
-        goto leave;
+    }
 
-      for (idx = 0; idx < N_COMPONENTS; idx++)
-        {
-          if (!values[idx])
-            values[idx] = tmpvalues[idx];
-          else
-            mpi_free (tmpvalues[idx]);
-        }
+  /* Guess required fields if a curve parameter has not been given.
+     FIXME: This is a crude hacks.  We need to fix that.  */
+  if (!curvename)
+    {
+      model = ((flags & PUBKEY_FLAG_EDDSA)
+               ? MPI_EC_EDWARDS
+               : MPI_EC_WEIERSTRASS);
+      dialect = ((flags & PUBKEY_FLAG_EDDSA)
+                 ? ECC_DIALECT_ED25519
+                 : ECC_DIALECT_STANDARD);
     }
 
   /* Check that all parameters are known and normalize all MPIs (that
@@ -1651,41 +1469,276 @@ compute_keygrip (gcry_md_hd_t md, gcry_sexp_t keyparam)
   for (idx = 0; idx < N_COMPONENTS; idx++)
     if (!values[idx])
       {
-        ec = GPG_ERR_NO_OBJ;
+        rc = GPG_ERR_NO_OBJ;
         goto leave;
       }
     else
       _gcry_mpi_normalize (values[idx]);
 
+  /* Uncompress the public key with the exception of EdDSA where
+     compression is the default and we thus compute the keygrip using
+     the compressed version.  Because we don't support any non-eddsa
+     compression, the only thing we need to do is to compress
+     EdDSA.  */
+  if ((flags & PUBKEY_FLAG_EDDSA) && dialect == ECC_DIALECT_ED25519)
+    {
+      const unsigned int pbits = mpi_get_nbits (values[0]);
+
+      rc = _gcry_ecc_eddsa_ensure_compact (values[5], pbits);
+      if (rc)
+        goto leave;
+      maybe_uncompress = 0;
+    }
+  else if ((flags & PUBKEY_FLAG_DJB_TWEAK))
+    {
+      /* Remove the prefix 0x40 for keygrip computation.  */
+      raw = mpi_get_opaque (values[5], &n);
+      if (raw)
+        {
+          n = (n + 7)/8;
+
+          if (n > 1 && (n%2) && raw[0] == 0x40)
+            if (!_gcry_mpi_set_opaque_copy (values[5], raw + 1, (n - 1)*8))
+                rc = gpg_err_code_from_syserror ();
+        }
+      else
+        {
+          rc = GPG_ERR_INV_OBJ;
+          goto leave;
+        }
+      maybe_uncompress = 0;
+    }
+  else
+    maybe_uncompress = 1;
+
   /* Hash them all.  */
   for (idx = 0; idx < N_COMPONENTS; idx++)
     {
       char buf[30];
-      unsigned char *rawmpi;
-      unsigned int rawmpilen;
+      unsigned char *rawbuffer;
+      unsigned int rawlen;
 
-      rawmpi = _gcry_mpi_get_buffer (values[idx], &rawmpilen, NULL);
-      if (!rawmpi)
+      if (mpi_is_opaque (values[idx]))
         {
-          ec = gpg_err_code_from_syserror ();
-          goto leave;
+          rawbuffer = NULL;
+          raw = mpi_get_opaque (values[idx], &rawlen);
+          rawlen = (rawlen + 7)/8;
+        }
+      else
+        {
+          rawbuffer = _gcry_mpi_get_buffer (values[idx], 0, &rawlen, NULL);
+          if (!rawbuffer)
+            {
+              rc = gpg_err_code_from_syserror ();
+              goto leave;
+            }
+          raw = rawbuffer;
+        }
+
+      if (maybe_uncompress && idx == 5 && rawlen > 1
+          && (*raw == 0x02 || *raw == 0x03))
+        {
+          /* This is a compressed Q - uncompress.  */
+          mpi_ec_t ec = NULL;
+          gcry_mpi_t x, y;
+          gcry_mpi_t x3;
+          gcry_mpi_t t;
+          gcry_mpi_t p1_4;
+          int y_bit = (*raw == 0x03);
+
+          /* We need to get the curve parameters as MPIs so that we
+           * can do computations.  We have them in VALUES but it is
+           * possible that the caller provided them as opaque MPIs. */
+          rc = _gcry_mpi_ec_internal_new (&ec, &flags, "ecc_keygrip",
+                                          keyparms, NULL);
+          if (rc)
+            goto leave;
+          if (!ec->p || !ec->a || !ec->b || !ec->G || !ec->n)
+            {
+              rc = GPG_ERR_NO_OBJ;
+              _gcry_mpi_ec_free (ec);
+              goto leave;
+            }
+
+          if (!mpi_test_bit (ec->p, 1))
+            {
+              /* No support for point compression for this curve.  */
+              rc = GPG_ERR_NOT_IMPLEMENTED;
+              _gcry_mpi_ec_free (ec);
+              xfree (rawbuffer);
+              goto leave;
+            }
+
+          raw++;
+          rawlen--;
+          rc = _gcry_mpi_scan (&x, GCRYMPI_FMT_USG, raw, rawlen, NULL);
+          if (rc)
+            {
+              _gcry_mpi_ec_free (ec);
+              xfree (rawbuffer);
+              goto leave;
+            }
+
+          /*
+           * Recover Y.  The Weierstrass curve: y^2 = x^3 + a*x + b
+           */
+
+          x3 = mpi_new (0);
+          t = mpi_new (0);
+          p1_4 = mpi_new (0);
+          y = mpi_new (0);
+
+          /* Compute right hand side.  */
+          mpi_powm (x3, x, mpi_const (MPI_C_THREE), ec->p);
+          mpi_mul (t, ec->a, x);
+          mpi_mod (t, t, ec->p);
+          mpi_add (t, t, ec->b);
+          mpi_mod (t, t, ec->p);
+          mpi_add (t, t, x3);
+          mpi_mod (t, t, ec->p);
+
+          /*
+           * When p mod 4 = 3, modular square root of A can be computed by
+           * A^((p+1)/4) mod p
+           */
+
+          /* Compute (p+1)/4 into p1_4 */
+          mpi_rshift (p1_4, ec->p, 2);
+          _gcry_mpi_add_ui (p1_4, p1_4, 1);
+
+          mpi_powm (y, t, p1_4, ec->p);
+
+          if (y_bit != mpi_test_bit (y, 0))
+            mpi_sub (y, ec->p, y);
+
+          mpi_free (p1_4);
+          mpi_free (t);
+          mpi_free (x3);
+
+          xfree (rawbuffer);
+          rawbuffer = _gcry_ecc_ec2os_buf (x, y, ec->p, &rawlen);
+          raw = rawbuffer;
+
+          mpi_free (x);
+          mpi_free (y);
+          _gcry_mpi_ec_free (ec);
         }
-      snprintf (buf, sizeof buf, "(1:%c%u:", names[idx], rawmpilen);
-      gcry_md_write (md, buf, strlen (buf));
-      gcry_md_write (md, rawmpi, rawmpilen);
-      gcry_md_write (md, ")", 1);
-      gcry_free (rawmpi);
+
+      snprintf (buf, sizeof buf, "(1:%c%u:", names[idx], rawlen);
+      _gcry_md_write (md, buf, strlen (buf));
+      _gcry_md_write (md, raw, rawlen);
+      _gcry_md_write (md, ")", 1);
+      xfree (rawbuffer);
     }
 
  leave:
+  xfree (curvename);
+  sexp_release (l1);
   for (idx = 0; idx < N_COMPONENTS; idx++)
     _gcry_mpi_release (values[idx]);
 
-  return ec;
+  return rc;
 #undef N_COMPONENTS
 }
 
 
+
+/*
+   Low-level API helper functions.
+ */
+
+/* This is the worker function for gcry_pubkey_get_sexp for ECC
+   algorithms.  Note that the caller has already stored NULL at
+   R_SEXP.  */
+gpg_err_code_t
+_gcry_pk_ecc_get_sexp (gcry_sexp_t *r_sexp, int mode, mpi_ec_t ec)
+{
+  gpg_err_code_t rc;
+  gcry_mpi_t mpi_G = NULL;
+  gcry_mpi_t mpi_Q = NULL;
+
+  if (!ec->p || !ec->a || !ec->b || !ec->G || !ec->n)
+    return GPG_ERR_BAD_CRYPT_CTX;
+
+  if (mode == GCRY_PK_GET_SECKEY && !ec->d)
+    return GPG_ERR_NO_SECKEY;
+
+  /* Compute the public point if it is missing.  */
+  if (!ec->Q && ec->d)
+    ec->Q = _gcry_ecc_compute_public (NULL, ec);
+
+  /* Encode G and Q.  */
+  mpi_G = _gcry_mpi_ec_ec2os (ec->G, ec);
+  if (!mpi_G)
+    {
+      rc = GPG_ERR_BROKEN_PUBKEY;
+      goto leave;
+    }
+  if (!ec->Q)
+    {
+      rc = GPG_ERR_BAD_CRYPT_CTX;
+      goto leave;
+    }
+
+  if (ec->dialect == ECC_DIALECT_ED25519)
+    {
+      unsigned char *encpk;
+      unsigned int encpklen;
+
+      rc = _gcry_ecc_eddsa_encodepoint (ec->Q, ec, NULL, NULL, 0,
+                                        &encpk, &encpklen);
+      if (rc)
+        goto leave;
+      mpi_Q = mpi_set_opaque (NULL, encpk, encpklen*8);
+      encpk = NULL;
+    }
+  else if (ec->model == MPI_EC_MONTGOMERY)
+    {
+      unsigned char *encpk;
+      unsigned int encpklen;
+
+      rc = _gcry_ecc_mont_encodepoint (ec->Q->x, ec->nbits,
+                                       ec->dialect != ECC_DIALECT_SAFECURVE,
+                                       &encpk, &encpklen);
+      if (rc)
+        goto leave;
+      mpi_Q = mpi_set_opaque (NULL, encpk, encpklen*8);
+    }
+  else
+    {
+      mpi_Q = _gcry_mpi_ec_ec2os (ec->Q, ec);
+    }
+  if (!mpi_Q)
+    {
+      rc = GPG_ERR_BROKEN_PUBKEY;
+      goto leave;
+    }
+
+  /* Fixme: We should return a curve name instead of the parameters if
+     if know that they match a curve.  */
+
+  if (ec->d && (!mode || mode == GCRY_PK_GET_SECKEY))
+    {
+      /* Let's return a private key. */
+      rc = sexp_build (r_sexp, NULL,
+                       
"(private-key(ecc(p%m)(a%m)(b%m)(g%m)(n%m)(h%u)(q%m)(d%m)))",
+                       ec->p, ec->a, ec->b, mpi_G, ec->n, ec->h, mpi_Q, ec->d);
+    }
+  else if (ec->Q)
+    {
+      /* Let's return a public key.  */
+      rc = sexp_build (r_sexp, NULL,
+                       "(public-key(ecc(p%m)(a%m)(b%m)(g%m)(n%m)(h%u)(q%m)))",
+                       ec->p, ec->a, ec->b, mpi_G, ec->n, ec->h, mpi_Q);
+    }
+  else
+    rc = GPG_ERR_BAD_CRYPT_CTX;
+
+ leave:
+  mpi_free (mpi_Q);
+  mpi_free (mpi_G);
+  return rc;
+}
 
 
 
@@ -1693,25 +1746,296 @@ compute_keygrip (gcry_md_hd_t md, gcry_sexp_t keyparam)
      Self-test section.
  */
 
+static const char *
+selftest_hash_sign (gcry_sexp_t pkey, gcry_sexp_t skey)
+{
+  int md_algo = GCRY_MD_SHA256;
+  gcry_md_hd_t hd = NULL;
+  const char *data_tmpl = "(data (flags rfc6979) (hash %s %b))";
+  /* Sample data from RFC 6979 section A.2.5, hash is of message "sample" */
+  static const char sample_data[] = "sample";
+  static const char sample_data_bad[] = "sbmple";
+  static const char signature_r[] =
+    "efd48b2aacb6a8fd1140dd9cd45e81d69d2c877b56aaf991c34d0ea84eaf3716";
+  static const char signature_s[] =
+    "f7cb1c942d657c41d436c7a1b6e29f65f3e900dbb9aff4064dc4ab2f843acda8";
+
+  const char *errtxt = NULL;
+  gcry_error_t err;
+  gcry_sexp_t sig = NULL;
+  gcry_sexp_t l1 = NULL;
+  gcry_sexp_t l2 = NULL;
+  gcry_mpi_t r = NULL;
+  gcry_mpi_t s = NULL;
+  gcry_mpi_t calculated_r = NULL;
+  gcry_mpi_t calculated_s = NULL;
+  int cmp;
+
+  err = _gcry_md_open (&hd, md_algo, 0);
+  if (err)
+    {
+      errtxt = "gcry_md_open failed";
+      goto leave;
+    }
+
+  _gcry_md_write (hd, sample_data, strlen(sample_data));
+
+  err = _gcry_mpi_scan (&r, GCRYMPI_FMT_HEX, signature_r, 0, NULL);
+  if (!err)
+    err = _gcry_mpi_scan (&s, GCRYMPI_FMT_HEX, signature_s, 0, NULL);
+
+  if (err)
+    {
+      errtxt = "converting data failed";
+      goto leave;
+    }
+
+  err = _gcry_pk_sign_md (&sig, data_tmpl, hd, skey, NULL);
+  if (err)
+    {
+      errtxt = "signing failed";
+      goto leave;
+    }
+
+  /* check against known signature */
+  errtxt = "signature validity failed";
+  l1 = _gcry_sexp_find_token (sig, "sig-val", 0);
+  if (!l1)
+    goto leave;
+  l2 = _gcry_sexp_find_token (l1, "ecdsa", 0);
+  if (!l2)
+    goto leave;
+
+  sexp_release (l1);
+  l1 = l2;
+
+  l2 = _gcry_sexp_find_token (l1, "r", 0);
+  if (!l2)
+    goto leave;
+  calculated_r = _gcry_sexp_nth_mpi (l2, 1, GCRYMPI_FMT_USG);
+  if (!calculated_r)
+    goto leave;
+
+  sexp_release (l2);
+  l2 = _gcry_sexp_find_token (l1, "s", 0);
+  if (!l2)
+    goto leave;
+  calculated_s = _gcry_sexp_nth_mpi (l2, 1, GCRYMPI_FMT_USG);
+  if (!calculated_s)
+    goto leave;
+
+  errtxt = "known sig check failed";
+
+  cmp = _gcry_mpi_cmp (r, calculated_r);
+  if (cmp)
+    goto leave;
+  cmp = _gcry_mpi_cmp (s, calculated_s);
+  if (cmp)
+    goto leave;
+
+  errtxt = NULL;
+
+  /* verify generated signature */
+  err = _gcry_pk_verify_md (sig, data_tmpl, hd, pkey, NULL);
+  if (err)
+    {
+      errtxt = "verify failed";
+      goto leave;
+    }
+
+  _gcry_md_reset(hd);
+  _gcry_md_write (hd, sample_data_bad, strlen(sample_data_bad));
+  err = _gcry_pk_verify_md (sig, data_tmpl, hd, pkey, NULL);
+  if (gcry_err_code (err) != GPG_ERR_BAD_SIGNATURE)
+    {
+      errtxt = "bad signature not detected";
+      goto leave;
+    }
+
+
+ leave:
+  _gcry_md_close (hd);
+  sexp_release (sig);
+  sexp_release (l1);
+  sexp_release (l2);
+  mpi_release (r);
+  mpi_release (s);
+  mpi_release (calculated_r);
+  mpi_release (calculated_s);
+  return errtxt;
+}
+
+
+static const char *
+selftest_sign (gcry_sexp_t pkey, gcry_sexp_t skey)
+{
+  /* Sample data from RFC 6979 section A.2.5, hash is of message "sample" */
+  static const char sample_data[] =
+    "(data (flags rfc6979 prehash)"
+    " (hash-algo sha256)"
+    " (value 6:sample))";
+  static const char sample_data_bad[] =
+    "(data (flags rfc6979)"
+    " (hash sha256 #bf2bdbe1aa9b6ec1e2ade1d694f41fc71a831d0268e98915"
+    /**/           "62113d8a62add1bf#))";
+  static const char signature_r[] =
+    "efd48b2aacb6a8fd1140dd9cd45e81d69d2c877b56aaf991c34d0ea84eaf3716";
+  static const char signature_s[] =
+    "f7cb1c942d657c41d436c7a1b6e29f65f3e900dbb9aff4064dc4ab2f843acda8";
+
+  const char *errtxt = NULL;
+  gcry_error_t err;
+  gcry_sexp_t data = NULL;
+  gcry_sexp_t data_bad = NULL;
+  gcry_sexp_t sig = NULL;
+  gcry_sexp_t l1 = NULL;
+  gcry_sexp_t l2 = NULL;
+  gcry_mpi_t r = NULL;
+  gcry_mpi_t s = NULL;
+  gcry_mpi_t calculated_r = NULL;
+  gcry_mpi_t calculated_s = NULL;
+  int cmp;
+
+  err = sexp_sscan (&data, NULL, sample_data, strlen (sample_data));
+  if (!err)
+    err = sexp_sscan (&data_bad, NULL,
+                      sample_data_bad, strlen (sample_data_bad));
+  if (!err)
+    err = _gcry_mpi_scan (&r, GCRYMPI_FMT_HEX, signature_r, 0, NULL);
+  if (!err)
+    err = _gcry_mpi_scan (&s, GCRYMPI_FMT_HEX, signature_s, 0, NULL);
+
+  if (err)
+    {
+      errtxt = "converting data failed";
+      goto leave;
+    }
+
+  err = _gcry_pk_sign (&sig, data, skey);
+  if (err)
+    {
+      errtxt = "signing failed";
+      goto leave;
+    }
+
+  /* check against known signature */
+  errtxt = "signature validity failed";
+  l1 = _gcry_sexp_find_token (sig, "sig-val", 0);
+  if (!l1)
+    goto leave;
+  l2 = _gcry_sexp_find_token (l1, "ecdsa", 0);
+  if (!l2)
+    goto leave;
+
+  sexp_release (l1);
+  l1 = l2;
+
+  l2 = _gcry_sexp_find_token (l1, "r", 0);
+  if (!l2)
+    goto leave;
+  calculated_r = _gcry_sexp_nth_mpi (l2, 1, GCRYMPI_FMT_USG);
+  if (!calculated_r)
+    goto leave;
+
+  sexp_release (l2);
+  l2 = _gcry_sexp_find_token (l1, "s", 0);
+  if (!l2)
+    goto leave;
+  calculated_s = _gcry_sexp_nth_mpi (l2, 1, GCRYMPI_FMT_USG);
+  if (!calculated_s)
+    goto leave;
+
+  errtxt = "known sig check failed";
+
+  cmp = _gcry_mpi_cmp (r, calculated_r);
+  if (cmp)
+    goto leave;
+  cmp = _gcry_mpi_cmp (s, calculated_s);
+  if (cmp)
+    goto leave;
+
+  errtxt = NULL;
+
+  /* verify generated signature */
+  err = _gcry_pk_verify (sig, data, pkey);
+  if (err)
+    {
+      errtxt = "verify failed";
+      goto leave;
+    }
+  err = _gcry_pk_verify (sig, data_bad, pkey);
+  if (gcry_err_code (err) != GPG_ERR_BAD_SIGNATURE)
+    {
+      errtxt = "bad signature not detected";
+      goto leave;
+    }
+
+
+ leave:
+  sexp_release (sig);
+  sexp_release (data_bad);
+  sexp_release (data);
+  sexp_release (l1);
+  sexp_release (l2);
+  mpi_release (r);
+  mpi_release (s);
+  mpi_release (calculated_r);
+  mpi_release (calculated_s);
+  return errtxt;
+}
+
 
 static gpg_err_code_t
-selftests_ecdsa (selftest_report_func_t report)
+selftests_ecdsa (selftest_report_func_t report, int extended)
 {
   const char *what;
   const char *errtxt;
+  gcry_error_t err;
+  gcry_sexp_t skey = NULL;
+  gcry_sexp_t pkey = NULL;
+
+  what = "convert";
+  err = sexp_sscan (&skey, NULL, sample_secret_key_secp256,
+                    strlen (sample_secret_key_secp256));
+  if (!err)
+    err = sexp_sscan (&pkey, NULL, sample_public_key_secp256,
+                      strlen (sample_public_key_secp256));
+  if (err)
+    {
+      errtxt = _gcry_strerror (err);
+      goto failed;
+    }
+
+  what = "key consistency";
+  err = ecc_check_secret_key(skey);
+  if (err)
+    {
+      errtxt = _gcry_strerror (err);
+      goto failed;
+    }
 
-  what = "low-level";
-  errtxt = NULL; /*selftest ();*/
+  if (extended)
+    {
+      what = "sign";
+      errtxt = selftest_sign (pkey, skey);
+      if (errtxt)
+        goto failed;
+    }
+
+  what = "digest sign";
+  errtxt = selftest_hash_sign (pkey, skey);
   if (errtxt)
     goto failed;
 
-  /* FIXME:  need more tests.  */
-
+  sexp_release(pkey);
+  sexp_release(skey);
   return 0; /* Succeeded. */
 
  failed:
+  sexp_release(pkey);
+  sexp_release(skey);
   if (report)
-    report ("pubkey", GCRY_PK_ECDSA, what, errtxt);
+    report ("pubkey", GCRY_PK_ECC, what, errtxt);
   return GPG_ERR_SELFTEST_FAILED;
 }
 
@@ -1720,74 +2044,30 @@ selftests_ecdsa (selftest_report_func_t report)
 static gpg_err_code_t
 run_selftests (int algo, int extended, selftest_report_func_t report)
 {
-  gpg_err_code_t ec;
+  if (algo != GCRY_PK_ECC)
+    return GPG_ERR_PUBKEY_ALGO;
 
-  (void)extended;
-
-  switch (algo)
-    {
-    case GCRY_PK_ECDSA:
-      ec = selftests_ecdsa (report);
-      break;
-    default:
-      ec = GPG_ERR_PUBKEY_ALGO;
-      break;
-
-    }
-  return ec;
+  return selftests_ecdsa (report, extended);
 }
 
 
 
 
-static const char *ecdsa_names[] =
-  {
-    "ecdsa",
-    "ecc",
-    NULL,
-  };
-static const char *ecdh_names[] =
-  {
-    "ecdh",
-    "ecc",
-    NULL,
-  };
-
-gcry_pk_spec_t _gcry_pubkey_spec_ecdsa =
-  {
-    "ECDSA", ecdsa_names,
-    "pabgnq", "pabgnqd", "", "rs", "pabgnq",
-    GCRY_PK_USAGE_SIGN,
-    ecc_generate,
-    ecc_check_secret_key,
-    NULL,
-    NULL,
-    ecc_sign,
-    ecc_verify,
-    ecc_get_nbits
-  };
-
-gcry_pk_spec_t _gcry_pubkey_spec_ecdh =
+gcry_pk_spec_t _gcry_pubkey_spec_ecc =
   {
-    "ECDH", ecdh_names,
-    "pabgnq", "pabgnqd", "se", "", "pabgnq",
-    GCRY_PK_USAGE_ENCR,
+    GCRY_PK_ECC, { 0, 1 },
+    (GCRY_PK_USAGE_SIGN | GCRY_PK_USAGE_ENCR),
+    "ECC", ecc_names,
+    "pabgnhq", "pabgnhqd", "se", "rs", "pabgnhq",
     ecc_generate,
     ecc_check_secret_key,
     ecc_encrypt_raw,
     ecc_decrypt_raw,
-    NULL,
-    NULL,
-    ecc_get_nbits
-  };
-
-
-pk_extra_spec_t _gcry_pubkey_extraspec_ecdsa =
-  {
+    ecc_sign,
+    ecc_verify,
+    ecc_get_nbits,
     run_selftests,
-    ecc_generate_ext,
     compute_keygrip,
-    ecc_get_param,
-    ecc_get_curve,
-    ecc_get_param_sexp
+    _gcry_ecc_get_curve,
+    _gcry_ecc_get_param_sexp
   };
diff --git a/grub-core/lib/libgcrypt/cipher/elgamal.c 
b/grub-core/lib/libgcrypt/cipher/elgamal.c
index ce4be8524..540ecb028 100644
--- a/grub-core/lib/libgcrypt/cipher/elgamal.c
+++ b/grub-core/lib/libgcrypt/cipher/elgamal.c
@@ -1,6 +1,7 @@
 /* Elgamal.c  -  Elgamal Public Key encryption
  * Copyright (C) 1998, 2000, 2001, 2002, 2003,
  *               2008  Free Software Foundation, Inc.
+ * Copyright (C) 2013 g10 Code GmbH
  *
  * This file is part of Libgcrypt.
  *
@@ -29,6 +30,15 @@
 #include "g10lib.h"
 #include "mpi.h"
 #include "cipher.h"
+#include "pubkey-internal.h"
+#include "const-time.h"
+
+
+/* Blinding is used to mitigate side-channel attacks.  You may undef
+   this to speed up the operation in case the system is secured
+   against physical and network mounted side-channel attacks.  */
+#define USE_BLINDING 1
+
 
 typedef struct
 {
@@ -47,9 +57,19 @@ typedef struct
 } ELG_secret_key;
 
 
+static const char *elg_names[] =
+  {
+    "elg",
+    "openpgp-elg",
+    "openpgp-elg-sig",
+    NULL,
+  };
+
+
 static int test_keys (ELG_secret_key *sk, unsigned int nbits, int nodie);
-static gcry_mpi_t gen_k (gcry_mpi_t p, int small_k);
-static void generate (ELG_secret_key *sk, unsigned nbits, gcry_mpi_t 
**factors);
+static gcry_mpi_t gen_k (gcry_mpi_t p);
+static gcry_err_code_t generate (ELG_secret_key *sk, unsigned nbits,
+                                 gcry_mpi_t **factors);
 static int  check_secret_key (ELG_secret_key *sk);
 static void do_encrypt (gcry_mpi_t a, gcry_mpi_t b, gcry_mpi_t input,
                         ELG_public_key *pkey);
@@ -59,6 +79,7 @@ static void sign (gcry_mpi_t a, gcry_mpi_t b, gcry_mpi_t 
input,
                   ELG_secret_key *skey);
 static int  verify (gcry_mpi_t a, gcry_mpi_t b, gcry_mpi_t input,
                     ELG_public_key *pkey);
+static unsigned int elg_get_nbits (gcry_sexp_t parms);
 
 
 static void (*progress_cb) (void *, const char *, int, int, int);
@@ -128,17 +149,17 @@ static int
 test_keys ( ELG_secret_key *sk, unsigned int nbits, int nodie )
 {
   ELG_public_key pk;
-  gcry_mpi_t test = gcry_mpi_new ( 0 );
-  gcry_mpi_t out1_a = gcry_mpi_new ( nbits );
-  gcry_mpi_t out1_b = gcry_mpi_new ( nbits );
-  gcry_mpi_t out2 = gcry_mpi_new ( nbits );
+  gcry_mpi_t test   = mpi_new ( 0 );
+  gcry_mpi_t out1_a = mpi_new ( nbits );
+  gcry_mpi_t out1_b = mpi_new ( nbits );
+  gcry_mpi_t out2   = mpi_new ( nbits );
   int failed = 0;
 
   pk.p = sk->p;
   pk.g = sk->g;
   pk.y = sk->y;
 
-  gcry_mpi_randomize ( test, nbits, GCRY_WEAK_RANDOM );
+  _gcry_mpi_randomize ( test, nbits, GCRY_WEAK_RANDOM );
 
   do_encrypt ( out1_a, out1_b, test, &pk );
   decrypt ( out2, out1_a, out1_b, sk );
@@ -149,10 +170,10 @@ test_keys ( ELG_secret_key *sk, unsigned int nbits, int 
nodie )
   if ( !verify( out1_a, out1_b, test, &pk ) )
     failed |= 2;
 
-  gcry_mpi_release ( test );
-  gcry_mpi_release ( out1_a );
-  gcry_mpi_release ( out1_b );
-  gcry_mpi_release ( out2 );
+  _gcry_mpi_release ( test );
+  _gcry_mpi_release ( out1_a );
+  _gcry_mpi_release ( out1_b );
+  _gcry_mpi_release ( out2 );
 
   if (failed && !nodie)
     log_fatal ("Elgamal test key for %s %s failed\n",
@@ -169,11 +190,10 @@ test_keys ( ELG_secret_key *sk, unsigned int nbits, int 
nodie )
 
 /****************
  * Generate a random secret exponent k from prime p, so that k is
- * relatively prime to p-1.  With SMALL_K set, k will be selected for
- * better encryption performance - this must never be used signing!
+ * relatively prime to p-1.
  */
 static gcry_mpi_t
-gen_k( gcry_mpi_t p, int small_k )
+gen_k( gcry_mpi_t p )
 {
   gcry_mpi_t k = mpi_alloc_secure( 0 );
   gcry_mpi_t temp = mpi_alloc( mpi_get_nlimbs(p) );
@@ -182,29 +202,18 @@ gen_k( gcry_mpi_t p, int small_k )
   unsigned int nbits, nbytes;
   char *rndbuf = NULL;
 
-  if (small_k)
-    {
-      /* Using a k much lesser than p is sufficient for encryption and
-       * it greatly improves the encryption performance.  We use
-       * Wiener's table and add a large safety margin. */
-      nbits = wiener_map( orig_nbits ) * 3 / 2;
-      if( nbits >= orig_nbits )
-        BUG();
-    }
-  else
-    nbits = orig_nbits;
-
+  nbits = orig_nbits;
 
   nbytes = (nbits+7)/8;
   if( DBG_CIPHER )
-    log_debug("choosing a random k ");
+    log_debug("choosing a random k\n");
   mpi_sub_ui( p_1, p, 1);
   for(;;)
     {
       if( !rndbuf || nbits < 32 )
         {
-          gcry_free(rndbuf);
-          rndbuf = gcry_random_bytes_secure( nbytes, GCRY_STRONG_RANDOM );
+          xfree(rndbuf);
+          rndbuf = _gcry_random_bytes_secure( nbytes, GCRY_STRONG_RANDOM );
         }
       else
         {
@@ -213,9 +222,9 @@ gen_k( gcry_mpi_t p, int small_k )
              to get_random_bytes() and use this the here maybe it is
              easier to do this directly in random.c Anyway, it is
              highly inlikely that we will ever reach this code. */
-          char *pp = gcry_random_bytes_secure( 4, GCRY_STRONG_RANDOM );
+          char *pp = _gcry_random_bytes_secure( 4, GCRY_STRONG_RANDOM );
           memcpy( rndbuf, pp, 4 );
-          gcry_free(pp);
+          xfree(pp);
        }
       _gcry_mpi_set_buffer( k, rndbuf, nbytes, 0 );
 
@@ -233,7 +242,7 @@ gen_k( gcry_mpi_t p, int small_k )
                 progress('-');
               break; /* no */
             }
-          if (gcry_mpi_gcd( temp, k, p_1 ))
+          if (mpi_gcd( temp, k, p_1 ))
             goto found;  /* okay, k is relative prime to (p-1) */
           mpi_add_ui( k, k, 1 );
           if( DBG_CIPHER )
@@ -241,7 +250,7 @@ gen_k( gcry_mpi_t p, int small_k )
        }
     }
  found:
-  gcry_free(rndbuf);
+  xfree (rndbuf);
   if( DBG_CIPHER )
     progress('\n');
   mpi_free(p_1);
@@ -255,9 +264,10 @@ gen_k( gcry_mpi_t p, int small_k )
  * Returns: 2 structures filled with all needed values
  *         and an array with n-1 factors of (p-1)
  */
-static void
+static gcry_err_code_t
 generate ( ELG_secret_key *sk, unsigned int nbits, gcry_mpi_t **ret_factors )
 {
+  gcry_err_code_t rc;
   gcry_mpi_t p;    /* the prime */
   gcry_mpi_t p_min1;
   gcry_mpi_t g;
@@ -267,12 +277,18 @@ generate ( ELG_secret_key *sk, unsigned int nbits, 
gcry_mpi_t **ret_factors )
   unsigned int xbits;
   byte *rndbuf;
 
-  p_min1 = gcry_mpi_new ( nbits );
+  p_min1 = mpi_new ( nbits );
   qbits = wiener_map( nbits );
   if( qbits & 1 ) /* better have a even one */
     qbits++;
   g = mpi_alloc(1);
-  p = _gcry_generate_elg_prime( 0, nbits, qbits, g, ret_factors );
+  rc = _gcry_generate_elg_prime (0, nbits, qbits, g, &p, ret_factors);
+  if (rc)
+    {
+      mpi_free (p_min1);
+      mpi_free (g);
+      return rc;
+    }
   mpi_sub_ui(p_min1, p, 1);
 
 
@@ -290,9 +306,9 @@ generate ( ELG_secret_key *sk, unsigned int nbits, 
gcry_mpi_t **ret_factors )
   xbits = qbits * 3 / 2;
   if( xbits >= nbits )
     BUG();
-  x = gcry_mpi_snew ( xbits );
+  x = mpi_snew ( xbits );
   if( DBG_CIPHER )
-    log_debug("choosing a random x of size %u", xbits );
+    log_debug("choosing a random x of size %u\n", xbits );
   rndbuf = NULL;
   do
     {
@@ -302,39 +318,38 @@ generate ( ELG_secret_key *sk, unsigned int nbits, 
gcry_mpi_t **ret_factors )
         { /* Change only some of the higher bits */
           if( xbits < 16 ) /* should never happen ... */
             {
-              gcry_free(rndbuf);
-              rndbuf = gcry_random_bytes_secure( (xbits+7)/8,
-                                                 GCRY_VERY_STRONG_RANDOM );
+              xfree(rndbuf);
+              rndbuf = _gcry_random_bytes_secure ((xbits+7)/8,
+                                                  GCRY_VERY_STRONG_RANDOM);
             }
           else
             {
-              char *r = gcry_random_bytes_secure( 2,
-                                                  GCRY_VERY_STRONG_RANDOM );
+              char *r = _gcry_random_bytes_secure (2, GCRY_VERY_STRONG_RANDOM);
               memcpy(rndbuf, r, 2 );
-              gcry_free(r);
+              xfree (r);
             }
        }
       else
         {
-          rndbuf = gcry_random_bytes_secure( (xbits+7)/8,
-                                             GCRY_VERY_STRONG_RANDOM );
+          rndbuf = _gcry_random_bytes_secure ((xbits+7)/8,
+                                              GCRY_VERY_STRONG_RANDOM );
        }
       _gcry_mpi_set_buffer( x, rndbuf, (xbits+7)/8, 0 );
       mpi_clear_highbit( x, xbits+1 );
     }
   while( !( mpi_cmp_ui( x, 0 )>0 && mpi_cmp( x, p_min1 )<0 ) );
-  gcry_free(rndbuf);
+  xfree(rndbuf);
 
-  y = gcry_mpi_new (nbits);
-  gcry_mpi_powm( y, g, x, p );
+  y = mpi_new (nbits);
+  mpi_powm( y, g, x, p );
 
   if( DBG_CIPHER )
     {
-      progress('\n');
-      log_mpidump("elg  p= ", p );
-      log_mpidump("elg  g= ", g );
-      log_mpidump("elg  y= ", y );
-      log_mpidump("elg  x= ", x );
+      progress ('\n');
+      log_mpidump ("elg  p", p );
+      log_mpidump ("elg  g", g );
+      log_mpidump ("elg  y", y );
+      log_mpidump ("elg  x", x );
     }
 
   /* Copy the stuff to the key structures */
@@ -343,10 +358,12 @@ generate ( ELG_secret_key *sk, unsigned int nbits, 
gcry_mpi_t **ret_factors )
   sk->y = y;
   sk->x = x;
 
-  gcry_mpi_release ( p_min1 );
+  _gcry_mpi_release ( p_min1 );
 
   /* Now we can test our keys (this should never fail!) */
   test_keys ( sk, nbits - 64, 0 );
+
+  return 0;
 }
 
 
@@ -361,6 +378,7 @@ static gcry_err_code_t
 generate_using_x (ELG_secret_key *sk, unsigned int nbits, gcry_mpi_t x,
                   gcry_mpi_t **ret_factors )
 {
+  gcry_err_code_t rc;
   gcry_mpi_t p;      /* The prime.  */
   gcry_mpi_t p_min1; /* The prime minus 1.  */
   gcry_mpi_t g;      /* The generator.  */
@@ -378,51 +396,57 @@ generate_using_x (ELG_secret_key *sk, unsigned int nbits, 
gcry_mpi_t x,
   if ( xbits < 64 || xbits >= nbits )
     return GPG_ERR_INV_VALUE;
 
-  p_min1 = gcry_mpi_new ( nbits );
+  p_min1 = mpi_new ( nbits );
   qbits  = wiener_map ( nbits );
   if ( (qbits & 1) ) /* Better have an even one.  */
     qbits++;
   g = mpi_alloc (1);
-  p = _gcry_generate_elg_prime ( 0, nbits, qbits, g, ret_factors );
+  rc = _gcry_generate_elg_prime (0, nbits, qbits, g, &p, ret_factors );
+  if (rc)
+    {
+      mpi_free (p_min1);
+      mpi_free (g);
+      return rc;
+    }
   mpi_sub_ui (p_min1, p, 1);
 
   if (DBG_CIPHER)
     log_debug ("using a supplied x of size %u", xbits );
   if ( !(mpi_cmp_ui ( x, 0 ) > 0 && mpi_cmp ( x, p_min1 ) <0 ) )
     {
-      gcry_mpi_release ( p_min1 );
-      gcry_mpi_release ( p );
-      gcry_mpi_release ( g );
+      _gcry_mpi_release ( p_min1 );
+      _gcry_mpi_release ( p );
+      _gcry_mpi_release ( g );
       return GPG_ERR_INV_VALUE;
     }
 
-  y = gcry_mpi_new (nbits);
-  gcry_mpi_powm ( y, g, x, p );
+  y = mpi_new (nbits);
+  mpi_powm ( y, g, x, p );
 
   if ( DBG_CIPHER )
     {
       progress ('\n');
-      log_mpidump ("elg  p= ", p );
-      log_mpidump ("elg  g= ", g );
-      log_mpidump ("elg  y= ", y );
-      log_mpidump ("elg  x= ", x );
+      log_mpidump ("elg  p", p );
+      log_mpidump ("elg  g", g );
+      log_mpidump ("elg  y", y );
+      log_mpidump ("elg  x", x );
     }
 
   /* Copy the stuff to the key structures */
   sk->p = p;
   sk->g = g;
   sk->y = y;
-  sk->x = gcry_mpi_copy (x);
+  sk->x = mpi_copy (x);
 
-  gcry_mpi_release ( p_min1 );
+  _gcry_mpi_release ( p_min1 );
 
   /* Now we can test our keys. */
   if ( test_keys ( sk, nbits - 64, 1 ) )
     {
-      gcry_mpi_release ( sk->p ); sk->p = NULL;
-      gcry_mpi_release ( sk->g ); sk->g = NULL;
-      gcry_mpi_release ( sk->y ); sk->y = NULL;
-      gcry_mpi_release ( sk->x ); sk->x = NULL;
+      _gcry_mpi_release ( sk->p ); sk->p = NULL;
+      _gcry_mpi_release ( sk->g ); sk->g = NULL;
+      _gcry_mpi_release ( sk->y ); sk->y = NULL;
+      _gcry_mpi_release ( sk->x ); sk->x = NULL;
       return GPG_ERR_BAD_SECKEY;
     }
 
@@ -440,7 +464,7 @@ check_secret_key( ELG_secret_key *sk )
   int rc;
   gcry_mpi_t y = mpi_alloc( mpi_get_nlimbs(sk->y) );
 
-  gcry_mpi_powm( y, sk->g, sk->x, sk->p );
+  mpi_powm (y, sk->g, sk->x, sk->p);
   rc = !mpi_cmp( y, sk->y );
   mpi_free( y );
   return rc;
@@ -457,24 +481,25 @@ do_encrypt(gcry_mpi_t a, gcry_mpi_t b, gcry_mpi_t input, 
ELG_public_key *pkey )
    * error code.
    */
 
-  k = gen_k( pkey->p, 1 );
-  gcry_mpi_powm( a, pkey->g, k, pkey->p );
+  k = gen_k( pkey->p );
+  mpi_powm (a, pkey->g, k, pkey->p);
+
   /* b = (y^k * input) mod p
    *    = ((y^k mod p) * (input mod p)) mod p
    * and because input is < p
    *    = ((y^k mod p) * input) mod p
    */
-  gcry_mpi_powm( b, pkey->y, k, pkey->p );
-  gcry_mpi_mulm( b, b, input, pkey->p );
+  mpi_powm (b, pkey->y, k, pkey->p);
+  mpi_mulm (b, b, input, pkey->p);
 #if 0
   if( DBG_CIPHER )
     {
-      log_mpidump("elg encrypted y= ", pkey->y);
-      log_mpidump("elg encrypted p= ", pkey->p);
-      log_mpidump("elg encrypted k= ", k);
-      log_mpidump("elg encrypted M= ", input);
-      log_mpidump("elg encrypted a= ", a);
-      log_mpidump("elg encrypted b= ", b);
+      log_mpidump("elg encrypted y", pkey->y);
+      log_mpidump("elg encrypted p", pkey->p);
+      log_mpidump("elg encrypted k", k);
+      log_mpidump("elg encrypted M", input);
+      log_mpidump("elg encrypted a", a);
+      log_mpidump("elg encrypted b", b);
     }
 #endif
   mpi_free(k);
@@ -484,25 +509,72 @@ do_encrypt(gcry_mpi_t a, gcry_mpi_t b, gcry_mpi_t input, 
ELG_public_key *pkey )
 
 
 static void
-decrypt(gcry_mpi_t output, gcry_mpi_t a, gcry_mpi_t b, ELG_secret_key *skey )
+decrypt (gcry_mpi_t output, gcry_mpi_t a, gcry_mpi_t b, ELG_secret_key *skey )
 {
-  gcry_mpi_t t1 = mpi_alloc_secure( mpi_get_nlimbs( skey->p ) );
+  gcry_mpi_t t1, t2, r, r1, h;
+  unsigned int nbits = mpi_get_nbits (skey->p);
+  gcry_mpi_t x_blind;
+
+  mpi_normalize (a);
+  mpi_normalize (b);
+
+  t1 = mpi_snew (nbits);
+
+#ifdef USE_BLINDING
+
+  t2 = mpi_snew (nbits);
+  r  = mpi_new (nbits);
+  r1 = mpi_new (nbits);
+  h  = mpi_new (nbits);
+  x_blind = mpi_snew (nbits);
+
+  /* We need a random number of about the prime size.  The random
+     number merely needs to be unpredictable; thus we use level 0.  */
+  _gcry_mpi_randomize (r, nbits, GCRY_WEAK_RANDOM);
+
+  /* Also, exponent blinding: x_blind = x + (p-1)*r1 */
+  _gcry_mpi_randomize (r1, nbits, GCRY_WEAK_RANDOM);
+  mpi_set_highbit (r1, nbits - 1);
+  mpi_sub_ui (h, skey->p, 1);
+  mpi_mul (x_blind, h, r1);
+  mpi_add (x_blind, skey->x, x_blind);
+
+  /* t1 = r^x mod p */
+  mpi_powm (t1, r, x_blind, skey->p);
+  /* t2 = (a * r)^-x mod p */
+  mpi_mulm (t2, a, r, skey->p);
+  mpi_powm (t2, t2, x_blind, skey->p);
+  mpi_invm (t2, t2, skey->p);
+  /* t1 = (t1 * t2) mod p*/
+  mpi_mulm (t1, t1, t2, skey->p);
+
+  mpi_free (x_blind);
+  mpi_free (h);
+  mpi_free (r1);
+  mpi_free (r);
+  mpi_free (t2);
+
+#else /*!USE_BLINDING*/
 
   /* output = b/(a^x) mod p */
-  gcry_mpi_powm( t1, a, skey->x, skey->p );
-  mpi_invm( t1, t1, skey->p );
-  mpi_mulm( output, b, t1, skey->p );
+  mpi_powm (t1, a, skey->x, skey->p);
+  mpi_invm (t1, t1, skey->p);
+
+#endif /*!USE_BLINDING*/
+
+  mpi_mulm (output, b, t1, skey->p);
+
 #if 0
   if( DBG_CIPHER )
     {
-      log_mpidump("elg decrypted x= ", skey->x);
-      log_mpidump("elg decrypted p= ", skey->p);
-      log_mpidump("elg decrypted a= ", a);
-      log_mpidump("elg decrypted b= ", b);
-      log_mpidump("elg decrypted M= ", output);
+      log_mpidump ("elg decrypted x", skey->x);
+      log_mpidump ("elg decrypted p", skey->p);
+      log_mpidump ("elg decrypted a", a);
+      log_mpidump ("elg decrypted b", b);
+      log_mpidump ("elg decrypted M", output);
     }
 #endif
-  mpi_free(t1);
+  mpi_free (t1);
 }
 
 
@@ -525,8 +597,8 @@ sign(gcry_mpi_t a, gcry_mpi_t b, gcry_mpi_t input, 
ELG_secret_key *skey )
     *
     */
     mpi_sub_ui(p_1, p_1, 1);
-    k = gen_k( skey->p, 0 /* no small K ! */ );
-    gcry_mpi_powm( a, skey->g, k, skey->p );
+    k = gen_k( skey->p );
+    mpi_powm( a, skey->g, k, skey->p );
     mpi_mul(t, skey->x, a );
     mpi_subm(t, input, t, p_1 );
     mpi_invm(inv, k, p_1 );
@@ -535,14 +607,14 @@ sign(gcry_mpi_t a, gcry_mpi_t b, gcry_mpi_t input, 
ELG_secret_key *skey )
 #if 0
     if( DBG_CIPHER )
       {
-       log_mpidump("elg sign p= ", skey->p);
-       log_mpidump("elg sign g= ", skey->g);
-       log_mpidump("elg sign y= ", skey->y);
-       log_mpidump("elg sign x= ", skey->x);
-       log_mpidump("elg sign k= ", k);
-       log_mpidump("elg sign M= ", input);
-       log_mpidump("elg sign a= ", a);
-       log_mpidump("elg sign b= ", b);
+       log_mpidump ("elg sign p", skey->p);
+       log_mpidump ("elg sign g", skey->g);
+       log_mpidump ("elg sign y", skey->y);
+       log_mpidump ("elg sign x", skey->x);
+       log_mpidump ("elg sign k", k);
+       log_mpidump ("elg sign M", input);
+       log_mpidump ("elg sign a", a);
+       log_mpidump ("elg sign b", b);
       }
 #endif
     mpi_free(k);
@@ -613,233 +685,480 @@ verify(gcry_mpi_t a, gcry_mpi_t b, gcry_mpi_t input, 
ELG_public_key *pkey )
  *********************************************/
 
 static gpg_err_code_t
-elg_generate_ext (int algo, unsigned int nbits, unsigned long evalue,
-                  const gcry_sexp_t genparms,
-                  gcry_mpi_t *skey, gcry_mpi_t **retfactors,
-                  gcry_sexp_t *r_extrainfo)
+elg_generate (const gcry_sexp_t genparms, gcry_sexp_t *r_skey)
 {
-  gpg_err_code_t ec;
+  gpg_err_code_t rc;
+  unsigned int nbits;
   ELG_secret_key sk;
   gcry_mpi_t xvalue = NULL;
   gcry_sexp_t l1;
+  gcry_mpi_t *factors = NULL;
+  gcry_sexp_t misc_info = NULL;
 
-  (void)algo;
-  (void)evalue;
-  (void)r_extrainfo;
+  memset (&sk, 0, sizeof sk);
 
-  if (genparms)
+  rc = _gcry_pk_util_get_nbits (genparms, &nbits);
+  if (rc)
+    return rc;
+
+  /* Parse the optional xvalue element. */
+  l1 = sexp_find_token (genparms, "xvalue", 0);
+  if (l1)
     {
-      /* Parse the optional xvalue element. */
-      l1 = gcry_sexp_find_token (genparms, "xvalue", 0);
-      if (l1)
-        {
-          xvalue = gcry_sexp_nth_mpi (l1, 1, 0);
-          gcry_sexp_release (l1);
-          if (!xvalue)
-            return GPG_ERR_BAD_MPI;
-        }
+      xvalue = sexp_nth_mpi (l1, 1, 0);
+      sexp_release (l1);
+      if (!xvalue)
+        return GPG_ERR_BAD_MPI;
     }
 
   if (xvalue)
-    ec = generate_using_x (&sk, nbits, xvalue, retfactors);
+    {
+      rc = generate_using_x (&sk, nbits, xvalue, &factors);
+      mpi_free (xvalue);
+    }
   else
     {
-      generate (&sk, nbits, retfactors);
-      ec = 0;
+      rc = generate (&sk, nbits, &factors);
+    }
+  if (rc)
+    goto leave;
+
+  if (factors && factors[0])
+    {
+      int nfac;
+      void **arg_list;
+      char *buffer, *p;
+
+      for (nfac = 0; factors[nfac]; nfac++)
+        ;
+      arg_list = xtrycalloc (nfac+1, sizeof *arg_list);
+      if (!arg_list)
+        {
+          rc = gpg_err_code_from_syserror ();
+          goto leave;
+        }
+      buffer = xtrymalloc (30 + nfac*2 + 2 + 1);
+      if (!buffer)
+        {
+          rc = gpg_err_code_from_syserror ();
+          xfree (arg_list);
+          goto leave;
+        }
+      p = stpcpy (buffer, "(misc-key-info(pm1-factors");
+      for(nfac = 0; factors[nfac]; nfac++)
+        {
+          p = stpcpy (p, "%m");
+          arg_list[nfac] = factors + nfac;
+        }
+      p = stpcpy (p, "))");
+      rc = sexp_build_array (&misc_info, NULL, buffer, arg_list);
+      xfree (arg_list);
+      xfree (buffer);
+      if (rc)
+        goto leave;
     }
 
-  skey[0] = sk.p;
-  skey[1] = sk.g;
-  skey[2] = sk.y;
-  skey[3] = sk.x;
+  rc = sexp_build (r_skey, NULL,
+                   "(key-data"
+                   " (public-key"
+                   "  (elg(p%m)(g%m)(y%m)))"
+                   " (private-key"
+                   "  (elg(p%m)(g%m)(y%m)(x%m)))"
+                   " %S)",
+                   sk.p, sk.g, sk.y,
+                   sk.p, sk.g, sk.y, sk.x,
+                   misc_info);
+
+ leave:
+  mpi_free (sk.p);
+  mpi_free (sk.g);
+  mpi_free (sk.y);
+  mpi_free (sk.x);
+  sexp_release (misc_info);
+  if (factors)
+    {
+      gcry_mpi_t *mp;
+      for (mp = factors; *mp; mp++)
+        mpi_free (*mp);
+      xfree (factors);
+    }
 
-  return ec;
+  return rc;
 }
 
 
 static gcry_err_code_t
-elg_generate (int algo, unsigned int nbits, unsigned long evalue,
-              gcry_mpi_t *skey, gcry_mpi_t **retfactors)
+elg_check_secret_key (gcry_sexp_t keyparms)
 {
-  ELG_secret_key sk;
-
-  (void)algo;
-  (void)evalue;
-
-  generate (&sk, nbits, retfactors);
-  skey[0] = sk.p;
-  skey[1] = sk.g;
-  skey[2] = sk.y;
-  skey[3] = sk.x;
-
-  return GPG_ERR_NO_ERROR;
+  gcry_err_code_t rc;
+  ELG_secret_key sk = {NULL, NULL, NULL, NULL};
+
+  rc = sexp_extract_param (keyparms, NULL, "pgyx",
+                           &sk.p, &sk.g, &sk.y, &sk.x,
+                           NULL);
+  if (rc)
+    goto leave;
+
+  if (!check_secret_key (&sk))
+    rc = GPG_ERR_BAD_SECKEY;
+
+ leave:
+  _gcry_mpi_release (sk.p);
+  _gcry_mpi_release (sk.g);
+  _gcry_mpi_release (sk.y);
+  _gcry_mpi_release (sk.x);
+  if (DBG_CIPHER)
+    log_debug ("elg_testkey    => %s\n", gpg_strerror (rc));
+  return rc;
 }
 
 
 static gcry_err_code_t
-elg_check_secret_key (int algo, gcry_mpi_t *skey)
+elg_encrypt (gcry_sexp_t *r_ciph, gcry_sexp_t s_data, gcry_sexp_t keyparms)
 {
-  gcry_err_code_t err = GPG_ERR_NO_ERROR;
-  ELG_secret_key sk;
-
-  (void)algo;
-
-  if ((! skey[0]) || (! skey[1]) || (! skey[2]) || (! skey[3]))
-    err = GPG_ERR_BAD_MPI;
-  else
+  gcry_err_code_t rc;
+  struct pk_encoding_ctx ctx;
+  gcry_mpi_t mpi_a = NULL;
+  gcry_mpi_t mpi_b = NULL;
+  gcry_mpi_t data = NULL;
+  ELG_public_key pk = { NULL, NULL, NULL };
+
+  _gcry_pk_util_init_encoding_ctx (&ctx, PUBKEY_OP_ENCRYPT,
+                                   elg_get_nbits (keyparms));
+
+  /* Extract the data.  */
+  rc = _gcry_pk_util_data_to_mpi (s_data, &data, &ctx);
+  if (rc)
+    goto leave;
+  if (DBG_CIPHER)
+    log_mpidump ("elg_encrypt data", data);
+  if (mpi_is_opaque (data))
     {
-      sk.p = skey[0];
-      sk.g = skey[1];
-      sk.y = skey[2];
-      sk.x = skey[3];
+      rc = GPG_ERR_INV_DATA;
+      goto leave;
+    }
 
-      if (! check_secret_key (&sk))
-       err = GPG_ERR_BAD_SECKEY;
+  /* Extract the key.  */
+  rc = sexp_extract_param (keyparms, NULL, "pgy",
+                           &pk.p, &pk.g, &pk.y, NULL);
+  if (rc)
+    goto leave;
+  if (DBG_CIPHER)
+    {
+      log_mpidump ("elg_encrypt  p", pk.p);
+      log_mpidump ("elg_encrypt  g", pk.g);
+      log_mpidump ("elg_encrypt  y", pk.y);
     }
 
-  return err;
+  /* Do Elgamal computation and build result.  */
+  mpi_a = mpi_new (0);
+  mpi_b = mpi_new (0);
+  do_encrypt (mpi_a, mpi_b, data, &pk);
+  rc = sexp_build (r_ciph, NULL, "(enc-val(elg(a%m)(b%m)))", mpi_a, mpi_b);
+
+ leave:
+  _gcry_mpi_release (mpi_a);
+  _gcry_mpi_release (mpi_b);
+  _gcry_mpi_release (pk.p);
+  _gcry_mpi_release (pk.g);
+  _gcry_mpi_release (pk.y);
+  _gcry_mpi_release (data);
+  _gcry_pk_util_free_encoding_ctx (&ctx);
+  if (DBG_CIPHER)
+    log_debug ("elg_encrypt   => %s\n", gpg_strerror (rc));
+  return rc;
 }
 
 
 static gcry_err_code_t
-elg_encrypt (int algo, gcry_mpi_t *resarr,
-             gcry_mpi_t data, gcry_mpi_t *pkey, int flags)
+elg_decrypt (gcry_sexp_t *r_plain, gcry_sexp_t s_data, gcry_sexp_t keyparms)
 {
-  gcry_err_code_t err = GPG_ERR_NO_ERROR;
-  ELG_public_key pk;
-
-  (void)algo;
-  (void)flags;
-
-  if ((! data) || (! pkey[0]) || (! pkey[1]) || (! pkey[2]))
-    err = GPG_ERR_BAD_MPI;
-  else
+  gpg_err_code_t rc, rc_sexp;
+  struct pk_encoding_ctx ctx;
+  gcry_sexp_t l1 = NULL;
+  gcry_mpi_t data_a = NULL;
+  gcry_mpi_t data_b = NULL;
+  ELG_secret_key sk = {NULL, NULL, NULL, NULL};
+  gcry_mpi_t plain = NULL;
+  unsigned char *unpad = NULL;
+  size_t unpadlen = 0;
+  gcry_sexp_t result = NULL;
+  gcry_sexp_t dummy = NULL;
+
+  _gcry_pk_util_init_encoding_ctx (&ctx, PUBKEY_OP_DECRYPT,
+                                   elg_get_nbits (keyparms));
+
+  /* Extract the data.  */
+  rc = _gcry_pk_util_preparse_encval (s_data, elg_names, &l1, &ctx);
+  if (rc)
+    goto leave;
+  rc = sexp_extract_param (l1, NULL, "ab", &data_a, &data_b, NULL);
+  if (rc)
+    goto leave;
+  if (DBG_CIPHER)
     {
-      pk.p = pkey[0];
-      pk.g = pkey[1];
-      pk.y = pkey[2];
-      resarr[0] = mpi_alloc (mpi_get_nlimbs (pk.p));
-      resarr[1] = mpi_alloc (mpi_get_nlimbs (pk.p));
-      do_encrypt (resarr[0], resarr[1], data, &pk);
+      log_printmpi ("elg_decrypt  d_a", data_a);
+      log_printmpi ("elg_decrypt  d_b", data_b);
+    }
+  if (mpi_is_opaque (data_a) || mpi_is_opaque (data_b))
+    {
+      rc = GPG_ERR_INV_DATA;
+      goto leave;
     }
-  return err;
-}
-
 
-static gcry_err_code_t
-elg_decrypt (int algo, gcry_mpi_t *result,
-             gcry_mpi_t *data, gcry_mpi_t *skey, int flags)
-{
-  gcry_err_code_t err = GPG_ERR_NO_ERROR;
-  ELG_secret_key sk;
+  /* Extract the key.  */
+  rc = sexp_extract_param (keyparms, NULL, "pgyx",
+                           &sk.p, &sk.g, &sk.y, &sk.x,
+                           NULL);
+  if (rc)
+    goto leave;
+  if (DBG_CIPHER)
+    {
+      log_printmpi ("elg_decrypt    p", sk.p);
+      log_printmpi ("elg_decrypt    g", sk.g);
+      log_printmpi ("elg_decrypt    y", sk.y);
+      if (!fips_mode ())
+        log_printmpi ("elg_decrypt    x", sk.x);
+    }
 
-  (void)algo;
-  (void)flags;
+  plain = mpi_snew (ctx.nbits);
+  decrypt (plain, data_a, data_b, &sk);
+  if (DBG_CIPHER)
+    log_printmpi ("elg_decrypt  res", plain);
 
-  if ((! data[0]) || (! data[1])
-      || (! skey[0]) || (! skey[1]) || (! skey[2]) || (! skey[3]))
-    err = GPG_ERR_BAD_MPI;
-  else
+  /* Reverse the encoding and build the s-expression.  */
+  switch (ctx.encoding)
     {
-      sk.p = skey[0];
-      sk.g = skey[1];
-      sk.y = skey[2];
-      sk.x = skey[3];
-      *result = mpi_alloc_secure (mpi_get_nlimbs (sk.p));
-      decrypt (*result, data[0], data[1], &sk);
+    case PUBKEY_ENC_PKCS1:
+      rc = _gcry_rsa_pkcs1_decode_for_enc (&unpad, &unpadlen, ctx.nbits, 
plain);
+      mpi_free (plain);
+      plain = NULL;
+      rc_sexp = sexp_build (&result, NULL, "(value %b)", (int)unpadlen, unpad);
+      *r_plain = sexp_null_cond (result, ct_is_not_zero (rc));
+      dummy = sexp_null_cond (result, ct_is_zero (rc));
+      sexp_release (dummy);
+      rc = ct_ulong_select (rc_sexp, rc,
+                           ct_is_zero (rc) & ct_is_not_zero (rc_sexp));
+      break;
+
+    case PUBKEY_ENC_OAEP:
+      rc = _gcry_rsa_oaep_decode (&unpad, &unpadlen,
+                                  ctx.nbits, ctx.hash_algo, plain,
+                                  ctx.label, ctx.labellen);
+      mpi_free (plain);
+      plain = NULL;
+      rc_sexp = sexp_build (&result, NULL, "(value %b)", (int)unpadlen, unpad);
+      *r_plain = sexp_null_cond (result, ct_is_not_zero (rc));
+      dummy = sexp_null_cond (result, ct_is_zero (rc));
+      sexp_release (dummy);
+      rc = ct_ulong_select (rc_sexp, rc,
+                           ct_is_zero (rc) & ct_is_not_zero (rc_sexp));
+      break;
+
+    default:
+      /* Raw format.  For backward compatibility we need to assume a
+         signed mpi by using the sexp format string "%m".  */
+      rc = sexp_build (r_plain, NULL,
+                       (ctx.flags & PUBKEY_FLAG_LEGACYRESULT)
+                       ? "%m" : "(value %m)",
+                       plain);
+      break;
     }
-  return err;
+
+
+ leave:
+  xfree (unpad);
+  _gcry_mpi_release (plain);
+  _gcry_mpi_release (sk.p);
+  _gcry_mpi_release (sk.g);
+  _gcry_mpi_release (sk.y);
+  _gcry_mpi_release (sk.x);
+  _gcry_mpi_release (data_a);
+  _gcry_mpi_release (data_b);
+  sexp_release (l1);
+  _gcry_pk_util_free_encoding_ctx (&ctx);
+  if (DBG_CIPHER)
+    log_debug ("elg_decrypt    => %s\n", gpg_strerror (rc));
+  return rc;
 }
 
 
 static gcry_err_code_t
-elg_sign (int algo, gcry_mpi_t *resarr, gcry_mpi_t data, gcry_mpi_t *skey)
+elg_sign (gcry_sexp_t *r_sig, gcry_sexp_t s_data, gcry_sexp_t keyparms)
 {
-  gcry_err_code_t err = GPG_ERR_NO_ERROR;
-  ELG_secret_key sk;
-
-  (void)algo;
+  gcry_err_code_t rc;
+  struct pk_encoding_ctx ctx;
+  gcry_mpi_t data = NULL;
+  ELG_secret_key sk = {NULL, NULL, NULL, NULL};
+  gcry_mpi_t sig_r = NULL;
+  gcry_mpi_t sig_s = NULL;
+
+  _gcry_pk_util_init_encoding_ctx (&ctx, PUBKEY_OP_SIGN,
+                                   elg_get_nbits (keyparms));
+
+  /* Extract the data.  */
+  rc = _gcry_pk_util_data_to_mpi (s_data, &data, &ctx);
+  if (rc)
+    goto leave;
+  if (DBG_CIPHER)
+    log_mpidump ("elg_sign   data", data);
+  if (mpi_is_opaque (data))
+    {
+      rc = GPG_ERR_INV_DATA;
+      goto leave;
+    }
 
-  if ((! data)
-      || (! skey[0]) || (! skey[1]) || (! skey[2]) || (! skey[3]))
-    err = GPG_ERR_BAD_MPI;
-  else
+  /* Extract the key.  */
+  rc = sexp_extract_param (keyparms, NULL, "pgyx",
+                           &sk.p, &sk.g, &sk.y, &sk.x, NULL);
+  if (rc)
+    goto leave;
+  if (DBG_CIPHER)
     {
-      sk.p = skey[0];
-      sk.g = skey[1];
-      sk.y = skey[2];
-      sk.x = skey[3];
-      resarr[0] = mpi_alloc (mpi_get_nlimbs (sk.p));
-      resarr[1] = mpi_alloc (mpi_get_nlimbs (sk.p));
-      sign (resarr[0], resarr[1], data, &sk);
+      log_mpidump ("elg_sign      p", sk.p);
+      log_mpidump ("elg_sign      g", sk.g);
+      log_mpidump ("elg_sign      y", sk.y);
+      if (!fips_mode ())
+        log_mpidump ("elg_sign      x", sk.x);
     }
 
-  return err;
+  sig_r = mpi_new (0);
+  sig_s = mpi_new (0);
+  sign (sig_r, sig_s, data, &sk);
+  if (DBG_CIPHER)
+    {
+      log_mpidump ("elg_sign  sig_r", sig_r);
+      log_mpidump ("elg_sign  sig_s", sig_s);
+    }
+  rc = sexp_build (r_sig, NULL, "(sig-val(elg(r%M)(s%M)))", sig_r, sig_s);
+
+ leave:
+  _gcry_mpi_release (sig_r);
+  _gcry_mpi_release (sig_s);
+  _gcry_mpi_release (sk.p);
+  _gcry_mpi_release (sk.g);
+  _gcry_mpi_release (sk.y);
+  _gcry_mpi_release (sk.x);
+  _gcry_mpi_release (data);
+  _gcry_pk_util_free_encoding_ctx (&ctx);
+  if (DBG_CIPHER)
+    log_debug ("elg_sign      => %s\n", gpg_strerror (rc));
+  return rc;
 }
 
 
 static gcry_err_code_t
-elg_verify (int algo, gcry_mpi_t hash, gcry_mpi_t *data, gcry_mpi_t *pkey,
-            int (*cmp) (void *, gcry_mpi_t), void *opaquev)
+elg_verify (gcry_sexp_t s_sig, gcry_sexp_t s_data, gcry_sexp_t s_keyparms)
 {
-  gcry_err_code_t err = GPG_ERR_NO_ERROR;
-  ELG_public_key pk;
+  gcry_err_code_t rc;
+  struct pk_encoding_ctx ctx;
+  gcry_sexp_t l1 = NULL;
+  gcry_mpi_t sig_r = NULL;
+  gcry_mpi_t sig_s = NULL;
+  gcry_mpi_t data = NULL;
+  ELG_public_key pk = { NULL, NULL, NULL };
+
+  _gcry_pk_util_init_encoding_ctx (&ctx, PUBKEY_OP_VERIFY,
+                                   elg_get_nbits (s_keyparms));
+
+  /* Extract the data.  */
+  rc = _gcry_pk_util_data_to_mpi (s_data, &data, &ctx);
+  if (rc)
+    goto leave;
+  if (DBG_CIPHER)
+    log_mpidump ("elg_verify data", data);
+  if (mpi_is_opaque (data))
+    {
+      rc = GPG_ERR_INV_DATA;
+      goto leave;
+    }
 
-  (void)algo;
-  (void)cmp;
-  (void)opaquev;
+  /* Extract the signature value.  */
+  rc = _gcry_pk_util_preparse_sigval (s_sig, elg_names, &l1, NULL);
+  if (rc)
+    goto leave;
+  rc = sexp_extract_param (l1, NULL, "rs", &sig_r, &sig_s, NULL);
+  if (rc)
+    goto leave;
+  if (DBG_CIPHER)
+    {
+      log_mpidump ("elg_verify  s_r", sig_r);
+      log_mpidump ("elg_verify  s_s", sig_s);
+    }
 
-  if ((! data[0]) || (! data[1]) || (! hash)
-      || (! pkey[0]) || (! pkey[1]) || (! pkey[2]))
-    err = GPG_ERR_BAD_MPI;
-  else
+  /* Extract the key.  */
+  rc = sexp_extract_param (s_keyparms, NULL, "pgy",
+                                 &pk.p, &pk.g, &pk.y, NULL);
+  if (rc)
+    goto leave;
+  if (DBG_CIPHER)
     {
-      pk.p = pkey[0];
-      pk.g = pkey[1];
-      pk.y = pkey[2];
-      if (! verify (data[0], data[1], hash, &pk))
-       err = GPG_ERR_BAD_SIGNATURE;
+      log_mpidump ("elg_verify    p", pk.p);
+      log_mpidump ("elg_verify    g", pk.g);
+      log_mpidump ("elg_verify    y", pk.y);
     }
 
-  return err;
+  /* Verify the signature.  */
+  if (!verify (sig_r, sig_s, data, &pk))
+    rc = GPG_ERR_BAD_SIGNATURE;
+
+ leave:
+  _gcry_mpi_release (pk.p);
+  _gcry_mpi_release (pk.g);
+  _gcry_mpi_release (pk.y);
+  _gcry_mpi_release (data);
+  _gcry_mpi_release (sig_r);
+  _gcry_mpi_release (sig_s);
+  sexp_release (l1);
+  _gcry_pk_util_free_encoding_ctx (&ctx);
+  if (DBG_CIPHER)
+    log_debug ("elg_verify    => %s\n", rc?gpg_strerror (rc):"Good");
+  return rc;
 }
 
 
+/* Return the number of bits for the key described by PARMS.  On error
+ * 0 is returned.  The format of PARMS starts with the algorithm name;
+ * for example:
+ *
+ *   (dsa
+ *     (p <mpi>)
+ *     (g <mpi>)
+ *     (y <mpi>))
+ *
+ * More parameters may be given but we only need P here.
+ */
 static unsigned int
-elg_get_nbits (int algo, gcry_mpi_t *pkey)
+elg_get_nbits (gcry_sexp_t parms)
 {
-  (void)algo;
-
-  return mpi_get_nbits (pkey[0]);
+  gcry_sexp_t l1;
+  gcry_mpi_t p;
+  unsigned int nbits;
+
+  l1 = sexp_find_token (parms, "p", 1);
+  if (!l1)
+    return 0; /* Parameter P not found.  */
+
+  p= sexp_nth_mpi (l1, 1, GCRYMPI_FMT_USG);
+  sexp_release (l1);
+  nbits = p? mpi_get_nbits (p) : 0;
+  _gcry_mpi_release (p);
+  return nbits;
 }
 
 
-static const char *elg_names[] =
-  {
-    "elg",
-    "openpgp-elg",
-    "openpgp-elg-sig",
-    NULL,
-  };
-
-
+
 gcry_pk_spec_t _gcry_pubkey_spec_elg =
   {
+    GCRY_PK_ELG, { 0, 0 },
+    (GCRY_PK_USAGE_SIGN | GCRY_PK_USAGE_ENCR),
     "ELG", elg_names,
     "pgy", "pgyx", "ab", "rs", "pgy",
-    GCRY_PK_USAGE_SIGN | GCRY_PK_USAGE_ENCR,
     elg_generate,
     elg_check_secret_key,
     elg_encrypt,
     elg_decrypt,
     elg_sign,
     elg_verify,
-    elg_get_nbits
-  };
-
-pk_extra_spec_t _gcry_pubkey_extraspec_elg =
-  {
-    NULL,
-    elg_generate_ext,
-    NULL
+    elg_get_nbits,
   };
diff --git a/grub-core/lib/libgcrypt/cipher/gost-s-box.c 
b/grub-core/lib/libgcrypt/cipher/gost-s-box.c
new file mode 100644
index 000000000..5d5ed7dc4
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/gost-s-box.c
@@ -0,0 +1,266 @@
+/* gost-s-box.c - GOST 28147-89 S-Box expander
+ * Copyright (C) 2013 Dmitry Eremin-Solenikov
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#define DIM(v) (sizeof(v)/sizeof((v)[0]))
+
+struct gost_sbox
+{
+  const char *name;
+  const char *oid;
+  unsigned int keymeshing;
+  unsigned char sbox[16*8];
+} gost_sboxes[] = {
+  { "test_3411", "1.2.643.2.2.30.0", 0,
+    {
+      0x4, 0xE, 0x5, 0x7, 0x6, 0x4, 0xD, 0x1,
+      0xA, 0xB, 0x8, 0xD, 0xC, 0xB, 0xB, 0xF,
+      0x9, 0x4, 0x1, 0xA, 0x7, 0xA, 0x4, 0xD,
+      0x2, 0xC, 0xD, 0x1, 0x1, 0x0, 0x1, 0x0,
+
+      0xD, 0x6, 0xA, 0x0, 0x5, 0x7, 0x3, 0x5,
+      0x8, 0xD, 0x3, 0x8, 0xF, 0x2, 0xF, 0x7,
+      0x0, 0xF, 0x4, 0x9, 0xD, 0x1, 0x5, 0xA,
+      0xE, 0xA, 0x2, 0xF, 0x8, 0xD, 0x9, 0x4,
+
+      0x6, 0x2, 0xE, 0xE, 0x4, 0x3, 0x0, 0x9,
+      0xB, 0x3, 0xF, 0x4, 0xA, 0x6, 0xA, 0x2,
+      0x1, 0x8, 0xC, 0x6, 0x9, 0x8, 0xE, 0x3,
+      0xC, 0x1, 0x7, 0xC, 0xE, 0x5, 0x7, 0xE,
+
+      0x7, 0x0, 0x6, 0xB, 0x0, 0x9, 0x6, 0x6,
+      0xF, 0x7, 0x0, 0x2, 0x3, 0xC, 0x8, 0xB,
+      0x5, 0x5, 0x9, 0x5, 0xB, 0xF, 0x2, 0x8,
+      0x3, 0x9, 0xB, 0x3, 0x2, 0xE, 0xC, 0xC,
+    }
+  },
+  { "CryptoPro_3411", "1.2.643.2.2.30.1", 0,
+     {
+      0xA, 0x5, 0x7, 0x4, 0x7, 0x7, 0xD, 0x1,
+      0x4, 0xF, 0xF, 0xA, 0x6, 0x6, 0xE, 0x3,
+      0x5, 0x4, 0xC, 0x7, 0x4, 0x2, 0x4, 0xA,
+      0x6, 0x0, 0xE, 0xC, 0xB, 0x4, 0x1, 0x9,
+
+      0x8, 0x2, 0x9, 0x0, 0x9, 0xD, 0x7, 0x5,
+      0x1, 0xD, 0x4, 0xF, 0xC, 0x9, 0x0, 0xB,
+      0x3, 0xB, 0x1, 0x2, 0x2, 0xF, 0x5, 0x4,
+      0x7, 0x9, 0x0, 0x8, 0xA, 0x0, 0xA, 0xF,
+
+      0xD, 0x1, 0x3, 0xE, 0x1, 0xA, 0x3, 0x8,
+      0xC, 0x7, 0xB, 0x1, 0x8, 0x1, 0xC, 0x6,
+      0xE, 0x6, 0x5, 0x6, 0x0, 0x5, 0x8, 0x7,
+      0x0, 0x3, 0x2, 0x5, 0xE, 0xB, 0xF, 0xE,
+
+      0x9, 0xC, 0x6, 0xD, 0xF, 0x8, 0x6, 0xD,
+      0x2, 0xE, 0xA, 0xB, 0xD, 0xE, 0x2, 0x0,
+      0xB, 0xA, 0x8, 0x9, 0x3, 0xC, 0x9, 0x2,
+      0xF, 0x8, 0xD, 0x3, 0x5, 0x3, 0xB, 0xC,
+    }
+  },
+  { "Test_89", "1.2.643.2.2.31.0", 0,
+    {
+      0x4, 0xC, 0xD, 0xE, 0x3, 0x8, 0x9, 0xC,
+      0x2, 0x9, 0x8, 0x9, 0xE, 0xF, 0xB, 0x6,
+      0xF, 0xF, 0xE, 0xB, 0x5, 0x6, 0xC, 0x5,
+      0x5, 0xE, 0xC, 0x2, 0x9, 0xB, 0x0, 0x2,
+
+      0x9, 0x8, 0x7, 0x5, 0x6, 0x1, 0x3, 0xB,
+      0x1, 0x1, 0x3, 0xF, 0x8, 0x9, 0x6, 0x0,
+      0x0, 0x3, 0x9, 0x7, 0x0, 0xC, 0x7, 0x9,
+      0x8, 0xA, 0xA, 0x1, 0xD, 0x5, 0x5, 0xD,
+
+      0xE, 0x2, 0x1, 0x0, 0xA, 0xD, 0x4, 0x3,
+      0x3, 0x7, 0x5, 0xD, 0xB, 0x3, 0x8, 0xE,
+      0xB, 0x4, 0x2, 0xC, 0x7, 0x7, 0xE, 0x7,
+      0xC, 0xD, 0x4, 0x6, 0xC, 0xA, 0xF, 0xA,
+
+      0xD, 0x6, 0x6, 0xA, 0x2, 0x0, 0x1, 0xF,
+      0x7, 0x0, 0xF, 0x4, 0x1, 0xE, 0xA, 0x4,
+      0xA, 0xB, 0x0, 0x3, 0xF, 0x2, 0x2, 0x1,
+      0x6, 0x5, 0xB, 0x8, 0x4, 0x4, 0xD, 0x8,
+    }
+  },
+  { "CryptoPro_A", "1.2.643.2.2.31.1", 1,
+    {
+      0x9, 0x3, 0xE, 0xE, 0xB, 0x3, 0x1, 0xB,
+      0x6, 0x7, 0x4, 0x7, 0x5, 0xA, 0xD, 0xA,
+      0x3, 0xE, 0x6, 0xA, 0x1, 0xD, 0x2, 0xF,
+      0x2, 0x9, 0x2, 0xC, 0x9, 0xC, 0x9, 0x5,
+
+      0x8, 0x8, 0xB, 0xD, 0x8, 0x1, 0x7, 0x0,
+      0xB, 0xA, 0x3, 0x1, 0xD, 0x2, 0xA, 0xC,
+      0x1, 0xF, 0xD, 0x3, 0xF, 0x0, 0x6, 0xE,
+      0x7, 0x0, 0x8, 0x9, 0x0, 0xB, 0x0, 0x8,
+
+      0xA, 0x5, 0xC, 0x0, 0xE, 0x7, 0x8, 0x6,
+      0x4, 0x2, 0xF, 0x2, 0x4, 0x5, 0xC, 0x2,
+      0xE, 0x6, 0x5, 0xB, 0x2, 0x9, 0x4, 0x3,
+      0xF, 0xC, 0xA, 0x4, 0x3, 0x4, 0x5, 0x9,
+
+      0xC, 0xB, 0x0, 0xF, 0xC, 0x8, 0xF, 0x1,
+      0x0, 0x4, 0x7, 0x8, 0x7, 0xF, 0x3, 0x7,
+      0xD, 0xD, 0x1, 0x5, 0xA, 0xE, 0xB, 0xD,
+      0x5, 0x1, 0x9, 0x6, 0x6, 0x6, 0xE, 0x4,
+    }
+  },
+  { "CryptoPro_B", "1.2.643.2.2.31.2", 1,
+    {
+      0x8, 0x0, 0xE, 0x7, 0x2, 0x8, 0x5, 0x0,
+      0x4, 0x1, 0xC, 0x5, 0x7, 0x3, 0x2, 0x4,
+      0xB, 0x2, 0x0, 0x0, 0xC, 0x2, 0xA, 0xB,
+      0x1, 0xA, 0xA, 0xD, 0xF, 0x6, 0xB, 0xE,
+
+      0x3, 0x4, 0x9, 0xB, 0x9, 0x4, 0x9, 0x8,
+      0x5, 0xD, 0x2, 0x6, 0x5, 0xD, 0x1, 0x3,
+      0x0, 0x5, 0xD, 0x1, 0xA, 0xE, 0xC, 0x7,
+      0x9, 0xC, 0xB, 0x2, 0xB, 0xB, 0x3, 0x1,
+
+      0x2, 0x9, 0x7, 0x3, 0x1, 0xC, 0x7, 0xA,
+      0xE, 0x7, 0x5, 0xA, 0x4, 0x1, 0x4, 0x2,
+      0xA, 0x3, 0x8, 0xC, 0x0, 0x7, 0xD, 0x9,
+      0xC, 0xF, 0xF, 0xF, 0xD, 0xF, 0x0, 0x6,
+
+      0xD, 0xB, 0x3, 0x4, 0x6, 0xA, 0x6, 0xF,
+      0x6, 0x8, 0x6, 0xE, 0x8, 0x0, 0xF, 0xD,
+      0x7, 0x6, 0x1, 0x9, 0xE, 0x9, 0x8, 0x5,
+      0xF, 0xE, 0x4, 0x8, 0x3, 0x5, 0xE, 0xC,
+    }
+  },
+  { "CryptoPro_C", "1.2.643.2.2.31.3", 1,
+    {
+      0x1, 0x0, 0x8, 0x3, 0x8, 0xC, 0xA, 0x7,
+      0xB, 0x1, 0x2, 0x6, 0xD, 0x9, 0x9, 0x4,
+      0xC, 0x7, 0x5, 0x0, 0xB, 0xB, 0x6, 0x0,
+      0x2, 0xD, 0x0, 0x1, 0x0, 0x1, 0x8, 0x5,
+
+      0x9, 0xB, 0x4, 0x5, 0x4, 0x8, 0xD, 0xA,
+      0xD, 0x4, 0x9, 0xD, 0x5, 0xE, 0xE, 0x2,
+      0x0, 0x5, 0xF, 0xA, 0x1, 0x2, 0x2, 0xF,
+      0xF, 0x2, 0xA, 0x8, 0x2, 0x4, 0x0, 0xE,
+
+      0x4, 0x8, 0x3, 0xB, 0x9, 0x7, 0xF, 0xC,
+      0x5, 0xE, 0x7, 0x2, 0x3, 0x3, 0x3, 0x6,
+      0x8, 0xF, 0xC, 0x9, 0xC, 0x6, 0x5, 0x1,
+      0xE, 0xC, 0xD, 0x7, 0xE, 0x5, 0xB, 0xB,
+
+      0xA, 0x9, 0x6, 0xE, 0x6, 0xA, 0x4, 0xD,
+      0x7, 0xA, 0xE, 0xF, 0xF, 0x0, 0x1, 0x9,
+      0x6, 0x6, 0x1, 0xC, 0xA, 0xF, 0xC, 0x3,
+      0x3, 0x3, 0xB, 0x4, 0x7, 0xD, 0x7, 0x8,
+    }
+  },
+  { "CryptoPro_D", "1.2.643.2.2.31.4", 1,
+    {
+      0xF, 0xB, 0x1, 0x1, 0x0, 0x8, 0x3, 0x1,
+      0xC, 0x6, 0xC, 0x5, 0xC, 0x0, 0x0, 0xA,
+      0x2, 0x3, 0xB, 0xE, 0x8, 0xF, 0x6, 0x6,
+      0xA, 0x4, 0x0, 0xC, 0x9, 0x3, 0xF, 0x8,
+
+      0x6, 0xC, 0xF, 0xA, 0xD, 0x2, 0x1, 0xF,
+      0x4, 0xF, 0xE, 0x7, 0x2, 0x5, 0xE, 0xB,
+      0x5, 0xE, 0x6, 0x0, 0xA, 0xE, 0x9, 0x0,
+      0x0, 0x2, 0x5, 0xD, 0xB, 0xB, 0x2, 0x4,
+
+      0x7, 0x7, 0xA, 0x6, 0x7, 0x1, 0xD, 0xC,
+      0x9, 0xD, 0xD, 0x2, 0x3, 0xA, 0x8, 0x3,
+      0xE, 0x8, 0x4, 0xB, 0x6, 0x4, 0xC, 0x5,
+      0xD, 0x0, 0x8, 0x4, 0x5, 0x7, 0x4, 0x9,
+
+      0x1, 0x5, 0x9, 0x9, 0x4, 0xC, 0xB, 0x7,
+      0xB, 0xA, 0x3, 0x3, 0xE, 0x9, 0xA, 0xD,
+      0x8, 0x9, 0x7, 0xF, 0xF, 0xD, 0x5, 0x2,
+      0x3, 0x1, 0x2, 0x8, 0x1, 0x6, 0x7, 0xE,
+    }
+  },
+  { "TC26_Z", "1.2.643.7.1.2.5.1.1", 1,
+    {
+      0xc, 0x6, 0xb, 0xc, 0x7, 0x5, 0x8, 0x1,
+      0x4, 0x8, 0x3, 0x8, 0xf, 0xd, 0xe, 0x7,
+      0x6, 0x2, 0x5, 0x2, 0x5, 0xf, 0x2, 0xe,
+      0x2, 0x3, 0x8, 0x1, 0xa, 0x6, 0x5, 0xd,
+
+      0xa, 0x9, 0x2, 0xd, 0x8, 0x9, 0x6, 0x0,
+      0x5, 0xa, 0xf, 0x4, 0x1, 0x2, 0x9, 0x5,
+      0xb, 0x5, 0xa, 0xf, 0x6, 0xc, 0x1, 0x8,
+      0x9, 0xc, 0xd, 0x6, 0xd, 0xa, 0xc, 0x3,
+
+      0xe, 0x1, 0xe, 0x7, 0x0, 0xb, 0xf, 0x4,
+      0x8, 0xe, 0x1, 0x0, 0x9, 0x7, 0x4, 0xf,
+      0xd, 0x4, 0x7, 0xa, 0x3, 0x8, 0xb, 0xa,
+      0x7, 0x7, 0x4, 0x5, 0xe, 0x1, 0x0, 0x6,
+
+      0x0, 0xb, 0xc, 0x3, 0xb, 0x4, 0xd, 0x9,
+      0x3, 0xd, 0x9, 0xe, 0x4, 0x3, 0xa, 0xc,
+      0xf, 0x0, 0x6, 0x9, 0x2, 0xe, 0x3, 0xb,
+      0x1, 0xf, 0x0, 0xb, 0xc, 0x0, 0x7, 0x2,
+    }
+  },
+};
+
+int main(int argc, char **argv)
+{
+  unsigned int i, j, s;
+  FILE *f;
+
+  if (argc == 1)
+    f = stdin;
+  else
+    f = fopen(argv[1], "w");
+
+  if (!f)
+    {
+      perror("fopen");
+      exit(1);
+    }
+
+  for (s = 0; s < DIM(gost_sboxes); s++)
+    {
+      unsigned char *sbox = gost_sboxes[s].sbox;
+      fprintf (f, "static const u32 sbox_%s[4*256] =\n  {", 
gost_sboxes[s].name);
+      for (i = 0; i < 4; i++) {
+        fprintf (f,    "\n    /* %d */\n   ", i);
+        for (j = 0; j < 256; j++) {
+          unsigned int val;
+          if (j % 4 == 0 && j != 0)
+            fprintf (f, "\n   ");
+          val = sbox[ (j & 0xf) * 8 + 2 * i + 0] |
+               (sbox[ (j >> 4)  * 8 + 2 * i + 1] << 4);
+          val <<= (8*i);
+          val = (val << 11) | (val >> 21);
+          fprintf (f, " 0x%08x,", val);
+        }
+      }
+      fprintf (f, "\n  };\n\n");
+    }
+
+  fprintf (f, "static struct\n{\n  const char *oid;\n  const u32 *sbox;\n  
const int keymeshing;\n} gost_oid_map[] = {\n");
+
+  for (s = 0; s < DIM(gost_sboxes); s++)
+    {
+      fprintf (f, "  { \"%s\", sbox_%s, %d },\n", gost_sboxes[s].oid, 
gost_sboxes[s].name, gost_sboxes[s].keymeshing );
+    }
+
+  fprintf(f, "  { NULL, NULL, 0 }\n};\n");
+
+  fclose (f);
+
+  return 0;
+}
diff --git a/grub-core/lib/libgcrypt/cipher/gost-sb.h 
b/grub-core/lib/libgcrypt/cipher/gost-sb.h
new file mode 100644
index 000000000..b86ac32f0
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/gost-sb.h
@@ -0,0 +1,2128 @@
+static const u32 sbox_test_3411[4*256] =
+  {
+    /* 0 */
+    0x00072000, 0x00075000, 0x00074800, 0x00071000,
+    0x00076800, 0x00074000, 0x00070000, 0x00077000,
+    0x00073000, 0x00075800, 0x00070800, 0x00076000,
+    0x00073800, 0x00077800, 0x00072800, 0x00071800,
+    0x0005a000, 0x0005d000, 0x0005c800, 0x00059000,
+    0x0005e800, 0x0005c000, 0x00058000, 0x0005f000,
+    0x0005b000, 0x0005d800, 0x00058800, 0x0005e000,
+    0x0005b800, 0x0005f800, 0x0005a800, 0x00059800,
+    0x00022000, 0x00025000, 0x00024800, 0x00021000,
+    0x00026800, 0x00024000, 0x00020000, 0x00027000,
+    0x00023000, 0x00025800, 0x00020800, 0x00026000,
+    0x00023800, 0x00027800, 0x00022800, 0x00021800,
+    0x00062000, 0x00065000, 0x00064800, 0x00061000,
+    0x00066800, 0x00064000, 0x00060000, 0x00067000,
+    0x00063000, 0x00065800, 0x00060800, 0x00066000,
+    0x00063800, 0x00067800, 0x00062800, 0x00061800,
+    0x00032000, 0x00035000, 0x00034800, 0x00031000,
+    0x00036800, 0x00034000, 0x00030000, 0x00037000,
+    0x00033000, 0x00035800, 0x00030800, 0x00036000,
+    0x00033800, 0x00037800, 0x00032800, 0x00031800,
+    0x0006a000, 0x0006d000, 0x0006c800, 0x00069000,
+    0x0006e800, 0x0006c000, 0x00068000, 0x0006f000,
+    0x0006b000, 0x0006d800, 0x00068800, 0x0006e000,
+    0x0006b800, 0x0006f800, 0x0006a800, 0x00069800,
+    0x0007a000, 0x0007d000, 0x0007c800, 0x00079000,
+    0x0007e800, 0x0007c000, 0x00078000, 0x0007f000,
+    0x0007b000, 0x0007d800, 0x00078800, 0x0007e000,
+    0x0007b800, 0x0007f800, 0x0007a800, 0x00079800,
+    0x00052000, 0x00055000, 0x00054800, 0x00051000,
+    0x00056800, 0x00054000, 0x00050000, 0x00057000,
+    0x00053000, 0x00055800, 0x00050800, 0x00056000,
+    0x00053800, 0x00057800, 0x00052800, 0x00051800,
+    0x00012000, 0x00015000, 0x00014800, 0x00011000,
+    0x00016800, 0x00014000, 0x00010000, 0x00017000,
+    0x00013000, 0x00015800, 0x00010800, 0x00016000,
+    0x00013800, 0x00017800, 0x00012800, 0x00011800,
+    0x0001a000, 0x0001d000, 0x0001c800, 0x00019000,
+    0x0001e800, 0x0001c000, 0x00018000, 0x0001f000,
+    0x0001b000, 0x0001d800, 0x00018800, 0x0001e000,
+    0x0001b800, 0x0001f800, 0x0001a800, 0x00019800,
+    0x00042000, 0x00045000, 0x00044800, 0x00041000,
+    0x00046800, 0x00044000, 0x00040000, 0x00047000,
+    0x00043000, 0x00045800, 0x00040800, 0x00046000,
+    0x00043800, 0x00047800, 0x00042800, 0x00041800,
+    0x0000a000, 0x0000d000, 0x0000c800, 0x00009000,
+    0x0000e800, 0x0000c000, 0x00008000, 0x0000f000,
+    0x0000b000, 0x0000d800, 0x00008800, 0x0000e000,
+    0x0000b800, 0x0000f800, 0x0000a800, 0x00009800,
+    0x00002000, 0x00005000, 0x00004800, 0x00001000,
+    0x00006800, 0x00004000, 0x00000000, 0x00007000,
+    0x00003000, 0x00005800, 0x00000800, 0x00006000,
+    0x00003800, 0x00007800, 0x00002800, 0x00001800,
+    0x0003a000, 0x0003d000, 0x0003c800, 0x00039000,
+    0x0003e800, 0x0003c000, 0x00038000, 0x0003f000,
+    0x0003b000, 0x0003d800, 0x00038800, 0x0003e000,
+    0x0003b800, 0x0003f800, 0x0003a800, 0x00039800,
+    0x0002a000, 0x0002d000, 0x0002c800, 0x00029000,
+    0x0002e800, 0x0002c000, 0x00028000, 0x0002f000,
+    0x0002b000, 0x0002d800, 0x00028800, 0x0002e000,
+    0x0002b800, 0x0002f800, 0x0002a800, 0x00029800,
+    0x0004a000, 0x0004d000, 0x0004c800, 0x00049000,
+    0x0004e800, 0x0004c000, 0x00048000, 0x0004f000,
+    0x0004b000, 0x0004d800, 0x00048800, 0x0004e000,
+    0x0004b800, 0x0004f800, 0x0004a800, 0x00049800,
+    /* 1 */
+    0x03a80000, 0x03c00000, 0x03880000, 0x03e80000,
+    0x03d00000, 0x03980000, 0x03a00000, 0x03900000,
+    0x03f00000, 0x03f80000, 0x03e00000, 0x03b80000,
+    0x03b00000, 0x03800000, 0x03c80000, 0x03d80000,
+    0x06a80000, 0x06c00000, 0x06880000, 0x06e80000,
+    0x06d00000, 0x06980000, 0x06a00000, 0x06900000,
+    0x06f00000, 0x06f80000, 0x06e00000, 0x06b80000,
+    0x06b00000, 0x06800000, 0x06c80000, 0x06d80000,
+    0x05280000, 0x05400000, 0x05080000, 0x05680000,
+    0x05500000, 0x05180000, 0x05200000, 0x05100000,
+    0x05700000, 0x05780000, 0x05600000, 0x05380000,
+    0x05300000, 0x05000000, 0x05480000, 0x05580000,
+    0x00a80000, 0x00c00000, 0x00880000, 0x00e80000,
+    0x00d00000, 0x00980000, 0x00a00000, 0x00900000,
+    0x00f00000, 0x00f80000, 0x00e00000, 0x00b80000,
+    0x00b00000, 0x00800000, 0x00c80000, 0x00d80000,
+    0x00280000, 0x00400000, 0x00080000, 0x00680000,
+    0x00500000, 0x00180000, 0x00200000, 0x00100000,
+    0x00700000, 0x00780000, 0x00600000, 0x00380000,
+    0x00300000, 0x00000000, 0x00480000, 0x00580000,
+    0x04280000, 0x04400000, 0x04080000, 0x04680000,
+    0x04500000, 0x04180000, 0x04200000, 0x04100000,
+    0x04700000, 0x04780000, 0x04600000, 0x04380000,
+    0x04300000, 0x04000000, 0x04480000, 0x04580000,
+    0x04a80000, 0x04c00000, 0x04880000, 0x04e80000,
+    0x04d00000, 0x04980000, 0x04a00000, 0x04900000,
+    0x04f00000, 0x04f80000, 0x04e00000, 0x04b80000,
+    0x04b00000, 0x04800000, 0x04c80000, 0x04d80000,
+    0x07a80000, 0x07c00000, 0x07880000, 0x07e80000,
+    0x07d00000, 0x07980000, 0x07a00000, 0x07900000,
+    0x07f00000, 0x07f80000, 0x07e00000, 0x07b80000,
+    0x07b00000, 0x07800000, 0x07c80000, 0x07d80000,
+    0x07280000, 0x07400000, 0x07080000, 0x07680000,
+    0x07500000, 0x07180000, 0x07200000, 0x07100000,
+    0x07700000, 0x07780000, 0x07600000, 0x07380000,
+    0x07300000, 0x07000000, 0x07480000, 0x07580000,
+    0x02280000, 0x02400000, 0x02080000, 0x02680000,
+    0x02500000, 0x02180000, 0x02200000, 0x02100000,
+    0x02700000, 0x02780000, 0x02600000, 0x02380000,
+    0x02300000, 0x02000000, 0x02480000, 0x02580000,
+    0x03280000, 0x03400000, 0x03080000, 0x03680000,
+    0x03500000, 0x03180000, 0x03200000, 0x03100000,
+    0x03700000, 0x03780000, 0x03600000, 0x03380000,
+    0x03300000, 0x03000000, 0x03480000, 0x03580000,
+    0x06280000, 0x06400000, 0x06080000, 0x06680000,
+    0x06500000, 0x06180000, 0x06200000, 0x06100000,
+    0x06700000, 0x06780000, 0x06600000, 0x06380000,
+    0x06300000, 0x06000000, 0x06480000, 0x06580000,
+    0x05a80000, 0x05c00000, 0x05880000, 0x05e80000,
+    0x05d00000, 0x05980000, 0x05a00000, 0x05900000,
+    0x05f00000, 0x05f80000, 0x05e00000, 0x05b80000,
+    0x05b00000, 0x05800000, 0x05c80000, 0x05d80000,
+    0x01280000, 0x01400000, 0x01080000, 0x01680000,
+    0x01500000, 0x01180000, 0x01200000, 0x01100000,
+    0x01700000, 0x01780000, 0x01600000, 0x01380000,
+    0x01300000, 0x01000000, 0x01480000, 0x01580000,
+    0x02a80000, 0x02c00000, 0x02880000, 0x02e80000,
+    0x02d00000, 0x02980000, 0x02a00000, 0x02900000,
+    0x02f00000, 0x02f80000, 0x02e00000, 0x02b80000,
+    0x02b00000, 0x02800000, 0x02c80000, 0x02d80000,
+    0x01a80000, 0x01c00000, 0x01880000, 0x01e80000,
+    0x01d00000, 0x01980000, 0x01a00000, 0x01900000,
+    0x01f00000, 0x01f80000, 0x01e00000, 0x01b80000,
+    0x01b00000, 0x01800000, 0x01c80000, 0x01d80000,
+    /* 2 */
+    0x30000002, 0x60000002, 0x38000002, 0x08000002,
+    0x28000002, 0x78000002, 0x68000002, 0x40000002,
+    0x20000002, 0x50000002, 0x48000002, 0x70000002,
+    0x00000002, 0x18000002, 0x58000002, 0x10000002,
+    0xb0000005, 0xe0000005, 0xb8000005, 0x88000005,
+    0xa8000005, 0xf8000005, 0xe8000005, 0xc0000005,
+    0xa0000005, 0xd0000005, 0xc8000005, 0xf0000005,
+    0x80000005, 0x98000005, 0xd8000005, 0x90000005,
+    0x30000005, 0x60000005, 0x38000005, 0x08000005,
+    0x28000005, 0x78000005, 0x68000005, 0x40000005,
+    0x20000005, 0x50000005, 0x48000005, 0x70000005,
+    0x00000005, 0x18000005, 0x58000005, 0x10000005,
+    0x30000000, 0x60000000, 0x38000000, 0x08000000,
+    0x28000000, 0x78000000, 0x68000000, 0x40000000,
+    0x20000000, 0x50000000, 0x48000000, 0x70000000,
+    0x00000000, 0x18000000, 0x58000000, 0x10000000,
+    0xb0000003, 0xe0000003, 0xb8000003, 0x88000003,
+    0xa8000003, 0xf8000003, 0xe8000003, 0xc0000003,
+    0xa0000003, 0xd0000003, 0xc8000003, 0xf0000003,
+    0x80000003, 0x98000003, 0xd8000003, 0x90000003,
+    0x30000001, 0x60000001, 0x38000001, 0x08000001,
+    0x28000001, 0x78000001, 0x68000001, 0x40000001,
+    0x20000001, 0x50000001, 0x48000001, 0x70000001,
+    0x00000001, 0x18000001, 0x58000001, 0x10000001,
+    0xb0000000, 0xe0000000, 0xb8000000, 0x88000000,
+    0xa8000000, 0xf8000000, 0xe8000000, 0xc0000000,
+    0xa0000000, 0xd0000000, 0xc8000000, 0xf0000000,
+    0x80000000, 0x98000000, 0xd8000000, 0x90000000,
+    0xb0000006, 0xe0000006, 0xb8000006, 0x88000006,
+    0xa8000006, 0xf8000006, 0xe8000006, 0xc0000006,
+    0xa0000006, 0xd0000006, 0xc8000006, 0xf0000006,
+    0x80000006, 0x98000006, 0xd8000006, 0x90000006,
+    0xb0000001, 0xe0000001, 0xb8000001, 0x88000001,
+    0xa8000001, 0xf8000001, 0xe8000001, 0xc0000001,
+    0xa0000001, 0xd0000001, 0xc8000001, 0xf0000001,
+    0x80000001, 0x98000001, 0xd8000001, 0x90000001,
+    0x30000003, 0x60000003, 0x38000003, 0x08000003,
+    0x28000003, 0x78000003, 0x68000003, 0x40000003,
+    0x20000003, 0x50000003, 0x48000003, 0x70000003,
+    0x00000003, 0x18000003, 0x58000003, 0x10000003,
+    0x30000004, 0x60000004, 0x38000004, 0x08000004,
+    0x28000004, 0x78000004, 0x68000004, 0x40000004,
+    0x20000004, 0x50000004, 0x48000004, 0x70000004,
+    0x00000004, 0x18000004, 0x58000004, 0x10000004,
+    0xb0000002, 0xe0000002, 0xb8000002, 0x88000002,
+    0xa8000002, 0xf8000002, 0xe8000002, 0xc0000002,
+    0xa0000002, 0xd0000002, 0xc8000002, 0xf0000002,
+    0x80000002, 0x98000002, 0xd8000002, 0x90000002,
+    0xb0000004, 0xe0000004, 0xb8000004, 0x88000004,
+    0xa8000004, 0xf8000004, 0xe8000004, 0xc0000004,
+    0xa0000004, 0xd0000004, 0xc8000004, 0xf0000004,
+    0x80000004, 0x98000004, 0xd8000004, 0x90000004,
+    0x30000006, 0x60000006, 0x38000006, 0x08000006,
+    0x28000006, 0x78000006, 0x68000006, 0x40000006,
+    0x20000006, 0x50000006, 0x48000006, 0x70000006,
+    0x00000006, 0x18000006, 0x58000006, 0x10000006,
+    0xb0000007, 0xe0000007, 0xb8000007, 0x88000007,
+    0xa8000007, 0xf8000007, 0xe8000007, 0xc0000007,
+    0xa0000007, 0xd0000007, 0xc8000007, 0xf0000007,
+    0x80000007, 0x98000007, 0xd8000007, 0x90000007,
+    0x30000007, 0x60000007, 0x38000007, 0x08000007,
+    0x28000007, 0x78000007, 0x68000007, 0x40000007,
+    0x20000007, 0x50000007, 0x48000007, 0x70000007,
+    0x00000007, 0x18000007, 0x58000007, 0x10000007,
+    /* 3 */
+    0x000000e8, 0x000000d8, 0x000000a0, 0x00000088,
+    0x00000098, 0x000000f8, 0x000000a8, 0x000000c8,
+    0x00000080, 0x000000d0, 0x000000f0, 0x000000b8,
+    0x000000b0, 0x000000c0, 0x00000090, 0x000000e0,
+    0x000007e8, 0x000007d8, 0x000007a0, 0x00000788,
+    0x00000798, 0x000007f8, 0x000007a8, 0x000007c8,
+    0x00000780, 0x000007d0, 0x000007f0, 0x000007b8,
+    0x000007b0, 0x000007c0, 0x00000790, 0x000007e0,
+    0x000006e8, 0x000006d8, 0x000006a0, 0x00000688,
+    0x00000698, 0x000006f8, 0x000006a8, 0x000006c8,
+    0x00000680, 0x000006d0, 0x000006f0, 0x000006b8,
+    0x000006b0, 0x000006c0, 0x00000690, 0x000006e0,
+    0x00000068, 0x00000058, 0x00000020, 0x00000008,
+    0x00000018, 0x00000078, 0x00000028, 0x00000048,
+    0x00000000, 0x00000050, 0x00000070, 0x00000038,
+    0x00000030, 0x00000040, 0x00000010, 0x00000060,
+    0x000002e8, 0x000002d8, 0x000002a0, 0x00000288,
+    0x00000298, 0x000002f8, 0x000002a8, 0x000002c8,
+    0x00000280, 0x000002d0, 0x000002f0, 0x000002b8,
+    0x000002b0, 0x000002c0, 0x00000290, 0x000002e0,
+    0x000003e8, 0x000003d8, 0x000003a0, 0x00000388,
+    0x00000398, 0x000003f8, 0x000003a8, 0x000003c8,
+    0x00000380, 0x000003d0, 0x000003f0, 0x000003b8,
+    0x000003b0, 0x000003c0, 0x00000390, 0x000003e0,
+    0x00000568, 0x00000558, 0x00000520, 0x00000508,
+    0x00000518, 0x00000578, 0x00000528, 0x00000548,
+    0x00000500, 0x00000550, 0x00000570, 0x00000538,
+    0x00000530, 0x00000540, 0x00000510, 0x00000560,
+    0x00000268, 0x00000258, 0x00000220, 0x00000208,
+    0x00000218, 0x00000278, 0x00000228, 0x00000248,
+    0x00000200, 0x00000250, 0x00000270, 0x00000238,
+    0x00000230, 0x00000240, 0x00000210, 0x00000260,
+    0x000004e8, 0x000004d8, 0x000004a0, 0x00000488,
+    0x00000498, 0x000004f8, 0x000004a8, 0x000004c8,
+    0x00000480, 0x000004d0, 0x000004f0, 0x000004b8,
+    0x000004b0, 0x000004c0, 0x00000490, 0x000004e0,
+    0x00000168, 0x00000158, 0x00000120, 0x00000108,
+    0x00000118, 0x00000178, 0x00000128, 0x00000148,
+    0x00000100, 0x00000150, 0x00000170, 0x00000138,
+    0x00000130, 0x00000140, 0x00000110, 0x00000160,
+    0x000001e8, 0x000001d8, 0x000001a0, 0x00000188,
+    0x00000198, 0x000001f8, 0x000001a8, 0x000001c8,
+    0x00000180, 0x000001d0, 0x000001f0, 0x000001b8,
+    0x000001b0, 0x000001c0, 0x00000190, 0x000001e0,
+    0x00000768, 0x00000758, 0x00000720, 0x00000708,
+    0x00000718, 0x00000778, 0x00000728, 0x00000748,
+    0x00000700, 0x00000750, 0x00000770, 0x00000738,
+    0x00000730, 0x00000740, 0x00000710, 0x00000760,
+    0x00000368, 0x00000358, 0x00000320, 0x00000308,
+    0x00000318, 0x00000378, 0x00000328, 0x00000348,
+    0x00000300, 0x00000350, 0x00000370, 0x00000338,
+    0x00000330, 0x00000340, 0x00000310, 0x00000360,
+    0x000005e8, 0x000005d8, 0x000005a0, 0x00000588,
+    0x00000598, 0x000005f8, 0x000005a8, 0x000005c8,
+    0x00000580, 0x000005d0, 0x000005f0, 0x000005b8,
+    0x000005b0, 0x000005c0, 0x00000590, 0x000005e0,
+    0x00000468, 0x00000458, 0x00000420, 0x00000408,
+    0x00000418, 0x00000478, 0x00000428, 0x00000448,
+    0x00000400, 0x00000450, 0x00000470, 0x00000438,
+    0x00000430, 0x00000440, 0x00000410, 0x00000460,
+    0x00000668, 0x00000658, 0x00000620, 0x00000608,
+    0x00000618, 0x00000678, 0x00000628, 0x00000648,
+    0x00000600, 0x00000650, 0x00000670, 0x00000638,
+    0x00000630, 0x00000640, 0x00000610, 0x00000660,
+  };
+
+static const u32 sbox_CryptoPro_3411[4*256] =
+  {
+    /* 0 */
+    0x0002d000, 0x0002a000, 0x0002a800, 0x0002b000,
+    0x0002c000, 0x00028800, 0x00029800, 0x0002b800,
+    0x0002e800, 0x0002e000, 0x0002f000, 0x00028000,
+    0x0002c800, 0x00029000, 0x0002d800, 0x0002f800,
+    0x0007d000, 0x0007a000, 0x0007a800, 0x0007b000,
+    0x0007c000, 0x00078800, 0x00079800, 0x0007b800,
+    0x0007e800, 0x0007e000, 0x0007f000, 0x00078000,
+    0x0007c800, 0x00079000, 0x0007d800, 0x0007f800,
+    0x00025000, 0x00022000, 0x00022800, 0x00023000,
+    0x00024000, 0x00020800, 0x00021800, 0x00023800,
+    0x00026800, 0x00026000, 0x00027000, 0x00020000,
+    0x00024800, 0x00021000, 0x00025800, 0x00027800,
+    0x00005000, 0x00002000, 0x00002800, 0x00003000,
+    0x00004000, 0x00000800, 0x00001800, 0x00003800,
+    0x00006800, 0x00006000, 0x00007000, 0x00000000,
+    0x00004800, 0x00001000, 0x00005800, 0x00007800,
+    0x00015000, 0x00012000, 0x00012800, 0x00013000,
+    0x00014000, 0x00010800, 0x00011800, 0x00013800,
+    0x00016800, 0x00016000, 0x00017000, 0x00010000,
+    0x00014800, 0x00011000, 0x00015800, 0x00017800,
+    0x0006d000, 0x0006a000, 0x0006a800, 0x0006b000,
+    0x0006c000, 0x00068800, 0x00069800, 0x0006b800,
+    0x0006e800, 0x0006e000, 0x0006f000, 0x00068000,
+    0x0006c800, 0x00069000, 0x0006d800, 0x0006f800,
+    0x0005d000, 0x0005a000, 0x0005a800, 0x0005b000,
+    0x0005c000, 0x00058800, 0x00059800, 0x0005b800,
+    0x0005e800, 0x0005e000, 0x0005f000, 0x00058000,
+    0x0005c800, 0x00059000, 0x0005d800, 0x0005f800,
+    0x0004d000, 0x0004a000, 0x0004a800, 0x0004b000,
+    0x0004c000, 0x00048800, 0x00049800, 0x0004b800,
+    0x0004e800, 0x0004e000, 0x0004f000, 0x00048000,
+    0x0004c800, 0x00049000, 0x0004d800, 0x0004f800,
+    0x0000d000, 0x0000a000, 0x0000a800, 0x0000b000,
+    0x0000c000, 0x00008800, 0x00009800, 0x0000b800,
+    0x0000e800, 0x0000e000, 0x0000f000, 0x00008000,
+    0x0000c800, 0x00009000, 0x0000d800, 0x0000f800,
+    0x0003d000, 0x0003a000, 0x0003a800, 0x0003b000,
+    0x0003c000, 0x00038800, 0x00039800, 0x0003b800,
+    0x0003e800, 0x0003e000, 0x0003f000, 0x00038000,
+    0x0003c800, 0x00039000, 0x0003d800, 0x0003f800,
+    0x00035000, 0x00032000, 0x00032800, 0x00033000,
+    0x00034000, 0x00030800, 0x00031800, 0x00033800,
+    0x00036800, 0x00036000, 0x00037000, 0x00030000,
+    0x00034800, 0x00031000, 0x00035800, 0x00037800,
+    0x0001d000, 0x0001a000, 0x0001a800, 0x0001b000,
+    0x0001c000, 0x00018800, 0x00019800, 0x0001b800,
+    0x0001e800, 0x0001e000, 0x0001f000, 0x00018000,
+    0x0001c800, 0x00019000, 0x0001d800, 0x0001f800,
+    0x00065000, 0x00062000, 0x00062800, 0x00063000,
+    0x00064000, 0x00060800, 0x00061800, 0x00063800,
+    0x00066800, 0x00066000, 0x00067000, 0x00060000,
+    0x00064800, 0x00061000, 0x00065800, 0x00067800,
+    0x00075000, 0x00072000, 0x00072800, 0x00073000,
+    0x00074000, 0x00070800, 0x00071800, 0x00073800,
+    0x00076800, 0x00076000, 0x00077000, 0x00070000,
+    0x00074800, 0x00071000, 0x00075800, 0x00077800,
+    0x00055000, 0x00052000, 0x00052800, 0x00053000,
+    0x00054000, 0x00050800, 0x00051800, 0x00053800,
+    0x00056800, 0x00056000, 0x00057000, 0x00050000,
+    0x00054800, 0x00051000, 0x00055800, 0x00057800,
+    0x00045000, 0x00042000, 0x00042800, 0x00043000,
+    0x00044000, 0x00040800, 0x00041800, 0x00043800,
+    0x00046800, 0x00046000, 0x00047000, 0x00040000,
+    0x00044800, 0x00041000, 0x00045800, 0x00047800,
+    /* 1 */
+    0x02380000, 0x02780000, 0x02600000, 0x02700000,
+    0x02480000, 0x02200000, 0x02080000, 0x02000000,
+    0x02180000, 0x02580000, 0x02280000, 0x02100000,
+    0x02300000, 0x02500000, 0x02400000, 0x02680000,
+    0x05380000, 0x05780000, 0x05600000, 0x05700000,
+    0x05480000, 0x05200000, 0x05080000, 0x05000000,
+    0x05180000, 0x05580000, 0x05280000, 0x05100000,
+    0x05300000, 0x05500000, 0x05400000, 0x05680000,
+    0x03b80000, 0x03f80000, 0x03e00000, 0x03f00000,
+    0x03c80000, 0x03a00000, 0x03880000, 0x03800000,
+    0x03980000, 0x03d80000, 0x03a80000, 0x03900000,
+    0x03b00000, 0x03d00000, 0x03c00000, 0x03e80000,
+    0x06380000, 0x06780000, 0x06600000, 0x06700000,
+    0x06480000, 0x06200000, 0x06080000, 0x06000000,
+    0x06180000, 0x06580000, 0x06280000, 0x06100000,
+    0x06300000, 0x06500000, 0x06400000, 0x06680000,
+    0x00380000, 0x00780000, 0x00600000, 0x00700000,
+    0x00480000, 0x00200000, 0x00080000, 0x00000000,
+    0x00180000, 0x00580000, 0x00280000, 0x00100000,
+    0x00300000, 0x00500000, 0x00400000, 0x00680000,
+    0x07b80000, 0x07f80000, 0x07e00000, 0x07f00000,
+    0x07c80000, 0x07a00000, 0x07880000, 0x07800000,
+    0x07980000, 0x07d80000, 0x07a80000, 0x07900000,
+    0x07b00000, 0x07d00000, 0x07c00000, 0x07e80000,
+    0x01380000, 0x01780000, 0x01600000, 0x01700000,
+    0x01480000, 0x01200000, 0x01080000, 0x01000000,
+    0x01180000, 0x01580000, 0x01280000, 0x01100000,
+    0x01300000, 0x01500000, 0x01400000, 0x01680000,
+    0x04380000, 0x04780000, 0x04600000, 0x04700000,
+    0x04480000, 0x04200000, 0x04080000, 0x04000000,
+    0x04180000, 0x04580000, 0x04280000, 0x04100000,
+    0x04300000, 0x04500000, 0x04400000, 0x04680000,
+    0x07380000, 0x07780000, 0x07600000, 0x07700000,
+    0x07480000, 0x07200000, 0x07080000, 0x07000000,
+    0x07180000, 0x07580000, 0x07280000, 0x07100000,
+    0x07300000, 0x07500000, 0x07400000, 0x07680000,
+    0x00b80000, 0x00f80000, 0x00e00000, 0x00f00000,
+    0x00c80000, 0x00a00000, 0x00880000, 0x00800000,
+    0x00980000, 0x00d80000, 0x00a80000, 0x00900000,
+    0x00b00000, 0x00d00000, 0x00c00000, 0x00e80000,
+    0x03380000, 0x03780000, 0x03600000, 0x03700000,
+    0x03480000, 0x03200000, 0x03080000, 0x03000000,
+    0x03180000, 0x03580000, 0x03280000, 0x03100000,
+    0x03300000, 0x03500000, 0x03400000, 0x03680000,
+    0x02b80000, 0x02f80000, 0x02e00000, 0x02f00000,
+    0x02c80000, 0x02a00000, 0x02880000, 0x02800000,
+    0x02980000, 0x02d80000, 0x02a80000, 0x02900000,
+    0x02b00000, 0x02d00000, 0x02c00000, 0x02e80000,
+    0x06b80000, 0x06f80000, 0x06e00000, 0x06f00000,
+    0x06c80000, 0x06a00000, 0x06880000, 0x06800000,
+    0x06980000, 0x06d80000, 0x06a80000, 0x06900000,
+    0x06b00000, 0x06d00000, 0x06c00000, 0x06e80000,
+    0x05b80000, 0x05f80000, 0x05e00000, 0x05f00000,
+    0x05c80000, 0x05a00000, 0x05880000, 0x05800000,
+    0x05980000, 0x05d80000, 0x05a80000, 0x05900000,
+    0x05b00000, 0x05d00000, 0x05c00000, 0x05e80000,
+    0x04b80000, 0x04f80000, 0x04e00000, 0x04f00000,
+    0x04c80000, 0x04a00000, 0x04880000, 0x04800000,
+    0x04980000, 0x04d80000, 0x04a80000, 0x04900000,
+    0x04b00000, 0x04d00000, 0x04c00000, 0x04e80000,
+    0x01b80000, 0x01f80000, 0x01e00000, 0x01f00000,
+    0x01c80000, 0x01a00000, 0x01880000, 0x01800000,
+    0x01980000, 0x01d80000, 0x01a80000, 0x01900000,
+    0x01b00000, 0x01d00000, 0x01c00000, 0x01e80000,
+    /* 2 */
+    0xb8000003, 0xb0000003, 0xa0000003, 0xd8000003,
+    0xc8000003, 0xe0000003, 0x90000003, 0xd0000003,
+    0x88000003, 0xc0000003, 0x80000003, 0xf0000003,
+    0xf8000003, 0xe8000003, 0x98000003, 0xa8000003,
+    0x38000003, 0x30000003, 0x20000003, 0x58000003,
+    0x48000003, 0x60000003, 0x10000003, 0x50000003,
+    0x08000003, 0x40000003, 0x00000003, 0x70000003,
+    0x78000003, 0x68000003, 0x18000003, 0x28000003,
+    0x38000001, 0x30000001, 0x20000001, 0x58000001,
+    0x48000001, 0x60000001, 0x10000001, 0x50000001,
+    0x08000001, 0x40000001, 0x00000001, 0x70000001,
+    0x78000001, 0x68000001, 0x18000001, 0x28000001,
+    0x38000002, 0x30000002, 0x20000002, 0x58000002,
+    0x48000002, 0x60000002, 0x10000002, 0x50000002,
+    0x08000002, 0x40000002, 0x00000002, 0x70000002,
+    0x78000002, 0x68000002, 0x18000002, 0x28000002,
+    0xb8000006, 0xb0000006, 0xa0000006, 0xd8000006,
+    0xc8000006, 0xe0000006, 0x90000006, 0xd0000006,
+    0x88000006, 0xc0000006, 0x80000006, 0xf0000006,
+    0xf8000006, 0xe8000006, 0x98000006, 0xa8000006,
+    0xb8000004, 0xb0000004, 0xa0000004, 0xd8000004,
+    0xc8000004, 0xe0000004, 0x90000004, 0xd0000004,
+    0x88000004, 0xc0000004, 0x80000004, 0xf0000004,
+    0xf8000004, 0xe8000004, 0x98000004, 0xa8000004,
+    0xb8000007, 0xb0000007, 0xa0000007, 0xd8000007,
+    0xc8000007, 0xe0000007, 0x90000007, 0xd0000007,
+    0x88000007, 0xc0000007, 0x80000007, 0xf0000007,
+    0xf8000007, 0xe8000007, 0x98000007, 0xa8000007,
+    0x38000000, 0x30000000, 0x20000000, 0x58000000,
+    0x48000000, 0x60000000, 0x10000000, 0x50000000,
+    0x08000000, 0x40000000, 0x00000000, 0x70000000,
+    0x78000000, 0x68000000, 0x18000000, 0x28000000,
+    0x38000005, 0x30000005, 0x20000005, 0x58000005,
+    0x48000005, 0x60000005, 0x10000005, 0x50000005,
+    0x08000005, 0x40000005, 0x00000005, 0x70000005,
+    0x78000005, 0x68000005, 0x18000005, 0x28000005,
+    0xb8000000, 0xb0000000, 0xa0000000, 0xd8000000,
+    0xc8000000, 0xe0000000, 0x90000000, 0xd0000000,
+    0x88000000, 0xc0000000, 0x80000000, 0xf0000000,
+    0xf8000000, 0xe8000000, 0x98000000, 0xa8000000,
+    0xb8000002, 0xb0000002, 0xa0000002, 0xd8000002,
+    0xc8000002, 0xe0000002, 0x90000002, 0xd0000002,
+    0x88000002, 0xc0000002, 0x80000002, 0xf0000002,
+    0xf8000002, 0xe8000002, 0x98000002, 0xa8000002,
+    0xb8000005, 0xb0000005, 0xa0000005, 0xd8000005,
+    0xc8000005, 0xe0000005, 0x90000005, 0xd0000005,
+    0x88000005, 0xc0000005, 0x80000005, 0xf0000005,
+    0xf8000005, 0xe8000005, 0x98000005, 0xa8000005,
+    0x38000004, 0x30000004, 0x20000004, 0x58000004,
+    0x48000004, 0x60000004, 0x10000004, 0x50000004,
+    0x08000004, 0x40000004, 0x00000004, 0x70000004,
+    0x78000004, 0x68000004, 0x18000004, 0x28000004,
+    0x38000007, 0x30000007, 0x20000007, 0x58000007,
+    0x48000007, 0x60000007, 0x10000007, 0x50000007,
+    0x08000007, 0x40000007, 0x00000007, 0x70000007,
+    0x78000007, 0x68000007, 0x18000007, 0x28000007,
+    0x38000006, 0x30000006, 0x20000006, 0x58000006,
+    0x48000006, 0x60000006, 0x10000006, 0x50000006,
+    0x08000006, 0x40000006, 0x00000006, 0x70000006,
+    0x78000006, 0x68000006, 0x18000006, 0x28000006,
+    0xb8000001, 0xb0000001, 0xa0000001, 0xd8000001,
+    0xc8000001, 0xe0000001, 0x90000001, 0xd0000001,
+    0x88000001, 0xc0000001, 0x80000001, 0xf0000001,
+    0xf8000001, 0xe8000001, 0x98000001, 0xa8000001,
+    /* 3 */
+    0x000000e8, 0x000000f0, 0x000000a0, 0x00000088,
+    0x000000b8, 0x00000080, 0x000000a8, 0x000000d0,
+    0x00000098, 0x000000e0, 0x000000c0, 0x000000f8,
+    0x000000b0, 0x00000090, 0x000000c8, 0x000000d8,
+    0x000001e8, 0x000001f0, 0x000001a0, 0x00000188,
+    0x000001b8, 0x00000180, 0x000001a8, 0x000001d0,
+    0x00000198, 0x000001e0, 0x000001c0, 0x000001f8,
+    0x000001b0, 0x00000190, 0x000001c8, 0x000001d8,
+    0x00000568, 0x00000570, 0x00000520, 0x00000508,
+    0x00000538, 0x00000500, 0x00000528, 0x00000550,
+    0x00000518, 0x00000560, 0x00000540, 0x00000578,
+    0x00000530, 0x00000510, 0x00000548, 0x00000558,
+    0x000004e8, 0x000004f0, 0x000004a0, 0x00000488,
+    0x000004b8, 0x00000480, 0x000004a8, 0x000004d0,
+    0x00000498, 0x000004e0, 0x000004c0, 0x000004f8,
+    0x000004b0, 0x00000490, 0x000004c8, 0x000004d8,
+    0x000002e8, 0x000002f0, 0x000002a0, 0x00000288,
+    0x000002b8, 0x00000280, 0x000002a8, 0x000002d0,
+    0x00000298, 0x000002e0, 0x000002c0, 0x000002f8,
+    0x000002b0, 0x00000290, 0x000002c8, 0x000002d8,
+    0x000005e8, 0x000005f0, 0x000005a0, 0x00000588,
+    0x000005b8, 0x00000580, 0x000005a8, 0x000005d0,
+    0x00000598, 0x000005e0, 0x000005c0, 0x000005f8,
+    0x000005b0, 0x00000590, 0x000005c8, 0x000005d8,
+    0x00000268, 0x00000270, 0x00000220, 0x00000208,
+    0x00000238, 0x00000200, 0x00000228, 0x00000250,
+    0x00000218, 0x00000260, 0x00000240, 0x00000278,
+    0x00000230, 0x00000210, 0x00000248, 0x00000258,
+    0x000007e8, 0x000007f0, 0x000007a0, 0x00000788,
+    0x000007b8, 0x00000780, 0x000007a8, 0x000007d0,
+    0x00000798, 0x000007e0, 0x000007c0, 0x000007f8,
+    0x000007b0, 0x00000790, 0x000007c8, 0x000007d8,
+    0x00000468, 0x00000470, 0x00000420, 0x00000408,
+    0x00000438, 0x00000400, 0x00000428, 0x00000450,
+    0x00000418, 0x00000460, 0x00000440, 0x00000478,
+    0x00000430, 0x00000410, 0x00000448, 0x00000458,
+    0x00000368, 0x00000370, 0x00000320, 0x00000308,
+    0x00000338, 0x00000300, 0x00000328, 0x00000350,
+    0x00000318, 0x00000360, 0x00000340, 0x00000378,
+    0x00000330, 0x00000310, 0x00000348, 0x00000358,
+    0x000003e8, 0x000003f0, 0x000003a0, 0x00000388,
+    0x000003b8, 0x00000380, 0x000003a8, 0x000003d0,
+    0x00000398, 0x000003e0, 0x000003c0, 0x000003f8,
+    0x000003b0, 0x00000390, 0x000003c8, 0x000003d8,
+    0x00000768, 0x00000770, 0x00000720, 0x00000708,
+    0x00000738, 0x00000700, 0x00000728, 0x00000750,
+    0x00000718, 0x00000760, 0x00000740, 0x00000778,
+    0x00000730, 0x00000710, 0x00000748, 0x00000758,
+    0x000006e8, 0x000006f0, 0x000006a0, 0x00000688,
+    0x000006b8, 0x00000680, 0x000006a8, 0x000006d0,
+    0x00000698, 0x000006e0, 0x000006c0, 0x000006f8,
+    0x000006b0, 0x00000690, 0x000006c8, 0x000006d8,
+    0x00000068, 0x00000070, 0x00000020, 0x00000008,
+    0x00000038, 0x00000000, 0x00000028, 0x00000050,
+    0x00000018, 0x00000060, 0x00000040, 0x00000078,
+    0x00000030, 0x00000010, 0x00000048, 0x00000058,
+    0x00000168, 0x00000170, 0x00000120, 0x00000108,
+    0x00000138, 0x00000100, 0x00000128, 0x00000150,
+    0x00000118, 0x00000160, 0x00000140, 0x00000178,
+    0x00000130, 0x00000110, 0x00000148, 0x00000158,
+    0x00000668, 0x00000670, 0x00000620, 0x00000608,
+    0x00000638, 0x00000600, 0x00000628, 0x00000650,
+    0x00000618, 0x00000660, 0x00000640, 0x00000678,
+    0x00000630, 0x00000610, 0x00000648, 0x00000658,
+  };
+
+static const u32 sbox_Test_89[4*256] =
+  {
+    /* 0 */
+    0x00062000, 0x00061000, 0x00067800, 0x00062800,
+    0x00064800, 0x00060800, 0x00060000, 0x00064000,
+    0x00067000, 0x00061800, 0x00065800, 0x00066000,
+    0x00066800, 0x00063800, 0x00065000, 0x00063000,
+    0x0004a000, 0x00049000, 0x0004f800, 0x0004a800,
+    0x0004c800, 0x00048800, 0x00048000, 0x0004c000,
+    0x0004f000, 0x00049800, 0x0004d800, 0x0004e000,
+    0x0004e800, 0x0004b800, 0x0004d000, 0x0004b000,
+    0x0007a000, 0x00079000, 0x0007f800, 0x0007a800,
+    0x0007c800, 0x00078800, 0x00078000, 0x0007c000,
+    0x0007f000, 0x00079800, 0x0007d800, 0x0007e000,
+    0x0007e800, 0x0007b800, 0x0007d000, 0x0007b000,
+    0x00072000, 0x00071000, 0x00077800, 0x00072800,
+    0x00074800, 0x00070800, 0x00070000, 0x00074000,
+    0x00077000, 0x00071800, 0x00075800, 0x00076000,
+    0x00076800, 0x00073800, 0x00075000, 0x00073000,
+    0x00042000, 0x00041000, 0x00047800, 0x00042800,
+    0x00044800, 0x00040800, 0x00040000, 0x00044000,
+    0x00047000, 0x00041800, 0x00045800, 0x00046000,
+    0x00046800, 0x00043800, 0x00045000, 0x00043000,
+    0x0000a000, 0x00009000, 0x0000f800, 0x0000a800,
+    0x0000c800, 0x00008800, 0x00008000, 0x0000c000,
+    0x0000f000, 0x00009800, 0x0000d800, 0x0000e000,
+    0x0000e800, 0x0000b800, 0x0000d000, 0x0000b000,
+    0x0001a000, 0x00019000, 0x0001f800, 0x0001a800,
+    0x0001c800, 0x00018800, 0x00018000, 0x0001c000,
+    0x0001f000, 0x00019800, 0x0001d800, 0x0001e000,
+    0x0001e800, 0x0001b800, 0x0001d000, 0x0001b000,
+    0x00052000, 0x00051000, 0x00057800, 0x00052800,
+    0x00054800, 0x00050800, 0x00050000, 0x00054000,
+    0x00057000, 0x00051800, 0x00055800, 0x00056000,
+    0x00056800, 0x00053800, 0x00055000, 0x00053000,
+    0x00012000, 0x00011000, 0x00017800, 0x00012800,
+    0x00014800, 0x00010800, 0x00010000, 0x00014000,
+    0x00017000, 0x00011800, 0x00015800, 0x00016000,
+    0x00016800, 0x00013800, 0x00015000, 0x00013000,
+    0x0003a000, 0x00039000, 0x0003f800, 0x0003a800,
+    0x0003c800, 0x00038800, 0x00038000, 0x0003c000,
+    0x0003f000, 0x00039800, 0x0003d800, 0x0003e000,
+    0x0003e800, 0x0003b800, 0x0003d000, 0x0003b000,
+    0x00022000, 0x00021000, 0x00027800, 0x00022800,
+    0x00024800, 0x00020800, 0x00020000, 0x00024000,
+    0x00027000, 0x00021800, 0x00025800, 0x00026000,
+    0x00026800, 0x00023800, 0x00025000, 0x00023000,
+    0x0006a000, 0x00069000, 0x0006f800, 0x0006a800,
+    0x0006c800, 0x00068800, 0x00068000, 0x0006c000,
+    0x0006f000, 0x00069800, 0x0006d800, 0x0006e000,
+    0x0006e800, 0x0006b800, 0x0006d000, 0x0006b000,
+    0x00032000, 0x00031000, 0x00037800, 0x00032800,
+    0x00034800, 0x00030800, 0x00030000, 0x00034000,
+    0x00037000, 0x00031800, 0x00035800, 0x00036000,
+    0x00036800, 0x00033800, 0x00035000, 0x00033000,
+    0x00002000, 0x00001000, 0x00007800, 0x00002800,
+    0x00004800, 0x00000800, 0x00000000, 0x00004000,
+    0x00007000, 0x00001800, 0x00005800, 0x00006000,
+    0x00006800, 0x00003800, 0x00005000, 0x00003000,
+    0x0005a000, 0x00059000, 0x0005f800, 0x0005a800,
+    0x0005c800, 0x00058800, 0x00058000, 0x0005c000,
+    0x0005f000, 0x00059800, 0x0005d800, 0x0005e000,
+    0x0005e800, 0x0005b800, 0x0005d000, 0x0005b000,
+    0x0002a000, 0x00029000, 0x0002f800, 0x0002a800,
+    0x0002c800, 0x00028800, 0x00028000, 0x0002c000,
+    0x0002f000, 0x00029800, 0x0002d800, 0x0002e000,
+    0x0002e800, 0x0002b800, 0x0002d000, 0x0002b000,
+    /* 1 */
+    0x07680000, 0x07400000, 0x07700000, 0x07600000,
+    0x07380000, 0x07180000, 0x07480000, 0x07500000,
+    0x07080000, 0x07280000, 0x07100000, 0x07200000,
+    0x07300000, 0x07780000, 0x07000000, 0x07580000,
+    0x04e80000, 0x04c00000, 0x04f00000, 0x04e00000,
+    0x04b80000, 0x04980000, 0x04c80000, 0x04d00000,
+    0x04880000, 0x04a80000, 0x04900000, 0x04a00000,
+    0x04b00000, 0x04f80000, 0x04800000, 0x04d80000,
+    0x05e80000, 0x05c00000, 0x05f00000, 0x05e00000,
+    0x05b80000, 0x05980000, 0x05c80000, 0x05d00000,
+    0x05880000, 0x05a80000, 0x05900000, 0x05a00000,
+    0x05b00000, 0x05f80000, 0x05800000, 0x05d80000,
+    0x01680000, 0x01400000, 0x01700000, 0x01600000,
+    0x01380000, 0x01180000, 0x01480000, 0x01500000,
+    0x01080000, 0x01280000, 0x01100000, 0x01200000,
+    0x01300000, 0x01780000, 0x01000000, 0x01580000,
+    0x02e80000, 0x02c00000, 0x02f00000, 0x02e00000,
+    0x02b80000, 0x02980000, 0x02c80000, 0x02d00000,
+    0x02880000, 0x02a80000, 0x02900000, 0x02a00000,
+    0x02b00000, 0x02f80000, 0x02800000, 0x02d80000,
+    0x07e80000, 0x07c00000, 0x07f00000, 0x07e00000,
+    0x07b80000, 0x07980000, 0x07c80000, 0x07d00000,
+    0x07880000, 0x07a80000, 0x07900000, 0x07a00000,
+    0x07b00000, 0x07f80000, 0x07800000, 0x07d80000,
+    0x03e80000, 0x03c00000, 0x03f00000, 0x03e00000,
+    0x03b80000, 0x03980000, 0x03c80000, 0x03d00000,
+    0x03880000, 0x03a80000, 0x03900000, 0x03a00000,
+    0x03b00000, 0x03f80000, 0x03800000, 0x03d80000,
+    0x00e80000, 0x00c00000, 0x00f00000, 0x00e00000,
+    0x00b80000, 0x00980000, 0x00c80000, 0x00d00000,
+    0x00880000, 0x00a80000, 0x00900000, 0x00a00000,
+    0x00b00000, 0x00f80000, 0x00800000, 0x00d80000,
+    0x00680000, 0x00400000, 0x00700000, 0x00600000,
+    0x00380000, 0x00180000, 0x00480000, 0x00500000,
+    0x00080000, 0x00280000, 0x00100000, 0x00200000,
+    0x00300000, 0x00780000, 0x00000000, 0x00580000,
+    0x06e80000, 0x06c00000, 0x06f00000, 0x06e00000,
+    0x06b80000, 0x06980000, 0x06c80000, 0x06d00000,
+    0x06880000, 0x06a80000, 0x06900000, 0x06a00000,
+    0x06b00000, 0x06f80000, 0x06800000, 0x06d80000,
+    0x06680000, 0x06400000, 0x06700000, 0x06600000,
+    0x06380000, 0x06180000, 0x06480000, 0x06500000,
+    0x06080000, 0x06280000, 0x06100000, 0x06200000,
+    0x06300000, 0x06780000, 0x06000000, 0x06580000,
+    0x03680000, 0x03400000, 0x03700000, 0x03600000,
+    0x03380000, 0x03180000, 0x03480000, 0x03500000,
+    0x03080000, 0x03280000, 0x03100000, 0x03200000,
+    0x03300000, 0x03780000, 0x03000000, 0x03580000,
+    0x05680000, 0x05400000, 0x05700000, 0x05600000,
+    0x05380000, 0x05180000, 0x05480000, 0x05500000,
+    0x05080000, 0x05280000, 0x05100000, 0x05200000,
+    0x05300000, 0x05780000, 0x05000000, 0x05580000,
+    0x02680000, 0x02400000, 0x02700000, 0x02600000,
+    0x02380000, 0x02180000, 0x02480000, 0x02500000,
+    0x02080000, 0x02280000, 0x02100000, 0x02200000,
+    0x02300000, 0x02780000, 0x02000000, 0x02580000,
+    0x01e80000, 0x01c00000, 0x01f00000, 0x01e00000,
+    0x01b80000, 0x01980000, 0x01c80000, 0x01d00000,
+    0x01880000, 0x01a80000, 0x01900000, 0x01a00000,
+    0x01b00000, 0x01f80000, 0x01800000, 0x01d80000,
+    0x04680000, 0x04400000, 0x04700000, 0x04600000,
+    0x04380000, 0x04180000, 0x04480000, 0x04500000,
+    0x04080000, 0x04280000, 0x04100000, 0x04200000,
+    0x04300000, 0x04780000, 0x04000000, 0x04580000,
+    /* 2 */
+    0x18000004, 0x70000004, 0x28000004, 0x48000004,
+    0x30000004, 0x40000004, 0x00000004, 0x68000004,
+    0x50000004, 0x58000004, 0x38000004, 0x60000004,
+    0x10000004, 0x08000004, 0x78000004, 0x20000004,
+    0x98000007, 0xf0000007, 0xa8000007, 0xc8000007,
+    0xb0000007, 0xc0000007, 0x80000007, 0xe8000007,
+    0xd0000007, 0xd8000007, 0xb8000007, 0xe0000007,
+    0x90000007, 0x88000007, 0xf8000007, 0xa0000007,
+    0x18000003, 0x70000003, 0x28000003, 0x48000003,
+    0x30000003, 0x40000003, 0x00000003, 0x68000003,
+    0x50000003, 0x58000003, 0x38000003, 0x60000003,
+    0x10000003, 0x08000003, 0x78000003, 0x20000003,
+    0x98000005, 0xf0000005, 0xa8000005, 0xc8000005,
+    0xb0000005, 0xc0000005, 0x80000005, 0xe8000005,
+    0xd0000005, 0xd8000005, 0xb8000005, 0xe0000005,
+    0x90000005, 0x88000005, 0xf8000005, 0xa0000005,
+    0x98000000, 0xf0000000, 0xa8000000, 0xc8000000,
+    0xb0000000, 0xc0000000, 0x80000000, 0xe8000000,
+    0xd0000000, 0xd8000000, 0xb8000000, 0xe0000000,
+    0x90000000, 0x88000000, 0xf8000000, 0xa0000000,
+    0x98000004, 0xf0000004, 0xa8000004, 0xc8000004,
+    0xb0000004, 0xc0000004, 0x80000004, 0xe8000004,
+    0xd0000004, 0xd8000004, 0xb8000004, 0xe0000004,
+    0x90000004, 0x88000004, 0xf8000004, 0xa0000004,
+    0x18000006, 0x70000006, 0x28000006, 0x48000006,
+    0x30000006, 0x40000006, 0x00000006, 0x68000006,
+    0x50000006, 0x58000006, 0x38000006, 0x60000006,
+    0x10000006, 0x08000006, 0x78000006, 0x20000006,
+    0x98000002, 0xf0000002, 0xa8000002, 0xc8000002,
+    0xb0000002, 0xc0000002, 0x80000002, 0xe8000002,
+    0xd0000002, 0xd8000002, 0xb8000002, 0xe0000002,
+    0x90000002, 0x88000002, 0xf8000002, 0xa0000002,
+    0x98000006, 0xf0000006, 0xa8000006, 0xc8000006,
+    0xb0000006, 0xc0000006, 0x80000006, 0xe8000006,
+    0xd0000006, 0xd8000006, 0xb8000006, 0xe0000006,
+    0x90000006, 0x88000006, 0xf8000006, 0xa0000006,
+    0x98000001, 0xf0000001, 0xa8000001, 0xc8000001,
+    0xb0000001, 0xc0000001, 0x80000001, 0xe8000001,
+    0xd0000001, 0xd8000001, 0xb8000001, 0xe0000001,
+    0x90000001, 0x88000001, 0xf8000001, 0xa0000001,
+    0x98000003, 0xf0000003, 0xa8000003, 0xc8000003,
+    0xb0000003, 0xc0000003, 0x80000003, 0xe8000003,
+    0xd0000003, 0xd8000003, 0xb8000003, 0xe0000003,
+    0x90000003, 0x88000003, 0xf8000003, 0xa0000003,
+    0x18000005, 0x70000005, 0x28000005, 0x48000005,
+    0x30000005, 0x40000005, 0x00000005, 0x68000005,
+    0x50000005, 0x58000005, 0x38000005, 0x60000005,
+    0x10000005, 0x08000005, 0x78000005, 0x20000005,
+    0x18000000, 0x70000000, 0x28000000, 0x48000000,
+    0x30000000, 0x40000000, 0x00000000, 0x68000000,
+    0x50000000, 0x58000000, 0x38000000, 0x60000000,
+    0x10000000, 0x08000000, 0x78000000, 0x20000000,
+    0x18000007, 0x70000007, 0x28000007, 0x48000007,
+    0x30000007, 0x40000007, 0x00000007, 0x68000007,
+    0x50000007, 0x58000007, 0x38000007, 0x60000007,
+    0x10000007, 0x08000007, 0x78000007, 0x20000007,
+    0x18000001, 0x70000001, 0x28000001, 0x48000001,
+    0x30000001, 0x40000001, 0x00000001, 0x68000001,
+    0x50000001, 0x58000001, 0x38000001, 0x60000001,
+    0x10000001, 0x08000001, 0x78000001, 0x20000001,
+    0x18000002, 0x70000002, 0x28000002, 0x48000002,
+    0x30000002, 0x40000002, 0x00000002, 0x68000002,
+    0x50000002, 0x58000002, 0x38000002, 0x60000002,
+    0x10000002, 0x08000002, 0x78000002, 0x20000002,
+    /* 3 */
+    0x00000648, 0x00000658, 0x00000660, 0x00000600,
+    0x00000618, 0x00000630, 0x00000638, 0x00000628,
+    0x00000620, 0x00000640, 0x00000670, 0x00000678,
+    0x00000608, 0x00000650, 0x00000610, 0x00000668,
+    0x00000348, 0x00000358, 0x00000360, 0x00000300,
+    0x00000318, 0x00000330, 0x00000338, 0x00000328,
+    0x00000320, 0x00000340, 0x00000370, 0x00000378,
+    0x00000308, 0x00000350, 0x00000310, 0x00000368,
+    0x000002c8, 0x000002d8, 0x000002e0, 0x00000280,
+    0x00000298, 0x000002b0, 0x000002b8, 0x000002a8,
+    0x000002a0, 0x000002c0, 0x000002f0, 0x000002f8,
+    0x00000288, 0x000002d0, 0x00000290, 0x000002e8,
+    0x00000148, 0x00000158, 0x00000160, 0x00000100,
+    0x00000118, 0x00000130, 0x00000138, 0x00000128,
+    0x00000120, 0x00000140, 0x00000170, 0x00000178,
+    0x00000108, 0x00000150, 0x00000110, 0x00000168,
+    0x000005c8, 0x000005d8, 0x000005e0, 0x00000580,
+    0x00000598, 0x000005b0, 0x000005b8, 0x000005a8,
+    0x000005a0, 0x000005c0, 0x000005f0, 0x000005f8,
+    0x00000588, 0x000005d0, 0x00000590, 0x000005e8,
+    0x00000048, 0x00000058, 0x00000060, 0x00000000,
+    0x00000018, 0x00000030, 0x00000038, 0x00000028,
+    0x00000020, 0x00000040, 0x00000070, 0x00000078,
+    0x00000008, 0x00000050, 0x00000010, 0x00000068,
+    0x000004c8, 0x000004d8, 0x000004e0, 0x00000480,
+    0x00000498, 0x000004b0, 0x000004b8, 0x000004a8,
+    0x000004a0, 0x000004c0, 0x000004f0, 0x000004f8,
+    0x00000488, 0x000004d0, 0x00000490, 0x000004e8,
+    0x000006c8, 0x000006d8, 0x000006e0, 0x00000680,
+    0x00000698, 0x000006b0, 0x000006b8, 0x000006a8,
+    0x000006a0, 0x000006c0, 0x000006f0, 0x000006f8,
+    0x00000688, 0x000006d0, 0x00000690, 0x000006e8,
+    0x000001c8, 0x000001d8, 0x000001e0, 0x00000180,
+    0x00000198, 0x000001b0, 0x000001b8, 0x000001a8,
+    0x000001a0, 0x000001c0, 0x000001f0, 0x000001f8,
+    0x00000188, 0x000001d0, 0x00000190, 0x000001e8,
+    0x00000748, 0x00000758, 0x00000760, 0x00000700,
+    0x00000718, 0x00000730, 0x00000738, 0x00000728,
+    0x00000720, 0x00000740, 0x00000770, 0x00000778,
+    0x00000708, 0x00000750, 0x00000710, 0x00000768,
+    0x000003c8, 0x000003d8, 0x000003e0, 0x00000380,
+    0x00000398, 0x000003b0, 0x000003b8, 0x000003a8,
+    0x000003a0, 0x000003c0, 0x000003f0, 0x000003f8,
+    0x00000388, 0x000003d0, 0x00000390, 0x000003e8,
+    0x00000548, 0x00000558, 0x00000560, 0x00000500,
+    0x00000518, 0x00000530, 0x00000538, 0x00000528,
+    0x00000520, 0x00000540, 0x00000570, 0x00000578,
+    0x00000508, 0x00000550, 0x00000510, 0x00000568,
+    0x000007c8, 0x000007d8, 0x000007e0, 0x00000780,
+    0x00000798, 0x000007b0, 0x000007b8, 0x000007a8,
+    0x000007a0, 0x000007c0, 0x000007f0, 0x000007f8,
+    0x00000788, 0x000007d0, 0x00000790, 0x000007e8,
+    0x00000248, 0x00000258, 0x00000260, 0x00000200,
+    0x00000218, 0x00000230, 0x00000238, 0x00000228,
+    0x00000220, 0x00000240, 0x00000270, 0x00000278,
+    0x00000208, 0x00000250, 0x00000210, 0x00000268,
+    0x000000c8, 0x000000d8, 0x000000e0, 0x00000080,
+    0x00000098, 0x000000b0, 0x000000b8, 0x000000a8,
+    0x000000a0, 0x000000c0, 0x000000f0, 0x000000f8,
+    0x00000088, 0x000000d0, 0x00000090, 0x000000e8,
+    0x00000448, 0x00000458, 0x00000460, 0x00000400,
+    0x00000418, 0x00000430, 0x00000438, 0x00000428,
+    0x00000420, 0x00000440, 0x00000470, 0x00000478,
+    0x00000408, 0x00000450, 0x00000410, 0x00000468,
+  };
+
+static const u32 sbox_CryptoPro_A[4*256] =
+  {
+    /* 0 */
+    0x0001c800, 0x0001b000, 0x00019800, 0x00019000,
+    0x0001c000, 0x0001d800, 0x00018800, 0x0001b800,
+    0x0001d000, 0x0001a000, 0x0001f000, 0x0001f800,
+    0x0001e000, 0x00018000, 0x0001e800, 0x0001a800,
+    0x0003c800, 0x0003b000, 0x00039800, 0x00039000,
+    0x0003c000, 0x0003d800, 0x00038800, 0x0003b800,
+    0x0003d000, 0x0003a000, 0x0003f000, 0x0003f800,
+    0x0003e000, 0x00038000, 0x0003e800, 0x0003a800,
+    0x00074800, 0x00073000, 0x00071800, 0x00071000,
+    0x00074000, 0x00075800, 0x00070800, 0x00073800,
+    0x00075000, 0x00072000, 0x00077000, 0x00077800,
+    0x00076000, 0x00070000, 0x00076800, 0x00072800,
+    0x0004c800, 0x0004b000, 0x00049800, 0x00049000,
+    0x0004c000, 0x0004d800, 0x00048800, 0x0004b800,
+    0x0004d000, 0x0004a000, 0x0004f000, 0x0004f800,
+    0x0004e000, 0x00048000, 0x0004e800, 0x0004a800,
+    0x00044800, 0x00043000, 0x00041800, 0x00041000,
+    0x00044000, 0x00045800, 0x00040800, 0x00043800,
+    0x00045000, 0x00042000, 0x00047000, 0x00047800,
+    0x00046000, 0x00040000, 0x00046800, 0x00042800,
+    0x00054800, 0x00053000, 0x00051800, 0x00051000,
+    0x00054000, 0x00055800, 0x00050800, 0x00053800,
+    0x00055000, 0x00052000, 0x00057000, 0x00057800,
+    0x00056000, 0x00050000, 0x00056800, 0x00052800,
+    0x0007c800, 0x0007b000, 0x00079800, 0x00079000,
+    0x0007c000, 0x0007d800, 0x00078800, 0x0007b800,
+    0x0007d000, 0x0007a000, 0x0007f000, 0x0007f800,
+    0x0007e000, 0x00078000, 0x0007e800, 0x0007a800,
+    0x00004800, 0x00003000, 0x00001800, 0x00001000,
+    0x00004000, 0x00005800, 0x00000800, 0x00003800,
+    0x00005000, 0x00002000, 0x00007000, 0x00007800,
+    0x00006000, 0x00000000, 0x00006800, 0x00002800,
+    0x0002c800, 0x0002b000, 0x00029800, 0x00029000,
+    0x0002c000, 0x0002d800, 0x00028800, 0x0002b800,
+    0x0002d000, 0x0002a000, 0x0002f000, 0x0002f800,
+    0x0002e000, 0x00028000, 0x0002e800, 0x0002a800,
+    0x00014800, 0x00013000, 0x00011800, 0x00011000,
+    0x00014000, 0x00015800, 0x00010800, 0x00013800,
+    0x00015000, 0x00012000, 0x00017000, 0x00017800,
+    0x00016000, 0x00010000, 0x00016800, 0x00012800,
+    0x00034800, 0x00033000, 0x00031800, 0x00031000,
+    0x00034000, 0x00035800, 0x00030800, 0x00033800,
+    0x00035000, 0x00032000, 0x00037000, 0x00037800,
+    0x00036000, 0x00030000, 0x00036800, 0x00032800,
+    0x00064800, 0x00063000, 0x00061800, 0x00061000,
+    0x00064000, 0x00065800, 0x00060800, 0x00063800,
+    0x00065000, 0x00062000, 0x00067000, 0x00067800,
+    0x00066000, 0x00060000, 0x00066800, 0x00062800,
+    0x0005c800, 0x0005b000, 0x00059800, 0x00059000,
+    0x0005c000, 0x0005d800, 0x00058800, 0x0005b800,
+    0x0005d000, 0x0005a000, 0x0005f000, 0x0005f800,
+    0x0005e000, 0x00058000, 0x0005e800, 0x0005a800,
+    0x00024800, 0x00023000, 0x00021800, 0x00021000,
+    0x00024000, 0x00025800, 0x00020800, 0x00023800,
+    0x00025000, 0x00022000, 0x00027000, 0x00027800,
+    0x00026000, 0x00020000, 0x00026800, 0x00022800,
+    0x0006c800, 0x0006b000, 0x00069800, 0x00069000,
+    0x0006c000, 0x0006d800, 0x00068800, 0x0006b800,
+    0x0006d000, 0x0006a000, 0x0006f000, 0x0006f800,
+    0x0006e000, 0x00068000, 0x0006e800, 0x0006a800,
+    0x0000c800, 0x0000b000, 0x00009800, 0x00009000,
+    0x0000c000, 0x0000d800, 0x00008800, 0x0000b800,
+    0x0000d000, 0x0000a000, 0x0000f000, 0x0000f800,
+    0x0000e000, 0x00008000, 0x0000e800, 0x0000a800,
+    /* 1 */
+    0x07700000, 0x07200000, 0x07300000, 0x07100000,
+    0x07580000, 0x07180000, 0x07680000, 0x07400000,
+    0x07600000, 0x07780000, 0x07280000, 0x07500000,
+    0x07000000, 0x07380000, 0x07080000, 0x07480000,
+    0x03f00000, 0x03a00000, 0x03b00000, 0x03900000,
+    0x03d80000, 0x03980000, 0x03e80000, 0x03c00000,
+    0x03e00000, 0x03f80000, 0x03a80000, 0x03d00000,
+    0x03800000, 0x03b80000, 0x03880000, 0x03c80000,
+    0x05700000, 0x05200000, 0x05300000, 0x05100000,
+    0x05580000, 0x05180000, 0x05680000, 0x05400000,
+    0x05600000, 0x05780000, 0x05280000, 0x05500000,
+    0x05000000, 0x05380000, 0x05080000, 0x05480000,
+    0x06700000, 0x06200000, 0x06300000, 0x06100000,
+    0x06580000, 0x06180000, 0x06680000, 0x06400000,
+    0x06600000, 0x06780000, 0x06280000, 0x06500000,
+    0x06000000, 0x06380000, 0x06080000, 0x06480000,
+    0x06f00000, 0x06a00000, 0x06b00000, 0x06900000,
+    0x06d80000, 0x06980000, 0x06e80000, 0x06c00000,
+    0x06e00000, 0x06f80000, 0x06a80000, 0x06d00000,
+    0x06800000, 0x06b80000, 0x06880000, 0x06c80000,
+    0x00f00000, 0x00a00000, 0x00b00000, 0x00900000,
+    0x00d80000, 0x00980000, 0x00e80000, 0x00c00000,
+    0x00e00000, 0x00f80000, 0x00a80000, 0x00d00000,
+    0x00800000, 0x00b80000, 0x00880000, 0x00c80000,
+    0x01f00000, 0x01a00000, 0x01b00000, 0x01900000,
+    0x01d80000, 0x01980000, 0x01e80000, 0x01c00000,
+    0x01e00000, 0x01f80000, 0x01a80000, 0x01d00000,
+    0x01800000, 0x01b80000, 0x01880000, 0x01c80000,
+    0x04f00000, 0x04a00000, 0x04b00000, 0x04900000,
+    0x04d80000, 0x04980000, 0x04e80000, 0x04c00000,
+    0x04e00000, 0x04f80000, 0x04a80000, 0x04d00000,
+    0x04800000, 0x04b80000, 0x04880000, 0x04c80000,
+    0x00700000, 0x00200000, 0x00300000, 0x00100000,
+    0x00580000, 0x00180000, 0x00680000, 0x00400000,
+    0x00600000, 0x00780000, 0x00280000, 0x00500000,
+    0x00000000, 0x00380000, 0x00080000, 0x00480000,
+    0x01700000, 0x01200000, 0x01300000, 0x01100000,
+    0x01580000, 0x01180000, 0x01680000, 0x01400000,
+    0x01600000, 0x01780000, 0x01280000, 0x01500000,
+    0x01000000, 0x01380000, 0x01080000, 0x01480000,
+    0x05f00000, 0x05a00000, 0x05b00000, 0x05900000,
+    0x05d80000, 0x05980000, 0x05e80000, 0x05c00000,
+    0x05e00000, 0x05f80000, 0x05a80000, 0x05d00000,
+    0x05800000, 0x05b80000, 0x05880000, 0x05c80000,
+    0x02700000, 0x02200000, 0x02300000, 0x02100000,
+    0x02580000, 0x02180000, 0x02680000, 0x02400000,
+    0x02600000, 0x02780000, 0x02280000, 0x02500000,
+    0x02000000, 0x02380000, 0x02080000, 0x02480000,
+    0x07f00000, 0x07a00000, 0x07b00000, 0x07900000,
+    0x07d80000, 0x07980000, 0x07e80000, 0x07c00000,
+    0x07e00000, 0x07f80000, 0x07a80000, 0x07d00000,
+    0x07800000, 0x07b80000, 0x07880000, 0x07c80000,
+    0x04700000, 0x04200000, 0x04300000, 0x04100000,
+    0x04580000, 0x04180000, 0x04680000, 0x04400000,
+    0x04600000, 0x04780000, 0x04280000, 0x04500000,
+    0x04000000, 0x04380000, 0x04080000, 0x04480000,
+    0x02f00000, 0x02a00000, 0x02b00000, 0x02900000,
+    0x02d80000, 0x02980000, 0x02e80000, 0x02c00000,
+    0x02e00000, 0x02f80000, 0x02a80000, 0x02d00000,
+    0x02800000, 0x02b80000, 0x02880000, 0x02c80000,
+    0x03700000, 0x03200000, 0x03300000, 0x03100000,
+    0x03580000, 0x03180000, 0x03680000, 0x03400000,
+    0x03600000, 0x03780000, 0x03280000, 0x03500000,
+    0x03000000, 0x03380000, 0x03080000, 0x03480000,
+    /* 2 */
+    0xd8000001, 0xa8000001, 0x88000001, 0xc8000001,
+    0xc0000001, 0xe8000001, 0xf8000001, 0x80000001,
+    0xf0000001, 0xa0000001, 0x90000001, 0x98000001,
+    0xe0000001, 0xb8000001, 0xd0000001, 0xb0000001,
+    0x58000005, 0x28000005, 0x08000005, 0x48000005,
+    0x40000005, 0x68000005, 0x78000005, 0x00000005,
+    0x70000005, 0x20000005, 0x10000005, 0x18000005,
+    0x60000005, 0x38000005, 0x50000005, 0x30000005,
+    0xd8000006, 0xa8000006, 0x88000006, 0xc8000006,
+    0xc0000006, 0xe8000006, 0xf8000006, 0x80000006,
+    0xf0000006, 0xa0000006, 0x90000006, 0x98000006,
+    0xe0000006, 0xb8000006, 0xd0000006, 0xb0000006,
+    0x58000006, 0x28000006, 0x08000006, 0x48000006,
+    0x40000006, 0x68000006, 0x78000006, 0x00000006,
+    0x70000006, 0x20000006, 0x10000006, 0x18000006,
+    0x60000006, 0x38000006, 0x50000006, 0x30000006,
+    0xd8000000, 0xa8000000, 0x88000000, 0xc8000000,
+    0xc0000000, 0xe8000000, 0xf8000000, 0x80000000,
+    0xf0000000, 0xa0000000, 0x90000000, 0x98000000,
+    0xe0000000, 0xb8000000, 0xd0000000, 0xb0000000,
+    0x58000001, 0x28000001, 0x08000001, 0x48000001,
+    0x40000001, 0x68000001, 0x78000001, 0x00000001,
+    0x70000001, 0x20000001, 0x10000001, 0x18000001,
+    0x60000001, 0x38000001, 0x50000001, 0x30000001,
+    0x58000000, 0x28000000, 0x08000000, 0x48000000,
+    0x40000000, 0x68000000, 0x78000000, 0x00000000,
+    0x70000000, 0x20000000, 0x10000000, 0x18000000,
+    0x60000000, 0x38000000, 0x50000000, 0x30000000,
+    0xd8000005, 0xa8000005, 0x88000005, 0xc8000005,
+    0xc0000005, 0xe8000005, 0xf8000005, 0x80000005,
+    0xf0000005, 0xa0000005, 0x90000005, 0x98000005,
+    0xe0000005, 0xb8000005, 0xd0000005, 0xb0000005,
+    0xd8000003, 0xa8000003, 0x88000003, 0xc8000003,
+    0xc0000003, 0xe8000003, 0xf8000003, 0x80000003,
+    0xf0000003, 0xa0000003, 0x90000003, 0x98000003,
+    0xe0000003, 0xb8000003, 0xd0000003, 0xb0000003,
+    0xd8000002, 0xa8000002, 0x88000002, 0xc8000002,
+    0xc0000002, 0xe8000002, 0xf8000002, 0x80000002,
+    0xf0000002, 0xa0000002, 0x90000002, 0x98000002,
+    0xe0000002, 0xb8000002, 0xd0000002, 0xb0000002,
+    0xd8000004, 0xa8000004, 0x88000004, 0xc8000004,
+    0xc0000004, 0xe8000004, 0xf8000004, 0x80000004,
+    0xf0000004, 0xa0000004, 0x90000004, 0x98000004,
+    0xe0000004, 0xb8000004, 0xd0000004, 0xb0000004,
+    0x58000002, 0x28000002, 0x08000002, 0x48000002,
+    0x40000002, 0x68000002, 0x78000002, 0x00000002,
+    0x70000002, 0x20000002, 0x10000002, 0x18000002,
+    0x60000002, 0x38000002, 0x50000002, 0x30000002,
+    0x58000004, 0x28000004, 0x08000004, 0x48000004,
+    0x40000004, 0x68000004, 0x78000004, 0x00000004,
+    0x70000004, 0x20000004, 0x10000004, 0x18000004,
+    0x60000004, 0x38000004, 0x50000004, 0x30000004,
+    0xd8000007, 0xa8000007, 0x88000007, 0xc8000007,
+    0xc0000007, 0xe8000007, 0xf8000007, 0x80000007,
+    0xf0000007, 0xa0000007, 0x90000007, 0x98000007,
+    0xe0000007, 0xb8000007, 0xd0000007, 0xb0000007,
+    0x58000007, 0x28000007, 0x08000007, 0x48000007,
+    0x40000007, 0x68000007, 0x78000007, 0x00000007,
+    0x70000007, 0x20000007, 0x10000007, 0x18000007,
+    0x60000007, 0x38000007, 0x50000007, 0x30000007,
+    0x58000003, 0x28000003, 0x08000003, 0x48000003,
+    0x40000003, 0x68000003, 0x78000003, 0x00000003,
+    0x70000003, 0x20000003, 0x10000003, 0x18000003,
+    0x60000003, 0x38000003, 0x50000003, 0x30000003,
+    /* 3 */
+    0x00000588, 0x000005e8, 0x00000590, 0x000005c8,
+    0x000005b8, 0x000005d0, 0x000005b0, 0x00000580,
+    0x000005c0, 0x000005e0, 0x000005a0, 0x000005a8,
+    0x000005f8, 0x00000598, 0x000005d8, 0x000005f0,
+    0x00000508, 0x00000568, 0x00000510, 0x00000548,
+    0x00000538, 0x00000550, 0x00000530, 0x00000500,
+    0x00000540, 0x00000560, 0x00000520, 0x00000528,
+    0x00000578, 0x00000518, 0x00000558, 0x00000570,
+    0x00000788, 0x000007e8, 0x00000790, 0x000007c8,
+    0x000007b8, 0x000007d0, 0x000007b0, 0x00000780,
+    0x000007c0, 0x000007e0, 0x000007a0, 0x000007a8,
+    0x000007f8, 0x00000798, 0x000007d8, 0x000007f0,
+    0x00000288, 0x000002e8, 0x00000290, 0x000002c8,
+    0x000002b8, 0x000002d0, 0x000002b0, 0x00000280,
+    0x000002c0, 0x000002e0, 0x000002a0, 0x000002a8,
+    0x000002f8, 0x00000298, 0x000002d8, 0x000002f0,
+    0x00000008, 0x00000068, 0x00000010, 0x00000048,
+    0x00000038, 0x00000050, 0x00000030, 0x00000000,
+    0x00000040, 0x00000060, 0x00000020, 0x00000028,
+    0x00000078, 0x00000018, 0x00000058, 0x00000070,
+    0x00000608, 0x00000668, 0x00000610, 0x00000648,
+    0x00000638, 0x00000650, 0x00000630, 0x00000600,
+    0x00000640, 0x00000660, 0x00000620, 0x00000628,
+    0x00000678, 0x00000618, 0x00000658, 0x00000670,
+    0x00000708, 0x00000768, 0x00000710, 0x00000748,
+    0x00000738, 0x00000750, 0x00000730, 0x00000700,
+    0x00000740, 0x00000760, 0x00000720, 0x00000728,
+    0x00000778, 0x00000718, 0x00000758, 0x00000770,
+    0x00000408, 0x00000468, 0x00000410, 0x00000448,
+    0x00000438, 0x00000450, 0x00000430, 0x00000400,
+    0x00000440, 0x00000460, 0x00000420, 0x00000428,
+    0x00000478, 0x00000418, 0x00000458, 0x00000470,
+    0x00000308, 0x00000368, 0x00000310, 0x00000348,
+    0x00000338, 0x00000350, 0x00000330, 0x00000300,
+    0x00000340, 0x00000360, 0x00000320, 0x00000328,
+    0x00000378, 0x00000318, 0x00000358, 0x00000370,
+    0x00000108, 0x00000168, 0x00000110, 0x00000148,
+    0x00000138, 0x00000150, 0x00000130, 0x00000100,
+    0x00000140, 0x00000160, 0x00000120, 0x00000128,
+    0x00000178, 0x00000118, 0x00000158, 0x00000170,
+    0x00000188, 0x000001e8, 0x00000190, 0x000001c8,
+    0x000001b8, 0x000001d0, 0x000001b0, 0x00000180,
+    0x000001c0, 0x000001e0, 0x000001a0, 0x000001a8,
+    0x000001f8, 0x00000198, 0x000001d8, 0x000001f0,
+    0x00000488, 0x000004e8, 0x00000490, 0x000004c8,
+    0x000004b8, 0x000004d0, 0x000004b0, 0x00000480,
+    0x000004c0, 0x000004e0, 0x000004a0, 0x000004a8,
+    0x000004f8, 0x00000498, 0x000004d8, 0x000004f0,
+    0x00000088, 0x000000e8, 0x00000090, 0x000000c8,
+    0x000000b8, 0x000000d0, 0x000000b0, 0x00000080,
+    0x000000c0, 0x000000e0, 0x000000a0, 0x000000a8,
+    0x000000f8, 0x00000098, 0x000000d8, 0x000000f0,
+    0x00000388, 0x000003e8, 0x00000390, 0x000003c8,
+    0x000003b8, 0x000003d0, 0x000003b0, 0x00000380,
+    0x000003c0, 0x000003e0, 0x000003a0, 0x000003a8,
+    0x000003f8, 0x00000398, 0x000003d8, 0x000003f0,
+    0x00000688, 0x000006e8, 0x00000690, 0x000006c8,
+    0x000006b8, 0x000006d0, 0x000006b0, 0x00000680,
+    0x000006c0, 0x000006e0, 0x000006a0, 0x000006a8,
+    0x000006f8, 0x00000698, 0x000006d8, 0x000006f0,
+    0x00000208, 0x00000268, 0x00000210, 0x00000248,
+    0x00000238, 0x00000250, 0x00000230, 0x00000200,
+    0x00000240, 0x00000260, 0x00000220, 0x00000228,
+    0x00000278, 0x00000218, 0x00000258, 0x00000270,
+  };
+
+static const u32 sbox_CryptoPro_B[4*256] =
+  {
+    /* 0 */
+    0x00004000, 0x00002000, 0x00005800, 0x00000800,
+    0x00001800, 0x00002800, 0x00000000, 0x00004800,
+    0x00001000, 0x00007000, 0x00005000, 0x00006000,
+    0x00006800, 0x00003000, 0x00003800, 0x00007800,
+    0x0000c000, 0x0000a000, 0x0000d800, 0x00008800,
+    0x00009800, 0x0000a800, 0x00008000, 0x0000c800,
+    0x00009000, 0x0000f000, 0x0000d000, 0x0000e000,
+    0x0000e800, 0x0000b000, 0x0000b800, 0x0000f800,
+    0x00014000, 0x00012000, 0x00015800, 0x00010800,
+    0x00011800, 0x00012800, 0x00010000, 0x00014800,
+    0x00011000, 0x00017000, 0x00015000, 0x00016000,
+    0x00016800, 0x00013000, 0x00013800, 0x00017800,
+    0x00054000, 0x00052000, 0x00055800, 0x00050800,
+    0x00051800, 0x00052800, 0x00050000, 0x00054800,
+    0x00051000, 0x00057000, 0x00055000, 0x00056000,
+    0x00056800, 0x00053000, 0x00053800, 0x00057800,
+    0x00024000, 0x00022000, 0x00025800, 0x00020800,
+    0x00021800, 0x00022800, 0x00020000, 0x00024800,
+    0x00021000, 0x00027000, 0x00025000, 0x00026000,
+    0x00026800, 0x00023000, 0x00023800, 0x00027800,
+    0x0006c000, 0x0006a000, 0x0006d800, 0x00068800,
+    0x00069800, 0x0006a800, 0x00068000, 0x0006c800,
+    0x00069000, 0x0006f000, 0x0006d000, 0x0006e000,
+    0x0006e800, 0x0006b000, 0x0006b800, 0x0006f800,
+    0x0002c000, 0x0002a000, 0x0002d800, 0x00028800,
+    0x00029800, 0x0002a800, 0x00028000, 0x0002c800,
+    0x00029000, 0x0002f000, 0x0002d000, 0x0002e000,
+    0x0002e800, 0x0002b000, 0x0002b800, 0x0002f800,
+    0x00064000, 0x00062000, 0x00065800, 0x00060800,
+    0x00061800, 0x00062800, 0x00060000, 0x00064800,
+    0x00061000, 0x00067000, 0x00065000, 0x00066000,
+    0x00066800, 0x00063000, 0x00063800, 0x00067800,
+    0x0004c000, 0x0004a000, 0x0004d800, 0x00048800,
+    0x00049800, 0x0004a800, 0x00048000, 0x0004c800,
+    0x00049000, 0x0004f000, 0x0004d000, 0x0004e000,
+    0x0004e800, 0x0004b000, 0x0004b800, 0x0004f800,
+    0x0003c000, 0x0003a000, 0x0003d800, 0x00038800,
+    0x00039800, 0x0003a800, 0x00038000, 0x0003c800,
+    0x00039000, 0x0003f000, 0x0003d000, 0x0003e000,
+    0x0003e800, 0x0003b000, 0x0003b800, 0x0003f800,
+    0x0001c000, 0x0001a000, 0x0001d800, 0x00018800,
+    0x00019800, 0x0001a800, 0x00018000, 0x0001c800,
+    0x00019000, 0x0001f000, 0x0001d000, 0x0001e000,
+    0x0001e800, 0x0001b000, 0x0001b800, 0x0001f800,
+    0x0007c000, 0x0007a000, 0x0007d800, 0x00078800,
+    0x00079800, 0x0007a800, 0x00078000, 0x0007c800,
+    0x00079000, 0x0007f000, 0x0007d000, 0x0007e000,
+    0x0007e800, 0x0007b000, 0x0007b800, 0x0007f800,
+    0x0005c000, 0x0005a000, 0x0005d800, 0x00058800,
+    0x00059800, 0x0005a800, 0x00058000, 0x0005c800,
+    0x00059000, 0x0005f000, 0x0005d000, 0x0005e000,
+    0x0005e800, 0x0005b000, 0x0005b800, 0x0005f800,
+    0x00044000, 0x00042000, 0x00045800, 0x00040800,
+    0x00041800, 0x00042800, 0x00040000, 0x00044800,
+    0x00041000, 0x00047000, 0x00045000, 0x00046000,
+    0x00046800, 0x00043000, 0x00043800, 0x00047800,
+    0x00034000, 0x00032000, 0x00035800, 0x00030800,
+    0x00031800, 0x00032800, 0x00030000, 0x00034800,
+    0x00031000, 0x00037000, 0x00035000, 0x00036000,
+    0x00036800, 0x00033000, 0x00033800, 0x00037800,
+    0x00074000, 0x00072000, 0x00075800, 0x00070800,
+    0x00071800, 0x00072800, 0x00070000, 0x00074800,
+    0x00071000, 0x00077000, 0x00075000, 0x00076000,
+    0x00076800, 0x00073000, 0x00073800, 0x00077800,
+    /* 1 */
+    0x03f00000, 0x03e00000, 0x03800000, 0x03d00000,
+    0x03c80000, 0x03900000, 0x03e80000, 0x03d80000,
+    0x03b80000, 0x03a80000, 0x03c00000, 0x03f80000,
+    0x03980000, 0x03b00000, 0x03880000, 0x03a00000,
+    0x02f00000, 0x02e00000, 0x02800000, 0x02d00000,
+    0x02c80000, 0x02900000, 0x02e80000, 0x02d80000,
+    0x02b80000, 0x02a80000, 0x02c00000, 0x02f80000,
+    0x02980000, 0x02b00000, 0x02880000, 0x02a00000,
+    0x00700000, 0x00600000, 0x00000000, 0x00500000,
+    0x00480000, 0x00100000, 0x00680000, 0x00580000,
+    0x00380000, 0x00280000, 0x00400000, 0x00780000,
+    0x00180000, 0x00300000, 0x00080000, 0x00200000,
+    0x06f00000, 0x06e00000, 0x06800000, 0x06d00000,
+    0x06c80000, 0x06900000, 0x06e80000, 0x06d80000,
+    0x06b80000, 0x06a80000, 0x06c00000, 0x06f80000,
+    0x06980000, 0x06b00000, 0x06880000, 0x06a00000,
+    0x05f00000, 0x05e00000, 0x05800000, 0x05d00000,
+    0x05c80000, 0x05900000, 0x05e80000, 0x05d80000,
+    0x05b80000, 0x05a80000, 0x05c00000, 0x05f80000,
+    0x05980000, 0x05b00000, 0x05880000, 0x05a00000,
+    0x03700000, 0x03600000, 0x03000000, 0x03500000,
+    0x03480000, 0x03100000, 0x03680000, 0x03580000,
+    0x03380000, 0x03280000, 0x03400000, 0x03780000,
+    0x03180000, 0x03300000, 0x03080000, 0x03200000,
+    0x00f00000, 0x00e00000, 0x00800000, 0x00d00000,
+    0x00c80000, 0x00900000, 0x00e80000, 0x00d80000,
+    0x00b80000, 0x00a80000, 0x00c00000, 0x00f80000,
+    0x00980000, 0x00b00000, 0x00880000, 0x00a00000,
+    0x01700000, 0x01600000, 0x01000000, 0x01500000,
+    0x01480000, 0x01100000, 0x01680000, 0x01580000,
+    0x01380000, 0x01280000, 0x01400000, 0x01780000,
+    0x01180000, 0x01300000, 0x01080000, 0x01200000,
+    0x01f00000, 0x01e00000, 0x01800000, 0x01d00000,
+    0x01c80000, 0x01900000, 0x01e80000, 0x01d80000,
+    0x01b80000, 0x01a80000, 0x01c00000, 0x01f80000,
+    0x01980000, 0x01b00000, 0x01880000, 0x01a00000,
+    0x05700000, 0x05600000, 0x05000000, 0x05500000,
+    0x05480000, 0x05100000, 0x05680000, 0x05580000,
+    0x05380000, 0x05280000, 0x05400000, 0x05780000,
+    0x05180000, 0x05300000, 0x05080000, 0x05200000,
+    0x06700000, 0x06600000, 0x06000000, 0x06500000,
+    0x06480000, 0x06100000, 0x06680000, 0x06580000,
+    0x06380000, 0x06280000, 0x06400000, 0x06780000,
+    0x06180000, 0x06300000, 0x06080000, 0x06200000,
+    0x07f00000, 0x07e00000, 0x07800000, 0x07d00000,
+    0x07c80000, 0x07900000, 0x07e80000, 0x07d80000,
+    0x07b80000, 0x07a80000, 0x07c00000, 0x07f80000,
+    0x07980000, 0x07b00000, 0x07880000, 0x07a00000,
+    0x02700000, 0x02600000, 0x02000000, 0x02500000,
+    0x02480000, 0x02100000, 0x02680000, 0x02580000,
+    0x02380000, 0x02280000, 0x02400000, 0x02780000,
+    0x02180000, 0x02300000, 0x02080000, 0x02200000,
+    0x07700000, 0x07600000, 0x07000000, 0x07500000,
+    0x07480000, 0x07100000, 0x07680000, 0x07580000,
+    0x07380000, 0x07280000, 0x07400000, 0x07780000,
+    0x07180000, 0x07300000, 0x07080000, 0x07200000,
+    0x04f00000, 0x04e00000, 0x04800000, 0x04d00000,
+    0x04c80000, 0x04900000, 0x04e80000, 0x04d80000,
+    0x04b80000, 0x04a80000, 0x04c00000, 0x04f80000,
+    0x04980000, 0x04b00000, 0x04880000, 0x04a00000,
+    0x04700000, 0x04600000, 0x04000000, 0x04500000,
+    0x04480000, 0x04100000, 0x04680000, 0x04580000,
+    0x04380000, 0x04280000, 0x04400000, 0x04780000,
+    0x04180000, 0x04300000, 0x04080000, 0x04200000,
+    /* 2 */
+    0x10000004, 0x38000004, 0x60000004, 0x78000004,
+    0x48000004, 0x28000004, 0x50000004, 0x58000004,
+    0x08000004, 0x20000004, 0x00000004, 0x68000004,
+    0x30000004, 0x40000004, 0x70000004, 0x18000004,
+    0x90000001, 0xb8000001, 0xe0000001, 0xf8000001,
+    0xc8000001, 0xa8000001, 0xd0000001, 0xd8000001,
+    0x88000001, 0xa0000001, 0x80000001, 0xe8000001,
+    0xb0000001, 0xc0000001, 0xf0000001, 0x98000001,
+    0x10000001, 0x38000001, 0x60000001, 0x78000001,
+    0x48000001, 0x28000001, 0x50000001, 0x58000001,
+    0x08000001, 0x20000001, 0x00000001, 0x68000001,
+    0x30000001, 0x40000001, 0x70000001, 0x18000001,
+    0x10000003, 0x38000003, 0x60000003, 0x78000003,
+    0x48000003, 0x28000003, 0x50000003, 0x58000003,
+    0x08000003, 0x20000003, 0x00000003, 0x68000003,
+    0x30000003, 0x40000003, 0x70000003, 0x18000003,
+    0x10000002, 0x38000002, 0x60000002, 0x78000002,
+    0x48000002, 0x28000002, 0x50000002, 0x58000002,
+    0x08000002, 0x20000002, 0x00000002, 0x68000002,
+    0x30000002, 0x40000002, 0x70000002, 0x18000002,
+    0x90000006, 0xb8000006, 0xe0000006, 0xf8000006,
+    0xc8000006, 0xa8000006, 0xd0000006, 0xd8000006,
+    0x88000006, 0xa0000006, 0x80000006, 0xe8000006,
+    0xb0000006, 0xc0000006, 0xf0000006, 0x98000006,
+    0x10000007, 0x38000007, 0x60000007, 0x78000007,
+    0x48000007, 0x28000007, 0x50000007, 0x58000007,
+    0x08000007, 0x20000007, 0x00000007, 0x68000007,
+    0x30000007, 0x40000007, 0x70000007, 0x18000007,
+    0x90000005, 0xb8000005, 0xe0000005, 0xf8000005,
+    0xc8000005, 0xa8000005, 0xd0000005, 0xd8000005,
+    0x88000005, 0xa0000005, 0x80000005, 0xe8000005,
+    0xb0000005, 0xc0000005, 0xf0000005, 0x98000005,
+    0x10000006, 0x38000006, 0x60000006, 0x78000006,
+    0x48000006, 0x28000006, 0x50000006, 0x58000006,
+    0x08000006, 0x20000006, 0x00000006, 0x68000006,
+    0x30000006, 0x40000006, 0x70000006, 0x18000006,
+    0x90000000, 0xb8000000, 0xe0000000, 0xf8000000,
+    0xc8000000, 0xa8000000, 0xd0000000, 0xd8000000,
+    0x88000000, 0xa0000000, 0x80000000, 0xe8000000,
+    0xb0000000, 0xc0000000, 0xf0000000, 0x98000000,
+    0x90000003, 0xb8000003, 0xe0000003, 0xf8000003,
+    0xc8000003, 0xa8000003, 0xd0000003, 0xd8000003,
+    0x88000003, 0xa0000003, 0x80000003, 0xe8000003,
+    0xb0000003, 0xc0000003, 0xf0000003, 0x98000003,
+    0x90000007, 0xb8000007, 0xe0000007, 0xf8000007,
+    0xc8000007, 0xa8000007, 0xd0000007, 0xd8000007,
+    0x88000007, 0xa0000007, 0x80000007, 0xe8000007,
+    0xb0000007, 0xc0000007, 0xf0000007, 0x98000007,
+    0x10000005, 0x38000005, 0x60000005, 0x78000005,
+    0x48000005, 0x28000005, 0x50000005, 0x58000005,
+    0x08000005, 0x20000005, 0x00000005, 0x68000005,
+    0x30000005, 0x40000005, 0x70000005, 0x18000005,
+    0x10000000, 0x38000000, 0x60000000, 0x78000000,
+    0x48000000, 0x28000000, 0x50000000, 0x58000000,
+    0x08000000, 0x20000000, 0x00000000, 0x68000000,
+    0x30000000, 0x40000000, 0x70000000, 0x18000000,
+    0x90000004, 0xb8000004, 0xe0000004, 0xf8000004,
+    0xc8000004, 0xa8000004, 0xd0000004, 0xd8000004,
+    0x88000004, 0xa0000004, 0x80000004, 0xe8000004,
+    0xb0000004, 0xc0000004, 0xf0000004, 0x98000004,
+    0x90000002, 0xb8000002, 0xe0000002, 0xf8000002,
+    0xc8000002, 0xa8000002, 0xd0000002, 0xd8000002,
+    0x88000002, 0xa0000002, 0x80000002, 0xe8000002,
+    0xb0000002, 0xc0000002, 0xf0000002, 0x98000002,
+    /* 3 */
+    0x00000028, 0x00000010, 0x00000050, 0x00000058,
+    0x00000048, 0x00000008, 0x00000060, 0x00000018,
+    0x00000038, 0x00000020, 0x00000068, 0x00000000,
+    0x00000030, 0x00000078, 0x00000040, 0x00000070,
+    0x00000228, 0x00000210, 0x00000250, 0x00000258,
+    0x00000248, 0x00000208, 0x00000260, 0x00000218,
+    0x00000238, 0x00000220, 0x00000268, 0x00000200,
+    0x00000230, 0x00000278, 0x00000240, 0x00000270,
+    0x000005a8, 0x00000590, 0x000005d0, 0x000005d8,
+    0x000005c8, 0x00000588, 0x000005e0, 0x00000598,
+    0x000005b8, 0x000005a0, 0x000005e8, 0x00000580,
+    0x000005b0, 0x000005f8, 0x000005c0, 0x000005f0,
+    0x00000728, 0x00000710, 0x00000750, 0x00000758,
+    0x00000748, 0x00000708, 0x00000760, 0x00000718,
+    0x00000738, 0x00000720, 0x00000768, 0x00000700,
+    0x00000730, 0x00000778, 0x00000740, 0x00000770,
+    0x00000428, 0x00000410, 0x00000450, 0x00000458,
+    0x00000448, 0x00000408, 0x00000460, 0x00000418,
+    0x00000438, 0x00000420, 0x00000468, 0x00000400,
+    0x00000430, 0x00000478, 0x00000440, 0x00000470,
+    0x000001a8, 0x00000190, 0x000001d0, 0x000001d8,
+    0x000001c8, 0x00000188, 0x000001e0, 0x00000198,
+    0x000001b8, 0x000001a0, 0x000001e8, 0x00000180,
+    0x000001b0, 0x000001f8, 0x000001c0, 0x000001f0,
+    0x000003a8, 0x00000390, 0x000003d0, 0x000003d8,
+    0x000003c8, 0x00000388, 0x000003e0, 0x00000398,
+    0x000003b8, 0x000003a0, 0x000003e8, 0x00000380,
+    0x000003b0, 0x000003f8, 0x000003c0, 0x000003f0,
+    0x000000a8, 0x00000090, 0x000000d0, 0x000000d8,
+    0x000000c8, 0x00000088, 0x000000e0, 0x00000098,
+    0x000000b8, 0x000000a0, 0x000000e8, 0x00000080,
+    0x000000b0, 0x000000f8, 0x000000c0, 0x000000f0,
+    0x00000528, 0x00000510, 0x00000550, 0x00000558,
+    0x00000548, 0x00000508, 0x00000560, 0x00000518,
+    0x00000538, 0x00000520, 0x00000568, 0x00000500,
+    0x00000530, 0x00000578, 0x00000540, 0x00000570,
+    0x00000128, 0x00000110, 0x00000150, 0x00000158,
+    0x00000148, 0x00000108, 0x00000160, 0x00000118,
+    0x00000138, 0x00000120, 0x00000168, 0x00000100,
+    0x00000130, 0x00000178, 0x00000140, 0x00000170,
+    0x000004a8, 0x00000490, 0x000004d0, 0x000004d8,
+    0x000004c8, 0x00000488, 0x000004e0, 0x00000498,
+    0x000004b8, 0x000004a0, 0x000004e8, 0x00000480,
+    0x000004b0, 0x000004f8, 0x000004c0, 0x000004f0,
+    0x00000328, 0x00000310, 0x00000350, 0x00000358,
+    0x00000348, 0x00000308, 0x00000360, 0x00000318,
+    0x00000338, 0x00000320, 0x00000368, 0x00000300,
+    0x00000330, 0x00000378, 0x00000340, 0x00000370,
+    0x000007a8, 0x00000790, 0x000007d0, 0x000007d8,
+    0x000007c8, 0x00000788, 0x000007e0, 0x00000798,
+    0x000007b8, 0x000007a0, 0x000007e8, 0x00000780,
+    0x000007b0, 0x000007f8, 0x000007c0, 0x000007f0,
+    0x000006a8, 0x00000690, 0x000006d0, 0x000006d8,
+    0x000006c8, 0x00000688, 0x000006e0, 0x00000698,
+    0x000006b8, 0x000006a0, 0x000006e8, 0x00000680,
+    0x000006b0, 0x000006f8, 0x000006c0, 0x000006f0,
+    0x000002a8, 0x00000290, 0x000002d0, 0x000002d8,
+    0x000002c8, 0x00000288, 0x000002e0, 0x00000298,
+    0x000002b8, 0x000002a0, 0x000002e8, 0x00000280,
+    0x000002b0, 0x000002f8, 0x000002c0, 0x000002f0,
+    0x00000628, 0x00000610, 0x00000650, 0x00000658,
+    0x00000648, 0x00000608, 0x00000660, 0x00000618,
+    0x00000638, 0x00000620, 0x00000668, 0x00000600,
+    0x00000630, 0x00000678, 0x00000640, 0x00000670,
+  };
+
+static const u32 sbox_CryptoPro_C[4*256] =
+  {
+    /* 0 */
+    0x00000800, 0x00005800, 0x00006000, 0x00001000,
+    0x00004800, 0x00006800, 0x00000000, 0x00007800,
+    0x00002000, 0x00002800, 0x00004000, 0x00007000,
+    0x00005000, 0x00003800, 0x00003000, 0x00001800,
+    0x00008800, 0x0000d800, 0x0000e000, 0x00009000,
+    0x0000c800, 0x0000e800, 0x00008000, 0x0000f800,
+    0x0000a000, 0x0000a800, 0x0000c000, 0x0000f000,
+    0x0000d000, 0x0000b800, 0x0000b000, 0x00009800,
+    0x00038800, 0x0003d800, 0x0003e000, 0x00039000,
+    0x0003c800, 0x0003e800, 0x00038000, 0x0003f800,
+    0x0003a000, 0x0003a800, 0x0003c000, 0x0003f000,
+    0x0003d000, 0x0003b800, 0x0003b000, 0x00039800,
+    0x00068800, 0x0006d800, 0x0006e000, 0x00069000,
+    0x0006c800, 0x0006e800, 0x00068000, 0x0006f800,
+    0x0006a000, 0x0006a800, 0x0006c000, 0x0006f000,
+    0x0006d000, 0x0006b800, 0x0006b000, 0x00069800,
+    0x00058800, 0x0005d800, 0x0005e000, 0x00059000,
+    0x0005c800, 0x0005e800, 0x00058000, 0x0005f800,
+    0x0005a000, 0x0005a800, 0x0005c000, 0x0005f000,
+    0x0005d000, 0x0005b800, 0x0005b000, 0x00059800,
+    0x00020800, 0x00025800, 0x00026000, 0x00021000,
+    0x00024800, 0x00026800, 0x00020000, 0x00027800,
+    0x00022000, 0x00022800, 0x00024000, 0x00027000,
+    0x00025000, 0x00023800, 0x00023000, 0x00021800,
+    0x00028800, 0x0002d800, 0x0002e000, 0x00029000,
+    0x0002c800, 0x0002e800, 0x00028000, 0x0002f800,
+    0x0002a000, 0x0002a800, 0x0002c000, 0x0002f000,
+    0x0002d000, 0x0002b800, 0x0002b000, 0x00029800,
+    0x00010800, 0x00015800, 0x00016000, 0x00011000,
+    0x00014800, 0x00016800, 0x00010000, 0x00017800,
+    0x00012000, 0x00012800, 0x00014000, 0x00017000,
+    0x00015000, 0x00013800, 0x00013000, 0x00011800,
+    0x00040800, 0x00045800, 0x00046000, 0x00041000,
+    0x00044800, 0x00046800, 0x00040000, 0x00047800,
+    0x00042000, 0x00042800, 0x00044000, 0x00047000,
+    0x00045000, 0x00043800, 0x00043000, 0x00041800,
+    0x00070800, 0x00075800, 0x00076000, 0x00071000,
+    0x00074800, 0x00076800, 0x00070000, 0x00077800,
+    0x00072000, 0x00072800, 0x00074000, 0x00077000,
+    0x00075000, 0x00073800, 0x00073000, 0x00071800,
+    0x00078800, 0x0007d800, 0x0007e000, 0x00079000,
+    0x0007c800, 0x0007e800, 0x00078000, 0x0007f800,
+    0x0007a000, 0x0007a800, 0x0007c000, 0x0007f000,
+    0x0007d000, 0x0007b800, 0x0007b000, 0x00079800,
+    0x00060800, 0x00065800, 0x00066000, 0x00061000,
+    0x00064800, 0x00066800, 0x00060000, 0x00067800,
+    0x00062000, 0x00062800, 0x00064000, 0x00067000,
+    0x00065000, 0x00063800, 0x00063000, 0x00061800,
+    0x00048800, 0x0004d800, 0x0004e000, 0x00049000,
+    0x0004c800, 0x0004e800, 0x00048000, 0x0004f800,
+    0x0004a000, 0x0004a800, 0x0004c000, 0x0004f000,
+    0x0004d000, 0x0004b800, 0x0004b000, 0x00049800,
+    0x00050800, 0x00055800, 0x00056000, 0x00051000,
+    0x00054800, 0x00056800, 0x00050000, 0x00057800,
+    0x00052000, 0x00052800, 0x00054000, 0x00057000,
+    0x00055000, 0x00053800, 0x00053000, 0x00051800,
+    0x00030800, 0x00035800, 0x00036000, 0x00031000,
+    0x00034800, 0x00036800, 0x00030000, 0x00037800,
+    0x00032000, 0x00032800, 0x00034000, 0x00037000,
+    0x00035000, 0x00033800, 0x00033000, 0x00031800,
+    0x00018800, 0x0001d800, 0x0001e000, 0x00019000,
+    0x0001c800, 0x0001e800, 0x00018000, 0x0001f800,
+    0x0001a000, 0x0001a800, 0x0001c000, 0x0001f000,
+    0x0001d000, 0x0001b800, 0x0001b000, 0x00019800,
+    /* 1 */
+    0x01c00000, 0x01900000, 0x01a80000, 0x01800000,
+    0x01a00000, 0x01c80000, 0x01f80000, 0x01d00000,
+    0x01980000, 0x01b80000, 0x01e00000, 0x01e80000,
+    0x01b00000, 0x01f00000, 0x01880000, 0x01d80000,
+    0x03400000, 0x03100000, 0x03280000, 0x03000000,
+    0x03200000, 0x03480000, 0x03780000, 0x03500000,
+    0x03180000, 0x03380000, 0x03600000, 0x03680000,
+    0x03300000, 0x03700000, 0x03080000, 0x03580000,
+    0x00400000, 0x00100000, 0x00280000, 0x00000000,
+    0x00200000, 0x00480000, 0x00780000, 0x00500000,
+    0x00180000, 0x00380000, 0x00600000, 0x00680000,
+    0x00300000, 0x00700000, 0x00080000, 0x00580000,
+    0x00c00000, 0x00900000, 0x00a80000, 0x00800000,
+    0x00a00000, 0x00c80000, 0x00f80000, 0x00d00000,
+    0x00980000, 0x00b80000, 0x00e00000, 0x00e80000,
+    0x00b00000, 0x00f00000, 0x00880000, 0x00d80000,
+    0x02c00000, 0x02900000, 0x02a80000, 0x02800000,
+    0x02a00000, 0x02c80000, 0x02f80000, 0x02d00000,
+    0x02980000, 0x02b80000, 0x02e00000, 0x02e80000,
+    0x02b00000, 0x02f00000, 0x02880000, 0x02d80000,
+    0x06c00000, 0x06900000, 0x06a80000, 0x06800000,
+    0x06a00000, 0x06c80000, 0x06f80000, 0x06d00000,
+    0x06980000, 0x06b80000, 0x06e00000, 0x06e80000,
+    0x06b00000, 0x06f00000, 0x06880000, 0x06d80000,
+    0x05400000, 0x05100000, 0x05280000, 0x05000000,
+    0x05200000, 0x05480000, 0x05780000, 0x05500000,
+    0x05180000, 0x05380000, 0x05600000, 0x05680000,
+    0x05300000, 0x05700000, 0x05080000, 0x05580000,
+    0x04400000, 0x04100000, 0x04280000, 0x04000000,
+    0x04200000, 0x04480000, 0x04780000, 0x04500000,
+    0x04180000, 0x04380000, 0x04600000, 0x04680000,
+    0x04300000, 0x04700000, 0x04080000, 0x04580000,
+    0x05c00000, 0x05900000, 0x05a80000, 0x05800000,
+    0x05a00000, 0x05c80000, 0x05f80000, 0x05d00000,
+    0x05980000, 0x05b80000, 0x05e00000, 0x05e80000,
+    0x05b00000, 0x05f00000, 0x05880000, 0x05d80000,
+    0x01400000, 0x01100000, 0x01280000, 0x01000000,
+    0x01200000, 0x01480000, 0x01780000, 0x01500000,
+    0x01180000, 0x01380000, 0x01600000, 0x01680000,
+    0x01300000, 0x01700000, 0x01080000, 0x01580000,
+    0x04c00000, 0x04900000, 0x04a80000, 0x04800000,
+    0x04a00000, 0x04c80000, 0x04f80000, 0x04d00000,
+    0x04980000, 0x04b80000, 0x04e00000, 0x04e80000,
+    0x04b00000, 0x04f00000, 0x04880000, 0x04d80000,
+    0x03c00000, 0x03900000, 0x03a80000, 0x03800000,
+    0x03a00000, 0x03c80000, 0x03f80000, 0x03d00000,
+    0x03980000, 0x03b80000, 0x03e00000, 0x03e80000,
+    0x03b00000, 0x03f00000, 0x03880000, 0x03d80000,
+    0x07400000, 0x07100000, 0x07280000, 0x07000000,
+    0x07200000, 0x07480000, 0x07780000, 0x07500000,
+    0x07180000, 0x07380000, 0x07600000, 0x07680000,
+    0x07300000, 0x07700000, 0x07080000, 0x07580000,
+    0x07c00000, 0x07900000, 0x07a80000, 0x07800000,
+    0x07a00000, 0x07c80000, 0x07f80000, 0x07d00000,
+    0x07980000, 0x07b80000, 0x07e00000, 0x07e80000,
+    0x07b00000, 0x07f00000, 0x07880000, 0x07d80000,
+    0x06400000, 0x06100000, 0x06280000, 0x06000000,
+    0x06200000, 0x06480000, 0x06780000, 0x06500000,
+    0x06180000, 0x06380000, 0x06600000, 0x06680000,
+    0x06300000, 0x06700000, 0x06080000, 0x06580000,
+    0x02400000, 0x02100000, 0x02280000, 0x02000000,
+    0x02200000, 0x02480000, 0x02780000, 0x02500000,
+    0x02180000, 0x02380000, 0x02600000, 0x02680000,
+    0x02300000, 0x02700000, 0x02080000, 0x02580000,
+    /* 2 */
+    0x40000006, 0x68000006, 0x58000006, 0x00000006,
+    0x20000006, 0x28000006, 0x08000006, 0x10000006,
+    0x48000006, 0x18000006, 0x60000006, 0x70000006,
+    0x30000006, 0x78000006, 0x50000006, 0x38000006,
+    0xc0000004, 0xe8000004, 0xd8000004, 0x80000004,
+    0xa0000004, 0xa8000004, 0x88000004, 0x90000004,
+    0xc8000004, 0x98000004, 0xe0000004, 0xf0000004,
+    0xb0000004, 0xf8000004, 0xd0000004, 0xb8000004,
+    0xc0000005, 0xe8000005, 0xd8000005, 0x80000005,
+    0xa0000005, 0xa8000005, 0x88000005, 0x90000005,
+    0xc8000005, 0x98000005, 0xe0000005, 0xf0000005,
+    0xb0000005, 0xf8000005, 0xd0000005, 0xb8000005,
+    0xc0000000, 0xe8000000, 0xd8000000, 0x80000000,
+    0xa0000000, 0xa8000000, 0x88000000, 0x90000000,
+    0xc8000000, 0x98000000, 0xe0000000, 0xf0000000,
+    0xb0000000, 0xf8000000, 0xd0000000, 0xb8000000,
+    0x40000004, 0x68000004, 0x58000004, 0x00000004,
+    0x20000004, 0x28000004, 0x08000004, 0x10000004,
+    0x48000004, 0x18000004, 0x60000004, 0x70000004,
+    0x30000004, 0x78000004, 0x50000004, 0x38000004,
+    0x40000007, 0x68000007, 0x58000007, 0x00000007,
+    0x20000007, 0x28000007, 0x08000007, 0x10000007,
+    0x48000007, 0x18000007, 0x60000007, 0x70000007,
+    0x30000007, 0x78000007, 0x50000007, 0x38000007,
+    0x40000001, 0x68000001, 0x58000001, 0x00000001,
+    0x20000001, 0x28000001, 0x08000001, 0x10000001,
+    0x48000001, 0x18000001, 0x60000001, 0x70000001,
+    0x30000001, 0x78000001, 0x50000001, 0x38000001,
+    0x40000002, 0x68000002, 0x58000002, 0x00000002,
+    0x20000002, 0x28000002, 0x08000002, 0x10000002,
+    0x48000002, 0x18000002, 0x60000002, 0x70000002,
+    0x30000002, 0x78000002, 0x50000002, 0x38000002,
+    0xc0000003, 0xe8000003, 0xd8000003, 0x80000003,
+    0xa0000003, 0xa8000003, 0x88000003, 0x90000003,
+    0xc8000003, 0x98000003, 0xe0000003, 0xf0000003,
+    0xb0000003, 0xf8000003, 0xd0000003, 0xb8000003,
+    0xc0000001, 0xe8000001, 0xd8000001, 0x80000001,
+    0xa0000001, 0xa8000001, 0x88000001, 0x90000001,
+    0xc8000001, 0x98000001, 0xe0000001, 0xf0000001,
+    0xb0000001, 0xf8000001, 0xd0000001, 0xb8000001,
+    0x40000003, 0x68000003, 0x58000003, 0x00000003,
+    0x20000003, 0x28000003, 0x08000003, 0x10000003,
+    0x48000003, 0x18000003, 0x60000003, 0x70000003,
+    0x30000003, 0x78000003, 0x50000003, 0x38000003,
+    0xc0000002, 0xe8000002, 0xd8000002, 0x80000002,
+    0xa0000002, 0xa8000002, 0x88000002, 0x90000002,
+    0xc8000002, 0x98000002, 0xe0000002, 0xf0000002,
+    0xb0000002, 0xf8000002, 0xd0000002, 0xb8000002,
+    0x40000005, 0x68000005, 0x58000005, 0x00000005,
+    0x20000005, 0x28000005, 0x08000005, 0x10000005,
+    0x48000005, 0x18000005, 0x60000005, 0x70000005,
+    0x30000005, 0x78000005, 0x50000005, 0x38000005,
+    0x40000000, 0x68000000, 0x58000000, 0x00000000,
+    0x20000000, 0x28000000, 0x08000000, 0x10000000,
+    0x48000000, 0x18000000, 0x60000000, 0x70000000,
+    0x30000000, 0x78000000, 0x50000000, 0x38000000,
+    0xc0000007, 0xe8000007, 0xd8000007, 0x80000007,
+    0xa0000007, 0xa8000007, 0x88000007, 0x90000007,
+    0xc8000007, 0x98000007, 0xe0000007, 0xf0000007,
+    0xb0000007, 0xf8000007, 0xd0000007, 0xb8000007,
+    0xc0000006, 0xe8000006, 0xd8000006, 0x80000006,
+    0xa0000006, 0xa8000006, 0x88000006, 0x90000006,
+    0xc8000006, 0x98000006, 0xe0000006, 0xf0000006,
+    0xb0000006, 0xf8000006, 0xd0000006, 0xb8000006,
+    /* 3 */
+    0x000003d0, 0x000003c8, 0x000003b0, 0x000003c0,
+    0x000003e8, 0x000003f0, 0x00000390, 0x00000380,
+    0x000003f8, 0x00000398, 0x000003a8, 0x000003d8,
+    0x000003a0, 0x00000388, 0x000003e0, 0x000003b8,
+    0x00000250, 0x00000248, 0x00000230, 0x00000240,
+    0x00000268, 0x00000270, 0x00000210, 0x00000200,
+    0x00000278, 0x00000218, 0x00000228, 0x00000258,
+    0x00000220, 0x00000208, 0x00000260, 0x00000238,
+    0x00000050, 0x00000048, 0x00000030, 0x00000040,
+    0x00000068, 0x00000070, 0x00000010, 0x00000000,
+    0x00000078, 0x00000018, 0x00000028, 0x00000058,
+    0x00000020, 0x00000008, 0x00000060, 0x00000038,
+    0x000002d0, 0x000002c8, 0x000002b0, 0x000002c0,
+    0x000002e8, 0x000002f0, 0x00000290, 0x00000280,
+    0x000002f8, 0x00000298, 0x000002a8, 0x000002d8,
+    0x000002a0, 0x00000288, 0x000002e0, 0x000002b8,
+    0x00000550, 0x00000548, 0x00000530, 0x00000540,
+    0x00000568, 0x00000570, 0x00000510, 0x00000500,
+    0x00000578, 0x00000518, 0x00000528, 0x00000558,
+    0x00000520, 0x00000508, 0x00000560, 0x00000538,
+    0x00000150, 0x00000148, 0x00000130, 0x00000140,
+    0x00000168, 0x00000170, 0x00000110, 0x00000100,
+    0x00000178, 0x00000118, 0x00000128, 0x00000158,
+    0x00000120, 0x00000108, 0x00000160, 0x00000138,
+    0x000007d0, 0x000007c8, 0x000007b0, 0x000007c0,
+    0x000007e8, 0x000007f0, 0x00000790, 0x00000780,
+    0x000007f8, 0x00000798, 0x000007a8, 0x000007d8,
+    0x000007a0, 0x00000788, 0x000007e0, 0x000007b8,
+    0x00000750, 0x00000748, 0x00000730, 0x00000740,
+    0x00000768, 0x00000770, 0x00000710, 0x00000700,
+    0x00000778, 0x00000718, 0x00000728, 0x00000758,
+    0x00000720, 0x00000708, 0x00000760, 0x00000738,
+    0x00000650, 0x00000648, 0x00000630, 0x00000640,
+    0x00000668, 0x00000670, 0x00000610, 0x00000600,
+    0x00000678, 0x00000618, 0x00000628, 0x00000658,
+    0x00000620, 0x00000608, 0x00000660, 0x00000638,
+    0x00000350, 0x00000348, 0x00000330, 0x00000340,
+    0x00000368, 0x00000370, 0x00000310, 0x00000300,
+    0x00000378, 0x00000318, 0x00000328, 0x00000358,
+    0x00000320, 0x00000308, 0x00000360, 0x00000338,
+    0x000000d0, 0x000000c8, 0x000000b0, 0x000000c0,
+    0x000000e8, 0x000000f0, 0x00000090, 0x00000080,
+    0x000000f8, 0x00000098, 0x000000a8, 0x000000d8,
+    0x000000a0, 0x00000088, 0x000000e0, 0x000000b8,
+    0x000005d0, 0x000005c8, 0x000005b0, 0x000005c0,
+    0x000005e8, 0x000005f0, 0x00000590, 0x00000580,
+    0x000005f8, 0x00000598, 0x000005a8, 0x000005d8,
+    0x000005a0, 0x00000588, 0x000005e0, 0x000005b8,
+    0x000006d0, 0x000006c8, 0x000006b0, 0x000006c0,
+    0x000006e8, 0x000006f0, 0x00000690, 0x00000680,
+    0x000006f8, 0x00000698, 0x000006a8, 0x000006d8,
+    0x000006a0, 0x00000688, 0x000006e0, 0x000006b8,
+    0x000004d0, 0x000004c8, 0x000004b0, 0x000004c0,
+    0x000004e8, 0x000004f0, 0x00000490, 0x00000480,
+    0x000004f8, 0x00000498, 0x000004a8, 0x000004d8,
+    0x000004a0, 0x00000488, 0x000004e0, 0x000004b8,
+    0x000001d0, 0x000001c8, 0x000001b0, 0x000001c0,
+    0x000001e8, 0x000001f0, 0x00000190, 0x00000180,
+    0x000001f8, 0x00000198, 0x000001a8, 0x000001d8,
+    0x000001a0, 0x00000188, 0x000001e0, 0x000001b8,
+    0x00000450, 0x00000448, 0x00000430, 0x00000440,
+    0x00000468, 0x00000470, 0x00000410, 0x00000400,
+    0x00000478, 0x00000418, 0x00000428, 0x00000458,
+    0x00000420, 0x00000408, 0x00000460, 0x00000438,
+  };
+
+static const u32 sbox_CryptoPro_D[4*256] =
+  {
+    /* 0 */
+    0x0005f800, 0x0005e000, 0x00059000, 0x0005d000,
+    0x0005b000, 0x0005a000, 0x0005a800, 0x00058000,
+    0x0005b800, 0x0005c800, 0x0005f000, 0x0005e800,
+    0x00058800, 0x0005d800, 0x0005c000, 0x00059800,
+    0x00037800, 0x00036000, 0x00031000, 0x00035000,
+    0x00033000, 0x00032000, 0x00032800, 0x00030000,
+    0x00033800, 0x00034800, 0x00037000, 0x00036800,
+    0x00030800, 0x00035800, 0x00034000, 0x00031800,
+    0x0001f800, 0x0001e000, 0x00019000, 0x0001d000,
+    0x0001b000, 0x0001a000, 0x0001a800, 0x00018000,
+    0x0001b800, 0x0001c800, 0x0001f000, 0x0001e800,
+    0x00018800, 0x0001d800, 0x0001c000, 0x00019800,
+    0x00027800, 0x00026000, 0x00021000, 0x00025000,
+    0x00023000, 0x00022000, 0x00022800, 0x00020000,
+    0x00023800, 0x00024800, 0x00027000, 0x00026800,
+    0x00020800, 0x00025800, 0x00024000, 0x00021800,
+    0x00067800, 0x00066000, 0x00061000, 0x00065000,
+    0x00063000, 0x00062000, 0x00062800, 0x00060000,
+    0x00063800, 0x00064800, 0x00067000, 0x00066800,
+    0x00060800, 0x00065800, 0x00064000, 0x00061800,
+    0x0007f800, 0x0007e000, 0x00079000, 0x0007d000,
+    0x0007b000, 0x0007a000, 0x0007a800, 0x00078000,
+    0x0007b800, 0x0007c800, 0x0007f000, 0x0007e800,
+    0x00078800, 0x0007d800, 0x0007c000, 0x00079800,
+    0x00077800, 0x00076000, 0x00071000, 0x00075000,
+    0x00073000, 0x00072000, 0x00072800, 0x00070000,
+    0x00073800, 0x00074800, 0x00077000, 0x00076800,
+    0x00070800, 0x00075800, 0x00074000, 0x00071800,
+    0x00017800, 0x00016000, 0x00011000, 0x00015000,
+    0x00013000, 0x00012000, 0x00012800, 0x00010000,
+    0x00013800, 0x00014800, 0x00017000, 0x00016800,
+    0x00010800, 0x00015800, 0x00014000, 0x00011800,
+    0x0003f800, 0x0003e000, 0x00039000, 0x0003d000,
+    0x0003b000, 0x0003a000, 0x0003a800, 0x00038000,
+    0x0003b800, 0x0003c800, 0x0003f000, 0x0003e800,
+    0x00038800, 0x0003d800, 0x0003c000, 0x00039800,
+    0x0006f800, 0x0006e000, 0x00069000, 0x0006d000,
+    0x0006b000, 0x0006a000, 0x0006a800, 0x00068000,
+    0x0006b800, 0x0006c800, 0x0006f000, 0x0006e800,
+    0x00068800, 0x0006d800, 0x0006c000, 0x00069800,
+    0x00047800, 0x00046000, 0x00041000, 0x00045000,
+    0x00043000, 0x00042000, 0x00042800, 0x00040000,
+    0x00043800, 0x00044800, 0x00047000, 0x00046800,
+    0x00040800, 0x00045800, 0x00044000, 0x00041800,
+    0x00007800, 0x00006000, 0x00001000, 0x00005000,
+    0x00003000, 0x00002000, 0x00002800, 0x00000000,
+    0x00003800, 0x00004800, 0x00007000, 0x00006800,
+    0x00000800, 0x00005800, 0x00004000, 0x00001800,
+    0x0002f800, 0x0002e000, 0x00029000, 0x0002d000,
+    0x0002b000, 0x0002a000, 0x0002a800, 0x00028000,
+    0x0002b800, 0x0002c800, 0x0002f000, 0x0002e800,
+    0x00028800, 0x0002d800, 0x0002c000, 0x00029800,
+    0x00057800, 0x00056000, 0x00051000, 0x00055000,
+    0x00053000, 0x00052000, 0x00052800, 0x00050000,
+    0x00053800, 0x00054800, 0x00057000, 0x00056800,
+    0x00050800, 0x00055800, 0x00054000, 0x00051800,
+    0x0004f800, 0x0004e000, 0x00049000, 0x0004d000,
+    0x0004b000, 0x0004a000, 0x0004a800, 0x00048000,
+    0x0004b800, 0x0004c800, 0x0004f000, 0x0004e800,
+    0x00048800, 0x0004d800, 0x0004c000, 0x00049800,
+    0x0000f800, 0x0000e000, 0x00009000, 0x0000d000,
+    0x0000b000, 0x0000a000, 0x0000a800, 0x00008000,
+    0x0000b800, 0x0000c800, 0x0000f000, 0x0000e800,
+    0x00008800, 0x0000d800, 0x0000c000, 0x00009800,
+    /* 1 */
+    0x00880000, 0x00e00000, 0x00d80000, 0x00800000,
+    0x00f80000, 0x00f00000, 0x00b00000, 0x00a80000,
+    0x00d00000, 0x00e80000, 0x00a00000, 0x00c00000,
+    0x00c80000, 0x00980000, 0x00b80000, 0x00900000,
+    0x02880000, 0x02e00000, 0x02d80000, 0x02800000,
+    0x02f80000, 0x02f00000, 0x02b00000, 0x02a80000,
+    0x02d00000, 0x02e80000, 0x02a00000, 0x02c00000,
+    0x02c80000, 0x02980000, 0x02b80000, 0x02900000,
+    0x07080000, 0x07600000, 0x07580000, 0x07000000,
+    0x07780000, 0x07700000, 0x07300000, 0x07280000,
+    0x07500000, 0x07680000, 0x07200000, 0x07400000,
+    0x07480000, 0x07180000, 0x07380000, 0x07100000,
+    0x06080000, 0x06600000, 0x06580000, 0x06000000,
+    0x06780000, 0x06700000, 0x06300000, 0x06280000,
+    0x06500000, 0x06680000, 0x06200000, 0x06400000,
+    0x06480000, 0x06180000, 0x06380000, 0x06100000,
+    0x05080000, 0x05600000, 0x05580000, 0x05000000,
+    0x05780000, 0x05700000, 0x05300000, 0x05280000,
+    0x05500000, 0x05680000, 0x05200000, 0x05400000,
+    0x05480000, 0x05180000, 0x05380000, 0x05100000,
+    0x03880000, 0x03e00000, 0x03d80000, 0x03800000,
+    0x03f80000, 0x03f00000, 0x03b00000, 0x03a80000,
+    0x03d00000, 0x03e80000, 0x03a00000, 0x03c00000,
+    0x03c80000, 0x03980000, 0x03b80000, 0x03900000,
+    0x00080000, 0x00600000, 0x00580000, 0x00000000,
+    0x00780000, 0x00700000, 0x00300000, 0x00280000,
+    0x00500000, 0x00680000, 0x00200000, 0x00400000,
+    0x00480000, 0x00180000, 0x00380000, 0x00100000,
+    0x06880000, 0x06e00000, 0x06d80000, 0x06800000,
+    0x06f80000, 0x06f00000, 0x06b00000, 0x06a80000,
+    0x06d00000, 0x06e80000, 0x06a00000, 0x06c00000,
+    0x06c80000, 0x06980000, 0x06b80000, 0x06900000,
+    0x03080000, 0x03600000, 0x03580000, 0x03000000,
+    0x03780000, 0x03700000, 0x03300000, 0x03280000,
+    0x03500000, 0x03680000, 0x03200000, 0x03400000,
+    0x03480000, 0x03180000, 0x03380000, 0x03100000,
+    0x01080000, 0x01600000, 0x01580000, 0x01000000,
+    0x01780000, 0x01700000, 0x01300000, 0x01280000,
+    0x01500000, 0x01680000, 0x01200000, 0x01400000,
+    0x01480000, 0x01180000, 0x01380000, 0x01100000,
+    0x05880000, 0x05e00000, 0x05d80000, 0x05800000,
+    0x05f80000, 0x05f00000, 0x05b00000, 0x05a80000,
+    0x05d00000, 0x05e80000, 0x05a00000, 0x05c00000,
+    0x05c80000, 0x05980000, 0x05b80000, 0x05900000,
+    0x02080000, 0x02600000, 0x02580000, 0x02000000,
+    0x02780000, 0x02700000, 0x02300000, 0x02280000,
+    0x02500000, 0x02680000, 0x02200000, 0x02400000,
+    0x02480000, 0x02180000, 0x02380000, 0x02100000,
+    0x04880000, 0x04e00000, 0x04d80000, 0x04800000,
+    0x04f80000, 0x04f00000, 0x04b00000, 0x04a80000,
+    0x04d00000, 0x04e80000, 0x04a00000, 0x04c00000,
+    0x04c80000, 0x04980000, 0x04b80000, 0x04900000,
+    0x01880000, 0x01e00000, 0x01d80000, 0x01800000,
+    0x01f80000, 0x01f00000, 0x01b00000, 0x01a80000,
+    0x01d00000, 0x01e80000, 0x01a00000, 0x01c00000,
+    0x01c80000, 0x01980000, 0x01b80000, 0x01900000,
+    0x07880000, 0x07e00000, 0x07d80000, 0x07800000,
+    0x07f80000, 0x07f00000, 0x07b00000, 0x07a80000,
+    0x07d00000, 0x07e80000, 0x07a00000, 0x07c00000,
+    0x07c80000, 0x07980000, 0x07b80000, 0x07900000,
+    0x04080000, 0x04600000, 0x04580000, 0x04000000,
+    0x04780000, 0x04700000, 0x04300000, 0x04280000,
+    0x04500000, 0x04680000, 0x04200000, 0x04400000,
+    0x04480000, 0x04180000, 0x04380000, 0x04100000,
+    /* 2 */
+    0x00000004, 0x60000004, 0x40000004, 0x48000004,
+    0x68000004, 0x10000004, 0x50000004, 0x58000004,
+    0x38000004, 0x18000004, 0x30000004, 0x28000004,
+    0x20000004, 0x70000004, 0x78000004, 0x08000004,
+    0x00000000, 0x60000000, 0x40000000, 0x48000000,
+    0x68000000, 0x10000000, 0x50000000, 0x58000000,
+    0x38000000, 0x18000000, 0x30000000, 0x28000000,
+    0x20000000, 0x70000000, 0x78000000, 0x08000000,
+    0x80000007, 0xe0000007, 0xc0000007, 0xc8000007,
+    0xe8000007, 0x90000007, 0xd0000007, 0xd8000007,
+    0xb8000007, 0x98000007, 0xb0000007, 0xa8000007,
+    0xa0000007, 0xf0000007, 0xf8000007, 0x88000007,
+    0x80000001, 0xe0000001, 0xc0000001, 0xc8000001,
+    0xe8000001, 0x90000001, 0xd0000001, 0xd8000001,
+    0xb8000001, 0x98000001, 0xb0000001, 0xa8000001,
+    0xa0000001, 0xf0000001, 0xf8000001, 0x88000001,
+    0x00000001, 0x60000001, 0x40000001, 0x48000001,
+    0x68000001, 0x10000001, 0x50000001, 0x58000001,
+    0x38000001, 0x18000001, 0x30000001, 0x28000001,
+    0x20000001, 0x70000001, 0x78000001, 0x08000001,
+    0x80000002, 0xe0000002, 0xc0000002, 0xc8000002,
+    0xe8000002, 0x90000002, 0xd0000002, 0xd8000002,
+    0xb8000002, 0x98000002, 0xb0000002, 0xa8000002,
+    0xa0000002, 0xf0000002, 0xf8000002, 0x88000002,
+    0x00000007, 0x60000007, 0x40000007, 0x48000007,
+    0x68000007, 0x10000007, 0x50000007, 0x58000007,
+    0x38000007, 0x18000007, 0x30000007, 0x28000007,
+    0x20000007, 0x70000007, 0x78000007, 0x08000007,
+    0x80000005, 0xe0000005, 0xc0000005, 0xc8000005,
+    0xe8000005, 0x90000005, 0xd0000005, 0xd8000005,
+    0xb8000005, 0x98000005, 0xb0000005, 0xa8000005,
+    0xa0000005, 0xf0000005, 0xf8000005, 0x88000005,
+    0x80000000, 0xe0000000, 0xc0000000, 0xc8000000,
+    0xe8000000, 0x90000000, 0xd0000000, 0xd8000000,
+    0xb8000000, 0x98000000, 0xb0000000, 0xa8000000,
+    0xa0000000, 0xf0000000, 0xf8000000, 0x88000000,
+    0x00000005, 0x60000005, 0x40000005, 0x48000005,
+    0x68000005, 0x10000005, 0x50000005, 0x58000005,
+    0x38000005, 0x18000005, 0x30000005, 0x28000005,
+    0x20000005, 0x70000005, 0x78000005, 0x08000005,
+    0x00000002, 0x60000002, 0x40000002, 0x48000002,
+    0x68000002, 0x10000002, 0x50000002, 0x58000002,
+    0x38000002, 0x18000002, 0x30000002, 0x28000002,
+    0x20000002, 0x70000002, 0x78000002, 0x08000002,
+    0x80000003, 0xe0000003, 0xc0000003, 0xc8000003,
+    0xe8000003, 0x90000003, 0xd0000003, 0xd8000003,
+    0xb8000003, 0x98000003, 0xb0000003, 0xa8000003,
+    0xa0000003, 0xf0000003, 0xf8000003, 0x88000003,
+    0x00000006, 0x60000006, 0x40000006, 0x48000006,
+    0x68000006, 0x10000006, 0x50000006, 0x58000006,
+    0x38000006, 0x18000006, 0x30000006, 0x28000006,
+    0x20000006, 0x70000006, 0x78000006, 0x08000006,
+    0x80000004, 0xe0000004, 0xc0000004, 0xc8000004,
+    0xe8000004, 0x90000004, 0xd0000004, 0xd8000004,
+    0xb8000004, 0x98000004, 0xb0000004, 0xa8000004,
+    0xa0000004, 0xf0000004, 0xf8000004, 0x88000004,
+    0x80000006, 0xe0000006, 0xc0000006, 0xc8000006,
+    0xe8000006, 0x90000006, 0xd0000006, 0xd8000006,
+    0xb8000006, 0x98000006, 0xb0000006, 0xa8000006,
+    0xa0000006, 0xf0000006, 0xf8000006, 0x88000006,
+    0x00000003, 0x60000003, 0x40000003, 0x48000003,
+    0x68000003, 0x10000003, 0x50000003, 0x58000003,
+    0x38000003, 0x18000003, 0x30000003, 0x28000003,
+    0x20000003, 0x70000003, 0x78000003, 0x08000003,
+    /* 3 */
+    0x00000098, 0x00000080, 0x000000b0, 0x000000f8,
+    0x00000088, 0x000000f0, 0x000000c8, 0x00000090,
+    0x000000e8, 0x000000c0, 0x000000e0, 0x000000a0,
+    0x000000d8, 0x000000d0, 0x000000a8, 0x000000b8,
+    0x00000518, 0x00000500, 0x00000530, 0x00000578,
+    0x00000508, 0x00000570, 0x00000548, 0x00000510,
+    0x00000568, 0x00000540, 0x00000560, 0x00000520,
+    0x00000558, 0x00000550, 0x00000528, 0x00000538,
+    0x00000318, 0x00000300, 0x00000330, 0x00000378,
+    0x00000308, 0x00000370, 0x00000348, 0x00000310,
+    0x00000368, 0x00000340, 0x00000360, 0x00000320,
+    0x00000358, 0x00000350, 0x00000328, 0x00000338,
+    0x00000418, 0x00000400, 0x00000430, 0x00000478,
+    0x00000408, 0x00000470, 0x00000448, 0x00000410,
+    0x00000468, 0x00000440, 0x00000460, 0x00000420,
+    0x00000458, 0x00000450, 0x00000428, 0x00000438,
+    0x00000798, 0x00000780, 0x000007b0, 0x000007f8,
+    0x00000788, 0x000007f0, 0x000007c8, 0x00000790,
+    0x000007e8, 0x000007c0, 0x000007e0, 0x000007a0,
+    0x000007d8, 0x000007d0, 0x000007a8, 0x000007b8,
+    0x00000598, 0x00000580, 0x000005b0, 0x000005f8,
+    0x00000588, 0x000005f0, 0x000005c8, 0x00000590,
+    0x000005e8, 0x000005c0, 0x000005e0, 0x000005a0,
+    0x000005d8, 0x000005d0, 0x000005a8, 0x000005b8,
+    0x00000018, 0x00000000, 0x00000030, 0x00000078,
+    0x00000008, 0x00000070, 0x00000048, 0x00000010,
+    0x00000068, 0x00000040, 0x00000060, 0x00000020,
+    0x00000058, 0x00000050, 0x00000028, 0x00000038,
+    0x00000218, 0x00000200, 0x00000230, 0x00000278,
+    0x00000208, 0x00000270, 0x00000248, 0x00000210,
+    0x00000268, 0x00000240, 0x00000260, 0x00000220,
+    0x00000258, 0x00000250, 0x00000228, 0x00000238,
+    0x00000618, 0x00000600, 0x00000630, 0x00000678,
+    0x00000608, 0x00000670, 0x00000648, 0x00000610,
+    0x00000668, 0x00000640, 0x00000660, 0x00000620,
+    0x00000658, 0x00000650, 0x00000628, 0x00000638,
+    0x00000198, 0x00000180, 0x000001b0, 0x000001f8,
+    0x00000188, 0x000001f0, 0x000001c8, 0x00000190,
+    0x000001e8, 0x000001c0, 0x000001e0, 0x000001a0,
+    0x000001d8, 0x000001d0, 0x000001a8, 0x000001b8,
+    0x00000298, 0x00000280, 0x000002b0, 0x000002f8,
+    0x00000288, 0x000002f0, 0x000002c8, 0x00000290,
+    0x000002e8, 0x000002c0, 0x000002e0, 0x000002a0,
+    0x000002d8, 0x000002d0, 0x000002a8, 0x000002b8,
+    0x00000498, 0x00000480, 0x000004b0, 0x000004f8,
+    0x00000488, 0x000004f0, 0x000004c8, 0x00000490,
+    0x000004e8, 0x000004c0, 0x000004e0, 0x000004a0,
+    0x000004d8, 0x000004d0, 0x000004a8, 0x000004b8,
+    0x00000398, 0x00000380, 0x000003b0, 0x000003f8,
+    0x00000388, 0x000003f0, 0x000003c8, 0x00000390,
+    0x000003e8, 0x000003c0, 0x000003e0, 0x000003a0,
+    0x000003d8, 0x000003d0, 0x000003a8, 0x000003b8,
+    0x00000698, 0x00000680, 0x000006b0, 0x000006f8,
+    0x00000688, 0x000006f0, 0x000006c8, 0x00000690,
+    0x000006e8, 0x000006c0, 0x000006e0, 0x000006a0,
+    0x000006d8, 0x000006d0, 0x000006a8, 0x000006b8,
+    0x00000118, 0x00000100, 0x00000130, 0x00000178,
+    0x00000108, 0x00000170, 0x00000148, 0x00000110,
+    0x00000168, 0x00000140, 0x00000160, 0x00000120,
+    0x00000158, 0x00000150, 0x00000128, 0x00000138,
+    0x00000718, 0x00000700, 0x00000730, 0x00000778,
+    0x00000708, 0x00000770, 0x00000748, 0x00000710,
+    0x00000768, 0x00000740, 0x00000760, 0x00000720,
+    0x00000758, 0x00000750, 0x00000728, 0x00000738,
+  };
+
+static const u32 sbox_TC26_Z[4*256] =
+  {
+    /* 0 */
+    0x00036000, 0x00032000, 0x00033000, 0x00031000,
+    0x00035000, 0x00032800, 0x00035800, 0x00034800,
+    0x00037000, 0x00034000, 0x00036800, 0x00033800,
+    0x00030000, 0x00031800, 0x00037800, 0x00030800,
+    0x00046000, 0x00042000, 0x00043000, 0x00041000,
+    0x00045000, 0x00042800, 0x00045800, 0x00044800,
+    0x00047000, 0x00044000, 0x00046800, 0x00043800,
+    0x00040000, 0x00041800, 0x00047800, 0x00040800,
+    0x00016000, 0x00012000, 0x00013000, 0x00011000,
+    0x00015000, 0x00012800, 0x00015800, 0x00014800,
+    0x00017000, 0x00014000, 0x00016800, 0x00013800,
+    0x00010000, 0x00011800, 0x00017800, 0x00010800,
+    0x0001e000, 0x0001a000, 0x0001b000, 0x00019000,
+    0x0001d000, 0x0001a800, 0x0001d800, 0x0001c800,
+    0x0001f000, 0x0001c000, 0x0001e800, 0x0001b800,
+    0x00018000, 0x00019800, 0x0001f800, 0x00018800,
+    0x0004e000, 0x0004a000, 0x0004b000, 0x00049000,
+    0x0004d000, 0x0004a800, 0x0004d800, 0x0004c800,
+    0x0004f000, 0x0004c000, 0x0004e800, 0x0004b800,
+    0x00048000, 0x00049800, 0x0004f800, 0x00048800,
+    0x00056000, 0x00052000, 0x00053000, 0x00051000,
+    0x00055000, 0x00052800, 0x00055800, 0x00054800,
+    0x00057000, 0x00054000, 0x00056800, 0x00053800,
+    0x00050000, 0x00051800, 0x00057800, 0x00050800,
+    0x0002e000, 0x0002a000, 0x0002b000, 0x00029000,
+    0x0002d000, 0x0002a800, 0x0002d800, 0x0002c800,
+    0x0002f000, 0x0002c000, 0x0002e800, 0x0002b800,
+    0x00028000, 0x00029800, 0x0002f800, 0x00028800,
+    0x00066000, 0x00062000, 0x00063000, 0x00061000,
+    0x00065000, 0x00062800, 0x00065800, 0x00064800,
+    0x00067000, 0x00064000, 0x00066800, 0x00063800,
+    0x00060000, 0x00061800, 0x00067800, 0x00060800,
+    0x0000e000, 0x0000a000, 0x0000b000, 0x00009000,
+    0x0000d000, 0x0000a800, 0x0000d800, 0x0000c800,
+    0x0000f000, 0x0000c000, 0x0000e800, 0x0000b800,
+    0x00008000, 0x00009800, 0x0000f800, 0x00008800,
+    0x00076000, 0x00072000, 0x00073000, 0x00071000,
+    0x00075000, 0x00072800, 0x00075800, 0x00074800,
+    0x00077000, 0x00074000, 0x00076800, 0x00073800,
+    0x00070000, 0x00071800, 0x00077800, 0x00070800,
+    0x00026000, 0x00022000, 0x00023000, 0x00021000,
+    0x00025000, 0x00022800, 0x00025800, 0x00024800,
+    0x00027000, 0x00024000, 0x00026800, 0x00023800,
+    0x00020000, 0x00021800, 0x00027800, 0x00020800,
+    0x0003e000, 0x0003a000, 0x0003b000, 0x00039000,
+    0x0003d000, 0x0003a800, 0x0003d800, 0x0003c800,
+    0x0003f000, 0x0003c000, 0x0003e800, 0x0003b800,
+    0x00038000, 0x00039800, 0x0003f800, 0x00038800,
+    0x0005e000, 0x0005a000, 0x0005b000, 0x00059000,
+    0x0005d000, 0x0005a800, 0x0005d800, 0x0005c800,
+    0x0005f000, 0x0005c000, 0x0005e800, 0x0005b800,
+    0x00058000, 0x00059800, 0x0005f800, 0x00058800,
+    0x0006e000, 0x0006a000, 0x0006b000, 0x00069000,
+    0x0006d000, 0x0006a800, 0x0006d800, 0x0006c800,
+    0x0006f000, 0x0006c000, 0x0006e800, 0x0006b800,
+    0x00068000, 0x00069800, 0x0006f800, 0x00068800,
+    0x00006000, 0x00002000, 0x00003000, 0x00001000,
+    0x00005000, 0x00002800, 0x00005800, 0x00004800,
+    0x00007000, 0x00004000, 0x00006800, 0x00003800,
+    0x00000000, 0x00001800, 0x00007800, 0x00000800,
+    0x0007e000, 0x0007a000, 0x0007b000, 0x00079000,
+    0x0007d000, 0x0007a800, 0x0007d800, 0x0007c800,
+    0x0007f000, 0x0007c000, 0x0007e800, 0x0007b800,
+    0x00078000, 0x00079800, 0x0007f800, 0x00078800,
+    /* 1 */
+    0x06580000, 0x06180000, 0x06280000, 0x06400000,
+    0x06100000, 0x06780000, 0x06500000, 0x06680000,
+    0x06700000, 0x06080000, 0x06380000, 0x06200000,
+    0x06600000, 0x06480000, 0x06300000, 0x06000000,
+    0x04580000, 0x04180000, 0x04280000, 0x04400000,
+    0x04100000, 0x04780000, 0x04500000, 0x04680000,
+    0x04700000, 0x04080000, 0x04380000, 0x04200000,
+    0x04600000, 0x04480000, 0x04300000, 0x04000000,
+    0x01580000, 0x01180000, 0x01280000, 0x01400000,
+    0x01100000, 0x01780000, 0x01500000, 0x01680000,
+    0x01700000, 0x01080000, 0x01380000, 0x01200000,
+    0x01600000, 0x01480000, 0x01300000, 0x01000000,
+    0x00d80000, 0x00980000, 0x00a80000, 0x00c00000,
+    0x00900000, 0x00f80000, 0x00d00000, 0x00e80000,
+    0x00f00000, 0x00880000, 0x00b80000, 0x00a00000,
+    0x00e00000, 0x00c80000, 0x00b00000, 0x00800000,
+    0x06d80000, 0x06980000, 0x06a80000, 0x06c00000,
+    0x06900000, 0x06f80000, 0x06d00000, 0x06e80000,
+    0x06f00000, 0x06880000, 0x06b80000, 0x06a00000,
+    0x06e00000, 0x06c80000, 0x06b00000, 0x06800000,
+    0x02580000, 0x02180000, 0x02280000, 0x02400000,
+    0x02100000, 0x02780000, 0x02500000, 0x02680000,
+    0x02700000, 0x02080000, 0x02380000, 0x02200000,
+    0x02600000, 0x02480000, 0x02300000, 0x02000000,
+    0x07d80000, 0x07980000, 0x07a80000, 0x07c00000,
+    0x07900000, 0x07f80000, 0x07d00000, 0x07e80000,
+    0x07f00000, 0x07880000, 0x07b80000, 0x07a00000,
+    0x07e00000, 0x07c80000, 0x07b00000, 0x07800000,
+    0x03580000, 0x03180000, 0x03280000, 0x03400000,
+    0x03100000, 0x03780000, 0x03500000, 0x03680000,
+    0x03700000, 0x03080000, 0x03380000, 0x03200000,
+    0x03600000, 0x03480000, 0x03300000, 0x03000000,
+    0x03d80000, 0x03980000, 0x03a80000, 0x03c00000,
+    0x03900000, 0x03f80000, 0x03d00000, 0x03e80000,
+    0x03f00000, 0x03880000, 0x03b80000, 0x03a00000,
+    0x03e00000, 0x03c80000, 0x03b00000, 0x03800000,
+    0x00580000, 0x00180000, 0x00280000, 0x00400000,
+    0x00100000, 0x00780000, 0x00500000, 0x00680000,
+    0x00700000, 0x00080000, 0x00380000, 0x00200000,
+    0x00600000, 0x00480000, 0x00300000, 0x00000000,
+    0x05580000, 0x05180000, 0x05280000, 0x05400000,
+    0x05100000, 0x05780000, 0x05500000, 0x05680000,
+    0x05700000, 0x05080000, 0x05380000, 0x05200000,
+    0x05600000, 0x05480000, 0x05300000, 0x05000000,
+    0x02d80000, 0x02980000, 0x02a80000, 0x02c00000,
+    0x02900000, 0x02f80000, 0x02d00000, 0x02e80000,
+    0x02f00000, 0x02880000, 0x02b80000, 0x02a00000,
+    0x02e00000, 0x02c80000, 0x02b00000, 0x02800000,
+    0x01d80000, 0x01980000, 0x01a80000, 0x01c00000,
+    0x01900000, 0x01f80000, 0x01d00000, 0x01e80000,
+    0x01f00000, 0x01880000, 0x01b80000, 0x01a00000,
+    0x01e00000, 0x01c80000, 0x01b00000, 0x01800000,
+    0x07580000, 0x07180000, 0x07280000, 0x07400000,
+    0x07100000, 0x07780000, 0x07500000, 0x07680000,
+    0x07700000, 0x07080000, 0x07380000, 0x07200000,
+    0x07600000, 0x07480000, 0x07300000, 0x07000000,
+    0x04d80000, 0x04980000, 0x04a80000, 0x04c00000,
+    0x04900000, 0x04f80000, 0x04d00000, 0x04e80000,
+    0x04f00000, 0x04880000, 0x04b80000, 0x04a00000,
+    0x04e00000, 0x04c80000, 0x04b00000, 0x04800000,
+    0x05d80000, 0x05980000, 0x05a80000, 0x05c00000,
+    0x05900000, 0x05f80000, 0x05d00000, 0x05e80000,
+    0x05f00000, 0x05880000, 0x05b80000, 0x05a00000,
+    0x05e00000, 0x05c80000, 0x05b00000, 0x05800000,
+    /* 2 */
+    0xb8000002, 0xf8000002, 0xa8000002, 0xd0000002,
+    0xc0000002, 0x88000002, 0xb0000002, 0xe8000002,
+    0x80000002, 0xc8000002, 0x98000002, 0xf0000002,
+    0xd8000002, 0xa0000002, 0x90000002, 0xe0000002,
+    0xb8000006, 0xf8000006, 0xa8000006, 0xd0000006,
+    0xc0000006, 0x88000006, 0xb0000006, 0xe8000006,
+    0x80000006, 0xc8000006, 0x98000006, 0xf0000006,
+    0xd8000006, 0xa0000006, 0x90000006, 0xe0000006,
+    0xb8000007, 0xf8000007, 0xa8000007, 0xd0000007,
+    0xc0000007, 0x88000007, 0xb0000007, 0xe8000007,
+    0x80000007, 0xc8000007, 0x98000007, 0xf0000007,
+    0xd8000007, 0xa0000007, 0x90000007, 0xe0000007,
+    0x38000003, 0x78000003, 0x28000003, 0x50000003,
+    0x40000003, 0x08000003, 0x30000003, 0x68000003,
+    0x00000003, 0x48000003, 0x18000003, 0x70000003,
+    0x58000003, 0x20000003, 0x10000003, 0x60000003,
+    0xb8000004, 0xf8000004, 0xa8000004, 0xd0000004,
+    0xc0000004, 0x88000004, 0xb0000004, 0xe8000004,
+    0x80000004, 0xc8000004, 0x98000004, 0xf0000004,
+    0xd8000004, 0xa0000004, 0x90000004, 0xe0000004,
+    0x38000001, 0x78000001, 0x28000001, 0x50000001,
+    0x40000001, 0x08000001, 0x30000001, 0x68000001,
+    0x00000001, 0x48000001, 0x18000001, 0x70000001,
+    0x58000001, 0x20000001, 0x10000001, 0x60000001,
+    0x38000006, 0x78000006, 0x28000006, 0x50000006,
+    0x40000006, 0x08000006, 0x30000006, 0x68000006,
+    0x00000006, 0x48000006, 0x18000006, 0x70000006,
+    0x58000006, 0x20000006, 0x10000006, 0x60000006,
+    0x38000005, 0x78000005, 0x28000005, 0x50000005,
+    0x40000005, 0x08000005, 0x30000005, 0x68000005,
+    0x00000005, 0x48000005, 0x18000005, 0x70000005,
+    0x58000005, 0x20000005, 0x10000005, 0x60000005,
+    0xb8000005, 0xf8000005, 0xa8000005, 0xd0000005,
+    0xc0000005, 0x88000005, 0xb0000005, 0xe8000005,
+    0x80000005, 0xc8000005, 0x98000005, 0xf0000005,
+    0xd8000005, 0xa0000005, 0x90000005, 0xe0000005,
+    0xb8000003, 0xf8000003, 0xa8000003, 0xd0000003,
+    0xc0000003, 0x88000003, 0xb0000003, 0xe8000003,
+    0x80000003, 0xc8000003, 0x98000003, 0xf0000003,
+    0xd8000003, 0xa0000003, 0x90000003, 0xe0000003,
+    0x38000004, 0x78000004, 0x28000004, 0x50000004,
+    0x40000004, 0x08000004, 0x30000004, 0x68000004,
+    0x00000004, 0x48000004, 0x18000004, 0x70000004,
+    0x58000004, 0x20000004, 0x10000004, 0x60000004,
+    0xb8000000, 0xf8000000, 0xa8000000, 0xd0000000,
+    0xc0000000, 0x88000000, 0xb0000000, 0xe8000000,
+    0x80000000, 0xc8000000, 0x98000000, 0xf0000000,
+    0xd8000000, 0xa0000000, 0x90000000, 0xe0000000,
+    0x38000002, 0x78000002, 0x28000002, 0x50000002,
+    0x40000002, 0x08000002, 0x30000002, 0x68000002,
+    0x00000002, 0x48000002, 0x18000002, 0x70000002,
+    0x58000002, 0x20000002, 0x10000002, 0x60000002,
+    0xb8000001, 0xf8000001, 0xa8000001, 0xd0000001,
+    0xc0000001, 0x88000001, 0xb0000001, 0xe8000001,
+    0x80000001, 0xc8000001, 0x98000001, 0xf0000001,
+    0xd8000001, 0xa0000001, 0x90000001, 0xe0000001,
+    0x38000007, 0x78000007, 0x28000007, 0x50000007,
+    0x40000007, 0x08000007, 0x30000007, 0x68000007,
+    0x00000007, 0x48000007, 0x18000007, 0x70000007,
+    0x58000007, 0x20000007, 0x10000007, 0x60000007,
+    0x38000000, 0x78000000, 0x28000000, 0x50000000,
+    0x40000000, 0x08000000, 0x30000000, 0x68000000,
+    0x00000000, 0x48000000, 0x18000000, 0x70000000,
+    0x58000000, 0x20000000, 0x10000000, 0x60000000,
+    /* 3 */
+    0x000000c0, 0x000000f0, 0x00000090, 0x000000a8,
+    0x000000b0, 0x000000c8, 0x00000088, 0x000000e0,
+    0x000000f8, 0x000000a0, 0x000000d8, 0x00000080,
+    0x000000e8, 0x000000d0, 0x00000098, 0x000000b8,
+    0x000003c0, 0x000003f0, 0x00000390, 0x000003a8,
+    0x000003b0, 0x000003c8, 0x00000388, 0x000003e0,
+    0x000003f8, 0x000003a0, 0x000003d8, 0x00000380,
+    0x000003e8, 0x000003d0, 0x00000398, 0x000003b8,
+    0x00000740, 0x00000770, 0x00000710, 0x00000728,
+    0x00000730, 0x00000748, 0x00000708, 0x00000760,
+    0x00000778, 0x00000720, 0x00000758, 0x00000700,
+    0x00000768, 0x00000750, 0x00000718, 0x00000738,
+    0x000006c0, 0x000006f0, 0x00000690, 0x000006a8,
+    0x000006b0, 0x000006c8, 0x00000688, 0x000006e0,
+    0x000006f8, 0x000006a0, 0x000006d8, 0x00000680,
+    0x000006e8, 0x000006d0, 0x00000698, 0x000006b8,
+    0x00000040, 0x00000070, 0x00000010, 0x00000028,
+    0x00000030, 0x00000048, 0x00000008, 0x00000060,
+    0x00000078, 0x00000020, 0x00000058, 0x00000000,
+    0x00000068, 0x00000050, 0x00000018, 0x00000038,
+    0x000002c0, 0x000002f0, 0x00000290, 0x000002a8,
+    0x000002b0, 0x000002c8, 0x00000288, 0x000002e0,
+    0x000002f8, 0x000002a0, 0x000002d8, 0x00000280,
+    0x000002e8, 0x000002d0, 0x00000298, 0x000002b8,
+    0x00000440, 0x00000470, 0x00000410, 0x00000428,
+    0x00000430, 0x00000448, 0x00000408, 0x00000460,
+    0x00000478, 0x00000420, 0x00000458, 0x00000400,
+    0x00000468, 0x00000450, 0x00000418, 0x00000438,
+    0x000001c0, 0x000001f0, 0x00000190, 0x000001a8,
+    0x000001b0, 0x000001c8, 0x00000188, 0x000001e0,
+    0x000001f8, 0x000001a0, 0x000001d8, 0x00000180,
+    0x000001e8, 0x000001d0, 0x00000198, 0x000001b8,
+    0x00000240, 0x00000270, 0x00000210, 0x00000228,
+    0x00000230, 0x00000248, 0x00000208, 0x00000260,
+    0x00000278, 0x00000220, 0x00000258, 0x00000200,
+    0x00000268, 0x00000250, 0x00000218, 0x00000238,
+    0x000007c0, 0x000007f0, 0x00000790, 0x000007a8,
+    0x000007b0, 0x000007c8, 0x00000788, 0x000007e0,
+    0x000007f8, 0x000007a0, 0x000007d8, 0x00000780,
+    0x000007e8, 0x000007d0, 0x00000798, 0x000007b8,
+    0x00000540, 0x00000570, 0x00000510, 0x00000528,
+    0x00000530, 0x00000548, 0x00000508, 0x00000560,
+    0x00000578, 0x00000520, 0x00000558, 0x00000500,
+    0x00000568, 0x00000550, 0x00000518, 0x00000538,
+    0x00000340, 0x00000370, 0x00000310, 0x00000328,
+    0x00000330, 0x00000348, 0x00000308, 0x00000360,
+    0x00000378, 0x00000320, 0x00000358, 0x00000300,
+    0x00000368, 0x00000350, 0x00000318, 0x00000338,
+    0x000004c0, 0x000004f0, 0x00000490, 0x000004a8,
+    0x000004b0, 0x000004c8, 0x00000488, 0x000004e0,
+    0x000004f8, 0x000004a0, 0x000004d8, 0x00000480,
+    0x000004e8, 0x000004d0, 0x00000498, 0x000004b8,
+    0x00000640, 0x00000670, 0x00000610, 0x00000628,
+    0x00000630, 0x00000648, 0x00000608, 0x00000660,
+    0x00000678, 0x00000620, 0x00000658, 0x00000600,
+    0x00000668, 0x00000650, 0x00000618, 0x00000638,
+    0x000005c0, 0x000005f0, 0x00000590, 0x000005a8,
+    0x000005b0, 0x000005c8, 0x00000588, 0x000005e0,
+    0x000005f8, 0x000005a0, 0x000005d8, 0x00000580,
+    0x000005e8, 0x000005d0, 0x00000598, 0x000005b8,
+    0x00000140, 0x00000170, 0x00000110, 0x00000128,
+    0x00000130, 0x00000148, 0x00000108, 0x00000160,
+    0x00000178, 0x00000120, 0x00000158, 0x00000100,
+    0x00000168, 0x00000150, 0x00000118, 0x00000138,
+  };
+
+static struct
+{
+  const char *oid;
+  const u32 *sbox;
+  const int keymeshing;
+} gost_oid_map[] = {
+  { "1.2.643.2.2.30.0", sbox_test_3411, 0 },
+  { "1.2.643.2.2.30.1", sbox_CryptoPro_3411, 0 },
+  { "1.2.643.2.2.31.0", sbox_Test_89, 0 },
+  { "1.2.643.2.2.31.1", sbox_CryptoPro_A, 1 },
+  { "1.2.643.2.2.31.2", sbox_CryptoPro_B, 1 },
+  { "1.2.643.2.2.31.3", sbox_CryptoPro_C, 1 },
+  { "1.2.643.2.2.31.4", sbox_CryptoPro_D, 1 },
+  { "1.2.643.7.1.2.5.1.1", sbox_TC26_Z, 1 },
+  { NULL, NULL, 0 }
+};
diff --git a/grub-core/lib/libgcrypt/cipher/gost.h 
b/grub-core/lib/libgcrypt/cipher/gost.h
new file mode 100644
index 000000000..53a405050
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/gost.h
@@ -0,0 +1,34 @@
+/* gost.h - GOST 28147-89 implementation
+ * Copyright (C) 2012 Free Software Foundation, Inc.
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _GCRY_GOST_H
+#define _GCRY_GOST_H
+
+typedef struct {
+  u32 key[8];
+  const u32 *sbox;
+  unsigned int mesh_counter;
+  unsigned int mesh_limit;
+} GOST28147_context;
+
+/* This is a simple interface that will be used by GOST R 34.11-94 */
+unsigned int _gcry_gost_enc_data (const u32 *key,
+    u32 *o1, u32 *o2, u32 n1, u32 n2, int cryptopro);
+
+#endif
diff --git a/grub-core/lib/libgcrypt/cipher/gost28147.c 
b/grub-core/lib/libgcrypt/cipher/gost28147.c
new file mode 100644
index 000000000..f094d5bab
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/gost28147.c
@@ -0,0 +1,553 @@
+/* gost28147.c - GOST 28147-89 implementation for Libgcrypt
+ * Copyright (C) 2012 Free Software Foundation, Inc.
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/* GOST 28147-89 defines several modes of encryption:
+ * - ECB which should be used only for key transfer
+ * - CFB mode
+ * - OFB-like mode with additional transformation on keystream
+ *   RFC 5830 names this 'counter encryption' mode
+ *   Original GOST text uses the term 'gammirovanie'
+ * - MAC mode ('imitovstavka')
+ *
+ * This implementation handles ECB and CFB modes via usual libgcrypt handling.
+ * OFB-like modes are unsupported.
+ */
+
+#include <config.h>
+#include "types.h"
+#include "g10lib.h"
+#include "cipher.h"
+#include "mac-internal.h"
+#include "bufhelp.h"
+#include "cipher-internal.h"
+
+#include "gost.h"
+#include "gost-sb.h"
+
+static void
+gost_do_set_sbox (GOST28147_context *ctx, unsigned int index)
+{
+  ctx->sbox = gost_oid_map[index].sbox;
+  ctx->mesh_limit = gost_oid_map[index].keymeshing ? 1024 : 0;
+}
+
+static gcry_err_code_t
+gost_setkey (void *c, const byte *key, unsigned keylen,
+             cipher_bulk_ops_t *bulk_ops)
+{
+  int i;
+  GOST28147_context *ctx = c;
+
+  (void)bulk_ops;
+
+  if (keylen != 256 / 8)
+    return GPG_ERR_INV_KEYLEN;
+
+  if (!ctx->sbox)
+    gost_do_set_sbox (ctx, 0);
+
+  for (i = 0; i < 8; i++)
+    {
+      ctx->key[i] = buf_get_le32(&key[4*i]);
+    }
+
+  ctx->mesh_counter = 0;
+
+  return GPG_ERR_NO_ERROR;
+}
+
+static inline u32
+gost_val (u32 subkey, u32 cm1, const u32 *sbox)
+{
+  cm1 += subkey;
+  cm1 = sbox[0*256 + ((cm1 >>  0) & 0xff)] |
+        sbox[1*256 + ((cm1 >>  8) & 0xff)] |
+        sbox[2*256 + ((cm1 >> 16) & 0xff)] |
+        sbox[3*256 + ((cm1 >> 24) & 0xff)];
+  return cm1;
+}
+
+static unsigned int
+_gost_encrypt_data (const u32 *sbox, const u32 *key, u32 *o1, u32 *o2, u32 n1, 
u32 n2)
+{
+  n2 ^= gost_val (key[0], n1, sbox); n1 ^= gost_val (key[1], n2, sbox);
+  n2 ^= gost_val (key[2], n1, sbox); n1 ^= gost_val (key[3], n2, sbox);
+  n2 ^= gost_val (key[4], n1, sbox); n1 ^= gost_val (key[5], n2, sbox);
+  n2 ^= gost_val (key[6], n1, sbox); n1 ^= gost_val (key[7], n2, sbox);
+
+  n2 ^= gost_val (key[0], n1, sbox); n1 ^= gost_val (key[1], n2, sbox);
+  n2 ^= gost_val (key[2], n1, sbox); n1 ^= gost_val (key[3], n2, sbox);
+  n2 ^= gost_val (key[4], n1, sbox); n1 ^= gost_val (key[5], n2, sbox);
+  n2 ^= gost_val (key[6], n1, sbox); n1 ^= gost_val (key[7], n2, sbox);
+
+  n2 ^= gost_val (key[0], n1, sbox); n1 ^= gost_val (key[1], n2, sbox);
+  n2 ^= gost_val (key[2], n1, sbox); n1 ^= gost_val (key[3], n2, sbox);
+  n2 ^= gost_val (key[4], n1, sbox); n1 ^= gost_val (key[5], n2, sbox);
+  n2 ^= gost_val (key[6], n1, sbox); n1 ^= gost_val (key[7], n2, sbox);
+
+  n2 ^= gost_val (key[7], n1, sbox); n1 ^= gost_val (key[6], n2, sbox);
+  n2 ^= gost_val (key[5], n1, sbox); n1 ^= gost_val (key[4], n2, sbox);
+  n2 ^= gost_val (key[3], n1, sbox); n1 ^= gost_val (key[2], n2, sbox);
+  n2 ^= gost_val (key[1], n1, sbox); n1 ^= gost_val (key[0], n2, sbox);
+
+  *o1 = n2;
+  *o2 = n1;
+
+  return /* burn_stack */ 4*sizeof(void*) /* func call */ +
+                          3*sizeof(void*) /* stack */ +
+                          4*sizeof(void*) /* gost_val call */;
+}
+
+static unsigned int
+gost_encrypt_block (void *c, byte *outbuf, const byte *inbuf)
+{
+  GOST28147_context *ctx = c;
+  u32 n1, n2;
+  unsigned int burn;
+
+  n1 = buf_get_le32 (inbuf);
+  n2 = buf_get_le32 (inbuf+4);
+
+  burn = _gost_encrypt_data(ctx->sbox, ctx->key, &n1, &n2, n1, n2);
+
+  buf_put_le32 (outbuf+0, n1);
+  buf_put_le32 (outbuf+4, n2);
+
+  return /* burn_stack */ burn + 6*sizeof(void*) /* func call */;
+}
+
+unsigned int _gcry_gost_enc_data (const u32 *key,
+    u32 *o1, u32 *o2, u32 n1, u32 n2, int cryptopro)
+{
+  const u32 *sbox;
+  if (cryptopro)
+    sbox = sbox_CryptoPro_3411;
+  else
+    sbox = sbox_test_3411;
+  return _gost_encrypt_data (sbox, key, o1, o2, n1, n2) + 7 * sizeof(void *);
+}
+
+static unsigned int
+gost_decrypt_block (void *c, byte *outbuf, const byte *inbuf)
+{
+  GOST28147_context *ctx = c;
+  u32 n1, n2;
+  const u32 *sbox = ctx->sbox;
+
+  n1 = buf_get_le32 (inbuf);
+  n2 = buf_get_le32 (inbuf+4);
+
+  n2 ^= gost_val (ctx->key[0], n1, sbox); n1 ^= gost_val (ctx->key[1], n2, 
sbox);
+  n2 ^= gost_val (ctx->key[2], n1, sbox); n1 ^= gost_val (ctx->key[3], n2, 
sbox);
+  n2 ^= gost_val (ctx->key[4], n1, sbox); n1 ^= gost_val (ctx->key[5], n2, 
sbox);
+  n2 ^= gost_val (ctx->key[6], n1, sbox); n1 ^= gost_val (ctx->key[7], n2, 
sbox);
+
+  n2 ^= gost_val (ctx->key[7], n1, sbox); n1 ^= gost_val (ctx->key[6], n2, 
sbox);
+  n2 ^= gost_val (ctx->key[5], n1, sbox); n1 ^= gost_val (ctx->key[4], n2, 
sbox);
+  n2 ^= gost_val (ctx->key[3], n1, sbox); n1 ^= gost_val (ctx->key[2], n2, 
sbox);
+  n2 ^= gost_val (ctx->key[1], n1, sbox); n1 ^= gost_val (ctx->key[0], n2, 
sbox);
+
+  n2 ^= gost_val (ctx->key[7], n1, sbox); n1 ^= gost_val (ctx->key[6], n2, 
sbox);
+  n2 ^= gost_val (ctx->key[5], n1, sbox); n1 ^= gost_val (ctx->key[4], n2, 
sbox);
+  n2 ^= gost_val (ctx->key[3], n1, sbox); n1 ^= gost_val (ctx->key[2], n2, 
sbox);
+  n2 ^= gost_val (ctx->key[1], n1, sbox); n1 ^= gost_val (ctx->key[0], n2, 
sbox);
+
+  n2 ^= gost_val (ctx->key[7], n1, sbox); n1 ^= gost_val (ctx->key[6], n2, 
sbox);
+  n2 ^= gost_val (ctx->key[5], n1, sbox); n1 ^= gost_val (ctx->key[4], n2, 
sbox);
+  n2 ^= gost_val (ctx->key[3], n1, sbox); n1 ^= gost_val (ctx->key[2], n2, 
sbox);
+  n2 ^= gost_val (ctx->key[1], n1, sbox); n1 ^= gost_val (ctx->key[0], n2, 
sbox);
+
+  buf_put_le32 (outbuf+0, n2);
+  buf_put_le32 (outbuf+4, n1);
+
+  return /* burn_stack */ 4*sizeof(void*) /* func call */ +
+                          3*sizeof(void*) /* stack */ +
+                          4*sizeof(void*) /* gost_val call */;
+}
+
+static gpg_err_code_t
+gost_set_sbox (GOST28147_context *ctx, const char *oid)
+{
+  int i;
+
+  for (i = 0; gost_oid_map[i].oid; i++)
+    {
+      if (!strcmp(gost_oid_map[i].oid, oid))
+        {
+          gost_do_set_sbox (ctx, i);
+          return 0;
+        }
+    }
+  return GPG_ERR_VALUE_NOT_FOUND;
+}
+
+static gpg_err_code_t
+gost_set_extra_info (void *c, int what, const void *buffer, size_t buflen)
+{
+  GOST28147_context *ctx = c;
+  gpg_err_code_t ec = 0;
+
+  (void)buffer;
+  (void)buflen;
+
+  switch (what)
+    {
+    case GCRYCTL_SET_SBOX:
+      ec = gost_set_sbox (ctx, buffer);
+      break;
+
+    default:
+      ec = GPG_ERR_INV_OP;
+      break;
+    }
+  return ec;
+}
+
+static const byte CryptoProKeyMeshingKey[] = {
+    0x69, 0x00, 0x72, 0x22, 0x64, 0xC9, 0x04, 0x23,
+    0x8D, 0x3A, 0xDB, 0x96, 0x46, 0xE9, 0x2A, 0xC4,
+    0x18, 0xFE, 0xAC, 0x94, 0x00, 0xED, 0x07, 0x12,
+    0xC0, 0x86, 0xDC, 0xC2, 0xEF, 0x4C, 0xA9, 0x2B
+};
+
+/* Implements key meshing algorithm by modifing ctx and returning new IV.
+   Thanks to Dmitry Belyavskiy. */
+static void
+cryptopro_key_meshing (GOST28147_context *ctx)
+{
+    unsigned char newkey[32];
+    unsigned int i;
+
+    /* "Decrypt" the static keymeshing key */
+    for (i = 0; i < 4; i++)
+      {
+       gost_decrypt_block (ctx, newkey + i*8, CryptoProKeyMeshingKey + i*8);
+      }
+
+    /* Set new key */
+    for (i = 0; i < 8; i++)
+      {
+       ctx->key[i] = buf_get_le32(&newkey[4*i]);
+      }
+
+    ctx->mesh_counter = 0;
+}
+
+static unsigned int
+gost_encrypt_block_mesh (void *c, byte *outbuf, const byte *inbuf)
+{
+  GOST28147_context *ctx = c;
+  u32 n1, n2;
+  unsigned int burn;
+
+  n1 = buf_get_le32 (inbuf);
+  n2 = buf_get_le32 (inbuf+4);
+
+  if (ctx->mesh_limit && (ctx->mesh_counter == ctx->mesh_limit))
+    {
+      cryptopro_key_meshing (ctx);
+      /* Yes, encrypt twice: once for KeyMeshing procedure per RFC 4357,
+       * once for block encryption */
+      _gost_encrypt_data(ctx->sbox, ctx->key, &n1, &n2, n1, n2);
+    }
+
+  burn = _gost_encrypt_data(ctx->sbox, ctx->key, &n1, &n2, n1, n2);
+
+  ctx->mesh_counter += 8;
+
+  buf_put_le32 (outbuf+0, n1);
+  buf_put_le32 (outbuf+4, n2);
+
+  return /* burn_stack */ burn + 6*sizeof(void*) /* func call */;
+}
+
+static const gcry_cipher_oid_spec_t oids_gost28147_mesh[] =
+  {
+    { "1.2.643.2.2.21", GCRY_CIPHER_MODE_CFB },
+    /* { "1.2.643.2.2.31.0", GCRY_CIPHER_MODE_CNTGOST }, */
+    { "1.2.643.2.2.31.1", GCRY_CIPHER_MODE_CFB },
+    { "1.2.643.2.2.31.2", GCRY_CIPHER_MODE_CFB },
+    { "1.2.643.2.2.31.3", GCRY_CIPHER_MODE_CFB },
+    { "1.2.643.2.2.31.4", GCRY_CIPHER_MODE_CFB },
+    { NULL }
+  };
+
+gcry_cipher_spec_t _gcry_cipher_spec_gost28147 =
+  {
+    GCRY_CIPHER_GOST28147, {0, 0},
+    "GOST28147", NULL, NULL, 8, 256,
+    sizeof (GOST28147_context),
+    gost_setkey,
+    gost_encrypt_block,
+    gost_decrypt_block,
+    NULL, NULL, NULL, gost_set_extra_info,
+  };
+
+/* Meshing is used only for CFB, so no need to have separate
+ * gost_decrypt_block_mesh.
+ * Moreover key meshing is specified as encrypting the block (IV). Decrypting
+ * it afterwards would be meaningless. */
+gcry_cipher_spec_t _gcry_cipher_spec_gost28147_mesh =
+  {
+    GCRY_CIPHER_GOST28147_MESH, {0, 0},
+    "GOST28147_MESH", NULL, oids_gost28147_mesh, 8, 256,
+    sizeof (GOST28147_context),
+    gost_setkey,
+    gost_encrypt_block_mesh,
+    gost_decrypt_block,
+    NULL, NULL, NULL, gost_set_extra_info,
+  };
+
+static gcry_err_code_t
+gost_imit_open (gcry_mac_hd_t h)
+{
+  memset(&h->u.imit, 0, sizeof(h->u.imit));
+  return 0;
+}
+
+static void
+gost_imit_close (gcry_mac_hd_t h)
+{
+  (void) h;
+}
+
+static gcry_err_code_t
+gost_imit_setkey (gcry_mac_hd_t h, const unsigned char *key, size_t keylen)
+{
+  int i;
+
+  if (keylen != 256 / 8)
+    return GPG_ERR_INV_KEYLEN;
+
+  if (!h->u.imit.ctx.sbox)
+    h->u.imit.ctx.sbox = sbox_CryptoPro_A;
+
+  for (i = 0; i < 8; i++)
+    {
+      h->u.imit.ctx.key[i] = buf_get_le32(&key[4*i]);
+    }
+
+  return 0;
+}
+
+static gcry_err_code_t
+gost_imit_setiv (gcry_mac_hd_t h,
+                const unsigned char *iv,
+                size_t ivlen)
+{
+  if (ivlen != 8)
+    return GPG_ERR_INV_LENGTH;
+
+  h->u.imit.n1 = buf_get_le32 (iv + 0);
+  h->u.imit.n2 = buf_get_le32 (iv + 4);
+
+  return 0;
+}
+
+static gcry_err_code_t
+gost_imit_reset (gcry_mac_hd_t h)
+{
+  h->u.imit.n1 = h->u.imit.n2 = 0;
+  h->u.imit.unused = 0;
+  return 0;
+}
+
+static unsigned int
+_gost_imit_block (const u32 *sbox, const u32 *key, u32 *o1, u32 *o2, u32 n1, 
u32 n2)
+{
+  n1 ^= *o1;
+  n2 ^= *o2;
+
+  n2 ^= gost_val (key[0], n1, sbox); n1 ^= gost_val (key[1], n2, sbox);
+  n2 ^= gost_val (key[2], n1, sbox); n1 ^= gost_val (key[3], n2, sbox);
+  n2 ^= gost_val (key[4], n1, sbox); n1 ^= gost_val (key[5], n2, sbox);
+  n2 ^= gost_val (key[6], n1, sbox); n1 ^= gost_val (key[7], n2, sbox);
+
+  n2 ^= gost_val (key[0], n1, sbox); n1 ^= gost_val (key[1], n2, sbox);
+  n2 ^= gost_val (key[2], n1, sbox); n1 ^= gost_val (key[3], n2, sbox);
+  n2 ^= gost_val (key[4], n1, sbox); n1 ^= gost_val (key[5], n2, sbox);
+  n2 ^= gost_val (key[6], n1, sbox); n1 ^= gost_val (key[7], n2, sbox);
+
+  *o1 = n1;
+  *o2 = n2;
+
+  return /* burn_stack */ 4*sizeof(void*) /* func call */ +
+                          3*sizeof(void*) /* stack */ +
+                          4*sizeof(void*) /* gost_val call */;
+}
+
+static inline unsigned int
+gost_imit_block (GOST28147_context *ctx, u32 *n1, u32 *n2, const unsigned char 
*buf)
+{
+  if (ctx->mesh_limit && (ctx->mesh_counter == ctx->mesh_limit))
+    cryptopro_key_meshing (ctx);
+
+  return _gost_imit_block (ctx->sbox, ctx->key,
+                          n1, n2,
+                          buf_get_le32 (buf+0),
+                          buf_get_le32 (buf+4));
+}
+
+static gcry_err_code_t
+gost_imit_write (gcry_mac_hd_t h, const unsigned char *buf, size_t buflen)
+{
+  const int blocksize = 8;
+  unsigned int burn = 0;
+  if (!buflen || !buf)
+    return GPG_ERR_NO_ERROR;
+
+  if (h->u.imit.unused)
+    {
+      for (; buflen && h->u.imit.unused < blocksize; buflen --)
+        h->u.imit.lastiv[h->u.imit.unused++] = *buf++;
+
+      if (h->u.imit.unused < blocksize)
+        return GPG_ERR_NO_ERROR;
+
+      h->u.imit.count ++;
+      burn = gost_imit_block (&h->u.imit.ctx,
+                             &h->u.imit.n1, &h->u.imit.n2,
+                             h->u.imit.lastiv);
+
+      h->u.imit.unused = 0;
+    }
+
+  while (buflen >= blocksize)
+    {
+      h->u.imit.count ++;
+      burn = gost_imit_block (&h->u.imit.ctx,
+                             &h->u.imit.n1, &h->u.imit.n2,
+                             buf);
+      buf += blocksize;
+      buflen -= blocksize;
+    }
+
+  for (; buflen; buflen--)
+    h->u.imit.lastiv[h->u.imit.unused++] = *buf++;
+
+  _gcry_burn_stack (burn);
+
+  return GPG_ERR_NO_ERROR;
+}
+
+static void
+gost_imit_finish (gcry_mac_hd_t h)
+{
+  static const unsigned char zero[8] = {0};
+
+  /* Fill till full block */
+  if (h->u.imit.unused)
+    gost_imit_write(h, zero, 8 - h->u.imit.unused);
+
+  if (h->u.imit.count == 1)
+    gost_imit_write(h, zero, 8);
+}
+
+static gcry_err_code_t
+gost_imit_read (gcry_mac_hd_t h, unsigned char *outbuf, size_t * outlen)
+{
+  unsigned int dlen = 8;
+  unsigned char digest[8];
+
+  gost_imit_finish (h);
+
+  buf_put_le32 (digest+0, h->u.imit.n1);
+  buf_put_le32 (digest+4, h->u.imit.n2);
+
+  if (*outlen <= dlen)
+    buf_cpy (outbuf, digest, *outlen);
+  else
+    {
+      buf_cpy (outbuf, digest, dlen);
+      *outlen = dlen;
+    }
+  return 0;
+}
+
+static gcry_err_code_t
+gost_imit_verify (gcry_mac_hd_t h, const unsigned char *buf, size_t buflen)
+{
+  unsigned char tbuf[8];
+
+  gost_imit_finish (h);
+
+  buf_put_le32 (tbuf+0, h->u.imit.n1);
+  buf_put_le32 (tbuf+4, h->u.imit.n2);
+
+  return buf_eq_const(tbuf, buf, buflen) ?
+             GPG_ERR_NO_ERROR : GPG_ERR_CHECKSUM;
+}
+
+static unsigned int
+gost_imit_get_maclen (int algo)
+{
+  (void) algo;
+  return 4; /* or 8 */
+}
+
+
+static unsigned int
+gost_imit_get_keylen (int algo)
+{
+  (void) algo;
+  return 256 / 8;
+}
+
+static gpg_err_code_t
+gost_imit_set_extra_info (gcry_mac_hd_t hd, int what, const void *buffer, 
size_t buflen)
+{
+  gpg_err_code_t ec = 0;
+
+  (void)buffer;
+  (void)buflen;
+
+  switch (what)
+    {
+    case GCRYCTL_SET_SBOX:
+      ec = gost_set_sbox (&hd->u.imit.ctx, buffer);
+      break;
+
+    default:
+      ec = GPG_ERR_INV_OP;
+      break;
+    }
+  return ec;
+}
+
+
+static gcry_mac_spec_ops_t gost_imit_ops = {
+  gost_imit_open,
+  gost_imit_close,
+  gost_imit_setkey,
+  gost_imit_setiv,
+  gost_imit_reset,
+  gost_imit_write,
+  gost_imit_read,
+  gost_imit_verify,
+  gost_imit_get_maclen,
+  gost_imit_get_keylen,
+  gost_imit_set_extra_info,
+  NULL
+};
+
+const gcry_mac_spec_t _gcry_mac_type_spec_gost28147_imit =
+  {
+    GCRY_MAC_GOST28147_IMIT, {0, 0}, "GOST28147_IMIT",
+    &gost_imit_ops
+  };
diff --git a/grub-core/lib/libgcrypt/cipher/gostr3411-94.c 
b/grub-core/lib/libgcrypt/cipher/gostr3411-94.c
new file mode 100644
index 000000000..93de83b49
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/gostr3411-94.c
@@ -0,0 +1,383 @@
+/* gostr3411-94.c - GOST R 34.11-94 hash function
+ * Copyright (C) 2012 Free Software Foundation, Inc.
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "g10lib.h"
+#include "bithelp.h"
+#include "bufhelp.h"
+#include "cipher.h"
+#include "hash-common.h"
+
+#include "gost.h"
+
+#define max(a, b) (((a) > (b)) ? (a) : (b))
+
+typedef struct {
+  gcry_md_block_ctx_t bctx;
+  union {
+    u32 h[8];
+    byte result[32];
+  };
+  u32 sigma[8];
+  u32 len;
+  int cryptopro;
+} GOSTR3411_CONTEXT;
+
+static unsigned int
+transform (void *c, const unsigned char *data, size_t nblks);
+
+static void
+gost3411_init (void *context, unsigned int flags)
+{
+  GOSTR3411_CONTEXT *hd = context;
+
+  (void)flags;
+
+  memset (hd->h, 0, 32);
+  memset (hd->sigma, 0, 32);
+
+  hd->bctx.nblocks = 0;
+  hd->bctx.count = 0;
+  hd->bctx.blocksize_shift = _gcry_ctz(32);
+  hd->bctx.bwrite = transform;
+  hd->cryptopro = 0;
+}
+
+static void
+gost3411_cp_init (void *context, unsigned int flags)
+{
+  GOSTR3411_CONTEXT *hd = context;
+  gost3411_init (context, flags);
+  hd->cryptopro = 1;
+}
+
+static void
+do_p (u32 *p, u32 *u, u32 *v)
+{
+  int k;
+  u32 t[8];
+
+  for (k = 0; k < 8; k++)
+    t[k] = u[k] ^ v[k];
+
+  k = 0;
+  p[k+0] = ((t[0] >> (8*k)) & 0xff) << 0 |
+           ((t[2] >> (8*k)) & 0xff) << 8 |
+           ((t[4] >> (8*k)) & 0xff) << 16 |
+           ((t[6] >> (8*k)) & 0xff) << 24;
+  p[k+4] = ((t[1] >> (8*k)) & 0xff) << 0 |
+           ((t[3] >> (8*k)) & 0xff) << 8 |
+           ((t[5] >> (8*k)) & 0xff) << 16 |
+           ((t[7] >> (8*k)) & 0xff) << 24;
+
+  k = 1;
+  p[k+0] = ((t[0] >> (8*k)) & 0xff) << 0 |
+           ((t[2] >> (8*k)) & 0xff) << 8 |
+           ((t[4] >> (8*k)) & 0xff) << 16 |
+           ((t[6] >> (8*k)) & 0xff) << 24;
+  p[k+4] = ((t[1] >> (8*k)) & 0xff) << 0 |
+           ((t[3] >> (8*k)) & 0xff) << 8 |
+           ((t[5] >> (8*k)) & 0xff) << 16 |
+           ((t[7] >> (8*k)) & 0xff) << 24;
+
+  k = 2;
+  p[k+0] = ((t[0] >> (8*k)) & 0xff) << 0 |
+           ((t[2] >> (8*k)) & 0xff) << 8 |
+           ((t[4] >> (8*k)) & 0xff) << 16 |
+           ((t[6] >> (8*k)) & 0xff) << 24;
+  p[k+4] = ((t[1] >> (8*k)) & 0xff) << 0 |
+           ((t[3] >> (8*k)) & 0xff) << 8 |
+           ((t[5] >> (8*k)) & 0xff) << 16 |
+           ((t[7] >> (8*k)) & 0xff) << 24;
+
+  k = 3;
+  p[k+0] = ((t[0] >> (8*k)) & 0xff) << 0 |
+           ((t[2] >> (8*k)) & 0xff) << 8 |
+           ((t[4] >> (8*k)) & 0xff) << 16 |
+           ((t[6] >> (8*k)) & 0xff) << 24;
+  p[k+4] = ((t[1] >> (8*k)) & 0xff) << 0 |
+           ((t[3] >> (8*k)) & 0xff) << 8 |
+           ((t[5] >> (8*k)) & 0xff) << 16 |
+           ((t[7] >> (8*k)) & 0xff) << 24;
+}
+
+static void
+do_a (u32 *u)
+{
+  u32 t[2];
+  int i;
+  memcpy(t, u, 2*4);
+  for (i = 0; i < 6; i++)
+    u[i] = u[i+2];
+  u[6] = u[0] ^ t[0];
+  u[7] = u[1] ^ t[1];
+}
+/* apply do_a twice: 1 2 3 4 -> 3 4 1^2 2^3 */
+static void
+do_a2 (u32 *u)
+{
+  u32 t[4];
+  int i;
+  memcpy (t, u, 16);
+  memcpy (u, u + 4, 16);
+  for (i = 0; i < 2; i++)
+    {
+      u[4+i] = t[i] ^ t[i + 2];
+      u[6+i] = u[i] ^ t[i + 2];
+    }
+}
+
+static void
+do_apply_c2 (u32 *u)
+{
+  u[ 0] ^= 0xff00ff00;
+  u[ 1] ^= 0xff00ff00;
+  u[ 2] ^= 0x00ff00ff;
+  u[ 3] ^= 0x00ff00ff;
+  u[ 4] ^= 0x00ffff00;
+  u[ 5] ^= 0xff0000ff;
+  u[ 6] ^= 0x000000ff;
+  u[ 7] ^= 0xff00ffff;
+}
+
+#define do_chi_step12(e) \
+  e[6] ^= ((e[6] >> 16) ^ e[7] ^ (e[7] >> 16) ^ e[4] ^ (e[5] >>16)) & 0xffff;
+
+#define do_chi_step13(e) \
+  e[6] ^= ((e[7] ^ (e[7] >> 16) ^ e[0] ^ (e[4] >> 16) ^ e[6]) & 0xffff) << 16;
+
+#define do_chi_doublestep(e, i) \
+  e[i] ^= (e[i] >> 16) ^ (e[(i+1)%8] << 16) ^ e[(i+1)%8] ^ (e[(i+1)%8] >> 16) 
^ (e[(i+2)%8] << 16) ^ e[(i+6)%8] ^ (e[(i+7)%8] >> 16); \
+  e[i] ^= (e[i] << 16);
+
+static void
+do_chi_submix12 (u32 *e, u32 *x)
+{
+  e[6] ^= x[0];
+  e[7] ^= x[1];
+  e[0] ^= x[2];
+  e[1] ^= x[3];
+  e[2] ^= x[4];
+  e[3] ^= x[5];
+  e[4] ^= x[6];
+  e[5] ^= x[7];
+}
+
+static void
+do_chi_submix13 (u32 *e, u32 *x)
+{
+  e[6] ^= (x[0] << 16) | (x[7] >> 16);
+  e[7] ^= (x[1] << 16) | (x[0] >> 16);
+  e[0] ^= (x[2] << 16) | (x[1] >> 16);
+  e[1] ^= (x[3] << 16) | (x[2] >> 16);
+  e[2] ^= (x[4] << 16) | (x[3] >> 16);
+  e[3] ^= (x[5] << 16) | (x[4] >> 16);
+  e[4] ^= (x[6] << 16) | (x[5] >> 16);
+  e[5] ^= (x[7] << 16) | (x[6] >> 16);
+}
+
+static void
+do_add (u32 *s, u32 *a)
+{
+  u32 carry = 0;
+  int i;
+
+  for (i = 0; i < 8; i++)
+    {
+      u32 op = carry + a[i];
+      s[i] += op;
+      carry = (a[i] > op) || (op > s[i]);
+    }
+}
+
+static unsigned int
+do_hash_step (GOSTR3411_CONTEXT *hd, u32 *h, u32 *m)
+{
+  u32 u[8], v[8];
+  u32 s[8];
+  u32 k[8];
+  unsigned int burn;
+  int i;
+
+  memcpy (u, h, 32);
+  memcpy (v, m, 32);
+
+  for (i = 0; i < 4; i++) {
+    do_p (k, u, v);
+
+    burn = _gcry_gost_enc_data (k, &s[2*i], &s[2*i+1], h[2*i], h[2*i+1], 
hd->cryptopro);
+
+    do_a (u);
+    if (i == 1)
+      do_apply_c2 (u);
+    do_a2 (v);
+  }
+
+  for (i = 0; i < 5; i++)
+    {
+      do_chi_doublestep (s, 0);
+      do_chi_doublestep (s, 1);
+      do_chi_doublestep (s, 2);
+      do_chi_doublestep (s, 3);
+      do_chi_doublestep (s, 4);
+      /* That is in total 12 + 1 + 61 = 74 = 16 * 4 + 10 rounds */
+      if (i == 4)
+        break;
+      do_chi_doublestep (s, 5);
+      if (i == 0)
+        do_chi_submix12(s, m);
+      do_chi_step12 (s);
+      if (i == 0)
+        do_chi_submix13(s, h);
+      do_chi_step13 (s);
+      do_chi_doublestep (s, 7);
+    }
+
+  memcpy (h, s+5, 12);
+  memcpy (h+3, s, 20);
+
+  return /* burn_stack */ 4 * sizeof(void*) /* func call (ret addr + args) */ +
+                          4 * 32 + 2 * sizeof(int) /* stack */ +
+                          max(burn /* _gcry_gost_enc_one */,
+                              sizeof(void*) * 2 /* do_a2 call */ +
+                              16 + sizeof(int) /* do_a2 stack */ );
+}
+
+static unsigned int
+transform_blk (void *ctx, const unsigned char *data)
+{
+  GOSTR3411_CONTEXT *hd = ctx;
+  u32 m[8];
+  unsigned int burn;
+  int i;
+
+  for (i = 0; i < 8; i++)
+    m[i] = buf_get_le32(data + i*4);
+  burn = do_hash_step (hd, hd->h, m);
+  do_add (hd->sigma, m);
+
+  return /* burn_stack */ burn + 3 * sizeof(void*) + 32 + 2 * sizeof(void*);
+}
+
+
+static unsigned int
+transform ( void *c, const unsigned char *data, size_t nblks )
+{
+  unsigned int burn;
+
+  do
+    {
+      burn = transform_blk (c, data);
+      data += 32;
+    }
+  while (--nblks);
+
+  return burn;
+}
+
+
+/*
+   The routine finally terminates the computation and returns the
+   digest.  The handle is prepared for a new cycle, but adding bytes
+   to the handle will the destroy the returned buffer.  Returns: 32
+   bytes with the message the digest.  */
+static void
+gost3411_final (void *context)
+{
+  GOSTR3411_CONTEXT *hd = context;
+  size_t padlen = 0;
+  u32 l[8];
+  int i;
+  MD_NBLOCKS_TYPE nblocks;
+
+  if (hd->bctx.count > 0)
+    {
+      padlen = 32 - hd->bctx.count;
+      memset (hd->bctx.buf + hd->bctx.count, 0, padlen);
+      hd->bctx.count += padlen;
+      _gcry_md_block_write (hd, NULL, 0); /* flush */;
+    }
+
+  if (hd->bctx.count != 0)
+    return; /* Something went wrong */
+
+  memset (l, 0, 32);
+
+  nblocks = hd->bctx.nblocks;
+  if (padlen)
+    {
+      nblocks --;
+      l[0] = 256 - padlen * 8;
+    }
+  l[0] |= nblocks << 8;
+  nblocks >>= 24;
+
+  for (i = 1; i < 8 && nblocks != 0; i++)
+    {
+      l[i] = nblocks;
+      nblocks >>= 24;
+    }
+
+  do_hash_step (hd, hd->h, l);
+  do_hash_step (hd, hd->h, hd->sigma);
+  for (i = 0; i < 8; i++)
+    hd->h[i] = le_bswap32(hd->h[i]);
+}
+
+static byte *
+gost3411_read (void *context)
+{
+  GOSTR3411_CONTEXT *hd = context;
+
+  return hd->result;
+}
+
+static const unsigned char asn[6] = /* Object ID is 1.2.643.2.2.3 */
+  { 0x2a, 0x85, 0x03, 0x02, 0x02, 0x03 };
+
+static const gcry_md_oid_spec_t oid_spec_gostr3411[] =
+  {
+    /* iso.member-body.ru.rans.cryptopro.3 (gostR3411-94-with-gostR3410-2001) 
*/
+    { "1.2.643.2.2.3" },
+    /* iso.member-body.ru.rans.cryptopro.9 (gostR3411-94) */
+    { "1.2.643.2.2.9" },
+    {NULL},
+  };
+
+const gcry_md_spec_t _gcry_digest_spec_gost3411_94 =
+  {
+    GCRY_MD_GOSTR3411_94, {0, 0},
+    "GOSTR3411_94", NULL, 0, NULL, 32,
+    gost3411_init, _gcry_md_block_write, gost3411_final, gost3411_read, NULL,
+    NULL,
+    sizeof (GOSTR3411_CONTEXT)
+  };
+const gcry_md_spec_t _gcry_digest_spec_gost3411_cp =
+  {
+    GCRY_MD_GOSTR3411_CP, {0, 0},
+    "GOSTR3411_CP", asn, DIM (asn), oid_spec_gostr3411, 32,
+    gost3411_cp_init, _gcry_md_block_write, gost3411_final, gost3411_read, 
NULL,
+    NULL,
+    sizeof (GOSTR3411_CONTEXT)
+  };
diff --git a/grub-core/lib/libgcrypt/cipher/hash-common.c 
b/grub-core/lib/libgcrypt/cipher/hash-common.c
index 8c413bcba..ed2d7cacd 100644
--- a/grub-core/lib/libgcrypt/cipher/hash-common.c
+++ b/grub-core/lib/libgcrypt/cipher/hash-common.c
@@ -26,6 +26,7 @@
 #endif
 
 #include "g10lib.h"
+#include "bufhelp.h"
 #include "hash-common.h"
 
 
@@ -49,8 +50,12 @@ _gcry_hash_selftest_check_one (int algo,
   gcry_error_t err = 0;
   gcry_md_hd_t hd;
   unsigned char *digest;
+  char aaa[1000];
+  int xof = 0;
 
-  if (_gcry_md_get_algo_dlen (algo) != expectlen)
+  if (_gcry_md_get_algo_dlen (algo) == 0)
+    xof = 1;
+  else if (_gcry_md_get_algo_dlen (algo) != expectlen)
     return "digest size does not match expected size";
 
   err = _gcry_md_open (&hd, algo, 0);
@@ -65,7 +70,6 @@ _gcry_hash_selftest_check_one (int algo,
 
     case 1: /* Hash one million times an "a". */
       {
-        char aaa[1000];
         int i;
 
         /* Write in odd size chunks so that we test the buffering.  */
@@ -81,13 +85,109 @@ _gcry_hash_selftest_check_one (int algo,
 
   if (!result)
     {
-      digest = _gcry_md_read (hd, algo);
-
-      if ( memcmp (digest, expect, expectlen) )
-        result = "digest mismatch";
+      if (!xof)
+       {
+         digest = _gcry_md_read (hd, algo);
+
+         if ( memcmp (digest, expect, expectlen) )
+           result = "digest mismatch";
+       }
+      else
+       {
+         gcry_assert(expectlen <= sizeof(aaa));
+
+         err = _gcry_md_extract (hd, algo, aaa, expectlen);
+         if (err)
+           result = "error extracting output from XOF";
+         else if ( memcmp (aaa, expect, expectlen) )
+           result = "digest mismatch";
+       }
     }
 
   _gcry_md_close (hd);
 
   return result;
 }
+
+
+/* Common function to write a chunk of data to the transform function
+   of a hash algorithm.  Note that the use of the term "block" does
+   not imply a fixed size block.  Note that we explicitly allow to use
+   this function after the context has been finalized; the result does
+   not have any meaning but writing after finalize is sometimes
+   helpful to mitigate timing attacks. */
+void
+_gcry_md_block_write (void *context, const void *inbuf_arg, size_t inlen)
+{
+  const unsigned char *inbuf = inbuf_arg;
+  gcry_md_block_ctx_t *hd = context;
+  unsigned int stack_burn = 0;
+  unsigned int nburn;
+  const unsigned int blocksize_shift = hd->blocksize_shift;
+  const unsigned int blocksize = 1 << blocksize_shift;
+  size_t inblocks;
+  size_t copylen;
+
+  if (sizeof(hd->buf) < blocksize)
+    BUG();
+
+  if (!hd->bwrite)
+    return;
+
+  if (hd->count > blocksize)
+    {
+      /* This happens only when gcry_md_write is called after final.
+       * Writing after final is used for mitigating timing attacks. */
+      hd->count = 0;
+    }
+
+  while (hd->count)
+    {
+      if (hd->count == blocksize)  /* Flush the buffer. */
+       {
+         nburn = hd->bwrite (hd, hd->buf, 1);
+         stack_burn = nburn > stack_burn ? nburn : stack_burn;
+         hd->count = 0;
+         if (!++hd->nblocks)
+           hd->nblocks_high++;
+       }
+      else
+       {
+         copylen = inlen;
+         if (copylen > blocksize - hd->count)
+           copylen = blocksize - hd->count;
+
+         if (copylen == 0)
+           break;
+
+         buf_cpy (&hd->buf[hd->count], inbuf, copylen);
+         hd->count += copylen;
+         inbuf += copylen;
+         inlen -= copylen;
+       }
+    }
+
+  if (inlen == 0)
+    return;
+
+  if (inlen >= blocksize)
+    {
+      inblocks = inlen >> blocksize_shift;
+      nburn = hd->bwrite (hd, inbuf, inblocks);
+      stack_burn = nburn > stack_burn ? nburn : stack_burn;
+      hd->count = 0;
+      hd->nblocks_high += (hd->nblocks + inblocks < inblocks);
+      hd->nblocks += inblocks;
+      inlen -= inblocks << blocksize_shift;
+      inbuf += inblocks << blocksize_shift;
+    }
+
+  if (inlen)
+    {
+      buf_cpy (hd->buf, inbuf, inlen);
+      hd->count = inlen;
+    }
+
+  if (stack_burn > 0)
+    _gcry_burn_stack (stack_burn);
+}
diff --git a/grub-core/lib/libgcrypt/cipher/hash-common.h 
b/grub-core/lib/libgcrypt/cipher/hash-common.h
index fdebef42a..561e77a7e 100644
--- a/grub-core/lib/libgcrypt/cipher/hash-common.h
+++ b/grub-core/lib/libgcrypt/cipher/hash-common.h
@@ -20,14 +20,43 @@
 #ifndef GCRY_HASH_COMMON_H
 #define GCRY_HASH_COMMON_H
 
+#include "types.h"
+
 
 const char * _gcry_hash_selftest_check_one
 /**/         (int algo,
               int datamode, const void *data, size_t datalen,
               const void *expect, size_t expectlen);
 
-
-
-
+/* Type for the md_write helper function.  */
+typedef unsigned int (*_gcry_md_block_write_t) (void *c,
+                                               const unsigned char *blks,
+                                               size_t nblks);
+
+#if (defined(USE_SHA512) || defined(USE_WHIRLPOOL))
+/* SHA-512 and Whirlpool needs u64. SHA-512 needs larger buffer. */
+# define MD_BLOCK_MAX_BLOCKSIZE 128
+# define MD_NBLOCKS_TYPE u64
+#else
+# define MD_BLOCK_MAX_BLOCKSIZE 64
+# define MD_NBLOCKS_TYPE u32
+#endif
+
+/* SHA1 needs 2x64 bytes and SHA-512 needs 128 bytes. */
+#define MD_BLOCK_CTX_BUFFER_SIZE 128
+
+typedef struct gcry_md_block_ctx
+{
+    byte buf[MD_BLOCK_CTX_BUFFER_SIZE];
+    MD_NBLOCKS_TYPE nblocks;
+    MD_NBLOCKS_TYPE nblocks_high;
+    int count;
+    unsigned int blocksize_shift;
+    _gcry_md_block_write_t bwrite;
+} gcry_md_block_ctx_t;
+
+
+void
+_gcry_md_block_write( void *context, const void *inbuf_arg, size_t inlen);
 
 #endif /*GCRY_HASH_COMMON_H*/
diff --git a/grub-core/lib/libgcrypt/cipher/hmac-tests.c 
b/grub-core/lib/libgcrypt/cipher/hmac-tests.c
deleted file mode 100644
index a32ece75d..000000000
--- a/grub-core/lib/libgcrypt/cipher/hmac-tests.c
+++ /dev/null
@@ -1,732 +0,0 @@
-/* hmac-tests.c - HMAC selftests.
- * Copyright (C) 2008 Free Software Foundation, Inc.
- *
- * This file is part of Libgcrypt.
- *
- * Libgcrypt is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as
- * published by the Free Software Foundation; either version 2.1 of
- * the License, or (at your option) any later version.
- *
- * Libgcrypt is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, see <http://www.gnu.org/licenses/>.
- */
-
-/*
-   Although algorithm self-tests are usually implemented in the module
-   implementing the algorithm, the case for HMAC is different because
-   HMAC is implemnetd on a higher level using a special feature of the
-   gcry_md_ functions.  It would be possible to do this also in the
-   digest algorithm modules, but that would blow up the code too much
-   and spread the hmac tests over several modules.
-
-   Thus we implement all HMAC tests in this test module and provide a
-   function to run the tests.
-*/
-
-#include <config.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#ifdef HAVE_STDINT_H
-# include <stdint.h>
-#endif
-
-#include "g10lib.h"
-#include "cipher.h"
-#include "hmac256.h"
-
-/* Check one HMAC with digest ALGO using the regualr HAMC
-   API. (DATA,DATALEN) is the data to be MACed, (KEY,KEYLEN) the key
-   and (EXPECT,EXPECTLEN) the expected result.  Returns NULL on
-   succdess or a string describing the failure.  */
-static const char *
-check_one (int algo,
-           const void *data, size_t datalen,
-           const void *key, size_t keylen,
-           const void *expect, size_t expectlen)
-{
-  gcry_md_hd_t hd;
-  const unsigned char *digest;
-
-/*   printf ("HMAC algo %d\n", algo); */
-  if (_gcry_md_get_algo_dlen (algo) != expectlen)
-    return "invalid tests data";
-  if (_gcry_md_open (&hd, algo, GCRY_MD_FLAG_HMAC))
-    return "gcry_md_open failed";
-  if (_gcry_md_setkey (hd, key, keylen))
-    {
-      _gcry_md_close (hd);
-      return "gcry_md_setkey failed";
-    }
-  _gcry_md_write (hd, data, datalen);
-  digest = _gcry_md_read (hd, algo);
-  if (!digest)
-    {
-      _gcry_md_close (hd);
-      return "gcry_md_read failed";
-    }
-  if (memcmp (digest, expect, expectlen))
-    {
-/*       int i; */
-
-/*       fputs ("        {", stdout); */
-/*       for (i=0; i < expectlen-1; i++) */
-/*         { */
-/*           if (i && !(i % 8)) */
-/*             fputs ("\n         ", stdout); */
-/*           printf (" 0x%02x,", digest[i]); */
-/*         } */
-/*       printf (" 0x%02x } },\n", digest[i]); */
-
-      _gcry_md_close (hd);
-      return "does not match";
-    }
-  _gcry_md_close (hd);
-  return NULL;
-}
-
-
-static gpg_err_code_t
-selftests_sha1 (int extended, selftest_report_func_t report)
-{
-  const char *what;
-  const char *errtxt;
-  unsigned char key[128];
-  int i, j;
-
-  what = "FIPS-198a, A.1";
-  for (i=0; i < 64; i++)
-    key[i] = i;
-  errtxt = check_one (GCRY_MD_SHA1,
-                      "Sample #1", 9,
-                      key, 64,
-                      "\x4f\x4c\xa3\xd5\xd6\x8b\xa7\xcc\x0a\x12"
-                      "\x08\xc9\xc6\x1e\x9c\x5d\xa0\x40\x3c\x0a", 20);
-  if (errtxt)
-    goto failed;
-
-  if (extended)
-    {
-      what = "FIPS-198a, A.2";
-      for (i=0, j=0x30; i < 20; i++)
-        key[i] = j++;
-      errtxt = check_one (GCRY_MD_SHA1,
-                          "Sample #2", 9,
-                          key, 20,
-                          "\x09\x22\xd3\x40\x5f\xaa\x3d\x19\x4f\x82"
-                          "\xa4\x58\x30\x73\x7d\x5c\xc6\xc7\x5d\x24", 20);
-      if (errtxt)
-        goto failed;
-
-      what = "FIPS-198a, A.3";
-      for (i=0, j=0x50; i < 100; i++)
-        key[i] = j++;
-      errtxt = check_one (GCRY_MD_SHA1,
-                          "Sample #3", 9,
-                          key, 100,
-                          "\xbc\xf4\x1e\xab\x8b\xb2\xd8\x02\xf3\xd0"
-                          "\x5c\xaf\x7c\xb0\x92\xec\xf8\xd1\xa3\xaa", 20 );
-      if (errtxt)
-        goto failed;
-
-      what = "FIPS-198a, A.4";
-      for (i=0, j=0x70; i < 49; i++)
-        key[i] = j++;
-      errtxt = check_one (GCRY_MD_SHA1,
-                          "Sample #4", 9,
-                          key, 49,
-                          "\x9e\xa8\x86\xef\xe2\x68\xdb\xec\xce\x42"
-                          "\x0c\x75\x24\xdf\x32\xe0\x75\x1a\x2a\x26", 20 );
-      if (errtxt)
-        goto failed;
-    }
-
-  return 0; /* Succeeded. */
-
- failed:
-  if (report)
-    report ("hmac", GCRY_MD_SHA1, what, errtxt);
-  return GPG_ERR_SELFTEST_FAILED;
-}
-
-
-
-static gpg_err_code_t
-selftests_sha224 (int extended, selftest_report_func_t report)
-{
-  static struct
-  {
-    const char * const desc;
-    const char * const data;
-    const char * const key;
-    const char expect[28];
-  } tv[] =
-    {
-      { "data-28 key-4",
-        "what do ya want for nothing?",
-        "Jefe",
-        { 0xa3, 0x0e, 0x01, 0x09, 0x8b, 0xc6, 0xdb, 0xbf,
-          0x45, 0x69, 0x0f, 0x3a, 0x7e, 0x9e, 0x6d, 0x0f,
-          0x8b, 0xbe, 0xa2, 0xa3, 0x9e, 0x61, 0x48, 0x00,
-          0x8f, 0xd0, 0x5e, 0x44 } },
-
-      { "data-9 key-20",
-        "Hi There",
-       "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b"
-        "\x0b\x0b\x0b\x0b",
-        { 0x89, 0x6f, 0xb1, 0x12, 0x8a, 0xbb, 0xdf, 0x19,
-          0x68, 0x32, 0x10, 0x7c, 0xd4, 0x9d, 0xf3, 0x3f,
-          0x47, 0xb4, 0xb1, 0x16, 0x99, 0x12, 0xba, 0x4f,
-          0x53, 0x68, 0x4b, 0x22 } },
-
-      { "data-50 key-20",
-        "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd"
-        "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd"
-        "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd"
-        "\xdd\xdd",
-       "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
-        "\xaa\xaa\xaa\xaa",
-        { 0x7f, 0xb3, 0xcb, 0x35, 0x88, 0xc6, 0xc1, 0xf6,
-          0xff, 0xa9, 0x69, 0x4d, 0x7d, 0x6a, 0xd2, 0x64,
-          0x93, 0x65, 0xb0, 0xc1, 0xf6, 0x5d, 0x69, 0xd1,
-          0xec, 0x83, 0x33, 0xea } },
-
-      { "data-50 key-26",
-        "\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd"
-        "\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd"
-        "\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd"
-        "\xcd\xcd",
-       "\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10"
-        "\x11\x12\x13\x14\x15\x16\x17\x18\x19",
-        { 0x6c, 0x11, 0x50, 0x68, 0x74, 0x01, 0x3c, 0xac,
-          0x6a, 0x2a, 0xbc, 0x1b, 0xb3, 0x82, 0x62, 0x7c,
-          0xec, 0x6a, 0x90, 0xd8, 0x6e, 0xfc, 0x01, 0x2d,
-          0xe7, 0xaf, 0xec, 0x5a } },
-
-      { "data-54 key-131",
-        "Test Using Larger Than Block-Size Key - Hash Key First",
-       "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
-        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
-        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
-        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
-        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
-        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
-        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
-        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
-        "\xaa\xaa\xaa",
-        { 0x95, 0xe9, 0xa0, 0xdb, 0x96, 0x20, 0x95, 0xad,
-          0xae, 0xbe, 0x9b, 0x2d, 0x6f, 0x0d, 0xbc, 0xe2,
-          0xd4, 0x99, 0xf1, 0x12, 0xf2, 0xd2, 0xb7, 0x27,
-          0x3f, 0xa6, 0x87, 0x0e } },
-
-      { "data-152 key-131",
-        "This is a test using a larger than block-size key and a larger "
-        "than block-size data. The key needs to be hashed before being "
-        "used by the HMAC algorithm.",
-       "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
-        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
-        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
-        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
-        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
-        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
-        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
-        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
-        "\xaa\xaa\xaa",
-        { 0x3a, 0x85, 0x41, 0x66, 0xac, 0x5d, 0x9f, 0x02,
-          0x3f, 0x54, 0xd5, 0x17, 0xd0, 0xb3, 0x9d, 0xbd,
-          0x94, 0x67, 0x70, 0xdb, 0x9c, 0x2b, 0x95, 0xc9,
-          0xf6, 0xf5, 0x65, 0xd1 } },
-
-      { NULL }
-    };
-  const char *what;
-  const char *errtxt;
-  int tvidx;
-
-  for (tvidx=0; tv[tvidx].desc; tvidx++)
-    {
-      what = tv[tvidx].desc;
-      errtxt = check_one (GCRY_MD_SHA224,
-                          tv[tvidx].data, strlen (tv[tvidx].data),
-                          tv[tvidx].key, strlen (tv[tvidx].key),
-                          tv[tvidx].expect, DIM (tv[tvidx].expect) );
-      if (errtxt)
-        goto failed;
-      if (!extended)
-        break;
-    }
-
-  return 0; /* Succeeded. */
-
- failed:
-  if (report)
-    report ("hmac", GCRY_MD_SHA224, what, errtxt);
-  return GPG_ERR_SELFTEST_FAILED;
-}
-
-
-static gpg_err_code_t
-selftests_sha256 (int extended, selftest_report_func_t report)
-{
-  static struct
-  {
-    const char * const desc;
-    const char * const data;
-    const char * const key;
-    const char expect[32];
-  } tv[] =
-    {
-      { "data-28 key-4",
-        "what do ya want for nothing?",
-        "Jefe",
-       { 0x5b, 0xdc, 0xc1, 0x46, 0xbf, 0x60, 0x75, 0x4e,
-          0x6a, 0x04, 0x24, 0x26, 0x08, 0x95, 0x75, 0xc7,
-          0x5a, 0x00, 0x3f, 0x08, 0x9d, 0x27, 0x39, 0x83,
-          0x9d, 0xec, 0x58, 0xb9, 0x64, 0xec, 0x38, 0x43 } },
-
-      { "data-9 key-20",
-        "Hi There",
-       "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b"
-        "\x0b\x0b\x0b\x0b",
-        { 0xb0, 0x34, 0x4c, 0x61, 0xd8, 0xdb, 0x38, 0x53,
-          0x5c, 0xa8, 0xaf, 0xce, 0xaf, 0x0b, 0xf1, 0x2b,
-          0x88, 0x1d, 0xc2, 0x00, 0xc9, 0x83, 0x3d, 0xa7,
-          0x26, 0xe9, 0x37, 0x6c, 0x2e, 0x32, 0xcf, 0xf7 } },
-
-      { "data-50 key-20",
-        "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd"
-        "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd"
-        "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd"
-        "\xdd\xdd",
-       "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
-        "\xaa\xaa\xaa\xaa",
-        { 0x77, 0x3e, 0xa9, 0x1e, 0x36, 0x80, 0x0e, 0x46,
-          0x85, 0x4d, 0xb8, 0xeb, 0xd0, 0x91, 0x81, 0xa7,
-          0x29, 0x59, 0x09, 0x8b, 0x3e, 0xf8, 0xc1, 0x22,
-          0xd9, 0x63, 0x55, 0x14, 0xce, 0xd5, 0x65, 0xfe } },
-
-      { "data-50 key-26",
-        "\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd"
-        "\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd"
-        "\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd"
-        "\xcd\xcd",
-       "\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10"
-        "\x11\x12\x13\x14\x15\x16\x17\x18\x19",
-       { 0x82, 0x55, 0x8a, 0x38, 0x9a, 0x44, 0x3c, 0x0e,
-          0xa4, 0xcc, 0x81, 0x98, 0x99, 0xf2, 0x08, 0x3a,
-          0x85, 0xf0, 0xfa, 0xa3, 0xe5, 0x78, 0xf8, 0x07,
-          0x7a, 0x2e, 0x3f, 0xf4, 0x67, 0x29, 0x66, 0x5b } },
-
-      { "data-54 key-131",
-        "Test Using Larger Than Block-Size Key - Hash Key First",
-       "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
-        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
-        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
-        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
-        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
-        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
-        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
-        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
-        "\xaa\xaa\xaa",
-       { 0x60, 0xe4, 0x31, 0x59, 0x1e, 0xe0, 0xb6, 0x7f,
-          0x0d, 0x8a, 0x26, 0xaa, 0xcb, 0xf5, 0xb7, 0x7f,
-          0x8e, 0x0b, 0xc6, 0x21, 0x37, 0x28, 0xc5, 0x14,
-          0x05, 0x46, 0x04, 0x0f, 0x0e, 0xe3, 0x7f, 0x54 } },
-
-      { "data-152 key-131",
-        "This is a test using a larger than block-size key and a larger "
-        "than block-size data. The key needs to be hashed before being "
-        "used by the HMAC algorithm.",
-       "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
-        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
-        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
-        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
-        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
-        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
-        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
-        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
-        "\xaa\xaa\xaa",
-       { 0x9b, 0x09, 0xff, 0xa7, 0x1b, 0x94, 0x2f, 0xcb,
-          0x27, 0x63, 0x5f, 0xbc, 0xd5, 0xb0, 0xe9, 0x44,
-          0xbf, 0xdc, 0x63, 0x64, 0x4f, 0x07, 0x13, 0x93,
-          0x8a, 0x7f, 0x51, 0x53, 0x5c, 0x3a, 0x35, 0xe2 } },
-
-      { NULL }
-    };
-  const char *what;
-  const char *errtxt;
-  int tvidx;
-
-  for (tvidx=0; tv[tvidx].desc; tvidx++)
-    {
-      hmac256_context_t hmachd;
-      const unsigned char *digest;
-      size_t dlen;
-
-      what = tv[tvidx].desc;
-      errtxt = check_one (GCRY_MD_SHA256,
-                          tv[tvidx].data, strlen (tv[tvidx].data),
-                          tv[tvidx].key, strlen (tv[tvidx].key),
-                          tv[tvidx].expect, DIM (tv[tvidx].expect) );
-      if (errtxt)
-        goto failed;
-
-      hmachd = _gcry_hmac256_new (tv[tvidx].key, strlen (tv[tvidx].key));
-      if (!hmachd)
-        {
-          errtxt = "_gcry_hmac256_new failed";
-          goto failed;
-        }
-      _gcry_hmac256_update (hmachd, tv[tvidx].data, strlen (tv[tvidx].data));
-      digest = _gcry_hmac256_finalize (hmachd, &dlen);
-      if (!digest)
-        {
-          errtxt = "_gcry_hmac256_finalize failed";
-          _gcry_hmac256_release (hmachd);
-          goto failed;
-        }
-      if (dlen != DIM (tv[tvidx].expect)
-          || memcmp (digest, tv[tvidx].expect, DIM (tv[tvidx].expect)))
-        {
-          errtxt = "does not match in second implementation";
-          _gcry_hmac256_release (hmachd);
-          goto failed;
-        }
-      _gcry_hmac256_release (hmachd);
-
-      if (!extended)
-        break;
-    }
-
-  return 0; /* Succeeded. */
-
- failed:
-  if (report)
-    report ("hmac", GCRY_MD_SHA256, what, errtxt);
-  return GPG_ERR_SELFTEST_FAILED;
-}
-
-
-static gpg_err_code_t
-selftests_sha384 (int extended, selftest_report_func_t report)
-{
-  static struct
-  {
-    const char * const desc;
-    const char * const data;
-    const char * const key;
-    const char expect[48];
-  } tv[] =
-    {
-      { "data-28 key-4",
-        "what do ya want for nothing?",
-        "Jefe",
-        { 0xaf, 0x45, 0xd2, 0xe3, 0x76, 0x48, 0x40, 0x31,
-          0x61, 0x7f, 0x78, 0xd2, 0xb5, 0x8a, 0x6b, 0x1b,
-          0x9c, 0x7e, 0xf4, 0x64, 0xf5, 0xa0, 0x1b, 0x47,
-          0xe4, 0x2e, 0xc3, 0x73, 0x63, 0x22, 0x44, 0x5e,
-          0x8e, 0x22, 0x40, 0xca, 0x5e, 0x69, 0xe2, 0xc7,
-          0x8b, 0x32, 0x39, 0xec, 0xfa, 0xb2, 0x16, 0x49 } },
-
-      { "data-9 key-20",
-        "Hi There",
-       "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b"
-        "\x0b\x0b\x0b\x0b",
-        { 0xaf, 0xd0, 0x39, 0x44, 0xd8, 0x48, 0x95, 0x62,
-          0x6b, 0x08, 0x25, 0xf4, 0xab, 0x46, 0x90, 0x7f,
-          0x15, 0xf9, 0xda, 0xdb, 0xe4, 0x10, 0x1e, 0xc6,
-          0x82, 0xaa, 0x03, 0x4c, 0x7c, 0xeb, 0xc5, 0x9c,
-          0xfa, 0xea, 0x9e, 0xa9, 0x07, 0x6e, 0xde, 0x7f,
-          0x4a, 0xf1, 0x52, 0xe8, 0xb2, 0xfa, 0x9c, 0xb6 } },
-
-      { "data-50 key-20",
-        "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd"
-        "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd"
-        "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd"
-        "\xdd\xdd",
-       "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
-        "\xaa\xaa\xaa\xaa",
-        { 0x88, 0x06, 0x26, 0x08, 0xd3, 0xe6, 0xad, 0x8a,
-          0x0a, 0xa2, 0xac, 0xe0, 0x14, 0xc8, 0xa8, 0x6f,
-          0x0a, 0xa6, 0x35, 0xd9, 0x47, 0xac, 0x9f, 0xeb,
-          0xe8, 0x3e, 0xf4, 0xe5, 0x59, 0x66, 0x14, 0x4b,
-          0x2a, 0x5a, 0xb3, 0x9d, 0xc1, 0x38, 0x14, 0xb9,
-          0x4e, 0x3a, 0xb6, 0xe1, 0x01, 0xa3, 0x4f, 0x27 } },
-
-      { "data-50 key-26",
-        "\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd"
-        "\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd"
-        "\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd"
-        "\xcd\xcd",
-       "\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10"
-        "\x11\x12\x13\x14\x15\x16\x17\x18\x19",
-        { 0x3e, 0x8a, 0x69, 0xb7, 0x78, 0x3c, 0x25, 0x85,
-          0x19, 0x33, 0xab, 0x62, 0x90, 0xaf, 0x6c, 0xa7,
-          0x7a, 0x99, 0x81, 0x48, 0x08, 0x50, 0x00, 0x9c,
-          0xc5, 0x57, 0x7c, 0x6e, 0x1f, 0x57, 0x3b, 0x4e,
-          0x68, 0x01, 0xdd, 0x23, 0xc4, 0xa7, 0xd6, 0x79,
-          0xcc, 0xf8, 0xa3, 0x86, 0xc6, 0x74, 0xcf, 0xfb } },
-
-      { "data-54 key-131",
-        "Test Using Larger Than Block-Size Key - Hash Key First",
-       "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
-        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
-        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
-        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
-        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
-        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
-        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
-        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
-        "\xaa\xaa\xaa",
-        { 0x4e, 0xce, 0x08, 0x44, 0x85, 0x81, 0x3e, 0x90,
-          0x88, 0xd2, 0xc6, 0x3a, 0x04, 0x1b, 0xc5, 0xb4,
-          0x4f, 0x9e, 0xf1, 0x01, 0x2a, 0x2b, 0x58, 0x8f,
-          0x3c, 0xd1, 0x1f, 0x05, 0x03, 0x3a, 0xc4, 0xc6,
-          0x0c, 0x2e, 0xf6, 0xab, 0x40, 0x30, 0xfe, 0x82,
-          0x96, 0x24, 0x8d, 0xf1, 0x63, 0xf4, 0x49, 0x52 } },
-
-      { "data-152 key-131",
-        "This is a test using a larger than block-size key and a larger "
-        "than block-size data. The key needs to be hashed before being "
-        "used by the HMAC algorithm.",
-       "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
-        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
-        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
-        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
-        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
-        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
-        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
-        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
-        "\xaa\xaa\xaa",
-        { 0x66, 0x17, 0x17, 0x8e, 0x94, 0x1f, 0x02, 0x0d,
-          0x35, 0x1e, 0x2f, 0x25, 0x4e, 0x8f, 0xd3, 0x2c,
-          0x60, 0x24, 0x20, 0xfe, 0xb0, 0xb8, 0xfb, 0x9a,
-          0xdc, 0xce, 0xbb, 0x82, 0x46, 0x1e, 0x99, 0xc5,
-          0xa6, 0x78, 0xcc, 0x31, 0xe7, 0x99, 0x17, 0x6d,
-          0x38, 0x60, 0xe6, 0x11, 0x0c, 0x46, 0x52, 0x3e } },
-
-      { NULL }
-    };
-  const char *what;
-  const char *errtxt;
-  int tvidx;
-
-  for (tvidx=0; tv[tvidx].desc; tvidx++)
-    {
-      what = tv[tvidx].desc;
-      errtxt = check_one (GCRY_MD_SHA384,
-                          tv[tvidx].data, strlen (tv[tvidx].data),
-                          tv[tvidx].key, strlen (tv[tvidx].key),
-                          tv[tvidx].expect, DIM (tv[tvidx].expect) );
-      if (errtxt)
-        goto failed;
-      if (!extended)
-        break;
-    }
-
-  return 0; /* Succeeded. */
-
- failed:
-  if (report)
-    report ("hmac", GCRY_MD_SHA384, what, errtxt);
-  return GPG_ERR_SELFTEST_FAILED;
-}
-
-
-static gpg_err_code_t
-selftests_sha512 (int extended, selftest_report_func_t report)
-{
-  static struct
-  {
-    const char * const desc;
-    const char * const data;
-    const char * const key;
-    const char expect[64];
-  } tv[] =
-    {
-      { "data-28 key-4",
-        "what do ya want for nothing?",
-        "Jefe",
-        { 0x16, 0x4b, 0x7a, 0x7b, 0xfc, 0xf8, 0x19, 0xe2,
-          0xe3, 0x95, 0xfb, 0xe7, 0x3b, 0x56, 0xe0, 0xa3,
-          0x87, 0xbd, 0x64, 0x22, 0x2e, 0x83, 0x1f, 0xd6,
-          0x10, 0x27, 0x0c, 0xd7, 0xea, 0x25, 0x05, 0x54,
-          0x97, 0x58, 0xbf, 0x75, 0xc0, 0x5a, 0x99, 0x4a,
-          0x6d, 0x03, 0x4f, 0x65, 0xf8, 0xf0, 0xe6, 0xfd,
-          0xca, 0xea, 0xb1, 0xa3, 0x4d, 0x4a, 0x6b, 0x4b,
-          0x63, 0x6e, 0x07, 0x0a, 0x38, 0xbc, 0xe7, 0x37 } },
-
-      { "data-9 key-20",
-        "Hi There",
-       "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b"
-        "\x0b\x0b\x0b\x0b",
-        { 0x87, 0xaa, 0x7c, 0xde, 0xa5, 0xef, 0x61, 0x9d,
-          0x4f, 0xf0, 0xb4, 0x24, 0x1a, 0x1d, 0x6c, 0xb0,
-          0x23, 0x79, 0xf4, 0xe2, 0xce, 0x4e, 0xc2, 0x78,
-          0x7a, 0xd0, 0xb3, 0x05, 0x45, 0xe1, 0x7c, 0xde,
-          0xda, 0xa8, 0x33, 0xb7, 0xd6, 0xb8, 0xa7, 0x02,
-          0x03, 0x8b, 0x27, 0x4e, 0xae, 0xa3, 0xf4, 0xe4,
-          0xbe, 0x9d, 0x91, 0x4e, 0xeb, 0x61, 0xf1, 0x70,
-          0x2e, 0x69, 0x6c, 0x20, 0x3a, 0x12, 0x68, 0x54 } },
-
-      { "data-50 key-20",
-        "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd"
-        "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd"
-        "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd"
-        "\xdd\xdd",
-       "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
-        "\xaa\xaa\xaa\xaa",
-        { 0xfa, 0x73, 0xb0, 0x08, 0x9d, 0x56, 0xa2, 0x84,
-          0xef, 0xb0, 0xf0, 0x75, 0x6c, 0x89, 0x0b, 0xe9,
-          0xb1, 0xb5, 0xdb, 0xdd, 0x8e, 0xe8, 0x1a, 0x36,
-          0x55, 0xf8, 0x3e, 0x33, 0xb2, 0x27, 0x9d, 0x39,
-          0xbf, 0x3e, 0x84, 0x82, 0x79, 0xa7, 0x22, 0xc8,
-          0x06, 0xb4, 0x85, 0xa4, 0x7e, 0x67, 0xc8, 0x07,
-          0xb9, 0x46, 0xa3, 0x37, 0xbe, 0xe8, 0x94, 0x26,
-          0x74, 0x27, 0x88, 0x59, 0xe1, 0x32, 0x92, 0xfb } },
-
-      { "data-50 key-26",
-        "\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd"
-        "\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd"
-        "\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd"
-        "\xcd\xcd",
-       "\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10"
-        "\x11\x12\x13\x14\x15\x16\x17\x18\x19",
-        { 0xb0, 0xba, 0x46, 0x56, 0x37, 0x45, 0x8c, 0x69,
-          0x90, 0xe5, 0xa8, 0xc5, 0xf6, 0x1d, 0x4a, 0xf7,
-          0xe5, 0x76, 0xd9, 0x7f, 0xf9, 0x4b, 0x87, 0x2d,
-          0xe7, 0x6f, 0x80, 0x50, 0x36, 0x1e, 0xe3, 0xdb,
-          0xa9, 0x1c, 0xa5, 0xc1, 0x1a, 0xa2, 0x5e, 0xb4,
-          0xd6, 0x79, 0x27, 0x5c, 0xc5, 0x78, 0x80, 0x63,
-          0xa5, 0xf1, 0x97, 0x41, 0x12, 0x0c, 0x4f, 0x2d,
-          0xe2, 0xad, 0xeb, 0xeb, 0x10, 0xa2, 0x98, 0xdd } },
-
-      { "data-54 key-131",
-        "Test Using Larger Than Block-Size Key - Hash Key First",
-       "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
-        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
-        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
-        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
-        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
-        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
-        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
-        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
-        "\xaa\xaa\xaa",
-        { 0x80, 0xb2, 0x42, 0x63, 0xc7, 0xc1, 0xa3, 0xeb,
-          0xb7, 0x14, 0x93, 0xc1, 0xdd, 0x7b, 0xe8, 0xb4,
-          0x9b, 0x46, 0xd1, 0xf4, 0x1b, 0x4a, 0xee, 0xc1,
-          0x12, 0x1b, 0x01, 0x37, 0x83, 0xf8, 0xf3, 0x52,
-          0x6b, 0x56, 0xd0, 0x37, 0xe0, 0x5f, 0x25, 0x98,
-          0xbd, 0x0f, 0xd2, 0x21, 0x5d, 0x6a, 0x1e, 0x52,
-          0x95, 0xe6, 0x4f, 0x73, 0xf6, 0x3f, 0x0a, 0xec,
-          0x8b, 0x91, 0x5a, 0x98, 0x5d, 0x78, 0x65, 0x98 } },
-
-      { "data-152 key-131",
-        "This is a test using a larger than block-size key and a larger "
-        "than block-size data. The key needs to be hashed before being "
-        "used by the HMAC algorithm.",
-       "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
-        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
-        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
-        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
-        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
-        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
-        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
-        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
-        "\xaa\xaa\xaa",
-        { 0xe3, 0x7b, 0x6a, 0x77, 0x5d, 0xc8, 0x7d, 0xba,
-          0xa4, 0xdf, 0xa9, 0xf9, 0x6e, 0x5e, 0x3f, 0xfd,
-          0xde, 0xbd, 0x71, 0xf8, 0x86, 0x72, 0x89, 0x86,
-          0x5d, 0xf5, 0xa3, 0x2d, 0x20, 0xcd, 0xc9, 0x44,
-          0xb6, 0x02, 0x2c, 0xac, 0x3c, 0x49, 0x82, 0xb1,
-          0x0d, 0x5e, 0xeb, 0x55, 0xc3, 0xe4, 0xde, 0x15,
-          0x13, 0x46, 0x76, 0xfb, 0x6d, 0xe0, 0x44, 0x60,
-          0x65, 0xc9, 0x74, 0x40, 0xfa, 0x8c, 0x6a, 0x58 } },
-
-      { NULL }
-    };
-  const char *what;
-  const char *errtxt;
-  int tvidx;
-
-  for (tvidx=0; tv[tvidx].desc; tvidx++)
-    {
-      what = tv[tvidx].desc;
-      errtxt = check_one (GCRY_MD_SHA512,
-                          tv[tvidx].data, strlen (tv[tvidx].data),
-                          tv[tvidx].key, strlen (tv[tvidx].key),
-                          tv[tvidx].expect, DIM (tv[tvidx].expect) );
-      if (errtxt)
-        goto failed;
-      if (!extended)
-        break;
-    }
-
-  return 0; /* Succeeded. */
-
- failed:
-  if (report)
-    report ("hmac", GCRY_MD_SHA512, what, errtxt);
-  return GPG_ERR_SELFTEST_FAILED;
-}
-
-
-
-/* Run a full self-test for ALGO and return 0 on success.  */
-static gpg_err_code_t
-run_selftests (int algo, int extended, selftest_report_func_t report)
-{
-  gpg_err_code_t ec;
-
-  switch (algo)
-    {
-    case GCRY_MD_SHA1:
-      ec = selftests_sha1 (extended, report);
-      break;
-    case GCRY_MD_SHA224:
-      ec = selftests_sha224 (extended, report);
-      break;
-    case GCRY_MD_SHA256:
-      ec = selftests_sha256 (extended, report);
-      break;
-    case GCRY_MD_SHA384:
-      ec = selftests_sha384 (extended, report);
-      break;
-    case GCRY_MD_SHA512:
-      ec = selftests_sha512 (extended, report);
-      break;
-    default:
-      ec = GPG_ERR_DIGEST_ALGO;
-      break;
-    }
-  return ec;
-}
-
-
-
-
-/* Run the selftests for HMAC with digest algorithm ALGO with optional
-   reporting function REPORT.  */
-gpg_error_t
-_gcry_hmac_selftest (int algo, int extended, selftest_report_func_t report)
-{
-  gcry_err_code_t ec = 0;
-
-  if (!gcry_md_test_algo (algo))
-    {
-      ec = run_selftests (algo, extended, report);
-    }
-  else
-    {
-      ec = GPG_ERR_DIGEST_ALGO;
-      if (report)
-        report ("hmac", algo, "module", "algorithm not available");
-    }
-  return gpg_error (ec);
-}
diff --git a/grub-core/lib/libgcrypt/cipher/idea.c 
b/grub-core/lib/libgcrypt/cipher/idea.c
index 3c5578f95..7f7066606 100644
--- a/grub-core/lib/libgcrypt/cipher/idea.c
+++ b/grub-core/lib/libgcrypt/cipher/idea.c
@@ -48,6 +48,7 @@
 #include "types.h"  /* for byte and u32 typedefs */
 #include "g10lib.h"
 #include "cipher.h"
+#include "cipher-internal.h"
 
 
 #define IDEA_KEYSIZE 16
@@ -152,7 +153,7 @@ invert_key( u16 *ek, u16 dk[IDEA_KEYLEN] )
     *--p = t2;
     *--p = t1;
     memcpy(dk, temp, sizeof(temp) );
-    memset(temp, 0, sizeof(temp) );  /* burn temp */
+    wipememory(temp, sizeof(temp));
 }
 
 
@@ -250,7 +251,9 @@ do_setkey( IDEA_context *c, const byte *key, unsigned int 
keylen )
     if( selftest_failed )
        return GPG_ERR_SELFTEST_FAILED;
 
-    assert(keylen == 16);
+    if (keylen != 16)
+      return GPG_ERR_INV_KEYLEN;
+
     c->have_dk = 0;
     expand_key( key, c->ek );
     invert_key( c->ek, c->dk );
@@ -258,10 +261,12 @@ do_setkey( IDEA_context *c, const byte *key, unsigned int 
keylen )
 }
 
 static gcry_err_code_t
-idea_setkey (void *context, const byte *key, unsigned int keylen)
+idea_setkey (void *context, const byte *key, unsigned int keylen,
+             cipher_bulk_ops_t *bulk_ops)
 {
     IDEA_context *ctx = context;
     int rc = do_setkey (ctx, key, keylen);
+    (void)bulk_ops;
     _gcry_burn_stack (23+6*sizeof(void*));
     return rc;
 }
@@ -272,12 +277,12 @@ encrypt_block( IDEA_context *c, byte *outbuf, const byte 
*inbuf )
     cipher( outbuf, inbuf, c->ek );
 }
 
-static void
+static unsigned int
 idea_encrypt (void *context, byte *out, const byte *in)
 {
     IDEA_context *ctx = context;
     encrypt_block (ctx, out, in);
-    _gcry_burn_stack (24+3*sizeof (void*));
+    return /*burn_stack*/ (24+3*sizeof (void*));
 }
 
 static void
@@ -290,12 +295,12 @@ decrypt_block( IDEA_context *c, byte *outbuf, const byte 
*inbuf )
     cipher( outbuf, inbuf, c->dk );
 }
 
-static void
+static unsigned int
 idea_decrypt (void *context, byte *out, const byte *in)
 {
     IDEA_context *ctx = context;
     decrypt_block (ctx, out, in);
-    _gcry_burn_stack (24+3*sizeof (void*));
+    return /*burn_stack*/ (24+3*sizeof (void*));
 }
 
 
@@ -371,8 +376,9 @@ static struct {
 
 
 gcry_cipher_spec_t _gcry_cipher_spec_idea =
-{
+  {
+    GCRY_CIPHER_IDEA, {0, 0},
     "IDEA", NULL, NULL, IDEA_BLOCKSIZE, 128,
     sizeof (IDEA_context),
     idea_setkey, idea_encrypt, idea_decrypt
-};
+  };
diff --git a/grub-core/lib/libgcrypt/cipher/kdf-internal.h 
b/grub-core/lib/libgcrypt/cipher/kdf-internal.h
new file mode 100644
index 000000000..9e9a432e5
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/kdf-internal.h
@@ -0,0 +1,43 @@
+/* kdf-internal.h  - Internal defs for kdf.c
+ * Copyright (C) 2013 g10 Code GmbH
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser general Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef GCRY_KDF_INTERNAL_H
+#define GCRY_KDF_INTERNAL_H
+
+/*-- kdf.c --*/
+gpg_err_code_t
+_gcry_kdf_pkdf2 (const void *passphrase, size_t passphraselen,
+                 int hashalgo,
+                 const void *salt, size_t saltlen,
+                 unsigned long iterations,
+                 size_t keysize, void *keybuffer);
+
+/*-- scrypt.c --*/
+gcry_err_code_t
+_gcry_kdf_scrypt (const unsigned char *passwd, size_t passwdlen,
+                  int algo, int subalgo,
+                  const unsigned char *salt, size_t saltlen,
+                  unsigned long iterations,
+                  size_t dklen, unsigned char *dk);
+
+/*-- blake2.c --*/
+gcry_err_code_t
+blake2b_vl_hash (const void *in, size_t inlen, size_t outputlen, void *output);
+
+#endif /*GCRY_KDF_INTERNAL_H*/
diff --git a/grub-core/lib/libgcrypt/cipher/kdf.c 
b/grub-core/lib/libgcrypt/cipher/kdf.c
index 46e8550df..9f67e4d9f 100644
--- a/grub-core/lib/libgcrypt/cipher/kdf.c
+++ b/grub-core/lib/libgcrypt/cipher/kdf.c
@@ -1,5 +1,6 @@
 /* kdf.c  - Key Derivation Functions
- * Copyright (C) 1998, 2011 Free Software Foundation, Inc.
+ * Copyright (C) 1998, 2008, 2011 Free Software Foundation, Inc.
+ * Copyright (C) 2013 g10 Code GmbH
  *
  * This file is part of Libgcrypt.
  *
@@ -25,7 +26,7 @@
 
 #include "g10lib.h"
 #include "cipher.h"
-#include "ath.h"
+#include "kdf-internal.h"
 
 
 /* Transform a passphrase into a suitable key of length KEYSIZE and
@@ -33,7 +34,7 @@
    must provide an HASHALGO, a valid ALGO and depending on that algo a
    SALT of 8 bytes and the number of ITERATIONS.  Code taken from
    gnupg/agent/protect.c:hash_passphrase.  */
-gpg_err_code_t
+static gpg_err_code_t
 openpgp_s2k (const void *passphrase, size_t passphraselen,
              int algo, int hashalgo,
              const void *salt, size_t saltlen,
@@ -51,10 +52,9 @@ openpgp_s2k (const void *passphrase, size_t passphraselen,
       && (!salt || saltlen != 8))
     return GPG_ERR_INV_VALUE;
 
-  secmode = gcry_is_secure (passphrase) || gcry_is_secure (keybuffer);
+  secmode = _gcry_is_secure (passphrase) || _gcry_is_secure (keybuffer);
 
-  ec = gpg_err_code (gcry_md_open (&md, hashalgo,
-                                   secmode? GCRY_MD_FLAG_SECURE : 0));
+  ec = _gcry_md_open (&md, hashalgo, secmode? GCRY_MD_FLAG_SECURE : 0);
   if (ec)
     return ec;
 
@@ -62,10 +62,10 @@ openpgp_s2k (const void *passphrase, size_t passphraselen,
     {
       if (pass)
         {
-          gcry_md_reset (md);
+          _gcry_md_reset (md);
           for (i=0; i < pass; i++) /* Preset the hash context.  */
-            gcry_md_putc (md, 0);
-       }
+            _gcry_md_putc (md, 0);
+        }
 
       if (algo == GCRY_KDF_SALTED_S2K || algo == GCRY_KDF_ITERSALTED_S2K)
         {
@@ -81,30 +81,30 @@ openpgp_s2k (const void *passphrase, size_t passphraselen,
 
           while (count > len2)
             {
-              gcry_md_write (md, salt, saltlen);
-              gcry_md_write (md, passphrase, passphraselen);
+              _gcry_md_write (md, salt, saltlen);
+              _gcry_md_write (md, passphrase, passphraselen);
               count -= len2;
             }
           if (count < saltlen)
-            gcry_md_write (md, salt, count);
+            _gcry_md_write (md, salt, count);
           else
             {
-              gcry_md_write (md, salt, saltlen);
+              _gcry_md_write (md, salt, saltlen);
               count -= saltlen;
-              gcry_md_write (md, passphrase, count);
+              _gcry_md_write (md, passphrase, count);
             }
         }
       else
-        gcry_md_write (md, passphrase, passphraselen);
+        _gcry_md_write (md, passphrase, passphraselen);
 
-      gcry_md_final (md);
-      i = gcry_md_get_algo_dlen (hashalgo);
+      _gcry_md_final (md);
+      i = _gcry_md_get_algo_dlen (hashalgo);
       if (i > keysize - used)
         i = keysize - used;
-      memcpy (key+used, gcry_md_read (md, hashalgo), i);
+      memcpy (key+used, _gcry_md_read (md, hashalgo), i);
       used += i;
     }
-  gcry_md_close (md);
+  _gcry_md_close (md);
   return 0;
 }
 
@@ -116,16 +116,16 @@ openpgp_s2k (const void *passphrase, size_t passphraselen,
    used in HMAC mode.  SALT is a salt of length SALTLEN and ITERATIONS
    gives the number of iterations.  */
 gpg_err_code_t
-pkdf2 (const void *passphrase, size_t passphraselen,
-       int hashalgo,
-       const void *salt, size_t saltlen,
-       unsigned long iterations,
-       size_t keysize, void *keybuffer)
+_gcry_kdf_pkdf2 (const void *passphrase, size_t passphraselen,
+                 int hashalgo,
+                 const void *salt, size_t saltlen,
+                 unsigned long iterations,
+                 size_t keysize, void *keybuffer)
 {
   gpg_err_code_t ec;
   gcry_md_hd_t md;
   int secmode;
-  unsigned int dklen = keysize;
+  unsigned long dklen = keysize;
   char *dk = keybuffer;
   unsigned int hlen;   /* Output length of the digest function.  */
   unsigned int l;      /* Rounded up number of blocks.  */
@@ -138,17 +138,27 @@ pkdf2 (const void *passphrase, size_t passphraselen,
   unsigned long iter;  /* Current iteration number.  */
   unsigned int i;
 
-  if (!salt || !saltlen || !iterations || !dklen)
+  /* We allow for a saltlen of 0 here to support scrypt.  It is not
+     clear whether rfc2898 allows for this this, thus we do a test on
+     saltlen > 0 only in gcry_kdf_derive.  */
+  if (!salt || !iterations || !dklen)
     return GPG_ERR_INV_VALUE;
 
-  hlen = gcry_md_get_algo_dlen (hashalgo);
+  hlen = _gcry_md_get_algo_dlen (hashalgo);
   if (!hlen)
     return GPG_ERR_DIGEST_ALGO;
 
-  secmode = gcry_is_secure (passphrase) || gcry_is_secure (keybuffer);
+  secmode = _gcry_is_secure (passphrase) || _gcry_is_secure (keybuffer);
 
-  /* We ignore step 1 from pksc5v2.1 which demands a check that dklen
-     is not larger that 0xffffffff * hlen.  */
+  /* Step 1 */
+  /* If dkLen > (2^32 - 1) * hLen, output "derived key too long" and
+   * stop.  We use a stronger inequality but only if our type can hold
+   * a larger value.  */
+
+#if SIZEOF_UNSIGNED_LONG > 4
+  if (dklen > 0xffffffffU)
+    return GPG_ERR_INV_VALUE;
+#endif
 
   /* Step 2 */
   l = ((dklen - 1)/ hlen) + 1;
@@ -156,19 +166,26 @@ pkdf2 (const void *passphrase, size_t passphraselen,
 
   /* Setup buffers and prepare a hash context.  */
   sbuf = (secmode
-          ? gcry_malloc_secure (saltlen + 4 + hlen + hlen)
-          : gcry_malloc (saltlen + 4 + hlen + hlen));
+          ? xtrymalloc_secure (saltlen + 4 + hlen + hlen)
+          : xtrymalloc (saltlen + 4 + hlen + hlen));
   if (!sbuf)
     return gpg_err_code_from_syserror ();
   tbuf = sbuf + saltlen + 4;
   ubuf = tbuf + hlen;
 
-  ec = gpg_err_code (gcry_md_open (&md, hashalgo,
-                                   (GCRY_MD_FLAG_HMAC
-                                    | (secmode?GCRY_MD_FLAG_SECURE:0))));
+  ec = _gcry_md_open (&md, hashalgo, (GCRY_MD_FLAG_HMAC
+                                      | (secmode?GCRY_MD_FLAG_SECURE:0)));
+  if (ec)
+    {
+      xfree (sbuf);
+      return ec;
+    }
+
+  ec = _gcry_md_setkey (md, passphrase, passphraselen);
   if (ec)
     {
-      gcry_free (sbuf);
+      _gcry_md_close (md);
+      xfree (sbuf);
       return ec;
     }
 
@@ -178,27 +195,21 @@ pkdf2 (const void *passphrase, size_t passphraselen,
     {
       for (iter = 0; iter < iterations; iter++)
         {
-          ec = gpg_err_code (gcry_md_setkey (md, passphrase, passphraselen));
-          if (ec)
-            {
-              gcry_md_close (md);
-              gcry_free (sbuf);
-              return ec;
-            }
+          _gcry_md_reset (md);
           if (!iter) /* Compute U_1:  */
             {
               sbuf[saltlen]     = (lidx >> 24);
               sbuf[saltlen + 1] = (lidx >> 16);
               sbuf[saltlen + 2] = (lidx >> 8);
               sbuf[saltlen + 3] = lidx;
-              gcry_md_write (md, sbuf, saltlen + 4);
-              memcpy (ubuf, gcry_md_read (md, 0), hlen);
+              _gcry_md_write (md, sbuf, saltlen + 4);
+              memcpy (ubuf, _gcry_md_read (md, 0), hlen);
               memcpy (tbuf, ubuf, hlen);
             }
           else /* Compute U_(2..c):  */
             {
-              gcry_md_write (md, ubuf, hlen);
-              memcpy (ubuf, gcry_md_read (md, 0), hlen);
+              _gcry_md_write (md, ubuf, hlen);
+              memcpy (ubuf, _gcry_md_read (md, 0), hlen);
               for (i=0; i < hlen; i++)
                 tbuf[i] ^= ubuf[i];
             }
@@ -212,8 +223,8 @@ pkdf2 (const void *passphrase, size_t passphraselen,
         }
     }
 
-  gcry_md_close (md);
-  gcry_free (sbuf);
+  _gcry_md_close (md);
+  xfree (sbuf);
   return 0;
 }
 
@@ -229,20 +240,21 @@ pkdf2 (const void *passphrase, size_t passphraselen,
    is a salt as needed by most KDF algorithms.  ITERATIONS is a
    positive integer parameter to most KDFs.  0 is returned on success,
    or an error code on failure.  */
-gpg_error_t
-gcry_kdf_derive (const void *passphrase, size_t passphraselen,
-                 int algo, int subalgo,
-                 const void *salt, size_t saltlen,
-                 unsigned long iterations,
-                 size_t keysize, void *keybuffer)
+gpg_err_code_t
+_gcry_kdf_derive (const void *passphrase, size_t passphraselen,
+                  int algo, int subalgo,
+                  const void *salt, size_t saltlen,
+                  unsigned long iterations,
+                  size_t keysize, void *keybuffer)
 {
   gpg_err_code_t ec;
 
-  if (!passphrase || (!passphraselen && algo != GCRY_KDF_PBKDF2))
+  if (!passphrase)
     {
       ec = GPG_ERR_INV_DATA;
       goto leave;
     }
+
   if (!keybuffer || !keysize)
     {
       ec = GPG_ERR_INV_VALUE;
@@ -255,8 +267,11 @@ gcry_kdf_derive (const void *passphrase, size_t 
passphraselen,
     case GCRY_KDF_SIMPLE_S2K:
     case GCRY_KDF_SALTED_S2K:
     case GCRY_KDF_ITERSALTED_S2K:
-      ec = openpgp_s2k (passphrase, passphraselen, algo, subalgo,
-                        salt, saltlen, iterations, keysize, keybuffer);
+      if (!passphraselen)
+        ec = GPG_ERR_INV_DATA;
+      else
+        ec = openpgp_s2k (passphrase, passphraselen, algo, subalgo,
+                          salt, saltlen, iterations, keysize, keybuffer);
       break;
 
     case GCRY_KDF_PBKDF1:
@@ -264,8 +279,39 @@ gcry_kdf_derive (const void *passphrase, size_t 
passphraselen,
       break;
 
     case GCRY_KDF_PBKDF2:
-      ec = pkdf2 (passphrase, passphraselen, subalgo,
-                  salt, saltlen, iterations, keysize, keybuffer);
+      if (!saltlen)
+        ec = GPG_ERR_INV_VALUE;
+      else
+        {
+          /* FIPS requires minimum passphrase length, see FIPS 140-3 IG D.N */
+          if (fips_mode () && passphraselen < 8)
+            return GPG_ERR_INV_VALUE;
+
+          /* FIPS requires minimum salt length of 128 b (SP 800-132 sec. 5.1, 
p.6) */
+          if (fips_mode () && saltlen < 16)
+            return GPG_ERR_INV_VALUE;
+
+          /* FIPS requires minimum iterations bound (SP 800-132 sec 5.2, p.6) 
*/
+          if (fips_mode () && iterations < 1000)
+            return GPG_ERR_INV_VALUE;
+
+          /* Check minimum key size */
+          if (fips_mode () && keysize < 14)
+            return GPG_ERR_INV_VALUE;
+
+          ec = _gcry_kdf_pkdf2 (passphrase, passphraselen, subalgo,
+                                salt, saltlen, iterations, keysize, keybuffer);
+        }
+      break;
+
+    case 41:
+    case GCRY_KDF_SCRYPT:
+#if USE_SCRYPT
+      ec = _gcry_kdf_scrypt (passphrase, passphraselen, algo, subalgo,
+                             salt, saltlen, iterations, keysize, keybuffer);
+#else
+      ec = GPG_ERR_UNSUPPORTED_ALGORITHM;
+#endif /*USE_SCRYPT*/
       break;
 
     default:
@@ -274,5 +320,913 @@ gcry_kdf_derive (const void *passphrase, size_t 
passphraselen,
     }
 
  leave:
+  return ec;
+}
+
+#include "bufhelp.h"
+
+typedef struct argon2_context *argon2_ctx_t;
+
+/* Per thread data for Argon2.  */
+struct argon2_thread_data {
+  argon2_ctx_t a;
+  unsigned int pass;
+  unsigned int slice;
+  unsigned int lane;
+};
+
+/* Argon2 context */
+struct argon2_context {
+  int algo;
+  int hash_type;
+
+  unsigned int outlen;
+
+  const unsigned char *password;
+  size_t passwordlen;
+
+  const unsigned char *salt;
+  size_t saltlen;
+
+  const unsigned char *key;
+  size_t keylen;
+
+  const unsigned char *ad;
+  size_t adlen;
+
+  unsigned int m_cost;
+
+  unsigned int passes;
+  unsigned int memory_blocks;
+  unsigned int segment_length;
+  unsigned int lane_length;
+  unsigned int lanes;
+
+  u64 *block;
+  struct argon2_thread_data *thread_data;
+
+  unsigned char out[1];  /* In future, we may use flexible array member.  */
+};
+
+#define ARGON2_VERSION 0x13
+
+#define ARGON2_WORDS_IN_BLOCK (1024/8)
+
+static void
+xor_block (u64 *dst, const u64 *src)
+{
+  int i;
+
+  for (i = 0; i < ARGON2_WORDS_IN_BLOCK; i++)
+    dst[i] ^= src[i];
+}
+
+static void
+beswap64_block (u64 *dst)
+{
+#ifdef WORDS_BIGENDIAN
+  int i;
+
+  /* Swap a block in big-endian 64-bit word into one in
+     little-endian.  */
+  for (i = 0; i < ARGON2_WORDS_IN_BLOCK; i++)
+    dst[i] = _gcry_bswap64 (dst[i]);
+#else
+  /* Nothing to do.  */
+  (void)dst;
+#endif
+}
+
+
+static gpg_err_code_t
+argon2_fill_first_blocks (argon2_ctx_t a)
+{
+  unsigned char h0_01_i[72];
+  unsigned char buf[10][4];
+  gcry_buffer_t iov[8];
+  unsigned int iov_count = 0;
+  int i;
+
+  /* Generate H0.  */
+  buf_put_le32 (buf[0], a->lanes);
+  buf_put_le32 (buf[1], a->outlen);
+  buf_put_le32 (buf[2], a->m_cost);
+  buf_put_le32 (buf[3], a->passes);
+  buf_put_le32 (buf[4], ARGON2_VERSION);
+  buf_put_le32 (buf[5], a->hash_type);
+  buf_put_le32 (buf[6], a->passwordlen);
+  iov[iov_count].data = buf[0];
+  iov[iov_count].len = 4 * 7;
+  iov[iov_count].off = 0;
+  iov_count++;
+  iov[iov_count].data = (void *)a->password;
+  iov[iov_count].len = a->passwordlen;
+  iov[iov_count].off = 0;
+  iov_count++;
+
+  buf_put_le32 (buf[7], a->saltlen);
+  iov[iov_count].data = buf[7];
+  iov[iov_count].len = 4;
+  iov[iov_count].off = 0;
+  iov_count++;
+  iov[iov_count].data = (void *)a->salt;
+  iov[iov_count].len = a->saltlen;
+  iov[iov_count].off = 0;
+  iov_count++;
+
+  buf_put_le32 (buf[8], a->keylen);
+  iov[iov_count].data = buf[8];
+  iov[iov_count].len = 4;
+  iov[iov_count].off = 0;
+  iov_count++;
+  if (a->key)
+    {
+      iov[iov_count].data = (void *)a->key;
+      iov[iov_count].len = a->keylen;
+      iov[iov_count].off = 0;
+      iov_count++;
+    }
+
+  buf_put_le32 (buf[9], a->adlen);
+  iov[iov_count].data = buf[9];
+  iov[iov_count].len = 4;
+  iov[iov_count].off = 0;
+  iov_count++;
+  if (a->ad)
+    {
+      iov[iov_count].data = (void *)a->ad;
+      iov[iov_count].len = a->adlen;
+      iov[iov_count].off = 0;
+      iov_count++;
+    }
+
+  _gcry_digest_spec_blake2b_512.hash_buffers (h0_01_i, 64, iov, iov_count);
+
+  for (i = 0; i < a->lanes; i++)
+    {
+      memset (h0_01_i+64, 0, 4);
+      buf_put_le32 (h0_01_i+64+4, i);
+      blake2b_vl_hash (h0_01_i, 72, 1024,
+                       &a->block[i*a->lane_length*ARGON2_WORDS_IN_BLOCK]);
+      beswap64_block (&a->block[i*a->lane_length*ARGON2_WORDS_IN_BLOCK]);
+
+      buf_put_le32 (h0_01_i+64, 1);
+      blake2b_vl_hash (h0_01_i, 72, 1024,
+                       &a->block[(i*a->lane_length+1)*ARGON2_WORDS_IN_BLOCK]);
+      beswap64_block (&a->block[(i*a->lane_length+1)*ARGON2_WORDS_IN_BLOCK]);
+    }
+  return 0;
+}
+
+static gpg_err_code_t
+argon2_init (argon2_ctx_t a, unsigned int parallelism,
+             unsigned int m_cost, unsigned int t_cost)
+{
+  gpg_err_code_t ec = 0;
+  unsigned int memory_blocks;
+  unsigned int segment_length;
+  void *block;
+  struct argon2_thread_data *thread_data;
+
+  memory_blocks = m_cost;
+  if (memory_blocks < 8 * parallelism)
+    memory_blocks = 8 * parallelism;
+
+  segment_length = memory_blocks / (parallelism * 4);
+  memory_blocks = segment_length * parallelism * 4;
+
+  a->passes = t_cost;
+  a->memory_blocks = memory_blocks;
+  a->segment_length = segment_length;
+  a->lane_length = segment_length * 4;
+  a->lanes = parallelism;
+
+  a->block = NULL;
+  a->thread_data = NULL;
+
+  block = xtrymalloc (1024 * memory_blocks);
+  if (!block)
+    {
+      ec = gpg_err_code_from_errno (errno);
+      return ec;
+    }
+  memset (block, 0, 1024 * memory_blocks);
+
+  thread_data = xtrymalloc (a->lanes * sizeof (struct argon2_thread_data));
+  if (!thread_data)
+    {
+      ec = gpg_err_code_from_errno (errno);
+      xfree (block);
+      return ec;
+    }
+
+  memset (thread_data, 0, a->lanes * sizeof (struct argon2_thread_data));
+
+  a->block = block;
+  a->thread_data = thread_data;
+  return 0;
+}
+
+
+static u64 fBlaMka (u64 x, u64 y)
+{
+  const u64 m = U64_C(0xFFFFFFFF);
+  return x + y + 2 * (x & m) * (y & m);
+}
+
+static u64 rotr64 (u64 w, unsigned int c)
+{
+  return (w >> c) | (w << (64 - c));
+}
+
+#define G(a, b, c, d)                                                          
\
+    do {                                                                       
\
+        a = fBlaMka(a, b);                                                     
\
+        d = rotr64(d ^ a, 32);                                                 
\
+        c = fBlaMka(c, d);                                                     
\
+        b = rotr64(b ^ c, 24);                                                 
\
+        a = fBlaMka(a, b);                                                     
\
+        d = rotr64(d ^ a, 16);                                                 
\
+        c = fBlaMka(c, d);                                                     
\
+        b = rotr64(b ^ c, 63);                                                 
\
+    } while ((void)0, 0)
+
+#define BLAKE2_ROUND_NOMSG(v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11,   
\
+                           v12, v13, v14, v15)                                 
\
+    do {                                                                       
\
+        G(v0, v4, v8, v12);                                                    
\
+        G(v1, v5, v9, v13);                                                    
\
+        G(v2, v6, v10, v14);                                                   
\
+        G(v3, v7, v11, v15);                                                   
\
+        G(v0, v5, v10, v15);                                                   
\
+        G(v1, v6, v11, v12);                                                   
\
+        G(v2, v7, v8, v13);                                                    
\
+        G(v3, v4, v9, v14);                                                    
\
+    } while ((void)0, 0)
+
+static void
+fill_block (const u64 *prev_block, const u64 *ref_block, u64 *curr_block,
+            int with_xor)
+{
+  u64 block_r[ARGON2_WORDS_IN_BLOCK];
+  u64 block_tmp[ARGON2_WORDS_IN_BLOCK];
+  int i;
+
+  memcpy (block_r, ref_block, 1024);
+  if (prev_block)
+    xor_block (block_r, prev_block);
+  memcpy (block_tmp, block_r, 1024);
+
+  if (with_xor)
+    xor_block (block_tmp, curr_block);
+
+  for (i = 0; i < 8; ++i)
+    BLAKE2_ROUND_NOMSG
+      (block_r[16 * i],      block_r[16 * i + 1],  block_r[16 * i + 2],
+       block_r[16 * i + 3],  block_r[16 * i + 4],  block_r[16 * i + 5],
+       block_r[16 * i + 6],  block_r[16 * i + 7],  block_r[16 * i + 8],
+       block_r[16 * i + 9],  block_r[16 * i + 10], block_r[16 * i + 11],
+       block_r[16 * i + 12], block_r[16 * i + 13], block_r[16 * i + 14],
+       block_r[16 * i + 15]);
+
+  for (i = 0; i < 8; i++)
+    BLAKE2_ROUND_NOMSG
+      (block_r[2 * i],      block_r[2 * i + 1],  block_r[2 * i + 16],
+       block_r[2 * i + 17], block_r[2 * i + 32], block_r[2 * i + 33],
+       block_r[2 * i + 48], block_r[2 * i + 49], block_r[2 * i + 64],
+       block_r[2 * i + 65], block_r[2 * i + 80], block_r[2 * i + 81],
+       block_r[2 * i + 96], block_r[2 * i + 97], block_r[2 * i + 112],
+       block_r[2 * i + 113]);
+
+  memcpy (curr_block, block_tmp, 1024);
+  xor_block (curr_block, block_r);
+}
+
+static void
+pseudo_random_generate (u64 *random_block, u64 *input_block)
+{
+  input_block[6]++;
+  fill_block (NULL, input_block, random_block, 0);
+  fill_block (NULL, random_block, random_block, 0);
+}
+
+static u32
+index_alpha (argon2_ctx_t a, const struct argon2_thread_data *t,
+             int segment_index, u32 random, int same_lane)
+{
+  u32 reference_area_size;
+  u64 relative_position;
+  u32 start_position;
+
+  if (t->pass == 0)
+    {
+      if (t->slice == 0)
+        reference_area_size = segment_index - 1;
+      else
+        {
+          if (same_lane)
+            reference_area_size = t->slice * a->segment_length
+              + segment_index - 1;
+          else
+            reference_area_size = t->slice * a->segment_length +
+              ((segment_index == 0) ? -1 : 0);
+        }
+    }
+  else
+    {
+      if (same_lane)
+        reference_area_size = a->lane_length
+          - a->segment_length + segment_index - 1;
+      else
+        reference_area_size = a->lane_length
+          - a->segment_length + ((segment_index == 0) ? -1 : 0);
+    }
+
+  relative_position = (random * (u64)random) >> 32;
+  relative_position = reference_area_size - 1 -
+    ((reference_area_size * relative_position) >> 32);
+
+  if (t->pass == 0)
+    start_position = 0;
+  else
+    start_position = (t->slice == 4 - 1)
+      ? 0
+      : (t->slice + 1) * a->segment_length;
+
+  return (start_position + relative_position) % a->lane_length;
+}
+
+static void
+argon2_compute_segment (void *priv)
+{
+  const struct argon2_thread_data *t = (const struct argon2_thread_data *)priv;
+  argon2_ctx_t a = t->a;
+  int i;
+  int prev_offset, curr_offset;
+  u32 ref_index, ref_lane;
+  u64 input_block[1024/sizeof (u64)];
+  u64 address_block[1024/sizeof (u64)];
+  u64 *random_block = NULL;
+
+  if (a->hash_type == GCRY_KDF_ARGON2I
+      || (a->hash_type == GCRY_KDF_ARGON2ID && t->pass == 0 && t->slice < 2))
+    {
+      memset (input_block, 0, 1024);
+      input_block[0] = t->pass;
+      input_block[1] = t->lane;
+      input_block[2] = t->slice;
+      input_block[3] = a->memory_blocks;
+      input_block[4] = a->passes;
+      input_block[5] = a->hash_type;
+      random_block = address_block;
+    }
+
+  if (t->pass == 0 && t->slice == 0)
+    {
+      if (random_block)
+        pseudo_random_generate (random_block, input_block);
+      i = 2;
+    }
+  else
+    i = 0;
+
+  curr_offset = t->lane * a->lane_length + t->slice * a->segment_length + i;
+  if ((curr_offset % a->lane_length))
+    prev_offset = curr_offset - 1;
+  else
+    prev_offset = curr_offset + a->lane_length - 1;
+
+  for (; i < a->segment_length; i++, curr_offset++, prev_offset++)
+    {
+      u64 *ref_block, *curr_block;
+      u64 rand64;
+
+      if ((curr_offset % a->lane_length) == 1)
+        prev_offset = curr_offset - 1;
+
+      if (random_block)
+        {
+          if ((i % (1024/sizeof (u64))) == 0)
+            pseudo_random_generate (random_block, input_block);
+
+          rand64 = random_block[(i% (1024/sizeof (u64)))];
+        }
+      else
+        rand64 = a->block[prev_offset*ARGON2_WORDS_IN_BLOCK];
+
+      if (t->pass == 0 && t->slice == 0)
+        ref_lane = t->lane;
+      else
+        ref_lane = (rand64 >> 32) % a->lanes;
+
+      ref_index = index_alpha (a, t, i, (rand64 & 0xffffffff),
+                               ref_lane == t->lane);
+      ref_block =
+        &a->block[(a->lane_length * ref_lane + ref_index)* 
ARGON2_WORDS_IN_BLOCK];
+
+      curr_block = &a->block[curr_offset * ARGON2_WORDS_IN_BLOCK];
+      fill_block (&a->block[prev_offset * ARGON2_WORDS_IN_BLOCK], ref_block,
+                  curr_block, t->pass != 0);
+    }
+}
+
+
+static gpg_err_code_t
+argon2_compute (argon2_ctx_t a, const struct gcry_kdf_thread_ops *ops)
+{
+  gpg_err_code_t ec;
+  unsigned int r;
+  unsigned int s;
+  unsigned int l;
+  int ret;
+
+  ec = argon2_fill_first_blocks (a);
+  if (ec)
+    return ec;
+
+  for (r = 0; r < a->passes; r++)
+    for (s = 0; s < 4; s++)
+      {
+        for (l = 0; l < a->lanes; l++)
+          {
+            struct argon2_thread_data *thread_data;
+
+            /* launch a thread.  */
+            thread_data = &a->thread_data[l];
+            thread_data->a = a;
+            thread_data->pass = r;
+            thread_data->slice = s;
+            thread_data->lane = l;
+
+            if (ops)
+             {
+               ret = ops->dispatch_job (ops->jobs_context,
+                                        argon2_compute_segment, thread_data);
+               if (ret < 0)
+                 return GPG_ERR_CANCELED;
+             }
+            else
+              argon2_compute_segment (thread_data);
+          }
+
+        if (ops)
+         {
+           ret = ops->wait_all_jobs (ops->jobs_context);
+           if (ret < 0)
+             return GPG_ERR_CANCELED;
+         }
+      }
+
+  return 0;
+}
+
+
+static gpg_err_code_t
+argon2_final (argon2_ctx_t a, size_t resultlen, void *result)
+{
+  int i;
+
+  if (resultlen != a->outlen)
+    return GPG_ERR_INV_VALUE;
+
+  memset (a->block, 0, 1024);
+  for (i = 0; i < a->lanes; i++)
+    {
+      u64 *last_block;
+
+      last_block = &a->block[(a->lane_length * i + (a->lane_length - 1))
+                             * ARGON2_WORDS_IN_BLOCK];
+      xor_block (a->block, last_block);
+    }
+
+  beswap64_block (a->block);
+  blake2b_vl_hash (a->block, 1024, a->outlen, result);
+  return 0;
+}
+
+static void
+argon2_close (argon2_ctx_t a)
+{
+  size_t n;
+
+  n = offsetof (struct argon2_context, out) + a->outlen;
+
+  if (a->block)
+    {
+      wipememory (a->block, 1024 * a->memory_blocks);
+      xfree (a->block);
+    }
+
+  if (a->thread_data)
+    xfree (a->thread_data);
+
+  wipememory (a, n);
+  xfree (a);
+}
+
+static gpg_err_code_t
+argon2_open (gcry_kdf_hd_t *hd, int subalgo,
+             const unsigned long *param, unsigned int paramlen,
+             const void *password, size_t passwordlen,
+             const void *salt, size_t saltlen,
+             const void *key, size_t keylen,
+             const void *ad, size_t adlen)
+{
+  int hash_type;
+  unsigned int taglen;
+  unsigned int t_cost;
+  unsigned int m_cost;
+  unsigned int parallelism = 1;
+  argon2_ctx_t a;
+  gpg_err_code_t ec;
+  size_t n;
+
+  if (subalgo != GCRY_KDF_ARGON2D
+      && subalgo != GCRY_KDF_ARGON2I
+      && subalgo != GCRY_KDF_ARGON2ID)
+    return GPG_ERR_INV_VALUE;
+  else
+    hash_type = subalgo;
+
+  /* param : [ tag_length, t_cost, m_cost, parallelism ] */
+  if (paramlen < 3 || paramlen > 4)
+    return GPG_ERR_INV_VALUE;
+  else
+    {
+      taglen = (unsigned int)param[0];
+      t_cost = (unsigned int)param[1];
+      m_cost = (unsigned int)param[2];
+      if (paramlen >= 4)
+        parallelism = (unsigned int)param[3];
+    }
+
+  if (parallelism == 0)
+    return GPG_ERR_INV_VALUE;
+
+  n = offsetof (struct argon2_context, out) + taglen;
+  a = xtrymalloc (n);
+  if (!a)
+    return gpg_err_code_from_errno (errno);
+
+  a->algo = GCRY_KDF_ARGON2;
+  a->hash_type = hash_type;
+
+  a->outlen = taglen;
+
+  a->password = password;
+  a->passwordlen = passwordlen;
+  a->salt = salt;
+  a->saltlen = saltlen;
+  a->key = key;
+  a->keylen = keylen;
+  a->ad = ad;
+  a->adlen = adlen;
+
+  a->m_cost = m_cost;
+
+  a->block = NULL;
+  a->thread_data = NULL;
+
+  ec = argon2_init (a, parallelism, m_cost, t_cost);
+  if (ec)
+    {
+      xfree (a);
+      return ec;
+    }
+
+  *hd = (void *)a;
+  return 0;
+}
+
+
+static gpg_err_code_t
+balloon_open (gcry_kdf_hd_t *hd, int subalgo,
+              const unsigned long *param, unsigned int paramlen,
+              const void *passphrase, size_t passphraselen,
+              const void *salt, size_t saltlen)
+{
+  /*
+   * It should have space_cost and time_cost.
+   * Optionally, for parallelised version, it has parallelism.
+   */
+  if (paramlen != 2 && paramlen != 3)
+    return GPG_ERR_INV_VALUE;
+
+  (void)param;
+  (void)subalgo;
+  (void)passphrase;
+  (void)passphraselen;
+  (void)salt;
+  (void)saltlen;
+  *hd = NULL;
+  return GPG_ERR_NOT_IMPLEMENTED;
+}
+
+
+struct gcry_kdf_handle {
+  int algo;
+  /* And algo specific parts come.  */
+};
+
+gpg_err_code_t
+_gcry_kdf_open (gcry_kdf_hd_t *hd, int algo, int subalgo,
+                const unsigned long *param, unsigned int paramlen,
+                const void *passphrase, size_t passphraselen,
+                const void *salt, size_t saltlen,
+                const void *key, size_t keylen,
+                const void *ad, size_t adlen)
+{
+  gpg_err_code_t ec;
+
+  switch (algo)
+    {
+    case GCRY_KDF_ARGON2:
+      if (!passphraselen || !saltlen)
+        ec = GPG_ERR_INV_VALUE;
+      else
+        ec = argon2_open (hd, subalgo, param, paramlen,
+                          passphrase, passphraselen, salt, saltlen,
+                          key, keylen, ad, adlen);
+      break;
+
+    case GCRY_KDF_BALLOON:
+      if (!passphraselen || !saltlen)
+        ec = GPG_ERR_INV_VALUE;
+      else
+        {
+          (void)key;
+          (void)keylen;
+          (void)ad;
+          (void)adlen;
+          ec = balloon_open (hd, subalgo, param, paramlen,
+                             passphrase, passphraselen, salt, saltlen);
+        }
+      break;
+
+    default:
+      ec = GPG_ERR_UNKNOWN_ALGORITHM;
+      break;
+    }
+
+  return ec;
+}
+
+gpg_err_code_t
+_gcry_kdf_compute (gcry_kdf_hd_t h, const struct gcry_kdf_thread_ops *ops)
+{
+  gpg_err_code_t ec;
+
+  switch (h->algo)
+    {
+    case GCRY_KDF_ARGON2:
+      ec = argon2_compute ((argon2_ctx_t)(void *)h, ops);
+      break;
+
+    default:
+      ec = GPG_ERR_UNKNOWN_ALGORITHM;
+      break;
+    }
+
+  return ec;
+}
+
+
+gpg_err_code_t
+_gcry_kdf_final (gcry_kdf_hd_t h, size_t resultlen, void *result)
+{
+  gpg_err_code_t ec;
+
+  switch (h->algo)
+    {
+    case GCRY_KDF_ARGON2:
+      ec = argon2_final ((argon2_ctx_t)(void *)h, resultlen, result);
+      break;
+
+    default:
+      ec = GPG_ERR_UNKNOWN_ALGORITHM;
+      break;
+    }
+
+  return ec;
+}
+
+void
+_gcry_kdf_close (gcry_kdf_hd_t h)
+{
+  switch (h->algo)
+    {
+    case GCRY_KDF_ARGON2:
+      argon2_close ((argon2_ctx_t)(void *)h);
+      break;
+
+    default:
+      break;
+    }
+}
+
+/* Check one KDF call with ALGO and HASH_ALGO using the regular KDF
+ * API. (passphrase,passphraselen) is the password to be derived,
+ * (salt,saltlen) the salt for the key derivation,
+ * iterations is the number of the kdf iterations,
+ * and (expect,expectlen) the expected result. Returns NULL on
+ * success or a string describing the failure.  */
+
+static const char *
+check_one (int algo, int hash_algo,
+           const void *passphrase, size_t passphraselen,
+           const void *salt, size_t saltlen,
+           unsigned long iterations,
+           const void *expect, size_t expectlen)
+{
+  unsigned char key[512]; /* hardcoded to avoid allocation */
+  size_t keysize = expectlen;
+  int rv;
+
+  if (keysize > sizeof(key))
+    return "invalid tests data";
+
+  rv = _gcry_kdf_derive (passphrase, passphraselen, algo,
+                         hash_algo, salt, saltlen, iterations,
+                         keysize, key);
+  /* In fips mode we have special requirements for the input and
+   * output parameters */
+  if (fips_mode ())
+    {
+      if (rv && (passphraselen < 8 || saltlen < 16 ||
+                 iterations < 1000 || expectlen < 14))
+        return NULL;
+      else if (rv)
+        return "gcry_kdf_derive unexpectedly failed in FIPS Mode";
+    }
+  else if (rv)
+    return "gcry_kdf_derive failed";
+
+  if (memcmp (key, expect, expectlen))
+    return "does not match";
+
+  return NULL;
+}
+
+
+static gpg_err_code_t
+selftest_pbkdf2 (int extended, selftest_report_func_t report)
+{
+  static const struct {
+    const char *desc;
+    const char *p;   /* Passphrase.  */
+    size_t plen;     /* Length of P. */
+    const char *salt;
+    size_t saltlen;
+    int hashalgo;
+    unsigned long c; /* Iterations.  */
+    int dklen;       /* Requested key length.  */
+    const char *dk;  /* Derived key.  */
+    int disabled;
+  } tv[] = {
+#if USE_SHA1
+#define NUM_TEST_VECTORS 9
+    /* SHA1 test vectors are from RFC-6070.  */
+    {
+      "Basic PBKDF2 SHA1 #1",
+      "password", 8,
+      "salt", 4,
+      GCRY_MD_SHA1,
+      1,
+      20,
+      "\x0c\x60\xc8\x0f\x96\x1f\x0e\x71\xf3\xa9"
+      "\xb5\x24\xaf\x60\x12\x06\x2f\xe0\x37\xa6"
+    },
+    {
+      "Basic PBKDF2 SHA1 #2",
+      "password", 8,
+      "salt", 4,
+      GCRY_MD_SHA1,
+      2,
+      20,
+      "\xea\x6c\x01\x4d\xc7\x2d\x6f\x8c\xcd\x1e"
+      "\xd9\x2a\xce\x1d\x41\xf0\xd8\xde\x89\x57"
+    },
+    {
+      "Basic PBKDF2 SHA1 #3",
+      "password", 8,
+      "salt", 4,
+      GCRY_MD_SHA1,
+      4096,
+      20,
+      "\x4b\x00\x79\x01\xb7\x65\x48\x9a\xbe\xad"
+      "\x49\xd9\x26\xf7\x21\xd0\x65\xa4\x29\xc1"
+    },
+    {
+      "Basic PBKDF2 SHA1 #4",
+      "password", 8,
+      "salt", 4,
+      GCRY_MD_SHA1,
+      16777216,
+      20,
+      "\xee\xfe\x3d\x61\xcd\x4d\xa4\xe4\xe9\x94"
+      "\x5b\x3d\x6b\xa2\x15\x8c\x26\x34\xe9\x84",
+      1 /* This test takes too long.  */
+    },
+    {
+      "Basic PBKDF2 SHA1 #5",
+      "passwordPASSWORDpassword", 24,
+      "saltSALTsaltSALTsaltSALTsaltSALTsalt", 36,
+      GCRY_MD_SHA1,
+      4096,
+      25,
+      "\x3d\x2e\xec\x4f\xe4\x1c\x84\x9b\x80\xc8"
+      "\xd8\x36\x62\xc0\xe4\x4a\x8b\x29\x1a\x96"
+      "\x4c\xf2\xf0\x70\x38"
+    },
+    {
+      "Basic PBKDF2 SHA1 #6",
+      "pass\0word", 9,
+      "sa\0lt", 5,
+      GCRY_MD_SHA1,
+      4096,
+      16,
+      "\x56\xfa\x6a\xa7\x55\x48\x09\x9d\xcc\x37"
+      "\xd7\xf0\x34\x25\xe0\xc3"
+    },
+    { /* empty password test, not in RFC-6070 */
+      "Basic PBKDF2 SHA1 #7",
+      "", 0,
+      "salt", 4,
+      GCRY_MD_SHA1,
+      2,
+      20,
+      "\x13\x3a\x4c\xe8\x37\xb4\xd2\x52\x1e\xe2"
+      "\xbf\x03\xe1\x1c\x71\xca\x79\x4e\x07\x97"
+    },
+#else
+#define NUM_TEST_VECTORS 2
+#endif
+    {
+      "Basic PBKDF2 SHA256",
+      "password", 8,
+      "salt", 4,
+      GCRY_MD_SHA256,
+      2,
+      32,
+      "\xae\x4d\x0c\x95\xaf\x6b\x46\xd3\x2d\x0a\xdf\xf9\x28\xf0\x6d\xd0"
+      "\x2a\x30\x3f\x8e\xf3\xc2\x51\xdf\xd6\xe2\xd8\x5a\x95\x47\x4c\x43"
+    },
+    {
+      "Extended PBKDF2 SHA256",
+      "passwordPASSWORDpassword", 24,
+      "saltSALTsaltSALTsaltSALTsaltSALTsalt", 36,
+      GCRY_MD_SHA256,
+      4096,
+      40,
+      "\x34\x8c\x89\xdb\xcb\xd3\x2b\x2f\x32\xd8\x14\xb8\x11\x6e\x84\xcf"
+      "\x2b\x17\x34\x7e\xbc\x18\x00\x18\x1c\x4e\x2a\x1f\xb8\xdd\x53\xe1"
+      "\xc6\x35\x51\x8c\x7d\xac\x47\xe9"
+    },
+    { NULL }
+  };
+  const char *what;
+  const char *errtxt;
+  int tvidx;
+
+  for (tvidx=0; tv[tvidx].desc; tvidx++)
+    {
+      what = tv[tvidx].desc;
+      if (tv[tvidx].disabled)
+        continue;
+      errtxt = check_one (GCRY_KDF_PBKDF2, tv[tvidx].hashalgo,
+                          tv[tvidx].p, tv[tvidx].plen,
+                          tv[tvidx].salt, tv[tvidx].saltlen,
+                          tv[tvidx].c,
+                          tv[tvidx].dk, tv[tvidx].dklen);
+      if (errtxt)
+        goto failed;
+      if (tvidx >= NUM_TEST_VECTORS - 1 && !extended)
+        break;
+    }
+
+  return 0; /* Succeeded. */
+
+ failed:
+  if (report)
+    report ("kdf", GCRY_KDF_PBKDF2, what, errtxt);
+  return GPG_ERR_SELFTEST_FAILED;
+}
+
+
+/* Run the selftests for KDF with KDF algorithm ALGO with optional
+   reporting function REPORT.  */
+gpg_error_t
+_gcry_kdf_selftest (int algo, int extended, selftest_report_func_t report)
+{
+  gcry_err_code_t ec = 0;
+
+  if (algo == GCRY_KDF_PBKDF2)
+    ec = selftest_pbkdf2 (extended, report);
+  else
+    {
+      ec = GPG_ERR_UNSUPPORTED_ALGORITHM;
+      if (report)
+        report ("kdf", algo, "module", "algorithm not available");
+    }
   return gpg_error (ec);
 }
diff --git a/grub-core/lib/libgcrypt/cipher/keccak-armv7-neon.S 
b/grub-core/lib/libgcrypt/cipher/keccak-armv7-neon.S
new file mode 100644
index 000000000..28a284a1e
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/keccak-armv7-neon.S
@@ -0,0 +1,945 @@
+/* keccak-armv7-neon.S  -  ARMv7/NEON implementation of Keccak
+ *
+ * Copyright (C) 2015 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+
+#if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) && \
+    defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) && \
+    defined(HAVE_GCC_INLINE_ASM_NEON)
+
+/* Based on public-domain/CC0 implementation from SUPERCOP package
+ * (keccakc1024/inplace-armv7a-neon/keccak2.s)
+ *
+ * Original copyright header follows:
+ */
+
+@ The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
+@ Michaël Peeters and Gilles Van Assche. For more information, feedback or
+@ questions, please refer to our website: http://keccak.noekeon.org/
+@
+@ Implementation by Ronny Van Keer, hereby denoted as "the implementer".
+@
+@ To the extent possible under law, the implementer has waived all copyright
+@ and related or neighboring rights to the source code in this file.
+@ http://creativecommons.org/publicdomain/zero/1.0/
+
+.text
+
+.syntax unified
+.fpu neon
+.arm
+
+
+.extern _gcry_keccak_round_consts_64bit;
+
+#ifdef __PIC__
+#  define GET_DATA_POINTER(reg, name, rtmp) \
+               ldr reg, 1f; \
+               ldr rtmp, 2f; \
+               b 3f; \
+       1:      .word _GLOBAL_OFFSET_TABLE_-(3f+8); \
+       2:      .word name(GOT); \
+       3:      add reg, pc, reg; \
+               ldr reg, [reg, rtmp];
+#else
+#  define GET_DATA_POINTER(reg, name, rtmp) ldr reg, =name
+#endif
+
+
+@//  --- offsets in state
+.equ Aba, 0*8
+.equ Aga, 1*8
+.equ Aka, 2*8
+.equ Ama, 3*8
+.equ Asa, 4*8
+
+@// --- macros
+
+.macro    KeccakThetaRhoPiChiIota argA1, argA2, argA3, argA4, argA5
+
+    @Prepare Theta
+    @Ca = Aba^Aga^Aka^Ama^Asa@
+    @Ce = Abe^Age^Ake^Ame^Ase@
+    @Ci = Abi^Agi^Aki^Ami^Asi@
+    @Co = Abo^Ago^Ako^Amo^Aso@
+    @Cu = Abu^Agu^Aku^Amu^Asu@
+    @De = Ca^ROL64(Ci, 1)@
+    @Di = Ce^ROL64(Co, 1)@
+    @Do = Ci^ROL64(Cu, 1)@
+    @Du = Co^ROL64(Ca, 1)@
+    @Da = Cu^ROL64(Ce, 1)@
+
+    veor.64 q4, q6, q7
+    veor.64 q5, q9, q10
+    veor.64 d8,  d8,   d9
+    veor.64 d10,  d10,   d11
+    veor.64 d1,  d8,   d16
+    veor.64 d2,  d10,   d17
+
+    veor.64 q4, q11, q12
+    veor.64 q5, q14, q15
+    veor.64 d8,  d8,   d9
+    veor.64 d10,  d10,   d11
+    veor.64 d3,  d8,   d26
+
+    vadd.u64 q4, q1, q1
+    veor.64 d4,  d10,   d27
+    vmov.64  d0, d5
+    vsri.64 q4, q1, #63
+
+    vadd.u64 q5, q2, q2
+    veor.64 q4, q4, q0
+    vsri.64 q5, q2, #63
+    vadd.u64 d7, d1, d1
+    veor.64 \argA2, \argA2, d8
+    veor.64 q5, q5, q1
+
+    vsri.64 d7, d1, #63
+    vshl.u64 d1, \argA2, #44
+    veor.64 \argA3, \argA3, d9
+    veor.64 d7, d7, d4
+
+    @Ba = argA1^Da@
+    @Be = ROL64((argA2^De), 44)@
+    @Bi = ROL64((argA3^Di), 43)@
+    @Bo = ROL64((argA4^Do), 21)@
+    @Bu = ROL64((argA5^Du), 14)@
+    @argA2 =   Be ^((~Bi)& Bo )@
+    @argA3 =   Bi ^((~Bo)& Bu )@
+    @argA4 =   Bo ^((~Bu)& Ba )@
+    @argA5 =   Bu ^((~Ba)& Be )@
+    @argA1 =   Ba ^((~Be)& Bi )@ argA1 ^= KeccakF1600RoundConstants[i+round]@
+    vsri.64 d1, \argA2, #64-44
+    vshl.u64 d2, \argA3, #43
+    vldr.64 d0, [sp, #\argA1]
+    veor.64 \argA4, \argA4, d10
+    vsri.64 d2, \argA3, #64-43
+    vshl.u64 d3, \argA4, #21
+    veor.64 \argA5, \argA5, d11
+    veor.64 d0, d0, d7
+    vsri.64 d3, \argA4, #64-21
+    vbic.64   d5, d2, d1
+    vshl.u64 d4, \argA5, #14
+    vbic.64   \argA2, d3, d2
+    vld1.64   d6, [ip]!
+    veor.64   d5, d0
+    vsri.64 d4, \argA5, #64-14
+    veor.64   d5, d6
+    vbic.64   \argA5, d1, d0
+    vbic.64   \argA3, d4, d3
+    vbic.64   \argA4, d0, d4
+    veor.64   \argA2, d1
+    vstr.64   d5, [sp, #\argA1]
+    veor.64   \argA3, d2
+    veor.64   \argA4, d3
+    veor.64   \argA5, d4
+
+    .endm
+
+.macro    KeccakThetaRhoPiChi1   argA1, argA2, argA3, argA4, argA5
+
+    @d2 = ROL64((argA1^Da), 3)@
+    @d3 = ROL64((argA2^De), 45)@
+    @d4 = ROL64((argA3^Di), 61)@
+    @d0 = ROL64((argA4^Do), 28)@
+    @d1 = ROL64((argA5^Du), 20)@
+    @argA1 =   Ba ^((~Be)&  Bi )@ Ca ^= argA1@
+    @argA2 =   Be ^((~Bi)&  Bo )@
+    @argA3 =   Bi ^((~Bo)&  Bu )@
+    @argA4 =   Bo ^((~Bu)&  Ba )@
+    @argA5 =   Bu ^((~Ba)&  Be )@
+
+    veor.64 \argA2, \argA2, d8
+    veor.64 \argA3, \argA3, d9
+    vshl.u64  d3, \argA2, #45
+    vldr.64 d6, [sp, #\argA1]
+    vshl.u64  d4, \argA3, #61
+    veor.64 \argA4, \argA4, d10
+    vsri.64  d3, \argA2, #64-45
+    veor.64 \argA5, \argA5, d11
+    vsri.64  d4, \argA3, #64-61
+    vshl.u64  d0, \argA4, #28
+    veor.64 d6, d6, d7
+    vshl.u64  d1, \argA5, #20
+    vbic.64   \argA3, d4, d3
+    vsri.64  d0, \argA4, #64-28
+    vbic.64   \argA4, d0, d4
+    vshl.u64  d2, d6, #3
+    vsri.64  d1, \argA5, #64-20
+    veor.64   \argA4, d3
+    vsri.64  d2, d6, #64-3
+    vbic.64   \argA5, d1, d0
+    vbic.64   d6, d2, d1
+    vbic.64   \argA2, d3, d2
+    veor.64   d6, d0
+    veor.64   \argA2, d1
+    vstr.64   d6, [sp, #\argA1]
+    veor.64   \argA3, d2
+    veor.64  d5, d6
+    veor.64   \argA5, d4
+
+    .endm
+
+.macro    KeccakThetaRhoPiChi2 argA1, argA2, argA3, argA4, argA5
+
+    @d4 = ROL64((argA1^Da), 18)@
+    @d0 = ROL64((argA2^De), 1)@
+    @d1 = ROL64((argA3^Di), 6)@
+    @d2 = ROL64((argA4^Do), 25)@
+    @d3 = ROL64((argA5^Du), 8)@
+    @argA1 =   Ba ^((~Be)&  Bi )@ Ca ^= argA1@
+    @argA2 =   Be ^((~Bi)&  Bo )@
+    @argA3 =   Bi ^((~Bo)&  Bu )@
+    @argA4 =   Bo ^((~Bu)&  Ba )@
+    @argA5 =   Bu ^((~Ba)&  Be )@
+
+    veor.64 \argA3, \argA3, d9
+    veor.64 \argA4, \argA4, d10
+    vshl.u64  d1, \argA3, #6
+    vldr.64 d6, [sp, #\argA1]
+    vshl.u64  d2, \argA4, #25
+    veor.64 \argA5, \argA5, d11
+    vsri.64  d1, \argA3, #64-6
+    veor.64 \argA2, \argA2, d8
+    vsri.64  d2, \argA4, #64-25
+    vext.8  d3, \argA5, \argA5, #7
+    veor.64 d6, d6, d7
+    vbic.64  \argA3, d2, d1
+    vadd.u64  d0, \argA2, \argA2
+    vbic.64   \argA4, d3, d2
+    vsri.64  d0, \argA2, #64-1
+    vshl.u64  d4, d6, #18
+    veor.64  \argA2, d1, \argA4
+    veor.64  \argA3, d0
+    vsri.64  d4, d6, #64-18
+    vstr.64   \argA3, [sp, #\argA1]
+    veor.64  d5, \argA3
+    vbic.64   \argA5, d1, d0
+    vbic.64   \argA3, d4, d3
+    vbic.64   \argA4, d0, d4
+    veor.64   \argA3, d2
+    veor.64   \argA4, d3
+    veor.64   \argA5, d4
+
+    .endm
+
+.macro    KeccakThetaRhoPiChi3 argA1, argA2, argA3, argA4, argA5
+
+    @d1 = ROL64((argA1^Da), 36)@
+    @d2 = ROL64((argA2^De), 10)@
+    @d3 = ROL64((argA3^Di), 15)@
+    @d4 = ROL64((argA4^Do), 56)@
+    @d0 = ROL64((argA5^Du), 27)@
+    @argA1 =   Ba ^((~Be)&  Bi )@ Ca ^= argA1@
+    @argA2 =   Be ^((~Bi)&  Bo )@
+    @argA3 =   Bi ^((~Bo)&  Bu )@
+    @argA4 =   Bo ^((~Bu)&  Ba )@
+    @argA5 =   Bu ^((~Ba)&  Be )@
+
+    veor.64 \argA2, \argA2, d8
+    veor.64 \argA3, \argA3, d9
+    vshl.u64  d2, \argA2, #10
+    vldr.64 d6, [sp, #\argA1]
+    vshl.u64  d3, \argA3, #15
+    veor.64 \argA4, \argA4, d10
+    vsri.64  d2, \argA2, #64-10
+    vsri.64  d3, \argA3, #64-15
+    veor.64 \argA5, \argA5, d11
+    vext.8  d4, \argA4, \argA4, #1
+    vbic.64   \argA2, d3, d2
+    vshl.u64  d0, \argA5, #27
+    veor.64 d6, d6, d7
+    vbic.64   \argA3, d4, d3
+    vsri.64  d0, \argA5, #64-27
+    vshl.u64  d1, d6, #36
+    veor.64   \argA3, d2
+    vbic.64   \argA4, d0, d4
+    vsri.64  d1, d6, #64-36
+
+    veor.64   \argA4, d3
+    vbic.64   d6, d2, d1
+    vbic.64   \argA5, d1, d0
+    veor.64   d6, d0
+    veor.64   \argA2, d1
+    vstr.64   d6, [sp, #\argA1]
+    veor.64  d5, d6
+    veor.64   \argA5, d4
+
+    .endm
+
+.macro    KeccakThetaRhoPiChi4 argA1, argA2, argA3, argA4, argA5
+
+    @d3 = ROL64((argA1^Da), 41)@
+    @d4 = ROL64((argA2^De), 2)@
+    @d0 = ROL64((argA3^Di), 62)@
+    @d1 = ROL64((argA4^Do), 55)@
+    @d2 = ROL64((argA5^Du), 39)@
+    @argA1 =   Ba ^((~Be)&  Bi )@ Ca ^= argA1@
+    @argA2 =   Be ^((~Bi)&  Bo )@
+    @argA3 =   Bi ^((~Bo)&  Bu )@
+    @argA4 =   Bo ^((~Bu)&  Ba )@
+    @argA5 =   Bu ^((~Ba)&  Be )@
+
+    veor.64 \argA2, \argA2, d8
+    veor.64 \argA3, \argA3, d9
+    vshl.u64  d4, \argA2, #2
+    veor.64 \argA5, \argA5, d11
+    vshl.u64  d0, \argA3, #62
+    vldr.64 d6, [sp, #\argA1]
+    vsri.64  d4, \argA2, #64-2
+    veor.64 \argA4, \argA4, d10
+    vsri.64  d0, \argA3, #64-62
+
+    vshl.u64  d1, \argA4, #55
+    veor.64 d6, d6, d7
+    vshl.u64  d2, \argA5, #39
+    vsri.64  d1, \argA4, #64-55
+    vbic.64  \argA4, d0, d4
+    vsri.64  d2, \argA5, #64-39
+    vbic.64  \argA2, d1, d0
+    vshl.u64  d3, d6, #41
+    veor.64  \argA5, d4, \argA2
+    vbic.64  \argA2, d2, d1
+    vsri.64  d3, d6, #64-41
+    veor.64  d6, d0, \argA2
+
+    vbic.64 \argA2, d3, d2
+    vbic.64 \argA3, d4, d3
+    veor.64 \argA2, d1
+    vstr.64 d6, [sp, #\argA1]
+    veor.64 d5, d6
+    veor.64 \argA3, d2
+    veor.64 \argA4, d3
+
+    .endm
+
+
+@// --- code
+
+@not callable from C!
+.p2align 3
+.type  KeccakF_armv7a_neon_asm,%function;
+KeccakF_armv7a_neon_asm:  @
+
+.LroundLoop:
+
+    KeccakThetaRhoPiChiIota  Aba, d13, d19, d25, d31
+    KeccakThetaRhoPiChi1    Aka, d15, d21, d22, d28
+    KeccakThetaRhoPiChi2    Asa, d12, d18, d24, d30
+    KeccakThetaRhoPiChi3    Aga, d14, d20, d26, d27
+    KeccakThetaRhoPiChi4    Ama, d16, d17, d23, d29
+
+    KeccakThetaRhoPiChiIota  Aba, d15, d18, d26, d29
+    KeccakThetaRhoPiChi1    Asa, d14, d17, d25, d28
+    KeccakThetaRhoPiChi2    Ama, d13, d21, d24, d27
+    KeccakThetaRhoPiChi3    Aka, d12, d20, d23, d31
+    KeccakThetaRhoPiChi4    Aga, d16, d19, d22, d30
+
+    KeccakThetaRhoPiChiIota Aba, d14, d21, d23, d30
+    KeccakThetaRhoPiChi1    Ama, d12, d19, d26, d28
+    KeccakThetaRhoPiChi2    Aga, d15, d17, d24, d31
+    KeccakThetaRhoPiChi3    Asa, d13, d20, d22, d29
+    KeccakThetaRhoPiChi4    Aka, d16, d18, d25, d27
+
+    KeccakThetaRhoPiChiIota Aba, d12, d17, d22, d27
+    KeccakThetaRhoPiChi1    Aga, d13, d18, d23, d28
+    KeccakThetaRhoPiChi2    Aka, d14, d19, d24, d29
+    ldr    r0, [ip]
+    KeccakThetaRhoPiChi3    Ama, d15, d20, d25, d30
+    cmp    r0, #0xFFFFFFFF
+    KeccakThetaRhoPiChi4    Asa, d16, d21, d26, d31
+
+    bne    .LroundLoop
+    sub    ip, #(8*24)
+    bx    lr
+.p2align 2
+.ltorg
+.size KeccakF_armv7a_neon_asm,.-KeccakF_armv7a_neon_asm;
+
+
+@//unsigned _gcry_keccak_permute_armv7_neon(u64 *state)  callable from C
+.p2align 3
+.global   _gcry_keccak_permute_armv7_neon
+.type  _gcry_keccak_permute_armv7_neon,%function;
+_gcry_keccak_permute_armv7_neon:
+
+    push   {ip, lr}
+    vpush  {q4-q7}
+    sub    sp,sp, #5*8
+
+    vldr.64  d0,  [r0, #0*8]
+    vldr.64  d12, [r0, #1*8]
+    vldr.64  d17, [r0, #2*8]
+    vldr.64  d22, [r0, #3*8]
+    vldr.64  d27, [r0, #4*8]
+
+    GET_DATA_POINTER(ip, _gcry_keccak_round_consts_64bit, lr);
+
+    vldr.64  d1,  [r0, #5*8]
+    vldr.64  d13, [r0, #6*8]
+    vldr.64  d18, [r0, #7*8]
+    vldr.64  d23, [r0, #8*8]
+    vldr.64  d28, [r0, #9*8]
+
+    vldr.64  d2,  [r0, #10*8]
+    vldr.64  d14, [r0, #11*8]
+    vldr.64  d19, [r0, #12*8]
+    vldr.64  d24, [r0, #13*8]
+    vldr.64  d29, [r0, #14*8]
+
+    vldr.64  d3,  [r0, #15*8]
+    vldr.64  d15, [r0, #16*8]
+    vldr.64  d20, [r0, #17*8]
+    vldr.64  d25, [r0, #18*8]
+    vldr.64  d30, [r0, #19*8]
+
+    vldr.64  d4,  [r0, #20*8]
+    vldr.64  d16, [r0, #21*8]
+    vldr.64  d21, [r0, #22*8]
+    vldr.64  d26, [r0, #23*8]
+    vldr.64  d31, [r0, #24*8]
+
+    vstr.64  d0, [sp, #Aba]
+    vstr.64  d1, [sp, #Aga]
+    veor.64 q0, q0, q1
+    vstr.64  d2, [sp, #Aka]
+    veor.64 d5, d0,  d1
+    vstr.64  d3, [sp, #Ama]
+    mov      r1, r0
+    vstr.64  d4, [sp, #Asa]
+    veor.64 d5, d5,  d4
+
+    bl KeccakF_armv7a_neon_asm
+
+    vpop.64  { d0- d4 }
+
+    vstr.64  d0,  [r1, #0*8]
+    vstr.64  d12, [r1, #1*8]
+    vstr.64  d17, [r1, #2*8]
+    vstr.64  d22, [r1, #3*8]
+    vstr.64  d27, [r1, #4*8]
+
+    vstr.64  d1,  [r1, #5*8]
+    vstr.64  d13, [r1, #6*8]
+    vstr.64  d18, [r1, #7*8]
+    vstr.64  d23, [r1, #8*8]
+    vstr.64  d28, [r1, #9*8]
+
+    vstr.64  d2,  [r1, #10*8]
+    vstr.64  d14, [r1, #11*8]
+    vstr.64  d19, [r1, #12*8]
+    vstr.64  d24, [r1, #13*8]
+    vstr.64  d29, [r1, #14*8]
+
+    vstr.64  d3,  [r1, #15*8]
+    vstr.64  d15, [r1, #16*8]
+    vstr.64  d20, [r1, #17*8]
+    vstr.64  d25, [r1, #18*8]
+    vstr.64  d30, [r1, #19*8]
+
+    vstr.64  d4,  [r1, #20*8]
+    vstr.64  d16, [r1, #21*8]
+    vstr.64  d21, [r1, #22*8]
+    vstr.64  d26, [r1, #23*8]
+    vstr.64  d31, [r1, #24*8]
+
+    mov   r0, #112
+    vpop  {q4-q7}
+    pop   {ip, pc}
+.p2align 2
+.ltorg
+.size _gcry_keccak_permute_armv7_neon,.-_gcry_keccak_permute_armv7_neon;
+
+@//unsigned _gcry_keccak_absorb_lanes64_armv7_neon(u64 *state, @r4
+@                                              int pos,    @r1
+@                                              const byte *lanes,   @r2
+@                                              size_t nlanes, @r3
+@                                              int blocklanes) @ r5 callable 
from C
+.p2align 3
+.global   _gcry_keccak_absorb_lanes64_armv7_neon
+.type  _gcry_keccak_absorb_lanes64_armv7_neon,%function;
+_gcry_keccak_absorb_lanes64_armv7_neon:
+
+    cmp    r3, #0      @ nlanes == 0
+    itt eq
+    moveq  r0, #0
+    bxeq   lr
+
+    push   {r4-r5, ip, lr}
+    beq    .Lout
+    mov    r4, r0
+    ldr    r5, [sp, #(4*4)]
+    vpush  {q4-q7}
+
+    @ load state
+    vldr.64  d0,  [r4, #0*8]
+    vldr.64  d12, [r4, #1*8]
+    vldr.64  d17, [r4, #2*8]
+    vldr.64  d22, [r4, #3*8]
+    vldr.64  d27, [r4, #4*8]
+
+    GET_DATA_POINTER(ip, _gcry_keccak_round_consts_64bit, lr);
+
+    vldr.64  d1,  [r4, #5*8]
+    vldr.64  d13, [r4, #6*8]
+    vldr.64  d18, [r4, #7*8]
+    vldr.64  d23, [r4, #8*8]
+    vldr.64  d28, [r4, #9*8]
+
+    vldr.64  d2,  [r4, #10*8]
+    vldr.64  d14, [r4, #11*8]
+    vldr.64  d19, [r4, #12*8]
+    vldr.64  d24, [r4, #13*8]
+    vldr.64  d29, [r4, #14*8]
+
+    vldr.64  d3,  [r4, #15*8]
+    vldr.64  d15, [r4, #16*8]
+    vldr.64  d20, [r4, #17*8]
+    vldr.64  d25, [r4, #18*8]
+    vldr.64  d30, [r4, #19*8]
+
+    vldr.64  d4,  [r4, #20*8]
+    vldr.64  d16, [r4, #21*8]
+    vldr.64  d21, [r4, #22*8]
+    vldr.64  d26, [r4, #23*8]
+    vldr.64  d31, [r4, #24*8]
+
+.Lmain_loop:
+
+    @ detect absorb mode (full blocks vs lanes)
+
+    cmp r1, #0         @ pos != 0
+    bne .Llanes_loop
+
+.Lmain_loop_pos0:
+
+    @ full blocks mode
+
+    @ switch (blocksize)
+    cmp r5, #21
+    beq .Lfull_block_21
+    cmp r5, #18
+    beq .Lfull_block_18
+    cmp r5, #17
+    beq .Lfull_block_17
+    cmp r5, #13
+    beq .Lfull_block_13
+    cmp r5, #9
+    beq .Lfull_block_9
+
+    @ unknown blocksize
+    b .Llanes_loop
+
+.Lfull_block_21:
+
+    @ SHAKE128
+
+    cmp r3, #21                @ nlanes < blocklanes
+    blo .Llanes_loop
+
+    sub    sp,sp, #5*8
+
+    vld1.64 {d5-d8}, [r2]!
+    veor d0,  d5
+    vld1.64 {d9-d11}, [r2]!
+    veor d12, d6
+    veor d17, d7
+    veor d22, d8
+    vld1.64 {d5-d8}, [r2]!
+    veor d27, d9
+
+    veor d1,  d10
+    veor d13, d11
+    vld1.64 {d9-d11}, [r2]!
+    veor d18, d5
+    veor d23, d6
+    veor d28, d7
+
+    veor d2,  d8
+    vld1.64 {d5-d8}, [r2]!
+    veor d14, d9
+    veor d19, d10
+    veor d24, d11
+    vld1.64 {d9-d11}, [r2]!
+    veor d29, d5
+
+    veor d3,  d6
+    veor d15, d7
+    veor d20, d8
+    veor d25, d9
+    veor d30, d10
+
+    veor d4,  d11
+
+    vstr.64  d0, [sp, #Aba]
+    vstr.64  d1, [sp, #Aga]
+    veor.64 q0, q0, q1
+    vstr.64  d2, [sp, #Aka]
+    veor.64 d5, d0,  d1
+    vstr.64  d3, [sp, #Ama]
+    vstr.64  d4, [sp, #Asa]
+    veor.64 d5, d5,  d4
+
+    bl KeccakF_armv7a_neon_asm
+
+    subs r3, #21       @ nlanes -= 21
+    vpop.64  { d0-d4 }
+
+    beq .Ldone
+
+    b .Lfull_block_21
+
+.Lfull_block_18:
+
+    @ SHA3-224
+
+    cmp r3, #18                @ nlanes < blocklanes
+    blo .Llanes_loop
+
+    sub    sp,sp, #5*8
+
+    vld1.64 {d5-d8}, [r2]!
+    veor d0,  d5
+    vld1.64 {d9-d11}, [r2]!
+    veor d12, d6
+    veor d17, d7
+    veor d22, d8
+    vld1.64 {d5-d8}, [r2]!
+    veor d27, d9
+
+    veor d1,  d10
+    veor d13, d11
+    vld1.64 {d9-d11}, [r2]!
+    veor d18, d5
+    veor d23, d6
+    veor d28, d7
+
+    veor d2,  d8
+    vld1.64 {d5-d8}, [r2]!
+    veor d14, d9
+    veor d19, d10
+    veor d24, d11
+    veor d29, d5
+
+    veor d3,  d6
+    veor d15, d7
+    veor d20, d8
+
+    vstr.64  d0, [sp, #Aba]
+    vstr.64  d1, [sp, #Aga]
+    veor.64 q0, q0, q1
+    vstr.64  d2, [sp, #Aka]
+    veor.64 d5, d0,  d1
+    vstr.64  d3, [sp, #Ama]
+    vstr.64  d4, [sp, #Asa]
+    veor.64 d5, d5,  d4
+
+    bl KeccakF_armv7a_neon_asm
+
+    subs r3, #18       @ nlanes -= 18
+    vpop.64  { d0-d4 }
+
+    beq .Ldone
+
+    b .Lfull_block_18
+
+.Lfull_block_17:
+
+    @ SHA3-256 & SHAKE256
+
+    cmp r3, #17                @ nlanes < blocklanes
+    blo .Llanes_loop
+
+    sub    sp,sp, #5*8
+
+    vld1.64 {d5-d8}, [r2]!
+    veor d0,  d5
+    vld1.64 {d9-d11}, [r2]!
+    veor d12, d6
+    veor d17, d7
+    veor d22, d8
+    vld1.64 {d5-d8}, [r2]!
+    veor d27, d9
+
+    veor d1,  d10
+    veor d13, d11
+    vld1.64 {d9-d11}, [r2]!
+    veor d18, d5
+    veor d23, d6
+    veor d28, d7
+
+    veor d2,  d8
+    vld1.64 {d5-d7}, [r2]!
+    veor d14, d9
+    veor d19, d10
+    veor d24, d11
+    veor d29, d5
+
+    veor d3,  d6
+    veor d15, d7
+
+    vstr.64  d0, [sp, #Aba]
+    vstr.64  d1, [sp, #Aga]
+    veor.64 q0, q0, q1
+    vstr.64  d2, [sp, #Aka]
+    veor.64 d5, d0,  d1
+    vstr.64  d3, [sp, #Ama]
+    vstr.64  d4, [sp, #Asa]
+    veor.64 d5, d5,  d4
+
+    bl KeccakF_armv7a_neon_asm
+
+    subs r3, #17       @ nlanes -= 17
+    vpop.64  { d0-d4 }
+
+    beq .Ldone
+
+    b .Lfull_block_17
+
+.Lfull_block_13:
+
+    @ SHA3-384
+
+    cmp r3, #13                @ nlanes < blocklanes
+    blo .Llanes_loop
+
+    sub    sp,sp, #5*8
+
+    vld1.64 {d5-d8}, [r2]!
+    veor d0,  d5
+    vld1.64 {d9-d11}, [r2]!
+    veor d12, d6
+    veor d17, d7
+    veor d22, d8
+    vld1.64 {d5-d8}, [r2]!
+    veor d27, d9
+
+    veor d1,  d10
+    veor d13, d11
+    vld1.64 {d9-d10}, [r2]!
+    veor d18, d5
+    veor d23, d6
+    veor d28, d7
+
+    veor d2,  d8
+    veor d14, d9
+    veor d19, d10
+
+    vstr.64  d0, [sp, #Aba]
+    vstr.64  d1, [sp, #Aga]
+    veor.64 q0, q0, q1
+    vstr.64  d2, [sp, #Aka]
+    veor.64 d5, d0,  d1
+    vstr.64  d3, [sp, #Ama]
+    vstr.64  d4, [sp, #Asa]
+    veor.64 d5, d5,  d4
+
+    bl KeccakF_armv7a_neon_asm
+
+    subs r3, #13       @ nlanes -= 13
+    vpop.64  { d0-d4 }
+
+    beq .Ldone
+
+    b .Lfull_block_13
+
+.Lfull_block_9:
+
+    @ SHA3-512
+
+    cmp r3, #9         @ nlanes < blocklanes
+    blo .Llanes_loop
+
+    sub    sp,sp, #5*8
+
+    vld1.64 {d5-d8}, [r2]!
+    veor d0,  d5
+    vld1.64 {d9-d11}, [r2]!
+    veor d12, d6
+    veor d17, d7
+    veor d22, d8
+    vld1.64 {d5-d6}, [r2]!
+    veor d27, d9
+
+    veor d1,  d10
+    veor d13, d11
+    veor d18, d5
+    veor d23, d6
+
+    vstr.64  d0, [sp, #Aba]
+    vstr.64  d1, [sp, #Aga]
+    veor.64 q0, q0, q1
+    vstr.64  d2, [sp, #Aka]
+    veor.64 d5, d0,  d1
+    vstr.64  d3, [sp, #Ama]
+    vstr.64  d4, [sp, #Asa]
+    veor.64 d5, d5,  d4
+
+    bl KeccakF_armv7a_neon_asm
+
+    subs r3, #9                @ nlanes -= 9
+    vpop.64  { d0-d4 }
+
+    beq .Ldone
+
+    b .Lfull_block_9
+
+.Llanes_loop:
+
+    @ per-lane mode
+
+    @ switch (pos)
+    ldrb r0, [pc, r1]
+    add pc, pc, r0, lsl #2
+.Lswitch_table:
+    .byte (.Llane0-.Lswitch_table-4)/4
+    .byte (.Llane1-.Lswitch_table-4)/4
+    .byte (.Llane2-.Lswitch_table-4)/4
+    .byte (.Llane3-.Lswitch_table-4)/4
+    .byte (.Llane4-.Lswitch_table-4)/4
+    .byte (.Llane5-.Lswitch_table-4)/4
+    .byte (.Llane6-.Lswitch_table-4)/4
+    .byte (.Llane7-.Lswitch_table-4)/4
+    .byte (.Llane8-.Lswitch_table-4)/4
+    .byte (.Llane9-.Lswitch_table-4)/4
+    .byte (.Llane10-.Lswitch_table-4)/4
+    .byte (.Llane11-.Lswitch_table-4)/4
+    .byte (.Llane12-.Lswitch_table-4)/4
+    .byte (.Llane13-.Lswitch_table-4)/4
+    .byte (.Llane14-.Lswitch_table-4)/4
+    .byte (.Llane15-.Lswitch_table-4)/4
+    .byte (.Llane16-.Lswitch_table-4)/4
+    .byte (.Llane17-.Lswitch_table-4)/4
+    .byte (.Llane18-.Lswitch_table-4)/4
+    .byte (.Llane19-.Lswitch_table-4)/4
+    .byte (.Llane20-.Lswitch_table-4)/4
+    .byte (.Llane21-.Lswitch_table-4)/4
+    .byte (.Llane22-.Lswitch_table-4)/4
+    .byte (.Llane23-.Lswitch_table-4)/4
+    .byte (.Llane24-.Lswitch_table-4)/4
+.p2align 2
+
+#define ABSORB_LANE(label, vreg) \
+    label: \
+      add     r1, #1; \
+      vld1.64 d5, [r2]!; \
+      cmp     r1, r5; /* pos == blocklanes */ \
+      veor    vreg, vreg, d5; \
+      beq     .Llanes_permute; \
+      subs    r3, #1; \
+      beq     .Ldone;
+
+    ABSORB_LANE(.Llane0, d0)
+    ABSORB_LANE(.Llane1, d12)
+    ABSORB_LANE(.Llane2, d17)
+    ABSORB_LANE(.Llane3, d22)
+    ABSORB_LANE(.Llane4, d27)
+
+    ABSORB_LANE(.Llane5, d1)
+    ABSORB_LANE(.Llane6, d13)
+    ABSORB_LANE(.Llane7, d18)
+    ABSORB_LANE(.Llane8, d23)
+    ABSORB_LANE(.Llane9, d28)
+
+    ABSORB_LANE(.Llane10, d2)
+    ABSORB_LANE(.Llane11, d14)
+    ABSORB_LANE(.Llane12, d19)
+    ABSORB_LANE(.Llane13, d24)
+    ABSORB_LANE(.Llane14, d29)
+
+    ABSORB_LANE(.Llane15, d3)
+    ABSORB_LANE(.Llane16, d15)
+    ABSORB_LANE(.Llane17, d20)
+    ABSORB_LANE(.Llane18, d25)
+    ABSORB_LANE(.Llane19, d30)
+
+    ABSORB_LANE(.Llane20, d4)
+    ABSORB_LANE(.Llane21, d16)
+    ABSORB_LANE(.Llane22, d21)
+    ABSORB_LANE(.Llane23, d26)
+    ABSORB_LANE(.Llane24, d31)
+
+    b .Llanes_loop
+
+.Llanes_permute:
+
+    sub    sp,sp, #5*8
+    vstr.64  d0, [sp, #Aba]
+    vstr.64  d1, [sp, #Aga]
+    veor.64 q0, q0, q1
+    vstr.64  d2, [sp, #Aka]
+    veor.64 d5, d0,  d1
+    vstr.64  d3, [sp, #Ama]
+    vstr.64  d4, [sp, #Asa]
+    veor.64 d5, d5,  d4
+
+    bl KeccakF_armv7a_neon_asm
+
+    mov  r1, #0   @ pos <= 0
+    subs r3, #1
+
+    vpop.64  { d0-d4 }
+
+    beq  .Ldone
+
+    b .Lmain_loop_pos0
+
+.Ldone:
+
+    @ save state
+    vstr.64  d0,  [r4, #0*8]
+    vstr.64  d12, [r4, #1*8]
+    vstr.64  d17, [r4, #2*8]
+    vstr.64  d22, [r4, #3*8]
+    vstr.64  d27, [r4, #4*8]
+
+    vstr.64  d1,  [r4, #5*8]
+    vstr.64  d13, [r4, #6*8]
+    vstr.64  d18, [r4, #7*8]
+    vstr.64  d23, [r4, #8*8]
+    vstr.64  d28, [r4, #9*8]
+
+    vstr.64  d2,  [r4, #10*8]
+    vstr.64  d14, [r4, #11*8]
+    vstr.64  d19, [r4, #12*8]
+    vstr.64  d24, [r4, #13*8]
+    vstr.64  d29, [r4, #14*8]
+
+    vstr.64  d3,  [r4, #15*8]
+    vstr.64  d15, [r4, #16*8]
+    vstr.64  d20, [r4, #17*8]
+    vstr.64  d25, [r4, #18*8]
+    vstr.64  d30, [r4, #19*8]
+
+    vstr.64  d4,  [r4, #20*8]
+    vstr.64  d16, [r4, #21*8]
+    vstr.64  d21, [r4, #22*8]
+    vstr.64  d26, [r4, #23*8]
+    vstr.64  d31, [r4, #24*8]
+
+    mov   r0, #120
+    vpop  {q4-q7}
+.Lout:
+    pop   {r4-r5, ip, pc}
+.p2align 2
+.ltorg
+.size 
_gcry_keccak_absorb_lanes64_armv7_neon,.-_gcry_keccak_absorb_lanes64_armv7_neon;
+
+#endif
diff --git a/grub-core/lib/libgcrypt/cipher/keccak.c 
b/grub-core/lib/libgcrypt/cipher/keccak.c
new file mode 100644
index 000000000..11e64b3e7
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/keccak.c
@@ -0,0 +1,1561 @@
+/* keccak.c - SHA3 hash functions
+ * Copyright (C) 2015  g10 Code GmbH
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser general Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+#include <config.h>
+#include <string.h>
+#include "g10lib.h"
+#include "bithelp.h"
+#include "bufhelp.h"
+#include "cipher.h"
+#include "hash-common.h"
+
+
+
+/* USE_64BIT indicates whether to use 64-bit generic implementation.
+ * USE_32BIT indicates whether to use 32-bit generic implementation. */
+#undef USE_64BIT
+#if defined(__x86_64__) || SIZEOF_UNSIGNED_LONG == 8
+# define USE_64BIT 1
+#else
+# define USE_32BIT 1
+#endif
+
+
+/* USE_64BIT_BMI2 indicates whether to compile with 64-bit Intel BMI2 code. */
+#undef USE_64BIT_BMI2
+#if defined(USE_64BIT) && defined(HAVE_GCC_INLINE_ASM_BMI2) && \
+    defined(HAVE_CPU_ARCH_X86)
+# define USE_64BIT_BMI2 1
+#endif
+
+
+/* USE_64BIT_SHLD indicates whether to compile with 64-bit Intel SHLD code. */
+#undef USE_64BIT_SHLD
+#if defined(USE_64BIT) && defined (__GNUC__) && defined(__x86_64__) && \
+    defined(HAVE_CPU_ARCH_X86)
+# define USE_64BIT_SHLD 1
+#endif
+
+
+/* USE_32BIT_BMI2 indicates whether to compile with 32-bit Intel BMI2 code. */
+#undef USE_32BIT_BMI2
+#if defined(USE_32BIT) && defined(HAVE_GCC_INLINE_ASM_BMI2) && \
+    defined(HAVE_CPU_ARCH_X86)
+# define USE_32BIT_BMI2 1
+#endif
+
+
+/* USE_64BIT_ARM_NEON indicates whether to enable 64-bit ARM/NEON assembly
+ * code. */
+#undef USE_64BIT_ARM_NEON
+#ifdef ENABLE_NEON_SUPPORT
+# if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) \
+     && defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) \
+     && defined(HAVE_GCC_INLINE_ASM_NEON)
+#  define USE_64BIT_ARM_NEON 1
+# endif
+#endif /*ENABLE_NEON_SUPPORT*/
+
+
+/* USE_S390X_CRYPTO indicates whether to enable zSeries code. */
+#undef USE_S390X_CRYPTO
+#if defined(HAVE_GCC_INLINE_ASM_S390X)
+# define USE_S390X_CRYPTO 1
+#endif /* USE_S390X_CRYPTO */
+
+
+#if defined(USE_64BIT) || defined(USE_64BIT_ARM_NEON)
+# define NEED_COMMON64 1
+#endif
+
+#ifdef USE_32BIT
+# define NEED_COMMON32BI 1
+#endif
+
+
+#define SHA3_DELIMITED_SUFFIX 0x06
+#define SHAKE_DELIMITED_SUFFIX 0x1F
+
+
+typedef struct
+{
+  union {
+#ifdef NEED_COMMON64
+    u64 state64[25];
+#endif
+#ifdef NEED_COMMON32BI
+    u32 state32bi[50];
+#endif
+  } u;
+} KECCAK_STATE;
+
+
+typedef struct
+{
+  unsigned int (*permute)(KECCAK_STATE *hd);
+  unsigned int (*absorb)(KECCAK_STATE *hd, int pos, const byte *lanes,
+                        size_t nlanes, int blocklanes);
+  unsigned int (*extract) (KECCAK_STATE *hd, unsigned int pos, byte *outbuf,
+                          unsigned int outlen);
+} keccak_ops_t;
+
+
+typedef struct KECCAK_CONTEXT_S
+{
+  KECCAK_STATE state;
+  unsigned int outlen;
+  unsigned int blocksize;
+  unsigned int count;
+  unsigned int suffix;
+  const keccak_ops_t *ops;
+#ifdef USE_S390X_CRYPTO
+  unsigned int kimd_func;
+  unsigned int buf_pos;
+  byte buf[1344 / 8]; /* SHAKE128 requires biggest buffer, 1344 bits. */
+#endif
+} KECCAK_CONTEXT;
+
+
+
+#ifdef NEED_COMMON64
+
+const u64 _gcry_keccak_round_consts_64bit[24 + 1] =
+{
+  U64_C(0x0000000000000001), U64_C(0x0000000000008082),
+  U64_C(0x800000000000808A), U64_C(0x8000000080008000),
+  U64_C(0x000000000000808B), U64_C(0x0000000080000001),
+  U64_C(0x8000000080008081), U64_C(0x8000000000008009),
+  U64_C(0x000000000000008A), U64_C(0x0000000000000088),
+  U64_C(0x0000000080008009), U64_C(0x000000008000000A),
+  U64_C(0x000000008000808B), U64_C(0x800000000000008B),
+  U64_C(0x8000000000008089), U64_C(0x8000000000008003),
+  U64_C(0x8000000000008002), U64_C(0x8000000000000080),
+  U64_C(0x000000000000800A), U64_C(0x800000008000000A),
+  U64_C(0x8000000080008081), U64_C(0x8000000000008080),
+  U64_C(0x0000000080000001), U64_C(0x8000000080008008),
+  U64_C(0xFFFFFFFFFFFFFFFF)
+};
+
+static unsigned int
+keccak_extract64(KECCAK_STATE *hd, unsigned int pos, byte *outbuf,
+                unsigned int outlen)
+{
+  unsigned int i;
+
+  /* NOTE: when pos == 0, hd and outbuf may point to same memory (SHA-3). */
+
+  for (i = pos; i < pos + outlen / 8 + !!(outlen % 8); i++)
+    {
+      u64 tmp = hd->u.state64[i];
+      buf_put_le64(outbuf, tmp);
+      outbuf += 8;
+    }
+
+  return 0;
+}
+
+#endif /* NEED_COMMON64 */
+
+
+#ifdef NEED_COMMON32BI
+
+static const u32 round_consts_32bit[2 * 24] =
+{
+  0x00000001UL, 0x00000000UL, 0x00000000UL, 0x00000089UL,
+  0x00000000UL, 0x8000008bUL, 0x00000000UL, 0x80008080UL,
+  0x00000001UL, 0x0000008bUL, 0x00000001UL, 0x00008000UL,
+  0x00000001UL, 0x80008088UL, 0x00000001UL, 0x80000082UL,
+  0x00000000UL, 0x0000000bUL, 0x00000000UL, 0x0000000aUL,
+  0x00000001UL, 0x00008082UL, 0x00000000UL, 0x00008003UL,
+  0x00000001UL, 0x0000808bUL, 0x00000001UL, 0x8000000bUL,
+  0x00000001UL, 0x8000008aUL, 0x00000001UL, 0x80000081UL,
+  0x00000000UL, 0x80000081UL, 0x00000000UL, 0x80000008UL,
+  0x00000000UL, 0x00000083UL, 0x00000000UL, 0x80008003UL,
+  0x00000001UL, 0x80008088UL, 0x00000000UL, 0x80000088UL,
+  0x00000001UL, 0x00008000UL, 0x00000000UL, 0x80008082UL
+};
+
+static unsigned int
+keccak_extract32bi(KECCAK_STATE *hd, unsigned int pos, byte *outbuf,
+                  unsigned int outlen)
+{
+  unsigned int i;
+  u32 x0;
+  u32 x1;
+  u32 t;
+
+  /* NOTE: when pos == 0, hd and outbuf may point to same memory (SHA-3). */
+
+  for (i = pos; i < pos + outlen / 8 + !!(outlen % 8); i++)
+    {
+      x0 = hd->u.state32bi[i * 2 + 0];
+      x1 = hd->u.state32bi[i * 2 + 1];
+
+      t = (x0 & 0x0000FFFFUL) + (x1 << 16);
+      x1 = (x0 >> 16) + (x1 & 0xFFFF0000UL);
+      x0 = t;
+      t = (x0 ^ (x0 >> 8)) & 0x0000FF00UL; x0 = x0 ^ t ^ (t << 8);
+      t = (x0 ^ (x0 >> 4)) & 0x00F000F0UL; x0 = x0 ^ t ^ (t << 4);
+      t = (x0 ^ (x0 >> 2)) & 0x0C0C0C0CUL; x0 = x0 ^ t ^ (t << 2);
+      t = (x0 ^ (x0 >> 1)) & 0x22222222UL; x0 = x0 ^ t ^ (t << 1);
+      t = (x1 ^ (x1 >> 8)) & 0x0000FF00UL; x1 = x1 ^ t ^ (t << 8);
+      t = (x1 ^ (x1 >> 4)) & 0x00F000F0UL; x1 = x1 ^ t ^ (t << 4);
+      t = (x1 ^ (x1 >> 2)) & 0x0C0C0C0CUL; x1 = x1 ^ t ^ (t << 2);
+      t = (x1 ^ (x1 >> 1)) & 0x22222222UL; x1 = x1 ^ t ^ (t << 1);
+
+      buf_put_le32(&outbuf[0], x0);
+      buf_put_le32(&outbuf[4], x1);
+      outbuf += 8;
+    }
+
+  return 0;
+}
+
+static inline void
+keccak_absorb_lane32bi(u32 *lane, u32 x0, u32 x1)
+{
+  u32 t;
+
+  t = (x0 ^ (x0 >> 1)) & 0x22222222UL; x0 = x0 ^ t ^ (t << 1);
+  t = (x0 ^ (x0 >> 2)) & 0x0C0C0C0CUL; x0 = x0 ^ t ^ (t << 2);
+  t = (x0 ^ (x0 >> 4)) & 0x00F000F0UL; x0 = x0 ^ t ^ (t << 4);
+  t = (x0 ^ (x0 >> 8)) & 0x0000FF00UL; x0 = x0 ^ t ^ (t << 8);
+  t = (x1 ^ (x1 >> 1)) & 0x22222222UL; x1 = x1 ^ t ^ (t << 1);
+  t = (x1 ^ (x1 >> 2)) & 0x0C0C0C0CUL; x1 = x1 ^ t ^ (t << 2);
+  t = (x1 ^ (x1 >> 4)) & 0x00F000F0UL; x1 = x1 ^ t ^ (t << 4);
+  t = (x1 ^ (x1 >> 8)) & 0x0000FF00UL; x1 = x1 ^ t ^ (t << 8);
+  lane[0] ^= (x0 & 0x0000FFFFUL) + (x1 << 16);
+  lane[1] ^= (x0 >> 16) + (x1 & 0xFFFF0000UL);
+}
+
+#endif /* NEED_COMMON32BI */
+
+
+/* Construct generic 64-bit implementation. */
+#ifdef USE_64BIT
+
+#if __GNUC__ >= 4 && defined(__x86_64__)
+
+static inline void absorb_lanes64_8(u64 *dst, const byte *in)
+{
+  asm ("movdqu 0*16(%[dst]), %%xmm0\n\t"
+       "movdqu 0*16(%[in]), %%xmm4\n\t"
+       "movdqu 1*16(%[dst]), %%xmm1\n\t"
+       "movdqu 1*16(%[in]), %%xmm5\n\t"
+       "movdqu 2*16(%[dst]), %%xmm2\n\t"
+       "movdqu 3*16(%[dst]), %%xmm3\n\t"
+       "pxor %%xmm4, %%xmm0\n\t"
+       "pxor %%xmm5, %%xmm1\n\t"
+       "movdqu 2*16(%[in]), %%xmm4\n\t"
+       "movdqu 3*16(%[in]), %%xmm5\n\t"
+       "movdqu %%xmm0, 0*16(%[dst])\n\t"
+       "pxor %%xmm4, %%xmm2\n\t"
+       "movdqu %%xmm1, 1*16(%[dst])\n\t"
+       "pxor %%xmm5, %%xmm3\n\t"
+       "movdqu %%xmm2, 2*16(%[dst])\n\t"
+       "movdqu %%xmm3, 3*16(%[dst])\n\t"
+       :
+       : [dst] "r" (dst), [in] "r" (in)
+       : "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "memory");
+}
+
+static inline void absorb_lanes64_4(u64 *dst, const byte *in)
+{
+  asm ("movdqu 0*16(%[dst]), %%xmm0\n\t"
+       "movdqu 0*16(%[in]), %%xmm4\n\t"
+       "movdqu 1*16(%[dst]), %%xmm1\n\t"
+       "movdqu 1*16(%[in]), %%xmm5\n\t"
+       "pxor %%xmm4, %%xmm0\n\t"
+       "pxor %%xmm5, %%xmm1\n\t"
+       "movdqu %%xmm0, 0*16(%[dst])\n\t"
+       "movdqu %%xmm1, 1*16(%[dst])\n\t"
+       :
+       : [dst] "r" (dst), [in] "r" (in)
+       : "xmm0", "xmm1", "xmm4", "xmm5", "memory");
+}
+
+static inline void absorb_lanes64_2(u64 *dst, const byte *in)
+{
+  asm ("movdqu 0*16(%[dst]), %%xmm0\n\t"
+       "movdqu 0*16(%[in]), %%xmm4\n\t"
+       "pxor %%xmm4, %%xmm0\n\t"
+       "movdqu %%xmm0, 0*16(%[dst])\n\t"
+       :
+       : [dst] "r" (dst), [in] "r" (in)
+       : "xmm0", "xmm4", "memory");
+}
+
+#else /* __x86_64__ */
+
+static inline void absorb_lanes64_8(u64 *dst, const byte *in)
+{
+  dst[0] ^= buf_get_le64(in + 8 * 0);
+  dst[1] ^= buf_get_le64(in + 8 * 1);
+  dst[2] ^= buf_get_le64(in + 8 * 2);
+  dst[3] ^= buf_get_le64(in + 8 * 3);
+  dst[4] ^= buf_get_le64(in + 8 * 4);
+  dst[5] ^= buf_get_le64(in + 8 * 5);
+  dst[6] ^= buf_get_le64(in + 8 * 6);
+  dst[7] ^= buf_get_le64(in + 8 * 7);
+}
+
+static inline void absorb_lanes64_4(u64 *dst, const byte *in)
+{
+  dst[0] ^= buf_get_le64(in + 8 * 0);
+  dst[1] ^= buf_get_le64(in + 8 * 1);
+  dst[2] ^= buf_get_le64(in + 8 * 2);
+  dst[3] ^= buf_get_le64(in + 8 * 3);
+}
+
+static inline void absorb_lanes64_2(u64 *dst, const byte *in)
+{
+  dst[0] ^= buf_get_le64(in + 8 * 0);
+  dst[1] ^= buf_get_le64(in + 8 * 1);
+}
+
+#endif /* !__x86_64__ */
+
+static inline void absorb_lanes64_1(u64 *dst, const byte *in)
+{
+  dst[0] ^= buf_get_le64(in + 8 * 0);
+}
+
+
+# define ANDN64(x, y) (~(x) & (y))
+# define ROL64(x, n) (((x) << ((unsigned int)n & 63)) | \
+                     ((x) >> ((64 - (unsigned int)(n)) & 63)))
+
+# define KECCAK_F1600_PERMUTE_FUNC_NAME keccak_f1600_state_permute64
+# define KECCAK_F1600_ABSORB_FUNC_NAME keccak_absorb_lanes64
+# include "keccak_permute_64.h"
+
+# undef ANDN64
+# undef ROL64
+# undef KECCAK_F1600_PERMUTE_FUNC_NAME
+# undef KECCAK_F1600_ABSORB_FUNC_NAME
+
+static const keccak_ops_t keccak_generic64_ops =
+{
+  .permute = keccak_f1600_state_permute64,
+  .absorb = keccak_absorb_lanes64,
+  .extract = keccak_extract64,
+};
+
+#endif /* USE_64BIT */
+
+
+/* Construct 64-bit Intel SHLD implementation. */
+#ifdef USE_64BIT_SHLD
+
+# define ANDN64(x, y) (~(x) & (y))
+# define ROL64(x, n) ({ \
+                       u64 tmp = (x); \
+                       asm ("shldq %1, %0, %0" \
+                            : "+r" (tmp) \
+                            : "J" ((n) & 63) \
+                            : "cc"); \
+                       tmp; })
+
+# define KECCAK_F1600_PERMUTE_FUNC_NAME keccak_f1600_state_permute64_shld
+# define KECCAK_F1600_ABSORB_FUNC_NAME keccak_absorb_lanes64_shld
+# include "keccak_permute_64.h"
+
+# undef ANDN64
+# undef ROL64
+# undef KECCAK_F1600_PERMUTE_FUNC_NAME
+# undef KECCAK_F1600_ABSORB_FUNC_NAME
+
+static const keccak_ops_t keccak_shld_64_ops =
+{
+  .permute = keccak_f1600_state_permute64_shld,
+  .absorb = keccak_absorb_lanes64_shld,
+  .extract = keccak_extract64,
+};
+
+#endif /* USE_64BIT_SHLD */
+
+
+/* Construct 64-bit Intel BMI2 implementation. */
+#ifdef USE_64BIT_BMI2
+
+# define ANDN64(x, y) ({ \
+                       u64 tmp; \
+                       asm ("andnq %2, %1, %0" \
+                            : "=r" (tmp) \
+                            : "r0" (x), "rm" (y)); \
+                       tmp; })
+
+# define ROL64(x, n) ({ \
+                       u64 tmp; \
+                       asm ("rorxq %2, %1, %0" \
+                            : "=r" (tmp) \
+                            : "rm0" (x), "J" (64 - ((n) & 63))); \
+                       tmp; })
+
+# define KECCAK_F1600_PERMUTE_FUNC_NAME keccak_f1600_state_permute64_bmi2
+# define KECCAK_F1600_ABSORB_FUNC_NAME keccak_absorb_lanes64_bmi2
+# include "keccak_permute_64.h"
+
+# undef ANDN64
+# undef ROL64
+# undef KECCAK_F1600_PERMUTE_FUNC_NAME
+# undef KECCAK_F1600_ABSORB_FUNC_NAME
+
+static const keccak_ops_t keccak_bmi2_64_ops =
+{
+  .permute = keccak_f1600_state_permute64_bmi2,
+  .absorb = keccak_absorb_lanes64_bmi2,
+  .extract = keccak_extract64,
+};
+
+#endif /* USE_64BIT_BMI2 */
+
+
+/* 64-bit ARMv7/NEON implementation. */
+#ifdef USE_64BIT_ARM_NEON
+
+unsigned int _gcry_keccak_permute_armv7_neon(u64 *state);
+unsigned int _gcry_keccak_absorb_lanes64_armv7_neon(u64 *state, int pos,
+                                                   const byte *lanes,
+                                                   size_t nlanes,
+                                                   int blocklanes);
+
+static unsigned int keccak_permute64_armv7_neon(KECCAK_STATE *hd)
+{
+  return _gcry_keccak_permute_armv7_neon(hd->u.state64);
+}
+
+static unsigned int
+keccak_absorb_lanes64_armv7_neon(KECCAK_STATE *hd, int pos, const byte *lanes,
+                                size_t nlanes, int blocklanes)
+{
+  if (blocklanes < 0)
+    {
+      /* blocklanes == -1, permutationless absorb from keccak_final. */
+
+      while (nlanes)
+       {
+         hd->u.state64[pos] ^= buf_get_le64(lanes);
+         lanes += 8;
+         nlanes--;
+       }
+
+      return 0;
+    }
+  else
+    {
+      return _gcry_keccak_absorb_lanes64_armv7_neon(hd->u.state64, pos, lanes,
+                                                   nlanes, blocklanes);
+    }
+}
+
+static const keccak_ops_t keccak_armv7_neon_64_ops =
+{
+  .permute = keccak_permute64_armv7_neon,
+  .absorb = keccak_absorb_lanes64_armv7_neon,
+  .extract = keccak_extract64,
+};
+
+#endif /* USE_64BIT_ARM_NEON */
+
+
+/* Construct generic 32-bit implementation. */
+#ifdef USE_32BIT
+
+# define ANDN32(x, y) (~(x) & (y))
+# define ROL32(x, n) (((x) << ((unsigned int)n & 31)) | \
+                     ((x) >> ((32 - (unsigned int)(n)) & 31)))
+
+# define KECCAK_F1600_PERMUTE_FUNC_NAME keccak_f1600_state_permute32bi
+# include "keccak_permute_32.h"
+
+# undef ANDN32
+# undef ROL32
+# undef KECCAK_F1600_PERMUTE_FUNC_NAME
+
+static unsigned int
+keccak_absorb_lanes32bi(KECCAK_STATE *hd, int pos, const byte *lanes,
+                       size_t nlanes, int blocklanes)
+{
+  unsigned int burn = 0;
+
+  while (nlanes)
+    {
+      keccak_absorb_lane32bi(&hd->u.state32bi[pos * 2],
+                            buf_get_le32(lanes + 0),
+                            buf_get_le32(lanes + 4));
+      lanes += 8;
+      nlanes--;
+
+      if (++pos == blocklanes)
+       {
+         burn = keccak_f1600_state_permute32bi(hd);
+         pos = 0;
+       }
+    }
+
+  return burn;
+}
+
+static const keccak_ops_t keccak_generic32bi_ops =
+{
+  .permute = keccak_f1600_state_permute32bi,
+  .absorb = keccak_absorb_lanes32bi,
+  .extract = keccak_extract32bi,
+};
+
+#endif /* USE_32BIT */
+
+
+/* Construct 32-bit Intel BMI2 implementation. */
+#ifdef USE_32BIT_BMI2
+
+# define ANDN32(x, y) ({ \
+                       u32 tmp; \
+                       asm ("andnl %2, %1, %0" \
+                            : "=r" (tmp) \
+                            : "r0" (x), "rm" (y)); \
+                       tmp; })
+
+# define ROL32(x, n) ({ \
+                       u32 tmp; \
+                       asm ("rorxl %2, %1, %0" \
+                            : "=r" (tmp) \
+                            : "rm0" (x), "J" (32 - ((n) & 31))); \
+                       tmp; })
+
+# define KECCAK_F1600_PERMUTE_FUNC_NAME keccak_f1600_state_permute32bi_bmi2
+# include "keccak_permute_32.h"
+
+# undef ANDN32
+# undef ROL32
+# undef KECCAK_F1600_PERMUTE_FUNC_NAME
+
+static inline u32 pext(u32 x, u32 mask)
+{
+  u32 tmp;
+  asm ("pextl %2, %1, %0" : "=r" (tmp) : "r0" (x), "rm" (mask));
+  return tmp;
+}
+
+static inline u32 pdep(u32 x, u32 mask)
+{
+  u32 tmp;
+  asm ("pdepl %2, %1, %0" : "=r" (tmp) : "r0" (x), "rm" (mask));
+  return tmp;
+}
+
+static inline void
+keccak_absorb_lane32bi_bmi2(u32 *lane, u32 x0, u32 x1)
+{
+  x0 = pdep(pext(x0, 0x55555555), 0x0000ffff) | (pext(x0, 0xaaaaaaaa) << 16);
+  x1 = pdep(pext(x1, 0x55555555), 0x0000ffff) | (pext(x1, 0xaaaaaaaa) << 16);
+
+  lane[0] ^= (x0 & 0x0000FFFFUL) + (x1 << 16);
+  lane[1] ^= (x0 >> 16) + (x1 & 0xFFFF0000UL);
+}
+
+static unsigned int
+keccak_absorb_lanes32bi_bmi2(KECCAK_STATE *hd, int pos, const byte *lanes,
+                            size_t nlanes, int blocklanes)
+{
+  unsigned int burn = 0;
+
+  while (nlanes)
+    {
+      keccak_absorb_lane32bi_bmi2(&hd->u.state32bi[pos * 2],
+                                 buf_get_le32(lanes + 0),
+                                 buf_get_le32(lanes + 4));
+      lanes += 8;
+      nlanes--;
+
+      if (++pos == blocklanes)
+       {
+         burn = keccak_f1600_state_permute32bi_bmi2(hd);
+         pos = 0;
+       }
+    }
+
+  return burn;
+}
+
+static unsigned int
+keccak_extract32bi_bmi2(KECCAK_STATE *hd, unsigned int pos, byte *outbuf,
+                       unsigned int outlen)
+{
+  unsigned int i;
+  u32 x0;
+  u32 x1;
+  u32 t;
+
+  /* NOTE: when pos == 0, hd and outbuf may point to same memory (SHA-3). */
+
+  for (i = pos; i < pos + outlen / 8 + !!(outlen % 8); i++)
+    {
+      x0 = hd->u.state32bi[i * 2 + 0];
+      x1 = hd->u.state32bi[i * 2 + 1];
+
+      t = (x0 & 0x0000FFFFUL) + (x1 << 16);
+      x1 = (x0 >> 16) + (x1 & 0xFFFF0000UL);
+      x0 = t;
+
+      x0 = pdep(pext(x0, 0xffff0001), 0xaaaaaaab) | pdep(x0 >> 1, 0x55555554);
+      x1 = pdep(pext(x1, 0xffff0001), 0xaaaaaaab) | pdep(x1 >> 1, 0x55555554);
+
+      buf_put_le32(&outbuf[0], x0);
+      buf_put_le32(&outbuf[4], x1);
+      outbuf += 8;
+    }
+
+  return 0;
+}
+
+static const keccak_ops_t keccak_bmi2_32bi_ops =
+{
+  .permute = keccak_f1600_state_permute32bi_bmi2,
+  .absorb = keccak_absorb_lanes32bi_bmi2,
+  .extract = keccak_extract32bi_bmi2,
+};
+
+#endif /* USE_32BIT_BMI2 */
+
+
+#ifdef USE_S390X_CRYPTO
+#include "asm-inline-s390x.h"
+
+static inline void
+keccak_bwrite_s390x (void *context, const byte *in, size_t inlen)
+{
+  KECCAK_CONTEXT *ctx = context;
+
+  /* Write full-blocks. */
+  kimd_execute (ctx->kimd_func, &ctx->state, in, inlen);
+  return;
+}
+
+static inline void
+keccak_final_s390x (void *context)
+{
+  KECCAK_CONTEXT *ctx = context;
+
+  if (ctx->suffix == SHA3_DELIMITED_SUFFIX)
+    {
+      klmd_execute (ctx->kimd_func, &ctx->state, ctx->buf, ctx->count);
+    }
+  else
+    {
+      klmd_shake_execute (ctx->kimd_func, &ctx->state, NULL, 0, ctx->buf,
+                         ctx->count);
+      ctx->count = 0;
+      ctx->buf_pos = 0;
+    }
+
+  return;
+}
+
+static inline void
+keccak_bextract_s390x (void *context, byte *out, size_t outlen)
+{
+  KECCAK_CONTEXT *ctx = context;
+
+  /* Extract full-blocks. */
+  klmd_shake_execute (ctx->kimd_func | KLMD_PADDING_STATE, &ctx->state,
+                     out, outlen, NULL, 0);
+  return;
+}
+
+static void
+keccak_write_s390x (void *context, const byte *inbuf, size_t inlen)
+{
+  KECCAK_CONTEXT *hd = context;
+  const size_t blocksize = hd->blocksize;
+  size_t inblocks;
+  size_t copylen;
+
+  while (hd->count)
+    {
+      if (hd->count == blocksize)  /* Flush the buffer. */
+       {
+         keccak_bwrite_s390x (hd, hd->buf, blocksize);
+         hd->count = 0;
+       }
+      else
+       {
+         copylen = inlen;
+         if (copylen > blocksize - hd->count)
+           copylen = blocksize - hd->count;
+
+         if (copylen == 0)
+           break;
+
+         buf_cpy (&hd->buf[hd->count], inbuf, copylen);
+         hd->count += copylen;
+         inbuf += copylen;
+         inlen -= copylen;
+       }
+    }
+
+  if (inlen == 0)
+    return;
+
+  if (inlen >= blocksize)
+    {
+      inblocks = inlen / blocksize;
+      keccak_bwrite_s390x (hd, inbuf, inblocks * blocksize);
+      hd->count = 0;
+      inlen -= inblocks * blocksize;
+      inbuf += inblocks * blocksize;
+    }
+
+  if (inlen)
+    {
+      buf_cpy (hd->buf, inbuf, inlen);
+      hd->count = inlen;
+    }
+}
+
+static void
+keccak_extract_s390x (void *context, void *outbuf_arg, size_t outlen)
+{
+  KECCAK_CONTEXT *hd = context;
+  const size_t blocksize = hd->blocksize;
+  byte *outbuf = outbuf_arg;
+
+  while (outlen)
+    {
+      gcry_assert(hd->count == 0 || hd->buf_pos < hd->count);
+
+      if (hd->buf_pos < hd->count && outlen)
+       {
+         size_t copylen = hd->count - hd->buf_pos;
+
+         if (copylen > outlen)
+           copylen = outlen;
+
+         buf_cpy (outbuf, &hd->buf[hd->buf_pos], copylen);
+
+         outbuf += copylen;
+         outlen -= copylen;
+         hd->buf_pos += copylen;
+       }
+
+      if (hd->buf_pos == hd->count)
+       {
+         hd->buf_pos = 0;
+         hd->count = 0;
+       }
+
+      if (outlen == 0)
+       return;
+
+      if (outlen >= blocksize)
+       {
+         size_t outblocks = outlen / blocksize;
+
+         keccak_bextract_s390x (context, outbuf, outblocks * blocksize);
+
+         outlen -= outblocks * blocksize;
+         outbuf += outblocks * blocksize;
+
+         if (outlen == 0)
+           return;
+       }
+
+      keccak_bextract_s390x (context, hd->buf, blocksize);
+      hd->count = blocksize;
+    }
+}
+#endif /* USE_S390X_CRYPTO */
+
+
+static void
+keccak_write (void *context, const void *inbuf_arg, size_t inlen)
+{
+  KECCAK_CONTEXT *ctx = context;
+  const size_t bsize = ctx->blocksize;
+  const size_t blocklanes = bsize / 8;
+  const byte *inbuf = inbuf_arg;
+  unsigned int nburn, burn = 0;
+  unsigned int count, i;
+  unsigned int pos;
+  size_t nlanes;
+
+#ifdef USE_S390X_CRYPTO
+  if (ctx->kimd_func)
+    {
+      keccak_write_s390x (context, inbuf, inlen);
+      return;
+    }
+#endif
+
+  count = ctx->count;
+
+  if (inlen && (count % 8))
+    {
+      byte lane[8] = { 0, };
+
+      /* Complete absorbing partial input lane. */
+
+      pos = count / 8;
+
+      for (i = count % 8; inlen && i < 8; i++)
+       {
+         lane[i] = *inbuf++;
+         inlen--;
+         count++;
+       }
+
+      if (count == bsize)
+       count = 0;
+
+      nburn = ctx->ops->absorb(&ctx->state, pos, lane, 1,
+                              (count % 8) ? -1 : blocklanes);
+      burn = nburn > burn ? nburn : burn;
+    }
+
+  /* Absorb full input lanes. */
+
+  pos = count / 8;
+  nlanes = inlen / 8;
+  if (nlanes > 0)
+    {
+      nburn = ctx->ops->absorb(&ctx->state, pos, inbuf, nlanes, blocklanes);
+      burn = nburn > burn ? nburn : burn;
+      inlen -= nlanes * 8;
+      inbuf += nlanes * 8;
+      count = ((size_t) count + nlanes * 8) % bsize;
+    }
+
+  if (inlen)
+    {
+      byte lane[8] = { 0, };
+
+      /* Absorb remaining partial input lane. */
+
+      pos = count / 8;
+
+      for (i = count % 8; inlen && i < 8; i++)
+       {
+         lane[i] = *inbuf++;
+         inlen--;
+         count++;
+       }
+
+      nburn = ctx->ops->absorb(&ctx->state, pos, lane, 1, -1);
+      burn = nburn > burn ? nburn : burn;
+
+      gcry_assert(count < bsize);
+    }
+
+  ctx->count = count;
+
+  if (burn)
+    _gcry_burn_stack (burn);
+}
+
+
+static void
+keccak_init (int algo, void *context, unsigned int flags)
+{
+  KECCAK_CONTEXT *ctx = context;
+  KECCAK_STATE *hd = &ctx->state;
+  unsigned int features = _gcry_get_hw_features ();
+
+  (void)flags;
+  (void)features;
+
+  memset (hd, 0, sizeof *hd);
+
+  ctx->count = 0;
+
+  /* Select generic implementation. */
+#ifdef USE_64BIT
+  ctx->ops = &keccak_generic64_ops;
+#elif defined USE_32BIT
+  ctx->ops = &keccak_generic32bi_ops;
+#endif
+
+  /* Select optimized implementation based in hw features. */
+  if (0) {}
+#ifdef USE_64BIT_ARM_NEON
+  else if (features & HWF_ARM_NEON)
+    ctx->ops = &keccak_armv7_neon_64_ops;
+#endif
+#ifdef USE_64BIT_BMI2
+  else if (features & HWF_INTEL_BMI2)
+    ctx->ops = &keccak_bmi2_64_ops;
+#endif
+#ifdef USE_32BIT_BMI2
+  else if (features & HWF_INTEL_BMI2)
+    ctx->ops = &keccak_bmi2_32bi_ops;
+#endif
+#ifdef USE_64BIT_SHLD
+  else if (features & HWF_INTEL_FAST_SHLD)
+    ctx->ops = &keccak_shld_64_ops;
+#endif
+
+  /* Set input block size, in Keccak terms this is called 'rate'. */
+
+  switch (algo)
+    {
+    case GCRY_MD_SHA3_224:
+      ctx->suffix = SHA3_DELIMITED_SUFFIX;
+      ctx->blocksize = 1152 / 8;
+      ctx->outlen = 224 / 8;
+      break;
+    case GCRY_MD_SHA3_256:
+      ctx->suffix = SHA3_DELIMITED_SUFFIX;
+      ctx->blocksize = 1088 / 8;
+      ctx->outlen = 256 / 8;
+      break;
+    case GCRY_MD_SHA3_384:
+      ctx->suffix = SHA3_DELIMITED_SUFFIX;
+      ctx->blocksize = 832 / 8;
+      ctx->outlen = 384 / 8;
+      break;
+    case GCRY_MD_SHA3_512:
+      ctx->suffix = SHA3_DELIMITED_SUFFIX;
+      ctx->blocksize = 576 / 8;
+      ctx->outlen = 512 / 8;
+      break;
+    case GCRY_MD_SHAKE128:
+      ctx->suffix = SHAKE_DELIMITED_SUFFIX;
+      ctx->blocksize = 1344 / 8;
+      ctx->outlen = 0;
+      break;
+    case GCRY_MD_SHAKE256:
+      ctx->suffix = SHAKE_DELIMITED_SUFFIX;
+      ctx->blocksize = 1088 / 8;
+      ctx->outlen = 0;
+      break;
+    default:
+      BUG();
+    }
+
+#ifdef USE_S390X_CRYPTO
+  ctx->kimd_func = 0;
+  if ((features & HWF_S390X_MSA) != 0)
+    {
+      unsigned int kimd_func = 0;
+
+      switch (algo)
+       {
+       case GCRY_MD_SHA3_224:
+         kimd_func = KMID_FUNCTION_SHA3_224;
+         break;
+       case GCRY_MD_SHA3_256:
+         kimd_func = KMID_FUNCTION_SHA3_256;
+         break;
+       case GCRY_MD_SHA3_384:
+         kimd_func = KMID_FUNCTION_SHA3_384;
+         break;
+       case GCRY_MD_SHA3_512:
+         kimd_func = KMID_FUNCTION_SHA3_512;
+         break;
+       case GCRY_MD_SHAKE128:
+         kimd_func = KMID_FUNCTION_SHAKE128;
+         break;
+       case GCRY_MD_SHAKE256:
+         kimd_func = KMID_FUNCTION_SHAKE256;
+         break;
+       }
+
+      if ((kimd_query () & km_function_to_mask (kimd_func)) &&
+         (klmd_query () & km_function_to_mask (kimd_func)))
+       {
+         ctx->kimd_func = kimd_func;
+       }
+    }
+#endif
+}
+
+static void
+sha3_224_init (void *context, unsigned int flags)
+{
+  keccak_init (GCRY_MD_SHA3_224, context, flags);
+}
+
+static void
+sha3_256_init (void *context, unsigned int flags)
+{
+  keccak_init (GCRY_MD_SHA3_256, context, flags);
+}
+
+static void
+sha3_384_init (void *context, unsigned int flags)
+{
+  keccak_init (GCRY_MD_SHA3_384, context, flags);
+}
+
+static void
+sha3_512_init (void *context, unsigned int flags)
+{
+  keccak_init (GCRY_MD_SHA3_512, context, flags);
+}
+
+static void
+shake128_init (void *context, unsigned int flags)
+{
+  keccak_init (GCRY_MD_SHAKE128, context, flags);
+}
+
+static void
+shake256_init (void *context, unsigned int flags)
+{
+  keccak_init (GCRY_MD_SHAKE256, context, flags);
+}
+
+/* The routine final terminates the computation and
+ * returns the digest.
+ * The handle is prepared for a new cycle, but adding bytes to the
+ * handle will the destroy the returned buffer.
+ * Returns: 64 bytes representing the digest.  When used for sha384,
+ * we take the leftmost 48 of those bytes.
+ */
+static void
+keccak_final (void *context)
+{
+  KECCAK_CONTEXT *ctx = context;
+  KECCAK_STATE *hd = &ctx->state;
+  const size_t bsize = ctx->blocksize;
+  const byte suffix = ctx->suffix;
+  unsigned int nburn, burn = 0;
+  unsigned int lastbytes;
+  byte lane[8];
+
+#ifdef USE_S390X_CRYPTO
+  if (ctx->kimd_func)
+    {
+      keccak_final_s390x (context);
+      return;
+    }
+#endif
+
+  lastbytes = ctx->count;
+
+  /* Do the padding and switch to the squeezing phase */
+
+  /* Absorb the last few bits and add the first bit of padding (which
+     coincides with the delimiter in delimited suffix) */
+  buf_put_le64(lane, (u64)suffix << ((lastbytes % 8) * 8));
+  nburn = ctx->ops->absorb(&ctx->state, lastbytes / 8, lane, 1, -1);
+  burn = nburn > burn ? nburn : burn;
+
+  /* Add the second bit of padding. */
+  buf_put_le64(lane, (u64)0x80 << (((bsize - 1) % 8) * 8));
+  nburn = ctx->ops->absorb(&ctx->state, (bsize - 1) / 8, lane, 1, -1);
+  burn = nburn > burn ? nburn : burn;
+
+  if (suffix == SHA3_DELIMITED_SUFFIX)
+    {
+      /* Switch to the squeezing phase. */
+      nburn = ctx->ops->permute(hd);
+      burn = nburn > burn ? nburn : burn;
+
+      /* Squeeze out the SHA3 digest. */
+      nburn = ctx->ops->extract(hd, 0, (void *)hd, ctx->outlen);
+      burn = nburn > burn ? nburn : burn;
+    }
+  else
+    {
+      /* Output for SHAKE can now be read with md_extract(). */
+
+      ctx->count = 0;
+    }
+
+  wipememory(lane, sizeof(lane));
+  if (burn)
+    _gcry_burn_stack (burn);
+}
+
+
+static byte *
+keccak_read (void *context)
+{
+  KECCAK_CONTEXT *ctx = (KECCAK_CONTEXT *) context;
+  KECCAK_STATE *hd = &ctx->state;
+  return (byte *)&hd->u;
+}
+
+
+static void
+keccak_extract (void *context, void *out, size_t outlen)
+{
+  KECCAK_CONTEXT *ctx = context;
+  KECCAK_STATE *hd = &ctx->state;
+  const size_t bsize = ctx->blocksize;
+  unsigned int nburn, burn = 0;
+  byte *outbuf = out;
+  unsigned int nlanes;
+  unsigned int nleft;
+  unsigned int count;
+  unsigned int i;
+  byte lane[8];
+
+#ifdef USE_S390X_CRYPTO
+  if (ctx->kimd_func)
+    {
+      keccak_extract_s390x (context, out, outlen);
+      return;
+    }
+#endif
+
+  count = ctx->count;
+
+  while (count && outlen && (outlen < 8 || count % 8))
+    {
+      /* Extract partial lane. */
+      nburn = ctx->ops->extract(hd, count / 8, lane, 8);
+      burn = nburn > burn ? nburn : burn;
+
+      for (i = count % 8; outlen && i < 8; i++)
+       {
+         *outbuf++ = lane[i];
+         outlen--;
+         count++;
+       }
+
+      gcry_assert(count <= bsize);
+
+      if (count == bsize)
+       count = 0;
+    }
+
+  if (outlen >= 8 && count)
+    {
+      /* Extract tail of partial block. */
+      nlanes = outlen / 8;
+      nleft = (bsize - count) / 8;
+      nlanes = nlanes < nleft ? nlanes : nleft;
+
+      nburn = ctx->ops->extract(hd, count / 8, outbuf, nlanes * 8);
+      burn = nburn > burn ? nburn : burn;
+      outlen -= nlanes * 8;
+      outbuf += nlanes * 8;
+      count += nlanes * 8;
+
+      gcry_assert(count <= bsize);
+
+      if (count == bsize)
+       count = 0;
+    }
+
+  while (outlen >= bsize)
+    {
+      gcry_assert(count == 0);
+
+      /* Squeeze more. */
+      nburn = ctx->ops->permute(hd);
+      burn = nburn > burn ? nburn : burn;
+
+      /* Extract full block. */
+      nburn = ctx->ops->extract(hd, 0, outbuf, bsize);
+      burn = nburn > burn ? nburn : burn;
+
+      outlen -= bsize;
+      outbuf += bsize;
+    }
+
+  if (outlen)
+    {
+      gcry_assert(outlen < bsize);
+
+      if (count == 0)
+       {
+         /* Squeeze more. */
+         nburn = ctx->ops->permute(hd);
+         burn = nburn > burn ? nburn : burn;
+       }
+
+      if (outlen >= 8)
+       {
+         /* Extract head of partial block. */
+         nlanes = outlen / 8;
+         nburn = ctx->ops->extract(hd, count / 8, outbuf, nlanes * 8);
+         burn = nburn > burn ? nburn : burn;
+         outlen -= nlanes * 8;
+         outbuf += nlanes * 8;
+         count += nlanes * 8;
+
+         gcry_assert(count < bsize);
+       }
+
+      if (outlen)
+       {
+         /* Extract head of partial lane. */
+         nburn = ctx->ops->extract(hd, count / 8, lane, 8);
+         burn = nburn > burn ? nburn : burn;
+
+         for (i = count % 8; outlen && i < 8; i++)
+           {
+             *outbuf++ = lane[i];
+             outlen--;
+             count++;
+           }
+
+         gcry_assert(count < bsize);
+       }
+    }
+
+  ctx->count = count;
+
+  if (burn)
+    _gcry_burn_stack (burn);
+}
+
+
+/* Variant of the above shortcut function using multiple buffers.  */
+static void
+_gcry_sha3_hash_buffers (void *outbuf, size_t nbytes, const gcry_buffer_t *iov,
+                        int iovcnt, const gcry_md_spec_t *spec)
+{
+  KECCAK_CONTEXT hd;
+
+  spec->init (&hd, 0);
+  for (;iovcnt > 0; iov++, iovcnt--)
+    keccak_write (&hd, (const char*)iov[0].data + iov[0].off, iov[0].len);
+  keccak_final (&hd);
+  if (spec->mdlen > 0)
+    memcpy (outbuf, keccak_read (&hd), spec->mdlen);
+  else
+    keccak_extract (&hd, outbuf, nbytes);
+}
+
+
+static void
+_gcry_sha3_224_hash_buffers (void *outbuf, size_t nbytes,
+                            const gcry_buffer_t *iov, int iovcnt)
+{
+  _gcry_sha3_hash_buffers (outbuf, nbytes, iov, iovcnt,
+                          &_gcry_digest_spec_sha3_224);
+}
+
+static void
+_gcry_sha3_256_hash_buffers (void *outbuf, size_t nbytes,
+                            const gcry_buffer_t *iov, int iovcnt)
+{
+  _gcry_sha3_hash_buffers (outbuf, nbytes, iov, iovcnt,
+                          &_gcry_digest_spec_sha3_256);
+}
+
+static void
+_gcry_sha3_384_hash_buffers (void *outbuf, size_t nbytes,
+                            const gcry_buffer_t *iov, int iovcnt)
+{
+  _gcry_sha3_hash_buffers (outbuf, nbytes, iov, iovcnt,
+                          &_gcry_digest_spec_sha3_384);
+}
+
+static void
+_gcry_sha3_512_hash_buffers (void *outbuf, size_t nbytes,
+                            const gcry_buffer_t *iov, int iovcnt)
+{
+  _gcry_sha3_hash_buffers (outbuf, nbytes, iov, iovcnt,
+                          &_gcry_digest_spec_sha3_512);
+}
+
+static void
+_gcry_shake128_hash_buffers (void *outbuf, size_t nbytes,
+                            const gcry_buffer_t *iov, int iovcnt)
+{
+  _gcry_sha3_hash_buffers (outbuf, nbytes, iov, iovcnt,
+                          &_gcry_digest_spec_shake128);
+}
+
+static void
+_gcry_shake256_hash_buffers (void *outbuf, size_t nbytes,
+                            const gcry_buffer_t *iov, int iovcnt)
+{
+  _gcry_sha3_hash_buffers (outbuf, nbytes, iov, iovcnt,
+                          &_gcry_digest_spec_shake256);
+}
+
+
+/*
+     Self-test section.
+ */
+
+
+static gpg_err_code_t
+selftests_keccak (int algo, int extended, selftest_report_func_t report)
+{
+  const char *what;
+  const char *errtxt;
+  const char *short_hash;
+  const char *long_hash;
+  const char *one_million_a_hash;
+  int hash_len;
+
+  switch (algo)
+  {
+    default:
+      BUG();
+
+    case GCRY_MD_SHA3_224:
+      short_hash =
+       "\xe6\x42\x82\x4c\x3f\x8c\xf2\x4a\xd0\x92\x34\xee\x7d\x3c\x76\x6f"
+       "\xc9\xa3\xa5\x16\x8d\x0c\x94\xad\x73\xb4\x6f\xdf";
+      long_hash =
+       "\x54\x3e\x68\x68\xe1\x66\x6c\x1a\x64\x36\x30\xdf\x77\x36\x7a\xe5"
+       "\xa6\x2a\x85\x07\x0a\x51\xc1\x4c\xbf\x66\x5c\xbc";
+      one_million_a_hash =
+       "\xd6\x93\x35\xb9\x33\x25\x19\x2e\x51\x6a\x91\x2e\x6d\x19\xa1\x5c"
+       "\xb5\x1c\x6e\xd5\xc1\x52\x43\xe7\xa7\xfd\x65\x3c";
+      hash_len = 28;
+      break;
+
+    case GCRY_MD_SHA3_256:
+      short_hash =
+       "\x3a\x98\x5d\xa7\x4f\xe2\x25\xb2\x04\x5c\x17\x2d\x6b\xd3\x90\xbd"
+       "\x85\x5f\x08\x6e\x3e\x9d\x52\x5b\x46\xbf\xe2\x45\x11\x43\x15\x32";
+      long_hash =
+       "\x91\x6f\x60\x61\xfe\x87\x97\x41\xca\x64\x69\xb4\x39\x71\xdf\xdb"
+       "\x28\xb1\xa3\x2d\xc3\x6c\xb3\x25\x4e\x81\x2b\xe2\x7a\xad\x1d\x18";
+      one_million_a_hash =
+       "\x5c\x88\x75\xae\x47\x4a\x36\x34\xba\x4f\xd5\x5e\xc8\x5b\xff\xd6"
+       "\x61\xf3\x2a\xca\x75\xc6\xd6\x99\xd0\xcd\xcb\x6c\x11\x58\x91\xc1";
+      hash_len = 32;
+      break;
+
+    case GCRY_MD_SHA3_384:
+      short_hash =
+       "\xec\x01\x49\x82\x88\x51\x6f\xc9\x26\x45\x9f\x58\xe2\xc6\xad\x8d"
+       "\xf9\xb4\x73\xcb\x0f\xc0\x8c\x25\x96\xda\x7c\xf0\xe4\x9b\xe4\xb2"
+       "\x98\xd8\x8c\xea\x92\x7a\xc7\xf5\x39\xf1\xed\xf2\x28\x37\x6d\x25";
+      long_hash =
+       "\x79\x40\x7d\x3b\x59\x16\xb5\x9c\x3e\x30\xb0\x98\x22\x97\x47\x91"
+       "\xc3\x13\xfb\x9e\xcc\x84\x9e\x40\x6f\x23\x59\x2d\x04\xf6\x25\xdc"
+       "\x8c\x70\x9b\x98\xb4\x3b\x38\x52\xb3\x37\x21\x61\x79\xaa\x7f\xc7";
+      one_million_a_hash =
+       "\xee\xe9\xe2\x4d\x78\xc1\x85\x53\x37\x98\x34\x51\xdf\x97\xc8\xad"
+       "\x9e\xed\xf2\x56\xc6\x33\x4f\x8e\x94\x8d\x25\x2d\x5e\x0e\x76\x84"
+       "\x7a\xa0\x77\x4d\xdb\x90\xa8\x42\x19\x0d\x2c\x55\x8b\x4b\x83\x40";
+      hash_len = 48;
+      break;
+
+    case GCRY_MD_SHA3_512:
+      short_hash =
+       "\xb7\x51\x85\x0b\x1a\x57\x16\x8a\x56\x93\xcd\x92\x4b\x6b\x09\x6e"
+       "\x08\xf6\x21\x82\x74\x44\xf7\x0d\x88\x4f\x5d\x02\x40\xd2\x71\x2e"
+       "\x10\xe1\x16\xe9\x19\x2a\xf3\xc9\x1a\x7e\xc5\x76\x47\xe3\x93\x40"
+       "\x57\x34\x0b\x4c\xf4\x08\xd5\xa5\x65\x92\xf8\x27\x4e\xec\x53\xf0";
+      long_hash =
+       "\xaf\xeb\xb2\xef\x54\x2e\x65\x79\xc5\x0c\xad\x06\xd2\xe5\x78\xf9"
+       "\xf8\xdd\x68\x81\xd7\xdc\x82\x4d\x26\x36\x0f\xee\xbf\x18\xa4\xfa"
+       "\x73\xe3\x26\x11\x22\x94\x8e\xfc\xfd\x49\x2e\x74\xe8\x2e\x21\x89"
+       "\xed\x0f\xb4\x40\xd1\x87\xf3\x82\x27\x0c\xb4\x55\xf2\x1d\xd1\x85";
+      one_million_a_hash =
+       "\x3c\x3a\x87\x6d\xa1\x40\x34\xab\x60\x62\x7c\x07\x7b\xb9\x8f\x7e"
+       "\x12\x0a\x2a\x53\x70\x21\x2d\xff\xb3\x38\x5a\x18\xd4\xf3\x88\x59"
+       "\xed\x31\x1d\x0a\x9d\x51\x41\xce\x9c\xc5\xc6\x6e\xe6\x89\xb2\x66"
+       "\xa8\xaa\x18\xac\xe8\x28\x2a\x0e\x0d\xb5\x96\xc9\x0b\x0a\x7b\x87";
+      hash_len = 64;
+      break;
+
+    case GCRY_MD_SHAKE128:
+      short_hash =
+       "\x58\x81\x09\x2d\xd8\x18\xbf\x5c\xf8\xa3\xdd\xb7\x93\xfb\xcb\xa7"
+       "\x40\x97\xd5\xc5\x26\xa6\xd3\x5f\x97\xb8\x33\x51\x94\x0f\x2c\xc8";
+      long_hash =
+       "\x7b\x6d\xf6\xff\x18\x11\x73\xb6\xd7\x89\x8d\x7f\xf6\x3f\xb0\x7b"
+       "\x7c\x23\x7d\xaf\x47\x1a\x5a\xe5\x60\x2a\xdb\xcc\xef\x9c\xcf\x4b";
+      one_million_a_hash =
+       "\x9d\x22\x2c\x79\xc4\xff\x9d\x09\x2c\xf6\xca\x86\x14\x3a\xa4\x11"
+       "\xe3\x69\x97\x38\x08\xef\x97\x09\x32\x55\x82\x6c\x55\x72\xef\x58";
+      hash_len = 32;
+      break;
+
+    case GCRY_MD_SHAKE256:
+      short_hash =
+       "\x48\x33\x66\x60\x13\x60\xa8\x77\x1c\x68\x63\x08\x0c\xc4\x11\x4d"
+       "\x8d\xb4\x45\x30\xf8\xf1\xe1\xee\x4f\x94\xea\x37\xe7\x8b\x57\x39";
+      long_hash =
+       "\x98\xbe\x04\x51\x6c\x04\xcc\x73\x59\x3f\xef\x3e\xd0\x35\x2e\xa9"
+       "\xf6\x44\x39\x42\xd6\x95\x0e\x29\xa3\x72\xa6\x81\xc3\xde\xaf\x45";
+      one_million_a_hash =
+       "\x35\x78\xa7\xa4\xca\x91\x37\x56\x9c\xdf\x76\xed\x61\x7d\x31\xbb"
+       "\x99\x4f\xca\x9c\x1b\xbf\x8b\x18\x40\x13\xde\x82\x34\xdf\xd1\x3a";
+      hash_len = 32;
+      break;
+  }
+
+  what = "short string";
+  errtxt = _gcry_hash_selftest_check_one (algo, 0, "abc", 3, short_hash,
+                                         hash_len);
+  if (errtxt)
+    goto failed;
+
+  if (extended)
+    {
+      what = "long string";
+      errtxt = _gcry_hash_selftest_check_one
+       (algo, 0,
+       "abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmn"
+       "hijklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu", 112,
+       long_hash, hash_len);
+      if (errtxt)
+       goto failed;
+
+      what = "one million \"a\"";
+      errtxt = _gcry_hash_selftest_check_one (algo, 1, NULL, 0,
+                                             one_million_a_hash, hash_len);
+      if (errtxt)
+       goto failed;
+    }
+
+  return 0; /* Succeeded. */
+
+failed:
+  if (report)
+    report ("digest", algo, what, errtxt);
+  return GPG_ERR_SELFTEST_FAILED;
+}
+
+
+/* Run a full self-test for ALGO and return 0 on success.  */
+static gpg_err_code_t
+run_selftests (int algo, int extended, selftest_report_func_t report)
+{
+  gpg_err_code_t ec;
+
+  switch (algo)
+    {
+    case GCRY_MD_SHA3_224:
+    case GCRY_MD_SHA3_256:
+    case GCRY_MD_SHA3_384:
+    case GCRY_MD_SHA3_512:
+    case GCRY_MD_SHAKE128:
+    case GCRY_MD_SHAKE256:
+      ec = selftests_keccak (algo, extended, report);
+      break;
+    default:
+      ec = GPG_ERR_DIGEST_ALGO;
+      break;
+    }
+
+  return ec;
+}
+
+
+
+
+static const byte sha3_224_asn[] = { 0x30 };
+static const gcry_md_oid_spec_t oid_spec_sha3_224[] =
+  {
+    { "2.16.840.1.101.3.4.2.7" },
+    /* PKCS#1 sha3_224WithRSAEncryption */
+    { "?" },
+    { NULL }
+  };
+static const byte sha3_256_asn[] = { 0x30 };
+static const gcry_md_oid_spec_t oid_spec_sha3_256[] =
+  {
+    { "2.16.840.1.101.3.4.2.8" },
+    /* PKCS#1 sha3_256WithRSAEncryption */
+    { "?" },
+    { NULL }
+  };
+static const byte sha3_384_asn[] = { 0x30 };
+static const gcry_md_oid_spec_t oid_spec_sha3_384[] =
+  {
+    { "2.16.840.1.101.3.4.2.9" },
+    /* PKCS#1 sha3_384WithRSAEncryption */
+    { "?" },
+    { NULL }
+  };
+static const byte sha3_512_asn[] = { 0x30 };
+static const gcry_md_oid_spec_t oid_spec_sha3_512[] =
+  {
+    { "2.16.840.1.101.3.4.2.10" },
+    /* PKCS#1 sha3_512WithRSAEncryption */
+    { "?" },
+    { NULL }
+  };
+static const byte shake128_asn[] = { 0x30 };
+static const gcry_md_oid_spec_t oid_spec_shake128[] =
+  {
+    { "2.16.840.1.101.3.4.2.11" },
+    /* PKCS#1 shake128WithRSAEncryption */
+    { "?" },
+    { NULL }
+  };
+static const byte shake256_asn[] = { 0x30 };
+static const gcry_md_oid_spec_t oid_spec_shake256[] =
+  {
+    { "2.16.840.1.101.3.4.2.12" },
+    /* PKCS#1 shake256WithRSAEncryption */
+    { "?" },
+    { NULL }
+  };
+
+const gcry_md_spec_t _gcry_digest_spec_sha3_224 =
+  {
+    GCRY_MD_SHA3_224, {0, 1},
+    "SHA3-224", sha3_224_asn, DIM (sha3_224_asn), oid_spec_sha3_224, 28,
+    sha3_224_init, keccak_write, keccak_final, keccak_read, NULL,
+    _gcry_sha3_224_hash_buffers,
+    sizeof (KECCAK_CONTEXT),
+    run_selftests
+  };
+const gcry_md_spec_t _gcry_digest_spec_sha3_256 =
+  {
+    GCRY_MD_SHA3_256, {0, 1},
+    "SHA3-256", sha3_256_asn, DIM (sha3_256_asn), oid_spec_sha3_256, 32,
+    sha3_256_init, keccak_write, keccak_final, keccak_read, NULL,
+    _gcry_sha3_256_hash_buffers,
+    sizeof (KECCAK_CONTEXT),
+    run_selftests
+  };
+const gcry_md_spec_t _gcry_digest_spec_sha3_384 =
+  {
+    GCRY_MD_SHA3_384, {0, 1},
+    "SHA3-384", sha3_384_asn, DIM (sha3_384_asn), oid_spec_sha3_384, 48,
+    sha3_384_init, keccak_write, keccak_final, keccak_read, NULL,
+    _gcry_sha3_384_hash_buffers,
+    sizeof (KECCAK_CONTEXT),
+    run_selftests
+  };
+const gcry_md_spec_t _gcry_digest_spec_sha3_512 =
+  {
+    GCRY_MD_SHA3_512, {0, 1},
+    "SHA3-512", sha3_512_asn, DIM (sha3_512_asn), oid_spec_sha3_512, 64,
+    sha3_512_init, keccak_write, keccak_final, keccak_read, NULL,
+    _gcry_sha3_512_hash_buffers,
+    sizeof (KECCAK_CONTEXT),
+    run_selftests
+  };
+const gcry_md_spec_t _gcry_digest_spec_shake128 =
+  {
+    GCRY_MD_SHAKE128, {0, 1},
+    "SHAKE128", shake128_asn, DIM (shake128_asn), oid_spec_shake128, 0,
+    shake128_init, keccak_write, keccak_final, NULL, keccak_extract,
+    _gcry_shake128_hash_buffers,
+    sizeof (KECCAK_CONTEXT),
+    run_selftests
+  };
+const gcry_md_spec_t _gcry_digest_spec_shake256 =
+  {
+    GCRY_MD_SHAKE256, {0, 1},
+    "SHAKE256", shake256_asn, DIM (shake256_asn), oid_spec_shake256, 0,
+    shake256_init, keccak_write, keccak_final, NULL, keccak_extract,
+    _gcry_shake256_hash_buffers,
+    sizeof (KECCAK_CONTEXT),
+    run_selftests
+  };
diff --git a/grub-core/lib/libgcrypt/cipher/keccak_permute_32.h 
b/grub-core/lib/libgcrypt/cipher/keccak_permute_32.h
new file mode 100644
index 000000000..1ce42a42f
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/keccak_permute_32.h
@@ -0,0 +1,536 @@
+/* keccak_permute_32.h - Keccak permute function (simple 32bit bit-interleaved)
+ * Copyright (C) 2015 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser general Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/* The code is based on public-domain/CC0 "keccakc1024/simple32bi/
+ * Keccak-simple32BI.c" implementation by Ronny Van Keer from SUPERCOP toolkit
+ * package.
+ */
+
+/* Function that computes the Keccak-f[1600] permutation on the given state. */
+static unsigned int
+KECCAK_F1600_PERMUTE_FUNC_NAME(KECCAK_STATE *hd)
+{
+  const u32 *round_consts = round_consts_32bit;
+  const u32 *round_consts_end = round_consts_32bit + 2 * 24;
+  u32 Aba0, Abe0, Abi0, Abo0, Abu0;
+  u32 Aba1, Abe1, Abi1, Abo1, Abu1;
+  u32 Aga0, Age0, Agi0, Ago0, Agu0;
+  u32 Aga1, Age1, Agi1, Ago1, Agu1;
+  u32 Aka0, Ake0, Aki0, Ako0, Aku0;
+  u32 Aka1, Ake1, Aki1, Ako1, Aku1;
+  u32 Ama0, Ame0, Ami0, Amo0, Amu0;
+  u32 Ama1, Ame1, Ami1, Amo1, Amu1;
+  u32 Asa0, Ase0, Asi0, Aso0, Asu0;
+  u32 Asa1, Ase1, Asi1, Aso1, Asu1;
+  u32 BCa0, BCe0, BCi0, BCo0, BCu0;
+  u32 BCa1, BCe1, BCi1, BCo1, BCu1;
+  u32 Da0, De0, Di0, Do0, Du0;
+  u32 Da1, De1, Di1, Do1, Du1;
+  u32 Eba0, Ebe0, Ebi0, Ebo0, Ebu0;
+  u32 Eba1, Ebe1, Ebi1, Ebo1, Ebu1;
+  u32 Ega0, Ege0, Egi0, Ego0, Egu0;
+  u32 Ega1, Ege1, Egi1, Ego1, Egu1;
+  u32 Eka0, Eke0, Eki0, Eko0, Eku0;
+  u32 Eka1, Eke1, Eki1, Eko1, Eku1;
+  u32 Ema0, Eme0, Emi0, Emo0, Emu0;
+  u32 Ema1, Eme1, Emi1, Emo1, Emu1;
+  u32 Esa0, Ese0, Esi0, Eso0, Esu0;
+  u32 Esa1, Ese1, Esi1, Eso1, Esu1;
+  u32 *state = hd->u.state32bi;
+
+  Aba0 = state[0];
+  Aba1 = state[1];
+  Abe0 = state[2];
+  Abe1 = state[3];
+  Abi0 = state[4];
+  Abi1 = state[5];
+  Abo0 = state[6];
+  Abo1 = state[7];
+  Abu0 = state[8];
+  Abu1 = state[9];
+  Aga0 = state[10];
+  Aga1 = state[11];
+  Age0 = state[12];
+  Age1 = state[13];
+  Agi0 = state[14];
+  Agi1 = state[15];
+  Ago0 = state[16];
+  Ago1 = state[17];
+  Agu0 = state[18];
+  Agu1 = state[19];
+  Aka0 = state[20];
+  Aka1 = state[21];
+  Ake0 = state[22];
+  Ake1 = state[23];
+  Aki0 = state[24];
+  Aki1 = state[25];
+  Ako0 = state[26];
+  Ako1 = state[27];
+  Aku0 = state[28];
+  Aku1 = state[29];
+  Ama0 = state[30];
+  Ama1 = state[31];
+  Ame0 = state[32];
+  Ame1 = state[33];
+  Ami0 = state[34];
+  Ami1 = state[35];
+  Amo0 = state[36];
+  Amo1 = state[37];
+  Amu0 = state[38];
+  Amu1 = state[39];
+  Asa0 = state[40];
+  Asa1 = state[41];
+  Ase0 = state[42];
+  Ase1 = state[43];
+  Asi0 = state[44];
+  Asi1 = state[45];
+  Aso0 = state[46];
+  Aso1 = state[47];
+  Asu0 = state[48];
+  Asu1 = state[49];
+
+  do
+    {
+      /* prepareTheta */
+      BCa0 = Aba0 ^ Aga0 ^ Aka0 ^ Ama0 ^ Asa0;
+      BCa1 = Aba1 ^ Aga1 ^ Aka1 ^ Ama1 ^ Asa1;
+      BCe0 = Abe0 ^ Age0 ^ Ake0 ^ Ame0 ^ Ase0;
+      BCe1 = Abe1 ^ Age1 ^ Ake1 ^ Ame1 ^ Ase1;
+      BCi0 = Abi0 ^ Agi0 ^ Aki0 ^ Ami0 ^ Asi0;
+      BCi1 = Abi1 ^ Agi1 ^ Aki1 ^ Ami1 ^ Asi1;
+      BCo0 = Abo0 ^ Ago0 ^ Ako0 ^ Amo0 ^ Aso0;
+      BCo1 = Abo1 ^ Ago1 ^ Ako1 ^ Amo1 ^ Aso1;
+      BCu0 = Abu0 ^ Agu0 ^ Aku0 ^ Amu0 ^ Asu0;
+      BCu1 = Abu1 ^ Agu1 ^ Aku1 ^ Amu1 ^ Asu1;
+
+      /* thetaRhoPiChiIota(round  , A, E) */
+      Da0 = BCu0 ^ ROL32(BCe1, 1);
+      Da1 = BCu1 ^ BCe0;
+      De0 = BCa0 ^ ROL32(BCi1, 1);
+      De1 = BCa1 ^ BCi0;
+      Di0 = BCe0 ^ ROL32(BCo1, 1);
+      Di1 = BCe1 ^ BCo0;
+      Do0 = BCi0 ^ ROL32(BCu1, 1);
+      Do1 = BCi1 ^ BCu0;
+      Du0 = BCo0 ^ ROL32(BCa1, 1);
+      Du1 = BCo1 ^ BCa0;
+
+      Aba0 ^= Da0;
+      BCa0 = Aba0;
+      Age0 ^= De0;
+      BCe0 = ROL32(Age0, 22);
+      Aki1 ^= Di1;
+      BCi0 = ROL32(Aki1, 22);
+      Amo1 ^= Do1;
+      BCo0 = ROL32(Amo1, 11);
+      Asu0 ^= Du0;
+      BCu0 = ROL32(Asu0, 7);
+      Eba0 = BCa0 ^ ANDN32(BCe0, BCi0);
+      Eba0 ^= *(round_consts++);
+      Ebe0 = BCe0 ^ ANDN32(BCi0, BCo0);
+      Ebi0 = BCi0 ^ ANDN32(BCo0, BCu0);
+      Ebo0 = BCo0 ^ ANDN32(BCu0, BCa0);
+      Ebu0 = BCu0 ^ ANDN32(BCa0, BCe0);
+
+      Aba1 ^= Da1;
+      BCa1 = Aba1;
+      Age1 ^= De1;
+      BCe1 = ROL32(Age1, 22);
+      Aki0 ^= Di0;
+      BCi1 = ROL32(Aki0, 21);
+      Amo0 ^= Do0;
+      BCo1 = ROL32(Amo0, 10);
+      Asu1 ^= Du1;
+      BCu1 = ROL32(Asu1, 7);
+      Eba1 = BCa1 ^ ANDN32(BCe1, BCi1);
+      Eba1 ^= *(round_consts++);
+      Ebe1 = BCe1 ^ ANDN32(BCi1, BCo1);
+      Ebi1 = BCi1 ^ ANDN32(BCo1, BCu1);
+      Ebo1 = BCo1 ^ ANDN32(BCu1, BCa1);
+      Ebu1 = BCu1 ^ ANDN32(BCa1, BCe1);
+
+      Abo0 ^= Do0;
+      BCa0 = ROL32(Abo0, 14);
+      Agu0 ^= Du0;
+      BCe0 = ROL32(Agu0, 10);
+      Aka1 ^= Da1;
+      BCi0 = ROL32(Aka1, 2);
+      Ame1 ^= De1;
+      BCo0 = ROL32(Ame1, 23);
+      Asi1 ^= Di1;
+      BCu0 = ROL32(Asi1, 31);
+      Ega0 = BCa0 ^ ANDN32(BCe0, BCi0);
+      Ege0 = BCe0 ^ ANDN32(BCi0, BCo0);
+      Egi0 = BCi0 ^ ANDN32(BCo0, BCu0);
+      Ego0 = BCo0 ^ ANDN32(BCu0, BCa0);
+      Egu0 = BCu0 ^ ANDN32(BCa0, BCe0);
+
+      Abo1 ^= Do1;
+      BCa1 = ROL32(Abo1, 14);
+      Agu1 ^= Du1;
+      BCe1 = ROL32(Agu1, 10);
+      Aka0 ^= Da0;
+      BCi1 = ROL32(Aka0, 1);
+      Ame0 ^= De0;
+      BCo1 = ROL32(Ame0, 22);
+      Asi0 ^= Di0;
+      BCu1 = ROL32(Asi0, 30);
+      Ega1 = BCa1 ^ ANDN32(BCe1, BCi1);
+      Ege1 = BCe1 ^ ANDN32(BCi1, BCo1);
+      Egi1 = BCi1 ^ ANDN32(BCo1, BCu1);
+      Ego1 = BCo1 ^ ANDN32(BCu1, BCa1);
+      Egu1 = BCu1 ^ ANDN32(BCa1, BCe1);
+
+      Abe1 ^= De1;
+      BCa0 = ROL32(Abe1, 1);
+      Agi0 ^= Di0;
+      BCe0 = ROL32(Agi0, 3);
+      Ako1 ^= Do1;
+      BCi0 = ROL32(Ako1, 13);
+      Amu0 ^= Du0;
+      BCo0 = ROL32(Amu0, 4);
+      Asa0 ^= Da0;
+      BCu0 = ROL32(Asa0, 9);
+      Eka0 = BCa0 ^ ANDN32(BCe0, BCi0);
+      Eke0 = BCe0 ^ ANDN32(BCi0, BCo0);
+      Eki0 = BCi0 ^ ANDN32(BCo0, BCu0);
+      Eko0 = BCo0 ^ ANDN32(BCu0, BCa0);
+      Eku0 = BCu0 ^ ANDN32(BCa0, BCe0);
+
+      Abe0 ^= De0;
+      BCa1 = Abe0;
+      Agi1 ^= Di1;
+      BCe1 = ROL32(Agi1, 3);
+      Ako0 ^= Do0;
+      BCi1 = ROL32(Ako0, 12);
+      Amu1 ^= Du1;
+      BCo1 = ROL32(Amu1, 4);
+      Asa1 ^= Da1;
+      BCu1 = ROL32(Asa1, 9);
+      Eka1 = BCa1 ^ ANDN32(BCe1, BCi1);
+      Eke1 = BCe1 ^ ANDN32(BCi1, BCo1);
+      Eki1 = BCi1 ^ ANDN32(BCo1, BCu1);
+      Eko1 = BCo1 ^ ANDN32(BCu1, BCa1);
+      Eku1 = BCu1 ^ ANDN32(BCa1, BCe1);
+
+      Abu1 ^= Du1;
+      BCa0 = ROL32(Abu1, 14);
+      Aga0 ^= Da0;
+      BCe0 = ROL32(Aga0, 18);
+      Ake0 ^= De0;
+      BCi0 = ROL32(Ake0, 5);
+      Ami1 ^= Di1;
+      BCo0 = ROL32(Ami1, 8);
+      Aso0 ^= Do0;
+      BCu0 = ROL32(Aso0, 28);
+      Ema0 = BCa0 ^ ANDN32(BCe0, BCi0);
+      Eme0 = BCe0 ^ ANDN32(BCi0, BCo0);
+      Emi0 = BCi0 ^ ANDN32(BCo0, BCu0);
+      Emo0 = BCo0 ^ ANDN32(BCu0, BCa0);
+      Emu0 = BCu0 ^ ANDN32(BCa0, BCe0);
+
+      Abu0 ^= Du0;
+      BCa1 = ROL32(Abu0, 13);
+      Aga1 ^= Da1;
+      BCe1 = ROL32(Aga1, 18);
+      Ake1 ^= De1;
+      BCi1 = ROL32(Ake1, 5);
+      Ami0 ^= Di0;
+      BCo1 = ROL32(Ami0, 7);
+      Aso1 ^= Do1;
+      BCu1 = ROL32(Aso1, 28);
+      Ema1 = BCa1 ^ ANDN32(BCe1, BCi1);
+      Eme1 = BCe1 ^ ANDN32(BCi1, BCo1);
+      Emi1 = BCi1 ^ ANDN32(BCo1, BCu1);
+      Emo1 = BCo1 ^ ANDN32(BCu1, BCa1);
+      Emu1 = BCu1 ^ ANDN32(BCa1, BCe1);
+
+      Abi0 ^= Di0;
+      BCa0 = ROL32(Abi0, 31);
+      Ago1 ^= Do1;
+      BCe0 = ROL32(Ago1, 28);
+      Aku1 ^= Du1;
+      BCi0 = ROL32(Aku1, 20);
+      Ama1 ^= Da1;
+      BCo0 = ROL32(Ama1, 21);
+      Ase0 ^= De0;
+      BCu0 = ROL32(Ase0, 1);
+      Esa0 = BCa0 ^ ANDN32(BCe0, BCi0);
+      Ese0 = BCe0 ^ ANDN32(BCi0, BCo0);
+      Esi0 = BCi0 ^ ANDN32(BCo0, BCu0);
+      Eso0 = BCo0 ^ ANDN32(BCu0, BCa0);
+      Esu0 = BCu0 ^ ANDN32(BCa0, BCe0);
+
+      Abi1 ^= Di1;
+      BCa1 = ROL32(Abi1, 31);
+      Ago0 ^= Do0;
+      BCe1 = ROL32(Ago0, 27);
+      Aku0 ^= Du0;
+      BCi1 = ROL32(Aku0, 19);
+      Ama0 ^= Da0;
+      BCo1 = ROL32(Ama0, 20);
+      Ase1 ^= De1;
+      BCu1 = ROL32(Ase1, 1);
+      Esa1 = BCa1 ^ ANDN32(BCe1, BCi1);
+      Ese1 = BCe1 ^ ANDN32(BCi1, BCo1);
+      Esi1 = BCi1 ^ ANDN32(BCo1, BCu1);
+      Eso1 = BCo1 ^ ANDN32(BCu1, BCa1);
+      Esu1 = BCu1 ^ ANDN32(BCa1, BCe1);
+
+      /* prepareTheta */
+      BCa0 = Eba0 ^ Ega0 ^ Eka0 ^ Ema0 ^ Esa0;
+      BCa1 = Eba1 ^ Ega1 ^ Eka1 ^ Ema1 ^ Esa1;
+      BCe0 = Ebe0 ^ Ege0 ^ Eke0 ^ Eme0 ^ Ese0;
+      BCe1 = Ebe1 ^ Ege1 ^ Eke1 ^ Eme1 ^ Ese1;
+      BCi0 = Ebi0 ^ Egi0 ^ Eki0 ^ Emi0 ^ Esi0;
+      BCi1 = Ebi1 ^ Egi1 ^ Eki1 ^ Emi1 ^ Esi1;
+      BCo0 = Ebo0 ^ Ego0 ^ Eko0 ^ Emo0 ^ Eso0;
+      BCo1 = Ebo1 ^ Ego1 ^ Eko1 ^ Emo1 ^ Eso1;
+      BCu0 = Ebu0 ^ Egu0 ^ Eku0 ^ Emu0 ^ Esu0;
+      BCu1 = Ebu1 ^ Egu1 ^ Eku1 ^ Emu1 ^ Esu1;
+
+      /* thetaRhoPiChiIota(round+1, E, A) */
+      Da0 = BCu0 ^ ROL32(BCe1, 1);
+      Da1 = BCu1 ^ BCe0;
+      De0 = BCa0 ^ ROL32(BCi1, 1);
+      De1 = BCa1 ^ BCi0;
+      Di0 = BCe0 ^ ROL32(BCo1, 1);
+      Di1 = BCe1 ^ BCo0;
+      Do0 = BCi0 ^ ROL32(BCu1, 1);
+      Do1 = BCi1 ^ BCu0;
+      Du0 = BCo0 ^ ROL32(BCa1, 1);
+      Du1 = BCo1 ^ BCa0;
+
+      Eba0 ^= Da0;
+      BCa0 = Eba0;
+      Ege0 ^= De0;
+      BCe0 = ROL32(Ege0, 22);
+      Eki1 ^= Di1;
+      BCi0 = ROL32(Eki1, 22);
+      Emo1 ^= Do1;
+      BCo0 = ROL32(Emo1, 11);
+      Esu0 ^= Du0;
+      BCu0 = ROL32(Esu0, 7);
+      Aba0 = BCa0 ^ ANDN32(BCe0, BCi0);
+      Aba0 ^= *(round_consts++);
+      Abe0 = BCe0 ^ ANDN32(BCi0, BCo0);
+      Abi0 = BCi0 ^ ANDN32(BCo0, BCu0);
+      Abo0 = BCo0 ^ ANDN32(BCu0, BCa0);
+      Abu0 = BCu0 ^ ANDN32(BCa0, BCe0);
+
+      Eba1 ^= Da1;
+      BCa1 = Eba1;
+      Ege1 ^= De1;
+      BCe1 = ROL32(Ege1, 22);
+      Eki0 ^= Di0;
+      BCi1 = ROL32(Eki0, 21);
+      Emo0 ^= Do0;
+      BCo1 = ROL32(Emo0, 10);
+      Esu1 ^= Du1;
+      BCu1 = ROL32(Esu1, 7);
+      Aba1 = BCa1 ^ ANDN32(BCe1, BCi1);
+      Aba1 ^= *(round_consts++);
+      Abe1 = BCe1 ^ ANDN32(BCi1, BCo1);
+      Abi1 = BCi1 ^ ANDN32(BCo1, BCu1);
+      Abo1 = BCo1 ^ ANDN32(BCu1, BCa1);
+      Abu1 = BCu1 ^ ANDN32(BCa1, BCe1);
+
+      Ebo0 ^= Do0;
+      BCa0 = ROL32(Ebo0, 14);
+      Egu0 ^= Du0;
+      BCe0 = ROL32(Egu0, 10);
+      Eka1 ^= Da1;
+      BCi0 = ROL32(Eka1, 2);
+      Eme1 ^= De1;
+      BCo0 = ROL32(Eme1, 23);
+      Esi1 ^= Di1;
+      BCu0 = ROL32(Esi1, 31);
+      Aga0 = BCa0 ^ ANDN32(BCe0, BCi0);
+      Age0 = BCe0 ^ ANDN32(BCi0, BCo0);
+      Agi0 = BCi0 ^ ANDN32(BCo0, BCu0);
+      Ago0 = BCo0 ^ ANDN32(BCu0, BCa0);
+      Agu0 = BCu0 ^ ANDN32(BCa0, BCe0);
+
+      Ebo1 ^= Do1;
+      BCa1 = ROL32(Ebo1, 14);
+      Egu1 ^= Du1;
+      BCe1 = ROL32(Egu1, 10);
+      Eka0 ^= Da0;
+      BCi1 = ROL32(Eka0, 1);
+      Eme0 ^= De0;
+      BCo1 = ROL32(Eme0, 22);
+      Esi0 ^= Di0;
+      BCu1 = ROL32(Esi0, 30);
+      Aga1 = BCa1 ^ ANDN32(BCe1, BCi1);
+      Age1 = BCe1 ^ ANDN32(BCi1, BCo1);
+      Agi1 = BCi1 ^ ANDN32(BCo1, BCu1);
+      Ago1 = BCo1 ^ ANDN32(BCu1, BCa1);
+      Agu1 = BCu1 ^ ANDN32(BCa1, BCe1);
+
+      Ebe1 ^= De1;
+      BCa0 = ROL32(Ebe1, 1);
+      Egi0 ^= Di0;
+      BCe0 = ROL32(Egi0, 3);
+      Eko1 ^= Do1;
+      BCi0 = ROL32(Eko1, 13);
+      Emu0 ^= Du0;
+      BCo0 = ROL32(Emu0, 4);
+      Esa0 ^= Da0;
+      BCu0 = ROL32(Esa0, 9);
+      Aka0 = BCa0 ^ ANDN32(BCe0, BCi0);
+      Ake0 = BCe0 ^ ANDN32(BCi0, BCo0);
+      Aki0 = BCi0 ^ ANDN32(BCo0, BCu0);
+      Ako0 = BCo0 ^ ANDN32(BCu0, BCa0);
+      Aku0 = BCu0 ^ ANDN32(BCa0, BCe0);
+
+      Ebe0 ^= De0;
+      BCa1 = Ebe0;
+      Egi1 ^= Di1;
+      BCe1 = ROL32(Egi1, 3);
+      Eko0 ^= Do0;
+      BCi1 = ROL32(Eko0, 12);
+      Emu1 ^= Du1;
+      BCo1 = ROL32(Emu1, 4);
+      Esa1 ^= Da1;
+      BCu1 = ROL32(Esa1, 9);
+      Aka1 = BCa1 ^ ANDN32(BCe1, BCi1);
+      Ake1 = BCe1 ^ ANDN32(BCi1, BCo1);
+      Aki1 = BCi1 ^ ANDN32(BCo1, BCu1);
+      Ako1 = BCo1 ^ ANDN32(BCu1, BCa1);
+      Aku1 = BCu1 ^ ANDN32(BCa1, BCe1);
+
+      Ebu1 ^= Du1;
+      BCa0 = ROL32(Ebu1, 14);
+      Ega0 ^= Da0;
+      BCe0 = ROL32(Ega0, 18);
+      Eke0 ^= De0;
+      BCi0 = ROL32(Eke0, 5);
+      Emi1 ^= Di1;
+      BCo0 = ROL32(Emi1, 8);
+      Eso0 ^= Do0;
+      BCu0 = ROL32(Eso0, 28);
+      Ama0 = BCa0 ^ ANDN32(BCe0, BCi0);
+      Ame0 = BCe0 ^ ANDN32(BCi0, BCo0);
+      Ami0 = BCi0 ^ ANDN32(BCo0, BCu0);
+      Amo0 = BCo0 ^ ANDN32(BCu0, BCa0);
+      Amu0 = BCu0 ^ ANDN32(BCa0, BCe0);
+
+      Ebu0 ^= Du0;
+      BCa1 = ROL32(Ebu0, 13);
+      Ega1 ^= Da1;
+      BCe1 = ROL32(Ega1, 18);
+      Eke1 ^= De1;
+      BCi1 = ROL32(Eke1, 5);
+      Emi0 ^= Di0;
+      BCo1 = ROL32(Emi0, 7);
+      Eso1 ^= Do1;
+      BCu1 = ROL32(Eso1, 28);
+      Ama1 = BCa1 ^ ANDN32(BCe1, BCi1);
+      Ame1 = BCe1 ^ ANDN32(BCi1, BCo1);
+      Ami1 = BCi1 ^ ANDN32(BCo1, BCu1);
+      Amo1 = BCo1 ^ ANDN32(BCu1, BCa1);
+      Amu1 = BCu1 ^ ANDN32(BCa1, BCe1);
+
+      Ebi0 ^= Di0;
+      BCa0 = ROL32(Ebi0, 31);
+      Ego1 ^= Do1;
+      BCe0 = ROL32(Ego1, 28);
+      Eku1 ^= Du1;
+      BCi0 = ROL32(Eku1, 20);
+      Ema1 ^= Da1;
+      BCo0 = ROL32(Ema1, 21);
+      Ese0 ^= De0;
+      BCu0 = ROL32(Ese0, 1);
+      Asa0 = BCa0 ^ ANDN32(BCe0, BCi0);
+      Ase0 = BCe0 ^ ANDN32(BCi0, BCo0);
+      Asi0 = BCi0 ^ ANDN32(BCo0, BCu0);
+      Aso0 = BCo0 ^ ANDN32(BCu0, BCa0);
+      Asu0 = BCu0 ^ ANDN32(BCa0, BCe0);
+
+      Ebi1 ^= Di1;
+      BCa1 = ROL32(Ebi1, 31);
+      Ego0 ^= Do0;
+      BCe1 = ROL32(Ego0, 27);
+      Eku0 ^= Du0;
+      BCi1 = ROL32(Eku0, 19);
+      Ema0 ^= Da0;
+      BCo1 = ROL32(Ema0, 20);
+      Ese1 ^= De1;
+      BCu1 = ROL32(Ese1, 1);
+      Asa1 = BCa1 ^ ANDN32(BCe1, BCi1);
+      Ase1 = BCe1 ^ ANDN32(BCi1, BCo1);
+      Asi1 = BCi1 ^ ANDN32(BCo1, BCu1);
+      Aso1 = BCo1 ^ ANDN32(BCu1, BCa1);
+      Asu1 = BCu1 ^ ANDN32(BCa1, BCe1);
+    }
+  while (round_consts < round_consts_end);
+
+  state[0] = Aba0;
+  state[1] = Aba1;
+  state[2] = Abe0;
+  state[3] = Abe1;
+  state[4] = Abi0;
+  state[5] = Abi1;
+  state[6] = Abo0;
+  state[7] = Abo1;
+  state[8] = Abu0;
+  state[9] = Abu1;
+  state[10] = Aga0;
+  state[11] = Aga1;
+  state[12] = Age0;
+  state[13] = Age1;
+  state[14] = Agi0;
+  state[15] = Agi1;
+  state[16] = Ago0;
+  state[17] = Ago1;
+  state[18] = Agu0;
+  state[19] = Agu1;
+  state[20] = Aka0;
+  state[21] = Aka1;
+  state[22] = Ake0;
+  state[23] = Ake1;
+  state[24] = Aki0;
+  state[25] = Aki1;
+  state[26] = Ako0;
+  state[27] = Ako1;
+  state[28] = Aku0;
+  state[29] = Aku1;
+  state[30] = Ama0;
+  state[31] = Ama1;
+  state[32] = Ame0;
+  state[33] = Ame1;
+  state[34] = Ami0;
+  state[35] = Ami1;
+  state[36] = Amo0;
+  state[37] = Amo1;
+  state[38] = Amu0;
+  state[39] = Amu1;
+  state[40] = Asa0;
+  state[41] = Asa1;
+  state[42] = Ase0;
+  state[43] = Ase1;
+  state[44] = Asi0;
+  state[45] = Asi1;
+  state[46] = Aso0;
+  state[47] = Aso1;
+  state[48] = Asu0;
+  state[49] = Asu1;
+
+  return sizeof(void *) * 4 + sizeof(u32) * 12 * 5 * 2;
+}
diff --git a/grub-core/lib/libgcrypt/cipher/keccak_permute_64.h 
b/grub-core/lib/libgcrypt/cipher/keccak_permute_64.h
new file mode 100644
index 000000000..45ef462f2
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/keccak_permute_64.h
@@ -0,0 +1,385 @@
+/* keccak_permute_64.h - Keccak permute function (simple 64bit)
+ * Copyright (C) 2015 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser general Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/* The code is based on public-domain/CC0 "keccakc1024/simple/Keccak-simple.c"
+ * implementation by Ronny Van Keer from SUPERCOP toolkit package.
+ */
+
+/* Function that computes the Keccak-f[1600] permutation on the given state. */
+static unsigned int
+KECCAK_F1600_PERMUTE_FUNC_NAME(KECCAK_STATE *hd)
+{
+  const u64 *round_consts = _gcry_keccak_round_consts_64bit;
+  const u64 *round_consts_end = _gcry_keccak_round_consts_64bit + 24;
+  u64 Aba, Abe, Abi, Abo, Abu;
+  u64 Aga, Age, Agi, Ago, Agu;
+  u64 Aka, Ake, Aki, Ako, Aku;
+  u64 Ama, Ame, Ami, Amo, Amu;
+  u64 Asa, Ase, Asi, Aso, Asu;
+  u64 BCa, BCe, BCi, BCo, BCu;
+  u64 Da, De, Di, Do, Du;
+  u64 Eba, Ebe, Ebi, Ebo, Ebu;
+  u64 Ega, Ege, Egi, Ego, Egu;
+  u64 Eka, Eke, Eki, Eko, Eku;
+  u64 Ema, Eme, Emi, Emo, Emu;
+  u64 Esa, Ese, Esi, Eso, Esu;
+  u64 *state = hd->u.state64;
+
+  Aba = state[0];
+  Abe = state[1];
+  Abi = state[2];
+  Abo = state[3];
+  Abu = state[4];
+  Aga = state[5];
+  Age = state[6];
+  Agi = state[7];
+  Ago = state[8];
+  Agu = state[9];
+  Aka = state[10];
+  Ake = state[11];
+  Aki = state[12];
+  Ako = state[13];
+  Aku = state[14];
+  Ama = state[15];
+  Ame = state[16];
+  Ami = state[17];
+  Amo = state[18];
+  Amu = state[19];
+  Asa = state[20];
+  Ase = state[21];
+  Asi = state[22];
+  Aso = state[23];
+  Asu = state[24];
+
+  do
+    {
+      /* prepareTheta */
+      BCa = Aba ^ Aga ^ Aka ^ Ama ^ Asa;
+      BCe = Abe ^ Age ^ Ake ^ Ame ^ Ase;
+      BCi = Abi ^ Agi ^ Aki ^ Ami ^ Asi;
+      BCo = Abo ^ Ago ^ Ako ^ Amo ^ Aso;
+      BCu = Abu ^ Agu ^ Aku ^ Amu ^ Asu;
+
+      /* thetaRhoPiChiIotaPrepareTheta(round  , A, E) */
+      Da = BCu ^ ROL64(BCe, 1);
+      De = BCa ^ ROL64(BCi, 1);
+      Di = BCe ^ ROL64(BCo, 1);
+      Do = BCi ^ ROL64(BCu, 1);
+      Du = BCo ^ ROL64(BCa, 1);
+
+      Aba ^= Da;
+      BCa = Aba;
+      Age ^= De;
+      BCe = ROL64(Age, 44);
+      Aki ^= Di;
+      BCi = ROL64(Aki, 43);
+      Amo ^= Do;
+      BCo = ROL64(Amo, 21);
+      Asu ^= Du;
+      BCu = ROL64(Asu, 14);
+      Eba = BCa ^ ANDN64(BCe, BCi);
+      Eba ^= *(round_consts++);
+      Ebe = BCe ^ ANDN64(BCi, BCo);
+      Ebi = BCi ^ ANDN64(BCo, BCu);
+      Ebo = BCo ^ ANDN64(BCu, BCa);
+      Ebu = BCu ^ ANDN64(BCa, BCe);
+
+      Abo ^= Do;
+      BCa = ROL64(Abo, 28);
+      Agu ^= Du;
+      BCe = ROL64(Agu, 20);
+      Aka ^= Da;
+      BCi = ROL64(Aka, 3);
+      Ame ^= De;
+      BCo = ROL64(Ame, 45);
+      Asi ^= Di;
+      BCu = ROL64(Asi, 61);
+      Ega = BCa ^ ANDN64(BCe, BCi);
+      Ege = BCe ^ ANDN64(BCi, BCo);
+      Egi = BCi ^ ANDN64(BCo, BCu);
+      Ego = BCo ^ ANDN64(BCu, BCa);
+      Egu = BCu ^ ANDN64(BCa, BCe);
+
+      Abe ^= De;
+      BCa = ROL64(Abe, 1);
+      Agi ^= Di;
+      BCe = ROL64(Agi, 6);
+      Ako ^= Do;
+      BCi = ROL64(Ako, 25);
+      Amu ^= Du;
+      BCo = ROL64(Amu, 8);
+      Asa ^= Da;
+      BCu = ROL64(Asa, 18);
+      Eka = BCa ^ ANDN64(BCe, BCi);
+      Eke = BCe ^ ANDN64(BCi, BCo);
+      Eki = BCi ^ ANDN64(BCo, BCu);
+      Eko = BCo ^ ANDN64(BCu, BCa);
+      Eku = BCu ^ ANDN64(BCa, BCe);
+
+      Abu ^= Du;
+      BCa = ROL64(Abu, 27);
+      Aga ^= Da;
+      BCe = ROL64(Aga, 36);
+      Ake ^= De;
+      BCi = ROL64(Ake, 10);
+      Ami ^= Di;
+      BCo = ROL64(Ami, 15);
+      Aso ^= Do;
+      BCu = ROL64(Aso, 56);
+      Ema = BCa ^ ANDN64(BCe, BCi);
+      Eme = BCe ^ ANDN64(BCi, BCo);
+      Emi = BCi ^ ANDN64(BCo, BCu);
+      Emo = BCo ^ ANDN64(BCu, BCa);
+      Emu = BCu ^ ANDN64(BCa, BCe);
+
+      Abi ^= Di;
+      BCa = ROL64(Abi, 62);
+      Ago ^= Do;
+      BCe = ROL64(Ago, 55);
+      Aku ^= Du;
+      BCi = ROL64(Aku, 39);
+      Ama ^= Da;
+      BCo = ROL64(Ama, 41);
+      Ase ^= De;
+      BCu = ROL64(Ase, 2);
+      Esa = BCa ^ ANDN64(BCe, BCi);
+      Ese = BCe ^ ANDN64(BCi, BCo);
+      Esi = BCi ^ ANDN64(BCo, BCu);
+      Eso = BCo ^ ANDN64(BCu, BCa);
+      Esu = BCu ^ ANDN64(BCa, BCe);
+
+      /* prepareTheta */
+      BCa = Eba ^ Ega ^ Eka ^ Ema ^ Esa;
+      BCe = Ebe ^ Ege ^ Eke ^ Eme ^ Ese;
+      BCi = Ebi ^ Egi ^ Eki ^ Emi ^ Esi;
+      BCo = Ebo ^ Ego ^ Eko ^ Emo ^ Eso;
+      BCu = Ebu ^ Egu ^ Eku ^ Emu ^ Esu;
+
+      /* thetaRhoPiChiIotaPrepareTheta(round+1, E, A) */
+      Da = BCu ^ ROL64(BCe, 1);
+      De = BCa ^ ROL64(BCi, 1);
+      Di = BCe ^ ROL64(BCo, 1);
+      Do = BCi ^ ROL64(BCu, 1);
+      Du = BCo ^ ROL64(BCa, 1);
+
+      Eba ^= Da;
+      BCa = Eba;
+      Ege ^= De;
+      BCe = ROL64(Ege, 44);
+      Eki ^= Di;
+      BCi = ROL64(Eki, 43);
+      Emo ^= Do;
+      BCo = ROL64(Emo, 21);
+      Esu ^= Du;
+      BCu = ROL64(Esu, 14);
+      Aba = BCa ^ ANDN64(BCe, BCi);
+      Aba ^= *(round_consts++);
+      Abe = BCe ^ ANDN64(BCi, BCo);
+      Abi = BCi ^ ANDN64(BCo, BCu);
+      Abo = BCo ^ ANDN64(BCu, BCa);
+      Abu = BCu ^ ANDN64(BCa, BCe);
+
+      Ebo ^= Do;
+      BCa = ROL64(Ebo, 28);
+      Egu ^= Du;
+      BCe = ROL64(Egu, 20);
+      Eka ^= Da;
+      BCi = ROL64(Eka, 3);
+      Eme ^= De;
+      BCo = ROL64(Eme, 45);
+      Esi ^= Di;
+      BCu = ROL64(Esi, 61);
+      Aga = BCa ^ ANDN64(BCe, BCi);
+      Age = BCe ^ ANDN64(BCi, BCo);
+      Agi = BCi ^ ANDN64(BCo, BCu);
+      Ago = BCo ^ ANDN64(BCu, BCa);
+      Agu = BCu ^ ANDN64(BCa, BCe);
+
+      Ebe ^= De;
+      BCa = ROL64(Ebe, 1);
+      Egi ^= Di;
+      BCe = ROL64(Egi, 6);
+      Eko ^= Do;
+      BCi = ROL64(Eko, 25);
+      Emu ^= Du;
+      BCo = ROL64(Emu, 8);
+      Esa ^= Da;
+      BCu = ROL64(Esa, 18);
+      Aka = BCa ^ ANDN64(BCe, BCi);
+      Ake = BCe ^ ANDN64(BCi, BCo);
+      Aki = BCi ^ ANDN64(BCo, BCu);
+      Ako = BCo ^ ANDN64(BCu, BCa);
+      Aku = BCu ^ ANDN64(BCa, BCe);
+
+      Ebu ^= Du;
+      BCa = ROL64(Ebu, 27);
+      Ega ^= Da;
+      BCe = ROL64(Ega, 36);
+      Eke ^= De;
+      BCi = ROL64(Eke, 10);
+      Emi ^= Di;
+      BCo = ROL64(Emi, 15);
+      Eso ^= Do;
+      BCu = ROL64(Eso, 56);
+      Ama = BCa ^ ANDN64(BCe, BCi);
+      Ame = BCe ^ ANDN64(BCi, BCo);
+      Ami = BCi ^ ANDN64(BCo, BCu);
+      Amo = BCo ^ ANDN64(BCu, BCa);
+      Amu = BCu ^ ANDN64(BCa, BCe);
+
+      Ebi ^= Di;
+      BCa = ROL64(Ebi, 62);
+      Ego ^= Do;
+      BCe = ROL64(Ego, 55);
+      Eku ^= Du;
+      BCi = ROL64(Eku, 39);
+      Ema ^= Da;
+      BCo = ROL64(Ema, 41);
+      Ese ^= De;
+      BCu = ROL64(Ese, 2);
+      Asa = BCa ^ ANDN64(BCe, BCi);
+      Ase = BCe ^ ANDN64(BCi, BCo);
+      Asi = BCi ^ ANDN64(BCo, BCu);
+      Aso = BCo ^ ANDN64(BCu, BCa);
+      Asu = BCu ^ ANDN64(BCa, BCe);
+    }
+  while (round_consts < round_consts_end);
+
+  state[0] = Aba;
+  state[1] = Abe;
+  state[2] = Abi;
+  state[3] = Abo;
+  state[4] = Abu;
+  state[5] = Aga;
+  state[6] = Age;
+  state[7] = Agi;
+  state[8] = Ago;
+  state[9] = Agu;
+  state[10] = Aka;
+  state[11] = Ake;
+  state[12] = Aki;
+  state[13] = Ako;
+  state[14] = Aku;
+  state[15] = Ama;
+  state[16] = Ame;
+  state[17] = Ami;
+  state[18] = Amo;
+  state[19] = Amu;
+  state[20] = Asa;
+  state[21] = Ase;
+  state[22] = Asi;
+  state[23] = Aso;
+  state[24] = Asu;
+
+  return sizeof(void *) * 4 + sizeof(u64) * 12 * 5;
+}
+
+static unsigned int
+KECCAK_F1600_ABSORB_FUNC_NAME(KECCAK_STATE *hd, int pos, const byte *lanes,
+                             size_t nlanes, int blocklanes)
+{
+  unsigned int burn = 0;
+
+  while (nlanes)
+    {
+      switch (blocklanes)
+       {
+       case 21:
+         /* SHAKE128 */
+         while (pos == 0 && nlanes >= 21)
+           {
+             nlanes -= 21;
+             absorb_lanes64_8(&hd->u.state64[0], lanes); lanes += 8 * 8;
+             absorb_lanes64_8(&hd->u.state64[8], lanes); lanes += 8 * 8;
+             absorb_lanes64_4(&hd->u.state64[16], lanes); lanes += 8 * 4;
+             absorb_lanes64_1(&hd->u.state64[20], lanes); lanes += 8 * 1;
+
+             burn = KECCAK_F1600_PERMUTE_FUNC_NAME(hd);
+           }
+         break;
+
+       case 18:
+         /* SHA3-224 */
+         while (pos == 0 && nlanes >= 18)
+           {
+             nlanes -= 18;
+             absorb_lanes64_8(&hd->u.state64[0], lanes); lanes += 8 * 8;
+             absorb_lanes64_8(&hd->u.state64[8], lanes); lanes += 8 * 8;
+             absorb_lanes64_2(&hd->u.state64[16], lanes); lanes += 8 * 2;
+
+             burn = KECCAK_F1600_PERMUTE_FUNC_NAME(hd);
+           }
+         break;
+
+       case 17:
+         /* SHA3-256 & SHAKE256 */
+         while (pos == 0 && nlanes >= 17)
+           {
+             nlanes -= 17;
+             absorb_lanes64_8(&hd->u.state64[0], lanes); lanes += 8 * 8;
+             absorb_lanes64_8(&hd->u.state64[8], lanes); lanes += 8 * 8;
+             absorb_lanes64_1(&hd->u.state64[16], lanes); lanes += 8 * 1;
+
+             burn = KECCAK_F1600_PERMUTE_FUNC_NAME(hd);
+           }
+         break;
+
+       case 13:
+         /* SHA3-384 */
+         while (pos == 0 && nlanes >= 13)
+           {
+             nlanes -= 13;
+             absorb_lanes64_8(&hd->u.state64[0], lanes); lanes += 8 * 8;
+             absorb_lanes64_4(&hd->u.state64[8], lanes); lanes += 8 * 4;
+             absorb_lanes64_1(&hd->u.state64[12], lanes); lanes += 8 * 1;
+
+             burn = KECCAK_F1600_PERMUTE_FUNC_NAME(hd);
+           }
+         break;
+
+       case 9:
+         /* SHA3-512 */
+         while (pos == 0 && nlanes >= 9)
+           {
+             nlanes -= 9;
+             absorb_lanes64_8(&hd->u.state64[0], lanes); lanes += 8 * 8;
+             absorb_lanes64_1(&hd->u.state64[8], lanes); lanes += 8 * 1;
+
+             burn = KECCAK_F1600_PERMUTE_FUNC_NAME(hd);
+           }
+         break;
+       }
+
+      while (nlanes)
+       {
+         hd->u.state64[pos] ^= buf_get_le64(lanes);
+         lanes += 8;
+         nlanes--;
+
+         if (++pos == blocklanes)
+           {
+             burn = KECCAK_F1600_PERMUTE_FUNC_NAME(hd);
+             pos = 0;
+             break;
+           }
+       }
+    }
+
+  return burn;
+}
diff --git a/grub-core/lib/libgcrypt/cipher/mac-cmac.c 
b/grub-core/lib/libgcrypt/cipher/mac-cmac.c
new file mode 100644
index 000000000..b80c3406c
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/mac-cmac.c
@@ -0,0 +1,524 @@
+/* mac-cmac.c  -  CMAC glue for MAC API
+ * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ * Copyright (C) 2008 Free Software Foundation, Inc.
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser general Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "g10lib.h"
+#include "cipher.h"
+#include "./mac-internal.h"
+
+
+static int
+map_mac_algo_to_cipher (int mac_algo)
+{
+  switch (mac_algo)
+    {
+    default:
+      return GCRY_CIPHER_NONE;
+    case GCRY_MAC_CMAC_AES:
+      return GCRY_CIPHER_AES;
+    case GCRY_MAC_CMAC_3DES:
+      return GCRY_CIPHER_3DES;
+    case GCRY_MAC_CMAC_CAMELLIA:
+      return GCRY_CIPHER_CAMELLIA128;
+    case GCRY_MAC_CMAC_IDEA:
+      return GCRY_CIPHER_IDEA;
+    case GCRY_MAC_CMAC_CAST5:
+      return GCRY_CIPHER_CAST5;
+    case GCRY_MAC_CMAC_BLOWFISH:
+      return GCRY_CIPHER_BLOWFISH;
+    case GCRY_MAC_CMAC_TWOFISH:
+      return GCRY_CIPHER_TWOFISH;
+    case GCRY_MAC_CMAC_SERPENT:
+      return GCRY_CIPHER_SERPENT128;
+    case GCRY_MAC_CMAC_SEED:
+      return GCRY_CIPHER_SEED;
+    case GCRY_MAC_CMAC_RFC2268:
+      return GCRY_CIPHER_RFC2268_128;
+    case GCRY_MAC_CMAC_GOST28147:
+      return GCRY_CIPHER_GOST28147;
+    case GCRY_MAC_CMAC_SM4:
+      return GCRY_CIPHER_SM4;
+    }
+}
+
+
+static gcry_err_code_t
+cmac_open (gcry_mac_hd_t h)
+{
+  gcry_err_code_t err;
+  gcry_cipher_hd_t hd;
+  int secure = (h->magic == CTX_MAC_MAGIC_SECURE);
+  int cipher_algo;
+  unsigned int flags;
+
+  cipher_algo = map_mac_algo_to_cipher (h->spec->algo);
+  flags = (secure ? GCRY_CIPHER_SECURE : 0);
+
+  err = _gcry_cipher_open_internal (&hd, cipher_algo, GCRY_CIPHER_MODE_CMAC,
+                                    flags);
+  if (err)
+    return err;
+
+  h->u.cmac.cipher_algo = cipher_algo;
+  h->u.cmac.ctx = hd;
+  h->u.cmac.blklen = _gcry_cipher_get_algo_blklen (cipher_algo);
+  return 0;
+}
+
+
+static void
+cmac_close (gcry_mac_hd_t h)
+{
+  _gcry_cipher_close (h->u.cmac.ctx);
+  h->u.cmac.ctx = NULL;
+}
+
+
+static gcry_err_code_t
+cmac_setkey (gcry_mac_hd_t h, const unsigned char *key, size_t keylen)
+{
+  return _gcry_cipher_setkey (h->u.cmac.ctx, key, keylen);
+}
+
+
+static gcry_err_code_t
+cmac_reset (gcry_mac_hd_t h)
+{
+  return _gcry_cipher_reset (h->u.cmac.ctx);
+}
+
+
+static gcry_err_code_t
+cmac_write (gcry_mac_hd_t h, const unsigned char *buf, size_t buflen)
+{
+  return _gcry_cipher_cmac_authenticate (h->u.cmac.ctx, buf, buflen);
+}
+
+
+static gcry_err_code_t
+cmac_read (gcry_mac_hd_t h, unsigned char *outbuf, size_t * outlen)
+{
+  if (*outlen > h->u.cmac.blklen)
+    *outlen = h->u.cmac.blklen;
+  return _gcry_cipher_cmac_get_tag (h->u.cmac.ctx, outbuf, *outlen);
+}
+
+
+static gcry_err_code_t
+cmac_verify (gcry_mac_hd_t h, const unsigned char *buf, size_t buflen)
+{
+  return _gcry_cipher_cmac_check_tag (h->u.cmac.ctx, buf, buflen);
+}
+
+
+static unsigned int
+cmac_get_maclen (int algo)
+{
+  return _gcry_cipher_get_algo_blklen (map_mac_algo_to_cipher (algo));
+}
+
+
+static unsigned int
+cmac_get_keylen (int algo)
+{
+  return _gcry_cipher_get_algo_keylen (map_mac_algo_to_cipher (algo));
+}
+
+
+/* Check one CMAC with MAC ALGO using the regular MAC
+ * API. (DATA,DATALEN) is the data to be MACed, (KEY,KEYLEN) the key
+ * and (EXPECT,EXPECTLEN) the expected result.  Returns NULL on
+ * success or a string describing the failure.  */
+static const char *
+check_one (int algo, const char *data, size_t datalen,
+           const char *key, size_t keylen,
+           const char *expect, size_t expectlen)
+{
+  gcry_mac_hd_t hd;
+  unsigned char mac[512]; /* hardcoded to avoid allocation */
+  unsigned int maclen;
+  size_t macoutlen;
+  int i;
+  gcry_error_t err = 0;
+
+  err = _gcry_mac_open (&hd, algo, 0, NULL);
+  if (err)
+    return "gcry_mac_open failed";
+
+  i = _gcry_mac_get_algo (hd);
+  if (i != algo)
+    return "gcry_mac_get_algo failed";
+
+  maclen = _gcry_mac_get_algo_maclen (algo);
+  if (maclen < 1 || maclen > 500)
+    return "gcry_mac_get_algo_maclen failed";
+
+  if (maclen != expectlen)
+    return "invalid tests data";
+
+  err = _gcry_mac_setkey (hd, key, keylen);
+  if (err)
+    {
+      _gcry_mac_close (hd);
+      return "gcry_mac_setkey failed";
+    }
+
+  err = _gcry_mac_write (hd, data, datalen);
+  if (err)
+    {
+      _gcry_mac_close (hd);
+      return "gcry_mac_write failed";
+    }
+
+  err = _gcry_mac_verify (hd, expect, maclen);
+  if (err)
+    {
+      _gcry_mac_close (hd);
+      return "gcry_mac_verify failed";
+    }
+
+  macoutlen = maclen;
+  err = _gcry_mac_read (hd, mac, &macoutlen);
+  _gcry_mac_close (hd);
+  if (err)
+    return "gcry_mac_read failed";
+
+  if (memcmp (mac, expect, maclen))
+    return "does not match";
+
+  return NULL;
+}
+
+
+/*
+ * CMAC AES and DES test vectors are from
+ * http://web.archive.org/web/20130930212819/http://csrc.nist.gov/publica \
+ * tions/nistpubs/800-38B/Updated_CMAC_Examples.pdf
+ */
+
+static gpg_err_code_t
+selftests_cmac_3des (int extended, selftest_report_func_t report)
+{
+  static const struct
+  {
+    const char *desc;
+    const char *data;
+    const char *key;
+    const char *expect;
+  } tv[] =
+    {
+      { "Basic 3DES",
+        "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96\xe9\x3d\x7e\x11\x73\x93\x17\x2a"
+        "\xae\x2d\x8a\x57",
+        "\x8a\xa8\x3b\xf8\xcb\xda\x10\x62\x0b\xc1\xbf\x19\xfb\xb6\xcd\x58"
+        "\xbc\x31\x3d\x4a\x37\x1c\xa8\xb5",
+        "\x74\x3d\xdb\xe0\xce\x2d\xc2\xed" },
+      { "Extended 3DES #1",
+        "",
+        "\x8a\xa8\x3b\xf8\xcb\xda\x10\x62\x0b\xc1\xbf\x19\xfb\xb6\xcd\x58"
+        "\xbc\x31\x3d\x4a\x37\x1c\xa8\xb5",
+        "\xb7\xa6\x88\xe1\x22\xff\xaf\x95" },
+      { "Extended 3DES #2",
+        "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96",
+        "\x8a\xa8\x3b\xf8\xcb\xda\x10\x62\x0b\xc1\xbf\x19\xfb\xb6\xcd\x58"
+        "\xbc\x31\x3d\x4a\x37\x1c\xa8\xb5",
+        "\x8e\x8f\x29\x31\x36\x28\x37\x97" },
+      { "Extended 3DES #3",
+        "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96\xe9\x3d\x7e\x11\x73\x93\x17\x2a"
+        "\xae\x2d\x8a\x57\x1e\x03\xac\x9c\x9e\xb7\x6f\xac\x45\xaf\x8e\x51",
+        "\x8a\xa8\x3b\xf8\xcb\xda\x10\x62\x0b\xc1\xbf\x19\xfb\xb6\xcd\x58"
+        "\xbc\x31\x3d\x4a\x37\x1c\xa8\xb5",
+        "\x33\xe6\xb1\x09\x24\x00\xea\xe5" },
+      { "Extended 3DES #4",
+        "",
+        "\x4c\xf1\x51\x34\xa2\x85\x0d\xd5\x8a\x3d\x10\xba\x80\x57\x0d\x38"
+        "\x4c\xf1\x51\x34\xa2\x85\x0d\xd5",
+        "\xbd\x2e\xbf\x9a\x3b\xa0\x03\x61" },
+      { "Extended 3DES #5",
+        "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96",
+        "\x4c\xf1\x51\x34\xa2\x85\x0d\xd5\x8a\x3d\x10\xba\x80\x57\x0d\x38"
+        "\x4c\xf1\x51\x34\xa2\x85\x0d\xd5",
+        "\x4f\xf2\xab\x81\x3c\x53\xce\x83" },
+      { "Extended 3DES #6",
+        "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96\xe9\x3d\x7e\x11\x73\x93\x17\x2a"
+        "\xae\x2d\x8a\x57",
+        "\x4c\xf1\x51\x34\xa2\x85\x0d\xd5\x8a\x3d\x10\xba\x80\x57\x0d\x38"
+        "\x4c\xf1\x51\x34\xa2\x85\x0d\xd5",
+        "\x62\xdd\x1b\x47\x19\x02\xbd\x4e" },
+      { "Extended 3DES #7",
+        "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96\xe9\x3d\x7e\x11\x73\x93\x17\x2a"
+        "\xae\x2d\x8a\x57\x1e\x03\xac\x9c\x9e\xb7\x6f\xac\x45\xaf\x8e\x51",
+        "\x4c\xf1\x51\x34\xa2\x85\x0d\xd5\x8a\x3d\x10\xba\x80\x57\x0d\x38"
+        "\x4c\xf1\x51\x34\xa2\x85\x0d\xd5",
+        "\x31\xb1\xe4\x31\xda\xbc\x4e\xb8" },
+      { NULL }
+    };
+  const char *what;
+  const char *errtxt;
+  int tvidx;
+
+  for (tvidx=0; tv[tvidx].desc; tvidx++)
+    {
+      what = tv[tvidx].desc;
+      errtxt = check_one (GCRY_MAC_CMAC_3DES,
+                          tv[tvidx].data, strlen (tv[tvidx].data),
+                          tv[tvidx].key, strlen (tv[tvidx].key),
+                          tv[tvidx].expect, 8);
+      if (errtxt)
+        goto failed;
+      if (!extended)
+        break;
+    }
+
+  return 0; /* Succeeded. */
+
+ failed:
+  if (report)
+    report ("cmac", GCRY_MAC_CMAC_3DES, what, errtxt);
+  return GPG_ERR_SELFTEST_FAILED;
+}
+
+
+
+static gpg_err_code_t
+selftests_cmac_aes (int extended, selftest_report_func_t report)
+{
+  static const struct
+  {
+    const char *desc;
+    const char *data;
+    const char *key;
+    const char *expect;
+  } tv[] =
+    {
+      { "Basic AES128",
+        "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96\xe9\x3d\x7e\x11\x73\x93\x17\x2a"
+        "\xae\x2d\x8a\x57\x1e\x03\xac\x9c\x9e\xb7\x6f\xac\x45\xaf\x8e\x51"
+        "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11",
+        "\x2b\x7e\x15\x16\x28\xae\xd2\xa6\xab\xf7\x15\x88\x09\xcf\x4f\x3c",
+        "\xdf\xa6\x67\x47\xde\x9a\xe6\x30\x30\xca\x32\x61\x14\x97\xc8\x27" },
+      { "Basic AES192",
+        "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96\xe9\x3d\x7e\x11\x73\x93\x17\x2a"
+        "\xae\x2d\x8a\x57\x1e\x03\xac\x9c\x9e\xb7\x6f\xac\x45\xaf\x8e\x51"
+        "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11",
+        "\x8e\x73\xb0\xf7\xda\x0e\x64\x52\xc8\x10\xf3\x2b\x80\x90\x79\xe5"
+        "\x62\xf8\xea\xd2\x52\x2c\x6b\x7b",
+        "\x8a\x1d\xe5\xbe\x2e\xb3\x1a\xad\x08\x9a\x82\xe6\xee\x90\x8b\x0e" },
+      { "Basic AES256",
+        "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96\xe9\x3d\x7e\x11\x73\x93\x17\x2a"
+        "\xae\x2d\x8a\x57\x1e\x03\xac\x9c\x9e\xb7\x6f\xac\x45\xaf\x8e\x51"
+        "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11",
+        "\x60\x3d\xeb\x10\x15\xca\x71\xbe\x2b\x73\xae\xf0\x85\x7d\x77\x81"
+        "\x1f\x35\x2c\x07\x3b\x61\x08\xd7\x2d\x98\x10\xa3\x09\x14\xdf\xf4",
+        "\xaa\xf3\xd8\xf1\xde\x56\x40\xc2\x32\xf5\xb1\x69\xb9\xc9\x11\xe6" },
+      { "Extended AES #1",
+        "",
+        "\x2b\x7e\x15\x16\x28\xae\xd2\xa6\xab\xf7\x15\x88\x09\xcf\x4f\x3c",
+        "\xbb\x1d\x69\x29\xe9\x59\x37\x28\x7f\xa3\x7d\x12\x9b\x75\x67\x46" },
+      { "Extended AES #2",
+        "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96\xe9\x3d\x7e\x11\x73\x93\x17\x2a",
+        "\x8e\x73\xb0\xf7\xda\x0e\x64\x52\xc8\x10\xf3\x2b\x80\x90\x79\xe5"
+        "\x62\xf8\xea\xd2\x52\x2c\x6b\x7b",
+        "\x9e\x99\xa7\xbf\x31\xe7\x10\x90\x06\x62\xf6\x5e\x61\x7c\x51\x84" },
+      { "Extended AES #3",
+        "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96\xe9\x3d\x7e\x11\x73\x93\x17\x2a"
+        "\xae\x2d\x8a\x57\x1e\x03\xac\x9c\x9e\xb7\x6f\xac\x45\xaf\x8e\x51"
+        "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11\xe5\xfb\xc1\x19\x1a\x0a\x52\xef"
+        "\xf6\x9f\x24\x45\xdf\x4f\x9b\x17\xad\x2b\x41\x7b\xe6\x6c\x37\x10",
+        "\x60\x3d\xeb\x10\x15\xca\x71\xbe\x2b\x73\xae\xf0\x85\x7d\x77\x81"
+        "\x1f\x35\x2c\x07\x3b\x61\x08\xd7\x2d\x98\x10\xa3\x09\x14\xdf\xf4",
+        "\xe1\x99\x21\x90\x54\x9f\x6e\xd5\x69\x6a\x2c\x05\x6c\x31\x54\x10" },
+      { "Extended AES #4",
+        "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96\xe9\x3d\x7e\x11\x73\x93\x17\x2a",
+        "\x2b\x7e\x15\x16\x28\xae\xd2\xa6\xab\xf7\x15\x88\x09\xcf\x4f\x3c",
+        "\x07\x0a\x16\xb4\x6b\x4d\x41\x44\xf7\x9b\xdd\x9d\xd0\x4a\x28\x7c" },
+      { "Extended AES #5",
+        "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96\xe9\x3d\x7e\x11\x73\x93\x17\x2a"
+        "\xae\x2d\x8a\x57\x1e\x03\xac\x9c\x9e\xb7\x6f\xac\x45\xaf\x8e\x51"
+        "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11\xe5\xfb\xc1\x19\x1a\x0a\x52\xef"
+        "\xf6\x9f\x24\x45\xdf\x4f\x9b\x17\xad\x2b\x41\x7b\xe6\x6c\x37\x10",
+        "\x2b\x7e\x15\x16\x28\xae\xd2\xa6\xab\xf7\x15\x88\x09\xcf\x4f\x3c",
+        "\x51\xf0\xbe\xbf\x7e\x3b\x9d\x92\xfc\x49\x74\x17\x79\x36\x3c\xfe" },
+      { "Extended AES #6",
+        "",
+        "\x8e\x73\xb0\xf7\xda\x0e\x64\x52\xc8\x10\xf3\x2b\x80\x90\x79\xe5"
+        "\x62\xf8\xea\xd2\x52\x2c\x6b\x7b",
+        "\xd1\x7d\xdf\x46\xad\xaa\xcd\xe5\x31\xca\xc4\x83\xde\x7a\x93\x67" },
+      { "Extended AES #7",
+        "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96\xe9\x3d\x7e\x11\x73\x93\x17\x2a"
+        "\xae\x2d\x8a\x57\x1e\x03\xac\x9c\x9e\xb7\x6f\xac\x45\xaf\x8e\x51"
+        "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11\xe5\xfb\xc1\x19\x1a\x0a\x52\xef"
+        "\xf6\x9f\x24\x45\xdf\x4f\x9b\x17\xad\x2b\x41\x7b\xe6\x6c\x37\x10",
+        "\x8e\x73\xb0\xf7\xda\x0e\x64\x52\xc8\x10\xf3\x2b\x80\x90\x79\xe5"
+        "\x62\xf8\xea\xd2\x52\x2c\x6b\x7b",
+        "\xa1\xd5\xdf\x0e\xed\x79\x0f\x79\x4d\x77\x58\x96\x59\xf3\x9a\x11" },
+      { "Extended AES #8",
+        "",
+        "\x60\x3d\xeb\x10\x15\xca\x71\xbe\x2b\x73\xae\xf0\x85\x7d\x77\x81"
+        "\x1f\x35\x2c\x07\x3b\x61\x08\xd7\x2d\x98\x10\xa3\x09\x14\xdf\xf4",
+        "\x02\x89\x62\xf6\x1b\x7b\xf8\x9e\xfc\x6b\x55\x1f\x46\x67\xd9\x83" },
+      { "Extended AES #9",
+        "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96\xe9\x3d\x7e\x11\x73\x93\x17\x2a",
+        "\x60\x3d\xeb\x10\x15\xca\x71\xbe\x2b\x73\xae\xf0\x85\x7d\x77\x81"
+        "\x1f\x35\x2c\x07\x3b\x61\x08\xd7\x2d\x98\x10\xa3\x09\x14\xdf\xf4",
+        "\x28\xa7\x02\x3f\x45\x2e\x8f\x82\xbd\x4b\xf2\x8d\x8c\x37\xc3\x5c" },
+      { NULL }
+    };
+  const char *what;
+  const char *errtxt;
+  int tvidx;
+
+  for (tvidx=0; tv[tvidx].desc; tvidx++)
+    {
+      what = tv[tvidx].desc;
+      errtxt = check_one (GCRY_MAC_CMAC_AES,
+                          tv[tvidx].data, strlen (tv[tvidx].data),
+                          tv[tvidx].key, strlen (tv[tvidx].key),
+                          tv[tvidx].expect, strlen (tv[tvidx].expect));
+      if (errtxt)
+        goto failed;
+      if (tvidx >= 2 && !extended)
+        break;
+    }
+
+  return 0; /* Succeeded. */
+
+ failed:
+  if (report)
+    report ("cmac", GCRY_MAC_CMAC_AES, what, errtxt);
+  return GPG_ERR_SELFTEST_FAILED;
+}
+
+static gpg_err_code_t
+cmac_selftest (int algo, int extended, selftest_report_func_t report)
+{
+  gpg_err_code_t ec;
+
+  switch (algo)
+    {
+    case GCRY_MAC_CMAC_3DES:
+      ec = selftests_cmac_3des (extended, report);
+      break;
+    case GCRY_MAC_CMAC_AES:
+      ec = selftests_cmac_aes (extended, report);
+      break;
+
+    default:
+      ec = GPG_ERR_MAC_ALGO;
+      break;
+    }
+
+  return ec;
+}
+
+
+static gcry_mac_spec_ops_t cmac_ops = {
+  cmac_open,
+  cmac_close,
+  cmac_setkey,
+  NULL,
+  cmac_reset,
+  cmac_write,
+  cmac_read,
+  cmac_verify,
+  cmac_get_maclen,
+  cmac_get_keylen,
+  NULL,
+  cmac_selftest
+};
+
+
+#if USE_BLOWFISH
+const gcry_mac_spec_t _gcry_mac_type_spec_cmac_blowfish = {
+  GCRY_MAC_CMAC_BLOWFISH, {0, 0}, "CMAC_BLOWFISH",
+  &cmac_ops
+};
+#endif
+#if USE_DES
+const gcry_mac_spec_t _gcry_mac_type_spec_cmac_tripledes = {
+  GCRY_MAC_CMAC_3DES, {0, 0}, "CMAC_3DES",
+  &cmac_ops
+};
+#endif
+#if USE_CAST5
+const gcry_mac_spec_t _gcry_mac_type_spec_cmac_cast5 = {
+  GCRY_MAC_CMAC_CAST5, {0, 0}, "CMAC_CAST5",
+  &cmac_ops
+};
+#endif
+#if USE_AES
+const gcry_mac_spec_t _gcry_mac_type_spec_cmac_aes = {
+  GCRY_MAC_CMAC_AES, {0, 1}, "CMAC_AES",
+  &cmac_ops
+};
+#endif
+#if USE_TWOFISH
+const gcry_mac_spec_t _gcry_mac_type_spec_cmac_twofish = {
+  GCRY_MAC_CMAC_TWOFISH, {0, 0}, "CMAC_TWOFISH",
+  &cmac_ops
+};
+#endif
+#if USE_SERPENT
+const gcry_mac_spec_t _gcry_mac_type_spec_cmac_serpent = {
+  GCRY_MAC_CMAC_SERPENT, {0, 0}, "CMAC_SERPENT",
+  &cmac_ops
+};
+#endif
+#if USE_RFC2268
+const gcry_mac_spec_t _gcry_mac_type_spec_cmac_rfc2268 = {
+  GCRY_MAC_CMAC_RFC2268, {0, 0}, "CMAC_RFC2268",
+  &cmac_ops
+};
+#endif
+#if USE_SEED
+const gcry_mac_spec_t _gcry_mac_type_spec_cmac_seed = {
+  GCRY_MAC_CMAC_SEED, {0, 0}, "CMAC_SEED",
+  &cmac_ops
+};
+#endif
+#if USE_CAMELLIA
+const gcry_mac_spec_t _gcry_mac_type_spec_cmac_camellia = {
+  GCRY_MAC_CMAC_CAMELLIA, {0, 0}, "CMAC_CAMELLIA",
+  &cmac_ops
+};
+#endif
+#if USE_IDEA
+const gcry_mac_spec_t _gcry_mac_type_spec_cmac_idea = {
+  GCRY_MAC_CMAC_IDEA, {0, 0}, "CMAC_IDEA",
+  &cmac_ops
+};
+#endif
+#if USE_GOST28147
+const gcry_mac_spec_t _gcry_mac_type_spec_cmac_gost28147 = {
+  GCRY_MAC_CMAC_GOST28147, {0, 0}, "CMAC_GOST28147",
+  &cmac_ops
+};
+#endif
+#if USE_SM4
+const gcry_mac_spec_t _gcry_mac_type_spec_cmac_sm4 = {
+  GCRY_MAC_CMAC_SM4, {0, 0}, "CMAC_SM4",
+  &cmac_ops
+};
+#endif
diff --git a/grub-core/lib/libgcrypt/cipher/mac-gmac.c 
b/grub-core/lib/libgcrypt/cipher/mac-gmac.c
new file mode 100644
index 000000000..12f515ebb
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/mac-gmac.c
@@ -0,0 +1,187 @@
+/* mac-gmac.c  -  GMAC glue for MAC API
+ * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser general Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "g10lib.h"
+#include "cipher.h"
+#include "./mac-internal.h"
+
+
+static int
+map_mac_algo_to_cipher (int mac_algo)
+{
+  switch (mac_algo)
+    {
+    default:
+      return GCRY_CIPHER_NONE;
+    case GCRY_MAC_GMAC_AES:
+      return GCRY_CIPHER_AES;
+    case GCRY_MAC_GMAC_CAMELLIA:
+      return GCRY_CIPHER_CAMELLIA128;
+    case GCRY_MAC_GMAC_TWOFISH:
+      return GCRY_CIPHER_TWOFISH;
+    case GCRY_MAC_GMAC_SERPENT:
+      return GCRY_CIPHER_SERPENT128;
+    case GCRY_MAC_GMAC_SEED:
+      return GCRY_CIPHER_SEED;
+    }
+}
+
+
+static gcry_err_code_t
+gmac_open (gcry_mac_hd_t h)
+{
+  gcry_err_code_t err;
+  gcry_cipher_hd_t hd;
+  int secure = (h->magic == CTX_MAC_MAGIC_SECURE);
+  int cipher_algo;
+  unsigned int flags;
+
+  cipher_algo = map_mac_algo_to_cipher (h->spec->algo);
+  flags = (secure ? GCRY_CIPHER_SECURE : 0);
+
+  err = _gcry_cipher_open_internal (&hd, cipher_algo, GCRY_CIPHER_MODE_GCM,
+                                    flags);
+  if (err)
+    return err;
+
+  h->u.gmac.cipher_algo = cipher_algo;
+  h->u.gmac.ctx = hd;
+  return 0;
+}
+
+
+static void
+gmac_close (gcry_mac_hd_t h)
+{
+  _gcry_cipher_close (h->u.gmac.ctx);
+  h->u.gmac.ctx = NULL;
+}
+
+
+static gcry_err_code_t
+gmac_setkey (gcry_mac_hd_t h, const unsigned char *key, size_t keylen)
+{
+  return _gcry_cipher_setkey (h->u.gmac.ctx, key, keylen);
+}
+
+
+static gcry_err_code_t
+gmac_setiv (gcry_mac_hd_t h, const unsigned char *iv, size_t ivlen)
+{
+  return _gcry_cipher_setiv (h->u.gmac.ctx, iv, ivlen);
+}
+
+
+static gcry_err_code_t
+gmac_reset (gcry_mac_hd_t h)
+{
+  return _gcry_cipher_reset (h->u.gmac.ctx);
+}
+
+
+static gcry_err_code_t
+gmac_write (gcry_mac_hd_t h, const unsigned char *buf, size_t buflen)
+{
+  return _gcry_cipher_authenticate (h->u.gmac.ctx, buf, buflen);
+}
+
+
+static gcry_err_code_t
+gmac_read (gcry_mac_hd_t h, unsigned char *outbuf, size_t * outlen)
+{
+  if (*outlen > GCRY_GCM_BLOCK_LEN)
+    *outlen = GCRY_GCM_BLOCK_LEN;
+  return _gcry_cipher_gettag (h->u.gmac.ctx, outbuf, *outlen);
+}
+
+
+static gcry_err_code_t
+gmac_verify (gcry_mac_hd_t h, const unsigned char *buf, size_t buflen)
+{
+  return _gcry_cipher_checktag (h->u.gmac.ctx, buf, buflen);
+}
+
+
+static unsigned int
+gmac_get_maclen (int algo)
+{
+  (void)algo;
+  return GCRY_GCM_BLOCK_LEN;
+}
+
+
+static unsigned int
+gmac_get_keylen (int algo)
+{
+  return _gcry_cipher_get_algo_keylen (map_mac_algo_to_cipher (algo));
+}
+
+
+static gcry_mac_spec_ops_t gmac_ops = {
+  gmac_open,
+  gmac_close,
+  gmac_setkey,
+  gmac_setiv,
+  gmac_reset,
+  gmac_write,
+  gmac_read,
+  gmac_verify,
+  gmac_get_maclen,
+  gmac_get_keylen,
+  NULL,
+  NULL
+};
+
+
+#if USE_AES
+const gcry_mac_spec_t _gcry_mac_type_spec_gmac_aes = {
+  GCRY_MAC_GMAC_AES, {0, 0}, "GMAC_AES",
+  &gmac_ops
+};
+#endif
+#if USE_TWOFISH
+const gcry_mac_spec_t _gcry_mac_type_spec_gmac_twofish = {
+  GCRY_MAC_GMAC_TWOFISH, {0, 0}, "GMAC_TWOFISH",
+  &gmac_ops
+};
+#endif
+#if USE_SERPENT
+const gcry_mac_spec_t _gcry_mac_type_spec_gmac_serpent = {
+  GCRY_MAC_GMAC_SERPENT, {0, 0}, "GMAC_SERPENT",
+  &gmac_ops
+};
+#endif
+#if USE_SEED
+const gcry_mac_spec_t _gcry_mac_type_spec_gmac_seed = {
+  GCRY_MAC_GMAC_SEED, {0, 0}, "GMAC_SEED",
+  &gmac_ops
+};
+#endif
+#if USE_CAMELLIA
+const gcry_mac_spec_t _gcry_mac_type_spec_gmac_camellia = {
+  GCRY_MAC_GMAC_CAMELLIA, {0, 0}, "GMAC_CAMELLIA",
+  &gmac_ops
+};
+#endif
diff --git a/grub-core/lib/libgcrypt/cipher/mac-hmac.c 
b/grub-core/lib/libgcrypt/cipher/mac-hmac.c
new file mode 100644
index 000000000..9fac77dc7
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/mac-hmac.c
@@ -0,0 +1,1471 @@
+/* mac-hmac.c  -  HMAC glue for MAC API
+ * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ * Copyright (C) 2008 Free Software Foundation, Inc.
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser general Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "g10lib.h"
+#include "./mac-internal.h"
+#include "bufhelp.h"
+#include "cipher.h"
+
+
+static int
+map_mac_algo_to_md (int mac_algo)
+{
+  switch (mac_algo)
+    {
+    default:
+      return GCRY_MD_NONE;
+    case GCRY_MAC_HMAC_MD2:
+      return GCRY_MD_MD2;
+    case GCRY_MAC_HMAC_MD4:
+      return GCRY_MD_MD4;
+    case GCRY_MAC_HMAC_MD5:
+      return GCRY_MD_MD5;
+    case GCRY_MAC_HMAC_SHA1:
+      return GCRY_MD_SHA1;
+    case GCRY_MAC_HMAC_SHA224:
+      return GCRY_MD_SHA224;
+    case GCRY_MAC_HMAC_SHA256:
+      return GCRY_MD_SHA256;
+    case GCRY_MAC_HMAC_SHA384:
+      return GCRY_MD_SHA384;
+    case GCRY_MAC_HMAC_SHA512:
+      return GCRY_MD_SHA512;
+    case GCRY_MAC_HMAC_SHA512_256:
+      return GCRY_MD_SHA512_256;
+    case GCRY_MAC_HMAC_SHA512_224:
+      return GCRY_MD_SHA512_224;
+    case GCRY_MAC_HMAC_SHA3_224:
+      return GCRY_MD_SHA3_224;
+    case GCRY_MAC_HMAC_SHA3_256:
+      return GCRY_MD_SHA3_256;
+    case GCRY_MAC_HMAC_SHA3_384:
+      return GCRY_MD_SHA3_384;
+    case GCRY_MAC_HMAC_SHA3_512:
+      return GCRY_MD_SHA3_512;
+    case GCRY_MAC_HMAC_RMD160:
+      return GCRY_MD_RMD160;
+    case GCRY_MAC_HMAC_TIGER1:
+      return GCRY_MD_TIGER1;
+    case GCRY_MAC_HMAC_WHIRLPOOL:
+      return GCRY_MD_WHIRLPOOL;
+    case GCRY_MAC_HMAC_GOSTR3411_94:
+      return GCRY_MD_GOSTR3411_94;
+    case GCRY_MAC_HMAC_GOSTR3411_CP:
+      return GCRY_MD_GOSTR3411_CP;
+    case GCRY_MAC_HMAC_STRIBOG256:
+      return GCRY_MD_STRIBOG256;
+    case GCRY_MAC_HMAC_STRIBOG512:
+      return GCRY_MD_STRIBOG512;
+    case GCRY_MAC_HMAC_BLAKE2B_512:
+      return GCRY_MD_BLAKE2B_512;
+    case GCRY_MAC_HMAC_BLAKE2B_384:
+      return GCRY_MD_BLAKE2B_384;
+    case GCRY_MAC_HMAC_BLAKE2B_256:
+      return GCRY_MD_BLAKE2B_256;
+    case GCRY_MAC_HMAC_BLAKE2B_160:
+      return GCRY_MD_BLAKE2B_160;
+    case GCRY_MAC_HMAC_BLAKE2S_256:
+      return GCRY_MD_BLAKE2S_256;
+    case GCRY_MAC_HMAC_BLAKE2S_224:
+      return GCRY_MD_BLAKE2S_224;
+    case GCRY_MAC_HMAC_BLAKE2S_160:
+      return GCRY_MD_BLAKE2S_160;
+    case GCRY_MAC_HMAC_BLAKE2S_128:
+      return GCRY_MD_BLAKE2S_128;
+    case GCRY_MAC_HMAC_SM3:
+      return GCRY_MD_SM3;
+    }
+}
+
+
+static gcry_err_code_t
+hmac_open (gcry_mac_hd_t h)
+{
+  gcry_err_code_t err;
+  gcry_md_hd_t hd;
+  int secure = (h->magic == CTX_MAC_MAGIC_SECURE);
+  unsigned int flags;
+  int md_algo;
+
+  md_algo = map_mac_algo_to_md (h->spec->algo);
+
+  flags = GCRY_MD_FLAG_HMAC;
+  flags |= (secure ? GCRY_MD_FLAG_SECURE : 0);
+
+  err = _gcry_md_open (&hd, md_algo, flags);
+  if (err)
+    return err;
+
+  h->u.hmac.md_algo = md_algo;
+  h->u.hmac.md_ctx = hd;
+  return 0;
+}
+
+
+static void
+hmac_close (gcry_mac_hd_t h)
+{
+  _gcry_md_close (h->u.hmac.md_ctx);
+  h->u.hmac.md_ctx = NULL;
+}
+
+
+static gcry_err_code_t
+hmac_setkey (gcry_mac_hd_t h, const unsigned char *key, size_t keylen)
+{
+  return _gcry_md_setkey (h->u.hmac.md_ctx, key, keylen);
+}
+
+
+static gcry_err_code_t
+hmac_reset (gcry_mac_hd_t h)
+{
+  _gcry_md_reset (h->u.hmac.md_ctx);
+  return 0;
+}
+
+
+static gcry_err_code_t
+hmac_write (gcry_mac_hd_t h, const unsigned char *buf, size_t buflen)
+{
+  _gcry_md_write (h->u.hmac.md_ctx, buf, buflen);
+  return 0;
+}
+
+
+static gcry_err_code_t
+hmac_read (gcry_mac_hd_t h, unsigned char *outbuf, size_t * outlen)
+{
+  unsigned int dlen;
+  const unsigned char *digest;
+
+  dlen = _gcry_md_get_algo_dlen (h->u.hmac.md_algo);
+  digest = _gcry_md_read (h->u.hmac.md_ctx, h->u.hmac.md_algo);
+
+  if (*outlen <= dlen)
+    buf_cpy (outbuf, digest, *outlen);
+  else
+    {
+      buf_cpy (outbuf, digest, dlen);
+      *outlen = dlen;
+    }
+
+  return 0;
+}
+
+
+static gcry_err_code_t
+hmac_verify (gcry_mac_hd_t h, const unsigned char *buf, size_t buflen)
+{
+  unsigned int dlen;
+  const unsigned char *digest;
+
+  dlen = _gcry_md_get_algo_dlen (h->u.hmac.md_algo);
+  digest = _gcry_md_read (h->u.hmac.md_ctx, h->u.hmac.md_algo);
+
+  if (buflen > dlen)
+    return GPG_ERR_INV_LENGTH;
+
+  return buf_eq_const (buf, digest, buflen) ? 0 : GPG_ERR_CHECKSUM;
+}
+
+
+static unsigned int
+hmac_get_maclen (int algo)
+{
+  return _gcry_md_get_algo_dlen (map_mac_algo_to_md (algo));
+}
+
+
+static unsigned int
+hmac_get_keylen (int algo)
+{
+  /* Return blocksize for default key length. */
+  switch (algo)
+    {
+    case GCRY_MD_SHA3_224:
+      return 1152 / 8;
+    case GCRY_MD_SHA3_256:
+      return 1088 / 8;
+    case GCRY_MD_SHA3_384:
+      return 832 / 8;
+    case GCRY_MD_SHA3_512:
+      return 576 / 8;
+    case GCRY_MAC_HMAC_SHA384:
+    case GCRY_MAC_HMAC_SHA512:
+      return 128;
+    case GCRY_MAC_HMAC_GOSTR3411_94:
+      return 32;
+    default:
+      return 64;
+    }
+}
+
+
+/* Check one HMAC with digest ALGO using the regualr HAMC
+ * API. (DATA,DATALEN) is the data to be MACed, (KEY,KEYLEN) the key
+ * and (EXPECT,EXPECTLEN) the expected result.  If TRUNC is set, the
+ * EXPECTLEN may be less than the digest length.  Returns NULL on
+ * success or a string describing the failure.  */
+static const char *
+check_one (int algo,
+           const void *data, size_t datalen,
+           const void *key, size_t keylen,
+           const void *expect, size_t expectlen, int trunc)
+{
+  gcry_md_hd_t hd;
+  const unsigned char *digest;
+
+/*   printf ("HMAC algo %d\n", algo); */
+
+  /* Skip test with shoter key in FIPS mode.  */
+  if (fips_mode () && keylen < 14)
+    return NULL;
+
+  if (trunc)
+    {
+      if (_gcry_md_get_algo_dlen (algo) < expectlen)
+        return "invalid tests data";
+    }
+  else
+    {
+      if (_gcry_md_get_algo_dlen (algo) != expectlen)
+        return "invalid tests data";
+    }
+  if (_gcry_md_open (&hd, algo, GCRY_MD_FLAG_HMAC))
+    return "gcry_md_open failed";
+  if (_gcry_md_setkey (hd, key, keylen))
+    {
+      _gcry_md_close (hd);
+      return "gcry_md_setkey failed";
+    }
+  _gcry_md_write (hd, data, datalen);
+  digest = _gcry_md_read (hd, algo);
+  if (!digest)
+    {
+      _gcry_md_close (hd);
+      return "gcry_md_read failed";
+    }
+  if (memcmp (digest, expect, expectlen))
+    {
+/*       int i; */
+
+/*       fputs ("        {", stdout); */
+/*       for (i=0; i < expectlen-1; i++) */
+/*         { */
+/*           if (i && !(i % 8)) */
+/*             fputs ("\n         ", stdout); */
+/*           printf (" 0x%02x,", digest[i]); */
+/*         } */
+/*       printf (" 0x%02x } },\n", digest[i]); */
+
+      _gcry_md_close (hd);
+      return "does not match";
+    }
+  _gcry_md_close (hd);
+  return NULL;
+}
+
+
+static gpg_err_code_t
+selftests_sha1 (int extended, selftest_report_func_t report)
+{
+  const char *what;
+  const char *errtxt;
+  unsigned char key[128];
+  int i, j;
+
+  what = "FIPS-198a, A.1";
+  for (i=0; i < 64; i++)
+    key[i] = i;
+  errtxt = check_one (GCRY_MD_SHA1,
+                      "Sample #1", 9,
+                      key, 64,
+                      "\x4f\x4c\xa3\xd5\xd6\x8b\xa7\xcc\x0a\x12"
+                      "\x08\xc9\xc6\x1e\x9c\x5d\xa0\x40\x3c\x0a", 20, 0);
+  if (errtxt)
+    goto failed;
+
+  if (extended)
+    {
+      what = "FIPS-198a, A.2";
+      for (i=0, j=0x30; i < 20; i++)
+        key[i] = j++;
+      errtxt = check_one (GCRY_MD_SHA1,
+                          "Sample #2", 9,
+                          key, 20,
+                          "\x09\x22\xd3\x40\x5f\xaa\x3d\x19\x4f\x82"
+                          "\xa4\x58\x30\x73\x7d\x5c\xc6\xc7\x5d\x24", 20, 0);
+      if (errtxt)
+        goto failed;
+
+      what = "FIPS-198a, A.3";
+      for (i=0, j=0x50; i < 100; i++)
+        key[i] = j++;
+      errtxt = check_one (GCRY_MD_SHA1,
+                          "Sample #3", 9,
+                          key, 100,
+                          "\xbc\xf4\x1e\xab\x8b\xb2\xd8\x02\xf3\xd0"
+                          "\x5c\xaf\x7c\xb0\x92\xec\xf8\xd1\xa3\xaa", 20, 0);
+      if (errtxt)
+        goto failed;
+
+      what = "FIPS-198a, A.4";
+      for (i=0, j=0x70; i < 49; i++)
+        key[i] = j++;
+      errtxt = check_one (GCRY_MD_SHA1,
+                          "Sample #4", 9,
+                          key, 49,
+                          "\x9e\xa8\x86\xef\xe2\x68\xdb\xec\xce\x42"
+                          "\x0c\x75\x24\xdf\x32\xe0\x75\x1a\x2a\x26", 20, 0);
+      if (errtxt)
+        goto failed;
+    }
+
+  return 0; /* Succeeded. */
+
+ failed:
+  if (report)
+    report ("hmac", GCRY_MD_SHA1, what, errtxt);
+  return GPG_ERR_SELFTEST_FAILED;
+}
+
+
+
+static gpg_err_code_t
+selftests_sha224 (int extended, selftest_report_func_t report)
+{
+  static struct
+  {
+    const char * const desc;
+    const char * const data;
+    const char * const key;
+    const char expect[28];
+  } tv[] =
+    {
+      { "data-28 key-4",
+        "what do ya want for nothing?",
+        "Jefe",
+        { 0xa3, 0x0e, 0x01, 0x09, 0x8b, 0xc6, 0xdb, 0xbf,
+          0x45, 0x69, 0x0f, 0x3a, 0x7e, 0x9e, 0x6d, 0x0f,
+          0x8b, 0xbe, 0xa2, 0xa3, 0x9e, 0x61, 0x48, 0x00,
+          0x8f, 0xd0, 0x5e, 0x44 } },
+
+      { "data-9 key-20",
+        "Hi There",
+       "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b"
+        "\x0b\x0b\x0b\x0b",
+        { 0x89, 0x6f, 0xb1, 0x12, 0x8a, 0xbb, 0xdf, 0x19,
+          0x68, 0x32, 0x10, 0x7c, 0xd4, 0x9d, 0xf3, 0x3f,
+          0x47, 0xb4, 0xb1, 0x16, 0x99, 0x12, 0xba, 0x4f,
+          0x53, 0x68, 0x4b, 0x22 } },
+
+      { "data-50 key-20",
+        "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd"
+        "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd"
+        "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd"
+        "\xdd\xdd",
+       "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa",
+        { 0x7f, 0xb3, 0xcb, 0x35, 0x88, 0xc6, 0xc1, 0xf6,
+          0xff, 0xa9, 0x69, 0x4d, 0x7d, 0x6a, 0xd2, 0x64,
+          0x93, 0x65, 0xb0, 0xc1, 0xf6, 0x5d, 0x69, 0xd1,
+          0xec, 0x83, 0x33, 0xea } },
+
+      { "data-50 key-26",
+        "\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd"
+        "\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd"
+        "\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd"
+        "\xcd\xcd",
+       "\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10"
+        "\x11\x12\x13\x14\x15\x16\x17\x18\x19",
+        { 0x6c, 0x11, 0x50, 0x68, 0x74, 0x01, 0x3c, 0xac,
+          0x6a, 0x2a, 0xbc, 0x1b, 0xb3, 0x82, 0x62, 0x7c,
+          0xec, 0x6a, 0x90, 0xd8, 0x6e, 0xfc, 0x01, 0x2d,
+          0xe7, 0xaf, 0xec, 0x5a } },
+
+      { "data-54 key-131",
+        "Test Using Larger Than Block-Size Key - Hash Key First",
+       "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa",
+        { 0x95, 0xe9, 0xa0, 0xdb, 0x96, 0x20, 0x95, 0xad,
+          0xae, 0xbe, 0x9b, 0x2d, 0x6f, 0x0d, 0xbc, 0xe2,
+          0xd4, 0x99, 0xf1, 0x12, 0xf2, 0xd2, 0xb7, 0x27,
+          0x3f, 0xa6, 0x87, 0x0e } },
+
+      { "data-152 key-131",
+        "This is a test using a larger than block-size key and a larger "
+        "than block-size data. The key needs to be hashed before being "
+        "used by the HMAC algorithm.",
+       "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa",
+        { 0x3a, 0x85, 0x41, 0x66, 0xac, 0x5d, 0x9f, 0x02,
+          0x3f, 0x54, 0xd5, 0x17, 0xd0, 0xb3, 0x9d, 0xbd,
+          0x94, 0x67, 0x70, 0xdb, 0x9c, 0x2b, 0x95, 0xc9,
+          0xf6, 0xf5, 0x65, 0xd1 } },
+
+      { NULL }
+    };
+  const char *what;
+  const char *errtxt;
+  int tvidx;
+
+  for (tvidx=0; tv[tvidx].desc; tvidx++)
+    {
+      what = tv[tvidx].desc;
+      errtxt = check_one (GCRY_MD_SHA224,
+                          tv[tvidx].data, strlen (tv[tvidx].data),
+                          tv[tvidx].key, strlen (tv[tvidx].key),
+                          tv[tvidx].expect, DIM (tv[tvidx].expect), 0);
+      if (errtxt)
+        goto failed;
+      if (!extended)
+        break;
+    }
+
+  return 0; /* Succeeded. */
+
+ failed:
+  if (report)
+    report ("hmac", GCRY_MD_SHA224, what, errtxt);
+  return GPG_ERR_SELFTEST_FAILED;
+}
+
+
+static gpg_err_code_t
+selftests_sha256 (int extended, selftest_report_func_t report)
+{
+  static struct
+  {
+    const char * const desc;
+    const char * const data;
+    const char * const key;
+    const char expect[32];
+  } tv[] =
+    {
+      { "data-28 key-4",
+        "what do ya want for nothing?",
+        "Jefe",
+       { 0x5b, 0xdc, 0xc1, 0x46, 0xbf, 0x60, 0x75, 0x4e,
+          0x6a, 0x04, 0x24, 0x26, 0x08, 0x95, 0x75, 0xc7,
+          0x5a, 0x00, 0x3f, 0x08, 0x9d, 0x27, 0x39, 0x83,
+          0x9d, 0xec, 0x58, 0xb9, 0x64, 0xec, 0x38, 0x43 } },
+
+      { "data-9 key-20",
+        "Hi There",
+       "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b"
+        "\x0b\x0b\x0b\x0b",
+        { 0xb0, 0x34, 0x4c, 0x61, 0xd8, 0xdb, 0x38, 0x53,
+          0x5c, 0xa8, 0xaf, 0xce, 0xaf, 0x0b, 0xf1, 0x2b,
+          0x88, 0x1d, 0xc2, 0x00, 0xc9, 0x83, 0x3d, 0xa7,
+          0x26, 0xe9, 0x37, 0x6c, 0x2e, 0x32, 0xcf, 0xf7 } },
+
+      { "data-50 key-20",
+        "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd"
+        "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd"
+        "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd"
+        "\xdd\xdd",
+       "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa",
+        { 0x77, 0x3e, 0xa9, 0x1e, 0x36, 0x80, 0x0e, 0x46,
+          0x85, 0x4d, 0xb8, 0xeb, 0xd0, 0x91, 0x81, 0xa7,
+          0x29, 0x59, 0x09, 0x8b, 0x3e, 0xf8, 0xc1, 0x22,
+          0xd9, 0x63, 0x55, 0x14, 0xce, 0xd5, 0x65, 0xfe } },
+
+      { "data-50 key-26",
+        "\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd"
+        "\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd"
+        "\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd"
+        "\xcd\xcd",
+       "\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10"
+        "\x11\x12\x13\x14\x15\x16\x17\x18\x19",
+       { 0x82, 0x55, 0x8a, 0x38, 0x9a, 0x44, 0x3c, 0x0e,
+          0xa4, 0xcc, 0x81, 0x98, 0x99, 0xf2, 0x08, 0x3a,
+          0x85, 0xf0, 0xfa, 0xa3, 0xe5, 0x78, 0xf8, 0x07,
+          0x7a, 0x2e, 0x3f, 0xf4, 0x67, 0x29, 0x66, 0x5b } },
+
+      { "data-54 key-131",
+        "Test Using Larger Than Block-Size Key - Hash Key First",
+       "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa",
+       { 0x60, 0xe4, 0x31, 0x59, 0x1e, 0xe0, 0xb6, 0x7f,
+          0x0d, 0x8a, 0x26, 0xaa, 0xcb, 0xf5, 0xb7, 0x7f,
+          0x8e, 0x0b, 0xc6, 0x21, 0x37, 0x28, 0xc5, 0x14,
+          0x05, 0x46, 0x04, 0x0f, 0x0e, 0xe3, 0x7f, 0x54 } },
+
+      { "data-152 key-131",
+        "This is a test using a larger than block-size key and a larger "
+        "than block-size data. The key needs to be hashed before being "
+        "used by the HMAC algorithm.",
+       "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa",
+       { 0x9b, 0x09, 0xff, 0xa7, 0x1b, 0x94, 0x2f, 0xcb,
+          0x27, 0x63, 0x5f, 0xbc, 0xd5, 0xb0, 0xe9, 0x44,
+          0xbf, 0xdc, 0x63, 0x64, 0x4f, 0x07, 0x13, 0x93,
+          0x8a, 0x7f, 0x51, 0x53, 0x5c, 0x3a, 0x35, 0xe2 } },
+
+      { NULL }
+    };
+  const char *what;
+  const char *errtxt;
+  int tvidx;
+
+  for (tvidx=0; tv[tvidx].desc; tvidx++)
+    {
+      what = tv[tvidx].desc;
+      errtxt = check_one (GCRY_MD_SHA256,
+                          tv[tvidx].data, strlen (tv[tvidx].data),
+                          tv[tvidx].key, strlen (tv[tvidx].key),
+                          tv[tvidx].expect, DIM (tv[tvidx].expect), 0);
+      if (errtxt)
+        goto failed;
+      if (!extended)
+        break;
+    }
+
+  return 0; /* Succeeded. */
+
+ failed:
+  if (report)
+    report ("hmac", GCRY_MD_SHA256, what, errtxt);
+  return GPG_ERR_SELFTEST_FAILED;
+}
+
+
+static gpg_err_code_t
+selftests_sha384 (int extended, selftest_report_func_t report)
+{
+  static struct
+  {
+    const char * const desc;
+    const char * const data;
+    const char * const key;
+    const char expect[48];
+  } tv[] =
+    {
+      { "data-28 key-4",
+        "what do ya want for nothing?",
+        "Jefe",
+        { 0xaf, 0x45, 0xd2, 0xe3, 0x76, 0x48, 0x40, 0x31,
+          0x61, 0x7f, 0x78, 0xd2, 0xb5, 0x8a, 0x6b, 0x1b,
+          0x9c, 0x7e, 0xf4, 0x64, 0xf5, 0xa0, 0x1b, 0x47,
+          0xe4, 0x2e, 0xc3, 0x73, 0x63, 0x22, 0x44, 0x5e,
+          0x8e, 0x22, 0x40, 0xca, 0x5e, 0x69, 0xe2, 0xc7,
+          0x8b, 0x32, 0x39, 0xec, 0xfa, 0xb2, 0x16, 0x49 } },
+
+      { "data-9 key-20",
+        "Hi There",
+       "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b"
+        "\x0b\x0b\x0b\x0b",
+        { 0xaf, 0xd0, 0x39, 0x44, 0xd8, 0x48, 0x95, 0x62,
+          0x6b, 0x08, 0x25, 0xf4, 0xab, 0x46, 0x90, 0x7f,
+          0x15, 0xf9, 0xda, 0xdb, 0xe4, 0x10, 0x1e, 0xc6,
+          0x82, 0xaa, 0x03, 0x4c, 0x7c, 0xeb, 0xc5, 0x9c,
+          0xfa, 0xea, 0x9e, 0xa9, 0x07, 0x6e, 0xde, 0x7f,
+          0x4a, 0xf1, 0x52, 0xe8, 0xb2, 0xfa, 0x9c, 0xb6 } },
+
+      { "data-50 key-20",
+        "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd"
+        "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd"
+        "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd"
+        "\xdd\xdd",
+       "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa",
+        { 0x88, 0x06, 0x26, 0x08, 0xd3, 0xe6, 0xad, 0x8a,
+          0x0a, 0xa2, 0xac, 0xe0, 0x14, 0xc8, 0xa8, 0x6f,
+          0x0a, 0xa6, 0x35, 0xd9, 0x47, 0xac, 0x9f, 0xeb,
+          0xe8, 0x3e, 0xf4, 0xe5, 0x59, 0x66, 0x14, 0x4b,
+          0x2a, 0x5a, 0xb3, 0x9d, 0xc1, 0x38, 0x14, 0xb9,
+          0x4e, 0x3a, 0xb6, 0xe1, 0x01, 0xa3, 0x4f, 0x27 } },
+
+      { "data-50 key-26",
+        "\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd"
+        "\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd"
+        "\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd"
+        "\xcd\xcd",
+       "\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10"
+        "\x11\x12\x13\x14\x15\x16\x17\x18\x19",
+        { 0x3e, 0x8a, 0x69, 0xb7, 0x78, 0x3c, 0x25, 0x85,
+          0x19, 0x33, 0xab, 0x62, 0x90, 0xaf, 0x6c, 0xa7,
+          0x7a, 0x99, 0x81, 0x48, 0x08, 0x50, 0x00, 0x9c,
+          0xc5, 0x57, 0x7c, 0x6e, 0x1f, 0x57, 0x3b, 0x4e,
+          0x68, 0x01, 0xdd, 0x23, 0xc4, 0xa7, 0xd6, 0x79,
+          0xcc, 0xf8, 0xa3, 0x86, 0xc6, 0x74, 0xcf, 0xfb } },
+
+      { "data-54 key-131",
+        "Test Using Larger Than Block-Size Key - Hash Key First",
+       "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa",
+        { 0x4e, 0xce, 0x08, 0x44, 0x85, 0x81, 0x3e, 0x90,
+          0x88, 0xd2, 0xc6, 0x3a, 0x04, 0x1b, 0xc5, 0xb4,
+          0x4f, 0x9e, 0xf1, 0x01, 0x2a, 0x2b, 0x58, 0x8f,
+          0x3c, 0xd1, 0x1f, 0x05, 0x03, 0x3a, 0xc4, 0xc6,
+          0x0c, 0x2e, 0xf6, 0xab, 0x40, 0x30, 0xfe, 0x82,
+          0x96, 0x24, 0x8d, 0xf1, 0x63, 0xf4, 0x49, 0x52 } },
+
+      { "data-152 key-131",
+        "This is a test using a larger than block-size key and a larger "
+        "than block-size data. The key needs to be hashed before being "
+        "used by the HMAC algorithm.",
+       "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa",
+        { 0x66, 0x17, 0x17, 0x8e, 0x94, 0x1f, 0x02, 0x0d,
+          0x35, 0x1e, 0x2f, 0x25, 0x4e, 0x8f, 0xd3, 0x2c,
+          0x60, 0x24, 0x20, 0xfe, 0xb0, 0xb8, 0xfb, 0x9a,
+          0xdc, 0xce, 0xbb, 0x82, 0x46, 0x1e, 0x99, 0xc5,
+          0xa6, 0x78, 0xcc, 0x31, 0xe7, 0x99, 0x17, 0x6d,
+          0x38, 0x60, 0xe6, 0x11, 0x0c, 0x46, 0x52, 0x3e } },
+
+      { NULL }
+    };
+  const char *what;
+  const char *errtxt;
+  int tvidx;
+
+  for (tvidx=0; tv[tvidx].desc; tvidx++)
+    {
+      what = tv[tvidx].desc;
+      errtxt = check_one (GCRY_MD_SHA384,
+                          tv[tvidx].data, strlen (tv[tvidx].data),
+                          tv[tvidx].key, strlen (tv[tvidx].key),
+                          tv[tvidx].expect, DIM (tv[tvidx].expect), 0);
+      if (errtxt)
+        goto failed;
+      if (!extended)
+        break;
+    }
+
+  return 0; /* Succeeded. */
+
+ failed:
+  if (report)
+    report ("hmac", GCRY_MD_SHA384, what, errtxt);
+  return GPG_ERR_SELFTEST_FAILED;
+}
+
+
+static gpg_err_code_t
+selftests_sha512 (int extended, selftest_report_func_t report)
+{
+  static struct
+  {
+    const char * const desc;
+    const char * const data;
+    const char * const key;
+    const char expect[64];
+  } tv[] =
+    {
+      { "data-28 key-4",
+        "what do ya want for nothing?",
+        "Jefe",
+        { 0x16, 0x4b, 0x7a, 0x7b, 0xfc, 0xf8, 0x19, 0xe2,
+          0xe3, 0x95, 0xfb, 0xe7, 0x3b, 0x56, 0xe0, 0xa3,
+          0x87, 0xbd, 0x64, 0x22, 0x2e, 0x83, 0x1f, 0xd6,
+          0x10, 0x27, 0x0c, 0xd7, 0xea, 0x25, 0x05, 0x54,
+          0x97, 0x58, 0xbf, 0x75, 0xc0, 0x5a, 0x99, 0x4a,
+          0x6d, 0x03, 0x4f, 0x65, 0xf8, 0xf0, 0xe6, 0xfd,
+          0xca, 0xea, 0xb1, 0xa3, 0x4d, 0x4a, 0x6b, 0x4b,
+          0x63, 0x6e, 0x07, 0x0a, 0x38, 0xbc, 0xe7, 0x37 } },
+
+      { "data-9 key-20",
+        "Hi There",
+       "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b"
+        "\x0b\x0b\x0b\x0b",
+        { 0x87, 0xaa, 0x7c, 0xde, 0xa5, 0xef, 0x61, 0x9d,
+          0x4f, 0xf0, 0xb4, 0x24, 0x1a, 0x1d, 0x6c, 0xb0,
+          0x23, 0x79, 0xf4, 0xe2, 0xce, 0x4e, 0xc2, 0x78,
+          0x7a, 0xd0, 0xb3, 0x05, 0x45, 0xe1, 0x7c, 0xde,
+          0xda, 0xa8, 0x33, 0xb7, 0xd6, 0xb8, 0xa7, 0x02,
+          0x03, 0x8b, 0x27, 0x4e, 0xae, 0xa3, 0xf4, 0xe4,
+          0xbe, 0x9d, 0x91, 0x4e, 0xeb, 0x61, 0xf1, 0x70,
+          0x2e, 0x69, 0x6c, 0x20, 0x3a, 0x12, 0x68, 0x54 } },
+
+      { "data-50 key-20",
+        "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd"
+        "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd"
+        "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd"
+        "\xdd\xdd",
+       "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa",
+        { 0xfa, 0x73, 0xb0, 0x08, 0x9d, 0x56, 0xa2, 0x84,
+          0xef, 0xb0, 0xf0, 0x75, 0x6c, 0x89, 0x0b, 0xe9,
+          0xb1, 0xb5, 0xdb, 0xdd, 0x8e, 0xe8, 0x1a, 0x36,
+          0x55, 0xf8, 0x3e, 0x33, 0xb2, 0x27, 0x9d, 0x39,
+          0xbf, 0x3e, 0x84, 0x82, 0x79, 0xa7, 0x22, 0xc8,
+          0x06, 0xb4, 0x85, 0xa4, 0x7e, 0x67, 0xc8, 0x07,
+          0xb9, 0x46, 0xa3, 0x37, 0xbe, 0xe8, 0x94, 0x26,
+          0x74, 0x27, 0x88, 0x59, 0xe1, 0x32, 0x92, 0xfb } },
+
+      { "data-50 key-26",
+        "\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd"
+        "\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd"
+        "\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd"
+        "\xcd\xcd",
+       "\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10"
+        "\x11\x12\x13\x14\x15\x16\x17\x18\x19",
+        { 0xb0, 0xba, 0x46, 0x56, 0x37, 0x45, 0x8c, 0x69,
+          0x90, 0xe5, 0xa8, 0xc5, 0xf6, 0x1d, 0x4a, 0xf7,
+          0xe5, 0x76, 0xd9, 0x7f, 0xf9, 0x4b, 0x87, 0x2d,
+          0xe7, 0x6f, 0x80, 0x50, 0x36, 0x1e, 0xe3, 0xdb,
+          0xa9, 0x1c, 0xa5, 0xc1, 0x1a, 0xa2, 0x5e, 0xb4,
+          0xd6, 0x79, 0x27, 0x5c, 0xc5, 0x78, 0x80, 0x63,
+          0xa5, 0xf1, 0x97, 0x41, 0x12, 0x0c, 0x4f, 0x2d,
+          0xe2, 0xad, 0xeb, 0xeb, 0x10, 0xa2, 0x98, 0xdd } },
+
+      { "data-54 key-131",
+        "Test Using Larger Than Block-Size Key - Hash Key First",
+       "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa",
+        { 0x80, 0xb2, 0x42, 0x63, 0xc7, 0xc1, 0xa3, 0xeb,
+          0xb7, 0x14, 0x93, 0xc1, 0xdd, 0x7b, 0xe8, 0xb4,
+          0x9b, 0x46, 0xd1, 0xf4, 0x1b, 0x4a, 0xee, 0xc1,
+          0x12, 0x1b, 0x01, 0x37, 0x83, 0xf8, 0xf3, 0x52,
+          0x6b, 0x56, 0xd0, 0x37, 0xe0, 0x5f, 0x25, 0x98,
+          0xbd, 0x0f, 0xd2, 0x21, 0x5d, 0x6a, 0x1e, 0x52,
+          0x95, 0xe6, 0x4f, 0x73, 0xf6, 0x3f, 0x0a, 0xec,
+          0x8b, 0x91, 0x5a, 0x98, 0x5d, 0x78, 0x65, 0x98 } },
+
+      { "data-152 key-131",
+        "This is a test using a larger than block-size key and a larger "
+        "than block-size data. The key needs to be hashed before being "
+        "used by the HMAC algorithm.",
+       "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa",
+        { 0xe3, 0x7b, 0x6a, 0x77, 0x5d, 0xc8, 0x7d, 0xba,
+          0xa4, 0xdf, 0xa9, 0xf9, 0x6e, 0x5e, 0x3f, 0xfd,
+          0xde, 0xbd, 0x71, 0xf8, 0x86, 0x72, 0x89, 0x86,
+          0x5d, 0xf5, 0xa3, 0x2d, 0x20, 0xcd, 0xc9, 0x44,
+          0xb6, 0x02, 0x2c, 0xac, 0x3c, 0x49, 0x82, 0xb1,
+          0x0d, 0x5e, 0xeb, 0x55, 0xc3, 0xe4, 0xde, 0x15,
+          0x13, 0x46, 0x76, 0xfb, 0x6d, 0xe0, 0x44, 0x60,
+          0x65, 0xc9, 0x74, 0x40, 0xfa, 0x8c, 0x6a, 0x58 } },
+
+      { NULL }
+    };
+  const char *what;
+  const char *errtxt;
+  int tvidx;
+
+  for (tvidx=0; tv[tvidx].desc; tvidx++)
+    {
+      what = tv[tvidx].desc;
+      errtxt = check_one (GCRY_MD_SHA512,
+                          tv[tvidx].data, strlen (tv[tvidx].data),
+                          tv[tvidx].key, strlen (tv[tvidx].key),
+                          tv[tvidx].expect, DIM (tv[tvidx].expect), 0);
+      if (errtxt)
+        goto failed;
+      if (!extended)
+        break;
+    }
+
+  return 0; /* Succeeded. */
+
+ failed:
+  if (report)
+    report ("hmac", GCRY_MD_SHA512, what, errtxt);
+  return GPG_ERR_SELFTEST_FAILED;
+}
+
+
+
+/* Test for the SHA3 algorithms.  Vectors taken on 2017-07-18 from
+ * http://www.wolfgang-ehrhardt.de/hmac-sha3-testvectors.html  */
+static gpg_err_code_t
+selftests_sha3 (int hashalgo, int extended, selftest_report_func_t report)
+{
+  static struct
+  {
+    const char * const desc;
+    const char * const data;
+    const char * const key;
+    const char expect_224[28];
+    const char expect_256[32];
+    const char expect_384[48];
+    const char expect_512[64];
+    unsigned char trunc;
+  } tv[] =
+    {
+      { "data-9 key-20", /* Test 1 */
+        "Hi There",
+       "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b"
+        "\x0b\x0b\x0b\x0b",
+
+        { 0x3b, 0x16, 0x54, 0x6b, 0xbc, 0x7b, 0xe2, 0x70,
+          0x6a, 0x03, 0x1d, 0xca, 0xfd, 0x56, 0x37, 0x3d,
+          0x98, 0x84, 0x36, 0x76, 0x41, 0xd8, 0xc5, 0x9a,
+          0xf3, 0xc8, 0x60, 0xf7                          },
+        { 0xba, 0x85, 0x19, 0x23, 0x10, 0xdf, 0xfa, 0x96,
+          0xe2, 0xa3, 0xa4, 0x0e, 0x69, 0x77, 0x43, 0x51,
+          0x14, 0x0b, 0xb7, 0x18, 0x5e, 0x12, 0x02, 0xcd,
+          0xcc, 0x91, 0x75, 0x89, 0xf9, 0x5e, 0x16, 0xbb  },
+        { 0x68, 0xd2, 0xdc, 0xf7, 0xfd, 0x4d, 0xdd, 0x0a,
+          0x22, 0x40, 0xc8, 0xa4, 0x37, 0x30, 0x5f, 0x61,
+          0xfb, 0x73, 0x34, 0xcf, 0xb5, 0xd0, 0x22, 0x6e,
+          0x1b, 0xc2, 0x7d, 0xc1, 0x0a, 0x2e, 0x72, 0x3a,
+          0x20, 0xd3, 0x70, 0xb4, 0x77, 0x43, 0x13, 0x0e,
+          0x26, 0xac, 0x7e, 0x3d, 0x53, 0x28, 0x86, 0xbd  },
+        { 0xeb, 0x3f, 0xbd, 0x4b, 0x2e, 0xaa, 0xb8, 0xf5,
+          0xc5, 0x04, 0xbd, 0x3a, 0x41, 0x46, 0x5a, 0xac,
+          0xec, 0x15, 0x77, 0x0a, 0x7c, 0xab, 0xac, 0x53,
+          0x1e, 0x48, 0x2f, 0x86, 0x0b, 0x5e, 0xc7, 0xba,
+          0x47, 0xcc, 0xb2, 0xc6, 0xf2, 0xaf, 0xce, 0x8f,
+          0x88, 0xd2, 0x2b, 0x6d, 0xc6, 0x13, 0x80, 0xf2,
+          0x3a, 0x66, 0x8f, 0xd3, 0x88, 0x8b, 0xb8, 0x05,
+          0x37, 0xc0, 0xa0, 0xb8, 0x64, 0x07, 0x68, 0x9e  }
+      },
+
+      { "data-28 key-4",  /* Test 2  */
+        /* Test with a key shorter than the length of the HMAC output. */
+        "what do ya want for nothing?",
+        "Jefe",
+
+        { 0x7f, 0xdb, 0x8d, 0xd8, 0x8b, 0xd2, 0xf6, 0x0d,
+          0x1b, 0x79, 0x86, 0x34, 0xad, 0x38, 0x68, 0x11,
+          0xc2, 0xcf, 0xc8, 0x5b, 0xfa, 0xf5, 0xd5, 0x2b,
+          0xba, 0xce, 0x5e, 0x66                          },
+        { 0xc7, 0xd4, 0x07, 0x2e, 0x78, 0x88, 0x77, 0xae,
+          0x35, 0x96, 0xbb, 0xb0, 0xda, 0x73, 0xb8, 0x87,
+          0xc9, 0x17, 0x1f, 0x93, 0x09, 0x5b, 0x29, 0x4a,
+          0xe8, 0x57, 0xfb, 0xe2, 0x64, 0x5e, 0x1b, 0xa5  },
+        { 0xf1, 0x10, 0x1f, 0x8c, 0xbf, 0x97, 0x66, 0xfd,
+          0x67, 0x64, 0xd2, 0xed, 0x61, 0x90, 0x3f, 0x21,
+          0xca, 0x9b, 0x18, 0xf5, 0x7c, 0xf3, 0xe1, 0xa2,
+          0x3c, 0xa1, 0x35, 0x08, 0xa9, 0x32, 0x43, 0xce,
+          0x48, 0xc0, 0x45, 0xdc, 0x00, 0x7f, 0x26, 0xa2,
+          0x1b, 0x3f, 0x5e, 0x0e, 0x9d, 0xf4, 0xc2, 0x0a  },
+        { 0x5a, 0x4b, 0xfe, 0xab, 0x61, 0x66, 0x42, 0x7c,
+          0x7a, 0x36, 0x47, 0xb7, 0x47, 0x29, 0x2b, 0x83,
+          0x84, 0x53, 0x7c, 0xdb, 0x89, 0xaf, 0xb3, 0xbf,
+          0x56, 0x65, 0xe4, 0xc5, 0xe7, 0x09, 0x35, 0x0b,
+          0x28, 0x7b, 0xae, 0xc9, 0x21, 0xfd, 0x7c, 0xa0,
+          0xee, 0x7a, 0x0c, 0x31, 0xd0, 0x22, 0xa9, 0x5e,
+          0x1f, 0xc9, 0x2b, 0xa9, 0xd7, 0x7d, 0xf8, 0x83,
+          0x96, 0x02, 0x75, 0xbe, 0xb4, 0xe6, 0x20, 0x24  }
+      },
+
+      { "data-50 key-20",  /* Test 3 */
+        /* Test with a combined length of key and data that is larger
+         * than 64 bytes (= block-size of SHA-224 and SHA-256).  */
+        "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd"
+        "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd"
+        "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd"
+        "\xdd\xdd",
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa",
+
+        { 0x67, 0x6c, 0xfc, 0x7d, 0x16, 0x15, 0x36, 0x38,
+          0x78, 0x03, 0x90, 0x69, 0x2b, 0xe1, 0x42, 0xd2,
+          0xdf, 0x7c, 0xe9, 0x24, 0xb9, 0x09, 0xc0, 0xc0,
+          0x8d, 0xbf, 0xdc, 0x1a  },
+        { 0x84, 0xec, 0x79, 0x12, 0x4a, 0x27, 0x10, 0x78,
+          0x65, 0xce, 0xdd, 0x8b, 0xd8, 0x2d, 0xa9, 0x96,
+          0x5e, 0x5e, 0xd8, 0xc3, 0x7b, 0x0a, 0xc9, 0x80,
+          0x05, 0xa7, 0xf3, 0x9e, 0xd5, 0x8a, 0x42, 0x07  },
+        { 0x27, 0x5c, 0xd0, 0xe6, 0x61, 0xbb, 0x8b, 0x15,
+          0x1c, 0x64, 0xd2, 0x88, 0xf1, 0xf7, 0x82, 0xfb,
+          0x91, 0xa8, 0xab, 0xd5, 0x68, 0x58, 0xd7, 0x2b,
+          0xab, 0xb2, 0xd4, 0x76, 0xf0, 0x45, 0x83, 0x73,
+          0xb4, 0x1b, 0x6a, 0xb5, 0xbf, 0x17, 0x4b, 0xec,
+          0x42, 0x2e, 0x53, 0xfc, 0x31, 0x35, 0xac, 0x6e  },
+        { 0x30, 0x9e, 0x99, 0xf9, 0xec, 0x07, 0x5e, 0xc6,
+          0xc6, 0xd4, 0x75, 0xed, 0xa1, 0x18, 0x06, 0x87,
+          0xfc, 0xf1, 0x53, 0x11, 0x95, 0x80, 0x2a, 0x99,
+          0xb5, 0x67, 0x74, 0x49, 0xa8, 0x62, 0x51, 0x82,
+          0x85, 0x1c, 0xb3, 0x32, 0xaf, 0xb6, 0xa8, 0x9c,
+          0x41, 0x13, 0x25, 0xfb, 0xcb, 0xcd, 0x42, 0xaf,
+          0xcb, 0x7b, 0x6e, 0x5a, 0xab, 0x7e, 0xa4, 0x2c,
+          0x66, 0x0f, 0x97, 0xfd, 0x85, 0x84, 0xbf, 0x03  }
+      },
+
+      { "data-50 key-25",  /* Test 4 */
+        /* Test with a combined length of key and data that is larger
+         * than 64 bytes (= block-size of SHA-224 and SHA-256).  */
+        "\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd"
+        "\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd"
+        "\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd"
+        "\xcd\xcd",
+        "\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10"
+        "\x11\x12\x13\x14\x15\x16\x17\x18\x19",
+
+        { 0xa9, 0xd7, 0x68, 0x5a, 0x19, 0xc4, 0xe0, 0xdb,
+          0xd9, 0xdf, 0x25, 0x56, 0xcc, 0x8a, 0x7d, 0x2a,
+          0x77, 0x33, 0xb6, 0x76, 0x25, 0xce, 0x59, 0x4c,
+          0x78, 0x27, 0x0e, 0xeb   },
+        { 0x57, 0x36, 0x6a, 0x45, 0xe2, 0x30, 0x53, 0x21,
+          0xa4, 0xbc, 0x5a, 0xa5, 0xfe, 0x2e, 0xf8, 0xa9,
+          0x21, 0xf6, 0xaf, 0x82, 0x73, 0xd7, 0xfe, 0x7b,
+          0xe6, 0xcf, 0xed, 0xb3, 0xf0, 0xae, 0xa6, 0xd7  },
+        { 0x3a, 0x5d, 0x7a, 0x87, 0x97, 0x02, 0xc0, 0x86,
+          0xbc, 0x96, 0xd1, 0xdd, 0x8a, 0xa1, 0x5d, 0x9c,
+          0x46, 0x44, 0x6b, 0x95, 0x52, 0x13, 0x11, 0xc6,
+          0x06, 0xfd, 0xc4, 0xe3, 0x08, 0xf4, 0xb9, 0x84,
+          0xda, 0x2d, 0x0f, 0x94, 0x49, 0xb3, 0xba, 0x84,
+          0x25, 0xec, 0x7f, 0xb8, 0xc3, 0x1b, 0xc1, 0x36  },
+        { 0xb2, 0x7e, 0xab, 0x1d, 0x6e, 0x8d, 0x87, 0x46,
+          0x1c, 0x29, 0xf7, 0xf5, 0x73, 0x9d, 0xd5, 0x8e,
+          0x98, 0xaa, 0x35, 0xf8, 0xe8, 0x23, 0xad, 0x38,
+          0xc5, 0x49, 0x2a, 0x20, 0x88, 0xfa, 0x02, 0x81,
+          0x99, 0x3b, 0xbf, 0xff, 0x9a, 0x0e, 0x9c, 0x6b,
+          0xf1, 0x21, 0xae, 0x9e, 0xc9, 0xbb, 0x09, 0xd8,
+          0x4a, 0x5e, 0xba, 0xc8, 0x17, 0x18, 0x2e, 0xa9,
+          0x74, 0x67, 0x3f, 0xb1, 0x33, 0xca, 0x0d, 0x1d  }
+      },
+
+      { "data-20 key-20 trunc",  /* Test 5 */
+        /* Test with a truncation of output to 128 bits.  */
+        "Test With Truncation",
+        "\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c"
+        "\x0c\x0c\x0c\x0c",
+
+        { 0x49, 0xfd, 0xd3, 0xab, 0xd0, 0x05, 0xeb, 0xb8,
+          0xae, 0x63, 0xfe, 0xa9, 0x46, 0xd1, 0x88, 0x3c  },
+        { 0x6e, 0x02, 0xc6, 0x45, 0x37, 0xfb, 0x11, 0x80,
+          0x57, 0xab, 0xb7, 0xfb, 0x66, 0xa2, 0x3b, 0x3c  },
+        { 0x47, 0xc5, 0x1a, 0xce, 0x1f, 0xfa, 0xcf, 0xfd,
+          0x74, 0x94, 0x72, 0x46, 0x82, 0x61, 0x57, 0x83  },
+        { 0x0f, 0xa7, 0x47, 0x59, 0x48, 0xf4, 0x3f, 0x48,
+          0xca, 0x05, 0x16, 0x67, 0x1e, 0x18, 0x97, 0x8c  },
+        16
+      },
+
+      { "data-54 key-131",  /* Test 6 */
+        /* Test with a key larger than 128 bytes (= block-size of
+         * SHA-384 and SHA-512).  */
+        "Test Using Larger Than Block-Size Key - Hash Key First",
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa",
+
+        { 0xb4, 0xa1, 0xf0, 0x4c, 0x00, 0x28, 0x7a, 0x9b,
+          0x7f, 0x60, 0x75, 0xb3, 0x13, 0xd2, 0x79, 0xb8,
+          0x33, 0xbc, 0x8f, 0x75, 0x12, 0x43, 0x52, 0xd0,
+          0x5f, 0xb9, 0x99, 0x5f  },
+        { 0xed, 0x73, 0xa3, 0x74, 0xb9, 0x6c, 0x00, 0x52,
+          0x35, 0xf9, 0x48, 0x03, 0x2f, 0x09, 0x67, 0x4a,
+          0x58, 0xc0, 0xce, 0x55, 0x5c, 0xfc, 0x1f, 0x22,
+          0x3b, 0x02, 0x35, 0x65, 0x60, 0x31, 0x2c, 0x3b  },
+        { 0x0f, 0xc1, 0x95, 0x13, 0xbf, 0x6b, 0xd8, 0x78,
+          0x03, 0x70, 0x16, 0x70, 0x6a, 0x0e, 0x57, 0xbc,
+          0x52, 0x81, 0x39, 0x83, 0x6b, 0x9a, 0x42, 0xc3,
+          0xd4, 0x19, 0xe4, 0x98, 0xe0, 0xe1, 0xfb, 0x96,
+          0x16, 0xfd, 0x66, 0x91, 0x38, 0xd3, 0x3a, 0x11,
+          0x05, 0xe0, 0x7c, 0x72, 0xb6, 0x95, 0x3b, 0xcc  },
+        { 0x00, 0xf7, 0x51, 0xa9, 0xe5, 0x06, 0x95, 0xb0,
+          0x90, 0xed, 0x69, 0x11, 0xa4, 0xb6, 0x55, 0x24,
+          0x95, 0x1c, 0xdc, 0x15, 0xa7, 0x3a, 0x5d, 0x58,
+          0xbb, 0x55, 0x21, 0x5e, 0xa2, 0xcd, 0x83, 0x9a,
+          0xc7, 0x9d, 0x2b, 0x44, 0xa3, 0x9b, 0xaf, 0xab,
+          0x27, 0xe8, 0x3f, 0xde, 0x9e, 0x11, 0xf6, 0x34,
+          0x0b, 0x11, 0xd9, 0x91, 0xb1, 0xb9, 0x1b, 0xf2,
+          0xee, 0xe7, 0xfc, 0x87, 0x24, 0x26, 0xc3, 0xa4  }
+      },
+
+      { "data-54 key-147",  /* Test 6a */
+        /* Test with a key larger than 144 bytes (= block-size of
+         * SHA3-224).  */
+        "Test Using Larger Than Block-Size Key - Hash Key First",
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa",
+
+        { 0xb9, 0x6d, 0x73, 0x0c, 0x14, 0x8c, 0x2d, 0xaa,
+          0xd8, 0x64, 0x9d, 0x83, 0xde, 0xfa, 0xa3, 0x71,
+          0x97, 0x38, 0xd3, 0x47, 0x75, 0x39, 0x7b, 0x75,
+          0x71, 0xc3, 0x85, 0x15  },
+        { 0xa6, 0x07, 0x2f, 0x86, 0xde, 0x52, 0xb3, 0x8b,
+          0xb3, 0x49, 0xfe, 0x84, 0xcd, 0x6d, 0x97, 0xfb,
+          0x6a, 0x37, 0xc4, 0xc0, 0xf6, 0x2a, 0xae, 0x93,
+          0x98, 0x11, 0x93, 0xa7, 0x22, 0x9d, 0x34, 0x67  },
+        { 0x71, 0x3d, 0xff, 0x03, 0x02, 0xc8, 0x50, 0x86,
+          0xec, 0x5a, 0xd0, 0x76, 0x8d, 0xd6, 0x5a, 0x13,
+          0xdd, 0xd7, 0x90, 0x68, 0xd8, 0xd4, 0xc6, 0x21,
+          0x2b, 0x71, 0x2e, 0x41, 0x64, 0x94, 0x49, 0x11,
+          0x14, 0x80, 0x23, 0x00, 0x44, 0x18, 0x5a, 0x99,
+          0x10, 0x3e, 0xd8, 0x20, 0x04, 0xdd, 0xbf, 0xcc  },
+        { 0xb1, 0x48, 0x35, 0xc8, 0x19, 0xa2, 0x90, 0xef,
+          0xb0, 0x10, 0xac, 0xe6, 0xd8, 0x56, 0x8d, 0xc6,
+          0xb8, 0x4d, 0xe6, 0x0b, 0xc4, 0x9b, 0x00, 0x4c,
+          0x3b, 0x13, 0xed, 0xa7, 0x63, 0x58, 0x94, 0x51,
+          0xe5, 0xdd, 0x74, 0x29, 0x28, 0x84, 0xd1, 0xbd,
+          0xce, 0x64, 0xe6, 0xb9, 0x19, 0xdd, 0x61, 0xdc,
+          0x9c, 0x56, 0xa2, 0x82, 0xa8, 0x1c, 0x0b, 0xd1,
+          0x4f, 0x1f, 0x36, 0x5b, 0x49, 0xb8, 0x3a, 0x5b  }
+      },
+
+      { "data-152 key-131",  /* Test 7  */
+        /* Test with a key and data that is larger than 128 bytes (=
+         * block-size of SHA-384 and SHA-512).  */
+        "This is a test using a larger than block-size key and a larger "
+        "than block-size data. The key needs to be hashed before being "
+        "used by the HMAC algorithm.",
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa",
+
+        { 0x05, 0xd8, 0xcd, 0x6d, 0x00, 0xfa, 0xea, 0x8d,
+          0x1e, 0xb6, 0x8a, 0xde, 0x28, 0x73, 0x0b, 0xbd,
+          0x3c, 0xba, 0xb6, 0x92, 0x9f, 0x0a, 0x08, 0x6b,
+          0x29, 0xcd, 0x62, 0xa0  },
+        { 0x65, 0xc5, 0xb0, 0x6d, 0x4c, 0x3d, 0xe3, 0x2a,
+          0x7a, 0xef, 0x87, 0x63, 0x26, 0x1e, 0x49, 0xad,
+          0xb6, 0xe2, 0x29, 0x3e, 0xc8, 0xe7, 0xc6, 0x1e,
+          0x8d, 0xe6, 0x17, 0x01, 0xfc, 0x63, 0xe1, 0x23  },
+        { 0x02, 0x6f, 0xdf, 0x6b, 0x50, 0x74, 0x1e, 0x37,
+          0x38, 0x99, 0xc9, 0xf7, 0xd5, 0x40, 0x6d, 0x4e,
+          0xb0, 0x9f, 0xc6, 0x66, 0x56, 0x36, 0xfc, 0x1a,
+          0x53, 0x00, 0x29, 0xdd, 0xf5, 0xcf, 0x3c, 0xa5,
+          0xa9, 0x00, 0xed, 0xce, 0x01, 0xf5, 0xf6, 0x1e,
+          0x2f, 0x40, 0x8c, 0xdf, 0x2f, 0xd3, 0xe7, 0xe8  },
+        { 0x38, 0xa4, 0x56, 0xa0, 0x04, 0xbd, 0x10, 0xd3,
+          0x2c, 0x9a, 0xb8, 0x33, 0x66, 0x84, 0x11, 0x28,
+          0x62, 0xc3, 0xdb, 0x61, 0xad, 0xcc, 0xa3, 0x18,
+          0x29, 0x35, 0x5e, 0xaf, 0x46, 0xfd, 0x5c, 0x73,
+          0xd0, 0x6a, 0x1f, 0x0d, 0x13, 0xfe, 0xc9, 0xa6,
+          0x52, 0xfb, 0x38, 0x11, 0xb5, 0x77, 0xb1, 0xb1,
+          0xd1, 0xb9, 0x78, 0x9f, 0x97, 0xae, 0x5b, 0x83,
+          0xc6, 0xf4, 0x4d, 0xfc, 0xf1, 0xd6, 0x7e, 0xba  }
+      },
+
+      { "data-152 key-147",  /* Test 7a  */
+        /* Test with a key larger than 144 bytes (= block-size of
+         * SHA3-224). */
+        "This is a test using a larger than block-size key and a larger "
+        "than block-size data. The key needs to be hashed before being "
+        "used by the HMAC algorithm.",
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa"
+        "\xaa\xaa\xaa",
+
+        { 0xc7, 0x9c, 0x9b, 0x09, 0x34, 0x24, 0xe5, 0x88,
+          0xa9, 0x87, 0x8b, 0xbc, 0xb0, 0x89, 0xe0, 0x18,
+          0x27, 0x00, 0x96, 0xe9, 0xb4, 0xb1, 0xa9, 0xe8,
+          0x22, 0x0c, 0x86, 0x6a  },
+        { 0xe6, 0xa3, 0x6d, 0x9b, 0x91, 0x5f, 0x86, 0xa0,
+          0x93, 0xca, 0xc7, 0xd1, 0x10, 0xe9, 0xe0, 0x4c,
+          0xf1, 0xd6, 0x10, 0x0d, 0x30, 0x47, 0x55, 0x09,
+          0xc2, 0x47, 0x5f, 0x57, 0x1b, 0x75, 0x8b, 0x5a  },
+        { 0xca, 0xd1, 0x8a, 0x8f, 0xf6, 0xc4, 0xcc, 0x3a,
+          0xd4, 0x87, 0xb9, 0x5f, 0x97, 0x69, 0xe9, 0xb6,
+          0x1c, 0x06, 0x2a, 0xef, 0xd6, 0x95, 0x25, 0x69,
+          0xe6, 0xe6, 0x42, 0x18, 0x97, 0x05, 0x4c, 0xfc,
+          0x70, 0xb5, 0xfd, 0xc6, 0x60, 0x5c, 0x18, 0x45,
+          0x71, 0x12, 0xfc, 0x6a, 0xaa, 0xd4, 0x55, 0x85  },
+        { 0xdc, 0x03, 0x0e, 0xe7, 0x88, 0x70, 0x34, 0xf3,
+          0x2c, 0xf4, 0x02, 0xdf, 0x34, 0x62, 0x2f, 0x31,
+          0x1f, 0x3e, 0x6c, 0xf0, 0x48, 0x60, 0xc6, 0xbb,
+          0xd7, 0xfa, 0x48, 0x86, 0x74, 0x78, 0x2b, 0x46,
+          0x59, 0xfd, 0xbd, 0xf3, 0xfd, 0x87, 0x78, 0x52,
+          0x88, 0x5c, 0xfe, 0x6e, 0x22, 0x18, 0x5f, 0xe7,
+          0xb2, 0xee, 0x95, 0x20, 0x43, 0x62, 0x9b, 0xc9,
+          0xd5, 0xf3, 0x29, 0x8a, 0x41, 0xd0, 0x2c, 0x66  }
+      }/*,*/
+
+      /* Our API does not allow to specify a bit count and thus we
+       * can't use the following test.  */
+      /* { "data-5bit key-4",  /\* Test 8  *\/ */
+      /*   /\* Test with data bit size no multiple of 8, the data bits are */
+      /*    * '11001' from the NIST example using SHA-3 order (= 5 bits */
+      /*    * from LSB hex byte 13 or 5 bits from MSB hex byte c8).  *\/ */
+      /*   "\xc8", */
+      /*   "Jefe", */
+
+      /*   { 0x5f, 0x8c, 0x0e, 0xa7, 0xfa, 0xfe, 0xcd, 0x0c, */
+      /*     0x34, 0x63, 0xaa, 0xd0, 0x97, 0x42, 0xce, 0xce, */
+      /*     0xb1, 0x42, 0xfe, 0x0a, 0xb6, 0xf4, 0x53, 0x94, */
+      /*     0x38, 0xc5, 0x9d, 0xe8  }, */
+      /*   { 0xec, 0x82, 0x22, 0x77, 0x3f, 0xac, 0x68, 0xb3, */
+      /*     0xd3, 0xdc, 0xb1, 0x82, 0xae, 0xc8, 0xb0, 0x50, */
+      /*     0x7a, 0xce, 0x44, 0x48, 0xd2, 0x0a, 0x11, 0x47, */
+      /*     0xe6, 0x82, 0x11, 0x8d, 0xa4, 0xe3, 0xf4, 0x4c  }, */
+      /*   { 0x21, 0xfb, 0xd3, 0xbf, 0x3e, 0xbb, 0xa3, 0xcf, */
+      /*     0xc9, 0xef, 0x64, 0xc0, 0x59, 0x1c, 0x92, 0xc5, */
+      /*     0xac, 0xb2, 0x65, 0xe9, 0x2d, 0x87, 0x61, 0xd1, */
+      /*     0xf9, 0x1a, 0x52, 0xa1, 0x03, 0xa6, 0xc7, 0x96, */
+      /*     0x94, 0xcf, 0xd6, 0x7a, 0x9a, 0x2a, 0xc1, 0x32, */
+      /*     0x4f, 0x02, 0xfe, 0xa6, 0x3b, 0x81, 0xef, 0xfc  }, */
+      /*   { 0x27, 0xf9, 0x38, 0x8c, 0x15, 0x67, 0xef, 0x4e, */
+      /*     0xf2, 0x00, 0x60, 0x2a, 0x6c, 0xf8, 0x71, 0xd6, */
+      /*     0x8a, 0x6f, 0xb0, 0x48, 0xd4, 0x73, 0x7a, 0xc4, */
+      /*     0x41, 0x8a, 0x2f, 0x02, 0x12, 0x89, 0xd1, 0x3d, */
+      /*     0x1f, 0xd1, 0x12, 0x0f, 0xec, 0xb9, 0xcf, 0x96, */
+      /*     0x4c, 0x5b, 0x11, 0x7a, 0xb5, 0xb1, 0x1c, 0x61, */
+      /*     0x4b, 0x2d, 0xa3, 0x9d, 0xad, 0xd5, 0x1f, 0x2f, */
+      /*     0x5e, 0x22, 0xaa, 0xcc, 0xec, 0x7d, 0x57, 0x6e  } */
+      /* } */
+
+    };
+  const char *what;
+  const char *errtxt;
+  int tvidx;
+  const char *expect;
+  int nexpect;
+
+  for (tvidx=0; tvidx < DIM(tv); tvidx++)
+    {
+      what = tv[tvidx].desc;
+      if (hashalgo == GCRY_MD_SHA3_224)
+        {
+          expect = tv[tvidx].expect_224;
+          nexpect = DIM (tv[tvidx].expect_224);
+        }
+      else if (hashalgo == GCRY_MD_SHA3_256)
+        {
+          expect = tv[tvidx].expect_256;
+          nexpect = DIM (tv[tvidx].expect_256);
+        }
+      else if (hashalgo == GCRY_MD_SHA3_384)
+        {
+          expect = tv[tvidx].expect_384;
+          nexpect = DIM (tv[tvidx].expect_384);
+        }
+      else if (hashalgo == GCRY_MD_SHA3_512)
+        {
+          expect = tv[tvidx].expect_512;
+          nexpect = DIM (tv[tvidx].expect_512);
+        }
+      else
+        BUG();
+
+      if (tv[tvidx].trunc && tv[tvidx].trunc < nexpect)
+        nexpect = tv[tvidx].trunc;
+
+      errtxt = check_one (hashalgo,
+                          tv[tvidx].data, strlen (tv[tvidx].data),
+                          tv[tvidx].key, strlen (tv[tvidx].key),
+                          expect, nexpect, !!tv[tvidx].trunc);
+      if (errtxt)
+        goto failed;
+      if (!extended)
+        break;
+    }
+
+  return 0; /* Succeeded. */
+
+ failed:
+  if (report)
+    report ("hmac", hashalgo, what, errtxt);
+  return GPG_ERR_SELFTEST_FAILED;
+}
+
+
+static gpg_err_code_t
+hmac_selftest (int algo, int extended, selftest_report_func_t report)
+{
+  gpg_err_code_t ec;
+
+  switch (algo)
+    {
+    case GCRY_MAC_HMAC_SHA1:
+      ec = selftests_sha1 (extended, report);
+      break;
+    case GCRY_MAC_HMAC_SHA224:
+      ec = selftests_sha224 (extended, report);
+      break;
+    case GCRY_MAC_HMAC_SHA256:
+      ec = selftests_sha256 (extended, report);
+      break;
+    case GCRY_MAC_HMAC_SHA384:
+      ec = selftests_sha384 (extended, report);
+      break;
+    case GCRY_MAC_HMAC_SHA512:
+      ec = selftests_sha512 (extended, report);
+      break;
+
+    case GCRY_MAC_HMAC_SHA3_224:
+    case GCRY_MAC_HMAC_SHA3_256:
+    case GCRY_MAC_HMAC_SHA3_384:
+    case GCRY_MAC_HMAC_SHA3_512:
+      {
+        int md_algo = map_mac_algo_to_md (algo);
+        ec = selftests_sha3 (md_algo, extended, report);
+      }
+      break;
+
+    default:
+      ec = GPG_ERR_MAC_ALGO;
+      break;
+    }
+
+  return ec;
+}
+
+
+static const gcry_mac_spec_ops_t hmac_ops = {
+  hmac_open,
+  hmac_close,
+  hmac_setkey,
+  NULL,
+  hmac_reset,
+  hmac_write,
+  hmac_read,
+  hmac_verify,
+  hmac_get_maclen,
+  hmac_get_keylen,
+  NULL,
+  hmac_selftest
+};
+
+
+#if USE_SHA1
+const gcry_mac_spec_t _gcry_mac_type_spec_hmac_sha1 = {
+  GCRY_MAC_HMAC_SHA1, {0, 1}, "HMAC_SHA1",
+  &hmac_ops
+};
+#endif
+#if USE_SHA256
+const gcry_mac_spec_t _gcry_mac_type_spec_hmac_sha256 = {
+  GCRY_MAC_HMAC_SHA256, {0, 1}, "HMAC_SHA256",
+  &hmac_ops
+};
+
+const gcry_mac_spec_t _gcry_mac_type_spec_hmac_sha224 = {
+  GCRY_MAC_HMAC_SHA224, {0, 1}, "HMAC_SHA224",
+  &hmac_ops
+};
+#endif
+#if USE_SHA512
+const gcry_mac_spec_t _gcry_mac_type_spec_hmac_sha512 = {
+  GCRY_MAC_HMAC_SHA512, {0, 1}, "HMAC_SHA512",
+  &hmac_ops
+};
+
+const gcry_mac_spec_t _gcry_mac_type_spec_hmac_sha384 = {
+  GCRY_MAC_HMAC_SHA384, {0, 1}, "HMAC_SHA384",
+  &hmac_ops
+};
+
+const gcry_mac_spec_t _gcry_mac_type_spec_hmac_sha512_256 = {
+  GCRY_MAC_HMAC_SHA512_256, {0, 1}, "HMAC_SHA512_256",
+  &hmac_ops
+};
+
+const gcry_mac_spec_t _gcry_mac_type_spec_hmac_sha512_224 = {
+  GCRY_MAC_HMAC_SHA512_224, {0, 1}, "HMAC_SHA512_224",
+  &hmac_ops
+};
+
+#endif
+#if USE_SHA3
+const gcry_mac_spec_t _gcry_mac_type_spec_hmac_sha3_224 = {
+  GCRY_MAC_HMAC_SHA3_224, {0, 1}, "HMAC_SHA3_224",
+  &hmac_ops
+};
+
+const gcry_mac_spec_t _gcry_mac_type_spec_hmac_sha3_256 = {
+  GCRY_MAC_HMAC_SHA3_256, {0, 1}, "HMAC_SHA3_256",
+  &hmac_ops
+};
+
+const gcry_mac_spec_t _gcry_mac_type_spec_hmac_sha3_384 = {
+  GCRY_MAC_HMAC_SHA3_384, {0, 1}, "HMAC_SHA3_384",
+  &hmac_ops
+};
+
+const gcry_mac_spec_t _gcry_mac_type_spec_hmac_sha3_512 = {
+  GCRY_MAC_HMAC_SHA3_512, {0, 1}, "HMAC_SHA3_512",
+  &hmac_ops
+};
+#endif
+#if USE_GOST_R_3411_94
+const gcry_mac_spec_t _gcry_mac_type_spec_hmac_gost3411_94 = {
+  GCRY_MAC_HMAC_GOSTR3411_94, {0, 0}, "HMAC_GOSTR3411_94",
+  &hmac_ops
+};
+const gcry_mac_spec_t _gcry_mac_type_spec_hmac_gost3411_cp = {
+  GCRY_MAC_HMAC_GOSTR3411_CP, {0, 0}, "HMAC_GOSTR3411_CP",
+  &hmac_ops
+};
+#endif
+#if USE_GOST_R_3411_12
+const gcry_mac_spec_t _gcry_mac_type_spec_hmac_stribog256 = {
+  GCRY_MAC_HMAC_STRIBOG256, {0, 0}, "HMAC_STRIBOG256",
+  &hmac_ops
+};
+
+const gcry_mac_spec_t _gcry_mac_type_spec_hmac_stribog512 = {
+  GCRY_MAC_HMAC_STRIBOG512, {0, 0}, "HMAC_STRIBOG512",
+  &hmac_ops
+};
+#endif
+#if USE_WHIRLPOOL
+const gcry_mac_spec_t _gcry_mac_type_spec_hmac_whirlpool = {
+  GCRY_MAC_HMAC_WHIRLPOOL, {0, 0}, "HMAC_WHIRLPOOL",
+  &hmac_ops
+};
+#endif
+#if USE_RMD160
+const gcry_mac_spec_t _gcry_mac_type_spec_hmac_rmd160 = {
+  GCRY_MAC_HMAC_RMD160, {0, 0}, "HMAC_RIPEMD160",
+  &hmac_ops
+};
+#endif
+#if USE_TIGER
+const gcry_mac_spec_t _gcry_mac_type_spec_hmac_tiger1 = {
+  GCRY_MAC_HMAC_TIGER1, {0, 0}, "HMAC_TIGER",
+  &hmac_ops
+};
+#endif
+#if USE_MD5
+const gcry_mac_spec_t _gcry_mac_type_spec_hmac_md5 = {
+  GCRY_MAC_HMAC_MD5, {0, 1}, "HMAC_MD5",
+  &hmac_ops
+};
+#endif
+#if USE_MD4
+const gcry_mac_spec_t _gcry_mac_type_spec_hmac_md4 = {
+  GCRY_MAC_HMAC_MD4, {0, 0}, "HMAC_MD4",
+  &hmac_ops
+};
+#endif
+#if USE_MD2
+const gcry_mac_spec_t _gcry_mac_type_spec_hmac_md2 = {
+  GCRY_MAC_HMAC_MD2, {0, 0}, "HMAC_MD2",
+  &hmac_ops
+};
+#endif
+#if USE_BLAKE2
+const gcry_mac_spec_t _gcry_mac_type_spec_hmac_blake2b_512 = {
+  GCRY_MAC_HMAC_BLAKE2B_512, {0, 0}, "HMAC_BLAKE2B_512",
+  &hmac_ops
+};
+const gcry_mac_spec_t _gcry_mac_type_spec_hmac_blake2b_384 = {
+  GCRY_MAC_HMAC_BLAKE2B_384, {0, 0}, "HMAC_BLAKE2B_384",
+  &hmac_ops
+};
+const gcry_mac_spec_t _gcry_mac_type_spec_hmac_blake2b_256 = {
+  GCRY_MAC_HMAC_BLAKE2B_256, {0, 0}, "HMAC_BLAKE2B_256",
+  &hmac_ops
+};
+const gcry_mac_spec_t _gcry_mac_type_spec_hmac_blake2b_160 = {
+  GCRY_MAC_HMAC_BLAKE2B_160, {0, 0}, "HMAC_BLAKE2B_160",
+  &hmac_ops
+};
+const gcry_mac_spec_t _gcry_mac_type_spec_hmac_blake2s_256 = {
+  GCRY_MAC_HMAC_BLAKE2S_256, {0, 0}, "HMAC_BLAKE2S_256",
+  &hmac_ops
+};
+const gcry_mac_spec_t _gcry_mac_type_spec_hmac_blake2s_224 = {
+  GCRY_MAC_HMAC_BLAKE2S_224, {0, 0}, "HMAC_BLAKE2S_224",
+  &hmac_ops
+};
+const gcry_mac_spec_t _gcry_mac_type_spec_hmac_blake2s_160 = {
+  GCRY_MAC_HMAC_BLAKE2S_160, {0, 0}, "HMAC_BLAKE2S_160",
+  &hmac_ops
+};
+const gcry_mac_spec_t _gcry_mac_type_spec_hmac_blake2s_128 = {
+  GCRY_MAC_HMAC_BLAKE2S_128, {0, 0}, "HMAC_BLAKE2S_128",
+  &hmac_ops
+};
+#endif
+#if USE_SM3
+const gcry_mac_spec_t _gcry_mac_type_spec_hmac_sm3 = {
+  GCRY_MAC_HMAC_SM3, {0, 0}, "HMAC_SM3",
+  &hmac_ops
+};
+#endif
diff --git a/grub-core/lib/libgcrypt/cipher/mac-internal.h 
b/grub-core/lib/libgcrypt/cipher/mac-internal.h
new file mode 100644
index 000000000..019981524
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/mac-internal.h
@@ -0,0 +1,275 @@
+/* mac-internal.h  -  Internal defs for mac.c
+ * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser general Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+
+#include "g10lib.h"
+#include "cipher-proto.h"
+#include "gost.h"
+
+
+/* The data object used to hold a handle to an encryption object.  */
+struct gcry_mac_handle;
+
+/* The data object used to hold poly1305-mac context.  */
+struct poly1305mac_context_s;
+
+
+/*
+ *
+ * Message authentication code related definitions.
+ *
+ */
+
+
+/* Magic values for the context structure.  */
+#define CTX_MAC_MAGIC_NORMAL 0x59d9b8af
+#define CTX_MAC_MAGIC_SECURE 0x12c27cd0
+
+
+/* MAC module functions. */
+typedef gcry_err_code_t (*gcry_mac_open_func_t)(gcry_mac_hd_t h);
+typedef void (*gcry_mac_close_func_t)(gcry_mac_hd_t h);
+typedef gcry_err_code_t (*gcry_mac_setkey_func_t)(gcry_mac_hd_t h,
+                                                 const unsigned char *key,
+                                                 size_t keylen);
+typedef gcry_err_code_t (*gcry_mac_setiv_func_t)(gcry_mac_hd_t h,
+                                                const unsigned char *iv,
+                                                size_t ivlen);
+typedef gcry_err_code_t (*gcry_mac_reset_func_t)(gcry_mac_hd_t h);
+typedef gcry_err_code_t (*gcry_mac_write_func_t)(gcry_mac_hd_t h,
+                                                const unsigned char *inbuf,
+                                                size_t inlen);
+typedef gcry_err_code_t (*gcry_mac_read_func_t)(gcry_mac_hd_t h,
+                                               unsigned char *outbuf,
+                                               size_t *outlen);
+typedef gcry_err_code_t (*gcry_mac_verify_func_t)(gcry_mac_hd_t h,
+                                                 const unsigned char *inbuf,
+                                                 size_t inlen);
+typedef unsigned int (*gcry_mac_get_maclen_func_t)(int algo);
+typedef unsigned int (*gcry_mac_get_keylen_func_t)(int algo);
+
+/* The type used to convey additional information to a MAC.  */
+typedef gpg_err_code_t (*gcry_mac_set_extra_info_t)
+     (gcry_mac_hd_t h, int what, const void *buffer, size_t buflen);
+
+typedef struct gcry_mac_spec_ops
+{
+  gcry_mac_open_func_t open;
+  gcry_mac_close_func_t close;
+  gcry_mac_setkey_func_t setkey;
+  gcry_mac_setiv_func_t setiv;
+  gcry_mac_reset_func_t reset;
+  gcry_mac_write_func_t write;
+  gcry_mac_read_func_t read;
+  gcry_mac_verify_func_t verify;
+  gcry_mac_get_maclen_func_t get_maclen;
+  gcry_mac_get_keylen_func_t get_keylen;
+  gcry_mac_set_extra_info_t set_extra_info;
+  selftest_func_t selftest;
+} gcry_mac_spec_ops_t;
+
+
+/* Module specification structure for message authentication codes.  */
+typedef struct gcry_mac_spec
+{
+  int algo;
+  struct {
+    unsigned int disabled:1;
+    unsigned int fips:1;
+  } flags;
+  const char *name;
+  const gcry_mac_spec_ops_t *ops;
+} gcry_mac_spec_t;
+
+/* The handle structure.  */
+struct gcry_mac_handle
+{
+  int magic;
+  int algo;
+  const gcry_mac_spec_t *spec;
+  gcry_ctx_t gcry_ctx;
+  union {
+    struct {
+      gcry_md_hd_t md_ctx;
+      int md_algo;
+    } hmac;
+    struct {
+      gcry_cipher_hd_t ctx;
+      int cipher_algo;
+      unsigned int blklen;
+    } cmac;
+    struct {
+      gcry_cipher_hd_t ctx;
+      int cipher_algo;
+    } gmac;
+    struct {
+      struct poly1305mac_context_s *ctx;
+    } poly1305mac;
+    struct {
+      GOST28147_context ctx;
+      u32 n1, n2;
+      unsigned int unused;
+      unsigned int count;
+      unsigned char lastiv[8]; /* IMIT blocksize */
+    } imit;
+  } u;
+};
+
+
+/*
+ * The HMAC algorithm specifications (mac-hmac.c).
+ */
+#if USE_SHA1
+extern const gcry_mac_spec_t _gcry_mac_type_spec_hmac_sha1;
+#endif
+#if USE_SHA256
+extern const gcry_mac_spec_t _gcry_mac_type_spec_hmac_sha256;
+extern const gcry_mac_spec_t _gcry_mac_type_spec_hmac_sha224;
+#endif
+#if USE_SHA512
+extern const gcry_mac_spec_t _gcry_mac_type_spec_hmac_sha512;
+extern const gcry_mac_spec_t _gcry_mac_type_spec_hmac_sha384;
+extern const gcry_mac_spec_t _gcry_mac_type_spec_hmac_sha512_224;
+extern const gcry_mac_spec_t _gcry_mac_type_spec_hmac_sha512_256;
+#endif
+#if USE_SHA3
+extern const gcry_mac_spec_t _gcry_mac_type_spec_hmac_sha3_224;
+extern const gcry_mac_spec_t _gcry_mac_type_spec_hmac_sha3_256;
+extern const gcry_mac_spec_t _gcry_mac_type_spec_hmac_sha3_384;
+extern const gcry_mac_spec_t _gcry_mac_type_spec_hmac_sha3_512;
+#endif
+#if USE_GOST_R_3411_94
+extern const gcry_mac_spec_t _gcry_mac_type_spec_hmac_gost3411_94;
+extern const gcry_mac_spec_t _gcry_mac_type_spec_hmac_gost3411_cp;
+#endif
+#if USE_GOST_R_3411_12
+extern const gcry_mac_spec_t _gcry_mac_type_spec_hmac_stribog256;
+extern const gcry_mac_spec_t _gcry_mac_type_spec_hmac_stribog512;
+#endif
+#if USE_WHIRLPOOL
+extern const gcry_mac_spec_t _gcry_mac_type_spec_hmac_whirlpool;
+#endif
+#if USE_RMD160
+extern const gcry_mac_spec_t _gcry_mac_type_spec_hmac_rmd160;
+#endif
+#if USE_TIGER
+extern const gcry_mac_spec_t _gcry_mac_type_spec_hmac_tiger1;
+#endif
+#if USE_MD5
+extern const gcry_mac_spec_t _gcry_mac_type_spec_hmac_md5;
+#endif
+#if USE_MD4
+extern const gcry_mac_spec_t _gcry_mac_type_spec_hmac_md4;
+#endif
+#if USE_BLAKE2
+extern const gcry_mac_spec_t _gcry_mac_type_spec_hmac_blake2b_512;
+extern const gcry_mac_spec_t _gcry_mac_type_spec_hmac_blake2b_384;
+extern const gcry_mac_spec_t _gcry_mac_type_spec_hmac_blake2b_256;
+extern const gcry_mac_spec_t _gcry_mac_type_spec_hmac_blake2b_160;
+extern const gcry_mac_spec_t _gcry_mac_type_spec_hmac_blake2s_256;
+extern const gcry_mac_spec_t _gcry_mac_type_spec_hmac_blake2s_224;
+extern const gcry_mac_spec_t _gcry_mac_type_spec_hmac_blake2s_160;
+extern const gcry_mac_spec_t _gcry_mac_type_spec_hmac_blake2s_128;
+#endif
+#if USE_SM3
+extern const gcry_mac_spec_t _gcry_mac_type_spec_hmac_sm3;
+#endif
+
+/*
+ * The CMAC algorithm specifications (mac-cmac.c).
+ */
+#if USE_BLOWFISH
+extern const gcry_mac_spec_t _gcry_mac_type_spec_cmac_blowfish;
+#endif
+#if USE_DES
+extern const gcry_mac_spec_t _gcry_mac_type_spec_cmac_tripledes;
+#endif
+#if USE_CAST5
+extern const gcry_mac_spec_t _gcry_mac_type_spec_cmac_cast5;
+#endif
+#if USE_AES
+extern const gcry_mac_spec_t _gcry_mac_type_spec_cmac_aes;
+#endif
+#if USE_TWOFISH
+extern const gcry_mac_spec_t _gcry_mac_type_spec_cmac_twofish;
+#endif
+#if USE_SERPENT
+extern const gcry_mac_spec_t _gcry_mac_type_spec_cmac_serpent;
+#endif
+#if USE_RFC2268
+extern const gcry_mac_spec_t _gcry_mac_type_spec_cmac_rfc2268;
+#endif
+#if USE_SEED
+extern const gcry_mac_spec_t _gcry_mac_type_spec_cmac_seed;
+#endif
+#if USE_CAMELLIA
+extern const gcry_mac_spec_t _gcry_mac_type_spec_cmac_camellia;
+#endif
+#if USE_IDEA
+extern const gcry_mac_spec_t _gcry_mac_type_spec_cmac_idea;
+#endif
+#if USE_GOST28147
+extern const gcry_mac_spec_t _gcry_mac_type_spec_cmac_gost28147;
+#endif
+#if USE_GOST28147
+extern const gcry_mac_spec_t _gcry_mac_type_spec_gost28147_imit;
+#endif
+#if USE_SM4
+extern const gcry_mac_spec_t _gcry_mac_type_spec_cmac_sm4;
+#endif
+
+/*
+ * The GMAC algorithm specifications (mac-gmac.c).
+ */
+#if USE_AES
+extern const gcry_mac_spec_t _gcry_mac_type_spec_gmac_aes;
+#endif
+#if USE_TWOFISH
+extern const gcry_mac_spec_t _gcry_mac_type_spec_gmac_twofish;
+#endif
+#if USE_SERPENT
+extern const gcry_mac_spec_t _gcry_mac_type_spec_gmac_serpent;
+#endif
+#if USE_SEED
+extern const gcry_mac_spec_t _gcry_mac_type_spec_gmac_seed;
+#endif
+#if USE_CAMELLIA
+extern const gcry_mac_spec_t _gcry_mac_type_spec_gmac_camellia;
+#endif
+
+/*
+ * The Poly1305 MAC algorithm specifications (mac-poly1305.c).
+ */
+extern const gcry_mac_spec_t _gcry_mac_type_spec_poly1305mac;
+#if USE_AES
+extern const gcry_mac_spec_t _gcry_mac_type_spec_poly1305mac_aes;
+#endif
+#if USE_CAMELLIA
+extern const gcry_mac_spec_t _gcry_mac_type_spec_poly1305mac_camellia;
+#endif
+#if USE_TWOFISH
+extern const gcry_mac_spec_t _gcry_mac_type_spec_poly1305mac_twofish;
+#endif
+#if USE_SERPENT
+extern const gcry_mac_spec_t _gcry_mac_type_spec_poly1305mac_serpent;
+#endif
+#if USE_SEED
+extern const gcry_mac_spec_t _gcry_mac_type_spec_poly1305mac_seed;
+#endif
diff --git a/grub-core/lib/libgcrypt/cipher/mac-poly1305.c 
b/grub-core/lib/libgcrypt/cipher/mac-poly1305.c
new file mode 100644
index 000000000..3abc77745
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/mac-poly1305.c
@@ -0,0 +1,364 @@
+/* mac-poly1305.c  -  Poly1305 based MACs
+ * Copyright (C) 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser general Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "g10lib.h"
+#include "mac-internal.h"
+#include "poly1305-internal.h"
+
+
+struct poly1305mac_context_s {
+  poly1305_context_t ctx;
+  gcry_cipher_hd_t hd;
+  struct {
+    unsigned int key_set:1;
+    unsigned int nonce_set:1;
+    unsigned int tag:1;
+  } marks;
+  byte tag[POLY1305_TAGLEN];
+  byte key[POLY1305_KEYLEN];
+};
+
+
+static gcry_err_code_t
+poly1305mac_open (gcry_mac_hd_t h)
+{
+  struct poly1305mac_context_s *mac_ctx;
+  int secure = (h->magic == CTX_MAC_MAGIC_SECURE);
+  unsigned int flags = (secure ? GCRY_CIPHER_SECURE : 0);
+  gcry_err_code_t err;
+  int cipher_algo;
+
+  if (secure)
+    mac_ctx = xtrycalloc_secure (1, sizeof(*mac_ctx));
+  else
+    mac_ctx = xtrycalloc (1, sizeof(*mac_ctx));
+
+  if (!mac_ctx)
+    return gpg_err_code_from_syserror ();
+
+  h->u.poly1305mac.ctx = mac_ctx;
+
+  switch (h->spec->algo)
+    {
+    default:
+      /* already checked. */
+    case GCRY_MAC_POLY1305:
+      /* plain Poly1305. */
+      cipher_algo = -1;
+      return 0;
+    case GCRY_MAC_POLY1305_AES:
+      cipher_algo = GCRY_CIPHER_AES;
+      break;
+    case GCRY_MAC_POLY1305_CAMELLIA:
+      cipher_algo = GCRY_CIPHER_CAMELLIA128;
+      break;
+    case GCRY_MAC_POLY1305_TWOFISH:
+      cipher_algo = GCRY_CIPHER_TWOFISH;
+      break;
+    case GCRY_MAC_POLY1305_SERPENT:
+      cipher_algo = GCRY_CIPHER_SERPENT128;
+      break;
+    case GCRY_MAC_POLY1305_SEED:
+      cipher_algo = GCRY_CIPHER_SEED;
+      break;
+    }
+
+  err = _gcry_cipher_open_internal (&mac_ctx->hd, cipher_algo,
+                                   GCRY_CIPHER_MODE_ECB, flags);
+  if (err)
+    goto err_free;
+
+  return 0;
+
+err_free:
+  xfree(h->u.poly1305mac.ctx);
+  return err;
+}
+
+
+static void
+poly1305mac_close (gcry_mac_hd_t h)
+{
+  struct poly1305mac_context_s *mac_ctx = h->u.poly1305mac.ctx;
+
+  if (h->spec->algo != GCRY_MAC_POLY1305)
+    _gcry_cipher_close (mac_ctx->hd);
+
+  xfree(mac_ctx);
+}
+
+
+static gcry_err_code_t
+poly1305mac_prepare_key (gcry_mac_hd_t h, const unsigned char *key, size_t 
keylen)
+{
+  struct poly1305mac_context_s *mac_ctx = h->u.poly1305mac.ctx;
+  size_t block_keylen = keylen - 16;
+
+  /* Need at least 16 + 1 byte key. */
+  if (keylen <= 16)
+    return GPG_ERR_INV_KEYLEN;
+
+  /* For Poly1305-AES, first part of key is passed to Poly1305 as is. */
+  memcpy (mac_ctx->key, key + block_keylen, 16);
+
+  /* Remaining part is used as key for the block cipher. */
+  return _gcry_cipher_setkey (mac_ctx->hd, key, block_keylen);
+}
+
+
+static gcry_err_code_t
+poly1305mac_setkey (gcry_mac_hd_t h, const unsigned char *key, size_t keylen)
+{
+  struct poly1305mac_context_s *mac_ctx = h->u.poly1305mac.ctx;
+  gcry_err_code_t err;
+
+  memset(&mac_ctx->ctx, 0, sizeof(mac_ctx->ctx));
+  memset(&mac_ctx->tag, 0, sizeof(mac_ctx->tag));
+  memset(&mac_ctx->key, 0, sizeof(mac_ctx->key));
+
+  mac_ctx->marks.key_set = 0;
+  mac_ctx->marks.nonce_set = 0;
+  mac_ctx->marks.tag = 0;
+
+  if (h->spec->algo != GCRY_MAC_POLY1305)
+    {
+      err = poly1305mac_prepare_key (h, key, keylen);
+      if (err)
+        return err;
+
+      /* Poly1305-AES/etc also need nonce. */
+      mac_ctx->marks.key_set = 1;
+      mac_ctx->marks.nonce_set = 0;
+    }
+  else
+    {
+      /* For plain Poly1305, key is the nonce and setup is complete now. */
+
+      if (keylen != POLY1305_KEYLEN)
+        return GPG_ERR_INV_KEYLEN;
+
+      memcpy (mac_ctx->key, key, keylen);
+
+      err = _gcry_poly1305_init (&mac_ctx->ctx, mac_ctx->key, POLY1305_KEYLEN);
+      if (err)
+        {
+          memset(&mac_ctx->key, 0, sizeof(mac_ctx->key));
+          return err;
+        }
+
+      mac_ctx->marks.key_set = 1;
+      mac_ctx->marks.nonce_set = 1;
+    }
+
+  return 0;
+}
+
+
+static gcry_err_code_t
+poly1305mac_setiv (gcry_mac_hd_t h, const unsigned char *iv, size_t ivlen)
+{
+  struct poly1305mac_context_s *mac_ctx = h->u.poly1305mac.ctx;
+  gcry_err_code_t err;
+
+  if (h->spec->algo == GCRY_MAC_POLY1305)
+    return GPG_ERR_INV_ARG;
+
+  if (ivlen != 16)
+    return GPG_ERR_INV_ARG;
+
+  if (!mac_ctx->marks.key_set)
+    return 0;
+
+  memset(&mac_ctx->ctx, 0, sizeof(mac_ctx->ctx));
+  memset(&mac_ctx->tag, 0, sizeof(mac_ctx->tag));
+  mac_ctx->marks.nonce_set = 0;
+  mac_ctx->marks.tag = 0;
+
+  /* Prepare second part of the poly1305 key. */
+
+  err = _gcry_cipher_encrypt (mac_ctx->hd, mac_ctx->key + 16, 16, iv, 16);
+  if (err)
+    return err;
+
+  err = _gcry_poly1305_init (&mac_ctx->ctx, mac_ctx->key, POLY1305_KEYLEN);
+  if (err)
+    return err;
+
+  mac_ctx->marks.nonce_set = 1;
+  return 0;
+}
+
+
+static gcry_err_code_t
+poly1305mac_reset (gcry_mac_hd_t h)
+{
+  struct poly1305mac_context_s *mac_ctx = h->u.poly1305mac.ctx;
+
+  if (!mac_ctx->marks.key_set || !mac_ctx->marks.nonce_set)
+    return GPG_ERR_INV_STATE;
+
+  memset(&mac_ctx->ctx, 0, sizeof(mac_ctx->ctx));
+  memset(&mac_ctx->tag, 0, sizeof(mac_ctx->tag));
+
+  mac_ctx->marks.key_set = 1;
+  mac_ctx->marks.nonce_set = 1;
+  mac_ctx->marks.tag = 0;
+
+  return _gcry_poly1305_init (&mac_ctx->ctx, mac_ctx->key, POLY1305_KEYLEN);
+}
+
+
+static gcry_err_code_t
+poly1305mac_write (gcry_mac_hd_t h, const unsigned char *buf, size_t buflen)
+{
+  struct poly1305mac_context_s *mac_ctx = h->u.poly1305mac.ctx;
+
+  if (!mac_ctx->marks.key_set || !mac_ctx->marks.nonce_set ||
+      mac_ctx->marks.tag)
+    return GPG_ERR_INV_STATE;
+
+  _gcry_poly1305_update (&mac_ctx->ctx, buf, buflen);
+  return 0;
+}
+
+
+static gcry_err_code_t
+poly1305mac_read (gcry_mac_hd_t h, unsigned char *outbuf, size_t *outlen)
+{
+  struct poly1305mac_context_s *mac_ctx = h->u.poly1305mac.ctx;
+
+  if (!mac_ctx->marks.key_set || !mac_ctx->marks.nonce_set)
+    return GPG_ERR_INV_STATE;
+
+  if (!mac_ctx->marks.tag)
+    {
+      _gcry_poly1305_finish(&mac_ctx->ctx, mac_ctx->tag);
+
+      memset(&mac_ctx->ctx, 0, sizeof(mac_ctx->ctx));
+      mac_ctx->marks.tag = 1;
+    }
+
+  if (*outlen == 0)
+    return 0;
+
+  if (*outlen <= POLY1305_TAGLEN)
+    buf_cpy (outbuf, mac_ctx->tag, *outlen);
+  else
+    {
+      buf_cpy (outbuf, mac_ctx->tag, POLY1305_TAGLEN);
+      *outlen = POLY1305_TAGLEN;
+    }
+
+  return 0;
+}
+
+
+static gcry_err_code_t
+poly1305mac_verify (gcry_mac_hd_t h, const unsigned char *buf, size_t buflen)
+{
+  struct poly1305mac_context_s *mac_ctx = h->u.poly1305mac.ctx;
+  gcry_err_code_t err;
+  size_t outlen = 0;
+
+  /* Check and finalize tag. */
+  err = poly1305mac_read(h, NULL, &outlen);
+  if (err)
+    return err;
+
+  if (buflen > POLY1305_TAGLEN)
+    return GPG_ERR_INV_LENGTH;
+
+  return buf_eq_const (buf, mac_ctx->tag, buflen) ? 0 : GPG_ERR_CHECKSUM;
+}
+
+
+static unsigned int
+poly1305mac_get_maclen (int algo)
+{
+  (void)algo;
+
+  return POLY1305_TAGLEN;
+}
+
+
+static unsigned int
+poly1305mac_get_keylen (int algo)
+{
+  (void)algo;
+
+  return POLY1305_KEYLEN;
+}
+
+
+static gcry_mac_spec_ops_t poly1305mac_ops = {
+  poly1305mac_open,
+  poly1305mac_close,
+  poly1305mac_setkey,
+  poly1305mac_setiv,
+  poly1305mac_reset,
+  poly1305mac_write,
+  poly1305mac_read,
+  poly1305mac_verify,
+  poly1305mac_get_maclen,
+  poly1305mac_get_keylen,
+  NULL,
+  NULL,
+};
+
+
+const gcry_mac_spec_t _gcry_mac_type_spec_poly1305mac = {
+  GCRY_MAC_POLY1305, {0, 0}, "POLY1305",
+  &poly1305mac_ops
+};
+#if USE_AES
+const gcry_mac_spec_t _gcry_mac_type_spec_poly1305mac_aes = {
+  GCRY_MAC_POLY1305_AES, {0, 0}, "POLY1305_AES",
+  &poly1305mac_ops
+};
+#endif
+#if USE_CAMELLIA
+const gcry_mac_spec_t _gcry_mac_type_spec_poly1305mac_camellia = {
+  GCRY_MAC_POLY1305_CAMELLIA, {0, 0}, "POLY1305_CAMELLIA",
+  &poly1305mac_ops
+};
+#endif
+#if USE_TWOFISH
+const gcry_mac_spec_t _gcry_mac_type_spec_poly1305mac_twofish = {
+  GCRY_MAC_POLY1305_TWOFISH, {0, 0}, "POLY1305_TWOFISH",
+  &poly1305mac_ops
+};
+#endif
+#if USE_SERPENT
+const gcry_mac_spec_t _gcry_mac_type_spec_poly1305mac_serpent = {
+  GCRY_MAC_POLY1305_SERPENT, {0, 0}, "POLY1305_SERPENT",
+  &poly1305mac_ops
+};
+#endif
+#if USE_SEED
+const gcry_mac_spec_t _gcry_mac_type_spec_poly1305mac_seed = {
+  GCRY_MAC_POLY1305_SEED, {0, 0}, "POLY1305_SEED",
+  &poly1305mac_ops
+};
+#endif
diff --git a/grub-core/lib/libgcrypt/cipher/mac.c 
b/grub-core/lib/libgcrypt/cipher/mac.c
new file mode 100644
index 000000000..ba1eb300a
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/mac.c
@@ -0,0 +1,802 @@
+/* mac.c  -  message authentication code dispatcher
+ * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser general Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "g10lib.h"
+#include "mac-internal.h"
+
+
+/* This is the list of the digest implementations included in
+   libgcrypt.  */
+static const gcry_mac_spec_t * const mac_list[] = {
+#if USE_SHA1
+  &_gcry_mac_type_spec_hmac_sha1,
+#endif
+#if USE_SHA256
+  &_gcry_mac_type_spec_hmac_sha256,
+  &_gcry_mac_type_spec_hmac_sha224,
+#endif
+#if USE_SHA512
+  &_gcry_mac_type_spec_hmac_sha512,
+  &_gcry_mac_type_spec_hmac_sha384,
+  &_gcry_mac_type_spec_hmac_sha512_256,
+  &_gcry_mac_type_spec_hmac_sha512_224,
+#endif
+#if USE_SHA3
+  &_gcry_mac_type_spec_hmac_sha3_224,
+  &_gcry_mac_type_spec_hmac_sha3_256,
+  &_gcry_mac_type_spec_hmac_sha3_384,
+  &_gcry_mac_type_spec_hmac_sha3_512,
+#endif
+#if USE_GOST_R_3411_94
+  &_gcry_mac_type_spec_hmac_gost3411_94,
+  &_gcry_mac_type_spec_hmac_gost3411_cp,
+#endif
+#if USE_GOST_R_3411_12
+  &_gcry_mac_type_spec_hmac_stribog256,
+  &_gcry_mac_type_spec_hmac_stribog512,
+#endif
+#if USE_WHIRLPOOL
+  &_gcry_mac_type_spec_hmac_whirlpool,
+#endif
+#if USE_RMD160
+  &_gcry_mac_type_spec_hmac_rmd160,
+#endif
+#if USE_TIGER
+  &_gcry_mac_type_spec_hmac_tiger1,
+#endif
+#if USE_MD5
+  &_gcry_mac_type_spec_hmac_md5,
+#endif
+#if USE_MD4
+  &_gcry_mac_type_spec_hmac_md4,
+#endif
+#if USE_BLAKE2
+  &_gcry_mac_type_spec_hmac_blake2b_512,
+  &_gcry_mac_type_spec_hmac_blake2b_384,
+  &_gcry_mac_type_spec_hmac_blake2b_256,
+  &_gcry_mac_type_spec_hmac_blake2b_160,
+  &_gcry_mac_type_spec_hmac_blake2s_256,
+  &_gcry_mac_type_spec_hmac_blake2s_224,
+  &_gcry_mac_type_spec_hmac_blake2s_160,
+  &_gcry_mac_type_spec_hmac_blake2s_128,
+#endif
+#if USE_SM3
+  &_gcry_mac_type_spec_hmac_sm3,
+#endif
+#if USE_BLOWFISH
+  &_gcry_mac_type_spec_cmac_blowfish,
+#endif
+#if USE_DES
+  &_gcry_mac_type_spec_cmac_tripledes,
+#endif
+#if USE_CAST5
+  &_gcry_mac_type_spec_cmac_cast5,
+#endif
+#if USE_AES
+  &_gcry_mac_type_spec_cmac_aes,
+  &_gcry_mac_type_spec_gmac_aes,
+  &_gcry_mac_type_spec_poly1305mac_aes,
+#endif
+#if USE_TWOFISH
+  &_gcry_mac_type_spec_cmac_twofish,
+  &_gcry_mac_type_spec_gmac_twofish,
+  &_gcry_mac_type_spec_poly1305mac_twofish,
+#endif
+#if USE_SERPENT
+  &_gcry_mac_type_spec_cmac_serpent,
+  &_gcry_mac_type_spec_gmac_serpent,
+  &_gcry_mac_type_spec_poly1305mac_serpent,
+#endif
+#if USE_RFC2268
+  &_gcry_mac_type_spec_cmac_rfc2268,
+#endif
+#if USE_SEED
+  &_gcry_mac_type_spec_cmac_seed,
+  &_gcry_mac_type_spec_gmac_seed,
+  &_gcry_mac_type_spec_poly1305mac_seed,
+#endif
+#if USE_CAMELLIA
+  &_gcry_mac_type_spec_cmac_camellia,
+  &_gcry_mac_type_spec_gmac_camellia,
+  &_gcry_mac_type_spec_poly1305mac_camellia,
+#endif
+#if USE_IDEA
+  &_gcry_mac_type_spec_cmac_idea,
+#endif
+#if USE_GOST28147
+  &_gcry_mac_type_spec_cmac_gost28147,
+  &_gcry_mac_type_spec_gost28147_imit,
+#endif
+  &_gcry_mac_type_spec_poly1305mac,
+#if USE_SM4
+  &_gcry_mac_type_spec_cmac_sm4,
+#endif
+  NULL,
+};
+
+/* HMAC implementations start with index 101 (enum gcry_mac_algos) */
+static const gcry_mac_spec_t * const mac_list_algo101[] =
+  {
+#if USE_SHA256
+    &_gcry_mac_type_spec_hmac_sha256,
+    &_gcry_mac_type_spec_hmac_sha224,
+#else
+    NULL,
+    NULL,
+#endif
+#if USE_SHA512
+    &_gcry_mac_type_spec_hmac_sha512,
+    &_gcry_mac_type_spec_hmac_sha384,
+#else
+    NULL,
+    NULL,
+#endif
+#if USE_SHA1
+    &_gcry_mac_type_spec_hmac_sha1,
+#else
+    NULL,
+#endif
+#if USE_MD5
+    &_gcry_mac_type_spec_hmac_md5,
+#else
+    NULL,
+#endif
+#if USE_MD4
+    &_gcry_mac_type_spec_hmac_md4,
+#else
+    NULL,
+#endif
+#if USE_RMD160
+    &_gcry_mac_type_spec_hmac_rmd160,
+#else
+    NULL,
+#endif
+#if USE_TIGER
+    &_gcry_mac_type_spec_hmac_tiger1,
+#else
+    NULL,
+#endif
+#if USE_WHIRLPOOL
+    &_gcry_mac_type_spec_hmac_whirlpool,
+#else
+    NULL,
+#endif
+#if USE_GOST_R_3411_94
+    &_gcry_mac_type_spec_hmac_gost3411_94,
+#else
+    NULL,
+#endif
+#if USE_GOST_R_3411_12
+    &_gcry_mac_type_spec_hmac_stribog256,
+    &_gcry_mac_type_spec_hmac_stribog512,
+#else
+    NULL,
+    NULL,
+#endif
+#if USE_MD2
+    &_gcry_mac_type_spec_hmac_md2,
+#else
+    NULL,
+#endif
+#if USE_SHA3
+    &_gcry_mac_type_spec_hmac_sha3_224,
+    &_gcry_mac_type_spec_hmac_sha3_256,
+    &_gcry_mac_type_spec_hmac_sha3_384,
+    &_gcry_mac_type_spec_hmac_sha3_512,
+#else
+    NULL,
+    NULL,
+    NULL,
+    NULL,
+#endif
+#if USE_GOST_R_3411_94
+    &_gcry_mac_type_spec_hmac_gost3411_cp,
+#else
+    NULL,
+#endif
+#if USE_BLAKE2
+    &_gcry_mac_type_spec_hmac_blake2b_512,
+    &_gcry_mac_type_spec_hmac_blake2b_384,
+    &_gcry_mac_type_spec_hmac_blake2b_256,
+    &_gcry_mac_type_spec_hmac_blake2b_160,
+    &_gcry_mac_type_spec_hmac_blake2s_256,
+    &_gcry_mac_type_spec_hmac_blake2s_224,
+    &_gcry_mac_type_spec_hmac_blake2s_160,
+    &_gcry_mac_type_spec_hmac_blake2s_128,
+#else
+    NULL,
+    NULL,
+    NULL,
+    NULL,
+    NULL,
+    NULL,
+    NULL,
+    NULL,
+#endif
+#if USE_SM3
+    &_gcry_mac_type_spec_hmac_sm3,
+#else
+    NULL,
+#endif
+#if USE_SHA512
+    &_gcry_mac_type_spec_hmac_sha512_256,
+    &_gcry_mac_type_spec_hmac_sha512_224,
+#else
+    NULL,
+    NULL,
+#endif
+  };
+
+/* CMAC implementations start with index 201 (enum gcry_mac_algos) */
+static const gcry_mac_spec_t * const mac_list_algo201[] =
+  {
+#if USE_AES
+    &_gcry_mac_type_spec_cmac_aes,
+#else
+    NULL,
+#endif
+#if USE_DES
+    &_gcry_mac_type_spec_cmac_tripledes,
+#else
+    NULL,
+#endif
+#if USE_CAMELLIA
+    &_gcry_mac_type_spec_cmac_camellia,
+#else
+    NULL,
+#endif
+#if USE_CAST5
+    &_gcry_mac_type_spec_cmac_cast5,
+#else
+    NULL,
+#endif
+#if USE_BLOWFISH
+    &_gcry_mac_type_spec_cmac_blowfish,
+#else
+    NULL,
+#endif
+#if USE_TWOFISH
+    &_gcry_mac_type_spec_cmac_twofish,
+#else
+    NULL,
+#endif
+#if USE_SERPENT
+    &_gcry_mac_type_spec_cmac_serpent,
+#else
+    NULL,
+#endif
+#if USE_SEED
+    &_gcry_mac_type_spec_cmac_seed,
+#else
+    NULL,
+#endif
+#if USE_RFC2268
+    &_gcry_mac_type_spec_cmac_rfc2268,
+#else
+    NULL,
+#endif
+#if USE_IDEA
+    &_gcry_mac_type_spec_cmac_idea,
+#else
+    NULL,
+#endif
+#if USE_GOST28147
+    &_gcry_mac_type_spec_cmac_gost28147,
+#else
+    NULL,
+#endif
+#if USE_SM4
+    &_gcry_mac_type_spec_cmac_sm4
+#else
+    NULL
+#endif
+  };
+
+/* GMAC implementations start with index 401 (enum gcry_mac_algos) */
+static const gcry_mac_spec_t * const mac_list_algo401[] =
+  {
+#if USE_AES
+    &_gcry_mac_type_spec_gmac_aes,
+#else
+    NULL,
+#endif
+#if USE_CAMELLIA
+    &_gcry_mac_type_spec_gmac_camellia,
+#else
+    NULL,
+#endif
+#if USE_TWOFISH
+    &_gcry_mac_type_spec_gmac_twofish,
+#else
+    NULL,
+#endif
+#if USE_SERPENT
+    &_gcry_mac_type_spec_gmac_serpent,
+#else
+    NULL,
+#endif
+#if USE_SEED
+    &_gcry_mac_type_spec_gmac_seed
+#else
+    NULL
+#endif
+  };
+
+/* Poly1305-MAC implementations start with index 501 (enum gcry_mac_algos) */
+static const gcry_mac_spec_t * const mac_list_algo501[] =
+  {
+    &_gcry_mac_type_spec_poly1305mac,
+#if USE_AES
+    &_gcry_mac_type_spec_poly1305mac_aes,
+#else
+    NULL,
+#endif
+#if USE_CAMELLIA
+    &_gcry_mac_type_spec_poly1305mac_camellia,
+#else
+    NULL,
+#endif
+#if USE_TWOFISH
+    &_gcry_mac_type_spec_poly1305mac_twofish,
+#else
+    NULL,
+#endif
+#if USE_SERPENT
+    &_gcry_mac_type_spec_poly1305mac_serpent,
+#else
+    NULL,
+#endif
+#if USE_SEED
+    &_gcry_mac_type_spec_poly1305mac_seed
+#else
+    NULL
+#endif
+  };
+
+
+
+
+/* Explicitly initialize this module.  */
+gcry_err_code_t
+_gcry_mac_init (void)
+{
+  return 0;
+}
+
+
+/* Return the spec structure for the MAC algorithm ALGO.  For an
+   unknown algorithm NULL is returned.  */
+static const gcry_mac_spec_t *
+spec_from_algo (int algo)
+{
+  const gcry_mac_spec_t *spec = NULL;
+
+  if (algo >= 101 && algo < 101 + DIM(mac_list_algo101))
+    spec = mac_list_algo101[algo - 101];
+  else if (algo >= 201 && algo < 201 + DIM(mac_list_algo201))
+    spec = mac_list_algo201[algo - 201];
+  else if (algo >= 401 && algo < 401 + DIM(mac_list_algo401))
+    spec = mac_list_algo401[algo - 401];
+  else if (algo >= 501 && algo < 501 + DIM(mac_list_algo501))
+    spec = mac_list_algo501[algo - 501];
+#if USE_GOST28147
+  else if (algo == GCRY_MAC_GOST28147_IMIT)
+    spec = &_gcry_mac_type_spec_gost28147_imit;
+#endif
+
+  if (spec)
+    gcry_assert (spec->algo == algo);
+
+  return spec;
+}
+
+
+/* Lookup a mac's spec by its name.  */
+static const gcry_mac_spec_t *
+spec_from_name (const char *name)
+{
+  const gcry_mac_spec_t *spec;
+  int idx;
+
+  for (idx = 0; (spec = mac_list[idx]); idx++)
+    if (!stricmp (name, spec->name))
+      return spec;
+
+  return NULL;
+}
+
+
+/****************
+ * Map a string to the mac algo
+ */
+int
+_gcry_mac_map_name (const char *string)
+{
+  const gcry_mac_spec_t *spec;
+
+  if (!string)
+    return 0;
+
+  /* Not found, search a matching mac name.  */
+  spec = spec_from_name (string);
+  if (spec)
+    return spec->algo;
+
+  return 0;
+}
+
+
+/****************
+ * This function simply returns the name of the algorithm or some constant
+ * string when there is no algo.  It will never return NULL.
+ * Use the macro gcry_mac_test_algo() to check whether the algorithm
+ * is valid.
+ */
+const char *
+_gcry_mac_algo_name (int algorithm)
+{
+  const gcry_mac_spec_t *spec;
+
+  spec = spec_from_algo (algorithm);
+  return spec ? spec->name : "?";
+}
+
+
+static gcry_err_code_t
+check_mac_algo (int algorithm)
+{
+  const gcry_mac_spec_t *spec;
+
+  spec = spec_from_algo (algorithm);
+  if (spec && !spec->flags.disabled && (spec->flags.fips || !fips_mode ()))
+    return 0;
+
+  return GPG_ERR_MAC_ALGO;
+}
+
+
+/****************
+ * Open a message digest handle for use with algorithm ALGO.
+ */
+static gcry_err_code_t
+mac_open (gcry_mac_hd_t * hd, int algo, int secure, gcry_ctx_t ctx)
+{
+  const gcry_mac_spec_t *spec;
+  gcry_err_code_t err;
+  gcry_mac_hd_t h;
+
+  spec = spec_from_algo (algo);
+  if (!spec)
+    return GPG_ERR_MAC_ALGO;
+  else if (spec->flags.disabled)
+    return GPG_ERR_MAC_ALGO;
+  else if (!spec->flags.fips && fips_mode ())
+    return GPG_ERR_MAC_ALGO;
+  else if (!spec->ops)
+    return GPG_ERR_MAC_ALGO;
+  else if (!spec->ops->open || !spec->ops->write || !spec->ops->setkey ||
+           !spec->ops->read || !spec->ops->verify || !spec->ops->reset)
+    return GPG_ERR_MAC_ALGO;
+
+  if (secure)
+    h = xtrycalloc_secure (1, sizeof (*h));
+  else
+    h = xtrycalloc (1, sizeof (*h));
+
+  if (!h)
+    return gpg_err_code_from_syserror ();
+
+  h->magic = secure ? CTX_MAC_MAGIC_SECURE : CTX_MAC_MAGIC_NORMAL;
+  h->spec = spec;
+  h->algo = algo;
+  h->gcry_ctx = ctx;
+
+  err = h->spec->ops->open (h);
+  if (err)
+    xfree (h);
+  else
+    *hd = h;
+
+  return err;
+}
+
+
+static gcry_err_code_t
+mac_reset (gcry_mac_hd_t hd)
+{
+  if (hd->spec->ops->reset)
+    return hd->spec->ops->reset (hd);
+
+  return 0;
+}
+
+
+static void
+mac_close (gcry_mac_hd_t hd)
+{
+  if (hd->spec->ops->close)
+    hd->spec->ops->close (hd);
+
+  wipememory (hd, sizeof (*hd));
+
+  xfree (hd);
+}
+
+
+static gcry_err_code_t
+mac_setkey (gcry_mac_hd_t hd, const void *key, size_t keylen)
+{
+  if (!hd->spec->ops->setkey)
+    return GPG_ERR_INV_ARG;
+  if (keylen > 0 && !key)
+    return GPG_ERR_INV_ARG;
+
+  return hd->spec->ops->setkey (hd, key, keylen);
+}
+
+
+static gcry_err_code_t
+mac_setiv (gcry_mac_hd_t hd, const void *iv, size_t ivlen)
+{
+  if (!hd->spec->ops->setiv)
+    return GPG_ERR_INV_ARG;
+  if (ivlen > 0 && !iv)
+    return GPG_ERR_INV_ARG;
+
+  return hd->spec->ops->setiv (hd, iv, ivlen);
+}
+
+
+static gcry_err_code_t
+mac_write (gcry_mac_hd_t hd, const void *inbuf, size_t inlen)
+{
+  if (!hd->spec->ops->write)
+    return GPG_ERR_INV_ARG;
+  if (inlen > 0 && !inbuf)
+    return GPG_ERR_INV_ARG;
+
+  return hd->spec->ops->write (hd, inbuf, inlen);
+}
+
+
+static gcry_err_code_t
+mac_read (gcry_mac_hd_t hd, void *outbuf, size_t * outlen)
+{
+  if (!outbuf || !outlen || *outlen == 0 || !hd->spec->ops->read)
+    return GPG_ERR_INV_ARG;
+
+  return hd->spec->ops->read (hd, outbuf, outlen);
+}
+
+
+static gcry_err_code_t
+mac_verify (gcry_mac_hd_t hd, const void *buf, size_t buflen)
+{
+  if (!buf || buflen == 0 || !hd->spec->ops->verify)
+    return GPG_ERR_INV_ARG;
+
+  return hd->spec->ops->verify (hd, buf, buflen);
+}
+
+
+/* Create a MAC object for algorithm ALGO.  FLAGS may be
+   given as an bitwise OR of the gcry_mac_flags values.
+   H is guaranteed to be a valid handle or NULL on error.  */
+gpg_err_code_t
+_gcry_mac_open (gcry_mac_hd_t * h, int algo, unsigned int flags,
+                gcry_ctx_t ctx)
+{
+  gcry_err_code_t rc;
+  gcry_mac_hd_t hd = NULL;
+
+  if ((flags & ~GCRY_MAC_FLAG_SECURE))
+    rc = GPG_ERR_INV_ARG;
+  else
+    rc = mac_open (&hd, algo, !!(flags & GCRY_MAC_FLAG_SECURE), ctx);
+
+  *h = rc ? NULL : hd;
+  return rc;
+}
+
+
+void
+_gcry_mac_close (gcry_mac_hd_t hd)
+{
+  if (hd)
+    mac_close (hd);
+}
+
+
+gcry_err_code_t
+_gcry_mac_setkey (gcry_mac_hd_t hd, const void *key, size_t keylen)
+{
+  return mac_setkey (hd, key, keylen);
+}
+
+
+gcry_err_code_t
+_gcry_mac_setiv (gcry_mac_hd_t hd, const void *iv, size_t ivlen)
+{
+  return mac_setiv (hd, iv, ivlen);
+}
+
+
+gcry_err_code_t
+_gcry_mac_write (gcry_mac_hd_t hd, const void *inbuf, size_t inlen)
+{
+  return mac_write (hd, inbuf, inlen);
+}
+
+
+gcry_err_code_t
+_gcry_mac_read (gcry_mac_hd_t hd, void *outbuf, size_t * outlen)
+{
+  return mac_read (hd, outbuf, outlen);
+}
+
+
+gcry_err_code_t
+_gcry_mac_verify (gcry_mac_hd_t hd, const void *buf, size_t buflen)
+{
+  return mac_verify (hd, buf, buflen);
+}
+
+
+int
+_gcry_mac_get_algo (gcry_mac_hd_t hd)
+{
+  return hd->algo;
+}
+
+
+unsigned int
+_gcry_mac_get_algo_maclen (int algo)
+{
+  const gcry_mac_spec_t *spec;
+
+  spec = spec_from_algo (algo);
+  if (!spec || !spec->ops || !spec->ops->get_maclen)
+    return 0;
+
+  return spec->ops->get_maclen (algo);
+}
+
+
+unsigned int
+_gcry_mac_get_algo_keylen (int algo)
+{
+  const gcry_mac_spec_t *spec;
+
+  spec = spec_from_algo (algo);
+  if (!spec || !spec->ops || !spec->ops->get_keylen)
+    return 0;
+
+  return spec->ops->get_keylen (algo);
+}
+
+
+gcry_err_code_t
+_gcry_mac_ctl (gcry_mac_hd_t hd, int cmd, void *buffer, size_t buflen)
+{
+  gcry_err_code_t rc;
+
+  /* Currently not used.  */
+  (void) hd;
+  (void) buffer;
+  (void) buflen;
+
+  switch (cmd)
+    {
+    case GCRYCTL_RESET:
+      rc = mac_reset (hd);
+      break;
+    case GCRYCTL_SET_SBOX:
+      if (hd->spec->ops->set_extra_info)
+        rc = hd->spec->ops->set_extra_info
+          (hd, GCRYCTL_SET_SBOX, buffer, buflen);
+      else
+        rc = GPG_ERR_NOT_SUPPORTED;
+      break;
+    default:
+      rc = GPG_ERR_INV_OP;
+    }
+  return rc;
+}
+
+
+/* Return information about the given MAC algorithm ALGO.
+
+    GCRYCTL_TEST_ALGO:
+        Returns 0 if the specified algorithm ALGO is available for use.
+        BUFFER and NBYTES must be zero.
+
+   Note: Because this function is in most cases used to return an
+   integer value, we can make it easier for the caller to just look at
+   the return value.  The caller will in all cases consult the value
+   and thereby detecting whether a error occurred or not (i.e. while
+   checking the block size)
+ */
+gcry_err_code_t
+_gcry_mac_algo_info (int algo, int what, void *buffer, size_t * nbytes)
+{
+  gcry_err_code_t rc = 0;
+  unsigned int ui;
+
+  switch (what)
+    {
+    case GCRYCTL_GET_KEYLEN:
+      if (buffer || (!nbytes))
+        rc = GPG_ERR_INV_ARG;
+      else
+        {
+          ui = _gcry_mac_get_algo_keylen (algo);
+          if (ui > 0)
+            *nbytes = (size_t) ui;
+          else
+            /* The only reason for an error is an invalid algo.  */
+            rc = GPG_ERR_MAC_ALGO;
+        }
+      break;
+    case GCRYCTL_TEST_ALGO:
+      if (buffer || nbytes)
+        rc = GPG_ERR_INV_ARG;
+      else
+        rc = check_mac_algo (algo);
+      break;
+
+    default:
+      rc = GPG_ERR_INV_OP;
+    }
+
+  return rc;
+}
+
+
+/* Run the self-tests for the MAC.  */
+gpg_error_t
+_gcry_mac_selftest (int algo, int extended, selftest_report_func_t report)
+{
+  gcry_err_code_t ec;
+  const gcry_mac_spec_t *spec;
+
+  spec = spec_from_algo (algo);
+  if (spec && !spec->flags.disabled
+      && (spec->flags.fips || !fips_mode ())
+      && spec->ops && spec->ops->selftest)
+    ec = spec->ops->selftest (algo, extended, report);
+  else
+    {
+      ec = GPG_ERR_MAC_ALGO;
+      if (report)
+        report ("mac", algo, "module",
+                spec && !spec->flags.disabled
+                && (spec->flags.fips || !fips_mode ())?
+                "no selftest available" :
+                spec? "algorithm disabled" :
+                "algorithm not found");
+    }
+
+  return gpg_error (ec);
+}
diff --git a/grub-core/lib/libgcrypt/cipher/md.c 
b/grub-core/lib/libgcrypt/cipher/md.c
index c3b3a4f3a..34336b5cc 100644
--- a/grub-core/lib/libgcrypt/cipher/md.c
+++ b/grub-core/lib/libgcrypt/cipher/md.c
@@ -1,6 +1,7 @@
 /* md.c  -  message digest dispatcher
  * Copyright (C) 1998, 1999, 2002, 2003, 2006,
  *               2008 Free Software Foundation, Inc.
+ * Copyright (C) 2013, 2014 g10 Code GmbH
  *
  * This file is part of Libgcrypt.
  *
@@ -26,367 +27,389 @@
 
 #include "g10lib.h"
 #include "cipher.h"
-#include "ath.h"
-
-#include "rmd.h"
-
-/* A dummy extraspec so that we do not need to tests the extraspec
-   field from the module specification against NULL and instead
-   directly test the respective fields of extraspecs.  */
-static md_extra_spec_t dummy_extra_spec;
 
 
 /* This is the list of the digest implementations included in
    libgcrypt.  */
-static struct digest_table_entry
-{
-  gcry_md_spec_t *digest;
-  md_extra_spec_t *extraspec;
-  unsigned int algorithm;
-  int fips_allowed;
-} digest_table[] =
+static const gcry_md_spec_t * const digest_list[] =
   {
 #if USE_CRC
-    /* We allow the CRC algorithms even in FIPS mode because they are
-       actually no cryptographic primitives.  */
-    { &_gcry_digest_spec_crc32,
-      &dummy_extra_spec,                 GCRY_MD_CRC32, 1 },
-    { &_gcry_digest_spec_crc32_rfc1510,
-      &dummy_extra_spec,                 GCRY_MD_CRC32_RFC1510, 1 },
-    { &_gcry_digest_spec_crc24_rfc2440,
-      &dummy_extra_spec,                 GCRY_MD_CRC24_RFC2440, 1 },
+     &_gcry_digest_spec_crc32,
+     &_gcry_digest_spec_crc32_rfc1510,
+     &_gcry_digest_spec_crc24_rfc2440,
+#endif
+#if USE_SHA1
+     &_gcry_digest_spec_sha1,
+#endif
+#if USE_SHA256
+     &_gcry_digest_spec_sha256,
+     &_gcry_digest_spec_sha224,
+#endif
+#if USE_SHA512
+     &_gcry_digest_spec_sha512,
+     &_gcry_digest_spec_sha384,
+     &_gcry_digest_spec_sha512_256,
+     &_gcry_digest_spec_sha512_224,
+#endif
+#if USE_SHA3
+     &_gcry_digest_spec_sha3_224,
+     &_gcry_digest_spec_sha3_256,
+     &_gcry_digest_spec_sha3_384,
+     &_gcry_digest_spec_sha3_512,
+     &_gcry_digest_spec_shake128,
+     &_gcry_digest_spec_shake256,
+#endif
+#if USE_GOST_R_3411_94
+     &_gcry_digest_spec_gost3411_94,
+     &_gcry_digest_spec_gost3411_cp,
+#endif
+#if USE_GOST_R_3411_12
+     &_gcry_digest_spec_stribog_256,
+     &_gcry_digest_spec_stribog_512,
+#endif
+#if USE_WHIRLPOOL
+     &_gcry_digest_spec_whirlpool,
+#endif
+#if USE_RMD160
+     &_gcry_digest_spec_rmd160,
+#endif
+#if USE_TIGER
+     &_gcry_digest_spec_tiger,
+     &_gcry_digest_spec_tiger1,
+     &_gcry_digest_spec_tiger2,
+#endif
+#if USE_MD5
+     &_gcry_digest_spec_md5,
 #endif
 #if USE_MD4
-    { &_gcry_digest_spec_md4,
-      &dummy_extra_spec,                 GCRY_MD_MD4 },
+     &_gcry_digest_spec_md4,
+#endif
+#if USE_MD2
+     &_gcry_digest_spec_md2,
 #endif
+#if USE_BLAKE2
+     &_gcry_digest_spec_blake2b_512,
+     &_gcry_digest_spec_blake2b_384,
+     &_gcry_digest_spec_blake2b_256,
+     &_gcry_digest_spec_blake2b_160,
+     &_gcry_digest_spec_blake2s_256,
+     &_gcry_digest_spec_blake2s_224,
+     &_gcry_digest_spec_blake2s_160,
+     &_gcry_digest_spec_blake2s_128,
+#endif
+#if USE_SM3
+     &_gcry_digest_spec_sm3,
+#endif
+     NULL
+  };
+
+/* Digest implementations starting with index 0 (enum gcry_md_algos) */
+static const gcry_md_spec_t * const digest_list_algo0[] =
+  {
+    NULL, /* GCRY_MD_NONE */
 #if USE_MD5
-    { &_gcry_digest_spec_md5,
-      &dummy_extra_spec,                 GCRY_MD_MD5, 1 },
+    &_gcry_digest_spec_md5,
+#else
+    NULL,
+#endif
+#if USE_SHA1
+    &_gcry_digest_spec_sha1,
+#else
+    NULL,
 #endif
 #if USE_RMD160
-    { &_gcry_digest_spec_rmd160,
-      &dummy_extra_spec,                 GCRY_MD_RMD160 },
+    &_gcry_digest_spec_rmd160,
+#else
+    NULL,
 #endif
-#if USE_SHA1
-    { &_gcry_digest_spec_sha1,
-      &_gcry_digest_extraspec_sha1,      GCRY_MD_SHA1, 1 },
+    NULL, /* Unused index 4 */
+#if USE_MD2
+    &_gcry_digest_spec_md2,
+#else
+    NULL,
+#endif
+#if USE_TIGER
+    &_gcry_digest_spec_tiger,
+#else
+    NULL,
 #endif
+    NULL, /* GCRY_MD_HAVAL */
 #if USE_SHA256
-    { &_gcry_digest_spec_sha256,
-      &_gcry_digest_extraspec_sha256,    GCRY_MD_SHA256, 1 },
-    { &_gcry_digest_spec_sha224,
-      &_gcry_digest_extraspec_sha224,    GCRY_MD_SHA224, 1 },
+    &_gcry_digest_spec_sha256,
+#else
+    NULL,
 #endif
 #if USE_SHA512
-    { &_gcry_digest_spec_sha512,
-      &_gcry_digest_extraspec_sha512,    GCRY_MD_SHA512, 1 },
-    { &_gcry_digest_spec_sha384,
-      &_gcry_digest_extraspec_sha384,    GCRY_MD_SHA384, 1 },
+    &_gcry_digest_spec_sha384,
+    &_gcry_digest_spec_sha512,
+#else
+    NULL,
+    NULL,
 #endif
-#if USE_TIGER
-    { &_gcry_digest_spec_tiger,
-      &dummy_extra_spec,                 GCRY_MD_TIGER },
-    { &_gcry_digest_spec_tiger1,
-      &dummy_extra_spec,                 GCRY_MD_TIGER1 },
-    { &_gcry_digest_spec_tiger2,
-      &dummy_extra_spec,                 GCRY_MD_TIGER2 },
+#if USE_SHA256
+    &_gcry_digest_spec_sha224
+#else
+    NULL
+#endif
+  };
+
+/* Digest implementations starting with index 301 (enum gcry_md_algos) */
+static const gcry_md_spec_t * const digest_list_algo301[] =
+  {
+#if USE_MD4
+    &_gcry_digest_spec_md4,
+#else
+    NULL,
+#endif
+#if USE_CRC
+    &_gcry_digest_spec_crc32,
+    &_gcry_digest_spec_crc32_rfc1510,
+    &_gcry_digest_spec_crc24_rfc2440,
+#else
+    NULL,
+    NULL,
+    NULL,
 #endif
 #if USE_WHIRLPOOL
-    { &_gcry_digest_spec_whirlpool,
-      &dummy_extra_spec,                 GCRY_MD_WHIRLPOOL },
+    &_gcry_digest_spec_whirlpool,
+#else
+    NULL,
+#endif
+#if USE_TIGER
+    &_gcry_digest_spec_tiger1,
+    &_gcry_digest_spec_tiger2,
+#else
+    NULL,
+    NULL,
+#endif
+#if USE_GOST_R_3411_94
+    &_gcry_digest_spec_gost3411_94,
+#else
+    NULL,
+#endif
+#if USE_GOST_R_3411_12
+    &_gcry_digest_spec_stribog_256,
+    &_gcry_digest_spec_stribog_512,
+#else
+    NULL,
+    NULL,
+#endif
+#if USE_GOST_R_3411_94
+    &_gcry_digest_spec_gost3411_cp,
+#else
+    NULL,
+#endif
+#if USE_SHA3
+    &_gcry_digest_spec_sha3_224,
+    &_gcry_digest_spec_sha3_256,
+    &_gcry_digest_spec_sha3_384,
+    &_gcry_digest_spec_sha3_512,
+    &_gcry_digest_spec_shake128,
+    &_gcry_digest_spec_shake256,
+#else
+    NULL,
+    NULL,
+    NULL,
+    NULL,
+    NULL,
+    NULL,
+#endif
+#if USE_BLAKE2
+    &_gcry_digest_spec_blake2b_512,
+    &_gcry_digest_spec_blake2b_384,
+    &_gcry_digest_spec_blake2b_256,
+    &_gcry_digest_spec_blake2b_160,
+    &_gcry_digest_spec_blake2s_256,
+    &_gcry_digest_spec_blake2s_224,
+    &_gcry_digest_spec_blake2s_160,
+    &_gcry_digest_spec_blake2s_128,
+#else
+    NULL,
+    NULL,
+    NULL,
+    NULL,
+    NULL,
+    NULL,
+    NULL,
+    NULL,
+#endif
+#if USE_SM3
+    &_gcry_digest_spec_sm3,
+#else
+    NULL,
+#endif
+#if USE_SHA512
+    &_gcry_digest_spec_sha512_256,
+    &_gcry_digest_spec_sha512_224,
+#else
+    NULL,
+    NULL,
 #endif
-    { NULL },
   };
 
-/* List of registered digests.  */
-static gcry_module_t digests_registered;
-
-/* This is the lock protecting DIGESTS_REGISTERED.  */
-static ath_mutex_t digests_registered_lock = ATH_MUTEX_INITIALIZER;
-
-/* Flag to check whether the default ciphers have already been
-   registered.  */
-static int default_digests_registered;
 
 typedef struct gcry_md_list
 {
-  gcry_md_spec_t *digest;
-  gcry_module_t module;
+  const gcry_md_spec_t *spec;
   struct gcry_md_list *next;
   size_t actual_struct_size;     /* Allocated size of this structure. */
-  PROPERLY_ALIGNED_TYPE context;
+  PROPERLY_ALIGNED_TYPE context[1];
 } GcryDigestEntry;
 
-/* this structure is put right after the gcry_md_hd_t buffer, so that
+/* This structure is put right after the gcry_md_hd_t buffer, so that
  * only one memory block is needed. */
 struct gcry_md_context
 {
   int  magic;
+  struct {
+    unsigned int secure:1;
+    unsigned int finalized:1;
+    unsigned int bugemu1:1;
+    unsigned int hmac:1;
+  } flags;
   size_t actual_handle_size;     /* Allocated size of this handle. */
-  int  secure;
   FILE  *debug;
-  int finalized;
   GcryDigestEntry *list;
-  byte *macpads;
-  int macpads_Bsize;             /* Blocksize as used for the HMAC pads. */
 };
 
 
 #define CTX_MAGIC_NORMAL 0x11071961
 #define CTX_MAGIC_SECURE 0x16917011
 
-/* Convenient macro for registering the default digests.  */
-#define REGISTER_DEFAULT_DIGESTS                   \
-  do                                               \
-    {                                              \
-      ath_mutex_lock (&digests_registered_lock);   \
-      if (! default_digests_registered)            \
-        {                                          \
-          md_register_default ();                  \
-          default_digests_registered = 1;          \
-        }                                          \
-      ath_mutex_unlock (&digests_registered_lock); \
-    }                                              \
-  while (0)
-
-
-static const char * digest_algo_to_string( int algo );
-static gcry_err_code_t check_digest_algo (int algo);
-static gcry_err_code_t md_open (gcry_md_hd_t *h, int algo,
-                                int secure, int hmac);
 static gcry_err_code_t md_enable (gcry_md_hd_t hd, int algo);
-static gcry_err_code_t md_copy (gcry_md_hd_t a, gcry_md_hd_t *b);
 static void md_close (gcry_md_hd_t a);
 static void md_write (gcry_md_hd_t a, const void *inbuf, size_t inlen);
-static void md_final(gcry_md_hd_t a);
 static byte *md_read( gcry_md_hd_t a, int algo );
 static int md_get_algo( gcry_md_hd_t a );
 static int md_digest_length( int algo );
-static const byte *md_asn_oid( int algo, size_t *asnlen, size_t *mdlen );
 static void md_start_debug ( gcry_md_hd_t a, const char *suffix );
 static void md_stop_debug ( gcry_md_hd_t a );
 
 
+
+static int
+map_algo (int algo)
+{
+  return algo;
+}
 
 
-/* Internal function.  Register all the ciphers included in
-   CIPHER_TABLE.  Returns zero on success or an error code.  */
-static void
-md_register_default (void)
+/* Return the spec structure for the hash algorithm ALGO.  For an
+   unknown algorithm NULL is returned.  */
+static const gcry_md_spec_t *
+spec_from_algo (int algo)
 {
-  gcry_err_code_t err = 0;
-  int i;
+  const gcry_md_spec_t *spec = NULL;
 
-  for (i = 0; !err && digest_table[i].digest; i++)
-    {
-      if ( fips_mode ())
-        {
-          if (!digest_table[i].fips_allowed)
-            continue;
-          if (digest_table[i].algorithm == GCRY_MD_MD5
-              && _gcry_enforced_fips_mode () )
-            continue;  /* Do not register in enforced fips mode.  */
-        }
+  algo = map_algo (algo);
 
-      err = _gcry_module_add (&digests_registered,
-                              digest_table[i].algorithm,
-                              (void *) digest_table[i].digest,
-                              (void *) digest_table[i].extraspec,
-                              NULL);
-    }
+  if (algo >= 0 && algo < DIM(digest_list_algo0))
+    spec = digest_list_algo0[algo];
+  else if (algo >= 301 && algo < 301 + DIM(digest_list_algo301))
+    spec = digest_list_algo301[algo - 301];
 
-  if (err)
-    BUG ();
-}
+  if (spec)
+    gcry_assert (spec->algo == algo);
 
-/* Internal callback function.  */
-static int
-gcry_md_lookup_func_name (void *spec, void *data)
-{
-  gcry_md_spec_t *digest = (gcry_md_spec_t *) spec;
-  char *name = (char *) data;
-
-  return (! stricmp (digest->name, name));
+  return spec;
 }
 
-/* Internal callback function.  Used via _gcry_module_lookup.  */
-static int
-gcry_md_lookup_func_oid (void *spec, void *data)
+
+/* Lookup a hash's spec by its name.  */
+static const gcry_md_spec_t *
+spec_from_name (const char *name)
 {
-  gcry_md_spec_t *digest = (gcry_md_spec_t *) spec;
-  char *oid = (char *) data;
-  gcry_md_oid_spec_t *oid_specs = digest->oids;
-  int ret = 0, i;
+  const gcry_md_spec_t *spec;
+  int idx;
 
-  if (oid_specs)
+  for (idx=0; (spec = digest_list[idx]); idx++)
     {
-      for (i = 0; oid_specs[i].oidstring && (! ret); i++)
-        if (! stricmp (oid, oid_specs[i].oidstring))
-          ret = 1;
+      if (!stricmp (name, spec->name))
+        return spec;
     }
 
-  return ret;
-}
-
-/* Internal function.  Lookup a digest entry by it's name.  */
-static gcry_module_t
-gcry_md_lookup_name (const char *name)
-{
-  gcry_module_t digest;
-
-  digest = _gcry_module_lookup (digests_registered, (void *) name,
-                               gcry_md_lookup_func_name);
-
-  return digest;
+  return NULL;
 }
 
-/* Internal function.  Lookup a cipher entry by it's oid.  */
-static gcry_module_t
-gcry_md_lookup_oid (const char *oid)
-{
-  gcry_module_t digest;
-
-  digest = _gcry_module_lookup (digests_registered, (void *) oid,
-                               gcry_md_lookup_func_oid);
 
-  return digest;
-}
-
-/* Register a new digest module whose specification can be found in
-   DIGEST.  On success, a new algorithm ID is stored in ALGORITHM_ID
-   and a pointer representhing this module is stored in MODULE.  */
-gcry_error_t
-_gcry_md_register (gcry_md_spec_t *digest,
-                   md_extra_spec_t *extraspec,
-                   unsigned int *algorithm_id,
-                   gcry_module_t *module)
+/* Lookup a hash's spec by its OID.  */
+static const gcry_md_spec_t *
+spec_from_oid (const char *oid)
 {
-  gcry_err_code_t err = 0;
-  gcry_module_t mod;
-
-  /* We do not support module loading in fips mode.  */
-  if (fips_mode ())
-    return gpg_error (GPG_ERR_NOT_SUPPORTED);
-
-  ath_mutex_lock (&digests_registered_lock);
-  err = _gcry_module_add (&digests_registered, 0,
-                         (void *) digest,
-                         (void *)(extraspec? extraspec : &dummy_extra_spec),
-                          &mod);
-  ath_mutex_unlock (&digests_registered_lock);
+  const gcry_md_spec_t *spec;
+  const gcry_md_oid_spec_t *oid_specs;
+  int idx, j;
 
-  if (! err)
+  for (idx=0; (spec = digest_list[idx]); idx++)
     {
-      *module = mod;
-      *algorithm_id = mod->mod_id;
+      oid_specs = spec->oids;
+      if (oid_specs)
+        {
+          for (j = 0; oid_specs[j].oidstring; j++)
+            if (!stricmp (oid, oid_specs[j].oidstring))
+              return spec;
+        }
     }
 
-  return gcry_error (err);
-}
-
-/* Unregister the digest identified by ID, which must have been
-   registered with gcry_digest_register.  */
-void
-gcry_md_unregister (gcry_module_t module)
-{
-  ath_mutex_lock (&digests_registered_lock);
-  _gcry_module_release (module);
-  ath_mutex_unlock (&digests_registered_lock);
+  return NULL;
 }
 
 
-static int
-search_oid (const char *oid, int *algorithm, gcry_md_oid_spec_t *oid_spec)
+static const gcry_md_spec_t *
+search_oid (const char *oid, gcry_md_oid_spec_t *oid_spec)
 {
-  gcry_module_t module;
-  int ret = 0;
+  const gcry_md_spec_t *spec;
+  int i;
+
+  if (!oid)
+    return NULL;
 
-  if (oid && ((! strncmp (oid, "oid.", 4))
-             || (! strncmp (oid, "OID.", 4))))
+  if (!strncmp (oid, "oid.", 4) || !strncmp (oid, "OID.", 4))
     oid += 4;
 
-  module = gcry_md_lookup_oid (oid);
-  if (module)
+  spec = spec_from_oid (oid);
+  if (spec && spec->oids)
     {
-      gcry_md_spec_t *digest = module->spec;
-      int i;
-
-      for (i = 0; digest->oids[i].oidstring && !ret; i++)
-       if (! stricmp (oid, digest->oids[i].oidstring))
+      for (i = 0; spec->oids[i].oidstring; i++)
+       if (!stricmp (oid, spec->oids[i].oidstring))
          {
-           if (algorithm)
-             *algorithm = module->mod_id;
            if (oid_spec)
-             *oid_spec = digest->oids[i];
-           ret = 1;
+             *oid_spec = spec->oids[i];
+           return spec;
          }
-      _gcry_module_release (module);
     }
 
-  return ret;
+  return NULL;
 }
 
+
 /****************
  * Map a string to the digest algo
  */
 int
-gcry_md_map_name (const char *string)
+_gcry_md_map_name (const char *string)
 {
-  gcry_module_t digest;
-  int ret, algorithm = 0;
+  const gcry_md_spec_t *spec;
 
-  if (! string)
+  if (!string)
     return 0;
 
-  REGISTER_DEFAULT_DIGESTS;
-
   /* If the string starts with a digit (optionally prefixed with
      either "OID." or "oid."), we first look into our table of ASN.1
      object identifiers to figure out the algorithm */
+  spec = search_oid (string, NULL);
+  if (spec)
+    return spec->algo;
 
-  ath_mutex_lock (&digests_registered_lock);
-
-  ret = search_oid (string, &algorithm, NULL);
-  if (! ret)
-    {
-      /* Not found, search a matching digest name.  */
-      digest = gcry_md_lookup_name (string);
-      if (digest)
-       {
-         algorithm = digest->mod_id;
-         _gcry_module_release (digest);
-       }
-    }
-  ath_mutex_unlock (&digests_registered_lock);
+  /* Not found, search a matching digest name.  */
+  spec = spec_from_name (string);
+  if (spec)
+    return spec->algo;
 
-  return algorithm;
+  return 0;
 }
 
 
-/****************
- * Map a digest algo to a string
- */
-static const char *
-digest_algo_to_string (int algorithm)
-{
-  const char *name = NULL;
-  gcry_module_t digest;
-
-  REGISTER_DEFAULT_DIGESTS;
-
-  ath_mutex_lock (&digests_registered_lock);
-  digest = _gcry_module_lookup_id (digests_registered, algorithm);
-  if (digest)
-    {
-      name = ((gcry_md_spec_t *) digest->spec)->name;
-      _gcry_module_release (digest);
-    }
-  ath_mutex_unlock (&digests_registered_lock);
-
-  return name;
-}
-
 /****************
  * This function simply returns the name of the algorithm or some constant
  * string when there is no algo.  It will never return NULL.
@@ -394,45 +417,41 @@ digest_algo_to_string (int algorithm)
  * is valid.
  */
 const char *
-gcry_md_algo_name (int algorithm)
+_gcry_md_algo_name (int algorithm)
 {
-  const char *s = digest_algo_to_string (algorithm);
-  return s ? s : "?";
+  const gcry_md_spec_t *spec;
+
+  spec = spec_from_algo (algorithm);
+  return spec ? spec->name : "?";
 }
 
 
 static gcry_err_code_t
 check_digest_algo (int algorithm)
 {
-  gcry_err_code_t rc = 0;
-  gcry_module_t digest;
+  const gcry_md_spec_t *spec;
 
-  REGISTER_DEFAULT_DIGESTS;
+  spec = spec_from_algo (algorithm);
+  if (spec && !spec->flags.disabled && (spec->flags.fips || !fips_mode ()))
+    return 0;
 
-  ath_mutex_lock (&digests_registered_lock);
-  digest = _gcry_module_lookup_id (digests_registered, algorithm);
-  if (digest)
-    _gcry_module_release (digest);
-  else
-    rc = GPG_ERR_DIGEST_ALGO;
-  ath_mutex_unlock (&digests_registered_lock);
+  return GPG_ERR_DIGEST_ALGO;
 
-  return rc;
 }
 
 
-
 /****************
  * Open a message digest handle for use with algorithm ALGO.
  * More algorithms may be added by md_enable(). The initial algorithm
  * may be 0.
  */
 static gcry_err_code_t
-md_open (gcry_md_hd_t *h, int algo, int secure, int hmac)
+md_open (gcry_md_hd_t *h, int algo, unsigned int flags)
 {
-  gcry_err_code_t err = GPG_ERR_NO_ERROR;
+  gcry_err_code_t err = 0;
+  int secure = !!(flags & GCRY_MD_FLAG_SECURE);
+  int hmac =   !!(flags & GCRY_MD_FLAG_HMAC);
   int bufsize = secure ? 512 : 1024;
-  struct gcry_md_context *ctx;
   gcry_md_hd_t hd;
   size_t n;
 
@@ -450,51 +469,36 @@ md_open (gcry_md_hd_t *h, int algo, int secure, int hmac)
    *
    * We have to make sure that private is well aligned.
    */
-  n = sizeof (struct gcry_md_handle) + bufsize;
+  n = offsetof (struct gcry_md_handle, buf) + bufsize;
   n = ((n + sizeof (PROPERLY_ALIGNED_TYPE) - 1)
        / sizeof (PROPERLY_ALIGNED_TYPE)) * sizeof (PROPERLY_ALIGNED_TYPE);
 
   /* Allocate and set the Context pointer to the private data */
   if (secure)
-    hd = gcry_malloc_secure (n + sizeof (struct gcry_md_context));
+    hd = xtrymalloc_secure (n + sizeof (struct gcry_md_context));
   else
-    hd = gcry_malloc (n + sizeof (struct gcry_md_context));
+    hd = xtrymalloc (n + sizeof (struct gcry_md_context));
 
   if (! hd)
     err = gpg_err_code_from_errno (errno);
 
   if (! err)
     {
-      hd->ctx = ctx = (struct gcry_md_context *) ((char *) hd + n);
+      struct gcry_md_context *ctx;
+
+      ctx = (void *) (hd->buf - offsetof (struct gcry_md_handle, buf) + n);
       /* Setup the globally visible data (bctl in the diagram).*/
-      hd->bufsize = n - sizeof (struct gcry_md_handle) + 1;
+      hd->ctx = ctx;
+      hd->bufsize = n - offsetof (struct gcry_md_handle, buf);
       hd->bufpos = 0;
 
       /* Initialize the private data. */
-      memset (hd->ctx, 0, sizeof *hd->ctx);
+      wipememory2 (ctx, 0, sizeof *ctx);
       ctx->magic = secure ? CTX_MAGIC_SECURE : CTX_MAGIC_NORMAL;
       ctx->actual_handle_size = n + sizeof (struct gcry_md_context);
-      ctx->secure = secure;
-
-      if (hmac)
-       {
-         switch (algo)
-            {
-              case GCRY_MD_SHA384:
-              case GCRY_MD_SHA512:
-                ctx->macpads_Bsize = 128;
-                break;
-              default:
-                ctx->macpads_Bsize = 64;
-                break;
-            }
-          ctx->macpads = gcry_malloc_secure (2*(ctx->macpads_Bsize));
-         if (!ctx->macpads)
-           {
-             err = gpg_err_code_from_errno (errno);
-             md_close (hd);
-           }
-       }
+      ctx->flags.secure = secure;
+      ctx->flags.hmac = hmac;
+      ctx->flags.bugemu1 = !!(flags & GCRY_MD_FLAG_BUGEMU1);
     }
 
   if (! err)
@@ -521,22 +525,21 @@ md_open (gcry_md_hd_t *h, int algo, int secure, int hmac)
    given as 0 if the algorithms to be used are later set using
    gcry_md_enable. H is guaranteed to be a valid handle or NULL on
    error.  */
-gcry_error_t
-gcry_md_open (gcry_md_hd_t *h, int algo, unsigned int flags)
+gcry_err_code_t
+_gcry_md_open (gcry_md_hd_t *h, int algo, unsigned int flags)
 {
-  gcry_err_code_t err = GPG_ERR_NO_ERROR;
+  gcry_err_code_t rc;
   gcry_md_hd_t hd;
 
-  if ((flags & ~(GCRY_MD_FLAG_SECURE | GCRY_MD_FLAG_HMAC)))
-    err = GPG_ERR_INV_ARG;
+  if ((flags & ~(GCRY_MD_FLAG_SECURE
+                 | GCRY_MD_FLAG_HMAC
+                 | GCRY_MD_FLAG_BUGEMU1)))
+    rc = GPG_ERR_INV_ARG;
   else
-    {
-      err = md_open (&hd, algo, (flags & GCRY_MD_FLAG_SECURE),
-                    (flags & GCRY_MD_FLAG_HMAC));
-    }
+    rc = md_open (&hd, algo, flags);
 
-  *h = err? NULL : hd;
-  return gcry_error (err);
+  *h = rc? NULL : hd;
+  return rc;
 }
 
 
@@ -545,74 +548,58 @@ static gcry_err_code_t
 md_enable (gcry_md_hd_t hd, int algorithm)
 {
   struct gcry_md_context *h = hd->ctx;
-  gcry_md_spec_t *digest = NULL;
+  const gcry_md_spec_t *spec;
   GcryDigestEntry *entry;
-  gcry_module_t module;
   gcry_err_code_t err = 0;
 
   for (entry = h->list; entry; entry = entry->next)
-    if (entry->module->mod_id == algorithm)
-      return err; /* already enabled */
-
-  REGISTER_DEFAULT_DIGESTS;
+    if (entry->spec->algo == algorithm)
+      return 0; /* Already enabled */
 
-  ath_mutex_lock (&digests_registered_lock);
-  module = _gcry_module_lookup_id (digests_registered, algorithm);
-  ath_mutex_unlock (&digests_registered_lock);
-  if (! module)
+  spec = spec_from_algo (algorithm);
+  if (!spec)
     {
       log_debug ("md_enable: algorithm %d not available\n", algorithm);
       err = GPG_ERR_DIGEST_ALGO;
     }
- else
-    digest = (gcry_md_spec_t *) module->spec;
 
+  if (!err && spec->flags.disabled)
+    err = GPG_ERR_DIGEST_ALGO;
+
+  /* Any non-FIPS algorithm should go this way */
+  if (!err && !spec->flags.fips && fips_mode ())
+    err = GPG_ERR_DIGEST_ALGO;
 
-  if (!err && algorithm == GCRY_MD_MD5 && fips_mode ())
+  if (!err && h->flags.hmac && spec->read == NULL)
     {
-      _gcry_inactivate_fips_mode ("MD5 used");
-      if (_gcry_enforced_fips_mode () )
-        {
-          /* We should never get to here because we do not register
-             MD5 in enforced fips mode. But better throw an error.  */
-          err = GPG_ERR_DIGEST_ALGO;
-        }
+      /* Expandable output function cannot act as part of HMAC. */
+      err = GPG_ERR_DIGEST_ALGO;
     }
 
   if (!err)
     {
       size_t size = (sizeof (*entry)
-                     + digest->contextsize
+                     + spec->contextsize * (h->flags.hmac? 3 : 1)
                      - sizeof (entry->context));
 
       /* And allocate a new list entry. */
-      if (h->secure)
-       entry = gcry_malloc_secure (size);
+      if (h->flags.secure)
+       entry = xtrymalloc_secure (size);
       else
-       entry = gcry_malloc (size);
+       entry = xtrymalloc (size);
 
       if (! entry)
        err = gpg_err_code_from_errno (errno);
       else
        {
-         entry->digest = digest;
-         entry->module = module;
+         entry->spec = spec;
          entry->next = h->list;
           entry->actual_struct_size = size;
          h->list = entry;
 
          /* And init this instance. */
-         entry->digest->init (&entry->context.c);
-       }
-    }
-
-  if (err)
-    {
-      if (module)
-       {
-          ath_mutex_lock (&digests_registered_lock);
-          _gcry_module_release (module);
-          ath_mutex_unlock (&digests_registered_lock);
+         entry->spec->init (entry->context,
+                             h->flags.bugemu1? GCRY_MD_FLAG_BUGEMU1:0);
        }
     }
 
@@ -620,16 +607,17 @@ md_enable (gcry_md_hd_t hd, int algorithm)
 }
 
 
-gcry_error_t
-gcry_md_enable (gcry_md_hd_t hd, int algorithm)
+gcry_err_code_t
+_gcry_md_enable (gcry_md_hd_t hd, int algorithm)
 {
-  return gcry_error (md_enable (hd, algorithm));
+  return md_enable (hd, algorithm);
 }
 
+
 static gcry_err_code_t
 md_copy (gcry_md_hd_t ahd, gcry_md_hd_t *b_hd)
 {
-  gcry_err_code_t err = GPG_ERR_NO_ERROR;
+  gcry_err_code_t err = 0;
   struct gcry_md_context *a = ahd->ctx;
   struct gcry_md_context *b;
   GcryDigestEntry *ar, *br;
@@ -640,113 +628,98 @@ md_copy (gcry_md_hd_t ahd, gcry_md_hd_t *b_hd)
     md_write (ahd, NULL, 0);
 
   n = (char *) ahd->ctx - (char *) ahd;
-  if (a->secure)
-    bhd = gcry_malloc_secure (n + sizeof (struct gcry_md_context));
+  if (a->flags.secure)
+    bhd = xtrymalloc_secure (n + sizeof (struct gcry_md_context));
   else
-    bhd = gcry_malloc (n + sizeof (struct gcry_md_context));
+    bhd = xtrymalloc (n + sizeof (struct gcry_md_context));
 
-  if (! bhd)
-    err = gpg_err_code_from_errno (errno);
-
-  if (! err)
+  if (!bhd)
     {
-      bhd->ctx = b = (struct gcry_md_context *) ((char *) bhd + n);
-      /* No need to copy the buffer due to the write above. */
-      gcry_assert (ahd->bufsize == (n - sizeof (struct gcry_md_handle) + 1));
-      bhd->bufsize = ahd->bufsize;
-      bhd->bufpos = 0;
-      gcry_assert (! ahd->bufpos);
-      memcpy (b, a, sizeof *a);
-      b->list = NULL;
-      b->debug = NULL;
-      if (a->macpads)
-       {
-         b->macpads = gcry_malloc_secure (2*(a->macpads_Bsize));
-         if (! b->macpads)
-           {
-             err = gpg_err_code_from_errno (errno);
-             md_close (bhd);
-           }
-         else
-           memcpy (b->macpads, a->macpads, (2*(a->macpads_Bsize)));
-       }
+      err = gpg_err_code_from_syserror ();
+      goto leave;
     }
 
+  bhd->ctx = b = (void *) ((char *) bhd + n);
+  /* No need to copy the buffer due to the write above. */
+  gcry_assert (ahd->bufsize == (n - offsetof (struct gcry_md_handle, buf)));
+  bhd->bufsize = ahd->bufsize;
+  bhd->bufpos = 0;
+  gcry_assert (! ahd->bufpos);
+  memcpy (b, a, sizeof *a);
+  b->list = NULL;
+  b->debug = NULL;
+
   /* Copy the complete list of algorithms.  The copied list is
      reversed, but that doesn't matter. */
-  if (!err)
+  for (ar = a->list; ar; ar = ar->next)
     {
-      for (ar = a->list; ar; ar = ar->next)
+      if (a->flags.secure)
+        br = xtrymalloc_secure (ar->actual_struct_size);
+      else
+        br = xtrymalloc (ar->actual_struct_size);
+      if (!br)
         {
-          if (a->secure)
-            br = gcry_malloc_secure (sizeof *br
-                                     + ar->digest->contextsize
-                                     - sizeof(ar->context));
-          else
-            br = gcry_malloc (sizeof *br
-                              + ar->digest->contextsize
-                              - sizeof (ar->context));
-          if (!br)
-            {
-             err = gpg_err_code_from_errno (errno);
-              md_close (bhd);
-              break;
-            }
-
-          memcpy (br, ar, (sizeof (*br) + ar->digest->contextsize
-                           - sizeof (ar->context)));
-          br->next = b->list;
-          b->list = br;
-
-          /* Add a reference to the module.  */
-          ath_mutex_lock (&digests_registered_lock);
-          _gcry_module_use (br->module);
-          ath_mutex_unlock (&digests_registered_lock);
+          err = gpg_err_code_from_syserror ();
+          md_close (bhd);
+          goto leave;
         }
+
+      memcpy (br, ar, ar->actual_struct_size);
+      br->next = b->list;
+      b->list = br;
     }
 
-  if (a->debug && !err)
+  if (a->debug)
     md_start_debug (bhd, "unknown");
 
-  if (!err)
-    *b_hd = bhd;
+  *b_hd = bhd;
 
+ leave:
   return err;
 }
 
-gcry_error_t
-gcry_md_copy (gcry_md_hd_t *handle, gcry_md_hd_t hd)
+
+gcry_err_code_t
+_gcry_md_copy (gcry_md_hd_t *handle, gcry_md_hd_t hd)
 {
-  gcry_err_code_t err;
+  gcry_err_code_t rc;
 
-  err = md_copy (hd, handle);
-  if (err)
+  rc = md_copy (hd, handle);
+  if (rc)
     *handle = NULL;
-  return gcry_error (err);
+  return rc;
 }
 
+
 /*
  * Reset all contexts and discard any buffered stuff.  This may be used
  * instead of a md_close(); md_open().
  */
 void
-gcry_md_reset (gcry_md_hd_t a)
+_gcry_md_reset (gcry_md_hd_t a)
 {
   GcryDigestEntry *r;
 
   /* Note: We allow this even in fips non operational mode.  */
 
-  a->bufpos = a->ctx->finalized = 0;
+  a->bufpos = a->ctx->flags.finalized = 0;
 
-  for (r = a->ctx->list; r; r = r->next)
-    {
-      memset (r->context.c, 0, r->digest->contextsize);
-      (*r->digest->init) (&r->context.c);
-    }
-  if (a->ctx->macpads)
-    md_write (a, a->ctx->macpads, a->ctx->macpads_Bsize); /* inner pad */
+  if (a->ctx->flags.hmac)
+    for (r = a->ctx->list; r; r = r->next)
+      {
+        memcpy (r->context, (char *)r->context + r->spec->contextsize,
+                r->spec->contextsize);
+      }
+  else
+    for (r = a->ctx->list; r; r = r->next)
+      {
+        memset (r->context, 0, r->spec->contextsize);
+        (*r->spec->init) (r->context,
+                          a->ctx->flags.bugemu1? GCRY_MD_FLAG_BUGEMU1:0);
+      }
 }
 
+
 static void
 md_close (gcry_md_hd_t a)
 {
@@ -759,30 +732,23 @@ md_close (gcry_md_hd_t a)
   for (r = a->ctx->list; r; r = r2)
     {
       r2 = r->next;
-      ath_mutex_lock (&digests_registered_lock);
-      _gcry_module_release (r->module);
-      ath_mutex_unlock (&digests_registered_lock);
       wipememory (r, r->actual_struct_size);
-      gcry_free (r);
-    }
-
-  if (a->ctx->macpads)
-    {
-      wipememory (a->ctx->macpads, 2*(a->ctx->macpads_Bsize));
-      gcry_free(a->ctx->macpads);
+      xfree (r);
     }
 
   wipememory (a, a->ctx->actual_handle_size);
-  gcry_free(a);
+  xfree(a);
 }
 
+
 void
-gcry_md_close (gcry_md_hd_t hd)
+_gcry_md_close (gcry_md_hd_t hd)
 {
   /* Note: We allow this even in fips non operational mode.  */
   md_close (hd);
 }
 
+
 static void
 md_write (gcry_md_hd_t a, const void *inbuf, size_t inlen)
 {
@@ -799,106 +765,249 @@ md_write (gcry_md_hd_t a, const void *inbuf, size_t 
inlen)
   for (r = a->ctx->list; r; r = r->next)
     {
       if (a->bufpos)
-       (*r->digest->write) (&r->context.c, a->buf, a->bufpos);
-      (*r->digest->write) (&r->context.c, inbuf, inlen);
+       (*r->spec->write) (r->context, a->buf, a->bufpos);
+      (*r->spec->write) (r->context, inbuf, inlen);
     }
   a->bufpos = 0;
 }
 
+
+/* Note that this function may be used after finalize and read to keep
+   on writing to the transform function so to mitigate timing
+   attacks.  */
 void
-gcry_md_write (gcry_md_hd_t hd, const void *inbuf, size_t inlen)
+_gcry_md_write (gcry_md_hd_t hd, const void *inbuf, size_t inlen)
 {
   md_write (hd, inbuf, inlen);
 }
 
+
 static void
 md_final (gcry_md_hd_t a)
 {
   GcryDigestEntry *r;
 
-  if (a->ctx->finalized)
+  if (a->ctx->flags.finalized)
     return;
 
   if (a->bufpos)
     md_write (a, NULL, 0);
 
   for (r = a->ctx->list; r; r = r->next)
-    (*r->digest->final) (&r->context.c);
+    (*r->spec->final) (r->context);
+
+  a->ctx->flags.finalized = 1;
 
-  a->ctx->finalized = 1;
+  if (!a->ctx->flags.hmac)
+    return;
 
-  if (a->ctx->macpads)
+  for (r = a->ctx->list; r; r = r->next)
     {
-      /* Finish the hmac. */
-      int algo = md_get_algo (a);
-      byte *p = md_read (a, algo);
-      size_t dlen = md_digest_length (algo);
-      gcry_md_hd_t om;
-      gcry_err_code_t err = md_open (&om, algo, a->ctx->secure, 0);
+      byte *p;
+      size_t dlen = r->spec->mdlen;
+      byte *hash;
+      gcry_err_code_t err;
 
-      if (err)
-       _gcry_fatal_error (err, NULL);
-      md_write (om,
-                (a->ctx->macpads)+(a->ctx->macpads_Bsize),
-                a->ctx->macpads_Bsize);
-      md_write (om, p, dlen);
-      md_final (om);
-      /* Replace our digest with the mac (they have the same size). */
-      memcpy (p, md_read (om, algo), dlen);
-      md_close (om);
+      if (r->spec->read == NULL)
+        continue;
+
+      p = r->spec->read (r->context);
+
+      if (a->ctx->flags.secure)
+        hash = xtrymalloc_secure (dlen);
+      else
+        hash = xtrymalloc (dlen);
+      if (!hash)
+        {
+          err = gpg_err_code_from_errno (errno);
+          _gcry_fatal_error (err, NULL);
+        }
+
+      memcpy (hash, p, dlen);
+      memcpy (r->context, (char *)r->context + r->spec->contextsize * 2,
+              r->spec->contextsize);
+      (*r->spec->write) (r->context, hash, dlen);
+      (*r->spec->final) (r->context);
+      xfree (hash);
     }
 }
 
+
 static gcry_err_code_t
-prepare_macpads (gcry_md_hd_t hd, const unsigned char *key, size_t keylen)
+md_setkey (gcry_md_hd_t h, const unsigned char *key, size_t keylen)
 {
-  int i;
-  int algo = md_get_algo (hd);
-  unsigned char *helpkey = NULL;
-  unsigned char *ipad, *opad;
+  gcry_err_code_t rc = 0;
+  GcryDigestEntry *r;
+  int algo_had_setkey = 0;
 
-  if (!algo)
+  if (!h->ctx->list)
     return GPG_ERR_DIGEST_ALGO; /* Might happen if no algo is enabled.  */
 
-  if ( keylen > hd->ctx->macpads_Bsize )
+  if (h->ctx->flags.hmac)
+    return GPG_ERR_DIGEST_ALGO; /* Tried md_setkey for HMAC md. */
+
+  for (r = h->ctx->list; r; r = r->next)
     {
-      helpkey = gcry_malloc_secure (md_digest_length (algo));
-      if (!helpkey)
-        return gpg_err_code_from_errno (errno);
-      gcry_md_hash_buffer (algo, helpkey, key, keylen);
-      key = helpkey;
-      keylen = md_digest_length (algo);
-      gcry_assert ( keylen <= hd->ctx->macpads_Bsize );
+      switch (r->spec->algo)
+       {
+#if USE_BLAKE2
+       /* TODO? add spec->init_with_key? */
+       case GCRY_MD_BLAKE2B_512:
+       case GCRY_MD_BLAKE2B_384:
+       case GCRY_MD_BLAKE2B_256:
+       case GCRY_MD_BLAKE2B_160:
+       case GCRY_MD_BLAKE2S_256:
+       case GCRY_MD_BLAKE2S_224:
+       case GCRY_MD_BLAKE2S_160:
+       case GCRY_MD_BLAKE2S_128:
+         algo_had_setkey = 1;
+         memset (r->context, 0, r->spec->contextsize);
+         rc = _gcry_blake2_init_with_key (r->context,
+                                          h->ctx->flags.bugemu1
+                                            ? GCRY_MD_FLAG_BUGEMU1:0,
+                                          key, keylen, r->spec->algo);
+         break;
+#endif
+       default:
+         rc = GPG_ERR_DIGEST_ALGO;
+         break;
+       }
+
+      if (rc)
+       break;
     }
 
-  memset ( hd->ctx->macpads, 0, 2*(hd->ctx->macpads_Bsize) );
-  ipad = hd->ctx->macpads;
-  opad = (hd->ctx->macpads)+(hd->ctx->macpads_Bsize);
-  memcpy ( ipad, key, keylen );
-  memcpy ( opad, key, keylen );
-  for (i=0; i < hd->ctx->macpads_Bsize; i++ )
+  if (rc && !algo_had_setkey)
+    {
+      /* None of algorithms had setkey implementation, so contexts were not
+       * modified. Just return error. */
+      return rc;
+    }
+  else if (rc && algo_had_setkey)
     {
-      ipad[i] ^= 0x36;
-      opad[i] ^= 0x5c;
+      /* Some of the contexts have been modified, but got error. Reset
+       * all contexts. */
+      _gcry_md_reset (h);
+      return rc;
     }
-  gcry_free (helpkey);
 
-  return GPG_ERR_NO_ERROR;
+  /* Successful md_setkey implies reset. */
+  h->bufpos = h->ctx->flags.finalized = 0;
+
+  return 0;
 }
 
-gcry_error_t
-gcry_md_ctl (gcry_md_hd_t hd, int cmd, void *buffer, size_t buflen)
+
+static gcry_err_code_t
+prepare_macpads (gcry_md_hd_t a, const unsigned char *key, size_t keylen)
+{
+  GcryDigestEntry *r;
+
+  if (!a->ctx->list)
+    return GPG_ERR_DIGEST_ALGO; /* Might happen if no algo is enabled.  */
+
+  if (!a->ctx->flags.hmac)
+    return GPG_ERR_DIGEST_ALGO; /* Tried prepare_macpads for non-HMAC md. */
+
+  for (r = a->ctx->list; r; r = r->next)
+    {
+      const unsigned char *k;
+      size_t k_len;
+      unsigned char *key_allocated = NULL;
+      int macpad_Bsize;
+      int i;
+
+      switch (r->spec->algo)
+        {
+       /* TODO: add spec->blocksize */
+        case GCRY_MD_SHA3_224:
+          macpad_Bsize = 1152 / 8;
+          break;
+        case GCRY_MD_SHA3_256:
+          macpad_Bsize = 1088 / 8;
+          break;
+        case GCRY_MD_SHA3_384:
+          macpad_Bsize = 832 / 8;
+          break;
+        case GCRY_MD_SHA3_512:
+          macpad_Bsize = 576 / 8;
+          break;
+        case GCRY_MD_SHA384:
+        case GCRY_MD_SHA512:
+        case GCRY_MD_SHA512_256:
+        case GCRY_MD_SHA512_224:
+        case GCRY_MD_BLAKE2B_512:
+        case GCRY_MD_BLAKE2B_384:
+        case GCRY_MD_BLAKE2B_256:
+        case GCRY_MD_BLAKE2B_160:
+          macpad_Bsize = 128;
+          break;
+        case GCRY_MD_GOSTR3411_94:
+        case GCRY_MD_GOSTR3411_CP:
+          macpad_Bsize = 32;
+          break;
+        default:
+          macpad_Bsize = 64;
+          break;
+        }
+
+      if ( keylen > macpad_Bsize )
+        {
+          k = key_allocated = xtrymalloc_secure (r->spec->mdlen);
+          if (!k)
+            return gpg_err_code_from_errno (errno);
+          _gcry_md_hash_buffer (r->spec->algo, key_allocated, key, keylen);
+          k_len = r->spec->mdlen;
+          gcry_assert ( k_len <= macpad_Bsize );
+        }
+      else
+        {
+          k = key;
+          k_len = keylen;
+        }
+
+      (*r->spec->init) (r->context,
+                        a->ctx->flags.bugemu1? GCRY_MD_FLAG_BUGEMU1:0);
+      a->bufpos = 0;
+      for (i=0; i < k_len; i++ )
+        _gcry_md_putc (a, k[i] ^ 0x36);
+      for (; i < macpad_Bsize; i++ )
+        _gcry_md_putc (a, 0x36);
+      (*r->spec->write) (r->context, a->buf, a->bufpos);
+      memcpy ((char *)r->context + r->spec->contextsize, r->context,
+              r->spec->contextsize);
+
+      (*r->spec->init) (r->context,
+                        a->ctx->flags.bugemu1? GCRY_MD_FLAG_BUGEMU1:0);
+      a->bufpos = 0;
+      for (i=0; i < k_len; i++ )
+        _gcry_md_putc (a, k[i] ^ 0x5c);
+      for (; i < macpad_Bsize; i++ )
+        _gcry_md_putc (a, 0x5c);
+      (*r->spec->write) (r->context, a->buf, a->bufpos);
+      memcpy ((char *)r->context + r->spec->contextsize*2, r->context,
+              r->spec->contextsize);
+
+      xfree (key_allocated);
+    }
+
+  a->bufpos = 0;
+  return 0;
+}
+
+
+gcry_err_code_t
+_gcry_md_ctl (gcry_md_hd_t hd, int cmd, void *buffer, size_t buflen)
 {
   gcry_err_code_t rc = 0;
 
+  (void)buflen; /* Currently not used.  */
+
   switch (cmd)
     {
     case GCRYCTL_FINALIZE:
       md_final (hd);
       break;
-    case GCRYCTL_SET_KEY:
-      rc = gcry_err_code (gcry_md_setkey (hd, buffer, buflen));
-      break;
     case GCRYCTL_START_DUMP:
       md_start_debug (hd, buffer);
       break;
@@ -908,31 +1017,35 @@ gcry_md_ctl (gcry_md_hd_t hd, int cmd, void *buffer, 
size_t buflen)
     default:
       rc = GPG_ERR_INV_OP;
     }
-  return gcry_error (rc);
+  return rc;
 }
 
-gcry_error_t
-gcry_md_setkey (gcry_md_hd_t hd, const void *key, size_t keylen)
+
+gcry_err_code_t
+_gcry_md_setkey (gcry_md_hd_t hd, const void *key, size_t keylen)
 {
-  gcry_err_code_t rc = GPG_ERR_NO_ERROR;
+  gcry_err_code_t rc;
 
-  if (!hd->ctx->macpads)
-    rc = GPG_ERR_CONFLICT;
-  else
+  if (hd->ctx->flags.hmac)
     {
       rc = prepare_macpads (hd, key, keylen);
-      if (! rc)
-       gcry_md_reset (hd);
+      if (!rc)
+       _gcry_md_reset (hd);
+    }
+  else
+    {
+      rc = md_setkey (hd, key, keylen);
     }
 
-  return gcry_error (rc);
+  return rc;
 }
 
+
 /* The new debug interface.  If SUFFIX is a string it creates an debug
    file for the context HD.  IF suffix is NULL, the file is closed and
    debugging is stopped.  */
 void
-gcry_md_debug (gcry_md_hd_t hd, const char *suffix)
+_gcry_md_debug (gcry_md_hd_t hd, const char *suffix)
 {
   if (suffix)
     md_start_debug (hd, suffix);
@@ -941,9 +1054,9 @@ gcry_md_debug (gcry_md_hd_t hd, const char *suffix)
 }
 
 
-
 /****************
- * if ALGO is null get the digest for the used algo (which should be only one)
+ * If ALGO is null get the digest for the used algo (which should be
+ * only one)
  */
 static byte *
 md_read( gcry_md_hd_t a, int algo )
@@ -957,39 +1070,96 @@ md_read( gcry_md_hd_t a, int algo )
         {
           if (r->next)
             log_debug ("more than one algorithm in md_read(0)\n");
-          return r->digest->read (&r->context.c);
+          if (r->spec->read)
+            return r->spec->read (r->context);
         }
     }
   else
     {
       for (r = a->ctx->list; r; r = r->next)
-       if (r->module->mod_id == algo)
-         return r->digest->read (&r->context.c);
+       if (r->spec->algo == algo)
+         {
+           if (r->spec->read)
+              return r->spec->read (r->context);
+            break;
+         }
     }
-  BUG();
+
+  if (r && !r->spec->read)
+    _gcry_fatal_error (GPG_ERR_DIGEST_ALGO,
+                       "requested algo has no fixed digest length");
+  else
+    _gcry_fatal_error (GPG_ERR_DIGEST_ALGO, "requested algo not in md 
context");
   return NULL;
 }
 
+
 /*
  * Read out the complete digest, this function implictly finalizes
  * the hash.
  */
 byte *
-gcry_md_read (gcry_md_hd_t hd, int algo)
+_gcry_md_read (gcry_md_hd_t hd, int algo)
 {
   /* This function is expected to always return a digest, thus we
      can't return an error which we actually should do in
      non-operational state.  */
-  gcry_md_ctl (hd, GCRYCTL_FINALIZE, NULL, 0);
+  _gcry_md_ctl (hd, GCRYCTL_FINALIZE, NULL, 0);
   return md_read (hd, algo);
 }
 
 
+/****************
+ * If ALGO is null get the digest for the used algo (which should be
+ * only one)
+ */
+static gcry_err_code_t
+md_extract(gcry_md_hd_t a, int algo, void *out, size_t outlen)
+{
+  GcryDigestEntry *r = a->ctx->list;
+
+  if (!algo)
+    {
+      /* Return the first algorithm */
+      if (r && r->spec->extract)
+       {
+         if (r->next)
+           log_debug ("more than one algorithm in md_extract(0)\n");
+         r->spec->extract (r->context, out, outlen);
+         return 0;
+       }
+    }
+  else
+    {
+      for (r = a->ctx->list; r; r = r->next)
+       if (r->spec->algo == algo && r->spec->extract)
+         {
+           r->spec->extract (r->context, out, outlen);
+           return 0;
+         }
+    }
+
+  return GPG_ERR_DIGEST_ALGO;
+}
+
+
+/*
+ * Expand the output from XOF class digest, this function implictly finalizes
+ * the hash.
+ */
+gcry_err_code_t
+_gcry_md_extract (gcry_md_hd_t hd, int algo, void *out, size_t outlen)
+{
+  _gcry_md_ctl (hd, GCRYCTL_FINALIZE, NULL, 0);
+  return md_extract (hd, algo, out, outlen);
+}
+
+
 /*
  * Read out an intermediate digest.  Not yet functional.
  */
 gcry_err_code_t
-gcry_md_get (gcry_md_hd_t hd, int algo, byte *buffer, int buflen)
+_gcry_md_get (gcry_md_hd_t hd, int algo, byte *buffer, int buflen)
 {
   (void)hd;
   (void)algo;
@@ -1009,42 +1179,164 @@ gcry_md_get (gcry_md_hd_t hd, int algo, byte *buffer, 
int buflen)
  * hash.  No error is returned, the function will abort on an invalid
  * algo.  DISABLED_ALGOS are ignored here.  */
 void
-gcry_md_hash_buffer (int algo, void *digest,
-                     const void *buffer, size_t length)
+_gcry_md_hash_buffer (int algo, void *digest,
+                      const void *buffer, size_t length)
 {
-  if (algo == GCRY_MD_SHA1)
-    _gcry_sha1_hash_buffer (digest, buffer, length);
-  else if (algo == GCRY_MD_RMD160 && !fips_mode () )
-    _gcry_rmd160_hash_buffer (digest, buffer, length);
+  const gcry_md_spec_t *spec;
+
+  spec = spec_from_algo (algo);
+  if (!spec)
+    {
+      log_debug ("md_hash_buffer: algorithm %d not available\n", algo);
+      return;
+    }
+
+  if (spec->hash_buffers != NULL)
+    {
+      gcry_buffer_t iov;
+
+      iov.size = 0;
+      iov.data = (void *)buffer;
+      iov.off = 0;
+      iov.len = length;
+
+      if (spec->flags.disabled || (!spec->flags.fips && fips_mode ()))
+        log_bug ("gcry_md_hash_buffer failed for algo %d: %s",
+                algo, gpg_strerror (gcry_error (GPG_ERR_DIGEST_ALGO)));
+
+      spec->hash_buffers (digest, spec->mdlen, &iov, 1);
+    }
   else
     {
       /* For the others we do not have a fast function, so we use the
-        normal functions. */
+         normal functions. */
       gcry_md_hd_t h;
       gpg_err_code_t err;
 
-      if (algo == GCRY_MD_MD5 && fips_mode ())
+      err = md_open (&h, algo, 0);
+      if (err)
+        log_bug ("gcry_md_open failed for algo %d: %s",
+                algo, gpg_strerror (gcry_error(err)));
+      md_write (h, (byte *) buffer, length);
+      md_final (h);
+      memcpy (digest, md_read (h, algo), md_digest_length (algo));
+      md_close (h);
+    }
+}
+
+
+/* Shortcut function to hash multiple buffers with a given algo.  In
+   contrast to gcry_md_hash_buffer, this function returns an error on
+   invalid arguments or on other problems; disabled algorithms are
+   _not_ ignored but flagged as an error.
+
+   The data to sign is taken from the array IOV which has IOVCNT items.
+
+   The only supported flag in FLAGS is GCRY_MD_FLAG_HMAC which turns
+   this function into a HMAC function; the first item in IOV is then
+   used as the key.
+
+   On success 0 is returned and resulting hash or HMAC is stored at
+   DIGEST. DIGESTLEN may be given as -1, in which case DIGEST must
+   have been provided by the caller with an appropriate length.
+   DIGESTLEN may also be the appropriate length or, in case of XOF
+   algorithms, DIGESTLEN indicates number bytes to extract from XOF
+   to DIGEST.  */
+gpg_err_code_t
+_gcry_md_hash_buffers_extract (int algo, unsigned int flags, void *digest,
+                              int digestlen, const gcry_buffer_t *iov,
+                              int iovcnt)
+{
+  const gcry_md_spec_t *spec;
+  int hmac;
+
+  if (!iov || iovcnt < 0)
+    return GPG_ERR_INV_ARG;
+  if (flags & ~(GCRY_MD_FLAG_HMAC))
+    return GPG_ERR_INV_ARG;
+
+  hmac = !!(flags & GCRY_MD_FLAG_HMAC);
+  if (hmac && iovcnt < 1)
+    return GPG_ERR_INV_ARG;
+
+  spec = spec_from_algo (algo);
+  if (!spec)
+    {
+      log_debug ("md_hash_buffers: algorithm %d not available\n", algo);
+      return GPG_ERR_DIGEST_ALGO;
+    }
+
+  if (spec->mdlen > 0 && digestlen != -1 && digestlen != spec->mdlen)
+    return GPG_ERR_DIGEST_ALGO;
+  if (spec->mdlen == 0 && digestlen == -1)
+    return GPG_ERR_DIGEST_ALGO;
+
+  if (!hmac && spec->hash_buffers)
+    {
+      if (spec->flags.disabled || (!spec->flags.fips && fips_mode ()))
+        return GPG_ERR_DIGEST_ALGO;
+
+      spec->hash_buffers (digest, digestlen, iov, iovcnt);
+    }
+  else
+    {
+      /* For the others we do not have a fast function, so we use the
+         normal functions.  */
+      gcry_md_hd_t h;
+      gpg_err_code_t rc;
+
+      rc = md_open (&h, algo, (hmac? GCRY_MD_FLAG_HMAC:0));
+      if (rc)
+        return rc;
+
+      if (hmac)
         {
-          _gcry_inactivate_fips_mode ("MD5 used");
-          if (_gcry_enforced_fips_mode () )
+          rc = _gcry_md_setkey (h,
+                                (const char*)iov[0].data + iov[0].off,
+                                iov[0].len);
+          if (rc)
             {
-              /* We should never get to here because we do not register
-                 MD5 in enforced fips mode.  */
-              _gcry_fips_noreturn ();
+              md_close (h);
+              return rc;
             }
+          iov++; iovcnt--;
         }
-
-      err = md_open (&h, algo, 0, 0);
-      if (err)
-       log_bug ("gcry_md_open failed for algo %d: %s",
-                 algo, gpg_strerror (gcry_error(err)));
-      md_write (h, (byte *) buffer, length);
+      for (;iovcnt; iov++, iovcnt--)
+        md_write (h, (const char*)iov[0].data + iov[0].off, iov[0].len);
       md_final (h);
-      memcpy (digest, md_read (h, algo), md_digest_length (algo));
+      if (spec->mdlen > 0)
+       memcpy (digest, md_read (h, algo), spec->mdlen);
+      else if (digestlen > 0)
+       md_extract (h, algo, digest, digestlen);
       md_close (h);
     }
+
+  return 0;
+}
+
+
+/* Shortcut function to hash multiple buffers with a given algo.  In
+   contrast to gcry_md_hash_buffer, this function returns an error on
+   invalid arguments or on other problems; disabled algorithms are
+   _not_ ignored but flagged as an error.
+
+   The data to sign is taken from the array IOV which has IOVCNT items.
+
+   The only supported flag in FLAGS is GCRY_MD_FLAG_HMAC which turns
+   this function into a HMAC function; the first item in IOV is then
+   used as the key.
+
+   On success 0 is returned and resulting hash or HMAC is stored at
+   DIGEST which must have been provided by the caller with an
+   appropriate length.  */
+gpg_err_code_t
+_gcry_md_hash_buffers (int algo, unsigned int flags, void *digest,
+                      const gcry_buffer_t *iov, int iovcnt)
+{
+  return _gcry_md_hash_buffers_extract(algo, flags, digest, -1, iov, iovcnt);
 }
 
+
 static int
 md_get_algo (gcry_md_hd_t a)
 {
@@ -1055,11 +1347,12 @@ md_get_algo (gcry_md_hd_t a)
       fips_signal_error ("possible usage error");
       log_error ("WARNING: more than one algorithm in md_get_algo()\n");
     }
-  return r ? r->module->mod_id : 0;
+  return r ? r->spec->algo : 0;
 }
 
+
 int
-gcry_md_get_algo (gcry_md_hd_t hd)
+_gcry_md_get_algo (gcry_md_hd_t hd)
 {
   return md_get_algo (hd);
 }
@@ -1071,29 +1364,19 @@ gcry_md_get_algo (gcry_md_hd_t hd)
 static int
 md_digest_length (int algorithm)
 {
-  gcry_module_t digest;
-  int mdlen = 0;
-
-  REGISTER_DEFAULT_DIGESTS;
+  const gcry_md_spec_t *spec;
 
-  ath_mutex_lock (&digests_registered_lock);
-  digest = _gcry_module_lookup_id (digests_registered, algorithm);
-  if (digest)
-    {
-      mdlen = ((gcry_md_spec_t *) digest->spec)->mdlen;
-      _gcry_module_release (digest);
-    }
-  ath_mutex_unlock (&digests_registered_lock);
-
-  return mdlen;
+  spec = spec_from_algo (algorithm);
+  return spec? spec->mdlen : 0;
 }
 
+
 /****************
  * Return the length of the digest in bytes.
  * This function will return 0 in case of errors.
  */
 unsigned int
-gcry_md_get_algo_dlen (int algorithm)
+_gcry_md_get_algo_dlen (int algorithm)
 {
   return md_digest_length (algorithm);
 }
@@ -1104,31 +1387,25 @@ gcry_md_get_algo_dlen (int algorithm)
 static const byte *
 md_asn_oid (int algorithm, size_t *asnlen, size_t *mdlen)
 {
+  const gcry_md_spec_t *spec;
   const byte *asnoid = NULL;
-  gcry_module_t digest;
 
-  REGISTER_DEFAULT_DIGESTS;
-
-  ath_mutex_lock (&digests_registered_lock);
-  digest = _gcry_module_lookup_id (digests_registered, algorithm);
-  if (digest)
+  spec = spec_from_algo (algorithm);
+  if (spec)
     {
       if (asnlen)
-       *asnlen = ((gcry_md_spec_t *) digest->spec)->asnlen;
+       *asnlen = spec->asnlen;
       if (mdlen)
-       *mdlen = ((gcry_md_spec_t *) digest->spec)->mdlen;
-      asnoid = ((gcry_md_spec_t *) digest->spec)->asnoid;
-      _gcry_module_release (digest);
+       *mdlen = spec->mdlen;
+      asnoid = spec->asnoid;
     }
   else
     log_bug ("no ASN.1 OID for md algo %d\n", algorithm);
-  ath_mutex_unlock (&digests_registered_lock);
 
   return asnoid;
 }
 
 
-
 /****************
  * Return information about the given cipher algorithm
  * WHAT select the kind of information returned:
@@ -1138,6 +1415,8 @@ md_asn_oid (int algorithm, size_t *asnlen, size_t *mdlen)
  *  GCRYCTL_GET_ASNOID:
  *     Return the ASNOID of the algorithm in buffer. if buffer is NULL, only
  *     the required length is returned.
+ *  GCRYCTL_SELFTEST
+ *      Helper for the regression tests - shall not be used by applications.
  *
  * Note:  Because this function is in most cases used to return an
  * integer value, we can make it easier for the caller to just look at
@@ -1145,52 +1424,59 @@ md_asn_oid (int algorithm, size_t *asnlen, size_t 
*mdlen)
  * and thereby detecting whether a error occurred or not (i.e. while checking
  * the block size)
  */
-gcry_error_t
-gcry_md_algo_info (int algo, int what, void *buffer, size_t *nbytes)
+gcry_err_code_t
+_gcry_md_algo_info (int algo, int what, void *buffer, size_t *nbytes)
 {
-  gcry_err_code_t err = GPG_ERR_NO_ERROR;
+  gcry_err_code_t rc;
 
   switch (what)
     {
     case GCRYCTL_TEST_ALGO:
       if (buffer || nbytes)
-       err = GPG_ERR_INV_ARG;
+       rc = GPG_ERR_INV_ARG;
       else
-       err = check_digest_algo (algo);
+       rc = check_digest_algo (algo);
       break;
 
     case GCRYCTL_GET_ASNOID:
       /* We need to check that the algo is available because
          md_asn_oid would otherwise raise an assertion. */
-      err = check_digest_algo (algo);
-      if (!err)
+      rc = check_digest_algo (algo);
+      if (!rc)
         {
           const char unsigned *asn;
           size_t asnlen;
 
           asn = md_asn_oid (algo, &asnlen, NULL);
           if (buffer && (*nbytes >= asnlen))
-         {
-           memcpy (buffer, asn, asnlen);
-           *nbytes = asnlen;
-         }
+            {
+              memcpy (buffer, asn, asnlen);
+              *nbytes = asnlen;
+            }
           else if (!buffer && nbytes)
             *nbytes = asnlen;
           else
             {
               if (buffer)
-                err = GPG_ERR_TOO_SHORT;
+                rc = GPG_ERR_TOO_SHORT;
               else
-                err = GPG_ERR_INV_ARG;
+                rc = GPG_ERR_INV_ARG;
             }
         }
       break;
 
-  default:
-    err = GPG_ERR_INV_OP;
+    case GCRYCTL_SELFTEST:
+      /* Helper function for the regression tests.  */
+      rc = gpg_err_code (_gcry_md_selftest (algo, nbytes? (int)*nbytes : 0,
+                                             NULL));
+      break;
+
+    default:
+      rc = GPG_ERR_INV_OP;
+      break;
   }
 
-  return gcry_error (err);
+  return rc;
 }
 
 
@@ -1215,6 +1501,7 @@ md_start_debug ( gcry_md_hd_t md, const char *suffix )
     log_debug("md debug: can't open %s\n", buf );
 }
 
+
 static void
 md_stop_debug( gcry_md_hd_t md )
 {
@@ -1226,15 +1513,13 @@ md_stop_debug( gcry_md_hd_t md )
       md->ctx->debug = NULL;
     }
 
-#ifdef HAVE_U64_TYPEDEF
   {  /* a kludge to pull in the __muldi3 for Solaris */
-    volatile u32 a = (u32)(ulong)md;
+    volatile u32 a = (u32)(uintptr_t)md;
     volatile u64 b = 42;
     volatile u64 c;
     c = a * b;
     (void)c;
   }
-#endif
 }
 
 
@@ -1248,15 +1533,15 @@ md_stop_debug( gcry_md_hd_t md )
  *     Returns 1 if the algo is enabled for that handle.
  *     The algo must be passed as the address of an int.
  */
-gcry_error_t
-gcry_md_info (gcry_md_hd_t h, int cmd, void *buffer, size_t *nbytes)
+gcry_err_code_t
+_gcry_md_info (gcry_md_hd_t h, int cmd, void *buffer, size_t *nbytes)
 {
-  gcry_err_code_t err = GPG_ERR_NO_ERROR;
+  gcry_err_code_t rc = 0;
 
   switch (cmd)
     {
     case GCRYCTL_IS_SECURE:
-      *nbytes = h->ctx->secure;
+      *nbytes = h->ctx->flags.secure;
       break;
 
     case GCRYCTL_IS_ALGO_ENABLED:
@@ -1264,15 +1549,15 @@ gcry_md_info (gcry_md_hd_t h, int cmd, void *buffer, 
size_t *nbytes)
        GcryDigestEntry *r;
        int algo;
 
-       if ( !buffer || (nbytes && (*nbytes != sizeof (int))))
-         err = GPG_ERR_INV_ARG;
+       if ( !buffer || !nbytes || *nbytes != sizeof (int))
+         rc = GPG_ERR_INV_ARG;
        else
          {
            algo = *(int*)buffer;
 
            *nbytes = 0;
            for(r=h->ctx->list; r; r = r->next ) {
-             if (r->module->mod_id == algo)
+             if (r->spec->algo == algo)
                {
                  *nbytes = 1;
                  break;
@@ -1283,10 +1568,10 @@ gcry_md_info (gcry_md_hd_t h, int cmd, void *buffer, 
size_t *nbytes)
       }
 
   default:
-    err = GPG_ERR_INV_OP;
+    rc = GPG_ERR_INV_OP;
   }
 
-  return gcry_error (err);
+  return rc;
 }
 
 
@@ -1294,20 +1579,16 @@ gcry_md_info (gcry_md_hd_t h, int cmd, void *buffer, 
size_t *nbytes)
 gcry_err_code_t
 _gcry_md_init (void)
 {
-  gcry_err_code_t err = GPG_ERR_NO_ERROR;
-
-  REGISTER_DEFAULT_DIGESTS;
-
-  return err;
+  return 0;
 }
 
 
 int
-gcry_md_is_secure (gcry_md_hd_t a)
+_gcry_md_is_secure (gcry_md_hd_t a)
 {
   size_t value;
 
-  if (gcry_md_info (a, GCRYCTL_IS_SECURE, NULL, &value))
+  if (_gcry_md_info (a, GCRYCTL_IS_SECURE, NULL, &value))
     value = 1; /* It seems to be better to assume secure memory on
                   error. */
   return value;
@@ -1315,69 +1596,41 @@ gcry_md_is_secure (gcry_md_hd_t a)
 
 
 int
-gcry_md_is_enabled (gcry_md_hd_t a, int algo)
+_gcry_md_is_enabled (gcry_md_hd_t a, int algo)
 {
   size_t value;
 
   value = sizeof algo;
-  if (gcry_md_info (a, GCRYCTL_IS_ALGO_ENABLED, &algo, &value))
+  if (_gcry_md_info (a, GCRYCTL_IS_ALGO_ENABLED, &algo, &value))
     value = 0;
   return value;
 }
 
-/* Get a list consisting of the IDs of the loaded message digest
-   modules.  If LIST is zero, write the number of loaded message
-   digest modules to LIST_LENGTH and return.  If LIST is non-zero, the
-   first *LIST_LENGTH algorithm IDs are stored in LIST, which must be
-   of according size.  In case there are less message digest modules
-   than *LIST_LENGTH, *LIST_LENGTH is updated to the correct
-   number.  */
-gcry_error_t
-gcry_md_list (int *list, int *list_length)
-{
-  gcry_err_code_t err = GPG_ERR_NO_ERROR;
-
-  ath_mutex_lock (&digests_registered_lock);
-  err = _gcry_module_list (digests_registered, list, list_length);
-  ath_mutex_unlock (&digests_registered_lock);
-
-  return err;
-}
-
 
 /* Run the selftests for digest algorithm ALGO with optional reporting
    function REPORT.  */
 gpg_error_t
 _gcry_md_selftest (int algo, int extended, selftest_report_func_t report)
 {
-  gcry_module_t module = NULL;
-  cipher_extra_spec_t *extraspec = NULL;
   gcry_err_code_t ec = 0;
+  const gcry_md_spec_t *spec;
 
-  REGISTER_DEFAULT_DIGESTS;
-
-  ath_mutex_lock (&digests_registered_lock);
-  module = _gcry_module_lookup_id (digests_registered, algo);
-  if (module && !(module->flags & FLAG_MODULE_DISABLED))
-    extraspec = module->extraspec;
-  ath_mutex_unlock (&digests_registered_lock);
-  if (extraspec && extraspec->selftest)
-    ec = extraspec->selftest (algo, extended, report);
+  spec = spec_from_algo (algo);
+  if (spec && !spec->flags.disabled
+      && (spec->flags.fips || !fips_mode ())
+      && spec->selftest)
+    ec = spec->selftest (algo, extended, report);
   else
     {
-      ec = GPG_ERR_DIGEST_ALGO;
+      ec = (spec && spec->selftest) ? GPG_ERR_DIGEST_ALGO
+        /* */                       : GPG_ERR_NOT_IMPLEMENTED;
       if (report)
         report ("digest", algo, "module",
-                module && !(module->flags & FLAG_MODULE_DISABLED)?
+                spec && !spec->flags.disabled
+                && (spec->flags.fips || !fips_mode ())?
                 "no selftest available" :
-                module? "algorithm disabled" : "algorithm not found");
+                spec? "algorithm disabled" : "algorithm not found");
     }
 
-  if (module)
-    {
-      ath_mutex_lock (&digests_registered_lock);
-      _gcry_module_release (module);
-      ath_mutex_unlock (&digests_registered_lock);
-    }
   return gpg_error (ec);
 }
diff --git a/grub-core/lib/libgcrypt/cipher/md4.c 
b/grub-core/lib/libgcrypt/cipher/md4.c
index 22fbf8d90..49b2af2a0 100644
--- a/grub-core/lib/libgcrypt/cipher/md4.c
+++ b/grub-core/lib/libgcrypt/cipher/md4.c
@@ -56,28 +56,35 @@
 #include "cipher.h"
 
 #include "bithelp.h"
+#include "bufhelp.h"
+#include "hash-common.h"
 
 
 typedef struct {
+    gcry_md_block_ctx_t bctx;
     u32 A,B,C,D;         /* chaining variables */
-    u32  nblocks;
-    byte buf[64];
-    int  count;
 } MD4_CONTEXT;
 
+static unsigned int
+transform ( void *c, const unsigned char *data, size_t nblks );
 
 static void
-md4_init( void *context )
+md4_init (void *context, unsigned int flags)
 {
   MD4_CONTEXT *ctx = context;
 
+  (void)flags;
+
   ctx->A = 0x67452301;
   ctx->B = 0xefcdab89;
   ctx->C = 0x98badcfe;
   ctx->D = 0x10325476;
 
-  ctx->nblocks = 0;
-  ctx->count = 0;
+  ctx->bctx.nblocks = 0;
+  ctx->bctx.nblocks_high = 0;
+  ctx->bctx.count = 0;
+  ctx->bctx.blocksize_shift = _gcry_ctz(64);
+  ctx->bctx.bwrite = transform;
 }
 
 #define F(x, y, z) ((z) ^ ((x) & ((y) ^ (z))))
@@ -88,31 +95,19 @@ md4_init( void *context )
 /****************
  * transform 64 bytes
  */
-static void
-transform ( MD4_CONTEXT *ctx, const unsigned char *data )
+static unsigned int
+transform_blk ( void *c, const unsigned char *data )
 {
+  MD4_CONTEXT *ctx = c;
   u32 in[16];
   register u32 A = ctx->A;
   register u32 B = ctx->B;
   register u32 C = ctx->C;
   register u32 D = ctx->D;
+  int i;
 
-#ifdef WORDS_BIGENDIAN
-  {
-    int i;
-    byte *p2;
-    const byte *p1;
-    for(i=0, p1=data, p2=(byte*)in; i < 16; i++, p2 += 4 )
-      {
-       p2[3] = *p1++;
-       p2[2] = *p1++;
-       p2[1] = *p1++;
-       p2[0] = *p1++;
-      }
-  }
-#else
-  memcpy (in, data, 64);
-#endif
+  for ( i = 0; i < 16; i++ )
+    in[i] = buf_get_le32(data + i * 4);
 
   /* Round 1.  */
 #define function(a,b,c,d,k,s) a=rol(a+F(b,c,d)+in[k],s);
@@ -183,54 +178,27 @@ transform ( MD4_CONTEXT *ctx, const unsigned char *data )
   ctx->B += B;
   ctx->C += C;
   ctx->D += D;
-}
 
+  return /*burn_stack*/ 80+6*sizeof(void*);
+}
 
 
-/* The routine updates the message-digest context to
- * account for the presence of each of the characters inBuf[0..inLen-1]
- * in the message whose digest is being computed.
- */
-static void
-md4_write ( void *context, const void *inbuf_arg, size_t inlen)
+static unsigned int
+transform ( void *c, const unsigned char *data, size_t nblks )
 {
-  const unsigned char *inbuf = inbuf_arg;
-  MD4_CONTEXT *hd = context;
+  unsigned int burn;
 
-  if( hd->count == 64 ) /* flush the buffer */
+  do
     {
-      transform( hd, hd->buf );
-      _gcry_burn_stack (80+6*sizeof(void*));
-      hd->count = 0;
-      hd->nblocks++;
+      burn = transform_blk (c, data);
+      data += 64;
     }
-  if( !inbuf )
-    return;
+  while (--nblks);
 
-  if( hd->count )
-    {
-      for( ; inlen && hd->count < 64; inlen-- )
-        hd->buf[hd->count++] = *inbuf++;
-      md4_write( hd, NULL, 0 );
-      if( !inlen )
-        return;
-    }
-  _gcry_burn_stack (80+6*sizeof(void*));
-
-  while( inlen >= 64 )
-    {
-      transform( hd, inbuf );
-      hd->count = 0;
-      hd->nblocks++;
-      inlen -= 64;
-      inbuf += 64;
-    }
-  for( ; inlen && hd->count < 64; inlen-- )
-    hd->buf[hd->count++] = *inbuf++;
+  return burn;
 }
 
 
-
 /* The routine final terminates the message-digest computation and
  * ends with the desired message digest in mdContext->digest[0...15].
  * The handle is prepared for a new MD4 cycle.
@@ -241,18 +209,22 @@ static void
 md4_final( void *context )
 {
   MD4_CONTEXT *hd = context;
-  u32 t, msb, lsb;
+  u32 t, th, msb, lsb;
   byte *p;
+  unsigned int burn;
 
-  md4_write(hd, NULL, 0); /* flush */;
+  t = hd->bctx.nblocks;
+  if (sizeof t == sizeof hd->bctx.nblocks)
+    th = hd->bctx.nblocks_high;
+  else
+    th = hd->bctx.nblocks >> 32;
 
-  t = hd->nblocks;
   /* multiply by 64 to make a byte count */
   lsb = t << 6;
-  msb = t >> 26;
+  msb = (th << 6) | (t >> 26);
   /* add the count */
   t = lsb;
-  if( (lsb += hd->count) < t )
+  if( (lsb += hd->bctx.count) < t )
     msb++;
   /* multiply by 8 to make a bit count */
   t = lsb;
@@ -260,68 +232,65 @@ md4_final( void *context )
   msb <<= 3;
   msb |= t >> 29;
 
-  if( hd->count < 56 )  /* enough room */
+  if (hd->bctx.count < 56)  /* enough room */
     {
-      hd->buf[hd->count++] = 0x80; /* pad */
-      while( hd->count < 56 )
-        hd->buf[hd->count++] = 0;  /* pad */
+      hd->bctx.buf[hd->bctx.count++] = 0x80; /* pad */
+      if (hd->bctx.count < 56)
+       memset (&hd->bctx.buf[hd->bctx.count], 0, 56 - hd->bctx.count);
+
+      /* append the 64 bit count */
+      buf_put_le32(hd->bctx.buf + 56, lsb);
+      buf_put_le32(hd->bctx.buf + 60, msb);
+      burn = transform (hd, hd->bctx.buf, 1);
     }
   else /* need one extra block */
     {
-      hd->buf[hd->count++] = 0x80; /* pad character */
-      while( hd->count < 64 )
-        hd->buf[hd->count++] = 0;
-      md4_write(hd, NULL, 0);  /* flush */;
-      memset(hd->buf, 0, 56 ); /* fill next block with zeroes */
+      hd->bctx.buf[hd->bctx.count++] = 0x80; /* pad character */
+      /* fill pad and next block with zeroes */
+      memset (&hd->bctx.buf[hd->bctx.count], 0, 64 - hd->bctx.count + 56);
+
+      /* append the 64 bit count */
+      buf_put_le32(hd->bctx.buf + 64 + 56, lsb);
+      buf_put_le32(hd->bctx.buf + 64 + 60, msb);
+      burn = transform (hd, hd->bctx.buf, 2);
     }
-  /* append the 64 bit count */
-  hd->buf[56] = lsb       ;
-  hd->buf[57] = lsb >>  8;
-  hd->buf[58] = lsb >> 16;
-  hd->buf[59] = lsb >> 24;
-  hd->buf[60] = msb       ;
-  hd->buf[61] = msb >>  8;
-  hd->buf[62] = msb >> 16;
-  hd->buf[63] = msb >> 24;
-  transform( hd, hd->buf );
-  _gcry_burn_stack (80+6*sizeof(void*));
-
-  p = hd->buf;
-#ifdef WORDS_BIGENDIAN
-#define X(a) do { *p++ = hd->a      ; *p++ = hd->a >> 8;      \
-                 *p++ = hd->a >> 16; *p++ = hd->a >> 24; } while(0)
-#else /* little endian */
-#define X(a) do { *(u32*)p = (*hd).a ; p += 4; } while(0)
-#endif
+
+  p = hd->bctx.buf;
+#define X(a) do { buf_put_le32(p, hd->a); p += 4; } while(0)
   X(A);
   X(B);
   X(C);
   X(D);
 #undef X
 
+  hd->bctx.count = 0;
+
+  _gcry_burn_stack (burn);
 }
 
 static byte *
 md4_read (void *context)
 {
   MD4_CONTEXT *hd = context;
-  return hd->buf;
+  return hd->bctx.buf;
 }
 
-static byte asn[18] = /* Object ID is 1.2.840.113549.2.4 */
+static const byte asn[18] = /* Object ID is 1.2.840.113549.2.4 */
   { 0x30, 0x20, 0x30, 0x0c, 0x06, 0x08, 0x2a, 0x86,0x48,
     0x86, 0xf7, 0x0d, 0x02, 0x04, 0x05, 0x00, 0x04, 0x10 };
 
-static gcry_md_oid_spec_t oid_spec_md4[] =
+static const gcry_md_oid_spec_t oid_spec_md4[] =
   {
     /* iso.member-body.us.rsadsi.digestAlgorithm.md4 */
     { "1.2.840.113549.2.4" },
     { NULL },
   };
 
-gcry_md_spec_t _gcry_digest_spec_md4 =
+const gcry_md_spec_t _gcry_digest_spec_md4 =
   {
+    GCRY_MD_MD4, {0, 0},
     "MD4", asn, DIM (asn), oid_spec_md4,16,
-    md4_init, md4_write, md4_final, md4_read,
+    md4_init, _gcry_md_block_write, md4_final, md4_read, NULL,
+    NULL,
     sizeof (MD4_CONTEXT)
   };
diff --git a/grub-core/lib/libgcrypt/cipher/md5.c 
b/grub-core/lib/libgcrypt/cipher/md5.c
index a98678a9b..744a2cc19 100644
--- a/grub-core/lib/libgcrypt/cipher/md5.c
+++ b/grub-core/lib/libgcrypt/cipher/md5.c
@@ -40,28 +40,35 @@
 #include "cipher.h"
 
 #include "bithelp.h"
+#include "bufhelp.h"
+#include "hash-common.h"
 
 
 typedef struct {
+    gcry_md_block_ctx_t bctx;
     u32 A,B,C,D;         /* chaining variables */
-    u32  nblocks;
-    byte buf[64];
-    int  count;
 } MD5_CONTEXT;
 
+static unsigned int
+transform ( void *ctx, const unsigned char *data, size_t datalen );
 
 static void
-md5_init( void *context )
+md5_init( void *context, unsigned int flags)
 {
   MD5_CONTEXT *ctx = context;
 
+  (void)flags;
+
   ctx->A = 0x67452301;
   ctx->B = 0xefcdab89;
   ctx->C = 0x98badcfe;
   ctx->D = 0x10325476;
 
-  ctx->nblocks = 0;
-  ctx->count = 0;
+  ctx->bctx.nblocks = 0;
+  ctx->bctx.nblocks_high = 0;
+  ctx->bctx.count = 0;
+  ctx->bctx.blocksize_shift = _gcry_ctz(64);
+  ctx->bctx.bwrite = transform;
 }
 
 
@@ -76,35 +83,22 @@ md5_init( void *context )
 
 
 /****************
- * transform n*64 bytes
+ * transform 64 bytes
  */
-static void
-transform ( MD5_CONTEXT *ctx, const unsigned char *data )
+static unsigned int
+transform_blk ( void *c, const unsigned char *data )
 {
+  MD5_CONTEXT *ctx = c;
   u32 correct_words[16];
   register u32 A = ctx->A;
   register u32 B = ctx->B;
   register u32 C = ctx->C;
   register u32 D = ctx->D;
   u32 *cwp = correct_words;
+  int i;
 
-#ifdef WORDS_BIGENDIAN
-  {
-    int i;
-    byte *p2;
-    const byte *p1;
-    for(i=0, p1=data, p2=(byte*)correct_words; i < 16; i++, p2 += 4 )
-      {
-        p2[3] = *p1++;
-       p2[2] = *p1++;
-       p2[1] = *p1++;
-       p2[0] = *p1++;
-      }
-  }
-#else
-  memcpy( correct_words, data, 64 );
-#endif
-
+  for ( i = 0; i < 16; i++ )
+    correct_words[i] = buf_get_le32(data + i * 4);
 
 #define OP(a, b, c, d, s, T) \
   do                                      \
@@ -208,55 +202,27 @@ transform ( MD5_CONTEXT *ctx, const unsigned char *data )
   ctx->B += B;
   ctx->C += C;
   ctx->D += D;
-}
 
+  return /*burn_stack*/ 80+6*sizeof(void*);
+}
 
 
-/* The routine updates the message-digest context to
- * account for the presence of each of the characters inBuf[0..inLen-1]
- * in the message whose digest is being computed.
- */
-static void
-md5_write( void *context, const void *inbuf_arg , size_t inlen)
+static unsigned int
+transform ( void *c, const unsigned char *data, size_t nblks )
 {
-  const unsigned char *inbuf = inbuf_arg;
-  MD5_CONTEXT *hd = context;
+  unsigned int burn;
 
-  if( hd->count == 64 )  /* flush the buffer */
+  do
     {
-      transform( hd, hd->buf );
-      _gcry_burn_stack (80+6*sizeof(void*));
-      hd->count = 0;
-      hd->nblocks++;
+      burn = transform_blk (c, data);
+      data += 64;
     }
-  if( !inbuf )
-    return;
-
-  if( hd->count )
-    {
-      for( ; inlen && hd->count < 64; inlen-- )
-        hd->buf[hd->count++] = *inbuf++;
-      md5_write( hd, NULL, 0 );
-      if( !inlen )
-        return;
-    }
-  _gcry_burn_stack (80+6*sizeof(void*));
-
-  while( inlen >= 64 )
-    {
-      transform( hd, inbuf );
-      hd->count = 0;
-      hd->nblocks++;
-      inlen -= 64;
-      inbuf += 64;
-    }
-  for( ; inlen && hd->count < 64; inlen-- )
-    hd->buf[hd->count++] = *inbuf++;
+  while (--nblks);
 
+  return burn;
 }
 
 
-
 /* The routine final terminates the message-digest computation and
  * ends with the desired message digest in mdContext->digest[0...15].
  * The handle is prepared for a new MD5 cycle.
@@ -267,18 +233,22 @@ static void
 md5_final( void *context)
 {
   MD5_CONTEXT *hd = context;
-  u32 t, msb, lsb;
+  u32 t, th, msb, lsb;
   byte *p;
+  unsigned int burn;
 
-  md5_write(hd, NULL, 0); /* flush */;
+  t = hd->bctx.nblocks;
+  if (sizeof t == sizeof hd->bctx.nblocks)
+    th = hd->bctx.nblocks_high;
+  else
+    th = hd->bctx.nblocks >> 32;
 
-  t = hd->nblocks;
   /* multiply by 64 to make a byte count */
   lsb = t << 6;
-  msb = t >> 26;
+  msb = (th << 6) | (t >> 26);
   /* add the count */
   t = lsb;
-  if( (lsb += hd->count) < t )
+  if( (lsb += hd->bctx.count) < t )
     msb++;
   /* multiply by 8 to make a bit count */
   t = lsb;
@@ -286,59 +256,54 @@ md5_final( void *context)
   msb <<= 3;
   msb |= t >> 29;
 
-  if( hd->count < 56 )  /* enough room */
+  if (hd->bctx.count < 56)  /* enough room */
     {
-      hd->buf[hd->count++] = 0x80; /* pad */
-      while( hd->count < 56 )
-        hd->buf[hd->count++] = 0;  /* pad */
+      hd->bctx.buf[hd->bctx.count++] = 0x80; /* pad */
+      if (hd->bctx.count < 56)
+       memset (&hd->bctx.buf[hd->bctx.count], 0, 56 - hd->bctx.count);
+
+      /* append the 64 bit count */
+      buf_put_le32(hd->bctx.buf + 56, lsb);
+      buf_put_le32(hd->bctx.buf + 60, msb);
+      burn = transform (hd, hd->bctx.buf, 1);
     }
-  else  /* need one extra block */
+  else /* need one extra block */
     {
-      hd->buf[hd->count++] = 0x80; /* pad character */
-      while( hd->count < 64 )
-        hd->buf[hd->count++] = 0;
-      md5_write(hd, NULL, 0);  /* flush */;
-      memset(hd->buf, 0, 56 ); /* fill next block with zeroes */
+      hd->bctx.buf[hd->bctx.count++] = 0x80; /* pad character */
+      /* fill pad and next block with zeroes */
+      memset (&hd->bctx.buf[hd->bctx.count], 0, 64 - hd->bctx.count + 56);
+
+      /* append the 64 bit count */
+      buf_put_le32(hd->bctx.buf + 64 + 56, lsb);
+      buf_put_le32(hd->bctx.buf + 64 + 60, msb);
+      burn = transform (hd, hd->bctx.buf, 2);
     }
-  /* append the 64 bit count */
-  hd->buf[56] = lsb       ;
-  hd->buf[57] = lsb >>  8;
-  hd->buf[58] = lsb >> 16;
-  hd->buf[59] = lsb >> 24;
-  hd->buf[60] = msb       ;
-  hd->buf[61] = msb >>  8;
-  hd->buf[62] = msb >> 16;
-  hd->buf[63] = msb >> 24;
-  transform( hd, hd->buf );
-  _gcry_burn_stack (80+6*sizeof(void*));
-
-  p = hd->buf;
-#ifdef WORDS_BIGENDIAN
-#define X(a) do { *p++ = hd->a      ; *p++ = hd->a >> 8;      \
-                 *p++ = hd->a >> 16; *p++ = hd->a >> 24; } while(0)
-#else /* little endian */
-#define X(a) do { *(u32*)p = (*hd).a ; p += 4; } while(0)
-#endif
+
+  p = hd->bctx.buf;
+#define X(a) do { buf_put_le32(p, hd->a); p += 4; } while(0)
   X(A);
   X(B);
   X(C);
   X(D);
 #undef X
 
+  hd->bctx.count = 0;
+
+  _gcry_burn_stack (burn);
 }
 
 static byte *
 md5_read( void *context )
 {
   MD5_CONTEXT *hd = (MD5_CONTEXT *) context;
-  return hd->buf;
+  return hd->bctx.buf;
 }
 
-static byte asn[18] = /* Object ID is 1.2.840.113549.2.5 */
+static const byte asn[18] = /* Object ID is 1.2.840.113549.2.5 */
   { 0x30, 0x20, 0x30, 0x0c, 0x06, 0x08, 0x2a, 0x86,0x48,
     0x86, 0xf7, 0x0d, 0x02, 0x05, 0x05, 0x00, 0x04, 0x10 };
 
-static gcry_md_oid_spec_t oid_spec_md5[] =
+static const gcry_md_oid_spec_t oid_spec_md5[] =
   {
     /* iso.member-body.us.rsadsi.pkcs.pkcs-1.4 (md5WithRSAEncryption) */
     { "1.2.840.113549.1.1.4" },
@@ -347,9 +312,11 @@ static gcry_md_oid_spec_t oid_spec_md5[] =
     { NULL },
   };
 
-gcry_md_spec_t _gcry_digest_spec_md5 =
+const gcry_md_spec_t _gcry_digest_spec_md5 =
   {
+    GCRY_MD_MD5, {0, 1},
     "MD5", asn, DIM (asn), oid_spec_md5, 16,
-    md5_init, md5_write, md5_final, md5_read,
+    md5_init, _gcry_md_block_write, md5_final, md5_read, NULL,
+    NULL,
     sizeof (MD5_CONTEXT)
   };
diff --git a/grub-core/lib/libgcrypt/cipher/poly1305-internal.h 
b/grub-core/lib/libgcrypt/cipher/poly1305-internal.h
new file mode 100644
index 000000000..19cee5f6f
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/poly1305-internal.h
@@ -0,0 +1,64 @@
+/* poly1305-internal.h  -  Poly1305 internals
+ * Copyright (C) 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser general Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef G10_POLY1305_INTERNAL_H
+#define G10_POLY1305_INTERNAL_H
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "types.h"
+#include "g10lib.h"
+#include "cipher.h"
+#include "bufhelp.h"
+
+#define POLY1305_TAGLEN 16
+#define POLY1305_KEYLEN 32
+#define POLY1305_BLOCKSIZE 16
+
+
+typedef struct
+{
+  u32 k[4];
+  u32 r[4];
+  u32 h[5];
+} POLY1305_STATE;
+
+typedef struct poly1305_context_s
+{
+  POLY1305_STATE state;
+  byte buffer[POLY1305_BLOCKSIZE];
+  unsigned int leftover;
+} poly1305_context_t;
+
+
+gcry_err_code_t _gcry_poly1305_init (poly1305_context_t *ctx, const byte *key,
+                                    size_t keylen);
+
+void _gcry_poly1305_finish (poly1305_context_t *ctx,
+                            byte mac[POLY1305_TAGLEN]);
+
+void _gcry_poly1305_update (poly1305_context_t *ctx, const byte *buf,
+                            size_t buflen);
+
+unsigned int _gcry_poly1305_update_burn (poly1305_context_t *ctx,
+                                        const byte *m, size_t bytes);
+
+#endif /* G10_POLY1305_INTERNAL_H */
diff --git a/grub-core/lib/libgcrypt/cipher/poly1305-s390x.S 
b/grub-core/lib/libgcrypt/cipher/poly1305-s390x.S
new file mode 100644
index 000000000..28bed5600
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/poly1305-s390x.S
@@ -0,0 +1,87 @@
+/* poly1305-s390x.S  -  zSeries implementation of Poly1305
+ *
+ * Copyright (C) 2020 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#if defined (__s390x__) && __GNUC__ >= 4 && __ARCH__ >= 9
+#if defined(HAVE_GCC_INLINE_ASM_S390X)
+
+#include "asm-poly1305-s390x.h"
+
+.text
+
+.balign 8
+.globl _gcry_poly1305_s390x_blocks1
+ELF(.type _gcry_poly1305_s390x_blocks1,@function;)
+
+_gcry_poly1305_s390x_blocks1:
+       /* input:
+        *      %r2: poly1305-state
+        *      %r3: src
+        *      %r4: len
+        *      %r5: high_pad
+        */
+       CFI_STARTPROC();
+
+       stmg %r6, %r14, 6 * 8(%r15);
+
+       lgr POLY_RSTATE, %r2;
+       lgr POLY_RSRC, %r3;
+       srlg %r0, %r4, 4;
+
+       cgije %r5, 0, .Lpoly_high0;
+
+       POLY1305_LOAD_STATE();
+
+.balign 4
+.Lpoly_loop_high1:
+       POLY1305_BLOCK_PART1(0 * 16);
+       INC_POLY1305_SRC(1 * 16);
+.Lpoly_block_part2:
+       POLY1305_BLOCK_PART2();
+       POLY1305_BLOCK_PART3();
+       POLY1305_BLOCK_PART4();
+       POLY1305_BLOCK_PART5();
+       POLY1305_BLOCK_PART6();
+       POLY1305_BLOCK_PART7();
+       POLY1305_BLOCK_PART8();
+
+       brctg %r0, .Lpoly_loop_high1;
+
+.balign 4
+.Lpoly_done:
+       POLY1305_STORE_STATE();
+
+       lmg %r6, %r14, 6 * 8(%r15);
+       xgr %r2, %r2;
+       br %r14;
+
+.balign 4
+.Lpoly_high0:
+       lghi %r0, 1;
+       POLY1305_LOAD_STATE();
+       POLY1305_BLOCK_PART1_HB(0 * 16, 0);
+       j .Lpoly_block_part2;
+
+       CFI_ENDPROC();
+ELF(.size _gcry_poly1305_s390x_blocks1,
+    .-_gcry_poly1305_s390x_blocks1;)
+
+#endif /*HAVE_GCC_INLINE_ASM_S390X*/
+#endif /*__s390x__*/
diff --git a/grub-core/lib/libgcrypt/cipher/poly1305.c 
b/grub-core/lib/libgcrypt/cipher/poly1305.c
new file mode 100644
index 000000000..e57e64f33
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/poly1305.c
@@ -0,0 +1,763 @@
+/* poly1305.c  -  Poly1305 internals and generic implementation
+ * Copyright (C) 2014,2017,2018 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser general Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "types.h"
+#include "g10lib.h"
+#include "cipher.h"
+#include "bufhelp.h"
+#include "poly1305-internal.h"
+
+#include "mpi-internal.h"
+#include "longlong.h"
+
+
+static const char *selftest (void);
+
+
+#undef HAVE_ASM_POLY1305_BLOCKS
+
+
+#undef USE_MPI_64BIT
+#undef USE_MPI_32BIT
+#if BYTES_PER_MPI_LIMB == 8 && defined(HAVE_TYPE_U64)
+# define USE_MPI_64BIT 1
+#elif BYTES_PER_MPI_LIMB == 4
+# define USE_MPI_32BIT 1
+#else
+# error please implement for this limb size.
+#endif
+
+
+/* USE_S390X_ASM indicates whether to enable zSeries code. */
+#undef USE_S390X_ASM
+#if BYTES_PER_MPI_LIMB == 8
+# if defined (__s390x__) && __GNUC__ >= 4 && __ARCH__ >= 9
+#  if defined(HAVE_GCC_INLINE_ASM_S390X)
+#   define USE_S390X_ASM 1
+#  endif /* USE_S390X_ASM */
+# endif
+#endif
+
+
+#ifdef USE_S390X_ASM
+
+#define HAVE_ASM_POLY1305_BLOCKS 1
+
+extern unsigned int _gcry_poly1305_s390x_blocks1(void *state,
+                                                const byte *buf, size_t len,
+                                                byte high_pad);
+
+static unsigned int
+poly1305_blocks (poly1305_context_t *ctx, const byte *buf, size_t len,
+                byte high_pad)
+{
+  return _gcry_poly1305_s390x_blocks1(&ctx->state, buf, len, high_pad);
+}
+
+#endif /* USE_S390X_ASM */
+
+
+static void poly1305_init (poly1305_context_t *ctx,
+                          const byte key[POLY1305_KEYLEN])
+{
+  POLY1305_STATE *st = &ctx->state;
+
+  ctx->leftover = 0;
+
+  st->h[0] = 0;
+  st->h[1] = 0;
+  st->h[2] = 0;
+  st->h[3] = 0;
+  st->h[4] = 0;
+
+  st->r[0] = buf_get_le32(key + 0)  & 0x0fffffff;
+  st->r[1] = buf_get_le32(key + 4)  & 0x0ffffffc;
+  st->r[2] = buf_get_le32(key + 8)  & 0x0ffffffc;
+  st->r[3] = buf_get_le32(key + 12) & 0x0ffffffc;
+
+  st->k[0] = buf_get_le32(key + 16);
+  st->k[1] = buf_get_le32(key + 20);
+  st->k[2] = buf_get_le32(key + 24);
+  st->k[3] = buf_get_le32(key + 28);
+}
+
+
+#ifdef USE_MPI_64BIT
+
+#if defined (__aarch64__) && defined(HAVE_CPU_ARCH_ARM) && __GNUC__ >= 4
+
+/* A += B (armv8/aarch64) */
+#define ADD_1305_64(A2, A1, A0, B2, B1, B0) \
+      __asm__ ("adds %0, %3, %0\n" \
+              "adcs %1, %4, %1\n" \
+              "adc  %2, %5, %2\n" \
+              : "+r" (A0), "+r" (A1), "+r" (A2) \
+              : "r" (B0), "r" (B1), "r" (B2) \
+              : "cc" )
+
+#endif /* __aarch64__ */
+
+#if defined (__x86_64__) && defined(HAVE_CPU_ARCH_X86) && __GNUC__ >= 4
+
+/* A += B (x86-64) */
+#define ADD_1305_64(A2, A1, A0, B2, B1, B0) \
+      __asm__ ("addq %3, %0\n" \
+              "adcq %4, %1\n" \
+              "adcq %5, %2\n" \
+              : "+r" (A0), "+r" (A1), "+r" (A2) \
+              : "g" (B0), "g" (B1), "g" (B2) \
+              : "cc" )
+
+#endif /* __x86_64__ */
+
+#if defined (__powerpc__) && defined(HAVE_CPU_ARCH_PPC) && __GNUC__ >= 4
+
+/* A += B (ppc64) */
+#define ADD_1305_64(A2, A1, A0, B2, B1, B0) \
+      __asm__ ("addc %0, %3, %0\n" \
+              "adde %1, %4, %1\n" \
+              "adde %2, %5, %2\n" \
+              : "+r" (A0), "+r" (A1), "+r" (A2) \
+              : "r" (B0), "r" (B1), "r" (B2) \
+              : "cc" )
+
+#endif /* __powerpc__ */
+
+#ifndef ADD_1305_64
+/* A += B (generic, mpi) */
+#  define ADD_1305_64(A2, A1, A0, B2, B1, B0) do { \
+    u64 carry; \
+    add_ssaaaa(carry, A0, 0, A0, 0, B0); \
+    add_ssaaaa(A2, A1, A2, A1, B2, B1); \
+    add_ssaaaa(A2, A1, A2, A1, 0, carry); \
+  } while (0)
+#endif
+
+/* H = H * R mod 2¹³⁰-5 */
+#define MUL_MOD_1305_64(H2, H1, H0, R1, R0, R1_MULT5) do { \
+    u64 x0_lo, x0_hi, x1_lo, x1_hi; \
+    u64 t0_lo, t0_hi, t1_lo, t1_hi; \
+    \
+    /* x = a * r (partial mod 2^130-5) */ \
+    umul_ppmm(x0_hi, x0_lo, H0, R0);  /* h0 * r0 */ \
+    umul_ppmm(x1_hi, x1_lo, H0, R1);  /* h0 * r1 */ \
+    \
+    umul_ppmm(t0_hi, t0_lo, H1, R1_MULT5); /* h1 * r1 mod 2^130-5 */ \
+    add_ssaaaa(x0_hi, x0_lo, x0_hi, x0_lo, t0_hi, t0_lo); \
+    umul_ppmm(t1_hi, t1_lo, H1, R0);       /* h1 * r0 */ \
+    add_ssaaaa(x1_hi, x1_lo, x1_hi, x1_lo, t1_hi, t1_lo); \
+    \
+    t1_lo = H2 * R1_MULT5; /* h2 * r1 mod 2^130-5 */ \
+    t1_hi = H2 * R0;       /* h2 * r0 */ \
+    add_ssaaaa(H0, H1, x1_hi, x1_lo, t1_hi, t1_lo); \
+    \
+    /* carry propagation */ \
+    H2 = H0 & 3; \
+    H0 = (H0 >> 2) * 5; /* msb mod 2^130-5 */ \
+    ADD_1305_64(H2, H1, H0, (u64)0, x0_hi, x0_lo); \
+  } while (0)
+
+#ifndef HAVE_ASM_POLY1305_BLOCKS
+
+static unsigned int
+poly1305_blocks (poly1305_context_t *ctx, const byte *buf, size_t len,
+                byte high_pad)
+{
+  POLY1305_STATE *st = &ctx->state;
+  u64 r0, r1, r1_mult5;
+  u64 h0, h1, h2;
+  u64 m0, m1, m2;
+
+  m2 = high_pad;
+
+  h0 = st->h[0] + ((u64)st->h[1] << 32);
+  h1 = st->h[2] + ((u64)st->h[3] << 32);
+  h2 = st->h[4];
+
+  r0 = st->r[0] + ((u64)st->r[1] << 32);
+  r1 = st->r[2] + ((u64)st->r[3] << 32);
+
+  r1_mult5 = (r1 >> 2) + r1;
+
+  m0 = buf_get_le64(buf + 0);
+  m1 = buf_get_le64(buf + 8);
+  buf += POLY1305_BLOCKSIZE;
+  len -= POLY1305_BLOCKSIZE;
+
+  while (len >= POLY1305_BLOCKSIZE)
+    {
+      /* a = h + m */
+      ADD_1305_64(h2, h1, h0, m2, m1, m0);
+
+      m0 = buf_get_le64(buf + 0);
+      m1 = buf_get_le64(buf + 8);
+
+      /* h = a * r (partial mod 2^130-5) */
+      MUL_MOD_1305_64(h2, h1, h0, r1, r0, r1_mult5);
+
+      buf += POLY1305_BLOCKSIZE;
+      len -= POLY1305_BLOCKSIZE;
+    }
+
+  /* a = h + m */
+  ADD_1305_64(h2, h1, h0, m2, m1, m0);
+
+  /* h = a * r (partial mod 2^130-5) */
+  MUL_MOD_1305_64(h2, h1, h0, r1, r0, r1_mult5);
+
+  st->h[0] = h0;
+  st->h[1] = h0 >> 32;
+  st->h[2] = h1;
+  st->h[3] = h1 >> 32;
+  st->h[4] = h2;
+
+  return 6 * sizeof (void *) + 18 * sizeof (u64);
+}
+
+#endif /* !HAVE_ASM_POLY1305_BLOCKS */
+
+static unsigned int poly1305_final (poly1305_context_t *ctx,
+                                   byte mac[POLY1305_TAGLEN])
+{
+  POLY1305_STATE *st = &ctx->state;
+  unsigned int burn = 0;
+  u64 u, carry;
+  u64 k0, k1;
+  u64 h0, h1;
+  u64 h2;
+
+  /* process the remaining block */
+  if (ctx->leftover)
+    {
+      ctx->buffer[ctx->leftover++] = 1;
+      if (ctx->leftover < POLY1305_BLOCKSIZE)
+       {
+         memset (&ctx->buffer[ctx->leftover], 0,
+                 POLY1305_BLOCKSIZE - ctx->leftover);
+         ctx->leftover = POLY1305_BLOCKSIZE;
+       }
+      burn = poly1305_blocks (ctx, ctx->buffer, POLY1305_BLOCKSIZE, 0);
+    }
+
+  h0 = st->h[0] + ((u64)st->h[1] << 32);
+  h1 = st->h[2] + ((u64)st->h[3] << 32);
+  h2 = st->h[4];
+
+  k0 = st->k[0] + ((u64)st->k[1] << 32);
+  k1 = st->k[2] + ((u64)st->k[3] << 32);
+
+  /* check if h is more than 2^130-5, by adding 5. */
+  add_ssaaaa(carry, u, 0, h0, 0, 5);
+  add_ssaaaa(carry, u, 0, carry, 0, h1);
+  u = (carry + h2) >> 2; /* u == 0 or 1 */
+
+  /* minus 2^130-5 ... (+5) */
+  u = (-u) & 5;
+  add_ssaaaa(h1, h0, h1, h0, 0, u);
+
+  /* add high part of key + h */
+  add_ssaaaa(h1, h0, h1, h0, k1, k0);
+  buf_put_le64(mac + 0, h0);
+  buf_put_le64(mac + 8, h1);
+
+  /* burn_stack */
+  return 4 * sizeof (void *) + 7 * sizeof (u64) + burn;
+}
+
+#endif /* USE_MPI_64BIT */
+
+#ifdef USE_MPI_32BIT
+
+#ifdef HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS
+
+/* HI:LO += A * B (arm) */
+#define UMUL_ADD_32(HI, LO, A, B) \
+      __asm__ ("umlal %1, %0, %4, %5" \
+              : "=r" (HI), "=r" (LO) \
+              : "0" (HI), "1" (LO), "r" (A), "r" (B) )
+
+/* A += B (arm) */
+#ifdef __GCC_ASM_FLAG_OUTPUTS__
+#  define ADD_1305_32(A4, A3, A2, A1, A0, B4, B3, B2, B1, B0) do { \
+      u32 __carry; \
+      __asm__ ("adds %0, %0, %5\n" \
+              "adcs %1, %1, %6\n" \
+              "adcs %2, %2, %7\n" \
+              "adcs %3, %3, %8\n" \
+              : "+r" (A0), "+r" (A1), "+r" (A2), "+r" (A3), \
+                "=@cccs" (__carry) \
+              : "r" (B0), "r" (B1), "r" (B2), "r" (B3) \
+              : ); \
+      (A4) += (B4) + __carry; \
+    } while (0)
+#else
+#  define ADD_1305_32(A4, A3, A2, A1, A0, B4, B3, B2, B1, B0) do { \
+      u32 __carry = (B0); \
+      __asm__ ("adds %0, %0, %2\n" \
+              "adcs %1, %1, %3\n" \
+              "rrx %2, %2\n" /* carry to 31th bit */ \
+              : "+r" (A0), "+r" (A1), "+r" (__carry) \
+              : "r" (B1), "r" (0) \
+              : "cc" ); \
+      __asm__ ("lsls %0, %0, #1\n" /* carry from 31th bit */ \
+              "adcs %1, %1, %4\n" \
+              "adcs %2, %2, %5\n" \
+              "adc  %3, %3, %6\n" \
+              : "+r" (__carry), "+r" (A2), "+r" (A3), "+r" (A4) \
+              : "r" (B2), "r" (B3), "r" (B4) \
+              : "cc" ); \
+    } while (0)
+#endif
+
+#endif /* HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS */
+
+#if defined (__i386__) && defined(HAVE_CPU_ARCH_X86) && __GNUC__ >= 5
+/* Note: ADD_1305_32 below does not compile on GCC-4.7 */
+
+/* A += B (i386) */
+#define ADD_1305_32(A4, A3, A2, A1, A0, B4, B3, B2, B1, B0) \
+      __asm__ ("addl %5, %0\n" \
+              "adcl %6, %1\n" \
+              "adcl %7, %2\n" \
+              "adcl %8, %3\n" \
+              "adcl %9, %4\n" \
+              : "+r" (A0), "+r" (A1), "+r" (A2), "+r" (A3), "+r" (A4) \
+              : "g" (B0), "g" (B1), "g" (B2), "g" (B3), "g" (B4) \
+              : "cc" )
+
+#endif /* __i386__ */
+
+#ifndef UMUL_ADD_32
+/* HI:LO += A * B (generic, mpi) */
+#  define UMUL_ADD_32(HI, LO, A, B) do { \
+    u32 t_lo, t_hi; \
+    umul_ppmm(t_hi, t_lo, A, B); \
+    add_ssaaaa(HI, LO, HI, LO, t_hi, t_lo); \
+  } while (0)
+#endif
+
+#ifndef ADD_1305_32
+/* A += B (generic, mpi) */
+#  define ADD_1305_32(A4, A3, A2, A1, A0, B4, B3, B2, B1, B0) do { \
+    u32 carry0, carry1, carry2; \
+    add_ssaaaa(carry0, A0, 0, A0, 0, B0); \
+    add_ssaaaa(carry1, A1, 0, A1, 0, B1); \
+    add_ssaaaa(carry1, A1, carry1, A1, 0, carry0); \
+    add_ssaaaa(carry2, A2, 0, A2, 0, B2); \
+    add_ssaaaa(carry2, A2, carry2, A2, 0, carry1); \
+    add_ssaaaa(A4, A3, A4, A3, B4, B3); \
+    add_ssaaaa(A4, A3, A4, A3, 0, carry2); \
+  } while (0)
+#endif
+
+/* H = H * R mod 2¹³⁰-5 */
+#define MUL_MOD_1305_32(H4, H3, H2, H1, H0, R3, R2, R1, R0, \
+                        R3_MULT5, R2_MULT5, R1_MULT5) do { \
+    u32 x0_lo, x0_hi, x1_lo, x1_hi, x2_lo, x2_hi, x3_lo, x3_hi; \
+    u32 t0_lo, t0_hi; \
+    \
+    /* x = a * r (partial mod 2^130-5) */ \
+    umul_ppmm(x0_hi, x0_lo, H0, R0);  /* h0 * r0 */ \
+    umul_ppmm(x1_hi, x1_lo, H0, R1);  /* h0 * r1 */ \
+    umul_ppmm(x2_hi, x2_lo, H0, R2);  /* h0 * r2 */ \
+    umul_ppmm(x3_hi, x3_lo, H0, R3);  /* h0 * r3 */ \
+    \
+    UMUL_ADD_32(x0_hi, x0_lo, H1, R3_MULT5); /* h1 * r3 mod 2^130-5 */ \
+    UMUL_ADD_32(x1_hi, x1_lo, H1, R0);       /* h1 * r0 */ \
+    UMUL_ADD_32(x2_hi, x2_lo, H1, R1);       /* h1 * r1 */ \
+    UMUL_ADD_32(x3_hi, x3_lo, H1, R2);       /* h1 * r2 */ \
+    \
+    UMUL_ADD_32(x0_hi, x0_lo, H2, R2_MULT5); /* h2 * r2 mod 2^130-5 */ \
+    UMUL_ADD_32(x1_hi, x1_lo, H2, R3_MULT5); /* h2 * r3 mod 2^130-5 */ \
+    UMUL_ADD_32(x2_hi, x2_lo, H2, R0);       /* h2 * r0 */ \
+    UMUL_ADD_32(x3_hi, x3_lo, H2, R1);       /* h2 * r1 */ \
+    \
+    UMUL_ADD_32(x0_hi, x0_lo, H3, R1_MULT5); /* h3 * r1 mod 2^130-5 */ \
+    H1 = x0_hi; \
+    UMUL_ADD_32(x1_hi, x1_lo, H3, R2_MULT5); /* h3 * r2 mod 2^130-5 */ \
+    UMUL_ADD_32(x2_hi, x2_lo, H3, R3_MULT5); /* h3 * r3 mod 2^130-5 */ \
+    UMUL_ADD_32(x3_hi, x3_lo, H3, R0);       /* h3 * r0 */ \
+    \
+    t0_lo = H4 * R1_MULT5; /* h4 * r1 mod 2^130-5 */ \
+    t0_hi = H4 * R2_MULT5; /* h4 * r2 mod 2^130-5 */ \
+    add_ssaaaa(H2, x1_lo, x1_hi, x1_lo, 0, t0_lo); \
+    add_ssaaaa(H3, x2_lo, x2_hi, x2_lo, 0, t0_hi); \
+    t0_lo = H4 * R3_MULT5; /* h4 * r3 mod 2^130-5 */ \
+    t0_hi = H4 * R0;       /* h4 * r0 */ \
+    add_ssaaaa(H4, x3_lo, x3_hi, x3_lo, t0_hi, t0_lo); \
+    \
+    /* carry propagation */ \
+    H0 = (H4 >> 2) * 5; /* msb mod 2^130-5 */ \
+    H4 = H4 & 3; \
+    ADD_1305_32(H4, H3, H2, H1, H0, 0, x3_lo, x2_lo, x1_lo, x0_lo); \
+  } while (0)
+
+#ifndef HAVE_ASM_POLY1305_BLOCKS
+
+static unsigned int
+poly1305_blocks (poly1305_context_t *ctx, const byte *buf, size_t len,
+                byte high_pad)
+{
+  POLY1305_STATE *st = &ctx->state;
+  u32 r1_mult5, r2_mult5, r3_mult5;
+  u32 h0, h1, h2, h3, h4;
+  u32 m0, m1, m2, m3, m4;
+
+  m4 = high_pad;
+
+  h0 = st->h[0];
+  h1 = st->h[1];
+  h2 = st->h[2];
+  h3 = st->h[3];
+  h4 = st->h[4];
+
+  r1_mult5 = (st->r[1] >> 2) + st->r[1];
+  r2_mult5 = (st->r[2] >> 2) + st->r[2];
+  r3_mult5 = (st->r[3] >> 2) + st->r[3];
+
+  while (len >= POLY1305_BLOCKSIZE)
+    {
+      m0 = buf_get_le32(buf + 0);
+      m1 = buf_get_le32(buf + 4);
+      m2 = buf_get_le32(buf + 8);
+      m3 = buf_get_le32(buf + 12);
+
+      /* a = h + m */
+      ADD_1305_32(h4, h3, h2, h1, h0, m4, m3, m2, m1, m0);
+
+      /* h = a * r (partial mod 2^130-5) */
+      MUL_MOD_1305_32(h4, h3, h2, h1, h0,
+                     st->r[3], st->r[2], st->r[1], st->r[0],
+                     r3_mult5, r2_mult5, r1_mult5);
+
+      buf += POLY1305_BLOCKSIZE;
+      len -= POLY1305_BLOCKSIZE;
+    }
+
+  st->h[0] = h0;
+  st->h[1] = h1;
+  st->h[2] = h2;
+  st->h[3] = h3;
+  st->h[4] = h4;
+
+  return 6 * sizeof (void *) + 28 * sizeof (u32);
+}
+
+#endif /* !HAVE_ASM_POLY1305_BLOCKS */
+
+static unsigned int poly1305_final (poly1305_context_t *ctx,
+                                   byte mac[POLY1305_TAGLEN])
+{
+  POLY1305_STATE *st = &ctx->state;
+  unsigned int burn = 0;
+  u32 carry, tmp0, tmp1, tmp2, u;
+  u32 h4, h3, h2, h1, h0;
+
+  /* process the remaining block */
+  if (ctx->leftover)
+    {
+      ctx->buffer[ctx->leftover++] = 1;
+      if (ctx->leftover < POLY1305_BLOCKSIZE)
+       {
+         memset (&ctx->buffer[ctx->leftover], 0,
+                 POLY1305_BLOCKSIZE - ctx->leftover);
+         ctx->leftover = POLY1305_BLOCKSIZE;
+       }
+      burn = poly1305_blocks (ctx, ctx->buffer, POLY1305_BLOCKSIZE, 0);
+    }
+
+  h0 = st->h[0];
+  h1 = st->h[1];
+  h2 = st->h[2];
+  h3 = st->h[3];
+  h4 = st->h[4];
+
+  /* check if h is more than 2^130-5, by adding 5. */
+  add_ssaaaa(carry, tmp0, 0, h0, 0, 5);
+  add_ssaaaa(carry, tmp0, 0, carry, 0, h1);
+  add_ssaaaa(carry, tmp0, 0, carry, 0, h2);
+  add_ssaaaa(carry, tmp0, 0, carry, 0, h3);
+  u = (carry + h4) >> 2; /* u == 0 or 1 */
+
+  /* minus 2^130-5 ... (+5) */
+  u = (-u) & 5;
+  add_ssaaaa(carry, h0, 0, h0, 0, u);
+  add_ssaaaa(carry, h1, 0, h1, 0, carry);
+  add_ssaaaa(carry, h2, 0, h2, 0, carry);
+  add_ssaaaa(carry, h3, 0, h3, 0, carry);
+
+  /* add high part of key + h */
+  add_ssaaaa(tmp0, h0, 0, h0, 0, st->k[0]);
+  add_ssaaaa(tmp1, h1, 0, h1, 0, st->k[1]);
+  add_ssaaaa(tmp1, h1, tmp1, h1, 0, tmp0);
+  add_ssaaaa(tmp2, h2, 0, h2, 0, st->k[2]);
+  add_ssaaaa(tmp2, h2, tmp2, h2, 0, tmp1);
+  add_ssaaaa(carry, h3, 0, h3, 0, st->k[3]);
+  h3 += tmp2;
+
+  buf_put_le32(mac + 0, h0);
+  buf_put_le32(mac + 4, h1);
+  buf_put_le32(mac + 8, h2);
+  buf_put_le32(mac + 12, h3);
+
+  /* burn_stack */
+  return 4 * sizeof (void *) + 10 * sizeof (u32) + burn;
+}
+
+#endif /* USE_MPI_32BIT */
+
+
+unsigned int
+_gcry_poly1305_update_burn (poly1305_context_t *ctx, const byte *m,
+                           size_t bytes)
+{
+  unsigned int burn = 0;
+
+  /* handle leftover */
+  if (ctx->leftover)
+    {
+      size_t want = (POLY1305_BLOCKSIZE - ctx->leftover);
+      if (want > bytes)
+       want = bytes;
+      buf_cpy (ctx->buffer + ctx->leftover, m, want);
+      bytes -= want;
+      m += want;
+      ctx->leftover += want;
+      if (ctx->leftover < POLY1305_BLOCKSIZE)
+       return 0;
+      burn = poly1305_blocks (ctx, ctx->buffer, POLY1305_BLOCKSIZE, 1);
+      ctx->leftover = 0;
+    }
+
+  /* process full blocks */
+  if (bytes >= POLY1305_BLOCKSIZE)
+    {
+      size_t nblks = bytes / POLY1305_BLOCKSIZE;
+      burn = poly1305_blocks (ctx, m, nblks * POLY1305_BLOCKSIZE, 1);
+      m += nblks * POLY1305_BLOCKSIZE;
+      bytes -= nblks * POLY1305_BLOCKSIZE;
+    }
+
+  /* store leftover */
+  if (bytes)
+    {
+      buf_cpy (ctx->buffer + ctx->leftover, m, bytes);
+      ctx->leftover += bytes;
+    }
+
+  return burn;
+}
+
+
+void
+_gcry_poly1305_update (poly1305_context_t *ctx, const byte *m, size_t bytes)
+{
+  unsigned int burn;
+
+  burn = _gcry_poly1305_update_burn (ctx, m, bytes);
+
+  if (burn)
+    _gcry_burn_stack (burn);
+}
+
+
+void
+_gcry_poly1305_finish (poly1305_context_t *ctx, byte mac[POLY1305_TAGLEN])
+{
+  unsigned int burn;
+
+  burn = poly1305_final (ctx, mac);
+
+  _gcry_burn_stack (burn);
+}
+
+
+gcry_err_code_t
+_gcry_poly1305_init (poly1305_context_t * ctx, const byte * key,
+                    size_t keylen)
+{
+  static int initialized;
+  static const char *selftest_failed;
+
+  if (!initialized)
+    {
+      initialized = 1;
+      selftest_failed = selftest ();
+      if (selftest_failed)
+       log_error ("Poly1305 selftest failed (%s)\n", selftest_failed);
+    }
+
+  if (keylen != POLY1305_KEYLEN)
+    return GPG_ERR_INV_KEYLEN;
+
+  if (selftest_failed)
+    return GPG_ERR_SELFTEST_FAILED;
+
+  poly1305_init (ctx, key);
+
+  return 0;
+}
+
+
+static void
+poly1305_auth (byte mac[POLY1305_TAGLEN], const byte * m, size_t bytes,
+              const byte * key)
+{
+  poly1305_context_t ctx;
+
+  memset (&ctx, 0, sizeof (ctx));
+
+  _gcry_poly1305_init (&ctx, key, POLY1305_KEYLEN);
+  _gcry_poly1305_update (&ctx, m, bytes);
+  _gcry_poly1305_finish (&ctx, mac);
+
+  wipememory (&ctx, sizeof (ctx));
+}
+
+
+static const char *
+selftest (void)
+{
+  /* example from nacl */
+  static const byte nacl_key[POLY1305_KEYLEN] = {
+    0xee, 0xa6, 0xa7, 0x25, 0x1c, 0x1e, 0x72, 0x91,
+    0x6d, 0x11, 0xc2, 0xcb, 0x21, 0x4d, 0x3c, 0x25,
+    0x25, 0x39, 0x12, 0x1d, 0x8e, 0x23, 0x4e, 0x65,
+    0x2d, 0x65, 0x1f, 0xa4, 0xc8, 0xcf, 0xf8, 0x80,
+  };
+
+  static const byte nacl_msg[131] = {
+    0x8e, 0x99, 0x3b, 0x9f, 0x48, 0x68, 0x12, 0x73,
+    0xc2, 0x96, 0x50, 0xba, 0x32, 0xfc, 0x76, 0xce,
+    0x48, 0x33, 0x2e, 0xa7, 0x16, 0x4d, 0x96, 0xa4,
+    0x47, 0x6f, 0xb8, 0xc5, 0x31, 0xa1, 0x18, 0x6a,
+    0xc0, 0xdf, 0xc1, 0x7c, 0x98, 0xdc, 0xe8, 0x7b,
+    0x4d, 0xa7, 0xf0, 0x11, 0xec, 0x48, 0xc9, 0x72,
+    0x71, 0xd2, 0xc2, 0x0f, 0x9b, 0x92, 0x8f, 0xe2,
+    0x27, 0x0d, 0x6f, 0xb8, 0x63, 0xd5, 0x17, 0x38,
+    0xb4, 0x8e, 0xee, 0xe3, 0x14, 0xa7, 0xcc, 0x8a,
+    0xb9, 0x32, 0x16, 0x45, 0x48, 0xe5, 0x26, 0xae,
+    0x90, 0x22, 0x43, 0x68, 0x51, 0x7a, 0xcf, 0xea,
+    0xbd, 0x6b, 0xb3, 0x73, 0x2b, 0xc0, 0xe9, 0xda,
+    0x99, 0x83, 0x2b, 0x61, 0xca, 0x01, 0xb6, 0xde,
+    0x56, 0x24, 0x4a, 0x9e, 0x88, 0xd5, 0xf9, 0xb3,
+    0x79, 0x73, 0xf6, 0x22, 0xa4, 0x3d, 0x14, 0xa6,
+    0x59, 0x9b, 0x1f, 0x65, 0x4c, 0xb4, 0x5a, 0x74,
+    0xe3, 0x55, 0xa5
+  };
+
+  static const byte nacl_mac[16] = {
+    0xf3, 0xff, 0xc7, 0x70, 0x3f, 0x94, 0x00, 0xe5,
+    0x2a, 0x7d, 0xfb, 0x4b, 0x3d, 0x33, 0x05, 0xd9
+  };
+
+  /* generates a final value of (2^130 - 2) == 3 */
+  static const byte wrap_key[POLY1305_KEYLEN] = {
+    0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  };
+
+  static const byte wrap_msg[16] = {
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+  };
+
+  static const byte wrap_mac[16] = {
+    0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+  };
+
+  /* mac of the macs of messages of length 0 to 256, where the key and messages
+   * have all their values set to the length
+   */
+  static const byte total_key[POLY1305_KEYLEN] = {
+    0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+    0xff, 0xfe, 0xfd, 0xfc, 0xfb, 0xfa, 0xf9,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+  };
+
+  static const byte total_mac[16] = {
+    0x64, 0xaf, 0xe2, 0xe8, 0xd6, 0xad, 0x7b, 0xbd,
+    0xd2, 0x87, 0xf9, 0x7c, 0x44, 0x62, 0x3d, 0x39
+  };
+
+  poly1305_context_t ctx;
+  poly1305_context_t total_ctx;
+  byte all_key[POLY1305_KEYLEN];
+  byte all_msg[256];
+  byte mac[16];
+  size_t i, j;
+
+  memset (&ctx, 0, sizeof (ctx));
+  memset (&total_ctx, 0, sizeof (total_ctx));
+
+  memset (mac, 0, sizeof (mac));
+  poly1305_auth (mac, nacl_msg, sizeof (nacl_msg), nacl_key);
+  if (memcmp (nacl_mac, mac, sizeof (nacl_mac)) != 0)
+    return "Poly1305 test 1 failed.";
+
+  /* SSE2/AVX have a 32 byte block size, but also support 64 byte blocks, so
+   * make sure everything still works varying between them */
+  memset (mac, 0, sizeof (mac));
+  _gcry_poly1305_init (&ctx, nacl_key, POLY1305_KEYLEN);
+  _gcry_poly1305_update (&ctx, nacl_msg + 0, 32);
+  _gcry_poly1305_update (&ctx, nacl_msg + 32, 64);
+  _gcry_poly1305_update (&ctx, nacl_msg + 96, 16);
+  _gcry_poly1305_update (&ctx, nacl_msg + 112, 8);
+  _gcry_poly1305_update (&ctx, nacl_msg + 120, 4);
+  _gcry_poly1305_update (&ctx, nacl_msg + 124, 2);
+  _gcry_poly1305_update (&ctx, nacl_msg + 126, 1);
+  _gcry_poly1305_update (&ctx, nacl_msg + 127, 1);
+  _gcry_poly1305_update (&ctx, nacl_msg + 128, 1);
+  _gcry_poly1305_update (&ctx, nacl_msg + 129, 1);
+  _gcry_poly1305_update (&ctx, nacl_msg + 130, 1);
+  _gcry_poly1305_finish (&ctx, mac);
+  if (memcmp (nacl_mac, mac, sizeof (nacl_mac)) != 0)
+    return "Poly1305 test 2 failed.";
+
+  memset (mac, 0, sizeof (mac));
+  poly1305_auth (mac, wrap_msg, sizeof (wrap_msg), wrap_key);
+  if (memcmp (wrap_mac, mac, sizeof (nacl_mac)) != 0)
+    return "Poly1305 test 3 failed.";
+
+  _gcry_poly1305_init (&total_ctx, total_key, POLY1305_KEYLEN);
+  for (i = 0; i < 256; i++)
+    {
+      /* set key and message to 'i,i,i..' */
+      for (j = 0; j < sizeof (all_key); j++)
+       all_key[j] = i;
+      for (j = 0; j < i; j++)
+       all_msg[j] = i;
+      poly1305_auth (mac, all_msg, i, all_key);
+      _gcry_poly1305_update (&total_ctx, mac, 16);
+    }
+  _gcry_poly1305_finish (&total_ctx, mac);
+  if (memcmp (total_mac, mac, sizeof (total_mac)) != 0)
+    return "Poly1305 test 4 failed.";
+
+  return NULL;
+}
diff --git a/grub-core/lib/libgcrypt/cipher/primegen.c 
b/grub-core/lib/libgcrypt/cipher/primegen.c
index b12e79b19..e24de4dc7 100644
--- a/grub-core/lib/libgcrypt/cipher/primegen.c
+++ b/grub-core/lib/libgcrypt/cipher/primegen.c
@@ -29,7 +29,6 @@
 #include "g10lib.h"
 #include "mpi.h"
 #include "cipher.h"
-#include "ath.h"
 
 static gcry_mpi_t gen_prime (unsigned int nbits, int secret, int randomlevel,
                              int (*extra_check)(void *, gcry_mpi_t),
@@ -141,9 +140,17 @@ struct primepool_s
 };
 struct primepool_s *primepool;
 /* Mutex used to protect access to the primepool.  */
-static ath_mutex_t primepool_lock = ATH_MUTEX_INITIALIZER;
+GPGRT_LOCK_DEFINE (primepool_lock);
 
 
+gcry_err_code_t
+_gcry_primegen_init (void)
+{
+  /* This function was formerly used to initialize the primepool
+     Mutex. This has been replace by a static initialization.  */
+  return 0;
+}
+
 
 /* Save PRIME which has been generated at RANDOMLEVEL for later
    use. Needs to be called while primepool_lock is being hold.  Note
@@ -168,7 +175,7 @@ save_pool_prime (gcry_mpi_t prime, gcry_random_level_t 
randomlevel)
         {
           if (i >= n/3*2)
             {
-              gcry_mpi_release (item2->prime);
+              _gcry_mpi_release (item2->prime);
               item2->prime = NULL;
               if (!item)
                 item = item2;
@@ -177,11 +184,11 @@ save_pool_prime (gcry_mpi_t prime, gcry_random_level_t 
randomlevel)
     }
   if (!item)
     {
-      item = gcry_calloc (1, sizeof *item);
+      item = xtrycalloc (1, sizeof *item);
       if (!item)
         {
           /* Out of memory.  Silently giving up. */
-          gcry_mpi_release (prime);
+          _gcry_mpi_release (prime);
           return;
         }
       item->next = primepool;
@@ -195,7 +202,7 @@ save_pool_prime (gcry_mpi_t prime, gcry_random_level_t 
randomlevel)
 
 /* Return a prime for the prime pool or NULL if none has been found.
    The prime needs to match NBITS and randomlevel. This function needs
-   to be called why the primepool_look is being hold. */
+   to be called with the primepool_look is being hold. */
 static gcry_mpi_t
 get_pool_prime (unsigned int nbits, gcry_random_level_t randomlevel)
 {
@@ -365,7 +372,7 @@ prime_generate_internal (int need_q_factor,
                pbits, req_qbits, qbits, fbits, n);
 
   /* Allocate an integer to old the new prime. */
-  prime = gcry_mpi_new (pbits);
+  prime = mpi_new (pbits);
 
   /* Generate first prime factor.  */
   q = gen_prime (qbits, is_secret, randomlevel, NULL, NULL);
@@ -375,7 +382,7 @@ prime_generate_internal (int need_q_factor,
     q_factor = gen_prime (req_qbits, is_secret, randomlevel, NULL, NULL);
 
   /* Allocate an array to hold all factors + 2 for later usage.  */
-  factors = gcry_calloc (n + 2, sizeof (*factors));
+  factors = xtrycalloc (n + 2, sizeof (*factors));
   if (!factors)
     {
       err = gpg_err_code_from_errno (errno);
@@ -383,7 +390,7 @@ prime_generate_internal (int need_q_factor,
     }
 
   /* Allocate an array to track pool usage. */
-  pool_in_use = gcry_calloc (n, sizeof *pool_in_use);
+  pool_in_use = xtrymalloc (n * sizeof *pool_in_use);
   if (!pool_in_use)
     {
       err = gpg_err_code_from_errno (errno);
@@ -402,7 +409,7 @@ prime_generate_internal (int need_q_factor,
     m += 5;
   if (m < 30)
     m = 30;
-  pool = gcry_calloc (m , sizeof (*pool));
+  pool = xtrycalloc (m , sizeof (*pool));
   if (! pool)
     {
       err = gpg_err_code_from_errno (errno);
@@ -428,19 +435,18 @@ prime_generate_internal (int need_q_factor,
             }
 
           /* Init m_out_of_n().  */
-          perms = gcry_calloc (1, m);
+          perms = xtrycalloc (1, m);
           if (!perms)
             {
               err = gpg_err_code_from_errno (errno);
               goto leave;
             }
 
-          if (ath_mutex_lock (&primepool_lock))
-            {
-              err = GPG_ERR_INTERNAL;
-              goto leave;
-            }
+          err = gpgrt_lock_lock (&primepool_lock);
+          if (err)
+            goto leave;
           is_locked = 1;
+
           for (i = 0; i < n; i++)
             {
               perms[i] = 1;
@@ -459,11 +465,9 @@ prime_generate_internal (int need_q_factor,
                   pool[i] = get_pool_prime (fbits, poolrandomlevel);
                   if (!pool[i])
                     {
-                      if (ath_mutex_unlock (&primepool_lock))
-                        {
-                          err = GPG_ERR_INTERNAL;
-                          goto leave;
-                        }
+                      err = gpgrt_lock_unlock (&primepool_lock);
+                      if (err)
+                        goto leave;
                       is_locked = 0;
                     }
                 }
@@ -472,23 +476,20 @@ prime_generate_internal (int need_q_factor,
               pool_in_use[i] = i;
               factors[i] = pool[i];
             }
-          if (is_locked && ath_mutex_unlock (&primepool_lock))
-            {
-              err = GPG_ERR_INTERNAL;
-              goto leave;
-            }
+
+          if (is_locked && (err = gpgrt_lock_unlock (&primepool_lock)))
+            goto leave;
           is_locked = 0;
         }
       else
         {
           /* Get next permutation. */
           m_out_of_n ( (char*)perms, n, m);
-          if (ath_mutex_lock (&primepool_lock))
-            {
-              err = GPG_ERR_INTERNAL;
-              goto leave;
-            }
+
+          if ((err = gpgrt_lock_lock (&primepool_lock)))
+            goto leave;
           is_locked = 1;
+
           for (i = j = 0; (i < m) && (j < n); i++)
             if (perms[i])
               {
@@ -498,11 +499,8 @@ prime_generate_internal (int need_q_factor,
                     pool[i] = get_pool_prime (fbits, poolrandomlevel);
                     if (!pool[i])
                       {
-                        if (ath_mutex_unlock (&primepool_lock))
-                          {
-                            err = GPG_ERR_INTERNAL;
-                            goto leave;
-                          }
+                        if ((err = gpgrt_lock_unlock (&primepool_lock)))
+                          goto leave;
                         is_locked = 0;
                       }
                   }
@@ -511,16 +509,15 @@ prime_generate_internal (int need_q_factor,
                 pool_in_use[j] = i;
                 factors[j++] = pool[i];
               }
-          if (is_locked && ath_mutex_unlock (&primepool_lock))
-            {
-              err = GPG_ERR_INTERNAL;
-              goto leave;
-            }
+
+          if (is_locked && (err = gpgrt_lock_unlock (&primepool_lock)))
+            goto leave;
           is_locked = 0;
+
           if (i == n)
             {
               /* Ran out of permutations: Allocate new primes.  */
-              gcry_free (perms);
+              xfree (perms);
               perms = NULL;
               progress ('!');
               goto next_try;
@@ -575,25 +572,25 @@ prime_generate_internal (int need_q_factor,
   if (DBG_CIPHER)
     {
       progress ('\n');
-      log_mpidump ("prime    : ", prime);
-      log_mpidump ("factor  q: ", q);
+      log_mpidump ("prime    ", prime);
+      log_mpidump ("factor  q", q);
       if (need_q_factor)
-        log_mpidump ("factor q0: ", q_factor);
+        log_mpidump ("factor q0", q_factor);
       for (i = 0; i < n; i++)
-        log_mpidump ("factor pi: ", factors[i]);
+        log_mpidump ("factor pi", factors[i]);
       log_debug ("bit sizes: prime=%u, q=%u",
                  mpi_get_nbits (prime), mpi_get_nbits (q));
       if (need_q_factor)
-        log_debug (", q0=%u", mpi_get_nbits (q_factor));
+        log_printf (", q0=%u", mpi_get_nbits (q_factor));
       for (i = 0; i < n; i++)
-        log_debug (", p%d=%u", i, mpi_get_nbits (factors[i]));
-      progress('\n');
+        log_printf (", p%d=%u", i, mpi_get_nbits (factors[i]));
+      log_printf ("\n");
     }
 
   if (ret_factors)
     {
       /* Caller wants the factors.  */
-      factors_new = gcry_calloc (n + 4, sizeof (*factors_new));
+      factors_new = xtrycalloc (n + 4, sizeof (*factors_new));
       if (! factors_new)
         {
           err = gpg_err_code_from_errno (errno);
@@ -603,7 +600,7 @@ prime_generate_internal (int need_q_factor,
       if (all_factors)
         {
           i = 0;
-          factors_new[i++] = gcry_mpi_set_ui (NULL, 2);
+          factors_new[i++] = mpi_set_ui (NULL, 2);
           factors_new[i++] = mpi_copy (q);
           if (need_q_factor)
             factors_new[i++] = mpi_copy (q_factor);
@@ -625,51 +622,44 @@ prime_generate_internal (int need_q_factor,
         }
     }
 
-  if (g)
+  if (g && need_q_factor)
+    err = GPG_ERR_NOT_IMPLEMENTED;
+  else if (g)
     {
       /* Create a generator (start with 3).  */
       gcry_mpi_t tmp = mpi_alloc (mpi_get_nlimbs (prime));
       gcry_mpi_t b = mpi_alloc (mpi_get_nlimbs (prime));
       gcry_mpi_t pmin1 = mpi_alloc (mpi_get_nlimbs (prime));
 
-      if (need_q_factor)
-        err = GPG_ERR_NOT_IMPLEMENTED;
-      else
+      factors[n] = q;
+      factors[n + 1] = mpi_alloc_set_ui (2);
+      mpi_sub_ui (pmin1, prime, 1);
+      mpi_set_ui (g, 2);
+      do
         {
-          factors[n] = q;
-          factors[n + 1] = mpi_alloc_set_ui (2);
-          mpi_sub_ui (pmin1, prime, 1);
-          mpi_set_ui (g, 2);
-          do
+          mpi_add_ui (g, g, 1);
+          if (DBG_CIPHER)
+            log_printmpi ("checking g", g);
+          else
+            progress('^');
+          for (i = 0; i < n + 2; i++)
             {
-              mpi_add_ui (g, g, 1);
-              if (DBG_CIPHER)
-                {
-                  log_debug ("checking g:");
-                  gcry_mpi_dump (g);
-                  log_printf ("\n");
-                }
-              else
-                progress('^');
-              for (i = 0; i < n + 2; i++)
-                {
-                  mpi_fdiv_q (tmp, pmin1, factors[i]);
-                  /* No mpi_pow(), but it is okay to use this with mod
-                     prime.  */
-                  gcry_mpi_powm (b, g, tmp, prime);
-                  if (! mpi_cmp_ui (b, 1))
-                    break;
-                }
-              if (DBG_CIPHER)
-                progress('\n');
+              mpi_fdiv_q (tmp, pmin1, factors[i]);
+              /* No mpi_pow(), but it is okay to use this with mod
+                 prime.  */
+              mpi_powm (b, g, tmp, prime);
+              if (! mpi_cmp_ui (b, 1))
+                break;
             }
-          while (i < n + 2);
-
-          mpi_free (factors[n+1]);
-          mpi_free (tmp);
-          mpi_free (b);
-          mpi_free (pmin1);
+          if (DBG_CIPHER)
+            progress('\n');
         }
+      while (i < n + 2);
+
+      mpi_free (factors[n+1]);
+      mpi_free (tmp);
+      mpi_free (b);
+      mpi_free (pmin1);
     }
 
   if (! DBG_CIPHER)
@@ -679,7 +669,7 @@ prime_generate_internal (int need_q_factor,
  leave:
   if (pool)
     {
-      is_locked = !ath_mutex_lock (&primepool_lock);
+      is_locked = !gpgrt_lock_lock (&primepool_lock);
       for(i = 0; i < m; i++)
         {
           if (pool[i])
@@ -696,16 +686,16 @@ prime_generate_internal (int need_q_factor,
                 mpi_free (pool[i]);
             }
         }
-      if (is_locked && ath_mutex_unlock (&primepool_lock))
-        err = GPG_ERR_INTERNAL;
+      if (is_locked)
+        err = gpgrt_lock_unlock (&primepool_lock);
       is_locked = 0;
-      gcry_free (pool);
+      xfree (pool);
     }
-  gcry_free (pool_in_use);
+  xfree (pool_in_use);
   if (factors)
-    gcry_free (factors);  /* Factors are shallow copies.  */
+    xfree (factors);  /* Factors are shallow copies.  */
   if (perms)
-    gcry_free (perms);
+    xfree (perms);
 
   mpi_free (val_2);
   mpi_free (q);
@@ -723,7 +713,7 @@ prime_generate_internal (int need_q_factor,
        {
          for (i = 0; factors_new[i]; i++)
            mpi_free (factors_new[i]);
-         gcry_free (factors_new);
+         xfree (factors_new);
        }
       mpi_free (prime);
     }
@@ -733,19 +723,22 @@ prime_generate_internal (int need_q_factor,
 
 
 /* Generate a prime used for discrete logarithm algorithms; i.e. this
-   prime will be public and no strong random is required.  */
-gcry_mpi_t
+   prime will be public and no strong random is required.  On success
+   R_PRIME receives a new MPI with the prime.  On error R_PRIME is set
+   to NULL and an error code is returned.  If RET_FACTORS is not NULL
+   it is set to an allocated array of factors on success or to NULL on
+   error.  */
+gcry_err_code_t
 _gcry_generate_elg_prime (int mode, unsigned pbits, unsigned qbits,
-                         gcry_mpi_t g, gcry_mpi_t **ret_factors)
+                         gcry_mpi_t g,
+                          gcry_mpi_t *r_prime, gcry_mpi_t **ret_factors)
 {
-  gcry_mpi_t prime = NULL;
-
-  if (prime_generate_internal ((mode == 1), &prime, pbits, qbits, g,
-                               ret_factors, GCRY_WEAK_RANDOM, 0, 0,
-                               NULL, NULL))
-    prime = NULL; /* (Should be NULL in the error case anyway.)  */
-
-  return prime;
+  *r_prime = NULL;
+  if (ret_factors)
+    *ret_factors = NULL;
+  return prime_generate_internal ((mode == 1), r_prime, pbits, qbits, g,
+                                  ret_factors, GCRY_WEAK_RANDOM, 0, 0,
+                                  NULL, NULL);
 }
 
 
@@ -765,11 +758,12 @@ gen_prime (unsigned int nbits, int secret, int 
randomlevel,
   if (nbits < 16)
     log_fatal ("can't generate a prime with less than %d bits\n", 16);
 
-  mods = gcry_xcalloc( no_of_small_prime_numbers, sizeof *mods);
+  mods = (secret? xmalloc_secure (no_of_small_prime_numbers * sizeof *mods)
+          /* */ : xmalloc (no_of_small_prime_numbers * sizeof *mods));
   /* Make nbits fit into gcry_mpi_t implementation. */
   val_2  = mpi_alloc_set_ui( 2 );
   val_3 = mpi_alloc_set_ui( 3);
-  prime  = secret? gcry_mpi_snew ( nbits ): gcry_mpi_new ( nbits );
+  prime  = secret? mpi_snew (nbits): mpi_new (nbits);
   result = mpi_alloc_like( prime );
   pminus1= mpi_alloc_like( prime );
   ptest  = mpi_alloc_like( prime );
@@ -779,7 +773,7 @@ gen_prime (unsigned int nbits, int secret, int randomlevel,
       int dotcount=0;
 
       /* generate a random number */
-      gcry_mpi_randomize( prime, nbits, randomlevel );
+      _gcry_mpi_randomize( prime, nbits, randomlevel );
 
       /* Set high order bit to 1, set low order bit to 1.  If we are
          generating a secret prime we are most probably doing that
@@ -814,7 +808,7 @@ gen_prime (unsigned int nbits, int secret, int randomlevel,
           /* Do a fast Fermat test now. */
           count2++;
           mpi_sub_ui( pminus1, ptest, 1);
-          gcry_mpi_powm( result, val_2, pminus1, ptest );
+          mpi_powm( result, val_2, pminus1, ptest );
           if ( !mpi_cmp_ui( result, 1 ) )
             {
               /* Not composite, perform stronger tests */
@@ -841,7 +835,7 @@ gen_prime (unsigned int nbits, int secret, int randomlevel,
                       mpi_free(result);
                       mpi_free(pminus1);
                       mpi_free(prime);
-                      gcry_free(mods);
+                      xfree(mods);
                       return ptest;
                     }
                 }
@@ -872,7 +866,7 @@ check_prime( gcry_mpi_t prime, gcry_mpi_t val_2, int 
rm_rounds,
   for (i=0; (x = small_prime_numbers[i]); i++ )
     {
       if ( mpi_divisible_ui( prime, x ) )
-        return 0;
+        return !mpi_cmp_ui (prime, x);
     }
 
   /* A quick Fermat test. */
@@ -880,7 +874,7 @@ check_prime( gcry_mpi_t prime, gcry_mpi_t val_2, int 
rm_rounds,
     gcry_mpi_t result = mpi_alloc_like( prime );
     gcry_mpi_t pminus1 = mpi_alloc_like( prime );
     mpi_sub_ui( pminus1, prime, 1);
-    gcry_mpi_powm( result, val_2, pminus1, prime );
+    mpi_powm( result, val_2, pminus1, prime );
     mpi_free( pminus1 );
     if ( mpi_cmp_ui( result, 1 ) )
       {
@@ -942,27 +936,32 @@ is_prime (gcry_mpi_t n, int steps, unsigned int *count)
         }
       else
         {
-          gcry_mpi_randomize( x, nbits, GCRY_WEAK_RANDOM );
-
-          /* Make sure that the number is smaller than the prime and
-             keep the randomness of the high bit. */
-          if ( mpi_test_bit ( x, nbits-2) )
-            {
-              mpi_set_highbit ( x, nbits-2); /* Clear all higher bits. */
-            }
-          else
+          /* We need to loop to avoid an X with value 0 or 1.  */
+          do
             {
-              mpi_set_highbit( x, nbits-2 );
-              mpi_clear_bit( x, nbits-2 );
+              _gcry_mpi_randomize (x, nbits, GCRY_WEAK_RANDOM);
+
+              /* Make sure that the number is smaller than the prime
+               * and keep the randomness of the high bit. */
+              if (mpi_test_bit (x, nbits-2))
+                {
+                  mpi_set_highbit (x, nbits-2); /* Clear all higher bits. */
+                }
+              else
+                {
+                  mpi_set_highbit (x, nbits-2);
+                  mpi_clear_bit (x, nbits-2);
+                }
             }
-          gcry_assert (mpi_cmp (x, nminus1) < 0 && mpi_cmp_ui (x, 1) > 0);
+          while (mpi_cmp_ui (x, 1) <= 0);
+          gcry_assert (mpi_cmp (x, nminus1) < 0);
        }
-      gcry_mpi_powm ( y, x, q, n);
+      mpi_powm ( y, x, q, n);
       if ( mpi_cmp_ui(y, 1) && mpi_cmp( y, nminus1 ) )
         {
           for ( j=1; j < k && mpi_cmp( y, nminus1 ); j++ )
             {
-              gcry_mpi_powm(y, y, a2, n);
+              mpi_powm(y, y, a2, n);
               if( !mpi_cmp_ui( y, 1 ) )
                 goto leave; /* Not a prime. */
             }
@@ -1114,105 +1113,129 @@ m_out_of_n ( char *array, int m, int n )
    non-zero, allocate a new, NULL-terminated array holding the prime
    factors and store it in FACTORS.  FLAGS might be used to influence
    the prime number generation process.  */
-gcry_error_t
-gcry_prime_generate (gcry_mpi_t *prime, unsigned int prime_bits,
-                    unsigned int factor_bits, gcry_mpi_t **factors,
-                    gcry_prime_check_func_t cb_func, void *cb_arg,
-                    gcry_random_level_t random_level,
-                    unsigned int flags)
+gcry_err_code_t
+_gcry_prime_generate (gcry_mpi_t *prime, unsigned int prime_bits,
+                      unsigned int factor_bits, gcry_mpi_t **factors,
+                      gcry_prime_check_func_t cb_func, void *cb_arg,
+                      gcry_random_level_t random_level,
+                      unsigned int flags)
 {
-  gcry_err_code_t err = GPG_ERR_NO_ERROR;
+  gcry_err_code_t rc = 0;
   gcry_mpi_t *factors_generated = NULL;
   gcry_mpi_t prime_generated = NULL;
   unsigned int mode = 0;
 
   if (!prime)
-    return gpg_error (GPG_ERR_INV_ARG);
+    return GPG_ERR_INV_ARG;
   *prime = NULL;
 
   if (flags & GCRY_PRIME_FLAG_SPECIAL_FACTOR)
     mode = 1;
 
   /* Generate.  */
-  err = prime_generate_internal ((mode==1), &prime_generated, prime_bits,
-                                factor_bits, NULL,
-                                 factors? &factors_generated : NULL,
-                                random_level, flags, 1,
-                                 cb_func, cb_arg);
+  rc = prime_generate_internal ((mode==1), &prime_generated, prime_bits,
+                                factor_bits, NULL,
+                                factors? &factors_generated : NULL,
+                                random_level, flags, 1,
+                                cb_func, cb_arg);
 
-  if (! err)
-    if (cb_func)
-      {
-       /* Additional check. */
-       if ( !cb_func (cb_arg, GCRY_PRIME_CHECK_AT_FINISH, prime_generated))
-         {
-           /* Failed, deallocate resources.  */
-           unsigned int i;
+  if (!rc && cb_func)
+    {
+      /* Additional check. */
+      if ( !cb_func (cb_arg, GCRY_PRIME_CHECK_AT_FINISH, prime_generated))
+        {
+          /* Failed, deallocate resources.  */
+          unsigned int i;
 
-           mpi_free (prime_generated);
-            if (factors)
-              {
-                for (i = 0; factors_generated[i]; i++)
-                  mpi_free (factors_generated[i]);
-                gcry_free (factors_generated);
-              }
-           err = GPG_ERR_GENERAL;
-         }
-      }
+          mpi_free (prime_generated);
+          if (factors)
+            {
+              for (i = 0; factors_generated[i]; i++)
+                mpi_free (factors_generated[i]);
+              xfree (factors_generated);
+            }
+          rc = GPG_ERR_GENERAL;
+        }
+    }
 
-  if (! err)
+  if (!rc)
     {
       if (factors)
         *factors = factors_generated;
       *prime = prime_generated;
     }
 
-  return gcry_error (err);
+  return rc;
 }
 
 /* Check whether the number X is prime.  */
-gcry_error_t
-gcry_prime_check (gcry_mpi_t x, unsigned int flags)
+gcry_err_code_t
+_gcry_prime_check (gcry_mpi_t x, unsigned int flags)
 {
-  gcry_err_code_t err = GPG_ERR_NO_ERROR;
-  gcry_mpi_t val_2 = mpi_alloc_set_ui (2); /* Used by the Fermat test. */
-
   (void)flags;
 
+  switch (mpi_cmp_ui (x, 2))
+    {
+    case 0:  return 0;                /* 2 is a prime */
+    case -1: return GPG_ERR_NO_PRIME; /* Only numbers > 1 are primes.  */
+    }
+
   /* We use 64 rounds because the prime we are going to test is not
      guaranteed to be a random one. */
-  if (! check_prime (x, val_2, 64, NULL, NULL))
-    err = GPG_ERR_NO_PRIME;
+  if (check_prime (x, mpi_const (MPI_C_TWO), 64, NULL, NULL))
+    return 0;
 
-  mpi_free (val_2);
+  return GPG_ERR_NO_PRIME;
+}
 
-  return gcry_error (err);
+
+/* Check whether the number X is prime according to FIPS 186-4 table C.2.  */
+gcry_err_code_t
+_gcry_fips186_4_prime_check (gcry_mpi_t x, unsigned int bits)
+{
+  gcry_err_code_t ec = GPG_ERR_NO_ERROR;
+
+  switch (mpi_cmp_ui (x, 2))
+    {
+    case 0:  return ec;               /* 2 is a prime */
+    case -1: return GPG_ERR_NO_PRIME; /* Only numbers > 1 are primes.  */
+    }
+
+  /* We use 5 or 4 rounds as specified in table C.2 */
+  if (! check_prime (x, mpi_const (MPI_C_TWO), bits > 1024 ? 4 : 5, NULL, 
NULL))
+    ec = GPG_ERR_NO_PRIME;
+
+  return ec;
 }
 
+
 /* Find a generator for PRIME where the factorization of (prime-1) is
    in the NULL terminated array FACTORS. Return the generator as a
    newly allocated MPI in R_G.  If START_G is not NULL, use this as s
    atart for the search. Returns 0 on success.*/
-gcry_error_t
-gcry_prime_group_generator (gcry_mpi_t *r_g,
-                            gcry_mpi_t prime, gcry_mpi_t *factors,
-                            gcry_mpi_t start_g)
+gcry_err_code_t
+_gcry_prime_group_generator (gcry_mpi_t *r_g,
+                             gcry_mpi_t prime, gcry_mpi_t *factors,
+                             gcry_mpi_t start_g)
 {
-  gcry_mpi_t tmp = gcry_mpi_new (0);
-  gcry_mpi_t b = gcry_mpi_new (0);
-  gcry_mpi_t pmin1 = gcry_mpi_new (0);
-  gcry_mpi_t g = start_g? gcry_mpi_copy (start_g) : gcry_mpi_set_ui (NULL, 3);
-  int first = 1;
-  int i, n;
-
-  if (!factors || !r_g || !prime)
-    return gpg_error (GPG_ERR_INV_ARG);
+  gcry_mpi_t tmp, b, pmin1, g;
+  int first, i, n;
+
+  if (!r_g)
+    return GPG_ERR_INV_ARG;
   *r_g = NULL;
+  if (!factors || !prime)
+    return GPG_ERR_INV_ARG;
 
   for (n=0; factors[n]; n++)
     ;
   if (n < 2)
-    return gpg_error (GPG_ERR_INV_ARG);
+    return GPG_ERR_INV_ARG;
+
+  tmp   = mpi_new (0);
+  b     = mpi_new (0);
+  pmin1 = mpi_new (0);
+  g     = start_g? mpi_copy (start_g) : mpi_set_ui (NULL, 3);
 
   /* Extra sanity check - usually disabled. */
 /*   mpi_set (tmp, factors[0]); */
@@ -1222,27 +1245,24 @@ gcry_prime_group_generator (gcry_mpi_t *r_g,
 /*   if (mpi_cmp (prime, tmp)) */
 /*     return gpg_error (GPG_ERR_INV_ARG); */
 
-  gcry_mpi_sub_ui (pmin1, prime, 1);
+  mpi_sub_ui (pmin1, prime, 1);
+  first = 1;
   do
     {
       if (first)
         first = 0;
       else
-        gcry_mpi_add_ui (g, g, 1);
+        mpi_add_ui (g, g, 1);
 
       if (DBG_CIPHER)
-        {
-          log_debug ("checking g:");
-          gcry_mpi_dump (g);
-          log_debug ("\n");
-        }
+        log_printmpi ("checking g", g);
       else
         progress('^');
 
       for (i = 0; i < n; i++)
         {
           mpi_fdiv_q (tmp, pmin1, factors[i]);
-          gcry_mpi_powm (b, g, tmp, prime);
+          mpi_powm (b, g, tmp, prime);
           if (! mpi_cmp_ui (b, 1))
             break;
         }
@@ -1251,9 +1271,9 @@ gcry_prime_group_generator (gcry_mpi_t *r_g,
     }
   while (i < n);
 
-  gcry_mpi_release (tmp);
-  gcry_mpi_release (b);
-  gcry_mpi_release (pmin1);
+  _gcry_mpi_release (tmp);
+  _gcry_mpi_release (b);
+  _gcry_mpi_release (pmin1);
   *r_g = g;
 
   return 0;
@@ -1261,7 +1281,7 @@ gcry_prime_group_generator (gcry_mpi_t *r_g,
 
 /* Convenience function to release the factors array. */
 void
-gcry_prime_release_factors (gcry_mpi_t *factors)
+_gcry_prime_release_factors (gcry_mpi_t *factors)
 {
   if (factors)
     {
@@ -1269,7 +1289,7 @@ gcry_prime_release_factors (gcry_mpi_t *factors)
 
       for (i=0; factors[i]; i++)
         mpi_free (factors[i]);
-      gcry_free (factors);
+      xfree (factors);
     }
 }
 
@@ -1282,12 +1302,12 @@ find_x931_prime (const gcry_mpi_t pfirst)
   gcry_mpi_t val_2 = mpi_alloc_set_ui (2);
   gcry_mpi_t prime;
 
-  prime = gcry_mpi_copy (pfirst);
+  prime = mpi_copy (pfirst);
   /* If P is even add 1.  */
   mpi_set_bit (prime, 0);
 
   /* We use 64 Rabin-Miller rounds which is better and thus
-     sufficient.  We do not have a Lucas test implementaion thus we
+     sufficient.  We do not have a Lucas test implementation thus we
      can't do it in the X9.31 preferred way of running a few
      Rabin-Miller followed by one Lucas test.  */
   while ( !check_prime (prime, val_2, 64, NULL, NULL) )
@@ -1340,7 +1360,7 @@ _gcry_derive_x931_prime (const gcry_mpi_t xp,
     mpi_sub (r1, r1, tmp);
 
     /* Fixup a negative value.  */
-    if (mpi_is_neg (r1))
+    if (mpi_has_sign (r1))
       mpi_add (r1, r1, p1p2);
 
     /* yp0 = xp + (r1 - xp mod p1*p2)  */
@@ -1386,7 +1406,7 @@ _gcry_derive_x931_prime (const gcry_mpi_t xp,
     mpi_sub_ui (yp0, yp0, 1);   /* Ditto.  */
     for (;;)
       {
-        gcdres = gcry_mpi_gcd (gcdtmp, e, yp0);
+        gcdres = mpi_gcd (gcdtmp, e, yp0);
         mpi_add_ui (yp0, yp0, 1);
         if (!gcdres)
           progress ('/');  /* gcd (e, yp0-1) != 1  */
@@ -1455,7 +1475,7 @@ _gcry_generate_fips186_2_prime (unsigned int pbits, 
unsigned int qbits,
     return GPG_ERR_INV_ARG;
 
   /* Allocate a buffer to later compute SEED+some_increment. */
-  seed_plus = gcry_malloc (seedlen < 20? 20:seedlen);
+  seed_plus = xtrymalloc (seedlen < 20? 20:seedlen);
   if (!seed_plus)
     {
       ec = gpg_err_code_from_syserror ();
@@ -1465,8 +1485,8 @@ _gcry_generate_fips186_2_prime (unsigned int pbits, 
unsigned int qbits,
   val_2   = mpi_alloc_set_ui (2);
   value_n = (pbits - 1) / qbits;
   value_b = (pbits - 1) - value_n * qbits;
-  value_w = gcry_mpi_new (pbits);
-  value_x = gcry_mpi_new (pbits);
+  value_w = mpi_new (pbits);
+  value_x = mpi_new (pbits);
 
  restart:
   /* Generate Q.  */
@@ -1476,7 +1496,7 @@ _gcry_generate_fips186_2_prime (unsigned int pbits, 
unsigned int qbits,
       if (!seed)
         {
           seedlen = sizeof seed_help_buffer;
-          gcry_create_nonce (seed_help_buffer, seedlen);
+          _gcry_create_nonce (seed_help_buffer, seedlen);
           seed = seed_help_buffer;
         }
 
@@ -1488,15 +1508,15 @@ _gcry_generate_fips186_2_prime (unsigned int pbits, 
unsigned int qbits,
           if (seed_plus[i])
             break;
         }
-      gcry_md_hash_buffer (GCRY_MD_SHA1, value_u, seed, seedlen);
-      gcry_md_hash_buffer (GCRY_MD_SHA1, digest, seed_plus, seedlen);
+      _gcry_md_hash_buffer (GCRY_MD_SHA1, value_u, seed, seedlen);
+      _gcry_md_hash_buffer (GCRY_MD_SHA1, digest, seed_plus, seedlen);
       for (i=0; i < sizeof value_u; i++)
         value_u[i] ^= digest[i];
 
       /* Step 3:  Form q from U  */
-      gcry_mpi_release (prime_q); prime_q = NULL;
-      ec = gpg_err_code (gcry_mpi_scan (&prime_q, GCRYMPI_FMT_USG,
-                                        value_u, sizeof value_u, NULL));
+      _gcry_mpi_release (prime_q); prime_q = NULL;
+      ec = _gcry_mpi_scan (&prime_q, GCRYMPI_FMT_USG,
+                           value_u, sizeof value_u, NULL);
       if (ec)
         goto leave;
       mpi_set_highbit (prime_q, qbits-1 );
@@ -1515,7 +1535,7 @@ _gcry_generate_fips186_2_prime (unsigned int pbits, 
unsigned int qbits,
   counter = 0;
 
   /* Generate P. */
-  prime_p = gcry_mpi_new (pbits);
+  prime_p = mpi_new (pbits);
   for (;;)
     {
       /* Step 7: For k = 0,...n let
@@ -1541,11 +1561,11 @@ _gcry_generate_fips186_2_prime (unsigned int pbits, 
unsigned int qbits,
               if (seed_plus[i])
                 break;
             }
-          gcry_md_hash_buffer (GCRY_MD_SHA1, digest, seed_plus, seedlen);
+          _gcry_md_hash_buffer (GCRY_MD_SHA1, digest, seed_plus, seedlen);
 
-          gcry_mpi_release (tmpval); tmpval = NULL;
-          ec = gpg_err_code (gcry_mpi_scan (&tmpval, GCRYMPI_FMT_USG,
-                                            digest, sizeof digest, NULL));
+          _gcry_mpi_release (tmpval); tmpval = NULL;
+          ec = _gcry_mpi_scan (&tmpval, GCRYMPI_FMT_USG,
+                               digest, sizeof digest, NULL);
           if (ec)
             goto leave;
           if (value_k == value_n)
@@ -1607,35 +1627,33 @@ _gcry_generate_fips186_2_prime (unsigned int pbits, 
unsigned int qbits,
 
 
  leave:
-  gcry_mpi_release (tmpval);
-  gcry_mpi_release (value_x);
-  gcry_mpi_release (value_w);
-  gcry_mpi_release (prime_p);
-  gcry_mpi_release (prime_q);
-  gcry_free (seed_plus);
-  gcry_mpi_release (val_2);
+  _gcry_mpi_release (tmpval);
+  _gcry_mpi_release (value_x);
+  _gcry_mpi_release (value_w);
+  _gcry_mpi_release (prime_p);
+  _gcry_mpi_release (prime_q);
+  xfree (seed_plus);
+  _gcry_mpi_release (val_2);
   return ec;
 }
 
 
 
-/* WARNING: The code below has not yet been tested!  However, it is
-   not yet used.  We need to wait for FIPS 186-3 final and for test
-   vectors.
-
-   Generate the two prime used for DSA using the algorithm specified
-   in FIPS 186-3, A.1.1.2.  PBITS is the desired length of the prime P
-   and a QBITS the length of the prime Q.  If SEED is not supplied and
-   SEEDLEN is 0 the function generates an appropriate SEED.  On
-   success the generated primes are stored at R_Q and R_P, the counter
-   value is stored at R_COUNTER and the seed actually used for
-   generation is stored at R_SEED and R_SEEDVALUE.  The hash algorithm
-   used is stored at R_HASHALGO.
-
-   Note that this function is very similar to the fips186_2 code.  Due
-   to the minor differences, other buffer sizes and for documentarion,
-   we use a separate function.
-*/
+/* WARNING: The code below has not yet been tested!
+ *
+ * Generate the two prime used for DSA using the algorithm specified
+ * in FIPS 186-3, A.1.1.2.  PBITS is the desired length of the prime P
+ * and a QBITS the length of the prime Q.  If SEED is not supplied and
+ * SEEDLEN is 0 the function generates an appropriate SEED.  On
+ * success the generated primes are stored at R_Q and R_P, the counter
+ * value is stored at R_COUNTER and the seed actually used for
+ * generation is stored at R_SEED and R_SEEDVALUE.  The hash algorithm
+ * used is stored at R_HASHALGO.
+ *
+ * Note that this function is very similar to the fips186_2 code.  Due
+ * to the minor differences, other buffer sizes and for documentarion,
+ * we use a separate function.
+ */
 gpg_err_code_t
 _gcry_generate_fips186_3_prime (unsigned int pbits, unsigned int qbits,
                                 const void *seed, size_t seedlen,
@@ -1647,7 +1665,7 @@ _gcry_generate_fips186_3_prime (unsigned int pbits, 
unsigned int qbits,
   gpg_err_code_t ec;
   unsigned char seed_help_buffer[256/8];  /* Used to hold a generated SEED. */
   unsigned char *seed_plus;     /* Malloced buffer to hold SEED+x.  */
-  unsigned char digest[256/8];  /* Helper buffer for SHA-1 digest.  */
+  unsigned char digest[256/8];  /* Helper buffer for SHA-2 digest.  */
   gcry_mpi_t val_2 = NULL;      /* Helper for the prime test.  */
   gcry_mpi_t tmpval = NULL;     /* Helper variable.  */
   int hashalgo;                 /* The id of the Approved Hash Function.  */
@@ -1666,9 +1684,7 @@ _gcry_generate_fips186_3_prime (unsigned int pbits, 
unsigned int qbits,
 
   /* Step 1:  Check the requested prime lengths.  */
   /* Note that due to the size of our buffers QBITS is limited to 256.  */
-  if (pbits == 1024 && qbits == 160)
-    hashalgo = GCRY_MD_SHA1;
-  else if (pbits == 2048 && qbits == 224)
+  if (pbits == 2048 && qbits == 224)
     hashalgo = GCRY_MD_SHA224;
   else if (pbits == 2048 && qbits == 256)
     hashalgo = GCRY_MD_SHA256;
@@ -1678,11 +1694,11 @@ _gcry_generate_fips186_3_prime (unsigned int pbits, 
unsigned int qbits,
     return GPG_ERR_INV_KEYLEN;
 
   /* Also check that the hash algorithm is available.  */
-  ec = gpg_err_code (gcry_md_test_algo (hashalgo));
+  ec = _gcry_md_test_algo (hashalgo);
   if (ec)
     return ec;
   gcry_assert (qbits/8 <= sizeof digest);
-  gcry_assert (gcry_md_get_algo_dlen (hashalgo) == qbits/8);
+  gcry_assert (_gcry_md_get_algo_dlen (hashalgo) == qbits/8);
 
 
   /* Step 2:  Check seedlen.  */
@@ -1693,16 +1709,16 @@ _gcry_generate_fips186_3_prime (unsigned int pbits, 
unsigned int qbits,
 
   /* Allocate a buffer to later compute SEED+some_increment and a few
      helper variables.  */
-  seed_plus = gcry_malloc (seedlen < sizeof seed_help_buffer?
-                           sizeof seed_help_buffer : seedlen);
+  seed_plus = xtrymalloc (seedlen < sizeof seed_help_buffer?
+                          sizeof seed_help_buffer : seedlen);
   if (!seed_plus)
     {
       ec = gpg_err_code_from_syserror ();
       goto leave;
     }
   val_2   = mpi_alloc_set_ui (2);
-  value_w = gcry_mpi_new (pbits);
-  value_x = gcry_mpi_new (pbits);
+  value_w = mpi_new (pbits);
+  value_x = mpi_new (pbits);
 
   /* Step 3: n = \lceil L / outlen \rceil - 1  */
   value_n = (pbits + qbits - 1) / qbits - 1;
@@ -1718,12 +1734,12 @@ _gcry_generate_fips186_3_prime (unsigned int pbits, 
unsigned int qbits,
         {
           seedlen = qbits/8;
           gcry_assert (seedlen <= sizeof seed_help_buffer);
-          gcry_create_nonce (seed_help_buffer, seedlen);
+          _gcry_create_nonce (seed_help_buffer, seedlen);
           seed = seed_help_buffer;
         }
 
       /* Step 6:  U = hash(seed)  */
-      gcry_md_hash_buffer (hashalgo, value_u, seed, seedlen);
+      _gcry_md_hash_buffer (hashalgo, value_u, seed, seedlen);
 
       /* Step 7:  q = 2^{N-1} + U + 1 - (U mod 2)  */
       if ( !(value_u[qbits/8-1] & 0x01) )
@@ -1735,9 +1751,9 @@ _gcry_generate_fips186_3_prime (unsigned int pbits, 
unsigned int qbits,
                 break;
             }
         }
-      gcry_mpi_release (prime_q); prime_q = NULL;
-      ec = gpg_err_code (gcry_mpi_scan (&prime_q, GCRYMPI_FMT_USG,
-                                        value_u, sizeof value_u, NULL));
+      _gcry_mpi_release (prime_q); prime_q = NULL;
+      ec = _gcry_mpi_scan (&prime_q, GCRYMPI_FMT_USG,
+                           value_u, qbits/8, NULL);
       if (ec)
         goto leave;
       mpi_set_highbit (prime_q, qbits-1 );
@@ -1758,7 +1774,7 @@ _gcry_generate_fips186_3_prime (unsigned int pbits, 
unsigned int qbits,
   counter = 0;
 
   /* Generate P. */
-  prime_p = gcry_mpi_new (pbits);
+  prime_p = mpi_new (pbits);
   for (;;)
     {
       /* Step 11.1: For j = 0,...n let
@@ -1782,11 +1798,11 @@ _gcry_generate_fips186_3_prime (unsigned int pbits, 
unsigned int qbits,
               if (seed_plus[i])
                 break;
             }
-          gcry_md_hash_buffer (GCRY_MD_SHA1, digest, seed_plus, seedlen);
+          _gcry_md_hash_buffer (hashalgo, digest, seed_plus, seedlen);
 
-          gcry_mpi_release (tmpval); tmpval = NULL;
-          ec = gpg_err_code (gcry_mpi_scan (&tmpval, GCRYMPI_FMT_USG,
-                                            digest, sizeof digest, NULL));
+          _gcry_mpi_release (tmpval); tmpval = NULL;
+          ec = _gcry_mpi_scan (&tmpval, GCRYMPI_FMT_USG,
+                               digest, qbits/8, NULL);
           if (ec)
             goto leave;
           if (value_j == value_n)
@@ -1822,11 +1838,12 @@ _gcry_generate_fips186_3_prime (unsigned int pbits, 
unsigned int qbits,
     }
 
   /* Step 12:  Save p, q, counter and seed.  */
-  log_debug ("fips186-3 pbits p=%u q=%u counter=%d\n",
-             mpi_get_nbits (prime_p), mpi_get_nbits (prime_q), counter);
-  log_printhex("fips186-3 seed:", seed, seedlen);
-  log_mpidump ("fips186-3 prime p", prime_p);
-  log_mpidump ("fips186-3 prime q", prime_q);
+  /* log_debug ("fips186-3 pbits p=%u q=%u counter=%d\n", */
+  /*            mpi_get_nbits (prime_p), mpi_get_nbits (prime_q), counter); */
+  /* log_printhex ("fips186-3 seed", seed, seedlen); */
+  /* log_printmpi ("fips186-3    p", prime_p); */
+  /* log_printmpi ("fips186-3    q", prime_q); */
+
   if (r_q)
     {
       *r_q = prime_q;
@@ -1850,12 +1867,12 @@ _gcry_generate_fips186_3_prime (unsigned int pbits, 
unsigned int qbits,
     *r_hashalgo = hashalgo;
 
  leave:
-  gcry_mpi_release (tmpval);
-  gcry_mpi_release (value_x);
-  gcry_mpi_release (value_w);
-  gcry_mpi_release (prime_p);
-  gcry_mpi_release (prime_q);
-  gcry_free (seed_plus);
-  gcry_mpi_release (val_2);
+  _gcry_mpi_release (tmpval);
+  _gcry_mpi_release (value_x);
+  _gcry_mpi_release (value_w);
+  _gcry_mpi_release (prime_p);
+  _gcry_mpi_release (prime_q);
+  xfree (seed_plus);
+  _gcry_mpi_release (val_2);
   return ec;
 }
diff --git a/grub-core/lib/libgcrypt/cipher/pubkey-internal.h 
b/grub-core/lib/libgcrypt/cipher/pubkey-internal.h
new file mode 100644
index 000000000..0ca770991
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/pubkey-internal.h
@@ -0,0 +1,107 @@
+/* pubkey-internal.h  - Internal defs for pubkey.c
+ * Copyright (C) 2013 g10 code GmbH
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser general Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef GCRY_PUBKEY_INTERNAL_H
+#define GCRY_PUBKEY_INTERNAL_H
+
+/*-- pubkey-util.c --*/
+gpg_err_code_t _gcry_pk_util_parse_flaglist (gcry_sexp_t list,
+                                             int *r_flags,
+                                             enum pk_encoding *r_encoding);
+gpg_err_code_t _gcry_pk_util_get_nbits (gcry_sexp_t list,
+                                        unsigned int *r_nbits);
+gpg_err_code_t _gcry_pk_util_get_rsa_use_e (gcry_sexp_t list,
+                                            unsigned long *r_e);
+gpg_err_code_t _gcry_pk_util_preparse_sigval (gcry_sexp_t s_sig,
+                                              const char **algo_names,
+                                              gcry_sexp_t *r_parms,
+                                              int *r_eccflags);
+gpg_err_code_t _gcry_pk_util_preparse_encval (gcry_sexp_t sexp,
+                                              const char **algo_names,
+                                              gcry_sexp_t *r_parms,
+                                              struct pk_encoding_ctx *ctx);
+void _gcry_pk_util_init_encoding_ctx (struct pk_encoding_ctx *ctx,
+                                      enum pk_operation op,
+                                      unsigned int nbits);
+void _gcry_pk_util_free_encoding_ctx (struct pk_encoding_ctx *ctx);
+gcry_err_code_t _gcry_pk_util_data_to_mpi (gcry_sexp_t input,
+                                           gcry_mpi_t *ret_mpi,
+                                           struct pk_encoding_ctx *ctx);
+
+
+
+/*-- rsa-common.c --*/
+gpg_err_code_t
+_gcry_rsa_pkcs1_encode_for_enc (gcry_mpi_t *r_result, unsigned int nbits,
+                                const unsigned char *value, size_t valuelen,
+                                const unsigned char *random_override,
+                                size_t random_override_len);
+gpg_err_code_t
+_gcry_rsa_pkcs1_decode_for_enc (unsigned char **r_result, size_t *r_resultlen,
+                                unsigned int nbits, gcry_mpi_t value);
+gpg_err_code_t
+_gcry_rsa_pkcs1_encode_raw_for_sig (gcry_mpi_t *r_result, unsigned int nbits,
+                                const unsigned char *value, size_t valuelen);
+
+gpg_err_code_t
+_gcry_rsa_pkcs1_encode_for_sig (gcry_mpi_t *r_result, unsigned int nbits,
+                                const unsigned char *value, size_t valuelen,
+                                int algo);
+gpg_err_code_t
+_gcry_rsa_oaep_encode (gcry_mpi_t *r_result, unsigned int nbits, int algo,
+                       const unsigned char *value, size_t valuelen,
+                       const unsigned char *label, size_t labellen,
+                       const void *random_override, size_t 
random_override_len);
+gpg_err_code_t
+_gcry_rsa_oaep_decode (unsigned char **r_result, size_t *r_resultlen,
+                       unsigned int nbits, int algo,
+                       gcry_mpi_t value,
+                       const unsigned char *label, size_t labellen);
+gpg_err_code_t
+_gcry_rsa_pss_encode (gcry_mpi_t *r_result, unsigned int nbits, int algo,
+                      int hashed_already, int saltlen,
+                      const unsigned char *value, size_t valuelen,
+                      const void *random_override);
+gpg_err_code_t
+_gcry_rsa_pss_verify (gcry_mpi_t value, int hashed_already, gcry_mpi_t encoded,
+                      unsigned int nbits, int algo, size_t saltlen);
+
+
+
+/*-- dsa-common.c --*/
+void _gcry_dsa_modify_k (gcry_mpi_t k, gcry_mpi_t q, int qbits);
+gcry_mpi_t _gcry_dsa_gen_k (gcry_mpi_t q, int security_level);
+gpg_err_code_t _gcry_dsa_gen_rfc6979_k (gcry_mpi_t *r_k,
+                                        gcry_mpi_t dsa_q, gcry_mpi_t dsa_x,
+                                        const unsigned char *h1,
+                                        unsigned int h1len,
+                                        int halgo,
+                                        unsigned int extraloops);
+gpg_err_code_t _gcry_dsa_compute_hash (gcry_mpi_t *r_hash, gcry_mpi_t input,
+                                       int hashalgo);
+gpg_err_code_t _gcry_dsa_normalize_hash (gcry_mpi_t input,
+                                         gcry_mpi_t *out,
+                                         unsigned int qbits);
+
+/*-- ecc.c --*/
+gpg_err_code_t _gcry_pk_ecc_get_sexp (gcry_sexp_t *r_sexp, int mode,
+                                      mpi_ec_t ec);
+
+
+#endif /*GCRY_PUBKEY_INTERNAL_H*/
diff --git a/grub-core/lib/libgcrypt/cipher/pubkey-util.c 
b/grub-core/lib/libgcrypt/cipher/pubkey-util.c
new file mode 100644
index 000000000..244dd5d40
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/pubkey-util.c
@@ -0,0 +1,1369 @@
+/* pubkey-util.c - Supporting functions for all pubkey modules.
+ * Copyright (C) 1998, 1999, 2000, 2002, 2003, 2005,
+ *               2007, 2008, 2011 Free Software Foundation, Inc.
+ * Copyright (C) 2013, 2015 g10 Code GmbH
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "g10lib.h"
+#include "mpi.h"
+#include "cipher.h"
+#include "pubkey-internal.h"
+
+
+/* Callback for the pubkey algorithm code to verify PSS signatures.
+   OPAQUE is the data provided by the actual caller.  The meaning of
+   TMP depends on the actual algorithm (but there is only RSA); now
+   for RSA it is the output of running the public key function on the
+   input.  */
+static int
+pss_verify_cmp (void *opaque, gcry_mpi_t tmp)
+{
+  struct pk_encoding_ctx *ctx = opaque;
+  gcry_mpi_t value = ctx->verify_arg;
+
+  return _gcry_rsa_pss_verify (value, !(ctx->flags & PUBKEY_FLAG_PREHASH),
+                               tmp, ctx->nbits - 1,
+                               ctx->hash_algo, ctx->saltlen);
+}
+
+
+/* Parser for a flag list.  On return the encoding is stored at
+   R_ENCODING and the flags are stored at R_FLAGS.  If any of them is
+   not needed, NULL may be passed.  The function returns 0 on success
+   or an error code. */
+gpg_err_code_t
+_gcry_pk_util_parse_flaglist (gcry_sexp_t list,
+                              int *r_flags, enum pk_encoding *r_encoding)
+{
+  gpg_err_code_t rc = 0;
+  const char *s;
+  size_t n;
+  int i;
+  int encoding = PUBKEY_ENC_UNKNOWN;
+  int flags = 0;
+  int igninvflag = 0;
+
+  for (i = list ? sexp_length (list)-1 : 0; i > 0; i--)
+    {
+      s = sexp_nth_data (list, i, &n);
+      if (!s)
+        continue; /* Not a data element. */
+
+      switch (n)
+        {
+        case 3:
+          if (!memcmp (s, "pss", 3) && encoding == PUBKEY_ENC_UNKNOWN)
+            {
+              encoding = PUBKEY_ENC_PSS;
+              flags |= PUBKEY_FLAG_FIXEDLEN;
+            }
+          else if (!memcmp (s, "raw", 3) && encoding == PUBKEY_ENC_UNKNOWN)
+            {
+              encoding = PUBKEY_ENC_RAW;
+              flags |= PUBKEY_FLAG_RAW_FLAG; /* Explicitly given.  */
+            }
+          else if (!memcmp (s, "sm2", 3))
+            {
+                encoding = PUBKEY_ENC_RAW;
+                flags |= PUBKEY_FLAG_SM2 | PUBKEY_FLAG_RAW_FLAG;
+            }
+          else if (!igninvflag)
+            rc = GPG_ERR_INV_FLAG;
+          break;
+
+        case 4:
+          if (!memcmp (s, "comp", 4))
+            flags |= PUBKEY_FLAG_COMP;
+          else if (!memcmp (s, "oaep", 4) && encoding == PUBKEY_ENC_UNKNOWN)
+            {
+              encoding = PUBKEY_ENC_OAEP;
+              flags |= PUBKEY_FLAG_FIXEDLEN;
+            }
+          else if (!memcmp (s, "gost", 4))
+            {
+              encoding = PUBKEY_ENC_RAW;
+              flags |= PUBKEY_FLAG_GOST;
+            }
+          else if (!igninvflag)
+            rc = GPG_ERR_INV_FLAG;
+          break;
+
+        case 5:
+          if (!memcmp (s, "eddsa", 5))
+            {
+              encoding = PUBKEY_ENC_RAW;
+              flags |= PUBKEY_FLAG_EDDSA;
+              flags |= PUBKEY_FLAG_DJB_TWEAK;
+            }
+          else if (!memcmp (s, "pkcs1", 5) && encoding == PUBKEY_ENC_UNKNOWN)
+            {
+              encoding = PUBKEY_ENC_PKCS1;
+              flags |= PUBKEY_FLAG_FIXEDLEN;
+            }
+          else if (!memcmp (s, "param", 5))
+            flags |= PUBKEY_FLAG_PARAM;
+          else if (!igninvflag)
+            rc = GPG_ERR_INV_FLAG;
+          break;
+
+        case 6:
+          if (!memcmp (s, "nocomp", 6))
+            flags |= PUBKEY_FLAG_NOCOMP;
+          else if (!igninvflag)
+            rc = GPG_ERR_INV_FLAG;
+          break;
+
+        case 7:
+          if (!memcmp (s, "rfc6979", 7))
+            flags |= PUBKEY_FLAG_RFC6979;
+          else if (!memcmp (s, "noparam", 7))
+            ; /* Ignore - it is the default.  */
+          else if (!memcmp (s, "prehash", 7))
+            flags |= PUBKEY_FLAG_PREHASH;
+          else if (!igninvflag)
+            rc = GPG_ERR_INV_FLAG;
+          break;
+
+        case 8:
+          if (!memcmp (s, "use-x931", 8))
+            flags |= PUBKEY_FLAG_USE_X931;
+          else if (!igninvflag)
+            rc = GPG_ERR_INV_FLAG;
+          break;
+
+        case 9:
+          if (!memcmp (s, "pkcs1-raw", 9) && encoding == PUBKEY_ENC_UNKNOWN)
+            {
+              encoding = PUBKEY_ENC_PKCS1_RAW;
+              flags |= PUBKEY_FLAG_FIXEDLEN;
+            }
+          else if (!memcmp (s, "djb-tweak", 9))
+            {
+              encoding = PUBKEY_ENC_RAW;
+              flags |= PUBKEY_FLAG_DJB_TWEAK;
+            }
+          else if (!igninvflag)
+            rc = GPG_ERR_INV_FLAG;
+          break;
+
+        case 10:
+          if (!memcmp (s, "igninvflag", 10))
+            igninvflag = 1;
+          else if (!memcmp (s, "no-keytest", 10))
+            flags |= PUBKEY_FLAG_NO_KEYTEST;
+          else if (!igninvflag)
+            rc = GPG_ERR_INV_FLAG;
+          break;
+
+        case 11:
+          if (!memcmp (s, "no-blinding", 11))
+            flags |= PUBKEY_FLAG_NO_BLINDING;
+          else if (!memcmp (s, "use-fips186", 11))
+            flags |= PUBKEY_FLAG_USE_FIPS186;
+          else if (!igninvflag)
+            rc = GPG_ERR_INV_FLAG;
+          break;
+
+        case 13:
+          if (!memcmp (s, "use-fips186-2", 13))
+            flags |= PUBKEY_FLAG_USE_FIPS186_2;
+          else if (!memcmp (s, "transient-key", 13))
+            flags |= PUBKEY_FLAG_TRANSIENT_KEY;
+          else if (!igninvflag)
+            rc = GPG_ERR_INV_FLAG;
+          break;
+
+        default:
+          if (!igninvflag)
+            rc = GPG_ERR_INV_FLAG;
+          break;
+        }
+    }
+
+  if (r_flags)
+    *r_flags = flags;
+  if (r_encoding)
+    *r_encoding = encoding;
+
+  return rc;
+}
+
+
+static int
+get_hash_algo (const char *s, size_t n)
+{
+  static const struct { const char *name; int algo; } hashnames[] = {
+    { "sha1",   GCRY_MD_SHA1 },
+    { "md5",    GCRY_MD_MD5 },
+    { "sha256", GCRY_MD_SHA256 },
+    { "ripemd160", GCRY_MD_RMD160 },
+    { "rmd160", GCRY_MD_RMD160 },
+    { "sha384", GCRY_MD_SHA384 },
+    { "sha512", GCRY_MD_SHA512 },
+    { "sha224", GCRY_MD_SHA224 },
+    { "md2",    GCRY_MD_MD2 },
+    { "md4",    GCRY_MD_MD4 },
+    { "tiger",  GCRY_MD_TIGER },
+    { "haval",  GCRY_MD_HAVAL },
+    { "sha3-224", GCRY_MD_SHA3_224 },
+    { "sha3-256", GCRY_MD_SHA3_256 },
+    { "sha3-384", GCRY_MD_SHA3_384 },
+    { "sha3-512", GCRY_MD_SHA3_512 },
+    { "sm3", GCRY_MD_SM3 },
+    { "shake128", GCRY_MD_SHAKE128 },
+    { "shake256", GCRY_MD_SHAKE256 },
+    { "sha512-224", GCRY_MD_SHA512_224 },
+    { "sha512-256", GCRY_MD_SHA512_256 },
+    { NULL, 0 }
+  };
+  int algo;
+  int i;
+
+  for (i=0; hashnames[i].name; i++)
+    {
+      if ( strlen (hashnames[i].name) == n
+          && !memcmp (hashnames[i].name, s, n))
+       break;
+    }
+  if (hashnames[i].name)
+    algo = hashnames[i].algo;
+  else
+    {
+      /* In case of not listed or dynamically allocated hash
+        algorithm we fall back to this somewhat slower
+        method.  Further, it also allows to use OIDs as
+        algorithm names. */
+      char *tmpname;
+
+      tmpname = xtrymalloc (n+1);
+      if (!tmpname)
+       algo = 0;  /* Out of core - silently give up.  */
+      else
+       {
+         memcpy (tmpname, s, n);
+         tmpname[n] = 0;
+         algo = _gcry_md_map_name (tmpname);
+         xfree (tmpname);
+       }
+    }
+  return algo;
+}
+
+
+/* Get the "nbits" parameter from an s-expression of the format:
+ *
+ *   (algo
+ *     (parameter_name_1 ....)
+ *      ....
+ *     (parameter_name_n ....))
+ *
+ * Example:
+ *
+ *   (rsa
+ *     (nbits 4:2048))
+ *
+ * On success the value for nbits is stored at R_NBITS.  If no nbits
+ * parameter is found, the function returns success and stores 0 at
+ * R_NBITS.  For parsing errors the function returns an error code and
+ * stores 0 at R_NBITS.
+ */
+gpg_err_code_t
+_gcry_pk_util_get_nbits (gcry_sexp_t list, unsigned int *r_nbits)
+{
+  char buf[50];
+  const char *s;
+  size_t n;
+
+  *r_nbits = 0;
+
+  list = sexp_find_token (list, "nbits", 0);
+  if (!list)
+    return 0; /* No NBITS found.  */
+
+  s = sexp_nth_data (list, 1, &n);
+  if (!s || n >= DIM (buf) - 1 )
+    {
+      /* NBITS given without a cdr.  */
+      sexp_release (list);
+      return GPG_ERR_INV_OBJ;
+    }
+  memcpy (buf, s, n);
+  buf[n] = 0;
+  *r_nbits = (unsigned int)strtoul (buf, NULL, 0);
+  sexp_release (list);
+  return 0;
+}
+
+
+/* Get the optional "rsa-use-e" parameter from an s-expression of the
+ * format:
+ *
+ *   (algo
+ *     (parameter_name_1 ....)
+ *      ....
+ *     (parameter_name_n ....))
+ *
+ * Example:
+ *
+ *   (rsa
+ *     (nbits 4:2048)
+ *     (rsa-use-e 2:41))
+ *
+ * On success the value for nbits is stored at R_E.  If no rsa-use-e
+ * parameter is found, the function returns success and stores 65537 at
+ * R_E.  For parsing errors the function returns an error code and
+ * stores 0 at R_E.
+ */
+gpg_err_code_t
+_gcry_pk_util_get_rsa_use_e (gcry_sexp_t list, unsigned long *r_e)
+{
+  char buf[50];
+  const char *s;
+  size_t n;
+
+  *r_e = 0;
+
+  list = sexp_find_token (list, "rsa-use-e", 0);
+  if (!list)
+    {
+      *r_e = 65537; /* Not given, use the value generated by old versions. */
+      return 0;
+    }
+
+  s = sexp_nth_data (list, 1, &n);
+  if (!s || n >= DIM (buf) - 1 )
+    {
+      /* No value or value too large.  */
+      sexp_release (list);
+      return GPG_ERR_INV_OBJ;
+    }
+  memcpy (buf, s, n);
+  buf[n] = 0;
+  *r_e = strtoul (buf, NULL, 0);
+  sexp_release (list);
+  return 0;
+}
+
+
+/* Parse a "sig-val" s-expression and store the inner parameter list at
+   R_PARMS.  ALGO_NAMES is used to verify that the algorithm in
+   "sig-val" is valid.  Returns 0 on success and stores a new list at
+   R_PARMS which must be freed by the caller.  On error R_PARMS is set
+   to NULL and an error code returned.  If R_ECCFLAGS is not NULL flag
+   values are set into it; as of now they are only used with ecc
+   algorithms.  */
+gpg_err_code_t
+_gcry_pk_util_preparse_sigval (gcry_sexp_t s_sig, const char **algo_names,
+                               gcry_sexp_t *r_parms, int *r_eccflags)
+{
+  gpg_err_code_t rc;
+  gcry_sexp_t l1 = NULL;
+  gcry_sexp_t l2 = NULL;
+  char *name = NULL;
+  int i;
+
+  *r_parms = NULL;
+  if (r_eccflags)
+    *r_eccflags = 0;
+
+  /* Extract the signature value.  */
+  l1 = sexp_find_token (s_sig, "sig-val", 0);
+  if (!l1)
+    {
+      rc = GPG_ERR_INV_OBJ; /* Does not contain a signature value object.  */
+      goto leave;
+    }
+
+  l2 = sexp_nth (l1, 1);
+  if (!l2)
+    {
+      rc = GPG_ERR_NO_OBJ;   /* No cadr for the sig object.  */
+      goto leave;
+    }
+  name = sexp_nth_string (l2, 0);
+  if (!name)
+    {
+      rc = GPG_ERR_INV_OBJ;  /* Invalid structure of object.  */
+      goto leave;
+    }
+  else if (!strcmp (name, "flags"))
+    {
+      /* Skip a "flags" parameter and look again for the algorithm
+        name.  This is not used but here just for the sake of
+        consistent S-expressions we need to handle it. */
+      sexp_release (l2);
+      l2 = sexp_nth (l1, 2);
+      if (!l2)
+       {
+         rc = GPG_ERR_INV_OBJ;
+          goto leave;
+       }
+      xfree (name);
+      name = sexp_nth_string (l2, 0);
+      if (!name)
+        {
+          rc = GPG_ERR_INV_OBJ;  /* Invalid structure of object.  */
+          goto leave;
+        }
+    }
+
+  for (i=0; algo_names[i]; i++)
+    if (!stricmp (name, algo_names[i]))
+      break;
+  if (!algo_names[i])
+    {
+      rc = GPG_ERR_CONFLICT; /* "sig-val" uses an unexpected algo. */
+      goto leave;
+    }
+  if (r_eccflags)
+    {
+      if (!strcmp (name, "eddsa"))
+        *r_eccflags = PUBKEY_FLAG_EDDSA;
+      if (!strcmp (name, "gost"))
+        *r_eccflags = PUBKEY_FLAG_GOST;
+      if (!strcmp (name, "sm2"))
+        *r_eccflags = PUBKEY_FLAG_SM2;
+    }
+
+  *r_parms = l2;
+  l2 = NULL;
+  rc = 0;
+
+ leave:
+  xfree (name);
+  sexp_release (l2);
+  sexp_release (l1);
+  return rc;
+}
+
+
+/* Parse a "enc-val" s-expression and store the inner parameter list
+   at R_PARMS.  ALGO_NAMES is used to verify that the algorithm in
+   "enc-val" is valid.  Returns 0 on success and stores a new list at
+   R_PARMS which must be freed by the caller.  On error R_PARMS is set
+   to NULL and an error code returned.  If R_ECCFLAGS is not NULL flag
+   values are set into it; as of now they are only used with ecc
+   algorithms.
+
+     (enc-val
+       [(flags [raw, pkcs1, oaep, no-blinding])]
+       [(hash-algo <algo>)]
+       [(label <label>)]
+        (<algo>
+          (<param_name1> <mpi>)
+          ...
+          (<param_namen> <mpi>)))
+
+   HASH-ALGO and LABEL are specific to OAEP.  CTX will be updated with
+   encoding information.  */
+gpg_err_code_t
+_gcry_pk_util_preparse_encval (gcry_sexp_t sexp, const char **algo_names,
+                               gcry_sexp_t *r_parms,
+                               struct pk_encoding_ctx *ctx)
+{
+  gcry_err_code_t rc = 0;
+  gcry_sexp_t l1 = NULL;
+  gcry_sexp_t l2 = NULL;
+  char *name = NULL;
+  size_t n;
+  int parsed_flags = 0;
+  int i;
+
+  *r_parms = NULL;
+
+  /* Check that the first element is valid.  */
+  l1 = sexp_find_token (sexp, "enc-val" , 0);
+  if (!l1)
+    {
+      rc = GPG_ERR_INV_OBJ; /* Does not contain an encrypted value object.  */
+      goto leave;
+    }
+
+  l2 = sexp_nth (l1, 1);
+  if (!l2)
+    {
+      rc = GPG_ERR_NO_OBJ;  /* No cadr for the data object.  */
+      goto leave;
+    }
+
+  /* Extract identifier of sublist.  */
+  name = sexp_nth_string (l2, 0);
+  if (!name)
+    {
+      rc = GPG_ERR_INV_OBJ; /* Invalid structure of object.  */
+      goto leave;
+    }
+
+  if (!strcmp (name, "flags"))
+    {
+      const char *s;
+
+      /* There is a flags element - process it.  */
+      rc = _gcry_pk_util_parse_flaglist (l2, &parsed_flags, &ctx->encoding);
+      if (rc)
+        goto leave;
+      if (ctx->encoding == PUBKEY_ENC_PSS)
+        {
+          rc = GPG_ERR_CONFLICT;
+          goto leave;
+        }
+
+      /* Get the OAEP parameters HASH-ALGO and LABEL, if any. */
+      if (ctx->encoding == PUBKEY_ENC_OAEP)
+       {
+         /* Get HASH-ALGO. */
+          sexp_release (l2);
+         l2 = sexp_find_token (l1, "hash-algo", 0);
+         if (l2)
+           {
+             s = sexp_nth_data (l2, 1, &n);
+             if (!s)
+               rc = GPG_ERR_NO_OBJ;
+             else
+               {
+                 ctx->hash_algo = get_hash_algo (s, n);
+                 if (!ctx->hash_algo)
+                   rc = GPG_ERR_DIGEST_ALGO;
+               }
+             if (rc)
+               goto leave;
+           }
+
+         /* Get LABEL. */
+          sexp_release (l2);
+         l2 = sexp_find_token (l1, "label", 0);
+         if (l2)
+           {
+             s = sexp_nth_data (l2, 1, &n);
+             if (!s)
+               rc = GPG_ERR_NO_OBJ;
+             else if (n > 0)
+               {
+                 ctx->label = xtrymalloc (n);
+                 if (!ctx->label)
+                   rc = gpg_err_code_from_syserror ();
+                 else
+                   {
+                     memcpy (ctx->label, s, n);
+                     ctx->labellen = n;
+                   }
+               }
+             if (rc)
+               goto leave;
+           }
+       }
+
+      /* Get the next which has the actual data - skip HASH-ALGO and LABEL. */
+      for (i = 2; (sexp_release (l2), l2 = sexp_nth (l1, i)); i++)
+       {
+         s = sexp_nth_data (l2, 0, &n);
+         if (!(n == 9 && !memcmp (s, "hash-algo", 9))
+             && !(n == 5 && !memcmp (s, "label", 5))
+             && !(n == 15 && !memcmp (s, "random-override", 15)))
+           break;
+       }
+      if (!l2)
+        {
+          rc = GPG_ERR_NO_OBJ; /* No cadr for the data object. */
+          goto leave;
+        }
+
+      /* Extract sublist identifier.  */
+      xfree (name);
+      name = sexp_nth_string (l2, 0);
+      if (!name)
+        {
+          rc = GPG_ERR_INV_OBJ; /* Invalid structure of object. */
+          goto leave;
+        }
+    }
+  else /* No flags - flag as legacy structure.  */
+    parsed_flags |= PUBKEY_FLAG_LEGACYRESULT;
+
+  for (i=0; algo_names[i]; i++)
+    if (!stricmp (name, algo_names[i]))
+      break;
+  if (!algo_names[i])
+    {
+      rc = GPG_ERR_CONFLICT; /* "enc-val" uses an unexpected algo. */
+      goto leave;
+    }
+
+  *r_parms = l2;
+  l2 = NULL;
+  ctx->flags |= parsed_flags;
+  rc = 0;
+
+ leave:
+  xfree (name);
+  sexp_release (l2);
+  sexp_release (l1);
+  return rc;
+}
+
+
+/* Initialize an encoding context.  */
+void
+_gcry_pk_util_init_encoding_ctx (struct pk_encoding_ctx *ctx,
+                                 enum pk_operation op,
+                                 unsigned int nbits)
+{
+  ctx->op = op;
+  ctx->nbits = nbits;
+  ctx->encoding = PUBKEY_ENC_UNKNOWN;
+  ctx->flags = 0;
+  if (fips_mode ())
+    {
+      ctx->hash_algo = GCRY_MD_SHA256;
+    }
+  else
+    {
+      ctx->hash_algo = GCRY_MD_SHA1;
+    }
+  ctx->label = NULL;
+  ctx->labellen = 0;
+  ctx->saltlen = 20;
+  ctx->verify_cmp = NULL;
+  ctx->verify_arg = NULL;
+}
+
+/* Free a context initialzied by _gcry_pk_util_init_encoding_ctx.  */
+void
+_gcry_pk_util_free_encoding_ctx (struct pk_encoding_ctx *ctx)
+{
+  xfree (ctx->label);
+}
+
+
+/* Take the hash value and convert into an MPI, suitable for
+   passing to the low level functions.  We currently support the
+   old style way of passing just a MPI and the modern interface which
+   allows to pass flags so that we can choose between raw and pkcs1
+   padding - may be more padding options later.
+
+   (<mpi>)
+   or
+   (data
+    [(flags [raw, direct, pkcs1, oaep, pss,
+             no-blinding, rfc6979, eddsa, prehash])]
+    [(hash <algo> <value>)]
+    [(value <text>)]
+    [(hash-algo <algo>)]
+    [(label <label>)]
+    [(salt-length <length>)]
+    [(random-override <data>)]
+   )
+
+   Either the VALUE or the HASH element must be present for use
+   with signatures.  VALUE is used for encryption.
+
+   HASH-ALGO is specific to OAEP, PSS and EDDSA.
+
+   LABEL is specific to OAEP.
+
+   SALT-LENGTH is for PSS, it is limited to 16384 bytes.
+
+   RANDOM-OVERRIDE is used to replace random nonces for regression
+   testing.  */
+gcry_err_code_t
+_gcry_pk_util_data_to_mpi (gcry_sexp_t input, gcry_mpi_t *ret_mpi,
+                           struct pk_encoding_ctx *ctx)
+{
+  gcry_err_code_t rc = 0;
+  gcry_sexp_t ldata, lhash, lvalue;
+  size_t n;
+  const char *s;
+  int unknown_flag = 0;
+  int parsed_flags = 0;
+
+  *ret_mpi = NULL;
+  ldata = sexp_find_token (input, "data", 0);
+  if (!ldata)
+    { /* assume old style */
+      int mpifmt = (ctx->flags & PUBKEY_FLAG_RAW_FLAG) ?
+        GCRYMPI_FMT_OPAQUE : GCRYMPI_FMT_STD;
+
+      *ret_mpi = sexp_nth_mpi (input, 0, mpifmt);
+      return *ret_mpi ? GPG_ERR_NO_ERROR : GPG_ERR_INV_OBJ;
+    }
+
+  /* See whether there is a flags list.  */
+  {
+    gcry_sexp_t lflags = sexp_find_token (ldata, "flags", 0);
+    if (lflags)
+      {
+        if (_gcry_pk_util_parse_flaglist (lflags,
+                                          &parsed_flags, &ctx->encoding))
+          unknown_flag = 1;
+        sexp_release (lflags);
+      }
+  }
+
+  if (ctx->encoding == PUBKEY_ENC_UNKNOWN)
+    ctx->encoding = PUBKEY_ENC_RAW; /* default to raw */
+
+  /* Get HASH or MPI */
+  lhash = sexp_find_token (ldata, "hash", 0);
+  lvalue = lhash? NULL : sexp_find_token (ldata, "value", 0);
+
+  if (!(!lhash ^ !lvalue))
+    rc = GPG_ERR_INV_OBJ; /* none or both given */
+  else if (unknown_flag)
+    rc = GPG_ERR_INV_FLAG;
+  else if (ctx->encoding == PUBKEY_ENC_RAW
+           && ((parsed_flags & PUBKEY_FLAG_EDDSA)
+               || (ctx->flags & PUBKEY_FLAG_EDDSA)))
+    {
+      /* Prepare for EdDSA.  */
+      gcry_sexp_t list;
+      void *value;
+      size_t valuelen;
+
+      if (!lvalue)
+        {
+          rc = GPG_ERR_INV_OBJ;
+          goto leave;
+        }
+      /* Hash algo is determined by curve.  No hash-algo is OK.  */
+      /* Get HASH-ALGO. */
+      list = sexp_find_token (ldata, "hash-algo", 0);
+      if (list)
+        {
+          s = sexp_nth_data (list, 1, &n);
+          if (!s)
+            rc = GPG_ERR_NO_OBJ;
+          else
+            {
+              ctx->hash_algo = get_hash_algo (s, n);
+              if (!ctx->hash_algo)
+                rc = GPG_ERR_DIGEST_ALGO;
+            }
+          sexp_release (list);
+        }
+      if (rc)
+        goto leave;
+
+      /* Get LABEL. */
+      list = sexp_find_token (ldata, "label", 0);
+      if (list)
+        {
+          s = sexp_nth_data (list, 1, &n);
+          if (!s)
+            rc = GPG_ERR_NO_OBJ;
+          else if (n > 0)
+            {
+              ctx->label = xtrymalloc (n);
+              if (!ctx->label)
+                rc = gpg_err_code_from_syserror ();
+              else
+                {
+                  memcpy (ctx->label, s, n);
+                  ctx->labellen = n;
+                }
+            }
+          sexp_release (list);
+          if (rc)
+            goto leave;
+        }
+
+      /* Get VALUE.  */
+      value = sexp_nth_buffer (lvalue, 1, &valuelen);
+      if (!value)
+        {
+          /* We assume that a zero length message is meant by
+             "(value)".  This is commonly used by test vectors.  Note
+             that S-expression do not allow zero length items. */
+          valuelen = 0;
+          value = xtrymalloc (1);
+          if (!value)
+            rc = gpg_err_code_from_syserror ();
+        }
+      else if ((valuelen * 8) < valuelen)
+        {
+          xfree (value);
+          rc = GPG_ERR_TOO_LARGE;
+        }
+      if (rc)
+        goto leave;
+
+      /* Note that mpi_set_opaque takes ownership of VALUE.  */
+      *ret_mpi = mpi_set_opaque (NULL, value, valuelen*8);
+    }
+  else if (ctx->encoding == PUBKEY_ENC_RAW
+           && (lhash || (lvalue && (parsed_flags & PUBKEY_FLAG_PREHASH)))
+           && ((parsed_flags & PUBKEY_FLAG_RAW_FLAG)
+               || (parsed_flags & PUBKEY_FLAG_RFC6979)))
+    {
+      void * value;
+      size_t valuelen;
+      gcry_sexp_t list;
+
+      /* Raw encoding along with a hash element.  This is commonly
+         used for DSA.  For better backward error compatibility we
+         allow this only if either the rfc6979 flag has been given or
+         the raw flags was explicitly given.  */
+
+      if (lvalue && (parsed_flags & PUBKEY_FLAG_PREHASH))
+        {
+          /* Get HASH-ALGO. */
+          list = sexp_find_token (ldata, "hash-algo", 0);
+          if (list)
+            {
+              s = sexp_nth_data (list, 1, &n);
+              if (!s)
+                rc = GPG_ERR_NO_OBJ;
+              else
+                {
+                  ctx->hash_algo = get_hash_algo (s, n);
+                  if (!ctx->hash_algo)
+                    rc = GPG_ERR_DIGEST_ALGO;
+                }
+              sexp_release (list);
+              if (rc)
+                goto leave;
+            }
+
+          /* Get optional LABEL.  */
+          list = sexp_find_token (ldata, "label", 0);
+          if (list)
+            {
+              s = sexp_nth_data (list, 1, &n);
+              if (!s)
+                rc = GPG_ERR_NO_OBJ;
+              else if (n > 0)
+                {
+                  ctx->label = xtrymalloc (n);
+                  if (!ctx->label)
+                    rc = gpg_err_code_from_syserror ();
+                  else
+                    {
+                      memcpy (ctx->label, s, n);
+                      ctx->labellen = n;
+                    }
+                }
+              else
+                rc = GPG_ERR_INV_ARG;
+              sexp_release (list);
+              if (rc)
+                goto leave;
+            }
+
+          if ( !(value=sexp_nth_buffer (lvalue, 1, &valuelen)) || !valuelen )
+            rc = GPG_ERR_INV_OBJ;
+          else if ((valuelen * 8) < valuelen)
+            {
+              xfree (value);
+              rc = GPG_ERR_TOO_LARGE;
+            }
+          else
+            *ret_mpi = mpi_set_opaque (NULL, value, valuelen*8);
+        }
+      else if (lhash)
+        {
+          /* Get optional LABEL.  */
+          list = sexp_find_token (ldata, "label", 0);
+          if (list)
+            {
+              s = sexp_nth_data (list, 1, &n);
+              if (!s)
+                rc = GPG_ERR_NO_OBJ;
+              else if (n > 0)
+                {
+                  ctx->label = xtrymalloc (n);
+                  if (!ctx->label)
+                    rc = gpg_err_code_from_syserror ();
+                  else
+                    {
+                      memcpy (ctx->label, s, n);
+                      ctx->labellen = n;
+                    }
+                }
+              else
+                rc = GPG_ERR_INV_ARG;
+              sexp_release (list);
+              if (rc)
+                goto leave;
+            }
+
+          if (sexp_length (lhash) != 3)
+            rc = GPG_ERR_INV_OBJ;
+          else if ( !(s=sexp_nth_data (lhash, 1, &n)) || !n )
+            rc = GPG_ERR_INV_OBJ;
+          else
+            {
+              ctx->hash_algo = get_hash_algo (s, n);
+
+              if (!ctx->hash_algo)
+                rc = GPG_ERR_DIGEST_ALGO;
+              else if ( !(value=sexp_nth_buffer (lhash, 2, &valuelen))
+                        || !valuelen )
+                rc = GPG_ERR_INV_OBJ;
+              else if ((valuelen * 8) < valuelen)
+                {
+                  xfree (value);
+                  rc = GPG_ERR_TOO_LARGE;
+                }
+              else
+                *ret_mpi = mpi_set_opaque (NULL, value, valuelen*8);
+            }
+        }
+      else
+        rc = GPG_ERR_CONFLICT;
+
+      if (rc)
+        goto leave;
+    }
+  else if (ctx->encoding == PUBKEY_ENC_RAW && lvalue)
+    {
+      /* RFC6979 may only be used with the a hash value and not the
+         MPI based value.  */
+      if (parsed_flags & PUBKEY_FLAG_RFC6979)
+        {
+          rc = GPG_ERR_CONFLICT;
+          goto leave;
+        }
+
+      /* Get the value */
+      *ret_mpi = sexp_nth_mpi (lvalue, 1, GCRYMPI_FMT_USG);
+      if (!*ret_mpi)
+        rc = GPG_ERR_INV_OBJ;
+    }
+  else if (ctx->encoding == PUBKEY_ENC_PKCS1 && lvalue
+          && ctx->op == PUBKEY_OP_ENCRYPT)
+    {
+      const void * value;
+      size_t valuelen;
+      gcry_sexp_t list;
+      void *random_override = NULL;
+      size_t random_override_len = 0;
+
+      /* The RSA PKCS#1.5 encryption is no longer supported by FIPS */
+      if (fips_mode ())
+        rc = GPG_ERR_INV_FLAG;
+      else if ( !(value=sexp_nth_data (lvalue, 1, &valuelen)) || !valuelen )
+        rc = GPG_ERR_INV_OBJ;
+      else
+        {
+          /* Get optional RANDOM-OVERRIDE.  */
+          list = sexp_find_token (ldata, "random-override", 0);
+          if (list)
+            {
+              s = sexp_nth_data (list, 1, &n);
+              if (!s)
+                rc = GPG_ERR_NO_OBJ;
+              else if (n > 0)
+                {
+                  random_override = xtrymalloc (n);
+                  if (!random_override)
+                    rc = gpg_err_code_from_syserror ();
+                  else
+                    {
+                      memcpy (random_override, s, n);
+                      random_override_len = n;
+                    }
+                }
+              sexp_release (list);
+              if (rc)
+                goto leave;
+            }
+
+          rc = _gcry_rsa_pkcs1_encode_for_enc (ret_mpi, ctx->nbits,
+                                               value, valuelen,
+                                               random_override,
+                                               random_override_len);
+          xfree (random_override);
+        }
+    }
+  else if (ctx->encoding == PUBKEY_ENC_PKCS1
+           && (lhash || (lvalue && (parsed_flags & PUBKEY_FLAG_PREHASH)))
+          && (ctx->op == PUBKEY_OP_SIGN || ctx->op == PUBKEY_OP_VERIFY))
+    {
+      if (lvalue && (parsed_flags & PUBKEY_FLAG_PREHASH))
+        {
+          void * value;
+          size_t valuelen;
+          gcry_sexp_t list;
+
+          /* Get HASH-ALGO. */
+          list = sexp_find_token (ldata, "hash-algo", 0);
+          if (list)
+            {
+              s = sexp_nth_data (list, 1, &n);
+              if (!s)
+                rc = GPG_ERR_NO_OBJ;
+              else
+                {
+                  ctx->hash_algo = get_hash_algo (s, n);
+                  if (!ctx->hash_algo)
+                    rc = GPG_ERR_DIGEST_ALGO;
+                }
+              sexp_release (list);
+              if (rc)
+                goto leave;
+            }
+
+          if ( !(value=sexp_nth_buffer (lvalue, 1, &valuelen)) || !valuelen )
+            rc = GPG_ERR_INV_OBJ;
+          else if ((valuelen * 8) < valuelen)
+            {
+              xfree (value);
+              rc = GPG_ERR_TOO_LARGE;
+            }
+          else
+            {
+              void *hash;
+
+              n = _gcry_md_get_algo_dlen (ctx->hash_algo);
+              hash = xtrymalloc (n);
+              if (!hash)
+                rc = gpg_err_code_from_syserror ();
+              else
+                {
+                  _gcry_md_hash_buffer (ctx->hash_algo, hash, value, valuelen);
+                  rc = _gcry_rsa_pkcs1_encode_for_sig (ret_mpi, ctx->nbits,
+                                                       hash, n, 
ctx->hash_algo);
+                  xfree (hash);
+                }
+            }
+        }
+      else if (lhash)
+        {
+          if (sexp_length (lhash) != 3)
+            rc = GPG_ERR_INV_OBJ;
+          else if ( !(s=sexp_nth_data (lhash, 1, &n)) || !n )
+            rc = GPG_ERR_INV_OBJ;
+          else
+            {
+              const void * value;
+              size_t valuelen;
+
+              ctx->hash_algo = get_hash_algo (s, n);
+
+              if (!ctx->hash_algo)
+                rc = GPG_ERR_DIGEST_ALGO;
+              else if ( !(value=sexp_nth_data (lhash, 2, &valuelen))
+                        || !valuelen )
+                rc = GPG_ERR_INV_OBJ;
+              else
+                rc = _gcry_rsa_pkcs1_encode_for_sig (ret_mpi, ctx->nbits,
+                                                     value, valuelen,
+                                                     ctx->hash_algo);
+            }
+        }
+    }
+  else if (ctx->encoding == PUBKEY_ENC_PKCS1_RAW && lvalue
+          && (ctx->op == PUBKEY_OP_SIGN || ctx->op == PUBKEY_OP_VERIFY))
+    {
+      const void * value;
+      size_t valuelen;
+
+      if (sexp_length (lvalue) != 2)
+        rc = GPG_ERR_INV_OBJ;
+      else if ( !(value=sexp_nth_data (lvalue, 1, &valuelen))
+                || !valuelen )
+        rc = GPG_ERR_INV_OBJ;
+      else
+        rc = _gcry_rsa_pkcs1_encode_raw_for_sig (ret_mpi, ctx->nbits,
+                                                 value, valuelen);
+    }
+  else if (ctx->encoding == PUBKEY_ENC_OAEP && lvalue
+          && ctx->op == PUBKEY_OP_ENCRYPT)
+    {
+      const void * value;
+      size_t valuelen;
+
+      /* The RSA OAEP encryption requires some more assurances in FIPS */
+      if (fips_mode ())
+        rc = GPG_ERR_INV_FLAG;
+      else if ( !(value=sexp_nth_data (lvalue, 1, &valuelen)) || !valuelen )
+       rc = GPG_ERR_INV_OBJ;
+      else
+       {
+         gcry_sexp_t list;
+          void *random_override = NULL;
+          size_t random_override_len = 0;
+
+         /* Get HASH-ALGO. */
+         list = sexp_find_token (ldata, "hash-algo", 0);
+         if (list)
+           {
+             s = sexp_nth_data (list, 1, &n);
+             if (!s)
+               rc = GPG_ERR_NO_OBJ;
+             else
+               {
+                 ctx->hash_algo = get_hash_algo (s, n);
+                 if (!ctx->hash_algo)
+                   rc = GPG_ERR_DIGEST_ALGO;
+               }
+             sexp_release (list);
+             if (rc)
+               goto leave;
+           }
+
+         /* Get LABEL. */
+         list = sexp_find_token (ldata, "label", 0);
+         if (list)
+           {
+             s = sexp_nth_data (list, 1, &n);
+             if (!s)
+               rc = GPG_ERR_NO_OBJ;
+             else if (n > 0)
+               {
+                 ctx->label = xtrymalloc (n);
+                 if (!ctx->label)
+                   rc = gpg_err_code_from_syserror ();
+                 else
+                   {
+                     memcpy (ctx->label, s, n);
+                     ctx->labellen = n;
+                   }
+               }
+             sexp_release (list);
+             if (rc)
+               goto leave;
+           }
+          /* Get optional RANDOM-OVERRIDE.  */
+          list = sexp_find_token (ldata, "random-override", 0);
+          if (list)
+            {
+              s = sexp_nth_data (list, 1, &n);
+              if (!s)
+                rc = GPG_ERR_NO_OBJ;
+              else if (n > 0)
+                {
+                  random_override = xtrymalloc (n);
+                  if (!random_override)
+                    rc = gpg_err_code_from_syserror ();
+                  else
+                    {
+                      memcpy (random_override, s, n);
+                      random_override_len = n;
+                    }
+                }
+              sexp_release (list);
+              if (rc)
+                goto leave;
+            }
+
+         rc = _gcry_rsa_oaep_encode (ret_mpi, ctx->nbits, ctx->hash_algo,
+                                      value, valuelen,
+                                      ctx->label, ctx->labellen,
+                                      random_override, random_override_len);
+
+          xfree (random_override);
+       }
+    }
+  else if (ctx->encoding == PUBKEY_ENC_PSS && ctx->op == PUBKEY_OP_SIGN)
+    {
+      const void * value;
+      size_t valuelen;
+      gcry_sexp_t list;
+      void *random_override = NULL;
+
+      if (lvalue)
+        {
+          /* Get HASH-ALGO. */
+          list = sexp_find_token (ldata, "hash-algo", 0);
+          if (list)
+            {
+              s = sexp_nth_data (list, 1, &n);
+              if (!s)
+                rc = GPG_ERR_NO_OBJ;
+              else
+                {
+                  ctx->hash_algo = get_hash_algo (s, n);
+                  if (!ctx->hash_algo)
+                    rc = GPG_ERR_DIGEST_ALGO;
+                }
+              sexp_release (list);
+              if (rc)
+                goto leave;
+            }
+
+          if ( !(value=sexp_nth_data (lvalue, 1, &valuelen)) || !valuelen )
+            rc = GPG_ERR_INV_OBJ;
+          ctx->flags |= PUBKEY_FLAG_PREHASH;
+        }
+      else if (lhash)
+        {
+          if (sexp_length (lhash) != 3)
+            rc = GPG_ERR_INV_OBJ;
+          else if ( !(s=sexp_nth_data (lhash, 1, &n)) || !n )
+            rc = GPG_ERR_INV_OBJ;
+          else
+            {
+              ctx->hash_algo = get_hash_algo (s, n);
+
+              if (!ctx->hash_algo)
+                rc = GPG_ERR_DIGEST_ALGO;
+              else if ( !(value=sexp_nth_data (lhash, 2, &valuelen))
+                        || !valuelen )
+                rc = GPG_ERR_INV_OBJ;
+            }
+        }
+      else
+        rc = GPG_ERR_CONFLICT;
+
+      if (rc)
+        goto leave;
+
+      /* Get SALT-LENGTH. */
+      list = sexp_find_token (ldata, "salt-length", 0);
+      if (list)
+        {
+          s = sexp_nth_data (list, 1, &n);
+          if (!s)
+            {
+              rc = GPG_ERR_NO_OBJ;
+              goto leave;
+            }
+          ctx->saltlen = (unsigned int)strtoul (s, NULL, 10);
+          sexp_release (list);
+        }
+
+      /* Get optional RANDOM-OVERRIDE.  */
+      list = sexp_find_token (ldata, "random-override", 0);
+      if (list)
+        {
+          s = sexp_nth_data (list, 1, &n);
+          if (!s)
+            rc = GPG_ERR_NO_OBJ;
+          else if (n == ctx->saltlen)
+            {
+              random_override = xtrymalloc (n);
+              if (!random_override)
+                rc = gpg_err_code_from_syserror ();
+              else
+                memcpy (random_override, s, n);
+            }
+          else
+            rc = GPG_ERR_INV_ARG;
+          sexp_release (list);
+          if (rc)
+            goto leave;
+        }
+
+      /* Encode the data.  (NBITS-1 is due to 8.1.1, step 1.) */
+      rc = _gcry_rsa_pss_encode (ret_mpi, ctx->nbits - 1,
+                                 ctx->hash_algo, ctx->saltlen,
+                                 !(ctx->flags & PUBKEY_FLAG_PREHASH),
+                                 value, valuelen,
+                                 random_override);
+      xfree (random_override);
+    }
+  else if (ctx->encoding == PUBKEY_ENC_PSS && ctx->op == PUBKEY_OP_VERIFY)
+    {
+      gcry_sexp_t list;
+
+      if (lvalue)
+        {
+          /* Get HASH-ALGO. */
+          list = sexp_find_token (ldata, "hash-algo", 0);
+          if (list)
+            {
+              s = sexp_nth_data (list, 1, &n);
+              if (!s)
+                rc = GPG_ERR_NO_OBJ;
+              else
+                {
+                  ctx->hash_algo = get_hash_algo (s, n);
+                  if (!ctx->hash_algo)
+                    rc = GPG_ERR_DIGEST_ALGO;
+                }
+              sexp_release (list);
+              if (rc)
+                goto leave;
+            }
+
+          *ret_mpi = sexp_nth_mpi (lvalue, 1, GCRYMPI_FMT_OPAQUE);
+          if (!*ret_mpi)
+            rc = GPG_ERR_INV_OBJ;
+
+          ctx->flags |= PUBKEY_FLAG_PREHASH;
+        }
+      else if (lhash)
+        {
+          if (sexp_length (lhash) != 3)
+            rc = GPG_ERR_INV_OBJ;
+          else if ( !(s=sexp_nth_data (lhash, 1, &n)) || !n )
+            rc = GPG_ERR_INV_OBJ;
+          else
+            {
+              ctx->hash_algo = get_hash_algo (s, n);
+
+              if (!ctx->hash_algo)
+                rc = GPG_ERR_DIGEST_ALGO;
+              else
+                {
+                  *ret_mpi = sexp_nth_mpi (lhash, 2, GCRYMPI_FMT_OPAQUE);
+                  if (!*ret_mpi)
+                    rc = GPG_ERR_INV_OBJ;
+                }
+            }
+        }
+      else
+        rc = GPG_ERR_CONFLICT;
+
+      if (rc)
+        goto leave;
+
+      /* Get SALT-LENGTH. */
+      list = sexp_find_token (ldata, "salt-length", 0);
+      if (list)
+        {
+          s = sexp_nth_data (list, 1, &n);
+          if (!s)
+            {
+              rc = GPG_ERR_NO_OBJ;
+              goto leave;
+            }
+          ctx->saltlen = (unsigned int)strtoul (s, NULL, 10);
+          if (ctx->saltlen > 16384)
+            rc = GPG_ERR_TOO_LARGE;
+          sexp_release (list);
+          if (rc)
+            goto leave;
+        }
+
+      ctx->verify_cmp = pss_verify_cmp;
+      ctx->verify_arg = *ret_mpi;
+    }
+  else
+    rc = GPG_ERR_CONFLICT;
+
+ leave:
+  sexp_release (ldata);
+  sexp_release (lhash);
+  sexp_release (lvalue);
+
+  if (!rc)
+    ctx->flags |= parsed_flags;
+  else
+    {
+      xfree (ctx->label);
+      ctx->label = NULL;
+    }
+
+  return rc;
+}
diff --git a/grub-core/lib/libgcrypt/cipher/pubkey.c 
b/grub-core/lib/libgcrypt/cipher/pubkey.c
index ca087ad75..4612f64d6 100644
--- a/grub-core/lib/libgcrypt/cipher/pubkey.c
+++ b/grub-core/lib/libgcrypt/cipher/pubkey.c
@@ -1,6 +1,7 @@
 /* pubkey.c  - pubkey dispatcher
  * Copyright (C) 1998, 1999, 2000, 2002, 2003, 2005,
  *               2007, 2008, 2011 Free Software Foundation, Inc.
+ * Copyright (C) 2013 g10 Code GmbH
  *
  * This file is part of Libgcrypt.
  *
@@ -27,2796 +28,267 @@
 #include "g10lib.h"
 #include "mpi.h"
 #include "cipher.h"
-#include "ath.h"
+#include "context.h"
+#include "pubkey-internal.h"
 
 
-static gcry_err_code_t pubkey_decrypt (int algo, gcry_mpi_t *result,
-                                       gcry_mpi_t *data, gcry_mpi_t *skey,
-                                       int flags);
-static gcry_err_code_t pubkey_sign (int algo, gcry_mpi_t *resarr,
-                                    gcry_mpi_t hash, gcry_mpi_t *skey);
-static gcry_err_code_t pubkey_verify (int algo, gcry_mpi_t hash,
-                                      gcry_mpi_t *data, gcry_mpi_t *pkey,
-                                    int (*cmp) (void *, gcry_mpi_t),
-                                      void *opaque);
-
-
-/* A dummy extraspec so that we do not need to tests the extraspec
-   field from the module specification against NULL and instead
-   directly test the respective fields of extraspecs.  */
-static pk_extra_spec_t dummy_extra_spec;
-
-
-/* This is the list of the default public-key ciphers included in
-   libgcrypt.  FIPS_ALLOWED indicated whether the algorithm is used in
-   FIPS mode. */
-static struct pubkey_table_entry
-{
-  gcry_pk_spec_t *pubkey;
-  pk_extra_spec_t *extraspec;
-  unsigned int algorithm;
-  int fips_allowed;
-} pubkey_table[] =
+/* This is the list of the public-key algorithms included in
+   Libgcrypt.  */
+static gcry_pk_spec_t * const pubkey_list[] =
   {
-#if USE_RSA
-    { &_gcry_pubkey_spec_rsa,
-      &_gcry_pubkey_extraspec_rsa,   GCRY_PK_RSA, 1},
+#if USE_ECC
+    &_gcry_pubkey_spec_ecc,
 #endif
-#if USE_ELGAMAL
-    { &_gcry_pubkey_spec_elg,
-      &_gcry_pubkey_extraspec_elg,    GCRY_PK_ELG   },
-    { &_gcry_pubkey_spec_elg,
-      &_gcry_pubkey_extraspec_elg,    GCRY_PK_ELG_E },
+#if USE_RSA
+    &_gcry_pubkey_spec_rsa,
 #endif
 #if USE_DSA
-    { &_gcry_pubkey_spec_dsa,
-      &_gcry_pubkey_extraspec_dsa,   GCRY_PK_DSA, 1   },
+    &_gcry_pubkey_spec_dsa,
 #endif
-#if USE_ECC
-    { &_gcry_pubkey_spec_ecdsa,
-      &_gcry_pubkey_extraspec_ecdsa, GCRY_PK_ECDSA, 0 },
-    { &_gcry_pubkey_spec_ecdh,
-      &_gcry_pubkey_extraspec_ecdsa, GCRY_PK_ECDH, 0 },
+#if USE_ELGAMAL
+    &_gcry_pubkey_spec_elg,
 #endif
-    { NULL, 0 },
+    NULL
   };
 
-/* List of registered ciphers.  */
-static gcry_module_t pubkeys_registered;
-
-/* This is the lock protecting PUBKEYS_REGISTERED.  */
-static ath_mutex_t pubkeys_registered_lock = ATH_MUTEX_INITIALIZER;;
-
-/* Flag to check whether the default pubkeys have already been
-   registered.  */
-static int default_pubkeys_registered;
-
-/* Convenient macro for registering the default digests.  */
-#define REGISTER_DEFAULT_PUBKEYS                   \
-  do                                               \
-    {                                              \
-      ath_mutex_lock (&pubkeys_registered_lock);   \
-      if (! default_pubkeys_registered)            \
-        {                                          \
-          pk_register_default ();                  \
-          default_pubkeys_registered = 1;          \
-        }                                          \
-      ath_mutex_unlock (&pubkeys_registered_lock); \
-    }                                              \
-  while (0)
-
-/* These dummy functions are used in case a cipher implementation
-   refuses to provide it's own functions.  */
-
-static gcry_err_code_t
-dummy_generate (int algorithm, unsigned int nbits, unsigned long dummy,
-                gcry_mpi_t *skey, gcry_mpi_t **retfactors)
-{
-  (void)algorithm;
-  (void)nbits;
-  (void)dummy;
-  (void)skey;
-  (void)retfactors;
-  fips_signal_error ("using dummy public key function");
-  return GPG_ERR_NOT_IMPLEMENTED;
-}
-
-static gcry_err_code_t
-dummy_check_secret_key (int algorithm, gcry_mpi_t *skey)
-{
-  (void)algorithm;
-  (void)skey;
-  fips_signal_error ("using dummy public key function");
-  return GPG_ERR_NOT_IMPLEMENTED;
-}
-
-static gcry_err_code_t
-dummy_encrypt (int algorithm, gcry_mpi_t *resarr, gcry_mpi_t data,
-               gcry_mpi_t *pkey, int flags)
-{
-  (void)algorithm;
-  (void)resarr;
-  (void)data;
-  (void)pkey;
-  (void)flags;
-  fips_signal_error ("using dummy public key function");
-  return GPG_ERR_NOT_IMPLEMENTED;
-}
-
-static gcry_err_code_t
-dummy_decrypt (int algorithm, gcry_mpi_t *result, gcry_mpi_t *data,
-               gcry_mpi_t *skey, int flags)
-{
-  (void)algorithm;
-  (void)result;
-  (void)data;
-  (void)skey;
-  (void)flags;
-  fips_signal_error ("using dummy public key function");
-  return GPG_ERR_NOT_IMPLEMENTED;
-}
-
-static gcry_err_code_t
-dummy_sign (int algorithm, gcry_mpi_t *resarr, gcry_mpi_t data,
-            gcry_mpi_t *skey)
-{
-  (void)algorithm;
-  (void)resarr;
-  (void)data;
-  (void)skey;
-  fips_signal_error ("using dummy public key function");
-  return GPG_ERR_NOT_IMPLEMENTED;
-}
-
-static gcry_err_code_t
-dummy_verify (int algorithm, gcry_mpi_t hash, gcry_mpi_t *data,
-              gcry_mpi_t *pkey,
-             int (*cmp) (void *, gcry_mpi_t), void *opaquev)
-{
-  (void)algorithm;
-  (void)hash;
-  (void)data;
-  (void)pkey;
-  (void)cmp;
-  (void)opaquev;
-  fips_signal_error ("using dummy public key function");
-  return GPG_ERR_NOT_IMPLEMENTED;
-}
-
-static unsigned
-dummy_get_nbits (int algorithm, gcry_mpi_t *pkey)
-{
-  (void)algorithm;
-  (void)pkey;
-  fips_signal_error ("using dummy public key function");
-  return 0;
-}
-
-/* Internal function.  Register all the pubkeys included in
-   PUBKEY_TABLE.  Returns zero on success or an error code.  */
-static void
-pk_register_default (void)
-{
-  gcry_err_code_t err = 0;
-  int i;
-
-  for (i = 0; (! err) && pubkey_table[i].pubkey; i++)
-    {
-#define pubkey_use_dummy(func)                       \
-      if (! pubkey_table[i].pubkey->func)            \
-       pubkey_table[i].pubkey->func = dummy_##func;
-
-      pubkey_use_dummy (generate);
-      pubkey_use_dummy (check_secret_key);
-      pubkey_use_dummy (encrypt);
-      pubkey_use_dummy (decrypt);
-      pubkey_use_dummy (sign);
-      pubkey_use_dummy (verify);
-      pubkey_use_dummy (get_nbits);
-#undef pubkey_use_dummy
-
-      err = _gcry_module_add (&pubkeys_registered,
-                             pubkey_table[i].algorithm,
-                             (void *) pubkey_table[i].pubkey,
-                             (void *) pubkey_table[i].extraspec,
-                              NULL);
-    }
-
-  if (err)
-    BUG ();
-}
-
-/* Internal callback function.  Used via _gcry_module_lookup.  */
-static int
-gcry_pk_lookup_func_name (void *spec, void *data)
-{
-  gcry_pk_spec_t *pubkey = (gcry_pk_spec_t *) spec;
-  char *name = (char *) data;
-  const char **aliases = pubkey->aliases;
-  int ret = stricmp (name, pubkey->name);
-
-  while (ret && *aliases)
-    ret = stricmp (name, *aliases++);
-
-  return ! ret;
-}
-
-/* Internal function.  Lookup a pubkey entry by it's name.  */
-static gcry_module_t
-gcry_pk_lookup_name (const char *name)
-{
-  gcry_module_t pubkey;
-
-  pubkey = _gcry_module_lookup (pubkeys_registered, (void *) name,
-                               gcry_pk_lookup_func_name);
-
-  return pubkey;
-}
-
-/* Register a new pubkey module whose specification can be found in
-   PUBKEY.  On success, a new algorithm ID is stored in ALGORITHM_ID
-   and a pointer representhing this module is stored in MODULE.  */
-gcry_error_t
-_gcry_pk_register (gcry_pk_spec_t *pubkey,
-                   pk_extra_spec_t *extraspec,
-                   unsigned int *algorithm_id,
-                   gcry_module_t *module)
-{
-  gcry_err_code_t err = GPG_ERR_NO_ERROR;
-  gcry_module_t mod;
-
-  /* We do not support module loading in fips mode.  */
-  if (fips_mode ())
-    return gpg_error (GPG_ERR_NOT_SUPPORTED);
-
-  ath_mutex_lock (&pubkeys_registered_lock);
-  err = _gcry_module_add (&pubkeys_registered, 0,
-                         (void *) pubkey,
-                         (void *)(extraspec? extraspec : &dummy_extra_spec),
-                          &mod);
-  ath_mutex_unlock (&pubkeys_registered_lock);
-
-  if (! err)
-    {
-      *module = mod;
-      *algorithm_id = mod->mod_id;
-    }
-
-  return err;
-}
-
-/* Unregister the pubkey identified by ID, which must have been
-   registered with gcry_pk_register.  */
-void
-gcry_pk_unregister (gcry_module_t module)
-{
-  ath_mutex_lock (&pubkeys_registered_lock);
-  _gcry_module_release (module);
-  ath_mutex_unlock (&pubkeys_registered_lock);
-}
-
-static void
-release_mpi_array (gcry_mpi_t *array)
-{
-  for (; *array; array++)
-    {
-      mpi_free(*array);
-      *array = NULL;
-    }
-}
-
-/****************
- * Map a string to the pubkey algo
- */
-int
-gcry_pk_map_name (const char *string)
-{
-  gcry_module_t pubkey;
-  int algorithm = 0;
-
-  if (!string)
-    return 0;
-
-  REGISTER_DEFAULT_PUBKEYS;
-
-  ath_mutex_lock (&pubkeys_registered_lock);
-  pubkey = gcry_pk_lookup_name (string);
-  if (pubkey)
-    {
-      algorithm = pubkey->mod_id;
-      _gcry_module_release (pubkey);
-    }
-  ath_mutex_unlock (&pubkeys_registered_lock);
-
-  return algorithm;
-}
-
-
-/* Map the public key algorithm whose ID is contained in ALGORITHM to
-   a string representation of the algorithm name.  For unknown
-   algorithm IDs this functions returns "?". */
-const char *
-gcry_pk_algo_name (int algorithm)
-{
-  gcry_module_t pubkey;
-  const char *name;
-
-  REGISTER_DEFAULT_PUBKEYS;
-
-  ath_mutex_lock (&pubkeys_registered_lock);
-  pubkey = _gcry_module_lookup_id (pubkeys_registered, algorithm);
-  if (pubkey)
-    {
-      name = ((gcry_pk_spec_t *) pubkey->spec)->name;
-      _gcry_module_release (pubkey);
-    }
-  else
-    name = "?";
-  ath_mutex_unlock (&pubkeys_registered_lock);
-
-  return name;
-}
-
-
-/* A special version of gcry_pk_algo name to return the first aliased
-   name of the algorithm.  This is required to adhere to the spki
-   specs where the algorithm names are lowercase. */
-const char *
-_gcry_pk_aliased_algo_name (int algorithm)
-{
-  const char *name = NULL;
-  gcry_module_t module;
-
-  REGISTER_DEFAULT_PUBKEYS;
-
-  ath_mutex_lock (&pubkeys_registered_lock);
-  module = _gcry_module_lookup_id (pubkeys_registered, algorithm);
-  if (module)
-    {
-      gcry_pk_spec_t *pubkey = (gcry_pk_spec_t *) module->spec;
-
-      name = pubkey->aliases? *pubkey->aliases : NULL;
-      if (!name || !*name)
-        name = pubkey->name;
-      _gcry_module_release (module);
-    }
-  ath_mutex_unlock (&pubkeys_registered_lock);
-
-  return name;
-}
-
-
-static void
-disable_pubkey_algo (int algorithm)
-{
-  gcry_module_t pubkey;
-
-  ath_mutex_lock (&pubkeys_registered_lock);
-  pubkey = _gcry_module_lookup_id (pubkeys_registered, algorithm);
-  if (pubkey)
-    {
-      if (! (pubkey-> flags & FLAG_MODULE_DISABLED))
-       pubkey->flags |= FLAG_MODULE_DISABLED;
-      _gcry_module_release (pubkey);
-    }
-  ath_mutex_unlock (&pubkeys_registered_lock);
-}
-
 
-/****************
- * A USE of 0 means: don't care.
- */
-static gcry_err_code_t
-check_pubkey_algo (int algorithm, unsigned use)
-{
-  gcry_err_code_t err = GPG_ERR_NO_ERROR;
-  gcry_pk_spec_t *pubkey;
-  gcry_module_t module;
-
-  REGISTER_DEFAULT_PUBKEYS;
-
-  ath_mutex_lock (&pubkeys_registered_lock);
-  module = _gcry_module_lookup_id (pubkeys_registered, algorithm);
-  if (module)
-    {
-      pubkey = (gcry_pk_spec_t *) module->spec;
-
-      if (((use & GCRY_PK_USAGE_SIGN)
-          && (! (pubkey->use & GCRY_PK_USAGE_SIGN)))
-         || ((use & GCRY_PK_USAGE_ENCR)
-             && (! (pubkey->use & GCRY_PK_USAGE_ENCR))))
-       err = GPG_ERR_WRONG_PUBKEY_ALGO;
-      else if (module->flags & FLAG_MODULE_DISABLED)
-       err = GPG_ERR_PUBKEY_ALGO;
-      _gcry_module_release (module);
-    }
-  else
-    err = GPG_ERR_PUBKEY_ALGO;
-  ath_mutex_unlock (&pubkeys_registered_lock);
-
-  return err;
-}
-
-
-/****************
- * Return the number of public key material numbers
- */
-static int
-pubkey_get_npkey (int algorithm)
-{
-  gcry_module_t pubkey;
-  int npkey = 0;
-
-  REGISTER_DEFAULT_PUBKEYS;
-
-  ath_mutex_lock (&pubkeys_registered_lock);
-  pubkey = _gcry_module_lookup_id (pubkeys_registered, algorithm);
-  if (pubkey)
-    {
-      npkey = strlen (((gcry_pk_spec_t *) pubkey->spec)->elements_pkey);
-      _gcry_module_release (pubkey);
-    }
-  ath_mutex_unlock (&pubkeys_registered_lock);
-
-  return npkey;
-}
-
-/****************
- * Return the number of secret key material numbers
- */
-static int
-pubkey_get_nskey (int algorithm)
-{
-  gcry_module_t pubkey;
-  int nskey = 0;
-
-  REGISTER_DEFAULT_PUBKEYS;
-
-  ath_mutex_lock (&pubkeys_registered_lock);
-  pubkey = _gcry_module_lookup_id (pubkeys_registered, algorithm);
-  if (pubkey)
-    {
-      nskey = strlen (((gcry_pk_spec_t *) pubkey->spec)->elements_skey);
-      _gcry_module_release (pubkey);
-    }
-  ath_mutex_unlock (&pubkeys_registered_lock);
-
-  return nskey;
-}
-
-/****************
- * Return the number of signature material numbers
- */
-static int
-pubkey_get_nsig (int algorithm)
-{
-  gcry_module_t pubkey;
-  int nsig = 0;
-
-  REGISTER_DEFAULT_PUBKEYS;
-
-  ath_mutex_lock (&pubkeys_registered_lock);
-  pubkey = _gcry_module_lookup_id (pubkeys_registered, algorithm);
-  if (pubkey)
-    {
-      nsig = strlen (((gcry_pk_spec_t *) pubkey->spec)->elements_sig);
-      _gcry_module_release (pubkey);
-    }
-  ath_mutex_unlock (&pubkeys_registered_lock);
-
-  return nsig;
-}
-
-/****************
- * Return the number of encryption material numbers
- */
 static int
-pubkey_get_nenc (int algorithm)
-{
-  gcry_module_t pubkey;
-  int nenc = 0;
-
-  REGISTER_DEFAULT_PUBKEYS;
-
-  ath_mutex_lock (&pubkeys_registered_lock);
-  pubkey = _gcry_module_lookup_id (pubkeys_registered, algorithm);
-  if (pubkey)
-    {
-      nenc = strlen (((gcry_pk_spec_t *) pubkey->spec)->elements_enc);
-      _gcry_module_release (pubkey);
-    }
-  ath_mutex_unlock (&pubkeys_registered_lock);
-
-  return nenc;
-}
-
-
-/* Generate a new public key with algorithm ALGORITHM of size NBITS
-   and return it at SKEY.  USE_E depends on the ALGORITHM.  GENPARMS
-   is passed to the algorithm module if it features an extended
-   generation function.  RETFACTOR is used by some algorithms to
-   return certain additional information which are in general not
-   required.
-
-   The function returns the error code number or 0 on success. */
-static gcry_err_code_t
-pubkey_generate (int algorithm,
-                 unsigned int nbits,
-                 unsigned long use_e,
-                 gcry_sexp_t genparms,
-                 gcry_mpi_t *skey, gcry_mpi_t **retfactors,
-                 gcry_sexp_t *r_extrainfo)
+map_algo (int algo)
 {
-  gcry_err_code_t ec = GPG_ERR_PUBKEY_ALGO;
-  gcry_module_t pubkey;
-
-  REGISTER_DEFAULT_PUBKEYS;
-
-  ath_mutex_lock (&pubkeys_registered_lock);
-  pubkey = _gcry_module_lookup_id (pubkeys_registered, algorithm);
-  if (pubkey)
-    {
-      pk_extra_spec_t *extraspec = pubkey->extraspec;
-
-      if (extraspec && extraspec->ext_generate)
-        {
-          /* Use the extended generate function.  */
-          ec = extraspec->ext_generate
-            (algorithm, nbits, use_e, genparms, skey, retfactors, r_extrainfo);
-        }
-      else
-        {
-          /* Use the standard generate function.  */
-          ec = ((gcry_pk_spec_t *) pubkey->spec)->generate
-            (algorithm, nbits, use_e, skey, retfactors);
-        }
-      _gcry_module_release (pubkey);
-    }
-  ath_mutex_unlock (&pubkeys_registered_lock);
-
-  return ec;
+ switch (algo)
+   {
+   case GCRY_PK_RSA_E: return GCRY_PK_RSA;
+   case GCRY_PK_RSA_S: return GCRY_PK_RSA;
+   case GCRY_PK_ELG_E: return GCRY_PK_ELG;
+   case GCRY_PK_ECDSA: return GCRY_PK_ECC;
+   case GCRY_PK_EDDSA: return GCRY_PK_ECC;
+   case GCRY_PK_ECDH:  return GCRY_PK_ECC;
+   default:            return algo;
+   }
 }
 
 
-static gcry_err_code_t
-pubkey_check_secret_key (int algorithm, gcry_mpi_t *skey)
+/* Return the spec structure for the public key algorithm ALGO.  For
+   an unknown algorithm NULL is returned.  */
+static gcry_pk_spec_t *
+spec_from_algo (int algo)
 {
-  gcry_err_code_t err = GPG_ERR_PUBKEY_ALGO;
-  gcry_module_t pubkey;
-
-  REGISTER_DEFAULT_PUBKEYS;
+  int idx;
+  gcry_pk_spec_t *spec;
 
-  ath_mutex_lock (&pubkeys_registered_lock);
-  pubkey = _gcry_module_lookup_id (pubkeys_registered, algorithm);
-  if (pubkey)
-    {
-      err = ((gcry_pk_spec_t *) pubkey->spec)->check_secret_key
-        (algorithm, skey);
-      _gcry_module_release (pubkey);
-    }
-  ath_mutex_unlock (&pubkeys_registered_lock);
+  algo = map_algo (algo);
 
-  return err;
+  for (idx = 0; (spec = pubkey_list[idx]); idx++)
+    if (algo == spec->algo)
+      return spec;
+  return NULL;
 }
 
 
-/****************
- * This is the interface to the public key encryption.  Encrypt DATA
- * with PKEY and put it into RESARR which should be an array of MPIs
- * of size PUBKEY_MAX_NENC (or less if the algorithm allows this -
- * check with pubkey_get_nenc() )
- */
-static gcry_err_code_t
-pubkey_encrypt (int algorithm, gcry_mpi_t *resarr, gcry_mpi_t data,
-                gcry_mpi_t *pkey, int flags)
+/* Return the spec structure for the public key algorithm with NAME.
+   For an unknown name NULL is returned.  */
+static gcry_pk_spec_t *
+spec_from_name (const char *name)
 {
-  gcry_pk_spec_t *pubkey;
-  gcry_module_t module;
-  gcry_err_code_t rc;
-  int i;
-
-  /* Note: In fips mode DBG_CIPHER will enver evaluate to true but as
-     an extra failsafe protection we explicitly test for fips mode
-     here. */
-  if (DBG_CIPHER && !fips_mode ())
-    {
-      log_debug ("pubkey_encrypt: algo=%d\n", algorithm);
-      for(i = 0; i < pubkey_get_npkey (algorithm); i++)
-       log_mpidump ("  pkey:", pkey[i]);
-      log_mpidump ("  data:", data);
-    }
+  gcry_pk_spec_t *spec;
+  int idx;
+  const char **aliases;
 
-  ath_mutex_lock (&pubkeys_registered_lock);
-  module = _gcry_module_lookup_id (pubkeys_registered, algorithm);
-  if (module)
+  for (idx=0; (spec = pubkey_list[idx]); idx++)
     {
-      pubkey = (gcry_pk_spec_t *) module->spec;
-      rc = pubkey->encrypt (algorithm, resarr, data, pkey, flags);
-      _gcry_module_release (module);
-      goto ready;
+      if (!stricmp (name, spec->name))
+        return spec;
+      for (aliases = spec->aliases; *aliases; aliases++)
+        if (!stricmp (name, *aliases))
+          return spec;
     }
-  rc = GPG_ERR_PUBKEY_ALGO;
-
- ready:
-  ath_mutex_unlock (&pubkeys_registered_lock);
 
-  if (!rc && DBG_CIPHER && !fips_mode ())
-    {
-      for(i = 0; i < pubkey_get_nenc (algorithm); i++)
-       log_mpidump("  encr:", resarr[i] );
-    }
-  return rc;
+  return NULL;
 }
 
 
-/****************
- * This is the interface to the public key decryption.
- * ALGO gives the algorithm to use and this implicitly determines
- * the size of the arrays.
- * result is a pointer to a mpi variable which will receive a
- * newly allocated mpi or NULL in case of an error.
- */
-static gcry_err_code_t
-pubkey_decrypt (int algorithm, gcry_mpi_t *result, gcry_mpi_t *data,
-                gcry_mpi_t *skey, int flags)
-{
-  gcry_pk_spec_t *pubkey;
-  gcry_module_t module;
-  gcry_err_code_t rc;
-  int i;
-
-  *result = NULL; /* so the caller can always do a mpi_free */
-  if (DBG_CIPHER && !fips_mode ())
-    {
-      log_debug ("pubkey_decrypt: algo=%d\n", algorithm);
-      for(i = 0; i < pubkey_get_nskey (algorithm); i++)
-       log_mpidump ("  skey:", skey[i]);
-      for(i = 0; i < pubkey_get_nenc (algorithm); i++)
-       log_mpidump ("  data:", data[i]);
-    }
-
-  ath_mutex_lock (&pubkeys_registered_lock);
-  module = _gcry_module_lookup_id (pubkeys_registered, algorithm);
-  if (module)
-    {
-      pubkey = (gcry_pk_spec_t *) module->spec;
-      rc = pubkey->decrypt (algorithm, result, data, skey, flags);
-      _gcry_module_release (module);
-      goto ready;
-    }
-
-  rc = GPG_ERR_PUBKEY_ALGO;
-
- ready:
-  ath_mutex_unlock (&pubkeys_registered_lock);
-
-  if (!rc && DBG_CIPHER && !fips_mode ())
-    log_mpidump (" plain:", *result);
-
-  return rc;
-}
 
-
-/****************
- * This is the interface to the public key signing.
- * Sign data with skey and put the result into resarr which
- * should be an array of MPIs of size PUBKEY_MAX_NSIG (or less if the
- * algorithm allows this - check with pubkey_get_nsig() )
+/* Given the s-expression SEXP with the first element be either
+ * "private-key" or "public-key" return the spec structure for it.  We
+ * look through the list to find a list beginning with "private-key"
+ * or "public-key" - the first one found is used.  If WANT_PRIVATE is
+ * set the function will only succeed if a private key has been given.
+ * On success the spec is stored at R_SPEC.  On error NULL is stored
+ * at R_SPEC and an error code returned.  If R_PARMS is not NULL and
+ * the function returns success, the parameter list below
+ * "private-key" or "public-key" is stored there and the caller must
+ * call gcry_sexp_release on it.
  */
 static gcry_err_code_t
-pubkey_sign (int algorithm, gcry_mpi_t *resarr, gcry_mpi_t data,
-             gcry_mpi_t *skey)
+spec_from_sexp (gcry_sexp_t sexp, int want_private,
+                gcry_pk_spec_t **r_spec, gcry_sexp_t *r_parms)
 {
-  gcry_pk_spec_t *pubkey;
-  gcry_module_t module;
-  gcry_err_code_t rc;
-  int i;
+  gcry_sexp_t list, l2;
+  char *name;
+  gcry_pk_spec_t *spec;
 
-  if (DBG_CIPHER && !fips_mode ())
-    {
-      log_debug ("pubkey_sign: algo=%d\n", algorithm);
-      for(i = 0; i < pubkey_get_nskey (algorithm); i++)
-       log_mpidump ("  skey:", skey[i]);
-      log_mpidump("  data:", data );
-    }
+  *r_spec = NULL;
+  if (r_parms)
+    *r_parms = NULL;
+
+  /* Check that the first element is valid.  If we are looking for a
+     public key but a private key was supplied, we allow the use of
+     the private key anyway.  The rationale for this is that the
+     private key is a superset of the public key.  */
+  list = sexp_find_token (sexp, want_private? "private-key":"public-key", 0);
+  if (!list && !want_private)
+    list = sexp_find_token (sexp, "private-key", 0);
+  if (!list)
+    return GPG_ERR_INV_OBJ; /* Does not contain a key object.  */
 
-  ath_mutex_lock (&pubkeys_registered_lock);
-  module = _gcry_module_lookup_id (pubkeys_registered, algorithm);
-  if (module)
-    {
-      pubkey = (gcry_pk_spec_t *) module->spec;
-      rc = pubkey->sign (algorithm, resarr, data, skey);
-      _gcry_module_release (module);
-      goto ready;
-    }
-
-  rc = GPG_ERR_PUBKEY_ALGO;
-
- ready:
-  ath_mutex_unlock (&pubkeys_registered_lock);
-
-  if (!rc && DBG_CIPHER && !fips_mode ())
-    for (i = 0; i < pubkey_get_nsig (algorithm); i++)
-      log_mpidump ("   sig:", resarr[i]);
-
-  return rc;
-}
-
-/****************
- * Verify a public key signature.
- * Return 0 if the signature is good
- */
-static gcry_err_code_t
-pubkey_verify (int algorithm, gcry_mpi_t hash, gcry_mpi_t *data,
-               gcry_mpi_t *pkey,
-              int (*cmp)(void *, gcry_mpi_t), void *opaquev)
-{
-  gcry_pk_spec_t *pubkey;
-  gcry_module_t module;
-  gcry_err_code_t rc;
-  int i;
-
-  if (DBG_CIPHER && !fips_mode ())
-    {
-      log_debug ("pubkey_verify: algo=%d\n", algorithm);
-      for (i = 0; i < pubkey_get_npkey (algorithm); i++)
-       log_mpidump ("  pkey", pkey[i]);
-      for (i = 0; i < pubkey_get_nsig (algorithm); i++)
-       log_mpidump ("   sig", data[i]);
-      log_mpidump ("  hash", hash);
-    }
-
-  ath_mutex_lock (&pubkeys_registered_lock);
-  module = _gcry_module_lookup_id (pubkeys_registered, algorithm);
-  if (module)
-    {
-      pubkey = (gcry_pk_spec_t *) module->spec;
-      rc = pubkey->verify (algorithm, hash, data, pkey, cmp, opaquev);
-      _gcry_module_release (module);
-      goto ready;
-    }
-
-  rc = GPG_ERR_PUBKEY_ALGO;
-
- ready:
-  ath_mutex_unlock (&pubkeys_registered_lock);
-  return rc;
-}
-
-
-/* Turn VALUE into an octet string and store it in an allocated buffer
-   at R_FRAME or - if R_RAME is NULL - copy it into the caller
-   provided buffer SPACE; either SPACE or R_FRAME may be used.  If
-   SPACE if not NULL, the caller must provide a buffer of at least
-   NBYTES.  If the resulting octet string is shorter than NBYTES pad
-   it to the left with zeroes.  If VALUE does not fit into NBYTES
-   return an error code.  */
-static gpg_err_code_t
-octet_string_from_mpi (unsigned char **r_frame, void *space,
-                       gcry_mpi_t value, size_t nbytes)
-{
-  gpg_err_code_t rc;
-  size_t nframe, noff, n;
-  unsigned char *frame;
-
-  if (!r_frame == !space)
-    return GPG_ERR_INV_ARG;  /* Only one may be used.  */
-
-  if (r_frame)
-    *r_frame = NULL;
-
-  rc = gcry_err_code (gcry_mpi_print (GCRYMPI_FMT_USG,
-                                      NULL, 0, &nframe, value));
-  if (rc)
-    return rc;
-  if (nframe > nbytes)
-    return GPG_ERR_TOO_LARGE; /* Value too long to fit into NBYTES.  */
-
-  noff = (nframe < nbytes)? nbytes - nframe : 0;
-  n = nframe + noff;
-  if (space)
-    frame = space;
-  else
-    {
-      frame = mpi_is_secure (value)? gcry_malloc_secure (n) : gcry_malloc (n);
-      if (!frame)
-        {
-          rc = gpg_err_code_from_syserror ();
-          return rc;
-        }
-    }
-  if (noff)
-    memset (frame, 0, noff);
-  nframe += noff;
-  rc = gcry_err_code (gcry_mpi_print (GCRYMPI_FMT_USG,
-                                      frame+noff, nframe-noff, NULL, value));
-  if (rc)
-    {
-      gcry_free (frame);
-      return rc;
-    }
-
-  if (r_frame)
-    *r_frame = frame;
-  return 0;
-}
-
-
-/* Encode {VALUE,VALUELEN} for an NBITS keys using the pkcs#1 block
-   type 2 padding.  On sucess the result is stored as a new MPI at
-   R_RESULT.  On error the value at R_RESULT is undefined.
-
-   If {RANDOM_OVERRIDE, RANDOM_OVERRIDE_LEN} is given it is used as
-   the seed instead of using a random string for it.  This feature is
-   only useful for regression tests.  Note that this value may not
-   contain zero bytes.
-
-   We encode the value in this way:
-
-     0  2  RND(n bytes)  0  VALUE
-
-   0   is a marker we unfortunately can't encode because we return an
-       MPI which strips all leading zeroes.
-   2   is the block type.
-   RND are non-zero random bytes.
-
-   (Note that OpenPGP includes the cipher algorithm and a checksum in
-   VALUE; the caller needs to prepare the value accordingly.)
-  */
-static gcry_err_code_t
-pkcs1_encode_for_encryption (gcry_mpi_t *r_result, unsigned int nbits,
-                            const unsigned char *value, size_t valuelen,
-                             const unsigned char *random_override,
-                             size_t random_override_len)
-{
-  gcry_err_code_t rc = 0;
-  gcry_error_t err;
-  unsigned char *frame = NULL;
-  size_t nframe = (nbits+7) / 8;
-  int i;
-  size_t n;
-  unsigned char *p;
-
-  if (valuelen + 7 > nframe || !nframe)
-    {
-      /* Can't encode a VALUELEN value in a NFRAME bytes frame.  */
-      return GPG_ERR_TOO_SHORT; /* The key is too short.  */
-    }
-
-  if ( !(frame = gcry_malloc_secure (nframe)))
-    return gpg_err_code_from_syserror ();
-
-  n = 0;
-  frame[n++] = 0;
-  frame[n++] = 2; /* block type */
-  i = nframe - 3 - valuelen;
-  gcry_assert (i > 0);
-
-  if (random_override)
-    {
-      int j;
-
-      if (random_override_len != i)
-        {
-          gcry_free (frame);
-          return GPG_ERR_INV_ARG;
-        }
-      /* Check that random does not include a zero byte.  */
-      for (j=0; j < random_override_len; j++)
-        if (!random_override[j])
-          {
-            gcry_free (frame);
-            return GPG_ERR_INV_ARG;
-          }
-      memcpy (frame + n, random_override, random_override_len);
-      n += random_override_len;
-    }
-  else
-    {
-      p = gcry_random_bytes_secure (i, GCRY_STRONG_RANDOM);
-      /* Replace zero bytes by new values. */
-      for (;;)
-        {
-          int j, k;
-          unsigned char *pp;
-
-          /* Count the zero bytes. */
-          for (j=k=0; j < i; j++)
-            {
-              if (!p[j])
-                k++;
-            }
-          if (!k)
-            break; /* Okay: no (more) zero bytes. */
-
-          k += k/128 + 3; /* Better get some more. */
-          pp = gcry_random_bytes_secure (k, GCRY_STRONG_RANDOM);
-          for (j=0; j < i && k; )
-            {
-              if (!p[j])
-                p[j] = pp[--k];
-              if (p[j])
-                j++;
-            }
-          gcry_free (pp);
-        }
-      memcpy (frame+n, p, i);
-      n += i;
-      gcry_free (p);
-    }
-
-  frame[n++] = 0;
-  memcpy (frame+n, value, valuelen);
-  n += valuelen;
-  gcry_assert (n == nframe);
-
-  err = gcry_mpi_scan (r_result, GCRYMPI_FMT_USG, frame, n, &nframe);
-  if (err)
-    rc = gcry_err_code (err);
-  else if (DBG_CIPHER)
-    log_mpidump ("PKCS#1 block type 2 encoded data", *r_result);
-  gcry_free (frame);
-
-  return rc;
-}
-
-
-/* Decode a plaintext in VALUE assuming pkcs#1 block type 2 padding.
-   NBITS is the size of the secret key.  On success the result is
-   stored as a newly allocated buffer at R_RESULT and its valid length at
-   R_RESULTLEN.  On error NULL is stored at R_RESULT.  */
-static gcry_err_code_t
-pkcs1_decode_for_encryption (unsigned char **r_result, size_t *r_resultlen,
-                             unsigned int nbits, gcry_mpi_t value)
-{
-  gcry_error_t err;
-  unsigned char *frame = NULL;
-  size_t nframe = (nbits+7) / 8;
-  size_t n;
-
-  *r_result = NULL;
-
-  if ( !(frame = gcry_malloc_secure (nframe)))
-    return gpg_err_code_from_syserror ();
-
-  err = gcry_mpi_print (GCRYMPI_FMT_USG, frame, nframe, &n, value);
-  if (err)
-    {
-      gcry_free (frame);
-      return gcry_err_code (err);
-    }
-
-  nframe = n; /* Set NFRAME to the actual length.  */
-
-  /* FRAME = 0x00 || 0x02 || PS || 0x00 || M
-
-     pkcs#1 requires that the first byte is zero.  Our MPIs usually
-     strip leading zero bytes; thus we are not able to detect them.
-     However due to the way gcry_mpi_print is implemented we may see
-     leading zero bytes nevertheless.  We handle this by making the
-     first zero byte optional.  */
-  if (nframe < 4)
-    {
-      gcry_free (frame);
-      return GPG_ERR_ENCODING_PROBLEM;  /* Too short.  */
-    }
-  n = 0;
-  if (!frame[0])
-    n++;
-  if (frame[n++] != 0x02)
-    {
-      gcry_free (frame);
-      return GPG_ERR_ENCODING_PROBLEM;  /* Wrong block type.  */
-    }
-
-  /* Skip the non-zero random bytes and the terminating zero byte.  */
-  for (; n < nframe && frame[n] != 0x00; n++)
-    ;
-  if (n+1 >= nframe)
-    {
-      gcry_free (frame);
-      return GPG_ERR_ENCODING_PROBLEM; /* No zero byte.  */
-    }
-  n++; /* Skip the zero byte.  */
-
-  /* To avoid an extra allocation we reuse the frame buffer.  The only
-     caller of this function will anyway free the result soon.  */
-  memmove (frame, frame + n, nframe - n);
-  *r_result = frame;
-  *r_resultlen = nframe - n;
-
-  if (DBG_CIPHER)
-    log_printhex ("value extracted from PKCS#1 block type 2 encoded data:",
-                  *r_result, *r_resultlen);
-
-  return 0;
-}
-
-
-/* Encode {VALUE,VALUELEN} for an NBITS keys and hash algorith ALGO
-   using the pkcs#1 block type 1 padding.  On success the result is
-   stored as a new MPI at R_RESULT.  On error the value at R_RESULT is
-   undefined.
-
-   We encode the value in this way:
-
-     0  1  PAD(n bytes)  0  ASN(asnlen bytes) VALUE(valuelen bytes)
-
-   0   is a marker we unfortunately can't encode because we return an
-       MPI which strips all leading zeroes.
-   1   is the block type.
-   PAD consists of 0xff bytes.
-   0   marks the end of the padding.
-   ASN is the DER encoding of the hash algorithm; along with the VALUE
-       it yields a valid DER encoding.
-
-   (Note that PGP prior to version 2.3 encoded the message digest as:
-      0   1   MD(16 bytes)   0   PAD(n bytes)   1
-    The MD is always 16 bytes here because it's always MD5.  GnuPG
-    does not not support pre-v2.3 signatures, but I'm including this
-    comment so the information is easily found if needed.)
-*/
-static gcry_err_code_t
-pkcs1_encode_for_signature (gcry_mpi_t *r_result, unsigned int nbits,
-                           const unsigned char *value, size_t valuelen,
-                           int algo)
-{
-  gcry_err_code_t rc = 0;
-  gcry_error_t err;
-  byte asn[100];
-  byte *frame = NULL;
-  size_t nframe = (nbits+7) / 8;
-  int i;
-  size_t n;
-  size_t asnlen, dlen;
-
-  asnlen = DIM(asn);
-  dlen = gcry_md_get_algo_dlen (algo);
-
-  if (gcry_md_algo_info (algo, GCRYCTL_GET_ASNOID, asn, &asnlen))
-    {
-      /* We don't have yet all of the above algorithms.  */
-      return GPG_ERR_NOT_IMPLEMENTED;
-    }
-
-  if ( valuelen != dlen )
-    {
-      /* Hash value does not match the length of digest for
-         the given algorithm.  */
-      return GPG_ERR_CONFLICT;
-    }
-
-  if ( !dlen || dlen + asnlen + 4 > nframe)
-    {
-      /* Can't encode an DLEN byte digest MD into an NFRAME byte
-         frame.  */
-      return GPG_ERR_TOO_SHORT;
-    }
-
-  if ( !(frame = gcry_malloc (nframe)) )
-    return gpg_err_code_from_syserror ();
-
-  /* Assemble the pkcs#1 block type 1. */
-  n = 0;
-  frame[n++] = 0;
-  frame[n++] = 1; /* block type */
-  i = nframe - valuelen - asnlen - 3 ;
-  gcry_assert (i > 1);
-  memset (frame+n, 0xff, i );
-  n += i;
-  frame[n++] = 0;
-  memcpy (frame+n, asn, asnlen);
-  n += asnlen;
-  memcpy (frame+n, value, valuelen );
-  n += valuelen;
-  gcry_assert (n == nframe);
-
-  /* Convert it into an MPI. */
-  err = gcry_mpi_scan (r_result, GCRYMPI_FMT_USG, frame, n, &nframe);
-  if (err)
-    rc = gcry_err_code (err);
-  else if (DBG_CIPHER)
-    log_mpidump ("PKCS#1 block type 1 encoded data", *r_result);
-  gcry_free (frame);
-
-  return rc;
-}
-
-
-/* Mask generation function for OAEP.  See RFC-3447 B.2.1.  */
-static gcry_err_code_t
-mgf1 (unsigned char *output, size_t outlen, unsigned char *seed, size_t 
seedlen,
-      int algo)
-{
-  size_t dlen, nbytes, n;
-  int idx;
-  gcry_md_hd_t hd;
-  gcry_error_t err;
-
-  err = gcry_md_open (&hd, algo, 0);
-  if (err)
-    return gpg_err_code (err);
-
-  dlen = gcry_md_get_algo_dlen (algo);
-
-  /* We skip step 1 which would be assert(OUTLEN <= 2^32).  The loop
-     in step 3 is merged with step 4 by concatenating no more octets
-     than what would fit into OUTPUT.  The ceiling for the counter IDX
-     is implemented indirectly.  */
-  nbytes = 0;  /* Step 2.  */
-  idx = 0;
-  while ( nbytes < outlen )
-    {
-      unsigned char c[4], *digest;
-
-      if (idx)
-        gcry_md_reset (hd);
-
-      c[0] = (idx >> 24) & 0xFF;
-      c[1] = (idx >> 16) & 0xFF;
-      c[2] = (idx >> 8) & 0xFF;
-      c[3] = idx & 0xFF;
-      idx++;
-
-      gcry_md_write (hd, seed, seedlen);
-      gcry_md_write (hd, c, 4);
-      digest = gcry_md_read (hd, 0);
-
-      n = (outlen - nbytes < dlen)? (outlen - nbytes) : dlen;
-      memcpy (output+nbytes, digest, n);
-      nbytes += n;
-    }
-
-  gcry_md_close (hd);
-  return GPG_ERR_NO_ERROR;
-}
-
-
-/* RFC-3447 (pkcs#1 v2.1) OAEP encoding.  NBITS is the length of the
-   key measured in bits.  ALGO is the hash function; it must be a
-   valid and usable algorithm.  {VALUE,VALUELEN} is the message to
-   encrypt.  {LABEL,LABELLEN} is the optional label to be associated
-   with the message, if LABEL is NULL the default is to use the empty
-   string as label.  On success the encoded ciphertext is returned at
-   R_RESULT.
-
-   If {RANDOM_OVERRIDE, RANDOM_OVERRIDE_LEN} is given it is used as
-   the seed instead of using a random string for it.  This feature is
-   only useful for regression tests.
-
-   Here is figure 1 from the RFC depicting the process:
-
-                             +----------+---------+-------+
-                        DB = |  lHash   |    PS   |   M   |
-                             +----------+---------+-------+
-                                            |
-                  +----------+              V
-                  |   seed   |--> MGF ---> xor
-                  +----------+              |
-                        |                   |
-               +--+     V                   |
-               |00|    xor <----- MGF <-----|
-               +--+     |                   |
-                 |      |                   |
-                 V      V                   V
-               +--+----------+----------------------------+
-         EM =  |00|maskedSeed|          maskedDB          |
-               +--+----------+----------------------------+
-  */
-static gcry_err_code_t
-oaep_encode (gcry_mpi_t *r_result, unsigned int nbits, int algo,
-             const unsigned char *value, size_t valuelen,
-             const unsigned char *label, size_t labellen,
-             const void *random_override, size_t random_override_len)
-{
-  gcry_err_code_t rc = 0;
-  gcry_error_t err;
-  unsigned char *frame = NULL;
-  size_t nframe = (nbits+7) / 8;
-  unsigned char *p;
-  size_t hlen;
-  size_t n;
-
-  *r_result = NULL;
-
-  /* Set defaults for LABEL.  */
-  if (!label || !labellen)
-    {
-      label = (const unsigned char*)"";
-      labellen = 0;
-    }
-
-  hlen = gcry_md_get_algo_dlen (algo);
-
-  /* We skip step 1a which would be to check that LABELLEN is not
-     greater than 2^61-1.  See rfc-3447 7.1.1. */
-
-  /* Step 1b.  Note that the obsolete rfc-2437 uses the check:
-     valuelen > nframe - 2 * hlen - 1 .  */
-  if (valuelen > nframe - 2 * hlen - 2 || !nframe)
-    {
-      /* Can't encode a VALUELEN value in a NFRAME bytes frame. */
-      return GPG_ERR_TOO_SHORT; /* The key is too short.  */
-    }
-
-  /* Allocate the frame.  */
-  frame = gcry_calloc_secure (1, nframe);
-  if (!frame)
-    return gpg_err_code_from_syserror ();
-
-  /* Step 2a: Compute the hash of the label.  We store it in the frame
-     where later the maskedDB will commence.  */
-  gcry_md_hash_buffer (algo, frame + 1 + hlen, label, labellen);
-
-  /* Step 2b: Set octet string to zero.  */
-  /* This has already been done while allocating FRAME.  */
-
-  /* Step 2c: Create DB by concatenating lHash, PS, 0x01 and M.  */
-  n = nframe - valuelen - 1;
-  frame[n] = 0x01;
-  memcpy (frame + n + 1, value, valuelen);
-
-  /* Step 3d: Generate seed.  We store it where the maskedSeed will go
-     later. */
-  if (random_override)
-    {
-      if (random_override_len != hlen)
-        {
-          gcry_free (frame);
-          return GPG_ERR_INV_ARG;
-        }
-      memcpy (frame + 1, random_override, hlen);
-    }
-  else
-    gcry_randomize (frame + 1, hlen, GCRY_STRONG_RANDOM);
-
-  /* Step 2e and 2f: Create maskedDB.  */
-  {
-    unsigned char *dmask;
-
-    dmask = gcry_malloc_secure (nframe - hlen - 1);
-    if (!dmask)
-      {
-        rc = gpg_err_code_from_syserror ();
-        gcry_free (frame);
-        return rc;
-      }
-    rc = mgf1 (dmask, nframe - hlen - 1, frame+1, hlen, algo);
-    if (rc)
-      {
-        gcry_free (dmask);
-        gcry_free (frame);
-        return rc;
-      }
-    for (n = 1 + hlen, p = dmask; n < nframe; n++)
-      frame[n] ^= *p++;
-    gcry_free (dmask);
-  }
-
-  /* Step 2g and 2h: Create maskedSeed.  */
-  {
-    unsigned char *smask;
-
-    smask = gcry_malloc_secure (hlen);
-    if (!smask)
-      {
-        rc = gpg_err_code_from_syserror ();
-        gcry_free (frame);
-        return rc;
-      }
-    rc = mgf1 (smask, hlen, frame + 1 + hlen, nframe - hlen - 1, algo);
-    if (rc)
-      {
-        gcry_free (smask);
-        gcry_free (frame);
-        return rc;
-      }
-    for (n = 1, p = smask; n < 1 + hlen; n++)
-      frame[n] ^= *p++;
-    gcry_free (smask);
-  }
-
-  /* Step 2i: Concatenate 0x00, maskedSeed and maskedDB.  */
-  /* This has already been done by using in-place operations.  */
-
-  /* Convert the stuff into an MPI as expected by the caller.  */
-  err = gcry_mpi_scan (r_result, GCRYMPI_FMT_USG, frame, nframe, NULL);
-  if (err)
-    rc = gcry_err_code (err);
-  else if (DBG_CIPHER)
-    log_mpidump ("OAEP encoded data", *r_result);
-  gcry_free (frame);
-
-  return rc;
-}
-
-
-/* RFC-3447 (pkcs#1 v2.1) OAEP decoding.  NBITS is the length of the
-   key measured in bits.  ALGO is the hash function; it must be a
-   valid and usable algorithm.  VALUE is the raw decrypted message
-   {LABEL,LABELLEN} is the optional label to be associated with the
-   message, if LABEL is NULL the default is to use the empty string as
-   label.  On success the plaintext is returned as a newly allocated
-   buffer at R_RESULT; its valid length is stored at R_RESULTLEN.  On
-   error NULL is stored at R_RESULT.  */
-static gcry_err_code_t
-oaep_decode (unsigned char **r_result, size_t *r_resultlen,
-             unsigned int nbits, int algo,
-             gcry_mpi_t value, const unsigned char *label, size_t labellen)
-{
-  gcry_err_code_t rc;
-  unsigned char *frame = NULL; /* Encoded messages (EM).  */
-  unsigned char *masked_seed;  /* Points into FRAME.  */
-  unsigned char *masked_db;    /* Points into FRAME.  */
-  unsigned char *seed = NULL;  /* Allocated space for the seed and DB.  */
-  unsigned char *db;           /* Points into SEED.  */
-  unsigned char *lhash = NULL; /* Hash of the label.  */
-  size_t nframe;               /* Length of the ciphertext (EM).  */
-  size_t hlen;                 /* Length of the hash digest.  */
-  size_t db_len;               /* Length of DB and masked_db.  */
-  size_t nkey = (nbits+7)/8;   /* Length of the key in bytes.  */
-  int failed = 0;              /* Error indicator.  */
-  size_t n;
-
-  *r_result = NULL;
-
-  /* This code is implemented as described by rfc-3447 7.1.2.  */
-
-  /* Set defaults for LABEL.  */
-  if (!label || !labellen)
-    {
-      label = (const unsigned char*)"";
-      labellen = 0;
-    }
-
-  /* Get the length of the digest.  */
-  hlen = gcry_md_get_algo_dlen (algo);
-
-  /* Hash the label right away.  */
-  lhash = gcry_malloc (hlen);
-  if (!lhash)
-    return gpg_err_code_from_syserror ();
-  gcry_md_hash_buffer (algo, lhash, label, labellen);
-
-  /* Turn the MPI into an octet string.  If the octet string is
-     shorter than the key we pad it to the left with zeroes.  This may
-     happen due to the leading zero in OAEP frames and due to the
-     following random octets (seed^mask) which may have leading zero
-     bytes.  This all is needed to cope with our leading zeroes
-     suppressing MPI implementation.  The code implictly implements
-     Step 1b (bail out if NFRAME != N).  */
-  rc = octet_string_from_mpi (&frame, NULL, value, nkey);
-  if (rc)
-    {
-      gcry_free (lhash);
-      return GPG_ERR_ENCODING_PROBLEM;
-    }
-  nframe = nkey;
-
-  /* Step 1c: Check that the key is long enough.  */
-  if ( nframe < 2 * hlen + 2 )
-    {
-      gcry_free (frame);
-      gcry_free (lhash);
-      return GPG_ERR_ENCODING_PROBLEM;
-    }
-
-  /* Step 2 has already been done by the caller and the
-     gcry_mpi_aprint above.  */
-
-  /* Allocate space for SEED and DB.  */
-  seed = gcry_malloc_secure (nframe - 1);
-  if (!seed)
-    {
-      rc = gpg_err_code_from_syserror ();
-      gcry_free (frame);
-      gcry_free (lhash);
-      return rc;
-    }
-  db = seed + hlen;
-
-  /* To avoid choosen ciphertext attacks from now on we make sure to
-     run all code even in the error case; this avoids possible timing
-     attacks as described by Manger.  */
-
-  /* Step 3a: Hash the label.  */
-  /* This has already been done.  */
-
-  /* Step 3b: Separate the encoded message.  */
-  masked_seed = frame + 1;
-  masked_db   = frame + 1 + hlen;
-  db_len      = nframe - 1 - hlen;
-
-  /* Step 3c and 3d: seed = maskedSeed ^ mgf(maskedDB, hlen).  */
-  if (mgf1 (seed, hlen, masked_db, db_len, algo))
-    failed = 1;
-  for (n = 0; n < hlen; n++)
-    seed[n] ^= masked_seed[n];
-
-  /* Step 3e and 3f: db = maskedDB ^ mgf(seed, db_len).  */
-  if (mgf1 (db, db_len, seed, hlen, algo))
-    failed = 1;
-  for (n = 0; n < db_len; n++)
-    db[n] ^= masked_db[n];
-
-  /* Step 3g: Check lhash, an possible empty padding string terminated
-     by 0x01 and the first byte of EM being 0.  */
-  if (memcmp (lhash, db, hlen))
-    failed = 1;
-  for (n = hlen; n < db_len; n++)
-    if (db[n] == 0x01)
-      break;
-  if (n == db_len)
-    failed = 1;
-  if (frame[0])
-    failed = 1;
-
-  gcry_free (lhash);
-  gcry_free (frame);
-  if (failed)
-    {
-      gcry_free (seed);
-      return GPG_ERR_ENCODING_PROBLEM;
-    }
-
-  /* Step 4: Output M.  */
-  /* To avoid an extra allocation we reuse the seed buffer.  The only
-     caller of this function will anyway free the result soon.  */
-  n++;
-  memmove (seed, db + n, db_len - n);
-  *r_result = seed;
-  *r_resultlen = db_len - n;
-  seed = NULL;
-
-  if (DBG_CIPHER)
-    log_printhex ("value extracted from OAEP encoded data:",
-                  *r_result, *r_resultlen);
-
-  return 0;
-}
-
-
-/* RFC-3447 (pkcs#1 v2.1) PSS encoding.  Encode {VALUE,VALUELEN} for
-   an NBITS key.  Note that VALUE is already the mHash from the
-   picture below.  ALGO is a valid hash algorithm and SALTLEN is the
-   length of salt to be used.  On success the result is stored as a
-   new MPI at R_RESULT.  On error the value at R_RESULT is undefined.
-
-   If {RANDOM_OVERRIDE, RANDOM_OVERRIDE_LEN} is given it is used as
-   the salt instead of using a random string for the salt.  This
-   feature is only useful for regression tests.
-
-   Here is figure 2 from the RFC (errata 595 applied) depicting the
-   process:
-
-                                  +-----------+
-                                  |     M     |
-                                  +-----------+
-                                        |
-                                        V
-                                      Hash
-                                        |
-                                        V
-                          +--------+----------+----------+
-                     M' = |Padding1|  mHash   |   salt   |
-                          +--------+----------+----------+
-                                         |
-               +--------+----------+     V
-         DB =  |Padding2| salt     |   Hash
-               +--------+----------+     |
-                         |               |
-                         V               |    +----+
-                        xor <--- MGF <---|    |0xbc|
-                         |               |    +----+
-                         |               |      |
-                         V               V      V
-               +-------------------+----------+----+
-         EM =  |    maskedDB       |     H    |0xbc|
-               +-------------------+----------+----+
-
-  */
-static gcry_err_code_t
-pss_encode (gcry_mpi_t *r_result, unsigned int nbits, int algo,
-           const unsigned char *value, size_t valuelen, int saltlen,
-            const void *random_override, size_t random_override_len)
-{
-  gcry_err_code_t rc = 0;
-  gcry_error_t err;
-  size_t hlen;                 /* Length of the hash digest.  */
-  unsigned char *em = NULL;    /* Encoded message.  */
-  size_t emlen = (nbits+7)/8;  /* Length in bytes of EM.  */
-  unsigned char *h;            /* Points into EM.  */
-  unsigned char *buf = NULL;   /* Help buffer.  */
-  size_t buflen;               /* Length of BUF.  */
-  unsigned char *mhash;        /* Points into BUF.  */
-  unsigned char *salt;         /* Points into BUF.  */
-  unsigned char *dbmask;       /* Points into BUF.  */
-  unsigned char *p;
-  size_t n;
-
-  /* This code is implemented as described by rfc-3447 9.1.1.  */
-
-  /* Get the length of the digest.  */
-  hlen = gcry_md_get_algo_dlen (algo);
-  gcry_assert (hlen);  /* We expect a valid ALGO here.  */
-
-  /* Allocate a help buffer and setup some pointers.  */
-  buflen = 8 + hlen + saltlen + (emlen - hlen - 1);
-  buf = gcry_malloc (buflen);
-  if (!buf)
-    {
-      rc = gpg_err_code_from_syserror ();
-      goto leave;
-    }
-  mhash = buf + 8;
-  salt  = mhash + hlen;
-  dbmask= salt + saltlen;
-
-  /* Step 2: That would be: mHash = Hash(M) but our input is already
-     mHash thus we do only a consistency check and copy to MHASH.  */
-  if (valuelen != hlen)
-    {
-      rc = GPG_ERR_INV_LENGTH;
-      goto leave;
-    }
-  memcpy (mhash, value, hlen);
-
-  /* Step 3: Check length constraints.  */
-  if (emlen < hlen + saltlen + 2)
-    {
-      rc = GPG_ERR_TOO_SHORT;
-      goto leave;
-    }
-
-  /* Allocate space for EM.  */
-  em = gcry_malloc (emlen);
-  if (!em)
-    {
-      rc = gpg_err_code_from_syserror ();
-      goto leave;
-    }
-  h = em + emlen - 1 - hlen;
-
-  /* Step 4: Create a salt.  */
-  if (saltlen)
-    {
-      if (random_override)
-        {
-          if (random_override_len != saltlen)
-            {
-              rc = GPG_ERR_INV_ARG;
-              goto leave;
-            }
-          memcpy (salt, random_override, saltlen);
-        }
-      else
-        gcry_randomize (salt, saltlen, GCRY_STRONG_RANDOM);
-    }
-
-  /* Step 5 and 6: M' = Hash(Padding1 || mHash || salt).  */
-  memset (buf, 0, 8);  /* Padding.  */
-  gcry_md_hash_buffer (algo, h, buf, 8 + hlen + saltlen);
-
-  /* Step 7 and 8: DB = PS || 0x01 || salt.  */
-  /* Note that we use EM to store DB and later Xor in-place.  */
-  p = em + emlen - 1 - hlen - saltlen - 1;
-  memset (em, 0, p - em);
-  *p++ = 0x01;
-  memcpy (p, salt, saltlen);
-
-  /* Step 9: dbmask = MGF(H, emlen - hlen - 1).  */
-  mgf1 (dbmask, emlen - hlen - 1, h, hlen, algo);
-
-  /* Step 10: maskedDB = DB ^ dbMask */
-  for (n = 0, p = dbmask; n < emlen - hlen - 1; n++, p++)
-    em[n] ^= *p;
-
-  /* Step 11: Set the leftmost bits to zero.  */
-  em[0] &= 0xFF >> (8 * emlen - nbits);
-
-  /* Step 12: EM = maskedDB || H || 0xbc.  */
-  em[emlen-1] = 0xbc;
-
-  /* Convert EM into an MPI.  */
-  err = gcry_mpi_scan (r_result, GCRYMPI_FMT_USG, em, emlen, NULL);
-  if (err)
-    rc = gcry_err_code (err);
-  else if (DBG_CIPHER)
-    log_mpidump ("PSS encoded data", *r_result);
-
- leave:
-  if (em)
-    {
-      wipememory (em, emlen);
-      gcry_free (em);
-    }
-  if (buf)
-    {
-      wipememory (buf, buflen);
-      gcry_free (buf);
-    }
-  return rc;
-}
-
-
-/* Verify a signature assuming PSS padding.  VALUE is the hash of the
-   message (mHash) encoded as an MPI; its length must match the digest
-   length of ALGO.  ENCODED is the output of the RSA public key
-   function (EM).  NBITS is the size of the public key.  ALGO is the
-   hash algorithm and SALTLEN is the length of the used salt.  The
-   function returns 0 on success or on error code.  */
-static gcry_err_code_t
-pss_verify (gcry_mpi_t value, gcry_mpi_t encoded, unsigned int nbits, int algo,
-            size_t saltlen)
-{
-  gcry_err_code_t rc = 0;
-  size_t hlen;                 /* Length of the hash digest.  */
-  unsigned char *em = NULL;    /* Encoded message.  */
-  size_t emlen = (nbits+7)/8;  /* Length in bytes of EM.  */
-  unsigned char *salt;         /* Points into EM.  */
-  unsigned char *h;            /* Points into EM.  */
-  unsigned char *buf = NULL;   /* Help buffer.  */
-  size_t buflen;               /* Length of BUF.  */
-  unsigned char *dbmask;       /* Points into BUF.  */
-  unsigned char *mhash;        /* Points into BUF.  */
-  unsigned char *p;
-  size_t n;
-
-  /* This code is implemented as described by rfc-3447 9.1.2.  */
-
-  /* Get the length of the digest.  */
-  hlen = gcry_md_get_algo_dlen (algo);
-  gcry_assert (hlen);  /* We expect a valid ALGO here.  */
-
-  /* Allocate a help buffer and setup some pointers.
-     This buffer is used for two purposes:
-        +------------------------------+-------+
-     1. | dbmask                       | mHash |
-        +------------------------------+-------+
-           emlen - hlen - 1              hlen
-
-        +----------+-------+---------+-+-------+
-     2. | padding1 | mHash | salt    | | mHash |
-        +----------+-------+---------+-+-------+
-             8       hlen    saltlen     hlen
-  */
-  buflen = 8 + hlen + saltlen;
-  if (buflen < emlen - hlen - 1)
-    buflen = emlen - hlen - 1;
-  buflen += hlen;
-  buf = gcry_malloc (buflen);
-  if (!buf)
-    {
-      rc = gpg_err_code_from_syserror ();
-      goto leave;
-    }
-  dbmask = buf;
-  mhash = buf + buflen - hlen;
-
-  /* Step 2: That would be: mHash = Hash(M) but our input is already
-     mHash thus we only need to convert VALUE into MHASH.  */
-  rc = octet_string_from_mpi (NULL, mhash, value, hlen);
-  if (rc)
-    goto leave;
-
-  /* Convert the signature into an octet string.  */
-  rc = octet_string_from_mpi (&em, NULL, encoded, emlen);
-  if (rc)
-    goto leave;
-
-  /* Step 3: Check length of EM.  Because we internally use MPI
-     functions we can't do this properly; EMLEN is always the length
-     of the key because octet_string_from_mpi needs to left pad the
-     result with zero to cope with the fact that our MPIs suppress all
-     leading zeroes.  Thus what we test here are merely the digest and
-     salt lengths to the key.  */
-  if (emlen < hlen + saltlen + 2)
-    {
-      rc = GPG_ERR_TOO_SHORT; /* For the hash and saltlen.  */
-      goto leave;
-    }
-
-  /* Step 4: Check last octet.  */
-  if (em[emlen - 1] != 0xbc)
-    {
-      rc = GPG_ERR_BAD_SIGNATURE;
-      goto leave;
-    }
-
-  /* Step 5: Split EM.  */
-  h = em + emlen - 1 - hlen;
-
-  /* Step 6: Check the leftmost bits.  */
-  if ((em[0] & ~(0xFF >> (8 * emlen - nbits))))
-    {
-      rc = GPG_ERR_BAD_SIGNATURE;
-      goto leave;
-    }
-
-  /* Step 7: dbmask = MGF(H, emlen - hlen - 1).  */
-  mgf1 (dbmask, emlen - hlen - 1, h, hlen, algo);
-
-  /* Step 8: maskedDB = DB ^ dbMask.  */
-  for (n = 0, p = dbmask; n < emlen - hlen - 1; n++, p++)
-    em[n] ^= *p;
-
-  /* Step 9: Set leftmost bits in DB to zero.  */
-  em[0] &= 0xFF >> (8 * emlen - nbits);
-
-  /* Step 10: Check the padding of DB.  */
-  for (n = 0; n < emlen - hlen - saltlen - 2 && !em[n]; n++)
-    ;
-  if (n != emlen - hlen - saltlen - 2 || em[n++] != 1)
-    {
-      rc = GPG_ERR_BAD_SIGNATURE;
-      goto leave;
-    }
-
-  /* Step 11: Extract salt from DB.  */
-  salt = em + n;
-
-  /* Step 12:  M' = (0x)00 00 00 00 00 00 00 00 || mHash || salt */
-  memset (buf, 0, 8);
-  memcpy (buf+8, mhash, hlen);
-  memcpy (buf+8+hlen, salt, saltlen);
-
-  /* Step 13:  H' = Hash(M').  */
-  gcry_md_hash_buffer (algo, buf, buf, 8 + hlen + saltlen);
-
-  /* Step 14:  Check H == H'.   */
-  rc = memcmp (h, buf, hlen) ? GPG_ERR_BAD_SIGNATURE : GPG_ERR_NO_ERROR;
-
- leave:
-  if (em)
-    {
-      wipememory (em, emlen);
-      gcry_free (em);
-    }
-  if (buf)
-    {
-      wipememory (buf, buflen);
-      gcry_free (buf);
-    }
-  return rc;
-}
-
-
-/* Callback for the pubkey algorithm code to verify PSS signatures.
-   OPAQUE is the data provided by the actual caller.  The meaning of
-   TMP depends on the actual algorithm (but there is only RSA); now
-   for RSA it is the output of running the public key function on the
-   input.  */
-static int
-pss_verify_cmp (void *opaque, gcry_mpi_t tmp)
-{
-  struct pk_encoding_ctx *ctx = opaque;
-  gcry_mpi_t hash = ctx->verify_arg;
-
-  return pss_verify (hash, tmp, ctx->nbits - 1, ctx->hash_algo, ctx->saltlen);
-}
-
-
-/* Internal function.   */
-static gcry_err_code_t
-sexp_elements_extract (gcry_sexp_t key_sexp, const char *element_names,
-                      gcry_mpi_t *elements, const char *algo_name)
-{
-  gcry_err_code_t err = 0;
-  int i, idx;
-  const char *name;
-  gcry_sexp_t list;
-
-  for (name = element_names, idx = 0; *name && !err; name++, idx++)
-    {
-      list = gcry_sexp_find_token (key_sexp, name, 1);
-      if (!list)
-       elements[idx] = NULL;
-      else
-       {
-         elements[idx] = gcry_sexp_nth_mpi (list, 1, GCRYMPI_FMT_USG);
-         gcry_sexp_release (list);
-         if (!elements[idx])
-           err = GPG_ERR_INV_OBJ;
-       }
-    }
-
-  if (!err)
-    {
-      /* Check that all elements are available.  */
-      for (name = element_names, idx = 0; *name; name++, idx++)
-        if (!elements[idx])
-          break;
-      if (*name)
-        {
-          err = GPG_ERR_NO_OBJ;
-          /* Some are missing.  Before bailing out we test for
-             optional parameters.  */
-          if (algo_name && !strcmp (algo_name, "RSA")
-              && !strcmp (element_names, "nedpqu") )
-            {
-              /* This is RSA.  Test whether we got N, E and D and that
-                 the optional P, Q and U are all missing.  */
-              if (elements[0] && elements[1] && elements[2]
-                  && !elements[3] && !elements[4] && !elements[5])
-                err = 0;
-            }
-        }
-    }
-
-
-  if (err)
-    {
-      for (i = 0; i < idx; i++)
-        if (elements[i])
-          gcry_free (elements[i]);
-    }
-  return err;
-}
-
-
-/* Internal function used for ecc.  Note, that this function makes use
-   of its intimate knowledge about the ECC parameters from ecc.c. */
-static gcry_err_code_t
-sexp_elements_extract_ecc (gcry_sexp_t key_sexp, const char *element_names,
-                           gcry_mpi_t *elements, pk_extra_spec_t *extraspec)
-
-{
-  gcry_err_code_t err = 0;
-  int idx;
-  const char *name;
-  gcry_sexp_t list;
-
-  /* Clear the array for easier error cleanup. */
-  for (name = element_names, idx = 0; *name; name++, idx++)
-    elements[idx] = NULL;
-  gcry_assert (idx >= 5); /* We know that ECC has at least 5 elements
-                             (params only) or 6 (full public key).  */
-  if (idx == 5)
-    elements[5] = NULL;   /* Extra clear for the params only case.  */
-
-
-  /* Init the array with the available curve parameters. */
-  for (name = element_names, idx = 0; *name && !err; name++, idx++)
-    {
-      list = gcry_sexp_find_token (key_sexp, name, 1);
-      if (!list)
-       elements[idx] = NULL;
-      else
-       {
-         elements[idx] = gcry_sexp_nth_mpi (list, 1, GCRYMPI_FMT_USG);
-         gcry_sexp_release (list);
-         if (!elements[idx])
-            {
-              err = GPG_ERR_INV_OBJ;
-              goto leave;
-            }
-       }
-    }
-
-  /* Check whether a curve parameter has been given and then fill any
-     missing elements.  */
-  list = gcry_sexp_find_token (key_sexp, "curve", 5);
-  if (list)
-    {
-      if (extraspec->get_param)
-        {
-          char *curve;
-          gcry_mpi_t params[6];
-
-          for (idx = 0; idx < DIM(params); idx++)
-            params[idx] = NULL;
-
-          curve = _gcry_sexp_nth_string (list, 1);
-          gcry_sexp_release (list);
-          if (!curve)
-            {
-              /* No curve name given (or out of core). */
-              err = GPG_ERR_INV_OBJ;
-              goto leave;
-            }
-          err = extraspec->get_param (curve, params);
-          gcry_free (curve);
-          if (err)
-            goto leave;
-
-          for (idx = 0; idx < DIM(params); idx++)
-            {
-              if (!elements[idx])
-                elements[idx] = params[idx];
-              else
-                mpi_free (params[idx]);
-            }
-        }
-      else
-        {
-          gcry_sexp_release (list);
-          err = GPG_ERR_INV_OBJ; /* "curve" given but ECC not supported. */
-          goto leave;
-        }
-    }
-
-  /* Check that all parameters are known.  */
-  for (name = element_names, idx = 0; *name; name++, idx++)
-    if (!elements[idx])
-      {
-        err = GPG_ERR_NO_OBJ;
-        goto leave;
-      }
-
- leave:
-  if (err)
-    {
-      for (name = element_names, idx = 0; *name; name++, idx++)
-        if (elements[idx])
-          gcry_free (elements[idx]);
-    }
-  return err;
-}
-
-
-
-/****************
- * Convert a S-Exp with either a private or a public key to our
- * internal format. Currently we do only support the following
- * algorithms:
- *    dsa
- *    rsa
- *    openpgp-dsa
- *    openpgp-rsa
- *    openpgp-elg
- *    openpgp-elg-sig
- *    ecdsa
- *    ecdh
- * Provide a SE with the first element be either "private-key" or
- * or "public-key". It is followed by a list with its first element
- * be one of the above algorithm identifiers and the remaning
- * elements are pairs with parameter-id and value.
- * NOTE: we look through the list to find a list beginning with
- * "private-key" or "public-key" - the first one found is used.
- *
- * If OVERRIDE_ELEMS is not NULL those elems override the parameter
- * specification taken from the module.  This ise used by
- * gcry_pk_get_curve.
- *
- * Returns: A pointer to an allocated array of MPIs if the return value is
- *         zero; the caller has to release this array.
- *
- * Example of a DSA public key:
- *  (private-key
- *    (dsa
- *     (p <mpi>)
- *     (g <mpi>)
- *     (y <mpi>)
- *     (x <mpi>)
- *    )
- *  )
- * The <mpi> are expected to be in GCRYMPI_FMT_USG
- */
-static gcry_err_code_t
-sexp_to_key (gcry_sexp_t sexp, int want_private, const char *override_elems,
-             gcry_mpi_t **retarray, gcry_module_t *retalgo)
-{
-  gcry_err_code_t err = 0;
-  gcry_sexp_t list, l2;
-  char *name;
-  const char *elems;
-  gcry_mpi_t *array;
-  gcry_module_t module;
-  gcry_pk_spec_t *pubkey;
-  pk_extra_spec_t *extraspec;
-  int is_ecc;
-
-  /* Check that the first element is valid.  */
-  list = gcry_sexp_find_token (sexp,
-                               want_private? "private-key":"public-key", 0);
-  if (!list)
-    return GPG_ERR_INV_OBJ; /* Does not contain a key object.  */
-
-  l2 = gcry_sexp_cadr( list );
-  gcry_sexp_release ( list );
+  l2 = sexp_cadr (list);
+  sexp_release (list);
   list = l2;
-  name = _gcry_sexp_nth_string (list, 0);
-  if (!name)
-    {
-      gcry_sexp_release ( list );
-      return GPG_ERR_INV_OBJ;      /* Invalid structure of object. */
-    }
-
-  ath_mutex_lock (&pubkeys_registered_lock);
-  module = gcry_pk_lookup_name (name);
-  ath_mutex_unlock (&pubkeys_registered_lock);
-
-  /* Fixme: We should make sure that an ECC key is always named "ecc"
-     and not "ecdsa".  "ecdsa" should be used for the signature
-     itself.  We need a function to test whether an algorithm given
-     with a key is compatible with an application of the key (signing,
-     encryption).  For RSA this is easy, but ECC is the first
-     algorithm which has many flavours.  */
-  is_ecc = ( !strcmp (name, "ecdsa")
-             || !strcmp (name, "ecdh")
-             || !strcmp (name, "ecc") );
-  gcry_free (name);
-
-  if (!module)
-    {
-      gcry_sexp_release (list);
-      return GPG_ERR_PUBKEY_ALGO; /* Unknown algorithm. */
-    }
-  else
-    {
-      pubkey = (gcry_pk_spec_t *) module->spec;
-      extraspec = module->extraspec;
-    }
-
-  if (override_elems)
-    elems = override_elems;
-  else if (want_private)
-    elems = pubkey->elements_skey;
-  else
-    elems = pubkey->elements_pkey;
-  array = gcry_calloc (strlen (elems) + 1, sizeof (*array));
-  if (!array)
-    err = gpg_err_code_from_syserror ();
-  if (!err)
-    {
-      if (is_ecc)
-        err = sexp_elements_extract_ecc (list, elems, array, extraspec);
-      else
-        err = sexp_elements_extract (list, elems, array, pubkey->name);
-    }
-
-  gcry_sexp_release (list);
-
-  if (err)
-    {
-      gcry_free (array);
-
-      ath_mutex_lock (&pubkeys_registered_lock);
-      _gcry_module_release (module);
-      ath_mutex_unlock (&pubkeys_registered_lock);
-    }
-  else
-    {
-      *retarray = array;
-      *retalgo = module;
-    }
-
-  return err;
-}
-
-
-static gcry_err_code_t
-sexp_to_sig (gcry_sexp_t sexp, gcry_mpi_t **retarray,
-            gcry_module_t *retalgo)
-{
-  gcry_err_code_t err = 0;
-  gcry_sexp_t list, l2;
-  char *name;
-  const char *elems;
-  gcry_mpi_t *array;
-  gcry_module_t module;
-  gcry_pk_spec_t *pubkey;
-
-  /* Check that the first element is valid.  */
-  list = gcry_sexp_find_token( sexp, "sig-val" , 0 );
-  if (!list)
-    return GPG_ERR_INV_OBJ; /* Does not contain a signature value object.  */
-
-  l2 = gcry_sexp_nth (list, 1);
-  if (!l2)
-    {
-      gcry_sexp_release (list);
-      return GPG_ERR_NO_OBJ;   /* No cadr for the sig object.  */
-    }
-  name = _gcry_sexp_nth_string (l2, 0);
-  if (!name)
-    {
-      gcry_sexp_release (list);
-      gcry_sexp_release (l2);
-      return GPG_ERR_INV_OBJ;  /* Invalid structure of object.  */
-    }
-  else if (!strcmp (name, "flags"))
-    {
-      /* Skip flags, since they are not used but here just for the
-        sake of consistent S-expressions.  */
-      gcry_free (name);
-      gcry_sexp_release (l2);
-      l2 = gcry_sexp_nth (list, 2);
-      if (!l2)
-       {
-         gcry_sexp_release (list);
-         return GPG_ERR_INV_OBJ;
-       }
-      name = _gcry_sexp_nth_string (l2, 0);
-    }
-
-  ath_mutex_lock (&pubkeys_registered_lock);
-  module = gcry_pk_lookup_name (name);
-  ath_mutex_unlock (&pubkeys_registered_lock);
-  gcry_free (name);
-  name = NULL;
-
-  if (!module)
-    {
-      gcry_sexp_release (l2);
-      gcry_sexp_release (list);
-      return GPG_ERR_PUBKEY_ALGO;  /* Unknown algorithm. */
-    }
-  else
-    pubkey = (gcry_pk_spec_t *) module->spec;
-
-  elems = pubkey->elements_sig;
-  array = gcry_calloc (strlen (elems) + 1 , sizeof *array );
-  if (!array)
-    err = gpg_err_code_from_syserror ();
-
-  if (!err)
-    err = sexp_elements_extract (list, elems, array, NULL);
-
-  gcry_sexp_release (l2);
-  gcry_sexp_release (list);
-
-  if (err)
-    {
-      ath_mutex_lock (&pubkeys_registered_lock);
-      _gcry_module_release (module);
-      ath_mutex_unlock (&pubkeys_registered_lock);
-
-      gcry_free (array);
-    }
-  else
-    {
-      *retarray = array;
-      *retalgo = module;
-    }
-
-  return err;
-}
-
-static inline int
-get_hash_algo (const char *s, size_t n)
-{
-  static const struct { const char *name; int algo; } hashnames[] = {
-    { "sha1",   GCRY_MD_SHA1 },
-    { "md5",    GCRY_MD_MD5 },
-    { "sha256", GCRY_MD_SHA256 },
-    { "ripemd160", GCRY_MD_RMD160 },
-    { "rmd160", GCRY_MD_RMD160 },
-    { "sha384", GCRY_MD_SHA384 },
-    { "sha512", GCRY_MD_SHA512 },
-    { "sha224", GCRY_MD_SHA224 },
-    { "md2",    GCRY_MD_MD2 },
-    { "md4",    GCRY_MD_MD4 },
-    { "tiger",  GCRY_MD_TIGER },
-    { "haval",  GCRY_MD_HAVAL },
-    { NULL, 0 }
-  };
-  int algo;
-  int i;
-
-  for (i=0; hashnames[i].name; i++)
-    {
-      if ( strlen (hashnames[i].name) == n
-          && !memcmp (hashnames[i].name, s, n))
-       break;
-    }
-  if (hashnames[i].name)
-    algo = hashnames[i].algo;
-  else
-    {
-      /* In case of not listed or dynamically allocated hash
-        algorithm we fall back to this somewhat slower
-        method.  Further, it also allows to use OIDs as
-        algorithm names. */
-      char *tmpname;
-
-      tmpname = gcry_malloc (n+1);
-      if (!tmpname)
-       algo = 0;  /* Out of core - silently give up.  */
-      else
-       {
-         memcpy (tmpname, s, n);
-         tmpname[n] = 0;
-         algo = gcry_md_map_name (tmpname);
-         gcry_free (tmpname);
-       }
-    }
-  return algo;
-}
-
-
-/****************
- * Take sexp and return an array of MPI as used for our internal decrypt
- * function.
- * s_data = (enc-val
- *           [(flags [raw, pkcs1, oaep, no-blinding])]
- *           [(hash-algo <algo>)]
- *           [(label <label>)]
- *           (<algo>
- *             (<param_name1> <mpi>)
- *             ...
- *             (<param_namen> <mpi>)
- *           ))
- * HASH-ALGO and LABEL are specific to OAEP.
- * RET_MODERN is set to true when at least an empty flags list has been found.
- * CTX is used to return encoding information; it may be NULL in which
- * case raw encoding is used.
- */
-static gcry_err_code_t
-sexp_to_enc (gcry_sexp_t sexp, gcry_mpi_t **retarray, gcry_module_t *retalgo,
-             int *ret_modern, int *flags, struct pk_encoding_ctx *ctx)
-{
-  gcry_err_code_t err = 0;
-  gcry_sexp_t list = NULL, l2 = NULL;
-  gcry_pk_spec_t *pubkey = NULL;
-  gcry_module_t module = NULL;
-  char *name = NULL;
-  size_t n;
-  int parsed_flags = 0;
-  const char *elems;
-  gcry_mpi_t *array = NULL;
-
-  *ret_modern = 0;
-
-  /* Check that the first element is valid.  */
-  list = gcry_sexp_find_token (sexp, "enc-val" , 0);
-  if (!list)
-    {
-      err = GPG_ERR_INV_OBJ; /* Does not contain an encrypted value object.  */
-      goto leave;
-    }
-
-  l2 = gcry_sexp_nth (list, 1);
-  if (!l2)
-    {
-      err = GPG_ERR_NO_OBJ; /* No cdr for the data object.  */
-      goto leave;
-    }
-
-  /* Extract identifier of sublist.  */
-  name = _gcry_sexp_nth_string (l2, 0);
+  name = sexp_nth_string (list, 0);
   if (!name)
     {
-      err = GPG_ERR_INV_OBJ; /* Invalid structure of object.  */
-      goto leave;
+      sexp_release ( list );
+      return GPG_ERR_INV_OBJ;      /* Invalid structure of object. */
     }
-
-  if (!strcmp (name, "flags"))
+  spec = spec_from_name (name);
+  xfree (name);
+  if (!spec)
     {
-      /* There is a flags element - process it.  */
-      const char *s;
-      int i;
+      sexp_release (list);
+      return GPG_ERR_PUBKEY_ALGO; /* Unknown algorithm. */
+    }
+  *r_spec = spec;
+  if (r_parms)
+    *r_parms = list;
+  else
+    sexp_release (list);
+  return 0;
+}
 
-      *ret_modern = 1;
-      for (i = gcry_sexp_length (l2) - 1; i > 0; i--)
-        {
-          s = gcry_sexp_nth_data (l2, i, &n);
-          if (! s)
-            ; /* Not a data element - ignore.  */
-          else if (n == 3 && !memcmp (s, "raw", 3)
-                   && ctx->encoding == PUBKEY_ENC_UNKNOWN)
-            ctx->encoding = PUBKEY_ENC_RAW;
-          else if (n == 5 && !memcmp (s, "pkcs1", 5)
-                   && ctx->encoding == PUBKEY_ENC_UNKNOWN)
-           ctx->encoding = PUBKEY_ENC_PKCS1;
-          else if (n == 4 && !memcmp (s, "oaep", 4)
-                   && ctx->encoding == PUBKEY_ENC_UNKNOWN)
-           ctx->encoding = PUBKEY_ENC_OAEP;
-          else if (n == 3 && !memcmp (s, "pss", 3)
-                   && ctx->encoding == PUBKEY_ENC_UNKNOWN)
-           {
-             err = GPG_ERR_CONFLICT;
-             goto leave;
-           }
-          else if (n == 11 && ! memcmp (s, "no-blinding", 11))
-            parsed_flags |= PUBKEY_FLAG_NO_BLINDING;
-          else
-            {
-              err = GPG_ERR_INV_FLAG;
-              goto leave;
-            }
-        }
-      gcry_sexp_release (l2);
 
-      /* Get the OAEP parameters HASH-ALGO and LABEL, if any. */
-      if (ctx->encoding == PUBKEY_ENC_OAEP)
-       {
-         /* Get HASH-ALGO. */
-         l2 = gcry_sexp_find_token (list, "hash-algo", 0);
-         if (l2)
-           {
-             s = gcry_sexp_nth_data (l2, 1, &n);
-             if (!s)
-               err = GPG_ERR_NO_OBJ;
-             else
-               {
-                 ctx->hash_algo = get_hash_algo (s, n);
-                 if (!ctx->hash_algo)
-                   err = GPG_ERR_DIGEST_ALGO;
-               }
-             gcry_sexp_release (l2);
-             if (err)
-               goto leave;
-           }
-
-         /* Get LABEL. */
-         l2 = gcry_sexp_find_token (list, "label", 0);
-         if (l2)
-           {
-             s = gcry_sexp_nth_data (l2, 1, &n);
-             if (!s)
-               err = GPG_ERR_NO_OBJ;
-             else if (n > 0)
-               {
-                 ctx->label = gcry_malloc (n);
-                 if (!ctx->label)
-                   err = gpg_err_code_from_syserror ();
-                 else
-                   {
-                     memcpy (ctx->label, s, n);
-                     ctx->labellen = n;
-                   }
-               }
-             gcry_sexp_release (l2);
-             if (err)
-               goto leave;
-           }
-       }
 
-      /* Get the next which has the actual data - skip HASH-ALGO and LABEL. */
-      for (i = 2; (l2 = gcry_sexp_nth (list, i)) != NULL; i++)
-       {
-         s = gcry_sexp_nth_data (l2, 0, &n);
-         if (!(n == 9 && !memcmp (s, "hash-algo", 9))
-             && !(n == 5 && !memcmp (s, "label", 5))
-             && !(n == 15 && !memcmp (s, "random-override", 15)))
-           break;
-         gcry_sexp_release (l2);
-       }
+/* Disable the use of the algorithm ALGO.  This is not thread safe and
+   should thus be called early.  */
+static void
+disable_pubkey_algo (int algo)
+{
+  gcry_pk_spec_t *spec = spec_from_algo (algo);
 
-      if (!l2)
-        {
-          err = GPG_ERR_NO_OBJ; /* No cdr for the data object. */
-          goto leave;
-        }
+  if (spec)
+    spec->flags.disabled = 1;
+}
 
-      /* Extract sublist identifier.  */
-      gcry_free (name);
-      name = _gcry_sexp_nth_string (l2, 0);
-      if (!name)
-        {
-          err = GPG_ERR_INV_OBJ; /* Invalid structure of object. */
-          goto leave;
-        }
 
-      gcry_sexp_release (list);
-      list = l2;
-      l2 = NULL;
-    }
+
+/*
+ * Map a string to the pubkey algo
+ */
+int
+_gcry_pk_map_name (const char *string)
+{
+  gcry_pk_spec_t *spec;
+
+  if (!string)
+    return 0;
+  spec = spec_from_name (string);
+  if (!spec)
+    return 0;
+  if (spec->flags.disabled)
+    return 0;
+  if (!spec->flags.fips && fips_mode ())
+    return 0;
+  return spec->algo;
+}
 
-  ath_mutex_lock (&pubkeys_registered_lock);
-  module = gcry_pk_lookup_name (name);
-  ath_mutex_unlock (&pubkeys_registered_lock);
 
-  if (!module)
-    {
-      err = GPG_ERR_PUBKEY_ALGO; /* Unknown algorithm.  */
-      goto leave;
-    }
-  pubkey = (gcry_pk_spec_t *) module->spec;
+/* Map the public key algorithm whose ID is contained in ALGORITHM to
+   a string representation of the algorithm name.  For unknown
+   algorithm IDs this functions returns "?". */
+const char *
+_gcry_pk_algo_name (int algo)
+{
+  gcry_pk_spec_t *spec;
 
-  elems = pubkey->elements_enc;
-  array = gcry_calloc (strlen (elems) + 1, sizeof (*array));
-  if (!array)
-    {
-      err = gpg_err_code_from_syserror ();
-      goto leave;
-    }
+  spec = spec_from_algo (algo);
+  if (spec)
+    return spec->name;
+  return "?";
+}
 
-  err = sexp_elements_extract (list, elems, array, NULL);
 
- leave:
-  gcry_sexp_release (list);
-  gcry_sexp_release (l2);
-  gcry_free (name);
+/****************
+ * A USE of 0 means: don't care.
+ */
+static gcry_err_code_t
+check_pubkey_algo (int algo, unsigned use)
+{
+  gcry_err_code_t err = 0;
+  gcry_pk_spec_t *spec;
 
-  if (err)
+  spec = spec_from_algo (algo);
+  if (spec && !spec->flags.disabled && (spec->flags.fips || !fips_mode ()))
     {
-      ath_mutex_lock (&pubkeys_registered_lock);
-      _gcry_module_release (module);
-      ath_mutex_unlock (&pubkeys_registered_lock);
-      gcry_free (array);
-      gcry_free (ctx->label);
-      ctx->label = NULL;
+      if (((use & GCRY_PK_USAGE_SIGN)
+          && (! (spec->use & GCRY_PK_USAGE_SIGN)))
+         || ((use & GCRY_PK_USAGE_ENCR)
+             && (! (spec->use & GCRY_PK_USAGE_ENCR))))
+       err = GPG_ERR_WRONG_PUBKEY_ALGO;
     }
   else
-    {
-      *retarray = array;
-      *retalgo = module;
-      *flags = parsed_flags;
-    }
+    err = GPG_ERR_PUBKEY_ALGO;
 
   return err;
 }
 
-/* Take the hash value and convert into an MPI, suitable for
-   passing to the low level functions.  We currently support the
-   old style way of passing just a MPI and the modern interface which
-   allows to pass flags so that we can choose between raw and pkcs1
-   padding - may be more padding options later.
-
-   (<mpi>)
-   or
-   (data
-    [(flags [raw, pkcs1, oaep, pss, no-blinding])]
-    [(hash <algo> <value>)]
-    [(value <text>)]
-    [(hash-algo <algo>)]
-    [(label <label>)]
-    [(salt-length <length>)]
-    [(random-override <data>)]
-   )
-
-   Either the VALUE or the HASH element must be present for use
-   with signatures.  VALUE is used for encryption.
-
-   HASH-ALGO and LABEL are specific to OAEP.
 
-   SALT-LENGTH is for PSS.
-
-   RANDOM-OVERRIDE is used to replace random nonces for regression
-   testing.  */
-static gcry_err_code_t
-sexp_data_to_mpi (gcry_sexp_t input, gcry_mpi_t *ret_mpi,
-                 struct pk_encoding_ctx *ctx)
+/****************
+ * Return the number of public key material numbers
+ */
+static int
+pubkey_get_npkey (int algo)
 {
-  gcry_err_code_t rc = 0;
-  gcry_sexp_t ldata, lhash, lvalue;
-  int i;
-  size_t n;
-  const char *s;
-  int unknown_flag=0;
-  int parsed_flags = 0;
-
-  *ret_mpi = NULL;
-  ldata = gcry_sexp_find_token (input, "data", 0);
-  if (!ldata)
-    { /* assume old style */
-      *ret_mpi = gcry_sexp_nth_mpi (input, 0, 0);
-      return *ret_mpi ? GPG_ERR_NO_ERROR : GPG_ERR_INV_OBJ;
-    }
-
-  /* see whether there is a flags object */
-  {
-    gcry_sexp_t lflags = gcry_sexp_find_token (ldata, "flags", 0);
-    if (lflags)
-      { /* parse the flags list. */
-        for (i=gcry_sexp_length (lflags)-1; i > 0; i--)
-          {
-            s = gcry_sexp_nth_data (lflags, i, &n);
-            if (!s)
-              ; /* not a data element*/
-            else if ( n == 3 && !memcmp (s, "raw", 3)
-                      && ctx->encoding == PUBKEY_ENC_UNKNOWN)
-              ctx->encoding = PUBKEY_ENC_RAW;
-            else if ( n == 5 && !memcmp (s, "pkcs1", 5)
-                      && ctx->encoding == PUBKEY_ENC_UNKNOWN)
-              ctx->encoding = PUBKEY_ENC_PKCS1;
-            else if ( n == 4 && !memcmp (s, "oaep", 4)
-                      && ctx->encoding == PUBKEY_ENC_UNKNOWN)
-              ctx->encoding = PUBKEY_ENC_OAEP;
-            else if ( n == 3 && !memcmp (s, "pss", 3)
-                      && ctx->encoding == PUBKEY_ENC_UNKNOWN)
-              ctx->encoding = PUBKEY_ENC_PSS;
-           else if (n == 11 && ! memcmp (s, "no-blinding", 11))
-             parsed_flags |= PUBKEY_FLAG_NO_BLINDING;
-            else
-              unknown_flag = 1;
-          }
-        gcry_sexp_release (lflags);
-      }
-  }
+  gcry_pk_spec_t *spec = spec_from_algo (algo);
 
-  if (ctx->encoding == PUBKEY_ENC_UNKNOWN)
-    ctx->encoding = PUBKEY_ENC_RAW; /* default to raw */
+  return spec? strlen (spec->elements_pkey) : 0;
+}
 
-  /* Get HASH or MPI */
-  lhash = gcry_sexp_find_token (ldata, "hash", 0);
-  lvalue = lhash? NULL : gcry_sexp_find_token (ldata, "value", 0);
 
-  if (!(!lhash ^ !lvalue))
-    rc = GPG_ERR_INV_OBJ; /* none or both given */
-  else if (unknown_flag)
-    rc = GPG_ERR_INV_FLAG;
-  else if (ctx->encoding == PUBKEY_ENC_RAW && lvalue)
-    {
-      *ret_mpi = gcry_sexp_nth_mpi (lvalue, 1, GCRYMPI_FMT_USG);
-      if (!*ret_mpi)
-        rc = GPG_ERR_INV_OBJ;
-    }
-  else if (ctx->encoding == PUBKEY_ENC_PKCS1 && lvalue
-          && ctx->op == PUBKEY_OP_ENCRYPT)
-    {
-      const void * value;
-      size_t valuelen;
-      gcry_sexp_t list;
-      void *random_override = NULL;
-      size_t random_override_len = 0;
-
-      if ( !(value=gcry_sexp_nth_data (lvalue, 1, &valuelen)) || !valuelen )
-        rc = GPG_ERR_INV_OBJ;
-      else
-        {
-          /* Get optional RANDOM-OVERRIDE.  */
-          list = gcry_sexp_find_token (ldata, "random-override", 0);
-          if (list)
-            {
-              s = gcry_sexp_nth_data (list, 1, &n);
-              if (!s)
-                rc = GPG_ERR_NO_OBJ;
-              else if (n > 0)
-                {
-                  random_override = gcry_malloc (n);
-                  if (!random_override)
-                    rc = gpg_err_code_from_syserror ();
-                  else
-                    {
-                      memcpy (random_override, s, n);
-                      random_override_len = n;
-                    }
-                }
-              gcry_sexp_release (list);
-              if (rc)
-                goto leave;
-            }
-
-          rc = pkcs1_encode_for_encryption (ret_mpi, ctx->nbits,
-                                            value, valuelen,
-                                            random_override,
-                                            random_override_len);
-          gcry_free (random_override);
-        }
-    }
-  else if (ctx->encoding == PUBKEY_ENC_PKCS1 && lhash
-          && (ctx->op == PUBKEY_OP_SIGN || ctx->op == PUBKEY_OP_VERIFY))
-    {
-      if (gcry_sexp_length (lhash) != 3)
-        rc = GPG_ERR_INV_OBJ;
-      else if ( !(s=gcry_sexp_nth_data (lhash, 1, &n)) || !n )
-        rc = GPG_ERR_INV_OBJ;
-      else
-        {
-          const void * value;
-          size_t valuelen;
-
-         ctx->hash_algo = get_hash_algo (s, n);
-
-          if (!ctx->hash_algo)
-            rc = GPG_ERR_DIGEST_ALGO;
-          else if ( !(value=gcry_sexp_nth_data (lhash, 2, &valuelen))
-                    || !valuelen )
-            rc = GPG_ERR_INV_OBJ;
-          else
-           rc = pkcs1_encode_for_signature (ret_mpi, ctx->nbits,
-                                            value, valuelen,
-                                            ctx->hash_algo);
-        }
-    }
-  else if (ctx->encoding == PUBKEY_ENC_OAEP && lvalue
-          && ctx->op == PUBKEY_OP_ENCRYPT)
-    {
-      const void * value;
-      size_t valuelen;
+/****************
+ * Return the number of secret key material numbers
+ */
+static int
+pubkey_get_nskey (int algo)
+{
+  gcry_pk_spec_t *spec = spec_from_algo (algo);
 
-      if ( !(value=gcry_sexp_nth_data (lvalue, 1, &valuelen)) || !valuelen )
-       rc = GPG_ERR_INV_OBJ;
-      else
-       {
-         gcry_sexp_t list;
-          void *random_override = NULL;
-          size_t random_override_len = 0;
-
-         /* Get HASH-ALGO. */
-         list = gcry_sexp_find_token (ldata, "hash-algo", 0);
-         if (list)
-           {
-             s = gcry_sexp_nth_data (list, 1, &n);
-             if (!s)
-               rc = GPG_ERR_NO_OBJ;
-             else
-               {
-                 ctx->hash_algo = get_hash_algo (s, n);
-                 if (!ctx->hash_algo)
-                   rc = GPG_ERR_DIGEST_ALGO;
-               }
-             gcry_sexp_release (list);
-             if (rc)
-               goto leave;
-           }
-
-         /* Get LABEL. */
-         list = gcry_sexp_find_token (ldata, "label", 0);
-         if (list)
-           {
-             s = gcry_sexp_nth_data (list, 1, &n);
-             if (!s)
-               rc = GPG_ERR_NO_OBJ;
-             else if (n > 0)
-               {
-                 ctx->label = gcry_malloc (n);
-                 if (!ctx->label)
-                   rc = gpg_err_code_from_syserror ();
-                 else
-                   {
-                     memcpy (ctx->label, s, n);
-                     ctx->labellen = n;
-                   }
-               }
-             gcry_sexp_release (list);
-             if (rc)
-               goto leave;
-           }
-          /* Get optional RANDOM-OVERRIDE.  */
-          list = gcry_sexp_find_token (ldata, "random-override", 0);
-          if (list)
-            {
-              s = gcry_sexp_nth_data (list, 1, &n);
-              if (!s)
-                rc = GPG_ERR_NO_OBJ;
-              else if (n > 0)
-                {
-                  random_override = gcry_malloc (n);
-                  if (!random_override)
-                    rc = gpg_err_code_from_syserror ();
-                  else
-                    {
-                      memcpy (random_override, s, n);
-                      random_override_len = n;
-                    }
-                }
-              gcry_sexp_release (list);
-              if (rc)
-                goto leave;
-            }
-
-         rc = oaep_encode (ret_mpi, ctx->nbits, ctx->hash_algo,
-                           value, valuelen,
-                           ctx->label, ctx->labellen,
-                            random_override, random_override_len);
-
-          gcry_free (random_override);
-       }
-    }
-  else if (ctx->encoding == PUBKEY_ENC_PSS && lhash
-          && ctx->op == PUBKEY_OP_SIGN)
-    {
-      if (gcry_sexp_length (lhash) != 3)
-        rc = GPG_ERR_INV_OBJ;
-      else if ( !(s=gcry_sexp_nth_data (lhash, 1, &n)) || !n )
-        rc = GPG_ERR_INV_OBJ;
-      else
-        {
-          const void * value;
-          size_t valuelen;
-          void *random_override = NULL;
-          size_t random_override_len = 0;
-
-         ctx->hash_algo = get_hash_algo (s, n);
-
-          if (!ctx->hash_algo)
-            rc = GPG_ERR_DIGEST_ALGO;
-          else if ( !(value=gcry_sexp_nth_data (lhash, 2, &valuelen))
-                    || !valuelen )
-            rc = GPG_ERR_INV_OBJ;
-          else
-           {
-             gcry_sexp_t list;
-
-             /* Get SALT-LENGTH. */
-             list = gcry_sexp_find_token (ldata, "salt-length", 0);
-             if (list)
-               {
-                 s = gcry_sexp_nth_data (list, 1, &n);
-                 if (!s)
-                   {
-                     rc = GPG_ERR_NO_OBJ;
-                     goto leave;
-                   }
-                 ctx->saltlen = (unsigned int)strtoul (s, NULL, 10);
-                 gcry_sexp_release (list);
-               }
-
-              /* Get optional RANDOM-OVERRIDE.  */
-              list = gcry_sexp_find_token (ldata, "random-override", 0);
-              if (list)
-                {
-                  s = gcry_sexp_nth_data (list, 1, &n);
-                  if (!s)
-                    rc = GPG_ERR_NO_OBJ;
-                  else if (n > 0)
-                    {
-                      random_override = gcry_malloc (n);
-                      if (!random_override)
-                        rc = gpg_err_code_from_syserror ();
-                      else
-                        {
-                          memcpy (random_override, s, n);
-                          random_override_len = n;
-                        }
-                    }
-                  gcry_sexp_release (list);
-                  if (rc)
-                    goto leave;
-                }
-
-              /* Encode the data.  (NBITS-1 is due to 8.1.1, step 1.) */
-             rc = pss_encode (ret_mpi, ctx->nbits - 1, ctx->hash_algo,
-                              value, valuelen, ctx->saltlen,
-                               random_override, random_override_len);
-
-              gcry_free (random_override);
-           }
-        }
-    }
-  else if (ctx->encoding == PUBKEY_ENC_PSS && lhash
-          && ctx->op == PUBKEY_OP_VERIFY)
-    {
-      if (gcry_sexp_length (lhash) != 3)
-        rc = GPG_ERR_INV_OBJ;
-      else if ( !(s=gcry_sexp_nth_data (lhash, 1, &n)) || !n )
-        rc = GPG_ERR_INV_OBJ;
-      else
-        {
-         ctx->hash_algo = get_hash_algo (s, n);
-
-          if (!ctx->hash_algo)
-            rc = GPG_ERR_DIGEST_ALGO;
-         else
-           {
-             *ret_mpi = gcry_sexp_nth_mpi (lhash, 2, GCRYMPI_FMT_USG);
-             if (!*ret_mpi)
-               rc = GPG_ERR_INV_OBJ;
-             ctx->verify_cmp = pss_verify_cmp;
-             ctx->verify_arg = *ret_mpi;
-           }
-       }
-    }
-  else
-    rc = GPG_ERR_CONFLICT;
+  return spec? strlen (spec->elements_skey) : 0;
+}
 
- leave:
-  gcry_sexp_release (ldata);
-  gcry_sexp_release (lhash);
-  gcry_sexp_release (lvalue);
 
-  if (!rc)
-    ctx->flags = parsed_flags;
-  else
-    {
-      gcry_free (ctx->label);
-      ctx->label = NULL;
-    }
+/****************
+ * Return the number of signature material numbers
+ */
+static int
+pubkey_get_nsig (int algo)
+{
+  gcry_pk_spec_t *spec = spec_from_algo (algo);
 
-  return rc;
+  return spec? strlen (spec->elements_sig) : 0;
 }
 
-static void
-init_encoding_ctx (struct pk_encoding_ctx *ctx, enum pk_operation op,
-                  unsigned int nbits)
+/****************
+ * Return the number of encryption material numbers
+ */
+static int
+pubkey_get_nenc (int algo)
 {
-  ctx->op = op;
-  ctx->nbits = nbits;
-  ctx->encoding = PUBKEY_ENC_UNKNOWN;
-  ctx->flags = 0;
-  ctx->hash_algo = GCRY_MD_SHA1;
-  ctx->label = NULL;
-  ctx->labellen = 0;
-  ctx->saltlen = 20;
-  ctx->verify_cmp = NULL;
-  ctx->verify_arg = NULL;
+  gcry_pk_spec_t *spec = spec_from_algo (algo);
+
+  return spec? strlen (spec->elements_enc) : 0;
 }
 
 
+
 /*
    Do a PK encrypt operation
 
@@ -2829,7 +301,7 @@ init_encoding_ctx (struct pk_encoding_ctx *ctx, enum 
pk_operation op,
 
    Returns: 0 or an errorcode.
 
-   s_data = See comment for sexp_data_to_mpi
+   s_data = See comment for _gcry_pk_util_data_to_mpi
    s_pkey = <key-as-defined-in-sexp_to_key>
    r_ciph = (enc-val
                (<algo>
@@ -2839,150 +311,34 @@ init_encoding_ctx (struct pk_encoding_ctx *ctx, enum 
pk_operation op,
                ))
 
 */
-gcry_error_t
-gcry_pk_encrypt (gcry_sexp_t *r_ciph, gcry_sexp_t s_data, gcry_sexp_t s_pkey)
+gcry_err_code_t
+_gcry_pk_encrypt (gcry_sexp_t *r_ciph, gcry_sexp_t s_data, gcry_sexp_t s_pkey)
 {
-  gcry_mpi_t *pkey = NULL, data = NULL, *ciph = NULL;
-  const char *algo_name, *algo_elems;
-  struct pk_encoding_ctx ctx;
   gcry_err_code_t rc;
-  gcry_pk_spec_t *pubkey = NULL;
-  gcry_module_t module = NULL;
+  gcry_pk_spec_t *spec;
+  gcry_sexp_t keyparms;
 
   *r_ciph = NULL;
 
-  REGISTER_DEFAULT_PUBKEYS;
-
-  /* Get the key. */
-  rc = sexp_to_key (s_pkey, 0, NULL, &pkey, &module);
-  if (rc)
-    goto leave;
-
-  gcry_assert (module);
-  pubkey = (gcry_pk_spec_t *) module->spec;
-
-  /* If aliases for the algorithm name exists, take the first one
-     instead of the regular name to adhere to SPKI conventions.  We
-     assume that the first alias name is the lowercase version of the
-     regular one.  This change is required for compatibility with
-     1.1.12 generated S-expressions. */
-  algo_name = pubkey->aliases? *pubkey->aliases : NULL;
-  if (!algo_name || !*algo_name)
-    algo_name = pubkey->name;
-
-  algo_elems = pubkey->elements_enc;
-
-  /* Get the stuff we want to encrypt. */
-  init_encoding_ctx (&ctx, PUBKEY_OP_ENCRYPT, gcry_pk_get_nbits (s_pkey));
-  rc = sexp_data_to_mpi (s_data, &data, &ctx);
-  if (rc)
-    goto leave;
-
-  /* Now we can encrypt DATA to CIPH. */
-  ciph = gcry_calloc (strlen (algo_elems) + 1, sizeof (*ciph));
-  if (!ciph)
-    {
-      rc = gpg_err_code_from_syserror ();
-      goto leave;
-    }
-  rc = pubkey_encrypt (module->mod_id, ciph, data, pkey, ctx.flags);
-  mpi_free (data);
-  data = NULL;
+  rc = spec_from_sexp (s_pkey, 0, &spec, &keyparms);
   if (rc)
     goto leave;
 
-  /* We did it.  Now build the return list */
-  if (ctx.encoding == PUBKEY_ENC_OAEP
-      || ctx.encoding == PUBKEY_ENC_PKCS1)
-    {
-      /* We need to make sure to return the correct length to avoid
-         problems with missing leading zeroes.  We know that this
-         encoding does only make sense with RSA thus we don't need to
-         build the S-expression on the fly.  */
-      unsigned char *em;
-      size_t emlen = (ctx.nbits+7)/8;
-
-      rc = octet_string_from_mpi (&em, NULL, ciph[0], emlen);
-      if (rc)
-        goto leave;
-      rc = gcry_err_code (gcry_sexp_build (r_ciph, NULL,
-                                           "(enc-val(%s(a%b)))",
-                                           algo_name, (int)emlen, em));
-      gcry_free (em);
-      if (rc)
-        goto leave;
-    }
+  if (spec->flags.disabled)
+    rc = GPG_ERR_PUBKEY_ALGO;
+  else if (!spec->flags.fips && fips_mode ())
+    rc = GPG_ERR_PUBKEY_ALGO;
+  else if (spec->encrypt)
+    rc = spec->encrypt (r_ciph, s_data, keyparms);
   else
-    {
-      char *string, *p;
-      int i;
-      size_t nelem = strlen (algo_elems);
-      size_t needed = 19 + strlen (algo_name) + (nelem * 5);
-      void **arg_list;
-
-      /* Build the string.  */
-      string = p = gcry_malloc (needed);
-      if (!string)
-        {
-          rc = gpg_err_code_from_syserror ();
-          goto leave;
-        }
-      p = stpcpy ( p, "(enc-val(" );
-      p = stpcpy ( p, algo_name );
-      for (i=0; algo_elems[i]; i++ )
-        {
-          *p++ = '(';
-          *p++ = algo_elems[i];
-          p = stpcpy ( p, "%m)" );
-        }
-      strcpy ( p, "))" );
-
-      /* And now the ugly part: We don't have a function to pass an
-       * array to a format string, so we have to do it this way :-(.  */
-      /* FIXME: There is now such a format specifier, so we can
-         change the code to be more clear. */
-      arg_list = calloc (nelem, sizeof *arg_list);
-      if (!arg_list)
-        {
-          rc = gpg_err_code_from_syserror ();
-          goto leave;
-        }
-
-      for (i = 0; i < nelem; i++)
-        arg_list[i] = ciph + i;
-
-      rc = gcry_sexp_build_array (r_ciph, NULL, string, arg_list);
-      free (arg_list);
-      if (rc)
-        BUG ();
-      gcry_free (string);
-    }
+    rc = GPG_ERR_NOT_IMPLEMENTED;
 
  leave:
-  if (pkey)
-    {
-      release_mpi_array (pkey);
-      gcry_free (pkey);
-    }
-
-  if (ciph)
-    {
-      release_mpi_array (ciph);
-      gcry_free (ciph);
-    }
-
-  if (module)
-    {
-      ath_mutex_lock (&pubkeys_registered_lock);
-      _gcry_module_release (module);
-      ath_mutex_unlock (&pubkeys_registered_lock);
-    }
-
-  gcry_free (ctx.label);
-
-  return gcry_error (rc);
+  sexp_release (keyparms);
+  return rc;
 }
 
+
 /*
    Do a PK decrypt operation
 
@@ -3011,103 +367,31 @@ gcry_pk_encrypt (gcry_sexp_t *r_ciph, gcry_sexp_t 
s_data, gcry_sexp_t s_pkey)
             With pkcs1 or oaep decoding enabled the returned value is a
             verbatim octet string.
  */
-gcry_error_t
-gcry_pk_decrypt (gcry_sexp_t *r_plain, gcry_sexp_t s_data, gcry_sexp_t s_skey)
+gcry_err_code_t
+_gcry_pk_decrypt (gcry_sexp_t *r_plain, gcry_sexp_t s_data, gcry_sexp_t s_skey)
 {
-  gcry_mpi_t *skey = NULL, *data = NULL, plain = NULL;
-  unsigned char *unpad = NULL;
-  size_t unpadlen = 0;
-  int modern, flags;
-  struct pk_encoding_ctx ctx;
   gcry_err_code_t rc;
-  gcry_module_t module_enc = NULL, module_key = NULL;
+  gcry_pk_spec_t *spec;
+  gcry_sexp_t keyparms;
 
   *r_plain = NULL;
-  ctx.label = NULL;
 
-  REGISTER_DEFAULT_PUBKEYS;
-
-  rc = sexp_to_key (s_skey, 1, NULL, &skey, &module_key);
-  if (rc)
-    goto leave;
-
-  init_encoding_ctx (&ctx, PUBKEY_OP_DECRYPT, gcry_pk_get_nbits (s_skey));
-  rc = sexp_to_enc (s_data, &data, &module_enc, &modern, &flags, &ctx);
-  if (rc)
-    goto leave;
-
-  if (module_key->mod_id != module_enc->mod_id)
-    {
-      rc = GPG_ERR_CONFLICT; /* Key algo does not match data algo. */
-      goto leave;
-    }
-
-  rc = pubkey_decrypt (module_key->mod_id, &plain, data, skey, flags);
+  rc = spec_from_sexp (s_skey, 1, &spec, &keyparms);
   if (rc)
     goto leave;
 
-  /* Do un-padding if necessary. */
-  switch (ctx.encoding)
-    {
-    case PUBKEY_ENC_PKCS1:
-      rc = pkcs1_decode_for_encryption (&unpad, &unpadlen,
-                                        gcry_pk_get_nbits (s_skey), plain);
-      mpi_free (plain);
-      plain = NULL;
-      if (!rc)
-        rc = gcry_err_code (gcry_sexp_build (r_plain, NULL, "(value %b)",
-                                             (int)unpadlen, unpad));
-      break;
-
-    case PUBKEY_ENC_OAEP:
-      rc = oaep_decode (&unpad, &unpadlen,
-                        gcry_pk_get_nbits (s_skey), ctx.hash_algo,
-                       plain, ctx.label, ctx.labellen);
-      mpi_free (plain);
-      plain = NULL;
-      if (!rc)
-        rc = gcry_err_code (gcry_sexp_build (r_plain, NULL, "(value %b)",
-                                             (int)unpadlen, unpad));
-      break;
-
-    default:
-      /* Raw format.  For backward compatibility we need to assume a
-         signed mpi by using the sexp format string "%m".  */
-      rc = gcry_err_code (gcry_sexp_build
-                          (r_plain, NULL, modern? "(value %m)" : "%m", plain));
-      break;
-    }
+  if (spec->flags.disabled)
+    rc = GPG_ERR_PUBKEY_ALGO;
+  else if (!spec->flags.fips && fips_mode ())
+    rc = GPG_ERR_PUBKEY_ALGO;
+  else if (spec->decrypt)
+    rc = spec->decrypt (r_plain, s_data, keyparms);
+  else
+    rc = GPG_ERR_NOT_IMPLEMENTED;
 
  leave:
-  gcry_free (unpad);
-
-  if (skey)
-    {
-      release_mpi_array (skey);
-      gcry_free (skey);
-    }
-
-  mpi_free (plain);
-
-  if (data)
-    {
-      release_mpi_array (data);
-      gcry_free (data);
-    }
-
-  if (module_key || module_enc)
-    {
-      ath_mutex_lock (&pubkeys_registered_lock);
-      if (module_key)
-       _gcry_module_release (module_key);
-      if (module_enc)
-       _gcry_module_release (module_enc);
-      ath_mutex_unlock (&pubkeys_registered_lock);
-    }
-
-  gcry_free (ctx.label);
-
-  return gcry_error (rc);
+  sexp_release (keyparms);
+  return rc;
 }
 
 
@@ -3128,7 +412,7 @@ gcry_pk_decrypt (gcry_sexp_t *r_plain, gcry_sexp_t s_data, 
gcry_sexp_t s_skey)
             other arguments but is always suitable to be passed to
             gcry_pk_verify
 
-   s_hash = See comment for sexp_data_to_mpi
+   s_hash = See comment for _gcry-pk_util_data_to_mpi
 
    s_skey = <key-as-defined-in-sexp_to_key>
    r_sig  = (sig-val
@@ -3140,133 +424,172 @@ gcry_pk_decrypt (gcry_sexp_t *r_plain, gcry_sexp_t 
s_data, gcry_sexp_t s_skey)
 
   Note that (hash algo) in R_SIG is not used.
 */
-gcry_error_t
-gcry_pk_sign (gcry_sexp_t *r_sig, gcry_sexp_t s_hash, gcry_sexp_t s_skey)
+gcry_err_code_t
+_gcry_pk_sign (gcry_sexp_t *r_sig, gcry_sexp_t s_hash, gcry_sexp_t s_skey)
 {
-  gcry_mpi_t *skey = NULL, hash = NULL, *result = NULL;
-  gcry_pk_spec_t *pubkey = NULL;
-  gcry_module_t module = NULL;
-  const char *algo_name, *algo_elems;
-  struct pk_encoding_ctx ctx;
-  int i;
   gcry_err_code_t rc;
+  gcry_pk_spec_t *spec;
+  gcry_sexp_t keyparms;
 
   *r_sig = NULL;
 
-  REGISTER_DEFAULT_PUBKEYS;
-
-  rc = sexp_to_key (s_skey, 1, NULL, &skey, &module);
+  rc = spec_from_sexp (s_skey, 1, &spec, &keyparms);
   if (rc)
     goto leave;
 
-  gcry_assert (module);
-  pubkey = (gcry_pk_spec_t *) module->spec;
-  algo_name = pubkey->aliases? *pubkey->aliases : NULL;
-  if (!algo_name || !*algo_name)
-    algo_name = pubkey->name;
+  if (spec->flags.disabled)
+    rc = GPG_ERR_PUBKEY_ALGO;
+  else if (!spec->flags.fips && fips_mode ())
+    rc = GPG_ERR_PUBKEY_ALGO;
+  else if (spec->sign)
+    rc = spec->sign (r_sig, s_hash, keyparms);
+  else
+    rc = GPG_ERR_NOT_IMPLEMENTED;
+
+ leave:
+  sexp_release (keyparms);
+  return rc;
+}
 
-  algo_elems = pubkey->elements_sig;
 
-  /* Get the stuff we want to sign.  Note that pk_get_nbits does also
-      work on a private key. */
-  init_encoding_ctx (&ctx, PUBKEY_OP_SIGN, gcry_pk_get_nbits (s_skey));
-  rc = sexp_data_to_mpi (s_hash, &hash, &ctx);
-  if (rc)
-    goto leave;
+gcry_err_code_t
+_gcry_pk_sign_md (gcry_sexp_t *r_sig, const char *tmpl, gcry_md_hd_t hd_orig,
+                  gcry_sexp_t s_skey, gcry_ctx_t ctx)
+{
+  gcry_err_code_t rc;
+  gcry_pk_spec_t *spec;
+  gcry_sexp_t keyparms = NULL;
+  gcry_sexp_t s_hash = NULL;
+  int algo;
+  const unsigned char *digest;
+  int digest_size;
+  gcry_error_t err;
+  gcry_md_hd_t hd;
+  const char *s;
+  char *hash_name;
+
+  *r_sig = NULL;
+
+  /* Check if it has fixed hash name or %s */
+  s = strstr (tmpl, "(hash ");
+  if (s == NULL)
+    return GPG_ERR_DIGEST_ALGO;
 
-  result = gcry_calloc (strlen (algo_elems) + 1, sizeof (*result));
-  if (!result)
+  s += 6;
+  if (!strncmp (s, "%s", 2))
+    hash_name = NULL;
+  else
     {
-      rc = gpg_err_code_from_syserror ();
-      goto leave;
+      const char *p;
+
+      for (p = s; *p && *p != ' '; p++)
+       ;
+
+      hash_name = xtrymalloc (p - s + 1);
+      if (!hash_name)
+       return gpg_error_from_syserror ();
+      memcpy (hash_name, s, p - s);
+      hash_name[p - s] = 0;
+    }
+
+  err = _gcry_md_copy (&hd, hd_orig);
+  if (err)
+    {
+      xfree (hash_name);
+      return gpg_err_code (err);
+    }
+
+  if (hash_name)
+    {
+      algo = _gcry_md_map_name (hash_name);
+      digest_size = (int) _gcry_md_get_algo_dlen (algo);
+
+      if (algo == 0 || digest_size == 0
+          || (fips_mode () && algo == GCRY_MD_SHA1))
+       {
+         xfree (hash_name);
+         _gcry_md_close (hd);
+         return GPG_ERR_DIGEST_ALGO;
+       }
+
+      digest = _gcry_md_read (hd, algo);
+    }
+  else
+    {
+      algo = _gcry_md_get_algo (hd);
+      digest_size = (int) _gcry_md_get_algo_dlen (algo);
+
+      if (digest_size == 0 || (fips_mode () && algo == GCRY_MD_SHA1))
+        {
+          _gcry_md_close (hd);
+          return GPG_ERR_DIGEST_ALGO;
+        }
+
+      digest = _gcry_md_read (hd, 0);
+    }
+
+  if (!digest)
+    {
+      xfree (hash_name);
+      _gcry_md_close (hd);
+      return GPG_ERR_NOT_IMPLEMENTED;
     }
-  rc = pubkey_sign (module->mod_id, result, hash, skey);
-  if (rc)
-    goto leave;
 
-  if (ctx.encoding == PUBKEY_ENC_PSS
-      || ctx.encoding == PUBKEY_ENC_PKCS1)
+  if (!ctx)
     {
-      /* We need to make sure to return the correct length to avoid
-         problems with missing leading zeroes.  We know that this
-         encoding does only make sense with RSA thus we don't need to
-         build the S-expression on the fly.  */
-      unsigned char *em;
-      size_t emlen = (ctx.nbits+7)/8;
-
-      rc = octet_string_from_mpi (&em, NULL, result[0], emlen);
-      if (rc)
-        goto leave;
-      rc = gcry_err_code (gcry_sexp_build (r_sig, NULL,
-                                           "(sig-val(%s(s%b)))",
-                                           algo_name, (int)emlen, em));
-      gcry_free (em);
-      if (rc)
-        goto leave;
+      if (hash_name)
+       rc = _gcry_sexp_build (&s_hash, NULL, tmpl,
+                              digest_size, digest);
+      else
+       rc = _gcry_sexp_build (&s_hash, NULL, tmpl,
+                              _gcry_md_algo_name (algo),
+                              digest_size, digest);
     }
   else
     {
-      /* General purpose output encoding.  Do it on the fly.  */
-      char *string, *p;
-      size_t nelem, needed = strlen (algo_name) + 20;
-      void **arg_list;
-
-      nelem = strlen (algo_elems);
-
-      /* Count elements, so that we can allocate enough space. */
-      needed += 10 * nelem;
-
-      /* Build the string. */
-      string = p = gcry_malloc (needed);
-      if (!string)
-        {
-          rc = gpg_err_code_from_syserror ();
-          goto leave;
-        }
-      p = stpcpy (p, "(sig-val(");
-      p = stpcpy (p, algo_name);
-      for (i = 0; algo_elems[i]; i++)
-        {
-          *p++ = '(';
-          *p++ = algo_elems[i];
-          p = stpcpy (p, "%M)");
-        }
-      strcpy (p, "))");
+      const unsigned char *p;
+      size_t len;
 
-      arg_list = calloc (nelem, sizeof *arg_list);
-      if (!arg_list)
+      rc = _gcry_pk_get_random_override (ctx, &p, &len);
+      if (rc)
         {
-          rc = gpg_err_code_from_syserror ();
-          goto leave;
+          _gcry_md_close (hd);
+          return rc;
         }
 
-      for (i = 0; i < nelem; i++)
-        arg_list[i] = result + i;
-
-      rc = gcry_sexp_build_array (r_sig, NULL, string, arg_list);
-      free (arg_list);
-      if (rc)
-        BUG ();
-      gcry_free (string);
+      if (hash_name)
+       rc = _gcry_sexp_build (&s_hash, NULL, tmpl,
+                              digest_size, digest,
+                              (int) len, p);
+      else
+       rc = _gcry_sexp_build (&s_hash, NULL, tmpl,
+                              _gcry_md_algo_name (algo),
+                              digest_size, digest,
+                              (int) len, p);
     }
 
- leave:
-  if (skey)
-    {
-      release_mpi_array (skey);
-      gcry_free (skey);
-    }
+  xfree (hash_name);
+  _gcry_md_close (hd);
+  if (rc)
+    return rc;
 
-  if (hash)
-    mpi_free (hash);
+  rc = spec_from_sexp (s_skey, 1, &spec, &keyparms);
+  if (rc)
+    goto leave;
 
-  if (result)
-    {
-      release_mpi_array (result);
-      gcry_free (result);
-    }
+  if (spec->flags.disabled)
+    rc = GPG_ERR_PUBKEY_ALGO;
+  else if (!spec->flags.fips && fips_mode ())
+    rc = GPG_ERR_PUBKEY_ALGO;
+  else if (spec->sign)
+    rc = spec->sign (r_sig, s_hash, keyparms);
+  else
+    rc = GPG_ERR_NOT_IMPLEMENTED;
 
-  return gcry_error (rc);
+ leave:
+  sexp_release (s_hash);
+  sexp_release (keyparms);
+  return rc;
 }
 
 
@@ -3277,67 +600,168 @@ gcry_pk_sign (gcry_sexp_t *r_sig, gcry_sexp_t s_hash, 
gcry_sexp_t s_skey)
    hashvalue data.  Public key has to be a standard public key given
    as an S-Exp, sig is a S-Exp as returned from gcry_pk_sign and data
    must be an S-Exp like the one in sign too.  */
-gcry_error_t
-gcry_pk_verify (gcry_sexp_t s_sig, gcry_sexp_t s_hash, gcry_sexp_t s_pkey)
+gcry_err_code_t
+_gcry_pk_verify (gcry_sexp_t s_sig, gcry_sexp_t s_hash, gcry_sexp_t s_pkey)
 {
-  gcry_module_t module_key = NULL, module_sig = NULL;
-  gcry_mpi_t *pkey = NULL, hash = NULL, *sig = NULL;
-  struct pk_encoding_ctx ctx;
   gcry_err_code_t rc;
+  gcry_pk_spec_t *spec;
+  gcry_sexp_t keyparms;
 
-  REGISTER_DEFAULT_PUBKEYS;
-
-  rc = sexp_to_key (s_pkey, 0, NULL, &pkey, &module_key);
+  rc = spec_from_sexp (s_pkey, 0, &spec, &keyparms);
   if (rc)
     goto leave;
 
-  rc = sexp_to_sig (s_sig, &sig, &module_sig);
-  if (rc)
-    goto leave;
+  if (spec->flags.disabled)
+    rc = GPG_ERR_PUBKEY_ALGO;
+  else if (!spec->flags.fips && fips_mode ())
+    rc = GPG_ERR_PUBKEY_ALGO;
+  else if (spec->verify)
+    rc = spec->verify (s_sig, s_hash, keyparms);
+  else
+    rc = GPG_ERR_NOT_IMPLEMENTED;
+
+ leave:
+  sexp_release (keyparms);
+  return rc;
+}
+
+
+gcry_err_code_t
+_gcry_pk_verify_md (gcry_sexp_t s_sig, const char *tmpl, gcry_md_hd_t hd_orig,
+                    gcry_sexp_t s_pkey, gcry_ctx_t ctx)
+{
+  gcry_err_code_t rc;
+  gcry_pk_spec_t *spec;
+  gcry_sexp_t keyparms = NULL;
+  gcry_sexp_t s_hash = NULL;
+  int algo;
+  const unsigned char *digest;
+  int digest_size;
+  gcry_error_t err;
+  gcry_md_hd_t hd;
+  const char *s;
+  char *hash_name;
 
-  /* Fixme: Check that the algorithm of S_SIG is compatible to the one
-     of S_PKEY.  */
+  /* Check if it has fixed hash name or %s */
+  s = strstr (tmpl, "(hash ");
+  if (s == NULL)
+    return GPG_ERR_DIGEST_ALGO;
 
-  if (module_key->mod_id != module_sig->mod_id)
+  s += 6;
+  if (!strncmp (s, "%s", 2))
+    hash_name = NULL;
+  else
     {
-      rc = GPG_ERR_CONFLICT;
-      goto leave;
+      const char *p;
+
+      for (p = s; *p && *p != ' '; p++)
+        ;
+
+      hash_name = xtrymalloc (p - s + 1);
+      if (!hash_name)
+        return gpg_error_from_syserror ();
+      memcpy (hash_name, s, p - s);
+      hash_name[p - s] = 0;
     }
 
-  /* Get the stuff we want to verify. */
-  init_encoding_ctx (&ctx, PUBKEY_OP_VERIFY, gcry_pk_get_nbits (s_pkey));
-  rc = sexp_data_to_mpi (s_hash, &hash, &ctx);
-  if (rc)
-    goto leave;
+  err = _gcry_md_copy (&hd, hd_orig);
+  if (err)
+    {
+      xfree (hash_name);
+      return gpg_err_code (err);
+    }
 
-  rc = pubkey_verify (module_key->mod_id, hash, sig, pkey,
-                     ctx.verify_cmp, &ctx);
+  if (hash_name)
+    {
+      algo = _gcry_md_map_name (hash_name);
+      digest_size = (int) _gcry_md_get_algo_dlen (algo);
 
- leave:
-  if (pkey)
+      if (algo == 0 || digest_size == 0
+          || (fips_mode () && algo == GCRY_MD_SHA1))
+        {
+          xfree (hash_name);
+          _gcry_md_close (hd);
+          return GPG_ERR_DIGEST_ALGO;
+        }
+
+      digest = _gcry_md_read (hd, algo);
+    }
+  else
     {
-      release_mpi_array (pkey);
-      gcry_free (pkey);
+      algo = _gcry_md_get_algo (hd);
+      digest_size = (int) _gcry_md_get_algo_dlen (algo);
+
+      if (digest_size == 0 || (fips_mode () && algo == GCRY_MD_SHA1))
+        {
+          _gcry_md_close (hd);
+          return GPG_ERR_DIGEST_ALGO;
+        }
+
+      digest = _gcry_md_read (hd, 0);
     }
-  if (sig)
+
+  if (!digest)
     {
-      release_mpi_array (sig);
-      gcry_free (sig);
+      xfree (hash_name);
+      _gcry_md_close (hd);
+      return GPG_ERR_DIGEST_ALGO;
     }
-  if (hash)
-    mpi_free (hash);
 
-  if (module_key || module_sig)
+  if (!ctx)
+    {
+      if (hash_name)
+        rc = _gcry_sexp_build (&s_hash, NULL, tmpl,
+                               digest_size, digest);
+      else
+        rc = _gcry_sexp_build (&s_hash, NULL, tmpl,
+                               _gcry_md_algo_name (algo),
+                               digest_size, digest);
+    }
+  else
     {
-      ath_mutex_lock (&pubkeys_registered_lock);
-      if (module_key)
-       _gcry_module_release (module_key);
-      if (module_sig)
-       _gcry_module_release (module_sig);
-      ath_mutex_unlock (&pubkeys_registered_lock);
+      const unsigned char *p;
+      size_t len;
+
+      rc = _gcry_pk_get_random_override (ctx, &p, &len);
+      if (rc)
+        {
+          _gcry_md_close (hd);
+          return rc;
+        }
+
+      if (hash_name)
+        rc = _gcry_sexp_build (&s_hash, NULL, tmpl,
+                               digest_size, digest,
+                               (int) len, p);
+      else
+        rc = _gcry_sexp_build (&s_hash, NULL, tmpl,
+                               _gcry_md_algo_name (algo),
+                               digest_size, digest,
+                               (int) len, p);
     }
 
-  return gcry_error (rc);
+  xfree (hash_name);
+  _gcry_md_close (hd);
+  if (rc)
+    return rc;
+
+  rc = spec_from_sexp (s_pkey, 0, &spec, &keyparms);
+  if (rc)
+    goto leave;
+
+  if (spec->flags.disabled)
+    rc = GPG_ERR_PUBKEY_ALGO;
+  else if (!spec->flags.fips && fips_mode ())
+    rc = GPG_ERR_PUBKEY_ALGO;
+  else if (spec->verify)
+    rc = spec->verify (s_sig, s_hash, keyparms);
+  else
+    rc = GPG_ERR_NOT_IMPLEMENTED;
+
+ leave:
+  sexp_release (s_hash);
+  sexp_release (keyparms);
+  return rc;
 }
 
 
@@ -3349,25 +773,30 @@ gcry_pk_verify (gcry_sexp_t s_sig, gcry_sexp_t s_hash, 
gcry_sexp_t s_pkey)
 
    Returns: 0 or an errorcode.
 
-   s_key = <key-as-defined-in-sexp_to_key> */
-gcry_error_t
-gcry_pk_testkey (gcry_sexp_t s_key)
+   NOTE: We currently support only secret key checking. */
+gcry_err_code_t
+_gcry_pk_testkey (gcry_sexp_t s_key)
 {
-  gcry_module_t module = NULL;
-  gcry_mpi_t *key = NULL;
   gcry_err_code_t rc;
+  gcry_pk_spec_t *spec;
+  gcry_sexp_t keyparms;
 
-  REGISTER_DEFAULT_PUBKEYS;
+  rc = spec_from_sexp (s_key, 1, &spec, &keyparms);
+  if (rc)
+    goto leave;
 
-  /* Note we currently support only secret key checking. */
-  rc = sexp_to_key (s_key, 1, NULL, &key, &module);
-  if (! rc)
-    {
-      rc = pubkey_check_secret_key (module->mod_id, key);
-      release_mpi_array (key);
-      gcry_free (key);
-    }
-  return gcry_error (rc);
+  if (spec->flags.disabled)
+    rc = GPG_ERR_PUBKEY_ALGO;
+  else if (!spec->flags.fips && fips_mode ())
+    rc = GPG_ERR_PUBKEY_ALGO;
+  else if (spec->check_secret_key)
+    rc = spec->check_secret_key (keyparms);
+  else
+    rc = GPG_ERR_NOT_IMPLEMENTED;
+
+ leave:
+  sexp_release (keyparms);
+  return rc;
 }
 
 
@@ -3404,41 +833,26 @@ gcry_pk_testkey (gcry_sexp_t s_key)
       (pm1-factors n1 n2 ... nn)
    ))
  */
-gcry_error_t
-gcry_pk_genkey (gcry_sexp_t *r_key, gcry_sexp_t s_parms)
+gcry_err_code_t
+_gcry_pk_genkey (gcry_sexp_t *r_key, gcry_sexp_t s_parms)
 {
-  gcry_pk_spec_t *pubkey = NULL;
-  gcry_module_t module = NULL;
+  gcry_pk_spec_t *spec = NULL;
   gcry_sexp_t list = NULL;
   gcry_sexp_t l2 = NULL;
-  gcry_sexp_t l3 = NULL;
   char *name = NULL;
-  size_t n;
-  gcry_err_code_t rc = GPG_ERR_NO_ERROR;
-  int i, j;
-  const char *algo_name = NULL;
-  int algo;
-  const char *sec_elems = NULL, *pub_elems = NULL;
-  gcry_mpi_t skey[12];
-  gcry_mpi_t *factors = NULL;
-  gcry_sexp_t extrainfo = NULL;
-  unsigned int nbits = 0;
-  unsigned long use_e = 0;
-
-  skey[0] = NULL;
-  *r_key = NULL;
+  gcry_err_code_t rc;
 
-  REGISTER_DEFAULT_PUBKEYS;
+  *r_key = NULL;
 
-  list = gcry_sexp_find_token (s_parms, "genkey", 0);
+  list = sexp_find_token (s_parms, "genkey", 0);
   if (!list)
     {
       rc = GPG_ERR_INV_OBJ; /* Does not contain genkey data. */
       goto leave;
     }
 
-  l2 = gcry_sexp_cadr (list);
-  gcry_sexp_release (list);
+  l2 = sexp_cadr (list);
+  sexp_release (list);
   list = l2;
   l2 = NULL;
   if (! list)
@@ -3454,222 +868,26 @@ gcry_pk_genkey (gcry_sexp_t *r_key, gcry_sexp_t s_parms)
       goto leave;
     }
 
-  ath_mutex_lock (&pubkeys_registered_lock);
-  module = gcry_pk_lookup_name (name);
-  ath_mutex_unlock (&pubkeys_registered_lock);
-  gcry_free (name);
+  spec = spec_from_name (name);
+  xfree (name);
   name = NULL;
-  if (!module)
+  if (!spec || spec->flags.disabled || (!spec->flags.fips && fips_mode ()))
     {
       rc = GPG_ERR_PUBKEY_ALGO; /* Unknown algorithm.  */
       goto leave;
     }
 
-  pubkey = (gcry_pk_spec_t *) module->spec;
-  algo = module->mod_id;
-  algo_name = pubkey->aliases? *pubkey->aliases : NULL;
-  if (!algo_name || !*algo_name)
-    algo_name = pubkey->name;
-  pub_elems = pubkey->elements_pkey;
-  sec_elems = pubkey->elements_skey;
-  if (strlen (sec_elems) >= DIM(skey))
-    BUG ();
-
-  /* Handle the optional rsa-use-e element.  Actually this belong into
-     the algorithm module but we have this parameter in the public
-     module API, so we need to parse it right here.  */
-  l2 = gcry_sexp_find_token (list, "rsa-use-e", 0);
-  if (l2)
-    {
-      char buf[50];
-      const char *s;
-
-      s = gcry_sexp_nth_data (l2, 1, &n);
-      if ( !s || n >= DIM (buf) - 1 )
-        {
-          rc = GPG_ERR_INV_OBJ; /* No value or value too large.  */
-          goto leave;
-        }
-      memcpy (buf, s, n);
-      buf[n] = 0;
-      use_e = strtoul (buf, NULL, 0);
-      gcry_sexp_release (l2);
-      l2 = NULL;
-    }
-  else
-    use_e = 65537; /* Not given, use the value generated by old versions. */
-
-
-  /* Get the "nbits" parameter.  */
-  l2 = gcry_sexp_find_token (list, "nbits", 0);
-  if (l2)
-    {
-      char buf[50];
-      const char *s;
-
-      s = gcry_sexp_nth_data (l2, 1, &n);
-      if (!s || n >= DIM (buf) - 1 )
-        {
-          rc = GPG_ERR_INV_OBJ; /* NBITS given without a cdr.  */
-          goto leave;
-        }
-      memcpy (buf, s, n);
-      buf[n] = 0;
-      nbits = (unsigned int)strtoul (buf, NULL, 0);
-      gcry_sexp_release (l2); l2 = NULL;
-    }
+  if (spec->generate)
+    rc = spec->generate (list, r_key);
   else
-    nbits = 0;
-
-  /* Pass control to the algorithm module. */
-  rc = pubkey_generate (module->mod_id, nbits, use_e, list, skey,
-                        &factors, &extrainfo);
-  gcry_sexp_release (list); list = NULL;
-  if (rc)
-    goto leave;
-
-  /* Key generation succeeded: Build an S-expression.  */
-  {
-    char *string, *p;
-    size_t nelem=0, nelem_cp = 0, needed=0;
-    gcry_mpi_t mpis[30];
-    int percent_s_idx = -1;
-
-    /* Estimate size of format string.  */
-    nelem = strlen (pub_elems) + strlen (sec_elems);
-    if (factors)
-      {
-        for (i = 0; factors[i]; i++)
-          nelem++;
-      }
-    nelem_cp = nelem;
-
-    needed += nelem * 10;
-    /* (+5 is for EXTRAINFO ("%S")).  */
-    needed += 2 * strlen (algo_name) + 300 + 5;
-    if (nelem > DIM (mpis))
-      BUG ();
-
-    /* Build the string. */
-    nelem = 0;
-    string = p = gcry_malloc (needed);
-    if (!string)
-      {
-        rc = gpg_err_code_from_syserror ();
-        goto leave;
-      }
-    p = stpcpy (p, "(key-data");
-    p = stpcpy (p, "(public-key(");
-    p = stpcpy (p, algo_name);
-    for(i = 0; pub_elems[i]; i++)
-      {
-        *p++ = '(';
-        *p++ = pub_elems[i];
-        p = stpcpy (p, "%m)");
-        mpis[nelem++] = skey[i];
-      }
-    if (extrainfo && (algo == GCRY_PK_ECDSA || algo == GCRY_PK_ECDH))
-      {
-        /* Very ugly hack to insert the used curve parameter into the
-           list of public key parameters.  */
-        percent_s_idx = nelem;
-        p = stpcpy (p, "%S");
-      }
-    p = stpcpy (p, "))");
-    p = stpcpy (p, "(private-key(");
-    p = stpcpy (p, algo_name);
-    for (i = 0; sec_elems[i]; i++)
-      {
-        *p++ = '(';
-        *p++ = sec_elems[i];
-        p = stpcpy (p, "%m)");
-        mpis[nelem++] = skey[i];
-      }
-    p = stpcpy (p, "))");
-
-    /* Hack to make release_mpi_array() work.  */
-    skey[i] = NULL;
-
-    if (extrainfo && percent_s_idx == -1)
-      {
-        /* If we have extrainfo we should not have any factors.  */
-        p = stpcpy (p, "%S");
-      }
-    else if (factors && factors[0])
-      {
-        p = stpcpy (p, "(misc-key-info(pm1-factors");
-        for(i = 0; factors[i]; i++)
-          {
-            p = stpcpy (p, "%m");
-            mpis[nelem++] = factors[i];
-          }
-        p = stpcpy (p, "))");
-      }
-    strcpy (p, ")");
-    gcry_assert (p - string < needed);
-
-    while (nelem < DIM (mpis))
-      mpis[nelem++] = NULL;
-
-    {
-      int elem_n = strlen (pub_elems) + strlen (sec_elems);
-      void **arg_list;
-
-      /* Allocate one extra for EXTRAINFO ("%S").  */
-      arg_list = gcry_calloc (nelem_cp+1, sizeof *arg_list);
-      if (!arg_list)
-        {
-          rc = gpg_err_code_from_syserror ();
-          goto leave;
-        }
-      for (i = j = 0; i < elem_n; i++)
-        {
-          if (i == percent_s_idx)
-            arg_list[j++] = &extrainfo;
-          arg_list[j++] = mpis + i;
-        }
-      if (extrainfo && percent_s_idx == -1)
-        arg_list[j] = &extrainfo;
-      else if (factors && factors[0])
-        {
-          for (; i < nelem_cp; i++)
-            arg_list[j++] = factors + i - elem_n;
-        }
-      rc = gcry_sexp_build_array (r_key, NULL, string, arg_list);
-      gcry_free (arg_list);
-      if (rc)
-       BUG ();
-      gcry_assert (DIM (mpis) == 30); /* Reminder to make sure that
-                                         the array gets increased if
-                                         new parameters are added. */
-    }
-    gcry_free (string);
-  }
+    rc = GPG_ERR_NOT_IMPLEMENTED;
 
  leave:
-  gcry_free (name);
-  gcry_sexp_release (extrainfo);
-  release_mpi_array (skey);
-  /* Don't free SKEY itself, it is an stack allocated array. */
+  sexp_release (list);
+  xfree (name);
+  sexp_release (l2);
 
-  if (factors)
-    {
-      release_mpi_array ( factors );
-      gcry_free (factors);
-    }
-
-  gcry_sexp_release (l3);
-  gcry_sexp_release (l2);
-  gcry_sexp_release (list);
-
-  if (module)
-    {
-      ath_mutex_lock (&pubkeys_registered_lock);
-      _gcry_module_release (module);
-      ath_mutex_unlock (&pubkeys_registered_lock);
-    }
-
-  return gcry_error (rc);
+  return rc;
 }
 
 
@@ -3679,32 +897,27 @@ gcry_pk_genkey (gcry_sexp_t *r_key, gcry_sexp_t s_parms)
    Hmmm: Should we have really this function or is it better to have a
    more general function to retrieve different properties of the key?  */
 unsigned int
-gcry_pk_get_nbits (gcry_sexp_t key)
+_gcry_pk_get_nbits (gcry_sexp_t key)
 {
-  gcry_module_t module = NULL;
-  gcry_pk_spec_t *pubkey;
-  gcry_mpi_t *keyarr = NULL;
-  unsigned int nbits = 0;
-  gcry_err_code_t rc;
-
-  REGISTER_DEFAULT_PUBKEYS;
-
-  rc = sexp_to_key (key, 0, NULL, &keyarr, &module);
-  if (rc == GPG_ERR_INV_OBJ)
-    rc = sexp_to_key (key, 1, NULL, &keyarr, &module);
-  if (rc)
-    return 0; /* Error - 0 is a suitable indication for that. */
-
-  pubkey = (gcry_pk_spec_t *) module->spec;
-  nbits = (*pubkey->get_nbits) (module->mod_id, keyarr);
-
-  ath_mutex_lock (&pubkeys_registered_lock);
-  _gcry_module_release (module);
-  ath_mutex_unlock (&pubkeys_registered_lock);
-
-  release_mpi_array (keyarr);
-  gcry_free (keyarr);
+  gcry_pk_spec_t *spec;
+  gcry_sexp_t parms;
+  unsigned int nbits;
+
+  /* Parsing KEY might be considered too much overhead.  For example
+     for RSA we would only need to look at P and stop parsing right
+     away.  However, with ECC things are more complicate in that only
+     a curve name might be specified.  Thus we need to tear the sexp
+     apart. */
+
+  if (spec_from_sexp (key, 0, &spec, &parms))
+    return 0; /* Error - 0 is a suitable indication for that.  */
+  if (spec->flags.disabled)
+    return 0;
+  if (!spec->flags.fips && fips_mode ())
+    return 0;
 
+  nbits = spec->get_nbits (parms);
+  sexp_release (parms);
   return nbits;
 }
 
@@ -3717,12 +930,11 @@ gcry_pk_get_nbits (gcry_sexp_t key)
    NULL is returned to indicate an error which is most likely an
    unknown algorithm.  The function accepts public or secret keys. */
 unsigned char *
-gcry_pk_get_keygrip (gcry_sexp_t key, unsigned char *array)
+_gcry_pk_get_keygrip (gcry_sexp_t key, unsigned char *array)
 {
-  gcry_sexp_t list = NULL, l2 = NULL;
-  gcry_pk_spec_t *pubkey = NULL;
-  gcry_module_t module = NULL;
-  pk_extra_spec_t *extraspec;
+  gcry_sexp_t list = NULL;
+  gcry_sexp_t l2 = NULL;
+  gcry_pk_spec_t *spec = NULL;
   const char *s;
   char *name = NULL;
   int idx;
@@ -3730,21 +942,19 @@ gcry_pk_get_keygrip (gcry_sexp_t key, unsigned char 
*array)
   gcry_md_hd_t md = NULL;
   int okay = 0;
 
-  REGISTER_DEFAULT_PUBKEYS;
-
   /* Check that the first element is valid. */
-  list = gcry_sexp_find_token (key, "public-key", 0);
+  list = sexp_find_token (key, "public-key", 0);
   if (! list)
-    list = gcry_sexp_find_token (key, "private-key", 0);
+    list = sexp_find_token (key, "private-key", 0);
   if (! list)
-    list = gcry_sexp_find_token (key, "protected-private-key", 0);
+    list = sexp_find_token (key, "protected-private-key", 0);
   if (! list)
-    list = gcry_sexp_find_token (key, "shadowed-private-key", 0);
+    list = sexp_find_token (key, "shadowed-private-key", 0);
   if (! list)
     return NULL; /* No public- or private-key object. */
 
-  l2 = gcry_sexp_cadr (list);
-  gcry_sexp_release (list);
+  l2 = sexp_cadr (list);
+  sexp_release (list);
   list = l2;
   l2 = NULL;
 
@@ -3752,27 +962,21 @@ gcry_pk_get_keygrip (gcry_sexp_t key, unsigned char 
*array)
   if (!name)
     goto fail; /* Invalid structure of object. */
 
-  ath_mutex_lock (&pubkeys_registered_lock);
-  module = gcry_pk_lookup_name (name);
-  ath_mutex_unlock (&pubkeys_registered_lock);
-
-  if (!module)
+  spec = spec_from_name (name);
+  if (!spec)
     goto fail; /* Unknown algorithm.  */
 
-  pubkey = (gcry_pk_spec_t *) module->spec;
-  extraspec = module->extraspec;
-
-  elems = pubkey->elements_grip;
+  elems = spec->elements_grip;
   if (!elems)
     goto fail; /* No grip parameter.  */
 
-  if (gcry_md_open (&md, GCRY_MD_SHA1, 0))
+  if (_gcry_md_open (&md, GCRY_MD_SHA1, 0))
     goto fail;
 
-  if (extraspec && extraspec->comp_keygrip)
+  if (spec->comp_keygrip)
     {
       /* Module specific method to compute a keygrip.  */
-      if (extraspec->comp_keygrip (md, list))
+      if (spec->comp_keygrip (md, list))
         goto fail;
     }
   else
@@ -3784,157 +988,105 @@ gcry_pk_get_keygrip (gcry_sexp_t key, unsigned char 
*array)
           size_t datalen;
           char buf[30];
 
-          l2 = gcry_sexp_find_token (list, s, 1);
+          l2 = sexp_find_token (list, s, 1);
           if (! l2)
             goto fail;
-          data = gcry_sexp_nth_data (l2, 1, &datalen);
+          data = sexp_nth_data (l2, 1, &datalen);
           if (! data)
             goto fail;
 
           snprintf (buf, sizeof buf, "(1:%c%u:", *s, (unsigned int)datalen);
-          gcry_md_write (md, buf, strlen (buf));
-          gcry_md_write (md, data, datalen);
-          gcry_sexp_release (l2);
+          _gcry_md_write (md, buf, strlen (buf));
+          _gcry_md_write (md, data, datalen);
+          sexp_release (l2);
           l2 = NULL;
-          gcry_md_write (md, ")", 1);
+          _gcry_md_write (md, ")", 1);
         }
     }
 
   if (!array)
     {
-      array = gcry_malloc (20);
+      array = xtrymalloc (20);
       if (! array)
         goto fail;
     }
 
-  memcpy (array, gcry_md_read (md, GCRY_MD_SHA1), 20);
+  memcpy (array, _gcry_md_read (md, GCRY_MD_SHA1), 20);
   okay = 1;
 
  fail:
-  gcry_free (name);
-  gcry_sexp_release (l2);
-  gcry_md_close (md);
-  gcry_sexp_release (list);
+  xfree (name);
+  sexp_release (l2);
+  _gcry_md_close (md);
+  sexp_release (list);
   return okay? array : NULL;
 }
 
 
 
 const char *
-gcry_pk_get_curve (gcry_sexp_t key, int iterator, unsigned int *r_nbits)
+_gcry_pk_get_curve (gcry_sexp_t key, int iterator, unsigned int *r_nbits)
 {
-  gcry_mpi_t *pkey = NULL;
-  gcry_sexp_t list = NULL;
-  gcry_sexp_t l2;
-  gcry_module_t module = NULL;
-  pk_extra_spec_t *extraspec;
-  char *name = NULL;
   const char *result = NULL;
-  int want_private = 1;
+  gcry_pk_spec_t *spec;
+  gcry_sexp_t keyparms = NULL;
 
   if (r_nbits)
     *r_nbits = 0;
 
-  REGISTER_DEFAULT_PUBKEYS;
-
   if (key)
     {
       iterator = 0;
 
-      /* Check that the first element is valid. */
-      list = gcry_sexp_find_token (key, "public-key", 0);
-      if (list)
-        want_private = 0;
-      if (!list)
-        list = gcry_sexp_find_token (key, "private-key", 0);
-      if (!list)
-        return NULL; /* No public- or private-key object. */
-
-      l2 = gcry_sexp_cadr (list);
-      gcry_sexp_release (list);
-      list = l2;
-      l2 = NULL;
-
-      name = _gcry_sexp_nth_string (list, 0);
-      if (!name)
-        goto leave; /* Invalid structure of object. */
-
-      /* Get the key.  We pass the names of the parameters for
-         override_elems; this allows to call this function without the
-         actual public key parameter.  */
-      if (sexp_to_key (key, want_private, "pabgn", &pkey, &module))
-        goto leave;
+      if (spec_from_sexp (key, 0, &spec, &keyparms))
+        return NULL;
     }
   else
     {
-      ath_mutex_lock (&pubkeys_registered_lock);
-      module = gcry_pk_lookup_name ("ecc");
-      ath_mutex_unlock (&pubkeys_registered_lock);
-      if (!module)
-        goto leave;
+      spec = spec_from_name ("ecc");
+      if (!spec)
+        return NULL;
     }
 
-  extraspec = module->extraspec;
-  if (!extraspec || !extraspec->get_curve)
-    goto leave;
-
-  result = extraspec->get_curve (pkey, iterator, r_nbits);
+  if (spec->flags.disabled)
+    return NULL;
+  if (!spec->flags.fips && fips_mode ())
+    return NULL;
+  if (spec->get_curve)
+    result = spec->get_curve (keyparms, iterator, r_nbits);
 
- leave:
-  if (pkey)
-    {
-      release_mpi_array (pkey);
-      gcry_free (pkey);
-    }
-  if (module)
-    {
-      ath_mutex_lock (&pubkeys_registered_lock);
-      _gcry_module_release (module);
-      ath_mutex_unlock (&pubkeys_registered_lock);
-    }
-  gcry_free (name);
-  gcry_sexp_release (list);
+  sexp_release (keyparms);
   return result;
 }
 
 
 
 gcry_sexp_t
-gcry_pk_get_param (int algo, const char *name)
+_gcry_pk_get_param (int algo, const char *name)
 {
-  gcry_module_t module = NULL;
-  pk_extra_spec_t *extraspec;
   gcry_sexp_t result = NULL;
+  gcry_pk_spec_t *spec = NULL;
 
-  if (algo != GCRY_PK_ECDSA && algo != GCRY_PK_ECDH)
-    return NULL;
+  algo = map_algo (algo);
 
-  REGISTER_DEFAULT_PUBKEYS;
+  if (algo != GCRY_PK_ECC)
+    return NULL;
 
-  ath_mutex_lock (&pubkeys_registered_lock);
-  module = gcry_pk_lookup_name ("ecc");
-  ath_mutex_unlock (&pubkeys_registered_lock);
-  if (module)
+  spec = spec_from_name ("ecc");
+  if (spec)
     {
-      extraspec = module->extraspec;
-      if (extraspec && extraspec->get_curve_param)
-        result = extraspec->get_curve_param (name);
-
-      ath_mutex_lock (&pubkeys_registered_lock);
-      _gcry_module_release (module);
-      ath_mutex_unlock (&pubkeys_registered_lock);
+      if (spec && spec->get_curve_param)
+        result = spec->get_curve_param (name);
     }
   return result;
 }
 
 
 
-gcry_error_t
-gcry_pk_ctl (int cmd, void *buffer, size_t buflen)
+gcry_err_code_t
+_gcry_pk_ctl (int cmd, void *buffer, size_t buflen)
 {
-  gcry_err_code_t err = GPG_ERR_NO_ERROR;
-
-  REGISTER_DEFAULT_PUBKEYS;
+  gcry_err_code_t rc = 0;
 
   switch (cmd)
     {
@@ -3942,16 +1094,16 @@ gcry_pk_ctl (int cmd, void *buffer, size_t buflen)
       /* This one expects a buffer pointing to an integer with the
          algo number.  */
       if ((! buffer) || (buflen != sizeof (int)))
-       err = GPG_ERR_INV_ARG;
+       rc = GPG_ERR_INV_ARG;
       else
        disable_pubkey_algo (*((int *) buffer));
       break;
 
     default:
-      err = GPG_ERR_INV_OP;
+      rc = GPG_ERR_INV_OP;
     }
 
-  return gcry_error (err);
+  return rc;
 }
 
 
@@ -3976,10 +1128,10 @@ gcry_pk_ctl (int cmd, void *buffer, size_t buflen)
    the return value.  The caller will in all cases consult the value
    and thereby detecting whether a error occurred or not (i.e. while
    checking the block size) */
-gcry_error_t
-gcry_pk_algo_info (int algorithm, int what, void *buffer, size_t *nbytes)
+gcry_err_code_t
+_gcry_pk_algo_info (int algorithm, int what, void *buffer, size_t *nbytes)
 {
-  gcry_err_code_t err = GPG_ERR_NO_ERROR;
+  gcry_err_code_t rc = 0;
 
   switch (what)
     {
@@ -3987,31 +1139,18 @@ gcry_pk_algo_info (int algorithm, int what, void 
*buffer, size_t *nbytes)
       {
        int use = nbytes ? *nbytes : 0;
        if (buffer)
-         err = GPG_ERR_INV_ARG;
+         rc = GPG_ERR_INV_ARG;
        else if (check_pubkey_algo (algorithm, use))
-         err = GPG_ERR_PUBKEY_ALGO;
+         rc = GPG_ERR_PUBKEY_ALGO;
        break;
       }
 
     case GCRYCTL_GET_ALGO_USAGE:
       {
-       gcry_module_t pubkey;
-       int use = 0;
-
-       REGISTER_DEFAULT_PUBKEYS;
-
-       ath_mutex_lock (&pubkeys_registered_lock);
-       pubkey = _gcry_module_lookup_id (pubkeys_registered, algorithm);
-       if (pubkey)
-         {
-           use = ((gcry_pk_spec_t *) pubkey->spec)->use;
-           _gcry_module_release (pubkey);
-         }
-       ath_mutex_unlock (&pubkeys_registered_lock);
-
-       /* FIXME? */
-       *nbytes = use;
+       gcry_pk_spec_t *spec;
 
+       spec = spec_from_algo (algorithm);
+        *nbytes = spec? spec->use : 0;
        break;
       }
 
@@ -4045,69 +1184,60 @@ gcry_pk_algo_info (int algorithm, int what, void 
*buffer, size_t *nbytes)
       }
 
     default:
-      err = GPG_ERR_INV_OP;
+      rc = GPG_ERR_INV_OP;
     }
 
-  return gcry_error (err);
+  return rc;
 }
 
 
-/* Explicitly initialize this module.  */
-gcry_err_code_t
-_gcry_pk_init (void)
-{
-  gcry_err_code_t err = GPG_ERR_NO_ERROR;
-
-  REGISTER_DEFAULT_PUBKEYS;
-
-  return err;
-}
-
+/* Return an S-expression representing the context CTX.  Depending on
+   the state of that context, the S-expression may either be a public
+   key, a private key or any other object used with public key
+   operations.  On success a new S-expression is stored at R_SEXP and
+   0 is returned, on error NULL is store there and an error code is
+   returned.  MODE is either 0 or one of the GCRY_PK_GET_xxx values.
 
+   As of now it only support certain ECC operations because a context
+   object is right now only defined for ECC.  Over time this function
+   will be extended to cover more algorithms.  Note also that the name
+   of the function is gcry_pubkey_xxx and not gcry_pk_xxx.  The idea
+   is that we will eventually provide variants of the existing
+   gcry_pk_xxx functions which will take a context parameter.   */
 gcry_err_code_t
-_gcry_pk_module_lookup (int algorithm, gcry_module_t *module)
+_gcry_pubkey_get_sexp (gcry_sexp_t *r_sexp, int mode, gcry_ctx_t ctx)
 {
-  gcry_err_code_t err = GPG_ERR_NO_ERROR;
-  gcry_module_t pubkey;
+  mpi_ec_t ec;
 
-  REGISTER_DEFAULT_PUBKEYS;
+  if (!r_sexp)
+    return GPG_ERR_INV_VALUE;
+  *r_sexp = NULL;
+  switch (mode)
+    {
+    case 0:
+    case GCRY_PK_GET_PUBKEY:
+    case GCRY_PK_GET_SECKEY:
+      break;
+    default:
+      return GPG_ERR_INV_VALUE;
+    }
+  if (!ctx)
+    return GPG_ERR_NO_CRYPT_CTX;
 
-  ath_mutex_lock (&pubkeys_registered_lock);
-  pubkey = _gcry_module_lookup_id (pubkeys_registered, algorithm);
-  if (pubkey)
-    *module = pubkey;
-  else
-    err = GPG_ERR_PUBKEY_ALGO;
-  ath_mutex_unlock (&pubkeys_registered_lock);
+  ec = _gcry_ctx_find_pointer (ctx, CONTEXT_TYPE_EC);
+  if (ec)
+    return _gcry_pk_ecc_get_sexp (r_sexp, mode, ec);
 
-  return err;
+  return GPG_ERR_WRONG_CRYPT_CTX;
 }
 
 
-void
-_gcry_pk_module_release (gcry_module_t module)
-{
-  ath_mutex_lock (&pubkeys_registered_lock);
-  _gcry_module_release (module);
-  ath_mutex_unlock (&pubkeys_registered_lock);
-}
-
-/* Get a list consisting of the IDs of the loaded pubkey modules.  If
-   LIST is zero, write the number of loaded pubkey modules to
-   LIST_LENGTH and return.  If LIST is non-zero, the first
-   *LIST_LENGTH algorithm IDs are stored in LIST, which must be of
-   according size.  In case there are less pubkey modules than
-   *LIST_LENGTH, *LIST_LENGTH is updated to the correct number.  */
-gcry_error_t
-gcry_pk_list (int *list, int *list_length)
+
+/* Explicitly initialize this module.  */
+gcry_err_code_t
+_gcry_pk_init (void)
 {
-  gcry_err_code_t err = GPG_ERR_NO_ERROR;
-
-  ath_mutex_lock (&pubkeys_registered_lock);
-  err = _gcry_module_list (pubkeys_registered, list, list_length);
-  ath_mutex_unlock (&pubkeys_registered_lock);
-
-  return err;
+  return 0;
 }
 
 
@@ -4116,97 +1246,74 @@ gcry_pk_list (int *list, int *list_length)
 gpg_error_t
 _gcry_pk_selftest (int algo, int extended, selftest_report_func_t report)
 {
-  gcry_module_t module = NULL;
-  pk_extra_spec_t *extraspec = NULL;
-  gcry_err_code_t ec = 0;
-
-  REGISTER_DEFAULT_PUBKEYS;
-
-  ath_mutex_lock (&pubkeys_registered_lock);
-  module = _gcry_module_lookup_id (pubkeys_registered, algo);
-  if (module && !(module->flags & FLAG_MODULE_DISABLED))
-    extraspec = module->extraspec;
-  ath_mutex_unlock (&pubkeys_registered_lock);
-  if (extraspec && extraspec->selftest)
-    ec = extraspec->selftest (algo, extended, report);
+  gcry_err_code_t ec;
+  gcry_pk_spec_t *spec;
+
+  algo = map_algo (algo);
+  spec = spec_from_algo (algo);
+  if (spec && !spec->flags.disabled
+      && (spec->flags.fips || !fips_mode ())
+      && spec->selftest)
+    ec = spec->selftest (algo, extended, report);
   else
     {
       ec = GPG_ERR_PUBKEY_ALGO;
+      /* Fixme: We need to change the report function to allow passing
+         of an encryption mode (e.g. pkcs1, ecdsa, or ecdh).  */
       if (report)
         report ("pubkey", algo, "module",
-                module && !(module->flags & FLAG_MODULE_DISABLED)?
+                spec && !spec->flags.disabled
+                && (spec->flags.fips || !fips_mode ())?
                 "no selftest available" :
-                module? "algorithm disabled" : "algorithm not found");
+                spec? "algorithm disabled" :
+                "algorithm not found");
     }
 
-  if (module)
-    {
-      ath_mutex_lock (&pubkeys_registered_lock);
-      _gcry_module_release (module);
-      ath_mutex_unlock (&pubkeys_registered_lock);
-    }
   return gpg_error (ec);
 }
 
 
-/* This function is only used by ac.c!  */
-gcry_err_code_t
-_gcry_pk_get_elements (int algo, char **enc, char **sig)
-{
-  gcry_module_t pubkey;
-  gcry_pk_spec_t *spec;
-  gcry_err_code_t err;
-  char *enc_cp;
-  char *sig_cp;
-
-  REGISTER_DEFAULT_PUBKEYS;
+struct pk_random_override {
+  size_t len;
+  unsigned char area[1];  /* In future, we may use flexible array member.  */
+};
 
-  enc_cp = NULL;
-  sig_cp = NULL;
-  spec = NULL;
+gpg_err_code_t
+_gcry_pk_random_override_new (gcry_ctx_t *r_ctx,
+                              const unsigned char *p, size_t len)
+{
+  gcry_ctx_t ctx;
+  struct pk_random_override *pro;
 
-  pubkey = _gcry_module_lookup_id (pubkeys_registered, algo);
-  if (! pubkey)
-    {
-      err = GPG_ERR_INTERNAL;
-      goto out;
-    }
-  spec = pubkey->spec;
+  *r_ctx = NULL;
+  if (!p)
+    return GPG_ERR_EINVAL;
 
-  if (enc)
-    {
-      enc_cp = strdup (spec->elements_enc);
-      if (! enc_cp)
-       {
-         err = gpg_err_code_from_syserror ();
-         goto out;
-       }
-    }
+  ctx = _gcry_ctx_alloc (CONTEXT_TYPE_RANDOM_OVERRIDE,
+                         offsetof (struct pk_random_override, area) + len,
+                         NULL);
+  if (!ctx)
+    return gpg_err_code_from_syserror ();
+  pro = _gcry_ctx_get_pointer (ctx, CONTEXT_TYPE_RANDOM_OVERRIDE);
+  pro->len = len;
+  memcpy (pro->area, p, len);
 
-  if (sig)
-    {
-      sig_cp = strdup (spec->elements_sig);
-      if (! sig_cp)
-       {
-         err = gpg_err_code_from_syserror ();
-         goto out;
-       }
-    }
+  *r_ctx = ctx;
+  return 0;
+}
 
-  if (enc)
-    *enc = enc_cp;
-  if (sig)
-    *sig = sig_cp;
-  err = 0;
+gpg_err_code_t
+_gcry_pk_get_random_override (gcry_ctx_t ctx,
+                              const unsigned char **r_p, size_t *r_len)
+{
+  struct pk_random_override *pro;
 
- out:
+  pro = _gcry_ctx_find_pointer (ctx, CONTEXT_TYPE_RANDOM_OVERRIDE);
+  if (!pro)
+    return GPG_ERR_EINVAL;
 
-  _gcry_module_release (pubkey);
-  if (err)
-    {
-      free (enc_cp);
-      free (sig_cp);
-    }
+  *r_p = pro->area;
+  *r_len = pro->len;
 
-  return err;
+  return 0;
 }
diff --git a/grub-core/lib/libgcrypt/cipher/rfc2268.c 
b/grub-core/lib/libgcrypt/cipher/rfc2268.c
index 1c9c8d413..c270ce9b3 100644
--- a/grub-core/lib/libgcrypt/cipher/rfc2268.c
+++ b/grub-core/lib/libgcrypt/cipher/rfc2268.c
@@ -35,6 +35,7 @@
 #include "g10lib.h"
 #include "types.h"
 #include "cipher.h"
+#include "cipher-internal.h"
 
 #define RFC2268_BLOCKSIZE 8
 
@@ -136,6 +137,13 @@ do_encrypt (void *context, unsigned char *outbuf, const 
unsigned char *inbuf)
   outbuf[7] = word3 >> 8;
 }
 
+static unsigned int
+encrypt_block (void *context, unsigned char *outbuf, const unsigned char 
*inbuf)
+{
+  do_encrypt (context, outbuf, inbuf);
+  return /*burn_stack*/ (4 * sizeof(void *) + sizeof(void *) + sizeof(u32) * 
4);
+}
+
 static void
 do_decrypt (void *context, unsigned char *outbuf, const unsigned char *inbuf)
 {
@@ -188,6 +196,13 @@ do_decrypt (void *context, unsigned char *outbuf, const 
unsigned char *inbuf)
   outbuf[7] = word3 >> 8;
 }
 
+static unsigned int
+decrypt_block (void *context, unsigned char *outbuf, const unsigned char 
*inbuf)
+{
+  do_decrypt (context, outbuf, inbuf);
+  return /*burn_stack*/ (4 * sizeof(void *) + sizeof(void *) + sizeof(u32) * 
4);
+}
+
 
 static gpg_err_code_t
 setkey_core (void *context, const unsigned char *key, unsigned int keylen, int 
with_phase2)
@@ -213,6 +228,9 @@ setkey_core (void *context, const unsigned char *key, 
unsigned int keylen, int w
   if (keylen < 40 / 8) /* We want at least 40 bits. */
     return GPG_ERR_INV_KEYLEN;
 
+  if (keylen > 128)
+    return GPG_ERR_INV_KEYLEN;
+
   S = (unsigned char *) ctx->S;
 
   for (i = 0; i < keylen; i++)
@@ -248,8 +266,10 @@ setkey_core (void *context, const unsigned char *key, 
unsigned int keylen, int w
 }
 
 static gpg_err_code_t
-do_setkey (void *context, const unsigned char *key, unsigned int keylen)
+do_setkey (void *context, const unsigned char *key, unsigned int keylen,
+           cipher_bulk_ops_t *bulk_ops)
 {
+  (void)bulk_ops;
   return setkey_core (context, key, keylen, 1);
 }
 
@@ -329,7 +349,7 @@ selftest (void)
 
 
 
-static gcry_cipher_oid_spec_t oids_rfc2268_40[] =
+static const gcry_cipher_oid_spec_t oids_rfc2268_40[] =
   {
     /*{ "1.2.840.113549.3.2", GCRY_CIPHER_MODE_CBC },*/
     /* pbeWithSHAAnd40BitRC2_CBC */
@@ -337,8 +357,25 @@ static gcry_cipher_oid_spec_t oids_rfc2268_40[] =
     { NULL }
   };
 
-gcry_cipher_spec_t _gcry_cipher_spec_rfc2268_40 = {
-  "RFC2268_40", NULL, oids_rfc2268_40,
-  RFC2268_BLOCKSIZE, 40, sizeof(RFC2268_context),
-  do_setkey, do_encrypt, do_decrypt
-};
+static const gcry_cipher_oid_spec_t oids_rfc2268_128[] =
+  {
+    /* pbeWithSHAAnd128BitRC2_CBC */
+    { "1.2.840.113549.1.12.1.5", GCRY_CIPHER_MODE_CBC },
+    { NULL }
+  };
+
+gcry_cipher_spec_t _gcry_cipher_spec_rfc2268_40 =
+  {
+    GCRY_CIPHER_RFC2268_40, {0, 0},
+    "RFC2268_40", NULL, oids_rfc2268_40,
+    RFC2268_BLOCKSIZE, 40, sizeof(RFC2268_context),
+    do_setkey, encrypt_block, decrypt_block
+  };
+
+gcry_cipher_spec_t _gcry_cipher_spec_rfc2268_128 =
+  {
+    GCRY_CIPHER_RFC2268_128, {0, 0},
+    "RFC2268_128", NULL, oids_rfc2268_128,
+    RFC2268_BLOCKSIZE, 128, sizeof(RFC2268_context),
+    do_setkey, encrypt_block, decrypt_block
+  };
diff --git a/grub-core/lib/libgcrypt/cipher/rijndael-aarch64.S 
b/grub-core/lib/libgcrypt/cipher/rijndael-aarch64.S
new file mode 100644
index 000000000..184fcd20a
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/rijndael-aarch64.S
@@ -0,0 +1,514 @@
+/* rijndael-aarch64.S  -  ARMv8/AArch64 assembly implementation of AES cipher
+ *
+ * Copyright (C) 2016 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "asm-common-aarch64.h"
+
+#if defined(__AARCH64EL__)
+#ifdef HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS
+
+.text
+
+/* register macros */
+#define CTX    x0
+#define RDST   x1
+#define RSRC   x2
+#define NROUNDS        w3
+#define RTAB   x4
+#define RMASK  w5
+
+#define RA     w8
+#define RB     w9
+#define RC     w10
+#define RD     w11
+
+#define RNA    w12
+#define RNB    w13
+#define RNC    w14
+#define RND    w15
+
+#define RT0    w6
+#define RT1    w7
+#define RT2    w16
+#define xRT0   x6
+#define xRT1   x7
+#define xRT2   x16
+
+#define xw8    x8
+#define xw9    x9
+#define xw10   x10
+#define xw11   x11
+
+#define xw12   x12
+#define xw13   x13
+#define xw14   x14
+#define xw15   x15
+
+/***********************************************************************
+ * ARMv8/AArch64 assembly implementation of the AES cipher
+ ***********************************************************************/
+#define preload_first_key(round, ra) \
+       ldr ra, [CTX, #(((round) * 16) + 0 * 4)];
+
+#define dummy(round, ra) /* nothing */
+
+#define addroundkey(ra, rb, rc, rd, rna, rnb, rnc, rnd, preload_key) \
+       ldp rna, rnb, [CTX]; \
+       ldp rnc, rnd, [CTX, #8]; \
+       eor ra, ra, rna; \
+       eor rb, rb, rnb; \
+       eor rc, rc, rnc; \
+       preload_key(1, rna); \
+       eor rd, rd, rnd;
+
+#define do_encround(next_r, ra, rb, rc, rd, rna, rnb, rnc, rnd, preload_key) \
+       ldr rnb, [CTX, #(((next_r) * 16) + 1 * 4)]; \
+       \
+       and RT0, RMASK, ra, lsl#2; \
+       ldr rnc, [CTX, #(((next_r) * 16) + 2 * 4)]; \
+       and RT1, RMASK, ra, lsr#(8 - 2); \
+       ldr rnd, [CTX, #(((next_r) * 16) + 3 * 4)]; \
+       and RT2, RMASK, ra, lsr#(16 - 2); \
+       ldr RT0, [RTAB, xRT0]; \
+       and ra,  RMASK, ra, lsr#(24 - 2); \
+       \
+       ldr RT1, [RTAB, xRT1]; \
+       eor rna, rna, RT0; \
+       ldr RT2, [RTAB, xRT2]; \
+       and RT0, RMASK, rd, lsl#2; \
+       ldr ra,  [RTAB, x##ra]; \
+       \
+       eor rnd, rnd, RT1, ror #24; \
+       and RT1, RMASK, rd, lsr#(8 - 2); \
+       eor rnc, rnc, RT2, ror #16; \
+       and RT2, RMASK, rd, lsr#(16 - 2); \
+       eor rnb, rnb, ra, ror #8; \
+       ldr RT0, [RTAB, xRT0]; \
+       and rd,  RMASK, rd, lsr#(24 - 2); \
+       \
+       ldr RT1, [RTAB, xRT1]; \
+       eor rnd, rnd, RT0; \
+       ldr RT2, [RTAB, xRT2]; \
+       and RT0, RMASK, rc, lsl#2; \
+       ldr rd,  [RTAB, x##rd]; \
+       \
+       eor rnc, rnc, RT1, ror #24; \
+       and RT1, RMASK, rc, lsr#(8 - 2); \
+       eor rnb, rnb, RT2, ror #16; \
+       and RT2, RMASK, rc, lsr#(16 - 2); \
+       eor rna, rna, rd, ror #8; \
+       ldr RT0, [RTAB, xRT0]; \
+       and rc,  RMASK, rc, lsr#(24 - 2); \
+       \
+       ldr RT1, [RTAB, xRT1]; \
+       eor rnc, rnc, RT0; \
+       ldr RT2, [RTAB, xRT2]; \
+       and RT0, RMASK, rb, lsl#2; \
+       ldr rc,  [RTAB, x##rc]; \
+       \
+       eor rnb, rnb, RT1, ror #24; \
+       and RT1, RMASK, rb, lsr#(8 - 2); \
+       eor rna, rna, RT2, ror #16; \
+       and RT2, RMASK, rb, lsr#(16 - 2); \
+       eor rnd, rnd, rc, ror #8; \
+       ldr RT0, [RTAB, xRT0]; \
+       and rb,  RMASK, rb, lsr#(24 - 2); \
+       \
+       ldr RT1, [RTAB, xRT1]; \
+       eor rnb, rnb, RT0; \
+       ldr RT2, [RTAB, xRT2]; \
+       eor rna, rna, RT1, ror #24; \
+       ldr rb,  [RTAB, x##rb]; \
+       \
+       eor rnd, rnd, RT2, ror #16; \
+       preload_key((next_r) + 1, ra); \
+       eor rnc, rnc, rb, ror #8;
+
+#define do_lastencround(ra, rb, rc, rd, rna, rnb, rnc, rnd) \
+       and RT0, RMASK, ra, lsl#2; \
+       and RT1, RMASK, ra, lsr#(8 - 2); \
+       and RT2, RMASK, ra, lsr#(16 - 2); \
+       ldrb rna, [RTAB, xRT0]; \
+       and ra,  RMASK, ra, lsr#(24 - 2); \
+       ldrb rnd, [RTAB, xRT1]; \
+       and RT0, RMASK, rd, lsl#2; \
+       ldrb rnc, [RTAB, xRT2]; \
+       ror rnd, rnd, #24; \
+       ldrb rnb, [RTAB, x##ra]; \
+       and RT1, RMASK, rd, lsr#(8 - 2); \
+       ror rnc, rnc, #16; \
+       and RT2, RMASK, rd, lsr#(16 - 2); \
+       ror rnb, rnb, #8; \
+       ldrb RT0, [RTAB, xRT0]; \
+       and rd,  RMASK, rd, lsr#(24 - 2); \
+       ldrb RT1, [RTAB, xRT1]; \
+       \
+       orr rnd, rnd, RT0; \
+       ldrb RT2, [RTAB, xRT2]; \
+       and RT0, RMASK, rc, lsl#2; \
+       ldrb rd,  [RTAB, x##rd]; \
+       orr rnc, rnc, RT1, ror #24; \
+       and RT1, RMASK, rc, lsr#(8 - 2); \
+       orr rnb, rnb, RT2, ror #16; \
+       and RT2, RMASK, rc, lsr#(16 - 2); \
+       orr rna, rna, rd, ror #8; \
+       ldrb RT0, [RTAB, xRT0]; \
+       and rc,  RMASK, rc, lsr#(24 - 2); \
+       ldrb RT1, [RTAB, xRT1]; \
+       \
+       orr rnc, rnc, RT0; \
+       ldrb RT2, [RTAB, xRT2]; \
+       and RT0, RMASK, rb, lsl#2; \
+       ldrb rc,  [RTAB, x##rc]; \
+       orr rnb, rnb, RT1, ror #24; \
+       and RT1, RMASK, rb, lsr#(8 - 2); \
+       orr rna, rna, RT2, ror #16; \
+       ldrb RT0, [RTAB, xRT0]; \
+       and RT2, RMASK, rb, lsr#(16 - 2); \
+       ldrb RT1, [RTAB, xRT1]; \
+       orr rnd, rnd, rc, ror #8; \
+       ldrb RT2, [RTAB, xRT2]; \
+       and rb,  RMASK, rb, lsr#(24 - 2); \
+       ldrb rb,  [RTAB, x##rb]; \
+       \
+       orr rnb, rnb, RT0; \
+       orr rna, rna, RT1, ror #24; \
+       orr rnd, rnd, RT2, ror #16; \
+       orr rnc, rnc, rb, ror #8;
+
+#define firstencround(round, ra, rb, rc, rd, rna, rnb, rnc, rnd) \
+       addroundkey(ra, rb, rc, rd, rna, rnb, rnc, rnd, preload_first_key); \
+       do_encround((round) + 1, ra, rb, rc, rd, rna, rnb, rnc, rnd, 
preload_first_key);
+
+#define encround(round, ra, rb, rc, rd, rna, rnb, rnc, rnd, preload_key) \
+       do_encround((round) + 1, ra, rb, rc, rd, rna, rnb, rnc, rnd, 
preload_key);
+
+#define lastencround(round, ra, rb, rc, rd, rna, rnb, rnc, rnd) \
+       add CTX, CTX, #(((round) + 1) * 16); \
+       add RTAB, RTAB, #1; \
+       do_lastencround(ra, rb, rc, rd, rna, rnb, rnc, rnd); \
+       addroundkey(rna, rnb, rnc, rnd, ra, rb, rc, rd, dummy);
+
+.globl _gcry_aes_arm_encrypt_block
+ELF(.type   _gcry_aes_arm_encrypt_block,%function;)
+
+_gcry_aes_arm_encrypt_block:
+       /* input:
+        *      %x0: keysched, CTX
+        *      %x1: dst
+        *      %x2: src
+        *      %w3: number of rounds.. 10, 12 or 14
+        *      %x4: encryption table
+        */
+       CFI_STARTPROC();
+
+       /* read input block */
+
+       /* aligned load */
+       ldp     RA, RB, [RSRC];
+       ldp     RC, RD, [RSRC, #8];
+#ifndef __AARCH64EL__
+       rev     RA, RA;
+       rev     RB, RB;
+       rev     RC, RC;
+       rev     RD, RD;
+#endif
+
+       mov     RMASK, #(0xff<<2);
+
+       firstencround(0, RA, RB, RC, RD, RNA, RNB, RNC, RND);
+       encround(1, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key);
+       encround(2, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key);
+       encround(3, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key);
+       encround(4, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key);
+       encround(5, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key);
+       encround(6, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key);
+       encround(7, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key);
+
+       cmp     NROUNDS, #12;
+       bge     .Lenc_not_128;
+
+       encround(8, RA, RB, RC, RD, RNA, RNB, RNC, RND, dummy);
+       lastencround(9, RNA, RNB, RNC, RND, RA, RB, RC, RD);
+
+.Lenc_done:
+
+       /* store output block */
+
+       /* aligned store */
+#ifndef __AARCH64EL__
+       rev     RA, RA;
+       rev     RB, RB;
+       rev     RC, RC;
+       rev     RD, RD;
+#endif
+       /* write output block */
+       stp     RA, RB, [RDST];
+       stp     RC, RD, [RDST, #8];
+
+       mov     x0, #(0);
+       ret_spec_stop;
+
+.ltorg
+.Lenc_not_128:
+       beq .Lenc_192
+
+       encround(8, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key);
+       encround(9, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key);
+       encround(10, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key);
+       encround(11, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key);
+       encround(12, RA, RB, RC, RD, RNA, RNB, RNC, RND, dummy);
+       lastencround(13, RNA, RNB, RNC, RND, RA, RB, RC, RD);
+
+       b .Lenc_done;
+
+.ltorg
+.Lenc_192:
+       encround(8, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key);
+       encround(9, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key);
+       encround(10, RA, RB, RC, RD, RNA, RNB, RNC, RND, dummy);
+       lastencround(11, RNA, RNB, RNC, RND, RA, RB, RC, RD);
+
+       b .Lenc_done;
+       CFI_ENDPROC();
+ELF(.size _gcry_aes_arm_encrypt_block,.-_gcry_aes_arm_encrypt_block;)
+
+#define addroundkey_dec(round, ra, rb, rc, rd, rna, rnb, rnc, rnd) \
+       ldr rna, [CTX, #(((round) * 16) + 0 * 4)]; \
+       ldr rnb, [CTX, #(((round) * 16) + 1 * 4)]; \
+       eor ra, ra, rna; \
+       ldr rnc, [CTX, #(((round) * 16) + 2 * 4)]; \
+       eor rb, rb, rnb; \
+       ldr rnd, [CTX, #(((round) * 16) + 3 * 4)]; \
+       eor rc, rc, rnc; \
+       preload_first_key((round) - 1, rna); \
+       eor rd, rd, rnd;
+
+#define do_decround(next_r, ra, rb, rc, rd, rna, rnb, rnc, rnd, preload_key) \
+       ldr rnb, [CTX, #(((next_r) * 16) + 1 * 4)]; \
+       \
+       and RT0, RMASK, ra, lsl#2; \
+       ldr rnc, [CTX, #(((next_r) * 16) + 2 * 4)]; \
+       and RT1, RMASK, ra, lsr#(8 - 2); \
+       ldr rnd, [CTX, #(((next_r) * 16) + 3 * 4)]; \
+       and RT2, RMASK, ra, lsr#(16 - 2); \
+       ldr RT0, [RTAB, xRT0]; \
+       and ra,  RMASK, ra, lsr#(24 - 2); \
+       \
+       ldr RT1, [RTAB, xRT1]; \
+       eor rna, rna, RT0; \
+       ldr RT2, [RTAB, xRT2]; \
+       and RT0, RMASK, rb, lsl#2; \
+       ldr ra,  [RTAB, x##ra]; \
+       \
+       eor rnb, rnb, RT1, ror #24; \
+       and RT1, RMASK, rb, lsr#(8 - 2); \
+       eor rnc, rnc, RT2, ror #16; \
+       and RT2, RMASK, rb, lsr#(16 - 2); \
+       eor rnd, rnd, ra, ror #8; \
+       ldr RT0, [RTAB, xRT0]; \
+       and rb,  RMASK, rb, lsr#(24 - 2); \
+       \
+       ldr RT1, [RTAB, xRT1]; \
+       eor rnb, rnb, RT0; \
+       ldr RT2, [RTAB, xRT2]; \
+       and RT0, RMASK, rc, lsl#2; \
+       ldr rb,  [RTAB, x##rb]; \
+       \
+       eor rnc, rnc, RT1, ror #24; \
+       and RT1, RMASK, rc, lsr#(8 - 2); \
+       eor rnd, rnd, RT2, ror #16; \
+       and RT2, RMASK, rc, lsr#(16 - 2); \
+       eor rna, rna, rb, ror #8; \
+       ldr RT0, [RTAB, xRT0]; \
+       and rc,  RMASK, rc, lsr#(24 - 2); \
+       \
+       ldr RT1, [RTAB, xRT1]; \
+       eor rnc, rnc, RT0; \
+       ldr RT2, [RTAB, xRT2]; \
+       and RT0, RMASK, rd, lsl#2; \
+       ldr rc,  [RTAB, x##rc]; \
+       \
+       eor rnd, rnd, RT1, ror #24; \
+       and RT1, RMASK, rd, lsr#(8 - 2); \
+       eor rna, rna, RT2, ror #16; \
+       and RT2, RMASK, rd, lsr#(16 - 2); \
+       eor rnb, rnb, rc, ror #8; \
+       ldr RT0, [RTAB, xRT0]; \
+       and rd,  RMASK, rd, lsr#(24 - 2); \
+       \
+       ldr RT1, [RTAB, xRT1]; \
+       eor rnd, rnd, RT0; \
+       ldr RT2, [RTAB, xRT2]; \
+       eor rna, rna, RT1, ror #24; \
+       ldr rd,  [RTAB, x##rd]; \
+       \
+       eor rnb, rnb, RT2, ror #16; \
+       preload_key((next_r) - 1, ra); \
+       eor rnc, rnc, rd, ror #8;
+
+#define do_lastdecround(ra, rb, rc, rd, rna, rnb, rnc, rnd) \
+       and RT0, RMASK, ra; \
+       and RT1, RMASK, ra, lsr#8; \
+       and RT2, RMASK, ra, lsr#16; \
+       ldrb rna, [RTAB, xRT0]; \
+       lsr ra,  ra, #24; \
+       ldrb rnb, [RTAB, xRT1]; \
+       and RT0, RMASK, rb; \
+       ldrb rnc, [RTAB, xRT2]; \
+       ror rnb, rnb, #24; \
+       ldrb rnd, [RTAB, x##ra]; \
+       and RT1, RMASK, rb, lsr#8; \
+       ror rnc, rnc, #16; \
+       and RT2, RMASK, rb, lsr#16; \
+       ror rnd, rnd, #8; \
+       ldrb RT0, [RTAB, xRT0]; \
+       lsr rb,  rb, #24; \
+       ldrb RT1, [RTAB, xRT1]; \
+       \
+       orr rnb, rnb, RT0; \
+       ldrb RT2, [RTAB, xRT2]; \
+       and RT0, RMASK, rc; \
+       ldrb rb,  [RTAB, x##rb]; \
+       orr rnc, rnc, RT1, ror #24; \
+       and RT1, RMASK, rc, lsr#8; \
+       orr rnd, rnd, RT2, ror #16; \
+       and RT2, RMASK, rc, lsr#16; \
+       orr rna, rna, rb, ror #8; \
+       ldrb RT0, [RTAB, xRT0]; \
+       lsr rc,  rc, #24; \
+       ldrb RT1, [RTAB, xRT1]; \
+       \
+       orr rnc, rnc, RT0; \
+       ldrb RT2, [RTAB, xRT2]; \
+       and RT0, RMASK, rd; \
+       ldrb rc,  [RTAB, x##rc]; \
+       orr rnd, rnd, RT1, ror #24; \
+       and RT1, RMASK, rd, lsr#8; \
+       orr rna, rna, RT2, ror #16; \
+       ldrb RT0, [RTAB, xRT0]; \
+       and RT2, RMASK, rd, lsr#16; \
+       ldrb RT1, [RTAB, xRT1]; \
+       orr rnb, rnb, rc, ror #8; \
+       ldrb RT2, [RTAB, xRT2]; \
+       lsr rd,  rd, #24; \
+       ldrb rd,  [RTAB, x##rd]; \
+       \
+       orr rnd, rnd, RT0; \
+       orr rna, rna, RT1, ror #24; \
+       orr rnb, rnb, RT2, ror #16; \
+       orr rnc, rnc, rd, ror #8;
+
+#define firstdecround(round, ra, rb, rc, rd, rna, rnb, rnc, rnd) \
+       addroundkey_dec(((round) + 1), ra, rb, rc, rd, rna, rnb, rnc, rnd); \
+       do_decround(round, ra, rb, rc, rd, rna, rnb, rnc, rnd, 
preload_first_key);
+
+#define decround(round, ra, rb, rc, rd, rna, rnb, rnc, rnd, preload_key) \
+       do_decround(round, ra, rb, rc, rd, rna, rnb, rnc, rnd, preload_key);
+
+#define set_last_round_rmask(_, __) \
+       mov RMASK, #0xff;
+
+#define lastdecround(round, ra, rb, rc, rd, rna, rnb, rnc, rnd) \
+       add RTAB, RTAB, #(4 * 256); \
+       do_lastdecround(ra, rb, rc, rd, rna, rnb, rnc, rnd); \
+       addroundkey(rna, rnb, rnc, rnd, ra, rb, rc, rd, dummy);
+
+.globl _gcry_aes_arm_decrypt_block
+ELF(.type   _gcry_aes_arm_decrypt_block,%function;)
+
+_gcry_aes_arm_decrypt_block:
+       /* input:
+        *      %x0: keysched, CTX
+        *      %x1: dst
+        *      %x2: src
+        *      %w3: number of rounds.. 10, 12 or 14
+        *      %x4: decryption table
+        */
+       CFI_STARTPROC();
+
+       /* read input block */
+
+       /* aligned load */
+       ldp     RA, RB, [RSRC];
+       ldp     RC, RD, [RSRC, #8];
+#ifndef __AARCH64EL__
+       rev     RA, RA;
+       rev     RB, RB;
+       rev     RC, RC;
+       rev     RD, RD;
+#endif
+
+       mov     RMASK, #(0xff << 2);
+
+       cmp     NROUNDS, #12;
+       bge     .Ldec_256;
+
+       firstdecround(9, RA, RB, RC, RD, RNA, RNB, RNC, RND);
+.Ldec_tail:
+       decround(8, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key);
+       decround(7, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key);
+       decround(6, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key);
+       decround(5, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key);
+       decround(4, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key);
+       decround(3, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key);
+       decround(2, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key);
+       decround(1, RA, RB, RC, RD, RNA, RNB, RNC, RND, set_last_round_rmask);
+       lastdecround(0, RNA, RNB, RNC, RND, RA, RB, RC, RD);
+
+       /* store output block */
+
+       /* aligned store */
+#ifndef __AARCH64EL__
+       rev     RA, RA;
+       rev     RB, RB;
+       rev     RC, RC;
+       rev     RD, RD;
+#endif
+       /* write output block */
+       stp     RA, RB, [RDST];
+       stp     RC, RD, [RDST, #8];
+
+       mov     x0, #(0);
+       ret_spec_stop;
+
+.ltorg
+.Ldec_256:
+       beq .Ldec_192;
+
+       firstdecround(13, RA, RB, RC, RD, RNA, RNB, RNC, RND);
+       decround(12, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key);
+       decround(11, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key);
+       decround(10, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key);
+       decround(9, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key);
+
+       b .Ldec_tail;
+
+.ltorg
+.Ldec_192:
+       firstdecround(11, RA, RB, RC, RD, RNA, RNB, RNC, RND);
+       decround(10, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key);
+       decround(9, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key);
+
+       b .Ldec_tail;
+       CFI_ENDPROC();
+ELF(.size _gcry_aes_arm_decrypt_block,.-_gcry_aes_arm_decrypt_block;)
+
+#endif /*HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS*/
+#endif /*__AARCH64EL__ */
diff --git a/grub-core/lib/libgcrypt/cipher/rijndael-aesni.c 
b/grub-core/lib/libgcrypt/cipher/rijndael-aesni.c
new file mode 100644
index 000000000..ff6b0b264
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/rijndael-aesni.c
@@ -0,0 +1,4880 @@
+/* AES-NI accelerated AES for Libgcrypt
+ * Copyright (C) 2000, 2001, 2002, 2003, 2007,
+ *               2008, 2011, 2012 Free Software Foundation, Inc.
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h> /* for memcmp() */
+
+#include "types.h"  /* for byte and u32 typedefs */
+#include "g10lib.h"
+#include "cipher.h"
+#include "bufhelp.h"
+#include "cipher-selftest.h"
+#include "rijndael-internal.h"
+#include "./cipher-internal.h"
+
+
+#ifdef USE_AESNI
+
+
+#if _GCRY_GCC_VERSION >= 40400 /* 4.4 */
+/* Prevent compiler from issuing SSE instructions between asm blocks. */
+#  pragma GCC target("no-sse")
+#endif
+#if __clang__
+#  pragma clang attribute push (__attribute__((target("no-sse"))), apply_to = 
function)
+#endif
+
+
+#define ALWAYS_INLINE inline __attribute__((always_inline))
+#define NO_INLINE __attribute__((noinline))
+#define NO_INSTRUMENT_FUNCTION __attribute__((no_instrument_function))
+
+#define ASM_FUNC_ATTR          NO_INSTRUMENT_FUNCTION
+#define ASM_FUNC_ATTR_INLINE   ASM_FUNC_ATTR ALWAYS_INLINE
+#define ASM_FUNC_ATTR_NOINLINE ASM_FUNC_ATTR NO_INLINE
+
+
+typedef struct u128_s
+{
+  u32 a, b, c, d;
+} __attribute__((packed, aligned(1), may_alias)) u128_t;
+
+
+/* Copy of ocb_get_l needed here as GCC is unable to inline ocb_get_l
+   because of 'pragma target'. */
+static ASM_FUNC_ATTR_INLINE const unsigned char *
+aes_ocb_get_l (gcry_cipher_hd_t c, u64 n)
+{
+  unsigned long ntz;
+
+  /* Assumes that N != 0. */
+  asm ("rep;bsfl %k[low], %k[ntz]\n\t"
+        : [ntz] "=r" (ntz)
+        : [low] "r" ((unsigned long)n)
+        : "cc");
+
+  return c->u_mode.ocb.L[ntz];
+}
+
+
+/* Two macros to be called prior and after the use of AESNI
+   instructions.  There should be no external function calls between
+   the use of these macros.  There purpose is to make sure that the
+   SSE regsiters are cleared and won't reveal any information about
+   the key or the data.  */
+#ifdef __WIN64__
+/* XMM6-XMM15 are callee-saved registers on WIN64. */
+# define aesni_prepare_2_7_variable char win64tmp[16 * 2]
+# define aesni_prepare_8_15_variable char win64tmp8_15[16 * 8]
+# define aesni_prepare() do { } while (0)
+# define aesni_prepare_2_7()                                            \
+   do { asm volatile ("movdqu %%xmm6, %0\n\t"                           \
+                     "movdqu %%xmm7, %1\n\t"                           \
+                      : "=m" (*win64tmp), "=m" (*(win64tmp+16))         \
+                      :                                                 \
+                      : "memory");                                      \
+   } while (0)
+# define aesni_prepare_8_15()                                           \
+   do { asm volatile ("movdqu %%xmm8,  0*16(%0)\n\t"                    \
+                      "movdqu %%xmm9,  1*16(%0)\n\t"                    \
+                      "movdqu %%xmm10, 2*16(%0)\n\t"                    \
+                      "movdqu %%xmm11, 3*16(%0)\n\t"                    \
+                      "movdqu %%xmm12, 4*16(%0)\n\t"                    \
+                      "movdqu %%xmm13, 5*16(%0)\n\t"                    \
+                      "movdqu %%xmm14, 6*16(%0)\n\t"                    \
+                      "movdqu %%xmm15, 7*16(%0)\n\t"                    \
+                      :                                                 \
+                      : "r" (win64tmp8_15)                              \
+                      : "memory");                                      \
+   } while (0)
+# define aesni_cleanup()                                                \
+   do { asm volatile ("pxor %%xmm0, %%xmm0\n\t"                         \
+                      "pxor %%xmm1, %%xmm1\n" :: );                     \
+   } while (0)
+# define aesni_cleanup_2_7()                                            \
+   do { asm volatile ("movdqu %0,   %%xmm6\n\t"                         \
+                     "movdqu %1,   %%xmm7\n\t"                         \
+                      "pxor %%xmm2, %%xmm2\n"                           \
+                      "pxor %%xmm3, %%xmm3\n"                           \
+                      "pxor %%xmm4, %%xmm4\n"                           \
+                      "pxor %%xmm5, %%xmm5\n"                           \
+                      :                                                 \
+                      : "m" (*win64tmp), "m" (*(win64tmp+16))           \
+                      : "memory");                                      \
+   } while (0)
+# define aesni_cleanup_8_15()                                           \
+   do { asm volatile ("movdqu 0*16(%0), %%xmm8\n\t"                     \
+                      "movdqu 1*16(%0), %%xmm9\n\t"                     \
+                      "movdqu 2*16(%0), %%xmm10\n\t"                    \
+                      "movdqu 3*16(%0), %%xmm11\n\t"                    \
+                      "movdqu 4*16(%0), %%xmm12\n\t"                    \
+                      "movdqu 5*16(%0), %%xmm13\n\t"                    \
+                      "movdqu 6*16(%0), %%xmm14\n\t"                    \
+                      "movdqu 7*16(%0), %%xmm15\n\t"                    \
+                      :                                                 \
+                      : "r" (win64tmp8_15)                              \
+                      : "memory");                                      \
+   } while (0)
+#else
+# define aesni_prepare_2_7_variable
+# define aesni_prepare() do { } while (0)
+# define aesni_prepare_2_7() do { } while (0)
+# define aesni_cleanup()                                                \
+   do { asm volatile ("pxor %%xmm0, %%xmm0\n\t"                         \
+                      "pxor %%xmm1, %%xmm1\n" :: );                     \
+   } while (0)
+# define aesni_cleanup_2_7()                                            \
+   do { asm volatile ("pxor %%xmm7, %%xmm7\n\t"                         \
+                      "pxor %%xmm2, %%xmm2\n\t"                         \
+                      "pxor %%xmm3, %%xmm3\n"                           \
+                      "pxor %%xmm4, %%xmm4\n"                           \
+                      "pxor %%xmm5, %%xmm5\n"                           \
+                      "pxor %%xmm6, %%xmm6\n":: );                      \
+   } while (0)
+# ifdef __x86_64__
+#  define aesni_prepare_8_15_variable
+#  define aesni_prepare_8_15() do { } while (0)
+#  define aesni_cleanup_8_15()                                          \
+   do { asm volatile ("pxor %%xmm8, %%xmm8\n"                           \
+                      "pxor %%xmm9, %%xmm9\n"                           \
+                      "pxor %%xmm10, %%xmm10\n"                         \
+                      "pxor %%xmm11, %%xmm11\n"                         \
+                      "pxor %%xmm12, %%xmm12\n"                         \
+                      "pxor %%xmm13, %%xmm13\n"                         \
+                      "pxor %%xmm14, %%xmm14\n"                         \
+                      "pxor %%xmm15, %%xmm15\n":: );                    \
+   } while (0)
+# endif
+#endif
+
+void ASM_FUNC_ATTR
+_gcry_aes_aesni_do_setkey (RIJNDAEL_context *ctx, const byte *key)
+{
+  aesni_prepare_2_7_variable;
+
+  aesni_prepare();
+  aesni_prepare_2_7();
+
+  if (ctx->rounds < 12)
+    {
+      /* 128-bit key */
+#define AESKEYGENASSIST_xmm1_xmm2(imm8) \
+       ".byte 0x66, 0x0f, 0x3a, 0xdf, 0xd1, " #imm8 " \n\t"
+#define AESKEY_EXPAND128 \
+       "pshufd $0xff, %%xmm2, %%xmm2\n\t" \
+       "movdqa %%xmm1, %%xmm3\n\t" \
+       "pslldq $4, %%xmm3\n\t" \
+       "pxor   %%xmm3, %%xmm1\n\t" \
+       "pslldq $4, %%xmm3\n\t" \
+       "pxor   %%xmm3, %%xmm1\n\t" \
+       "pslldq $4, %%xmm3\n\t" \
+       "pxor   %%xmm3, %%xmm2\n\t" \
+       "pxor   %%xmm2, %%xmm1\n\t"
+
+      asm volatile ("movdqu (%[key]), %%xmm1\n\t"     /* xmm1 := key   */
+                    "movdqa %%xmm1, (%[ksch])\n\t"     /* ksch[0] := xmm1  */
+                    AESKEYGENASSIST_xmm1_xmm2(0x01)
+                    AESKEY_EXPAND128
+                    "movdqa %%xmm1, 0x10(%[ksch])\n\t" /* ksch[1] := xmm1  */
+                    AESKEYGENASSIST_xmm1_xmm2(0x02)
+                    AESKEY_EXPAND128
+                    "movdqa %%xmm1, 0x20(%[ksch])\n\t" /* ksch[2] := xmm1  */
+                    AESKEYGENASSIST_xmm1_xmm2(0x04)
+                    AESKEY_EXPAND128
+                    "movdqa %%xmm1, 0x30(%[ksch])\n\t" /* ksch[3] := xmm1  */
+                    AESKEYGENASSIST_xmm1_xmm2(0x08)
+                    AESKEY_EXPAND128
+                    "movdqa %%xmm1, 0x40(%[ksch])\n\t" /* ksch[4] := xmm1  */
+                    AESKEYGENASSIST_xmm1_xmm2(0x10)
+                    AESKEY_EXPAND128
+                    "movdqa %%xmm1, 0x50(%[ksch])\n\t" /* ksch[5] := xmm1  */
+                    AESKEYGENASSIST_xmm1_xmm2(0x20)
+                    AESKEY_EXPAND128
+                    "movdqa %%xmm1, 0x60(%[ksch])\n\t" /* ksch[6] := xmm1  */
+                    AESKEYGENASSIST_xmm1_xmm2(0x40)
+                    AESKEY_EXPAND128
+                    "movdqa %%xmm1, 0x70(%[ksch])\n\t" /* ksch[7] := xmm1  */
+                    AESKEYGENASSIST_xmm1_xmm2(0x80)
+                    AESKEY_EXPAND128
+                    "movdqa %%xmm1, 0x80(%[ksch])\n\t" /* ksch[8] := xmm1  */
+                    AESKEYGENASSIST_xmm1_xmm2(0x1b)
+                    AESKEY_EXPAND128
+                    "movdqa %%xmm1, 0x90(%[ksch])\n\t" /* ksch[9] := xmm1  */
+                    AESKEYGENASSIST_xmm1_xmm2(0x36)
+                    AESKEY_EXPAND128
+                    "movdqa %%xmm1, 0xa0(%[ksch])\n\t" /* ksch[10] := xmm1  */
+                    :
+                    : [key] "r" (key), [ksch] "r" (ctx->keyschenc)
+                    : "cc", "memory" );
+#undef AESKEYGENASSIST_xmm1_xmm2
+#undef AESKEY_EXPAND128
+    }
+  else if (ctx->rounds == 12)
+    {
+      /* 192-bit key */
+#define AESKEYGENASSIST_xmm3_xmm2(imm8) \
+       ".byte 0x66, 0x0f, 0x3a, 0xdf, 0xd3, " #imm8 " \n\t"
+#define AESKEY_EXPAND192 \
+       "pshufd $0x55, %%xmm2, %%xmm2\n\t" \
+       "movdqu %%xmm1, %%xmm4\n\t" \
+       "pslldq $4, %%xmm4\n\t" \
+       "pxor %%xmm4, %%xmm1\n\t" \
+       "pslldq $4, %%xmm4\n\t" \
+       "pxor %%xmm4, %%xmm1\n\t" \
+       "pslldq $4, %%xmm4\n\t" \
+       "pxor %%xmm4, %%xmm1\n\t" \
+       "pxor %%xmm2, %%xmm1\n\t" \
+       "pshufd $0xff, %%xmm1, %%xmm2\n\t" \
+       "movdqu %%xmm3, %%xmm4\n\t" \
+       "pslldq $4, %%xmm4\n\t" \
+       "pxor %%xmm4, %%xmm3\n\t" \
+       "pxor %%xmm2, %%xmm3\n\t"
+
+      asm volatile ("movdqu (%[key]), %%xmm1\n\t"     /* xmm1 := key[0..15]   
*/
+                    "movq 16(%[key]), %%xmm3\n\t"     /* xmm3 := key[16..23]  
*/
+                    "movdqa %%xmm1, (%[ksch])\n\t"    /* ksch[0] := xmm1  */
+                    "movdqa %%xmm3, %%xmm5\n\t"
+
+                    AESKEYGENASSIST_xmm3_xmm2(0x01)
+                    AESKEY_EXPAND192
+                    "shufpd $0, %%xmm1, %%xmm5\n\t"
+                    "movdqa %%xmm5, 0x10(%[ksch])\n\t" /* ksch[1] := xmm5  */
+                    "movdqa %%xmm1, %%xmm6\n\t"
+                    "shufpd $1, %%xmm3, %%xmm6\n\t"
+                    "movdqa %%xmm6, 0x20(%[ksch])\n\t" /* ksch[2] := xmm6  */
+                    AESKEYGENASSIST_xmm3_xmm2(0x02)
+                    AESKEY_EXPAND192
+                    "movdqa %%xmm1, 0x30(%[ksch])\n\t" /* ksch[3] := xmm1  */
+                    "movdqa %%xmm3, %%xmm5\n\t"
+
+                    AESKEYGENASSIST_xmm3_xmm2(0x04)
+                    AESKEY_EXPAND192
+                    "shufpd $0, %%xmm1, %%xmm5\n\t"
+                    "movdqa %%xmm5, 0x40(%[ksch])\n\t" /* ksch[4] := xmm5  */
+                    "movdqa %%xmm1, %%xmm6\n\t"
+                    "shufpd $1, %%xmm3, %%xmm6\n\t"
+                    "movdqa %%xmm6, 0x50(%[ksch])\n\t" /* ksch[5] := xmm6  */
+                    AESKEYGENASSIST_xmm3_xmm2(0x08)
+                    AESKEY_EXPAND192
+                    "movdqa %%xmm1, 0x60(%[ksch])\n\t" /* ksch[6] := xmm1  */
+                    "movdqa %%xmm3, %%xmm5\n\t"
+
+                    AESKEYGENASSIST_xmm3_xmm2(0x10)
+                    AESKEY_EXPAND192
+                    "shufpd $0, %%xmm1, %%xmm5\n\t"
+                    "movdqa %%xmm5, 0x70(%[ksch])\n\t" /* ksch[7] := xmm5  */
+                    "movdqa %%xmm1, %%xmm6\n\t"
+                    "shufpd $1, %%xmm3, %%xmm6\n\t"
+                    "movdqa %%xmm6, 0x80(%[ksch])\n\t" /* ksch[8] := xmm6  */
+                    AESKEYGENASSIST_xmm3_xmm2(0x20)
+                    AESKEY_EXPAND192
+                    "movdqa %%xmm1, 0x90(%[ksch])\n\t" /* ksch[9] := xmm1  */
+                    "movdqa %%xmm3, %%xmm5\n\t"
+
+                    AESKEYGENASSIST_xmm3_xmm2(0x40)
+                    AESKEY_EXPAND192
+                    "shufpd $0, %%xmm1, %%xmm5\n\t"
+                    "movdqa %%xmm5, 0xa0(%[ksch])\n\t" /* ksch[10] := xmm5  */
+                    "movdqa %%xmm1, %%xmm6\n\t"
+                    "shufpd $1, %%xmm3, %%xmm6\n\t"
+                    "movdqa %%xmm6, 0xb0(%[ksch])\n\t" /* ksch[11] := xmm6  */
+                    AESKEYGENASSIST_xmm3_xmm2(0x80)
+                    AESKEY_EXPAND192
+                    "movdqa %%xmm1, 0xc0(%[ksch])\n\t" /* ksch[12] := xmm1  */
+                    :
+                    : [key] "r" (key), [ksch] "r" (ctx->keyschenc)
+                    : "cc", "memory" );
+#undef AESKEYGENASSIST_xmm3_xmm2
+#undef AESKEY_EXPAND192
+    }
+  else if (ctx->rounds > 12)
+    {
+      /* 256-bit key */
+#define AESKEYGENASSIST_xmm1_xmm2(imm8) \
+       ".byte 0x66, 0x0f, 0x3a, 0xdf, 0xd1, " #imm8 " \n\t"
+#define AESKEYGENASSIST_xmm3_xmm2(imm8) \
+       ".byte 0x66, 0x0f, 0x3a, 0xdf, 0xd3, " #imm8 " \n\t"
+#define AESKEY_EXPAND256_A \
+       "pshufd $0xff, %%xmm2, %%xmm2\n\t" \
+       "movdqa %%xmm1, %%xmm4\n\t" \
+       "pslldq $4, %%xmm4\n\t" \
+       "pxor %%xmm4, %%xmm1\n\t" \
+       "pslldq $4, %%xmm4\n\t" \
+       "pxor %%xmm4, %%xmm1\n\t" \
+       "pslldq $4, %%xmm4\n\t" \
+       "pxor %%xmm4, %%xmm1\n\t" \
+       "pxor %%xmm2, %%xmm1\n\t"
+#define AESKEY_EXPAND256_B \
+       "pshufd $0xaa, %%xmm2, %%xmm2\n\t" \
+       "movdqa %%xmm3, %%xmm4\n\t" \
+       "pslldq $4, %%xmm4\n\t" \
+       "pxor %%xmm4, %%xmm3\n\t" \
+       "pslldq $4, %%xmm4\n\t" \
+       "pxor %%xmm4, %%xmm3\n\t" \
+       "pslldq $4, %%xmm4\n\t" \
+       "pxor %%xmm4, %%xmm3\n\t" \
+       "pxor %%xmm2, %%xmm3\n\t"
+
+      asm volatile ("movdqu (%[key]), %%xmm1\n\t"     /* xmm1 := key[0..15]   
*/
+                    "movdqu 16(%[key]), %%xmm3\n\t"   /* xmm3 := key[16..31]  
*/
+                    "movdqa %%xmm1, (%[ksch])\n\t"     /* ksch[0] := xmm1  */
+                    "movdqa %%xmm3, 0x10(%[ksch])\n\t" /* ksch[1] := xmm3  */
+
+                    AESKEYGENASSIST_xmm3_xmm2(0x01)
+                    AESKEY_EXPAND256_A
+                    "movdqa %%xmm1, 0x20(%[ksch])\n\t" /* ksch[2] := xmm1  */
+                    AESKEYGENASSIST_xmm1_xmm2(0x00)
+                    AESKEY_EXPAND256_B
+                    "movdqa %%xmm3, 0x30(%[ksch])\n\t" /* ksch[3] := xmm3  */
+
+                    AESKEYGENASSIST_xmm3_xmm2(0x02)
+                    AESKEY_EXPAND256_A
+                    "movdqa %%xmm1, 0x40(%[ksch])\n\t" /* ksch[4] := xmm1  */
+                    AESKEYGENASSIST_xmm1_xmm2(0x00)
+                    AESKEY_EXPAND256_B
+                    "movdqa %%xmm3, 0x50(%[ksch])\n\t" /* ksch[5] := xmm3  */
+
+                    AESKEYGENASSIST_xmm3_xmm2(0x04)
+                    AESKEY_EXPAND256_A
+                    "movdqa %%xmm1, 0x60(%[ksch])\n\t" /* ksch[6] := xmm1  */
+                    AESKEYGENASSIST_xmm1_xmm2(0x00)
+                    AESKEY_EXPAND256_B
+                    "movdqa %%xmm3, 0x70(%[ksch])\n\t" /* ksch[7] := xmm3  */
+
+                    AESKEYGENASSIST_xmm3_xmm2(0x08)
+                    AESKEY_EXPAND256_A
+                    "movdqa %%xmm1, 0x80(%[ksch])\n\t" /* ksch[8] := xmm1  */
+                    AESKEYGENASSIST_xmm1_xmm2(0x00)
+                    AESKEY_EXPAND256_B
+                    "movdqa %%xmm3, 0x90(%[ksch])\n\t" /* ksch[9] := xmm3  */
+
+                    AESKEYGENASSIST_xmm3_xmm2(0x10)
+                    AESKEY_EXPAND256_A
+                    "movdqa %%xmm1, 0xa0(%[ksch])\n\t" /* ksch[10] := xmm1  */
+                    AESKEYGENASSIST_xmm1_xmm2(0x00)
+                    AESKEY_EXPAND256_B
+                    "movdqa %%xmm3, 0xb0(%[ksch])\n\t" /* ksch[11] := xmm3  */
+
+                    AESKEYGENASSIST_xmm3_xmm2(0x20)
+                    AESKEY_EXPAND256_A
+                    "movdqa %%xmm1, 0xc0(%[ksch])\n\t" /* ksch[12] := xmm1  */
+                    AESKEYGENASSIST_xmm1_xmm2(0x00)
+                    AESKEY_EXPAND256_B
+                    "movdqa %%xmm3, 0xd0(%[ksch])\n\t" /* ksch[13] := xmm3  */
+
+                    AESKEYGENASSIST_xmm3_xmm2(0x40)
+                    AESKEY_EXPAND256_A
+                    "movdqa %%xmm1, 0xe0(%[ksch])\n\t" /* ksch[14] := xmm1  */
+
+                    :
+                    : [key] "r" (key), [ksch] "r" (ctx->keyschenc)
+                    : "cc", "memory" );
+#undef AESKEYGENASSIST_xmm1_xmm2
+#undef AESKEYGENASSIST_xmm3_xmm2
+#undef AESKEY_EXPAND256_A
+#undef AESKEY_EXPAND256_B
+    }
+
+  aesni_cleanup();
+  aesni_cleanup_2_7();
+}
+
+
+/* Make a decryption key from an encryption key. */
+static ASM_FUNC_ATTR_INLINE void
+do_aesni_prepare_decryption (RIJNDAEL_context *ctx)
+{
+  /* The AES-NI decrypt instructions use the Equivalent Inverse
+     Cipher, thus we can't use the the standard decrypt key
+     preparation.  */
+  u128_t *ekey = (u128_t *)ctx->keyschenc;
+  u128_t *dkey = (u128_t *)ctx->keyschdec;
+  int rr;
+  int r;
+
+#define DO_AESNI_AESIMC() \
+  asm volatile ("movdqa %[ekey], %%xmm1\n\t" \
+                /*"aesimc %%xmm1, %%xmm1\n\t"*/ \
+                ".byte 0x66, 0x0f, 0x38, 0xdb, 0xc9\n\t" \
+                "movdqa %%xmm1, %[dkey]" \
+                : [dkey] "=m" (dkey[r]) \
+                : [ekey] "m" (ekey[rr]) \
+                : "memory")
+
+  dkey[0] = ekey[ctx->rounds];
+  r=1;
+  rr=ctx->rounds-1;
+  DO_AESNI_AESIMC(); r++; rr--; /* round 1 */
+  DO_AESNI_AESIMC(); r++; rr--; /* round 2 */
+  DO_AESNI_AESIMC(); r++; rr--; /* round 3 */
+  DO_AESNI_AESIMC(); r++; rr--; /* round 4 */
+  DO_AESNI_AESIMC(); r++; rr--; /* round 5 */
+  DO_AESNI_AESIMC(); r++; rr--; /* round 6 */
+  DO_AESNI_AESIMC(); r++; rr--; /* round 7 */
+  DO_AESNI_AESIMC(); r++; rr--; /* round 8 */
+  DO_AESNI_AESIMC(); r++; rr--; /* round 9 */
+  if (ctx->rounds > 10)
+    {
+      DO_AESNI_AESIMC(); r++; rr--; /* round 10 */
+      DO_AESNI_AESIMC(); r++; rr--; /* round 11 */
+      if (ctx->rounds > 12)
+        {
+          DO_AESNI_AESIMC(); r++; rr--; /* round 12 */
+          DO_AESNI_AESIMC(); r++; rr--; /* round 13 */
+        }
+    }
+
+  dkey[r] = ekey[0];
+
+#undef DO_AESNI_AESIMC
+}
+
+void ASM_FUNC_ATTR
+_gcry_aes_aesni_prepare_decryption (RIJNDAEL_context *ctx)
+{
+  aesni_prepare();
+  do_aesni_prepare_decryption (ctx);
+  aesni_cleanup();
+}
+
+
+/* Encrypt one block using the Intel AES-NI instructions.  Block is input
+ * and output through SSE register xmm0. */
+static ASM_FUNC_ATTR_INLINE void
+do_aesni_enc (const RIJNDAEL_context *ctx)
+{
+#define aesenc_xmm1_xmm0      ".byte 0x66, 0x0f, 0x38, 0xdc, 0xc1\n\t"
+#define aesenclast_xmm1_xmm0  ".byte 0x66, 0x0f, 0x38, 0xdd, 0xc1\n\t"
+  asm volatile ("movdqa (%[key]), %%xmm1\n\t"    /* xmm1 := key[0] */
+                "pxor   %%xmm1, %%xmm0\n\t"     /* xmm0 ^= key[0] */
+                "movdqa 0x10(%[key]), %%xmm1\n\t"
+                aesenc_xmm1_xmm0
+                "movdqa 0x20(%[key]), %%xmm1\n\t"
+                aesenc_xmm1_xmm0
+                "movdqa 0x30(%[key]), %%xmm1\n\t"
+                aesenc_xmm1_xmm0
+                "movdqa 0x40(%[key]), %%xmm1\n\t"
+                aesenc_xmm1_xmm0
+                "movdqa 0x50(%[key]), %%xmm1\n\t"
+                aesenc_xmm1_xmm0
+                "movdqa 0x60(%[key]), %%xmm1\n\t"
+                aesenc_xmm1_xmm0
+                "movdqa 0x70(%[key]), %%xmm1\n\t"
+                aesenc_xmm1_xmm0
+                "movdqa 0x80(%[key]), %%xmm1\n\t"
+                aesenc_xmm1_xmm0
+                "movdqa 0x90(%[key]), %%xmm1\n\t"
+                aesenc_xmm1_xmm0
+                "movdqa 0xa0(%[key]), %%xmm1\n\t"
+                "cmpl $10, %[rounds]\n\t"
+                "jz .Lenclast%=\n\t"
+                aesenc_xmm1_xmm0
+                "movdqa 0xb0(%[key]), %%xmm1\n\t"
+                aesenc_xmm1_xmm0
+                "movdqa 0xc0(%[key]), %%xmm1\n\t"
+                "cmpl $12, %[rounds]\n\t"
+                "jz .Lenclast%=\n\t"
+                aesenc_xmm1_xmm0
+                "movdqa 0xd0(%[key]), %%xmm1\n\t"
+                aesenc_xmm1_xmm0
+                "movdqa 0xe0(%[key]), %%xmm1\n"
+
+                ".Lenclast%=:\n\t"
+                aesenclast_xmm1_xmm0
+                "\n"
+                :
+                : [key] "r" (ctx->keyschenc),
+                  [rounds] "r" (ctx->rounds)
+                : "cc", "memory");
+#undef aesenc_xmm1_xmm0
+#undef aesenclast_xmm1_xmm0
+}
+
+
+/* Decrypt one block using the Intel AES-NI instructions.  Block is input
+ * and output through SSE register xmm0. */
+static ASM_FUNC_ATTR_INLINE void
+do_aesni_dec (const RIJNDAEL_context *ctx)
+{
+#define aesdec_xmm1_xmm0      ".byte 0x66, 0x0f, 0x38, 0xde, 0xc1\n\t"
+#define aesdeclast_xmm1_xmm0  ".byte 0x66, 0x0f, 0x38, 0xdf, 0xc1\n\t"
+  asm volatile ("movdqa (%[key]), %%xmm1\n\t"
+                "pxor   %%xmm1, %%xmm0\n\t"     /* xmm0 ^= key[0] */
+                "movdqa 0x10(%[key]), %%xmm1\n\t"
+                aesdec_xmm1_xmm0
+                "movdqa 0x20(%[key]), %%xmm1\n\t"
+                aesdec_xmm1_xmm0
+                "movdqa 0x30(%[key]), %%xmm1\n\t"
+                aesdec_xmm1_xmm0
+                "movdqa 0x40(%[key]), %%xmm1\n\t"
+                aesdec_xmm1_xmm0
+                "movdqa 0x50(%[key]), %%xmm1\n\t"
+                aesdec_xmm1_xmm0
+                "movdqa 0x60(%[key]), %%xmm1\n\t"
+                aesdec_xmm1_xmm0
+                "movdqa 0x70(%[key]), %%xmm1\n\t"
+                aesdec_xmm1_xmm0
+                "movdqa 0x80(%[key]), %%xmm1\n\t"
+                aesdec_xmm1_xmm0
+                "movdqa 0x90(%[key]), %%xmm1\n\t"
+                aesdec_xmm1_xmm0
+                "movdqa 0xa0(%[key]), %%xmm1\n\t"
+                "cmpl $10, %[rounds]\n\t"
+                "jz .Ldeclast%=\n\t"
+                aesdec_xmm1_xmm0
+                "movdqa 0xb0(%[key]), %%xmm1\n\t"
+                aesdec_xmm1_xmm0
+                "movdqa 0xc0(%[key]), %%xmm1\n\t"
+                "cmpl $12, %[rounds]\n\t"
+                "jz .Ldeclast%=\n\t"
+                aesdec_xmm1_xmm0
+                "movdqa 0xd0(%[key]), %%xmm1\n\t"
+                aesdec_xmm1_xmm0
+                "movdqa 0xe0(%[key]), %%xmm1\n"
+
+                ".Ldeclast%=:\n\t"
+                aesdeclast_xmm1_xmm0
+                "\n"
+                :
+                : [key] "r" (ctx->keyschdec),
+                  [rounds] "r" (ctx->rounds)
+                : "cc", "memory");
+#undef aesdec_xmm1_xmm0
+#undef aesdeclast_xmm1_xmm0
+}
+
+
+/* Encrypt four blocks using the Intel AES-NI instructions.  Blocks are input
+ * and output through SSE registers xmm1 to xmm4.  */
+static ASM_FUNC_ATTR_INLINE void
+do_aesni_enc_vec4 (const RIJNDAEL_context *ctx)
+{
+#define aesenc_xmm0_xmm1      ".byte 0x66, 0x0f, 0x38, 0xdc, 0xc8\n\t"
+#define aesenc_xmm0_xmm2      ".byte 0x66, 0x0f, 0x38, 0xdc, 0xd0\n\t"
+#define aesenc_xmm0_xmm3      ".byte 0x66, 0x0f, 0x38, 0xdc, 0xd8\n\t"
+#define aesenc_xmm0_xmm4      ".byte 0x66, 0x0f, 0x38, 0xdc, 0xe0\n\t"
+#define aesenclast_xmm0_xmm1  ".byte 0x66, 0x0f, 0x38, 0xdd, 0xc8\n\t"
+#define aesenclast_xmm0_xmm2  ".byte 0x66, 0x0f, 0x38, 0xdd, 0xd0\n\t"
+#define aesenclast_xmm0_xmm3  ".byte 0x66, 0x0f, 0x38, 0xdd, 0xd8\n\t"
+#define aesenclast_xmm0_xmm4  ".byte 0x66, 0x0f, 0x38, 0xdd, 0xe0\n\t"
+  asm volatile ("movdqa (%[key]), %%xmm0\n\t"
+                "pxor   %%xmm0, %%xmm1\n\t"     /* xmm1 ^= key[0] */
+                "pxor   %%xmm0, %%xmm2\n\t"     /* xmm2 ^= key[0] */
+                "pxor   %%xmm0, %%xmm3\n\t"     /* xmm3 ^= key[0] */
+                "pxor   %%xmm0, %%xmm4\n\t"     /* xmm4 ^= key[0] */
+                "movdqa 0x10(%[key]), %%xmm0\n\t"
+                aesenc_xmm0_xmm1
+                aesenc_xmm0_xmm2
+                aesenc_xmm0_xmm3
+                aesenc_xmm0_xmm4
+                "movdqa 0x20(%[key]), %%xmm0\n\t"
+                aesenc_xmm0_xmm1
+                aesenc_xmm0_xmm2
+                aesenc_xmm0_xmm3
+                aesenc_xmm0_xmm4
+                "movdqa 0x30(%[key]), %%xmm0\n\t"
+                aesenc_xmm0_xmm1
+                aesenc_xmm0_xmm2
+                aesenc_xmm0_xmm3
+                aesenc_xmm0_xmm4
+                "movdqa 0x40(%[key]), %%xmm0\n\t"
+                aesenc_xmm0_xmm1
+                aesenc_xmm0_xmm2
+                aesenc_xmm0_xmm3
+                aesenc_xmm0_xmm4
+                "movdqa 0x50(%[key]), %%xmm0\n\t"
+                aesenc_xmm0_xmm1
+                aesenc_xmm0_xmm2
+                aesenc_xmm0_xmm3
+                aesenc_xmm0_xmm4
+                "movdqa 0x60(%[key]), %%xmm0\n\t"
+                aesenc_xmm0_xmm1
+                aesenc_xmm0_xmm2
+                aesenc_xmm0_xmm3
+                aesenc_xmm0_xmm4
+                "movdqa 0x70(%[key]), %%xmm0\n\t"
+                aesenc_xmm0_xmm1
+                aesenc_xmm0_xmm2
+                aesenc_xmm0_xmm3
+                aesenc_xmm0_xmm4
+                "movdqa 0x80(%[key]), %%xmm0\n\t"
+                aesenc_xmm0_xmm1
+                aesenc_xmm0_xmm2
+                aesenc_xmm0_xmm3
+                aesenc_xmm0_xmm4
+                "movdqa 0x90(%[key]), %%xmm0\n\t"
+                aesenc_xmm0_xmm1
+                aesenc_xmm0_xmm2
+                aesenc_xmm0_xmm3
+                aesenc_xmm0_xmm4
+                "movdqa 0xa0(%[key]), %%xmm0\n\t"
+                "cmpl $10, %[rounds]\n\t"
+                "jz .Ldeclast%=\n\t"
+                aesenc_xmm0_xmm1
+                aesenc_xmm0_xmm2
+                aesenc_xmm0_xmm3
+                aesenc_xmm0_xmm4
+                "movdqa 0xb0(%[key]), %%xmm0\n\t"
+                aesenc_xmm0_xmm1
+                aesenc_xmm0_xmm2
+                aesenc_xmm0_xmm3
+                aesenc_xmm0_xmm4
+                "movdqa 0xc0(%[key]), %%xmm0\n\t"
+                "cmpl $12, %[rounds]\n\t"
+                "jz .Ldeclast%=\n\t"
+                aesenc_xmm0_xmm1
+                aesenc_xmm0_xmm2
+                aesenc_xmm0_xmm3
+                aesenc_xmm0_xmm4
+                "movdqa 0xd0(%[key]), %%xmm0\n\t"
+                aesenc_xmm0_xmm1
+                aesenc_xmm0_xmm2
+                aesenc_xmm0_xmm3
+                aesenc_xmm0_xmm4
+                "movdqa 0xe0(%[key]), %%xmm0\n"
+
+                ".Ldeclast%=:\n\t"
+                aesenclast_xmm0_xmm1
+                aesenclast_xmm0_xmm2
+                aesenclast_xmm0_xmm3
+                aesenclast_xmm0_xmm4
+                : /* no output */
+                : [key] "r" (ctx->keyschenc),
+                  [rounds] "r" (ctx->rounds)
+                : "cc", "memory");
+#undef aesenc_xmm0_xmm1
+#undef aesenc_xmm0_xmm2
+#undef aesenc_xmm0_xmm3
+#undef aesenc_xmm0_xmm4
+#undef aesenclast_xmm0_xmm1
+#undef aesenclast_xmm0_xmm2
+#undef aesenclast_xmm0_xmm3
+#undef aesenclast_xmm0_xmm4
+}
+
+
+/* Decrypt four blocks using the Intel AES-NI instructions.  Blocks are input
+ * and output through SSE registers xmm1 to xmm4.  */
+static ASM_FUNC_ATTR_INLINE void
+do_aesni_dec_vec4 (const RIJNDAEL_context *ctx)
+{
+#define aesdec_xmm0_xmm1 ".byte 0x66, 0x0f, 0x38, 0xde, 0xc8\n\t"
+#define aesdec_xmm0_xmm2 ".byte 0x66, 0x0f, 0x38, 0xde, 0xd0\n\t"
+#define aesdec_xmm0_xmm3 ".byte 0x66, 0x0f, 0x38, 0xde, 0xd8\n\t"
+#define aesdec_xmm0_xmm4 ".byte 0x66, 0x0f, 0x38, 0xde, 0xe0\n\t"
+#define aesdeclast_xmm0_xmm1 ".byte 0x66, 0x0f, 0x38, 0xdf, 0xc8\n\t"
+#define aesdeclast_xmm0_xmm2 ".byte 0x66, 0x0f, 0x38, 0xdf, 0xd0\n\t"
+#define aesdeclast_xmm0_xmm3 ".byte 0x66, 0x0f, 0x38, 0xdf, 0xd8\n\t"
+#define aesdeclast_xmm0_xmm4 ".byte 0x66, 0x0f, 0x38, 0xdf, 0xe0\n\t"
+  asm volatile ("movdqa (%[key]), %%xmm0\n\t"
+                "pxor   %%xmm0, %%xmm1\n\t"     /* xmm1 ^= key[0] */
+                "pxor   %%xmm0, %%xmm2\n\t"     /* xmm2 ^= key[0] */
+                "pxor   %%xmm0, %%xmm3\n\t"     /* xmm3 ^= key[0] */
+                "pxor   %%xmm0, %%xmm4\n\t"     /* xmm4 ^= key[0] */
+                "movdqa 0x10(%[key]), %%xmm0\n\t"
+                aesdec_xmm0_xmm1
+                aesdec_xmm0_xmm2
+                aesdec_xmm0_xmm3
+                aesdec_xmm0_xmm4
+                "movdqa 0x20(%[key]), %%xmm0\n\t"
+                aesdec_xmm0_xmm1
+                aesdec_xmm0_xmm2
+                aesdec_xmm0_xmm3
+                aesdec_xmm0_xmm4
+                "movdqa 0x30(%[key]), %%xmm0\n\t"
+                aesdec_xmm0_xmm1
+                aesdec_xmm0_xmm2
+                aesdec_xmm0_xmm3
+                aesdec_xmm0_xmm4
+                "movdqa 0x40(%[key]), %%xmm0\n\t"
+                aesdec_xmm0_xmm1
+                aesdec_xmm0_xmm2
+                aesdec_xmm0_xmm3
+                aesdec_xmm0_xmm4
+                "movdqa 0x50(%[key]), %%xmm0\n\t"
+                aesdec_xmm0_xmm1
+                aesdec_xmm0_xmm2
+                aesdec_xmm0_xmm3
+                aesdec_xmm0_xmm4
+                "movdqa 0x60(%[key]), %%xmm0\n\t"
+                aesdec_xmm0_xmm1
+                aesdec_xmm0_xmm2
+                aesdec_xmm0_xmm3
+                aesdec_xmm0_xmm4
+                "movdqa 0x70(%[key]), %%xmm0\n\t"
+                aesdec_xmm0_xmm1
+                aesdec_xmm0_xmm2
+                aesdec_xmm0_xmm3
+                aesdec_xmm0_xmm4
+                "movdqa 0x80(%[key]), %%xmm0\n\t"
+                aesdec_xmm0_xmm1
+                aesdec_xmm0_xmm2
+                aesdec_xmm0_xmm3
+                aesdec_xmm0_xmm4
+                "movdqa 0x90(%[key]), %%xmm0\n\t"
+                aesdec_xmm0_xmm1
+                aesdec_xmm0_xmm2
+                aesdec_xmm0_xmm3
+                aesdec_xmm0_xmm4
+                "movdqa 0xa0(%[key]), %%xmm0\n\t"
+                "cmpl $10, %[rounds]\n\t"
+                "jz .Ldeclast%=\n\t"
+                aesdec_xmm0_xmm1
+                aesdec_xmm0_xmm2
+                aesdec_xmm0_xmm3
+                aesdec_xmm0_xmm4
+                "movdqa 0xb0(%[key]), %%xmm0\n\t"
+                aesdec_xmm0_xmm1
+                aesdec_xmm0_xmm2
+                aesdec_xmm0_xmm3
+                aesdec_xmm0_xmm4
+                "movdqa 0xc0(%[key]), %%xmm0\n\t"
+                "cmpl $12, %[rounds]\n\t"
+                "jz .Ldeclast%=\n\t"
+                aesdec_xmm0_xmm1
+                aesdec_xmm0_xmm2
+                aesdec_xmm0_xmm3
+                aesdec_xmm0_xmm4
+                "movdqa 0xd0(%[key]), %%xmm0\n\t"
+                aesdec_xmm0_xmm1
+                aesdec_xmm0_xmm2
+                aesdec_xmm0_xmm3
+                aesdec_xmm0_xmm4
+                "movdqa 0xe0(%[key]), %%xmm0\n"
+
+                ".Ldeclast%=:\n\t"
+                aesdeclast_xmm0_xmm1
+                aesdeclast_xmm0_xmm2
+                aesdeclast_xmm0_xmm3
+                aesdeclast_xmm0_xmm4
+                : /* no output */
+                : [key] "r" (ctx->keyschdec),
+                  [rounds] "r" (ctx->rounds)
+                : "cc", "memory");
+#undef aesdec_xmm0_xmm1
+#undef aesdec_xmm0_xmm2
+#undef aesdec_xmm0_xmm3
+#undef aesdec_xmm0_xmm4
+#undef aesdeclast_xmm0_xmm1
+#undef aesdeclast_xmm0_xmm2
+#undef aesdeclast_xmm0_xmm3
+#undef aesdeclast_xmm0_xmm4
+}
+
+
+#ifdef __x86_64__
+
+/* Encrypt eight blocks using the Intel AES-NI instructions.  Blocks are input
+ * and output through SSE registers xmm1 to xmm4 and xmm8 to xmm11.  */
+static ASM_FUNC_ATTR_INLINE void
+do_aesni_enc_vec8 (const RIJNDAEL_context *ctx)
+{
+  asm volatile ("movdqa 0x10(%[key]), %%xmm0\n\t"
+                "aesenc %%xmm0, %%xmm1\n\t"
+                "aesenc %%xmm0, %%xmm2\n\t"
+                "aesenc %%xmm0, %%xmm3\n\t"
+                "aesenc %%xmm0, %%xmm4\n\t"
+                "aesenc %%xmm0, %%xmm8\n\t"
+                "aesenc %%xmm0, %%xmm9\n\t"
+                "aesenc %%xmm0, %%xmm10\n\t"
+                "aesenc %%xmm0, %%xmm11\n\t"
+                "movdqa 0x20(%[key]), %%xmm0\n\t"
+                "cmpl $12, %[rounds]\n\t"
+                "aesenc %%xmm0, %%xmm1\n\t"
+                "aesenc %%xmm0, %%xmm2\n\t"
+                "aesenc %%xmm0, %%xmm3\n\t"
+                "aesenc %%xmm0, %%xmm4\n\t"
+                "aesenc %%xmm0, %%xmm8\n\t"
+                "aesenc %%xmm0, %%xmm9\n\t"
+                "aesenc %%xmm0, %%xmm10\n\t"
+                "aesenc %%xmm0, %%xmm11\n\t"
+                "movdqa 0x30(%[key]), %%xmm0\n\t"
+                "aesenc %%xmm0, %%xmm1\n\t"
+                "aesenc %%xmm0, %%xmm2\n\t"
+                "aesenc %%xmm0, %%xmm3\n\t"
+                "aesenc %%xmm0, %%xmm4\n\t"
+                "aesenc %%xmm0, %%xmm8\n\t"
+                "aesenc %%xmm0, %%xmm9\n\t"
+                "aesenc %%xmm0, %%xmm10\n\t"
+                "aesenc %%xmm0, %%xmm11\n\t"
+                "movdqa 0x40(%[key]), %%xmm0\n\t"
+                "aesenc %%xmm0, %%xmm1\n\t"
+                "aesenc %%xmm0, %%xmm2\n\t"
+                "aesenc %%xmm0, %%xmm3\n\t"
+                "aesenc %%xmm0, %%xmm4\n\t"
+                "aesenc %%xmm0, %%xmm8\n\t"
+                "aesenc %%xmm0, %%xmm9\n\t"
+                "aesenc %%xmm0, %%xmm10\n\t"
+                "aesenc %%xmm0, %%xmm11\n\t"
+                "movdqa 0x50(%[key]), %%xmm0\n\t"
+                "aesenc %%xmm0, %%xmm1\n\t"
+                "aesenc %%xmm0, %%xmm2\n\t"
+                "aesenc %%xmm0, %%xmm3\n\t"
+                "aesenc %%xmm0, %%xmm4\n\t"
+                "aesenc %%xmm0, %%xmm8\n\t"
+                "aesenc %%xmm0, %%xmm9\n\t"
+                "aesenc %%xmm0, %%xmm10\n\t"
+                "aesenc %%xmm0, %%xmm11\n\t"
+                "movdqa 0x60(%[key]), %%xmm0\n\t"
+                "aesenc %%xmm0, %%xmm1\n\t"
+                "aesenc %%xmm0, %%xmm2\n\t"
+                "aesenc %%xmm0, %%xmm3\n\t"
+                "aesenc %%xmm0, %%xmm4\n\t"
+                "aesenc %%xmm0, %%xmm8\n\t"
+                "aesenc %%xmm0, %%xmm9\n\t"
+                "aesenc %%xmm0, %%xmm10\n\t"
+                "aesenc %%xmm0, %%xmm11\n\t"
+                "movdqa 0x70(%[key]), %%xmm0\n\t"
+                "aesenc %%xmm0, %%xmm1\n\t"
+                "aesenc %%xmm0, %%xmm2\n\t"
+                "aesenc %%xmm0, %%xmm3\n\t"
+                "aesenc %%xmm0, %%xmm4\n\t"
+                "aesenc %%xmm0, %%xmm8\n\t"
+                "aesenc %%xmm0, %%xmm9\n\t"
+                "aesenc %%xmm0, %%xmm10\n\t"
+                "aesenc %%xmm0, %%xmm11\n\t"
+                "movdqa 0x80(%[key]), %%xmm0\n\t"
+                "aesenc %%xmm0, %%xmm1\n\t"
+                "aesenc %%xmm0, %%xmm2\n\t"
+                "aesenc %%xmm0, %%xmm3\n\t"
+                "aesenc %%xmm0, %%xmm4\n\t"
+                "aesenc %%xmm0, %%xmm8\n\t"
+                "aesenc %%xmm0, %%xmm9\n\t"
+                "aesenc %%xmm0, %%xmm10\n\t"
+                "aesenc %%xmm0, %%xmm11\n\t"
+                "movdqa 0x90(%[key]), %%xmm0\n\t"
+                "aesenc %%xmm0, %%xmm1\n\t"
+                "aesenc %%xmm0, %%xmm2\n\t"
+                "aesenc %%xmm0, %%xmm3\n\t"
+                "aesenc %%xmm0, %%xmm4\n\t"
+                "aesenc %%xmm0, %%xmm8\n\t"
+                "aesenc %%xmm0, %%xmm9\n\t"
+                "aesenc %%xmm0, %%xmm10\n\t"
+                "aesenc %%xmm0, %%xmm11\n\t"
+                "movdqa 0xa0(%[key]), %%xmm0\n\t"
+                "jb .Ldeclast%=\n\t"
+                "aesenc %%xmm0, %%xmm1\n\t"
+                "aesenc %%xmm0, %%xmm2\n\t"
+                "aesenc %%xmm0, %%xmm3\n\t"
+                "aesenc %%xmm0, %%xmm4\n\t"
+                "aesenc %%xmm0, %%xmm8\n\t"
+                "aesenc %%xmm0, %%xmm9\n\t"
+                "aesenc %%xmm0, %%xmm10\n\t"
+                "aesenc %%xmm0, %%xmm11\n\t"
+                "movdqa 0xb0(%[key]), %%xmm0\n\t"
+                "aesenc %%xmm0, %%xmm1\n\t"
+                "aesenc %%xmm0, %%xmm2\n\t"
+                "aesenc %%xmm0, %%xmm3\n\t"
+                "aesenc %%xmm0, %%xmm4\n\t"
+                "aesenc %%xmm0, %%xmm8\n\t"
+                "aesenc %%xmm0, %%xmm9\n\t"
+                "aesenc %%xmm0, %%xmm10\n\t"
+                "aesenc %%xmm0, %%xmm11\n\t"
+                "movdqa 0xc0(%[key]), %%xmm0\n\t"
+                "je .Ldeclast%=\n\t"
+                "aesenc %%xmm0, %%xmm1\n\t"
+                "aesenc %%xmm0, %%xmm2\n\t"
+                "aesenc %%xmm0, %%xmm3\n\t"
+                "aesenc %%xmm0, %%xmm4\n\t"
+                "aesenc %%xmm0, %%xmm8\n\t"
+                "aesenc %%xmm0, %%xmm9\n\t"
+                "aesenc %%xmm0, %%xmm10\n\t"
+                "aesenc %%xmm0, %%xmm11\n\t"
+                "movdqa 0xd0(%[key]), %%xmm0\n\t"
+                "aesenc %%xmm0, %%xmm1\n\t"
+                "aesenc %%xmm0, %%xmm2\n\t"
+                "aesenc %%xmm0, %%xmm3\n\t"
+                "aesenc %%xmm0, %%xmm4\n\t"
+                "aesenc %%xmm0, %%xmm8\n\t"
+                "aesenc %%xmm0, %%xmm9\n\t"
+                "aesenc %%xmm0, %%xmm10\n\t"
+                "aesenc %%xmm0, %%xmm11\n\t"
+                "movdqa 0xe0(%[key]), %%xmm0\n"
+
+                ".Ldeclast%=:\n\t"
+                : /* no output */
+                : [key] "r" (ctx->keyschenc),
+                  [rounds] "r" (ctx->rounds)
+                : "cc", "memory");
+}
+
+
+/* Decrypt eight blocks using the Intel AES-NI instructions.  Blocks are input
+ * and output through SSE registers xmm1 to xmm4 and xmm8 to xmm11.  */
+static ASM_FUNC_ATTR_INLINE void
+do_aesni_dec_vec8 (const RIJNDAEL_context *ctx)
+{
+  asm volatile ("movdqa 0x10(%[key]), %%xmm0\n\t"
+                "cmpl $12, %[rounds]\n\t"
+                "aesdec %%xmm0, %%xmm1\n\t"
+                "aesdec %%xmm0, %%xmm2\n\t"
+                "aesdec %%xmm0, %%xmm3\n\t"
+                "aesdec %%xmm0, %%xmm4\n\t"
+                "aesdec %%xmm0, %%xmm8\n\t"
+                "aesdec %%xmm0, %%xmm9\n\t"
+                "aesdec %%xmm0, %%xmm10\n\t"
+                "aesdec %%xmm0, %%xmm11\n\t"
+                "movdqa 0x20(%[key]), %%xmm0\n\t"
+                "aesdec %%xmm0, %%xmm1\n\t"
+                "aesdec %%xmm0, %%xmm2\n\t"
+                "aesdec %%xmm0, %%xmm3\n\t"
+                "aesdec %%xmm0, %%xmm4\n\t"
+                "aesdec %%xmm0, %%xmm8\n\t"
+                "aesdec %%xmm0, %%xmm9\n\t"
+                "aesdec %%xmm0, %%xmm10\n\t"
+                "aesdec %%xmm0, %%xmm11\n\t"
+                "movdqa 0x30(%[key]), %%xmm0\n\t"
+                "aesdec %%xmm0, %%xmm1\n\t"
+                "aesdec %%xmm0, %%xmm2\n\t"
+                "aesdec %%xmm0, %%xmm3\n\t"
+                "aesdec %%xmm0, %%xmm4\n\t"
+                "aesdec %%xmm0, %%xmm8\n\t"
+                "aesdec %%xmm0, %%xmm9\n\t"
+                "aesdec %%xmm0, %%xmm10\n\t"
+                "aesdec %%xmm0, %%xmm11\n\t"
+                "movdqa 0x40(%[key]), %%xmm0\n\t"
+                "aesdec %%xmm0, %%xmm1\n\t"
+                "aesdec %%xmm0, %%xmm2\n\t"
+                "aesdec %%xmm0, %%xmm3\n\t"
+                "aesdec %%xmm0, %%xmm4\n\t"
+                "aesdec %%xmm0, %%xmm8\n\t"
+                "aesdec %%xmm0, %%xmm9\n\t"
+                "aesdec %%xmm0, %%xmm10\n\t"
+                "aesdec %%xmm0, %%xmm11\n\t"
+                "movdqa 0x50(%[key]), %%xmm0\n\t"
+                "aesdec %%xmm0, %%xmm1\n\t"
+                "aesdec %%xmm0, %%xmm2\n\t"
+                "aesdec %%xmm0, %%xmm3\n\t"
+                "aesdec %%xmm0, %%xmm4\n\t"
+                "aesdec %%xmm0, %%xmm8\n\t"
+                "aesdec %%xmm0, %%xmm9\n\t"
+                "aesdec %%xmm0, %%xmm10\n\t"
+                "aesdec %%xmm0, %%xmm11\n\t"
+                "movdqa 0x60(%[key]), %%xmm0\n\t"
+                "aesdec %%xmm0, %%xmm1\n\t"
+                "aesdec %%xmm0, %%xmm2\n\t"
+                "aesdec %%xmm0, %%xmm3\n\t"
+                "aesdec %%xmm0, %%xmm4\n\t"
+                "aesdec %%xmm0, %%xmm8\n\t"
+                "aesdec %%xmm0, %%xmm9\n\t"
+                "aesdec %%xmm0, %%xmm10\n\t"
+                "aesdec %%xmm0, %%xmm11\n\t"
+                "movdqa 0x70(%[key]), %%xmm0\n\t"
+                "aesdec %%xmm0, %%xmm1\n\t"
+                "aesdec %%xmm0, %%xmm2\n\t"
+                "aesdec %%xmm0, %%xmm3\n\t"
+                "aesdec %%xmm0, %%xmm4\n\t"
+                "aesdec %%xmm0, %%xmm8\n\t"
+                "aesdec %%xmm0, %%xmm9\n\t"
+                "aesdec %%xmm0, %%xmm10\n\t"
+                "aesdec %%xmm0, %%xmm11\n\t"
+                "movdqa 0x80(%[key]), %%xmm0\n\t"
+                "aesdec %%xmm0, %%xmm1\n\t"
+                "aesdec %%xmm0, %%xmm2\n\t"
+                "aesdec %%xmm0, %%xmm3\n\t"
+                "aesdec %%xmm0, %%xmm4\n\t"
+                "aesdec %%xmm0, %%xmm8\n\t"
+                "aesdec %%xmm0, %%xmm9\n\t"
+                "aesdec %%xmm0, %%xmm10\n\t"
+                "aesdec %%xmm0, %%xmm11\n\t"
+                "movdqa 0x90(%[key]), %%xmm0\n\t"
+                "aesdec %%xmm0, %%xmm1\n\t"
+                "aesdec %%xmm0, %%xmm2\n\t"
+                "aesdec %%xmm0, %%xmm3\n\t"
+                "aesdec %%xmm0, %%xmm4\n\t"
+                "aesdec %%xmm0, %%xmm8\n\t"
+                "aesdec %%xmm0, %%xmm9\n\t"
+                "aesdec %%xmm0, %%xmm10\n\t"
+                "aesdec %%xmm0, %%xmm11\n\t"
+                "movdqa 0xa0(%[key]), %%xmm0\n\t"
+                "jb .Ldeclast%=\n\t"
+                "aesdec %%xmm0, %%xmm1\n\t"
+                "aesdec %%xmm0, %%xmm2\n\t"
+                "aesdec %%xmm0, %%xmm3\n\t"
+                "aesdec %%xmm0, %%xmm4\n\t"
+                "aesdec %%xmm0, %%xmm8\n\t"
+                "aesdec %%xmm0, %%xmm9\n\t"
+                "aesdec %%xmm0, %%xmm10\n\t"
+                "aesdec %%xmm0, %%xmm11\n\t"
+                "movdqa 0xb0(%[key]), %%xmm0\n\t"
+                "aesdec %%xmm0, %%xmm1\n\t"
+                "aesdec %%xmm0, %%xmm2\n\t"
+                "aesdec %%xmm0, %%xmm3\n\t"
+                "aesdec %%xmm0, %%xmm4\n\t"
+                "aesdec %%xmm0, %%xmm8\n\t"
+                "aesdec %%xmm0, %%xmm9\n\t"
+                "aesdec %%xmm0, %%xmm10\n\t"
+                "aesdec %%xmm0, %%xmm11\n\t"
+                "movdqa 0xc0(%[key]), %%xmm0\n\t"
+                "je .Ldeclast%=\n\t"
+                "aesdec %%xmm0, %%xmm1\n\t"
+                "aesdec %%xmm0, %%xmm2\n\t"
+                "aesdec %%xmm0, %%xmm3\n\t"
+                "aesdec %%xmm0, %%xmm4\n\t"
+                "aesdec %%xmm0, %%xmm8\n\t"
+                "aesdec %%xmm0, %%xmm9\n\t"
+                "aesdec %%xmm0, %%xmm10\n\t"
+                "aesdec %%xmm0, %%xmm11\n\t"
+                "movdqa 0xd0(%[key]), %%xmm0\n\t"
+                "aesdec %%xmm0, %%xmm1\n\t"
+                "aesdec %%xmm0, %%xmm2\n\t"
+                "aesdec %%xmm0, %%xmm3\n\t"
+                "aesdec %%xmm0, %%xmm4\n\t"
+                "aesdec %%xmm0, %%xmm8\n\t"
+                "aesdec %%xmm0, %%xmm9\n\t"
+                "aesdec %%xmm0, %%xmm10\n\t"
+                "aesdec %%xmm0, %%xmm11\n\t"
+                "movdqa 0xe0(%[key]), %%xmm0\n"
+
+                ".Ldeclast%=:\n\t"
+                : /* no output */
+                : [key] "r" (ctx->keyschdec),
+                  [rounds] "r" (ctx->rounds)
+                : "cc", "memory");
+}
+
+#endif /* __x86_64__ */
+
+
+/* Perform a CTR encryption round using the counter CTR and the input
+   block A.  Write the result to the output block B and update CTR.
+   CTR needs to be a 16 byte aligned little-endian value.  */
+static ASM_FUNC_ATTR_INLINE void
+do_aesni_ctr (const RIJNDAEL_context *ctx,
+              unsigned char *ctr, unsigned char *b, const unsigned char *a)
+{
+#define aesenc_xmm1_xmm0      ".byte 0x66, 0x0f, 0x38, 0xdc, 0xc1\n\t"
+#define aesenclast_xmm1_xmm0  ".byte 0x66, 0x0f, 0x38, 0xdd, 0xc1\n\t"
+
+  asm volatile ("movdqa %%xmm5, %%xmm0\n\t"     /* xmm0 := CTR (xmm5)  */
+                "pcmpeqd %%xmm1, %%xmm1\n\t"
+                "psrldq $8, %%xmm1\n\t"         /* xmm1 = -1 */
+
+                "pshufb %%xmm6, %%xmm5\n\t"
+                "psubq  %%xmm1, %%xmm5\n\t"     /* xmm5++ (big endian) */
+
+                /* detect if 64-bit carry handling is needed */
+                "cmpl   $0xffffffff, 8(%[ctr])\n\t"
+                "jne    .Lno_carry%=\n\t"
+                "cmpl   $0xffffffff, 12(%[ctr])\n\t"
+                "jne    .Lno_carry%=\n\t"
+
+                "pslldq $8, %%xmm1\n\t"         /* move lower 64-bit to high */
+                "psubq   %%xmm1, %%xmm5\n\t"    /* add carry to upper 64bits */
+
+                ".Lno_carry%=:\n\t"
+
+                "pshufb %%xmm6, %%xmm5\n\t"
+                "movdqa %%xmm5, (%[ctr])\n\t"   /* Update CTR (mem).       */
+
+                "pxor (%[key]), %%xmm0\n\t"     /* xmm1 ^= key[0]    */
+                "movdqa 0x10(%[key]), %%xmm1\n\t"
+                aesenc_xmm1_xmm0
+                "movdqa 0x20(%[key]), %%xmm1\n\t"
+                aesenc_xmm1_xmm0
+                "movdqa 0x30(%[key]), %%xmm1\n\t"
+                aesenc_xmm1_xmm0
+                "movdqa 0x40(%[key]), %%xmm1\n\t"
+                aesenc_xmm1_xmm0
+                "movdqa 0x50(%[key]), %%xmm1\n\t"
+                aesenc_xmm1_xmm0
+                "movdqa 0x60(%[key]), %%xmm1\n\t"
+                aesenc_xmm1_xmm0
+                "movdqa 0x70(%[key]), %%xmm1\n\t"
+                aesenc_xmm1_xmm0
+                "movdqa 0x80(%[key]), %%xmm1\n\t"
+                aesenc_xmm1_xmm0
+                "movdqa 0x90(%[key]), %%xmm1\n\t"
+                aesenc_xmm1_xmm0
+                "movdqa 0xa0(%[key]), %%xmm1\n\t"
+                "cmpl $10, %[rounds]\n\t"
+                "jz .Lenclast%=\n\t"
+                aesenc_xmm1_xmm0
+                "movdqa 0xb0(%[key]), %%xmm1\n\t"
+                aesenc_xmm1_xmm0
+                "movdqa 0xc0(%[key]), %%xmm1\n\t"
+                "cmpl $12, %[rounds]\n\t"
+                "jz .Lenclast%=\n\t"
+                aesenc_xmm1_xmm0
+                "movdqa 0xd0(%[key]), %%xmm1\n\t"
+                aesenc_xmm1_xmm0
+                "movdqa 0xe0(%[key]), %%xmm1\n"
+
+                ".Lenclast%=:\n\t"
+                aesenclast_xmm1_xmm0
+                "movdqu %[src], %%xmm1\n\t"      /* xmm1 := input   */
+                "pxor %%xmm1, %%xmm0\n\t"        /* EncCTR ^= input  */
+                "movdqu %%xmm0, %[dst]"          /* Store EncCTR.    */
+
+                : [dst] "=m" (*b)
+                : [src] "m" (*a),
+                  [ctr] "r" (ctr),
+                  [key] "r" (ctx->keyschenc),
+                  [rounds] "g" (ctx->rounds)
+                : "cc", "memory");
+#undef aesenc_xmm1_xmm0
+#undef aesenclast_xmm1_xmm0
+}
+
+
+/* Four blocks at a time variant of do_aesni_ctr.  */
+static ASM_FUNC_ATTR_INLINE void
+do_aesni_ctr_4 (const RIJNDAEL_context *ctx,
+                unsigned char *ctr, unsigned char *b, const unsigned char *a)
+{
+  static const byte bige_addb_const[4][16] __attribute__ ((aligned (16))) =
+    {
+      { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 },
+      { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2 },
+      { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3 },
+      { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4 }
+    };
+  const void *bige_addb = bige_addb_const;
+#define aesenc_xmm1_xmm0      ".byte 0x66, 0x0f, 0x38, 0xdc, 0xc1\n\t"
+#define aesenc_xmm1_xmm2      ".byte 0x66, 0x0f, 0x38, 0xdc, 0xd1\n\t"
+#define aesenc_xmm1_xmm3      ".byte 0x66, 0x0f, 0x38, 0xdc, 0xd9\n\t"
+#define aesenc_xmm1_xmm4      ".byte 0x66, 0x0f, 0x38, 0xdc, 0xe1\n\t"
+#define aesenclast_xmm1_xmm0  ".byte 0x66, 0x0f, 0x38, 0xdd, 0xc1\n\t"
+#define aesenclast_xmm1_xmm2  ".byte 0x66, 0x0f, 0x38, 0xdd, 0xd1\n\t"
+#define aesenclast_xmm1_xmm3  ".byte 0x66, 0x0f, 0x38, 0xdd, 0xd9\n\t"
+#define aesenclast_xmm1_xmm4  ".byte 0x66, 0x0f, 0x38, 0xdd, 0xe1\n\t"
+
+  /* Register usage:
+      [key] keyschedule
+      xmm0  CTR-0
+      xmm1  temp / round key
+      xmm2  CTR-1
+      xmm3  CTR-2
+      xmm4  CTR-3
+      xmm5  copy of *ctr
+      xmm6  endian swapping mask
+   */
+
+  asm volatile (/* detect if 8-bit carry handling is needed */
+                "addb   $4, 15(%[ctr])\n\t"
+                "jc     .Ladd32bit%=\n\t"
+
+                "movdqa %%xmm5, %%xmm0\n\t"     /* xmm0 := CTR (xmm5) */
+                "movdqa 0*16(%[addb]), %%xmm2\n\t"  /* xmm2 := be(1) */
+                "movdqa 1*16(%[addb]), %%xmm3\n\t"  /* xmm3 := be(2) */
+                "movdqa 2*16(%[addb]), %%xmm4\n\t"  /* xmm4 := be(3) */
+                "movdqa 3*16(%[addb]), %%xmm5\n\t"  /* xmm5 := be(4) */
+                "paddb  %%xmm0, %%xmm2\n\t"     /* xmm2 := be(1) + CTR (xmm0) 
*/
+                "paddb  %%xmm0, %%xmm3\n\t"     /* xmm3 := be(2) + CTR (xmm0) 
*/
+                "paddb  %%xmm0, %%xmm4\n\t"     /* xmm4 := be(3) + CTR (xmm0) 
*/
+                "paddb  %%xmm0, %%xmm5\n\t"     /* xmm5 := be(4) + CTR (xmm0) 
*/
+                "movdqa (%[key]), %%xmm1\n\t"   /* xmm1 := key[0] */
+                "jmp    .Ldone_ctr%=\n\t"
+
+                ".Ladd32bit%=:\n\t"
+                "movdqa %%xmm5, (%[ctr])\n\t"   /* Restore CTR.  */
+                "movdqa %%xmm5, %%xmm0\n\t"     /* xmm0, xmm2 := CTR (xmm5) */
+                "movdqa %%xmm0, %%xmm2\n\t"
+                "pcmpeqd %%xmm1, %%xmm1\n\t"
+                "psrldq $8, %%xmm1\n\t"         /* xmm1 = -1 */
+
+                "pshufb %%xmm6, %%xmm2\n\t"     /* xmm2 := le(xmm2) */
+                "psubq  %%xmm1, %%xmm2\n\t"     /* xmm2++           */
+                "movdqa %%xmm2, %%xmm3\n\t"     /* xmm3 := xmm2     */
+                "psubq  %%xmm1, %%xmm3\n\t"     /* xmm3++           */
+                "movdqa %%xmm3, %%xmm4\n\t"     /* xmm4 := xmm3     */
+                "psubq  %%xmm1, %%xmm4\n\t"     /* xmm4++           */
+                "movdqa %%xmm4, %%xmm5\n\t"     /* xmm5 := xmm4     */
+                "psubq  %%xmm1, %%xmm5\n\t"     /* xmm5++           */
+
+                /* detect if 64-bit carry handling is needed */
+                "cmpl   $0xffffffff, 8(%[ctr])\n\t"
+                "jne    .Lno_carry%=\n\t"
+                "movl   12(%[ctr]), %%esi\n\t"
+                "bswapl %%esi\n\t"
+                "cmpl   $0xfffffffc, %%esi\n\t"
+                "jb     .Lno_carry%=\n\t"       /* no carry */
+
+                "pslldq $8, %%xmm1\n\t"         /* move lower 64-bit to high */
+                "je     .Lcarry_xmm5%=\n\t"     /* esi == 0xfffffffc */
+                "cmpl   $0xfffffffe, %%esi\n\t"
+                "jb     .Lcarry_xmm4%=\n\t"     /* esi == 0xfffffffd */
+                "je     .Lcarry_xmm3%=\n\t"     /* esi == 0xfffffffe */
+                /* esi == 0xffffffff */
+
+                "psubq   %%xmm1, %%xmm2\n\t"
+                ".Lcarry_xmm3%=:\n\t"
+                "psubq   %%xmm1, %%xmm3\n\t"
+                ".Lcarry_xmm4%=:\n\t"
+                "psubq   %%xmm1, %%xmm4\n\t"
+                ".Lcarry_xmm5%=:\n\t"
+                "psubq   %%xmm1, %%xmm5\n\t"
+
+                ".Lno_carry%=:\n\t"
+                "movdqa (%[key]), %%xmm1\n\t"   /* xmm1 := key[0]    */
+
+                "pshufb %%xmm6, %%xmm2\n\t"     /* xmm2 := be(xmm2) */
+                "pshufb %%xmm6, %%xmm3\n\t"     /* xmm3 := be(xmm3) */
+                "pshufb %%xmm6, %%xmm4\n\t"     /* xmm4 := be(xmm4) */
+                "pshufb %%xmm6, %%xmm5\n\t"     /* xmm5 := be(xmm5) */
+
+                "movdqa %%xmm5, (%[ctr])\n\t"   /* Update CTR (mem).  */
+
+                ".Ldone_ctr%=:\n\t"
+                :
+                : [ctr] "r" (ctr),
+                  [key] "r" (ctx->keyschenc),
+                  [addb] "r" (bige_addb)
+                : "%esi", "cc", "memory");
+
+  asm volatile ("pxor   %%xmm1, %%xmm0\n\t"     /* xmm0 ^= key[0]    */
+                "pxor   %%xmm1, %%xmm2\n\t"     /* xmm2 ^= key[0]    */
+                "pxor   %%xmm1, %%xmm3\n\t"     /* xmm3 ^= key[0]    */
+                "pxor   %%xmm1, %%xmm4\n\t"     /* xmm4 ^= key[0]    */
+                "movdqa 0x10(%[key]), %%xmm1\n\t"
+                aesenc_xmm1_xmm0
+                aesenc_xmm1_xmm2
+                aesenc_xmm1_xmm3
+                aesenc_xmm1_xmm4
+                "movdqa 0x20(%[key]), %%xmm1\n\t"
+                aesenc_xmm1_xmm0
+                aesenc_xmm1_xmm2
+                aesenc_xmm1_xmm3
+                aesenc_xmm1_xmm4
+                "movdqa 0x30(%[key]), %%xmm1\n\t"
+                aesenc_xmm1_xmm0
+                aesenc_xmm1_xmm2
+                aesenc_xmm1_xmm3
+                aesenc_xmm1_xmm4
+                "movdqa 0x40(%[key]), %%xmm1\n\t"
+                aesenc_xmm1_xmm0
+                aesenc_xmm1_xmm2
+                aesenc_xmm1_xmm3
+                aesenc_xmm1_xmm4
+                "movdqa 0x50(%[key]), %%xmm1\n\t"
+                aesenc_xmm1_xmm0
+                aesenc_xmm1_xmm2
+                aesenc_xmm1_xmm3
+                aesenc_xmm1_xmm4
+                "movdqa 0x60(%[key]), %%xmm1\n\t"
+                aesenc_xmm1_xmm0
+                aesenc_xmm1_xmm2
+                aesenc_xmm1_xmm3
+                aesenc_xmm1_xmm4
+                "movdqa 0x70(%[key]), %%xmm1\n\t"
+                aesenc_xmm1_xmm0
+                aesenc_xmm1_xmm2
+                aesenc_xmm1_xmm3
+                aesenc_xmm1_xmm4
+                "movdqa 0x80(%[key]), %%xmm1\n\t"
+                aesenc_xmm1_xmm0
+                aesenc_xmm1_xmm2
+                aesenc_xmm1_xmm3
+                aesenc_xmm1_xmm4
+                "movdqa 0x90(%[key]), %%xmm1\n\t"
+                aesenc_xmm1_xmm0
+                aesenc_xmm1_xmm2
+                aesenc_xmm1_xmm3
+                aesenc_xmm1_xmm4
+                "movdqa 0xa0(%[key]), %%xmm1\n\t"
+                "cmpl $10, %[rounds]\n\t"
+                "jz .Lenclast%=\n\t"
+                aesenc_xmm1_xmm0
+                aesenc_xmm1_xmm2
+                aesenc_xmm1_xmm3
+                aesenc_xmm1_xmm4
+                "movdqa 0xb0(%[key]), %%xmm1\n\t"
+                aesenc_xmm1_xmm0
+                aesenc_xmm1_xmm2
+                aesenc_xmm1_xmm3
+                aesenc_xmm1_xmm4
+                "movdqa 0xc0(%[key]), %%xmm1\n\t"
+                "cmpl $12, %[rounds]\n\t"
+                "jz .Lenclast%=\n\t"
+                aesenc_xmm1_xmm0
+                aesenc_xmm1_xmm2
+                aesenc_xmm1_xmm3
+                aesenc_xmm1_xmm4
+                "movdqa 0xd0(%[key]), %%xmm1\n\t"
+                aesenc_xmm1_xmm0
+                aesenc_xmm1_xmm2
+                aesenc_xmm1_xmm3
+                aesenc_xmm1_xmm4
+                "movdqa 0xe0(%[key]), %%xmm1\n"
+
+                ".Lenclast%=:\n\t"
+                aesenclast_xmm1_xmm0
+                aesenclast_xmm1_xmm2
+                aesenclast_xmm1_xmm3
+                aesenclast_xmm1_xmm4
+                :
+                : [key] "r" (ctx->keyschenc),
+                  [rounds] "r" (ctx->rounds)
+                : "cc", "memory");
+
+  asm volatile ("movdqu (%[src]), %%xmm1\n\t"    /* Get block 1.      */
+                "pxor %%xmm1, %%xmm0\n\t"        /* EncCTR-1 ^= input */
+                "movdqu %%xmm0, (%[dst])\n\t"    /* Store block 1     */
+
+                "movdqu 16(%[src]), %%xmm1\n\t"  /* Get block 2.      */
+                "pxor %%xmm1, %%xmm2\n\t"        /* EncCTR-2 ^= input */
+                "movdqu %%xmm2, 16(%[dst])\n\t"  /* Store block 2.    */
+
+                "movdqu 32(%[src]), %%xmm1\n\t"  /* Get block 3.      */
+                "pxor %%xmm1, %%xmm3\n\t"        /* EncCTR-3 ^= input */
+                "movdqu %%xmm3, 32(%[dst])\n\t"  /* Store block 3.    */
+
+                "movdqu 48(%[src]), %%xmm1\n\t"  /* Get block 4.      */
+                "pxor %%xmm1, %%xmm4\n\t"        /* EncCTR-4 ^= input */
+                "movdqu %%xmm4, 48(%[dst])"      /* Store block 4.   */
+                :
+                : [src] "r" (a),
+                  [dst] "r" (b)
+                : "memory");
+#undef aesenc_xmm1_xmm0
+#undef aesenc_xmm1_xmm2
+#undef aesenc_xmm1_xmm3
+#undef aesenc_xmm1_xmm4
+#undef aesenclast_xmm1_xmm0
+#undef aesenclast_xmm1_xmm2
+#undef aesenclast_xmm1_xmm3
+#undef aesenclast_xmm1_xmm4
+}
+
+
+#ifdef __x86_64__
+
+/* Eight blocks at a time variant of do_aesni_ctr.  */
+static ASM_FUNC_ATTR_INLINE void
+do_aesni_ctr_8 (const RIJNDAEL_context *ctx,
+                unsigned char *ctr, unsigned char *b, const unsigned char *a)
+{
+  static const byte bige_addb_const[8][16] __attribute__ ((aligned (16))) =
+    {
+      { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 },
+      { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2 },
+      { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3 },
+      { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4 },
+      { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5 },
+      { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6 },
+      { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7 },
+      { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8 }
+    };
+  const void *bige_addb = bige_addb_const;
+
+  /* Register usage:
+      [key] keyschedule
+      xmm0  CTR-0
+      xmm1  temp / round key
+      xmm2  CTR-1
+      xmm3  CTR-2
+      xmm4  CTR-3
+      xmm5  copy of *ctr
+      xmm6  endian swapping mask
+      xmm8  CTR-4
+      xmm9  CTR-5
+      xmm10 CTR-6
+      xmm11 CTR-7
+      xmm12 temp
+      xmm13 temp
+      xmm14 temp
+      xmm15 temp
+   */
+
+  asm volatile (/* detect if 8-bit carry handling is needed */
+                "addb   $8, 15(%[ctr])\n\t"
+                "jc     .Ladd32bit%=\n\t"
+
+                "movdqa (%[key]), %%xmm1\n\t"   /* xmm1 := key[0] */
+                "movdqa 16(%[key]), %%xmm7\n\t" /* xmm7 := key[1] */
+
+                "movdqa %%xmm5, %%xmm0\n\t"     /* xmm0 := CTR (xmm5) */
+                "movdqa %%xmm5, %%xmm2\n\t"     /* xmm2 := CTR (xmm5) */
+                "movdqa %%xmm5, %%xmm3\n\t"     /* xmm3 := CTR (xmm5) */
+                "movdqa %%xmm5, %%xmm4\n\t"     /* xmm4 := CTR (xmm5) */
+                "paddb  0*16(%[addb]), %%xmm2\n\t" /* xmm2 := be(1) + CTR */
+                "paddb  1*16(%[addb]), %%xmm3\n\t" /* xmm3 := be(2) + CTR */
+                "paddb  2*16(%[addb]), %%xmm4\n\t" /* xmm4 := be(3) + CTR */
+                "pxor   %%xmm1, %%xmm0\n\t"     /* xmm0 ^= key[0]    */
+                "pxor   %%xmm1, %%xmm2\n\t"     /* xmm2 ^= key[0]    */
+                "pxor   %%xmm1, %%xmm3\n\t"     /* xmm3 ^= key[0]    */
+                "pxor   %%xmm1, %%xmm4\n\t"     /* xmm4 ^= key[0]    */
+                "aesenc %%xmm7, %%xmm0\n\t"
+                "aesenc %%xmm7, %%xmm2\n\t"
+                "aesenc %%xmm7, %%xmm3\n\t"
+                "aesenc %%xmm7, %%xmm4\n\t"
+                "movdqa %%xmm5, %%xmm8\n\t"     /* xmm8 := CTR (xmm5) */
+                "movdqa %%xmm5, %%xmm9\n\t"     /* xmm9 := CTR (xmm5) */
+                "movdqa %%xmm5, %%xmm10\n\t"    /* xmm10 := CTR (xmm5) */
+                "movdqa %%xmm5, %%xmm11\n\t"    /* xmm11 := CTR (xmm5) */
+                "paddb  3*16(%[addb]), %%xmm8\n\t"  /* xmm8 := be(4) + CTR */
+                "paddb  4*16(%[addb]), %%xmm9\n\t"  /* xmm9 := be(5) + CTR */
+                "paddb  5*16(%[addb]), %%xmm10\n\t" /* xmm10 := be(6) + CTR */
+                "paddb  6*16(%[addb]), %%xmm11\n\t" /* xmm11 := be(7) + CTR */
+                "pxor   %%xmm1, %%xmm8\n\t"     /* xmm8 ^= key[0]    */
+                "pxor   %%xmm1, %%xmm9\n\t"     /* xmm9 ^= key[0]    */
+                "pxor   %%xmm1, %%xmm10\n\t"    /* xmm10 ^= key[0]   */
+                "pxor   %%xmm1, %%xmm11\n\t"    /* xmm11 ^= key[0]   */
+                "aesenc %%xmm7, %%xmm8\n\t"
+                "aesenc %%xmm7, %%xmm9\n\t"
+                "aesenc %%xmm7, %%xmm10\n\t"
+                "aesenc %%xmm7, %%xmm11\n\t"
+
+                "paddb  7*16(%[addb]), %%xmm5\n\t" /* xmm5 := be(8) + CTR */
+
+                "jmp    .Ldone_ctr%=\n\t"
+
+                ".Ladd32bit%=:\n\t"
+                "movdqa %%xmm5, (%[ctr])\n\t"   /* Restore CTR. */
+                "movdqa %%xmm5, %%xmm0\n\t"     /* xmm0, xmm2 := CTR (xmm5) */
+                "movdqa %%xmm0, %%xmm2\n\t"
+                "pcmpeqd %%xmm1, %%xmm1\n\t"
+                "psrldq $8, %%xmm1\n\t"         /* xmm1 = -1 */
+
+                "pshufb %%xmm6, %%xmm2\n\t"     /* xmm2 := le(xmm2) */
+                "psubq  %%xmm1, %%xmm2\n\t"     /* xmm2++           */
+                "movdqa %%xmm2, %%xmm3\n\t"     /* xmm3 := xmm2     */
+                "psubq  %%xmm1, %%xmm3\n\t"     /* xmm3++           */
+                "movdqa %%xmm3, %%xmm4\n\t"     /* xmm4 := xmm3     */
+                "psubq  %%xmm1, %%xmm4\n\t"     /* xmm4++           */
+                "movdqa %%xmm4, %%xmm8\n\t"     /* xmm8 := xmm4     */
+                "psubq  %%xmm1, %%xmm8\n\t"     /* xmm8++           */
+                "movdqa %%xmm8, %%xmm9\n\t"     /* xmm9 := xmm8     */
+                "psubq  %%xmm1, %%xmm9\n\t"     /* xmm9++           */
+                "movdqa %%xmm9, %%xmm10\n\t"    /* xmm10 := xmm9    */
+                "psubq  %%xmm1, %%xmm10\n\t"    /* xmm10++          */
+                "movdqa %%xmm10, %%xmm11\n\t"   /* xmm11 := xmm10   */
+                "psubq  %%xmm1, %%xmm11\n\t"    /* xmm11++          */
+                "movdqa %%xmm11, %%xmm5\n\t"    /* xmm5 := xmm11    */
+                "psubq  %%xmm1, %%xmm5\n\t"     /* xmm5++           */
+
+                /* detect if 64-bit carry handling is needed */
+                "cmpl   $0xffffffff, 8(%[ctr])\n\t"
+                "jne    .Lno_carry%=\n\t"
+                "movl   12(%[ctr]), %%esi\n\t"
+                "bswapl %%esi\n\t"
+                "cmpl   $0xfffffff8, %%esi\n\t"
+                "jb     .Lno_carry%=\n\t"       /* no carry */
+
+                "pslldq $8, %%xmm1\n\t"         /* move lower 64-bit to high */
+                "je     .Lcarry_xmm5%=\n\t"     /* esi == 0xfffffff8 */
+                "cmpl   $0xfffffffa, %%esi\n\t"
+                "jb     .Lcarry_xmm11%=\n\t"     /* esi == 0xfffffff9 */
+                "je     .Lcarry_xmm10%=\n\t"     /* esi == 0xfffffffa */
+                "cmpl   $0xfffffffc, %%esi\n\t"
+                "jb     .Lcarry_xmm9%=\n\t"     /* esi == 0xfffffffb */
+                "je     .Lcarry_xmm8%=\n\t"     /* esi == 0xfffffffc */
+                "cmpl   $0xfffffffe, %%esi\n\t"
+                "jb     .Lcarry_xmm4%=\n\t"     /* esi == 0xfffffffd */
+                "je     .Lcarry_xmm3%=\n\t"     /* esi == 0xfffffffe */
+                /* esi == 0xffffffff */
+
+                "psubq   %%xmm1, %%xmm2\n\t"
+                ".Lcarry_xmm3%=:\n\t"
+                "psubq   %%xmm1, %%xmm3\n\t"
+                ".Lcarry_xmm4%=:\n\t"
+                "psubq   %%xmm1, %%xmm4\n\t"
+                ".Lcarry_xmm8%=:\n\t"
+                "psubq   %%xmm1, %%xmm8\n\t"
+                ".Lcarry_xmm9%=:\n\t"
+                "psubq   %%xmm1, %%xmm9\n\t"
+                ".Lcarry_xmm10%=:\n\t"
+                "psubq   %%xmm1, %%xmm10\n\t"
+                ".Lcarry_xmm11%=:\n\t"
+                "psubq   %%xmm1, %%xmm11\n\t"
+                ".Lcarry_xmm5%=:\n\t"
+                "psubq   %%xmm1, %%xmm5\n\t"
+
+                ".Lno_carry%=:\n\t"
+                "movdqa (%[key]), %%xmm1\n\t"   /* xmm1 := key[0] */
+                "movdqa 16(%[key]), %%xmm7\n\t" /* xmm7 := key[1] */
+
+                "pshufb %%xmm6, %%xmm2\n\t"     /* xmm2 := be(xmm2) */
+                "pshufb %%xmm6, %%xmm3\n\t"     /* xmm3 := be(xmm3) */
+                "pshufb %%xmm6, %%xmm4\n\t"     /* xmm4 := be(xmm4) */
+                "pxor   %%xmm1, %%xmm0\n\t"     /* xmm0 ^= key[0]    */
+                "pxor   %%xmm1, %%xmm2\n\t"     /* xmm2 ^= key[0]    */
+                "pxor   %%xmm1, %%xmm3\n\t"     /* xmm3 ^= key[0]    */
+                "pxor   %%xmm1, %%xmm4\n\t"     /* xmm4 ^= key[0]    */
+                "aesenc %%xmm7, %%xmm0\n\t"
+                "aesenc %%xmm7, %%xmm2\n\t"
+                "aesenc %%xmm7, %%xmm3\n\t"
+                "aesenc %%xmm7, %%xmm4\n\t"
+                "pshufb %%xmm6, %%xmm8\n\t"     /* xmm8 := be(xmm8) */
+                "pshufb %%xmm6, %%xmm9\n\t"     /* xmm9 := be(xmm9) */
+                "pshufb %%xmm6, %%xmm10\n\t"    /* xmm10 := be(xmm10) */
+                "pshufb %%xmm6, %%xmm11\n\t"    /* xmm11 := be(xmm11) */
+                "pxor   %%xmm1, %%xmm8\n\t"     /* xmm8 ^= key[0]    */
+                "pxor   %%xmm1, %%xmm9\n\t"     /* xmm9 ^= key[0]    */
+                "pxor   %%xmm1, %%xmm10\n\t"    /* xmm10 ^= key[0]   */
+                "pxor   %%xmm1, %%xmm11\n\t"    /* xmm11 ^= key[0]   */
+                "aesenc %%xmm7, %%xmm8\n\t"
+                "aesenc %%xmm7, %%xmm9\n\t"
+                "aesenc %%xmm7, %%xmm10\n\t"
+                "aesenc %%xmm7, %%xmm11\n\t"
+
+                "pshufb %%xmm6, %%xmm5\n\t"     /* xmm5 := be(xmm5) */
+                "movdqa %%xmm5, (%[ctr])\n\t"   /* Update CTR (mem).  */
+
+                ".align 16\n\t"
+                ".Ldone_ctr%=:\n\t"
+                :
+                : [ctr] "r" (ctr),
+                  [key] "r" (ctx->keyschenc),
+                  [addb] "r" (bige_addb)
+                : "%esi", "cc", "memory");
+
+  asm volatile ("movdqa 0x20(%[key]), %%xmm1\n\t"
+                "movdqu 0*16(%[src]), %%xmm12\n\t" /* Get block 1.      */
+                "movdqu 1*16(%[src]), %%xmm13\n\t" /* Get block 2.      */
+                "movdqu 2*16(%[src]), %%xmm14\n\t" /* Get block 3.      */
+                "movdqu 3*16(%[src]), %%xmm15\n\t" /* Get block 4.      */
+                "movdqu 4*16(%[src]), %%xmm7\n\t"  /* Get block 5.      */
+                "aesenc %%xmm1, %%xmm0\n\t"
+                "aesenc %%xmm1, %%xmm2\n\t"
+                "aesenc %%xmm1, %%xmm3\n\t"
+                "aesenc %%xmm1, %%xmm4\n\t"
+                "aesenc %%xmm1, %%xmm8\n\t"
+                "aesenc %%xmm1, %%xmm9\n\t"
+                "aesenc %%xmm1, %%xmm10\n\t"
+                "aesenc %%xmm1, %%xmm11\n\t"
+                "cmpl $12, %[rounds]\n\t"
+                "movdqa 0x30(%[key]), %%xmm1\n\t"
+                "aesenc %%xmm1, %%xmm0\n\t"
+                "aesenc %%xmm1, %%xmm2\n\t"
+                "aesenc %%xmm1, %%xmm3\n\t"
+                "aesenc %%xmm1, %%xmm4\n\t"
+                "aesenc %%xmm1, %%xmm8\n\t"
+                "aesenc %%xmm1, %%xmm9\n\t"
+                "aesenc %%xmm1, %%xmm10\n\t"
+                "aesenc %%xmm1, %%xmm11\n\t"
+                "movdqa 0x40(%[key]), %%xmm1\n\t"
+                "aesenc %%xmm1, %%xmm0\n\t"
+                "aesenc %%xmm1, %%xmm2\n\t"
+                "aesenc %%xmm1, %%xmm3\n\t"
+                "aesenc %%xmm1, %%xmm4\n\t"
+                "aesenc %%xmm1, %%xmm8\n\t"
+                "aesenc %%xmm1, %%xmm9\n\t"
+                "aesenc %%xmm1, %%xmm10\n\t"
+                "aesenc %%xmm1, %%xmm11\n\t"
+                "movdqa 0x50(%[key]), %%xmm1\n\t"
+                "aesenc %%xmm1, %%xmm0\n\t"
+                "aesenc %%xmm1, %%xmm2\n\t"
+                "aesenc %%xmm1, %%xmm3\n\t"
+                "aesenc %%xmm1, %%xmm4\n\t"
+                "aesenc %%xmm1, %%xmm8\n\t"
+                "aesenc %%xmm1, %%xmm9\n\t"
+                "aesenc %%xmm1, %%xmm10\n\t"
+                "aesenc %%xmm1, %%xmm11\n\t"
+                "movdqa 0x60(%[key]), %%xmm1\n\t"
+                "aesenc %%xmm1, %%xmm0\n\t"
+                "aesenc %%xmm1, %%xmm2\n\t"
+                "aesenc %%xmm1, %%xmm3\n\t"
+                "aesenc %%xmm1, %%xmm4\n\t"
+                "aesenc %%xmm1, %%xmm8\n\t"
+                "aesenc %%xmm1, %%xmm9\n\t"
+                "aesenc %%xmm1, %%xmm10\n\t"
+                "aesenc %%xmm1, %%xmm11\n\t"
+                "movdqa 0x70(%[key]), %%xmm1\n\t"
+                "aesenc %%xmm1, %%xmm0\n\t"
+                "aesenc %%xmm1, %%xmm2\n\t"
+                "aesenc %%xmm1, %%xmm3\n\t"
+                "aesenc %%xmm1, %%xmm4\n\t"
+                "aesenc %%xmm1, %%xmm8\n\t"
+                "aesenc %%xmm1, %%xmm9\n\t"
+                "aesenc %%xmm1, %%xmm10\n\t"
+                "aesenc %%xmm1, %%xmm11\n\t"
+                "movdqa 0x80(%[key]), %%xmm1\n\t"
+                "aesenc %%xmm1, %%xmm0\n\t"
+                "aesenc %%xmm1, %%xmm2\n\t"
+                "aesenc %%xmm1, %%xmm3\n\t"
+                "aesenc %%xmm1, %%xmm4\n\t"
+                "aesenc %%xmm1, %%xmm8\n\t"
+                "aesenc %%xmm1, %%xmm9\n\t"
+                "aesenc %%xmm1, %%xmm10\n\t"
+                "aesenc %%xmm1, %%xmm11\n\t"
+                "movdqa 0x90(%[key]), %%xmm1\n\t"
+                "aesenc %%xmm1, %%xmm0\n\t"
+                "aesenc %%xmm1, %%xmm2\n\t"
+                "aesenc %%xmm1, %%xmm3\n\t"
+                "aesenc %%xmm1, %%xmm4\n\t"
+                "aesenc %%xmm1, %%xmm8\n\t"
+                "aesenc %%xmm1, %%xmm9\n\t"
+                "aesenc %%xmm1, %%xmm10\n\t"
+                "aesenc %%xmm1, %%xmm11\n\t"
+                "movdqa 0xa0(%[key]), %%xmm1\n\t"
+                "jb .Lenclast%=\n\t"
+                "aesenc %%xmm1, %%xmm0\n\t"
+                "aesenc %%xmm1, %%xmm2\n\t"
+                "aesenc %%xmm1, %%xmm3\n\t"
+                "aesenc %%xmm1, %%xmm4\n\t"
+                "aesenc %%xmm1, %%xmm8\n\t"
+                "aesenc %%xmm1, %%xmm9\n\t"
+                "aesenc %%xmm1, %%xmm10\n\t"
+                "aesenc %%xmm1, %%xmm11\n\t"
+                "movdqa 0xb0(%[key]), %%xmm1\n\t"
+                "aesenc %%xmm1, %%xmm0\n\t"
+                "aesenc %%xmm1, %%xmm2\n\t"
+                "aesenc %%xmm1, %%xmm3\n\t"
+                "aesenc %%xmm1, %%xmm4\n\t"
+                "aesenc %%xmm1, %%xmm8\n\t"
+                "aesenc %%xmm1, %%xmm9\n\t"
+                "aesenc %%xmm1, %%xmm10\n\t"
+                "aesenc %%xmm1, %%xmm11\n\t"
+                "movdqa 0xc0(%[key]), %%xmm1\n\t"
+                "je .Lenclast%=\n\t"
+                "aesenc %%xmm1, %%xmm0\n\t"
+                "aesenc %%xmm1, %%xmm2\n\t"
+                "aesenc %%xmm1, %%xmm3\n\t"
+                "aesenc %%xmm1, %%xmm4\n\t"
+                "aesenc %%xmm1, %%xmm8\n\t"
+                "aesenc %%xmm1, %%xmm9\n\t"
+                "aesenc %%xmm1, %%xmm10\n\t"
+                "aesenc %%xmm1, %%xmm11\n\t"
+                "movdqa 0xd0(%[key]), %%xmm1\n\t"
+                "aesenc %%xmm1, %%xmm0\n\t"
+                "aesenc %%xmm1, %%xmm2\n\t"
+                "aesenc %%xmm1, %%xmm3\n\t"
+                "aesenc %%xmm1, %%xmm4\n\t"
+                "aesenc %%xmm1, %%xmm8\n\t"
+                "aesenc %%xmm1, %%xmm9\n\t"
+                "aesenc %%xmm1, %%xmm10\n\t"
+                "aesenc %%xmm1, %%xmm11\n\t"
+                "movdqa 0xe0(%[key]), %%xmm1\n"
+
+                ".Lenclast%=:\n\t"
+                :
+                : [key] "r" (ctx->keyschenc),
+                  [rounds] "r" (ctx->rounds),
+                  [src] "r" (a)
+                : "cc", "memory");
+
+  asm volatile ("pxor %%xmm1, %%xmm12\n\t"         /* block1 ^= lastkey */
+                "pxor %%xmm1, %%xmm13\n\t"         /* block2 ^= lastkey */
+                "pxor %%xmm1, %%xmm14\n\t"         /* block3 ^= lastkey */
+                "pxor %%xmm1, %%xmm15\n\t"         /* block4 ^= lastkey */
+                "aesenclast %%xmm12, %%xmm0\n\t"
+                "aesenclast %%xmm13, %%xmm2\n\t"
+                "aesenclast %%xmm14, %%xmm3\n\t"
+                "aesenclast %%xmm15, %%xmm4\n\t"
+                "movdqu 5*16(%[src]), %%xmm12\n\t" /* Get block 6.      */
+                "movdqu 6*16(%[src]), %%xmm13\n\t" /* Get block 7.      */
+                "movdqu 7*16(%[src]), %%xmm14\n\t" /* Get block 8.      */
+                "movdqu %%xmm0, 0*16(%[dst])\n\t"  /* Store block 1.    */
+                "movdqu %%xmm2, 1*16(%[dst])\n\t"  /* Store block 2.    */
+                "movdqu %%xmm3, 2*16(%[dst])\n\t"  /* Store block 3.    */
+                "movdqu %%xmm4, 3*16(%[dst])\n\t"  /* Store block 4.    */
+                "pxor %%xmm1, %%xmm7\n\t"          /* block5 ^= lastkey */
+                "pxor %%xmm1, %%xmm12\n\t"         /* block6 ^= lastkey */
+                "pxor %%xmm1, %%xmm13\n\t"         /* block7 ^= lastkey */
+                "pxor %%xmm1, %%xmm14\n\t"         /* block8 ^= lastkey */
+                "aesenclast %%xmm7, %%xmm8\n\t"
+                "aesenclast %%xmm12, %%xmm9\n\t"
+                "aesenclast %%xmm13, %%xmm10\n\t"
+                "aesenclast %%xmm14, %%xmm11\n\t"
+                "movdqu %%xmm8, 4*16(%[dst])\n\t"  /* Store block 8.    */
+                "movdqu %%xmm9, 5*16(%[dst])\n\t"  /* Store block 9.    */
+                "movdqu %%xmm10, 6*16(%[dst])\n\t" /* Store block 10.   */
+                "movdqu %%xmm11, 7*16(%[dst])\n\t" /* Store block 11.   */
+                :
+                : [src] "r" (a),
+                  [dst] "r" (b)
+                : "memory");
+}
+
+#endif /* __x86_64__ */
+
+
+unsigned int ASM_FUNC_ATTR
+_gcry_aes_aesni_encrypt (const RIJNDAEL_context *ctx, unsigned char *dst,
+                         const unsigned char *src)
+{
+  aesni_prepare ();
+  asm volatile ("movdqu %[src], %%xmm0\n\t"
+                :
+                : [src] "m" (*src)
+                : "memory" );
+  do_aesni_enc (ctx);
+  asm volatile ("movdqu %%xmm0, %[dst]\n\t"
+                : [dst] "=m" (*dst)
+                :
+                : "memory" );
+  aesni_cleanup ();
+  return 0;
+}
+
+
+void ASM_FUNC_ATTR
+_gcry_aes_aesni_cfb_enc (RIJNDAEL_context *ctx, unsigned char *iv,
+                         unsigned char *outbuf, const unsigned char *inbuf,
+                         size_t nblocks)
+{
+  unsigned int rounds = ctx->rounds;
+  aesni_prepare_2_7_variable;
+
+  aesni_prepare ();
+  aesni_prepare_2_7();
+
+  asm volatile ("movdqu %[iv], %%xmm0\n\t"
+               : /* No output */
+               : [iv] "m" (*iv)
+               : "memory" );
+
+  asm volatile ("movdqa %[key0], %%xmm2\n\t"     /* xmm2 = key[0] */
+               "movdqa %[keylast], %%xmm4\n\t"  /* xmm4 = key[last] */
+               "movdqa %%xmm0, %%xmm3\n"
+               "pxor %%xmm2, %%xmm4\n\t"        /* xmm4 = key[0] ^ key[last] */
+               "pxor %%xmm2, %%xmm0\n\t"        /* xmm0 = IV ^ key[0] */
+               : /* No output */
+               : [key0] "m" (ctx->keyschenc[0][0][0]),
+                 [keylast] "m" (ctx->keyschenc[rounds][0][0])
+               : "memory" );
+
+  for ( ;nblocks; nblocks-- )
+    {
+      asm volatile ("movdqu %[inbuf], %%xmm5\n\t"
+                   "movdqa %%xmm2, %%xmm3\n\t"
+                   "pxor %%xmm4, %%xmm5\n\t"  /* xmm5 = input ^ key[last] ^ 
key[0] */
+                   :
+                   : [inbuf] "m" (*inbuf)
+                   : "memory" );
+
+#define aesenc_xmm1_xmm0      ".byte 0x66, 0x0f, 0x38, 0xdc, 0xc1\n\t"
+#define aesenclast_xmm1_xmm0  ".byte 0x66, 0x0f, 0x38, 0xdd, 0xc1\n\t"
+#define aesenclast_xmm5_xmm0  ".byte 0x66, 0x0f, 0x38, 0xdd, 0xc5\n\t"
+      asm volatile ("movdqa 0x10(%[key]), %%xmm1\n\t"
+                   aesenc_xmm1_xmm0
+                   "movdqa 0x20(%[key]), %%xmm1\n\t"
+                   aesenc_xmm1_xmm0
+                   "movdqa 0x30(%[key]), %%xmm1\n\t"
+                   aesenc_xmm1_xmm0
+                   "movdqa 0x40(%[key]), %%xmm1\n\t"
+                   aesenc_xmm1_xmm0
+                   "movdqa 0x50(%[key]), %%xmm1\n\t"
+                   aesenc_xmm1_xmm0
+                   "movdqa 0x60(%[key]), %%xmm1\n\t"
+                   aesenc_xmm1_xmm0
+                   "movdqa 0x70(%[key]), %%xmm1\n\t"
+                   aesenc_xmm1_xmm0
+                   "movdqa 0x80(%[key]), %%xmm1\n\t"
+                   aesenc_xmm1_xmm0
+                   "movdqa 0x90(%[key]), %%xmm1\n\t"
+                   aesenc_xmm1_xmm0
+                   "cmpl $10, %[rounds]\n\t"
+                   "jz .Lenclast%=\n\t"
+                   "movdqa 0xa0(%[key]), %%xmm1\n\t"
+                   aesenc_xmm1_xmm0
+                   "movdqa 0xb0(%[key]), %%xmm1\n\t"
+                   aesenc_xmm1_xmm0
+                   "cmpl $12, %[rounds]\n\t"
+                   "jz .Lenclast%=\n\t"
+                   "movdqa 0xc0(%[key]), %%xmm1\n\t"
+                   aesenc_xmm1_xmm0
+                   "movdqa 0xd0(%[key]), %%xmm1\n\t"
+                   aesenc_xmm1_xmm0
+
+                   ".Lenclast%=:\n\t"
+                   aesenclast_xmm5_xmm0
+                   :
+                   : [key] "r" (ctx->keyschenc),
+                     [rounds] "r" (rounds)
+                   : "cc", "memory");
+#undef aesenc_xmm1_xmm0
+#undef aesenclast_xmm1_xmm0
+#undef aesenclast_xmm5_xmm0
+
+      asm volatile ("pxor %%xmm0, %%xmm3\n\t"
+                   "movdqu %%xmm3, %[outbuf]\n\t"
+                   : [outbuf] "=m" (*outbuf)
+                   : [inbuf] "m" (*inbuf)
+                   : "memory" );
+
+      outbuf += BLOCKSIZE;
+      inbuf  += BLOCKSIZE;
+    }
+
+  asm volatile ("movdqu %%xmm3, %[iv]\n\t"
+               : [iv] "=m" (*iv)
+               :
+               : "memory" );
+
+  aesni_cleanup ();
+  aesni_cleanup_2_7 ();
+}
+
+
+void ASM_FUNC_ATTR
+_gcry_aes_aesni_cbc_enc (RIJNDAEL_context *ctx, unsigned char *iv,
+                         unsigned char *outbuf, const unsigned char *inbuf,
+                         size_t nblocks, int cbc_mac)
+{
+  unsigned int rounds = ctx->rounds;
+  aesni_prepare_2_7_variable;
+
+  if (nblocks == 0) /* CMAC may call with nblocks 0. */
+    return;
+
+  aesni_prepare ();
+  aesni_prepare_2_7();
+
+  asm volatile ("movdqu %[iv], %%xmm0\n\t"
+               : /* No output */
+               : [iv] "m" (*iv)
+               : "memory" );
+
+  asm volatile ("movdqa %[key0], %%xmm2\n\t"     /* xmm2 = key[0] */
+               "movdqa %[keylast], %%xmm3\n\t"  /* xmm3 = key[last] */
+               "pxor %%xmm2, %%xmm0\n\t"        /* xmm0 = IV ^ key[0] */
+               "pxor %%xmm3, %%xmm2\n\t"        /* xmm2 = key[0] ^ key[last] */
+               : /* No output */
+               : [key0] "m" (ctx->keyschenc[0][0][0]),
+                 [keylast] "m" (ctx->keyschenc[rounds][0][0])
+               : "memory" );
+
+  asm volatile ("movdqu %[inbuf], %%xmm4\n\t"
+               "pxor %%xmm4, %%xmm0\n\t"  /* xmm0 = IV ^ key[0] ^ input */
+               :
+               : [inbuf] "m" (*inbuf)
+               : "memory" );
+  inbuf += BLOCKSIZE;
+
+  for ( ;nblocks; )
+    {
+      if (--nblocks)
+       {
+         asm volatile ("movdqu %[inbuf], %%xmm4\n\t"
+                       /* xmm4 = IV ^ key[0] ^ key[last] ^ input: */
+                       "pxor %%xmm2, %%xmm4\n\t"
+                       :
+                       : [inbuf] "m" (*inbuf)
+                       : "memory" );
+         inbuf += BLOCKSIZE;
+       }
+
+#define aesenc_xmm1_xmm0      ".byte 0x66, 0x0f, 0x38, 0xdc, 0xc1\n\t"
+#define aesenclast_xmm4_xmm0  ".byte 0x66, 0x0f, 0x38, 0xdd, 0xc4\n\t"
+#define aesenclast_xmm3_xmm5  ".byte 0x66, 0x0f, 0x38, 0xdd, 0xeb\n\t"
+      asm volatile ("movdqa 0x10(%[key]), %%xmm1\n\t"
+                   aesenc_xmm1_xmm0
+                   "movdqa 0x20(%[key]), %%xmm1\n\t"
+                   aesenc_xmm1_xmm0
+                   "movdqa 0x30(%[key]), %%xmm1\n\t"
+                   aesenc_xmm1_xmm0
+                   "movdqa 0x40(%[key]), %%xmm1\n\t"
+                   aesenc_xmm1_xmm0
+                   "movdqa 0x50(%[key]), %%xmm1\n\t"
+                   aesenc_xmm1_xmm0
+                   "movdqa 0x60(%[key]), %%xmm1\n\t"
+                   aesenc_xmm1_xmm0
+                   "movdqa 0x70(%[key]), %%xmm1\n\t"
+                   aesenc_xmm1_xmm0
+                   "movdqa 0x80(%[key]), %%xmm1\n\t"
+                   aesenc_xmm1_xmm0
+                   "movdqa 0x90(%[key]), %%xmm1\n\t"
+                   aesenc_xmm1_xmm0
+                   "cmpl $10, %[rounds]\n\t"
+                   "jz .Lenclast%=\n\t"
+                   "movdqa 0xa0(%[key]), %%xmm1\n\t"
+                   aesenc_xmm1_xmm0
+                   "movdqa 0xb0(%[key]), %%xmm1\n\t"
+                   aesenc_xmm1_xmm0
+                   "cmpl $12, %[rounds]\n\t"
+                   "jz .Lenclast%=\n\t"
+                   "movdqa 0xc0(%[key]), %%xmm1\n\t"
+                   aesenc_xmm1_xmm0
+                   "movdqa 0xd0(%[key]), %%xmm1\n\t"
+                   aesenc_xmm1_xmm0
+
+                   ".Lenclast%=:\n\t"
+                   "movdqa %%xmm0, %%xmm5\n"
+                   aesenclast_xmm4_xmm0  /* xmm0 = IV ^ key[0] */
+                   aesenclast_xmm3_xmm5  /* xmm5 = IV */
+                   :
+                   : [key] "r" (ctx->keyschenc),
+                     [rounds] "r" (rounds)
+                   : "cc", "memory");
+#undef aesenc_xmm1_xmm0
+#undef aesenclast_xmm4_xmm0
+#undef aesenclast_xmm3_xmm5
+
+      asm volatile ("movdqu %%xmm5, %[outbuf]\n\t"
+                   : [outbuf] "=m" (*outbuf)
+                   :
+                   : "memory" );
+
+      outbuf += -(!cbc_mac) & BLOCKSIZE;
+    }
+
+  asm volatile ("movdqu %%xmm5, %[iv]\n\t"
+               : [iv] "=m" (*iv)
+               :
+               : "memory" );
+
+  aesni_cleanup ();
+  aesni_cleanup_2_7 ();
+}
+
+
+void ASM_FUNC_ATTR
+_gcry_aes_aesni_ctr_enc (RIJNDAEL_context *ctx, unsigned char *ctr,
+                         unsigned char *outbuf, const unsigned char *inbuf,
+                         size_t nblocks)
+{
+  static const unsigned char be_mask[16] __attribute__ ((aligned (16))) =
+    { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
+  aesni_prepare_2_7_variable;
+
+  aesni_prepare ();
+  aesni_prepare_2_7();
+
+  asm volatile ("movdqa %[mask], %%xmm6\n\t" /* Preload mask */
+                "movdqa %[ctr], %%xmm5\n\t"  /* Preload CTR */
+                : /* No output */
+                : [mask] "m" (*be_mask),
+                  [ctr] "m" (*ctr)
+                : "memory");
+
+#ifdef __x86_64__
+  if (nblocks >= 8)
+    {
+      aesni_prepare_8_15_variable;
+
+      aesni_prepare_8_15();
+
+      for ( ;nblocks >= 8 ; nblocks -= 8 )
+       {
+         do_aesni_ctr_8 (ctx, ctr, outbuf, inbuf);
+         outbuf += 8*BLOCKSIZE;
+         inbuf  += 8*BLOCKSIZE;
+       }
+
+      aesni_cleanup_8_15();
+    }
+#endif
+
+  for ( ;nblocks >= 4 ; nblocks -= 4 )
+    {
+      do_aesni_ctr_4 (ctx, ctr, outbuf, inbuf);
+      outbuf += 4*BLOCKSIZE;
+      inbuf  += 4*BLOCKSIZE;
+    }
+  for ( ;nblocks; nblocks-- )
+    {
+      do_aesni_ctr (ctx, ctr, outbuf, inbuf);
+      outbuf += BLOCKSIZE;
+      inbuf  += BLOCKSIZE;
+    }
+  aesni_cleanup ();
+  aesni_cleanup_2_7 ();
+}
+
+
+void ASM_FUNC_ATTR
+_gcry_aes_aesni_ctr32le_enc (RIJNDAEL_context *ctx, unsigned char *ctr,
+                            unsigned char *outbuf, const unsigned char *inbuf,
+                            size_t nblocks)
+{
+  static const byte le_addd_const[8][16] __attribute__ ((aligned (16))) =
+    {
+      { 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+      { 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+      { 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+      { 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+      { 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+      { 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+      { 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+      { 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
+    };
+  aesni_prepare_2_7_variable;
+
+  aesni_prepare ();
+  aesni_prepare_2_7();
+
+  asm volatile ("movdqa %[ctr], %%xmm5\n\t"  /* Preload CTR */
+               : /* No output */
+               : [ctr] "m" (*ctr)
+               : "memory");
+
+#ifdef __x86_64__
+  if (nblocks >= 8)
+    {
+      aesni_prepare_8_15_variable;
+
+      aesni_prepare_8_15();
+
+      for ( ;nblocks >= 8 ; nblocks -= 8 )
+       {
+         asm volatile
+           ("movdqa (%[key]), %%xmm0\n\t"
+
+            "movdqa %%xmm5,         %%xmm1\n\t" /* load input blocks */
+            "movdqa %%xmm5,         %%xmm2\n\t"
+            "movdqa %%xmm5,         %%xmm3\n\t"
+            "movdqa %%xmm5,         %%xmm4\n\t"
+            "movdqa %%xmm5,         %%xmm8\n\t"
+            "movdqa %%xmm5,         %%xmm9\n\t"
+            "movdqa %%xmm5,         %%xmm10\n\t"
+            "movdqa %%xmm5,         %%xmm11\n\t"
+
+            "paddd 0*16(%[addd]),   %%xmm2\n\t"
+            "paddd 1*16(%[addd]),   %%xmm3\n\t"
+            "paddd 2*16(%[addd]),   %%xmm4\n\t"
+            "paddd 3*16(%[addd]),   %%xmm8\n\t"
+            "paddd 4*16(%[addd]),   %%xmm9\n\t"
+            "paddd 5*16(%[addd]),   %%xmm10\n\t"
+            "paddd 6*16(%[addd]),   %%xmm11\n\t"
+
+            "pxor   %%xmm0, %%xmm1\n\t"     /* xmm1 ^= key[0] */
+            "pxor   %%xmm0, %%xmm2\n\t"     /* xmm2 ^= key[0] */
+            "pxor   %%xmm0, %%xmm3\n\t"     /* xmm3 ^= key[0] */
+            "pxor   %%xmm0, %%xmm4\n\t"     /* xmm4 ^= key[0] */
+            "pxor   %%xmm0, %%xmm8\n\t"     /* xmm8 ^= key[0] */
+            "pxor   %%xmm0, %%xmm9\n\t"     /* xmm9 ^= key[0] */
+            "pxor   %%xmm0, %%xmm10\n\t"    /* xmm10 ^= key[0] */
+            "pxor   %%xmm0, %%xmm11\n\t"    /* xmm11 ^= key[0] */
+
+            "movdqu 0*16(%[inbuf]), %%xmm6\n\t"
+            "movdqu 1*16(%[inbuf]), %%xmm7\n\t"
+            "movdqu 2*16(%[inbuf]), %%xmm12\n\t"
+            "movdqu 3*16(%[inbuf]), %%xmm13\n\t"
+            "movdqu 4*16(%[inbuf]), %%xmm14\n\t"
+            "movdqu 5*16(%[inbuf]), %%xmm15\n\t"
+
+            "paddd 7*16(%[addd]),   %%xmm5\n\t"
+            : /* No output */
+            : [addd] "r" (&le_addd_const[0][0]),
+              [inbuf] "r" (inbuf),
+              [key] "r" (ctx->keyschenc)
+            : "memory");
+
+         do_aesni_enc_vec8 (ctx);
+
+         asm volatile
+           ("pxor %%xmm0, %%xmm6\n\t"
+            "pxor %%xmm0, %%xmm7\n\t"
+            "pxor %%xmm0, %%xmm12\n\t"
+            "pxor %%xmm0, %%xmm13\n\t"
+            "pxor %%xmm0, %%xmm14\n\t"
+            "pxor %%xmm0, %%xmm15\n\t"
+            "aesenclast %%xmm6, %%xmm1\n\t"
+            "aesenclast %%xmm7, %%xmm2\n\t"
+            "movdqu 6*16(%[inbuf]), %%xmm6\n\t"
+            "movdqu 7*16(%[inbuf]), %%xmm7\n\t"
+            "aesenclast %%xmm12, %%xmm3\n\t"
+            "aesenclast %%xmm13, %%xmm4\n\t"
+            "pxor %%xmm0, %%xmm6\n\t"
+            "pxor %%xmm0, %%xmm7\n\t"
+            "aesenclast %%xmm14, %%xmm8\n\t"
+            "aesenclast %%xmm15, %%xmm9\n\t"
+            "aesenclast %%xmm6, %%xmm10\n\t"
+            "aesenclast %%xmm7, %%xmm11\n\t"
+            "movdqu %%xmm1, 0*16(%[outbuf])\n\t"
+            "movdqu %%xmm2, 1*16(%[outbuf])\n\t"
+            "movdqu %%xmm3, 2*16(%[outbuf])\n\t"
+            "movdqu %%xmm4, 3*16(%[outbuf])\n\t"
+            "movdqu %%xmm8, 4*16(%[outbuf])\n\t"
+            "movdqu %%xmm9, 5*16(%[outbuf])\n\t"
+            "movdqu %%xmm10, 6*16(%[outbuf])\n\t"
+            "movdqu %%xmm11, 7*16(%[outbuf])\n\t"
+            : /* No output */
+            : [inbuf] "r" (inbuf),
+              [outbuf] "r" (outbuf)
+            : "memory");
+
+         outbuf += 8*BLOCKSIZE;
+         inbuf  += 8*BLOCKSIZE;
+       }
+
+      aesni_cleanup_8_15();
+    }
+#endif
+
+  for ( ;nblocks >= 4 ; nblocks -= 4 )
+    {
+      asm volatile
+       ("movdqa %%xmm5,         %%xmm1\n\t" /* load input blocks */
+        "movdqa %%xmm5,         %%xmm2\n\t"
+        "movdqa %%xmm5,         %%xmm3\n\t"
+        "movdqa %%xmm5,         %%xmm4\n\t"
+        "paddd 0*16(%[addd]),   %%xmm2\n\t"
+        "paddd 1*16(%[addd]),   %%xmm3\n\t"
+        "paddd 2*16(%[addd]),   %%xmm4\n\t"
+        "paddd 3*16(%[addd]),   %%xmm5\n\t"
+        "movdqu 0*16(%[inbuf]), %%xmm6\n\t"
+        "movdqu 1*16(%[inbuf]), %%xmm7\n\t"
+        : /* No output */
+        : [addd] "r" (&le_addd_const[0][0]),
+          [inbuf] "r" (inbuf)
+        : "memory");
+
+      do_aesni_enc_vec4 (ctx);
+
+      asm volatile
+       ("pxor %%xmm6, %%xmm1\n\t"
+        "pxor %%xmm7, %%xmm2\n\t"
+        "movdqu 2*16(%[inbuf]), %%xmm6\n\t"
+        "movdqu 3*16(%[inbuf]), %%xmm7\n\t"
+        "movdqu %%xmm1, 0*16(%[outbuf])\n\t"
+        "movdqu %%xmm2, 1*16(%[outbuf])\n\t"
+        "pxor %%xmm6, %%xmm3\n\t"
+        "pxor %%xmm7, %%xmm4\n\t"
+        "movdqu %%xmm3, 2*16(%[outbuf])\n\t"
+        "movdqu %%xmm4, 3*16(%[outbuf])\n\t"
+        : /* No output */
+        : [inbuf] "r" (inbuf),
+          [outbuf] "r" (outbuf)
+        : "memory");
+
+      outbuf += 4*BLOCKSIZE;
+      inbuf  += 4*BLOCKSIZE;
+    }
+
+  for ( ;nblocks; nblocks-- )
+    {
+      asm volatile ("movdqa %%xmm5, %%xmm0\n\t"
+                   "paddd %[add_one], %%xmm5\n\t"
+                   "movdqu %[inbuf], %%xmm6\n\t"
+                   :
+                   : [add_one] "m" (*le_addd_const[0]),
+                     [inbuf] "m" (*inbuf)
+                   : "memory" );
+
+      do_aesni_enc (ctx);
+
+      asm volatile ("pxor %%xmm0, %%xmm6\n\t"
+                   "movdqu %%xmm6, %[outbuf]\n\t"
+                   : [outbuf] "=m" (*outbuf)
+                   :
+                   : "memory" );
+
+      outbuf += BLOCKSIZE;
+      inbuf  += BLOCKSIZE;
+    }
+
+  asm volatile ("movdqa %%xmm5, %[ctr]\n\t"
+                : [ctr] "=m" (*ctr)
+                :
+                : "memory" );
+
+  aesni_cleanup ();
+  aesni_cleanup_2_7 ();
+}
+
+
+unsigned int ASM_FUNC_ATTR
+_gcry_aes_aesni_decrypt (const RIJNDAEL_context *ctx, unsigned char *dst,
+                         const unsigned char *src)
+{
+  aesni_prepare ();
+  asm volatile ("movdqu %[src], %%xmm0\n\t"
+                :
+                : [src] "m" (*src)
+                : "memory" );
+  do_aesni_dec (ctx);
+  asm volatile ("movdqu %%xmm0, %[dst]\n\t"
+                : [dst] "=m" (*dst)
+                :
+                : "memory" );
+  aesni_cleanup ();
+  return 0;
+}
+
+
+void ASM_FUNC_ATTR
+_gcry_aes_aesni_cfb_dec (RIJNDAEL_context *ctx, unsigned char *iv,
+                         unsigned char *outbuf, const unsigned char *inbuf,
+                         size_t nblocks)
+{
+  aesni_prepare_2_7_variable;
+
+  aesni_prepare ();
+  aesni_prepare_2_7();
+
+  asm volatile ("movdqu %[iv], %%xmm6\n\t"
+                : /* No output */
+                : [iv] "m" (*iv)
+                : "memory" );
+
+  /* CFB decryption can be parallelized */
+
+#ifdef __x86_64__
+  if (nblocks >= 8)
+    {
+      aesni_prepare_8_15_variable;
+
+      aesni_prepare_8_15();
+
+      for ( ;nblocks >= 8; nblocks -= 8)
+       {
+         asm volatile
+           ("movdqa (%[key]), %%xmm0\n\t"
+
+            "movdqu %%xmm6,         %%xmm1\n\t" /* load input blocks */
+            "movdqu 0*16(%[inbuf]), %%xmm2\n\t"
+            "movdqu 1*16(%[inbuf]), %%xmm3\n\t"
+            "movdqu 2*16(%[inbuf]), %%xmm4\n\t"
+            "movdqu 3*16(%[inbuf]), %%xmm8\n\t"
+            "movdqu 4*16(%[inbuf]), %%xmm9\n\t"
+            "movdqu 5*16(%[inbuf]), %%xmm10\n\t"
+            "movdqu 6*16(%[inbuf]), %%xmm11\n\t"
+
+            "movdqu 7*16(%[inbuf]), %%xmm6\n\t" /* update IV */
+
+            "movdqa %%xmm2, %%xmm12\n\t"
+            "movdqa %%xmm3, %%xmm13\n\t"
+            "movdqa %%xmm4, %%xmm14\n\t"
+            "movdqa %%xmm8, %%xmm15\n\t"
+
+             "pxor   %%xmm0, %%xmm1\n\t"     /* xmm1 ^= key[0] */
+             "pxor   %%xmm0, %%xmm2\n\t"     /* xmm2 ^= key[0] */
+             "pxor   %%xmm0, %%xmm3\n\t"     /* xmm3 ^= key[0] */
+             "pxor   %%xmm0, %%xmm4\n\t"     /* xmm4 ^= key[0] */
+             "pxor   %%xmm0, %%xmm8\n\t"     /* xmm8 ^= key[0] */
+             "pxor   %%xmm0, %%xmm9\n\t"     /* xmm9 ^= key[0] */
+             "pxor   %%xmm0, %%xmm10\n\t"     /* xmm10 ^= key[0] */
+             "pxor   %%xmm0, %%xmm11\n\t"     /* xmm11 ^= key[0] */
+            : /* No output */
+            : [inbuf] "r" (inbuf),
+              [key] "r" (ctx->keyschenc)
+            : "memory");
+
+         do_aesni_enc_vec8 (ctx);
+
+         asm volatile
+           (
+            "pxor %%xmm0, %%xmm12\n\t"
+            "pxor %%xmm0, %%xmm13\n\t"
+            "pxor %%xmm0, %%xmm14\n\t"
+            "pxor %%xmm0, %%xmm15\n\t"
+            "aesenclast %%xmm12, %%xmm1\n\t"
+            "aesenclast %%xmm13, %%xmm2\n\t"
+            "aesenclast %%xmm14, %%xmm3\n\t"
+            "aesenclast %%xmm15, %%xmm4\n\t"
+
+            "movdqu 4*16(%[inbuf]), %%xmm12\n\t"
+            "movdqu 5*16(%[inbuf]), %%xmm13\n\t"
+            "movdqu 6*16(%[inbuf]), %%xmm14\n\t"
+            "movdqu 7*16(%[inbuf]), %%xmm15\n\t"
+            "pxor %%xmm0, %%xmm12\n\t"
+            "pxor %%xmm0, %%xmm13\n\t"
+            "pxor %%xmm0, %%xmm14\n\t"
+            "pxor %%xmm0, %%xmm15\n\t"
+
+            "aesenclast %%xmm12, %%xmm8\n\t"
+            "aesenclast %%xmm13, %%xmm9\n\t"
+            "aesenclast %%xmm14, %%xmm10\n\t"
+            "aesenclast %%xmm15, %%xmm11\n\t"
+
+            "movdqu %%xmm1, 0*16(%[outbuf])\n\t"
+            "movdqu %%xmm2, 1*16(%[outbuf])\n\t"
+            "movdqu %%xmm3, 2*16(%[outbuf])\n\t"
+            "movdqu %%xmm4, 3*16(%[outbuf])\n\t"
+
+            "movdqu %%xmm8, 4*16(%[outbuf])\n\t"
+            "movdqu %%xmm9, 5*16(%[outbuf])\n\t"
+            "movdqu %%xmm10, 6*16(%[outbuf])\n\t"
+            "movdqu %%xmm11, 7*16(%[outbuf])\n\t"
+
+            : /* No output */
+            : [inbuf] "r" (inbuf),
+              [outbuf] "r" (outbuf)
+            : "memory");
+
+         outbuf += 8*BLOCKSIZE;
+         inbuf  += 8*BLOCKSIZE;
+       }
+
+      aesni_cleanup_8_15();
+    }
+#endif
+
+  for ( ;nblocks >= 4; nblocks -= 4)
+    {
+      asm volatile
+        ("movdqu %%xmm6,         %%xmm1\n\t" /* load input blocks */
+         "movdqu 0*16(%[inbuf]), %%xmm2\n\t"
+         "movdqu 1*16(%[inbuf]), %%xmm3\n\t"
+         "movdqu 2*16(%[inbuf]), %%xmm4\n\t"
+
+         "movdqu 3*16(%[inbuf]), %%xmm6\n\t" /* update IV */
+         : /* No output */
+         : [inbuf] "r" (inbuf)
+         : "memory");
+
+      do_aesni_enc_vec4 (ctx);
+
+      asm volatile
+        ("movdqu 0*16(%[inbuf]), %%xmm5\n\t"
+         "pxor %%xmm5, %%xmm1\n\t"
+         "movdqu %%xmm1, 0*16(%[outbuf])\n\t"
+
+         "movdqu 1*16(%[inbuf]), %%xmm5\n\t"
+         "pxor %%xmm5, %%xmm2\n\t"
+         "movdqu %%xmm2, 1*16(%[outbuf])\n\t"
+
+         "movdqu 2*16(%[inbuf]), %%xmm5\n\t"
+         "pxor %%xmm5, %%xmm3\n\t"
+         "movdqu %%xmm3, 2*16(%[outbuf])\n\t"
+
+         "movdqu 3*16(%[inbuf]), %%xmm5\n\t"
+         "pxor %%xmm5, %%xmm4\n\t"
+         "movdqu %%xmm4, 3*16(%[outbuf])\n\t"
+
+         : /* No output */
+         : [inbuf] "r" (inbuf),
+           [outbuf] "r" (outbuf)
+         : "memory");
+
+      outbuf += 4*BLOCKSIZE;
+      inbuf  += 4*BLOCKSIZE;
+    }
+
+  asm volatile ("movdqu %%xmm6, %%xmm0\n\t" ::: "cc");
+
+  for ( ;nblocks; nblocks-- )
+    {
+      do_aesni_enc (ctx);
+
+      asm volatile ("movdqa %%xmm0, %%xmm6\n\t"
+                    "movdqu %[inbuf], %%xmm0\n\t"
+                    "pxor %%xmm0, %%xmm6\n\t"
+                    "movdqu %%xmm6, %[outbuf]\n\t"
+                    : [outbuf] "=m" (*outbuf)
+                    : [inbuf] "m" (*inbuf)
+                    : "memory" );
+
+      outbuf += BLOCKSIZE;
+      inbuf  += BLOCKSIZE;
+    }
+
+  asm volatile ("movdqu %%xmm0, %[iv]\n\t"
+                : [iv] "=m" (*iv)
+                :
+                : "memory" );
+
+  aesni_cleanup ();
+  aesni_cleanup_2_7 ();
+}
+
+
+void ASM_FUNC_ATTR
+_gcry_aes_aesni_cbc_dec (RIJNDAEL_context *ctx, unsigned char *iv,
+                         unsigned char *outbuf, const unsigned char *inbuf,
+                         size_t nblocks)
+{
+  aesni_prepare_2_7_variable;
+
+  aesni_prepare ();
+  aesni_prepare_2_7();
+
+  if ( !ctx->decryption_prepared )
+    {
+      do_aesni_prepare_decryption ( ctx );
+      ctx->decryption_prepared = 1;
+    }
+
+  asm volatile
+    ("movdqu %[iv], %%xmm5\n\t"        /* use xmm5 as fast IV storage */
+     : /* No output */
+     : [iv] "m" (*iv)
+     : "memory");
+
+#ifdef __x86_64__
+  if (nblocks >= 8)
+    {
+      aesni_prepare_8_15_variable;
+
+      aesni_prepare_8_15();
+
+      for ( ;nblocks >= 8 ; nblocks -= 8 )
+       {
+         asm volatile
+           ("movdqa (%[key]), %%xmm0\n\t"
+
+            "movdqu 0*16(%[inbuf]), %%xmm1\n\t"        /* load input blocks */
+            "movdqu 1*16(%[inbuf]), %%xmm2\n\t"
+            "movdqu 2*16(%[inbuf]), %%xmm3\n\t"
+            "movdqu 3*16(%[inbuf]), %%xmm4\n\t"
+            "movdqu 4*16(%[inbuf]), %%xmm8\n\t"
+            "movdqu 5*16(%[inbuf]), %%xmm9\n\t"
+            "movdqu 6*16(%[inbuf]), %%xmm10\n\t"
+            "movdqu 7*16(%[inbuf]), %%xmm11\n\t"
+
+            "movdqa %%xmm1, %%xmm12\n\t"
+            "movdqa %%xmm2, %%xmm13\n\t"
+            "movdqa %%xmm3, %%xmm14\n\t"
+            "movdqa %%xmm4, %%xmm15\n\t"
+
+            "pxor   %%xmm0, %%xmm1\n\t"     /* xmm1 ^= key[0] */
+            "pxor   %%xmm0, %%xmm2\n\t"     /* xmm2 ^= key[0] */
+            "pxor   %%xmm0, %%xmm3\n\t"     /* xmm3 ^= key[0] */
+            "pxor   %%xmm0, %%xmm4\n\t"     /* xmm4 ^= key[0] */
+            "pxor   %%xmm0, %%xmm8\n\t"     /* xmm8 ^= key[0] */
+            "pxor   %%xmm0, %%xmm9\n\t"     /* xmm9 ^= key[0] */
+            "pxor   %%xmm0, %%xmm10\n\t"    /* xmm10 ^= key[0] */
+            "pxor   %%xmm0, %%xmm11\n\t"    /* xmm11 ^= key[0] */
+
+            : /* No output */
+            : [inbuf] "r" (inbuf),
+              [key] "r" (ctx->keyschdec)
+            : "memory");
+
+         do_aesni_dec_vec8 (ctx);
+
+         asm volatile
+           (
+            "pxor %%xmm0, %%xmm5\n\t"                  /* xor IV with key */
+            "pxor %%xmm0, %%xmm12\n\t"                 /* xor IV with key */
+            "pxor %%xmm0, %%xmm13\n\t"                 /* xor IV with key */
+            "pxor %%xmm0, %%xmm14\n\t"                 /* xor IV with key */
+            "pxor %%xmm0, %%xmm15\n\t"                 /* xor IV with key */
+
+            "aesdeclast %%xmm5, %%xmm1\n\t"
+            "aesdeclast %%xmm12, %%xmm2\n\t"
+            "aesdeclast %%xmm13, %%xmm3\n\t"
+            "aesdeclast %%xmm14, %%xmm4\n\t"
+
+            "movdqu 4*16(%[inbuf]), %%xmm12\n\t"
+            "movdqu 5*16(%[inbuf]), %%xmm13\n\t"
+            "movdqu 6*16(%[inbuf]), %%xmm14\n\t"
+            "movdqu 7*16(%[inbuf]), %%xmm5\n\t"
+            "pxor %%xmm0, %%xmm12\n\t"                 /* xor IV with key */
+            "pxor %%xmm0, %%xmm13\n\t"                 /* xor IV with key */
+            "pxor %%xmm0, %%xmm14\n\t"                 /* xor IV with key */
+
+            "aesdeclast %%xmm15, %%xmm8\n\t"
+            "aesdeclast %%xmm12, %%xmm9\n\t"
+            "aesdeclast %%xmm13, %%xmm10\n\t"
+            "aesdeclast %%xmm14, %%xmm11\n\t"
+
+            "movdqu %%xmm1, 0*16(%[outbuf])\n\t"
+            "movdqu %%xmm2, 1*16(%[outbuf])\n\t"
+            "movdqu %%xmm3, 2*16(%[outbuf])\n\t"
+            "movdqu %%xmm4, 3*16(%[outbuf])\n\t"
+            "movdqu %%xmm8, 4*16(%[outbuf])\n\t"
+            "movdqu %%xmm9, 5*16(%[outbuf])\n\t"
+            "movdqu %%xmm10, 6*16(%[outbuf])\n\t"
+            "movdqu %%xmm11, 7*16(%[outbuf])\n\t"
+
+            : /* No output */
+            : [inbuf] "r" (inbuf),
+              [outbuf] "r" (outbuf)
+            : "memory");
+
+         outbuf += 8*BLOCKSIZE;
+         inbuf  += 8*BLOCKSIZE;
+       }
+
+      aesni_cleanup_8_15();
+    }
+#endif
+
+  for ( ;nblocks >= 4 ; nblocks -= 4 )
+    {
+      asm volatile
+        ("movdqu 0*16(%[inbuf]), %%xmm1\n\t"   /* load input blocks */
+         "movdqu 1*16(%[inbuf]), %%xmm2\n\t"
+         "movdqu 2*16(%[inbuf]), %%xmm3\n\t"
+         "movdqu 3*16(%[inbuf]), %%xmm4\n\t"
+         : /* No output */
+         : [inbuf] "r" (inbuf)
+         : "memory");
+
+      do_aesni_dec_vec4 (ctx);
+
+      asm volatile
+        ("pxor %%xmm5, %%xmm1\n\t"             /* xor IV with output */
+         "movdqu 0*16(%[inbuf]), %%xmm5\n\t"   /* load new IV */
+         "movdqu %%xmm1, 0*16(%[outbuf])\n\t"
+
+         "pxor %%xmm5, %%xmm2\n\t"             /* xor IV with output */
+         "movdqu 1*16(%[inbuf]), %%xmm5\n\t"   /* load new IV */
+         "movdqu %%xmm2, 1*16(%[outbuf])\n\t"
+
+         "pxor %%xmm5, %%xmm3\n\t"             /* xor IV with output */
+         "movdqu 2*16(%[inbuf]), %%xmm5\n\t"   /* load new IV */
+         "movdqu %%xmm3, 2*16(%[outbuf])\n\t"
+
+         "pxor %%xmm5, %%xmm4\n\t"             /* xor IV with output */
+         "movdqu 3*16(%[inbuf]), %%xmm5\n\t"   /* load new IV */
+         "movdqu %%xmm4, 3*16(%[outbuf])\n\t"
+
+         : /* No output */
+         : [inbuf] "r" (inbuf),
+           [outbuf] "r" (outbuf)
+         : "memory");
+
+      outbuf += 4*BLOCKSIZE;
+      inbuf  += 4*BLOCKSIZE;
+    }
+
+  for ( ;nblocks; nblocks-- )
+    {
+      asm volatile
+        ("movdqu %[inbuf], %%xmm0\n\t"
+         "movdqa %%xmm0, %%xmm2\n\t"    /* use xmm2 as savebuf */
+         : /* No output */
+         : [inbuf] "m" (*inbuf)
+         : "memory");
+
+      /* uses only xmm0 and xmm1 */
+      do_aesni_dec (ctx);
+
+      asm volatile
+        ("pxor %%xmm5, %%xmm0\n\t"     /* xor IV with output */
+         "movdqu %%xmm0, %[outbuf]\n\t"
+         "movdqu %%xmm2, %%xmm5\n\t"   /* store savebuf as new IV */
+         : [outbuf] "=m" (*outbuf)
+         :
+         : "memory");
+
+      outbuf += BLOCKSIZE;
+      inbuf  += BLOCKSIZE;
+    }
+
+  asm volatile
+    ("movdqu %%xmm5, %[iv]\n\t"        /* store IV */
+     : /* No output */
+     : [iv] "m" (*iv)
+     : "memory");
+
+  aesni_cleanup ();
+  aesni_cleanup_2_7 ();
+}
+
+
+static ASM_FUNC_ATTR_INLINE void
+aesni_ocb_checksum (gcry_cipher_hd_t c, const unsigned char *plaintext,
+                   size_t nblocks)
+{
+  RIJNDAEL_context *ctx = (void *)&c->context.c;
+
+  /* Calculate checksum */
+  asm volatile ("movdqu %[checksum], %%xmm6\n\t"
+                "pxor %%xmm1, %%xmm1\n\t"
+                "pxor %%xmm2, %%xmm2\n\t"
+                "pxor %%xmm3, %%xmm3\n\t"
+                :
+                :[checksum] "m" (*c->u_ctr.ctr)
+                : "memory" );
+
+  if (0) {}
+#if defined(HAVE_GCC_INLINE_ASM_AVX2)
+  else if (nblocks >= 16 && ctx->use_avx2)
+    {
+      /* Use wider 256-bit registers for fast xoring of plaintext. */
+      asm volatile ("vzeroupper\n\t"
+                   "vpxor %%xmm0, %%xmm0, %%xmm0\n\t"
+                   "vpxor %%xmm4, %%xmm4, %%xmm4\n\t"
+                   "vpxor %%xmm5, %%xmm5, %%xmm5\n\t"
+                   "vpxor %%xmm7, %%xmm7, %%xmm7\n\t"
+                    :
+                    :
+                    : "memory");
+
+      for (;nblocks >= 16; nblocks -= 16)
+       {
+         asm volatile ("vpxor %[ptr0], %%ymm6, %%ymm6\n\t"
+                       "vpxor %[ptr1], %%ymm1, %%ymm1\n\t"
+                       "vpxor %[ptr2], %%ymm2, %%ymm2\n\t"
+                       "vpxor %[ptr3], %%ymm3, %%ymm3\n\t"
+                       :
+                       : [ptr0] "m" (*(plaintext + 0 * BLOCKSIZE * 2)),
+                         [ptr1] "m" (*(plaintext + 1 * BLOCKSIZE * 2)),
+                         [ptr2] "m" (*(plaintext + 2 * BLOCKSIZE * 2)),
+                         [ptr3] "m" (*(plaintext + 3 * BLOCKSIZE * 2))
+                       : "memory" );
+         asm volatile ("vpxor %[ptr4], %%ymm0, %%ymm0\n\t"
+                       "vpxor %[ptr5], %%ymm4, %%ymm4\n\t"
+                       "vpxor %[ptr6], %%ymm5, %%ymm5\n\t"
+                       "vpxor %[ptr7], %%ymm7, %%ymm7\n\t"
+                       :
+                       : [ptr4] "m" (*(plaintext + 4 * BLOCKSIZE * 2)),
+                         [ptr5] "m" (*(plaintext + 5 * BLOCKSIZE * 2)),
+                         [ptr6] "m" (*(plaintext + 6 * BLOCKSIZE * 2)),
+                         [ptr7] "m" (*(plaintext + 7 * BLOCKSIZE * 2))
+                       : "memory" );
+         plaintext += BLOCKSIZE * 16;
+       }
+
+      asm volatile ("vpxor %%ymm0, %%ymm6, %%ymm6\n\t"
+                   "vpxor %%ymm4, %%ymm1, %%ymm1\n\t"
+                   "vpxor %%ymm5, %%ymm2, %%ymm2\n\t"
+                   "vpxor %%ymm7, %%ymm3, %%ymm3\n\t"
+                   "vextracti128 $1, %%ymm6, %%xmm0\n\t"
+                   "vextracti128 $1, %%ymm1, %%xmm4\n\t"
+                   "vextracti128 $1, %%ymm2, %%xmm5\n\t"
+                   "vextracti128 $1, %%ymm3, %%xmm7\n\t"
+                   "vpxor %%xmm0, %%xmm6, %%xmm6\n\t"
+                   "vpxor %%xmm4, %%xmm1, %%xmm1\n\t"
+                   "vpxor %%xmm5, %%xmm2, %%xmm2\n\t"
+                   "vpxor %%xmm7, %%xmm3, %%xmm3\n\t"
+                   "vzeroupper\n\t"
+                   :
+                   :
+                   : "memory" );
+    }
+#endif
+#if defined(HAVE_GCC_INLINE_ASM_AVX)
+  else if (nblocks >= 16 && ctx->use_avx)
+    {
+      /* Same as AVX2, except using 256-bit floating point instructions. */
+      asm volatile ("vzeroupper\n\t"
+                   "vxorpd %%xmm0, %%xmm0, %%xmm0\n\t"
+                   "vxorpd %%xmm4, %%xmm4, %%xmm4\n\t"
+                   "vxorpd %%xmm5, %%xmm5, %%xmm5\n\t"
+                   "vxorpd %%xmm7, %%xmm7, %%xmm7\n\t"
+                    :
+                    :
+                    : "memory");
+
+      for (;nblocks >= 16; nblocks -= 16)
+       {
+         asm volatile ("vxorpd %[ptr0], %%ymm6, %%ymm6\n\t"
+                       "vxorpd %[ptr1], %%ymm1, %%ymm1\n\t"
+                       "vxorpd %[ptr2], %%ymm2, %%ymm2\n\t"
+                       "vxorpd %[ptr3], %%ymm3, %%ymm3\n\t"
+                       :
+                       : [ptr0] "m" (*(plaintext + 0 * BLOCKSIZE * 2)),
+                         [ptr1] "m" (*(plaintext + 1 * BLOCKSIZE * 2)),
+                         [ptr2] "m" (*(plaintext + 2 * BLOCKSIZE * 2)),
+                         [ptr3] "m" (*(plaintext + 3 * BLOCKSIZE * 2))
+                       : "memory" );
+         asm volatile ("vxorpd %[ptr4], %%ymm0, %%ymm0\n\t"
+                       "vxorpd %[ptr5], %%ymm4, %%ymm4\n\t"
+                       "vxorpd %[ptr6], %%ymm5, %%ymm5\n\t"
+                       "vxorpd %[ptr7], %%ymm7, %%ymm7\n\t"
+                       :
+                       : [ptr4] "m" (*(plaintext + 4 * BLOCKSIZE * 2)),
+                         [ptr5] "m" (*(plaintext + 5 * BLOCKSIZE * 2)),
+                         [ptr6] "m" (*(plaintext + 6 * BLOCKSIZE * 2)),
+                         [ptr7] "m" (*(plaintext + 7 * BLOCKSIZE * 2))
+                       : "memory" );
+         plaintext += BLOCKSIZE * 16;
+       }
+
+      asm volatile ("vxorpd %%ymm0, %%ymm6, %%ymm6\n\t"
+                   "vxorpd %%ymm4, %%ymm1, %%ymm1\n\t"
+                   "vxorpd %%ymm5, %%ymm2, %%ymm2\n\t"
+                   "vxorpd %%ymm7, %%ymm3, %%ymm3\n\t"
+                   "vextractf128 $1, %%ymm6, %%xmm0\n\t"
+                   "vextractf128 $1, %%ymm1, %%xmm4\n\t"
+                   "vextractf128 $1, %%ymm2, %%xmm5\n\t"
+                   "vextractf128 $1, %%ymm3, %%xmm7\n\t"
+                   "vxorpd %%xmm0, %%xmm6, %%xmm6\n\t"
+                   "vxorpd %%xmm4, %%xmm1, %%xmm1\n\t"
+                   "vxorpd %%xmm5, %%xmm2, %%xmm2\n\t"
+                   "vxorpd %%xmm7, %%xmm3, %%xmm3\n\t"
+                   "vzeroupper\n\t"
+                   :
+                   :
+                   : "memory" );
+    }
+#endif
+
+  for (;nblocks >= 4; nblocks -= 4)
+    {
+      asm volatile ("movdqu %[ptr0], %%xmm0\n\t"
+                   "movdqu %[ptr1], %%xmm4\n\t"
+                   "movdqu %[ptr2], %%xmm5\n\t"
+                   "movdqu %[ptr3], %%xmm7\n\t"
+                   "pxor %%xmm0, %%xmm6\n\t"
+                   "pxor %%xmm4, %%xmm1\n\t"
+                   "pxor %%xmm5, %%xmm2\n\t"
+                   "pxor %%xmm7, %%xmm3\n\t"
+                   :
+                   : [ptr0] "m" (*(plaintext + 0 * BLOCKSIZE)),
+                     [ptr1] "m" (*(plaintext + 1 * BLOCKSIZE)),
+                     [ptr2] "m" (*(plaintext + 2 * BLOCKSIZE)),
+                     [ptr3] "m" (*(plaintext + 3 * BLOCKSIZE))
+                   : "memory" );
+      plaintext += BLOCKSIZE * 4;
+    }
+
+  for (;nblocks >= 1; nblocks -= 1)
+    {
+      asm volatile ("movdqu %[ptr0], %%xmm0\n\t"
+                   "pxor %%xmm0, %%xmm6\n\t"
+                   :
+                   : [ptr0] "m" (*(plaintext + 0 * BLOCKSIZE))
+                   : "memory" );
+      plaintext += BLOCKSIZE;
+    }
+
+  asm volatile ("pxor %%xmm1, %%xmm6\n\t"
+               "pxor %%xmm2, %%xmm6\n\t"
+               "pxor %%xmm3, %%xmm6\n\t"
+               "movdqu %%xmm6, %[checksum]\n\t"
+               : [checksum] "=m" (*c->u_ctr.ctr)
+               :
+               : "memory" );
+}
+
+
+static unsigned int ASM_FUNC_ATTR_NOINLINE
+aesni_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg,
+               const void *inbuf_arg, size_t nblocks)
+{
+  RIJNDAEL_context *ctx = (void *)&c->context.c;
+  unsigned char *outbuf = outbuf_arg;
+  const unsigned char *inbuf = inbuf_arg;
+  u64 n = c->u_mode.ocb.data_nblocks;
+  const unsigned char *l;
+  byte tmpbuf_store[3 * 16 + 15];
+  byte *tmpbuf;
+  aesni_prepare_2_7_variable;
+
+  asm volatile ("" : "=r" (tmpbuf) : "0" (tmpbuf_store) : "memory");
+  tmpbuf = tmpbuf + (-(uintptr_t)tmpbuf & 15);
+
+  aesni_prepare ();
+  aesni_prepare_2_7 ();
+
+  /* Preload Offset */
+  asm volatile ("movdqu %[iv], %%xmm5\n\t"
+               "movdqu %[ctr], %%xmm7\n\t"
+               : /* No output */
+               : [iv] "m" (*c->u_iv.iv),
+                 [ctr] "m" (*c->u_ctr.ctr)
+               : "memory" );
+
+  for ( ;nblocks && n % 4; nblocks-- )
+    {
+      l = aes_ocb_get_l(c, ++n);
+
+      /* Checksum_i = Checksum_{i-1} xor P_i  */
+      /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+      /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i)  */
+      asm volatile ("movdqu %[l],     %%xmm1\n\t"
+                    "movdqu %[inbuf], %%xmm0\n\t"
+                    "pxor   %%xmm1,   %%xmm5\n\t"
+                    "pxor   %%xmm0,   %%xmm7\n\t"
+                    "pxor   %%xmm5,   %%xmm0\n\t"
+                    :
+                    : [l] "m" (*l),
+                      [inbuf] "m" (*inbuf)
+                    : "memory" );
+
+      do_aesni_enc (ctx);
+
+      asm volatile ("pxor   %%xmm5, %%xmm0\n\t"
+                    "movdqu %%xmm0, %[outbuf]\n\t"
+                    : [outbuf] "=m" (*outbuf)
+                    :
+                    : "memory" );
+
+      inbuf += BLOCKSIZE;
+      outbuf += BLOCKSIZE;
+    }
+
+#ifdef __x86_64__
+  if (nblocks >= 8)
+    {
+      unsigned char last_xor_first_key_store[16 + 15];
+      unsigned char *lxf_key;
+      aesni_prepare_8_15_variable;
+
+      asm volatile (""
+                    : "=r" (lxf_key)
+                   : "0" (last_xor_first_key_store)
+                   : "memory");
+      lxf_key = lxf_key + (-(uintptr_t)lxf_key & 15);
+
+      aesni_prepare_8_15();
+
+      asm volatile ("movdqu %[l0], %%xmm6\n\t"
+                   "movdqa %[last_key], %%xmm0\n\t"
+                   "pxor %[first_key], %%xmm5\n\t"
+                   "pxor %[first_key], %%xmm0\n\t"
+                   "movdqa %%xmm0, %[lxfkey]\n\t"
+                   : [lxfkey] "=m" (*lxf_key)
+                   : [l0] "m" (*c->u_mode.ocb.L[0]),
+                     [last_key] "m" (ctx->keyschenc[ctx->rounds][0][0]),
+                     [first_key] "m" (ctx->keyschenc[0][0][0])
+                   : "memory" );
+
+      for ( ;nblocks >= 8 ; nblocks -= 8 )
+       {
+         n += 4;
+         l = aes_ocb_get_l(c, n);
+
+         asm volatile ("movdqu %[l0l1],   %%xmm10\n\t"
+                       "movdqu %[l1],     %%xmm11\n\t"
+                       "movdqu %[l3],     %%xmm15\n\t"
+                       :
+                       : [l0l1] "m" (*c->u_mode.ocb.L0L1),
+                         [l1] "m" (*c->u_mode.ocb.L[1]),
+                         [l3] "m" (*l)
+                       : "memory" );
+
+         n += 4;
+         l = aes_ocb_get_l(c, n);
+
+          /* Checksum_i = Checksum_{i-1} xor P_i  */
+         /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+         /* P_i = Offset_i xor ENCIPHER(K, C_i xor Offset_i)  */
+         asm volatile ("movdqu %[inbuf0], %%xmm1\n\t"
+                       "movdqu %[inbuf1], %%xmm2\n\t"
+                       "movdqu %[inbuf2], %%xmm3\n\t"
+                       :
+                       : [inbuf0] "m" (*(inbuf + 0 * BLOCKSIZE)),
+                         [inbuf1] "m" (*(inbuf + 1 * BLOCKSIZE)),
+                         [inbuf2] "m" (*(inbuf + 2 * BLOCKSIZE))
+                       : "memory" );
+         asm volatile ("movdqu %[inbuf3], %%xmm4\n\t"
+                       "movdqu %[inbuf4], %%xmm8\n\t"
+                       "movdqu %[inbuf5], %%xmm9\n\t"
+                       :
+                       : [inbuf3] "m" (*(inbuf + 3 * BLOCKSIZE)),
+                         [inbuf4] "m" (*(inbuf + 4 * BLOCKSIZE)),
+                         [inbuf5] "m" (*(inbuf + 5 * BLOCKSIZE))
+                       : "memory" );
+         asm volatile ("movdqa %[lxfkey], %%xmm0\n\t"
+                       "movdqa %%xmm6,    %%xmm12\n\t"
+                       "pxor   %%xmm5,    %%xmm12\n\t"
+                       "pxor   %%xmm1,    %%xmm7\n\t"
+                       "pxor   %%xmm12,   %%xmm1\n\t"
+                       "pxor   %%xmm0,    %%xmm12\n\t"
+
+                       "movdqa %%xmm10,   %%xmm13\n\t"
+                       "pxor   %%xmm5,    %%xmm13\n\t"
+                       "pxor   %%xmm2,    %%xmm7\n\t"
+                       "pxor   %%xmm13,   %%xmm2\n\t"
+                       "pxor   %%xmm0,    %%xmm13\n\t"
+
+                       "movdqa %%xmm11,   %%xmm14\n\t"
+                       "pxor   %%xmm5,    %%xmm14\n\t"
+                       "pxor   %%xmm3,    %%xmm7\n\t"
+                       "pxor   %%xmm14,   %%xmm3\n\t"
+                       "pxor   %%xmm0,    %%xmm14\n\t"
+
+                       "pxor   %%xmm11,   %%xmm5\n\t"
+                       "pxor   %%xmm15,   %%xmm5\n\t"
+                       "pxor   %%xmm4,    %%xmm7\n\t"
+                       "pxor   %%xmm5,    %%xmm4\n\t"
+                       "movdqa %%xmm5,    %%xmm15\n\t"
+                       "pxor   %%xmm0,    %%xmm15\n\t"
+
+                       "movdqa %%xmm5,    %%xmm0\n\t"
+                       "pxor   %%xmm6,    %%xmm0\n\t"
+                       "pxor   %%xmm8,    %%xmm7\n\t"
+                       "pxor   %%xmm0,    %%xmm8\n\t"
+                       "pxor   %[lxfkey], %%xmm0\n\t"
+                       "movdqa %%xmm0,    %[tmpbuf0]\n\t"
+
+                       "movdqa %%xmm10,   %%xmm0\n\t"
+                       "pxor   %%xmm5,    %%xmm0\n\t"
+                       "pxor   %%xmm9,    %%xmm7\n\t"
+                       "pxor   %%xmm0,    %%xmm9\n\t"
+                       "pxor   %[lxfkey], %%xmm0\n"
+                       "movdqa %%xmm0,    %[tmpbuf1]\n\t"
+                       : [tmpbuf0] "=m" (*(tmpbuf + 0 * BLOCKSIZE)),
+                         [tmpbuf1] "=m" (*(tmpbuf + 1 * BLOCKSIZE))
+                       : [lxfkey] "m" (*lxf_key)
+                       : "memory" );
+         asm volatile ("movdqu %[inbuf6], %%xmm10\n\t"
+                       "movdqa %%xmm11,   %%xmm0\n\t"
+                       "pxor   %%xmm5,    %%xmm0\n\t"
+                       "pxor   %%xmm10,   %%xmm7\n\t"
+                       "pxor   %%xmm0,    %%xmm10\n\t"
+                       "pxor   %[lxfkey], %%xmm0\n\t"
+                       "movdqa %%xmm0,    %[tmpbuf2]\n\t"
+                       : [tmpbuf2] "=m" (*(tmpbuf + 2 * BLOCKSIZE))
+                       : [inbuf6] "m" (*(inbuf + 6 * BLOCKSIZE)),
+                         [lxfkey] "m" (*lxf_key)
+                       : "memory" );
+         asm volatile ("movdqu %[l7],     %%xmm0\n\t"
+                       "pxor   %%xmm11,   %%xmm5\n\t"
+                       "pxor   %%xmm0,    %%xmm5\n\t"
+                       "movdqa 0x10(%[key]), %%xmm0\n\t"
+                       "movdqu %[inbuf7], %%xmm11\n\t"
+                       "pxor   %%xmm11,   %%xmm7\n\t"
+                       "pxor   %%xmm5,    %%xmm11\n\t"
+                       :
+                       : [l7] "m" (*l),
+                         [inbuf7] "m" (*(inbuf + 7 * BLOCKSIZE)),
+                         [key] "r" (ctx->keyschenc)
+                       : "memory" );
+
+         asm volatile ("cmpl $12, %[rounds]\n\t"
+                       "aesenc %%xmm0, %%xmm1\n\t"
+                       "aesenc %%xmm0, %%xmm2\n\t"
+                       "aesenc %%xmm0, %%xmm3\n\t"
+                       "aesenc %%xmm0, %%xmm4\n\t"
+                       "aesenc %%xmm0, %%xmm8\n\t"
+                       "aesenc %%xmm0, %%xmm9\n\t"
+                       "aesenc %%xmm0, %%xmm10\n\t"
+                       "aesenc %%xmm0, %%xmm11\n\t"
+                       "movdqa 0x20(%[key]), %%xmm0\n\t"
+                       "aesenc %%xmm0, %%xmm1\n\t"
+                       "aesenc %%xmm0, %%xmm2\n\t"
+                       "aesenc %%xmm0, %%xmm3\n\t"
+                       "aesenc %%xmm0, %%xmm4\n\t"
+                       "aesenc %%xmm0, %%xmm8\n\t"
+                       "aesenc %%xmm0, %%xmm9\n\t"
+                       "aesenc %%xmm0, %%xmm10\n\t"
+                       "aesenc %%xmm0, %%xmm11\n\t"
+                       "movdqa 0x30(%[key]), %%xmm0\n\t"
+                       "aesenc %%xmm0, %%xmm1\n\t"
+                       "aesenc %%xmm0, %%xmm2\n\t"
+                       "aesenc %%xmm0, %%xmm3\n\t"
+                       "aesenc %%xmm0, %%xmm4\n\t"
+                       "aesenc %%xmm0, %%xmm8\n\t"
+                       "aesenc %%xmm0, %%xmm9\n\t"
+                       "aesenc %%xmm0, %%xmm10\n\t"
+                       "aesenc %%xmm0, %%xmm11\n\t"
+                       "movdqa 0x40(%[key]), %%xmm0\n\t"
+                       "aesenc %%xmm0, %%xmm1\n\t"
+                       "aesenc %%xmm0, %%xmm2\n\t"
+                       "aesenc %%xmm0, %%xmm3\n\t"
+                       "aesenc %%xmm0, %%xmm4\n\t"
+                       "aesenc %%xmm0, %%xmm8\n\t"
+                       "aesenc %%xmm0, %%xmm9\n\t"
+                       "aesenc %%xmm0, %%xmm10\n\t"
+                       "aesenc %%xmm0, %%xmm11\n\t"
+                       "movdqa 0x50(%[key]), %%xmm0\n\t"
+                       "aesenc %%xmm0, %%xmm1\n\t"
+                       "aesenc %%xmm0, %%xmm2\n\t"
+                       "aesenc %%xmm0, %%xmm3\n\t"
+                       "aesenc %%xmm0, %%xmm4\n\t"
+                       "aesenc %%xmm0, %%xmm8\n\t"
+                       "aesenc %%xmm0, %%xmm9\n\t"
+                       "aesenc %%xmm0, %%xmm10\n\t"
+                       "aesenc %%xmm0, %%xmm11\n\t"
+                       "movdqa 0x60(%[key]), %%xmm0\n\t"
+                       "aesenc %%xmm0, %%xmm1\n\t"
+                       "aesenc %%xmm0, %%xmm2\n\t"
+                       "aesenc %%xmm0, %%xmm3\n\t"
+                       "aesenc %%xmm0, %%xmm4\n\t"
+                       "aesenc %%xmm0, %%xmm8\n\t"
+                       "aesenc %%xmm0, %%xmm9\n\t"
+                       "aesenc %%xmm0, %%xmm10\n\t"
+                       "aesenc %%xmm0, %%xmm11\n\t"
+                       "movdqa 0x70(%[key]), %%xmm0\n\t"
+                       "aesenc %%xmm0, %%xmm1\n\t"
+                       "aesenc %%xmm0, %%xmm2\n\t"
+                       "aesenc %%xmm0, %%xmm3\n\t"
+                       "aesenc %%xmm0, %%xmm4\n\t"
+                       "aesenc %%xmm0, %%xmm8\n\t"
+                       "aesenc %%xmm0, %%xmm9\n\t"
+                       "aesenc %%xmm0, %%xmm10\n\t"
+                       "aesenc %%xmm0, %%xmm11\n\t"
+                       "movdqa 0x80(%[key]), %%xmm0\n\t"
+                       "aesenc %%xmm0, %%xmm1\n\t"
+                       "aesenc %%xmm0, %%xmm2\n\t"
+                       "aesenc %%xmm0, %%xmm3\n\t"
+                       "aesenc %%xmm0, %%xmm4\n\t"
+                       "aesenc %%xmm0, %%xmm8\n\t"
+                       "aesenc %%xmm0, %%xmm9\n\t"
+                       "aesenc %%xmm0, %%xmm10\n\t"
+                       "aesenc %%xmm0, %%xmm11\n\t"
+                       "movdqa 0x90(%[key]), %%xmm0\n\t"
+                       "aesenc %%xmm0, %%xmm1\n\t"
+                       "aesenc %%xmm0, %%xmm2\n\t"
+                       "aesenc %%xmm0, %%xmm3\n\t"
+                       "aesenc %%xmm0, %%xmm4\n\t"
+                       "aesenc %%xmm0, %%xmm8\n\t"
+                       "aesenc %%xmm0, %%xmm9\n\t"
+                       "aesenc %%xmm0, %%xmm10\n\t"
+                       "aesenc %%xmm0, %%xmm11\n\t"
+                       "jb .Ldeclast%=\n\t"
+                       "movdqa 0xa0(%[key]), %%xmm0\n\t"
+                       "aesenc %%xmm0, %%xmm1\n\t"
+                       "aesenc %%xmm0, %%xmm2\n\t"
+                       "aesenc %%xmm0, %%xmm3\n\t"
+                       "aesenc %%xmm0, %%xmm4\n\t"
+                       "aesenc %%xmm0, %%xmm8\n\t"
+                       "aesenc %%xmm0, %%xmm9\n\t"
+                       "aesenc %%xmm0, %%xmm10\n\t"
+                       "aesenc %%xmm0, %%xmm11\n\t"
+                       "movdqa 0xb0(%[key]), %%xmm0\n\t"
+                       "aesenc %%xmm0, %%xmm1\n\t"
+                       "aesenc %%xmm0, %%xmm2\n\t"
+                       "aesenc %%xmm0, %%xmm3\n\t"
+                       "aesenc %%xmm0, %%xmm4\n\t"
+                       "aesenc %%xmm0, %%xmm8\n\t"
+                       "aesenc %%xmm0, %%xmm9\n\t"
+                       "aesenc %%xmm0, %%xmm10\n\t"
+                       "aesenc %%xmm0, %%xmm11\n\t"
+                       "je .Ldeclast%=\n\t"
+                       "movdqa 0xc0(%[key]), %%xmm0\n\t"
+                       "aesenc %%xmm0, %%xmm1\n\t"
+                       "aesenc %%xmm0, %%xmm2\n\t"
+                       "aesenc %%xmm0, %%xmm3\n\t"
+                       "aesenc %%xmm0, %%xmm4\n\t"
+                       "aesenc %%xmm0, %%xmm8\n\t"
+                       "aesenc %%xmm0, %%xmm9\n\t"
+                       "aesenc %%xmm0, %%xmm10\n\t"
+                       "aesenc %%xmm0, %%xmm11\n\t"
+                       "movdqa 0xd0(%[key]), %%xmm0\n\t"
+                       "aesenc %%xmm0, %%xmm1\n\t"
+                       "aesenc %%xmm0, %%xmm2\n\t"
+                       "aesenc %%xmm0, %%xmm3\n\t"
+                       "aesenc %%xmm0, %%xmm4\n\t"
+                       "aesenc %%xmm0, %%xmm8\n\t"
+                       "aesenc %%xmm0, %%xmm9\n\t"
+                       "aesenc %%xmm0, %%xmm10\n\t"
+                       "aesenc %%xmm0, %%xmm11\n\t"
+
+                       ".Ldeclast%=:\n\t"
+                       :
+                       : [key] "r" (ctx->keyschenc),
+                         [rounds] "r" (ctx->rounds)
+                       : "cc", "memory");
+
+         asm volatile ("aesenclast %%xmm12,   %%xmm1\n\t"
+                       "aesenclast %%xmm13,   %%xmm2\n\t"
+                       "aesenclast %%xmm14,   %%xmm3\n\t"
+                       "aesenclast %%xmm15,   %%xmm4\n\t"
+                       "aesenclast %[tmpbuf0],%%xmm8\n\t"
+                       "aesenclast %[tmpbuf1],%%xmm9\n\t"
+                       "aesenclast %[tmpbuf2],%%xmm10\n\t"
+                       :
+                       : [tmpbuf0] "m" (*(tmpbuf + 0 * BLOCKSIZE)),
+                         [tmpbuf1] "m" (*(tmpbuf + 1 * BLOCKSIZE)),
+                         [tmpbuf2] "m" (*(tmpbuf + 2 * BLOCKSIZE)),
+                         [lxfkey] "m" (*lxf_key)
+                       : "memory" );
+         asm volatile ("aesenclast %%xmm5,    %%xmm11\n\t"
+                       "pxor   %[lxfkey], %%xmm11\n\t"
+                       "movdqu %%xmm1,    %[outbuf0]\n\t"
+                       "movdqu %%xmm2,    %[outbuf1]\n\t"
+                       : [outbuf0] "=m" (*(outbuf + 0 * BLOCKSIZE)),
+                         [outbuf1] "=m" (*(outbuf + 1 * BLOCKSIZE))
+                       : [lxfkey] "m" (*lxf_key)
+                       : "memory" );
+         asm volatile ("movdqu %%xmm3,    %[outbuf2]\n\t"
+                       "movdqu %%xmm4,    %[outbuf3]\n\t"
+                       "movdqu %%xmm8,    %[outbuf4]\n\t"
+                       : [outbuf2] "=m" (*(outbuf + 2 * BLOCKSIZE)),
+                         [outbuf3] "=m" (*(outbuf + 3 * BLOCKSIZE)),
+                         [outbuf4] "=m" (*(outbuf + 4 * BLOCKSIZE))
+                       :
+                       : "memory" );
+         asm volatile ("movdqu %%xmm9,    %[outbuf5]\n\t"
+                       "movdqu %%xmm10,   %[outbuf6]\n\t"
+                       "movdqu %%xmm11,   %[outbuf7]\n\t"
+                       : [outbuf5] "=m" (*(outbuf + 5 * BLOCKSIZE)),
+                         [outbuf6] "=m" (*(outbuf + 6 * BLOCKSIZE)),
+                         [outbuf7] "=m" (*(outbuf + 7 * BLOCKSIZE))
+                       :
+                       : "memory" );
+
+         outbuf += 8*BLOCKSIZE;
+         inbuf  += 8*BLOCKSIZE;
+       }
+
+      asm volatile ("pxor %[first_key], %%xmm5\n\t"
+                   "pxor %%xmm0, %%xmm0\n\t"
+                   "movdqu %%xmm0, %[lxfkey]\n\t"
+                   : [lxfkey] "=m" (*lxf_key)
+                   : [first_key] "m" (ctx->keyschenc[0][0][0])
+                   : "memory" );
+
+      aesni_cleanup_8_15();
+    }
+#endif
+
+  for ( ;nblocks >= 4 ; nblocks -= 4 )
+    {
+      n += 4;
+      l = aes_ocb_get_l(c, n);
+
+      /* Checksum_i = Checksum_{i-1} xor P_i  */
+      /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+      /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i)  */
+      asm volatile ("movdqu %[l0],     %%xmm0\n\t"
+                   "movdqu %[inbuf0], %%xmm1\n\t"
+                   "movdqu %[l0l1],   %%xmm3\n\t"
+                   :
+                   : [l0] "m" (*c->u_mode.ocb.L[0]),
+                     [l0l1] "m" (*c->u_mode.ocb.L0L1),
+                     [inbuf0] "m" (*(inbuf + 0 * BLOCKSIZE))
+                   : "memory" );
+      asm volatile ("movdqu %[l1],     %%xmm4\n\t"
+                   "movdqu %[l3],     %%xmm6\n\t"
+                   "pxor   %%xmm5,    %%xmm0\n\t"
+                   "pxor   %%xmm1,    %%xmm7\n\t"
+                   "pxor   %%xmm0,    %%xmm1\n\t"
+                   "movdqa %%xmm0,    %[tmpbuf0]\n\t"
+                   : [tmpbuf0] "=m" (*(tmpbuf + 0 * BLOCKSIZE))
+                   : [l1] "m" (*c->u_mode.ocb.L[1]),
+                     [l3] "m" (*l)
+                   : "memory" );
+      asm volatile ("movdqu %[inbuf1], %%xmm2\n\t"
+                   "pxor   %%xmm5,    %%xmm3\n\t"
+                   "pxor   %%xmm2,    %%xmm7\n\t"
+                   "pxor   %%xmm3,    %%xmm2\n\t"
+                   "movdqa %%xmm3,    %[tmpbuf1]\n\t"
+                   : [tmpbuf1] "=m" (*(tmpbuf + 1 * BLOCKSIZE))
+                   : [inbuf1] "m" (*(inbuf + 1 * BLOCKSIZE))
+                   : "memory" );
+      asm volatile ("movdqa %%xmm4,    %%xmm0\n\t"
+                   "movdqu %[inbuf2], %%xmm3\n\t"
+                   "pxor   %%xmm5,    %%xmm0\n\t"
+                   "pxor   %%xmm3,    %%xmm7\n\t"
+                   "pxor   %%xmm0,    %%xmm3\n\t"
+                   "movdqa %%xmm0,    %[tmpbuf2]\n\t"
+                   : [tmpbuf2] "=m" (*(tmpbuf + 2 * BLOCKSIZE))
+                   :
+                     [inbuf2] "m" (*(inbuf + 2 * BLOCKSIZE))
+                   : "memory" );
+      asm volatile ("pxor   %%xmm6,    %%xmm5\n\t"
+                   "pxor   %%xmm4,    %%xmm5\n\t"
+                   "movdqu %[inbuf3], %%xmm4\n\t"
+                   "pxor   %%xmm4,    %%xmm7\n\t"
+                   "pxor   %%xmm5,    %%xmm4\n\t"
+                   :
+                   : [inbuf3] "m" (*(inbuf + 3 * BLOCKSIZE))
+                   : "memory" );
+
+      do_aesni_enc_vec4 (ctx);
+
+      asm volatile ("pxor   %[tmpbuf0],%%xmm1\n\t"
+                   "movdqu %%xmm1,    %[outbuf0]\n\t"
+                   "pxor   %[tmpbuf1],%%xmm2\n\t"
+                   "movdqu %%xmm2,    %[outbuf1]\n\t"
+                   : [outbuf0] "=m" (*(outbuf + 0 * BLOCKSIZE)),
+                     [outbuf1] "=m" (*(outbuf + 1 * BLOCKSIZE))
+                   : [tmpbuf0] "m" (*(tmpbuf + 0 * BLOCKSIZE)),
+                     [tmpbuf1] "m" (*(tmpbuf + 1 * BLOCKSIZE))
+                   : "memory" );
+      asm volatile ("pxor   %[tmpbuf2],%%xmm3\n\t"
+                   "movdqu %%xmm3,    %[outbuf2]\n\t"
+                   "pxor   %%xmm5,    %%xmm4\n\t"
+                   "movdqu %%xmm4,    %[outbuf3]\n\t"
+                   : [outbuf2] "=m" (*(outbuf + 2 * BLOCKSIZE)),
+                     [outbuf3] "=m" (*(outbuf + 3 * BLOCKSIZE))
+                   : [tmpbuf2] "m" (*(tmpbuf + 2 * BLOCKSIZE))
+                   : "memory" );
+
+      outbuf += 4*BLOCKSIZE;
+      inbuf  += 4*BLOCKSIZE;
+    }
+
+  for ( ;nblocks; nblocks-- )
+    {
+      l = aes_ocb_get_l(c, ++n);
+
+      /* Checksum_i = Checksum_{i-1} xor P_i  */
+      /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+      /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i)  */
+      asm volatile ("movdqu %[l],     %%xmm1\n\t"
+                    "movdqu %[inbuf], %%xmm0\n\t"
+                    "pxor   %%xmm1,   %%xmm5\n\t"
+                   "pxor   %%xmm0,   %%xmm7\n\t"
+                    "pxor   %%xmm5,   %%xmm0\n\t"
+                    :
+                    : [l] "m" (*l),
+                      [inbuf] "m" (*inbuf)
+                    : "memory" );
+
+      do_aesni_enc (ctx);
+
+      asm volatile ("pxor   %%xmm5, %%xmm0\n\t"
+                    "movdqu %%xmm0, %[outbuf]\n\t"
+                    : [outbuf] "=m" (*outbuf)
+                    :
+                    : "memory" );
+
+      inbuf += BLOCKSIZE;
+      outbuf += BLOCKSIZE;
+    }
+
+  c->u_mode.ocb.data_nblocks = n;
+  asm volatile ("movdqu %%xmm5, %[iv]\n\t"
+                "movdqu %%xmm7, %[ctr]\n\t"
+               : [iv] "=m" (*c->u_iv.iv),
+                 [ctr] "=m" (*c->u_ctr.ctr)
+                :
+                : "memory" );
+
+  asm volatile ("pxor   %%xmm0, %%xmm0\n\t"
+                "movdqa %%xmm0, %[tmpbuf0]\n\t"
+                "movdqa %%xmm0, %[tmpbuf1]\n\t"
+                "movdqa %%xmm0, %[tmpbuf2]\n\t"
+               : [tmpbuf0] "=m" (*(tmpbuf + 0 * BLOCKSIZE)),
+                 [tmpbuf1] "=m" (*(tmpbuf + 1 * BLOCKSIZE)),
+                 [tmpbuf2] "=m" (*(tmpbuf + 2 * BLOCKSIZE))
+                :
+                : "memory" );
+
+  aesni_cleanup ();
+  aesni_cleanup_2_7 ();
+
+  return 0;
+}
+
+
+static unsigned int ASM_FUNC_ATTR_NOINLINE
+aesni_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg,
+               const void *inbuf_arg, size_t nblocks_arg)
+{
+  RIJNDAEL_context *ctx = (void *)&c->context.c;
+  unsigned char *outbuf = outbuf_arg;
+  const unsigned char *inbuf = inbuf_arg;
+  u64 n = c->u_mode.ocb.data_nblocks;
+  const unsigned char *l;
+  size_t nblocks = nblocks_arg;
+  byte tmpbuf_store[3 * 16 + 15];
+  byte *tmpbuf;
+  aesni_prepare_2_7_variable;
+
+  asm volatile ("" : "=r" (tmpbuf) : "0" (tmpbuf_store) : "memory");
+  tmpbuf = tmpbuf + (-(uintptr_t)tmpbuf & 15);
+
+  aesni_prepare ();
+  aesni_prepare_2_7 ();
+
+  if ( !ctx->decryption_prepared )
+    {
+      do_aesni_prepare_decryption ( ctx );
+      ctx->decryption_prepared = 1;
+    }
+
+  /* Preload Offset */
+  asm volatile ("movdqu %[iv], %%xmm5\n\t"
+                : /* No output */
+                : [iv] "m" (*c->u_iv.iv)
+                : "memory" );
+
+  for ( ;nblocks && n % 4; nblocks-- )
+    {
+      l = aes_ocb_get_l(c, ++n);
+
+      /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+      /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i)  */
+      asm volatile ("movdqu %[l],     %%xmm1\n\t"
+                    "movdqu %[inbuf], %%xmm0\n\t"
+                    "pxor   %%xmm1,   %%xmm5\n\t"
+                    "pxor   %%xmm5,   %%xmm0\n\t"
+                    :
+                    : [l] "m" (*l),
+                      [inbuf] "m" (*inbuf)
+                    : "memory" );
+
+      do_aesni_dec (ctx);
+
+      asm volatile ("pxor   %%xmm5, %%xmm0\n\t"
+                    "movdqu %%xmm0, %[outbuf]\n\t"
+                    : [outbuf] "=m" (*outbuf)
+                    :
+                    : "memory" );
+
+      inbuf += BLOCKSIZE;
+      outbuf += BLOCKSIZE;
+    }
+
+#ifdef __x86_64__
+  if (nblocks >= 8)
+    {
+      unsigned char last_xor_first_key_store[16 + 15];
+      unsigned char *lxf_key;
+      aesni_prepare_8_15_variable;
+
+      asm volatile (""
+                    : "=r" (lxf_key)
+                   : "0" (last_xor_first_key_store)
+                   : "memory");
+      lxf_key = lxf_key + (-(uintptr_t)lxf_key & 15);
+
+      aesni_prepare_8_15();
+
+      asm volatile ("movdqu %[l0], %%xmm6\n\t"
+                   "movdqa %[last_key], %%xmm0\n\t"
+                   "pxor %[first_key], %%xmm5\n\t"
+                   "pxor %[first_key], %%xmm0\n\t"
+                   "movdqa %%xmm0, %[lxfkey]\n\t"
+                   : [lxfkey] "=m" (*lxf_key)
+                   : [l0] "m" (*c->u_mode.ocb.L[0]),
+                     [last_key] "m" (ctx->keyschdec[ctx->rounds][0][0]),
+                     [first_key] "m" (ctx->keyschdec[0][0][0])
+                   : "memory" );
+
+      for ( ;nblocks >= 8 ; nblocks -= 8 )
+       {
+         n += 4;
+         l = aes_ocb_get_l(c, n);
+
+         asm volatile ("movdqu %[l0l1],   %%xmm10\n\t"
+                       "movdqu %[l1],     %%xmm11\n\t"
+                       "movdqu %[l3],     %%xmm15\n\t"
+                       :
+                       : [l0l1] "m" (*c->u_mode.ocb.L0L1),
+                         [l1] "m" (*c->u_mode.ocb.L[1]),
+                         [l3] "m" (*l)
+                       : "memory" );
+
+         n += 4;
+         l = aes_ocb_get_l(c, n);
+
+         /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+         /* P_i = Offset_i xor ENCIPHER(K, C_i xor Offset_i)  */
+         asm volatile ("movdqu %[inbuf0], %%xmm1\n\t"
+                       "movdqu %[inbuf1], %%xmm2\n\t"
+                       "movdqu %[inbuf2], %%xmm3\n\t"
+                       :
+                       : [inbuf0] "m" (*(inbuf + 0 * BLOCKSIZE)),
+                         [inbuf1] "m" (*(inbuf + 1 * BLOCKSIZE)),
+                         [inbuf2] "m" (*(inbuf + 2 * BLOCKSIZE))
+                       : "memory" );
+         asm volatile ("movdqu %[inbuf3], %%xmm4\n\t"
+                       "movdqu %[inbuf4], %%xmm8\n\t"
+                       "movdqu %[inbuf5], %%xmm9\n\t"
+                       :
+                       : [inbuf3] "m" (*(inbuf + 3 * BLOCKSIZE)),
+                         [inbuf4] "m" (*(inbuf + 4 * BLOCKSIZE)),
+                         [inbuf5] "m" (*(inbuf + 5 * BLOCKSIZE))
+                       : "memory" );
+         asm volatile ("movdqa %[lxfkey], %%xmm0\n\t"
+                       "movdqa %%xmm6,    %%xmm12\n\t"
+                       "pxor   %%xmm5,    %%xmm12\n\t"
+                       "pxor   %%xmm12,   %%xmm1\n\t"
+                       "pxor   %%xmm0,    %%xmm12\n\t"
+
+                       "movdqa %%xmm10,   %%xmm13\n\t"
+                       "pxor   %%xmm5,    %%xmm13\n\t"
+                       "pxor   %%xmm13,   %%xmm2\n\t"
+                       "pxor   %%xmm0,    %%xmm13\n\t"
+
+                       "movdqa %%xmm11,   %%xmm14\n\t"
+                       "pxor   %%xmm5,    %%xmm14\n\t"
+                       "pxor   %%xmm14,   %%xmm3\n\t"
+                       "pxor   %%xmm0,    %%xmm14\n\t"
+
+                       "pxor   %%xmm11,   %%xmm5\n\t"
+                       "pxor   %%xmm15,   %%xmm5\n\t"
+                       "pxor   %%xmm5,    %%xmm4\n\t"
+                       "movdqa %%xmm5,    %%xmm15\n\t"
+                       "pxor   %%xmm0,    %%xmm15\n\t"
+
+                       "movdqa %%xmm5,    %%xmm0\n\t"
+                       "pxor   %%xmm6,    %%xmm0\n\t"
+                       "pxor   %%xmm0,    %%xmm8\n\t"
+                       "pxor   %[lxfkey], %%xmm0\n\t"
+                       "movdqa %%xmm0,    %[tmpbuf0]\n\t"
+
+                       "movdqa %%xmm10,   %%xmm0\n\t"
+                       "pxor   %%xmm5,    %%xmm0\n\t"
+                       "pxor   %%xmm0,    %%xmm9\n\t"
+                       "pxor   %[lxfkey], %%xmm0\n"
+                       "movdqa %%xmm0,    %[tmpbuf1]\n\t"
+                       : [tmpbuf0] "=m" (*(tmpbuf + 0 * BLOCKSIZE)),
+                         [tmpbuf1] "=m" (*(tmpbuf + 1 * BLOCKSIZE))
+                       : [lxfkey] "m" (*lxf_key)
+                       : "memory" );
+         asm volatile ("movdqu %[inbuf6], %%xmm10\n\t"
+                       "movdqa %%xmm11,   %%xmm0\n\t"
+                       "pxor   %%xmm5,    %%xmm0\n\t"
+                       "pxor   %%xmm0,    %%xmm10\n\t"
+                       "pxor   %[lxfkey], %%xmm0\n\t"
+                       "movdqa %%xmm0,    %[tmpbuf2]\n\t"
+                       : [tmpbuf2] "=m" (*(tmpbuf + 2 * BLOCKSIZE))
+                       : [inbuf6] "m" (*(inbuf + 6 * BLOCKSIZE)),
+                         [lxfkey] "m" (*lxf_key)
+                       : "memory" );
+         asm volatile ("movdqu %[l7],     %%xmm0\n\t"
+                       "pxor   %%xmm11,   %%xmm5\n\t"
+                       "pxor   %%xmm0,    %%xmm5\n\t"
+                       "movdqa 0x10(%[key]), %%xmm0\n\t"
+                       "movdqu %[inbuf7], %%xmm11\n\t"
+                       "pxor   %%xmm5,    %%xmm11\n\t"
+                       :
+                       : [l7] "m" (*l),
+                         [inbuf7] "m" (*(inbuf + 7 * BLOCKSIZE)),
+                         [key] "r" (ctx->keyschdec)
+                       : "memory" );
+
+         asm volatile ("cmpl $12, %[rounds]\n\t"
+                       "aesdec %%xmm0, %%xmm1\n\t"
+                       "aesdec %%xmm0, %%xmm2\n\t"
+                       "aesdec %%xmm0, %%xmm3\n\t"
+                       "aesdec %%xmm0, %%xmm4\n\t"
+                       "aesdec %%xmm0, %%xmm8\n\t"
+                       "aesdec %%xmm0, %%xmm9\n\t"
+                       "aesdec %%xmm0, %%xmm10\n\t"
+                       "aesdec %%xmm0, %%xmm11\n\t"
+                       "movdqa 0x20(%[key]), %%xmm0\n\t"
+                       "aesdec %%xmm0, %%xmm1\n\t"
+                       "aesdec %%xmm0, %%xmm2\n\t"
+                       "aesdec %%xmm0, %%xmm3\n\t"
+                       "aesdec %%xmm0, %%xmm4\n\t"
+                       "aesdec %%xmm0, %%xmm8\n\t"
+                       "aesdec %%xmm0, %%xmm9\n\t"
+                       "aesdec %%xmm0, %%xmm10\n\t"
+                       "aesdec %%xmm0, %%xmm11\n\t"
+                       "movdqa 0x30(%[key]), %%xmm0\n\t"
+                       "aesdec %%xmm0, %%xmm1\n\t"
+                       "aesdec %%xmm0, %%xmm2\n\t"
+                       "aesdec %%xmm0, %%xmm3\n\t"
+                       "aesdec %%xmm0, %%xmm4\n\t"
+                       "aesdec %%xmm0, %%xmm8\n\t"
+                       "aesdec %%xmm0, %%xmm9\n\t"
+                       "aesdec %%xmm0, %%xmm10\n\t"
+                       "aesdec %%xmm0, %%xmm11\n\t"
+                       "movdqa 0x40(%[key]), %%xmm0\n\t"
+                       "aesdec %%xmm0, %%xmm1\n\t"
+                       "aesdec %%xmm0, %%xmm2\n\t"
+                       "aesdec %%xmm0, %%xmm3\n\t"
+                       "aesdec %%xmm0, %%xmm4\n\t"
+                       "aesdec %%xmm0, %%xmm8\n\t"
+                       "aesdec %%xmm0, %%xmm9\n\t"
+                       "aesdec %%xmm0, %%xmm10\n\t"
+                       "aesdec %%xmm0, %%xmm11\n\t"
+                       "movdqa 0x50(%[key]), %%xmm0\n\t"
+                       "aesdec %%xmm0, %%xmm1\n\t"
+                       "aesdec %%xmm0, %%xmm2\n\t"
+                       "aesdec %%xmm0, %%xmm3\n\t"
+                       "aesdec %%xmm0, %%xmm4\n\t"
+                       "aesdec %%xmm0, %%xmm8\n\t"
+                       "aesdec %%xmm0, %%xmm9\n\t"
+                       "aesdec %%xmm0, %%xmm10\n\t"
+                       "aesdec %%xmm0, %%xmm11\n\t"
+                       "movdqa 0x60(%[key]), %%xmm0\n\t"
+                       "aesdec %%xmm0, %%xmm1\n\t"
+                       "aesdec %%xmm0, %%xmm2\n\t"
+                       "aesdec %%xmm0, %%xmm3\n\t"
+                       "aesdec %%xmm0, %%xmm4\n\t"
+                       "aesdec %%xmm0, %%xmm8\n\t"
+                       "aesdec %%xmm0, %%xmm9\n\t"
+                       "aesdec %%xmm0, %%xmm10\n\t"
+                       "aesdec %%xmm0, %%xmm11\n\t"
+                       "movdqa 0x70(%[key]), %%xmm0\n\t"
+                       "aesdec %%xmm0, %%xmm1\n\t"
+                       "aesdec %%xmm0, %%xmm2\n\t"
+                       "aesdec %%xmm0, %%xmm3\n\t"
+                       "aesdec %%xmm0, %%xmm4\n\t"
+                       "aesdec %%xmm0, %%xmm8\n\t"
+                       "aesdec %%xmm0, %%xmm9\n\t"
+                       "aesdec %%xmm0, %%xmm10\n\t"
+                       "aesdec %%xmm0, %%xmm11\n\t"
+                       "movdqa 0x80(%[key]), %%xmm0\n\t"
+                       "aesdec %%xmm0, %%xmm1\n\t"
+                       "aesdec %%xmm0, %%xmm2\n\t"
+                       "aesdec %%xmm0, %%xmm3\n\t"
+                       "aesdec %%xmm0, %%xmm4\n\t"
+                       "aesdec %%xmm0, %%xmm8\n\t"
+                       "aesdec %%xmm0, %%xmm9\n\t"
+                       "aesdec %%xmm0, %%xmm10\n\t"
+                       "aesdec %%xmm0, %%xmm11\n\t"
+                       "movdqa 0x90(%[key]), %%xmm0\n\t"
+                       "aesdec %%xmm0, %%xmm1\n\t"
+                       "aesdec %%xmm0, %%xmm2\n\t"
+                       "aesdec %%xmm0, %%xmm3\n\t"
+                       "aesdec %%xmm0, %%xmm4\n\t"
+                       "aesdec %%xmm0, %%xmm8\n\t"
+                       "aesdec %%xmm0, %%xmm9\n\t"
+                       "aesdec %%xmm0, %%xmm10\n\t"
+                       "aesdec %%xmm0, %%xmm11\n\t"
+                       "jb .Ldeclast%=\n\t"
+                       "movdqa 0xa0(%[key]), %%xmm0\n\t"
+                       "aesdec %%xmm0, %%xmm1\n\t"
+                       "aesdec %%xmm0, %%xmm2\n\t"
+                       "aesdec %%xmm0, %%xmm3\n\t"
+                       "aesdec %%xmm0, %%xmm4\n\t"
+                       "aesdec %%xmm0, %%xmm8\n\t"
+                       "aesdec %%xmm0, %%xmm9\n\t"
+                       "aesdec %%xmm0, %%xmm10\n\t"
+                       "aesdec %%xmm0, %%xmm11\n\t"
+                       "movdqa 0xb0(%[key]), %%xmm0\n\t"
+                       "aesdec %%xmm0, %%xmm1\n\t"
+                       "aesdec %%xmm0, %%xmm2\n\t"
+                       "aesdec %%xmm0, %%xmm3\n\t"
+                       "aesdec %%xmm0, %%xmm4\n\t"
+                       "aesdec %%xmm0, %%xmm8\n\t"
+                       "aesdec %%xmm0, %%xmm9\n\t"
+                       "aesdec %%xmm0, %%xmm10\n\t"
+                       "aesdec %%xmm0, %%xmm11\n\t"
+                       "je .Ldeclast%=\n\t"
+                       "movdqa 0xc0(%[key]), %%xmm0\n\t"
+                       "aesdec %%xmm0, %%xmm1\n\t"
+                       "aesdec %%xmm0, %%xmm2\n\t"
+                       "aesdec %%xmm0, %%xmm3\n\t"
+                       "aesdec %%xmm0, %%xmm4\n\t"
+                       "aesdec %%xmm0, %%xmm8\n\t"
+                       "aesdec %%xmm0, %%xmm9\n\t"
+                       "aesdec %%xmm0, %%xmm10\n\t"
+                       "aesdec %%xmm0, %%xmm11\n\t"
+                       "movdqa 0xd0(%[key]), %%xmm0\n\t"
+                       "aesdec %%xmm0, %%xmm1\n\t"
+                       "aesdec %%xmm0, %%xmm2\n\t"
+                       "aesdec %%xmm0, %%xmm3\n\t"
+                       "aesdec %%xmm0, %%xmm4\n\t"
+                       "aesdec %%xmm0, %%xmm8\n\t"
+                       "aesdec %%xmm0, %%xmm9\n\t"
+                       "aesdec %%xmm0, %%xmm10\n\t"
+                       "aesdec %%xmm0, %%xmm11\n\t"
+
+                       ".Ldeclast%=:\n\t"
+                       :
+                       : [key] "r" (ctx->keyschdec),
+                         [rounds] "r" (ctx->rounds)
+                       : "cc", "memory");
+
+         asm volatile ("aesdeclast %%xmm12,   %%xmm1\n\t"
+                       "aesdeclast %%xmm13,   %%xmm2\n\t"
+                       "aesdeclast %%xmm14,   %%xmm3\n\t"
+                       "aesdeclast %%xmm15,   %%xmm4\n\t"
+                       "aesdeclast %[tmpbuf0],%%xmm8\n\t"
+                       "aesdeclast %[tmpbuf1],%%xmm9\n\t"
+                       "aesdeclast %[tmpbuf2],%%xmm10\n\t"
+                       :
+                       : [tmpbuf0] "m" (*(tmpbuf + 0 * BLOCKSIZE)),
+                         [tmpbuf1] "m" (*(tmpbuf + 1 * BLOCKSIZE)),
+                         [tmpbuf2] "m" (*(tmpbuf + 2 * BLOCKSIZE))
+                       : "memory" );
+         asm volatile ("aesdeclast %%xmm5,    %%xmm11\n\t"
+                       "pxor   %[lxfkey], %%xmm11\n\t"
+                       "movdqu %%xmm1,    %[outbuf0]\n\t"
+                       "movdqu %%xmm2,    %[outbuf1]\n\t"
+                       : [outbuf0] "=m" (*(outbuf + 0 * BLOCKSIZE)),
+                         [outbuf1] "=m" (*(outbuf + 1 * BLOCKSIZE))
+                       : [lxfkey] "m" (*lxf_key)
+                       : "memory" );
+         asm volatile ("movdqu %%xmm3,    %[outbuf2]\n\t"
+                       "movdqu %%xmm4,    %[outbuf3]\n\t"
+                       "movdqu %%xmm8,    %[outbuf4]\n\t"
+                       : [outbuf2] "=m" (*(outbuf + 2 * BLOCKSIZE)),
+                         [outbuf3] "=m" (*(outbuf + 3 * BLOCKSIZE)),
+                         [outbuf4] "=m" (*(outbuf + 4 * BLOCKSIZE))
+                       :
+                       : "memory" );
+         asm volatile ("movdqu %%xmm9,    %[outbuf5]\n\t"
+                       "movdqu %%xmm10,   %[outbuf6]\n\t"
+                       "movdqu %%xmm11,   %[outbuf7]\n\t"
+                       : [outbuf5] "=m" (*(outbuf + 5 * BLOCKSIZE)),
+                         [outbuf6] "=m" (*(outbuf + 6 * BLOCKSIZE)),
+                         [outbuf7] "=m" (*(outbuf + 7 * BLOCKSIZE))
+                       :
+                       : "memory" );
+
+         outbuf += 8*BLOCKSIZE;
+         inbuf  += 8*BLOCKSIZE;
+       }
+
+      asm volatile ("pxor %[first_key], %%xmm5\n\t"
+                   "pxor %%xmm0, %%xmm0\n\t"
+                   "movdqu %%xmm0, %[lxfkey]\n\t"
+                   : [lxfkey] "=m" (*lxf_key)
+                   : [first_key] "m" (ctx->keyschdec[0][0][0])
+                   : "memory" );
+
+      aesni_cleanup_8_15();
+    }
+#endif
+
+  for ( ;nblocks >= 4 ; nblocks -= 4 )
+    {
+      n += 4;
+      l = aes_ocb_get_l(c, n);
+
+      /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+      /* C_i = Offset_i xor DECIPHER(K, P_i xor Offset_i)  */
+      asm volatile ("movdqu %[l0],     %%xmm0\n\t"
+                   "movdqu %[inbuf0], %%xmm1\n\t"
+                   "movdqu %[l0l1],   %%xmm3\n\t"
+                   :
+                   : [l0] "m" (*c->u_mode.ocb.L[0]),
+                     [l0l1] "m" (*c->u_mode.ocb.L0L1),
+                     [inbuf0] "m" (*(inbuf + 0 * BLOCKSIZE))
+                   : "memory" );
+      asm volatile ("movdqu %[l1],     %%xmm4\n\t"
+                   "movdqu %[l3],     %%xmm6\n\t"
+                   "pxor   %%xmm5,    %%xmm0\n\t"
+                   "pxor   %%xmm0,    %%xmm1\n\t"
+                   "movdqa %%xmm0,    %[tmpbuf0]\n\t"
+                   : [tmpbuf0] "=m" (*(tmpbuf + 0 * BLOCKSIZE))
+                   : [l1] "m" (*c->u_mode.ocb.L[1]),
+                     [l3] "m" (*l)
+                   : "memory" );
+      asm volatile ("movdqu %[inbuf1], %%xmm2\n\t"
+                   "pxor   %%xmm5,    %%xmm3\n\t"
+                   "pxor   %%xmm3,    %%xmm2\n\t"
+                   "movdqa %%xmm3,    %[tmpbuf1]\n\t"
+                   : [tmpbuf1] "=m" (*(tmpbuf + 1 * BLOCKSIZE))
+                   : [inbuf1] "m" (*(inbuf + 1 * BLOCKSIZE))
+                   : "memory" );
+      asm volatile ("movdqa %%xmm4,    %%xmm0\n\t"
+                   "movdqu %[inbuf2], %%xmm3\n\t"
+                   "pxor   %%xmm5,    %%xmm0\n\t"
+                   "pxor   %%xmm0,    %%xmm3\n\t"
+                   "movdqa %%xmm0,    %[tmpbuf2]\n\t"
+                   : [tmpbuf2] "=m" (*(tmpbuf + 2 * BLOCKSIZE))
+                   :
+                     [inbuf2] "m" (*(inbuf + 2 * BLOCKSIZE))
+                   : "memory" );
+      asm volatile ("pxor   %%xmm6,    %%xmm5\n\t"
+                   "pxor   %%xmm4,    %%xmm5\n\t"
+                   "movdqu %[inbuf3], %%xmm4\n\t"
+                   "pxor   %%xmm5,    %%xmm4\n\t"
+                   :
+                   : [inbuf3] "m" (*(inbuf + 3 * BLOCKSIZE))
+                   : "memory" );
+
+      do_aesni_dec_vec4 (ctx);
+
+      asm volatile ("pxor   %[tmpbuf0],%%xmm1\n\t"
+                   "movdqu %%xmm1,    %[outbuf0]\n\t"
+                   "pxor   %[tmpbuf1],%%xmm2\n\t"
+                   "movdqu %%xmm2,    %[outbuf1]\n\t"
+                   : [outbuf0] "=m" (*(outbuf + 0 * BLOCKSIZE)),
+                     [outbuf1] "=m" (*(outbuf + 1 * BLOCKSIZE))
+                   : [tmpbuf0] "m" (*(tmpbuf + 0 * BLOCKSIZE)),
+                     [tmpbuf1] "m" (*(tmpbuf + 1 * BLOCKSIZE))
+                   : "memory" );
+      asm volatile ("pxor   %[tmpbuf2],%%xmm3\n\t"
+                   "movdqu %%xmm3,    %[outbuf2]\n\t"
+                   "pxor   %%xmm5,    %%xmm4\n\t"
+                   "movdqu %%xmm4,    %[outbuf3]\n\t"
+                   : [outbuf2] "=m" (*(outbuf + 2 * BLOCKSIZE)),
+                     [outbuf3] "=m" (*(outbuf + 3 * BLOCKSIZE))
+                   : [tmpbuf2] "m" (*(tmpbuf + 2 * BLOCKSIZE))
+                   : "memory" );
+
+      outbuf += 4*BLOCKSIZE;
+      inbuf  += 4*BLOCKSIZE;
+    }
+
+  for ( ;nblocks; nblocks-- )
+    {
+      l = aes_ocb_get_l(c, ++n);
+
+      /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+      /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i)  */
+      /* Checksum_i = Checksum_{i-1} xor P_i  */
+      asm volatile ("movdqu %[l],     %%xmm1\n\t"
+                    "movdqu %[inbuf], %%xmm0\n\t"
+                    "pxor   %%xmm1,   %%xmm5\n\t"
+                    "pxor   %%xmm5,   %%xmm0\n\t"
+                    :
+                    : [l] "m" (*l),
+                      [inbuf] "m" (*inbuf)
+                    : "memory" );
+
+      do_aesni_dec (ctx);
+
+      asm volatile ("pxor   %%xmm5, %%xmm0\n\t"
+                    "movdqu %%xmm0, %[outbuf]\n\t"
+                    : [outbuf] "=m" (*outbuf)
+                    :
+                    : "memory" );
+
+      inbuf += BLOCKSIZE;
+      outbuf += BLOCKSIZE;
+    }
+
+  c->u_mode.ocb.data_nblocks = n;
+  asm volatile ("movdqu %%xmm5, %[iv]\n\t"
+                : [iv] "=m" (*c->u_iv.iv)
+                :
+                : "memory" );
+
+  asm volatile ("pxor   %%xmm0, %%xmm0\n\t"
+                "movdqa %%xmm0, %[tmpbuf0]\n\t"
+                "movdqa %%xmm0, %[tmpbuf1]\n\t"
+                "movdqa %%xmm0, %[tmpbuf2]\n\t"
+               : [tmpbuf0] "=m" (*(tmpbuf + 0 * BLOCKSIZE)),
+                 [tmpbuf1] "=m" (*(tmpbuf + 1 * BLOCKSIZE)),
+                 [tmpbuf2] "=m" (*(tmpbuf + 2 * BLOCKSIZE))
+                :
+                : "memory" );
+
+  aesni_ocb_checksum (c, outbuf_arg, nblocks_arg);
+
+  aesni_cleanup ();
+  aesni_cleanup_2_7 ();
+
+  return 0;
+}
+
+
+size_t ASM_FUNC_ATTR
+_gcry_aes_aesni_ocb_crypt(gcry_cipher_hd_t c, void *outbuf_arg,
+                          const void *inbuf_arg, size_t nblocks, int encrypt)
+{
+  if (encrypt)
+    return aesni_ocb_enc(c, outbuf_arg, inbuf_arg, nblocks);
+  else
+    return aesni_ocb_dec(c, outbuf_arg, inbuf_arg, nblocks);
+}
+
+
+size_t ASM_FUNC_ATTR
+_gcry_aes_aesni_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
+                          size_t nblocks)
+{
+  RIJNDAEL_context *ctx = (void *)&c->context.c;
+  const unsigned char *abuf = abuf_arg;
+  u64 n = c->u_mode.ocb.aad_nblocks;
+  const unsigned char *l;
+  aesni_prepare_2_7_variable;
+
+  aesni_prepare ();
+  aesni_prepare_2_7 ();
+
+  /* Preload Offset and Sum */
+  asm volatile ("movdqu %[iv], %%xmm5\n\t"
+                "movdqu %[ctr], %%xmm6\n\t"
+                : /* No output */
+                : [iv] "m" (*c->u_mode.ocb.aad_offset),
+                  [ctr] "m" (*c->u_mode.ocb.aad_sum)
+                : "memory" );
+
+  for ( ;nblocks && n % 4; nblocks-- )
+    {
+      l = aes_ocb_get_l(c, ++n);
+
+      /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+      /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i)  */
+      asm volatile ("movdqu %[l],     %%xmm1\n\t"
+                    "movdqu %[abuf],  %%xmm0\n\t"
+                    "pxor   %%xmm1,   %%xmm5\n\t"
+                    "pxor   %%xmm5,   %%xmm0\n\t"
+                    :
+                    : [l] "m" (*l),
+                      [abuf] "m" (*abuf)
+                    : "memory" );
+
+      do_aesni_enc (ctx);
+
+      asm volatile ("pxor   %%xmm0,   %%xmm6\n\t"
+                    :
+                    :
+                    : "memory" );
+
+      abuf += BLOCKSIZE;
+    }
+
+#ifdef __x86_64__
+  if (nblocks >= 8)
+    {
+      aesni_prepare_8_15_variable;
+
+      aesni_prepare_8_15();
+
+      asm volatile ("movdqu %[l0],     %%xmm7\n\t"
+                   "movdqu %[l0l1],   %%xmm12\n\t"
+                   "movdqu %[l1],     %%xmm13\n\t"
+                   :
+                   : [l0] "m" (*c->u_mode.ocb.L[0]),
+                     [l0l1] "m" (*c->u_mode.ocb.L0L1),
+                     [l1] "m" (*c->u_mode.ocb.L[1])
+                   : "memory" );
+
+      for ( ;nblocks >= 8 ; nblocks -= 8 )
+       {
+         n += 4;
+         l = aes_ocb_get_l(c, n);
+
+         asm volatile ("movdqu %[l3],   %%xmm0\n\t"
+                       "pxor   %%xmm13, %%xmm0\n\t"
+                       :
+                       : [l3] "m" (*l)
+                       : "memory" );
+
+         n += 4;
+         l = aes_ocb_get_l(c, n);
+
+         asm volatile ("movdqu %[l7],   %%xmm14\n\t"
+                       "pxor   %%xmm13, %%xmm14\n\t"
+                       :
+                       : [l7] "m" (*l)
+                       : "memory" );
+
+         /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+         /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i)  */
+         asm volatile ("movdqu %[abuf0],  %%xmm1\n\t"
+                       "movdqu %[abuf1],  %%xmm2\n\t"
+                       "movdqu %[abuf2],  %%xmm3\n\t"
+                       "movdqu %[abuf3],  %%xmm4\n\t"
+                       :
+                       : [abuf0] "m" (*(abuf + 0 * BLOCKSIZE)),
+                         [abuf1] "m" (*(abuf + 1 * BLOCKSIZE)),
+                         [abuf2] "m" (*(abuf + 2 * BLOCKSIZE)),
+                         [abuf3] "m" (*(abuf + 3 * BLOCKSIZE))
+                       : "memory" );
+         asm volatile ("movdqu %[abuf4],  %%xmm8\n\t"
+                       "movdqu %[abuf5],  %%xmm9\n\t"
+                       "movdqu %[abuf6],  %%xmm10\n\t"
+                       "movdqu %[abuf7],  %%xmm11\n\t"
+                       :
+                       : [abuf4] "m" (*(abuf + 4 * BLOCKSIZE)),
+                         [abuf5] "m" (*(abuf + 5 * BLOCKSIZE)),
+                         [abuf6] "m" (*(abuf + 6 * BLOCKSIZE)),
+                         [abuf7] "m" (*(abuf + 7 * BLOCKSIZE))
+                       : "memory" );
+         asm volatile ("pxor   %%xmm7,    %%xmm1\n\t"
+                       "pxor   %%xmm5,    %%xmm1\n\t"
+
+                       "pxor   %%xmm12,   %%xmm2\n\t"
+                       "pxor   %%xmm5,    %%xmm2\n\t"
+
+                       "pxor   %%xmm13,   %%xmm3\n\t"
+                       "pxor   %%xmm5,    %%xmm3\n\t"
+
+                       "pxor   %%xmm0,    %%xmm5\n\t"
+                       "movdqa (%[key]),  %%xmm0\n\t"
+                       "pxor   %%xmm5,    %%xmm4\n\t"
+
+                       "pxor   %%xmm0, %%xmm1\n\t"     /* xmm1 ^= key[0] */
+                       "pxor   %%xmm0, %%xmm2\n\t"     /* xmm2 ^= key[0] */
+                       "pxor   %%xmm0, %%xmm3\n\t"     /* xmm3 ^= key[0] */
+                       "pxor   %%xmm0, %%xmm4\n\t"     /* xmm4 ^= key[0] */
+
+                       "pxor   %%xmm7,    %%xmm8\n\t"
+                       "pxor   %%xmm5,    %%xmm8\n\t"
+
+                       "pxor   %%xmm12,   %%xmm9\n\t"
+                       "pxor   %%xmm5,    %%xmm9\n\t"
+
+                       "pxor   %%xmm13,   %%xmm10\n\t"
+                       "pxor   %%xmm5,    %%xmm10\n\t"
+
+                       "pxor   %%xmm14,   %%xmm5\n\t"
+                       "pxor   %%xmm5,    %%xmm11\n\t"
+
+                       "pxor   %%xmm0, %%xmm8\n\t"     /* xmm8 ^= key[0] */
+                       "pxor   %%xmm0, %%xmm9\n\t"     /* xmm9 ^= key[0] */
+                       "pxor   %%xmm0, %%xmm10\n\t"    /* xmm10 ^= key[0] */
+                       "pxor   %%xmm0, %%xmm11\n\t"    /* xmm11 ^= key[0] */
+                       :
+                       : [key] "r" (ctx->keyschenc)
+                       : "memory" );
+
+         do_aesni_enc_vec8 (ctx);
+
+         asm volatile (
+                       "aesenclast %%xmm0, %%xmm1\n\t"
+                       "aesenclast %%xmm0, %%xmm2\n\t"
+                       "aesenclast %%xmm0, %%xmm3\n\t"
+                       "aesenclast %%xmm0, %%xmm4\n\t"
+                       "aesenclast %%xmm0, %%xmm8\n\t"
+                       "aesenclast %%xmm0, %%xmm9\n\t"
+                       "aesenclast %%xmm0, %%xmm10\n\t"
+                       "aesenclast %%xmm0, %%xmm11\n\t"
+                       "pxor   %%xmm2,   %%xmm1\n\t"
+                       "pxor   %%xmm3,   %%xmm1\n\t"
+                       "pxor   %%xmm4,   %%xmm1\n\t"
+                       "pxor   %%xmm8,   %%xmm1\n\t"
+                       "pxor   %%xmm9,   %%xmm6\n\t"
+                       "pxor   %%xmm10,  %%xmm6\n\t"
+                       "pxor   %%xmm11,  %%xmm6\n\t"
+                       "pxor   %%xmm1,   %%xmm6\n\t"
+                       :
+                       :
+                       : "memory" );
+
+         abuf += 8*BLOCKSIZE;
+       }
+
+      aesni_cleanup_8_15();
+    }
+#endif
+
+  for ( ;nblocks >= 4 ; nblocks -= 4 )
+    {
+      n += 4;
+      l = aes_ocb_get_l(c, n);
+
+      /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+      /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i)  */
+      asm volatile ("movdqu %[l0],     %%xmm0\n\t"
+                   "movdqu %[abuf0],  %%xmm1\n\t"
+                   "movdqu %[l0l1],   %%xmm3\n\t"
+                   :
+                   : [l0] "m" (*c->u_mode.ocb.L[0]),
+                     [l0l1] "m" (*c->u_mode.ocb.L0L1),
+                     [abuf0] "m" (*(abuf + 0 * BLOCKSIZE))
+                   : "memory" );
+      asm volatile ("movdqu %[l1],     %%xmm4\n\t"
+                   "movdqu %[l3],     %%xmm7\n\t"
+                   "pxor   %%xmm5,    %%xmm0\n\t"
+                   "pxor   %%xmm0,    %%xmm1\n\t"
+                   :
+                   : [l1] "m" (*c->u_mode.ocb.L[1]),
+                     [l3] "m" (*l)
+                   : "memory" );
+      asm volatile ("movdqu %[abuf1],  %%xmm2\n\t"
+                   "pxor   %%xmm5,    %%xmm3\n\t"
+                   "pxor   %%xmm3,    %%xmm2\n\t"
+                   :
+                   : [abuf1] "m" (*(abuf + 1 * BLOCKSIZE))
+                   : "memory" );
+      asm volatile ("movdqa %%xmm4,    %%xmm0\n\t"
+                   "movdqu %[abuf2],  %%xmm3\n\t"
+                   "pxor   %%xmm5,    %%xmm0\n\t"
+                   "pxor   %%xmm0,    %%xmm3\n\t"
+                   :
+                   : [abuf2] "m" (*(abuf + 2 * BLOCKSIZE))
+                   : "memory" );
+      asm volatile ("pxor   %%xmm7,    %%xmm5\n\t"
+                   "pxor   %%xmm4,    %%xmm5\n\t"
+                   "movdqu %[abuf3],  %%xmm4\n\t"
+                   "pxor   %%xmm5,    %%xmm4\n\t"
+                   :
+                   : [abuf3] "m" (*(abuf + 3 * BLOCKSIZE))
+                   : "memory" );
+
+      do_aesni_enc_vec4 (ctx);
+
+      asm volatile ("pxor   %%xmm1,   %%xmm6\n\t"
+                   "pxor   %%xmm2,   %%xmm6\n\t"
+                   "pxor   %%xmm3,   %%xmm6\n\t"
+                   "pxor   %%xmm4,   %%xmm6\n\t"
+                   :
+                   :
+                   : "memory" );
+
+      abuf += 4*BLOCKSIZE;
+    }
+
+  for ( ;nblocks; nblocks-- )
+    {
+      l = aes_ocb_get_l(c, ++n);
+
+      /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+      /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i)  */
+      asm volatile ("movdqu %[l],     %%xmm1\n\t"
+                    "movdqu %[abuf],  %%xmm0\n\t"
+                    "pxor   %%xmm1,   %%xmm5\n\t"
+                    "pxor   %%xmm5,   %%xmm0\n\t"
+                    :
+                    : [l] "m" (*l),
+                      [abuf] "m" (*abuf)
+                    : "memory" );
+
+      do_aesni_enc (ctx);
+
+      asm volatile ("pxor   %%xmm0,   %%xmm6\n\t"
+                    :
+                    :
+                    : "memory" );
+
+      abuf += BLOCKSIZE;
+    }
+
+  c->u_mode.ocb.aad_nblocks = n;
+  asm volatile ("movdqu %%xmm5, %[iv]\n\t"
+                "movdqu %%xmm6, %[ctr]\n\t"
+                : [iv] "=m" (*c->u_mode.ocb.aad_offset),
+                  [ctr] "=m" (*c->u_mode.ocb.aad_sum)
+                :
+                : "memory" );
+
+  aesni_cleanup ();
+  aesni_cleanup_2_7 ();
+
+  return 0;
+}
+
+
+static const u64 xts_gfmul_const[2] __attribute__ ((aligned (16))) =
+  { 0x87, 0x01 };
+
+
+static void ASM_FUNC_ATTR
+_gcry_aes_aesni_xts_enc (RIJNDAEL_context *ctx, unsigned char *tweak,
+                        unsigned char *outbuf, const unsigned char *inbuf,
+                        size_t nblocks)
+{
+  aesni_prepare_2_7_variable;
+
+  aesni_prepare ();
+  aesni_prepare_2_7 ();
+
+  /* Preload Tweak */
+  asm volatile ("movdqu %[tweak], %%xmm5\n\t"
+               "movdqa %[gfmul], %%xmm6\n\t"
+               :
+               : [tweak] "m" (*tweak),
+                 [gfmul] "m" (*xts_gfmul_const)
+               : "memory" );
+
+#ifdef __x86_64__
+  if (nblocks >= 8)
+    {
+      aesni_prepare_8_15_variable;
+
+      aesni_prepare_8_15();
+
+      for ( ;nblocks >= 8 ; nblocks -= 8 )
+       {
+         asm volatile ("pshufd $0x13,     %%xmm5,  %%xmm11\n\t"
+                       "movdqu %[inbuf0], %%xmm1\n\t"
+                       "pxor   %%xmm5,    %%xmm1\n\t"
+                       "movdqa %%xmm5,    %%xmm7\n\t"
+
+                       "movdqa %%xmm11,   %%xmm0\n\t"
+                       "paddd  %%xmm11,   %%xmm11\n\t"
+                       "psrad  $31,       %%xmm0\n\t"
+                       "paddq  %%xmm5,    %%xmm5\n\t"
+                       "pand   %%xmm6,    %%xmm0\n\t"
+                       "pxor   %%xmm0,    %%xmm5\n\t"
+                       :
+                       : [inbuf0] "m" (*(inbuf + 0 * 16))
+                       : "memory" );
+
+         asm volatile ("movdqu %[inbuf1], %%xmm2\n\t"
+                       "pxor   %%xmm5,    %%xmm2\n\t"
+                       "movdqa %%xmm5,    %%xmm12\n\t"
+
+                       "movdqa %%xmm11,   %%xmm0\n\t"
+                       "paddd  %%xmm11,   %%xmm11\n\t"
+                       "psrad  $31,       %%xmm0\n\t"
+                       "paddq  %%xmm5,    %%xmm5\n\t"
+                       "pand   %%xmm6,    %%xmm0\n\t"
+                       "pxor   %%xmm0,    %%xmm5\n\t"
+                       :
+                       : [inbuf1] "m" (*(inbuf + 1 * 16))
+                       : "memory" );
+
+         asm volatile ("movdqu %[inbuf2], %%xmm3\n\t"
+                       "pxor   %%xmm5,    %%xmm3\n\t"
+                       "movdqa %%xmm5,    %%xmm13\n\t"
+
+                       "movdqa %%xmm11,   %%xmm0\n\t"
+                       "paddd  %%xmm11,   %%xmm11\n\t"
+                       "psrad  $31,       %%xmm0\n\t"
+                       "paddq  %%xmm5,    %%xmm5\n\t"
+                       "pand   %%xmm6,    %%xmm0\n\t"
+                       "pxor   %%xmm0,    %%xmm5\n\t"
+                       :
+                       : [inbuf2] "m" (*(inbuf + 2 * 16))
+                       : "memory" );
+
+         asm volatile ("movdqu %[inbuf3], %%xmm4\n\t"
+                       "pxor   %%xmm5,    %%xmm4\n\t"
+                       "movdqa %%xmm5,    %%xmm14\n\t"
+
+                       "movdqa %%xmm11,   %%xmm0\n\t"
+                       "paddd  %%xmm11,   %%xmm11\n\t"
+                       "psrad  $31,       %%xmm0\n\t"
+                       "paddq  %%xmm5,    %%xmm5\n\t"
+                       "pand   %%xmm6,    %%xmm0\n\t"
+                       "pxor   %%xmm0,    %%xmm5\n\t"
+                       :
+                       : [inbuf3] "m" (*(inbuf + 3 * 16))
+                       : "memory" );
+
+         asm volatile ("movdqu %[inbuf4], %%xmm8\n\t"
+                       "pxor   %%xmm5,    %%xmm8\n\t"
+                       "movdqa %%xmm5,    %%xmm15\n\t"
+
+                       "movdqa %%xmm11,   %%xmm0\n\t"
+                       "paddd  %%xmm11,   %%xmm11\n\t"
+                       "psrad  $31,       %%xmm0\n\t"
+                       "paddq  %%xmm5,    %%xmm5\n\t"
+                       "pand   %%xmm6,    %%xmm0\n\t"
+                       "pxor   %%xmm0,    %%xmm5\n\t"
+                       :
+                       : [inbuf4] "m" (*(inbuf + 4 * 16))
+                       : "memory" );
+
+         asm volatile ("movdqu %[inbuf5], %%xmm9\n\t"
+                       "pxor   %%xmm5,    %%xmm9\n\t"
+                       "movdqu %%xmm5,    %[outbuf5]\n\t"
+
+                       "movdqa %%xmm11,   %%xmm0\n\t"
+                       "paddd  %%xmm11,   %%xmm11\n\t"
+                       "psrad  $31,       %%xmm0\n\t"
+                       "paddq  %%xmm5,    %%xmm5\n\t"
+                       "pand   %%xmm6,    %%xmm0\n\t"
+                       "pxor   %%xmm0,    %%xmm5\n\t"
+                       : [outbuf5] "=m" (*(outbuf + 5 * 16))
+                       : [inbuf5] "m" (*(inbuf + 5 * 16))
+                       : "memory" );
+
+         asm volatile ("movdqu %[inbuf6], %%xmm10\n\t"
+                       "pxor   %%xmm5,    %%xmm10\n\t"
+                       "movdqu %%xmm5,    %[outbuf6]\n\t"
+
+                       "movdqa %%xmm11,   %%xmm0\n\t"
+                       "paddd  %%xmm11,   %%xmm11\n\t"
+                       "psrad  $31,       %%xmm0\n\t"
+                       "paddq  %%xmm5,    %%xmm5\n\t"
+                       "pand   %%xmm6,    %%xmm0\n\t"
+                       "pxor   %%xmm0,    %%xmm5\n\t"
+                       : [outbuf6] "=m" (*(outbuf + 6 * 16))
+                       : [inbuf6] "m" (*(inbuf + 6 * 16))
+                       : "memory" );
+
+         asm volatile ("movdqa %%xmm11,   %%xmm0\n\t"
+                       "movdqu %[inbuf7], %%xmm11\n\t"
+                       "pxor   %%xmm5,    %%xmm11\n\t"
+                       "movdqu %%xmm5,    %[outbuf7]\n\t"
+
+                       "psrad  $31,       %%xmm0\n\t"
+                       "paddq  %%xmm5,    %%xmm5\n\t"
+                       "pand   %%xmm6,    %%xmm0\n\t"
+                       "pxor   %%xmm0,    %%xmm5\n\t"
+                       : [outbuf7] "=m" (*(outbuf + 7 * 16))
+                       : [inbuf7] "m" (*(inbuf + 7 * 16))
+                       : "memory" );
+
+         asm volatile ("cmpl $12, %[rounds]\n\t"
+                       "movdqa (%[key]), %%xmm0\n\t"
+                       "pxor %%xmm0, %%xmm1\n\t"
+                       "pxor %%xmm0, %%xmm2\n\t"
+                       "pxor %%xmm0, %%xmm3\n\t"
+                       "pxor %%xmm0, %%xmm4\n\t"
+                       "pxor %%xmm0, %%xmm8\n\t"
+                       "pxor %%xmm0, %%xmm9\n\t"
+                       "pxor %%xmm0, %%xmm10\n\t"
+                       "pxor %%xmm0, %%xmm11\n\t"
+                       "movdqa 0x10(%[key]), %%xmm0\n\t"
+                       "aesenc %%xmm0, %%xmm1\n\t"
+                       "aesenc %%xmm0, %%xmm2\n\t"
+                       "aesenc %%xmm0, %%xmm3\n\t"
+                       "aesenc %%xmm0, %%xmm4\n\t"
+                       "aesenc %%xmm0, %%xmm8\n\t"
+                       "aesenc %%xmm0, %%xmm9\n\t"
+                       "aesenc %%xmm0, %%xmm10\n\t"
+                       "aesenc %%xmm0, %%xmm11\n\t"
+                       "movdqa 0x20(%[key]), %%xmm0\n\t"
+                       "aesenc %%xmm0, %%xmm1\n\t"
+                       "aesenc %%xmm0, %%xmm2\n\t"
+                       "aesenc %%xmm0, %%xmm3\n\t"
+                       "aesenc %%xmm0, %%xmm4\n\t"
+                       "aesenc %%xmm0, %%xmm8\n\t"
+                       "aesenc %%xmm0, %%xmm9\n\t"
+                       "aesenc %%xmm0, %%xmm10\n\t"
+                       "aesenc %%xmm0, %%xmm11\n\t"
+                       "movdqa 0x30(%[key]), %%xmm0\n\t"
+                       "aesenc %%xmm0, %%xmm1\n\t"
+                       "aesenc %%xmm0, %%xmm2\n\t"
+                       "aesenc %%xmm0, %%xmm3\n\t"
+                       "aesenc %%xmm0, %%xmm4\n\t"
+                       "aesenc %%xmm0, %%xmm8\n\t"
+                       "aesenc %%xmm0, %%xmm9\n\t"
+                       "aesenc %%xmm0, %%xmm10\n\t"
+                       "aesenc %%xmm0, %%xmm11\n\t"
+                       "movdqa 0x40(%[key]), %%xmm0\n\t"
+                       "aesenc %%xmm0, %%xmm1\n\t"
+                       "aesenc %%xmm0, %%xmm2\n\t"
+                       "aesenc %%xmm0, %%xmm3\n\t"
+                       "aesenc %%xmm0, %%xmm4\n\t"
+                       "aesenc %%xmm0, %%xmm8\n\t"
+                       "aesenc %%xmm0, %%xmm9\n\t"
+                       "aesenc %%xmm0, %%xmm10\n\t"
+                       "aesenc %%xmm0, %%xmm11\n\t"
+                       "movdqa 0x50(%[key]), %%xmm0\n\t"
+                       "aesenc %%xmm0, %%xmm1\n\t"
+                       "aesenc %%xmm0, %%xmm2\n\t"
+                       "aesenc %%xmm0, %%xmm3\n\t"
+                       "aesenc %%xmm0, %%xmm4\n\t"
+                       "aesenc %%xmm0, %%xmm8\n\t"
+                       "aesenc %%xmm0, %%xmm9\n\t"
+                       "aesenc %%xmm0, %%xmm10\n\t"
+                       "aesenc %%xmm0, %%xmm11\n\t"
+                       "movdqa 0x60(%[key]), %%xmm0\n\t"
+                       "aesenc %%xmm0, %%xmm1\n\t"
+                       "aesenc %%xmm0, %%xmm2\n\t"
+                       "aesenc %%xmm0, %%xmm3\n\t"
+                       "aesenc %%xmm0, %%xmm4\n\t"
+                       "aesenc %%xmm0, %%xmm8\n\t"
+                       "aesenc %%xmm0, %%xmm9\n\t"
+                       "aesenc %%xmm0, %%xmm10\n\t"
+                       "aesenc %%xmm0, %%xmm11\n\t"
+                       "movdqa 0x70(%[key]), %%xmm0\n\t"
+                       "aesenc %%xmm0, %%xmm1\n\t"
+                       "aesenc %%xmm0, %%xmm2\n\t"
+                       "aesenc %%xmm0, %%xmm3\n\t"
+                       "aesenc %%xmm0, %%xmm4\n\t"
+                       "aesenc %%xmm0, %%xmm8\n\t"
+                       "aesenc %%xmm0, %%xmm9\n\t"
+                       "aesenc %%xmm0, %%xmm10\n\t"
+                       "aesenc %%xmm0, %%xmm11\n\t"
+                       "movdqa 0x80(%[key]), %%xmm0\n\t"
+                       "aesenc %%xmm0, %%xmm1\n\t"
+                       "aesenc %%xmm0, %%xmm2\n\t"
+                       "aesenc %%xmm0, %%xmm3\n\t"
+                       "aesenc %%xmm0, %%xmm4\n\t"
+                       "aesenc %%xmm0, %%xmm8\n\t"
+                       "aesenc %%xmm0, %%xmm9\n\t"
+                       "aesenc %%xmm0, %%xmm10\n\t"
+                       "aesenc %%xmm0, %%xmm11\n\t"
+                       "movdqa 0x90(%[key]), %%xmm0\n\t"
+                       "aesenc %%xmm0, %%xmm1\n\t"
+                       "aesenc %%xmm0, %%xmm2\n\t"
+                       "aesenc %%xmm0, %%xmm3\n\t"
+                       "aesenc %%xmm0, %%xmm4\n\t"
+                       "aesenc %%xmm0, %%xmm8\n\t"
+                       "aesenc %%xmm0, %%xmm9\n\t"
+                       "aesenc %%xmm0, %%xmm10\n\t"
+                       "aesenc %%xmm0, %%xmm11\n\t"
+                       "movdqa 0xa0(%[key]), %%xmm0\n\t"
+                       "jb .Lenclast%=\n\t"
+                       "aesenc %%xmm0, %%xmm1\n\t"
+                       "aesenc %%xmm0, %%xmm2\n\t"
+                       "aesenc %%xmm0, %%xmm3\n\t"
+                       "aesenc %%xmm0, %%xmm4\n\t"
+                       "aesenc %%xmm0, %%xmm8\n\t"
+                       "aesenc %%xmm0, %%xmm9\n\t"
+                       "aesenc %%xmm0, %%xmm10\n\t"
+                       "aesenc %%xmm0, %%xmm11\n\t"
+                       "movdqa 0xb0(%[key]), %%xmm0\n\t"
+                       "aesenc %%xmm0, %%xmm1\n\t"
+                       "aesenc %%xmm0, %%xmm2\n\t"
+                       "aesenc %%xmm0, %%xmm3\n\t"
+                       "aesenc %%xmm0, %%xmm4\n\t"
+                       "aesenc %%xmm0, %%xmm8\n\t"
+                       "aesenc %%xmm0, %%xmm9\n\t"
+                       "aesenc %%xmm0, %%xmm10\n\t"
+                       "aesenc %%xmm0, %%xmm11\n\t"
+                       "movdqa 0xc0(%[key]), %%xmm0\n\t"
+                       "je .Lenclast%=\n\t"
+                       "aesenc %%xmm0, %%xmm1\n\t"
+                       "aesenc %%xmm0, %%xmm2\n\t"
+                       "aesenc %%xmm0, %%xmm3\n\t"
+                       "aesenc %%xmm0, %%xmm4\n\t"
+                       "aesenc %%xmm0, %%xmm8\n\t"
+                       "aesenc %%xmm0, %%xmm9\n\t"
+                       "aesenc %%xmm0, %%xmm10\n\t"
+                       "aesenc %%xmm0, %%xmm11\n\t"
+                       "movdqa 0xd0(%[key]), %%xmm0\n\t"
+                       "aesenc %%xmm0, %%xmm1\n\t"
+                       "aesenc %%xmm0, %%xmm2\n\t"
+                       "aesenc %%xmm0, %%xmm3\n\t"
+                       "aesenc %%xmm0, %%xmm4\n\t"
+                       "aesenc %%xmm0, %%xmm8\n\t"
+                       "aesenc %%xmm0, %%xmm9\n\t"
+                       "aesenc %%xmm0, %%xmm10\n\t"
+                       "aesenc %%xmm0, %%xmm11\n\t"
+                       "movdqa 0xe0(%[key]), %%xmm0\n\t"
+
+                       ".Lenclast%=:\n\t"
+                       :
+                       : [key] "r" (ctx->keyschenc),
+                         [rounds] "rm" (ctx->rounds)
+                       : "cc", "memory");
+
+         asm volatile ("pxor %%xmm0, %%xmm7\n\t"
+                       "pxor %%xmm0, %%xmm12\n\t"
+                       "pxor %%xmm0, %%xmm13\n\t"
+                       "pxor %%xmm0, %%xmm14\n\t"
+                       "aesenclast %%xmm7, %%xmm1\n\t"
+                       "aesenclast %%xmm12, %%xmm2\n\t"
+                       "aesenclast %%xmm13, %%xmm3\n\t"
+                       "aesenclast %%xmm14, %%xmm4\n\t"
+                       "movdqu 5*16(%[outbuf]), %%xmm12\n\t"
+                       "movdqu 6*16(%[outbuf]), %%xmm13\n\t"
+                       "movdqu 7*16(%[outbuf]), %%xmm14\n\t"
+                       "pxor %%xmm0, %%xmm15\n\t"
+                       "pxor %%xmm0, %%xmm12\n\t"
+                       "pxor %%xmm0, %%xmm13\n\t"
+                       "pxor %%xmm0, %%xmm14\n\t"
+                       "aesenclast %%xmm15, %%xmm8\n\t"
+                       "aesenclast %%xmm12, %%xmm9\n\t"
+                       "aesenclast %%xmm13, %%xmm10\n\t"
+                       "aesenclast %%xmm14, %%xmm11\n\t"
+                       "movdqu %%xmm1, 0*16(%[outbuf])\n\t"
+                       "movdqu %%xmm2, 1*16(%[outbuf])\n\t"
+                       "movdqu %%xmm3, 2*16(%[outbuf])\n\t"
+                       "movdqu %%xmm4, 3*16(%[outbuf])\n\t"
+                       "movdqu %%xmm8, 4*16(%[outbuf])\n\t"
+                       "movdqu %%xmm9, 5*16(%[outbuf])\n\t"
+                       "movdqu %%xmm10, 6*16(%[outbuf])\n\t"
+                       "movdqu %%xmm11, 7*16(%[outbuf])\n\t"
+                       :
+                       : [outbuf] "r" (outbuf)
+                       : "memory" );
+
+         outbuf += 8*BLOCKSIZE;
+         inbuf  += 8*BLOCKSIZE;
+       }
+
+      aesni_cleanup_8_15();
+    }
+#endif
+
+  for ( ;nblocks >= 4; nblocks -= 4 )
+    {
+      asm volatile ("pshufd $0x13,     %%xmm5,  %%xmm4\n\t"
+                   "movdqu %[inbuf0], %%xmm1\n\t"
+                   "pxor   %%xmm5,    %%xmm1\n\t"
+                   "movdqu %%xmm5,    %[outbuf0]\n\t"
+
+                   "movdqa %%xmm4,    %%xmm0\n\t"
+                   "paddd  %%xmm4,    %%xmm4\n\t"
+                   "psrad  $31,       %%xmm0\n\t"
+                   "paddq  %%xmm5,    %%xmm5\n\t"
+                   "pand   %%xmm6,    %%xmm0\n\t"
+                   "pxor   %%xmm0,    %%xmm5\n\t"
+                   : [outbuf0] "=m" (*(outbuf + 0 * 16))
+                   : [inbuf0] "m" (*(inbuf + 0 * 16))
+                   : "memory" );
+
+      asm volatile ("movdqu %[inbuf1], %%xmm2\n\t"
+                   "pxor   %%xmm5,    %%xmm2\n\t"
+                   "movdqu %%xmm5,    %[outbuf1]\n\t"
+
+                   "movdqa %%xmm4,    %%xmm0\n\t"
+                   "paddd  %%xmm4,    %%xmm4\n\t"
+                   "psrad  $31,       %%xmm0\n\t"
+                   "paddq  %%xmm5,    %%xmm5\n\t"
+                   "pand   %%xmm6,    %%xmm0\n\t"
+                   "pxor   %%xmm0,    %%xmm5\n\t"
+                   : [outbuf1] "=m" (*(outbuf + 1 * 16))
+                   : [inbuf1] "m" (*(inbuf + 1 * 16))
+                   : "memory" );
+
+      asm volatile ("movdqu %[inbuf2], %%xmm3\n\t"
+                   "pxor   %%xmm5,    %%xmm3\n\t"
+                   "movdqu %%xmm5,    %[outbuf2]\n\t"
+
+                   "movdqa %%xmm4,    %%xmm0\n\t"
+                   "paddd  %%xmm4,    %%xmm4\n\t"
+                   "psrad  $31,       %%xmm0\n\t"
+                   "paddq  %%xmm5,    %%xmm5\n\t"
+                   "pand   %%xmm6,    %%xmm0\n\t"
+                   "pxor   %%xmm0,    %%xmm5\n\t"
+                   : [outbuf2] "=m" (*(outbuf + 2 * 16))
+                   : [inbuf2] "m" (*(inbuf + 2 * 16))
+                   : "memory" );
+
+      asm volatile ("movdqa %%xmm4,    %%xmm0\n\t"
+                   "movdqu %[inbuf3], %%xmm4\n\t"
+                   "pxor   %%xmm5,    %%xmm4\n\t"
+                   "movdqu %%xmm5,    %[outbuf3]\n\t"
+
+                   "psrad  $31,       %%xmm0\n\t"
+                   "paddq  %%xmm5,    %%xmm5\n\t"
+                   "pand   %%xmm6,    %%xmm0\n\t"
+                   "pxor   %%xmm0,    %%xmm5\n\t"
+                   : [outbuf3] "=m" (*(outbuf + 3 * 16))
+                   : [inbuf3] "m" (*(inbuf + 3 * 16))
+                   : "memory" );
+
+      do_aesni_enc_vec4 (ctx);
+
+      asm volatile ("movdqu %[outbuf0], %%xmm0\n\t"
+                    "pxor   %%xmm0,     %%xmm1\n\t"
+                   "movdqu %[outbuf1], %%xmm0\n\t"
+                   "movdqu %%xmm1,     %[outbuf0]\n\t"
+                   "movdqu %[outbuf2], %%xmm1\n\t"
+                    "pxor   %%xmm0,     %%xmm2\n\t"
+                   "movdqu %[outbuf3], %%xmm0\n\t"
+                    "pxor   %%xmm1,     %%xmm3\n\t"
+                    "pxor   %%xmm0,     %%xmm4\n\t"
+                   "movdqu %%xmm2,     %[outbuf1]\n\t"
+                   "movdqu %%xmm3,     %[outbuf2]\n\t"
+                   "movdqu %%xmm4,     %[outbuf3]\n\t"
+                   : [outbuf0] "+m" (*(outbuf + 0 * 16)),
+                     [outbuf1] "+m" (*(outbuf + 1 * 16)),
+                     [outbuf2] "+m" (*(outbuf + 2 * 16)),
+                     [outbuf3] "+m" (*(outbuf + 3 * 16))
+                   :
+                   : "memory" );
+
+      outbuf += BLOCKSIZE * 4;
+      inbuf += BLOCKSIZE * 4;
+    }
+
+  for ( ;nblocks; nblocks-- )
+    {
+      asm volatile ("movdqu %[inbuf],  %%xmm0\n\t"
+                   "pxor   %%xmm5,    %%xmm0\n\t"
+                   "movdqa %%xmm5,    %%xmm4\n\t"
+
+                   "pshufd $0x13,     %%xmm5,  %%xmm1\n\t"
+                   "psrad  $31,       %%xmm1\n\t"
+                   "paddq  %%xmm5,    %%xmm5\n\t"
+                   "pand   %%xmm6,    %%xmm1\n\t"
+                   "pxor   %%xmm1,    %%xmm5\n\t"
+                   :
+                   : [inbuf] "m" (*inbuf)
+                   : "memory" );
+
+      do_aesni_enc (ctx);
+
+      asm volatile ("pxor   %%xmm4,    %%xmm0\n\t"
+                   "movdqu %%xmm0,    %[outbuf]\n\t"
+                   : [outbuf] "=m" (*outbuf)
+                   :
+                   : "memory" );
+
+      outbuf += BLOCKSIZE;
+      inbuf += BLOCKSIZE;
+    }
+
+  asm volatile ("movdqu %%xmm5, %[tweak]\n\t"
+               : [tweak] "=m" (*tweak)
+               :
+               : "memory" );
+
+  aesni_cleanup ();
+  aesni_cleanup_2_7 ();
+}
+
+
+static void ASM_FUNC_ATTR
+_gcry_aes_aesni_xts_dec (RIJNDAEL_context *ctx, unsigned char *tweak,
+                        unsigned char *outbuf, const unsigned char *inbuf,
+                        size_t nblocks)
+{
+  aesni_prepare_2_7_variable;
+
+  aesni_prepare ();
+  aesni_prepare_2_7 ();
+
+  if ( !ctx->decryption_prepared )
+    {
+      do_aesni_prepare_decryption ( ctx );
+      ctx->decryption_prepared = 1;
+    }
+
+  /* Preload Tweak */
+  asm volatile ("movdqu %[tweak], %%xmm5\n\t"
+               "movdqa %[gfmul], %%xmm6\n\t"
+               :
+               : [tweak] "m" (*tweak),
+                 [gfmul] "m" (*xts_gfmul_const)
+               : "memory" );
+
+#ifdef __x86_64__
+  if (nblocks >= 8)
+    {
+      aesni_prepare_8_15_variable;
+
+      aesni_prepare_8_15();
+
+      for ( ;nblocks >= 8 ; nblocks -= 8 )
+       {
+         asm volatile ("pshufd $0x13,     %%xmm5,  %%xmm11\n\t"
+                       "movdqu %[inbuf0], %%xmm1\n\t"
+                       "pxor   %%xmm5,    %%xmm1\n\t"
+                       "movdqa %%xmm5,    %%xmm7\n\t"
+
+                       "movdqa %%xmm11,   %%xmm0\n\t"
+                       "paddd  %%xmm11,   %%xmm11\n\t"
+                       "psrad  $31,       %%xmm0\n\t"
+                       "paddq  %%xmm5,    %%xmm5\n\t"
+                       "pand   %%xmm6,    %%xmm0\n\t"
+                       "pxor   %%xmm0,    %%xmm5\n\t"
+                       :
+                       : [inbuf0] "m" (*(inbuf + 0 * 16))
+                       : "memory" );
+
+         asm volatile ("movdqu %[inbuf1], %%xmm2\n\t"
+                       "pxor   %%xmm5,    %%xmm2\n\t"
+                       "movdqa %%xmm5,    %%xmm12\n\t"
+
+                       "movdqa %%xmm11,   %%xmm0\n\t"
+                       "paddd  %%xmm11,   %%xmm11\n\t"
+                       "psrad  $31,       %%xmm0\n\t"
+                       "paddq  %%xmm5,    %%xmm5\n\t"
+                       "pand   %%xmm6,    %%xmm0\n\t"
+                       "pxor   %%xmm0,    %%xmm5\n\t"
+                       :
+                       : [inbuf1] "m" (*(inbuf + 1 * 16))
+                       : "memory" );
+
+         asm volatile ("movdqu %[inbuf2], %%xmm3\n\t"
+                       "pxor   %%xmm5,    %%xmm3\n\t"
+                       "movdqa %%xmm5,    %%xmm13\n\t"
+
+                       "movdqa %%xmm11,   %%xmm0\n\t"
+                       "paddd  %%xmm11,   %%xmm11\n\t"
+                       "psrad  $31,       %%xmm0\n\t"
+                       "paddq  %%xmm5,    %%xmm5\n\t"
+                       "pand   %%xmm6,    %%xmm0\n\t"
+                       "pxor   %%xmm0,    %%xmm5\n\t"
+                       :
+                       : [inbuf2] "m" (*(inbuf + 2 * 16))
+                       : "memory" );
+
+         asm volatile ("movdqu %[inbuf3], %%xmm4\n\t"
+                       "pxor   %%xmm5,    %%xmm4\n\t"
+                       "movdqa %%xmm5,    %%xmm14\n\t"
+
+                       "movdqa %%xmm11,   %%xmm0\n\t"
+                       "paddd  %%xmm11,   %%xmm11\n\t"
+                       "psrad  $31,       %%xmm0\n\t"
+                       "paddq  %%xmm5,    %%xmm5\n\t"
+                       "pand   %%xmm6,    %%xmm0\n\t"
+                       "pxor   %%xmm0,    %%xmm5\n\t"
+                       :
+                       : [inbuf3] "m" (*(inbuf + 3 * 16))
+                       : "memory" );
+
+         asm volatile ("movdqu %[inbuf4], %%xmm8\n\t"
+                       "pxor   %%xmm5,    %%xmm8\n\t"
+                       "movdqa %%xmm5,    %%xmm15\n\t"
+
+                       "movdqa %%xmm11,   %%xmm0\n\t"
+                       "paddd  %%xmm11,   %%xmm11\n\t"
+                       "psrad  $31,       %%xmm0\n\t"
+                       "paddq  %%xmm5,    %%xmm5\n\t"
+                       "pand   %%xmm6,    %%xmm0\n\t"
+                       "pxor   %%xmm0,    %%xmm5\n\t"
+                       :
+                       : [inbuf4] "m" (*(inbuf + 4 * 16))
+                       : "memory" );
+
+         asm volatile ("movdqu %[inbuf5], %%xmm9\n\t"
+                       "pxor   %%xmm5,    %%xmm9\n\t"
+                       "movdqu %%xmm5,    %[outbuf5]\n\t"
+
+                       "movdqa %%xmm11,   %%xmm0\n\t"
+                       "paddd  %%xmm11,   %%xmm11\n\t"
+                       "psrad  $31,       %%xmm0\n\t"
+                       "paddq  %%xmm5,    %%xmm5\n\t"
+                       "pand   %%xmm6,    %%xmm0\n\t"
+                       "pxor   %%xmm0,    %%xmm5\n\t"
+                       : [outbuf5] "=m" (*(outbuf + 5 * 16))
+                       : [inbuf5] "m" (*(inbuf + 5 * 16))
+                       : "memory" );
+
+         asm volatile ("movdqu %[inbuf6], %%xmm10\n\t"
+                       "pxor   %%xmm5,    %%xmm10\n\t"
+                       "movdqu %%xmm5,    %[outbuf6]\n\t"
+
+                       "movdqa %%xmm11,   %%xmm0\n\t"
+                       "paddd  %%xmm11,   %%xmm11\n\t"
+                       "psrad  $31,       %%xmm0\n\t"
+                       "paddq  %%xmm5,    %%xmm5\n\t"
+                       "pand   %%xmm6,    %%xmm0\n\t"
+                       "pxor   %%xmm0,    %%xmm5\n\t"
+                       : [outbuf6] "=m" (*(outbuf + 6 * 16))
+                       : [inbuf6] "m" (*(inbuf + 6 * 16))
+                       : "memory" );
+
+         asm volatile ("movdqa %%xmm11,   %%xmm0\n\t"
+                       "movdqu %[inbuf7], %%xmm11\n\t"
+                       "pxor   %%xmm5,    %%xmm11\n\t"
+                       "movdqu %%xmm5,    %[outbuf7]\n\t"
+
+                       "psrad  $31,       %%xmm0\n\t"
+                       "paddq  %%xmm5,    %%xmm5\n\t"
+                       "pand   %%xmm6,    %%xmm0\n\t"
+                       "pxor   %%xmm0,    %%xmm5\n\t"
+                       : [outbuf7] "=m" (*(outbuf + 7 * 16))
+                       : [inbuf7] "m" (*(inbuf + 7 * 16))
+                       : "memory" );
+
+         asm volatile ("cmpl $12, %[rounds]\n\t"
+                       "movdqa (%[key]), %%xmm0\n\t"
+                       "pxor %%xmm0, %%xmm1\n\t"
+                       "pxor %%xmm0, %%xmm2\n\t"
+                       "pxor %%xmm0, %%xmm3\n\t"
+                       "pxor %%xmm0, %%xmm4\n\t"
+                       "pxor %%xmm0, %%xmm8\n\t"
+                       "pxor %%xmm0, %%xmm9\n\t"
+                       "pxor %%xmm0, %%xmm10\n\t"
+                       "pxor %%xmm0, %%xmm11\n\t"
+                       "movdqa 0x10(%[key]), %%xmm0\n\t"
+                       "aesdec %%xmm0, %%xmm1\n\t"
+                       "aesdec %%xmm0, %%xmm2\n\t"
+                       "aesdec %%xmm0, %%xmm3\n\t"
+                       "aesdec %%xmm0, %%xmm4\n\t"
+                       "aesdec %%xmm0, %%xmm8\n\t"
+                       "aesdec %%xmm0, %%xmm9\n\t"
+                       "aesdec %%xmm0, %%xmm10\n\t"
+                       "aesdec %%xmm0, %%xmm11\n\t"
+                       "movdqa 0x20(%[key]), %%xmm0\n\t"
+                       "aesdec %%xmm0, %%xmm1\n\t"
+                       "aesdec %%xmm0, %%xmm2\n\t"
+                       "aesdec %%xmm0, %%xmm3\n\t"
+                       "aesdec %%xmm0, %%xmm4\n\t"
+                       "aesdec %%xmm0, %%xmm8\n\t"
+                       "aesdec %%xmm0, %%xmm9\n\t"
+                       "aesdec %%xmm0, %%xmm10\n\t"
+                       "aesdec %%xmm0, %%xmm11\n\t"
+                       "movdqa 0x30(%[key]), %%xmm0\n\t"
+                       "aesdec %%xmm0, %%xmm1\n\t"
+                       "aesdec %%xmm0, %%xmm2\n\t"
+                       "aesdec %%xmm0, %%xmm3\n\t"
+                       "aesdec %%xmm0, %%xmm4\n\t"
+                       "aesdec %%xmm0, %%xmm8\n\t"
+                       "aesdec %%xmm0, %%xmm9\n\t"
+                       "aesdec %%xmm0, %%xmm10\n\t"
+                       "aesdec %%xmm0, %%xmm11\n\t"
+                       "movdqa 0x40(%[key]), %%xmm0\n\t"
+                       "aesdec %%xmm0, %%xmm1\n\t"
+                       "aesdec %%xmm0, %%xmm2\n\t"
+                       "aesdec %%xmm0, %%xmm3\n\t"
+                       "aesdec %%xmm0, %%xmm4\n\t"
+                       "aesdec %%xmm0, %%xmm8\n\t"
+                       "aesdec %%xmm0, %%xmm9\n\t"
+                       "aesdec %%xmm0, %%xmm10\n\t"
+                       "aesdec %%xmm0, %%xmm11\n\t"
+                       "movdqa 0x50(%[key]), %%xmm0\n\t"
+                       "aesdec %%xmm0, %%xmm1\n\t"
+                       "aesdec %%xmm0, %%xmm2\n\t"
+                       "aesdec %%xmm0, %%xmm3\n\t"
+                       "aesdec %%xmm0, %%xmm4\n\t"
+                       "aesdec %%xmm0, %%xmm8\n\t"
+                       "aesdec %%xmm0, %%xmm9\n\t"
+                       "aesdec %%xmm0, %%xmm10\n\t"
+                       "aesdec %%xmm0, %%xmm11\n\t"
+                       "movdqa 0x60(%[key]), %%xmm0\n\t"
+                       "aesdec %%xmm0, %%xmm1\n\t"
+                       "aesdec %%xmm0, %%xmm2\n\t"
+                       "aesdec %%xmm0, %%xmm3\n\t"
+                       "aesdec %%xmm0, %%xmm4\n\t"
+                       "aesdec %%xmm0, %%xmm8\n\t"
+                       "aesdec %%xmm0, %%xmm9\n\t"
+                       "aesdec %%xmm0, %%xmm10\n\t"
+                       "aesdec %%xmm0, %%xmm11\n\t"
+                       "movdqa 0x70(%[key]), %%xmm0\n\t"
+                       "aesdec %%xmm0, %%xmm1\n\t"
+                       "aesdec %%xmm0, %%xmm2\n\t"
+                       "aesdec %%xmm0, %%xmm3\n\t"
+                       "aesdec %%xmm0, %%xmm4\n\t"
+                       "aesdec %%xmm0, %%xmm8\n\t"
+                       "aesdec %%xmm0, %%xmm9\n\t"
+                       "aesdec %%xmm0, %%xmm10\n\t"
+                       "aesdec %%xmm0, %%xmm11\n\t"
+                       "movdqa 0x80(%[key]), %%xmm0\n\t"
+                       "aesdec %%xmm0, %%xmm1\n\t"
+                       "aesdec %%xmm0, %%xmm2\n\t"
+                       "aesdec %%xmm0, %%xmm3\n\t"
+                       "aesdec %%xmm0, %%xmm4\n\t"
+                       "aesdec %%xmm0, %%xmm8\n\t"
+                       "aesdec %%xmm0, %%xmm9\n\t"
+                       "aesdec %%xmm0, %%xmm10\n\t"
+                       "aesdec %%xmm0, %%xmm11\n\t"
+                       "movdqa 0x90(%[key]), %%xmm0\n\t"
+                       "aesdec %%xmm0, %%xmm1\n\t"
+                       "aesdec %%xmm0, %%xmm2\n\t"
+                       "aesdec %%xmm0, %%xmm3\n\t"
+                       "aesdec %%xmm0, %%xmm4\n\t"
+                       "aesdec %%xmm0, %%xmm8\n\t"
+                       "aesdec %%xmm0, %%xmm9\n\t"
+                       "aesdec %%xmm0, %%xmm10\n\t"
+                       "aesdec %%xmm0, %%xmm11\n\t"
+                       "movdqa 0xa0(%[key]), %%xmm0\n\t"
+                       "jb .Ldeclast%=\n\t"
+                       "aesdec %%xmm0, %%xmm1\n\t"
+                       "aesdec %%xmm0, %%xmm2\n\t"
+                       "aesdec %%xmm0, %%xmm3\n\t"
+                       "aesdec %%xmm0, %%xmm4\n\t"
+                       "aesdec %%xmm0, %%xmm8\n\t"
+                       "aesdec %%xmm0, %%xmm9\n\t"
+                       "aesdec %%xmm0, %%xmm10\n\t"
+                       "aesdec %%xmm0, %%xmm11\n\t"
+                       "movdqa 0xb0(%[key]), %%xmm0\n\t"
+                       "aesdec %%xmm0, %%xmm1\n\t"
+                       "aesdec %%xmm0, %%xmm2\n\t"
+                       "aesdec %%xmm0, %%xmm3\n\t"
+                       "aesdec %%xmm0, %%xmm4\n\t"
+                       "aesdec %%xmm0, %%xmm8\n\t"
+                       "aesdec %%xmm0, %%xmm9\n\t"
+                       "aesdec %%xmm0, %%xmm10\n\t"
+                       "aesdec %%xmm0, %%xmm11\n\t"
+                       "movdqa 0xc0(%[key]), %%xmm0\n\t"
+                       "je .Ldeclast%=\n\t"
+                       "aesdec %%xmm0, %%xmm1\n\t"
+                       "aesdec %%xmm0, %%xmm2\n\t"
+                       "aesdec %%xmm0, %%xmm3\n\t"
+                       "aesdec %%xmm0, %%xmm4\n\t"
+                       "aesdec %%xmm0, %%xmm8\n\t"
+                       "aesdec %%xmm0, %%xmm9\n\t"
+                       "aesdec %%xmm0, %%xmm10\n\t"
+                       "aesdec %%xmm0, %%xmm11\n\t"
+                       "movdqa 0xd0(%[key]), %%xmm0\n\t"
+                       "aesdec %%xmm0, %%xmm1\n\t"
+                       "aesdec %%xmm0, %%xmm2\n\t"
+                       "aesdec %%xmm0, %%xmm3\n\t"
+                       "aesdec %%xmm0, %%xmm4\n\t"
+                       "aesdec %%xmm0, %%xmm8\n\t"
+                       "aesdec %%xmm0, %%xmm9\n\t"
+                       "aesdec %%xmm0, %%xmm10\n\t"
+                       "aesdec %%xmm0, %%xmm11\n\t"
+                       "movdqa 0xe0(%[key]), %%xmm0\n\t"
+
+                       ".Ldeclast%=:\n\t"
+                       :
+                       : [key] "r" (ctx->keyschdec),
+                         [rounds] "rm" (ctx->rounds)
+                       : "cc", "memory");
+
+         asm volatile ("pxor %%xmm0, %%xmm7\n\t"
+                       "pxor %%xmm0, %%xmm12\n\t"
+                       "pxor %%xmm0, %%xmm13\n\t"
+                       "pxor %%xmm0, %%xmm14\n\t"
+                       "aesdeclast %%xmm7, %%xmm1\n\t"
+                       "aesdeclast %%xmm12, %%xmm2\n\t"
+                       "aesdeclast %%xmm13, %%xmm3\n\t"
+                       "aesdeclast %%xmm14, %%xmm4\n\t"
+                       "movdqu 5*16(%[outbuf]), %%xmm12\n\t"
+                       "movdqu 6*16(%[outbuf]), %%xmm13\n\t"
+                       "movdqu 7*16(%[outbuf]), %%xmm14\n\t"
+                       "pxor %%xmm0, %%xmm15\n\t"
+                       "pxor %%xmm0, %%xmm12\n\t"
+                       "pxor %%xmm0, %%xmm13\n\t"
+                       "pxor %%xmm0, %%xmm14\n\t"
+                       "aesdeclast %%xmm15, %%xmm8\n\t"
+                       "aesdeclast %%xmm12, %%xmm9\n\t"
+                       "aesdeclast %%xmm13, %%xmm10\n\t"
+                       "aesdeclast %%xmm14, %%xmm11\n\t"
+                       "movdqu %%xmm1, 0*16(%[outbuf])\n\t"
+                       "movdqu %%xmm2, 1*16(%[outbuf])\n\t"
+                       "movdqu %%xmm3, 2*16(%[outbuf])\n\t"
+                       "movdqu %%xmm4, 3*16(%[outbuf])\n\t"
+                       "movdqu %%xmm8, 4*16(%[outbuf])\n\t"
+                       "movdqu %%xmm9, 5*16(%[outbuf])\n\t"
+                       "movdqu %%xmm10, 6*16(%[outbuf])\n\t"
+                       "movdqu %%xmm11, 7*16(%[outbuf])\n\t"
+                       :
+                       : [outbuf] "r" (outbuf)
+                       : "memory" );
+
+         outbuf += 8*BLOCKSIZE;
+         inbuf  += 8*BLOCKSIZE;
+       }
+
+      aesni_cleanup_8_15();
+    }
+#endif
+
+  for ( ;nblocks >= 4; nblocks -= 4 )
+    {
+      asm volatile ("pshufd $0x13,     %%xmm5,  %%xmm4\n\t"
+                   "movdqu %[inbuf0], %%xmm1\n\t"
+                   "pxor   %%xmm5,    %%xmm1\n\t"
+                   "movdqu %%xmm5,    %[outbuf0]\n\t"
+
+                   "movdqa %%xmm4,    %%xmm0\n\t"
+                   "paddd  %%xmm4,    %%xmm4\n\t"
+                   "psrad  $31,       %%xmm0\n\t"
+                   "paddq  %%xmm5,    %%xmm5\n\t"
+                   "pand   %%xmm6,    %%xmm0\n\t"
+                   "pxor   %%xmm0,    %%xmm5\n\t"
+                   : [outbuf0] "=m" (*(outbuf + 0 * 16))
+                   : [inbuf0] "m" (*(inbuf + 0 * 16))
+                   : "memory" );
+
+      asm volatile ("movdqu %[inbuf1], %%xmm2\n\t"
+                   "pxor   %%xmm5,    %%xmm2\n\t"
+                   "movdqu %%xmm5,    %[outbuf1]\n\t"
+
+                   "movdqa %%xmm4,    %%xmm0\n\t"
+                   "paddd  %%xmm4,    %%xmm4\n\t"
+                   "psrad  $31,       %%xmm0\n\t"
+                   "paddq  %%xmm5,    %%xmm5\n\t"
+                   "pand   %%xmm6,    %%xmm0\n\t"
+                   "pxor   %%xmm0,    %%xmm5\n\t"
+                   : [outbuf1] "=m" (*(outbuf + 1 * 16))
+                   : [inbuf1] "m" (*(inbuf + 1 * 16))
+                   : "memory" );
+
+      asm volatile ("movdqu %[inbuf2], %%xmm3\n\t"
+                   "pxor   %%xmm5,    %%xmm3\n\t"
+                   "movdqu %%xmm5,    %[outbuf2]\n\t"
+
+                   "movdqa %%xmm4,    %%xmm0\n\t"
+                   "paddd  %%xmm4,    %%xmm4\n\t"
+                   "psrad  $31,       %%xmm0\n\t"
+                   "paddq  %%xmm5,    %%xmm5\n\t"
+                   "pand   %%xmm6,    %%xmm0\n\t"
+                   "pxor   %%xmm0,    %%xmm5\n\t"
+                   : [outbuf2] "=m" (*(outbuf + 2 * 16))
+                   : [inbuf2] "m" (*(inbuf + 2 * 16))
+                   : "memory" );
+
+      asm volatile ("movdqa %%xmm4,    %%xmm0\n\t"
+                   "movdqu %[inbuf3], %%xmm4\n\t"
+                   "pxor   %%xmm5,    %%xmm4\n\t"
+                   "movdqu %%xmm5,    %[outbuf3]\n\t"
+
+                   "psrad  $31,       %%xmm0\n\t"
+                   "paddq  %%xmm5,    %%xmm5\n\t"
+                   "pand   %%xmm6,    %%xmm0\n\t"
+                   "pxor   %%xmm0,    %%xmm5\n\t"
+                   : [outbuf3] "=m" (*(outbuf + 3 * 16))
+                   : [inbuf3] "m" (*(inbuf + 3 * 16))
+                   : "memory" );
+
+      do_aesni_dec_vec4 (ctx);
+
+      asm volatile ("movdqu %[outbuf0], %%xmm0\n\t"
+                    "pxor   %%xmm0,     %%xmm1\n\t"
+                   "movdqu %[outbuf1], %%xmm0\n\t"
+                   "movdqu %%xmm1,     %[outbuf0]\n\t"
+                   "movdqu %[outbuf2], %%xmm1\n\t"
+                    "pxor   %%xmm0,     %%xmm2\n\t"
+                   "movdqu %[outbuf3], %%xmm0\n\t"
+                    "pxor   %%xmm1,     %%xmm3\n\t"
+                    "pxor   %%xmm0,     %%xmm4\n\t"
+                   "movdqu %%xmm2,     %[outbuf1]\n\t"
+                   "movdqu %%xmm3,     %[outbuf2]\n\t"
+                   "movdqu %%xmm4,     %[outbuf3]\n\t"
+                   : [outbuf0] "+m" (*(outbuf + 0 * 16)),
+                     [outbuf1] "+m" (*(outbuf + 1 * 16)),
+                     [outbuf2] "+m" (*(outbuf + 2 * 16)),
+                     [outbuf3] "+m" (*(outbuf + 3 * 16))
+                   :
+                   : "memory" );
+
+      outbuf += BLOCKSIZE * 4;
+      inbuf += BLOCKSIZE * 4;
+    }
+
+  for ( ;nblocks; nblocks-- )
+    {
+      asm volatile ("movdqu %[inbuf],  %%xmm0\n\t"
+                   "pxor   %%xmm5,    %%xmm0\n\t"
+                   "movdqa %%xmm5,    %%xmm4\n\t"
+
+                   "pshufd $0x13,     %%xmm5,  %%xmm1\n\t"
+                   "psrad  $31,       %%xmm1\n\t"
+                   "paddq  %%xmm5,    %%xmm5\n\t"
+                   "pand   %%xmm6,    %%xmm1\n\t"
+                   "pxor   %%xmm1,    %%xmm5\n\t"
+                   :
+                   : [inbuf] "m" (*inbuf)
+                   : "memory" );
+
+      do_aesni_dec (ctx);
+
+      asm volatile ("pxor   %%xmm4,    %%xmm0\n\t"
+                   "movdqu %%xmm0,    %[outbuf]\n\t"
+                   : [outbuf] "=m" (*outbuf)
+                   :
+                   : "memory" );
+
+      outbuf += BLOCKSIZE;
+      inbuf += BLOCKSIZE;
+    }
+
+  asm volatile ("movdqu %%xmm5, %[tweak]\n\t"
+                : [tweak] "=m" (*tweak)
+                :
+                : "memory" );
+
+  aesni_cleanup ();
+  aesni_cleanup_2_7 ();
+}
+
+
+void ASM_FUNC_ATTR
+_gcry_aes_aesni_xts_crypt (RIJNDAEL_context *ctx, unsigned char *tweak,
+                          unsigned char *outbuf, const unsigned char *inbuf,
+                          size_t nblocks, int encrypt)
+{
+  if (encrypt)
+    _gcry_aes_aesni_xts_enc(ctx, tweak, outbuf, inbuf, nblocks);
+  else
+    _gcry_aes_aesni_xts_dec(ctx, tweak, outbuf, inbuf, nblocks);
+}
+
+#if __clang__
+#  pragma clang attribute pop
+#endif
+
+#endif /* USE_AESNI */
diff --git a/grub-core/lib/libgcrypt/cipher/rijndael-amd64.S 
b/grub-core/lib/libgcrypt/cipher/rijndael-amd64.S
new file mode 100644
index 000000000..6e3cc8193
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/rijndael-amd64.S
@@ -0,0 +1,477 @@
+/* rinjdael-amd64.S  -  AMD64 assembly implementation of AES cipher
+ *
+ * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifdef __x86_64
+#include <config.h>
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && defined(USE_AES)
+
+#include "asm-common-amd64.h"
+
+.text
+
+/* table macros */
+#define E0     (0)
+#define Es0    (1)
+#define Esize  4
+#define Essize 4
+
+#define D0     (0)
+#define Ds0    (4 * 256)
+#define Dsize  4
+#define Dssize 1
+
+/* register macros */
+#define CTX    %rdi
+#define RTAB   %r12
+
+#define RA     %rax
+#define RB     %rbx
+#define RC     %rcx
+#define RD     %rdx
+
+#define RAd    %eax
+#define RBd    %ebx
+#define RCd    %ecx
+#define RDd    %edx
+
+#define RAbl   %al
+#define RBbl   %bl
+#define RCbl   %cl
+#define RDbl   %dl
+
+#define RAbh   %ah
+#define RBbh   %bh
+#define RCbh   %ch
+#define RDbh   %dh
+
+#define RNA    %r8
+#define RNB    %r9
+#define RNC    %r10
+#define RND    %r11
+
+#define RNAd   %r8d
+#define RNBd   %r9d
+#define RNCd   %r10d
+#define RNDd   %r11d
+
+#define RT0    %rbp
+#define RT1    %rsi
+
+#define RT0d   %ebp
+#define RT1d   %esi
+
+/* helper macros */
+#define do16bit(op, source, tablemul, table1, dest1, table2, dest2, t0, t1) \
+       movzbl source ## bl,                    t0 ## d; \
+       movzbl source ## bh,                    t1 ## d; \
+       op ## l table1(RTAB,t0,tablemul),       dest1 ## d; \
+       op ## l table2(RTAB,t1,tablemul),       dest2 ## d;
+
+#define do16bit_shr(shf, op, source, tablemul, table1, dest1, table2, dest2, 
t0, t1) \
+       movzbl source ## bl,                    t0 ## d; \
+       movzbl source ## bh,                    t1 ## d; \
+       shrl $(shf),                            source ## d; \
+       op ## l table1(RTAB,t0,tablemul),       dest1 ## d; \
+       op ## l table2(RTAB,t1,tablemul),       dest2 ## d;
+
+#define last_do16bit(op, source, tablemul, table1, dest1, table2, dest2, t0, 
t1) \
+       movzbl source ## bl,                    t0 ## d; \
+       movzbl source ## bh,                    t1 ## d; \
+       movzbl table1(RTAB,t0,tablemul),        t0 ## d; \
+       movzbl table2(RTAB,t1,tablemul),        t1 ## d; \
+       op ## l t0 ## d,                        dest1 ## d; \
+       op ## l t1 ## d,                        dest2 ## d;
+
+#define last_do16bit_shr(shf, op, source, tablemul, table1, dest1, table2, 
dest2, t0, t1) \
+       movzbl source ## bl,                    t0 ## d; \
+       movzbl source ## bh,                    t1 ## d; \
+       shrl $(shf),                            source ## d; \
+       movzbl table1(RTAB,t0,tablemul),        t0 ## d; \
+       movzbl table2(RTAB,t1,tablemul),        t1 ## d; \
+       op ## l t0 ## d,                        dest1 ## d; \
+       op ## l t1 ## d,                        dest2 ## d;
+
+/***********************************************************************
+ * AMD64 assembly implementation of the AES cipher
+ ***********************************************************************/
+#define addroundkey(round, ra, rb, rc, rd) \
+       xorl (((round) * 16) + 0 * 4)(CTX), ra ## d; \
+       xorl (((round) * 16) + 1 * 4)(CTX), rb ## d; \
+       xorl (((round) * 16) + 2 * 4)(CTX), rc ## d; \
+       xorl (((round) * 16) + 3 * 4)(CTX), rd ## d;
+
+#define do_encround(next_r) \
+       do16bit_shr(16, mov, RA, Esize, E0, RNA, E0, RND, RT0, RT1); \
+       do16bit(        mov, RA, Esize, E0, RNC, E0, RNB, RT0, RT1); \
+       movl (((next_r) * 16) + 0 * 4)(CTX), RAd; \
+       roll $8, RNDd; \
+       xorl RNAd, RAd; \
+       roll $8, RNCd; \
+       roll $8, RNBd; \
+       roll $8, RAd; \
+       \
+       do16bit_shr(16, xor, RD, Esize, E0, RND, E0, RNC, RT0, RT1); \
+       do16bit(        xor, RD, Esize, E0, RNB, E0, RA,  RT0, RT1); \
+       movl (((next_r) * 16) + 3 * 4)(CTX), RDd; \
+       roll $8, RNCd; \
+       xorl RNDd, RDd; \
+       roll $8, RNBd; \
+       roll $8, RAd; \
+       roll $8, RDd; \
+       \
+       do16bit_shr(16, xor, RC, Esize, E0, RNC, E0, RNB, RT0, RT1); \
+       do16bit(        xor, RC, Esize, E0, RA,  E0, RD,  RT0, RT1); \
+       movl (((next_r) * 16) + 2 * 4)(CTX), RCd; \
+       roll $8, RNBd; \
+       xorl RNCd, RCd; \
+       roll $8, RAd; \
+       roll $8, RDd; \
+       roll $8, RCd; \
+       \
+       do16bit_shr(16, xor, RB, Esize, E0, RNB, E0, RA,  RT0, RT1); \
+       do16bit(        xor, RB, Esize, E0, RD,  E0, RC,  RT0, RT1); \
+       movl (((next_r) * 16) + 1 * 4)(CTX), RBd; \
+       roll $8, RAd; \
+       xorl RNBd, RBd; \
+       roll $16, RDd; \
+       roll $24, RCd;
+
+#define do_lastencround(next_r) \
+       do16bit_shr(16, movzb, RA, Essize, Es0, RNA, Es0, RND, RT0, RT1); \
+       do16bit(        movzb, RA, Essize, Es0, RNC, Es0, RNB, RT0, RT1); \
+       movl (((next_r) * 16) + 0 * 4)(CTX), RAd; \
+       roll $8, RNDd; \
+       xorl RNAd, RAd; \
+       roll $8, RNCd; \
+       roll $8, RNBd; \
+       roll $8, RAd; \
+       \
+       last_do16bit_shr(16, xor, RD, Essize, Es0, RND, Es0, RNC, RT0, RT1); \
+       last_do16bit(        xor, RD, Essize, Es0, RNB, Es0, RA,  RT0, RT1); \
+       movl (((next_r) * 16) + 3 * 4)(CTX), RDd; \
+       roll $8, RNCd; \
+       xorl RNDd, RDd; \
+       roll $8, RNBd; \
+       roll $8, RAd; \
+       roll $8, RDd; \
+       \
+       last_do16bit_shr(16, xor, RC, Essize, Es0, RNC, Es0, RNB, RT0, RT1); \
+       last_do16bit(        xor, RC, Essize, Es0, RA,  Es0, RD,  RT0, RT1); \
+       movl (((next_r) * 16) + 2 * 4)(CTX), RCd; \
+       roll $8, RNBd; \
+       xorl RNCd, RCd; \
+       roll $8, RAd; \
+       roll $8, RDd; \
+       roll $8, RCd; \
+       \
+       last_do16bit_shr(16, xor, RB, Essize, Es0, RNB, Es0, RA,  RT0, RT1); \
+       last_do16bit(        xor, RB, Essize, Es0, RD,  Es0, RC,  RT0, RT1); \
+       movl (((next_r) * 16) + 1 * 4)(CTX), RBd; \
+       roll $8, RAd; \
+       xorl RNBd, RBd; \
+       roll $16, RDd; \
+       roll $24, RCd;
+
+#define firstencround(round) \
+       addroundkey(round, RA, RB, RC, RD); \
+       do_encround((round) + 1);
+
+#define encround(round) \
+       do_encround((round) + 1);
+
+#define lastencround(round) \
+       do_lastencround((round) + 1);
+
+.align 8
+.globl _gcry_aes_amd64_encrypt_block
+ELF(.type   _gcry_aes_amd64_encrypt_block,@function;)
+
+_gcry_aes_amd64_encrypt_block:
+       /* input:
+        *      %rdi: keysched, CTX
+        *      %rsi: dst
+        *      %rdx: src
+        *      %ecx: number of rounds.. 10, 12 or 14
+        *      %r8:  encryption tables
+        */
+       CFI_STARTPROC();
+       ENTER_SYSV_FUNC_PARAMS_5
+
+       subq $(5 * 8), %rsp;
+       CFI_ADJUST_CFA_OFFSET(5 * 8);
+       movq %rsi, (0 * 8)(%rsp);
+       movl %ecx, (1 * 8)(%rsp);
+       movq %rbp, (2 * 8)(%rsp);
+       movq %rbx, (3 * 8)(%rsp);
+       movq %r12, (4 * 8)(%rsp);
+       CFI_REL_OFFSET(%rbp, 2 * 8);
+       CFI_REL_OFFSET(%rbx, 3 * 8);
+       CFI_REL_OFFSET(%r12, 4 * 8);
+
+       leaq (%r8), RTAB;
+
+       /* read input block */
+       movl 0 * 4(%rdx), RAd;
+       movl 1 * 4(%rdx), RBd;
+       movl 2 * 4(%rdx), RCd;
+       movl 3 * 4(%rdx), RDd;
+
+       firstencround(0);
+       encround(1);
+       encround(2);
+       encround(3);
+       encround(4);
+       encround(5);
+       encround(6);
+       encround(7);
+       encround(8);
+       cmpl $12, (1 * 8)(%rsp);
+       jnb .Lenc_not_128;
+       lastencround(9);
+
+.align 4
+.Lenc_done:
+       /* write output block */
+       movq (0 * 8)(%rsp), %rsi;
+       movl RAd, 0 * 4(%rsi);
+       movl RBd, 1 * 4(%rsi);
+       movl RCd, 2 * 4(%rsi);
+       movl RDd, 3 * 4(%rsi);
+
+       CFI_REMEMBER_STATE();
+
+       movq (4 * 8)(%rsp), %r12;
+       movq (3 * 8)(%rsp), %rbx;
+       movq (2 * 8)(%rsp), %rbp;
+       CFI_RESTORE(%r12);
+       CFI_RESTORE(%rbx);
+       CFI_RESTORE(%rbp);
+       addq $(5 * 8), %rsp;
+       CFI_ADJUST_CFA_OFFSET(-5 * 8);
+
+       movl $(6 * 8), %eax;
+
+       EXIT_SYSV_FUNC
+       ret_spec_stop;
+
+       CFI_RESTORE_STATE();
+.align 4
+.Lenc_not_128:
+       je .Lenc_192
+
+       encround(9);
+       encround(10);
+       encround(11);
+       encround(12);
+       lastencround(13);
+
+       jmp .Lenc_done;
+
+.align 4
+.Lenc_192:
+       encround(9);
+       encround(10);
+       lastencround(11);
+
+       jmp .Lenc_done;
+       CFI_ENDPROC();
+ELF(.size _gcry_aes_amd64_encrypt_block,.-_gcry_aes_amd64_encrypt_block;)
+
+#define do_decround(next_r) \
+       do16bit_shr(16, mov, RA, Dsize, D0, RNA, D0, RNB, RT0, RT1); \
+       do16bit(        mov, RA, Dsize, D0, RNC, D0, RND, RT0, RT1); \
+       movl (((next_r) * 16) + 0 * 4)(CTX), RAd; \
+       roll $8, RNBd; \
+       xorl RNAd, RAd; \
+       roll $8, RNCd; \
+       roll $8, RNDd; \
+       roll $8, RAd; \
+       \
+       do16bit_shr(16, xor, RB, Dsize, D0, RNB, D0, RNC, RT0, RT1); \
+       do16bit(        xor, RB, Dsize, D0, RND, D0, RA,  RT0, RT1); \
+       movl (((next_r) * 16) + 1 * 4)(CTX), RBd; \
+       roll $8, RNCd; \
+       xorl RNBd, RBd; \
+       roll $8, RNDd; \
+       roll $8, RAd; \
+       roll $8, RBd; \
+       \
+       do16bit_shr(16, xor, RC, Dsize, D0, RNC, D0, RND, RT0, RT1); \
+       do16bit(        xor, RC, Dsize, D0, RA,  D0, RB,  RT0, RT1); \
+       movl (((next_r) * 16) + 2 * 4)(CTX), RCd; \
+       roll $8, RNDd; \
+       xorl RNCd, RCd; \
+       roll $8, RAd; \
+       roll $8, RBd; \
+       roll $8, RCd; \
+       \
+       do16bit_shr(16, xor, RD, Dsize, D0, RND, D0, RA,  RT0, RT1); \
+       do16bit(        xor, RD, Dsize, D0, RB,  D0, RC,  RT0, RT1); \
+       movl (((next_r) * 16) + 3 * 4)(CTX), RDd; \
+       roll $8, RAd; \
+       xorl RNDd, RDd; \
+       roll $16, RBd; \
+       roll $24, RCd;
+
+#define do_lastdecround(next_r) \
+       do16bit_shr(16, movzb, RA, Dssize, Ds0, RNA, Ds0, RNB, RT0, RT1); \
+       do16bit(        movzb, RA, Dssize, Ds0, RNC, Ds0, RND, RT0, RT1); \
+       movl (((next_r) * 16) + 0 * 4)(CTX), RAd; \
+       roll $8, RNBd; \
+       xorl RNAd, RAd; \
+       roll $8, RNCd; \
+       roll $8, RNDd; \
+       roll $8, RAd; \
+       \
+       last_do16bit_shr(16, xor, RB, Dssize, Ds0, RNB, Ds0, RNC, RT0, RT1); \
+       last_do16bit(        xor, RB, Dssize, Ds0, RND, Ds0, RA,  RT0, RT1); \
+       movl (((next_r) * 16) + 1 * 4)(CTX), RBd; \
+       roll $8, RNCd; \
+       xorl RNBd, RBd; \
+       roll $8, RNDd; \
+       roll $8, RAd; \
+       roll $8, RBd; \
+       \
+       last_do16bit_shr(16, xor, RC, Dssize, Ds0, RNC, Ds0, RND, RT0, RT1); \
+       last_do16bit(        xor, RC, Dssize, Ds0, RA,  Ds0, RB,  RT0, RT1); \
+       movl (((next_r) * 16) + 2 * 4)(CTX), RCd; \
+       roll $8, RNDd; \
+       xorl RNCd, RCd; \
+       roll $8, RAd; \
+       roll $8, RBd; \
+       roll $8, RCd; \
+       \
+       last_do16bit_shr(16, xor, RD, Dssize, Ds0, RND, Ds0, RA,  RT0, RT1); \
+       last_do16bit(        xor, RD, Dssize, Ds0, RB,  Ds0, RC,  RT0, RT1); \
+       movl (((next_r) * 16) + 3 * 4)(CTX), RDd; \
+       roll $8, RAd; \
+       xorl RNDd, RDd; \
+       roll $16, RBd; \
+       roll $24, RCd;
+
+#define firstdecround(round) \
+       addroundkey((round + 1), RA, RB, RC, RD); \
+       do_decround(round);
+
+#define decround(round) \
+       do_decround(round);
+
+#define lastdecround(round) \
+       do_lastdecround(round);
+
+.align 8
+.globl _gcry_aes_amd64_decrypt_block
+ELF(.type   _gcry_aes_amd64_decrypt_block,@function;)
+
+_gcry_aes_amd64_decrypt_block:
+       /* input:
+        *      %rdi: keysched, CTX
+        *      %rsi: dst
+        *      %rdx: src
+        *      %ecx: number of rounds.. 10, 12 or 14
+        *      %r8:  decryption tables
+        */
+       CFI_STARTPROC();
+       ENTER_SYSV_FUNC_PARAMS_5
+
+       subq $(5 * 8), %rsp;
+       CFI_ADJUST_CFA_OFFSET(5 * 8);
+       movq %rsi, (0 * 8)(%rsp);
+       movl %ecx, (1 * 8)(%rsp);
+       movq %rbp, (2 * 8)(%rsp);
+       movq %rbx, (3 * 8)(%rsp);
+       movq %r12, (4 * 8)(%rsp);
+       CFI_REL_OFFSET(%rbp, 2 * 8);
+       CFI_REL_OFFSET(%rbx, 3 * 8);
+       CFI_REL_OFFSET(%r12, 4 * 8);
+
+       leaq (%r8), RTAB;
+
+       /* read input block */
+       movl 0 * 4(%rdx), RAd;
+       movl 1 * 4(%rdx), RBd;
+       movl 2 * 4(%rdx), RCd;
+       movl 3 * 4(%rdx), RDd;
+
+       cmpl $12, (1 * 8)(%rsp);
+       jnb .Ldec_256;
+
+       firstdecround(9);
+.align 4
+.Ldec_tail:
+       decround(8);
+       decround(7);
+       decround(6);
+       decround(5);
+       decround(4);
+       decround(3);
+       decround(2);
+       decround(1);
+       lastdecround(0);
+
+       /* write output block */
+       movq (0 * 8)(%rsp), %rsi;
+       movl RAd, 0 * 4(%rsi);
+       movl RBd, 1 * 4(%rsi);
+       movl RCd, 2 * 4(%rsi);
+       movl RDd, 3 * 4(%rsi);
+
+       CFI_REMEMBER_STATE();
+
+       movq (4 * 8)(%rsp), %r12;
+       movq (3 * 8)(%rsp), %rbx;
+       movq (2 * 8)(%rsp), %rbp;
+       CFI_RESTORE(%r12);
+       CFI_RESTORE(%rbx);
+       CFI_RESTORE(%rbp);
+       addq $(5 * 8), %rsp;
+       CFI_ADJUST_CFA_OFFSET(-5 * 8);
+
+       movl $(6 * 8), %eax;
+
+       EXIT_SYSV_FUNC
+       ret_spec_stop;
+
+       CFI_RESTORE_STATE();
+.align 4
+.Ldec_256:
+       je .Ldec_192;
+
+       firstdecround(13);
+       decround(12);
+       decround(11);
+       decround(10);
+       decround(9);
+
+       jmp .Ldec_tail;
+
+.align 4
+.Ldec_192:
+       firstdecround(11);
+       decround(10);
+       decround(9);
+
+       jmp .Ldec_tail;
+       CFI_ENDPROC();
+ELF(.size _gcry_aes_amd64_decrypt_block,.-_gcry_aes_amd64_decrypt_block;)
+
+#endif /*USE_AES*/
+#endif /*__x86_64*/
diff --git a/grub-core/lib/libgcrypt/cipher/rijndael-arm.S 
b/grub-core/lib/libgcrypt/cipher/rijndael-arm.S
new file mode 100644
index 000000000..e680c817b
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/rijndael-arm.S
@@ -0,0 +1,581 @@
+/* rijndael-arm.S  -  ARM assembly implementation of AES cipher
+ *
+ * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+
+#if defined(__ARMEL__)
+#ifdef HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS
+
+.text
+
+.syntax unified
+.arm
+
+/* register macros */
+#define CTX    %r0
+#define RTAB   %lr
+#define RMASK  %ip
+
+#define RA     %r4
+#define RB     %r5
+#define RC     %r6
+#define RD     %r7
+
+#define RNA    %r8
+#define RNB    %r9
+#define RNC    %r10
+#define RND    %r11
+
+#define RT0    %r1
+#define RT1    %r2
+#define RT2    %r3
+
+/* helper macros */
+#define ldr_unaligned_le(rout, rsrc, offs, rtmp) \
+       ldrb rout, [rsrc, #((offs) + 0)]; \
+       ldrb rtmp, [rsrc, #((offs) + 1)]; \
+       orr rout, rout, rtmp, lsl #8; \
+       ldrb rtmp, [rsrc, #((offs) + 2)]; \
+       orr rout, rout, rtmp, lsl #16; \
+       ldrb rtmp, [rsrc, #((offs) + 3)]; \
+       orr rout, rout, rtmp, lsl #24;
+
+#define str_unaligned_le(rin, rdst, offs, rtmp0, rtmp1) \
+       mov rtmp0, rin, lsr #8; \
+       strb rin, [rdst, #((offs) + 0)]; \
+       mov rtmp1, rin, lsr #16; \
+       strb rtmp0, [rdst, #((offs) + 1)]; \
+       mov rtmp0, rin, lsr #24; \
+       strb rtmp1, [rdst, #((offs) + 2)]; \
+       strb rtmp0, [rdst, #((offs) + 3)];
+
+/***********************************************************************
+ * ARM assembly implementation of the AES cipher
+ ***********************************************************************/
+#define preload_first_key(round, ra) \
+       ldr ra, [CTX, #(((round) * 16) + 0 * 4)];
+
+#define dummy(round, ra) /* nothing */
+
+#define addroundkey(ra, rb, rc, rd, rna, rnb, rnc, rnd, preload_key) \
+       ldm CTX, {rna, rnb, rnc, rnd}; \
+       eor ra, rna; \
+       eor rb, rnb; \
+       eor rc, rnc; \
+       preload_key(1, rna); \
+       eor rd, rnd;
+
+#define do_encround(next_r, ra, rb, rc, rd, rna, rnb, rnc, rnd, preload_key) \
+       ldr rnb, [CTX, #(((next_r) * 16) + 1 * 4)]; \
+       \
+       and RT0, RMASK, ra, lsl#2; \
+       ldr rnc, [CTX, #(((next_r) * 16) + 2 * 4)]; \
+       and RT1, RMASK, ra, lsr#(8 - 2); \
+       ldr rnd, [CTX, #(((next_r) * 16) + 3 * 4)]; \
+       and RT2, RMASK, ra, lsr#(16 - 2); \
+       ldr RT0, [RTAB, RT0]; \
+       and ra,  RMASK, ra, lsr#(24 - 2); \
+       \
+       ldr RT1, [RTAB, RT1]; \
+       eor rna, rna, RT0; \
+       ldr RT2, [RTAB, RT2]; \
+       and RT0, RMASK, rd, lsl#2; \
+       ldr ra,  [RTAB, ra]; \
+       \
+       eor rnd, rnd, RT1, ror #24; \
+       and RT1, RMASK, rd, lsr#(8 - 2); \
+       eor rnc, rnc, RT2, ror #16; \
+       and RT2, RMASK, rd, lsr#(16 - 2); \
+       eor rnb, rnb, ra, ror #8; \
+       ldr RT0, [RTAB, RT0]; \
+       and rd,  RMASK, rd, lsr#(24 - 2); \
+       \
+       ldr RT1, [RTAB, RT1]; \
+       eor rnd, rnd, RT0; \
+       ldr RT2, [RTAB, RT2]; \
+       and RT0, RMASK, rc, lsl#2; \
+       ldr rd,  [RTAB, rd]; \
+       \
+       eor rnc, rnc, RT1, ror #24; \
+       and RT1, RMASK, rc, lsr#(8 - 2); \
+       eor rnb, rnb, RT2, ror #16; \
+       and RT2, RMASK, rc, lsr#(16 - 2); \
+       eor rna, rna, rd, ror #8; \
+       ldr RT0, [RTAB, RT0]; \
+       and rc,  RMASK, rc, lsr#(24 - 2); \
+       \
+       ldr RT1, [RTAB, RT1]; \
+       eor rnc, rnc, RT0; \
+       ldr RT2, [RTAB, RT2]; \
+       and RT0, RMASK, rb, lsl#2; \
+       ldr rc,  [RTAB, rc]; \
+       \
+       eor rnb, rnb, RT1, ror #24; \
+       and RT1, RMASK, rb, lsr#(8 - 2); \
+       eor rna, rna, RT2, ror #16; \
+       and RT2, RMASK, rb, lsr#(16 - 2); \
+       eor rnd, rnd, rc, ror #8; \
+       ldr RT0, [RTAB, RT0]; \
+       and rb,  RMASK, rb, lsr#(24 - 2); \
+       \
+       ldr RT1, [RTAB, RT1]; \
+       eor rnb, rnb, RT0; \
+       ldr RT2, [RTAB, RT2]; \
+       eor rna, rna, RT1, ror #24; \
+       ldr rb,  [RTAB, rb]; \
+       \
+       eor rnd, rnd, RT2, ror #16; \
+       preload_key((next_r) + 1, ra); \
+       eor rnc, rnc, rb, ror #8;
+
+#define do_lastencround(ra, rb, rc, rd, rna, rnb, rnc, rnd) \
+       and RT0, RMASK, ra, lsl#2; \
+       and RT1, RMASK, ra, lsr#(8 - 2); \
+       and RT2, RMASK, ra, lsr#(16 - 2); \
+       ldrb rna, [RTAB, RT0]; \
+       and ra,  RMASK, ra, lsr#(24 - 2); \
+       ldrb rnd, [RTAB, RT1]; \
+       and RT0, RMASK, rd, lsl#2; \
+       ldrb rnc, [RTAB, RT2]; \
+       mov rnd, rnd, ror #24; \
+       ldrb rnb, [RTAB, ra]; \
+       and RT1, RMASK, rd, lsr#(8 - 2); \
+       mov rnc, rnc, ror #16; \
+       and RT2, RMASK, rd, lsr#(16 - 2); \
+       mov rnb, rnb, ror #8; \
+       ldrb RT0, [RTAB, RT0]; \
+       and rd,  RMASK, rd, lsr#(24 - 2); \
+       ldrb RT1, [RTAB, RT1]; \
+       \
+       orr rnd, rnd, RT0; \
+       ldrb RT2, [RTAB, RT2]; \
+       and RT0, RMASK, rc, lsl#2; \
+       ldrb rd,  [RTAB, rd]; \
+       orr rnc, rnc, RT1, ror #24; \
+       and RT1, RMASK, rc, lsr#(8 - 2); \
+       orr rnb, rnb, RT2, ror #16; \
+       and RT2, RMASK, rc, lsr#(16 - 2); \
+       orr rna, rna, rd, ror #8; \
+       ldrb RT0, [RTAB, RT0]; \
+       and rc,  RMASK, rc, lsr#(24 - 2); \
+       ldrb RT1, [RTAB, RT1]; \
+       \
+       orr rnc, rnc, RT0; \
+       ldrb RT2, [RTAB, RT2]; \
+       and RT0, RMASK, rb, lsl#2; \
+       ldrb rc,  [RTAB, rc]; \
+       orr rnb, rnb, RT1, ror #24; \
+       and RT1, RMASK, rb, lsr#(8 - 2); \
+       orr rna, rna, RT2, ror #16; \
+       ldrb RT0, [RTAB, RT0]; \
+       and RT2, RMASK, rb, lsr#(16 - 2); \
+       ldrb RT1, [RTAB, RT1]; \
+       orr rnd, rnd, rc, ror #8; \
+       ldrb RT2, [RTAB, RT2]; \
+       and rb,  RMASK, rb, lsr#(24 - 2); \
+       ldrb rb,  [RTAB, rb]; \
+       \
+       orr rnb, rnb, RT0; \
+       orr rna, rna, RT1, ror #24; \
+       orr rnd, rnd, RT2, ror #16; \
+       orr rnc, rnc, rb, ror #8;
+
+#define firstencround(round, ra, rb, rc, rd, rna, rnb, rnc, rnd) \
+       addroundkey(ra, rb, rc, rd, rna, rnb, rnc, rnd, preload_first_key); \
+       do_encround((round) + 1, ra, rb, rc, rd, rna, rnb, rnc, rnd, 
preload_first_key);
+
+#define encround(round, ra, rb, rc, rd, rna, rnb, rnc, rnd, preload_key) \
+       do_encround((round) + 1, ra, rb, rc, rd, rna, rnb, rnc, rnd, 
preload_key);
+
+#define lastencround(round, ra, rb, rc, rd, rna, rnb, rnc, rnd) \
+       add CTX, #(((round) + 1) * 16); \
+       add RTAB, #1; \
+       do_lastencround(ra, rb, rc, rd, rna, rnb, rnc, rnd); \
+       addroundkey(rna, rnb, rnc, rnd, ra, rb, rc, rd, dummy);
+
+.align 3
+.globl _gcry_aes_arm_encrypt_block
+.type   _gcry_aes_arm_encrypt_block,%function;
+
+_gcry_aes_arm_encrypt_block:
+       /* input:
+        *      %r0: keysched, CTX
+        *      %r1: dst
+        *      %r2: src
+        *      %r3: number of rounds.. 10, 12 or 14
+        *      %st+0: encryption table
+        */
+       push {%r4-%r11, %ip, %lr};
+
+       /* read input block */
+
+       /* test if src is unaligned */
+       tst     %r2, #3;
+       beq     1f;
+
+       /* unaligned load */
+       ldr_unaligned_le(RA, %r2, 0, RNA);
+       ldr_unaligned_le(RB, %r2, 4, RNB);
+       ldr_unaligned_le(RC, %r2, 8, RNA);
+       ldr_unaligned_le(RD, %r2, 12, RNB);
+       b       2f;
+.ltorg
+1:
+       /* aligned load */
+       ldm     %r2, {RA, RB, RC, RD};
+#ifndef __ARMEL__
+       rev     RA, RA;
+       rev     RB, RB;
+       rev     RC, RC;
+       rev     RD, RD;
+#endif
+2:
+       ldr     RTAB, [%sp, #40];
+       sub     %sp, #16;
+
+       str     %r1, [%sp, #4];         /* dst */
+       mov     RMASK, #0xff;
+       str     %r3, [%sp, #8];         /* nrounds */
+       mov     RMASK, RMASK, lsl#2;    /* byte mask */
+
+       firstencround(0, RA, RB, RC, RD, RNA, RNB, RNC, RND);
+       encround(1, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key);
+       encround(2, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key);
+       encround(3, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key);
+       encround(4, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key);
+       encround(5, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key);
+       encround(6, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key);
+       encround(7, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key);
+
+       ldr     RT0, [%sp, #8];         /* nrounds */
+       cmp     RT0, #12;
+       bge     .Lenc_not_128;
+
+       encround(8, RA, RB, RC, RD, RNA, RNB, RNC, RND, dummy);
+       lastencround(9, RNA, RNB, RNC, RND, RA, RB, RC, RD);
+
+.Lenc_done:
+       ldr     RT0, [%sp, #4];         /* dst */
+       add     %sp, #16;
+
+       /* store output block */
+
+       /* test if dst is unaligned */
+       tst     RT0, #3;
+       beq     1f;
+
+       /* unaligned store */
+       str_unaligned_le(RA, RT0, 0, RNA, RNB);
+       str_unaligned_le(RB, RT0, 4, RNA, RNB);
+       str_unaligned_le(RC, RT0, 8, RNA, RNB);
+       str_unaligned_le(RD, RT0, 12, RNA, RNB);
+       b       2f;
+.ltorg
+1:
+       /* aligned store */
+#ifndef __ARMEL__
+       rev     RA, RA;
+       rev     RB, RB;
+       rev     RC, RC;
+       rev     RD, RD;
+#endif
+       /* write output block */
+       stm     RT0, {RA, RB, RC, RD};
+2:
+
+       mov     r0, #(10 * 4);
+       pop {%r4-%r11, %ip, %pc};
+
+.ltorg
+.Lenc_not_128:
+       beq .Lenc_192
+
+       encround(8, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key);
+       encround(9, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key);
+       encround(10, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key);
+       encround(11, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key);
+       encround(12, RA, RB, RC, RD, RNA, RNB, RNC, RND, dummy);
+       lastencround(13, RNA, RNB, RNC, RND, RA, RB, RC, RD);
+
+       b .Lenc_done;
+
+.ltorg
+.Lenc_192:
+       encround(8, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key);
+       encround(9, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key);
+       encround(10, RA, RB, RC, RD, RNA, RNB, RNC, RND, dummy);
+       lastencround(11, RNA, RNB, RNC, RND, RA, RB, RC, RD);
+
+       b .Lenc_done;
+.size _gcry_aes_arm_encrypt_block,.-_gcry_aes_arm_encrypt_block;
+
+#define addroundkey_dec(round, ra, rb, rc, rd, rna, rnb, rnc, rnd) \
+       ldr rna, [CTX, #(((round) * 16) + 0 * 4)]; \
+       ldr rnb, [CTX, #(((round) * 16) + 1 * 4)]; \
+       eor ra, rna; \
+       ldr rnc, [CTX, #(((round) * 16) + 2 * 4)]; \
+       eor rb, rnb; \
+       ldr rnd, [CTX, #(((round) * 16) + 3 * 4)]; \
+       eor rc, rnc; \
+       preload_first_key((round) - 1, rna); \
+       eor rd, rnd;
+
+#define do_decround(next_r, ra, rb, rc, rd, rna, rnb, rnc, rnd, preload_key) \
+       ldr rnb, [CTX, #(((next_r) * 16) + 1 * 4)]; \
+       \
+       and RT0, RMASK, ra, lsl#2; \
+       ldr rnc, [CTX, #(((next_r) * 16) + 2 * 4)]; \
+       and RT1, RMASK, ra, lsr#(8 - 2); \
+       ldr rnd, [CTX, #(((next_r) * 16) + 3 * 4)]; \
+       and RT2, RMASK, ra, lsr#(16 - 2); \
+       ldr RT0, [RTAB, RT0]; \
+       and ra,  RMASK, ra, lsr#(24 - 2); \
+       \
+       ldr RT1, [RTAB, RT1]; \
+       eor rna, rna, RT0; \
+       ldr RT2, [RTAB, RT2]; \
+       and RT0, RMASK, rb, lsl#2; \
+       ldr ra,  [RTAB, ra]; \
+       \
+       eor rnb, rnb, RT1, ror #24; \
+       and RT1, RMASK, rb, lsr#(8 - 2); \
+       eor rnc, rnc, RT2, ror #16; \
+       and RT2, RMASK, rb, lsr#(16 - 2); \
+       eor rnd, rnd, ra, ror #8; \
+       ldr RT0, [RTAB, RT0]; \
+       and rb,  RMASK, rb, lsr#(24 - 2); \
+       \
+       ldr RT1, [RTAB, RT1]; \
+       eor rnb, rnb, RT0; \
+       ldr RT2, [RTAB, RT2]; \
+       and RT0, RMASK, rc, lsl#2; \
+       ldr rb,  [RTAB, rb]; \
+       \
+       eor rnc, rnc, RT1, ror #24; \
+       and RT1, RMASK, rc, lsr#(8 - 2); \
+       eor rnd, rnd, RT2, ror #16; \
+       and RT2, RMASK, rc, lsr#(16 - 2); \
+       eor rna, rna, rb, ror #8; \
+       ldr RT0, [RTAB, RT0]; \
+       and rc,  RMASK, rc, lsr#(24 - 2); \
+       \
+       ldr RT1, [RTAB, RT1]; \
+       eor rnc, rnc, RT0; \
+       ldr RT2, [RTAB, RT2]; \
+       and RT0, RMASK, rd, lsl#2; \
+       ldr rc,  [RTAB, rc]; \
+       \
+       eor rnd, rnd, RT1, ror #24; \
+       and RT1, RMASK, rd, lsr#(8 - 2); \
+       eor rna, rna, RT2, ror #16; \
+       and RT2, RMASK, rd, lsr#(16 - 2); \
+       eor rnb, rnb, rc, ror #8; \
+       ldr RT0, [RTAB, RT0]; \
+       and rd,  RMASK, rd, lsr#(24 - 2); \
+       \
+       ldr RT1, [RTAB, RT1]; \
+       eor rnd, rnd, RT0; \
+       ldr RT2, [RTAB, RT2]; \
+       eor rna, rna, RT1, ror #24; \
+       ldr rd,  [RTAB, rd]; \
+       \
+       eor rnb, rnb, RT2, ror #16; \
+       preload_key((next_r) - 1, ra); \
+       eor rnc, rnc, rd, ror #8;
+
+#define do_lastdecround(ra, rb, rc, rd, rna, rnb, rnc, rnd) \
+       and RT0, RMASK, ra; \
+       and RT1, RMASK, ra, lsr#8; \
+       and RT2, RMASK, ra, lsr#16; \
+       ldrb rna, [RTAB, RT0]; \
+       mov ra,  ra, lsr#24; \
+       ldrb rnb, [RTAB, RT1]; \
+       and RT0, RMASK, rb; \
+       ldrb rnc, [RTAB, RT2]; \
+       mov rnb, rnb, ror #24; \
+       ldrb rnd, [RTAB, ra]; \
+       and RT1, RMASK, rb, lsr#8; \
+       mov rnc, rnc, ror #16; \
+       and RT2, RMASK, rb, lsr#16; \
+       mov rnd, rnd, ror #8; \
+       ldrb RT0, [RTAB, RT0]; \
+       mov rb,  rb, lsr#24; \
+       ldrb RT1, [RTAB, RT1]; \
+       \
+       orr rnb, rnb, RT0; \
+       ldrb RT2, [RTAB, RT2]; \
+       and RT0, RMASK, rc; \
+       ldrb rb,  [RTAB, rb]; \
+       orr rnc, rnc, RT1, ror #24; \
+       and RT1, RMASK, rc, lsr#8; \
+       orr rnd, rnd, RT2, ror #16; \
+       and RT2, RMASK, rc, lsr#16; \
+       orr rna, rna, rb, ror #8; \
+       ldrb RT0, [RTAB, RT0]; \
+       mov rc,  rc, lsr#24; \
+       ldrb RT1, [RTAB, RT1]; \
+       \
+       orr rnc, rnc, RT0; \
+       ldrb RT2, [RTAB, RT2]; \
+       and RT0, RMASK, rd; \
+       ldrb rc,  [RTAB, rc]; \
+       orr rnd, rnd, RT1, ror #24; \
+       and RT1, RMASK, rd, lsr#8; \
+       orr rna, rna, RT2, ror #16; \
+       ldrb RT0, [RTAB, RT0]; \
+       and RT2, RMASK, rd, lsr#16; \
+       ldrb RT1, [RTAB, RT1]; \
+       orr rnb, rnb, rc, ror #8; \
+       ldrb RT2, [RTAB, RT2]; \
+       mov rd,  rd, lsr#24; \
+       ldrb rd,  [RTAB, rd]; \
+       \
+       orr rnd, rnd, RT0; \
+       orr rna, rna, RT1, ror #24; \
+       orr rnb, rnb, RT2, ror #16; \
+       orr rnc, rnc, rd, ror #8;
+
+#define firstdecround(round, ra, rb, rc, rd, rna, rnb, rnc, rnd) \
+       addroundkey_dec(((round) + 1), ra, rb, rc, rd, rna, rnb, rnc, rnd); \
+       do_decround(round, ra, rb, rc, rd, rna, rnb, rnc, rnd, 
preload_first_key);
+
+#define decround(round, ra, rb, rc, rd, rna, rnb, rnc, rnd, preload_key) \
+       do_decround(round, ra, rb, rc, rd, rna, rnb, rnc, rnd, preload_key);
+
+#define set_last_round_rmask(_, __) \
+       mov RMASK, #0xff;
+
+#define lastdecround(round, ra, rb, rc, rd, rna, rnb, rnc, rnd) \
+       add RTAB, #(4 * 256); \
+       do_lastdecround(ra, rb, rc, rd, rna, rnb, rnc, rnd); \
+       addroundkey(rna, rnb, rnc, rnd, ra, rb, rc, rd, dummy);
+
+.align 3
+.globl _gcry_aes_arm_decrypt_block
+.type   _gcry_aes_arm_decrypt_block,%function;
+
+_gcry_aes_arm_decrypt_block:
+       /* input:
+        *      %r0: keysched, CTX
+        *      %r1: dst
+        *      %r2: src
+        *      %r3: number of rounds.. 10, 12 or 14
+        *      %st+0: decryption table
+        */
+       push {%r4-%r11, %ip, %lr};
+
+       /* read input block */
+
+       /* test if src is unaligned */
+       tst     %r2, #3;
+       beq     1f;
+
+       /* unaligned load */
+       ldr_unaligned_le(RA, %r2, 0, RNA);
+       ldr_unaligned_le(RB, %r2, 4, RNB);
+       ldr_unaligned_le(RC, %r2, 8, RNA);
+       ldr_unaligned_le(RD, %r2, 12, RNB);
+       b       2f;
+.ltorg
+1:
+       /* aligned load */
+       ldm     %r2, {RA, RB, RC, RD};
+#ifndef __ARMEL__
+       rev     RA, RA;
+       rev     RB, RB;
+       rev     RC, RC;
+       rev     RD, RD;
+#endif
+2:
+       ldr     RTAB, [%sp, #40];
+       sub     %sp, #16;
+
+       mov     RMASK, #0xff;
+       str     %r1, [%sp, #4];         /* dst */
+       mov     RMASK, RMASK, lsl#2;    /* byte mask */
+
+       cmp     %r3, #12;
+       bge     .Ldec_256;
+
+       firstdecround(9, RA, RB, RC, RD, RNA, RNB, RNC, RND);
+.Ldec_tail:
+       decround(8, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key);
+       decround(7, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key);
+       decround(6, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key);
+       decround(5, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key);
+       decround(4, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key);
+       decround(3, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key);
+       decround(2, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key);
+       decround(1, RA, RB, RC, RD, RNA, RNB, RNC, RND, set_last_round_rmask);
+       lastdecround(0, RNA, RNB, RNC, RND, RA, RB, RC, RD);
+
+       ldr     RT0, [%sp, #4];         /* dst */
+       add     %sp, #16;
+
+       /* store output block */
+
+       /* test if dst is unaligned */
+       tst     RT0, #3;
+       beq     1f;
+
+       /* unaligned store */
+       str_unaligned_le(RA, RT0, 0, RNA, RNB);
+       str_unaligned_le(RB, RT0, 4, RNA, RNB);
+       str_unaligned_le(RC, RT0, 8, RNA, RNB);
+       str_unaligned_le(RD, RT0, 12, RNA, RNB);
+       b       2f;
+.ltorg
+1:
+       /* aligned store */
+#ifndef __ARMEL__
+       rev     RA, RA;
+       rev     RB, RB;
+       rev     RC, RC;
+       rev     RD, RD;
+#endif
+       /* write output block */
+       stm     RT0, {RA, RB, RC, RD};
+2:
+       mov     r0, #(10 * 4);
+       pop {%r4-%r11, %ip, %pc};
+
+.ltorg
+.Ldec_256:
+       beq .Ldec_192;
+
+       firstdecround(13, RA, RB, RC, RD, RNA, RNB, RNC, RND);
+       decround(12, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key);
+       decround(11, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key);
+       decround(10, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key);
+       decround(9, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key);
+
+       b .Ldec_tail;
+
+.ltorg
+.Ldec_192:
+       firstdecround(11, RA, RB, RC, RD, RNA, RNB, RNC, RND);
+       decround(10, RNA, RNB, RNC, RND, RA, RB, RC, RD, preload_first_key);
+       decround(9, RA, RB, RC, RD, RNA, RNB, RNC, RND, preload_first_key);
+
+       b .Ldec_tail;
+.size _gcry_aes_arm_encrypt_block,.-_gcry_aes_arm_encrypt_block;
+
+#endif /*HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS*/
+#endif /*__ARMEL__ */
diff --git a/grub-core/lib/libgcrypt/cipher/rijndael-armv8-aarch32-ce.S 
b/grub-core/lib/libgcrypt/cipher/rijndael-armv8-aarch32-ce.S
new file mode 100644
index 000000000..1eafa93ed
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/rijndael-armv8-aarch32-ce.S
@@ -0,0 +1,1988 @@
+/* rijndael-armv8-aarch32-ce.S - ARMv8/CE accelerated AES
+ * Copyright (C) 2016 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+
+#if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) && \
+    defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) && \
+    defined(HAVE_GCC_INLINE_ASM_AARCH32_CRYPTO)
+
+.syntax unified
+.arch armv8-a
+.fpu crypto-neon-fp-armv8
+.arm
+
+.text
+
+#ifdef __PIC__
+#  define GET_DATA_POINTER(reg, name, rtmp) \
+               ldr reg, 1f; \
+               ldr rtmp, 2f; \
+               b 3f; \
+       1:      .word _GLOBAL_OFFSET_TABLE_-(3f+8); \
+       2:      .word name(GOT); \
+       3:      add reg, pc, reg; \
+               ldr reg, [reg, rtmp];
+#else
+#  define GET_DATA_POINTER(reg, name, rtmp) ldr reg, =name
+#endif
+
+
+/* AES macros */
+
+#define aes_preload_keys(keysched, rekeysched) \
+        vldmia   keysched!, {q5-q7}; \
+        mov      rekeysched, keysched; \
+        vldmialo keysched!, {q8-q15}; /* 128-bit */ \
+        addeq    keysched, #(2*16); \
+        vldmiaeq keysched!, {q10-q15}; /* 192-bit */ \
+        addhi    keysched, #(4*16); \
+        vldmiahi keysched!, {q12-q15}; /* 256-bit */ \
+
+#define do_aes_one128(ed, mcimc, qo, qb) \
+        aes##ed.8    qb, q5; \
+        aes##mcimc.8 qb, qb; \
+        aes##ed.8    qb, q6; \
+        aes##mcimc.8 qb, qb; \
+        aes##ed.8    qb, q7; \
+        aes##mcimc.8 qb, qb; \
+        aes##ed.8    qb, q8; \
+        aes##mcimc.8 qb, qb; \
+        aes##ed.8    qb, q9; \
+        aes##mcimc.8 qb, qb; \
+        aes##ed.8    qb, q10; \
+        aes##mcimc.8 qb, qb; \
+        aes##ed.8    qb, q11; \
+        aes##mcimc.8 qb, qb; \
+        aes##ed.8    qb, q12; \
+        aes##mcimc.8 qb, qb; \
+        aes##ed.8    qb, q13; \
+        aes##mcimc.8 qb, qb; \
+        aes##ed.8    qb, q14; \
+        veor         qo, qb, q15;
+
+#define do_aes_one128re(ed, mcimc, qo, qb, keysched, rekeysched) \
+        vldm         rekeysched, {q8-q9}; \
+        do_aes_one128(ed, mcimc, qo, qb);
+
+#define do_aes_one192(ed, mcimc, qo, qb, keysched, rekeysched) \
+        vldm         rekeysched!, {q8}; \
+        aes##ed.8    qb, q5; \
+        aes##mcimc.8 qb, qb; \
+        vldm         rekeysched, {q9}; \
+        aes##ed.8    qb, q6; \
+        aes##mcimc.8 qb, qb; \
+        aes##ed.8    qb, q7; \
+        aes##mcimc.8 qb, qb; \
+        aes##ed.8    qb, q8; \
+        aes##mcimc.8 qb, qb; \
+        vldmia       keysched!, {q8}; \
+        aes##ed.8    qb, q9; \
+        aes##mcimc.8 qb, qb; \
+        sub          rekeysched, #(1*16); \
+        aes##ed.8    qb, q10; \
+        aes##mcimc.8 qb, qb; \
+        vldm         keysched, {q9}; \
+        aes##ed.8    qb, q11; \
+        aes##mcimc.8 qb, qb; \
+        aes##ed.8    qb, q12; \
+        aes##mcimc.8 qb, qb; \
+        sub          keysched, #16; \
+        aes##ed.8    qb, q13; \
+        aes##mcimc.8 qb, qb; \
+        aes##ed.8    qb, q14; \
+        aes##mcimc.8 qb, qb; \
+        aes##ed.8    qb, q15; \
+        aes##mcimc.8 qb, qb; \
+        aes##ed.8    qb, q8; \
+        veor         qo, qb, q9; \
+
+#define do_aes_one256(ed, mcimc, qo, qb, keysched, rekeysched) \
+        vldmia       rekeysched!, {q8}; \
+        aes##ed.8    qb, q5; \
+        aes##mcimc.8 qb, qb; \
+        vldmia       rekeysched!, {q9}; \
+        aes##ed.8    qb, q6; \
+        aes##mcimc.8 qb, qb; \
+        vldmia       rekeysched!, {q10}; \
+        aes##ed.8    qb, q7; \
+        aes##mcimc.8 qb, qb; \
+        vldm         rekeysched, {q11}; \
+        aes##ed.8    qb, q8; \
+        aes##mcimc.8 qb, qb; \
+        vldmia       keysched!, {q8}; \
+        aes##ed.8    qb, q9; \
+        aes##mcimc.8 qb, qb; \
+        aes##ed.8    qb, q10; \
+        aes##mcimc.8 qb, qb; \
+        vldmia       keysched!, {q9}; \
+        aes##ed.8    qb, q11; \
+        aes##mcimc.8 qb, qb; \
+        sub          rekeysched, #(3*16); \
+        aes##ed.8    qb, q12; \
+        aes##mcimc.8 qb, qb; \
+        vldmia       keysched!, {q10}; \
+        aes##ed.8    qb, q13; \
+        aes##mcimc.8 qb, qb; \
+        aes##ed.8    qb, q14; \
+        aes##mcimc.8 qb, qb; \
+        vldm         keysched, {q11}; \
+        aes##ed.8    qb, q15; \
+        aes##mcimc.8 qb, qb; \
+        aes##ed.8    qb, q8; \
+        aes##mcimc.8 qb, qb; \
+        aes##ed.8    qb, q9; \
+        aes##mcimc.8 qb, qb; \
+        aes##ed.8    qb, q10; \
+        veor         qo, qb, q11; \
+        sub          keysched, #(3*16); \
+
+#define aes_round_4(ed, mcimc, b0, b1, b2, b3, key) \
+        aes##ed.8    b0, key; \
+        aes##mcimc.8 b0, b0; \
+          aes##ed.8    b1, key; \
+          aes##mcimc.8 b1, b1; \
+            aes##ed.8    b2, key; \
+            aes##mcimc.8 b2, b2; \
+              aes##ed.8    b3, key; \
+              aes##mcimc.8 b3, b3;
+
+#define do_aes_4_128(ed, mcimc, b0, b1, b2, b3) \
+        aes_round_4(ed, mcimc, b0, b1, b2, b3, q5); \
+        aes_round_4(ed, mcimc, b0, b1, b2, b3, q6); \
+        aes_round_4(ed, mcimc, b0, b1, b2, b3, q7); \
+        aes_round_4(ed, mcimc, b0, b1, b2, b3, q8); \
+        aes_round_4(ed, mcimc, b0, b1, b2, b3, q9); \
+        aes_round_4(ed, mcimc, b0, b1, b2, b3, q10); \
+        aes_round_4(ed, mcimc, b0, b1, b2, b3, q11); \
+        aes_round_4(ed, mcimc, b0, b1, b2, b3, q12); \
+        aes_round_4(ed, mcimc, b0, b1, b2, b3, q13); \
+        aes##ed.8    b0, q14; \
+        veor         b0, b0, q15; \
+        aes##ed.8    b1, q14; \
+        veor         b1, b1, q15; \
+        aes##ed.8    b2, q14; \
+        veor         b2, b2, q15; \
+        aes##ed.8    b3, q14; \
+        veor         b3, b3, q15;
+
+#define do_aes_4_128re(ed, mcimc, b0, b1, b2, b3, keysched, rekeysched) \
+        vldm         rekeysched, {q8-q9}; \
+        do_aes_4_128(ed, mcimc, b0, b1, b2, b3);
+
+#define do_aes_4_192(ed, mcimc, b0, b1, b2, b3, keysched, rekeysched) \
+        vldm         rekeysched!, {q8}; \
+        aes_round_4(ed, mcimc, b0, b1, b2, b3, q5); \
+        vldm         rekeysched, {q9}; \
+        aes_round_4(ed, mcimc, b0, b1, b2, b3, q6); \
+        aes_round_4(ed, mcimc, b0, b1, b2, b3, q7); \
+        aes_round_4(ed, mcimc, b0, b1, b2, b3, q8); \
+        vldmia       keysched!, {q8}; \
+        aes_round_4(ed, mcimc, b0, b1, b2, b3, q9); \
+        sub          rekeysched, #(1*16); \
+        aes_round_4(ed, mcimc, b0, b1, b2, b3, q10); \
+        vldm         keysched, {q9}; \
+        aes_round_4(ed, mcimc, b0, b1, b2, b3, q11); \
+        aes_round_4(ed, mcimc, b0, b1, b2, b3, q12); \
+        sub          keysched, #16; \
+        aes_round_4(ed, mcimc, b0, b1, b2, b3, q13); \
+        aes_round_4(ed, mcimc, b0, b1, b2, b3, q14); \
+        aes_round_4(ed, mcimc, b0, b1, b2, b3, q15); \
+        aes##ed.8    b0, q8; \
+        veor         b0, b0, q9; \
+        aes##ed.8    b1, q8; \
+        veor         b1, b1, q9; \
+        aes##ed.8    b2, q8; \
+        veor         b2, b2, q9; \
+        aes##ed.8    b3, q8; \
+        veor         b3, b3, q9;
+
+#define do_aes_4_256(ed, mcimc, b0, b1, b2, b3, keysched, rekeysched) \
+        vldmia       rekeysched!, {q8}; \
+        aes_round_4(ed, mcimc, b0, b1, b2, b3, q5); \
+        vldmia       rekeysched!, {q9}; \
+        aes_round_4(ed, mcimc, b0, b1, b2, b3, q6); \
+        vldmia       rekeysched!, {q10}; \
+        aes_round_4(ed, mcimc, b0, b1, b2, b3, q7); \
+        vldm         rekeysched, {q11}; \
+        aes_round_4(ed, mcimc, b0, b1, b2, b3, q8); \
+        vldmia       keysched!, {q8}; \
+        aes_round_4(ed, mcimc, b0, b1, b2, b3, q9); \
+        aes_round_4(ed, mcimc, b0, b1, b2, b3, q10); \
+        vldmia       keysched!, {q9}; \
+        aes_round_4(ed, mcimc, b0, b1, b2, b3, q11); \
+        sub          rekeysched, #(3*16); \
+        aes_round_4(ed, mcimc, b0, b1, b2, b3, q12); \
+        vldmia       keysched!, {q10}; \
+        aes_round_4(ed, mcimc, b0, b1, b2, b3, q13); \
+        aes_round_4(ed, mcimc, b0, b1, b2, b3, q14); \
+        vldm         keysched, {q11}; \
+        aes_round_4(ed, mcimc, b0, b1, b2, b3, q15); \
+        aes_round_4(ed, mcimc, b0, b1, b2, b3, q8); \
+        aes_round_4(ed, mcimc, b0, b1, b2, b3, q9); \
+        sub          keysched, #(3*16); \
+        aes##ed.8    b0, q10; \
+        veor         b0, b0, q11; \
+        aes##ed.8    b1, q10; \
+        veor         b1, b1, q11; \
+        aes##ed.8    b2, q10; \
+        veor         b2, b2, q11; \
+        aes##ed.8    b3, q10; \
+        veor         b3, b3, q11;
+
+
+/* Other functional macros */
+
+#define CLEAR_REG(reg) vmov.i8 reg, #0;
+
+
+/*
+ * unsigned int _gcry_aes_enc_armv8_ce(void *keysched, byte *dst,
+ *                                     const byte *src,
+ *                                     unsigned int nrounds);
+ */
+.align 3
+.globl _gcry_aes_enc_armv8_ce
+.type  _gcry_aes_enc_armv8_ce,%function;
+_gcry_aes_enc_armv8_ce:
+  /* input:
+   *    r0: keysched
+   *    r1: dst
+   *    r2: src
+   *    r3: nrounds
+   */
+
+  vldmia r0!, {q1-q3} /* load 3 round keys */
+
+  cmp r3, #12
+
+  vld1.8 {q0}, [r2]
+
+  bhi .Lenc1_256
+  beq .Lenc1_192
+
+.Lenc1_128:
+
+.Lenc1_tail:
+  vldmia r0, {q8-q15} /* load 8 round keys */
+
+  aese.8  q0, q1
+  aesmc.8 q0, q0
+  CLEAR_REG(q1)
+
+  aese.8  q0, q2
+  aesmc.8 q0, q0
+  CLEAR_REG(q2)
+
+  aese.8  q0, q3
+  aesmc.8 q0, q0
+  CLEAR_REG(q3)
+
+  aese.8  q0, q8
+  aesmc.8 q0, q0
+  CLEAR_REG(q8)
+
+  aese.8  q0, q9
+  aesmc.8 q0, q0
+  CLEAR_REG(q9)
+
+  aese.8  q0, q10
+  aesmc.8 q0, q0
+  CLEAR_REG(q10)
+
+  aese.8  q0, q11
+  aesmc.8 q0, q0
+  CLEAR_REG(q11)
+
+  aese.8  q0, q12
+  aesmc.8 q0, q0
+  CLEAR_REG(q12)
+
+  aese.8  q0, q13
+  aesmc.8 q0, q0
+  CLEAR_REG(q13)
+
+  aese.8  q0, q14
+  veor    q0, q15
+  CLEAR_REG(q14)
+  CLEAR_REG(q15)
+
+  vst1.8 {q0}, [r1]
+  CLEAR_REG(q0)
+
+  mov r0, #0
+  bx lr
+
+.Lenc1_192:
+  aese.8  q0, q1
+  aesmc.8 q0, q0
+  vmov q1, q3
+
+  aese.8  q0, q2
+  aesmc.8 q0, q0
+  vldm r0!, {q2-q3} /* load 3 round keys */
+
+  b .Lenc1_tail
+
+.Lenc1_256:
+  vldm r0!, {q15}   /* load 1 round key */
+  aese.8  q0, q1
+  aesmc.8 q0, q0
+
+  aese.8  q0, q2
+  aesmc.8 q0, q0
+
+  aese.8  q0, q3
+  aesmc.8 q0, q0
+  vldm r0!, {q1-q3} /* load 3 round keys */
+
+  aese.8  q0, q15
+  aesmc.8 q0, q0
+
+  b .Lenc1_tail
+.size _gcry_aes_enc_armv8_ce,.-_gcry_aes_enc_armv8_ce;
+
+
+/*
+ * unsigned int _gcry_aes_dec_armv8_ce(void *keysched, byte *dst,
+ *                                     const byte *src,
+ *                                     unsigned int nrounds);
+ */
+.align 3
+.globl _gcry_aes_dec_armv8_ce
+.type  _gcry_aes_dec_armv8_ce,%function;
+_gcry_aes_dec_armv8_ce:
+  /* input:
+   *    r0: keysched
+   *    r1: dst
+   *    r2: src
+   *    r3: nrounds
+   */
+
+  vldmia r0!, {q1-q3} /* load 3 round keys */
+
+  cmp r3, #12
+
+  vld1.8 {q0}, [r2]
+
+  bhi .Ldec1_256
+  beq .Ldec1_192
+
+.Ldec1_128:
+
+.Ldec1_tail:
+  vldmia r0, {q8-q15} /* load 8 round keys */
+
+  aesd.8   q0, q1
+  aesimc.8 q0, q0
+  CLEAR_REG(q1)
+
+  aesd.8   q0, q2
+  aesimc.8 q0, q0
+  CLEAR_REG(q2)
+
+  aesd.8   q0, q3
+  aesimc.8 q0, q0
+  CLEAR_REG(q3)
+
+  aesd.8   q0, q8
+  aesimc.8 q0, q0
+  CLEAR_REG(q8)
+
+  aesd.8   q0, q9
+  aesimc.8 q0, q0
+  CLEAR_REG(q9)
+
+  aesd.8   q0, q10
+  aesimc.8 q0, q0
+  CLEAR_REG(q10)
+
+  aesd.8   q0, q11
+  aesimc.8 q0, q0
+  CLEAR_REG(q11)
+
+  aesd.8   q0, q12
+  aesimc.8 q0, q0
+  CLEAR_REG(q12)
+
+  aesd.8   q0, q13
+  aesimc.8 q0, q0
+  CLEAR_REG(q13)
+
+  aesd.8   q0, q14
+  veor     q0, q15
+  CLEAR_REG(q14)
+  CLEAR_REG(q15)
+
+  vst1.8 {q0}, [r1]
+  CLEAR_REG(q0)
+
+  mov r0, #0
+  bx lr
+
+.Ldec1_192:
+  aesd.8   q0, q1
+  aesimc.8 q0, q0
+  vmov q1, q3
+
+  aesd.8   q0, q2
+  aesimc.8 q0, q0
+  vldm r0!, {q2-q3} /* load 3 round keys */
+
+  b .Ldec1_tail
+
+.Ldec1_256:
+  vldm r0!, {q15}   /* load 1 round key */
+  aesd.8   q0, q1
+  aesimc.8 q0, q0
+
+  aesd.8   q0, q2
+  aesimc.8 q0, q0
+
+  aesd.8  q0, q3
+  aesimc.8 q0, q0
+  vldm r0!, {q1-q3} /* load 3 round keys */
+
+  aesd.8   q0, q15
+  aesimc.8 q0, q0
+
+  b .Ldec1_tail
+.size _gcry_aes_dec_armv8_ce,.-_gcry_aes_dec_armv8_ce;
+
+
+/*
+ * void _gcry_aes_cbc_enc_armv8_ce (const void *keysched,
+ *                                  unsigned char *outbuf,
+ *                                  const unsigned char *inbuf,
+ *                                  unsigned char *iv, size_t nblocks,
+ *                                  int cbc_mac, unsigned int nrounds);
+ */
+
+.align 3
+.globl _gcry_aes_cbc_enc_armv8_ce
+.type  _gcry_aes_cbc_enc_armv8_ce,%function;
+_gcry_aes_cbc_enc_armv8_ce:
+  /* input:
+   *    r0: keysched
+   *    r1: outbuf
+   *    r2: inbuf
+   *    r3: iv
+   *    %st+0: nblocks => r4
+   *    %st+4: cbc_mac => r5
+   *    %st+8: nrounds => r6
+   */
+
+  push {r4-r6,lr} /* 4*4 = 16b */
+  ldr r4, [sp, #(16+0)]
+  ldr r5, [sp, #(16+4)]
+  cmp r4, #0
+  ldr r6, [sp, #(16+8)]
+  beq .Lcbc_enc_skip
+  cmp r5, #0
+  vpush {q4-q7}
+  moveq r5, #16
+  movne r5, #0
+
+  cmp r6, #12
+  vld1.8 {q1}, [r3] /* load IV */
+
+  aes_preload_keys(r0, lr);
+
+  beq .Lcbc_enc_loop192
+  bhi .Lcbc_enc_loop256
+
+#define CBC_ENC(bits, ...) \
+  .Lcbc_enc_loop##bits: \
+    vld1.8 {q0}, [r2]!; /* load plaintext */ \
+    veor q1, q0, q1; \
+    subs r4, r4, #1; \
+    \
+    do_aes_one##bits(e, mc, q1, q1, ##__VA_ARGS__); \
+    \
+    vst1.8 {q1}, [r1], r5; /* store ciphertext */ \
+    \
+    bne .Lcbc_enc_loop##bits; \
+    b .Lcbc_enc_done;
+
+  CBC_ENC(128)
+  CBC_ENC(192, r0, lr)
+  CBC_ENC(256, r0, lr)
+
+#undef CBC_ENC
+
+.Lcbc_enc_done:
+  vst1.8 {q1}, [r3] /* store IV */
+
+  CLEAR_REG(q0)
+  CLEAR_REG(q1)
+  CLEAR_REG(q2)
+  CLEAR_REG(q3)
+  CLEAR_REG(q8)
+  CLEAR_REG(q9)
+  vpop {q4-q7}
+  CLEAR_REG(q10)
+  CLEAR_REG(q11)
+  CLEAR_REG(q12)
+  CLEAR_REG(q13)
+  CLEAR_REG(q14)
+
+.Lcbc_enc_skip:
+  pop {r4-r6,pc}
+.size _gcry_aes_cbc_enc_armv8_ce,.-_gcry_aes_cbc_enc_armv8_ce;
+
+
+/*
+ * void _gcry_aes_cbc_dec_armv8_ce (const void *keysched,
+ *                                  unsigned char *outbuf,
+ *                                  const unsigned char *inbuf,
+ *                                  unsigned char *iv, unsigned int nrounds);
+ */
+
+.align 3
+.globl _gcry_aes_cbc_dec_armv8_ce
+.type  _gcry_aes_cbc_dec_armv8_ce,%function;
+_gcry_aes_cbc_dec_armv8_ce:
+  /* input:
+   *    r0: keysched
+   *    r1: outbuf
+   *    r2: inbuf
+   *    r3: iv
+   *    %st+0: nblocks => r4
+   *    %st+4: nrounds => r5
+   */
+
+  push {r4-r6,lr} /* 4*4 = 16b */
+  ldr r4, [sp, #(16+0)]
+  ldr r5, [sp, #(16+4)]
+  cmp r4, #0
+  beq .Lcbc_dec_skip
+  vpush {q4-q7}
+
+  cmp r5, #12
+  vld1.8 {q0}, [r3] /* load IV */
+
+  aes_preload_keys(r0, r6);
+
+  beq .Lcbc_dec_entry_192
+  bhi .Lcbc_dec_entry_256
+
+#define CBC_DEC(bits, ...) \
+  .Lcbc_dec_entry_##bits: \
+    cmp r4, #4; \
+    blo .Lcbc_dec_loop_##bits; \
+    \
+  .Lcbc_dec_loop4_##bits: \
+    \
+    vld1.8 {q1-q2}, [r2]!; /* load ciphertext */ \
+    sub r4, r4, #4; \
+    vld1.8 {q3-q4}, [r2]; /* load ciphertext */ \
+    cmp r4, #4; \
+    sub r2, #32; \
+    \
+    do_aes_4_##bits(d, imc, q1, q2, q3, q4, ##__VA_ARGS__); \
+    \
+    veor q1, q1, q0; \
+    vld1.8 {q0}, [r2]!; /* load next IV */ \
+    veor q2, q2, q0; \
+    vld1.8 {q0}, [r2]!; /* load next IV */ \
+    vst1.8 {q1-q2}, [r1]!; /* store plaintext */ \
+    veor q3, q3, q0; \
+    vld1.8 {q0}, [r2]!; /* load next IV */ \
+    veor q4, q4, q0; \
+    vld1.8 {q0}, [r2]!; /* load next IV */ \
+    vst1.8 {q3-q4}, [r1]!; /* store plaintext */ \
+    \
+    bhs .Lcbc_dec_loop4_##bits; \
+    cmp r4, #0; \
+    beq .Lcbc_dec_done; \
+    \
+  .Lcbc_dec_loop_##bits: \
+    vld1.8 {q1}, [r2]!; /* load ciphertext */ \
+    subs r4, r4, #1; \
+    vmov q2, q1; \
+    \
+    do_aes_one##bits(d, imc, q1, q1, ##__VA_ARGS__); \
+    \
+    veor q1, q1, q0; \
+    vmov q0, q2; \
+    vst1.8 {q1}, [r1]!; /* store plaintext */ \
+    \
+    bne .Lcbc_dec_loop_##bits; \
+    b .Lcbc_dec_done;
+
+  CBC_DEC(128)
+  CBC_DEC(192, r0, r6)
+  CBC_DEC(256, r0, r6)
+
+#undef CBC_DEC
+
+.Lcbc_dec_done:
+  vst1.8 {q0}, [r3] /* store IV */
+
+  CLEAR_REG(q0)
+  CLEAR_REG(q1)
+  CLEAR_REG(q2)
+  CLEAR_REG(q3)
+  CLEAR_REG(q8)
+  CLEAR_REG(q9)
+  vpop {q4-q7}
+  CLEAR_REG(q10)
+  CLEAR_REG(q11)
+  CLEAR_REG(q12)
+  CLEAR_REG(q13)
+  CLEAR_REG(q14)
+
+.Lcbc_dec_skip:
+  pop {r4-r6,pc}
+.size _gcry_aes_cbc_dec_armv8_ce,.-_gcry_aes_cbc_dec_armv8_ce;
+
+
+/*
+ * void _gcry_aes_cfb_enc_armv8_ce (const void *keysched,
+ *                                  unsigned char *outbuf,
+ *                                  const unsigned char *inbuf,
+ *                                  unsigned char *iv, unsigned int nrounds);
+ */
+
+.align 3
+.globl _gcry_aes_cfb_enc_armv8_ce
+.type  _gcry_aes_cfb_enc_armv8_ce,%function;
+_gcry_aes_cfb_enc_armv8_ce:
+  /* input:
+   *    r0: keysched
+   *    r1: outbuf
+   *    r2: inbuf
+   *    r3: iv
+   *    %st+0: nblocks => r4
+   *    %st+4: nrounds => r5
+   */
+
+  push {r4-r6,lr} /* 4*4 = 16b */
+  ldr r4, [sp, #(16+0)]
+  ldr r5, [sp, #(16+4)]
+  cmp r4, #0
+  beq .Lcfb_enc_skip
+  vpush {q4-q7}
+
+  cmp r5, #12
+  vld1.8 {q0}, [r3] /* load IV */
+
+  aes_preload_keys(r0, r6);
+
+  beq .Lcfb_enc_entry_192
+  bhi .Lcfb_enc_entry_256
+
+#define CFB_ENC(bits, ...) \
+  .Lcfb_enc_entry_##bits: \
+  .Lcfb_enc_loop_##bits: \
+    vld1.8 {q1}, [r2]!; /* load plaintext */ \
+    subs r4, r4, #1; \
+    \
+    do_aes_one##bits(e, mc, q0, q0, ##__VA_ARGS__); \
+    \
+    veor q0, q1, q0; \
+    vst1.8 {q0}, [r1]!; /* store ciphertext */ \
+    \
+    bne .Lcfb_enc_loop_##bits; \
+    b .Lcfb_enc_done;
+
+  CFB_ENC(128)
+  CFB_ENC(192, r0, r6)
+  CFB_ENC(256, r0, r6)
+
+#undef CFB_ENC
+
+.Lcfb_enc_done:
+  vst1.8 {q0}, [r3] /* store IV */
+
+  CLEAR_REG(q0)
+  CLEAR_REG(q1)
+  CLEAR_REG(q2)
+  CLEAR_REG(q3)
+  CLEAR_REG(q8)
+  CLEAR_REG(q9)
+  vpop {q4-q7}
+  CLEAR_REG(q10)
+  CLEAR_REG(q11)
+  CLEAR_REG(q12)
+  CLEAR_REG(q13)
+  CLEAR_REG(q14)
+
+.Lcfb_enc_skip:
+  pop {r4-r6,pc}
+.size _gcry_aes_cfb_enc_armv8_ce,.-_gcry_aes_cfb_enc_armv8_ce;
+
+
+/*
+ * void _gcry_aes_cfb_dec_armv8_ce (const void *keysched,
+ *                                  unsigned char *outbuf,
+ *                                  const unsigned char *inbuf,
+ *                                  unsigned char *iv, unsigned int nrounds);
+ */
+
+.align 3
+.globl _gcry_aes_cfb_dec_armv8_ce
+.type  _gcry_aes_cfb_dec_armv8_ce,%function;
+_gcry_aes_cfb_dec_armv8_ce:
+  /* input:
+   *    r0: keysched
+   *    r1: outbuf
+   *    r2: inbuf
+   *    r3: iv
+   *    %st+0: nblocks => r4
+   *    %st+4: nrounds => r5
+   */
+
+  push {r4-r6,lr} /* 4*4 = 16b */
+  ldr r4, [sp, #(16+0)]
+  ldr r5, [sp, #(16+4)]
+  cmp r4, #0
+  beq .Lcfb_dec_skip
+  vpush {q4-q7}
+
+  cmp r5, #12
+  vld1.8 {q0}, [r3] /* load IV */
+
+  aes_preload_keys(r0, r6);
+
+  beq .Lcfb_dec_entry_192
+  bhi .Lcfb_dec_entry_256
+
+#define CFB_DEC(bits, ...) \
+  .Lcfb_dec_entry_##bits: \
+    cmp r4, #4; \
+    blo .Lcfb_dec_loop_##bits; \
+    \
+  .Lcfb_dec_loop4_##bits: \
+    \
+    vld1.8 {q2-q3}, [r2]!; /* load ciphertext */ \
+    vmov q1, q0; \
+    sub r4, r4, #4; \
+    vld1.8 {q4}, [r2]; /* load ciphertext */ \
+    sub r2, #32; \
+    cmp r4, #4; \
+    \
+    do_aes_4_##bits(e, mc, q1, q2, q3, q4, ##__VA_ARGS__); \
+    \
+    vld1.8 {q0}, [r2]!; /* load ciphertext */ \
+    veor q1, q1, q0; \
+    vld1.8 {q0}, [r2]!; /* load ciphertext */ \
+    veor q2, q2, q0; \
+    vst1.8 {q1-q2}, [r1]!; /* store plaintext */ \
+    vld1.8 {q0}, [r2]!; \
+    veor q3, q3, q0; \
+    vld1.8 {q0}, [r2]!; /* load next IV / ciphertext */ \
+    veor q4, q4, q0; \
+    vst1.8 {q3-q4}, [r1]!; /* store plaintext */ \
+    \
+    bhs .Lcfb_dec_loop4_##bits; \
+    cmp r4, #0; \
+    beq .Lcfb_dec_done; \
+    \
+  .Lcfb_dec_loop_##bits: \
+    \
+    vld1.8 {q1}, [r2]!; /* load ciphertext */ \
+    \
+    subs r4, r4, #1; \
+    \
+    do_aes_one##bits(e, mc, q0, q0, ##__VA_ARGS__); \
+    \
+    veor q2, q1, q0; \
+    vmov q0, q1; \
+    vst1.8 {q2}, [r1]!; /* store plaintext */ \
+    \
+    bne .Lcfb_dec_loop_##bits; \
+    b .Lcfb_dec_done;
+
+  CFB_DEC(128)
+  CFB_DEC(192, r0, r6)
+  CFB_DEC(256, r0, r6)
+
+#undef CFB_DEC
+
+.Lcfb_dec_done:
+  vst1.8 {q0}, [r3] /* store IV */
+
+  CLEAR_REG(q0)
+  CLEAR_REG(q1)
+  CLEAR_REG(q2)
+  CLEAR_REG(q3)
+  CLEAR_REG(q8)
+  CLEAR_REG(q9)
+  vpop {q4-q7}
+  CLEAR_REG(q10)
+  CLEAR_REG(q11)
+  CLEAR_REG(q12)
+  CLEAR_REG(q13)
+  CLEAR_REG(q14)
+
+.Lcfb_dec_skip:
+  pop {r4-r6,pc}
+.size _gcry_aes_cfb_dec_armv8_ce,.-_gcry_aes_cfb_dec_armv8_ce;
+
+
+/*
+ * void _gcry_aes_ctr_enc_armv8_ce (const void *keysched,
+ *                                  unsigned char *outbuf,
+ *                                  const unsigned char *inbuf,
+ *                                  unsigned char *iv, unsigned int nrounds);
+ */
+
+.align 3
+.globl _gcry_aes_ctr_enc_armv8_ce
+.type  _gcry_aes_ctr_enc_armv8_ce,%function;
+_gcry_aes_ctr_enc_armv8_ce:
+  /* input:
+   *    r0: keysched
+   *    r1: outbuf
+   *    r2: inbuf
+   *    r3: iv
+   *    %st+0: nblocks => r4
+   *    %st+4: nrounds => r5
+   */
+
+  vpush {q4-q7}
+  push {r4-r12,lr} /* 4*16 + 4*10 = 104b */
+  ldr r4, [sp, #(104+0)]
+  ldr r5, [sp, #(104+4)]
+  cmp r4, #0
+  beq .Lctr_enc_skip
+
+  cmp r5, #12
+  ldm r3, {r7-r10}
+  vld1.8 {q0}, [r3] /* load IV */
+  rev r7, r7
+  rev r8, r8
+  rev r9, r9
+  rev r10, r10
+
+  aes_preload_keys(r0, r6);
+
+  beq .Lctr_enc_entry_192
+  bhi .Lctr_enc_entry_256
+
+#define CTR_ENC(bits, ...) \
+  .Lctr_enc_entry_##bits: \
+    cmp r4, #4; \
+    blo .Lctr_enc_loop_##bits; \
+    \
+  .Lctr_enc_loop4_##bits: \
+    cmp r10, #0xfffffffc; \
+    sub r4, r4, #4; \
+    blo .Lctr_enc_loop4_##bits##_nocarry; \
+    cmp r9, #0xffffffff; \
+    bne .Lctr_enc_loop4_##bits##_nocarry; \
+    \
+    adds r10, #1; \
+    vmov q1, q0; \
+    blcs .Lctr_overflow_one; \
+    rev r11, r10; \
+    vmov.32 d1[1], r11; \
+    \
+    adds r10, #1; \
+    vmov q2, q0; \
+    blcs .Lctr_overflow_one; \
+    rev r11, r10; \
+    vmov.32 d1[1], r11; \
+    \
+    adds r10, #1; \
+    vmov q3, q0; \
+    blcs .Lctr_overflow_one; \
+    rev r11, r10; \
+    vmov.32 d1[1], r11; \
+    \
+    adds r10, #1; \
+    vmov q4, q0; \
+    blcs .Lctr_overflow_one; \
+    rev r11, r10; \
+    vmov.32 d1[1], r11; \
+    \
+    b .Lctr_enc_loop4_##bits##_store_ctr; \
+    \
+  .Lctr_enc_loop4_##bits##_nocarry: \
+    \
+    veor q2, q2; \
+    vrev64.8 q1, q0; \
+    vceq.u32 d5, d5; \
+    vadd.u64 q3, q2, q2; \
+    vadd.u64 q4, q3, q2; \
+    vadd.u64 q0, q3, q3; \
+    vsub.u64 q2, q1, q2; \
+    vsub.u64 q3, q1, q3; \
+    vsub.u64 q4, q1, q4; \
+    vsub.u64 q0, q1, q0; \
+    vrev64.8 q1, q1; \
+    vrev64.8 q2, q2; \
+    vrev64.8 q3, q3; \
+    vrev64.8 q0, q0; \
+    vrev64.8 q4, q4; \
+    add r10, #4; \
+    \
+  .Lctr_enc_loop4_##bits##_store_ctr: \
+    \
+    vst1.8 {q0}, [r3]; \
+    cmp r4, #4; \
+    vld1.8 {q0}, [r2]!; /* load ciphertext */ \
+    \
+    do_aes_4_##bits(e, mc, q1, q2, q3, q4, ##__VA_ARGS__); \
+    \
+    veor q1, q1, q0; \
+    vld1.8 {q0}, [r2]!; /* load ciphertext */ \
+    vst1.8 {q1}, [r1]!; /* store plaintext */ \
+    vld1.8 {q1}, [r2]!; /* load ciphertext */ \
+    veor q2, q2, q0; \
+    veor q3, q3, q1; \
+    vld1.8 {q0}, [r2]!; /* load ciphertext */ \
+    vst1.8 {q2}, [r1]!; /* store plaintext */ \
+    veor q4, q4, q0; \
+    vld1.8 {q0}, [r3]; /* reload IV */ \
+    vst1.8 {q3-q4}, [r1]!; /* store plaintext */ \
+    \
+    bhs .Lctr_enc_loop4_##bits; \
+    cmp r4, #0; \
+    beq .Lctr_enc_done; \
+    \
+  .Lctr_enc_loop_##bits: \
+    \
+    adds r10, #1; \
+    vmov q1, q0; \
+    blcs .Lctr_overflow_one; \
+    rev r11, r10; \
+    subs r4, r4, #1; \
+    vld1.8 {q2}, [r2]!; /* load ciphertext */ \
+    vmov.32 d1[1], r11; \
+    \
+    do_aes_one##bits(e, mc, q1, q1, ##__VA_ARGS__); \
+    \
+    veor q1, q2, q1; \
+    vst1.8 {q1}, [r1]!; /* store plaintext */ \
+    \
+    bne .Lctr_enc_loop_##bits; \
+    b .Lctr_enc_done;
+
+  CTR_ENC(128)
+  CTR_ENC(192, r0, r6)
+  CTR_ENC(256, r0, r6)
+
+#undef CTR_ENC
+
+.Lctr_enc_done:
+  vst1.8 {q0}, [r3] /* store IV */
+
+  CLEAR_REG(q0)
+  CLEAR_REG(q1)
+  CLEAR_REG(q2)
+  CLEAR_REG(q3)
+  CLEAR_REG(q8)
+  CLEAR_REG(q9)
+  CLEAR_REG(q10)
+  CLEAR_REG(q11)
+  CLEAR_REG(q12)
+  CLEAR_REG(q13)
+  CLEAR_REG(q14)
+
+.Lctr_enc_skip:
+  pop {r4-r12,lr}
+  vpop {q4-q7}
+  bx lr
+
+.Lctr_overflow_one:
+  adcs r9, #0
+  adcs r8, #0
+  adc r7, #0
+  rev r11, r9
+  rev r12, r8
+  vmov.32 d1[0], r11
+  rev r11, r7
+  vmov.32 d0[1], r12
+  vmov.32 d0[0], r11
+  bx lr
+.size _gcry_aes_ctr_enc_armv8_ce,.-_gcry_aes_ctr_enc_armv8_ce;
+
+
+/*
+ * void _gcry_aes_ctr32le_enc_armv8_ce (const void *keysched,
+ *                                      unsigned char *outbuf,
+ *                                      const unsigned char *inbuf,
+ *                                      unsigned char *iv,
+ *                                      unsigned int nrounds);
+ */
+
+.align 3
+.globl _gcry_aes_ctr32le_enc_armv8_ce
+.type  _gcry_aes_ctr32le_enc_armv8_ce,%function;
+_gcry_aes_ctr32le_enc_armv8_ce:
+  /* input:
+   *    r0: keysched
+   *    r1: outbuf
+   *    r2: inbuf
+   *    r3: iv
+   *    %st+0: nblocks => r4
+   *    %st+4: nrounds => r5
+   */
+
+  vpush {q4-q7}
+  push {r4-r12,lr} /* 4*16 + 4*10 = 104b */
+  ldr r4, [sp, #(104+0)]
+  ldr r5, [sp, #(104+4)]
+  cmp r4, #0
+  beq .Lctr32le_enc_skip
+
+  cmp r5, #12
+  vld1.8 {q0}, [r3] /* load IV */
+
+  aes_preload_keys(r0, r6);
+
+  beq .Lctr32le_enc_entry_192
+  bhi .Lctr32le_enc_entry_256
+
+#define CTR_ENC(bits, ...) \
+  .Lctr32le_enc_entry_##bits: \
+    cmp r4, #4; \
+    blo .Lctr32le_enc_loop_##bits; \
+    \
+  .Lctr32le_enc_loop4_##bits: \
+    veor q2, q2; \
+    sub r4, r4, #4; \
+    vmov.i64 d4, #0xffffffff; /* q2 <= -1:0:0:0 */ \
+    vmov q1, q0; \
+    vadd.u32 q3, q2, q2; /* q3 <= -2:0:0:0 */ \
+    vadd.u32 q0, q3, q3; /* q0 <= -4:0:0:0 */ \
+    vadd.u32 q4, q3, q2; /* q4 <= -3:0:0:0 */ \
+    vsub.u32 q0, q1, q0; \
+    vsub.u32 q2, q1, q2; \
+    vst1.8 {q0}, [r3]; \
+    vsub.u32 q3, q1, q3; \
+    vsub.u32 q4, q1, q4; \
+    \
+    cmp r4, #4; \
+    vld1.8 {q0}, [r2]!; /* load ciphertext */ \
+    \
+    do_aes_4_##bits(e, mc, q1, q2, q3, q4, ##__VA_ARGS__); \
+    \
+    veor q1, q1, q0; \
+    vld1.8 {q0}, [r2]!; /* load ciphertext */ \
+    vst1.8 {q1}, [r1]!; /* store plaintext */ \
+    vld1.8 {q1}, [r2]!; /* load ciphertext */ \
+    veor q2, q2, q0; \
+    veor q3, q3, q1; \
+    vld1.8 {q0}, [r2]!; /* load ciphertext */ \
+    vst1.8 {q2}, [r1]!; /* store plaintext */ \
+    veor q4, q4, q0; \
+    vld1.8 {q0}, [r3]; /* reload IV */ \
+    vst1.8 {q3-q4}, [r1]!; /* store plaintext */ \
+    \
+    bhs .Lctr32le_enc_loop4_##bits; \
+    cmp r4, #0; \
+    beq .Lctr32le_enc_done; \
+    \
+  .Lctr32le_enc_loop_##bits: \
+    \
+    veor q2, q2; \
+    vmov q1, q0; \
+    vmov.i64 d4, #0xffffffff; /* q2 <= -1:0:0:0 */ \
+    subs r4, r4, #1; \
+    vsub.u32 q0, q0, q2; \
+    vld1.8 {q2}, [r2]!; /* load ciphertext */ \
+    \
+    do_aes_one##bits(e, mc, q1, q1, ##__VA_ARGS__); \
+    \
+    veor q1, q2, q1; \
+    vst1.8 {q1}, [r1]!; /* store plaintext */ \
+    \
+    bne .Lctr32le_enc_loop_##bits; \
+    b .Lctr32le_enc_done;
+
+  CTR_ENC(128)
+  CTR_ENC(192, r0, r6)
+  CTR_ENC(256, r0, r6)
+
+#undef CTR_ENC
+
+.Lctr32le_enc_done:
+  vst1.8 {q0}, [r3] /* store IV */
+
+  CLEAR_REG(q0)
+  CLEAR_REG(q1)
+  CLEAR_REG(q2)
+  CLEAR_REG(q3)
+  CLEAR_REG(q8)
+  CLEAR_REG(q9)
+  CLEAR_REG(q10)
+  CLEAR_REG(q11)
+  CLEAR_REG(q12)
+  CLEAR_REG(q13)
+  CLEAR_REG(q14)
+
+.Lctr32le_enc_skip:
+  pop {r4-r12,lr}
+  vpop {q4-q7}
+  bx lr
+.size _gcry_aes_ctr32le_enc_armv8_ce,.-_gcry_aes_ctr32le_enc_armv8_ce;
+
+
+/*
+ * void _gcry_aes_ocb_enc_armv8_ce (const void *keysched,
+ *                                  unsigned char *outbuf,
+ *                                  const unsigned char *inbuf,
+ *                                  unsigned char *offset,
+ *                                  unsigned char *checksum,
+ *                                  unsigned char *L_table,
+ *                                  size_t nblocks,
+ *                                  unsigned int nrounds,
+ *                                  unsigned int blkn);
+ */
+
+.align 3
+.globl _gcry_aes_ocb_enc_armv8_ce
+.type  _gcry_aes_ocb_enc_armv8_ce,%function;
+_gcry_aes_ocb_enc_armv8_ce:
+  /* input:
+   *    r0: keysched
+   *    r1: outbuf
+   *    r2: inbuf
+   *    r3: offset
+   *    %st+0: checksum => r4
+   *    %st+4: Ls => r5
+   *    %st+8: nblocks => r6  (0 < nblocks <= 32)
+   *    %st+12: nrounds => r7
+   *    %st+16: blkn => lr
+   */
+
+  vpush {q4-q7}
+  push {r4-r12,lr} /* 4*16 + 4*10 = 104b */
+  ldr r7, [sp, #(104+12)]
+  ldr r4, [sp, #(104+0)]
+  ldr r5, [sp, #(104+4)]
+  ldr r6, [sp, #(104+8)]
+  ldr lr, [sp, #(104+16)]
+
+  cmp r7, #12
+  vld1.8 {q0}, [r3] /* load offset */
+
+  aes_preload_keys(r0, r12);
+
+  beq .Locb_enc_entry_192
+  bhi .Locb_enc_entry_256
+
+#define OCB_ENC(bits, ...) \
+  .Locb_enc_entry_##bits: \
+    cmp r6, #4; \
+    add lr, #1; \
+    blo .Locb_enc_loop_##bits; \
+    \
+  .Locb_enc_loop4_##bits: \
+    \
+    /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ \
+    /* Checksum_i = Checksum_{i-1} xor P_i  */ \
+    /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i)  */ \
+    \
+    add r9, lr, #1; \
+    add r10, lr, #2; \
+    add r11, lr, #3; \
+    rbit r8, lr; \
+    add lr, lr, #4; \
+    rbit r9, r9; \
+    rbit r10, r10; \
+    rbit r11, r11; \
+    clz r8, r8; /* ntz(i+0) */ \
+    clz r9, r9; /* ntz(i+1) */ \
+    clz r10, r10; /* ntz(i+2) */ \
+    clz r11, r11; /* ntz(i+3) */ \
+    add r8, r5, r8, lsl #4; \
+    add r9, r5, r9, lsl #4; \
+    add r10, r5, r10, lsl #4; \
+    add r11, r5, r11, lsl #4; \
+    \
+    sub r6, #4; \
+    \
+    vld1.8 {q9}, [r8];     /* load L_{ntz(i+0)} */ \
+    vld1.8 {q1-q2}, [r2]!; /* load P_i+<0-1> */ \
+    vld1.8 {q8}, [r4];     /* load Checksum_{i-1} */ \
+    veor q0, q0, q9;       /* Offset_i+0 */ \
+    vld1.8 {q9}, [r9];     /* load L_{ntz(i+1)} */ \
+    veor q8, q8, q1;       /* Checksum_i+0 */ \
+    veor q1, q1, q0;       /* P_i+0 xor Offset_i+0 */\
+    vld1.8 {q3-q4}, [r2]!; /* load P_i+<2-3> */ \
+    vst1.8 {q0}, [r1]!;    /* store Offset_i+0 */\
+    veor q0, q0, q9;       /* Offset_i+1 */ \
+    vld1.8 {q9}, [r10];    /* load L_{ntz(i+2)} */ \
+    veor q8, q8, q2;       /* Checksum_i+1 */ \
+    veor q2, q2, q0;       /* P_i+1 xor Offset_i+1 */\
+    vst1.8 {q0}, [r1]!;    /* store Offset_i+1 */\
+    veor q0, q0, q9;       /* Offset_i+2 */ \
+    vld1.8 {q9}, [r11];    /* load L_{ntz(i+3)} */ \
+    veor q8, q8, q3;       /* Checksum_i+2 */ \
+    veor q3, q3, q0;       /* P_i+2 xor Offset_i+2 */\
+    vst1.8 {q0}, [r1]!;    /* store Offset_i+2 */\
+    veor q0, q0, q9;       /* Offset_i+3 */ \
+    veor q8, q8, q4;       /* Checksum_i+3 */ \
+    veor q4, q4, q0;       /* P_i+3 xor Offset_i+3 */\
+    vst1.8 {q0}, [r1];     /* store Offset_i+3 */\
+    sub r1, #(3*16); \
+    vst1.8 {q8}, [r4];     /* store Checksum_i+3 */\
+    \
+    cmp r6, #4; \
+    \
+    do_aes_4_##bits(e, mc, q1, q2, q3, q4, ##__VA_ARGS__); \
+    \
+    mov r8, r1; \
+    vld1.8 {q8-q9}, [r1]!; \
+    veor q1, q1, q8; \
+    veor q2, q2, q9; \
+    vld1.8 {q8-q9}, [r1]!; \
+    vst1.8 {q1-q2}, [r8]!; \
+    veor q3, q3, q8; \
+    veor q4, q4, q9; \
+    vst1.8 {q3-q4}, [r8]; \
+    \
+    bhs .Locb_enc_loop4_##bits; \
+    cmp r6, #0; \
+    beq .Locb_enc_done; \
+    \
+  .Locb_enc_loop_##bits: \
+    \
+    /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ \
+    /* Checksum_i = Checksum_{i-1} xor P_i  */ \
+    /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i)  */ \
+    \
+    rbit r8, lr; \
+    add lr, #1; \
+    clz r8, r8; /* ntz(i) */ \
+    add r8, r5, r8, lsl #4; \
+    \
+    vld1.8 {q1}, [r2]!; /* load plaintext */ \
+    vld1.8 {q2}, [r8]; /* load L_{ntz(i)} */ \
+    vld1.8 {q3}, [r4]; /* load checksum */ \
+    subs r6, #1; \
+    veor q0, q0, q2; \
+    veor q3, q3, q1; \
+    veor q1, q1, q0; \
+    vst1.8 {q3}, [r4]; /* store checksum */ \
+    \
+    do_aes_one##bits(e, mc, q1, q1, ##__VA_ARGS__); \
+    \
+    veor q1, q1, q0; \
+    vst1.8 {q1}, [r1]!; /* store ciphertext */ \
+    \
+    bne .Locb_enc_loop_##bits; \
+    b .Locb_enc_done;
+
+  OCB_ENC(128re, r0, r12)
+  OCB_ENC(192, r0, r12)
+  OCB_ENC(256, r0, r12)
+
+#undef OCB_ENC
+
+.Locb_enc_done:
+  vst1.8 {q0}, [r3] /* store offset */
+
+  CLEAR_REG(q0)
+  CLEAR_REG(q1)
+  CLEAR_REG(q2)
+  CLEAR_REG(q3)
+  CLEAR_REG(q8)
+  CLEAR_REG(q9)
+  CLEAR_REG(q10)
+  CLEAR_REG(q11)
+  CLEAR_REG(q12)
+  CLEAR_REG(q13)
+  CLEAR_REG(q14)
+
+  pop {r4-r12,lr}
+  vpop {q4-q7}
+  bx lr
+.size _gcry_aes_ocb_enc_armv8_ce,.-_gcry_aes_ocb_enc_armv8_ce;
+
+
+/*
+ * void _gcry_aes_ocb_dec_armv8_ce (const void *keysched,
+ *                                  unsigned char *outbuf,
+ *                                  const unsigned char *inbuf,
+ *                                  unsigned char *offset,
+ *                                  unsigned char *checksum,
+ *                                  unsigned char *L_table,
+ *                                  size_t nblocks,
+ *                                  unsigned int nrounds,
+ *                                  unsigned int blkn);
+ */
+
+.align 3
+.globl _gcry_aes_ocb_dec_armv8_ce
+.type  _gcry_aes_ocb_dec_armv8_ce,%function;
+_gcry_aes_ocb_dec_armv8_ce:
+  /* input:
+   *    r0: keysched
+   *    r1: outbuf
+   *    r2: inbuf
+   *    r3: offset
+   *    %st+0: checksum => r4
+   *    %st+4: Ls => r5
+   *    %st+8: nblocks => r6  (0 < nblocks <= 32)
+   *    %st+12: nrounds => r7
+   *    %st+16: blkn => lr
+   */
+
+  vpush {q4-q7}
+  push {r4-r12,lr} /* 4*16 + 4*10 = 104b */
+  ldr r7, [sp, #(104+12)]
+  ldr r4, [sp, #(104+0)]
+  ldr r5, [sp, #(104+4)]
+  ldr r6, [sp, #(104+8)]
+  ldr lr, [sp, #(104+16)]
+
+  cmp r7, #12
+  vld1.8 {q0}, [r3] /* load offset */
+
+  aes_preload_keys(r0, r12);
+
+  beq .Locb_dec_entry_192
+  bhi .Locb_dec_entry_256
+
+#define OCB_DEC(bits, ...) \
+  .Locb_dec_entry_##bits: \
+    cmp r6, #4; \
+    add lr, #1; \
+    blo .Locb_dec_loop_##bits; \
+    \
+  .Locb_dec_loop4_##bits: \
+    \
+    /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ \
+    /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i)  */ \
+    /* Checksum_i = Checksum_{i-1} xor P_i  */ \
+    \
+    add r9, lr, #1; \
+    add r10, lr, #2; \
+    add r11, lr, #3; \
+    rbit r8, lr; \
+    add lr, lr, #4; \
+    rbit r9, r9; \
+    rbit r10, r10; \
+    rbit r11, r11; \
+    clz r8, r8; /* ntz(i+0) */ \
+    clz r9, r9; /* ntz(i+1) */ \
+    clz r10, r10; /* ntz(i+2) */ \
+    clz r11, r11; /* ntz(i+3) */ \
+    add r8, r5, r8, lsl #4; \
+    add r9, r5, r9, lsl #4; \
+    add r10, r5, r10, lsl #4; \
+    add r11, r5, r11, lsl #4; \
+    \
+    sub r6, #4; \
+    \
+    vld1.8 {q9}, [r8];     /* load L_{ntz(i+0)} */ \
+    vld1.8 {q1-q2}, [r2]!; /* load P_i+<0-1> */ \
+    veor q0, q0, q9;       /* Offset_i+0 */ \
+    vld1.8 {q9}, [r9];     /* load L_{ntz(i+1)} */ \
+    veor q1, q1, q0;       /* P_i+0 xor Offset_i+0 */\
+    vld1.8 {q3-q4}, [r2]!; /* load P_i+<2-3> */ \
+    vst1.8 {q0}, [r1]!;    /* store Offset_i+0 */\
+    veor q0, q0, q9;       /* Offset_i+1 */ \
+    vld1.8 {q9}, [r10];    /* load L_{ntz(i+2)} */ \
+    veor q2, q2, q0;       /* P_i+1 xor Offset_i+1 */\
+    vst1.8 {q0}, [r1]!;    /* store Offset_i+1 */\
+    veor q0, q0, q9;       /* Offset_i+2 */ \
+    vld1.8 {q9}, [r11];    /* load L_{ntz(i+3)} */ \
+    veor q3, q3, q0;       /* P_i+2 xor Offset_i+2 */\
+    vst1.8 {q0}, [r1]!;    /* store Offset_i+2 */\
+    veor q0, q0, q9;       /* Offset_i+3 */ \
+    veor q4, q4, q0;       /* P_i+3 xor Offset_i+3 */\
+    vst1.8 {q0}, [r1];     /* store Offset_i+3 */\
+    sub r1, #(3*16); \
+    \
+    cmp r6, #4; \
+    \
+    do_aes_4_##bits(d, imc, q1, q2, q3, q4, ##__VA_ARGS__); \
+    \
+    mov r8, r1; \
+    vld1.8 {q8-q9}, [r1]!; \
+    veor q1, q1, q8; \
+    veor q2, q2, q9; \
+    vld1.8 {q8-q9}, [r1]!; \
+    vst1.8 {q1-q2}, [r8]!; \
+    veor q1, q1, q2; \
+    vld1.8 {q2}, [r4];     /* load Checksum_{i-1} */ \
+    veor q3, q3, q8; \
+    veor q1, q1, q3; \
+    veor q4, q4, q9; \
+    veor q1, q1, q4; \
+    vst1.8 {q3-q4}, [r8]; \
+    veor q2, q2, q1; \
+    vst1.8 {q2}, [r4];     /* store Checksum_i+3 */ \
+    \
+    bhs .Locb_dec_loop4_##bits; \
+    cmp r6, #0; \
+    beq .Locb_dec_done; \
+    \
+  .Locb_dec_loop_##bits: \
+    \
+    /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ \
+    /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i)  */ \
+    /* Checksum_i = Checksum_{i-1} xor P_i  */ \
+    \
+    rbit r8, lr; \
+    add lr, #1; \
+    clz r8, r8; /* ntz(i) */ \
+    add r8, r5, r8, lsl #4; \
+    \
+    vld1.8 {q2}, [r8]; /* load L_{ntz(i)} */ \
+    vld1.8 {q1}, [r2]!; /* load ciphertext */ \
+    subs r6, #1; \
+    veor q0, q0, q2; \
+    veor q1, q1, q0; \
+    \
+    do_aes_one##bits(d, imc, q1, q1, ##__VA_ARGS__) \
+    \
+    vld1.8 {q2}, [r4]; /* load checksum */ \
+    veor q1, q1, q0; \
+    vst1.8 {q1}, [r1]!; /* store plaintext */ \
+    veor q2, q2, q1; \
+    vst1.8 {q2}, [r4]; /* store checksum */ \
+    \
+    bne .Locb_dec_loop_##bits; \
+    b .Locb_dec_done;
+
+  OCB_DEC(128re, r0, r12)
+  OCB_DEC(192, r0, r12)
+  OCB_DEC(256, r0, r12)
+
+#undef OCB_DEC
+
+.Locb_dec_done:
+  vst1.8 {q0}, [r3] /* store offset */
+
+  CLEAR_REG(q0)
+  CLEAR_REG(q1)
+  CLEAR_REG(q2)
+  CLEAR_REG(q3)
+  CLEAR_REG(q8)
+  CLEAR_REG(q9)
+  CLEAR_REG(q10)
+  CLEAR_REG(q11)
+  CLEAR_REG(q12)
+  CLEAR_REG(q13)
+  CLEAR_REG(q14)
+
+  pop {r4-r12,lr}
+  vpop {q4-q7}
+  bx lr
+.size _gcry_aes_ocb_dec_armv8_ce,.-_gcry_aes_ocb_dec_armv8_ce;
+
+
+/*
+ * void _gcry_aes_ocb_auth_armv8_ce (const void *keysched,
+ *                                   const unsigned char *abuf,
+ *                                   unsigned char *offset,
+ *                                   unsigned char *checksum,
+ *                                   unsigned char *L_table,
+ *                                   size_t nblocks,
+ *                                   unsigned int nrounds,
+ *                                   unsigned int blkn);
+ */
+
+.align 3
+.globl _gcry_aes_ocb_auth_armv8_ce
+.type  _gcry_aes_ocb_auth_armv8_ce,%function;
+_gcry_aes_ocb_auth_armv8_ce:
+  /* input:
+   *    r0: keysched
+   *    r1: abuf
+   *    r2: offset
+   *    r3: checksum
+   *    %st+0: Ls => r5
+   *    %st+4: nblocks => r6  (0 < nblocks <= 32)
+   *    %st+8: nrounds => r7
+   *    %st+12: blkn => lr
+   */
+
+  vpush {q4-q7}
+  push {r4-r12,lr} /* 4*16 + 4*10 = 104b */
+  ldr r7, [sp, #(104+8)]
+  ldr r5, [sp, #(104+0)]
+  ldr r6, [sp, #(104+4)]
+  ldr lr, [sp, #(104+12)]
+
+  cmp r7, #12
+  vld1.8 {q0}, [r2] /* load offset */
+
+  aes_preload_keys(r0, r12);
+
+  beq .Locb_auth_entry_192
+  bhi .Locb_auth_entry_256
+
+#define OCB_AUTH(bits, ...) \
+  .Locb_auth_entry_##bits: \
+    cmp r6, #4; \
+    add lr, #1; \
+    blo .Locb_auth_loop_##bits; \
+    \
+  .Locb_auth_loop4_##bits: \
+    \
+    /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ \
+    /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i)  */ \
+    \
+    add r9, lr, #1; \
+    add r10, lr, #2; \
+    add r11, lr, #3; \
+    rbit r8, lr; \
+    add lr, lr, #4; \
+    rbit r9, r9; \
+    rbit r10, r10; \
+    rbit r11, r11; \
+    clz r8, r8; /* ntz(i+0) */ \
+    clz r9, r9; /* ntz(i+1) */ \
+    clz r10, r10; /* ntz(i+2) */ \
+    clz r11, r11; /* ntz(i+3) */ \
+    add r8, r5, r8, lsl #4; \
+    add r9, r5, r9, lsl #4; \
+    add r10, r5, r10, lsl #4; \
+    add r11, r5, r11, lsl #4; \
+    \
+    sub r6, #4; \
+    \
+    vld1.8 {q9}, [r8];     /* load L_{ntz(i+0)} */ \
+    vld1.8 {q1-q2}, [r1]!; /* load A_i+<0-1> */ \
+    veor q0, q0, q9;       /* Offset_i+0 */ \
+    vld1.8 {q9}, [r9];     /* load L_{ntz(i+1)} */ \
+    veor q1, q1, q0;       /* A_i+0 xor Offset_i+0 */\
+    vld1.8 {q3-q4}, [r1]!; /* load A_i+<2-3> */ \
+    veor q0, q0, q9;       /* Offset_i+1 */ \
+    vld1.8 {q9}, [r10];    /* load L_{ntz(i+2)} */ \
+    veor q2, q2, q0;       /* A_i+1 xor Offset_i+1 */\
+    veor q0, q0, q9;       /* Offset_i+2 */ \
+    vld1.8 {q9}, [r11];    /* load L_{ntz(i+3)} */ \
+    veor q3, q3, q0;       /* A_i+2 xor Offset_i+2 */\
+    veor q0, q0, q9;       /* Offset_i+3 */ \
+    veor q4, q4, q0;       /* A_i+3 xor Offset_i+3 */\
+    \
+    cmp r6, #4; \
+    \
+    do_aes_4_##bits(e, mc, q1, q2, q3, q4, ##__VA_ARGS__); \
+    \
+    veor q1, q1, q2; \
+    veor q3, q3, q4; \
+    vld1.8 {q2}, [r3]; \
+    veor q1, q1, q3; \
+    veor q2, q2, q1; \
+    vst1.8 {q2}, [r3]; \
+    \
+    bhs .Locb_auth_loop4_##bits; \
+    cmp r6, #0; \
+    beq .Locb_auth_done; \
+    \
+  .Locb_auth_loop_##bits: \
+    \
+    /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ \
+    /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i)  */ \
+    \
+    rbit r8, lr; \
+    add lr, #1; \
+    clz r8, r8; /* ntz(i) */ \
+    add r8, r5, r8, lsl #4; \
+    \
+    vld1.8 {q2}, [r8];  /* load L_{ntz(i)} */ \
+    vld1.8 {q1}, [r1]!; /* load aadtext */ \
+    subs r6, #1; \
+    veor q0, q0, q2; \
+    vld1.8 {q2}, [r3]; /* load checksum */ \
+    veor q1, q1, q0; \
+    \
+    do_aes_one##bits(e, mc, q1, q1, ##__VA_ARGS__) \
+    \
+    veor q2, q2, q1; \
+    vst1.8 {q2}, [r3]; /* store checksum */ \
+    \
+    bne .Locb_auth_loop_##bits; \
+    b .Locb_auth_done;
+
+  OCB_AUTH(128re, r0, r12)
+  OCB_AUTH(192, r0, r12)
+  OCB_AUTH(256, r0, r12)
+
+#undef OCB_AUTH
+
+.Locb_auth_done:
+  vst1.8 {q0}, [r2] /* store offset */
+
+  CLEAR_REG(q0)
+  CLEAR_REG(q1)
+  CLEAR_REG(q2)
+  CLEAR_REG(q3)
+  CLEAR_REG(q8)
+  CLEAR_REG(q9)
+  CLEAR_REG(q10)
+  CLEAR_REG(q11)
+  CLEAR_REG(q12)
+  CLEAR_REG(q13)
+  CLEAR_REG(q14)
+
+  pop {r4-r12,lr}
+  vpop {q4-q7}
+  bx lr
+.size _gcry_aes_ocb_auth_armv8_ce,.-_gcry_aes_ocb_auth_armv8_ce;
+
+
+
+/*
+ * void _gcry_aes_xts_enc_armv8_ce (const void *keysched,
+ *                                  unsigned char *outbuf,
+ *                                  const unsigned char *inbuf,
+ *                                  unsigned char *iv, unsigned int nrounds);
+ */
+
+.align 3
+.globl _gcry_aes_xts_enc_armv8_ce
+.type  _gcry_aes_xts_enc_armv8_ce,%function;
+_gcry_aes_xts_enc_armv8_ce:
+  /* input:
+   *    r0: keysched
+   *    r1: outbuf
+   *    r2: inbuf
+   *    r3: iv
+   *    %st+0: nblocks => r4
+   *    %st+4: nrounds => r5
+   */
+
+  vpush {q4-q7}
+  push {r4-r12,lr} /* 4*16 + 4*10 = 104b */
+  ldr r4, [sp, #(104+0)]
+  ldr r5, [sp, #(104+4)]
+  cmp r4, #0
+  beq .Lxts_enc_skip
+
+  cmp r5, #12
+
+  vld1.8 {q0}, [r3] /* load tweak */
+  mov r7, #0x87;
+
+  aes_preload_keys(r0, r6);
+
+  beq .Lxts_enc_entry_192
+  bhi .Lxts_enc_entry_256
+
+#define CTR_XTS(bits, ...) \
+  .Lxts_enc_entry_##bits: \
+    cmp r4, #4; \
+    blo .Lxts_enc_loop_##bits; \
+    \
+  .Lxts_enc_loop4_##bits: \
+    sub r4, r4, #4; \
+    veor q9, q9, q9; \
+    \
+    vld1.8 {q1-q2}, [r2]!; /* load plaintext */ \
+    veor q1, q1, q0; \
+    cmp r4, #4; \
+    vmov.u32 d18[0], r7; \
+    vst1.8 {q0}, [r1]!; /* store tweak0 to temp */ \
+    \
+    vshr.s64 d16, d1, #63; \
+    vshr.u64 d17, d0, #63; \
+    vadd.u64 q0, q0, q0; \
+    vand d16, d16, d18; \
+    veor q0, q0, q8; \
+    \
+    vld1.8 {q3-q4}, [r2]!; /* load plaintext */ \
+    veor q2, q2, q0; \
+    vst1.8 {q0}, [r1]!; /* store tweak1 to temp */ \
+    \
+    vshr.s64 d16, d1, #63; \
+    vshr.u64 d17, d0, #63; \
+    vadd.u64 q0, q0, q0; \
+    vand d16, d16, d18; \
+    veor q0, q0, q8; \
+    \
+    veor q3, q3, q0; \
+    vst1.8 {q0}, [r1]!; /* store tweak2 to temp */ \
+    \
+    vshr.s64 d16, d1, #63; \
+    vshr.u64 d17, d0, #63; \
+    vadd.u64 q0, q0, q0; \
+    vand d16, d16, d18; \
+    veor q0, q0, q8; \
+    \
+    veor q4, q4, q0; \
+    vst1.8 {q0}, [r1]; /* store tweak3 to temp */ \
+    sub r1, r1, #48; \
+    \
+    vshr.s64 d16, d1, #63; \
+    vshr.u64 d17, d0, #63; \
+    vadd.u64 q0, q0, q0; \
+    vand d16, d16, d18; \
+    veor q0, q0, q8; \
+    \
+    do_aes_4_##bits(e, mc, q1, q2, q3, q4, ##__VA_ARGS__); \
+    \
+    vld1.8 {q8-q9}, [r1]!; /* load tweak from temp */ \
+    veor q1, q1, q8; \
+    veor q2, q2, q9; \
+    vld1.8 {q8-q9}, [r1]; /* load tweak from temp */ \
+    sub r1, r1, #32; \
+    veor q3, q3, q8; \
+    veor q4, q4, q9; \
+    vst1.8 {q1-q2}, [r1]!; /* store plaintext */ \
+    vst1.8 {q3-q4}, [r1]!; /* store plaintext */ \
+    \
+    bhs .Lxts_enc_loop4_##bits; \
+    cmp r4, #0; \
+    beq .Lxts_enc_done; \
+    \
+  .Lxts_enc_loop_##bits: \
+    \
+    vld1.8 {q1}, [r2]!; /* load ciphertext */ \
+    \
+    veor q9, q9, q9; \
+    veor q1, q1, q0; \
+    vmov.u32 d18[0], r7; \
+    vmov q2, q0; \
+    \
+    vshr.s64 d16, d1, #63; \
+    vshr.u64 d17, d0, #63; \
+    vadd.u64 q0, q0, q0; \
+    vand d16, d16, d18; \
+    veor q0, q0, q8; \
+    subs r4, r4, #1; \
+    \
+    do_aes_one##bits(e, mc, q1, q1, ##__VA_ARGS__); \
+    \
+    veor q1, q1, q2; \
+    vst1.8 {q1}, [r1]!; /* store plaintext */ \
+    \
+    bne .Lxts_enc_loop_##bits; \
+    b .Lxts_enc_done;
+
+  CTR_XTS(128re, r0, r6)
+  CTR_XTS(192, r0, r6)
+  CTR_XTS(256, r0, r6)
+
+#undef CTR_XTS
+
+.Lxts_enc_done:
+  vst1.8 {q0}, [r3] /* store tweak */
+
+  CLEAR_REG(q0)
+  CLEAR_REG(q1)
+  CLEAR_REG(q2)
+  CLEAR_REG(q3)
+  CLEAR_REG(q8)
+  CLEAR_REG(q9)
+  CLEAR_REG(q10)
+  CLEAR_REG(q11)
+  CLEAR_REG(q12)
+  CLEAR_REG(q13)
+  CLEAR_REG(q14)
+
+.Lxts_enc_skip:
+  pop {r4-r12,lr}
+  vpop {q4-q7}
+  bx lr
+.size _gcry_aes_xts_enc_armv8_ce,.-_gcry_aes_xts_enc_armv8_ce;
+
+
+/*
+ * void _gcry_aes_xts_dec_armv8_ce (const void *keysched,
+ *                                  unsigned char *outbuf,
+ *                                  const unsigned char *inbuf,
+ *                                  unsigned char *iv, unsigned int nrounds);
+ */
+
+.align 3
+.globl _gcry_aes_xts_dec_armv8_ce
+.type  _gcry_aes_xts_dec_armv8_ce,%function;
+_gcry_aes_xts_dec_armv8_ce:
+  /* input:
+   *    r0: keysched
+   *    r1: outbuf
+   *    r2: inbuf
+   *    r3: iv
+   *    %st+0: nblocks => r4
+   *    %st+4: nrounds => r5
+   */
+
+  vpush {q4-q7}
+  push {r4-r12,lr} /* 4*16 + 4*10 = 104b */
+  ldr r4, [sp, #(104+0)]
+  ldr r5, [sp, #(104+4)]
+  cmp r4, #0
+  beq .Lxts_dec_skip
+
+  cmp r5, #12
+
+  vld1.8 {q0}, [r3] /* load tweak */
+  mov r7, #0x87;
+
+  aes_preload_keys(r0, r6);
+
+  beq .Lxts_dec_entry_192
+  bhi .Lxts_dec_entry_256
+
+#define CTR_XTS(bits, ...) \
+  .Lxts_dec_entry_##bits: \
+    cmp r4, #4; \
+    blo .Lxts_dec_loop_##bits; \
+    \
+  .Lxts_dec_loop4_##bits: \
+    sub r4, r4, #4; \
+    veor q9, q9, q9; \
+    \
+    vld1.8 {q1-q2}, [r2]!; /* load plaintext */ \
+    veor q1, q1, q0; \
+    cmp r4, #4; \
+    vmov.u32 d18[0], r7; \
+    vst1.8 {q0}, [r1]!; /* store tweak0 to temp */ \
+    \
+    vshr.s64 d16, d1, #63; \
+    vshr.u64 d17, d0, #63; \
+    vadd.u64 q0, q0, q0; \
+    vand d16, d16, d18; \
+    veor q0, q0, q8; \
+    \
+    vld1.8 {q3-q4}, [r2]!; /* load plaintext */ \
+    veor q2, q2, q0; \
+    vst1.8 {q0}, [r1]!; /* store tweak1 to temp */ \
+    \
+    vshr.s64 d16, d1, #63; \
+    vshr.u64 d17, d0, #63; \
+    vadd.u64 q0, q0, q0; \
+    vand d16, d16, d18; \
+    veor q0, q0, q8; \
+    \
+    veor q3, q3, q0; \
+    vst1.8 {q0}, [r1]!; /* store tweak2 to temp */ \
+    \
+    vshr.s64 d16, d1, #63; \
+    vshr.u64 d17, d0, #63; \
+    vadd.u64 q0, q0, q0; \
+    vand d16, d16, d18; \
+    veor q0, q0, q8; \
+    \
+    veor q4, q4, q0; \
+    vst1.8 {q0}, [r1]; /* store tweak3 to temp */ \
+    sub r1, r1, #48; \
+    \
+    vshr.s64 d16, d1, #63; \
+    vshr.u64 d17, d0, #63; \
+    vadd.u64 q0, q0, q0; \
+    vand d16, d16, d18; \
+    veor q0, q0, q8; \
+    \
+    do_aes_4_##bits(d, imc, q1, q2, q3, q4, ##__VA_ARGS__); \
+    \
+    vld1.8 {q8-q9}, [r1]!; /* load tweak from temp */ \
+    veor q1, q1, q8; \
+    veor q2, q2, q9; \
+    vld1.8 {q8-q9}, [r1]; /* load tweak from temp */ \
+    sub r1, r1, #32; \
+    veor q3, q3, q8; \
+    veor q4, q4, q9; \
+    vst1.8 {q1-q2}, [r1]!; /* store plaintext */ \
+    vst1.8 {q3-q4}, [r1]!; /* store plaintext */ \
+    \
+    bhs .Lxts_dec_loop4_##bits; \
+    cmp r4, #0; \
+    beq .Lxts_dec_done; \
+    \
+  .Lxts_dec_loop_##bits: \
+    \
+    vld1.8 {q1}, [r2]!; /* load ciphertext */ \
+    \
+    veor q9, q9, q9; \
+    veor q1, q1, q0; \
+    vmov.u32 d18[0], r7; \
+    vmov q2, q0; \
+    \
+    vshr.s64 d16, d1, #63; \
+    vshr.u64 d17, d0, #63; \
+    vadd.u64 q0, q0, q0; \
+    vand d16, d16, d18; \
+    veor q0, q0, q8; \
+    subs r4, r4, #1; \
+    \
+    do_aes_one##bits(d, imc, q1, q1, ##__VA_ARGS__); \
+    \
+    veor q1, q1, q2; \
+    vst1.8 {q1}, [r1]!; /* store plaintext */ \
+    \
+    bne .Lxts_dec_loop_##bits; \
+    b .Lxts_dec_done;
+
+  CTR_XTS(128re, r0, r6)
+  CTR_XTS(192, r0, r6)
+  CTR_XTS(256, r0, r6)
+
+#undef CTR_XTS
+
+.Lxts_dec_done:
+  vst1.8 {q0}, [r3] /* store tweak */
+
+  CLEAR_REG(q0)
+  CLEAR_REG(q1)
+  CLEAR_REG(q2)
+  CLEAR_REG(q3)
+  CLEAR_REG(q8)
+  CLEAR_REG(q9)
+  CLEAR_REG(q10)
+  CLEAR_REG(q11)
+  CLEAR_REG(q12)
+  CLEAR_REG(q13)
+  CLEAR_REG(q14)
+
+.Lxts_dec_skip:
+  pop {r4-r12,lr}
+  vpop {q4-q7}
+  bx lr
+.size _gcry_aes_xts_dec_armv8_ce,.-_gcry_aes_xts_dec_armv8_ce;
+
+
+/*
+ * u32 _gcry_aes_sbox4_armv8_ce(u32 in4b);
+ */
+.align 3
+.globl _gcry_aes_sbox4_armv8_ce
+.type  _gcry_aes_sbox4_armv8_ce,%function;
+_gcry_aes_sbox4_armv8_ce:
+  /* See "Gouvêa, C. P. L. & López, J. Implementing GCM on ARMv8. Topics in
+   * Cryptology — CT-RSA 2015" for details.
+   */
+  vmov.i8 q0, #0x52
+  vmov.i8 q1, #0
+  vmov s0, r0
+  aese.8 q0, q1
+  veor d0, d1
+  vpadd.i32 d0, d0, d1
+  vmov r0, s0
+  CLEAR_REG(q0)
+  bx lr
+.size _gcry_aes_sbox4_armv8_ce,.-_gcry_aes_sbox4_armv8_ce;
+
+
+/*
+ * void _gcry_aes_invmixcol_armv8_ce(void *dst, const void *src);
+ */
+.align 3
+.globl _gcry_aes_invmixcol_armv8_ce
+.type  _gcry_aes_invmixcol_armv8_ce,%function;
+_gcry_aes_invmixcol_armv8_ce:
+  vld1.8 {q0}, [r1]
+  aesimc.8 q0, q0
+  vst1.8 {q0}, [r0]
+  CLEAR_REG(q0)
+  bx lr
+.size _gcry_aes_invmixcol_armv8_ce,.-_gcry_aes_invmixcol_armv8_ce;
+
+#endif
diff --git a/grub-core/lib/libgcrypt/cipher/rijndael-armv8-aarch64-ce.S 
b/grub-core/lib/libgcrypt/cipher/rijndael-armv8-aarch64-ce.S
new file mode 100644
index 000000000..4fef03454
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/rijndael-armv8-aarch64-ce.S
@@ -0,0 +1,1921 @@
+/* rijndael-armv8-aarch64-ce.S - ARMv8/CE accelerated AES
+ * Copyright (C) 2016 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "asm-common-aarch64.h"
+
+#if defined(__AARCH64EL__) && \
+    defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) && \
+    defined(HAVE_GCC_INLINE_ASM_AARCH64_CRYPTO)
+
+.cpu generic+simd+crypto
+
+.text
+
+
+/* Register macros */
+
+#define vk0 v17
+#define vk1 v18
+#define vk2 v19
+#define vk3 v20
+#define vk4 v21
+#define vk5 v22
+#define vk6 v23
+#define vk7 v24
+#define vk8 v25
+#define vk9 v26
+#define vk10 v27
+#define vk11 v28
+#define vk12 v29
+#define vk13 v30
+#define vklast v31
+
+
+/* Helper macros */
+
+#define __ /*_*/
+#define _(...) __VA_ARGS__
+
+
+/* AES macros */
+
+#define aes_preload_keys(keysched, nrounds) \
+       cmp nrounds, #12; \
+       ld1 {vk0.16b-vk3.16b}, [keysched], #64; \
+       ld1 {vk4.16b-vk7.16b}, [keysched], #64; \
+       ld1 {vk8.16b-vk10.16b}, [keysched], #48; \
+       mov vklast.16b, vk10.16b; \
+       b.lo 1f; \
+       ld1 {vk11.16b-vk12.16b}, [keysched], #32; \
+       mov vklast.16b, vk12.16b; \
+       b.eq 1f; \
+       ld1 {vk13.16b-vklast.16b}, [keysched]; \
+1:     ;
+
+#define do_aes_one_part1(ed, mcimc, vb, vkfirst) \
+       aes##ed    vb.16b, vkfirst.16b; \
+       aes##mcimc vb.16b, vb.16b; \
+       aes##ed    vb.16b, vk1.16b; \
+       aes##mcimc vb.16b, vb.16b;
+
+#define do_aes_one_part2_128(ed, mcimc, vb, iop1, iop2) \
+       aes##ed    vb.16b, vk2.16b; \
+       aes##mcimc vb.16b, vb.16b; \
+       aes##ed    vb.16b, vk3.16b; \
+       aes##mcimc vb.16b, vb.16b; \
+       iop1; \
+       aes##ed    vb.16b, vk4.16b; \
+       aes##mcimc vb.16b, vb.16b; \
+       aes##ed    vb.16b, vk5.16b; \
+       aes##mcimc vb.16b, vb.16b; \
+       aes##ed    vb.16b, vk6.16b; \
+       aes##mcimc vb.16b, vb.16b; \
+       iop2; \
+       aes##ed    vb.16b, vk7.16b; \
+       aes##mcimc vb.16b, vb.16b; \
+       aes##ed    vb.16b, vk8.16b; \
+       aes##mcimc vb.16b, vb.16b; \
+       aes##ed    vb.16b, vk9.16b;
+
+#define do_aes_one_part2_192(ed, mcimc, vb, iop1, iop2) \
+       aes##ed    vb.16b, vk2.16b; \
+       aes##mcimc vb.16b, vb.16b; \
+       aes##ed    vb.16b, vk3.16b; \
+       aes##mcimc vb.16b, vb.16b; \
+       aes##ed    vb.16b, vk4.16b; \
+       aes##mcimc vb.16b, vb.16b; \
+       aes##ed    vb.16b, vk5.16b; \
+       aes##mcimc vb.16b, vb.16b; \
+       iop1; \
+       aes##ed    vb.16b, vk6.16b; \
+       aes##mcimc vb.16b, vb.16b; \
+       aes##ed    vb.16b, vk7.16b; \
+       aes##mcimc vb.16b, vb.16b; \
+       aes##ed    vb.16b, vk8.16b; \
+       aes##mcimc vb.16b, vb.16b; \
+       iop2; \
+       aes##ed    vb.16b, vk9.16b; \
+       aes##mcimc vb.16b, vb.16b; \
+       aes##ed    vb.16b, vk10.16b; \
+       aes##mcimc vb.16b, vb.16b; \
+       aes##ed    vb.16b, vk11.16b;
+
+#define do_aes_one_part2_256(ed, mcimc, vb, iop1, iop2) \
+       aes##ed    vb.16b, vk2.16b; \
+       aes##mcimc vb.16b, vb.16b; \
+       aes##ed    vb.16b, vk3.16b; \
+       aes##mcimc vb.16b, vb.16b; \
+       aes##ed    vb.16b, vk4.16b; \
+       aes##mcimc vb.16b, vb.16b; \
+       aes##ed    vb.16b, vk5.16b; \
+       aes##mcimc vb.16b, vb.16b; \
+       aes##ed    vb.16b, vk6.16b; \
+       aes##mcimc vb.16b, vb.16b; \
+       aes##ed    vb.16b, vk7.16b; \
+       aes##mcimc vb.16b, vb.16b; \
+       iop1; \
+       aes##ed    vb.16b, vk8.16b; \
+       aes##mcimc vb.16b, vb.16b; \
+       aes##ed    vb.16b, vk9.16b; \
+       aes##mcimc vb.16b, vb.16b; \
+       aes##ed    vb.16b, vk10.16b; \
+       aes##mcimc vb.16b, vb.16b; \
+       iop2; \
+       aes##ed    vb.16b, vk11.16b; \
+       aes##mcimc vb.16b, vb.16b; \
+       aes##ed    vb.16b, vk12.16b; \
+       aes##mcimc vb.16b, vb.16b; \
+       aes##ed    vb.16b, vk13.16b;
+
+#define do_aes_one128(ed, mcimc, vo, vb, vkfirst) \
+       do_aes_one_part1(ed, mcimc, vb, vkfirst); \
+       do_aes_one_part2_128(ed, mcimc, vb, __, __); \
+       eor vo.16b, vb.16b, vklast.16b;
+
+#define do_aes_one192(ed, mcimc, vo, vb, vkfirst) \
+       do_aes_one_part1(ed, mcimc, vb, vkfirst); \
+       do_aes_one_part2_192(ed, mcimc, vb, __, __); \
+       eor vo.16b, vb.16b, vklast.16b;
+
+#define do_aes_one256(ed, mcimc, vo, vb, vkfirst) \
+       do_aes_one_part1(ed, mcimc, vb, vkfirst); \
+       do_aes_one_part2_256(ed, mcimc, vb, __, __); \
+       eor vo.16b, vb.16b, vklast.16b;
+
+#define aes_round_4_multikey(ed, mcimc, b0, b1, b2, b3, key0, key1, key2, 
key3) \
+       aes##ed    b0.16b, key0.16b; \
+       aes##mcimc b0.16b, b0.16b; \
+         aes##ed    b1.16b, key1.16b; \
+         aes##mcimc b1.16b, b1.16b; \
+           aes##ed    b2.16b, key2.16b; \
+           aes##mcimc b2.16b, b2.16b; \
+             aes##ed    b3.16b, key3.16b; \
+             aes##mcimc b3.16b, b3.16b;
+
+#define aes_round_4(ed, mcimc, b0, b1, b2, b3, key) \
+       aes_round_4_multikey(ed, mcimc, b0, b1, b2, b3, key, key, key, key);
+
+#define aes_lastround_4(ed, o0, o1, o2, o3, b0, b1, b2, b3, key1, b0_key2, 
b1_key2, b2_key2, b3_key2) \
+       aes##ed    b0.16b, key1.16b; \
+         aes##ed    b1.16b, key1.16b; \
+           aes##ed    b2.16b, key1.16b; \
+             aes##ed    b3.16b, key1.16b; \
+       eor        o0.16b, b0.16b, b0_key2.16b; \
+         eor        o1.16b, b1.16b, b1_key2.16b; \
+           eor        o2.16b, b2.16b, b2_key2.16b; \
+             eor        o3.16b, b3.16b, b3_key2.16b;
+
+#define do_aes_4_part1_multikey(ed, mcimc, b0, b1, b2, b3, key0, key1, key2, 
key3) \
+       aes_round_4_multikey(ed, mcimc, b0, b1, b2, b3, key0, key1, key2, 
key3); \
+       aes_round_4(ed, mcimc, b0, b1, b2, b3, vk1); \
+       aes_round_4(ed, mcimc, b0, b1, b2, b3, vk2); \
+       aes_round_4(ed, mcimc, b0, b1, b2, b3, vk3);
+
+#define do_aes_4_part1(ed, mcimc, b0, b1, b2, b3, vkfirst) \
+       do_aes_4_part1_multikey(ed, mcimc, b0, b1, b2, b3, vkfirst, vkfirst, 
vkfirst, vkfirst);
+
+#define do_aes_4_part2_128(ed, mcimc, o0, o1, o2, o3, b0, b1, b2, b3, \
+                          b0_key, b1_key, b2_key, b3_key) \
+       aes_round_4(ed, mcimc, b0, b1, b2, b3, vk4); \
+       aes_round_4(ed, mcimc, b0, b1, b2, b3, vk5); \
+       aes_round_4(ed, mcimc, b0, b1, b2, b3, vk6); \
+       aes_round_4(ed, mcimc, b0, b1, b2, b3, vk7); \
+       aes_round_4(ed, mcimc, b0, b1, b2, b3, vk8); \
+       aes_lastround_4(ed, o0, o1, o2, o3, b0, b1, b2, b3, vk9, b0_key, 
b1_key, b2_key, b3_key);
+
+#define do_aes_4_part2_192(ed, mcimc, o0, o1, o2, o3, b0, b1, b2, b3, \
+                          b0_key, b1_key, b2_key, b3_key) \
+       aes_round_4(ed, mcimc, b0, b1, b2, b3, vk4); \
+       aes_round_4(ed, mcimc, b0, b1, b2, b3, vk5); \
+       aes_round_4(ed, mcimc, b0, b1, b2, b3, vk6); \
+       aes_round_4(ed, mcimc, b0, b1, b2, b3, vk7); \
+       aes_round_4(ed, mcimc, b0, b1, b2, b3, vk8); \
+       aes_round_4(ed, mcimc, b0, b1, b2, b3, vk9); \
+       aes_round_4(ed, mcimc, b0, b1, b2, b3, vk10); \
+       aes_lastround_4(ed, o0, o1, o2, o3, b0, b1, b2, b3, vk11, b0_key, 
b1_key, b2_key, b3_key);
+
+#define do_aes_4_part2_256(ed, mcimc, o0, o1, o2, o3, b0, b1, b2, b3, \
+                          b0_key, b1_key, b2_key, b3_key) \
+       aes_round_4(ed, mcimc, b0, b1, b2, b3, vk4); \
+       aes_round_4(ed, mcimc, b0, b1, b2, b3, vk5); \
+       aes_round_4(ed, mcimc, b0, b1, b2, b3, vk6); \
+       aes_round_4(ed, mcimc, b0, b1, b2, b3, vk7); \
+       aes_round_4(ed, mcimc, b0, b1, b2, b3, vk8); \
+       aes_round_4(ed, mcimc, b0, b1, b2, b3, vk9); \
+       aes_round_4(ed, mcimc, b0, b1, b2, b3, vk10); \
+       aes_round_4(ed, mcimc, b0, b1, b2, b3, vk11); \
+       aes_round_4(ed, mcimc, b0, b1, b2, b3, vk12); \
+       aes_lastround_4(ed, o0, o1, o2, o3, b0, b1, b2, b3, vk13, b0_key, 
b1_key, b2_key, b3_key);
+
+#define do_aes_4_128(ed, mcimc, b0, b1, b2, b3) \
+       do_aes_4_part1(ed, mcimc, b0, b1, b2, b3, vk0); \
+       do_aes_4_part2_128(ed, mcimc, b0, b1, b2, b3, b0, b1, b2, b3, vklast, 
vklast, vklast, vklast);
+
+#define do_aes_4_192(ed, mcimc, b0, b1, b2, b3) \
+       do_aes_4_part1(ed, mcimc, b0, b1, b2, b3, vk0); \
+       do_aes_4_part2_192(ed, mcimc, b0, b1, b2, b3, b0, b1, b2, b3, vklast, 
vklast, vklast, vklast);
+
+#define do_aes_4_256(ed, mcimc, b0, b1, b2, b3) \
+       do_aes_4_part1(ed, mcimc, b0, b1, b2, b3, vk0); \
+       do_aes_4_part2_256(ed, mcimc, b0, b1, b2, b3, b0, b1, b2, b3, vklast, 
vklast, vklast, vklast);
+
+/* Other functional macros */
+
+#define CLEAR_REG(reg) movi reg.16b, #0;
+
+#define aes_clear_keys(nrounds) \
+       CLEAR_REG(vk0); \
+       CLEAR_REG(vk1); \
+       CLEAR_REG(vk2); \
+       CLEAR_REG(vk3); \
+       CLEAR_REG(vk4); \
+       CLEAR_REG(vk5); \
+       CLEAR_REG(vk6); \
+       CLEAR_REG(vk7); \
+       CLEAR_REG(vk9); \
+       CLEAR_REG(vk8); \
+       CLEAR_REG(vk10); \
+       CLEAR_REG(vk11); \
+       CLEAR_REG(vk12); \
+       CLEAR_REG(vk13); \
+       CLEAR_REG(vklast);
+
+
+/*
+ * unsigned int _gcry_aes_enc_armv8_ce(void *keysched, byte *dst,
+ *                                     const byte *src,
+ *                                     unsigned int nrounds);
+ */
+.align 3
+.globl _gcry_aes_enc_armv8_ce
+ELF(.type  _gcry_aes_enc_armv8_ce,%function;)
+_gcry_aes_enc_armv8_ce:
+  /* input:
+   *    x0: keysched
+   *    x1: dst
+   *    x2: src
+   *    w3: nrounds
+   */
+  CFI_STARTPROC();
+
+  aes_preload_keys(x0, w3);
+
+  ld1 {v0.16b}, [x2]
+
+  b.hi .Lenc1_256
+  b.eq .Lenc1_192
+
+.Lenc1_128:
+  do_aes_one128(e, mc, v0, v0, vk0);
+
+.Lenc1_tail:
+  CLEAR_REG(vk0)
+  CLEAR_REG(vk1)
+  CLEAR_REG(vk2)
+  CLEAR_REG(vk3)
+  CLEAR_REG(vk4)
+  CLEAR_REG(vk5)
+  CLEAR_REG(vk6)
+  CLEAR_REG(vk7)
+  CLEAR_REG(vk8)
+  CLEAR_REG(vk9)
+  CLEAR_REG(vk10)
+  CLEAR_REG(vklast)
+  st1 {v0.16b}, [x1]
+  CLEAR_REG(v0)
+
+  mov x0, #0
+  ret_spec_stop
+
+.Lenc1_192:
+  do_aes_one192(e, mc, v0, v0, vk0);
+
+  CLEAR_REG(vk11)
+  CLEAR_REG(vk12)
+  b .Lenc1_tail
+
+.Lenc1_256:
+  do_aes_one256(e, mc, v0, v0, vk0);
+
+  CLEAR_REG(vk11)
+  CLEAR_REG(vk12)
+  CLEAR_REG(vk13)
+  b .Lenc1_tail
+  CFI_ENDPROC();
+ELF(.size _gcry_aes_enc_armv8_ce,.-_gcry_aes_enc_armv8_ce;)
+
+
+/*
+ * unsigned int _gcry_aes_dec_armv8_ce(void *keysched, byte *dst,
+ *                                     const byte *src,
+ *                                     unsigned int nrounds);
+ */
+.align 3
+.globl _gcry_aes_dec_armv8_ce
+ELF(.type  _gcry_aes_dec_armv8_ce,%function;)
+_gcry_aes_dec_armv8_ce:
+  /* input:
+   *    x0: keysched
+   *    x1: dst
+   *    x2: src
+   *    w3: nrounds
+   */
+  CFI_STARTPROC();
+
+  aes_preload_keys(x0, w3);
+
+  ld1 {v0.16b}, [x2]
+
+  b.hi .Ldec1_256
+  b.eq .Ldec1_192
+
+.Ldec1_128:
+  do_aes_one128(d, imc, v0, v0, vk0);
+
+.Ldec1_tail:
+  CLEAR_REG(vk0)
+  CLEAR_REG(vk1)
+  CLEAR_REG(vk2)
+  CLEAR_REG(vk3)
+  CLEAR_REG(vk4)
+  CLEAR_REG(vk5)
+  CLEAR_REG(vk6)
+  CLEAR_REG(vk7)
+  CLEAR_REG(vk8)
+  CLEAR_REG(vk9)
+  CLEAR_REG(vk10)
+  CLEAR_REG(vklast)
+  st1 {v0.16b}, [x1]
+  CLEAR_REG(v0)
+
+  mov x0, #0
+  ret_spec_stop
+
+.Ldec1_192:
+  do_aes_one192(d, imc, v0, v0, vk0);
+
+  CLEAR_REG(vk11)
+  CLEAR_REG(vk12)
+  b .Ldec1_tail
+
+.Ldec1_256:
+  do_aes_one256(d, imc, v0, v0, vk0);
+
+  CLEAR_REG(vk11)
+  CLEAR_REG(vk12)
+  CLEAR_REG(vk13)
+  b .Ldec1_tail
+  CFI_ENDPROC();
+ELF(.size _gcry_aes_dec_armv8_ce,.-_gcry_aes_dec_armv8_ce;)
+
+
+/*
+ * void _gcry_aes_cbc_enc_armv8_ce (const void *keysched,
+ *                                  unsigned char *outbuf,
+ *                                  const unsigned char *inbuf,
+ *                                  unsigned char *iv, size_t nblocks,
+ *                                  int cbc_mac, unsigned int nrounds);
+ */
+
+.align 3
+.globl _gcry_aes_cbc_enc_armv8_ce
+ELF(.type  _gcry_aes_cbc_enc_armv8_ce,%function;)
+_gcry_aes_cbc_enc_armv8_ce:
+  /* input:
+   *    x0: keysched
+   *    x1: outbuf
+   *    x2: inbuf
+   *    x3: iv
+   *    x4: nblocks
+   *    w5: cbc_mac
+   *    w6: nrounds
+   */
+  CFI_STARTPROC();
+
+  cbz x4, .Lcbc_enc_skip
+
+  cmp w5, #0
+  ld1 {v4.16b}, [x3] /* load IV */
+  csetm x5, eq
+
+  aes_preload_keys(x0, w6);
+  and x5, x5, #16
+
+  ld1 {v3.16b}, [x2], #16; /* load plaintext */
+  mov v0.16b, vk0.16b;
+  sub x4, x4, #1;
+  eor v16.16b, vk0.16b, vklast.16b;
+  eor v4.16b, v4.16b, v3.16b;
+  do_aes_one_part1(e, mc, v4, v0);
+
+  b.eq .Lcbc_enc_entry_192
+  b.hi .Lcbc_enc_entry_256
+
+#define CBC_ENC(bits) \
+  .Lcbc_enc_entry_##bits: \
+    cbz x4, .Lcbc_enc_done_##bits; \
+    \
+  .Lcbc_enc_loop_##bits: \
+    do_aes_one_part2_##bits(e, mc, v4, \
+                            _(ld1 {v0.16b}, [x2], #16 /* load plaintext */), \
+                            _(eor v0.16b, v0.16b, v16.16b)); \
+    sub x4, x4, #1; \
+    eor v3.16b, v4.16b, vklast.16b; \
+    do_aes_one_part1(e, mc, v4, v0); \
+    st1 {v3.16b}, [x1], x5; /* store ciphertext */ \
+    cbnz x4, .Lcbc_enc_loop_##bits; \
+    \
+  .Lcbc_enc_done_##bits: \
+    do_aes_one_part2_##bits(e, mc, v4, __, __); \
+    b .Lcbc_enc_done;
+
+  CBC_ENC(128)
+  CBC_ENC(192)
+  CBC_ENC(256)
+
+#undef CBC_ENC
+
+.Lcbc_enc_done:
+  eor v3.16b, v4.16b, vklast.16b;
+  st1 {v3.16b}, [x1]; /* store ciphertext */
+  aes_clear_keys(w6)
+  st1 {v3.16b}, [x3] /* store IV */
+
+  CLEAR_REG(v16)
+  CLEAR_REG(v4)
+  CLEAR_REG(v3)
+  CLEAR_REG(v0)
+
+.Lcbc_enc_skip:
+  ret_spec_stop
+  CFI_ENDPROC();
+ELF(.size _gcry_aes_cbc_enc_armv8_ce,.-_gcry_aes_cbc_enc_armv8_ce;)
+
+/*
+ * void _gcry_aes_cbc_dec_armv8_ce (const void *keysched,
+ *                                  unsigned char *outbuf,
+ *                                  const unsigned char *inbuf,
+ *                                  unsigned char *iv, unsigned int nrounds);
+ */
+
+.align 3
+.globl _gcry_aes_cbc_dec_armv8_ce
+ELF(.type  _gcry_aes_cbc_dec_armv8_ce,%function;)
+_gcry_aes_cbc_dec_armv8_ce:
+  /* input:
+   *    x0: keysched
+   *    x1: outbuf
+   *    x2: inbuf
+   *    x3: iv
+   *    x4: nblocks
+   *    w5: nrounds
+   */
+  CFI_STARTPROC();
+
+  cbz x4, .Lcbc_dec_skip
+
+  add sp, sp, #-64;
+  CFI_ADJUST_CFA_OFFSET(64);
+
+  ld1 {v16.16b}, [x3] /* load IV */
+
+  aes_preload_keys(x0, w5);
+
+  b.eq .Lcbc_dec_entry_192
+  b.hi .Lcbc_dec_entry_256
+
+#define CBC_DEC(bits) \
+  .Lcbc_dec_entry_##bits: \
+    cmp x4, #4; \
+    b.lo .Lcbc_dec_loop_##bits; \
+    \
+    ld1 {v0.16b-v3.16b}, [x2], #64; /* load ciphertext */ \
+    cmp x4, #8; \
+    sub x4, x4, #4; \
+    eor v4.16b, v16.16b, vklast.16b; \
+    eor v5.16b, v0.16b, vklast.16b; \
+    eor v6.16b, v1.16b, vklast.16b; \
+    eor v7.16b, v2.16b, vklast.16b; \
+    mov v16.16b, v3.16b; /* next IV */ \
+    \
+    do_aes_4_part1(d, imc, v0, v1, v2, v3, vk0); \
+    b.lo .Lcbc_dec_done4_##bits; \
+    \
+    st1 {v8.16b-v11.16b}, [sp]; /* store callee saved registers */ \
+    \
+  .Lcbc_dec_loop4_##bits: \
+    do_aes_4_part2_##bits(d, imc, v8, v9, v10, v11, v0, v1, v2, v3, v4, v5, 
v6, v7); \
+    ld1 {v0.16b-v3.16b}, [x2], #64; /* load ciphertext */ \
+    cmp x4, #8; \
+    sub x4, x4, #4; \
+    eor v4.16b, v16.16b, vklast.16b; \
+    eor v5.16b, v0.16b, vklast.16b; \
+    eor v6.16b, v1.16b, vklast.16b; \
+    eor v7.16b, v2.16b, vklast.16b; \
+    mov v16.16b, v3.16b; /* next IV */ \
+    \
+    do_aes_4_part1(d, imc, v0, v1, v2, v3, vk0); \
+    st1 {v8.16b-v11.16b}, [x1], #64; /* store plaintext */ \
+    \
+    b.hs .Lcbc_dec_loop4_##bits; \
+    \
+    ld1 {v8.16b-v11.16b}, [sp]; /* restore callee saved registers */ \
+    \
+  .Lcbc_dec_done4_##bits: \
+    do_aes_4_part2_##bits(d, imc, v0, v1, v2, v3, v0, v1, v2, v3, v4, v5, v6, 
v7); \
+    \
+    CLEAR_REG(v4); \
+    CLEAR_REG(v5); \
+    CLEAR_REG(v6); \
+    CLEAR_REG(v7); \
+    st1 {v0.16b-v3.16b}, [x1], #64; /* store plaintext */ \
+    CLEAR_REG(v0); \
+    CLEAR_REG(v3); \
+    cbz x4, .Lcbc_dec_done; \
+    \
+  .Lcbc_dec_loop_##bits: \
+    ld1 {v1.16b}, [x2], #16; /* load ciphertext */ \
+    sub x4, x4, #1; \
+    eor v16.16b, v16.16b, vklast.16b; \
+    mov v2.16b, v1.16b; \
+    \
+    do_aes_one_part1(d, imc, v1, vk0); \
+    do_aes_one_part2_##bits(d, imc, v1, __, __); \
+    eor v1.16b, v1.16b, v16.16b; \
+    \
+    mov v16.16b, v2.16b; \
+    st1 {v1.16b}, [x1], #16; /* store plaintext */ \
+    \
+    cbnz x4, .Lcbc_dec_loop_##bits; \
+    b .Lcbc_dec_done;
+
+  CBC_DEC(128)
+  CBC_DEC(192)
+  CBC_DEC(256)
+
+#undef CBC_DEC
+
+.Lcbc_dec_done:
+  aes_clear_keys(w5)
+
+  st1 {v16.16b}, [x3] /* store IV */
+
+  CLEAR_REG(v16)
+  CLEAR_REG(v1)
+  CLEAR_REG(v2)
+
+  add sp, sp, #64;
+  CFI_ADJUST_CFA_OFFSET(-64);
+
+.Lcbc_dec_skip:
+  ret_spec_stop
+  CFI_ENDPROC();
+ELF(.size _gcry_aes_cbc_dec_armv8_ce,.-_gcry_aes_cbc_dec_armv8_ce;)
+
+
+/*
+ * void _gcry_aes_ctr_enc_armv8_ce (const void *keysched,
+ *                                  unsigned char *outbuf,
+ *                                  const unsigned char *inbuf,
+ *                                  unsigned char *iv, unsigned int nrounds);
+ */
+
+.align 3
+.globl _gcry_aes_ctr_enc_armv8_ce
+ELF(.type  _gcry_aes_ctr_enc_armv8_ce,%function;)
+_gcry_aes_ctr_enc_armv8_ce:
+  /* input:
+   *    r0: keysched
+   *    r1: outbuf
+   *    r2: inbuf
+   *    r3: iv
+   *    x4: nblocks
+   *    w5: nrounds
+   */
+  CFI_STARTPROC();
+
+  cbz x4, .Lctr_enc_skip
+
+  add x8, sp, #-64
+  add sp, sp, #-128;
+  CFI_ADJUST_CFA_OFFSET(128);
+
+  mov w6, #(1 << 24)
+  movi v16.16b, #0
+  mov v16.S[3], w6 /* 1 */
+
+  /* load IV */
+  ldp x9, x10, [x3]
+  ld1 {v0.16b}, [x3]
+  rev x9, x9
+  rev x10, x10
+
+  mov x12, #(4 << 56)
+  lsl x11, x10, #56
+
+  aes_preload_keys(x0, w5);
+
+  b.eq .Lctr_enc_entry_192
+  b.hi .Lctr_enc_entry_256
+
+#define CTR_ENC(bits) \
+  .Lctr_enc_entry_##bits: \
+    cmp x4, #4; \
+    b.lo .Lctr_enc_loop_##bits; \
+    \
+    st1 {v8.16b-v11.16b}, [sp]; /* store callee saved registers */ \
+    \
+    adds x11, x11, x12; \
+    add v9.4s, v16.4s, v16.4s; /* 2 */ \
+    add v10.4s, v16.4s, v9.4s; /* 3 */ \
+    add v11.4s, v9.4s, v9.4s; /* 4 */ \
+    mov x7, #1; \
+    sub x4, x4, #4; \
+    ld1 {v5.16b-v8.16b}, [x2], #64; /* preload ciphertext */ \
+    b.cs .Lctr_enc_carry4_##bits; \
+    \
+    mov v1.16b, v0.16b; \
+    add x10, x10, #4; \
+    add v2.16b, v0.16b, v16.16b; \
+    add v3.8h, v0.8h, v9.8h; \
+    add v4.4s, v0.4s, v10.4s; \
+    add v0.2d, v0.2d, v11.2d; \
+    \
+  .Lctr_enc_entry4_##bits##_carry_done: \
+    mov x7, #0; \
+    cmp x4, #4; \
+    do_aes_4_part1(e, mc, v1, v2, v3, v4, vk0); \
+    b.lo .Lctr_enc_done4_##bits; \
+    \
+    st1 {v12.16b-v15.16b}, [x8]; /* store callee saved registers */ \
+    \
+  .Lctr_enc_loop4_##bits: \
+    eor v5.16b, v5.16b, vklast.16b; \
+    eor v6.16b, v6.16b, vklast.16b; \
+    eor v7.16b, v7.16b, vklast.16b; \
+    eor v8.16b, v8.16b, vklast.16b; \
+    do_aes_4_part2_##bits(e, mc, v12, v13, v14, v15, v1, v2, v3, v4, v5, v6, 
v7, v8); \
+    ld1 {v5.16b-v8.16b}, [x2], #64; /* preload ciphertext */ \
+    adds x11, x11, x12; \
+    sub x4, x4, #4; \
+    b.cs .Lctr_enc_carry4_##bits; \
+    \
+    mov v1.16b, v0.16b; \
+    add x10, x10, #4; \
+    add v2.16b, v0.16b, v16.16b; \
+    add v3.8h, v0.8h, v9.8h; \
+    add v4.4s, v0.4s, v10.4s; \
+    add v0.2d, v0.2d, v11.2d; \
+    \
+  .Lctr_enc_loop4_##bits##_carry_done: \
+    cmp x4, #4; \
+    do_aes_4_part1(e, mc, v1, v2, v3, v4, vk0); \
+    st1 {v12.16b-v15.16b}, [x1], #64; /* store plaintext */ \
+    \
+    b.hs .Lctr_enc_loop4_##bits; \
+    \
+    ld1 {v12.16b-v15.16b}, [x8]; /* restore callee saved registers */ \
+    \
+  .Lctr_enc_done4_##bits: \
+    eor v5.16b, v5.16b, vklast.16b; \
+    eor v6.16b, v6.16b, vklast.16b; \
+    eor v7.16b, v7.16b, vklast.16b; \
+    eor v8.16b, v8.16b, vklast.16b; \
+    do_aes_4_part2_##bits(e, mc, v5, v6, v7, v8, v1, v2, v3, v4, v5, v6, v7, 
v8); \
+    \
+    st1 {v5.16b-v8.16b}, [x1], #64; /* store plaintext */ \
+    \
+    CLEAR_REG(v3); \
+    CLEAR_REG(v4); \
+    ld1 {v8.16b-v11.16b}, [sp]; /* restore callee saved registers */ \
+    CLEAR_REG(v5); \
+    CLEAR_REG(v6); \
+    CLEAR_REG(v7); \
+    cbz x4, .Lctr_enc_done; \
+    \
+  .Lctr_enc_loop_##bits: \
+    \
+    adds x10, x10, #1; \
+    mov v1.16b, v0.16b; \
+    adc x9, x9, xzr; \
+    dup v0.2d, x10; \
+    sub x4, x4, #1; \
+    ins v0.D[0], x9; \
+    ld1 {v2.16b}, [x2], #16; /* load ciphertext */ \
+    rev64 v0.16b, v0.16b; \
+    \
+    do_aes_one_part1(e, mc, v1, vk0); \
+    eor v2.16b, v2.16b, vklast.16b; \
+    do_aes_one_part2_##bits(e, mc, v1, __, __); \
+    \
+    eor v1.16b, v1.16b, v2.16b; \
+    st1 {v1.16b}, [x1], #16; /* store plaintext */ \
+    \
+    cbnz x4, .Lctr_enc_loop_##bits; \
+    b .Lctr_enc_done; \
+    \
+  .Lctr_enc_carry4_##bits: \
+    \
+    adds x13, x10, #1; \
+    mov v1.16b, v0.16b; \
+    adc x14, x9, xzr; \
+    dup v2.2d, x13; \
+      adds x13, x10, #2; \
+    ins v2.D[0], x14; \
+      adc x14, x9, xzr; \
+    rev64 v2.16b, v2.16b; \
+      dup v3.2d, x13; \
+       adds x13, x10, #3; \
+      ins v3.D[0], x14; \
+       adc x14, x9, xzr; \
+      rev64 v3.16b, v3.16b; \
+       dup v4.2d, x13; \
+         adds x10, x10, #4; \
+       ins v4.D[0], x14; \
+         adc x9, x9, xzr; \
+       rev64 v4.16b, v4.16b; \
+         dup v0.2d, x10; \
+         ins v0.D[0], x9; \
+         rev64 v0.16b, v0.16b; \
+    \
+    cbz x7, .Lctr_enc_loop4_##bits##_carry_done; \
+    b .Lctr_enc_entry4_##bits##_carry_done;
+
+  CTR_ENC(128)
+  CTR_ENC(192)
+  CTR_ENC(256)
+
+#undef CTR_ENC
+
+.Lctr_enc_done:
+  aes_clear_keys(w5)
+
+  st1 {v0.16b}, [x3] /* store IV */
+
+  CLEAR_REG(v0)
+  CLEAR_REG(v1)
+  CLEAR_REG(v2)
+  CLEAR_REG(v16)
+
+  add sp, sp, #128;
+  CFI_ADJUST_CFA_OFFSET(-128);
+
+.Lctr_enc_skip:
+  ret_spec_stop
+  CFI_ENDPROC();
+ELF(.size _gcry_aes_ctr_enc_armv8_ce,.-_gcry_aes_ctr_enc_armv8_ce;)
+
+
+/*
+ * void _gcry_aes_ctr32le_enc_armv8_ce (const void *keysched,
+ *                                      unsigned char *outbuf,
+ *                                      const unsigned char *inbuf,
+ *                                      unsigned char *iv,
+ *                                      unsigned int nrounds);
+ */
+
+.align 3
+.globl _gcry_aes_ctr32le_enc_armv8_ce
+ELF(.type  _gcry_aes_ctr32le_enc_armv8_ce,%function;)
+_gcry_aes_ctr32le_enc_armv8_ce:
+  /* input:
+   *    r0: keysched
+   *    r1: outbuf
+   *    r2: inbuf
+   *    r3: iv
+   *    x4: nblocks
+   *    w5: nrounds
+   */
+  CFI_STARTPROC();
+
+  cbz x4, .Lctr32le_enc_skip
+
+  add x8, sp, #-64
+  add sp, sp, #-128;
+  CFI_ADJUST_CFA_OFFSET(128);
+
+  mov w6, #1
+  movi v16.16b, #0
+  mov v16.S[0], w6
+
+  /* load IV */
+  ld1 {v0.16b}, [x3]
+
+  aes_preload_keys(x0, w5);
+
+  b.eq .Lctr32le_enc_entry_192
+  b.hi .Lctr32le_enc_entry_256
+
+#define CTR32LE_ENC(bits) \
+  .Lctr32le_enc_entry_##bits: \
+    cmp x4, #4; \
+    b.lo .Lctr32le_enc_loop_##bits; \
+    \
+    st1 {v8.16b-v11.16b}, [sp]; /* store callee saved registers */ \
+    add v9.4s, v16.4s, v16.4s; /* 2 */ \
+    cmp x4, #8; \
+    add v10.4s, v9.4s, v16.4s; /* 3 */ \
+    sub x4, x4, #4; \
+    add v11.4s, v9.4s, v9.4s;  /* 4 */ \
+    \
+    ld1 {v5.16b-v8.16b}, [x2], #64; /* preload ciphertext */ \
+    \
+    mov v1.16b, v0.16b; \
+    add v2.4s, v0.4s, v16.4s; \
+    add v3.4s, v0.4s, v9.4s; \
+    add v4.4s, v0.4s, v10.4s; \
+    add v0.4s, v0.4s, v11.4s; \
+    \
+    do_aes_4_part1(e, mc, v1, v2, v3, v4, vk0); \
+    b.lo .Lctr32le_enc_done4_##bits; \
+    \
+    st1 {v12.16b-v15.16b}, [x8]; /* store callee saved registers */ \
+    \
+  .Lctr32le_enc_loop4_##bits: \
+    eor v5.16b, v5.16b, vklast.16b; \
+    eor v6.16b, v6.16b, vklast.16b; \
+    eor v7.16b, v7.16b, vklast.16b; \
+    eor v8.16b, v8.16b, vklast.16b; \
+    do_aes_4_part2_##bits(e, mc, v12, v13, v14, v15, v1, v2, v3, v4, v5, v6, 
v7, v8); \
+    ld1 {v5.16b-v8.16b}, [x2], #64; /* preload ciphertext */ \
+    \
+    cmp x4, #8; \
+    sub x4, x4, #4; \
+    \
+    mov v1.16b, v0.16b; \
+    add v2.4s, v0.4s, v16.4s; \
+    add v3.4s, v0.4s, v9.4s; \
+    add v4.4s, v0.4s, v10.4s; \
+    add v0.4s, v0.4s, v11.4s; \
+    \
+    do_aes_4_part1(e, mc, v1, v2, v3, v4, vk0); \
+    st1 {v12.16b-v15.16b}, [x1], #64; /* store plaintext */ \
+    \
+    b.hs .Lctr32le_enc_loop4_##bits; \
+    \
+    ld1 {v12.16b-v15.16b}, [x8]; /* restore callee saved registers */ \
+    \
+  .Lctr32le_enc_done4_##bits: \
+    eor v5.16b, v5.16b, vklast.16b; \
+    eor v6.16b, v6.16b, vklast.16b; \
+    eor v7.16b, v7.16b, vklast.16b; \
+    eor v8.16b, v8.16b, vklast.16b; \
+    do_aes_4_part2_##bits(e, mc, v5, v6, v7, v8, v1, v2, v3, v4, v5, v6, v7, 
v8); \
+    \
+    st1 {v5.16b-v8.16b}, [x1], #64; /* store plaintext */ \
+    CLEAR_REG(v3); \
+    CLEAR_REG(v4); \
+    ld1 {v8.16b-v11.16b}, [sp]; /* restore callee saved registers */ \
+    CLEAR_REG(v5); \
+    CLEAR_REG(v6); \
+    CLEAR_REG(v7); \
+    cbz x4, .Lctr32le_enc_done; \
+    \
+  .Lctr32le_enc_loop_##bits: \
+    \
+    mov v1.16b, v0.16b; \
+    ld1 {v2.16b}, [x2], #16; /* load ciphertext */ \
+    sub x4, x4, #1; \
+    add v0.4s, v0.4s, v16.4s; \
+    \
+    do_aes_one_part1(e, mc, v1, vk0); \
+    eor v2.16b, v2.16b, vklast.16b; \
+    do_aes_one_part2_##bits(e, mc, v1, __, __); \
+    \
+    eor v1.16b, v1.16b, v2.16b; \
+    st1 {v1.16b}, [x1], #16; /* store plaintext */ \
+    \
+    cbnz x4, .Lctr32le_enc_loop_##bits; \
+    b .Lctr32le_enc_done;
+
+  CTR32LE_ENC(128)
+  CTR32LE_ENC(192)
+  CTR32LE_ENC(256)
+
+#undef CTR32LE_ENC
+
+.Lctr32le_enc_done:
+  aes_clear_keys(w5)
+
+  st1 {v0.16b}, [x3] /* store IV */
+
+  CLEAR_REG(v0)
+  CLEAR_REG(v1)
+  CLEAR_REG(v2)
+  CLEAR_REG(v16)
+
+  add sp, sp, #128;
+  CFI_ADJUST_CFA_OFFSET(-128);
+
+.Lctr32le_enc_skip:
+  ret_spec_stop
+  CFI_ENDPROC();
+ELF(.size _gcry_aes_ctr32le_enc_armv8_ce,.-_gcry_aes_ctr32le_enc_armv8_ce;)
+
+
+/*
+ * void _gcry_aes_cfb_enc_armv8_ce (const void *keysched,
+ *                                  unsigned char *outbuf,
+ *                                  const unsigned char *inbuf,
+ *                                  unsigned char *iv, unsigned int nrounds);
+ */
+
+.align 3
+.globl _gcry_aes_cfb_enc_armv8_ce
+ELF(.type  _gcry_aes_cfb_enc_armv8_ce,%function;)
+_gcry_aes_cfb_enc_armv8_ce:
+  /* input:
+   *    r0: keysched
+   *    r1: outbuf
+   *    r2: inbuf
+   *    r3: iv
+   *    x4: nblocks
+   *    w5: nrounds
+   */
+  CFI_STARTPROC();
+
+  cbz x4, .Lcfb_enc_skip
+
+  /* load IV */
+  ld1 {v0.16b}, [x3]
+
+  aes_preload_keys(x0, w5);
+
+  ld1 {v1.16b}, [x2], #16; /* load plaintext */
+  eor v3.16b, vk0.16b, vklast.16b;
+  eor v0.16b, v0.16b, vklast.16b;
+  sub x4, x4, #1;
+  mov v4.16b, v3.16b;
+  do_aes_one_part1(e, mc, v0, v4);
+
+  b.eq .Lcfb_enc_entry_192
+  b.hi .Lcfb_enc_entry_256
+
+#define CFB_ENC(bits) \
+  .Lcfb_enc_entry_##bits: \
+    cbz x4, .Lcfb_enc_done_##bits; \
+    \
+  .Lcfb_enc_loop_##bits: \
+    eor v2.16b, v1.16b, vklast.16b; \
+    do_aes_one_part2_##bits(e, mc, v0, \
+                           _(eor v4.16b, v3.16b, v1.16b), \
+                           _(ld1 {v1.16b}, [x2], #16 /* load plaintext */)); \
+    sub x4, x4, #1; \
+    eor v2.16b, v2.16b, v0.16b; \
+    do_aes_one_part1(e, mc, v0, v4); \
+    st1 {v2.16b}, [x1], #16; /* store ciphertext */ \
+    cbnz x4, .Lcfb_enc_loop_##bits; \
+    \
+  .Lcfb_enc_done_##bits: \
+    eor v2.16b, v1.16b, vklast.16b; \
+    do_aes_one_part2_##bits(e, mc, v0, __, __); \
+    b .Lcfb_enc_done;
+
+  CFB_ENC(128)
+  CFB_ENC(192)
+  CFB_ENC(256)
+
+#undef CFB_ENC
+
+.Lcfb_enc_done:
+  eor v2.16b, v2.16b, v0.16b;
+  st1 {v2.16b}, [x1]; /* store ciphertext */
+  aes_clear_keys(w5)
+  st1 {v2.16b}, [x3] /* store IV */
+
+  CLEAR_REG(v0)
+  CLEAR_REG(v1)
+  CLEAR_REG(v2)
+  CLEAR_REG(v3)
+  CLEAR_REG(v4)
+
+.Lcfb_enc_skip:
+  ret_spec_stop
+  CFI_ENDPROC();
+ELF(.size _gcry_aes_cfb_enc_armv8_ce,.-_gcry_aes_cfb_enc_armv8_ce;)
+
+
+/*
+ * void _gcry_aes_cfb_dec_armv8_ce (const void *keysched,
+ *                                  unsigned char *outbuf,
+ *                                  const unsigned char *inbuf,
+ *                                  unsigned char *iv, unsigned int nrounds);
+ */
+
+.align 3
+.globl _gcry_aes_cfb_dec_armv8_ce
+ELF(.type  _gcry_aes_cfb_dec_armv8_ce,%function;)
+_gcry_aes_cfb_dec_armv8_ce:
+  /* input:
+   *    r0: keysched
+   *    r1: outbuf
+   *    r2: inbuf
+   *    r3: iv
+   *    x4: nblocks
+   *    w5: nrounds
+   */
+  CFI_STARTPROC();
+
+  cbz x4, .Lcfb_dec_skip
+
+  add sp, sp, #-64;
+  CFI_ADJUST_CFA_OFFSET(64);
+
+  /* load IV */
+  ld1 {v0.16b}, [x3]
+
+  aes_preload_keys(x0, w5);
+
+  b.eq .Lcfb_dec_entry_192
+  b.hi .Lcfb_dec_entry_256
+
+#define CFB_DEC(bits) \
+  .Lcfb_dec_entry_##bits: \
+    cmp x4, #4; \
+    b.lo .Lcfb_dec_loop_##bits; \
+    \
+    ld1 {v2.16b-v5.16b}, [x2], #64; /* load ciphertext */ \
+    cmp x4, #8; \
+    mov v1.16b, v0.16b; \
+    sub x4, x4, #4; \
+    eor v6.16b, v2.16b, vklast.16b; \
+    eor v7.16b, v3.16b, vklast.16b; \
+    eor v16.16b, v4.16b, vklast.16b; \
+    mov v0.16b, v5.16b; /* next IV */ \
+    eor v5.16b, v5.16b, vklast.16b; \
+    \
+    do_aes_4_part1(e, mc, v1, v2, v3, v4, vk0); \
+    b.lo .Lcfb_dec_done4_##bits; \
+    \
+    st1 {v8.16b-v11.16b}, [sp]; /* store callee saved registers */ \
+    \
+  .Lcfb_dec_loop4_##bits: \
+    do_aes_4_part2_##bits(e, mc, v8, v9, v10, v11, v1, v2, v3, v4, v6, v7, 
v16, v5); \
+    ld1 {v2.16b-v5.16b}, [x2], #64; /* load ciphertext */ \
+    cmp x4, #8; \
+    mov v1.16b, v0.16b; \
+    sub x4, x4, #4; \
+    eor v6.16b, v2.16b, vklast.16b; \
+    eor v7.16b, v3.16b, vklast.16b; \
+    eor v16.16b, v4.16b, vklast.16b; \
+    mov v0.16b, v5.16b; /* next IV */ \
+    eor v5.16b, v5.16b, vklast.16b; \
+    \
+    do_aes_4_part1(e, mc, v1, v2, v3, v4, vk0); \
+    st1 {v8.16b-v11.16b}, [x1], #64; /* store plaintext */ \
+    \
+    b.hs .Lcfb_dec_loop4_##bits; \
+    \
+    ld1 {v8.16b-v11.16b}, [sp]; /* restore callee saved registers */ \
+    \
+  .Lcfb_dec_done4_##bits: \
+    do_aes_4_part2_##bits(e, mc, v1, v2, v3, v4, v1, v2, v3, v4, v6, v7, v16, 
v5); \
+    \
+    CLEAR_REG(v5); \
+    CLEAR_REG(v6); \
+    CLEAR_REG(v7); \
+    st1 {v1.16b-v4.16b}, [x1], #64; /* store plaintext */ \
+    CLEAR_REG(v3); \
+    CLEAR_REG(v4); \
+    cbz x4, .Lcfb_dec_done; \
+    \
+  .Lcfb_dec_loop_##bits: \
+    ld1 {v1.16b}, [x2], #16; /* load ciphertext */ \
+    sub x4, x4, #1; \
+    \
+    do_aes_one_part1(e, mc, v0, vk0); \
+    eor v2.16b, v1.16b, vklast.16b; \
+    do_aes_one_part2_##bits(e, mc, v0, __, __); \
+    eor v2.16b, v2.16b, v0.16b; \
+    \
+    mov v0.16b, v1.16b; \
+    st1 {v2.16b}, [x1], #16; /* store plaintext */ \
+    \
+    cbnz x4, .Lcfb_dec_loop_##bits; \
+    b .Lcfb_dec_done;
+
+  CFB_DEC(128)
+  CFB_DEC(192)
+  CFB_DEC(256)
+
+#undef CFB_DEC
+
+.Lcfb_dec_done:
+  aes_clear_keys(w5)
+
+  st1 {v0.16b}, [x3] /* store IV */
+
+  CLEAR_REG(v0)
+  CLEAR_REG(v1)
+  CLEAR_REG(v2)
+  CLEAR_REG(v16)
+
+  add sp, sp, #64;
+  CFI_ADJUST_CFA_OFFSET(-64);
+
+.Lcfb_dec_skip:
+  ret_spec_stop
+  CFI_ENDPROC();
+ELF(.size _gcry_aes_cfb_dec_armv8_ce,.-_gcry_aes_cfb_dec_armv8_ce;)
+
+
+/*
+ * void _gcry_aes_ocb_enc_armv8_ce (const void *keysched,
+ *                                  unsigned char *outbuf,
+ *                                  const unsigned char *inbuf,
+ *                                  unsigned char *offset,
+ *                                  unsigned char *checksum,
+ *                                  unsigned char *L_table,
+ *                                  size_t nblocks,
+ *                                  unsigned int nrounds,
+ *                                  unsigned int blkn);
+ */
+
+.align 3
+.globl _gcry_aes_ocb_enc_armv8_ce
+ELF(.type  _gcry_aes_ocb_enc_armv8_ce,%function;)
+_gcry_aes_ocb_enc_armv8_ce:
+  /* input:
+   *    x0: keysched
+   *    x1: outbuf
+   *    x2: inbuf
+   *    x3: offset
+   *    x4: checksum
+   *    x5: Ltable
+   *    x6: nblocks (0 < nblocks)
+   *    w7: nrounds
+   *    %st+0: blkn => w12
+   */
+  CFI_STARTPROC();
+
+  ldr w12, [sp]
+  ld1 {v0.16b}, [x3] /* load offset */
+  ld1 {v16.16b}, [x4] /* load checksum */
+
+  add x16, sp, #-64;
+  add sp, sp, #-128;
+  CFI_ADJUST_CFA_OFFSET(128);
+
+  aes_preload_keys(x0, w7);
+
+  st1 {v8.16b-v11.16b}, [sp]; /* store callee saved registers */
+
+  eor v0.16b, v0.16b, vk0.16b; /* offset ^ first key */
+  eor v9.16b, vk0.16b, vklast.16b; /* first key ^ last key */
+
+  b.eq .Locb_ecry_entry_192
+  b.hi .Locb_ecry_entry_256
+
+#define OCB_CRYPT(bits, ed, mcimc) \
+  .Locb_##ed##cry_entry_##bits: \
+    /* Get number of blocks to align nblk to 4. */ \
+    neg x13, x12; \
+    add x12, x12, #1; /* Pre-increment nblk for ntz calculation */ \
+    and x13, x13, #(4-1); \
+    cmp x13, x6; \
+    csel x13, x6, x13, hi; \
+    cbz x13, .Locb_##ed##cry_alignment_ok_##bits; \
+    \
+    /* Number of blocks after alignment. */ \
+    sub x14, x6, x13; \
+    \
+    /* If number after alignment is less than 4, skip aligned handling \
+     * completely. */ \
+    cmp x14, #4; \
+    csel x13, x6, x13, lo; \
+    \
+  .Locb_##ed##cry_unaligned_entry_##bits: \
+    cmp x13, #4; \
+    \
+  .Locb_##ed##cry_loop1_##bits: \
+    \
+    /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ \
+    /* Checksum_i = Checksum_{i-1} xor P_i  */ \
+    /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i)  */ \
+    \
+    rbit x8, x12; \
+    add x12, x12, #1; \
+    clz x8, x8; /* ntz(i) */ \
+    add x8, x5, x8, lsl #4; \
+    \
+    ld1 {v1.16b}, [x2], #16; /* load plaintext */ \
+    ld1 {v2.16b}, [x8]; /* load L_{ntz(i)} */ \
+    eor v0.16b, v0.16b, v2.16b; \
+    sub x13, x13, #1; \
+    ENC(eor v16.16b, v16.16b, v1.16b); \
+    sub x6, x6, #1; \
+    \
+    do_aes_one_part1(ed, mcimc, v1, v0); \
+    eor v2.16b, v0.16b, v9.16b; \
+    do_aes_one_part2_##bits(ed, mcimc, v1, __, __); \
+    eor v1.16b, v1.16b, v2.16b; \
+    st1 {v1.16b}, [x1], #16; /* store ciphertext */ \
+    DEC(eor v16.16b, v16.16b, v1.16b); \
+    \
+    cbnz x13, .Locb_##ed##cry_loop1_##bits; \
+    \
+    cbz x6, .Locb_##ed##cry_done; \
+    \
+    /* nblk is now aligned and we have 4 or more blocks. So jump directly to \
+     * aligned processing. */ \
+    b .Locb_##ed##cry_aligned_entry_##bits; \
+    \
+  .Locb_##ed##cry_alignment_ok_##bits: \
+    cbz x6, .Locb_##ed##cry_done; \
+    \
+    /* Short buffers do not benefit from L-array optimization. */ \
+    cmp x6, #4; \
+    mov x13, x6; \
+    b.lo .Locb_##ed##cry_unaligned_entry_##bits; \
+    \
+  .Locb_##ed##cry_aligned_entry_##bits: \
+    /* Prepare L-array optimization. \
+     * Since nblk is aligned to 4, offsets will have following construction: \
+     *  - block1 = ntz{0} = offset ^ L[0] \
+     *  - block2 = ntz{1} = offset ^ L[0] ^ L[1] \
+     *  - block3 = ntz{0} = offset ^ L[1] \
+     *  - block4 = ntz{x} = offset ^ L[1] ^ L[ntz{x}] \
+     */ \
+    ld1 {v10.16b-v11.16b}, [x5];        /* preload L[0] && L[1] */ \
+    mov x15, #4; \
+    \
+    st1 {v12.16b-v15.16b}, [x16]; /* store callee saved registers */ \
+    \
+    /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ \
+    /* Checksum_i = Checksum_{i-1} xor P_i  */ \
+    /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i)  */ \
+    \
+    add x11, x12, #3; \
+    ld1 {v1.16b-v4.16b}, [x2], #64;     /* load P_i+<0-3> */ \
+    rbit x11, x11; \
+    eor v6.16b, v10.16b, v11.16b;       /* L[0] ^ L[1] */ \
+    ENC(eor v16.16b, v16.16b, v1.16b);  /* Checksum_i+0 */ \
+    add x12, x12, #4; \
+    clz x11, x11; /* ntz(i+3) */ \
+    add x15, x15, #4; \
+    add x11, x5, x11, lsl #4; \
+    \
+    eor v5.16b, v0.16b, v10.16b;        /* Offset_i+0 */ \
+    ENC(eor v16.16b, v16.16b, v2.16b);  /* Checksum_i+1 */ \
+    ld1 {v8.16b}, [x11];                /* load L_{ntz(i+3)} */ \
+    ENC(eor v16.16b, v16.16b, v3.16b);  /* Checksum_i+2 */ \
+    eor v6.16b, v0.16b, v6.16b;         /* Offset_i+1 */ \
+    ENC(eor v16.16b, v16.16b, v4.16b);  /* Checksum_i+3 */ \
+    eor v7.16b, v0.16b, v11.16b;        /* Offset_i+2 */ \
+    eor v8.16b, v8.16b, v11.16b;        /* L[1] ^ L[ntz{x}] */ \
+    cmp x15, x13; \
+    eor v0.16b, v0.16b, v8.16b;         /* Offset_i+3 */ \
+    \
+    do_aes_4_part1_multikey(ed, mcimc, v1, v2, v3, v4, v5, v6, v7, v0); /* 
P_i+j xor Offset_i+j */ \
+    b.hi .Locb_##ed##cry_aligned_done4_##bits; \
+    \
+  .Locb_##ed##cry_aligned_loop4_##bits: \
+    add x11, x12, #3; \
+    eor v5.16b, v5.16b, v9.16b; \
+    eor v6.16b, v6.16b, v9.16b; \
+    rbit x11, x11; \
+    eor v7.16b, v7.16b, v9.16b; \
+    eor v8.16b, v0.16b, v9.16b; \
+    clz x11, x11; /* ntz(i+3) */ \
+    do_aes_4_part2_##bits(ed, mcimc, v12, v13, v14, v15, v1, v2, v3, v4, v5, 
v6, v7, v8); /* xor Offset_i+j */ \
+    \
+    /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ \
+    /* Checksum_i = Checksum_{i-1} xor P_i  */ \
+    /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i)  */ \
+    \
+    add x12, x12, #4; \
+    ld1 {v1.16b-v4.16b}, [x2], #64;     /* load P_i+<0-3> */ \
+    eor v6.16b, v10.16b, v11.16b;       /* L[0] ^ L[1] */ \
+    add x15, x15, #4; \
+    DEC(eor v16.16b, v16.16b, v12.16b); /* Checksum_i+0 */ \
+    ENC(eor v16.16b, v16.16b, v1.16b);  /* Checksum_i+0 */ \
+    add x11, x5, x11, lsl #4; \
+    \
+    eor v5.16b, v0.16b, v10.16b;        /* Offset_i+0 */ \
+    ENC(eor v16.16b, v16.16b, v2.16b);  /* Checksum_i+1 */ \
+    DEC(eor v16.16b, v16.16b, v13.16b); /* Checksum_1+2 */ \
+    ld1 {v8.16b}, [x11];                /* load L_{ntz(i+3)} */ \
+    ENC(eor v16.16b, v16.16b, v3.16b);  /* Checksum_i+2 */ \
+    DEC(eor v16.16b, v16.16b, v14.16b); /* Checksum_i+0+3 */ \
+    eor v6.16b, v0.16b, v6.16b;         /* Offset_i+1 */ \
+    ENC(eor v16.16b, v16.16b, v4.16b);  /* Checksum_i+3 */ \
+    DEC(eor v16.16b, v16.16b, v15.16b); /* Checksum_i+0+1+2 */ \
+    eor v7.16b, v0.16b, v11.16b;        /* Offset_i+2 */ \
+    eor v8.16b, v8.16b, v11.16b;        /* L[1] ^ L[ntz{x}] */ \
+    cmp x15, x13; \
+    eor v0.16b, v0.16b, v8.16b;         /* Offset_i+3 */ \
+    \
+    do_aes_4_part1_multikey(ed, mcimc, v1, v2, v3, v4, v5, v6, v7, v0); /* 
P_i+j xor Offset_i+j */ \
+    st1 {v12.16b-v15.16b}, [x1], #64; \
+    \
+    b.ls .Locb_##ed##cry_aligned_loop4_##bits; \
+    \
+  .Locb_##ed##cry_aligned_done4_##bits: \
+    eor v5.16b, v5.16b, v9.16b; \
+    eor v6.16b, v6.16b, v9.16b; \
+    eor v7.16b, v7.16b, v9.16b; \
+    eor v8.16b, v0.16b, v9.16b; \
+    do_aes_4_part2_##bits(ed, mcimc, v1, v2, v3, v4, v1, v2, v3, v4, v5, v6, 
v7, v8); /* xor Offset_i+j */ \
+    DEC(eor v16.16b, v16.16b, v1.16b);  /* Checksum_i+0 */ \
+    DEC(eor v5.16b, v2.16b, v3.16b);    /* Checksum_1+2 */ \
+    DEC(eor v16.16b, v16.16b, v4.16b);  /* Checksum_i+0+3 */ \
+    st1 {v1.16b-v4.16b}, [x1], #64; \
+    DEC(eor v16.16b, v16.16b, v5.16b);  /* Checksum_i+0+1+2 */ \
+    \
+    sub x15, x15, #4; \
+    CLEAR_REG(v3); \
+    CLEAR_REG(v4); \
+    ld1 {v12.16b-v15.16b}, [x16]; /* restore callee saved registers */ \
+    sub x13, x13, x15; \
+    sub x6, x6, x15; \
+    CLEAR_REG(v5); \
+    CLEAR_REG(v6); \
+    \
+    /* Handle tailing 1…3 blocks in unaligned loop. */ \
+    mov x13, x6; \
+    cbnz x6, .Locb_##ed##cry_unaligned_entry_##bits; \
+    \
+    b .Locb_##ed##cry_done;
+
+#define ENC(...) __VA_ARGS__
+#define DEC(...) /*_*/
+  OCB_CRYPT(128, e, mc)
+  OCB_CRYPT(192, e, mc)
+  OCB_CRYPT(256, e, mc)
+#undef ENC
+#undef DEC
+
+.Locb_ecry_done:
+  eor v0.16b, v0.16b, vk0.16b; /* restore offset */
+
+  ld1 {v8.16b-v11.16b}, [sp]; /* restore callee saved registers */
+  aes_clear_keys(w7)
+
+  st1 {v16.16b}, [x4] /* store checksum */
+  st1 {v0.16b}, [x3] /* store offset */
+
+  CLEAR_REG(v0)
+  CLEAR_REG(v1)
+  CLEAR_REG(v2)
+  CLEAR_REG(v7)
+  CLEAR_REG(v16)
+
+  add sp, sp, #128;
+  CFI_ADJUST_CFA_OFFSET(-128);
+
+  ret_spec_stop
+  CFI_ENDPROC();
+ELF(.size _gcry_aes_ocb_enc_armv8_ce,.-_gcry_aes_ocb_enc_armv8_ce;)
+
+
+/*
+ * void _gcry_aes_ocb_dec_armv8_ce (const void *keysched,
+ *                                  unsigned char *outbuf,
+ *                                  const unsigned char *inbuf,
+ *                                  unsigned char *offset,
+ *                                  unsigned char *checksum,
+ *                                  unsigned char *L_table,
+ *                                  size_t nblocks,
+ *                                  unsigned int nrounds,
+ *                                  unsigned int blkn);
+ */
+
+.align 3
+.globl _gcry_aes_ocb_dec_armv8_ce
+ELF(.type  _gcry_aes_ocb_dec_armv8_ce,%function;)
+_gcry_aes_ocb_dec_armv8_ce:
+  /* input:
+   *    x0: keysched
+   *    x1: outbuf
+   *    x2: inbuf
+   *    x3: offset
+   *    x4: checksum
+   *    x5: Ltable
+   *    x6: nblocks (0 < nblocks)
+   *    w7: nrounds
+   *    %st+0: blkn => w12
+   */
+  CFI_STARTPROC();
+
+  ldr w12, [sp]
+  ld1 {v0.16b}, [x3] /* load offset */
+  ld1 {v16.16b}, [x4] /* load checksum */
+
+  add x16, sp, #-64;
+  add sp, sp, #-128;
+  CFI_ADJUST_CFA_OFFSET(128);
+
+  aes_preload_keys(x0, w7);
+
+  st1 {v8.16b-v11.16b}, [sp]; /* store callee saved registers */
+
+  eor v0.16b, v0.16b, vk0.16b; /* offset ^ first key */
+  eor v9.16b, vk0.16b, vklast.16b; /* first key ^ last key */
+
+  b.eq .Locb_dcry_entry_192
+  b.hi .Locb_dcry_entry_256
+
+#define ENC(...) /*_*/
+#define DEC(...) __VA_ARGS__
+  OCB_CRYPT(128, d, imc)
+  OCB_CRYPT(192, d, imc)
+  OCB_CRYPT(256, d, imc)
+#undef ENC
+#undef DEC
+
+#undef OCB_CRYPT
+
+.Locb_dcry_done:
+  eor v0.16b, v0.16b, vk0.16b; /* restore offset */
+
+  ld1 {v8.16b-v11.16b}, [sp]; /* restore callee saved registers */
+  aes_clear_keys(w7)
+
+  st1 {v16.16b}, [x4] /* store checksum */
+  st1 {v0.16b}, [x3] /* store offset */
+
+  CLEAR_REG(v0)
+  CLEAR_REG(v1)
+  CLEAR_REG(v2)
+  CLEAR_REG(v16)
+
+  add sp, sp, #128;
+  CFI_ADJUST_CFA_OFFSET(-128);
+
+  ret_spec_stop
+  CFI_ENDPROC();
+ELF(.size _gcry_aes_ocb_dec_armv8_ce,.-_gcry_aes_ocb_dec_armv8_ce;)
+
+
+/*
+ * void _gcry_aes_ocb_auth_armv8_ce (const void *keysched,
+ *                                   const unsigned char *abuf,
+ *                                   unsigned char *offset,
+ *                                   unsigned char *checksum,
+ *                                   unsigned char *L_table,
+ *                                   size_t nblocks,
+ *                                   unsigned int nrounds,
+ *                                   unsigned int blkn);
+ */
+
+.align 3
+.globl _gcry_aes_ocb_auth_armv8_ce
+ELF(.type  _gcry_aes_ocb_auth_armv8_ce,%function;)
+_gcry_aes_ocb_auth_armv8_ce:
+  /* input:
+   *    x0: keysched
+   *    x1: abuf
+   *    x2: offset => x3
+   *    x3: checksum => x4
+   *    x4: Ltable => x5
+   *    x5: nblocks => x6  (0 < nblocks <= 32)
+   *    w6: nrounds => w7
+   *    w7: blkn => w12
+   */
+  CFI_STARTPROC();
+
+  mov w12, w7
+  mov w7, w6
+  mov x6, x5
+  mov x5, x4
+  mov x4, x3
+  mov x3, x2
+
+  aes_preload_keys(x0, w7);
+
+  ld1 {v0.16b}, [x3] /* load offset */
+  ld1 {v16.16b}, [x4] /* load checksum */
+
+  beq .Locb_auth_entry_192
+  bhi .Locb_auth_entry_256
+
+#define OCB_AUTH(bits) \
+  .Locb_auth_entry_##bits: \
+    cmp x6, #4; \
+    add w12, w12, #1; \
+    b.lo .Locb_auth_loop_##bits; \
+    \
+  .Locb_auth_loop4_##bits: \
+    \
+    /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ \
+    /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i)  */ \
+    \
+    add w9, w12, #1; \
+    add w10, w12, #2; \
+    add w11, w12, #3; \
+    rbit w8, w12; \
+    add w12, w12, #4; \
+    rbit w9, w9; \
+    rbit w10, w10; \
+    rbit w11, w11; \
+    clz w8, w8; /* ntz(i+0) */ \
+    clz w9, w9; /* ntz(i+1) */ \
+    clz w10, w10; /* ntz(i+2) */ \
+    clz w11, w11; /* ntz(i+3) */ \
+    add x8, x5, x8, lsl #4; \
+    ld1 {v1.16b-v4.16b}, [x1], #64;   /* load A_i+<0-3> */ \
+    add x9, x5, x9, lsl #4; \
+    add x10, x5, x10, lsl #4; \
+    add x11, x5, x11, lsl #4; \
+    \
+    sub x6, x6, #4; \
+    \
+    ld1 {v5.16b}, [x8];               /* load L_{ntz(i+0)} */ \
+    ld1 {v6.16b}, [x9];               /* load L_{ntz(i+1)} */ \
+    ld1 {v7.16b}, [x10];              /* load L_{ntz(i+2)} */ \
+    eor v5.16b, v5.16b, v0.16b;       /* Offset_i+0 */ \
+    ld1 {v0.16b}, [x11];              /* load L_{ntz(i+3)} */ \
+    eor v6.16b, v6.16b, v5.16b;       /* Offset_i+1 */ \
+    eor v1.16b, v1.16b, v5.16b;       /* A_i+0 xor Offset_i+0 */ \
+    eor v7.16b, v7.16b, v6.16b;       /* Offset_i+2 */ \
+    eor v2.16b, v2.16b, v6.16b;       /* A_i+1 xor Offset_i+1 */ \
+    eor v0.16b, v0.16b, v7.16b;       /* Offset_i+3 */ \
+    cmp x6, #4; \
+    eor v3.16b, v3.16b, v7.16b;       /* A_i+2 xor Offset_i+2 */ \
+    eor v4.16b, v4.16b, v0.16b;       /* A_i+3 xor Offset_i+3 */ \
+    \
+    do_aes_4_##bits(e, mc, v1, v2, v3, v4); \
+    \
+    eor v1.16b, v1.16b, v2.16b; \
+    eor v16.16b, v16.16b, v3.16b; \
+    eor v1.16b, v1.16b, v4.16b; \
+    eor v16.16b, v16.16b, v1.16b; \
+    \
+    b.hs .Locb_auth_loop4_##bits; \
+    CLEAR_REG(v3); \
+    CLEAR_REG(v4); \
+    CLEAR_REG(v5); \
+    CLEAR_REG(v6); \
+    CLEAR_REG(v7); \
+    cbz x6, .Locb_auth_done; \
+    \
+  .Locb_auth_loop_##bits: \
+    \
+    /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ \
+    /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i)  */ \
+    \
+    rbit w8, w12; \
+    add w12, w12, #1; \
+    clz w8, w8; /* ntz(i) */ \
+    add x8, x5, x8, lsl #4; \
+    \
+    ld1 {v1.16b}, [x1], #16; /* load aadtext */ \
+    ld1 {v2.16b}, [x8]; /* load L_{ntz(i)} */ \
+    sub x6, x6, #1; \
+    eor v0.16b, v0.16b, v2.16b; \
+    eor v1.16b, v1.16b, v0.16b; \
+    \
+    do_aes_one##bits(e, mc, v1, v1, vk0) \
+    \
+    eor v16.16b, v16.16b, v1.16b; \
+    \
+    cbnz x6, .Locb_auth_loop_##bits; \
+    b .Locb_auth_done;
+
+  OCB_AUTH(128)
+  OCB_AUTH(192)
+  OCB_AUTH(256)
+
+#undef OCB_AUTH
+
+.Locb_auth_done:
+  aes_clear_keys(w7)
+
+  st1 {v16.16b}, [x4] /* store checksum */
+  st1 {v0.16b}, [x3] /* store offset */
+
+  CLEAR_REG(v0)
+  CLEAR_REG(v1)
+  CLEAR_REG(v2)
+  CLEAR_REG(v16)
+
+  ret_spec_stop
+  CFI_ENDPROC();
+ELF(.size _gcry_aes_ocb_auth_armv8_ce,.-_gcry_aes_ocb_auth_armv8_ce;)
+
+
+/*
+ * void _gcry_aes_xts_enc_armv8_ce (const void *keysched,
+ *                                  unsigned char *outbuf,
+ *                                  const unsigned char *inbuf,
+ *                                  unsigned char *tweak,
+ *                                  size_t nblocks,
+ *                                  unsigned int nrounds);
+ */
+
+.align 3
+.globl _gcry_aes_xts_enc_armv8_ce
+ELF(.type  _gcry_aes_xts_enc_armv8_ce,%function;)
+_gcry_aes_xts_enc_armv8_ce:
+  /* input:
+   *    r0: keysched
+   *    r1: outbuf
+   *    r2: inbuf
+   *    r3: tweak
+   *    x4: nblocks
+   *    w5: nrounds
+   */
+  CFI_STARTPROC();
+
+  cbz x4, .Lxts_enc_skip
+
+  add x16, sp, #-64;
+  add sp, sp, #-128;
+  CFI_ADJUST_CFA_OFFSET(128);
+
+  /* load tweak */
+  ld1 {v0.16b}, [x3]
+
+  /* load gfmul mask */
+  mov x6, #0x87
+  mov x7, #0x01
+  mov v16.D[0], x6
+  mov v16.D[1], x7
+
+  aes_preload_keys(x0, w5);
+  eor vklast.16b, vklast.16b, vk0.16b;
+
+  b.eq .Lxts_ecry_entry_192
+  b.hi .Lxts_ecry_entry_256
+
+#define XTS_CRYPT(bits, ed, mcimc) \
+  .Lxts_##ed##cry_entry_##bits: \
+    cmp x4, #4; \
+    b.lo .Lxts_##ed##cry_loop_##bits; \
+    \
+    st1 {v8.16b}, [sp]; /* store callee saved registers */ \
+    ext v4.16b, v0.16b, v0.16b, #8; \
+    mov v8.16b, v0.16b; \
+    \
+    sshr v2.2d, v4.2d, #63; \
+    add v5.2d, v0.2d, v0.2d; \
+    and v2.16b, v2.16b, v16.16b; \
+    add v4.2d, v4.2d, v4.2d; \
+    eor v5.16b, v5.16b, v2.16b; \
+    \
+    sshr v2.2d, v4.2d, #63; \
+    add v6.2d, v5.2d, v5.2d; \
+    and v2.16b, v2.16b, v16.16b; \
+    add v4.2d, v4.2d, v4.2d; \
+    eor v6.16b, v6.16b, v2.16b; \
+    \
+    sshr v2.2d, v4.2d, #63; \
+    add v7.2d, v6.2d, v6.2d; \
+    and v2.16b, v2.16b, v16.16b; \
+    add v4.2d, v4.2d, v4.2d; \
+    eor v7.16b, v7.16b, v2.16b; \
+    \
+    sshr v2.2d, v4.2d, #63; \
+    add v3.2d, v7.2d, v7.2d; \
+    and v2.16b, v2.16b, v16.16b; \
+    add v4.2d, v4.2d, v4.2d; \
+    eor v0.16b, v3.16b, v2.16b; \
+    ld1 {v1.16b-v4.16b}, [x2], #64; /* load plaintext */ \
+    cmp x4, #8; \
+    sub x4, x4, #4; \
+    \
+    eor v8.16b, v8.16b, vk0.16b; \
+    eor v5.16b, v5.16b, vk0.16b; \
+    eor v6.16b, v6.16b, vk0.16b; \
+    eor v7.16b, v7.16b, vk0.16b; \
+    \
+    do_aes_4_part1_multikey(ed, mcimc, v1, v2, v3, v4, v8, v5, v6, v7); \
+    b.lo .Lxts_##ed##cry_done4_##bits; \
+    \
+    st1 {v9.16b-v12.16b}, [x16]; /* store callee saved registers */ \
+    \
+  .Lxts_##ed##cry_loop4_##bits: \
+    eor v8.16b, v8.16b, vklast.16b; \
+    eor v5.16b, v5.16b, vklast.16b; \
+    eor v6.16b, v6.16b, vklast.16b; \
+    eor v7.16b, v7.16b, vklast.16b; \
+    do_aes_4_part2_##bits(ed, mcimc, v9, v10, v11, v12, v1, v2, v3, v4, v8, 
v5, v6, v7); \
+    \
+    ext v4.16b, v0.16b, v0.16b, #8; \
+    mov v8.16b, v0.16b; \
+    \
+    sshr v2.2d, v4.2d, #63; \
+    add v5.2d, v0.2d, v0.2d; \
+    and v2.16b, v2.16b, v16.16b; \
+    add v4.2d, v4.2d, v4.2d; \
+    eor v5.16b, v5.16b, v2.16b; \
+    \
+    sshr v2.2d, v4.2d, #63; \
+    add v6.2d, v5.2d, v5.2d; \
+    and v2.16b, v2.16b, v16.16b; \
+    add v4.2d, v4.2d, v4.2d; \
+    eor v6.16b, v6.16b, v2.16b; \
+    \
+    sshr v2.2d, v4.2d, #63; \
+    add v7.2d, v6.2d, v6.2d; \
+    and v2.16b, v2.16b, v16.16b; \
+    add v4.2d, v4.2d, v4.2d; \
+    eor v7.16b, v7.16b, v2.16b; \
+    \
+    sshr v2.2d, v4.2d, #63; \
+    add v3.2d, v7.2d, v7.2d; \
+    and v2.16b, v2.16b, v16.16b; \
+    add v4.2d, v4.2d, v4.2d; \
+    eor v0.16b, v3.16b, v2.16b; \
+    ld1 {v1.16b-v4.16b}, [x2], #64; /* load plaintext */ \
+    cmp x4, #8; \
+    sub x4, x4, #4; \
+    \
+    eor v8.16b, v8.16b, vk0.16b; \
+    eor v5.16b, v5.16b, vk0.16b; \
+    eor v6.16b, v6.16b, vk0.16b; \
+    eor v7.16b, v7.16b, vk0.16b; \
+    \
+    do_aes_4_part1_multikey(ed, mcimc, v1, v2, v3, v4, v8, v5, v6, v7); \
+    \
+    st1 {v9.16b-v12.16b}, [x1], #64; /* store plaintext */ \
+    \
+    b.hs .Lxts_##ed##cry_loop4_##bits; \
+    \
+    ld1 {v9.16b-v12.16b}, [x16]; /* restore callee saved registers */ \
+    \
+  .Lxts_##ed##cry_done4_##bits: \
+    eor v8.16b, v8.16b, vklast.16b; \
+    eor v5.16b, v5.16b, vklast.16b; \
+    eor v6.16b, v6.16b, vklast.16b; \
+    eor v7.16b, v7.16b, vklast.16b; \
+    do_aes_4_part2_##bits(ed, mcimc, v1, v2, v3, v4, v1, v2, v3, v4, v8, v5, 
v6, v7); \
+    \
+    st1 {v1.16b-v4.16b}, [x1], #64; /* store plaintext */ \
+    \
+    CLEAR_REG(v4); \
+    ld1 {v8.16b}, [sp]; /* restore callee saved registers */ \
+    CLEAR_REG(v5); \
+    CLEAR_REG(v6); \
+    CLEAR_REG(v7); \
+    cbz x4, .Lxts_##ed##cry_done; \
+    \
+  .Lxts_##ed##cry_loop_##bits: \
+    \
+    ld1 {v1.16b}, [x2], #16; /* load plaintext */ \
+    ext v3.16b, v0.16b, v0.16b, #8; \
+    eor v2.16b, v0.16b, vk0.16b; \
+    sshr v3.2d, v3.2d, #63; \
+    add v0.2d, v0.2d, v0.2d; \
+    and v3.16b, v3.16b, v16.16b; \
+    sub x4, x4, #1; \
+    eor v0.16b, v0.16b, v3.16b; \
+    \
+    do_aes_one_part1(ed, mcimc, v1, v2); \
+    eor v2.16b, v2.16b, vklast.16b; \
+    do_aes_one_part2_##bits(ed, mcimc, v1, __, __); \
+    eor v1.16b, v1.16b, v2.16b; \
+    \
+    st1 {v1.16b}, [x1], #16; /* store ciphertext */ \
+    \
+    cbnz x4, .Lxts_##ed##cry_loop_##bits; \
+    b .Lxts_##ed##cry_done;
+
+  XTS_CRYPT(128, e, mc)
+  XTS_CRYPT(192, e, mc)
+  XTS_CRYPT(256, e, mc)
+
+.Lxts_ecry_done:
+  aes_clear_keys(w5)
+
+  st1 {v0.16b}, [x3] /* store tweak */
+
+  CLEAR_REG(v0)
+  CLEAR_REG(v1)
+  CLEAR_REG(v2)
+  CLEAR_REG(v3)
+  CLEAR_REG(v16)
+
+  add sp, sp, 128;
+  CFI_ADJUST_CFA_OFFSET(-128);
+
+.Lxts_enc_skip:
+  ret_spec_stop
+  CFI_ENDPROC();
+ELF(.size _gcry_aes_xts_enc_armv8_ce,.-_gcry_aes_xts_enc_armv8_ce;)
+
+
+/*
+ * void _gcry_aes_xts_dec_armv8_ce (const void *keysched,
+ *                                  unsigned char *outbuf,
+ *                                  const unsigned char *inbuf,
+ *                                  unsigned char *tweak,
+ *                                  size_t nblocks,
+ *                                  unsigned int nrounds);
+ */
+
+.align 3
+.globl _gcry_aes_xts_dec_armv8_ce
+ELF(.type  _gcry_aes_xts_dec_armv8_ce,%function;)
+_gcry_aes_xts_dec_armv8_ce:
+  /* input:
+   *    r0: keysched
+   *    r1: outbuf
+   *    r2: inbuf
+   *    r3: tweak
+   *    x4: nblocks
+   *    w5: nrounds
+   */
+  CFI_STARTPROC();
+
+  cbz x4, .Lxts_dec_skip
+
+  add x16, sp, #-64;
+  add sp, sp, #-128;
+  CFI_ADJUST_CFA_OFFSET(128);
+
+  /* load tweak */
+  ld1 {v0.16b}, [x3]
+
+  /* load gfmul mask */
+  mov x6, #0x87
+  mov x7, #0x01
+  mov v16.D[0], x6
+  mov v16.D[1], x7
+
+  aes_preload_keys(x0, w5);
+  eor vklast.16b, vklast.16b, vk0.16b;
+
+  b.eq .Lxts_dcry_entry_192
+  b.hi .Lxts_dcry_entry_256
+
+  XTS_CRYPT(128, d, imc)
+  XTS_CRYPT(192, d, imc)
+  XTS_CRYPT(256, d, imc)
+
+#undef XTS_CRYPT
+
+.Lxts_dcry_done:
+  aes_clear_keys(w5)
+
+  st1 {v0.16b}, [x3] /* store tweak */
+
+  CLEAR_REG(v0)
+  CLEAR_REG(v1)
+  CLEAR_REG(v2)
+
+  add sp, sp, 128;
+  CFI_ADJUST_CFA_OFFSET(-128);
+
+.Lxts_dec_skip:
+  ret_spec_stop
+  CFI_ENDPROC();
+ELF(.size _gcry_aes_xts_dec_armv8_ce,.-_gcry_aes_xts_dec_armv8_ce;)
+
+
+/*
+ * u32 _gcry_aes_sbox4_armv8_ce(u32 in4b);
+ */
+.align 3
+.globl _gcry_aes_sbox4_armv8_ce
+ELF(.type  _gcry_aes_sbox4_armv8_ce,%function;)
+_gcry_aes_sbox4_armv8_ce:
+  /* See "Gouvêa, C. P. L. & López, J. Implementing GCM on ARMv8. Topics in
+   * Cryptology — CT-RSA 2015" for details.
+   */
+  CFI_STARTPROC();
+  movi v0.16b, #0x52
+  movi v1.16b, #0
+  mov v0.S[0], w0
+  aese v0.16b, v1.16b
+  addv s0, v0.4s
+  mov w0, v0.S[0]
+  CLEAR_REG(v0)
+  ret_spec_stop
+  CFI_ENDPROC();
+ELF(.size _gcry_aes_sbox4_armv8_ce,.-_gcry_aes_sbox4_armv8_ce;)
+
+
+/*
+ * void _gcry_aes_invmixcol_armv8_ce(void *dst, const void *src);
+ */
+.align 3
+.globl _gcry_aes_invmixcol_armv8_ce
+ELF(.type  _gcry_aes_invmixcol_armv8_ce,%function;)
+_gcry_aes_invmixcol_armv8_ce:
+  CFI_STARTPROC();
+  ld1 {v0.16b}, [x1]
+  aesimc v0.16b, v0.16b
+  st1 {v0.16b}, [x0]
+  CLEAR_REG(v0)
+  ret_spec_stop
+  CFI_ENDPROC();
+ELF(.size _gcry_aes_invmixcol_armv8_ce,.-_gcry_aes_invmixcol_armv8_ce;)
+
+#endif
diff --git a/grub-core/lib/libgcrypt/cipher/rijndael-armv8-ce.c 
b/grub-core/lib/libgcrypt/cipher/rijndael-armv8-ce.c
new file mode 100644
index 000000000..b24ae3e9a
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/rijndael-armv8-ce.c
@@ -0,0 +1,431 @@
+/* ARMv8 Crypto Extension AES for Libgcrypt
+ * Copyright (C) 2016 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h> /* for memcmp() */
+
+#include "types.h"  /* for byte and u32 typedefs */
+#include "g10lib.h"
+#include "cipher.h"
+#include "bufhelp.h"
+#include "cipher-selftest.h"
+#include "rijndael-internal.h"
+#include "./cipher-internal.h"
+
+
+#ifdef USE_ARM_CE
+
+
+typedef struct u128_s { u32 a, b, c, d; } u128_t;
+
+extern u32 _gcry_aes_sbox4_armv8_ce(u32 in4b);
+extern void _gcry_aes_invmixcol_armv8_ce(u128_t *dst, const u128_t *src);
+
+extern unsigned int _gcry_aes_enc_armv8_ce(const void *keysched, byte *dst,
+                                           const byte *src,
+                                           unsigned int nrounds);
+extern unsigned int _gcry_aes_dec_armv8_ce(const void *keysched, byte *dst,
+                                           const byte *src,
+                                           unsigned int nrounds);
+
+extern void _gcry_aes_cbc_enc_armv8_ce (const void *keysched,
+                                        unsigned char *outbuf,
+                                        const unsigned char *inbuf,
+                                        unsigned char *iv, size_t nblocks,
+                                        int cbc_mac, unsigned int nrounds);
+extern void _gcry_aes_cbc_dec_armv8_ce (const void *keysched,
+                                        unsigned char *outbuf,
+                                        const unsigned char *inbuf,
+                                        unsigned char *iv, size_t nblocks,
+                                        unsigned int nrounds);
+
+extern void _gcry_aes_cfb_enc_armv8_ce (const void *keysched,
+                                        unsigned char *outbuf,
+                                        const unsigned char *inbuf,
+                                        unsigned char *iv, size_t nblocks,
+                                        unsigned int nrounds);
+extern void _gcry_aes_cfb_dec_armv8_ce (const void *keysched,
+                                        unsigned char *outbuf,
+                                        const unsigned char *inbuf,
+                                        unsigned char *iv, size_t nblocks,
+                                        unsigned int nrounds);
+
+extern void _gcry_aes_ctr_enc_armv8_ce (const void *keysched,
+                                        unsigned char *outbuf,
+                                        const unsigned char *inbuf,
+                                        unsigned char *iv, size_t nblocks,
+                                        unsigned int nrounds);
+
+extern void _gcry_aes_ctr32le_enc_armv8_ce (const void *keysched,
+                                            unsigned char *outbuf,
+                                            const unsigned char *inbuf,
+                                            unsigned char *iv, size_t nblocks,
+                                            unsigned int nrounds);
+
+extern void _gcry_aes_ocb_enc_armv8_ce (const void *keysched,
+                                        unsigned char *outbuf,
+                                        const unsigned char *inbuf,
+                                        unsigned char *offset,
+                                        unsigned char *checksum,
+                                        unsigned char *L_table,
+                                        size_t nblocks,
+                                        unsigned int nrounds,
+                                        unsigned int blkn);
+extern void _gcry_aes_ocb_dec_armv8_ce (const void *keysched,
+                                        unsigned char *outbuf,
+                                        const unsigned char *inbuf,
+                                        unsigned char *offset,
+                                        unsigned char *checksum,
+                                        unsigned char *L_table,
+                                        size_t nblocks,
+                                        unsigned int nrounds,
+                                        unsigned int blkn);
+extern void _gcry_aes_ocb_auth_armv8_ce (const void *keysched,
+                                         const unsigned char *abuf,
+                                         unsigned char *offset,
+                                         unsigned char *checksum,
+                                         unsigned char *L_table,
+                                         size_t nblocks,
+                                         unsigned int nrounds,
+                                         unsigned int blkn);
+extern void _gcry_aes_xts_enc_armv8_ce (const void *keysched,
+                                        unsigned char *outbuf,
+                                        const unsigned char *inbuf,
+                                        unsigned char *tweak,
+                                        size_t nblocks, unsigned int nrounds);
+extern void _gcry_aes_xts_dec_armv8_ce (const void *keysched,
+                                        unsigned char *outbuf,
+                                        const unsigned char *inbuf,
+                                        unsigned char *tweak,
+                                        size_t nblocks, unsigned int nrounds);
+
+typedef void (*ocb_crypt_fn_t) (const void *keysched, unsigned char *outbuf,
+                                const unsigned char *inbuf,
+                                unsigned char *offset, unsigned char *checksum,
+                                unsigned char *L_table, size_t nblocks,
+                                unsigned int nrounds, unsigned int blkn);
+
+typedef void (*xts_crypt_fn_t) (const void *keysched, unsigned char *outbuf,
+                                const unsigned char *inbuf,
+                                unsigned char *tweak, size_t nblocks,
+                                unsigned int nrounds);
+
+void
+_gcry_aes_armv8_ce_setkey (RIJNDAEL_context *ctx, const byte *key)
+{
+  union
+    {
+      PROPERLY_ALIGNED_TYPE dummy;
+      byte data[MAXKC][4];
+      u32 data32[MAXKC];
+    } tkk[2];
+  unsigned int rounds = ctx->rounds;
+  int KC = rounds - 6;
+  unsigned int keylen = KC * 4;
+  unsigned int i, r, t;
+  byte rcon = 1;
+  int j;
+#define k      tkk[0].data
+#define k_u32  tkk[0].data32
+#define tk     tkk[1].data
+#define tk_u32 tkk[1].data32
+#define W      (ctx->keyschenc)
+#define W_u32  (ctx->keyschenc32)
+
+  for (i = 0; i < keylen; i++)
+    {
+      k[i >> 2][i & 3] = key[i];
+    }
+
+  for (j = KC-1; j >= 0; j--)
+    {
+      tk_u32[j] = k_u32[j];
+    }
+  r = 0;
+  t = 0;
+  /* Copy values into round key array.  */
+  for (j = 0; (j < KC) && (r < rounds + 1); )
+    {
+      for (; (j < KC) && (t < 4); j++, t++)
+        {
+          W_u32[r][t] = le_bswap32(tk_u32[j]);
+        }
+      if (t == 4)
+        {
+          r++;
+          t = 0;
+        }
+    }
+
+  while (r < rounds + 1)
+    {
+      tk_u32[0] ^= _gcry_aes_sbox4_armv8_ce(rol(tk_u32[KC - 1], 24)) ^ rcon;
+
+      if (KC != 8)
+        {
+          for (j = 1; j < KC; j++)
+            {
+              tk_u32[j] ^= tk_u32[j-1];
+            }
+        }
+      else
+        {
+          for (j = 1; j < KC/2; j++)
+            {
+              tk_u32[j] ^= tk_u32[j-1];
+            }
+
+          tk_u32[KC/2] ^= _gcry_aes_sbox4_armv8_ce(tk_u32[KC/2 - 1]);
+
+          for (j = KC/2 + 1; j < KC; j++)
+            {
+              tk_u32[j] ^= tk_u32[j-1];
+            }
+        }
+
+      /* Copy values into round key array.  */
+      for (j = 0; (j < KC) && (r < rounds + 1); )
+        {
+          for (; (j < KC) && (t < 4); j++, t++)
+            {
+              W_u32[r][t] = le_bswap32(tk_u32[j]);
+            }
+          if (t == 4)
+            {
+              r++;
+              t = 0;
+            }
+        }
+
+      rcon = (rcon << 1) ^ ((rcon >> 7) * 0x1b);
+    }
+
+#undef W
+#undef tk
+#undef k
+#undef W_u32
+#undef tk_u32
+#undef k_u32
+  wipememory(&tkk, sizeof(tkk));
+}
+
+/* Make a decryption key from an encryption key. */
+void
+_gcry_aes_armv8_ce_prepare_decryption (RIJNDAEL_context *ctx)
+{
+  u128_t *ekey = (u128_t *)(void *)ctx->keyschenc;
+  u128_t *dkey = (u128_t *)(void *)ctx->keyschdec;
+  int rounds = ctx->rounds;
+  int rr;
+  int r;
+
+#define DO_AESIMC() _gcry_aes_invmixcol_armv8_ce(&dkey[r], &ekey[rr])
+
+  dkey[0] = ekey[rounds];
+  r = 1;
+  rr = rounds-1;
+  DO_AESIMC(); r++; rr--; /* round 1 */
+  DO_AESIMC(); r++; rr--; /* round 2 */
+  DO_AESIMC(); r++; rr--; /* round 3 */
+  DO_AESIMC(); r++; rr--; /* round 4 */
+  DO_AESIMC(); r++; rr--; /* round 5 */
+  DO_AESIMC(); r++; rr--; /* round 6 */
+  DO_AESIMC(); r++; rr--; /* round 7 */
+  DO_AESIMC(); r++; rr--; /* round 8 */
+  DO_AESIMC(); r++; rr--; /* round 9 */
+  if (rounds >= 12)
+    {
+      if (rounds > 12)
+        {
+          DO_AESIMC(); r++; rr--; /* round 10 */
+          DO_AESIMC(); r++; rr--; /* round 11 */
+        }
+
+      DO_AESIMC(); r++; rr--; /* round 12 / 10 */
+      DO_AESIMC(); r++; rr--; /* round 13 / 11 */
+    }
+
+  dkey[r] = ekey[0];
+
+#undef DO_AESIMC
+}
+
+unsigned int
+_gcry_aes_armv8_ce_encrypt (const RIJNDAEL_context *ctx, unsigned char *dst,
+                            const unsigned char *src)
+{
+  const void *keysched = ctx->keyschenc32;
+  unsigned int nrounds = ctx->rounds;
+
+  return _gcry_aes_enc_armv8_ce(keysched, dst, src, nrounds);
+}
+
+unsigned int
+_gcry_aes_armv8_ce_decrypt (const RIJNDAEL_context *ctx, unsigned char *dst,
+                            const unsigned char *src)
+{
+  const void *keysched = ctx->keyschdec32;
+  unsigned int nrounds = ctx->rounds;
+
+  return _gcry_aes_dec_armv8_ce(keysched, dst, src, nrounds);
+}
+
+void
+_gcry_aes_armv8_ce_cbc_enc (const RIJNDAEL_context *ctx, unsigned char *iv,
+                            unsigned char *outbuf, const unsigned char *inbuf,
+                            size_t nblocks, int cbc_mac)
+{
+  const void *keysched = ctx->keyschenc32;
+  unsigned int nrounds = ctx->rounds;
+
+  _gcry_aes_cbc_enc_armv8_ce(keysched, outbuf, inbuf, iv, nblocks, cbc_mac,
+                             nrounds);
+}
+
+void
+_gcry_aes_armv8_ce_cbc_dec (RIJNDAEL_context *ctx, unsigned char *iv,
+                            unsigned char *outbuf, const unsigned char *inbuf,
+                            size_t nblocks)
+{
+  const void *keysched = ctx->keyschdec32;
+  unsigned int nrounds = ctx->rounds;
+
+  if ( !ctx->decryption_prepared )
+    {
+      _gcry_aes_armv8_ce_prepare_decryption ( ctx );
+      ctx->decryption_prepared = 1;
+    }
+
+  _gcry_aes_cbc_dec_armv8_ce(keysched, outbuf, inbuf, iv, nblocks, nrounds);
+}
+
+void
+_gcry_aes_armv8_ce_cfb_enc (RIJNDAEL_context *ctx, unsigned char *iv,
+                            unsigned char *outbuf, const unsigned char *inbuf,
+                            size_t nblocks)
+{
+  const void *keysched = ctx->keyschenc32;
+  unsigned int nrounds = ctx->rounds;
+
+  _gcry_aes_cfb_enc_armv8_ce(keysched, outbuf, inbuf, iv, nblocks, nrounds);
+}
+
+void
+_gcry_aes_armv8_ce_cfb_dec (RIJNDAEL_context *ctx, unsigned char *iv,
+                            unsigned char *outbuf, const unsigned char *inbuf,
+                            size_t nblocks)
+{
+  const void *keysched = ctx->keyschenc32;
+  unsigned int nrounds = ctx->rounds;
+
+  _gcry_aes_cfb_dec_armv8_ce(keysched, outbuf, inbuf, iv, nblocks, nrounds);
+}
+
+void
+_gcry_aes_armv8_ce_ctr_enc (RIJNDAEL_context *ctx, unsigned char *iv,
+                            unsigned char *outbuf, const unsigned char *inbuf,
+                            size_t nblocks)
+{
+  const void *keysched = ctx->keyschenc32;
+  unsigned int nrounds = ctx->rounds;
+
+  _gcry_aes_ctr_enc_armv8_ce(keysched, outbuf, inbuf, iv, nblocks, nrounds);
+}
+
+void
+_gcry_aes_armv8_ce_ctr32le_enc (RIJNDAEL_context *ctx, unsigned char *iv,
+                                unsigned char *outbuf,
+                                const unsigned char *inbuf, size_t nblocks)
+{
+  const void *keysched = ctx->keyschenc32;
+  unsigned int nrounds = ctx->rounds;
+
+  _gcry_aes_ctr32le_enc_armv8_ce(keysched, outbuf, inbuf, iv, nblocks, 
nrounds);
+}
+
+size_t
+_gcry_aes_armv8_ce_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
+                              const void *inbuf_arg, size_t nblocks,
+                              int encrypt)
+{
+  RIJNDAEL_context *ctx = (void *)&c->context.c;
+  const void *keysched = encrypt ? ctx->keyschenc32 : ctx->keyschdec32;
+  ocb_crypt_fn_t crypt_fn = encrypt ? _gcry_aes_ocb_enc_armv8_ce
+                                    : _gcry_aes_ocb_dec_armv8_ce;
+  unsigned char *outbuf = outbuf_arg;
+  const unsigned char *inbuf = inbuf_arg;
+  unsigned int nrounds = ctx->rounds;
+  u64 blkn = c->u_mode.ocb.data_nblocks;
+
+  if ( !encrypt && !ctx->decryption_prepared )
+    {
+      _gcry_aes_armv8_ce_prepare_decryption ( ctx );
+      ctx->decryption_prepared = 1;
+    }
+
+  c->u_mode.ocb.data_nblocks = blkn + nblocks;
+
+  crypt_fn(keysched, outbuf, inbuf, c->u_iv.iv, c->u_ctr.ctr,
+           c->u_mode.ocb.L[0], nblocks, nrounds, (unsigned int)blkn);
+
+  return 0;
+}
+
+size_t
+_gcry_aes_armv8_ce_ocb_auth (gcry_cipher_hd_t c, void *abuf_arg,
+                             size_t nblocks)
+{
+  RIJNDAEL_context *ctx = (void *)&c->context.c;
+  const void *keysched = ctx->keyschenc32;
+  const unsigned char *abuf = abuf_arg;
+  unsigned int nrounds = ctx->rounds;
+  u64 blkn = c->u_mode.ocb.aad_nblocks;
+
+  c->u_mode.ocb.aad_nblocks = blkn + nblocks;
+
+  _gcry_aes_ocb_auth_armv8_ce(keysched, abuf, c->u_mode.ocb.aad_offset,
+                             c->u_mode.ocb.aad_sum, c->u_mode.ocb.L[0],
+                             nblocks, nrounds, (unsigned int)blkn);
+
+  return 0;
+}
+
+void
+_gcry_aes_armv8_ce_xts_crypt (RIJNDAEL_context *ctx, unsigned char *tweak,
+                             unsigned char *outbuf, const unsigned char *inbuf,
+                             size_t nblocks, int encrypt)
+{
+  const void *keysched = encrypt ? ctx->keyschenc32 : ctx->keyschdec32;
+  xts_crypt_fn_t crypt_fn = encrypt ? _gcry_aes_xts_enc_armv8_ce
+                                    : _gcry_aes_xts_dec_armv8_ce;
+  unsigned int nrounds = ctx->rounds;
+
+  if ( !encrypt && !ctx->decryption_prepared )
+    {
+      _gcry_aes_armv8_ce_prepare_decryption ( ctx );
+      ctx->decryption_prepared = 1;
+    }
+
+  crypt_fn(keysched, outbuf, inbuf, tweak, nblocks, nrounds);
+}
+
+#endif /* USE_ARM_CE */
diff --git a/grub-core/lib/libgcrypt/cipher/rijndael-gcm-p10le.s 
b/grub-core/lib/libgcrypt/cipher/rijndael-gcm-p10le.s
new file mode 100644
index 000000000..81fec2add
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/rijndael-gcm-p10le.s
@@ -0,0 +1,1401 @@
+# Copyright 2021- IBM Inc. All rights reserved
+#
+# This file is part of Libgcrypt.
+#
+# Libgcrypt is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as
+# published by the Free Software Foundation; either version 2.1 of
+# the License, or (at your option) any later version.
+#
+# Libgcrypt is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this program; if not, see <http://www.gnu.org/licenses/>.
+#
+#===================================================================================
+# Written by Danny Tsen <dtsen@us.ibm.com>
+#
+# GHASH is based on the Karatsuba multiplication method.
+#
+#    Xi xor X1
+#
+#    X1 * H^4 + X2 * H^3 + x3 * H^2 + X4 * H =
+#      (X1.h * H4.h + xX.l * H4.l + X1 * H4) +
+#      (X2.h * H3.h + X2.l * H3.l + X2 * H3) +
+#      (X3.h * H2.h + X3.l * H2.l + X3 * H2) +
+#      (X4.h * H.h + X4.l * H.l + X4 * H)
+#
+# Xi = v0
+# H Poly = v2
+# Hash keys = v3 - v14
+#     ( H.l, H, H.h)
+#     ( H^2.l, H^2, H^2.h)
+#     ( H^3.l, H^3, H^3.h)
+#     ( H^4.l, H^4, H^4.h)
+#
+# v30 is IV
+# v31 - counter 1
+#
+# AES used,
+#     vs0 - vs14 for round keys
+#     v15, v16, v17, v18, v19, v20, v21, v22 for 8 blocks (encrypted)
+#
+# This implementation uses stitched AES-GCM approach to improve overall 
performance.
+# AES is implemented with 8x blocks and GHASH is using 2 4x blocks.
+#
+# Current performance with 128 bit key using bench-slope on Power10[le] 
(3.89GHz):
+#
+# AES            |  nanosecs/byte   mebibytes/sec   cycles/byte
+#        GCM enc |     0.169 ns/B      5643 MiB/s         - c/B
+#        GCM dec |     0.171 ns/B      5585 MiB/s         - c/B
+#
+# 
===================================================================================
+#
+
+.machine        "any"
+.abiversion     2
+.text
+
+# 4x loops
+# v15 - v18 - input states
+# vs1 - vs9 - round keys
+#
+.macro Loop_aes_middle4x
+       xxlor   19+32, 1, 1
+       xxlor   20+32, 2, 2
+       xxlor   21+32, 3, 3
+       xxlor   22+32, 4, 4
+
+       vcipher 15, 15, 19
+       vcipher 16, 16, 19
+       vcipher 17, 17, 19
+       vcipher 18, 18, 19
+
+       vcipher 15, 15, 20
+       vcipher 16, 16, 20
+       vcipher 17, 17, 20
+       vcipher 18, 18, 20
+
+       vcipher 15, 15, 21
+       vcipher 16, 16, 21
+       vcipher 17, 17, 21
+       vcipher 18, 18, 21
+
+       vcipher 15, 15, 22
+       vcipher 16, 16, 22
+       vcipher 17, 17, 22
+       vcipher 18, 18, 22
+
+       xxlor   19+32, 5, 5
+       xxlor   20+32, 6, 6
+       xxlor   21+32, 7, 7
+       xxlor   22+32, 8, 8
+
+       vcipher 15, 15, 19
+       vcipher 16, 16, 19
+       vcipher 17, 17, 19
+       vcipher 18, 18, 19
+
+       vcipher 15, 15, 20
+       vcipher 16, 16, 20
+       vcipher 17, 17, 20
+       vcipher 18, 18, 20
+
+       vcipher 15, 15, 21
+       vcipher 16, 16, 21
+       vcipher 17, 17, 21
+       vcipher 18, 18, 21
+
+       vcipher 15, 15, 22
+       vcipher 16, 16, 22
+       vcipher 17, 17, 22
+       vcipher 18, 18, 22
+
+       xxlor   23+32, 9, 9
+       vcipher 15, 15, 23
+       vcipher 16, 16, 23
+       vcipher 17, 17, 23
+       vcipher 18, 18, 23
+.endm
+
+# 8x loops
+# v15 - v22 - input states
+# vs1 - vs9 - round keys
+#
+.macro Loop_aes_middle8x
+       xxlor   23+32, 1, 1
+       xxlor   24+32, 2, 2
+       xxlor   25+32, 3, 3
+       xxlor   26+32, 4, 4
+
+       vcipher 15, 15, 23
+       vcipher 16, 16, 23
+       vcipher 17, 17, 23
+       vcipher 18, 18, 23
+       vcipher 19, 19, 23
+       vcipher 20, 20, 23
+       vcipher 21, 21, 23
+       vcipher 22, 22, 23
+
+       vcipher 15, 15, 24
+       vcipher 16, 16, 24
+       vcipher 17, 17, 24
+       vcipher 18, 18, 24
+       vcipher 19, 19, 24
+       vcipher 20, 20, 24
+       vcipher 21, 21, 24
+       vcipher 22, 22, 24
+
+       vcipher 15, 15, 25
+       vcipher 16, 16, 25
+       vcipher 17, 17, 25
+       vcipher 18, 18, 25
+       vcipher 19, 19, 25
+       vcipher 20, 20, 25
+       vcipher 21, 21, 25
+       vcipher 22, 22, 25
+
+       vcipher 15, 15, 26
+       vcipher 16, 16, 26
+       vcipher 17, 17, 26
+       vcipher 18, 18, 26
+       vcipher 19, 19, 26
+       vcipher 20, 20, 26
+       vcipher 21, 21, 26
+       vcipher 22, 22, 26
+
+       xxlor   23+32, 5, 5
+       xxlor   24+32, 6, 6
+       xxlor   25+32, 7, 7
+       xxlor   26+32, 8, 8
+
+       vcipher 15, 15, 23
+       vcipher 16, 16, 23
+       vcipher 17, 17, 23
+       vcipher 18, 18, 23
+       vcipher 19, 19, 23
+       vcipher 20, 20, 23
+       vcipher 21, 21, 23
+       vcipher 22, 22, 23
+
+       vcipher 15, 15, 24
+       vcipher 16, 16, 24
+       vcipher 17, 17, 24
+       vcipher 18, 18, 24
+       vcipher 19, 19, 24
+       vcipher 20, 20, 24
+       vcipher 21, 21, 24
+       vcipher 22, 22, 24
+
+       vcipher 15, 15, 25
+       vcipher 16, 16, 25
+       vcipher 17, 17, 25
+       vcipher 18, 18, 25
+       vcipher 19, 19, 25
+       vcipher 20, 20, 25
+       vcipher 21, 21, 25
+       vcipher 22, 22, 25
+
+       vcipher 15, 15, 26
+       vcipher 16, 16, 26
+       vcipher 17, 17, 26
+       vcipher 18, 18, 26
+       vcipher 19, 19, 26
+       vcipher 20, 20, 26
+       vcipher 21, 21, 26
+       vcipher 22, 22, 26
+
+       xxlor   23+32, 9, 9
+       vcipher 15, 15, 23
+       vcipher 16, 16, 23
+       vcipher 17, 17, 23
+       vcipher 18, 18, 23
+       vcipher 19, 19, 23
+       vcipher 20, 20, 23
+       vcipher 21, 21, 23
+       vcipher 22, 22, 23
+.endm
+
+#
+# Compute 4x hash values based on Karatsuba method.
+#
+ppc_aes_gcm_ghash:
+       vxor            15, 15, 0
+
+       xxlxor          29, 29, 29
+
+       vpmsumd         23, 12, 15              # H4.L * X.L
+       vpmsumd         24, 9, 16
+       vpmsumd         25, 6, 17
+       vpmsumd         26, 3, 18
+
+       vxor            23, 23, 24
+       vxor            23, 23, 25
+       vxor            23, 23, 26              # L
+
+       vpmsumd         24, 13, 15              # H4.L * X.H + H4.H * X.L
+       vpmsumd         25, 10, 16              # H3.L * X1.H + H3.H * X1.L
+       vpmsumd         26, 7, 17
+       vpmsumd         27, 4, 18
+
+       vxor            24, 24, 25
+       vxor            24, 24, 26
+       vxor            24, 24, 27              # M
+
+       # sum hash and reduction with H Poly
+       vpmsumd         28, 23, 2               # reduction
+
+       xxlor           29+32, 29, 29
+       vsldoi          26, 24, 29, 8           # mL
+       vsldoi          29, 29, 24, 8           # mH
+       vxor            23, 23, 26              # mL + L
+
+       vsldoi          23, 23, 23, 8           # swap
+       vxor            23, 23, 28
+
+       vpmsumd         24, 14, 15              # H4.H * X.H
+       vpmsumd         25, 11, 16
+       vpmsumd         26, 8, 17
+       vpmsumd         27, 5, 18
+
+       vxor            24, 24, 25
+       vxor            24, 24, 26
+       vxor            24, 24, 27
+
+       vxor            24, 24, 29
+
+       # sum hash and reduction with H Poly
+       vsldoi          27, 23, 23, 8           # swap
+       vpmsumd         23, 23, 2
+       vxor            27, 27, 24
+       vxor            23, 23, 27
+
+       xxlor           32, 23+32, 23+32                # update hash
+
+       blr
+
+#
+# Combine two 4x ghash
+# v15 - v22 - input blocks
+#
+.macro ppc_aes_gcm_ghash2_4x
+       # first 4x hash
+       vxor            15, 15, 0               # Xi + X
+
+       xxlxor          29, 29, 29
+
+       vpmsumd         23, 12, 15              # H4.L * X.L
+       vpmsumd         24, 9, 16
+       vpmsumd         25, 6, 17
+       vpmsumd         26, 3, 18
+
+       vxor            23, 23, 24
+       vxor            23, 23, 25
+       vxor            23, 23, 26              # L
+
+       vpmsumd         24, 13, 15              # H4.L * X.H + H4.H * X.L
+       vpmsumd         25, 10, 16              # H3.L * X1.H + H3.H * X1.L
+       vpmsumd         26, 7, 17
+       vpmsumd         27, 4, 18
+
+       vxor            24, 24, 25
+       vxor            24, 24, 26
+
+       # sum hash and reduction with H Poly
+       vpmsumd         28, 23, 2               # reduction
+
+       xxlor           29+32, 29, 29
+
+       vxor            24, 24, 27              # M
+       vsldoi          26, 24, 29, 8           # mL
+       vsldoi          29, 29, 24, 8           # mH
+       vxor            23, 23, 26              # mL + L
+
+       vsldoi          23, 23, 23, 8           # swap
+       vxor            23, 23, 28
+
+       vpmsumd         24, 14, 15              # H4.H * X.H
+       vpmsumd         25, 11, 16
+       vpmsumd         26, 8, 17
+       vpmsumd         27, 5, 18
+
+       vxor            24, 24, 25
+       vxor            24, 24, 26
+       vxor            24, 24, 27              # H
+
+       vxor            24, 24, 29              # H + mH
+
+       # sum hash and reduction with H Poly
+       vsldoi          27, 23, 23, 8           # swap
+       vpmsumd         23, 23, 2
+       vxor            27, 27, 24
+       vxor            27, 23, 27              # 1st Xi
+
+       # 2nd 4x hash
+       vpmsumd         24, 9, 20
+       vpmsumd         25, 6, 21
+       vpmsumd         26, 3, 22
+       vxor            19, 19, 27              # Xi + X
+       vpmsumd         23, 12, 19              # H4.L * X.L
+
+       vxor            23, 23, 24
+       vxor            23, 23, 25
+       vxor            23, 23, 26              # L
+
+       vpmsumd         24, 13, 19              # H4.L * X.H + H4.H * X.L
+       vpmsumd         25, 10, 20              # H3.L * X1.H + H3.H * X1.L
+       vpmsumd         26, 7, 21
+       vpmsumd         27, 4, 22
+
+       vxor            24, 24, 25
+       vxor            24, 24, 26
+
+       # sum hash and reduction with H Poly
+       vpmsumd         28, 23, 2               # reduction
+
+       xxlor           29+32, 29, 29
+
+       vxor            24, 24, 27              # M
+       vsldoi          26, 24, 29, 8           # mL
+       vsldoi          29, 29, 24, 8           # mH
+       vxor            23, 23, 26              # mL + L
+
+       vsldoi          23, 23, 23, 8           # swap
+       vxor            23, 23, 28
+
+       vpmsumd         24, 14, 19              # H4.H * X.H
+       vpmsumd         25, 11, 20
+       vpmsumd         26, 8, 21
+       vpmsumd         27, 5, 22
+
+       vxor            24, 24, 25
+       vxor            24, 24, 26
+       vxor            24, 24, 27              # H
+
+       vxor            24, 24, 29              # H + mH
+
+       # sum hash and reduction with H Poly
+       vsldoi          27, 23, 23, 8           # swap
+       vpmsumd         23, 23, 2
+       vxor            27, 27, 24
+       vxor            23, 23, 27
+
+       xxlor           32, 23+32, 23+32                # update hash
+
+.endm
+
+#
+# Compute update single hash
+#
+.macro ppc_update_hash_1x
+       vxor            28, 28, 0
+
+       vxor            19, 19, 19
+
+       vpmsumd         22, 3, 28               # L
+       vpmsumd         23, 4, 28               # M
+       vpmsumd         24, 5, 28               # H
+
+       vpmsumd         27, 22, 2               # reduction
+
+       vsldoi          25, 23, 19, 8           # mL
+       vsldoi          26, 19, 23, 8           # mH
+       vxor            22, 22, 25              # LL + LL
+       vxor            24, 24, 26              # HH + HH
+
+       vsldoi          22, 22, 22, 8           # swap
+       vxor            22, 22, 27
+
+       vsldoi          20, 22, 22, 8           # swap
+       vpmsumd         22, 22, 2               # reduction
+       vxor            20, 20, 24
+       vxor            22, 22, 20
+
+       vmr             0, 22                   # update hash
+
+.endm
+
+#
+# libgcrypt:
+# _gcry_ppc10_aes_gcm_encrypt (const void *inp, void *out, size_t len,
+#               const char *rk, unsigned char iv[16], void *Xip);
+#
+#    r3 - inp
+#    r4 - out
+#    r5 - len
+#    r6 - AES round keys
+#    r7 - iv
+#    r8 - HPoli, hash keys, Xi
+#
+#    rounds is at offset 480 in rk
+#    Xi is at 256 in gcm_table (Xip).
+#
+.global _gcry_ppc10_aes_gcm_encrypt
+.align 5
+_gcry_ppc10_aes_gcm_encrypt:
+_gcry_ppc_aes_gcm_encrypt:
+
+       stdu 1,-512(1)
+       mflr 0
+
+       std     14,112(1)
+       std     15,120(1)
+       std     16,128(1)
+       std     17,136(1)
+       std     18,144(1)
+       std     19,152(1)
+       std     20,160(1)
+       std     21,168(1)
+       li      9, 256
+       stvx    20, 9, 1
+       addi    9, 9, 16
+       stvx    21, 9, 1
+       addi    9, 9, 16
+       stvx    22, 9, 1
+       addi    9, 9, 16
+       stvx    23, 9, 1
+       addi    9, 9, 16
+       stvx    24, 9, 1
+       addi    9, 9, 16
+       stvx    25, 9, 1
+       addi    9, 9, 16
+       stvx    26, 9, 1
+       addi    9, 9, 16
+       stvx    27, 9, 1
+       addi    9, 9, 16
+       stvx    28, 9, 1
+       addi    9, 9, 16
+       stvx    29, 9, 1
+       addi    9, 9, 16
+       stvx    30, 9, 1
+       addi    9, 9, 16
+       stvx    31, 9, 1
+       std     0, 528(1)
+
+       # Load Xi
+       li      10, 256
+       lxvb16x 32, 10, 8       # load Xi
+
+       # load Hash - h^4, h^3, h^2, h
+       lxvd2x  2+32, 0, 8      # H Poli
+       li      10, 16
+       lxvd2x  3+32, 10, 8     # Hl
+       li      10, 32
+       lxvd2x  4+32, 10, 8     # H
+       li      10, 48
+       lxvd2x  5+32, 10, 8     # Hh
+
+       li      10, 64
+       lxvd2x  6+32, 10, 8     # H^2l
+       li      10, 80
+       lxvd2x  7+32, 10, 8     # H^2
+       li      10, 96
+       lxvd2x  8+32, 10, 8     # H^2h
+
+       li      10, 112
+       lxvd2x  9+32, 10, 8     # H^3l
+       li      10, 128
+       lxvd2x  10+32, 10, 8    # H^3
+       li      10, 144
+       lxvd2x  11+32, 10, 8    # H^3h
+
+       li      10, 160
+       lxvd2x  12+32, 10, 8    # H^4l
+       li      10, 176
+       lxvd2x  13+32, 10, 8    # H^4
+       li      10, 192
+       lxvd2x  14+32, 10, 8    # H^4h
+
+       # initialize ICB: GHASH( IV ), IV - r7
+       lxvb16x 30+32, 0, 7     # load IV  - v30
+
+       mr      12, 5           # length
+       li      11, 0           # block index
+
+       # counter 1
+       vxor    31, 31, 31
+       vspltisb 22, 1
+       vsldoi  31, 31, 22,1    # counter 1
+
+       # load round key to VSR
+       lxv     0, 0(6)
+       lxv     1, 0x10(6)
+       lxv     2, 0x20(6)
+       lxv     3, 0x30(6)
+       lxv     4, 0x40(6)
+       lxv     5, 0x50(6)
+       lxv     6, 0x60(6)
+       lxv     7, 0x70(6)
+       lxv     8, 0x80(6)
+       lxv     9, 0x90(6)
+       lxv     10, 0xa0(6)
+
+       # load rounds - 10 (128), 12 (192), 14 (256)
+       lwz     9,480(6)
+
+       #
+       # vxor  state, state, w # addroundkey
+       xxlor   32+29, 0, 0
+       vxor    15, 30, 29      # IV + round key - add round key 0
+
+       cmpdi   9, 10
+       beq     Loop_aes_gcm_8x
+
+       # load 2 more round keys (v11, v12)
+       lxv     11, 0xb0(6)
+       lxv     12, 0xc0(6)
+
+       cmpdi   9, 12
+       beq     Loop_aes_gcm_8x
+
+       # load 2 more round keys (v11, v12, v13, v14)
+       lxv     13, 0xd0(6)
+       lxv     14, 0xe0(6)
+       cmpdi   9, 14
+       beq     Loop_aes_gcm_8x
+
+       b       aes_gcm_out
+
+.align 5
+Loop_aes_gcm_8x:
+       mr      14, 3
+       mr      9, 4
+
+       # n blcoks
+       li      10, 128
+       divdu   10, 5, 10       # n 128 bytes-blocks
+       cmpdi   10, 0
+       beq     Loop_last_block
+
+       vaddudm 30, 30, 31      # IV + counter
+       vxor    16, 30, 29
+       vaddudm 30, 30, 31
+       vxor    17, 30, 29
+       vaddudm 30, 30, 31
+       vxor    18, 30, 29
+       vaddudm 30, 30, 31
+       vxor    19, 30, 29
+       vaddudm 30, 30, 31
+       vxor    20, 30, 29
+       vaddudm 30, 30, 31
+       vxor    21, 30, 29
+       vaddudm 30, 30, 31
+       vxor    22, 30, 29
+
+       mtctr   10
+
+       li      15, 16
+       li      16, 32
+       li      17, 48
+       li      18, 64
+       li      19, 80
+       li      20, 96
+       li      21, 112
+
+       lwz     10, 480(6)
+
+Loop_8x_block:
+
+       lxvb16x         15, 0, 14       # load block
+       lxvb16x         16, 15, 14      # load block
+       lxvb16x         17, 16, 14      # load block
+       lxvb16x         18, 17, 14      # load block
+       lxvb16x         19, 18, 14      # load block
+       lxvb16x         20, 19, 14      # load block
+       lxvb16x         21, 20, 14      # load block
+       lxvb16x         22, 21, 14      # load block
+       addi            14, 14, 128
+
+       Loop_aes_middle8x
+
+       xxlor   23+32, 10, 10
+
+       cmpdi   10, 10
+       beq     Do_next_ghash
+
+       # 192 bits
+       xxlor   24+32, 11, 11
+
+       vcipher 15, 15, 23
+       vcipher 16, 16, 23
+       vcipher 17, 17, 23
+       vcipher 18, 18, 23
+       vcipher 19, 19, 23
+       vcipher 20, 20, 23
+       vcipher 21, 21, 23
+       vcipher 22, 22, 23
+
+       vcipher 15, 15, 24
+       vcipher 16, 16, 24
+       vcipher 17, 17, 24
+       vcipher 18, 18, 24
+       vcipher 19, 19, 24
+       vcipher 20, 20, 24
+       vcipher 21, 21, 24
+       vcipher 22, 22, 24
+
+       xxlor   23+32, 12, 12
+
+       cmpdi   10, 12
+       beq     Do_next_ghash
+
+       # 256 bits
+       xxlor   24+32, 13, 13
+
+       vcipher 15, 15, 23
+       vcipher 16, 16, 23
+       vcipher 17, 17, 23
+       vcipher 18, 18, 23
+       vcipher 19, 19, 23
+       vcipher 20, 20, 23
+       vcipher 21, 21, 23
+       vcipher 22, 22, 23
+
+       vcipher 15, 15, 24
+       vcipher 16, 16, 24
+       vcipher 17, 17, 24
+       vcipher 18, 18, 24
+       vcipher 19, 19, 24
+       vcipher 20, 20, 24
+       vcipher 21, 21, 24
+       vcipher 22, 22, 24
+
+       xxlor   23+32, 14, 14
+
+       cmpdi   10, 14
+       beq     Do_next_ghash
+       b       aes_gcm_out
+
+Do_next_ghash:
+
+       #
+       # last round
+       vcipherlast     15, 15, 23
+       vcipherlast     16, 16, 23
+
+       xxlxor          47, 47, 15
+       stxvb16x        47, 0, 9        # store output
+       xxlxor          48, 48, 16
+       stxvb16x        48, 15, 9       # store output
+
+       vcipherlast     17, 17, 23
+       vcipherlast     18, 18, 23
+
+       xxlxor          49, 49, 17
+       stxvb16x        49, 16, 9       # store output
+       xxlxor          50, 50, 18
+       stxvb16x        50, 17, 9       # store output
+
+       vcipherlast     19, 19, 23
+       vcipherlast     20, 20, 23
+
+       xxlxor          51, 51, 19
+       stxvb16x        51, 18, 9       # store output
+       xxlxor          52, 52, 20
+       stxvb16x        52, 19, 9       # store output
+
+       vcipherlast     21, 21, 23
+       vcipherlast     22, 22, 23
+
+       xxlxor          53, 53, 21
+       stxvb16x        53, 20, 9       # store output
+       xxlxor          54, 54, 22
+       stxvb16x        54, 21, 9       # store output
+
+       addi            9, 9, 128
+
+       # ghash here
+       ppc_aes_gcm_ghash2_4x
+
+       xxlor   27+32, 0, 0
+       vaddudm 30, 30, 31              # IV + counter
+       vmr     29, 30
+       vxor    15, 30, 27              # add round key
+       vaddudm 30, 30, 31
+       vxor    16, 30, 27
+       vaddudm 30, 30, 31
+       vxor    17, 30, 27
+       vaddudm 30, 30, 31
+       vxor    18, 30, 27
+       vaddudm 30, 30, 31
+       vxor    19, 30, 27
+       vaddudm 30, 30, 31
+       vxor    20, 30, 27
+       vaddudm 30, 30, 31
+       vxor    21, 30, 27
+       vaddudm 30, 30, 31
+       vxor    22, 30, 27
+
+       addi    12, 12, -128
+       addi    11, 11, 128
+
+       bdnz    Loop_8x_block
+
+       vmr     30, 29
+
+Loop_last_block:
+       cmpdi   12, 0
+       beq     aes_gcm_out
+
+       # loop last few blocks
+       li      10, 16
+       divdu   10, 12, 10
+
+       mtctr   10
+
+       lwz     10, 480(6)
+
+       cmpdi   12, 16
+       blt     Final_block
+
+.macro Loop_aes_middle_1x
+       xxlor   19+32, 1, 1
+       xxlor   20+32, 2, 2
+       xxlor   21+32, 3, 3
+       xxlor   22+32, 4, 4
+
+       vcipher 15, 15, 19
+       vcipher 15, 15, 20
+       vcipher 15, 15, 21
+       vcipher 15, 15, 22
+
+       xxlor   19+32, 5, 5
+       xxlor   20+32, 6, 6
+       xxlor   21+32, 7, 7
+       xxlor   22+32, 8, 8
+
+       vcipher 15, 15, 19
+       vcipher 15, 15, 20
+       vcipher 15, 15, 21
+       vcipher 15, 15, 22
+
+       xxlor   19+32, 9, 9
+       vcipher 15, 15, 19
+.endm
+
+Next_rem_block:
+       lxvb16x 15, 0, 14               # load block
+
+       Loop_aes_middle_1x
+
+       xxlor   23+32, 10, 10
+
+       cmpdi   10, 10
+       beq     Do_next_1x
+
+       # 192 bits
+       xxlor   24+32, 11, 11
+
+       vcipher 15, 15, 23
+       vcipher 15, 15, 24
+
+       xxlor   23+32, 12, 12
+
+       cmpdi   10, 12
+       beq     Do_next_1x
+
+       # 256 bits
+       xxlor   24+32, 13, 13
+
+       vcipher 15, 15, 23
+       vcipher 15, 15, 24
+
+       xxlor   23+32, 14, 14
+
+       cmpdi   10, 14
+       beq     Do_next_1x
+
+Do_next_1x:
+       vcipherlast     15, 15, 23
+
+       xxlxor          47, 47, 15
+       stxvb16x        47, 0, 9        # store output
+       addi            14, 14, 16
+       addi            9, 9, 16
+
+       vmr             28, 15
+       ppc_update_hash_1x
+
+       addi            12, 12, -16
+       addi            11, 11, 16
+       xxlor           19+32, 0, 0
+       vaddudm         30, 30, 31              # IV + counter
+       vxor            15, 30, 19              # add round key
+
+       bdnz    Next_rem_block
+
+       cmpdi   12, 0
+       beq     aes_gcm_out
+
+Final_block:
+       Loop_aes_middle_1x
+
+       xxlor   23+32, 10, 10
+
+       cmpdi   10, 10
+       beq     Do_final_1x
+
+       # 192 bits
+       xxlor   24+32, 11, 11
+
+       vcipher 15, 15, 23
+       vcipher 15, 15, 24
+
+       xxlor   23+32, 12, 12
+
+       cmpdi   10, 12
+       beq     Do_final_1x
+
+       # 256 bits
+       xxlor   24+32, 13, 13
+
+       vcipher 15, 15, 23
+       vcipher 15, 15, 24
+
+       xxlor   23+32, 14, 14
+
+       cmpdi   10, 14
+       beq     Do_final_1x
+
+Do_final_1x:
+       vcipherlast     15, 15, 23
+
+       lxvb16x 15, 0, 14               # load last block
+       xxlxor  47, 47, 15
+
+       # create partial block mask
+       li      15, 16
+       sub     15, 15, 12              # index to the mask
+
+       vspltisb        16, -1          # first 16 bytes - 0xffff...ff
+       vspltisb        17, 0           # second 16 bytes - 0x0000...00
+       li      10, 192
+       stvx    16, 10, 1
+       addi    10, 10, 16
+       stvx    17, 10, 1
+
+       addi    10, 1, 192
+       lxvb16x 16, 15, 10              # load partial block mask
+       xxland  47, 47, 16
+
+       vmr     28, 15
+       ppc_update_hash_1x
+
+       # * should store only the remaining bytes.
+       bl      Write_partial_block
+
+       b aes_gcm_out
+
+#
+# Write partial block
+# r9 - output
+# r12 - remaining bytes
+# v15 - partial input data
+#
+Write_partial_block:
+       li              10, 192
+       stxvb16x        15+32, 10, 1            # last block
+
+       #add            10, 9, 11               # Output
+       addi            10, 9, -1
+       addi            16, 1, 191
+
+        mtctr          12                      # remaining bytes
+       li              15, 0
+
+Write_last_byte:
+        lbzu           14, 1(16)
+       stbu            14, 1(10)
+        bdnz           Write_last_byte
+       blr
+
+aes_gcm_out:
+       # out = state
+       li      10, 256
+       stxvb16x        32, 10, 8               # write out Xi
+       add     3, 11, 12               # return count
+
+       li      9, 256
+       lvx     20, 9, 1
+       addi    9, 9, 16
+       lvx     21, 9, 1
+       addi    9, 9, 16
+       lvx     22, 9, 1
+       addi    9, 9, 16
+       lvx     23, 9, 1
+       addi    9, 9, 16
+       lvx     24, 9, 1
+       addi    9, 9, 16
+       lvx     25, 9, 1
+       addi    9, 9, 16
+       lvx     26, 9, 1
+       addi    9, 9, 16
+       lvx     27, 9, 1
+       addi    9, 9, 16
+       lvx     28, 9, 1
+       addi    9, 9, 16
+       lvx     29, 9, 1
+       addi    9, 9, 16
+       lvx     30, 9, 1
+       addi    9, 9, 16
+       lvx     31, 9, 1
+
+       ld      0, 528(1)
+       ld      14,112(1)
+       ld      15,120(1)
+       ld      16,128(1)
+       ld      17,136(1)
+       ld      18,144(1)
+       ld      19,152(1)
+       ld      20,160(1)
+       ld      21,168(1)
+
+       mtlr    0
+       addi    1, 1, 512
+       blr
+
+#
+# 8x Decrypt
+#
+.global _gcry_ppc10_aes_gcm_decrypt
+.align 5
+_gcry_ppc10_aes_gcm_decrypt:
+_gcry_ppc_aes_gcm_decrypt:
+
+       stdu 1,-512(1)
+       mflr 0
+
+       std     14,112(1)
+       std     15,120(1)
+       std     16,128(1)
+       std     17,136(1)
+       std     18,144(1)
+       std     19,152(1)
+       std     20,160(1)
+       std     21,168(1)
+       li      9, 256
+       stvx    20, 9, 1
+       addi    9, 9, 16
+       stvx    21, 9, 1
+       addi    9, 9, 16
+       stvx    22, 9, 1
+       addi    9, 9, 16
+       stvx    23, 9, 1
+       addi    9, 9, 16
+       stvx    24, 9, 1
+       addi    9, 9, 16
+       stvx    25, 9, 1
+       addi    9, 9, 16
+       stvx    26, 9, 1
+       addi    9, 9, 16
+       stvx    27, 9, 1
+       addi    9, 9, 16
+       stvx    28, 9, 1
+       addi    9, 9, 16
+       stvx    29, 9, 1
+       addi    9, 9, 16
+       stvx    30, 9, 1
+       addi    9, 9, 16
+       stvx    31, 9, 1
+       std     0, 528(1)
+
+       # Load Xi
+       li      10, 256
+       lxvb16x 32, 10, 8       # load Xi
+
+       # load Hash - h^4, h^3, h^2, h
+       lxvd2x  2+32, 0, 8      # H Poli
+       li      10, 16
+       lxvd2x  3+32, 10, 8     # Hl
+       li      10, 32
+       lxvd2x  4+32, 10, 8     # H
+       li      10, 48
+       lxvd2x  5+32, 10, 8     # Hh
+
+       li      10, 64
+       lxvd2x  6+32, 10, 8     # H^2l
+       li      10, 80
+       lxvd2x  7+32, 10, 8     # H^2
+       li      10, 96
+       lxvd2x  8+32, 10, 8     # H^2h
+
+       li      10, 112
+       lxvd2x  9+32, 10, 8     # H^3l
+       li      10, 128
+       lxvd2x  10+32, 10, 8    # H^3
+       li      10, 144
+       lxvd2x  11+32, 10, 8    # H^3h
+
+       li      10, 160
+       lxvd2x  12+32, 10, 8    # H^4l
+       li      10, 176
+       lxvd2x  13+32, 10, 8    # H^4
+       li      10, 192
+       lxvd2x  14+32, 10, 8    # H^4h
+
+       # initialize ICB: GHASH( IV ), IV - r7
+       lxvb16x 30+32, 0, 7     # load IV  - v30
+
+       mr      12, 5           # length
+       li      11, 0           # block index
+
+       # counter 1
+       vxor    31, 31, 31
+       vspltisb 22, 1
+       vsldoi  31, 31, 22,1    # counter 1
+
+       # load round key to VSR
+       lxv     0, 0(6)
+       lxv     1, 0x10(6)
+       lxv     2, 0x20(6)
+       lxv     3, 0x30(6)
+       lxv     4, 0x40(6)
+       lxv     5, 0x50(6)
+       lxv     6, 0x60(6)
+       lxv     7, 0x70(6)
+       lxv     8, 0x80(6)
+       lxv     9, 0x90(6)
+       lxv     10, 0xa0(6)
+
+       # load rounds - 10 (128), 12 (192), 14 (256)
+       lwz     9,480(6)
+
+       #
+       # vxor  state, state, w # addroundkey
+       xxlor   32+29, 0, 0
+       vxor    15, 30, 29      # IV + round key - add round key 0
+
+       cmpdi   9, 10
+       beq     Loop_aes_gcm_8x_dec
+
+       # load 2 more round keys (v11, v12)
+       lxv     11, 0xb0(6)
+       lxv     12, 0xc0(6)
+
+       cmpdi   9, 12
+       beq     Loop_aes_gcm_8x_dec
+
+       # load 2 more round keys (v11, v12, v13, v14)
+       lxv     13, 0xd0(6)
+       lxv     14, 0xe0(6)
+       cmpdi   9, 14
+       beq     Loop_aes_gcm_8x_dec
+
+       b       aes_gcm_out
+
+.align 5
+Loop_aes_gcm_8x_dec:
+       mr      14, 3
+       mr      9, 4
+
+       # n blcoks
+       li      10, 128
+       divdu   10, 5, 10       # n 128 bytes-blocks
+       cmpdi   10, 0
+       beq     Loop_last_block_dec
+
+       vaddudm 30, 30, 31      # IV + counter
+       vxor    16, 30, 29
+       vaddudm 30, 30, 31
+       vxor    17, 30, 29
+       vaddudm 30, 30, 31
+       vxor    18, 30, 29
+       vaddudm 30, 30, 31
+       vxor    19, 30, 29
+       vaddudm 30, 30, 31
+       vxor    20, 30, 29
+       vaddudm 30, 30, 31
+       vxor    21, 30, 29
+       vaddudm 30, 30, 31
+       vxor    22, 30, 29
+
+       mtctr   10
+
+       li      15, 16
+       li      16, 32
+       li      17, 48
+       li      18, 64
+       li      19, 80
+       li      20, 96
+       li      21, 112
+
+       lwz     10, 480(6)
+
+Loop_8x_block_dec:
+
+       lxvb16x         15, 0, 14       # load block
+       lxvb16x         16, 15, 14      # load block
+       lxvb16x         17, 16, 14      # load block
+       lxvb16x         18, 17, 14      # load block
+       lxvb16x         19, 18, 14      # load block
+       lxvb16x         20, 19, 14      # load block
+       lxvb16x         21, 20, 14      # load block
+       lxvb16x         22, 21, 14      # load block
+       addi            14, 14, 128
+
+       Loop_aes_middle8x
+
+       xxlor   23+32, 10, 10
+
+       cmpdi   10, 10
+       beq     Do_last_aes_dec
+
+       # 192 bits
+       xxlor   24+32, 11, 11
+
+       vcipher 15, 15, 23
+       vcipher 16, 16, 23
+       vcipher 17, 17, 23
+       vcipher 18, 18, 23
+       vcipher 19, 19, 23
+       vcipher 20, 20, 23
+       vcipher 21, 21, 23
+       vcipher 22, 22, 23
+
+       vcipher 15, 15, 24
+       vcipher 16, 16, 24
+       vcipher 17, 17, 24
+       vcipher 18, 18, 24
+       vcipher 19, 19, 24
+       vcipher 20, 20, 24
+       vcipher 21, 21, 24
+       vcipher 22, 22, 24
+
+       xxlor   23+32, 12, 12
+
+       cmpdi   10, 12
+       beq     Do_last_aes_dec
+
+       # 256 bits
+       xxlor   24+32, 13, 13
+
+       vcipher 15, 15, 23
+       vcipher 16, 16, 23
+       vcipher 17, 17, 23
+       vcipher 18, 18, 23
+       vcipher 19, 19, 23
+       vcipher 20, 20, 23
+       vcipher 21, 21, 23
+       vcipher 22, 22, 23
+
+       vcipher 15, 15, 24
+       vcipher 16, 16, 24
+       vcipher 17, 17, 24
+       vcipher 18, 18, 24
+       vcipher 19, 19, 24
+       vcipher 20, 20, 24
+       vcipher 21, 21, 24
+       vcipher 22, 22, 24
+
+       xxlor   23+32, 14, 14
+
+       cmpdi   10, 14
+       beq     Do_last_aes_dec
+       b       aes_gcm_out
+
+Do_last_aes_dec:
+
+       #
+       # last round
+       vcipherlast     15, 15, 23
+       vcipherlast     16, 16, 23
+
+       xxlxor          47, 47, 15
+       stxvb16x        47, 0, 9        # store output
+       xxlxor          48, 48, 16
+       stxvb16x        48, 15, 9       # store output
+
+       vcipherlast     17, 17, 23
+       vcipherlast     18, 18, 23
+
+       xxlxor          49, 49, 17
+       stxvb16x        49, 16, 9       # store output
+       xxlxor          50, 50, 18
+       stxvb16x        50, 17, 9       # store output
+
+       vcipherlast     19, 19, 23
+       vcipherlast     20, 20, 23
+
+       xxlxor          51, 51, 19
+       stxvb16x        51, 18, 9       # store output
+       xxlxor          52, 52, 20
+       stxvb16x        52, 19, 9       # store output
+
+       vcipherlast     21, 21, 23
+       vcipherlast     22, 22, 23
+
+       xxlxor          53, 53, 21
+       stxvb16x        53, 20, 9       # store output
+       xxlxor          54, 54, 22
+       stxvb16x        54, 21, 9       # store output
+
+       addi            9, 9, 128
+
+       xxlor           15+32, 15, 15
+       xxlor           16+32, 16, 16
+       xxlor           17+32, 17, 17
+       xxlor           18+32, 18, 18
+       xxlor           19+32, 19, 19
+       xxlor           20+32, 20, 20
+       xxlor           21+32, 21, 21
+       xxlor           22+32, 22, 22
+
+       # ghash here
+       ppc_aes_gcm_ghash2_4x
+
+       xxlor   27+32, 0, 0
+       vaddudm 30, 30, 31              # IV + counter
+       vmr     29, 30
+       vxor    15, 30, 27              # add round key
+       vaddudm 30, 30, 31
+       vxor    16, 30, 27
+       vaddudm 30, 30, 31
+       vxor    17, 30, 27
+       vaddudm 30, 30, 31
+       vxor    18, 30, 27
+       vaddudm 30, 30, 31
+       vxor    19, 30, 27
+       vaddudm 30, 30, 31
+       vxor    20, 30, 27
+       vaddudm 30, 30, 31
+       vxor    21, 30, 27
+       vaddudm 30, 30, 31
+       vxor    22, 30, 27
+       addi    12, 12, -128
+       addi    11, 11, 128
+
+       bdnz    Loop_8x_block_dec
+
+       vmr     30, 29
+
+Loop_last_block_dec:
+       cmpdi   12, 0
+       beq     aes_gcm_out
+
+       # loop last few blocks
+       li      10, 16
+       divdu   10, 12, 10
+
+       mtctr   10
+
+       lwz     10,480(6)
+
+       cmpdi   12, 16
+       blt     Final_block_dec
+
+Next_rem_block_dec:
+       lxvb16x 15, 0, 14               # load block
+
+       Loop_aes_middle_1x
+
+       xxlor   23+32, 10, 10
+
+       cmpdi   10, 10
+       beq     Do_next_1x_dec
+
+       # 192 bits
+       xxlor   24+32, 11, 11
+
+       vcipher 15, 15, 23
+       vcipher 15, 15, 24
+
+       xxlor   23+32, 12, 12
+
+       cmpdi   10, 12
+       beq     Do_next_1x_dec
+
+       # 256 bits
+       xxlor   24+32, 13, 13
+
+       vcipher 15, 15, 23
+       vcipher 15, 15, 24
+
+       xxlor   23+32, 14, 14
+
+       cmpdi   10, 14
+       beq     Do_next_1x_dec
+
+Do_next_1x_dec:
+       vcipherlast     15, 15, 23
+
+       xxlxor  47, 47, 15
+       stxvb16x        47, 0, 9        # store output
+       addi    14, 14, 16
+       addi    9, 9, 16
+
+       xxlor   28+32, 15, 15
+       ppc_update_hash_1x
+
+       addi    12, 12, -16
+       addi    11, 11, 16
+       xxlor   19+32, 0, 0
+       vaddudm 30, 30, 31              # IV + counter
+       vxor    15, 30, 19              # add round key
+
+       bdnz    Next_rem_block_dec
+
+       cmpdi   12, 0
+       beq     aes_gcm_out
+
+Final_block_dec:
+       Loop_aes_middle_1x
+
+       xxlor   23+32, 10, 10
+
+       cmpdi   10, 10
+       beq     Do_final_1x_dec
+
+       # 192 bits
+       xxlor   24+32, 11, 11
+
+       vcipher 15, 15, 23
+       vcipher 15, 15, 24
+
+       xxlor   23+32, 12, 12
+
+       cmpdi   10, 12
+       beq     Do_final_1x_dec
+
+       # 256 bits
+       xxlor   24+32, 13, 13
+
+       vcipher 15, 15, 23
+       vcipher 15, 15, 24
+
+       xxlor   23+32, 14, 14
+
+       cmpdi   10, 14
+       beq     Do_final_1x_dec
+
+Do_final_1x_dec:
+       vcipherlast     15, 15, 23
+
+       lxvb16x 15, 0, 14               # load block
+       xxlxor  47, 47, 15
+
+       # create partial block mask
+       li      15, 16
+       sub     15, 15, 12              # index to the mask
+
+       vspltisb        16, -1          # first 16 bytes - 0xffff...ff
+       vspltisb        17, 0           # second 16 bytes - 0x0000...00
+       li      10, 192
+       stvx    16, 10, 1
+       addi    10, 10, 16
+       stvx    17, 10, 1
+
+       addi    10, 1, 192
+       lxvb16x 16, 15, 10              # load block mask
+       xxland  47, 47, 16
+
+       xxlor   28+32, 15, 15
+       ppc_update_hash_1x
+
+       # * should store only the remaining bytes.
+       bl      Write_partial_block
+
+       b aes_gcm_out
+#
diff --git a/grub-core/lib/libgcrypt/cipher/rijndael-internal.h 
b/grub-core/lib/libgcrypt/cipher/rijndael-internal.h
new file mode 100644
index 000000000..306040883
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/rijndael-internal.h
@@ -0,0 +1,204 @@
+/* Rijndael (AES) for GnuPG
+ * Copyright (C) 2000, 2001, 2002, 2003, 2007,
+ *               2008, 2011, 2012 Free Software Foundation, Inc.
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef G10_RIJNDAEL_INTERNAL_H
+#define G10_RIJNDAEL_INTERNAL_H
+
+#include "types.h"  /* for byte and u32 typedefs */
+
+
+#define MAXKC                   (256/32)
+#define MAXROUNDS               14
+#define BLOCKSIZE               (128/8)
+
+
+/* Helper macro to force alignment to 16 or 64 bytes.  */
+#ifdef HAVE_GCC_ATTRIBUTE_ALIGNED
+# define ATTR_ALIGNED_16  __attribute__ ((aligned (16)))
+# define ATTR_ALIGNED_64  __attribute__ ((aligned (64)))
+#else
+# define ATTR_ALIGNED_16
+# define ATTR_ALIGNED_64
+#endif
+
+
+/* USE_AMD64_ASM indicates whether to use AMD64 assembly code. */
+#undef USE_AMD64_ASM
+#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+    defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+# define USE_AMD64_ASM 1
+#endif
+
+/* USE_SSSE3 indicates whether to use SSSE3 code. */
+#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_SSSE3) && \
+    (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+#  define USE_SSSE3 1
+#endif
+
+/* USE_ARM_ASM indicates whether to use ARM assembly code. */
+#undef USE_ARM_ASM
+#if defined(__ARMEL__)
+# ifdef HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS
+#  define USE_ARM_ASM 1
+# endif
+#endif
+#if defined(__AARCH64EL__)
+# ifdef HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS
+#  define USE_ARM_ASM 1
+# endif
+#endif
+
+/* USE_PADLOCK indicates whether to compile the padlock specific
+   code.  */
+#undef USE_PADLOCK
+#ifdef ENABLE_PADLOCK_SUPPORT
+# ifdef HAVE_GCC_ATTRIBUTE_ALIGNED
+#  if (defined (__i386__) && SIZEOF_UNSIGNED_LONG == 4) || defined(__x86_64__)
+#   define USE_PADLOCK 1
+#  endif
+# endif
+#endif /* ENABLE_PADLOCK_SUPPORT */
+
+/* USE_AESNI inidicates whether to compile with Intel AES-NI code.  We
+   need the vector-size attribute which seems to be available since
+   gcc 3.  However, to be on the safe side we require at least gcc 4.  */
+#undef USE_AESNI
+#ifdef ENABLE_AESNI_SUPPORT
+# if ((defined (__i386__) && SIZEOF_UNSIGNED_LONG == 4) || defined(__x86_64__))
+#  if __GNUC__ >= 4
+#   define USE_AESNI 1
+#  endif
+# endif
+#endif /* ENABLE_AESNI_SUPPORT */
+
+/* USE_VAES inidicates whether to compile with Intel VAES code.  */
+#undef USE_VAES
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
+     defined(__x86_64__) && defined(ENABLE_AVX2_SUPPORT) && \
+     defined(HAVE_GCC_INLINE_ASM_VAES_VPCLMUL) && \
+     defined(USE_AESNI)
+# define USE_VAES 1
+#endif
+
+/* USE_ARM_CE indicates whether to enable ARMv8 Crypto Extension assembly
+ * code. */
+#undef USE_ARM_CE
+#ifdef ENABLE_ARM_CRYPTO_SUPPORT
+# if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) \
+     && defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) \
+     && defined(HAVE_GCC_INLINE_ASM_AARCH32_CRYPTO)
+#  define USE_ARM_CE 1
+# elif defined(__AARCH64EL__) \
+       && defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) \
+       && defined(HAVE_GCC_INLINE_ASM_AARCH64_CRYPTO)
+#  define USE_ARM_CE 1
+# endif
+#endif /* ENABLE_ARM_CRYPTO_SUPPORT */
+
+/* USE_PPC_CRYPTO indicates whether to enable PowerPC vector crypto
+ * accelerated code.  USE_PPC_CRYPTO_WITH_PPC9LE indicates whether to
+ * enable POWER9 optimized variant.  */
+#undef USE_PPC_CRYPTO
+#undef USE_PPC_CRYPTO_WITH_PPC9LE
+#ifdef ENABLE_PPC_CRYPTO_SUPPORT
+# if defined(HAVE_COMPATIBLE_CC_PPC_ALTIVEC) && \
+     defined(HAVE_GCC_INLINE_ASM_PPC_ALTIVEC)
+#  if __GNUC__ >= 4
+#   define USE_PPC_CRYPTO 1
+#   if !defined(WORDS_BIGENDIAN) && defined(HAVE_GCC_INLINE_ASM_PPC_ARCH_3_00)
+#    define USE_PPC_CRYPTO_WITH_PPC9LE 1
+#   endif
+#  endif
+# endif
+#endif /* ENABLE_PPC_CRYPTO_SUPPORT */
+
+/* USE_S390X_CRYPTO indicates whether to enable zSeries code. */
+#undef USE_S390X_CRYPTO
+#if defined(HAVE_GCC_INLINE_ASM_S390X)
+# define USE_S390X_CRYPTO 1
+#endif /* USE_S390X_CRYPTO */
+
+struct RIJNDAEL_context_s;
+
+typedef unsigned int (*rijndael_cryptfn_t)(const struct RIJNDAEL_context_s 
*ctx,
+                                           unsigned char *bx,
+                                           const unsigned char *ax);
+typedef void (*rijndael_prefetchfn_t)(void);
+typedef void (*rijndael_prepare_decfn_t)(struct RIJNDAEL_context_s *ctx);
+
+/* Our context object.  */
+typedef struct RIJNDAEL_context_s
+{
+  /* The first fields are the keyschedule arrays.  This is so that
+     they are aligned on a 16 byte boundary if using gcc.  This
+     alignment is required for the AES-NI code and a good idea in any
+     case.  The alignment is guaranteed due to the way cipher.c
+     allocates the space for the context.  The PROPERLY_ALIGNED_TYPE
+     hack is used to force a minimal alignment if not using gcc of if
+     the alignment requirement is higher that 16 bytes.  */
+  union
+  {
+    PROPERLY_ALIGNED_TYPE dummy;
+    byte keyschedule[MAXROUNDS+1][4][4];
+    u32 keyschedule32[MAXROUNDS+1][4];
+#ifdef USE_PADLOCK
+    /* The key as passed to the padlock engine.  It is only used if
+       the padlock engine is used (USE_PADLOCK, below).  */
+    unsigned char padlock_key[16] __attribute__ ((aligned (16)));
+#endif /*USE_PADLOCK*/
+  } u1;
+  union
+  {
+    PROPERLY_ALIGNED_TYPE dummy;
+    byte keyschedule[MAXROUNDS+1][4][4];
+    u32 keyschedule32[MAXROUNDS+1][4];
+  } u2;
+  int rounds;                         /* Key-length-dependent number of 
rounds.  */
+  unsigned int decryption_prepared:1; /* The decryption key schedule is 
available.  */
+#ifdef USE_AESNI
+  unsigned int use_avx:1;             /* AVX shall be used by AES-NI 
implementation. */
+  unsigned int use_avx2:1;            /* AVX2 shall be used by AES-NI 
implementation. */
+#endif /*USE_AESNI*/
+#ifdef USE_S390X_CRYPTO
+  byte km_func;
+  byte km_func_xts;
+  byte kmc_func;
+  byte kmac_func;
+  byte kmf_func;
+  byte kmo_func;
+  byte kma_func;
+#endif /*USE_S390X_CRYPTO*/
+  rijndael_cryptfn_t encrypt_fn;
+  rijndael_cryptfn_t decrypt_fn;
+  rijndael_prefetchfn_t prefetch_enc_fn;
+  rijndael_prefetchfn_t prefetch_dec_fn;
+  rijndael_prepare_decfn_t prepare_decryption;
+} RIJNDAEL_context ATTR_ALIGNED_16;
+
+/* Macros defining alias for the keyschedules.  */
+#define keyschenc   u1.keyschedule
+#define keyschenc32 u1.keyschedule32
+#define keyschdec   u2.keyschedule
+#define keyschdec32 u2.keyschedule32
+#define padlockkey  u1.padlock_key
+
+#endif /* G10_RIJNDAEL_INTERNAL_H */
diff --git a/grub-core/lib/libgcrypt/cipher/rijndael-p10le.c 
b/grub-core/lib/libgcrypt/cipher/rijndael-p10le.c
new file mode 100644
index 000000000..b16ebe1b7
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/rijndael-p10le.c
@@ -0,0 +1,119 @@
+/* Rijndael (AES) for GnuPG - PowerPC Vector Crypto AES implementation
+ * Copyright 2021- IBM Inc. All rights reserved
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Alternatively, this code may be used in OpenSSL from The OpenSSL Project,
+ * and Cryptogams by Andy Polyakov, and if made part of a release of either
+ * or both projects, is thereafter dual-licensed under the license said project
+ * is released under.
+ */
+
+#include <config.h>
+
+#include "rijndael-internal.h"
+#include "cipher-internal.h"
+#include "bufhelp.h"
+
+#ifdef USE_PPC_CRYPTO_WITH_PPC9LE
+
+
+extern size_t _gcry_ppc10_aes_gcm_encrypt (const void *inp, void *out,
+                                           size_t len,
+                                           const unsigned char *key,
+                                           unsigned char iv[16], void *Xip);
+extern size_t _gcry_ppc10_aes_gcm_decrypt (const void *inp, void *out,
+                                           size_t len,
+                                           const unsigned char *key,
+                                           unsigned char iv[16], void *Xip);
+
+size_t
+_gcry_aes_p10le_gcm_crypt(gcry_cipher_hd_t c, void *outbuf_arg,
+                          const void *inbuf_arg, size_t nblocks, int encrypt)
+{
+  RIJNDAEL_context *ctx = (RIJNDAEL_context *) &c->context.c;
+  unsigned char *rk = (unsigned char *) ctx->u1.keyschedule;
+  unsigned char *gcm_table = (unsigned char *) c->u_mode.gcm.gcm_table;
+  unsigned char *iv = c->u_ctr.ctr;
+  unsigned char *Xi = c->u_mode.gcm.u_tag.tag;
+  int s = 0;
+  int ndone = 0;
+  int ctr_reset = 0;
+  size_t len = nblocks * GCRY_GCM_BLOCK_LEN;
+  u64 blocks_unused;
+  u64 nb = nblocks;
+  u64 next_ctr = 0;
+  unsigned char ctr_saved[12];
+  unsigned char *inp = (unsigned char *) inbuf_arg;
+  unsigned char *out = (unsigned char *) outbuf_arg;
+
+  /*
+   * This is what the aes-gcm asembly code expects some input parameters.
+   *
+   *   - Number of rounds is at 480 offset from rk (rk->rounds)
+   *   - Xi at 256 offset from gcm_table
+   */
+  gcry_assert (sizeof(c->u_mode.gcm.gcm_table) >= 256 + 16);
+  buf_cpy (gcm_table+256, Xi, 16);
+  buf_cpy (ctr_saved, c->u_ctr.ctr, 12);
+
+  while (nb)
+    {
+      blocks_unused = (u64) 0xffffffffU + 1 - (u64) buf_get_be32 (iv + 12);
+      if (nb > blocks_unused)
+        {
+          len = blocks_unused * GCRY_GCM_BLOCK_LEN;
+          nb -= blocks_unused;
+          next_ctr = blocks_unused;
+          ctr_reset = 1;
+        }
+      else
+        {
+          len = nb * GCRY_GCM_BLOCK_LEN;
+          next_ctr = nb;
+          nb = 0;
+        }
+
+      if (encrypt)
+        s = _gcry_ppc10_aes_gcm_encrypt((const void *) inp, (void *) out, len,
+                                        (const unsigned char *) rk, iv,
+                                        (void *) gcm_table);
+      else
+        s = _gcry_ppc10_aes_gcm_decrypt((const void *) inp, (void *) out, len,
+                                        (const unsigned char *) rk, iv,
+                                        (void *) gcm_table);
+
+      cipher_block_add(c->u_ctr.ctr, next_ctr, GCRY_GCM_BLOCK_LEN);
+      if (ctr_reset)
+        {
+          ctr_reset = 0;
+          inp += len;
+          out += len;
+        }
+      buf_cpy (c->u_ctr.ctr, ctr_saved, 12);
+      ndone += s;
+    }
+  buf_cpy (Xi, gcm_table+256, 16);
+
+  /*
+   * Return number of blocks done.
+   */
+  s = ndone / GCRY_GCM_BLOCK_LEN;
+  s = nblocks - s;
+  return ( s );
+}
+
+#endif /* USE_PPC_CRYPTO_WITH_PPC9LE */
diff --git a/grub-core/lib/libgcrypt/cipher/rijndael-padlock.c 
b/grub-core/lib/libgcrypt/cipher/rijndael-padlock.c
new file mode 100644
index 000000000..3af214d74
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/rijndael-padlock.c
@@ -0,0 +1,110 @@
+/* Padlock accelerated AES for Libgcrypt
+ * Copyright (C) 2000, 2001, 2002, 2003, 2007,
+ *               2008, 2011, 2012 Free Software Foundation, Inc.
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h> /* for memcmp() */
+
+#include "types.h"  /* for byte and u32 typedefs */
+#include "g10lib.h"
+#include "cipher.h"
+#include "bufhelp.h"
+#include "cipher-selftest.h"
+#include "rijndael-internal.h"
+
+#ifdef USE_PADLOCK
+
+/* Encrypt or decrypt one block using the padlock engine.  A and B may
+   be the same. */
+static unsigned int
+do_padlock (const RIJNDAEL_context *ctx, unsigned char *bx,
+            const unsigned char *ax, int decrypt_flag)
+{
+  /* BX and AX are not necessary correctly aligned.  Thus we need to
+     copy them here. */
+  unsigned char a[16] __attribute__ ((aligned (16)));
+  unsigned char b[16] __attribute__ ((aligned (16)));
+  unsigned int cword[4] __attribute__ ((aligned (16)));
+  unsigned char *pa = a;
+  unsigned char *pb = b;
+  int blocks;
+
+  /* The control word fields are:
+      127:12   11:10 9     8     7     6     5     4     3:0
+      RESERVED KSIZE CRYPT INTER KEYGN CIPHR ALIGN DGEST ROUND  */
+  cword[0] = (ctx->rounds & 15);  /* (The mask is just a safeguard.)  */
+  cword[1] = 0;
+  cword[2] = 0;
+  cword[3] = 0;
+  if (decrypt_flag)
+    cword[0] |= 0x00000200;
+
+  memcpy (a, ax, 16);
+
+  blocks = 1; /* Init counter for just one block.  */
+#ifdef __x86_64__
+  asm volatile
+    ("pushfq\n\t"          /* Force key reload.  */
+     "popfq\n\t"
+     ".byte 0xf3, 0x0f, 0xa7, 0xc8\n\t" /* REP XCRYPT ECB. */
+     : "+S" (pa), "+D" (pb), "+c" (blocks)
+     : "d" (cword), "b" (ctx->padlockkey)
+     : "cc", "memory"
+     );
+#else
+  asm volatile
+    ("pushfl\n\t"          /* Force key reload.  */
+     "popfl\n\t"
+     "xchg %4, %%ebx\n\t"  /* Load key.  */
+     ".byte 0xf3, 0x0f, 0xa7, 0xc8\n\t" /* REP XCRYPT ECB. */
+     "xchg %4, %%ebx\n"    /* Restore GOT register.  */
+     : "+S" (pa), "+D" (pb), "+c" (blocks)
+     : "d" (cword), "r" (ctx->padlockkey)
+     : "cc", "memory"
+     );
+#endif
+
+  memcpy (bx, b, 16);
+
+  return (48 + 15 /* possible padding for alignment */);
+}
+
+unsigned int
+_gcry_aes_padlock_encrypt (const RIJNDAEL_context *ctx,
+                           unsigned char *bx, const unsigned char *ax)
+{
+  return do_padlock(ctx, bx, ax, 0);
+}
+
+unsigned int
+_gcry_aes_padlock_decrypt (const RIJNDAEL_context *ctx,
+                           unsigned char *bx, const unsigned char *ax)
+{
+  return do_padlock(ctx, bx, ax, 1);
+}
+
+void
+_gcry_aes_padlock_prepare_decryption (RIJNDAEL_context *ctx)
+{
+  /* Padlock does not need decryption subkeys. */
+  (void)ctx;
+}
+#endif /* USE_PADLOCK */
diff --git a/grub-core/lib/libgcrypt/cipher/rijndael-ppc-common.h 
b/grub-core/lib/libgcrypt/cipher/rijndael-ppc-common.h
new file mode 100644
index 000000000..bbbeaac03
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/rijndael-ppc-common.h
@@ -0,0 +1,342 @@
+/* Rijndael (AES) for GnuPG - PowerPC Vector Crypto AES implementation
+ * Copyright (C) 2019 Shawn Landden <shawn@git.icu>
+ * Copyright (C) 2019-2020 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Alternatively, this code may be used in OpenSSL from The OpenSSL Project,
+ * and Cryptogams by Andy Polyakov, and if made part of a release of either
+ * or both projects, is thereafter dual-licensed under the license said project
+ * is released under.
+ */
+
+#ifndef G10_RIJNDAEL_PPC_COMMON_H
+#define G10_RIJNDAEL_PPC_COMMON_H
+
+#include <altivec.h>
+
+
+typedef vector unsigned char block;
+
+typedef union
+{
+  u32 data32[4];
+} __attribute__((packed, aligned(1), may_alias)) u128_t;
+
+
+#define ALWAYS_INLINE inline __attribute__((always_inline))
+#define NO_INLINE __attribute__((noinline))
+#define NO_INSTRUMENT_FUNCTION __attribute__((no_instrument_function))
+
+#define ASM_FUNC_ATTR          NO_INSTRUMENT_FUNCTION
+#define ASM_FUNC_ATTR_INLINE   ASM_FUNC_ATTR ALWAYS_INLINE
+#define ASM_FUNC_ATTR_NOINLINE ASM_FUNC_ATTR NO_INLINE
+
+
+#define ALIGNED_LOAD(in_ptr, offs) \
+  (asm_aligned_ld ((offs) * 16, (const void *)(in_ptr)))
+
+#define ALIGNED_STORE(out_ptr, offs, vec) \
+  (asm_aligned_st ((vec), (offs) * 16, (void *)(out_ptr)))
+
+#define VEC_BE_SWAP(vec, bige_const) (asm_be_swap ((vec), (bige_const)))
+
+#define VEC_LOAD_BE(in_ptr, offs, bige_const) \
+  (asm_be_swap (asm_load_be_noswap ((offs) * 16, (const void *)(in_ptr)), \
+               bige_const))
+
+#define VEC_LOAD_BE_NOSWAP(in_ptr, offs) \
+  (asm_load_be_noswap ((offs) * 16, (const unsigned char *)(in_ptr)))
+
+#define VEC_STORE_BE(out_ptr, offs, vec, bige_const) \
+  (asm_store_be_noswap (asm_be_swap ((vec), (bige_const)), (offs) * 16, \
+                       (void *)(out_ptr)))
+
+#define VEC_STORE_BE_NOSWAP(out_ptr, offs, vec) \
+  (asm_store_be_noswap ((vec), (offs) * 16, (void *)(out_ptr)))
+
+
+#define ROUND_KEY_VARIABLES \
+  block rkey0, rkeylast
+
+#define PRELOAD_ROUND_KEYS(nrounds) \
+  do { \
+    rkey0 = ALIGNED_LOAD (rk, 0); \
+    rkeylast = ALIGNED_LOAD (rk, nrounds); \
+  } while (0)
+
+#define AES_ENCRYPT(blk, nrounds) \
+  do { \
+    blk ^= rkey0; \
+    blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 1)); \
+    blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 2)); \
+    blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 3)); \
+    blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 4)); \
+    blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 5)); \
+    blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 6)); \
+    blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 7)); \
+    blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 8)); \
+    blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 9)); \
+    if (nrounds >= 12) \
+      { \
+       blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 10)); \
+       blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 11)); \
+       if (rounds > 12) \
+         { \
+           blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 12)); \
+           blk = asm_cipher_be (blk, ALIGNED_LOAD (rk, 13)); \
+         } \
+      } \
+    blk = asm_cipherlast_be (blk, rkeylast); \
+  } while (0)
+
+#define AES_DECRYPT(blk, nrounds) \
+  do { \
+    blk ^= rkey0; \
+    blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 1)); \
+    blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 2)); \
+    blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 3)); \
+    blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 4)); \
+    blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 5)); \
+    blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 6)); \
+    blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 7)); \
+    blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 8)); \
+    blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 9)); \
+    if (nrounds >= 12) \
+      { \
+       blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 10)); \
+       blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 11)); \
+       if (rounds > 12) \
+         { \
+           blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 12)); \
+           blk = asm_ncipher_be (blk, ALIGNED_LOAD (rk, 13)); \
+         } \
+      } \
+    blk = asm_ncipherlast_be (blk, rkeylast); \
+  } while (0)
+
+
+#define ROUND_KEY_VARIABLES_ALL \
+  block rkey0, rkey1, rkey2, rkey3, rkey4, rkey5, rkey6, rkey7, rkey8, \
+        rkey9, rkey10, rkey11, rkey12, rkey13, rkeylast
+
+#define PRELOAD_ROUND_KEYS_ALL(nrounds) \
+  do { \
+    rkey0 = ALIGNED_LOAD (rk, 0); \
+    rkey1 = ALIGNED_LOAD (rk, 1); \
+    rkey2 = ALIGNED_LOAD (rk, 2); \
+    rkey3 = ALIGNED_LOAD (rk, 3); \
+    rkey4 = ALIGNED_LOAD (rk, 4); \
+    rkey5 = ALIGNED_LOAD (rk, 5); \
+    rkey6 = ALIGNED_LOAD (rk, 6); \
+    rkey7 = ALIGNED_LOAD (rk, 7); \
+    rkey8 = ALIGNED_LOAD (rk, 8); \
+    rkey9 = ALIGNED_LOAD (rk, 9); \
+    if (nrounds >= 12) \
+      { \
+       rkey10 = ALIGNED_LOAD (rk, 10); \
+       rkey11 = ALIGNED_LOAD (rk, 11); \
+       if (rounds > 12) \
+         { \
+           rkey12 = ALIGNED_LOAD (rk, 12); \
+           rkey13 = ALIGNED_LOAD (rk, 13); \
+         } \
+      } \
+    rkeylast = ALIGNED_LOAD (rk, nrounds); \
+  } while (0)
+
+#define AES_ENCRYPT_ALL(blk, nrounds) \
+  do { \
+    blk ^= rkey0; \
+    blk = asm_cipher_be (blk, rkey1); \
+    blk = asm_cipher_be (blk, rkey2); \
+    blk = asm_cipher_be (blk, rkey3); \
+    blk = asm_cipher_be (blk, rkey4); \
+    blk = asm_cipher_be (blk, rkey5); \
+    blk = asm_cipher_be (blk, rkey6); \
+    blk = asm_cipher_be (blk, rkey7); \
+    blk = asm_cipher_be (blk, rkey8); \
+    blk = asm_cipher_be (blk, rkey9); \
+    if (nrounds >= 12) \
+      { \
+       blk = asm_cipher_be (blk, rkey10); \
+       blk = asm_cipher_be (blk, rkey11); \
+       if (rounds > 12) \
+         { \
+           blk = asm_cipher_be (blk, rkey12); \
+           blk = asm_cipher_be (blk, rkey13); \
+         } \
+      } \
+    blk = asm_cipherlast_be (blk, rkeylast); \
+  } while (0)
+
+
+static ASM_FUNC_ATTR_INLINE block
+asm_aligned_ld(unsigned long offset, const void *ptr)
+{
+  block vec;
+#if __GNUC__ >= 4
+  if (__builtin_constant_p (offset) && offset == 0)
+    __asm__ volatile ("lvx %0,0,%1\n\t"
+                     : "=v" (vec)
+                     : "r" ((uintptr_t)ptr)
+                     : "memory");
+  else
+#endif
+    __asm__ volatile ("lvx %0,%1,%2\n\t"
+                     : "=v" (vec)
+                     : "r" (offset), "r" ((uintptr_t)ptr)
+                     : "memory", "r0");
+  return vec;
+}
+
+static ASM_FUNC_ATTR_INLINE void
+asm_aligned_st(block vec, unsigned long offset, void *ptr)
+{
+#if __GNUC__ >= 4
+  if (__builtin_constant_p (offset) && offset == 0)
+    __asm__ volatile ("stvx %0,0,%1\n\t"
+                     :
+                     : "v" (vec), "r" ((uintptr_t)ptr)
+                     : "memory");
+  else
+#endif
+    __asm__ volatile ("stvx %0,%1,%2\n\t"
+                     :
+                     : "v" (vec), "r" (offset), "r" ((uintptr_t)ptr)
+                     : "memory", "r0");
+}
+
+static ASM_FUNC_ATTR_INLINE block
+asm_vperm1(block vec, block mask)
+{
+  block o;
+  __asm__ volatile ("vperm %0,%1,%1,%2\n\t"
+                   : "=v" (o)
+                   : "v" (vec), "v" (mask));
+  return o;
+}
+
+static ASM_FUNC_ATTR_INLINE block
+asm_add_uint128(block a, block b)
+{
+  block res;
+  __asm__ volatile ("vadduqm %0,%1,%2\n\t"
+                   : "=v" (res)
+                   : "v" (a), "v" (b));
+  return res;
+}
+
+static ASM_FUNC_ATTR_INLINE block
+asm_add_uint64(block a, block b)
+{
+  block res;
+  __asm__ volatile ("vaddudm %0,%1,%2\n\t"
+                   : "=v" (res)
+                   : "v" (a), "v" (b));
+  return res;
+}
+
+static ASM_FUNC_ATTR_INLINE block
+asm_sra_int64(block a, block b)
+{
+  block res;
+  __asm__ volatile ("vsrad %0,%1,%2\n\t"
+                   : "=v" (res)
+                   : "v" (a), "v" (b));
+  return res;
+}
+
+static block
+asm_swap_uint64_halfs(block a)
+{
+  block res;
+  __asm__ volatile ("xxswapd %x0, %x1"
+                   : "=wa" (res)
+                   : "wa" (a));
+  return res;
+}
+
+static ASM_FUNC_ATTR_INLINE block
+asm_xor(block a, block b)
+{
+  block res;
+  __asm__ volatile ("vxor %0,%1,%2\n\t"
+                   : "=v" (res)
+                   : "v" (a), "v" (b));
+  return res;
+}
+
+static ASM_FUNC_ATTR_INLINE block
+asm_cipher_be(block b, block rk)
+{
+  block o;
+  __asm__ volatile ("vcipher %0, %1, %2\n\t"
+                   : "=v" (o)
+                   : "v" (b), "v" (rk));
+  return o;
+}
+
+static ASM_FUNC_ATTR_INLINE block
+asm_cipherlast_be(block b, block rk)
+{
+  block o;
+  __asm__ volatile ("vcipherlast %0, %1, %2\n\t"
+                   : "=v" (o)
+                   : "v" (b), "v" (rk));
+  return o;
+}
+
+static ASM_FUNC_ATTR_INLINE block
+asm_ncipher_be(block b, block rk)
+{
+  block o;
+  __asm__ volatile ("vncipher %0, %1, %2\n\t"
+                   : "=v" (o)
+                   : "v" (b), "v" (rk));
+  return o;
+}
+
+static ASM_FUNC_ATTR_INLINE block
+asm_ncipherlast_be(block b, block rk)
+{
+  block o;
+  __asm__ volatile ("vncipherlast %0, %1, %2\n\t"
+                   : "=v" (o)
+                   : "v" (b), "v" (rk));
+  return o;
+}
+
+
+/* Make a decryption key from an encryption key. */
+static ASM_FUNC_ATTR_INLINE void
+internal_aes_ppc_prepare_decryption (RIJNDAEL_context *ctx)
+{
+  u128_t *ekey = (u128_t *)(void *)ctx->keyschenc;
+  u128_t *dkey = (u128_t *)(void *)ctx->keyschdec;
+  int rounds = ctx->rounds;
+  int rr;
+  int r;
+
+  r = 0;
+  rr = rounds;
+  for (r = 0, rr = rounds; r <= rounds; r++, rr--)
+    {
+      ALIGNED_STORE (dkey, r, ALIGNED_LOAD (ekey, rr));
+    }
+}
+
+#endif /* G10_RIJNDAEL_PPC_COMMON_H */
diff --git a/grub-core/lib/libgcrypt/cipher/rijndael-ppc-functions.h 
b/grub-core/lib/libgcrypt/cipher/rijndael-ppc-functions.h
new file mode 100644
index 000000000..72f31852b
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/rijndael-ppc-functions.h
@@ -0,0 +1,2020 @@
+/* Rijndael (AES) for GnuPG - PowerPC Vector Crypto AES implementation
+ * Copyright (C) 2019 Shawn Landden <shawn@git.icu>
+ * Copyright (C) 2019-2020 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Alternatively, this code may be used in OpenSSL from The OpenSSL Project,
+ * and Cryptogams by Andy Polyakov, and if made part of a release of either
+ * or both projects, is thereafter dual-licensed under the license said project
+ * is released under.
+ */
+
+unsigned int ENCRYPT_BLOCK_FUNC (const RIJNDAEL_context *ctx,
+                                unsigned char *out,
+                                const unsigned char *in)
+{
+  const block bige_const = asm_load_be_const();
+  const u128_t *rk = (u128_t *)&ctx->keyschenc;
+  int rounds = ctx->rounds;
+  ROUND_KEY_VARIABLES;
+  block b;
+
+  b = VEC_LOAD_BE (in, 0, bige_const);
+
+  PRELOAD_ROUND_KEYS (rounds);
+
+  AES_ENCRYPT (b, rounds);
+  VEC_STORE_BE (out, 0, b, bige_const);
+
+  return 0; /* does not use stack */
+}
+
+
+unsigned int DECRYPT_BLOCK_FUNC (const RIJNDAEL_context *ctx,
+                                unsigned char *out,
+                                const unsigned char *in)
+{
+  const block bige_const = asm_load_be_const();
+  const u128_t *rk = (u128_t *)&ctx->keyschdec;
+  int rounds = ctx->rounds;
+  ROUND_KEY_VARIABLES;
+  block b;
+
+  b = VEC_LOAD_BE (in, 0, bige_const);
+
+  PRELOAD_ROUND_KEYS (rounds);
+
+  AES_DECRYPT (b, rounds);
+  VEC_STORE_BE (out, 0, b, bige_const);
+
+  return 0; /* does not use stack */
+}
+
+
+void CFB_ENC_FUNC (void *context, unsigned char *iv_arg,
+                  void *outbuf_arg, const void *inbuf_arg,
+                  size_t nblocks)
+{
+  const block bige_const = asm_load_be_const();
+  RIJNDAEL_context *ctx = context;
+  const u128_t *rk = (u128_t *)&ctx->keyschenc;
+  const u128_t *in = (const u128_t *)inbuf_arg;
+  u128_t *out = (u128_t *)outbuf_arg;
+  int rounds = ctx->rounds;
+  ROUND_KEY_VARIABLES_ALL;
+  block rkeylast_orig;
+  block iv;
+
+  iv = VEC_LOAD_BE (iv_arg, 0, bige_const);
+
+  PRELOAD_ROUND_KEYS_ALL (rounds);
+  rkeylast_orig = rkeylast;
+
+  for (; nblocks >= 2; nblocks -= 2)
+    {
+      block in2, iv1;
+
+      rkeylast = rkeylast_orig ^ VEC_LOAD_BE (in, 0, bige_const);
+      in2 = VEC_LOAD_BE (in + 1, 0, bige_const);
+      in += 2;
+
+      AES_ENCRYPT_ALL (iv, rounds);
+
+      iv1 = iv;
+      rkeylast = rkeylast_orig ^ in2;
+
+      AES_ENCRYPT_ALL (iv, rounds);
+
+      VEC_STORE_BE (out++, 0, iv1, bige_const);
+      VEC_STORE_BE (out++, 0, iv, bige_const);
+    }
+
+  for (; nblocks; nblocks--)
+    {
+      rkeylast = rkeylast_orig ^ VEC_LOAD_BE (in++, 0, bige_const);
+
+      AES_ENCRYPT_ALL (iv, rounds);
+
+      VEC_STORE_BE (out++, 0, iv, bige_const);
+    }
+
+  VEC_STORE_BE (iv_arg, 0, iv, bige_const);
+}
+
+void CFB_DEC_FUNC (void *context, unsigned char *iv_arg,
+                  void *outbuf_arg, const void *inbuf_arg,
+                  size_t nblocks)
+{
+  const block bige_const = asm_load_be_const();
+  RIJNDAEL_context *ctx = context;
+  const u128_t *rk = (u128_t *)&ctx->keyschenc;
+  const u128_t *in = (const u128_t *)inbuf_arg;
+  u128_t *out = (u128_t *)outbuf_arg;
+  int rounds = ctx->rounds;
+  ROUND_KEY_VARIABLES;
+  block rkeylast_orig;
+  block iv, b, bin;
+  block in0, in1, in2, in3, in4, in5, in6, in7;
+  block b0, b1, b2, b3, b4, b5, b6, b7;
+  block rkey;
+
+  iv = VEC_LOAD_BE (iv_arg, 0, bige_const);
+
+  PRELOAD_ROUND_KEYS (rounds);
+  rkeylast_orig = rkeylast;
+
+  for (; nblocks >= 8; nblocks -= 8)
+    {
+      in0 = iv;
+      in1 = VEC_LOAD_BE_NOSWAP (in, 0);
+      in2 = VEC_LOAD_BE_NOSWAP (in, 1);
+      in3 = VEC_LOAD_BE_NOSWAP (in, 2);
+      in4 = VEC_LOAD_BE_NOSWAP (in, 3);
+      in1 = VEC_BE_SWAP (in1, bige_const);
+      in2 = VEC_BE_SWAP (in2, bige_const);
+      in5 = VEC_LOAD_BE_NOSWAP (in, 4);
+      in6 = VEC_LOAD_BE_NOSWAP (in, 5);
+      in3 = VEC_BE_SWAP (in3, bige_const);
+      in4 = VEC_BE_SWAP (in4, bige_const);
+      in7 = VEC_LOAD_BE_NOSWAP (in, 6);
+      iv = VEC_LOAD_BE_NOSWAP (in, 7);
+      in += 8;
+      in5 = VEC_BE_SWAP (in5, bige_const);
+      in6 = VEC_BE_SWAP (in6, bige_const);
+      b0 = asm_xor (rkey0, in0);
+      b1 = asm_xor (rkey0, in1);
+      in7 = VEC_BE_SWAP (in7, bige_const);
+      iv = VEC_BE_SWAP (iv, bige_const);
+      b2 = asm_xor (rkey0, in2);
+      b3 = asm_xor (rkey0, in3);
+      b4 = asm_xor (rkey0, in4);
+      b5 = asm_xor (rkey0, in5);
+      b6 = asm_xor (rkey0, in6);
+      b7 = asm_xor (rkey0, in7);
+
+#define DO_ROUND(r) \
+             rkey = ALIGNED_LOAD (rk, r); \
+             b0 = asm_cipher_be (b0, rkey); \
+             b1 = asm_cipher_be (b1, rkey); \
+             b2 = asm_cipher_be (b2, rkey); \
+             b3 = asm_cipher_be (b3, rkey); \
+             b4 = asm_cipher_be (b4, rkey); \
+             b5 = asm_cipher_be (b5, rkey); \
+             b6 = asm_cipher_be (b6, rkey); \
+             b7 = asm_cipher_be (b7, rkey);
+
+      DO_ROUND(1);
+      DO_ROUND(2);
+      DO_ROUND(3);
+      DO_ROUND(4);
+      DO_ROUND(5);
+      DO_ROUND(6);
+      DO_ROUND(7);
+      DO_ROUND(8);
+      DO_ROUND(9);
+      if (rounds >= 12)
+       {
+         DO_ROUND(10);
+         DO_ROUND(11);
+         if (rounds > 12)
+           {
+             DO_ROUND(12);
+             DO_ROUND(13);
+           }
+       }
+
+#undef DO_ROUND
+
+      in1 = asm_xor (rkeylast, in1);
+      in2 = asm_xor (rkeylast, in2);
+      in3 = asm_xor (rkeylast, in3);
+      in4 = asm_xor (rkeylast, in4);
+      b0 = asm_cipherlast_be (b0, in1);
+      b1 = asm_cipherlast_be (b1, in2);
+      in5 = asm_xor (rkeylast, in5);
+      in6 = asm_xor (rkeylast, in6);
+      b2 = asm_cipherlast_be (b2, in3);
+      b3 = asm_cipherlast_be (b3, in4);
+      in7 = asm_xor (rkeylast, in7);
+      in0 = asm_xor (rkeylast, iv);
+      b0 = VEC_BE_SWAP (b0, bige_const);
+      b1 = VEC_BE_SWAP (b1, bige_const);
+      b4 = asm_cipherlast_be (b4, in5);
+      b5 = asm_cipherlast_be (b5, in6);
+      b2 = VEC_BE_SWAP (b2, bige_const);
+      b3 = VEC_BE_SWAP (b3, bige_const);
+      b6 = asm_cipherlast_be (b6, in7);
+      b7 = asm_cipherlast_be (b7, in0);
+      b4 = VEC_BE_SWAP (b4, bige_const);
+      b5 = VEC_BE_SWAP (b5, bige_const);
+      b6 = VEC_BE_SWAP (b6, bige_const);
+      b7 = VEC_BE_SWAP (b7, bige_const);
+      VEC_STORE_BE_NOSWAP (out, 0, b0);
+      VEC_STORE_BE_NOSWAP (out, 1, b1);
+      VEC_STORE_BE_NOSWAP (out, 2, b2);
+      VEC_STORE_BE_NOSWAP (out, 3, b3);
+      VEC_STORE_BE_NOSWAP (out, 4, b4);
+      VEC_STORE_BE_NOSWAP (out, 5, b5);
+      VEC_STORE_BE_NOSWAP (out, 6, b6);
+      VEC_STORE_BE_NOSWAP (out, 7, b7);
+      out += 8;
+    }
+
+  if (nblocks >= 4)
+    {
+      in0 = iv;
+      in1 = VEC_LOAD_BE (in, 0, bige_const);
+      in2 = VEC_LOAD_BE (in, 1, bige_const);
+      in3 = VEC_LOAD_BE (in, 2, bige_const);
+      iv = VEC_LOAD_BE (in, 3, bige_const);
+
+      b0 = asm_xor (rkey0, in0);
+      b1 = asm_xor (rkey0, in1);
+      b2 = asm_xor (rkey0, in2);
+      b3 = asm_xor (rkey0, in3);
+
+#define DO_ROUND(r) \
+             rkey = ALIGNED_LOAD (rk, r); \
+             b0 = asm_cipher_be (b0, rkey); \
+             b1 = asm_cipher_be (b1, rkey); \
+             b2 = asm_cipher_be (b2, rkey); \
+             b3 = asm_cipher_be (b3, rkey);
+
+      DO_ROUND(1);
+      DO_ROUND(2);
+      DO_ROUND(3);
+      DO_ROUND(4);
+      DO_ROUND(5);
+      DO_ROUND(6);
+      DO_ROUND(7);
+      DO_ROUND(8);
+      DO_ROUND(9);
+      if (rounds >= 12)
+       {
+         DO_ROUND(10);
+         DO_ROUND(11);
+         if (rounds > 12)
+           {
+             DO_ROUND(12);
+             DO_ROUND(13);
+           }
+       }
+
+#undef DO_ROUND
+
+      in1 = asm_xor (rkeylast, in1);
+      in2 = asm_xor (rkeylast, in2);
+      in3 = asm_xor (rkeylast, in3);
+      in0 = asm_xor (rkeylast, iv);
+      b0 = asm_cipherlast_be (b0, in1);
+      b1 = asm_cipherlast_be (b1, in2);
+      b2 = asm_cipherlast_be (b2, in3);
+      b3 = asm_cipherlast_be (b3, in0);
+      VEC_STORE_BE (out, 0, b0, bige_const);
+      VEC_STORE_BE (out, 1, b1, bige_const);
+      VEC_STORE_BE (out, 2, b2, bige_const);
+      VEC_STORE_BE (out, 3, b3, bige_const);
+
+      in += 4;
+      out += 4;
+      nblocks -= 4;
+    }
+
+  for (; nblocks; nblocks--)
+    {
+      bin = VEC_LOAD_BE (in, 0, bige_const);
+      rkeylast = rkeylast_orig ^ bin;
+      b = iv;
+      iv = bin;
+
+      AES_ENCRYPT (b, rounds);
+
+      VEC_STORE_BE (out, 0, b, bige_const);
+
+      out++;
+      in++;
+    }
+
+  VEC_STORE_BE (iv_arg, 0, iv, bige_const);
+}
+
+
+void CBC_ENC_FUNC (void *context, unsigned char *iv_arg,
+                  void *outbuf_arg, const void *inbuf_arg,
+                  size_t nblocks, int cbc_mac)
+{
+  const block bige_const = asm_load_be_const();
+  RIJNDAEL_context *ctx = context;
+  const u128_t *rk = (u128_t *)&ctx->keyschenc;
+  const u128_t *in = (const u128_t *)inbuf_arg;
+  byte *out = (byte *)outbuf_arg;
+  int rounds = ctx->rounds;
+  ROUND_KEY_VARIABLES_ALL;
+  block lastiv, b;
+  unsigned int outadd = -(!cbc_mac) & 16;
+
+  lastiv = VEC_LOAD_BE (iv_arg, 0, bige_const);
+
+  PRELOAD_ROUND_KEYS_ALL (rounds);
+
+  for (; nblocks >= 2; nblocks -= 2)
+    {
+      block in2, lastiv1;
+
+      b = lastiv ^ VEC_LOAD_BE (in, 0, bige_const);
+      in2 = VEC_LOAD_BE (in + 1, 0, bige_const);
+      in += 2;
+
+      AES_ENCRYPT_ALL (b, rounds);
+
+      lastiv1 = b;
+      b = lastiv1 ^ in2;
+
+      AES_ENCRYPT_ALL (b, rounds);
+
+      lastiv = b;
+      VEC_STORE_BE ((u128_t *)out, 0, lastiv1, bige_const);
+      out += outadd;
+      VEC_STORE_BE ((u128_t *)out, 0, lastiv, bige_const);
+      out += outadd;
+    }
+
+  for (; nblocks; nblocks--)
+    {
+      b = lastiv ^ VEC_LOAD_BE (in++, 0, bige_const);
+
+      AES_ENCRYPT_ALL (b, rounds);
+
+      lastiv = b;
+      VEC_STORE_BE ((u128_t *)out, 0, b, bige_const);
+      out += outadd;
+    }
+
+  VEC_STORE_BE (iv_arg, 0, lastiv, bige_const);
+}
+
+void CBC_DEC_FUNC (void *context, unsigned char *iv_arg,
+                  void *outbuf_arg, const void *inbuf_arg,
+                  size_t nblocks)
+{
+  const block bige_const = asm_load_be_const();
+  RIJNDAEL_context *ctx = context;
+  const u128_t *rk = (u128_t *)&ctx->keyschdec;
+  const u128_t *in = (const u128_t *)inbuf_arg;
+  u128_t *out = (u128_t *)outbuf_arg;
+  int rounds = ctx->rounds;
+  ROUND_KEY_VARIABLES;
+  block rkeylast_orig;
+  block in0, in1, in2, in3, in4, in5, in6, in7;
+  block b0, b1, b2, b3, b4, b5, b6, b7;
+  block rkey;
+  block iv, b;
+
+  if (!ctx->decryption_prepared)
+    {
+      internal_aes_ppc_prepare_decryption (ctx);
+      ctx->decryption_prepared = 1;
+    }
+
+  iv = VEC_LOAD_BE (iv_arg, 0, bige_const);
+
+  PRELOAD_ROUND_KEYS (rounds);
+  rkeylast_orig = rkeylast;
+
+  for (; nblocks >= 8; nblocks -= 8)
+    {
+      in0 = VEC_LOAD_BE_NOSWAP (in, 0);
+      in1 = VEC_LOAD_BE_NOSWAP (in, 1);
+      in2 = VEC_LOAD_BE_NOSWAP (in, 2);
+      in3 = VEC_LOAD_BE_NOSWAP (in, 3);
+      in0 = VEC_BE_SWAP (in0, bige_const);
+      in1 = VEC_BE_SWAP (in1, bige_const);
+      in4 = VEC_LOAD_BE_NOSWAP (in, 4);
+      in5 = VEC_LOAD_BE_NOSWAP (in, 5);
+      in2 = VEC_BE_SWAP (in2, bige_const);
+      in3 = VEC_BE_SWAP (in3, bige_const);
+      in6 = VEC_LOAD_BE_NOSWAP (in, 6);
+      in7 = VEC_LOAD_BE_NOSWAP (in, 7);
+      in += 8;
+      b0 = asm_xor (rkey0, in0);
+      b1 = asm_xor (rkey0, in1);
+      in4 = VEC_BE_SWAP (in4, bige_const);
+      in5 = VEC_BE_SWAP (in5, bige_const);
+      b2 = asm_xor (rkey0, in2);
+      b3 = asm_xor (rkey0, in3);
+      in6 = VEC_BE_SWAP (in6, bige_const);
+      in7 = VEC_BE_SWAP (in7, bige_const);
+      b4 = asm_xor (rkey0, in4);
+      b5 = asm_xor (rkey0, in5);
+      b6 = asm_xor (rkey0, in6);
+      b7 = asm_xor (rkey0, in7);
+
+#define DO_ROUND(r) \
+             rkey = ALIGNED_LOAD (rk, r); \
+             b0 = asm_ncipher_be (b0, rkey); \
+             b1 = asm_ncipher_be (b1, rkey); \
+             b2 = asm_ncipher_be (b2, rkey); \
+             b3 = asm_ncipher_be (b3, rkey); \
+             b4 = asm_ncipher_be (b4, rkey); \
+             b5 = asm_ncipher_be (b5, rkey); \
+             b6 = asm_ncipher_be (b6, rkey); \
+             b7 = asm_ncipher_be (b7, rkey);
+
+      DO_ROUND(1);
+      DO_ROUND(2);
+      DO_ROUND(3);
+      DO_ROUND(4);
+      DO_ROUND(5);
+      DO_ROUND(6);
+      DO_ROUND(7);
+      DO_ROUND(8);
+      DO_ROUND(9);
+      if (rounds >= 12)
+       {
+         DO_ROUND(10);
+         DO_ROUND(11);
+         if (rounds > 12)
+           {
+             DO_ROUND(12);
+             DO_ROUND(13);
+           }
+       }
+
+#undef DO_ROUND
+
+      iv = asm_xor (rkeylast, iv);
+      in0 = asm_xor (rkeylast, in0);
+      in1 = asm_xor (rkeylast, in1);
+      in2 = asm_xor (rkeylast, in2);
+      b0 = asm_ncipherlast_be (b0, iv);
+      iv = in7;
+      b1 = asm_ncipherlast_be (b1, in0);
+      in3 = asm_xor (rkeylast, in3);
+      in4 = asm_xor (rkeylast, in4);
+      b2 = asm_ncipherlast_be (b2, in1);
+      b3 = asm_ncipherlast_be (b3, in2);
+      in5 = asm_xor (rkeylast, in5);
+      in6 = asm_xor (rkeylast, in6);
+      b0 = VEC_BE_SWAP (b0, bige_const);
+      b1 = VEC_BE_SWAP (b1, bige_const);
+      b4 = asm_ncipherlast_be (b4, in3);
+      b5 = asm_ncipherlast_be (b5, in4);
+      b2 = VEC_BE_SWAP (b2, bige_const);
+      b3 = VEC_BE_SWAP (b3, bige_const);
+      b6 = asm_ncipherlast_be (b6, in5);
+      b7 = asm_ncipherlast_be (b7, in6);
+      b4 = VEC_BE_SWAP (b4, bige_const);
+      b5 = VEC_BE_SWAP (b5, bige_const);
+      b6 = VEC_BE_SWAP (b6, bige_const);
+      b7 = VEC_BE_SWAP (b7, bige_const);
+      VEC_STORE_BE_NOSWAP (out, 0, b0);
+      VEC_STORE_BE_NOSWAP (out, 1, b1);
+      VEC_STORE_BE_NOSWAP (out, 2, b2);
+      VEC_STORE_BE_NOSWAP (out, 3, b3);
+      VEC_STORE_BE_NOSWAP (out, 4, b4);
+      VEC_STORE_BE_NOSWAP (out, 5, b5);
+      VEC_STORE_BE_NOSWAP (out, 6, b6);
+      VEC_STORE_BE_NOSWAP (out, 7, b7);
+      out += 8;
+    }
+
+  if (nblocks >= 4)
+    {
+      in0 = VEC_LOAD_BE (in, 0, bige_const);
+      in1 = VEC_LOAD_BE (in, 1, bige_const);
+      in2 = VEC_LOAD_BE (in, 2, bige_const);
+      in3 = VEC_LOAD_BE (in, 3, bige_const);
+
+      b0 = asm_xor (rkey0, in0);
+      b1 = asm_xor (rkey0, in1);
+      b2 = asm_xor (rkey0, in2);
+      b3 = asm_xor (rkey0, in3);
+
+#define DO_ROUND(r) \
+             rkey = ALIGNED_LOAD (rk, r); \
+             b0 = asm_ncipher_be (b0, rkey); \
+             b1 = asm_ncipher_be (b1, rkey); \
+             b2 = asm_ncipher_be (b2, rkey); \
+             b3 = asm_ncipher_be (b3, rkey);
+
+      DO_ROUND(1);
+      DO_ROUND(2);
+      DO_ROUND(3);
+      DO_ROUND(4);
+      DO_ROUND(5);
+      DO_ROUND(6);
+      DO_ROUND(7);
+      DO_ROUND(8);
+      DO_ROUND(9);
+      if (rounds >= 12)
+       {
+         DO_ROUND(10);
+         DO_ROUND(11);
+         if (rounds > 12)
+           {
+             DO_ROUND(12);
+             DO_ROUND(13);
+           }
+       }
+
+#undef DO_ROUND
+
+      iv = asm_xor (rkeylast, iv);
+      in0 = asm_xor (rkeylast, in0);
+      in1 = asm_xor (rkeylast, in1);
+      in2 = asm_xor (rkeylast, in2);
+
+      b0 = asm_ncipherlast_be (b0, iv);
+      iv = in3;
+      b1 = asm_ncipherlast_be (b1, in0);
+      b2 = asm_ncipherlast_be (b2, in1);
+      b3 = asm_ncipherlast_be (b3, in2);
+
+      VEC_STORE_BE (out, 0, b0, bige_const);
+      VEC_STORE_BE (out, 1, b1, bige_const);
+      VEC_STORE_BE (out, 2, b2, bige_const);
+      VEC_STORE_BE (out, 3, b3, bige_const);
+
+      in += 4;
+      out += 4;
+      nblocks -= 4;
+    }
+
+  for (; nblocks; nblocks--)
+    {
+      rkeylast = rkeylast_orig ^ iv;
+
+      iv = VEC_LOAD_BE (in, 0, bige_const);
+      b = iv;
+      AES_DECRYPT (b, rounds);
+
+      VEC_STORE_BE (out, 0, b, bige_const);
+
+      in++;
+      out++;
+    }
+
+  VEC_STORE_BE (iv_arg, 0, iv, bige_const);
+}
+
+
+void CTR_ENC_FUNC (void *context, unsigned char *ctr_arg,
+                  void *outbuf_arg, const void *inbuf_arg,
+                  size_t nblocks)
+{
+  static const unsigned char vec_one_const[16] =
+    { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 };
+  const block bige_const = asm_load_be_const();
+  RIJNDAEL_context *ctx = context;
+  const u128_t *rk = (u128_t *)&ctx->keyschenc;
+  const u128_t *in = (const u128_t *)inbuf_arg;
+  u128_t *out = (u128_t *)outbuf_arg;
+  int rounds = ctx->rounds;
+  ROUND_KEY_VARIABLES;
+  block rkeylast_orig;
+  block ctr, b, one;
+
+  ctr = VEC_LOAD_BE (ctr_arg, 0, bige_const);
+  one = VEC_LOAD_BE (&vec_one_const, 0, bige_const);
+
+  PRELOAD_ROUND_KEYS (rounds);
+  rkeylast_orig = rkeylast;
+
+  if (nblocks >= 4)
+    {
+      block in0, in1, in2, in3, in4, in5, in6, in7;
+      block b0, b1, b2, b3, b4, b5, b6, b7;
+      block two, three, four;
+      block rkey;
+
+      two   = asm_add_uint128 (one, one);
+      three = asm_add_uint128 (two, one);
+      four  = asm_add_uint128 (two, two);
+
+      for (; nblocks >= 8; nblocks -= 8)
+       {
+         b1 = asm_add_uint128 (ctr, one);
+         b2 = asm_add_uint128 (ctr, two);
+         b3 = asm_add_uint128 (ctr, three);
+         b4 = asm_add_uint128 (ctr, four);
+         b5 = asm_add_uint128 (b1, four);
+         b6 = asm_add_uint128 (b2, four);
+         b7 = asm_add_uint128 (b3, four);
+         b0 = asm_xor (rkey0, ctr);
+         rkey = ALIGNED_LOAD (rk, 1);
+         ctr = asm_add_uint128 (b4, four);
+         b1 = asm_xor (rkey0, b1);
+         b2 = asm_xor (rkey0, b2);
+         b3 = asm_xor (rkey0, b3);
+         b0 = asm_cipher_be (b0, rkey);
+         b1 = asm_cipher_be (b1, rkey);
+         b2 = asm_cipher_be (b2, rkey);
+         b3 = asm_cipher_be (b3, rkey);
+         b4 = asm_xor (rkey0, b4);
+         b5 = asm_xor (rkey0, b5);
+         b6 = asm_xor (rkey0, b6);
+         b7 = asm_xor (rkey0, b7);
+         b4 = asm_cipher_be (b4, rkey);
+         b5 = asm_cipher_be (b5, rkey);
+         b6 = asm_cipher_be (b6, rkey);
+         b7 = asm_cipher_be (b7, rkey);
+
+#define DO_ROUND(r) \
+             rkey = ALIGNED_LOAD (rk, r); \
+             b0 = asm_cipher_be (b0, rkey); \
+             b1 = asm_cipher_be (b1, rkey); \
+             b2 = asm_cipher_be (b2, rkey); \
+             b3 = asm_cipher_be (b3, rkey); \
+             b4 = asm_cipher_be (b4, rkey); \
+             b5 = asm_cipher_be (b5, rkey); \
+             b6 = asm_cipher_be (b6, rkey); \
+             b7 = asm_cipher_be (b7, rkey);
+
+         in0 = VEC_LOAD_BE_NOSWAP (in, 0);
+         DO_ROUND(2);
+         in1 = VEC_LOAD_BE_NOSWAP (in, 1);
+         DO_ROUND(3);
+         in2 = VEC_LOAD_BE_NOSWAP (in, 2);
+         DO_ROUND(4);
+         in3 = VEC_LOAD_BE_NOSWAP (in, 3);
+         DO_ROUND(5);
+         in4 = VEC_LOAD_BE_NOSWAP (in, 4);
+         DO_ROUND(6);
+         in5 = VEC_LOAD_BE_NOSWAP (in, 5);
+         DO_ROUND(7);
+         in6 = VEC_LOAD_BE_NOSWAP (in, 6);
+         DO_ROUND(8);
+         in7 = VEC_LOAD_BE_NOSWAP (in, 7);
+         in += 8;
+         DO_ROUND(9);
+
+         if (rounds >= 12)
+           {
+             DO_ROUND(10);
+             DO_ROUND(11);
+             if (rounds > 12)
+               {
+                 DO_ROUND(12);
+                 DO_ROUND(13);
+               }
+           }
+
+#undef DO_ROUND
+
+         in0 = VEC_BE_SWAP (in0, bige_const);
+         in1 = VEC_BE_SWAP (in1, bige_const);
+         in2 = VEC_BE_SWAP (in2, bige_const);
+         in3 = VEC_BE_SWAP (in3, bige_const);
+         in4 = VEC_BE_SWAP (in4, bige_const);
+         in5 = VEC_BE_SWAP (in5, bige_const);
+         in6 = VEC_BE_SWAP (in6, bige_const);
+         in7 = VEC_BE_SWAP (in7, bige_const);
+
+         in0 = asm_xor (rkeylast, in0);
+         in1 = asm_xor (rkeylast, in1);
+         in2 = asm_xor (rkeylast, in2);
+         in3 = asm_xor (rkeylast, in3);
+         b0 = asm_cipherlast_be (b0, in0);
+         b1 = asm_cipherlast_be (b1, in1);
+         in4 = asm_xor (rkeylast, in4);
+         in5 = asm_xor (rkeylast, in5);
+         b2 = asm_cipherlast_be (b2, in2);
+         b3 = asm_cipherlast_be (b3, in3);
+         in6 = asm_xor (rkeylast, in6);
+         in7 = asm_xor (rkeylast, in7);
+         b4 = asm_cipherlast_be (b4, in4);
+         b5 = asm_cipherlast_be (b5, in5);
+         b6 = asm_cipherlast_be (b6, in6);
+         b7 = asm_cipherlast_be (b7, in7);
+
+         b0 = VEC_BE_SWAP (b0, bige_const);
+         b1 = VEC_BE_SWAP (b1, bige_const);
+         b2 = VEC_BE_SWAP (b2, bige_const);
+         b3 = VEC_BE_SWAP (b3, bige_const);
+         b4 = VEC_BE_SWAP (b4, bige_const);
+         b5 = VEC_BE_SWAP (b5, bige_const);
+         b6 = VEC_BE_SWAP (b6, bige_const);
+         b7 = VEC_BE_SWAP (b7, bige_const);
+         VEC_STORE_BE_NOSWAP (out, 0, b0);
+         VEC_STORE_BE_NOSWAP (out, 1, b1);
+         VEC_STORE_BE_NOSWAP (out, 2, b2);
+         VEC_STORE_BE_NOSWAP (out, 3, b3);
+         VEC_STORE_BE_NOSWAP (out, 4, b4);
+         VEC_STORE_BE_NOSWAP (out, 5, b5);
+         VEC_STORE_BE_NOSWAP (out, 6, b6);
+         VEC_STORE_BE_NOSWAP (out, 7, b7);
+         out += 8;
+       }
+
+      if (nblocks >= 4)
+       {
+         b1 = asm_add_uint128 (ctr, one);
+         b2 = asm_add_uint128 (ctr, two);
+         b3 = asm_add_uint128 (ctr, three);
+         b0 = asm_xor (rkey0, ctr);
+         ctr = asm_add_uint128 (ctr, four);
+         b1 = asm_xor (rkey0, b1);
+         b2 = asm_xor (rkey0, b2);
+         b3 = asm_xor (rkey0, b3);
+
+#define DO_ROUND(r) \
+             rkey = ALIGNED_LOAD (rk, r); \
+             b0 = asm_cipher_be (b0, rkey); \
+             b1 = asm_cipher_be (b1, rkey); \
+             b2 = asm_cipher_be (b2, rkey); \
+             b3 = asm_cipher_be (b3, rkey);
+
+         DO_ROUND(1);
+         DO_ROUND(2);
+         DO_ROUND(3);
+         DO_ROUND(4);
+         DO_ROUND(5);
+         DO_ROUND(6);
+         DO_ROUND(7);
+         DO_ROUND(8);
+
+         in0 = VEC_LOAD_BE (in, 0, bige_const);
+         in1 = VEC_LOAD_BE (in, 1, bige_const);
+         in2 = VEC_LOAD_BE (in, 2, bige_const);
+         in3 = VEC_LOAD_BE (in, 3, bige_const);
+
+         DO_ROUND(9);
+         if (rounds >= 12)
+           {
+             DO_ROUND(10);
+             DO_ROUND(11);
+             if (rounds > 12)
+               {
+                 DO_ROUND(12);
+                 DO_ROUND(13);
+               }
+           }
+
+#undef DO_ROUND
+
+         in0 = asm_xor (rkeylast, in0);
+         in1 = asm_xor (rkeylast, in1);
+         in2 = asm_xor (rkeylast, in2);
+         in3 = asm_xor (rkeylast, in3);
+
+         b0 = asm_cipherlast_be (b0, in0);
+         b1 = asm_cipherlast_be (b1, in1);
+         b2 = asm_cipherlast_be (b2, in2);
+         b3 = asm_cipherlast_be (b3, in3);
+
+         VEC_STORE_BE (out, 0, b0, bige_const);
+         VEC_STORE_BE (out, 1, b1, bige_const);
+         VEC_STORE_BE (out, 2, b2, bige_const);
+         VEC_STORE_BE (out, 3, b3, bige_const);
+
+         in += 4;
+         out += 4;
+         nblocks -= 4;
+       }
+    }
+
+  for (; nblocks; nblocks--)
+    {
+      b = ctr;
+      ctr = asm_add_uint128 (ctr, one);
+      rkeylast = rkeylast_orig ^ VEC_LOAD_BE (in, 0, bige_const);
+
+      AES_ENCRYPT (b, rounds);
+
+      VEC_STORE_BE (out, 0, b, bige_const);
+
+      out++;
+      in++;
+    }
+
+  VEC_STORE_BE (ctr_arg, 0, ctr, bige_const);
+}
+
+
+size_t OCB_CRYPT_FUNC (gcry_cipher_hd_t c, void *outbuf_arg,
+                      const void *inbuf_arg, size_t nblocks,
+                      int encrypt)
+{
+  const block bige_const = asm_load_be_const();
+  RIJNDAEL_context *ctx = (void *)&c->context.c;
+  const u128_t *in = (const u128_t *)inbuf_arg;
+  u128_t *out = (u128_t *)outbuf_arg;
+  int rounds = ctx->rounds;
+  u64 data_nblocks = c->u_mode.ocb.data_nblocks;
+  block l0, l1, l2, l;
+  block b0, b1, b2, b3, b4, b5, b6, b7, b;
+  block iv0, iv1, iv2, iv3, iv4, iv5, iv6, iv7;
+  block rkey, rkeylf;
+  block ctr, iv;
+  ROUND_KEY_VARIABLES;
+
+  iv = VEC_LOAD_BE (c->u_iv.iv, 0, bige_const);
+  ctr = VEC_LOAD_BE (c->u_ctr.ctr, 0, bige_const);
+
+  l0 = VEC_LOAD_BE (c->u_mode.ocb.L[0], 0, bige_const);
+  l1 = VEC_LOAD_BE (c->u_mode.ocb.L[1], 0, bige_const);
+  l2 = VEC_LOAD_BE (c->u_mode.ocb.L[2], 0, bige_const);
+
+  if (encrypt)
+    {
+      const u128_t *rk = (u128_t *)&ctx->keyschenc;
+
+      PRELOAD_ROUND_KEYS (rounds);
+
+      for (; nblocks >= 8 && data_nblocks % 8; nblocks--)
+       {
+         l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), 0, bige_const);
+         b = VEC_LOAD_BE (in, 0, bige_const);
+
+         /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+         iv ^= l;
+         /* Checksum_i = Checksum_{i-1} xor P_i  */
+         ctr ^= b;
+         /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i)  */
+         b ^= iv;
+         AES_ENCRYPT (b, rounds);
+         b ^= iv;
+
+         VEC_STORE_BE (out, 0, b, bige_const);
+
+         in += 1;
+         out += 1;
+       }
+
+      for (; nblocks >= 8; nblocks -= 8)
+       {
+         b0 = VEC_LOAD_BE_NOSWAP (in, 0);
+         b1 = VEC_LOAD_BE_NOSWAP (in, 1);
+         b2 = VEC_LOAD_BE_NOSWAP (in, 2);
+         b3 = VEC_LOAD_BE_NOSWAP (in, 3);
+         b4 = VEC_LOAD_BE_NOSWAP (in, 4);
+         b5 = VEC_LOAD_BE_NOSWAP (in, 5);
+         b6 = VEC_LOAD_BE_NOSWAP (in, 6);
+         b7 = VEC_LOAD_BE_NOSWAP (in, 7);
+         in += 8;
+         l = VEC_LOAD_BE_NOSWAP (ocb_get_l (c, data_nblocks += 8), 0);
+         b0 = VEC_BE_SWAP(b0, bige_const);
+         b1 = VEC_BE_SWAP(b1, bige_const);
+         b2 = VEC_BE_SWAP(b2, bige_const);
+         b3 = VEC_BE_SWAP(b3, bige_const);
+         b4 = VEC_BE_SWAP(b4, bige_const);
+         b5 = VEC_BE_SWAP(b5, bige_const);
+         b6 = VEC_BE_SWAP(b6, bige_const);
+         b7 = VEC_BE_SWAP(b7, bige_const);
+         l = VEC_BE_SWAP(l, bige_const);
+
+         ctr ^= b0 ^ b1 ^ b2 ^ b3 ^ b4 ^ b5 ^ b6 ^ b7;
+
+         iv ^= rkey0;
+
+         iv0 = iv ^ l0;
+         iv1 = iv ^ l0 ^ l1;
+         iv2 = iv ^ l1;
+         iv3 = iv ^ l1 ^ l2;
+         iv4 = iv ^ l1 ^ l2 ^ l0;
+         iv5 = iv ^ l2 ^ l0;
+         iv6 = iv ^ l2;
+         iv7 = iv ^ l2 ^ l;
+
+         b0 ^= iv0;
+         b1 ^= iv1;
+         b2 ^= iv2;
+         b3 ^= iv3;
+         b4 ^= iv4;
+         b5 ^= iv5;
+         b6 ^= iv6;
+         b7 ^= iv7;
+         iv = iv7 ^ rkey0;
+
+#define DO_ROUND(r) \
+             rkey = ALIGNED_LOAD (rk, r); \
+             b0 = asm_cipher_be (b0, rkey); \
+             b1 = asm_cipher_be (b1, rkey); \
+             b2 = asm_cipher_be (b2, rkey); \
+             b3 = asm_cipher_be (b3, rkey); \
+             b4 = asm_cipher_be (b4, rkey); \
+             b5 = asm_cipher_be (b5, rkey); \
+             b6 = asm_cipher_be (b6, rkey); \
+             b7 = asm_cipher_be (b7, rkey);
+
+         DO_ROUND(1);
+         DO_ROUND(2);
+         DO_ROUND(3);
+         DO_ROUND(4);
+         DO_ROUND(5);
+         DO_ROUND(6);
+         DO_ROUND(7);
+
+         rkeylf = asm_xor (rkeylast, rkey0);
+
+         DO_ROUND(8);
+
+         iv0 = asm_xor (rkeylf, iv0);
+         iv1 = asm_xor (rkeylf, iv1);
+         iv2 = asm_xor (rkeylf, iv2);
+         iv3 = asm_xor (rkeylf, iv3);
+         iv4 = asm_xor (rkeylf, iv4);
+         iv5 = asm_xor (rkeylf, iv5);
+         iv6 = asm_xor (rkeylf, iv6);
+         iv7 = asm_xor (rkeylf, iv7);
+
+         DO_ROUND(9);
+         if (rounds >= 12)
+           {
+             DO_ROUND(10);
+             DO_ROUND(11);
+             if (rounds > 12)
+               {
+                 DO_ROUND(12);
+                 DO_ROUND(13);
+               }
+           }
+
+#undef DO_ROUND
+
+         b0 = asm_cipherlast_be (b0, iv0);
+         b1 = asm_cipherlast_be (b1, iv1);
+         b2 = asm_cipherlast_be (b2, iv2);
+         b3 = asm_cipherlast_be (b3, iv3);
+         b4 = asm_cipherlast_be (b4, iv4);
+         b5 = asm_cipherlast_be (b5, iv5);
+         b6 = asm_cipherlast_be (b6, iv6);
+         b7 = asm_cipherlast_be (b7, iv7);
+
+         b0 = VEC_BE_SWAP (b0, bige_const);
+         b1 = VEC_BE_SWAP (b1, bige_const);
+         b2 = VEC_BE_SWAP (b2, bige_const);
+         b3 = VEC_BE_SWAP (b3, bige_const);
+         b4 = VEC_BE_SWAP (b4, bige_const);
+         b5 = VEC_BE_SWAP (b5, bige_const);
+         b6 = VEC_BE_SWAP (b6, bige_const);
+         b7 = VEC_BE_SWAP (b7, bige_const);
+         VEC_STORE_BE_NOSWAP (out, 0, b0);
+         VEC_STORE_BE_NOSWAP (out, 1, b1);
+         VEC_STORE_BE_NOSWAP (out, 2, b2);
+         VEC_STORE_BE_NOSWAP (out, 3, b3);
+         VEC_STORE_BE_NOSWAP (out, 4, b4);
+         VEC_STORE_BE_NOSWAP (out, 5, b5);
+         VEC_STORE_BE_NOSWAP (out, 6, b6);
+         VEC_STORE_BE_NOSWAP (out, 7, b7);
+         out += 8;
+       }
+
+      if (nblocks >= 4 && (data_nblocks % 4) == 0)
+       {
+         b0 = VEC_LOAD_BE (in, 0, bige_const);
+         b1 = VEC_LOAD_BE (in, 1, bige_const);
+         b2 = VEC_LOAD_BE (in, 2, bige_const);
+         b3 = VEC_LOAD_BE (in, 3, bige_const);
+
+         l = VEC_LOAD_BE (ocb_get_l (c, data_nblocks += 4), 0, bige_const);
+
+         ctr ^= b0 ^ b1 ^ b2 ^ b3;
+
+         iv ^= rkey0;
+
+         iv0 = iv ^ l0;
+         iv1 = iv ^ l0 ^ l1;
+         iv2 = iv ^ l1;
+         iv3 = iv ^ l1 ^ l;
+
+         b0 ^= iv0;
+         b1 ^= iv1;
+         b2 ^= iv2;
+         b3 ^= iv3;
+         iv = iv3 ^ rkey0;
+
+#define DO_ROUND(r) \
+             rkey = ALIGNED_LOAD (rk, r); \
+             b0 = asm_cipher_be (b0, rkey); \
+             b1 = asm_cipher_be (b1, rkey); \
+             b2 = asm_cipher_be (b2, rkey); \
+             b3 = asm_cipher_be (b3, rkey);
+
+         DO_ROUND(1);
+         DO_ROUND(2);
+         DO_ROUND(3);
+         DO_ROUND(4);
+         DO_ROUND(5);
+         DO_ROUND(6);
+         DO_ROUND(7);
+         DO_ROUND(8);
+         DO_ROUND(9);
+         if (rounds >= 12)
+           {
+             DO_ROUND(10);
+             DO_ROUND(11);
+             if (rounds > 12)
+               {
+                 DO_ROUND(12);
+                 DO_ROUND(13);
+               }
+           }
+
+#undef DO_ROUND
+
+         rkey = rkeylast ^ rkey0;
+         b0 = asm_cipherlast_be (b0, rkey ^ iv0);
+         b1 = asm_cipherlast_be (b1, rkey ^ iv1);
+         b2 = asm_cipherlast_be (b2, rkey ^ iv2);
+         b3 = asm_cipherlast_be (b3, rkey ^ iv3);
+
+         VEC_STORE_BE (out, 0, b0, bige_const);
+         VEC_STORE_BE (out, 1, b1, bige_const);
+         VEC_STORE_BE (out, 2, b2, bige_const);
+         VEC_STORE_BE (out, 3, b3, bige_const);
+
+         in += 4;
+         out += 4;
+         nblocks -= 4;
+       }
+
+      for (; nblocks; nblocks--)
+       {
+         l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), 0, bige_const);
+         b = VEC_LOAD_BE (in, 0, bige_const);
+
+         /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+         iv ^= l;
+         /* Checksum_i = Checksum_{i-1} xor P_i  */
+         ctr ^= b;
+         /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i)  */
+         b ^= iv;
+         AES_ENCRYPT (b, rounds);
+         b ^= iv;
+
+         VEC_STORE_BE (out, 0, b, bige_const);
+
+         in += 1;
+         out += 1;
+       }
+    }
+  else
+    {
+      const u128_t *rk = (u128_t *)&ctx->keyschdec;
+
+      if (!ctx->decryption_prepared)
+       {
+         internal_aes_ppc_prepare_decryption (ctx);
+         ctx->decryption_prepared = 1;
+       }
+
+      PRELOAD_ROUND_KEYS (rounds);
+
+      for (; nblocks >= 8 && data_nblocks % 8; nblocks--)
+       {
+         l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), 0, bige_const);
+         b = VEC_LOAD_BE (in, 0, bige_const);
+
+         /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+         iv ^= l;
+         /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i)  */
+         b ^= iv;
+         AES_DECRYPT (b, rounds);
+         b ^= iv;
+         /* Checksum_i = Checksum_{i-1} xor P_i  */
+         ctr ^= b;
+
+         VEC_STORE_BE (out, 0, b, bige_const);
+
+         in += 1;
+         out += 1;
+       }
+
+      for (; nblocks >= 8; nblocks -= 8)
+       {
+         b0 = VEC_LOAD_BE_NOSWAP (in, 0);
+         b1 = VEC_LOAD_BE_NOSWAP (in, 1);
+         b2 = VEC_LOAD_BE_NOSWAP (in, 2);
+         b3 = VEC_LOAD_BE_NOSWAP (in, 3);
+         b4 = VEC_LOAD_BE_NOSWAP (in, 4);
+         b5 = VEC_LOAD_BE_NOSWAP (in, 5);
+         b6 = VEC_LOAD_BE_NOSWAP (in, 6);
+         b7 = VEC_LOAD_BE_NOSWAP (in, 7);
+         in += 8;
+         l = VEC_LOAD_BE_NOSWAP (ocb_get_l (c, data_nblocks += 8), 0);
+         b0 = VEC_BE_SWAP(b0, bige_const);
+         b1 = VEC_BE_SWAP(b1, bige_const);
+         b2 = VEC_BE_SWAP(b2, bige_const);
+         b3 = VEC_BE_SWAP(b3, bige_const);
+         b4 = VEC_BE_SWAP(b4, bige_const);
+         b5 = VEC_BE_SWAP(b5, bige_const);
+         b6 = VEC_BE_SWAP(b6, bige_const);
+         b7 = VEC_BE_SWAP(b7, bige_const);
+         l = VEC_BE_SWAP(l, bige_const);
+
+         iv ^= rkey0;
+
+         iv0 = iv ^ l0;
+         iv1 = iv ^ l0 ^ l1;
+         iv2 = iv ^ l1;
+         iv3 = iv ^ l1 ^ l2;
+         iv4 = iv ^ l1 ^ l2 ^ l0;
+         iv5 = iv ^ l2 ^ l0;
+         iv6 = iv ^ l2;
+         iv7 = iv ^ l2 ^ l;
+
+         b0 ^= iv0;
+         b1 ^= iv1;
+         b2 ^= iv2;
+         b3 ^= iv3;
+         b4 ^= iv4;
+         b5 ^= iv5;
+         b6 ^= iv6;
+         b7 ^= iv7;
+         iv = iv7 ^ rkey0;
+
+#define DO_ROUND(r) \
+             rkey = ALIGNED_LOAD (rk, r); \
+             b0 = asm_ncipher_be (b0, rkey); \
+             b1 = asm_ncipher_be (b1, rkey); \
+             b2 = asm_ncipher_be (b2, rkey); \
+             b3 = asm_ncipher_be (b3, rkey); \
+             b4 = asm_ncipher_be (b4, rkey); \
+             b5 = asm_ncipher_be (b5, rkey); \
+             b6 = asm_ncipher_be (b6, rkey); \
+             b7 = asm_ncipher_be (b7, rkey);
+
+         DO_ROUND(1);
+         DO_ROUND(2);
+         DO_ROUND(3);
+         DO_ROUND(4);
+         DO_ROUND(5);
+         DO_ROUND(6);
+         DO_ROUND(7);
+
+         rkeylf = asm_xor (rkeylast, rkey0);
+
+         DO_ROUND(8);
+
+         iv0 = asm_xor (rkeylf, iv0);
+         iv1 = asm_xor (rkeylf, iv1);
+         iv2 = asm_xor (rkeylf, iv2);
+         iv3 = asm_xor (rkeylf, iv3);
+         iv4 = asm_xor (rkeylf, iv4);
+         iv5 = asm_xor (rkeylf, iv5);
+         iv6 = asm_xor (rkeylf, iv6);
+         iv7 = asm_xor (rkeylf, iv7);
+
+         DO_ROUND(9);
+         if (rounds >= 12)
+           {
+             DO_ROUND(10);
+             DO_ROUND(11);
+             if (rounds > 12)
+               {
+                 DO_ROUND(12);
+                 DO_ROUND(13);
+               }
+           }
+
+#undef DO_ROUND
+
+         b0 = asm_ncipherlast_be (b0, iv0);
+         b1 = asm_ncipherlast_be (b1, iv1);
+         b2 = asm_ncipherlast_be (b2, iv2);
+         b3 = asm_ncipherlast_be (b3, iv3);
+         b4 = asm_ncipherlast_be (b4, iv4);
+         b5 = asm_ncipherlast_be (b5, iv5);
+         b6 = asm_ncipherlast_be (b6, iv6);
+         b7 = asm_ncipherlast_be (b7, iv7);
+
+         ctr ^= b0 ^ b1 ^ b2 ^ b3 ^ b4 ^ b5 ^ b6 ^ b7;
+
+         b0 = VEC_BE_SWAP (b0, bige_const);
+         b1 = VEC_BE_SWAP (b1, bige_const);
+         b2 = VEC_BE_SWAP (b2, bige_const);
+         b3 = VEC_BE_SWAP (b3, bige_const);
+         b4 = VEC_BE_SWAP (b4, bige_const);
+         b5 = VEC_BE_SWAP (b5, bige_const);
+         b6 = VEC_BE_SWAP (b6, bige_const);
+         b7 = VEC_BE_SWAP (b7, bige_const);
+         VEC_STORE_BE_NOSWAP (out, 0, b0);
+         VEC_STORE_BE_NOSWAP (out, 1, b1);
+         VEC_STORE_BE_NOSWAP (out, 2, b2);
+         VEC_STORE_BE_NOSWAP (out, 3, b3);
+         VEC_STORE_BE_NOSWAP (out, 4, b4);
+         VEC_STORE_BE_NOSWAP (out, 5, b5);
+         VEC_STORE_BE_NOSWAP (out, 6, b6);
+         VEC_STORE_BE_NOSWAP (out, 7, b7);
+         out += 8;
+       }
+
+      if (nblocks >= 4 && (data_nblocks % 4) == 0)
+       {
+         b0 = VEC_LOAD_BE (in, 0, bige_const);
+         b1 = VEC_LOAD_BE (in, 1, bige_const);
+         b2 = VEC_LOAD_BE (in, 2, bige_const);
+         b3 = VEC_LOAD_BE (in, 3, bige_const);
+
+         l = VEC_LOAD_BE (ocb_get_l (c, data_nblocks += 4), 0, bige_const);
+
+         iv ^= rkey0;
+
+         iv0 = iv ^ l0;
+         iv1 = iv ^ l0 ^ l1;
+         iv2 = iv ^ l1;
+         iv3 = iv ^ l1 ^ l;
+
+         b0 ^= iv0;
+         b1 ^= iv1;
+         b2 ^= iv2;
+         b3 ^= iv3;
+         iv = iv3 ^ rkey0;
+
+#define DO_ROUND(r) \
+             rkey = ALIGNED_LOAD (rk, r); \
+             b0 = asm_ncipher_be (b0, rkey); \
+             b1 = asm_ncipher_be (b1, rkey); \
+             b2 = asm_ncipher_be (b2, rkey); \
+             b3 = asm_ncipher_be (b3, rkey);
+
+         DO_ROUND(1);
+         DO_ROUND(2);
+         DO_ROUND(3);
+         DO_ROUND(4);
+         DO_ROUND(5);
+         DO_ROUND(6);
+         DO_ROUND(7);
+         DO_ROUND(8);
+         DO_ROUND(9);
+         if (rounds >= 12)
+           {
+             DO_ROUND(10);
+             DO_ROUND(11);
+             if (rounds > 12)
+               {
+                 DO_ROUND(12);
+                 DO_ROUND(13);
+               }
+           }
+
+#undef DO_ROUND
+
+         rkey = rkeylast ^ rkey0;
+         b0 = asm_ncipherlast_be (b0, rkey ^ iv0);
+         b1 = asm_ncipherlast_be (b1, rkey ^ iv1);
+         b2 = asm_ncipherlast_be (b2, rkey ^ iv2);
+         b3 = asm_ncipherlast_be (b3, rkey ^ iv3);
+
+         VEC_STORE_BE (out, 0, b0, bige_const);
+         VEC_STORE_BE (out, 1, b1, bige_const);
+         VEC_STORE_BE (out, 2, b2, bige_const);
+         VEC_STORE_BE (out, 3, b3, bige_const);
+
+         ctr ^= b0 ^ b1 ^ b2 ^ b3;
+
+         in += 4;
+         out += 4;
+         nblocks -= 4;
+       }
+
+      for (; nblocks; nblocks--)
+       {
+         l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), 0, bige_const);
+         b = VEC_LOAD_BE (in, 0, bige_const);
+
+         /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+         iv ^= l;
+         /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i)  */
+         b ^= iv;
+         AES_DECRYPT (b, rounds);
+         b ^= iv;
+         /* Checksum_i = Checksum_{i-1} xor P_i  */
+         ctr ^= b;
+
+         VEC_STORE_BE (out, 0, b, bige_const);
+
+         in += 1;
+         out += 1;
+       }
+    }
+
+  VEC_STORE_BE (c->u_iv.iv, 0, iv, bige_const);
+  VEC_STORE_BE (c->u_ctr.ctr, 0, ctr, bige_const);
+  c->u_mode.ocb.data_nblocks = data_nblocks;
+
+  return 0;
+}
+
+size_t OCB_AUTH_FUNC (gcry_cipher_hd_t c, void *abuf_arg, size_t nblocks)
+{
+  const block bige_const = asm_load_be_const();
+  RIJNDAEL_context *ctx = (void *)&c->context.c;
+  const u128_t *rk = (u128_t *)&ctx->keyschenc;
+  const u128_t *abuf = (const u128_t *)abuf_arg;
+  int rounds = ctx->rounds;
+  u64 data_nblocks = c->u_mode.ocb.aad_nblocks;
+  block l0, l1, l2, l;
+  block b0, b1, b2, b3, b4, b5, b6, b7, b;
+  block iv0, iv1, iv2, iv3, iv4, iv5, iv6, iv7;
+  block rkey, frkey;
+  block ctr, iv;
+  ROUND_KEY_VARIABLES;
+
+  iv = VEC_LOAD_BE (c->u_mode.ocb.aad_offset, 0, bige_const);
+  ctr = VEC_LOAD_BE (c->u_mode.ocb.aad_sum, 0, bige_const);
+
+  l0 = VEC_LOAD_BE (c->u_mode.ocb.L[0], 0, bige_const);
+  l1 = VEC_LOAD_BE (c->u_mode.ocb.L[1], 0, bige_const);
+  l2 = VEC_LOAD_BE (c->u_mode.ocb.L[2], 0, bige_const);
+
+  PRELOAD_ROUND_KEYS (rounds);
+
+  for (; nblocks >= 8 && data_nblocks % 8; nblocks--)
+    {
+      l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), 0, bige_const);
+      b = VEC_LOAD_BE (abuf, 0, bige_const);
+
+      /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+      iv ^= l;
+      /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i)  */
+      b ^= iv;
+      AES_ENCRYPT (b, rounds);
+      ctr ^= b;
+
+      abuf += 1;
+    }
+
+  for (; nblocks >= 8; nblocks -= 8)
+    {
+      b0 = VEC_LOAD_BE (abuf, 0, bige_const);
+      b1 = VEC_LOAD_BE (abuf, 1, bige_const);
+      b2 = VEC_LOAD_BE (abuf, 2, bige_const);
+      b3 = VEC_LOAD_BE (abuf, 3, bige_const);
+      b4 = VEC_LOAD_BE (abuf, 4, bige_const);
+      b5 = VEC_LOAD_BE (abuf, 5, bige_const);
+      b6 = VEC_LOAD_BE (abuf, 6, bige_const);
+      b7 = VEC_LOAD_BE (abuf, 7, bige_const);
+
+      l = VEC_LOAD_BE (ocb_get_l (c, data_nblocks += 8), 0, bige_const);
+
+      frkey = rkey0;
+      iv ^= frkey;
+
+      iv0 = iv ^ l0;
+      iv1 = iv ^ l0 ^ l1;
+      iv2 = iv ^ l1;
+      iv3 = iv ^ l1 ^ l2;
+      iv4 = iv ^ l1 ^ l2 ^ l0;
+      iv5 = iv ^ l2 ^ l0;
+      iv6 = iv ^ l2;
+      iv7 = iv ^ l2 ^ l;
+
+      b0 ^= iv0;
+      b1 ^= iv1;
+      b2 ^= iv2;
+      b3 ^= iv3;
+      b4 ^= iv4;
+      b5 ^= iv5;
+      b6 ^= iv6;
+      b7 ^= iv7;
+      iv = iv7 ^ frkey;
+
+#define DO_ROUND(r) \
+             rkey = ALIGNED_LOAD (rk, r); \
+             b0 = asm_cipher_be (b0, rkey); \
+             b1 = asm_cipher_be (b1, rkey); \
+             b2 = asm_cipher_be (b2, rkey); \
+             b3 = asm_cipher_be (b3, rkey); \
+             b4 = asm_cipher_be (b4, rkey); \
+             b5 = asm_cipher_be (b5, rkey); \
+             b6 = asm_cipher_be (b6, rkey); \
+             b7 = asm_cipher_be (b7, rkey);
+
+      DO_ROUND(1);
+      DO_ROUND(2);
+      DO_ROUND(3);
+      DO_ROUND(4);
+      DO_ROUND(5);
+      DO_ROUND(6);
+      DO_ROUND(7);
+      DO_ROUND(8);
+      DO_ROUND(9);
+      if (rounds >= 12)
+       {
+         DO_ROUND(10);
+         DO_ROUND(11);
+         if (rounds > 12)
+           {
+             DO_ROUND(12);
+             DO_ROUND(13);
+           }
+       }
+
+#undef DO_ROUND
+
+      rkey = rkeylast;
+      b0 = asm_cipherlast_be (b0, rkey);
+      b1 = asm_cipherlast_be (b1, rkey);
+      b2 = asm_cipherlast_be (b2, rkey);
+      b3 = asm_cipherlast_be (b3, rkey);
+      b4 = asm_cipherlast_be (b4, rkey);
+      b5 = asm_cipherlast_be (b5, rkey);
+      b6 = asm_cipherlast_be (b6, rkey);
+      b7 = asm_cipherlast_be (b7, rkey);
+
+      ctr ^= b0 ^ b1 ^ b2 ^ b3 ^ b4 ^ b5 ^ b6 ^ b7;
+
+      abuf += 8;
+    }
+
+  if (nblocks >= 4 && (data_nblocks % 4) == 0)
+    {
+      b0 = VEC_LOAD_BE (abuf, 0, bige_const);
+      b1 = VEC_LOAD_BE (abuf, 1, bige_const);
+      b2 = VEC_LOAD_BE (abuf, 2, bige_const);
+      b3 = VEC_LOAD_BE (abuf, 3, bige_const);
+
+      l = VEC_LOAD_BE (ocb_get_l (c, data_nblocks += 4), 0, bige_const);
+
+      frkey = rkey0;
+      iv ^= frkey;
+
+      iv0 = iv ^ l0;
+      iv1 = iv ^ l0 ^ l1;
+      iv2 = iv ^ l1;
+      iv3 = iv ^ l1 ^ l;
+
+      b0 ^= iv0;
+      b1 ^= iv1;
+      b2 ^= iv2;
+      b3 ^= iv3;
+      iv = iv3 ^ frkey;
+
+#define DO_ROUND(r) \
+             rkey = ALIGNED_LOAD (rk, r); \
+             b0 = asm_cipher_be (b0, rkey); \
+             b1 = asm_cipher_be (b1, rkey); \
+             b2 = asm_cipher_be (b2, rkey); \
+             b3 = asm_cipher_be (b3, rkey);
+
+      DO_ROUND(1);
+      DO_ROUND(2);
+      DO_ROUND(3);
+      DO_ROUND(4);
+      DO_ROUND(5);
+      DO_ROUND(6);
+      DO_ROUND(7);
+      DO_ROUND(8);
+      DO_ROUND(9);
+      if (rounds >= 12)
+       {
+         DO_ROUND(10);
+         DO_ROUND(11);
+         if (rounds > 12)
+           {
+             DO_ROUND(12);
+             DO_ROUND(13);
+           }
+       }
+
+#undef DO_ROUND
+
+      rkey = rkeylast;
+      b0 = asm_cipherlast_be (b0, rkey);
+      b1 = asm_cipherlast_be (b1, rkey);
+      b2 = asm_cipherlast_be (b2, rkey);
+      b3 = asm_cipherlast_be (b3, rkey);
+
+      ctr ^= b0 ^ b1 ^ b2 ^ b3;
+
+      abuf += 4;
+      nblocks -= 4;
+    }
+
+  for (; nblocks; nblocks--)
+    {
+      l = VEC_LOAD_BE (ocb_get_l (c, ++data_nblocks), 0, bige_const);
+      b = VEC_LOAD_BE (abuf, 0, bige_const);
+
+      /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+      iv ^= l;
+      /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i)  */
+      b ^= iv;
+      AES_ENCRYPT (b, rounds);
+      ctr ^= b;
+
+      abuf += 1;
+    }
+
+  VEC_STORE_BE (c->u_mode.ocb.aad_offset, 0, iv, bige_const);
+  VEC_STORE_BE (c->u_mode.ocb.aad_sum, 0, ctr, bige_const);
+  c->u_mode.ocb.aad_nblocks = data_nblocks;
+
+  return 0;
+}
+
+
+void XTS_CRYPT_FUNC (void *context, unsigned char *tweak_arg,
+                    void *outbuf_arg, const void *inbuf_arg,
+                    size_t nblocks, int encrypt)
+{
+#ifdef WORDS_BIGENDIAN
+  static const block vec_bswap128_const =
+    { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
+#else
+  static const block vec_bswap128_const =
+    { ~15, ~14, ~13, ~12, ~11, ~10, ~9, ~8, ~7, ~6, ~5, ~4, ~3, ~2, ~1, ~0 };
+#endif
+  static const unsigned char vec_tweak_const[16] =
+    { 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0x87 };
+  static const vector unsigned long long vec_shift63_const =
+    { 63, 63 };
+  const block bige_const = asm_load_be_const();
+  RIJNDAEL_context *ctx = context;
+  const u128_t *in = (const u128_t *)inbuf_arg;
+  u128_t *out = (u128_t *)outbuf_arg;
+  int rounds = ctx->rounds;
+  block tweak;
+  block b0, b1, b2, b3, b4, b5, b6, b7, b, rkey, rkeylf;
+  block tweak0, tweak1, tweak2, tweak3, tweak4, tweak5, tweak6, tweak7;
+  block tweak_const, bswap128_const, shift63_const;
+  ROUND_KEY_VARIABLES;
+
+  tweak_const = VEC_LOAD_BE (&vec_tweak_const, 0, bige_const);
+  bswap128_const = ALIGNED_LOAD (&vec_bswap128_const, 0);
+  shift63_const = ALIGNED_LOAD (&vec_shift63_const, 0);
+
+  tweak = VEC_LOAD_BE (tweak_arg, 0, bige_const);
+  tweak = asm_vperm1 (tweak, bswap128_const);
+
+#define GEN_TWEAK(tout, tin) /* Generate next tweak. */ \
+    do { \
+      block tmp1, tmp2; \
+      tmp1 = asm_swap_uint64_halfs(tin); \
+      tmp2 = asm_add_uint64(tin, tin); \
+      tmp1 = asm_sra_int64(tmp1, shift63_const) & tweak_const; \
+      tout = asm_xor(tmp1, tmp2); \
+    } while (0)
+
+  if (encrypt)
+    {
+      const u128_t *rk = (u128_t *)&ctx->keyschenc;
+
+      PRELOAD_ROUND_KEYS (rounds);
+
+      for (; nblocks >= 8; nblocks -= 8)
+       {
+         b0 = VEC_LOAD_BE_NOSWAP (in, 0);
+         b1 = VEC_LOAD_BE_NOSWAP (in, 1);
+         b2 = VEC_LOAD_BE_NOSWAP (in, 2);
+         b3 = VEC_LOAD_BE_NOSWAP (in, 3);
+         tweak0 = tweak;
+         GEN_TWEAK (tweak1, tweak0);
+         tweak0 = asm_vperm1 (tweak0, bswap128_const);
+         b4 = VEC_LOAD_BE_NOSWAP (in, 4);
+         b5 = VEC_LOAD_BE_NOSWAP (in, 5);
+         GEN_TWEAK (tweak2, tweak1);
+         tweak1 = asm_vperm1 (tweak1, bswap128_const);
+         b6 = VEC_LOAD_BE_NOSWAP (in, 6);
+         b7 = VEC_LOAD_BE_NOSWAP (in, 7);
+         in += 8;
+
+         b0 = VEC_BE_SWAP(b0, bige_const);
+         b1 = VEC_BE_SWAP(b1, bige_const);
+         GEN_TWEAK (tweak3, tweak2);
+         tweak2 = asm_vperm1 (tweak2, bswap128_const);
+         GEN_TWEAK (tweak4, tweak3);
+         tweak3 = asm_vperm1 (tweak3, bswap128_const);
+         b2 = VEC_BE_SWAP(b2, bige_const);
+         b3 = VEC_BE_SWAP(b3, bige_const);
+         GEN_TWEAK (tweak5, tweak4);
+         tweak4 = asm_vperm1 (tweak4, bswap128_const);
+         GEN_TWEAK (tweak6, tweak5);
+         tweak5 = asm_vperm1 (tweak5, bswap128_const);
+         b4 = VEC_BE_SWAP(b4, bige_const);
+         b5 = VEC_BE_SWAP(b5, bige_const);
+         GEN_TWEAK (tweak7, tweak6);
+         tweak6 = asm_vperm1 (tweak6, bswap128_const);
+         GEN_TWEAK (tweak, tweak7);
+         tweak7 = asm_vperm1 (tweak7, bswap128_const);
+         b6 = VEC_BE_SWAP(b6, bige_const);
+         b7 = VEC_BE_SWAP(b7, bige_const);
+
+         tweak0 = asm_xor (tweak0, rkey0);
+         tweak1 = asm_xor (tweak1, rkey0);
+         tweak2 = asm_xor (tweak2, rkey0);
+         tweak3 = asm_xor (tweak3, rkey0);
+         tweak4 = asm_xor (tweak4, rkey0);
+         tweak5 = asm_xor (tweak5, rkey0);
+         tweak6 = asm_xor (tweak6, rkey0);
+         tweak7 = asm_xor (tweak7, rkey0);
+
+         b0 = asm_xor (b0, tweak0);
+         b1 = asm_xor (b1, tweak1);
+         b2 = asm_xor (b2, tweak2);
+         b3 = asm_xor (b3, tweak3);
+         b4 = asm_xor (b4, tweak4);
+         b5 = asm_xor (b5, tweak5);
+         b6 = asm_xor (b6, tweak6);
+         b7 = asm_xor (b7, tweak7);
+
+#define DO_ROUND(r) \
+             rkey = ALIGNED_LOAD (rk, r); \
+             b0 = asm_cipher_be (b0, rkey); \
+             b1 = asm_cipher_be (b1, rkey); \
+             b2 = asm_cipher_be (b2, rkey); \
+             b3 = asm_cipher_be (b3, rkey); \
+             b4 = asm_cipher_be (b4, rkey); \
+             b5 = asm_cipher_be (b5, rkey); \
+             b6 = asm_cipher_be (b6, rkey); \
+             b7 = asm_cipher_be (b7, rkey);
+
+         DO_ROUND(1);
+         DO_ROUND(2);
+         DO_ROUND(3);
+         DO_ROUND(4);
+         DO_ROUND(5);
+         DO_ROUND(6);
+         DO_ROUND(7);
+
+         rkeylf = asm_xor (rkeylast, rkey0);
+
+         DO_ROUND(8);
+
+         tweak0 = asm_xor (tweak0, rkeylf);
+         tweak1 = asm_xor (tweak1, rkeylf);
+         tweak2 = asm_xor (tweak2, rkeylf);
+         tweak3 = asm_xor (tweak3, rkeylf);
+         tweak4 = asm_xor (tweak4, rkeylf);
+         tweak5 = asm_xor (tweak5, rkeylf);
+         tweak6 = asm_xor (tweak6, rkeylf);
+         tweak7 = asm_xor (tweak7, rkeylf);
+
+         DO_ROUND(9);
+         if (rounds >= 12)
+           {
+             DO_ROUND(10);
+             DO_ROUND(11);
+             if (rounds > 12)
+               {
+                 DO_ROUND(12);
+                 DO_ROUND(13);
+               }
+           }
+
+#undef DO_ROUND
+
+         b0 = asm_cipherlast_be (b0, tweak0);
+         b1 = asm_cipherlast_be (b1, tweak1);
+         b2 = asm_cipherlast_be (b2, tweak2);
+         b3 = asm_cipherlast_be (b3, tweak3);
+         b0 = VEC_BE_SWAP (b0, bige_const);
+         b1 = VEC_BE_SWAP (b1, bige_const);
+         b4 = asm_cipherlast_be (b4, tweak4);
+         b5 = asm_cipherlast_be (b5, tweak5);
+         b2 = VEC_BE_SWAP (b2, bige_const);
+         b3 = VEC_BE_SWAP (b3, bige_const);
+         b6 = asm_cipherlast_be (b6, tweak6);
+         b7 = asm_cipherlast_be (b7, tweak7);
+         VEC_STORE_BE_NOSWAP (out, 0, b0);
+         VEC_STORE_BE_NOSWAP (out, 1, b1);
+         b4 = VEC_BE_SWAP (b4, bige_const);
+         b5 = VEC_BE_SWAP (b5, bige_const);
+         VEC_STORE_BE_NOSWAP (out, 2, b2);
+         VEC_STORE_BE_NOSWAP (out, 3, b3);
+         b6 = VEC_BE_SWAP (b6, bige_const);
+         b7 = VEC_BE_SWAP (b7, bige_const);
+         VEC_STORE_BE_NOSWAP (out, 4, b4);
+         VEC_STORE_BE_NOSWAP (out, 5, b5);
+         VEC_STORE_BE_NOSWAP (out, 6, b6);
+         VEC_STORE_BE_NOSWAP (out, 7, b7);
+         out += 8;
+       }
+
+      if (nblocks >= 4)
+       {
+         tweak0 = tweak;
+         GEN_TWEAK (tweak1, tweak0);
+         GEN_TWEAK (tweak2, tweak1);
+         GEN_TWEAK (tweak3, tweak2);
+         GEN_TWEAK (tweak, tweak3);
+
+         b0 = VEC_LOAD_BE (in, 0, bige_const);
+         b1 = VEC_LOAD_BE (in, 1, bige_const);
+         b2 = VEC_LOAD_BE (in, 2, bige_const);
+         b3 = VEC_LOAD_BE (in, 3, bige_const);
+
+         tweak0 = asm_vperm1 (tweak0, bswap128_const);
+         tweak1 = asm_vperm1 (tweak1, bswap128_const);
+         tweak2 = asm_vperm1 (tweak2, bswap128_const);
+         tweak3 = asm_vperm1 (tweak3, bswap128_const);
+
+         b0 ^= tweak0 ^ rkey0;
+         b1 ^= tweak1 ^ rkey0;
+         b2 ^= tweak2 ^ rkey0;
+         b3 ^= tweak3 ^ rkey0;
+
+#define DO_ROUND(r) \
+             rkey = ALIGNED_LOAD (rk, r); \
+             b0 = asm_cipher_be (b0, rkey); \
+             b1 = asm_cipher_be (b1, rkey); \
+             b2 = asm_cipher_be (b2, rkey); \
+             b3 = asm_cipher_be (b3, rkey);
+
+         DO_ROUND(1);
+         DO_ROUND(2);
+         DO_ROUND(3);
+         DO_ROUND(4);
+         DO_ROUND(5);
+         DO_ROUND(6);
+         DO_ROUND(7);
+         DO_ROUND(8);
+         DO_ROUND(9);
+         if (rounds >= 12)
+           {
+             DO_ROUND(10);
+             DO_ROUND(11);
+             if (rounds > 12)
+               {
+                 DO_ROUND(12);
+                 DO_ROUND(13);
+               }
+           }
+
+#undef DO_ROUND
+
+         rkey = rkeylast;
+         b0 = asm_cipherlast_be (b0, rkey ^ tweak0);
+         b1 = asm_cipherlast_be (b1, rkey ^ tweak1);
+         b2 = asm_cipherlast_be (b2, rkey ^ tweak2);
+         b3 = asm_cipherlast_be (b3, rkey ^ tweak3);
+
+         VEC_STORE_BE (out, 0, b0, bige_const);
+         VEC_STORE_BE (out, 1, b1, bige_const);
+         VEC_STORE_BE (out, 2, b2, bige_const);
+         VEC_STORE_BE (out, 3, b3, bige_const);
+
+         in += 4;
+         out += 4;
+         nblocks -= 4;
+       }
+
+      for (; nblocks; nblocks--)
+       {
+         tweak0 = asm_vperm1 (tweak, bswap128_const);
+
+         /* Xor-Encrypt/Decrypt-Xor block. */
+         b = VEC_LOAD_BE (in, 0, bige_const) ^ tweak0;
+
+         /* Generate next tweak. */
+         GEN_TWEAK (tweak, tweak);
+
+         AES_ENCRYPT (b, rounds);
+
+         b ^= tweak0;
+         VEC_STORE_BE (out, 0, b, bige_const);
+
+         in++;
+         out++;
+       }
+    }
+  else
+    {
+      const u128_t *rk = (u128_t *)&ctx->keyschdec;
+
+      if (!ctx->decryption_prepared)
+       {
+         internal_aes_ppc_prepare_decryption (ctx);
+         ctx->decryption_prepared = 1;
+       }
+
+      PRELOAD_ROUND_KEYS (rounds);
+
+      for (; nblocks >= 8; nblocks -= 8)
+       {
+         b0 = VEC_LOAD_BE_NOSWAP (in, 0);
+         b1 = VEC_LOAD_BE_NOSWAP (in, 1);
+         b2 = VEC_LOAD_BE_NOSWAP (in, 2);
+         b3 = VEC_LOAD_BE_NOSWAP (in, 3);
+         tweak0 = tweak;
+         GEN_TWEAK (tweak1, tweak0);
+         tweak0 = asm_vperm1 (tweak0, bswap128_const);
+         b4 = VEC_LOAD_BE_NOSWAP (in, 4);
+         b5 = VEC_LOAD_BE_NOSWAP (in, 5);
+         GEN_TWEAK (tweak2, tweak1);
+         tweak1 = asm_vperm1 (tweak1, bswap128_const);
+         b6 = VEC_LOAD_BE_NOSWAP (in, 6);
+         b7 = VEC_LOAD_BE_NOSWAP (in, 7);
+         in += 8;
+
+         b0 = VEC_BE_SWAP(b0, bige_const);
+         b1 = VEC_BE_SWAP(b1, bige_const);
+         GEN_TWEAK (tweak3, tweak2);
+         tweak2 = asm_vperm1 (tweak2, bswap128_const);
+         GEN_TWEAK (tweak4, tweak3);
+         tweak3 = asm_vperm1 (tweak3, bswap128_const);
+         b2 = VEC_BE_SWAP(b2, bige_const);
+         b3 = VEC_BE_SWAP(b3, bige_const);
+         GEN_TWEAK (tweak5, tweak4);
+         tweak4 = asm_vperm1 (tweak4, bswap128_const);
+         GEN_TWEAK (tweak6, tweak5);
+         tweak5 = asm_vperm1 (tweak5, bswap128_const);
+         b4 = VEC_BE_SWAP(b4, bige_const);
+         b5 = VEC_BE_SWAP(b5, bige_const);
+         GEN_TWEAK (tweak7, tweak6);
+         tweak6 = asm_vperm1 (tweak6, bswap128_const);
+         GEN_TWEAK (tweak, tweak7);
+         tweak7 = asm_vperm1 (tweak7, bswap128_const);
+         b6 = VEC_BE_SWAP(b6, bige_const);
+         b7 = VEC_BE_SWAP(b7, bige_const);
+
+         tweak0 = asm_xor (tweak0, rkey0);
+         tweak1 = asm_xor (tweak1, rkey0);
+         tweak2 = asm_xor (tweak2, rkey0);
+         tweak3 = asm_xor (tweak3, rkey0);
+         tweak4 = asm_xor (tweak4, rkey0);
+         tweak5 = asm_xor (tweak5, rkey0);
+         tweak6 = asm_xor (tweak6, rkey0);
+         tweak7 = asm_xor (tweak7, rkey0);
+
+         b0 = asm_xor (b0, tweak0);
+         b1 = asm_xor (b1, tweak1);
+         b2 = asm_xor (b2, tweak2);
+         b3 = asm_xor (b3, tweak3);
+         b4 = asm_xor (b4, tweak4);
+         b5 = asm_xor (b5, tweak5);
+         b6 = asm_xor (b6, tweak6);
+         b7 = asm_xor (b7, tweak7);
+
+#define DO_ROUND(r) \
+             rkey = ALIGNED_LOAD (rk, r); \
+             b0 = asm_ncipher_be (b0, rkey); \
+             b1 = asm_ncipher_be (b1, rkey); \
+             b2 = asm_ncipher_be (b2, rkey); \
+             b3 = asm_ncipher_be (b3, rkey); \
+             b4 = asm_ncipher_be (b4, rkey); \
+             b5 = asm_ncipher_be (b5, rkey); \
+             b6 = asm_ncipher_be (b6, rkey); \
+             b7 = asm_ncipher_be (b7, rkey);
+
+         DO_ROUND(1);
+         DO_ROUND(2);
+         DO_ROUND(3);
+         DO_ROUND(4);
+         DO_ROUND(5);
+         DO_ROUND(6);
+         DO_ROUND(7);
+
+         rkeylf = asm_xor (rkeylast, rkey0);
+
+         DO_ROUND(8);
+
+         tweak0 = asm_xor (tweak0, rkeylf);
+         tweak1 = asm_xor (tweak1, rkeylf);
+         tweak2 = asm_xor (tweak2, rkeylf);
+         tweak3 = asm_xor (tweak3, rkeylf);
+         tweak4 = asm_xor (tweak4, rkeylf);
+         tweak5 = asm_xor (tweak5, rkeylf);
+         tweak6 = asm_xor (tweak6, rkeylf);
+         tweak7 = asm_xor (tweak7, rkeylf);
+
+         DO_ROUND(9);
+         if (rounds >= 12)
+           {
+             DO_ROUND(10);
+             DO_ROUND(11);
+             if (rounds > 12)
+               {
+                 DO_ROUND(12);
+                 DO_ROUND(13);
+               }
+           }
+
+#undef DO_ROUND
+
+         b0 = asm_ncipherlast_be (b0, tweak0);
+         b1 = asm_ncipherlast_be (b1, tweak1);
+         b2 = asm_ncipherlast_be (b2, tweak2);
+         b3 = asm_ncipherlast_be (b3, tweak3);
+         b0 = VEC_BE_SWAP (b0, bige_const);
+         b1 = VEC_BE_SWAP (b1, bige_const);
+         b4 = asm_ncipherlast_be (b4, tweak4);
+         b5 = asm_ncipherlast_be (b5, tweak5);
+         b2 = VEC_BE_SWAP (b2, bige_const);
+         b3 = VEC_BE_SWAP (b3, bige_const);
+         b6 = asm_ncipherlast_be (b6, tweak6);
+         b7 = asm_ncipherlast_be (b7, tweak7);
+         VEC_STORE_BE_NOSWAP (out, 0, b0);
+         VEC_STORE_BE_NOSWAP (out, 1, b1);
+         b4 = VEC_BE_SWAP (b4, bige_const);
+         b5 = VEC_BE_SWAP (b5, bige_const);
+         VEC_STORE_BE_NOSWAP (out, 2, b2);
+         VEC_STORE_BE_NOSWAP (out, 3, b3);
+         b6 = VEC_BE_SWAP (b6, bige_const);
+         b7 = VEC_BE_SWAP (b7, bige_const);
+         VEC_STORE_BE_NOSWAP (out, 4, b4);
+         VEC_STORE_BE_NOSWAP (out, 5, b5);
+         VEC_STORE_BE_NOSWAP (out, 6, b6);
+         VEC_STORE_BE_NOSWAP (out, 7, b7);
+         out += 8;
+       }
+
+      if (nblocks >= 4)
+       {
+         tweak0 = tweak;
+         GEN_TWEAK (tweak1, tweak0);
+         GEN_TWEAK (tweak2, tweak1);
+         GEN_TWEAK (tweak3, tweak2);
+         GEN_TWEAK (tweak, tweak3);
+
+         b0 = VEC_LOAD_BE (in, 0, bige_const);
+         b1 = VEC_LOAD_BE (in, 1, bige_const);
+         b2 = VEC_LOAD_BE (in, 2, bige_const);
+         b3 = VEC_LOAD_BE (in, 3, bige_const);
+
+         tweak0 = asm_vperm1 (tweak0, bswap128_const);
+         tweak1 = asm_vperm1 (tweak1, bswap128_const);
+         tweak2 = asm_vperm1 (tweak2, bswap128_const);
+         tweak3 = asm_vperm1 (tweak3, bswap128_const);
+
+         b0 ^= tweak0 ^ rkey0;
+         b1 ^= tweak1 ^ rkey0;
+         b2 ^= tweak2 ^ rkey0;
+         b3 ^= tweak3 ^ rkey0;
+
+#define DO_ROUND(r) \
+             rkey = ALIGNED_LOAD (rk, r); \
+             b0 = asm_ncipher_be (b0, rkey); \
+             b1 = asm_ncipher_be (b1, rkey); \
+             b2 = asm_ncipher_be (b2, rkey); \
+             b3 = asm_ncipher_be (b3, rkey);
+
+         DO_ROUND(1);
+         DO_ROUND(2);
+         DO_ROUND(3);
+         DO_ROUND(4);
+         DO_ROUND(5);
+         DO_ROUND(6);
+         DO_ROUND(7);
+         DO_ROUND(8);
+         DO_ROUND(9);
+         if (rounds >= 12)
+           {
+             DO_ROUND(10);
+             DO_ROUND(11);
+             if (rounds > 12)
+               {
+                 DO_ROUND(12);
+                 DO_ROUND(13);
+               }
+           }
+
+#undef DO_ROUND
+
+         rkey = rkeylast;
+         b0 = asm_ncipherlast_be (b0, rkey ^ tweak0);
+         b1 = asm_ncipherlast_be (b1, rkey ^ tweak1);
+         b2 = asm_ncipherlast_be (b2, rkey ^ tweak2);
+         b3 = asm_ncipherlast_be (b3, rkey ^ tweak3);
+
+         VEC_STORE_BE (out, 0, b0, bige_const);
+         VEC_STORE_BE (out, 1, b1, bige_const);
+         VEC_STORE_BE (out, 2, b2, bige_const);
+         VEC_STORE_BE (out, 3, b3, bige_const);
+
+         in += 4;
+         out += 4;
+         nblocks -= 4;
+       }
+
+      for (; nblocks; nblocks--)
+       {
+         tweak0 = asm_vperm1 (tweak, bswap128_const);
+
+         /* Xor-Encrypt/Decrypt-Xor block. */
+         b = VEC_LOAD_BE (in, 0, bige_const) ^ tweak0;
+
+         /* Generate next tweak. */
+         GEN_TWEAK (tweak, tweak);
+
+         AES_DECRYPT (b, rounds);
+
+         b ^= tweak0;
+         VEC_STORE_BE (out, 0, b, bige_const);
+
+         in++;
+         out++;
+       }
+    }
+
+  tweak = asm_vperm1 (tweak, bswap128_const);
+  VEC_STORE_BE (tweak_arg, 0, tweak, bige_const);
+
+#undef GEN_TWEAK
+}
diff --git a/grub-core/lib/libgcrypt/cipher/rijndael-ppc.c 
b/grub-core/lib/libgcrypt/cipher/rijndael-ppc.c
new file mode 100644
index 000000000..f5c323611
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/rijndael-ppc.c
@@ -0,0 +1,259 @@
+/* Rijndael (AES) for GnuPG - PowerPC Vector Crypto AES implementation
+ * Copyright (C) 2019 Shawn Landden <shawn@git.icu>
+ * Copyright (C) 2019-2020 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Alternatively, this code may be used in OpenSSL from The OpenSSL Project,
+ * and Cryptogams by Andy Polyakov, and if made part of a release of either
+ * or both projects, is thereafter dual-licensed under the license said project
+ * is released under.
+ */
+
+#include <config.h>
+
+#include "rijndael-internal.h"
+#include "cipher-internal.h"
+#include "bufhelp.h"
+
+#ifdef USE_PPC_CRYPTO
+
+#include "rijndael-ppc-common.h"
+
+
+#ifdef WORDS_BIGENDIAN
+static const block vec_bswap32_const =
+  { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 };
+#else
+static const block vec_bswap32_const_neg =
+  { ~3, ~2, ~1, ~0, ~7, ~6, ~5, ~4, ~11, ~10, ~9, ~8, ~15, ~14, ~13, ~12 };
+#endif
+
+
+static ASM_FUNC_ATTR_INLINE block
+asm_load_be_const(void)
+{
+#ifndef WORDS_BIGENDIAN
+  return ALIGNED_LOAD (&vec_bswap32_const_neg, 0);
+#else
+  static const block vec_dummy = { 0 };
+  return vec_dummy;
+#endif
+}
+
+static ASM_FUNC_ATTR_INLINE block
+asm_be_swap(block vec, block be_bswap_const)
+{
+  (void)be_bswap_const;
+#ifndef WORDS_BIGENDIAN
+  return asm_vperm1 (vec, be_bswap_const);
+#else
+  return vec;
+#endif
+}
+
+static ASM_FUNC_ATTR_INLINE block
+asm_load_be_noswap(unsigned long offset, const void *ptr)
+{
+  block vec;
+#if __GNUC__ >= 4
+  if (__builtin_constant_p (offset) && offset == 0)
+    __asm__ volatile ("lxvw4x %x0,0,%1\n\t"
+                     : "=wa" (vec)
+                     : "r" ((uintptr_t)ptr)
+                     : "memory");
+  else
+#endif
+    __asm__ volatile ("lxvw4x %x0,%1,%2\n\t"
+                     : "=wa" (vec)
+                     : "r" (offset), "r" ((uintptr_t)ptr)
+                     : "memory", "r0");
+  /* NOTE: vec needs to be be-swapped using 'asm_be_swap' by caller */
+  return vec;
+}
+
+static ASM_FUNC_ATTR_INLINE void
+asm_store_be_noswap(block vec, unsigned long offset, void *ptr)
+{
+  /* NOTE: vec be-swapped using 'asm_be_swap' by caller */
+#if __GNUC__ >= 4
+  if (__builtin_constant_p (offset) && offset == 0)
+    __asm__ volatile ("stxvw4x %x0,0,%1\n\t"
+                     :
+                     : "wa" (vec), "r" ((uintptr_t)ptr)
+                     : "memory");
+  else
+#endif
+    __asm__ volatile ("stxvw4x %x0,%1,%2\n\t"
+                     :
+                     : "wa" (vec), "r" (offset), "r" ((uintptr_t)ptr)
+                     : "memory", "r0");
+}
+
+
+static ASM_FUNC_ATTR_INLINE u32
+_gcry_aes_sbox4_ppc8(u32 fourbytes)
+{
+  union
+    {
+      PROPERLY_ALIGNED_TYPE dummy;
+      block data_vec;
+      u32 data32[4];
+    } u;
+
+  u.data32[0] = fourbytes;
+  u.data_vec = vec_sbox_be(u.data_vec);
+  return u.data32[0];
+}
+
+void
+_gcry_aes_ppc8_setkey (RIJNDAEL_context *ctx, const byte *key)
+{
+  const block bige_const = asm_load_be_const();
+  union
+    {
+      PROPERLY_ALIGNED_TYPE dummy;
+      byte data[MAXKC][4];
+      u32 data32[MAXKC];
+    } tkk[2];
+  unsigned int rounds = ctx->rounds;
+  int KC = rounds - 6;
+  unsigned int keylen = KC * 4;
+  u128_t *ekey = (u128_t *)(void *)ctx->keyschenc;
+  unsigned int i, r, t;
+  byte rcon = 1;
+  int j;
+#define k      tkk[0].data
+#define k_u32  tkk[0].data32
+#define tk     tkk[1].data
+#define tk_u32 tkk[1].data32
+#define W      (ctx->keyschenc)
+#define W_u32  (ctx->keyschenc32)
+
+  for (i = 0; i < keylen; i++)
+    {
+      k[i >> 2][i & 3] = key[i];
+    }
+
+  for (j = KC-1; j >= 0; j--)
+    {
+      tk_u32[j] = k_u32[j];
+    }
+  r = 0;
+  t = 0;
+  /* Copy values into round key array.  */
+  for (j = 0; (j < KC) && (r < rounds + 1); )
+    {
+      for (; (j < KC) && (t < 4); j++, t++)
+        {
+          W_u32[r][t] = le_bswap32(tk_u32[j]);
+        }
+      if (t == 4)
+        {
+          r++;
+          t = 0;
+        }
+    }
+  while (r < rounds + 1)
+    {
+      tk_u32[0] ^=
+       le_bswap32(
+         _gcry_aes_sbox4_ppc8(rol(le_bswap32(tk_u32[KC - 1]), 24)) ^ rcon);
+
+      if (KC != 8)
+        {
+          for (j = 1; j < KC; j++)
+            {
+              tk_u32[j] ^= tk_u32[j-1];
+            }
+        }
+      else
+        {
+          for (j = 1; j < KC/2; j++)
+            {
+              tk_u32[j] ^= tk_u32[j-1];
+            }
+
+          tk_u32[KC/2] ^=
+           le_bswap32(_gcry_aes_sbox4_ppc8(le_bswap32(tk_u32[KC/2 - 1])));
+
+          for (j = KC/2 + 1; j < KC; j++)
+            {
+              tk_u32[j] ^= tk_u32[j-1];
+            }
+        }
+
+      /* Copy values into round key array.  */
+      for (j = 0; (j < KC) && (r < rounds + 1); )
+        {
+          for (; (j < KC) && (t < 4); j++, t++)
+            {
+              W_u32[r][t] = le_bswap32(tk_u32[j]);
+            }
+          if (t == 4)
+            {
+              r++;
+              t = 0;
+            }
+        }
+
+      rcon = (rcon << 1) ^ (-(rcon >> 7) & 0x1b);
+    }
+
+  /* Store in big-endian order. */
+  for (r = 0; r <= rounds; r++)
+    {
+#ifndef WORDS_BIGENDIAN
+      VEC_STORE_BE(ekey, r, ALIGNED_LOAD (ekey, r), bige_const);
+#else
+      block rvec = ALIGNED_LOAD (ekey, r);
+      ALIGNED_STORE (ekey, r,
+                     vec_perm(rvec, rvec, vec_bswap32_const));
+      (void)bige_const;
+#endif
+    }
+
+#undef W
+#undef tk
+#undef k
+#undef W_u32
+#undef tk_u32
+#undef k_u32
+  wipememory(&tkk, sizeof(tkk));
+}
+
+void
+_gcry_aes_ppc8_prepare_decryption (RIJNDAEL_context *ctx)
+{
+  internal_aes_ppc_prepare_decryption (ctx);
+}
+
+
+#define GCRY_AES_PPC8 1
+#define ENCRYPT_BLOCK_FUNC     _gcry_aes_ppc8_encrypt
+#define DECRYPT_BLOCK_FUNC     _gcry_aes_ppc8_decrypt
+#define CFB_ENC_FUNC           _gcry_aes_ppc8_cfb_enc
+#define CFB_DEC_FUNC           _gcry_aes_ppc8_cfb_dec
+#define CBC_ENC_FUNC           _gcry_aes_ppc8_cbc_enc
+#define CBC_DEC_FUNC           _gcry_aes_ppc8_cbc_dec
+#define CTR_ENC_FUNC           _gcry_aes_ppc8_ctr_enc
+#define OCB_CRYPT_FUNC         _gcry_aes_ppc8_ocb_crypt
+#define OCB_AUTH_FUNC          _gcry_aes_ppc8_ocb_auth
+#define XTS_CRYPT_FUNC         _gcry_aes_ppc8_xts_crypt
+
+#include <rijndael-ppc-functions.h>
+
+#endif /* USE_PPC_CRYPTO */
diff --git a/grub-core/lib/libgcrypt/cipher/rijndael-ppc9le.c 
b/grub-core/lib/libgcrypt/cipher/rijndael-ppc9le.c
new file mode 100644
index 000000000..facdedd4f
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/rijndael-ppc9le.c
@@ -0,0 +1,102 @@
+/* Rijndael (AES) for GnuPG - PowerPC Vector Crypto AES implementation
+ * Copyright (C) 2019 Shawn Landden <shawn@git.icu>
+ * Copyright (C) 2019-2020 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Alternatively, this code may be used in OpenSSL from The OpenSSL Project,
+ * and Cryptogams by Andy Polyakov, and if made part of a release of either
+ * or both projects, is thereafter dual-licensed under the license said project
+ * is released under.
+ */
+
+#include <config.h>
+
+#include "rijndael-internal.h"
+#include "cipher-internal.h"
+#include "bufhelp.h"
+
+#ifdef USE_PPC_CRYPTO_WITH_PPC9LE
+
+#include "rijndael-ppc-common.h"
+
+
+static ASM_FUNC_ATTR_INLINE block
+asm_load_be_const(void)
+{
+  static const block vec_dummy = { 0 };
+  return vec_dummy;
+}
+
+static ASM_FUNC_ATTR_INLINE block
+asm_be_swap(block vec, block be_bswap_const)
+{
+  (void)be_bswap_const;
+  return vec;
+}
+
+static ASM_FUNC_ATTR_INLINE block
+asm_load_be_noswap(unsigned long offset, const void *ptr)
+{
+  block vec;
+#if __GNUC__ >= 4
+  if (__builtin_constant_p (offset) && offset == 0)
+    __asm__ volatile ("lxvb16x %x0,0,%1\n\t"
+                     : "=wa" (vec)
+                     : "r" ((uintptr_t)ptr)
+                     : "memory");
+  else
+#endif
+    __asm__ volatile ("lxvb16x %x0,%1,%2\n\t"
+                     : "=wa" (vec)
+                     : "r" (offset), "r" ((uintptr_t)ptr)
+                     : "memory", "r0");
+  return vec;
+}
+
+static ASM_FUNC_ATTR_INLINE void
+asm_store_be_noswap(block vec, unsigned long offset, void *ptr)
+{
+#if __GNUC__ >= 4
+  if (__builtin_constant_p (offset) && offset == 0)
+    __asm__ volatile ("stxvb16x %x0,0,%1\n\t"
+                     :
+                     : "wa" (vec), "r" ((uintptr_t)ptr)
+                     : "memory");
+  else
+#endif
+    __asm__ volatile ("stxvb16x %x0,%1,%2\n\t"
+                     :
+                     : "wa" (vec), "r" (offset), "r" ((uintptr_t)ptr)
+                     : "memory", "r0");
+}
+
+
+#define GCRY_AES_PPC9LE 1
+#define ENCRYPT_BLOCK_FUNC     _gcry_aes_ppc9le_encrypt
+#define DECRYPT_BLOCK_FUNC     _gcry_aes_ppc9le_decrypt
+#define CFB_ENC_FUNC           _gcry_aes_ppc9le_cfb_enc
+#define CFB_DEC_FUNC           _gcry_aes_ppc9le_cfb_dec
+#define CBC_ENC_FUNC           _gcry_aes_ppc9le_cbc_enc
+#define CBC_DEC_FUNC           _gcry_aes_ppc9le_cbc_dec
+#define CTR_ENC_FUNC           _gcry_aes_ppc9le_ctr_enc
+#define OCB_CRYPT_FUNC         _gcry_aes_ppc9le_ocb_crypt
+#define OCB_AUTH_FUNC          _gcry_aes_ppc9le_ocb_auth
+#define XTS_CRYPT_FUNC         _gcry_aes_ppc9le_xts_crypt
+
+#include <rijndael-ppc-functions.h>
+
+#endif /* USE_PPC_CRYPTO */
diff --git a/grub-core/lib/libgcrypt/cipher/rijndael-s390x.c 
b/grub-core/lib/libgcrypt/cipher/rijndael-s390x.c
new file mode 100644
index 000000000..e50537ed9
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/rijndael-s390x.c
@@ -0,0 +1,1166 @@
+/* Rijndael (AES) for GnuPG - s390x/zSeries AES implementation
+ * Copyright (C) 2020 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+
+#include "rijndael-internal.h"
+#include "cipher-internal.h"
+#include "bufhelp.h"
+
+#ifdef USE_S390X_CRYPTO
+
+#include "asm-inline-s390x.h"
+
+#define NO_INLINE __attribute__((noinline))
+
+struct aes_s390x_gcm_params_s
+{
+  u32 reserved[3];
+  u32 counter_value;
+  u64 tag[2];
+  u64 hash_subkey[2];
+  u64 total_aad_length;
+  u64 total_cipher_length;
+  u32 initial_counter_value[4];
+  u64 key[4];
+};
+
+#define DECL_QUERY_FUNC(instruction, opcode) \
+  static u128_t instruction ##_query(void) \
+  { \
+    static u128_t function_codes = 0; \
+    static int initialized = 0; \
+    register unsigned long reg0 asm("0") = 0; \
+    register void *reg1 asm("1") = &function_codes; \
+    u128_t r1, r2; \
+    \
+    if (initialized) \
+      return function_codes; \
+    \
+    asm volatile ("0: .insn rre," #opcode " << 16, %[r1], %[r2]\n\t" \
+                 "   brc 1,0b\n\t" \
+                 : [r1] "=a" (r1), [r2] "=a" (r2) \
+                 : [reg0] "r" (reg0), [reg1] "r" (reg1) \
+                 : "cc", "memory"); \
+    \
+    initialized = 1; \
+    return function_codes; \
+  }
+
+#define DECL_EXECUTE_FUNC(instruction, opcode, param_const) \
+  static ALWAYS_INLINE size_t \
+  instruction ##_execute(unsigned int func, param_const void *param_block, \
+                        void *dst, const void *src, size_t src_len) \
+  { \
+    register unsigned long reg0 asm("0") = func; \
+    register param_const byte *reg1 asm("1") = param_block; \
+    u128_t r1 = ((u128_t)(uintptr_t)dst << 64); \
+    u128_t r2 = ((u128_t)(uintptr_t)src << 64) | (u64)src_len; \
+    \
+    asm volatile ("0: .insn rre," #opcode " << 16, %[r1], %[r2]\n\t" \
+                 "   brc 1,0b\n\t" \
+                 : [r1] "+a" (r1), [r2] "+a" (r2) \
+                 : [func] "r" (reg0), [param_ptr] "r" (reg1) \
+                 : "cc", "memory"); \
+    \
+    return (u64)r2; \
+  }
+
+DECL_QUERY_FUNC(km, 0xb92e);
+DECL_QUERY_FUNC(kmc, 0xb92f);
+DECL_QUERY_FUNC(kmac, 0xb91e);
+DECL_QUERY_FUNC(kmf, 0xb92a);
+DECL_QUERY_FUNC(kmo, 0xb92b);
+
+DECL_EXECUTE_FUNC(km, 0xb92e, const);
+DECL_EXECUTE_FUNC(kmc, 0xb92f, );
+DECL_EXECUTE_FUNC(kmac, 0xb91e, );
+DECL_EXECUTE_FUNC(kmf, 0xb92a, );
+DECL_EXECUTE_FUNC(kmo, 0xb92b, );
+
+static u128_t kma_query(void)
+{
+  static u128_t function_codes = 0;
+  static int initialized = 0;
+  register unsigned long reg0 asm("0") = 0;
+  register void *reg1 asm("1") = &function_codes;
+  u128_t r1, r2, r3;
+
+  if (initialized)
+    return function_codes;
+
+  asm volatile ("0: .insn rrf,0xb929 << 16, %[r1], %[r2], %[r3], 0\n\t"
+               "   brc 1,0b\n\t"
+               : [r1] "=a" (r1), [r2] "=a" (r2), [r3] "=a" (r3)
+               : [reg0] "r" (reg0), [reg1] "r" (reg1)
+               : "cc", "memory");
+
+  initialized = 1;
+  return function_codes;
+}
+
+static ALWAYS_INLINE void
+kma_execute(unsigned int func, void *param_block, byte *dst, const byte *src,
+           size_t src_len, const byte *aad, size_t aad_len)
+{
+  register unsigned long reg0 asm("0") = func;
+  register byte *reg1 asm("1") = param_block;
+  u128_t r1 = ((u128_t)(uintptr_t)dst << 64);
+  u128_t r2 = ((u128_t)(uintptr_t)src << 64) | (u64)src_len;
+  u128_t r3 = ((u128_t)(uintptr_t)aad << 64) | (u64)aad_len;
+
+  asm volatile ("0: .insn rrf,0xb929 << 16, %[r1], %[r2], %[r3], 0\n\t"
+               "   brc 1,0b\n\t"
+               : [r1] "+a" (r1), [r2] "+a" (r2), [r3] "+a" (r3),
+                 [func] "+r" (reg0)
+               : [param_ptr] "r" (reg1)
+               : "cc", "memory");
+}
+
+unsigned int _gcry_aes_s390x_encrypt(const RIJNDAEL_context *ctx,
+                                    unsigned char *dst,
+                                    const unsigned char *src)
+{
+  km_execute (ctx->km_func | KM_ENCRYPT, ctx->keyschenc, dst, src,
+             BLOCKSIZE);
+  return 0;
+}
+
+unsigned int _gcry_aes_s390x_decrypt(const RIJNDAEL_context *ctx,
+                                    unsigned char *dst,
+                                    const unsigned char *src)
+{
+  km_execute (ctx->km_func | KM_DECRYPT, ctx->keyschenc, dst, src,
+             BLOCKSIZE);
+  return 0;
+}
+
+static void aes_s390x_cbc_enc(void *context, unsigned char *iv,
+                             void *outbuf_arg, const void *inbuf_arg,
+                             size_t nblocks, int cbc_mac)
+{
+  RIJNDAEL_context *ctx = context;
+  byte *out = outbuf_arg;
+  const byte *in = inbuf_arg;
+  u128_t params[3];
+
+  /* Prepare parameter block. */
+  memcpy (&params[0], iv, BLOCKSIZE);
+  memcpy (&params[1], ctx->keyschenc, 32);
+
+  if (cbc_mac)
+    {
+      kmac_execute (ctx->kmac_func | KM_ENCRYPT, &params, NULL, in,
+                   nblocks * BLOCKSIZE);
+      memcpy (out, &params[0], BLOCKSIZE);
+    }
+  else
+    {
+      kmc_execute (ctx->kmc_func | KM_ENCRYPT, &params, out, in,
+                  nblocks * BLOCKSIZE);
+    }
+
+  /* Update IV with OCV. */
+  memcpy (iv, &params[0], BLOCKSIZE);
+
+  wipememory (&params, sizeof(params));
+}
+
+static void aes_s390x_cbc_dec(void *context, unsigned char *iv,
+                             void *outbuf_arg, const void *inbuf_arg,
+                             size_t nblocks)
+{
+  RIJNDAEL_context *ctx = context;
+  byte *out = outbuf_arg;
+  const byte *in = inbuf_arg;
+  u128_t params[3];
+
+  /* Prepare parameter block (ICV & key). */
+  memcpy (&params[0], iv, BLOCKSIZE);
+  memcpy (&params[1], ctx->keyschenc, 32);
+
+  kmc_execute (ctx->kmc_func | KM_DECRYPT, &params, out, in,
+              nblocks * BLOCKSIZE);
+
+  /* Update IV with OCV. */
+  memcpy (iv, &params[0], BLOCKSIZE);
+
+  wipememory (&params, sizeof(params));
+}
+
+static void aes_s390x_cfb128_enc(void *context, unsigned char *iv,
+                                void *outbuf_arg, const void *inbuf_arg,
+                                size_t nblocks)
+{
+  RIJNDAEL_context *ctx = context;
+  byte *out = outbuf_arg;
+  const byte *in = inbuf_arg;
+  unsigned int function;
+  u128_t params[3];
+
+  /* Prepare parameter block. */
+  memcpy (&params[0], iv, BLOCKSIZE);
+  memcpy (&params[1], ctx->keyschenc, 32);
+
+  function = ctx->kmf_func | KM_ENCRYPT | KMF_LCFB_16;
+  kmf_execute (function, &params, out, in, nblocks * BLOCKSIZE);
+
+  /* Update IV with OCV. */
+  memcpy (iv, &params[0], BLOCKSIZE);
+
+  wipememory (&params, sizeof(params));
+}
+
+static void aes_s390x_cfb128_dec(void *context, unsigned char *iv,
+                                void *outbuf_arg, const void *inbuf_arg,
+                                size_t nblocks)
+{
+  RIJNDAEL_context *ctx = context;
+  u128_t blocks[64];
+  byte *out = outbuf_arg;
+  const byte *in = inbuf_arg;
+  size_t max_blocks_used = 0;
+
+  /* AES128-CFB128 decryption speed using KMF was observed to be the same as
+   * the KMF encryption, ~1.03 cpb. Expection was to see similar performance
+   * as for AES128-CBC decryption as decryption for both modes should be
+   * parallalizeble (CBC shows ~0.22 cpb). Therefore there is quite a bit
+   * of room for improvement and implementation below using KM instruction
+   * shows ~0.70 cpb speed, ~30% improvement over KMF instruction.
+   */
+
+  while (nblocks >= 64)
+    {
+      /* Copy IV to encrypt buffer, copy (nblocks - 1) input blocks to
+       * encrypt buffer and update IV. */
+      asm volatile ("mvc 0(16, %[blocks]), 0(%[iv])\n\t"
+                   "mvc  16(240, %[blocks]),   0(%[in])\n\t"
+                   "mvc 256(256, %[blocks]), 240(%[in])\n\t"
+                   "mvc 512(256, %[blocks]), 496(%[in])\n\t"
+                   "mvc 768(256, %[blocks]), 752(%[in])\n\t"
+                   "mvc 0(16, %[iv]), 1008(%[in])\n\t"
+                   :
+                   : [in] "a" (in), [out] "a" (out), [blocks] "a" (blocks),
+                     [iv] "a" (iv)
+                   : "memory");
+
+      /* Perform encryption of temporary buffer. */
+      km_execute (ctx->km_func | KM_ENCRYPT, ctx->keyschenc, blocks, blocks,
+                 64 * BLOCKSIZE);
+
+      /* Xor encrypt buffer with input blocks and store to output blocks. */
+      asm volatile ("xc   0(256, %[blocks]),   0(%[in])\n\t"
+                   "xc 256(256, %[blocks]), 256(%[in])\n\t"
+                   "xc 512(256, %[blocks]), 512(%[in])\n\t"
+                   "xc 768(256, %[blocks]), 768(%[in])\n\t"
+                   "mvc   0(256, %[out]),   0(%[blocks])\n\t"
+                   "mvc 256(256, %[out]), 256(%[blocks])\n\t"
+                   "mvc 512(256, %[out]), 512(%[blocks])\n\t"
+                   "mvc 768(256, %[out]), 768(%[blocks])\n\t"
+                   :
+                   : [in] "a" (in), [out] "a" (out), [blocks] "a" (blocks)
+                   : "memory");
+
+      max_blocks_used = 64;
+      in += 64 * BLOCKSIZE;
+      out += 64 * BLOCKSIZE;
+      nblocks -= 64;
+    }
+
+  if (nblocks)
+    {
+      unsigned int pos = 0;
+      size_t in_nblocks = nblocks;
+      size_t num_in = 0;
+
+      max_blocks_used = max_blocks_used < nblocks ? nblocks : max_blocks_used;
+
+      /* Copy IV to encrypt buffer. */
+      asm volatile ("mvc 0(16, %[blocks]), 0(%[iv])\n\t"
+                   :
+                   : [blocks] "a" (blocks), [iv] "a" (iv)
+                   : "memory");
+      pos += 1;
+
+#define CFB_MOVE_BLOCKS(block_oper, move_nbytes) \
+      block_oper (in_nblocks - 1 >= move_nbytes / BLOCKSIZE) \
+       { \
+         unsigned int move_nblocks = move_nbytes / BLOCKSIZE; \
+         asm volatile ("mvc 0(" #move_nbytes ", %[blocks_x]), 0(%[in])\n\t" \
+                       : \
+                       : [blocks_x] "a" (&blocks[pos]), [in] "a" (in) \
+                       : "memory"); \
+         num_in += move_nblocks; \
+         in += move_nblocks * BLOCKSIZE; \
+         pos += move_nblocks; \
+          in_nblocks -= move_nblocks; \
+       }
+
+      /* Copy (nblocks - 1) input blocks to encrypt buffer. */
+      CFB_MOVE_BLOCKS(while, 256);
+      CFB_MOVE_BLOCKS(if, 128);
+      CFB_MOVE_BLOCKS(if, 64);
+      CFB_MOVE_BLOCKS(if, 32);
+      CFB_MOVE_BLOCKS(if, 16);
+
+#undef CFB_MOVE_BLOCKS
+
+      /* Update IV. */
+      asm volatile ("mvc 0(16, %[iv]), 0(%[in])\n\t"
+                   :
+                   : [iv] "a" (iv), [in] "a" (in)
+                   : "memory");
+      num_in += 1;
+      in += BLOCKSIZE;
+
+      /* Perform encryption of temporary buffer. */
+      km_execute (ctx->km_func | KM_ENCRYPT, ctx->keyschenc, blocks, blocks,
+                 nblocks * BLOCKSIZE);
+
+      /* Xor encrypt buffer with input blocks and store to output blocks. */
+      pos = 0;
+      in -= nblocks * BLOCKSIZE;
+
+#define CFB_XOR_BLOCKS(block_oper, xor_nbytes) \
+      block_oper (nblocks >= xor_nbytes / BLOCKSIZE) \
+       { \
+         unsigned int xor_nblocks = xor_nbytes / BLOCKSIZE; \
+         asm volatile ("xc 0(" #xor_nbytes ", %[blocks_x]), 0(%[in])\n\t" \
+                       "mvc 0(" #xor_nbytes ", %[out]), 0(%[blocks_x])\n\t" \
+                       : \
+                       : [blocks_x] "a" (&blocks[pos]), [out] "a" (out), \
+                         [in] "a" (in) \
+                       : "memory"); \
+         out += xor_nblocks * BLOCKSIZE; \
+         in += xor_nblocks * BLOCKSIZE; \
+         nblocks -= xor_nblocks; \
+         pos += xor_nblocks; \
+       }
+
+      CFB_XOR_BLOCKS(while, 256);
+      CFB_XOR_BLOCKS(if, 128);
+      CFB_XOR_BLOCKS(if, 64);
+      CFB_XOR_BLOCKS(if, 32);
+      CFB_XOR_BLOCKS(if, 16);
+
+#undef CFB_XOR_BLOCKS
+    }
+
+  if (max_blocks_used)
+    wipememory (&blocks, max_blocks_used * BLOCKSIZE);
+}
+
+static void aes_s390x_ofb_enc(void *context, unsigned char *iv,
+                             void *outbuf_arg, const void *inbuf_arg,
+                             size_t nblocks)
+{
+  RIJNDAEL_context *ctx = context;
+  byte *out = outbuf_arg;
+  const byte *in = inbuf_arg;
+  unsigned int function;
+  u128_t params[3];
+
+  /* Prepare parameter block. */
+  memcpy (&params[0], iv, BLOCKSIZE);
+  memcpy (&params[1], ctx->keyschenc, 32);
+
+  function = ctx->kmo_func | KM_ENCRYPT;
+  kmo_execute (function, &params, out, in, nblocks * BLOCKSIZE);
+
+  /* Update IV with OCV. */
+  memcpy (iv, &params[0], BLOCKSIZE);
+
+  wipememory (&params, sizeof(params));
+}
+
+static void aes_s390x_ctr128_enc(void *context, unsigned char *ctr,
+                                void *outbuf_arg, const void *inbuf_arg,
+                                size_t nblocks)
+{
+  RIJNDAEL_context *ctx = context;
+  byte *out = outbuf_arg;
+  const byte *in = inbuf_arg;
+  unsigned int function;
+  struct aes_s390x_gcm_params_s params;
+
+  memset (&params.hash_subkey, 0, sizeof(params.hash_subkey));
+  memcpy (&params.key, ctx->keyschenc, 32);
+
+  function = ctx->kma_func | KM_DECRYPT | KMA_HS | KMA_LAAD;
+
+  while (nblocks)
+    {
+      u64 to_overflow = (u64)0xFFFFFFFFU + 1 - buf_get_be32 (ctr + 12);
+      u64 ncurr = nblocks > to_overflow ? to_overflow : nblocks;
+
+      /* Prepare parameter block. */
+      memset (&params.reserved, 0, sizeof(params.reserved));
+      buf_put_be32 (&params.counter_value, buf_get_be32(ctr + 12) - 1);
+      memcpy (&params.initial_counter_value, ctr, 16);
+      params.initial_counter_value[3] = params.counter_value;
+      memset (&params.tag, 0, sizeof(params.tag));
+      params.total_aad_length = 0;
+      params.total_cipher_length = 0;
+
+      /* Update counter. */
+      cipher_block_add (ctr, ncurr, BLOCKSIZE);
+      if (ncurr == (u64)0xFFFFFFFFU + 1)
+       cipher_block_add (ctr, 1, BLOCKSIZE);
+
+      /* Perform CTR using KMA-GCM. */
+      kma_execute (function, &params, out, in, ncurr * BLOCKSIZE, NULL, 0);
+
+      out += ncurr * BLOCKSIZE;
+      in += ncurr * BLOCKSIZE;
+      nblocks -= ncurr;
+    }
+
+  wipememory (&params, sizeof(params));
+}
+
+static size_t aes_s390x_gcm_crypt(gcry_cipher_hd_t c, void *outbuf_arg,
+                                 const void *inbuf_arg, size_t nblocks,
+                                 int encrypt)
+{
+  RIJNDAEL_context *ctx = (void *)&c->context.c;
+  byte *out = outbuf_arg;
+  const byte *in = inbuf_arg;
+  byte *ctr = c->u_ctr.ctr;
+  unsigned int function;
+  struct aes_s390x_gcm_params_s params;
+
+  function = ctx->kma_func | (encrypt ? KM_ENCRYPT : KM_DECRYPT)
+             | KMA_HS | KMA_LAAD;
+
+  /* Prepare parameter block. */
+  memset (&params.reserved, 0, sizeof(params.reserved));
+  buf_put_be32 (&params.counter_value, buf_get_be32(ctr + 12) - 1);
+  memcpy (&params.tag, c->u_mode.gcm.u_tag.tag, 16);
+  memcpy (&params.hash_subkey, c->u_mode.gcm.u_ghash_key.key, 16);
+  params.total_aad_length = 0;
+  params.total_cipher_length = 0;
+  memcpy (&params.initial_counter_value, ctr, 12);
+  params.initial_counter_value[3] = params.counter_value;
+  memcpy (&params.key, ctx->keyschenc, 32);
+
+  /* Update counter (CTR32). */
+  buf_put_be32(ctr + 12, buf_get_be32(ctr + 12) + nblocks);
+
+  /* Perform KMA-GCM. */
+  kma_execute (function, &params, out, in, nblocks * BLOCKSIZE, NULL, 0);
+
+  /* Update tag. */
+  memcpy (c->u_mode.gcm.u_tag.tag, &params.tag, 16);
+
+  wipememory (&params, sizeof(params));
+
+  return 0;
+}
+
+static void aes_s390x_xts_crypt(void *context, unsigned char *tweak,
+                               void *outbuf_arg, const void *inbuf_arg,
+                               size_t nblocks, int encrypt)
+{
+  RIJNDAEL_context *ctx = context;
+  byte *out = outbuf_arg;
+  const byte *in = inbuf_arg;
+  unsigned int function;
+  u128_t params[3];
+  u128_t *params_tweak;
+
+  if (ctx->rounds < 12)
+    {
+      memcpy (&params[0], ctx->keyschenc, 16);
+      params_tweak = &params[1];
+      memcpy (params_tweak, tweak, BLOCKSIZE);
+    }
+  else if (ctx->rounds == 12)
+    {
+      BUG(); /* KM-XTS-AES-192 not defined. */
+    }
+  else
+    {
+      memcpy (&params[0], ctx->keyschenc, 32);
+      params_tweak = &params[2];
+      memcpy (params_tweak, tweak, BLOCKSIZE);
+    }
+
+  function = ctx->km_func_xts | (encrypt ? KM_ENCRYPT : KM_DECRYPT);
+  km_execute (function, &params, out, in, nblocks * BLOCKSIZE);
+
+  /* Update tweak with XTSP. */
+  memcpy (tweak, params_tweak, BLOCKSIZE);
+
+  wipememory (&params, sizeof(params));
+}
+
+static NO_INLINE void
+aes_s390x_ocb_prepare_Ls (gcry_cipher_hd_t c, u64 blkn, const void *Ls[64],
+                         const void ***pl)
+{
+  unsigned int n = 64 - (blkn % 64);
+  int i;
+
+  /* Prepare L pointers. */
+  *pl = &Ls[(63 + n) % 64];
+  for (i = 0; i < 64; i += 8, n = (n + 8) % 64)
+    {
+      static const int lastL[8] = { 3, 4, 3, 5, 3, 4, 3, 0 };
+
+      Ls[(0 + n) % 64] = c->u_mode.ocb.L[0];
+      Ls[(1 + n) % 64] = c->u_mode.ocb.L[1];
+      Ls[(2 + n) % 64] = c->u_mode.ocb.L[0];
+      Ls[(3 + n) % 64] = c->u_mode.ocb.L[2];
+      Ls[(4 + n) % 64] = c->u_mode.ocb.L[0];
+      Ls[(5 + n) % 64] = c->u_mode.ocb.L[1];
+      Ls[(6 + n) % 64] = c->u_mode.ocb.L[0];
+      Ls[(7 + n) % 64] = c->u_mode.ocb.L[lastL[i / 8]];
+    }
+}
+
+static ALWAYS_INLINE const unsigned char *
+aes_s390x_ocb_get_l (gcry_cipher_hd_t c, u64 n)
+{
+  unsigned long ntz = _gcry_ctz (n);
+  if (ntz >= OCB_L_TABLE_SIZE)
+    {
+      return NULL; /* Not accessed. */
+    }
+  return c->u_mode.ocb.L[ntz];
+}
+
+static NO_INLINE void
+aes_s390x_ocb_checksum (unsigned char *checksum, const void *plainbuf_arg,
+                       size_t nblks)
+{
+  const char *plainbuf = plainbuf_arg;
+  u64 tmp0[2];
+  u64 tmp1[2] = { 0, 0 };
+  u64 tmp2[2] = { 0, 0 };
+  u64 tmp3[2] = { 0, 0 };
+
+  cipher_block_cpy (tmp0, checksum, BLOCKSIZE);
+
+  if (nblks >= 4)
+    {
+      while (nblks >= 4)
+       {
+         /* Checksum_i = Checksum_{i-1} xor P_i  */
+         cipher_block_xor_1 (tmp0, plainbuf + 0 * BLOCKSIZE, BLOCKSIZE);
+         cipher_block_xor_1 (tmp1, plainbuf + 1 * BLOCKSIZE, BLOCKSIZE);
+         cipher_block_xor_1 (tmp2, plainbuf + 2 * BLOCKSIZE, BLOCKSIZE);
+         cipher_block_xor_1 (tmp3, plainbuf + 3 * BLOCKSIZE, BLOCKSIZE);
+
+         plainbuf += 4 * BLOCKSIZE;
+         nblks -= 4;
+       }
+
+      cipher_block_xor_1 (tmp0, tmp1, BLOCKSIZE);
+      cipher_block_xor_1 (tmp2, tmp3, BLOCKSIZE);
+      cipher_block_xor_1 (tmp0, tmp2, BLOCKSIZE);
+
+      wipememory (tmp1, sizeof(tmp1));
+      wipememory (tmp2, sizeof(tmp2));
+      wipememory (tmp3, sizeof(tmp3));
+    }
+
+  while (nblks > 0)
+    {
+      /* Checksum_i = Checksum_{i-1} xor P_i  */
+      cipher_block_xor_1 (tmp0, plainbuf, BLOCKSIZE);
+
+      plainbuf += BLOCKSIZE;
+      nblks--;
+    }
+
+  cipher_block_cpy (checksum, tmp0, BLOCKSIZE);
+
+  wipememory (tmp0, sizeof(tmp0));
+}
+
+static NO_INLINE size_t
+aes_s390x_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg,
+                  const void *inbuf_arg, size_t nblocks_arg)
+{
+  RIJNDAEL_context *ctx = (void *)&c->context.c;
+  unsigned char *outbuf = outbuf_arg;
+  const unsigned char *inbuf = inbuf_arg;
+  size_t nblocks = nblocks_arg;
+  u128_t blocks[64];
+  u128_t offset;
+  size_t max_blocks_used = 0;
+  u64 blkn = c->u_mode.ocb.data_nblocks;
+  unsigned int function = ctx->km_func | KM_ENCRYPT;
+  const void *Ls[64];
+  const void **pl;
+
+  aes_s390x_ocb_prepare_Ls (c, blkn, Ls, &pl);
+
+  /* Checksumming could be done inline in OCB_INPUT macros, but register
+   * pressure becomes too heavy and performance would end up being worse.
+   * For decryption, checksumming is part of OCB_OUTPUT macros as
+   * output handling is less demanding and can handle the additional
+   * computation. */
+  aes_s390x_ocb_checksum (c->u_ctr.ctr, inbuf_arg, nblocks_arg);
+
+  cipher_block_cpy (&offset, &c->u_iv.iv, BLOCKSIZE);
+
+#define OCB_INPUT(n) \
+      cipher_block_xor_2dst (&blocks[n], &offset, Ls[n], BLOCKSIZE); \
+      cipher_block_xor (outbuf + (n) * BLOCKSIZE, inbuf + (n) * BLOCKSIZE, \
+                       &offset, BLOCKSIZE)
+
+#define OCB_INPUT_4(n) \
+      OCB_INPUT((n) + 0); OCB_INPUT((n) + 1); OCB_INPUT((n) + 2); \
+      OCB_INPUT((n) + 3)
+
+#define OCB_INPUT_16(n) \
+      OCB_INPUT_4((n) + 0); OCB_INPUT_4((n) + 4); OCB_INPUT_4((n) + 8); \
+      OCB_INPUT_4((n) + 12);
+
+#define OCB_OUTPUT(n) \
+      cipher_block_xor_1 (outbuf + (n) * BLOCKSIZE, &blocks[n], BLOCKSIZE)
+
+#define OCB_OUTPUT_4(n) \
+      OCB_OUTPUT((n) + 0); OCB_OUTPUT((n) + 1); OCB_OUTPUT((n) + 2); \
+      OCB_OUTPUT((n) + 3)
+
+#define OCB_OUTPUT_16(n) \
+      OCB_OUTPUT_4((n) + 0); OCB_OUTPUT_4((n) + 4); OCB_OUTPUT_4((n) + 8); \
+      OCB_OUTPUT_4((n) + 12);
+
+  while (nblocks >= 64)
+    {
+      blkn += 64;
+      *pl = aes_s390x_ocb_get_l(c, blkn - blkn % 64);
+
+      OCB_INPUT_16(0);
+      OCB_INPUT_16(16);
+      OCB_INPUT_16(32);
+      OCB_INPUT_16(48);
+
+      km_execute (function, ctx->keyschenc, outbuf, outbuf, 64 * BLOCKSIZE);
+
+      asm volatile ("xc   0(256, %[out]),   0(%[blocks])\n\t"
+                   "xc 256(256, %[out]), 256(%[blocks])\n\t"
+                   "xc 512(256, %[out]), 512(%[blocks])\n\t"
+                   "xc 768(256, %[out]), 768(%[blocks])\n\t"
+                   :
+                   : [out] "a" (outbuf), [blocks] "a" (blocks)
+                   : "memory");
+
+      max_blocks_used = 64;
+      inbuf += 64 * BLOCKSIZE;
+      outbuf += 64 * BLOCKSIZE;
+      nblocks -= 64;
+    }
+
+  if (nblocks)
+    {
+      unsigned int pos = 0;
+
+      max_blocks_used = max_blocks_used < nblocks ? nblocks : max_blocks_used;
+
+      blkn += nblocks;
+      *pl = aes_s390x_ocb_get_l(c, blkn - blkn % 64);
+
+      while (nblocks >= 16)
+       {
+         OCB_INPUT_16(pos + 0);
+         pos += 16;
+         nblocks -= 16;
+       }
+      while (nblocks >= 4)
+       {
+         OCB_INPUT_4(pos + 0);
+         pos += 4;
+         nblocks -= 4;
+       }
+      if (nblocks >= 2)
+       {
+         OCB_INPUT(pos + 0);
+         OCB_INPUT(pos + 1);
+         pos += 2;
+         nblocks -= 2;
+       }
+      if (nblocks >= 1)
+       {
+         OCB_INPUT(pos + 0);
+         pos += 1;
+         nblocks -= 1;
+       }
+
+      nblocks = pos;
+      pos = 0;
+      km_execute (function, ctx->keyschenc, outbuf, outbuf,
+                 nblocks * BLOCKSIZE);
+
+      while (nblocks >= 16)
+       {
+         OCB_OUTPUT_16(pos + 0);
+         pos += 16;
+         nblocks -= 16;
+       }
+      while (nblocks >= 4)
+       {
+         OCB_OUTPUT_4(pos + 0);
+         pos += 4;
+         nblocks -= 4;
+       }
+      if (nblocks >= 2)
+       {
+         OCB_OUTPUT(pos + 0);
+         OCB_OUTPUT(pos + 1);
+         pos += 2;
+         nblocks -= 2;
+       }
+      if (nblocks >= 1)
+       {
+         OCB_OUTPUT(pos + 0);
+         pos += 1;
+         nblocks -= 1;
+       }
+    }
+
+#undef OCB_INPUT
+#undef OCB_INPUT_4
+#undef OCB_INPUT_16
+#undef OCB_OUTPUT
+#undef OCB_OUTPUT_4
+#undef OCB_OUTPUT_16
+
+  c->u_mode.ocb.data_nblocks = blkn;
+  cipher_block_cpy (&c->u_iv.iv, &offset, BLOCKSIZE);
+
+  if (max_blocks_used)
+    wipememory (&blocks, max_blocks_used * BLOCKSIZE);
+
+  return 0;
+}
+
+static NO_INLINE size_t
+aes_s390x_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg,
+                  const void *inbuf_arg, size_t nblocks_arg)
+{
+  RIJNDAEL_context *ctx = (void *)&c->context.c;
+  unsigned char *outbuf = outbuf_arg;
+  const unsigned char *inbuf = inbuf_arg;
+  size_t nblocks = nblocks_arg;
+  u128_t blocks[64];
+  u128_t offset;
+  size_t max_blocks_used = 0;
+  u64 blkn = c->u_mode.ocb.data_nblocks;
+  unsigned int function = ctx->km_func | KM_DECRYPT;
+  const void *Ls[64];
+  const void **pl;
+
+  aes_s390x_ocb_prepare_Ls (c, blkn, Ls, &pl);
+
+  cipher_block_cpy (&offset, &c->u_iv.iv, BLOCKSIZE);
+
+#define OCB_INPUT(n) \
+      cipher_block_xor_2dst (&blocks[n], &offset, Ls[n], BLOCKSIZE); \
+      cipher_block_xor (outbuf + (n) * BLOCKSIZE, inbuf + (n) * BLOCKSIZE, \
+                       &offset, BLOCKSIZE)
+
+#define OCB_INPUT_4(n) \
+      OCB_INPUT((n) + 0); OCB_INPUT((n) + 1); OCB_INPUT((n) + 2); \
+      OCB_INPUT((n) + 3)
+
+#define OCB_INPUT_16(n) \
+      OCB_INPUT_4((n) + 0); OCB_INPUT_4((n) + 4); OCB_INPUT_4((n) + 8); \
+      OCB_INPUT_4((n) + 12);
+
+#define OCB_OUTPUT(n) \
+      cipher_block_xor_1 (outbuf + (n) * BLOCKSIZE, &blocks[n], BLOCKSIZE);
+
+#define OCB_OUTPUT_4(n) \
+      OCB_OUTPUT((n) + 0); OCB_OUTPUT((n) + 1); OCB_OUTPUT((n) + 2); \
+      OCB_OUTPUT((n) + 3)
+
+#define OCB_OUTPUT_16(n) \
+      OCB_OUTPUT_4((n) + 0); OCB_OUTPUT_4((n) + 4); OCB_OUTPUT_4((n) + 8); \
+      OCB_OUTPUT_4((n) + 12);
+
+  while (nblocks >= 64)
+    {
+      blkn += 64;
+      *pl = aes_s390x_ocb_get_l(c, blkn - blkn % 64);
+
+      OCB_INPUT_16(0);
+      OCB_INPUT_16(16);
+      OCB_INPUT_16(32);
+      OCB_INPUT_16(48);
+
+      km_execute (function, ctx->keyschenc, outbuf, outbuf, 64 * BLOCKSIZE);
+
+      asm volatile ("xc   0(256, %[out]),   0(%[blocks])\n\t"
+                   "xc 256(256, %[out]), 256(%[blocks])\n\t"
+                   "xc 512(256, %[out]), 512(%[blocks])\n\t"
+                   "xc 768(256, %[out]), 768(%[blocks])\n\t"
+                   :
+                   : [out] "a" (outbuf), [blocks] "a" (blocks)
+                   : "memory");
+
+      max_blocks_used = 64;
+      inbuf += 64 * BLOCKSIZE;
+      outbuf += 64 * BLOCKSIZE;
+      nblocks -= 64;
+    }
+
+  if (nblocks)
+    {
+      unsigned int pos = 0;
+
+      max_blocks_used = max_blocks_used < nblocks ? nblocks : max_blocks_used;
+
+      blkn += nblocks;
+      *pl = aes_s390x_ocb_get_l(c, blkn - blkn % 64);
+
+      while (nblocks >= 16)
+       {
+         OCB_INPUT_16(pos + 0);
+         pos += 16;
+         nblocks -= 16;
+       }
+      while (nblocks >= 4)
+       {
+         OCB_INPUT_4(pos + 0);
+         pos += 4;
+         nblocks -= 4;
+       }
+      if (nblocks >= 2)
+       {
+         OCB_INPUT(pos + 0);
+         OCB_INPUT(pos + 1);
+         pos += 2;
+         nblocks -= 2;
+       }
+      if (nblocks >= 1)
+       {
+         OCB_INPUT(pos + 0);
+         pos += 1;
+         nblocks -= 1;
+       }
+
+      nblocks = pos;
+      pos = 0;
+      km_execute (function, ctx->keyschenc, outbuf, outbuf,
+                 nblocks * BLOCKSIZE);
+
+      while (nblocks >= 16)
+       {
+         OCB_OUTPUT_16(pos + 0);
+         pos += 16;
+         nblocks -= 16;
+       }
+      while (nblocks >= 4)
+       {
+         OCB_OUTPUT_4(pos + 0);
+         pos += 4;
+         nblocks -= 4;
+       }
+      if (nblocks >= 2)
+       {
+         OCB_OUTPUT(pos + 0);
+         OCB_OUTPUT(pos + 1);
+         pos += 2;
+         nblocks -= 2;
+       }
+      if (nblocks >= 1)
+       {
+         OCB_OUTPUT(pos + 0);
+         pos += 1;
+         nblocks -= 1;
+       }
+    }
+
+#undef OCB_INPUT
+#undef OCB_INPUT_4
+#undef OCB_INPUT_16
+#undef OCB_OUTPUT
+#undef OCB_OUTPUT_4
+#undef OCB_OUTPUT_16
+
+  c->u_mode.ocb.data_nblocks = blkn;
+  cipher_block_cpy (&c->u_iv.iv, &offset, BLOCKSIZE);
+
+  if (max_blocks_used)
+    wipememory (&blocks, max_blocks_used * BLOCKSIZE);
+
+  aes_s390x_ocb_checksum (c->u_ctr.ctr, outbuf_arg, nblocks_arg);
+
+  return 0;
+}
+
+static size_t
+aes_s390x_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
+                    const void *inbuf_arg, size_t nblocks_arg, int encrypt)
+{
+  if (encrypt)
+    return aes_s390x_ocb_enc (c, outbuf_arg, inbuf_arg, nblocks_arg);
+  else
+    return aes_s390x_ocb_dec (c, outbuf_arg, inbuf_arg, nblocks_arg);
+}
+
+static size_t
+aes_s390x_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
+                   size_t nblocks_arg)
+{
+  RIJNDAEL_context *ctx = (void *)&c->context.c;
+  const unsigned char *abuf = abuf_arg;
+  u128_t blocks[64];
+  u128_t offset;
+  size_t max_blocks_used = 0;
+  u64 blkn = c->u_mode.ocb.aad_nblocks;
+  unsigned int function = ctx->km_func | KM_ENCRYPT;
+  const void *Ls[64];
+  const void **pl;
+
+  aes_s390x_ocb_prepare_Ls (c, blkn, Ls, &pl);
+
+  cipher_block_cpy (&offset, c->u_mode.ocb.aad_offset, BLOCKSIZE);
+
+#define OCB_INPUT(n) \
+      cipher_block_xor_2dst (&blocks[n], &offset, Ls[n], BLOCKSIZE); \
+      cipher_block_xor_1 (&blocks[n], abuf + (n) * BLOCKSIZE, BLOCKSIZE)
+
+#define OCB_INPUT_4(n) \
+      OCB_INPUT((n) + 0); OCB_INPUT((n) + 1); OCB_INPUT((n) + 2); \
+      OCB_INPUT((n) + 3)
+
+#define OCB_INPUT_16(n) \
+      OCB_INPUT_4((n) + 0); OCB_INPUT_4((n) + 4); OCB_INPUT_4((n) + 8); \
+      OCB_INPUT_4((n) + 12);
+
+  while (nblocks_arg >= 64)
+    {
+      blkn += 64;
+      *pl = aes_s390x_ocb_get_l(c, blkn - blkn % 64);
+
+      OCB_INPUT_16(0);
+      OCB_INPUT_16(16);
+      OCB_INPUT_16(32);
+      OCB_INPUT_16(48);
+
+      km_execute (function, ctx->keyschenc, blocks, blocks, 64 * BLOCKSIZE);
+
+      aes_s390x_ocb_checksum (c->u_mode.ocb.aad_sum, blocks, 64);
+
+      max_blocks_used = 64;
+      abuf += 64 * BLOCKSIZE;
+      nblocks_arg -= 64;
+    }
+
+  if (nblocks_arg > 0)
+    {
+      size_t nblocks = nblocks_arg;
+      unsigned int pos = 0;
+
+      max_blocks_used = max_blocks_used < nblocks ? nblocks : max_blocks_used;
+
+      blkn += nblocks;
+      *pl = aes_s390x_ocb_get_l(c, blkn - blkn % 64);
+
+      while (nblocks >= 16)
+       {
+         OCB_INPUT_16(pos + 0);
+         pos += 16;
+         nblocks -= 16;
+       }
+      while (nblocks >= 4)
+       {
+         OCB_INPUT_4(pos + 0);
+         pos += 4;
+         nblocks -= 4;
+       }
+      if (nblocks >= 2)
+       {
+         OCB_INPUT(pos + 0);
+         OCB_INPUT(pos + 1);
+         pos += 2;
+         nblocks -= 2;
+       }
+      if (nblocks >= 1)
+       {
+         OCB_INPUT(pos + 0);
+         pos += 1;
+         nblocks -= 1;
+       }
+
+      nblocks = pos;
+      nblocks_arg -= pos;
+      pos = 0;
+      km_execute (function, ctx->keyschenc, blocks, blocks,
+                 nblocks * BLOCKSIZE);
+
+      aes_s390x_ocb_checksum (c->u_mode.ocb.aad_sum, blocks, nblocks);
+    }
+
+#undef OCB_INPUT
+#undef OCB_INPUT_4
+#undef OCB_INPUT_16
+
+  c->u_mode.ocb.aad_nblocks = blkn;
+  cipher_block_cpy (c->u_mode.ocb.aad_offset, &offset, BLOCKSIZE);
+
+  if (max_blocks_used)
+    wipememory (&blocks, max_blocks_used * BLOCKSIZE);
+
+  return 0;
+}
+
+int _gcry_aes_s390x_setup_acceleration(RIJNDAEL_context *ctx,
+                                      unsigned int keylen,
+                                      unsigned int hwfeatures,
+                                      cipher_bulk_ops_t *bulk_ops)
+{
+  unsigned int func;
+  unsigned int func_xts;
+  u128_t func_mask;
+  u128_t func_xts_mask;
+
+  if (!(hwfeatures & HWF_S390X_MSA))
+    return 0;
+
+  switch (keylen)
+    {
+    default:
+    case 16:
+      func = KM_FUNCTION_AES_128;
+      func_xts = KM_FUNCTION_XTS_AES_128;
+      func_mask = km_function_to_mask(KM_FUNCTION_AES_128);
+      func_xts_mask = km_function_to_mask(KM_FUNCTION_XTS_AES_128);
+      break;
+    case 24:
+      func = KM_FUNCTION_AES_192;
+      func_xts = 0;
+      func_mask = km_function_to_mask(KM_FUNCTION_AES_192);
+      func_xts_mask = 0; /* XTS-AES192 not available. */
+      break;
+    case 32:
+      func = KM_FUNCTION_AES_256;
+      func_xts = KM_FUNCTION_XTS_AES_256;
+      func_mask = km_function_to_mask(KM_FUNCTION_AES_256);
+      func_xts_mask = km_function_to_mask(KM_FUNCTION_AES_256);
+      break;
+    }
+
+  /* Query KM for supported algorithms and check if acceleration for
+   * requested key-length is available. */
+  if (!(km_query () & func_mask))
+    return 0;
+
+  ctx->km_func = func;
+
+  /* Query KM for supported XTS algorithms. */
+  if (km_query () & func_xts_mask)
+    ctx->km_func_xts = func_xts;
+
+  /* Query KMC for supported algorithms. */
+  if (kmc_query () & func_mask)
+    ctx->kmc_func = func;
+
+  /* Query KMAC for supported algorithms. */
+  if (kmac_query () & func_mask)
+    ctx->kmac_func = func;
+
+  if (hwfeatures & HWF_S390X_MSA_4)
+    {
+      /* Query KMF for supported algorithms. */
+      if (kmf_query () & func_mask)
+       ctx->kmf_func = func;
+
+      /* Query KMO for supported algorithms. */
+      if (kmo_query () & func_mask)
+       ctx->kmo_func = func;
+    }
+
+  if (hwfeatures & HWF_S390X_MSA_8)
+    {
+      /* Query KMA for supported algorithms. */
+      if (kma_query () & func_mask)
+       ctx->kma_func = func;
+    }
+
+  /* Setup zSeries bulk encryption/decryption routines. */
+
+  if (ctx->km_func)
+    {
+      bulk_ops->ocb_crypt = aes_s390x_ocb_crypt;
+      bulk_ops->ocb_auth = aes_s390x_ocb_auth;
+
+      /* CFB128 decryption uses KM instruction, instead of KMF. */
+      bulk_ops->cfb_dec = aes_s390x_cfb128_dec;
+    }
+
+  if (ctx->km_func_xts)
+    {
+      bulk_ops->xts_crypt = aes_s390x_xts_crypt;
+    }
+
+  if (ctx->kmc_func)
+    {
+      if(ctx->kmac_func)
+       {
+         /* Either KMC or KMAC used depending on 'cbc_mac' parameter. */
+         bulk_ops->cbc_enc = aes_s390x_cbc_enc;
+       }
+
+      bulk_ops->cbc_dec = aes_s390x_cbc_dec;
+    }
+
+  if (ctx->kmf_func)
+    {
+      bulk_ops->cfb_enc = aes_s390x_cfb128_enc;
+    }
+
+  if (ctx->kmo_func)
+    {
+      bulk_ops->ofb_enc = aes_s390x_ofb_enc;
+    }
+
+  if (ctx->kma_func)
+    {
+      bulk_ops->ctr_enc = aes_s390x_ctr128_enc;
+
+      if (kimd_query () & km_function_to_mask (KMID_FUNCTION_GHASH))
+       {
+         /* KIMD based GHASH implementation is required with AES-GCM
+          * acceleration. */
+         bulk_ops->gcm_crypt = aes_s390x_gcm_crypt;
+       }
+    }
+
+  return 1;
+}
+
+void _gcry_aes_s390x_setkey(RIJNDAEL_context *ctx, const byte *key)
+{
+  unsigned int keylen = 16 + (ctx->rounds - 10) * 4;
+  memcpy (ctx->keyschenc, key, keylen);
+}
+
+void _gcry_aes_s390x_prepare_decryption(RIJNDAEL_context *ctx)
+{
+  /* Do nothing. */
+  (void)ctx;
+}
+
+#endif /* USE_S390X_CRYPTO */
diff --git a/grub-core/lib/libgcrypt/cipher/rijndael-ssse3-amd64-asm.S 
b/grub-core/lib/libgcrypt/cipher/rijndael-ssse3-amd64-asm.S
new file mode 100644
index 000000000..b98dca26e
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/rijndael-ssse3-amd64-asm.S
@@ -0,0 +1,874 @@
+/* SSSE3 vector permutation AES for Libgcrypt
+ * Copyright (C) 2014-2017 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * The code is based on the public domain library libvpaes version 0.5
+ * available at http://crypto.stanford.edu/vpaes/ and which carries
+ * this notice:
+ *
+ *     libvpaes: constant-time SSSE3 AES encryption and decryption.
+ *     version 0.5
+ *
+ *     By Mike Hamburg, Stanford University, 2009.  Public domain.
+ *     I wrote essentially all of this code.  I did not write the test
+ *     vectors; they are the NIST known answer tests.  I hereby release all
+ *     the code and documentation here that I wrote into the public domain.
+ *
+ *     This is an implementation of AES following my paper,
+ *       "Accelerating AES with Vector Permute Instructions
+ *       CHES 2009; http://shiftleft.org/papers/vector_aes/
+ */
+
+#if defined(__x86_64__)
+#include <config.h>
+#if defined(HAVE_GCC_INLINE_ASM_SSSE3) && \
+    (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+
+#include "asm-common-amd64.h"
+
+.text
+
+##
+##  _gcry_aes_ssse3_enc_preload
+##
+ELF(.type _gcry_aes_ssse3_enc_preload,@function)
+.globl _gcry_aes_ssse3_enc_preload
+_gcry_aes_ssse3_enc_preload:
+       CFI_STARTPROC();
+       ENTER_SYSV_FUNC_PARAMS_0_4
+       lea     .Laes_consts(%rip), %rax
+       movdqa            (%rax), %xmm9  # 0F
+       movdqa  .Lk_inv   (%rax), %xmm10 # inv
+       movdqa  .Lk_inv+16(%rax), %xmm11 # inva
+       movdqa  .Lk_sb1   (%rax), %xmm13 # sb1u
+       movdqa  .Lk_sb1+16(%rax), %xmm12 # sb1t
+       movdqa  .Lk_sb2   (%rax), %xmm15 # sb2u
+       movdqa  .Lk_sb2+16(%rax), %xmm14 # sb2t
+       EXIT_SYSV_FUNC
+       ret_spec_stop
+       CFI_ENDPROC();
+ELF(.size _gcry_aes_ssse3_enc_preload,.-_gcry_aes_ssse3_enc_preload)
+
+##
+##  _gcry_aes_ssse3_dec_preload
+##
+ELF(.type _gcry_aes_ssse3_dec_preload,@function)
+.globl _gcry_aes_ssse3_dec_preload
+_gcry_aes_ssse3_dec_preload:
+       CFI_STARTPROC();
+       ENTER_SYSV_FUNC_PARAMS_0_4
+       lea     .Laes_consts(%rip), %rax
+       movdqa            (%rax), %xmm9   # 0F
+       movdqa  .Lk_inv   (%rax), %xmm10  # inv
+       movdqa  .Lk_inv+16(%rax), %xmm11  # inva
+       movdqa  .Lk_dsb9   (%rax), %xmm13 # sb9u
+       movdqa  .Lk_dsb9+16(%rax), %xmm12 # sb9t
+       movdqa  .Lk_dsbd   (%rax), %xmm15 # sbdu
+       movdqa  .Lk_dsbb   (%rax), %xmm14 # sbbu
+       movdqa  .Lk_dsbe   (%rax), %xmm8  # sbeu
+       EXIT_SYSV_FUNC
+       ret_spec_stop
+       CFI_ENDPROC();
+ELF(.size _gcry_aes_ssse3_dec_preload,.-_gcry_aes_ssse3_dec_preload)
+
+##
+## Constant-time SSSE3 AES core implementation.
+##
+## By Mike Hamburg (Stanford University), 2009
+## Public domain.
+##
+
+##
+##  _aes_encrypt_core
+##
+##  AES-encrypt %xmm0.
+##
+##  Inputs:
+##     %xmm0 = input
+##     %xmm9-%xmm15 as in .Laes_preheat
+##    (%rdi) = scheduled keys
+##     %rsi  = nrounds
+##
+##  Output in %xmm0
+##  Clobbers  %xmm1-%xmm4, %r9, %r11, %rax, %rcx, %rdx
+##  Preserves %xmm6 - %xmm7 so you get some local vectors
+##
+##
+.align 16
+ELF(.type _gcry_aes_ssse3_encrypt_core,@function)
+.globl _gcry_aes_ssse3_encrypt_core
+_gcry_aes_ssse3_encrypt_core:
+_aes_encrypt_core:
+       CFI_STARTPROC();
+       ENTER_SYSV_FUNC_PARAMS_0_4
+       mov     %rdi,   %rdx
+       leaq    -1(%rsi), %rax
+       lea     .Laes_consts(%rip), %rcx
+       leaq    .Lk_mc_backward(%rcx), %rdi
+       mov     $16,    %rsi
+       movdqa  .Lk_ipt   (%rcx), %xmm2 # iptlo
+       movdqa  %xmm9,  %xmm1
+       pandn   %xmm0,  %xmm1
+       psrld   $4,     %xmm1
+       pand    %xmm9,  %xmm0
+       pshufb  %xmm0,  %xmm2
+       movdqa  .Lk_ipt+16(%rcx), %xmm0 # ipthi
+       pshufb  %xmm1,  %xmm0
+       pxor    (%rdx),%xmm2
+       pxor    %xmm2,  %xmm0
+       add     $16,    %rdx
+       jmp     .Laes_entry
+
+.align 8
+.Laes_loop:
+       # middle of middle round
+       movdqa  %xmm13, %xmm4   # 4 : sb1u
+       pshufb  %xmm2,  %xmm4   # 4 = sb1u
+       pxor    (%rdx), %xmm4   # 4 = sb1u + k
+       movdqa  %xmm12, %xmm0   # 0 : sb1t
+       pshufb  %xmm3,  %xmm0   # 0 = sb1t
+       pxor    %xmm4,  %xmm0   # 0 = A
+       movdqa  %xmm15, %xmm4   # 4 : sb2u
+       pshufb  %xmm2,  %xmm4   # 4 = sb2u
+       movdqa  .Lk_mc_forward-.Lk_mc_backward(%rsi,%rdi), %xmm1
+       movdqa  %xmm14, %xmm2   # 2 : sb2t
+       pshufb  %xmm3,  %xmm2   # 2 = sb2t
+       pxor    %xmm4,  %xmm2   # 2 = 2A
+       movdqa  %xmm0,  %xmm3   # 3 = A
+       pshufb  %xmm1,  %xmm0   # 0 = B
+       pxor    %xmm2,  %xmm0   # 0 = 2A+B
+       pshufb  (%rsi,%rdi), %xmm3  # 3 = D
+       lea     16(%esi),%esi   # next mc
+       pxor    %xmm0,  %xmm3   # 3 = 2A+B+D
+       lea     16(%rdx),%rdx   # next key
+       pshufb  %xmm1,  %xmm0   # 0 = 2B+C
+       pxor    %xmm3,  %xmm0   # 0 = 2A+3B+C+D
+       and     $48, %rsi       # ... mod 4
+       dec     %rax            # nr--
+
+.Laes_entry:
+       # top of round
+       movdqa  %xmm9,  %xmm1   # 1 : i
+       pandn   %xmm0,  %xmm1   # 1 = i<<4
+       psrld   $4,     %xmm1   # 1 = i
+       pand    %xmm9,  %xmm0   # 0 = k
+       movdqa  %xmm11, %xmm2   # 2 : a/k
+       pshufb  %xmm0,  %xmm2   # 2 = a/k
+       pxor    %xmm1,  %xmm0   # 0 = j
+       movdqa  %xmm10, %xmm3   # 3 : 1/i
+       pshufb  %xmm1,  %xmm3   # 3 = 1/i
+       pxor    %xmm2,  %xmm3   # 3 = iak = 1/i + a/k
+       movdqa  %xmm10, %xmm4   # 4 : 1/j
+       pshufb  %xmm0,  %xmm4   # 4 = 1/j
+       pxor    %xmm2,  %xmm4   # 4 = jak = 1/j + a/k
+       movdqa  %xmm10, %xmm2   # 2 : 1/iak
+       pshufb  %xmm3,  %xmm2   # 2 = 1/iak
+       pxor    %xmm0,  %xmm2   # 2 = io
+       movdqa  %xmm10, %xmm3   # 3 : 1/jak
+       pshufb  %xmm4,  %xmm3   # 3 = 1/jak
+       pxor    %xmm1,  %xmm3   # 3 = jo
+       jnz     .Laes_loop
+
+       # middle of last round
+       movdqa  .Lk_sbo(%rcx), %xmm4    # 3 : sbou
+       pshufb  %xmm2,  %xmm4   # 4 = sbou
+       pxor    (%rdx), %xmm4   # 4 = sb1u + k
+       movdqa  .Lk_sbo+16(%rcx), %xmm0 # 0 : sbot
+       pshufb  %xmm3,  %xmm0   # 0 = sb1t
+       pxor    %xmm4,  %xmm0   # 0 = A
+       pshufb  .Lk_sr(%rsi,%rcx), %xmm0
+       EXIT_SYSV_FUNC
+       ret_spec_stop
+       CFI_ENDPROC();
+ELF(.size _aes_encrypt_core,.-_aes_encrypt_core)
+
+##
+##  Decryption core
+##
+##  Same API as encryption core.
+##
+.align 16
+.globl _gcry_aes_ssse3_decrypt_core
+ELF(.type _gcry_aes_ssse3_decrypt_core,@function)
+_gcry_aes_ssse3_decrypt_core:
+_aes_decrypt_core:
+       CFI_STARTPROC();
+       ENTER_SYSV_FUNC_PARAMS_0_4
+       mov     %rdi,   %rdx
+       lea     .Laes_consts(%rip), %rcx
+       subl    $1,     %esi
+       movl    %esi,   %eax
+       shll    $4,     %esi
+       xorl    $48,    %esi
+       andl    $48,    %esi
+       movdqa  .Lk_dipt   (%rcx), %xmm2 # iptlo
+       movdqa  %xmm9,  %xmm1
+       pandn   %xmm0,  %xmm1
+       psrld   $4,     %xmm1
+       pand    %xmm9,  %xmm0
+       pshufb  %xmm0,  %xmm2
+       movdqa  .Lk_dipt+16(%rcx), %xmm0 # ipthi
+       pshufb  %xmm1,  %xmm0
+       pxor    (%rdx), %xmm2
+       pxor    %xmm2,  %xmm0
+       movdqa  .Lk_mc_forward+48(%rcx), %xmm5
+       lea     16(%rdx), %rdx
+       neg     %rax
+       jmp     .Laes_dec_entry
+
+.align 16
+.Laes_dec_loop:
+##
+##  Inverse mix columns
+##
+       movdqa  %xmm13, %xmm4           # 4 : sb9u
+       pshufb  %xmm2,  %xmm4           # 4 = sb9u
+       pxor    (%rdx), %xmm4
+       movdqa  %xmm12, %xmm0           # 0 : sb9t
+       pshufb  %xmm3,  %xmm0           # 0 = sb9t
+       movdqa  .Lk_dsbd+16(%rcx),%xmm1 # 1 : sbdt
+       pxor    %xmm4,  %xmm0           # 0 = ch
+       lea     16(%rdx), %rdx          # next round key
+
+       pshufb  %xmm5,  %xmm0           # MC ch
+       movdqa  %xmm15, %xmm4           # 4 : sbdu
+       pshufb  %xmm2,  %xmm4           # 4 = sbdu
+       pxor    %xmm0,  %xmm4           # 4 = ch
+       pshufb  %xmm3,  %xmm1           # 1 = sbdt
+       pxor    %xmm4,  %xmm1           # 1 = ch
+
+       pshufb  %xmm5,  %xmm1           # MC ch
+       movdqa  %xmm14, %xmm4           # 4 : sbbu
+       pshufb  %xmm2,  %xmm4           # 4 = sbbu
+       inc     %rax                    # nr--
+       pxor    %xmm1,  %xmm4           # 4 = ch
+       movdqa  .Lk_dsbb+16(%rcx),%xmm0 # 0 : sbbt
+       pshufb  %xmm3,  %xmm0           # 0 = sbbt
+       pxor    %xmm4,  %xmm0           # 0 = ch
+
+       pshufb  %xmm5,  %xmm0           # MC ch
+       movdqa  %xmm8,  %xmm4           # 4 : sbeu
+       pshufb  %xmm2,  %xmm4           # 4 = sbeu
+       pshufd  $0x93,  %xmm5,  %xmm5
+       pxor    %xmm0,  %xmm4           # 4 = ch
+       movdqa  .Lk_dsbe+16(%rcx),%xmm0 # 0 : sbet
+       pshufb  %xmm3,  %xmm0           # 0 = sbet
+       pxor    %xmm4,  %xmm0           # 0 = ch
+
+.Laes_dec_entry:
+       # top of round
+       movdqa  %xmm9,  %xmm1   # 1 : i
+       pandn   %xmm0,  %xmm1   # 1 = i<<4
+       psrld   $4,     %xmm1   # 1 = i
+       pand    %xmm9,  %xmm0   # 0 = k
+       movdqa  %xmm11, %xmm2   # 2 : a/k
+       pshufb  %xmm0,  %xmm2   # 2 = a/k
+       pxor    %xmm1,  %xmm0   # 0 = j
+       movdqa  %xmm10, %xmm3   # 3 : 1/i
+       pshufb  %xmm1,  %xmm3   # 3 = 1/i
+       pxor    %xmm2,  %xmm3   # 3 = iak = 1/i + a/k
+       movdqa  %xmm10, %xmm4   # 4 : 1/j
+       pshufb  %xmm0,  %xmm4   # 4 = 1/j
+       pxor    %xmm2,  %xmm4   # 4 = jak = 1/j + a/k
+       movdqa  %xmm10, %xmm2   # 2 : 1/iak
+       pshufb  %xmm3,  %xmm2   # 2 = 1/iak
+       pxor    %xmm0,  %xmm2   # 2 = io
+       movdqa  %xmm10, %xmm3   # 3 : 1/jak
+       pshufb  %xmm4,  %xmm3   # 3 = 1/jak
+       pxor    %xmm1,  %xmm3   # 3 = jo
+       jnz     .Laes_dec_loop
+
+       # middle of last round
+       movdqa  .Lk_dsbo(%rcx), %xmm4           # 3 : sbou
+       pshufb  %xmm2,  %xmm4   # 4 = sbou
+       pxor    (%rdx), %xmm4   # 4 = sb1u + k
+       movdqa  .Lk_dsbo+16(%rcx), %xmm0        # 0 : sbot
+       pshufb  %xmm3,  %xmm0   # 0 = sb1t
+       pxor    %xmm4,  %xmm0   # 0 = A
+       pshufb  .Lk_sr(%rsi,%rcx), %xmm0
+       EXIT_SYSV_FUNC
+       ret_spec_stop
+       CFI_ENDPROC();
+ELF(.size _aes_decrypt_core,.-_aes_decrypt_core)
+
+########################################################
+##                                                    ##
+##                  AES key schedule                  ##
+##                                                    ##
+########################################################
+
+.align 16
+.globl _gcry_aes_ssse3_schedule_core
+ELF(.type _gcry_aes_ssse3_schedule_core,@function)
+_gcry_aes_ssse3_schedule_core:
+_aes_schedule_core:
+       # rdi = key
+       # rsi = size in bits
+       # rdx = buffer
+       # rcx = direction.  0=encrypt, 1=decrypt
+       # r8 = rotoffs
+       CFI_STARTPROC();
+       ENTER_SYSV_FUNC_PARAMS_5
+
+       # load the tables
+       lea     .Laes_consts(%rip), %r10
+       movdqa            (%r10), %xmm9  # 0F
+       movdqa  .Lk_inv   (%r10), %xmm10 # inv
+       movdqa  .Lk_inv+16(%r10), %xmm11 # inva
+       movdqa  .Lk_sb1   (%r10), %xmm13 # sb1u
+       movdqa  .Lk_sb1+16(%r10), %xmm12 # sb1t
+       movdqa  .Lk_sb2   (%r10), %xmm15 # sb2u
+       movdqa  .Lk_sb2+16(%r10), %xmm14 # sb2t
+
+       movdqa  .Lk_rcon(%r10), %xmm8   # load rcon
+       movdqu  (%rdi), %xmm0           # load key (unaligned)
+
+       # input transform
+       movdqu  %xmm0,  %xmm3
+       lea     .Lk_ipt(%r10), %r11
+       call    .Laes_schedule_transform
+       movdqu  %xmm0,  %xmm7
+
+       test    %rcx,   %rcx
+       jnz     .Laes_schedule_am_decrypting
+
+       # encrypting, output zeroth round key after transform
+       movdqa  %xmm0,  (%rdx)
+       jmp     .Laes_schedule_go
+
+.Laes_schedule_am_decrypting:
+       # decrypting, output zeroth round key after shiftrows
+       pshufb  .Lk_sr(%r8,%r10),%xmm3
+       movdqa  %xmm3,  (%rdx)
+       xor     $48,    %r8
+
+.Laes_schedule_go:
+       cmp     $192,   %rsi
+       je      .Laes_schedule_192
+       cmp     $256,   %rsi
+       je      .Laes_schedule_256
+       # 128: fall though
+
+##
+##  .Laes_schedule_128
+##
+##  128-bit specific part of key schedule.
+##
+##  This schedule is really simple, because all its parts
+##  are accomplished by the subroutines.
+##
+.Laes_schedule_128:
+       mov     $10, %rsi
+
+.Laes_schedule_128_L:
+       call    .Laes_schedule_round
+       dec     %rsi
+       jz      .Laes_schedule_mangle_last
+       call    .Laes_schedule_mangle   # write output
+       jmp     .Laes_schedule_128_L
+
+##
+##  .Laes_schedule_192
+##
+##  192-bit specific part of key schedule.
+##
+##  The main body of this schedule is the same as the 128-bit
+##  schedule, but with more smearing.  The long, high side is
+##  stored in %xmm7 as before, and the short, low side is in
+##  the high bits of %xmm6.
+##
+##  This schedule is somewhat nastier, however, because each
+##  round produces 192 bits of key material, or 1.5 round keys.
+##  Therefore, on each cycle we do 2 rounds and produce 3 round
+##  keys.
+##
+.Laes_schedule_192:
+       movdqu  8(%rdi),%xmm0           # load key part 2 (very unaligned)
+       call    .Laes_schedule_transform        # input transform
+       pshufd  $0x0E,  %xmm0,  %xmm6
+       pslldq  $8,     %xmm6           # clobber low side with zeros
+       mov     $4,     %rsi
+
+.Laes_schedule_192_L:
+       call    .Laes_schedule_round
+       palignr $8,%xmm6,%xmm0
+       call    .Laes_schedule_mangle   # save key n
+       call    .Laes_schedule_192_smear
+       call    .Laes_schedule_mangle   # save key n+1
+       call    .Laes_schedule_round
+       dec     %rsi
+       jz      .Laes_schedule_mangle_last
+       call    .Laes_schedule_mangle   # save key n+2
+       call    .Laes_schedule_192_smear
+       jmp     .Laes_schedule_192_L
+
+##
+##  .Laes_schedule_192_smear
+##
+##  Smear the short, low side in the 192-bit key schedule.
+##
+##  Inputs:
+##    %xmm7: high side, b  a  x  y
+##    %xmm6:  low side, d  c  0  0
+##    %xmm13: 0
+##
+##  Outputs:
+##    %xmm6: b+c+d  b+c  0  0
+##    %xmm0: b+c+d  b+c  b  a
+##
+.Laes_schedule_192_smear:
+       pshufd  $0x80,  %xmm6,  %xmm0   # d c 0 0 -> c 0 0 0
+       pxor    %xmm0,  %xmm6           # -> c+d c 0 0
+       pshufd  $0xFE,  %xmm7,  %xmm0   # b a _ _ -> b b b a
+       pxor    %xmm6,  %xmm0           # -> b+c+d b+c b a
+       pshufd  $0x0E,  %xmm0,  %xmm6
+       pslldq  $8,     %xmm6           # clobber low side with zeros
+       ret_spec_stop
+
+##
+##  .Laes_schedule_256
+##
+##  256-bit specific part of key schedule.
+##
+##  The structure here is very similar to the 128-bit
+##  schedule, but with an additional 'low side' in
+##  %xmm6.  The low side's rounds are the same as the
+##  high side's, except no rcon and no rotation.
+##
+.Laes_schedule_256:
+       movdqu  16(%rdi),%xmm0          # load key part 2 (unaligned)
+       call    .Laes_schedule_transform        # input transform
+       mov     $7, %rsi
+
+.Laes_schedule_256_L:
+       call    .Laes_schedule_mangle   # output low result
+       movdqa  %xmm0,  %xmm6           # save cur_lo in xmm6
+
+       # high round
+       call    .Laes_schedule_round
+       dec     %rsi
+       jz      .Laes_schedule_mangle_last
+       call    .Laes_schedule_mangle
+
+       # low round. swap xmm7 and xmm6
+       pshufd  $0xFF,  %xmm0,  %xmm0
+       movdqa  %xmm7,  %xmm5
+       movdqa  %xmm6,  %xmm7
+       call    .Laes_schedule_low_round
+       movdqa  %xmm5,  %xmm7
+
+       jmp     .Laes_schedule_256_L
+
+##
+##  .Laes_schedule_round
+##
+##  Runs one main round of the key schedule on %xmm0, %xmm7
+##
+##  Specifically, runs subbytes on the high dword of %xmm0
+##  then rotates it by one byte and xors into the low dword of
+##  %xmm7.
+##
+##  Adds rcon from low byte of %xmm8, then rotates %xmm8 for
+##  next rcon.
+##
+##  Smears the dwords of %xmm7 by xoring the low into the
+##  second low, result into third, result into highest.
+##
+##  Returns results in %xmm7 = %xmm0.
+##  Clobbers %xmm1-%xmm4, %r11.
+##
+.Laes_schedule_round:
+       # extract rcon from xmm8
+       pxor    %xmm1,  %xmm1
+       palignr $15,    %xmm8,  %xmm1
+       palignr $15,    %xmm8,  %xmm8
+       pxor    %xmm1,  %xmm7
+
+       # rotate
+       pshufd  $0xFF,  %xmm0,  %xmm0
+       palignr $1,     %xmm0,  %xmm0
+
+       # fall through...
+
+       # low round: same as high round, but no rotation and no rcon.
+.Laes_schedule_low_round:
+       # smear xmm7
+       movdqa  %xmm7,  %xmm1
+       pslldq  $4,     %xmm7
+       pxor    %xmm1,  %xmm7
+       movdqa  %xmm7,  %xmm1
+       pslldq  $8,     %xmm7
+       pxor    %xmm1,  %xmm7
+       pxor    .Lk_s63(%r10), %xmm7
+
+       # subbytes
+       movdqa  %xmm9,  %xmm1
+       pandn   %xmm0,  %xmm1
+       psrld   $4,     %xmm1           # 1 = i
+       pand    %xmm9,  %xmm0           # 0 = k
+       movdqa  %xmm11, %xmm2           # 2 : a/k
+       pshufb  %xmm0,  %xmm2           # 2 = a/k
+       pxor    %xmm1,  %xmm0           # 0 = j
+       movdqa  %xmm10, %xmm3           # 3 : 1/i
+       pshufb  %xmm1,  %xmm3           # 3 = 1/i
+       pxor    %xmm2,  %xmm3           # 3 = iak = 1/i + a/k
+       movdqa  %xmm10, %xmm4           # 4 : 1/j
+       pshufb  %xmm0,  %xmm4           # 4 = 1/j
+       pxor    %xmm2,  %xmm4           # 4 = jak = 1/j + a/k
+       movdqa  %xmm10, %xmm2           # 2 : 1/iak
+       pshufb  %xmm3,  %xmm2           # 2 = 1/iak
+       pxor    %xmm0,  %xmm2           # 2 = io
+       movdqa  %xmm10, %xmm3           # 3 : 1/jak
+       pshufb  %xmm4,  %xmm3           # 3 = 1/jak
+       pxor    %xmm1,  %xmm3           # 3 = jo
+       movdqa  .Lk_sb1(%r10), %xmm4    # 4 : sbou
+       pshufb  %xmm2,  %xmm4           # 4 = sbou
+       movdqa  .Lk_sb1+16(%r10), %xmm0 # 0 : sbot
+       pshufb  %xmm3,  %xmm0           # 0 = sb1t
+       pxor    %xmm4,  %xmm0           # 0 = sbox output
+
+       # add in smeared stuff
+       pxor    %xmm7,  %xmm0
+       movdqa  %xmm0,  %xmm7
+       ret_spec_stop
+
+##
+##  .Laes_schedule_transform
+##
+##  Linear-transform %xmm0 according to tables at (%r11)
+##
+##  Requires that %xmm9 = 0x0F0F... as in preheat
+##  Output in %xmm0
+##  Clobbers %xmm1, %xmm2
+##
+.Laes_schedule_transform:
+       movdqa  %xmm9,  %xmm1
+       pandn   %xmm0,  %xmm1
+       psrld   $4,     %xmm1
+       pand    %xmm9,  %xmm0
+       movdqa  (%r11), %xmm2   # lo
+       pshufb  %xmm0,  %xmm2
+       movdqa  16(%r11), %xmm0 # hi
+       pshufb  %xmm1,  %xmm0
+       pxor    %xmm2,  %xmm0
+       ret_spec_stop
+
+##
+##  .Laes_schedule_mangle
+##
+##  Mangle xmm0 from (basis-transformed) standard version
+##  to our version.
+##
+##  On encrypt,
+##    xor with 0x63
+##    multiply by circulant 0,1,1,1
+##    apply shiftrows transform
+##
+##  On decrypt,
+##    xor with 0x63
+##    multiply by 'inverse mixcolumns' circulant E,B,D,9
+##    deskew
+##    apply shiftrows transform
+##
+##
+##  Writes out to (%rdx), and increments or decrements it
+##  Keeps track of round number mod 4 in %r8
+##  Preserves xmm0
+##  Clobbers xmm1-xmm5
+##
+.Laes_schedule_mangle:
+       movdqa  %xmm0,  %xmm4   # save xmm0 for later
+       movdqa  .Lk_mc_forward(%r10),%xmm5
+       test    %rcx,   %rcx
+       jnz     .Laes_schedule_mangle_dec
+
+       # encrypting
+       add     $16,    %rdx
+       pxor    .Lk_s63(%r10),%xmm4
+       pshufb  %xmm5,  %xmm4
+       movdqa  %xmm4,  %xmm3
+       pshufb  %xmm5,  %xmm4
+       pxor    %xmm4,  %xmm3
+       pshufb  %xmm5,  %xmm4
+       pxor    %xmm4,  %xmm3
+
+       jmp     .Laes_schedule_mangle_both
+
+.Laes_schedule_mangle_dec:
+       lea     .Lk_dks_1(%r10), %r11   # first table: *9
+       call    .Laes_schedule_transform
+       movdqa  %xmm0,  %xmm3
+       pshufb  %xmm5,  %xmm3
+
+       add     $32,    %r11            # next table:  *B
+       call    .Laes_schedule_transform
+       pxor    %xmm0,  %xmm3
+       pshufb  %xmm5,  %xmm3
+
+       add     $32,    %r11            # next table:  *D
+       call    .Laes_schedule_transform
+       pxor    %xmm0,  %xmm3
+       pshufb  %xmm5,  %xmm3
+
+       add     $32,    %r11            # next table:  *E
+       call    .Laes_schedule_transform
+       pxor    %xmm0,  %xmm3
+       pshufb  %xmm5,  %xmm3
+
+       movdqa  %xmm4,  %xmm0           # restore %xmm0
+       add     $-16,   %rdx
+
+.Laes_schedule_mangle_both:
+       pshufb  .Lk_sr(%r8,%r10),%xmm3
+       add     $-16,   %r8
+       and     $48,    %r8
+       movdqa  %xmm3,  (%rdx)
+       ret_spec_stop
+
+##
+##  .Laes_schedule_mangle_last
+##
+##  Mangler for last round of key schedule
+##  Mangles %xmm0
+##    when encrypting, outputs out(%xmm0) ^ 63
+##    when decrypting, outputs unskew(%xmm0)
+##
+##  Always called right before return... jumps to cleanup and exits
+##
+.Laes_schedule_mangle_last:
+       # schedule last round key from xmm0
+       lea     .Lk_deskew(%r10),%r11   # prepare to deskew
+       test    %rcx,   %rcx
+       jnz     .Laes_schedule_mangle_last_dec
+
+       # encrypting
+       pshufb  .Lk_sr(%r8,%r10),%xmm0  # output permute
+       lea     .Lk_opt(%r10),  %r11    # prepare to output transform
+       add     $32,    %rdx
+
+.Laes_schedule_mangle_last_dec:
+       add     $-16,   %rdx
+       pxor    .Lk_s63(%r10),  %xmm0
+       call    .Laes_schedule_transform # output transform
+       movdqa  %xmm0,  (%rdx)          # save last key
+
+       #_aes_cleanup
+       pxor    %xmm0,  %xmm0
+       pxor    %xmm1,  %xmm1
+       pxor    %xmm2,  %xmm2
+       pxor    %xmm3,  %xmm3
+       pxor    %xmm4,  %xmm4
+       pxor    %xmm5,  %xmm5
+       pxor    %xmm6,  %xmm6
+       pxor    %xmm7,  %xmm7
+       pxor    %xmm8,  %xmm8
+       EXIT_SYSV_FUNC
+       ret_spec_stop
+       CFI_ENDPROC();
+ELF(.size _gcry_aes_ssse3_schedule_core,.-_gcry_aes_ssse3_schedule_core)
+
+########################################################
+##                                                    ##
+##                     Constants                      ##
+##                                                    ##
+########################################################
+
+.align 16
+ELF(.type _aes_consts,@object)
+.Laes_consts:
+_aes_consts:
+       # s0F
+       .Lk_s0F = .-.Laes_consts
+       .quad   0x0F0F0F0F0F0F0F0F
+       .quad   0x0F0F0F0F0F0F0F0F
+
+       # input transform (lo, hi)
+       .Lk_ipt = .-.Laes_consts
+       .quad   0xC2B2E8985A2A7000
+       .quad   0xCABAE09052227808
+       .quad   0x4C01307D317C4D00
+       .quad   0xCD80B1FCB0FDCC81
+
+       # inv, inva
+       .Lk_inv = .-.Laes_consts
+       .quad   0x0E05060F0D080180
+       .quad   0x040703090A0B0C02
+       .quad   0x01040A060F0B0780
+       .quad   0x030D0E0C02050809
+
+       # sb1u, sb1t
+       .Lk_sb1 = .-.Laes_consts
+       .quad   0xB19BE18FCB503E00
+       .quad   0xA5DF7A6E142AF544
+       .quad   0x3618D415FAE22300
+       .quad   0x3BF7CCC10D2ED9EF
+
+
+       # sb2u, sb2t
+       .Lk_sb2 = .-.Laes_consts
+       .quad   0xE27A93C60B712400
+       .quad   0x5EB7E955BC982FCD
+       .quad   0x69EB88400AE12900
+       .quad   0xC2A163C8AB82234A
+
+       # sbou, sbot
+       .Lk_sbo = .-.Laes_consts
+       .quad   0xD0D26D176FBDC700
+       .quad   0x15AABF7AC502A878
+       .quad   0xCFE474A55FBB6A00
+       .quad   0x8E1E90D1412B35FA
+
+       # mc_forward
+       .Lk_mc_forward = .-.Laes_consts
+       .quad   0x0407060500030201
+       .quad   0x0C0F0E0D080B0A09
+       .quad   0x080B0A0904070605
+       .quad   0x000302010C0F0E0D
+       .quad   0x0C0F0E0D080B0A09
+       .quad   0x0407060500030201
+       .quad   0x000302010C0F0E0D
+       .quad   0x080B0A0904070605
+
+       # mc_backward
+       .Lk_mc_backward = .-.Laes_consts
+       .quad   0x0605040702010003
+       .quad   0x0E0D0C0F0A09080B
+       .quad   0x020100030E0D0C0F
+       .quad   0x0A09080B06050407
+       .quad   0x0E0D0C0F0A09080B
+       .quad   0x0605040702010003
+       .quad   0x0A09080B06050407
+       .quad   0x020100030E0D0C0F
+
+       # sr
+       .Lk_sr = .-.Laes_consts
+       .quad   0x0706050403020100
+       .quad   0x0F0E0D0C0B0A0908
+       .quad   0x030E09040F0A0500
+       .quad   0x0B06010C07020D08
+       .quad   0x0F060D040B020900
+       .quad   0x070E050C030A0108
+       .quad   0x0B0E0104070A0D00
+       .quad   0x0306090C0F020508
+
+       # rcon
+       .Lk_rcon = .-.Laes_consts
+       .quad   0x1F8391B9AF9DEEB6
+       .quad   0x702A98084D7C7D81
+
+       # s63: all equal to 0x63 transformed
+       .Lk_s63 = .-.Laes_consts
+       .quad   0x5B5B5B5B5B5B5B5B
+       .quad   0x5B5B5B5B5B5B5B5B
+
+       # output transform
+       .Lk_opt = .-.Laes_consts
+       .quad   0xFF9F4929D6B66000
+       .quad   0xF7974121DEBE6808
+       .quad   0x01EDBD5150BCEC00
+       .quad   0xE10D5DB1B05C0CE0
+
+       # deskew tables: inverts the sbox's 'skew'
+       .Lk_deskew = .-.Laes_consts
+       .quad   0x07E4A34047A4E300
+       .quad   0x1DFEB95A5DBEF91A
+       .quad   0x5F36B5DC83EA6900
+       .quad   0x2841C2ABF49D1E77
+
+##
+##  Decryption stuff
+##  Key schedule constants
+##
+       # decryption key schedule: x -> invskew x*9
+       .Lk_dks_1 = .-.Laes_consts
+       .quad   0xB6116FC87ED9A700
+       .quad   0x4AED933482255BFC
+       .quad   0x4576516227143300
+       .quad   0x8BB89FACE9DAFDCE
+
+       # decryption key schedule: invskew x*9 -> invskew x*D
+       .Lk_dks_2 = .-.Laes_consts
+       .quad   0x27438FEBCCA86400
+       .quad   0x4622EE8AADC90561
+       .quad   0x815C13CE4F92DD00
+       .quad   0x73AEE13CBD602FF2
+
+       # decryption key schedule: invskew x*D -> invskew x*B
+       .Lk_dks_3 = .-.Laes_consts
+       .quad   0x03C4C50201C6C700
+       .quad   0xF83F3EF9FA3D3CFB
+       .quad   0xEE1921D638CFF700
+       .quad   0xA5526A9D7384BC4B
+
+       # decryption key schedule: invskew x*B -> invskew x*E + 0x63
+       .Lk_dks_4 = .-.Laes_consts
+       .quad   0xE3C390B053732000
+       .quad   0xA080D3F310306343
+       .quad   0xA0CA214B036982E8
+       .quad   0x2F45AEC48CE60D67
+
+##
+##  Decryption stuff
+##  Round function constants
+##
+       # decryption input transform
+       .Lk_dipt = .-.Laes_consts
+       .quad   0x0F505B040B545F00
+       .quad   0x154A411E114E451A
+       .quad   0x86E383E660056500
+       .quad   0x12771772F491F194
+
+       # decryption sbox output *9*u, *9*t
+       .Lk_dsb9 = .-.Laes_consts
+       .quad   0x851C03539A86D600
+       .quad   0xCAD51F504F994CC9
+       .quad   0xC03B1789ECD74900
+       .quad   0x725E2C9EB2FBA565
+
+       # decryption sbox output *D*u, *D*t
+       .Lk_dsbd = .-.Laes_consts
+       .quad   0x7D57CCDFE6B1A200
+       .quad   0xF56E9B13882A4439
+       .quad   0x3CE2FAF724C6CB00
+       .quad   0x2931180D15DEEFD3
+
+       # decryption sbox output *B*u, *B*t
+       .Lk_dsbb = .-.Laes_consts
+       .quad   0xD022649296B44200
+       .quad   0x602646F6B0F2D404
+       .quad   0xC19498A6CD596700
+       .quad   0xF3FF0C3E3255AA6B
+
+       # decryption sbox output *E*u, *E*t
+       .Lk_dsbe = .-.Laes_consts
+       .quad   0x46F2929626D4D000
+       .quad   0x2242600464B4F6B0
+       .quad   0x0C55A6CDFFAAC100
+       .quad   0x9467F36B98593E32
+
+       # decryption sbox final output
+       .Lk_dsbo = .-.Laes_consts
+       .quad   0x1387EA537EF94000
+       .quad   0xC7AA6DB9D4943E2D
+       .quad   0x12D7560F93441D00
+       .quad   0xCA4B8159D8C58E9C
+ELF(.size _aes_consts,.-_aes_consts)
+
+#endif
+#endif
diff --git a/grub-core/lib/libgcrypt/cipher/rijndael-ssse3-amd64.c 
b/grub-core/lib/libgcrypt/cipher/rijndael-ssse3-amd64.c
new file mode 100644
index 000000000..b07238531
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/rijndael-ssse3-amd64.c
@@ -0,0 +1,743 @@
+/* SSSE3 vector permutation AES for Libgcrypt
+ * Copyright (C) 2014-2017 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * The code is based on the public domain library libvpaes version 0.5
+ * available at http://crypto.stanford.edu/vpaes/ and which carries
+ * this notice:
+ *
+ *     libvpaes: constant-time SSSE3 AES encryption and decryption.
+ *     version 0.5
+ *
+ *     By Mike Hamburg, Stanford University, 2009.  Public domain.
+ *     I wrote essentially all of this code.  I did not write the test
+ *     vectors; they are the NIST known answer tests.  I hereby release all
+ *     the code and documentation here that I wrote into the public domain.
+ *
+ *     This is an implementation of AES following my paper,
+ *       "Accelerating AES with Vector Permute Instructions"
+ *       CHES 2009; http://shiftleft.org/papers/vector_aes/
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h> /* for memcmp() */
+
+#include "types.h"  /* for byte and u32 typedefs */
+#include "g10lib.h"
+#include "cipher.h"
+#include "bufhelp.h"
+#include "cipher-selftest.h"
+#include "rijndael-internal.h"
+#include "./cipher-internal.h"
+
+
+#ifdef USE_SSSE3
+
+
+#if _GCRY_GCC_VERSION >= 40400 /* 4.4 */
+/* Prevent compiler from issuing SSE instructions between asm blocks. */
+#  pragma GCC target("no-sse")
+#endif
+#if __clang__
+#  pragma clang attribute push (__attribute__((target("no-sse"))), apply_to = 
function)
+#endif
+
+
+#define ALWAYS_INLINE inline __attribute__((always_inline))
+#define NO_INSTRUMENT_FUNCTION __attribute__((no_instrument_function))
+
+#define ASM_FUNC_ATTR        NO_INSTRUMENT_FUNCTION
+#define ASM_FUNC_ATTR_INLINE ASM_FUNC_ATTR ALWAYS_INLINE
+
+
+/* Copy of ocb_get_l needed here as GCC is unable to inline ocb_get_l
+   because of 'pragma target'. */
+static ASM_FUNC_ATTR_INLINE const unsigned char *
+aes_ocb_get_l (gcry_cipher_hd_t c, u64 n)
+{
+  unsigned long ntz;
+
+  /* Assumes that N != 0. */
+  asm ("rep;bsfl %k[low], %k[ntz]\n\t"
+        : [ntz] "=r" (ntz)
+        : [low] "r" ((unsigned long)n)
+        : "cc");
+
+  return c->u_mode.ocb.L[ntz];
+}
+
+
+/* Assembly functions in rijndael-ssse3-amd64-asm.S. Note that these
+   have custom calling convention (additional XMM parameters). */
+extern void _gcry_aes_ssse3_enc_preload(void);
+extern void _gcry_aes_ssse3_dec_preload(void);
+extern void _gcry_aes_ssse3_schedule_core(const void *key, u64 keybits,
+                                         void *buffer, u64 decrypt,
+                                         u64 rotoffs);
+extern void _gcry_aes_ssse3_encrypt_core(const void *key, u64 nrounds);
+extern void _gcry_aes_ssse3_decrypt_core(const void *key, u64 nrounds);
+
+
+
+/* Two macros to be called prior and after the use of SSSE3
+   instructions.  There should be no external function calls between
+   the use of these macros.  There purpose is to make sure that the
+   SSE registers are cleared and won't reveal any information about
+   the key or the data.  */
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+# define SSSE3_STATE_SIZE (16 * 10)
+/* XMM6-XMM15 are callee-saved registers on WIN64. */
+# define vpaes_ssse3_prepare() \
+    asm volatile ("movdqu %%xmm6,  0*16(%0)\n\t" \
+                  "movdqu %%xmm7,  1*16(%0)\n\t" \
+                  "movdqu %%xmm8,  2*16(%0)\n\t" \
+                  "movdqu %%xmm9,  3*16(%0)\n\t" \
+                  "movdqu %%xmm10, 4*16(%0)\n\t" \
+                  "movdqu %%xmm11, 5*16(%0)\n\t" \
+                  "movdqu %%xmm12, 6*16(%0)\n\t" \
+                  "movdqu %%xmm13, 7*16(%0)\n\t" \
+                  "movdqu %%xmm14, 8*16(%0)\n\t" \
+                  "movdqu %%xmm15, 9*16(%0)\n\t" \
+                  : \
+                  : "r" (ssse3_state) \
+                  : "memory" )
+# define vpaes_ssse3_cleanup() \
+    asm volatile ("pxor        %%xmm0,  %%xmm0 \n\t" \
+                  "pxor        %%xmm1,  %%xmm1 \n\t" \
+                  "pxor        %%xmm2,  %%xmm2 \n\t" \
+                  "pxor        %%xmm3,  %%xmm3 \n\t" \
+                  "pxor        %%xmm4,  %%xmm4 \n\t" \
+                  "pxor        %%xmm5,  %%xmm5 \n\t" \
+                  "movdqu 0*16(%0), %%xmm6 \n\t" \
+                  "movdqu 1*16(%0), %%xmm7 \n\t" \
+                  "movdqu 2*16(%0), %%xmm8 \n\t" \
+                  "movdqu 3*16(%0), %%xmm9 \n\t" \
+                  "movdqu 4*16(%0), %%xmm10 \n\t" \
+                  "movdqu 5*16(%0), %%xmm11 \n\t" \
+                  "movdqu 6*16(%0), %%xmm12 \n\t" \
+                  "movdqu 7*16(%0), %%xmm13 \n\t" \
+                  "movdqu 8*16(%0), %%xmm14 \n\t" \
+                  "movdqu 9*16(%0), %%xmm15 \n\t" \
+                  : \
+                  : "r" (ssse3_state) \
+                  : "memory" )
+#else
+# define SSSE3_STATE_SIZE 1
+# define vpaes_ssse3_prepare() (void)ssse3_state
+# define vpaes_ssse3_cleanup() \
+    asm volatile ("pxor        %%xmm0,  %%xmm0 \n\t" \
+                  "pxor        %%xmm1,  %%xmm1 \n\t" \
+                  "pxor        %%xmm2,  %%xmm2 \n\t" \
+                  "pxor        %%xmm3,  %%xmm3 \n\t" \
+                  "pxor        %%xmm4,  %%xmm4 \n\t" \
+                  "pxor        %%xmm5,  %%xmm5 \n\t" \
+                  "pxor        %%xmm6,  %%xmm6 \n\t" \
+                  "pxor        %%xmm7,  %%xmm7 \n\t" \
+                  "pxor        %%xmm8,  %%xmm8 \n\t" \
+                  ::: "memory" )
+#endif
+
+#define vpaes_ssse3_prepare_enc() \
+    vpaes_ssse3_prepare(); \
+    _gcry_aes_ssse3_enc_preload();
+
+#define vpaes_ssse3_prepare_dec() \
+    vpaes_ssse3_prepare(); \
+    _gcry_aes_ssse3_dec_preload();
+
+
+void ASM_FUNC_ATTR
+_gcry_aes_ssse3_do_setkey (RIJNDAEL_context *ctx, const byte *key)
+{
+  unsigned int keybits = (ctx->rounds - 10) * 32 + 128;
+  byte ssse3_state[SSSE3_STATE_SIZE];
+
+  vpaes_ssse3_prepare();
+
+  _gcry_aes_ssse3_schedule_core(key, keybits, &ctx->keyschenc32[0][0], 0, 48);
+
+  /* Save key for setting up decryption. */
+  if (keybits > 192)
+    asm volatile ("movdqu   (%[src]), %%xmm0\n\t"
+                 "movdqu 16(%[src]), %%xmm1\n\t"
+                 "movdqu %%xmm0,   (%[dst])\n\t"
+                 "movdqu %%xmm1, 16(%[dst])\n\t"
+                 : /* No output */
+                 : [dst] "r" (&ctx->keyschdec32[0][0]), [src] "r" (key)
+                 : "memory" );
+  else if (keybits == 192)
+    asm volatile ("movdqu   (%[src]), %%xmm0\n\t"
+                 "movq   16(%[src]), %%xmm1\n\t"
+                 "movdqu %%xmm0,   (%[dst])\n\t"
+                 "movq   %%xmm1, 16(%[dst])\n\t"
+                 : /* No output */
+                 : [dst] "r" (&ctx->keyschdec32[0][0]), [src] "r" (key)
+                 : "memory" );
+  else
+    asm volatile ("movdqu (%[src]), %%xmm0\n\t"
+                 "movdqu %%xmm0, (%[dst])\n\t"
+                 : /* No output */
+                 : [dst] "r" (&ctx->keyschdec32[0][0]), [src] "r" (key)
+                 : "memory" );
+
+  vpaes_ssse3_cleanup();
+}
+
+
+/* Make a decryption key from an encryption key. */
+static ASM_FUNC_ATTR_INLINE void
+do_ssse3_prepare_decryption (RIJNDAEL_context *ctx,
+                             byte ssse3_state[SSSE3_STATE_SIZE])
+{
+  unsigned int keybits = (ctx->rounds - 10) * 32 + 128;
+
+  vpaes_ssse3_prepare();
+
+  _gcry_aes_ssse3_schedule_core(&ctx->keyschdec32[0][0], keybits,
+                               &ctx->keyschdec32[ctx->rounds][0], 1,
+                               (keybits == 192) ? 0 : 32);
+
+  vpaes_ssse3_cleanup();
+}
+
+void ASM_FUNC_ATTR
+_gcry_aes_ssse3_prepare_decryption (RIJNDAEL_context *ctx)
+{
+  byte ssse3_state[SSSE3_STATE_SIZE];
+
+  do_ssse3_prepare_decryption(ctx, ssse3_state);
+}
+
+
+/* Encrypt one block using the Intel SSSE3 instructions.  Block is input
+* and output through SSE register xmm0. */
+static ASM_FUNC_ATTR_INLINE void
+do_vpaes_ssse3_enc (const RIJNDAEL_context *ctx, unsigned int nrounds)
+{
+  _gcry_aes_ssse3_encrypt_core(ctx->keyschenc32, nrounds);
+}
+
+
+/* Decrypt one block using the Intel SSSE3 instructions.  Block is input
+* and output through SSE register xmm0. */
+static ASM_FUNC_ATTR_INLINE void
+do_vpaes_ssse3_dec (const RIJNDAEL_context *ctx, unsigned int nrounds)
+{
+  _gcry_aes_ssse3_decrypt_core(ctx->keyschdec32, nrounds);
+}
+
+
+unsigned int ASM_FUNC_ATTR
+_gcry_aes_ssse3_encrypt (const RIJNDAEL_context *ctx, unsigned char *dst,
+                        const unsigned char *src)
+{
+  unsigned int nrounds = ctx->rounds;
+  byte ssse3_state[SSSE3_STATE_SIZE];
+
+  vpaes_ssse3_prepare_enc ();
+  asm volatile ("movdqu %[src], %%xmm0\n\t"
+                :
+                : [src] "m" (*src)
+                : "memory" );
+  do_vpaes_ssse3_enc (ctx, nrounds);
+  asm volatile ("movdqu %%xmm0, %[dst]\n\t"
+                : [dst] "=m" (*dst)
+                :
+                : "memory" );
+  vpaes_ssse3_cleanup ();
+  return 0;
+}
+
+
+void ASM_FUNC_ATTR
+_gcry_aes_ssse3_cfb_enc (RIJNDAEL_context *ctx, unsigned char *iv,
+                         unsigned char *outbuf, const unsigned char *inbuf,
+                         size_t nblocks)
+{
+  unsigned int nrounds = ctx->rounds;
+  byte ssse3_state[SSSE3_STATE_SIZE];
+
+  vpaes_ssse3_prepare_enc ();
+
+  asm volatile ("movdqu %[iv], %%xmm0\n\t"
+                : /* No output */
+                : [iv] "m" (*iv)
+                : "memory" );
+
+  for ( ;nblocks; nblocks-- )
+    {
+      do_vpaes_ssse3_enc (ctx, nrounds);
+
+      asm volatile ("movdqu %[inbuf], %%xmm1\n\t"
+                    "pxor %%xmm1, %%xmm0\n\t"
+                    "movdqu %%xmm0, %[outbuf]\n\t"
+                    : [outbuf] "=m" (*outbuf)
+                    : [inbuf] "m" (*inbuf)
+                    : "memory" );
+
+      outbuf += BLOCKSIZE;
+      inbuf  += BLOCKSIZE;
+    }
+
+  asm volatile ("movdqu %%xmm0, %[iv]\n\t"
+                : [iv] "=m" (*iv)
+                :
+                : "memory" );
+
+  vpaes_ssse3_cleanup ();
+}
+
+
+void ASM_FUNC_ATTR
+_gcry_aes_ssse3_cbc_enc (RIJNDAEL_context *ctx, unsigned char *iv,
+                         unsigned char *outbuf, const unsigned char *inbuf,
+                         size_t nblocks, int cbc_mac)
+{
+  unsigned int nrounds = ctx->rounds;
+  byte ssse3_state[SSSE3_STATE_SIZE];
+
+  vpaes_ssse3_prepare_enc ();
+
+  asm volatile ("movdqu %[iv], %%xmm7\n\t"
+                : /* No output */
+                : [iv] "m" (*iv)
+                : "memory" );
+
+  for ( ;nblocks; nblocks-- )
+    {
+      asm volatile ("movdqu %[inbuf], %%xmm0\n\t"
+                    "pxor %%xmm7, %%xmm0\n\t"
+                    : /* No output */
+                    : [inbuf] "m" (*inbuf)
+                    : "memory" );
+
+      do_vpaes_ssse3_enc (ctx, nrounds);
+
+      asm volatile ("movdqa %%xmm0, %%xmm7\n\t"
+                    "movdqu %%xmm0, %[outbuf]\n\t"
+                    : [outbuf] "=m" (*outbuf)
+                    :
+                    : "memory" );
+
+      inbuf += BLOCKSIZE;
+      if (!cbc_mac)
+        outbuf += BLOCKSIZE;
+    }
+
+  asm volatile ("movdqu %%xmm7, %[iv]\n\t"
+                : [iv] "=m" (*iv)
+                :
+                : "memory" );
+
+  vpaes_ssse3_cleanup ();
+}
+
+
+void ASM_FUNC_ATTR
+_gcry_aes_ssse3_ctr_enc (RIJNDAEL_context *ctx, unsigned char *ctr,
+                         unsigned char *outbuf, const unsigned char *inbuf,
+                         size_t nblocks)
+{
+  static const unsigned char be_mask[16] __attribute__ ((aligned (16))) =
+    { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
+  unsigned int nrounds = ctx->rounds;
+  byte ssse3_state[SSSE3_STATE_SIZE];
+  u64 ctrlow;
+
+  vpaes_ssse3_prepare_enc ();
+
+  asm volatile ("movdqa %[mask], %%xmm6\n\t" /* Preload mask */
+                "movdqa (%[ctr]), %%xmm7\n\t"  /* Preload CTR */
+                "movq 8(%[ctr]), %q[ctrlow]\n\t"
+                "bswapq %q[ctrlow]\n\t"
+                : [ctrlow] "=r" (ctrlow)
+                : [mask] "m" (*be_mask),
+                  [ctr] "r" (ctr)
+                : "memory", "cc");
+
+  for ( ;nblocks; nblocks-- )
+    {
+      asm volatile ("movdqa %%xmm7, %%xmm0\n\t"     /* xmm0 := CTR (xmm7)  */
+                    "pcmpeqd %%xmm1, %%xmm1\n\t"
+                    "psrldq $8, %%xmm1\n\t"         /* xmm1 = -1 */
+
+                    "pshufb %%xmm6, %%xmm7\n\t"
+                    "psubq  %%xmm1, %%xmm7\n\t"     /* xmm7++ (big endian) */
+
+                    /* detect if 64-bit carry handling is needed */
+                    "incq   %q[ctrlow]\n\t"
+                    "jnz    .Lno_carry%=\n\t"
+
+                    "pslldq $8, %%xmm1\n\t"         /* move lower 64-bit to 
high */
+                    "psubq   %%xmm1, %%xmm7\n\t"    /* add carry to upper 
64bits */
+
+                    ".Lno_carry%=:\n\t"
+
+                    "pshufb %%xmm6, %%xmm7\n\t"
+                    : [ctrlow] "+r" (ctrlow)
+                    :
+                    : "cc", "memory");
+
+      do_vpaes_ssse3_enc (ctx, nrounds);
+
+      asm volatile ("movdqu %[src], %%xmm1\n\t"      /* xmm1 := input   */
+                    "pxor %%xmm1, %%xmm0\n\t"        /* EncCTR ^= input  */
+                    "movdqu %%xmm0, %[dst]"          /* Store EncCTR.    */
+                    : [dst] "=m" (*outbuf)
+                    : [src] "m" (*inbuf)
+                    : "memory");
+
+      outbuf += BLOCKSIZE;
+      inbuf  += BLOCKSIZE;
+    }
+
+  asm volatile ("movdqu %%xmm7, %[ctr]\n\t"   /* Update CTR (mem).       */
+                : [ctr] "=m" (*ctr)
+                :
+                : "memory" );
+
+  vpaes_ssse3_cleanup ();
+}
+
+
+unsigned int ASM_FUNC_ATTR
+_gcry_aes_ssse3_decrypt (const RIJNDAEL_context *ctx, unsigned char *dst,
+                         const unsigned char *src)
+{
+  unsigned int nrounds = ctx->rounds;
+  byte ssse3_state[SSSE3_STATE_SIZE];
+
+  vpaes_ssse3_prepare_dec ();
+  asm volatile ("movdqu %[src], %%xmm0\n\t"
+                :
+                : [src] "m" (*src)
+                : "memory" );
+  do_vpaes_ssse3_dec (ctx, nrounds);
+  asm volatile ("movdqu %%xmm0, %[dst]\n\t"
+                : [dst] "=m" (*dst)
+                :
+                : "memory" );
+  vpaes_ssse3_cleanup ();
+  return 0;
+}
+
+
+void ASM_FUNC_ATTR
+_gcry_aes_ssse3_cfb_dec (RIJNDAEL_context *ctx, unsigned char *iv,
+                         unsigned char *outbuf, const unsigned char *inbuf,
+                         size_t nblocks)
+{
+  unsigned int nrounds = ctx->rounds;
+  byte ssse3_state[SSSE3_STATE_SIZE];
+
+  vpaes_ssse3_prepare_enc ();
+
+  asm volatile ("movdqu %[iv], %%xmm0\n\t"
+                : /* No output */
+                : [iv] "m" (*iv)
+                : "memory" );
+
+  for ( ;nblocks; nblocks-- )
+    {
+      do_vpaes_ssse3_enc (ctx, nrounds);
+
+      asm volatile ("movdqa %%xmm0, %%xmm6\n\t"
+                    "movdqu %[inbuf], %%xmm0\n\t"
+                    "pxor %%xmm0, %%xmm6\n\t"
+                    "movdqu %%xmm6, %[outbuf]\n\t"
+                    : [outbuf] "=m" (*outbuf)
+                    : [inbuf] "m" (*inbuf)
+                    : "memory" );
+
+      outbuf += BLOCKSIZE;
+      inbuf  += BLOCKSIZE;
+    }
+
+  asm volatile ("movdqu %%xmm0, %[iv]\n\t"
+                : [iv] "=m" (*iv)
+                :
+                : "memory" );
+
+  vpaes_ssse3_cleanup ();
+}
+
+
+void ASM_FUNC_ATTR
+_gcry_aes_ssse3_cbc_dec (RIJNDAEL_context *ctx, unsigned char *iv,
+                         unsigned char *outbuf, const unsigned char *inbuf,
+                         size_t nblocks)
+{
+  unsigned int nrounds = ctx->rounds;
+  byte ssse3_state[SSSE3_STATE_SIZE];
+
+  if ( !ctx->decryption_prepared )
+    {
+      do_ssse3_prepare_decryption ( ctx, ssse3_state );
+      ctx->decryption_prepared = 1;
+    }
+
+  vpaes_ssse3_prepare_dec ();
+
+  asm volatile ("movdqu %[iv], %%xmm7\n\t"     /* use xmm7 as fast IV storage 
*/
+               : /* No output */
+               : [iv] "m" (*iv)
+               : "memory");
+
+  for ( ;nblocks; nblocks-- )
+    {
+      asm volatile ("movdqu %[inbuf], %%xmm0\n\t"
+                   "movdqa %%xmm0, %%xmm6\n\t"    /* use xmm6 as savebuf */
+                   : /* No output */
+                   : [inbuf] "m" (*inbuf)
+                   : "memory");
+
+      do_vpaes_ssse3_dec (ctx, nrounds);
+
+      asm volatile ("pxor %%xmm7, %%xmm0\n\t"  /* xor IV with output */
+                   "movdqu %%xmm0, %[outbuf]\n\t"
+                   "movdqu %%xmm6, %%xmm7\n\t" /* store savebuf as new IV */
+                   : [outbuf] "=m" (*outbuf)
+                   :
+                   : "memory");
+
+      outbuf += BLOCKSIZE;
+      inbuf  += BLOCKSIZE;
+    }
+
+  asm volatile ("movdqu %%xmm7, %[iv]\n\t"     /* store IV */
+               : /* No output */
+               : [iv] "m" (*iv)
+               : "memory");
+
+  vpaes_ssse3_cleanup ();
+}
+
+
+static void ASM_FUNC_ATTR
+ssse3_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg,
+               const void *inbuf_arg, size_t nblocks)
+{
+  RIJNDAEL_context *ctx = (void *)&c->context.c;
+  unsigned char *outbuf = outbuf_arg;
+  const unsigned char *inbuf = inbuf_arg;
+  u64 n = c->u_mode.ocb.data_nblocks;
+  unsigned int nrounds = ctx->rounds;
+  byte ssse3_state[SSSE3_STATE_SIZE];
+
+  vpaes_ssse3_prepare_enc ();
+
+  /* Preload Offset and Checksum */
+  asm volatile ("movdqu %[iv], %%xmm7\n\t"
+                "movdqu %[ctr], %%xmm6\n\t"
+                : /* No output */
+                : [iv] "m" (*c->u_iv.iv),
+                  [ctr] "m" (*c->u_ctr.ctr)
+                : "memory" );
+
+  for ( ;nblocks; nblocks-- )
+    {
+      const unsigned char *l;
+
+      l = aes_ocb_get_l(c, ++n);
+
+      /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+      /* Checksum_i = Checksum_{i-1} xor P_i  */
+      /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i)  */
+      asm volatile ("movdqu %[l],     %%xmm1\n\t"
+                    "movdqu %[inbuf], %%xmm0\n\t"
+                    "pxor   %%xmm1,   %%xmm7\n\t"
+                    "pxor   %%xmm0,   %%xmm6\n\t"
+                    "pxor   %%xmm7,   %%xmm0\n\t"
+                    :
+                    : [l] "m" (*l),
+                      [inbuf] "m" (*inbuf)
+                    : "memory" );
+
+      do_vpaes_ssse3_enc (ctx, nrounds);
+
+      asm volatile ("pxor   %%xmm7, %%xmm0\n\t"
+                    "movdqu %%xmm0, %[outbuf]\n\t"
+                    : [outbuf] "=m" (*outbuf)
+                    :
+                    : "memory" );
+
+      inbuf += BLOCKSIZE;
+      outbuf += BLOCKSIZE;
+    }
+
+  c->u_mode.ocb.data_nblocks = n;
+  asm volatile ("movdqu %%xmm7, %[iv]\n\t"
+                "movdqu %%xmm6, %[ctr]\n\t"
+                : [iv] "=m" (*c->u_iv.iv),
+                  [ctr] "=m" (*c->u_ctr.ctr)
+                :
+                : "memory" );
+
+  vpaes_ssse3_cleanup ();
+}
+
+static void ASM_FUNC_ATTR
+ssse3_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg,
+               const void *inbuf_arg, size_t nblocks)
+{
+  RIJNDAEL_context *ctx = (void *)&c->context.c;
+  unsigned char *outbuf = outbuf_arg;
+  const unsigned char *inbuf = inbuf_arg;
+  u64 n = c->u_mode.ocb.data_nblocks;
+  unsigned int nrounds = ctx->rounds;
+  byte ssse3_state[SSSE3_STATE_SIZE];
+
+  if ( !ctx->decryption_prepared )
+    {
+      do_ssse3_prepare_decryption ( ctx, ssse3_state );
+      ctx->decryption_prepared = 1;
+    }
+
+  vpaes_ssse3_prepare_dec ();
+
+  /* Preload Offset and Checksum */
+  asm volatile ("movdqu %[iv], %%xmm7\n\t"
+                "movdqu %[ctr], %%xmm6\n\t"
+                : /* No output */
+                : [iv] "m" (*c->u_iv.iv),
+                  [ctr] "m" (*c->u_ctr.ctr)
+                : "memory" );
+
+  for ( ;nblocks; nblocks-- )
+    {
+      const unsigned char *l;
+
+      l = aes_ocb_get_l(c, ++n);
+
+      /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+      /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i)  */
+      /* Checksum_i = Checksum_{i-1} xor P_i  */
+      asm volatile ("movdqu %[l],     %%xmm1\n\t"
+                    "movdqu %[inbuf], %%xmm0\n\t"
+                    "pxor   %%xmm1,   %%xmm7\n\t"
+                    "pxor   %%xmm7,   %%xmm0\n\t"
+                    :
+                    : [l] "m" (*l),
+                      [inbuf] "m" (*inbuf)
+                    : "memory" );
+
+      do_vpaes_ssse3_dec (ctx, nrounds);
+
+      asm volatile ("pxor   %%xmm7, %%xmm0\n\t"
+                    "pxor   %%xmm0, %%xmm6\n\t"
+                    "movdqu %%xmm0, %[outbuf]\n\t"
+                    : [outbuf] "=m" (*outbuf)
+                    :
+                    : "memory" );
+
+      inbuf += BLOCKSIZE;
+      outbuf += BLOCKSIZE;
+    }
+
+  c->u_mode.ocb.data_nblocks = n;
+  asm volatile ("movdqu %%xmm7, %[iv]\n\t"
+                "movdqu %%xmm6, %[ctr]\n\t"
+                : [iv] "=m" (*c->u_iv.iv),
+                  [ctr] "=m" (*c->u_ctr.ctr)
+                :
+                : "memory" );
+
+  vpaes_ssse3_cleanup ();
+}
+
+
+size_t ASM_FUNC_ATTR
+_gcry_aes_ssse3_ocb_crypt(gcry_cipher_hd_t c, void *outbuf_arg,
+                          const void *inbuf_arg, size_t nblocks, int encrypt)
+{
+  if (encrypt)
+    ssse3_ocb_enc(c, outbuf_arg, inbuf_arg, nblocks);
+  else
+    ssse3_ocb_dec(c, outbuf_arg, inbuf_arg, nblocks);
+
+  return 0;
+}
+
+
+size_t ASM_FUNC_ATTR
+_gcry_aes_ssse3_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
+                          size_t nblocks)
+{
+  RIJNDAEL_context *ctx = (void *)&c->context.c;
+  const unsigned char *abuf = abuf_arg;
+  u64 n = c->u_mode.ocb.aad_nblocks;
+  unsigned int nrounds = ctx->rounds;
+  byte ssse3_state[SSSE3_STATE_SIZE];
+
+  vpaes_ssse3_prepare_enc ();
+
+  /* Preload Offset and Sum */
+  asm volatile ("movdqu %[iv], %%xmm7\n\t"
+                "movdqu %[ctr], %%xmm6\n\t"
+                : /* No output */
+                : [iv] "m" (*c->u_mode.ocb.aad_offset),
+                  [ctr] "m" (*c->u_mode.ocb.aad_sum)
+                : "memory" );
+
+  for ( ;nblocks; nblocks-- )
+    {
+      const unsigned char *l;
+
+      l = aes_ocb_get_l(c, ++n);
+
+      /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+      /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i)  */
+      asm volatile ("movdqu %[l],     %%xmm1\n\t"
+                    "movdqu %[abuf],  %%xmm0\n\t"
+                    "pxor   %%xmm1,   %%xmm7\n\t"
+                    "pxor   %%xmm7,   %%xmm0\n\t"
+                    :
+                    : [l] "m" (*l),
+                      [abuf] "m" (*abuf)
+                    : "memory" );
+
+      do_vpaes_ssse3_enc (ctx, nrounds);
+
+      asm volatile ("pxor   %%xmm0,   %%xmm6\n\t"
+                    :
+                    :
+                    : "memory" );
+
+      abuf += BLOCKSIZE;
+    }
+
+  c->u_mode.ocb.aad_nblocks = n;
+  asm volatile ("movdqu %%xmm7, %[iv]\n\t"
+                "movdqu %%xmm6, %[ctr]\n\t"
+                : [iv] "=m" (*c->u_mode.ocb.aad_offset),
+                  [ctr] "=m" (*c->u_mode.ocb.aad_sum)
+                :
+                : "memory" );
+
+  vpaes_ssse3_cleanup ();
+
+  return 0;
+}
+
+#if __clang__
+#  pragma clang attribute pop
+#endif
+
+#endif /* USE_SSSE3 */
diff --git a/grub-core/lib/libgcrypt/cipher/rijndael-tables.h 
b/grub-core/lib/libgcrypt/cipher/rijndael-tables.h
index b6a5b158c..b54d95939 100644
--- a/grub-core/lib/libgcrypt/cipher/rijndael-tables.h
+++ b/grub-core/lib/libgcrypt/cipher/rijndael-tables.h
@@ -21,1662 +21,203 @@
 /* To keep the actual implementation at a readable size we use this
    include file to define the tables.  */
 
-static const unsigned char S[256] =
+static struct
+{
+  volatile u32 counter_head;
+  u32 cacheline_align[64 / 4 - 1];
+  u32 T[256];
+  volatile u32 counter_tail;
+} enc_tables ATTR_ALIGNED_64 =
   {
-     99, 124, 119, 123, 242, 107, 111, 197,
-     48,   1, 103,  43, 254, 215, 171, 118,
-    202, 130, 201, 125, 250,  89,  71, 240,
-    173, 212, 162, 175, 156, 164, 114, 192,
-    183, 253, 147,  38,  54,  63, 247, 204,
-     52, 165, 229, 241, 113, 216,  49,  21,
-      4, 199,  35, 195,  24, 150,   5, 154,
-      7,  18, 128, 226, 235,  39, 178, 117,
-      9, 131,  44,  26,  27, 110,  90, 160,
-     82,  59, 214, 179,  41, 227,  47, 132,
-     83, 209,   0, 237,  32, 252, 177,  91,
-    106, 203, 190,  57,  74,  76,  88, 207,
-    208, 239, 170, 251,  67,  77,  51, 133,
-     69, 249,   2, 127,  80,  60, 159, 168,
-     81, 163,  64, 143, 146, 157,  56, 245,
-    188, 182, 218,  33,  16, 255, 243, 210,
-    205,  12,  19, 236,  95, 151,  68,  23,
-    196, 167, 126,  61, 100,  93,  25, 115,
-     96, 129,  79, 220,  34,  42, 144, 136,
-     70, 238, 184,  20, 222,  94,  11, 219,
-    224,  50,  58,  10,  73,   6,  36,  92,
-    194, 211, 172,  98, 145, 149, 228, 121,
-    231, 200,  55, 109, 141, 213,  78, 169,
-    108,  86, 244, 234, 101, 122, 174,   8,
-    186, 120,  37,  46,  28, 166, 180, 198,
-    232, 221, 116,  31,  75, 189, 139, 138,
-    112,  62, 181, 102,  72,   3, 246,  14,
-     97,  53,  87, 185, 134, 193,  29, 158,
-    225, 248, 152,  17, 105, 217, 142, 148,
-    155,  30, 135, 233, 206,  85,  40, 223,
-    140, 161, 137,  13, 191, 230,  66, 104,
-     65, 153,  45,  15, 176,  84, 187,  22
+    0,
+    { 0, },
+    {
+      0xa56363c6, 0x847c7cf8, 0x997777ee, 0x8d7b7bf6,
+      0x0df2f2ff, 0xbd6b6bd6, 0xb16f6fde, 0x54c5c591,
+      0x50303060, 0x03010102, 0xa96767ce, 0x7d2b2b56,
+      0x19fefee7, 0x62d7d7b5, 0xe6abab4d, 0x9a7676ec,
+      0x45caca8f, 0x9d82821f, 0x40c9c989, 0x877d7dfa,
+      0x15fafaef, 0xeb5959b2, 0xc947478e, 0x0bf0f0fb,
+      0xecadad41, 0x67d4d4b3, 0xfda2a25f, 0xeaafaf45,
+      0xbf9c9c23, 0xf7a4a453, 0x967272e4, 0x5bc0c09b,
+      0xc2b7b775, 0x1cfdfde1, 0xae93933d, 0x6a26264c,
+      0x5a36366c, 0x413f3f7e, 0x02f7f7f5, 0x4fcccc83,
+      0x5c343468, 0xf4a5a551, 0x34e5e5d1, 0x08f1f1f9,
+      0x937171e2, 0x73d8d8ab, 0x53313162, 0x3f15152a,
+      0x0c040408, 0x52c7c795, 0x65232346, 0x5ec3c39d,
+      0x28181830, 0xa1969637, 0x0f05050a, 0xb59a9a2f,
+      0x0907070e, 0x36121224, 0x9b80801b, 0x3de2e2df,
+      0x26ebebcd, 0x6927274e, 0xcdb2b27f, 0x9f7575ea,
+      0x1b090912, 0x9e83831d, 0x742c2c58, 0x2e1a1a34,
+      0x2d1b1b36, 0xb26e6edc, 0xee5a5ab4, 0xfba0a05b,
+      0xf65252a4, 0x4d3b3b76, 0x61d6d6b7, 0xceb3b37d,
+      0x7b292952, 0x3ee3e3dd, 0x712f2f5e, 0x97848413,
+      0xf55353a6, 0x68d1d1b9, 0x00000000, 0x2cededc1,
+      0x60202040, 0x1ffcfce3, 0xc8b1b179, 0xed5b5bb6,
+      0xbe6a6ad4, 0x46cbcb8d, 0xd9bebe67, 0x4b393972,
+      0xde4a4a94, 0xd44c4c98, 0xe85858b0, 0x4acfcf85,
+      0x6bd0d0bb, 0x2aefefc5, 0xe5aaaa4f, 0x16fbfbed,
+      0xc5434386, 0xd74d4d9a, 0x55333366, 0x94858511,
+      0xcf45458a, 0x10f9f9e9, 0x06020204, 0x817f7ffe,
+      0xf05050a0, 0x443c3c78, 0xba9f9f25, 0xe3a8a84b,
+      0xf35151a2, 0xfea3a35d, 0xc0404080, 0x8a8f8f05,
+      0xad92923f, 0xbc9d9d21, 0x48383870, 0x04f5f5f1,
+      0xdfbcbc63, 0xc1b6b677, 0x75dadaaf, 0x63212142,
+      0x30101020, 0x1affffe5, 0x0ef3f3fd, 0x6dd2d2bf,
+      0x4ccdcd81, 0x140c0c18, 0x35131326, 0x2fececc3,
+      0xe15f5fbe, 0xa2979735, 0xcc444488, 0x3917172e,
+      0x57c4c493, 0xf2a7a755, 0x827e7efc, 0x473d3d7a,
+      0xac6464c8, 0xe75d5dba, 0x2b191932, 0x957373e6,
+      0xa06060c0, 0x98818119, 0xd14f4f9e, 0x7fdcdca3,
+      0x66222244, 0x7e2a2a54, 0xab90903b, 0x8388880b,
+      0xca46468c, 0x29eeeec7, 0xd3b8b86b, 0x3c141428,
+      0x79dedea7, 0xe25e5ebc, 0x1d0b0b16, 0x76dbdbad,
+      0x3be0e0db, 0x56323264, 0x4e3a3a74, 0x1e0a0a14,
+      0xdb494992, 0x0a06060c, 0x6c242448, 0xe45c5cb8,
+      0x5dc2c29f, 0x6ed3d3bd, 0xefacac43, 0xa66262c4,
+      0xa8919139, 0xa4959531, 0x37e4e4d3, 0x8b7979f2,
+      0x32e7e7d5, 0x43c8c88b, 0x5937376e, 0xb76d6dda,
+      0x8c8d8d01, 0x64d5d5b1, 0xd24e4e9c, 0xe0a9a949,
+      0xb46c6cd8, 0xfa5656ac, 0x07f4f4f3, 0x25eaeacf,
+      0xaf6565ca, 0x8e7a7af4, 0xe9aeae47, 0x18080810,
+      0xd5baba6f, 0x887878f0, 0x6f25254a, 0x722e2e5c,
+      0x241c1c38, 0xf1a6a657, 0xc7b4b473, 0x51c6c697,
+      0x23e8e8cb, 0x7cdddda1, 0x9c7474e8, 0x211f1f3e,
+      0xdd4b4b96, 0xdcbdbd61, 0x868b8b0d, 0x858a8a0f,
+      0x907070e0, 0x423e3e7c, 0xc4b5b571, 0xaa6666cc,
+      0xd8484890, 0x05030306, 0x01f6f6f7, 0x120e0e1c,
+      0xa36161c2, 0x5f35356a, 0xf95757ae, 0xd0b9b969,
+      0x91868617, 0x58c1c199, 0x271d1d3a, 0xb99e9e27,
+      0x38e1e1d9, 0x13f8f8eb, 0xb398982b, 0x33111122,
+      0xbb6969d2, 0x70d9d9a9, 0x898e8e07, 0xa7949433,
+      0xb69b9b2d, 0x221e1e3c, 0x92878715, 0x20e9e9c9,
+      0x49cece87, 0xff5555aa, 0x78282850, 0x7adfdfa5,
+      0x8f8c8c03, 0xf8a1a159, 0x80898909, 0x170d0d1a,
+      0xdabfbf65, 0x31e6e6d7, 0xc6424284, 0xb86868d0,
+      0xc3414182, 0xb0999929, 0x772d2d5a, 0x110f0f1e,
+      0xcbb0b07b, 0xfc5454a8, 0xd6bbbb6d, 0x3a16162c
+    },
+    0
   };
 
+#define encT enc_tables.T
 
-static const unsigned char T1[256][4] =
+static struct
+{
+  volatile u32 counter_head;
+  u32 cacheline_align[64 / 4 - 1];
+  u32 T[256];
+  byte inv_sbox[256];
+  volatile u32 counter_tail;
+} dec_tables ATTR_ALIGNED_64 =
   {
-    { 0xc6,0x63,0x63,0xa5 }, { 0xf8,0x7c,0x7c,0x84 },
-    { 0xee,0x77,0x77,0x99 }, { 0xf6,0x7b,0x7b,0x8d },
-    { 0xff,0xf2,0xf2,0x0d }, { 0xd6,0x6b,0x6b,0xbd },
-    { 0xde,0x6f,0x6f,0xb1 }, { 0x91,0xc5,0xc5,0x54 },
-    { 0x60,0x30,0x30,0x50 }, { 0x02,0x01,0x01,0x03 },
-    { 0xce,0x67,0x67,0xa9 }, { 0x56,0x2b,0x2b,0x7d },
-    { 0xe7,0xfe,0xfe,0x19 }, { 0xb5,0xd7,0xd7,0x62 },
-    { 0x4d,0xab,0xab,0xe6 }, { 0xec,0x76,0x76,0x9a },
-    { 0x8f,0xca,0xca,0x45 }, { 0x1f,0x82,0x82,0x9d },
-    { 0x89,0xc9,0xc9,0x40 }, { 0xfa,0x7d,0x7d,0x87 },
-    { 0xef,0xfa,0xfa,0x15 }, { 0xb2,0x59,0x59,0xeb },
-    { 0x8e,0x47,0x47,0xc9 }, { 0xfb,0xf0,0xf0,0x0b },
-    { 0x41,0xad,0xad,0xec }, { 0xb3,0xd4,0xd4,0x67 },
-    { 0x5f,0xa2,0xa2,0xfd }, { 0x45,0xaf,0xaf,0xea },
-    { 0x23,0x9c,0x9c,0xbf }, { 0x53,0xa4,0xa4,0xf7 },
-    { 0xe4,0x72,0x72,0x96 }, { 0x9b,0xc0,0xc0,0x5b },
-    { 0x75,0xb7,0xb7,0xc2 }, { 0xe1,0xfd,0xfd,0x1c },
-    { 0x3d,0x93,0x93,0xae }, { 0x4c,0x26,0x26,0x6a },
-    { 0x6c,0x36,0x36,0x5a }, { 0x7e,0x3f,0x3f,0x41 },
-    { 0xf5,0xf7,0xf7,0x02 }, { 0x83,0xcc,0xcc,0x4f },
-    { 0x68,0x34,0x34,0x5c }, { 0x51,0xa5,0xa5,0xf4 },
-    { 0xd1,0xe5,0xe5,0x34 }, { 0xf9,0xf1,0xf1,0x08 },
-    { 0xe2,0x71,0x71,0x93 }, { 0xab,0xd8,0xd8,0x73 },
-    { 0x62,0x31,0x31,0x53 }, { 0x2a,0x15,0x15,0x3f },
-    { 0x08,0x04,0x04,0x0c }, { 0x95,0xc7,0xc7,0x52 },
-    { 0x46,0x23,0x23,0x65 }, { 0x9d,0xc3,0xc3,0x5e },
-    { 0x30,0x18,0x18,0x28 }, { 0x37,0x96,0x96,0xa1 },
-    { 0x0a,0x05,0x05,0x0f }, { 0x2f,0x9a,0x9a,0xb5 },
-    { 0x0e,0x07,0x07,0x09 }, { 0x24,0x12,0x12,0x36 },
-    { 0x1b,0x80,0x80,0x9b }, { 0xdf,0xe2,0xe2,0x3d },
-    { 0xcd,0xeb,0xeb,0x26 }, { 0x4e,0x27,0x27,0x69 },
-    { 0x7f,0xb2,0xb2,0xcd }, { 0xea,0x75,0x75,0x9f },
-    { 0x12,0x09,0x09,0x1b }, { 0x1d,0x83,0x83,0x9e },
-    { 0x58,0x2c,0x2c,0x74 }, { 0x34,0x1a,0x1a,0x2e },
-    { 0x36,0x1b,0x1b,0x2d }, { 0xdc,0x6e,0x6e,0xb2 },
-    { 0xb4,0x5a,0x5a,0xee }, { 0x5b,0xa0,0xa0,0xfb },
-    { 0xa4,0x52,0x52,0xf6 }, { 0x76,0x3b,0x3b,0x4d },
-    { 0xb7,0xd6,0xd6,0x61 }, { 0x7d,0xb3,0xb3,0xce },
-    { 0x52,0x29,0x29,0x7b }, { 0xdd,0xe3,0xe3,0x3e },
-    { 0x5e,0x2f,0x2f,0x71 }, { 0x13,0x84,0x84,0x97 },
-    { 0xa6,0x53,0x53,0xf5 }, { 0xb9,0xd1,0xd1,0x68 },
-    { 0x00,0x00,0x00,0x00 }, { 0xc1,0xed,0xed,0x2c },
-    { 0x40,0x20,0x20,0x60 }, { 0xe3,0xfc,0xfc,0x1f },
-    { 0x79,0xb1,0xb1,0xc8 }, { 0xb6,0x5b,0x5b,0xed },
-    { 0xd4,0x6a,0x6a,0xbe }, { 0x8d,0xcb,0xcb,0x46 },
-    { 0x67,0xbe,0xbe,0xd9 }, { 0x72,0x39,0x39,0x4b },
-    { 0x94,0x4a,0x4a,0xde }, { 0x98,0x4c,0x4c,0xd4 },
-    { 0xb0,0x58,0x58,0xe8 }, { 0x85,0xcf,0xcf,0x4a },
-    { 0xbb,0xd0,0xd0,0x6b }, { 0xc5,0xef,0xef,0x2a },
-    { 0x4f,0xaa,0xaa,0xe5 }, { 0xed,0xfb,0xfb,0x16 },
-    { 0x86,0x43,0x43,0xc5 }, { 0x9a,0x4d,0x4d,0xd7 },
-    { 0x66,0x33,0x33,0x55 }, { 0x11,0x85,0x85,0x94 },
-    { 0x8a,0x45,0x45,0xcf }, { 0xe9,0xf9,0xf9,0x10 },
-    { 0x04,0x02,0x02,0x06 }, { 0xfe,0x7f,0x7f,0x81 },
-    { 0xa0,0x50,0x50,0xf0 }, { 0x78,0x3c,0x3c,0x44 },
-    { 0x25,0x9f,0x9f,0xba }, { 0x4b,0xa8,0xa8,0xe3 },
-    { 0xa2,0x51,0x51,0xf3 }, { 0x5d,0xa3,0xa3,0xfe },
-    { 0x80,0x40,0x40,0xc0 }, { 0x05,0x8f,0x8f,0x8a },
-    { 0x3f,0x92,0x92,0xad }, { 0x21,0x9d,0x9d,0xbc },
-    { 0x70,0x38,0x38,0x48 }, { 0xf1,0xf5,0xf5,0x04 },
-    { 0x63,0xbc,0xbc,0xdf }, { 0x77,0xb6,0xb6,0xc1 },
-    { 0xaf,0xda,0xda,0x75 }, { 0x42,0x21,0x21,0x63 },
-    { 0x20,0x10,0x10,0x30 }, { 0xe5,0xff,0xff,0x1a },
-    { 0xfd,0xf3,0xf3,0x0e }, { 0xbf,0xd2,0xd2,0x6d },
-    { 0x81,0xcd,0xcd,0x4c }, { 0x18,0x0c,0x0c,0x14 },
-    { 0x26,0x13,0x13,0x35 }, { 0xc3,0xec,0xec,0x2f },
-    { 0xbe,0x5f,0x5f,0xe1 }, { 0x35,0x97,0x97,0xa2 },
-    { 0x88,0x44,0x44,0xcc }, { 0x2e,0x17,0x17,0x39 },
-    { 0x93,0xc4,0xc4,0x57 }, { 0x55,0xa7,0xa7,0xf2 },
-    { 0xfc,0x7e,0x7e,0x82 }, { 0x7a,0x3d,0x3d,0x47 },
-    { 0xc8,0x64,0x64,0xac }, { 0xba,0x5d,0x5d,0xe7 },
-    { 0x32,0x19,0x19,0x2b }, { 0xe6,0x73,0x73,0x95 },
-    { 0xc0,0x60,0x60,0xa0 }, { 0x19,0x81,0x81,0x98 },
-    { 0x9e,0x4f,0x4f,0xd1 }, { 0xa3,0xdc,0xdc,0x7f },
-    { 0x44,0x22,0x22,0x66 }, { 0x54,0x2a,0x2a,0x7e },
-    { 0x3b,0x90,0x90,0xab }, { 0x0b,0x88,0x88,0x83 },
-    { 0x8c,0x46,0x46,0xca }, { 0xc7,0xee,0xee,0x29 },
-    { 0x6b,0xb8,0xb8,0xd3 }, { 0x28,0x14,0x14,0x3c },
-    { 0xa7,0xde,0xde,0x79 }, { 0xbc,0x5e,0x5e,0xe2 },
-    { 0x16,0x0b,0x0b,0x1d }, { 0xad,0xdb,0xdb,0x76 },
-    { 0xdb,0xe0,0xe0,0x3b }, { 0x64,0x32,0x32,0x56 },
-    { 0x74,0x3a,0x3a,0x4e }, { 0x14,0x0a,0x0a,0x1e },
-    { 0x92,0x49,0x49,0xdb }, { 0x0c,0x06,0x06,0x0a },
-    { 0x48,0x24,0x24,0x6c }, { 0xb8,0x5c,0x5c,0xe4 },
-    { 0x9f,0xc2,0xc2,0x5d }, { 0xbd,0xd3,0xd3,0x6e },
-    { 0x43,0xac,0xac,0xef }, { 0xc4,0x62,0x62,0xa6 },
-    { 0x39,0x91,0x91,0xa8 }, { 0x31,0x95,0x95,0xa4 },
-    { 0xd3,0xe4,0xe4,0x37 }, { 0xf2,0x79,0x79,0x8b },
-    { 0xd5,0xe7,0xe7,0x32 }, { 0x8b,0xc8,0xc8,0x43 },
-    { 0x6e,0x37,0x37,0x59 }, { 0xda,0x6d,0x6d,0xb7 },
-    { 0x01,0x8d,0x8d,0x8c }, { 0xb1,0xd5,0xd5,0x64 },
-    { 0x9c,0x4e,0x4e,0xd2 }, { 0x49,0xa9,0xa9,0xe0 },
-    { 0xd8,0x6c,0x6c,0xb4 }, { 0xac,0x56,0x56,0xfa },
-    { 0xf3,0xf4,0xf4,0x07 }, { 0xcf,0xea,0xea,0x25 },
-    { 0xca,0x65,0x65,0xaf }, { 0xf4,0x7a,0x7a,0x8e },
-    { 0x47,0xae,0xae,0xe9 }, { 0x10,0x08,0x08,0x18 },
-    { 0x6f,0xba,0xba,0xd5 }, { 0xf0,0x78,0x78,0x88 },
-    { 0x4a,0x25,0x25,0x6f }, { 0x5c,0x2e,0x2e,0x72 },
-    { 0x38,0x1c,0x1c,0x24 }, { 0x57,0xa6,0xa6,0xf1 },
-    { 0x73,0xb4,0xb4,0xc7 }, { 0x97,0xc6,0xc6,0x51 },
-    { 0xcb,0xe8,0xe8,0x23 }, { 0xa1,0xdd,0xdd,0x7c },
-    { 0xe8,0x74,0x74,0x9c }, { 0x3e,0x1f,0x1f,0x21 },
-    { 0x96,0x4b,0x4b,0xdd }, { 0x61,0xbd,0xbd,0xdc },
-    { 0x0d,0x8b,0x8b,0x86 }, { 0x0f,0x8a,0x8a,0x85 },
-    { 0xe0,0x70,0x70,0x90 }, { 0x7c,0x3e,0x3e,0x42 },
-    { 0x71,0xb5,0xb5,0xc4 }, { 0xcc,0x66,0x66,0xaa },
-    { 0x90,0x48,0x48,0xd8 }, { 0x06,0x03,0x03,0x05 },
-    { 0xf7,0xf6,0xf6,0x01 }, { 0x1c,0x0e,0x0e,0x12 },
-    { 0xc2,0x61,0x61,0xa3 }, { 0x6a,0x35,0x35,0x5f },
-    { 0xae,0x57,0x57,0xf9 }, { 0x69,0xb9,0xb9,0xd0 },
-    { 0x17,0x86,0x86,0x91 }, { 0x99,0xc1,0xc1,0x58 },
-    { 0x3a,0x1d,0x1d,0x27 }, { 0x27,0x9e,0x9e,0xb9 },
-    { 0xd9,0xe1,0xe1,0x38 }, { 0xeb,0xf8,0xf8,0x13 },
-    { 0x2b,0x98,0x98,0xb3 }, { 0x22,0x11,0x11,0x33 },
-    { 0xd2,0x69,0x69,0xbb }, { 0xa9,0xd9,0xd9,0x70 },
-    { 0x07,0x8e,0x8e,0x89 }, { 0x33,0x94,0x94,0xa7 },
-    { 0x2d,0x9b,0x9b,0xb6 }, { 0x3c,0x1e,0x1e,0x22 },
-    { 0x15,0x87,0x87,0x92 }, { 0xc9,0xe9,0xe9,0x20 },
-    { 0x87,0xce,0xce,0x49 }, { 0xaa,0x55,0x55,0xff },
-    { 0x50,0x28,0x28,0x78 }, { 0xa5,0xdf,0xdf,0x7a },
-    { 0x03,0x8c,0x8c,0x8f }, { 0x59,0xa1,0xa1,0xf8 },
-    { 0x09,0x89,0x89,0x80 }, { 0x1a,0x0d,0x0d,0x17 },
-    { 0x65,0xbf,0xbf,0xda }, { 0xd7,0xe6,0xe6,0x31 },
-    { 0x84,0x42,0x42,0xc6 }, { 0xd0,0x68,0x68,0xb8 },
-    { 0x82,0x41,0x41,0xc3 }, { 0x29,0x99,0x99,0xb0 },
-    { 0x5a,0x2d,0x2d,0x77 }, { 0x1e,0x0f,0x0f,0x11 },
-    { 0x7b,0xb0,0xb0,0xcb }, { 0xa8,0x54,0x54,0xfc },
-    { 0x6d,0xbb,0xbb,0xd6 }, { 0x2c,0x16,0x16,0x3a }
+    0,
+    { 0, },
+    {
+      0x50a7f451, 0x5365417e, 0xc3a4171a, 0x965e273a,
+      0xcb6bab3b, 0xf1459d1f, 0xab58faac, 0x9303e34b,
+      0x55fa3020, 0xf66d76ad, 0x9176cc88, 0x254c02f5,
+      0xfcd7e54f, 0xd7cb2ac5, 0x80443526, 0x8fa362b5,
+      0x495ab1de, 0x671bba25, 0x980eea45, 0xe1c0fe5d,
+      0x02752fc3, 0x12f04c81, 0xa397468d, 0xc6f9d36b,
+      0xe75f8f03, 0x959c9215, 0xeb7a6dbf, 0xda595295,
+      0x2d83bed4, 0xd3217458, 0x2969e049, 0x44c8c98e,
+      0x6a89c275, 0x78798ef4, 0x6b3e5899, 0xdd71b927,
+      0xb64fe1be, 0x17ad88f0, 0x66ac20c9, 0xb43ace7d,
+      0x184adf63, 0x82311ae5, 0x60335197, 0x457f5362,
+      0xe07764b1, 0x84ae6bbb, 0x1ca081fe, 0x942b08f9,
+      0x58684870, 0x19fd458f, 0x876cde94, 0xb7f87b52,
+      0x23d373ab, 0xe2024b72, 0x578f1fe3, 0x2aab5566,
+      0x0728ebb2, 0x03c2b52f, 0x9a7bc586, 0xa50837d3,
+      0xf2872830, 0xb2a5bf23, 0xba6a0302, 0x5c8216ed,
+      0x2b1ccf8a, 0x92b479a7, 0xf0f207f3, 0xa1e2694e,
+      0xcdf4da65, 0xd5be0506, 0x1f6234d1, 0x8afea6c4,
+      0x9d532e34, 0xa055f3a2, 0x32e18a05, 0x75ebf6a4,
+      0x39ec830b, 0xaaef6040, 0x069f715e, 0x51106ebd,
+      0xf98a213e, 0x3d06dd96, 0xae053edd, 0x46bde64d,
+      0xb58d5491, 0x055dc471, 0x6fd40604, 0xff155060,
+      0x24fb9819, 0x97e9bdd6, 0xcc434089, 0x779ed967,
+      0xbd42e8b0, 0x888b8907, 0x385b19e7, 0xdbeec879,
+      0x470a7ca1, 0xe90f427c, 0xc91e84f8, 0x00000000,
+      0x83868009, 0x48ed2b32, 0xac70111e, 0x4e725a6c,
+      0xfbff0efd, 0x5638850f, 0x1ed5ae3d, 0x27392d36,
+      0x64d90f0a, 0x21a65c68, 0xd1545b9b, 0x3a2e3624,
+      0xb1670a0c, 0x0fe75793, 0xd296eeb4, 0x9e919b1b,
+      0x4fc5c080, 0xa220dc61, 0x694b775a, 0x161a121c,
+      0x0aba93e2, 0xe52aa0c0, 0x43e0223c, 0x1d171b12,
+      0x0b0d090e, 0xadc78bf2, 0xb9a8b62d, 0xc8a91e14,
+      0x8519f157, 0x4c0775af, 0xbbdd99ee, 0xfd607fa3,
+      0x9f2601f7, 0xbcf5725c, 0xc53b6644, 0x347efb5b,
+      0x7629438b, 0xdcc623cb, 0x68fcedb6, 0x63f1e4b8,
+      0xcadc31d7, 0x10856342, 0x40229713, 0x2011c684,
+      0x7d244a85, 0xf83dbbd2, 0x1132f9ae, 0x6da129c7,
+      0x4b2f9e1d, 0xf330b2dc, 0xec52860d, 0xd0e3c177,
+      0x6c16b32b, 0x99b970a9, 0xfa489411, 0x2264e947,
+      0xc48cfca8, 0x1a3ff0a0, 0xd82c7d56, 0xef903322,
+      0xc74e4987, 0xc1d138d9, 0xfea2ca8c, 0x360bd498,
+      0xcf81f5a6, 0x28de7aa5, 0x268eb7da, 0xa4bfad3f,
+      0xe49d3a2c, 0x0d927850, 0x9bcc5f6a, 0x62467e54,
+      0xc2138df6, 0xe8b8d890, 0x5ef7392e, 0xf5afc382,
+      0xbe805d9f, 0x7c93d069, 0xa92dd56f, 0xb31225cf,
+      0x3b99acc8, 0xa77d1810, 0x6e639ce8, 0x7bbb3bdb,
+      0x097826cd, 0xf418596e, 0x01b79aec, 0xa89a4f83,
+      0x656e95e6, 0x7ee6ffaa, 0x08cfbc21, 0xe6e815ef,
+      0xd99be7ba, 0xce366f4a, 0xd4099fea, 0xd67cb029,
+      0xafb2a431, 0x31233f2a, 0x3094a5c6, 0xc066a235,
+      0x37bc4e74, 0xa6ca82fc, 0xb0d090e0, 0x15d8a733,
+      0x4a9804f1, 0xf7daec41, 0x0e50cd7f, 0x2ff69117,
+      0x8dd64d76, 0x4db0ef43, 0x544daacc, 0xdf0496e4,
+      0xe3b5d19e, 0x1b886a4c, 0xb81f2cc1, 0x7f516546,
+      0x04ea5e9d, 0x5d358c01, 0x737487fa, 0x2e410bfb,
+      0x5a1d67b3, 0x52d2db92, 0x335610e9, 0x1347d66d,
+      0x8c61d79a, 0x7a0ca137, 0x8e14f859, 0x893c13eb,
+      0xee27a9ce, 0x35c961b7, 0xede51ce1, 0x3cb1477a,
+      0x59dfd29c, 0x3f73f255, 0x79ce1418, 0xbf37c773,
+      0xeacdf753, 0x5baafd5f, 0x146f3ddf, 0x86db4478,
+      0x81f3afca, 0x3ec468b9, 0x2c342438, 0x5f40a3c2,
+      0x72c31d16, 0x0c25e2bc, 0x8b493c28, 0x41950dff,
+      0x7101a839, 0xdeb30c08, 0x9ce4b4d8, 0x90c15664,
+      0x6184cb7b, 0x70b632d5, 0x745c6c48, 0x4257b8d0
+    },
+    {
+      0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38,
+      0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb,
+      0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87,
+      0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb,
+      0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d,
+      0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e,
+      0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2,
+      0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25,
+      0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16,
+      0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92,
+      0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda,
+      0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84,
+      0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a,
+      0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06,
+      0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02,
+      0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b,
+      0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea,
+      0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73,
+      0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85,
+      0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e,
+      0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89,
+      0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b,
+      0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20,
+      0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4,
+      0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31,
+      0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f,
+      0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d,
+      0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef,
+      0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0,
+      0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61,
+      0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26,
+      0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d
+    },
+    0
   };
 
-static const unsigned char T2[256][4] =
-  {
-    { 0xa5,0xc6,0x63,0x63 }, { 0x84,0xf8,0x7c,0x7c },
-    { 0x99,0xee,0x77,0x77 }, { 0x8d,0xf6,0x7b,0x7b },
-    { 0x0d,0xff,0xf2,0xf2 }, { 0xbd,0xd6,0x6b,0x6b },
-    { 0xb1,0xde,0x6f,0x6f }, { 0x54,0x91,0xc5,0xc5 },
-    { 0x50,0x60,0x30,0x30 }, { 0x03,0x02,0x01,0x01 },
-    { 0xa9,0xce,0x67,0x67 }, { 0x7d,0x56,0x2b,0x2b },
-    { 0x19,0xe7,0xfe,0xfe }, { 0x62,0xb5,0xd7,0xd7 },
-    { 0xe6,0x4d,0xab,0xab }, { 0x9a,0xec,0x76,0x76 },
-    { 0x45,0x8f,0xca,0xca }, { 0x9d,0x1f,0x82,0x82 },
-    { 0x40,0x89,0xc9,0xc9 }, { 0x87,0xfa,0x7d,0x7d },
-    { 0x15,0xef,0xfa,0xfa }, { 0xeb,0xb2,0x59,0x59 },
-    { 0xc9,0x8e,0x47,0x47 }, { 0x0b,0xfb,0xf0,0xf0 },
-    { 0xec,0x41,0xad,0xad }, { 0x67,0xb3,0xd4,0xd4 },
-    { 0xfd,0x5f,0xa2,0xa2 }, { 0xea,0x45,0xaf,0xaf },
-    { 0xbf,0x23,0x9c,0x9c }, { 0xf7,0x53,0xa4,0xa4 },
-    { 0x96,0xe4,0x72,0x72 }, { 0x5b,0x9b,0xc0,0xc0 },
-    { 0xc2,0x75,0xb7,0xb7 }, { 0x1c,0xe1,0xfd,0xfd },
-    { 0xae,0x3d,0x93,0x93 }, { 0x6a,0x4c,0x26,0x26 },
-    { 0x5a,0x6c,0x36,0x36 }, { 0x41,0x7e,0x3f,0x3f },
-    { 0x02,0xf5,0xf7,0xf7 }, { 0x4f,0x83,0xcc,0xcc },
-    { 0x5c,0x68,0x34,0x34 }, { 0xf4,0x51,0xa5,0xa5 },
-    { 0x34,0xd1,0xe5,0xe5 }, { 0x08,0xf9,0xf1,0xf1 },
-    { 0x93,0xe2,0x71,0x71 }, { 0x73,0xab,0xd8,0xd8 },
-    { 0x53,0x62,0x31,0x31 }, { 0x3f,0x2a,0x15,0x15 },
-    { 0x0c,0x08,0x04,0x04 }, { 0x52,0x95,0xc7,0xc7 },
-    { 0x65,0x46,0x23,0x23 }, { 0x5e,0x9d,0xc3,0xc3 },
-    { 0x28,0x30,0x18,0x18 }, { 0xa1,0x37,0x96,0x96 },
-    { 0x0f,0x0a,0x05,0x05 }, { 0xb5,0x2f,0x9a,0x9a },
-    { 0x09,0x0e,0x07,0x07 }, { 0x36,0x24,0x12,0x12 },
-    { 0x9b,0x1b,0x80,0x80 }, { 0x3d,0xdf,0xe2,0xe2 },
-    { 0x26,0xcd,0xeb,0xeb }, { 0x69,0x4e,0x27,0x27 },
-    { 0xcd,0x7f,0xb2,0xb2 }, { 0x9f,0xea,0x75,0x75 },
-    { 0x1b,0x12,0x09,0x09 }, { 0x9e,0x1d,0x83,0x83 },
-    { 0x74,0x58,0x2c,0x2c }, { 0x2e,0x34,0x1a,0x1a },
-    { 0x2d,0x36,0x1b,0x1b }, { 0xb2,0xdc,0x6e,0x6e },
-    { 0xee,0xb4,0x5a,0x5a }, { 0xfb,0x5b,0xa0,0xa0 },
-    { 0xf6,0xa4,0x52,0x52 }, { 0x4d,0x76,0x3b,0x3b },
-    { 0x61,0xb7,0xd6,0xd6 }, { 0xce,0x7d,0xb3,0xb3 },
-    { 0x7b,0x52,0x29,0x29 }, { 0x3e,0xdd,0xe3,0xe3 },
-    { 0x71,0x5e,0x2f,0x2f }, { 0x97,0x13,0x84,0x84 },
-    { 0xf5,0xa6,0x53,0x53 }, { 0x68,0xb9,0xd1,0xd1 },
-    { 0x00,0x00,0x00,0x00 }, { 0x2c,0xc1,0xed,0xed },
-    { 0x60,0x40,0x20,0x20 }, { 0x1f,0xe3,0xfc,0xfc },
-    { 0xc8,0x79,0xb1,0xb1 }, { 0xed,0xb6,0x5b,0x5b },
-    { 0xbe,0xd4,0x6a,0x6a }, { 0x46,0x8d,0xcb,0xcb },
-    { 0xd9,0x67,0xbe,0xbe }, { 0x4b,0x72,0x39,0x39 },
-    { 0xde,0x94,0x4a,0x4a }, { 0xd4,0x98,0x4c,0x4c },
-    { 0xe8,0xb0,0x58,0x58 }, { 0x4a,0x85,0xcf,0xcf },
-    { 0x6b,0xbb,0xd0,0xd0 }, { 0x2a,0xc5,0xef,0xef },
-    { 0xe5,0x4f,0xaa,0xaa }, { 0x16,0xed,0xfb,0xfb },
-    { 0xc5,0x86,0x43,0x43 }, { 0xd7,0x9a,0x4d,0x4d },
-    { 0x55,0x66,0x33,0x33 }, { 0x94,0x11,0x85,0x85 },
-    { 0xcf,0x8a,0x45,0x45 }, { 0x10,0xe9,0xf9,0xf9 },
-    { 0x06,0x04,0x02,0x02 }, { 0x81,0xfe,0x7f,0x7f },
-    { 0xf0,0xa0,0x50,0x50 }, { 0x44,0x78,0x3c,0x3c },
-    { 0xba,0x25,0x9f,0x9f }, { 0xe3,0x4b,0xa8,0xa8 },
-    { 0xf3,0xa2,0x51,0x51 }, { 0xfe,0x5d,0xa3,0xa3 },
-    { 0xc0,0x80,0x40,0x40 }, { 0x8a,0x05,0x8f,0x8f },
-    { 0xad,0x3f,0x92,0x92 }, { 0xbc,0x21,0x9d,0x9d },
-    { 0x48,0x70,0x38,0x38 }, { 0x04,0xf1,0xf5,0xf5 },
-    { 0xdf,0x63,0xbc,0xbc }, { 0xc1,0x77,0xb6,0xb6 },
-    { 0x75,0xaf,0xda,0xda }, { 0x63,0x42,0x21,0x21 },
-    { 0x30,0x20,0x10,0x10 }, { 0x1a,0xe5,0xff,0xff },
-    { 0x0e,0xfd,0xf3,0xf3 }, { 0x6d,0xbf,0xd2,0xd2 },
-    { 0x4c,0x81,0xcd,0xcd }, { 0x14,0x18,0x0c,0x0c },
-    { 0x35,0x26,0x13,0x13 }, { 0x2f,0xc3,0xec,0xec },
-    { 0xe1,0xbe,0x5f,0x5f }, { 0xa2,0x35,0x97,0x97 },
-    { 0xcc,0x88,0x44,0x44 }, { 0x39,0x2e,0x17,0x17 },
-    { 0x57,0x93,0xc4,0xc4 }, { 0xf2,0x55,0xa7,0xa7 },
-    { 0x82,0xfc,0x7e,0x7e }, { 0x47,0x7a,0x3d,0x3d },
-    { 0xac,0xc8,0x64,0x64 }, { 0xe7,0xba,0x5d,0x5d },
-    { 0x2b,0x32,0x19,0x19 }, { 0x95,0xe6,0x73,0x73 },
-    { 0xa0,0xc0,0x60,0x60 }, { 0x98,0x19,0x81,0x81 },
-    { 0xd1,0x9e,0x4f,0x4f }, { 0x7f,0xa3,0xdc,0xdc },
-    { 0x66,0x44,0x22,0x22 }, { 0x7e,0x54,0x2a,0x2a },
-    { 0xab,0x3b,0x90,0x90 }, { 0x83,0x0b,0x88,0x88 },
-    { 0xca,0x8c,0x46,0x46 }, { 0x29,0xc7,0xee,0xee },
-    { 0xd3,0x6b,0xb8,0xb8 }, { 0x3c,0x28,0x14,0x14 },
-    { 0x79,0xa7,0xde,0xde }, { 0xe2,0xbc,0x5e,0x5e },
-    { 0x1d,0x16,0x0b,0x0b }, { 0x76,0xad,0xdb,0xdb },
-    { 0x3b,0xdb,0xe0,0xe0 }, { 0x56,0x64,0x32,0x32 },
-    { 0x4e,0x74,0x3a,0x3a }, { 0x1e,0x14,0x0a,0x0a },
-    { 0xdb,0x92,0x49,0x49 }, { 0x0a,0x0c,0x06,0x06 },
-    { 0x6c,0x48,0x24,0x24 }, { 0xe4,0xb8,0x5c,0x5c },
-    { 0x5d,0x9f,0xc2,0xc2 }, { 0x6e,0xbd,0xd3,0xd3 },
-    { 0xef,0x43,0xac,0xac }, { 0xa6,0xc4,0x62,0x62 },
-    { 0xa8,0x39,0x91,0x91 }, { 0xa4,0x31,0x95,0x95 },
-    { 0x37,0xd3,0xe4,0xe4 }, { 0x8b,0xf2,0x79,0x79 },
-    { 0x32,0xd5,0xe7,0xe7 }, { 0x43,0x8b,0xc8,0xc8 },
-    { 0x59,0x6e,0x37,0x37 }, { 0xb7,0xda,0x6d,0x6d },
-    { 0x8c,0x01,0x8d,0x8d }, { 0x64,0xb1,0xd5,0xd5 },
-    { 0xd2,0x9c,0x4e,0x4e }, { 0xe0,0x49,0xa9,0xa9 },
-    { 0xb4,0xd8,0x6c,0x6c }, { 0xfa,0xac,0x56,0x56 },
-    { 0x07,0xf3,0xf4,0xf4 }, { 0x25,0xcf,0xea,0xea },
-    { 0xaf,0xca,0x65,0x65 }, { 0x8e,0xf4,0x7a,0x7a },
-    { 0xe9,0x47,0xae,0xae }, { 0x18,0x10,0x08,0x08 },
-    { 0xd5,0x6f,0xba,0xba }, { 0x88,0xf0,0x78,0x78 },
-    { 0x6f,0x4a,0x25,0x25 }, { 0x72,0x5c,0x2e,0x2e },
-    { 0x24,0x38,0x1c,0x1c }, { 0xf1,0x57,0xa6,0xa6 },
-    { 0xc7,0x73,0xb4,0xb4 }, { 0x51,0x97,0xc6,0xc6 },
-    { 0x23,0xcb,0xe8,0xe8 }, { 0x7c,0xa1,0xdd,0xdd },
-    { 0x9c,0xe8,0x74,0x74 }, { 0x21,0x3e,0x1f,0x1f },
-    { 0xdd,0x96,0x4b,0x4b }, { 0xdc,0x61,0xbd,0xbd },
-    { 0x86,0x0d,0x8b,0x8b }, { 0x85,0x0f,0x8a,0x8a },
-    { 0x90,0xe0,0x70,0x70 }, { 0x42,0x7c,0x3e,0x3e },
-    { 0xc4,0x71,0xb5,0xb5 }, { 0xaa,0xcc,0x66,0x66 },
-    { 0xd8,0x90,0x48,0x48 }, { 0x05,0x06,0x03,0x03 },
-    { 0x01,0xf7,0xf6,0xf6 }, { 0x12,0x1c,0x0e,0x0e },
-    { 0xa3,0xc2,0x61,0x61 }, { 0x5f,0x6a,0x35,0x35 },
-    { 0xf9,0xae,0x57,0x57 }, { 0xd0,0x69,0xb9,0xb9 },
-    { 0x91,0x17,0x86,0x86 }, { 0x58,0x99,0xc1,0xc1 },
-    { 0x27,0x3a,0x1d,0x1d }, { 0xb9,0x27,0x9e,0x9e },
-    { 0x38,0xd9,0xe1,0xe1 }, { 0x13,0xeb,0xf8,0xf8 },
-    { 0xb3,0x2b,0x98,0x98 }, { 0x33,0x22,0x11,0x11 },
-    { 0xbb,0xd2,0x69,0x69 }, { 0x70,0xa9,0xd9,0xd9 },
-    { 0x89,0x07,0x8e,0x8e }, { 0xa7,0x33,0x94,0x94 },
-    { 0xb6,0x2d,0x9b,0x9b }, { 0x22,0x3c,0x1e,0x1e },
-    { 0x92,0x15,0x87,0x87 }, { 0x20,0xc9,0xe9,0xe9 },
-    { 0x49,0x87,0xce,0xce }, { 0xff,0xaa,0x55,0x55 },
-    { 0x78,0x50,0x28,0x28 }, { 0x7a,0xa5,0xdf,0xdf },
-    { 0x8f,0x03,0x8c,0x8c }, { 0xf8,0x59,0xa1,0xa1 },
-    { 0x80,0x09,0x89,0x89 }, { 0x17,0x1a,0x0d,0x0d },
-    { 0xda,0x65,0xbf,0xbf }, { 0x31,0xd7,0xe6,0xe6 },
-    { 0xc6,0x84,0x42,0x42 }, { 0xb8,0xd0,0x68,0x68 },
-    { 0xc3,0x82,0x41,0x41 }, { 0xb0,0x29,0x99,0x99 },
-    { 0x77,0x5a,0x2d,0x2d }, { 0x11,0x1e,0x0f,0x0f },
-    { 0xcb,0x7b,0xb0,0xb0 }, { 0xfc,0xa8,0x54,0x54 },
-    { 0xd6,0x6d,0xbb,0xbb }, { 0x3a,0x2c,0x16,0x16 }
-  };
-
-static const unsigned char T3[256][4] =
-  {
-    { 0x63,0xa5,0xc6,0x63 }, { 0x7c,0x84,0xf8,0x7c },
-    { 0x77,0x99,0xee,0x77 }, { 0x7b,0x8d,0xf6,0x7b },
-    { 0xf2,0x0d,0xff,0xf2 }, { 0x6b,0xbd,0xd6,0x6b },
-    { 0x6f,0xb1,0xde,0x6f }, { 0xc5,0x54,0x91,0xc5 },
-    { 0x30,0x50,0x60,0x30 }, { 0x01,0x03,0x02,0x01 },
-    { 0x67,0xa9,0xce,0x67 }, { 0x2b,0x7d,0x56,0x2b },
-    { 0xfe,0x19,0xe7,0xfe }, { 0xd7,0x62,0xb5,0xd7 },
-    { 0xab,0xe6,0x4d,0xab }, { 0x76,0x9a,0xec,0x76 },
-    { 0xca,0x45,0x8f,0xca }, { 0x82,0x9d,0x1f,0x82 },
-    { 0xc9,0x40,0x89,0xc9 }, { 0x7d,0x87,0xfa,0x7d },
-    { 0xfa,0x15,0xef,0xfa }, { 0x59,0xeb,0xb2,0x59 },
-    { 0x47,0xc9,0x8e,0x47 }, { 0xf0,0x0b,0xfb,0xf0 },
-    { 0xad,0xec,0x41,0xad }, { 0xd4,0x67,0xb3,0xd4 },
-    { 0xa2,0xfd,0x5f,0xa2 }, { 0xaf,0xea,0x45,0xaf },
-    { 0x9c,0xbf,0x23,0x9c }, { 0xa4,0xf7,0x53,0xa4 },
-    { 0x72,0x96,0xe4,0x72 }, { 0xc0,0x5b,0x9b,0xc0 },
-    { 0xb7,0xc2,0x75,0xb7 }, { 0xfd,0x1c,0xe1,0xfd },
-    { 0x93,0xae,0x3d,0x93 }, { 0x26,0x6a,0x4c,0x26 },
-    { 0x36,0x5a,0x6c,0x36 }, { 0x3f,0x41,0x7e,0x3f },
-    { 0xf7,0x02,0xf5,0xf7 }, { 0xcc,0x4f,0x83,0xcc },
-    { 0x34,0x5c,0x68,0x34 }, { 0xa5,0xf4,0x51,0xa5 },
-    { 0xe5,0x34,0xd1,0xe5 }, { 0xf1,0x08,0xf9,0xf1 },
-    { 0x71,0x93,0xe2,0x71 }, { 0xd8,0x73,0xab,0xd8 },
-    { 0x31,0x53,0x62,0x31 }, { 0x15,0x3f,0x2a,0x15 },
-    { 0x04,0x0c,0x08,0x04 }, { 0xc7,0x52,0x95,0xc7 },
-    { 0x23,0x65,0x46,0x23 }, { 0xc3,0x5e,0x9d,0xc3 },
-    { 0x18,0x28,0x30,0x18 }, { 0x96,0xa1,0x37,0x96 },
-    { 0x05,0x0f,0x0a,0x05 }, { 0x9a,0xb5,0x2f,0x9a },
-    { 0x07,0x09,0x0e,0x07 }, { 0x12,0x36,0x24,0x12 },
-    { 0x80,0x9b,0x1b,0x80 }, { 0xe2,0x3d,0xdf,0xe2 },
-    { 0xeb,0x26,0xcd,0xeb }, { 0x27,0x69,0x4e,0x27 },
-    { 0xb2,0xcd,0x7f,0xb2 }, { 0x75,0x9f,0xea,0x75 },
-    { 0x09,0x1b,0x12,0x09 }, { 0x83,0x9e,0x1d,0x83 },
-    { 0x2c,0x74,0x58,0x2c }, { 0x1a,0x2e,0x34,0x1a },
-    { 0x1b,0x2d,0x36,0x1b }, { 0x6e,0xb2,0xdc,0x6e },
-    { 0x5a,0xee,0xb4,0x5a }, { 0xa0,0xfb,0x5b,0xa0 },
-    { 0x52,0xf6,0xa4,0x52 }, { 0x3b,0x4d,0x76,0x3b },
-    { 0xd6,0x61,0xb7,0xd6 }, { 0xb3,0xce,0x7d,0xb3 },
-    { 0x29,0x7b,0x52,0x29 }, { 0xe3,0x3e,0xdd,0xe3 },
-    { 0x2f,0x71,0x5e,0x2f }, { 0x84,0x97,0x13,0x84 },
-    { 0x53,0xf5,0xa6,0x53 }, { 0xd1,0x68,0xb9,0xd1 },
-    { 0x00,0x00,0x00,0x00 }, { 0xed,0x2c,0xc1,0xed },
-    { 0x20,0x60,0x40,0x20 }, { 0xfc,0x1f,0xe3,0xfc },
-    { 0xb1,0xc8,0x79,0xb1 }, { 0x5b,0xed,0xb6,0x5b },
-    { 0x6a,0xbe,0xd4,0x6a }, { 0xcb,0x46,0x8d,0xcb },
-    { 0xbe,0xd9,0x67,0xbe }, { 0x39,0x4b,0x72,0x39 },
-    { 0x4a,0xde,0x94,0x4a }, { 0x4c,0xd4,0x98,0x4c },
-    { 0x58,0xe8,0xb0,0x58 }, { 0xcf,0x4a,0x85,0xcf },
-    { 0xd0,0x6b,0xbb,0xd0 }, { 0xef,0x2a,0xc5,0xef },
-    { 0xaa,0xe5,0x4f,0xaa }, { 0xfb,0x16,0xed,0xfb },
-    { 0x43,0xc5,0x86,0x43 }, { 0x4d,0xd7,0x9a,0x4d },
-    { 0x33,0x55,0x66,0x33 }, { 0x85,0x94,0x11,0x85 },
-    { 0x45,0xcf,0x8a,0x45 }, { 0xf9,0x10,0xe9,0xf9 },
-    { 0x02,0x06,0x04,0x02 }, { 0x7f,0x81,0xfe,0x7f },
-    { 0x50,0xf0,0xa0,0x50 }, { 0x3c,0x44,0x78,0x3c },
-    { 0x9f,0xba,0x25,0x9f }, { 0xa8,0xe3,0x4b,0xa8 },
-    { 0x51,0xf3,0xa2,0x51 }, { 0xa3,0xfe,0x5d,0xa3 },
-    { 0x40,0xc0,0x80,0x40 }, { 0x8f,0x8a,0x05,0x8f },
-    { 0x92,0xad,0x3f,0x92 }, { 0x9d,0xbc,0x21,0x9d },
-    { 0x38,0x48,0x70,0x38 }, { 0xf5,0x04,0xf1,0xf5 },
-    { 0xbc,0xdf,0x63,0xbc }, { 0xb6,0xc1,0x77,0xb6 },
-    { 0xda,0x75,0xaf,0xda }, { 0x21,0x63,0x42,0x21 },
-    { 0x10,0x30,0x20,0x10 }, { 0xff,0x1a,0xe5,0xff },
-    { 0xf3,0x0e,0xfd,0xf3 }, { 0xd2,0x6d,0xbf,0xd2 },
-    { 0xcd,0x4c,0x81,0xcd }, { 0x0c,0x14,0x18,0x0c },
-    { 0x13,0x35,0x26,0x13 }, { 0xec,0x2f,0xc3,0xec },
-    { 0x5f,0xe1,0xbe,0x5f }, { 0x97,0xa2,0x35,0x97 },
-    { 0x44,0xcc,0x88,0x44 }, { 0x17,0x39,0x2e,0x17 },
-    { 0xc4,0x57,0x93,0xc4 }, { 0xa7,0xf2,0x55,0xa7 },
-    { 0x7e,0x82,0xfc,0x7e }, { 0x3d,0x47,0x7a,0x3d },
-    { 0x64,0xac,0xc8,0x64 }, { 0x5d,0xe7,0xba,0x5d },
-    { 0x19,0x2b,0x32,0x19 }, { 0x73,0x95,0xe6,0x73 },
-    { 0x60,0xa0,0xc0,0x60 }, { 0x81,0x98,0x19,0x81 },
-    { 0x4f,0xd1,0x9e,0x4f }, { 0xdc,0x7f,0xa3,0xdc },
-    { 0x22,0x66,0x44,0x22 }, { 0x2a,0x7e,0x54,0x2a },
-    { 0x90,0xab,0x3b,0x90 }, { 0x88,0x83,0x0b,0x88 },
-    { 0x46,0xca,0x8c,0x46 }, { 0xee,0x29,0xc7,0xee },
-    { 0xb8,0xd3,0x6b,0xb8 }, { 0x14,0x3c,0x28,0x14 },
-    { 0xde,0x79,0xa7,0xde }, { 0x5e,0xe2,0xbc,0x5e },
-    { 0x0b,0x1d,0x16,0x0b }, { 0xdb,0x76,0xad,0xdb },
-    { 0xe0,0x3b,0xdb,0xe0 }, { 0x32,0x56,0x64,0x32 },
-    { 0x3a,0x4e,0x74,0x3a }, { 0x0a,0x1e,0x14,0x0a },
-    { 0x49,0xdb,0x92,0x49 }, { 0x06,0x0a,0x0c,0x06 },
-    { 0x24,0x6c,0x48,0x24 }, { 0x5c,0xe4,0xb8,0x5c },
-    { 0xc2,0x5d,0x9f,0xc2 }, { 0xd3,0x6e,0xbd,0xd3 },
-    { 0xac,0xef,0x43,0xac }, { 0x62,0xa6,0xc4,0x62 },
-    { 0x91,0xa8,0x39,0x91 }, { 0x95,0xa4,0x31,0x95 },
-    { 0xe4,0x37,0xd3,0xe4 }, { 0x79,0x8b,0xf2,0x79 },
-    { 0xe7,0x32,0xd5,0xe7 }, { 0xc8,0x43,0x8b,0xc8 },
-    { 0x37,0x59,0x6e,0x37 }, { 0x6d,0xb7,0xda,0x6d },
-    { 0x8d,0x8c,0x01,0x8d }, { 0xd5,0x64,0xb1,0xd5 },
-    { 0x4e,0xd2,0x9c,0x4e }, { 0xa9,0xe0,0x49,0xa9 },
-    { 0x6c,0xb4,0xd8,0x6c }, { 0x56,0xfa,0xac,0x56 },
-    { 0xf4,0x07,0xf3,0xf4 }, { 0xea,0x25,0xcf,0xea },
-    { 0x65,0xaf,0xca,0x65 }, { 0x7a,0x8e,0xf4,0x7a },
-    { 0xae,0xe9,0x47,0xae }, { 0x08,0x18,0x10,0x08 },
-    { 0xba,0xd5,0x6f,0xba }, { 0x78,0x88,0xf0,0x78 },
-    { 0x25,0x6f,0x4a,0x25 }, { 0x2e,0x72,0x5c,0x2e },
-    { 0x1c,0x24,0x38,0x1c }, { 0xa6,0xf1,0x57,0xa6 },
-    { 0xb4,0xc7,0x73,0xb4 }, { 0xc6,0x51,0x97,0xc6 },
-    { 0xe8,0x23,0xcb,0xe8 }, { 0xdd,0x7c,0xa1,0xdd },
-    { 0x74,0x9c,0xe8,0x74 }, { 0x1f,0x21,0x3e,0x1f },
-    { 0x4b,0xdd,0x96,0x4b }, { 0xbd,0xdc,0x61,0xbd },
-    { 0x8b,0x86,0x0d,0x8b }, { 0x8a,0x85,0x0f,0x8a },
-    { 0x70,0x90,0xe0,0x70 }, { 0x3e,0x42,0x7c,0x3e },
-    { 0xb5,0xc4,0x71,0xb5 }, { 0x66,0xaa,0xcc,0x66 },
-    { 0x48,0xd8,0x90,0x48 }, { 0x03,0x05,0x06,0x03 },
-    { 0xf6,0x01,0xf7,0xf6 }, { 0x0e,0x12,0x1c,0x0e },
-    { 0x61,0xa3,0xc2,0x61 }, { 0x35,0x5f,0x6a,0x35 },
-    { 0x57,0xf9,0xae,0x57 }, { 0xb9,0xd0,0x69,0xb9 },
-    { 0x86,0x91,0x17,0x86 }, { 0xc1,0x58,0x99,0xc1 },
-    { 0x1d,0x27,0x3a,0x1d }, { 0x9e,0xb9,0x27,0x9e },
-    { 0xe1,0x38,0xd9,0xe1 }, { 0xf8,0x13,0xeb,0xf8 },
-    { 0x98,0xb3,0x2b,0x98 }, { 0x11,0x33,0x22,0x11 },
-    { 0x69,0xbb,0xd2,0x69 }, { 0xd9,0x70,0xa9,0xd9 },
-    { 0x8e,0x89,0x07,0x8e }, { 0x94,0xa7,0x33,0x94 },
-    { 0x9b,0xb6,0x2d,0x9b }, { 0x1e,0x22,0x3c,0x1e },
-    { 0x87,0x92,0x15,0x87 }, { 0xe9,0x20,0xc9,0xe9 },
-    { 0xce,0x49,0x87,0xce }, { 0x55,0xff,0xaa,0x55 },
-    { 0x28,0x78,0x50,0x28 }, { 0xdf,0x7a,0xa5,0xdf },
-    { 0x8c,0x8f,0x03,0x8c }, { 0xa1,0xf8,0x59,0xa1 },
-    { 0x89,0x80,0x09,0x89 }, { 0x0d,0x17,0x1a,0x0d },
-    { 0xbf,0xda,0x65,0xbf }, { 0xe6,0x31,0xd7,0xe6 },
-    { 0x42,0xc6,0x84,0x42 }, { 0x68,0xb8,0xd0,0x68 },
-    { 0x41,0xc3,0x82,0x41 }, { 0x99,0xb0,0x29,0x99 },
-    { 0x2d,0x77,0x5a,0x2d }, { 0x0f,0x11,0x1e,0x0f },
-    { 0xb0,0xcb,0x7b,0xb0 }, { 0x54,0xfc,0xa8,0x54 },
-    { 0xbb,0xd6,0x6d,0xbb }, { 0x16,0x3a,0x2c,0x16 }
-  };
-
-static const unsigned char T4[256][4] =
-  {
-    { 0x63,0x63,0xa5,0xc6 }, { 0x7c,0x7c,0x84,0xf8 },
-    { 0x77,0x77,0x99,0xee }, { 0x7b,0x7b,0x8d,0xf6 },
-    { 0xf2,0xf2,0x0d,0xff }, { 0x6b,0x6b,0xbd,0xd6 },
-    { 0x6f,0x6f,0xb1,0xde }, { 0xc5,0xc5,0x54,0x91 },
-    { 0x30,0x30,0x50,0x60 }, { 0x01,0x01,0x03,0x02 },
-    { 0x67,0x67,0xa9,0xce }, { 0x2b,0x2b,0x7d,0x56 },
-    { 0xfe,0xfe,0x19,0xe7 }, { 0xd7,0xd7,0x62,0xb5 },
-    { 0xab,0xab,0xe6,0x4d }, { 0x76,0x76,0x9a,0xec },
-    { 0xca,0xca,0x45,0x8f }, { 0x82,0x82,0x9d,0x1f },
-    { 0xc9,0xc9,0x40,0x89 }, { 0x7d,0x7d,0x87,0xfa },
-    { 0xfa,0xfa,0x15,0xef }, { 0x59,0x59,0xeb,0xb2 },
-    { 0x47,0x47,0xc9,0x8e }, { 0xf0,0xf0,0x0b,0xfb },
-    { 0xad,0xad,0xec,0x41 }, { 0xd4,0xd4,0x67,0xb3 },
-    { 0xa2,0xa2,0xfd,0x5f }, { 0xaf,0xaf,0xea,0x45 },
-    { 0x9c,0x9c,0xbf,0x23 }, { 0xa4,0xa4,0xf7,0x53 },
-    { 0x72,0x72,0x96,0xe4 }, { 0xc0,0xc0,0x5b,0x9b },
-    { 0xb7,0xb7,0xc2,0x75 }, { 0xfd,0xfd,0x1c,0xe1 },
-    { 0x93,0x93,0xae,0x3d }, { 0x26,0x26,0x6a,0x4c },
-    { 0x36,0x36,0x5a,0x6c }, { 0x3f,0x3f,0x41,0x7e },
-    { 0xf7,0xf7,0x02,0xf5 }, { 0xcc,0xcc,0x4f,0x83 },
-    { 0x34,0x34,0x5c,0x68 }, { 0xa5,0xa5,0xf4,0x51 },
-    { 0xe5,0xe5,0x34,0xd1 }, { 0xf1,0xf1,0x08,0xf9 },
-    { 0x71,0x71,0x93,0xe2 }, { 0xd8,0xd8,0x73,0xab },
-    { 0x31,0x31,0x53,0x62 }, { 0x15,0x15,0x3f,0x2a },
-    { 0x04,0x04,0x0c,0x08 }, { 0xc7,0xc7,0x52,0x95 },
-    { 0x23,0x23,0x65,0x46 }, { 0xc3,0xc3,0x5e,0x9d },
-    { 0x18,0x18,0x28,0x30 }, { 0x96,0x96,0xa1,0x37 },
-    { 0x05,0x05,0x0f,0x0a }, { 0x9a,0x9a,0xb5,0x2f },
-    { 0x07,0x07,0x09,0x0e }, { 0x12,0x12,0x36,0x24 },
-    { 0x80,0x80,0x9b,0x1b }, { 0xe2,0xe2,0x3d,0xdf },
-    { 0xeb,0xeb,0x26,0xcd }, { 0x27,0x27,0x69,0x4e },
-    { 0xb2,0xb2,0xcd,0x7f }, { 0x75,0x75,0x9f,0xea },
-    { 0x09,0x09,0x1b,0x12 }, { 0x83,0x83,0x9e,0x1d },
-    { 0x2c,0x2c,0x74,0x58 }, { 0x1a,0x1a,0x2e,0x34 },
-    { 0x1b,0x1b,0x2d,0x36 }, { 0x6e,0x6e,0xb2,0xdc },
-    { 0x5a,0x5a,0xee,0xb4 }, { 0xa0,0xa0,0xfb,0x5b },
-    { 0x52,0x52,0xf6,0xa4 }, { 0x3b,0x3b,0x4d,0x76 },
-    { 0xd6,0xd6,0x61,0xb7 }, { 0xb3,0xb3,0xce,0x7d },
-    { 0x29,0x29,0x7b,0x52 }, { 0xe3,0xe3,0x3e,0xdd },
-    { 0x2f,0x2f,0x71,0x5e }, { 0x84,0x84,0x97,0x13 },
-    { 0x53,0x53,0xf5,0xa6 }, { 0xd1,0xd1,0x68,0xb9 },
-    { 0x00,0x00,0x00,0x00 }, { 0xed,0xed,0x2c,0xc1 },
-    { 0x20,0x20,0x60,0x40 }, { 0xfc,0xfc,0x1f,0xe3 },
-    { 0xb1,0xb1,0xc8,0x79 }, { 0x5b,0x5b,0xed,0xb6 },
-    { 0x6a,0x6a,0xbe,0xd4 }, { 0xcb,0xcb,0x46,0x8d },
-    { 0xbe,0xbe,0xd9,0x67 }, { 0x39,0x39,0x4b,0x72 },
-    { 0x4a,0x4a,0xde,0x94 }, { 0x4c,0x4c,0xd4,0x98 },
-    { 0x58,0x58,0xe8,0xb0 }, { 0xcf,0xcf,0x4a,0x85 },
-    { 0xd0,0xd0,0x6b,0xbb }, { 0xef,0xef,0x2a,0xc5 },
-    { 0xaa,0xaa,0xe5,0x4f }, { 0xfb,0xfb,0x16,0xed },
-    { 0x43,0x43,0xc5,0x86 }, { 0x4d,0x4d,0xd7,0x9a },
-    { 0x33,0x33,0x55,0x66 }, { 0x85,0x85,0x94,0x11 },
-    { 0x45,0x45,0xcf,0x8a }, { 0xf9,0xf9,0x10,0xe9 },
-    { 0x02,0x02,0x06,0x04 }, { 0x7f,0x7f,0x81,0xfe },
-    { 0x50,0x50,0xf0,0xa0 }, { 0x3c,0x3c,0x44,0x78 },
-    { 0x9f,0x9f,0xba,0x25 }, { 0xa8,0xa8,0xe3,0x4b },
-    { 0x51,0x51,0xf3,0xa2 }, { 0xa3,0xa3,0xfe,0x5d },
-    { 0x40,0x40,0xc0,0x80 }, { 0x8f,0x8f,0x8a,0x05 },
-    { 0x92,0x92,0xad,0x3f }, { 0x9d,0x9d,0xbc,0x21 },
-    { 0x38,0x38,0x48,0x70 }, { 0xf5,0xf5,0x04,0xf1 },
-    { 0xbc,0xbc,0xdf,0x63 }, { 0xb6,0xb6,0xc1,0x77 },
-    { 0xda,0xda,0x75,0xaf }, { 0x21,0x21,0x63,0x42 },
-    { 0x10,0x10,0x30,0x20 }, { 0xff,0xff,0x1a,0xe5 },
-    { 0xf3,0xf3,0x0e,0xfd }, { 0xd2,0xd2,0x6d,0xbf },
-    { 0xcd,0xcd,0x4c,0x81 }, { 0x0c,0x0c,0x14,0x18 },
-    { 0x13,0x13,0x35,0x26 }, { 0xec,0xec,0x2f,0xc3 },
-    { 0x5f,0x5f,0xe1,0xbe }, { 0x97,0x97,0xa2,0x35 },
-    { 0x44,0x44,0xcc,0x88 }, { 0x17,0x17,0x39,0x2e },
-    { 0xc4,0xc4,0x57,0x93 }, { 0xa7,0xa7,0xf2,0x55 },
-    { 0x7e,0x7e,0x82,0xfc }, { 0x3d,0x3d,0x47,0x7a },
-    { 0x64,0x64,0xac,0xc8 }, { 0x5d,0x5d,0xe7,0xba },
-    { 0x19,0x19,0x2b,0x32 }, { 0x73,0x73,0x95,0xe6 },
-    { 0x60,0x60,0xa0,0xc0 }, { 0x81,0x81,0x98,0x19 },
-    { 0x4f,0x4f,0xd1,0x9e }, { 0xdc,0xdc,0x7f,0xa3 },
-    { 0x22,0x22,0x66,0x44 }, { 0x2a,0x2a,0x7e,0x54 },
-    { 0x90,0x90,0xab,0x3b }, { 0x88,0x88,0x83,0x0b },
-    { 0x46,0x46,0xca,0x8c }, { 0xee,0xee,0x29,0xc7 },
-    { 0xb8,0xb8,0xd3,0x6b }, { 0x14,0x14,0x3c,0x28 },
-    { 0xde,0xde,0x79,0xa7 }, { 0x5e,0x5e,0xe2,0xbc },
-    { 0x0b,0x0b,0x1d,0x16 }, { 0xdb,0xdb,0x76,0xad },
-    { 0xe0,0xe0,0x3b,0xdb }, { 0x32,0x32,0x56,0x64 },
-    { 0x3a,0x3a,0x4e,0x74 }, { 0x0a,0x0a,0x1e,0x14 },
-    { 0x49,0x49,0xdb,0x92 }, { 0x06,0x06,0x0a,0x0c },
-    { 0x24,0x24,0x6c,0x48 }, { 0x5c,0x5c,0xe4,0xb8 },
-    { 0xc2,0xc2,0x5d,0x9f }, { 0xd3,0xd3,0x6e,0xbd },
-    { 0xac,0xac,0xef,0x43 }, { 0x62,0x62,0xa6,0xc4 },
-    { 0x91,0x91,0xa8,0x39 }, { 0x95,0x95,0xa4,0x31 },
-    { 0xe4,0xe4,0x37,0xd3 }, { 0x79,0x79,0x8b,0xf2 },
-    { 0xe7,0xe7,0x32,0xd5 }, { 0xc8,0xc8,0x43,0x8b },
-    { 0x37,0x37,0x59,0x6e }, { 0x6d,0x6d,0xb7,0xda },
-    { 0x8d,0x8d,0x8c,0x01 }, { 0xd5,0xd5,0x64,0xb1 },
-    { 0x4e,0x4e,0xd2,0x9c }, { 0xa9,0xa9,0xe0,0x49 },
-    { 0x6c,0x6c,0xb4,0xd8 }, { 0x56,0x56,0xfa,0xac },
-    { 0xf4,0xf4,0x07,0xf3 }, { 0xea,0xea,0x25,0xcf },
-    { 0x65,0x65,0xaf,0xca }, { 0x7a,0x7a,0x8e,0xf4 },
-    { 0xae,0xae,0xe9,0x47 }, { 0x08,0x08,0x18,0x10 },
-    { 0xba,0xba,0xd5,0x6f }, { 0x78,0x78,0x88,0xf0 },
-    { 0x25,0x25,0x6f,0x4a }, { 0x2e,0x2e,0x72,0x5c },
-    { 0x1c,0x1c,0x24,0x38 }, { 0xa6,0xa6,0xf1,0x57 },
-    { 0xb4,0xb4,0xc7,0x73 }, { 0xc6,0xc6,0x51,0x97 },
-    { 0xe8,0xe8,0x23,0xcb }, { 0xdd,0xdd,0x7c,0xa1 },
-    { 0x74,0x74,0x9c,0xe8 }, { 0x1f,0x1f,0x21,0x3e },
-    { 0x4b,0x4b,0xdd,0x96 }, { 0xbd,0xbd,0xdc,0x61 },
-    { 0x8b,0x8b,0x86,0x0d }, { 0x8a,0x8a,0x85,0x0f },
-    { 0x70,0x70,0x90,0xe0 }, { 0x3e,0x3e,0x42,0x7c },
-    { 0xb5,0xb5,0xc4,0x71 }, { 0x66,0x66,0xaa,0xcc },
-    { 0x48,0x48,0xd8,0x90 }, { 0x03,0x03,0x05,0x06 },
-    { 0xf6,0xf6,0x01,0xf7 }, { 0x0e,0x0e,0x12,0x1c },
-    { 0x61,0x61,0xa3,0xc2 }, { 0x35,0x35,0x5f,0x6a },
-    { 0x57,0x57,0xf9,0xae }, { 0xb9,0xb9,0xd0,0x69 },
-    { 0x86,0x86,0x91,0x17 }, { 0xc1,0xc1,0x58,0x99 },
-    { 0x1d,0x1d,0x27,0x3a }, { 0x9e,0x9e,0xb9,0x27 },
-    { 0xe1,0xe1,0x38,0xd9 }, { 0xf8,0xf8,0x13,0xeb },
-    { 0x98,0x98,0xb3,0x2b }, { 0x11,0x11,0x33,0x22 },
-    { 0x69,0x69,0xbb,0xd2 }, { 0xd9,0xd9,0x70,0xa9 },
-    { 0x8e,0x8e,0x89,0x07 }, { 0x94,0x94,0xa7,0x33 },
-    { 0x9b,0x9b,0xb6,0x2d }, { 0x1e,0x1e,0x22,0x3c },
-    { 0x87,0x87,0x92,0x15 }, { 0xe9,0xe9,0x20,0xc9 },
-    { 0xce,0xce,0x49,0x87 }, { 0x55,0x55,0xff,0xaa },
-    { 0x28,0x28,0x78,0x50 }, { 0xdf,0xdf,0x7a,0xa5 },
-    { 0x8c,0x8c,0x8f,0x03 }, { 0xa1,0xa1,0xf8,0x59 },
-    { 0x89,0x89,0x80,0x09 }, { 0x0d,0x0d,0x17,0x1a },
-    { 0xbf,0xbf,0xda,0x65 }, { 0xe6,0xe6,0x31,0xd7 },
-    { 0x42,0x42,0xc6,0x84 }, { 0x68,0x68,0xb8,0xd0 },
-    { 0x41,0x41,0xc3,0x82 }, { 0x99,0x99,0xb0,0x29 },
-    { 0x2d,0x2d,0x77,0x5a }, { 0x0f,0x0f,0x11,0x1e },
-    { 0xb0,0xb0,0xcb,0x7b }, { 0x54,0x54,0xfc,0xa8 },
-    { 0xbb,0xbb,0xd6,0x6d }, { 0x16,0x16,0x3a,0x2c }
-  };
-
-static const unsigned char T5[256][4] =
-  {
-    { 0x51,0xf4,0xa7,0x50 }, { 0x7e,0x41,0x65,0x53 },
-    { 0x1a,0x17,0xa4,0xc3 }, { 0x3a,0x27,0x5e,0x96 },
-    { 0x3b,0xab,0x6b,0xcb }, { 0x1f,0x9d,0x45,0xf1 },
-    { 0xac,0xfa,0x58,0xab }, { 0x4b,0xe3,0x03,0x93 },
-    { 0x20,0x30,0xfa,0x55 }, { 0xad,0x76,0x6d,0xf6 },
-    { 0x88,0xcc,0x76,0x91 }, { 0xf5,0x02,0x4c,0x25 },
-    { 0x4f,0xe5,0xd7,0xfc }, { 0xc5,0x2a,0xcb,0xd7 },
-    { 0x26,0x35,0x44,0x80 }, { 0xb5,0x62,0xa3,0x8f },
-    { 0xde,0xb1,0x5a,0x49 }, { 0x25,0xba,0x1b,0x67 },
-    { 0x45,0xea,0x0e,0x98 }, { 0x5d,0xfe,0xc0,0xe1 },
-    { 0xc3,0x2f,0x75,0x02 }, { 0x81,0x4c,0xf0,0x12 },
-    { 0x8d,0x46,0x97,0xa3 }, { 0x6b,0xd3,0xf9,0xc6 },
-    { 0x03,0x8f,0x5f,0xe7 }, { 0x15,0x92,0x9c,0x95 },
-    { 0xbf,0x6d,0x7a,0xeb }, { 0x95,0x52,0x59,0xda },
-    { 0xd4,0xbe,0x83,0x2d }, { 0x58,0x74,0x21,0xd3 },
-    { 0x49,0xe0,0x69,0x29 }, { 0x8e,0xc9,0xc8,0x44 },
-    { 0x75,0xc2,0x89,0x6a }, { 0xf4,0x8e,0x79,0x78 },
-    { 0x99,0x58,0x3e,0x6b }, { 0x27,0xb9,0x71,0xdd },
-    { 0xbe,0xe1,0x4f,0xb6 }, { 0xf0,0x88,0xad,0x17 },
-    { 0xc9,0x20,0xac,0x66 }, { 0x7d,0xce,0x3a,0xb4 },
-    { 0x63,0xdf,0x4a,0x18 }, { 0xe5,0x1a,0x31,0x82 },
-    { 0x97,0x51,0x33,0x60 }, { 0x62,0x53,0x7f,0x45 },
-    { 0xb1,0x64,0x77,0xe0 }, { 0xbb,0x6b,0xae,0x84 },
-    { 0xfe,0x81,0xa0,0x1c }, { 0xf9,0x08,0x2b,0x94 },
-    { 0x70,0x48,0x68,0x58 }, { 0x8f,0x45,0xfd,0x19 },
-    { 0x94,0xde,0x6c,0x87 }, { 0x52,0x7b,0xf8,0xb7 },
-    { 0xab,0x73,0xd3,0x23 }, { 0x72,0x4b,0x02,0xe2 },
-    { 0xe3,0x1f,0x8f,0x57 }, { 0x66,0x55,0xab,0x2a },
-    { 0xb2,0xeb,0x28,0x07 }, { 0x2f,0xb5,0xc2,0x03 },
-    { 0x86,0xc5,0x7b,0x9a }, { 0xd3,0x37,0x08,0xa5 },
-    { 0x30,0x28,0x87,0xf2 }, { 0x23,0xbf,0xa5,0xb2 },
-    { 0x02,0x03,0x6a,0xba }, { 0xed,0x16,0x82,0x5c },
-    { 0x8a,0xcf,0x1c,0x2b }, { 0xa7,0x79,0xb4,0x92 },
-    { 0xf3,0x07,0xf2,0xf0 }, { 0x4e,0x69,0xe2,0xa1 },
-    { 0x65,0xda,0xf4,0xcd }, { 0x06,0x05,0xbe,0xd5 },
-    { 0xd1,0x34,0x62,0x1f }, { 0xc4,0xa6,0xfe,0x8a },
-    { 0x34,0x2e,0x53,0x9d }, { 0xa2,0xf3,0x55,0xa0 },
-    { 0x05,0x8a,0xe1,0x32 }, { 0xa4,0xf6,0xeb,0x75 },
-    { 0x0b,0x83,0xec,0x39 }, { 0x40,0x60,0xef,0xaa },
-    { 0x5e,0x71,0x9f,0x06 }, { 0xbd,0x6e,0x10,0x51 },
-    { 0x3e,0x21,0x8a,0xf9 }, { 0x96,0xdd,0x06,0x3d },
-    { 0xdd,0x3e,0x05,0xae }, { 0x4d,0xe6,0xbd,0x46 },
-    { 0x91,0x54,0x8d,0xb5 }, { 0x71,0xc4,0x5d,0x05 },
-    { 0x04,0x06,0xd4,0x6f }, { 0x60,0x50,0x15,0xff },
-    { 0x19,0x98,0xfb,0x24 }, { 0xd6,0xbd,0xe9,0x97 },
-    { 0x89,0x40,0x43,0xcc }, { 0x67,0xd9,0x9e,0x77 },
-    { 0xb0,0xe8,0x42,0xbd }, { 0x07,0x89,0x8b,0x88 },
-    { 0xe7,0x19,0x5b,0x38 }, { 0x79,0xc8,0xee,0xdb },
-    { 0xa1,0x7c,0x0a,0x47 }, { 0x7c,0x42,0x0f,0xe9 },
-    { 0xf8,0x84,0x1e,0xc9 }, { 0x00,0x00,0x00,0x00 },
-    { 0x09,0x80,0x86,0x83 }, { 0x32,0x2b,0xed,0x48 },
-    { 0x1e,0x11,0x70,0xac }, { 0x6c,0x5a,0x72,0x4e },
-    { 0xfd,0x0e,0xff,0xfb }, { 0x0f,0x85,0x38,0x56 },
-    { 0x3d,0xae,0xd5,0x1e }, { 0x36,0x2d,0x39,0x27 },
-    { 0x0a,0x0f,0xd9,0x64 }, { 0x68,0x5c,0xa6,0x21 },
-    { 0x9b,0x5b,0x54,0xd1 }, { 0x24,0x36,0x2e,0x3a },
-    { 0x0c,0x0a,0x67,0xb1 }, { 0x93,0x57,0xe7,0x0f },
-    { 0xb4,0xee,0x96,0xd2 }, { 0x1b,0x9b,0x91,0x9e },
-    { 0x80,0xc0,0xc5,0x4f }, { 0x61,0xdc,0x20,0xa2 },
-    { 0x5a,0x77,0x4b,0x69 }, { 0x1c,0x12,0x1a,0x16 },
-    { 0xe2,0x93,0xba,0x0a }, { 0xc0,0xa0,0x2a,0xe5 },
-    { 0x3c,0x22,0xe0,0x43 }, { 0x12,0x1b,0x17,0x1d },
-    { 0x0e,0x09,0x0d,0x0b }, { 0xf2,0x8b,0xc7,0xad },
-    { 0x2d,0xb6,0xa8,0xb9 }, { 0x14,0x1e,0xa9,0xc8 },
-    { 0x57,0xf1,0x19,0x85 }, { 0xaf,0x75,0x07,0x4c },
-    { 0xee,0x99,0xdd,0xbb }, { 0xa3,0x7f,0x60,0xfd },
-    { 0xf7,0x01,0x26,0x9f }, { 0x5c,0x72,0xf5,0xbc },
-    { 0x44,0x66,0x3b,0xc5 }, { 0x5b,0xfb,0x7e,0x34 },
-    { 0x8b,0x43,0x29,0x76 }, { 0xcb,0x23,0xc6,0xdc },
-    { 0xb6,0xed,0xfc,0x68 }, { 0xb8,0xe4,0xf1,0x63 },
-    { 0xd7,0x31,0xdc,0xca }, { 0x42,0x63,0x85,0x10 },
-    { 0x13,0x97,0x22,0x40 }, { 0x84,0xc6,0x11,0x20 },
-    { 0x85,0x4a,0x24,0x7d }, { 0xd2,0xbb,0x3d,0xf8 },
-    { 0xae,0xf9,0x32,0x11 }, { 0xc7,0x29,0xa1,0x6d },
-    { 0x1d,0x9e,0x2f,0x4b }, { 0xdc,0xb2,0x30,0xf3 },
-    { 0x0d,0x86,0x52,0xec }, { 0x77,0xc1,0xe3,0xd0 },
-    { 0x2b,0xb3,0x16,0x6c }, { 0xa9,0x70,0xb9,0x99 },
-    { 0x11,0x94,0x48,0xfa }, { 0x47,0xe9,0x64,0x22 },
-    { 0xa8,0xfc,0x8c,0xc4 }, { 0xa0,0xf0,0x3f,0x1a },
-    { 0x56,0x7d,0x2c,0xd8 }, { 0x22,0x33,0x90,0xef },
-    { 0x87,0x49,0x4e,0xc7 }, { 0xd9,0x38,0xd1,0xc1 },
-    { 0x8c,0xca,0xa2,0xfe }, { 0x98,0xd4,0x0b,0x36 },
-    { 0xa6,0xf5,0x81,0xcf }, { 0xa5,0x7a,0xde,0x28 },
-    { 0xda,0xb7,0x8e,0x26 }, { 0x3f,0xad,0xbf,0xa4 },
-    { 0x2c,0x3a,0x9d,0xe4 }, { 0x50,0x78,0x92,0x0d },
-    { 0x6a,0x5f,0xcc,0x9b }, { 0x54,0x7e,0x46,0x62 },
-    { 0xf6,0x8d,0x13,0xc2 }, { 0x90,0xd8,0xb8,0xe8 },
-    { 0x2e,0x39,0xf7,0x5e }, { 0x82,0xc3,0xaf,0xf5 },
-    { 0x9f,0x5d,0x80,0xbe }, { 0x69,0xd0,0x93,0x7c },
-    { 0x6f,0xd5,0x2d,0xa9 }, { 0xcf,0x25,0x12,0xb3 },
-    { 0xc8,0xac,0x99,0x3b }, { 0x10,0x18,0x7d,0xa7 },
-    { 0xe8,0x9c,0x63,0x6e }, { 0xdb,0x3b,0xbb,0x7b },
-    { 0xcd,0x26,0x78,0x09 }, { 0x6e,0x59,0x18,0xf4 },
-    { 0xec,0x9a,0xb7,0x01 }, { 0x83,0x4f,0x9a,0xa8 },
-    { 0xe6,0x95,0x6e,0x65 }, { 0xaa,0xff,0xe6,0x7e },
-    { 0x21,0xbc,0xcf,0x08 }, { 0xef,0x15,0xe8,0xe6 },
-    { 0xba,0xe7,0x9b,0xd9 }, { 0x4a,0x6f,0x36,0xce },
-    { 0xea,0x9f,0x09,0xd4 }, { 0x29,0xb0,0x7c,0xd6 },
-    { 0x31,0xa4,0xb2,0xaf }, { 0x2a,0x3f,0x23,0x31 },
-    { 0xc6,0xa5,0x94,0x30 }, { 0x35,0xa2,0x66,0xc0 },
-    { 0x74,0x4e,0xbc,0x37 }, { 0xfc,0x82,0xca,0xa6 },
-    { 0xe0,0x90,0xd0,0xb0 }, { 0x33,0xa7,0xd8,0x15 },
-    { 0xf1,0x04,0x98,0x4a }, { 0x41,0xec,0xda,0xf7 },
-    { 0x7f,0xcd,0x50,0x0e }, { 0x17,0x91,0xf6,0x2f },
-    { 0x76,0x4d,0xd6,0x8d }, { 0x43,0xef,0xb0,0x4d },
-    { 0xcc,0xaa,0x4d,0x54 }, { 0xe4,0x96,0x04,0xdf },
-    { 0x9e,0xd1,0xb5,0xe3 }, { 0x4c,0x6a,0x88,0x1b },
-    { 0xc1,0x2c,0x1f,0xb8 }, { 0x46,0x65,0x51,0x7f },
-    { 0x9d,0x5e,0xea,0x04 }, { 0x01,0x8c,0x35,0x5d },
-    { 0xfa,0x87,0x74,0x73 }, { 0xfb,0x0b,0x41,0x2e },
-    { 0xb3,0x67,0x1d,0x5a }, { 0x92,0xdb,0xd2,0x52 },
-    { 0xe9,0x10,0x56,0x33 }, { 0x6d,0xd6,0x47,0x13 },
-    { 0x9a,0xd7,0x61,0x8c }, { 0x37,0xa1,0x0c,0x7a },
-    { 0x59,0xf8,0x14,0x8e }, { 0xeb,0x13,0x3c,0x89 },
-    { 0xce,0xa9,0x27,0xee }, { 0xb7,0x61,0xc9,0x35 },
-    { 0xe1,0x1c,0xe5,0xed }, { 0x7a,0x47,0xb1,0x3c },
-    { 0x9c,0xd2,0xdf,0x59 }, { 0x55,0xf2,0x73,0x3f },
-    { 0x18,0x14,0xce,0x79 }, { 0x73,0xc7,0x37,0xbf },
-    { 0x53,0xf7,0xcd,0xea }, { 0x5f,0xfd,0xaa,0x5b },
-    { 0xdf,0x3d,0x6f,0x14 }, { 0x78,0x44,0xdb,0x86 },
-    { 0xca,0xaf,0xf3,0x81 }, { 0xb9,0x68,0xc4,0x3e },
-    { 0x38,0x24,0x34,0x2c }, { 0xc2,0xa3,0x40,0x5f },
-    { 0x16,0x1d,0xc3,0x72 }, { 0xbc,0xe2,0x25,0x0c },
-    { 0x28,0x3c,0x49,0x8b }, { 0xff,0x0d,0x95,0x41 },
-    { 0x39,0xa8,0x01,0x71 }, { 0x08,0x0c,0xb3,0xde },
-    { 0xd8,0xb4,0xe4,0x9c }, { 0x64,0x56,0xc1,0x90 },
-    { 0x7b,0xcb,0x84,0x61 }, { 0xd5,0x32,0xb6,0x70 },
-    { 0x48,0x6c,0x5c,0x74 }, { 0xd0,0xb8,0x57,0x42 }
-  };
-
-static const unsigned char T6[256][4] =
-  {
-    { 0x50,0x51,0xf4,0xa7 }, { 0x53,0x7e,0x41,0x65 },
-    { 0xc3,0x1a,0x17,0xa4 }, { 0x96,0x3a,0x27,0x5e },
-    { 0xcb,0x3b,0xab,0x6b }, { 0xf1,0x1f,0x9d,0x45 },
-    { 0xab,0xac,0xfa,0x58 }, { 0x93,0x4b,0xe3,0x03 },
-    { 0x55,0x20,0x30,0xfa }, { 0xf6,0xad,0x76,0x6d },
-    { 0x91,0x88,0xcc,0x76 }, { 0x25,0xf5,0x02,0x4c },
-    { 0xfc,0x4f,0xe5,0xd7 }, { 0xd7,0xc5,0x2a,0xcb },
-    { 0x80,0x26,0x35,0x44 }, { 0x8f,0xb5,0x62,0xa3 },
-    { 0x49,0xde,0xb1,0x5a }, { 0x67,0x25,0xba,0x1b },
-    { 0x98,0x45,0xea,0x0e }, { 0xe1,0x5d,0xfe,0xc0 },
-    { 0x02,0xc3,0x2f,0x75 }, { 0x12,0x81,0x4c,0xf0 },
-    { 0xa3,0x8d,0x46,0x97 }, { 0xc6,0x6b,0xd3,0xf9 },
-    { 0xe7,0x03,0x8f,0x5f }, { 0x95,0x15,0x92,0x9c },
-    { 0xeb,0xbf,0x6d,0x7a }, { 0xda,0x95,0x52,0x59 },
-    { 0x2d,0xd4,0xbe,0x83 }, { 0xd3,0x58,0x74,0x21 },
-    { 0x29,0x49,0xe0,0x69 }, { 0x44,0x8e,0xc9,0xc8 },
-    { 0x6a,0x75,0xc2,0x89 }, { 0x78,0xf4,0x8e,0x79 },
-    { 0x6b,0x99,0x58,0x3e }, { 0xdd,0x27,0xb9,0x71 },
-    { 0xb6,0xbe,0xe1,0x4f }, { 0x17,0xf0,0x88,0xad },
-    { 0x66,0xc9,0x20,0xac }, { 0xb4,0x7d,0xce,0x3a },
-    { 0x18,0x63,0xdf,0x4a }, { 0x82,0xe5,0x1a,0x31 },
-    { 0x60,0x97,0x51,0x33 }, { 0x45,0x62,0x53,0x7f },
-    { 0xe0,0xb1,0x64,0x77 }, { 0x84,0xbb,0x6b,0xae },
-    { 0x1c,0xfe,0x81,0xa0 }, { 0x94,0xf9,0x08,0x2b },
-    { 0x58,0x70,0x48,0x68 }, { 0x19,0x8f,0x45,0xfd },
-    { 0x87,0x94,0xde,0x6c }, { 0xb7,0x52,0x7b,0xf8 },
-    { 0x23,0xab,0x73,0xd3 }, { 0xe2,0x72,0x4b,0x02 },
-    { 0x57,0xe3,0x1f,0x8f }, { 0x2a,0x66,0x55,0xab },
-    { 0x07,0xb2,0xeb,0x28 }, { 0x03,0x2f,0xb5,0xc2 },
-    { 0x9a,0x86,0xc5,0x7b }, { 0xa5,0xd3,0x37,0x08 },
-    { 0xf2,0x30,0x28,0x87 }, { 0xb2,0x23,0xbf,0xa5 },
-    { 0xba,0x02,0x03,0x6a }, { 0x5c,0xed,0x16,0x82 },
-    { 0x2b,0x8a,0xcf,0x1c }, { 0x92,0xa7,0x79,0xb4 },
-    { 0xf0,0xf3,0x07,0xf2 }, { 0xa1,0x4e,0x69,0xe2 },
-    { 0xcd,0x65,0xda,0xf4 }, { 0xd5,0x06,0x05,0xbe },
-    { 0x1f,0xd1,0x34,0x62 }, { 0x8a,0xc4,0xa6,0xfe },
-    { 0x9d,0x34,0x2e,0x53 }, { 0xa0,0xa2,0xf3,0x55 },
-    { 0x32,0x05,0x8a,0xe1 }, { 0x75,0xa4,0xf6,0xeb },
-    { 0x39,0x0b,0x83,0xec }, { 0xaa,0x40,0x60,0xef },
-    { 0x06,0x5e,0x71,0x9f }, { 0x51,0xbd,0x6e,0x10 },
-    { 0xf9,0x3e,0x21,0x8a }, { 0x3d,0x96,0xdd,0x06 },
-    { 0xae,0xdd,0x3e,0x05 }, { 0x46,0x4d,0xe6,0xbd },
-    { 0xb5,0x91,0x54,0x8d }, { 0x05,0x71,0xc4,0x5d },
-    { 0x6f,0x04,0x06,0xd4 }, { 0xff,0x60,0x50,0x15 },
-    { 0x24,0x19,0x98,0xfb }, { 0x97,0xd6,0xbd,0xe9 },
-    { 0xcc,0x89,0x40,0x43 }, { 0x77,0x67,0xd9,0x9e },
-    { 0xbd,0xb0,0xe8,0x42 }, { 0x88,0x07,0x89,0x8b },
-    { 0x38,0xe7,0x19,0x5b }, { 0xdb,0x79,0xc8,0xee },
-    { 0x47,0xa1,0x7c,0x0a }, { 0xe9,0x7c,0x42,0x0f },
-    { 0xc9,0xf8,0x84,0x1e }, { 0x00,0x00,0x00,0x00 },
-    { 0x83,0x09,0x80,0x86 }, { 0x48,0x32,0x2b,0xed },
-    { 0xac,0x1e,0x11,0x70 }, { 0x4e,0x6c,0x5a,0x72 },
-    { 0xfb,0xfd,0x0e,0xff }, { 0x56,0x0f,0x85,0x38 },
-    { 0x1e,0x3d,0xae,0xd5 }, { 0x27,0x36,0x2d,0x39 },
-    { 0x64,0x0a,0x0f,0xd9 }, { 0x21,0x68,0x5c,0xa6 },
-    { 0xd1,0x9b,0x5b,0x54 }, { 0x3a,0x24,0x36,0x2e },
-    { 0xb1,0x0c,0x0a,0x67 }, { 0x0f,0x93,0x57,0xe7 },
-    { 0xd2,0xb4,0xee,0x96 }, { 0x9e,0x1b,0x9b,0x91 },
-    { 0x4f,0x80,0xc0,0xc5 }, { 0xa2,0x61,0xdc,0x20 },
-    { 0x69,0x5a,0x77,0x4b }, { 0x16,0x1c,0x12,0x1a },
-    { 0x0a,0xe2,0x93,0xba }, { 0xe5,0xc0,0xa0,0x2a },
-    { 0x43,0x3c,0x22,0xe0 }, { 0x1d,0x12,0x1b,0x17 },
-    { 0x0b,0x0e,0x09,0x0d }, { 0xad,0xf2,0x8b,0xc7 },
-    { 0xb9,0x2d,0xb6,0xa8 }, { 0xc8,0x14,0x1e,0xa9 },
-    { 0x85,0x57,0xf1,0x19 }, { 0x4c,0xaf,0x75,0x07 },
-    { 0xbb,0xee,0x99,0xdd }, { 0xfd,0xa3,0x7f,0x60 },
-    { 0x9f,0xf7,0x01,0x26 }, { 0xbc,0x5c,0x72,0xf5 },
-    { 0xc5,0x44,0x66,0x3b }, { 0x34,0x5b,0xfb,0x7e },
-    { 0x76,0x8b,0x43,0x29 }, { 0xdc,0xcb,0x23,0xc6 },
-    { 0x68,0xb6,0xed,0xfc }, { 0x63,0xb8,0xe4,0xf1 },
-    { 0xca,0xd7,0x31,0xdc }, { 0x10,0x42,0x63,0x85 },
-    { 0x40,0x13,0x97,0x22 }, { 0x20,0x84,0xc6,0x11 },
-    { 0x7d,0x85,0x4a,0x24 }, { 0xf8,0xd2,0xbb,0x3d },
-    { 0x11,0xae,0xf9,0x32 }, { 0x6d,0xc7,0x29,0xa1 },
-    { 0x4b,0x1d,0x9e,0x2f }, { 0xf3,0xdc,0xb2,0x30 },
-    { 0xec,0x0d,0x86,0x52 }, { 0xd0,0x77,0xc1,0xe3 },
-    { 0x6c,0x2b,0xb3,0x16 }, { 0x99,0xa9,0x70,0xb9 },
-    { 0xfa,0x11,0x94,0x48 }, { 0x22,0x47,0xe9,0x64 },
-    { 0xc4,0xa8,0xfc,0x8c }, { 0x1a,0xa0,0xf0,0x3f },
-    { 0xd8,0x56,0x7d,0x2c }, { 0xef,0x22,0x33,0x90 },
-    { 0xc7,0x87,0x49,0x4e }, { 0xc1,0xd9,0x38,0xd1 },
-    { 0xfe,0x8c,0xca,0xa2 }, { 0x36,0x98,0xd4,0x0b },
-    { 0xcf,0xa6,0xf5,0x81 }, { 0x28,0xa5,0x7a,0xde },
-    { 0x26,0xda,0xb7,0x8e }, { 0xa4,0x3f,0xad,0xbf },
-    { 0xe4,0x2c,0x3a,0x9d }, { 0x0d,0x50,0x78,0x92 },
-    { 0x9b,0x6a,0x5f,0xcc }, { 0x62,0x54,0x7e,0x46 },
-    { 0xc2,0xf6,0x8d,0x13 }, { 0xe8,0x90,0xd8,0xb8 },
-    { 0x5e,0x2e,0x39,0xf7 }, { 0xf5,0x82,0xc3,0xaf },
-    { 0xbe,0x9f,0x5d,0x80 }, { 0x7c,0x69,0xd0,0x93 },
-    { 0xa9,0x6f,0xd5,0x2d }, { 0xb3,0xcf,0x25,0x12 },
-    { 0x3b,0xc8,0xac,0x99 }, { 0xa7,0x10,0x18,0x7d },
-    { 0x6e,0xe8,0x9c,0x63 }, { 0x7b,0xdb,0x3b,0xbb },
-    { 0x09,0xcd,0x26,0x78 }, { 0xf4,0x6e,0x59,0x18 },
-    { 0x01,0xec,0x9a,0xb7 }, { 0xa8,0x83,0x4f,0x9a },
-    { 0x65,0xe6,0x95,0x6e }, { 0x7e,0xaa,0xff,0xe6 },
-    { 0x08,0x21,0xbc,0xcf }, { 0xe6,0xef,0x15,0xe8 },
-    { 0xd9,0xba,0xe7,0x9b }, { 0xce,0x4a,0x6f,0x36 },
-    { 0xd4,0xea,0x9f,0x09 }, { 0xd6,0x29,0xb0,0x7c },
-    { 0xaf,0x31,0xa4,0xb2 }, { 0x31,0x2a,0x3f,0x23 },
-    { 0x30,0xc6,0xa5,0x94 }, { 0xc0,0x35,0xa2,0x66 },
-    { 0x37,0x74,0x4e,0xbc }, { 0xa6,0xfc,0x82,0xca },
-    { 0xb0,0xe0,0x90,0xd0 }, { 0x15,0x33,0xa7,0xd8 },
-    { 0x4a,0xf1,0x04,0x98 }, { 0xf7,0x41,0xec,0xda },
-    { 0x0e,0x7f,0xcd,0x50 }, { 0x2f,0x17,0x91,0xf6 },
-    { 0x8d,0x76,0x4d,0xd6 }, { 0x4d,0x43,0xef,0xb0 },
-    { 0x54,0xcc,0xaa,0x4d }, { 0xdf,0xe4,0x96,0x04 },
-    { 0xe3,0x9e,0xd1,0xb5 }, { 0x1b,0x4c,0x6a,0x88 },
-    { 0xb8,0xc1,0x2c,0x1f }, { 0x7f,0x46,0x65,0x51 },
-    { 0x04,0x9d,0x5e,0xea }, { 0x5d,0x01,0x8c,0x35 },
-    { 0x73,0xfa,0x87,0x74 }, { 0x2e,0xfb,0x0b,0x41 },
-    { 0x5a,0xb3,0x67,0x1d }, { 0x52,0x92,0xdb,0xd2 },
-    { 0x33,0xe9,0x10,0x56 }, { 0x13,0x6d,0xd6,0x47 },
-    { 0x8c,0x9a,0xd7,0x61 }, { 0x7a,0x37,0xa1,0x0c },
-    { 0x8e,0x59,0xf8,0x14 }, { 0x89,0xeb,0x13,0x3c },
-    { 0xee,0xce,0xa9,0x27 }, { 0x35,0xb7,0x61,0xc9 },
-    { 0xed,0xe1,0x1c,0xe5 }, { 0x3c,0x7a,0x47,0xb1 },
-    { 0x59,0x9c,0xd2,0xdf }, { 0x3f,0x55,0xf2,0x73 },
-    { 0x79,0x18,0x14,0xce }, { 0xbf,0x73,0xc7,0x37 },
-    { 0xea,0x53,0xf7,0xcd }, { 0x5b,0x5f,0xfd,0xaa },
-    { 0x14,0xdf,0x3d,0x6f }, { 0x86,0x78,0x44,0xdb },
-    { 0x81,0xca,0xaf,0xf3 }, { 0x3e,0xb9,0x68,0xc4 },
-    { 0x2c,0x38,0x24,0x34 }, { 0x5f,0xc2,0xa3,0x40 },
-    { 0x72,0x16,0x1d,0xc3 }, { 0x0c,0xbc,0xe2,0x25 },
-    { 0x8b,0x28,0x3c,0x49 }, { 0x41,0xff,0x0d,0x95 },
-    { 0x71,0x39,0xa8,0x01 }, { 0xde,0x08,0x0c,0xb3 },
-    { 0x9c,0xd8,0xb4,0xe4 }, { 0x90,0x64,0x56,0xc1 },
-    { 0x61,0x7b,0xcb,0x84 }, { 0x70,0xd5,0x32,0xb6 },
-    { 0x74,0x48,0x6c,0x5c }, { 0x42,0xd0,0xb8,0x57 }
-  };
-
-static const unsigned char T7[256][4] =
-  {
-    { 0xa7,0x50,0x51,0xf4 }, { 0x65,0x53,0x7e,0x41 },
-    { 0xa4,0xc3,0x1a,0x17 }, { 0x5e,0x96,0x3a,0x27 },
-    { 0x6b,0xcb,0x3b,0xab }, { 0x45,0xf1,0x1f,0x9d },
-    { 0x58,0xab,0xac,0xfa }, { 0x03,0x93,0x4b,0xe3 },
-    { 0xfa,0x55,0x20,0x30 }, { 0x6d,0xf6,0xad,0x76 },
-    { 0x76,0x91,0x88,0xcc }, { 0x4c,0x25,0xf5,0x02 },
-    { 0xd7,0xfc,0x4f,0xe5 }, { 0xcb,0xd7,0xc5,0x2a },
-    { 0x44,0x80,0x26,0x35 }, { 0xa3,0x8f,0xb5,0x62 },
-    { 0x5a,0x49,0xde,0xb1 }, { 0x1b,0x67,0x25,0xba },
-    { 0x0e,0x98,0x45,0xea }, { 0xc0,0xe1,0x5d,0xfe },
-    { 0x75,0x02,0xc3,0x2f }, { 0xf0,0x12,0x81,0x4c },
-    { 0x97,0xa3,0x8d,0x46 }, { 0xf9,0xc6,0x6b,0xd3 },
-    { 0x5f,0xe7,0x03,0x8f }, { 0x9c,0x95,0x15,0x92 },
-    { 0x7a,0xeb,0xbf,0x6d }, { 0x59,0xda,0x95,0x52 },
-    { 0x83,0x2d,0xd4,0xbe }, { 0x21,0xd3,0x58,0x74 },
-    { 0x69,0x29,0x49,0xe0 }, { 0xc8,0x44,0x8e,0xc9 },
-    { 0x89,0x6a,0x75,0xc2 }, { 0x79,0x78,0xf4,0x8e },
-    { 0x3e,0x6b,0x99,0x58 }, { 0x71,0xdd,0x27,0xb9 },
-    { 0x4f,0xb6,0xbe,0xe1 }, { 0xad,0x17,0xf0,0x88 },
-    { 0xac,0x66,0xc9,0x20 }, { 0x3a,0xb4,0x7d,0xce },
-    { 0x4a,0x18,0x63,0xdf }, { 0x31,0x82,0xe5,0x1a },
-    { 0x33,0x60,0x97,0x51 }, { 0x7f,0x45,0x62,0x53 },
-    { 0x77,0xe0,0xb1,0x64 }, { 0xae,0x84,0xbb,0x6b },
-    { 0xa0,0x1c,0xfe,0x81 }, { 0x2b,0x94,0xf9,0x08 },
-    { 0x68,0x58,0x70,0x48 }, { 0xfd,0x19,0x8f,0x45 },
-    { 0x6c,0x87,0x94,0xde }, { 0xf8,0xb7,0x52,0x7b },
-    { 0xd3,0x23,0xab,0x73 }, { 0x02,0xe2,0x72,0x4b },
-    { 0x8f,0x57,0xe3,0x1f }, { 0xab,0x2a,0x66,0x55 },
-    { 0x28,0x07,0xb2,0xeb }, { 0xc2,0x03,0x2f,0xb5 },
-    { 0x7b,0x9a,0x86,0xc5 }, { 0x08,0xa5,0xd3,0x37 },
-    { 0x87,0xf2,0x30,0x28 }, { 0xa5,0xb2,0x23,0xbf },
-    { 0x6a,0xba,0x02,0x03 }, { 0x82,0x5c,0xed,0x16 },
-    { 0x1c,0x2b,0x8a,0xcf }, { 0xb4,0x92,0xa7,0x79 },
-    { 0xf2,0xf0,0xf3,0x07 }, { 0xe2,0xa1,0x4e,0x69 },
-    { 0xf4,0xcd,0x65,0xda }, { 0xbe,0xd5,0x06,0x05 },
-    { 0x62,0x1f,0xd1,0x34 }, { 0xfe,0x8a,0xc4,0xa6 },
-    { 0x53,0x9d,0x34,0x2e }, { 0x55,0xa0,0xa2,0xf3 },
-    { 0xe1,0x32,0x05,0x8a }, { 0xeb,0x75,0xa4,0xf6 },
-    { 0xec,0x39,0x0b,0x83 }, { 0xef,0xaa,0x40,0x60 },
-    { 0x9f,0x06,0x5e,0x71 }, { 0x10,0x51,0xbd,0x6e },
-    { 0x8a,0xf9,0x3e,0x21 }, { 0x06,0x3d,0x96,0xdd },
-    { 0x05,0xae,0xdd,0x3e }, { 0xbd,0x46,0x4d,0xe6 },
-    { 0x8d,0xb5,0x91,0x54 }, { 0x5d,0x05,0x71,0xc4 },
-    { 0xd4,0x6f,0x04,0x06 }, { 0x15,0xff,0x60,0x50 },
-    { 0xfb,0x24,0x19,0x98 }, { 0xe9,0x97,0xd6,0xbd },
-    { 0x43,0xcc,0x89,0x40 }, { 0x9e,0x77,0x67,0xd9 },
-    { 0x42,0xbd,0xb0,0xe8 }, { 0x8b,0x88,0x07,0x89 },
-    { 0x5b,0x38,0xe7,0x19 }, { 0xee,0xdb,0x79,0xc8 },
-    { 0x0a,0x47,0xa1,0x7c }, { 0x0f,0xe9,0x7c,0x42 },
-    { 0x1e,0xc9,0xf8,0x84 }, { 0x00,0x00,0x00,0x00 },
-    { 0x86,0x83,0x09,0x80 }, { 0xed,0x48,0x32,0x2b },
-    { 0x70,0xac,0x1e,0x11 }, { 0x72,0x4e,0x6c,0x5a },
-    { 0xff,0xfb,0xfd,0x0e }, { 0x38,0x56,0x0f,0x85 },
-    { 0xd5,0x1e,0x3d,0xae }, { 0x39,0x27,0x36,0x2d },
-    { 0xd9,0x64,0x0a,0x0f }, { 0xa6,0x21,0x68,0x5c },
-    { 0x54,0xd1,0x9b,0x5b }, { 0x2e,0x3a,0x24,0x36 },
-    { 0x67,0xb1,0x0c,0x0a }, { 0xe7,0x0f,0x93,0x57 },
-    { 0x96,0xd2,0xb4,0xee }, { 0x91,0x9e,0x1b,0x9b },
-    { 0xc5,0x4f,0x80,0xc0 }, { 0x20,0xa2,0x61,0xdc },
-    { 0x4b,0x69,0x5a,0x77 }, { 0x1a,0x16,0x1c,0x12 },
-    { 0xba,0x0a,0xe2,0x93 }, { 0x2a,0xe5,0xc0,0xa0 },
-    { 0xe0,0x43,0x3c,0x22 }, { 0x17,0x1d,0x12,0x1b },
-    { 0x0d,0x0b,0x0e,0x09 }, { 0xc7,0xad,0xf2,0x8b },
-    { 0xa8,0xb9,0x2d,0xb6 }, { 0xa9,0xc8,0x14,0x1e },
-    { 0x19,0x85,0x57,0xf1 }, { 0x07,0x4c,0xaf,0x75 },
-    { 0xdd,0xbb,0xee,0x99 }, { 0x60,0xfd,0xa3,0x7f },
-    { 0x26,0x9f,0xf7,0x01 }, { 0xf5,0xbc,0x5c,0x72 },
-    { 0x3b,0xc5,0x44,0x66 }, { 0x7e,0x34,0x5b,0xfb },
-    { 0x29,0x76,0x8b,0x43 }, { 0xc6,0xdc,0xcb,0x23 },
-    { 0xfc,0x68,0xb6,0xed }, { 0xf1,0x63,0xb8,0xe4 },
-    { 0xdc,0xca,0xd7,0x31 }, { 0x85,0x10,0x42,0x63 },
-    { 0x22,0x40,0x13,0x97 }, { 0x11,0x20,0x84,0xc6 },
-    { 0x24,0x7d,0x85,0x4a }, { 0x3d,0xf8,0xd2,0xbb },
-    { 0x32,0x11,0xae,0xf9 }, { 0xa1,0x6d,0xc7,0x29 },
-    { 0x2f,0x4b,0x1d,0x9e }, { 0x30,0xf3,0xdc,0xb2 },
-    { 0x52,0xec,0x0d,0x86 }, { 0xe3,0xd0,0x77,0xc1 },
-    { 0x16,0x6c,0x2b,0xb3 }, { 0xb9,0x99,0xa9,0x70 },
-    { 0x48,0xfa,0x11,0x94 }, { 0x64,0x22,0x47,0xe9 },
-    { 0x8c,0xc4,0xa8,0xfc }, { 0x3f,0x1a,0xa0,0xf0 },
-    { 0x2c,0xd8,0x56,0x7d }, { 0x90,0xef,0x22,0x33 },
-    { 0x4e,0xc7,0x87,0x49 }, { 0xd1,0xc1,0xd9,0x38 },
-    { 0xa2,0xfe,0x8c,0xca }, { 0x0b,0x36,0x98,0xd4 },
-    { 0x81,0xcf,0xa6,0xf5 }, { 0xde,0x28,0xa5,0x7a },
-    { 0x8e,0x26,0xda,0xb7 }, { 0xbf,0xa4,0x3f,0xad },
-    { 0x9d,0xe4,0x2c,0x3a }, { 0x92,0x0d,0x50,0x78 },
-    { 0xcc,0x9b,0x6a,0x5f }, { 0x46,0x62,0x54,0x7e },
-    { 0x13,0xc2,0xf6,0x8d }, { 0xb8,0xe8,0x90,0xd8 },
-    { 0xf7,0x5e,0x2e,0x39 }, { 0xaf,0xf5,0x82,0xc3 },
-    { 0x80,0xbe,0x9f,0x5d }, { 0x93,0x7c,0x69,0xd0 },
-    { 0x2d,0xa9,0x6f,0xd5 }, { 0x12,0xb3,0xcf,0x25 },
-    { 0x99,0x3b,0xc8,0xac }, { 0x7d,0xa7,0x10,0x18 },
-    { 0x63,0x6e,0xe8,0x9c }, { 0xbb,0x7b,0xdb,0x3b },
-    { 0x78,0x09,0xcd,0x26 }, { 0x18,0xf4,0x6e,0x59 },
-    { 0xb7,0x01,0xec,0x9a }, { 0x9a,0xa8,0x83,0x4f },
-    { 0x6e,0x65,0xe6,0x95 }, { 0xe6,0x7e,0xaa,0xff },
-    { 0xcf,0x08,0x21,0xbc }, { 0xe8,0xe6,0xef,0x15 },
-    { 0x9b,0xd9,0xba,0xe7 }, { 0x36,0xce,0x4a,0x6f },
-    { 0x09,0xd4,0xea,0x9f }, { 0x7c,0xd6,0x29,0xb0 },
-    { 0xb2,0xaf,0x31,0xa4 }, { 0x23,0x31,0x2a,0x3f },
-    { 0x94,0x30,0xc6,0xa5 }, { 0x66,0xc0,0x35,0xa2 },
-    { 0xbc,0x37,0x74,0x4e }, { 0xca,0xa6,0xfc,0x82 },
-    { 0xd0,0xb0,0xe0,0x90 }, { 0xd8,0x15,0x33,0xa7 },
-    { 0x98,0x4a,0xf1,0x04 }, { 0xda,0xf7,0x41,0xec },
-    { 0x50,0x0e,0x7f,0xcd }, { 0xf6,0x2f,0x17,0x91 },
-    { 0xd6,0x8d,0x76,0x4d }, { 0xb0,0x4d,0x43,0xef },
-    { 0x4d,0x54,0xcc,0xaa }, { 0x04,0xdf,0xe4,0x96 },
-    { 0xb5,0xe3,0x9e,0xd1 }, { 0x88,0x1b,0x4c,0x6a },
-    { 0x1f,0xb8,0xc1,0x2c }, { 0x51,0x7f,0x46,0x65 },
-    { 0xea,0x04,0x9d,0x5e }, { 0x35,0x5d,0x01,0x8c },
-    { 0x74,0x73,0xfa,0x87 }, { 0x41,0x2e,0xfb,0x0b },
-    { 0x1d,0x5a,0xb3,0x67 }, { 0xd2,0x52,0x92,0xdb },
-    { 0x56,0x33,0xe9,0x10 }, { 0x47,0x13,0x6d,0xd6 },
-    { 0x61,0x8c,0x9a,0xd7 }, { 0x0c,0x7a,0x37,0xa1 },
-    { 0x14,0x8e,0x59,0xf8 }, { 0x3c,0x89,0xeb,0x13 },
-    { 0x27,0xee,0xce,0xa9 }, { 0xc9,0x35,0xb7,0x61 },
-    { 0xe5,0xed,0xe1,0x1c }, { 0xb1,0x3c,0x7a,0x47 },
-    { 0xdf,0x59,0x9c,0xd2 }, { 0x73,0x3f,0x55,0xf2 },
-    { 0xce,0x79,0x18,0x14 }, { 0x37,0xbf,0x73,0xc7 },
-    { 0xcd,0xea,0x53,0xf7 }, { 0xaa,0x5b,0x5f,0xfd },
-    { 0x6f,0x14,0xdf,0x3d }, { 0xdb,0x86,0x78,0x44 },
-    { 0xf3,0x81,0xca,0xaf }, { 0xc4,0x3e,0xb9,0x68 },
-    { 0x34,0x2c,0x38,0x24 }, { 0x40,0x5f,0xc2,0xa3 },
-    { 0xc3,0x72,0x16,0x1d }, { 0x25,0x0c,0xbc,0xe2 },
-    { 0x49,0x8b,0x28,0x3c }, { 0x95,0x41,0xff,0x0d },
-    { 0x01,0x71,0x39,0xa8 }, { 0xb3,0xde,0x08,0x0c },
-    { 0xe4,0x9c,0xd8,0xb4 }, { 0xc1,0x90,0x64,0x56 },
-    { 0x84,0x61,0x7b,0xcb }, { 0xb6,0x70,0xd5,0x32 },
-    { 0x5c,0x74,0x48,0x6c }, { 0x57,0x42,0xd0,0xb8 }
-  };
-
-static const unsigned char T8[256][4] =
-  {
-    { 0xf4,0xa7,0x50,0x51 }, { 0x41,0x65,0x53,0x7e },
-    { 0x17,0xa4,0xc3,0x1a }, { 0x27,0x5e,0x96,0x3a },
-    { 0xab,0x6b,0xcb,0x3b }, { 0x9d,0x45,0xf1,0x1f },
-    { 0xfa,0x58,0xab,0xac }, { 0xe3,0x03,0x93,0x4b },
-    { 0x30,0xfa,0x55,0x20 }, { 0x76,0x6d,0xf6,0xad },
-    { 0xcc,0x76,0x91,0x88 }, { 0x02,0x4c,0x25,0xf5 },
-    { 0xe5,0xd7,0xfc,0x4f }, { 0x2a,0xcb,0xd7,0xc5 },
-    { 0x35,0x44,0x80,0x26 }, { 0x62,0xa3,0x8f,0xb5 },
-    { 0xb1,0x5a,0x49,0xde }, { 0xba,0x1b,0x67,0x25 },
-    { 0xea,0x0e,0x98,0x45 }, { 0xfe,0xc0,0xe1,0x5d },
-    { 0x2f,0x75,0x02,0xc3 }, { 0x4c,0xf0,0x12,0x81 },
-    { 0x46,0x97,0xa3,0x8d }, { 0xd3,0xf9,0xc6,0x6b },
-    { 0x8f,0x5f,0xe7,0x03 }, { 0x92,0x9c,0x95,0x15 },
-    { 0x6d,0x7a,0xeb,0xbf }, { 0x52,0x59,0xda,0x95 },
-    { 0xbe,0x83,0x2d,0xd4 }, { 0x74,0x21,0xd3,0x58 },
-    { 0xe0,0x69,0x29,0x49 }, { 0xc9,0xc8,0x44,0x8e },
-    { 0xc2,0x89,0x6a,0x75 }, { 0x8e,0x79,0x78,0xf4 },
-    { 0x58,0x3e,0x6b,0x99 }, { 0xb9,0x71,0xdd,0x27 },
-    { 0xe1,0x4f,0xb6,0xbe }, { 0x88,0xad,0x17,0xf0 },
-    { 0x20,0xac,0x66,0xc9 }, { 0xce,0x3a,0xb4,0x7d },
-    { 0xdf,0x4a,0x18,0x63 }, { 0x1a,0x31,0x82,0xe5 },
-    { 0x51,0x33,0x60,0x97 }, { 0x53,0x7f,0x45,0x62 },
-    { 0x64,0x77,0xe0,0xb1 }, { 0x6b,0xae,0x84,0xbb },
-    { 0x81,0xa0,0x1c,0xfe }, { 0x08,0x2b,0x94,0xf9 },
-    { 0x48,0x68,0x58,0x70 }, { 0x45,0xfd,0x19,0x8f },
-    { 0xde,0x6c,0x87,0x94 }, { 0x7b,0xf8,0xb7,0x52 },
-    { 0x73,0xd3,0x23,0xab }, { 0x4b,0x02,0xe2,0x72 },
-    { 0x1f,0x8f,0x57,0xe3 }, { 0x55,0xab,0x2a,0x66 },
-    { 0xeb,0x28,0x07,0xb2 }, { 0xb5,0xc2,0x03,0x2f },
-    { 0xc5,0x7b,0x9a,0x86 }, { 0x37,0x08,0xa5,0xd3 },
-    { 0x28,0x87,0xf2,0x30 }, { 0xbf,0xa5,0xb2,0x23 },
-    { 0x03,0x6a,0xba,0x02 }, { 0x16,0x82,0x5c,0xed },
-    { 0xcf,0x1c,0x2b,0x8a }, { 0x79,0xb4,0x92,0xa7 },
-    { 0x07,0xf2,0xf0,0xf3 }, { 0x69,0xe2,0xa1,0x4e },
-    { 0xda,0xf4,0xcd,0x65 }, { 0x05,0xbe,0xd5,0x06 },
-    { 0x34,0x62,0x1f,0xd1 }, { 0xa6,0xfe,0x8a,0xc4 },
-    { 0x2e,0x53,0x9d,0x34 }, { 0xf3,0x55,0xa0,0xa2 },
-    { 0x8a,0xe1,0x32,0x05 }, { 0xf6,0xeb,0x75,0xa4 },
-    { 0x83,0xec,0x39,0x0b }, { 0x60,0xef,0xaa,0x40 },
-    { 0x71,0x9f,0x06,0x5e }, { 0x6e,0x10,0x51,0xbd },
-    { 0x21,0x8a,0xf9,0x3e }, { 0xdd,0x06,0x3d,0x96 },
-    { 0x3e,0x05,0xae,0xdd }, { 0xe6,0xbd,0x46,0x4d },
-    { 0x54,0x8d,0xb5,0x91 }, { 0xc4,0x5d,0x05,0x71 },
-    { 0x06,0xd4,0x6f,0x04 }, { 0x50,0x15,0xff,0x60 },
-    { 0x98,0xfb,0x24,0x19 }, { 0xbd,0xe9,0x97,0xd6 },
-    { 0x40,0x43,0xcc,0x89 }, { 0xd9,0x9e,0x77,0x67 },
-    { 0xe8,0x42,0xbd,0xb0 }, { 0x89,0x8b,0x88,0x07 },
-    { 0x19,0x5b,0x38,0xe7 }, { 0xc8,0xee,0xdb,0x79 },
-    { 0x7c,0x0a,0x47,0xa1 }, { 0x42,0x0f,0xe9,0x7c },
-    { 0x84,0x1e,0xc9,0xf8 }, { 0x00,0x00,0x00,0x00 },
-    { 0x80,0x86,0x83,0x09 }, { 0x2b,0xed,0x48,0x32 },
-    { 0x11,0x70,0xac,0x1e }, { 0x5a,0x72,0x4e,0x6c },
-    { 0x0e,0xff,0xfb,0xfd }, { 0x85,0x38,0x56,0x0f },
-    { 0xae,0xd5,0x1e,0x3d }, { 0x2d,0x39,0x27,0x36 },
-    { 0x0f,0xd9,0x64,0x0a }, { 0x5c,0xa6,0x21,0x68 },
-    { 0x5b,0x54,0xd1,0x9b }, { 0x36,0x2e,0x3a,0x24 },
-    { 0x0a,0x67,0xb1,0x0c }, { 0x57,0xe7,0x0f,0x93 },
-    { 0xee,0x96,0xd2,0xb4 }, { 0x9b,0x91,0x9e,0x1b },
-    { 0xc0,0xc5,0x4f,0x80 }, { 0xdc,0x20,0xa2,0x61 },
-    { 0x77,0x4b,0x69,0x5a }, { 0x12,0x1a,0x16,0x1c },
-    { 0x93,0xba,0x0a,0xe2 }, { 0xa0,0x2a,0xe5,0xc0 },
-    { 0x22,0xe0,0x43,0x3c }, { 0x1b,0x17,0x1d,0x12 },
-    { 0x09,0x0d,0x0b,0x0e }, { 0x8b,0xc7,0xad,0xf2 },
-    { 0xb6,0xa8,0xb9,0x2d }, { 0x1e,0xa9,0xc8,0x14 },
-    { 0xf1,0x19,0x85,0x57 }, { 0x75,0x07,0x4c,0xaf },
-    { 0x99,0xdd,0xbb,0xee }, { 0x7f,0x60,0xfd,0xa3 },
-    { 0x01,0x26,0x9f,0xf7 }, { 0x72,0xf5,0xbc,0x5c },
-    { 0x66,0x3b,0xc5,0x44 }, { 0xfb,0x7e,0x34,0x5b },
-    { 0x43,0x29,0x76,0x8b }, { 0x23,0xc6,0xdc,0xcb },
-    { 0xed,0xfc,0x68,0xb6 }, { 0xe4,0xf1,0x63,0xb8 },
-    { 0x31,0xdc,0xca,0xd7 }, { 0x63,0x85,0x10,0x42 },
-    { 0x97,0x22,0x40,0x13 }, { 0xc6,0x11,0x20,0x84 },
-    { 0x4a,0x24,0x7d,0x85 }, { 0xbb,0x3d,0xf8,0xd2 },
-    { 0xf9,0x32,0x11,0xae }, { 0x29,0xa1,0x6d,0xc7 },
-    { 0x9e,0x2f,0x4b,0x1d }, { 0xb2,0x30,0xf3,0xdc },
-    { 0x86,0x52,0xec,0x0d }, { 0xc1,0xe3,0xd0,0x77 },
-    { 0xb3,0x16,0x6c,0x2b }, { 0x70,0xb9,0x99,0xa9 },
-    { 0x94,0x48,0xfa,0x11 }, { 0xe9,0x64,0x22,0x47 },
-    { 0xfc,0x8c,0xc4,0xa8 }, { 0xf0,0x3f,0x1a,0xa0 },
-    { 0x7d,0x2c,0xd8,0x56 }, { 0x33,0x90,0xef,0x22 },
-    { 0x49,0x4e,0xc7,0x87 }, { 0x38,0xd1,0xc1,0xd9 },
-    { 0xca,0xa2,0xfe,0x8c }, { 0xd4,0x0b,0x36,0x98 },
-    { 0xf5,0x81,0xcf,0xa6 }, { 0x7a,0xde,0x28,0xa5 },
-    { 0xb7,0x8e,0x26,0xda }, { 0xad,0xbf,0xa4,0x3f },
-    { 0x3a,0x9d,0xe4,0x2c }, { 0x78,0x92,0x0d,0x50 },
-    { 0x5f,0xcc,0x9b,0x6a }, { 0x7e,0x46,0x62,0x54 },
-    { 0x8d,0x13,0xc2,0xf6 }, { 0xd8,0xb8,0xe8,0x90 },
-    { 0x39,0xf7,0x5e,0x2e }, { 0xc3,0xaf,0xf5,0x82 },
-    { 0x5d,0x80,0xbe,0x9f }, { 0xd0,0x93,0x7c,0x69 },
-    { 0xd5,0x2d,0xa9,0x6f }, { 0x25,0x12,0xb3,0xcf },
-    { 0xac,0x99,0x3b,0xc8 }, { 0x18,0x7d,0xa7,0x10 },
-    { 0x9c,0x63,0x6e,0xe8 }, { 0x3b,0xbb,0x7b,0xdb },
-    { 0x26,0x78,0x09,0xcd }, { 0x59,0x18,0xf4,0x6e },
-    { 0x9a,0xb7,0x01,0xec }, { 0x4f,0x9a,0xa8,0x83 },
-    { 0x95,0x6e,0x65,0xe6 }, { 0xff,0xe6,0x7e,0xaa },
-    { 0xbc,0xcf,0x08,0x21 }, { 0x15,0xe8,0xe6,0xef },
-    { 0xe7,0x9b,0xd9,0xba }, { 0x6f,0x36,0xce,0x4a },
-    { 0x9f,0x09,0xd4,0xea }, { 0xb0,0x7c,0xd6,0x29 },
-    { 0xa4,0xb2,0xaf,0x31 }, { 0x3f,0x23,0x31,0x2a },
-    { 0xa5,0x94,0x30,0xc6 }, { 0xa2,0x66,0xc0,0x35 },
-    { 0x4e,0xbc,0x37,0x74 }, { 0x82,0xca,0xa6,0xfc },
-    { 0x90,0xd0,0xb0,0xe0 }, { 0xa7,0xd8,0x15,0x33 },
-    { 0x04,0x98,0x4a,0xf1 }, { 0xec,0xda,0xf7,0x41 },
-    { 0xcd,0x50,0x0e,0x7f }, { 0x91,0xf6,0x2f,0x17 },
-    { 0x4d,0xd6,0x8d,0x76 }, { 0xef,0xb0,0x4d,0x43 },
-    { 0xaa,0x4d,0x54,0xcc }, { 0x96,0x04,0xdf,0xe4 },
-    { 0xd1,0xb5,0xe3,0x9e }, { 0x6a,0x88,0x1b,0x4c },
-    { 0x2c,0x1f,0xb8,0xc1 }, { 0x65,0x51,0x7f,0x46 },
-    { 0x5e,0xea,0x04,0x9d }, { 0x8c,0x35,0x5d,0x01 },
-    { 0x87,0x74,0x73,0xfa }, { 0x0b,0x41,0x2e,0xfb },
-    { 0x67,0x1d,0x5a,0xb3 }, { 0xdb,0xd2,0x52,0x92 },
-    { 0x10,0x56,0x33,0xe9 }, { 0xd6,0x47,0x13,0x6d },
-    { 0xd7,0x61,0x8c,0x9a }, { 0xa1,0x0c,0x7a,0x37 },
-    { 0xf8,0x14,0x8e,0x59 }, { 0x13,0x3c,0x89,0xeb },
-    { 0xa9,0x27,0xee,0xce }, { 0x61,0xc9,0x35,0xb7 },
-    { 0x1c,0xe5,0xed,0xe1 }, { 0x47,0xb1,0x3c,0x7a },
-    { 0xd2,0xdf,0x59,0x9c }, { 0xf2,0x73,0x3f,0x55 },
-    { 0x14,0xce,0x79,0x18 }, { 0xc7,0x37,0xbf,0x73 },
-    { 0xf7,0xcd,0xea,0x53 }, { 0xfd,0xaa,0x5b,0x5f },
-    { 0x3d,0x6f,0x14,0xdf }, { 0x44,0xdb,0x86,0x78 },
-    { 0xaf,0xf3,0x81,0xca }, { 0x68,0xc4,0x3e,0xb9 },
-    { 0x24,0x34,0x2c,0x38 }, { 0xa3,0x40,0x5f,0xc2 },
-    { 0x1d,0xc3,0x72,0x16 }, { 0xe2,0x25,0x0c,0xbc },
-    { 0x3c,0x49,0x8b,0x28 }, { 0x0d,0x95,0x41,0xff },
-    { 0xa8,0x01,0x71,0x39 }, { 0x0c,0xb3,0xde,0x08 },
-    { 0xb4,0xe4,0x9c,0xd8 }, { 0x56,0xc1,0x90,0x64 },
-    { 0xcb,0x84,0x61,0x7b }, { 0x32,0xb6,0x70,0xd5 },
-    { 0x6c,0x5c,0x74,0x48 }, { 0xb8,0x57,0x42,0xd0 }
-  };
-
-static const unsigned char S5[256] =
-  {
-    0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38,
-    0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb,
-    0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87,
-    0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb,
-    0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d,
-    0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e,
-    0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2,
-    0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25,
-    0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16,
-    0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92,
-    0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda,
-    0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84,
-    0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a,
-    0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06,
-    0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02,
-    0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b,
-    0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea,
-    0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73,
-    0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85,
-    0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e,
-    0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89,
-    0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b,
-    0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20,
-    0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4,
-    0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31,
-    0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f,
-    0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d,
-    0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef,
-    0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0,
-    0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61,
-    0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26,
-    0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d
-  };
-
-static const unsigned char U1[256][4] =
-  {
-    { 0x00,0x00,0x00,0x00 }, { 0x0e,0x09,0x0d,0x0b },
-    { 0x1c,0x12,0x1a,0x16 }, { 0x12,0x1b,0x17,0x1d },
-    { 0x38,0x24,0x34,0x2c }, { 0x36,0x2d,0x39,0x27 },
-    { 0x24,0x36,0x2e,0x3a }, { 0x2a,0x3f,0x23,0x31 },
-    { 0x70,0x48,0x68,0x58 }, { 0x7e,0x41,0x65,0x53 },
-    { 0x6c,0x5a,0x72,0x4e }, { 0x62,0x53,0x7f,0x45 },
-    { 0x48,0x6c,0x5c,0x74 }, { 0x46,0x65,0x51,0x7f },
-    { 0x54,0x7e,0x46,0x62 }, { 0x5a,0x77,0x4b,0x69 },
-    { 0xe0,0x90,0xd0,0xb0 }, { 0xee,0x99,0xdd,0xbb },
-    { 0xfc,0x82,0xca,0xa6 }, { 0xf2,0x8b,0xc7,0xad },
-    { 0xd8,0xb4,0xe4,0x9c }, { 0xd6,0xbd,0xe9,0x97 },
-    { 0xc4,0xa6,0xfe,0x8a }, { 0xca,0xaf,0xf3,0x81 },
-    { 0x90,0xd8,0xb8,0xe8 }, { 0x9e,0xd1,0xb5,0xe3 },
-    { 0x8c,0xca,0xa2,0xfe }, { 0x82,0xc3,0xaf,0xf5 },
-    { 0xa8,0xfc,0x8c,0xc4 }, { 0xa6,0xf5,0x81,0xcf },
-    { 0xb4,0xee,0x96,0xd2 }, { 0xba,0xe7,0x9b,0xd9 },
-    { 0xdb,0x3b,0xbb,0x7b }, { 0xd5,0x32,0xb6,0x70 },
-    { 0xc7,0x29,0xa1,0x6d }, { 0xc9,0x20,0xac,0x66 },
-    { 0xe3,0x1f,0x8f,0x57 }, { 0xed,0x16,0x82,0x5c },
-    { 0xff,0x0d,0x95,0x41 }, { 0xf1,0x04,0x98,0x4a },
-    { 0xab,0x73,0xd3,0x23 }, { 0xa5,0x7a,0xde,0x28 },
-    { 0xb7,0x61,0xc9,0x35 }, { 0xb9,0x68,0xc4,0x3e },
-    { 0x93,0x57,0xe7,0x0f }, { 0x9d,0x5e,0xea,0x04 },
-    { 0x8f,0x45,0xfd,0x19 }, { 0x81,0x4c,0xf0,0x12 },
-    { 0x3b,0xab,0x6b,0xcb }, { 0x35,0xa2,0x66,0xc0 },
-    { 0x27,0xb9,0x71,0xdd }, { 0x29,0xb0,0x7c,0xd6 },
-    { 0x03,0x8f,0x5f,0xe7 }, { 0x0d,0x86,0x52,0xec },
-    { 0x1f,0x9d,0x45,0xf1 }, { 0x11,0x94,0x48,0xfa },
-    { 0x4b,0xe3,0x03,0x93 }, { 0x45,0xea,0x0e,0x98 },
-    { 0x57,0xf1,0x19,0x85 }, { 0x59,0xf8,0x14,0x8e },
-    { 0x73,0xc7,0x37,0xbf }, { 0x7d,0xce,0x3a,0xb4 },
-    { 0x6f,0xd5,0x2d,0xa9 }, { 0x61,0xdc,0x20,0xa2 },
-    { 0xad,0x76,0x6d,0xf6 }, { 0xa3,0x7f,0x60,0xfd },
-    { 0xb1,0x64,0x77,0xe0 }, { 0xbf,0x6d,0x7a,0xeb },
-    { 0x95,0x52,0x59,0xda }, { 0x9b,0x5b,0x54,0xd1 },
-    { 0x89,0x40,0x43,0xcc }, { 0x87,0x49,0x4e,0xc7 },
-    { 0xdd,0x3e,0x05,0xae }, { 0xd3,0x37,0x08,0xa5 },
-    { 0xc1,0x2c,0x1f,0xb8 }, { 0xcf,0x25,0x12,0xb3 },
-    { 0xe5,0x1a,0x31,0x82 }, { 0xeb,0x13,0x3c,0x89 },
-    { 0xf9,0x08,0x2b,0x94 }, { 0xf7,0x01,0x26,0x9f },
-    { 0x4d,0xe6,0xbd,0x46 }, { 0x43,0xef,0xb0,0x4d },
-    { 0x51,0xf4,0xa7,0x50 }, { 0x5f,0xfd,0xaa,0x5b },
-    { 0x75,0xc2,0x89,0x6a }, { 0x7b,0xcb,0x84,0x61 },
-    { 0x69,0xd0,0x93,0x7c }, { 0x67,0xd9,0x9e,0x77 },
-    { 0x3d,0xae,0xd5,0x1e }, { 0x33,0xa7,0xd8,0x15 },
-    { 0x21,0xbc,0xcf,0x08 }, { 0x2f,0xb5,0xc2,0x03 },
-    { 0x05,0x8a,0xe1,0x32 }, { 0x0b,0x83,0xec,0x39 },
-    { 0x19,0x98,0xfb,0x24 }, { 0x17,0x91,0xf6,0x2f },
-    { 0x76,0x4d,0xd6,0x8d }, { 0x78,0x44,0xdb,0x86 },
-    { 0x6a,0x5f,0xcc,0x9b }, { 0x64,0x56,0xc1,0x90 },
-    { 0x4e,0x69,0xe2,0xa1 }, { 0x40,0x60,0xef,0xaa },
-    { 0x52,0x7b,0xf8,0xb7 }, { 0x5c,0x72,0xf5,0xbc },
-    { 0x06,0x05,0xbe,0xd5 }, { 0x08,0x0c,0xb3,0xde },
-    { 0x1a,0x17,0xa4,0xc3 }, { 0x14,0x1e,0xa9,0xc8 },
-    { 0x3e,0x21,0x8a,0xf9 }, { 0x30,0x28,0x87,0xf2 },
-    { 0x22,0x33,0x90,0xef }, { 0x2c,0x3a,0x9d,0xe4 },
-    { 0x96,0xdd,0x06,0x3d }, { 0x98,0xd4,0x0b,0x36 },
-    { 0x8a,0xcf,0x1c,0x2b }, { 0x84,0xc6,0x11,0x20 },
-    { 0xae,0xf9,0x32,0x11 }, { 0xa0,0xf0,0x3f,0x1a },
-    { 0xb2,0xeb,0x28,0x07 }, { 0xbc,0xe2,0x25,0x0c },
-    { 0xe6,0x95,0x6e,0x65 }, { 0xe8,0x9c,0x63,0x6e },
-    { 0xfa,0x87,0x74,0x73 }, { 0xf4,0x8e,0x79,0x78 },
-    { 0xde,0xb1,0x5a,0x49 }, { 0xd0,0xb8,0x57,0x42 },
-    { 0xc2,0xa3,0x40,0x5f }, { 0xcc,0xaa,0x4d,0x54 },
-    { 0x41,0xec,0xda,0xf7 }, { 0x4f,0xe5,0xd7,0xfc },
-    { 0x5d,0xfe,0xc0,0xe1 }, { 0x53,0xf7,0xcd,0xea },
-    { 0x79,0xc8,0xee,0xdb }, { 0x77,0xc1,0xe3,0xd0 },
-    { 0x65,0xda,0xf4,0xcd }, { 0x6b,0xd3,0xf9,0xc6 },
-    { 0x31,0xa4,0xb2,0xaf }, { 0x3f,0xad,0xbf,0xa4 },
-    { 0x2d,0xb6,0xa8,0xb9 }, { 0x23,0xbf,0xa5,0xb2 },
-    { 0x09,0x80,0x86,0x83 }, { 0x07,0x89,0x8b,0x88 },
-    { 0x15,0x92,0x9c,0x95 }, { 0x1b,0x9b,0x91,0x9e },
-    { 0xa1,0x7c,0x0a,0x47 }, { 0xaf,0x75,0x07,0x4c },
-    { 0xbd,0x6e,0x10,0x51 }, { 0xb3,0x67,0x1d,0x5a },
-    { 0x99,0x58,0x3e,0x6b }, { 0x97,0x51,0x33,0x60 },
-    { 0x85,0x4a,0x24,0x7d }, { 0x8b,0x43,0x29,0x76 },
-    { 0xd1,0x34,0x62,0x1f }, { 0xdf,0x3d,0x6f,0x14 },
-    { 0xcd,0x26,0x78,0x09 }, { 0xc3,0x2f,0x75,0x02 },
-    { 0xe9,0x10,0x56,0x33 }, { 0xe7,0x19,0x5b,0x38 },
-    { 0xf5,0x02,0x4c,0x25 }, { 0xfb,0x0b,0x41,0x2e },
-    { 0x9a,0xd7,0x61,0x8c }, { 0x94,0xde,0x6c,0x87 },
-    { 0x86,0xc5,0x7b,0x9a }, { 0x88,0xcc,0x76,0x91 },
-    { 0xa2,0xf3,0x55,0xa0 }, { 0xac,0xfa,0x58,0xab },
-    { 0xbe,0xe1,0x4f,0xb6 }, { 0xb0,0xe8,0x42,0xbd },
-    { 0xea,0x9f,0x09,0xd4 }, { 0xe4,0x96,0x04,0xdf },
-    { 0xf6,0x8d,0x13,0xc2 }, { 0xf8,0x84,0x1e,0xc9 },
-    { 0xd2,0xbb,0x3d,0xf8 }, { 0xdc,0xb2,0x30,0xf3 },
-    { 0xce,0xa9,0x27,0xee }, { 0xc0,0xa0,0x2a,0xe5 },
-    { 0x7a,0x47,0xb1,0x3c }, { 0x74,0x4e,0xbc,0x37 },
-    { 0x66,0x55,0xab,0x2a }, { 0x68,0x5c,0xa6,0x21 },
-    { 0x42,0x63,0x85,0x10 }, { 0x4c,0x6a,0x88,0x1b },
-    { 0x5e,0x71,0x9f,0x06 }, { 0x50,0x78,0x92,0x0d },
-    { 0x0a,0x0f,0xd9,0x64 }, { 0x04,0x06,0xd4,0x6f },
-    { 0x16,0x1d,0xc3,0x72 }, { 0x18,0x14,0xce,0x79 },
-    { 0x32,0x2b,0xed,0x48 }, { 0x3c,0x22,0xe0,0x43 },
-    { 0x2e,0x39,0xf7,0x5e }, { 0x20,0x30,0xfa,0x55 },
-    { 0xec,0x9a,0xb7,0x01 }, { 0xe2,0x93,0xba,0x0a },
-    { 0xf0,0x88,0xad,0x17 }, { 0xfe,0x81,0xa0,0x1c },
-    { 0xd4,0xbe,0x83,0x2d }, { 0xda,0xb7,0x8e,0x26 },
-    { 0xc8,0xac,0x99,0x3b }, { 0xc6,0xa5,0x94,0x30 },
-    { 0x9c,0xd2,0xdf,0x59 }, { 0x92,0xdb,0xd2,0x52 },
-    { 0x80,0xc0,0xc5,0x4f }, { 0x8e,0xc9,0xc8,0x44 },
-    { 0xa4,0xf6,0xeb,0x75 }, { 0xaa,0xff,0xe6,0x7e },
-    { 0xb8,0xe4,0xf1,0x63 }, { 0xb6,0xed,0xfc,0x68 },
-    { 0x0c,0x0a,0x67,0xb1 }, { 0x02,0x03,0x6a,0xba },
-    { 0x10,0x18,0x7d,0xa7 }, { 0x1e,0x11,0x70,0xac },
-    { 0x34,0x2e,0x53,0x9d }, { 0x3a,0x27,0x5e,0x96 },
-    { 0x28,0x3c,0x49,0x8b }, { 0x26,0x35,0x44,0x80 },
-    { 0x7c,0x42,0x0f,0xe9 }, { 0x72,0x4b,0x02,0xe2 },
-    { 0x60,0x50,0x15,0xff }, { 0x6e,0x59,0x18,0xf4 },
-    { 0x44,0x66,0x3b,0xc5 }, { 0x4a,0x6f,0x36,0xce },
-    { 0x58,0x74,0x21,0xd3 }, { 0x56,0x7d,0x2c,0xd8 },
-    { 0x37,0xa1,0x0c,0x7a }, { 0x39,0xa8,0x01,0x71 },
-    { 0x2b,0xb3,0x16,0x6c }, { 0x25,0xba,0x1b,0x67 },
-    { 0x0f,0x85,0x38,0x56 }, { 0x01,0x8c,0x35,0x5d },
-    { 0x13,0x97,0x22,0x40 }, { 0x1d,0x9e,0x2f,0x4b },
-    { 0x47,0xe9,0x64,0x22 }, { 0x49,0xe0,0x69,0x29 },
-    { 0x5b,0xfb,0x7e,0x34 }, { 0x55,0xf2,0x73,0x3f },
-    { 0x7f,0xcd,0x50,0x0e }, { 0x71,0xc4,0x5d,0x05 },
-    { 0x63,0xdf,0x4a,0x18 }, { 0x6d,0xd6,0x47,0x13 },
-    { 0xd7,0x31,0xdc,0xca }, { 0xd9,0x38,0xd1,0xc1 },
-    { 0xcb,0x23,0xc6,0xdc }, { 0xc5,0x2a,0xcb,0xd7 },
-    { 0xef,0x15,0xe8,0xe6 }, { 0xe1,0x1c,0xe5,0xed },
-    { 0xf3,0x07,0xf2,0xf0 }, { 0xfd,0x0e,0xff,0xfb },
-    { 0xa7,0x79,0xb4,0x92 }, { 0xa9,0x70,0xb9,0x99 },
-    { 0xbb,0x6b,0xae,0x84 }, { 0xb5,0x62,0xa3,0x8f },
-    { 0x9f,0x5d,0x80,0xbe }, { 0x91,0x54,0x8d,0xb5 },
-    { 0x83,0x4f,0x9a,0xa8 }, { 0x8d,0x46,0x97,0xa3 }
-  };
-
-static const unsigned char U2[256][4] =
-  {
-    { 0x00,0x00,0x00,0x00 }, { 0x0b,0x0e,0x09,0x0d },
-    { 0x16,0x1c,0x12,0x1a }, { 0x1d,0x12,0x1b,0x17 },
-    { 0x2c,0x38,0x24,0x34 }, { 0x27,0x36,0x2d,0x39 },
-    { 0x3a,0x24,0x36,0x2e }, { 0x31,0x2a,0x3f,0x23 },
-    { 0x58,0x70,0x48,0x68 }, { 0x53,0x7e,0x41,0x65 },
-    { 0x4e,0x6c,0x5a,0x72 }, { 0x45,0x62,0x53,0x7f },
-    { 0x74,0x48,0x6c,0x5c }, { 0x7f,0x46,0x65,0x51 },
-    { 0x62,0x54,0x7e,0x46 }, { 0x69,0x5a,0x77,0x4b },
-    { 0xb0,0xe0,0x90,0xd0 }, { 0xbb,0xee,0x99,0xdd },
-    { 0xa6,0xfc,0x82,0xca }, { 0xad,0xf2,0x8b,0xc7 },
-    { 0x9c,0xd8,0xb4,0xe4 }, { 0x97,0xd6,0xbd,0xe9 },
-    { 0x8a,0xc4,0xa6,0xfe }, { 0x81,0xca,0xaf,0xf3 },
-    { 0xe8,0x90,0xd8,0xb8 }, { 0xe3,0x9e,0xd1,0xb5 },
-    { 0xfe,0x8c,0xca,0xa2 }, { 0xf5,0x82,0xc3,0xaf },
-    { 0xc4,0xa8,0xfc,0x8c }, { 0xcf,0xa6,0xf5,0x81 },
-    { 0xd2,0xb4,0xee,0x96 }, { 0xd9,0xba,0xe7,0x9b },
-    { 0x7b,0xdb,0x3b,0xbb }, { 0x70,0xd5,0x32,0xb6 },
-    { 0x6d,0xc7,0x29,0xa1 }, { 0x66,0xc9,0x20,0xac },
-    { 0x57,0xe3,0x1f,0x8f }, { 0x5c,0xed,0x16,0x82 },
-    { 0x41,0xff,0x0d,0x95 }, { 0x4a,0xf1,0x04,0x98 },
-    { 0x23,0xab,0x73,0xd3 }, { 0x28,0xa5,0x7a,0xde },
-    { 0x35,0xb7,0x61,0xc9 }, { 0x3e,0xb9,0x68,0xc4 },
-    { 0x0f,0x93,0x57,0xe7 }, { 0x04,0x9d,0x5e,0xea },
-    { 0x19,0x8f,0x45,0xfd }, { 0x12,0x81,0x4c,0xf0 },
-    { 0xcb,0x3b,0xab,0x6b }, { 0xc0,0x35,0xa2,0x66 },
-    { 0xdd,0x27,0xb9,0x71 }, { 0xd6,0x29,0xb0,0x7c },
-    { 0xe7,0x03,0x8f,0x5f }, { 0xec,0x0d,0x86,0x52 },
-    { 0xf1,0x1f,0x9d,0x45 }, { 0xfa,0x11,0x94,0x48 },
-    { 0x93,0x4b,0xe3,0x03 }, { 0x98,0x45,0xea,0x0e },
-    { 0x85,0x57,0xf1,0x19 }, { 0x8e,0x59,0xf8,0x14 },
-    { 0xbf,0x73,0xc7,0x37 }, { 0xb4,0x7d,0xce,0x3a },
-    { 0xa9,0x6f,0xd5,0x2d }, { 0xa2,0x61,0xdc,0x20 },
-    { 0xf6,0xad,0x76,0x6d }, { 0xfd,0xa3,0x7f,0x60 },
-    { 0xe0,0xb1,0x64,0x77 }, { 0xeb,0xbf,0x6d,0x7a },
-    { 0xda,0x95,0x52,0x59 }, { 0xd1,0x9b,0x5b,0x54 },
-    { 0xcc,0x89,0x40,0x43 }, { 0xc7,0x87,0x49,0x4e },
-    { 0xae,0xdd,0x3e,0x05 }, { 0xa5,0xd3,0x37,0x08 },
-    { 0xb8,0xc1,0x2c,0x1f }, { 0xb3,0xcf,0x25,0x12 },
-    { 0x82,0xe5,0x1a,0x31 }, { 0x89,0xeb,0x13,0x3c },
-    { 0x94,0xf9,0x08,0x2b }, { 0x9f,0xf7,0x01,0x26 },
-    { 0x46,0x4d,0xe6,0xbd }, { 0x4d,0x43,0xef,0xb0 },
-    { 0x50,0x51,0xf4,0xa7 }, { 0x5b,0x5f,0xfd,0xaa },
-    { 0x6a,0x75,0xc2,0x89 }, { 0x61,0x7b,0xcb,0x84 },
-    { 0x7c,0x69,0xd0,0x93 }, { 0x77,0x67,0xd9,0x9e },
-    { 0x1e,0x3d,0xae,0xd5 }, { 0x15,0x33,0xa7,0xd8 },
-    { 0x08,0x21,0xbc,0xcf }, { 0x03,0x2f,0xb5,0xc2 },
-    { 0x32,0x05,0x8a,0xe1 }, { 0x39,0x0b,0x83,0xec },
-    { 0x24,0x19,0x98,0xfb }, { 0x2f,0x17,0x91,0xf6 },
-    { 0x8d,0x76,0x4d,0xd6 }, { 0x86,0x78,0x44,0xdb },
-    { 0x9b,0x6a,0x5f,0xcc }, { 0x90,0x64,0x56,0xc1 },
-    { 0xa1,0x4e,0x69,0xe2 }, { 0xaa,0x40,0x60,0xef },
-    { 0xb7,0x52,0x7b,0xf8 }, { 0xbc,0x5c,0x72,0xf5 },
-    { 0xd5,0x06,0x05,0xbe }, { 0xde,0x08,0x0c,0xb3 },
-    { 0xc3,0x1a,0x17,0xa4 }, { 0xc8,0x14,0x1e,0xa9 },
-    { 0xf9,0x3e,0x21,0x8a }, { 0xf2,0x30,0x28,0x87 },
-    { 0xef,0x22,0x33,0x90 }, { 0xe4,0x2c,0x3a,0x9d },
-    { 0x3d,0x96,0xdd,0x06 }, { 0x36,0x98,0xd4,0x0b },
-    { 0x2b,0x8a,0xcf,0x1c }, { 0x20,0x84,0xc6,0x11 },
-    { 0x11,0xae,0xf9,0x32 }, { 0x1a,0xa0,0xf0,0x3f },
-    { 0x07,0xb2,0xeb,0x28 }, { 0x0c,0xbc,0xe2,0x25 },
-    { 0x65,0xe6,0x95,0x6e }, { 0x6e,0xe8,0x9c,0x63 },
-    { 0x73,0xfa,0x87,0x74 }, { 0x78,0xf4,0x8e,0x79 },
-    { 0x49,0xde,0xb1,0x5a }, { 0x42,0xd0,0xb8,0x57 },
-    { 0x5f,0xc2,0xa3,0x40 }, { 0x54,0xcc,0xaa,0x4d },
-    { 0xf7,0x41,0xec,0xda }, { 0xfc,0x4f,0xe5,0xd7 },
-    { 0xe1,0x5d,0xfe,0xc0 }, { 0xea,0x53,0xf7,0xcd },
-    { 0xdb,0x79,0xc8,0xee }, { 0xd0,0x77,0xc1,0xe3 },
-    { 0xcd,0x65,0xda,0xf4 }, { 0xc6,0x6b,0xd3,0xf9 },
-    { 0xaf,0x31,0xa4,0xb2 }, { 0xa4,0x3f,0xad,0xbf },
-    { 0xb9,0x2d,0xb6,0xa8 }, { 0xb2,0x23,0xbf,0xa5 },
-    { 0x83,0x09,0x80,0x86 }, { 0x88,0x07,0x89,0x8b },
-    { 0x95,0x15,0x92,0x9c }, { 0x9e,0x1b,0x9b,0x91 },
-    { 0x47,0xa1,0x7c,0x0a }, { 0x4c,0xaf,0x75,0x07 },
-    { 0x51,0xbd,0x6e,0x10 }, { 0x5a,0xb3,0x67,0x1d },
-    { 0x6b,0x99,0x58,0x3e }, { 0x60,0x97,0x51,0x33 },
-    { 0x7d,0x85,0x4a,0x24 }, { 0x76,0x8b,0x43,0x29 },
-    { 0x1f,0xd1,0x34,0x62 }, { 0x14,0xdf,0x3d,0x6f },
-    { 0x09,0xcd,0x26,0x78 }, { 0x02,0xc3,0x2f,0x75 },
-    { 0x33,0xe9,0x10,0x56 }, { 0x38,0xe7,0x19,0x5b },
-    { 0x25,0xf5,0x02,0x4c }, { 0x2e,0xfb,0x0b,0x41 },
-    { 0x8c,0x9a,0xd7,0x61 }, { 0x87,0x94,0xde,0x6c },
-    { 0x9a,0x86,0xc5,0x7b }, { 0x91,0x88,0xcc,0x76 },
-    { 0xa0,0xa2,0xf3,0x55 }, { 0xab,0xac,0xfa,0x58 },
-    { 0xb6,0xbe,0xe1,0x4f }, { 0xbd,0xb0,0xe8,0x42 },
-    { 0xd4,0xea,0x9f,0x09 }, { 0xdf,0xe4,0x96,0x04 },
-    { 0xc2,0xf6,0x8d,0x13 }, { 0xc9,0xf8,0x84,0x1e },
-    { 0xf8,0xd2,0xbb,0x3d }, { 0xf3,0xdc,0xb2,0x30 },
-    { 0xee,0xce,0xa9,0x27 }, { 0xe5,0xc0,0xa0,0x2a },
-    { 0x3c,0x7a,0x47,0xb1 }, { 0x37,0x74,0x4e,0xbc },
-    { 0x2a,0x66,0x55,0xab }, { 0x21,0x68,0x5c,0xa6 },
-    { 0x10,0x42,0x63,0x85 }, { 0x1b,0x4c,0x6a,0x88 },
-    { 0x06,0x5e,0x71,0x9f }, { 0x0d,0x50,0x78,0x92 },
-    { 0x64,0x0a,0x0f,0xd9 }, { 0x6f,0x04,0x06,0xd4 },
-    { 0x72,0x16,0x1d,0xc3 }, { 0x79,0x18,0x14,0xce },
-    { 0x48,0x32,0x2b,0xed }, { 0x43,0x3c,0x22,0xe0 },
-    { 0x5e,0x2e,0x39,0xf7 }, { 0x55,0x20,0x30,0xfa },
-    { 0x01,0xec,0x9a,0xb7 }, { 0x0a,0xe2,0x93,0xba },
-    { 0x17,0xf0,0x88,0xad }, { 0x1c,0xfe,0x81,0xa0 },
-    { 0x2d,0xd4,0xbe,0x83 }, { 0x26,0xda,0xb7,0x8e },
-    { 0x3b,0xc8,0xac,0x99 }, { 0x30,0xc6,0xa5,0x94 },
-    { 0x59,0x9c,0xd2,0xdf }, { 0x52,0x92,0xdb,0xd2 },
-    { 0x4f,0x80,0xc0,0xc5 }, { 0x44,0x8e,0xc9,0xc8 },
-    { 0x75,0xa4,0xf6,0xeb }, { 0x7e,0xaa,0xff,0xe6 },
-    { 0x63,0xb8,0xe4,0xf1 }, { 0x68,0xb6,0xed,0xfc },
-    { 0xb1,0x0c,0x0a,0x67 }, { 0xba,0x02,0x03,0x6a },
-    { 0xa7,0x10,0x18,0x7d }, { 0xac,0x1e,0x11,0x70 },
-    { 0x9d,0x34,0x2e,0x53 }, { 0x96,0x3a,0x27,0x5e },
-    { 0x8b,0x28,0x3c,0x49 }, { 0x80,0x26,0x35,0x44 },
-    { 0xe9,0x7c,0x42,0x0f }, { 0xe2,0x72,0x4b,0x02 },
-    { 0xff,0x60,0x50,0x15 }, { 0xf4,0x6e,0x59,0x18 },
-    { 0xc5,0x44,0x66,0x3b }, { 0xce,0x4a,0x6f,0x36 },
-    { 0xd3,0x58,0x74,0x21 }, { 0xd8,0x56,0x7d,0x2c },
-    { 0x7a,0x37,0xa1,0x0c }, { 0x71,0x39,0xa8,0x01 },
-    { 0x6c,0x2b,0xb3,0x16 }, { 0x67,0x25,0xba,0x1b },
-    { 0x56,0x0f,0x85,0x38 }, { 0x5d,0x01,0x8c,0x35 },
-    { 0x40,0x13,0x97,0x22 }, { 0x4b,0x1d,0x9e,0x2f },
-    { 0x22,0x47,0xe9,0x64 }, { 0x29,0x49,0xe0,0x69 },
-    { 0x34,0x5b,0xfb,0x7e }, { 0x3f,0x55,0xf2,0x73 },
-    { 0x0e,0x7f,0xcd,0x50 }, { 0x05,0x71,0xc4,0x5d },
-    { 0x18,0x63,0xdf,0x4a }, { 0x13,0x6d,0xd6,0x47 },
-    { 0xca,0xd7,0x31,0xdc }, { 0xc1,0xd9,0x38,0xd1 },
-    { 0xdc,0xcb,0x23,0xc6 }, { 0xd7,0xc5,0x2a,0xcb },
-    { 0xe6,0xef,0x15,0xe8 }, { 0xed,0xe1,0x1c,0xe5 },
-    { 0xf0,0xf3,0x07,0xf2 }, { 0xfb,0xfd,0x0e,0xff },
-    { 0x92,0xa7,0x79,0xb4 }, { 0x99,0xa9,0x70,0xb9 },
-    { 0x84,0xbb,0x6b,0xae }, { 0x8f,0xb5,0x62,0xa3 },
-    { 0xbe,0x9f,0x5d,0x80 }, { 0xb5,0x91,0x54,0x8d },
-    { 0xa8,0x83,0x4f,0x9a }, { 0xa3,0x8d,0x46,0x97 }
-  };
-
-static const unsigned char U3[256][4] =
-  {
-    { 0x00,0x00,0x00,0x00 }, { 0x0d,0x0b,0x0e,0x09 },
-    { 0x1a,0x16,0x1c,0x12 }, { 0x17,0x1d,0x12,0x1b },
-    { 0x34,0x2c,0x38,0x24 }, { 0x39,0x27,0x36,0x2d },
-    { 0x2e,0x3a,0x24,0x36 }, { 0x23,0x31,0x2a,0x3f },
-    { 0x68,0x58,0x70,0x48 }, { 0x65,0x53,0x7e,0x41 },
-    { 0x72,0x4e,0x6c,0x5a }, { 0x7f,0x45,0x62,0x53 },
-    { 0x5c,0x74,0x48,0x6c }, { 0x51,0x7f,0x46,0x65 },
-    { 0x46,0x62,0x54,0x7e }, { 0x4b,0x69,0x5a,0x77 },
-    { 0xd0,0xb0,0xe0,0x90 }, { 0xdd,0xbb,0xee,0x99 },
-    { 0xca,0xa6,0xfc,0x82 }, { 0xc7,0xad,0xf2,0x8b },
-    { 0xe4,0x9c,0xd8,0xb4 }, { 0xe9,0x97,0xd6,0xbd },
-    { 0xfe,0x8a,0xc4,0xa6 }, { 0xf3,0x81,0xca,0xaf },
-    { 0xb8,0xe8,0x90,0xd8 }, { 0xb5,0xe3,0x9e,0xd1 },
-    { 0xa2,0xfe,0x8c,0xca }, { 0xaf,0xf5,0x82,0xc3 },
-    { 0x8c,0xc4,0xa8,0xfc }, { 0x81,0xcf,0xa6,0xf5 },
-    { 0x96,0xd2,0xb4,0xee }, { 0x9b,0xd9,0xba,0xe7 },
-    { 0xbb,0x7b,0xdb,0x3b }, { 0xb6,0x70,0xd5,0x32 },
-    { 0xa1,0x6d,0xc7,0x29 }, { 0xac,0x66,0xc9,0x20 },
-    { 0x8f,0x57,0xe3,0x1f }, { 0x82,0x5c,0xed,0x16 },
-    { 0x95,0x41,0xff,0x0d }, { 0x98,0x4a,0xf1,0x04 },
-    { 0xd3,0x23,0xab,0x73 }, { 0xde,0x28,0xa5,0x7a },
-    { 0xc9,0x35,0xb7,0x61 }, { 0xc4,0x3e,0xb9,0x68 },
-    { 0xe7,0x0f,0x93,0x57 }, { 0xea,0x04,0x9d,0x5e },
-    { 0xfd,0x19,0x8f,0x45 }, { 0xf0,0x12,0x81,0x4c },
-    { 0x6b,0xcb,0x3b,0xab }, { 0x66,0xc0,0x35,0xa2 },
-    { 0x71,0xdd,0x27,0xb9 }, { 0x7c,0xd6,0x29,0xb0 },
-    { 0x5f,0xe7,0x03,0x8f }, { 0x52,0xec,0x0d,0x86 },
-    { 0x45,0xf1,0x1f,0x9d }, { 0x48,0xfa,0x11,0x94 },
-    { 0x03,0x93,0x4b,0xe3 }, { 0x0e,0x98,0x45,0xea },
-    { 0x19,0x85,0x57,0xf1 }, { 0x14,0x8e,0x59,0xf8 },
-    { 0x37,0xbf,0x73,0xc7 }, { 0x3a,0xb4,0x7d,0xce },
-    { 0x2d,0xa9,0x6f,0xd5 }, { 0x20,0xa2,0x61,0xdc },
-    { 0x6d,0xf6,0xad,0x76 }, { 0x60,0xfd,0xa3,0x7f },
-    { 0x77,0xe0,0xb1,0x64 }, { 0x7a,0xeb,0xbf,0x6d },
-    { 0x59,0xda,0x95,0x52 }, { 0x54,0xd1,0x9b,0x5b },
-    { 0x43,0xcc,0x89,0x40 }, { 0x4e,0xc7,0x87,0x49 },
-    { 0x05,0xae,0xdd,0x3e }, { 0x08,0xa5,0xd3,0x37 },
-    { 0x1f,0xb8,0xc1,0x2c }, { 0x12,0xb3,0xcf,0x25 },
-    { 0x31,0x82,0xe5,0x1a }, { 0x3c,0x89,0xeb,0x13 },
-    { 0x2b,0x94,0xf9,0x08 }, { 0x26,0x9f,0xf7,0x01 },
-    { 0xbd,0x46,0x4d,0xe6 }, { 0xb0,0x4d,0x43,0xef },
-    { 0xa7,0x50,0x51,0xf4 }, { 0xaa,0x5b,0x5f,0xfd },
-    { 0x89,0x6a,0x75,0xc2 }, { 0x84,0x61,0x7b,0xcb },
-    { 0x93,0x7c,0x69,0xd0 }, { 0x9e,0x77,0x67,0xd9 },
-    { 0xd5,0x1e,0x3d,0xae }, { 0xd8,0x15,0x33,0xa7 },
-    { 0xcf,0x08,0x21,0xbc }, { 0xc2,0x03,0x2f,0xb5 },
-    { 0xe1,0x32,0x05,0x8a }, { 0xec,0x39,0x0b,0x83 },
-    { 0xfb,0x24,0x19,0x98 }, { 0xf6,0x2f,0x17,0x91 },
-    { 0xd6,0x8d,0x76,0x4d }, { 0xdb,0x86,0x78,0x44 },
-    { 0xcc,0x9b,0x6a,0x5f }, { 0xc1,0x90,0x64,0x56 },
-    { 0xe2,0xa1,0x4e,0x69 }, { 0xef,0xaa,0x40,0x60 },
-    { 0xf8,0xb7,0x52,0x7b }, { 0xf5,0xbc,0x5c,0x72 },
-    { 0xbe,0xd5,0x06,0x05 }, { 0xb3,0xde,0x08,0x0c },
-    { 0xa4,0xc3,0x1a,0x17 }, { 0xa9,0xc8,0x14,0x1e },
-    { 0x8a,0xf9,0x3e,0x21 }, { 0x87,0xf2,0x30,0x28 },
-    { 0x90,0xef,0x22,0x33 }, { 0x9d,0xe4,0x2c,0x3a },
-    { 0x06,0x3d,0x96,0xdd }, { 0x0b,0x36,0x98,0xd4 },
-    { 0x1c,0x2b,0x8a,0xcf }, { 0x11,0x20,0x84,0xc6 },
-    { 0x32,0x11,0xae,0xf9 }, { 0x3f,0x1a,0xa0,0xf0 },
-    { 0x28,0x07,0xb2,0xeb }, { 0x25,0x0c,0xbc,0xe2 },
-    { 0x6e,0x65,0xe6,0x95 }, { 0x63,0x6e,0xe8,0x9c },
-    { 0x74,0x73,0xfa,0x87 }, { 0x79,0x78,0xf4,0x8e },
-    { 0x5a,0x49,0xde,0xb1 }, { 0x57,0x42,0xd0,0xb8 },
-    { 0x40,0x5f,0xc2,0xa3 }, { 0x4d,0x54,0xcc,0xaa },
-    { 0xda,0xf7,0x41,0xec }, { 0xd7,0xfc,0x4f,0xe5 },
-    { 0xc0,0xe1,0x5d,0xfe }, { 0xcd,0xea,0x53,0xf7 },
-    { 0xee,0xdb,0x79,0xc8 }, { 0xe3,0xd0,0x77,0xc1 },
-    { 0xf4,0xcd,0x65,0xda }, { 0xf9,0xc6,0x6b,0xd3 },
-    { 0xb2,0xaf,0x31,0xa4 }, { 0xbf,0xa4,0x3f,0xad },
-    { 0xa8,0xb9,0x2d,0xb6 }, { 0xa5,0xb2,0x23,0xbf },
-    { 0x86,0x83,0x09,0x80 }, { 0x8b,0x88,0x07,0x89 },
-    { 0x9c,0x95,0x15,0x92 }, { 0x91,0x9e,0x1b,0x9b },
-    { 0x0a,0x47,0xa1,0x7c }, { 0x07,0x4c,0xaf,0x75 },
-    { 0x10,0x51,0xbd,0x6e }, { 0x1d,0x5a,0xb3,0x67 },
-    { 0x3e,0x6b,0x99,0x58 }, { 0x33,0x60,0x97,0x51 },
-    { 0x24,0x7d,0x85,0x4a }, { 0x29,0x76,0x8b,0x43 },
-    { 0x62,0x1f,0xd1,0x34 }, { 0x6f,0x14,0xdf,0x3d },
-    { 0x78,0x09,0xcd,0x26 }, { 0x75,0x02,0xc3,0x2f },
-    { 0x56,0x33,0xe9,0x10 }, { 0x5b,0x38,0xe7,0x19 },
-    { 0x4c,0x25,0xf5,0x02 }, { 0x41,0x2e,0xfb,0x0b },
-    { 0x61,0x8c,0x9a,0xd7 }, { 0x6c,0x87,0x94,0xde },
-    { 0x7b,0x9a,0x86,0xc5 }, { 0x76,0x91,0x88,0xcc },
-    { 0x55,0xa0,0xa2,0xf3 }, { 0x58,0xab,0xac,0xfa },
-    { 0x4f,0xb6,0xbe,0xe1 }, { 0x42,0xbd,0xb0,0xe8 },
-    { 0x09,0xd4,0xea,0x9f }, { 0x04,0xdf,0xe4,0x96 },
-    { 0x13,0xc2,0xf6,0x8d }, { 0x1e,0xc9,0xf8,0x84 },
-    { 0x3d,0xf8,0xd2,0xbb }, { 0x30,0xf3,0xdc,0xb2 },
-    { 0x27,0xee,0xce,0xa9 }, { 0x2a,0xe5,0xc0,0xa0 },
-    { 0xb1,0x3c,0x7a,0x47 }, { 0xbc,0x37,0x74,0x4e },
-    { 0xab,0x2a,0x66,0x55 }, { 0xa6,0x21,0x68,0x5c },
-    { 0x85,0x10,0x42,0x63 }, { 0x88,0x1b,0x4c,0x6a },
-    { 0x9f,0x06,0x5e,0x71 }, { 0x92,0x0d,0x50,0x78 },
-    { 0xd9,0x64,0x0a,0x0f }, { 0xd4,0x6f,0x04,0x06 },
-    { 0xc3,0x72,0x16,0x1d }, { 0xce,0x79,0x18,0x14 },
-    { 0xed,0x48,0x32,0x2b }, { 0xe0,0x43,0x3c,0x22 },
-    { 0xf7,0x5e,0x2e,0x39 }, { 0xfa,0x55,0x20,0x30 },
-    { 0xb7,0x01,0xec,0x9a }, { 0xba,0x0a,0xe2,0x93 },
-    { 0xad,0x17,0xf0,0x88 }, { 0xa0,0x1c,0xfe,0x81 },
-    { 0x83,0x2d,0xd4,0xbe }, { 0x8e,0x26,0xda,0xb7 },
-    { 0x99,0x3b,0xc8,0xac }, { 0x94,0x30,0xc6,0xa5 },
-    { 0xdf,0x59,0x9c,0xd2 }, { 0xd2,0x52,0x92,0xdb },
-    { 0xc5,0x4f,0x80,0xc0 }, { 0xc8,0x44,0x8e,0xc9 },
-    { 0xeb,0x75,0xa4,0xf6 }, { 0xe6,0x7e,0xaa,0xff },
-    { 0xf1,0x63,0xb8,0xe4 }, { 0xfc,0x68,0xb6,0xed },
-    { 0x67,0xb1,0x0c,0x0a }, { 0x6a,0xba,0x02,0x03 },
-    { 0x7d,0xa7,0x10,0x18 }, { 0x70,0xac,0x1e,0x11 },
-    { 0x53,0x9d,0x34,0x2e }, { 0x5e,0x96,0x3a,0x27 },
-    { 0x49,0x8b,0x28,0x3c }, { 0x44,0x80,0x26,0x35 },
-    { 0x0f,0xe9,0x7c,0x42 }, { 0x02,0xe2,0x72,0x4b },
-    { 0x15,0xff,0x60,0x50 }, { 0x18,0xf4,0x6e,0x59 },
-    { 0x3b,0xc5,0x44,0x66 }, { 0x36,0xce,0x4a,0x6f },
-    { 0x21,0xd3,0x58,0x74 }, { 0x2c,0xd8,0x56,0x7d },
-    { 0x0c,0x7a,0x37,0xa1 }, { 0x01,0x71,0x39,0xa8 },
-    { 0x16,0x6c,0x2b,0xb3 }, { 0x1b,0x67,0x25,0xba },
-    { 0x38,0x56,0x0f,0x85 }, { 0x35,0x5d,0x01,0x8c },
-    { 0x22,0x40,0x13,0x97 }, { 0x2f,0x4b,0x1d,0x9e },
-    { 0x64,0x22,0x47,0xe9 }, { 0x69,0x29,0x49,0xe0 },
-    { 0x7e,0x34,0x5b,0xfb }, { 0x73,0x3f,0x55,0xf2 },
-    { 0x50,0x0e,0x7f,0xcd }, { 0x5d,0x05,0x71,0xc4 },
-    { 0x4a,0x18,0x63,0xdf }, { 0x47,0x13,0x6d,0xd6 },
-    { 0xdc,0xca,0xd7,0x31 }, { 0xd1,0xc1,0xd9,0x38 },
-    { 0xc6,0xdc,0xcb,0x23 }, { 0xcb,0xd7,0xc5,0x2a },
-    { 0xe8,0xe6,0xef,0x15 }, { 0xe5,0xed,0xe1,0x1c },
-    { 0xf2,0xf0,0xf3,0x07 }, { 0xff,0xfb,0xfd,0x0e },
-    { 0xb4,0x92,0xa7,0x79 }, { 0xb9,0x99,0xa9,0x70 },
-    { 0xae,0x84,0xbb,0x6b }, { 0xa3,0x8f,0xb5,0x62 },
-    { 0x80,0xbe,0x9f,0x5d }, { 0x8d,0xb5,0x91,0x54 },
-    { 0x9a,0xa8,0x83,0x4f }, { 0x97,0xa3,0x8d,0x46 }
-  };
-
-static const unsigned char U4[256][4] =
-  {
-    { 0x00,0x00,0x00,0x00 }, { 0x09,0x0d,0x0b,0x0e },
-    { 0x12,0x1a,0x16,0x1c }, { 0x1b,0x17,0x1d,0x12 },
-    { 0x24,0x34,0x2c,0x38 }, { 0x2d,0x39,0x27,0x36 },
-    { 0x36,0x2e,0x3a,0x24 }, { 0x3f,0x23,0x31,0x2a },
-    { 0x48,0x68,0x58,0x70 }, { 0x41,0x65,0x53,0x7e },
-    { 0x5a,0x72,0x4e,0x6c }, { 0x53,0x7f,0x45,0x62 },
-    { 0x6c,0x5c,0x74,0x48 }, { 0x65,0x51,0x7f,0x46 },
-    { 0x7e,0x46,0x62,0x54 }, { 0x77,0x4b,0x69,0x5a },
-    { 0x90,0xd0,0xb0,0xe0 }, { 0x99,0xdd,0xbb,0xee },
-    { 0x82,0xca,0xa6,0xfc }, { 0x8b,0xc7,0xad,0xf2 },
-    { 0xb4,0xe4,0x9c,0xd8 }, { 0xbd,0xe9,0x97,0xd6 },
-    { 0xa6,0xfe,0x8a,0xc4 }, { 0xaf,0xf3,0x81,0xca },
-    { 0xd8,0xb8,0xe8,0x90 }, { 0xd1,0xb5,0xe3,0x9e },
-    { 0xca,0xa2,0xfe,0x8c }, { 0xc3,0xaf,0xf5,0x82 },
-    { 0xfc,0x8c,0xc4,0xa8 }, { 0xf5,0x81,0xcf,0xa6 },
-    { 0xee,0x96,0xd2,0xb4 }, { 0xe7,0x9b,0xd9,0xba },
-    { 0x3b,0xbb,0x7b,0xdb }, { 0x32,0xb6,0x70,0xd5 },
-    { 0x29,0xa1,0x6d,0xc7 }, { 0x20,0xac,0x66,0xc9 },
-    { 0x1f,0x8f,0x57,0xe3 }, { 0x16,0x82,0x5c,0xed },
-    { 0x0d,0x95,0x41,0xff }, { 0x04,0x98,0x4a,0xf1 },
-    { 0x73,0xd3,0x23,0xab }, { 0x7a,0xde,0x28,0xa5 },
-    { 0x61,0xc9,0x35,0xb7 }, { 0x68,0xc4,0x3e,0xb9 },
-    { 0x57,0xe7,0x0f,0x93 }, { 0x5e,0xea,0x04,0x9d },
-    { 0x45,0xfd,0x19,0x8f }, { 0x4c,0xf0,0x12,0x81 },
-    { 0xab,0x6b,0xcb,0x3b }, { 0xa2,0x66,0xc0,0x35 },
-    { 0xb9,0x71,0xdd,0x27 }, { 0xb0,0x7c,0xd6,0x29 },
-    { 0x8f,0x5f,0xe7,0x03 }, { 0x86,0x52,0xec,0x0d },
-    { 0x9d,0x45,0xf1,0x1f }, { 0x94,0x48,0xfa,0x11 },
-    { 0xe3,0x03,0x93,0x4b }, { 0xea,0x0e,0x98,0x45 },
-    { 0xf1,0x19,0x85,0x57 }, { 0xf8,0x14,0x8e,0x59 },
-    { 0xc7,0x37,0xbf,0x73 }, { 0xce,0x3a,0xb4,0x7d },
-    { 0xd5,0x2d,0xa9,0x6f }, { 0xdc,0x20,0xa2,0x61 },
-    { 0x76,0x6d,0xf6,0xad }, { 0x7f,0x60,0xfd,0xa3 },
-    { 0x64,0x77,0xe0,0xb1 }, { 0x6d,0x7a,0xeb,0xbf },
-    { 0x52,0x59,0xda,0x95 }, { 0x5b,0x54,0xd1,0x9b },
-    { 0x40,0x43,0xcc,0x89 }, { 0x49,0x4e,0xc7,0x87 },
-    { 0x3e,0x05,0xae,0xdd }, { 0x37,0x08,0xa5,0xd3 },
-    { 0x2c,0x1f,0xb8,0xc1 }, { 0x25,0x12,0xb3,0xcf },
-    { 0x1a,0x31,0x82,0xe5 }, { 0x13,0x3c,0x89,0xeb },
-    { 0x08,0x2b,0x94,0xf9 }, { 0x01,0x26,0x9f,0xf7 },
-    { 0xe6,0xbd,0x46,0x4d }, { 0xef,0xb0,0x4d,0x43 },
-    { 0xf4,0xa7,0x50,0x51 }, { 0xfd,0xaa,0x5b,0x5f },
-    { 0xc2,0x89,0x6a,0x75 }, { 0xcb,0x84,0x61,0x7b },
-    { 0xd0,0x93,0x7c,0x69 }, { 0xd9,0x9e,0x77,0x67 },
-    { 0xae,0xd5,0x1e,0x3d }, { 0xa7,0xd8,0x15,0x33 },
-    { 0xbc,0xcf,0x08,0x21 }, { 0xb5,0xc2,0x03,0x2f },
-    { 0x8a,0xe1,0x32,0x05 }, { 0x83,0xec,0x39,0x0b },
-    { 0x98,0xfb,0x24,0x19 }, { 0x91,0xf6,0x2f,0x17 },
-    { 0x4d,0xd6,0x8d,0x76 }, { 0x44,0xdb,0x86,0x78 },
-    { 0x5f,0xcc,0x9b,0x6a }, { 0x56,0xc1,0x90,0x64 },
-    { 0x69,0xe2,0xa1,0x4e }, { 0x60,0xef,0xaa,0x40 },
-    { 0x7b,0xf8,0xb7,0x52 }, { 0x72,0xf5,0xbc,0x5c },
-    { 0x05,0xbe,0xd5,0x06 }, { 0x0c,0xb3,0xde,0x08 },
-    { 0x17,0xa4,0xc3,0x1a }, { 0x1e,0xa9,0xc8,0x14 },
-    { 0x21,0x8a,0xf9,0x3e }, { 0x28,0x87,0xf2,0x30 },
-    { 0x33,0x90,0xef,0x22 }, { 0x3a,0x9d,0xe4,0x2c },
-    { 0xdd,0x06,0x3d,0x96 }, { 0xd4,0x0b,0x36,0x98 },
-    { 0xcf,0x1c,0x2b,0x8a }, { 0xc6,0x11,0x20,0x84 },
-    { 0xf9,0x32,0x11,0xae }, { 0xf0,0x3f,0x1a,0xa0 },
-    { 0xeb,0x28,0x07,0xb2 }, { 0xe2,0x25,0x0c,0xbc },
-    { 0x95,0x6e,0x65,0xe6 }, { 0x9c,0x63,0x6e,0xe8 },
-    { 0x87,0x74,0x73,0xfa }, { 0x8e,0x79,0x78,0xf4 },
-    { 0xb1,0x5a,0x49,0xde }, { 0xb8,0x57,0x42,0xd0 },
-    { 0xa3,0x40,0x5f,0xc2 }, { 0xaa,0x4d,0x54,0xcc },
-    { 0xec,0xda,0xf7,0x41 }, { 0xe5,0xd7,0xfc,0x4f },
-    { 0xfe,0xc0,0xe1,0x5d }, { 0xf7,0xcd,0xea,0x53 },
-    { 0xc8,0xee,0xdb,0x79 }, { 0xc1,0xe3,0xd0,0x77 },
-    { 0xda,0xf4,0xcd,0x65 }, { 0xd3,0xf9,0xc6,0x6b },
-    { 0xa4,0xb2,0xaf,0x31 }, { 0xad,0xbf,0xa4,0x3f },
-    { 0xb6,0xa8,0xb9,0x2d }, { 0xbf,0xa5,0xb2,0x23 },
-    { 0x80,0x86,0x83,0x09 }, { 0x89,0x8b,0x88,0x07 },
-    { 0x92,0x9c,0x95,0x15 }, { 0x9b,0x91,0x9e,0x1b },
-    { 0x7c,0x0a,0x47,0xa1 }, { 0x75,0x07,0x4c,0xaf },
-    { 0x6e,0x10,0x51,0xbd }, { 0x67,0x1d,0x5a,0xb3 },
-    { 0x58,0x3e,0x6b,0x99 }, { 0x51,0x33,0x60,0x97 },
-    { 0x4a,0x24,0x7d,0x85 }, { 0x43,0x29,0x76,0x8b },
-    { 0x34,0x62,0x1f,0xd1 }, { 0x3d,0x6f,0x14,0xdf },
-    { 0x26,0x78,0x09,0xcd }, { 0x2f,0x75,0x02,0xc3 },
-    { 0x10,0x56,0x33,0xe9 }, { 0x19,0x5b,0x38,0xe7 },
-    { 0x02,0x4c,0x25,0xf5 }, { 0x0b,0x41,0x2e,0xfb },
-    { 0xd7,0x61,0x8c,0x9a }, { 0xde,0x6c,0x87,0x94 },
-    { 0xc5,0x7b,0x9a,0x86 }, { 0xcc,0x76,0x91,0x88 },
-    { 0xf3,0x55,0xa0,0xa2 }, { 0xfa,0x58,0xab,0xac },
-    { 0xe1,0x4f,0xb6,0xbe }, { 0xe8,0x42,0xbd,0xb0 },
-    { 0x9f,0x09,0xd4,0xea }, { 0x96,0x04,0xdf,0xe4 },
-    { 0x8d,0x13,0xc2,0xf6 }, { 0x84,0x1e,0xc9,0xf8 },
-    { 0xbb,0x3d,0xf8,0xd2 }, { 0xb2,0x30,0xf3,0xdc },
-    { 0xa9,0x27,0xee,0xce }, { 0xa0,0x2a,0xe5,0xc0 },
-    { 0x47,0xb1,0x3c,0x7a }, { 0x4e,0xbc,0x37,0x74 },
-    { 0x55,0xab,0x2a,0x66 }, { 0x5c,0xa6,0x21,0x68 },
-    { 0x63,0x85,0x10,0x42 }, { 0x6a,0x88,0x1b,0x4c },
-    { 0x71,0x9f,0x06,0x5e }, { 0x78,0x92,0x0d,0x50 },
-    { 0x0f,0xd9,0x64,0x0a }, { 0x06,0xd4,0x6f,0x04 },
-    { 0x1d,0xc3,0x72,0x16 }, { 0x14,0xce,0x79,0x18 },
-    { 0x2b,0xed,0x48,0x32 }, { 0x22,0xe0,0x43,0x3c },
-    { 0x39,0xf7,0x5e,0x2e }, { 0x30,0xfa,0x55,0x20 },
-    { 0x9a,0xb7,0x01,0xec }, { 0x93,0xba,0x0a,0xe2 },
-    { 0x88,0xad,0x17,0xf0 }, { 0x81,0xa0,0x1c,0xfe },
-    { 0xbe,0x83,0x2d,0xd4 }, { 0xb7,0x8e,0x26,0xda },
-    { 0xac,0x99,0x3b,0xc8 }, { 0xa5,0x94,0x30,0xc6 },
-    { 0xd2,0xdf,0x59,0x9c }, { 0xdb,0xd2,0x52,0x92 },
-    { 0xc0,0xc5,0x4f,0x80 }, { 0xc9,0xc8,0x44,0x8e },
-    { 0xf6,0xeb,0x75,0xa4 }, { 0xff,0xe6,0x7e,0xaa },
-    { 0xe4,0xf1,0x63,0xb8 }, { 0xed,0xfc,0x68,0xb6 },
-    { 0x0a,0x67,0xb1,0x0c }, { 0x03,0x6a,0xba,0x02 },
-    { 0x18,0x7d,0xa7,0x10 }, { 0x11,0x70,0xac,0x1e },
-    { 0x2e,0x53,0x9d,0x34 }, { 0x27,0x5e,0x96,0x3a },
-    { 0x3c,0x49,0x8b,0x28 }, { 0x35,0x44,0x80,0x26 },
-    { 0x42,0x0f,0xe9,0x7c }, { 0x4b,0x02,0xe2,0x72 },
-    { 0x50,0x15,0xff,0x60 }, { 0x59,0x18,0xf4,0x6e },
-    { 0x66,0x3b,0xc5,0x44 }, { 0x6f,0x36,0xce,0x4a },
-    { 0x74,0x21,0xd3,0x58 }, { 0x7d,0x2c,0xd8,0x56 },
-    { 0xa1,0x0c,0x7a,0x37 }, { 0xa8,0x01,0x71,0x39 },
-    { 0xb3,0x16,0x6c,0x2b }, { 0xba,0x1b,0x67,0x25 },
-    { 0x85,0x38,0x56,0x0f }, { 0x8c,0x35,0x5d,0x01 },
-    { 0x97,0x22,0x40,0x13 }, { 0x9e,0x2f,0x4b,0x1d },
-    { 0xe9,0x64,0x22,0x47 }, { 0xe0,0x69,0x29,0x49 },
-    { 0xfb,0x7e,0x34,0x5b }, { 0xf2,0x73,0x3f,0x55 },
-    { 0xcd,0x50,0x0e,0x7f }, { 0xc4,0x5d,0x05,0x71 },
-    { 0xdf,0x4a,0x18,0x63 }, { 0xd6,0x47,0x13,0x6d },
-    { 0x31,0xdc,0xca,0xd7 }, { 0x38,0xd1,0xc1,0xd9 },
-    { 0x23,0xc6,0xdc,0xcb }, { 0x2a,0xcb,0xd7,0xc5 },
-    { 0x15,0xe8,0xe6,0xef }, { 0x1c,0xe5,0xed,0xe1 },
-    { 0x07,0xf2,0xf0,0xf3 }, { 0x0e,0xff,0xfb,0xfd },
-    { 0x79,0xb4,0x92,0xa7 }, { 0x70,0xb9,0x99,0xa9 },
-    { 0x6b,0xae,0x84,0xbb }, { 0x62,0xa3,0x8f,0xb5 },
-    { 0x5d,0x80,0xbe,0x9f }, { 0x54,0x8d,0xb5,0x91 },
-    { 0x4f,0x9a,0xa8,0x83 }, { 0x46,0x97,0xa3,0x8d }
-  };
+#define decT dec_tables.T
+#define inv_sbox dec_tables.inv_sbox
 
 static const u32 rcon[30] =
   {
diff --git a/grub-core/lib/libgcrypt/cipher/rijndael-vaes-avx2-amd64.S 
b/grub-core/lib/libgcrypt/cipher/rijndael-vaes-avx2-amd64.S
new file mode 100644
index 000000000..f94b58dbc
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/rijndael-vaes-avx2-amd64.S
@@ -0,0 +1,3021 @@
+/* VAES/AVX2 AMD64 accelerated AES for Libgcrypt
+ * Copyright (C) 2021 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#if defined(__x86_64__)
+#include <config.h>
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
+    defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX2_SUPPORT) && \
+    defined(HAVE_GCC_INLINE_ASM_VAES_VPCLMUL)
+
+#include "asm-common-amd64.h"
+
+.text
+
+/**********************************************************************
+  helper macros
+ **********************************************************************/
+#define no(...) /*_*/
+#define yes(...) __VA_ARGS__
+
+#define AES_OP8(op, key, b0, b1, b2, b3, b4, b5, b6, b7) \
+       op key, b0, b0; \
+       op key, b1, b1; \
+       op key, b2, b2; \
+       op key, b3, b3; \
+       op key, b4, b4; \
+       op key, b5, b5; \
+       op key, b6, b6; \
+       op key, b7, b7;
+
+#define VAESENC8(key, b0, b1, b2, b3, b4, b5, b6, b7) \
+       AES_OP8(vaesenc, key, b0, b1, b2, b3, b4, b5, b6, b7)
+
+#define VAESDEC8(key, b0, b1, b2, b3, b4, b5, b6, b7) \
+       AES_OP8(vaesdec, key, b0, b1, b2, b3, b4, b5, b6, b7)
+
+#define XOR8(key, b0, b1, b2, b3, b4, b5, b6, b7) \
+       AES_OP8(vpxor, key, b0, b1, b2, b3, b4, b5, b6, b7)
+
+#define AES_OP4(op, key, b0, b1, b2, b3) \
+       op key, b0, b0; \
+       op key, b1, b1; \
+       op key, b2, b2; \
+       op key, b3, b3;
+
+#define VAESENC4(key, b0, b1, b2, b3) \
+       AES_OP4(vaesenc, key, b0, b1, b2, b3)
+
+#define VAESDEC4(key, b0, b1, b2, b3) \
+       AES_OP4(vaesdec, key, b0, b1, b2, b3)
+
+#define XOR4(key, b0, b1, b2, b3) \
+       AES_OP4(vpxor, key, b0, b1, b2, b3)
+
+#define AES_OP2(op, key, b0, b1) \
+       op key, b0, b0; \
+       op key, b1, b1;
+
+#define VAESENC2(key, b0, b1) \
+       AES_OP2(vaesenc, key, b0, b1)
+
+#define VAESDEC2(key, b0, b1) \
+       AES_OP2(vaesdec, key, b0, b1)
+
+#define XOR2(key, b0, b1) \
+       AES_OP2(vpxor, key, b0, b1)
+
+/**********************************************************************
+  CBC-mode decryption
+ **********************************************************************/
+ELF(.type _gcry_vaes_avx2_cbc_dec_amd64,@function)
+.globl _gcry_vaes_avx2_cbc_dec_amd64
+_gcry_vaes_avx2_cbc_dec_amd64:
+       /* input:
+        *      %rdi: round keys
+        *      %rsi: iv
+        *      %rdx: dst
+        *      %rcx: src
+        *      %r8:  nblocks
+        *      %r9:  nrounds
+        */
+       CFI_STARTPROC();
+
+       /* Load IV. */
+       vmovdqu (%rsi), %xmm15;
+
+       /* Process 16 blocks per loop. */
+.align 8
+.Lcbc_dec_blk16:
+       cmpq $16, %r8;
+       jb .Lcbc_dec_blk8;
+
+       leaq -16(%r8), %r8;
+
+       /* Load input and xor first key. Update IV. */
+       vbroadcasti128 (0 * 16)(%rdi), %ymm8;
+       vmovdqu (0 * 16)(%rcx), %ymm0;
+       vmovdqu (2 * 16)(%rcx), %ymm1;
+       vmovdqu (4 * 16)(%rcx), %ymm2;
+       vmovdqu (6 * 16)(%rcx), %ymm3;
+       vmovdqu (8 * 16)(%rcx), %ymm4;
+       vmovdqu (10 * 16)(%rcx), %ymm5;
+       vmovdqu (12 * 16)(%rcx), %ymm6;
+       vmovdqu (14 * 16)(%rcx), %ymm7;
+       vpxor %ymm8, %ymm0, %ymm0;
+       vpxor %ymm8, %ymm1, %ymm1;
+       vpxor %ymm8, %ymm2, %ymm2;
+       vpxor %ymm8, %ymm3, %ymm3;
+       vpxor %ymm8, %ymm4, %ymm4;
+       vpxor %ymm8, %ymm5, %ymm5;
+       vpxor %ymm8, %ymm6, %ymm6;
+       vpxor %ymm8, %ymm7, %ymm7;
+       vbroadcasti128 (1 * 16)(%rdi), %ymm8;
+       vinserti128 $1, (0 * 16)(%rcx), %ymm15, %ymm9;
+       vmovdqu (1 * 16)(%rcx), %ymm10;
+       vmovdqu (3 * 16)(%rcx), %ymm11;
+       vmovdqu (5 * 16)(%rcx), %ymm12;
+       vmovdqu (7 * 16)(%rcx), %ymm13;
+       vmovdqu (9 * 16)(%rcx), %ymm14;
+       vmovdqu (15 * 16)(%rcx), %xmm15;
+       leaq (16 * 16)(%rcx), %rcx;
+
+       /* AES rounds */
+       VAESDEC8(%ymm8, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7);
+       vbroadcasti128 (2 * 16)(%rdi), %ymm8;
+       VAESDEC8(%ymm8, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7);
+       vbroadcasti128 (3 * 16)(%rdi), %ymm8;
+       VAESDEC8(%ymm8, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7);
+       vbroadcasti128 (4 * 16)(%rdi), %ymm8;
+       VAESDEC8(%ymm8, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7);
+       vbroadcasti128 (5 * 16)(%rdi), %ymm8;
+       VAESDEC8(%ymm8, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7);
+       vbroadcasti128 (6 * 16)(%rdi), %ymm8;
+       VAESDEC8(%ymm8, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7);
+       vbroadcasti128 (7 * 16)(%rdi), %ymm8;
+       VAESDEC8(%ymm8, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7);
+       vbroadcasti128 (8 * 16)(%rdi), %ymm8;
+       VAESDEC8(%ymm8, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7);
+       vbroadcasti128 (9 * 16)(%rdi), %ymm8;
+       VAESDEC8(%ymm8, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7);
+       vbroadcasti128 (10 * 16)(%rdi), %ymm8;
+       cmpl $12, %r9d;
+       jb .Lcbc_dec_blk16_last;
+       VAESDEC8(%ymm8, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7);
+       vbroadcasti128 (11 * 16)(%rdi), %ymm8;
+       VAESDEC8(%ymm8, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7);
+       vbroadcasti128 (12 * 16)(%rdi), %ymm8;
+       jz .Lcbc_dec_blk16_last;
+       VAESDEC8(%ymm8, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7);
+       vbroadcasti128 (13 * 16)(%rdi), %ymm8;
+       VAESDEC8(%ymm8, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7);
+       vbroadcasti128 (14 * 16)(%rdi), %ymm8;
+
+       /* Last round and output handling. */
+  .Lcbc_dec_blk16_last:
+       vpxor %ymm8, %ymm9, %ymm9;
+       vpxor %ymm8, %ymm10, %ymm10;
+       vpxor %ymm8, %ymm11, %ymm11;
+       vpxor %ymm8, %ymm12, %ymm12;
+       vpxor %ymm8, %ymm13, %ymm13;
+       vpxor %ymm8, %ymm14, %ymm14;
+       vaesdeclast %ymm9, %ymm0, %ymm0;
+       vaesdeclast %ymm10, %ymm1, %ymm1;
+       vpxor (-5 * 16)(%rcx), %ymm8, %ymm9;
+       vpxor (-3 * 16)(%rcx), %ymm8, %ymm10;
+       vaesdeclast %ymm11, %ymm2, %ymm2;
+       vaesdeclast %ymm12, %ymm3, %ymm3;
+       vaesdeclast %ymm13, %ymm4, %ymm4;
+       vaesdeclast %ymm14, %ymm5, %ymm5;
+       vaesdeclast %ymm9, %ymm6, %ymm6;
+       vaesdeclast %ymm10, %ymm7, %ymm7;
+       vmovdqu %ymm0, (0 * 16)(%rdx);
+       vmovdqu %ymm1, (2 * 16)(%rdx);
+       vmovdqu %ymm2, (4 * 16)(%rdx);
+       vmovdqu %ymm3, (6 * 16)(%rdx);
+       vmovdqu %ymm4, (8 * 16)(%rdx);
+       vmovdqu %ymm5, (10 * 16)(%rdx);
+       vmovdqu %ymm6, (12 * 16)(%rdx);
+       vmovdqu %ymm7, (14 * 16)(%rdx);
+       leaq (16 * 16)(%rdx), %rdx;
+
+       jmp .Lcbc_dec_blk16;
+
+       /* Handle trailing eight blocks. */
+.align 8
+.Lcbc_dec_blk8:
+       cmpq $8, %r8;
+       jb .Lcbc_dec_blk4;
+
+       leaq -8(%r8), %r8;
+
+       /* Load input and xor first key. Update IV. */
+       vbroadcasti128 (0 * 16)(%rdi), %ymm4;
+       vmovdqu (0 * 16)(%rcx), %ymm0;
+       vmovdqu (2 * 16)(%rcx), %ymm1;
+       vmovdqu (4 * 16)(%rcx), %ymm2;
+       vmovdqu (6 * 16)(%rcx), %ymm3;
+       vpxor %ymm4, %ymm0, %ymm0;
+       vpxor %ymm4, %ymm1, %ymm1;
+       vpxor %ymm4, %ymm2, %ymm2;
+       vpxor %ymm4, %ymm3, %ymm3;
+       vbroadcasti128 (1 * 16)(%rdi), %ymm4;
+       vinserti128 $1, (0 * 16)(%rcx), %ymm15, %ymm10;
+       vmovdqu (1 * 16)(%rcx), %ymm11;
+       vmovdqu (3 * 16)(%rcx), %ymm12;
+       vmovdqu (5 * 16)(%rcx), %ymm13;
+       vmovdqu (7 * 16)(%rcx), %xmm15;
+       leaq (8 * 16)(%rcx), %rcx;
+
+       /* AES rounds */
+       VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+       vbroadcasti128 (2 * 16)(%rdi), %ymm4;
+       VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+       vbroadcasti128 (3 * 16)(%rdi), %ymm4;
+       VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+       vbroadcasti128 (4 * 16)(%rdi), %ymm4;
+       VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+       vbroadcasti128 (5 * 16)(%rdi), %ymm4;
+       VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+       vbroadcasti128 (6 * 16)(%rdi), %ymm4;
+       VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+       vbroadcasti128 (7 * 16)(%rdi), %ymm4;
+       VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+       vbroadcasti128 (8 * 16)(%rdi), %ymm4;
+       VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+       vbroadcasti128 (9 * 16)(%rdi), %ymm4;
+       VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+       vbroadcasti128 (10 * 16)(%rdi), %ymm4;
+       cmpl $12, %r9d;
+       jb .Lcbc_dec_blk8_last;
+       VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+       vbroadcasti128 (11 * 16)(%rdi), %ymm4;
+       VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+       vbroadcasti128 (12 * 16)(%rdi), %ymm4;
+       jz .Lcbc_dec_blk8_last;
+       VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+       vbroadcasti128 (13 * 16)(%rdi), %ymm4;
+       VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+       vbroadcasti128 (14 * 16)(%rdi), %ymm4;
+
+       /* Last round and output handling. */
+  .Lcbc_dec_blk8_last:
+       vpxor %ymm4, %ymm10, %ymm10;
+       vpxor %ymm4, %ymm11, %ymm11;
+       vpxor %ymm4, %ymm12, %ymm12;
+       vpxor %ymm4, %ymm13, %ymm13;
+       vaesdeclast %ymm10, %ymm0, %ymm0;
+       vaesdeclast %ymm11, %ymm1, %ymm1;
+       vaesdeclast %ymm12, %ymm2, %ymm2;
+       vaesdeclast %ymm13, %ymm3, %ymm3;
+       vmovdqu %ymm0, (0 * 16)(%rdx);
+       vmovdqu %ymm1, (2 * 16)(%rdx);
+       vmovdqu %ymm2, (4 * 16)(%rdx);
+       vmovdqu %ymm3, (6 * 16)(%rdx);
+       leaq (8 * 16)(%rdx), %rdx;
+
+       /* Handle trailing four blocks. */
+.align 8
+.Lcbc_dec_blk4:
+       cmpq $4, %r8;
+       jb .Lcbc_dec_blk1;
+
+       leaq -4(%r8), %r8;
+
+       /* Load input and xor first key. Update IV. */
+       vbroadcasti128 (0 * 16)(%rdi), %ymm4;
+       vmovdqu (0 * 16)(%rcx), %ymm0;
+       vmovdqu (2 * 16)(%rcx), %ymm1;
+       vpxor %ymm4, %ymm0, %ymm0;
+       vpxor %ymm4, %ymm1, %ymm1;
+       vbroadcasti128 (1 * 16)(%rdi), %ymm4;
+       vinserti128 $1, (0 * 16)(%rcx), %ymm15, %ymm10;
+       vmovdqu (1 * 16)(%rcx), %ymm11;
+       vmovdqu (3 * 16)(%rcx), %xmm15;
+       leaq (4 * 16)(%rcx), %rcx;
+
+       /* AES rounds */
+       VAESDEC2(%ymm4, %ymm0, %ymm1);
+       vbroadcasti128 (2 * 16)(%rdi), %ymm4;
+       VAESDEC2(%ymm4, %ymm0, %ymm1);
+       vbroadcasti128 (3 * 16)(%rdi), %ymm4;
+       VAESDEC2(%ymm4, %ymm0, %ymm1);
+       vbroadcasti128 (4 * 16)(%rdi), %ymm4;
+       VAESDEC2(%ymm4, %ymm0, %ymm1);
+       vbroadcasti128 (5 * 16)(%rdi), %ymm4;
+       VAESDEC2(%ymm4, %ymm0, %ymm1);
+       vbroadcasti128 (6 * 16)(%rdi), %ymm4;
+       VAESDEC2(%ymm4, %ymm0, %ymm1);
+       vbroadcasti128 (7 * 16)(%rdi), %ymm4;
+       VAESDEC2(%ymm4, %ymm0, %ymm1);
+       vbroadcasti128 (8 * 16)(%rdi), %ymm4;
+       VAESDEC2(%ymm4, %ymm0, %ymm1);
+       vbroadcasti128 (9 * 16)(%rdi), %ymm4;
+       VAESDEC2(%ymm4, %ymm0, %ymm1);
+       vbroadcasti128 (10 * 16)(%rdi), %ymm4;
+       cmpl $12, %r9d;
+       jb .Lcbc_dec_blk4_last;
+       VAESDEC2(%ymm4, %ymm0, %ymm1);
+       vbroadcasti128 (11 * 16)(%rdi), %ymm4;
+       VAESDEC2(%ymm4, %ymm0, %ymm1);
+       vbroadcasti128 (12 * 16)(%rdi), %ymm4;
+       jz .Lcbc_dec_blk4_last;
+       VAESDEC2(%ymm4, %ymm0, %ymm1);
+       vbroadcasti128 (13 * 16)(%rdi), %ymm4;
+       VAESDEC2(%ymm4, %ymm0, %ymm1);
+       vbroadcasti128 (14 * 16)(%rdi), %ymm4;
+
+       /* Last round and output handling. */
+  .Lcbc_dec_blk4_last:
+       vpxor %ymm4, %ymm10, %ymm10;
+       vpxor %ymm4, %ymm11, %ymm11;
+       vaesdeclast %ymm10, %ymm0, %ymm0;
+       vaesdeclast %ymm11, %ymm1, %ymm1;
+       vmovdqu %ymm0, (0 * 16)(%rdx);
+       vmovdqu %ymm1, (2 * 16)(%rdx);
+       leaq (4 * 16)(%rdx), %rdx;
+
+       /* Process trailing one to three blocks, one per loop. */
+.align 8
+.Lcbc_dec_blk1:
+       cmpq $1, %r8;
+       jb .Ldone_cbc_dec;
+
+       leaq -1(%r8), %r8;
+
+       /* Load input. */
+       vmovdqu (%rcx), %xmm2;
+       leaq 16(%rcx), %rcx;
+
+       /* Xor first key. */
+       vpxor (0 * 16)(%rdi), %xmm2, %xmm0;
+
+       /* AES rounds. */
+       vaesdec (1 * 16)(%rdi), %xmm0, %xmm0;
+       vaesdec (2 * 16)(%rdi), %xmm0, %xmm0;
+       vaesdec (3 * 16)(%rdi), %xmm0, %xmm0;
+       vaesdec (4 * 16)(%rdi), %xmm0, %xmm0;
+       vaesdec (5 * 16)(%rdi), %xmm0, %xmm0;
+       vaesdec (6 * 16)(%rdi), %xmm0, %xmm0;
+       vaesdec (7 * 16)(%rdi), %xmm0, %xmm0;
+       vaesdec (8 * 16)(%rdi), %xmm0, %xmm0;
+       vaesdec (9 * 16)(%rdi), %xmm0, %xmm0;
+       vmovdqa (10 * 16)(%rdi), %xmm1;
+       cmpl $12, %r9d;
+       jb .Lcbc_dec_blk1_last;
+       vaesdec %xmm1, %xmm0, %xmm0;
+       vaesdec (11 * 16)(%rdi), %xmm0, %xmm0;
+       vmovdqa (12 * 16)(%rdi), %xmm1;
+       jz .Lcbc_dec_blk1_last;
+       vaesdec %xmm1, %xmm0, %xmm0;
+       vaesdec (13 * 16)(%rdi), %xmm0, %xmm0;
+       vmovdqa (14 * 16)(%rdi), %xmm1;
+
+       /* Last round and output handling. */
+  .Lcbc_dec_blk1_last:
+       vpxor %xmm1, %xmm15, %xmm15;
+       vaesdeclast %xmm15, %xmm0, %xmm0;
+       vmovdqa %xmm2, %xmm15;
+       vmovdqu %xmm0, (%rdx);
+       leaq 16(%rdx), %rdx;
+
+       jmp .Lcbc_dec_blk1;
+
+.align 8
+.Ldone_cbc_dec:
+       /* Store IV. */
+       vmovdqu %xmm15, (%rsi);
+
+       vzeroall;
+       ret_spec_stop
+       CFI_ENDPROC();
+ELF(.size _gcry_vaes_avx2_cbc_dec_amd64,.-_gcry_vaes_avx2_cbc_dec_amd64)
+
+/**********************************************************************
+  CFB-mode decryption
+ **********************************************************************/
+ELF(.type _gcry_vaes_avx2_cfb_dec_amd64,@function)
+.globl _gcry_vaes_avx2_cfb_dec_amd64
+_gcry_vaes_avx2_cfb_dec_amd64:
+       /* input:
+        *      %rdi: round keys
+        *      %rsi: iv
+        *      %rdx: dst
+        *      %rcx: src
+        *      %r8:  nblocks
+        *      %r9:  nrounds
+        */
+       CFI_STARTPROC();
+
+       /* Load IV. */
+       vmovdqu (%rsi), %xmm15;
+
+       /* Process 16 blocks per loop. */
+.align 8
+.Lcfb_dec_blk16:
+       cmpq $16, %r8;
+       jb .Lcfb_dec_blk8;
+
+       leaq -16(%r8), %r8;
+
+       /* Load input and xor first key. Update IV. */
+       vbroadcasti128 (0 * 16)(%rdi), %ymm8;
+       vinserti128 $1, (0 * 16)(%rcx), %ymm15, %ymm0;
+       vmovdqu (1 * 16)(%rcx), %ymm1;
+       vmovdqu (3 * 16)(%rcx), %ymm2;
+       vmovdqu (5 * 16)(%rcx), %ymm3;
+       vmovdqu (7 * 16)(%rcx), %ymm4;
+       vmovdqu (9 * 16)(%rcx), %ymm5;
+       vmovdqu (11 * 16)(%rcx), %ymm6;
+       vmovdqu (13 * 16)(%rcx), %ymm7;
+       vmovdqu (15 * 16)(%rcx), %xmm15;
+       vpxor %ymm8, %ymm0, %ymm0;
+       vpxor %ymm8, %ymm1, %ymm1;
+       vpxor %ymm8, %ymm2, %ymm2;
+       vpxor %ymm8, %ymm3, %ymm3;
+       vpxor %ymm8, %ymm4, %ymm4;
+       vpxor %ymm8, %ymm5, %ymm5;
+       vpxor %ymm8, %ymm6, %ymm6;
+       vpxor %ymm8, %ymm7, %ymm7;
+       vbroadcasti128 (1 * 16)(%rdi), %ymm8;
+       vmovdqu (0 * 16)(%rcx), %ymm9;
+       vmovdqu (2 * 16)(%rcx), %ymm10;
+       vmovdqu (4 * 16)(%rcx), %ymm11;
+       vmovdqu (6 * 16)(%rcx), %ymm12;
+       vmovdqu (8 * 16)(%rcx), %ymm13;
+       vmovdqu (10 * 16)(%rcx), %ymm14;
+
+       leaq (16 * 16)(%rcx), %rcx;
+
+       /* AES rounds */
+       VAESENC8(%ymm8, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7);
+       vbroadcasti128 (2 * 16)(%rdi), %ymm8;
+       VAESENC8(%ymm8, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7);
+       vbroadcasti128 (3 * 16)(%rdi), %ymm8;
+       VAESENC8(%ymm8, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7);
+       vbroadcasti128 (4 * 16)(%rdi), %ymm8;
+       VAESENC8(%ymm8, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7);
+       vbroadcasti128 (5 * 16)(%rdi), %ymm8;
+       VAESENC8(%ymm8, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7);
+       vbroadcasti128 (6 * 16)(%rdi), %ymm8;
+       VAESENC8(%ymm8, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7);
+       vbroadcasti128 (7 * 16)(%rdi), %ymm8;
+       VAESENC8(%ymm8, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7);
+       vbroadcasti128 (8 * 16)(%rdi), %ymm8;
+       VAESENC8(%ymm8, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7);
+       vbroadcasti128 (9 * 16)(%rdi), %ymm8;
+       VAESENC8(%ymm8, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7);
+       vbroadcasti128 (10 * 16)(%rdi), %ymm8;
+       cmpl $12, %r9d;
+       jb .Lcfb_dec_blk16_last;
+       VAESENC8(%ymm8, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7);
+       vbroadcasti128 (11 * 16)(%rdi), %ymm8;
+       VAESENC8(%ymm8, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7);
+       vbroadcasti128 (12 * 16)(%rdi), %ymm8;
+       jz .Lcfb_dec_blk16_last;
+       VAESENC8(%ymm8, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7);
+       vbroadcasti128 (13 * 16)(%rdi), %ymm8;
+       VAESENC8(%ymm8, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7);
+       vbroadcasti128 (14 * 16)(%rdi), %ymm8;
+
+       /* Last round and output handling. */
+  .Lcfb_dec_blk16_last:
+       vpxor %ymm8, %ymm9, %ymm9;
+       vpxor %ymm8, %ymm10, %ymm10;
+       vpxor %ymm8, %ymm11, %ymm11;
+       vpxor %ymm8, %ymm12, %ymm12;
+       vpxor %ymm8, %ymm13, %ymm13;
+       vpxor %ymm8, %ymm14, %ymm14;
+       vaesenclast %ymm9, %ymm0, %ymm0;
+       vaesenclast %ymm10, %ymm1, %ymm1;
+       vpxor (-4 * 16)(%rcx), %ymm8, %ymm9;
+       vpxor (-2 * 16)(%rcx), %ymm8, %ymm10;
+       vaesenclast %ymm11, %ymm2, %ymm2;
+       vaesenclast %ymm12, %ymm3, %ymm3;
+       vaesenclast %ymm13, %ymm4, %ymm4;
+       vaesenclast %ymm14, %ymm5, %ymm5;
+       vaesenclast %ymm9, %ymm6, %ymm6;
+       vaesenclast %ymm10, %ymm7, %ymm7;
+       vmovdqu %ymm0, (0 * 16)(%rdx);
+       vmovdqu %ymm1, (2 * 16)(%rdx);
+       vmovdqu %ymm2, (4 * 16)(%rdx);
+       vmovdqu %ymm3, (6 * 16)(%rdx);
+       vmovdqu %ymm4, (8 * 16)(%rdx);
+       vmovdqu %ymm5, (10 * 16)(%rdx);
+       vmovdqu %ymm6, (12 * 16)(%rdx);
+       vmovdqu %ymm7, (14 * 16)(%rdx);
+       leaq (16 * 16)(%rdx), %rdx;
+
+       jmp .Lcfb_dec_blk16;
+
+       /* Handle trailing eight blocks. */
+.align 8
+.Lcfb_dec_blk8:
+       cmpq $8, %r8;
+       jb .Lcfb_dec_blk4;
+
+       leaq -8(%r8), %r8;
+
+       /* Load input and xor first key. Update IV. */
+       vbroadcasti128 (0 * 16)(%rdi), %ymm4;
+       vinserti128 $1, (0 * 16)(%rcx), %ymm15, %ymm0;
+       vmovdqu (1 * 16)(%rcx), %ymm1;
+       vmovdqu (3 * 16)(%rcx), %ymm2;
+       vmovdqu (5 * 16)(%rcx), %ymm3;
+       vmovdqu (7 * 16)(%rcx), %xmm15;
+       vpxor %ymm4, %ymm0, %ymm0;
+       vpxor %ymm4, %ymm1, %ymm1;
+       vpxor %ymm4, %ymm2, %ymm2;
+       vpxor %ymm4, %ymm3, %ymm3;
+       vbroadcasti128 (1 * 16)(%rdi), %ymm4;
+       vmovdqu (0 * 16)(%rcx), %ymm10;
+       vmovdqu (2 * 16)(%rcx), %ymm11;
+       vmovdqu (4 * 16)(%rcx), %ymm12;
+       vmovdqu (6 * 16)(%rcx), %ymm13;
+
+       leaq (8 * 16)(%rcx), %rcx;
+
+       /* AES rounds */
+       VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+       vbroadcasti128 (2 * 16)(%rdi), %ymm4;
+       VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+       vbroadcasti128 (3 * 16)(%rdi), %ymm4;
+       VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+       vbroadcasti128 (4 * 16)(%rdi), %ymm4;
+       VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+       vbroadcasti128 (5 * 16)(%rdi), %ymm4;
+       VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+       vbroadcasti128 (6 * 16)(%rdi), %ymm4;
+       VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+       vbroadcasti128 (7 * 16)(%rdi), %ymm4;
+       VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+       vbroadcasti128 (8 * 16)(%rdi), %ymm4;
+       VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+       vbroadcasti128 (9 * 16)(%rdi), %ymm4;
+       VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+       vbroadcasti128 (10 * 16)(%rdi), %ymm4;
+       cmpl $12, %r9d;
+       jb .Lcfb_dec_blk8_last;
+       VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+       vbroadcasti128 (11 * 16)(%rdi), %ymm4;
+       VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+       vbroadcasti128 (12 * 16)(%rdi), %ymm4;
+       jz .Lcfb_dec_blk8_last;
+       VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+       vbroadcasti128 (13 * 16)(%rdi), %ymm4;
+       VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+       vbroadcasti128 (14 * 16)(%rdi), %ymm4;
+
+       /* Last round and output handling. */
+  .Lcfb_dec_blk8_last:
+       vpxor %ymm4, %ymm10, %ymm10;
+       vpxor %ymm4, %ymm11, %ymm11;
+       vpxor %ymm4, %ymm12, %ymm12;
+       vpxor %ymm4, %ymm13, %ymm13;
+       vaesenclast %ymm10, %ymm0, %ymm0;
+       vaesenclast %ymm11, %ymm1, %ymm1;
+       vaesenclast %ymm12, %ymm2, %ymm2;
+       vaesenclast %ymm13, %ymm3, %ymm3;
+       vmovdqu %ymm0, (0 * 16)(%rdx);
+       vmovdqu %ymm1, (2 * 16)(%rdx);
+       vmovdqu %ymm2, (4 * 16)(%rdx);
+       vmovdqu %ymm3, (6 * 16)(%rdx);
+       leaq (8 * 16)(%rdx), %rdx;
+
+       /* Handle trailing four blocks. */
+.align 8
+.Lcfb_dec_blk4:
+       cmpq $4, %r8;
+       jb .Lcfb_dec_blk1;
+
+       leaq -4(%r8), %r8;
+
+       /* Load input and xor first key. Update IV. */
+       vbroadcasti128 (0 * 16)(%rdi), %ymm4;
+       vinserti128 $1, (0 * 16)(%rcx), %ymm15, %ymm0;
+       vmovdqu (1 * 16)(%rcx), %ymm1;
+       vmovdqu (3 * 16)(%rcx), %xmm15;
+       vpxor %ymm4, %ymm0, %ymm0;
+       vpxor %ymm4, %ymm1, %ymm1;
+       vbroadcasti128 (1 * 16)(%rdi), %ymm4;
+       vmovdqu (0 * 16)(%rcx), %ymm10;
+       vmovdqu (2 * 16)(%rcx), %ymm11;
+
+       leaq (4 * 16)(%rcx), %rcx;
+
+       /* AES rounds */
+       VAESENC2(%ymm4, %ymm0, %ymm1);
+       vbroadcasti128 (2 * 16)(%rdi), %ymm4;
+       VAESENC2(%ymm4, %ymm0, %ymm1);
+       vbroadcasti128 (3 * 16)(%rdi), %ymm4;
+       VAESENC2(%ymm4, %ymm0, %ymm1);
+       vbroadcasti128 (4 * 16)(%rdi), %ymm4;
+       VAESENC2(%ymm4, %ymm0, %ymm1);
+       vbroadcasti128 (5 * 16)(%rdi), %ymm4;
+       VAESENC2(%ymm4, %ymm0, %ymm1);
+       vbroadcasti128 (6 * 16)(%rdi), %ymm4;
+       VAESENC2(%ymm4, %ymm0, %ymm1);
+       vbroadcasti128 (7 * 16)(%rdi), %ymm4;
+       VAESENC2(%ymm4, %ymm0, %ymm1);
+       vbroadcasti128 (8 * 16)(%rdi), %ymm4;
+       VAESENC2(%ymm4, %ymm0, %ymm1);
+       vbroadcasti128 (9 * 16)(%rdi), %ymm4;
+       VAESENC2(%ymm4, %ymm0, %ymm1);
+       vbroadcasti128 (10 * 16)(%rdi), %ymm4;
+       cmpl $12, %r9d;
+       jb .Lcfb_dec_blk4_last;
+       VAESENC2(%ymm4, %ymm0, %ymm1);
+       vbroadcasti128 (11 * 16)(%rdi), %ymm4;
+       VAESENC2(%ymm4, %ymm0, %ymm1);
+       vbroadcasti128 (12 * 16)(%rdi), %ymm4;
+       jz .Lcfb_dec_blk4_last;
+       VAESENC2(%ymm4, %ymm0, %ymm1);
+       vbroadcasti128 (13 * 16)(%rdi), %ymm4;
+       VAESENC2(%ymm4, %ymm0, %ymm1);
+       vbroadcasti128 (14 * 16)(%rdi), %ymm4;
+
+       /* Last round and output handling. */
+  .Lcfb_dec_blk4_last:
+       vpxor %ymm4, %ymm10, %ymm10;
+       vpxor %ymm4, %ymm11, %ymm11;
+       vaesenclast %ymm10, %ymm0, %ymm0;
+       vaesenclast %ymm11, %ymm1, %ymm1;
+       vmovdqu %ymm0, (0 * 16)(%rdx);
+       vmovdqu %ymm1, (2 * 16)(%rdx);
+       leaq (4 * 16)(%rdx), %rdx;
+
+       /* Process trailing one to three blocks, one per loop. */
+.align 8
+.Lcfb_dec_blk1:
+       cmpq $1, %r8;
+       jb .Ldone_cfb_dec;
+
+       leaq -1(%r8), %r8;
+
+       /* Xor first key. */
+       vpxor (0 * 16)(%rdi), %xmm15, %xmm0;
+
+       /* Load input as next IV. */
+       vmovdqu (%rcx), %xmm15;
+       leaq 16(%rcx), %rcx;
+
+       /* AES rounds. */
+       vaesenc (1 * 16)(%rdi), %xmm0, %xmm0;
+       vaesenc (2 * 16)(%rdi), %xmm0, %xmm0;
+       vaesenc (3 * 16)(%rdi), %xmm0, %xmm0;
+       vaesenc (4 * 16)(%rdi), %xmm0, %xmm0;
+       vaesenc (5 * 16)(%rdi), %xmm0, %xmm0;
+       vaesenc (6 * 16)(%rdi), %xmm0, %xmm0;
+       vaesenc (7 * 16)(%rdi), %xmm0, %xmm0;
+       vaesenc (8 * 16)(%rdi), %xmm0, %xmm0;
+       vaesenc (9 * 16)(%rdi), %xmm0, %xmm0;
+       vmovdqa (10 * 16)(%rdi), %xmm1;
+       cmpl $12, %r9d;
+       jb .Lcfb_dec_blk1_last;
+       vaesenc %xmm1, %xmm0, %xmm0;
+       vaesenc (11 * 16)(%rdi), %xmm0, %xmm0;
+       vmovdqa (12 * 16)(%rdi), %xmm1;
+       jz .Lcfb_dec_blk1_last;
+       vaesenc %xmm1, %xmm0, %xmm0;
+       vaesenc (13 * 16)(%rdi), %xmm0, %xmm0;
+       vmovdqa (14 * 16)(%rdi), %xmm1;
+
+       /* Last round and output handling. */
+  .Lcfb_dec_blk1_last:
+       vpxor %xmm15, %xmm1, %xmm1;
+       vaesenclast %xmm1, %xmm0, %xmm0;
+       vmovdqu %xmm0, (%rdx);
+       leaq 16(%rdx), %rdx;
+
+       jmp .Lcfb_dec_blk1;
+
+.align 8
+.Ldone_cfb_dec:
+       /* Store IV. */
+       vmovdqu %xmm15, (%rsi);
+
+       vzeroall;
+       ret_spec_stop
+       CFI_ENDPROC();
+ELF(.size _gcry_vaes_avx2_cfb_dec_amd64,.-_gcry_vaes_avx2_cfb_dec_amd64)
+
+/**********************************************************************
+  CTR-mode encryption
+ **********************************************************************/
+ELF(.type _gcry_vaes_avx2_ctr_enc_amd64,@function)
+.globl _gcry_vaes_avx2_ctr_enc_amd64
+_gcry_vaes_avx2_ctr_enc_amd64:
+       /* input:
+        *      %rdi: round keys
+        *      %rsi: counter
+        *      %rdx: dst
+        *      %rcx: src
+        *      %r8:  nblocks
+        *      %r9:  nrounds
+        */
+       CFI_STARTPROC();
+
+       movq 8(%rsi), %r10;
+       movq 0(%rsi), %r11;
+       bswapq %r10;
+       bswapq %r11;
+
+       vpcmpeqd %ymm15, %ymm15, %ymm15;
+       vpsrldq $8, %ymm15, %ymm15;     // 0:-1
+       vpaddq %ymm15, %ymm15, %ymm14;  // 0:-2
+       vbroadcasti128 .Lbswap128_mask rRIP, %ymm13;
+
+#define inc_le128(x, minus_one, tmp) \
+       vpcmpeqq minus_one, x, tmp; \
+       vpsubq minus_one, x, x; \
+       vpslldq $8, tmp, tmp; \
+       vpsubq tmp, x, x;
+
+#define add2_le128(x, minus_one, minus_two, tmp1, tmp2) \
+       vpcmpeqq minus_one, x, tmp1; \
+       vpcmpeqq minus_two, x, tmp2; \
+       vpor tmp1, tmp2, tmp2; \
+       vpsubq minus_two, x, x; \
+       vpslldq $8, tmp2, tmp2; \
+       vpsubq tmp2, x, x;
+
+       /* Process 16 blocks per loop. */
+.align 8
+.Lctr_enc_blk16:
+       cmpq $16, %r8;
+       jb .Lctr_enc_blk8;
+
+       leaq -16(%r8), %r8;
+
+       vbroadcasti128 (%rsi), %ymm7;
+       vbroadcasti128 (0 * 16)(%rdi), %ymm8;
+
+       /* detect if carry handling is needed */
+       addb $16, 15(%rsi);
+       jc .Lctr_enc_blk16_handle_carry;
+
+       /* Increment counters. */
+       vpaddb .Lbige_addb_0 rRIP, %ymm7, %ymm0;
+       vpaddb .Lbige_addb_2 rRIP, %ymm7, %ymm1;
+       vpaddb .Lbige_addb_4 rRIP, %ymm7, %ymm2;
+       vpaddb .Lbige_addb_6 rRIP, %ymm7, %ymm3;
+       vpaddb .Lbige_addb_8 rRIP, %ymm7, %ymm4;
+       vpaddb .Lbige_addb_10 rRIP, %ymm7, %ymm5;
+       vpaddb .Lbige_addb_12 rRIP, %ymm7, %ymm6;
+       vpaddb .Lbige_addb_14 rRIP, %ymm7, %ymm7;
+       leaq 16(%r10), %r10;
+
+  .Lctr_enc_blk16_rounds:
+       /* AES rounds */
+       XOR8(%ymm8, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7);
+       vbroadcasti128 (1 * 16)(%rdi), %ymm8;
+       VAESENC8(%ymm8, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7);
+       vbroadcasti128 (2 * 16)(%rdi), %ymm8;
+       VAESENC8(%ymm8, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7);
+       vbroadcasti128 (3 * 16)(%rdi), %ymm8;
+       VAESENC8(%ymm8, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7);
+       vbroadcasti128 (4 * 16)(%rdi), %ymm8;
+       VAESENC8(%ymm8, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7);
+       vbroadcasti128 (5 * 16)(%rdi), %ymm8;
+       VAESENC8(%ymm8, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7);
+       vbroadcasti128 (6 * 16)(%rdi), %ymm8;
+       VAESENC8(%ymm8, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7);
+       vbroadcasti128 (7 * 16)(%rdi), %ymm8;
+       VAESENC8(%ymm8, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7);
+       vbroadcasti128 (8 * 16)(%rdi), %ymm8;
+       VAESENC8(%ymm8, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7);
+       vbroadcasti128 (9 * 16)(%rdi), %ymm8;
+       VAESENC8(%ymm8, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7);
+       vbroadcasti128 (10 * 16)(%rdi), %ymm8;
+       cmpl $12, %r9d;
+       jb .Lctr_enc_blk16_last;
+       VAESENC8(%ymm8, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7);
+       vbroadcasti128 (11 * 16)(%rdi), %ymm8;
+       VAESENC8(%ymm8, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7);
+       vbroadcasti128 (12 * 16)(%rdi), %ymm8;
+       jz .Lctr_enc_blk16_last;
+       VAESENC8(%ymm8, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7);
+       vbroadcasti128 (13 * 16)(%rdi), %ymm8;
+       VAESENC8(%ymm8, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7);
+       vbroadcasti128 (14 * 16)(%rdi), %ymm8;
+
+       /* Last round and output handling. */
+  .Lctr_enc_blk16_last:
+       vpxor (0 * 16)(%rcx), %ymm8, %ymm9; /* Xor src to last round key. */
+       vpxor (2 * 16)(%rcx), %ymm8, %ymm10;
+       vpxor (4 * 16)(%rcx), %ymm8, %ymm11;
+       vpxor (6 * 16)(%rcx), %ymm8, %ymm12;
+       vaesenclast %ymm9, %ymm0, %ymm0;
+       vaesenclast %ymm10, %ymm1, %ymm1;
+       vaesenclast %ymm11, %ymm2, %ymm2;
+       vaesenclast %ymm12, %ymm3, %ymm3;
+       vpxor (8 * 16)(%rcx), %ymm8, %ymm9;
+       vpxor (10 * 16)(%rcx), %ymm8, %ymm10;
+       vpxor (12 * 16)(%rcx), %ymm8, %ymm11;
+       vpxor (14 * 16)(%rcx), %ymm8, %ymm8;
+       leaq (16 * 16)(%rcx), %rcx;
+       vaesenclast %ymm9, %ymm4, %ymm4;
+       vaesenclast %ymm10, %ymm5, %ymm5;
+       vaesenclast %ymm11, %ymm6, %ymm6;
+       vaesenclast %ymm8, %ymm7, %ymm7;
+       vmovdqu %ymm0, (0 * 16)(%rdx);
+       vmovdqu %ymm1, (2 * 16)(%rdx);
+       vmovdqu %ymm2, (4 * 16)(%rdx);
+       vmovdqu %ymm3, (6 * 16)(%rdx);
+       vmovdqu %ymm4, (8 * 16)(%rdx);
+       vmovdqu %ymm5, (10 * 16)(%rdx);
+       vmovdqu %ymm6, (12 * 16)(%rdx);
+       vmovdqu %ymm7, (14 * 16)(%rdx);
+       leaq (16 * 16)(%rdx), %rdx;
+
+       jmp .Lctr_enc_blk16;
+
+  .align 8
+  .Lctr_enc_blk16_handle_carry:
+       /* Increment counters (handle carry). */
+       vpshufb %xmm13, %xmm7, %xmm1; /* be => le */
+       vmovdqa %xmm1, %xmm0;
+       inc_le128(%xmm1, %xmm15, %xmm5);
+       vinserti128 $1, %xmm1, %ymm0, %ymm7; /* ctr: +1:+0 */
+       vpshufb %ymm13, %ymm7, %ymm0;
+       addq $16, %r10;
+       adcq $0, %r11;
+       bswapq %r10;
+       bswapq %r11;
+       movq %r10, 8(%rsi);
+       movq %r11, 0(%rsi);
+       bswapq %r10;
+       bswapq %r11;
+       add2_le128(%ymm7, %ymm15, %ymm14, %ymm9, %ymm10); /* ctr: +3:+2 */
+       vpshufb %ymm13, %ymm7, %ymm1;
+       add2_le128(%ymm7, %ymm15, %ymm14, %ymm9, %ymm10); /* ctr: +5:+4 */
+       vpshufb %ymm13, %ymm7, %ymm2;
+       add2_le128(%ymm7, %ymm15, %ymm14, %ymm9, %ymm10); /* ctr: +7:+6 */
+       vpshufb %ymm13, %ymm7, %ymm3;
+       add2_le128(%ymm7, %ymm15, %ymm14, %ymm9, %ymm10); /* ctr: +9:+8 */
+       vpshufb %ymm13, %ymm7, %ymm4;
+       add2_le128(%ymm7, %ymm15, %ymm14, %ymm9, %ymm10); /* ctr: +11:+10 */
+       vpshufb %ymm13, %ymm7, %ymm5;
+       add2_le128(%ymm7, %ymm15, %ymm14, %ymm9, %ymm10); /* ctr: +13:+12 */
+       vpshufb %ymm13, %ymm7, %ymm6;
+       add2_le128(%ymm7, %ymm15, %ymm14, %ymm9, %ymm10); /* ctr: +15:+14 */
+       vpshufb %ymm13, %ymm7, %ymm7;
+
+       jmp .Lctr_enc_blk16_rounds;
+
+       /* Handle trailing eight blocks. */
+.align 8
+.Lctr_enc_blk8:
+       cmpq $8, %r8;
+       jb .Lctr_enc_blk4;
+
+       leaq -8(%r8), %r8;
+
+       vbroadcasti128 (%rsi), %ymm3;
+       vbroadcasti128 (0 * 16)(%rdi), %ymm4;
+
+       /* detect if carry handling is needed */
+       addb $8, 15(%rsi);
+       jc .Lctr_enc_blk8_handle_carry;
+
+       /* Increment counters. */
+       vpaddb .Lbige_addb_0 rRIP, %ymm3, %ymm0;
+       vpaddb .Lbige_addb_2 rRIP, %ymm3, %ymm1;
+       vpaddb .Lbige_addb_4 rRIP, %ymm3, %ymm2;
+       vpaddb .Lbige_addb_6 rRIP, %ymm3, %ymm3;
+       leaq 8(%r10), %r10;
+
+  .Lctr_enc_blk8_rounds:
+       /* AES rounds */
+       XOR4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+       vbroadcasti128 (1 * 16)(%rdi), %ymm4;
+       VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+       vbroadcasti128 (2 * 16)(%rdi), %ymm4;
+       VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+       vbroadcasti128 (3 * 16)(%rdi), %ymm4;
+       VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+       vbroadcasti128 (4 * 16)(%rdi), %ymm4;
+       VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+       vbroadcasti128 (5 * 16)(%rdi), %ymm4;
+       VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+       vbroadcasti128 (6 * 16)(%rdi), %ymm4;
+       VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+       vbroadcasti128 (7 * 16)(%rdi), %ymm4;
+       VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+       vbroadcasti128 (8 * 16)(%rdi), %ymm4;
+       VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+       vbroadcasti128 (9 * 16)(%rdi), %ymm4;
+       VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+       vbroadcasti128 (10 * 16)(%rdi), %ymm4;
+       cmpl $12, %r9d;
+       jb .Lctr_enc_blk8_last;
+       VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+       vbroadcasti128 (11 * 16)(%rdi), %ymm4;
+       VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+       vbroadcasti128 (12 * 16)(%rdi), %ymm4;
+       jz .Lctr_enc_blk8_last;
+       VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+       vbroadcasti128 (13 * 16)(%rdi), %ymm4;
+       VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+       vbroadcasti128 (14 * 16)(%rdi), %ymm4;
+
+       /* Last round and output handling. */
+  .Lctr_enc_blk8_last:
+       vpxor (0 * 16)(%rcx), %ymm4, %ymm5; /* Xor src to last round key. */
+       vpxor (2 * 16)(%rcx), %ymm4, %ymm6;
+       vpxor (4 * 16)(%rcx), %ymm4, %ymm7;
+       vpxor (6 * 16)(%rcx), %ymm4, %ymm4;
+       leaq (8 * 16)(%rcx), %rcx;
+       vaesenclast %ymm5, %ymm0, %ymm0;
+       vaesenclast %ymm6, %ymm1, %ymm1;
+       vaesenclast %ymm7, %ymm2, %ymm2;
+       vaesenclast %ymm4, %ymm3, %ymm3;
+       vmovdqu %ymm0, (0 * 16)(%rdx);
+       vmovdqu %ymm1, (2 * 16)(%rdx);
+       vmovdqu %ymm2, (4 * 16)(%rdx);
+       vmovdqu %ymm3, (6 * 16)(%rdx);
+       leaq (8 * 16)(%rdx), %rdx;
+
+       jmp .Lctr_enc_blk4;
+
+  .align 8
+  .Lctr_enc_blk8_handle_carry:
+       /* Increment counters (handle carry). */
+       vpshufb %xmm13, %xmm3, %xmm1; /* be => le */
+       vmovdqa %xmm1, %xmm0;
+       inc_le128(%xmm1, %xmm15, %xmm5);
+       vinserti128 $1, %xmm1, %ymm0, %ymm3; /* ctr: +1:+0 */
+       vpshufb %ymm13, %ymm3, %ymm0;
+       addq $8, %r10;
+       adcq $0, %r11;
+       bswapq %r10;
+       bswapq %r11;
+       movq %r10, 8(%rsi);
+       movq %r11, 0(%rsi);
+       bswapq %r10;
+       bswapq %r11;
+       add2_le128(%ymm3, %ymm15, %ymm14, %ymm5, %ymm6); /* ctr: +3:+2 */
+       vpshufb %ymm13, %ymm3, %ymm1;
+       add2_le128(%ymm3, %ymm15, %ymm14, %ymm5, %ymm6); /* ctr: +5:+4 */
+       vpshufb %ymm13, %ymm3, %ymm2;
+       add2_le128(%ymm3, %ymm15, %ymm14, %ymm5, %ymm6); /* ctr: +7:+6 */
+       vpshufb %ymm13, %ymm3, %ymm3;
+
+       jmp .Lctr_enc_blk8_rounds;
+
+       /* Handle trailing four blocks. */
+.align 8
+.Lctr_enc_blk4:
+       cmpq $4, %r8;
+       jb .Lctr_enc_blk1;
+
+       leaq -4(%r8), %r8;
+
+       vbroadcasti128 (%rsi), %ymm3;
+       vbroadcasti128 (0 * 16)(%rdi), %ymm4;
+
+       /* detect if carry handling is needed */
+       addb $4, 15(%rsi);
+       jc .Lctr_enc_blk4_handle_carry;
+
+       /* Increment counters. */
+       vpaddb .Lbige_addb_0 rRIP, %ymm3, %ymm0;
+       vpaddb .Lbige_addb_2 rRIP, %ymm3, %ymm1;
+       leaq 4(%r10), %r10;
+
+  .Lctr_enc_blk4_rounds:
+       /* AES rounds */
+       XOR2(%ymm4, %ymm0, %ymm1);
+       vbroadcasti128 (1 * 16)(%rdi), %ymm4;
+       VAESENC2(%ymm4, %ymm0, %ymm1);
+       vbroadcasti128 (2 * 16)(%rdi), %ymm4;
+       VAESENC2(%ymm4, %ymm0, %ymm1);
+       vbroadcasti128 (3 * 16)(%rdi), %ymm4;
+       VAESENC2(%ymm4, %ymm0, %ymm1);
+       vbroadcasti128 (4 * 16)(%rdi), %ymm4;
+       VAESENC2(%ymm4, %ymm0, %ymm1);
+       vbroadcasti128 (5 * 16)(%rdi), %ymm4;
+       VAESENC2(%ymm4, %ymm0, %ymm1);
+       vbroadcasti128 (6 * 16)(%rdi), %ymm4;
+       VAESENC2(%ymm4, %ymm0, %ymm1);
+       vbroadcasti128 (7 * 16)(%rdi), %ymm4;
+       VAESENC2(%ymm4, %ymm0, %ymm1);
+       vbroadcasti128 (8 * 16)(%rdi), %ymm4;
+       VAESENC2(%ymm4, %ymm0, %ymm1);
+       vbroadcasti128 (9 * 16)(%rdi), %ymm4;
+       VAESENC2(%ymm4, %ymm0, %ymm1);
+       vbroadcasti128 (10 * 16)(%rdi), %ymm4;
+       cmpl $12, %r9d;
+       jb .Lctr_enc_blk4_last;
+       VAESENC2(%ymm4, %ymm0, %ymm1);
+       vbroadcasti128 (11 * 16)(%rdi), %ymm4;
+       VAESENC2(%ymm4, %ymm0, %ymm1);
+       vbroadcasti128 (12 * 16)(%rdi), %ymm4;
+       jz .Lctr_enc_blk4_last;
+       VAESENC2(%ymm4, %ymm0, %ymm1);
+       vbroadcasti128 (13 * 16)(%rdi), %ymm4;
+       VAESENC2(%ymm4, %ymm0, %ymm1);
+       vbroadcasti128 (14 * 16)(%rdi), %ymm4;
+
+       /* Last round and output handling. */
+  .Lctr_enc_blk4_last:
+       vpxor (0 * 16)(%rcx), %ymm4, %ymm5; /* Xor src to last round key. */
+       vpxor (2 * 16)(%rcx), %ymm4, %ymm6;
+       leaq (4 * 16)(%rcx), %rcx;
+       vaesenclast %ymm5, %ymm0, %ymm0;
+       vaesenclast %ymm6, %ymm1, %ymm1;
+       vmovdqu %ymm0, (0 * 16)(%rdx);
+       vmovdqu %ymm1, (2 * 16)(%rdx);
+       leaq (4 * 16)(%rdx), %rdx;
+
+       jmp .Lctr_enc_blk1;
+
+  .align 8
+  .Lctr_enc_blk4_handle_carry:
+       /* Increment counters (handle carry). */
+       vpshufb %xmm13, %xmm3, %xmm1; /* be => le */
+       vmovdqa %xmm1, %xmm0;
+       inc_le128(%xmm1, %xmm15, %xmm5);
+       vinserti128 $1, %xmm1, %ymm0, %ymm3; /* ctr: +1:+0 */
+       vpshufb %ymm13, %ymm3, %ymm0;
+       addq $4, %r10;
+       adcq $0, %r11;
+       bswapq %r10;
+       bswapq %r11;
+       movq %r10, 8(%rsi);
+       movq %r11, 0(%rsi);
+       bswapq %r10;
+       bswapq %r11;
+       add2_le128(%ymm3, %ymm15, %ymm14, %ymm5, %ymm6); /* ctr: +3:+2 */
+       vpshufb %ymm13, %ymm3, %ymm1;
+
+       jmp .Lctr_enc_blk4_rounds;
+
+       /* Process trailing one to three blocks, one per loop. */
+.align 8
+.Lctr_enc_blk1:
+       cmpq $1, %r8;
+       jb .Ldone_ctr_enc;
+
+       leaq -1(%r8), %r8;
+
+       /* Load and increament counter. */
+       vmovdqu (%rsi), %xmm0;
+       addq $1, %r10;
+       adcq $0, %r11;
+       bswapq %r10;
+       bswapq %r11;
+       movq %r10, 8(%rsi);
+       movq %r11, 0(%rsi);
+       bswapq %r10;
+       bswapq %r11;
+
+       /* AES rounds. */
+       vpxor (0 * 16)(%rdi), %xmm0, %xmm0;
+       vaesenc (1 * 16)(%rdi), %xmm0, %xmm0;
+       vaesenc (2 * 16)(%rdi), %xmm0, %xmm0;
+       vaesenc (3 * 16)(%rdi), %xmm0, %xmm0;
+       vaesenc (4 * 16)(%rdi), %xmm0, %xmm0;
+       vaesenc (5 * 16)(%rdi), %xmm0, %xmm0;
+       vaesenc (6 * 16)(%rdi), %xmm0, %xmm0;
+       vaesenc (7 * 16)(%rdi), %xmm0, %xmm0;
+       vaesenc (8 * 16)(%rdi), %xmm0, %xmm0;
+       vaesenc (9 * 16)(%rdi), %xmm0, %xmm0;
+       vmovdqa (10 * 16)(%rdi), %xmm1;
+       cmpl $12, %r9d;
+       jb .Lctr_enc_blk1_last;
+       vaesenc %xmm1, %xmm0, %xmm0;
+       vaesenc (11 * 16)(%rdi), %xmm0, %xmm0;
+       vmovdqa (12 * 16)(%rdi), %xmm1;
+       jz .Lctr_enc_blk1_last;
+       vaesenc %xmm1, %xmm0, %xmm0;
+       vaesenc (13 * 16)(%rdi), %xmm0, %xmm0;
+       vmovdqa (14 * 16)(%rdi), %xmm1;
+
+       /* Last round and output handling. */
+  .Lctr_enc_blk1_last:
+       vpxor (%rcx), %xmm1, %xmm1; /* Xor src to last round key. */
+       leaq 16(%rcx), %rcx;
+       vaesenclast %xmm1, %xmm0, %xmm0; /* Last round and xor with xmm1. */
+       vmovdqu %xmm0, (%rdx);
+       leaq 16(%rdx), %rdx;
+
+       jmp .Lctr_enc_blk1;
+
+.align 8
+.Ldone_ctr_enc:
+       vzeroall;
+       xorl %r10d, %r10d;
+       xorl %r11d, %r11d;
+       ret_spec_stop
+       CFI_ENDPROC();
+ELF(.size _gcry_vaes_avx2_ctr_enc_amd64,.-_gcry_vaes_avx2_ctr_enc_amd64)
+
+/**********************************************************************
+  Little-endian 32-bit CTR-mode encryption (GCM-SIV)
+ **********************************************************************/
+ELF(.type _gcry_vaes_avx2_ctr32le_enc_amd64,@function)
+.globl _gcry_vaes_avx2_ctr32le_enc_amd64
+_gcry_vaes_avx2_ctr32le_enc_amd64:
+       /* input:
+        *      %rdi: round keys
+        *      %rsi: counter
+        *      %rdx: dst
+        *      %rcx: src
+        *      %r8:  nblocks
+        *      %r9:  nrounds
+        */
+       CFI_STARTPROC();
+
+       vbroadcasti128 (%rsi), %ymm15; // CTR
+
+       /* Process 16 blocks per loop. */
+.align 8
+.Lctr32le_enc_blk16:
+       cmpq $16, %r8;
+       jb .Lctr32le_enc_blk8;
+
+       leaq -16(%r8), %r8;
+
+       vbroadcasti128 (0 * 16)(%rdi), %ymm8;
+
+       /* Increment counters. */
+       vpaddd .Lle_addd_0 rRIP, %ymm15, %ymm0;
+       vpaddd .Lle_addd_2 rRIP, %ymm15, %ymm1;
+       vpaddd .Lle_addd_4 rRIP, %ymm15, %ymm2;
+       vpaddd .Lle_addd_6 rRIP, %ymm15, %ymm3;
+       vpaddd .Lle_addd_8 rRIP, %ymm15, %ymm4;
+       vpaddd .Lle_addd_10 rRIP, %ymm15, %ymm5;
+       vpaddd .Lle_addd_12 rRIP, %ymm15, %ymm6;
+       vpaddd .Lle_addd_14 rRIP, %ymm15, %ymm7;
+
+       vpaddd .Lle_addd_16_2 rRIP, %ymm15, %ymm15;
+
+       /* AES rounds */
+       XOR8(%ymm8, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7);
+       vbroadcasti128 (1 * 16)(%rdi), %ymm8;
+       VAESENC8(%ymm8, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7);
+       vbroadcasti128 (2 * 16)(%rdi), %ymm8;
+       VAESENC8(%ymm8, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7);
+       vbroadcasti128 (3 * 16)(%rdi), %ymm8;
+       VAESENC8(%ymm8, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7);
+       vbroadcasti128 (4 * 16)(%rdi), %ymm8;
+       VAESENC8(%ymm8, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7);
+       vbroadcasti128 (5 * 16)(%rdi), %ymm8;
+       VAESENC8(%ymm8, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7);
+       vbroadcasti128 (6 * 16)(%rdi), %ymm8;
+       VAESENC8(%ymm8, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7);
+       vbroadcasti128 (7 * 16)(%rdi), %ymm8;
+       VAESENC8(%ymm8, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7);
+       vbroadcasti128 (8 * 16)(%rdi), %ymm8;
+       VAESENC8(%ymm8, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7);
+       vbroadcasti128 (9 * 16)(%rdi), %ymm8;
+       VAESENC8(%ymm8, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7);
+       vbroadcasti128 (10 * 16)(%rdi), %ymm8;
+       cmpl $12, %r9d;
+       jb .Lctr32le_enc_blk16_last;
+       VAESENC8(%ymm8, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7);
+       vbroadcasti128 (11 * 16)(%rdi), %ymm8;
+       VAESENC8(%ymm8, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7);
+       vbroadcasti128 (12 * 16)(%rdi), %ymm8;
+       jz .Lctr32le_enc_blk16_last;
+       VAESENC8(%ymm8, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7);
+       vbroadcasti128 (13 * 16)(%rdi), %ymm8;
+       VAESENC8(%ymm8, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7);
+       vbroadcasti128 (14 * 16)(%rdi), %ymm8;
+
+       /* Last round and output handling. */
+  .Lctr32le_enc_blk16_last:
+       vpxor (0 * 16)(%rcx), %ymm8, %ymm9; /* Xor src to last round key. */
+       vpxor (2 * 16)(%rcx), %ymm8, %ymm10;
+       vpxor (4 * 16)(%rcx), %ymm8, %ymm11;
+       vpxor (6 * 16)(%rcx), %ymm8, %ymm12;
+       vaesenclast %ymm9, %ymm0, %ymm0;
+       vaesenclast %ymm10, %ymm1, %ymm1;
+       vaesenclast %ymm11, %ymm2, %ymm2;
+       vaesenclast %ymm12, %ymm3, %ymm3;
+       vpxor (8 * 16)(%rcx), %ymm8, %ymm9;
+       vpxor (10 * 16)(%rcx), %ymm8, %ymm10;
+       vpxor (12 * 16)(%rcx), %ymm8, %ymm11;
+       vpxor (14 * 16)(%rcx), %ymm8, %ymm8;
+       leaq (16 * 16)(%rcx), %rcx;
+       vaesenclast %ymm9, %ymm4, %ymm4;
+       vaesenclast %ymm10, %ymm5, %ymm5;
+       vaesenclast %ymm11, %ymm6, %ymm6;
+       vaesenclast %ymm8, %ymm7, %ymm7;
+       vmovdqu %ymm0, (0 * 16)(%rdx);
+       vmovdqu %ymm1, (2 * 16)(%rdx);
+       vmovdqu %ymm2, (4 * 16)(%rdx);
+       vmovdqu %ymm3, (6 * 16)(%rdx);
+       vmovdqu %ymm4, (8 * 16)(%rdx);
+       vmovdqu %ymm5, (10 * 16)(%rdx);
+       vmovdqu %ymm6, (12 * 16)(%rdx);
+       vmovdqu %ymm7, (14 * 16)(%rdx);
+       leaq (16 * 16)(%rdx), %rdx;
+
+       jmp .Lctr32le_enc_blk16;
+
+       /* Handle trailing eight blocks. */
+.align 8
+.Lctr32le_enc_blk8:
+       cmpq $8, %r8;
+       jb .Lctr32le_enc_blk4;
+
+       leaq -8(%r8), %r8;
+
+       vbroadcasti128 (0 * 16)(%rdi), %ymm4;
+
+       /* Increment counters. */
+       vpaddd .Lle_addd_0 rRIP, %ymm15, %ymm0;
+       vpaddd .Lle_addd_2 rRIP, %ymm15, %ymm1;
+       vpaddd .Lle_addd_4 rRIP, %ymm15, %ymm2;
+       vpaddd .Lle_addd_6 rRIP, %ymm15, %ymm3;
+
+       vpaddd .Lle_addd_8_2 rRIP, %ymm15, %ymm15;
+
+       /* AES rounds */
+       XOR4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+       vbroadcasti128 (1 * 16)(%rdi), %ymm4;
+       VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+       vbroadcasti128 (2 * 16)(%rdi), %ymm4;
+       VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+       vbroadcasti128 (3 * 16)(%rdi), %ymm4;
+       VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+       vbroadcasti128 (4 * 16)(%rdi), %ymm4;
+       VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+       vbroadcasti128 (5 * 16)(%rdi), %ymm4;
+       VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+       vbroadcasti128 (6 * 16)(%rdi), %ymm4;
+       VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+       vbroadcasti128 (7 * 16)(%rdi), %ymm4;
+       VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+       vbroadcasti128 (8 * 16)(%rdi), %ymm4;
+       VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+       vbroadcasti128 (9 * 16)(%rdi), %ymm4;
+       VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+       vbroadcasti128 (10 * 16)(%rdi), %ymm4;
+       cmpl $12, %r9d;
+       jb .Lctr32le_enc_blk8_last;
+       VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+       vbroadcasti128 (11 * 16)(%rdi), %ymm4;
+       VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+       vbroadcasti128 (12 * 16)(%rdi), %ymm4;
+       jz .Lctr32le_enc_blk8_last;
+       VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+       vbroadcasti128 (13 * 16)(%rdi), %ymm4;
+       VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+       vbroadcasti128 (14 * 16)(%rdi), %ymm4;
+
+       /* Last round and output handling. */
+  .Lctr32le_enc_blk8_last:
+       vpxor (0 * 16)(%rcx), %ymm4, %ymm5; /* Xor src to last round key. */
+       vpxor (2 * 16)(%rcx), %ymm4, %ymm6;
+       vpxor (4 * 16)(%rcx), %ymm4, %ymm7;
+       vpxor (6 * 16)(%rcx), %ymm4, %ymm4;
+       leaq (8 * 16)(%rcx), %rcx;
+       vaesenclast %ymm5, %ymm0, %ymm0;
+       vaesenclast %ymm6, %ymm1, %ymm1;
+       vaesenclast %ymm7, %ymm2, %ymm2;
+       vaesenclast %ymm4, %ymm3, %ymm3;
+       vmovdqu %ymm0, (0 * 16)(%rdx);
+       vmovdqu %ymm1, (2 * 16)(%rdx);
+       vmovdqu %ymm2, (4 * 16)(%rdx);
+       vmovdqu %ymm3, (6 * 16)(%rdx);
+       leaq (8 * 16)(%rdx), %rdx;
+
+       /* Handle trailing four blocks. */
+.align 8
+.Lctr32le_enc_blk4:
+       cmpq $4, %r8;
+       jb .Lctr32le_enc_blk1;
+
+       leaq -4(%r8), %r8;
+
+       vbroadcasti128 (0 * 16)(%rdi), %ymm4;
+
+       /* Increment counters. */
+       vpaddd .Lle_addd_0 rRIP, %ymm15, %ymm0;
+       vpaddd .Lle_addd_2 rRIP, %ymm15, %ymm1;
+
+       vpaddd .Lle_addd_4_2 rRIP, %ymm15, %ymm15;
+
+       /* AES rounds */
+       XOR2(%ymm4, %ymm0, %ymm1);
+       vbroadcasti128 (1 * 16)(%rdi), %ymm4;
+       VAESENC2(%ymm4, %ymm0, %ymm1);
+       vbroadcasti128 (2 * 16)(%rdi), %ymm4;
+       VAESENC2(%ymm4, %ymm0, %ymm1);
+       vbroadcasti128 (3 * 16)(%rdi), %ymm4;
+       VAESENC2(%ymm4, %ymm0, %ymm1);
+       vbroadcasti128 (4 * 16)(%rdi), %ymm4;
+       VAESENC2(%ymm4, %ymm0, %ymm1);
+       vbroadcasti128 (5 * 16)(%rdi), %ymm4;
+       VAESENC2(%ymm4, %ymm0, %ymm1);
+       vbroadcasti128 (6 * 16)(%rdi), %ymm4;
+       VAESENC2(%ymm4, %ymm0, %ymm1);
+       vbroadcasti128 (7 * 16)(%rdi), %ymm4;
+       VAESENC2(%ymm4, %ymm0, %ymm1);
+       vbroadcasti128 (8 * 16)(%rdi), %ymm4;
+       VAESENC2(%ymm4, %ymm0, %ymm1);
+       vbroadcasti128 (9 * 16)(%rdi), %ymm4;
+       VAESENC2(%ymm4, %ymm0, %ymm1);
+       vbroadcasti128 (10 * 16)(%rdi), %ymm4;
+       cmpl $12, %r9d;
+       jb .Lctr32le_enc_blk4_last;
+       VAESENC2(%ymm4, %ymm0, %ymm1);
+       vbroadcasti128 (11 * 16)(%rdi), %ymm4;
+       VAESENC2(%ymm4, %ymm0, %ymm1);
+       vbroadcasti128 (12 * 16)(%rdi), %ymm4;
+       jz .Lctr32le_enc_blk4_last;
+       VAESENC2(%ymm4, %ymm0, %ymm1);
+       vbroadcasti128 (13 * 16)(%rdi), %ymm4;
+       VAESENC2(%ymm4, %ymm0, %ymm1);
+       vbroadcasti128 (14 * 16)(%rdi), %ymm4;
+
+       /* Last round and output handling. */
+  .Lctr32le_enc_blk4_last:
+       vpxor (0 * 16)(%rcx), %ymm4, %ymm5; /* Xor src to last round key. */
+       vpxor (2 * 16)(%rcx), %ymm4, %ymm6;
+       leaq (4 * 16)(%rcx), %rcx;
+       vaesenclast %ymm5, %ymm0, %ymm0;
+       vaesenclast %ymm6, %ymm1, %ymm1;
+       vmovdqu %ymm0, (0 * 16)(%rdx);
+       vmovdqu %ymm1, (2 * 16)(%rdx);
+       leaq (4 * 16)(%rdx), %rdx;
+
+       /* Process trailing one to three blocks, one per loop. */
+.align 8
+.Lctr32le_enc_blk1:
+       cmpq $1, %r8;
+       jb .Ldone_ctr32le_enc;
+
+       leaq -1(%r8), %r8;
+
+       /* Load and increament counter. */
+       vmovdqu %xmm15, %xmm0;
+       vpaddd .Lle_addd_1 rRIP, %xmm15, %xmm15;
+
+       /* AES rounds. */
+       vpxor (0 * 16)(%rdi), %xmm0, %xmm0;
+       vaesenc (1 * 16)(%rdi), %xmm0, %xmm0;
+       vaesenc (2 * 16)(%rdi), %xmm0, %xmm0;
+       vaesenc (3 * 16)(%rdi), %xmm0, %xmm0;
+       vaesenc (4 * 16)(%rdi), %xmm0, %xmm0;
+       vaesenc (5 * 16)(%rdi), %xmm0, %xmm0;
+       vaesenc (6 * 16)(%rdi), %xmm0, %xmm0;
+       vaesenc (7 * 16)(%rdi), %xmm0, %xmm0;
+       vaesenc (8 * 16)(%rdi), %xmm0, %xmm0;
+       vaesenc (9 * 16)(%rdi), %xmm0, %xmm0;
+       vmovdqa (10 * 16)(%rdi), %xmm1;
+       cmpl $12, %r9d;
+       jb .Lctr32le_enc_blk1_last;
+       vaesenc %xmm1, %xmm0, %xmm0;
+       vaesenc (11 * 16)(%rdi), %xmm0, %xmm0;
+       vmovdqa (12 * 16)(%rdi), %xmm1;
+       jz .Lctr32le_enc_blk1_last;
+       vaesenc %xmm1, %xmm0, %xmm0;
+       vaesenc (13 * 16)(%rdi), %xmm0, %xmm0;
+       vmovdqa (14 * 16)(%rdi), %xmm1;
+
+       /* Last round and output handling. */
+  .Lctr32le_enc_blk1_last:
+       vpxor (%rcx), %xmm1, %xmm1; /* Xor src to last round key. */
+       leaq 16(%rcx), %rcx;
+       vaesenclast %xmm1, %xmm0, %xmm0; /* Last round and xor with xmm1. */
+       vmovdqu %xmm0, (%rdx);
+       leaq 16(%rdx), %rdx;
+
+       jmp .Lctr32le_enc_blk1;
+
+.align 8
+.Ldone_ctr32le_enc:
+       vmovdqu %xmm15, (%rsi);
+       vzeroall;
+       ret_spec_stop
+       CFI_ENDPROC();
+ELF(.size 
_gcry_vaes_avx2_ctr32le_enc_amd64,.-_gcry_vaes_avx2_ctr32le_enc_amd64)
+
+/**********************************************************************
+  OCB-mode encryption/decryption
+ **********************************************************************/
+ELF(.type _gcry_vaes_avx2_ocb_checksum,@function)
+_gcry_vaes_avx2_ocb_checksum:
+       /* input:
+        *      %rax:     offset pointer
+        *      %r10:     plaintext pointer
+        *      %r11:     nblocks
+        */
+       CFI_STARTPROC();
+
+       vpxor %xmm0, %xmm0, %xmm0;
+       cmpq $4, %r11;
+       jb .Locb_checksum_blk1;
+       vpxor %xmm1, %xmm1, %xmm1;
+       vpxor %xmm2, %xmm2, %xmm2;
+       vpxor %xmm3, %xmm3, %xmm3;
+       cmpq $16, %r11;
+       jb .Locb_checksum_blk4;
+       vpxor %xmm4, %xmm4, %xmm4;
+       vpxor %xmm5, %xmm5, %xmm5;
+       vpxor %xmm6, %xmm6, %xmm6;
+       vpxor %xmm7, %xmm7, %xmm7;
+       cmpq $32, %r11;
+       jb .Locb_checksum_blk16;
+       vpxor %xmm8, %xmm8, %xmm8;
+       vpxor %xmm9, %xmm9, %xmm9;
+       vpxor %xmm10, %xmm10, %xmm10;
+       vpxor %xmm11, %xmm11, %xmm11;
+       vpxor %xmm12, %xmm12, %xmm12;
+       vpxor %xmm13, %xmm13, %xmm13;
+       vpxor %xmm14, %xmm14, %xmm14;
+       vpxor %xmm15, %xmm15, %xmm15;
+
+.align 8
+.Locb_checksum_blk32:
+       cmpq $32, %r11;
+       jb .Locb_checksum_blk32_done;
+
+       leaq -32(%r11), %r11;
+
+       vpxor (0 * 16)(%r10), %ymm0, %ymm0;
+       vpxor (2 * 16)(%r10), %ymm1, %ymm1;
+       vpxor (4 * 16)(%r10), %ymm2, %ymm2;
+       vpxor (6 * 16)(%r10), %ymm3, %ymm3;
+       vpxor (8 * 16)(%r10), %ymm4, %ymm4;
+       vpxor (10 * 16)(%r10), %ymm5, %ymm5;
+       vpxor (12 * 16)(%r10), %ymm6, %ymm6;
+       vpxor (14 * 16)(%r10), %ymm7, %ymm7;
+       vpxor (16 * 16)(%r10), %ymm8, %ymm8;
+       vpxor (18 * 16)(%r10), %ymm9, %ymm9;
+       vpxor (20 * 16)(%r10), %ymm10, %ymm10;
+       vpxor (22 * 16)(%r10), %ymm11, %ymm11;
+       vpxor (24 * 16)(%r10), %ymm12, %ymm12;
+       vpxor (26 * 16)(%r10), %ymm13, %ymm13;
+       vpxor (28 * 16)(%r10), %ymm14, %ymm14;
+       vpxor (30 * 16)(%r10), %ymm15, %ymm15;
+       leaq (32 * 16)(%r10), %r10;
+
+       jmp .Locb_checksum_blk32;
+
+.align 8
+.Locb_checksum_blk32_done:
+       vpxor %ymm8, %ymm0, %ymm0;
+       vpxor %ymm9, %ymm1, %ymm1;
+       vpxor %ymm10, %ymm2, %ymm2;
+       vpxor %ymm11, %ymm3, %ymm3;
+       vpxor %ymm12, %ymm4, %ymm4;
+       vpxor %ymm13, %ymm5, %ymm5;
+       vpxor %ymm14, %ymm6, %ymm6;
+       vpxor %ymm15, %ymm7, %ymm7;
+
+.align 8
+.Locb_checksum_blk16:
+       cmpq $16, %r11;
+       jb .Locb_checksum_blk16_done;
+
+       leaq -16(%r11), %r11;
+
+       vpxor (0 * 16)(%r10), %ymm0, %ymm0;
+       vpxor (2 * 16)(%r10), %ymm1, %ymm1;
+       vpxor (4 * 16)(%r10), %ymm2, %ymm2;
+       vpxor (6 * 16)(%r10), %ymm3, %ymm3;
+       vpxor (8 * 16)(%r10), %ymm4, %ymm4;
+       vpxor (10 * 16)(%r10), %ymm5, %ymm5;
+       vpxor (12 * 16)(%r10), %ymm6, %ymm6;
+       vpxor (14 * 16)(%r10), %ymm7, %ymm7;
+       leaq (16 * 16)(%r10), %r10;
+
+       jmp .Locb_checksum_blk16;
+
+.align 8
+.Locb_checksum_blk16_done:
+       vpxor %ymm4, %ymm0, %ymm0;
+       vpxor %ymm5, %ymm1, %ymm1;
+       vpxor %ymm6, %ymm2, %ymm2;
+       vpxor %ymm7, %ymm3, %ymm3;
+       vextracti128 $1, %ymm0, %xmm4;
+       vextracti128 $1, %ymm1, %xmm5;
+       vextracti128 $1, %ymm2, %xmm6;
+       vextracti128 $1, %ymm3, %xmm7;
+       vpxor %xmm4, %xmm0, %xmm0;
+       vpxor %xmm5, %xmm1, %xmm1;
+       vpxor %xmm6, %xmm2, %xmm2;
+       vpxor %xmm7, %xmm3, %xmm3;
+
+.align 8
+.Locb_checksum_blk4:
+       cmpq $4, %r11;
+       jb .Locb_checksum_blk4_done;
+
+       leaq -4(%r11), %r11;
+
+       vpxor (0 * 16)(%r10), %xmm0, %xmm0;
+       vpxor (1 * 16)(%r10), %xmm1, %xmm1;
+       vpxor (2 * 16)(%r10), %xmm2, %xmm2;
+       vpxor (3 * 16)(%r10), %xmm3, %xmm3;
+       leaq (4 * 16)(%r10), %r10;
+
+       jmp .Locb_checksum_blk4;
+
+.align 8
+.Locb_checksum_blk4_done:
+       vpxor %xmm1, %xmm0, %xmm0;
+       vpxor %xmm3, %xmm2, %xmm2;
+       vpxor %xmm2, %xmm0, %xmm0;
+
+.align 8
+.Locb_checksum_blk1:
+       cmpq $1, %r11;
+       jb .Locb_checksum_done;
+
+       leaq -1(%r11), %r11;
+
+       vpxor (%r10), %xmm0, %xmm0;
+       leaq 16(%r10), %r10;
+
+       jmp .Locb_checksum_blk1;
+
+.align 8
+.Locb_checksum_done:
+       vpxor (%rax), %xmm0, %xmm0;
+       vmovdqu %xmm0, (%rax);
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size _gcry_vaes_avx2_ocb_checksum,.-_gcry_vaes_avx2_ocb_checksum)
+
+ELF(.type _gcry_vaes_avx2_ocb_crypt_amd64,@function)
+.globl _gcry_vaes_avx2_ocb_crypt_amd64
+_gcry_vaes_avx2_ocb_crypt_amd64:
+       /* input:
+        *      %rdi:     round keys
+        *      %esi:     nblk
+        *      %rdx:     dst
+        *      %rcx:     src
+        *      %r8:      nblocks
+        *      %r9:      nrounds
+        *      16(%rbp): offset
+        *      24(%rbp): checksum
+        *      32(%rbp): L-array
+        *      40(%rbp): encrypt (%r15d)
+        */
+       CFI_STARTPROC();
+
+#define STACK_REGS_POS (16 * 16 + 4 * 16)
+#define STACK_ALLOC (STACK_REGS_POS + 6 * 8)
+
+       pushq %rbp;
+       CFI_PUSH(%rbp);
+       movq %rsp, %rbp;
+       CFI_DEF_CFA_REGISTER(%rbp);
+
+       subq $STACK_ALLOC, %rsp;
+       andq $~63, %rsp;
+
+       movq %r12, (STACK_REGS_POS + 0 * 8)(%rsp);
+       CFI_REG_ON_STACK(r12, STACK_REGS_POS + 0 * 8);
+       movq %r13, (STACK_REGS_POS + 1 * 8)(%rsp);
+       CFI_REG_ON_STACK(r13, STACK_REGS_POS + 1 * 8);
+       movq %r14, (STACK_REGS_POS + 2 * 8)(%rsp);
+       CFI_REG_ON_STACK(r14, STACK_REGS_POS + 2 * 8);
+       movq %r15, (STACK_REGS_POS + 3 * 8)(%rsp);
+       CFI_REG_ON_STACK(r15, STACK_REGS_POS + 3 * 8);
+
+       movl 40(%rbp), %r15d; /* encrypt-flag. */
+       movq 16(%rbp), %r14; /* offset ptr. */
+
+       /* Handle encryption checksumming. */
+       testl %r15d, %r15d;
+       jz .Locb_dec_checksum_prepare;
+       movq 24(%rbp), %rax; /* checksum ptr. */
+       movq %rcx, %r10;
+       movq %r8, %r11;
+       call _gcry_vaes_avx2_ocb_checksum;
+       jmp .Locb_enc_checksum_done;
+.Locb_dec_checksum_prepare:
+       /* Store plaintext address and number of blocks for decryption
+        * checksumming. */
+       movq %rdx, (STACK_REGS_POS + 4 * 8)(%rsp);
+       movq %r8, (STACK_REGS_POS + 5 * 8)(%rsp);
+.Locb_enc_checksum_done:
+
+       vmovdqu (%r14), %xmm15; /* Load offset. */
+       movq 32(%rbp), %r14; /* L-array ptr. */
+       vmovdqa (0 * 16)(%rdi), %xmm0; /* first key */
+       movl $(10 * 16), %eax;
+       cmpl $12, %r9d;
+       jb .Llast_key_ptr;
+       movl $(12 * 16), %eax;
+       je .Llast_key_ptr;
+       movl $(14 * 16), %eax;
+  .align 8
+  .Llast_key_ptr:
+       vpxor (%rdi, %rax), %xmm0, %xmm0; /* first key ^ last key */
+       vpxor (0 * 16)(%rdi), %xmm15, %xmm15; /* offset ^ first key */
+       vmovdqa %xmm0, (14 * 16)(%rsp);
+       vmovdqa %xmm0, (15 * 16)(%rsp);
+
+.align 8
+.Lhandle_unaligned_ocb:
+       /* Get number of blocks to align nblk to 16 (and L-array optimization). 
*/
+       movl %esi, %r10d;
+       negl %r10d;
+       andl $15, %r10d;
+       cmpq %r8, %r10;
+       cmovaq %r8, %r10;
+       cmpq $1, %r10;
+       jb .Lunaligned_ocb_done;
+
+       /* Number of blocks after alignment. */
+       movq %r8, %r11;
+       subq %r10, %r11;
+
+       /* If number after alignment is less than 16, skip aligned handling
+        * completely. */
+       cmp $16, %r11;
+       cmovbq %r8, %r10;
+
+       /* Unaligned: Process eight blocks per loop. */
+.align 8
+.Locb_unaligned_blk8:
+       cmpq $8, %r10;
+       jb .Locb_unaligned_blk4;
+
+       leaq -8(%r8), %r8;
+       leaq -8(%r10), %r10;
+
+       leal 1(%esi), %r11d;
+       leal 2(%esi), %r12d;
+       leal 3(%esi), %r13d;
+       leal 4(%esi), %eax;
+       tzcntl %r11d, %r11d;
+       tzcntl %r12d, %r12d;
+       tzcntl %r13d, %r13d;
+       tzcntl %eax, %eax;
+       shll $4, %r11d;
+       shll $4, %r12d;
+       shll $4, %r13d;
+       shll $4, %eax;
+       vpxor (%r14, %r11), %xmm15, %xmm5;
+       vpxor (%r14, %r12), %xmm5, %xmm6;
+       vpxor (%r14, %r13), %xmm6, %xmm7;
+       vpxor (%r14, %rax), %xmm7, %xmm8;
+
+       leal 5(%esi), %r11d;
+       leal 6(%esi), %r12d;
+       leal 7(%esi), %r13d;
+       leal 8(%esi), %esi;
+       tzcntl %r11d, %r11d;
+       tzcntl %r12d, %r12d;
+       tzcntl %r13d, %r13d;
+       tzcntl %esi, %eax;
+       shll $4, %r11d;
+       shll $4, %r12d;
+       shll $4, %r13d;
+       shll $4, %eax;
+       vpxor (%r14, %r11), %xmm8, %xmm9;
+       vpxor (%r14, %r12), %xmm9, %xmm10;
+       vpxor (%r14, %r13), %xmm10, %xmm11;
+       vpxor (%r14, %rax), %xmm11, %xmm15;
+
+       vinserti128 $1, %xmm6, %ymm5, %ymm5;
+       vinserti128 $1, %xmm8, %ymm7, %ymm6;
+       vinserti128 $1, %xmm10, %ymm9, %ymm7;
+       vinserti128 $1, %xmm15, %ymm11, %ymm8;
+
+       vpxor (0 * 16)(%rcx), %ymm5, %ymm0;
+       vpxor (2 * 16)(%rcx), %ymm6, %ymm1;
+       vpxor (4 * 16)(%rcx), %ymm7, %ymm2;
+       vpxor (6 * 16)(%rcx), %ymm8, %ymm3;
+       leaq (8 * 16)(%rcx), %rcx;
+
+       vmovdqa (14 * 16)(%rsp), %ymm9;
+
+       testl %r15d, %r15d;
+       jz .Locb_unaligned_blk8_dec;
+               /* AES rounds */
+               vbroadcasti128 (1 * 16)(%rdi), %ymm4;
+               VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+               vbroadcasti128 (2 * 16)(%rdi), %ymm4;
+               VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+               vbroadcasti128 (3 * 16)(%rdi), %ymm4;
+               VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+               vbroadcasti128 (4 * 16)(%rdi), %ymm4;
+               VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+               vbroadcasti128 (5 * 16)(%rdi), %ymm4;
+               VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+               vbroadcasti128 (6 * 16)(%rdi), %ymm4;
+               VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+               vbroadcasti128 (7 * 16)(%rdi), %ymm4;
+               VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+               vbroadcasti128 (8 * 16)(%rdi), %ymm4;
+               VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+               vbroadcasti128 (9 * 16)(%rdi), %ymm4;
+               VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+               cmpl $12, %r9d;
+               jb .Locb_unaligned_blk8_enc_last;
+               vbroadcasti128 (10 * 16)(%rdi), %ymm4;
+               VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+               vbroadcasti128 (11 * 16)(%rdi), %ymm4;
+               VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+               jz .Locb_unaligned_blk8_enc_last;
+               vbroadcasti128 (12 * 16)(%rdi), %ymm4;
+               VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+               vbroadcasti128 (13 * 16)(%rdi), %ymm4;
+               VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+
+               /* Last round and output handling. */
+       .Locb_unaligned_blk8_enc_last:
+               vpxor %ymm5, %ymm9, %ymm5; /* Xor src to last round key. */
+               vpxor %ymm6, %ymm9, %ymm6;
+               vpxor %ymm7, %ymm9, %ymm7;
+               vpxor %ymm8, %ymm9, %ymm4;
+               vaesenclast %ymm5, %ymm0, %ymm0;
+               vaesenclast %ymm6, %ymm1, %ymm1;
+               vaesenclast %ymm7, %ymm2, %ymm2;
+               vaesenclast %ymm4, %ymm3, %ymm3;
+               vmovdqu %ymm0, (0 * 16)(%rdx);
+               vmovdqu %ymm1, (2 * 16)(%rdx);
+               vmovdqu %ymm2, (4 * 16)(%rdx);
+               vmovdqu %ymm3, (6 * 16)(%rdx);
+               leaq (8 * 16)(%rdx), %rdx;
+
+               jmp .Locb_unaligned_blk8;
+
+       .align 8
+       .Locb_unaligned_blk8_dec:
+               /* AES rounds */
+               vbroadcasti128 (1 * 16)(%rdi), %ymm4;
+               VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+               vbroadcasti128 (2 * 16)(%rdi), %ymm4;
+               VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+               vbroadcasti128 (3 * 16)(%rdi), %ymm4;
+               VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+               vbroadcasti128 (4 * 16)(%rdi), %ymm4;
+               VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+               vbroadcasti128 (5 * 16)(%rdi), %ymm4;
+               VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+               vbroadcasti128 (6 * 16)(%rdi), %ymm4;
+               VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+               vbroadcasti128 (7 * 16)(%rdi), %ymm4;
+               VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+               vbroadcasti128 (8 * 16)(%rdi), %ymm4;
+               VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+               vbroadcasti128 (9 * 16)(%rdi), %ymm4;
+               VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+               cmpl $12, %r9d;
+               jb .Locb_unaligned_blk8_dec_last;
+               vbroadcasti128 (10 * 16)(%rdi), %ymm4;
+               VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+               vbroadcasti128 (11 * 16)(%rdi), %ymm4;
+               VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+               jz .Locb_unaligned_blk8_dec_last;
+               vbroadcasti128 (12 * 16)(%rdi), %ymm4;
+               VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+               vbroadcasti128 (13 * 16)(%rdi), %ymm4;
+               VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+
+               /* Last round and output handling. */
+       .Locb_unaligned_blk8_dec_last:
+               vpxor %ymm5, %ymm9, %ymm5; /* Xor src to last round key. */
+               vpxor %ymm6, %ymm9, %ymm6;
+               vpxor %ymm7, %ymm9, %ymm7;
+               vpxor %ymm8, %ymm9, %ymm4;
+               vaesdeclast %ymm5, %ymm0, %ymm0;
+               vaesdeclast %ymm6, %ymm1, %ymm1;
+               vaesdeclast %ymm7, %ymm2, %ymm2;
+               vaesdeclast %ymm4, %ymm3, %ymm3;
+               vmovdqu %ymm0, (0 * 16)(%rdx);
+               vmovdqu %ymm1, (2 * 16)(%rdx);
+               vmovdqu %ymm2, (4 * 16)(%rdx);
+               vmovdqu %ymm3, (6 * 16)(%rdx);
+               leaq (8 * 16)(%rdx), %rdx;
+
+               jmp .Locb_unaligned_blk8;
+
+       /* Unaligned: Process four blocks. */
+.align 8
+.Locb_unaligned_blk4:
+       cmpq $4, %r10;
+       jb .Locb_unaligned_blk1;
+
+       leaq -4(%r8), %r8;
+       leaq -4(%r10), %r10;
+
+       leal 1(%esi), %r11d;
+       leal 2(%esi), %r12d;
+       leal 3(%esi), %r13d;
+       leal 4(%esi), %esi;
+       tzcntl %r11d, %r11d;
+       tzcntl %r12d, %r12d;
+       tzcntl %r13d, %r13d;
+       tzcntl %esi, %eax;
+       shll $4, %r11d;
+       shll $4, %r12d;
+       shll $4, %r13d;
+       shll $4, %eax;
+
+       vpxor (%r14, %r11), %xmm15, %xmm5;
+       vpxor (%r14, %r12), %xmm5, %xmm6;
+       vinserti128 $1, %xmm6, %ymm5, %ymm5;
+       vpxor (%r14, %r13), %xmm6, %xmm7;
+       vpxor (%r14, %rax), %xmm7, %xmm15;
+       vinserti128 $1, %xmm15, %ymm7, %ymm6;
+
+       vpxor (0 * 16)(%rcx), %ymm5, %ymm0;
+       vpxor (2 * 16)(%rcx), %ymm6, %ymm1;
+       leaq (4 * 16)(%rcx), %rcx;
+
+       testl %r15d, %r15d;
+       jz .Locb_unaligned_blk4_dec;
+               /* AES rounds */
+               vbroadcasti128 (1 * 16)(%rdi), %ymm4;
+               VAESENC2(%ymm4, %ymm0, %ymm1);
+               vbroadcasti128 (2 * 16)(%rdi), %ymm4;
+               VAESENC2(%ymm4, %ymm0, %ymm1);
+               vbroadcasti128 (3 * 16)(%rdi), %ymm4;
+               VAESENC2(%ymm4, %ymm0, %ymm1);
+               vbroadcasti128 (4 * 16)(%rdi), %ymm4;
+               VAESENC2(%ymm4, %ymm0, %ymm1);
+               vbroadcasti128 (5 * 16)(%rdi), %ymm4;
+               VAESENC2(%ymm4, %ymm0, %ymm1);
+               vbroadcasti128 (6 * 16)(%rdi), %ymm4;
+               VAESENC2(%ymm4, %ymm0, %ymm1);
+               vbroadcasti128 (7 * 16)(%rdi), %ymm4;
+               VAESENC2(%ymm4, %ymm0, %ymm1);
+               vbroadcasti128 (8 * 16)(%rdi), %ymm4;
+               VAESENC2(%ymm4, %ymm0, %ymm1);
+               vbroadcasti128 (9 * 16)(%rdi), %ymm4;
+               VAESENC2(%ymm4, %ymm0, %ymm1);
+               cmpl $12, %r9d;
+               jb .Locb_unaligned_blk4_enc_last;
+               vbroadcasti128 (10 * 16)(%rdi), %ymm4;
+               VAESENC2(%ymm4, %ymm0, %ymm1);
+               vbroadcasti128 (11 * 16)(%rdi), %ymm4;
+               VAESENC2(%ymm4, %ymm0, %ymm1);
+               jz .Locb_unaligned_blk4_enc_last;
+               vbroadcasti128 (12 * 16)(%rdi), %ymm4;
+               VAESENC2(%ymm4, %ymm0, %ymm1);
+               vbroadcasti128 (13 * 16)(%rdi), %ymm4;
+               VAESENC2(%ymm4, %ymm0, %ymm1);
+
+             /* Last round and output handling. */
+       .Locb_unaligned_blk4_enc_last:
+               vmovdqa (14 * 16)(%rsp), %ymm8;
+               vpxor %ymm5, %ymm8, %ymm5; /* Xor src to last round key. */
+               vpxor %ymm6, %ymm8, %ymm6;
+               vaesenclast %ymm5, %ymm0, %ymm0;
+               vaesenclast %ymm6, %ymm1, %ymm1;
+               vmovdqu %ymm0, (0 * 16)(%rdx);
+               vmovdqu %ymm1, (2 * 16)(%rdx);
+               leaq (4 * 16)(%rdx), %rdx;
+
+               jmp .Locb_unaligned_blk1;
+
+       .align 8
+       .Locb_unaligned_blk4_dec:
+               /* AES rounds */
+               vbroadcasti128 (1 * 16)(%rdi), %ymm4;
+               VAESDEC2(%ymm4, %ymm0, %ymm1);
+               vbroadcasti128 (2 * 16)(%rdi), %ymm4;
+               VAESDEC2(%ymm4, %ymm0, %ymm1);
+               vbroadcasti128 (3 * 16)(%rdi), %ymm4;
+               VAESDEC2(%ymm4, %ymm0, %ymm1);
+               vbroadcasti128 (4 * 16)(%rdi), %ymm4;
+               VAESDEC2(%ymm4, %ymm0, %ymm1);
+               vbroadcasti128 (5 * 16)(%rdi), %ymm4;
+               VAESDEC2(%ymm4, %ymm0, %ymm1);
+               vbroadcasti128 (6 * 16)(%rdi), %ymm4;
+               VAESDEC2(%ymm4, %ymm0, %ymm1);
+               vbroadcasti128 (7 * 16)(%rdi), %ymm4;
+               VAESDEC2(%ymm4, %ymm0, %ymm1);
+               vbroadcasti128 (8 * 16)(%rdi), %ymm4;
+               VAESDEC2(%ymm4, %ymm0, %ymm1);
+               vbroadcasti128 (9 * 16)(%rdi), %ymm4;
+               VAESDEC2(%ymm4, %ymm0, %ymm1);
+               cmpl $12, %r9d;
+               jb .Locb_unaligned_blk4_dec_last;
+               vbroadcasti128 (10 * 16)(%rdi), %ymm4;
+               VAESDEC2(%ymm4, %ymm0, %ymm1);
+               vbroadcasti128 (11 * 16)(%rdi), %ymm4;
+               VAESDEC2(%ymm4, %ymm0, %ymm1);
+               jz .Locb_unaligned_blk4_dec_last;
+               vbroadcasti128 (12 * 16)(%rdi), %ymm4;
+               VAESDEC2(%ymm4, %ymm0, %ymm1);
+               vbroadcasti128 (13 * 16)(%rdi), %ymm4;
+               VAESDEC2(%ymm4, %ymm0, %ymm1);
+
+             /* Last round and output handling. */
+       .Locb_unaligned_blk4_dec_last:
+               vmovdqa (14 * 16)(%rsp), %ymm8;
+               vpxor %ymm5, %ymm8, %ymm5; /* Xor src to last round key. */
+               vpxor %ymm6, %ymm8, %ymm6;
+               vaesdeclast %ymm5, %ymm0, %ymm0;
+               vaesdeclast %ymm6, %ymm1, %ymm1;
+               vmovdqu %ymm0, (0 * 16)(%rdx);
+               vmovdqu %ymm1, (2 * 16)(%rdx);
+               leaq (4 * 16)(%rdx), %rdx;
+
+       /* Unaligned: Process one block per loop. */
+.align 8
+.Locb_unaligned_blk1:
+       cmpq $1, %r10;
+       jb .Lunaligned_ocb_done;
+
+       leaq -1(%r8), %r8;
+       leaq -1(%r10), %r10;
+
+       leal 1(%esi), %esi;
+       tzcntl %esi, %r11d;
+       shll $4, %r11d;
+       vpxor (%r14, %r11), %xmm15, %xmm15;
+       vpxor (%rcx), %xmm15, %xmm0;
+       leaq 16(%rcx), %rcx;
+
+       testl %r15d, %r15d;
+       jz .Locb_unaligned_blk1_dec;
+               /* AES rounds. */
+               vaesenc (1 * 16)(%rdi), %xmm0, %xmm0;
+               vaesenc (2 * 16)(%rdi), %xmm0, %xmm0;
+               vaesenc (3 * 16)(%rdi), %xmm0, %xmm0;
+               vaesenc (4 * 16)(%rdi), %xmm0, %xmm0;
+               vaesenc (5 * 16)(%rdi), %xmm0, %xmm0;
+               vaesenc (6 * 16)(%rdi), %xmm0, %xmm0;
+               vaesenc (7 * 16)(%rdi), %xmm0, %xmm0;
+               vaesenc (8 * 16)(%rdi), %xmm0, %xmm0;
+               vaesenc (9 * 16)(%rdi), %xmm0, %xmm0;
+               cmpl $12, %r9d;
+               jb .Locb_unaligned_blk1_enc_last;
+               vaesenc (10 * 16)(%rdi), %xmm0, %xmm0;
+               vaesenc (11 * 16)(%rdi), %xmm0, %xmm0;
+               jz .Locb_unaligned_blk1_enc_last;
+               vaesenc (12 * 16)(%rdi), %xmm0, %xmm0;
+               vaesenc (13 * 16)(%rdi), %xmm0, %xmm0;
+
+               /* Last round and output handling. */
+       .Locb_unaligned_blk1_enc_last:
+               vpxor (14 * 16)(%rsp), %xmm15, %xmm1;
+               vaesenclast %xmm1, %xmm0, %xmm0;
+               vmovdqu %xmm0, (%rdx);
+               leaq 16(%rdx), %rdx;
+
+               jmp .Locb_unaligned_blk1;
+
+       .align 8
+       .Locb_unaligned_blk1_dec:
+               /* AES rounds. */
+               vaesdec (1 * 16)(%rdi), %xmm0, %xmm0;
+               vaesdec (2 * 16)(%rdi), %xmm0, %xmm0;
+               vaesdec (3 * 16)(%rdi), %xmm0, %xmm0;
+               vaesdec (4 * 16)(%rdi), %xmm0, %xmm0;
+               vaesdec (5 * 16)(%rdi), %xmm0, %xmm0;
+               vaesdec (6 * 16)(%rdi), %xmm0, %xmm0;
+               vaesdec (7 * 16)(%rdi), %xmm0, %xmm0;
+               vaesdec (8 * 16)(%rdi), %xmm0, %xmm0;
+               vaesdec (9 * 16)(%rdi), %xmm0, %xmm0;
+               cmpl $12, %r9d;
+               jb .Locb_unaligned_blk1_dec_last;
+               vaesdec (10 * 16)(%rdi), %xmm0, %xmm0;
+               vaesdec (11 * 16)(%rdi), %xmm0, %xmm0;
+               jz .Locb_unaligned_blk1_dec_last;
+               vaesdec (12 * 16)(%rdi), %xmm0, %xmm0;
+               vaesdec (13 * 16)(%rdi), %xmm0, %xmm0;
+
+               /* Last round and output handling. */
+       .Locb_unaligned_blk1_dec_last:
+               vpxor (14 * 16)(%rsp), %xmm15, %xmm1;
+               vaesdeclast %xmm1, %xmm0, %xmm0;
+               vmovdqu %xmm0, (%rdx);
+               leaq 16(%rdx), %rdx;
+
+               jmp .Locb_unaligned_blk1;
+
+.align 8
+.Lunaligned_ocb_done:
+       cmpq $1, %r8;
+       jb .Ldone_ocb;
+
+       /* Short buffers do not benefit from L-array optimization. */
+       movq %r8, %r10;
+       cmpq $16, %r8;
+       jb .Locb_unaligned_blk8;
+
+       vinserti128 $1, %xmm15, %ymm15, %ymm15;
+
+       /* Prepare L-array optimization.
+        * Since nblk is aligned to 16, offsets will have following
+        * construction:
+        *  - block1 = ntz{0} = offset ^ L[0]
+        *  - block2 = ntz{1} = offset ^ L[0] ^ L[1]
+        *  - block3 = ntz{0} = offset ^ L[1]
+        *  - block4 = ntz{2} = offset ^ L[1] ^ L[2]
+        *  - block5 = ntz{0} = offset ^ L[0] ^ L[1] ^ L[2]
+        *  - block6 = ntz{1} = offset ^ L[0] ^ L[2]
+        *  - block7 = ntz{0} = offset ^ L[2]
+        *  - block8 = ntz{3} = offset ^ L[2] ^ L[3]
+        *  - block9 = ntz{0} = offset ^ L[0] ^ L[2] ^ L[3]
+        *  - block10 = ntz{1} = offset ^ L[0] ^ L[1] ^ L[2] ^ L[3]
+        *  - block11 = ntz{0} = offset ^ L[1] ^ L[2] ^ L[3]
+        *  - block12 = ntz{2} = offset ^ L[1] ^ L[3]
+        *  - block13 = ntz{0} = offset ^ L[0] ^ L[1] ^ L[3]
+        *  - block14 = ntz{1} = offset ^ L[0] ^ L[3]
+        *  - block15 = ntz{0} = offset ^ L[3]
+        *  - block16 = ntz{x} = offset ^ L[3] ^ L[ntz{x}]
+        */
+       vmovdqu (0 * 16)(%r14), %xmm0;
+       vmovdqu (1 * 16)(%r14), %xmm1;
+       vmovdqu (2 * 16)(%r14), %xmm2;
+       vmovdqu (3 * 16)(%r14), %xmm3;
+       vpxor %xmm0, %xmm1, %xmm4; /* L[0] ^ L[1] */
+       vpxor %xmm0, %xmm2, %xmm5; /* L[0] ^ L[2] */
+       vpxor %xmm0, %xmm3, %xmm6; /* L[0] ^ L[3] */
+       vpxor %xmm1, %xmm2, %xmm7; /* L[1] ^ L[2] */
+       vpxor %xmm1, %xmm3, %xmm8; /* L[1] ^ L[3] */
+       vpxor %xmm2, %xmm3, %xmm9; /* L[2] ^ L[3] */
+       vpxor %xmm4, %xmm2, %xmm10; /* L[0] ^ L[1] ^ L[2] */
+       vpxor %xmm5, %xmm3, %xmm11; /* L[0] ^ L[2] ^ L[3] */
+       vpxor %xmm7, %xmm3, %xmm12; /* L[1] ^ L[2] ^ L[3] */
+       vpxor %xmm0, %xmm8, %xmm13; /* L[0] ^ L[1] ^ L[3] */
+       vpxor %xmm4, %xmm9, %xmm14; /* L[0] ^ L[1] ^ L[2] ^ L[3] */
+       vinserti128 $1, %xmm4, %ymm0, %ymm0;
+       vinserti128 $1, %xmm7, %ymm1, %ymm1;
+       vinserti128 $1, %xmm5, %ymm10, %ymm10;
+       vinserti128 $1, %xmm9, %ymm2, %ymm2;
+       vinserti128 $1, %xmm14, %ymm11, %ymm11;
+       vinserti128 $1, %xmm8, %ymm12, %ymm12;
+       vinserti128 $1, %xmm6, %ymm13, %ymm13;
+       vmovdqa %ymm0,  (0 * 16)(%rsp);
+       vmovdqa %ymm1,  (2 * 16)(%rsp);
+       vmovdqa %ymm10, (4 * 16)(%rsp);
+       vmovdqa %ymm2,  (6 * 16)(%rsp);
+       vmovdqa %ymm11, (8 * 16)(%rsp);
+       vmovdqa %ymm12, (10 * 16)(%rsp);
+       vmovdqa %ymm13, (12 * 16)(%rsp);
+
+       /* Aligned: Process 16 blocks per loop. */
+.align 8
+.Locb_aligned_blk16:
+       cmpq $16, %r8;
+       jb .Locb_aligned_blk8;
+
+       leaq -16(%r8), %r8;
+
+       leal 16(%esi), %esi;
+       tzcntl %esi, %eax;
+       shll $4, %eax;
+
+       vpxor (0 * 16)(%rsp), %ymm15, %ymm8;
+       vpxor (2 * 16)(%rsp), %ymm15, %ymm9;
+       vpxor (4 * 16)(%rsp), %ymm15, %ymm10;
+       vpxor (6 * 16)(%rsp), %ymm15, %ymm11;
+       vpxor (8 * 16)(%rsp), %ymm15, %ymm12;
+
+       vpxor (3 * 16)(%r14), %xmm15, %xmm13; /* offset ^ first key ^ L[3] */
+       vpxor (%r14, %rax), %xmm13, %xmm14; /* offset ^ first key ^ L[3] ^ 
L[ntz{nblk+16}] */
+       vinserti128 $1, %xmm14, %ymm13, %ymm14;
+
+       vpxor (10 * 16)(%rsp), %ymm15, %ymm13;
+       vpxor (14 * 16)(%rcx), %ymm14, %ymm7;
+
+       vpxor (0 * 16)(%rcx), %ymm8, %ymm0;
+       vpxor (2 * 16)(%rcx), %ymm9, %ymm1;
+       vpxor (4 * 16)(%rcx), %ymm10, %ymm2;
+       vpxor (6 * 16)(%rcx), %ymm11, %ymm3;
+       vpxor (8 * 16)(%rcx), %ymm12, %ymm4;
+       vpxor (10 * 16)(%rcx), %ymm13, %ymm5;
+       vmovdqa %ymm13, (16 * 16)(%rsp);
+       vpxor (12 * 16)(%rsp), %ymm15, %ymm13;
+       vpxor (12 * 16)(%rcx), %ymm13, %ymm6;
+       vmovdqa %ymm13, (18 * 16)(%rsp);
+
+       leaq (16 * 16)(%rcx), %rcx;
+
+       vperm2i128 $0x11, %ymm14, %ymm14, %ymm15;
+
+       testl %r15d, %r15d;
+       jz .Locb_aligned_blk16_dec;
+               /* AES rounds */
+               vbroadcasti128 (1 * 16)(%rdi), %ymm13;
+               VAESENC8(%ymm13, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, 
%ymm6, %ymm7);
+               vbroadcasti128 (2 * 16)(%rdi), %ymm13;
+               VAESENC8(%ymm13, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, 
%ymm6, %ymm7);
+               vbroadcasti128 (3 * 16)(%rdi), %ymm13;
+               VAESENC8(%ymm13, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, 
%ymm6, %ymm7);
+               vbroadcasti128 (4 * 16)(%rdi), %ymm13;
+               VAESENC8(%ymm13, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, 
%ymm6, %ymm7);
+               vbroadcasti128 (5 * 16)(%rdi), %ymm13;
+               VAESENC8(%ymm13, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, 
%ymm6, %ymm7);
+               vbroadcasti128 (6 * 16)(%rdi), %ymm13;
+               VAESENC8(%ymm13, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, 
%ymm6, %ymm7);
+               vbroadcasti128 (7 * 16)(%rdi), %ymm13;
+               VAESENC8(%ymm13, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, 
%ymm6, %ymm7);
+               vbroadcasti128 (8 * 16)(%rdi), %ymm13;
+               VAESENC8(%ymm13, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, 
%ymm6, %ymm7);
+               vbroadcasti128 (9 * 16)(%rdi), %ymm13;
+               VAESENC8(%ymm13, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, 
%ymm6, %ymm7);
+               cmpl $12, %r9d;
+               jb .Locb_aligned_blk16_enc_last;
+               vbroadcasti128 (10 * 16)(%rdi), %ymm13;
+               VAESENC8(%ymm13, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, 
%ymm6, %ymm7);
+               vbroadcasti128 (11 * 16)(%rdi), %ymm13;
+               VAESENC8(%ymm13, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, 
%ymm6, %ymm7);
+               jz .Locb_aligned_blk16_enc_last;
+               vbroadcasti128 (12 * 16)(%rdi), %ymm13;
+               VAESENC8(%ymm13, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, 
%ymm6, %ymm7);
+               vbroadcasti128 (13 * 16)(%rdi), %ymm13;
+               VAESENC8(%ymm13, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, 
%ymm6, %ymm7);
+
+               /* Last round and output handling. */
+       .Locb_aligned_blk16_enc_last:
+               vmovdqa (14 * 16)(%rsp), %ymm13;
+               vpxor %ymm8, %ymm13, %ymm8;
+               vpxor %ymm9, %ymm13, %ymm9;
+               vpxor %ymm10, %ymm13, %ymm10;
+               vpxor %ymm11, %ymm13, %ymm11;
+               vaesenclast %ymm8, %ymm0, %ymm0;
+               vaesenclast %ymm9, %ymm1, %ymm1;
+               vaesenclast %ymm10, %ymm2, %ymm2;
+               vaesenclast %ymm11, %ymm3, %ymm3;
+               vpxor %ymm12, %ymm13, %ymm12;
+               vpxor (16 * 16)(%rsp), %ymm13, %ymm8;
+               vpxor (18 * 16)(%rsp), %ymm13, %ymm9;
+               vpxor %ymm14, %ymm13, %ymm13;
+               vaesenclast %ymm12, %ymm4, %ymm4;
+               vaesenclast %ymm8, %ymm5, %ymm5;
+               vaesenclast %ymm9, %ymm6, %ymm6;
+               vaesenclast %ymm13, %ymm7, %ymm7;
+               vmovdqu %ymm0, (0 * 16)(%rdx);
+               vmovdqu %ymm1, (2 * 16)(%rdx);
+               vmovdqu %ymm2, (4 * 16)(%rdx);
+               vmovdqu %ymm3, (6 * 16)(%rdx);
+               vmovdqu %ymm4, (8 * 16)(%rdx);
+               vmovdqu %ymm5, (10 * 16)(%rdx);
+               vmovdqu %ymm6, (12 * 16)(%rdx);
+               vmovdqu %ymm7, (14 * 16)(%rdx);
+               leaq (16 * 16)(%rdx), %rdx;
+
+               jmp .Locb_aligned_blk16;
+
+       .align 8
+       .Locb_aligned_blk16_dec:
+               /* AES rounds */
+               vbroadcasti128 (1 * 16)(%rdi), %ymm13;
+               VAESDEC8(%ymm13, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, 
%ymm6, %ymm7);
+               vbroadcasti128 (2 * 16)(%rdi), %ymm13;
+               VAESDEC8(%ymm13, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, 
%ymm6, %ymm7);
+               vbroadcasti128 (3 * 16)(%rdi), %ymm13;
+               VAESDEC8(%ymm13, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, 
%ymm6, %ymm7);
+               vbroadcasti128 (4 * 16)(%rdi), %ymm13;
+               VAESDEC8(%ymm13, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, 
%ymm6, %ymm7);
+               vbroadcasti128 (5 * 16)(%rdi), %ymm13;
+               VAESDEC8(%ymm13, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, 
%ymm6, %ymm7);
+               vbroadcasti128 (6 * 16)(%rdi), %ymm13;
+               VAESDEC8(%ymm13, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, 
%ymm6, %ymm7);
+               vbroadcasti128 (7 * 16)(%rdi), %ymm13;
+               VAESDEC8(%ymm13, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, 
%ymm6, %ymm7);
+               vbroadcasti128 (8 * 16)(%rdi), %ymm13;
+               VAESDEC8(%ymm13, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, 
%ymm6, %ymm7);
+               vbroadcasti128 (9 * 16)(%rdi), %ymm13;
+               VAESDEC8(%ymm13, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, 
%ymm6, %ymm7);
+               cmpl $12, %r9d;
+               jb .Locb_aligned_blk16_dec_last;
+               vbroadcasti128 (10 * 16)(%rdi), %ymm13;
+               VAESDEC8(%ymm13, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, 
%ymm6, %ymm7);
+               vbroadcasti128 (11 * 16)(%rdi), %ymm13;
+               VAESDEC8(%ymm13, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, 
%ymm6, %ymm7);
+               jz .Locb_aligned_blk16_dec_last;
+               vbroadcasti128 (12 * 16)(%rdi), %ymm13;
+               VAESDEC8(%ymm13, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, 
%ymm6, %ymm7);
+               vbroadcasti128 (13 * 16)(%rdi), %ymm13;
+               VAESDEC8(%ymm13, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, 
%ymm6, %ymm7);
+
+               /* Last round and output handling. */
+       .Locb_aligned_blk16_dec_last:
+               vmovdqa (14 * 16)(%rsp), %ymm13;
+               vpxor %ymm8, %ymm13, %ymm8;
+               vpxor %ymm9, %ymm13, %ymm9;
+               vpxor %ymm10, %ymm13, %ymm10;
+               vpxor %ymm11, %ymm13, %ymm11;
+               vaesdeclast %ymm8, %ymm0, %ymm0;
+               vaesdeclast %ymm9, %ymm1, %ymm1;
+               vaesdeclast %ymm10, %ymm2, %ymm2;
+               vaesdeclast %ymm11, %ymm3, %ymm3;
+               vpxor %ymm12, %ymm13, %ymm12;
+               vpxor (16 * 16)(%rsp), %ymm13, %ymm8;
+               vpxor (18 * 16)(%rsp), %ymm13, %ymm9;
+               vpxor %ymm14, %ymm13, %ymm13;
+               vaesdeclast %ymm12, %ymm4, %ymm4;
+               vaesdeclast %ymm8, %ymm5, %ymm5;
+               vaesdeclast %ymm9, %ymm6, %ymm6;
+               vaesdeclast %ymm13, %ymm7, %ymm7;
+               vmovdqu %ymm0, (0 * 16)(%rdx);
+               vmovdqu %ymm1, (2 * 16)(%rdx);
+               vmovdqu %ymm2, (4 * 16)(%rdx);
+               vmovdqu %ymm3, (6 * 16)(%rdx);
+               vmovdqu %ymm4, (8 * 16)(%rdx);
+               vmovdqu %ymm5, (10 * 16)(%rdx);
+               vmovdqu %ymm6, (12 * 16)(%rdx);
+               vmovdqu %ymm7, (14 * 16)(%rdx);
+               leaq (16 * 16)(%rdx), %rdx;
+
+               jmp .Locb_aligned_blk16;
+
+       /* Aligned: Process trailing eight blocks. */
+.align 8
+.Locb_aligned_blk8:
+       cmpq $8, %r8;
+       jb .Locb_aligned_done;
+
+       leaq -8(%r8), %r8;
+
+       leal 8(%esi), %esi;
+       tzcntl %esi, %eax;
+       shll $4, %eax;
+
+       vpxor (0 * 16)(%rsp), %ymm15, %ymm5;
+       vpxor (2 * 16)(%rsp), %ymm15, %ymm6;
+       vpxor (4 * 16)(%rsp), %ymm15, %ymm7;
+
+       vpxor (2 * 16)(%r14), %xmm15, %xmm13; /* offset ^ first key ^ L[2] */
+       vpxor (%r14, %rax), %xmm13, %xmm14; /* offset ^ first key ^ L[2] ^ 
L[ntz{nblk+8}] */
+       vinserti128 $1, %xmm14, %ymm13, %ymm14;
+
+       vpxor (0 * 16)(%rcx), %ymm5, %ymm0;
+       vpxor (2 * 16)(%rcx), %ymm6, %ymm1;
+       vpxor (4 * 16)(%rcx), %ymm7, %ymm2;
+       vpxor (6 * 16)(%rcx), %ymm14, %ymm3;
+       leaq (8 * 16)(%rcx), %rcx;
+
+       vperm2i128 $0x11, %ymm14, %ymm14, %ymm15;
+
+       vmovdqa (14 * 16)(%rsp), %ymm8;
+
+       testl %r15d, %r15d;
+       jz .Locb_aligned_blk8_dec;
+               /* AES rounds */
+               vbroadcasti128 (1 * 16)(%rdi), %ymm4;
+               VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+               vbroadcasti128 (2 * 16)(%rdi), %ymm4;
+               VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+               vbroadcasti128 (3 * 16)(%rdi), %ymm4;
+               VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+               vbroadcasti128 (4 * 16)(%rdi), %ymm4;
+               VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+               vbroadcasti128 (5 * 16)(%rdi), %ymm4;
+               VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+               vbroadcasti128 (6 * 16)(%rdi), %ymm4;
+               VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+               vbroadcasti128 (7 * 16)(%rdi), %ymm4;
+               VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+               vbroadcasti128 (8 * 16)(%rdi), %ymm4;
+               VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+               vbroadcasti128 (9 * 16)(%rdi), %ymm4;
+               VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+               cmpl $12, %r9d;
+               jb .Locb_aligned_blk8_enc_last;
+               vbroadcasti128 (10 * 16)(%rdi), %ymm4;
+               VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+               vbroadcasti128 (11 * 16)(%rdi), %ymm4;
+               VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+               jz .Locb_aligned_blk8_enc_last;
+               vbroadcasti128 (12 * 16)(%rdi), %ymm4;
+               VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+               vbroadcasti128 (13 * 16)(%rdi), %ymm4;
+               VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+
+               /* Last round and output handling. */
+       .Locb_aligned_blk8_enc_last:
+               vpxor %ymm5, %ymm8, %ymm5;
+               vpxor %ymm6, %ymm8, %ymm6;
+               vpxor %ymm7, %ymm8, %ymm7;
+               vpxor %ymm14, %ymm8, %ymm4;
+               vaesenclast %ymm5, %ymm0, %ymm0;
+               vaesenclast %ymm6, %ymm1, %ymm1;
+               vaesenclast %ymm7, %ymm2, %ymm2;
+               vaesenclast %ymm4, %ymm3, %ymm3;
+               vmovdqu %ymm0, (0 * 16)(%rdx);
+               vmovdqu %ymm1, (2 * 16)(%rdx);
+               vmovdqu %ymm2, (4 * 16)(%rdx);
+               vmovdqu %ymm3, (6 * 16)(%rdx);
+               leaq (8 * 16)(%rdx), %rdx;
+
+               jmp .Locb_aligned_done;
+
+       .align 8
+       .Locb_aligned_blk8_dec:
+               /* AES rounds */
+               vbroadcasti128 (1 * 16)(%rdi), %ymm4;
+               VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+               vbroadcasti128 (2 * 16)(%rdi), %ymm4;
+               VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+               vbroadcasti128 (3 * 16)(%rdi), %ymm4;
+               VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+               vbroadcasti128 (4 * 16)(%rdi), %ymm4;
+               VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+               vbroadcasti128 (5 * 16)(%rdi), %ymm4;
+               VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+               vbroadcasti128 (6 * 16)(%rdi), %ymm4;
+               VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+               vbroadcasti128 (7 * 16)(%rdi), %ymm4;
+               VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+               vbroadcasti128 (8 * 16)(%rdi), %ymm4;
+               VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+               vbroadcasti128 (9 * 16)(%rdi), %ymm4;
+               VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+               cmpl $12, %r9d;
+               jb .Locb_aligned_blk8_dec_last;
+               vbroadcasti128 (10 * 16)(%rdi), %ymm4;
+               VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+               vbroadcasti128 (11 * 16)(%rdi), %ymm4;
+               VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+               jz .Locb_aligned_blk8_dec_last;
+               vbroadcasti128 (12 * 16)(%rdi), %ymm4;
+               VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+               vbroadcasti128 (13 * 16)(%rdi), %ymm4;
+               VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+               vbroadcasti128 (14 * 16)(%rdi), %ymm4;
+
+               /* Last round and output handling. */
+       .Locb_aligned_blk8_dec_last:
+               vpxor %ymm5, %ymm8, %ymm5;
+               vpxor %ymm6, %ymm8, %ymm6;
+               vpxor %ymm7, %ymm8, %ymm7;
+               vpxor %ymm14, %ymm8, %ymm4;
+               vaesdeclast %ymm5, %ymm0, %ymm0;
+               vaesdeclast %ymm6, %ymm1, %ymm1;
+               vaesdeclast %ymm7, %ymm2, %ymm2;
+               vaesdeclast %ymm4, %ymm3, %ymm3;
+               vmovdqu %ymm0, (0 * 16)(%rdx);
+               vmovdqu %ymm1, (2 * 16)(%rdx);
+               vmovdqu %ymm2, (4 * 16)(%rdx);
+               vmovdqu %ymm3, (6 * 16)(%rdx);
+               leaq (8 * 16)(%rdx), %rdx;
+
+.align 8
+.Locb_aligned_done:
+       /* Burn stack. */
+       vpxor %ymm0, %ymm0, %ymm0;
+       vmovdqa %ymm0, (0 * 16)(%rsp);
+       vmovdqa %ymm0, (2 * 16)(%rsp);
+       vmovdqa %ymm0, (4 * 16)(%rsp);
+       vmovdqa %ymm0, (6 * 16)(%rsp);
+       vmovdqa %ymm0, (8 * 16)(%rsp);
+       vmovdqa %ymm0, (10 * 16)(%rsp);
+       vmovdqa %ymm0, (12 * 16)(%rsp);
+       vmovdqa %ymm0, (16 * 16)(%rsp);
+       vmovdqa %ymm0, (18 * 16)(%rsp);
+
+       /* Handle tailing 1…7 blocks in nblk-unaligned loop. */
+       movq %r8, %r10;
+       cmpq $1, %r8;
+       jnb .Locb_unaligned_blk8;
+
+.align 8
+.Ldone_ocb:
+       movq 16(%rbp), %r14; /* offset ptr. */
+       vpxor (0 * 16)(%rdi), %xmm15, %xmm15; /* offset ^ first key ^ first key 
*/
+       vmovdqu %xmm15, (%r14); /* Store offset. */
+
+       /* Handle decryption checksumming. */
+
+       testl %r15d, %r15d;
+       jnz .Locb_dec_checksum_done;
+       movq 24(%rbp), %rax; /* checksum ptr. */
+       movq (STACK_REGS_POS + 4 * 8)(%rsp), %r10;
+       movq (STACK_REGS_POS + 5 * 8)(%rsp), %r11;
+       call _gcry_vaes_avx2_ocb_checksum;
+.Locb_dec_checksum_done:
+
+       /* Burn stack. */
+       vpxor %ymm0, %ymm0, %ymm0;
+       vmovdqa %ymm0, (14 * 16)(%rsp);
+
+       vzeroall;
+
+       movq (STACK_REGS_POS + 0 * 8)(%rsp), %r12;
+       CFI_RESTORE(%r12);
+       movq (STACK_REGS_POS + 1 * 8)(%rsp), %r13;
+       CFI_RESTORE(%r13);
+       movq (STACK_REGS_POS + 2 * 8)(%rsp), %r14;
+       CFI_RESTORE(%r14);
+       movq (STACK_REGS_POS + 3 * 8)(%rsp), %r15;
+       CFI_RESTORE(%r15);
+
+       leave;
+       CFI_LEAVE();
+       ret_spec_stop
+
+#undef STACK_REGS_POS
+#undef STACK_ALLOC
+
+       CFI_ENDPROC();
+ELF(.size _gcry_vaes_avx2_ocb_crypt_amd64,.-_gcry_vaes_avx2_ocb_crypt_amd64)
+
+/**********************************************************************
+  CTR-mode encryption
+ **********************************************************************/
+ELF(.type _gcry_vaes_avx2_xts_crypt_amd64,@function)
+.globl _gcry_vaes_avx2_xts_crypt_amd64
+_gcry_vaes_avx2_xts_crypt_amd64:
+       /* input:
+        *      %rdi: round keys
+        *      %rsi: tweak
+        *      %rdx: dst
+        *      %rcx: src
+        *      %r8:  nblocks
+        *      %r9:  nrounds
+        *      8(%rsp): encrypt
+        */
+       CFI_STARTPROC();
+
+       movl 8(%rsp), %eax;
+
+#define tweak_clmul(shift, out, tweak, hi_tweak, tmp1, tmp2) \
+       vpsrld $(32-(shift)), hi_tweak, tmp2; \
+       vpsllq $(shift), tweak, out; \
+       vpclmulqdq $0, .Lxts_gfmul_clmul rRIP, tmp2, tmp1; \
+       vpunpckhqdq tmp2, tmp1, tmp1; \
+       vpxor tmp1, out, out;
+
+       /* Prepare tweak. */
+       vmovdqu (%rsi), %xmm15;
+       vpshufb .Lxts_high_bit_shuf rRIP, %xmm15, %xmm13;
+       tweak_clmul(1, %xmm11, %xmm15, %xmm13, %xmm0, %xmm1);
+       vinserti128 $1, %xmm11, %ymm15, %ymm15; /* tweak:tweak1 */
+       vpshufb .Lxts_high_bit_shuf rRIP, %ymm15, %ymm13;
+
+       cmpq $8, %r8;
+       jb .Lxts_crypt_blk4;
+
+       /* Process eight blocks per loop. */
+       leaq -8(%r8), %r8;
+
+       vmovdqa %ymm15, %ymm5;
+       tweak_clmul(2, %ymm6, %ymm15, %ymm13, %ymm0, %ymm1);
+       tweak_clmul(4, %ymm7, %ymm15, %ymm13, %ymm0, %ymm1);
+       tweak_clmul(6, %ymm8, %ymm15, %ymm13, %ymm0, %ymm1);
+       tweak_clmul(8, %ymm15, %ymm15, %ymm13, %ymm0, %ymm1);
+       vpshufb .Lxts_high_bit_shuf rRIP, %ymm15, %ymm13;
+
+       vbroadcasti128 (0 * 16)(%rdi), %ymm4;
+       vpxor (0 * 16)(%rcx), %ymm5, %ymm0;
+       vpxor (2 * 16)(%rcx), %ymm6, %ymm1;
+       vpxor (4 * 16)(%rcx), %ymm7, %ymm2;
+       vpxor (6 * 16)(%rcx), %ymm8, %ymm3;
+
+       leaq (8 * 16)(%rcx), %rcx;
+
+.align 8
+.Lxts_crypt_blk8_loop:
+       cmpq $8, %r8;
+       jb .Lxts_crypt_blk8_tail;
+               leaq -8(%r8), %r8;
+
+               testl %eax, %eax;
+               jz .Lxts_dec_blk8;
+                       /* AES rounds */
+                       XOR4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+                       vbroadcasti128 (1 * 16)(%rdi), %ymm4;
+                       VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+                       vbroadcasti128 (2 * 16)(%rdi), %ymm4;
+                       VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+                       vbroadcasti128 (3 * 16)(%rdi), %ymm4;
+                       VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+                       vbroadcasti128 (4 * 16)(%rdi), %ymm4;
+                       VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+                         vmovdqa %ymm15, %ymm9;
+                         tweak_clmul(2, %ymm10, %ymm15, %ymm13, %ymm12, 
%ymm14);
+                         tweak_clmul(4, %ymm11, %ymm15, %ymm13, %ymm12, 
%ymm14);
+                       vbroadcasti128 (5 * 16)(%rdi), %ymm4;
+                       VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+                       vbroadcasti128 (6 * 16)(%rdi), %ymm4;
+                       VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+                       vbroadcasti128 (7 * 16)(%rdi), %ymm4;
+                       VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+                       vbroadcasti128 (8 * 16)(%rdi), %ymm4;
+                       VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+                       vbroadcasti128 (9 * 16)(%rdi), %ymm4;
+                       VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+                       vbroadcasti128 (10 * 16)(%rdi), %ymm4;
+                       cmpl $12, %r9d;
+                       jb .Lxts_enc_blk8_last;
+                       VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+                       vbroadcasti128 (11 * 16)(%rdi), %ymm4;
+                       VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+                       vbroadcasti128 (12 * 16)(%rdi), %ymm4;
+                       jz .Lxts_enc_blk8_last;
+                       VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+                       vbroadcasti128 (13 * 16)(%rdi), %ymm4;
+                       VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+                       vbroadcasti128 (14 * 16)(%rdi), %ymm4;
+
+                       /* Last round and output handling. */
+               .Lxts_enc_blk8_last:
+                       vpxor %ymm4, %ymm5, %ymm5; /* Xor tweak to last round 
key. */
+                       vpxor %ymm4, %ymm6, %ymm6;
+                       vpxor %ymm4, %ymm7, %ymm7;
+                       vpxor %ymm4, %ymm8, %ymm4;
+                         tweak_clmul(6, %ymm8, %ymm15, %ymm13, %ymm12, %ymm14);
+                         tweak_clmul(8, %ymm15, %ymm15, %ymm13, %ymm12, 
%ymm14);
+                       vpshufb .Lxts_high_bit_shuf rRIP, %ymm15, %ymm13;
+                       vaesenclast %ymm5, %ymm0, %ymm0;
+                       vaesenclast %ymm6, %ymm1, %ymm1;
+                       vaesenclast %ymm7, %ymm2, %ymm2;
+                       vaesenclast %ymm4, %ymm3, %ymm3;
+
+                       vmovdqu %ymm0, (0 * 16)(%rdx);
+                       vmovdqu %ymm1, (2 * 16)(%rdx);
+                       vmovdqu %ymm2, (4 * 16)(%rdx);
+                       vmovdqu %ymm3, (6 * 16)(%rdx);
+                       leaq (8 * 16)(%rdx), %rdx;
+
+                       vbroadcasti128 (0 * 16)(%rdi), %ymm4;
+                       vpxor (0 * 16)(%rcx), %ymm9, %ymm0;
+                       vpxor (2 * 16)(%rcx), %ymm10, %ymm1;
+                       vpxor (4 * 16)(%rcx), %ymm11, %ymm2;
+                       vpxor (6 * 16)(%rcx), %ymm8, %ymm3;
+
+                         vmovdqa %ymm9, %ymm5;
+                         vmovdqa %ymm10, %ymm6;
+                         vmovdqa %ymm11, %ymm7;
+
+                       leaq (8 * 16)(%rcx), %rcx;
+
+                       jmp .Lxts_crypt_blk8_loop;
+
+               .align 8
+               .Lxts_dec_blk8:
+                       /* AES rounds */
+                       XOR4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+                       vbroadcasti128 (1 * 16)(%rdi), %ymm4;
+                       VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+                       vbroadcasti128 (2 * 16)(%rdi), %ymm4;
+                       VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+                       vbroadcasti128 (3 * 16)(%rdi), %ymm4;
+                       VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+                       vbroadcasti128 (4 * 16)(%rdi), %ymm4;
+                       VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+                         vmovdqa %ymm15, %ymm9;
+                         tweak_clmul(2, %ymm10, %ymm15, %ymm13, %ymm12, 
%ymm14);
+                         tweak_clmul(4, %ymm11, %ymm15, %ymm13, %ymm12, 
%ymm14);
+                       vbroadcasti128 (5 * 16)(%rdi), %ymm4;
+                       VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+                       vbroadcasti128 (6 * 16)(%rdi), %ymm4;
+                       VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+                       vbroadcasti128 (7 * 16)(%rdi), %ymm4;
+                       VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+                       vbroadcasti128 (8 * 16)(%rdi), %ymm4;
+                       VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+                       vbroadcasti128 (9 * 16)(%rdi), %ymm4;
+                       VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+                       vbroadcasti128 (10 * 16)(%rdi), %ymm4;
+                       cmpl $12, %r9d;
+                       jb .Lxts_dec_blk8_last;
+                       VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+                       vbroadcasti128 (11 * 16)(%rdi), %ymm4;
+                       VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+                       vbroadcasti128 (12 * 16)(%rdi), %ymm4;
+                       jz .Lxts_dec_blk8_last;
+                       VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+                       vbroadcasti128 (13 * 16)(%rdi), %ymm4;
+                       VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+                       vbroadcasti128 (14 * 16)(%rdi), %ymm4;
+
+                       /* Last round and output handling. */
+               .Lxts_dec_blk8_last:
+                       vpxor %ymm4, %ymm5, %ymm5; /* Xor tweak to last round 
key. */
+                       vpxor %ymm4, %ymm6, %ymm6;
+                       vpxor %ymm4, %ymm7, %ymm7;
+                       vpxor %ymm4, %ymm8, %ymm4;
+                         tweak_clmul(6, %ymm8, %ymm15, %ymm13, %ymm12, %ymm14);
+                         tweak_clmul(8, %ymm15, %ymm15, %ymm13, %ymm12, 
%ymm14);
+                       vpshufb .Lxts_high_bit_shuf rRIP, %ymm15, %ymm13;
+                       vaesdeclast %ymm5, %ymm0, %ymm0;
+                       vaesdeclast %ymm6, %ymm1, %ymm1;
+                       vaesdeclast %ymm7, %ymm2, %ymm2;
+                       vaesdeclast %ymm4, %ymm3, %ymm3;
+
+                       vmovdqu %ymm0, (0 * 16)(%rdx);
+                       vmovdqu %ymm1, (2 * 16)(%rdx);
+                       vmovdqu %ymm2, (4 * 16)(%rdx);
+                       vmovdqu %ymm3, (6 * 16)(%rdx);
+                       leaq (8 * 16)(%rdx), %rdx;
+
+                       vbroadcasti128 (0 * 16)(%rdi), %ymm4;
+                       vpxor (0 * 16)(%rcx), %ymm9, %ymm0;
+                       vpxor (2 * 16)(%rcx), %ymm10, %ymm1;
+                       vpxor (4 * 16)(%rcx), %ymm11, %ymm2;
+                       vpxor (6 * 16)(%rcx), %ymm8, %ymm3;
+
+                         vmovdqa %ymm9, %ymm5;
+                         vmovdqa %ymm10, %ymm6;
+                         vmovdqa %ymm11, %ymm7;
+
+                       leaq (8 * 16)(%rcx), %rcx;
+
+                       jmp .Lxts_crypt_blk8_loop;
+
+       .align 8
+       .Lxts_crypt_blk8_tail:
+               testl %eax, %eax;
+               jz .Lxts_dec_tail_blk8;
+                       /* AES rounds */
+                       XOR4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+                       vbroadcasti128 (1 * 16)(%rdi), %ymm4;
+                       VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+                       vbroadcasti128 (2 * 16)(%rdi), %ymm4;
+                       VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+                       vbroadcasti128 (3 * 16)(%rdi), %ymm4;
+                       VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+                       vbroadcasti128 (4 * 16)(%rdi), %ymm4;
+                       VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+                       vbroadcasti128 (5 * 16)(%rdi), %ymm4;
+                       VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+                       vbroadcasti128 (6 * 16)(%rdi), %ymm4;
+                       VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+                       vbroadcasti128 (7 * 16)(%rdi), %ymm4;
+                       VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+                       vbroadcasti128 (8 * 16)(%rdi), %ymm4;
+                       VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+                       vbroadcasti128 (9 * 16)(%rdi), %ymm4;
+                       VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+                       vbroadcasti128 (10 * 16)(%rdi), %ymm4;
+                       cmpl $12, %r9d;
+                       jb .Lxts_enc_blk8_tail_last;
+                       VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+                       vbroadcasti128 (11 * 16)(%rdi), %ymm4;
+                       VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+                       vbroadcasti128 (12 * 16)(%rdi), %ymm4;
+                       jz .Lxts_enc_blk8_tail_last;
+                       VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+                       vbroadcasti128 (13 * 16)(%rdi), %ymm4;
+                       VAESENC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+                       vbroadcasti128 (14 * 16)(%rdi), %ymm4;
+
+                       /* Last round and output handling. */
+               .Lxts_enc_blk8_tail_last:
+                       vpxor %ymm4, %ymm5, %ymm5; /* Xor tweak to last round 
key. */
+                       vpxor %ymm4, %ymm6, %ymm6;
+                       vpxor %ymm4, %ymm7, %ymm7;
+                       vpxor %ymm4, %ymm8, %ymm4;
+                       vaesenclast %ymm5, %ymm0, %ymm0;
+                       vaesenclast %ymm6, %ymm1, %ymm1;
+                       vaesenclast %ymm7, %ymm2, %ymm2;
+                       vaesenclast %ymm4, %ymm3, %ymm3;
+                       vmovdqu %ymm0, (0 * 16)(%rdx);
+                       vmovdqu %ymm1, (2 * 16)(%rdx);
+                       vmovdqu %ymm2, (4 * 16)(%rdx);
+                       vmovdqu %ymm3, (6 * 16)(%rdx);
+                       leaq (8 * 16)(%rdx), %rdx;
+
+                       jmp .Lxts_crypt_blk4;
+
+               .align 8
+               .Lxts_dec_tail_blk8:
+                       /* AES rounds */
+                       XOR4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+                       vbroadcasti128 (1 * 16)(%rdi), %ymm4;
+                       VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+                       vbroadcasti128 (2 * 16)(%rdi), %ymm4;
+                       VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+                       vbroadcasti128 (3 * 16)(%rdi), %ymm4;
+                       VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+                       vbroadcasti128 (4 * 16)(%rdi), %ymm4;
+                       VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+                       vbroadcasti128 (5 * 16)(%rdi), %ymm4;
+                       VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+                       vbroadcasti128 (6 * 16)(%rdi), %ymm4;
+                       VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+                       vbroadcasti128 (7 * 16)(%rdi), %ymm4;
+                       VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+                       vbroadcasti128 (8 * 16)(%rdi), %ymm4;
+                       VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+                       vbroadcasti128 (9 * 16)(%rdi), %ymm4;
+                       VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+                       vbroadcasti128 (10 * 16)(%rdi), %ymm4;
+                       cmpl $12, %r9d;
+                       jb .Lxts_dec_blk8_tail_last;
+                       VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+                       vbroadcasti128 (11 * 16)(%rdi), %ymm4;
+                       VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+                       vbroadcasti128 (12 * 16)(%rdi), %ymm4;
+                       jz .Lxts_dec_blk8_tail_last;
+                       VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+                       vbroadcasti128 (13 * 16)(%rdi), %ymm4;
+                       VAESDEC4(%ymm4, %ymm0, %ymm1, %ymm2, %ymm3);
+                       vbroadcasti128 (14 * 16)(%rdi), %ymm4;
+
+                       /* Last round and output handling. */
+               .Lxts_dec_blk8_tail_last:
+                       vpxor %ymm4, %ymm5, %ymm5; /* Xor tweak to last round 
key. */
+                       vpxor %ymm4, %ymm6, %ymm6;
+                       vpxor %ymm4, %ymm7, %ymm7;
+                       vpxor %ymm4, %ymm8, %ymm4;
+                       vaesdeclast %ymm5, %ymm0, %ymm0;
+                       vaesdeclast %ymm6, %ymm1, %ymm1;
+                       vaesdeclast %ymm7, %ymm2, %ymm2;
+                       vaesdeclast %ymm4, %ymm3, %ymm3;
+                       vmovdqu %ymm0, (0 * 16)(%rdx);
+                       vmovdqu %ymm1, (2 * 16)(%rdx);
+                       vmovdqu %ymm2, (4 * 16)(%rdx);
+                       vmovdqu %ymm3, (6 * 16)(%rdx);
+                       leaq (8 * 16)(%rdx), %rdx;
+
+       /* Handle trailing four blocks. */
+.align 8
+.Lxts_crypt_blk4:
+       /* Try exit early as typically input length is large power of 2. */
+       cmpq $0, %r8;
+       jb .Ldone_xts_crypt;
+       cmpq $4, %r8;
+       jb .Lxts_crypt_blk1;
+
+       leaq -4(%r8), %r8;
+
+       vmovdqa %ymm15, %ymm5;
+       tweak_clmul(2, %ymm6, %ymm15, %ymm13, %ymm0, %ymm1);
+       tweak_clmul(4, %ymm15, %ymm15, %ymm13, %ymm0, %ymm1);
+       vpshufb .Lxts_high_bit_shuf rRIP, %ymm15, %ymm13;
+
+       vbroadcasti128 (0 * 16)(%rdi), %ymm4;
+       vpxor (0 * 16)(%rcx), %ymm5, %ymm0;
+       vpxor (2 * 16)(%rcx), %ymm6, %ymm1;
+
+       leaq (4 * 16)(%rcx), %rcx;
+
+       testl %eax, %eax;
+       jz .Lxts_dec_blk4;
+               /* AES rounds */
+               XOR2(%ymm4, %ymm0, %ymm1);
+               vbroadcasti128 (1 * 16)(%rdi), %ymm4;
+               VAESENC2(%ymm4, %ymm0, %ymm1);
+               vbroadcasti128 (2 * 16)(%rdi), %ymm4;
+               VAESENC2(%ymm4, %ymm0, %ymm1);
+               vbroadcasti128 (3 * 16)(%rdi), %ymm4;
+               VAESENC2(%ymm4, %ymm0, %ymm1);
+               vbroadcasti128 (4 * 16)(%rdi), %ymm4;
+               VAESENC2(%ymm4, %ymm0, %ymm1);
+               vbroadcasti128 (5 * 16)(%rdi), %ymm4;
+               VAESENC2(%ymm4, %ymm0, %ymm1);
+               vbroadcasti128 (6 * 16)(%rdi), %ymm4;
+               VAESENC2(%ymm4, %ymm0, %ymm1);
+               vbroadcasti128 (7 * 16)(%rdi), %ymm4;
+               VAESENC2(%ymm4, %ymm0, %ymm1);
+               vbroadcasti128 (8 * 16)(%rdi), %ymm4;
+               VAESENC2(%ymm4, %ymm0, %ymm1);
+               vbroadcasti128 (9 * 16)(%rdi), %ymm4;
+               VAESENC2(%ymm4, %ymm0, %ymm1);
+               vbroadcasti128 (10 * 16)(%rdi), %ymm4;
+               cmpl $12, %r9d;
+               jb .Lxts_enc_blk4_last;
+               VAESENC2(%ymm4, %ymm0, %ymm1);
+               vbroadcasti128 (11 * 16)(%rdi), %ymm4;
+               VAESENC2(%ymm4, %ymm0, %ymm1);
+               vbroadcasti128 (12 * 16)(%rdi), %ymm4;
+               jz .Lxts_enc_blk4_last;
+               VAESENC2(%ymm4, %ymm0, %ymm1);
+               vbroadcasti128 (13 * 16)(%rdi), %ymm4;
+               VAESENC2(%ymm4, %ymm0, %ymm1);
+               vbroadcasti128 (14 * 16)(%rdi), %ymm4;
+
+               /* Last round and output handling. */
+       .Lxts_enc_blk4_last:
+               vpxor %ymm4, %ymm5, %ymm5; /* Xor tweak to last round key. */
+               vpxor %ymm4, %ymm6, %ymm6;
+               vaesenclast %ymm5, %ymm0, %ymm0;
+               vaesenclast %ymm6, %ymm1, %ymm1;
+               vmovdqu %ymm0, (0 * 16)(%rdx);
+               vmovdqu %ymm1, (2 * 16)(%rdx);
+               leaq (4 * 16)(%rdx), %rdx;
+
+               jmp .Lxts_crypt_blk1;
+
+       .align 8
+       .Lxts_dec_blk4:
+               /* AES rounds */
+               XOR2(%ymm4, %ymm0, %ymm1);
+               vbroadcasti128 (1 * 16)(%rdi), %ymm4;
+               VAESDEC2(%ymm4, %ymm0, %ymm1);
+               vbroadcasti128 (2 * 16)(%rdi), %ymm4;
+               VAESDEC2(%ymm4, %ymm0, %ymm1);
+               vbroadcasti128 (3 * 16)(%rdi), %ymm4;
+               VAESDEC2(%ymm4, %ymm0, %ymm1);
+               vbroadcasti128 (4 * 16)(%rdi), %ymm4;
+               VAESDEC2(%ymm4, %ymm0, %ymm1);
+               vbroadcasti128 (5 * 16)(%rdi), %ymm4;
+               VAESDEC2(%ymm4, %ymm0, %ymm1);
+               vbroadcasti128 (6 * 16)(%rdi), %ymm4;
+               VAESDEC2(%ymm4, %ymm0, %ymm1);
+               vbroadcasti128 (7 * 16)(%rdi), %ymm4;
+               VAESDEC2(%ymm4, %ymm0, %ymm1);
+               vbroadcasti128 (8 * 16)(%rdi), %ymm4;
+               VAESDEC2(%ymm4, %ymm0, %ymm1);
+               vbroadcasti128 (9 * 16)(%rdi), %ymm4;
+               VAESDEC2(%ymm4, %ymm0, %ymm1);
+               vbroadcasti128 (10 * 16)(%rdi), %ymm4;
+               cmpl $12, %r9d;
+               jb .Lxts_dec_blk4_last;
+               VAESDEC2(%ymm4, %ymm0, %ymm1);
+               vbroadcasti128 (11 * 16)(%rdi), %ymm4;
+               VAESDEC2(%ymm4, %ymm0, %ymm1);
+               vbroadcasti128 (12 * 16)(%rdi), %ymm4;
+               jz .Lxts_dec_blk4_last;
+               VAESDEC2(%ymm4, %ymm0, %ymm1);
+               vbroadcasti128 (13 * 16)(%rdi), %ymm4;
+               VAESDEC2(%ymm4, %ymm0, %ymm1);
+               vbroadcasti128 (14 * 16)(%rdi), %ymm4;
+
+               /* Last round and output handling. */
+       .Lxts_dec_blk4_last:
+               vpxor %ymm4, %ymm5, %ymm5; /* Xor tweak to last round key. */
+               vpxor %ymm4, %ymm6, %ymm6;
+               vaesdeclast %ymm5, %ymm0, %ymm0;
+               vaesdeclast %ymm6, %ymm1, %ymm1;
+               vmovdqu %ymm0, (0 * 16)(%rdx);
+               vmovdqu %ymm1, (2 * 16)(%rdx);
+               leaq (4 * 16)(%rdx), %rdx;
+
+       /* Process trailing one to three blocks, one per loop. */
+.align 8
+.Lxts_crypt_blk1:
+       cmpq $1, %r8;
+       jb .Ldone_xts_crypt;
+
+       leaq -1(%r8), %r8;
+
+       vpxor (%rcx), %xmm15, %xmm0;
+       vmovdqa %xmm15, %xmm5;
+       tweak_clmul(1, %xmm15, %xmm15, %xmm13, %xmm2, %xmm3);
+       vpshufb .Lxts_high_bit_shuf rRIP, %xmm15, %xmm13;
+
+       leaq 16(%rcx), %rcx;
+
+       testl %eax, %eax;
+       jz .Lxts_dec_blk1;
+               /* AES rounds. */
+               vpxor (0 * 16)(%rdi), %xmm0, %xmm0;
+               vaesenc (1 * 16)(%rdi), %xmm0, %xmm0;
+               vaesenc (2 * 16)(%rdi), %xmm0, %xmm0;
+               vaesenc (3 * 16)(%rdi), %xmm0, %xmm0;
+               vaesenc (4 * 16)(%rdi), %xmm0, %xmm0;
+               vaesenc (5 * 16)(%rdi), %xmm0, %xmm0;
+               vaesenc (6 * 16)(%rdi), %xmm0, %xmm0;
+               vaesenc (7 * 16)(%rdi), %xmm0, %xmm0;
+               vaesenc (8 * 16)(%rdi), %xmm0, %xmm0;
+               vaesenc (9 * 16)(%rdi), %xmm0, %xmm0;
+               vmovdqa (10 * 16)(%rdi), %xmm1;
+               cmpl $12, %r9d;
+               jb .Lxts_enc_blk1_last;
+               vaesenc %xmm1, %xmm0, %xmm0;
+               vaesenc (11 * 16)(%rdi), %xmm0, %xmm0;
+               vmovdqa (12 * 16)(%rdi), %xmm1;
+               jz .Lxts_enc_blk1_last;
+               vaesenc %xmm1, %xmm0, %xmm0;
+               vaesenc (13 * 16)(%rdi), %xmm0, %xmm0;
+               vmovdqa (14 * 16)(%rdi), %xmm1;
+
+               /* Last round and output handling. */
+       .Lxts_enc_blk1_last:
+               vpxor %xmm1, %xmm5, %xmm5; /* Xor tweak to last round key. */
+               vaesenclast %xmm5, %xmm0, %xmm0;
+               vmovdqu %xmm0, (%rdx);
+               leaq 16(%rdx), %rdx;
+
+               jmp .Lxts_crypt_blk1;
+
+       .align 8
+       .Lxts_dec_blk1:
+               /* AES rounds. */
+               vpxor (0 * 16)(%rdi), %xmm0, %xmm0;
+               vaesdec (1 * 16)(%rdi), %xmm0, %xmm0;
+               vaesdec (2 * 16)(%rdi), %xmm0, %xmm0;
+               vaesdec (3 * 16)(%rdi), %xmm0, %xmm0;
+               vaesdec (4 * 16)(%rdi), %xmm0, %xmm0;
+               vaesdec (5 * 16)(%rdi), %xmm0, %xmm0;
+               vaesdec (6 * 16)(%rdi), %xmm0, %xmm0;
+               vaesdec (7 * 16)(%rdi), %xmm0, %xmm0;
+               vaesdec (8 * 16)(%rdi), %xmm0, %xmm0;
+               vaesdec (9 * 16)(%rdi), %xmm0, %xmm0;
+               vmovdqa (10 * 16)(%rdi), %xmm1;
+               cmpl $12, %r9d;
+               jb .Lxts_dec_blk1_last;
+               vaesdec %xmm1, %xmm0, %xmm0;
+               vaesdec (11 * 16)(%rdi), %xmm0, %xmm0;
+               vmovdqa (12 * 16)(%rdi), %xmm1;
+               jz .Lxts_dec_blk1_last;
+               vaesdec %xmm1, %xmm0, %xmm0;
+               vaesdec (13 * 16)(%rdi), %xmm0, %xmm0;
+               vmovdqa (14 * 16)(%rdi), %xmm1;
+
+               /* Last round and output handling. */
+       .Lxts_dec_blk1_last:
+               vpxor %xmm1, %xmm5, %xmm5; /* Xor tweak to last round key. */
+               vaesdeclast %xmm5, %xmm0, %xmm0;
+               vmovdqu %xmm0, (%rdx);
+               leaq 16(%rdx), %rdx;
+
+               jmp .Lxts_crypt_blk1;
+
+.align 8
+.Ldone_xts_crypt:
+       /* Store IV. */
+       vmovdqu %xmm15, (%rsi);
+
+       vzeroall;
+
+       xorl %eax, %eax
+       ret_spec_stop
+       CFI_ENDPROC();
+ELF(.size _gcry_vaes_avx2_xts_crypt_amd64,.-_gcry_vaes_avx2_xts_crypt_amd64)
+
+/**********************************************************************
+  constants
+ **********************************************************************/
+ELF(.type _gcry_vaes_consts,@object)
+_gcry_vaes_consts:
+.align 32
+.Lbige_addb_0:
+       .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+.Lbige_addb_1:
+       .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1
+.Lbige_addb_2:
+       .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2
+.Lbige_addb_3:
+       .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3
+.Lbige_addb_4:
+       .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4
+.Lbige_addb_5:
+       .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5
+.Lbige_addb_6:
+       .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6
+.Lbige_addb_7:
+       .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7
+.Lbige_addb_8:
+       .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8
+.Lbige_addb_9:
+       .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9
+.Lbige_addb_10:
+       .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 10
+.Lbige_addb_11:
+       .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11
+.Lbige_addb_12:
+       .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12
+.Lbige_addb_13:
+       .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 13
+.Lbige_addb_14:
+       .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14
+.Lbige_addb_15:
+       .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15
+
+.Lle_addd_0:
+       .byte 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+.Lle_addd_1:
+       .byte 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+.Lle_addd_2:
+       .byte 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+.Lle_addd_3:
+       .byte 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+.Lle_addd_4:
+       .byte 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+.Lle_addd_5:
+       .byte 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+.Lle_addd_6:
+       .byte 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+.Lle_addd_7:
+       .byte 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+.Lle_addd_8:
+       .byte 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+.Lle_addd_9:
+       .byte 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+.Lle_addd_10:
+       .byte 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+.Lle_addd_11:
+       .byte 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+.Lle_addd_12:
+       .byte 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+.Lle_addd_13:
+       .byte 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+.Lle_addd_14:
+       .byte 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+.Lle_addd_15:
+       .byte 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+
+.Lle_addd_4_2:
+       .byte 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+       .byte 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+.Lle_addd_8_2:
+       .byte 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+       .byte 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+.Lle_addd_16_2:
+       .byte 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+       .byte 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+
+.Lxts_gfmul_clmul:
+       .long 0x00, 0x87, 0x00, 0x00
+       .long 0x00, 0x87, 0x00, 0x00
+.Lxts_high_bit_shuf:
+       .byte -1, -1, -1, -1, 12, 13, 14, 15
+       .byte 4, 5, 6, 7, -1, -1, -1, -1
+       .byte -1, -1, -1, -1, 12, 13, 14, 15
+       .byte 4, 5, 6, 7, -1, -1, -1, -1
+.Lbswap128_mask:
+       .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
+ELF(.size _gcry_vaes_consts,.-_gcry_vaes_consts)
+
+#endif /* HAVE_GCC_INLINE_ASM_VAES */
+#endif /* __x86_64__ */
diff --git a/grub-core/lib/libgcrypt/cipher/rijndael-vaes.c 
b/grub-core/lib/libgcrypt/cipher/rijndael-vaes.c
new file mode 100644
index 000000000..0d7d13672
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/rijndael-vaes.c
@@ -0,0 +1,197 @@
+/* VAES/AVX2 accelerated AES for Libgcrypt
+ * Copyright (C) 2021 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "types.h"  /* for byte and u32 typedefs */
+#include "g10lib.h"
+#include "cipher.h"
+#include "bufhelp.h"
+#include "cipher-selftest.h"
+#include "rijndael-internal.h"
+#include "./cipher-internal.h"
+
+
+#ifdef USE_VAES
+
+
+# ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+#  define ASM_FUNC_ABI __attribute__((sysv_abi))
+# else
+#  define ASM_FUNC_ABI
+# endif
+
+
+extern void _gcry_aes_aesni_prepare_decryption(RIJNDAEL_context *ctx);
+
+
+extern void _gcry_vaes_avx2_cbc_dec_amd64 (const void *keysched,
+                                          unsigned char *iv,
+                                          void *outbuf_arg,
+                                          const void *inbuf_arg,
+                                          size_t nblocks,
+                                          unsigned int nrounds) ASM_FUNC_ABI;
+
+extern void _gcry_vaes_avx2_cfb_dec_amd64 (const void *keysched,
+                                          unsigned char *iv,
+                                          void *outbuf_arg,
+                                          const void *inbuf_arg,
+                                          size_t nblocks,
+                                          unsigned int nrounds) ASM_FUNC_ABI;
+
+extern void _gcry_vaes_avx2_ctr_enc_amd64 (const void *keysched,
+                                          unsigned char *ctr,
+                                          void *outbuf_arg,
+                                          const void *inbuf_arg,
+                                          size_t nblocks,
+                                          unsigned int nrounds) ASM_FUNC_ABI;
+
+extern void _gcry_vaes_avx2_ctr32le_enc_amd64 (const void *keysched,
+                                              unsigned char *ctr,
+                                              void *outbuf_arg,
+                                              const void *inbuf_arg,
+                                              size_t nblocks,
+                                              unsigned int nrounds)
+                                               ASM_FUNC_ABI;
+
+extern void _gcry_vaes_avx2_ocb_crypt_amd64 (const void *keysched,
+                                            unsigned int blkn,
+                                            void *outbuf_arg,
+                                            const void *inbuf_arg,
+                                            size_t nblocks,
+                                            unsigned int nrounds,
+                                            unsigned char *offset,
+                                            unsigned char *checksum,
+                                            unsigned char *L_table,
+                                            int encrypt) ASM_FUNC_ABI;
+
+extern void _gcry_vaes_avx2_xts_crypt_amd64 (const void *keysched,
+                                            unsigned char *tweak,
+                                            void *outbuf_arg,
+                                            const void *inbuf_arg,
+                                            size_t nblocks,
+                                            unsigned int nrounds,
+                                            int encrypt) ASM_FUNC_ABI;
+
+
+void
+_gcry_aes_vaes_cbc_dec (void *context, unsigned char *iv,
+                       void *outbuf, const void *inbuf,
+                       size_t nblocks)
+{
+  RIJNDAEL_context *ctx = context;
+  const void *keysched = ctx->keyschdec32;
+  unsigned int nrounds = ctx->rounds;
+
+  if (!ctx->decryption_prepared)
+    {
+      _gcry_aes_aesni_prepare_decryption (ctx);
+      ctx->decryption_prepared = 1;
+    }
+
+  _gcry_vaes_avx2_cbc_dec_amd64 (keysched, iv, outbuf, inbuf, nblocks, 
nrounds);
+}
+
+void
+_gcry_aes_vaes_cfb_dec (void *context, unsigned char *iv,
+                       void *outbuf, const void *inbuf,
+                       size_t nblocks)
+{
+  RIJNDAEL_context *ctx = context;
+  const void *keysched = ctx->keyschenc32;
+  unsigned int nrounds = ctx->rounds;
+
+  _gcry_vaes_avx2_cfb_dec_amd64 (keysched, iv, outbuf, inbuf, nblocks, 
nrounds);
+}
+
+void
+_gcry_aes_vaes_ctr_enc (void *context, unsigned char *iv,
+                       void *outbuf, const void *inbuf,
+                       size_t nblocks)
+{
+  RIJNDAEL_context *ctx = context;
+  const void *keysched = ctx->keyschenc32;
+  unsigned int nrounds = ctx->rounds;
+
+  _gcry_vaes_avx2_ctr_enc_amd64 (keysched, iv, outbuf, inbuf, nblocks, 
nrounds);
+}
+
+void
+_gcry_aes_vaes_ctr32le_enc (void *context, unsigned char *iv,
+                           void *outbuf, const void *inbuf,
+                           size_t nblocks)
+{
+  RIJNDAEL_context *ctx = context;
+  const void *keysched = ctx->keyschenc32;
+  unsigned int nrounds = ctx->rounds;
+
+  _gcry_vaes_avx2_ctr32le_enc_amd64 (keysched, iv, outbuf, inbuf, nblocks,
+                                    nrounds);
+}
+
+size_t
+_gcry_aes_vaes_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
+                         const void *inbuf_arg, size_t nblocks,
+                         int encrypt)
+{
+  RIJNDAEL_context *ctx = (void *)&c->context.c;
+  const void *keysched = encrypt ? ctx->keyschenc32 : ctx->keyschdec32;
+  unsigned char *outbuf = outbuf_arg;
+  const unsigned char *inbuf = inbuf_arg;
+  unsigned int nrounds = ctx->rounds;
+  u64 blkn = c->u_mode.ocb.data_nblocks;
+
+  if (!encrypt && !ctx->decryption_prepared)
+    {
+      _gcry_aes_aesni_prepare_decryption (ctx);
+      ctx->decryption_prepared = 1;
+    }
+
+  c->u_mode.ocb.data_nblocks = blkn + nblocks;
+
+  _gcry_vaes_avx2_ocb_crypt_amd64 (keysched, (unsigned int)blkn, outbuf, inbuf,
+                                  nblocks, nrounds, c->u_iv.iv, c->u_ctr.ctr,
+                                  c->u_mode.ocb.L[0], encrypt);
+
+  return 0;
+}
+
+void
+_gcry_aes_vaes_xts_crypt (void *context, unsigned char *tweak,
+                         void *outbuf, const void *inbuf,
+                         size_t nblocks, int encrypt)
+{
+  RIJNDAEL_context *ctx = context;
+  const void *keysched = encrypt ? ctx->keyschenc32 : ctx->keyschdec32;
+  unsigned int nrounds = ctx->rounds;
+
+  if (!encrypt && !ctx->decryption_prepared)
+    {
+      _gcry_aes_aesni_prepare_decryption (ctx);
+      ctx->decryption_prepared = 1;
+    }
+
+  _gcry_vaes_avx2_xts_crypt_amd64 (keysched, tweak, outbuf, inbuf, nblocks,
+                                  nrounds, encrypt);
+}
+
+#endif /* USE_VAES */
diff --git a/grub-core/lib/libgcrypt/cipher/rijndael.c 
b/grub-core/lib/libgcrypt/cipher/rijndael.c
index b3effa2db..9b96b6166 100644
--- a/grub-core/lib/libgcrypt/cipher/rijndael.c
+++ b/grub-core/lib/libgcrypt/cipher/rijndael.c
@@ -1,6 +1,6 @@
 /* Rijndael (AES) for GnuPG
  * Copyright (C) 2000, 2001, 2002, 2003, 2007,
- *               2008, 2011 Free Software Foundation, Inc.
+ *               2008, 2011, 2012 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
  *
@@ -45,157 +45,396 @@
 #include "types.h"  /* for byte and u32 typedefs */
 #include "g10lib.h"
 #include "cipher.h"
-
-#define MAXKC                  (256/32)
-#define MAXROUNDS              14
-#define BLOCKSIZE               (128/8)
-
-
-/* Helper macro to force alignment to 16 bytes.  */
-#ifdef __GNUC__
-# define ATTR_ALIGNED_16  __attribute__ ((aligned (16)))
-#else
-# define ATTR_ALIGNED_16
-#endif
-
-
-/* USE_PADLOCK indicates whether to compile the padlock specific
-   code.  */
-#undef USE_PADLOCK
-#ifdef ENABLE_PADLOCK_SUPPORT
-# if defined (__i386__) && SIZEOF_UNSIGNED_LONG == 4 && defined (__GNUC__)
-#  define USE_PADLOCK 1
-# endif
-#endif /*ENABLE_PADLOCK_SUPPORT*/
-
-/* USE_AESNI inidicates whether to compile with Intel AES-NI code.  We
-   need the vector-size attribute which seems to be available since
-   gcc 3.  However, to be on the safe side we require at least gcc 4.  */
-#undef USE_AESNI
-#ifdef ENABLE_AESNI_SUPPORT
-# if defined (__i386__) && SIZEOF_UNSIGNED_LONG == 4 && __GNUC__ >= 4
-#  define USE_AESNI 1
-# endif
-#endif /* ENABLE_AESNI_SUPPORT */
+#include "bufhelp.h"
+#include "cipher-selftest.h"
+#include "rijndael-internal.h"
+#include "./cipher-internal.h"
+
+
+#ifdef USE_AMD64_ASM
+/* AMD64 assembly implementations of AES */
+extern unsigned int _gcry_aes_amd64_encrypt_block(const void *keysched_enc,
+                                                  unsigned char *out,
+                                                  const unsigned char *in,
+                                                  int rounds,
+                                                  const void *encT);
+
+extern unsigned int _gcry_aes_amd64_decrypt_block(const void *keysched_dec,
+                                                  unsigned char *out,
+                                                  const unsigned char *in,
+                                                  int rounds,
+                                                  const void *decT);
+#endif /*USE_AMD64_ASM*/
 
 #ifdef USE_AESNI
-  typedef int m128i_t __attribute__ ((__vector_size__ (16)));
-#endif /*USE_AESNI*/
+/* AES-NI (AMD64 & i386) accelerated implementations of AES */
+extern void _gcry_aes_aesni_do_setkey(RIJNDAEL_context *ctx, const byte *key);
+extern void _gcry_aes_aesni_prepare_decryption(RIJNDAEL_context *ctx);
+
+extern unsigned int _gcry_aes_aesni_encrypt (const RIJNDAEL_context *ctx,
+                                             unsigned char *dst,
+                                             const unsigned char *src);
+extern unsigned int _gcry_aes_aesni_decrypt (const RIJNDAEL_context *ctx,
+                                             unsigned char *dst,
+                                             const unsigned char *src);
+extern void _gcry_aes_aesni_cfb_enc (void *context, unsigned char *iv,
+                                     void *outbuf_arg, const void *inbuf_arg,
+                                     size_t nblocks);
+extern void _gcry_aes_aesni_cbc_enc (void *context, unsigned char *iv,
+                                     void *outbuf_arg, const void *inbuf_arg,
+                                     size_t nblocks, int cbc_mac);
+extern void _gcry_aes_aesni_ctr_enc (void *context, unsigned char *ctr,
+                                     void *outbuf_arg, const void *inbuf_arg,
+                                     size_t nblocks);
+extern void _gcry_aes_aesni_ctr32le_enc (void *context, unsigned char *ctr,
+                                        void *outbuf_arg,
+                                        const void *inbuf_arg, size_t nblocks);
+extern void _gcry_aes_aesni_cfb_dec (void *context, unsigned char *iv,
+                                     void *outbuf_arg, const void *inbuf_arg,
+                                     size_t nblocks);
+extern void _gcry_aes_aesni_cbc_dec (void *context, unsigned char *iv,
+                                     void *outbuf_arg, const void *inbuf_arg,
+                                     size_t nblocks);
+extern size_t _gcry_aes_aesni_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
+                                         const void *inbuf_arg, size_t nblocks,
+                                         int encrypt);
+extern size_t _gcry_aes_aesni_ocb_auth (gcry_cipher_hd_t c, const void 
*abuf_arg,
+                                        size_t nblocks);
+extern void _gcry_aes_aesni_xts_crypt (void *context, unsigned char *tweak,
+                                       void *outbuf_arg, const void *inbuf_arg,
+                                       size_t nblocks, int encrypt);
+#endif
 
-/* Define an u32 variant for the sake of gcc 4.4's strict aliasing.  */
-#if __GNUC__ > 4 || ( __GNUC__ == 4 && __GNUC_MINOR__ >= 4 )
-typedef u32           __attribute__ ((__may_alias__)) u32_a_t;
-#else
-typedef u32           u32_a_t;
+#ifdef USE_VAES
+/* VAES (AMD64) accelerated implementation of AES */
+
+extern void _gcry_aes_vaes_cfb_dec (void *context, unsigned char *iv,
+                                   void *outbuf_arg, const void *inbuf_arg,
+                                   size_t nblocks);
+extern void _gcry_aes_vaes_cbc_dec (void *context, unsigned char *iv,
+                                   void *outbuf_arg, const void *inbuf_arg,
+                                   size_t nblocks);
+extern void _gcry_aes_vaes_ctr_enc (void *context, unsigned char *ctr,
+                                   void *outbuf_arg, const void *inbuf_arg,
+                                   size_t nblocks);
+extern void _gcry_aes_vaes_ctr32le_enc (void *context, unsigned char *ctr,
+                                       void *outbuf_arg, const void *inbuf_arg,
+                                       size_t nblocks);
+extern size_t _gcry_aes_vaes_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
+                                       const void *inbuf_arg, size_t nblocks,
+                                       int encrypt);
+extern void _gcry_aes_vaes_xts_crypt (void *context, unsigned char *tweak,
+                                     void *outbuf_arg, const void *inbuf_arg,
+                                     size_t nblocks, int encrypt);
 #endif
 
+#ifdef USE_SSSE3
+/* SSSE3 (AMD64) vector permutation implementation of AES */
+extern void _gcry_aes_ssse3_do_setkey(RIJNDAEL_context *ctx, const byte *key);
+extern void _gcry_aes_ssse3_prepare_decryption(RIJNDAEL_context *ctx);
+
+extern unsigned int _gcry_aes_ssse3_encrypt (const RIJNDAEL_context *ctx,
+                                             unsigned char *dst,
+                                             const unsigned char *src);
+extern unsigned int _gcry_aes_ssse3_decrypt (const RIJNDAEL_context *ctx,
+                                             unsigned char *dst,
+                                             const unsigned char *src);
+extern void _gcry_aes_ssse3_cfb_enc (void *context, unsigned char *iv,
+                                     void *outbuf_arg, const void *inbuf_arg,
+                                     size_t nblocks);
+extern void _gcry_aes_ssse3_cbc_enc (void *context, unsigned char *iv,
+                                     void *outbuf_arg, const void *inbuf_arg,
+                                     size_t nblocks,
+                                     int cbc_mac);
+extern void _gcry_aes_ssse3_ctr_enc (void *context, unsigned char *ctr,
+                                     void *outbuf_arg, const void *inbuf_arg,
+                                     size_t nblocks);
+extern void _gcry_aes_ssse3_cfb_dec (void *context, unsigned char *iv,
+                                     void *outbuf_arg, const void *inbuf_arg,
+                                     size_t nblocks);
+extern void _gcry_aes_ssse3_cbc_dec (void *context, unsigned char *iv,
+                                     void *outbuf_arg, const void *inbuf_arg,
+                                     size_t nblocks);
+extern size_t _gcry_aes_ssse3_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
+                                         const void *inbuf_arg, size_t nblocks,
+                                         int encrypt);
+extern size_t _gcry_aes_ssse3_ocb_auth (gcry_cipher_hd_t c, const void 
*abuf_arg,
+                                        size_t nblocks);
+#endif
 
-
-/* Our context object.  */
-typedef struct
-{
-  /* The first fields are the keyschedule arrays.  This is so that
-     they are aligned on a 16 byte boundary if using gcc.  This
-     alignment is required for the AES-NI code and a good idea in any
-     case.  The alignment is guaranteed due to the way cipher.c
-     allocates the space for the context.  The PROPERLY_ALIGNED_TYPE
-     hack is used to force a minimal alignment if not using gcc of if
-     the alignment requirement is higher that 16 bytes.  */
-  union
-  {
-    PROPERLY_ALIGNED_TYPE dummy;
-    byte keyschedule[MAXROUNDS+1][4][4];
-#ifdef USE_PADLOCK
-    /* The key as passed to the padlock engine.  It is only used if
-       the padlock engine is used (USE_PADLOCK, below).  */
-    unsigned char padlock_key[16] __attribute__ ((aligned (16)));
-#endif /*USE_PADLOCK*/
-  } u1;
-  union
-  {
-    PROPERLY_ALIGNED_TYPE dummy;
-    byte keyschedule[MAXROUNDS+1][4][4];
-  } u2;
-  int rounds;               /* Key-length-dependent number of rounds.  */
-  int decryption_prepared;  /* The decryption key schedule is available.  */
 #ifdef USE_PADLOCK
-  int use_padlock;          /* Padlock shall be used.  */
-#endif /*USE_PADLOCK*/
-#ifdef USE_AESNI
-  int use_aesni;            /* AES-NI shall be used.  */
-#endif /*USE_AESNI*/
-} RIJNDAEL_context ATTR_ALIGNED_16;
-
-/* Macros defining alias for the keyschedules.  */
-#define keyschenc  u1.keyschedule
-#define keyschdec  u2.keyschedule
-#define padlockkey u1.padlock_key
-
-/* Two macros to be called prior and after the use of AESNI
-   instructions.  There should be no external function calls between
-   the use of these macros.  There purpose is to make sure that the
-   SSE regsiters are cleared and won't reveal any information about
-   the key or the data.  */
-#ifdef USE_AESNI
-# define aesni_prepare() do { } while (0)
-# define aesni_cleanup()                                                \
-  do { asm volatile ("pxor %%xmm0, %%xmm0\n\t"                          \
-                     "pxor %%xmm1, %%xmm1\n" :: );                      \
-  } while (0)
-# define aesni_cleanup_2_4()                                            \
-  do { asm volatile ("pxor %%xmm2, %%xmm2\n\t"                          \
-                     "pxor %%xmm3, %%xmm3\n"                            \
-                     "pxor %%xmm4, %%xmm4\n":: );                       \
-  } while (0)
-#else
-# define aesni_prepare() do { } while (0)
-# define aesni_cleanup() do { } while (0)
+extern unsigned int _gcry_aes_padlock_encrypt (const RIJNDAEL_context *ctx,
+                                               unsigned char *bx,
+                                               const unsigned char *ax);
+extern unsigned int _gcry_aes_padlock_decrypt (const RIJNDAEL_context *ctx,
+                                               unsigned char *bx,
+                                               const unsigned char *ax);
+extern void _gcry_aes_padlock_prepare_decryption (RIJNDAEL_context *ctx);
 #endif
 
+#ifdef USE_ARM_ASM
+/* ARM assembly implementations of AES */
+extern unsigned int _gcry_aes_arm_encrypt_block(const void *keysched_enc,
+                                                unsigned char *out,
+                                                const unsigned char *in,
+                                                int rounds,
+                                                const void *encT);
+
+extern unsigned int _gcry_aes_arm_decrypt_block(const void *keysched_dec,
+                                                unsigned char *out,
+                                                const unsigned char *in,
+                                                int rounds,
+                                                const void *decT);
+#endif /*USE_ARM_ASM*/
+
+#ifdef USE_ARM_CE
+/* ARMv8 Crypto Extension implementations of AES */
+extern void _gcry_aes_armv8_ce_setkey(RIJNDAEL_context *ctx, const byte *key);
+extern void _gcry_aes_armv8_ce_prepare_decryption(RIJNDAEL_context *ctx);
+
+extern unsigned int _gcry_aes_armv8_ce_encrypt(const RIJNDAEL_context *ctx,
+                                               unsigned char *dst,
+                                               const unsigned char *src);
+extern unsigned int _gcry_aes_armv8_ce_decrypt(const RIJNDAEL_context *ctx,
+                                               unsigned char *dst,
+                                               const unsigned char *src);
+
+extern void _gcry_aes_armv8_ce_cfb_enc (void *context, unsigned char *iv,
+                                        void *outbuf_arg, const void 
*inbuf_arg,
+                                        size_t nblocks);
+extern void _gcry_aes_armv8_ce_cbc_enc (void *context, unsigned char *iv,
+                                        void *outbuf_arg, const void 
*inbuf_arg,
+                                        size_t nblocks,
+                                        int cbc_mac);
+extern void _gcry_aes_armv8_ce_ctr_enc (void *context, unsigned char *ctr,
+                                        void *outbuf_arg, const void 
*inbuf_arg,
+                                        size_t nblocks);
+extern void _gcry_aes_armv8_ce_ctr32le_enc (void *context, unsigned char *ctr,
+                                            void *outbuf_arg,
+                                            const void *inbuf_arg,
+                                            size_t nblocks);
+extern void _gcry_aes_armv8_ce_cfb_dec (void *context, unsigned char *iv,
+                                        void *outbuf_arg, const void 
*inbuf_arg,
+                                        size_t nblocks);
+extern void _gcry_aes_armv8_ce_cbc_dec (void *context, unsigned char *iv,
+                                        void *outbuf_arg, const void 
*inbuf_arg,
+                                        size_t nblocks);
+extern size_t _gcry_aes_armv8_ce_ocb_crypt (gcry_cipher_hd_t c, void 
*outbuf_arg,
+                                            const void *inbuf_arg, size_t 
nblocks,
+                                            int encrypt);
+extern size_t _gcry_aes_armv8_ce_ocb_auth (gcry_cipher_hd_t c,
+                                           const void *abuf_arg, size_t 
nblocks);
+extern void _gcry_aes_armv8_ce_xts_crypt (void *context, unsigned char *tweak,
+                                          void *outbuf_arg,
+                                          const void *inbuf_arg,
+                                          size_t nblocks, int encrypt);
+#endif /*USE_ARM_ASM*/
+
+#ifdef USE_PPC_CRYPTO
+/* PowerPC Crypto implementations of AES */
+extern void _gcry_aes_ppc8_setkey(RIJNDAEL_context *ctx, const byte *key);
+extern void _gcry_aes_ppc8_prepare_decryption(RIJNDAEL_context *ctx);
+
+extern unsigned int _gcry_aes_ppc8_encrypt(const RIJNDAEL_context *ctx,
+                                          unsigned char *dst,
+                                          const unsigned char *src);
+extern unsigned int _gcry_aes_ppc8_decrypt(const RIJNDAEL_context *ctx,
+                                          unsigned char *dst,
+                                          const unsigned char *src);
+
+extern void _gcry_aes_ppc8_cfb_enc (void *context, unsigned char *iv,
+                                   void *outbuf_arg, const void *inbuf_arg,
+                                   size_t nblocks);
+extern void _gcry_aes_ppc8_cbc_enc (void *context, unsigned char *iv,
+                                   void *outbuf_arg, const void *inbuf_arg,
+                                   size_t nblocks, int cbc_mac);
+extern void _gcry_aes_ppc8_ctr_enc (void *context, unsigned char *ctr,
+                                   void *outbuf_arg, const void *inbuf_arg,
+                                   size_t nblocks);
+extern void _gcry_aes_ppc8_cfb_dec (void *context, unsigned char *iv,
+                                   void *outbuf_arg, const void *inbuf_arg,
+                                   size_t nblocks);
+extern void _gcry_aes_ppc8_cbc_dec (void *context, unsigned char *iv,
+                                   void *outbuf_arg, const void *inbuf_arg,
+                                   size_t nblocks);
+
+extern size_t _gcry_aes_ppc8_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
+                                       const void *inbuf_arg, size_t nblocks,
+                                       int encrypt);
+extern size_t _gcry_aes_ppc8_ocb_auth (gcry_cipher_hd_t c,
+                                      const void *abuf_arg, size_t nblocks);
+
+extern void _gcry_aes_ppc8_xts_crypt (void *context, unsigned char *tweak,
+                                     void *outbuf_arg,
+                                     const void *inbuf_arg,
+                                     size_t nblocks, int encrypt);
+#endif /*USE_PPC_CRYPTO*/
+
+#ifdef USE_PPC_CRYPTO_WITH_PPC9LE
+/* Power9 little-endian crypto implementations of AES */
+extern unsigned int _gcry_aes_ppc9le_encrypt(const RIJNDAEL_context *ctx,
+                                           unsigned char *dst,
+                                           const unsigned char *src);
+extern unsigned int _gcry_aes_ppc9le_decrypt(const RIJNDAEL_context *ctx,
+                                           unsigned char *dst,
+                                           const unsigned char *src);
+
+extern void _gcry_aes_ppc9le_cfb_enc (void *context, unsigned char *iv,
+                                     void *outbuf_arg, const void *inbuf_arg,
+                                     size_t nblocks);
+extern void _gcry_aes_ppc9le_cbc_enc (void *context, unsigned char *iv,
+                                     void *outbuf_arg, const void *inbuf_arg,
+                                     size_t nblocks, int cbc_mac);
+extern void _gcry_aes_ppc9le_ctr_enc (void *context, unsigned char *ctr,
+                                     void *outbuf_arg, const void *inbuf_arg,
+                                     size_t nblocks);
+extern void _gcry_aes_ppc9le_cfb_dec (void *context, unsigned char *iv,
+                                     void *outbuf_arg, const void *inbuf_arg,
+                                     size_t nblocks);
+extern void _gcry_aes_ppc9le_cbc_dec (void *context, unsigned char *iv,
+                                     void *outbuf_arg, const void *inbuf_arg,
+                                     size_t nblocks);
+
+extern size_t _gcry_aes_ppc9le_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
+                                         const void *inbuf_arg, size_t nblocks,
+                                         int encrypt);
+extern size_t _gcry_aes_ppc9le_ocb_auth (gcry_cipher_hd_t c,
+                                       const void *abuf_arg, size_t nblocks);
+
+extern void _gcry_aes_ppc9le_xts_crypt (void *context, unsigned char *tweak,
+                                       void *outbuf_arg,
+                                       const void *inbuf_arg,
+                                       size_t nblocks, int encrypt);
+
+extern size_t _gcry_aes_p10le_gcm_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
+                                        const void *inbuf_arg,
+                                        size_t nblocks, int encrypt);
+#endif /*USE_PPC_CRYPTO_WITH_PPC9LE*/
+
+#ifdef USE_S390X_CRYPTO
+/* zSeries crypto implementations of AES */
+extern int _gcry_aes_s390x_setup_acceleration(RIJNDAEL_context *ctx,
+                                             unsigned int keylen,
+                                             unsigned int hwfeatures,
+                                             cipher_bulk_ops_t *bulk_ops);
+extern void _gcry_aes_s390x_setkey(RIJNDAEL_context *ctx, const byte *key);
+extern void _gcry_aes_s390x_prepare_decryption(RIJNDAEL_context *ctx);
+
+extern unsigned int _gcry_aes_s390x_encrypt(const RIJNDAEL_context *ctx,
+                                           unsigned char *dst,
+                                           const unsigned char *src);
+extern unsigned int _gcry_aes_s390x_decrypt(const RIJNDAEL_context *ctx,
+                                           unsigned char *dst,
+                                           const unsigned char *src);
+
+#endif /*USE_S390X_CRYPTO*/
+
+static unsigned int do_encrypt (const RIJNDAEL_context *ctx, unsigned char *bx,
+                                const unsigned char *ax);
+static unsigned int do_decrypt (const RIJNDAEL_context *ctx, unsigned char *bx,
+                                const unsigned char *ax);
+
+static void _gcry_aes_cfb_enc (void *context, unsigned char *iv,
+                              void *outbuf, const void *inbuf,
+                              size_t nblocks);
+static void _gcry_aes_cfb_dec (void *context, unsigned char *iv,
+                              void *outbuf_arg, const void *inbuf_arg,
+                              size_t nblocks);
+static void _gcry_aes_cbc_enc (void *context, unsigned char *iv,
+                              void *outbuf_arg, const void *inbuf_arg,
+                              size_t nblocks, int cbc_mac);
+static void _gcry_aes_cbc_dec (void *context, unsigned char *iv,
+                              void *outbuf_arg, const void *inbuf_arg,
+                              size_t nblocks);
+static void _gcry_aes_ctr_enc (void *context, unsigned char *ctr,
+                              void *outbuf_arg, const void *inbuf_arg,
+                              size_t nblocks);
+static size_t _gcry_aes_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
+                                  const void *inbuf_arg, size_t nblocks,
+                                  int encrypt);
+static size_t _gcry_aes_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
+                                 size_t nblocks);
+static void _gcry_aes_xts_crypt (void *context, unsigned char *tweak,
+                                void *outbuf_arg, const void *inbuf_arg,
+                                size_t nblocks, int encrypt);
+
 
 /* All the numbers.  */
 #include "rijndael-tables.h"
 
 
 
-/* Function prototypes.  */
-#ifdef USE_AESNI
-/* We don't want to inline these functions to help gcc allocate enough
-   registers.  */
-static void do_aesni_ctr (const RIJNDAEL_context *ctx, unsigned char *ctr,
-                          unsigned char *b, const unsigned char *a)
-  __attribute__ ((__noinline__));
-static void do_aesni_ctr_4 (const RIJNDAEL_context *ctx, unsigned char *ctr,
-                            unsigned char *b, const unsigned char *a)
-  __attribute__ ((__noinline__));
-#endif /*USE_AESNI*/
 
+/* Function prototypes.  */
 static const char *selftest(void);
+static void prepare_decryption(RIJNDAEL_context *ctx);
+
+
+
+/* Prefetching for encryption/decryption tables. */
+static inline void prefetch_table(const volatile byte *tab, size_t len)
+{
+  size_t i;
+
+  for (i = 0; len - i >= 8 * 32; i += 8 * 32)
+    {
+      (void)tab[i + 0 * 32];
+      (void)tab[i + 1 * 32];
+      (void)tab[i + 2 * 32];
+      (void)tab[i + 3 * 32];
+      (void)tab[i + 4 * 32];
+      (void)tab[i + 5 * 32];
+      (void)tab[i + 6 * 32];
+      (void)tab[i + 7 * 32];
+    }
+  for (; i < len; i += 32)
+    {
+      (void)tab[i];
+    }
+
+  (void)tab[len - 1];
+}
+
+static void prefetch_enc(void)
+{
+  /* Modify counters to trigger copy-on-write and unsharing if physical pages
+   * of look-up table are shared between processes.  Modifying counters also
+   * causes checksums for pages to change and hint same-page merging algorithm
+   * that these pages are frequently changing.  */
+  enc_tables.counter_head++;
+  enc_tables.counter_tail++;
+
+  /* Prefetch look-up tables to cache.  */
+  prefetch_table((const void *)&enc_tables, sizeof(enc_tables));
+}
+
+static void prefetch_dec(void)
+{
+  /* Modify counters to trigger copy-on-write and unsharing if physical pages
+   * of look-up table are shared between processes.  Modifying counters also
+   * causes checksums for pages to change and hint same-page merging algorithm
+   * that these pages are frequently changing.  */
+  dec_tables.counter_head++;
+  dec_tables.counter_tail++;
+
+  /* Prefetch look-up tables to cache.  */
+  prefetch_table((const void *)&dec_tables, sizeof(dec_tables));
+}
 
 
 
 /* Perform the key setup.  */
 static gcry_err_code_t
-do_setkey (RIJNDAEL_context *ctx, const byte *key, const unsigned keylen)
+do_setkey (RIJNDAEL_context *ctx, const byte *key, const unsigned keylen,
+           cipher_bulk_ops_t *bulk_ops)
 {
   static int initialized = 0;
-  static const char *selftest_failed=0;
+  static const char *selftest_failed = 0;
+  void (*hw_setkey)(RIJNDAEL_context *ctx, const byte *key) = NULL;
   int rounds;
-  unsigned int i;
-  int j, r, t, rconpointer = 0;
+  int i,j, r, t, rconpointer = 0;
   int KC;
-  union
-  {
-    PROPERLY_ALIGNED_TYPE dummy;
-    byte k[MAXKC][4];
-  } k;
-#define k k.k
-  union
-  {
-    PROPERLY_ALIGNED_TYPE dummy;
-    byte tk[MAXKC][4];
-  } tk;
-#define tk tk.tk
+  unsigned int hwfeatures;
 
   /* The on-the-fly self tests are only run in non-fips mode. In fips
      mode explicit self-tests are required.  Actually the on-the-fly
@@ -214,145 +453,226 @@ do_setkey (RIJNDAEL_context *ctx, const byte *key, 
const unsigned keylen)
   if (selftest_failed)
     return GPG_ERR_SELFTEST_FAILED;
 
-  ctx->decryption_prepared = 0;
-#ifdef USE_PADLOCK
-  ctx->use_padlock = 0;
-#endif
-#ifdef USE_AESNI
-  ctx->use_aesni = 0;
-#endif
-
   if( keylen == 128/8 )
     {
       rounds = 10;
       KC = 4;
-
-      if (0)
-        {
-          ;
-        }
-#ifdef USE_PADLOCK
-      else if ((_gcry_get_hw_features () & HWF_PADLOCK_AES))
-        {
-          ctx->use_padlock = 1;
-          memcpy (ctx->padlockkey, key, keylen);
-        }
-#endif
-#ifdef USE_AESNI
-      else if ((_gcry_get_hw_features () & HWF_INTEL_AESNI))
-        {
-          ctx->use_aesni = 1;
-        }
-#endif
     }
   else if ( keylen == 192/8 )
     {
       rounds = 12;
       KC = 6;
-
-      if (0)
-        {
-          ;
-        }
-#ifdef USE_AESNI
-      else if ((_gcry_get_hw_features () & HWF_INTEL_AESNI))
-        {
-          ctx->use_aesni = 1;
-        }
-#endif
     }
   else if ( keylen == 256/8 )
     {
       rounds = 14;
       KC = 8;
-
-      if (0)
-        {
-          ;
-        }
-#ifdef USE_AESNI
-      else if ((_gcry_get_hw_features () & HWF_INTEL_AESNI))
-        {
-          ctx->use_aesni = 1;
-        }
-#endif
     }
   else
     return GPG_ERR_INV_KEYLEN;
 
   ctx->rounds = rounds;
+  hwfeatures = _gcry_get_hw_features ();
 
-  /* NB: We don't yet support Padlock hardware key generation.  */
+  ctx->decryption_prepared = 0;
+
+  /* Setup default bulk encryption routines.  */
+  memset (bulk_ops, 0, sizeof(*bulk_ops));
+  bulk_ops->cfb_enc = _gcry_aes_cfb_enc;
+  bulk_ops->cfb_dec = _gcry_aes_cfb_dec;
+  bulk_ops->cbc_enc = _gcry_aes_cbc_enc;
+  bulk_ops->cbc_dec = _gcry_aes_cbc_dec;
+  bulk_ops->ctr_enc = _gcry_aes_ctr_enc;
+  bulk_ops->ocb_crypt = _gcry_aes_ocb_crypt;
+  bulk_ops->ocb_auth  = _gcry_aes_ocb_auth;
+  bulk_ops->xts_crypt = _gcry_aes_xts_crypt;
+
+  (void)hwfeatures;
 
   if (0)
-    ;
-#ifdef USE_AESNI_is_disabled_here
-  else if (ctx->use_aesni && ctx->rounds == 10)
     {
-      /* Note: This code works for AES-128 but it is not much better
-         than using the standard key schedule.  We disable it for
-         now and don't put any effort into implementing this for
-         AES-192 and AES-256.  */
-      asm volatile ("movl   %[key], %%esi\n\t"
-                    "movdqu (%%esi), %%xmm1\n\t"     /* xmm1 := key   */
-                    "movl   %[ksch], %%esi\n\t"
-                    "movdqa %%xmm1, (%%esi)\n\t"     /* ksch[0] := xmm1  */
-                    "aeskeygenassist $0x01, %%xmm1, %%xmm2\n\t"
-                    "call .Lexpand128_%=\n\t"
-                    "movdqa %%xmm1, 0x10(%%esi)\n\t" /* ksch[1] := xmm1  */
-                    "aeskeygenassist $0x02, %%xmm1, %%xmm2\n\t"
-                    "call .Lexpand128_%=\n\t"
-                    "movdqa %%xmm1, 0x20(%%esi)\n\t" /* ksch[2] := xmm1  */
-                    "aeskeygenassist $0x04, %%xmm1, %%xmm2\n\t"
-                    "call .Lexpand128_%=\n\t"
-                    "movdqa %%xmm1, 0x30(%%esi)\n\t" /* ksch[3] := xmm1  */
-                    "aeskeygenassist $0x08, %%xmm1, %%xmm2\n\t"
-                    "call .Lexpand128_%=\n\t"
-                    "movdqa %%xmm1, 0x40(%%esi)\n\t" /* ksch[4] := xmm1  */
-                    "aeskeygenassist $0x10, %%xmm1, %%xmm2\n\t"
-                    "call .Lexpand128_%=\n\t"
-                    "movdqa %%xmm1, 0x50(%%esi)\n\t" /* ksch[5] := xmm1  */
-                    "aeskeygenassist $0x20, %%xmm1, %%xmm2\n\t"
-                    "call .Lexpand128_%=\n\t"
-                    "movdqa %%xmm1, 0x60(%%esi)\n\t" /* ksch[6] := xmm1  */
-                    "aeskeygenassist $0x40, %%xmm1, %%xmm2\n\t"
-                    "call .Lexpand128_%=\n\t"
-                    "movdqa %%xmm1, 0x70(%%esi)\n\t" /* ksch[7] := xmm1  */
-                    "aeskeygenassist $0x80, %%xmm1, %%xmm2\n\t"
-                    "call .Lexpand128_%=\n\t"
-                    "movdqa %%xmm1, 0x80(%%esi)\n\t" /* ksch[8] := xmm1  */
-                    "aeskeygenassist $0x1b, %%xmm1, %%xmm2\n\t"
-                    "call .Lexpand128_%=\n\t"
-                    "movdqa %%xmm1, 0x90(%%esi)\n\t" /* ksch[9] := xmm1  */
-                    "aeskeygenassist $0x36, %%xmm1, %%xmm2\n\t"
-                    "call .Lexpand128_%=\n\t"
-                    "movdqa %%xmm1, 0xa0(%%esi)\n\t" /* ksch[10] := xmm1  */
-                    "jmp .Lleave%=\n"
-
-                    ".Lexpand128_%=:\n\t"
-                    "pshufd $0xff, %%xmm2, %%xmm2\n\t"
-                    "movdqa %%xmm1, %%xmm3\n\t"
-                    "pslldq $4, %%xmm3\n\t"
-                    "pxor   %%xmm3, %%xmm1\n\t"
-                    "pslldq $4, %%xmm3\n\t"
-                    "pxor   %%xmm3, %%xmm1\n\t"
-                    "pslldq $4, %%xmm3\n\t"
-                    "pxor   %%xmm3, %%xmm2\n\t"
-                    "pxor   %%xmm2, %%xmm1\n\t"
-                    "ret\n"
-
-                    ".Lleave%=:\n\t"
-                    "pxor %%xmm1, %%xmm1\n\t"
-                    "pxor %%xmm2, %%xmm2\n\t"
-                    "pxor %%xmm3, %%xmm3\n"
-                    :
-                    : [key] "g" (key), [ksch] "g" (ctx->keyschenc)
-                    : "%esi", "cc", "memory" );
+      ;
+    }
+#ifdef USE_AESNI
+  else if (hwfeatures & HWF_INTEL_AESNI)
+    {
+      hw_setkey = _gcry_aes_aesni_do_setkey;
+      ctx->encrypt_fn = _gcry_aes_aesni_encrypt;
+      ctx->decrypt_fn = _gcry_aes_aesni_decrypt;
+      ctx->prefetch_enc_fn = NULL;
+      ctx->prefetch_dec_fn = NULL;
+      ctx->prepare_decryption = _gcry_aes_aesni_prepare_decryption;
+      ctx->use_avx = !!(hwfeatures & HWF_INTEL_AVX);
+      ctx->use_avx2 = !!(hwfeatures & HWF_INTEL_AVX2);
+
+      /* Setup AES-NI bulk encryption routines.  */
+      bulk_ops->cfb_enc = _gcry_aes_aesni_cfb_enc;
+      bulk_ops->cfb_dec = _gcry_aes_aesni_cfb_dec;
+      bulk_ops->cbc_enc = _gcry_aes_aesni_cbc_enc;
+      bulk_ops->cbc_dec = _gcry_aes_aesni_cbc_dec;
+      bulk_ops->ctr_enc = _gcry_aes_aesni_ctr_enc;
+      bulk_ops->ctr32le_enc = _gcry_aes_aesni_ctr32le_enc;
+      bulk_ops->ocb_crypt = _gcry_aes_aesni_ocb_crypt;
+      bulk_ops->ocb_auth = _gcry_aes_aesni_ocb_auth;
+      bulk_ops->xts_crypt = _gcry_aes_aesni_xts_crypt;
+
+#ifdef USE_VAES
+      if ((hwfeatures & HWF_INTEL_VAES_VPCLMUL) &&
+         (hwfeatures & HWF_INTEL_AVX2))
+       {
+         /* Setup VAES bulk encryption routines.  */
+         bulk_ops->cfb_dec = _gcry_aes_vaes_cfb_dec;
+         bulk_ops->cbc_dec = _gcry_aes_vaes_cbc_dec;
+         bulk_ops->ctr_enc = _gcry_aes_vaes_ctr_enc;
+         bulk_ops->ctr32le_enc = _gcry_aes_vaes_ctr32le_enc;
+         bulk_ops->ocb_crypt = _gcry_aes_vaes_ocb_crypt;
+         bulk_ops->xts_crypt = _gcry_aes_vaes_xts_crypt;
+       }
+#endif
+    }
+#endif
+#ifdef USE_PADLOCK
+  else if ((hwfeatures & HWF_PADLOCK_AES) && keylen == 128/8)
+    {
+      ctx->encrypt_fn = _gcry_aes_padlock_encrypt;
+      ctx->decrypt_fn = _gcry_aes_padlock_decrypt;
+      ctx->prefetch_enc_fn = NULL;
+      ctx->prefetch_dec_fn = NULL;
+      ctx->prepare_decryption = _gcry_aes_padlock_prepare_decryption;
+      memcpy (ctx->padlockkey, key, keylen);
+    }
+#endif
+#ifdef USE_SSSE3
+  else if (hwfeatures & HWF_INTEL_SSSE3)
+    {
+      hw_setkey = _gcry_aes_ssse3_do_setkey;
+      ctx->encrypt_fn = _gcry_aes_ssse3_encrypt;
+      ctx->decrypt_fn = _gcry_aes_ssse3_decrypt;
+      ctx->prefetch_enc_fn = NULL;
+      ctx->prefetch_dec_fn = NULL;
+      ctx->prepare_decryption = _gcry_aes_ssse3_prepare_decryption;
+
+      /* Setup SSSE3 bulk encryption routines.  */
+      bulk_ops->cfb_enc = _gcry_aes_ssse3_cfb_enc;
+      bulk_ops->cfb_dec = _gcry_aes_ssse3_cfb_dec;
+      bulk_ops->cbc_enc = _gcry_aes_ssse3_cbc_enc;
+      bulk_ops->cbc_dec = _gcry_aes_ssse3_cbc_dec;
+      bulk_ops->ctr_enc = _gcry_aes_ssse3_ctr_enc;
+      bulk_ops->ocb_crypt = _gcry_aes_ssse3_ocb_crypt;
+      bulk_ops->ocb_auth = _gcry_aes_ssse3_ocb_auth;
+    }
+#endif
+#ifdef USE_ARM_CE
+  else if (hwfeatures & HWF_ARM_AES)
+    {
+      hw_setkey = _gcry_aes_armv8_ce_setkey;
+      ctx->encrypt_fn = _gcry_aes_armv8_ce_encrypt;
+      ctx->decrypt_fn = _gcry_aes_armv8_ce_decrypt;
+      ctx->prefetch_enc_fn = NULL;
+      ctx->prefetch_dec_fn = NULL;
+      ctx->prepare_decryption = _gcry_aes_armv8_ce_prepare_decryption;
+
+      /* Setup ARM-CE bulk encryption routines.  */
+      bulk_ops->cfb_enc = _gcry_aes_armv8_ce_cfb_enc;
+      bulk_ops->cfb_dec = _gcry_aes_armv8_ce_cfb_dec;
+      bulk_ops->cbc_enc = _gcry_aes_armv8_ce_cbc_enc;
+      bulk_ops->cbc_dec = _gcry_aes_armv8_ce_cbc_dec;
+      bulk_ops->ctr_enc = _gcry_aes_armv8_ce_ctr_enc;
+      bulk_ops->ctr32le_enc = _gcry_aes_armv8_ce_ctr32le_enc;
+      bulk_ops->ocb_crypt = _gcry_aes_armv8_ce_ocb_crypt;
+      bulk_ops->ocb_auth = _gcry_aes_armv8_ce_ocb_auth;
+      bulk_ops->xts_crypt = _gcry_aes_armv8_ce_xts_crypt;
+    }
+#endif
+#ifdef USE_PPC_CRYPTO_WITH_PPC9LE
+  else if ((hwfeatures & HWF_PPC_VCRYPTO) && (hwfeatures & HWF_PPC_ARCH_3_00))
+    {
+      hw_setkey = _gcry_aes_ppc8_setkey;
+      ctx->encrypt_fn = _gcry_aes_ppc9le_encrypt;
+      ctx->decrypt_fn = _gcry_aes_ppc9le_decrypt;
+      ctx->prefetch_enc_fn = NULL;
+      ctx->prefetch_dec_fn = NULL;
+      ctx->prepare_decryption = _gcry_aes_ppc8_prepare_decryption;
+
+      /* Setup PPC9LE bulk encryption routines.  */
+      bulk_ops->cfb_enc = _gcry_aes_ppc9le_cfb_enc;
+      bulk_ops->cfb_dec = _gcry_aes_ppc9le_cfb_dec;
+      bulk_ops->cbc_enc = _gcry_aes_ppc9le_cbc_enc;
+      bulk_ops->cbc_dec = _gcry_aes_ppc9le_cbc_dec;
+      bulk_ops->ctr_enc = _gcry_aes_ppc9le_ctr_enc;
+      bulk_ops->ocb_crypt = _gcry_aes_ppc9le_ocb_crypt;
+      bulk_ops->ocb_auth = _gcry_aes_ppc9le_ocb_auth;
+      bulk_ops->xts_crypt = _gcry_aes_ppc9le_xts_crypt;
+      if (hwfeatures & HWF_PPC_ARCH_3_10)  /* for P10 */
+        bulk_ops->gcm_crypt = _gcry_aes_p10le_gcm_crypt;
+    }
+#endif
+#ifdef USE_PPC_CRYPTO
+  else if (hwfeatures & HWF_PPC_VCRYPTO)
+    {
+      hw_setkey = _gcry_aes_ppc8_setkey;
+      ctx->encrypt_fn = _gcry_aes_ppc8_encrypt;
+      ctx->decrypt_fn = _gcry_aes_ppc8_decrypt;
+      ctx->prefetch_enc_fn = NULL;
+      ctx->prefetch_dec_fn = NULL;
+      ctx->prepare_decryption = _gcry_aes_ppc8_prepare_decryption;
+
+      /* Setup PPC8 bulk encryption routines.  */
+      bulk_ops->cfb_enc = _gcry_aes_ppc8_cfb_enc;
+      bulk_ops->cfb_dec = _gcry_aes_ppc8_cfb_dec;
+      bulk_ops->cbc_enc = _gcry_aes_ppc8_cbc_enc;
+      bulk_ops->cbc_dec = _gcry_aes_ppc8_cbc_dec;
+      bulk_ops->ctr_enc = _gcry_aes_ppc8_ctr_enc;
+      bulk_ops->ocb_crypt = _gcry_aes_ppc8_ocb_crypt;
+      bulk_ops->ocb_auth = _gcry_aes_ppc8_ocb_auth;
+      bulk_ops->xts_crypt = _gcry_aes_ppc8_xts_crypt;
+    }
+#endif
+#ifdef USE_S390X_CRYPTO
+  else if (_gcry_aes_s390x_setup_acceleration (ctx, keylen, hwfeatures,
+                                              bulk_ops))
+  {
+      hw_setkey = _gcry_aes_s390x_setkey;
+      ctx->encrypt_fn = _gcry_aes_s390x_encrypt;
+      ctx->decrypt_fn = _gcry_aes_s390x_decrypt;
+      ctx->prefetch_enc_fn = NULL;
+      ctx->prefetch_dec_fn = NULL;
+      ctx->prepare_decryption = _gcry_aes_s390x_prepare_decryption;
+    }
+#endif
+  else
+    {
+      ctx->encrypt_fn = do_encrypt;
+      ctx->decrypt_fn = do_decrypt;
+      ctx->prefetch_enc_fn = prefetch_enc;
+      ctx->prefetch_dec_fn = prefetch_dec;
+      ctx->prepare_decryption = prepare_decryption;
+    }
+
+  /* NB: We don't yet support Padlock hardware key generation.  */
+
+  if (hw_setkey)
+    {
+      hw_setkey (ctx, key);
     }
-#endif /*USE_AESNI*/
   else
     {
-#define W (ctx->keyschenc)
+      const byte *sbox = ((const byte *)encT) + 1;
+      union
+        {
+          PROPERLY_ALIGNED_TYPE dummy;
+          byte data[MAXKC][4];
+          u32 data32[MAXKC];
+        } tkk[2];
+#define k      tkk[0].data
+#define k_u32  tkk[0].data32
+#define tk     tkk[1].data
+#define tk_u32 tkk[1].data32
+#define W      (ctx->keyschenc)
+#define W_u32  (ctx->keyschenc32)
+
+      prefetch_enc();
+
       for (i = 0; i < keylen; i++)
         {
           k[i >> 2][i & 3] = key[i];
@@ -360,7 +680,7 @@ do_setkey (RIJNDAEL_context *ctx, const byte *key, const 
unsigned keylen)
 
       for (j = KC-1; j >= 0; j--)
         {
-          *((u32_a_t*)tk[j]) = *((u32_a_t*)k[j]);
+          tk_u32[j] = k_u32[j];
         }
       r = 0;
       t = 0;
@@ -369,7 +689,7 @@ do_setkey (RIJNDAEL_context *ctx, const byte *key, const 
unsigned keylen)
         {
           for (; (j < KC) && (t < 4); j++, t++)
             {
-              *((u32_a_t*)W[r][t]) = *((u32_a_t*)tk[j]);
+              W_u32[r][t] = le_bswap32(tk_u32[j]);
             }
           if (t == 4)
             {
@@ -382,32 +702,32 @@ do_setkey (RIJNDAEL_context *ctx, const byte *key, const 
unsigned keylen)
         {
           /* While not enough round key material calculated calculate
              new values.  */
-          tk[0][0] ^= S[tk[KC-1][1]];
-          tk[0][1] ^= S[tk[KC-1][2]];
-          tk[0][2] ^= S[tk[KC-1][3]];
-          tk[0][3] ^= S[tk[KC-1][0]];
+          tk[0][0] ^= sbox[tk[KC-1][1] * 4];
+          tk[0][1] ^= sbox[tk[KC-1][2] * 4];
+          tk[0][2] ^= sbox[tk[KC-1][3] * 4];
+          tk[0][3] ^= sbox[tk[KC-1][0] * 4];
           tk[0][0] ^= rcon[rconpointer++];
 
           if (KC != 8)
             {
               for (j = 1; j < KC; j++)
                 {
-                  *((u32_a_t*)tk[j]) ^= *((u32_a_t*)tk[j-1]);
+                  tk_u32[j] ^= tk_u32[j-1];
                 }
             }
           else
             {
               for (j = 1; j < KC/2; j++)
                 {
-                  *((u32_a_t*)tk[j]) ^= *((u32_a_t*)tk[j-1]);
+                  tk_u32[j] ^= tk_u32[j-1];
                 }
-              tk[KC/2][0] ^= S[tk[KC/2 - 1][0]];
-              tk[KC/2][1] ^= S[tk[KC/2 - 1][1]];
-              tk[KC/2][2] ^= S[tk[KC/2 - 1][2]];
-              tk[KC/2][3] ^= S[tk[KC/2 - 1][3]];
+              tk[KC/2][0] ^= sbox[tk[KC/2 - 1][0] * 4];
+              tk[KC/2][1] ^= sbox[tk[KC/2 - 1][1] * 4];
+              tk[KC/2][2] ^= sbox[tk[KC/2 - 1][2] * 4];
+              tk[KC/2][3] ^= sbox[tk[KC/2 - 1][3] * 4];
               for (j = KC/2 + 1; j < KC; j++)
                 {
-                  *((u32_a_t*)tk[j]) ^= *((u32_a_t*)tk[j-1]);
+                  tk_u32[j] ^= tk_u32[j-1];
                 }
             }
 
@@ -416,7 +736,7 @@ do_setkey (RIJNDAEL_context *ctx, const byte *key, const 
unsigned keylen)
             {
               for (; (j < KC) && (t < 4); j++, t++)
                 {
-                  *((u32_a_t*)W[r][t]) = *((u32_a_t*)tk[j]);
+                  W_u32[r][t] = le_bswap32(tk_u32[j]);
                 }
               if (t == 4)
                 {
@@ -426,22 +746,24 @@ do_setkey (RIJNDAEL_context *ctx, const byte *key, const 
unsigned keylen)
             }
         }
 #undef W
+#undef tk
+#undef k
+#undef W_u32
+#undef tk_u32
+#undef k_u32
+      wipememory(&tkk, sizeof(tkk));
     }
 
   return 0;
-#undef tk
-#undef k
 }
 
 
 static gcry_err_code_t
-rijndael_setkey (void *context, const byte *key, const unsigned keylen)
+rijndael_setkey (void *context, const byte *key, const unsigned keylen,
+                 cipher_bulk_ops_t *bulk_ops)
 {
   RIJNDAEL_context *ctx = context;
-
-  int rc = do_setkey (ctx, key, keylen);
-  _gcry_burn_stack ( 100 + 16*sizeof(int));
-  return rc;
+  return do_setkey (ctx, key, keylen, bulk_ops);
 }
 
 
@@ -449,716 +771,217 @@ rijndael_setkey (void *context, const byte *key, const 
unsigned keylen)
 static void
 prepare_decryption( RIJNDAEL_context *ctx )
 {
+  const byte *sbox = ((const byte *)encT) + 1;
   int r;
 
-#ifdef USE_AESNI
-  if (ctx->use_aesni)
-    {
-      /* The AES-NI decrypt instructions use the Equivalent Inverse
-         Cipher, thus we can't use the the standard decrypt key
-         preparation.  */
-        m128i_t *ekey = (m128i_t*)ctx->keyschenc;
-        m128i_t *dkey = (m128i_t*)ctx->keyschdec;
-        int rr;
-
-        dkey[0] = ekey[ctx->rounds];
-        for (r=1, rr=ctx->rounds-1; r < ctx->rounds; r++, rr--)
-          {
-            asm volatile
-              ("movdqu %[ekey], %%xmm1\n\t"
-               /*"aesimc %%xmm1, %%xmm1\n\t"*/
-               ".byte 0x66, 0x0f, 0x38, 0xdb, 0xc9\n\t"
-               "movdqu %%xmm1, %[dkey]"
-               : [dkey] "=m" (dkey[r])
-               : [ekey] "m" (ekey[rr]) );
-          }
-        dkey[r] = ekey[0];
-    }
-  else
-#endif /*USE_AESNI*/
-    {
-      union
-      {
-        PROPERLY_ALIGNED_TYPE dummy;
-        byte *w;
-      } w;
-#define w w.w
-
-      for (r=0; r < MAXROUNDS+1; r++ )
-        {
-          *((u32_a_t*)ctx->keyschdec[r][0]) = 
*((u32_a_t*)ctx->keyschenc[r][0]);
-          *((u32_a_t*)ctx->keyschdec[r][1]) = 
*((u32_a_t*)ctx->keyschenc[r][1]);
-          *((u32_a_t*)ctx->keyschdec[r][2]) = 
*((u32_a_t*)ctx->keyschenc[r][2]);
-          *((u32_a_t*)ctx->keyschdec[r][3]) = 
*((u32_a_t*)ctx->keyschenc[r][3]);
-        }
-#define W (ctx->keyschdec)
-      for (r = 1; r < ctx->rounds; r++)
-        {
-          w = W[r][0];
-          *((u32_a_t*)w) = *((u32_a_t*)U1[w[0]]) ^ *((u32_a_t*)U2[w[1]])
-            ^ *((u32_a_t*)U3[w[2]]) ^ *((u32_a_t*)U4[w[3]]);
-
-          w = W[r][1];
-          *((u32_a_t*)w) = *((u32_a_t*)U1[w[0]]) ^ *((u32_a_t*)U2[w[1]])
-            ^ *((u32_a_t*)U3[w[2]]) ^ *((u32_a_t*)U4[w[3]]);
+  prefetch_enc();
+  prefetch_dec();
 
-          w = W[r][2];
-          *((u32_a_t*)w) = *((u32_a_t*)U1[w[0]]) ^ *((u32_a_t*)U2[w[1]])
-        ^ *((u32_a_t*)U3[w[2]]) ^ *((u32_a_t*)U4[w[3]]);
+  ctx->keyschdec32[0][0] = ctx->keyschenc32[0][0];
+  ctx->keyschdec32[0][1] = ctx->keyschenc32[0][1];
+  ctx->keyschdec32[0][2] = ctx->keyschenc32[0][2];
+  ctx->keyschdec32[0][3] = ctx->keyschenc32[0][3];
 
-          w = W[r][3];
-          *((u32_a_t*)w) = *((u32_a_t*)U1[w[0]]) ^ *((u32_a_t*)U2[w[1]])
-            ^ *((u32_a_t*)U3[w[2]]) ^ *((u32_a_t*)U4[w[3]]);
-        }
-#undef W
-#undef w
+  for (r = 1; r < ctx->rounds; r++)
+    {
+      u32 *wi = ctx->keyschenc32[r];
+      u32 *wo = ctx->keyschdec32[r];
+      u32 wt;
+
+      wt = wi[0];
+      wo[0] = rol(decT[sbox[(byte)(wt >> 0) * 4]], 8 * 0)
+             ^ rol(decT[sbox[(byte)(wt >> 8) * 4]], 8 * 1)
+             ^ rol(decT[sbox[(byte)(wt >> 16) * 4]], 8 * 2)
+             ^ rol(decT[sbox[(byte)(wt >> 24) * 4]], 8 * 3);
+
+      wt = wi[1];
+      wo[1] = rol(decT[sbox[(byte)(wt >> 0) * 4]], 8 * 0)
+             ^ rol(decT[sbox[(byte)(wt >> 8) * 4]], 8 * 1)
+             ^ rol(decT[sbox[(byte)(wt >> 16) * 4]], 8 * 2)
+             ^ rol(decT[sbox[(byte)(wt >> 24) * 4]], 8 * 3);
+
+      wt = wi[2];
+      wo[2] = rol(decT[sbox[(byte)(wt >> 0) * 4]], 8 * 0)
+             ^ rol(decT[sbox[(byte)(wt >> 8) * 4]], 8 * 1)
+             ^ rol(decT[sbox[(byte)(wt >> 16) * 4]], 8 * 2)
+             ^ rol(decT[sbox[(byte)(wt >> 24) * 4]], 8 * 3);
+
+      wt = wi[3];
+      wo[3] = rol(decT[sbox[(byte)(wt >> 0) * 4]], 8 * 0)
+             ^ rol(decT[sbox[(byte)(wt >> 8) * 4]], 8 * 1)
+             ^ rol(decT[sbox[(byte)(wt >> 16) * 4]], 8 * 2)
+             ^ rol(decT[sbox[(byte)(wt >> 24) * 4]], 8 * 3);
     }
+
+  ctx->keyschdec32[r][0] = ctx->keyschenc32[r][0];
+  ctx->keyschdec32[r][1] = ctx->keyschenc32[r][1];
+  ctx->keyschdec32[r][2] = ctx->keyschenc32[r][2];
+  ctx->keyschdec32[r][3] = ctx->keyschenc32[r][3];
 }
 
 
-/* Encrypt one block.  A and B need to be aligned on a 4 byte
-   boundary.  A and B may be the same. */
-static void
-do_encrypt_aligned (const RIJNDAEL_context *ctx,
-                    unsigned char *b, const unsigned char *a)
+#if !defined(USE_ARM_ASM) && !defined(USE_AMD64_ASM)
+/* Encrypt one block. A and B may be the same. */
+static unsigned int
+do_encrypt_fn (const RIJNDAEL_context *ctx, unsigned char *b,
+               const unsigned char *a)
 {
-#define rk (ctx->keyschenc)
+#define rk (ctx->keyschenc32)
+  const byte *sbox = ((const byte *)encT) + 1;
   int rounds = ctx->rounds;
   int r;
-  union
-  {
-    u32  tempu32[4];  /* Force correct alignment. */
-    byte temp[4][4];
-  } u;
-
-  *((u32_a_t*)u.temp[0]) = *((u32_a_t*)(a   )) ^ *((u32_a_t*)rk[0][0]);
-  *((u32_a_t*)u.temp[1]) = *((u32_a_t*)(a+ 4)) ^ *((u32_a_t*)rk[0][1]);
-  *((u32_a_t*)u.temp[2]) = *((u32_a_t*)(a+ 8)) ^ *((u32_a_t*)rk[0][2]);
-  *((u32_a_t*)u.temp[3]) = *((u32_a_t*)(a+12)) ^ *((u32_a_t*)rk[0][3]);
-  *((u32_a_t*)(b    ))   = (*((u32_a_t*)T1[u.temp[0][0]])
-                        ^ *((u32_a_t*)T2[u.temp[1][1]])
-                        ^ *((u32_a_t*)T3[u.temp[2][2]])
-                        ^ *((u32_a_t*)T4[u.temp[3][3]]));
-  *((u32_a_t*)(b + 4))   = (*((u32_a_t*)T1[u.temp[1][0]])
-                        ^ *((u32_a_t*)T2[u.temp[2][1]])
-                        ^ *((u32_a_t*)T3[u.temp[3][2]])
-                        ^ *((u32_a_t*)T4[u.temp[0][3]]));
-  *((u32_a_t*)(b + 8))   = (*((u32_a_t*)T1[u.temp[2][0]])
-                        ^ *((u32_a_t*)T2[u.temp[3][1]])
-                        ^ *((u32_a_t*)T3[u.temp[0][2]])
-                        ^ *((u32_a_t*)T4[u.temp[1][3]]));
-  *((u32_a_t*)(b +12))   = (*((u32_a_t*)T1[u.temp[3][0]])
-                        ^ *((u32_a_t*)T2[u.temp[0][1]])
-                        ^ *((u32_a_t*)T3[u.temp[1][2]])
-                        ^ *((u32_a_t*)T4[u.temp[2][3]]));
-
-  for (r = 1; r < rounds-1; r++)
+  u32 sa[4];
+  u32 sb[4];
+
+  sb[0] = buf_get_le32(a + 0);
+  sb[1] = buf_get_le32(a + 4);
+  sb[2] = buf_get_le32(a + 8);
+  sb[3] = buf_get_le32(a + 12);
+
+  sa[0] = sb[0] ^ rk[0][0];
+  sa[1] = sb[1] ^ rk[0][1];
+  sa[2] = sb[2] ^ rk[0][2];
+  sa[3] = sb[3] ^ rk[0][3];
+
+  sb[0] = rol(encT[(byte)(sa[0] >> (0 * 8))], (0 * 8));
+  sb[3] = rol(encT[(byte)(sa[0] >> (1 * 8))], (1 * 8));
+  sb[2] = rol(encT[(byte)(sa[0] >> (2 * 8))], (2 * 8));
+  sb[1] = rol(encT[(byte)(sa[0] >> (3 * 8))], (3 * 8));
+  sa[0] = rk[1][0] ^ sb[0];
+
+  sb[1] ^= rol(encT[(byte)(sa[1] >> (0 * 8))], (0 * 8));
+  sa[0] ^= rol(encT[(byte)(sa[1] >> (1 * 8))], (1 * 8));
+  sb[3] ^= rol(encT[(byte)(sa[1] >> (2 * 8))], (2 * 8));
+  sb[2] ^= rol(encT[(byte)(sa[1] >> (3 * 8))], (3 * 8));
+  sa[1] = rk[1][1] ^ sb[1];
+
+  sb[2] ^= rol(encT[(byte)(sa[2] >> (0 * 8))], (0 * 8));
+  sa[1] ^= rol(encT[(byte)(sa[2] >> (1 * 8))], (1 * 8));
+  sa[0] ^= rol(encT[(byte)(sa[2] >> (2 * 8))], (2 * 8));
+  sb[3] ^= rol(encT[(byte)(sa[2] >> (3 * 8))], (3 * 8));
+  sa[2] = rk[1][2] ^ sb[2];
+
+  sb[3] ^= rol(encT[(byte)(sa[3] >> (0 * 8))], (0 * 8));
+  sa[2] ^= rol(encT[(byte)(sa[3] >> (1 * 8))], (1 * 8));
+  sa[1] ^= rol(encT[(byte)(sa[3] >> (2 * 8))], (2 * 8));
+  sa[0] ^= rol(encT[(byte)(sa[3] >> (3 * 8))], (3 * 8));
+  sa[3] = rk[1][3] ^ sb[3];
+
+  for (r = 2; r < rounds; r++)
     {
-      *((u32_a_t*)u.temp[0]) = *((u32_a_t*)(b   )) ^ *((u32_a_t*)rk[r][0]);
-      *((u32_a_t*)u.temp[1]) = *((u32_a_t*)(b+ 4)) ^ *((u32_a_t*)rk[r][1]);
-      *((u32_a_t*)u.temp[2]) = *((u32_a_t*)(b+ 8)) ^ *((u32_a_t*)rk[r][2]);
-      *((u32_a_t*)u.temp[3]) = *((u32_a_t*)(b+12)) ^ *((u32_a_t*)rk[r][3]);
-
-      *((u32_a_t*)(b    ))   = (*((u32_a_t*)T1[u.temp[0][0]])
-                            ^ *((u32_a_t*)T2[u.temp[1][1]])
-                            ^ *((u32_a_t*)T3[u.temp[2][2]])
-                            ^ *((u32_a_t*)T4[u.temp[3][3]]));
-      *((u32_a_t*)(b + 4))   = (*((u32_a_t*)T1[u.temp[1][0]])
-                            ^ *((u32_a_t*)T2[u.temp[2][1]])
-                            ^ *((u32_a_t*)T3[u.temp[3][2]])
-                            ^ *((u32_a_t*)T4[u.temp[0][3]]));
-      *((u32_a_t*)(b + 8))   = (*((u32_a_t*)T1[u.temp[2][0]])
-                            ^ *((u32_a_t*)T2[u.temp[3][1]])
-                            ^ *((u32_a_t*)T3[u.temp[0][2]])
-                            ^ *((u32_a_t*)T4[u.temp[1][3]]));
-      *((u32_a_t*)(b +12))   = (*((u32_a_t*)T1[u.temp[3][0]])
-                            ^ *((u32_a_t*)T2[u.temp[0][1]])
-                            ^ *((u32_a_t*)T3[u.temp[1][2]])
-                            ^ *((u32_a_t*)T4[u.temp[2][3]]));
+      sb[0] = rol(encT[(byte)(sa[0] >> (0 * 8))], (0 * 8));
+      sb[3] = rol(encT[(byte)(sa[0] >> (1 * 8))], (1 * 8));
+      sb[2] = rol(encT[(byte)(sa[0] >> (2 * 8))], (2 * 8));
+      sb[1] = rol(encT[(byte)(sa[0] >> (3 * 8))], (3 * 8));
+      sa[0] = rk[r][0] ^ sb[0];
+
+      sb[1] ^= rol(encT[(byte)(sa[1] >> (0 * 8))], (0 * 8));
+      sa[0] ^= rol(encT[(byte)(sa[1] >> (1 * 8))], (1 * 8));
+      sb[3] ^= rol(encT[(byte)(sa[1] >> (2 * 8))], (2 * 8));
+      sb[2] ^= rol(encT[(byte)(sa[1] >> (3 * 8))], (3 * 8));
+      sa[1] = rk[r][1] ^ sb[1];
+
+      sb[2] ^= rol(encT[(byte)(sa[2] >> (0 * 8))], (0 * 8));
+      sa[1] ^= rol(encT[(byte)(sa[2] >> (1 * 8))], (1 * 8));
+      sa[0] ^= rol(encT[(byte)(sa[2] >> (2 * 8))], (2 * 8));
+      sb[3] ^= rol(encT[(byte)(sa[2] >> (3 * 8))], (3 * 8));
+      sa[2] = rk[r][2] ^ sb[2];
+
+      sb[3] ^= rol(encT[(byte)(sa[3] >> (0 * 8))], (0 * 8));
+      sa[2] ^= rol(encT[(byte)(sa[3] >> (1 * 8))], (1 * 8));
+      sa[1] ^= rol(encT[(byte)(sa[3] >> (2 * 8))], (2 * 8));
+      sa[0] ^= rol(encT[(byte)(sa[3] >> (3 * 8))], (3 * 8));
+      sa[3] = rk[r][3] ^ sb[3];
+
+      r++;
+
+      sb[0] = rol(encT[(byte)(sa[0] >> (0 * 8))], (0 * 8));
+      sb[3] = rol(encT[(byte)(sa[0] >> (1 * 8))], (1 * 8));
+      sb[2] = rol(encT[(byte)(sa[0] >> (2 * 8))], (2 * 8));
+      sb[1] = rol(encT[(byte)(sa[0] >> (3 * 8))], (3 * 8));
+      sa[0] = rk[r][0] ^ sb[0];
+
+      sb[1] ^= rol(encT[(byte)(sa[1] >> (0 * 8))], (0 * 8));
+      sa[0] ^= rol(encT[(byte)(sa[1] >> (1 * 8))], (1 * 8));
+      sb[3] ^= rol(encT[(byte)(sa[1] >> (2 * 8))], (2 * 8));
+      sb[2] ^= rol(encT[(byte)(sa[1] >> (3 * 8))], (3 * 8));
+      sa[1] = rk[r][1] ^ sb[1];
+
+      sb[2] ^= rol(encT[(byte)(sa[2] >> (0 * 8))], (0 * 8));
+      sa[1] ^= rol(encT[(byte)(sa[2] >> (1 * 8))], (1 * 8));
+      sa[0] ^= rol(encT[(byte)(sa[2] >> (2 * 8))], (2 * 8));
+      sb[3] ^= rol(encT[(byte)(sa[2] >> (3 * 8))], (3 * 8));
+      sa[2] = rk[r][2] ^ sb[2];
+
+      sb[3] ^= rol(encT[(byte)(sa[3] >> (0 * 8))], (0 * 8));
+      sa[2] ^= rol(encT[(byte)(sa[3] >> (1 * 8))], (1 * 8));
+      sa[1] ^= rol(encT[(byte)(sa[3] >> (2 * 8))], (2 * 8));
+      sa[0] ^= rol(encT[(byte)(sa[3] >> (3 * 8))], (3 * 8));
+      sa[3] = rk[r][3] ^ sb[3];
     }
 
   /* Last round is special. */
-  *((u32_a_t*)u.temp[0]) = *((u32_a_t*)(b   )) ^ *((u32_a_t*)rk[rounds-1][0]);
-  *((u32_a_t*)u.temp[1]) = *((u32_a_t*)(b+ 4)) ^ *((u32_a_t*)rk[rounds-1][1]);
-  *((u32_a_t*)u.temp[2]) = *((u32_a_t*)(b+ 8)) ^ *((u32_a_t*)rk[rounds-1][2]);
-  *((u32_a_t*)u.temp[3]) = *((u32_a_t*)(b+12)) ^ *((u32_a_t*)rk[rounds-1][3]);
-  b[ 0] = T1[u.temp[0][0]][1];
-  b[ 1] = T1[u.temp[1][1]][1];
-  b[ 2] = T1[u.temp[2][2]][1];
-  b[ 3] = T1[u.temp[3][3]][1];
-  b[ 4] = T1[u.temp[1][0]][1];
-  b[ 5] = T1[u.temp[2][1]][1];
-  b[ 6] = T1[u.temp[3][2]][1];
-  b[ 7] = T1[u.temp[0][3]][1];
-  b[ 8] = T1[u.temp[2][0]][1];
-  b[ 9] = T1[u.temp[3][1]][1];
-  b[10] = T1[u.temp[0][2]][1];
-  b[11] = T1[u.temp[1][3]][1];
-  b[12] = T1[u.temp[3][0]][1];
-  b[13] = T1[u.temp[0][1]][1];
-  b[14] = T1[u.temp[1][2]][1];
-  b[15] = T1[u.temp[2][3]][1];
-  *((u32_a_t*)(b   )) ^= *((u32_a_t*)rk[rounds][0]);
-  *((u32_a_t*)(b+ 4)) ^= *((u32_a_t*)rk[rounds][1]);
-  *((u32_a_t*)(b+ 8)) ^= *((u32_a_t*)rk[rounds][2]);
-  *((u32_a_t*)(b+12)) ^= *((u32_a_t*)rk[rounds][3]);
-#undef rk
-}
-
 
-static void
-do_encrypt (const RIJNDAEL_context *ctx,
-            unsigned char *bx, const unsigned char *ax)
-{
-  /* BX and AX are not necessary correctly aligned.  Thus we might
-     need to copy them here.  We try to align to a 16 bytes.  */
-  if (((size_t)ax & 0x0f) || ((size_t)bx & 0x0f))
-    {
-      union
-      {
-        u32  dummy[4];
-        byte a[16] ATTR_ALIGNED_16;
-      } a;
-      union
-      {
-        u32  dummy[4];
-        byte b[16] ATTR_ALIGNED_16;
-      } b;
+  sb[0] = ((u32)sbox[(byte)(sa[0] >> (0 * 8)) * 4]) << (0 * 8);
+  sb[3] = ((u32)sbox[(byte)(sa[0] >> (1 * 8)) * 4]) << (1 * 8);
+  sb[2] = ((u32)sbox[(byte)(sa[0] >> (2 * 8)) * 4]) << (2 * 8);
+  sb[1] = ((u32)sbox[(byte)(sa[0] >> (3 * 8)) * 4]) << (3 * 8);
+  sa[0] = rk[r][0] ^ sb[0];
+
+  sb[1] ^= ((u32)sbox[(byte)(sa[1] >> (0 * 8)) * 4]) << (0 * 8);
+  sa[0] ^= ((u32)sbox[(byte)(sa[1] >> (1 * 8)) * 4]) << (1 * 8);
+  sb[3] ^= ((u32)sbox[(byte)(sa[1] >> (2 * 8)) * 4]) << (2 * 8);
+  sb[2] ^= ((u32)sbox[(byte)(sa[1] >> (3 * 8)) * 4]) << (3 * 8);
+  sa[1] = rk[r][1] ^ sb[1];
+
+  sb[2] ^= ((u32)sbox[(byte)(sa[2] >> (0 * 8)) * 4]) << (0 * 8);
+  sa[1] ^= ((u32)sbox[(byte)(sa[2] >> (1 * 8)) * 4]) << (1 * 8);
+  sa[0] ^= ((u32)sbox[(byte)(sa[2] >> (2 * 8)) * 4]) << (2 * 8);
+  sb[3] ^= ((u32)sbox[(byte)(sa[2] >> (3 * 8)) * 4]) << (3 * 8);
+  sa[2] = rk[r][2] ^ sb[2];
+
+  sb[3] ^= ((u32)sbox[(byte)(sa[3] >> (0 * 8)) * 4]) << (0 * 8);
+  sa[2] ^= ((u32)sbox[(byte)(sa[3] >> (1 * 8)) * 4]) << (1 * 8);
+  sa[1] ^= ((u32)sbox[(byte)(sa[3] >> (2 * 8)) * 4]) << (2 * 8);
+  sa[0] ^= ((u32)sbox[(byte)(sa[3] >> (3 * 8)) * 4]) << (3 * 8);
+  sa[3] = rk[r][3] ^ sb[3];
+
+  buf_put_le32(b + 0, sa[0]);
+  buf_put_le32(b + 4, sa[1]);
+  buf_put_le32(b + 8, sa[2]);
+  buf_put_le32(b + 12, sa[3]);
+#undef rk
 
-      memcpy (a.a, ax, 16);
-      do_encrypt_aligned (ctx, b.b, a.a);
-      memcpy (bx, b.b, 16);
-    }
-  else
-    {
-      do_encrypt_aligned (ctx, bx, ax);
-    }
+  return (56 + 2*sizeof(int));
 }
+#endif /*!USE_ARM_ASM && !USE_AMD64_ASM*/
 
 
-/* Encrypt or decrypt one block using the padlock engine.  A and B may
-   be the same. */
-#ifdef USE_PADLOCK
-static void
-do_padlock (const RIJNDAEL_context *ctx, int decrypt_flag,
+static unsigned int
+do_encrypt (const RIJNDAEL_context *ctx,
             unsigned char *bx, const unsigned char *ax)
 {
-  /* BX and AX are not necessary correctly aligned.  Thus we need to
-     copy them here. */
-  unsigned char a[16] __attribute__ ((aligned (16)));
-  unsigned char b[16] __attribute__ ((aligned (16)));
-  unsigned int cword[4] __attribute__ ((aligned (16)));
-
-  /* The control word fields are:
-      127:12   11:10 9     8     7     6     5     4     3:0
-      RESERVED KSIZE CRYPT INTER KEYGN CIPHR ALIGN DGEST ROUND  */
-  cword[0] = (ctx->rounds & 15);  /* (The mask is just a safeguard.)  */
-  cword[1] = 0;
-  cword[2] = 0;
-  cword[3] = 0;
-  if (decrypt_flag)
-    cword[0] |= 0x00000200;
-
-  memcpy (a, ax, 16);
-
-  asm volatile
-    ("pushfl\n\t"          /* Force key reload.  */
-     "popfl\n\t"
-     "xchg %3, %%ebx\n\t"  /* Load key.  */
-     "movl $1, %%ecx\n\t"  /* Init counter for just one block.  */
-     ".byte 0xf3, 0x0f, 0xa7, 0xc8\n\t" /* REP XSTORE ECB. */
-     "xchg %3, %%ebx\n"    /* Restore GOT register.  */
-     : /* No output */
-     : "S" (a), "D" (b), "d" (cword), "r" (ctx->padlockkey)
-     : "%ecx", "cc", "memory"
-     );
-
-  memcpy (bx, b, 16);
-
-}
-#endif /*USE_PADLOCK*/
-
-
-#ifdef USE_AESNI
-/* Encrypt one block using the Intel AES-NI instructions.  A and B may
-   be the same; they need to be properly aligned to 16 bytes.
-
-   Our problem here is that gcc does not allow the "x" constraint for
-   SSE registers in asm unless you compile with -msse.  The common
-   wisdom is to use a separate file for SSE instructions and build it
-   separately.  This would require a lot of extra build system stuff,
-   similar to what we do in mpi/ for the asm stuff.  What we do
-   instead is to use standard registers and a bit more of plain asm
-   which copies the data and key stuff to the SSE registers and later
-   back.  If we decide to implement some block modes with parallelized
-   AES instructions, it might indeed be better to use plain asm ala
-   mpi/.  */
-static void
-do_aesni_enc_aligned (const RIJNDAEL_context *ctx,
-                      unsigned char *b, const unsigned char *a)
-{
-#define aesenc_xmm1_xmm0      ".byte 0x66, 0x0f, 0x38, 0xdc, 0xc1\n\t"
-#define aesenclast_xmm1_xmm0  ".byte 0x66, 0x0f, 0x38, 0xdd, 0xc1\n\t"
-  /* Note: For now we relax the alignment requirement for A and B: It
-     does not make much difference because in many case we would need
-     to memcpy them to an extra buffer; using the movdqu is much faster
-     that memcpy and movdqa.  For CFB we know that the IV is properly
-     aligned but that is a special case.  We should better implement
-     CFB direct in asm.  */
-  asm volatile ("movdqu %[src], %%xmm0\n\t"     /* xmm0 := *a     */
-                "movl   %[key], %%esi\n\t"      /* esi  := keyschenc */
-                "movdqa (%%esi), %%xmm1\n\t"    /* xmm1 := key[0] */
-                "pxor   %%xmm1, %%xmm0\n\t"     /* xmm0 ^= key[0] */
-                "movdqa 0x10(%%esi), %%xmm1\n\t"
-                aesenc_xmm1_xmm0
-                "movdqa 0x20(%%esi), %%xmm1\n\t"
-                aesenc_xmm1_xmm0
-                "movdqa 0x30(%%esi), %%xmm1\n\t"
-                aesenc_xmm1_xmm0
-                "movdqa 0x40(%%esi), %%xmm1\n\t"
-                aesenc_xmm1_xmm0
-                "movdqa 0x50(%%esi), %%xmm1\n\t"
-                aesenc_xmm1_xmm0
-                "movdqa 0x60(%%esi), %%xmm1\n\t"
-                aesenc_xmm1_xmm0
-                "movdqa 0x70(%%esi), %%xmm1\n\t"
-                aesenc_xmm1_xmm0
-                "movdqa 0x80(%%esi), %%xmm1\n\t"
-                aesenc_xmm1_xmm0
-                "movdqa 0x90(%%esi), %%xmm1\n\t"
-                aesenc_xmm1_xmm0
-                "movdqa 0xa0(%%esi), %%xmm1\n\t"
-                "cmp $10, %[rounds]\n\t"
-                "jz .Lenclast%=\n\t"
-                aesenc_xmm1_xmm0
-                "movdqa 0xb0(%%esi), %%xmm1\n\t"
-                aesenc_xmm1_xmm0
-                "movdqa 0xc0(%%esi), %%xmm1\n\t"
-                "cmp $12, %[rounds]\n\t"
-                "jz .Lenclast%=\n\t"
-                aesenc_xmm1_xmm0
-                "movdqa 0xd0(%%esi), %%xmm1\n\t"
-                aesenc_xmm1_xmm0
-                "movdqa 0xe0(%%esi), %%xmm1\n"
-
-                ".Lenclast%=:\n\t"
-                aesenclast_xmm1_xmm0
-                "movdqu %%xmm0, %[dst]\n"
-                : [dst] "=m" (*b)
-                : [src] "m" (*a),
-                  [key] "r" (ctx->keyschenc),
-                  [rounds] "r" (ctx->rounds)
-                : "%esi", "cc", "memory");
-#undef aesenc_xmm1_xmm0
-#undef aesenclast_xmm1_xmm0
-}
-
-
-static void
-do_aesni_dec_aligned (const RIJNDAEL_context *ctx,
-                      unsigned char *b, const unsigned char *a)
-{
-#define aesdec_xmm1_xmm0      ".byte 0x66, 0x0f, 0x38, 0xde, 0xc1\n\t"
-#define aesdeclast_xmm1_xmm0  ".byte 0x66, 0x0f, 0x38, 0xdf, 0xc1\n\t"
-  asm volatile ("movdqu %[src], %%xmm0\n\t"     /* xmm0 := *a     */
-                "movl   %[key], %%esi\n\t"
-                "movdqa (%%esi), %%xmm1\n\t"
-                "pxor   %%xmm1, %%xmm0\n\t"     /* xmm0 ^= key[0] */
-                "movdqa 0x10(%%esi), %%xmm1\n\t"
-                aesdec_xmm1_xmm0
-                "movdqa 0x20(%%esi), %%xmm1\n\t"
-                aesdec_xmm1_xmm0
-                "movdqa 0x30(%%esi), %%xmm1\n\t"
-                aesdec_xmm1_xmm0
-                "movdqa 0x40(%%esi), %%xmm1\n\t"
-                aesdec_xmm1_xmm0
-                "movdqa 0x50(%%esi), %%xmm1\n\t"
-                aesdec_xmm1_xmm0
-                "movdqa 0x60(%%esi), %%xmm1\n\t"
-                aesdec_xmm1_xmm0
-                "movdqa 0x70(%%esi), %%xmm1\n\t"
-                aesdec_xmm1_xmm0
-                "movdqa 0x80(%%esi), %%xmm1\n\t"
-                aesdec_xmm1_xmm0
-                "movdqa 0x90(%%esi), %%xmm1\n\t"
-                aesdec_xmm1_xmm0
-                "movdqa 0xa0(%%esi), %%xmm1\n\t"
-                "cmp $10, %[rounds]\n\t"
-                "jz .Ldeclast%=\n\t"
-                aesdec_xmm1_xmm0
-                "movdqa 0xb0(%%esi), %%xmm1\n\t"
-                aesdec_xmm1_xmm0
-                "movdqa 0xc0(%%esi), %%xmm1\n\t"
-                "cmp $12, %[rounds]\n\t"
-                "jz .Ldeclast%=\n\t"
-                aesdec_xmm1_xmm0
-                "movdqa 0xd0(%%esi), %%xmm1\n\t"
-                aesdec_xmm1_xmm0
-                "movdqa 0xe0(%%esi), %%xmm1\n"
-
-                ".Ldeclast%=:\n\t"
-                aesdeclast_xmm1_xmm0
-                "movdqu %%xmm0, %[dst]\n"
-                : [dst] "=m" (*b)
-                : [src] "m" (*a),
-                  [key] "r" (ctx->keyschdec),
-                  [rounds] "r" (ctx->rounds)
-                : "%esi", "cc", "memory");
-#undef aesdec_xmm1_xmm0
-#undef aesdeclast_xmm1_xmm0
-}
-
-
-/* Perform a CFB encryption or decryption round using the
-   initialization vector IV and the input block A.  Write the result
-   to the output block B and update IV.  IV needs to be 16 byte
-   aligned.  */
-static void
-do_aesni_cfb (const RIJNDAEL_context *ctx, int decrypt_flag,
-              unsigned char *iv, unsigned char *b, const unsigned char *a)
-{
-#define aesenc_xmm1_xmm0      ".byte 0x66, 0x0f, 0x38, 0xdc, 0xc1\n\t"
-#define aesenclast_xmm1_xmm0  ".byte 0x66, 0x0f, 0x38, 0xdd, 0xc1\n\t"
-  asm volatile ("movdqa %[iv], %%xmm0\n\t"      /* xmm0 := IV     */
-                "movl   %[key], %%esi\n\t"      /* esi  := keyschenc */
-                "movdqa (%%esi), %%xmm1\n\t"    /* xmm1 := key[0] */
-                "pxor   %%xmm1, %%xmm0\n\t"     /* xmm0 ^= key[0] */
-                "movdqa 0x10(%%esi), %%xmm1\n\t"
-                aesenc_xmm1_xmm0
-                "movdqa 0x20(%%esi), %%xmm1\n\t"
-                aesenc_xmm1_xmm0
-                "movdqa 0x30(%%esi), %%xmm1\n\t"
-                aesenc_xmm1_xmm0
-                "movdqa 0x40(%%esi), %%xmm1\n\t"
-                aesenc_xmm1_xmm0
-                "movdqa 0x50(%%esi), %%xmm1\n\t"
-                aesenc_xmm1_xmm0
-                "movdqa 0x60(%%esi), %%xmm1\n\t"
-                aesenc_xmm1_xmm0
-                "movdqa 0x70(%%esi), %%xmm1\n\t"
-                aesenc_xmm1_xmm0
-                "movdqa 0x80(%%esi), %%xmm1\n\t"
-                aesenc_xmm1_xmm0
-                "movdqa 0x90(%%esi), %%xmm1\n\t"
-                aesenc_xmm1_xmm0
-                "movdqa 0xa0(%%esi), %%xmm1\n\t"
-                "cmp $10, %[rounds]\n\t"
-                "jz .Lenclast%=\n\t"
-                aesenc_xmm1_xmm0
-                "movdqa 0xb0(%%esi), %%xmm1\n\t"
-                aesenc_xmm1_xmm0
-                "movdqa 0xc0(%%esi), %%xmm1\n\t"
-                "cmp $12, %[rounds]\n\t"
-                "jz .Lenclast%=\n\t"
-                aesenc_xmm1_xmm0
-                "movdqa 0xd0(%%esi), %%xmm1\n\t"
-                aesenc_xmm1_xmm0
-                "movdqa 0xe0(%%esi), %%xmm1\n"
-
-                ".Lenclast%=:\n\t"
-                aesenclast_xmm1_xmm0
-                "movdqu %[src], %%xmm1\n\t"      /* Save input.  */
-                "pxor %%xmm1, %%xmm0\n\t"        /* xmm0 = input ^ IV  */
-
-                "cmp $1, %[decrypt]\n\t"
-                "jz .Ldecrypt_%=\n\t"
-                "movdqa %%xmm0, %[iv]\n\t"       /* [encrypt] Store IV.  */
-                "jmp .Lleave_%=\n"
-                ".Ldecrypt_%=:\n\t"
-                "movdqa %%xmm1, %[iv]\n"         /* [decrypt] Store IV.  */
-                ".Lleave_%=:\n\t"
-                "movdqu %%xmm0, %[dst]\n"        /* Store output.   */
-                : [iv] "+m" (*iv), [dst] "=m" (*b)
-                : [src] "m" (*a),
-                  [key] "g" (ctx->keyschenc),
-                  [rounds] "g" (ctx->rounds),
-                  [decrypt] "m" (decrypt_flag)
-                : "%esi", "cc", "memory");
-#undef aesenc_xmm1_xmm0
-#undef aesenclast_xmm1_xmm0
-}
-
-/* Perform a CTR encryption round using the counter CTR and the input
-   block A.  Write the result to the output block B and update CTR.
-   CTR needs to be a 16 byte aligned little-endian value.  */
-static void
-do_aesni_ctr (const RIJNDAEL_context *ctx,
-              unsigned char *ctr, unsigned char *b, const unsigned char *a)
-{
-#define aesenc_xmm1_xmm0      ".byte 0x66, 0x0f, 0x38, 0xdc, 0xc1\n\t"
-#define aesenclast_xmm1_xmm0  ".byte 0x66, 0x0f, 0x38, 0xdd, 0xc1\n\t"
-  static unsigned char be_mask[16] __attribute__ ((aligned (16))) =
-    { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
-
-  asm volatile ("movdqa %[ctr], %%xmm0\n\t"     /* xmm0, xmm2 := CTR   */
-                "movaps %%xmm0, %%xmm2\n\t"
-                "mov    $1, %%esi\n\t"          /* xmm2++ (big-endian) */
-                "movd   %%esi, %%xmm1\n\t"
-                "pshufb %[mask], %%xmm2\n\t"
-                "paddq  %%xmm1, %%xmm2\n\t"
-                "pshufb %[mask], %%xmm2\n\t"
-                "movdqa %%xmm2, %[ctr]\n"       /* Update CTR.         */
-
-                "movl   %[key], %%esi\n\t"      /* esi  := keyschenc */
-                "movdqa (%%esi), %%xmm1\n\t"    /* xmm1 := key[0]    */
-                "pxor   %%xmm1, %%xmm0\n\t"     /* xmm0 ^= key[0]    */
-                "movdqa 0x10(%%esi), %%xmm1\n\t"
-                aesenc_xmm1_xmm0
-                "movdqa 0x20(%%esi), %%xmm1\n\t"
-                aesenc_xmm1_xmm0
-                "movdqa 0x30(%%esi), %%xmm1\n\t"
-                aesenc_xmm1_xmm0
-                "movdqa 0x40(%%esi), %%xmm1\n\t"
-                aesenc_xmm1_xmm0
-                "movdqa 0x50(%%esi), %%xmm1\n\t"
-                aesenc_xmm1_xmm0
-                "movdqa 0x60(%%esi), %%xmm1\n\t"
-                aesenc_xmm1_xmm0
-                "movdqa 0x70(%%esi), %%xmm1\n\t"
-                aesenc_xmm1_xmm0
-                "movdqa 0x80(%%esi), %%xmm1\n\t"
-                aesenc_xmm1_xmm0
-                "movdqa 0x90(%%esi), %%xmm1\n\t"
-                aesenc_xmm1_xmm0
-                "movdqa 0xa0(%%esi), %%xmm1\n\t"
-                "cmp $10, %[rounds]\n\t"
-                "jz .Lenclast%=\n\t"
-                aesenc_xmm1_xmm0
-                "movdqa 0xb0(%%esi), %%xmm1\n\t"
-                aesenc_xmm1_xmm0
-                "movdqa 0xc0(%%esi), %%xmm1\n\t"
-                "cmp $12, %[rounds]\n\t"
-                "jz .Lenclast%=\n\t"
-                aesenc_xmm1_xmm0
-                "movdqa 0xd0(%%esi), %%xmm1\n\t"
-                aesenc_xmm1_xmm0
-                "movdqa 0xe0(%%esi), %%xmm1\n"
-
-                ".Lenclast%=:\n\t"
-                aesenclast_xmm1_xmm0
-                "movdqu %[src], %%xmm1\n\t"      /* xmm1 := input   */
-                "pxor %%xmm1, %%xmm0\n\t"        /* EncCTR ^= input  */
-                "movdqu %%xmm0, %[dst]"          /* Store EncCTR.    */
-
-                : [ctr] "+m" (*ctr), [dst] "=m" (*b)
-                : [src] "m" (*a),
-                  [key] "g" (ctx->keyschenc),
-                  [rounds] "g" (ctx->rounds),
-                  [mask] "m" (*be_mask)
-                : "%esi", "cc", "memory");
-#undef aesenc_xmm1_xmm0
-#undef aesenclast_xmm1_xmm0
-}
-
-
-/* Four blocks at a time variant of do_aesni_ctr.  */
-static void
-do_aesni_ctr_4 (const RIJNDAEL_context *ctx,
-                unsigned char *ctr, unsigned char *b, const unsigned char *a)
-{
-#define aesenc_xmm1_xmm0      ".byte 0x66, 0x0f, 0x38, 0xdc, 0xc1\n\t"
-#define aesenc_xmm1_xmm2      ".byte 0x66, 0x0f, 0x38, 0xdc, 0xd1\n\t"
-#define aesenc_xmm1_xmm3      ".byte 0x66, 0x0f, 0x38, 0xdc, 0xd9\n\t"
-#define aesenc_xmm1_xmm4      ".byte 0x66, 0x0f, 0x38, 0xdc, 0xe1\n\t"
-#define aesenclast_xmm1_xmm0  ".byte 0x66, 0x0f, 0x38, 0xdd, 0xc1\n\t"
-#define aesenclast_xmm1_xmm2  ".byte 0x66, 0x0f, 0x38, 0xdd, 0xd1\n\t"
-#define aesenclast_xmm1_xmm3  ".byte 0x66, 0x0f, 0x38, 0xdd, 0xd9\n\t"
-#define aesenclast_xmm1_xmm4  ".byte 0x66, 0x0f, 0x38, 0xdd, 0xe1\n\t"
-
-  static unsigned char be_mask[16] __attribute__ ((aligned (16))) =
-    { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
-
-  /* Register usage:
-      esi   keyschedule
-      xmm0  CTR-0
-      xmm1  temp / round key
-      xmm2  CTR-1
-      xmm3  CTR-2
-      xmm4  CTR-3
-      xmm5  temp
-   */
-
-  asm volatile ("movdqa %[ctr], %%xmm0\n\t"     /* xmm0, xmm2 := CTR   */
-                "movaps %%xmm0, %%xmm2\n\t"
-                "mov    $1, %%esi\n\t"          /* xmm1 := 1 */
-                "movd   %%esi, %%xmm1\n\t"
-                "pshufb %[mask], %%xmm2\n\t"    /* xmm2 := le(xmm2) */
-                "paddq  %%xmm1, %%xmm2\n\t"     /* xmm2++           */
-                "movaps %%xmm2, %%xmm3\n\t"     /* xmm3 := xmm2     */
-                "paddq  %%xmm1, %%xmm3\n\t"     /* xmm3++           */
-                "movaps %%xmm3, %%xmm4\n\t"     /* xmm4 := xmm3     */
-                "paddq  %%xmm1, %%xmm4\n\t"     /* xmm4++           */
-                "movaps %%xmm4, %%xmm5\n\t"     /* xmm5 := xmm4     */
-                "paddq  %%xmm1, %%xmm5\n\t"     /* xmm5++           */
-                "pshufb %[mask], %%xmm2\n\t"    /* xmm2 := be(xmm2) */
-                "pshufb %[mask], %%xmm3\n\t"    /* xmm3 := be(xmm3) */
-                "pshufb %[mask], %%xmm4\n\t"    /* xmm4 := be(xmm4) */
-                "pshufb %[mask], %%xmm5\n\t"    /* xmm5 := be(xmm5) */
-                "movdqa %%xmm5, %[ctr]\n"       /* Update CTR.      */
-
-                "movl   %[key], %%esi\n\t"      /* esi  := keyschenc */
-                "movdqa (%%esi), %%xmm1\n\t"    /* xmm1 := key[0]    */
-                "pxor   %%xmm1, %%xmm0\n\t"     /* xmm0 ^= key[0]    */
-                "pxor   %%xmm1, %%xmm2\n\t"     /* xmm2 ^= key[0]    */
-                "pxor   %%xmm1, %%xmm3\n\t"     /* xmm3 ^= key[0]    */
-                "pxor   %%xmm1, %%xmm4\n\t"     /* xmm4 ^= key[0]    */
-                "movdqa 0x10(%%esi), %%xmm1\n\t"
-                aesenc_xmm1_xmm0
-                aesenc_xmm1_xmm2
-                aesenc_xmm1_xmm3
-                aesenc_xmm1_xmm4
-                "movdqa 0x20(%%esi), %%xmm1\n\t"
-                aesenc_xmm1_xmm0
-                aesenc_xmm1_xmm2
-                aesenc_xmm1_xmm3
-                aesenc_xmm1_xmm4
-                "movdqa 0x30(%%esi), %%xmm1\n\t"
-                aesenc_xmm1_xmm0
-                aesenc_xmm1_xmm2
-                aesenc_xmm1_xmm3
-                aesenc_xmm1_xmm4
-                "movdqa 0x40(%%esi), %%xmm1\n\t"
-                aesenc_xmm1_xmm0
-                aesenc_xmm1_xmm2
-                aesenc_xmm1_xmm3
-                aesenc_xmm1_xmm4
-                "movdqa 0x50(%%esi), %%xmm1\n\t"
-                aesenc_xmm1_xmm0
-                aesenc_xmm1_xmm2
-                aesenc_xmm1_xmm3
-                aesenc_xmm1_xmm4
-                "movdqa 0x60(%%esi), %%xmm1\n\t"
-                aesenc_xmm1_xmm0
-                aesenc_xmm1_xmm2
-                aesenc_xmm1_xmm3
-                aesenc_xmm1_xmm4
-                "movdqa 0x70(%%esi), %%xmm1\n\t"
-                aesenc_xmm1_xmm0
-                aesenc_xmm1_xmm2
-                aesenc_xmm1_xmm3
-                aesenc_xmm1_xmm4
-                "movdqa 0x80(%%esi), %%xmm1\n\t"
-                aesenc_xmm1_xmm0
-                aesenc_xmm1_xmm2
-                aesenc_xmm1_xmm3
-                aesenc_xmm1_xmm4
-                "movdqa 0x90(%%esi), %%xmm1\n\t"
-                aesenc_xmm1_xmm0
-                aesenc_xmm1_xmm2
-                aesenc_xmm1_xmm3
-                aesenc_xmm1_xmm4
-                "movdqa 0xa0(%%esi), %%xmm1\n\t"
-                "cmp $10, %[rounds]\n\t"
-                "jz .Lenclast%=\n\t"
-                aesenc_xmm1_xmm0
-                aesenc_xmm1_xmm2
-                aesenc_xmm1_xmm3
-                aesenc_xmm1_xmm4
-                "movdqa 0xb0(%%esi), %%xmm1\n\t"
-                aesenc_xmm1_xmm0
-                aesenc_xmm1_xmm2
-                aesenc_xmm1_xmm3
-                aesenc_xmm1_xmm4
-                "movdqa 0xc0(%%esi), %%xmm1\n\t"
-                "cmp $12, %[rounds]\n\t"
-                "jz .Lenclast%=\n\t"
-                aesenc_xmm1_xmm0
-                aesenc_xmm1_xmm2
-                aesenc_xmm1_xmm3
-                aesenc_xmm1_xmm4
-                "movdqa 0xd0(%%esi), %%xmm1\n\t"
-                aesenc_xmm1_xmm0
-                aesenc_xmm1_xmm2
-                aesenc_xmm1_xmm3
-                aesenc_xmm1_xmm4
-                "movdqa 0xe0(%%esi), %%xmm1\n"
-
-                ".Lenclast%=:\n\t"
-                aesenclast_xmm1_xmm0
-                aesenclast_xmm1_xmm2
-                aesenclast_xmm1_xmm3
-                aesenclast_xmm1_xmm4
-
-                "movdqu %[src], %%xmm1\n\t"      /* Get block 1.      */
-                "pxor %%xmm1, %%xmm0\n\t"        /* EncCTR-1 ^= input */
-                "movdqu %%xmm0, %[dst]\n\t"      /* Store block 1     */
-
-                "movdqu (16)%[src], %%xmm1\n\t"  /* Get block 2.      */
-                "pxor %%xmm1, %%xmm2\n\t"        /* EncCTR-2 ^= input */
-                "movdqu %%xmm2, (16)%[dst]\n\t"  /* Store block 2.    */
-
-                "movdqu (32)%[src], %%xmm1\n\t"  /* Get block 3.      */
-                "pxor %%xmm1, %%xmm3\n\t"        /* EncCTR-3 ^= input */
-                "movdqu %%xmm3, (32)%[dst]\n\t"  /* Store block 3.    */
-
-                "movdqu (48)%[src], %%xmm1\n\t"  /* Get block 4.      */
-                "pxor %%xmm1, %%xmm4\n\t"        /* EncCTR-4 ^= input */
-                "movdqu %%xmm4, (48)%[dst]"      /* Store block 4.   */
-
-                : [ctr] "+m" (*ctr), [dst] "=m" (*b)
-                : [src] "m" (*a),
-                  [key] "g" (ctx->keyschenc),
-                  [rounds] "g" (ctx->rounds),
-                  [mask] "m" (*be_mask)
-                : "%esi", "cc", "memory");
-#undef aesenc_xmm1_xmm0
-#undef aesenc_xmm1_xmm2
-#undef aesenc_xmm1_xmm3
-#undef aesenc_xmm1_xmm4
-#undef aesenclast_xmm1_xmm0
-#undef aesenclast_xmm1_xmm2
-#undef aesenclast_xmm1_xmm3
-#undef aesenclast_xmm1_xmm4
-}
-
-
-static void
-do_aesni (RIJNDAEL_context *ctx, int decrypt_flag,
-          unsigned char *bx, const unsigned char *ax)
-{
-
-  if (decrypt_flag)
-    {
-      if (!ctx->decryption_prepared )
-        {
-          prepare_decryption ( ctx );
-          ctx->decryption_prepared = 1;
-        }
-      do_aesni_dec_aligned (ctx, bx, ax);
-    }
-  else
-    do_aesni_enc_aligned (ctx, bx, ax);
+#ifdef USE_AMD64_ASM
+  return _gcry_aes_amd64_encrypt_block(ctx->keyschenc, bx, ax, ctx->rounds,
+                                      enc_tables.T);
+#elif defined(USE_ARM_ASM)
+  return _gcry_aes_arm_encrypt_block(ctx->keyschenc, bx, ax, ctx->rounds,
+                                    enc_tables.T);
+#else
+  return do_encrypt_fn (ctx, bx, ax);
+#endif /* !USE_ARM_ASM && !USE_AMD64_ASM*/
 }
-#endif /*USE_AESNI*/
 
 
-static void
+static unsigned int
 rijndael_encrypt (void *context, byte *b, const byte *a)
 {
   RIJNDAEL_context *ctx = context;
 
-  if (0)
-    ;
-#ifdef USE_PADLOCK
-  else if (ctx->use_padlock)
-    {
-      do_padlock (ctx, 0, b, a);
-      _gcry_burn_stack (48 + 15 /* possible padding for alignment */);
-    }
-#endif /*USE_PADLOCK*/
-#ifdef USE_AESNI
-  else if (ctx->use_aesni)
-    {
-      aesni_prepare ();
-      do_aesni (ctx, 0, b, a);
-      aesni_cleanup ();
-    }
-#endif /*USE_AESNI*/
-  else
-    {
-      do_encrypt (ctx, b, a);
-      _gcry_burn_stack (56 + 2*sizeof(int));
-    }
+  if (ctx->prefetch_enc_fn)
+    ctx->prefetch_enc_fn();
+
+  return ctx->encrypt_fn (ctx, b, a);
 }
 
 
@@ -1166,59 +989,32 @@ rijndael_encrypt (void *context, byte *b, const byte *a)
    make sure that IV is aligned on an unsigned long boundary.  This
    function is only intended for the bulk encryption feature of
    cipher.c. */
-void
+static void
 _gcry_aes_cfb_enc (void *context, unsigned char *iv,
                    void *outbuf_arg, const void *inbuf_arg,
-                   unsigned int nblocks)
+                   size_t nblocks)
 {
   RIJNDAEL_context *ctx = context;
   unsigned char *outbuf = outbuf_arg;
   const unsigned char *inbuf = inbuf_arg;
-  unsigned char *ivp;
-  int i;
+  unsigned int burn_depth = 0;
+  rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn;
 
-  if (0)
-    ;
-#ifdef USE_PADLOCK
-  else if (ctx->use_padlock)
-    {
-      /* Fixme: Let Padlock do the CFBing.  */
-      for ( ;nblocks; nblocks-- )
-        {
-          /* Encrypt the IV. */
-          do_padlock (ctx, 0, iv, iv);
-          /* XOR the input with the IV and store input into IV.  */
-          for (ivp=iv,i=0; i < BLOCKSIZE; i++ )
-            *outbuf++ = (*ivp++ ^= *inbuf++);
-        }
-    }
-#endif /*USE_PADLOCK*/
-#ifdef USE_AESNI
-  else if (ctx->use_aesni)
-    {
-      aesni_prepare ();
-      for ( ;nblocks; nblocks-- )
-        {
-          do_aesni_cfb (ctx, 0, iv, outbuf, inbuf);
-          outbuf += BLOCKSIZE;
-          inbuf  += BLOCKSIZE;
-        }
-      aesni_cleanup ();
-    }
-#endif /*USE_AESNI*/
-  else
+  if (ctx->prefetch_enc_fn)
+    ctx->prefetch_enc_fn();
+
+  for ( ;nblocks; nblocks-- )
     {
-      for ( ;nblocks; nblocks-- )
-        {
-          /* Encrypt the IV. */
-          do_encrypt_aligned (ctx, iv, iv);
-          /* XOR the input with the IV and store input into IV.  */
-          for (ivp=iv,i=0; i < BLOCKSIZE; i++ )
-            *outbuf++ = (*ivp++ ^= *inbuf++);
-        }
+      /* Encrypt the IV. */
+      burn_depth = encrypt_fn (ctx, iv, iv);
+      /* XOR the input with the IV and store input into IV.  */
+      cipher_block_xor_2dst(outbuf, iv, inbuf, BLOCKSIZE);
+      outbuf += BLOCKSIZE;
+      inbuf  += BLOCKSIZE;
     }
 
-  _gcry_burn_stack (48 + 2*sizeof(int));
+  if (burn_depth)
+    _gcry_burn_stack (burn_depth + 4 * sizeof(void *));
 }
 
 
@@ -1226,52 +1022,40 @@ _gcry_aes_cfb_enc (void *context, unsigned char *iv,
    make sure that IV is aligned on an unsigned long boundary.  This
    function is only intended for the bulk encryption feature of
    cipher.c. */
-void
+static void
 _gcry_aes_cbc_enc (void *context, unsigned char *iv,
                    void *outbuf_arg, const void *inbuf_arg,
-                   unsigned int nblocks, int cbc_mac)
+                   size_t nblocks, int cbc_mac)
 {
   RIJNDAEL_context *ctx = context;
   unsigned char *outbuf = outbuf_arg;
   const unsigned char *inbuf = inbuf_arg;
-  unsigned char *ivp;
-  int i;
+  unsigned char *last_iv;
+  unsigned int burn_depth = 0;
+  rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn;
 
-#ifdef USE_AESNI
-  if (ctx->use_aesni)
-    aesni_prepare ();
-#endif /*USE_AESNI*/
+  if (ctx->prefetch_enc_fn)
+    ctx->prefetch_enc_fn();
+
+  last_iv = iv;
 
   for ( ;nblocks; nblocks-- )
     {
-      for (ivp=iv, i=0; i < BLOCKSIZE; i++ )
-        outbuf[i] = inbuf[i] ^ *ivp++;
+      cipher_block_xor(outbuf, inbuf, last_iv, BLOCKSIZE);
 
-      if (0)
-        ;
-#ifdef USE_PADLOCK
-      else if (ctx->use_padlock)
-        do_padlock (ctx, 0, outbuf, outbuf);
-#endif /*USE_PADLOCK*/
-#ifdef USE_AESNI
-      else if (ctx->use_aesni)
-        do_aesni (ctx, 0, outbuf, outbuf);
-#endif /*USE_AESNI*/
-      else
-        do_encrypt (ctx, outbuf, outbuf );
+      burn_depth = encrypt_fn (ctx, outbuf, outbuf);
 
-      memcpy (iv, outbuf, BLOCKSIZE);
+      last_iv = outbuf;
       inbuf += BLOCKSIZE;
       if (!cbc_mac)
-        outbuf += BLOCKSIZE;
+       outbuf += BLOCKSIZE;
     }
 
-#ifdef USE_AESNI
-  if (ctx->use_aesni)
-    aesni_cleanup ();
-#endif /*USE_AESNI*/
+  if (last_iv != iv)
+    cipher_block_cpy (iv, last_iv, BLOCKSIZE);
 
-  _gcry_burn_stack (48 + 2*sizeof(int));
+  if (burn_depth)
+    _gcry_burn_stack (burn_depth + 4 * sizeof(void *));
 }
 
 
@@ -1280,355 +1064,480 @@ _gcry_aes_cbc_enc (void *context, unsigned char *iv,
    minimum alignment is for an u32.  This function is only intended
    for the bulk encryption feature of cipher.c.  CTR is expected to be
    of size BLOCKSIZE. */
-void
+static void
 _gcry_aes_ctr_enc (void *context, unsigned char *ctr,
                    void *outbuf_arg, const void *inbuf_arg,
-                   unsigned int nblocks)
+                   size_t nblocks)
 {
   RIJNDAEL_context *ctx = context;
   unsigned char *outbuf = outbuf_arg;
   const unsigned char *inbuf = inbuf_arg;
-  unsigned char *p;
-  int i;
+  unsigned int burn_depth = 0;
+  union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } tmp;
+  rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn;
 
-  if (0)
-    ;
-#ifdef USE_AESNI
-  else if (ctx->use_aesni)
+  if (ctx->prefetch_enc_fn)
+    ctx->prefetch_enc_fn();
+
+  for ( ;nblocks; nblocks-- )
     {
-      aesni_prepare ();
-      for ( ;nblocks > 3 ; nblocks -= 4 )
-        {
-          do_aesni_ctr_4 (ctx, ctr, outbuf, inbuf);
-          outbuf += 4*BLOCKSIZE;
-          inbuf  += 4*BLOCKSIZE;
-        }
-      for ( ;nblocks; nblocks-- )
-        {
-          do_aesni_ctr (ctx, ctr, outbuf, inbuf);
-          outbuf += BLOCKSIZE;
-          inbuf  += BLOCKSIZE;
-        }
-      aesni_cleanup ();
-      aesni_cleanup_2_4 ();
+      /* Encrypt the counter. */
+      burn_depth = encrypt_fn (ctx, tmp.x1, ctr);
+      /* XOR the input with the encrypted counter and store in output.  */
+      cipher_block_xor(outbuf, tmp.x1, inbuf, BLOCKSIZE);
+      outbuf += BLOCKSIZE;
+      inbuf  += BLOCKSIZE;
+      /* Increment the counter.  */
+      cipher_block_add(ctr, 1, BLOCKSIZE);
     }
-#endif /*USE_AESNI*/
-  else
-    {
-      union { unsigned char x1[16]; u32 x32[4]; } tmp;
 
-      for ( ;nblocks; nblocks-- )
-        {
-          /* Encrypt the counter. */
-          do_encrypt_aligned (ctx, tmp.x1, ctr);
-          /* XOR the input with the encrypted counter and store in output.  */
-          for (p=tmp.x1, i=0; i < BLOCKSIZE; i++)
-            *outbuf++ = (*p++ ^= *inbuf++);
-          /* Increment the counter.  */
-          for (i = BLOCKSIZE; i > 0; i--)
-            {
-              ctr[i-1]++;
-              if (ctr[i-1])
-                break;
-            }
-        }
-    }
+  wipememory(&tmp, sizeof(tmp));
 
-  _gcry_burn_stack (48 + 2*sizeof(int));
+  if (burn_depth)
+    _gcry_burn_stack (burn_depth + 4 * sizeof(void *));
 }
 
 
 
-/* Decrypt one block.  A and B need to be aligned on a 4 byte boundary
-   and the decryption must have been prepared.  A and B may be the
-   same. */
-static void
-do_decrypt_aligned (RIJNDAEL_context *ctx,
-                    unsigned char *b, const unsigned char *a)
+#if !defined(USE_ARM_ASM) && !defined(USE_AMD64_ASM)
+/* Decrypt one block.  A and B may be the same. */
+static unsigned int
+do_decrypt_fn (const RIJNDAEL_context *ctx, unsigned char *b,
+               const unsigned char *a)
 {
-#define rk  (ctx->keyschdec)
+#define rk (ctx->keyschdec32)
   int rounds = ctx->rounds;
   int r;
-  union
-  {
-    u32  tempu32[4];  /* Force correct alignment. */
-    byte temp[4][4];
-  } u;
-
-
-  *((u32_a_t*)u.temp[0]) = *((u32_a_t*)(a   )) ^ *((u32_a_t*)rk[rounds][0]);
-  *((u32_a_t*)u.temp[1]) = *((u32_a_t*)(a+ 4)) ^ *((u32_a_t*)rk[rounds][1]);
-  *((u32_a_t*)u.temp[2]) = *((u32_a_t*)(a+ 8)) ^ *((u32_a_t*)rk[rounds][2]);
-  *((u32_a_t*)u.temp[3]) = *((u32_a_t*)(a+12)) ^ *((u32_a_t*)rk[rounds][3]);
-
-  *((u32_a_t*)(b   ))    = (*((u32_a_t*)T5[u.temp[0][0]])
-                        ^ *((u32_a_t*)T6[u.temp[3][1]])
-                        ^ *((u32_a_t*)T7[u.temp[2][2]])
-                        ^ *((u32_a_t*)T8[u.temp[1][3]]));
-  *((u32_a_t*)(b+ 4))    = (*((u32_a_t*)T5[u.temp[1][0]])
-                        ^ *((u32_a_t*)T6[u.temp[0][1]])
-                        ^ *((u32_a_t*)T7[u.temp[3][2]])
-                        ^ *((u32_a_t*)T8[u.temp[2][3]]));
-  *((u32_a_t*)(b+ 8))    = (*((u32_a_t*)T5[u.temp[2][0]])
-                        ^ *((u32_a_t*)T6[u.temp[1][1]])
-                        ^ *((u32_a_t*)T7[u.temp[0][2]])
-                        ^ *((u32_a_t*)T8[u.temp[3][3]]));
-  *((u32_a_t*)(b+12))    = (*((u32_a_t*)T5[u.temp[3][0]])
-                        ^ *((u32_a_t*)T6[u.temp[2][1]])
-                        ^ *((u32_a_t*)T7[u.temp[1][2]])
-                        ^ *((u32_a_t*)T8[u.temp[0][3]]));
-
-  for (r = rounds-1; r > 1; r--)
+  u32 sa[4];
+  u32 sb[4];
+
+  sb[0] = buf_get_le32(a + 0);
+  sb[1] = buf_get_le32(a + 4);
+  sb[2] = buf_get_le32(a + 8);
+  sb[3] = buf_get_le32(a + 12);
+
+  sa[0] = sb[0] ^ rk[rounds][0];
+  sa[1] = sb[1] ^ rk[rounds][1];
+  sa[2] = sb[2] ^ rk[rounds][2];
+  sa[3] = sb[3] ^ rk[rounds][3];
+
+  for (r = rounds - 1; r > 1; r--)
     {
-      *((u32_a_t*)u.temp[0]) = *((u32_a_t*)(b   )) ^ *((u32_a_t*)rk[r][0]);
-      *((u32_a_t*)u.temp[1]) = *((u32_a_t*)(b+ 4)) ^ *((u32_a_t*)rk[r][1]);
-      *((u32_a_t*)u.temp[2]) = *((u32_a_t*)(b+ 8)) ^ *((u32_a_t*)rk[r][2]);
-      *((u32_a_t*)u.temp[3]) = *((u32_a_t*)(b+12)) ^ *((u32_a_t*)rk[r][3]);
-      *((u32_a_t*)(b   ))    = (*((u32_a_t*)T5[u.temp[0][0]])
-                            ^ *((u32_a_t*)T6[u.temp[3][1]])
-                            ^ *((u32_a_t*)T7[u.temp[2][2]])
-                            ^ *((u32_a_t*)T8[u.temp[1][3]]));
-      *((u32_a_t*)(b+ 4))    = (*((u32_a_t*)T5[u.temp[1][0]])
-                            ^ *((u32_a_t*)T6[u.temp[0][1]])
-                            ^ *((u32_a_t*)T7[u.temp[3][2]])
-                            ^ *((u32_a_t*)T8[u.temp[2][3]]));
-      *((u32_a_t*)(b+ 8))    = (*((u32_a_t*)T5[u.temp[2][0]])
-                            ^ *((u32_a_t*)T6[u.temp[1][1]])
-                            ^ *((u32_a_t*)T7[u.temp[0][2]])
-                            ^ *((u32_a_t*)T8[u.temp[3][3]]));
-      *((u32_a_t*)(b+12))    = (*((u32_a_t*)T5[u.temp[3][0]])
-                            ^ *((u32_a_t*)T6[u.temp[2][1]])
-                            ^ *((u32_a_t*)T7[u.temp[1][2]])
-                            ^ *((u32_a_t*)T8[u.temp[0][3]]));
+      sb[0] = rol(decT[(byte)(sa[0] >> (0 * 8))], (0 * 8));
+      sb[1] = rol(decT[(byte)(sa[0] >> (1 * 8))], (1 * 8));
+      sb[2] = rol(decT[(byte)(sa[0] >> (2 * 8))], (2 * 8));
+      sb[3] = rol(decT[(byte)(sa[0] >> (3 * 8))], (3 * 8));
+      sa[0] = rk[r][0] ^ sb[0];
+
+      sb[1] ^= rol(decT[(byte)(sa[1] >> (0 * 8))], (0 * 8));
+      sb[2] ^= rol(decT[(byte)(sa[1] >> (1 * 8))], (1 * 8));
+      sb[3] ^= rol(decT[(byte)(sa[1] >> (2 * 8))], (2 * 8));
+      sa[0] ^= rol(decT[(byte)(sa[1] >> (3 * 8))], (3 * 8));
+      sa[1] = rk[r][1] ^ sb[1];
+
+      sb[2] ^= rol(decT[(byte)(sa[2] >> (0 * 8))], (0 * 8));
+      sb[3] ^= rol(decT[(byte)(sa[2] >> (1 * 8))], (1 * 8));
+      sa[0] ^= rol(decT[(byte)(sa[2] >> (2 * 8))], (2 * 8));
+      sa[1] ^= rol(decT[(byte)(sa[2] >> (3 * 8))], (3 * 8));
+      sa[2] = rk[r][2] ^ sb[2];
+
+      sb[3] ^= rol(decT[(byte)(sa[3] >> (0 * 8))], (0 * 8));
+      sa[0] ^= rol(decT[(byte)(sa[3] >> (1 * 8))], (1 * 8));
+      sa[1] ^= rol(decT[(byte)(sa[3] >> (2 * 8))], (2 * 8));
+      sa[2] ^= rol(decT[(byte)(sa[3] >> (3 * 8))], (3 * 8));
+      sa[3] = rk[r][3] ^ sb[3];
+
+      r--;
+
+      sb[0] = rol(decT[(byte)(sa[0] >> (0 * 8))], (0 * 8));
+      sb[1] = rol(decT[(byte)(sa[0] >> (1 * 8))], (1 * 8));
+      sb[2] = rol(decT[(byte)(sa[0] >> (2 * 8))], (2 * 8));
+      sb[3] = rol(decT[(byte)(sa[0] >> (3 * 8))], (3 * 8));
+      sa[0] = rk[r][0] ^ sb[0];
+
+      sb[1] ^= rol(decT[(byte)(sa[1] >> (0 * 8))], (0 * 8));
+      sb[2] ^= rol(decT[(byte)(sa[1] >> (1 * 8))], (1 * 8));
+      sb[3] ^= rol(decT[(byte)(sa[1] >> (2 * 8))], (2 * 8));
+      sa[0] ^= rol(decT[(byte)(sa[1] >> (3 * 8))], (3 * 8));
+      sa[1] = rk[r][1] ^ sb[1];
+
+      sb[2] ^= rol(decT[(byte)(sa[2] >> (0 * 8))], (0 * 8));
+      sb[3] ^= rol(decT[(byte)(sa[2] >> (1 * 8))], (1 * 8));
+      sa[0] ^= rol(decT[(byte)(sa[2] >> (2 * 8))], (2 * 8));
+      sa[1] ^= rol(decT[(byte)(sa[2] >> (3 * 8))], (3 * 8));
+      sa[2] = rk[r][2] ^ sb[2];
+
+      sb[3] ^= rol(decT[(byte)(sa[3] >> (0 * 8))], (0 * 8));
+      sa[0] ^= rol(decT[(byte)(sa[3] >> (1 * 8))], (1 * 8));
+      sa[1] ^= rol(decT[(byte)(sa[3] >> (2 * 8))], (2 * 8));
+      sa[2] ^= rol(decT[(byte)(sa[3] >> (3 * 8))], (3 * 8));
+      sa[3] = rk[r][3] ^ sb[3];
     }
 
+  sb[0] = rol(decT[(byte)(sa[0] >> (0 * 8))], (0 * 8));
+  sb[1] = rol(decT[(byte)(sa[0] >> (1 * 8))], (1 * 8));
+  sb[2] = rol(decT[(byte)(sa[0] >> (2 * 8))], (2 * 8));
+  sb[3] = rol(decT[(byte)(sa[0] >> (3 * 8))], (3 * 8));
+  sa[0] = rk[1][0] ^ sb[0];
+
+  sb[1] ^= rol(decT[(byte)(sa[1] >> (0 * 8))], (0 * 8));
+  sb[2] ^= rol(decT[(byte)(sa[1] >> (1 * 8))], (1 * 8));
+  sb[3] ^= rol(decT[(byte)(sa[1] >> (2 * 8))], (2 * 8));
+  sa[0] ^= rol(decT[(byte)(sa[1] >> (3 * 8))], (3 * 8));
+  sa[1] = rk[1][1] ^ sb[1];
+
+  sb[2] ^= rol(decT[(byte)(sa[2] >> (0 * 8))], (0 * 8));
+  sb[3] ^= rol(decT[(byte)(sa[2] >> (1 * 8))], (1 * 8));
+  sa[0] ^= rol(decT[(byte)(sa[2] >> (2 * 8))], (2 * 8));
+  sa[1] ^= rol(decT[(byte)(sa[2] >> (3 * 8))], (3 * 8));
+  sa[2] = rk[1][2] ^ sb[2];
+
+  sb[3] ^= rol(decT[(byte)(sa[3] >> (0 * 8))], (0 * 8));
+  sa[0] ^= rol(decT[(byte)(sa[3] >> (1 * 8))], (1 * 8));
+  sa[1] ^= rol(decT[(byte)(sa[3] >> (2 * 8))], (2 * 8));
+  sa[2] ^= rol(decT[(byte)(sa[3] >> (3 * 8))], (3 * 8));
+  sa[3] = rk[1][3] ^ sb[3];
+
   /* Last round is special. */
-  *((u32_a_t*)u.temp[0]) = *((u32_a_t*)(b   )) ^ *((u32_a_t*)rk[1][0]);
-  *((u32_a_t*)u.temp[1]) = *((u32_a_t*)(b+ 4)) ^ *((u32_a_t*)rk[1][1]);
-  *((u32_a_t*)u.temp[2]) = *((u32_a_t*)(b+ 8)) ^ *((u32_a_t*)rk[1][2]);
-  *((u32_a_t*)u.temp[3]) = *((u32_a_t*)(b+12)) ^ *((u32_a_t*)rk[1][3]);
-  b[ 0] = S5[u.temp[0][0]];
-  b[ 1] = S5[u.temp[3][1]];
-  b[ 2] = S5[u.temp[2][2]];
-  b[ 3] = S5[u.temp[1][3]];
-  b[ 4] = S5[u.temp[1][0]];
-  b[ 5] = S5[u.temp[0][1]];
-  b[ 6] = S5[u.temp[3][2]];
-  b[ 7] = S5[u.temp[2][3]];
-  b[ 8] = S5[u.temp[2][0]];
-  b[ 9] = S5[u.temp[1][1]];
-  b[10] = S5[u.temp[0][2]];
-  b[11] = S5[u.temp[3][3]];
-  b[12] = S5[u.temp[3][0]];
-  b[13] = S5[u.temp[2][1]];
-  b[14] = S5[u.temp[1][2]];
-  b[15] = S5[u.temp[0][3]];
-  *((u32_a_t*)(b   )) ^= *((u32_a_t*)rk[0][0]);
-  *((u32_a_t*)(b+ 4)) ^= *((u32_a_t*)rk[0][1]);
-  *((u32_a_t*)(b+ 8)) ^= *((u32_a_t*)rk[0][2]);
-  *((u32_a_t*)(b+12)) ^= *((u32_a_t*)rk[0][3]);
+  sb[0] = (u32)inv_sbox[(byte)(sa[0] >> (0 * 8))] << (0 * 8);
+  sb[1] = (u32)inv_sbox[(byte)(sa[0] >> (1 * 8))] << (1 * 8);
+  sb[2] = (u32)inv_sbox[(byte)(sa[0] >> (2 * 8))] << (2 * 8);
+  sb[3] = (u32)inv_sbox[(byte)(sa[0] >> (3 * 8))] << (3 * 8);
+  sa[0] = sb[0] ^ rk[0][0];
+
+  sb[1] ^= (u32)inv_sbox[(byte)(sa[1] >> (0 * 8))] << (0 * 8);
+  sb[2] ^= (u32)inv_sbox[(byte)(sa[1] >> (1 * 8))] << (1 * 8);
+  sb[3] ^= (u32)inv_sbox[(byte)(sa[1] >> (2 * 8))] << (2 * 8);
+  sa[0] ^= (u32)inv_sbox[(byte)(sa[1] >> (3 * 8))] << (3 * 8);
+  sa[1] = sb[1] ^ rk[0][1];
+
+  sb[2] ^= (u32)inv_sbox[(byte)(sa[2] >> (0 * 8))] << (0 * 8);
+  sb[3] ^= (u32)inv_sbox[(byte)(sa[2] >> (1 * 8))] << (1 * 8);
+  sa[0] ^= (u32)inv_sbox[(byte)(sa[2] >> (2 * 8))] << (2 * 8);
+  sa[1] ^= (u32)inv_sbox[(byte)(sa[2] >> (3 * 8))] << (3 * 8);
+  sa[2] = sb[2] ^ rk[0][2];
+
+  sb[3] ^= (u32)inv_sbox[(byte)(sa[3] >> (0 * 8))] << (0 * 8);
+  sa[0] ^= (u32)inv_sbox[(byte)(sa[3] >> (1 * 8))] << (1 * 8);
+  sa[1] ^= (u32)inv_sbox[(byte)(sa[3] >> (2 * 8))] << (2 * 8);
+  sa[2] ^= (u32)inv_sbox[(byte)(sa[3] >> (3 * 8))] << (3 * 8);
+  sa[3] = sb[3] ^ rk[0][3];
+
+  buf_put_le32(b + 0, sa[0]);
+  buf_put_le32(b + 4, sa[1]);
+  buf_put_le32(b + 8, sa[2]);
+  buf_put_le32(b + 12, sa[3]);
 #undef rk
+
+  return (56+2*sizeof(int));
 }
+#endif /*!USE_ARM_ASM && !USE_AMD64_ASM*/
 
 
 /* Decrypt one block.  AX and BX may be the same. */
-static void
-do_decrypt (RIJNDAEL_context *ctx, byte *bx, const byte *ax)
+static unsigned int
+do_decrypt (const RIJNDAEL_context *ctx, unsigned char *bx,
+            const unsigned char *ax)
+{
+#ifdef USE_AMD64_ASM
+  return _gcry_aes_amd64_decrypt_block(ctx->keyschdec, bx, ax, ctx->rounds,
+                                      dec_tables.T);
+#elif defined(USE_ARM_ASM)
+  return _gcry_aes_arm_decrypt_block(ctx->keyschdec, bx, ax, ctx->rounds,
+                                    dec_tables.T);
+#else
+  return do_decrypt_fn (ctx, bx, ax);
+#endif /*!USE_ARM_ASM && !USE_AMD64_ASM*/
+}
+
+
+static inline void
+check_decryption_preparation (RIJNDAEL_context *ctx)
 {
   if ( !ctx->decryption_prepared )
     {
-      prepare_decryption ( ctx );
-      _gcry_burn_stack (64);
+      ctx->prepare_decryption ( ctx );
       ctx->decryption_prepared = 1;
     }
+}
 
-  /* BX and AX are not necessary correctly aligned.  Thus we might
-     need to copy them here.  We try to align to a 16 bytes. */
-  if (((size_t)ax & 0x0f) || ((size_t)bx & 0x0f))
-    {
-      union
-      {
-        u32  dummy[4];
-        byte a[16] ATTR_ALIGNED_16;
-      } a;
-      union
-      {
-        u32  dummy[4];
-        byte b[16] ATTR_ALIGNED_16;
-      } b;
 
-      memcpy (a.a, ax, 16);
-      do_decrypt_aligned (ctx, b.b, a.a);
-      memcpy (bx, b.b, 16);
-    }
-  else
-    {
-      do_decrypt_aligned (ctx, bx, ax);
-    }
-}
+static unsigned int
+rijndael_decrypt (void *context, byte *b, const byte *a)
+{
+  RIJNDAEL_context *ctx = context;
+
+  check_decryption_preparation (ctx);
 
+  if (ctx->prefetch_dec_fn)
+    ctx->prefetch_dec_fn();
 
+  return ctx->decrypt_fn (ctx, b, a);
+}
 
 
+/* Bulk decryption of complete blocks in CFB mode.  Caller needs to
+   make sure that IV is aligned on an unsigned long boundary.  This
+   function is only intended for the bulk encryption feature of
+   cipher.c. */
 static void
-rijndael_decrypt (void *context, byte *b, const byte *a)
+_gcry_aes_cfb_dec (void *context, unsigned char *iv,
+                   void *outbuf_arg, const void *inbuf_arg,
+                   size_t nblocks)
 {
   RIJNDAEL_context *ctx = context;
+  unsigned char *outbuf = outbuf_arg;
+  const unsigned char *inbuf = inbuf_arg;
+  unsigned int burn_depth = 0;
+  rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn;
 
-  if (0)
-    ;
-#ifdef USE_PADLOCK
-  else if (ctx->use_padlock)
-    {
-      do_padlock (ctx, 1, b, a);
-      _gcry_burn_stack (48 + 2*sizeof(int) /* FIXME */);
-    }
-#endif /*USE_PADLOCK*/
-#ifdef USE_AESNI
-  else if (ctx->use_aesni)
-    {
-      aesni_prepare ();
-      do_aesni (ctx, 1, b, a);
-      aesni_cleanup ();
-    }
-#endif /*USE_AESNI*/
-  else
+  if (ctx->prefetch_enc_fn)
+    ctx->prefetch_enc_fn();
+
+  for ( ;nblocks; nblocks-- )
     {
-      do_decrypt (ctx, b, a);
-      _gcry_burn_stack (56+2*sizeof(int));
+      burn_depth = encrypt_fn (ctx, iv, iv);
+      cipher_block_xor_n_copy(outbuf, iv, inbuf, BLOCKSIZE);
+      outbuf += BLOCKSIZE;
+      inbuf  += BLOCKSIZE;
     }
+
+  if (burn_depth)
+    _gcry_burn_stack (burn_depth + 4 * sizeof(void *));
 }
 
 
-/* Bulk decryption of complete blocks in CFB mode.  Caller needs to
-   make sure that IV is aligned on an unisgned lonhg boundary.  This
+/* Bulk decryption of complete blocks in CBC mode.  Caller needs to
+   make sure that IV is aligned on an unsigned long boundary.  This
    function is only intended for the bulk encryption feature of
    cipher.c. */
-void
-_gcry_aes_cfb_dec (void *context, unsigned char *iv,
+static void
+_gcry_aes_cbc_dec (void *context, unsigned char *iv,
                    void *outbuf_arg, const void *inbuf_arg,
-                   unsigned int nblocks)
+                   size_t nblocks)
 {
   RIJNDAEL_context *ctx = context;
   unsigned char *outbuf = outbuf_arg;
   const unsigned char *inbuf = inbuf_arg;
-  unsigned char *ivp;
-  unsigned char temp;
-  int i;
+  unsigned int burn_depth = 0;
+  unsigned char savebuf[BLOCKSIZE] ATTR_ALIGNED_16;
+  rijndael_cryptfn_t decrypt_fn = ctx->decrypt_fn;
 
-  if (0)
-    ;
-#ifdef USE_PADLOCK
-  else if (ctx->use_padlock)
+  check_decryption_preparation (ctx);
+
+  if (ctx->prefetch_dec_fn)
+    ctx->prefetch_dec_fn();
+
+  for ( ;nblocks; nblocks-- )
     {
-      /* Fixme:  Let Padlock do the CFBing.  */
-      for ( ;nblocks; nblocks-- )
-        {
-          do_padlock (ctx, 0, iv, iv);
-          for (ivp=iv,i=0; i < BLOCKSIZE; i++ )
-            {
-              temp = *inbuf++;
-              *outbuf++ = *ivp ^ temp;
-              *ivp++ = temp;
-            }
-        }
+      /* INBUF is needed later and it may be identical to OUTBUF, so store
+         the intermediate result to SAVEBUF.  */
+
+      burn_depth = decrypt_fn (ctx, savebuf, inbuf);
+
+      cipher_block_xor_n_copy_2(outbuf, savebuf, iv, inbuf, BLOCKSIZE);
+      inbuf += BLOCKSIZE;
+      outbuf += BLOCKSIZE;
     }
-#endif /*USE_PADLOCK*/
-#ifdef USE_AESNI
-  else if (ctx->use_aesni)
+
+  wipememory(savebuf, sizeof(savebuf));
+
+  if (burn_depth)
+    _gcry_burn_stack (burn_depth + 4 * sizeof(void *));
+}
+
+
+
+/* Bulk encryption/decryption of complete blocks in OCB mode. */
+static size_t
+_gcry_aes_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
+                     const void *inbuf_arg, size_t nblocks, int encrypt)
+{
+  RIJNDAEL_context *ctx = (void *)&c->context.c;
+  unsigned char *outbuf = outbuf_arg;
+  const unsigned char *inbuf = inbuf_arg;
+  unsigned int burn_depth = 0;
+
+  if (encrypt)
     {
-      aesni_prepare ();
+      union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } l_tmp;
+      rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn;
+
+      if (ctx->prefetch_enc_fn)
+        ctx->prefetch_enc_fn();
+
       for ( ;nblocks; nblocks-- )
         {
-          do_aesni_cfb (ctx, 1, iv, outbuf, inbuf);
+          u64 i = ++c->u_mode.ocb.data_nblocks;
+          const unsigned char *l = ocb_get_l(c, i);
+
+          /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+          cipher_block_xor_1 (c->u_iv.iv, l, BLOCKSIZE);
+          cipher_block_cpy (l_tmp.x1, inbuf, BLOCKSIZE);
+          /* Checksum_i = Checksum_{i-1} xor P_i  */
+          cipher_block_xor_1 (c->u_ctr.ctr, l_tmp.x1, BLOCKSIZE);
+          /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i)  */
+          cipher_block_xor_1 (l_tmp.x1, c->u_iv.iv, BLOCKSIZE);
+          burn_depth = encrypt_fn (ctx, l_tmp.x1, l_tmp.x1);
+          cipher_block_xor_1 (l_tmp.x1, c->u_iv.iv, BLOCKSIZE);
+          cipher_block_cpy (outbuf, l_tmp.x1, BLOCKSIZE);
+
+          inbuf += BLOCKSIZE;
           outbuf += BLOCKSIZE;
-          inbuf  += BLOCKSIZE;
         }
-      aesni_cleanup ();
     }
-#endif /*USE_AESNI*/
   else
     {
+      union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } l_tmp;
+      rijndael_cryptfn_t decrypt_fn = ctx->decrypt_fn;
+
+      check_decryption_preparation (ctx);
+
+      if (ctx->prefetch_dec_fn)
+        ctx->prefetch_dec_fn();
+
       for ( ;nblocks; nblocks-- )
         {
-          do_encrypt_aligned (ctx, iv, iv);
-          for (ivp=iv,i=0; i < BLOCKSIZE; i++ )
-            {
-              temp = *inbuf++;
-              *outbuf++ = *ivp ^ temp;
-              *ivp++ = temp;
-            }
+          u64 i = ++c->u_mode.ocb.data_nblocks;
+          const unsigned char *l = ocb_get_l(c, i);
+
+          /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+          cipher_block_xor_1 (c->u_iv.iv, l, BLOCKSIZE);
+          cipher_block_cpy (l_tmp.x1, inbuf, BLOCKSIZE);
+          /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i)  */
+          cipher_block_xor_1 (l_tmp.x1, c->u_iv.iv, BLOCKSIZE);
+          burn_depth = decrypt_fn (ctx, l_tmp.x1, l_tmp.x1);
+          cipher_block_xor_1 (l_tmp.x1, c->u_iv.iv, BLOCKSIZE);
+          /* Checksum_i = Checksum_{i-1} xor P_i  */
+          cipher_block_xor_1 (c->u_ctr.ctr, l_tmp.x1, BLOCKSIZE);
+          cipher_block_cpy (outbuf, l_tmp.x1, BLOCKSIZE);
+
+          inbuf += BLOCKSIZE;
+          outbuf += BLOCKSIZE;
         }
     }
 
-  _gcry_burn_stack (48 + 2*sizeof(int));
+  if (burn_depth)
+    _gcry_burn_stack (burn_depth + 4 * sizeof(void *));
+
+  return 0;
 }
 
 
-/* Bulk decryption of complete blocks in CBC mode.  Caller needs to
-   make sure that IV is aligned on an unsigned long boundary.  This
-   function is only intended for the bulk encryption feature of
-   cipher.c. */
-void
-_gcry_aes_cbc_dec (void *context, unsigned char *iv,
-                   void *outbuf_arg, const void *inbuf_arg,
-                   unsigned int nblocks)
+/* Bulk authentication of complete blocks in OCB mode. */
+static size_t
+_gcry_aes_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks)
+{
+  RIJNDAEL_context *ctx = (void *)&c->context.c;
+  const unsigned char *abuf = abuf_arg;
+  unsigned int burn_depth = 0;
+  union { unsigned char x1[16] ATTR_ALIGNED_16; u32 x32[4]; } l_tmp;
+  rijndael_cryptfn_t encrypt_fn = ctx->encrypt_fn;
+
+  if (ctx->prefetch_enc_fn)
+    ctx->prefetch_enc_fn();
+
+  for ( ;nblocks; nblocks-- )
+    {
+      u64 i = ++c->u_mode.ocb.aad_nblocks;
+      const unsigned char *l = ocb_get_l(c, i);
+
+      /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+      cipher_block_xor_1 (c->u_mode.ocb.aad_offset, l, BLOCKSIZE);
+      /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i)  */
+      cipher_block_xor (l_tmp.x1, c->u_mode.ocb.aad_offset, abuf,
+                       BLOCKSIZE);
+      burn_depth = encrypt_fn (ctx, l_tmp.x1, l_tmp.x1);
+      cipher_block_xor_1 (c->u_mode.ocb.aad_sum, l_tmp.x1, BLOCKSIZE);
+
+      abuf += BLOCKSIZE;
+    }
+
+  wipememory(&l_tmp, sizeof(l_tmp));
+
+  if (burn_depth)
+    _gcry_burn_stack (burn_depth + 4 * sizeof(void *));
+
+  return 0;
+}
+
+
+/* Bulk encryption/decryption of complete blocks in XTS mode. */
+static void
+_gcry_aes_xts_crypt (void *context, unsigned char *tweak,
+                    void *outbuf_arg, const void *inbuf_arg,
+                    size_t nblocks, int encrypt)
 {
   RIJNDAEL_context *ctx = context;
   unsigned char *outbuf = outbuf_arg;
   const unsigned char *inbuf = inbuf_arg;
-  unsigned char *ivp;
-  int i;
-  unsigned char savebuf[BLOCKSIZE];
+  unsigned int burn_depth = 0;
+  rijndael_cryptfn_t crypt_fn;
+  u64 tweak_lo, tweak_hi, tweak_next_lo, tweak_next_hi, tmp_lo, tmp_hi, carry;
 
-#ifdef USE_AESNI
-  if (ctx->use_aesni)
-    aesni_prepare ();
-#endif /*USE_AESNI*/
+  if (encrypt)
+    {
+      if (ctx->prefetch_enc_fn)
+       ctx->prefetch_enc_fn();
 
-  for ( ;nblocks; nblocks-- )
+      crypt_fn = ctx->encrypt_fn;
+    }
+  else
     {
-      /* We need to save INBUF away because it may be identical to
-         OUTBUF.  */
-      memcpy (savebuf, inbuf, BLOCKSIZE);
+      check_decryption_preparation (ctx);
 
-      if (0)
-        ;
-#ifdef USE_PADLOCK
-      else if (ctx->use_padlock)
-        do_padlock (ctx, 1, outbuf, inbuf);
-#endif /*USE_PADLOCK*/
-#ifdef USE_AESNI
-      else if (ctx->use_aesni)
-        do_aesni (ctx, 1, outbuf, inbuf);
-#endif /*USE_AESNI*/
-      else
-        do_decrypt (ctx, outbuf, inbuf);
-
-      for (ivp=iv, i=0; i < BLOCKSIZE; i++ )
-        outbuf[i] ^= *ivp++;
-      memcpy (iv, savebuf, BLOCKSIZE);
-      inbuf += BLOCKSIZE;
-      outbuf += BLOCKSIZE;
+      if (ctx->prefetch_dec_fn)
+       ctx->prefetch_dec_fn();
+
+      crypt_fn = ctx->decrypt_fn;
     }
 
-#ifdef USE_AESNI
-  if (ctx->use_aesni)
-    aesni_cleanup ();
-#endif /*USE_AESNI*/
+  tweak_next_lo = buf_get_le64 (tweak + 0);
+  tweak_next_hi = buf_get_le64 (tweak + 8);
 
-  _gcry_burn_stack (48 + 2*sizeof(int) + BLOCKSIZE + 4*sizeof (char*));
-}
+  while (nblocks)
+    {
+      tweak_lo = tweak_next_lo;
+      tweak_hi = tweak_next_hi;
+
+      /* Xor-Encrypt/Decrypt-Xor block. */
+      tmp_lo = buf_get_le64 (inbuf + 0) ^ tweak_lo;
+      tmp_hi = buf_get_le64 (inbuf + 8) ^ tweak_hi;
+
+      buf_put_le64 (outbuf + 0, tmp_lo);
+      buf_put_le64 (outbuf + 8, tmp_hi);
+
+      /* Generate next tweak. */
+      carry = -(tweak_next_hi >> 63) & 0x87;
+      tweak_next_hi = (tweak_next_hi << 1) + (tweak_next_lo >> 63);
+      tweak_next_lo = (tweak_next_lo << 1) ^ carry;
 
+      burn_depth = crypt_fn (ctx, outbuf, outbuf);
 
+      buf_put_le64 (outbuf + 0, buf_get_le64 (outbuf + 0) ^ tweak_lo);
+      buf_put_le64 (outbuf + 8, buf_get_le64 (outbuf + 8) ^ tweak_hi);
+
+      outbuf += GCRY_XTS_BLOCK_LEN;
+      inbuf += GCRY_XTS_BLOCK_LEN;
+      nblocks--;
+    }
+
+  buf_put_le64 (tweak + 0, tweak_next_lo);
+  buf_put_le64 (tweak + 8, tweak_next_hi);
+
+  if (burn_depth)
+    _gcry_burn_stack (burn_depth + 5 * sizeof(void *));
+}
 
 
 /* Run the self-tests for AES 128.  Returns NULL on success. */
 static const char*
 selftest_basic_128 (void)
 {
-  RIJNDAEL_context ctx;
+  RIJNDAEL_context *ctx;
+  unsigned char *ctxmem;
   unsigned char scratch[16];
+  cipher_bulk_ops_t bulk_ops;
 
   /* The test vectors are from the AES supplied ones; more or less
      randomly taken from ecb_tbl.txt (I=42,81,14) */
@@ -1670,11 +1579,21 @@ selftest_basic_128 (void)
     };
 #endif
 
-  rijndael_setkey (&ctx, key_128, sizeof (key_128));
-  rijndael_encrypt (&ctx, scratch, plaintext_128);
+  /* Because gcc/ld can only align the CTX struct on 8 bytes on the
+     stack, we need to allocate that context on the heap.  */
+  ctx = _gcry_cipher_selftest_alloc_ctx (sizeof *ctx, &ctxmem);
+  if (!ctx)
+    return "failed to allocate memory";
+
+  rijndael_setkey (ctx, key_128, sizeof (key_128), &bulk_ops);
+  rijndael_encrypt (ctx, scratch, plaintext_128);
   if (memcmp (scratch, ciphertext_128, sizeof (ciphertext_128)))
-     return "AES-128 test encryption failed.";
-  rijndael_decrypt (&ctx, scratch, scratch);
+    {
+      xfree (ctxmem);
+      return "AES-128 test encryption failed.";
+    }
+  rijndael_decrypt (ctx, scratch, scratch);
+  xfree (ctxmem);
   if (memcmp (scratch, plaintext_128, sizeof (plaintext_128)))
     return "AES-128 test decryption failed.";
 
@@ -1685,8 +1604,10 @@ selftest_basic_128 (void)
 static const char*
 selftest_basic_192 (void)
 {
-  RIJNDAEL_context ctx;
+  RIJNDAEL_context *ctx;
+  unsigned char *ctxmem;
   unsigned char scratch[16];
+  cipher_bulk_ops_t bulk_ops;
 
   static unsigned char plaintext_192[16] =
     {
@@ -1705,11 +1626,18 @@ selftest_basic_192 (void)
       0x12,0x13,0x1A,0xC7,0xC5,0x47,0x88,0xAA
     };
 
-  rijndael_setkey (&ctx, key_192, sizeof(key_192));
-  rijndael_encrypt (&ctx, scratch, plaintext_192);
+  ctx = _gcry_cipher_selftest_alloc_ctx (sizeof *ctx, &ctxmem);
+  if (!ctx)
+    return "failed to allocate memory";
+  rijndael_setkey (ctx, key_192, sizeof(key_192), &bulk_ops);
+  rijndael_encrypt (ctx, scratch, plaintext_192);
   if (memcmp (scratch, ciphertext_192, sizeof (ciphertext_192)))
-    return "AES-192 test encryption failed.";
-  rijndael_decrypt (&ctx, scratch, scratch);
+    {
+      xfree (ctxmem);
+      return "AES-192 test encryption failed.";
+    }
+  rijndael_decrypt (ctx, scratch, scratch);
+  xfree (ctxmem);
   if (memcmp (scratch, plaintext_192, sizeof (plaintext_192)))
     return "AES-192 test decryption failed.";
 
@@ -1721,8 +1649,10 @@ selftest_basic_192 (void)
 static const char*
 selftest_basic_256 (void)
 {
-  RIJNDAEL_context ctx;
+  RIJNDAEL_context *ctx;
+  unsigned char *ctxmem;
   unsigned char scratch[16];
+  cipher_bulk_ops_t bulk_ops;
 
   static unsigned char plaintext_256[16] =
     {
@@ -1742,17 +1672,79 @@ selftest_basic_256 (void)
       0x9A,0xCF,0x72,0x80,0x86,0x04,0x0A,0xE3
     };
 
-  rijndael_setkey (&ctx, key_256, sizeof(key_256));
-  rijndael_encrypt (&ctx, scratch, plaintext_256);
+  ctx = _gcry_cipher_selftest_alloc_ctx (sizeof *ctx, &ctxmem);
+  if (!ctx)
+    return "failed to allocate memory";
+  rijndael_setkey (ctx, key_256, sizeof(key_256), &bulk_ops);
+  rijndael_encrypt (ctx, scratch, plaintext_256);
   if (memcmp (scratch, ciphertext_256, sizeof (ciphertext_256)))
-    return "AES-256 test encryption failed.";
-  rijndael_decrypt (&ctx, scratch, scratch);
+    {
+      xfree (ctxmem);
+      return "AES-256 test encryption failed.";
+    }
+  rijndael_decrypt (ctx, scratch, scratch);
+  xfree (ctxmem);
   if (memcmp (scratch, plaintext_256, sizeof (plaintext_256)))
     return "AES-256 test decryption failed.";
 
   return NULL;
 }
 
+
+/* Run the self-tests for AES-CTR-128, tests IV increment of bulk CTR
+   encryption.  Returns NULL on success. */
+static const char*
+selftest_ctr_128 (void)
+{
+#ifdef USE_VAES
+  const int nblocks = 16+1;
+#else
+  const int nblocks = 8+1;
+#endif
+  const int blocksize = BLOCKSIZE;
+  const int context_size = sizeof(RIJNDAEL_context);
+
+  return _gcry_selftest_helper_ctr("AES", &rijndael_setkey,
+           &rijndael_encrypt, nblocks, blocksize, context_size);
+}
+
+
+/* Run the self-tests for AES-CBC-128, tests bulk CBC decryption.
+   Returns NULL on success. */
+static const char*
+selftest_cbc_128 (void)
+{
+#ifdef USE_VAES
+  const int nblocks = 16+2;
+#else
+  const int nblocks = 8+2;
+#endif
+  const int blocksize = BLOCKSIZE;
+  const int context_size = sizeof(RIJNDAEL_context);
+
+  return _gcry_selftest_helper_cbc("AES", &rijndael_setkey,
+           &rijndael_encrypt, nblocks, blocksize, context_size);
+}
+
+
+/* Run the self-tests for AES-CFB-128, tests bulk CFB decryption.
+   Returns NULL on success. */
+static const char*
+selftest_cfb_128 (void)
+{
+#ifdef USE_VAES
+  const int nblocks = 16+2;
+#else
+  const int nblocks = 8+2;
+#endif
+  const int blocksize = BLOCKSIZE;
+  const int context_size = sizeof(RIJNDAEL_context);
+
+  return _gcry_selftest_helper_cfb("AES", &rijndael_setkey,
+           &rijndael_encrypt, nblocks, blocksize, context_size);
+}
+
+
 /* Run all the self-tests and return NULL on success.  This function
    is used for the on-the-fly self-tests. */
 static const char *
@@ -1765,6 +1757,15 @@ selftest (void)
        || (r = selftest_basic_256 ()) )
     return r;
 
+  if ( (r = selftest_ctr_128 ()) )
+    return r;
+
+  if ( (r = selftest_cbc_128 ()) )
+    return r;
+
+  if ( (r = selftest_cfb_128 ()) )
+    return r;
+
   return r;
 }
 
@@ -1773,7 +1774,7 @@ selftest (void)
 static const char *
 selftest_fips_128_38a (int requested_mode)
 {
-  struct tv
+  static const struct tv
   {
     int mode;
     const unsigned char key[16];
@@ -2021,25 +2022,28 @@ static const char *rijndael_names[] =
     NULL
   };
 
-static gcry_cipher_oid_spec_t rijndael_oids[] =
+static const gcry_cipher_oid_spec_t rijndael_oids[] =
   {
     { "2.16.840.1.101.3.4.1.1", GCRY_CIPHER_MODE_ECB },
     { "2.16.840.1.101.3.4.1.2", GCRY_CIPHER_MODE_CBC },
     { "2.16.840.1.101.3.4.1.3", GCRY_CIPHER_MODE_OFB },
     { "2.16.840.1.101.3.4.1.4", GCRY_CIPHER_MODE_CFB },
+    { "2.16.840.1.101.3.4.1.6", GCRY_CIPHER_MODE_GCM },
+    { "2.16.840.1.101.3.4.1.7", GCRY_CIPHER_MODE_CCM },
     { NULL }
   };
 
 gcry_cipher_spec_t _gcry_cipher_spec_aes =
   {
-    "AES", rijndael_names, rijndael_oids, 16, 128, sizeof (RIJNDAEL_context),
-    rijndael_setkey, rijndael_encrypt, rijndael_decrypt
-  };
-cipher_extra_spec_t _gcry_cipher_extraspec_aes =
-  {
+    GCRY_CIPHER_AES, {0, 1},
+    "AES", rijndael_names, rijndael_oids, 16, 128,
+    sizeof (RIJNDAEL_context),
+    rijndael_setkey, rijndael_encrypt, rijndael_decrypt,
+    NULL, NULL,
     run_selftests
   };
 
+
 static const char *rijndael192_names[] =
   {
     "RIJNDAEL192",
@@ -2047,25 +2051,28 @@ static const char *rijndael192_names[] =
     NULL
   };
 
-static gcry_cipher_oid_spec_t rijndael192_oids[] =
+static const gcry_cipher_oid_spec_t rijndael192_oids[] =
   {
     { "2.16.840.1.101.3.4.1.21", GCRY_CIPHER_MODE_ECB },
     { "2.16.840.1.101.3.4.1.22", GCRY_CIPHER_MODE_CBC },
     { "2.16.840.1.101.3.4.1.23", GCRY_CIPHER_MODE_OFB },
     { "2.16.840.1.101.3.4.1.24", GCRY_CIPHER_MODE_CFB },
+    { "2.16.840.1.101.3.4.1.26", GCRY_CIPHER_MODE_GCM },
+    { "2.16.840.1.101.3.4.1.27", GCRY_CIPHER_MODE_CCM },
     { NULL }
   };
 
 gcry_cipher_spec_t _gcry_cipher_spec_aes192 =
   {
-    "AES192", rijndael192_names, rijndael192_oids, 16, 192, sizeof 
(RIJNDAEL_context),
-    rijndael_setkey, rijndael_encrypt, rijndael_decrypt
-  };
-cipher_extra_spec_t _gcry_cipher_extraspec_aes192 =
-  {
+    GCRY_CIPHER_AES192, {0, 1},
+    "AES192", rijndael192_names, rijndael192_oids, 16, 192,
+    sizeof (RIJNDAEL_context),
+    rijndael_setkey, rijndael_encrypt, rijndael_decrypt,
+    NULL, NULL,
     run_selftests
   };
 
+
 static const char *rijndael256_names[] =
   {
     "RIJNDAEL256",
@@ -2073,23 +2080,23 @@ static const char *rijndael256_names[] =
     NULL
   };
 
-static gcry_cipher_oid_spec_t rijndael256_oids[] =
+static const gcry_cipher_oid_spec_t rijndael256_oids[] =
   {
     { "2.16.840.1.101.3.4.1.41", GCRY_CIPHER_MODE_ECB },
     { "2.16.840.1.101.3.4.1.42", GCRY_CIPHER_MODE_CBC },
     { "2.16.840.1.101.3.4.1.43", GCRY_CIPHER_MODE_OFB },
     { "2.16.840.1.101.3.4.1.44", GCRY_CIPHER_MODE_CFB },
+    { "2.16.840.1.101.3.4.1.46", GCRY_CIPHER_MODE_GCM },
+    { "2.16.840.1.101.3.4.1.47", GCRY_CIPHER_MODE_CCM },
     { NULL }
   };
 
 gcry_cipher_spec_t _gcry_cipher_spec_aes256 =
   {
+    GCRY_CIPHER_AES256, {0, 1},
     "AES256", rijndael256_names, rijndael256_oids, 16, 256,
     sizeof (RIJNDAEL_context),
-    rijndael_setkey, rijndael_encrypt, rijndael_decrypt
-  };
-
-cipher_extra_spec_t _gcry_cipher_extraspec_aes256 =
-  {
+    rijndael_setkey, rijndael_encrypt, rijndael_decrypt,
+    NULL, NULL,
     run_selftests
   };
diff --git a/grub-core/lib/libgcrypt/cipher/rmd160.c 
b/grub-core/lib/libgcrypt/cipher/rmd160.c
index 179a4d965..5c54fdffd 100644
--- a/grub-core/lib/libgcrypt/cipher/rmd160.c
+++ b/grub-core/lib/libgcrypt/cipher/rmd160.c
@@ -24,10 +24,11 @@
 #include <string.h>
 
 #include "g10lib.h"
-#include "rmd.h"
+#include "hash-common.h"
 #include "cipher.h" /* Only used for the rmd160_hash_buffer() prototype. */
 
 #include "bithelp.h"
+#include "bufhelp.h"
 
 /*********************************
  * RIPEMD-160 is not patented, see (as of 25.10.97)
@@ -139,57 +140,50 @@
  * 1 million times "a"   52783243c1697bdbe16d37f97f68f08325dc1528
  */
 
+typedef struct
+{
+  gcry_md_block_ctx_t bctx;
+  u32  h0,h1,h2,h3,h4;
+} RMD160_CONTEXT;
+
+
+static unsigned int
+transform ( void *ctx, const unsigned char *data, size_t nblks );
 
-void
-_gcry_rmd160_init (void *context)
+static void
+rmd160_init (void *context, unsigned int flags)
 {
   RMD160_CONTEXT *hd = context;
 
+  (void)flags;
+
   hd->h0 = 0x67452301;
   hd->h1 = 0xEFCDAB89;
   hd->h2 = 0x98BADCFE;
   hd->h3 = 0x10325476;
   hd->h4 = 0xC3D2E1F0;
-  hd->nblocks = 0;
-  hd->count = 0;
-}
 
+  hd->bctx.nblocks = 0;
+  hd->bctx.nblocks_high = 0;
+  hd->bctx.count = 0;
+  hd->bctx.blocksize_shift = _gcry_ctz(64);
+  hd->bctx.bwrite = transform;
+}
 
 
 /****************
  * Transform the message X which consists of 16 32-bit-words
  */
-static void
-transform ( RMD160_CONTEXT *hd, const unsigned char *data )
+static unsigned int
+transform_blk ( void *ctx, const unsigned char *data )
 {
-  register u32 a,b,c,d,e;
-  u32 aa,bb,cc,dd,ee,t;
-#ifdef WORDS_BIGENDIAN
-  u32 x[16];
-  {
-    int i;
-    byte *p2;
-    const byte *p1;
-    for (i=0, p1=data, p2=(byte*)x; i < 16; i++, p2 += 4 )
-      {
-        p2[3] = *p1++;
-        p2[2] = *p1++;
-        p2[1] = *p1++;
-        p2[0] = *p1++;
-      }
-  }
-#else
-  /* This version is better because it is always aligned;
-   * The performance penalty on a 586-100 is about 6% which
-   * is acceptable - because the data is more local it might
-   * also be possible that this is faster on some machines.
-   * This function (when compiled with -02 on gcc 2.7.2)
-   * executes on a 586-100 (39.73 bogomips) at about 1900kb/sec;
-   * [measured with a 4MB data and "gpgm --print-md rmd160"] */
+  RMD160_CONTEXT *hd = ctx;
+  register u32 al, ar, bl, br, cl, cr, dl, dr, el, er;
   u32 x[16];
-  memcpy( x, data, 64 );
-#endif
+  int i;
 
+  for ( i = 0; i < 16; i++ )
+    x[i] = buf_get_le32(data + i * 4);
 
 #define K0  0x00000000
 #define K1  0x5A827999
@@ -206,278 +200,228 @@ transform ( RMD160_CONTEXT *hd, const unsigned char 
*data )
 #define F2(x,y,z)   ( ((x) | ~(y)) ^ (z) )
 #define F3(x,y,z)   ( ((x) & (z)) | ((y) & ~(z)) )
 #define F4(x,y,z)   ( (x) ^ ((y) | ~(z)) )
-#define R(a,b,c,d,e,f,k,r,s) do { t = a + f(b,c,d) + k + x[r]; \
-                                 a = rol(t,s) + e;            \
+#define R(a,b,c,d,e,f,k,r,s) do { a += f(b,c,d) + k + x[r]; \
+                                 a = rol(a,s) + e;            \
                                  c = rol(c,10);               \
                                } while(0)
 
-  /* left lane */
-  a = hd->h0;
-  b = hd->h1;
-  c = hd->h2;
-  d = hd->h3;
-  e = hd->h4;
-  R( a, b, c, d, e, F0, K0,  0, 11 );
-  R( e, a, b, c, d, F0, K0,  1, 14 );
-  R( d, e, a, b, c, F0, K0,  2, 15 );
-  R( c, d, e, a, b, F0, K0,  3, 12 );
-  R( b, c, d, e, a, F0, K0,  4,  5 );
-  R( a, b, c, d, e, F0, K0,  5,  8 );
-  R( e, a, b, c, d, F0, K0,  6,  7 );
-  R( d, e, a, b, c, F0, K0,  7,  9 );
-  R( c, d, e, a, b, F0, K0,  8, 11 );
-  R( b, c, d, e, a, F0, K0,  9, 13 );
-  R( a, b, c, d, e, F0, K0, 10, 14 );
-  R( e, a, b, c, d, F0, K0, 11, 15 );
-  R( d, e, a, b, c, F0, K0, 12,  6 );
-  R( c, d, e, a, b, F0, K0, 13,  7 );
-  R( b, c, d, e, a, F0, K0, 14,  9 );
-  R( a, b, c, d, e, F0, K0, 15,  8 );
-  R( e, a, b, c, d, F1, K1,  7,  7 );
-  R( d, e, a, b, c, F1, K1,  4,  6 );
-  R( c, d, e, a, b, F1, K1, 13,  8 );
-  R( b, c, d, e, a, F1, K1,  1, 13 );
-  R( a, b, c, d, e, F1, K1, 10, 11 );
-  R( e, a, b, c, d, F1, K1,  6,  9 );
-  R( d, e, a, b, c, F1, K1, 15,  7 );
-  R( c, d, e, a, b, F1, K1,  3, 15 );
-  R( b, c, d, e, a, F1, K1, 12,  7 );
-  R( a, b, c, d, e, F1, K1,  0, 12 );
-  R( e, a, b, c, d, F1, K1,  9, 15 );
-  R( d, e, a, b, c, F1, K1,  5,  9 );
-  R( c, d, e, a, b, F1, K1,  2, 11 );
-  R( b, c, d, e, a, F1, K1, 14,  7 );
-  R( a, b, c, d, e, F1, K1, 11, 13 );
-  R( e, a, b, c, d, F1, K1,  8, 12 );
-  R( d, e, a, b, c, F2, K2,  3, 11 );
-  R( c, d, e, a, b, F2, K2, 10, 13 );
-  R( b, c, d, e, a, F2, K2, 14,  6 );
-  R( a, b, c, d, e, F2, K2,  4,  7 );
-  R( e, a, b, c, d, F2, K2,  9, 14 );
-  R( d, e, a, b, c, F2, K2, 15,  9 );
-  R( c, d, e, a, b, F2, K2,  8, 13 );
-  R( b, c, d, e, a, F2, K2,  1, 15 );
-  R( a, b, c, d, e, F2, K2,  2, 14 );
-  R( e, a, b, c, d, F2, K2,  7,  8 );
-  R( d, e, a, b, c, F2, K2,  0, 13 );
-  R( c, d, e, a, b, F2, K2,  6,  6 );
-  R( b, c, d, e, a, F2, K2, 13,  5 );
-  R( a, b, c, d, e, F2, K2, 11, 12 );
-  R( e, a, b, c, d, F2, K2,  5,  7 );
-  R( d, e, a, b, c, F2, K2, 12,  5 );
-  R( c, d, e, a, b, F3, K3,  1, 11 );
-  R( b, c, d, e, a, F3, K3,  9, 12 );
-  R( a, b, c, d, e, F3, K3, 11, 14 );
-  R( e, a, b, c, d, F3, K3, 10, 15 );
-  R( d, e, a, b, c, F3, K3,  0, 14 );
-  R( c, d, e, a, b, F3, K3,  8, 15 );
-  R( b, c, d, e, a, F3, K3, 12,  9 );
-  R( a, b, c, d, e, F3, K3,  4,  8 );
-  R( e, a, b, c, d, F3, K3, 13,  9 );
-  R( d, e, a, b, c, F3, K3,  3, 14 );
-  R( c, d, e, a, b, F3, K3,  7,  5 );
-  R( b, c, d, e, a, F3, K3, 15,  6 );
-  R( a, b, c, d, e, F3, K3, 14,  8 );
-  R( e, a, b, c, d, F3, K3,  5,  6 );
-  R( d, e, a, b, c, F3, K3,  6,  5 );
-  R( c, d, e, a, b, F3, K3,  2, 12 );
-  R( b, c, d, e, a, F4, K4,  4,  9 );
-  R( a, b, c, d, e, F4, K4,  0, 15 );
-  R( e, a, b, c, d, F4, K4,  5,  5 );
-  R( d, e, a, b, c, F4, K4,  9, 11 );
-  R( c, d, e, a, b, F4, K4,  7,  6 );
-  R( b, c, d, e, a, F4, K4, 12,  8 );
-  R( a, b, c, d, e, F4, K4,  2, 13 );
-  R( e, a, b, c, d, F4, K4, 10, 12 );
-  R( d, e, a, b, c, F4, K4, 14,  5 );
-  R( c, d, e, a, b, F4, K4,  1, 12 );
-  R( b, c, d, e, a, F4, K4,  3, 13 );
-  R( a, b, c, d, e, F4, K4,  8, 14 );
-  R( e, a, b, c, d, F4, K4, 11, 11 );
-  R( d, e, a, b, c, F4, K4,  6,  8 );
-  R( c, d, e, a, b, F4, K4, 15,  5 );
-  R( b, c, d, e, a, F4, K4, 13,  6 );
-
-  aa = a; bb = b; cc = c; dd = d; ee = e;
-
-  /* right lane */
-  a = hd->h0;
-  b = hd->h1;
-  c = hd->h2;
-  d = hd->h3;
-  e = hd->h4;
-  R( a, b, c, d, e, F4, KK0,   5,  8);
-  R( e, a, b, c, d, F4, KK0, 14,  9);
-  R( d, e, a, b, c, F4, KK0,   7,  9);
-  R( c, d, e, a, b, F4, KK0,   0, 11);
-  R( b, c, d, e, a, F4, KK0,   9, 13);
-  R( a, b, c, d, e, F4, KK0,   2, 15);
-  R( e, a, b, c, d, F4, KK0, 11, 15);
-  R( d, e, a, b, c, F4, KK0,   4,  5);
-  R( c, d, e, a, b, F4, KK0, 13,  7);
-  R( b, c, d, e, a, F4, KK0,   6,  7);
-  R( a, b, c, d, e, F4, KK0, 15,  8);
-  R( e, a, b, c, d, F4, KK0,   8, 11);
-  R( d, e, a, b, c, F4, KK0,   1, 14);
-  R( c, d, e, a, b, F4, KK0, 10, 14);
-  R( b, c, d, e, a, F4, KK0,   3, 12);
-  R( a, b, c, d, e, F4, KK0, 12,  6);
-  R( e, a, b, c, d, F3, KK1,   6,  9);
-  R( d, e, a, b, c, F3, KK1, 11, 13);
-  R( c, d, e, a, b, F3, KK1,   3, 15);
-  R( b, c, d, e, a, F3, KK1,   7,  7);
-  R( a, b, c, d, e, F3, KK1,   0, 12);
-  R( e, a, b, c, d, F3, KK1, 13,  8);
-  R( d, e, a, b, c, F3, KK1,   5,  9);
-  R( c, d, e, a, b, F3, KK1, 10, 11);
-  R( b, c, d, e, a, F3, KK1, 14,  7);
-  R( a, b, c, d, e, F3, KK1, 15,  7);
-  R( e, a, b, c, d, F3, KK1,   8, 12);
-  R( d, e, a, b, c, F3, KK1, 12,  7);
-  R( c, d, e, a, b, F3, KK1,   4,  6);
-  R( b, c, d, e, a, F3, KK1,   9, 15);
-  R( a, b, c, d, e, F3, KK1,   1, 13);
-  R( e, a, b, c, d, F3, KK1,   2, 11);
-  R( d, e, a, b, c, F2, KK2, 15,  9);
-  R( c, d, e, a, b, F2, KK2,   5,  7);
-  R( b, c, d, e, a, F2, KK2,   1, 15);
-  R( a, b, c, d, e, F2, KK2,   3, 11);
-  R( e, a, b, c, d, F2, KK2,   7,  8);
-  R( d, e, a, b, c, F2, KK2, 14,  6);
-  R( c, d, e, a, b, F2, KK2,   6,  6);
-  R( b, c, d, e, a, F2, KK2,   9, 14);
-  R( a, b, c, d, e, F2, KK2, 11, 12);
-  R( e, a, b, c, d, F2, KK2,   8, 13);
-  R( d, e, a, b, c, F2, KK2, 12,  5);
-  R( c, d, e, a, b, F2, KK2,   2, 14);
-  R( b, c, d, e, a, F2, KK2, 10, 13);
-  R( a, b, c, d, e, F2, KK2,   0, 13);
-  R( e, a, b, c, d, F2, KK2,   4,  7);
-  R( d, e, a, b, c, F2, KK2, 13,  5);
-  R( c, d, e, a, b, F1, KK3,   8, 15);
-  R( b, c, d, e, a, F1, KK3,   6,  5);
-  R( a, b, c, d, e, F1, KK3,   4,  8);
-  R( e, a, b, c, d, F1, KK3,   1, 11);
-  R( d, e, a, b, c, F1, KK3,   3, 14);
-  R( c, d, e, a, b, F1, KK3, 11, 14);
-  R( b, c, d, e, a, F1, KK3, 15,  6);
-  R( a, b, c, d, e, F1, KK3,   0, 14);
-  R( e, a, b, c, d, F1, KK3,   5,  6);
-  R( d, e, a, b, c, F1, KK3, 12,  9);
-  R( c, d, e, a, b, F1, KK3,   2, 12);
-  R( b, c, d, e, a, F1, KK3, 13,  9);
-  R( a, b, c, d, e, F1, KK3,   9, 12);
-  R( e, a, b, c, d, F1, KK3,   7,  5);
-  R( d, e, a, b, c, F1, KK3, 10, 15);
-  R( c, d, e, a, b, F1, KK3, 14,  8);
-  R( b, c, d, e, a, F0, KK4, 12,  8);
-  R( a, b, c, d, e, F0, KK4, 15,  5);
-  R( e, a, b, c, d, F0, KK4, 10, 12);
-  R( d, e, a, b, c, F0, KK4,   4,  9);
-  R( c, d, e, a, b, F0, KK4,   1, 12);
-  R( b, c, d, e, a, F0, KK4,   5,  5);
-  R( a, b, c, d, e, F0, KK4,   8, 14);
-  R( e, a, b, c, d, F0, KK4,   7,  6);
-  R( d, e, a, b, c, F0, KK4,   6,  8);
-  R( c, d, e, a, b, F0, KK4,   2, 13);
-  R( b, c, d, e, a, F0, KK4, 13,  6);
-  R( a, b, c, d, e, F0, KK4, 14,  5);
-  R( e, a, b, c, d, F0, KK4,   0, 15);
-  R( d, e, a, b, c, F0, KK4,   3, 13);
-  R( c, d, e, a, b, F0, KK4,   9, 11);
-  R( b, c, d, e, a, F0, KK4, 11, 11);
-
-
-  t       = hd->h1 + d + cc;
-  hd->h1 = hd->h2 + e + dd;
-  hd->h2 = hd->h3 + a + ee;
-  hd->h3 = hd->h4 + b + aa;
-  hd->h4 = hd->h0 + c + bb;
-  hd->h0 = t;
+  /* left lane and right lanes interleaved */
+  al = ar = hd->h0;
+  bl = br = hd->h1;
+  cl = cr = hd->h2;
+  dl = dr = hd->h3;
+  el = er = hd->h4;
+  R( al, bl, cl, dl, el, F0, K0,  0, 11 );
+  R( ar, br, cr, dr, er, F4, KK0,      5,  8);
+  R( el, al, bl, cl, dl, F0, K0,  1, 14 );
+  R( er, ar, br, cr, dr, F4, KK0, 14,  9);
+  R( dl, el, al, bl, cl, F0, K0,  2, 15 );
+  R( dr, er, ar, br, cr, F4, KK0,      7,  9);
+  R( cl, dl, el, al, bl, F0, K0,  3, 12 );
+  R( cr, dr, er, ar, br, F4, KK0,      0, 11);
+  R( bl, cl, dl, el, al, F0, K0,  4,  5 );
+  R( br, cr, dr, er, ar, F4, KK0,      9, 13);
+  R( al, bl, cl, dl, el, F0, K0,  5,  8 );
+  R( ar, br, cr, dr, er, F4, KK0,      2, 15);
+  R( el, al, bl, cl, dl, F0, K0,  6,  7 );
+  R( er, ar, br, cr, dr, F4, KK0, 11, 15);
+  R( dl, el, al, bl, cl, F0, K0,  7,  9 );
+  R( dr, er, ar, br, cr, F4, KK0,      4,  5);
+  R( cl, dl, el, al, bl, F0, K0,  8, 11 );
+  R( cr, dr, er, ar, br, F4, KK0, 13,  7);
+  R( bl, cl, dl, el, al, F0, K0,  9, 13 );
+  R( br, cr, dr, er, ar, F4, KK0,      6,  7);
+  R( al, bl, cl, dl, el, F0, K0, 10, 14 );
+  R( ar, br, cr, dr, er, F4, KK0, 15,  8);
+  R( el, al, bl, cl, dl, F0, K0, 11, 15 );
+  R( er, ar, br, cr, dr, F4, KK0,      8, 11);
+  R( dl, el, al, bl, cl, F0, K0, 12,  6 );
+  R( dr, er, ar, br, cr, F4, KK0,      1, 14);
+  R( cl, dl, el, al, bl, F0, K0, 13,  7 );
+  R( cr, dr, er, ar, br, F4, KK0, 10, 14);
+  R( bl, cl, dl, el, al, F0, K0, 14,  9 );
+  R( br, cr, dr, er, ar, F4, KK0,      3, 12);
+  R( al, bl, cl, dl, el, F0, K0, 15,  8 );
+  R( ar, br, cr, dr, er, F4, KK0, 12,  6);
+  R( el, al, bl, cl, dl, F1, K1,  7,  7 );
+  R( er, ar, br, cr, dr, F3, KK1,      6,  9);
+  R( dl, el, al, bl, cl, F1, K1,  4,  6 );
+  R( dr, er, ar, br, cr, F3, KK1, 11, 13);
+  R( cl, dl, el, al, bl, F1, K1, 13,  8 );
+  R( cr, dr, er, ar, br, F3, KK1,      3, 15);
+  R( bl, cl, dl, el, al, F1, K1,  1, 13 );
+  R( br, cr, dr, er, ar, F3, KK1,      7,  7);
+  R( al, bl, cl, dl, el, F1, K1, 10, 11 );
+  R( ar, br, cr, dr, er, F3, KK1,      0, 12);
+  R( el, al, bl, cl, dl, F1, K1,  6,  9 );
+  R( er, ar, br, cr, dr, F3, KK1, 13,  8);
+  R( dl, el, al, bl, cl, F1, K1, 15,  7 );
+  R( dr, er, ar, br, cr, F3, KK1,      5,  9);
+  R( cl, dl, el, al, bl, F1, K1,  3, 15 );
+  R( cr, dr, er, ar, br, F3, KK1, 10, 11);
+  R( bl, cl, dl, el, al, F1, K1, 12,  7 );
+  R( br, cr, dr, er, ar, F3, KK1, 14,  7);
+  R( al, bl, cl, dl, el, F1, K1,  0, 12 );
+  R( ar, br, cr, dr, er, F3, KK1, 15,  7);
+  R( el, al, bl, cl, dl, F1, K1,  9, 15 );
+  R( er, ar, br, cr, dr, F3, KK1,      8, 12);
+  R( dl, el, al, bl, cl, F1, K1,  5,  9 );
+  R( dr, er, ar, br, cr, F3, KK1, 12,  7);
+  R( cl, dl, el, al, bl, F1, K1,  2, 11 );
+  R( cr, dr, er, ar, br, F3, KK1,      4,  6);
+  R( bl, cl, dl, el, al, F1, K1, 14,  7 );
+  R( br, cr, dr, er, ar, F3, KK1,      9, 15);
+  R( al, bl, cl, dl, el, F1, K1, 11, 13 );
+  R( ar, br, cr, dr, er, F3, KK1,      1, 13);
+  R( el, al, bl, cl, dl, F1, K1,  8, 12 );
+  R( er, ar, br, cr, dr, F3, KK1,      2, 11);
+  R( dl, el, al, bl, cl, F2, K2,  3, 11 );
+  R( dr, er, ar, br, cr, F2, KK2, 15,  9);
+  R( cl, dl, el, al, bl, F2, K2, 10, 13 );
+  R( cr, dr, er, ar, br, F2, KK2,      5,  7);
+  R( bl, cl, dl, el, al, F2, K2, 14,  6 );
+  R( br, cr, dr, er, ar, F2, KK2,      1, 15);
+  R( al, bl, cl, dl, el, F2, K2,  4,  7 );
+  R( ar, br, cr, dr, er, F2, KK2,      3, 11);
+  R( el, al, bl, cl, dl, F2, K2,  9, 14 );
+  R( er, ar, br, cr, dr, F2, KK2,      7,  8);
+  R( dl, el, al, bl, cl, F2, K2, 15,  9 );
+  R( dr, er, ar, br, cr, F2, KK2, 14,  6);
+  R( cl, dl, el, al, bl, F2, K2,  8, 13 );
+  R( cr, dr, er, ar, br, F2, KK2,      6,  6);
+  R( bl, cl, dl, el, al, F2, K2,  1, 15 );
+  R( br, cr, dr, er, ar, F2, KK2,      9, 14);
+  R( al, bl, cl, dl, el, F2, K2,  2, 14 );
+  R( ar, br, cr, dr, er, F2, KK2, 11, 12);
+  R( el, al, bl, cl, dl, F2, K2,  7,  8 );
+  R( er, ar, br, cr, dr, F2, KK2,      8, 13);
+  R( dl, el, al, bl, cl, F2, K2,  0, 13 );
+  R( dr, er, ar, br, cr, F2, KK2, 12,  5);
+  R( cl, dl, el, al, bl, F2, K2,  6,  6 );
+  R( cr, dr, er, ar, br, F2, KK2,      2, 14);
+  R( bl, cl, dl, el, al, F2, K2, 13,  5 );
+  R( br, cr, dr, er, ar, F2, KK2, 10, 13);
+  R( al, bl, cl, dl, el, F2, K2, 11, 12 );
+  R( ar, br, cr, dr, er, F2, KK2,      0, 13);
+  R( el, al, bl, cl, dl, F2, K2,  5,  7 );
+  R( er, ar, br, cr, dr, F2, KK2,      4,  7);
+  R( dl, el, al, bl, cl, F2, K2, 12,  5 );
+  R( dr, er, ar, br, cr, F2, KK2, 13,  5);
+  R( cl, dl, el, al, bl, F3, K3,  1, 11 );
+  R( cr, dr, er, ar, br, F1, KK3,      8, 15);
+  R( bl, cl, dl, el, al, F3, K3,  9, 12 );
+  R( br, cr, dr, er, ar, F1, KK3,      6,  5);
+  R( al, bl, cl, dl, el, F3, K3, 11, 14 );
+  R( ar, br, cr, dr, er, F1, KK3,      4,  8);
+  R( el, al, bl, cl, dl, F3, K3, 10, 15 );
+  R( er, ar, br, cr, dr, F1, KK3,      1, 11);
+  R( dl, el, al, bl, cl, F3, K3,  0, 14 );
+  R( dr, er, ar, br, cr, F1, KK3,      3, 14);
+  R( cl, dl, el, al, bl, F3, K3,  8, 15 );
+  R( cr, dr, er, ar, br, F1, KK3, 11, 14);
+  R( bl, cl, dl, el, al, F3, K3, 12,  9 );
+  R( br, cr, dr, er, ar, F1, KK3, 15,  6);
+  R( al, bl, cl, dl, el, F3, K3,  4,  8 );
+  R( ar, br, cr, dr, er, F1, KK3,      0, 14);
+  R( el, al, bl, cl, dl, F3, K3, 13,  9 );
+  R( er, ar, br, cr, dr, F1, KK3,      5,  6);
+  R( dl, el, al, bl, cl, F3, K3,  3, 14 );
+  R( dr, er, ar, br, cr, F1, KK3, 12,  9);
+  R( cl, dl, el, al, bl, F3, K3,  7,  5 );
+  R( cr, dr, er, ar, br, F1, KK3,      2, 12);
+  R( bl, cl, dl, el, al, F3, K3, 15,  6 );
+  R( br, cr, dr, er, ar, F1, KK3, 13,  9);
+  R( al, bl, cl, dl, el, F3, K3, 14,  8 );
+  R( ar, br, cr, dr, er, F1, KK3,      9, 12);
+  R( el, al, bl, cl, dl, F3, K3,  5,  6 );
+  R( er, ar, br, cr, dr, F1, KK3,      7,  5);
+  R( dl, el, al, bl, cl, F3, K3,  6,  5 );
+  R( dr, er, ar, br, cr, F1, KK3, 10, 15);
+  R( cl, dl, el, al, bl, F3, K3,  2, 12 );
+  R( cr, dr, er, ar, br, F1, KK3, 14,  8);
+  R( bl, cl, dl, el, al, F4, K4,  4,  9 );
+  R( br, cr, dr, er, ar, F0, KK4, 12,  8);
+  R( al, bl, cl, dl, el, F4, K4,  0, 15 );
+  R( ar, br, cr, dr, er, F0, KK4, 15,  5);
+  R( el, al, bl, cl, dl, F4, K4,  5,  5 );
+  R( er, ar, br, cr, dr, F0, KK4, 10, 12);
+  R( dl, el, al, bl, cl, F4, K4,  9, 11 );
+  R( dr, er, ar, br, cr, F0, KK4,      4,  9);
+  R( cl, dl, el, al, bl, F4, K4,  7,  6 );
+  R( cr, dr, er, ar, br, F0, KK4,      1, 12);
+  R( bl, cl, dl, el, al, F4, K4, 12,  8 );
+  R( br, cr, dr, er, ar, F0, KK4,      5,  5);
+  R( al, bl, cl, dl, el, F4, K4,  2, 13 );
+  R( ar, br, cr, dr, er, F0, KK4,      8, 14);
+  R( el, al, bl, cl, dl, F4, K4, 10, 12 );
+  R( er, ar, br, cr, dr, F0, KK4,      7,  6);
+  R( dl, el, al, bl, cl, F4, K4, 14,  5 );
+  R( dr, er, ar, br, cr, F0, KK4,      6,  8);
+  R( cl, dl, el, al, bl, F4, K4,  1, 12 );
+  R( cr, dr, er, ar, br, F0, KK4,      2, 13);
+  R( bl, cl, dl, el, al, F4, K4,  3, 13 );
+  R( br, cr, dr, er, ar, F0, KK4, 13,  6);
+  R( al, bl, cl, dl, el, F4, K4,  8, 14 );
+  R( ar, br, cr, dr, er, F0, KK4, 14,  5);
+  R( el, al, bl, cl, dl, F4, K4, 11, 11 );
+  R( er, ar, br, cr, dr, F0, KK4,      0, 15);
+  R( dl, el, al, bl, cl, F4, K4,  6,  8 );
+  R( dr, er, ar, br, cr, F0, KK4,      3, 13);
+  R( cl, dl, el, al, bl, F4, K4, 15,  5 );
+  R( cr, dr, er, ar, br, F0, KK4,      9, 11);
+  R( bl, cl, dl, el, al, F4, K4, 13,  6 );
+  R( br, cr, dr, er, ar, F0, KK4, 11, 11);
+
+  dr += cl + hd->h1;
+  hd->h1 = hd->h2 + dl + er;
+  hd->h2 = hd->h3 + el + ar;
+  hd->h3 = hd->h4 + al + br;
+  hd->h4 = hd->h0 + bl + cr;
+  hd->h0 = dr;
+
+  return /*burn_stack*/ 104+5*sizeof(void*);
 }
 
 
-/* Update the message digest with the contents
- * of INBUF with length INLEN.
- */
-static void
-rmd160_write ( void *context, const void *inbuf_arg, size_t inlen)
+static unsigned int
+transform ( void *c, const unsigned char *data, size_t nblks )
 {
-  const unsigned char *inbuf = inbuf_arg;
-  RMD160_CONTEXT *hd = context;
-
-  if( hd->count == 64 )  /* flush the buffer */
-    {
-      transform( hd, hd->buf );
-      _gcry_burn_stack (108+5*sizeof(void*));
-      hd->count = 0;
-      hd->nblocks++;
-    }
-  if( !inbuf )
-    return;
-  if( hd->count )
-    {
-      for( ; inlen && hd->count < 64; inlen-- )
-        hd->buf[hd->count++] = *inbuf++;
-      rmd160_write( hd, NULL, 0 );
-      if( !inlen )
-        return;
-    }
+  unsigned int burn;
 
-  while( inlen >= 64 )
+  do
     {
-      transform( hd, inbuf );
-      hd->count = 0;
-      hd->nblocks++;
-      inlen -= 64;
-      inbuf += 64;
+      burn = transform_blk (c, data);
+      data += 64;
     }
-  _gcry_burn_stack (108+5*sizeof(void*));
-  for( ; inlen && hd->count < 64; inlen-- )
-    hd->buf[hd->count++] = *inbuf++;
-}
-
-/****************
- * Apply the rmd160 transform function on the buffer which must have
- * a length 64 bytes. Do not use this function together with the
- * other functions, use rmd160_init to initialize internal variables.
- * Returns: 16 bytes in buffer with the mixed contentes of buffer.
- */
-void
-_gcry_rmd160_mixblock ( RMD160_CONTEXT *hd, void *blockof64byte )
-{
-  char *p = blockof64byte;
+  while (--nblks);
 
-  transform ( hd, blockof64byte );
-#define X(a) do { *(u32*)p = hd->h##a ; p += 4; } while(0)
-  X(0);
-  X(1);
-  X(2);
-  X(3);
-  X(4);
-#undef X
+  return burn;
 }
 
 
-/* The routine terminates the computation
+/*
+ * The routine terminates the computation
  */
-
 static void
 rmd160_final( void *context )
 {
   RMD160_CONTEXT *hd = context;
-  u32 t, msb, lsb;
+  u32 t, th, msb, lsb;
   byte *p;
+  unsigned int burn;
 
-  rmd160_write(hd, NULL, 0); /* flush */;
+  t = hd->bctx.nblocks;
+  if (sizeof t == sizeof hd->bctx.nblocks)
+    th = hd->bctx.nblocks_high;
+  else
+    th = hd->bctx.nblocks >> 32;
 
-  t = hd->nblocks;
   /* multiply by 64 to make a byte count */
   lsb = t << 6;
-  msb = t >> 26;
+  msb = (th << 6) | (t >> 26);
   /* add the count */
   t = lsb;
-  if( (lsb += hd->count) < t )
+  if( (lsb += hd->bctx.count) < t )
     msb++;
   /* multiply by 8 to make a bit count */
   t = lsb;
@@ -485,45 +429,41 @@ rmd160_final( void *context )
   msb <<= 3;
   msb |= t >> 29;
 
-  if( hd->count < 56 )  /* enough room */
+  if (hd->bctx.count < 56)  /* enough room */
     {
-      hd->buf[hd->count++] = 0x80; /* pad */
-      while( hd->count < 56 )
-        hd->buf[hd->count++] = 0;  /* pad */
+      hd->bctx.buf[hd->bctx.count++] = 0x80; /* pad */
+      if (hd->bctx.count < 56)
+       memset (&hd->bctx.buf[hd->bctx.count], 0, 56 - hd->bctx.count);
+
+      /* append the 64 bit count */
+      buf_put_le32(hd->bctx.buf + 56, lsb);
+      buf_put_le32(hd->bctx.buf + 60, msb);
+      burn = transform (hd, hd->bctx.buf, 1);
     }
-  else  /* need one extra block */
+  else /* need one extra block */
     {
-      hd->buf[hd->count++] = 0x80; /* pad character */
-      while( hd->count < 64 )
-        hd->buf[hd->count++] = 0;
-      rmd160_write(hd, NULL, 0);  /* flush */;
-      memset(hd->buf, 0, 56 ); /* fill next block with zeroes */
+      hd->bctx.buf[hd->bctx.count++] = 0x80; /* pad character */
+      /* fill pad and next block with zeroes */
+      memset (&hd->bctx.buf[hd->bctx.count], 0, 64 - hd->bctx.count + 56);
+
+      /* append the 64 bit count */
+      buf_put_le32(hd->bctx.buf + 64 + 56, lsb);
+      buf_put_le32(hd->bctx.buf + 64 + 60, msb);
+      burn = transform (hd, hd->bctx.buf, 2);
     }
-  /* append the 64 bit count */
-  hd->buf[56] = lsb       ;
-  hd->buf[57] = lsb >>  8;
-  hd->buf[58] = lsb >> 16;
-  hd->buf[59] = lsb >> 24;
-  hd->buf[60] = msb       ;
-  hd->buf[61] = msb >>  8;
-  hd->buf[62] = msb >> 16;
-  hd->buf[63] = msb >> 24;
-  transform( hd, hd->buf );
-  _gcry_burn_stack (108+5*sizeof(void*));
-
-  p = hd->buf;
-#ifdef WORDS_BIGENDIAN
-#define X(a) do { *p++ = hd->h##a         ; *p++ = hd->h##a >> 8;      \
-                 *p++ = hd->h##a >> 16; *p++ = hd->h##a >> 24; } while(0)
-#else /* little endian */
-#define X(a) do { *(u32*)p = hd->h##a ; p += 4; } while(0)
-#endif
+
+  p = hd->bctx.buf;
+#define X(a) do { buf_put_le32(p, hd->h##a); p += 4; } while(0)
   X(0);
   X(1);
   X(2);
   X(3);
   X(4);
 #undef X
+
+  hd->bctx.count = 0;
+
+  _gcry_burn_stack (burn);
 }
 
 static byte *
@@ -531,31 +471,37 @@ rmd160_read( void *context )
 {
   RMD160_CONTEXT *hd = context;
 
-  return hd->buf;
+  return hd->bctx.buf;
 }
 
 
 
 /****************
- * Shortcut functions which puts the hash value of the supplied buffer
+ * Shortcut functions which puts the hash value of the supplied buffer iov
  * into outbuf which must have a size of 20 bytes.
  */
-void
-_gcry_rmd160_hash_buffer (void *outbuf, const void *buffer, size_t length )
+static void
+_gcry_rmd160_hash_buffers (void *outbuf, size_t nbytes,
+                          const gcry_buffer_t *iov, int iovcnt)
 {
   RMD160_CONTEXT hd;
 
-  _gcry_rmd160_init ( &hd );
-  rmd160_write ( &hd, buffer, length );
+  (void)nbytes;
+
+  rmd160_init (&hd, 0);
+  for (;iovcnt > 0; iov++, iovcnt--)
+    _gcry_md_block_write (&hd,
+                          (const char*)iov[0].data + iov[0].off, iov[0].len);
   rmd160_final ( &hd );
-  memcpy ( outbuf, hd.buf, 20 );
+  memcpy ( outbuf, hd.bctx.buf, 20 );
 }
 
-static byte asn[15] = /* Object ID is 1.3.36.3.2.1 */
+
+static const byte asn[15] = /* Object ID is 1.3.36.3.2.1 */
   { 0x30, 0x21, 0x30, 0x09, 0x06, 0x05, 0x2b, 0x24, 0x03,
     0x02, 0x01, 0x05, 0x00, 0x04, 0x14 };
 
-static gcry_md_oid_spec_t oid_spec_rmd160[] =
+static const gcry_md_oid_spec_t oid_spec_rmd160[] =
   {
     /* rsaSignatureWithripemd160 */
     { "1.3.36.3.3.1.2" },
@@ -564,9 +510,11 @@ static gcry_md_oid_spec_t oid_spec_rmd160[] =
     { NULL }
   };
 
-gcry_md_spec_t _gcry_digest_spec_rmd160 =
+const gcry_md_spec_t _gcry_digest_spec_rmd160 =
   {
+    GCRY_MD_RMD160, {0, 0},
     "RIPEMD160", asn, DIM (asn), oid_spec_rmd160, 20,
-    _gcry_rmd160_init, rmd160_write, rmd160_final, rmd160_read,
+    rmd160_init, _gcry_md_block_write, rmd160_final, rmd160_read, NULL,
+    _gcry_rmd160_hash_buffers,
     sizeof (RMD160_CONTEXT)
   };
diff --git a/grub-core/lib/libgcrypt/cipher/rsa-common.c 
b/grub-core/lib/libgcrypt/cipher/rsa-common.c
new file mode 100644
index 000000000..9fe7b4065
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/rsa-common.c
@@ -0,0 +1,1115 @@
+/* rsa-common.c - Supporting functions for RSA
+ * Copyright (C) 2011 Free Software Foundation, Inc.
+ * Copyright (C) 2013  g10 Code GmbH
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "g10lib.h"
+#include "mpi.h"
+#include "cipher.h"
+#include "pubkey-internal.h"
+#include "const-time.h"
+
+
+/* Turn VALUE into an octet string and store it in an allocated buffer
+   at R_FRAME or - if R_RAME is NULL - copy it into the caller
+   provided buffer SPACE; either SPACE or R_FRAME may be used.  If
+   SPACE if not NULL, the caller must provide a buffer of at least
+   NBYTES.  If the resulting octet string is shorter than NBYTES pad
+   it to the left with zeroes.  If VALUE does not fit into NBYTES
+   return an error code.  */
+static gpg_err_code_t
+octet_string_from_mpi (unsigned char **r_frame, void *space,
+                       gcry_mpi_t value, size_t nbytes)
+{
+  return _gcry_mpi_to_octet_string (r_frame, space, value, nbytes);
+}
+
+
+
+/* Encode {VALUE,VALUELEN} for an NBITS keys using the pkcs#1 block
+   type 2 padding.  On success the result is stored as a new MPI at
+   R_RESULT.  On error the value at R_RESULT is undefined.
+
+   If {RANDOM_OVERRIDE, RANDOM_OVERRIDE_LEN} is given it is used as
+   the seed instead of using a random string for it.  This feature is
+   only useful for regression tests.  Note that this value may not
+   contain zero bytes.
+
+   We encode the value in this way:
+
+     0  2  RND(n bytes)  0  VALUE
+
+   0   is a marker we unfortunately can't encode because we return an
+       MPI which strips all leading zeroes.
+   2   is the block type.
+   RND are non-zero random bytes.
+
+   (Note that OpenPGP includes the cipher algorithm and a checksum in
+   VALUE; the caller needs to prepare the value accordingly.)
+  */
+gpg_err_code_t
+_gcry_rsa_pkcs1_encode_for_enc (gcry_mpi_t *r_result, unsigned int nbits,
+                                const unsigned char *value, size_t valuelen,
+                                const unsigned char *random_override,
+                                size_t random_override_len)
+{
+  gcry_err_code_t rc = 0;
+  unsigned char *frame = NULL;
+  size_t nframe = (nbits+7) / 8;
+  int i;
+  size_t n;
+  unsigned char *p;
+
+  if (valuelen + 7 > nframe || !nframe)
+    {
+      /* Can't encode a VALUELEN value in a NFRAME bytes frame.  */
+      return GPG_ERR_TOO_SHORT; /* The key is too short.  */
+    }
+
+  if ( !(frame = xtrymalloc_secure (nframe)))
+    return gpg_err_code_from_syserror ();
+
+  n = 0;
+  frame[n++] = 0;
+  frame[n++] = 2; /* block type */
+  i = nframe - 3 - valuelen;
+  gcry_assert (i > 0);
+
+  if (random_override)
+    {
+      int j;
+
+      if (random_override_len != i)
+        {
+          xfree (frame);
+          return GPG_ERR_INV_ARG;
+        }
+      /* Check that random does not include a zero byte.  */
+      for (j=0; j < random_override_len; j++)
+        if (!random_override[j])
+          {
+            xfree (frame);
+            return GPG_ERR_INV_ARG;
+          }
+      memcpy (frame + n, random_override, random_override_len);
+      n += random_override_len;
+    }
+  else
+    {
+      p = _gcry_random_bytes_secure (i, GCRY_STRONG_RANDOM);
+      /* Replace zero bytes by new values. */
+      for (;;)
+        {
+          int j, k;
+          unsigned char *pp;
+
+          /* Count the zero bytes. */
+          for (j=k=0; j < i; j++)
+            {
+              if (!p[j])
+                k++;
+            }
+          if (!k)
+            break; /* Okay: no (more) zero bytes. */
+
+          k += k/128 + 3; /* Better get some more. */
+          pp = _gcry_random_bytes_secure (k, GCRY_STRONG_RANDOM);
+          for (j=0; j < i && k; )
+            {
+              if (!p[j])
+                p[j] = pp[--k];
+              if (p[j])
+                j++;
+            }
+          xfree (pp);
+        }
+      memcpy (frame+n, p, i);
+      n += i;
+      xfree (p);
+    }
+
+  frame[n++] = 0;
+  memcpy (frame+n, value, valuelen);
+  n += valuelen;
+  gcry_assert (n == nframe);
+
+  rc = _gcry_mpi_scan (r_result, GCRYMPI_FMT_USG, frame, n, &nframe);
+  if (!rc &&DBG_CIPHER)
+    log_mpidump ("PKCS#1 block type 2 encoded data", *r_result);
+  xfree (frame);
+
+  return rc;
+}
+
+
+/*
+ *                    <--len-->
+ * DST-------v       v-------------SRC
+ *           [.................]
+ *            <---- buflen --->
+ *
+ * Copy the memory area SRC with LEN into another memory area DST.
+ * Conditions met:
+ *         the address SRC > DST
+ *         DST + BUFLEN == SRC + LEN.
+ *
+ * Memory access doesn't depends on LEN, but always done independently,
+ * sliding memory area by 1, 2, 4 ... until done.
+ */
+static void
+memmov_independently (void *dst, const void *src, size_t len, size_t buflen)
+{
+  size_t offset = (size_t)((char *)src - (char *)dst);
+  size_t shift;
+
+  (void)len; /* No dependency.  */
+  for (shift = 1; shift < buflen; shift <<= 1)
+    {
+      ct_memmov_cond (dst, (char *)dst + shift, buflen - shift, (offset&1));
+      offset >>= 1;
+    }
+}
+
+
+/* Decode a plaintext in VALUE assuming pkcs#1 block type 2 padding.
+   NBITS is the size of the secret key.  On success the result is
+   stored as a newly allocated buffer at R_RESULT and its valid length at
+   R_RESULTLEN.  On error NULL is stored at R_RESULT.  */
+gpg_err_code_t
+_gcry_rsa_pkcs1_decode_for_enc (unsigned char **r_result, size_t *r_resultlen,
+                                unsigned int nbits, gcry_mpi_t value)
+{
+  gcry_error_t err;
+  unsigned char *frame = NULL;
+  size_t nframe = (nbits+7) / 8;
+  size_t n, n0;
+  unsigned int failed = 0;
+  unsigned int not_found = 1;
+
+  *r_result = NULL;
+
+  if ( !(frame = xtrymalloc_secure (nframe)))
+    return gpg_err_code_from_syserror ();
+
+  err = _gcry_mpi_print (GCRYMPI_FMT_USG, frame, nframe, &n, value);
+  if (err)
+    {
+      xfree (frame);
+      return gcry_err_code (err);
+    }
+
+  nframe = n; /* Set NFRAME to the actual length.  */
+
+  /* FRAME = 0x00 || 0x02 || PS || 0x00 || M
+
+     pkcs#1 requires that the first byte is zero.  Our MPIs usually
+     strip leading zero bytes; thus we are not able to detect them.
+     However due to the way gcry_mpi_print is implemented we may see
+     leading zero bytes nevertheless.  We handle this by making the
+     first zero byte optional.  */
+  if (nframe < 4)
+    {
+      xfree (frame);
+      return GPG_ERR_ENCODING_PROBLEM;  /* Too short.  */
+    }
+  n = 0;
+  if (!frame[0])
+    n++;
+  failed |= ct_not_equal_byte (frame[n++], 0x02);
+
+  /* Find the terminating zero byte.  */
+  n0 = n;
+  for (; n < nframe; n++)
+    {
+      not_found &= ct_not_equal_byte (frame[n], 0x00);
+      n0 += not_found;
+    }
+
+  failed |= not_found;
+  n0 += ct_is_zero (not_found); /* Skip the zero byte.  */
+
+  /* To avoid an extra allocation we reuse the frame buffer.  The only
+     caller of this function will anyway free the result soon.  */
+  memmov_independently (frame, frame + n0, nframe - n0, nframe);
+
+  *r_result = frame;
+  *r_resultlen = nframe - n0;
+
+  if (DBG_CIPHER)
+    log_printhex ("value extracted from PKCS#1 block type 2 encoded data",
+                  *r_result, *r_resultlen);
+
+  return (0U - failed) & GPG_ERR_ENCODING_PROBLEM;
+}
+
+
+/* Encode {VALUE,VALUELEN} for an NBITS keys and hash algorithm ALGO
+   using the pkcs#1 block type 1 padding.  On success the result is
+   stored as a new MPI at R_RESULT.  On error the value at R_RESULT is
+   undefined.
+
+   We encode the value in this way:
+
+     0  1  PAD(n bytes)  0  ASN(asnlen bytes) VALUE(valuelen bytes)
+
+   0   is a marker we unfortunately can't encode because we return an
+       MPI which strips all leading zeroes.
+   1   is the block type.
+   PAD consists of 0xff bytes.
+   0   marks the end of the padding.
+   ASN is the DER encoding of the hash algorithm; along with the VALUE
+       it yields a valid DER encoding.
+
+   (Note that PGP prior to version 2.3 encoded the message digest as:
+      0   1   MD(16 bytes)   0   PAD(n bytes)   1
+    The MD is always 16 bytes here because it's always MD5.  GnuPG
+    does not not support pre-v2.3 signatures, but I'm including this
+    comment so the information is easily found if needed.)
+*/
+gpg_err_code_t
+_gcry_rsa_pkcs1_encode_for_sig (gcry_mpi_t *r_result, unsigned int nbits,
+                                const unsigned char *value, size_t valuelen,
+                                int algo)
+{
+  gcry_err_code_t rc = 0;
+  byte asn[100];
+  byte *frame = NULL;
+  size_t nframe = (nbits+7) / 8;
+  int i;
+  size_t n;
+  size_t asnlen, dlen;
+
+  asnlen = DIM(asn);
+  dlen = _gcry_md_get_algo_dlen (algo);
+
+  if (_gcry_md_algo_info (algo, GCRYCTL_GET_ASNOID, asn, &asnlen))
+    {
+      /* We don't have yet all of the above algorithms.  */
+      return GPG_ERR_NOT_IMPLEMENTED;
+    }
+
+  if ( valuelen != dlen )
+    {
+      /* Hash value does not match the length of digest for
+         the given algorithm.  */
+      return GPG_ERR_CONFLICT;
+    }
+
+  if ( !dlen || dlen + asnlen + 4 > nframe)
+    {
+      /* Can't encode an DLEN byte digest MD into an NFRAME byte
+         frame.  */
+      return GPG_ERR_TOO_SHORT;
+    }
+
+  if ( !(frame = xtrymalloc (nframe)) )
+    return gpg_err_code_from_syserror ();
+
+  /* Assemble the pkcs#1 block type 1. */
+  n = 0;
+  frame[n++] = 0;
+  frame[n++] = 1; /* block type */
+  i = nframe - valuelen - asnlen - 3 ;
+  gcry_assert (i > 1);
+  memset (frame+n, 0xff, i );
+  n += i;
+  frame[n++] = 0;
+  memcpy (frame+n, asn, asnlen);
+  n += asnlen;
+  memcpy (frame+n, value, valuelen );
+  n += valuelen;
+  gcry_assert (n == nframe);
+
+  /* Convert it into an MPI. */
+  rc = _gcry_mpi_scan (r_result, GCRYMPI_FMT_USG, frame, n, &nframe);
+  if (!rc && DBG_CIPHER)
+    log_mpidump ("PKCS#1 block type 1 encoded data", *r_result);
+  xfree (frame);
+
+  return rc;
+}
+
+/* Encode {VALUE,VALUELEN} for an NBITS keys using the pkcs#1 block
+   type 1 padding.  On success the result is stored as a new MPI at
+   R_RESULT.  On error the value at R_RESULT is undefined.
+
+   We encode the value in this way:
+
+     0  1  PAD(n bytes)  0  VALUE(valuelen bytes)
+
+   0   is a marker we unfortunately can't encode because we return an
+       MPI which strips all leading zeroes.
+   1   is the block type.
+   PAD consists of 0xff bytes.
+   0   marks the end of the padding.
+
+   (Note that PGP prior to version 2.3 encoded the message digest as:
+      0   1   MD(16 bytes)   0   PAD(n bytes)   1
+    The MD is always 16 bytes here because it's always MD5.  GnuPG
+    does not not support pre-v2.3 signatures, but I'm including this
+    comment so the information is easily found if needed.)
+*/
+gpg_err_code_t
+_gcry_rsa_pkcs1_encode_raw_for_sig (gcry_mpi_t *r_result, unsigned int nbits,
+                                const unsigned char *value, size_t valuelen)
+{
+  gcry_err_code_t rc = 0;
+  gcry_error_t err;
+  byte *frame = NULL;
+  size_t nframe = (nbits+7) / 8;
+  int i;
+  size_t n;
+
+  if ( !valuelen || valuelen + 4 > nframe)
+    {
+      /* Can't encode an DLEN byte digest MD into an NFRAME byte
+         frame.  */
+      return GPG_ERR_TOO_SHORT;
+    }
+
+  if ( !(frame = xtrymalloc (nframe)) )
+    return gpg_err_code_from_syserror ();
+
+  /* Assemble the pkcs#1 block type 1. */
+  n = 0;
+  frame[n++] = 0;
+  frame[n++] = 1; /* block type */
+  i = nframe - valuelen - 3 ;
+  gcry_assert (i > 1);
+  memset (frame+n, 0xff, i );
+  n += i;
+  frame[n++] = 0;
+  memcpy (frame+n, value, valuelen );
+  n += valuelen;
+  gcry_assert (n == nframe);
+
+  /* Convert it into an MPI. */
+  err = _gcry_mpi_scan (r_result, GCRYMPI_FMT_USG, frame, n, &nframe);
+  if (err)
+    rc = gcry_err_code (err);
+  else if (DBG_CIPHER)
+    log_mpidump ("PKCS#1 block type 1 encoded data", *r_result);
+  xfree (frame);
+
+  return rc;
+}
+
+
+/* Mask generation function for OAEP.  See RFC-3447 B.2.1.  */
+static gcry_err_code_t
+mgf1 (unsigned char *output, size_t outlen, unsigned char *seed, size_t 
seedlen,
+      int algo)
+{
+  size_t dlen, nbytes, n;
+  int idx;
+  gcry_md_hd_t hd;
+  gcry_err_code_t err;
+
+  err = _gcry_md_open (&hd, algo, 0);
+  if (err)
+    return err;
+
+  dlen = _gcry_md_get_algo_dlen (algo);
+
+  /* We skip step 1 which would be assert(OUTLEN <= 2^32).  The loop
+     in step 3 is merged with step 4 by concatenating no more octets
+     than what would fit into OUTPUT.  The ceiling for the counter IDX
+     is implemented indirectly.  */
+  nbytes = 0;  /* Step 2.  */
+  idx = 0;
+  while ( nbytes < outlen )
+    {
+      unsigned char c[4], *digest;
+
+      if (idx)
+        _gcry_md_reset (hd);
+
+      c[0] = (idx >> 24) & 0xFF;
+      c[1] = (idx >> 16) & 0xFF;
+      c[2] = (idx >> 8) & 0xFF;
+      c[3] = idx & 0xFF;
+      idx++;
+
+      _gcry_md_write (hd, seed, seedlen);
+      _gcry_md_write (hd, c, 4);
+      digest = _gcry_md_read (hd, 0);
+
+      n = (outlen - nbytes < dlen)? (outlen - nbytes) : dlen;
+      memcpy (output+nbytes, digest, n);
+      nbytes += n;
+    }
+
+  _gcry_md_close (hd);
+  return GPG_ERR_NO_ERROR;
+}
+
+
+/* RFC-3447 (pkcs#1 v2.1) OAEP encoding.  NBITS is the length of the
+   key measured in bits.  ALGO is the hash function; it must be a
+   valid and usable algorithm.  {VALUE,VALUELEN} is the message to
+   encrypt.  {LABEL,LABELLEN} is the optional label to be associated
+   with the message, if LABEL is NULL the default is to use the empty
+   string as label.  On success the encoded ciphertext is returned at
+   R_RESULT.
+
+   If {RANDOM_OVERRIDE, RANDOM_OVERRIDE_LEN} is given it is used as
+   the seed instead of using a random string for it.  This feature is
+   only useful for regression tests.
+
+   Here is figure 1 from the RFC depicting the process:
+
+                             +----------+---------+-------+
+                        DB = |  lHash   |    PS   |   M   |
+                             +----------+---------+-------+
+                                            |
+                  +----------+              V
+                  |   seed   |--> MGF ---> xor
+                  +----------+              |
+                        |                   |
+               +--+     V                   |
+               |00|    xor <----- MGF <-----|
+               +--+     |                   |
+                 |      |                   |
+                 V      V                   V
+               +--+----------+----------------------------+
+         EM =  |00|maskedSeed|          maskedDB          |
+               +--+----------+----------------------------+
+  */
+gpg_err_code_t
+_gcry_rsa_oaep_encode (gcry_mpi_t *r_result, unsigned int nbits, int algo,
+                       const unsigned char *value, size_t valuelen,
+                       const unsigned char *label, size_t labellen,
+                       const void *random_override, size_t random_override_len)
+{
+  gcry_err_code_t rc = 0;
+  unsigned char *frame = NULL;
+  size_t nframe = (nbits+7) / 8;
+  unsigned char *p;
+  size_t hlen;
+  size_t n;
+
+  *r_result = NULL;
+
+  /* Set defaults for LABEL.  */
+  if (!label || !labellen)
+    {
+      label = (const unsigned char*)"";
+      labellen = 0;
+    }
+
+  hlen = _gcry_md_get_algo_dlen (algo);
+
+  /* We skip step 1a which would be to check that LABELLEN is not
+     greater than 2^61-1.  See rfc-3447 7.1.1. */
+
+  /* Step 1b.  Note that the obsolete rfc-2437 uses the check:
+     valuelen > nframe - 2 * hlen - 1 .  */
+  if (valuelen > nframe - 2 * hlen - 2 || !nframe)
+    {
+      /* Can't encode a VALUELEN value in a NFRAME bytes frame. */
+      return GPG_ERR_TOO_SHORT; /* The key is too short.  */
+    }
+
+  /* Allocate the frame.  */
+  frame = xtrycalloc_secure (1, nframe);
+  if (!frame)
+    return gpg_err_code_from_syserror ();
+
+  /* Step 2a: Compute the hash of the label.  We store it in the frame
+     where later the maskedDB will commence.  */
+  _gcry_md_hash_buffer (algo, frame + 1 + hlen, label, labellen);
+
+  /* Step 2b: Set octet string to zero.  */
+  /* This has already been done while allocating FRAME.  */
+
+  /* Step 2c: Create DB by concatenating lHash, PS, 0x01 and M.  */
+  n = nframe - valuelen - 1;
+  frame[n] = 0x01;
+  memcpy (frame + n + 1, value, valuelen);
+
+  /* Step 3d: Generate seed.  We store it where the maskedSeed will go
+     later. */
+  if (random_override)
+    {
+      if (random_override_len != hlen)
+        {
+          xfree (frame);
+          return GPG_ERR_INV_ARG;
+        }
+      memcpy (frame + 1, random_override, hlen);
+    }
+  else
+    _gcry_randomize (frame + 1, hlen, GCRY_STRONG_RANDOM);
+
+  /* Step 2e and 2f: Create maskedDB.  */
+  {
+    unsigned char *dmask;
+
+    dmask = xtrymalloc_secure (nframe - hlen - 1);
+    if (!dmask)
+      {
+        rc = gpg_err_code_from_syserror ();
+        xfree (frame);
+        return rc;
+      }
+    rc = mgf1 (dmask, nframe - hlen - 1, frame+1, hlen, algo);
+    if (rc)
+      {
+        xfree (dmask);
+        xfree (frame);
+        return rc;
+      }
+    for (n = 1 + hlen, p = dmask; n < nframe; n++)
+      frame[n] ^= *p++;
+    xfree (dmask);
+  }
+
+  /* Step 2g and 2h: Create maskedSeed.  */
+  {
+    unsigned char *smask;
+
+    smask = xtrymalloc_secure (hlen);
+    if (!smask)
+      {
+        rc = gpg_err_code_from_syserror ();
+        xfree (frame);
+        return rc;
+      }
+    rc = mgf1 (smask, hlen, frame + 1 + hlen, nframe - hlen - 1, algo);
+    if (rc)
+      {
+        xfree (smask);
+        xfree (frame);
+        return rc;
+      }
+    for (n = 1, p = smask; n < 1 + hlen; n++)
+      frame[n] ^= *p++;
+    xfree (smask);
+  }
+
+  /* Step 2i: Concatenate 0x00, maskedSeed and maskedDB.  */
+  /* This has already been done by using in-place operations.  */
+
+  /* Convert the stuff into an MPI as expected by the caller.  */
+  rc = _gcry_mpi_scan (r_result, GCRYMPI_FMT_USG, frame, nframe, NULL);
+  if (!rc && DBG_CIPHER)
+    log_mpidump ("OAEP encoded data", *r_result);
+  xfree (frame);
+
+  return rc;
+}
+
+
+/* RFC-3447 (pkcs#1 v2.1) OAEP decoding.  NBITS is the length of the
+   key measured in bits.  ALGO is the hash function; it must be a
+   valid and usable algorithm.  VALUE is the raw decrypted message
+   {LABEL,LABELLEN} is the optional label to be associated with the
+   message, if LABEL is NULL the default is to use the empty string as
+   label.  On success the plaintext is returned as a newly allocated
+   buffer at R_RESULT; its valid length is stored at R_RESULTLEN.  On
+   error NULL is stored at R_RESULT.  */
+gpg_err_code_t
+_gcry_rsa_oaep_decode (unsigned char **r_result, size_t *r_resultlen,
+                       unsigned int nbits, int algo,
+                       gcry_mpi_t value,
+                       const unsigned char *label, size_t labellen)
+{
+  gcry_err_code_t rc;
+  unsigned char *frame = NULL; /* Encoded messages (EM).  */
+  unsigned char *masked_seed;  /* Points into FRAME.  */
+  unsigned char *masked_db;    /* Points into FRAME.  */
+  unsigned char *seed = NULL;  /* Allocated space for the seed and DB.  */
+  unsigned char *db;           /* Points into SEED.  */
+  unsigned char *lhash = NULL; /* Hash of the label.  */
+  size_t nframe;               /* Length of the ciphertext (EM).  */
+  size_t hlen;                 /* Length of the hash digest.  */
+  size_t db_len;               /* Length of DB and masked_db.  */
+  size_t nkey = (nbits+7)/8;   /* Length of the key in bytes.  */
+  int failed = 0;              /* Error indicator.  */
+  size_t n, n1;
+  unsigned int not_found = 1;
+
+  *r_result = NULL;
+
+  /* This code is implemented as described by rfc-3447 7.1.2.  */
+
+  /* Set defaults for LABEL.  */
+  if (!label || !labellen)
+    {
+      label = (const unsigned char*)"";
+      labellen = 0;
+    }
+
+  /* Get the length of the digest.  */
+  hlen = _gcry_md_get_algo_dlen (algo);
+
+  /* Hash the label right away.  */
+  lhash = xtrymalloc (hlen);
+  if (!lhash)
+    return gpg_err_code_from_syserror ();
+  _gcry_md_hash_buffer (algo, lhash, label, labellen);
+
+  /* Turn the MPI into an octet string.  If the octet string is
+     shorter than the key we pad it to the left with zeroes.  This may
+     happen due to the leading zero in OAEP frames and due to the
+     following random octets (seed^mask) which may have leading zero
+     bytes.  This all is needed to cope with our leading zeroes
+     suppressing MPI implementation.  The code implictly implements
+     Step 1b (bail out if NFRAME != N).  */
+  rc = octet_string_from_mpi (&frame, NULL, value, nkey);
+  if (rc)
+    {
+      xfree (lhash);
+      return GPG_ERR_ENCODING_PROBLEM;
+    }
+  nframe = nkey;
+
+  /* Step 1c: Check that the key is long enough.  */
+  if ( nframe < 2 * hlen + 2 )
+    {
+      xfree (frame);
+      xfree (lhash);
+      return GPG_ERR_ENCODING_PROBLEM;
+    }
+
+  /* Step 2 has already been done by the caller and the
+     gcry_mpi_aprint above.  */
+
+  /* Allocate space for SEED and DB.  */
+  seed = xtrymalloc_secure (nframe - 1);
+  if (!seed)
+    {
+      rc = gpg_err_code_from_syserror ();
+      xfree (frame);
+      xfree (lhash);
+      return rc;
+    }
+  db = seed + hlen;
+
+  /* To avoid chosen ciphertext attacks from now on we make sure to
+     run all code even in the error case; this avoids possible timing
+     attacks as described by Manger.  */
+
+  /* Step 3a: Hash the label.  */
+  /* This has already been done.  */
+
+  /* Step 3b: Separate the encoded message.  */
+  masked_seed = frame + 1;
+  masked_db   = frame + 1 + hlen;
+  db_len      = nframe - 1 - hlen;
+
+  /* Step 3c and 3d: seed = maskedSeed ^ mgf(maskedDB, hlen).  */
+  failed |= (mgf1 (seed, hlen, masked_db, db_len, algo) != 0);
+  for (n = 0; n < hlen; n++)
+    seed[n] ^= masked_seed[n];
+
+  /* Step 3e and 3f: db = maskedDB ^ mgf(seed, db_len).  */
+  failed |= (mgf1 (db, db_len, seed, hlen, algo) != 0);
+  for (n = 0; n < db_len; n++)
+    db[n] ^= masked_db[n];
+
+  /* Step 3g: Check lhash, an possible empty padding string terminated
+     by 0x01 and the first byte of EM being 0.  */
+  failed |= ct_not_memequal (lhash, db, hlen);
+  for (n = n1 = hlen; n < db_len; n++)
+    {
+      not_found &= ct_not_equal_byte (db[n], 0x01);
+      n1 += not_found;
+    }
+  failed |= not_found;
+  failed |= ct_not_equal_byte (frame[0], 0x00);
+
+  xfree (lhash);
+  xfree (frame);
+
+  /* Step 4: Output M.  */
+  /* To avoid an extra allocation we reuse the seed buffer.  The only
+     caller of this function will anyway free the result soon.  */
+  n1 += !not_found;
+  memmov_independently (seed, db + n1, db_len - n1, nframe - 1);
+  *r_result = seed;
+  *r_resultlen = db_len - n1;
+  seed = NULL;
+
+  if (DBG_CIPHER)
+    log_printhex ("value extracted from OAEP encoded data",
+                  *r_result, *r_resultlen);
+
+  return (0U - failed) & GPG_ERR_ENCODING_PROBLEM;
+}
+
+
+/* RFC-3447 (pkcs#1 v2.1) PSS encoding.  Encode {VALUE,VALUELEN} for
+   an NBITS key.  ALGO is a valid hash algorithm and SALTLEN is the
+   length of salt to be used.  When HASHED_ALREADY is set, VALUE is
+   already the mHash from the picture below.  Otherwise, VALUE is M.
+
+   On success the result is stored as a new MPI at R_RESULT.  On error
+   the value at R_RESULT is undefined.
+
+   If RANDOM_OVERRIDE is given it is used as the salt instead of using
+   a random string for the salt.  This feature is only useful for
+   regression tests.
+
+   Here is figure 2 from the RFC (errata 595 applied) depicting the
+   process:
+
+                                  +-----------+
+                                  |     M     |
+                                  +-----------+
+                                        |
+                                        V
+                                      Hash
+                                        |
+                                        V
+                          +--------+----------+----------+
+                     M' = |Padding1|  mHash   |   salt   |
+                          +--------+----------+----------+
+                                         |
+               +--------+----------+     V
+         DB =  |Padding2| salt     |   Hash
+               +--------+----------+     |
+                         |               |
+                         V               |    +----+
+                        xor <--- MGF <---|    |0xbc|
+                         |               |    +----+
+                         |               |      |
+                         V               V      V
+               +-------------------+----------+----+
+         EM =  |    maskedDB       |     H    |0xbc|
+               +-------------------+----------+----+
+
+  */
+gpg_err_code_t
+_gcry_rsa_pss_encode (gcry_mpi_t *r_result, unsigned int nbits, int algo,
+                      int saltlen, int hashed_already,
+                      const unsigned char *value, size_t valuelen,
+                      const void *random_override)
+{
+  gcry_err_code_t rc = 0;
+  gcry_md_hd_t hd = NULL;
+  unsigned char *digest;
+  size_t hlen;                 /* Length of the hash digest.  */
+  unsigned char *em = NULL;    /* Encoded message.  */
+  size_t emlen = (nbits+7)/8;  /* Length in bytes of EM.  */
+  unsigned char *h;            /* Points into EM.  */
+  unsigned char *buf = NULL;   /* Help buffer.  */
+  size_t buflen;               /* Length of BUF.  */
+  unsigned char *mhash;        /* Points into BUF.  */
+  unsigned char *salt;         /* Points into BUF.  */
+  unsigned char *dbmask;       /* Points into BUF.  */
+  unsigned char *p;
+  size_t n;
+
+
+  /* This code is implemented as described by rfc-3447 9.1.1.  */
+
+  rc = _gcry_md_open (&hd, algo, 0);
+  if (rc)
+    return rc;
+
+  /* Get the length of the digest.  */
+  hlen = _gcry_md_get_algo_dlen (algo);
+  gcry_assert (hlen);  /* We expect a valid ALGO here.  */
+
+  /* The FIPS 186-4 Section 5.5 allows only 0 <= sLen <= hLen */
+  if (fips_mode () && saltlen > hlen)
+    {
+      rc = GPG_ERR_INV_ARG;
+      goto leave;
+    }
+
+  /* Allocate a help buffer and setup some pointers.  */
+  buflen = 8 + hlen + saltlen + (emlen - hlen - 1);
+  buf = xtrymalloc (buflen);
+  if (!buf)
+    {
+      rc = gpg_err_code_from_syserror ();
+      goto leave;
+    }
+  mhash = buf + 8;
+  salt  = mhash + hlen;
+  dbmask= salt + saltlen;
+
+  /* Step 2: mHash = Hash(M) (or copy input to mHash, if already hashed).   */
+  if (!hashed_already)
+    {
+      _gcry_md_write (hd, value, valuelen);
+      digest = _gcry_md_read (hd, 0);
+      memcpy (mhash, digest, hlen);
+      _gcry_md_reset (hd);
+    }
+  else
+    {
+      if (valuelen != hlen)
+        {
+          rc = GPG_ERR_INV_LENGTH;
+          goto leave;
+        }
+      memcpy (mhash, value, hlen);
+    }
+
+  /* Step 3: Check length constraints.  */
+  if (emlen < hlen + saltlen + 2)
+    {
+      rc = GPG_ERR_TOO_SHORT;
+      goto leave;
+    }
+
+  /* Allocate space for EM.  */
+  em = xtrymalloc (emlen);
+  if (!em)
+    {
+      rc = gpg_err_code_from_syserror ();
+      goto leave;
+    }
+  h = em + emlen - 1 - hlen;
+
+  /* Step 4: Create a salt.  */
+  if (saltlen)
+    {
+      if (random_override)
+        memcpy (salt, random_override, saltlen);
+      else
+        _gcry_randomize (salt, saltlen, GCRY_STRONG_RANDOM);
+    }
+
+  /* Step 5 and 6: M' = Hash(Padding1 || mHash || salt).  */
+  memset (buf, 0, 8);  /* Padding.  */
+
+  _gcry_md_write (hd, buf, 8 + hlen + saltlen);
+  digest = _gcry_md_read (hd, 0);
+  memcpy (h, digest, hlen);
+
+  /* Step 7 and 8: DB = PS || 0x01 || salt.  */
+  /* Note that we use EM to store DB and later Xor in-place.  */
+  p = em + emlen - 1 - hlen - saltlen - 1;
+  memset (em, 0, p - em);
+  *p++ = 0x01;
+  memcpy (p, salt, saltlen);
+
+  /* Step 9: dbmask = MGF(H, emlen - hlen - 1).  */
+  mgf1 (dbmask, emlen - hlen - 1, h, hlen, algo);
+
+  /* Step 10: maskedDB = DB ^ dbMask */
+  for (n = 0, p = dbmask; n < emlen - hlen - 1; n++, p++)
+    em[n] ^= *p;
+
+  /* Step 11: Set the leftmost bits to zero.  */
+  em[0] &= 0xFF >> (8 * emlen - nbits);
+
+  /* Step 12: EM = maskedDB || H || 0xbc.  */
+  em[emlen-1] = 0xbc;
+
+  /* Convert EM into an MPI.  */
+  rc = _gcry_mpi_scan (r_result, GCRYMPI_FMT_USG, em, emlen, NULL);
+  if (!rc && DBG_CIPHER)
+    log_mpidump ("PSS encoded data", *r_result);
+
+ leave:
+  _gcry_md_close (hd);
+  if (em)
+    {
+      wipememory (em, emlen);
+      xfree (em);
+    }
+  if (buf)
+    {
+      wipememory (buf, buflen);
+      xfree (buf);
+    }
+  return rc;
+}
+
+
+/* Verify a signature assuming PSS padding.  When HASHED_ALREADY is
+   set, VALUE is the hash of the message (mHash); its length must
+   match the digest length of ALGO.  Otherwise, its M (before mHash).
+   VALUE is an opaque MPI.  ENCODED is the output of the RSA public
+   key function (EM).  NBITS is the size of the public key.  ALGO is
+   the hash algorithm and SALTLEN is the length of the used salt.  The
+   function returns 0 on success or on error code.  */
+gpg_err_code_t
+_gcry_rsa_pss_verify (gcry_mpi_t value, int hashed_already,
+                      gcry_mpi_t encoded,
+                      unsigned int nbits, int algo, size_t saltlen)
+{
+  gcry_err_code_t rc = 0;
+  gcry_md_hd_t hd = NULL;
+  unsigned char *digest;
+  size_t hlen;                 /* Length of the hash digest.  */
+  unsigned char *em = NULL;    /* Encoded message.  */
+  size_t emlen = (nbits+7)/8;  /* Length in bytes of EM.  */
+  unsigned char *salt;         /* Points into EM.  */
+  unsigned char *h;            /* Points into EM.  */
+  unsigned char *buf = NULL;   /* Help buffer.  */
+  size_t buflen;               /* Length of BUF.  */
+  unsigned char *dbmask;       /* Points into BUF.  */
+  unsigned char *mhash;        /* Points into BUF.  */
+  unsigned char *p;
+  size_t n;
+  unsigned int input_nbits;
+
+  /* This code is implemented as described by rfc-3447 9.1.2.  */
+
+  rc = _gcry_md_open (&hd, algo, 0);
+  if (rc)
+    return rc;
+
+  /* Get the length of the digest.  */
+  hlen = _gcry_md_get_algo_dlen (algo);
+  gcry_assert (hlen);  /* We expect a valid ALGO here.  */
+
+  /* The FIPS 186-4 Section 5.5 allows only 0 <= sLen <= hLen */
+  if (fips_mode () && saltlen > hlen)
+    {
+      rc = GPG_ERR_INV_ARG;
+      goto leave;
+    }
+
+  /* Allocate a help buffer and setup some pointers.
+     This buffer is used for two purposes:
+        +------------------------------+-------+
+     1. | dbmask                       | mHash |
+        +------------------------------+-------+
+           emlen - hlen - 1              hlen
+
+        +----------+-------+---------+-+-------+
+     2. | padding1 | mHash | salt    | | mHash |
+        +----------+-------+---------+-+-------+
+             8       hlen    saltlen     hlen
+  */
+  buflen = 8 + hlen + saltlen;
+  if (buflen < emlen - hlen - 1)
+    buflen = emlen - hlen - 1;
+  buflen += hlen;
+  buf = xtrymalloc (buflen);
+  if (!buf)
+    {
+      rc = gpg_err_code_from_syserror ();
+      goto leave;
+    }
+  dbmask = buf;
+  mhash = buf + buflen - hlen;
+
+  /* Step 2: mHash = Hash(M) (or copy input to mHash, if already hashed).   */
+  p = mpi_get_opaque (value, &input_nbits);
+  if (!p)
+    {
+      rc = GPG_ERR_INV_ARG;
+      goto leave;
+    }
+
+  if (!hashed_already)
+    {
+      _gcry_md_write (hd, p, (input_nbits+7)/8);
+      digest = _gcry_md_read (hd, 0);
+      memcpy (mhash, digest, hlen);
+      _gcry_md_reset (hd);
+    }
+  else
+    memcpy (mhash, p, hlen);
+
+  /* Convert the signature into an octet string.  */
+  rc = octet_string_from_mpi (&em, NULL, encoded, emlen);
+  if (rc)
+    goto leave;
+
+  /* Step 3: Check length of EM.  Because we internally use MPI
+     functions we can't do this properly; EMLEN is always the length
+     of the key because octet_string_from_mpi needs to left pad the
+     result with zero to cope with the fact that our MPIs suppress all
+     leading zeroes.  Thus what we test here are merely the digest and
+     salt lengths to the key.  */
+  if (emlen < hlen + saltlen + 2)
+    {
+      rc = GPG_ERR_TOO_SHORT; /* For the hash and saltlen.  */
+      goto leave;
+    }
+
+  /* Step 4: Check last octet.  */
+  if (em[emlen - 1] != 0xbc)
+    {
+      rc = GPG_ERR_BAD_SIGNATURE;
+      goto leave;
+    }
+
+  /* Step 5: Split EM.  */
+  h = em + emlen - 1 - hlen;
+
+  /* Step 6: Check the leftmost bits.  */
+  if ((em[0] & ~(0xFF >> (8 * emlen - nbits))))
+    {
+      rc = GPG_ERR_BAD_SIGNATURE;
+      goto leave;
+    }
+
+  /* Step 7: dbmask = MGF(H, emlen - hlen - 1).  */
+  mgf1 (dbmask, emlen - hlen - 1, h, hlen, algo);
+
+  /* Step 8: maskedDB = DB ^ dbMask.  */
+  for (n = 0, p = dbmask; n < emlen - hlen - 1; n++, p++)
+    em[n] ^= *p;
+
+  /* Step 9: Set leftmost bits in DB to zero.  */
+  em[0] &= 0xFF >> (8 * emlen - nbits);
+
+  /* Step 10: Check the padding of DB.  */
+  for (n = 0; n < emlen - hlen - saltlen - 2 && !em[n]; n++)
+    ;
+  if (n != emlen - hlen - saltlen - 2 || em[n++] != 1)
+    {
+      rc = GPG_ERR_BAD_SIGNATURE;
+      goto leave;
+    }
+
+  /* Step 11: Extract salt from DB.  */
+  salt = em + n;
+
+  /* Step 12:  M' = (0x)00 00 00 00 00 00 00 00 || mHash || salt */
+  memset (buf, 0, 8);
+  memcpy (buf+8, mhash, hlen);
+  memcpy (buf+8+hlen, salt, saltlen);
+
+  /* Step 13:  H' = Hash(M').  */
+  _gcry_md_write (hd, buf, 8 + hlen + saltlen);
+  digest = _gcry_md_read (hd, 0);
+  memcpy (buf, digest, hlen);
+
+  /* Step 14:  Check H == H'.   */
+  rc = memcmp (h, buf, hlen) ? GPG_ERR_BAD_SIGNATURE : GPG_ERR_NO_ERROR;
+
+ leave:
+  _gcry_md_close (hd);
+  if (em)
+    {
+      wipememory (em, emlen);
+      xfree (em);
+    }
+  if (buf)
+    {
+      wipememory (buf, buflen);
+      xfree (buf);
+    }
+  return rc;
+}
diff --git a/grub-core/lib/libgcrypt/cipher/rsa.c 
b/grub-core/lib/libgcrypt/cipher/rsa.c
index ccc9f9645..ff66e6f8f 100644
--- a/grub-core/lib/libgcrypt/cipher/rsa.c
+++ b/grub-core/lib/libgcrypt/cipher/rsa.c
@@ -32,6 +32,8 @@
 #include "g10lib.h"
 #include "mpi.h"
 #include "cipher.h"
+#include "pubkey-internal.h"
+#include "const-time.h"
 
 
 typedef struct
@@ -52,42 +54,69 @@ typedef struct
 } RSA_secret_key;
 
 
-/* A sample 1024 bit RSA key used for the selftests.  */
-static const char sample_secret_key[] =
-"(private-key"
-" (rsa"
-"  (n #00e0ce96f90b6c9e02f3922beada93fe50a875eac6bcc18bb9a9cf2e84965caa"
-"      2d1ff95a7f542465c6c0c19d276e4526ce048868a7a914fd343cc3a87dd74291"
-"      ffc565506d5bbb25cbac6a0e2dd1f8bcaab0d4a29c2f37c950f363484bf269f7"
-"      891440464baf79827e03a36e70b814938eebdc63e964247be75dc58b014b7ea251#)"
-"  (e #010001#)"
-"  (d #046129f2489d71579be0a75fe029bd6cdb574ebf57ea8a5b0fda942cab943b11"
-"      7d7bb95e5d28875e0f9fc5fcc06a72f6d502464dabded78ef6b716177b83d5bd"
-"      c543dc5d3fed932e59f5897e92e6f58a0f33424106a3b6fa2cbf877510e4ac21"
-"      c3ee47851e97d12996222ac3566d4ccb0b83d164074abf7de655fc2446da1781#)"
-"  (p #00e861b700e17e8afe6837e7512e35b6ca11d0ae47d8b85161c67baf64377213"
-"      fe52d772f2035b3ca830af41d8a4120e1c1c70d12cc22f00d28d31dd48a8d424f1#)"
-"  (q #00f7a7ca5367c661f8e62df34f0d05c10c88e5492348dd7bddc942c9a8f369f9"
-"      35a07785d2db805215ed786e4285df1658eed3ce84f469b81b50d358407b4ad361#)"
-"  (u #304559a9ead56d2309d203811a641bb1a09626bc8eb36fffa23c968ec5bd891e"
-"      ebbafc73ae666e01ba7c8990bae06cc2bbe10b75e69fcacb353a6473079d8e9b#)))";
-/* A sample 1024 bit RSA key used for the selftests (public only).  */
-static const char sample_public_key[] =
-"(public-key"
-" (rsa"
-"  (n #00e0ce96f90b6c9e02f3922beada93fe50a875eac6bcc18bb9a9cf2e84965caa"
-"      2d1ff95a7f542465c6c0c19d276e4526ce048868a7a914fd343cc3a87dd74291"
-"      ffc565506d5bbb25cbac6a0e2dd1f8bcaab0d4a29c2f37c950f363484bf269f7"
-"      891440464baf79827e03a36e70b814938eebdc63e964247be75dc58b014b7ea251#)"
-"  (e #010001#)))";
+static const char *rsa_names[] =
+  {
+    "rsa",
+    "openpgp-rsa",
+    "oid.1.2.840.113549.1.1.1",
+    NULL,
+  };
 
 
+/* A sample 2048 bit RSA key used for the selftests.  */
+static const char sample_secret_key[] =
+" (private-key"
+"  (rsa"
+"  (n #009F56231A3D82E3E7D613D59D53E9AB921BEF9F08A782AED0B6E46ADBC853EC"
+"      7C71C422435A3CD8FA0DB9EFD55CD3295BADC4E8E2E2B94E15AE82866AB8ADE8"
+"      7E469FAE76DC3577DE87F1F419C4EB41123DFAF8D16922D5EDBAD6E9076D5A1C"
+"      958106F0AE5E2E9193C6B49124C64C2A241C4075D4AF16299EB87A6585BAE917"
+"      DEF27FCDD165764D069BC18D16527B29DAAB549F7BBED4A7C6A842D203ED6613"
+"      6E2411744E432CD26D940132F25874483DCAEECDFD95744819CBCF1EA810681C"
+"      42907EBCB1C7EAFBE75C87EC32C5413EA10476545D3FC7B2ADB1B66B7F200918"
+"      664B0E5261C2895AA28B0DE321E921B3F877172CCCAB81F43EF98002916156F6CB#)"
+"   (e #010001#)"
+"   (d #07EF82500C403899934FE993AC5A36F14FF2DF38CF1EF315F205EE4C83EDAA19"
+"       8890FC23DE9AA933CAFB37B6A8A8DBA675411958337287310D3FF2F1DDC0CB93"
+"       7E70F57F75F833C021852B631D2B9A520E4431A03C5C3FCB5742DCD841D9FB12"
+"       771AA1620DCEC3F1583426066ED9DC3F7028C5B59202C88FDF20396E2FA0EC4F"
+"       5A22D9008F3043673931BC14A5046D6327398327900867E39CC61B2D1AFE2F48"
+"       EC8E1E3861C68D257D7425F4E6F99ABD77D61F10CA100EFC14389071831B33DD"
+"       69CC8EABEF860D1DC2AAA84ABEAE5DFC91BC124DAF0F4C8EF5BBEA436751DE84"
+"       3A8063E827A024466F44C28614F93B0732A100D4A0D86D532FE1E22C7725E401#)"
+"   (p #00C29D438F115825779631CD665A5739367F3E128ADC29766483A46CA80897E0"
+"       79B32881860B8F9A6A04C2614A904F6F2578DAE13EA67CD60AE3D0AA00A1FF9B"
+"       441485E44B2DC3D0B60260FBFE073B5AC72FAF67964DE15C8212C389D20DB9CF"
+"       54AF6AEF5C4196EAA56495DD30CF709F499D5AB30CA35E086C2A1589D6283F1783#)"
+"   (q #00D1984135231CB243FE959C0CBEF551EDD986AD7BEDF71EDF447BE3DA27AF46"
+"       79C974A6FA69E4D52FE796650623DE70622862713932AA2FD9F2EC856EAEAA77"
+"       88B4EA6084DC81C902F014829B18EA8B2666EC41586818E0589E18876065F97E"
+"       8D22CE2DA53A05951EC132DCEF41E70A9C35F4ACC268FFAC2ADF54FA1DA110B919#)"
+"   (u #67CF0FD7635205DD80FA814EE9E9C267C17376BF3209FB5D1BC42890D2822A04"
+"       479DAF4D5B6ED69D0F8D1AF94164D07F8CD52ECEFE880641FA0F41DDAB1785E4"
+"       A37A32F997A516480B4CD4F6482B9466A1765093ED95023CA32D5EDC1E34CEE9"
+"       AF595BC51FE43C4BF810FA225AF697FB473B83815966188A4312C048B885E3F7#)))";
+
+/* A sample 2048 bit RSA key used for the selftests (public only).  */
+static const char sample_public_key[] =
+" (public-key"
+"  (rsa"
+"   (n #009F56231A3D82E3E7D613D59D53E9AB921BEF9F08A782AED0B6E46ADBC853EC"
+"       7C71C422435A3CD8FA0DB9EFD55CD3295BADC4E8E2E2B94E15AE82866AB8ADE8"
+"       7E469FAE76DC3577DE87F1F419C4EB41123DFAF8D16922D5EDBAD6E9076D5A1C"
+"       958106F0AE5E2E9193C6B49124C64C2A241C4075D4AF16299EB87A6585BAE917"
+"       DEF27FCDD165764D069BC18D16527B29DAAB549F7BBED4A7C6A842D203ED6613"
+"       6E2411744E432CD26D940132F25874483DCAEECDFD95744819CBCF1EA810681C"
+"       42907EBCB1C7EAFBE75C87EC32C5413EA10476545D3FC7B2ADB1B66B7F200918"
+"       664B0E5261C2895AA28B0DE321E921B3F877172CCCAB81F43EF98002916156F6CB#)"
+"   (e #010001#)))";
 
 
 static int test_keys (RSA_secret_key *sk, unsigned nbits);
 static int  check_secret_key (RSA_secret_key *sk);
 static void public (gcry_mpi_t output, gcry_mpi_t input, RSA_public_key *skey);
 static void secret (gcry_mpi_t output, gcry_mpi_t input, RSA_secret_key *skey);
+static unsigned int rsa_get_nbits (gcry_sexp_t parms);
 
 
 /* Check that a freshly generated key actually works.  Returns 0 on success. */
@@ -96,59 +125,102 @@ test_keys (RSA_secret_key *sk, unsigned int nbits)
 {
   int result = -1; /* Default to failure.  */
   RSA_public_key pk;
-  gcry_mpi_t plaintext = gcry_mpi_new (nbits);
-  gcry_mpi_t ciphertext = gcry_mpi_new (nbits);
-  gcry_mpi_t decr_plaintext = gcry_mpi_new (nbits);
-  gcry_mpi_t signature = gcry_mpi_new (nbits);
+  gcry_mpi_t plaintext = mpi_new (nbits);
+  gcry_mpi_t ciphertext = mpi_new (nbits);
+  gcry_mpi_t decr_plaintext = mpi_new (nbits);
+  gcry_mpi_t signature = mpi_new (nbits);
 
   /* Put the relevant parameters into a public key structure.  */
   pk.n = sk->n;
   pk.e = sk->e;
 
   /* Create a random plaintext.  */
-  gcry_mpi_randomize (plaintext, nbits, GCRY_WEAK_RANDOM);
+  _gcry_mpi_randomize (plaintext, nbits, GCRY_WEAK_RANDOM);
 
   /* Encrypt using the public key.  */
   public (ciphertext, plaintext, &pk);
 
   /* Check that the cipher text does not match the plaintext.  */
-  if (!gcry_mpi_cmp (ciphertext, plaintext))
+  if (!mpi_cmp (ciphertext, plaintext))
     goto leave; /* Ciphertext is identical to the plaintext.  */
 
   /* Decrypt using the secret key.  */
   secret (decr_plaintext, ciphertext, sk);
 
   /* Check that the decrypted plaintext matches the original plaintext.  */
-  if (gcry_mpi_cmp (decr_plaintext, plaintext))
+  if (mpi_cmp (decr_plaintext, plaintext))
     goto leave; /* Plaintext does not match.  */
 
   /* Create another random plaintext as data for signature checking.  */
-  gcry_mpi_randomize (plaintext, nbits, GCRY_WEAK_RANDOM);
+  _gcry_mpi_randomize (plaintext, nbits, GCRY_WEAK_RANDOM);
 
   /* Use the RSA secret function to create a signature of the plaintext.  */
   secret (signature, plaintext, sk);
 
   /* Use the RSA public function to verify this signature.  */
   public (decr_plaintext, signature, &pk);
-  if (gcry_mpi_cmp (decr_plaintext, plaintext))
+  if (mpi_cmp (decr_plaintext, plaintext))
     goto leave; /* Signature does not match.  */
 
   /* Modify the signature and check that the signing fails.  */
-  gcry_mpi_add_ui (signature, signature, 1);
+  mpi_add_ui (signature, signature, 1);
   public (decr_plaintext, signature, &pk);
-  if (!gcry_mpi_cmp (decr_plaintext, plaintext))
+  if (!mpi_cmp (decr_plaintext, plaintext))
     goto leave; /* Signature matches but should not.  */
 
   result = 0; /* All tests succeeded.  */
 
  leave:
-  gcry_mpi_release (signature);
-  gcry_mpi_release (decr_plaintext);
-  gcry_mpi_release (ciphertext);
-  gcry_mpi_release (plaintext);
+  _gcry_mpi_release (signature);
+  _gcry_mpi_release (decr_plaintext);
+  _gcry_mpi_release (ciphertext);
+  _gcry_mpi_release (plaintext);
   return result;
 }
 
+static int
+test_keys_fips (gcry_sexp_t skey)
+{
+  int result = -1; /* Default to failure.  */
+  char plaintext[128];
+  gcry_sexp_t sig = NULL;
+  const char *data_tmpl = "(data (flags pkcs1) (hash %s %b))";
+  gcry_md_hd_t hd = NULL;
+  int ec;
+
+  /* Create a random plaintext.  */
+  _gcry_randomize (plaintext, sizeof plaintext, GCRY_WEAK_RANDOM);
+
+  /* Open MD context and feed the random data in */
+  ec = _gcry_md_open (&hd, GCRY_MD_SHA256, 0);
+  if (ec)
+    goto leave;
+  _gcry_md_write (hd, plaintext, sizeof(plaintext));
+
+  /* Use the RSA secret function to create a signature of the plaintext.  */
+  ec = _gcry_pk_sign_md (&sig, data_tmpl, hd, skey, NULL);
+  if (ec)
+    goto leave;
+
+  /* Use the RSA public function to verify this signature.  */
+  ec = _gcry_pk_verify_md (sig, data_tmpl, hd, skey, NULL);
+  if (ec)
+    goto leave;
+
+  /* Modify the data and check that the signing fails.  */
+  _gcry_md_reset(hd);
+  plaintext[sizeof plaintext / 2] ^= 1;
+  _gcry_md_write (hd, plaintext, sizeof(plaintext));
+  ec = _gcry_pk_verify_md (sig, data_tmpl, hd, skey, NULL);
+  if (ec != GPG_ERR_BAD_SIGNATURE)
+    goto leave; /* Signature verification worked on modified data  */
+
+  result = 0; /* All tests succeeded.  */
+ leave:
+  sexp_release (sig);
+  _gcry_md_close (hd);
+  return result;
+}
 
 /* Callback used by the prime generation to test whether the exponent
    is suitable. Returns 0 if the test has been passed. */
@@ -161,8 +233,8 @@ check_exponent (void *arg, gcry_mpi_t a)
 
   mpi_sub_ui (a, a, 1);
   tmp = _gcry_mpi_alloc_like (a);
-  result = !gcry_mpi_gcd(tmp, e, a); /* GCD is not 1. */
-  gcry_mpi_release (tmp);
+  result = !mpi_gcd(tmp, e, a); /* GCD is not 1. */
+  _gcry_mpi_release (tmp);
   mpi_add_ui (a, a, 1);
   return result;
 }
@@ -192,14 +264,6 @@ generate_std (RSA_secret_key *sk, unsigned int nbits, 
unsigned long use_e,
   gcry_mpi_t f;
   gcry_random_level_t random_level;
 
-  if (fips_mode ())
-    {
-      if (nbits < 1024)
-        return GPG_ERR_INV_VALUE;
-      if (transient_key)
-        return GPG_ERR_INV_VALUE;
-    }
-
   /* The random quality depends on the transient_key flag.  */
   random_level = transient_key ? GCRY_STRONG_RANDOM : GCRY_VERY_STRONG_RANDOM;
 
@@ -228,16 +292,16 @@ generate_std (RSA_secret_key *sk, unsigned int nbits, 
unsigned long use_e,
       mpi_set_ui (e, use_e);
     }
 
-  n = gcry_mpi_new (nbits);
+  n = mpi_new (nbits);
 
   p = q = NULL;
   do
     {
       /* select two (very secret) primes */
       if (p)
-        gcry_mpi_release (p);
+        _gcry_mpi_release (p);
       if (q)
-        gcry_mpi_release (q);
+        _gcry_mpi_release (q);
       if (use_e)
         { /* Do an extra test to ensure that the given exponent is
              suitable. */
@@ -261,16 +325,16 @@ generate_std (RSA_secret_key *sk, unsigned int nbits, 
unsigned long use_e,
   /* calculate Euler totient: phi = (p-1)(q-1) */
   t1 = mpi_alloc_secure( mpi_get_nlimbs(p) );
   t2 = mpi_alloc_secure( mpi_get_nlimbs(p) );
-  phi = gcry_mpi_snew ( nbits );
-  g    = gcry_mpi_snew ( nbits );
-  f    = gcry_mpi_snew ( nbits );
+  phi   = mpi_snew ( nbits );
+  g    = mpi_snew ( nbits );
+  f    = mpi_snew ( nbits );
   mpi_sub_ui( t1, p, 1 );
   mpi_sub_ui( t2, q, 1 );
   mpi_mul( phi, t1, t2 );
-  gcry_mpi_gcd(g, t1, t2);
+  mpi_gcd (g, t1, t2);
   mpi_fdiv_q(f, phi, g);
 
-  while (!gcry_mpi_gcd(t1, e, phi)) /* (while gcd is not 1) */
+  while (!mpi_gcd(t1, e, phi)) /* (while gcd is not 1) */
     {
       if (use_e)
         BUG (); /* The prime generator already made sure that we
@@ -278,11 +342,11 @@ generate_std (RSA_secret_key *sk, unsigned int nbits, 
unsigned long use_e,
       mpi_add_ui (e, e, 2);
     }
 
-  /* calculate the secret key d = e^1 mod phi */
-  d = gcry_mpi_snew ( nbits );
-  mpi_invm(d, e, f );
+  /* calculate the secret key d = e^-1 mod phi */
+  d = mpi_snew ( nbits );
+  mpi_invm (d, e, f );
   /* calculate the inverse of p and q (used for chinese remainder theorem)*/
-  u = gcry_mpi_snew ( nbits );
+  u = mpi_snew ( nbits );
   mpi_invm(u, p, q );
 
   if( DBG_CIPHER )
@@ -298,11 +362,11 @@ generate_std (RSA_secret_key *sk, unsigned int nbits, 
unsigned long use_e,
       log_mpidump("  u= ", u );
     }
 
-  gcry_mpi_release (t1);
-  gcry_mpi_release (t2);
-  gcry_mpi_release (phi);
-  gcry_mpi_release (f);
-  gcry_mpi_release (g);
+  _gcry_mpi_release (t1);
+  _gcry_mpi_release (t2);
+  _gcry_mpi_release (phi);
+  _gcry_mpi_release (f);
+  _gcry_mpi_release (g);
 
   sk->n = n;
   sk->e = e;
@@ -314,12 +378,12 @@ generate_std (RSA_secret_key *sk, unsigned int nbits, 
unsigned long use_e,
   /* Now we can test our keys. */
   if (test_keys (sk, nbits - 64))
     {
-      gcry_mpi_release (sk->n); sk->n = NULL;
-      gcry_mpi_release (sk->e); sk->e = NULL;
-      gcry_mpi_release (sk->p); sk->p = NULL;
-      gcry_mpi_release (sk->q); sk->q = NULL;
-      gcry_mpi_release (sk->d); sk->d = NULL;
-      gcry_mpi_release (sk->u); sk->u = NULL;
+      _gcry_mpi_release (sk->n); sk->n = NULL;
+      _gcry_mpi_release (sk->e); sk->e = NULL;
+      _gcry_mpi_release (sk->p); sk->p = NULL;
+      _gcry_mpi_release (sk->q); sk->q = NULL;
+      _gcry_mpi_release (sk->d); sk->d = NULL;
+      _gcry_mpi_release (sk->u); sk->u = NULL;
       fips_signal_error ("self-test after key generation failed");
       return GPG_ERR_SELFTEST_FAILED;
     }
@@ -328,14 +392,327 @@ generate_std (RSA_secret_key *sk, unsigned int nbits, 
unsigned long use_e,
 }
 
 
+/* Check the RSA key length is acceptable for key generation or usage */
+static gpg_err_code_t
+rsa_check_keysize (unsigned int nbits)
+{
+  if (fips_mode () && nbits < 2048)
+    return GPG_ERR_INV_VALUE;
+
+  return GPG_ERR_NO_ERROR;
+}
+
+
+/* Check the RSA key length is acceptable for signature verification
+ *
+ * FIPS allows signature verification with RSA keys of size
+ * 1024, 1280, 1536 and 1792 in legacy mode, but this is up to the
+ * calling application to decide if the signature is legacy and
+ * should be accepted.
+ */
+static gpg_err_code_t
+rsa_check_verify_keysize (unsigned int nbits)
+{
+  if (fips_mode ())
+    {
+      if ((nbits >= 1024 && (nbits % 256) == 0) || nbits >= 2048)
+        return GPG_ERR_NO_ERROR;
+
+      return GPG_ERR_INV_VALUE;
+    }
+
+  return GPG_ERR_NO_ERROR;
+}
+
+
+/****************
+ * Generate a key pair with a key of size NBITS.
+ * USE_E = 0 let Libcgrypt decide what exponent to use.
+ *       = 1 request the use of a "secure" exponent; this is required by some
+ *           specification to be 65537.
+ *       > 2 Use this public exponent.  If the given exponent
+ *           is not odd one is internally added to it.
+ * TESTPARMS: If set, do not generate but test whether the p,q is probably 
prime
+ *            Returns key with zeroes to not break code calling this function.
+ * TRANSIENT_KEY:  If true, generate the primes using the standard RNG.
+ * Returns: 2 structures filled with all needed values
+ */
+static gpg_err_code_t
+generate_fips (RSA_secret_key *sk, unsigned int nbits, unsigned long use_e,
+               gcry_sexp_t testparms, int transient_key)
+{
+  gcry_mpi_t p, q; /* the two primes */
+  gcry_mpi_t d;    /* the private key */
+  gcry_mpi_t u;
+  gcry_mpi_t p1, q1;
+  gcry_mpi_t n;    /* the public key */
+  gcry_mpi_t e;    /* the exponent */
+  gcry_mpi_t g;
+  gcry_mpi_t minp;
+  gcry_mpi_t diff, mindiff;
+  gcry_random_level_t random_level;
+  unsigned int pbits = nbits/2;
+  unsigned int i;
+  int pqswitch;
+  gpg_err_code_t ec;
+
+  if (nbits <= 1024 || (nbits & 0x1FF))
+    return GPG_ERR_INV_VALUE;
+  ec = rsa_check_keysize (nbits);
+  if (ec)
+    return ec;
+
+  /* Set default error code.  */
+  ec = GPG_ERR_NO_PRIME;
+
+  /* The random quality depends on the transient_key flag.  */
+  random_level = transient_key ? GCRY_STRONG_RANDOM : GCRY_VERY_STRONG_RANDOM;
+
+  if (testparms)
+    {
+      /* Parameters to derive the key are given.  */
+      /* Note that we explicitly need to setup the values of tbl
+         because some compilers (e.g. OpenWatcom, IRIX) don't allow to
+         initialize a structure with automatic variables.  */
+      struct { const char *name; gcry_mpi_t *value; } tbl[] = {
+        { "e" },
+        { "p" },
+        { "q" },
+        { NULL }
+      };
+      int idx;
+      gcry_sexp_t oneparm;
+
+      tbl[0].value = &e;
+      tbl[1].value = &p;
+      tbl[2].value = &q;
+
+      for (idx=0; tbl[idx].name; idx++)
+        {
+          oneparm = sexp_find_token (testparms, tbl[idx].name, 0);
+          if (oneparm)
+            {
+              *tbl[idx].value = sexp_nth_mpi (oneparm, 1, GCRYMPI_FMT_USG);
+              sexp_release (oneparm);
+            }
+        }
+      for (idx=0; tbl[idx].name; idx++)
+        if (!*tbl[idx].value)
+          break;
+      if (tbl[idx].name)
+        {
+          /* At least one parameter is missing.  */
+          for (idx=0; tbl[idx].name; idx++)
+            _gcry_mpi_release (*tbl[idx].value);
+          return GPG_ERR_MISSING_VALUE;
+        }
+    }
+  else
+    {
+      if (use_e < 65537)
+        use_e = 65537;  /* This is the smallest value allowed by FIPS */
+
+      e = mpi_alloc ((32+BITS_PER_MPI_LIMB-1)/BITS_PER_MPI_LIMB);
+
+      use_e |= 1; /* make sure this is odd */
+      mpi_set_ui (e, use_e);
+
+      p = mpi_snew (pbits);
+      q = mpi_snew (pbits);
+    }
+
+  n = mpi_new (nbits);
+  d = mpi_snew (nbits);
+  u = mpi_snew (nbits);
+
+  /* prepare approximate minimum p and q */
+  minp = mpi_new (pbits);
+  mpi_set_ui (minp, 0xB504F334);
+  mpi_lshift (minp, minp, pbits - 32);
+
+  /* prepare minimum p and q difference */
+  diff = mpi_new (pbits);
+  mindiff = mpi_new (pbits - 99);
+  mpi_set_ui (mindiff, 1);
+  mpi_lshift (mindiff, mindiff, pbits - 100);
+
+  p1 = mpi_snew (pbits);
+  q1 = mpi_snew (pbits);
+  g  = mpi_snew (pbits);
+
+ retry:
+  /* generate p and q */
+  for (i = 0; i < 10 * pbits; i++)
+    {
+    ploop:
+      if (!testparms)
+        {
+          _gcry_mpi_randomize (p, pbits, random_level);
+          mpi_set_bit (p, 0);
+        }
+      if (mpi_cmp (p, minp) < 0)
+        {
+          if (testparms)
+            goto err;
+          goto ploop;
+        }
+
+      mpi_sub_ui (p1, p, 1);
+      if (mpi_gcd (g, p1, e))
+        {
+          if (_gcry_fips186_4_prime_check (p, pbits) != GPG_ERR_NO_ERROR)
+            {
+              /* not a prime */
+              if (testparms)
+                goto err;
+            }
+          else
+            break;
+        }
+      else if (testparms)
+        goto err;
+    }
+  if (i >= 10 * pbits)
+    goto err;
+
+  for (i = 0; i < 20 * pbits; i++)
+    {
+    qloop:
+      if (!testparms)
+        {
+          _gcry_mpi_randomize (q, pbits, random_level);
+          mpi_set_bit (q, 0);
+        }
+      if (mpi_cmp (q, minp) < 0)
+        {
+          if (testparms)
+            goto err;
+          goto qloop;
+        }
+      if (mpi_cmp (p, q) > 0)
+        {
+          pqswitch = 1;
+          mpi_sub (diff, p, q);
+        }
+      else
+        {
+          pqswitch = 0;
+          mpi_sub (diff, q, p);
+        }
+      if (mpi_cmp (diff, mindiff) < 0)
+        {
+          if (testparms)
+            goto err;
+          goto qloop;
+        }
+
+      mpi_sub_ui (q1, q, 1);
+      if (mpi_gcd (g, q1, e))
+        {
+          if (_gcry_fips186_4_prime_check (q, pbits) != GPG_ERR_NO_ERROR)
+            {
+              /* not a prime */
+              if (testparms)
+                goto err;
+            }
+          else
+            break;
+        }
+      else if (testparms)
+        goto err;
+    }
+  if (i >= 20 * pbits)
+    goto err;
+
+  if (testparms)
+    {
+      mpi_clear (p);
+      mpi_clear (q);
+    }
+  else
+    {
+      gcry_mpi_t f;
+
+      if (pqswitch)
+        {
+          gcry_mpi_t tmp;
+
+          tmp = p;
+          p = q;
+          q = tmp;
+        }
+
+      f = mpi_snew (nbits);
+
+      /* calculate the modulus */
+      mpi_mul (n, p, q);
+
+      /* calculate the secret key d = e^1 mod phi */
+      mpi_gcd (g, p1, q1);
+      mpi_fdiv_q (f, p1, g);
+      mpi_mul (f, f, q1);
+
+      mpi_invm (d, e, f);
+
+      _gcry_mpi_release (f);
+
+      if (mpi_get_nbits (d) < pbits)
+        goto retry;
+
+      /* calculate the inverse of p and q (used for chinese remainder 
theorem)*/
+      mpi_invm (u, p, q );
+    }
+
+  ec = 0; /* Success.  */
+
+  if (DBG_CIPHER)
+    {
+      log_mpidump("  p= ", p );
+      log_mpidump("  q= ", q );
+      log_mpidump("  n= ", n );
+      log_mpidump("  e= ", e );
+      log_mpidump("  d= ", d );
+      log_mpidump("  u= ", u );
+    }
+
+ err:
+
+  _gcry_mpi_release (p1);
+  _gcry_mpi_release (q1);
+  _gcry_mpi_release (g);
+  _gcry_mpi_release (minp);
+  _gcry_mpi_release (mindiff);
+  _gcry_mpi_release (diff);
+
+  sk->n = n;
+  sk->e = e;
+  sk->p = p;
+  sk->q = q;
+  sk->d = d;
+  sk->u = u;
+
+  if (ec)
+    {
+      _gcry_mpi_release (sk->n); sk->n = NULL;
+      _gcry_mpi_release (sk->e); sk->e = NULL;
+      _gcry_mpi_release (sk->p); sk->p = NULL;
+      _gcry_mpi_release (sk->q); sk->q = NULL;
+      _gcry_mpi_release (sk->d); sk->d = NULL;
+      _gcry_mpi_release (sk->u); sk->u = NULL;
+    }
+
+  return ec;
+}
+
+
 /* Helper for generate_x931.  */
 static gcry_mpi_t
 gen_x931_parm_xp (unsigned int nbits)
 {
   gcry_mpi_t xp;
 
-  xp = gcry_mpi_snew (nbits);
-  gcry_mpi_randomize (xp, nbits, GCRY_VERY_STRONG_RANDOM);
+  xp = mpi_snew (nbits);
+  _gcry_mpi_randomize (xp, nbits, GCRY_VERY_STRONG_RANDOM);
 
   /* The requirement for Xp is:
 
@@ -358,8 +735,8 @@ gen_x931_parm_xi (void)
 {
   gcry_mpi_t xi;
 
-  xi = gcry_mpi_snew (101);
-  gcry_mpi_randomize (xi, 101, GCRY_VERY_STRONG_RANDOM);
+  xi = mpi_snew (101);
+  _gcry_mpi_randomize (xi, 101, GCRY_VERY_STRONG_RANDOM);
   mpi_set_highbit (xi, 100);
   gcry_assert ( mpi_get_nbits (xi) == 101 );
 
@@ -388,7 +765,9 @@ generate_x931 (RSA_secret_key *sk, unsigned int nbits, 
unsigned long e_value,
 
   *swapped = 0;
 
-  if (e_value == 1)   /* Alias for a secure value. */
+  if (e_value == 0)        /* 65537 is the libgcrypt's selection. */
+    e_value = 65537;
+  else if (e_value == 1)   /* Alias for a secure value. */
     e_value = 65537;
 
   /* Point 1 of section 4.1:  k = 1024 + 256s with S >= 0  */
@@ -402,7 +781,7 @@ generate_x931 (RSA_secret_key *sk, unsigned int nbits, 
unsigned long e_value,
   if (e_value < 3)
     return GPG_ERR_INV_VALUE;
 
-  /* Our implementaion requires E to be odd.  */
+  /* Our implementation requires E to be odd.  */
   if (!(e_value & 1))
     return GPG_ERR_INV_VALUE;
 
@@ -425,15 +804,15 @@ generate_x931 (RSA_secret_key *sk, unsigned int nbits, 
unsigned long e_value,
         /* Not given: Generate them.  */
         xp = gen_x931_parm_xp (nbits/2);
         /* Make sure that |xp - xq| > 2^{nbits - 100} holds.  */
-        tmpval = gcry_mpi_snew (nbits/2);
+        tmpval = mpi_snew (nbits/2);
         do
           {
-            gcry_mpi_release (xq);
+            _gcry_mpi_release (xq);
             xq = gen_x931_parm_xp (nbits/2);
             mpi_sub (tmpval, xp, xq);
           }
         while (mpi_get_nbits (tmpval) <= (nbits/2 - 100));
-        gcry_mpi_release (tmpval);
+        _gcry_mpi_release (tmpval);
 
         xp1 = gen_x931_parm_xi ();
         xp2 = gen_x931_parm_xi ();
@@ -468,12 +847,11 @@ generate_x931 (RSA_secret_key *sk, unsigned int nbits, 
unsigned long e_value,
 
         for (idx=0; tbl[idx].name; idx++)
           {
-            oneparm = gcry_sexp_find_token (deriveparms, tbl[idx].name, 0);
+            oneparm = sexp_find_token (deriveparms, tbl[idx].name, 0);
             if (oneparm)
               {
-                *tbl[idx].value = gcry_sexp_nth_mpi (oneparm, 1,
-                                                     GCRYMPI_FMT_USG);
-                gcry_sexp_release (oneparm);
+                *tbl[idx].value = sexp_nth_mpi (oneparm, 1, GCRYMPI_FMT_USG);
+                sexp_release (oneparm);
               }
           }
         for (idx=0; tbl[idx].name; idx++)
@@ -483,7 +861,7 @@ generate_x931 (RSA_secret_key *sk, unsigned int nbits, 
unsigned long e_value,
           {
             /* At least one parameter is missing.  */
             for (idx=0; tbl[idx].name; idx++)
-              gcry_mpi_release (*tbl[idx].value);
+              _gcry_mpi_release (*tbl[idx].value);
             return GPG_ERR_MISSING_VALUE;
           }
       }
@@ -493,17 +871,17 @@ generate_x931 (RSA_secret_key *sk, unsigned int nbits, 
unsigned long e_value,
     /* Find two prime numbers.  */
     p = _gcry_derive_x931_prime (xp, xp1, xp2, e, NULL, NULL);
     q = _gcry_derive_x931_prime (xq, xq1, xq2, e, NULL, NULL);
-    gcry_mpi_release (xp);  xp  = NULL;
-    gcry_mpi_release (xp1); xp1 = NULL;
-    gcry_mpi_release (xp2); xp2 = NULL;
-    gcry_mpi_release (xq);  xq  = NULL;
-    gcry_mpi_release (xq1); xq1 = NULL;
-    gcry_mpi_release (xq2); xq2 = NULL;
+    _gcry_mpi_release (xp);  xp  = NULL;
+    _gcry_mpi_release (xp1); xp1 = NULL;
+    _gcry_mpi_release (xp2); xp2 = NULL;
+    _gcry_mpi_release (xq);  xq  = NULL;
+    _gcry_mpi_release (xq1); xq1 = NULL;
+    _gcry_mpi_release (xq2); xq2 = NULL;
     if (!p || !q)
       {
-        gcry_mpi_release (p);
-        gcry_mpi_release (q);
-        gcry_mpi_release (e);
+        _gcry_mpi_release (p);
+        _gcry_mpi_release (q);
+        _gcry_mpi_release (e);
         return GPG_ERR_NO_PRIME;
       }
   }
@@ -516,26 +894,26 @@ generate_x931 (RSA_secret_key *sk, unsigned int nbits, 
unsigned long e_value,
       mpi_swap (p, q);
       *swapped = 1;
     }
-  n = gcry_mpi_new (nbits);
+  n = mpi_new (nbits);
   mpi_mul (n, p, q);
 
   /* Compute the Euler totient:  phi = (p-1)(q-1)  */
-  pm1 = gcry_mpi_snew (nbits/2);
-  qm1 = gcry_mpi_snew (nbits/2);
-  phi = gcry_mpi_snew (nbits);
+  pm1 = mpi_snew (nbits/2);
+  qm1 = mpi_snew (nbits/2);
+  phi = mpi_snew (nbits);
   mpi_sub_ui (pm1, p, 1);
   mpi_sub_ui (qm1, q, 1);
   mpi_mul (phi, pm1, qm1);
 
-  g = gcry_mpi_snew (nbits);
-  gcry_assert (gcry_mpi_gcd (g, e, phi));
+  g = mpi_snew (nbits);
+  gcry_assert (mpi_gcd (g, e, phi));
 
   /* Compute: f = lcm(p-1,q-1) = phi / gcd(p-1,q-1) */
-  gcry_mpi_gcd (g, pm1, qm1);
+  mpi_gcd (g, pm1, qm1);
   f = pm1; pm1 = NULL;
-  gcry_mpi_release (qm1); qm1 = NULL;
+  _gcry_mpi_release (qm1); qm1 = NULL;
   mpi_fdiv_q (f, phi, g);
-  gcry_mpi_release (phi); phi = NULL;
+  _gcry_mpi_release (phi); phi = NULL;
   d = g; g = NULL;
   /* Compute the secret key:  d = e^{-1} mod lcm(p-1,q-1) */
   mpi_invm (d, e, f);
@@ -567,12 +945,12 @@ generate_x931 (RSA_secret_key *sk, unsigned int nbits, 
unsigned long e_value,
   /* Now we can test our keys. */
   if (test_keys (sk, nbits - 64))
     {
-      gcry_mpi_release (sk->n); sk->n = NULL;
-      gcry_mpi_release (sk->e); sk->e = NULL;
-      gcry_mpi_release (sk->p); sk->p = NULL;
-      gcry_mpi_release (sk->q); sk->q = NULL;
-      gcry_mpi_release (sk->d); sk->d = NULL;
-      gcry_mpi_release (sk->u); sk->u = NULL;
+      _gcry_mpi_release (sk->n); sk->n = NULL;
+      _gcry_mpi_release (sk->e); sk->e = NULL;
+      _gcry_mpi_release (sk->p); sk->p = NULL;
+      _gcry_mpi_release (sk->q); sk->q = NULL;
+      _gcry_mpi_release (sk->d); sk->d = NULL;
+      _gcry_mpi_release (sk->u); sk->u = NULL;
       fips_signal_error ("self-test after key generation failed");
       return GPG_ERR_SELFTEST_FAILED;
     }
@@ -662,7 +1040,7 @@ stronger_key_check ( RSA_secret_key *skey )
       {
         log_info ( "RSA Oops: d is wrong - fixed\n");
         mpi_set (skey->d, t);
-        _gcry_log_mpidump ("  fixed d", skey->d);
+        log_printmpi ("  fixed d", skey->d);
       }
 
     /* check for correctness of u */
@@ -671,7 +1049,7 @@ stronger_key_check ( RSA_secret_key *skey )
       {
         log_info ( "RSA Oops: u is wrong - fixed\n");
         mpi_set (skey->u, t);
-        _gcry_log_mpidump ("  fixed u", skey->u);
+        log_printmpi ("  fixed u", skey->u);
       }
 
     log_info ( "RSA secret key check finished\n");
@@ -684,374 +1062,721 @@ stronger_key_check ( RSA_secret_key *skey )
 #endif
 
 
-
-/****************
- * Secret key operation. Encrypt INPUT with SKEY and put result into OUTPUT.
+
+/* Secret key operation - standard version.
  *
  *     m = c^d mod n
- *
- * Or faster:
+ */
+static void
+secret_core_std (gcry_mpi_t M, gcry_mpi_t C,
+                 gcry_mpi_t D, gcry_mpi_t N)
+{
+  mpi_powm (M, C, D, N);
+}
+
+
+/* Secret key operation - using the CRT.
  *
  *      m1 = c ^ (d mod (p-1)) mod p
  *      m2 = c ^ (d mod (q-1)) mod q
  *      h = u * (m2 - m1) mod q
  *      m = m1 + h * p
- *
- * Where m is OUTPUT, c is INPUT and d,n,p,q,u are elements of SKEY.
  */
 static void
-secret(gcry_mpi_t output, gcry_mpi_t input, RSA_secret_key *skey )
+secret_core_crt (gcry_mpi_t M, gcry_mpi_t C,
+                 gcry_mpi_t D, unsigned int Nlimbs,
+                 gcry_mpi_t P, gcry_mpi_t Q, gcry_mpi_t U)
+{
+  gcry_mpi_t m1 = mpi_alloc_secure ( Nlimbs + 1 );
+  gcry_mpi_t m2 = mpi_alloc_secure ( Nlimbs + 1 );
+  gcry_mpi_t h  = mpi_alloc_secure ( Nlimbs + 1 );
+  gcry_mpi_t D_blind = mpi_alloc_secure ( Nlimbs + 1 );
+  gcry_mpi_t r;
+  unsigned int r_nbits;
+
+  r_nbits = mpi_get_nbits (P) / 4;
+  if (r_nbits < 96)
+    r_nbits = 96;
+  r = mpi_secure_new (r_nbits);
+
+  /* d_blind = (d mod (p-1)) + (p-1) * r            */
+  /* m1 = c ^ d_blind mod p */
+  _gcry_mpi_randomize (r, r_nbits, GCRY_WEAK_RANDOM);
+  mpi_set_highbit (r, r_nbits - 1);
+  mpi_sub_ui ( h, P, 1 );
+  mpi_mul ( D_blind, h, r );
+  mpi_fdiv_r ( h, D, h );
+  mpi_add ( D_blind, D_blind, h );
+  mpi_powm ( m1, C, D_blind, P );
+
+  /* d_blind = (d mod (q-1)) + (q-1) * r            */
+  /* m2 = c ^ d_blind mod q */
+  _gcry_mpi_randomize (r, r_nbits, GCRY_WEAK_RANDOM);
+  mpi_set_highbit (r, r_nbits - 1);
+  mpi_sub_ui ( h, Q, 1  );
+  mpi_mul ( D_blind, h, r );
+  mpi_fdiv_r ( h, D, h );
+  mpi_add ( D_blind, D_blind, h );
+  mpi_powm ( m2, C, D_blind, Q );
+
+  mpi_free ( r );
+  mpi_free ( D_blind );
+
+  /* h = u * ( m2 - m1 ) mod q */
+  mpi_sub ( h, m2, m1 );
+  if ( mpi_has_sign ( h ) )
+    mpi_add ( h, h, Q );
+  mpi_mulm ( h, U, h, Q );
+
+  /* m = m1 + h * p */
+  mpi_mul ( h, h, P );
+  mpi_add ( M, m1, h );
+
+  mpi_free ( h );
+  mpi_free ( m1 );
+  mpi_free ( m2 );
+}
+
+
+/* Secret key operation.
+ * Encrypt INPUT with SKEY and put result into
+ * OUTPUT.  SKEY has the secret key parameters.
+ */
+static void
+secret (gcry_mpi_t output, gcry_mpi_t input, RSA_secret_key *skey )
 {
+  /* Remove superfluous leading zeroes from INPUT.  */
+  mpi_normalize (input);
+
   if (!skey->p || !skey->q || !skey->u)
     {
-      mpi_powm (output, input, skey->d, skey->n);
+      secret_core_std (output, input, skey->d, skey->n);
     }
   else
     {
-      gcry_mpi_t m1 = mpi_alloc_secure( mpi_get_nlimbs(skey->n)+1 );
-      gcry_mpi_t m2 = mpi_alloc_secure( mpi_get_nlimbs(skey->n)+1 );
-      gcry_mpi_t h  = mpi_alloc_secure( mpi_get_nlimbs(skey->n)+1 );
-
-      /* m1 = c ^ (d mod (p-1)) mod p */
-      mpi_sub_ui( h, skey->p, 1  );
-      mpi_fdiv_r( h, skey->d, h );
-      mpi_powm( m1, input, h, skey->p );
-      /* m2 = c ^ (d mod (q-1)) mod q */
-      mpi_sub_ui( h, skey->q, 1  );
-      mpi_fdiv_r( h, skey->d, h );
-      mpi_powm( m2, input, h, skey->q );
-      /* h = u * ( m2 - m1 ) mod q */
-      mpi_sub( h, m2, m1 );
-      if ( mpi_is_neg( h ) )
-        mpi_add ( h, h, skey->q );
-      mpi_mulm( h, skey->u, h, skey->q );
-      /* m = m2 + h * p */
-      mpi_mul ( h, h, skey->p );
-      mpi_add ( output, m1, h );
-
-      mpi_free ( h );
-      mpi_free ( m1 );
-      mpi_free ( m2 );
+      secret_core_crt (output, input, skey->d, mpi_get_nlimbs (skey->n),
+                       skey->p, skey->q, skey->u);
     }
 }
 
 
-
-/* Perform RSA blinding.  */
-static gcry_mpi_t
-rsa_blind (gcry_mpi_t x, gcry_mpi_t r, gcry_mpi_t e, gcry_mpi_t n)
+static void
+secret_blinded (gcry_mpi_t output, gcry_mpi_t input,
+                RSA_secret_key *sk, unsigned int nbits)
 {
-  /* A helper.  */
-  gcry_mpi_t a;
-
-  /* Result.  */
-  gcry_mpi_t y;
-
-  a = gcry_mpi_snew (gcry_mpi_get_nbits (n));
-  y = gcry_mpi_snew (gcry_mpi_get_nbits (n));
+  gcry_mpi_t r;                   /* Random number needed for blinding.  */
+  gcry_mpi_t ri;          /* Modular multiplicative inverse of r.  */
+  gcry_mpi_t bldata;       /* Blinded data to decrypt.  */
+
+  /* First, we need a random number r between 0 and n - 1, which is
+   * relatively prime to n (i.e. it is neither p nor q).  The random
+   * number needs to be only unpredictable, thus we employ the
+   * gcry_create_nonce function by using GCRY_WEAK_RANDOM with
+   * gcry_mpi_randomize.  */
+  r  = mpi_snew (nbits);
+  ri = mpi_snew (nbits);
+  bldata = mpi_snew (nbits);
 
-  /* Now we calculate: y = (x * r^e) mod n, where r is the random
-     number, e is the public exponent, x is the non-blinded data and n
-     is the RSA modulus.  */
-  gcry_mpi_powm (a, r, e, n);
-  gcry_mpi_mulm (y, a, x, n);
-
-  gcry_mpi_release (a);
-
-  return y;
-}
-
-/* Undo RSA blinding.  */
-static gcry_mpi_t
-rsa_unblind (gcry_mpi_t x, gcry_mpi_t ri, gcry_mpi_t n)
-{
-  gcry_mpi_t y;
+  do
+    {
+      _gcry_mpi_randomize (r, nbits, GCRY_WEAK_RANDOM);
+      mpi_mod (r, r, sk->n);
+    }
+  while (!mpi_invm (ri, r, sk->n));
 
-  y = gcry_mpi_snew (gcry_mpi_get_nbits (n));
+  /* Do blinding.  We calculate: y = (x * r^e) mod n, where r is the
+   * random number, e is the public exponent, x is the non-blinded
+   * input data and n is the RSA modulus.  */
+  mpi_powm (bldata, r, sk->e, sk->n);
+  mpi_mulm (bldata, bldata, input, sk->n);
 
-  /* Here we calculate: y = (x * r^-1) mod n, where x is the blinded
-     decrypted data, ri is the modular multiplicative inverse of r and
-     n is the RSA modulus.  */
+  /* Perform decryption.  */
+  secret (output, bldata, sk);
+  _gcry_mpi_release (bldata);
 
-  gcry_mpi_mulm (y, ri, x, n);
+  /* Undo blinding.  Here we calculate: y = (x * r^-1) mod n, where x
+   * is the blinded decrypted data, ri is the modular multiplicative
+   * inverse of r and n is the RSA modulus.  */
+  mpi_mulm (output, output, ri, sk->n);
 
-  return y;
+  _gcry_mpi_release (r);
+  _gcry_mpi_release (ri);
 }
 
+
 /*********************************************
  **************  interface  ******************
  *********************************************/
 
 static gcry_err_code_t
-rsa_generate_ext (int algo, unsigned int nbits, unsigned long evalue,
-                  const gcry_sexp_t genparms,
-                  gcry_mpi_t *skey, gcry_mpi_t **retfactors,
-                  gcry_sexp_t *r_extrainfo)
+rsa_generate (const gcry_sexp_t genparms, gcry_sexp_t *r_skey)
 {
-  RSA_secret_key sk;
   gpg_err_code_t ec;
+  unsigned int nbits;
+  unsigned long evalue;
+  RSA_secret_key sk;
   gcry_sexp_t deriveparms;
-  int transient_key = 0;
-  int use_x931 = 0;
+  int flags = 0;
   gcry_sexp_t l1;
+  gcry_sexp_t swap_info = NULL;
+  int testparms = 0;
+
+  memset (&sk, 0, sizeof sk);
 
-  (void)algo;
+  ec = _gcry_pk_util_get_nbits (genparms, &nbits);
+  if (ec)
+    return ec;
 
-  *retfactors = NULL; /* We don't return them.  */
+  ec = _gcry_pk_util_get_rsa_use_e (genparms, &evalue);
+  if (ec)
+    return ec;
+
+  /* Parse the optional flags list.  */
+  l1 = sexp_find_token (genparms, "flags", 0);
+  if (l1)
+    {
+      ec = _gcry_pk_util_parse_flaglist (l1, &flags, NULL);
+      sexp_release (l1);
+      if (ec)
+        return ec;
+    }
 
   deriveparms = (genparms?
-                 gcry_sexp_find_token (genparms, "derive-parms", 0) : NULL);
+                 sexp_find_token (genparms, "derive-parms", 0) : NULL);
   if (!deriveparms)
     {
       /* Parse the optional "use-x931" flag. */
-      l1 = gcry_sexp_find_token (genparms, "use-x931", 0);
+      l1 = sexp_find_token (genparms, "use-x931", 0);
       if (l1)
         {
-          use_x931 = 1;
-          gcry_sexp_release (l1);
+          flags |= PUBKEY_FLAG_USE_X931;
+          sexp_release (l1);
         }
     }
 
-  if (deriveparms || use_x931 || fips_mode ())
+  if (deriveparms || (flags & PUBKEY_FLAG_USE_X931))
     {
       int swapped;
-      ec = generate_x931 (&sk, nbits, evalue, deriveparms, &swapped);
-      gcry_sexp_release (deriveparms);
-      if (!ec && r_extrainfo && swapped)
+      if (fips_mode ())
         {
-          ec = gcry_sexp_new (r_extrainfo,
-                              "(misc-key-info(p-q-swapped))", 0, 1);
-          if (ec)
-            {
-              gcry_mpi_release (sk.n); sk.n = NULL;
-              gcry_mpi_release (sk.e); sk.e = NULL;
-              gcry_mpi_release (sk.p); sk.p = NULL;
-              gcry_mpi_release (sk.q); sk.q = NULL;
-              gcry_mpi_release (sk.d); sk.d = NULL;
-              gcry_mpi_release (sk.u); sk.u = NULL;
-            }
+          sexp_release (deriveparms);
+          return GPG_ERR_INV_SEXP;
         }
+      ec = generate_x931 (&sk, nbits, evalue, deriveparms, &swapped);
+      sexp_release (deriveparms);
+      if (!ec && swapped)
+        ec = sexp_new (&swap_info, "(misc-key-info(p-q-swapped))", 0, 1);
     }
   else
     {
       /* Parse the optional "transient-key" flag. */
-      l1 = gcry_sexp_find_token (genparms, "transient-key", 0);
-      if (l1)
+      if (!(flags & PUBKEY_FLAG_TRANSIENT_KEY))
         {
-          transient_key = 1;
-          gcry_sexp_release (l1);
+          l1 = sexp_find_token (genparms, "transient-key", 0);
+          if (l1)
+            {
+              flags |= PUBKEY_FLAG_TRANSIENT_KEY;
+              sexp_release (l1);
+            }
         }
+      deriveparms = (genparms? sexp_find_token (genparms, "test-parms", 0)
+                     /**/    : NULL);
+      if (deriveparms)
+        testparms = 1;
+
       /* Generate.  */
-      ec = generate_std (&sk, nbits, evalue, transient_key);
+      if (deriveparms || fips_mode ())
+        {
+          ec = generate_fips (&sk, nbits, evalue, deriveparms,
+                              !!(flags & PUBKEY_FLAG_TRANSIENT_KEY));
+        }
+      else
+        {
+          ec = generate_std (&sk, nbits, evalue,
+                             !!(flags & PUBKEY_FLAG_TRANSIENT_KEY));
+        }
+      sexp_release (deriveparms);
     }
 
   if (!ec)
     {
-      skey[0] = sk.n;
-      skey[1] = sk.e;
-      skey[2] = sk.d;
-      skey[3] = sk.p;
-      skey[4] = sk.q;
-      skey[5] = sk.u;
+      ec = sexp_build (r_skey, NULL,
+                       "(key-data"
+                       " (public-key"
+                       "  (rsa(n%m)(e%m)))"
+                       " (private-key"
+                       "  (rsa(n%m)(e%m)(d%m)(p%m)(q%m)(u%m)))"
+                       " %S)",
+                       sk.n, sk.e,
+                       sk.n, sk.e, sk.d, sk.p, sk.q, sk.u,
+                       swap_info);
     }
 
-  return ec;
-}
+  mpi_free (sk.n);
+  mpi_free (sk.e);
+  mpi_free (sk.p);
+  mpi_free (sk.q);
+  mpi_free (sk.d);
+  mpi_free (sk.u);
+  sexp_release (swap_info);
 
+  if (!ec && !testparms && fips_mode () && test_keys_fips (*r_skey))
+    {
+      sexp_release (*r_skey); *r_skey = NULL;
+      fips_signal_error ("self-test after key generation failed");
+      return GPG_ERR_SELFTEST_FAILED;
+    }
 
-static gcry_err_code_t
-rsa_generate (int algo, unsigned int nbits, unsigned long evalue,
-              gcry_mpi_t *skey, gcry_mpi_t **retfactors)
-{
-  return rsa_generate_ext (algo, nbits, evalue, NULL, skey, retfactors, NULL);
+  return ec;
 }
 
 
 static gcry_err_code_t
-rsa_check_secret_key (int algo, gcry_mpi_t *skey)
+rsa_check_secret_key (gcry_sexp_t keyparms)
 {
-  gcry_err_code_t err = GPG_ERR_NO_ERROR;
-  RSA_secret_key sk;
-
-  (void)algo;
+  gcry_err_code_t rc;
+  RSA_secret_key sk = {NULL, NULL, NULL, NULL, NULL, NULL};
 
-  sk.n = skey[0];
-  sk.e = skey[1];
-  sk.d = skey[2];
-  sk.p = skey[3];
-  sk.q = skey[4];
-  sk.u = skey[5];
+  /* To check the key we need the optional parameters. */
+  rc = sexp_extract_param (keyparms, NULL, "nedpqu",
+                           &sk.n, &sk.e, &sk.d, &sk.p, &sk.q, &sk.u,
+                           NULL);
+  if (rc)
+    goto leave;
 
-  if (!sk.p || !sk.q || !sk.u)
-    err = GPG_ERR_NO_OBJ;  /* To check the key we need the optional
-                              parameters. */
-  else if (!check_secret_key (&sk))
-    err = GPG_ERR_BAD_SECKEY;
+  if (!check_secret_key (&sk))
+    rc = GPG_ERR_BAD_SECKEY;
 
-  return err;
+ leave:
+  _gcry_mpi_release (sk.n);
+  _gcry_mpi_release (sk.e);
+  _gcry_mpi_release (sk.d);
+  _gcry_mpi_release (sk.p);
+  _gcry_mpi_release (sk.q);
+  _gcry_mpi_release (sk.u);
+  if (DBG_CIPHER)
+    log_debug ("rsa_testkey    => %s\n", gpg_strerror (rc));
+  return rc;
 }
 
 
 static gcry_err_code_t
-rsa_encrypt (int algo, gcry_mpi_t *resarr, gcry_mpi_t data,
-             gcry_mpi_t *pkey, int flags)
+rsa_encrypt (gcry_sexp_t *r_ciph, gcry_sexp_t s_data, gcry_sexp_t keyparms)
 {
-  RSA_public_key pk;
+  gcry_err_code_t rc;
+  struct pk_encoding_ctx ctx;
+  gcry_mpi_t data = NULL;
+  RSA_public_key pk = {NULL, NULL};
+  gcry_mpi_t ciph = NULL;
+  unsigned int nbits = rsa_get_nbits (keyparms);
+
+  rc = rsa_check_keysize (nbits);
+  if (rc)
+    return rc;
+
+  _gcry_pk_util_init_encoding_ctx (&ctx, PUBKEY_OP_ENCRYPT, nbits);
+
+  /* Extract the data.  */
+  rc = _gcry_pk_util_data_to_mpi (s_data, &data, &ctx);
+  if (rc)
+    goto leave;
+  if (DBG_CIPHER)
+    log_mpidump ("rsa_encrypt data", data);
+  if (!data || mpi_is_opaque (data))
+    {
+      rc = GPG_ERR_INV_DATA;
+      goto leave;
+    }
 
-  (void)algo;
-  (void)flags;
+  /* Extract the key.  */
+  rc = sexp_extract_param (keyparms, NULL, "ne", &pk.n, &pk.e, NULL);
+  if (rc)
+    goto leave;
+  if (DBG_CIPHER)
+    {
+      log_mpidump ("rsa_encrypt    n", pk.n);
+      log_mpidump ("rsa_encrypt    e", pk.e);
+    }
 
-  pk.n = pkey[0];
-  pk.e = pkey[1];
-  resarr[0] = mpi_alloc (mpi_get_nlimbs (pk.n));
-  public (resarr[0], data, &pk);
+  /* Do RSA computation and build result.  */
+  ciph = mpi_new (0);
+  public (ciph, data, &pk);
+  if (DBG_CIPHER)
+    log_mpidump ("rsa_encrypt  res", ciph);
+  if ((ctx.flags & PUBKEY_FLAG_FIXEDLEN))
+    {
+      /* We need to make sure to return the correct length to avoid
+         problems with missing leading zeroes.  */
+      unsigned char *em;
+      size_t emlen = (mpi_get_nbits (pk.n)+7)/8;
 
-  return GPG_ERR_NO_ERROR;
+      rc = _gcry_mpi_to_octet_string (&em, NULL, ciph, emlen);
+      if (!rc)
+        {
+          rc = sexp_build (r_ciph, NULL, "(enc-val(rsa(a%b)))", (int)emlen, 
em);
+          xfree (em);
+        }
+    }
+  else
+    rc = sexp_build (r_ciph, NULL, "(enc-val(rsa(a%m)))", ciph);
+
+ leave:
+  _gcry_mpi_release (ciph);
+  _gcry_mpi_release (pk.n);
+  _gcry_mpi_release (pk.e);
+  _gcry_mpi_release (data);
+  _gcry_pk_util_free_encoding_ctx (&ctx);
+  if (DBG_CIPHER)
+    log_debug ("rsa_encrypt    => %s\n", gpg_strerror (rc));
+  return rc;
 }
 
 
 static gcry_err_code_t
-rsa_decrypt (int algo, gcry_mpi_t *result, gcry_mpi_t *data,
-             gcry_mpi_t *skey, int flags)
+rsa_decrypt (gcry_sexp_t *r_plain, gcry_sexp_t s_data, gcry_sexp_t keyparms)
+
 {
-  RSA_secret_key sk;
-  gcry_mpi_t r = MPI_NULL;     /* Random number needed for blinding.  */
-  gcry_mpi_t ri = MPI_NULL;    /* Modular multiplicative inverse of
-                                  r.  */
-  gcry_mpi_t x = MPI_NULL;     /* Data to decrypt.  */
-  gcry_mpi_t y;                        /* Result.  */
+  gpg_err_code_t rc, rc_sexp;
+  struct pk_encoding_ctx ctx;
+  gcry_sexp_t l1 = NULL;
+  gcry_mpi_t data = NULL;
+  RSA_secret_key sk = {NULL, NULL, NULL, NULL, NULL, NULL};
+  gcry_mpi_t plain = NULL;
+  unsigned char *unpad = NULL;
+  size_t unpadlen = 0;
+  unsigned int nbits = rsa_get_nbits (keyparms);
+  gcry_sexp_t result = NULL;
+  gcry_sexp_t dummy = NULL;
+
+  rc = rsa_check_keysize (nbits);
+  if (rc)
+    return rc;
+
+  _gcry_pk_util_init_encoding_ctx (&ctx, PUBKEY_OP_DECRYPT, nbits);
+
+  /* Extract the data.  */
+  rc = _gcry_pk_util_preparse_encval (s_data, rsa_names, &l1, &ctx);
+  if (rc)
+    goto leave;
+  rc = sexp_extract_param (l1, NULL, "a", &data, NULL);
+  if (rc)
+    goto leave;
+  if (DBG_CIPHER)
+    log_printmpi ("rsa_decrypt data", data);
+  if (mpi_is_opaque (data))
+    {
+      rc = GPG_ERR_INV_DATA;
+      goto leave;
+    }
+  if (fips_mode () && (ctx.encoding == PUBKEY_ENC_PKCS1 ||
+                       ctx.encoding == PUBKEY_ENC_OAEP))
+    {
+      rc = GPG_ERR_INV_FLAG;
+      goto leave;
+    }
 
-  (void)algo;
+  /* Extract the key.  */
+  rc = sexp_extract_param (keyparms, NULL, "nedp?q?u?",
+                           &sk.n, &sk.e, &sk.d, &sk.p, &sk.q, &sk.u,
+                           NULL);
+  if (rc)
+    goto leave;
+  if (DBG_CIPHER)
+    {
+      log_printmpi ("rsa_decrypt    n", sk.n);
+      log_printmpi ("rsa_decrypt    e", sk.e);
+      if (!fips_mode ())
+        {
+          log_printmpi ("rsa_decrypt    d", sk.d);
+          log_printmpi ("rsa_decrypt    p", sk.p);
+          log_printmpi ("rsa_decrypt    q", sk.q);
+          log_printmpi ("rsa_decrypt    u", sk.u);
+        }
+    }
 
-  /* Extract private key.  */
-  sk.n = skey[0];
-  sk.e = skey[1];
-  sk.d = skey[2];
-  sk.p = skey[3]; /* Optional. */
-  sk.q = skey[4]; /* Optional. */
-  sk.u = skey[5]; /* Optional. */
+  /* Better make sure that there are no superfluous leading zeroes in
+     the input and it has not been "padded" using multiples of N.
+     This mitigates side-channel attacks (CVE-2013-4576).  */
+  mpi_normalize (data);
+  mpi_fdiv_r (data, data, sk.n);
 
-  y = gcry_mpi_snew (gcry_mpi_get_nbits (sk.n));
+  /* Allocate MPI for the plaintext.  */
+  plain = mpi_snew (nbits);
 
   /* We use blinding by default to mitigate timing attacks which can
      be practically mounted over the network as shown by Brumley and
      Boney in 2003.  */
-  if (! (flags & PUBKEY_FLAG_NO_BLINDING))
-    {
-      /* Initialize blinding.  */
+  if ((ctx.flags & PUBKEY_FLAG_NO_BLINDING))
+    secret (plain, data, &sk);
+  else
+    secret_blinded (plain, data, &sk, nbits);
 
-      /* First, we need a random number r between 0 and n - 1, which
-        is relatively prime to n (i.e. it is neither p nor q).  The
-        random number needs to be only unpredictable, thus we employ
-        the gcry_create_nonce function by using GCRY_WEAK_RANDOM with
-        gcry_mpi_randomize.  */
-      r = gcry_mpi_snew (gcry_mpi_get_nbits (sk.n));
-      ri = gcry_mpi_snew (gcry_mpi_get_nbits (sk.n));
+  if (DBG_CIPHER)
+    log_printmpi ("rsa_decrypt  res", plain);
 
-      gcry_mpi_randomize (r, gcry_mpi_get_nbits (sk.n), GCRY_WEAK_RANDOM);
-      gcry_mpi_mod (r, r, sk.n);
+  /* Reverse the encoding and build the s-expression.  */
+  switch (ctx.encoding)
+    {
+    case PUBKEY_ENC_PKCS1:
+      rc = _gcry_rsa_pkcs1_decode_for_enc (&unpad, &unpadlen, nbits, plain);
+      mpi_free (plain);
+      plain = NULL;
+      rc_sexp = sexp_build (&result, NULL, "(value %b)", (int)unpadlen, unpad);
+      *r_plain = sexp_null_cond (result, ct_is_not_zero (rc));
+      dummy = sexp_null_cond (result, ct_is_zero (rc));
+      sexp_release (dummy);
+      rc = ct_ulong_select (rc_sexp, rc,
+                           ct_is_zero (rc) & ct_is_not_zero (rc_sexp));
+      break;
+
+    case PUBKEY_ENC_OAEP:
+      rc = _gcry_rsa_oaep_decode (&unpad, &unpadlen,
+                                  nbits, ctx.hash_algo,
+                                  plain, ctx.label, ctx.labellen);
+      mpi_free (plain);
+      plain = NULL;
+      rc_sexp = sexp_build (&result, NULL, "(value %b)", (int)unpadlen, unpad);
+      *r_plain = sexp_null_cond (result, ct_is_not_zero (rc));
+      dummy = sexp_null_cond (result, ct_is_zero (rc));
+      sexp_release (dummy);
+      rc = ct_ulong_select (rc_sexp, rc,
+                           ct_is_zero (rc) & ct_is_not_zero (rc_sexp));
+      break;
 
-      /* Calculate inverse of r.  It practically impossible that the
-         following test fails, thus we do not add code to release
-         allocated resources.  */
-      if (!gcry_mpi_invm (ri, r, sk.n))
-       return GPG_ERR_INTERNAL;
+    default:
+      /* Raw format.  For backward compatibility we need to assume a
+         signed mpi by using the sexp format string "%m".  */
+      rc = sexp_build (r_plain, NULL,
+                       (ctx.flags & PUBKEY_FLAG_LEGACYRESULT)
+                       ? "%m":"(value %m)", plain);
+      break;
     }
 
-  if (! (flags & PUBKEY_FLAG_NO_BLINDING))
-    x = rsa_blind (data[0], r, sk.e, sk.n);
-  else
-    x = data[0];
+ leave:
+  xfree (unpad);
+  _gcry_mpi_release (plain);
+  _gcry_mpi_release (sk.n);
+  _gcry_mpi_release (sk.e);
+  _gcry_mpi_release (sk.d);
+  _gcry_mpi_release (sk.p);
+  _gcry_mpi_release (sk.q);
+  _gcry_mpi_release (sk.u);
+  _gcry_mpi_release (data);
+  sexp_release (l1);
+  _gcry_pk_util_free_encoding_ctx (&ctx);
+  if (DBG_CIPHER)
+    log_debug ("rsa_decrypt    => %s\n", gpg_strerror (rc));
+  return rc;
+}
 
-  /* Do the encryption.  */
-  secret (y, x, &sk);
 
-  if (! (flags & PUBKEY_FLAG_NO_BLINDING))
-    {
-      /* Undo blinding.  */
-      gcry_mpi_t a = gcry_mpi_copy (y);
+static gcry_err_code_t
+rsa_sign (gcry_sexp_t *r_sig, gcry_sexp_t s_data, gcry_sexp_t keyparms)
+{
+  gpg_err_code_t rc;
+  struct pk_encoding_ctx ctx;
+  gcry_mpi_t data = NULL;
+  RSA_secret_key sk = {NULL, NULL, NULL, NULL, NULL, NULL};
+  RSA_public_key pk;
+  gcry_mpi_t sig = NULL;
+  gcry_mpi_t result = NULL;
+  unsigned int nbits = rsa_get_nbits (keyparms);
 
-      gcry_mpi_release (y);
-      y = rsa_unblind (a, ri, sk.n);
+  rc = rsa_check_keysize (nbits);
+  if (rc)
+    return rc;
 
-      gcry_mpi_release (a);
-    }
+  _gcry_pk_util_init_encoding_ctx (&ctx, PUBKEY_OP_SIGN, nbits);
 
-  if (! (flags & PUBKEY_FLAG_NO_BLINDING))
+  /* Extract the data.  */
+  rc = _gcry_pk_util_data_to_mpi (s_data, &data, &ctx);
+  if (rc)
+    goto leave;
+  if (DBG_CIPHER)
+    log_printmpi ("rsa_sign   data", data);
+  if (mpi_is_opaque (data))
     {
-      /* Deallocate resources needed for blinding.  */
-      gcry_mpi_release (x);
-      gcry_mpi_release (r);
-      gcry_mpi_release (ri);
+      rc = GPG_ERR_INV_DATA;
+      goto leave;
     }
 
-  /* Copy out result.  */
-  *result = y;
-
-  return GPG_ERR_NO_ERROR;
-}
+  /* Extract the key.  */
+  rc = sexp_extract_param (keyparms, NULL, "nedp?q?u?",
+                           &sk.n, &sk.e, &sk.d, &sk.p, &sk.q, &sk.u,
+                           NULL);
+  if (rc)
+    goto leave;
+  if (DBG_CIPHER)
+    {
+      log_printmpi ("rsa_sign      n", sk.n);
+      log_printmpi ("rsa_sign      e", sk.e);
+      if (!fips_mode ())
+        {
+          log_printmpi ("rsa_sign      d", sk.d);
+          log_printmpi ("rsa_sign      p", sk.p);
+          log_printmpi ("rsa_sign      q", sk.q);
+          log_printmpi ("rsa_sign      u", sk.u);
+        }
+    }
 
+  /* Do RSA computation.  */
+  sig = mpi_new (0);
+  if ((ctx.flags & PUBKEY_FLAG_NO_BLINDING))
+    secret (sig, data, &sk);
+  else
+    secret_blinded (sig, data, &sk, nbits);
+  if (DBG_CIPHER)
+    log_printmpi ("rsa_sign    res", sig);
+
+  /* Check that the created signature is good.  This detects a failure
+     of the CRT algorithm  (Lenstra's attack on RSA's use of the CRT).  */
+  result = mpi_new (0);
+  pk.n = sk.n;
+  pk.e = sk.e;
+  public (result, sig, &pk);
+  if (mpi_cmp (result, data))
+    {
+      rc = GPG_ERR_BAD_SIGNATURE;
+      goto leave;
+    }
 
-static gcry_err_code_t
-rsa_sign (int algo, gcry_mpi_t *resarr, gcry_mpi_t data, gcry_mpi_t *skey)
-{
-  RSA_secret_key sk;
+  /* Convert the result.  */
+  if ((ctx.flags & PUBKEY_FLAG_FIXEDLEN))
+    {
+      /* We need to make sure to return the correct length to avoid
+         problems with missing leading zeroes.  */
+      unsigned char *em;
+      size_t emlen = (mpi_get_nbits (sk.n)+7)/8;
 
-  (void)algo;
+      rc = _gcry_mpi_to_octet_string (&em, NULL, sig, emlen);
+      if (!rc)
+        {
+          rc = sexp_build (r_sig, NULL, "(sig-val(rsa(s%b)))", (int)emlen, em);
+          xfree (em);
+        }
+    }
+  else
+    rc = sexp_build (r_sig, NULL, "(sig-val(rsa(s%M)))", sig);
 
-  sk.n = skey[0];
-  sk.e = skey[1];
-  sk.d = skey[2];
-  sk.p = skey[3];
-  sk.q = skey[4];
-  sk.u = skey[5];
-  resarr[0] = mpi_alloc( mpi_get_nlimbs (sk.n));
-  secret (resarr[0], data, &sk);
 
-  return GPG_ERR_NO_ERROR;
+ leave:
+  _gcry_mpi_release (result);
+  _gcry_mpi_release (sig);
+  _gcry_mpi_release (sk.n);
+  _gcry_mpi_release (sk.e);
+  _gcry_mpi_release (sk.d);
+  _gcry_mpi_release (sk.p);
+  _gcry_mpi_release (sk.q);
+  _gcry_mpi_release (sk.u);
+  _gcry_mpi_release (data);
+  _gcry_pk_util_free_encoding_ctx (&ctx);
+  if (DBG_CIPHER)
+    log_debug ("rsa_sign      => %s\n", gpg_strerror (rc));
+  return rc;
 }
 
 
 static gcry_err_code_t
-rsa_verify (int algo, gcry_mpi_t hash, gcry_mpi_t *data, gcry_mpi_t *pkey,
-                 int (*cmp) (void *opaque, gcry_mpi_t tmp),
-                 void *opaquev)
+rsa_verify (gcry_sexp_t s_sig, gcry_sexp_t s_data, gcry_sexp_t keyparms)
 {
-  RSA_public_key pk;
-  gcry_mpi_t result;
   gcry_err_code_t rc;
+  struct pk_encoding_ctx ctx;
+  gcry_sexp_t l1 = NULL;
+  gcry_mpi_t sig = NULL;
+  gcry_mpi_t data = NULL;
+  RSA_public_key pk = { NULL, NULL };
+  gcry_mpi_t result = NULL;
+  unsigned int nbits = rsa_get_nbits (keyparms);
+
+  rc = rsa_check_verify_keysize (nbits);
+  if (rc)
+    return rc;
+
+  _gcry_pk_util_init_encoding_ctx (&ctx, PUBKEY_OP_VERIFY, nbits);
+
+  /* Extract the data.  */
+  rc = _gcry_pk_util_data_to_mpi (s_data, &data, &ctx);
+  if (rc)
+    goto leave;
+  if (DBG_CIPHER)
+    log_printmpi ("rsa_verify data", data);
+  if (ctx.encoding != PUBKEY_ENC_PSS && mpi_is_opaque (data))
+    {
+      rc = GPG_ERR_INV_DATA;
+      goto leave;
+    }
 
-  (void)algo;
-  (void)cmp;
-  (void)opaquev;
+  /* Extract the signature value.  */
+  rc = _gcry_pk_util_preparse_sigval (s_sig, rsa_names, &l1, NULL);
+  if (rc)
+    goto leave;
+  rc = sexp_extract_param (l1, NULL, "s", &sig, NULL);
+  if (rc)
+    goto leave;
+  if (DBG_CIPHER)
+    log_printmpi ("rsa_verify  sig", sig);
 
-  pk.n = pkey[0];
-  pk.e = pkey[1];
-  result = gcry_mpi_new ( 160 );
-  public( result, data[0], &pk );
-#ifdef IS_DEVELOPMENT_VERSION
+  /* Extract the key.  */
+  rc = sexp_extract_param (keyparms, NULL, "ne", &pk.n, &pk.e, NULL);
+  if (rc)
+    goto leave;
   if (DBG_CIPHER)
     {
-      log_mpidump ("rsa verify result:", result );
-      log_mpidump ("             hash:", hash );
+      log_printmpi ("rsa_verify    n", pk.n);
+      log_printmpi ("rsa_verify    e", pk.e);
     }
-#endif /*IS_DEVELOPMENT_VERSION*/
-  if (cmp)
-    rc = (*cmp) (opaquev, result);
+
+  /* Do RSA computation and compare.  */
+  result = mpi_new (0);
+  public (result, sig, &pk);
+  if (DBG_CIPHER)
+    log_printmpi ("rsa_verify  cmp", result);
+  if (ctx.verify_cmp)
+    rc = ctx.verify_cmp (&ctx, result);
   else
-    rc = mpi_cmp (result, hash) ? GPG_ERR_BAD_SIGNATURE : GPG_ERR_NO_ERROR;
-  gcry_mpi_release (result);
+    rc = mpi_cmp (result, data) ? GPG_ERR_BAD_SIGNATURE : 0;
 
+ leave:
+  _gcry_mpi_release (result);
+  _gcry_mpi_release (pk.n);
+  _gcry_mpi_release (pk.e);
+  _gcry_mpi_release (data);
+  _gcry_mpi_release (sig);
+  sexp_release (l1);
+  _gcry_pk_util_free_encoding_ctx (&ctx);
+  if (DBG_CIPHER)
+    log_debug ("rsa_verify    => %s\n", rc?gpg_strerror (rc):"Good");
   return rc;
 }
 
 
+
+/* Return the number of bits for the key described by PARMS.  On error
+ * 0 is returned.  The format of PARMS starts with the algorithm name;
+ * for example:
+ *
+ *   (rsa
+ *     (n <mpi>)
+ *     (e <mpi>))
+ *
+ * More parameters may be given but we only need N here.
+ */
 static unsigned int
-rsa_get_nbits (int algo, gcry_mpi_t *pkey)
+rsa_get_nbits (gcry_sexp_t parms)
 {
-  (void)algo;
+  gcry_sexp_t l1;
+  gcry_mpi_t n;
+  unsigned int nbits;
 
-  return mpi_get_nbits (pkey[0]);
+  l1 = sexp_find_token (parms, "n", 1);
+  if (!l1)
+    return 0; /* Parameter N not found.  */
+
+  n = sexp_nth_mpi (l1, 1, GCRYMPI_FMT_USG);
+  sexp_release (l1);
+  nbits = n? mpi_get_nbits (n) : 0;
+  _gcry_mpi_release (n);
+  return nbits;
 }
 
 
@@ -1075,19 +1800,19 @@ compute_keygrip (gcry_md_hd_t md, gcry_sexp_t keyparam)
   const char *data;
   size_t datalen;
 
-  l1 = gcry_sexp_find_token (keyparam, "n", 1);
+  l1 = sexp_find_token (keyparam, "n", 1);
   if (!l1)
     return GPG_ERR_NO_OBJ;
 
-  data = gcry_sexp_nth_data (l1, 1, &datalen);
+  data = sexp_nth_data (l1, 1, &datalen);
   if (!data)
     {
-      gcry_sexp_release (l1);
+      sexp_release (l1);
       return GPG_ERR_NO_OBJ;
     }
 
-  gcry_md_write (md, data, datalen);
-  gcry_sexp_release (l1);
+  _gcry_md_write (md, data, datalen);
+  sexp_release (l1);
 
   return 0;
 }
@@ -1100,45 +1825,169 @@ compute_keygrip (gcry_md_hd_t md, gcry_sexp_t keyparam)
  */
 
 static const char *
-selftest_sign_1024 (gcry_sexp_t pkey, gcry_sexp_t skey)
+selftest_hash_sign_2048 (gcry_sexp_t pkey, gcry_sexp_t skey)
+{
+  int md_algo = GCRY_MD_SHA256;
+  gcry_md_hd_t hd = NULL;
+  const char *data_tmpl = "(data (flags pkcs1) (hash %s %b))";
+  static const char sample_data[] =
+    "11223344556677889900aabbccddeeff"
+    "102030405060708090a0b0c0d0f01121";
+  static const char sample_data_bad[] =
+    "11223344556677889900aabbccddeeff"
+    "802030405060708090a0b0c0d0f01121";
+
+  const char *errtxt = NULL;
+  gcry_error_t err;
+  gcry_sexp_t sig = NULL;
+  /* raw signature data reference */
+  const char ref_data[] =
+    "518f41dea3ad884e93eefff8d7ca68a6f4c30d923632e35673651d675cebd652"
+    "a44ed66f6879b18f3d48b2d235b1dd78f6189be1440352cc94231a55c1f93109"
+    "84616b2841c42fe9a6e37be34cd188207209bd028e2fa93e721fbac40c31a068"
+    "1253b312d4e07addb9c7f3d508fa89f218ea7c7f7b9f6a9b1e522c19fa1cd839"
+    "93f9d4ca2f16c3d0b9abafe5e63e848152afc72ce7ee19ea45353116f85209ea"
+    "b9de42129dbccdac8faa461e8e8cc2ae801101cc6add4ba76ccb752030b0e827"
+    "7352b11cdecebae9cdc9a626c4701cd9c85cd287618888c5fae8b4d0ba48915d"
+    "e5cc64e3aee2ba2862d04348ea71f65454f74f9fd1e3108005cc367ca41585a4";
+  gcry_mpi_t ref_mpi = NULL;
+  gcry_mpi_t sig_mpi = NULL;
+
+  err = _gcry_md_open (&hd, md_algo, 0);
+  if (err)
+    {
+      errtxt = "gcry_md_open failed";
+      goto leave;
+    }
+
+  _gcry_md_write (hd, sample_data, sizeof(sample_data));
+
+  err = _gcry_pk_sign_md (&sig, data_tmpl, hd, skey, NULL);
+  if (err)
+    {
+      errtxt = "signing failed";
+      goto leave;
+    }
+
+  err = _gcry_mpi_scan(&ref_mpi, GCRYMPI_FMT_HEX, ref_data, 0, NULL);
+  if (err)
+    {
+      errtxt = "converting ref_data to mpi failed";
+      goto leave;
+    }
+
+  err = _gcry_sexp_extract_param(sig, "sig-val!rsa", "s", &sig_mpi, NULL);
+  if (err)
+    {
+      errtxt = "extracting signature data failed";
+      goto leave;
+    }
+
+  if (mpi_cmp (sig_mpi, ref_mpi))
+    {
+      errtxt = "signature does not match reference data";
+      goto leave;
+    }
+
+  err = _gcry_pk_verify_md (sig, data_tmpl, hd, pkey, NULL);
+  if (err)
+    {
+      errtxt = "verify failed";
+      goto leave;
+    }
+
+  _gcry_md_reset(hd);
+  _gcry_md_write (hd, sample_data_bad, sizeof(sample_data_bad));
+  err = _gcry_pk_verify_md (sig, data_tmpl, hd, pkey, NULL);
+  if (gcry_err_code (err) != GPG_ERR_BAD_SIGNATURE)
+    {
+      errtxt = "bad signature not detected";
+      goto leave;
+    }
+
+
+ leave:
+  sexp_release (sig);
+  _gcry_md_close (hd);
+  _gcry_mpi_release (ref_mpi);
+  _gcry_mpi_release (sig_mpi);
+  return errtxt;
+}
+
+static const char *
+selftest_sign_2048 (gcry_sexp_t pkey, gcry_sexp_t skey)
 {
   static const char sample_data[] =
     "(data (flags pkcs1)"
-    " (hash sha1 #11223344556677889900aabbccddeeff10203040#))";
+    " (hash sha256 #11223344556677889900aabbccddeeff"
+    /**/           "102030405060708090a0b0c0d0f01121#))";
   static const char sample_data_bad[] =
     "(data (flags pkcs1)"
-    " (hash sha1 #11223344556677889900aabbccddeeff80203040#))";
+    " (hash sha256 #11223344556677889900aabbccddeeff"
+    /**/           "802030405060708090a0b0c0d0f01121#))";
 
   const char *errtxt = NULL;
   gcry_error_t err;
   gcry_sexp_t data = NULL;
   gcry_sexp_t data_bad = NULL;
   gcry_sexp_t sig = NULL;
-
-  err = gcry_sexp_sscan (&data, NULL,
-                         sample_data, strlen (sample_data));
+  /* raw signature data reference */
+  const char ref_data[] =
+    "6252a19a11e1d5155ed9376036277193d644fa239397fff03e9b92d6f86415d6"
+    "d30da9273775f290e580d038295ff8ff89522becccfa6ae870bf76b76df402a8"
+    "54f69347e3db3de8e1e7d4dada281ec556810c7a8ecd0b5f51f9b1c0e7aa7557"
+    "61aa2b8ba5f811304acc6af0eca41fe49baf33bf34eddaf44e21e036ac7f0b68"
+    "03cdef1c60021fb7b5b97ebacdd88ab755ce29af568dbc5728cc6e6eff42618d"
+    "62a0386ca8beed46402bdeeef29b6a3feded906bace411a06a39192bf516ae10"
+    "67e4320fa8ea113968525f4574d022a3ceeaafdc41079efe1f22cc94bf59d8d3"
+    "328085da9674857db56de5978a62394aab48aa3b72e23a1b16260cfd9daafe65";
+  gcry_mpi_t ref_mpi = NULL;
+  gcry_mpi_t sig_mpi = NULL;
+
+  err = sexp_sscan (&data, NULL, sample_data, strlen (sample_data));
   if (!err)
-    err = gcry_sexp_sscan (&data_bad, NULL,
-                           sample_data_bad, strlen (sample_data_bad));
+    err = sexp_sscan (&data_bad, NULL,
+                      sample_data_bad, strlen (sample_data_bad));
   if (err)
     {
       errtxt = "converting data failed";
       goto leave;
     }
 
-  err = gcry_pk_sign (&sig, data, skey);
+  err = _gcry_pk_sign (&sig, data, skey);
   if (err)
     {
       errtxt = "signing failed";
       goto leave;
     }
-  err = gcry_pk_verify (sig, data, pkey);
+
+  err = _gcry_mpi_scan(&ref_mpi, GCRYMPI_FMT_HEX, ref_data, 0, NULL);
+  if (err)
+    {
+      errtxt = "converting ref_data to mpi failed";
+      goto leave;
+    }
+
+  err = _gcry_sexp_extract_param(sig, "sig-val!rsa", "s", &sig_mpi, NULL);
+  if (err)
+    {
+      errtxt = "extracting signature data failed";
+      goto leave;
+    }
+
+  if (mpi_cmp (sig_mpi, ref_mpi))
+    {
+      errtxt = "signature does not match reference data";
+      goto leave;
+    }
+
+  err = _gcry_pk_verify (sig, data, pkey);
   if (err)
     {
       errtxt = "verify failed";
       goto leave;
     }
-  err = gcry_pk_verify (sig, data_bad, pkey);
+  err = _gcry_pk_verify (sig, data_bad, pkey);
   if (gcry_err_code (err) != GPG_ERR_BAD_SIGNATURE)
     {
       errtxt = "bad signature not detected";
@@ -1147,9 +1996,11 @@ selftest_sign_1024 (gcry_sexp_t pkey, gcry_sexp_t skey)
 
 
  leave:
-  gcry_sexp_release (sig);
-  gcry_sexp_release (data_bad);
-  gcry_sexp_release (data);
+  sexp_release (sig);
+  sexp_release (data_bad);
+  sexp_release (data);
+  _gcry_mpi_release (ref_mpi);
+  _gcry_mpi_release (sig_mpi);
   return errtxt;
 }
 
@@ -1169,45 +2020,52 @@ extract_a_from_sexp (gcry_sexp_t encr_data)
   gcry_sexp_t l1, l2, l3;
   gcry_mpi_t a_value;
 
-  l1 = gcry_sexp_find_token (encr_data, "enc-val", 0);
+  l1 = sexp_find_token (encr_data, "enc-val", 0);
   if (!l1)
     return NULL;
-  l2 = gcry_sexp_find_token (l1, "rsa", 0);
-  gcry_sexp_release (l1);
+  l2 = sexp_find_token (l1, "rsa", 0);
+  sexp_release (l1);
   if (!l2)
     return NULL;
-  l3 = gcry_sexp_find_token (l2, "a", 0);
-  gcry_sexp_release (l2);
+  l3 = sexp_find_token (l2, "a", 0);
+  sexp_release (l2);
   if (!l3)
     return NULL;
-  a_value = gcry_sexp_nth_mpi (l3, 1, 0);
-  gcry_sexp_release (l3);
+  a_value = sexp_nth_mpi (l3, 1, 0);
+  sexp_release (l3);
 
   return a_value;
 }
 
 
 static const char *
-selftest_encr_1024 (gcry_sexp_t pkey, gcry_sexp_t skey)
+selftest_encr_2048 (gcry_sexp_t pkey, gcry_sexp_t skey)
 {
   const char *errtxt = NULL;
   gcry_error_t err;
-  const unsigned int nbits = 1000; /* Encrypt 1000 random bits.  */
-  gcry_mpi_t plaintext = NULL;
+  static const char plaintext[] =
+    "Jim quickly realized that the beautiful gowns are expensive.";
   gcry_sexp_t plain = NULL;
   gcry_sexp_t encr  = NULL;
   gcry_mpi_t  ciphertext = NULL;
   gcry_sexp_t decr  = NULL;
-  gcry_mpi_t  decr_plaintext = NULL;
+  char *decr_plaintext = NULL;
   gcry_sexp_t tmplist = NULL;
-
-  /* Create plaintext.  The plaintext is actually a big integer number.  */
-  plaintext = gcry_mpi_new (nbits);
-  gcry_mpi_randomize (plaintext, nbits, GCRY_WEAK_RANDOM);
+  /* expected result of encrypting the plaintext with sample_secret_key */
+  static const char ref_data[] =
+    "18022e2593a402a737caaa93b4c7e750e20ca265452980e1d6b7710fbd3e"
+    "7dce72be5c2110fb47691cb38f42170ee3b4a37f2498d4a51567d762585e"
+    "4cb81d04fbc7df4144f8e5eac2d4b8688521b64011f11d7ad53f4c874004"
+    "819856f2e2a6f83d1c9c4e73ac26089789c14482b0b8d44139133c88c4a5"
+    "2dba9dd6d6ffc622666b7d129168333d999706af30a2d7d272db7734e5ed"
+    "fb8c64ea3018af3ad20f4a013a5060cb0f5e72753967bebe294280a6ed0d"
+    "dbd3c4f11d0a8696e9d32a0dc03deb0b5e49b2cbd1503392642d4e1211f3"
+    "e8e2ee38abaa3671ccd57fcde8ca76e85fd2cb77c35706a970a213a27352"
+    "cec92a9604d543ddb5fc478ff50e0622";
+  gcry_mpi_t ref_mpi = NULL;
 
   /* Put the plaintext into an S-expression.  */
-  err = gcry_sexp_build (&plain, NULL,
-                         "(data (flags raw) (value %m))", plaintext);
+  err = sexp_build (&plain, NULL, "(data (flags raw) (value %s))", plaintext);
   if (err)
     {
       errtxt = "converting data failed";
@@ -1215,33 +2073,40 @@ selftest_encr_1024 (gcry_sexp_t pkey, gcry_sexp_t skey)
     }
 
   /* Encrypt.  */
-  err = gcry_pk_encrypt (&encr, plain, pkey);
+  err = _gcry_pk_encrypt (&encr, plain, pkey);
   if (err)
     {
       errtxt = "encrypt failed";
       goto leave;
     }
 
+  err = _gcry_mpi_scan(&ref_mpi, GCRYMPI_FMT_HEX, ref_data, 0, NULL);
+  if (err)
+    {
+      errtxt = "converting encrydata to mpi failed";
+      goto leave;
+    }
+
   /* Extraxt the ciphertext from the returned S-expression.  */
-  /*gcry_sexp_dump (encr);*/
+  /*sexp_dump (encr);*/
   ciphertext = extract_a_from_sexp (encr);
   if (!ciphertext)
     {
-      errtxt = "gcry_pk_decrypt returned garbage";
+      errtxt = "gcry_pk_encrypt returned garbage";
       goto leave;
     }
 
   /* Check that the ciphertext does no match the plaintext.  */
-  /* _gcry_log_mpidump ("plaintext", plaintext); */
-  /* _gcry_log_mpidump ("ciphertxt", ciphertext); */
-  if (!gcry_mpi_cmp (plaintext, ciphertext))
+  /* _gcry_log_printmpi ("plaintext", plaintext); */
+  /* _gcry_log_printmpi ("ciphertxt", ciphertext); */
+  if (mpi_cmp (ref_mpi, ciphertext))
     {
-      errtxt = "ciphertext matches plaintext";
+      errtxt = "ciphertext doesn't match reference data";
       goto leave;
     }
 
   /* Decrypt.  */
-  err = gcry_pk_decrypt (&decr, encr, skey);
+  err = _gcry_pk_decrypt (&decr, encr, skey);
   if (err)
     {
       errtxt = "decrypt failed";
@@ -1254,11 +2119,11 @@ selftest_encr_1024 (gcry_sexp_t pkey, gcry_sexp_t skey)
      gcry_pk_encrypt directly to gcry_pk_decrypt, such a flag value
      won't be there as of today.  To be prepared for future changes we
      take care of it anyway.  */
-  tmplist = gcry_sexp_find_token (decr, "value", 0);
+  tmplist = sexp_find_token (decr, "value", 0);
   if (tmplist)
-    decr_plaintext = gcry_sexp_nth_mpi (tmplist, 1, GCRYMPI_FMT_USG);
+    decr_plaintext = sexp_nth_string (tmplist, 1);
   else
-    decr_plaintext = gcry_sexp_nth_mpi (decr, 0, GCRYMPI_FMT_USG);
+    decr_plaintext = sexp_nth_string (decr, 0);
   if (!decr_plaintext)
     {
       errtxt = "decrypt returned no plaintext";
@@ -1266,26 +2131,26 @@ selftest_encr_1024 (gcry_sexp_t pkey, gcry_sexp_t skey)
     }
 
   /* Check that the decrypted plaintext matches the original  plaintext.  */
-  if (gcry_mpi_cmp (plaintext, decr_plaintext))
+  if (strcmp (plaintext, decr_plaintext))
     {
       errtxt = "mismatch";
       goto leave;
     }
 
  leave:
-  gcry_sexp_release (tmplist);
-  gcry_mpi_release (decr_plaintext);
-  gcry_sexp_release (decr);
-  gcry_mpi_release (ciphertext);
-  gcry_sexp_release (encr);
-  gcry_sexp_release (plain);
-  gcry_mpi_release (plaintext);
+  sexp_release (tmplist);
+  xfree (decr_plaintext);
+  sexp_release (decr);
+  _gcry_mpi_release (ciphertext);
+  _gcry_mpi_release (ref_mpi);
+  sexp_release (encr);
+  sexp_release (plain);
   return errtxt;
 }
 
 
 static gpg_err_code_t
-selftests_rsa (selftest_report_func_t report)
+selftests_rsa (selftest_report_func_t report, int extended)
 {
   const char *what;
   const char *errtxt;
@@ -1295,42 +2160,49 @@ selftests_rsa (selftest_report_func_t report)
 
   /* Convert the S-expressions into the internal representation.  */
   what = "convert";
-  err = gcry_sexp_sscan (&skey, NULL,
-                         sample_secret_key, strlen (sample_secret_key));
+  err = sexp_sscan (&skey, NULL, sample_secret_key, strlen 
(sample_secret_key));
   if (!err)
-    err = gcry_sexp_sscan (&pkey, NULL,
-                           sample_public_key, strlen (sample_public_key));
+    err = sexp_sscan (&pkey, NULL,
+                      sample_public_key, strlen (sample_public_key));
   if (err)
     {
-      errtxt = gcry_strerror (err);
+      errtxt = _gcry_strerror (err);
       goto failed;
     }
 
   what = "key consistency";
-  err = gcry_pk_testkey (skey);
+  err = _gcry_pk_testkey (skey);
   if (err)
     {
-      errtxt = gcry_strerror (err);
+      errtxt = _gcry_strerror (err);
       goto failed;
     }
 
-  what = "sign";
-  errtxt = selftest_sign_1024 (pkey, skey);
+  if (extended)
+    {
+      what = "sign";
+      errtxt = selftest_sign_2048 (pkey, skey);
+      if (errtxt)
+        goto failed;
+    }
+
+  what = "digest sign";
+  errtxt = selftest_hash_sign_2048 (pkey, skey);
   if (errtxt)
     goto failed;
 
   what = "encrypt";
-  errtxt = selftest_encr_1024 (pkey, skey);
+  errtxt = selftest_encr_2048 (pkey, skey);
   if (errtxt)
     goto failed;
 
-  gcry_sexp_release (pkey);
-  gcry_sexp_release (skey);
+  sexp_release (pkey);
+  sexp_release (skey);
   return 0; /* Succeeded. */
 
  failed:
-  gcry_sexp_release (pkey);
-  gcry_sexp_release (skey);
+  sexp_release (pkey);
+  sexp_release (skey);
   if (report)
     report ("pubkey", GCRY_PK_RSA, what, errtxt);
   return GPG_ERR_SELFTEST_FAILED;
@@ -1343,12 +2215,10 @@ run_selftests (int algo, int extended, 
selftest_report_func_t report)
 {
   gpg_err_code_t ec;
 
-  (void)extended;
-
   switch (algo)
     {
     case GCRY_PK_RSA:
-      ec = selftests_rsa (report);
+      ec = selftests_rsa (report, extended);
       break;
     default:
       ec = GPG_ERR_PUBKEY_ALGO;
@@ -1361,19 +2231,12 @@ run_selftests (int algo, int extended, 
selftest_report_func_t report)
 
 
 
-static const char *rsa_names[] =
-  {
-    "rsa",
-    "openpgp-rsa",
-    "oid.1.2.840.113549.1.1.1",
-    NULL,
-  };
-
 gcry_pk_spec_t _gcry_pubkey_spec_rsa =
   {
+    GCRY_PK_RSA, { 0, 1 },
+    (GCRY_PK_USAGE_SIGN | GCRY_PK_USAGE_ENCR),
     "RSA", rsa_names,
     "ne", "nedpqu", "a", "s", "n",
-    GCRY_PK_USAGE_SIGN | GCRY_PK_USAGE_ENCR,
     rsa_generate,
     rsa_check_secret_key,
     rsa_encrypt,
@@ -1381,10 +2244,6 @@ gcry_pk_spec_t _gcry_pubkey_spec_rsa =
     rsa_sign,
     rsa_verify,
     rsa_get_nbits,
-  };
-pk_extra_spec_t _gcry_pubkey_extraspec_rsa =
-  {
     run_selftests,
-    rsa_generate_ext,
     compute_keygrip
   };
diff --git a/grub-core/lib/libgcrypt/cipher/salsa20-amd64.S 
b/grub-core/lib/libgcrypt/cipher/salsa20-amd64.S
new file mode 100644
index 000000000..646260636
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/salsa20-amd64.S
@@ -0,0 +1,940 @@
+/* salsa20-amd64.S  -  AMD64 implementation of Salsa20
+ *
+ * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * Based on public domain implementation by D. J. Bernstein at
+ *  http://cr.yp.to/snuffle.html
+ */
+
+#ifdef __x86_64
+#include <config.h>
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+    defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && defined(USE_SALSA20)
+
+#include "asm-common-amd64.h"
+
+.text
+
+.align 8
+.globl _gcry_salsa20_amd64_keysetup
+ELF(.type  _gcry_salsa20_amd64_keysetup,@function;)
+_gcry_salsa20_amd64_keysetup:
+       CFI_STARTPROC();
+       movl   0(%rsi),%r8d
+       movl   4(%rsi),%r9d
+       movl   8(%rsi),%eax
+       movl   12(%rsi),%r10d
+       movl   %r8d,20(%rdi)
+       movl   %r9d,40(%rdi)
+       movl   %eax,60(%rdi)
+       movl   %r10d,48(%rdi)
+       cmp  $256,%rdx
+       jb .L_kbits128
+.L_kbits256:
+       movl   16(%rsi),%edx
+       movl   20(%rsi),%ecx
+       movl   24(%rsi),%r8d
+       movl   28(%rsi),%esi
+       movl   %edx,28(%rdi)
+       movl   %ecx,16(%rdi)
+       movl   %r8d,36(%rdi)
+       movl   %esi,56(%rdi)
+       mov  $1634760805,%rsi
+       mov  $857760878,%rdx
+       mov  $2036477234,%rcx
+       mov  $1797285236,%r8
+       movl   %esi,0(%rdi)
+       movl   %edx,4(%rdi)
+       movl   %ecx,8(%rdi)
+       movl   %r8d,12(%rdi)
+       jmp .L_keysetupdone
+.L_kbits128:
+       movl   0(%rsi),%edx
+       movl   4(%rsi),%ecx
+       movl   8(%rsi),%r8d
+       movl   12(%rsi),%esi
+       movl   %edx,28(%rdi)
+       movl   %ecx,16(%rdi)
+       movl   %r8d,36(%rdi)
+       movl   %esi,56(%rdi)
+       mov  $1634760805,%rsi
+       mov  $824206446,%rdx
+       mov  $2036477238,%rcx
+       mov  $1797285236,%r8
+       movl   %esi,0(%rdi)
+       movl   %edx,4(%rdi)
+       movl   %ecx,8(%rdi)
+       movl   %r8d,12(%rdi)
+.L_keysetupdone:
+       ret_spec_stop
+       CFI_ENDPROC();
+
+.align 8
+.globl _gcry_salsa20_amd64_ivsetup
+ELF(.type  _gcry_salsa20_amd64_ivsetup,@function;)
+_gcry_salsa20_amd64_ivsetup:
+       CFI_STARTPROC();
+       movl   0(%rsi),%r8d
+       movl   4(%rsi),%esi
+       mov  $0,%r9
+       mov  $0,%rax
+       movl   %r8d,24(%rdi)
+       movl   %esi,44(%rdi)
+       movl   %r9d,32(%rdi)
+       movl   %eax,52(%rdi)
+       ret_spec_stop
+       CFI_ENDPROC();
+
+.align 8
+.globl _gcry_salsa20_amd64_encrypt_blocks
+ELF(.type  _gcry_salsa20_amd64_encrypt_blocks,@function;)
+_gcry_salsa20_amd64_encrypt_blocks:
+       /*
+        * Modifications to original implementation:
+        *  - Number of rounds passing in register %r8 (for Salsa20/12).
+        *  - Length is input as number of blocks, so don't handle tail bytes
+        *    (this is done in salsa20.c).
+        */
+       CFI_STARTPROC();
+       push %rbx
+       CFI_PUSH(%rbx);
+       shlq $6, %rcx /* blocks to bytes */
+       mov %r8, %rbx
+       mov %rsp,%r11
+       CFI_DEF_CFA_REGISTER(%r11);
+       sub $384,%rsp
+       and $~31,%rsp
+       mov  %rdi,%r8
+       mov  %rsi,%rsi
+       mov  %rdx,%rdi
+       mov  %rcx,%rdx
+       cmp  $0,%rdx
+       jbe .L_done
+.L_start:
+       cmp  $256,%rdx
+       jb .L_bytes_are_64_128_or_192
+       movdqa 0(%r8),%xmm0
+       pshufd $0x55,%xmm0,%xmm1
+       pshufd $0xaa,%xmm0,%xmm2
+       pshufd $0xff,%xmm0,%xmm3
+       pshufd $0x00,%xmm0,%xmm0
+       movdqa %xmm1,0(%rsp)
+       movdqa %xmm2,16(%rsp)
+       movdqa %xmm3,32(%rsp)
+       movdqa %xmm0,48(%rsp)
+       movdqa 16(%r8),%xmm0
+       pshufd $0xaa,%xmm0,%xmm1
+       pshufd $0xff,%xmm0,%xmm2
+       pshufd $0x00,%xmm0,%xmm3
+       pshufd $0x55,%xmm0,%xmm0
+       movdqa %xmm1,64(%rsp)
+       movdqa %xmm2,80(%rsp)
+       movdqa %xmm3,96(%rsp)
+       movdqa %xmm0,112(%rsp)
+       movdqa 32(%r8),%xmm0
+       pshufd $0xff,%xmm0,%xmm1
+       pshufd $0x55,%xmm0,%xmm2
+       pshufd $0xaa,%xmm0,%xmm0
+       movdqa %xmm1,128(%rsp)
+       movdqa %xmm2,144(%rsp)
+       movdqa %xmm0,160(%rsp)
+       movdqa 48(%r8),%xmm0
+       pshufd $0x00,%xmm0,%xmm1
+       pshufd $0xaa,%xmm0,%xmm2
+       pshufd $0xff,%xmm0,%xmm0
+       movdqa %xmm1,176(%rsp)
+       movdqa %xmm2,192(%rsp)
+       movdqa %xmm0,208(%rsp)
+.L_bytesatleast256:
+       movl   32(%r8),%ecx
+       movl   52(%r8),%r9d
+       movl %ecx,224(%rsp)
+       movl %r9d,240(%rsp)
+       add  $1,%ecx
+       adc  $0,%r9d
+       movl %ecx,4+224(%rsp)
+       movl %r9d,4+240(%rsp)
+       add  $1,%ecx
+       adc  $0,%r9d
+       movl %ecx,8+224(%rsp)
+       movl %r9d,8+240(%rsp)
+       add  $1,%ecx
+       adc  $0,%r9d
+       movl %ecx,12+224(%rsp)
+       movl %r9d,12+240(%rsp)
+       add  $1,%ecx
+       adc  $0,%r9d
+       movl   %ecx,32(%r8)
+       movl   %r9d,52(%r8)
+       movq %rdx,288(%rsp)
+       mov  %rbx,%rdx
+       movdqa 0(%rsp),%xmm0
+       movdqa 16(%rsp),%xmm1
+       movdqa 32(%rsp),%xmm2
+       movdqa 192(%rsp),%xmm3
+       movdqa 208(%rsp),%xmm4
+       movdqa 64(%rsp),%xmm5
+       movdqa 80(%rsp),%xmm6
+       movdqa 112(%rsp),%xmm7
+       movdqa 128(%rsp),%xmm8
+       movdqa 144(%rsp),%xmm9
+       movdqa 160(%rsp),%xmm10
+       movdqa 240(%rsp),%xmm11
+       movdqa 48(%rsp),%xmm12
+       movdqa 96(%rsp),%xmm13
+       movdqa 176(%rsp),%xmm14
+       movdqa 224(%rsp),%xmm15
+.L_mainloop1:
+       movdqa %xmm1,256(%rsp)
+       movdqa %xmm2,272(%rsp)
+       movdqa %xmm13,%xmm1
+       paddd %xmm12,%xmm1
+       movdqa %xmm1,%xmm2
+       pslld $7,%xmm1
+       pxor  %xmm1,%xmm14
+       psrld $25,%xmm2
+       pxor  %xmm2,%xmm14
+       movdqa %xmm7,%xmm1
+       paddd %xmm0,%xmm1
+       movdqa %xmm1,%xmm2
+       pslld $7,%xmm1
+       pxor  %xmm1,%xmm11
+       psrld $25,%xmm2
+       pxor  %xmm2,%xmm11
+       movdqa %xmm12,%xmm1
+       paddd %xmm14,%xmm1
+       movdqa %xmm1,%xmm2
+       pslld $9,%xmm1
+       pxor  %xmm1,%xmm15
+       psrld $23,%xmm2
+       pxor  %xmm2,%xmm15
+       movdqa %xmm0,%xmm1
+       paddd %xmm11,%xmm1
+       movdqa %xmm1,%xmm2
+       pslld $9,%xmm1
+       pxor  %xmm1,%xmm9
+       psrld $23,%xmm2
+       pxor  %xmm2,%xmm9
+       movdqa %xmm14,%xmm1
+       paddd %xmm15,%xmm1
+       movdqa %xmm1,%xmm2
+       pslld $13,%xmm1
+       pxor  %xmm1,%xmm13
+       psrld $19,%xmm2
+       pxor  %xmm2,%xmm13
+       movdqa %xmm11,%xmm1
+       paddd %xmm9,%xmm1
+       movdqa %xmm1,%xmm2
+       pslld $13,%xmm1
+       pxor  %xmm1,%xmm7
+       psrld $19,%xmm2
+       pxor  %xmm2,%xmm7
+       movdqa %xmm15,%xmm1
+       paddd %xmm13,%xmm1
+       movdqa %xmm1,%xmm2
+       pslld $18,%xmm1
+       pxor  %xmm1,%xmm12
+       psrld $14,%xmm2
+       pxor  %xmm2,%xmm12
+       movdqa 256(%rsp),%xmm1
+       movdqa %xmm12,256(%rsp)
+       movdqa %xmm9,%xmm2
+       paddd %xmm7,%xmm2
+       movdqa %xmm2,%xmm12
+       pslld $18,%xmm2
+       pxor  %xmm2,%xmm0
+       psrld $14,%xmm12
+       pxor  %xmm12,%xmm0
+       movdqa %xmm5,%xmm2
+       paddd %xmm1,%xmm2
+       movdqa %xmm2,%xmm12
+       pslld $7,%xmm2
+       pxor  %xmm2,%xmm3
+       psrld $25,%xmm12
+       pxor  %xmm12,%xmm3
+       movdqa 272(%rsp),%xmm2
+       movdqa %xmm0,272(%rsp)
+       movdqa %xmm6,%xmm0
+       paddd %xmm2,%xmm0
+       movdqa %xmm0,%xmm12
+       pslld $7,%xmm0
+       pxor  %xmm0,%xmm4
+       psrld $25,%xmm12
+       pxor  %xmm12,%xmm4
+       movdqa %xmm1,%xmm0
+       paddd %xmm3,%xmm0
+       movdqa %xmm0,%xmm12
+       pslld $9,%xmm0
+       pxor  %xmm0,%xmm10
+       psrld $23,%xmm12
+       pxor  %xmm12,%xmm10
+       movdqa %xmm2,%xmm0
+       paddd %xmm4,%xmm0
+       movdqa %xmm0,%xmm12
+       pslld $9,%xmm0
+       pxor  %xmm0,%xmm8
+       psrld $23,%xmm12
+       pxor  %xmm12,%xmm8
+       movdqa %xmm3,%xmm0
+       paddd %xmm10,%xmm0
+       movdqa %xmm0,%xmm12
+       pslld $13,%xmm0
+       pxor  %xmm0,%xmm5
+       psrld $19,%xmm12
+       pxor  %xmm12,%xmm5
+       movdqa %xmm4,%xmm0
+       paddd %xmm8,%xmm0
+       movdqa %xmm0,%xmm12
+       pslld $13,%xmm0
+       pxor  %xmm0,%xmm6
+       psrld $19,%xmm12
+       pxor  %xmm12,%xmm6
+       movdqa %xmm10,%xmm0
+       paddd %xmm5,%xmm0
+       movdqa %xmm0,%xmm12
+       pslld $18,%xmm0
+       pxor  %xmm0,%xmm1
+       psrld $14,%xmm12
+       pxor  %xmm12,%xmm1
+       movdqa 256(%rsp),%xmm0
+       movdqa %xmm1,256(%rsp)
+       movdqa %xmm4,%xmm1
+       paddd %xmm0,%xmm1
+       movdqa %xmm1,%xmm12
+       pslld $7,%xmm1
+       pxor  %xmm1,%xmm7
+       psrld $25,%xmm12
+       pxor  %xmm12,%xmm7
+       movdqa %xmm8,%xmm1
+       paddd %xmm6,%xmm1
+       movdqa %xmm1,%xmm12
+       pslld $18,%xmm1
+       pxor  %xmm1,%xmm2
+       psrld $14,%xmm12
+       pxor  %xmm12,%xmm2
+       movdqa 272(%rsp),%xmm12
+       movdqa %xmm2,272(%rsp)
+       movdqa %xmm14,%xmm1
+       paddd %xmm12,%xmm1
+       movdqa %xmm1,%xmm2
+       pslld $7,%xmm1
+       pxor  %xmm1,%xmm5
+       psrld $25,%xmm2
+       pxor  %xmm2,%xmm5
+       movdqa %xmm0,%xmm1
+       paddd %xmm7,%xmm1
+       movdqa %xmm1,%xmm2
+       pslld $9,%xmm1
+       pxor  %xmm1,%xmm10
+       psrld $23,%xmm2
+       pxor  %xmm2,%xmm10
+       movdqa %xmm12,%xmm1
+       paddd %xmm5,%xmm1
+       movdqa %xmm1,%xmm2
+       pslld $9,%xmm1
+       pxor  %xmm1,%xmm8
+       psrld $23,%xmm2
+       pxor  %xmm2,%xmm8
+       movdqa %xmm7,%xmm1
+       paddd %xmm10,%xmm1
+       movdqa %xmm1,%xmm2
+       pslld $13,%xmm1
+       pxor  %xmm1,%xmm4
+       psrld $19,%xmm2
+       pxor  %xmm2,%xmm4
+       movdqa %xmm5,%xmm1
+       paddd %xmm8,%xmm1
+       movdqa %xmm1,%xmm2
+       pslld $13,%xmm1
+       pxor  %xmm1,%xmm14
+       psrld $19,%xmm2
+       pxor  %xmm2,%xmm14
+       movdqa %xmm10,%xmm1
+       paddd %xmm4,%xmm1
+       movdqa %xmm1,%xmm2
+       pslld $18,%xmm1
+       pxor  %xmm1,%xmm0
+       psrld $14,%xmm2
+       pxor  %xmm2,%xmm0
+       movdqa 256(%rsp),%xmm1
+       movdqa %xmm0,256(%rsp)
+       movdqa %xmm8,%xmm0
+       paddd %xmm14,%xmm0
+       movdqa %xmm0,%xmm2
+       pslld $18,%xmm0
+       pxor  %xmm0,%xmm12
+       psrld $14,%xmm2
+       pxor  %xmm2,%xmm12
+       movdqa %xmm11,%xmm0
+       paddd %xmm1,%xmm0
+       movdqa %xmm0,%xmm2
+       pslld $7,%xmm0
+       pxor  %xmm0,%xmm6
+       psrld $25,%xmm2
+       pxor  %xmm2,%xmm6
+       movdqa 272(%rsp),%xmm2
+       movdqa %xmm12,272(%rsp)
+       movdqa %xmm3,%xmm0
+       paddd %xmm2,%xmm0
+       movdqa %xmm0,%xmm12
+       pslld $7,%xmm0
+       pxor  %xmm0,%xmm13
+       psrld $25,%xmm12
+       pxor  %xmm12,%xmm13
+       movdqa %xmm1,%xmm0
+       paddd %xmm6,%xmm0
+       movdqa %xmm0,%xmm12
+       pslld $9,%xmm0
+       pxor  %xmm0,%xmm15
+       psrld $23,%xmm12
+       pxor  %xmm12,%xmm15
+       movdqa %xmm2,%xmm0
+       paddd %xmm13,%xmm0
+       movdqa %xmm0,%xmm12
+       pslld $9,%xmm0
+       pxor  %xmm0,%xmm9
+       psrld $23,%xmm12
+       pxor  %xmm12,%xmm9
+       movdqa %xmm6,%xmm0
+       paddd %xmm15,%xmm0
+       movdqa %xmm0,%xmm12
+       pslld $13,%xmm0
+       pxor  %xmm0,%xmm11
+       psrld $19,%xmm12
+       pxor  %xmm12,%xmm11
+       movdqa %xmm13,%xmm0
+       paddd %xmm9,%xmm0
+       movdqa %xmm0,%xmm12
+       pslld $13,%xmm0
+       pxor  %xmm0,%xmm3
+       psrld $19,%xmm12
+       pxor  %xmm12,%xmm3
+       movdqa %xmm15,%xmm0
+       paddd %xmm11,%xmm0
+       movdqa %xmm0,%xmm12
+       pslld $18,%xmm0
+       pxor  %xmm0,%xmm1
+       psrld $14,%xmm12
+       pxor  %xmm12,%xmm1
+       movdqa %xmm9,%xmm0
+       paddd %xmm3,%xmm0
+       movdqa %xmm0,%xmm12
+       pslld $18,%xmm0
+       pxor  %xmm0,%xmm2
+       psrld $14,%xmm12
+       pxor  %xmm12,%xmm2
+       movdqa 256(%rsp),%xmm12
+       movdqa 272(%rsp),%xmm0
+       sub  $2,%rdx
+       ja .L_mainloop1
+       paddd 48(%rsp),%xmm12
+       paddd 112(%rsp),%xmm7
+       paddd 160(%rsp),%xmm10
+       paddd 208(%rsp),%xmm4
+       movd   %xmm12,%rdx
+       movd   %xmm7,%rcx
+       movd   %xmm10,%r9
+       movd   %xmm4,%rax
+       pshufd $0x39,%xmm12,%xmm12
+       pshufd $0x39,%xmm7,%xmm7
+       pshufd $0x39,%xmm10,%xmm10
+       pshufd $0x39,%xmm4,%xmm4
+       xorl 0(%rsi),%edx
+       xorl 4(%rsi),%ecx
+       xorl 8(%rsi),%r9d
+       xorl 12(%rsi),%eax
+       movl   %edx,0(%rdi)
+       movl   %ecx,4(%rdi)
+       movl   %r9d,8(%rdi)
+       movl   %eax,12(%rdi)
+       movd   %xmm12,%rdx
+       movd   %xmm7,%rcx
+       movd   %xmm10,%r9
+       movd   %xmm4,%rax
+       pshufd $0x39,%xmm12,%xmm12
+       pshufd $0x39,%xmm7,%xmm7
+       pshufd $0x39,%xmm10,%xmm10
+       pshufd $0x39,%xmm4,%xmm4
+       xorl 64(%rsi),%edx
+       xorl 68(%rsi),%ecx
+       xorl 72(%rsi),%r9d
+       xorl 76(%rsi),%eax
+       movl   %edx,64(%rdi)
+       movl   %ecx,68(%rdi)
+       movl   %r9d,72(%rdi)
+       movl   %eax,76(%rdi)
+       movd   %xmm12,%rdx
+       movd   %xmm7,%rcx
+       movd   %xmm10,%r9
+       movd   %xmm4,%rax
+       pshufd $0x39,%xmm12,%xmm12
+       pshufd $0x39,%xmm7,%xmm7
+       pshufd $0x39,%xmm10,%xmm10
+       pshufd $0x39,%xmm4,%xmm4
+       xorl 128(%rsi),%edx
+       xorl 132(%rsi),%ecx
+       xorl 136(%rsi),%r9d
+       xorl 140(%rsi),%eax
+       movl   %edx,128(%rdi)
+       movl   %ecx,132(%rdi)
+       movl   %r9d,136(%rdi)
+       movl   %eax,140(%rdi)
+       movd   %xmm12,%rdx
+       movd   %xmm7,%rcx
+       movd   %xmm10,%r9
+       movd   %xmm4,%rax
+       xorl 192(%rsi),%edx
+       xorl 196(%rsi),%ecx
+       xorl 200(%rsi),%r9d
+       xorl 204(%rsi),%eax
+       movl   %edx,192(%rdi)
+       movl   %ecx,196(%rdi)
+       movl   %r9d,200(%rdi)
+       movl   %eax,204(%rdi)
+       paddd 176(%rsp),%xmm14
+       paddd 0(%rsp),%xmm0
+       paddd 64(%rsp),%xmm5
+       paddd 128(%rsp),%xmm8
+       movd   %xmm14,%rdx
+       movd   %xmm0,%rcx
+       movd   %xmm5,%r9
+       movd   %xmm8,%rax
+       pshufd $0x39,%xmm14,%xmm14
+       pshufd $0x39,%xmm0,%xmm0
+       pshufd $0x39,%xmm5,%xmm5
+       pshufd $0x39,%xmm8,%xmm8
+       xorl 16(%rsi),%edx
+       xorl 20(%rsi),%ecx
+       xorl 24(%rsi),%r9d
+       xorl 28(%rsi),%eax
+       movl   %edx,16(%rdi)
+       movl   %ecx,20(%rdi)
+       movl   %r9d,24(%rdi)
+       movl   %eax,28(%rdi)
+       movd   %xmm14,%rdx
+       movd   %xmm0,%rcx
+       movd   %xmm5,%r9
+       movd   %xmm8,%rax
+       pshufd $0x39,%xmm14,%xmm14
+       pshufd $0x39,%xmm0,%xmm0
+       pshufd $0x39,%xmm5,%xmm5
+       pshufd $0x39,%xmm8,%xmm8
+       xorl 80(%rsi),%edx
+       xorl 84(%rsi),%ecx
+       xorl 88(%rsi),%r9d
+       xorl 92(%rsi),%eax
+       movl   %edx,80(%rdi)
+       movl   %ecx,84(%rdi)
+       movl   %r9d,88(%rdi)
+       movl   %eax,92(%rdi)
+       movd   %xmm14,%rdx
+       movd   %xmm0,%rcx
+       movd   %xmm5,%r9
+       movd   %xmm8,%rax
+       pshufd $0x39,%xmm14,%xmm14
+       pshufd $0x39,%xmm0,%xmm0
+       pshufd $0x39,%xmm5,%xmm5
+       pshufd $0x39,%xmm8,%xmm8
+       xorl 144(%rsi),%edx
+       xorl 148(%rsi),%ecx
+       xorl 152(%rsi),%r9d
+       xorl 156(%rsi),%eax
+       movl   %edx,144(%rdi)
+       movl   %ecx,148(%rdi)
+       movl   %r9d,152(%rdi)
+       movl   %eax,156(%rdi)
+       movd   %xmm14,%rdx
+       movd   %xmm0,%rcx
+       movd   %xmm5,%r9
+       movd   %xmm8,%rax
+       xorl 208(%rsi),%edx
+       xorl 212(%rsi),%ecx
+       xorl 216(%rsi),%r9d
+       xorl 220(%rsi),%eax
+       movl   %edx,208(%rdi)
+       movl   %ecx,212(%rdi)
+       movl   %r9d,216(%rdi)
+       movl   %eax,220(%rdi)
+       paddd 224(%rsp),%xmm15
+       paddd 240(%rsp),%xmm11
+       paddd 16(%rsp),%xmm1
+       paddd 80(%rsp),%xmm6
+       movd   %xmm15,%rdx
+       movd   %xmm11,%rcx
+       movd   %xmm1,%r9
+       movd   %xmm6,%rax
+       pshufd $0x39,%xmm15,%xmm15
+       pshufd $0x39,%xmm11,%xmm11
+       pshufd $0x39,%xmm1,%xmm1
+       pshufd $0x39,%xmm6,%xmm6
+       xorl 32(%rsi),%edx
+       xorl 36(%rsi),%ecx
+       xorl 40(%rsi),%r9d
+       xorl 44(%rsi),%eax
+       movl   %edx,32(%rdi)
+       movl   %ecx,36(%rdi)
+       movl   %r9d,40(%rdi)
+       movl   %eax,44(%rdi)
+       movd   %xmm15,%rdx
+       movd   %xmm11,%rcx
+       movd   %xmm1,%r9
+       movd   %xmm6,%rax
+       pshufd $0x39,%xmm15,%xmm15
+       pshufd $0x39,%xmm11,%xmm11
+       pshufd $0x39,%xmm1,%xmm1
+       pshufd $0x39,%xmm6,%xmm6
+       xorl 96(%rsi),%edx
+       xorl 100(%rsi),%ecx
+       xorl 104(%rsi),%r9d
+       xorl 108(%rsi),%eax
+       movl   %edx,96(%rdi)
+       movl   %ecx,100(%rdi)
+       movl   %r9d,104(%rdi)
+       movl   %eax,108(%rdi)
+       movd   %xmm15,%rdx
+       movd   %xmm11,%rcx
+       movd   %xmm1,%r9
+       movd   %xmm6,%rax
+       pshufd $0x39,%xmm15,%xmm15
+       pshufd $0x39,%xmm11,%xmm11
+       pshufd $0x39,%xmm1,%xmm1
+       pshufd $0x39,%xmm6,%xmm6
+       xorl 160(%rsi),%edx
+       xorl 164(%rsi),%ecx
+       xorl 168(%rsi),%r9d
+       xorl 172(%rsi),%eax
+       movl   %edx,160(%rdi)
+       movl   %ecx,164(%rdi)
+       movl   %r9d,168(%rdi)
+       movl   %eax,172(%rdi)
+       movd   %xmm15,%rdx
+       movd   %xmm11,%rcx
+       movd   %xmm1,%r9
+       movd   %xmm6,%rax
+       xorl 224(%rsi),%edx
+       xorl 228(%rsi),%ecx
+       xorl 232(%rsi),%r9d
+       xorl 236(%rsi),%eax
+       movl   %edx,224(%rdi)
+       movl   %ecx,228(%rdi)
+       movl   %r9d,232(%rdi)
+       movl   %eax,236(%rdi)
+       paddd 96(%rsp),%xmm13
+       paddd 144(%rsp),%xmm9
+       paddd 192(%rsp),%xmm3
+       paddd 32(%rsp),%xmm2
+       movd   %xmm13,%rdx
+       movd   %xmm9,%rcx
+       movd   %xmm3,%r9
+       movd   %xmm2,%rax
+       pshufd $0x39,%xmm13,%xmm13
+       pshufd $0x39,%xmm9,%xmm9
+       pshufd $0x39,%xmm3,%xmm3
+       pshufd $0x39,%xmm2,%xmm2
+       xorl 48(%rsi),%edx
+       xorl 52(%rsi),%ecx
+       xorl 56(%rsi),%r9d
+       xorl 60(%rsi),%eax
+       movl   %edx,48(%rdi)
+       movl   %ecx,52(%rdi)
+       movl   %r9d,56(%rdi)
+       movl   %eax,60(%rdi)
+       movd   %xmm13,%rdx
+       movd   %xmm9,%rcx
+       movd   %xmm3,%r9
+       movd   %xmm2,%rax
+       pshufd $0x39,%xmm13,%xmm13
+       pshufd $0x39,%xmm9,%xmm9
+       pshufd $0x39,%xmm3,%xmm3
+       pshufd $0x39,%xmm2,%xmm2
+       xorl 112(%rsi),%edx
+       xorl 116(%rsi),%ecx
+       xorl 120(%rsi),%r9d
+       xorl 124(%rsi),%eax
+       movl   %edx,112(%rdi)
+       movl   %ecx,116(%rdi)
+       movl   %r9d,120(%rdi)
+       movl   %eax,124(%rdi)
+       movd   %xmm13,%rdx
+       movd   %xmm9,%rcx
+       movd   %xmm3,%r9
+       movd   %xmm2,%rax
+       pshufd $0x39,%xmm13,%xmm13
+       pshufd $0x39,%xmm9,%xmm9
+       pshufd $0x39,%xmm3,%xmm3
+       pshufd $0x39,%xmm2,%xmm2
+       xorl 176(%rsi),%edx
+       xorl 180(%rsi),%ecx
+       xorl 184(%rsi),%r9d
+       xorl 188(%rsi),%eax
+       movl   %edx,176(%rdi)
+       movl   %ecx,180(%rdi)
+       movl   %r9d,184(%rdi)
+       movl   %eax,188(%rdi)
+       movd   %xmm13,%rdx
+       movd   %xmm9,%rcx
+       movd   %xmm3,%r9
+       movd   %xmm2,%rax
+       xorl 240(%rsi),%edx
+       xorl 244(%rsi),%ecx
+       xorl 248(%rsi),%r9d
+       xorl 252(%rsi),%eax
+       movl   %edx,240(%rdi)
+       movl   %ecx,244(%rdi)
+       movl   %r9d,248(%rdi)
+       movl   %eax,252(%rdi)
+       movq 288(%rsp),%rdx
+       sub  $256,%rdx
+       add  $256,%rsi
+       add  $256,%rdi
+       cmp  $256,%rdx
+       jae .L_bytesatleast256
+       cmp  $0,%rdx
+       jbe .L_done
+.L_bytes_are_64_128_or_192:
+       movq %rdx,288(%rsp)
+       movdqa 0(%r8),%xmm0
+       movdqa 16(%r8),%xmm1
+       movdqa 32(%r8),%xmm2
+       movdqa 48(%r8),%xmm3
+       movdqa %xmm1,%xmm4
+       mov  %rbx,%rdx
+.L_mainloop2:
+       paddd %xmm0,%xmm4
+       movdqa %xmm0,%xmm5
+       movdqa %xmm4,%xmm6
+       pslld $7,%xmm4
+       psrld $25,%xmm6
+       pxor  %xmm4,%xmm3
+       pxor  %xmm6,%xmm3
+       paddd %xmm3,%xmm5
+       movdqa %xmm3,%xmm4
+       movdqa %xmm5,%xmm6
+       pslld $9,%xmm5
+       psrld $23,%xmm6
+       pxor  %xmm5,%xmm2
+       pshufd $0x93,%xmm3,%xmm3
+       pxor  %xmm6,%xmm2
+       paddd %xmm2,%xmm4
+       movdqa %xmm2,%xmm5
+       movdqa %xmm4,%xmm6
+       pslld $13,%xmm4
+       psrld $19,%xmm6
+       pxor  %xmm4,%xmm1
+       pshufd $0x4e,%xmm2,%xmm2
+       pxor  %xmm6,%xmm1
+       paddd %xmm1,%xmm5
+       movdqa %xmm3,%xmm4
+       movdqa %xmm5,%xmm6
+       pslld $18,%xmm5
+       psrld $14,%xmm6
+       pxor  %xmm5,%xmm0
+       pshufd $0x39,%xmm1,%xmm1
+       pxor  %xmm6,%xmm0
+       paddd %xmm0,%xmm4
+       movdqa %xmm0,%xmm5
+       movdqa %xmm4,%xmm6
+       pslld $7,%xmm4
+       psrld $25,%xmm6
+       pxor  %xmm4,%xmm1
+       pxor  %xmm6,%xmm1
+       paddd %xmm1,%xmm5
+       movdqa %xmm1,%xmm4
+       movdqa %xmm5,%xmm6
+       pslld $9,%xmm5
+       psrld $23,%xmm6
+       pxor  %xmm5,%xmm2
+       pshufd $0x93,%xmm1,%xmm1
+       pxor  %xmm6,%xmm2
+       paddd %xmm2,%xmm4
+       movdqa %xmm2,%xmm5
+       movdqa %xmm4,%xmm6
+       pslld $13,%xmm4
+       psrld $19,%xmm6
+       pxor  %xmm4,%xmm3
+       pshufd $0x4e,%xmm2,%xmm2
+       pxor  %xmm6,%xmm3
+       paddd %xmm3,%xmm5
+       movdqa %xmm1,%xmm4
+       movdqa %xmm5,%xmm6
+       pslld $18,%xmm5
+       psrld $14,%xmm6
+       pxor  %xmm5,%xmm0
+       pshufd $0x39,%xmm3,%xmm3
+       pxor  %xmm6,%xmm0
+       paddd %xmm0,%xmm4
+       movdqa %xmm0,%xmm5
+       movdqa %xmm4,%xmm6
+       pslld $7,%xmm4
+       psrld $25,%xmm6
+       pxor  %xmm4,%xmm3
+       pxor  %xmm6,%xmm3
+       paddd %xmm3,%xmm5
+       movdqa %xmm3,%xmm4
+       movdqa %xmm5,%xmm6
+       pslld $9,%xmm5
+       psrld $23,%xmm6
+       pxor  %xmm5,%xmm2
+       pshufd $0x93,%xmm3,%xmm3
+       pxor  %xmm6,%xmm2
+       paddd %xmm2,%xmm4
+       movdqa %xmm2,%xmm5
+       movdqa %xmm4,%xmm6
+       pslld $13,%xmm4
+       psrld $19,%xmm6
+       pxor  %xmm4,%xmm1
+       pshufd $0x4e,%xmm2,%xmm2
+       pxor  %xmm6,%xmm1
+       paddd %xmm1,%xmm5
+       movdqa %xmm3,%xmm4
+       movdqa %xmm5,%xmm6
+       pslld $18,%xmm5
+       psrld $14,%xmm6
+       pxor  %xmm5,%xmm0
+       pshufd $0x39,%xmm1,%xmm1
+       pxor  %xmm6,%xmm0
+       paddd %xmm0,%xmm4
+       movdqa %xmm0,%xmm5
+       movdqa %xmm4,%xmm6
+       pslld $7,%xmm4
+       psrld $25,%xmm6
+       pxor  %xmm4,%xmm1
+       pxor  %xmm6,%xmm1
+       paddd %xmm1,%xmm5
+       movdqa %xmm1,%xmm4
+       movdqa %xmm5,%xmm6
+       pslld $9,%xmm5
+       psrld $23,%xmm6
+       pxor  %xmm5,%xmm2
+       pshufd $0x93,%xmm1,%xmm1
+       pxor  %xmm6,%xmm2
+       paddd %xmm2,%xmm4
+       movdqa %xmm2,%xmm5
+       movdqa %xmm4,%xmm6
+       pslld $13,%xmm4
+       psrld $19,%xmm6
+       pxor  %xmm4,%xmm3
+       pshufd $0x4e,%xmm2,%xmm2
+       pxor  %xmm6,%xmm3
+       sub  $4,%rdx
+       paddd %xmm3,%xmm5
+       movdqa %xmm1,%xmm4
+       movdqa %xmm5,%xmm6
+       pslld $18,%xmm5
+       pxor   %xmm7,%xmm7
+       psrld $14,%xmm6
+       pxor  %xmm5,%xmm0
+       pshufd $0x39,%xmm3,%xmm3
+       pxor  %xmm6,%xmm0
+       ja .L_mainloop2
+       paddd 0(%r8),%xmm0
+       paddd 16(%r8),%xmm1
+       paddd 32(%r8),%xmm2
+       paddd 48(%r8),%xmm3
+       movd   %xmm0,%rdx
+       movd   %xmm1,%rcx
+       movd   %xmm2,%rax
+       movd   %xmm3,%r10
+       pshufd $0x39,%xmm0,%xmm0
+       pshufd $0x39,%xmm1,%xmm1
+       pshufd $0x39,%xmm2,%xmm2
+       pshufd $0x39,%xmm3,%xmm3
+       xorl 0(%rsi),%edx
+       xorl 48(%rsi),%ecx
+       xorl 32(%rsi),%eax
+       xorl 16(%rsi),%r10d
+       movl   %edx,0(%rdi)
+       movl   %ecx,48(%rdi)
+       movl   %eax,32(%rdi)
+       movl   %r10d,16(%rdi)
+       movd   %xmm0,%rdx
+       movd   %xmm1,%rcx
+       movd   %xmm2,%rax
+       movd   %xmm3,%r10
+       pshufd $0x39,%xmm0,%xmm0
+       pshufd $0x39,%xmm1,%xmm1
+       pshufd $0x39,%xmm2,%xmm2
+       pshufd $0x39,%xmm3,%xmm3
+       xorl 20(%rsi),%edx
+       xorl 4(%rsi),%ecx
+       xorl 52(%rsi),%eax
+       xorl 36(%rsi),%r10d
+       movl   %edx,20(%rdi)
+       movl   %ecx,4(%rdi)
+       movl   %eax,52(%rdi)
+       movl   %r10d,36(%rdi)
+       movd   %xmm0,%rdx
+       movd   %xmm1,%rcx
+       movd   %xmm2,%rax
+       movd   %xmm3,%r10
+       pshufd $0x39,%xmm0,%xmm0
+       pshufd $0x39,%xmm1,%xmm1
+       pshufd $0x39,%xmm2,%xmm2
+       pshufd $0x39,%xmm3,%xmm3
+       xorl 40(%rsi),%edx
+       xorl 24(%rsi),%ecx
+       xorl 8(%rsi),%eax
+       xorl 56(%rsi),%r10d
+       movl   %edx,40(%rdi)
+       movl   %ecx,24(%rdi)
+       movl   %eax,8(%rdi)
+       movl   %r10d,56(%rdi)
+       movd   %xmm0,%rdx
+       movd   %xmm1,%rcx
+       movd   %xmm2,%rax
+       movd   %xmm3,%r10
+       xorl 60(%rsi),%edx
+       xorl 44(%rsi),%ecx
+       xorl 28(%rsi),%eax
+       xorl 12(%rsi),%r10d
+       movl   %edx,60(%rdi)
+       movl   %ecx,44(%rdi)
+       movl   %eax,28(%rdi)
+       movl   %r10d,12(%rdi)
+       movq 288(%rsp),%rdx
+       movl   32(%r8),%ecx
+       movl   52(%r8),%eax
+       add  $1,%ecx
+       adc  $0,%eax
+       movl   %ecx,32(%r8)
+       movl   %eax,52(%r8)
+       cmp  $64,%rdx
+       ja .L_bytes_are_128_or_192
+.L_done:
+       CFI_REMEMBER_STATE();
+       mov %r11,%rax
+       sub %rsp,%rax
+       mov %r11,%rsp
+       CFI_REGISTER(%r11, %rsp)
+       CFI_DEF_CFA_REGISTER(%rsp)
+       pop %rbx
+       CFI_POP(%rbx)
+       ret_spec_stop
+       CFI_RESTORE_STATE();
+.L_bytes_are_128_or_192:
+       sub  $64,%rdx
+       add  $64,%rdi
+       add  $64,%rsi
+       jmp .L_bytes_are_64_128_or_192
+       CFI_ENDPROC();
+ELF(.size 
_gcry_salsa20_amd64_encrypt_blocks,.-_gcry_salsa20_amd64_encrypt_blocks;)
+
+#endif /*defined(USE_SALSA20)*/
+#endif /*__x86_64*/
diff --git a/grub-core/lib/libgcrypt/cipher/salsa20-armv7-neon.S 
b/grub-core/lib/libgcrypt/cipher/salsa20-armv7-neon.S
new file mode 100644
index 000000000..3686e3fa6
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/salsa20-armv7-neon.S
@@ -0,0 +1,899 @@
+/* salsa-armv7-neon.S  -  ARM NEON implementation of Salsa20 cipher
+ *
+ * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+
+#if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) && \
+    defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) && \
+    defined(HAVE_GCC_INLINE_ASM_NEON) && defined(USE_SALSA20)
+
+/*
+ * Based on public domain implementation from SUPERCOP benchmarking framework
+ * by Peter Schwabe and D. J. Bernstein. Paper about the implementation at:
+ *   http://cryptojedi.org/papers/#neoncrypto
+ */
+
+.syntax unified
+.arm
+.fpu neon
+.text
+
+.align 2
+.globl _gcry_arm_neon_salsa20_encrypt
+.type  _gcry_arm_neon_salsa20_encrypt,%function;
+_gcry_arm_neon_salsa20_encrypt:
+       /* Modifications:
+        *  - arguments changed to (void *c, const void *m, unsigned int nblks,
+         *    void *ctx, unsigned int rounds) from (void *c, const void *m,
+         *    unsigned long long mlen, const void *n, const void *k)
+        *  - nonce and key read from 'ctx' as well as sigma and counter.
+        *  - read in counter from 'ctx' at the start.
+         *  - update counter in 'ctx' at the end.
+        *  - length is input as number of blocks, so don't handle tail bytes
+        *    (this is done in salsa20.c).
+        */
+       lsl r2,r2,#6
+       vpush {q4,q5,q6,q7}
+       mov r12,sp
+       sub sp,sp,#352
+       and sp,sp,#0xffffffe0
+       strd r4,[sp,#0]
+       strd r6,[sp,#8]
+       strd r8,[sp,#16]
+       strd r10,[sp,#24]
+       str r14,[sp,#224]
+       str r12,[sp,#228]
+       str r0,[sp,#232]
+       str r1,[sp,#236]
+       str r2,[sp,#240]
+       ldr r4,[r12,#64]
+       str r4,[sp,#244]
+       mov r2,r3
+       add r3,r2,#48
+       vld1.8 {q3},[r2]
+       add r0,r2,#32
+       add r14,r2,#40
+       vmov.i64 q3,#0xff
+       str r14,[sp,#160]
+       ldrd r8,[r2,#4]
+       vld1.8 {d0},[r0]
+       ldrd r4,[r2,#20]
+       vld1.8 {d8-d9},[r2]!
+       ldrd r6,[r0,#0]
+       vmov d4,d9
+       ldr r0,[r14]
+       vrev64.i32 d0,d0
+       ldr r1,[r14,#4]
+       vld1.8 {d10-d11},[r2]
+       strd r6,[sp,#32]
+       sub r2,r2,#16
+       strd r0,[sp,#40]
+       vmov d5,d11
+       strd r8,[sp,#48]
+       vext.32 d1,d0,d10,#1
+       strd r4,[sp,#56]
+       ldr r1,[r2,#0]
+       vshr.u32 q3,q3,#7
+       ldr r4,[r2,#12]
+       vext.32 d3,d11,d9,#1
+       ldr r11,[r2,#16]
+       vext.32 d2,d8,d0,#1
+       ldr r8,[r2,#28]
+       vext.32 d0,d10,d8,#1
+       ldr r0,[r3,#0]
+       add r2,r2,#44
+       vmov q4,q3
+       vld1.8 {d6-d7},[r14]
+       vadd.i64 q3,q3,q4
+       ldr r5,[r3,#4]
+       add r12,sp,#256
+       vst1.8 {d4-d5},[r12,: 128]
+       ldr r10,[r3,#8]
+       add r14,sp,#272
+       vst1.8 {d2-d3},[r14,: 128]
+       ldr r9,[r3,#12]
+       vld1.8 {d2-d3},[r3]
+       strd r0,[sp,#64]
+       ldr r0,[sp,#240]
+       strd r4,[sp,#72]
+       strd r10,[sp,#80]
+       strd r8,[sp,#88]
+       nop
+       cmp r0,#192
+       blo .L_mlenlowbelow192
+.L_mlenatleast192:
+       ldrd r2,[sp,#48]
+       vext.32 d7,d6,d6,#1
+       vmov q8,q1
+       ldrd r6,[sp,#32]
+       vld1.8 {d18-d19},[r12,: 128]
+       vmov q10,q0
+       str r0,[sp,#240]
+       vext.32 d4,d7,d19,#1
+       vmov q11,q8
+       vext.32 d10,d18,d7,#1
+       vadd.i64 q3,q3,q4
+       ldrd r0,[sp,#64]
+       vld1.8 {d24-d25},[r14,: 128]
+       vmov d5,d24
+       add r8,sp,#288
+       ldrd r4,[sp,#72]
+       vmov d11,d25
+       add r9,sp,#304
+       ldrd r10,[sp,#80]
+       vst1.8 {d4-d5},[r8,: 128]
+       strd r2,[sp,#96]
+       vext.32 d7,d6,d6,#1
+       vmov q13,q10
+       strd r6,[sp,#104]
+       vmov d13,d24
+       vst1.8 {d10-d11},[r9,: 128]
+       add r2,sp,#320
+       vext.32 d12,d7,d19,#1
+       vmov d15,d25
+       add r6,sp,#336
+       ldr r12,[sp,#244]
+       vext.32 d14,d18,d7,#1
+       vadd.i64 q3,q3,q4
+       ldrd r8,[sp,#88]
+       vst1.8 {d12-d13},[r2,: 128]
+       ldrd r2,[sp,#56]
+       vst1.8 {d14-d15},[r6,: 128]
+       ldrd r6,[sp,#40]
+.L_mainloop2:
+       str r12,[sp,#248]
+       vadd.i32 q4,q10,q8
+       vadd.i32 q9,q13,q11
+       add r12,r0,r2
+       add r14,r5,r1
+       vshl.i32 q12,q4,#7
+       vshl.i32 q14,q9,#7
+       vshr.u32 q4,q4,#25
+       vshr.u32 q9,q9,#25
+       eor r4,r4,r12,ROR #25
+       eor r7,r7,r14,ROR #25
+       add r12,r4,r0
+       add r14,r7,r5
+       veor q5,q5,q12
+       veor q7,q7,q14
+       veor q4,q5,q4
+       veor q5,q7,q9
+       eor r6,r6,r12,ROR #23
+       eor r3,r3,r14,ROR #23
+       add r12,r6,r4
+       str r7,[sp,#116]
+       add r7,r3,r7
+       ldr r14,[sp,#108]
+       vadd.i32 q7,q8,q4
+       vadd.i32 q9,q11,q5
+       vshl.i32 q12,q7,#9
+       vshl.i32 q14,q9,#9
+       vshr.u32 q7,q7,#23
+       vshr.u32 q9,q9,#23
+       veor q2,q2,q12
+       veor q6,q6,q14
+       veor q2,q2,q7
+       veor q6,q6,q9
+       eor r2,r2,r12,ROR #19
+       str r2,[sp,#120]
+       eor r1,r1,r7,ROR #19
+       ldr r7,[sp,#96]
+       add r2,r2,r6
+       str r6,[sp,#112]
+       add r6,r1,r3
+       ldr r12,[sp,#104]
+       vadd.i32 q7,q4,q2
+       vext.32 q4,q4,q4,#3
+       vadd.i32 q9,q5,q6
+       vshl.i32 q12,q7,#13
+       vext.32 q5,q5,q5,#3
+       vshl.i32 q14,q9,#13
+       eor r0,r0,r2,ROR #14
+       eor r2,r5,r6,ROR #14
+       str r3,[sp,#124]
+       add r3,r10,r12
+       ldr r5,[sp,#100]
+       add r6,r9,r11
+       vshr.u32 q7,q7,#19
+       vshr.u32 q9,q9,#19
+       veor q10,q10,q12
+       veor q12,q13,q14
+       eor r8,r8,r3,ROR #25
+       eor r3,r5,r6,ROR #25
+       add r5,r8,r10
+       add r6,r3,r9
+       veor q7,q10,q7
+       veor q9,q12,q9
+       eor r5,r7,r5,ROR #23
+       eor r6,r14,r6,ROR #23
+       add r7,r5,r8
+       add r14,r6,r3
+       vadd.i32 q10,q2,q7
+       vswp d4,d5
+       vadd.i32 q12,q6,q9
+       vshl.i32 q13,q10,#18
+       vswp d12,d13
+       vshl.i32 q14,q12,#18
+       eor r7,r12,r7,ROR #19
+       eor r11,r11,r14,ROR #19
+       add r12,r7,r5
+       add r14,r11,r6
+       vshr.u32 q10,q10,#14
+       vext.32 q7,q7,q7,#1
+       vshr.u32 q12,q12,#14
+       veor q8,q8,q13
+       vext.32 q9,q9,q9,#1
+       veor q11,q11,q14
+       eor r10,r10,r12,ROR #14
+       eor r9,r9,r14,ROR #14
+       add r12,r0,r3
+       add r14,r2,r4
+       veor q8,q8,q10
+       veor q10,q11,q12
+       eor r1,r1,r12,ROR #25
+       eor r7,r7,r14,ROR #25
+       add r12,r1,r0
+       add r14,r7,r2
+       vadd.i32 q11,q4,q8
+       vadd.i32 q12,q5,q10
+       vshl.i32 q13,q11,#7
+       vshl.i32 q14,q12,#7
+       eor r5,r5,r12,ROR #23
+       eor r6,r6,r14,ROR #23
+       vshr.u32 q11,q11,#25
+       vshr.u32 q12,q12,#25
+       add r12,r5,r1
+       add r14,r6,r7
+       veor q7,q7,q13
+       veor q9,q9,q14
+       veor q7,q7,q11
+       veor q9,q9,q12
+       vadd.i32 q11,q8,q7
+       vadd.i32 q12,q10,q9
+       vshl.i32 q13,q11,#9
+       vshl.i32 q14,q12,#9
+       eor r3,r3,r12,ROR #19
+       str r7,[sp,#104]
+       eor r4,r4,r14,ROR #19
+       ldr r7,[sp,#112]
+       add r12,r3,r5
+       str r6,[sp,#108]
+       add r6,r4,r6
+       ldr r14,[sp,#116]
+       eor r0,r0,r12,ROR #14
+       str r5,[sp,#96]
+       eor r5,r2,r6,ROR #14
+       ldr r2,[sp,#120]
+       vshr.u32 q11,q11,#23
+       vshr.u32 q12,q12,#23
+       veor q2,q2,q13
+       veor q6,q6,q14
+       veor q2,q2,q11
+       veor q6,q6,q12
+       add r6,r10,r14
+       add r12,r9,r8
+       vadd.i32 q11,q7,q2
+       vext.32 q7,q7,q7,#3
+       vadd.i32 q12,q9,q6
+       vshl.i32 q13,q11,#13
+       vext.32 q9,q9,q9,#3
+       vshl.i32 q14,q12,#13
+       vshr.u32 q11,q11,#19
+       vshr.u32 q12,q12,#19
+       eor r11,r11,r6,ROR #25
+       eor r2,r2,r12,ROR #25
+       add r6,r11,r10
+       str r3,[sp,#100]
+       add r3,r2,r9
+       ldr r12,[sp,#124]
+       veor q4,q4,q13
+       veor q5,q5,q14
+       veor q4,q4,q11
+       veor q5,q5,q12
+       eor r6,r7,r6,ROR #23
+       eor r3,r12,r3,ROR #23
+       add r7,r6,r11
+       add r12,r3,r2
+       vadd.i32 q11,q2,q4
+       vswp d4,d5
+       vadd.i32 q12,q6,q5
+       vshl.i32 q13,q11,#18
+       vswp d12,d13
+       vshl.i32 q14,q12,#18
+       eor r7,r14,r7,ROR #19
+       eor r8,r8,r12,ROR #19
+       add r12,r7,r6
+       add r14,r8,r3
+       vshr.u32 q11,q11,#14
+       vext.32 q4,q4,q4,#1
+       vshr.u32 q12,q12,#14
+       veor q8,q8,q13
+       vext.32 q5,q5,q5,#1
+       veor q10,q10,q14
+       eor r10,r10,r12,ROR #14
+       veor q8,q8,q11
+       eor r9,r9,r14,ROR #14
+       veor q10,q10,q12
+       vadd.i32 q11,q7,q8
+       vadd.i32 q12,q9,q10
+       add r12,r0,r2
+       add r14,r5,r1
+       vshl.i32 q13,q11,#7
+       vshl.i32 q14,q12,#7
+       vshr.u32 q11,q11,#25
+       vshr.u32 q12,q12,#25
+       eor r4,r4,r12,ROR #25
+       eor r7,r7,r14,ROR #25
+       add r12,r4,r0
+       add r14,r7,r5
+       veor q4,q4,q13
+       veor q5,q5,q14
+       veor q4,q4,q11
+       veor q5,q5,q12
+       eor r6,r6,r12,ROR #23
+       eor r3,r3,r14,ROR #23
+       add r12,r6,r4
+       str r7,[sp,#116]
+       add r7,r3,r7
+       ldr r14,[sp,#108]
+       vadd.i32 q11,q8,q4
+       vadd.i32 q12,q10,q5
+       vshl.i32 q13,q11,#9
+       vshl.i32 q14,q12,#9
+       vshr.u32 q11,q11,#23
+       vshr.u32 q12,q12,#23
+       veor q2,q2,q13
+       veor q6,q6,q14
+       veor q2,q2,q11
+       veor q6,q6,q12
+       eor r2,r2,r12,ROR #19
+       str r2,[sp,#120]
+       eor r1,r1,r7,ROR #19
+       ldr r7,[sp,#96]
+       add r2,r2,r6
+       str r6,[sp,#112]
+       add r6,r1,r3
+       ldr r12,[sp,#104]
+       vadd.i32 q11,q4,q2
+       vext.32 q4,q4,q4,#3
+       vadd.i32 q12,q5,q6
+       vshl.i32 q13,q11,#13
+       vext.32 q5,q5,q5,#3
+       vshl.i32 q14,q12,#13
+       eor r0,r0,r2,ROR #14
+       eor r2,r5,r6,ROR #14
+       str r3,[sp,#124]
+       add r3,r10,r12
+       ldr r5,[sp,#100]
+       add r6,r9,r11
+       vshr.u32 q11,q11,#19
+       vshr.u32 q12,q12,#19
+       veor q7,q7,q13
+       veor q9,q9,q14
+       eor r8,r8,r3,ROR #25
+       eor r3,r5,r6,ROR #25
+       add r5,r8,r10
+       add r6,r3,r9
+       veor q7,q7,q11
+       veor q9,q9,q12
+       eor r5,r7,r5,ROR #23
+       eor r6,r14,r6,ROR #23
+       add r7,r5,r8
+       add r14,r6,r3
+       vadd.i32 q11,q2,q7
+       vswp d4,d5
+       vadd.i32 q12,q6,q9
+       vshl.i32 q13,q11,#18
+       vswp d12,d13
+       vshl.i32 q14,q12,#18
+       eor r7,r12,r7,ROR #19
+       eor r11,r11,r14,ROR #19
+       add r12,r7,r5
+       add r14,r11,r6
+       vshr.u32 q11,q11,#14
+       vext.32 q7,q7,q7,#1
+       vshr.u32 q12,q12,#14
+       veor q8,q8,q13
+       vext.32 q9,q9,q9,#1
+       veor q10,q10,q14
+       eor r10,r10,r12,ROR #14
+       eor r9,r9,r14,ROR #14
+       add r12,r0,r3
+       add r14,r2,r4
+       veor q8,q8,q11
+       veor q11,q10,q12
+       eor r1,r1,r12,ROR #25
+       eor r7,r7,r14,ROR #25
+       add r12,r1,r0
+       add r14,r7,r2
+       vadd.i32 q10,q4,q8
+       vadd.i32 q12,q5,q11
+       vshl.i32 q13,q10,#7
+       vshl.i32 q14,q12,#7
+       eor r5,r5,r12,ROR #23
+       eor r6,r6,r14,ROR #23
+       vshr.u32 q10,q10,#25
+       vshr.u32 q12,q12,#25
+       add r12,r5,r1
+       add r14,r6,r7
+       veor q7,q7,q13
+       veor q9,q9,q14
+       veor q7,q7,q10
+       veor q9,q9,q12
+       vadd.i32 q10,q8,q7
+       vadd.i32 q12,q11,q9
+       vshl.i32 q13,q10,#9
+       vshl.i32 q14,q12,#9
+       eor r3,r3,r12,ROR #19
+       str r7,[sp,#104]
+       eor r4,r4,r14,ROR #19
+       ldr r7,[sp,#112]
+       add r12,r3,r5
+       str r6,[sp,#108]
+       add r6,r4,r6
+       ldr r14,[sp,#116]
+       eor r0,r0,r12,ROR #14
+       str r5,[sp,#96]
+       eor r5,r2,r6,ROR #14
+       ldr r2,[sp,#120]
+       vshr.u32 q10,q10,#23
+       vshr.u32 q12,q12,#23
+       veor q2,q2,q13
+       veor q6,q6,q14
+       veor q2,q2,q10
+       veor q6,q6,q12
+       add r6,r10,r14
+       add r12,r9,r8
+       vadd.i32 q12,q7,q2
+       vext.32 q10,q7,q7,#3
+       vadd.i32 q7,q9,q6
+       vshl.i32 q14,q12,#13
+       vext.32 q13,q9,q9,#3
+       vshl.i32 q9,q7,#13
+       vshr.u32 q12,q12,#19
+       vshr.u32 q7,q7,#19
+       eor r11,r11,r6,ROR #25
+       eor r2,r2,r12,ROR #25
+       add r6,r11,r10
+       str r3,[sp,#100]
+       add r3,r2,r9
+       ldr r12,[sp,#124]
+       veor q4,q4,q14
+       veor q5,q5,q9
+       veor q4,q4,q12
+       veor q7,q5,q7
+       eor r6,r7,r6,ROR #23
+       eor r3,r12,r3,ROR #23
+       add r7,r6,r11
+       add r12,r3,r2
+       vadd.i32 q5,q2,q4
+       vswp d4,d5
+       vadd.i32 q9,q6,q7
+       vshl.i32 q12,q5,#18
+       vswp d12,d13
+       vshl.i32 q14,q9,#18
+       eor r7,r14,r7,ROR #19
+       eor r8,r8,r12,ROR #19
+       add r12,r7,r6
+       add r14,r8,r3
+       vshr.u32 q15,q5,#14
+       vext.32 q5,q4,q4,#1
+       vshr.u32 q4,q9,#14
+       veor q8,q8,q12
+       vext.32 q7,q7,q7,#1
+       veor q9,q11,q14
+       eor r10,r10,r12,ROR #14
+       ldr r12,[sp,#248]
+       veor q8,q8,q15
+       eor r9,r9,r14,ROR #14
+       veor q11,q9,q4
+       subs r12,r12,#4
+       bhi .L_mainloop2
+       strd r8,[sp,#112]
+       ldrd r8,[sp,#64]
+       strd r2,[sp,#120]
+       ldrd r2,[sp,#96]
+       add r0,r0,r8
+       strd r10,[sp,#96]
+       add r1,r1,r9
+       ldrd r10,[sp,#48]
+       ldrd r8,[sp,#72]
+       add r2,r2,r10
+       strd r6,[sp,#128]
+       add r3,r3,r11
+       ldrd r6,[sp,#104]
+       ldrd r10,[sp,#32]
+       ldr r12,[sp,#236]
+       add r4,r4,r8
+       add r5,r5,r9
+       add r6,r6,r10
+       add r7,r7,r11
+       cmp r12,#0
+       beq .L_nomessage1
+       ldr r8,[r12,#0]
+       ldr r9,[r12,#4]
+       ldr r10,[r12,#8]
+       ldr r11,[r12,#12]
+       eor r0,r0,r8
+       ldr r8,[r12,#16]
+       eor r1,r1,r9
+       ldr r9,[r12,#20]
+       eor r2,r2,r10
+       ldr r10,[r12,#24]
+       eor r3,r3,r11
+       ldr r11,[r12,#28]
+       eor r4,r4,r8
+       eor r5,r5,r9
+       eor r6,r6,r10
+       eor r7,r7,r11
+.L_nomessage1:
+       ldr r14,[sp,#232]
+       vadd.i32 q4,q8,q1
+       str r0,[r14,#0]
+       add r0,sp,#304
+       str r1,[r14,#4]
+       vld1.8 {d16-d17},[r0,: 128]
+       str r2,[r14,#8]
+       vadd.i32 q5,q8,q5
+       str r3,[r14,#12]
+       add r0,sp,#288
+       str r4,[r14,#16]
+       vld1.8 {d16-d17},[r0,: 128]
+       str r5,[r14,#20]
+       vadd.i32 q9,q10,q0
+       str r6,[r14,#24]
+       vadd.i32 q2,q8,q2
+       str r7,[r14,#28]
+       vmov.i64 q8,#0xffffffff
+       ldrd r6,[sp,#128]
+       vext.32 d20,d8,d10,#1
+       ldrd r0,[sp,#40]
+       vext.32 d25,d9,d11,#1
+       ldrd r2,[sp,#120]
+       vbif q4,q9,q8
+       ldrd r4,[sp,#56]
+       vext.32 d21,d5,d19,#1
+       add r6,r6,r0
+       vext.32 d24,d4,d18,#1
+       add r7,r7,r1
+       vbif q2,q5,q8
+       add r2,r2,r4
+       vrev64.i32 q5,q10
+       add r3,r3,r5
+       vrev64.i32 q9,q12
+       adds r0,r0,#3
+       vswp d5,d9
+       adc r1,r1,#0
+       strd r0,[sp,#40]
+       ldrd r8,[sp,#112]
+       ldrd r0,[sp,#88]
+       ldrd r10,[sp,#96]
+       ldrd r4,[sp,#80]
+       add r0,r8,r0
+       add r1,r9,r1
+       add r4,r10,r4
+       add r5,r11,r5
+       add r8,r14,#64
+       cmp r12,#0
+       beq .L_nomessage2
+       ldr r9,[r12,#32]
+       ldr r10,[r12,#36]
+       ldr r11,[r12,#40]
+       ldr r14,[r12,#44]
+       eor r6,r6,r9
+       ldr r9,[r12,#48]
+       eor r7,r7,r10
+       ldr r10,[r12,#52]
+       eor r4,r4,r11
+       ldr r11,[r12,#56]
+       eor r5,r5,r14
+       ldr r14,[r12,#60]
+       add r12,r12,#64
+       eor r2,r2,r9
+       vld1.8 {d20-d21},[r12]!
+       veor q4,q4,q10
+       eor r3,r3,r10
+       vld1.8 {d20-d21},[r12]!
+       veor q5,q5,q10
+       eor r0,r0,r11
+       vld1.8 {d20-d21},[r12]!
+       veor q2,q2,q10
+       eor r1,r1,r14
+       vld1.8 {d20-d21},[r12]!
+       veor q9,q9,q10
+.L_nomessage2:
+       vst1.8 {d8-d9},[r8]!
+       vst1.8 {d10-d11},[r8]!
+       vmov.i64 q4,#0xff
+       vst1.8 {d4-d5},[r8]!
+       vst1.8 {d18-d19},[r8]!
+       str r6,[r8,#-96]
+       add r6,sp,#336
+       str r7,[r8,#-92]
+       add r7,sp,#320
+       str r4,[r8,#-88]
+       vadd.i32 q2,q11,q1
+       vld1.8 {d10-d11},[r6,: 128]
+       vadd.i32 q5,q5,q7
+       vld1.8 {d14-d15},[r7,: 128]
+       vadd.i32 q9,q13,q0
+       vadd.i32 q6,q7,q6
+       str r5,[r8,#-84]
+       vext.32 d14,d4,d10,#1
+       str r2,[r8,#-80]
+       vext.32 d21,d5,d11,#1
+       str r3,[r8,#-76]
+       vbif q2,q9,q8
+       str r0,[r8,#-72]
+       vext.32 d15,d13,d19,#1
+       vshr.u32 q4,q4,#7
+       str r1,[r8,#-68]
+       vext.32 d20,d12,d18,#1
+       vbif q6,q5,q8
+       ldr r0,[sp,#240]
+       vrev64.i32 q5,q7
+       vrev64.i32 q7,q10
+       vswp d13,d5
+       vadd.i64 q3,q3,q4
+       sub r0,r0,#192
+       cmp r12,#0
+       beq .L_nomessage21
+       vld1.8 {d16-d17},[r12]!
+       veor q2,q2,q8
+       vld1.8 {d16-d17},[r12]!
+       veor q5,q5,q8
+       vld1.8 {d16-d17},[r12]!
+       veor q6,q6,q8
+       vld1.8 {d16-d17},[r12]!
+       veor q7,q7,q8
+.L_nomessage21:
+       vst1.8 {d4-d5},[r8]!
+       vst1.8 {d10-d11},[r8]!
+       vst1.8 {d12-d13},[r8]!
+       vst1.8 {d14-d15},[r8]!
+       str r12,[sp,#236]
+       add r14,sp,#272
+       add r12,sp,#256
+       str r8,[sp,#232]
+       cmp r0,#192
+       bhs .L_mlenatleast192
+.L_mlenlowbelow192:
+       cmp r0,#0
+       beq .L_done
+       b .L_mlenatleast1
+.L_nextblock:
+       sub r0,r0,#64
+.L_mlenatleast1:
+.L_handleblock:
+       str r0,[sp,#248]
+       ldrd r2,[sp,#48]
+       ldrd r6,[sp,#32]
+       ldrd r0,[sp,#64]
+       ldrd r4,[sp,#72]
+       ldrd r10,[sp,#80]
+       ldrd r8,[sp,#88]
+       strd r2,[sp,#96]
+       strd r6,[sp,#104]
+       ldrd r2,[sp,#56]
+       ldrd r6,[sp,#40]
+       ldr r12,[sp,#244]
+.L_mainloop1:
+       str r12,[sp,#252]
+       add r12,r0,r2
+       add r14,r5,r1
+       eor r4,r4,r12,ROR #25
+       eor r7,r7,r14,ROR #25
+       add r12,r4,r0
+       add r14,r7,r5
+       eor r6,r6,r12,ROR #23
+       eor r3,r3,r14,ROR #23
+       add r12,r6,r4
+       str r7,[sp,#132]
+       add r7,r3,r7
+       ldr r14,[sp,#104]
+       eor r2,r2,r12,ROR #19
+       str r6,[sp,#128]
+       eor r1,r1,r7,ROR #19
+       ldr r7,[sp,#100]
+       add r6,r2,r6
+       str r2,[sp,#120]
+       add r2,r1,r3
+       ldr r12,[sp,#96]
+       eor r0,r0,r6,ROR #14
+       str r3,[sp,#124]
+       eor r2,r5,r2,ROR #14
+       ldr r3,[sp,#108]
+       add r5,r10,r14
+       add r6,r9,r11
+       eor r8,r8,r5,ROR #25
+       eor r5,r7,r6,ROR #25
+       add r6,r8,r10
+       add r7,r5,r9
+       eor r6,r12,r6,ROR #23
+       eor r3,r3,r7,ROR #23
+       add r7,r6,r8
+       add r12,r3,r5
+       eor r7,r14,r7,ROR #19
+       eor r11,r11,r12,ROR #19
+       add r12,r7,r6
+       add r14,r11,r3
+       eor r10,r10,r12,ROR #14
+       eor r9,r9,r14,ROR #14
+       add r12,r0,r5
+       add r14,r2,r4
+       eor r1,r1,r12,ROR #25
+       eor r7,r7,r14,ROR #25
+       add r12,r1,r0
+       add r14,r7,r2
+       eor r6,r6,r12,ROR #23
+       eor r3,r3,r14,ROR #23
+       add r12,r6,r1
+       str r7,[sp,#104]
+       add r7,r3,r7
+       ldr r14,[sp,#128]
+       eor r5,r5,r12,ROR #19
+       str r3,[sp,#108]
+       eor r4,r4,r7,ROR #19
+       ldr r7,[sp,#132]
+       add r12,r5,r6
+       str r6,[sp,#96]
+       add r3,r4,r3
+       ldr r6,[sp,#120]
+       eor r0,r0,r12,ROR #14
+       str r5,[sp,#100]
+       eor r5,r2,r3,ROR #14
+       ldr r3,[sp,#124]
+       add r2,r10,r7
+       add r12,r9,r8
+       eor r11,r11,r2,ROR #25
+       eor r2,r6,r12,ROR #25
+       add r6,r11,r10
+       add r12,r2,r9
+       eor r6,r14,r6,ROR #23
+       eor r3,r3,r12,ROR #23
+       add r12,r6,r11
+       add r14,r3,r2
+       eor r7,r7,r12,ROR #19
+       eor r8,r8,r14,ROR #19
+       add r12,r7,r6
+       add r14,r8,r3
+       eor r10,r10,r12,ROR #14
+       eor r9,r9,r14,ROR #14
+       ldr r12,[sp,#252]
+       subs r12,r12,#2
+       bhi .L_mainloop1
+       strd r6,[sp,#128]
+       strd r2,[sp,#120]
+       strd r10,[sp,#112]
+       strd r8,[sp,#136]
+       ldrd r2,[sp,#96]
+       ldrd r6,[sp,#104]
+       ldrd r8,[sp,#64]
+       ldrd r10,[sp,#48]
+       add r0,r0,r8
+       add r1,r1,r9
+       add r2,r2,r10
+       add r3,r3,r11
+       ldrd r8,[sp,#72]
+       ldrd r10,[sp,#32]
+       add r4,r4,r8
+       add r5,r5,r9
+       add r6,r6,r10
+       add r7,r7,r11
+       ldr r12,[sp,#236]
+       cmp r12,#0
+       beq .L_nomessage10
+       ldr r8,[r12,#0]
+       ldr r9,[r12,#4]
+       ldr r10,[r12,#8]
+       ldr r11,[r12,#12]
+       eor r0,r0,r8
+       ldr r8,[r12,#16]
+       eor r1,r1,r9
+       ldr r9,[r12,#20]
+       eor r2,r2,r10
+       ldr r10,[r12,#24]
+       eor r3,r3,r11
+       ldr r11,[r12,#28]
+       eor r4,r4,r8
+       eor r5,r5,r9
+       eor r6,r6,r10
+       eor r7,r7,r11
+.L_nomessage10:
+       ldr r14,[sp,#232]
+       str r0,[r14,#0]
+       str r1,[r14,#4]
+       str r2,[r14,#8]
+       str r3,[r14,#12]
+       str r4,[r14,#16]
+       str r5,[r14,#20]
+       str r6,[r14,#24]
+       str r7,[r14,#28]
+       ldrd r6,[sp,#128]
+       ldrd r10,[sp,#112]
+       ldrd r0,[sp,#40]
+       ldrd r4,[sp,#80]
+       add r6,r6,r0
+       add r7,r7,r1
+       add r10,r10,r4
+       add r11,r11,r5
+       adds r0,r0,#1
+       adc r1,r1,#0
+       strd r0,[sp,#40]
+       ldrd r2,[sp,#120]
+       ldrd r8,[sp,#136]
+       ldrd r4,[sp,#56]
+       ldrd r0,[sp,#88]
+       add r2,r2,r4
+       add r3,r3,r5
+       add r0,r8,r0
+       add r1,r9,r1
+       cmp r12,#0
+       beq .L_nomessage11
+       ldr r4,[r12,#32]
+       ldr r5,[r12,#36]
+       ldr r8,[r12,#40]
+       ldr r9,[r12,#44]
+       eor r6,r6,r4
+       ldr r4,[r12,#48]
+       eor r7,r7,r5
+       ldr r5,[r12,#52]
+       eor r10,r10,r8
+       ldr r8,[r12,#56]
+       eor r11,r11,r9
+       ldr r9,[r12,#60]
+       eor r2,r2,r4
+       eor r3,r3,r5
+       eor r0,r0,r8
+       eor r1,r1,r9
+       add r4,r12,#64
+       str r4,[sp,#236]
+.L_nomessage11:
+       str r6,[r14,#32]
+       str r7,[r14,#36]
+       str r10,[r14,#40]
+       str r11,[r14,#44]
+       str r2,[r14,#48]
+       str r3,[r14,#52]
+       str r0,[r14,#56]
+       str r1,[r14,#60]
+       add r0,r14,#64
+       str r0,[sp,#232]
+       ldr r0,[sp,#248]
+       cmp r0,#64
+       bhi .L_nextblock
+.L_done:
+       ldr r2,[sp,#160]
+       ldrd r4,[sp,#0]
+       ldrd r6,[sp,#8]
+       ldrd r8,[sp,#16]
+       ldrd r10,[sp,#24]
+       ldr r12,[sp,#228]
+       ldr r14,[sp,#224]
+       ldrd r0,[sp,#40]
+       strd r0,[r2]
+       sub r0,r12,sp
+       mov sp,r12
+       vpop {q4,q5,q6,q7}
+       add r0,r0,#64
+       bx lr
+.size _gcry_arm_neon_salsa20_encrypt,.-_gcry_arm_neon_salsa20_encrypt;
+
+#endif
diff --git a/grub-core/lib/libgcrypt/cipher/salsa20.c 
b/grub-core/lib/libgcrypt/cipher/salsa20.c
new file mode 100644
index 000000000..d8c5c81f3
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/salsa20.c
@@ -0,0 +1,600 @@
+/* salsa20.c  -  Bernstein's Salsa20 cipher
+ * Copyright (C) 2012 Simon Josefsson, Niels Möller
+ * Copyright (C) 2013 g10 Code GmbH
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser general Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * For a description of the algorithm, see:
+ *   http://cr.yp.to/snuffle/spec.pdf
+ *   http://cr.yp.to/snuffle/design.pdf
+ */
+
+/* The code is based on the code in Nettle
+   (git commit id 9d2d8ddaee35b91a4e1a32ae77cba04bea3480e7)
+   which in turn is based on
+   salsa20-ref.c version 20051118
+   D. J. Bernstein
+   Public domain.
+*/
+
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "types.h"
+#include "g10lib.h"
+#include "cipher.h"
+#include "bufhelp.h"
+#include "cipher-internal.h"
+
+
+/* USE_AMD64 indicates whether to compile with AMD64 code. */
+#undef USE_AMD64
+#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+    defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+# define USE_AMD64 1
+#endif
+
+/* USE_ARM_NEON_ASM indicates whether to enable ARM NEON assembly code. */
+#undef USE_ARM_NEON_ASM
+#ifdef ENABLE_NEON_SUPPORT
+# if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) \
+     && defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) \
+     && defined(HAVE_GCC_INLINE_ASM_NEON)
+#  define USE_ARM_NEON_ASM 1
+# endif
+#endif /*ENABLE_NEON_SUPPORT*/
+
+
+#define SALSA20_MIN_KEY_SIZE 16  /* Bytes.  */
+#define SALSA20_MAX_KEY_SIZE 32  /* Bytes.  */
+#define SALSA20_BLOCK_SIZE   64  /* Bytes.  */
+#define SALSA20_IV_SIZE       8  /* Bytes.  */
+#define SALSA20_INPUT_LENGTH 16  /* Bytes.  */
+
+/* Number of rounds.  The standard uses 20 rounds.  In any case the
+   number of rounds must be even.  */
+#define SALSA20_ROUNDS       20
+#define SALSA20R12_ROUNDS    12
+
+
+struct SALSA20_context_s;
+
+typedef unsigned int (*salsa20_core_t) (u32 *dst, struct SALSA20_context_s 
*ctx,
+                                        unsigned int rounds);
+typedef void (* salsa20_keysetup_t)(struct SALSA20_context_s *ctx,
+                                    const byte *key, int keylen);
+typedef void (* salsa20_ivsetup_t)(struct SALSA20_context_s *ctx,
+                                   const byte *iv);
+
+typedef struct SALSA20_context_s
+{
+  /* Indices 1-4 and 11-14 holds the key (two identical copies for the
+     shorter key size), indices 0, 5, 10, 15 are constant, indices 6, 7
+     are the IV, and indices 8, 9 are the block counter:
+
+     C K K K
+     K C I I
+     B B C K
+     K K K C
+  */
+  u32 input[SALSA20_INPUT_LENGTH];
+  u32 pad[SALSA20_INPUT_LENGTH];
+  unsigned int unused; /* bytes in the pad.  */
+#ifdef USE_ARM_NEON_ASM
+  int use_neon;
+#endif
+  salsa20_keysetup_t keysetup;
+  salsa20_ivsetup_t ivsetup;
+  salsa20_core_t core;
+} SALSA20_context_t;
+
+
+/* The masking of the right shift is needed to allow n == 0 (using
+   just 32 - n and 64 - n results in undefined behaviour). Most uses
+   of these macros use a constant and non-zero rotation count. */
+#define ROTL32(n,x) (((x)<<(n)) | ((x)>>((-(n)&31))))
+
+
+#define LE_SWAP32(v) le_bswap32(v)
+
+#define LE_READ_UINT32(p) buf_get_le32(p)
+
+
+static void salsa20_setiv (void *context, const byte *iv, size_t ivlen);
+static const char *selftest (void);
+
+
+#ifdef USE_AMD64
+
+/* Assembly implementations use SystemV ABI, ABI conversion and additional
+ * stack to store XMM6-XMM15 needed on Win64. */
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+# define ASM_FUNC_ABI __attribute__((sysv_abi))
+# define ASM_EXTRA_STACK (10 * 16)
+#else
+# define ASM_FUNC_ABI
+# define ASM_EXTRA_STACK 0
+#endif
+
+/* AMD64 assembly implementations of Salsa20. */
+void _gcry_salsa20_amd64_keysetup(u32 *ctxinput, const void *key, int keybits)
+                                 ASM_FUNC_ABI;
+void _gcry_salsa20_amd64_ivsetup(u32 *ctxinput, const void *iv)
+                                ASM_FUNC_ABI;
+unsigned int
+_gcry_salsa20_amd64_encrypt_blocks(u32 *ctxinput, const void *src, void *dst,
+                                   size_t len, int rounds) ASM_FUNC_ABI;
+
+static void
+salsa20_keysetup(SALSA20_context_t *ctx, const byte *key, int keylen)
+{
+  _gcry_salsa20_amd64_keysetup(ctx->input, key, keylen * 8);
+}
+
+static void
+salsa20_ivsetup(SALSA20_context_t *ctx, const byte *iv)
+{
+  _gcry_salsa20_amd64_ivsetup(ctx->input, iv);
+}
+
+static unsigned int
+salsa20_core (u32 *dst, SALSA20_context_t *ctx, unsigned int rounds)
+{
+  memset(dst, 0, SALSA20_BLOCK_SIZE);
+  return _gcry_salsa20_amd64_encrypt_blocks(ctx->input, dst, dst, 1, rounds)
+         + ASM_EXTRA_STACK;
+}
+
+#else /* USE_AMD64 */
+
+
+
+#if 0
+# define SALSA20_CORE_DEBUG(i) do {            \
+    unsigned debug_j;                          \
+    for (debug_j = 0; debug_j < 16; debug_j++) \
+      {                                                \
+       if (debug_j == 0)                       \
+         fprintf(stderr, "%2d:", (i));         \
+       else if (debug_j % 4 == 0)              \
+         fprintf(stderr, "\n   ");             \
+       fprintf(stderr, " %8x", pad[debug_j]);  \
+      }                                                \
+    fprintf(stderr, "\n");                     \
+  } while (0)
+#else
+# define SALSA20_CORE_DEBUG(i)
+#endif
+
+#define QROUND(x0, x1, x2, x3)      \
+  do {                              \
+    x1 ^= ROTL32 ( 7, x0 + x3);            \
+    x2 ^= ROTL32 ( 9, x1 + x0);            \
+    x3 ^= ROTL32 (13, x2 + x1);            \
+    x0 ^= ROTL32 (18, x3 + x2);            \
+  } while(0)
+
+static unsigned int
+salsa20_core (u32 *dst, SALSA20_context_t *ctx, unsigned rounds)
+{
+  u32 pad[SALSA20_INPUT_LENGTH], *src = ctx->input;
+  unsigned int i;
+
+  memcpy (pad, src, sizeof(pad));
+  for (i = 0; i < rounds; i += 2)
+    {
+      SALSA20_CORE_DEBUG (i);
+      QROUND (pad[0],  pad[4],  pad[8],  pad[12]);
+      QROUND (pad[5],  pad[9],  pad[13], pad[1] );
+      QROUND (pad[10], pad[14], pad[2],  pad[6] );
+      QROUND (pad[15], pad[3],  pad[7],  pad[11]);
+
+      SALSA20_CORE_DEBUG (i+1);
+      QROUND (pad[0],  pad[1],  pad[2],  pad[3] );
+      QROUND (pad[5],  pad[6],  pad[7],  pad[4] );
+      QROUND (pad[10], pad[11], pad[8],  pad[9] );
+      QROUND (pad[15], pad[12], pad[13], pad[14]);
+    }
+  SALSA20_CORE_DEBUG (i);
+
+  for (i = 0; i < SALSA20_INPUT_LENGTH; i++)
+    {
+      u32 t = pad[i] + src[i];
+      dst[i] = LE_SWAP32 (t);
+    }
+
+  /* Update counter. */
+  if (!++src[8])
+    src[9]++;
+
+  /* burn_stack */
+  return ( 3*sizeof (void*) \
+         + 2*sizeof (void*) \
+         + 64 \
+         + sizeof (unsigned int) \
+         + sizeof (u32) );
+}
+#undef QROUND
+#undef SALSA20_CORE_DEBUG
+
+static void
+salsa20_keysetup(SALSA20_context_t *ctx, const byte *key, int keylen)
+{
+  /* These constants are the little endian encoding of the string
+     "expand 32-byte k".  For the 128 bit variant, the "32" in that
+     string will be fixed up to "16".  */
+  ctx->input[0]  = 0x61707865; /* "apxe"  */
+  ctx->input[5]  = 0x3320646e; /* "3 dn"  */
+  ctx->input[10] = 0x79622d32; /* "yb-2"  */
+  ctx->input[15] = 0x6b206574; /* "k et"  */
+
+  ctx->input[1] = LE_READ_UINT32(key + 0);
+  ctx->input[2] = LE_READ_UINT32(key + 4);
+  ctx->input[3] = LE_READ_UINT32(key + 8);
+  ctx->input[4] = LE_READ_UINT32(key + 12);
+  if (keylen == SALSA20_MAX_KEY_SIZE) /* 256 bits */
+    {
+      ctx->input[11] = LE_READ_UINT32(key + 16);
+      ctx->input[12] = LE_READ_UINT32(key + 20);
+      ctx->input[13] = LE_READ_UINT32(key + 24);
+      ctx->input[14] = LE_READ_UINT32(key + 28);
+    }
+  else  /* 128 bits */
+    {
+      ctx->input[11] = ctx->input[1];
+      ctx->input[12] = ctx->input[2];
+      ctx->input[13] = ctx->input[3];
+      ctx->input[14] = ctx->input[4];
+
+      ctx->input[5]  -= 0x02000000; /* Change to "1 dn".  */
+      ctx->input[10] += 0x00000004; /* Change to "yb-6".  */
+    }
+}
+
+static void salsa20_ivsetup(SALSA20_context_t *ctx, const byte *iv)
+{
+  ctx->input[6] = LE_READ_UINT32(iv + 0);
+  ctx->input[7] = LE_READ_UINT32(iv + 4);
+  /* Reset the block counter.  */
+  ctx->input[8] = 0;
+  ctx->input[9] = 0;
+}
+
+#endif /*!USE_AMD64*/
+
+#ifdef USE_ARM_NEON_ASM
+
+/* ARM NEON implementation of Salsa20. */
+unsigned int
+_gcry_arm_neon_salsa20_encrypt(void *c, const void *m, unsigned int nblks,
+                               void *k, unsigned int rounds);
+
+static unsigned int
+salsa20_core_neon (u32 *dst, SALSA20_context_t *ctx, unsigned int rounds)
+{
+  return _gcry_arm_neon_salsa20_encrypt(dst, NULL, 1, ctx->input, rounds);
+}
+
+static void salsa20_ivsetup_neon(SALSA20_context_t *ctx, const byte *iv)
+{
+  memcpy(ctx->input + 8, iv, 8);
+  /* Reset the block counter.  */
+  memset(ctx->input + 10, 0, 8);
+}
+
+static void
+salsa20_keysetup_neon(SALSA20_context_t *ctx, const byte *key, int klen)
+{
+  static const unsigned char sigma32[16] = "expand 32-byte k";
+  static const unsigned char sigma16[16] = "expand 16-byte k";
+
+  if (klen == 16)
+    {
+      memcpy (ctx->input, key, 16);
+      memcpy (ctx->input + 4, key, 16); /* Duplicate 128-bit key. */
+      memcpy (ctx->input + 12, sigma16, 16);
+    }
+  else
+    {
+      /* 32-byte key */
+      memcpy (ctx->input, key, 32);
+      memcpy (ctx->input + 12, sigma32, 16);
+    }
+}
+
+#endif /*USE_ARM_NEON_ASM*/
+
+
+static gcry_err_code_t
+salsa20_do_setkey (SALSA20_context_t *ctx,
+                   const byte *key, unsigned int keylen)
+{
+  static int initialized;
+  static const char *selftest_failed;
+
+  if (!initialized )
+    {
+      initialized = 1;
+      selftest_failed = selftest ();
+      if (selftest_failed)
+        log_error ("SALSA20 selftest failed (%s)\n", selftest_failed );
+    }
+  if (selftest_failed)
+    return GPG_ERR_SELFTEST_FAILED;
+
+  if (keylen != SALSA20_MIN_KEY_SIZE
+      && keylen != SALSA20_MAX_KEY_SIZE)
+    return GPG_ERR_INV_KEYLEN;
+
+  /* Default ops. */
+  ctx->keysetup = salsa20_keysetup;
+  ctx->ivsetup = salsa20_ivsetup;
+  ctx->core = salsa20_core;
+
+#ifdef USE_ARM_NEON_ASM
+  ctx->use_neon = (_gcry_get_hw_features () & HWF_ARM_NEON) != 0;
+  if (ctx->use_neon)
+    {
+      /* Use ARM NEON ops instead. */
+      ctx->keysetup = salsa20_keysetup_neon;
+      ctx->ivsetup = salsa20_ivsetup_neon;
+      ctx->core = salsa20_core_neon;
+    }
+#endif
+
+  ctx->keysetup (ctx, key, keylen);
+
+  /* We default to a zero nonce.  */
+  salsa20_setiv (ctx, NULL, 0);
+
+  return 0;
+}
+
+
+static gcry_err_code_t
+salsa20_setkey (void *context, const byte *key, unsigned int keylen,
+                cipher_bulk_ops_t *bulk_ops)
+{
+  SALSA20_context_t *ctx = (SALSA20_context_t *)context;
+  gcry_err_code_t rc = salsa20_do_setkey (ctx, key, keylen);
+  (void)bulk_ops;
+  _gcry_burn_stack (4 + sizeof (void *) + 4 * sizeof (void *));
+  return rc;
+}
+
+
+static void
+salsa20_setiv (void *context, const byte *iv, size_t ivlen)
+{
+  SALSA20_context_t *ctx = (SALSA20_context_t *)context;
+  byte tmp[SALSA20_IV_SIZE];
+
+  if (iv && ivlen != SALSA20_IV_SIZE)
+    log_info ("WARNING: salsa20_setiv: bad ivlen=%u\n", (u32)ivlen);
+
+  if (!iv || ivlen != SALSA20_IV_SIZE)
+    memset (tmp, 0, sizeof(tmp));
+  else
+    memcpy (tmp, iv, SALSA20_IV_SIZE);
+
+  ctx->ivsetup (ctx, tmp);
+
+  /* Reset the unused pad bytes counter.  */
+  ctx->unused = 0;
+
+  wipememory (tmp, sizeof(tmp));
+}
+
+
+
+/* Note: This function requires LENGTH > 0.  */
+static void
+salsa20_do_encrypt_stream (SALSA20_context_t *ctx,
+                           byte *outbuf, const byte *inbuf,
+                           size_t length, unsigned rounds)
+{
+  unsigned int nburn, burn = 0;
+
+  if (ctx->unused)
+    {
+      unsigned char *p = (void*)ctx->pad;
+      size_t n;
+
+      gcry_assert (ctx->unused < SALSA20_BLOCK_SIZE);
+
+      n = ctx->unused;
+      if (n > length)
+        n = length;
+      buf_xor (outbuf, inbuf, p + SALSA20_BLOCK_SIZE - ctx->unused, n);
+      length -= n;
+      outbuf += n;
+      inbuf  += n;
+      ctx->unused -= n;
+      if (!length)
+        return;
+      gcry_assert (!ctx->unused);
+    }
+
+#ifdef USE_AMD64
+  if (length >= SALSA20_BLOCK_SIZE)
+    {
+      size_t nblocks = length / SALSA20_BLOCK_SIZE;
+      burn = _gcry_salsa20_amd64_encrypt_blocks(ctx->input, inbuf, outbuf,
+                                                nblocks, rounds);
+      burn += ASM_EXTRA_STACK;
+      length -= SALSA20_BLOCK_SIZE * nblocks;
+      outbuf += SALSA20_BLOCK_SIZE * nblocks;
+      inbuf  += SALSA20_BLOCK_SIZE * nblocks;
+    }
+#endif
+
+#ifdef USE_ARM_NEON_ASM
+  if (ctx->use_neon && length >= SALSA20_BLOCK_SIZE)
+    {
+      unsigned int nblocks = length / SALSA20_BLOCK_SIZE;
+      _gcry_arm_neon_salsa20_encrypt (outbuf, inbuf, nblocks, ctx->input,
+                                      rounds);
+      length -= SALSA20_BLOCK_SIZE * nblocks;
+      outbuf += SALSA20_BLOCK_SIZE * nblocks;
+      inbuf  += SALSA20_BLOCK_SIZE * nblocks;
+    }
+#endif
+
+  while (length > 0)
+    {
+      /* Create the next pad and bump the block counter.  Note that it
+         is the user's duty to change to another nonce not later than
+         after 2^70 processed bytes.  */
+      nburn = ctx->core (ctx->pad, ctx, rounds);
+      burn = nburn > burn ? nburn : burn;
+
+      if (length <= SALSA20_BLOCK_SIZE)
+       {
+         buf_xor (outbuf, inbuf, ctx->pad, length);
+          ctx->unused = SALSA20_BLOCK_SIZE - length;
+         break;
+       }
+      buf_xor (outbuf, inbuf, ctx->pad, SALSA20_BLOCK_SIZE);
+      length -= SALSA20_BLOCK_SIZE;
+      outbuf += SALSA20_BLOCK_SIZE;
+      inbuf  += SALSA20_BLOCK_SIZE;
+    }
+
+  _gcry_burn_stack (burn);
+}
+
+
+static void
+salsa20_encrypt_stream (void *context,
+                        byte *outbuf, const byte *inbuf, size_t length)
+{
+  SALSA20_context_t *ctx = (SALSA20_context_t *)context;
+
+  if (length)
+    salsa20_do_encrypt_stream (ctx, outbuf, inbuf, length, SALSA20_ROUNDS);
+}
+
+
+static void
+salsa20r12_encrypt_stream (void *context,
+                           byte *outbuf, const byte *inbuf, size_t length)
+{
+  SALSA20_context_t *ctx = (SALSA20_context_t *)context;
+
+  if (length)
+    salsa20_do_encrypt_stream (ctx, outbuf, inbuf, length, SALSA20R12_ROUNDS);
+}
+
+
+static const char*
+selftest (void)
+{
+  byte ctxbuf[sizeof(SALSA20_context_t) + 15];
+  SALSA20_context_t *ctx;
+  byte scratch[8+1];
+  byte buf[256+64+4];
+  int i;
+
+  static byte key_1[] =
+    { 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
+  static const byte nonce_1[] =
+    { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
+  static const byte plaintext_1[] =
+    { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
+  static const byte ciphertext_1[] =
+    { 0xE3, 0xBE, 0x8F, 0xDD, 0x8B, 0xEC, 0xA2, 0xE3};
+
+  /* 16-byte alignment required for amd64 implementation. */
+  ctx = (SALSA20_context_t *)((uintptr_t)(ctxbuf + 15) & ~(uintptr_t)15);
+
+  salsa20_setkey (ctx, key_1, sizeof key_1, NULL);
+  salsa20_setiv  (ctx, nonce_1, sizeof nonce_1);
+  scratch[8] = 0;
+  salsa20_encrypt_stream (ctx, scratch, plaintext_1, sizeof plaintext_1);
+  if (memcmp (scratch, ciphertext_1, sizeof ciphertext_1))
+    return "Salsa20 encryption test 1 failed.";
+  if (scratch[8])
+    return "Salsa20 wrote too much.";
+  salsa20_setkey( ctx, key_1, sizeof(key_1), NULL);
+  salsa20_setiv  (ctx, nonce_1, sizeof nonce_1);
+  salsa20_encrypt_stream (ctx, scratch, scratch, sizeof plaintext_1);
+  if (memcmp (scratch, plaintext_1, sizeof plaintext_1))
+    return "Salsa20 decryption test 1 failed.";
+
+  for (i = 0; i < sizeof buf; i++)
+    buf[i] = i;
+  salsa20_setkey (ctx, key_1, sizeof key_1, NULL);
+  salsa20_setiv (ctx, nonce_1, sizeof nonce_1);
+  /*encrypt*/
+  salsa20_encrypt_stream (ctx, buf, buf, sizeof buf);
+  /*decrypt*/
+  salsa20_setkey (ctx, key_1, sizeof key_1, NULL);
+  salsa20_setiv (ctx, nonce_1, sizeof nonce_1);
+  salsa20_encrypt_stream (ctx, buf, buf, 1);
+  salsa20_encrypt_stream (ctx, buf+1, buf+1, (sizeof buf)-1-1);
+  salsa20_encrypt_stream (ctx, buf+(sizeof buf)-1, buf+(sizeof buf)-1, 1);
+  for (i = 0; i < sizeof buf; i++)
+    if (buf[i] != (byte)i)
+      return "Salsa20 encryption test 2 failed.";
+
+  return NULL;
+}
+
+
+gcry_cipher_spec_t _gcry_cipher_spec_salsa20 =
+  {
+    GCRY_CIPHER_SALSA20,
+    {0, 0},     /* flags */
+    "SALSA20",  /* name */
+    NULL,       /* aliases */
+    NULL,       /* oids */
+    1,          /* blocksize in bytes. */
+    SALSA20_MAX_KEY_SIZE*8,  /* standard key length in bits. */
+    sizeof (SALSA20_context_t),
+    salsa20_setkey,
+    NULL,
+    NULL,
+    salsa20_encrypt_stream,
+    salsa20_encrypt_stream,
+    NULL,
+    NULL,
+    salsa20_setiv
+  };
+
+gcry_cipher_spec_t _gcry_cipher_spec_salsa20r12 =
+  {
+    GCRY_CIPHER_SALSA20R12,
+    {0, 0},     /* flags */
+    "SALSA20R12",  /* name */
+    NULL,       /* aliases */
+    NULL,       /* oids */
+    1,          /* blocksize in bytes. */
+    SALSA20_MAX_KEY_SIZE*8,  /* standard key length in bits. */
+    sizeof (SALSA20_context_t),
+    salsa20_setkey,
+    NULL,
+    NULL,
+    salsa20r12_encrypt_stream,
+    salsa20r12_encrypt_stream,
+    NULL,
+    NULL,
+    salsa20_setiv
+  };
diff --git a/grub-core/lib/libgcrypt/cipher/scrypt.c 
b/grub-core/lib/libgcrypt/cipher/scrypt.c
new file mode 100644
index 000000000..5502bdcc6
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/scrypt.c
@@ -0,0 +1,322 @@
+/* scrypt.c - Scrypt password-based key derivation function.
+ * Copyright (C) 2012 Simon Josefsson
+ * Copyright (C) 2013 Christian Grothoff
+ * Copyright (C) 2013 g10 Code GmbH
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser general Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/* Adapted from the nettle, low-level cryptographics library for
+ * libgcrypt by Christian Grothoff; original license:
+ *
+ * Copyright (C) 2012 Simon Josefsson
+ *
+ * The nettle library is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or (at your
+ * option) any later version.
+ *
+ * The nettle library is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
+ * License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with the nettle library; see the file COPYING.LIB.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02111-1301, USA.
+ */
+
+#include <config.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "g10lib.h"
+#include "kdf-internal.h"
+#include "bufhelp.h"
+
+/* We really need a 64 bit type for this code.  */
+#define SALSA20_INPUT_LENGTH 16
+
+#define ROTL32(n,x) (((x)<<(n)) | ((x)>>(32-(n))))
+
+
+/* Reads a 64-bit integer, in network, big-endian, byte order */
+#define READ_UINT64(p) buf_get_be64(p)
+
+
+/* And the other, little-endian, byteorder */
+#define LE_READ_UINT64(p) buf_get_le64(p)
+
+#define LE_SWAP32(v) le_bswap32(v)
+
+
+#define QROUND(x0, x1, x2, x3) do { \
+  x1 ^= ROTL32(7, x0 + x3);        \
+  x2 ^= ROTL32(9, x1 + x0);        \
+  x3 ^= ROTL32(13, x2 + x1);       \
+  x0 ^= ROTL32(18, x3 + x2);       \
+  } while(0)
+
+
+static void
+salsa20_core (u32 *dst, const u32 *src, unsigned int rounds)
+{
+  u32 x[SALSA20_INPUT_LENGTH];
+  unsigned i;
+
+  assert ( (rounds & 1) == 0);
+
+  for (i = 0; i < SALSA20_INPUT_LENGTH; i++)
+    x[i] = LE_SWAP32(src[i]);
+
+  for (i = 0; i < rounds;i += 2)
+    {
+      QROUND(x[0], x[4], x[8], x[12]);
+      QROUND(x[5], x[9], x[13], x[1]);
+      QROUND(x[10], x[14], x[2], x[6]);
+      QROUND(x[15], x[3], x[7], x[11]);
+
+      QROUND(x[0], x[1], x[2], x[3]);
+      QROUND(x[5], x[6], x[7], x[4]);
+      QROUND(x[10], x[11], x[8], x[9]);
+      QROUND(x[15], x[12], x[13], x[14]);
+    }
+
+  for (i = 0; i < SALSA20_INPUT_LENGTH; i++)
+    {
+      u32 t = x[i] + LE_SWAP32(src[i]);
+      dst[i] = LE_SWAP32(t);
+    }
+}
+
+
+static void
+scrypt_block_mix (u32 r, unsigned char *B, unsigned char *tmp2)
+{
+  u64 i;
+  unsigned char *X = tmp2;
+  unsigned char *Y = tmp2 + 64;
+
+#if 0
+  if (r == 1)
+    {
+      for (i = 0; i < 2 * r; i++)
+        {
+          size_t j;
+          printf ("B[%d] = ", (int)i);
+          for (j = 0; j < 64; j++)
+            {
+              if (j && !(j % 16))
+                printf ("\n       ");
+              printf (" %02x", B[i * 64 + j]);
+            }
+          putchar ('\n');
+        }
+    }
+#endif
+
+  /* X = B[2 * r - 1] */
+  memcpy (X, &B[(2 * r - 1) * 64], 64);
+
+  /* for i = 0 to 2 * r - 1 do */
+  for (i = 0; i <= 2 * r - 1; i++)
+    {
+      /* T = X xor B[i] */
+      buf_xor(X, X, &B[i * 64], 64);
+
+      /* X = Salsa (T) */
+      salsa20_core ((u32*)(void*)X, (u32*)(void*)X, 8);
+
+      /* Y[i] = X */
+      memcpy (&Y[i * 64], X, 64);
+    }
+
+  for (i = 0; i < r; i++)
+    {
+      memcpy (&B[i * 64], &Y[2 * i * 64], 64);
+      memcpy (&B[(r + i) * 64], &Y[(2 * i + 1) * 64], 64);
+    }
+
+#if 0
+  if (r==1)
+    {
+      for (i = 0; i < 2 * r; i++)
+        {
+          size_t j;
+          printf ("B'[%d] =", (int)i);
+          for (j = 0; j < 64; j++)
+            {
+              if (j && !(j % 16))
+                printf ("\n       ");
+              printf (" %02x", B[i * 64 + j]);
+            }
+          putchar ('\n');
+        }
+    }
+#endif
+}
+
+
+static void
+scrypt_ro_mix (u32 r, unsigned char *B, u64 N,
+             unsigned char *tmp1, unsigned char *tmp2)
+{
+  unsigned char *X = B, *T = B;
+  u64 i;
+
+#if 0
+  if (r == 1)
+    {
+      printf ("B = ");
+      for (i = 0; i < 128 * r; i++)
+        {
+          if (i && !(i % 16))
+            printf ("\n    ");
+          printf (" %02x", B[i]);
+        }
+      putchar ('\n');
+    }
+#endif
+
+  /* for i = 0 to N - 1 do */
+  for (i = 0; i <= N - 1; i++)
+    {
+      /* V[i] = X */
+      memcpy (&tmp1[i * 128 * r], X, 128 * r);
+
+      /* X =  ScryptBlockMix (X) */
+      scrypt_block_mix (r, X, tmp2);
+    }
+
+  /* for i = 0 to N - 1 do */
+  for (i = 0; i <= N - 1; i++)
+    {
+      u64 j;
+
+      /* j = Integerify (X) mod N */
+      j = LE_READ_UINT64 (&X[128 * r - 64]) % N;
+
+      /* T = X xor V[j] */
+      buf_xor (T, T, &tmp1[j * 128 * r], 128 * r);
+
+      /* X = scryptBlockMix (T) */
+      scrypt_block_mix (r, T, tmp2);
+    }
+
+#if 0
+  if (r == 1)
+    {
+      printf ("B' =");
+      for (i = 0; i < 128 * r; i++)
+        {
+          if (i && !(i % 16))
+            printf ("\n    ");
+          printf (" %02x", B[i]);
+        }
+      putchar ('\n');
+    }
+#endif
+}
+
+
+/*
+ *
+ */
+gcry_err_code_t
+_gcry_kdf_scrypt (const unsigned char *passwd, size_t passwdlen,
+                  int algo, int subalgo,
+                  const unsigned char *salt, size_t saltlen,
+                  unsigned long iterations,
+                  size_t dkLen, unsigned char *DK)
+{
+  u64 N = subalgo;    /* CPU/memory cost parameter.  */
+  u32 r;              /* Block size.  */
+  u32 p = iterations; /* Parallelization parameter.  */
+
+  gpg_err_code_t ec;
+  u32 i;
+  unsigned char *B = NULL;
+  unsigned char *tmp1 = NULL;
+  unsigned char *tmp2 = NULL;
+  size_t r128;
+  size_t nbytes;
+
+  if (subalgo < 1 || !iterations)
+    return GPG_ERR_INV_VALUE;
+
+  if (algo == GCRY_KDF_SCRYPT)
+    r = 8;
+  else if (algo == 41) /* Hack to allow the use of all test vectors.  */
+    r = 1;
+  else
+    return GPG_ERR_UNKNOWN_ALGORITHM;
+
+  r128 = r * 128;
+  if (r128 / 128 != r)
+    return GPG_ERR_ENOMEM;
+
+  nbytes = p * r128;
+  if (r128 && nbytes / r128 != p)
+    return GPG_ERR_ENOMEM;
+
+  nbytes = N * r128;
+  if (r128 && nbytes / r128 != N)
+    return GPG_ERR_ENOMEM;
+
+  nbytes = 64 + r128;
+  if (nbytes < r128)
+    return GPG_ERR_ENOMEM;
+
+  B = xtrymalloc (p * r128);
+  if (!B)
+    {
+      ec = gpg_err_code_from_syserror ();
+      goto leave;
+    }
+
+  tmp1 = xtrymalloc (N * r128);
+  if (!tmp1)
+    {
+      ec = gpg_err_code_from_syserror ();
+      goto leave;
+    }
+
+  tmp2 = xtrymalloc (64 + r128);
+  if (!tmp2)
+    {
+      ec = gpg_err_code_from_syserror ();
+      goto leave;
+    }
+
+  ec = _gcry_kdf_pkdf2 (passwd, passwdlen, GCRY_MD_SHA256, salt, saltlen,
+                        1 /* iterations */, p * r128, B);
+
+  for (i = 0; !ec && i < p; i++)
+    scrypt_ro_mix (r, &B[i * r128], N, tmp1, tmp2);
+
+  if (!ec)
+    ec = _gcry_kdf_pkdf2 (passwd, passwdlen, GCRY_MD_SHA256, B, p * r128,
+                          1 /* iterations */, dkLen, DK);
+
+ leave:
+  xfree (tmp2);
+  xfree (tmp1);
+  xfree (B);
+
+  return ec;
+}
diff --git a/grub-core/lib/libgcrypt/cipher/seed.c 
b/grub-core/lib/libgcrypt/cipher/seed.c
index ae26e6742..4fd93d752 100644
--- a/grub-core/lib/libgcrypt/cipher/seed.c
+++ b/grub-core/lib/libgcrypt/cipher/seed.c
@@ -29,15 +29,13 @@
 #include "types.h"  /* for byte and u32 typedefs */
 #include "g10lib.h"
 #include "cipher.h"
+#include "bufhelp.h"
+#include "cipher-internal.h"
 
 #define NUMKC  16
 
-#define GETU32(pt) (((u32)(pt)[0] << 24) ^ ((u32)(pt)[1] << 16) ^ \
-                   ((u32)(pt)[2] <<  8) ^ ((u32)(pt)[3]))
-#define PUTU32(ct, st) { (ct)[0] = (byte)((st) >> 24); \
-                        (ct)[1] = (byte)((st) >> 16); \
-                        (ct)[2] = (byte)((st) >>  8); \
-                        (ct)[3] = (byte)(st); }
+#define GETU32(pt) buf_get_be32(pt)
+#define PUTU32(ct, st) buf_put_be32(ct, st)
 
 union wordbuf
 {
@@ -312,11 +310,12 @@ do_setkey (SEED_context *ctx, const byte *key, const 
unsigned keylen)
 }
 
 static gcry_err_code_t
-seed_setkey (void *context, const byte *key, const unsigned keylen)
+seed_setkey (void *context, const byte *key, const unsigned keylen,
+             cipher_bulk_ops_t *bulk_ops)
 {
   SEED_context *ctx = context;
-
   int rc = do_setkey (ctx, key, keylen);
+  (void)bulk_ops;
   _gcry_burn_stack (4*6 + sizeof(void*)*2 + sizeof(int)*2);
   return rc;
 }
@@ -371,13 +370,13 @@ do_encrypt (const SEED_context *ctx, byte *outbuf, const 
byte *inbuf)
   PUTU32 (outbuf+12, x2);
 }
 
-static void
+static unsigned int
 seed_encrypt (void *context, byte *outbuf, const byte *inbuf)
 {
   SEED_context *ctx = context;
 
   do_encrypt (ctx, outbuf, inbuf);
-  _gcry_burn_stack (4*6);
+  return /*burn_stack*/ (4*6);
 }
 
 
@@ -417,13 +416,13 @@ do_decrypt (SEED_context *ctx, byte *outbuf, const byte 
*inbuf)
   PUTU32 (outbuf+12, x2);
 }
 
-static void
+static unsigned int
 seed_decrypt (void *context, byte *outbuf, const byte *inbuf)
 {
   SEED_context *ctx = context;
 
   do_decrypt (ctx, outbuf, inbuf);
-  _gcry_burn_stack (4*6);
+  return /*burn_stack*/ (4*6);
 }
 
 
@@ -449,7 +448,7 @@ selftest (void)
     0x22, 0x6B, 0xC3, 0x14, 0x2C, 0xD4, 0x0D, 0x4A,
   };
 
-  seed_setkey (&ctx, key, sizeof(key));
+  seed_setkey (&ctx, key, sizeof(key), NULL);
   seed_encrypt (&ctx, scratch, plaintext);
   if (memcmp (scratch, ciphertext, sizeof (ciphertext)))
     return "SEED test encryption failed.";
@@ -462,7 +461,7 @@ selftest (void)
 
 
 
-static gcry_cipher_oid_spec_t seed_oids[] =
+static const gcry_cipher_oid_spec_t seed_oids[] =
   {
     { "1.2.410.200004.1.3", GCRY_CIPHER_MODE_ECB },
     { "1.2.410.200004.1.4", GCRY_CIPHER_MODE_CBC },
@@ -473,6 +472,7 @@ static gcry_cipher_oid_spec_t seed_oids[] =
 
 gcry_cipher_spec_t _gcry_cipher_spec_seed =
   {
+    GCRY_CIPHER_SEED, {0, 0},
     "SEED", NULL, seed_oids, 16, 128, sizeof (SEED_context),
     seed_setkey, seed_encrypt, seed_decrypt,
   };
diff --git a/grub-core/lib/libgcrypt/cipher/serpent-armv7-neon.S 
b/grub-core/lib/libgcrypt/cipher/serpent-armv7-neon.S
new file mode 100644
index 000000000..adff63946
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/serpent-armv7-neon.S
@@ -0,0 +1,1124 @@
+/* serpent-armv7-neon.S  -  ARM/NEON assembly implementation of Serpent cipher
+ *
+ * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+
+#if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) && \
+    defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) && \
+    defined(HAVE_GCC_INLINE_ASM_NEON)
+
+.text
+
+.syntax unified
+.fpu neon
+.arm
+
+/* ARM registers */
+#define RROUND r0
+
+/* NEON vector registers */
+#define RA0 q0
+#define RA1 q1
+#define RA2 q2
+#define RA3 q3
+#define RA4 q4
+#define RB0 q5
+#define RB1 q6
+#define RB2 q7
+#define RB3 q8
+#define RB4 q9
+
+#define RT0 q10
+#define RT1 q11
+#define RT2 q12
+#define RT3 q13
+
+#define RA0d0 d0
+#define RA0d1 d1
+#define RA1d0 d2
+#define RA1d1 d3
+#define RA2d0 d4
+#define RA2d1 d5
+#define RA3d0 d6
+#define RA3d1 d7
+#define RA4d0 d8
+#define RA4d1 d9
+#define RB0d0 d10
+#define RB0d1 d11
+#define RB1d0 d12
+#define RB1d1 d13
+#define RB2d0 d14
+#define RB2d1 d15
+#define RB3d0 d16
+#define RB3d1 d17
+#define RB4d0 d18
+#define RB4d1 d19
+#define RT0d0 d20
+#define RT0d1 d21
+#define RT1d0 d22
+#define RT1d1 d23
+#define RT2d0 d24
+#define RT2d1 d25
+
+/**********************************************************************
+  helper macros
+ **********************************************************************/
+
+#define transpose_4x4(_q0, _q1, _q2, _q3) \
+       vtrn.32 _q0, _q1;       \
+       vtrn.32 _q2, _q3;       \
+       vswp _q0##d1, _q2##d0;  \
+       vswp _q1##d1, _q3##d0;
+
+/**********************************************************************
+  8-way serpent
+ **********************************************************************/
+
+/*
+ * These are the S-Boxes of Serpent from following research paper.
+ *
+ *  D. A. Osvik, “Speeding up Serpent,” in Third AES Candidate Conference,
+ *   (New York, New York, USA), p. 317–329, National Institute of Standards and
+ *   Technology, 2000.
+ *
+ * Paper is also available at: http://www.ii.uib.no/~osvik/pub/aes3.pdf
+ *
+ */
+#define SBOX0(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \
+       veor    a3, a3, a0;     veor    b3, b3, b0;     vmov    a4, a1;         
vmov    b4, b1;         \
+       vand    a1, a1, a3;     vand    b1, b1, b3;     veor    a4, a4, a2;     
veor    b4, b4, b2;     \
+       veor    a1, a1, a0;     veor    b1, b1, b0;     vorr    a0, a0, a3;     
vorr    b0, b0, b3;     \
+       veor    a0, a0, a4;     veor    b0, b0, b4;     veor    a4, a4, a3;     
veor    b4, b4, b3;     \
+       veor    a3, a3, a2;     veor    b3, b3, b2;     vorr    a2, a2, a1;     
vorr    b2, b2, b1;     \
+       veor    a2, a2, a4;     veor    b2, b2, b4;     vmvn    a4, a4;         
vmvn    b4, b4;         \
+       vorr    a4, a4, a1;     vorr    b4, b4, b1;     veor    a1, a1, a3;     
veor    b1, b1, b3;     \
+       veor    a1, a1, a4;     veor    b1, b1, b4;     vorr    a3, a3, a0;     
vorr    b3, b3, b0;     \
+       veor    a1, a1, a3;     veor    b1, b1, b3;     veor    a4, a3;         
veor    b4, b3;
+
+#define SBOX0_INVERSE(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \
+       vmvn    a2, a2;         vmvn    b2, b2;         vmov    a4, a1;         
vmov    b4, b1;         \
+       vorr    a1, a1, a0;     vorr    b1, b1, b0;     vmvn    a4, a4;         
vmvn    b4, b4;         \
+       veor    a1, a1, a2;     veor    b1, b1, b2;     vorr    a2, a2, a4;     
vorr    b2, b2, b4;     \
+       veor    a1, a1, a3;     veor    b1, b1, b3;     veor    a0, a0, a4;     
veor    b0, b0, b4;     \
+       veor    a2, a2, a0;     veor    b2, b2, b0;     vand    a0, a0, a3;     
vand    b0, b0, b3;     \
+       veor    a4, a4, a0;     veor    b4, b4, b0;     vorr    a0, a0, a1;     
vorr    b0, b0, b1;     \
+       veor    a0, a0, a2;     veor    b0, b0, b2;     veor    a3, a3, a4;     
veor    b3, b3, b4;     \
+       veor    a2, a2, a1;     veor    b2, b2, b1;     veor    a3, a3, a0;     
veor    b3, b3, b0;     \
+       veor    a3, a3, a1;     veor    b3, b3, b1;\
+       vand    a2, a2, a3;     vand    b2, b2, b3;\
+       veor    a4, a2; veor    b4, b2;
+
+#define SBOX1(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \
+       vmvn    a0, a0;         vmvn    b0, b0;         vmvn    a2, a2;         
vmvn    b2, b2;         \
+       vmov    a4, a0;         vmov    b4, b0;         vand    a0, a0, a1;     
vand    b0, b0, b1;     \
+       veor    a2, a2, a0;     veor    b2, b2, b0;     vorr    a0, a0, a3;     
vorr    b0, b0, b3;     \
+       veor    a3, a3, a2;     veor    b3, b3, b2;     veor    a1, a1, a0;     
veor    b1, b1, b0;     \
+       veor    a0, a0, a4;     veor    b0, b0, b4;     vorr    a4, a4, a1;     
vorr    b4, b4, b1;     \
+       veor    a1, a1, a3;     veor    b1, b1, b3;     vorr    a2, a2, a0;     
vorr    b2, b2, b0;     \
+       vand    a2, a2, a4;     vand    b2, b2, b4;     veor    a0, a0, a1;     
veor    b0, b0, b1;     \
+       vand    a1, a1, a2;     vand    b1, b1, b2;\
+       veor    a1, a1, a0;     veor    b1, b1, b0;     vand    a0, a0, a2;     
vand    b0, b0, b2;     \
+       veor    a0, a4;         veor    b0, b4;
+
+#define SBOX1_INVERSE(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \
+       vmov    a4, a1;         vmov    b4, b1;         veor    a1, a1, a3;     
veor    b1, b1, b3;     \
+       vand    a3, a3, a1;     vand    b3, b3, b1;     veor    a4, a4, a2;     
veor    b4, b4, b2;     \
+       veor    a3, a3, a0;     veor    b3, b3, b0;     vorr    a0, a0, a1;     
vorr    b0, b0, b1;     \
+       veor    a2, a2, a3;     veor    b2, b2, b3;     veor    a0, a0, a4;     
veor    b0, b0, b4;     \
+       vorr    a0, a0, a2;     vorr    b0, b0, b2;     veor    a1, a1, a3;     
veor    b1, b1, b3;     \
+       veor    a0, a0, a1;     veor    b0, b0, b1;     vorr    a1, a1, a3;     
vorr    b1, b1, b3;     \
+       veor    a1, a1, a0;     veor    b1, b1, b0;     vmvn    a4, a4;         
vmvn    b4, b4;         \
+       veor    a4, a4, a1;     veor    b4, b4, b1;     vorr    a1, a1, a0;     
vorr    b1, b1, b0;     \
+       veor    a1, a1, a0;     veor    b1, b1, b0;\
+       vorr    a1, a1, a4;     vorr    b1, b1, b4;\
+       veor    a3, a1;         veor    b3, b1;
+
+#define SBOX2(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \
+       vmov    a4, a0;         vmov    b4, b0;         vand    a0, a0, a2;     
vand    b0, b0, b2;     \
+       veor    a0, a0, a3;     veor    b0, b0, b3;     veor    a2, a2, a1;     
veor    b2, b2, b1;     \
+       veor    a2, a2, a0;     veor    b2, b2, b0;     vorr    a3, a3, a4;     
vorr    b3, b3, b4;     \
+       veor    a3, a3, a1;     veor    b3, b3, b1;     veor    a4, a4, a2;     
veor    b4, b4, b2;     \
+       vmov    a1, a3;         vmov    b1, b3;         vorr    a3, a3, a4;     
vorr    b3, b3, b4;     \
+       veor    a3, a3, a0;     veor    b3, b3, b0;     vand    a0, a0, a1;     
vand    b0, b0, b1;     \
+       veor    a4, a4, a0;     veor    b4, b4, b0;     veor    a1, a1, a3;     
veor    b1, b1, b3;     \
+       veor    a1, a1, a4;     veor    b1, b1, b4;     vmvn    a4, a4;         
vmvn    b4, b4;
+
+#define SBOX2_INVERSE(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \
+       veor    a2, a2, a3;     veor    b2, b2, b3;     veor    a3, a3, a0;     
veor    b3, b3, b0;     \
+       vmov    a4, a3;         vmov    b4, b3;         vand    a3, a3, a2;     
vand    b3, b3, b2;     \
+       veor    a3, a3, a1;     veor    b3, b3, b1;     vorr    a1, a1, a2;     
vorr    b1, b1, b2;     \
+       veor    a1, a1, a4;     veor    b1, b1, b4;     vand    a4, a4, a3;     
vand    b4, b4, b3;     \
+       veor    a2, a2, a3;     veor    b2, b2, b3;     vand    a4, a4, a0;     
vand    b4, b4, b0;     \
+       veor    a4, a4, a2;     veor    b4, b4, b2;     vand    a2, a2, a1;     
vand    b2, b2, b1;     \
+       vorr    a2, a2, a0;     vorr    b2, b2, b0;     vmvn    a3, a3;         
vmvn    b3, b3;         \
+       veor    a2, a2, a3;     veor    b2, b2, b3;     veor    a0, a0, a3;     
veor    b0, b0, b3;     \
+       vand    a0, a0, a1;     vand    b0, b0, b1;     veor    a3, a3, a4;     
veor    b3, b3, b4;     \
+       veor    a3, a0;         veor    b3, b0;
+
+#define SBOX3(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \
+       vmov    a4, a0;         vmov    b4, b0;         vorr    a0, a0, a3;     
vorr    b0, b0, b3;     \
+       veor    a3, a3, a1;     veor    b3, b3, b1;     vand    a1, a1, a4;     
vand    b1, b1, b4;     \
+       veor    a4, a4, a2;     veor    b4, b4, b2;     veor    a2, a2, a3;     
veor    b2, b2, b3;     \
+       vand    a3, a3, a0;     vand    b3, b3, b0;     vorr    a4, a4, a1;     
vorr    b4, b4, b1;     \
+       veor    a3, a3, a4;     veor    b3, b3, b4;     veor    a0, a0, a1;     
veor    b0, b0, b1;     \
+       vand    a4, a4, a0;     vand    b4, b4, b0;     veor    a1, a1, a3;     
veor    b1, b1, b3;     \
+       veor    a4, a4, a2;     veor    b4, b4, b2;     vorr    a1, a1, a0;     
vorr    b1, b1, b0;     \
+       veor    a1, a1, a2;     veor    b1, b1, b2;     veor    a0, a0, a3;     
veor    b0, b0, b3;     \
+       vmov    a2, a1;         vmov    b2, b1;         vorr    a1, a1, a3;     
vorr    b1, b1, b3;     \
+       veor    a1, a0;         veor    b1, b0;
+
+#define SBOX3_INVERSE(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \
+       vmov    a4, a2;         vmov    b4, b2;         veor    a2, a2, a1;     
veor    b2, b2, b1;     \
+       veor    a0, a0, a2;     veor    b0, b0, b2;     vand    a4, a4, a2;     
vand    b4, b4, b2;     \
+       veor    a4, a4, a0;     veor    b4, b4, b0;     vand    a0, a0, a1;     
vand    b0, b0, b1;     \
+       veor    a1, a1, a3;     veor    b1, b1, b3;     vorr    a3, a3, a4;     
vorr    b3, b3, b4;     \
+       veor    a2, a2, a3;     veor    b2, b2, b3;     veor    a0, a0, a3;     
veor    b0, b0, b3;     \
+       veor    a1, a1, a4;     veor    b1, b1, b4;     vand    a3, a3, a2;     
vand    b3, b3, b2;     \
+       veor    a3, a3, a1;     veor    b3, b3, b1;     veor    a1, a1, a0;     
veor    b1, b1, b0;     \
+       vorr    a1, a1, a2;     vorr    b1, b1, b2;     veor    a0, a0, a3;     
veor    b0, b0, b3;     \
+       veor    a1, a1, a4;     veor    b1, b1, b4;\
+       veor    a0, a1;         veor    b0, b1;
+
+#define SBOX4(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \
+       veor    a1, a1, a3;     veor    b1, b1, b3;     vmvn    a3, a3;         
vmvn    b3, b3;         \
+       veor    a2, a2, a3;     veor    b2, b2, b3;     veor    a3, a3, a0;     
veor    b3, b3, b0;     \
+       vmov    a4, a1;         vmov    b4, b1;         vand    a1, a1, a3;     
vand    b1, b1, b3;     \
+       veor    a1, a1, a2;     veor    b1, b1, b2;     veor    a4, a4, a3;     
veor    b4, b4, b3;     \
+       veor    a0, a0, a4;     veor    b0, b0, b4;     vand    a2, a2, a4;     
vand    b2, b2, b4;     \
+       veor    a2, a2, a0;     veor    b2, b2, b0;     vand    a0, a0, a1;     
vand    b0, b0, b1;     \
+       veor    a3, a3, a0;     veor    b3, b3, b0;     vorr    a4, a4, a1;     
vorr    b4, b4, b1;     \
+       veor    a4, a4, a0;     veor    b4, b4, b0;     vorr    a0, a0, a3;     
vorr    b0, b0, b3;     \
+       veor    a0, a0, a2;     veor    b0, b0, b2;     vand    a2, a2, a3;     
vand    b2, b2, b3;     \
+       vmvn    a0, a0;         vmvn    b0, b0;         veor    a4, a2;         
veor    b4, b2;
+
+#define SBOX4_INVERSE(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \
+       vmov    a4, a2;         vmov    b4, b2;         vand    a2, a2, a3;     
vand    b2, b2, b3;     \
+       veor    a2, a2, a1;     veor    b2, b2, b1;     vorr    a1, a1, a3;     
vorr    b1, b1, b3;     \
+       vand    a1, a1, a0;     vand    b1, b1, b0;     veor    a4, a4, a2;     
veor    b4, b4, b2;     \
+       veor    a4, a4, a1;     veor    b4, b4, b1;     vand    a1, a1, a2;     
vand    b1, b1, b2;     \
+       vmvn    a0, a0;         vmvn    b0, b0;         veor    a3, a3, a4;     
veor    b3, b3, b4;     \
+       veor    a1, a1, a3;     veor    b1, b1, b3;     vand    a3, a3, a0;     
vand    b3, b3, b0;     \
+       veor    a3, a3, a2;     veor    b3, b3, b2;     veor    a0, a0, a1;     
veor    b0, b0, b1;     \
+       vand    a2, a2, a0;     vand    b2, b2, b0;     veor    a3, a3, a0;     
veor    b3, b3, b0;     \
+       veor    a2, a2, a4;     veor    b2, b2, b4;\
+       vorr    a2, a2, a3;     vorr    b2, b2, b3;     veor    a3, a3, a0;     
veor    b3, b3, b0;     \
+       veor    a2, a1;         veor    b2, b1;
+
+#define SBOX5(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \
+       veor    a0, a0, a1;     veor    b0, b0, b1;     veor    a1, a1, a3;     
veor    b1, b1, b3;     \
+       vmvn    a3, a3;         vmvn    b3, b3;         vmov    a4, a1;         
vmov    b4, b1;         \
+       vand    a1, a1, a0;     vand    b1, b1, b0;     veor    a2, a2, a3;     
veor    b2, b2, b3;     \
+       veor    a1, a1, a2;     veor    b1, b1, b2;     vorr    a2, a2, a4;     
vorr    b2, b2, b4;     \
+       veor    a4, a4, a3;     veor    b4, b4, b3;     vand    a3, a3, a1;     
vand    b3, b3, b1;     \
+       veor    a3, a3, a0;     veor    b3, b3, b0;     veor    a4, a4, a1;     
veor    b4, b4, b1;     \
+       veor    a4, a4, a2;     veor    b4, b4, b2;     veor    a2, a2, a0;     
veor    b2, b2, b0;     \
+       vand    a0, a0, a3;     vand    b0, b0, b3;     vmvn    a2, a2;         
vmvn    b2, b2;         \
+       veor    a0, a0, a4;     veor    b0, b0, b4;     vorr    a4, a4, a3;     
vorr    b4, b4, b3;     \
+       veor    a2, a4;         veor    b2, b4;
+
+#define SBOX5_INVERSE(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \
+       vmvn    a1, a1;         vmvn    b1, b1;         vmov    a4, a3;         
vmov    b4, b3;         \
+       veor    a2, a2, a1;     veor    b2, b2, b1;     vorr    a3, a3, a0;     
vorr    b3, b3, b0;     \
+       veor    a3, a3, a2;     veor    b3, b3, b2;     vorr    a2, a2, a1;     
vorr    b2, b2, b1;     \
+       vand    a2, a2, a0;     vand    b2, b2, b0;     veor    a4, a4, a3;     
veor    b4, b4, b3;     \
+       veor    a2, a2, a4;     veor    b2, b2, b4;     vorr    a4, a4, a0;     
vorr    b4, b4, b0;     \
+       veor    a4, a4, a1;     veor    b4, b4, b1;     vand    a1, a1, a2;     
vand    b1, b1, b2;     \
+       veor    a1, a1, a3;     veor    b1, b1, b3;     veor    a4, a4, a2;     
veor    b4, b4, b2;     \
+       vand    a3, a3, a4;     vand    b3, b3, b4;     veor    a4, a4, a1;     
veor    b4, b4, b1;     \
+       veor    a3, a3, a4;     veor    b3, b3, b4;     vmvn    a4, a4;         
vmvn    b4, b4;         \
+       veor    a3, a0;         veor    b3, b0;
+
+#define SBOX6(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \
+       vmvn    a2, a2;         vmvn    b2, b2;         vmov    a4, a3;         
vmov    b4, b3;         \
+       vand    a3, a3, a0;     vand    b3, b3, b0;     veor    a0, a0, a4;     
veor    b0, b0, b4;     \
+       veor    a3, a3, a2;     veor    b3, b3, b2;     vorr    a2, a2, a4;     
vorr    b2, b2, b4;     \
+       veor    a1, a1, a3;     veor    b1, b1, b3;     veor    a2, a2, a0;     
veor    b2, b2, b0;     \
+       vorr    a0, a0, a1;     vorr    b0, b0, b1;     veor    a2, a2, a1;     
veor    b2, b2, b1;     \
+       veor    a4, a4, a0;     veor    b4, b4, b0;     vorr    a0, a0, a3;     
vorr    b0, b0, b3;     \
+       veor    a0, a0, a2;     veor    b0, b0, b2;     veor    a4, a4, a3;     
veor    b4, b4, b3;     \
+       veor    a4, a4, a0;     veor    b4, b4, b0;     vmvn    a3, a3;         
vmvn    b3, b3;         \
+       vand    a2, a2, a4;     vand    b2, b2, b4;\
+       veor    a2, a3;         veor    b2, b3;
+
+#define SBOX6_INVERSE(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \
+       veor    a0, a0, a2;     veor    b0, b0, b2;     vmov    a4, a2;         
vmov    b4, b2;         \
+       vand    a2, a2, a0;     vand    b2, b2, b0;     veor    a4, a4, a3;     
veor    b4, b4, b3;     \
+       vmvn    a2, a2;         vmvn    b2, b2;         veor    a3, a3, a1;     
veor    b3, b3, b1;     \
+       veor    a2, a2, a3;     veor    b2, b2, b3;     vorr    a4, a4, a0;     
vorr    b4, b4, b0;     \
+       veor    a0, a0, a2;     veor    b0, b0, b2;     veor    a3, a3, a4;     
veor    b3, b3, b4;     \
+       veor    a4, a4, a1;     veor    b4, b4, b1;     vand    a1, a1, a3;     
vand    b1, b1, b3;     \
+       veor    a1, a1, a0;     veor    b1, b1, b0;     veor    a0, a0, a3;     
veor    b0, b0, b3;     \
+       vorr    a0, a0, a2;     vorr    b0, b0, b2;     veor    a3, a3, a1;     
veor    b3, b3, b1;     \
+       veor    a4, a0;         veor    b4, b0;
+
+#define SBOX7(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \
+       vmov    a4, a1;         vmov    b4, b1;         vorr    a1, a1, a2;     
vorr    b1, b1, b2;     \
+       veor    a1, a1, a3;     veor    b1, b1, b3;     veor    a4, a4, a2;     
veor    b4, b4, b2;     \
+       veor    a2, a2, a1;     veor    b2, b2, b1;     vorr    a3, a3, a4;     
vorr    b3, b3, b4;     \
+       vand    a3, a3, a0;     vand    b3, b3, b0;     veor    a4, a4, a2;     
veor    b4, b4, b2;     \
+       veor    a3, a3, a1;     veor    b3, b3, b1;     vorr    a1, a1, a4;     
vorr    b1, b1, b4;     \
+       veor    a1, a1, a0;     veor    b1, b1, b0;     vorr    a0, a0, a4;     
vorr    b0, b0, b4;     \
+       veor    a0, a0, a2;     veor    b0, b0, b2;     veor    a1, a1, a4;     
veor    b1, b1, b4;     \
+       veor    a2, a2, a1;     veor    b2, b2, b1;     vand    a1, a1, a0;     
vand    b1, b1, b0;     \
+       veor    a1, a1, a4;     veor    b1, b1, b4;     vmvn    a2, a2;         
vmvn    b2, b2;         \
+       vorr    a2, a2, a0;     vorr    b2, b2, b0;\
+       veor    a4, a2;         veor    b4, b2;
+
+#define SBOX7_INVERSE(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \
+       vmov    a4, a2;         vmov    b4, b2;         veor    a2, a2, a0;     
veor    b2, b2, b0;     \
+       vand    a0, a0, a3;     vand    b0, b0, b3;     vorr    a4, a4, a3;     
vorr    b4, b4, b3;     \
+       vmvn    a2, a2;         vmvn    b2, b2;         veor    a3, a3, a1;     
veor    b3, b3, b1;     \
+       vorr    a1, a1, a0;     vorr    b1, b1, b0;     veor    a0, a0, a2;     
veor    b0, b0, b2;     \
+       vand    a2, a2, a4;     vand    b2, b2, b4;     vand    a3, a3, a4;     
vand    b3, b3, b4;     \
+       veor    a1, a1, a2;     veor    b1, b1, b2;     veor    a2, a2, a0;     
veor    b2, b2, b0;     \
+       vorr    a0, a0, a2;     vorr    b0, b0, b2;     veor    a4, a4, a1;     
veor    b4, b4, b1;     \
+       veor    a0, a0, a3;     veor    b0, b0, b3;     veor    a3, a3, a4;     
veor    b3, b3, b4;     \
+       vorr    a4, a4, a0;     vorr    b4, b4, b0;     veor    a3, a3, a2;     
veor    b3, b3, b2;     \
+       veor    a4, a2;         veor    b4, b2;
+
+/* Apply SBOX number WHICH to to the block.  */
+#define SBOX(which, a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \
+       SBOX##which (a0, a1, a2, a3, a4, b0, b1, b2, b3, b4)
+
+/* Apply inverse SBOX number WHICH to to the block.  */
+#define SBOX_INVERSE(which, a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \
+       SBOX##which##_INVERSE (a0, a1, a2, a3, a4, b0, b1, b2, b3, b4)
+
+/* XOR round key into block state in a0,a1,a2,a3. a4 used as temporary.  */
+#define BLOCK_XOR_KEY(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \
+       vdup.32 RT3, RT0d0[0]; \
+       vdup.32 RT1, RT0d0[1]; \
+       vdup.32 RT2, RT0d1[0]; \
+       vdup.32 RT0, RT0d1[1]; \
+       veor a0, a0, RT3;       veor b0, b0, RT3; \
+       veor a1, a1, RT1;       veor b1, b1, RT1; \
+       veor a2, a2, RT2;       veor b2, b2, RT2; \
+       veor a3, a3, RT0;       veor b3, b3, RT0;
+
+#define BLOCK_LOAD_KEY_ENC() \
+       vld1.8 {RT0d0, RT0d1}, [RROUND]!;
+
+#define BLOCK_LOAD_KEY_DEC() \
+       vld1.8 {RT0d0, RT0d1}, [RROUND]; \
+       sub RROUND, RROUND, #16
+
+/* Apply the linear transformation to BLOCK.  */
+#define LINEAR_TRANSFORMATION(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \
+       vshl.u32        a4, a0, #13;            vshl.u32        b4, b0, #13;    
        \
+       vshr.u32        a0, a0, #(32-13);       vshr.u32        b0, b0, 
#(32-13);       \
+       veor            a0, a0, a4;             veor            b0, b0, b4;     
        \
+       vshl.u32        a4, a2, #3;             vshl.u32        b4, b2, #3;     
        \
+       vshr.u32        a2, a2, #(32-3);        vshr.u32        b2, b2, 
#(32-3);        \
+       veor            a2, a2, a4;             veor            b2, b2, b4;     
        \
+       veor            a1, a0, a1;             veor            b1, b0, b1;     
        \
+       veor            a1, a2, a1;             veor            b1, b2, b1;     
        \
+       vshl.u32        a4, a0, #3;             vshl.u32        b4, b0, #3;     
        \
+       veor            a3, a2, a3;             veor            b3, b2, b3;     
        \
+       veor            a3, a4, a3;             veor            b3, b4, b3;     
        \
+       vshl.u32        a4, a1, #1;             vshl.u32        b4, b1, #1;     
        \
+       vshr.u32        a1, a1, #(32-1);        vshr.u32        b1, b1, 
#(32-1);        \
+       veor            a1, a1, a4;             veor            b1, b1, b4;     
        \
+       vshl.u32        a4, a3, #7;             vshl.u32        b4, b3, #7;     
        \
+       vshr.u32        a3, a3, #(32-7);        vshr.u32        b3, b3, 
#(32-7);        \
+       veor            a3, a3, a4;             veor            b3, b3, b4;     
        \
+       veor            a0, a1, a0;             veor            b0, b1, b0;     
        \
+       veor            a0, a3, a0;             veor            b0, b3, b0;     
        \
+       vshl.u32        a4, a1, #7;             vshl.u32        b4, b1, #7;     
        \
+       veor            a2, a3, a2;             veor            b2, b3, b2;     
        \
+       veor            a2, a4, a2;             veor            b2, b4, b2;     
        \
+       vshl.u32        a4, a0, #5;             vshl.u32        b4, b0, #5;     
        \
+       vshr.u32        a0, a0, #(32-5);        vshr.u32        b0, b0, 
#(32-5);        \
+       veor            a0, a0, a4;             veor            b0, b0, b4;     
        \
+       vshl.u32        a4, a2, #22;            vshl.u32        b4, b2, #22;    
        \
+       vshr.u32        a2, a2, #(32-22);       vshr.u32        b2, b2, 
#(32-22);       \
+       veor            a2, a2, a4;             veor            b2, b2, b4;
+
+/* Apply the inverse linear transformation to BLOCK.  */
+#define LINEAR_TRANSFORMATION_INVERSE(a0, a1, a2, a3, a4, b0, b1, b2, b3, b4) \
+       vshr.u32        a4, a2, #22;            vshr.u32        b4, b2, #22;    
        \
+       vshl.u32        a2, a2, #(32-22);       vshl.u32        b2, b2, 
#(32-22);       \
+       veor            a2, a2, a4;             veor            b2, b2, b4;     
        \
+       vshr.u32        a4, a0, #5;             vshr.u32        b4, b0, #5;     
        \
+       vshl.u32        a0, a0, #(32-5);        vshl.u32        b0, b0, 
#(32-5);        \
+       veor            a0, a0, a4;             veor            b0, b0, b4;     
        \
+       vshl.u32        a4, a1, #7;             vshl.u32        b4, b1, #7;     
        \
+       veor            a2, a3, a2;             veor            b2, b3, b2;     
        \
+       veor            a2, a4, a2;             veor            b2, b4, b2;     
        \
+       veor            a0, a1, a0;             veor            b0, b1, b0;     
        \
+       veor            a0, a3, a0;             veor            b0, b3, b0;     
        \
+       vshr.u32        a4, a3, #7;             vshr.u32        b4, b3, #7;     
        \
+       vshl.u32        a3, a3, #(32-7);        vshl.u32        b3, b3, 
#(32-7);        \
+       veor            a3, a3, a4;             veor            b3, b3, b4;     
        \
+       vshr.u32        a4, a1, #1;             vshr.u32        b4, b1, #1;     
        \
+       vshl.u32        a1, a1, #(32-1);        vshl.u32        b1, b1, 
#(32-1);        \
+       veor            a1, a1, a4;             veor            b1, b1, b4;     
        \
+       vshl.u32        a4, a0, #3;             vshl.u32        b4, b0, #3;     
        \
+       veor            a3, a2, a3;             veor            b3, b2, b3;     
        \
+       veor            a3, a4, a3;             veor            b3, b4, b3;     
        \
+       veor            a1, a0, a1;             veor            b1, b0, b1;     
        \
+       veor            a1, a2, a1;             veor            b1, b2, b1;     
        \
+       vshr.u32        a4, a2, #3;             vshr.u32        b4, b2, #3;     
        \
+       vshl.u32        a2, a2, #(32-3);        vshl.u32        b2, b2, 
#(32-3);        \
+       veor            a2, a2, a4;             veor            b2, b2, b4;     
        \
+       vshr.u32        a4, a0, #13;            vshr.u32        b4, b0, #13;    
        \
+       vshl.u32        a0, a0, #(32-13);       vshl.u32        b0, b0, 
#(32-13);       \
+       veor            a0, a0, a4;             veor            b0, b0, b4;
+
+/* Apply a Serpent round to eight parallel blocks.  This macro increments
+   `round'.  */
+#define ROUND(round, which, a0, a1, a2, a3, a4, na0, na1, na2, na3, na4, \
+                           b0, b1, b2, b3, b4, nb0, nb1, nb2, nb3, nb4) \
+       BLOCK_XOR_KEY (a0, a1, a2, a3, a4, b0, b1, b2, b3, b4);         \
+       BLOCK_LOAD_KEY_ENC ();                                          \
+       SBOX (which, a0, a1, a2, a3, a4, b0, b1, b2, b3, b4);           \
+       LINEAR_TRANSFORMATION (na0, na1, na2, na3, na4, nb0, nb1, nb2, nb3, 
nb4);
+
+/* Apply the last Serpent round to eight parallel blocks.  This macro 
increments
+   `round'.  */
+#define ROUND_LAST(round, which, a0, a1, a2, a3, a4, na0, na1, na2, na3, na4, \
+                                b0, b1, b2, b3, b4, nb0, nb1, nb2, nb3, nb4) \
+       BLOCK_XOR_KEY (a0, a1, a2, a3, a4, b0, b1, b2, b3, b4);         \
+       BLOCK_LOAD_KEY_ENC ();                                          \
+       SBOX (which, a0, a1, a2, a3, a4, b0, b1, b2, b3, b4);           \
+       BLOCK_XOR_KEY (na0, na1, na2, na3, na4, nb0, nb1, nb2, nb3, nb4);
+
+/* Apply an inverse Serpent round to eight parallel blocks.  This macro
+   increments `round'.  */
+#define ROUND_INVERSE(round, which, a0, a1, a2, a3, a4, \
+                                   na0, na1, na2, na3, na4, \
+                                   b0, b1, b2, b3, b4, \
+                                   nb0, nb1, nb2, nb3, nb4) \
+       LINEAR_TRANSFORMATION_INVERSE (a0, a1, a2, a3, a4, b0, b1, b2, b3, b4); 
\
+       SBOX_INVERSE (which, a0, a1, a2, a3, a4, b0, b1, b2, b3, b4);           
\
+       BLOCK_XOR_KEY (na0, na1, na2, na3, na4, nb0, nb1, nb2, nb3, nb4);       
\
+       BLOCK_LOAD_KEY_DEC ();
+
+/* Apply the first inverse Serpent round to eight parallel blocks.  This macro
+   increments `round'.  */
+#define ROUND_FIRST_INVERSE(round, which, a0, a1, a2, a3, a4, \
+                                         na0, na1, na2, na3, na4, \
+                                         b0, b1, b2, b3, b4, \
+                                         nb0, nb1, nb2, nb3, nb4) \
+       BLOCK_XOR_KEY (a0, a1, a2, a3, a4, b0, b1, b2, b3, b4);                 
\
+       BLOCK_LOAD_KEY_DEC ();                                                  
\
+       SBOX_INVERSE (which, a0, a1, a2, a3, a4, b0, b1, b2, b3, b4);           
\
+       BLOCK_XOR_KEY (na0, na1, na2, na3, na4, nb0, nb1, nb2, nb3, nb4);       
\
+       BLOCK_LOAD_KEY_DEC ();
+
+.align 3
+.type __serpent_enc_blk8,%function;
+__serpent_enc_blk8:
+       /* input:
+        *      r0: round key pointer
+        *      RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: eight parallel plaintext
+        *                                              blocks
+        * output:
+        *      RA4, RA1, RA2, RA0, RB4, RB1, RB2, RB0: eight parallel
+        *                                              ciphertext blocks
+        */
+
+       transpose_4x4(RA0, RA1, RA2, RA3);
+       BLOCK_LOAD_KEY_ENC ();
+       transpose_4x4(RB0, RB1, RB2, RB3);
+
+       ROUND (0, 0, RA0, RA1, RA2, RA3, RA4, RA1, RA4, RA2, RA0, RA3,
+                    RB0, RB1, RB2, RB3, RB4, RB1, RB4, RB2, RB0, RB3);
+       ROUND (1, 1, RA1, RA4, RA2, RA0, RA3, RA2, RA1, RA0, RA4, RA3,
+                    RB1, RB4, RB2, RB0, RB3, RB2, RB1, RB0, RB4, RB3);
+       ROUND (2, 2, RA2, RA1, RA0, RA4, RA3, RA0, RA4, RA1, RA3, RA2,
+                    RB2, RB1, RB0, RB4, RB3, RB0, RB4, RB1, RB3, RB2);
+       ROUND (3, 3, RA0, RA4, RA1, RA3, RA2, RA4, RA1, RA3, RA2, RA0,
+                    RB0, RB4, RB1, RB3, RB2, RB4, RB1, RB3, RB2, RB0);
+       ROUND (4, 4, RA4, RA1, RA3, RA2, RA0, RA1, RA0, RA4, RA2, RA3,
+                    RB4, RB1, RB3, RB2, RB0, RB1, RB0, RB4, RB2, RB3);
+       ROUND (5, 5, RA1, RA0, RA4, RA2, RA3, RA0, RA2, RA1, RA4, RA3,
+                    RB1, RB0, RB4, RB2, RB3, RB0, RB2, RB1, RB4, RB3);
+       ROUND (6, 6, RA0, RA2, RA1, RA4, RA3, RA0, RA2, RA3, RA1, RA4,
+                    RB0, RB2, RB1, RB4, RB3, RB0, RB2, RB3, RB1, RB4);
+       ROUND (7, 7, RA0, RA2, RA3, RA1, RA4, RA4, RA1, RA2, RA0, RA3,
+                    RB0, RB2, RB3, RB1, RB4, RB4, RB1, RB2, RB0, RB3);
+       ROUND (8, 0, RA4, RA1, RA2, RA0, RA3, RA1, RA3, RA2, RA4, RA0,
+                    RB4, RB1, RB2, RB0, RB3, RB1, RB3, RB2, RB4, RB0);
+       ROUND (9, 1, RA1, RA3, RA2, RA4, RA0, RA2, RA1, RA4, RA3, RA0,
+                    RB1, RB3, RB2, RB4, RB0, RB2, RB1, RB4, RB3, RB0);
+       ROUND (10, 2, RA2, RA1, RA4, RA3, RA0, RA4, RA3, RA1, RA0, RA2,
+                     RB2, RB1, RB4, RB3, RB0, RB4, RB3, RB1, RB0, RB2);
+       ROUND (11, 3, RA4, RA3, RA1, RA0, RA2, RA3, RA1, RA0, RA2, RA4,
+                     RB4, RB3, RB1, RB0, RB2, RB3, RB1, RB0, RB2, RB4);
+       ROUND (12, 4, RA3, RA1, RA0, RA2, RA4, RA1, RA4, RA3, RA2, RA0,
+                     RB3, RB1, RB0, RB2, RB4, RB1, RB4, RB3, RB2, RB0);
+       ROUND (13, 5, RA1, RA4, RA3, RA2, RA0, RA4, RA2, RA1, RA3, RA0,
+                     RB1, RB4, RB3, RB2, RB0, RB4, RB2, RB1, RB3, RB0);
+       ROUND (14, 6, RA4, RA2, RA1, RA3, RA0, RA4, RA2, RA0, RA1, RA3,
+                     RB4, RB2, RB1, RB3, RB0, RB4, RB2, RB0, RB1, RB3);
+       ROUND (15, 7, RA4, RA2, RA0, RA1, RA3, RA3, RA1, RA2, RA4, RA0,
+                     RB4, RB2, RB0, RB1, RB3, RB3, RB1, RB2, RB4, RB0);
+       ROUND (16, 0, RA3, RA1, RA2, RA4, RA0, RA1, RA0, RA2, RA3, RA4,
+                     RB3, RB1, RB2, RB4, RB0, RB1, RB0, RB2, RB3, RB4);
+       ROUND (17, 1, RA1, RA0, RA2, RA3, RA4, RA2, RA1, RA3, RA0, RA4,
+                     RB1, RB0, RB2, RB3, RB4, RB2, RB1, RB3, RB0, RB4);
+       ROUND (18, 2, RA2, RA1, RA3, RA0, RA4, RA3, RA0, RA1, RA4, RA2,
+                     RB2, RB1, RB3, RB0, RB4, RB3, RB0, RB1, RB4, RB2);
+       ROUND (19, 3, RA3, RA0, RA1, RA4, RA2, RA0, RA1, RA4, RA2, RA3,
+                     RB3, RB0, RB1, RB4, RB2, RB0, RB1, RB4, RB2, RB3);
+       ROUND (20, 4, RA0, RA1, RA4, RA2, RA3, RA1, RA3, RA0, RA2, RA4,
+                     RB0, RB1, RB4, RB2, RB3, RB1, RB3, RB0, RB2, RB4);
+       ROUND (21, 5, RA1, RA3, RA0, RA2, RA4, RA3, RA2, RA1, RA0, RA4,
+                     RB1, RB3, RB0, RB2, RB4, RB3, RB2, RB1, RB0, RB4);
+       ROUND (22, 6, RA3, RA2, RA1, RA0, RA4, RA3, RA2, RA4, RA1, RA0,
+                     RB3, RB2, RB1, RB0, RB4, RB3, RB2, RB4, RB1, RB0);
+       ROUND (23, 7, RA3, RA2, RA4, RA1, RA0, RA0, RA1, RA2, RA3, RA4,
+                     RB3, RB2, RB4, RB1, RB0, RB0, RB1, RB2, RB3, RB4);
+       ROUND (24, 0, RA0, RA1, RA2, RA3, RA4, RA1, RA4, RA2, RA0, RA3,
+                     RB0, RB1, RB2, RB3, RB4, RB1, RB4, RB2, RB0, RB3);
+       ROUND (25, 1, RA1, RA4, RA2, RA0, RA3, RA2, RA1, RA0, RA4, RA3,
+                     RB1, RB4, RB2, RB0, RB3, RB2, RB1, RB0, RB4, RB3);
+       ROUND (26, 2, RA2, RA1, RA0, RA4, RA3, RA0, RA4, RA1, RA3, RA2,
+                     RB2, RB1, RB0, RB4, RB3, RB0, RB4, RB1, RB3, RB2);
+       ROUND (27, 3, RA0, RA4, RA1, RA3, RA2, RA4, RA1, RA3, RA2, RA0,
+                     RB0, RB4, RB1, RB3, RB2, RB4, RB1, RB3, RB2, RB0);
+       ROUND (28, 4, RA4, RA1, RA3, RA2, RA0, RA1, RA0, RA4, RA2, RA3,
+                     RB4, RB1, RB3, RB2, RB0, RB1, RB0, RB4, RB2, RB3);
+       ROUND (29, 5, RA1, RA0, RA4, RA2, RA3, RA0, RA2, RA1, RA4, RA3,
+                     RB1, RB0, RB4, RB2, RB3, RB0, RB2, RB1, RB4, RB3);
+       ROUND (30, 6, RA0, RA2, RA1, RA4, RA3, RA0, RA2, RA3, RA1, RA4,
+                     RB0, RB2, RB1, RB4, RB3, RB0, RB2, RB3, RB1, RB4);
+       ROUND_LAST (31, 7, RA0, RA2, RA3, RA1, RA4, RA4, RA1, RA2, RA0, RA3,
+                          RB0, RB2, RB3, RB1, RB4, RB4, RB1, RB2, RB0, RB3);
+
+       transpose_4x4(RA4, RA1, RA2, RA0);
+       transpose_4x4(RB4, RB1, RB2, RB0);
+
+       bx lr;
+.size __serpent_enc_blk8,.-__serpent_enc_blk8;
+
+.align 3
+.type   __serpent_dec_blk8,%function;
+__serpent_dec_blk8:
+       /* input:
+        *      r0: round key pointer
+        *      RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: eight parallel
+        *                                              ciphertext blocks
+        * output:
+        *      RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: eight parallel plaintext
+        *                                              blocks
+        */
+
+       add RROUND, RROUND, #(32*16);
+
+       transpose_4x4(RA0, RA1, RA2, RA3);
+       BLOCK_LOAD_KEY_DEC ();
+       transpose_4x4(RB0, RB1, RB2, RB3);
+
+       ROUND_FIRST_INVERSE (31, 7, RA0, RA1, RA2, RA3, RA4,
+                                   RA3, RA0, RA1, RA4, RA2,
+                                   RB0, RB1, RB2, RB3, RB4,
+                                   RB3, RB0, RB1, RB4, RB2);
+       ROUND_INVERSE (30, 6, RA3, RA0, RA1, RA4, RA2, RA0, RA1, RA2, RA4, RA3,
+                             RB3, RB0, RB1, RB4, RB2, RB0, RB1, RB2, RB4, RB3);
+       ROUND_INVERSE (29, 5, RA0, RA1, RA2, RA4, RA3, RA1, RA3, RA4, RA2, RA0,
+                             RB0, RB1, RB2, RB4, RB3, RB1, RB3, RB4, RB2, RB0);
+       ROUND_INVERSE (28, 4, RA1, RA3, RA4, RA2, RA0, RA1, RA2, RA4, RA0, RA3,
+                             RB1, RB3, RB4, RB2, RB0, RB1, RB2, RB4, RB0, RB3);
+       ROUND_INVERSE (27, 3, RA1, RA2, RA4, RA0, RA3, RA4, RA2, RA0, RA1, RA3,
+                             RB1, RB2, RB4, RB0, RB3, RB4, RB2, RB0, RB1, RB3);
+       ROUND_INVERSE (26, 2, RA4, RA2, RA0, RA1, RA3, RA2, RA3, RA0, RA1, RA4,
+                             RB4, RB2, RB0, RB1, RB3, RB2, RB3, RB0, RB1, RB4);
+       ROUND_INVERSE (25, 1, RA2, RA3, RA0, RA1, RA4, RA4, RA2, RA1, RA0, RA3,
+                             RB2, RB3, RB0, RB1, RB4, RB4, RB2, RB1, RB0, RB3);
+       ROUND_INVERSE (24, 0, RA4, RA2, RA1, RA0, RA3, RA4, RA3, RA2, RA0, RA1,
+                             RB4, RB2, RB1, RB0, RB3, RB4, RB3, RB2, RB0, RB1);
+       ROUND_INVERSE (23, 7, RA4, RA3, RA2, RA0, RA1, RA0, RA4, RA3, RA1, RA2,
+                             RB4, RB3, RB2, RB0, RB1, RB0, RB4, RB3, RB1, RB2);
+       ROUND_INVERSE (22, 6, RA0, RA4, RA3, RA1, RA2, RA4, RA3, RA2, RA1, RA0,
+                             RB0, RB4, RB3, RB1, RB2, RB4, RB3, RB2, RB1, RB0);
+       ROUND_INVERSE (21, 5, RA4, RA3, RA2, RA1, RA0, RA3, RA0, RA1, RA2, RA4,
+                             RB4, RB3, RB2, RB1, RB0, RB3, RB0, RB1, RB2, RB4);
+       ROUND_INVERSE (20, 4, RA3, RA0, RA1, RA2, RA4, RA3, RA2, RA1, RA4, RA0,
+                             RB3, RB0, RB1, RB2, RB4, RB3, RB2, RB1, RB4, RB0);
+       ROUND_INVERSE (19, 3, RA3, RA2, RA1, RA4, RA0, RA1, RA2, RA4, RA3, RA0,
+                             RB3, RB2, RB1, RB4, RB0, RB1, RB2, RB4, RB3, RB0);
+       ROUND_INVERSE (18, 2, RA1, RA2, RA4, RA3, RA0, RA2, RA0, RA4, RA3, RA1,
+                             RB1, RB2, RB4, RB3, RB0, RB2, RB0, RB4, RB3, RB1);
+       ROUND_INVERSE (17, 1, RA2, RA0, RA4, RA3, RA1, RA1, RA2, RA3, RA4, RA0,
+                             RB2, RB0, RB4, RB3, RB1, RB1, RB2, RB3, RB4, RB0);
+       ROUND_INVERSE (16, 0, RA1, RA2, RA3, RA4, RA0, RA1, RA0, RA2, RA4, RA3,
+                             RB1, RB2, RB3, RB4, RB0, RB1, RB0, RB2, RB4, RB3);
+       ROUND_INVERSE (15, 7, RA1, RA0, RA2, RA4, RA3, RA4, RA1, RA0, RA3, RA2,
+                             RB1, RB0, RB2, RB4, RB3, RB4, RB1, RB0, RB3, RB2);
+       ROUND_INVERSE (14, 6, RA4, RA1, RA0, RA3, RA2, RA1, RA0, RA2, RA3, RA4,
+                             RB4, RB1, RB0, RB3, RB2, RB1, RB0, RB2, RB3, RB4);
+       ROUND_INVERSE (13, 5, RA1, RA0, RA2, RA3, RA4, RA0, RA4, RA3, RA2, RA1,
+                             RB1, RB0, RB2, RB3, RB4, RB0, RB4, RB3, RB2, RB1);
+       ROUND_INVERSE (12, 4, RA0, RA4, RA3, RA2, RA1, RA0, RA2, RA3, RA1, RA4,
+                             RB0, RB4, RB3, RB2, RB1, RB0, RB2, RB3, RB1, RB4);
+       ROUND_INVERSE (11, 3, RA0, RA2, RA3, RA1, RA4, RA3, RA2, RA1, RA0, RA4,
+                             RB0, RB2, RB3, RB1, RB4, RB3, RB2, RB1, RB0, RB4);
+       ROUND_INVERSE (10, 2, RA3, RA2, RA1, RA0, RA4, RA2, RA4, RA1, RA0, RA3,
+                             RB3, RB2, RB1, RB0, RB4, RB2, RB4, RB1, RB0, RB3);
+       ROUND_INVERSE (9, 1, RA2, RA4, RA1, RA0, RA3, RA3, RA2, RA0, RA1, RA4,
+                            RB2, RB4, RB1, RB0, RB3, RB3, RB2, RB0, RB1, RB4);
+       ROUND_INVERSE (8, 0, RA3, RA2, RA0, RA1, RA4, RA3, RA4, RA2, RA1, RA0,
+                            RB3, RB2, RB0, RB1, RB4, RB3, RB4, RB2, RB1, RB0);
+       ROUND_INVERSE (7, 7, RA3, RA4, RA2, RA1, RA0, RA1, RA3, RA4, RA0, RA2,
+                            RB3, RB4, RB2, RB1, RB0, RB1, RB3, RB4, RB0, RB2);
+       ROUND_INVERSE (6, 6, RA1, RA3, RA4, RA0, RA2, RA3, RA4, RA2, RA0, RA1,
+                            RB1, RB3, RB4, RB0, RB2, RB3, RB4, RB2, RB0, RB1);
+       ROUND_INVERSE (5, 5, RA3, RA4, RA2, RA0, RA1, RA4, RA1, RA0, RA2, RA3,
+                            RB3, RB4, RB2, RB0, RB1, RB4, RB1, RB0, RB2, RB3);
+       ROUND_INVERSE (4, 4, RA4, RA1, RA0, RA2, RA3, RA4, RA2, RA0, RA3, RA1,
+                            RB4, RB1, RB0, RB2, RB3, RB4, RB2, RB0, RB3, RB1);
+       ROUND_INVERSE (3, 3, RA4, RA2, RA0, RA3, RA1, RA0, RA2, RA3, RA4, RA1,
+                            RB4, RB2, RB0, RB3, RB1, RB0, RB2, RB3, RB4, RB1);
+       ROUND_INVERSE (2, 2, RA0, RA2, RA3, RA4, RA1, RA2, RA1, RA3, RA4, RA0,
+                            RB0, RB2, RB3, RB4, RB1, RB2, RB1, RB3, RB4, RB0);
+       ROUND_INVERSE (1, 1, RA2, RA1, RA3, RA4, RA0, RA0, RA2, RA4, RA3, RA1,
+                            RB2, RB1, RB3, RB4, RB0, RB0, RB2, RB4, RB3, RB1);
+       ROUND_INVERSE (0, 0, RA0, RA2, RA4, RA3, RA1, RA0, RA1, RA2, RA3, RA4,
+                            RB0, RB2, RB4, RB3, RB1, RB0, RB1, RB2, RB3, RB4);
+
+       transpose_4x4(RA0, RA1, RA2, RA3);
+       transpose_4x4(RB0, RB1, RB2, RB3);
+
+       bx lr;
+.size __serpent_dec_blk8,.-__serpent_dec_blk8;
+
+.align 3
+.globl _gcry_serpent_neon_ctr_enc
+.type _gcry_serpent_neon_ctr_enc,%function;
+_gcry_serpent_neon_ctr_enc:
+       /* input:
+        *      r0: ctx, CTX
+        *      r1: dst (8 blocks)
+        *      r2: src (8 blocks)
+        *      r3: iv
+        */
+
+       vmov.u8 RT1d0, #0xff; /* u64: -1 */
+       push {r4,lr};
+       vadd.u64 RT2d0, RT1d0, RT1d0; /* u64: -2 */
+       vpush {RA4-RB2};
+
+       /* load IV and byteswap */
+       vld1.8 {RA0}, [r3];
+       vrev64.u8 RT0, RA0; /* be => le */
+       ldr r4, [r3, #8];
+
+       /* construct IVs */
+       vsub.u64 RA2d1, RT0d1, RT2d0; /* +2 */
+       vsub.u64 RA1d1, RT0d1, RT1d0; /* +1 */
+       cmp r4, #-1;
+
+       vsub.u64 RB0d1, RA2d1, RT2d0; /* +4 */
+       vsub.u64 RA3d1, RA2d1, RT1d0; /* +3 */
+       ldr r4, [r3, #12];
+
+       vsub.u64 RB2d1, RB0d1, RT2d0; /* +6 */
+       vsub.u64 RB1d1, RB0d1, RT1d0; /* +5 */
+
+       vsub.u64 RT2d1, RB2d1, RT2d0; /* +8 */
+       vsub.u64 RB3d1, RB2d1, RT1d0; /* +7 */
+
+       vmov RA1d0, RT0d0;
+       vmov RA2d0, RT0d0;
+       vmov RA3d0, RT0d0;
+       vmov RB0d0, RT0d0;
+       rev r4, r4;
+       vmov RB1d0, RT0d0;
+       vmov RB2d0, RT0d0;
+       vmov RB3d0, RT0d0;
+       vmov RT2d0, RT0d0;
+
+       /* check need for handling 64-bit overflow and carry */
+       beq .Ldo_ctr_carry;
+
+.Lctr_carry_done:
+       /* le => be */
+       vrev64.u8 RA1, RA1;
+       vrev64.u8 RA2, RA2;
+       vrev64.u8 RA3, RA3;
+       vrev64.u8 RB0, RB0;
+       vrev64.u8 RT2, RT2;
+       vrev64.u8 RB1, RB1;
+       vrev64.u8 RB2, RB2;
+       vrev64.u8 RB3, RB3;
+       /* store new IV */
+       vst1.8 {RT2}, [r3];
+
+       bl __serpent_enc_blk8;
+
+       vld1.8 {RT0, RT1}, [r2]!;
+       vld1.8 {RT2, RT3}, [r2]!;
+       veor RA4, RA4, RT0;
+       veor RA1, RA1, RT1;
+       vld1.8 {RT0, RT1}, [r2]!;
+       veor RA2, RA2, RT2;
+       veor RA0, RA0, RT3;
+       vld1.8 {RT2, RT3}, [r2]!;
+       veor RB4, RB4, RT0;
+       veor RT0, RT0;
+       veor RB1, RB1, RT1;
+       veor RT1, RT1;
+       veor RB2, RB2, RT2;
+       veor RT2, RT2;
+       veor RB0, RB0, RT3;
+       veor RT3, RT3;
+
+       vst1.8 {RA4}, [r1]!;
+       vst1.8 {RA1}, [r1]!;
+       veor RA1, RA1;
+       vst1.8 {RA2}, [r1]!;
+       veor RA2, RA2;
+       vst1.8 {RA0}, [r1]!;
+       veor RA0, RA0;
+       vst1.8 {RB4}, [r1]!;
+       veor RB4, RB4;
+       vst1.8 {RB1}, [r1]!;
+       vst1.8 {RB2}, [r1]!;
+       vst1.8 {RB0}, [r1]!;
+
+       vpop {RA4-RB2};
+
+       /* clear the used registers */
+       veor RA3, RA3;
+       veor RB3, RB3;
+
+       pop {r4,pc};
+
+.Ldo_ctr_carry:
+       cmp r4, #-8;
+       blo .Lctr_carry_done;
+       beq .Lcarry_RT2;
+
+       cmp r4, #-6;
+       blo .Lcarry_RB3;
+       beq .Lcarry_RB2;
+
+       cmp r4, #-4;
+       blo .Lcarry_RB1;
+       beq .Lcarry_RB0;
+
+       cmp r4, #-2;
+       blo .Lcarry_RA3;
+       beq .Lcarry_RA2;
+
+       vsub.u64 RA1d0, RT1d0;
+.Lcarry_RA2:
+       vsub.u64 RA2d0, RT1d0;
+.Lcarry_RA3:
+       vsub.u64 RA3d0, RT1d0;
+.Lcarry_RB0:
+       vsub.u64 RB0d0, RT1d0;
+.Lcarry_RB1:
+       vsub.u64 RB1d0, RT1d0;
+.Lcarry_RB2:
+       vsub.u64 RB2d0, RT1d0;
+.Lcarry_RB3:
+       vsub.u64 RB3d0, RT1d0;
+.Lcarry_RT2:
+       vsub.u64 RT2d0, RT1d0;
+
+       b .Lctr_carry_done;
+.size _gcry_serpent_neon_ctr_enc,.-_gcry_serpent_neon_ctr_enc;
+
+.align 3
+.globl _gcry_serpent_neon_cfb_dec
+.type _gcry_serpent_neon_cfb_dec,%function;
+_gcry_serpent_neon_cfb_dec:
+       /* input:
+        *      r0: ctx, CTX
+        *      r1: dst (8 blocks)
+        *      r2: src (8 blocks)
+        *      r3: iv
+        */
+
+       push {lr};
+       vpush {RA4-RB2};
+
+       /* Load input */
+       vld1.8 {RA0}, [r3];
+       vld1.8 {RA1, RA2}, [r2]!;
+       vld1.8 {RA3}, [r2]!;
+       vld1.8 {RB0}, [r2]!;
+       vld1.8 {RB1, RB2}, [r2]!;
+       vld1.8 {RB3}, [r2]!;
+
+       /* Update IV */
+       vld1.8 {RT0}, [r2]!;
+       vst1.8 {RT0}, [r3];
+       mov r3, lr;
+       sub r2, r2, #(8*16);
+
+       bl __serpent_enc_blk8;
+
+       vld1.8 {RT0, RT1}, [r2]!;
+       vld1.8 {RT2, RT3}, [r2]!;
+       veor RA4, RA4, RT0;
+       veor RA1, RA1, RT1;
+       vld1.8 {RT0, RT1}, [r2]!;
+       veor RA2, RA2, RT2;
+       veor RA0, RA0, RT3;
+       vld1.8 {RT2, RT3}, [r2]!;
+       veor RB4, RB4, RT0;
+       veor RT0, RT0;
+       veor RB1, RB1, RT1;
+       veor RT1, RT1;
+       veor RB2, RB2, RT2;
+       veor RT2, RT2;
+       veor RB0, RB0, RT3;
+       veor RT3, RT3;
+
+       vst1.8 {RA4}, [r1]!;
+       vst1.8 {RA1}, [r1]!;
+       veor RA1, RA1;
+       vst1.8 {RA2}, [r1]!;
+       veor RA2, RA2;
+       vst1.8 {RA0}, [r1]!;
+       veor RA0, RA0;
+       vst1.8 {RB4}, [r1]!;
+       veor RB4, RB4;
+       vst1.8 {RB1}, [r1]!;
+       vst1.8 {RB2}, [r1]!;
+       vst1.8 {RB0}, [r1]!;
+
+       vpop {RA4-RB2};
+
+       /* clear the used registers */
+       veor RA3, RA3;
+       veor RB3, RB3;
+
+       pop {pc};
+.size _gcry_serpent_neon_cfb_dec,.-_gcry_serpent_neon_cfb_dec;
+
+.align 3
+.globl _gcry_serpent_neon_cbc_dec
+.type _gcry_serpent_neon_cbc_dec,%function;
+_gcry_serpent_neon_cbc_dec:
+       /* input:
+        *      r0: ctx, CTX
+        *      r1: dst (8 blocks)
+        *      r2: src (8 blocks)
+        *      r3: iv
+        */
+
+       push {lr};
+       vpush {RA4-RB2};
+
+       vld1.8 {RA0, RA1}, [r2]!;
+       vld1.8 {RA2, RA3}, [r2]!;
+       vld1.8 {RB0, RB1}, [r2]!;
+       vld1.8 {RB2, RB3}, [r2]!;
+       sub r2, r2, #(8*16);
+
+       bl __serpent_dec_blk8;
+
+       vld1.8 {RB4}, [r3];
+       vld1.8 {RT0, RT1}, [r2]!;
+       vld1.8 {RT2, RT3}, [r2]!;
+       veor RA0, RA0, RB4;
+       veor RA1, RA1, RT0;
+       veor RA2, RA2, RT1;
+       vld1.8 {RT0, RT1}, [r2]!;
+       veor RA3, RA3, RT2;
+       veor RB0, RB0, RT3;
+       vld1.8 {RT2, RT3}, [r2]!;
+       veor RB1, RB1, RT0;
+       veor RT0, RT0;
+       veor RB2, RB2, RT1;
+       veor RT1, RT1;
+       veor RB3, RB3, RT2;
+       veor RT2, RT2;
+       vst1.8 {RT3}, [r3]; /* store new IV */
+       veor RT3, RT3;
+
+       vst1.8 {RA0, RA1}, [r1]!;
+       veor RA0, RA0;
+       veor RA1, RA1;
+       vst1.8 {RA2, RA3}, [r1]!;
+       veor RA2, RA2;
+       vst1.8 {RB0, RB1}, [r1]!;
+       veor RA3, RA3;
+       vst1.8 {RB2, RB3}, [r1]!;
+       veor RB3, RB3;
+
+       vpop {RA4-RB2};
+
+       /* clear the used registers */
+       veor RB4, RB4;
+
+       pop {pc};
+.size _gcry_serpent_neon_cbc_dec,.-_gcry_serpent_neon_cbc_dec;
+
+.align 3
+.globl _gcry_serpent_neon_ocb_enc
+.type _gcry_serpent_neon_ocb_enc,%function;
+_gcry_serpent_neon_ocb_enc:
+       /* input:
+        *      r0  : ctx, CTX
+        *      r1  : dst (8 blocks)
+        *      r2  : src (8 blocks)
+        *      r3  : offset
+        *      sp+0: checksum
+        *      sp+4: L pointers (void *L[8])
+        */
+
+       push {r4-r11, ip, lr};
+       add ip, sp, #(10*4);
+
+       vpush {RA4-RB2};
+
+       ldm ip, {r4, lr};
+
+       vld1.8 {RT0}, [r3];
+       vld1.8 {RT1}, [r4];
+
+       /* Load L pointers */
+       ldm lr!, {r5, r6, r7, r8};
+       ldm lr, {r9, r10, r11, ip};
+
+       /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+       /* Checksum_i = Checksum_{i-1} xor P_i  */
+       /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i)  */
+
+       vld1.8 {RA0, RA1}, [r2]!;
+       vld1.8 {RA2, RA3}, [r2]!;
+       vld1.8 {RB0, RB1}, [r2]!;
+       vld1.8 {RB2, RB3}, [r2];
+
+#define OCB_INPUT(lreg, vreg) \
+         vld1.8 {RT3}, [lreg]; \
+         veor RT0, RT3; \
+         veor RT1, vreg; \
+         veor vreg, RT0; \
+         vst1.8 {RT0}, [r1]!;
+
+       OCB_INPUT(r5, RA0);
+       OCB_INPUT(r6, RA1);
+       OCB_INPUT(r7, RA2);
+       OCB_INPUT(r8, RA3);
+       OCB_INPUT(r9, RB0);
+       OCB_INPUT(r10, RB1);
+       OCB_INPUT(r11, RB2);
+       OCB_INPUT(ip, RB3);
+#undef OCB_INPUT
+
+       sub r1, r1, #(8*16);
+       vst1.8 {RT0}, [r3];
+       vst1.8 {RT1}, [r4];
+       mov r2, r1;
+
+       bl __serpent_enc_blk8;
+
+       vld1.8 {RT0, RT1}, [r1]!;
+       veor RT0, RA4, RT0;
+       veor RT1, RA1, RT1;
+       vld1.8 {RT2, RT3}, [r1]!;
+       vst1.8 {RT0, RT1}, [r2]!;
+       veor RT2, RA2, RT2;
+       veor RT3, RA0, RT3;
+       vld1.8 {RT0, RT1}, [r1]!;
+       vst1.8 {RT2, RT3}, [r2]!;
+       veor RT0, RB4, RT0;
+       veor RT1, RB1, RT1;
+       vld1.8 {RT2, RT3}, [r1]!;
+       vst1.8 {RT0, RT1}, [r2]!;
+       veor RT2, RB2, RT2;
+       veor RT3, RB0, RT3;
+       vst1.8 {RT2, RT3}, [r2]!;
+
+       vpop {RA4-RB2};
+
+       /* clear the used registers */
+       veor RA3, RA3;
+       veor RB3, RB3;
+
+       pop {r4-r11, ip, pc};
+.size _gcry_serpent_neon_ocb_enc,.-_gcry_serpent_neon_ocb_enc;
+
+.align 3
+.globl _gcry_serpent_neon_ocb_dec
+.type _gcry_serpent_neon_ocb_dec,%function;
+_gcry_serpent_neon_ocb_dec:
+       /* input:
+        *      r0  : ctx, CTX
+        *      r1  : dst (8 blocks)
+        *      r2  : src (8 blocks)
+        *      r3  : offset
+        *      sp+0: checksum
+        *      sp+4: L pointers (void *L[8])
+        */
+
+       push {r4-r11, ip, lr};
+       add ip, sp, #(10*4);
+
+       vpush {RA4-RB2};
+
+       ldm ip, {r4, lr};
+
+       vld1.8 {RT0}, [r3];
+
+       /* Load L pointers */
+       ldm lr!, {r5, r6, r7, r8};
+       ldm lr, {r9, r10, r11, ip};
+
+       /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+       /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i)  */
+
+       vld1.8 {RA0, RA1}, [r2]!;
+       vld1.8 {RA2, RA3}, [r2]!;
+       vld1.8 {RB0, RB1}, [r2]!;
+       vld1.8 {RB2, RB3}, [r2];
+
+#define OCB_INPUT(lreg, vreg) \
+         vld1.8 {RT3}, [lreg]; \
+         veor RT0, RT3; \
+         veor vreg, RT0; \
+         vst1.8 {RT0}, [r1]!;
+
+       OCB_INPUT(r5, RA0);
+       OCB_INPUT(r6, RA1);
+       OCB_INPUT(r7, RA2);
+       OCB_INPUT(r8, RA3);
+       OCB_INPUT(r9, RB0);
+       OCB_INPUT(r10, RB1);
+       OCB_INPUT(r11, RB2);
+       OCB_INPUT(ip, RB3);
+#undef OCB_INPUT
+
+       sub r1, r1, #(8*16);
+       vst1.8 {RT0}, [r3];
+       mov r2, r1;
+
+       bl __serpent_dec_blk8;
+
+       /* Checksum_i = Checksum_{i-1} xor P_i  */
+       vld1.8 {RA4}, [r4];
+
+       vld1.8 {RT0, RT1}, [r1]!;
+       veor RA0, RA0, RT0;
+       veor RA1, RA1, RT1;
+       vld1.8 {RT2, RT3}, [r1]!;
+       veor RA4, RA4, RA0;
+       vst1.8 {RA0, RA1}, [r2]!;
+       veor RA4, RA4, RA1;
+       veor RA2, RA2, RT2;
+       veor RA3, RA3, RT3;
+       vld1.8 {RT0, RT1}, [r1]!;
+       veor RA4, RA4, RA2;
+       vst1.8 {RA2, RA3}, [r2]!;
+       veor RA4, RA4, RA3;
+       veor RB0, RB0, RT0;
+       veor RB1, RB1, RT1;
+       vld1.8 {RT2, RT3}, [r1]!;
+       veor RA4, RA4, RB0;
+       vst1.8 {RB0, RB1}, [r2]!;
+       veor RA4, RA4, RB1;
+       veor RB2, RB2, RT2;
+       veor RB3, RB3, RT3;
+       veor RA4, RA4, RB2;
+       vst1.8 {RB2, RB3}, [r2]!;
+
+       veor RA4, RA4, RB3;
+       vst1.8 {RA4}, [r4];
+
+       vpop {RA4-RB2};
+
+       /* clear the used registers */
+       veor RB4, RB4;
+
+       pop {r4-r11, ip, pc};
+.size _gcry_serpent_neon_ocb_dec,.-_gcry_serpent_neon_ocb_dec;
+
+.align 3
+.globl _gcry_serpent_neon_ocb_auth
+.type _gcry_serpent_neon_ocb_auth,%function;
+_gcry_serpent_neon_ocb_auth:
+       /* input:
+        *      r0  : ctx, CTX
+        *      r1  : abuf (8 blocks)
+        *      r2  : offset
+        *      r3  : checksum
+        *      sp+0: L pointers (void *L[8])
+        */
+
+       push {r5-r11, ip, lr};
+       ldr lr, [sp, #(9*4)];
+
+       vpush {RA4-RB2};
+
+       vld1.8 {RT0}, [r2];
+
+       /* Load L pointers */
+       ldm lr!, {r5, r6, r7, r8};
+       ldm lr, {r9, r10, r11, ip};
+
+       /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+       /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i)  */
+
+       vld1.8 {RA0, RA1}, [r1]!;
+       vld1.8 {RA2, RA3}, [r1]!;
+       vld1.8 {RB0, RB1}, [r1]!;
+       vld1.8 {RB2, RB3}, [r1];
+
+#define OCB_INPUT(lreg, vreg) \
+         vld1.8 {RT3}, [lreg]; \
+         veor RT0, RT3; \
+         veor vreg, RT0;
+
+       OCB_INPUT(r5, RA0);
+       OCB_INPUT(r6, RA1);
+       OCB_INPUT(r7, RA2);
+       OCB_INPUT(r8, RA3);
+       OCB_INPUT(r9, RB0);
+       OCB_INPUT(r10, RB1);
+       OCB_INPUT(r11, RB2);
+       OCB_INPUT(ip, RB3);
+#undef OCB_INPUT
+
+       vst1.8 {RT0}, [r2];
+
+       bl __serpent_enc_blk8;
+
+       /* Checksum_i = Checksum_{i-1} xor P_i  */
+       vld1.8 {RT0}, [r3];
+
+       veor RA4, RB4;
+       veor RA1, RB1;
+       veor RA2, RB2;
+       veor RA0, RB0;
+
+       veor RA2, RT0;
+       veor RA1, RA4;
+       veor RA0, RA2;
+
+       veor RA0, RA1;
+
+       vst1.8 {RA0}, [r3];
+
+       vpop {RA4-RB2};
+
+       /* clear the used registers */
+       veor RA3, RA3;
+       veor RB3, RB3;
+
+       pop {r5-r11, ip, pc};
+.size _gcry_serpent_neon_ocb_auth,.-_gcry_serpent_neon_ocb_auth;
+
+#endif
diff --git a/grub-core/lib/libgcrypt/cipher/serpent-avx2-amd64.S 
b/grub-core/lib/libgcrypt/cipher/serpent-avx2-amd64.S
new file mode 100644
index 000000000..d3515a21d
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/serpent-avx2-amd64.S
@@ -0,0 +1,1160 @@
+/* serpent-avx2-amd64.S  -  AVX2 implementation of Serpent cipher
+ *
+ * Copyright (C) 2013-2015 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+
+#ifdef __x86_64
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+    defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && defined(USE_SERPENT) && 
\
+    defined(ENABLE_AVX2_SUPPORT)
+
+#include "asm-common-amd64.h"
+
+/* struct serpent_context: */
+#define ctx_keys 0
+
+/* register macros */
+#define CTX %rdi
+
+/* vector registers */
+#define RA0 %ymm0
+#define RA1 %ymm1
+#define RA2 %ymm2
+#define RA3 %ymm3
+#define RA4 %ymm4
+
+#define RB0 %ymm5
+#define RB1 %ymm6
+#define RB2 %ymm7
+#define RB3 %ymm8
+#define RB4 %ymm9
+
+#define RNOT %ymm10
+#define RTMP0 %ymm11
+#define RTMP1 %ymm12
+#define RTMP2 %ymm13
+#define RTMP3 %ymm14
+#define RTMP4 %ymm15
+
+#define RNOTx %xmm10
+#define RTMP0x %xmm11
+#define RTMP1x %xmm12
+#define RTMP2x %xmm13
+#define RTMP3x %xmm14
+#define RTMP4x %xmm15
+
+/**********************************************************************
+  helper macros
+ **********************************************************************/
+
+/* vector 32-bit rotation to left */
+#define vec_rol(reg, nleft, tmp) \
+       vpslld $(nleft), reg, tmp;              \
+       vpsrld $(32 - (nleft)), reg, reg;       \
+       vpor tmp, reg, reg;
+
+/* vector 32-bit rotation to right */
+#define vec_ror(reg, nright, tmp) \
+       vec_rol(reg, 32 - nright, tmp)
+
+/* 4x4 32-bit integer matrix transpose */
+#define transpose_4x4(x0, x1, x2, x3, t1, t2, t3) \
+       vpunpckhdq x1, x0, t2; \
+       vpunpckldq x1, x0, x0; \
+       \
+       vpunpckldq x3, x2, t1; \
+       vpunpckhdq x3, x2, x2; \
+       \
+       vpunpckhqdq t1, x0, x1; \
+       vpunpcklqdq t1, x0, x0; \
+       \
+       vpunpckhqdq x2, t2, x3; \
+       vpunpcklqdq x2, t2, x2;
+
+/**********************************************************************
+  16-way serpent
+ **********************************************************************/
+
+/*
+ * These are the S-Boxes of Serpent from following research paper.
+ *
+ *  D. A. Osvik, “Speeding up Serpent,” in Third AES Candidate Conference,
+ *   (New York, New York, USA), p. 317–329, National Institute of Standards and
+ *   Technology, 2000.
+ *
+ * Paper is also available at: http://www.ii.uib.no/~osvik/pub/aes3.pdf
+ *
+ */
+#define SBOX0(r0, r1, r2, r3, r4) \
+       vpxor   r0, r3, r3;             vmovdqa r1, r4;                 \
+       vpand   r3, r1, r1;             vpxor   r2, r4, r4;             \
+       vpxor   r0, r1, r1;             vpor    r3, r0, r0;             \
+       vpxor   r4, r0, r0;             vpxor   r3, r4, r4;             \
+       vpxor   r2, r3, r3;             vpor    r1, r2, r2;             \
+       vpxor   r4, r2, r2;             vpxor   RNOT, r4, r4;           \
+       vpor    r1, r4, r4;             vpxor   r3, r1, r1;             \
+       vpxor   r4, r1, r1;             vpor    r0, r3, r3;             \
+       vpxor   r3, r1, r1;             vpxor   r3, r4, r4;
+
+#define SBOX0_INVERSE(r0, r1, r2, r3, r4) \
+       vpxor   RNOT, r2, r2;           vmovdqa r1, r4;                 \
+       vpor    r0, r1, r1;             vpxor   RNOT, r4, r4;           \
+       vpxor   r2, r1, r1;             vpor    r4, r2, r2;             \
+       vpxor   r3, r1, r1;             vpxor   r4, r0, r0;             \
+       vpxor   r0, r2, r2;             vpand   r3, r0, r0;             \
+       vpxor   r0, r4, r4;             vpor    r1, r0, r0;             \
+       vpxor   r2, r0, r0;             vpxor   r4, r3, r3;             \
+       vpxor   r1, r2, r2;             vpxor   r0, r3, r3;             \
+       vpxor   r1, r3, r3;     \
+       vpand   r3, r2, r2;     \
+       vpxor   r2, r4, r4;
+
+#define SBOX1(r0, r1, r2, r3, r4) \
+       vpxor   RNOT, r0, r0;           vpxor   RNOT, r2, r2;           \
+       vmovdqa r0, r4;                 vpand   r1, r0, r0;             \
+       vpxor   r0, r2, r2;             vpor    r3, r0, r0;             \
+       vpxor   r2, r3, r3;             vpxor   r0, r1, r1;             \
+       vpxor   r4, r0, r0;             vpor    r1, r4, r4;             \
+       vpxor   r3, r1, r1;             vpor    r0, r2, r2;             \
+       vpand   r4, r2, r2;             vpxor   r1, r0, r0;             \
+       vpand   r2, r1, r1;     \
+       vpxor   r0, r1, r1;             vpand   r2, r0, r0;             \
+       vpxor   r4, r0, r0;
+
+#define SBOX1_INVERSE(r0, r1, r2, r3, r4) \
+       vmovdqa r1, r4;                 vpxor   r3, r1, r1;             \
+       vpand   r1, r3, r3;             vpxor   r2, r4, r4;             \
+       vpxor   r0, r3, r3;             vpor    r1, r0, r0;             \
+       vpxor   r3, r2, r2;             vpxor   r4, r0, r0;             \
+       vpor    r2, r0, r0;             vpxor   r3, r1, r1;             \
+       vpxor   r1, r0, r0;             vpor    r3, r1, r1;             \
+       vpxor   r0, r1, r1;             vpxor   RNOT, r4, r4;           \
+       vpxor   r1, r4, r4;             vpor    r0, r1, r1;             \
+       vpxor   r0, r1, r1;     \
+       vpor    r4, r1, r1;     \
+       vpxor   r1, r3, r3;
+
+#define SBOX2(r0, r1, r2, r3, r4) \
+       vmovdqa r0, r4;                 vpand   r2, r0, r0;             \
+       vpxor   r3, r0, r0;             vpxor   r1, r2, r2;             \
+       vpxor   r0, r2, r2;             vpor    r4, r3, r3;             \
+       vpxor   r1, r3, r3;             vpxor   r2, r4, r4;             \
+       vmovdqa r3, r1;                 vpor    r4, r3, r3;             \
+       vpxor   r0, r3, r3;             vpand   r1, r0, r0;             \
+       vpxor   r0, r4, r4;             vpxor   r3, r1, r1;             \
+       vpxor   r4, r1, r1;             vpxor   RNOT, r4, r4;
+
+#define SBOX2_INVERSE(r0, r1, r2, r3, r4) \
+       vpxor   r3, r2, r2;             vpxor   r0, r3, r3;             \
+       vmovdqa r3, r4;                 vpand   r2, r3, r3;             \
+       vpxor   r1, r3, r3;             vpor    r2, r1, r1;             \
+       vpxor   r4, r1, r1;             vpand   r3, r4, r4;             \
+       vpxor   r3, r2, r2;             vpand   r0, r4, r4;             \
+       vpxor   r2, r4, r4;             vpand   r1, r2, r2;             \
+       vpor    r0, r2, r2;             vpxor   RNOT, r3, r3;           \
+       vpxor   r3, r2, r2;             vpxor   r3, r0, r0;             \
+       vpand   r1, r0, r0;             vpxor   r4, r3, r3;             \
+       vpxor   r0, r3, r3;
+
+#define SBOX3(r0, r1, r2, r3, r4) \
+       vmovdqa r0, r4;                 vpor    r3, r0, r0;             \
+       vpxor   r1, r3, r3;             vpand   r4, r1, r1;             \
+       vpxor   r2, r4, r4;             vpxor   r3, r2, r2;             \
+       vpand   r0, r3, r3;             vpor    r1, r4, r4;             \
+       vpxor   r4, r3, r3;             vpxor   r1, r0, r0;             \
+       vpand   r0, r4, r4;             vpxor   r3, r1, r1;             \
+       vpxor   r2, r4, r4;             vpor    r0, r1, r1;             \
+       vpxor   r2, r1, r1;             vpxor   r3, r0, r0;             \
+       vmovdqa r1, r2;                 vpor    r3, r1, r1;             \
+       vpxor   r0, r1, r1;
+
+#define SBOX3_INVERSE(r0, r1, r2, r3, r4) \
+       vmovdqa r2, r4;                 vpxor   r1, r2, r2;             \
+       vpxor   r2, r0, r0;             vpand   r2, r4, r4;             \
+       vpxor   r0, r4, r4;             vpand   r1, r0, r0;             \
+       vpxor   r3, r1, r1;             vpor    r4, r3, r3;             \
+       vpxor   r3, r2, r2;             vpxor   r3, r0, r0;             \
+       vpxor   r4, r1, r1;             vpand   r2, r3, r3;             \
+       vpxor   r1, r3, r3;             vpxor   r0, r1, r1;             \
+       vpor    r2, r1, r1;             vpxor   r3, r0, r0;             \
+       vpxor   r4, r1, r1;     \
+       vpxor   r1, r0, r0;
+
+#define SBOX4(r0, r1, r2, r3, r4) \
+       vpxor   r3, r1, r1;             vpxor   RNOT, r3, r3;           \
+       vpxor   r3, r2, r2;             vpxor   r0, r3, r3;             \
+       vmovdqa r1, r4;                 vpand   r3, r1, r1;             \
+       vpxor   r2, r1, r1;             vpxor   r3, r4, r4;             \
+       vpxor   r4, r0, r0;             vpand   r4, r2, r2;             \
+       vpxor   r0, r2, r2;             vpand   r1, r0, r0;             \
+       vpxor   r0, r3, r3;             vpor    r1, r4, r4;             \
+       vpxor   r0, r4, r4;             vpor    r3, r0, r0;             \
+       vpxor   r2, r0, r0;             vpand   r3, r2, r2;             \
+       vpxor   RNOT, r0, r0;           vpxor   r2, r4, r4;
+
+#define SBOX4_INVERSE(r0, r1, r2, r3, r4) \
+       vmovdqa r2, r4;                 vpand   r3, r2, r2;             \
+       vpxor   r1, r2, r2;             vpor    r3, r1, r1;             \
+       vpand   r0, r1, r1;             vpxor   r2, r4, r4;             \
+       vpxor   r1, r4, r4;             vpand   r2, r1, r1;             \
+       vpxor   RNOT, r0, r0;           vpxor   r4, r3, r3;             \
+       vpxor   r3, r1, r1;             vpand   r0, r3, r3;             \
+       vpxor   r2, r3, r3;             vpxor   r1, r0, r0;             \
+       vpand   r0, r2, r2;             vpxor   r0, r3, r3;             \
+       vpxor   r4, r2, r2;     \
+       vpor    r3, r2, r2;             vpxor   r0, r3, r3;             \
+       vpxor   r1, r2, r2;
+
+#define SBOX5(r0, r1, r2, r3, r4) \
+       vpxor   r1, r0, r0;             vpxor   r3, r1, r1;             \
+       vpxor   RNOT, r3, r3;           vmovdqa r1, r4;                 \
+       vpand   r0, r1, r1;             vpxor   r3, r2, r2;             \
+       vpxor   r2, r1, r1;             vpor    r4, r2, r2;             \
+       vpxor   r3, r4, r4;             vpand   r1, r3, r3;             \
+       vpxor   r0, r3, r3;             vpxor   r1, r4, r4;             \
+       vpxor   r2, r4, r4;             vpxor   r0, r2, r2;             \
+       vpand   r3, r0, r0;             vpxor   RNOT, r2, r2;           \
+       vpxor   r4, r0, r0;             vpor    r3, r4, r4;             \
+       vpxor   r4, r2, r2;
+
+#define SBOX5_INVERSE(r0, r1, r2, r3, r4) \
+       vpxor   RNOT, r1, r1;           vmovdqa r3, r4;                 \
+       vpxor   r1, r2, r2;             vpor    r0, r3, r3;             \
+       vpxor   r2, r3, r3;             vpor    r1, r2, r2;             \
+       vpand   r0, r2, r2;             vpxor   r3, r4, r4;             \
+       vpxor   r4, r2, r2;             vpor    r0, r4, r4;             \
+       vpxor   r1, r4, r4;             vpand   r2, r1, r1;             \
+       vpxor   r3, r1, r1;             vpxor   r2, r4, r4;             \
+       vpand   r4, r3, r3;             vpxor   r1, r4, r4;             \
+       vpxor   r4, r3, r3;             vpxor   RNOT, r4, r4;           \
+       vpxor   r0, r3, r3;
+
+#define SBOX6(r0, r1, r2, r3, r4) \
+       vpxor   RNOT, r2, r2;           vmovdqa r3, r4;                 \
+       vpand   r0, r3, r3;             vpxor   r4, r0, r0;             \
+       vpxor   r2, r3, r3;             vpor    r4, r2, r2;             \
+       vpxor   r3, r1, r1;             vpxor   r0, r2, r2;             \
+       vpor    r1, r0, r0;             vpxor   r1, r2, r2;             \
+       vpxor   r0, r4, r4;             vpor    r3, r0, r0;             \
+       vpxor   r2, r0, r0;             vpxor   r3, r4, r4;             \
+       vpxor   r0, r4, r4;             vpxor   RNOT, r3, r3;           \
+       vpand   r4, r2, r2;     \
+       vpxor   r3, r2, r2;
+
+#define SBOX6_INVERSE(r0, r1, r2, r3, r4) \
+       vpxor   r2, r0, r0;             vmovdqa r2, r4;                 \
+       vpand   r0, r2, r2;             vpxor   r3, r4, r4;             \
+       vpxor   RNOT, r2, r2;           vpxor   r1, r3, r3;             \
+       vpxor   r3, r2, r2;             vpor    r0, r4, r4;             \
+       vpxor   r2, r0, r0;             vpxor   r4, r3, r3;             \
+       vpxor   r1, r4, r4;             vpand   r3, r1, r1;             \
+       vpxor   r0, r1, r1;             vpxor   r3, r0, r0;             \
+       vpor    r2, r0, r0;             vpxor   r1, r3, r3;             \
+       vpxor   r0, r4, r4;
+
+#define SBOX7(r0, r1, r2, r3, r4) \
+       vmovdqa r1, r4;                 vpor    r2, r1, r1;             \
+       vpxor   r3, r1, r1;             vpxor   r2, r4, r4;             \
+       vpxor   r1, r2, r2;             vpor    r4, r3, r3;             \
+       vpand   r0, r3, r3;             vpxor   r2, r4, r4;             \
+       vpxor   r1, r3, r3;             vpor    r4, r1, r1;             \
+       vpxor   r0, r1, r1;             vpor    r4, r0, r0;             \
+       vpxor   r2, r0, r0;             vpxor   r4, r1, r1;             \
+       vpxor   r1, r2, r2;             vpand   r0, r1, r1;             \
+       vpxor   r4, r1, r1;             vpxor   RNOT, r2, r2;           \
+       vpor    r0, r2, r2;     \
+       vpxor   r2, r4, r4;
+
+#define SBOX7_INVERSE(r0, r1, r2, r3, r4) \
+       vmovdqa r2, r4;                 vpxor   r0, r2, r2;             \
+       vpand   r3, r0, r0;             vpor    r3, r4, r4;             \
+       vpxor   RNOT, r2, r2;           vpxor   r1, r3, r3;             \
+       vpor    r0, r1, r1;             vpxor   r2, r0, r0;             \
+       vpand   r4, r2, r2;             vpand   r4, r3, r3;             \
+       vpxor   r2, r1, r1;             vpxor   r0, r2, r2;             \
+       vpor    r2, r0, r0;             vpxor   r1, r4, r4;             \
+       vpxor   r3, r0, r0;             vpxor   r4, r3, r3;             \
+       vpor    r0, r4, r4;             vpxor   r2, r3, r3;             \
+       vpxor   r2, r4, r4;
+
+/* Apply SBOX number WHICH to to the block.  */
+#define SBOX(which, r0, r1, r2, r3, r4) \
+       SBOX##which (r0, r1, r2, r3, r4)
+
+/* Apply inverse SBOX number WHICH to to the block.  */
+#define SBOX_INVERSE(which, r0, r1, r2, r3, r4) \
+       SBOX##which##_INVERSE (r0, r1, r2, r3, r4)
+
+/* XOR round key into block state in r0,r1,r2,r3. r4 used as temporary.  */
+#define BLOCK_XOR_KEY(r0, r1, r2, r3, r4, round) \
+       vpbroadcastd (ctx_keys + (round) * 16 + 0 * 4)(CTX), r4; \
+       vpxor r4, r0, r0; \
+       vpbroadcastd (ctx_keys + (round) * 16 + 1 * 4)(CTX), r4; \
+       vpxor r4, r1, r1; \
+       vpbroadcastd (ctx_keys + (round) * 16 + 2 * 4)(CTX), r4; \
+       vpxor r4, r2, r2; \
+       vpbroadcastd (ctx_keys + (round) * 16 + 3 * 4)(CTX), r4; \
+       vpxor r4, r3, r3;
+
+/* Apply the linear transformation to BLOCK.  */
+#define LINEAR_TRANSFORMATION(r0, r1, r2, r3, r4) \
+       vec_rol(r0, 13, r4);    \
+       vec_rol(r2, 3, r4);     \
+       vpxor r0, r1, r1;       \
+       vpxor r2, r1, r1;       \
+       vpslld $3, r0, r4;      \
+       vpxor r2, r3, r3;       \
+       vpxor r4, r3, r3;       \
+       vec_rol(r1, 1, r4);     \
+       vec_rol(r3, 7, r4);     \
+       vpxor r1, r0, r0;       \
+       vpxor r3, r0, r0;       \
+       vpslld $7, r1, r4;      \
+       vpxor r3, r2, r2;       \
+       vpxor r4, r2, r2;       \
+       vec_rol(r0, 5, r4);     \
+       vec_rol(r2, 22, r4);
+
+/* Apply the inverse linear transformation to BLOCK.  */
+#define LINEAR_TRANSFORMATION_INVERSE(r0, r1, r2, r3, r4) \
+       vec_ror(r2, 22, r4);    \
+       vec_ror(r0, 5, r4);     \
+       vpslld $7, r1, r4;      \
+       vpxor r3, r2, r2;       \
+       vpxor r4, r2, r2;       \
+       vpxor r1, r0, r0;       \
+       vpxor r3, r0, r0;       \
+       vec_ror(r3, 7, r4);     \
+       vec_ror(r1, 1, r4);     \
+       vpslld $3, r0, r4;      \
+       vpxor r2, r3, r3;       \
+       vpxor r4, r3, r3;       \
+       vpxor r0, r1, r1;       \
+       vpxor r2, r1, r1;       \
+       vec_ror(r2, 3, r4);     \
+       vec_ror(r0, 13, r4);
+
+/* Apply a Serpent round to sixteen parallel blocks.  This macro increments
+   `round'.  */
+#define ROUND(round, which, a0, a1, a2, a3, a4, na0, na1, na2, na3, na4, \
+                           b0, b1, b2, b3, b4, nb0, nb1, nb2, nb3, nb4) \
+       BLOCK_XOR_KEY (a0, a1, a2, a3, a4, round);              \
+       SBOX (which, a0, a1, a2, a3, a4);                       \
+               BLOCK_XOR_KEY (b0, b1, b2, b3, b4, round);              \
+               SBOX (which, b0, b1, b2, b3, b4);                       \
+       LINEAR_TRANSFORMATION (na0, na1, na2, na3, na4);        \
+               LINEAR_TRANSFORMATION (nb0, nb1, nb2, nb3, nb4);
+
+/* Apply the last Serpent round to sixteen parallel blocks.  This macro
+   increments `round'.  */
+#define ROUND_LAST(round, which, a0, a1, a2, a3, a4, na0, na1, na2, na3, na4, \
+                                b0, b1, b2, b3, b4, nb0, nb1, nb2, nb3, nb4) \
+       BLOCK_XOR_KEY (a0, a1, a2, a3, a4, round);              \
+       SBOX (which, a0, a1, a2, a3, a4);                       \
+               BLOCK_XOR_KEY (b0, b1, b2, b3, b4, round);              \
+               SBOX (which, b0, b1, b2, b3, b4);                       \
+       BLOCK_XOR_KEY (na0, na1, na2, na3, na4, ((round) + 1));         \
+               BLOCK_XOR_KEY (nb0, nb1, nb2, nb3, nb4, ((round) + 1));
+
+/* Apply an inverse Serpent round to sixteen parallel blocks.  This macro
+   increments `round'.  */
+#define ROUND_INVERSE(round, which, a0, a1, a2, a3, a4, \
+                                   na0, na1, na2, na3, na4, \
+                                   b0, b1, b2, b3, b4, \
+                                   nb0, nb1, nb2, nb3, nb4) \
+       LINEAR_TRANSFORMATION_INVERSE (a0, a1, a2, a3, a4);     \
+               LINEAR_TRANSFORMATION_INVERSE (b0, b1, b2, b3, b4);     \
+       SBOX_INVERSE (which, a0, a1, a2, a3, a4);               \
+       BLOCK_XOR_KEY (na0, na1, na2, na3, na4, round);         \
+               SBOX_INVERSE (which, b0, b1, b2, b3, b4);               \
+               BLOCK_XOR_KEY (nb0, nb1, nb2, nb3, nb4, round);
+
+/* Apply the first inverse Serpent round to sixteen parallel blocks.  This 
macro
+   increments `round'.  */
+#define ROUND_FIRST_INVERSE(round, which, a0, a1, a2, a3, a4, \
+                                         na0, na1, na2, na3, na4, \
+                                         b0, b1, b2, b3, b4, \
+                                         nb0, nb1, nb2, nb3, nb4) \
+       BLOCK_XOR_KEY (a0, a1, a2, a3, a4, ((round) + 1));      \
+               BLOCK_XOR_KEY (b0, b1, b2, b3, b4, ((round) + 1));      \
+       SBOX_INVERSE (which, a0, a1, a2, a3, a4);       \
+       BLOCK_XOR_KEY (na0, na1, na2, na3, na4, round); \
+               SBOX_INVERSE (which, b0, b1, b2, b3, b4);       \
+               BLOCK_XOR_KEY (nb0, nb1, nb2, nb3, nb4, round);
+
+.text
+
+.align 8
+ELF(.type   __serpent_enc_blk16,@function;)
+__serpent_enc_blk16:
+       /* input:
+        *      %rdi: ctx, CTX
+        *      RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: sixteen parallel
+        *                                              plaintext blocks
+        * output:
+        *      RA4, RA1, RA2, RA0, RB4, RB1, RB2, RB0: sixteen parallel
+        *                                              ciphertext blocks
+        */
+       CFI_STARTPROC();
+
+       vpcmpeqd RNOT, RNOT, RNOT;
+
+       transpose_4x4(RA0, RA1, RA2, RA3, RA4, RTMP0, RTMP1);
+       transpose_4x4(RB0, RB1, RB2, RB3, RB4, RTMP0, RTMP1);
+
+       ROUND (0, 0, RA0, RA1, RA2, RA3, RA4, RA1, RA4, RA2, RA0, RA3,
+                    RB0, RB1, RB2, RB3, RB4, RB1, RB4, RB2, RB0, RB3);
+       ROUND (1, 1, RA1, RA4, RA2, RA0, RA3, RA2, RA1, RA0, RA4, RA3,
+                    RB1, RB4, RB2, RB0, RB3, RB2, RB1, RB0, RB4, RB3);
+       ROUND (2, 2, RA2, RA1, RA0, RA4, RA3, RA0, RA4, RA1, RA3, RA2,
+                    RB2, RB1, RB0, RB4, RB3, RB0, RB4, RB1, RB3, RB2);
+       ROUND (3, 3, RA0, RA4, RA1, RA3, RA2, RA4, RA1, RA3, RA2, RA0,
+                    RB0, RB4, RB1, RB3, RB2, RB4, RB1, RB3, RB2, RB0);
+       ROUND (4, 4, RA4, RA1, RA3, RA2, RA0, RA1, RA0, RA4, RA2, RA3,
+                    RB4, RB1, RB3, RB2, RB0, RB1, RB0, RB4, RB2, RB3);
+       ROUND (5, 5, RA1, RA0, RA4, RA2, RA3, RA0, RA2, RA1, RA4, RA3,
+                    RB1, RB0, RB4, RB2, RB3, RB0, RB2, RB1, RB4, RB3);
+       ROUND (6, 6, RA0, RA2, RA1, RA4, RA3, RA0, RA2, RA3, RA1, RA4,
+                    RB0, RB2, RB1, RB4, RB3, RB0, RB2, RB3, RB1, RB4);
+       ROUND (7, 7, RA0, RA2, RA3, RA1, RA4, RA4, RA1, RA2, RA0, RA3,
+                    RB0, RB2, RB3, RB1, RB4, RB4, RB1, RB2, RB0, RB3);
+       ROUND (8, 0, RA4, RA1, RA2, RA0, RA3, RA1, RA3, RA2, RA4, RA0,
+                    RB4, RB1, RB2, RB0, RB3, RB1, RB3, RB2, RB4, RB0);
+       ROUND (9, 1, RA1, RA3, RA2, RA4, RA0, RA2, RA1, RA4, RA3, RA0,
+                    RB1, RB3, RB2, RB4, RB0, RB2, RB1, RB4, RB3, RB0);
+       ROUND (10, 2, RA2, RA1, RA4, RA3, RA0, RA4, RA3, RA1, RA0, RA2,
+                     RB2, RB1, RB4, RB3, RB0, RB4, RB3, RB1, RB0, RB2);
+       ROUND (11, 3, RA4, RA3, RA1, RA0, RA2, RA3, RA1, RA0, RA2, RA4,
+                     RB4, RB3, RB1, RB0, RB2, RB3, RB1, RB0, RB2, RB4);
+       ROUND (12, 4, RA3, RA1, RA0, RA2, RA4, RA1, RA4, RA3, RA2, RA0,
+                     RB3, RB1, RB0, RB2, RB4, RB1, RB4, RB3, RB2, RB0);
+       ROUND (13, 5, RA1, RA4, RA3, RA2, RA0, RA4, RA2, RA1, RA3, RA0,
+                     RB1, RB4, RB3, RB2, RB0, RB4, RB2, RB1, RB3, RB0);
+       ROUND (14, 6, RA4, RA2, RA1, RA3, RA0, RA4, RA2, RA0, RA1, RA3,
+                     RB4, RB2, RB1, RB3, RB0, RB4, RB2, RB0, RB1, RB3);
+       ROUND (15, 7, RA4, RA2, RA0, RA1, RA3, RA3, RA1, RA2, RA4, RA0,
+                     RB4, RB2, RB0, RB1, RB3, RB3, RB1, RB2, RB4, RB0);
+       ROUND (16, 0, RA3, RA1, RA2, RA4, RA0, RA1, RA0, RA2, RA3, RA4,
+                     RB3, RB1, RB2, RB4, RB0, RB1, RB0, RB2, RB3, RB4);
+       ROUND (17, 1, RA1, RA0, RA2, RA3, RA4, RA2, RA1, RA3, RA0, RA4,
+                     RB1, RB0, RB2, RB3, RB4, RB2, RB1, RB3, RB0, RB4);
+       ROUND (18, 2, RA2, RA1, RA3, RA0, RA4, RA3, RA0, RA1, RA4, RA2,
+                     RB2, RB1, RB3, RB0, RB4, RB3, RB0, RB1, RB4, RB2);
+       ROUND (19, 3, RA3, RA0, RA1, RA4, RA2, RA0, RA1, RA4, RA2, RA3,
+                     RB3, RB0, RB1, RB4, RB2, RB0, RB1, RB4, RB2, RB3);
+       ROUND (20, 4, RA0, RA1, RA4, RA2, RA3, RA1, RA3, RA0, RA2, RA4,
+                     RB0, RB1, RB4, RB2, RB3, RB1, RB3, RB0, RB2, RB4);
+       ROUND (21, 5, RA1, RA3, RA0, RA2, RA4, RA3, RA2, RA1, RA0, RA4,
+                     RB1, RB3, RB0, RB2, RB4, RB3, RB2, RB1, RB0, RB4);
+       ROUND (22, 6, RA3, RA2, RA1, RA0, RA4, RA3, RA2, RA4, RA1, RA0,
+                     RB3, RB2, RB1, RB0, RB4, RB3, RB2, RB4, RB1, RB0);
+       ROUND (23, 7, RA3, RA2, RA4, RA1, RA0, RA0, RA1, RA2, RA3, RA4,
+                     RB3, RB2, RB4, RB1, RB0, RB0, RB1, RB2, RB3, RB4);
+       ROUND (24, 0, RA0, RA1, RA2, RA3, RA4, RA1, RA4, RA2, RA0, RA3,
+                     RB0, RB1, RB2, RB3, RB4, RB1, RB4, RB2, RB0, RB3);
+       ROUND (25, 1, RA1, RA4, RA2, RA0, RA3, RA2, RA1, RA0, RA4, RA3,
+                     RB1, RB4, RB2, RB0, RB3, RB2, RB1, RB0, RB4, RB3);
+       ROUND (26, 2, RA2, RA1, RA0, RA4, RA3, RA0, RA4, RA1, RA3, RA2,
+                     RB2, RB1, RB0, RB4, RB3, RB0, RB4, RB1, RB3, RB2);
+       ROUND (27, 3, RA0, RA4, RA1, RA3, RA2, RA4, RA1, RA3, RA2, RA0,
+                     RB0, RB4, RB1, RB3, RB2, RB4, RB1, RB3, RB2, RB0);
+       ROUND (28, 4, RA4, RA1, RA3, RA2, RA0, RA1, RA0, RA4, RA2, RA3,
+                     RB4, RB1, RB3, RB2, RB0, RB1, RB0, RB4, RB2, RB3);
+       ROUND (29, 5, RA1, RA0, RA4, RA2, RA3, RA0, RA2, RA1, RA4, RA3,
+                     RB1, RB0, RB4, RB2, RB3, RB0, RB2, RB1, RB4, RB3);
+       ROUND (30, 6, RA0, RA2, RA1, RA4, RA3, RA0, RA2, RA3, RA1, RA4,
+                     RB0, RB2, RB1, RB4, RB3, RB0, RB2, RB3, RB1, RB4);
+       ROUND_LAST (31, 7, RA0, RA2, RA3, RA1, RA4, RA4, RA1, RA2, RA0, RA3,
+                          RB0, RB2, RB3, RB1, RB4, RB4, RB1, RB2, RB0, RB3);
+
+       transpose_4x4(RA4, RA1, RA2, RA0, RA3, RTMP0, RTMP1);
+       transpose_4x4(RB4, RB1, RB2, RB0, RB3, RTMP0, RTMP1);
+
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size __serpent_enc_blk16,.-__serpent_enc_blk16;)
+
+.align 8
+ELF(.type   __serpent_dec_blk16,@function;)
+__serpent_dec_blk16:
+       /* input:
+        *      %rdi: ctx, CTX
+        *      RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: sixteen parallel
+        *                                              ciphertext blocks
+        * output:
+        *      RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: sixteen parallel
+        *                                              plaintext blocks
+        */
+       CFI_STARTPROC();
+
+       vpcmpeqd RNOT, RNOT, RNOT;
+
+       transpose_4x4(RA0, RA1, RA2, RA3, RA4, RTMP0, RTMP1);
+       transpose_4x4(RB0, RB1, RB2, RB3, RB4, RTMP0, RTMP1);
+
+       ROUND_FIRST_INVERSE (31, 7, RA0, RA1, RA2, RA3, RA4,
+                                   RA3, RA0, RA1, RA4, RA2,
+                                   RB0, RB1, RB2, RB3, RB4,
+                                   RB3, RB0, RB1, RB4, RB2);
+       ROUND_INVERSE (30, 6, RA3, RA0, RA1, RA4, RA2, RA0, RA1, RA2, RA4, RA3,
+                             RB3, RB0, RB1, RB4, RB2, RB0, RB1, RB2, RB4, RB3);
+       ROUND_INVERSE (29, 5, RA0, RA1, RA2, RA4, RA3, RA1, RA3, RA4, RA2, RA0,
+                             RB0, RB1, RB2, RB4, RB3, RB1, RB3, RB4, RB2, RB0);
+       ROUND_INVERSE (28, 4, RA1, RA3, RA4, RA2, RA0, RA1, RA2, RA4, RA0, RA3,
+                             RB1, RB3, RB4, RB2, RB0, RB1, RB2, RB4, RB0, RB3);
+       ROUND_INVERSE (27, 3, RA1, RA2, RA4, RA0, RA3, RA4, RA2, RA0, RA1, RA3,
+                             RB1, RB2, RB4, RB0, RB3, RB4, RB2, RB0, RB1, RB3);
+       ROUND_INVERSE (26, 2, RA4, RA2, RA0, RA1, RA3, RA2, RA3, RA0, RA1, RA4,
+                             RB4, RB2, RB0, RB1, RB3, RB2, RB3, RB0, RB1, RB4);
+       ROUND_INVERSE (25, 1, RA2, RA3, RA0, RA1, RA4, RA4, RA2, RA1, RA0, RA3,
+                             RB2, RB3, RB0, RB1, RB4, RB4, RB2, RB1, RB0, RB3);
+       ROUND_INVERSE (24, 0, RA4, RA2, RA1, RA0, RA3, RA4, RA3, RA2, RA0, RA1,
+                             RB4, RB2, RB1, RB0, RB3, RB4, RB3, RB2, RB0, RB1);
+       ROUND_INVERSE (23, 7, RA4, RA3, RA2, RA0, RA1, RA0, RA4, RA3, RA1, RA2,
+                             RB4, RB3, RB2, RB0, RB1, RB0, RB4, RB3, RB1, RB2);
+       ROUND_INVERSE (22, 6, RA0, RA4, RA3, RA1, RA2, RA4, RA3, RA2, RA1, RA0,
+                             RB0, RB4, RB3, RB1, RB2, RB4, RB3, RB2, RB1, RB0);
+       ROUND_INVERSE (21, 5, RA4, RA3, RA2, RA1, RA0, RA3, RA0, RA1, RA2, RA4,
+                             RB4, RB3, RB2, RB1, RB0, RB3, RB0, RB1, RB2, RB4);
+       ROUND_INVERSE (20, 4, RA3, RA0, RA1, RA2, RA4, RA3, RA2, RA1, RA4, RA0,
+                             RB3, RB0, RB1, RB2, RB4, RB3, RB2, RB1, RB4, RB0);
+       ROUND_INVERSE (19, 3, RA3, RA2, RA1, RA4, RA0, RA1, RA2, RA4, RA3, RA0,
+                             RB3, RB2, RB1, RB4, RB0, RB1, RB2, RB4, RB3, RB0);
+       ROUND_INVERSE (18, 2, RA1, RA2, RA4, RA3, RA0, RA2, RA0, RA4, RA3, RA1,
+                             RB1, RB2, RB4, RB3, RB0, RB2, RB0, RB4, RB3, RB1);
+       ROUND_INVERSE (17, 1, RA2, RA0, RA4, RA3, RA1, RA1, RA2, RA3, RA4, RA0,
+                             RB2, RB0, RB4, RB3, RB1, RB1, RB2, RB3, RB4, RB0);
+       ROUND_INVERSE (16, 0, RA1, RA2, RA3, RA4, RA0, RA1, RA0, RA2, RA4, RA3,
+                             RB1, RB2, RB3, RB4, RB0, RB1, RB0, RB2, RB4, RB3);
+       ROUND_INVERSE (15, 7, RA1, RA0, RA2, RA4, RA3, RA4, RA1, RA0, RA3, RA2,
+                             RB1, RB0, RB2, RB4, RB3, RB4, RB1, RB0, RB3, RB2);
+       ROUND_INVERSE (14, 6, RA4, RA1, RA0, RA3, RA2, RA1, RA0, RA2, RA3, RA4,
+                             RB4, RB1, RB0, RB3, RB2, RB1, RB0, RB2, RB3, RB4);
+       ROUND_INVERSE (13, 5, RA1, RA0, RA2, RA3, RA4, RA0, RA4, RA3, RA2, RA1,
+                             RB1, RB0, RB2, RB3, RB4, RB0, RB4, RB3, RB2, RB1);
+       ROUND_INVERSE (12, 4, RA0, RA4, RA3, RA2, RA1, RA0, RA2, RA3, RA1, RA4,
+                             RB0, RB4, RB3, RB2, RB1, RB0, RB2, RB3, RB1, RB4);
+       ROUND_INVERSE (11, 3, RA0, RA2, RA3, RA1, RA4, RA3, RA2, RA1, RA0, RA4,
+                             RB0, RB2, RB3, RB1, RB4, RB3, RB2, RB1, RB0, RB4);
+       ROUND_INVERSE (10, 2, RA3, RA2, RA1, RA0, RA4, RA2, RA4, RA1, RA0, RA3,
+                             RB3, RB2, RB1, RB0, RB4, RB2, RB4, RB1, RB0, RB3);
+       ROUND_INVERSE (9, 1, RA2, RA4, RA1, RA0, RA3, RA3, RA2, RA0, RA1, RA4,
+                            RB2, RB4, RB1, RB0, RB3, RB3, RB2, RB0, RB1, RB4);
+       ROUND_INVERSE (8, 0, RA3, RA2, RA0, RA1, RA4, RA3, RA4, RA2, RA1, RA0,
+                            RB3, RB2, RB0, RB1, RB4, RB3, RB4, RB2, RB1, RB0);
+       ROUND_INVERSE (7, 7, RA3, RA4, RA2, RA1, RA0, RA1, RA3, RA4, RA0, RA2,
+                            RB3, RB4, RB2, RB1, RB0, RB1, RB3, RB4, RB0, RB2);
+       ROUND_INVERSE (6, 6, RA1, RA3, RA4, RA0, RA2, RA3, RA4, RA2, RA0, RA1,
+                            RB1, RB3, RB4, RB0, RB2, RB3, RB4, RB2, RB0, RB1);
+       ROUND_INVERSE (5, 5, RA3, RA4, RA2, RA0, RA1, RA4, RA1, RA0, RA2, RA3,
+                            RB3, RB4, RB2, RB0, RB1, RB4, RB1, RB0, RB2, RB3);
+       ROUND_INVERSE (4, 4, RA4, RA1, RA0, RA2, RA3, RA4, RA2, RA0, RA3, RA1,
+                            RB4, RB1, RB0, RB2, RB3, RB4, RB2, RB0, RB3, RB1);
+       ROUND_INVERSE (3, 3, RA4, RA2, RA0, RA3, RA1, RA0, RA2, RA3, RA4, RA1,
+                            RB4, RB2, RB0, RB3, RB1, RB0, RB2, RB3, RB4, RB1);
+       ROUND_INVERSE (2, 2, RA0, RA2, RA3, RA4, RA1, RA2, RA1, RA3, RA4, RA0,
+                            RB0, RB2, RB3, RB4, RB1, RB2, RB1, RB3, RB4, RB0);
+       ROUND_INVERSE (1, 1, RA2, RA1, RA3, RA4, RA0, RA0, RA2, RA4, RA3, RA1,
+                            RB2, RB1, RB3, RB4, RB0, RB0, RB2, RB4, RB3, RB1);
+       ROUND_INVERSE (0, 0, RA0, RA2, RA4, RA3, RA1, RA0, RA1, RA2, RA3, RA4,
+                            RB0, RB2, RB4, RB3, RB1, RB0, RB1, RB2, RB3, RB4);
+
+       transpose_4x4(RA0, RA1, RA2, RA3, RA4, RTMP0, RTMP1);
+       transpose_4x4(RB0, RB1, RB2, RB3, RB4, RTMP0, RTMP1);
+
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size __serpent_dec_blk16,.-__serpent_dec_blk16;)
+
+#define inc_le128(x, minus_one, tmp) \
+       vpcmpeqq minus_one, x, tmp; \
+       vpsubq minus_one, x, x; \
+       vpslldq $8, tmp, tmp; \
+       vpsubq tmp, x, x;
+
+.align 8
+.globl _gcry_serpent_avx2_ctr_enc
+ELF(.type   _gcry_serpent_avx2_ctr_enc,@function;)
+_gcry_serpent_avx2_ctr_enc:
+       /* input:
+        *      %rdi: ctx, CTX
+        *      %rsi: dst (16 blocks)
+        *      %rdx: src (16 blocks)
+        *      %rcx: iv (big endian, 128bit)
+        */
+       CFI_STARTPROC();
+
+       movq 8(%rcx), %rax;
+       bswapq %rax;
+
+       vzeroupper;
+
+       vbroadcasti128 .Lbswap128_mask rRIP, RTMP3;
+       vpcmpeqd RNOT, RNOT, RNOT;
+       vpsrldq $8, RNOT, RNOT;   /* ab: -1:0 ; cd: -1:0 */
+       vpaddq RNOT, RNOT, RTMP2; /* ab: -2:0 ; cd: -2:0 */
+
+       /* load IV and byteswap */
+       vmovdqu (%rcx), RTMP4x;
+       vpshufb RTMP3x, RTMP4x, RTMP4x;
+       vmovdqa RTMP4x, RTMP0x;
+       inc_le128(RTMP4x, RNOTx, RTMP1x);
+       vinserti128 $1, RTMP4x, RTMP0, RTMP0;
+       vpshufb RTMP3, RTMP0, RA0; /* +1 ; +0 */
+
+       /* check need for handling 64-bit overflow and carry */
+       cmpq $(0xffffffffffffffff - 16), %rax;
+       ja .Lhandle_ctr_carry;
+
+       /* construct IVs */
+       vpsubq RTMP2, RTMP0, RTMP0; /* +3 ; +2 */
+       vpshufb RTMP3, RTMP0, RA1;
+       vpsubq RTMP2, RTMP0, RTMP0; /* +5 ; +4 */
+       vpshufb RTMP3, RTMP0, RA2;
+       vpsubq RTMP2, RTMP0, RTMP0; /* +7 ; +6 */
+       vpshufb RTMP3, RTMP0, RA3;
+       vpsubq RTMP2, RTMP0, RTMP0; /* +9 ; +8 */
+       vpshufb RTMP3, RTMP0, RB0;
+       vpsubq RTMP2, RTMP0, RTMP0; /* +11 ; +10 */
+       vpshufb RTMP3, RTMP0, RB1;
+       vpsubq RTMP2, RTMP0, RTMP0; /* +13 ; +12 */
+       vpshufb RTMP3, RTMP0, RB2;
+       vpsubq RTMP2, RTMP0, RTMP0; /* +15 ; +14 */
+       vpshufb RTMP3, RTMP0, RB3;
+       vpsubq RTMP2, RTMP0, RTMP0; /* +16 */
+       vpshufb RTMP3x, RTMP0x, RTMP0x;
+
+       jmp .Lctr_carry_done;
+
+.Lhandle_ctr_carry:
+       /* construct IVs */
+       inc_le128(RTMP0, RNOT, RTMP1);
+       inc_le128(RTMP0, RNOT, RTMP1);
+       vpshufb RTMP3, RTMP0, RA1; /* +3 ; +2 */
+       inc_le128(RTMP0, RNOT, RTMP1);
+       inc_le128(RTMP0, RNOT, RTMP1);
+       vpshufb RTMP3, RTMP0, RA2; /* +5 ; +4 */
+       inc_le128(RTMP0, RNOT, RTMP1);
+       inc_le128(RTMP0, RNOT, RTMP1);
+       vpshufb RTMP3, RTMP0, RA3; /* +7 ; +6 */
+       inc_le128(RTMP0, RNOT, RTMP1);
+       inc_le128(RTMP0, RNOT, RTMP1);
+       vpshufb RTMP3, RTMP0, RB0; /* +9 ; +8 */
+       inc_le128(RTMP0, RNOT, RTMP1);
+       inc_le128(RTMP0, RNOT, RTMP1);
+       vpshufb RTMP3, RTMP0, RB1; /* +11 ; +10 */
+       inc_le128(RTMP0, RNOT, RTMP1);
+       inc_le128(RTMP0, RNOT, RTMP1);
+       vpshufb RTMP3, RTMP0, RB2; /* +13 ; +12 */
+       inc_le128(RTMP0, RNOT, RTMP1);
+       inc_le128(RTMP0, RNOT, RTMP1);
+       vpshufb RTMP3, RTMP0, RB3; /* +15 ; +14 */
+       inc_le128(RTMP0, RNOT, RTMP1);
+       vextracti128 $1, RTMP0, RTMP0x;
+       vpshufb RTMP3x, RTMP0x, RTMP0x; /* +16 */
+
+.align 4
+.Lctr_carry_done:
+       /* store new IV */
+       vmovdqu RTMP0x, (%rcx);
+
+       call __serpent_enc_blk16;
+
+       vpxor (0 * 32)(%rdx), RA4, RA4;
+       vpxor (1 * 32)(%rdx), RA1, RA1;
+       vpxor (2 * 32)(%rdx), RA2, RA2;
+       vpxor (3 * 32)(%rdx), RA0, RA0;
+       vpxor (4 * 32)(%rdx), RB4, RB4;
+       vpxor (5 * 32)(%rdx), RB1, RB1;
+       vpxor (6 * 32)(%rdx), RB2, RB2;
+       vpxor (7 * 32)(%rdx), RB0, RB0;
+
+       vmovdqu RA4, (0 * 32)(%rsi);
+       vmovdqu RA1, (1 * 32)(%rsi);
+       vmovdqu RA2, (2 * 32)(%rsi);
+       vmovdqu RA0, (3 * 32)(%rsi);
+       vmovdqu RB4, (4 * 32)(%rsi);
+       vmovdqu RB1, (5 * 32)(%rsi);
+       vmovdqu RB2, (6 * 32)(%rsi);
+       vmovdqu RB0, (7 * 32)(%rsi);
+
+       vzeroall;
+
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size _gcry_serpent_avx2_ctr_enc,.-_gcry_serpent_avx2_ctr_enc;)
+
+.align 8
+.globl _gcry_serpent_avx2_cbc_dec
+ELF(.type   _gcry_serpent_avx2_cbc_dec,@function;)
+_gcry_serpent_avx2_cbc_dec:
+       /* input:
+        *      %rdi: ctx, CTX
+        *      %rsi: dst (16 blocks)
+        *      %rdx: src (16 blocks)
+        *      %rcx: iv
+        */
+       CFI_STARTPROC();
+
+       vzeroupper;
+
+       vmovdqu (0 * 32)(%rdx), RA0;
+       vmovdqu (1 * 32)(%rdx), RA1;
+       vmovdqu (2 * 32)(%rdx), RA2;
+       vmovdqu (3 * 32)(%rdx), RA3;
+       vmovdqu (4 * 32)(%rdx), RB0;
+       vmovdqu (5 * 32)(%rdx), RB1;
+       vmovdqu (6 * 32)(%rdx), RB2;
+       vmovdqu (7 * 32)(%rdx), RB3;
+
+       call __serpent_dec_blk16;
+
+       vmovdqu (%rcx), RNOTx;
+       vinserti128 $1, (%rdx), RNOT, RNOT;
+       vpxor RNOT, RA0, RA0;
+       vpxor (0 * 32 + 16)(%rdx), RA1, RA1;
+       vpxor (1 * 32 + 16)(%rdx), RA2, RA2;
+       vpxor (2 * 32 + 16)(%rdx), RA3, RA3;
+       vpxor (3 * 32 + 16)(%rdx), RB0, RB0;
+       vpxor (4 * 32 + 16)(%rdx), RB1, RB1;
+       vpxor (5 * 32 + 16)(%rdx), RB2, RB2;
+       vpxor (6 * 32 + 16)(%rdx), RB3, RB3;
+       vmovdqu (7 * 32 + 16)(%rdx), RNOTx;
+       vmovdqu RNOTx, (%rcx); /* store new IV */
+
+       vmovdqu RA0, (0 * 32)(%rsi);
+       vmovdqu RA1, (1 * 32)(%rsi);
+       vmovdqu RA2, (2 * 32)(%rsi);
+       vmovdqu RA3, (3 * 32)(%rsi);
+       vmovdqu RB0, (4 * 32)(%rsi);
+       vmovdqu RB1, (5 * 32)(%rsi);
+       vmovdqu RB2, (6 * 32)(%rsi);
+       vmovdqu RB3, (7 * 32)(%rsi);
+
+       vzeroall;
+
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size _gcry_serpent_avx2_cbc_dec,.-_gcry_serpent_avx2_cbc_dec;)
+
+.align 8
+.globl _gcry_serpent_avx2_cfb_dec
+ELF(.type   _gcry_serpent_avx2_cfb_dec,@function;)
+_gcry_serpent_avx2_cfb_dec:
+       /* input:
+        *      %rdi: ctx, CTX
+        *      %rsi: dst (16 blocks)
+        *      %rdx: src (16 blocks)
+        *      %rcx: iv
+        */
+       CFI_STARTPROC();
+
+       vzeroupper;
+
+       /* Load input */
+       vmovdqu (%rcx), RNOTx;
+       vinserti128 $1, (%rdx), RNOT, RA0;
+       vmovdqu (0 * 32 + 16)(%rdx), RA1;
+       vmovdqu (1 * 32 + 16)(%rdx), RA2;
+       vmovdqu (2 * 32 + 16)(%rdx), RA3;
+       vmovdqu (3 * 32 + 16)(%rdx), RB0;
+       vmovdqu (4 * 32 + 16)(%rdx), RB1;
+       vmovdqu (5 * 32 + 16)(%rdx), RB2;
+       vmovdqu (6 * 32 + 16)(%rdx), RB3;
+
+       /* Update IV */
+       vmovdqu (7 * 32 + 16)(%rdx), RNOTx;
+       vmovdqu RNOTx, (%rcx);
+
+       call __serpent_enc_blk16;
+
+       vpxor (0 * 32)(%rdx), RA4, RA4;
+       vpxor (1 * 32)(%rdx), RA1, RA1;
+       vpxor (2 * 32)(%rdx), RA2, RA2;
+       vpxor (3 * 32)(%rdx), RA0, RA0;
+       vpxor (4 * 32)(%rdx), RB4, RB4;
+       vpxor (5 * 32)(%rdx), RB1, RB1;
+       vpxor (6 * 32)(%rdx), RB2, RB2;
+       vpxor (7 * 32)(%rdx), RB0, RB0;
+
+       vmovdqu RA4, (0 * 32)(%rsi);
+       vmovdqu RA1, (1 * 32)(%rsi);
+       vmovdqu RA2, (2 * 32)(%rsi);
+       vmovdqu RA0, (3 * 32)(%rsi);
+       vmovdqu RB4, (4 * 32)(%rsi);
+       vmovdqu RB1, (5 * 32)(%rsi);
+       vmovdqu RB2, (6 * 32)(%rsi);
+       vmovdqu RB0, (7 * 32)(%rsi);
+
+       vzeroall;
+
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size _gcry_serpent_avx2_cfb_dec,.-_gcry_serpent_avx2_cfb_dec;)
+
+.align 8
+.globl _gcry_serpent_avx2_ocb_enc
+ELF(.type _gcry_serpent_avx2_ocb_enc,@function;)
+
+_gcry_serpent_avx2_ocb_enc:
+       /* input:
+        *      %rdi: ctx, CTX
+        *      %rsi: dst (16 blocks)
+        *      %rdx: src (16 blocks)
+        *      %rcx: offset
+        *      %r8 : checksum
+        *      %r9 : L pointers (void *L[16])
+        */
+       CFI_STARTPROC();
+
+       vzeroupper;
+
+       subq $(4 * 8), %rsp;
+       CFI_ADJUST_CFA_OFFSET(4 * 8);
+
+       movq %r10, (0 * 8)(%rsp);
+       movq %r11, (1 * 8)(%rsp);
+       movq %r12, (2 * 8)(%rsp);
+       movq %r13, (3 * 8)(%rsp);
+       CFI_REL_OFFSET(%r10, 0 * 8);
+       CFI_REL_OFFSET(%r11, 1 * 8);
+       CFI_REL_OFFSET(%r12, 2 * 8);
+       CFI_REL_OFFSET(%r13, 3 * 8);
+
+       vmovdqu (%rcx), RTMP0x;
+       vmovdqu (%r8), RTMP1x;
+
+       /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+       /* Checksum_i = Checksum_{i-1} xor P_i  */
+       /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i)  */
+
+#define OCB_INPUT(n, l0reg, l1reg, yreg) \
+         vmovdqu (n * 32)(%rdx), yreg; \
+         vpxor (l0reg), RTMP0x, RNOTx; \
+         vpxor (l1reg), RNOTx, RTMP0x; \
+         vinserti128 $1, RTMP0x, RNOT, RNOT; \
+         vpxor yreg, RTMP1, RTMP1; \
+         vpxor yreg, RNOT, yreg; \
+         vmovdqu RNOT, (n * 32)(%rsi);
+
+       movq (0 * 8)(%r9), %r10;
+       movq (1 * 8)(%r9), %r11;
+       movq (2 * 8)(%r9), %r12;
+       movq (3 * 8)(%r9), %r13;
+       OCB_INPUT(0, %r10, %r11, RA0);
+       OCB_INPUT(1, %r12, %r13, RA1);
+       movq (4 * 8)(%r9), %r10;
+       movq (5 * 8)(%r9), %r11;
+       movq (6 * 8)(%r9), %r12;
+       movq (7 * 8)(%r9), %r13;
+       OCB_INPUT(2, %r10, %r11, RA2);
+       OCB_INPUT(3, %r12, %r13, RA3);
+       movq (8 * 8)(%r9), %r10;
+       movq (9 * 8)(%r9), %r11;
+       movq (10 * 8)(%r9), %r12;
+       movq (11 * 8)(%r9), %r13;
+       OCB_INPUT(4, %r10, %r11, RB0);
+       OCB_INPUT(5, %r12, %r13, RB1);
+       movq (12 * 8)(%r9), %r10;
+       movq (13 * 8)(%r9), %r11;
+       movq (14 * 8)(%r9), %r12;
+       movq (15 * 8)(%r9), %r13;
+       OCB_INPUT(6, %r10, %r11, RB2);
+       OCB_INPUT(7, %r12, %r13, RB3);
+#undef OCB_INPUT
+
+       vextracti128 $1, RTMP1, RNOTx;
+       vmovdqu RTMP0x, (%rcx);
+       vpxor RNOTx, RTMP1x, RTMP1x;
+       vmovdqu RTMP1x, (%r8);
+
+       movq (0 * 8)(%rsp), %r10;
+       movq (1 * 8)(%rsp), %r11;
+       movq (2 * 8)(%rsp), %r12;
+       movq (3 * 8)(%rsp), %r13;
+       CFI_RESTORE(%r10);
+       CFI_RESTORE(%r11);
+       CFI_RESTORE(%r12);
+       CFI_RESTORE(%r13);
+
+       call __serpent_enc_blk16;
+
+       addq $(4 * 8), %rsp;
+       CFI_ADJUST_CFA_OFFSET(-4 * 8);
+
+       vpxor (0 * 32)(%rsi), RA4, RA4;
+       vpxor (1 * 32)(%rsi), RA1, RA1;
+       vpxor (2 * 32)(%rsi), RA2, RA2;
+       vpxor (3 * 32)(%rsi), RA0, RA0;
+       vpxor (4 * 32)(%rsi), RB4, RB4;
+       vpxor (5 * 32)(%rsi), RB1, RB1;
+       vpxor (6 * 32)(%rsi), RB2, RB2;
+       vpxor (7 * 32)(%rsi), RB0, RB0;
+
+       vmovdqu RA4, (0 * 32)(%rsi);
+       vmovdqu RA1, (1 * 32)(%rsi);
+       vmovdqu RA2, (2 * 32)(%rsi);
+       vmovdqu RA0, (3 * 32)(%rsi);
+       vmovdqu RB4, (4 * 32)(%rsi);
+       vmovdqu RB1, (5 * 32)(%rsi);
+       vmovdqu RB2, (6 * 32)(%rsi);
+       vmovdqu RB0, (7 * 32)(%rsi);
+
+       vzeroall;
+
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size _gcry_serpent_avx2_ocb_enc,.-_gcry_serpent_avx2_ocb_enc;)
+
+.align 8
+.globl _gcry_serpent_avx2_ocb_dec
+ELF(.type _gcry_serpent_avx2_ocb_dec,@function;)
+
+_gcry_serpent_avx2_ocb_dec:
+       /* input:
+        *      %rdi: ctx, CTX
+        *      %rsi: dst (16 blocks)
+        *      %rdx: src (16 blocks)
+        *      %rcx: offset
+        *      %r8 : checksum
+        *      %r9 : L pointers (void *L[16])
+        */
+       CFI_STARTPROC();
+
+       vzeroupper;
+
+       subq $(4 * 8), %rsp;
+       CFI_ADJUST_CFA_OFFSET(4 * 8);
+
+       movq %r10, (0 * 8)(%rsp);
+       movq %r11, (1 * 8)(%rsp);
+       movq %r12, (2 * 8)(%rsp);
+       movq %r13, (3 * 8)(%rsp);
+       CFI_REL_OFFSET(%r10, 0 * 8);
+       CFI_REL_OFFSET(%r11, 1 * 8);
+       CFI_REL_OFFSET(%r12, 2 * 8);
+       CFI_REL_OFFSET(%r13, 3 * 8);
+
+       vmovdqu (%rcx), RTMP0x;
+
+       /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+       /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i)  */
+
+#define OCB_INPUT(n, l0reg, l1reg, yreg) \
+         vmovdqu (n * 32)(%rdx), yreg; \
+         vpxor (l0reg), RTMP0x, RNOTx; \
+         vpxor (l1reg), RNOTx, RTMP0x; \
+         vinserti128 $1, RTMP0x, RNOT, RNOT; \
+         vpxor yreg, RNOT, yreg; \
+         vmovdqu RNOT, (n * 32)(%rsi);
+
+       movq (0 * 8)(%r9), %r10;
+       movq (1 * 8)(%r9), %r11;
+       movq (2 * 8)(%r9), %r12;
+       movq (3 * 8)(%r9), %r13;
+       OCB_INPUT(0, %r10, %r11, RA0);
+       OCB_INPUT(1, %r12, %r13, RA1);
+       movq (4 * 8)(%r9), %r10;
+       movq (5 * 8)(%r9), %r11;
+       movq (6 * 8)(%r9), %r12;
+       movq (7 * 8)(%r9), %r13;
+       OCB_INPUT(2, %r10, %r11, RA2);
+       OCB_INPUT(3, %r12, %r13, RA3);
+       movq (8 * 8)(%r9), %r10;
+       movq (9 * 8)(%r9), %r11;
+       movq (10 * 8)(%r9), %r12;
+       movq (11 * 8)(%r9), %r13;
+       OCB_INPUT(4, %r10, %r11, RB0);
+       OCB_INPUT(5, %r12, %r13, RB1);
+       movq (12 * 8)(%r9), %r10;
+       movq (13 * 8)(%r9), %r11;
+       movq (14 * 8)(%r9), %r12;
+       movq (15 * 8)(%r9), %r13;
+       OCB_INPUT(6, %r10, %r11, RB2);
+       OCB_INPUT(7, %r12, %r13, RB3);
+#undef OCB_INPUT
+
+       vmovdqu RTMP0x, (%rcx);
+
+       movq (0 * 8)(%rsp), %r10;
+       movq (1 * 8)(%rsp), %r11;
+       movq (2 * 8)(%rsp), %r12;
+       movq (3 * 8)(%rsp), %r13;
+       CFI_RESTORE(%r10);
+       CFI_RESTORE(%r11);
+       CFI_RESTORE(%r12);
+       CFI_RESTORE(%r13);
+
+       call __serpent_dec_blk16;
+
+       addq $(4 * 8), %rsp;
+       CFI_ADJUST_CFA_OFFSET(-4 * 8);
+
+       vmovdqu (%r8), RTMP1x;
+
+       vpxor (0 * 32)(%rsi), RA0, RA0;
+       vpxor (1 * 32)(%rsi), RA1, RA1;
+       vpxor (2 * 32)(%rsi), RA2, RA2;
+       vpxor (3 * 32)(%rsi), RA3, RA3;
+       vpxor (4 * 32)(%rsi), RB0, RB0;
+       vpxor (5 * 32)(%rsi), RB1, RB1;
+       vpxor (6 * 32)(%rsi), RB2, RB2;
+       vpxor (7 * 32)(%rsi), RB3, RB3;
+
+       /* Checksum_i = Checksum_{i-1} xor P_i  */
+
+       vmovdqu RA0, (0 * 32)(%rsi);
+       vpxor RA0, RTMP1, RTMP1;
+       vmovdqu RA1, (1 * 32)(%rsi);
+       vpxor RA1, RTMP1, RTMP1;
+       vmovdqu RA2, (2 * 32)(%rsi);
+       vpxor RA2, RTMP1, RTMP1;
+       vmovdqu RA3, (3 * 32)(%rsi);
+       vpxor RA3, RTMP1, RTMP1;
+       vmovdqu RB0, (4 * 32)(%rsi);
+       vpxor RB0, RTMP1, RTMP1;
+       vmovdqu RB1, (5 * 32)(%rsi);
+       vpxor RB1, RTMP1, RTMP1;
+       vmovdqu RB2, (6 * 32)(%rsi);
+       vpxor RB2, RTMP1, RTMP1;
+       vmovdqu RB3, (7 * 32)(%rsi);
+       vpxor RB3, RTMP1, RTMP1;
+
+       vextracti128 $1, RTMP1, RNOTx;
+       vpxor RNOTx, RTMP1x, RTMP1x;
+       vmovdqu RTMP1x, (%r8);
+
+       vzeroall;
+
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size _gcry_serpent_avx2_ocb_dec,.-_gcry_serpent_avx2_ocb_dec;)
+
+.align 8
+.globl _gcry_serpent_avx2_ocb_auth
+ELF(.type _gcry_serpent_avx2_ocb_auth,@function;)
+
+_gcry_serpent_avx2_ocb_auth:
+       /* input:
+        *      %rdi: ctx, CTX
+        *      %rsi: abuf (16 blocks)
+        *      %rdx: offset
+        *      %rcx: checksum
+        *      %r8 : L pointers (void *L[16])
+        */
+       CFI_STARTPROC();
+
+       vzeroupper;
+
+       subq $(4 * 8), %rsp;
+       CFI_ADJUST_CFA_OFFSET(4 * 8);
+
+       movq %r10, (0 * 8)(%rsp);
+       movq %r11, (1 * 8)(%rsp);
+       movq %r12, (2 * 8)(%rsp);
+       movq %r13, (3 * 8)(%rsp);
+       CFI_REL_OFFSET(%r10, 0 * 8);
+       CFI_REL_OFFSET(%r11, 1 * 8);
+       CFI_REL_OFFSET(%r12, 2 * 8);
+       CFI_REL_OFFSET(%r13, 3 * 8);
+
+       vmovdqu (%rdx), RTMP0x;
+
+       /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+       /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i)  */
+
+#define OCB_INPUT(n, l0reg, l1reg, yreg) \
+         vmovdqu (n * 32)(%rsi), yreg; \
+         vpxor (l0reg), RTMP0x, RNOTx; \
+         vpxor (l1reg), RNOTx, RTMP0x; \
+         vinserti128 $1, RTMP0x, RNOT, RNOT; \
+         vpxor yreg, RNOT, yreg;
+
+       movq (0 * 8)(%r8), %r10;
+       movq (1 * 8)(%r8), %r11;
+       movq (2 * 8)(%r8), %r12;
+       movq (3 * 8)(%r8), %r13;
+       OCB_INPUT(0, %r10, %r11, RA0);
+       OCB_INPUT(1, %r12, %r13, RA1);
+       movq (4 * 8)(%r8), %r10;
+       movq (5 * 8)(%r8), %r11;
+       movq (6 * 8)(%r8), %r12;
+       movq (7 * 8)(%r8), %r13;
+       OCB_INPUT(2, %r10, %r11, RA2);
+       OCB_INPUT(3, %r12, %r13, RA3);
+       movq (8 * 8)(%r8), %r10;
+       movq (9 * 8)(%r8), %r11;
+       movq (10 * 8)(%r8), %r12;
+       movq (11 * 8)(%r8), %r13;
+       OCB_INPUT(4, %r10, %r11, RB0);
+       OCB_INPUT(5, %r12, %r13, RB1);
+       movq (12 * 8)(%r8), %r10;
+       movq (13 * 8)(%r8), %r11;
+       movq (14 * 8)(%r8), %r12;
+       movq (15 * 8)(%r8), %r13;
+       OCB_INPUT(6, %r10, %r11, RB2);
+       OCB_INPUT(7, %r12, %r13, RB3);
+#undef OCB_INPUT
+
+       vmovdqu RTMP0x, (%rdx);
+
+       movq (0 * 8)(%rsp), %r10;
+       movq (1 * 8)(%rsp), %r11;
+       movq (2 * 8)(%rsp), %r12;
+       movq (3 * 8)(%rsp), %r13;
+       CFI_RESTORE(%r10);
+       CFI_RESTORE(%r11);
+       CFI_RESTORE(%r12);
+       CFI_RESTORE(%r13);
+
+       call __serpent_enc_blk16;
+
+       addq $(4 * 8), %rsp;
+       CFI_ADJUST_CFA_OFFSET(-4 * 8);
+
+       vpxor RA4, RB4, RA4;
+       vpxor RA1, RB1, RA1;
+       vpxor RA2, RB2, RA2;
+       vpxor RA0, RB0, RA0;
+
+       vpxor RA4, RA1, RA1;
+       vpxor RA2, RA0, RA0;
+
+       vpxor RA1, RA0, RTMP1;
+
+       vextracti128 $1, RTMP1, RNOTx;
+       vpxor (%rcx), RTMP1x, RTMP1x;
+       vpxor RNOTx, RTMP1x, RTMP1x;
+       vmovdqu RTMP1x, (%rcx);
+
+       vzeroall;
+
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size _gcry_serpent_avx2_ocb_auth,.-_gcry_serpent_avx2_ocb_auth;)
+
+.align 16
+
+/* For CTR-mode IV byteswap */
+.Lbswap128_mask:
+       .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
+
+#endif /*defined(USE_SERPENT) && defined(ENABLE_AVX2_SUPPORT)*/
+#endif /*__x86_64*/
diff --git a/grub-core/lib/libgcrypt/cipher/serpent-sse2-amd64.S 
b/grub-core/lib/libgcrypt/cipher/serpent-sse2-amd64.S
new file mode 100644
index 000000000..b59350951
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/serpent-sse2-amd64.S
@@ -0,0 +1,1211 @@
+/* serpent-sse2-amd64.S  -  SSE2 implementation of Serpent cipher
+ *
+ * Copyright (C) 2013-2015 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifdef __x86_64
+#include <config.h>
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+    defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && defined(USE_SERPENT)
+
+#include "asm-common-amd64.h"
+
+/* struct serpent_context: */
+#define ctx_keys 0
+
+/* register macros */
+#define CTX %rdi
+
+/* vector registers */
+#define RA0 %xmm0
+#define RA1 %xmm1
+#define RA2 %xmm2
+#define RA3 %xmm3
+#define RA4 %xmm4
+
+#define RB0 %xmm5
+#define RB1 %xmm6
+#define RB2 %xmm7
+#define RB3 %xmm8
+#define RB4 %xmm9
+
+#define RNOT %xmm10
+#define RTMP0 %xmm11
+#define RTMP1 %xmm12
+#define RTMP2 %xmm13
+
+/**********************************************************************
+  helper macros
+ **********************************************************************/
+
+/* vector 32-bit rotation to left */
+#define vec_rol(reg, nleft, tmp) \
+       movdqa reg, tmp;                \
+       pslld $(nleft), tmp;            \
+       psrld $(32 - (nleft)), reg;     \
+       por tmp, reg;
+
+/* vector 32-bit rotation to right */
+#define vec_ror(reg, nright, tmp) \
+       vec_rol(reg, 32 - nright, tmp)
+
+/* 4x4 32-bit integer matrix transpose */
+#define transpose_4x4(x0, x1, x2, x3, t1, t2, t3) \
+       movdqa    x0, t2; \
+       punpckhdq x1, t2; \
+       punpckldq x1, x0; \
+       \
+       movdqa    x2, t1; \
+       punpckldq x3, t1; \
+       punpckhdq x3, x2; \
+       \
+       movdqa     x0, x1; \
+       punpckhqdq t1, x1; \
+       punpcklqdq t1, x0; \
+       \
+       movdqa     t2, x3; \
+       punpckhqdq x2, x3; \
+       punpcklqdq x2, t2; \
+       movdqa     t2, x2;
+
+/* fill xmm register with 32-bit value from memory */
+#define pbroadcastd(mem32, xreg) \
+       movd mem32, xreg; \
+       pshufd $0, xreg, xreg;
+
+/* xor with unaligned memory operand */
+#define pxor_u(umem128, xreg, t) \
+       movdqu umem128, t; \
+       pxor t, xreg;
+
+/* 128-bit wide byte swap */
+#define pbswap(xreg, t0) \
+       /* reorder 32-bit words, [a,b,c,d] => [d,c,b,a] */ \
+       pshufd $0x1b, xreg, xreg; \
+       /* reorder high&low 16-bit words, [d0,d1,c0,c1] => [d1,d0,c1,c0] */ \
+       pshuflw $0xb1, xreg, xreg; \
+       pshufhw $0xb1, xreg, xreg; \
+       /* reorder bytes in 16-bit words */ \
+       movdqa xreg, t0; \
+       psrlw $8, t0; \
+       psllw $8, xreg; \
+       por t0, xreg;
+
+/**********************************************************************
+  8-way serpent
+ **********************************************************************/
+
+/*
+ * These are the S-Boxes of Serpent from following research paper.
+ *
+ *  D. A. Osvik, “Speeding up Serpent,” in Third AES Candidate Conference,
+ *   (New York, New York, USA), p. 317–329, National Institute of Standards and
+ *   Technology, 2000.
+ *
+ * Paper is also available at: http://www.ii.uib.no/~osvik/pub/aes3.pdf
+ *
+ */
+#define SBOX0(r0, r1, r2, r3, r4) \
+       pxor    r0, r3;         movdqa  r1, r4;         \
+       pand    r3, r1;         pxor    r2, r4;         \
+       pxor    r0, r1;         por     r3, r0;         \
+       pxor    r4, r0;         pxor    r3, r4;         \
+       pxor    r2, r3;         por     r1, r2;         \
+       pxor    r4, r2;         pxor    RNOT, r4;       \
+       por     r1, r4;         pxor    r3, r1;         \
+       pxor    r4, r1;         por     r0, r3;         \
+       pxor    r3, r1;         pxor    r3, r4;
+
+#define SBOX0_INVERSE(r0, r1, r2, r3, r4) \
+       pxor    RNOT, r2;       movdqa  r1, r4;         \
+       por     r0, r1;         pxor    RNOT, r4;       \
+       pxor    r2, r1;         por     r4, r2;         \
+       pxor    r3, r1;         pxor    r4, r0;         \
+       pxor    r0, r2;         pand    r3, r0;         \
+       pxor    r0, r4;         por     r1, r0;         \
+       pxor    r2, r0;         pxor    r4, r3;         \
+       pxor    r1, r2;         pxor    r0, r3;         \
+       pxor    r1, r3; \
+       pand    r3, r2; \
+       pxor    r2, r4;
+
+#define SBOX1(r0, r1, r2, r3, r4) \
+       pxor    RNOT, r0;       pxor    RNOT, r2;       \
+       movdqa  r0, r4;         pand    r1, r0;         \
+       pxor    r0, r2;         por     r3, r0;         \
+       pxor    r2, r3;         pxor    r0, r1;         \
+       pxor    r4, r0;         por     r1, r4;         \
+       pxor    r3, r1;         por     r0, r2;         \
+       pand    r4, r2;         pxor    r1, r0;         \
+       pand    r2, r1; \
+       pxor    r0, r1;         pand    r2, r0;         \
+       pxor    r4, r0;
+
+#define SBOX1_INVERSE(r0, r1, r2, r3, r4) \
+       movdqa  r1, r4;         pxor    r3, r1;         \
+       pand    r1, r3;         pxor    r2, r4;         \
+       pxor    r0, r3;         por     r1, r0;         \
+       pxor    r3, r2;         pxor    r4, r0;         \
+       por     r2, r0;         pxor    r3, r1;         \
+       pxor    r1, r0;         por     r3, r1;         \
+       pxor    r0, r1;         pxor    RNOT, r4;       \
+       pxor    r1, r4;         por     r0, r1;         \
+       pxor    r0, r1; \
+       por     r4, r1; \
+       pxor    r1, r3;
+
+#define SBOX2(r0, r1, r2, r3, r4) \
+       movdqa  r0, r4;         pand    r2, r0;         \
+       pxor    r3, r0;         pxor    r1, r2;         \
+       pxor    r0, r2;         por     r4, r3;         \
+       pxor    r1, r3;         pxor    r2, r4;         \
+       movdqa  r3, r1;         por     r4, r3;         \
+       pxor    r0, r3;         pand    r1, r0;         \
+       pxor    r0, r4;         pxor    r3, r1;         \
+       pxor    r4, r1;         pxor    RNOT, r4;
+
+#define SBOX2_INVERSE(r0, r1, r2, r3, r4) \
+       pxor    r3, r2;         pxor    r0, r3;         \
+       movdqa  r3, r4;         pand    r2, r3;         \
+       pxor    r1, r3;         por     r2, r1;         \
+       pxor    r4, r1;         pand    r3, r4;         \
+       pxor    r3, r2;         pand    r0, r4;         \
+       pxor    r2, r4;         pand    r1, r2;         \
+       por     r0, r2;         pxor    RNOT, r3;       \
+       pxor    r3, r2;         pxor    r3, r0;         \
+       pand    r1, r0;         pxor    r4, r3;         \
+       pxor    r0, r3;
+
+#define SBOX3(r0, r1, r2, r3, r4) \
+       movdqa  r0, r4;         por     r3, r0;         \
+       pxor    r1, r3;         pand    r4, r1;         \
+       pxor    r2, r4;         pxor    r3, r2;         \
+       pand    r0, r3;         por     r1, r4;         \
+       pxor    r4, r3;         pxor    r1, r0;         \
+       pand    r0, r4;         pxor    r3, r1;         \
+       pxor    r2, r4;         por     r0, r1;         \
+       pxor    r2, r1;         pxor    r3, r0;         \
+       movdqa  r1, r2;         por     r3, r1;         \
+       pxor    r0, r1;
+
+#define SBOX3_INVERSE(r0, r1, r2, r3, r4) \
+       movdqa  r2, r4;         pxor    r1, r2;         \
+       pxor    r2, r0;         pand    r2, r4;         \
+       pxor    r0, r4;         pand    r1, r0;         \
+       pxor    r3, r1;         por     r4, r3;         \
+       pxor    r3, r2;         pxor    r3, r0;         \
+       pxor    r4, r1;         pand    r2, r3;         \
+       pxor    r1, r3;         pxor    r0, r1;         \
+       por     r2, r1;         pxor    r3, r0;         \
+       pxor    r4, r1; \
+       pxor    r1, r0;
+
+#define SBOX4(r0, r1, r2, r3, r4) \
+       pxor    r3, r1;         pxor    RNOT, r3;       \
+       pxor    r3, r2;         pxor    r0, r3;         \
+       movdqa  r1, r4;         pand    r3, r1;         \
+       pxor    r2, r1;         pxor    r3, r4;         \
+       pxor    r4, r0;         pand    r4, r2;         \
+       pxor    r0, r2;         pand    r1, r0;         \
+       pxor    r0, r3;         por     r1, r4;         \
+       pxor    r0, r4;         por     r3, r0;         \
+       pxor    r2, r0;         pand    r3, r2;         \
+       pxor    RNOT, r0;       pxor    r2, r4;
+
+#define SBOX4_INVERSE(r0, r1, r2, r3, r4) \
+       movdqa  r2, r4;         pand    r3, r2;         \
+       pxor    r1, r2;         por     r3, r1;         \
+       pand    r0, r1;         pxor    r2, r4;         \
+       pxor    r1, r4;         pand    r2, r1;         \
+       pxor    RNOT, r0;       pxor    r4, r3;         \
+       pxor    r3, r1;         pand    r0, r3;         \
+       pxor    r2, r3;         pxor    r1, r0;         \
+       pand    r0, r2;         pxor    r0, r3;         \
+       pxor    r4, r2; \
+       por     r3, r2;         pxor    r0, r3;         \
+       pxor    r1, r2;
+
+#define SBOX5(r0, r1, r2, r3, r4) \
+       pxor    r1, r0;         pxor    r3, r1;         \
+       pxor    RNOT, r3;       movdqa  r1, r4;         \
+       pand    r0, r1;         pxor    r3, r2;         \
+       pxor    r2, r1;         por     r4, r2;         \
+       pxor    r3, r4;         pand    r1, r3;         \
+       pxor    r0, r3;         pxor    r1, r4;         \
+       pxor    r2, r4;         pxor    r0, r2;         \
+       pand    r3, r0;         pxor    RNOT, r2;       \
+       pxor    r4, r0;         por     r3, r4;         \
+       pxor    r4, r2;
+
+#define SBOX5_INVERSE(r0, r1, r2, r3, r4) \
+       pxor    RNOT, r1;       movdqa  r3, r4;         \
+       pxor    r1, r2;         por     r0, r3;         \
+       pxor    r2, r3;         por     r1, r2;         \
+       pand    r0, r2;         pxor    r3, r4;         \
+       pxor    r4, r2;         por     r0, r4;         \
+       pxor    r1, r4;         pand    r2, r1;         \
+       pxor    r3, r1;         pxor    r2, r4;         \
+       pand    r4, r3;         pxor    r1, r4;         \
+       pxor    r4, r3;         pxor    RNOT, r4;       \
+       pxor    r0, r3;
+
+#define SBOX6(r0, r1, r2, r3, r4) \
+       pxor    RNOT, r2;       movdqa  r3, r4;         \
+       pand    r0, r3;         pxor    r4, r0;         \
+       pxor    r2, r3;         por     r4, r2;         \
+       pxor    r3, r1;         pxor    r0, r2;         \
+       por     r1, r0;         pxor    r1, r2;         \
+       pxor    r0, r4;         por     r3, r0;         \
+       pxor    r2, r0;         pxor    r3, r4;         \
+       pxor    r0, r4;         pxor    RNOT, r3;       \
+       pand    r4, r2; \
+       pxor    r3, r2;
+
+#define SBOX6_INVERSE(r0, r1, r2, r3, r4) \
+       pxor    r2, r0;         movdqa  r2, r4;         \
+       pand    r0, r2;         pxor    r3, r4;         \
+       pxor    RNOT, r2;       pxor    r1, r3;         \
+       pxor    r3, r2;         por     r0, r4;         \
+       pxor    r2, r0;         pxor    r4, r3;         \
+       pxor    r1, r4;         pand    r3, r1;         \
+       pxor    r0, r1;         pxor    r3, r0;         \
+       por     r2, r0;         pxor    r1, r3;         \
+       pxor    r0, r4;
+
+#define SBOX7(r0, r1, r2, r3, r4) \
+       movdqa  r1, r4;         por     r2, r1;         \
+       pxor    r3, r1;         pxor    r2, r4;         \
+       pxor    r1, r2;         por     r4, r3;         \
+       pand    r0, r3;         pxor    r2, r4;         \
+       pxor    r1, r3;         por     r4, r1;         \
+       pxor    r0, r1;         por     r4, r0;         \
+       pxor    r2, r0;         pxor    r4, r1;         \
+       pxor    r1, r2;         pand    r0, r1;         \
+       pxor    r4, r1;         pxor    RNOT, r2;       \
+       por     r0, r2; \
+       pxor    r2, r4;
+
+#define SBOX7_INVERSE(r0, r1, r2, r3, r4) \
+       movdqa  r2, r4;         pxor    r0, r2;         \
+       pand    r3, r0;         por     r3, r4;         \
+       pxor    RNOT, r2;       pxor    r1, r3;         \
+       por     r0, r1;         pxor    r2, r0;         \
+       pand    r4, r2;         pand    r4, r3;         \
+       pxor    r2, r1;         pxor    r0, r2;         \
+       por     r2, r0;         pxor    r1, r4;         \
+       pxor    r3, r0;         pxor    r4, r3;         \
+       por     r0, r4;         pxor    r2, r3;         \
+       pxor    r2, r4;
+
+/* Apply SBOX number WHICH to to the block.  */
+#define SBOX(which, r0, r1, r2, r3, r4) \
+       SBOX##which (r0, r1, r2, r3, r4)
+
+/* Apply inverse SBOX number WHICH to to the block.  */
+#define SBOX_INVERSE(which, r0, r1, r2, r3, r4) \
+       SBOX##which##_INVERSE (r0, r1, r2, r3, r4)
+
+/* XOR round key into block state in r0,r1,r2,r3. r4 used as temporary.  */
+#define BLOCK_XOR_KEY(r0, r1, r2, r3, r4, round) \
+       pbroadcastd ((ctx_keys + (round) * 16 + 0 * 4)(CTX), r4); \
+       pxor r4, r0; \
+       pbroadcastd ((ctx_keys + (round) * 16 + 1 * 4)(CTX), r4); \
+       pxor r4, r1; \
+       pbroadcastd ((ctx_keys + (round) * 16 + 2 * 4)(CTX), r4); \
+       pxor r4, r2; \
+       pbroadcastd ((ctx_keys + (round) * 16 + 3 * 4)(CTX), r4); \
+       pxor r4, r3;
+
+/* Apply the linear transformation to BLOCK.  */
+#define LINEAR_TRANSFORMATION(r0, r1, r2, r3, r4) \
+       vec_rol(r0, 13, r4);    \
+       vec_rol(r2, 3, r4);     \
+       pxor r0, r1;            \
+       pxor r2, r1;            \
+       movdqa r0, r4;          \
+       pslld $3, r4;           \
+       pxor r2, r3;            \
+       pxor r4, r3;            \
+       vec_rol(r1, 1, r4);     \
+       vec_rol(r3, 7, r4);     \
+       pxor r1, r0;            \
+       pxor r3, r0;            \
+       movdqa r1, r4;          \
+       pslld $7, r4;           \
+       pxor r3, r2;            \
+       pxor r4, r2;            \
+       vec_rol(r0, 5, r4);     \
+       vec_rol(r2, 22, r4);
+
+/* Apply the inverse linear transformation to BLOCK.  */
+#define LINEAR_TRANSFORMATION_INVERSE(r0, r1, r2, r3, r4) \
+       vec_ror(r2, 22, r4);    \
+       vec_ror(r0, 5, r4);     \
+       movdqa r1, r4;          \
+       pslld $7, r4;           \
+       pxor r3, r2;            \
+       pxor r4, r2;            \
+       pxor r1, r0;            \
+       pxor r3, r0;            \
+       vec_ror(r3, 7, r4);     \
+       vec_ror(r1, 1, r4);     \
+       movdqa r0, r4;          \
+       pslld $3, r4;           \
+       pxor r2, r3;            \
+       pxor r4, r3;            \
+       pxor r0, r1;            \
+       pxor r2, r1;            \
+       vec_ror(r2, 3, r4);     \
+       vec_ror(r0, 13, r4);
+
+/* Apply a Serpent round to eight parallel blocks.  This macro increments
+   `round'.  */
+#define ROUND(round, which, a0, a1, a2, a3, a4, na0, na1, na2, na3, na4, \
+                           b0, b1, b2, b3, b4, nb0, nb1, nb2, nb3, nb4) \
+       BLOCK_XOR_KEY (a0, a1, a2, a3, a4, round);              \
+       SBOX (which, a0, a1, a2, a3, a4);                       \
+               BLOCK_XOR_KEY (b0, b1, b2, b3, b4, round);              \
+               SBOX (which, b0, b1, b2, b3, b4);                       \
+       LINEAR_TRANSFORMATION (na0, na1, na2, na3, na4);        \
+               LINEAR_TRANSFORMATION (nb0, nb1, nb2, nb3, nb4);
+
+/* Apply the last Serpent round to eight parallel blocks.  This macro 
increments
+   `round'.  */
+#define ROUND_LAST(round, which, a0, a1, a2, a3, a4, na0, na1, na2, na3, na4, \
+                                b0, b1, b2, b3, b4, nb0, nb1, nb2, nb3, nb4) \
+       BLOCK_XOR_KEY (a0, a1, a2, a3, a4, round);              \
+       SBOX (which, a0, a1, a2, a3, a4);                       \
+               BLOCK_XOR_KEY (b0, b1, b2, b3, b4, round);              \
+               SBOX (which, b0, b1, b2, b3, b4);                       \
+       BLOCK_XOR_KEY (na0, na1, na2, na3, na4, ((round) + 1));         \
+               BLOCK_XOR_KEY (nb0, nb1, nb2, nb3, nb4, ((round) + 1));
+
+/* Apply an inverse Serpent round to eight parallel blocks.  This macro
+   increments `round'.  */
+#define ROUND_INVERSE(round, which, a0, a1, a2, a3, a4, \
+                                   na0, na1, na2, na3, na4, \
+                                   b0, b1, b2, b3, b4, \
+                                   nb0, nb1, nb2, nb3, nb4) \
+       LINEAR_TRANSFORMATION_INVERSE (a0, a1, a2, a3, a4);     \
+               LINEAR_TRANSFORMATION_INVERSE (b0, b1, b2, b3, b4);     \
+       SBOX_INVERSE (which, a0, a1, a2, a3, a4);               \
+       BLOCK_XOR_KEY (na0, na1, na2, na3, na4, round);         \
+               SBOX_INVERSE (which, b0, b1, b2, b3, b4);               \
+               BLOCK_XOR_KEY (nb0, nb1, nb2, nb3, nb4, round);
+
+/* Apply the first inverse Serpent round to eight parallel blocks.  This macro
+   increments `round'.  */
+#define ROUND_FIRST_INVERSE(round, which, a0, a1, a2, a3, a4, \
+                                         na0, na1, na2, na3, na4, \
+                                         b0, b1, b2, b3, b4, \
+                                         nb0, nb1, nb2, nb3, nb4) \
+       BLOCK_XOR_KEY (a0, a1, a2, a3, a4, ((round) + 1));      \
+               BLOCK_XOR_KEY (b0, b1, b2, b3, b4, ((round) + 1));      \
+       SBOX_INVERSE (which, a0, a1, a2, a3, a4);       \
+       BLOCK_XOR_KEY (na0, na1, na2, na3, na4, round); \
+               SBOX_INVERSE (which, b0, b1, b2, b3, b4);       \
+               BLOCK_XOR_KEY (nb0, nb1, nb2, nb3, nb4, round);
+
+.text
+
+.align 8
+ELF(.type   __serpent_enc_blk8,@function;)
+__serpent_enc_blk8:
+       /* input:
+        *      %rdi: ctx, CTX
+        *      RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: eight parallel plaintext
+        *                                              blocks
+        * output:
+        *      RA4, RA1, RA2, RA0, RB4, RB1, RB2, RB0: eight parallel
+        *                                              ciphertext blocks
+        */
+       CFI_STARTPROC();
+
+       pcmpeqd RNOT, RNOT;
+
+       transpose_4x4(RA0, RA1, RA2, RA3, RA4, RTMP0, RTMP1);
+       transpose_4x4(RB0, RB1, RB2, RB3, RB4, RTMP0, RTMP1);
+
+       ROUND (0, 0, RA0, RA1, RA2, RA3, RA4, RA1, RA4, RA2, RA0, RA3,
+                    RB0, RB1, RB2, RB3, RB4, RB1, RB4, RB2, RB0, RB3);
+       ROUND (1, 1, RA1, RA4, RA2, RA0, RA3, RA2, RA1, RA0, RA4, RA3,
+                    RB1, RB4, RB2, RB0, RB3, RB2, RB1, RB0, RB4, RB3);
+       ROUND (2, 2, RA2, RA1, RA0, RA4, RA3, RA0, RA4, RA1, RA3, RA2,
+                    RB2, RB1, RB0, RB4, RB3, RB0, RB4, RB1, RB3, RB2);
+       ROUND (3, 3, RA0, RA4, RA1, RA3, RA2, RA4, RA1, RA3, RA2, RA0,
+                    RB0, RB4, RB1, RB3, RB2, RB4, RB1, RB3, RB2, RB0);
+       ROUND (4, 4, RA4, RA1, RA3, RA2, RA0, RA1, RA0, RA4, RA2, RA3,
+                    RB4, RB1, RB3, RB2, RB0, RB1, RB0, RB4, RB2, RB3);
+       ROUND (5, 5, RA1, RA0, RA4, RA2, RA3, RA0, RA2, RA1, RA4, RA3,
+                    RB1, RB0, RB4, RB2, RB3, RB0, RB2, RB1, RB4, RB3);
+       ROUND (6, 6, RA0, RA2, RA1, RA4, RA3, RA0, RA2, RA3, RA1, RA4,
+                    RB0, RB2, RB1, RB4, RB3, RB0, RB2, RB3, RB1, RB4);
+       ROUND (7, 7, RA0, RA2, RA3, RA1, RA4, RA4, RA1, RA2, RA0, RA3,
+                    RB0, RB2, RB3, RB1, RB4, RB4, RB1, RB2, RB0, RB3);
+       ROUND (8, 0, RA4, RA1, RA2, RA0, RA3, RA1, RA3, RA2, RA4, RA0,
+                    RB4, RB1, RB2, RB0, RB3, RB1, RB3, RB2, RB4, RB0);
+       ROUND (9, 1, RA1, RA3, RA2, RA4, RA0, RA2, RA1, RA4, RA3, RA0,
+                    RB1, RB3, RB2, RB4, RB0, RB2, RB1, RB4, RB3, RB0);
+       ROUND (10, 2, RA2, RA1, RA4, RA3, RA0, RA4, RA3, RA1, RA0, RA2,
+                     RB2, RB1, RB4, RB3, RB0, RB4, RB3, RB1, RB0, RB2);
+       ROUND (11, 3, RA4, RA3, RA1, RA0, RA2, RA3, RA1, RA0, RA2, RA4,
+                     RB4, RB3, RB1, RB0, RB2, RB3, RB1, RB0, RB2, RB4);
+       ROUND (12, 4, RA3, RA1, RA0, RA2, RA4, RA1, RA4, RA3, RA2, RA0,
+                     RB3, RB1, RB0, RB2, RB4, RB1, RB4, RB3, RB2, RB0);
+       ROUND (13, 5, RA1, RA4, RA3, RA2, RA0, RA4, RA2, RA1, RA3, RA0,
+                     RB1, RB4, RB3, RB2, RB0, RB4, RB2, RB1, RB3, RB0);
+       ROUND (14, 6, RA4, RA2, RA1, RA3, RA0, RA4, RA2, RA0, RA1, RA3,
+                     RB4, RB2, RB1, RB3, RB0, RB4, RB2, RB0, RB1, RB3);
+       ROUND (15, 7, RA4, RA2, RA0, RA1, RA3, RA3, RA1, RA2, RA4, RA0,
+                     RB4, RB2, RB0, RB1, RB3, RB3, RB1, RB2, RB4, RB0);
+       ROUND (16, 0, RA3, RA1, RA2, RA4, RA0, RA1, RA0, RA2, RA3, RA4,
+                     RB3, RB1, RB2, RB4, RB0, RB1, RB0, RB2, RB3, RB4);
+       ROUND (17, 1, RA1, RA0, RA2, RA3, RA4, RA2, RA1, RA3, RA0, RA4,
+                     RB1, RB0, RB2, RB3, RB4, RB2, RB1, RB3, RB0, RB4);
+       ROUND (18, 2, RA2, RA1, RA3, RA0, RA4, RA3, RA0, RA1, RA4, RA2,
+                     RB2, RB1, RB3, RB0, RB4, RB3, RB0, RB1, RB4, RB2);
+       ROUND (19, 3, RA3, RA0, RA1, RA4, RA2, RA0, RA1, RA4, RA2, RA3,
+                     RB3, RB0, RB1, RB4, RB2, RB0, RB1, RB4, RB2, RB3);
+       ROUND (20, 4, RA0, RA1, RA4, RA2, RA3, RA1, RA3, RA0, RA2, RA4,
+                     RB0, RB1, RB4, RB2, RB3, RB1, RB3, RB0, RB2, RB4);
+       ROUND (21, 5, RA1, RA3, RA0, RA2, RA4, RA3, RA2, RA1, RA0, RA4,
+                     RB1, RB3, RB0, RB2, RB4, RB3, RB2, RB1, RB0, RB4);
+       ROUND (22, 6, RA3, RA2, RA1, RA0, RA4, RA3, RA2, RA4, RA1, RA0,
+                     RB3, RB2, RB1, RB0, RB4, RB3, RB2, RB4, RB1, RB0);
+       ROUND (23, 7, RA3, RA2, RA4, RA1, RA0, RA0, RA1, RA2, RA3, RA4,
+                     RB3, RB2, RB4, RB1, RB0, RB0, RB1, RB2, RB3, RB4);
+       ROUND (24, 0, RA0, RA1, RA2, RA3, RA4, RA1, RA4, RA2, RA0, RA3,
+                     RB0, RB1, RB2, RB3, RB4, RB1, RB4, RB2, RB0, RB3);
+       ROUND (25, 1, RA1, RA4, RA2, RA0, RA3, RA2, RA1, RA0, RA4, RA3,
+                     RB1, RB4, RB2, RB0, RB3, RB2, RB1, RB0, RB4, RB3);
+       ROUND (26, 2, RA2, RA1, RA0, RA4, RA3, RA0, RA4, RA1, RA3, RA2,
+                     RB2, RB1, RB0, RB4, RB3, RB0, RB4, RB1, RB3, RB2);
+       ROUND (27, 3, RA0, RA4, RA1, RA3, RA2, RA4, RA1, RA3, RA2, RA0,
+                     RB0, RB4, RB1, RB3, RB2, RB4, RB1, RB3, RB2, RB0);
+       ROUND (28, 4, RA4, RA1, RA3, RA2, RA0, RA1, RA0, RA4, RA2, RA3,
+                     RB4, RB1, RB3, RB2, RB0, RB1, RB0, RB4, RB2, RB3);
+       ROUND (29, 5, RA1, RA0, RA4, RA2, RA3, RA0, RA2, RA1, RA4, RA3,
+                     RB1, RB0, RB4, RB2, RB3, RB0, RB2, RB1, RB4, RB3);
+       ROUND (30, 6, RA0, RA2, RA1, RA4, RA3, RA0, RA2, RA3, RA1, RA4,
+                     RB0, RB2, RB1, RB4, RB3, RB0, RB2, RB3, RB1, RB4);
+       ROUND_LAST (31, 7, RA0, RA2, RA3, RA1, RA4, RA4, RA1, RA2, RA0, RA3,
+                          RB0, RB2, RB3, RB1, RB4, RB4, RB1, RB2, RB0, RB3);
+
+       transpose_4x4(RA4, RA1, RA2, RA0, RA3, RTMP0, RTMP1);
+       transpose_4x4(RB4, RB1, RB2, RB0, RB3, RTMP0, RTMP1);
+
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size __serpent_enc_blk8,.-__serpent_enc_blk8;)
+
+.align 8
+ELF(.type   __serpent_dec_blk8,@function;)
+__serpent_dec_blk8:
+       /* input:
+        *      %rdi: ctx, CTX
+        *      RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: eight parallel
+        *                                              ciphertext blocks
+        * output:
+        *      RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: eight parallel plaintext
+        *                                              blocks
+        */
+       CFI_STARTPROC();
+
+       pcmpeqd RNOT, RNOT;
+
+       transpose_4x4(RA0, RA1, RA2, RA3, RA4, RTMP0, RTMP1);
+       transpose_4x4(RB0, RB1, RB2, RB3, RB4, RTMP0, RTMP1);
+
+       ROUND_FIRST_INVERSE (31, 7, RA0, RA1, RA2, RA3, RA4,
+                                   RA3, RA0, RA1, RA4, RA2,
+                                   RB0, RB1, RB2, RB3, RB4,
+                                   RB3, RB0, RB1, RB4, RB2);
+       ROUND_INVERSE (30, 6, RA3, RA0, RA1, RA4, RA2, RA0, RA1, RA2, RA4, RA3,
+                             RB3, RB0, RB1, RB4, RB2, RB0, RB1, RB2, RB4, RB3);
+       ROUND_INVERSE (29, 5, RA0, RA1, RA2, RA4, RA3, RA1, RA3, RA4, RA2, RA0,
+                             RB0, RB1, RB2, RB4, RB3, RB1, RB3, RB4, RB2, RB0);
+       ROUND_INVERSE (28, 4, RA1, RA3, RA4, RA2, RA0, RA1, RA2, RA4, RA0, RA3,
+                             RB1, RB3, RB4, RB2, RB0, RB1, RB2, RB4, RB0, RB3);
+       ROUND_INVERSE (27, 3, RA1, RA2, RA4, RA0, RA3, RA4, RA2, RA0, RA1, RA3,
+                             RB1, RB2, RB4, RB0, RB3, RB4, RB2, RB0, RB1, RB3);
+       ROUND_INVERSE (26, 2, RA4, RA2, RA0, RA1, RA3, RA2, RA3, RA0, RA1, RA4,
+                             RB4, RB2, RB0, RB1, RB3, RB2, RB3, RB0, RB1, RB4);
+       ROUND_INVERSE (25, 1, RA2, RA3, RA0, RA1, RA4, RA4, RA2, RA1, RA0, RA3,
+                             RB2, RB3, RB0, RB1, RB4, RB4, RB2, RB1, RB0, RB3);
+       ROUND_INVERSE (24, 0, RA4, RA2, RA1, RA0, RA3, RA4, RA3, RA2, RA0, RA1,
+                             RB4, RB2, RB1, RB0, RB3, RB4, RB3, RB2, RB0, RB1);
+       ROUND_INVERSE (23, 7, RA4, RA3, RA2, RA0, RA1, RA0, RA4, RA3, RA1, RA2,
+                             RB4, RB3, RB2, RB0, RB1, RB0, RB4, RB3, RB1, RB2);
+       ROUND_INVERSE (22, 6, RA0, RA4, RA3, RA1, RA2, RA4, RA3, RA2, RA1, RA0,
+                             RB0, RB4, RB3, RB1, RB2, RB4, RB3, RB2, RB1, RB0);
+       ROUND_INVERSE (21, 5, RA4, RA3, RA2, RA1, RA0, RA3, RA0, RA1, RA2, RA4,
+                             RB4, RB3, RB2, RB1, RB0, RB3, RB0, RB1, RB2, RB4);
+       ROUND_INVERSE (20, 4, RA3, RA0, RA1, RA2, RA4, RA3, RA2, RA1, RA4, RA0,
+                             RB3, RB0, RB1, RB2, RB4, RB3, RB2, RB1, RB4, RB0);
+       ROUND_INVERSE (19, 3, RA3, RA2, RA1, RA4, RA0, RA1, RA2, RA4, RA3, RA0,
+                             RB3, RB2, RB1, RB4, RB0, RB1, RB2, RB4, RB3, RB0);
+       ROUND_INVERSE (18, 2, RA1, RA2, RA4, RA3, RA0, RA2, RA0, RA4, RA3, RA1,
+                             RB1, RB2, RB4, RB3, RB0, RB2, RB0, RB4, RB3, RB1);
+       ROUND_INVERSE (17, 1, RA2, RA0, RA4, RA3, RA1, RA1, RA2, RA3, RA4, RA0,
+                             RB2, RB0, RB4, RB3, RB1, RB1, RB2, RB3, RB4, RB0);
+       ROUND_INVERSE (16, 0, RA1, RA2, RA3, RA4, RA0, RA1, RA0, RA2, RA4, RA3,
+                             RB1, RB2, RB3, RB4, RB0, RB1, RB0, RB2, RB4, RB3);
+       ROUND_INVERSE (15, 7, RA1, RA0, RA2, RA4, RA3, RA4, RA1, RA0, RA3, RA2,
+                             RB1, RB0, RB2, RB4, RB3, RB4, RB1, RB0, RB3, RB2);
+       ROUND_INVERSE (14, 6, RA4, RA1, RA0, RA3, RA2, RA1, RA0, RA2, RA3, RA4,
+                             RB4, RB1, RB0, RB3, RB2, RB1, RB0, RB2, RB3, RB4);
+       ROUND_INVERSE (13, 5, RA1, RA0, RA2, RA3, RA4, RA0, RA4, RA3, RA2, RA1,
+                             RB1, RB0, RB2, RB3, RB4, RB0, RB4, RB3, RB2, RB1);
+       ROUND_INVERSE (12, 4, RA0, RA4, RA3, RA2, RA1, RA0, RA2, RA3, RA1, RA4,
+                             RB0, RB4, RB3, RB2, RB1, RB0, RB2, RB3, RB1, RB4);
+       ROUND_INVERSE (11, 3, RA0, RA2, RA3, RA1, RA4, RA3, RA2, RA1, RA0, RA4,
+                             RB0, RB2, RB3, RB1, RB4, RB3, RB2, RB1, RB0, RB4);
+       ROUND_INVERSE (10, 2, RA3, RA2, RA1, RA0, RA4, RA2, RA4, RA1, RA0, RA3,
+                             RB3, RB2, RB1, RB0, RB4, RB2, RB4, RB1, RB0, RB3);
+       ROUND_INVERSE (9, 1, RA2, RA4, RA1, RA0, RA3, RA3, RA2, RA0, RA1, RA4,
+                            RB2, RB4, RB1, RB0, RB3, RB3, RB2, RB0, RB1, RB4);
+       ROUND_INVERSE (8, 0, RA3, RA2, RA0, RA1, RA4, RA3, RA4, RA2, RA1, RA0,
+                            RB3, RB2, RB0, RB1, RB4, RB3, RB4, RB2, RB1, RB0);
+       ROUND_INVERSE (7, 7, RA3, RA4, RA2, RA1, RA0, RA1, RA3, RA4, RA0, RA2,
+                            RB3, RB4, RB2, RB1, RB0, RB1, RB3, RB4, RB0, RB2);
+       ROUND_INVERSE (6, 6, RA1, RA3, RA4, RA0, RA2, RA3, RA4, RA2, RA0, RA1,
+                            RB1, RB3, RB4, RB0, RB2, RB3, RB4, RB2, RB0, RB1);
+       ROUND_INVERSE (5, 5, RA3, RA4, RA2, RA0, RA1, RA4, RA1, RA0, RA2, RA3,
+                            RB3, RB4, RB2, RB0, RB1, RB4, RB1, RB0, RB2, RB3);
+       ROUND_INVERSE (4, 4, RA4, RA1, RA0, RA2, RA3, RA4, RA2, RA0, RA3, RA1,
+                            RB4, RB1, RB0, RB2, RB3, RB4, RB2, RB0, RB3, RB1);
+       ROUND_INVERSE (3, 3, RA4, RA2, RA0, RA3, RA1, RA0, RA2, RA3, RA4, RA1,
+                            RB4, RB2, RB0, RB3, RB1, RB0, RB2, RB3, RB4, RB1);
+       ROUND_INVERSE (2, 2, RA0, RA2, RA3, RA4, RA1, RA2, RA1, RA3, RA4, RA0,
+                            RB0, RB2, RB3, RB4, RB1, RB2, RB1, RB3, RB4, RB0);
+       ROUND_INVERSE (1, 1, RA2, RA1, RA3, RA4, RA0, RA0, RA2, RA4, RA3, RA1,
+                            RB2, RB1, RB3, RB4, RB0, RB0, RB2, RB4, RB3, RB1);
+       ROUND_INVERSE (0, 0, RA0, RA2, RA4, RA3, RA1, RA0, RA1, RA2, RA3, RA4,
+                            RB0, RB2, RB4, RB3, RB1, RB0, RB1, RB2, RB3, RB4);
+
+       transpose_4x4(RA0, RA1, RA2, RA3, RA4, RTMP0, RTMP1);
+       transpose_4x4(RB0, RB1, RB2, RB3, RB4, RTMP0, RTMP1);
+
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size __serpent_dec_blk8,.-__serpent_dec_blk8;)
+
+.align 8
+.globl _gcry_serpent_sse2_ctr_enc
+ELF(.type   _gcry_serpent_sse2_ctr_enc,@function;)
+_gcry_serpent_sse2_ctr_enc:
+       /* input:
+        *      %rdi: ctx, CTX
+        *      %rsi: dst (8 blocks)
+        *      %rdx: src (8 blocks)
+        *      %rcx: iv (big endian, 128bit)
+        */
+       CFI_STARTPROC();
+
+       /* load IV and byteswap */
+       movdqu (%rcx), RA0;
+       movdqa RA0, RTMP0;
+       pbswap(RTMP0, RTMP1); /* be => le */
+
+       pcmpeqd RNOT, RNOT;
+       psrldq $8, RNOT; /* low: -1, high: 0 */
+       movdqa RNOT, RTMP2;
+       paddq RTMP2, RTMP2; /* low: -2, high: 0 */
+
+       /* construct IVs */
+       movdqa RTMP0, RTMP1;
+       psubq RNOT, RTMP0; /* +1 */
+       movdqa RTMP0, RA1;
+       psubq RTMP2, RTMP1; /* +2 */
+       movdqa RTMP1, RA2;
+       psubq RTMP2, RTMP0; /* +3 */
+       movdqa RTMP0, RA3;
+       psubq RTMP2, RTMP1; /* +4 */
+       movdqa RTMP1, RB0;
+       psubq RTMP2, RTMP0; /* +5 */
+       movdqa RTMP0, RB1;
+       psubq RTMP2, RTMP1; /* +6 */
+       movdqa RTMP1, RB2;
+       psubq RTMP2, RTMP0; /* +7 */
+       movdqa RTMP0, RB3;
+       psubq RTMP2, RTMP1; /* +8 */
+
+       /* check need for handling 64-bit overflow and carry */
+       cmpl $0xffffffff, 8(%rcx);
+       jne .Lno_ctr_carry;
+
+       movl 12(%rcx), %eax;
+       bswapl %eax;
+       cmpl $-8, %eax;
+       jb .Lno_ctr_carry;
+       pslldq $8, RNOT; /* low: 0, high: -1 */
+       je .Lcarry_RTMP0;
+
+       cmpl $-6, %eax;
+       jb .Lcarry_RB3;
+       je .Lcarry_RB2;
+
+       cmpl $-4, %eax;
+       jb .Lcarry_RB1;
+       je .Lcarry_RB0;
+
+       cmpl $-2, %eax;
+       jb .Lcarry_RA3;
+       je .Lcarry_RA2;
+
+       psubq RNOT, RA1;
+.Lcarry_RA2:
+       psubq RNOT, RA2;
+.Lcarry_RA3:
+       psubq RNOT, RA3;
+.Lcarry_RB0:
+       psubq RNOT, RB0;
+.Lcarry_RB1:
+       psubq RNOT, RB1;
+.Lcarry_RB2:
+       psubq RNOT, RB2;
+.Lcarry_RB3:
+       psubq RNOT, RB3;
+.Lcarry_RTMP0:
+       psubq RNOT, RTMP1;
+
+.Lno_ctr_carry:
+       /* le => be */
+       pbswap(RA1, RTMP0);
+       pbswap(RA2, RTMP0);
+       pbswap(RA3, RTMP0);
+       pbswap(RB0, RTMP0);
+       pbswap(RB1, RTMP0);
+       pbswap(RB2, RTMP0);
+       pbswap(RB3, RTMP0);
+       pbswap(RTMP1, RTMP0);
+       /* store new IV */
+       movdqu RTMP1, (%rcx);
+
+       call __serpent_enc_blk8;
+
+       pxor_u((0 * 16)(%rdx), RA4, RTMP0);
+       pxor_u((1 * 16)(%rdx), RA1, RTMP0);
+       pxor_u((2 * 16)(%rdx), RA2, RTMP0);
+       pxor_u((3 * 16)(%rdx), RA0, RTMP0);
+       pxor_u((4 * 16)(%rdx), RB4, RTMP0);
+       pxor_u((5 * 16)(%rdx), RB1, RTMP0);
+       pxor_u((6 * 16)(%rdx), RB2, RTMP0);
+       pxor_u((7 * 16)(%rdx), RB0, RTMP0);
+
+       movdqu RA4, (0 * 16)(%rsi);
+       movdqu RA1, (1 * 16)(%rsi);
+       movdqu RA2, (2 * 16)(%rsi);
+       movdqu RA0, (3 * 16)(%rsi);
+       movdqu RB4, (4 * 16)(%rsi);
+       movdqu RB1, (5 * 16)(%rsi);
+       movdqu RB2, (6 * 16)(%rsi);
+       movdqu RB0, (7 * 16)(%rsi);
+
+       /* clear the used registers */
+       pxor RA0, RA0;
+       pxor RA1, RA1;
+       pxor RA2, RA2;
+       pxor RA3, RA3;
+       pxor RA4, RA4;
+       pxor RB0, RB0;
+       pxor RB1, RB1;
+       pxor RB2, RB2;
+       pxor RB3, RB3;
+       pxor RB4, RB4;
+       pxor RTMP0, RTMP0;
+       pxor RTMP1, RTMP1;
+       pxor RTMP2, RTMP2;
+       pxor RNOT, RNOT;
+
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size _gcry_serpent_sse2_ctr_enc,.-_gcry_serpent_sse2_ctr_enc;)
+
+.align 8
+.globl _gcry_serpent_sse2_cbc_dec
+ELF(.type   _gcry_serpent_sse2_cbc_dec,@function;)
+_gcry_serpent_sse2_cbc_dec:
+       /* input:
+        *      %rdi: ctx, CTX
+        *      %rsi: dst (8 blocks)
+        *      %rdx: src (8 blocks)
+        *      %rcx: iv
+        */
+       CFI_STARTPROC();
+
+       movdqu (0 * 16)(%rdx), RA0;
+       movdqu (1 * 16)(%rdx), RA1;
+       movdqu (2 * 16)(%rdx), RA2;
+       movdqu (3 * 16)(%rdx), RA3;
+       movdqu (4 * 16)(%rdx), RB0;
+       movdqu (5 * 16)(%rdx), RB1;
+       movdqu (6 * 16)(%rdx), RB2;
+       movdqu (7 * 16)(%rdx), RB3;
+
+       call __serpent_dec_blk8;
+
+       movdqu (7 * 16)(%rdx), RNOT;
+       pxor_u((%rcx), RA0, RTMP0);
+       pxor_u((0 * 16)(%rdx), RA1, RTMP0);
+       pxor_u((1 * 16)(%rdx), RA2, RTMP0);
+       pxor_u((2 * 16)(%rdx), RA3, RTMP0);
+       pxor_u((3 * 16)(%rdx), RB0, RTMP0);
+       pxor_u((4 * 16)(%rdx), RB1, RTMP0);
+       pxor_u((5 * 16)(%rdx), RB2, RTMP0);
+       pxor_u((6 * 16)(%rdx), RB3, RTMP0);
+       movdqu RNOT, (%rcx); /* store new IV */
+
+       movdqu RA0, (0 * 16)(%rsi);
+       movdqu RA1, (1 * 16)(%rsi);
+       movdqu RA2, (2 * 16)(%rsi);
+       movdqu RA3, (3 * 16)(%rsi);
+       movdqu RB0, (4 * 16)(%rsi);
+       movdqu RB1, (5 * 16)(%rsi);
+       movdqu RB2, (6 * 16)(%rsi);
+       movdqu RB3, (7 * 16)(%rsi);
+
+       /* clear the used registers */
+       pxor RA0, RA0;
+       pxor RA1, RA1;
+       pxor RA2, RA2;
+       pxor RA3, RA3;
+       pxor RA4, RA4;
+       pxor RB0, RB0;
+       pxor RB1, RB1;
+       pxor RB2, RB2;
+       pxor RB3, RB3;
+       pxor RB4, RB4;
+       pxor RTMP0, RTMP0;
+       pxor RTMP1, RTMP1;
+       pxor RTMP2, RTMP2;
+       pxor RNOT, RNOT;
+
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size _gcry_serpent_sse2_cbc_dec,.-_gcry_serpent_sse2_cbc_dec;)
+
+.align 8
+.globl _gcry_serpent_sse2_cfb_dec
+ELF(.type   _gcry_serpent_sse2_cfb_dec,@function;)
+_gcry_serpent_sse2_cfb_dec:
+       /* input:
+        *      %rdi: ctx, CTX
+        *      %rsi: dst (8 blocks)
+        *      %rdx: src (8 blocks)
+        *      %rcx: iv
+        */
+       CFI_STARTPROC();
+
+       /* Load input */
+       movdqu (%rcx), RA0;
+       movdqu 0 * 16(%rdx), RA1;
+       movdqu 1 * 16(%rdx), RA2;
+       movdqu 2 * 16(%rdx), RA3;
+       movdqu 3 * 16(%rdx), RB0;
+       movdqu 4 * 16(%rdx), RB1;
+       movdqu 5 * 16(%rdx), RB2;
+       movdqu 6 * 16(%rdx), RB3;
+
+       /* Update IV */
+       movdqu 7 * 16(%rdx), RNOT;
+       movdqu RNOT, (%rcx);
+
+       call __serpent_enc_blk8;
+
+       pxor_u((0 * 16)(%rdx), RA4, RTMP0);
+       pxor_u((1 * 16)(%rdx), RA1, RTMP0);
+       pxor_u((2 * 16)(%rdx), RA2, RTMP0);
+       pxor_u((3 * 16)(%rdx), RA0, RTMP0);
+       pxor_u((4 * 16)(%rdx), RB4, RTMP0);
+       pxor_u((5 * 16)(%rdx), RB1, RTMP0);
+       pxor_u((6 * 16)(%rdx), RB2, RTMP0);
+       pxor_u((7 * 16)(%rdx), RB0, RTMP0);
+
+       movdqu RA4, (0 * 16)(%rsi);
+       movdqu RA1, (1 * 16)(%rsi);
+       movdqu RA2, (2 * 16)(%rsi);
+       movdqu RA0, (3 * 16)(%rsi);
+       movdqu RB4, (4 * 16)(%rsi);
+       movdqu RB1, (5 * 16)(%rsi);
+       movdqu RB2, (6 * 16)(%rsi);
+       movdqu RB0, (7 * 16)(%rsi);
+
+       /* clear the used registers */
+       pxor RA0, RA0;
+       pxor RA1, RA1;
+       pxor RA2, RA2;
+       pxor RA3, RA3;
+       pxor RA4, RA4;
+       pxor RB0, RB0;
+       pxor RB1, RB1;
+       pxor RB2, RB2;
+       pxor RB3, RB3;
+       pxor RB4, RB4;
+       pxor RTMP0, RTMP0;
+       pxor RTMP1, RTMP1;
+       pxor RTMP2, RTMP2;
+       pxor RNOT, RNOT;
+
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size _gcry_serpent_sse2_cfb_dec,.-_gcry_serpent_sse2_cfb_dec;)
+
+.align 8
+.globl _gcry_serpent_sse2_ocb_enc
+ELF(.type _gcry_serpent_sse2_ocb_enc,@function;)
+
+_gcry_serpent_sse2_ocb_enc:
+       /* input:
+        *      %rdi: ctx, CTX
+        *      %rsi: dst (8 blocks)
+        *      %rdx: src (8 blocks)
+        *      %rcx: offset
+        *      %r8 : checksum
+        *      %r9 : L pointers (void *L[8])
+        */
+       CFI_STARTPROC();
+
+       subq $(4 * 8), %rsp;
+       CFI_ADJUST_CFA_OFFSET(4 * 8);
+
+       movq %r10, (0 * 8)(%rsp);
+       movq %r11, (1 * 8)(%rsp);
+       movq %r12, (2 * 8)(%rsp);
+       movq %r13, (3 * 8)(%rsp);
+       CFI_REL_OFFSET(%r10, 0 * 8);
+       CFI_REL_OFFSET(%r11, 1 * 8);
+       CFI_REL_OFFSET(%r12, 2 * 8);
+       CFI_REL_OFFSET(%r13, 3 * 8);
+
+       movdqu (%rcx), RTMP0;
+       movdqu (%r8), RTMP1;
+
+       /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+       /* Checksum_i = Checksum_{i-1} xor P_i  */
+       /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i)  */
+
+#define OCB_INPUT(n, lreg, xreg) \
+         movdqu (n * 16)(%rdx), xreg; \
+         movdqu (lreg), RNOT; \
+         pxor RNOT, RTMP0; \
+         pxor xreg, RTMP1; \
+         pxor RTMP0, xreg; \
+         movdqu RTMP0, (n * 16)(%rsi);
+       movq (0 * 8)(%r9), %r10;
+       movq (1 * 8)(%r9), %r11;
+       movq (2 * 8)(%r9), %r12;
+       movq (3 * 8)(%r9), %r13;
+       OCB_INPUT(0, %r10, RA0);
+       OCB_INPUT(1, %r11, RA1);
+       OCB_INPUT(2, %r12, RA2);
+       OCB_INPUT(3, %r13, RA3);
+       movq (4 * 8)(%r9), %r10;
+       movq (5 * 8)(%r9), %r11;
+       movq (6 * 8)(%r9), %r12;
+       movq (7 * 8)(%r9), %r13;
+       OCB_INPUT(4, %r10, RB0);
+       OCB_INPUT(5, %r11, RB1);
+       OCB_INPUT(6, %r12, RB2);
+       OCB_INPUT(7, %r13, RB3);
+#undef OCB_INPUT
+
+       movdqu RTMP0, (%rcx);
+       movdqu RTMP1, (%r8);
+
+       movq (0 * 8)(%rsp), %r10;
+       movq (1 * 8)(%rsp), %r11;
+       movq (2 * 8)(%rsp), %r12;
+       movq (3 * 8)(%rsp), %r13;
+       CFI_RESTORE(%r10);
+       CFI_RESTORE(%r11);
+       CFI_RESTORE(%r12);
+       CFI_RESTORE(%r13);
+
+       call __serpent_enc_blk8;
+
+       addq $(4 * 8), %rsp;
+       CFI_ADJUST_CFA_OFFSET(-4 * 8);
+
+       pxor_u((0 * 16)(%rsi), RA4, RTMP0);
+       pxor_u((1 * 16)(%rsi), RA1, RTMP0);
+       pxor_u((2 * 16)(%rsi), RA2, RTMP0);
+       pxor_u((3 * 16)(%rsi), RA0, RTMP0);
+       pxor_u((4 * 16)(%rsi), RB4, RTMP0);
+       pxor_u((5 * 16)(%rsi), RB1, RTMP0);
+       pxor_u((6 * 16)(%rsi), RB2, RTMP0);
+       pxor_u((7 * 16)(%rsi), RB0, RTMP0);
+
+       movdqu RA4, (0 * 16)(%rsi);
+       movdqu RA1, (1 * 16)(%rsi);
+       movdqu RA2, (2 * 16)(%rsi);
+       movdqu RA0, (3 * 16)(%rsi);
+       movdqu RB4, (4 * 16)(%rsi);
+       movdqu RB1, (5 * 16)(%rsi);
+       movdqu RB2, (6 * 16)(%rsi);
+       movdqu RB0, (7 * 16)(%rsi);
+
+       /* clear the used registers */
+       pxor RA0, RA0;
+       pxor RA1, RA1;
+       pxor RA2, RA2;
+       pxor RA3, RA3;
+       pxor RA4, RA4;
+       pxor RB0, RB0;
+       pxor RB1, RB1;
+       pxor RB2, RB2;
+       pxor RB3, RB3;
+       pxor RB4, RB4;
+       pxor RTMP0, RTMP0;
+       pxor RTMP1, RTMP1;
+       pxor RTMP2, RTMP2;
+       pxor RNOT, RNOT;
+
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size _gcry_serpent_sse2_ocb_enc,.-_gcry_serpent_sse2_ocb_enc;)
+
+.align 8
+.globl _gcry_serpent_sse2_ocb_dec
+ELF(.type _gcry_serpent_sse2_ocb_dec,@function;)
+
+_gcry_serpent_sse2_ocb_dec:
+       /* input:
+        *      %rdi: ctx, CTX
+        *      %rsi: dst (8 blocks)
+        *      %rdx: src (8 blocks)
+        *      %rcx: offset
+        *      %r8 : checksum
+        *      %r9 : L pointers (void *L[8])
+        */
+       CFI_STARTPROC();
+
+       subq $(4 * 8), %rsp;
+       CFI_ADJUST_CFA_OFFSET(4 * 8);
+
+       movq %r10, (0 * 8)(%rsp);
+       movq %r11, (1 * 8)(%rsp);
+       movq %r12, (2 * 8)(%rsp);
+       movq %r13, (3 * 8)(%rsp);
+       CFI_REL_OFFSET(%r10, 0 * 8);
+       CFI_REL_OFFSET(%r11, 1 * 8);
+       CFI_REL_OFFSET(%r12, 2 * 8);
+       CFI_REL_OFFSET(%r13, 3 * 8);
+
+       movdqu (%rcx), RTMP0;
+
+       /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+       /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i)  */
+
+#define OCB_INPUT(n, lreg, xreg) \
+         movdqu (n * 16)(%rdx), xreg; \
+         movdqu (lreg), RNOT; \
+         pxor RNOT, RTMP0; \
+         pxor RTMP0, xreg; \
+         movdqu RTMP0, (n * 16)(%rsi);
+       movq (0 * 8)(%r9), %r10;
+       movq (1 * 8)(%r9), %r11;
+       movq (2 * 8)(%r9), %r12;
+       movq (3 * 8)(%r9), %r13;
+       OCB_INPUT(0, %r10, RA0);
+       OCB_INPUT(1, %r11, RA1);
+       OCB_INPUT(2, %r12, RA2);
+       OCB_INPUT(3, %r13, RA3);
+       movq (4 * 8)(%r9), %r10;
+       movq (5 * 8)(%r9), %r11;
+       movq (6 * 8)(%r9), %r12;
+       movq (7 * 8)(%r9), %r13;
+       OCB_INPUT(4, %r10, RB0);
+       OCB_INPUT(5, %r11, RB1);
+       OCB_INPUT(6, %r12, RB2);
+       OCB_INPUT(7, %r13, RB3);
+#undef OCB_INPUT
+
+       movdqu RTMP0, (%rcx);
+
+       movq (0 * 8)(%rsp), %r10;
+       movq (1 * 8)(%rsp), %r11;
+       movq (2 * 8)(%rsp), %r12;
+       movq (3 * 8)(%rsp), %r13;
+       CFI_RESTORE(%r10);
+       CFI_RESTORE(%r11);
+       CFI_RESTORE(%r12);
+       CFI_RESTORE(%r13);
+
+       call __serpent_dec_blk8;
+
+       addq $(4 * 8), %rsp;
+       CFI_ADJUST_CFA_OFFSET(-4 * 8);
+
+       movdqu (%r8), RTMP0;
+
+       pxor_u((0 * 16)(%rsi), RA0, RTMP1);
+       pxor_u((1 * 16)(%rsi), RA1, RTMP1);
+       pxor_u((2 * 16)(%rsi), RA2, RTMP1);
+       pxor_u((3 * 16)(%rsi), RA3, RTMP1);
+       pxor_u((4 * 16)(%rsi), RB0, RTMP1);
+       pxor_u((5 * 16)(%rsi), RB1, RTMP1);
+       pxor_u((6 * 16)(%rsi), RB2, RTMP1);
+       pxor_u((7 * 16)(%rsi), RB3, RTMP1);
+
+       /* Checksum_i = Checksum_{i-1} xor P_i  */
+
+       movdqu RA0, (0 * 16)(%rsi);
+       pxor RA0, RTMP0;
+       movdqu RA1, (1 * 16)(%rsi);
+       pxor RA1, RTMP0;
+       movdqu RA2, (2 * 16)(%rsi);
+       pxor RA2, RTMP0;
+       movdqu RA3, (3 * 16)(%rsi);
+       pxor RA3, RTMP0;
+       movdqu RB0, (4 * 16)(%rsi);
+       pxor RB0, RTMP0;
+       movdqu RB1, (5 * 16)(%rsi);
+       pxor RB1, RTMP0;
+       movdqu RB2, (6 * 16)(%rsi);
+       pxor RB2, RTMP0;
+       movdqu RB3, (7 * 16)(%rsi);
+       pxor RB3, RTMP0;
+
+       movdqu RTMP0, (%r8);
+
+       /* clear the used registers */
+       pxor RA0, RA0;
+       pxor RA1, RA1;
+       pxor RA2, RA2;
+       pxor RA3, RA3;
+       pxor RA4, RA4;
+       pxor RB0, RB0;
+       pxor RB1, RB1;
+       pxor RB2, RB2;
+       pxor RB3, RB3;
+       pxor RB4, RB4;
+       pxor RTMP0, RTMP0;
+       pxor RTMP1, RTMP1;
+       pxor RTMP2, RTMP2;
+       pxor RNOT, RNOT;
+
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size _gcry_serpent_sse2_ocb_dec,.-_gcry_serpent_sse2_ocb_dec;)
+
+.align 8
+.globl _gcry_serpent_sse2_ocb_auth
+ELF(.type _gcry_serpent_sse2_ocb_auth,@function;)
+
+_gcry_serpent_sse2_ocb_auth:
+       /* input:
+        *      %rdi: ctx, CTX
+        *      %rsi: abuf (8 blocks)
+        *      %rdx: offset
+        *      %rcx: checksum
+        *      %r8 : L pointers (void *L[8])
+        */
+       CFI_STARTPROC();
+
+       subq $(4 * 8), %rsp;
+       CFI_ADJUST_CFA_OFFSET(4 * 8);
+
+       movq %r10, (0 * 8)(%rsp);
+       movq %r11, (1 * 8)(%rsp);
+       movq %r12, (2 * 8)(%rsp);
+       movq %r13, (3 * 8)(%rsp);
+       CFI_REL_OFFSET(%r10, 0 * 8);
+       CFI_REL_OFFSET(%r11, 1 * 8);
+       CFI_REL_OFFSET(%r12, 2 * 8);
+       CFI_REL_OFFSET(%r13, 3 * 8);
+
+       movdqu (%rdx), RTMP0;
+
+       /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+       /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i)  */
+
+#define OCB_INPUT(n, lreg, xreg) \
+         movdqu (n * 16)(%rsi), xreg; \
+         movdqu (lreg), RNOT; \
+         pxor RNOT, RTMP0; \
+         pxor RTMP0, xreg;
+       movq (0 * 8)(%r8), %r10;
+       movq (1 * 8)(%r8), %r11;
+       movq (2 * 8)(%r8), %r12;
+       movq (3 * 8)(%r8), %r13;
+       OCB_INPUT(0, %r10, RA0);
+       OCB_INPUT(1, %r11, RA1);
+       OCB_INPUT(2, %r12, RA2);
+       OCB_INPUT(3, %r13, RA3);
+       movq (4 * 8)(%r8), %r10;
+       movq (5 * 8)(%r8), %r11;
+       movq (6 * 8)(%r8), %r12;
+       movq (7 * 8)(%r8), %r13;
+       OCB_INPUT(4, %r10, RB0);
+       OCB_INPUT(5, %r11, RB1);
+       OCB_INPUT(6, %r12, RB2);
+       OCB_INPUT(7, %r13, RB3);
+#undef OCB_INPUT
+
+       movdqu RTMP0, (%rdx);
+
+       movq (0 * 8)(%rsp), %r10;
+       movq (1 * 8)(%rsp), %r11;
+       movq (2 * 8)(%rsp), %r12;
+       movq (3 * 8)(%rsp), %r13;
+       CFI_RESTORE(%r10);
+       CFI_RESTORE(%r11);
+       CFI_RESTORE(%r12);
+       CFI_RESTORE(%r13);
+
+       call __serpent_enc_blk8;
+
+       addq $(4 * 8), %rsp;
+       CFI_ADJUST_CFA_OFFSET(-4 * 8);
+
+       movdqu (%rcx), RTMP0;
+       pxor RB4, RA4;
+       pxor RB1, RA1;
+       pxor RB2, RA2;
+       pxor RB0, RA0;
+
+       pxor RTMP0, RA2;
+       pxor RA4, RA1;
+       pxor RA2, RA0;
+
+       pxor RA1, RA0;
+       movdqu RA0, (%rcx);
+
+       /* clear the used registers */
+       pxor RA0, RA0;
+       pxor RA1, RA1;
+       pxor RA2, RA2;
+       pxor RA3, RA3;
+       pxor RA4, RA4;
+       pxor RB0, RB0;
+       pxor RB1, RB1;
+       pxor RB2, RB2;
+       pxor RB3, RB3;
+       pxor RB4, RB4;
+       pxor RTMP0, RTMP0;
+       pxor RTMP1, RTMP1;
+       pxor RTMP2, RTMP2;
+       pxor RNOT, RNOT;
+
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size _gcry_serpent_sse2_ocb_auth,.-_gcry_serpent_sse2_ocb_auth;)
+
+#endif /*defined(USE_SERPENT)*/
+#endif /*__x86_64*/
diff --git a/grub-core/lib/libgcrypt/cipher/serpent.c 
b/grub-core/lib/libgcrypt/cipher/serpent.c
index ea14c7eca..159d889fa 100644
--- a/grub-core/lib/libgcrypt/cipher/serpent.c
+++ b/grub-core/lib/libgcrypt/cipher/serpent.c
@@ -28,6 +28,36 @@
 #include "g10lib.h"
 #include "cipher.h"
 #include "bithelp.h"
+#include "bufhelp.h"
+#include "cipher-internal.h"
+#include "cipher-selftest.h"
+
+
+/* USE_SSE2 indicates whether to compile with AMD64 SSE2 code. */
+#undef USE_SSE2
+#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+    defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+# define USE_SSE2 1
+#endif
+
+/* USE_AVX2 indicates whether to compile with AMD64 AVX2 code. */
+#undef USE_AVX2
+#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+    defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+# if defined(ENABLE_AVX2_SUPPORT)
+#  define USE_AVX2 1
+# endif
+#endif
+
+/* USE_NEON indicates whether to enable ARM NEON assembly code. */
+#undef USE_NEON
+#ifdef ENABLE_NEON_SUPPORT
+# if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) \
+     && defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) \
+     && defined(HAVE_GCC_INLINE_ASM_NEON)
+#  define USE_NEON 1
+# endif
+#endif /*ENABLE_NEON_SUPPORT*/
 
 /* Number of rounds per Serpent encrypt/decrypt operation.  */
 #define ROUNDS 32
@@ -49,415 +79,465 @@ typedef u32 serpent_subkeys_t[ROUNDS + 1][4];
 typedef struct serpent_context
 {
   serpent_subkeys_t keys;      /* Generated subkeys.  */
+
+#ifdef USE_AVX2
+  int use_avx2;
+#endif
+#ifdef USE_NEON
+  int use_neon;
+#endif
 } serpent_context_t;
 
 
-/* A prototype.  */
-static const char *serpent_test (void);
+/* Assembly implementations use SystemV ABI, ABI conversion and additional
+ * stack to store XMM6-XMM15 needed on Win64. */
+#undef ASM_FUNC_ABI
+#if defined(USE_SSE2) || defined(USE_AVX2)
+# ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+#  define ASM_FUNC_ABI __attribute__((sysv_abi))
+# else
+#  define ASM_FUNC_ABI
+# endif
+#endif
 
 
-#define byte_swap_32(x) \
-  (0 \
-   | (((x) & 0xff000000) >> 24) | (((x) & 0x00ff0000) >>  8) \
-   | (((x) & 0x0000ff00) <<  8) | (((x) & 0x000000ff) << 24))
+#ifdef USE_SSE2
+/* Assembler implementations of Serpent using SSE2.  Process 8 block in
+   parallel.
+ */
+extern void _gcry_serpent_sse2_ctr_enc(serpent_context_t *ctx,
+                                      unsigned char *out,
+                                      const unsigned char *in,
+                                      unsigned char *ctr) ASM_FUNC_ABI;
+
+extern void _gcry_serpent_sse2_cbc_dec(serpent_context_t *ctx,
+                                      unsigned char *out,
+                                      const unsigned char *in,
+                                      unsigned char *iv) ASM_FUNC_ABI;
+
+extern void _gcry_serpent_sse2_cfb_dec(serpent_context_t *ctx,
+                                      unsigned char *out,
+                                      const unsigned char *in,
+                                      unsigned char *iv) ASM_FUNC_ABI;
+
+extern void _gcry_serpent_sse2_ocb_enc(serpent_context_t *ctx,
+                                      unsigned char *out,
+                                      const unsigned char *in,
+                                      unsigned char *offset,
+                                      unsigned char *checksum,
+                                      const u64 Ls[8]) ASM_FUNC_ABI;
+
+extern void _gcry_serpent_sse2_ocb_dec(serpent_context_t *ctx,
+                                      unsigned char *out,
+                                      const unsigned char *in,
+                                      unsigned char *offset,
+                                      unsigned char *checksum,
+                                      const u64 Ls[8]) ASM_FUNC_ABI;
+
+extern void _gcry_serpent_sse2_ocb_auth(serpent_context_t *ctx,
+                                       const unsigned char *abuf,
+                                       unsigned char *offset,
+                                       unsigned char *checksum,
+                                       const u64 Ls[8]) ASM_FUNC_ABI;
+#endif
 
-/* These are the S-Boxes of Serpent.  They are copied from Serpents
-   reference implementation (the optimized one, contained in
-   `floppy2') and are therefore:
+#ifdef USE_AVX2
+/* Assembler implementations of Serpent using AVX2.  Process 16 block in
+   parallel.
+ */
+extern void _gcry_serpent_avx2_ctr_enc(serpent_context_t *ctx,
+                                      unsigned char *out,
+                                      const unsigned char *in,
+                                      unsigned char *ctr) ASM_FUNC_ABI;
+
+extern void _gcry_serpent_avx2_cbc_dec(serpent_context_t *ctx,
+                                      unsigned char *out,
+                                      const unsigned char *in,
+                                      unsigned char *iv) ASM_FUNC_ABI;
+
+extern void _gcry_serpent_avx2_cfb_dec(serpent_context_t *ctx,
+                                      unsigned char *out,
+                                      const unsigned char *in,
+                                      unsigned char *iv) ASM_FUNC_ABI;
+
+extern void _gcry_serpent_avx2_ocb_enc(serpent_context_t *ctx,
+                                      unsigned char *out,
+                                      const unsigned char *in,
+                                      unsigned char *offset,
+                                      unsigned char *checksum,
+                                      const u64 Ls[16]) ASM_FUNC_ABI;
+
+extern void _gcry_serpent_avx2_ocb_dec(serpent_context_t *ctx,
+                                      unsigned char *out,
+                                      const unsigned char *in,
+                                      unsigned char *offset,
+                                      unsigned char *checksum,
+                                      const u64 Ls[16]) ASM_FUNC_ABI;
+
+extern void _gcry_serpent_avx2_ocb_auth(serpent_context_t *ctx,
+                                       const unsigned char *abuf,
+                                       unsigned char *offset,
+                                       unsigned char *checksum,
+                                       const u64 Ls[16]) ASM_FUNC_ABI;
+#endif
+
+#ifdef USE_NEON
+/* Assembler implementations of Serpent using ARM NEON.  Process 8 block in
+   parallel.
+ */
+extern void _gcry_serpent_neon_ctr_enc(serpent_context_t *ctx,
+                                      unsigned char *out,
+                                      const unsigned char *in,
+                                      unsigned char *ctr);
+
+extern void _gcry_serpent_neon_cbc_dec(serpent_context_t *ctx,
+                                      unsigned char *out,
+                                      const unsigned char *in,
+                                      unsigned char *iv);
+
+extern void _gcry_serpent_neon_cfb_dec(serpent_context_t *ctx,
+                                      unsigned char *out,
+                                      const unsigned char *in,
+                                      unsigned char *iv);
+
+extern void _gcry_serpent_neon_ocb_enc(serpent_context_t *ctx,
+                                      unsigned char *out,
+                                      const unsigned char *in,
+                                      unsigned char *offset,
+                                      unsigned char *checksum,
+                                      const void *Ls[8]);
+
+extern void _gcry_serpent_neon_ocb_dec(serpent_context_t *ctx,
+                                      unsigned char *out,
+                                      const unsigned char *in,
+                                      unsigned char *offset,
+                                      unsigned char *checksum,
+                                      const void *Ls[8]);
+
+extern void _gcry_serpent_neon_ocb_auth(serpent_context_t *ctx,
+                                       const unsigned char *abuf,
+                                       unsigned char *offset,
+                                       unsigned char *checksum,
+                                       const void *Ls[8]);
+#endif
 
-     Copyright (C) 1998 Ross Anderson, Eli Biham, Lars Knudsen.
 
-  To quote the Serpent homepage
-  (http://www.cl.cam.ac.uk/~rja14/serpent.html):
+/* Prototypes.  */
+static const char *serpent_test (void);
 
-  "Serpent is now completely in the public domain, and we impose no
-   restrictions on its use.  This was announced on the 21st August at
-   the First AES Candidate Conference. The optimised implementations
-   in the submission package are now under the GNU PUBLIC LICENSE
-   (GPL), although some comments in the code still say otherwise. You
-   are welcome to use Serpent for any application."  */
+static void _gcry_serpent_ctr_enc (void *context, unsigned char *ctr,
+                                  void *outbuf_arg, const void *inbuf_arg,
+                                  size_t nblocks);
+static void _gcry_serpent_cbc_dec (void *context, unsigned char *iv,
+                                  void *outbuf_arg, const void *inbuf_arg,
+                                  size_t nblocks);
+static void _gcry_serpent_cfb_dec (void *context, unsigned char *iv,
+                                  void *outbuf_arg, const void *inbuf_arg,
+                                  size_t nblocks);
+static size_t _gcry_serpent_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
+                                      const void *inbuf_arg, size_t nblocks,
+                                      int encrypt);
+static size_t _gcry_serpent_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
+                                     size_t nblocks);
+
+
+/*
+ * These are the S-Boxes of Serpent from following research paper.
+ *
+ *  D. A. Osvik, “Speeding up Serpent,” in Third AES Candidate Conference,
+ *   (New York, New York, USA), p. 317–329, National Institute of Standards and
+ *   Technology, 2000.
+ *
+ * Paper is also available at: http://www.ii.uib.no/~osvik/pub/aes3.pdf
+ *
+ */
 
-#define SBOX0(a, b, c, d, w, x, y, z) \
+#define SBOX0(r0, r1, r2, r3, w, x, y, z) \
   { \
-    u32 t02, t03, t05, t06, t07, t08, t09; \
-    u32 t11, t12, t13, t14, t15, t17, t01; \
-    t01 = b   ^ c  ; \
-    t02 = a   | d  ; \
-    t03 = a   ^ b  ; \
-    z   = t02 ^ t01; \
-    t05 = c   | z  ; \
-    t06 = a   ^ d  ; \
-    t07 = b   | c  ; \
-    t08 = d   & t05; \
-    t09 = t03 & t07; \
-    y   = t09 ^ t08; \
-    t11 = t09 & y  ; \
-    t12 = c   ^ d  ; \
-    t13 = t07 ^ t11; \
-    t14 = b   & t06; \
-    t15 = t06 ^ t13; \
-    w   =     ~ t15; \
-    t17 = w   ^ t14; \
-    x   = t12 ^ t17; \
+    u32 r4; \
+    \
+    r3 ^= r0; r4 =  r1; \
+    r1 &= r3; r4 ^= r2; \
+    r1 ^= r0; r0 |= r3; \
+    r0 ^= r4; r4 ^= r3; \
+    r3 ^= r2; r2 |= r1; \
+    r2 ^= r4; r4 = ~r4; \
+    r4 |= r1; r1 ^= r3; \
+    r1 ^= r4; r3 |= r0; \
+    r1 ^= r3; r4 ^= r3; \
+    \
+    w = r1; x = r4; y = r2; z = r0; \
   }
 
-#define SBOX0_INVERSE(a, b, c, d, w, x, y, z) \
+#define SBOX0_INVERSE(r0, r1, r2, r3, w, x, y, z) \
   { \
-    u32 t02, t03, t04, t05, t06, t08, t09, t10; \
-    u32 t12, t13, t14, t15, t17, t18, t01; \
-    t01 = c   ^ d  ; \
-    t02 = a   | b  ; \
-    t03 = b   | c  ; \
-    t04 = c   & t01; \
-    t05 = t02 ^ t01; \
-    t06 = a   | t04; \
-    y   =     ~ t05; \
-    t08 = b   ^ d  ; \
-    t09 = t03 & t08; \
-    t10 = d   | y  ; \
-    x   = t09 ^ t06; \
-    t12 = a   | t05; \
-    t13 = x   ^ t12; \
-    t14 = t03 ^ t10; \
-    t15 = a   ^ c  ; \
-    z   = t14 ^ t13; \
-    t17 = t05 & t13; \
-    t18 = t14 | t17; \
-    w   = t15 ^ t18; \
+    u32 r4; \
+    \
+    r2 = ~r2; r4 =  r1; \
+    r1 |= r0; r4 = ~r4; \
+    r1 ^= r2; r2 |= r4; \
+    r1 ^= r3; r0 ^= r4; \
+    r2 ^= r0; r0 &= r3; \
+    r4 ^= r0; r0 |= r1; \
+    r0 ^= r2; r3 ^= r4; \
+    r2 ^= r1; r3 ^= r0; \
+    r3 ^= r1; \
+    r2 &= r3; \
+    r4 ^= r2; \
+    \
+    w = r0; x = r4; y = r1; z = r3; \
   }
 
-#define SBOX1(a, b, c, d, w, x, y, z) \
+#define SBOX1(r0, r1, r2, r3, w, x, y, z) \
   { \
-    u32 t02, t03, t04, t05, t06, t07, t08; \
-    u32 t10, t11, t12, t13, t16, t17, t01; \
-    t01 = a   | d  ; \
-    t02 = c   ^ d  ; \
-    t03 =     ~ b  ; \
-    t04 = a   ^ c  ; \
-    t05 = a   | t03; \
-    t06 = d   & t04; \
-    t07 = t01 & t02; \
-    t08 = b   | t06; \
-    y   = t02 ^ t05; \
-    t10 = t07 ^ t08; \
-    t11 = t01 ^ t10; \
-    t12 = y   ^ t11; \
-    t13 = b   & d  ; \
-    z   =     ~ t10; \
-    x   = t13 ^ t12; \
-    t16 = t10 | x  ; \
-    t17 = t05 & t16; \
-    w   = c   ^ t17; \
+    u32 r4; \
+    \
+    r0 = ~r0; r2 = ~r2; \
+    r4 =  r0; r0 &= r1; \
+    r2 ^= r0; r0 |= r3; \
+    r3 ^= r2; r1 ^= r0; \
+    r0 ^= r4; r4 |= r1; \
+    r1 ^= r3; r2 |= r0; \
+    r2 &= r4; r0 ^= r1; \
+    r1 &= r2; \
+    r1 ^= r0; r0 &= r2; \
+    r0 ^= r4; \
+    \
+    w = r2; x = r0; y = r3; z = r1; \
   }
 
-#define SBOX1_INVERSE(a, b, c, d, w, x, y, z) \
+#define SBOX1_INVERSE(r0, r1, r2, r3, w, x, y, z) \
   { \
-    u32 t02, t03, t04, t05, t06, t07, t08; \
-    u32 t09, t10, t11, t14, t15, t17, t01; \
-    t01 = a   ^ b  ; \
-    t02 = b   | d  ; \
-    t03 = a   & c  ; \
-    t04 = c   ^ t02; \
-    t05 = a   | t04; \
-    t06 = t01 & t05; \
-    t07 = d   | t03; \
-    t08 = b   ^ t06; \
-    t09 = t07 ^ t06; \
-    t10 = t04 | t03; \
-    t11 = d   & t08; \
-    y   =     ~ t09; \
-    x   = t10 ^ t11; \
-    t14 = a   | y  ; \
-    t15 = t06 ^ x  ; \
-    z   = t01 ^ t04; \
-    t17 = c   ^ t15; \
-    w   = t14 ^ t17; \
+    u32 r4; \
+    \
+    r4 =  r1; r1 ^= r3; \
+    r3 &= r1; r4 ^= r2; \
+    r3 ^= r0; r0 |= r1; \
+    r2 ^= r3; r0 ^= r4; \
+    r0 |= r2; r1 ^= r3; \
+    r0 ^= r1; r1 |= r3; \
+    r1 ^= r0; r4 = ~r4; \
+    r4 ^= r1; r1 |= r0; \
+    r1 ^= r0; \
+    r1 |= r4; \
+    r3 ^= r1; \
+    \
+    w = r4; x = r0; y = r3; z = r2; \
   }
 
-#define SBOX2(a, b, c, d, w, x, y, z) \
+#define SBOX2(r0, r1, r2, r3, w, x, y, z) \
   { \
-    u32 t02, t03, t05, t06, t07, t08; \
-    u32 t09, t10, t12, t13, t14, t01; \
-    t01 = a   | c  ; \
-    t02 = a   ^ b  ; \
-    t03 = d   ^ t01; \
-    w   = t02 ^ t03; \
-    t05 = c   ^ w  ; \
-    t06 = b   ^ t05; \
-    t07 = b   | t05; \
-    t08 = t01 & t06; \
-    t09 = t03 ^ t07; \
-    t10 = t02 | t09; \
-    x   = t10 ^ t08; \
-    t12 = a   | d  ; \
-    t13 = t09 ^ x  ; \
-    t14 = b   ^ t13; \
-    z   =     ~ t09; \
-    y   = t12 ^ t14; \
+    u32 r4; \
+    \
+    r4 =  r0; r0 &= r2; \
+    r0 ^= r3; r2 ^= r1; \
+    r2 ^= r0; r3 |= r4; \
+    r3 ^= r1; r4 ^= r2; \
+    r1 =  r3; r3 |= r4; \
+    r3 ^= r0; r0 &= r1; \
+    r4 ^= r0; r1 ^= r3; \
+    r1 ^= r4; r4 = ~r4; \
+    \
+    w = r2; x = r3; y = r1; z = r4; \
   }
 
-#define SBOX2_INVERSE(a, b, c, d, w, x, y, z) \
+#define SBOX2_INVERSE(r0, r1, r2, r3, w, x, y, z) \
   { \
-    u32 t02, t03, t04, t06, t07, t08, t09; \
-    u32 t10, t11, t12, t15, t16, t17, t01; \
-    t01 = a   ^ d  ; \
-    t02 = c   ^ d  ; \
-    t03 = a   & c  ; \
-    t04 = b   | t02; \
-    w   = t01 ^ t04; \
-    t06 = a   | c  ; \
-    t07 = d   | w  ; \
-    t08 =     ~ d  ; \
-    t09 = b   & t06; \
-    t10 = t08 | t03; \
-    t11 = b   & t07; \
-    t12 = t06 & t02; \
-    z   = t09 ^ t10; \
-    x   = t12 ^ t11; \
-    t15 = c   & z  ; \
-    t16 = w   ^ x  ; \
-    t17 = t10 ^ t15; \
-    y   = t16 ^ t17; \
+    u32 r4; \
+    \
+    r2 ^= r3; r3 ^= r0; \
+    r4 =  r3; r3 &= r2; \
+    r3 ^= r1; r1 |= r2; \
+    r1 ^= r4; r4 &= r3; \
+    r2 ^= r3; r4 &= r0; \
+    r4 ^= r2; r2 &= r1; \
+    r2 |= r0; r3 = ~r3; \
+    r2 ^= r3; r0 ^= r3; \
+    r0 &= r1; r3 ^= r4; \
+    r3 ^= r0; \
+    \
+    w = r1; x = r4; y = r2; z = r3; \
   }
 
-#define SBOX3(a, b, c, d, w, x, y, z) \
+#define SBOX3(r0, r1, r2, r3, w, x, y, z) \
   { \
-    u32 t02, t03, t04, t05, t06, t07, t08; \
-    u32 t09, t10, t11, t13, t14, t15, t01; \
-    t01 = a   ^ c  ; \
-    t02 = a   | d  ; \
-    t03 = a   & d  ; \
-    t04 = t01 & t02; \
-    t05 = b   | t03; \
-    t06 = a   & b  ; \
-    t07 = d   ^ t04; \
-    t08 = c   | t06; \
-    t09 = b   ^ t07; \
-    t10 = d   & t05; \
-    t11 = t02 ^ t10; \
-    z   = t08 ^ t09; \
-    t13 = d   | z  ; \
-    t14 = a   | t07; \
-    t15 = b   & t13; \
-    y   = t08 ^ t11; \
-    w   = t14 ^ t15; \
-    x   = t05 ^ t04; \
+    u32 r4; \
+    \
+    r4 =  r0; r0 |= r3; \
+    r3 ^= r1; r1 &= r4; \
+    r4 ^= r2; r2 ^= r3; \
+    r3 &= r0; r4 |= r1; \
+    r3 ^= r4; r0 ^= r1; \
+    r4 &= r0; r1 ^= r3; \
+    r4 ^= r2; r1 |= r0; \
+    r1 ^= r2; r0 ^= r3; \
+    r2 =  r1; r1 |= r3; \
+    r1 ^= r0; \
+    \
+    w = r1; x = r2; y = r3; z = r4; \
   }
 
-#define SBOX3_INVERSE(a, b, c, d, w, x, y, z) \
+#define SBOX3_INVERSE(r0, r1, r2, r3, w, x, y, z) \
   { \
-    u32 t02, t03, t04, t05, t06, t07, t09; \
-    u32 t11, t12, t13, t14, t16, t01; \
-    t01 = c   | d  ; \
-    t02 = a   | d  ; \
-    t03 = c   ^ t02; \
-    t04 = b   ^ t02; \
-    t05 = a   ^ d  ; \
-    t06 = t04 & t03; \
-    t07 = b   & t01; \
-    y   = t05 ^ t06; \
-    t09 = a   ^ t03; \
-    w   = t07 ^ t03; \
-    t11 = w   | t05; \
-    t12 = t09 & t11; \
-    t13 = a   & y  ; \
-    t14 = t01 ^ t05; \
-    x   = b   ^ t12; \
-    t16 = b   | t13; \
-    z   = t14 ^ t16; \
+    u32 r4; \
+    \
+    r4 =  r2; r2 ^= r1; \
+    r0 ^= r2; r4 &= r2; \
+    r4 ^= r0; r0 &= r1; \
+    r1 ^= r3; r3 |= r4; \
+    r2 ^= r3; r0 ^= r3; \
+    r1 ^= r4; r3 &= r2; \
+    r3 ^= r1; r1 ^= r0; \
+    r1 |= r2; r0 ^= r3; \
+    r1 ^= r4; \
+    r0 ^= r1; \
+    \
+    w = r2; x = r1; y = r3; z = r0; \
   }
 
-#define SBOX4(a, b, c, d, w, x, y, z) \
+#define SBOX4(r0, r1, r2, r3, w, x, y, z) \
   { \
-    u32 t02, t03, t04, t05, t06, t08, t09; \
-    u32 t10, t11, t12, t13, t14, t15, t16, t01; \
-    t01 = a   | b  ; \
-    t02 = b   | c  ; \
-    t03 = a   ^ t02; \
-    t04 = b   ^ d  ; \
-    t05 = d   | t03; \
-    t06 = d   & t01; \
-    z   = t03 ^ t06; \
-    t08 = z   & t04; \
-    t09 = t04 & t05; \
-    t10 = c   ^ t06; \
-    t11 = b   & c  ; \
-    t12 = t04 ^ t08; \
-    t13 = t11 | t03; \
-    t14 = t10 ^ t09; \
-    t15 = a   & t05; \
-    t16 = t11 | t12; \
-    y   = t13 ^ t08; \
-    x   = t15 ^ t16; \
-    w   =     ~ t14; \
+    u32 r4; \
+    \
+    r1 ^= r3; r3 = ~r3; \
+    r2 ^= r3; r3 ^= r0; \
+    r4 =  r1; r1 &= r3; \
+    r1 ^= r2; r4 ^= r3; \
+    r0 ^= r4; r2 &= r4; \
+    r2 ^= r0; r0 &= r1; \
+    r3 ^= r0; r4 |= r1; \
+    r4 ^= r0; r0 |= r3; \
+    r0 ^= r2; r2 &= r3; \
+    r0 = ~r0; r4 ^= r2; \
+    \
+    w = r1; x = r4; y = r0; z = r3; \
   }
 
-#define SBOX4_INVERSE(a, b, c, d, w, x, y, z) \
+#define SBOX4_INVERSE(r0, r1, r2, r3, w, x, y, z) \
   { \
-    u32 t02, t03, t04, t05, t06, t07, t09; \
-    u32 t10, t11, t12, t13, t15, t01; \
-    t01 = b   | d  ; \
-    t02 = c   | d  ; \
-    t03 = a   & t01; \
-    t04 = b   ^ t02; \
-    t05 = c   ^ d  ; \
-    t06 =     ~ t03; \
-    t07 = a   & t04; \
-    x   = t05 ^ t07; \
-    t09 = x   | t06; \
-    t10 = a   ^ t07; \
-    t11 = t01 ^ t09; \
-    t12 = d   ^ t04; \
-    t13 = c   | t10; \
-    z   = t03 ^ t12; \
-    t15 = a   ^ t04; \
-    y   = t11 ^ t13; \
-    w   = t15 ^ t09; \
+    u32 r4; \
+    \
+    r4 =  r2; r2 &= r3; \
+    r2 ^= r1; r1 |= r3; \
+    r1 &= r0; r4 ^= r2; \
+    r4 ^= r1; r1 &= r2; \
+    r0 = ~r0; r3 ^= r4; \
+    r1 ^= r3; r3 &= r0; \
+    r3 ^= r2; r0 ^= r1; \
+    r2 &= r0; r3 ^= r0; \
+    r2 ^= r4; \
+    r2 |= r3; r3 ^= r0; \
+    r2 ^= r1; \
+    \
+    w = r0; x = r3; y = r2; z = r4; \
   }
 
-#define SBOX5(a, b, c, d, w, x, y, z) \
+#define SBOX5(r0, r1, r2, r3, w, x, y, z) \
   { \
-    u32 t02, t03, t04, t05, t07, t08, t09; \
-    u32 t10, t11, t12, t13, t14, t01; \
-    t01 = b   ^ d  ; \
-    t02 = b   | d  ; \
-    t03 = a   & t01; \
-    t04 = c   ^ t02; \
-    t05 = t03 ^ t04; \
-    w   =     ~ t05; \
-    t07 = a   ^ t01; \
-    t08 = d   | w  ; \
-    t09 = b   | t05; \
-    t10 = d   ^ t08; \
-    t11 = b   | t07; \
-    t12 = t03 | w  ; \
-    t13 = t07 | t10; \
-    t14 = t01 ^ t11; \
-    y   = t09 ^ t13; \
-    x   = t07 ^ t08; \
-    z   = t12 ^ t14; \
+    u32 r4; \
+    \
+    r0 ^= r1; r1 ^= r3; \
+    r3 = ~r3; r4 =  r1; \
+    r1 &= r0; r2 ^= r3; \
+    r1 ^= r2; r2 |= r4; \
+    r4 ^= r3; r3 &= r1; \
+    r3 ^= r0; r4 ^= r1; \
+    r4 ^= r2; r2 ^= r0; \
+    r0 &= r3; r2 = ~r2; \
+    r0 ^= r4; r4 |= r3; \
+    r2 ^= r4; \
+    \
+    w = r1; x = r3; y = r0; z = r2; \
   }
 
-#define SBOX5_INVERSE(a, b, c, d, w, x, y, z) \
+#define SBOX5_INVERSE(r0, r1, r2, r3, w, x, y, z) \
   { \
-    u32 t02, t03, t04, t05, t07, t08, t09; \
-    u32 t10, t12, t13, t15, t16, t01; \
-    t01 = a   & d  ; \
-    t02 = c   ^ t01; \
-    t03 = a   ^ d  ; \
-    t04 = b   & t02; \
-    t05 = a   & c  ; \
-    w   = t03 ^ t04; \
-    t07 = a   & w  ; \
-    t08 = t01 ^ w  ; \
-    t09 = b   | t05; \
-    t10 =     ~ b  ; \
-    x   = t08 ^ t09; \
-    t12 = t10 | t07; \
-    t13 = w   | x  ; \
-    z   = t02 ^ t12; \
-    t15 = t02 ^ t13; \
-    t16 = b   ^ d  ; \
-    y   = t16 ^ t15; \
+    u32 r4; \
+    \
+    r1 = ~r1; r4 =  r3; \
+    r2 ^= r1; r3 |= r0; \
+    r3 ^= r2; r2 |= r1; \
+    r2 &= r0; r4 ^= r3; \
+    r2 ^= r4; r4 |= r0; \
+    r4 ^= r1; r1 &= r2; \
+    r1 ^= r3; r4 ^= r2; \
+    r3 &= r4; r4 ^= r1; \
+    r3 ^= r4; r4 = ~r4; \
+    r3 ^= r0; \
+    \
+    w = r1; x = r4; y = r3; z = r2; \
   }
 
-#define SBOX6(a, b, c, d, w, x, y, z) \
+#define SBOX6(r0, r1, r2, r3, w, x, y, z) \
   { \
-    u32 t02, t03, t04, t05, t07, t08, t09, t10; \
-    u32 t11, t12, t13, t15, t17, t18, t01; \
-    t01 = a   & d  ; \
-    t02 = b   ^ c  ; \
-    t03 = a   ^ d  ; \
-    t04 = t01 ^ t02; \
-    t05 = b   | c  ; \
-    x   =     ~ t04; \
-    t07 = t03 & t05; \
-    t08 = b   & x  ; \
-    t09 = a   | c  ; \
-    t10 = t07 ^ t08; \
-    t11 = b   | d  ; \
-    t12 = c   ^ t11; \
-    t13 = t09 ^ t10; \
-    y   =     ~ t13; \
-    t15 = x   & t03; \
-    z   = t12 ^ t07; \
-    t17 = a   ^ b  ; \
-    t18 = y   ^ t15; \
-    w   = t17 ^ t18; \
+    u32 r4; \
+    \
+    r2 = ~r2; r4 =  r3; \
+    r3 &= r0; r0 ^= r4; \
+    r3 ^= r2; r2 |= r4; \
+    r1 ^= r3; r2 ^= r0; \
+    r0 |= r1; r2 ^= r1; \
+    r4 ^= r0; r0 |= r3; \
+    r0 ^= r2; r4 ^= r3; \
+    r4 ^= r0; r3 = ~r3; \
+    r2 &= r4; \
+    r2 ^= r3; \
+    \
+    w = r0; x = r1; y = r4; z = r2; \
   }
 
-#define SBOX6_INVERSE(a, b, c, d, w, x, y, z) \
+#define SBOX6_INVERSE(r0, r1, r2, r3, w, x, y, z) \
   { \
-    u32 t02, t03, t04, t05, t06, t07, t08, t09; \
-    u32 t12, t13, t14, t15, t16, t17, t01; \
-    t01 = a   ^ c  ; \
-    t02 =     ~ c  ; \
-    t03 = b   & t01; \
-    t04 = b   | t02; \
-    t05 = d   | t03; \
-    t06 = b   ^ d  ; \
-    t07 = a   & t04; \
-    t08 = a   | t02; \
-    t09 = t07 ^ t05; \
-    x   = t06 ^ t08; \
-    w   =     ~ t09; \
-    t12 = b   & w  ; \
-    t13 = t01 & t05; \
-    t14 = t01 ^ t12; \
-    t15 = t07 ^ t13; \
-    t16 = d   | t02; \
-    t17 = a   ^ x  ; \
-    z   = t17 ^ t15; \
-    y   = t16 ^ t14; \
+    u32 r4; \
+    \
+    r0 ^= r2; r4 =  r2; \
+    r2 &= r0; r4 ^= r3; \
+    r2 = ~r2; r3 ^= r1; \
+    r2 ^= r3; r4 |= r0; \
+    r0 ^= r2; r3 ^= r4; \
+    r4 ^= r1; r1 &= r3; \
+    r1 ^= r0; r0 ^= r3; \
+    r0 |= r2; r3 ^= r1; \
+    r4 ^= r0; \
+    \
+    w = r1; x = r2; y = r4; z = r3; \
   }
 
-#define SBOX7(a, b, c, d, w, x, y, z) \
+#define SBOX7(r0, r1, r2, r3, w, x, y, z) \
   { \
-    u32 t02, t03, t04, t05, t06, t08, t09, t10; \
-    u32 t11, t13, t14, t15, t16, t17, t01; \
-    t01 = a   & c  ; \
-    t02 =     ~ d  ; \
-    t03 = a   & t02; \
-    t04 = b   | t01; \
-    t05 = a   & b  ; \
-    t06 = c   ^ t04; \
-    z   = t03 ^ t06; \
-    t08 = c   | z  ; \
-    t09 = d   | t05; \
-    t10 = a   ^ t08; \
-    t11 = t04 & z  ; \
-    x   = t09 ^ t10; \
-    t13 = b   ^ x  ; \
-    t14 = t01 ^ x  ; \
-    t15 = c   ^ t05; \
-    t16 = t11 | t13; \
-    t17 = t02 | t14; \
-    w   = t15 ^ t17; \
-    y   = a   ^ t16; \
+    u32 r4; \
+    \
+    r4 =  r1; r1 |= r2; \
+    r1 ^= r3; r4 ^= r2; \
+    r2 ^= r1; r3 |= r4; \
+    r3 &= r0; r4 ^= r2; \
+    r3 ^= r1; r1 |= r4; \
+    r1 ^= r0; r0 |= r4; \
+    r0 ^= r2; r1 ^= r4; \
+    r2 ^= r1; r1 &= r0; \
+    r1 ^= r4; r2 = ~r2; \
+    r2 |= r0; \
+    r4 ^= r2; \
+    \
+    w = r4; x = r3; y = r1; z = r0; \
   }
 
-#define SBOX7_INVERSE(a, b, c, d, w, x, y, z) \
+#define SBOX7_INVERSE(r0, r1, r2, r3, w, x, y, z) \
   { \
-    u32 t02, t03, t04, t06, t07, t08, t09; \
-    u32 t10, t11, t13, t14, t15, t16, t01; \
-    t01 = a   & b  ; \
-    t02 = a   | b  ; \
-    t03 = c   | t01; \
-    t04 = d   & t02; \
-    z   = t03 ^ t04; \
-    t06 = b   ^ t04; \
-    t07 = d   ^ z  ; \
-    t08 =     ~ t07; \
-    t09 = t06 | t08; \
-    t10 = b   ^ d  ; \
-    t11 = a   | d  ; \
-    x   = a   ^ t09; \
-    t13 = c   ^ t06; \
-    t14 = c   & t11; \
-    t15 = d   | x  ; \
-    t16 = t01 | t10; \
-    w   = t13 ^ t15; \
-    y   = t14 ^ t16; \
+    u32 r4; \
+    \
+    r4 =  r2; r2 ^= r0; \
+    r0 &= r3; r4 |= r3; \
+    r2 = ~r2; r3 ^= r1; \
+    r1 |= r0; r0 ^= r2; \
+    r2 &= r4; r3 &= r4; \
+    r1 ^= r2; r2 ^= r0; \
+    r0 |= r2; r4 ^= r1; \
+    r0 ^= r3; r3 ^= r4; \
+    r4 |= r0; r3 ^= r2; \
+    r4 ^= r2; \
+    \
+    w = r3; x = r0; y = r1; z = r4; \
   }
 
 /* XOR BLOCK1 into BLOCK0.  */
@@ -478,23 +558,17 @@ static const char *serpent_test (void);
     block_dst[3] = block_src[3];         \
   }
 
-/* Apply SBOX number WHICH to to the block found in ARRAY0 at index
-   INDEX, writing the output to the block found in ARRAY1 at index
-   INDEX.  */
-#define SBOX(which, array0, array1, index)            \
-  SBOX##which (array0[index + 0], array0[index + 1],  \
-               array0[index + 2], array0[index + 3],  \
-               array1[index + 0], array1[index + 1],  \
-               array1[index + 2], array1[index + 3]);
-
-/* Apply inverse SBOX number WHICH to to the block found in ARRAY0 at
-   index INDEX, writing the output to the block found in ARRAY1 at
-   index INDEX.  */
-#define SBOX_INVERSE(which, array0, array1, index)              \
-  SBOX##which##_INVERSE (array0[index + 0], array0[index + 1],  \
-                         array0[index + 2], array0[index + 3],  \
-                         array1[index + 0], array1[index + 1],  \
-                         array1[index + 2], array1[index + 3]);
+/* Apply SBOX number WHICH to to the block found in ARRAY0, writing
+   the output to the block found in ARRAY1.  */
+#define SBOX(which, array0, array1)                         \
+  SBOX##which (array0[0], array0[1], array0[2], array0[3],  \
+               array1[0], array1[1], array1[2], array1[3]);
+
+/* Apply inverse SBOX number WHICH to to the block found in ARRAY0, writing
+   the output to the block found in ARRAY1.  */
+#define SBOX_INVERSE(which, array0, array1)                           \
+  SBOX##which##_INVERSE (array0[0], array0[1], array0[2], array0[3],  \
+                         array1[0], array1[1], array1[2], array1[3]);
 
 /* Apply the linear transformation to BLOCK.  */
 #define LINEAR_TRANSFORMATION(block)                  \
@@ -533,7 +607,7 @@ static const char *serpent_test (void);
   {                                             \
     BLOCK_XOR (block, subkeys[round]);          \
     round++;                                    \
-    SBOX (which, block, block_tmp, 0);          \
+    SBOX (which, block, block_tmp);             \
     LINEAR_TRANSFORMATION (block_tmp);          \
     BLOCK_COPY (block, block_tmp);              \
   }
@@ -546,7 +620,7 @@ static const char *serpent_test (void);
   {                                                  \
     BLOCK_XOR (block, subkeys[round]);               \
     round++;                                         \
-    SBOX (which, block, block_tmp, 0);               \
+    SBOX (which, block, block_tmp);                  \
     BLOCK_XOR (block_tmp, subkeys[round]);           \
     round++;                                         \
   }
@@ -557,7 +631,7 @@ static const char *serpent_test (void);
 #define ROUND_INVERSE(which, subkey, block, block_tmp) \
   {                                                    \
     LINEAR_TRANSFORMATION_INVERSE (block);             \
-    SBOX_INVERSE (which, block, block_tmp, 0);         \
+    SBOX_INVERSE (which, block, block_tmp);            \
     BLOCK_XOR (block_tmp, subkey[round]);              \
     round--;                                           \
     BLOCK_COPY (block, block_tmp);                     \
@@ -571,7 +645,7 @@ static const char *serpent_test (void);
   {                                                           \
     BLOCK_XOR (block, subkeys[round]);                        \
     round--;                                                  \
-    SBOX_INVERSE (which, block, block_tmp, 0);                \
+    SBOX_INVERSE (which, block, block_tmp);                   \
     BLOCK_XOR (block_tmp, subkeys[round]);                    \
     round--;                                                  \
   }
@@ -585,14 +659,10 @@ serpent_key_prepare (const byte *key, unsigned int 
key_length,
   int i;
 
   /* Copy key.  */
-  memcpy (key_prepared, key, key_length);
   key_length /= 4;
-#ifdef WORDS_BIGENDIAN
   for (i = 0; i < key_length; i++)
-    key_prepared[i] = byte_swap_32 (key_prepared[i]);
-#else
-  i = key_length;
-#endif
+    key_prepared[i] = buf_get_le32 (key + i * 4);
+
   if (i < 8)
     {
       /* Key must be padded according to the Serpent
@@ -608,76 +678,97 @@ serpent_key_prepare (const byte *key, unsigned int 
key_length,
 static void
 serpent_subkeys_generate (serpent_key_t key, serpent_subkeys_t subkeys)
 {
-  u32 w_real[140];             /* The `prekey'.  */
-  u32 k[132];
-  u32 *w = &w_real[8];
-  int i, j;
+  u32 w[8];            /* The `prekey'.  */
+  u32 ws[4];
+  u32 wt[4];
 
   /* Initialize with key values.  */
-  for (i = 0; i < 8; i++)
-    w[i - 8] = key[i];
+  w[0] = key[0];
+  w[1] = key[1];
+  w[2] = key[2];
+  w[3] = key[3];
+  w[4] = key[4];
+  w[5] = key[5];
+  w[6] = key[6];
+  w[7] = key[7];
 
   /* Expand to intermediate key using the affine recurrence.  */
-  for (i = 0; i < 132; i++)
-    w[i] = rol (w[i - 8] ^ w[i - 5] ^ w[i - 3] ^ w[i - 1] ^ PHI ^ i, 11);
+#define EXPAND_KEY4(wo, r)                                                     
\
+  wo[0] = w[(r+0)%8] =                                                         
\
+    rol (w[(r+0)%8] ^ w[(r+3)%8] ^ w[(r+5)%8] ^ w[(r+7)%8] ^ PHI ^ (r+0), 11); 
\
+  wo[1] = w[(r+1)%8] =                                                         
\
+    rol (w[(r+1)%8] ^ w[(r+4)%8] ^ w[(r+6)%8] ^ w[(r+0)%8] ^ PHI ^ (r+1), 11); 
\
+  wo[2] = w[(r+2)%8] =                                                         
\
+    rol (w[(r+2)%8] ^ w[(r+5)%8] ^ w[(r+7)%8] ^ w[(r+1)%8] ^ PHI ^ (r+2), 11); 
\
+  wo[3] = w[(r+3)%8] =                                                         
\
+    rol (w[(r+3)%8] ^ w[(r+6)%8] ^ w[(r+0)%8] ^ w[(r+2)%8] ^ PHI ^ (r+3), 11);
+
+#define EXPAND_KEY(r)       \
+  EXPAND_KEY4(ws, (r));     \
+  EXPAND_KEY4(wt, (r + 4));
 
   /* Calculate subkeys via S-Boxes, in bitslice mode.  */
-  SBOX (3, w, k,   0);
-  SBOX (2, w, k,   4);
-  SBOX (1, w, k,   8);
-  SBOX (0, w, k,  12);
-  SBOX (7, w, k,  16);
-  SBOX (6, w, k,  20);
-  SBOX (5, w, k,  24);
-  SBOX (4, w, k,  28);
-  SBOX (3, w, k,  32);
-  SBOX (2, w, k,  36);
-  SBOX (1, w, k,  40);
-  SBOX (0, w, k,  44);
-  SBOX (7, w, k,  48);
-  SBOX (6, w, k,  52);
-  SBOX (5, w, k,  56);
-  SBOX (4, w, k,  60);
-  SBOX (3, w, k,  64);
-  SBOX (2, w, k,  68);
-  SBOX (1, w, k,  72);
-  SBOX (0, w, k,  76);
-  SBOX (7, w, k,  80);
-  SBOX (6, w, k,  84);
-  SBOX (5, w, k,  88);
-  SBOX (4, w, k,  92);
-  SBOX (3, w, k,  96);
-  SBOX (2, w, k, 100);
-  SBOX (1, w, k, 104);
-  SBOX (0, w, k, 108);
-  SBOX (7, w, k, 112);
-  SBOX (6, w, k, 116);
-  SBOX (5, w, k, 120);
-  SBOX (4, w, k, 124);
-  SBOX (3, w, k, 128);
-
-  /* Renumber subkeys.  */
-  for (i = 0; i < ROUNDS + 1; i++)
-    for (j = 0; j < 4; j++)
-      subkeys[i][j] = k[4 * i + j];
+  EXPAND_KEY (0); SBOX (3, ws, subkeys[0]); SBOX (2, wt, subkeys[1]);
+  EXPAND_KEY (8); SBOX (1, ws, subkeys[2]); SBOX (0, wt, subkeys[3]);
+  EXPAND_KEY (16); SBOX (7, ws, subkeys[4]); SBOX (6, wt, subkeys[5]);
+  EXPAND_KEY (24); SBOX (5, ws, subkeys[6]); SBOX (4, wt, subkeys[7]);
+  EXPAND_KEY (32); SBOX (3, ws, subkeys[8]); SBOX (2, wt, subkeys[9]);
+  EXPAND_KEY (40); SBOX (1, ws, subkeys[10]); SBOX (0, wt, subkeys[11]);
+  EXPAND_KEY (48); SBOX (7, ws, subkeys[12]); SBOX (6, wt, subkeys[13]);
+  EXPAND_KEY (56); SBOX (5, ws, subkeys[14]); SBOX (4, wt, subkeys[15]);
+  EXPAND_KEY (64); SBOX (3, ws, subkeys[16]); SBOX (2, wt, subkeys[17]);
+  EXPAND_KEY (72); SBOX (1, ws, subkeys[18]); SBOX (0, wt, subkeys[19]);
+  EXPAND_KEY (80); SBOX (7, ws, subkeys[20]); SBOX (6, wt, subkeys[21]);
+  EXPAND_KEY (88); SBOX (5, ws, subkeys[22]); SBOX (4, wt, subkeys[23]);
+  EXPAND_KEY (96); SBOX (3, ws, subkeys[24]); SBOX (2, wt, subkeys[25]);
+  EXPAND_KEY (104); SBOX (1, ws, subkeys[26]); SBOX (0, wt, subkeys[27]);
+  EXPAND_KEY (112); SBOX (7, ws, subkeys[28]); SBOX (6, wt, subkeys[29]);
+  EXPAND_KEY (120); SBOX (5, ws, subkeys[30]); SBOX (4, wt, subkeys[31]);
+  EXPAND_KEY4 (ws, 128); SBOX (3, ws, subkeys[32]);
+
+  wipememory (ws, sizeof (ws));
+  wipememory (wt, sizeof (wt));
+  wipememory (w, sizeof (w));
 }
 
 /* Initialize CONTEXT with the key KEY of KEY_LENGTH bits.  */
-static void
+static gcry_err_code_t
 serpent_setkey_internal (serpent_context_t *context,
                         const byte *key, unsigned int key_length)
 {
   serpent_key_t key_prepared;
 
+  if (key_length > 32)
+    return GPG_ERR_INV_KEYLEN;
+
   serpent_key_prepare (key, key_length, key_prepared);
   serpent_subkeys_generate (key_prepared, context->keys);
-  _gcry_burn_stack (272 * sizeof (u32));
+
+#ifdef USE_AVX2
+  context->use_avx2 = 0;
+  if ((_gcry_get_hw_features () & HWF_INTEL_AVX2))
+    {
+      context->use_avx2 = 1;
+    }
+#endif
+
+#ifdef USE_NEON
+  context->use_neon = 0;
+  if ((_gcry_get_hw_features () & HWF_ARM_NEON))
+    {
+      context->use_neon = 1;
+    }
+#endif
+
+  wipememory (key_prepared, sizeof(key_prepared));
+  return 0;
 }
 
 /* Initialize CTX with the key KEY of KEY_LENGTH bytes.  */
 static gcry_err_code_t
 serpent_setkey (void *ctx,
-               const byte *key, unsigned int key_length)
+               const byte *key, unsigned int key_length,
+                cipher_bulk_ops_t *bulk_ops)
 {
   serpent_context_t *context = ctx;
   static const char *serpent_test_ret;
@@ -687,19 +778,24 @@ serpent_setkey (void *ctx,
   if (! serpent_init_done)
     {
       /* Execute a self-test the first time, Serpent is used.  */
+      serpent_init_done = 1;
       serpent_test_ret = serpent_test ();
       if (serpent_test_ret)
        log_error ("Serpent test failure: %s\n", serpent_test_ret);
-      serpent_init_done = 1;
     }
 
+  /* Setup bulk encryption routines.  */
+  memset (bulk_ops, 0, sizeof(*bulk_ops));
+  bulk_ops->cbc_dec = _gcry_serpent_cbc_dec;
+  bulk_ops->cfb_dec = _gcry_serpent_cfb_dec;
+  bulk_ops->ctr_enc = _gcry_serpent_ctr_enc;
+  bulk_ops->ocb_crypt = _gcry_serpent_ocb_crypt;
+  bulk_ops->ocb_auth  = _gcry_serpent_ocb_auth;
+
   if (serpent_test_ret)
     ret = GPG_ERR_SELFTEST_FAILED;
   else
-    {
-      serpent_setkey_internal (context, key, key_length);
-      _gcry_burn_stack (sizeof (serpent_key_t));
-    }
+    ret = serpent_setkey_internal (context, key, key_length);
 
   return ret;
 }
@@ -711,13 +807,10 @@ serpent_encrypt_internal (serpent_context_t *context,
   serpent_block_t b, b_next;
   int round = 0;
 
-  memcpy (b, input, sizeof (b));
-#ifdef WORDS_BIGENDIAN
-  b[0] = byte_swap_32 (b[0]);
-  b[1] = byte_swap_32 (b[1]);
-  b[2] = byte_swap_32 (b[2]);
-  b[3] = byte_swap_32 (b[3]);
-#endif
+  b[0] = buf_get_le32 (input + 0);
+  b[1] = buf_get_le32 (input + 4);
+  b[2] = buf_get_le32 (input + 8);
+  b[3] = buf_get_le32 (input + 12);
 
   ROUND (0, context->keys, b, b_next);
   ROUND (1, context->keys, b, b_next);
@@ -753,13 +846,10 @@ serpent_encrypt_internal (serpent_context_t *context,
 
   ROUND_LAST (7, context->keys, b, b_next);
 
-#ifdef WORDS_BIGENDIAN
-  b_next[0] = byte_swap_32 (b_next[0]);
-  b_next[1] = byte_swap_32 (b_next[1]);
-  b_next[2] = byte_swap_32 (b_next[2]);
-  b_next[3] = byte_swap_32 (b_next[3]);
-#endif
-  memcpy (output, b_next, sizeof (b_next));
+  buf_put_le32 (output + 0, b_next[0]);
+  buf_put_le32 (output + 4, b_next[1]);
+  buf_put_le32 (output + 8, b_next[2]);
+  buf_put_le32 (output + 12, b_next[3]);
 }
 
 static void
@@ -769,13 +859,10 @@ serpent_decrypt_internal (serpent_context_t *context,
   serpent_block_t b, b_next;
   int round = ROUNDS;
 
-  memcpy (b_next, input, sizeof (b));
-#ifdef WORDS_BIGENDIAN
-  b_next[0] = byte_swap_32 (b_next[0]);
-  b_next[1] = byte_swap_32 (b_next[1]);
-  b_next[2] = byte_swap_32 (b_next[2]);
-  b_next[3] = byte_swap_32 (b_next[3]);
-#endif
+  b_next[0] = buf_get_le32 (input + 0);
+  b_next[1] = buf_get_le32 (input + 4);
+  b_next[2] = buf_get_le32 (input + 8);
+  b_next[3] = buf_get_le32 (input + 12);
 
   ROUND_FIRST_INVERSE (7, context->keys, b_next, b);
 
@@ -811,35 +898,757 @@ serpent_decrypt_internal (serpent_context_t *context,
   ROUND_INVERSE (1, context->keys, b, b_next);
   ROUND_INVERSE (0, context->keys, b, b_next);
 
-#ifdef WORDS_BIGENDIAN
-  b_next[0] = byte_swap_32 (b_next[0]);
-  b_next[1] = byte_swap_32 (b_next[1]);
-  b_next[2] = byte_swap_32 (b_next[2]);
-  b_next[3] = byte_swap_32 (b_next[3]);
-#endif
-  memcpy (output, b_next, sizeof (b_next));
+  buf_put_le32 (output + 0, b_next[0]);
+  buf_put_le32 (output + 4, b_next[1]);
+  buf_put_le32 (output + 8, b_next[2]);
+  buf_put_le32 (output + 12, b_next[3]);
 }
 
-static void
+static unsigned int
 serpent_encrypt (void *ctx, byte *buffer_out, const byte *buffer_in)
 {
   serpent_context_t *context = ctx;
 
   serpent_encrypt_internal (context, buffer_in, buffer_out);
-  _gcry_burn_stack (2 * sizeof (serpent_block_t));
+  return /*burn_stack*/ (2 * sizeof (serpent_block_t));
 }
 
-static void
+static unsigned int
 serpent_decrypt (void *ctx, byte *buffer_out, const byte *buffer_in)
 {
   serpent_context_t *context = ctx;
 
   serpent_decrypt_internal (context, buffer_in, buffer_out);
-  _gcry_burn_stack (2 * sizeof (serpent_block_t));
+  return /*burn_stack*/ (2 * sizeof (serpent_block_t));
+}
+
+
+
+/* Bulk encryption of complete blocks in CTR mode.  This function is only
+   intended for the bulk encryption feature of cipher.c.  CTR is expected to be
+   of size sizeof(serpent_block_t). */
+static void
+_gcry_serpent_ctr_enc(void *context, unsigned char *ctr,
+                      void *outbuf_arg, const void *inbuf_arg,
+                      size_t nblocks)
+{
+  serpent_context_t *ctx = context;
+  unsigned char *outbuf = outbuf_arg;
+  const unsigned char *inbuf = inbuf_arg;
+  unsigned char tmpbuf[sizeof(serpent_block_t)];
+  int burn_stack_depth = 2 * sizeof (serpent_block_t);
+
+#ifdef USE_AVX2
+  if (ctx->use_avx2)
+    {
+      int did_use_avx2 = 0;
+
+      /* Process data in 16 block chunks. */
+      while (nblocks >= 16)
+        {
+          _gcry_serpent_avx2_ctr_enc(ctx, outbuf, inbuf, ctr);
+
+          nblocks -= 16;
+          outbuf += 16 * sizeof(serpent_block_t);
+          inbuf  += 16 * sizeof(serpent_block_t);
+          did_use_avx2 = 1;
+        }
+
+      if (did_use_avx2)
+        {
+          /* serpent-avx2 assembly code does not use stack */
+          if (nblocks == 0)
+            burn_stack_depth = 0;
+        }
+
+      /* Use generic/sse2 code to handle smaller chunks... */
+      /* TODO: use caching instead? */
+    }
+#endif
+
+#ifdef USE_SSE2
+  {
+    int did_use_sse2 = 0;
+
+    /* Process data in 8 block chunks. */
+    while (nblocks >= 8)
+      {
+        _gcry_serpent_sse2_ctr_enc(ctx, outbuf, inbuf, ctr);
+
+        nblocks -= 8;
+        outbuf += 8 * sizeof(serpent_block_t);
+        inbuf  += 8 * sizeof(serpent_block_t);
+        did_use_sse2 = 1;
+      }
+
+    if (did_use_sse2)
+      {
+        /* serpent-sse2 assembly code does not use stack */
+        if (nblocks == 0)
+          burn_stack_depth = 0;
+      }
+
+    /* Use generic code to handle smaller chunks... */
+    /* TODO: use caching instead? */
+  }
+#endif
+
+#ifdef USE_NEON
+  if (ctx->use_neon)
+    {
+      int did_use_neon = 0;
+
+      /* Process data in 8 block chunks. */
+      while (nblocks >= 8)
+        {
+          _gcry_serpent_neon_ctr_enc(ctx, outbuf, inbuf, ctr);
+
+          nblocks -= 8;
+          outbuf += 8 * sizeof(serpent_block_t);
+          inbuf  += 8 * sizeof(serpent_block_t);
+          did_use_neon = 1;
+        }
+
+      if (did_use_neon)
+        {
+          /* serpent-neon assembly code does not use stack */
+          if (nblocks == 0)
+            burn_stack_depth = 0;
+        }
+
+      /* Use generic code to handle smaller chunks... */
+      /* TODO: use caching instead? */
+    }
+#endif
+
+  for ( ;nblocks; nblocks-- )
+    {
+      /* Encrypt the counter. */
+      serpent_encrypt_internal(ctx, ctr, tmpbuf);
+      /* XOR the input with the encrypted counter and store in output.  */
+      cipher_block_xor(outbuf, tmpbuf, inbuf, sizeof(serpent_block_t));
+      outbuf += sizeof(serpent_block_t);
+      inbuf  += sizeof(serpent_block_t);
+      /* Increment the counter.  */
+      cipher_block_add(ctr, 1, sizeof(serpent_block_t));
+    }
+
+  wipememory(tmpbuf, sizeof(tmpbuf));
+  _gcry_burn_stack(burn_stack_depth);
+}
+
+/* Bulk decryption of complete blocks in CBC mode.  This function is only
+   intended for the bulk encryption feature of cipher.c. */
+static void
+_gcry_serpent_cbc_dec(void *context, unsigned char *iv,
+                      void *outbuf_arg, const void *inbuf_arg,
+                      size_t nblocks)
+{
+  serpent_context_t *ctx = context;
+  unsigned char *outbuf = outbuf_arg;
+  const unsigned char *inbuf = inbuf_arg;
+  unsigned char savebuf[sizeof(serpent_block_t)];
+  int burn_stack_depth = 2 * sizeof (serpent_block_t);
+
+#ifdef USE_AVX2
+  if (ctx->use_avx2)
+    {
+      int did_use_avx2 = 0;
+
+      /* Process data in 16 block chunks. */
+      while (nblocks >= 16)
+        {
+          _gcry_serpent_avx2_cbc_dec(ctx, outbuf, inbuf, iv);
+
+          nblocks -= 16;
+          outbuf += 16 * sizeof(serpent_block_t);
+          inbuf  += 16 * sizeof(serpent_block_t);
+          did_use_avx2 = 1;
+        }
+
+      if (did_use_avx2)
+        {
+          /* serpent-avx2 assembly code does not use stack */
+          if (nblocks == 0)
+            burn_stack_depth = 0;
+        }
+
+      /* Use generic/sse2 code to handle smaller chunks... */
+    }
+#endif
+
+#ifdef USE_SSE2
+  {
+    int did_use_sse2 = 0;
+
+    /* Process data in 8 block chunks. */
+    while (nblocks >= 8)
+      {
+        _gcry_serpent_sse2_cbc_dec(ctx, outbuf, inbuf, iv);
+
+        nblocks -= 8;
+        outbuf += 8 * sizeof(serpent_block_t);
+        inbuf  += 8 * sizeof(serpent_block_t);
+        did_use_sse2 = 1;
+      }
+
+    if (did_use_sse2)
+      {
+        /* serpent-sse2 assembly code does not use stack */
+        if (nblocks == 0)
+          burn_stack_depth = 0;
+      }
+
+    /* Use generic code to handle smaller chunks... */
+  }
+#endif
+
+#ifdef USE_NEON
+  if (ctx->use_neon)
+    {
+      int did_use_neon = 0;
+
+      /* Process data in 8 block chunks. */
+      while (nblocks >= 8)
+        {
+          _gcry_serpent_neon_cbc_dec(ctx, outbuf, inbuf, iv);
+
+          nblocks -= 8;
+          outbuf += 8 * sizeof(serpent_block_t);
+          inbuf  += 8 * sizeof(serpent_block_t);
+          did_use_neon = 1;
+        }
+
+      if (did_use_neon)
+        {
+          /* serpent-neon assembly code does not use stack */
+          if (nblocks == 0)
+            burn_stack_depth = 0;
+        }
+
+      /* Use generic code to handle smaller chunks... */
+    }
+#endif
+
+  for ( ;nblocks; nblocks-- )
+    {
+      /* INBUF is needed later and it may be identical to OUTBUF, so store
+         the intermediate result to SAVEBUF.  */
+      serpent_decrypt_internal (ctx, inbuf, savebuf);
+
+      cipher_block_xor_n_copy_2(outbuf, savebuf, iv, inbuf,
+                                sizeof(serpent_block_t));
+      inbuf += sizeof(serpent_block_t);
+      outbuf += sizeof(serpent_block_t);
+    }
+
+  wipememory(savebuf, sizeof(savebuf));
+  _gcry_burn_stack(burn_stack_depth);
+}
+
+/* Bulk decryption of complete blocks in CFB mode.  This function is only
+   intended for the bulk encryption feature of cipher.c. */
+static void
+_gcry_serpent_cfb_dec(void *context, unsigned char *iv,
+                      void *outbuf_arg, const void *inbuf_arg,
+                      size_t nblocks)
+{
+  serpent_context_t *ctx = context;
+  unsigned char *outbuf = outbuf_arg;
+  const unsigned char *inbuf = inbuf_arg;
+  int burn_stack_depth = 2 * sizeof (serpent_block_t);
+
+#ifdef USE_AVX2
+  if (ctx->use_avx2)
+    {
+      int did_use_avx2 = 0;
+
+      /* Process data in 16 block chunks. */
+      while (nblocks >= 16)
+        {
+          _gcry_serpent_avx2_cfb_dec(ctx, outbuf, inbuf, iv);
+
+          nblocks -= 16;
+          outbuf += 16 * sizeof(serpent_block_t);
+          inbuf  += 16 * sizeof(serpent_block_t);
+          did_use_avx2 = 1;
+        }
+
+      if (did_use_avx2)
+        {
+          /* serpent-avx2 assembly code does not use stack */
+          if (nblocks == 0)
+            burn_stack_depth = 0;
+        }
+
+      /* Use generic/sse2 code to handle smaller chunks... */
+    }
+#endif
+
+#ifdef USE_SSE2
+  {
+    int did_use_sse2 = 0;
+
+    /* Process data in 8 block chunks. */
+    while (nblocks >= 8)
+      {
+        _gcry_serpent_sse2_cfb_dec(ctx, outbuf, inbuf, iv);
+
+        nblocks -= 8;
+        outbuf += 8 * sizeof(serpent_block_t);
+        inbuf  += 8 * sizeof(serpent_block_t);
+        did_use_sse2 = 1;
+      }
+
+    if (did_use_sse2)
+      {
+        /* serpent-sse2 assembly code does not use stack */
+        if (nblocks == 0)
+          burn_stack_depth = 0;
+      }
+
+    /* Use generic code to handle smaller chunks... */
+  }
+#endif
+
+#ifdef USE_NEON
+  if (ctx->use_neon)
+    {
+      int did_use_neon = 0;
+
+      /* Process data in 8 block chunks. */
+      while (nblocks >= 8)
+        {
+          _gcry_serpent_neon_cfb_dec(ctx, outbuf, inbuf, iv);
+
+          nblocks -= 8;
+          outbuf += 8 * sizeof(serpent_block_t);
+          inbuf  += 8 * sizeof(serpent_block_t);
+          did_use_neon = 1;
+        }
+
+      if (did_use_neon)
+        {
+          /* serpent-neon assembly code does not use stack */
+          if (nblocks == 0)
+            burn_stack_depth = 0;
+        }
+
+      /* Use generic code to handle smaller chunks... */
+    }
+#endif
+
+  for ( ;nblocks; nblocks-- )
+    {
+      serpent_encrypt_internal(ctx, iv, iv);
+      cipher_block_xor_n_copy(outbuf, iv, inbuf, sizeof(serpent_block_t));
+      outbuf += sizeof(serpent_block_t);
+      inbuf  += sizeof(serpent_block_t);
+    }
+
+  _gcry_burn_stack(burn_stack_depth);
+}
+
+/* Bulk encryption/decryption of complete blocks in OCB mode. */
+static size_t
+_gcry_serpent_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
+                       const void *inbuf_arg, size_t nblocks, int encrypt)
+{
+#if defined(USE_AVX2) || defined(USE_SSE2) || defined(USE_NEON)
+  serpent_context_t *ctx = (void *)&c->context.c;
+  unsigned char *outbuf = outbuf_arg;
+  const unsigned char *inbuf = inbuf_arg;
+  int burn_stack_depth = 2 * sizeof (serpent_block_t);
+  u64 blkn = c->u_mode.ocb.data_nblocks;
+#else
+  (void)c;
+  (void)outbuf_arg;
+  (void)inbuf_arg;
+  (void)encrypt;
+#endif
+
+#ifdef USE_AVX2
+  if (ctx->use_avx2)
+    {
+      int did_use_avx2 = 0;
+      u64 Ls[16];
+      unsigned int n = 16 - (blkn % 16);
+      u64 *l;
+      int i;
+
+      if (nblocks >= 16)
+       {
+         for (i = 0; i < 16; i += 8)
+           {
+             /* Use u64 to store pointers for x32 support (assembly function
+              * assumes 64-bit pointers). */
+             Ls[(i + 0 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+             Ls[(i + 1 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
+             Ls[(i + 2 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+             Ls[(i + 3 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[2];
+             Ls[(i + 4 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+             Ls[(i + 5 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
+             Ls[(i + 6 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+           }
+
+         Ls[(7 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
+         l = &Ls[(15 + n) % 16];
+
+         /* Process data in 16 block chunks. */
+         while (nblocks >= 16)
+           {
+             blkn += 16;
+             *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 16);
+
+             if (encrypt)
+               _gcry_serpent_avx2_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv,
+                                         c->u_ctr.ctr, Ls);
+             else
+               _gcry_serpent_avx2_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv,
+                                         c->u_ctr.ctr, Ls);
+
+             nblocks -= 16;
+             outbuf += 16 * sizeof(serpent_block_t);
+             inbuf  += 16 * sizeof(serpent_block_t);
+             did_use_avx2 = 1;
+           }
+       }
+
+      if (did_use_avx2)
+       {
+         /* serpent-avx2 assembly code does not use stack */
+         if (nblocks == 0)
+           burn_stack_depth = 0;
+       }
+
+      /* Use generic code to handle smaller chunks... */
+    }
+#endif
+
+#ifdef USE_SSE2
+  {
+    int did_use_sse2 = 0;
+    u64 Ls[8];
+    unsigned int n = 8 - (blkn % 8);
+    u64 *l;
+
+    if (nblocks >= 8)
+      {
+       /* Use u64 to store pointers for x32 support (assembly function
+         * assumes 64-bit pointers). */
+       Ls[(0 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+       Ls[(1 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
+       Ls[(2 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+       Ls[(3 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[2];
+       Ls[(4 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+       Ls[(5 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
+       Ls[(6 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+       l = &Ls[(7 + n) % 8];
+
+       /* Process data in 8 block chunks. */
+       while (nblocks >= 8)
+         {
+           blkn += 8;
+           *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 8);
+
+           if (encrypt)
+             _gcry_serpent_sse2_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv,
+                                         c->u_ctr.ctr, Ls);
+           else
+             _gcry_serpent_sse2_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv,
+                                         c->u_ctr.ctr, Ls);
+
+           nblocks -= 8;
+           outbuf += 8 * sizeof(serpent_block_t);
+           inbuf  += 8 * sizeof(serpent_block_t);
+           did_use_sse2 = 1;
+         }
+      }
+
+    if (did_use_sse2)
+      {
+       /* serpent-sse2 assembly code does not use stack */
+       if (nblocks == 0)
+         burn_stack_depth = 0;
+      }
+
+    /* Use generic code to handle smaller chunks... */
+  }
+#endif
+
+#ifdef USE_NEON
+  if (ctx->use_neon)
+    {
+      int did_use_neon = 0;
+      const void *Ls[8];
+      unsigned int n = 8 - (blkn % 8);
+      const void **l;
+
+      if (nblocks >= 8)
+       {
+         Ls[(0 + n) % 8] = c->u_mode.ocb.L[0];
+         Ls[(1 + n) % 8] = c->u_mode.ocb.L[1];
+         Ls[(2 + n) % 8] = c->u_mode.ocb.L[0];
+         Ls[(3 + n) % 8] = c->u_mode.ocb.L[2];
+         Ls[(4 + n) % 8] = c->u_mode.ocb.L[0];
+         Ls[(5 + n) % 8] = c->u_mode.ocb.L[1];
+         Ls[(6 + n) % 8] = c->u_mode.ocb.L[0];
+         l = &Ls[(7 + n) % 8];
+
+         /* Process data in 8 block chunks. */
+         while (nblocks >= 8)
+           {
+             blkn += 8;
+             *l = ocb_get_l(c,  blkn - blkn % 8);
+
+             if (encrypt)
+               _gcry_serpent_neon_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv,
+                                         c->u_ctr.ctr, Ls);
+             else
+               _gcry_serpent_neon_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv,
+                                         c->u_ctr.ctr, Ls);
+
+             nblocks -= 8;
+             outbuf += 8 * sizeof(serpent_block_t);
+             inbuf  += 8 * sizeof(serpent_block_t);
+             did_use_neon = 1;
+           }
+       }
+
+      if (did_use_neon)
+       {
+         /* serpent-neon assembly code does not use stack */
+         if (nblocks == 0)
+           burn_stack_depth = 0;
+       }
+
+      /* Use generic code to handle smaller chunks... */
+    }
+#endif
+
+#if defined(USE_AVX2) || defined(USE_SSE2) || defined(USE_NEON)
+  c->u_mode.ocb.data_nblocks = blkn;
+
+  if (burn_stack_depth)
+    _gcry_burn_stack (burn_stack_depth + 4 * sizeof(void *));
+#endif
+
+  return nblocks;
+}
+
+/* Bulk authentication of complete blocks in OCB mode. */
+static size_t
+_gcry_serpent_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
+                       size_t nblocks)
+{
+#if defined(USE_AVX2) || defined(USE_SSE2) || defined(USE_NEON)
+  serpent_context_t *ctx = (void *)&c->context.c;
+  const unsigned char *abuf = abuf_arg;
+  int burn_stack_depth = 2 * sizeof(serpent_block_t);
+  u64 blkn = c->u_mode.ocb.aad_nblocks;
+#else
+  (void)c;
+  (void)abuf_arg;
+#endif
+
+#ifdef USE_AVX2
+  if (ctx->use_avx2)
+    {
+      int did_use_avx2 = 0;
+      u64 Ls[16];
+      unsigned int n = 16 - (blkn % 16);
+      u64 *l;
+      int i;
+
+      if (nblocks >= 16)
+       {
+         for (i = 0; i < 16; i += 8)
+           {
+             /* Use u64 to store pointers for x32 support (assembly function
+              * assumes 64-bit pointers). */
+             Ls[(i + 0 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+             Ls[(i + 1 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
+             Ls[(i + 2 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+             Ls[(i + 3 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[2];
+             Ls[(i + 4 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+             Ls[(i + 5 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
+             Ls[(i + 6 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+           }
+
+         Ls[(7 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
+         l = &Ls[(15 + n) % 16];
+
+         /* Process data in 16 block chunks. */
+         while (nblocks >= 16)
+           {
+             blkn += 16;
+             *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 16);
+
+             _gcry_serpent_avx2_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset,
+                                         c->u_mode.ocb.aad_sum, Ls);
+
+             nblocks -= 16;
+             abuf += 16 * sizeof(serpent_block_t);
+             did_use_avx2 = 1;
+           }
+       }
+
+      if (did_use_avx2)
+       {
+         /* serpent-avx2 assembly code does not use stack */
+         if (nblocks == 0)
+           burn_stack_depth = 0;
+       }
+
+      /* Use generic code to handle smaller chunks... */
+    }
+#endif
+
+#ifdef USE_SSE2
+  {
+    int did_use_sse2 = 0;
+    u64 Ls[8];
+    unsigned int n = 8 - (blkn % 8);
+    u64 *l;
+
+    if (nblocks >= 8)
+      {
+       /* Use u64 to store pointers for x32 support (assembly function
+       * assumes 64-bit pointers). */
+       Ls[(0 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+       Ls[(1 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
+       Ls[(2 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+       Ls[(3 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[2];
+       Ls[(4 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+       Ls[(5 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
+       Ls[(6 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+       l = &Ls[(7 + n) % 8];
+
+       /* Process data in 8 block chunks. */
+       while (nblocks >= 8)
+         {
+           blkn += 8;
+           *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 8);
+
+           _gcry_serpent_sse2_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset,
+                                       c->u_mode.ocb.aad_sum, Ls);
+
+           nblocks -= 8;
+           abuf += 8 * sizeof(serpent_block_t);
+           did_use_sse2 = 1;
+         }
+      }
+
+    if (did_use_sse2)
+      {
+       /* serpent-avx2 assembly code does not use stack */
+       if (nblocks == 0)
+         burn_stack_depth = 0;
+      }
+
+    /* Use generic code to handle smaller chunks... */
+  }
+#endif
+
+#ifdef USE_NEON
+  if (ctx->use_neon)
+    {
+      int did_use_neon = 0;
+      const void *Ls[8];
+      unsigned int n = 8 - (blkn % 8);
+      const void **l;
+
+      if (nblocks >= 8)
+       {
+         Ls[(0 + n) % 8] = c->u_mode.ocb.L[0];
+         Ls[(1 + n) % 8] = c->u_mode.ocb.L[1];
+         Ls[(2 + n) % 8] = c->u_mode.ocb.L[0];
+         Ls[(3 + n) % 8] = c->u_mode.ocb.L[2];
+         Ls[(4 + n) % 8] = c->u_mode.ocb.L[0];
+         Ls[(5 + n) % 8] = c->u_mode.ocb.L[1];
+         Ls[(6 + n) % 8] = c->u_mode.ocb.L[0];
+         l = &Ls[(7 + n) % 8];
+
+         /* Process data in 8 block chunks. */
+         while (nblocks >= 8)
+           {
+             blkn += 8;
+             *l = ocb_get_l(c, blkn - blkn % 8);
+
+             _gcry_serpent_neon_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset,
+                                         c->u_mode.ocb.aad_sum, Ls);
+
+             nblocks -= 8;
+             abuf += 8 * sizeof(serpent_block_t);
+             did_use_neon = 1;
+           }
+       }
+
+      if (did_use_neon)
+       {
+         /* serpent-neon assembly code does not use stack */
+         if (nblocks == 0)
+           burn_stack_depth = 0;
+       }
+
+      /* Use generic code to handle smaller chunks... */
+    }
+#endif
+
+#if defined(USE_AVX2) || defined(USE_SSE2) || defined(USE_NEON)
+  c->u_mode.ocb.aad_nblocks = blkn;
+
+  if (burn_stack_depth)
+    _gcry_burn_stack (burn_stack_depth + 4 * sizeof(void *));
+#endif
+
+  return nblocks;
 }
 
 
 
+/* Run the self-tests for SERPENT-CTR-128, tests IV increment of bulk CTR
+   encryption.  Returns NULL on success. */
+static const char*
+selftest_ctr_128 (void)
+{
+  const int nblocks = 16+8+1;
+  const int blocksize = sizeof(serpent_block_t);
+  const int context_size = sizeof(serpent_context_t);
+
+  return _gcry_selftest_helper_ctr("SERPENT", &serpent_setkey,
+           &serpent_encrypt, nblocks, blocksize, context_size);
+}
+
+
+/* Run the self-tests for SERPENT-CBC-128, tests bulk CBC decryption.
+   Returns NULL on success. */
+static const char*
+selftest_cbc_128 (void)
+{
+  const int nblocks = 16+8+2;
+  const int blocksize = sizeof(serpent_block_t);
+  const int context_size = sizeof(serpent_context_t);
+
+  return _gcry_selftest_helper_cbc("SERPENT", &serpent_setkey,
+           &serpent_encrypt, nblocks, blocksize, context_size);
+}
+
+
+/* Run the self-tests for SERPENT-CBC-128, tests bulk CBC decryption.
+   Returns NULL on success. */
+static const char*
+selftest_cfb_128 (void)
+{
+  const int nblocks = 16+8+2;
+  const int blocksize = sizeof(serpent_block_t);
+  const int context_size = sizeof(serpent_context_t);
+
+  return _gcry_selftest_helper_cfb("SERPENT", &serpent_setkey,
+           &serpent_encrypt, nblocks, blocksize, context_size);
+}
+
+
 /* Serpent test.  */
 
 static const char *
@@ -848,6 +1657,7 @@ serpent_test (void)
   serpent_context_t context;
   unsigned char scratch[16];
   unsigned int i;
+  const char *r;
 
   static struct test
   {
@@ -919,35 +1729,83 @@ serpent_test (void)
        }
     }
 
+  if ( (r = selftest_ctr_128 ()) )
+    return r;
+
+  if ( (r = selftest_cbc_128 ()) )
+    return r;
+
+  if ( (r = selftest_cfb_128 ()) )
+    return r;
+
   return NULL;
 }
 
 
+static const gcry_cipher_oid_spec_t serpent128_oids[] =
+  {
+    {"1.3.6.1.4.1.11591.13.2.1", GCRY_CIPHER_MODE_ECB },
+    {"1.3.6.1.4.1.11591.13.2.2", GCRY_CIPHER_MODE_CBC },
+    {"1.3.6.1.4.1.11591.13.2.3", GCRY_CIPHER_MODE_OFB },
+    {"1.3.6.1.4.1.11591.13.2.4", GCRY_CIPHER_MODE_CFB },
+    { NULL }
+  };
+
+static const gcry_cipher_oid_spec_t serpent192_oids[] =
+  {
+    {"1.3.6.1.4.1.11591.13.2.21", GCRY_CIPHER_MODE_ECB },
+    {"1.3.6.1.4.1.11591.13.2.22", GCRY_CIPHER_MODE_CBC },
+    {"1.3.6.1.4.1.11591.13.2.23", GCRY_CIPHER_MODE_OFB },
+    {"1.3.6.1.4.1.11591.13.2.24", GCRY_CIPHER_MODE_CFB },
+    { NULL }
+  };
 
-/* "SERPENT" is an alias for "SERPENT128".  */
-static const char *cipher_spec_serpent128_aliases[] =
+static const gcry_cipher_oid_spec_t serpent256_oids[] =
+  {
+    {"1.3.6.1.4.1.11591.13.2.41", GCRY_CIPHER_MODE_ECB },
+    {"1.3.6.1.4.1.11591.13.2.42", GCRY_CIPHER_MODE_CBC },
+    {"1.3.6.1.4.1.11591.13.2.43", GCRY_CIPHER_MODE_OFB },
+    {"1.3.6.1.4.1.11591.13.2.44", GCRY_CIPHER_MODE_CFB },
+    { NULL }
+  };
+
+static const char *serpent128_aliases[] =
   {
     "SERPENT",
+    "SERPENT-128",
+    NULL
+  };
+static const char *serpent192_aliases[] =
+  {
+    "SERPENT-192",
+    NULL
+  };
+static const char *serpent256_aliases[] =
+  {
+    "SERPENT-256",
     NULL
   };
 
 gcry_cipher_spec_t _gcry_cipher_spec_serpent128 =
   {
-    "SERPENT128", cipher_spec_serpent128_aliases, NULL, 16, 128,
+    GCRY_CIPHER_SERPENT128, {0, 0},
+    "SERPENT128", serpent128_aliases, serpent128_oids, 16, 128,
     sizeof (serpent_context_t),
     serpent_setkey, serpent_encrypt, serpent_decrypt
   };
 
 gcry_cipher_spec_t _gcry_cipher_spec_serpent192 =
   {
-    "SERPENT192", NULL, NULL, 16, 192,
+    GCRY_CIPHER_SERPENT192, {0, 0},
+    "SERPENT192", serpent192_aliases, serpent192_oids, 16, 192,
     sizeof (serpent_context_t),
     serpent_setkey, serpent_encrypt, serpent_decrypt
   };
 
 gcry_cipher_spec_t _gcry_cipher_spec_serpent256 =
   {
-    "SERPENT256", NULL, NULL, 16, 256,
+    GCRY_CIPHER_SERPENT256, {0, 0},
+    "SERPENT256", serpent256_aliases, serpent256_oids, 16, 256,
     sizeof (serpent_context_t),
     serpent_setkey, serpent_encrypt, serpent_decrypt
   };
diff --git a/grub-core/lib/libgcrypt/cipher/sha1-armv7-neon.S 
b/grub-core/lib/libgcrypt/cipher/sha1-armv7-neon.S
new file mode 100644
index 000000000..2de678b8a
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/sha1-armv7-neon.S
@@ -0,0 +1,526 @@
+/* sha1-armv7-neon.S - ARM/NEON accelerated SHA-1 transform function
+ * Copyright (C) 2013-2014 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * Based on sha1.c:
+ *  Copyright (C) 1998, 2001, 2002, 2003, 2008 Free Software Foundation, Inc.
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+
+#if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) && \
+    defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) && \
+    defined(HAVE_GCC_INLINE_ASM_NEON) && defined(USE_SHA1)
+
+.syntax unified
+.fpu neon
+.arm
+
+.text
+
+#ifdef __PIC__
+#  define GET_DATA_POINTER(reg, name, rtmp) \
+               ldr reg, 1f; \
+               ldr rtmp, 2f; \
+               b 3f; \
+       1:      .word _GLOBAL_OFFSET_TABLE_-(3f+8); \
+       2:      .word name(GOT); \
+       3:      add reg, pc, reg; \
+               ldr reg, [reg, rtmp];
+#else
+#  define GET_DATA_POINTER(reg, name, rtmp) ldr reg, =name
+#endif
+
+/* Context structure */
+
+#define state_h0 0
+#define state_h1 4
+#define state_h2 8
+#define state_h3 12
+#define state_h4 16
+
+
+/* Constants */
+
+#define K1  0x5A827999
+#define K2  0x6ED9EBA1
+#define K3  0x8F1BBCDC
+#define K4  0xCA62C1D6
+.align 4
+gcry_sha1_armv7_neon_K_VEC:
+.LK_VEC:
+.LK1:  .long K1, K1, K1, K1
+.LK2:  .long K2, K2, K2, K2
+.LK3:  .long K3, K3, K3, K3
+.LK4:  .long K4, K4, K4, K4
+
+
+/* Register macros */
+
+#define RSTATE r0
+#define RDATA r1
+#define RNBLKS r2
+#define ROLDSTACK r3
+#define RWK lr
+
+#define _a r4
+#define _b r5
+#define _c r6
+#define _d r7
+#define _e r8
+
+#define RT0 r9
+#define RT1 r10
+#define RT2 r11
+#define RT3 r12
+
+#define W0 q0
+#define W1 q1
+#define W2 q2
+#define W3 q3
+#define W4 q4
+#define W5 q5
+#define W6 q6
+#define W7 q7
+
+#define tmp0 q8
+#define tmp1 q9
+#define tmp2 q10
+#define tmp3 q11
+
+#define qK1 q12
+#define qK2 q13
+#define qK3 q14
+#define qK4 q15
+
+
+/* Round function macros. */
+
+#define WK_offs(i) (((i) & 15) * 4)
+
+#define 
_R_F1(a,b,c,d,e,i,pre1,pre2,pre3,i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28)
 \
+       ldr RT3, [sp, WK_offs(i)]; \
+               pre1(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
+       bic RT0, d, b; \
+       add e, e, a, ror #(32 - 5); \
+       and RT1, c, b; \
+               pre2(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
+       add RT0, RT0, RT3; \
+       add e, e, RT1; \
+       ror b, #(32 - 30); \
+               pre3(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
+       add e, e, RT0;
+
+#define 
_R_F2(a,b,c,d,e,i,pre1,pre2,pre3,i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28)
 \
+       ldr RT3, [sp, WK_offs(i)]; \
+               pre1(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
+       eor RT0, d, b; \
+       add e, e, a, ror #(32 - 5); \
+       eor RT0, RT0, c; \
+               pre2(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
+       add e, e, RT3; \
+       ror b, #(32 - 30); \
+               pre3(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
+       add e, e, RT0; \
+
+#define 
_R_F3(a,b,c,d,e,i,pre1,pre2,pre3,i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28)
 \
+       ldr RT3, [sp, WK_offs(i)]; \
+               pre1(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
+       eor RT0, b, c; \
+       and RT1, b, c; \
+       add e, e, a, ror #(32 - 5); \
+               pre2(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
+       and RT0, RT0, d; \
+       add RT1, RT1, RT3; \
+       add e, e, RT0; \
+       ror b, #(32 - 30); \
+               pre3(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
+       add e, e, RT1;
+
+#define 
_R_F4(a,b,c,d,e,i,pre1,pre2,pre3,i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28)
 \
+       
_R_F2(a,b,c,d,e,i,pre1,pre2,pre3,i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28)
+
+#define 
_R(a,b,c,d,e,f,i,pre1,pre2,pre3,i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28)
 \
+       
_R_##f(a,b,c,d,e,i,pre1,pre2,pre3,i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28)
+
+#define R(a,b,c,d,e,f,i) \
+       
_R_##f(a,b,c,d,e,i,dummy,dummy,dummy,i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28)
+
+#define dummy(...)
+
+
+/* Input expansion macros. */
+
+/********* Precalc macros for rounds 0-15 
*************************************/
+
+#define W_PRECALC_00_15() \
+       add       RWK, sp, #(WK_offs(0));                       \
+       \
+       vld1.32   {tmp0, tmp1}, [RDATA]!;                       \
+       vrev32.8  W0, tmp0;             /* big => little */     \
+       vld1.32   {tmp2, tmp3}, [RDATA]!;                       \
+       vadd.u32  tmp0, W0, curK;                               \
+       vrev32.8  W7, tmp1;             /* big => little */     \
+       vrev32.8  W6, tmp2;             /* big => little */     \
+       vadd.u32  tmp1, W7, curK;                               \
+       vrev32.8  W5, tmp3;             /* big => little */     \
+       vadd.u32  tmp2, W6, curK;                               \
+       vst1.32   {tmp0, tmp1}, [RWK]!;                         \
+       vadd.u32  tmp3, W5, curK;                               \
+       vst1.32   {tmp2, tmp3}, [RWK];                          \
+
+#define WPRECALC_00_15_0(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, 
W_m28) \
+       vld1.32   {tmp0, tmp1}, [RDATA]!;                       \
+
+#define WPRECALC_00_15_1(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, 
W_m28) \
+       add       RWK, sp, #(WK_offs(0));                       \
+
+#define WPRECALC_00_15_2(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, 
W_m28) \
+       vrev32.8  W0, tmp0;             /* big => little */     \
+
+#define WPRECALC_00_15_3(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, 
W_m28) \
+       vld1.32   {tmp2, tmp3}, [RDATA]!;                       \
+
+#define WPRECALC_00_15_4(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, 
W_m28) \
+       vadd.u32  tmp0, W0, curK;                               \
+
+#define WPRECALC_00_15_5(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, 
W_m28) \
+       vrev32.8  W7, tmp1;             /* big => little */     \
+
+#define WPRECALC_00_15_6(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, 
W_m28) \
+       vrev32.8  W6, tmp2;             /* big => little */     \
+
+#define WPRECALC_00_15_7(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, 
W_m28) \
+       vadd.u32  tmp1, W7, curK;                               \
+
+#define WPRECALC_00_15_8(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, 
W_m28) \
+       vrev32.8  W5, tmp3;             /* big => little */     \
+
+#define WPRECALC_00_15_9(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, 
W_m28) \
+       vadd.u32  tmp2, W6, curK;                               \
+
+#define WPRECALC_00_15_10(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, 
W_m28) \
+       vst1.32   {tmp0, tmp1}, [RWK]!;                         \
+
+#define WPRECALC_00_15_11(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, 
W_m28) \
+       vadd.u32  tmp3, W5, curK;                               \
+
+#define WPRECALC_00_15_12(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, 
W_m28) \
+       vst1.32   {tmp2, tmp3}, [RWK];                          \
+
+
+/********* Precalc macros for rounds 16-31 
************************************/
+
+#define WPRECALC_16_31_0(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, 
W_m28) \
+       veor      tmp0, tmp0;                   \
+       vext.8    W, W_m16, W_m12, #8;          \
+
+#define WPRECALC_16_31_1(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, 
W_m28) \
+       add       RWK, sp, #(WK_offs(i));       \
+       vext.8    tmp0, W_m04, tmp0, #4;        \
+
+#define WPRECALC_16_31_2(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, 
W_m28) \
+       veor      tmp0, tmp0, W_m16;            \
+       veor.32   W, W, W_m08;                  \
+
+#define WPRECALC_16_31_3(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, 
W_m28) \
+       veor      tmp1, tmp1;                   \
+       veor      W, W, tmp0;                   \
+
+#define WPRECALC_16_31_4(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, 
W_m28) \
+       vshl.u32  tmp0, W, #1;                  \
+
+#define WPRECALC_16_31_5(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, 
W_m28) \
+       vext.8    tmp1, tmp1, W, #(16-12);      \
+       vshr.u32  W, W, #31;                    \
+
+#define WPRECALC_16_31_6(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, 
W_m28) \
+       vorr      tmp0, tmp0, W;                \
+       vshr.u32  W, tmp1, #30;                 \
+
+#define WPRECALC_16_31_7(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, 
W_m28) \
+       vshl.u32  tmp1, tmp1, #2;               \
+
+#define WPRECALC_16_31_8(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, 
W_m28) \
+       veor      tmp0, tmp0, W;                \
+
+#define WPRECALC_16_31_9(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, 
W_m28) \
+       veor      W, tmp0, tmp1;                \
+
+#define WPRECALC_16_31_10(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, 
W_m28) \
+       vadd.u32  tmp0, W, curK;                \
+
+#define WPRECALC_16_31_11(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, 
W_m28) \
+       vst1.32   {tmp0}, [RWK];
+
+
+/********* Precalc macros for rounds 32-79 
************************************/
+
+#define WPRECALC_32_79_0(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, 
W_m28) \
+       veor W, W_m28; \
+
+#define WPRECALC_32_79_1(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, 
W_m28) \
+       vext.8 tmp0, W_m08, W_m04, #8; \
+
+#define WPRECALC_32_79_2(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, 
W_m28) \
+       veor W, W_m16; \
+
+#define WPRECALC_32_79_3(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, 
W_m28) \
+       veor W, tmp0; \
+
+#define WPRECALC_32_79_4(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, 
W_m28) \
+       add RWK, sp, #(WK_offs(i&~3)); \
+
+#define WPRECALC_32_79_5(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, 
W_m28) \
+       vshl.u32 tmp1, W, #2; \
+
+#define WPRECALC_32_79_6(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, 
W_m28) \
+       vshr.u32 tmp0, W, #30; \
+
+#define WPRECALC_32_79_7(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, 
W_m28) \
+       vorr W, tmp0, tmp1; \
+
+#define WPRECALC_32_79_8(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, 
W_m28) \
+       vadd.u32 tmp0, W, curK; \
+
+#define WPRECALC_32_79_9(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, 
W_m28) \
+       vst1.32 {tmp0}, [RWK];
+
+
+/* Other functional macros */
+
+#define CLEAR_REG(reg) vmov.i8 reg, #0;
+
+
+/*
+ * Transform nblks*64 bytes (nblks*16 32-bit words) at DATA.
+ *
+ * unsigned int
+ * _gcry_sha1_transform_armv7_neon (void *ctx, const unsigned char *data,
+ *                                  size_t nblks)
+ */
+.align 3
+.globl _gcry_sha1_transform_armv7_neon
+.type  _gcry_sha1_transform_armv7_neon,%function;
+_gcry_sha1_transform_armv7_neon:
+  /* input:
+   *   r0: ctx, CTX
+   *   r1: data (64*nblks bytes)
+   *   r2: nblks
+   */
+
+  cmp RNBLKS, #0;
+  beq .Ldo_nothing;
+
+  push {r4-r12, lr};
+
+  GET_DATA_POINTER(RT3, .LK_VEC, _a);
+  vpush {q4-q7};
+
+  mov ROLDSTACK, sp;
+
+  /* Align stack. */
+  sub sp, #(16*4);
+  and sp, #(~(16-1));
+
+  vld1.32 {qK1-qK2}, [RT3]!; /* Load K1,K2 */
+
+  /* Get the values of the chaining variables. */
+  ldm RSTATE, {_a-_e};
+
+  vld1.32 {qK3-qK4}, [RT3]; /* Load K3,K4 */
+
+#undef curK
+#define curK qK1
+  /* Precalc 0-15. */
+  W_PRECALC_00_15();
+
+  b .Loop;
+
+.ltorg
+.Loop:
+  /* Transform 0-15 + Precalc 16-31. */
+  _R( _a, _b, _c, _d, _e, F1,  0, WPRECALC_16_31_0, WPRECALC_16_31_1, 
WPRECALC_16_31_2, 16, W4, W5, W6, W7, W0, _, _, _ );
+  _R( _e, _a, _b, _c, _d, F1,  1, WPRECALC_16_31_3, WPRECALC_16_31_4, 
WPRECALC_16_31_5, 16, W4, W5, W6, W7, W0, _, _, _ );
+  _R( _d, _e, _a, _b, _c, F1,  2, WPRECALC_16_31_6, WPRECALC_16_31_7, 
WPRECALC_16_31_8, 16, W4, W5, W6, W7, W0, _, _, _ );
+  _R( _c, _d, _e, _a, _b, F1,  3, WPRECALC_16_31_9, 
WPRECALC_16_31_10,WPRECALC_16_31_11,16, W4, W5, W6, W7, W0, _, _, _ );
+
+#undef curK
+#define curK qK2
+  _R( _b, _c, _d, _e, _a, F1,  4, WPRECALC_16_31_0, WPRECALC_16_31_1, 
WPRECALC_16_31_2, 20, W3, W4, W5, W6, W7, _, _, _ );
+  _R( _a, _b, _c, _d, _e, F1,  5, WPRECALC_16_31_3, WPRECALC_16_31_4, 
WPRECALC_16_31_5, 20, W3, W4, W5, W6, W7, _, _, _ );
+  _R( _e, _a, _b, _c, _d, F1,  6, WPRECALC_16_31_6, WPRECALC_16_31_7, 
WPRECALC_16_31_8, 20, W3, W4, W5, W6, W7, _, _, _ );
+  _R( _d, _e, _a, _b, _c, F1,  7, WPRECALC_16_31_9, 
WPRECALC_16_31_10,WPRECALC_16_31_11,20, W3, W4, W5, W6, W7, _, _, _ );
+
+  _R( _c, _d, _e, _a, _b, F1,  8, WPRECALC_16_31_0, WPRECALC_16_31_1, 
WPRECALC_16_31_2, 24, W2, W3, W4, W5, W6, _, _, _ );
+  _R( _b, _c, _d, _e, _a, F1,  9, WPRECALC_16_31_3, WPRECALC_16_31_4, 
WPRECALC_16_31_5, 24, W2, W3, W4, W5, W6, _, _, _ );
+  _R( _a, _b, _c, _d, _e, F1, 10, WPRECALC_16_31_6, WPRECALC_16_31_7, 
WPRECALC_16_31_8, 24, W2, W3, W4, W5, W6, _, _, _ );
+  _R( _e, _a, _b, _c, _d, F1, 11, WPRECALC_16_31_9, 
WPRECALC_16_31_10,WPRECALC_16_31_11,24, W2, W3, W4, W5, W6, _, _, _ );
+
+  _R( _d, _e, _a, _b, _c, F1, 12, WPRECALC_16_31_0, WPRECALC_16_31_1, 
WPRECALC_16_31_2, 28, W1, W2, W3, W4, W5, _, _, _ );
+  _R( _c, _d, _e, _a, _b, F1, 13, WPRECALC_16_31_3, WPRECALC_16_31_4, 
WPRECALC_16_31_5, 28, W1, W2, W3, W4, W5, _, _, _ );
+  _R( _b, _c, _d, _e, _a, F1, 14, WPRECALC_16_31_6, WPRECALC_16_31_7, 
WPRECALC_16_31_8, 28, W1, W2, W3, W4, W5, _, _, _ );
+  _R( _a, _b, _c, _d, _e, F1, 15, WPRECALC_16_31_9, 
WPRECALC_16_31_10,WPRECALC_16_31_11,28, W1, W2, W3, W4, W5, _, _, _ );
+
+  /* Transform 16-63 + Precalc 32-79. */
+  _R( _e, _a, _b, _c, _d, F1, 16, WPRECALC_32_79_0, WPRECALC_32_79_1, 
WPRECALC_32_79_2, 32, W0, W1, W2, W3, W4, W5, W6, W7);
+  _R( _d, _e, _a, _b, _c, F1, 17, WPRECALC_32_79_3, WPRECALC_32_79_4, 
WPRECALC_32_79_5, 32, W0, W1, W2, W3, W4, W5, W6, W7);
+  _R( _c, _d, _e, _a, _b, F1, 18, WPRECALC_32_79_6, dummy,            
WPRECALC_32_79_7, 32, W0, W1, W2, W3, W4, W5, W6, W7);
+  _R( _b, _c, _d, _e, _a, F1, 19, WPRECALC_32_79_8, dummy,            
WPRECALC_32_79_9, 32, W0, W1, W2, W3, W4, W5, W6, W7);
+
+  _R( _a, _b, _c, _d, _e, F2, 20, WPRECALC_32_79_0, WPRECALC_32_79_1, 
WPRECALC_32_79_2, 36, W7, W0, W1, W2, W3, W4, W5, W6);
+  _R( _e, _a, _b, _c, _d, F2, 21, WPRECALC_32_79_3, WPRECALC_32_79_4, 
WPRECALC_32_79_5, 36, W7, W0, W1, W2, W3, W4, W5, W6);
+  _R( _d, _e, _a, _b, _c, F2, 22, WPRECALC_32_79_6, dummy,            
WPRECALC_32_79_7, 36, W7, W0, W1, W2, W3, W4, W5, W6);
+  _R( _c, _d, _e, _a, _b, F2, 23, WPRECALC_32_79_8, dummy,            
WPRECALC_32_79_9, 36, W7, W0, W1, W2, W3, W4, W5, W6);
+
+#undef curK
+#define curK qK3
+  _R( _b, _c, _d, _e, _a, F2, 24, WPRECALC_32_79_0, WPRECALC_32_79_1, 
WPRECALC_32_79_2, 40, W6, W7, W0, W1, W2, W3, W4, W5);
+  _R( _a, _b, _c, _d, _e, F2, 25, WPRECALC_32_79_3, WPRECALC_32_79_4, 
WPRECALC_32_79_5, 40, W6, W7, W0, W1, W2, W3, W4, W5);
+  _R( _e, _a, _b, _c, _d, F2, 26, WPRECALC_32_79_6, dummy,            
WPRECALC_32_79_7, 40, W6, W7, W0, W1, W2, W3, W4, W5);
+  _R( _d, _e, _a, _b, _c, F2, 27, WPRECALC_32_79_8, dummy,            
WPRECALC_32_79_9, 40, W6, W7, W0, W1, W2, W3, W4, W5);
+
+  _R( _c, _d, _e, _a, _b, F2, 28, WPRECALC_32_79_0, WPRECALC_32_79_1, 
WPRECALC_32_79_2, 44, W5, W6, W7, W0, W1, W2, W3, W4);
+  _R( _b, _c, _d, _e, _a, F2, 29, WPRECALC_32_79_3, WPRECALC_32_79_4, 
WPRECALC_32_79_5, 44, W5, W6, W7, W0, W1, W2, W3, W4);
+  _R( _a, _b, _c, _d, _e, F2, 30, WPRECALC_32_79_6, dummy,            
WPRECALC_32_79_7, 44, W5, W6, W7, W0, W1, W2, W3, W4);
+  _R( _e, _a, _b, _c, _d, F2, 31, WPRECALC_32_79_8, dummy,            
WPRECALC_32_79_9, 44, W5, W6, W7, W0, W1, W2, W3, W4);
+
+  _R( _d, _e, _a, _b, _c, F2, 32, WPRECALC_32_79_0, WPRECALC_32_79_1, 
WPRECALC_32_79_2, 48, W4, W5, W6, W7, W0, W1, W2, W3);
+  _R( _c, _d, _e, _a, _b, F2, 33, WPRECALC_32_79_3, WPRECALC_32_79_4, 
WPRECALC_32_79_5, 48, W4, W5, W6, W7, W0, W1, W2, W3);
+  _R( _b, _c, _d, _e, _a, F2, 34, WPRECALC_32_79_6, dummy,            
WPRECALC_32_79_7, 48, W4, W5, W6, W7, W0, W1, W2, W3);
+  _R( _a, _b, _c, _d, _e, F2, 35, WPRECALC_32_79_8, dummy,            
WPRECALC_32_79_9, 48, W4, W5, W6, W7, W0, W1, W2, W3);
+
+  _R( _e, _a, _b, _c, _d, F2, 36, WPRECALC_32_79_0, WPRECALC_32_79_1, 
WPRECALC_32_79_2, 52, W3, W4, W5, W6, W7, W0, W1, W2);
+  _R( _d, _e, _a, _b, _c, F2, 37, WPRECALC_32_79_3, WPRECALC_32_79_4, 
WPRECALC_32_79_5, 52, W3, W4, W5, W6, W7, W0, W1, W2);
+  _R( _c, _d, _e, _a, _b, F2, 38, WPRECALC_32_79_6, dummy,            
WPRECALC_32_79_7, 52, W3, W4, W5, W6, W7, W0, W1, W2);
+  _R( _b, _c, _d, _e, _a, F2, 39, WPRECALC_32_79_8, dummy,            
WPRECALC_32_79_9, 52, W3, W4, W5, W6, W7, W0, W1, W2);
+
+  _R( _a, _b, _c, _d, _e, F3, 40, WPRECALC_32_79_0, WPRECALC_32_79_1, 
WPRECALC_32_79_2, 56, W2, W3, W4, W5, W6, W7, W0, W1);
+  _R( _e, _a, _b, _c, _d, F3, 41, WPRECALC_32_79_3, WPRECALC_32_79_4, 
WPRECALC_32_79_5, 56, W2, W3, W4, W5, W6, W7, W0, W1);
+  _R( _d, _e, _a, _b, _c, F3, 42, WPRECALC_32_79_6, dummy,            
WPRECALC_32_79_7, 56, W2, W3, W4, W5, W6, W7, W0, W1);
+  _R( _c, _d, _e, _a, _b, F3, 43, WPRECALC_32_79_8, dummy,            
WPRECALC_32_79_9, 56, W2, W3, W4, W5, W6, W7, W0, W1);
+
+#undef curK
+#define curK qK4
+  _R( _b, _c, _d, _e, _a, F3, 44, WPRECALC_32_79_0, WPRECALC_32_79_1, 
WPRECALC_32_79_2, 60, W1, W2, W3, W4, W5, W6, W7, W0);
+  _R( _a, _b, _c, _d, _e, F3, 45, WPRECALC_32_79_3, WPRECALC_32_79_4, 
WPRECALC_32_79_5, 60, W1, W2, W3, W4, W5, W6, W7, W0);
+  _R( _e, _a, _b, _c, _d, F3, 46, WPRECALC_32_79_6, dummy,            
WPRECALC_32_79_7, 60, W1, W2, W3, W4, W5, W6, W7, W0);
+  _R( _d, _e, _a, _b, _c, F3, 47, WPRECALC_32_79_8, dummy,            
WPRECALC_32_79_9, 60, W1, W2, W3, W4, W5, W6, W7, W0);
+
+  _R( _c, _d, _e, _a, _b, F3, 48, WPRECALC_32_79_0, WPRECALC_32_79_1, 
WPRECALC_32_79_2, 64, W0, W1, W2, W3, W4, W5, W6, W7);
+  _R( _b, _c, _d, _e, _a, F3, 49, WPRECALC_32_79_3, WPRECALC_32_79_4, 
WPRECALC_32_79_5, 64, W0, W1, W2, W3, W4, W5, W6, W7);
+  _R( _a, _b, _c, _d, _e, F3, 50, WPRECALC_32_79_6, dummy,            
WPRECALC_32_79_7, 64, W0, W1, W2, W3, W4, W5, W6, W7);
+  _R( _e, _a, _b, _c, _d, F3, 51, WPRECALC_32_79_8, dummy,            
WPRECALC_32_79_9, 64, W0, W1, W2, W3, W4, W5, W6, W7);
+
+  _R( _d, _e, _a, _b, _c, F3, 52, WPRECALC_32_79_0, WPRECALC_32_79_1, 
WPRECALC_32_79_2, 68, W7, W0, W1, W2, W3, W4, W5, W6);
+  _R( _c, _d, _e, _a, _b, F3, 53, WPRECALC_32_79_3, WPRECALC_32_79_4, 
WPRECALC_32_79_5, 68, W7, W0, W1, W2, W3, W4, W5, W6);
+  _R( _b, _c, _d, _e, _a, F3, 54, WPRECALC_32_79_6, dummy,            
WPRECALC_32_79_7, 68, W7, W0, W1, W2, W3, W4, W5, W6);
+  _R( _a, _b, _c, _d, _e, F3, 55, WPRECALC_32_79_8, dummy,            
WPRECALC_32_79_9, 68, W7, W0, W1, W2, W3, W4, W5, W6);
+
+  _R( _e, _a, _b, _c, _d, F3, 56, WPRECALC_32_79_0, WPRECALC_32_79_1, 
WPRECALC_32_79_2, 72, W6, W7, W0, W1, W2, W3, W4, W5);
+  _R( _d, _e, _a, _b, _c, F3, 57, WPRECALC_32_79_3, WPRECALC_32_79_4, 
WPRECALC_32_79_5, 72, W6, W7, W0, W1, W2, W3, W4, W5);
+  _R( _c, _d, _e, _a, _b, F3, 58, WPRECALC_32_79_6, dummy,            
WPRECALC_32_79_7, 72, W6, W7, W0, W1, W2, W3, W4, W5);
+  _R( _b, _c, _d, _e, _a, F3, 59, WPRECALC_32_79_8, dummy,            
WPRECALC_32_79_9, 72, W6, W7, W0, W1, W2, W3, W4, W5);
+
+  subs RNBLKS, #1;
+
+  _R( _a, _b, _c, _d, _e, F4, 60, WPRECALC_32_79_0, WPRECALC_32_79_1, 
WPRECALC_32_79_2, 76, W5, W6, W7, W0, W1, W2, W3, W4);
+  _R( _e, _a, _b, _c, _d, F4, 61, WPRECALC_32_79_3, WPRECALC_32_79_4, 
WPRECALC_32_79_5, 76, W5, W6, W7, W0, W1, W2, W3, W4);
+  _R( _d, _e, _a, _b, _c, F4, 62, WPRECALC_32_79_6, dummy,            
WPRECALC_32_79_7, 76, W5, W6, W7, W0, W1, W2, W3, W4);
+  _R( _c, _d, _e, _a, _b, F4, 63, WPRECALC_32_79_8, dummy,            
WPRECALC_32_79_9, 76, W5, W6, W7, W0, W1, W2, W3, W4);
+
+  beq .Lend;
+
+  /* Transform 64-79 + Precalc 0-15 of next block. */
+#undef curK
+#define curK qK1
+  _R( _b, _c, _d, _e, _a, F4, 64, WPRECALC_00_15_0, dummy, dummy, _, _, _, _, 
_, _, _, _, _ );
+  _R( _a, _b, _c, _d, _e, F4, 65, WPRECALC_00_15_1, dummy, dummy, _, _, _, _, 
_, _, _, _, _ );
+  _R( _e, _a, _b, _c, _d, F4, 66, WPRECALC_00_15_2, dummy, dummy, _, _, _, _, 
_, _, _, _, _ );
+  _R( _d, _e, _a, _b, _c, F4, 67, WPRECALC_00_15_3, dummy, dummy, _, _, _, _, 
_, _, _, _, _ );
+
+  _R( _c, _d, _e, _a, _b, F4, 68, dummy,            dummy, dummy, _, _, _, _, 
_, _, _, _, _ );
+  _R( _b, _c, _d, _e, _a, F4, 69, dummy,            dummy, dummy, _, _, _, _, 
_, _, _, _, _ );
+  _R( _a, _b, _c, _d, _e, F4, 70, WPRECALC_00_15_4, dummy, dummy, _, _, _, _, 
_, _, _, _, _ );
+  _R( _e, _a, _b, _c, _d, F4, 71, WPRECALC_00_15_5, dummy, dummy, _, _, _, _, 
_, _, _, _, _ );
+
+  _R( _d, _e, _a, _b, _c, F4, 72, dummy,            dummy, dummy, _, _, _, _, 
_, _, _, _, _ );
+  _R( _c, _d, _e, _a, _b, F4, 73, dummy,            dummy, dummy, _, _, _, _, 
_, _, _, _, _ );
+  _R( _b, _c, _d, _e, _a, F4, 74, WPRECALC_00_15_6, dummy, dummy, _, _, _, _, 
_, _, _, _, _ );
+  _R( _a, _b, _c, _d, _e, F4, 75, WPRECALC_00_15_7, dummy, dummy, _, _, _, _, 
_, _, _, _, _ );
+
+  _R( _e, _a, _b, _c, _d, F4, 76, WPRECALC_00_15_8, dummy, dummy, _, _, _, _, 
_, _, _, _, _ );
+  _R( _d, _e, _a, _b, _c, F4, 77, WPRECALC_00_15_9, dummy, dummy, _, _, _, _, 
_, _, _, _, _ );
+  _R( _c, _d, _e, _a, _b, F4, 78, WPRECALC_00_15_10, dummy, dummy, _, _, _, _, 
_, _, _, _, _ );
+  _R( _b, _c, _d, _e, _a, F4, 79, WPRECALC_00_15_11, dummy, WPRECALC_00_15_12, 
_, _, _, _, _, _, _, _, _ );
+
+  /* Update the chaining variables. */
+  ldm RSTATE, {RT0-RT3};
+  add _a, RT0;
+  ldr RT0, [RSTATE, #state_h4];
+  add _b, RT1;
+  add _c, RT2;
+  add _d, RT3;
+  add _e, RT0;
+  stm RSTATE, {_a-_e};
+
+  b .Loop;
+
+.ltorg
+.Lend:
+  /* Transform 64-79 + Clear XMM registers. */
+  R( _b, _c, _d, _e, _a, F4, 64 );
+  R( _a, _b, _c, _d, _e, F4, 65 ); CLEAR_REG(tmp0);
+  R( _e, _a, _b, _c, _d, F4, 66 ); CLEAR_REG(tmp1);
+  R( _d, _e, _a, _b, _c, F4, 67 ); CLEAR_REG(W0);
+  R( _c, _d, _e, _a, _b, F4, 68 ); CLEAR_REG(W1);
+  R( _b, _c, _d, _e, _a, F4, 69 ); CLEAR_REG(W2);
+  R( _a, _b, _c, _d, _e, F4, 70 ); CLEAR_REG(W3);
+  R( _e, _a, _b, _c, _d, F4, 71 ); CLEAR_REG(W4);
+  R( _d, _e, _a, _b, _c, F4, 72 ); CLEAR_REG(W5);
+  R( _c, _d, _e, _a, _b, F4, 73 ); CLEAR_REG(W6);
+  R( _b, _c, _d, _e, _a, F4, 74 ); CLEAR_REG(W7);
+  R( _a, _b, _c, _d, _e, F4, 75 );
+  R( _e, _a, _b, _c, _d, F4, 76 );
+  R( _d, _e, _a, _b, _c, F4, 77 );
+  R( _c, _d, _e, _a, _b, F4, 78 );
+  R( _b, _c, _d, _e, _a, F4, 79 );
+
+  mov sp, ROLDSTACK;
+
+  /* Update the chaining variables. */
+  ldm RSTATE, {RT0-RT3};
+  add _a, RT0;
+  ldr RT0, [RSTATE, #state_h4];
+  add _b, RT1;
+  add _c, RT2;
+  add _d, RT3;
+  vpop {q4-q7};
+  add _e, RT0;
+  stm RSTATE, {_a-_e};
+
+  /* burn_stack */
+  mov r0, #(16*4 + 16*4 + 15);
+
+  pop {r4-r12, pc};
+
+.Ldo_nothing:
+  mov r0, #0;
+  bx lr
+.size _gcry_sha1_transform_armv7_neon,.-_gcry_sha1_transform_armv7_neon;
+
+#endif
diff --git a/grub-core/lib/libgcrypt/cipher/sha1-armv8-aarch32-ce.S 
b/grub-core/lib/libgcrypt/cipher/sha1-armv8-aarch32-ce.S
new file mode 100644
index 000000000..059b9a858
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/sha1-armv8-aarch32-ce.S
@@ -0,0 +1,220 @@
+/* sha1-armv8-aarch32-ce.S - ARM/CE accelerated SHA-1 transform function
+ * Copyright (C) 2016 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+
+#if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) && \
+    defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) && \
+    defined(HAVE_GCC_INLINE_ASM_AARCH32_CRYPTO) && defined(USE_SHA1)
+
+.syntax unified
+.arch armv8-a
+.fpu crypto-neon-fp-armv8
+.arm
+
+.text
+
+#ifdef __PIC__
+#  define GET_DATA_POINTER(reg, name, rtmp) \
+               ldr reg, 1f; \
+               ldr rtmp, 2f; \
+               b 3f; \
+       1:      .word _GLOBAL_OFFSET_TABLE_-(3f+8); \
+       2:      .word name(GOT); \
+       3:      add reg, pc, reg; \
+               ldr reg, [reg, rtmp];
+#else
+#  define GET_DATA_POINTER(reg, name, rtmp) ldr reg, =name
+#endif
+
+
+/* Constants */
+
+#define K1  0x5A827999
+#define K2  0x6ED9EBA1
+#define K3  0x8F1BBCDC
+#define K4  0xCA62C1D6
+.align 4
+gcry_sha1_aarch32_ce_K_VEC:
+.LK_VEC:
+.LK1:  .long K1, K1, K1, K1
+.LK2:  .long K2, K2, K2, K2
+.LK3:  .long K3, K3, K3, K3
+.LK4:  .long K4, K4, K4, K4
+
+
+/* Register macros */
+
+#define qH4    q0
+#define sH4    s0
+#define qH0123 q1
+
+#define qABCD q2
+#define qE0   q3
+#define qE1   q4
+
+#define qT0   q5
+#define qT1   q6
+
+#define qW0 q8
+#define qW1 q9
+#define qW2 q10
+#define qW3 q11
+
+#define qK1 q12
+#define qK2 q13
+#define qK3 q14
+#define qK4 q15
+
+
+/* Round macros */
+
+#define _(...) /*_*/
+#define do_add(dst, src0, src1) vadd.u32 dst, src0, src1;
+#define do_sha1su0(w0,w1,w2) sha1su0.32 w0,w1,w2;
+#define do_sha1su1(w0,w3) sha1su1.32 w0,w3;
+
+#define do_rounds(f, e0, e1, t, k, w0, w1, w2, w3, add_fn, sha1su0_fn, 
sha1su1_fn) \
+        sha1su1_fn( w3, w2     ); \
+        sha1h.32    e0, qABCD; \
+        sha1##f.32  qABCD, e1, t; \
+        add_fn(     t, w2, k   ); \
+        sha1su0_fn( w0, w1, w2 );
+
+
+/* Other functional macros */
+
+#define CLEAR_REG(reg) vmov.i8 reg, #0;
+
+
+/*
+ * unsigned int
+ * _gcry_sha1_transform_armv8_ce (void *ctx, const unsigned char *data,
+ *                                size_t nblks)
+ */
+.align 3
+.globl _gcry_sha1_transform_armv8_ce
+.type  _gcry_sha1_transform_armv8_ce,%function;
+_gcry_sha1_transform_armv8_ce:
+  /* input:
+   *   r0: ctx, CTX
+   *   r1: data (64*nblks bytes)
+   *   r2: nblks
+   */
+
+  cmp r2, #0;
+  push {r4,lr};
+  beq .Ldo_nothing;
+
+  vpush {q4-q7};
+
+  GET_DATA_POINTER(r4, .LK_VEC, lr);
+
+  veor qH4, qH4
+  vld1.32 {qH0123}, [r0]    /* load h0,h1,h2,h3 */
+
+  vld1.32 {qK1-qK2}, [r4]!  /* load K1,K2 */
+  vldr sH4, [r0, #16]       /* load h4 */
+  vld1.32 {qK3-qK4}, [r4]   /* load K3,K4 */
+
+  vld1.8 {qW0-qW1}, [r1]!
+  vmov qABCD, qH0123
+  vld1.8 {qW2-qW3}, [r1]!
+
+  vrev32.8 qW0, qW0
+  vrev32.8 qW1, qW1
+  vrev32.8 qW2, qW2
+  do_add(qT0, qW0, qK1)
+  vrev32.8 qW3, qW3
+  do_add(qT1, qW1, qK1)
+
+.Loop:
+  do_rounds(c, qE1, qH4, qT0, qK1, qW0, qW1, qW2, qW3, do_add, do_sha1su0, _)
+  subs r2, r2, #1
+  do_rounds(c, qE0, qE1, qT1, qK1, qW1, qW2, qW3, qW0, do_add, do_sha1su0, 
do_sha1su1)
+  do_rounds(c, qE1, qE0, qT0, qK1, qW2, qW3, qW0, qW1, do_add, do_sha1su0, 
do_sha1su1)
+  do_rounds(c, qE0, qE1, qT1, qK2, qW3, qW0, qW1, qW2, do_add, do_sha1su0, 
do_sha1su1)
+  do_rounds(c, qE1, qE0, qT0, qK2, qW0, qW1, qW2, qW3, do_add, do_sha1su0, 
do_sha1su1)
+
+  do_rounds(p, qE0, qE1, qT1, qK2, qW1, qW2, qW3, qW0, do_add, do_sha1su0, 
do_sha1su1)
+  do_rounds(p, qE1, qE0, qT0, qK2, qW2, qW3, qW0, qW1, do_add, do_sha1su0, 
do_sha1su1)
+  do_rounds(p, qE0, qE1, qT1, qK2, qW3, qW0, qW1, qW2, do_add, do_sha1su0, 
do_sha1su1)
+  do_rounds(p, qE1, qE0, qT0, qK3, qW0, qW1, qW2, qW3, do_add, do_sha1su0, 
do_sha1su1)
+  do_rounds(p, qE0, qE1, qT1, qK3, qW1, qW2, qW3, qW0, do_add, do_sha1su0, 
do_sha1su1)
+
+  do_rounds(m, qE1, qE0, qT0, qK3, qW2, qW3, qW0, qW1, do_add, do_sha1su0, 
do_sha1su1)
+  do_rounds(m, qE0, qE1, qT1, qK3, qW3, qW0, qW1, qW2, do_add, do_sha1su0, 
do_sha1su1)
+  do_rounds(m, qE1, qE0, qT0, qK3, qW0, qW1, qW2, qW3, do_add, do_sha1su0, 
do_sha1su1)
+  do_rounds(m, qE0, qE1, qT1, qK4, qW1, qW2, qW3, qW0, do_add, do_sha1su0, 
do_sha1su1)
+  do_rounds(m, qE1, qE0, qT0, qK4, qW2, qW3, qW0, qW1, do_add, do_sha1su0, 
do_sha1su1)
+
+  do_rounds(p, qE0, qE1, qT1, qK4, qW3, qW0, qW1, qW2, do_add, do_sha1su0, 
do_sha1su1)
+  beq .Lend
+
+  vld1.8 {qW0-qW1}, [r1]! /* preload */
+  do_rounds(p, qE1, qE0, qT0, qK4, _  , _  , qW2, qW3, do_add, _, do_sha1su1)
+  vrev32.8 qW0, qW0
+  vld1.8 {qW2}, [r1]!
+  vrev32.8 qW1, qW1
+  do_rounds(p, qE0, qE1, qT1, qK4, _  , _  , qW3, _  , do_add, _, _)
+  vld1.8 {qW3}, [r1]!
+  vrev32.8 qW2, qW2
+  do_rounds(p, qE1, qE0, qT0, _, _, _, _, _, _, _, _)
+  vrev32.8 qW3, qW3
+  do_rounds(p, qE0, qE1, qT1, _, _, _, _, _, _, _, _)
+
+  do_add(qT0, qW0, qK1)
+  vadd.u32 qH4, qE0
+  vadd.u32 qABCD, qH0123
+  do_add(qT1, qW1, qK1)
+
+  vmov qH0123, qABCD
+
+  b .Loop
+
+.Lend:
+  do_rounds(p, qE1, qE0, qT0, qK4, _  , _  , qW2, qW3, do_add, _, do_sha1su1)
+  do_rounds(p, qE0, qE1, qT1, qK4, _  , _  , qW3, _  , do_add, _, _)
+  do_rounds(p, qE1, qE0, qT0, _, _, _, _, _, _, _, _)
+  do_rounds(p, qE0, qE1, qT1, _, _, _, _, _, _, _, _)
+
+  vadd.u32 qH4, qE0
+  vadd.u32 qH0123, qABCD
+
+  CLEAR_REG(qW0)
+  CLEAR_REG(qW1)
+  CLEAR_REG(qW2)
+  CLEAR_REG(qW3)
+  CLEAR_REG(qABCD)
+  CLEAR_REG(qE1)
+  CLEAR_REG(qE0)
+
+  vstr sH4, [r0, #16]    /* store h4 */
+  vst1.32 {qH0123}, [r0] /* store h0,h1,h2,h3 */
+
+  CLEAR_REG(qH0123)
+  CLEAR_REG(qH4)
+  vpop {q4-q7}
+
+.Ldo_nothing:
+  mov r0, #0
+  pop {r4,pc}
+.size _gcry_sha1_transform_armv8_ce,.-_gcry_sha1_transform_armv8_ce;
+
+#endif
diff --git a/grub-core/lib/libgcrypt/cipher/sha1-armv8-aarch64-ce.S 
b/grub-core/lib/libgcrypt/cipher/sha1-armv8-aarch64-ce.S
new file mode 100644
index 000000000..ea26564b0
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/sha1-armv8-aarch64-ce.S
@@ -0,0 +1,201 @@
+/* sha1-armv8-aarch64-ce.S - ARM/CE accelerated SHA-1 transform function
+ * Copyright (C) 2016 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "asm-common-aarch64.h"
+
+#if defined(__AARCH64EL__) && \
+    defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) && \
+    defined(HAVE_GCC_INLINE_ASM_AARCH64_CRYPTO) && defined(USE_SHA1)
+
+.cpu generic+simd+crypto
+
+.text
+
+
+/* Constants */
+
+#define K1  0x5A827999
+#define K2  0x6ED9EBA1
+#define K3  0x8F1BBCDC
+#define K4  0xCA62C1D6
+.align 4
+gcry_sha1_aarch64_ce_K_VEC:
+.LK_VEC:
+.LK1:  .long K1, K1, K1, K1
+.LK2:  .long K2, K2, K2, K2
+.LK3:  .long K3, K3, K3, K3
+.LK4:  .long K4, K4, K4, K4
+
+
+/* Register macros */
+
+#define sH4    s0
+#define vH4    v0
+#define vH0123 v1
+
+#define qABCD q2
+#define sABCD s2
+#define vABCD v2
+#define sE0   s3
+#define vE0   v3
+#define sE1   s4
+#define vE1   v4
+
+#define vT0   v5
+#define vT1   v6
+
+#define vW0 v16
+#define vW1 v17
+#define vW2 v18
+#define vW3 v19
+
+#define vK1 v20
+#define vK2 v21
+#define vK3 v22
+#define vK4 v23
+
+
+/* Round macros */
+
+#define _(...) /*_*/
+#define do_add(dst, src0, src1) add dst.4s, src0.4s, src1.4s;
+#define do_sha1su0(w0,w1,w2) sha1su0 w0.4s,w1.4s,w2.4s;
+#define do_sha1su1(w0,w3) sha1su1 w0.4s,w3.4s;
+
+#define do_rounds(f, e0, e1, t, k, w0, w1, w2, w3, add_fn, sha1su0_fn, 
sha1su1_fn) \
+        sha1su1_fn( v##w3, v##w2     ); \
+        sha1h       e0, sABCD; \
+        sha1##f     qABCD, e1, v##t.4s; \
+        add_fn(     v##t, v##w2, v##k   ); \
+        sha1su0_fn( v##w0, v##w1, v##w2 );
+
+
+/* Other functional macros */
+
+#define CLEAR_REG(reg) movi reg.16b, #0;
+
+
+/*
+ * unsigned int
+ * _gcry_sha1_transform_armv8_ce (void *ctx, const unsigned char *data,
+ *                                size_t nblks)
+ */
+.align 3
+.globl _gcry_sha1_transform_armv8_ce
+ELF(.type  _gcry_sha1_transform_armv8_ce,%function;)
+_gcry_sha1_transform_armv8_ce:
+  /* input:
+   *   x0: ctx, CTX
+   *   x1: data (64*nblks bytes)
+   *   x2: nblks
+   */
+  CFI_STARTPROC();
+
+  cbz x2, .Ldo_nothing;
+
+  GET_DATA_POINTER(x4, .LK_VEC);
+
+  ld1 {vH0123.4s}, [x0]     /* load h0,h1,h2,h3 */
+  ld1 {vK1.4s-vK4.4s}, [x4] /* load K1,K2,K3,K4 */
+  ldr sH4, [x0, #16]        /* load h4 */
+
+  ld1 {vW0.16b-vW3.16b}, [x1], #64
+  mov vABCD.16b, vH0123.16b
+
+  rev32 vW0.16b, vW0.16b
+  rev32 vW1.16b, vW1.16b
+  rev32 vW2.16b, vW2.16b
+  do_add(vT0, vW0, vK1)
+  rev32 vW3.16b, vW3.16b
+  do_add(vT1, vW1, vK1)
+
+.Loop:
+  do_rounds(c, sE1, sH4, T0, K1, W0, W1, W2, W3, do_add, do_sha1su0, _)
+  sub x2, x2, #1
+  do_rounds(c, sE0, sE1, T1, K1, W1, W2, W3, W0, do_add, do_sha1su0, 
do_sha1su1)
+  do_rounds(c, sE1, sE0, T0, K1, W2, W3, W0, W1, do_add, do_sha1su0, 
do_sha1su1)
+  do_rounds(c, sE0, sE1, T1, K2, W3, W0, W1, W2, do_add, do_sha1su0, 
do_sha1su1)
+  do_rounds(c, sE1, sE0, T0, K2, W0, W1, W2, W3, do_add, do_sha1su0, 
do_sha1su1)
+
+  do_rounds(p, sE0, sE1, T1, K2, W1, W2, W3, W0, do_add, do_sha1su0, 
do_sha1su1)
+  do_rounds(p, sE1, sE0, T0, K2, W2, W3, W0, W1, do_add, do_sha1su0, 
do_sha1su1)
+  do_rounds(p, sE0, sE1, T1, K2, W3, W0, W1, W2, do_add, do_sha1su0, 
do_sha1su1)
+  do_rounds(p, sE1, sE0, T0, K3, W0, W1, W2, W3, do_add, do_sha1su0, 
do_sha1su1)
+  do_rounds(p, sE0, sE1, T1, K3, W1, W2, W3, W0, do_add, do_sha1su0, 
do_sha1su1)
+
+  do_rounds(m, sE1, sE0, T0, K3, W2, W3, W0, W1, do_add, do_sha1su0, 
do_sha1su1)
+  do_rounds(m, sE0, sE1, T1, K3, W3, W0, W1, W2, do_add, do_sha1su0, 
do_sha1su1)
+  do_rounds(m, sE1, sE0, T0, K3, W0, W1, W2, W3, do_add, do_sha1su0, 
do_sha1su1)
+  do_rounds(m, sE0, sE1, T1, K4, W1, W2, W3, W0, do_add, do_sha1su0, 
do_sha1su1)
+  do_rounds(m, sE1, sE0, T0, K4, W2, W3, W0, W1, do_add, do_sha1su0, 
do_sha1su1)
+
+  do_rounds(p, sE0, sE1, T1, K4, W3, W0, W1, W2, do_add, do_sha1su0, 
do_sha1su1)
+  cbz x2, .Lend
+
+  ld1 {vW0.16b-vW1.16b}, [x1], #32 /* preload */
+  do_rounds(p, sE1, sE0, T0, K4, _  , _  , W2, W3, do_add, _, do_sha1su1)
+  rev32 vW0.16b, vW0.16b
+  ld1 {vW2.16b}, [x1], #16
+  rev32 vW1.16b, vW1.16b
+  do_rounds(p, sE0, sE1, T1, K4, _  , _  , W3, _  , do_add, _, _)
+  ld1 {vW3.16b}, [x1], #16
+  rev32 vW2.16b, vW2.16b
+  do_rounds(p, sE1, sE0, T0, _, _, _, _, _, _, _, _)
+  rev32 vW3.16b, vW3.16b
+  do_rounds(p, sE0, sE1, T1, _, _, _, _, _, _, _, _)
+
+  do_add(vT0, vW0, vK1)
+  add vH4.2s, vH4.2s, vE0.2s
+  add vABCD.4s, vABCD.4s, vH0123.4s
+  do_add(vT1, vW1, vK1)
+
+  mov vH0123.16b, vABCD.16b
+
+  b .Loop
+
+.Lend:
+  do_rounds(p, sE1, sE0, T0, K4, _  , _  , W2, W3, do_add, _, do_sha1su1)
+  do_rounds(p, sE0, sE1, T1, K4, _  , _  , W3, _  , do_add, _, _)
+  do_rounds(p, sE1, sE0, T0, _, _, _, _, _, _, _, _)
+  do_rounds(p, sE0, sE1, T1, _, _, _, _, _, _, _, _)
+
+  add vH4.2s, vH4.2s, vE0.2s
+  add vH0123.4s, vH0123.4s, vABCD.4s
+
+  CLEAR_REG(vW0)
+  CLEAR_REG(vW1)
+  CLEAR_REG(vW2)
+  CLEAR_REG(vW3)
+  CLEAR_REG(vABCD)
+  CLEAR_REG(vE1)
+  CLEAR_REG(vE0)
+
+  str sH4, [x0, #16]    /* store h4 */
+  st1 {vH0123.4s}, [x0] /* store h0,h1,h2,h3 */
+
+  CLEAR_REG(vH0123)
+  CLEAR_REG(vH4)
+
+.Ldo_nothing:
+  mov x0, #0
+  ret_spec_stop
+  CFI_ENDPROC();
+ELF(.size _gcry_sha1_transform_armv8_ce,.-_gcry_sha1_transform_armv8_ce;)
+
+#endif
diff --git a/grub-core/lib/libgcrypt/cipher/sha1-avx-amd64.S 
b/grub-core/lib/libgcrypt/cipher/sha1-avx-amd64.S
new file mode 100644
index 000000000..acada9607
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/sha1-avx-amd64.S
@@ -0,0 +1,429 @@
+/* sha1-avx-amd64.S - Intel AVX accelerated SHA-1 transform function
+ * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * Based on sha1.c:
+ *  Copyright (C) 1998, 2001, 2002, 2003, 2008 Free Software Foundation, Inc.
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * Intel SSSE3 accelerated SHA-1 implementation based on white paper:
+ *  "Improving the Performance of the Secure Hash Algorithm (SHA-1)"
+ *  
http://software.intel.com/en-us/articles/improving-the-performance-of-the-secure-hash-algorithm-1
+ */
+
+#ifdef __x86_64__
+#include <config.h>
+
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
+    defined(HAVE_GCC_INLINE_ASM_AVX) && defined(USE_SHA1)
+
+#include "asm-common-amd64.h"
+
+
+/* Context structure */
+
+#define state_h0 0
+#define state_h1 4
+#define state_h2 8
+#define state_h3 12
+#define state_h4 16
+
+
+/* Constants */
+
+.text
+#define K1  0x5A827999
+#define K2  0x6ED9EBA1
+#define K3  0x8F1BBCDC
+#define K4  0xCA62C1D6
+.align 16
+.LK_XMM:
+.LK1:  .long K1, K1, K1, K1
+.LK2:  .long K2, K2, K2, K2
+.LK3:  .long K3, K3, K3, K3
+.LK4:  .long K4, K4, K4, K4
+
+.Lbswap_shufb_ctl:
+       .long 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f
+
+
+/* Register macros */
+
+#define RSTATE %r8
+#define RDATA %r9
+#define ROLDSTACK %r10
+#define RNBLKS %r11
+
+#define a %eax
+#define b %ebx
+#define c %ecx
+#define d %edx
+#define e %edi
+
+#define RT0 %esi
+#define RT1 %ebp
+
+#define Wtmp0 %xmm0
+#define Wtmp1 %xmm1
+
+#define W0 %xmm2
+#define W1 %xmm3
+#define W2 %xmm4
+#define W3 %xmm5
+#define W4 %xmm6
+#define W5 %xmm7
+#define W6 %xmm8
+#define W7 %xmm9
+
+#define BSWAP_REG %xmm10
+
+
+/* Round function macros. */
+
+#define WK(i) (((i) & 15) * 4)(%rsp)
+
+#define R_F1(a,b,c,d,e,i) \
+       movl c, RT0; \
+       addl WK(i), e; \
+       xorl d, RT0; \
+       movl a, RT1; \
+       andl b, RT0; \
+       shldl $30, b, b; \
+       xorl d, RT0; \
+       leal (RT0,e), e; \
+       shldl $5, RT1, RT1; \
+       addl RT1, e;
+
+#define R_F2(a,b,c,d,e,i) \
+       movl c, RT0; \
+       addl WK(i), e; \
+       xorl b, RT0; \
+       shldl $30, b, b; \
+       xorl d, RT0; \
+       movl a, RT1; \
+       leal (RT0,e), e; \
+       shldl $5, RT1, RT1; \
+       addl RT1, e;
+
+#define R_F3(a,b,c,d,e,i) \
+       movl c, RT0; \
+       movl b, RT1; \
+       xorl b, RT0; \
+       andl c, RT1; \
+       andl d, RT0; \
+       addl RT1, e; \
+       addl WK(i), e; \
+       shldl $30, b, b; \
+       movl a, RT1; \
+       leal (RT0,e), e; \
+       shldl $5, RT1, RT1; \
+       addl RT1, e;
+
+#define R_F4(a,b,c,d,e,i) R_F2(a,b,c,d,e,i)
+
+#define R(a,b,c,d,e,f,i) \
+       R_##f(a,b,c,d,e,i)
+
+
+/* Input expansion macros. */
+
+#define W_PRECALC_00_15_0(i, W, tmp0) \
+       vmovdqu (4*(i))(RDATA), tmp0;
+
+#define W_PRECALC_00_15_1(i, W, tmp0) \
+       vpshufb BSWAP_REG, tmp0, W;
+
+#define W_PRECALC_00_15_2(i, W, tmp0) \
+       vpaddd (.LK_XMM + ((i)/20)*16) rRIP, W, tmp0;
+
+#define W_PRECALC_00_15_3(i, W, tmp0) \
+       vmovdqa tmp0, WK(i&~3);
+
+#define W_PRECALC_16_31_0(i, W, W_m04, W_m08, W_m12, W_m16, tmp0, tmp1) \
+       vpalignr $8, W_m16, W_m12, W; \
+       vpsrldq $4, W_m04, tmp0; \
+       vpxor W_m08, W, W;
+
+#define W_PRECALC_16_31_1(i, W, W_m04, W_m08, W_m12, W_m16, tmp0, tmp1) \
+       vpxor W_m16, tmp0, tmp0; \
+       vpxor tmp0, W, W; \
+       vpslld $1, W, tmp0; \
+       vpslldq $12, W, tmp1; \
+       vpsrld $31, W, W;
+
+#define W_PRECALC_16_31_2(i, W, W_m04, W_m08, W_m12, W_m16, tmp0, tmp1) \
+       vpor W, tmp0, tmp0; \
+       vpsrld $30, tmp1, W; \
+       vpslld $2, tmp1, tmp1;
+
+#define W_PRECALC_16_31_3(i, W, W_m04, W_m08, W_m12, W_m16, tmp0, tmp1) \
+       vpxor W, tmp0, tmp0; \
+       vpxor tmp1, tmp0, W; \
+       vpaddd (.LK_XMM + ((i)/20)*16) rRIP, W, tmp0; \
+       vmovdqa tmp0, WK((i)&~3);
+
+#define W_PRECALC_32_79_0(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, 
W_m28, tmp0) \
+       vpxor W_m28, W, W; \
+       vpalignr $8, W_m08, W_m04, tmp0;
+
+#define W_PRECALC_32_79_1(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, 
W_m28, tmp0) \
+       vpxor W_m16, W, W; \
+       vpxor tmp0, W, W;
+
+#define W_PRECALC_32_79_2(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, 
W_m28, tmp0) \
+       vpsrld $30, W, tmp0; \
+       vpslld $2, W, W;
+
+#define W_PRECALC_32_79_3(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, 
W_m28, tmp0) \
+       vpor W, tmp0, W; \
+       vpaddd (.LK_XMM + ((i)/20)*16) rRIP, W, tmp0; \
+       vmovdqa tmp0, WK((i)&~3);
+
+
+/*
+ * Transform nblks*64 bytes (nblks*16 32-bit words) at DATA.
+ *
+ * unsigned int
+ * _gcry_sha1_transform_amd64_avx (void *ctx, const unsigned char *data,
+ *                                  size_t nblks)
+ */
+.globl _gcry_sha1_transform_amd64_avx
+ELF(.type _gcry_sha1_transform_amd64_avx,@function)
+.align 16
+_gcry_sha1_transform_amd64_avx:
+  /* input:
+   *   %rdi: ctx, CTX
+   *   %rsi: data (64*nblks bytes)
+   *   %rdx: nblks
+   */
+  CFI_STARTPROC();
+
+  xorl %eax, %eax;
+  cmpq $0, %rdx;
+  jz .Lret;
+
+  vzeroupper;
+
+  movq %rdx, RNBLKS;
+  movq %rdi, RSTATE;
+  movq %rsi, RDATA;
+  pushq %rbx;
+  CFI_PUSH(%rbx);
+  pushq %rbp;
+  CFI_PUSH(%rbp);
+
+  movq %rsp, ROLDSTACK;
+  CFI_DEF_CFA_REGISTER(ROLDSTACK);
+
+  subq $(16*4), %rsp;
+  andq $(~31), %rsp;
+
+  /* Get the values of the chaining variables. */
+  movl state_h0(RSTATE), a;
+  movl state_h1(RSTATE), b;
+  movl state_h2(RSTATE), c;
+  movl state_h3(RSTATE), d;
+  movl state_h4(RSTATE), e;
+
+  vmovdqa .Lbswap_shufb_ctl rRIP, BSWAP_REG;
+
+  /* Precalc 0-15. */
+  W_PRECALC_00_15_0(0, W0, Wtmp0);
+  W_PRECALC_00_15_1(1, W0, Wtmp0);
+  W_PRECALC_00_15_2(2, W0, Wtmp0);
+  W_PRECALC_00_15_3(3, W0, Wtmp0);
+  W_PRECALC_00_15_0(4, W7, Wtmp0);
+  W_PRECALC_00_15_1(5, W7, Wtmp0);
+  W_PRECALC_00_15_2(6, W7, Wtmp0);
+  W_PRECALC_00_15_3(7, W7, Wtmp0);
+  W_PRECALC_00_15_0(8, W6, Wtmp0);
+  W_PRECALC_00_15_1(9, W6, Wtmp0);
+  W_PRECALC_00_15_2(10, W6, Wtmp0);
+  W_PRECALC_00_15_3(11, W6, Wtmp0);
+  W_PRECALC_00_15_0(12, W5, Wtmp0);
+  W_PRECALC_00_15_1(13, W5, Wtmp0);
+  W_PRECALC_00_15_2(14, W5, Wtmp0);
+  W_PRECALC_00_15_3(15, W5, Wtmp0);
+
+.align 8
+.Loop:
+  addq $64, RDATA;
+
+  /* Transform 0-15 + Precalc 16-31. */
+  R( a, b, c, d, e, F1,  0 ); W_PRECALC_16_31_0(16, W4, W5, W6, W7, W0, Wtmp0, 
Wtmp1);
+  R( e, a, b, c, d, F1,  1 ); W_PRECALC_16_31_1(17, W4, W5, W6, W7, W0, Wtmp0, 
Wtmp1);
+  R( d, e, a, b, c, F1,  2 ); W_PRECALC_16_31_2(18, W4, W5, W6, W7, W0, Wtmp0, 
Wtmp1);
+  R( c, d, e, a, b, F1,  3 ); W_PRECALC_16_31_3(19, W4, W5, W6, W7, W0, Wtmp0, 
Wtmp1);
+  R( b, c, d, e, a, F1,  4 ); W_PRECALC_16_31_0(20, W3, W4, W5, W6, W7, Wtmp0, 
Wtmp1);
+  R( a, b, c, d, e, F1,  5 ); W_PRECALC_16_31_1(21, W3, W4, W5, W6, W7, Wtmp0, 
Wtmp1);
+  R( e, a, b, c, d, F1,  6 ); W_PRECALC_16_31_2(22, W3, W4, W5, W6, W7, Wtmp0, 
Wtmp1);
+  R( d, e, a, b, c, F1,  7 ); W_PRECALC_16_31_3(23, W3, W4, W5, W6, W7, Wtmp0, 
Wtmp1);
+  R( c, d, e, a, b, F1,  8 ); W_PRECALC_16_31_0(24, W2, W3, W4, W5, W6, Wtmp0, 
Wtmp1);
+  R( b, c, d, e, a, F1,  9 ); W_PRECALC_16_31_1(25, W2, W3, W4, W5, W6, Wtmp0, 
Wtmp1);
+  R( a, b, c, d, e, F1, 10 ); W_PRECALC_16_31_2(26, W2, W3, W4, W5, W6, Wtmp0, 
Wtmp1);
+  R( e, a, b, c, d, F1, 11 ); W_PRECALC_16_31_3(27, W2, W3, W4, W5, W6, Wtmp0, 
Wtmp1);
+  R( d, e, a, b, c, F1, 12 ); W_PRECALC_16_31_0(28, W1, W2, W3, W4, W5, Wtmp0, 
Wtmp1);
+  R( c, d, e, a, b, F1, 13 ); W_PRECALC_16_31_1(29, W1, W2, W3, W4, W5, Wtmp0, 
Wtmp1);
+  R( b, c, d, e, a, F1, 14 ); W_PRECALC_16_31_2(30, W1, W2, W3, W4, W5, Wtmp0, 
Wtmp1);
+  R( a, b, c, d, e, F1, 15 ); W_PRECALC_16_31_3(31, W1, W2, W3, W4, W5, Wtmp0, 
Wtmp1);
+
+  /* Transform 16-63 + Precalc 32-79. */
+  R( e, a, b, c, d, F1, 16 ); W_PRECALC_32_79_0(32, W0, W1, W2, W3, W4, W5, 
W6, W7, Wtmp0);
+  R( d, e, a, b, c, F1, 17 ); W_PRECALC_32_79_1(33, W0, W1, W2, W3, W4, W5, 
W6, W7, Wtmp0);
+  R( c, d, e, a, b, F1, 18 ); W_PRECALC_32_79_2(34, W0, W1, W2, W3, W4, W5, 
W6, W7, Wtmp0);
+  R( b, c, d, e, a, F1, 19 ); W_PRECALC_32_79_3(35, W0, W1, W2, W3, W4, W5, 
W6, W7, Wtmp0);
+  R( a, b, c, d, e, F2, 20 ); W_PRECALC_32_79_0(36, W7, W0, W1, W2, W3, W4, 
W5, W6, Wtmp0);
+  R( e, a, b, c, d, F2, 21 ); W_PRECALC_32_79_1(37, W7, W0, W1, W2, W3, W4, 
W5, W6, Wtmp0);
+  R( d, e, a, b, c, F2, 22 ); W_PRECALC_32_79_2(38, W7, W0, W1, W2, W3, W4, 
W5, W6, Wtmp0);
+  R( c, d, e, a, b, F2, 23 ); W_PRECALC_32_79_3(39, W7, W0, W1, W2, W3, W4, 
W5, W6, Wtmp0);
+  R( b, c, d, e, a, F2, 24 ); W_PRECALC_32_79_0(40, W6, W7, W0, W1, W2, W3, 
W4, W5, Wtmp0);
+  R( a, b, c, d, e, F2, 25 ); W_PRECALC_32_79_1(41, W6, W7, W0, W1, W2, W3, 
W4, W5, Wtmp0);
+  R( e, a, b, c, d, F2, 26 ); W_PRECALC_32_79_2(42, W6, W7, W0, W1, W2, W3, 
W4, W5, Wtmp0);
+  R( d, e, a, b, c, F2, 27 ); W_PRECALC_32_79_3(43, W6, W7, W0, W1, W2, W3, 
W4, W5, Wtmp0);
+  R( c, d, e, a, b, F2, 28 ); W_PRECALC_32_79_0(44, W5, W6, W7, W0, W1, W2, 
W3, W4, Wtmp0);
+  R( b, c, d, e, a, F2, 29 ); W_PRECALC_32_79_1(45, W5, W6, W7, W0, W1, W2, 
W3, W4, Wtmp0);
+  R( a, b, c, d, e, F2, 30 ); W_PRECALC_32_79_2(46, W5, W6, W7, W0, W1, W2, 
W3, W4, Wtmp0);
+  R( e, a, b, c, d, F2, 31 ); W_PRECALC_32_79_3(47, W5, W6, W7, W0, W1, W2, 
W3, W4, Wtmp0);
+  R( d, e, a, b, c, F2, 32 ); W_PRECALC_32_79_0(48, W4, W5, W6, W7, W0, W1, 
W2, W3, Wtmp0);
+  R( c, d, e, a, b, F2, 33 ); W_PRECALC_32_79_1(49, W4, W5, W6, W7, W0, W1, 
W2, W3, Wtmp0);
+  R( b, c, d, e, a, F2, 34 ); W_PRECALC_32_79_2(50, W4, W5, W6, W7, W0, W1, 
W2, W3, Wtmp0);
+  R( a, b, c, d, e, F2, 35 ); W_PRECALC_32_79_3(51, W4, W5, W6, W7, W0, W1, 
W2, W3, Wtmp0);
+  R( e, a, b, c, d, F2, 36 ); W_PRECALC_32_79_0(52, W3, W4, W5, W6, W7, W0, 
W1, W2, Wtmp0);
+  R( d, e, a, b, c, F2, 37 ); W_PRECALC_32_79_1(53, W3, W4, W5, W6, W7, W0, 
W1, W2, Wtmp0);
+  R( c, d, e, a, b, F2, 38 ); W_PRECALC_32_79_2(54, W3, W4, W5, W6, W7, W0, 
W1, W2, Wtmp0);
+  R( b, c, d, e, a, F2, 39 ); W_PRECALC_32_79_3(55, W3, W4, W5, W6, W7, W0, 
W1, W2, Wtmp0);
+  R( a, b, c, d, e, F3, 40 ); W_PRECALC_32_79_0(56, W2, W3, W4, W5, W6, W7, 
W0, W1, Wtmp0);
+  R( e, a, b, c, d, F3, 41 ); W_PRECALC_32_79_1(57, W2, W3, W4, W5, W6, W7, 
W0, W1, Wtmp0);
+  R( d, e, a, b, c, F3, 42 ); W_PRECALC_32_79_2(58, W2, W3, W4, W5, W6, W7, 
W0, W1, Wtmp0);
+  R( c, d, e, a, b, F3, 43 ); W_PRECALC_32_79_3(59, W2, W3, W4, W5, W6, W7, 
W0, W1, Wtmp0);
+  R( b, c, d, e, a, F3, 44 ); W_PRECALC_32_79_0(60, W1, W2, W3, W4, W5, W6, 
W7, W0, Wtmp0);
+  R( a, b, c, d, e, F3, 45 ); W_PRECALC_32_79_1(61, W1, W2, W3, W4, W5, W6, 
W7, W0, Wtmp0);
+  R( e, a, b, c, d, F3, 46 ); W_PRECALC_32_79_2(62, W1, W2, W3, W4, W5, W6, 
W7, W0, Wtmp0);
+  R( d, e, a, b, c, F3, 47 ); W_PRECALC_32_79_3(63, W1, W2, W3, W4, W5, W6, 
W7, W0, Wtmp0);
+  R( c, d, e, a, b, F3, 48 ); W_PRECALC_32_79_0(64, W0, W1, W2, W3, W4, W5, 
W6, W7, Wtmp0);
+  R( b, c, d, e, a, F3, 49 ); W_PRECALC_32_79_1(65, W0, W1, W2, W3, W4, W5, 
W6, W7, Wtmp0);
+  R( a, b, c, d, e, F3, 50 ); W_PRECALC_32_79_2(66, W0, W1, W2, W3, W4, W5, 
W6, W7, Wtmp0);
+  R( e, a, b, c, d, F3, 51 ); W_PRECALC_32_79_3(67, W0, W1, W2, W3, W4, W5, 
W6, W7, Wtmp0);
+  R( d, e, a, b, c, F3, 52 ); W_PRECALC_32_79_0(68, W7, W0, W1, W2, W3, W4, 
W5, W6, Wtmp0);
+  R( c, d, e, a, b, F3, 53 ); W_PRECALC_32_79_1(69, W7, W0, W1, W2, W3, W4, 
W5, W6, Wtmp0);
+  R( b, c, d, e, a, F3, 54 ); W_PRECALC_32_79_2(70, W7, W0, W1, W2, W3, W4, 
W5, W6, Wtmp0);
+  R( a, b, c, d, e, F3, 55 ); W_PRECALC_32_79_3(71, W7, W0, W1, W2, W3, W4, 
W5, W6, Wtmp0);
+  R( e, a, b, c, d, F3, 56 ); W_PRECALC_32_79_0(72, W6, W7, W0, W1, W2, W3, 
W4, W5, Wtmp0);
+  R( d, e, a, b, c, F3, 57 ); W_PRECALC_32_79_1(73, W6, W7, W0, W1, W2, W3, 
W4, W5, Wtmp0);
+  R( c, d, e, a, b, F3, 58 ); W_PRECALC_32_79_2(74, W6, W7, W0, W1, W2, W3, 
W4, W5, Wtmp0);
+  R( b, c, d, e, a, F3, 59 ); W_PRECALC_32_79_3(75, W6, W7, W0, W1, W2, W3, 
W4, W5, Wtmp0);
+  R( a, b, c, d, e, F4, 60 ); W_PRECALC_32_79_0(76, W5, W6, W7, W0, W1, W2, 
W3, W4, Wtmp0);
+  R( e, a, b, c, d, F4, 61 ); W_PRECALC_32_79_1(77, W5, W6, W7, W0, W1, W2, 
W3, W4, Wtmp0);
+  R( d, e, a, b, c, F4, 62 ); W_PRECALC_32_79_2(78, W5, W6, W7, W0, W1, W2, 
W3, W4, Wtmp0);
+  R( c, d, e, a, b, F4, 63 ); W_PRECALC_32_79_3(79, W5, W6, W7, W0, W1, W2, 
W3, W4, Wtmp0);
+
+  decq RNBLKS;
+  jz .Lend;
+
+  /* Transform 64-79 + Precalc 0-15 of next block. */
+  R( b, c, d, e, a, F4, 64 ); W_PRECALC_00_15_0(0, W0, Wtmp0);
+  R( a, b, c, d, e, F4, 65 ); W_PRECALC_00_15_1(1, W0, Wtmp0);
+  R( e, a, b, c, d, F4, 66 ); W_PRECALC_00_15_2(2, W0, Wtmp0);
+  R( d, e, a, b, c, F4, 67 ); W_PRECALC_00_15_3(3, W0, Wtmp0);
+  R( c, d, e, a, b, F4, 68 ); W_PRECALC_00_15_0(4, W7, Wtmp0);
+  R( b, c, d, e, a, F4, 69 ); W_PRECALC_00_15_1(5, W7, Wtmp0);
+  R( a, b, c, d, e, F4, 70 ); W_PRECALC_00_15_2(6, W7, Wtmp0);
+  R( e, a, b, c, d, F4, 71 ); W_PRECALC_00_15_3(7, W7, Wtmp0);
+  R( d, e, a, b, c, F4, 72 ); W_PRECALC_00_15_0(8, W6, Wtmp0);
+  R( c, d, e, a, b, F4, 73 ); W_PRECALC_00_15_1(9, W6, Wtmp0);
+  R( b, c, d, e, a, F4, 74 ); W_PRECALC_00_15_2(10, W6, Wtmp0);
+  R( a, b, c, d, e, F4, 75 ); W_PRECALC_00_15_3(11, W6, Wtmp0);
+  R( e, a, b, c, d, F4, 76 ); W_PRECALC_00_15_0(12, W5, Wtmp0);
+  R( d, e, a, b, c, F4, 77 ); W_PRECALC_00_15_1(13, W5, Wtmp0);
+  R( c, d, e, a, b, F4, 78 );
+  addl state_h0(RSTATE), a;   W_PRECALC_00_15_2(14, W5, Wtmp0);
+  R( b, c, d, e, a, F4, 79 ); W_PRECALC_00_15_3(15, W5, Wtmp0);
+
+  /* Update the chaining variables. */
+  addl state_h3(RSTATE), d;
+  addl state_h2(RSTATE), c;
+  addl state_h1(RSTATE), b;
+  addl state_h4(RSTATE), e;
+
+  movl d, state_h3(RSTATE);
+  movl c, state_h2(RSTATE);
+  movl b, state_h1(RSTATE);
+  movl a, state_h0(RSTATE);
+  movl e, state_h4(RSTATE);
+
+  jmp .Loop;
+
+.align 16
+.Lend:
+  vzeroall;
+
+  /* Transform 64-79 + burn stack */
+  R( b, c, d, e, a, F4, 64 );
+  R( a, b, c, d, e, F4, 65 );
+  R( e, a, b, c, d, F4, 66 );
+  R( d, e, a, b, c, F4, 67 );
+  R( c, d, e, a, b, F4, 68 );
+  R( b, c, d, e, a, F4, 69 );
+  R( a, b, c, d, e, F4, 70 );
+  R( e, a, b, c, d, F4, 71 );
+  R( d, e, a, b, c, F4, 72 );
+  R( c, d, e, a, b, F4, 73 );
+  R( b, c, d, e, a, F4, 74 );
+  R( a, b, c, d, e, F4, 75 );
+  R( e, a, b, c, d, F4, 76 ); vmovdqa %xmm0, (0*16)(%rsp);
+  R( d, e, a, b, c, F4, 77 ); vmovdqa %xmm0, (1*16)(%rsp);
+  R( c, d, e, a, b, F4, 78 ); vmovdqa %xmm0, (2*16)(%rsp);
+  addl state_h0(RSTATE), a;
+  R( b, c, d, e, a, F4, 79 );
+
+  /* 16*4/16-1 = 3 */
+  vmovdqa %xmm0, (3*16)(%rsp);
+
+  /* Update the chaining variables. */
+  addl state_h3(RSTATE), d;
+  addl state_h2(RSTATE), c;
+  addl state_h1(RSTATE), b;
+  addl state_h4(RSTATE), e;
+
+  movl d, state_h3(RSTATE);
+  movl c, state_h2(RSTATE);
+  movl b, state_h1(RSTATE);
+  movl a, state_h0(RSTATE);
+  movl e, state_h4(RSTATE);
+
+  movq ROLDSTACK, %rsp;
+  CFI_REGISTER(ROLDSTACK, %rsp);
+  CFI_DEF_CFA_REGISTER(%rsp);
+
+  popq %rbp;
+  CFI_POP(%rbp);
+  popq %rbx;
+  CFI_POP(%rbx);
+
+  /* stack already burned */
+  xorl %eax, %eax;
+
+.Lret:
+  ret_spec_stop;
+  CFI_ENDPROC();
+ELF(.size _gcry_sha1_transform_amd64_avx,
+    .-_gcry_sha1_transform_amd64_avx;)
+
+#endif
+#endif
diff --git a/grub-core/lib/libgcrypt/cipher/sha1-avx-bmi2-amd64.S 
b/grub-core/lib/libgcrypt/cipher/sha1-avx-bmi2-amd64.S
new file mode 100644
index 000000000..5f4b9e697
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/sha1-avx-bmi2-amd64.S
@@ -0,0 +1,441 @@
+/* sha1-avx-bmi2-amd64.S - Intel AVX/BMI2 accelerated SHA-1 transform function
+ * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * Based on sha1.c:
+ *  Copyright (C) 1998, 2001, 2002, 2003, 2008 Free Software Foundation, Inc.
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * Intel SSSE3 accelerated SHA-1 implementation based on white paper:
+ *  "Improving the Performance of the Secure Hash Algorithm (SHA-1)"
+ *  
http://software.intel.com/en-us/articles/improving-the-performance-of-the-secure-hash-algorithm-1
+ */
+
+#ifdef __x86_64__
+#include <config.h>
+
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
+    defined(HAVE_GCC_INLINE_ASM_BMI2) && \
+    defined(HAVE_GCC_INLINE_ASM_AVX) && defined(USE_SHA1)
+
+#include "asm-common-amd64.h"
+
+
+/* Context structure */
+
+#define state_h0 0
+#define state_h1 4
+#define state_h2 8
+#define state_h3 12
+#define state_h4 16
+
+
+/* Constants */
+
+.text
+.align 16
+.Lbswap_shufb_ctl:
+       .long 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f
+
+.LK1:  .long 0x5A827999
+.LK2:  .long 0x6ED9EBA1
+.LK3:  .long 0x8F1BBCDC
+.LK4:  .long 0xCA62C1D6
+
+
+/* Register macros */
+
+#define RSTATE %r8
+#define RDATA %r9
+#define ROLDSTACK %r10
+#define RNBLKS %r11
+
+#define a %esi
+#define b %edi
+#define c %ebp
+#define d %edx
+#define e %ecx
+#define ne %ebx
+
+#define RT0 %eax
+#define RT1 %r12d
+
+#define Wtmp0 %xmm0
+#define Wtmp1 %xmm1
+
+#define W0 %xmm2
+#define W1 %xmm3
+#define W2 %xmm4
+#define W3 %xmm5
+#define W4 %xmm6
+#define W5 %xmm7
+#define W6 %xmm8
+#define W7 %xmm9
+
+#define BSWAP_REG %xmm10
+
+#define K1 %xmm11
+#define K2 %xmm12
+#define K3 %xmm13
+#define K4 %xmm14
+
+
+/* Round function macros. */
+
+#define WK(i) (((i) & 15) * 4)(%rsp)
+
+#define R_F1(a,b,c,d,e,i) \
+       movl c, RT0; \
+       andn d, b, RT1; \
+       addl WK(i), e; \
+       andl b, RT0; \
+       rorxl $2, b, b; \
+       addl RT1, e; \
+       addl ne, a; \
+       leal (RT0,e), ne; \
+       rorxl $27, a, e;
+
+#define R_F2(a,b,c,d,e,i) \
+       movl c, RT0; \
+       addl WK(i), e; \
+       xorl b, RT0; \
+       rorxl $2, b, b; \
+       xorl d, RT0; \
+       addl ne, a; \
+       leal (RT0,e), ne; \
+       rorxl $27, a, e;
+
+#define R_F3(a,b,c,d,e,i) \
+       movl c, RT0; \
+       movl b, RT1; \
+       addl WK(i), e; \
+       xorl b, RT0; \
+       andl c, RT1; \
+       andl d, RT0; \
+       addl RT1, e; \
+       rorxl $2, b, b; \
+       addl ne, a; \
+       leal (RT0,e), ne; \
+       rorxl $27, a, e;
+
+#define R_F4(a,b,c,d,e,i) R_F2(a,b,c,d,e,i)
+
+#define R(a,b,c,d,e,f,i) \
+       R_##f(a,b,c,d,e,i)
+
+
+/* Input expansion macros. */
+
+#define W_PRECALC_00_15_0(i, W, tmp0) \
+       vmovdqu (4*(i))(RDATA), tmp0;
+
+#define W_PRECALC_00_15_1(i, W, tmp0) \
+       vpshufb BSWAP_REG, tmp0, W;
+
+#define W_PRECALC_00_15_2(i, W, tmp0, K) \
+       vpaddd K, W, tmp0;
+
+#define W_PRECALC_00_15_3(i, W, tmp0) \
+       vmovdqa tmp0, WK(i&~3);
+
+#define W_PRECALC_16_31_0(i, W, W_m04, W_m08, W_m12, W_m16, tmp0, tmp1) \
+       vpalignr $8, W_m16, W_m12, W; \
+       vpsrldq $4, W_m04, tmp0; \
+       vpxor W_m08, W, W;
+
+#define W_PRECALC_16_31_1(i, W, W_m04, W_m08, W_m12, W_m16, tmp0, tmp1) \
+       vpxor W_m16, tmp0, tmp0; \
+       vpxor tmp0, W, W; \
+       vpslld $1, W, tmp0; \
+       vpslldq $12, W, tmp1; \
+       vpsrld $31, W, W;
+
+#define W_PRECALC_16_31_2(i, W, W_m04, W_m08, W_m12, W_m16, tmp0, tmp1) \
+       vpor W, tmp0, tmp0; \
+       vpsrld $30, tmp1, W; \
+       vpslld $2, tmp1, tmp1;
+
+#define W_PRECALC_16_31_3(i, W, W_m04, W_m08, W_m12, W_m16, tmp0, tmp1, K) \
+       vpxor W, tmp0, tmp0; \
+       vpxor tmp1, tmp0, W; \
+       vpaddd K, W, tmp0; \
+       vmovdqa tmp0, WK((i)&~3);
+
+#define W_PRECALC_32_79_0(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, 
W_m28, tmp0) \
+       vpxor W_m28, W, W; \
+       vpalignr $8, W_m08, W_m04, tmp0;
+
+#define W_PRECALC_32_79_1(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, 
W_m28, tmp0) \
+       vpxor W_m16, W, W; \
+       vpxor tmp0, W, W;
+
+#define W_PRECALC_32_79_2(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, 
W_m28, tmp0) \
+       vpsrld $30, W, tmp0; \
+       vpslld $2, W, W;
+
+#define W_PRECALC_32_79_3(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, 
W_m28, tmp0, K) \
+       vpor W, tmp0, W; \
+       vpaddd K, W, tmp0; \
+       vmovdqa tmp0, WK((i)&~3);
+
+
+/*
+ * Transform nblks*64 bytes (nblks*16 32-bit words) at DATA.
+ *
+ * unsigned int
+ * _gcry_sha1_transform_amd64_avx_bmi2 (void *ctx, const unsigned char *data,
+ *                                      size_t nblks)
+ */
+.globl _gcry_sha1_transform_amd64_avx_bmi2
+ELF(.type _gcry_sha1_transform_amd64_avx_bmi2,@function)
+.align 16
+_gcry_sha1_transform_amd64_avx_bmi2:
+  /* input:
+   *   %rdi: ctx, CTX
+   *   %rsi: data (64*nblks bytes)
+   *   %rdx: nblks
+   */
+  CFI_STARTPROC();
+
+  xorl %eax, %eax;
+  cmpq $0, %rdx;
+  jz .Lret;
+
+  vzeroupper;
+
+  movq %rdx, RNBLKS;
+  movq %rdi, RSTATE;
+  movq %rsi, RDATA;
+  pushq %rbx;
+  CFI_PUSH(%rbx);
+  pushq %rbp;
+  CFI_PUSH(%rbp);
+  pushq %r12;
+  CFI_PUSH(%r12);
+
+  movq %rsp, ROLDSTACK;
+  CFI_DEF_CFA_REGISTER(ROLDSTACK);
+
+  subq $(16*4), %rsp;
+  andq $(~31), %rsp;
+
+  /* Get the values of the chaining variables. */
+  movl state_h0(RSTATE), a;
+  movl state_h1(RSTATE), b;
+  movl state_h2(RSTATE), c;
+  movl state_h3(RSTATE), d;
+  movl state_h4(RSTATE), e;
+  xorl ne, ne;
+
+  vmovdqa .Lbswap_shufb_ctl rRIP, BSWAP_REG;
+  vpbroadcastd .LK1 rRIP, K1;
+  vpbroadcastd .LK2 rRIP, K2;
+  vpbroadcastd .LK3 rRIP, K3;
+  vpbroadcastd .LK4 rRIP, K4;
+
+  /* Precalc 0-15. */
+  W_PRECALC_00_15_0(0, W0, Wtmp0);
+  W_PRECALC_00_15_1(1, W0, Wtmp0);
+  W_PRECALC_00_15_2(2, W0, Wtmp0, K1);
+  W_PRECALC_00_15_3(3, W0, Wtmp0);
+  W_PRECALC_00_15_0(4, W7, Wtmp0);
+  W_PRECALC_00_15_1(5, W7, Wtmp0);
+  W_PRECALC_00_15_2(6, W7, Wtmp0, K1);
+  W_PRECALC_00_15_3(7, W7, Wtmp0);
+  W_PRECALC_00_15_0(8, W6, Wtmp0);
+  W_PRECALC_00_15_1(9, W6, Wtmp0);
+  W_PRECALC_00_15_2(10, W6, Wtmp0, K1);
+  W_PRECALC_00_15_3(11, W6, Wtmp0);
+  W_PRECALC_00_15_0(12, W5, Wtmp0);
+  W_PRECALC_00_15_1(13, W5, Wtmp0);
+  W_PRECALC_00_15_2(14, W5, Wtmp0, K1);
+  W_PRECALC_00_15_3(15, W5, Wtmp0);
+
+.align 8
+.Loop:
+  addq $64, RDATA;
+
+  /* Transform 0-15 + Precalc 16-31. */
+  R( a, b, c, d, e, F1,  0 ); W_PRECALC_16_31_0(16, W4, W5, W6, W7, W0, Wtmp0, 
Wtmp1);
+  R( e, a, b, c, d, F1,  1 ); W_PRECALC_16_31_1(17, W4, W5, W6, W7, W0, Wtmp0, 
Wtmp1);
+  R( d, e, a, b, c, F1,  2 ); W_PRECALC_16_31_2(18, W4, W5, W6, W7, W0, Wtmp0, 
Wtmp1);
+  R( c, d, e, a, b, F1,  3 ); W_PRECALC_16_31_3(19, W4, W5, W6, W7, W0, Wtmp0, 
Wtmp1, K1);
+  R( b, c, d, e, a, F1,  4 ); W_PRECALC_16_31_0(20, W3, W4, W5, W6, W7, Wtmp0, 
Wtmp1);
+  R( a, b, c, d, e, F1,  5 ); W_PRECALC_16_31_1(21, W3, W4, W5, W6, W7, Wtmp0, 
Wtmp1);
+  R( e, a, b, c, d, F1,  6 ); W_PRECALC_16_31_2(22, W3, W4, W5, W6, W7, Wtmp0, 
Wtmp1);
+  R( d, e, a, b, c, F1,  7 ); W_PRECALC_16_31_3(23, W3, W4, W5, W6, W7, Wtmp0, 
Wtmp1, K2);
+  R( c, d, e, a, b, F1,  8 ); W_PRECALC_16_31_0(24, W2, W3, W4, W5, W6, Wtmp0, 
Wtmp1);
+  R( b, c, d, e, a, F1,  9 ); W_PRECALC_16_31_1(25, W2, W3, W4, W5, W6, Wtmp0, 
Wtmp1);
+  R( a, b, c, d, e, F1, 10 ); W_PRECALC_16_31_2(26, W2, W3, W4, W5, W6, Wtmp0, 
Wtmp1);
+  R( e, a, b, c, d, F1, 11 ); W_PRECALC_16_31_3(27, W2, W3, W4, W5, W6, Wtmp0, 
Wtmp1, K2);
+  R( d, e, a, b, c, F1, 12 ); W_PRECALC_16_31_0(28, W1, W2, W3, W4, W5, Wtmp0, 
Wtmp1);
+  R( c, d, e, a, b, F1, 13 ); W_PRECALC_16_31_1(29, W1, W2, W3, W4, W5, Wtmp0, 
Wtmp1);
+  R( b, c, d, e, a, F1, 14 ); W_PRECALC_16_31_2(30, W1, W2, W3, W4, W5, Wtmp0, 
Wtmp1);
+  R( a, b, c, d, e, F1, 15 ); W_PRECALC_16_31_3(31, W1, W2, W3, W4, W5, Wtmp0, 
Wtmp1, K2);
+
+  /* Transform 16-63 + Precalc 32-79. */
+  R( e, a, b, c, d, F1, 16 ); W_PRECALC_32_79_0(32, W0, W1, W2, W3, W4, W5, 
W6, W7, Wtmp0);
+  R( d, e, a, b, c, F1, 17 ); W_PRECALC_32_79_1(33, W0, W1, W2, W3, W4, W5, 
W6, W7, Wtmp0);
+  R( c, d, e, a, b, F1, 18 ); W_PRECALC_32_79_2(34, W0, W1, W2, W3, W4, W5, 
W6, W7, Wtmp0);
+  R( b, c, d, e, a, F1, 19 ); W_PRECALC_32_79_3(35, W0, W1, W2, W3, W4, W5, 
W6, W7, Wtmp0, K2);
+  R( a, b, c, d, e, F2, 20 ); W_PRECALC_32_79_0(36, W7, W0, W1, W2, W3, W4, 
W5, W6, Wtmp0);
+  R( e, a, b, c, d, F2, 21 ); W_PRECALC_32_79_1(37, W7, W0, W1, W2, W3, W4, 
W5, W6, Wtmp0);
+  R( d, e, a, b, c, F2, 22 ); W_PRECALC_32_79_2(38, W7, W0, W1, W2, W3, W4, 
W5, W6, Wtmp0);
+  R( c, d, e, a, b, F2, 23 ); W_PRECALC_32_79_3(39, W7, W0, W1, W2, W3, W4, 
W5, W6, Wtmp0, K2);
+  R( b, c, d, e, a, F2, 24 ); W_PRECALC_32_79_0(40, W6, W7, W0, W1, W2, W3, 
W4, W5, Wtmp0);
+  R( a, b, c, d, e, F2, 25 ); W_PRECALC_32_79_1(41, W6, W7, W0, W1, W2, W3, 
W4, W5, Wtmp0);
+  R( e, a, b, c, d, F2, 26 ); W_PRECALC_32_79_2(42, W6, W7, W0, W1, W2, W3, 
W4, W5, Wtmp0);
+  R( d, e, a, b, c, F2, 27 ); W_PRECALC_32_79_3(43, W6, W7, W0, W1, W2, W3, 
W4, W5, Wtmp0, K3);
+  R( c, d, e, a, b, F2, 28 ); W_PRECALC_32_79_0(44, W5, W6, W7, W0, W1, W2, 
W3, W4, Wtmp0);
+  R( b, c, d, e, a, F2, 29 ); W_PRECALC_32_79_1(45, W5, W6, W7, W0, W1, W2, 
W3, W4, Wtmp0);
+  R( a, b, c, d, e, F2, 30 ); W_PRECALC_32_79_2(46, W5, W6, W7, W0, W1, W2, 
W3, W4, Wtmp0);
+  R( e, a, b, c, d, F2, 31 ); W_PRECALC_32_79_3(47, W5, W6, W7, W0, W1, W2, 
W3, W4, Wtmp0, K3);
+  R( d, e, a, b, c, F2, 32 ); W_PRECALC_32_79_0(48, W4, W5, W6, W7, W0, W1, 
W2, W3, Wtmp0);
+  R( c, d, e, a, b, F2, 33 ); W_PRECALC_32_79_1(49, W4, W5, W6, W7, W0, W1, 
W2, W3, Wtmp0);
+  R( b, c, d, e, a, F2, 34 ); W_PRECALC_32_79_2(50, W4, W5, W6, W7, W0, W1, 
W2, W3, Wtmp0);
+  R( a, b, c, d, e, F2, 35 ); W_PRECALC_32_79_3(51, W4, W5, W6, W7, W0, W1, 
W2, W3, Wtmp0, K3);
+  R( e, a, b, c, d, F2, 36 ); W_PRECALC_32_79_0(52, W3, W4, W5, W6, W7, W0, 
W1, W2, Wtmp0);
+  R( d, e, a, b, c, F2, 37 ); W_PRECALC_32_79_1(53, W3, W4, W5, W6, W7, W0, 
W1, W2, Wtmp0);
+  R( c, d, e, a, b, F2, 38 ); W_PRECALC_32_79_2(54, W3, W4, W5, W6, W7, W0, 
W1, W2, Wtmp0);
+  R( b, c, d, e, a, F2, 39 ); W_PRECALC_32_79_3(55, W3, W4, W5, W6, W7, W0, 
W1, W2, Wtmp0, K3);
+  R( a, b, c, d, e, F3, 40 ); W_PRECALC_32_79_0(56, W2, W3, W4, W5, W6, W7, 
W0, W1, Wtmp0);
+  R( e, a, b, c, d, F3, 41 ); W_PRECALC_32_79_1(57, W2, W3, W4, W5, W6, W7, 
W0, W1, Wtmp0);
+  R( d, e, a, b, c, F3, 42 ); W_PRECALC_32_79_2(58, W2, W3, W4, W5, W6, W7, 
W0, W1, Wtmp0);
+  R( c, d, e, a, b, F3, 43 ); W_PRECALC_32_79_3(59, W2, W3, W4, W5, W6, W7, 
W0, W1, Wtmp0, K3);
+  R( b, c, d, e, a, F3, 44 ); W_PRECALC_32_79_0(60, W1, W2, W3, W4, W5, W6, 
W7, W0, Wtmp0);
+  R( a, b, c, d, e, F3, 45 ); W_PRECALC_32_79_1(61, W1, W2, W3, W4, W5, W6, 
W7, W0, Wtmp0);
+  R( e, a, b, c, d, F3, 46 ); W_PRECALC_32_79_2(62, W1, W2, W3, W4, W5, W6, 
W7, W0, Wtmp0);
+  R( d, e, a, b, c, F3, 47 ); W_PRECALC_32_79_3(63, W1, W2, W3, W4, W5, W6, 
W7, W0, Wtmp0, K4);
+  R( c, d, e, a, b, F3, 48 ); W_PRECALC_32_79_0(64, W0, W1, W2, W3, W4, W5, 
W6, W7, Wtmp0);
+  R( b, c, d, e, a, F3, 49 ); W_PRECALC_32_79_1(65, W0, W1, W2, W3, W4, W5, 
W6, W7, Wtmp0);
+  R( a, b, c, d, e, F3, 50 ); W_PRECALC_32_79_2(66, W0, W1, W2, W3, W4, W5, 
W6, W7, Wtmp0);
+  R( e, a, b, c, d, F3, 51 ); W_PRECALC_32_79_3(67, W0, W1, W2, W3, W4, W5, 
W6, W7, Wtmp0, K4);
+  R( d, e, a, b, c, F3, 52 ); W_PRECALC_32_79_0(68, W7, W0, W1, W2, W3, W4, 
W5, W6, Wtmp0);
+  R( c, d, e, a, b, F3, 53 ); W_PRECALC_32_79_1(69, W7, W0, W1, W2, W3, W4, 
W5, W6, Wtmp0);
+  R( b, c, d, e, a, F3, 54 ); W_PRECALC_32_79_2(70, W7, W0, W1, W2, W3, W4, 
W5, W6, Wtmp0);
+  R( a, b, c, d, e, F3, 55 ); W_PRECALC_32_79_3(71, W7, W0, W1, W2, W3, W4, 
W5, W6, Wtmp0, K4);
+  R( e, a, b, c, d, F3, 56 ); W_PRECALC_32_79_0(72, W6, W7, W0, W1, W2, W3, 
W4, W5, Wtmp0);
+  R( d, e, a, b, c, F3, 57 ); W_PRECALC_32_79_1(73, W6, W7, W0, W1, W2, W3, 
W4, W5, Wtmp0);
+  R( c, d, e, a, b, F3, 58 ); W_PRECALC_32_79_2(74, W6, W7, W0, W1, W2, W3, 
W4, W5, Wtmp0);
+  R( b, c, d, e, a, F3, 59 ); W_PRECALC_32_79_3(75, W6, W7, W0, W1, W2, W3, 
W4, W5, Wtmp0, K4);
+  R( a, b, c, d, e, F4, 60 ); W_PRECALC_32_79_0(76, W5, W6, W7, W0, W1, W2, 
W3, W4, Wtmp0);
+  R( e, a, b, c, d, F4, 61 ); W_PRECALC_32_79_1(77, W5, W6, W7, W0, W1, W2, 
W3, W4, Wtmp0);
+  R( d, e, a, b, c, F4, 62 ); W_PRECALC_32_79_2(78, W5, W6, W7, W0, W1, W2, 
W3, W4, Wtmp0);
+  R( c, d, e, a, b, F4, 63 ); W_PRECALC_32_79_3(79, W5, W6, W7, W0, W1, W2, 
W3, W4, Wtmp0, K4);
+
+  decq RNBLKS;
+  jz .Lend;
+
+  /* Transform 64-79 + Precalc 0-15 of next block. */
+  R( b, c, d, e, a, F4, 64 ); W_PRECALC_00_15_0(0, W0, Wtmp0);
+  R( a, b, c, d, e, F4, 65 ); W_PRECALC_00_15_1(1, W0, Wtmp0);
+  R( e, a, b, c, d, F4, 66 ); W_PRECALC_00_15_2(2, W0, Wtmp0, K1);
+  R( d, e, a, b, c, F4, 67 ); W_PRECALC_00_15_3(3, W0, Wtmp0);
+  R( c, d, e, a, b, F4, 68 ); W_PRECALC_00_15_0(4, W7, Wtmp0);
+  R( b, c, d, e, a, F4, 69 ); W_PRECALC_00_15_1(5, W7, Wtmp0);
+  R( a, b, c, d, e, F4, 70 ); W_PRECALC_00_15_2(6, W7, Wtmp0, K1);
+  R( e, a, b, c, d, F4, 71 ); W_PRECALC_00_15_3(7, W7, Wtmp0);
+  R( d, e, a, b, c, F4, 72 ); W_PRECALC_00_15_0(8, W6, Wtmp0);
+  R( c, d, e, a, b, F4, 73 ); W_PRECALC_00_15_1(9, W6, Wtmp0);
+  R( b, c, d, e, a, F4, 74 ); W_PRECALC_00_15_2(10, W6, Wtmp0, K1);
+  R( a, b, c, d, e, F4, 75 ); W_PRECALC_00_15_3(11, W6, Wtmp0);
+  R( e, a, b, c, d, F4, 76 ); W_PRECALC_00_15_0(12, W5, Wtmp0);
+  R( d, e, a, b, c, F4, 77 ); W_PRECALC_00_15_1(13, W5, Wtmp0);
+  R( c, d, e, a, b, F4, 78 );
+  addl state_h0(RSTATE), a;   W_PRECALC_00_15_2(14, W5, Wtmp0, K1);
+  R( b, c, d, e, a, F4, 79 ); W_PRECALC_00_15_3(15, W5, Wtmp0);
+  addl ne, a;
+  xorl ne, ne;
+
+  /* Update the chaining variables. */
+  addl state_h3(RSTATE), d;
+  addl state_h2(RSTATE), c;
+  addl state_h1(RSTATE), b;
+  addl state_h4(RSTATE), e;
+
+  movl d, state_h3(RSTATE);
+  movl c, state_h2(RSTATE);
+  movl b, state_h1(RSTATE);
+  movl a, state_h0(RSTATE);
+  movl e, state_h4(RSTATE);
+
+  jmp .Loop;
+
+.align 16
+.Lend:
+  vzeroall;
+
+  /* Transform 64-79 + burn stack */
+  R( b, c, d, e, a, F4, 64 );
+  R( a, b, c, d, e, F4, 65 );
+  R( e, a, b, c, d, F4, 66 );
+  R( d, e, a, b, c, F4, 67 );
+  R( c, d, e, a, b, F4, 68 );
+  R( b, c, d, e, a, F4, 69 );
+  R( a, b, c, d, e, F4, 70 );
+  R( e, a, b, c, d, F4, 71 );
+  R( d, e, a, b, c, F4, 72 );
+  R( c, d, e, a, b, F4, 73 );
+  R( b, c, d, e, a, F4, 74 );
+  R( a, b, c, d, e, F4, 75 );
+  R( e, a, b, c, d, F4, 76 ); vmovdqa %xmm0, (0*16)(%rsp);
+  R( d, e, a, b, c, F4, 77 ); vmovdqa %xmm0, (1*16)(%rsp);
+  R( c, d, e, a, b, F4, 78 ); vmovdqa %xmm0, (2*16)(%rsp);
+  addl state_h0(RSTATE), a;
+  R( b, c, d, e, a, F4, 79 );
+  addl ne, a;
+  xorl ne, ne;
+
+  /* 16*4/16-1 = 3 */
+  vmovdqa %xmm0, (3*16)(%rsp);
+
+  /* Update the chaining variables. */
+  addl state_h3(RSTATE), d;
+  addl state_h2(RSTATE), c;
+  addl state_h1(RSTATE), b;
+  addl state_h4(RSTATE), e;
+
+  movl d, state_h3(RSTATE);
+  movl c, state_h2(RSTATE);
+  movl b, state_h1(RSTATE);
+  movl a, state_h0(RSTATE);
+  movl e, state_h4(RSTATE);
+
+  movq ROLDSTACK, %rsp;
+  CFI_REGISTER(ROLDSTACK, %rsp);
+  CFI_DEF_CFA_REGISTER(%rsp);
+
+  popq %r12;
+  CFI_POP(%r12);
+  popq %rbp;
+  CFI_POP(%rbp);
+  popq %rbx;
+  CFI_POP(%rbx);
+
+  /* stack already burned */
+  xorl %eax, %eax;
+
+.Lret:
+  ret_spec_stop;
+  CFI_ENDPROC();
+ELF(.size _gcry_sha1_transform_amd64_avx_bmi2,
+    .-_gcry_sha1_transform_amd64_avx_bmi2;)
+
+#endif
+#endif
diff --git a/grub-core/lib/libgcrypt/cipher/sha1-avx2-bmi2-amd64.S 
b/grub-core/lib/libgcrypt/cipher/sha1-avx2-bmi2-amd64.S
new file mode 100644
index 000000000..ed52761b8
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/sha1-avx2-bmi2-amd64.S
@@ -0,0 +1,573 @@
+/* sha1-avx2-bmi2-amd64.S - Intel AVX2/BMI2 accelerated SHA-1 transform 
function
+ * Copyright (C) 2019 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * Based on sha1.c:
+ *  Copyright (C) 1998, 2001, 2002, 2003, 2008 Free Software Foundation, Inc.
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * Intel SSSE3 accelerated SHA-1 implementation based on white paper:
+ *  "Improving the Performance of the Secure Hash Algorithm (SHA-1)"
+ *  
http://software.intel.com/en-us/articles/improving-the-performance-of-the-secure-hash-algorithm-1
+ */
+
+#ifdef __x86_64__
+#include <config.h>
+
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
+     defined(HAVE_GCC_INLINE_ASM_BMI2) && defined(HAVE_GCC_INLINE_ASM_AVX) && \
+     defined(HAVE_GCC_INLINE_ASM_AVX2) && defined(USE_SHA1)
+
+#include "asm-common-amd64.h"
+
+
+/* Context structure */
+
+#define state_h0 0
+#define state_h1 4
+#define state_h2 8
+#define state_h3 12
+#define state_h4 16
+
+
+/* Constants */
+
+#define WK_STACK_WORDS (80 * 2)
+
+.text
+.align 16
+.Lbswap_shufb_ctl:
+       .long 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f
+
+.LK1:  .long 0x5A827999
+.LK2:  .long 0x6ED9EBA1
+.LK3:  .long 0x8F1BBCDC
+.LK4:  .long 0xCA62C1D6
+
+
+/* Register macros */
+
+#define RSTATE %r8
+#define RDATA %r9
+#define ROLDSTACK %r10
+#define RNBLKS %r11
+
+#define a %eax
+#define b %ebx
+#define c %ecx
+#define d %edx
+#define e %edi
+#define ne %r12d
+
+#define RT0 %esi
+#define RT1 %ebp
+
+#define Wtmp0 %ymm0
+#define Wtmp1 %ymm1
+#define Wtmp0x %xmm0
+#define Wtmp1x %xmm1
+
+#define W0 %ymm2
+#define W1 %ymm3
+#define W2 %ymm4
+#define W3 %ymm5
+#define W4 %ymm6
+#define W5 %ymm7
+#define W6 %ymm8
+#define W7 %ymm9
+
+#define BSWAP_REG %ymm10
+
+#define K1 %ymm11
+#define K2 %ymm12
+#define K3 %ymm13
+#define K4 %ymm14
+
+
+/* Round function macros. */
+
+#define WK(i,block) ((block) * 16 + ((i) / 4) * 32 + ((i) % 4) * 4)(%rsp)
+#define PRE_WK(i) ((i) * 4 * 2)(%rsp)
+
+#define R_F1(a,b,c,d,e,i,block) \
+       movl c, RT0; \
+       andn d, b, RT1; \
+       addl WK(i,block), e; \
+       andl b, RT0; \
+       leal (a,ne), a; \
+       rorxl $2, b, b; \
+       addl RT1, e; \
+       rorxl $27, a, ne; \
+       addl RT0, e;
+
+#define R_F2(a,b,c,d,e,i,block) \
+       addl WK(i,block), e; \
+       movl c, RT0; \
+       xorl b, RT0; \
+       leal (a,ne), a; \
+       rorxl $2, b, b; \
+       xorl d, RT0; \
+       addl RT0, e; \
+       rorxl $27, a, ne;
+
+#define R_F3(a,b,c,d,e,i,block) \
+       movl c, RT0; \
+       addl WK(i,block), e; \
+       movl b, RT1; \
+       xorl b, RT0; \
+       leal (a,ne), a; \
+       rorxl $2, b, b; \
+       andl c, RT1; \
+       addl RT1, e; \
+       andl d, RT0; \
+       rorxl $27, a, ne; \
+       addl RT0, e;
+
+#define R_F4(a,b,c,d,e,i,block) R_F2(a,b,c,d,e,i,block)
+
+#define R(a,b,c,d,e,f,i,block) \
+       R_##f(a,b,c,d,e,i,block)
+
+
+/* Input expansion macros. */
+
+#define W_PRECALC_00_15_0(i, W, tmp0) \
+       vmovdqu (4*(i))(RDATA), tmp0##x; \
+       vinserti128 $1, (4*(i) + 64)(RDATA), tmp0, tmp0;
+
+#define W_PRECALC_00_15_1(i, W, tmp0) \
+       vpshufb BSWAP_REG, tmp0, W;
+
+#define W_PRECALC_00_15_2(i, W, tmp0, K) \
+       vpaddd K, W, tmp0;
+
+#define W_PRECALC_00_15_3(i, W, tmp0) \
+       vmovdqa tmp0, PRE_WK((i)&~3);
+
+#define W_PRECALC_16_31_0(i, W, W_m04, W_m08, W_m12, W_m16, tmp0, tmp1) \
+       vpalignr $8, W_m16, W_m12, W; \
+       vpsrldq $4, W_m04, tmp0; \
+       vpxor W_m08, W, W;
+
+#define W_PRECALC_16_31_1(i, W, W_m04, W_m08, W_m12, W_m16, tmp0, tmp1) \
+       vpxor W_m16, tmp0, tmp0; \
+       vpxor tmp0, W, W; \
+       vpslld $1, W, tmp0; \
+       vpslldq $12, W, tmp1; \
+       vpsrld $31, W, W;
+
+#define W_PRECALC_16_31_2(i, W, W_m04, W_m08, W_m12, W_m16, tmp0, tmp1) \
+       vpor W, tmp0, tmp0; \
+       vpsrld $30, tmp1, W; \
+       vpslld $2, tmp1, tmp1;
+
+#define W_PRECALC_16_31_3(i, W, W_m04, W_m08, W_m12, W_m16, tmp0, tmp1, K) \
+       vpxor W, tmp0, tmp0; \
+       vpxor tmp1, tmp0, W; \
+       vpaddd K, W, tmp0; \
+       vmovdqa tmp0, PRE_WK((i)&~3);
+
+#define W_PRECALC_32_79_0(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, 
W_m28, tmp0) \
+       vpxor W_m28, W, W; \
+       vpalignr $8, W_m08, W_m04, tmp0;
+
+#define W_PRECALC_32_79_1(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, 
W_m28, tmp0) \
+       vpxor W_m16, W, W; \
+       vpxor tmp0, W, W;
+
+#define W_PRECALC_32_79_2(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, 
W_m28, tmp0) \
+       vpsrld $30, W, tmp0; \
+       vpslld $2, W, W;
+
+#define W_PRECALC_32_79_3(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, 
W_m28, tmp0, K) \
+       vpor W, tmp0, W; \
+       vpaddd K, W, tmp0; \
+       vmovdqa tmp0, PRE_WK((i)&~3);
+
+
+/*
+ * Transform 2*nblks*64 bytes (2*nblks*16 32-bit words) at DATA.
+ *
+ * unsigned int
+ * _gcry_sha1_transform_amd64_avx2_bmi2 (void *ctx, const unsigned char *data,
+ *                                       size_t nblks)
+ */
+.globl _gcry_sha1_transform_amd64_avx2_bmi2
+ELF(.type _gcry_sha1_transform_amd64_avx2_bmi2,@function)
+.align 16
+_gcry_sha1_transform_amd64_avx2_bmi2:
+  /* input:
+   *   %rdi: ctx, CTX
+   *   %rsi: data (64*nblks bytes)
+   *   %rdx: nblks (multiple of 2, larger than 0)
+   */
+  CFI_STARTPROC();
+
+  vzeroupper;
+
+  movq %rdx, RNBLKS;
+  movq %rdi, RSTATE;
+  movq %rsi, RDATA;
+  pushq %rbx;
+  CFI_PUSH(%rbx);
+  pushq %rbp;
+  CFI_PUSH(%rbp);
+  pushq %r12;
+  CFI_PUSH(%r12);
+
+  movq %rsp, ROLDSTACK;
+  CFI_DEF_CFA_REGISTER(ROLDSTACK);
+
+  subq $(WK_STACK_WORDS*4), %rsp;
+  andq $(~63), %rsp;
+
+  /* Get the values of the chaining variables. */
+  movl state_h0(RSTATE), a;
+  movl state_h1(RSTATE), b;
+  movl state_h2(RSTATE), c;
+  movl state_h3(RSTATE), d;
+  movl state_h4(RSTATE), e;
+  xorl ne, ne;
+
+  vbroadcasti128 .Lbswap_shufb_ctl rRIP, BSWAP_REG;
+  vpbroadcastd .LK1 rRIP, K1;
+  vpbroadcastd .LK2 rRIP, K2;
+  vpbroadcastd .LK3 rRIP, K3;
+  vpbroadcastd .LK4 rRIP, K4;
+
+  /* Precalc 0-31 for block 1 & 2. */
+  W_PRECALC_00_15_0(0, W0, Wtmp0);
+  W_PRECALC_00_15_1(1, W0, Wtmp0);
+  W_PRECALC_00_15_2(2, W0, Wtmp0, K1);
+  W_PRECALC_00_15_3(3, W0, Wtmp0);
+  W_PRECALC_00_15_0(4, W7, Wtmp0);
+  W_PRECALC_00_15_1(5, W7, Wtmp0);
+  W_PRECALC_00_15_2(6, W7, Wtmp0, K1);
+  W_PRECALC_00_15_3(7, W7, Wtmp0);
+  W_PRECALC_00_15_0(8, W6, Wtmp0);
+  W_PRECALC_00_15_1(9, W6, Wtmp0);
+  W_PRECALC_00_15_2(10, W6, Wtmp0, K1);
+  W_PRECALC_00_15_3(11, W6, Wtmp0);
+  W_PRECALC_00_15_0(12, W5, Wtmp0);
+  W_PRECALC_00_15_1(13, W5, Wtmp0);
+  W_PRECALC_00_15_2(14, W5, Wtmp0, K1);
+  W_PRECALC_00_15_3(15, W5, Wtmp0);
+  W_PRECALC_16_31_0(16, W4, W5, W6, W7, W0, Wtmp0, Wtmp1);
+  W_PRECALC_16_31_1(17, W4, W5, W6, W7, W0, Wtmp0, Wtmp1);
+  W_PRECALC_16_31_2(18, W4, W5, W6, W7, W0, Wtmp0, Wtmp1);
+  W_PRECALC_16_31_3(19, W4, W5, W6, W7, W0, Wtmp0, Wtmp1, K1);
+  W_PRECALC_16_31_0(20, W3, W4, W5, W6, W7, Wtmp0, Wtmp1);
+  W_PRECALC_16_31_1(21, W3, W4, W5, W6, W7, Wtmp0, Wtmp1);
+  W_PRECALC_16_31_2(22, W3, W4, W5, W6, W7, Wtmp0, Wtmp1);
+  W_PRECALC_16_31_3(23, W3, W4, W5, W6, W7, Wtmp0, Wtmp1, K2);
+  W_PRECALC_16_31_0(24, W2, W3, W4, W5, W6, Wtmp0, Wtmp1);
+  W_PRECALC_16_31_1(25, W2, W3, W4, W5, W6, Wtmp0, Wtmp1);
+  W_PRECALC_16_31_2(26, W2, W3, W4, W5, W6, Wtmp0, Wtmp1);
+  W_PRECALC_16_31_3(27, W2, W3, W4, W5, W6, Wtmp0, Wtmp1, K2);
+  W_PRECALC_16_31_0(28, W1, W2, W3, W4, W5, Wtmp0, Wtmp1);
+  W_PRECALC_16_31_1(29, W1, W2, W3, W4, W5, Wtmp0, Wtmp1);
+  W_PRECALC_16_31_2(30, W1, W2, W3, W4, W5, Wtmp0, Wtmp1);
+  W_PRECALC_16_31_3(31, W1, W2, W3, W4, W5, Wtmp0, Wtmp1, K2);
+
+.align 8
+.Loop:
+  addq $(2 * 64), RDATA;
+
+  /* Transform 0-15 for block 1 + Precalc 32-47 for block 1 & 2. */
+  R( a, b, c, d, e, F1,  0, 0 ); W_PRECALC_32_79_0(32, W0, W1, W2, W3, W4, W5, 
W6, W7, Wtmp0);
+  R( e, a, b, c, d, F1,  1, 0 ); W_PRECALC_32_79_1(33, W0, W1, W2, W3, W4, W5, 
W6, W7, Wtmp0);
+  R( d, e, a, b, c, F1,  2, 0 ); W_PRECALC_32_79_2(34, W0, W1, W2, W3, W4, W5, 
W6, W7, Wtmp0);
+  R( c, d, e, a, b, F1,  3, 0 ); W_PRECALC_32_79_3(35, W0, W1, W2, W3, W4, W5, 
W6, W7, Wtmp0, K2);
+  R( b, c, d, e, a, F1,  4, 0 ); W_PRECALC_32_79_0(36, W7, W0, W1, W2, W3, W4, 
W5, W6, Wtmp0);
+  R( a, b, c, d, e, F1,  5, 0 ); W_PRECALC_32_79_1(37, W7, W0, W1, W2, W3, W4, 
W5, W6, Wtmp0);
+  R( e, a, b, c, d, F1,  6, 0 ); W_PRECALC_32_79_2(38, W7, W0, W1, W2, W3, W4, 
W5, W6, Wtmp0);
+  R( d, e, a, b, c, F1,  7, 0 ); W_PRECALC_32_79_3(39, W7, W0, W1, W2, W3, W4, 
W5, W6, Wtmp0, K2);
+  R( c, d, e, a, b, F1,  8, 0 ); W_PRECALC_32_79_0(40, W6, W7, W0, W1, W2, W3, 
W4, W5, Wtmp0);
+  R( b, c, d, e, a, F1,  9, 0 ); W_PRECALC_32_79_1(41, W6, W7, W0, W1, W2, W3, 
W4, W5, Wtmp0);
+  R( a, b, c, d, e, F1, 10, 0 ); W_PRECALC_32_79_2(42, W6, W7, W0, W1, W2, W3, 
W4, W5, Wtmp0);
+  R( e, a, b, c, d, F1, 11, 0 ); W_PRECALC_32_79_3(43, W6, W7, W0, W1, W2, W3, 
W4, W5, Wtmp0, K3);
+  R( d, e, a, b, c, F1, 12, 0 ); W_PRECALC_32_79_0(44, W5, W6, W7, W0, W1, W2, 
W3, W4, Wtmp0);
+  R( c, d, e, a, b, F1, 13, 0 ); W_PRECALC_32_79_1(45, W5, W6, W7, W0, W1, W2, 
W3, W4, Wtmp0);
+  R( b, c, d, e, a, F1, 14, 0 ); W_PRECALC_32_79_2(46, W5, W6, W7, W0, W1, W2, 
W3, W4, Wtmp0);
+  R( a, b, c, d, e, F1, 15, 0 ); W_PRECALC_32_79_3(47, W5, W6, W7, W0, W1, W2, 
W3, W4, Wtmp0, K3);
+
+  /* Transform 16-47 for block 1 + Precalc 48-79 for block 1 & 2. */
+  R( e, a, b, c, d, F1, 16, 0 ); W_PRECALC_32_79_0(48, W4, W5, W6, W7, W0, W1, 
W2, W3, Wtmp0);
+  R( d, e, a, b, c, F1, 17, 0 ); W_PRECALC_32_79_1(49, W4, W5, W6, W7, W0, W1, 
W2, W3, Wtmp0);
+  R( c, d, e, a, b, F1, 18, 0 ); W_PRECALC_32_79_2(50, W4, W5, W6, W7, W0, W1, 
W2, W3, Wtmp0);
+  R( b, c, d, e, a, F1, 19, 0 ); W_PRECALC_32_79_3(51, W4, W5, W6, W7, W0, W1, 
W2, W3, Wtmp0, K3);
+  R( a, b, c, d, e, F2, 20, 0 ); W_PRECALC_32_79_0(52, W3, W4, W5, W6, W7, W0, 
W1, W2, Wtmp0);
+  R( e, a, b, c, d, F2, 21, 0 ); W_PRECALC_32_79_1(53, W3, W4, W5, W6, W7, W0, 
W1, W2, Wtmp0);
+  R( d, e, a, b, c, F2, 22, 0 ); W_PRECALC_32_79_2(54, W3, W4, W5, W6, W7, W0, 
W1, W2, Wtmp0);
+  R( c, d, e, a, b, F2, 23, 0 ); W_PRECALC_32_79_3(55, W3, W4, W5, W6, W7, W0, 
W1, W2, Wtmp0, K3);
+  R( b, c, d, e, a, F2, 24, 0 ); W_PRECALC_32_79_0(56, W2, W3, W4, W5, W6, W7, 
W0, W1, Wtmp0);
+  R( a, b, c, d, e, F2, 25, 0 ); W_PRECALC_32_79_1(57, W2, W3, W4, W5, W6, W7, 
W0, W1, Wtmp0);
+  R( e, a, b, c, d, F2, 26, 0 ); W_PRECALC_32_79_2(58, W2, W3, W4, W5, W6, W7, 
W0, W1, Wtmp0);
+  R( d, e, a, b, c, F2, 27, 0 ); W_PRECALC_32_79_3(59, W2, W3, W4, W5, W6, W7, 
W0, W1, Wtmp0, K3);
+  R( c, d, e, a, b, F2, 28, 0 ); W_PRECALC_32_79_0(60, W1, W2, W3, W4, W5, W6, 
W7, W0, Wtmp0);
+  R( b, c, d, e, a, F2, 29, 0 ); W_PRECALC_32_79_1(61, W1, W2, W3, W4, W5, W6, 
W7, W0, Wtmp0);
+  R( a, b, c, d, e, F2, 30, 0 ); W_PRECALC_32_79_2(62, W1, W2, W3, W4, W5, W6, 
W7, W0, Wtmp0);
+  R( e, a, b, c, d, F2, 31, 0 ); W_PRECALC_32_79_3(63, W1, W2, W3, W4, W5, W6, 
W7, W0, Wtmp0, K4);
+  R( d, e, a, b, c, F2, 32, 0 ); W_PRECALC_32_79_0(64, W0, W1, W2, W3, W4, W5, 
W6, W7, Wtmp0);
+  R( c, d, e, a, b, F2, 33, 0 ); W_PRECALC_32_79_1(65, W0, W1, W2, W3, W4, W5, 
W6, W7, Wtmp0);
+  R( b, c, d, e, a, F2, 34, 0 ); W_PRECALC_32_79_2(66, W0, W1, W2, W3, W4, W5, 
W6, W7, Wtmp0);
+  R( a, b, c, d, e, F2, 35, 0 ); W_PRECALC_32_79_3(67, W0, W1, W2, W3, W4, W5, 
W6, W7, Wtmp0, K4);
+  R( e, a, b, c, d, F2, 36, 0 ); W_PRECALC_32_79_0(68, W7, W0, W1, W2, W3, W4, 
W5, W6, Wtmp0);
+  R( d, e, a, b, c, F2, 37, 0 ); W_PRECALC_32_79_1(69, W7, W0, W1, W2, W3, W4, 
W5, W6, Wtmp0);
+  R( c, d, e, a, b, F2, 38, 0 ); W_PRECALC_32_79_2(70, W7, W0, W1, W2, W3, W4, 
W5, W6, Wtmp0);
+  R( b, c, d, e, a, F2, 39, 0 ); W_PRECALC_32_79_3(71, W7, W0, W1, W2, W3, W4, 
W5, W6, Wtmp0, K4);
+  R( a, b, c, d, e, F3, 40, 0 ); W_PRECALC_32_79_0(72, W6, W7, W0, W1, W2, W3, 
W4, W5, Wtmp0);
+  R( e, a, b, c, d, F3, 41, 0 ); W_PRECALC_32_79_1(73, W6, W7, W0, W1, W2, W3, 
W4, W5, Wtmp0);
+  R( d, e, a, b, c, F3, 42, 0 ); W_PRECALC_32_79_2(74, W6, W7, W0, W1, W2, W3, 
W4, W5, Wtmp0);
+  R( c, d, e, a, b, F3, 43, 0 ); W_PRECALC_32_79_3(75, W6, W7, W0, W1, W2, W3, 
W4, W5, Wtmp0, K4);
+  R( b, c, d, e, a, F3, 44, 0 ); W_PRECALC_32_79_0(76, W5, W6, W7, W0, W1, W2, 
W3, W4, Wtmp0);
+  R( a, b, c, d, e, F3, 45, 0 ); W_PRECALC_32_79_1(77, W5, W6, W7, W0, W1, W2, 
W3, W4, Wtmp0);
+  R( e, a, b, c, d, F3, 46, 0 ); W_PRECALC_32_79_2(78, W5, W6, W7, W0, W1, W2, 
W3, W4, Wtmp0);
+  R( d, e, a, b, c, F3, 47, 0 ); W_PRECALC_32_79_3(79, W5, W6, W7, W0, W1, W2, 
W3, W4, Wtmp0, K4);
+
+  /* Transform 48-79 for block 1. */
+  R( c, d, e, a, b, F3, 48, 0 );
+  R( b, c, d, e, a, F3, 49, 0 );
+  R( a, b, c, d, e, F3, 50, 0 );
+  R( e, a, b, c, d, F3, 51, 0 );
+  R( d, e, a, b, c, F3, 52, 0 );
+  R( c, d, e, a, b, F3, 53, 0 );
+  R( b, c, d, e, a, F3, 54, 0 );
+  R( a, b, c, d, e, F3, 55, 0 );
+  R( e, a, b, c, d, F3, 56, 0 );
+  R( d, e, a, b, c, F3, 57, 0 );
+  R( c, d, e, a, b, F3, 58, 0 );
+  R( b, c, d, e, a, F3, 59, 0 );
+  R( a, b, c, d, e, F4, 60, 0 );
+  R( e, a, b, c, d, F4, 61, 0 );
+  R( d, e, a, b, c, F4, 62, 0 );
+  R( c, d, e, a, b, F4, 63, 0 );
+  R( b, c, d, e, a, F4, 64, 0 );
+  R( a, b, c, d, e, F4, 65, 0 );
+  R( e, a, b, c, d, F4, 66, 0 );
+  R( d, e, a, b, c, F4, 67, 0 );
+  R( c, d, e, a, b, F4, 68, 0 );
+  R( b, c, d, e, a, F4, 69, 0 );
+  R( a, b, c, d, e, F4, 70, 0 );
+  R( e, a, b, c, d, F4, 71, 0 );
+  R( d, e, a, b, c, F4, 72, 0 );
+  R( c, d, e, a, b, F4, 73, 0 );
+  R( b, c, d, e, a, F4, 74, 0 );
+  R( a, b, c, d, e, F4, 75, 0 );
+  R( e, a, b, c, d, F4, 76, 0 );
+  R( d, e, a, b, c, F4, 77, 0 );
+  R( c, d, e, a, b, F4, 78, 0 );
+  addl state_h0(RSTATE), a;
+  R( b, c, d, e, a, F4, 79, 0 );
+  addl ne, a;
+  xorl ne, ne;
+
+  /* Update the chaining variables. */
+  addl state_h3(RSTATE), d;
+  addl state_h2(RSTATE), c;
+  addl state_h1(RSTATE), b;
+  addl state_h4(RSTATE), e;
+
+  movl d, state_h3(RSTATE);
+  movl c, state_h2(RSTATE);
+  movl b, state_h1(RSTATE);
+  movl a, state_h0(RSTATE);
+  movl e, state_h4(RSTATE);
+
+  /* Transform 0-47 for block 2. */
+  R( a, b, c, d, e, F1,  0, 1 );
+  R( e, a, b, c, d, F1,  1, 1 );
+  R( d, e, a, b, c, F1,  2, 1 );
+  R( c, d, e, a, b, F1,  3, 1 );
+  R( b, c, d, e, a, F1,  4, 1 );
+  R( a, b, c, d, e, F1,  5, 1 );
+  R( e, a, b, c, d, F1,  6, 1 );
+  R( d, e, a, b, c, F1,  7, 1 );
+  R( c, d, e, a, b, F1,  8, 1 );
+  R( b, c, d, e, a, F1,  9, 1 );
+  R( a, b, c, d, e, F1, 10, 1 );
+  R( e, a, b, c, d, F1, 11, 1 );
+  R( d, e, a, b, c, F1, 12, 1 );
+  R( c, d, e, a, b, F1, 13, 1 );
+  R( b, c, d, e, a, F1, 14, 1 );
+  R( a, b, c, d, e, F1, 15, 1 );
+  R( e, a, b, c, d, F1, 16, 1 );
+  R( d, e, a, b, c, F1, 17, 1 );
+  R( c, d, e, a, b, F1, 18, 1 );
+  R( b, c, d, e, a, F1, 19, 1 );
+  R( a, b, c, d, e, F2, 20, 1 );
+  R( e, a, b, c, d, F2, 21, 1 );
+  R( d, e, a, b, c, F2, 22, 1 );
+  R( c, d, e, a, b, F2, 23, 1 );
+  R( b, c, d, e, a, F2, 24, 1 );
+  R( a, b, c, d, e, F2, 25, 1 );
+  R( e, a, b, c, d, F2, 26, 1 );
+  R( d, e, a, b, c, F2, 27, 1 );
+  R( c, d, e, a, b, F2, 28, 1 );
+  R( b, c, d, e, a, F2, 29, 1 );
+  R( a, b, c, d, e, F2, 30, 1 );
+  R( e, a, b, c, d, F2, 31, 1 );
+  R( d, e, a, b, c, F2, 32, 1 );
+  R( c, d, e, a, b, F2, 33, 1 );
+  R( b, c, d, e, a, F2, 34, 1 );
+  R( a, b, c, d, e, F2, 35, 1 );
+  R( e, a, b, c, d, F2, 36, 1 );
+  R( d, e, a, b, c, F2, 37, 1 );
+  R( c, d, e, a, b, F2, 38, 1 );
+  R( b, c, d, e, a, F2, 39, 1 );
+  R( a, b, c, d, e, F3, 40, 1 );
+  R( e, a, b, c, d, F3, 41, 1 );
+  R( d, e, a, b, c, F3, 42, 1 );
+  R( c, d, e, a, b, F3, 43, 1 );
+  R( b, c, d, e, a, F3, 44, 1 );
+  R( a, b, c, d, e, F3, 45, 1 );
+  R( e, a, b, c, d, F3, 46, 1 );
+  R( d, e, a, b, c, F3, 47, 1 );
+
+  addq $-2, RNBLKS;
+  jz .Lend;
+
+  /* Transform 48-79 for block 2 + Precalc 0-31 for next two blocks. */
+  R( c, d, e, a, b, F3, 48, 1 ); W_PRECALC_00_15_0(0, W0, Wtmp0);
+  R( b, c, d, e, a, F3, 49, 1 ); W_PRECALC_00_15_1(1, W0, Wtmp0);
+  R( a, b, c, d, e, F3, 50, 1 ); W_PRECALC_00_15_2(2, W0, Wtmp0, K1);
+  R( e, a, b, c, d, F3, 51, 1 ); W_PRECALC_00_15_3(3, W0, Wtmp0);
+  R( d, e, a, b, c, F3, 52, 1 ); W_PRECALC_00_15_0(4, W7, Wtmp0);
+  R( c, d, e, a, b, F3, 53, 1 ); W_PRECALC_00_15_1(5, W7, Wtmp0);
+  R( b, c, d, e, a, F3, 54, 1 ); W_PRECALC_00_15_2(6, W7, Wtmp0, K1);
+  R( a, b, c, d, e, F3, 55, 1 ); W_PRECALC_00_15_3(7, W7, Wtmp0);
+  R( e, a, b, c, d, F3, 56, 1 ); W_PRECALC_00_15_0(8, W6, Wtmp0);
+  R( d, e, a, b, c, F3, 57, 1 ); W_PRECALC_00_15_1(9, W6, Wtmp0);
+  R( c, d, e, a, b, F3, 58, 1 ); W_PRECALC_00_15_2(10, W6, Wtmp0, K1);
+  R( b, c, d, e, a, F3, 59, 1 ); W_PRECALC_00_15_3(11, W6, Wtmp0);
+  R( a, b, c, d, e, F4, 60, 1 ); W_PRECALC_00_15_0(12, W5, Wtmp0);
+  R( e, a, b, c, d, F4, 61, 1 ); W_PRECALC_00_15_1(13, W5, Wtmp0);
+  R( d, e, a, b, c, F4, 62, 1 ); W_PRECALC_00_15_2(14, W5, Wtmp0, K1);
+  R( c, d, e, a, b, F4, 63, 1 ); W_PRECALC_00_15_3(15, W5, Wtmp0);
+  R( b, c, d, e, a, F4, 64, 1 ); W_PRECALC_16_31_0(16, W4, W5, W6, W7, W0, 
Wtmp0, Wtmp1);
+  R( a, b, c, d, e, F4, 65, 1 ); W_PRECALC_16_31_1(17, W4, W5, W6, W7, W0, 
Wtmp0, Wtmp1);
+  R( e, a, b, c, d, F4, 66, 1 ); W_PRECALC_16_31_2(18, W4, W5, W6, W7, W0, 
Wtmp0, Wtmp1);
+  R( d, e, a, b, c, F4, 67, 1 ); W_PRECALC_16_31_3(19, W4, W5, W6, W7, W0, 
Wtmp0, Wtmp1, K1);
+  R( c, d, e, a, b, F4, 68, 1 ); W_PRECALC_16_31_0(20, W3, W4, W5, W6, W7, 
Wtmp0, Wtmp1);
+  R( b, c, d, e, a, F4, 69, 1 ); W_PRECALC_16_31_1(21, W3, W4, W5, W6, W7, 
Wtmp0, Wtmp1);
+  R( a, b, c, d, e, F4, 70, 1 ); W_PRECALC_16_31_2(22, W3, W4, W5, W6, W7, 
Wtmp0, Wtmp1);
+  R( e, a, b, c, d, F4, 71, 1 ); W_PRECALC_16_31_3(23, W3, W4, W5, W6, W7, 
Wtmp0, Wtmp1, K2);
+  R( d, e, a, b, c, F4, 72, 1 ); W_PRECALC_16_31_0(24, W2, W3, W4, W5, W6, 
Wtmp0, Wtmp1);
+  R( c, d, e, a, b, F4, 73, 1 ); W_PRECALC_16_31_1(25, W2, W3, W4, W5, W6, 
Wtmp0, Wtmp1);
+  R( b, c, d, e, a, F4, 74, 1 ); W_PRECALC_16_31_2(26, W2, W3, W4, W5, W6, 
Wtmp0, Wtmp1);
+  R( a, b, c, d, e, F4, 75, 1 ); W_PRECALC_16_31_3(27, W2, W3, W4, W5, W6, 
Wtmp0, Wtmp1, K2);
+  R( e, a, b, c, d, F4, 76, 1 ); W_PRECALC_16_31_0(28, W1, W2, W3, W4, W5, 
Wtmp0, Wtmp1);
+  R( d, e, a, b, c, F4, 77, 1 ); W_PRECALC_16_31_1(29, W1, W2, W3, W4, W5, 
Wtmp0, Wtmp1);
+  R( c, d, e, a, b, F4, 78, 1 ); W_PRECALC_16_31_2(30, W1, W2, W3, W4, W5, 
Wtmp0, Wtmp1);
+  addl state_h0(RSTATE), a;      W_PRECALC_16_31_3(31, W1, W2, W3, W4, W5, 
Wtmp0, Wtmp1, K2);
+  R( b, c, d, e, a, F4, 79, 1 );
+  addl ne, a;
+  xorl ne, ne;
+
+  /* Update the chaining variables. */
+  addl state_h3(RSTATE), d;
+  addl state_h2(RSTATE), c;
+  addl state_h1(RSTATE), b;
+  addl state_h4(RSTATE), e;
+
+  movl d, state_h3(RSTATE);
+  movl c, state_h2(RSTATE);
+  movl b, state_h1(RSTATE);
+  movl a, state_h0(RSTATE);
+  movl e, state_h4(RSTATE);
+
+  jmp .Loop;
+
+.align 16
+.Lend:
+  vzeroall;
+
+  /* Transform 48-79 for block 2 + burn stack */
+  R( c, d, e, a, b, F3, 48, 1 );
+  R( b, c, d, e, a, F3, 49, 1 );
+  R( a, b, c, d, e, F3, 50, 1 );
+  R( e, a, b, c, d, F3, 51, 1 );
+  R( d, e, a, b, c, F3, 52, 1 );
+  R( c, d, e, a, b, F3, 53, 1 );
+  R( b, c, d, e, a, F3, 54, 1 );
+  R( a, b, c, d, e, F3, 55, 1 );
+  R( e, a, b, c, d, F3, 56, 1 );
+  R( d, e, a, b, c, F3, 57, 1 );
+  R( c, d, e, a, b, F3, 58, 1 );
+  R( b, c, d, e, a, F3, 59, 1 );
+  R( a, b, c, d, e, F4, 60, 1 ); vmovdqa %ymm0, (0*32)(%rsp);
+  R( e, a, b, c, d, F4, 61, 1 ); vmovdqa %ymm0, (1*32)(%rsp);
+  R( d, e, a, b, c, F4, 62, 1 ); vmovdqa %ymm0, (2*32)(%rsp);
+  R( c, d, e, a, b, F4, 63, 1 ); vmovdqa %ymm0, (3*32)(%rsp);
+  R( b, c, d, e, a, F4, 64, 1 ); vmovdqa %ymm0, (4*32)(%rsp);
+  R( a, b, c, d, e, F4, 65, 1 ); vmovdqa %ymm0, (5*32)(%rsp);
+  R( e, a, b, c, d, F4, 66, 1 ); vmovdqa %ymm0, (6*32)(%rsp);
+  R( d, e, a, b, c, F4, 67, 1 ); vmovdqa %ymm0, (7*32)(%rsp);
+  R( c, d, e, a, b, F4, 68, 1 ); vmovdqa %ymm0, (8*32)(%rsp);
+  R( b, c, d, e, a, F4, 69, 1 ); vmovdqa %ymm0, (9*32)(%rsp);
+  R( a, b, c, d, e, F4, 70, 1 ); vmovdqa %ymm0, (10*32)(%rsp);
+  R( e, a, b, c, d, F4, 71, 1 ); vmovdqa %ymm0, (11*32)(%rsp);
+  R( d, e, a, b, c, F4, 72, 1 ); vmovdqa %ymm0, (12*32)(%rsp);
+  R( c, d, e, a, b, F4, 73, 1 ); vmovdqa %ymm0, (13*32)(%rsp);
+  R( b, c, d, e, a, F4, 74, 1 ); vmovdqa %ymm0, (14*32)(%rsp);
+  R( a, b, c, d, e, F4, 75, 1 ); vmovdqa %ymm0, (15*32)(%rsp);
+  R( e, a, b, c, d, F4, 76, 1 ); vmovdqa %ymm0, (16*32)(%rsp);
+  R( d, e, a, b, c, F4, 77, 1 ); vmovdqa %ymm0, (17*32)(%rsp);
+  R( c, d, e, a, b, F4, 78, 1 ); vmovdqa %ymm0, (18*32)(%rsp);
+  addl state_h0(RSTATE), a;
+  R( b, c, d, e, a, F4, 79, 1 );
+  addl ne, a;
+  xorl ne, ne;
+
+  /* WK_STACK_WORDS*4/32-1 = 19 */
+  vmovdqa %ymm0, (19*32)(%rsp);
+
+  /* Update the chaining variables. */
+  addl state_h3(RSTATE), d;
+  addl state_h2(RSTATE), c;
+  addl state_h1(RSTATE), b;
+  addl state_h4(RSTATE), e;
+
+  movl d, state_h3(RSTATE);
+  movl c, state_h2(RSTATE);
+  movl b, state_h1(RSTATE);
+  movl a, state_h0(RSTATE);
+  movl e, state_h4(RSTATE);
+
+  movq ROLDSTACK, %rsp;
+  CFI_REGISTER(ROLDSTACK, %rsp);
+  CFI_DEF_CFA_REGISTER(%rsp);
+
+  popq %r12;
+  CFI_POP(%r12);
+  popq %rbp;
+  CFI_POP(%rbp);
+  popq %rbx;
+  CFI_POP(%rbx);
+
+  /* stack already burned */
+  xorl %eax, %eax;
+
+  ret_spec_stop;
+  CFI_ENDPROC();
+ELF(.size _gcry_sha1_transform_amd64_avx2_bmi2,
+    .-_gcry_sha1_transform_amd64_avx2_bmi2;)
+
+#endif
+#endif
diff --git a/grub-core/lib/libgcrypt/cipher/sha1-intel-shaext.c 
b/grub-core/lib/libgcrypt/cipher/sha1-intel-shaext.c
new file mode 100644
index 000000000..ddf2be2aa
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/sha1-intel-shaext.c
@@ -0,0 +1,292 @@
+/* sha1-intel-shaext.S - SHAEXT accelerated SHA-1 transform function
+ * Copyright (C) 2018 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+
+#include "types.h"
+
+#if defined(HAVE_GCC_INLINE_ASM_SHAEXT) && \
+    defined(HAVE_GCC_INLINE_ASM_SSE41) && defined(USE_SHA1) && \
+    defined(ENABLE_SHAEXT_SUPPORT)
+
+#if _GCRY_GCC_VERSION >= 40400 /* 4.4 */
+/* Prevent compiler from issuing SSE instructions between asm blocks. */
+#  pragma GCC target("no-sse")
+#endif
+#if __clang__
+#  pragma clang attribute push (__attribute__((target("no-sse"))), apply_to = 
function)
+#endif
+
+#define NO_INSTRUMENT_FUNCTION __attribute__((no_instrument_function))
+
+#define ASM_FUNC_ATTR NO_INSTRUMENT_FUNCTION
+
+/* Two macros to be called prior and after the use of SHA-EXT
+   instructions.  There should be no external function calls between
+   the use of these macros.  There purpose is to make sure that the
+   SSE regsiters are cleared and won't reveal any information about
+   the key or the data.  */
+#ifdef __WIN64__
+/* XMM6-XMM15 are callee-saved registers on WIN64. */
+# define shaext_prepare_variable char win64tmp[2*16]
+# define shaext_prepare_variable_size sizeof(win64tmp)
+# define shaext_prepare()                                               \
+   do { asm volatile ("movdqu %%xmm6, (%0)\n"                           \
+                      "movdqu %%xmm7, (%1)\n"                           \
+                      :                                                 \
+                      : "r" (&win64tmp[0]), "r" (&win64tmp[16])         \
+                      : "memory");                                      \
+   } while (0)
+# define shaext_cleanup(tmp0,tmp1)                                      \
+   do { asm volatile ("movdqu (%0), %%xmm6\n"                           \
+                      "movdqu (%1), %%xmm7\n"                           \
+                      "pxor %%xmm0, %%xmm0\n"                           \
+                      "pxor %%xmm1, %%xmm1\n"                           \
+                      "pxor %%xmm2, %%xmm2\n"                           \
+                      "pxor %%xmm3, %%xmm3\n"                           \
+                      "pxor %%xmm4, %%xmm4\n"                           \
+                      "pxor %%xmm5, %%xmm5\n"                           \
+                      "movdqa %%xmm0, (%2)\n\t"                         \
+                      "movdqa %%xmm0, (%3)\n\t"                         \
+                      :                                                 \
+                      : "r" (&win64tmp[0]), "r" (&win64tmp[16]),        \
+                        "r" (tmp0), "r" (tmp1)                          \
+                      : "memory");                                      \
+   } while (0)
+#else
+# define shaext_prepare_variable
+# define shaext_prepare_variable_size 0
+# define shaext_prepare() do { } while (0)
+# define shaext_cleanup(tmp0,tmp1)                                      \
+   do { asm volatile ("pxor %%xmm0, %%xmm0\n"                           \
+                      "pxor %%xmm1, %%xmm1\n"                           \
+                      "pxor %%xmm2, %%xmm2\n"                           \
+                      "pxor %%xmm3, %%xmm3\n"                           \
+                      "pxor %%xmm4, %%xmm4\n"                           \
+                      "pxor %%xmm5, %%xmm5\n"                           \
+                      "pxor %%xmm6, %%xmm6\n"                           \
+                      "pxor %%xmm7, %%xmm7\n"                           \
+                      "movdqa %%xmm0, (%0)\n\t"                         \
+                      "movdqa %%xmm0, (%1)\n\t"                         \
+                      :                                                 \
+                      : "r" (tmp0), "r" (tmp1)                          \
+                      : "memory");                                      \
+   } while (0)
+#endif
+
+/*
+ * Transform nblks*64 bytes (nblks*16 32-bit words) at DATA.
+ */
+unsigned int ASM_FUNC_ATTR
+_gcry_sha1_transform_intel_shaext(void *state, const unsigned char *data,
+                                  size_t nblks)
+{
+  static const unsigned char be_mask[16] __attribute__ ((aligned (16))) =
+    { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
+  char save_buf[2 * 16 + 15];
+  char *abcd_save;
+  char *e_save;
+  shaext_prepare_variable;
+
+  if (nblks == 0)
+    return 0;
+
+  shaext_prepare ();
+
+  asm volatile ("" : "=r" (abcd_save) : "0" (save_buf) : "memory");
+  abcd_save = abcd_save + (-(uintptr_t)abcd_save & 15);
+  e_save = abcd_save + 16;
+
+  /* byteswap mask => XMM7 */
+  asm volatile ("movdqa %[mask], %%xmm7\n\t" /* Preload mask */
+                :
+                : [mask] "m" (*be_mask)
+                : "memory");
+
+  /* Load state.. ABCD => XMM4, E => XMM5 */
+  asm volatile ("movd 16(%[state]), %%xmm5\n\t"
+                "movdqu (%[state]), %%xmm4\n\t"
+                "pslldq $12, %%xmm5\n\t"
+                "pshufd $0x1b, %%xmm4, %%xmm4\n\t"
+                "movdqa %%xmm5, (%[e_save])\n\t"
+                "movdqa %%xmm4, (%[abcd_save])\n\t"
+                :
+                : [state] "r" (state), [abcd_save] "r" (abcd_save),
+                  [e_save] "r" (e_save)
+                : "memory" );
+
+  /* DATA => XMM[0..4] */
+  asm volatile ("movdqu 0(%[data]), %%xmm0\n\t"
+                "movdqu 16(%[data]), %%xmm1\n\t"
+                "movdqu 32(%[data]), %%xmm2\n\t"
+                "movdqu 48(%[data]), %%xmm3\n\t"
+                "pshufb %%xmm7, %%xmm0\n\t"
+                "pshufb %%xmm7, %%xmm1\n\t"
+                "pshufb %%xmm7, %%xmm2\n\t"
+                "pshufb %%xmm7, %%xmm3\n\t"
+                :
+                : [data] "r" (data)
+                : "memory" );
+  data += 64;
+
+  while (1)
+    {
+      /* Round 0..3 */
+      asm volatile ("paddd %%xmm0, %%xmm5\n\t"
+                    "movdqa %%xmm4, %%xmm6\n\t" /* ABCD => E1 */
+                    "sha1rnds4 $0, %%xmm5, %%xmm4\n\t"
+                    ::: "memory" );
+
+      /* Round 4..7 */
+      asm volatile ("sha1nexte %%xmm1, %%xmm6\n\t"
+                    "movdqa %%xmm4, %%xmm5\n\t"
+                    "sha1rnds4 $0, %%xmm6, %%xmm4\n\t"
+                    "sha1msg1 %%xmm1, %%xmm0\n\t"
+                    ::: "memory" );
+
+      /* Round 8..11 */
+      asm volatile ("sha1nexte %%xmm2, %%xmm5\n\t"
+                    "movdqa %%xmm4, %%xmm6\n\t"
+                    "sha1rnds4 $0, %%xmm5, %%xmm4\n\t"
+                    "sha1msg1 %%xmm2, %%xmm1\n\t"
+                    "pxor %%xmm2, %%xmm0\n\t"
+                    ::: "memory" );
+
+#define ROUND(imm, E0, E1, MSG0, MSG1, MSG2, MSG3) \
+      asm volatile ("sha1nexte %%"MSG0", %%"E0"\n\t" \
+                    "movdqa %%xmm4, %%"E1"\n\t" \
+                    "sha1msg2 %%"MSG0", %%"MSG1"\n\t" \
+                    "sha1rnds4 $"imm", %%"E0", %%xmm4\n\t" \
+                    "sha1msg1 %%"MSG0", %%"MSG3"\n\t" \
+                    "pxor %%"MSG0", %%"MSG2"\n\t" \
+                    ::: "memory" )
+
+      /* Rounds 12..15 to 64..67 */
+      ROUND("0", "xmm6", "xmm5", "xmm3", "xmm0", "xmm1", "xmm2");
+      ROUND("0", "xmm5", "xmm6", "xmm0", "xmm1", "xmm2", "xmm3");
+      ROUND("1", "xmm6", "xmm5", "xmm1", "xmm2", "xmm3", "xmm0");
+      ROUND("1", "xmm5", "xmm6", "xmm2", "xmm3", "xmm0", "xmm1");
+      ROUND("1", "xmm6", "xmm5", "xmm3", "xmm0", "xmm1", "xmm2");
+      ROUND("1", "xmm5", "xmm6", "xmm0", "xmm1", "xmm2", "xmm3");
+      ROUND("1", "xmm6", "xmm5", "xmm1", "xmm2", "xmm3", "xmm0");
+      ROUND("2", "xmm5", "xmm6", "xmm2", "xmm3", "xmm0", "xmm1");
+      ROUND("2", "xmm6", "xmm5", "xmm3", "xmm0", "xmm1", "xmm2");
+      ROUND("2", "xmm5", "xmm6", "xmm0", "xmm1", "xmm2", "xmm3");
+      ROUND("2", "xmm6", "xmm5", "xmm1", "xmm2", "xmm3", "xmm0");
+      ROUND("2", "xmm5", "xmm6", "xmm2", "xmm3", "xmm0", "xmm1");
+      ROUND("3", "xmm6", "xmm5", "xmm3", "xmm0", "xmm1", "xmm2");
+      ROUND("3", "xmm5", "xmm6", "xmm0", "xmm1", "xmm2", "xmm3");
+
+      if (--nblks == 0)
+        break;
+
+      /* Round 68..71 */
+      asm volatile ("movdqu 0(%[data]), %%xmm0\n\t"
+                    "sha1nexte %%xmm1, %%xmm6\n\t"
+                    "movdqa %%xmm4, %%xmm5\n\t"
+                    "sha1msg2 %%xmm1, %%xmm2\n\t"
+                    "sha1rnds4 $3, %%xmm6, %%xmm4\n\t"
+                    "pxor %%xmm1, %%xmm3\n\t"
+                    "pshufb %%xmm7, %%xmm0\n\t"
+                    :
+                    : [data] "r" (data)
+                    : "memory" );
+
+      /* Round 72..75 */
+      asm volatile ("movdqu 16(%[data]), %%xmm1\n\t"
+                    "sha1nexte %%xmm2, %%xmm5\n\t"
+                    "movdqa %%xmm4, %%xmm6\n\t"
+                    "sha1msg2 %%xmm2, %%xmm3\n\t"
+                    "sha1rnds4 $3, %%xmm5, %%xmm4\n\t"
+                    "pshufb %%xmm7, %%xmm1\n\t"
+                    :
+                    : [data] "r" (data)
+                    : "memory" );
+
+      /* Round 76..79 */
+      asm volatile ("movdqu 32(%[data]), %%xmm2\n\t"
+                    "sha1nexte %%xmm3, %%xmm6\n\t"
+                    "movdqa %%xmm4, %%xmm5\n\t"
+                    "sha1rnds4 $3, %%xmm6, %%xmm4\n\t"
+                    "pshufb %%xmm7, %%xmm2\n\t"
+                    :
+                    : [data] "r" (data)
+                    : "memory" );
+
+      /* Merge states, store current. */
+      asm volatile ("movdqu 48(%[data]), %%xmm3\n\t"
+                    "sha1nexte (%[e_save]), %%xmm5\n\t"
+                    "paddd (%[abcd_save]), %%xmm4\n\t"
+                    "pshufb %%xmm7, %%xmm3\n\t"
+                    "movdqa %%xmm5, (%[e_save])\n\t"
+                    "movdqa %%xmm4, (%[abcd_save])\n\t"
+                    :
+                    : [abcd_save] "r" (abcd_save), [e_save] "r" (e_save),
+                      [data] "r" (data)
+                    : "memory" );
+
+      data += 64;
+    }
+
+  /* Round 68..71 */
+  asm volatile ("sha1nexte %%xmm1, %%xmm6\n\t"
+                "movdqa %%xmm4, %%xmm5\n\t"
+                "sha1msg2 %%xmm1, %%xmm2\n\t"
+                "sha1rnds4 $3, %%xmm6, %%xmm4\n\t"
+                "pxor %%xmm1, %%xmm3\n\t"
+                ::: "memory" );
+
+  /* Round 72..75 */
+  asm volatile ("sha1nexte %%xmm2, %%xmm5\n\t"
+                "movdqa %%xmm4, %%xmm6\n\t"
+                "sha1msg2 %%xmm2, %%xmm3\n\t"
+                "sha1rnds4 $3, %%xmm5, %%xmm4\n\t"
+                ::: "memory" );
+
+  /* Round 76..79 */
+  asm volatile ("sha1nexte %%xmm3, %%xmm6\n\t"
+                "movdqa %%xmm4, %%xmm5\n\t"
+                "sha1rnds4 $3, %%xmm6, %%xmm4\n\t"
+                ::: "memory" );
+
+  /* Merge states. */
+  asm volatile ("sha1nexte (%[e_save]), %%xmm5\n\t"
+                "paddd (%[abcd_save]), %%xmm4\n\t"
+                :
+                : [abcd_save] "r" (abcd_save), [e_save] "r" (e_save)
+                : "memory" );
+
+  /* Save state */
+  asm volatile ("pshufd $0x1b, %%xmm4, %%xmm4\n\t"
+                "psrldq $12, %%xmm5\n\t"
+                "movdqu %%xmm4, (%[state])\n\t"
+                "movd %%xmm5, 16(%[state])\n\t"
+                :
+                : [state] "r" (state)
+                : "memory" );
+
+  shaext_cleanup (abcd_save, e_save);
+  return 0;
+}
+
+#if __clang__
+#  pragma clang attribute pop
+#endif
+
+#endif /* HAVE_GCC_INLINE_ASM_SHA_EXT */
diff --git a/grub-core/lib/libgcrypt/cipher/sha1-ssse3-amd64.S 
b/grub-core/lib/libgcrypt/cipher/sha1-ssse3-amd64.S
new file mode 100644
index 000000000..f09b1de12
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/sha1-ssse3-amd64.S
@@ -0,0 +1,437 @@
+/* sha1-ssse3-amd64.S - Intel SSSE3 accelerated SHA-1 transform function
+ * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * Based on sha1.c:
+ *  Copyright (C) 1998, 2001, 2002, 2003, 2008 Free Software Foundation, Inc.
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * Intel SSSE3 accelerated SHA-1 implementation based on white paper:
+ *  "Improving the Performance of the Secure Hash Algorithm (SHA-1)"
+ *  
http://software.intel.com/en-us/articles/improving-the-performance-of-the-secure-hash-algorithm-1
+ */
+
+#ifdef __x86_64__
+#include <config.h>
+
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
+    defined(HAVE_GCC_INLINE_ASM_SSSE3) && defined(USE_SHA1)
+
+#include "asm-common-amd64.h"
+
+
+/* Context structure */
+
+#define state_h0 0
+#define state_h1 4
+#define state_h2 8
+#define state_h3 12
+#define state_h4 16
+
+
+/* Constants */
+
+.text
+#define K1  0x5A827999
+#define K2  0x6ED9EBA1
+#define K3  0x8F1BBCDC
+#define K4  0xCA62C1D6
+.align 16
+.LK_XMM:
+.LK1:  .long K1, K1, K1, K1
+.LK2:  .long K2, K2, K2, K2
+.LK3:  .long K3, K3, K3, K3
+.LK4:  .long K4, K4, K4, K4
+
+.Lbswap_shufb_ctl:
+       .long 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f
+
+
+/* Register macros */
+
+#define RSTATE %r8
+#define RDATA %r9
+#define ROLDSTACK %r10
+#define RNBLKS %r11
+
+#define a %eax
+#define b %ebx
+#define c %ecx
+#define d %edx
+#define e %edi
+
+#define RT0 %esi
+#define RT1 %ebp
+
+#define Wtmp0 %xmm0
+#define Wtmp1 %xmm1
+
+#define W0 %xmm2
+#define W1 %xmm3
+#define W2 %xmm4
+#define W3 %xmm5
+#define W4 %xmm6
+#define W5 %xmm7
+#define W6 %xmm8
+#define W7 %xmm9
+
+#define BSWAP_REG %xmm10
+
+
+/* Round function macros. */
+
+#define WK(i) (((i) & 15) * 4)(%rsp)
+
+#define R_F1(a,b,c,d,e,i) \
+       movl c, RT0; \
+       addl WK(i), e; \
+       xorl d, RT0; \
+       movl a, RT1; \
+       andl b, RT0; \
+       roll $30, b; \
+       xorl d, RT0; \
+       leal (RT0,e), e; \
+       roll $5, RT1; \
+       addl RT1, e;
+
+#define R_F2(a,b,c,d,e,i) \
+       movl c, RT0; \
+       addl WK(i), e; \
+       xorl b, RT0; \
+       roll $30, b; \
+       xorl d, RT0; \
+       movl a, RT1; \
+       leal (RT0,e), e; \
+       roll $5, RT1; \
+       addl RT1, e;
+
+#define R_F3(a,b,c,d,e,i) \
+       movl c, RT0; \
+       movl b, RT1; \
+       xorl b, RT0; \
+       andl c, RT1; \
+       andl d, RT0; \
+       addl RT1, e; \
+       addl WK(i), e; \
+       roll $30, b; \
+       movl a, RT1; \
+       leal (RT0,e), e; \
+       roll $5, RT1; \
+       addl RT1, e;
+
+#define R_F4(a,b,c,d,e,i) R_F2(a,b,c,d,e,i)
+
+#define R(a,b,c,d,e,f,i) \
+       R_##f(a,b,c,d,e,i)
+
+
+/* Input expansion macros. */
+
+#define W_PRECALC_00_15_0(i, W, tmp0) \
+       movdqu (4*(i))(RDATA), tmp0;
+
+#define W_PRECALC_00_15_1(i, W, tmp0) \
+       pshufb BSWAP_REG, tmp0; \
+       movdqa tmp0, W;
+
+#define W_PRECALC_00_15_2(i, W, tmp0) \
+       paddd (.LK_XMM + ((i)/20)*16) rRIP, tmp0;
+
+#define W_PRECALC_00_15_3(i, W, tmp0) \
+       movdqa tmp0, WK(i&~3);
+
+#define W_PRECALC_16_31_0(i, W, W_m04, W_m08, W_m12, W_m16, tmp0, tmp1) \
+       movdqa W_m12, W; \
+       palignr $8, W_m16, W; \
+       movdqa W_m04, tmp0; \
+       psrldq $4, tmp0; \
+       pxor W_m08, W;
+
+#define W_PRECALC_16_31_1(i, W, W_m04, W_m08, W_m12, W_m16, tmp0, tmp1) \
+       pxor W_m16, tmp0; \
+       pxor tmp0, W; \
+       movdqa W, tmp1; \
+       movdqa W, tmp0; \
+       pslldq $12, tmp1;
+
+#define W_PRECALC_16_31_2(i, W, W_m04, W_m08, W_m12, W_m16, tmp0, tmp1) \
+       psrld $31, W; \
+       pslld $1, tmp0; \
+       por W, tmp0; \
+       movdqa tmp1, W; \
+       psrld $30, tmp1; \
+       pslld $2, W;
+
+#define W_PRECALC_16_31_3(i, W, W_m04, W_m08, W_m12, W_m16, tmp0, tmp1) \
+       pxor W, tmp0; \
+       pxor tmp1, tmp0; \
+       movdqa tmp0, W; \
+       paddd (.LK_XMM + ((i)/20)*16) rRIP, tmp0; \
+       movdqa tmp0, WK((i)&~3);
+
+#define W_PRECALC_32_79_0(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, 
W_m28, tmp0) \
+       movdqa W_m04, tmp0; \
+       pxor W_m28, W; \
+       palignr $8, W_m08, tmp0;
+
+#define W_PRECALC_32_79_1(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, 
W_m28, tmp0) \
+       pxor W_m16, W; \
+       pxor tmp0, W; \
+       movdqa W, tmp0;
+
+#define W_PRECALC_32_79_2(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, 
W_m28, tmp0) \
+       psrld $30, W; \
+       pslld $2, tmp0; \
+       por W, tmp0;
+
+#define W_PRECALC_32_79_3(i, W, W_m04, W_m08, W_m12, W_m16, W_m20, W_m24, 
W_m28, tmp0) \
+       movdqa tmp0, W; \
+       paddd (.LK_XMM + ((i)/20)*16) rRIP, tmp0; \
+       movdqa tmp0, WK((i)&~3);
+
+#define CLEAR_REG(reg) pxor reg, reg;
+
+
+/*
+ * Transform nblks*64 bytes (nblks*16 32-bit words) at DATA.
+ *
+ * unsigned int
+ * _gcry_sha1_transform_amd64_ssse3 (void *ctx, const unsigned char *data,
+ *                                   size_t nblks)
+ */
+.globl _gcry_sha1_transform_amd64_ssse3
+ELF(.type _gcry_sha1_transform_amd64_ssse3,@function)
+.align 16
+_gcry_sha1_transform_amd64_ssse3:
+  /* input:
+   *   %rdi: ctx, CTX
+   *   %rsi: data (64*nblks bytes)
+   *   %rdx: nblks
+   */
+  CFI_STARTPROC();
+
+  xorl %eax, %eax;
+  cmpq $0, %rdx;
+  jz .Lret;
+
+  movq %rdx, RNBLKS;
+  movq %rdi, RSTATE;
+  movq %rsi, RDATA;
+  pushq %rbx;
+  CFI_PUSH(%rbx);
+  pushq %rbp;
+  CFI_PUSH(%rbp);
+
+  movq %rsp, ROLDSTACK;
+  CFI_DEF_CFA_REGISTER(ROLDSTACK);
+
+  subq $(16*4), %rsp;
+  andq $(~31), %rsp;
+
+  /* Get the values of the chaining variables. */
+  movl state_h0(RSTATE), a;
+  movl state_h1(RSTATE), b;
+  movl state_h2(RSTATE), c;
+  movl state_h3(RSTATE), d;
+  movl state_h4(RSTATE), e;
+
+  movdqa .Lbswap_shufb_ctl rRIP, BSWAP_REG;
+
+  /* Precalc 0-15. */
+  W_PRECALC_00_15_0(0, W0, Wtmp0);
+  W_PRECALC_00_15_1(1, W0, Wtmp0);
+  W_PRECALC_00_15_2(2, W0, Wtmp0);
+  W_PRECALC_00_15_3(3, W0, Wtmp0);
+  W_PRECALC_00_15_0(4, W7, Wtmp0);
+  W_PRECALC_00_15_1(5, W7, Wtmp0);
+  W_PRECALC_00_15_2(6, W7, Wtmp0);
+  W_PRECALC_00_15_3(7, W7, Wtmp0);
+  W_PRECALC_00_15_0(8, W6, Wtmp0);
+  W_PRECALC_00_15_1(9, W6, Wtmp0);
+  W_PRECALC_00_15_2(10, W6, Wtmp0);
+  W_PRECALC_00_15_3(11, W6, Wtmp0);
+  W_PRECALC_00_15_0(12, W5, Wtmp0);
+  W_PRECALC_00_15_1(13, W5, Wtmp0);
+  W_PRECALC_00_15_2(14, W5, Wtmp0);
+  W_PRECALC_00_15_3(15, W5, Wtmp0);
+
+.align 8
+.Loop:
+  addq $64, RDATA;
+
+  /* Transform 0-15 + Precalc 16-31. */
+  R( a, b, c, d, e, F1,  0 ); W_PRECALC_16_31_0(16, W4, W5, W6, W7, W0, Wtmp0, 
Wtmp1);
+  R( e, a, b, c, d, F1,  1 ); W_PRECALC_16_31_1(17, W4, W5, W6, W7, W0, Wtmp0, 
Wtmp1);
+  R( d, e, a, b, c, F1,  2 ); W_PRECALC_16_31_2(18, W4, W5, W6, W7, W0, Wtmp0, 
Wtmp1);
+  R( c, d, e, a, b, F1,  3 ); W_PRECALC_16_31_3(19, W4, W5, W6, W7, W0, Wtmp0, 
Wtmp1);
+  R( b, c, d, e, a, F1,  4 ); W_PRECALC_16_31_0(20, W3, W4, W5, W6, W7, Wtmp0, 
Wtmp1);
+  R( a, b, c, d, e, F1,  5 ); W_PRECALC_16_31_1(21, W3, W4, W5, W6, W7, Wtmp0, 
Wtmp1);
+  R( e, a, b, c, d, F1,  6 ); W_PRECALC_16_31_2(22, W3, W4, W5, W6, W7, Wtmp0, 
Wtmp1);
+  R( d, e, a, b, c, F1,  7 ); W_PRECALC_16_31_3(23, W3, W4, W5, W6, W7, Wtmp0, 
Wtmp1);
+  R( c, d, e, a, b, F1,  8 ); W_PRECALC_16_31_0(24, W2, W3, W4, W5, W6, Wtmp0, 
Wtmp1);
+  R( b, c, d, e, a, F1,  9 ); W_PRECALC_16_31_1(25, W2, W3, W4, W5, W6, Wtmp0, 
Wtmp1);
+  R( a, b, c, d, e, F1, 10 ); W_PRECALC_16_31_2(26, W2, W3, W4, W5, W6, Wtmp0, 
Wtmp1);
+  R( e, a, b, c, d, F1, 11 ); W_PRECALC_16_31_3(27, W2, W3, W4, W5, W6, Wtmp0, 
Wtmp1);
+  R( d, e, a, b, c, F1, 12 ); W_PRECALC_16_31_0(28, W1, W2, W3, W4, W5, Wtmp0, 
Wtmp1);
+  R( c, d, e, a, b, F1, 13 ); W_PRECALC_16_31_1(29, W1, W2, W3, W4, W5, Wtmp0, 
Wtmp1);
+  R( b, c, d, e, a, F1, 14 ); W_PRECALC_16_31_2(30, W1, W2, W3, W4, W5, Wtmp0, 
Wtmp1);
+  R( a, b, c, d, e, F1, 15 ); W_PRECALC_16_31_3(31, W1, W2, W3, W4, W5, Wtmp0, 
Wtmp1);
+
+  /* Transform 16-63 + Precalc 32-79. */
+  R( e, a, b, c, d, F1, 16 ); W_PRECALC_32_79_0(32, W0, W1, W2, W3, W4, W5, 
W6, W7, Wtmp0);
+  R( d, e, a, b, c, F1, 17 ); W_PRECALC_32_79_1(33, W0, W1, W2, W3, W4, W5, 
W6, W7, Wtmp0);
+  R( c, d, e, a, b, F1, 18 ); W_PRECALC_32_79_2(34, W0, W1, W2, W3, W4, W5, 
W6, W7, Wtmp0);
+  R( b, c, d, e, a, F1, 19 ); W_PRECALC_32_79_3(35, W0, W1, W2, W3, W4, W5, 
W6, W7, Wtmp0);
+  R( a, b, c, d, e, F2, 20 ); W_PRECALC_32_79_0(36, W7, W0, W1, W2, W3, W4, 
W5, W6, Wtmp0);
+  R( e, a, b, c, d, F2, 21 ); W_PRECALC_32_79_1(37, W7, W0, W1, W2, W3, W4, 
W5, W6, Wtmp0);
+  R( d, e, a, b, c, F2, 22 ); W_PRECALC_32_79_2(38, W7, W0, W1, W2, W3, W4, 
W5, W6, Wtmp0);
+  R( c, d, e, a, b, F2, 23 ); W_PRECALC_32_79_3(39, W7, W0, W1, W2, W3, W4, 
W5, W6, Wtmp0);
+  R( b, c, d, e, a, F2, 24 ); W_PRECALC_32_79_0(40, W6, W7, W0, W1, W2, W3, 
W4, W5, Wtmp0);
+  R( a, b, c, d, e, F2, 25 ); W_PRECALC_32_79_1(41, W6, W7, W0, W1, W2, W3, 
W4, W5, Wtmp0);
+  R( e, a, b, c, d, F2, 26 ); W_PRECALC_32_79_2(42, W6, W7, W0, W1, W2, W3, 
W4, W5, Wtmp0);
+  R( d, e, a, b, c, F2, 27 ); W_PRECALC_32_79_3(43, W6, W7, W0, W1, W2, W3, 
W4, W5, Wtmp0);
+  R( c, d, e, a, b, F2, 28 ); W_PRECALC_32_79_0(44, W5, W6, W7, W0, W1, W2, 
W3, W4, Wtmp0);
+  R( b, c, d, e, a, F2, 29 ); W_PRECALC_32_79_1(45, W5, W6, W7, W0, W1, W2, 
W3, W4, Wtmp0);
+  R( a, b, c, d, e, F2, 30 ); W_PRECALC_32_79_2(46, W5, W6, W7, W0, W1, W2, 
W3, W4, Wtmp0);
+  R( e, a, b, c, d, F2, 31 ); W_PRECALC_32_79_3(47, W5, W6, W7, W0, W1, W2, 
W3, W4, Wtmp0);
+  R( d, e, a, b, c, F2, 32 ); W_PRECALC_32_79_0(48, W4, W5, W6, W7, W0, W1, 
W2, W3, Wtmp0);
+  R( c, d, e, a, b, F2, 33 ); W_PRECALC_32_79_1(49, W4, W5, W6, W7, W0, W1, 
W2, W3, Wtmp0);
+  R( b, c, d, e, a, F2, 34 ); W_PRECALC_32_79_2(50, W4, W5, W6, W7, W0, W1, 
W2, W3, Wtmp0);
+  R( a, b, c, d, e, F2, 35 ); W_PRECALC_32_79_3(51, W4, W5, W6, W7, W0, W1, 
W2, W3, Wtmp0);
+  R( e, a, b, c, d, F2, 36 ); W_PRECALC_32_79_0(52, W3, W4, W5, W6, W7, W0, 
W1, W2, Wtmp0);
+  R( d, e, a, b, c, F2, 37 ); W_PRECALC_32_79_1(53, W3, W4, W5, W6, W7, W0, 
W1, W2, Wtmp0);
+  R( c, d, e, a, b, F2, 38 ); W_PRECALC_32_79_2(54, W3, W4, W5, W6, W7, W0, 
W1, W2, Wtmp0);
+  R( b, c, d, e, a, F2, 39 ); W_PRECALC_32_79_3(55, W3, W4, W5, W6, W7, W0, 
W1, W2, Wtmp0);
+  R( a, b, c, d, e, F3, 40 ); W_PRECALC_32_79_0(56, W2, W3, W4, W5, W6, W7, 
W0, W1, Wtmp0);
+  R( e, a, b, c, d, F3, 41 ); W_PRECALC_32_79_1(57, W2, W3, W4, W5, W6, W7, 
W0, W1, Wtmp0);
+  R( d, e, a, b, c, F3, 42 ); W_PRECALC_32_79_2(58, W2, W3, W4, W5, W6, W7, 
W0, W1, Wtmp0);
+  R( c, d, e, a, b, F3, 43 ); W_PRECALC_32_79_3(59, W2, W3, W4, W5, W6, W7, 
W0, W1, Wtmp0);
+  R( b, c, d, e, a, F3, 44 ); W_PRECALC_32_79_0(60, W1, W2, W3, W4, W5, W6, 
W7, W0, Wtmp0);
+  R( a, b, c, d, e, F3, 45 ); W_PRECALC_32_79_1(61, W1, W2, W3, W4, W5, W6, 
W7, W0, Wtmp0);
+  R( e, a, b, c, d, F3, 46 ); W_PRECALC_32_79_2(62, W1, W2, W3, W4, W5, W6, 
W7, W0, Wtmp0);
+  R( d, e, a, b, c, F3, 47 ); W_PRECALC_32_79_3(63, W1, W2, W3, W4, W5, W6, 
W7, W0, Wtmp0);
+  R( c, d, e, a, b, F3, 48 ); W_PRECALC_32_79_0(64, W0, W1, W2, W3, W4, W5, 
W6, W7, Wtmp0);
+  R( b, c, d, e, a, F3, 49 ); W_PRECALC_32_79_1(65, W0, W1, W2, W3, W4, W5, 
W6, W7, Wtmp0);
+  R( a, b, c, d, e, F3, 50 ); W_PRECALC_32_79_2(66, W0, W1, W2, W3, W4, W5, 
W6, W7, Wtmp0);
+  R( e, a, b, c, d, F3, 51 ); W_PRECALC_32_79_3(67, W0, W1, W2, W3, W4, W5, 
W6, W7, Wtmp0);
+  R( d, e, a, b, c, F3, 52 ); W_PRECALC_32_79_0(68, W7, W0, W1, W2, W3, W4, 
W5, W6, Wtmp0);
+  R( c, d, e, a, b, F3, 53 ); W_PRECALC_32_79_1(69, W7, W0, W1, W2, W3, W4, 
W5, W6, Wtmp0);
+  R( b, c, d, e, a, F3, 54 ); W_PRECALC_32_79_2(70, W7, W0, W1, W2, W3, W4, 
W5, W6, Wtmp0);
+  R( a, b, c, d, e, F3, 55 ); W_PRECALC_32_79_3(71, W7, W0, W1, W2, W3, W4, 
W5, W6, Wtmp0);
+  R( e, a, b, c, d, F3, 56 ); W_PRECALC_32_79_0(72, W6, W7, W0, W1, W2, W3, 
W4, W5, Wtmp0);
+  R( d, e, a, b, c, F3, 57 ); W_PRECALC_32_79_1(73, W6, W7, W0, W1, W2, W3, 
W4, W5, Wtmp0);
+  R( c, d, e, a, b, F3, 58 ); W_PRECALC_32_79_2(74, W6, W7, W0, W1, W2, W3, 
W4, W5, Wtmp0);
+  R( b, c, d, e, a, F3, 59 ); W_PRECALC_32_79_3(75, W6, W7, W0, W1, W2, W3, 
W4, W5, Wtmp0);
+  R( a, b, c, d, e, F4, 60 ); W_PRECALC_32_79_0(76, W5, W6, W7, W0, W1, W2, 
W3, W4, Wtmp0);
+  R( e, a, b, c, d, F4, 61 ); W_PRECALC_32_79_1(77, W5, W6, W7, W0, W1, W2, 
W3, W4, Wtmp0);
+  R( d, e, a, b, c, F4, 62 ); W_PRECALC_32_79_2(78, W5, W6, W7, W0, W1, W2, 
W3, W4, Wtmp0);
+  R( c, d, e, a, b, F4, 63 ); W_PRECALC_32_79_3(79, W5, W6, W7, W0, W1, W2, 
W3, W4, Wtmp0);
+
+  decq RNBLKS;
+  jz .Lend;
+
+  /* Transform 64-79 + Precalc 0-15 of next block. */
+  R( b, c, d, e, a, F4, 64 ); W_PRECALC_00_15_0(0, W0, Wtmp0);
+  R( a, b, c, d, e, F4, 65 ); W_PRECALC_00_15_1(1, W0, Wtmp0);
+  R( e, a, b, c, d, F4, 66 ); W_PRECALC_00_15_2(2, W0, Wtmp0);
+  R( d, e, a, b, c, F4, 67 ); W_PRECALC_00_15_3(3, W0, Wtmp0);
+  R( c, d, e, a, b, F4, 68 ); W_PRECALC_00_15_0(4, W7, Wtmp0);
+  R( b, c, d, e, a, F4, 69 ); W_PRECALC_00_15_1(5, W7, Wtmp0);
+  R( a, b, c, d, e, F4, 70 ); W_PRECALC_00_15_2(6, W7, Wtmp0);
+  R( e, a, b, c, d, F4, 71 ); W_PRECALC_00_15_3(7, W7, Wtmp0);
+  R( d, e, a, b, c, F4, 72 ); W_PRECALC_00_15_0(8, W6, Wtmp0);
+  R( c, d, e, a, b, F4, 73 ); W_PRECALC_00_15_1(9, W6, Wtmp0);
+  R( b, c, d, e, a, F4, 74 ); W_PRECALC_00_15_2(10, W6, Wtmp0);
+  R( a, b, c, d, e, F4, 75 ); W_PRECALC_00_15_3(11, W6, Wtmp0);
+  R( e, a, b, c, d, F4, 76 ); W_PRECALC_00_15_0(12, W5, Wtmp0);
+  R( d, e, a, b, c, F4, 77 ); W_PRECALC_00_15_1(13, W5, Wtmp0);
+  R( c, d, e, a, b, F4, 78 );
+  addl state_h0(RSTATE), a;   W_PRECALC_00_15_2(14, W5, Wtmp0);
+  R( b, c, d, e, a, F4, 79 ); W_PRECALC_00_15_3(15, W5, Wtmp0);
+
+  /* Update the chaining variables. */
+  addl state_h3(RSTATE), d;
+  addl state_h2(RSTATE), c;
+  addl state_h1(RSTATE), b;
+  addl state_h4(RSTATE), e;
+
+  movl d, state_h3(RSTATE);
+  movl c, state_h2(RSTATE);
+  movl b, state_h1(RSTATE);
+  movl a, state_h0(RSTATE);
+  movl e, state_h4(RSTATE);
+
+  jmp .Loop;
+
+.align 16
+.Lend:
+  /* Transform 64-79 + Clear XMM registers + Burn stack. */
+  R( b, c, d, e, a, F4, 64 ); CLEAR_REG(BSWAP_REG);
+  R( a, b, c, d, e, F4, 65 ); CLEAR_REG(Wtmp0);
+  R( e, a, b, c, d, F4, 66 ); CLEAR_REG(Wtmp1);
+  R( d, e, a, b, c, F4, 67 ); CLEAR_REG(W0);
+  R( c, d, e, a, b, F4, 68 ); CLEAR_REG(W1);
+  R( b, c, d, e, a, F4, 69 ); CLEAR_REG(W2);
+  R( a, b, c, d, e, F4, 70 ); CLEAR_REG(W3);
+  R( e, a, b, c, d, F4, 71 ); CLEAR_REG(W4);
+  R( d, e, a, b, c, F4, 72 ); CLEAR_REG(W5);
+  R( c, d, e, a, b, F4, 73 ); CLEAR_REG(W6);
+  R( b, c, d, e, a, F4, 74 ); CLEAR_REG(W7);
+  R( a, b, c, d, e, F4, 75 );
+  R( e, a, b, c, d, F4, 76 ); movdqa Wtmp0, (0*16)(%rsp);
+  R( d, e, a, b, c, F4, 77 ); movdqa Wtmp0, (1*16)(%rsp);
+  R( c, d, e, a, b, F4, 78 ); movdqa Wtmp0, (2*16)(%rsp);
+  addl state_h0(RSTATE), a;
+  R( b, c, d, e, a, F4, 79 );
+
+  /* 16*4/16-1 = 3 */
+  movdqa Wtmp0, (3*16)(%rsp);
+
+  /* Update the chaining variables. */
+  addl state_h3(RSTATE), d;
+  addl state_h2(RSTATE), c;
+  addl state_h1(RSTATE), b;
+  addl state_h4(RSTATE), e;
+
+  movl d, state_h3(RSTATE);
+  movl c, state_h2(RSTATE);
+  movl b, state_h1(RSTATE);
+  movl a, state_h0(RSTATE);
+  movl e, state_h4(RSTATE);
+
+  movq ROLDSTACK, %rsp;
+  CFI_REGISTER(ROLDSTACK, %rsp);
+  CFI_DEF_CFA_REGISTER(%rsp);
+
+  popq %rbp;
+  CFI_POP(%rbp);
+  popq %rbx;
+  CFI_POP(%rbx);
+
+  /* stack already burned */
+  xorl %eax, %eax;
+
+.Lret:
+  ret_spec_stop;
+  CFI_ENDPROC();
+ELF(.size _gcry_sha1_transform_amd64_ssse3,
+    .-_gcry_sha1_transform_amd64_ssse3;)
+
+#endif
+#endif
diff --git a/grub-core/lib/libgcrypt/cipher/sha1.c 
b/grub-core/lib/libgcrypt/cipher/sha1.c
index 4b784acf0..b83b9de05 100644
--- a/grub-core/lib/libgcrypt/cipher/sha1.c
+++ b/grub-core/lib/libgcrypt/cipher/sha1.c
@@ -38,8 +38,74 @@
 
 #include "g10lib.h"
 #include "bithelp.h"
+#include "bufhelp.h"
 #include "cipher.h"
-#include "hash-common.h"
+#include "sha1.h"
+
+
+/* USE_SSSE3 indicates whether to compile with Intel SSSE3 code. */
+#undef USE_SSSE3
+#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_SSSE3) && \
+    (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+# define USE_SSSE3 1
+#endif
+
+/* USE_AVX indicates whether to compile with Intel AVX code. */
+#undef USE_AVX
+#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX) && \
+    (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+# define USE_AVX 1
+#endif
+
+/* USE_BMI2 indicates whether to compile with Intel AVX/BMI2 code. */
+#undef USE_BMI2
+#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX) && \
+    defined(HAVE_GCC_INLINE_ASM_BMI2) && \
+    (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+# define USE_BMI2 1
+#endif
+
+/* USE_AVX2 indicates whether to compile with Intel AVX2/BMI2 code. */
+#undef USE_AVX2
+#if defined(USE_BMI2) && defined(HAVE_GCC_INLINE_ASM_AVX2)
+# define USE_AVX2 1
+#endif
+
+/* USE_SHAEXT indicates whether to compile with Intel SHA Extension code. */
+#undef USE_SHAEXT
+#if defined(HAVE_GCC_INLINE_ASM_SHAEXT) && \
+    defined(HAVE_GCC_INLINE_ASM_SSE41) && \
+    defined(ENABLE_SHAEXT_SUPPORT)
+# define USE_SHAEXT 1
+#endif
+
+/* USE_NEON indicates whether to enable ARM NEON assembly code. */
+#undef USE_NEON
+#ifdef ENABLE_NEON_SUPPORT
+# if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) \
+     && defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) \
+     && defined(HAVE_GCC_INLINE_ASM_NEON)
+#  define USE_NEON 1
+# endif
+#endif
+
+/* USE_ARM_CE indicates whether to enable ARMv8 Crypto Extension assembly
+ * code. */
+#undef USE_ARM_CE
+#ifdef ENABLE_ARM_CRYPTO_SUPPORT
+# if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) \
+     && defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) \
+     && defined(HAVE_GCC_INLINE_ASM_AARCH32_CRYPTO)
+#  define USE_ARM_CE 1
+# elif defined(__AARCH64EL__) \
+       && defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) \
+       && defined(HAVE_GCC_INLINE_ASM_AARCH64_CRYPTO)
+#  define USE_ARM_CE 1
+# endif
+#endif
 
 
 /* A macro to test whether P is properly aligned for an u32 type.
@@ -51,31 +117,261 @@
 /* # define U32_ALIGNED_P(p) (!(((uintptr_t)p) % sizeof (u32))) */
 /* #endif */
 
-#define TRANSFORM(x,d,n) transform ((x), (d), (n))
 
 
-typedef struct
+/* Assembly implementations use SystemV ABI, ABI conversion and additional
+ * stack to store XMM6-XMM15 needed on Win64. */
+#undef ASM_FUNC_ABI
+#undef ASM_EXTRA_STACK
+#if defined(USE_SSSE3) || defined(USE_AVX) || defined(USE_BMI2) || \
+    defined(USE_SHAEXT)
+# ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+#  define ASM_FUNC_ABI __attribute__((sysv_abi))
+#  define ASM_EXTRA_STACK (10 * 16 + sizeof(void *) * 4)
+# else
+#  define ASM_FUNC_ABI
+#  define ASM_EXTRA_STACK 0
+# endif
+#endif
+
+
+#ifdef USE_SSSE3
+unsigned int
+_gcry_sha1_transform_amd64_ssse3 (void *state, const unsigned char *data,
+                                  size_t nblks) ASM_FUNC_ABI;
+
+static unsigned int
+do_sha1_transform_amd64_ssse3 (void *ctx, const unsigned char *data,
+                               size_t nblks)
+{
+  SHA1_CONTEXT *hd = ctx;
+  return _gcry_sha1_transform_amd64_ssse3 (&hd->h0, data, nblks)
+         + ASM_EXTRA_STACK;
+}
+#endif
+
+#ifdef USE_AVX
+unsigned int
+_gcry_sha1_transform_amd64_avx (void *state, const unsigned char *data,
+                                 size_t nblks) ASM_FUNC_ABI;
+
+static unsigned int
+do_sha1_transform_amd64_avx (void *ctx, const unsigned char *data,
+                             size_t nblks)
+{
+  SHA1_CONTEXT *hd = ctx;
+  return _gcry_sha1_transform_amd64_avx (&hd->h0, data, nblks)
+         + ASM_EXTRA_STACK;
+}
+#endif
+
+#ifdef USE_BMI2
+unsigned int
+_gcry_sha1_transform_amd64_avx_bmi2 (void *state, const unsigned char *data,
+                                     size_t nblks) ASM_FUNC_ABI;
+
+static unsigned int
+do_sha1_transform_amd64_avx_bmi2 (void *ctx, const unsigned char *data,
+                                  size_t nblks)
+{
+  SHA1_CONTEXT *hd = ctx;
+  return _gcry_sha1_transform_amd64_avx_bmi2 (&hd->h0, data, nblks)
+         + ASM_EXTRA_STACK;
+}
+
+#ifdef USE_AVX2
+unsigned int
+_gcry_sha1_transform_amd64_avx2_bmi2 (void *state, const unsigned char *data,
+                                      size_t nblks) ASM_FUNC_ABI;
+
+static unsigned int
+do_sha1_transform_amd64_avx2_bmi2 (void *ctx, const unsigned char *data,
+                                   size_t nblks)
+{
+  SHA1_CONTEXT *hd = ctx;
+
+  /* AVX2/BMI2 function only handles pair of blocks so nblks needs to be
+   * multiple of 2 and function does not handle zero nblks. Use AVX/BMI2
+   * code to handle these cases. */
+
+  if (nblks <= 1)
+    return do_sha1_transform_amd64_avx_bmi2 (ctx, data, nblks);
+
+  if (nblks & 1)
+    {
+      (void)_gcry_sha1_transform_amd64_avx_bmi2 (&hd->h0, data, 1);
+      nblks--;
+      data += 64;
+    }
+
+  return _gcry_sha1_transform_amd64_avx2_bmi2 (&hd->h0, data, nblks)
+         + ASM_EXTRA_STACK;
+}
+#endif /* USE_AVX2 */
+#endif /* USE_BMI2 */
+
+#ifdef USE_SHAEXT
+/* Does not need ASM_FUNC_ABI */
+unsigned int
+_gcry_sha1_transform_intel_shaext (void *state, const unsigned char *data,
+                                   size_t nblks);
+
+static unsigned int
+do_sha1_transform_intel_shaext (void *ctx, const unsigned char *data,
+                                size_t nblks)
+{
+  SHA1_CONTEXT *hd = ctx;
+  return _gcry_sha1_transform_intel_shaext (&hd->h0, data, nblks);
+}
+#endif
+
+#ifdef USE_NEON
+unsigned int
+_gcry_sha1_transform_armv7_neon (void *state, const unsigned char *data,
+                                 size_t nblks);
+
+static unsigned int
+do_sha1_transform_armv7_neon (void *ctx, const unsigned char *data,
+                              size_t nblks)
+{
+  SHA1_CONTEXT *hd = ctx;
+  return _gcry_sha1_transform_armv7_neon (&hd->h0, data, nblks);
+}
+#endif
+
+#ifdef USE_ARM_CE
+unsigned int
+_gcry_sha1_transform_armv8_ce (void *state, const unsigned char *data,
+                               size_t nblks);
+
+static unsigned int
+do_sha1_transform_armv8_ce (void *ctx, const unsigned char *data,
+                            size_t nblks)
+{
+  SHA1_CONTEXT *hd = ctx;
+  return _gcry_sha1_transform_armv8_ce (&hd->h0, data, nblks);
+}
+#endif
+
+#ifdef SHA1_USE_S390X_CRYPTO
+#include "asm-inline-s390x.h"
+
+static unsigned int
+do_sha1_transform_s390x (void *ctx, const unsigned char *data, size_t nblks)
+{
+  SHA1_CONTEXT *hd = ctx;
+
+  kimd_execute (KMID_FUNCTION_SHA1, &hd->h0, data, nblks * 64);
+  return 0;
+}
+
+static unsigned int
+do_sha1_final_s390x (void *ctx, const unsigned char *data, size_t datalen,
+                    u32 len_msb, u32 len_lsb)
 {
-  u32           h0,h1,h2,h3,h4;
-  u32           nblocks;
-  unsigned char buf[64];
-  int           count;
-} SHA1_CONTEXT;
+  SHA1_CONTEXT *hd = ctx;
+
+  /* Make sure that 'final_len' is positioned at correct offset relative
+   * to 'h0'. This is because we are passing 'h0' pointer as start of
+   * parameter block to 'klmd' instruction. */
+
+  gcry_assert (offsetof (SHA1_CONTEXT, final_len_msb)
+              - offsetof (SHA1_CONTEXT, h0) == 5 * sizeof(u32));
+  gcry_assert (offsetof (SHA1_CONTEXT, final_len_lsb)
+              - offsetof (SHA1_CONTEXT, final_len_msb) == 1 * sizeof(u32));
+
+  hd->final_len_msb = len_msb;
+  hd->final_len_lsb = len_lsb;
 
+  klmd_execute (KMID_FUNCTION_SHA1, &hd->h0, data, datalen);
+  return 0;
+}
+#endif
+
+
+static unsigned int
+do_transform_generic (void *c, const unsigned char *data, size_t nblks);
 
 
 static void
-sha1_init (void *context)
+sha1_init (void *context, unsigned int flags)
 {
   SHA1_CONTEXT *hd = context;
+  unsigned int features = _gcry_get_hw_features ();
+
+  (void)flags;
 
   hd->h0 = 0x67452301;
   hd->h1 = 0xefcdab89;
   hd->h2 = 0x98badcfe;
   hd->h3 = 0x10325476;
   hd->h4 = 0xc3d2e1f0;
-  hd->nblocks = 0;
-  hd->count = 0;
+
+  hd->bctx.nblocks = 0;
+  hd->bctx.nblocks_high = 0;
+  hd->bctx.count = 0;
+  hd->bctx.blocksize_shift = _gcry_ctz(64);
+
+  /* Order of feature checks is important here; last match will be
+   * selected.  Keep slower implementations at the top and faster at
+   * the bottom.  */
+  hd->bctx.bwrite = do_transform_generic;
+#ifdef USE_SSSE3
+  if ((features & HWF_INTEL_SSSE3) != 0)
+    hd->bctx.bwrite = do_sha1_transform_amd64_ssse3;
+#endif
+#ifdef USE_AVX
+  /* AVX implementation uses SHLD which is known to be slow on non-Intel CPUs.
+   * Therefore use this implementation on Intel CPUs only. */
+  if ((features & HWF_INTEL_AVX) && (features & HWF_INTEL_FAST_SHLD))
+    hd->bctx.bwrite = do_sha1_transform_amd64_avx;
+#endif
+#ifdef USE_BMI2
+  if ((features & HWF_INTEL_AVX) && (features & HWF_INTEL_BMI2))
+    hd->bctx.bwrite = do_sha1_transform_amd64_avx_bmi2;
+#endif
+#ifdef USE_AVX2
+  if ((features & HWF_INTEL_AVX2) && (features & HWF_INTEL_AVX) &&
+      (features & HWF_INTEL_BMI2))
+    hd->bctx.bwrite = do_sha1_transform_amd64_avx2_bmi2;
+#endif
+#ifdef USE_SHAEXT
+  if ((features & HWF_INTEL_SHAEXT) && (features & HWF_INTEL_SSE4_1))
+    hd->bctx.bwrite = do_sha1_transform_intel_shaext;
+#endif
+#ifdef USE_NEON
+  if ((features & HWF_ARM_NEON) != 0)
+    hd->bctx.bwrite = do_sha1_transform_armv7_neon;
+#endif
+#ifdef USE_ARM_CE
+  if ((features & HWF_ARM_SHA1) != 0)
+    hd->bctx.bwrite = do_sha1_transform_armv8_ce;
+#endif
+#ifdef SHA1_USE_S390X_CRYPTO
+  hd->use_s390x_crypto = 0;
+  if ((features & HWF_S390X_MSA) != 0)
+    {
+      if ((kimd_query () & km_function_to_mask (KMID_FUNCTION_SHA1)) &&
+         (klmd_query () & km_function_to_mask (KMID_FUNCTION_SHA1)))
+       {
+         hd->bctx.bwrite = do_sha1_transform_s390x;
+         hd->use_s390x_crypto = 1;
+       }
+    }
+#endif
+
+  (void)features;
+}
+
+/*
+ * Initialize the context HD. This is used to prepare the use of
+ * _gcry_sha1_mixblock.  WARNING: This is a special purpose function
+ * for exclusive use by random-csprng.c.
+ */
+void
+_gcry_sha1_mixblock_init (SHA1_CONTEXT *hd)
+{
+  sha1_init (hd, 0);
 }
 
 
@@ -100,37 +396,23 @@ sha1_init (void *context)
                                 b = rol( b, 30 );    \
                               } while(0)
 
-
 /*
  * Transform NBLOCKS of each 64 bytes (16 32-bit words) at DATA.
  */
-static void
-transform (SHA1_CONTEXT *hd, const unsigned char *data, size_t nblocks)
+static unsigned int
+do_transform_generic (void *ctx, const unsigned char *data, size_t nblks)
 {
-  register u32 a, b, c, d, e; /* Local copies of the chaining variables.  */
-  register u32 tm;            /* Helper.  */
-  u32 x[16];                  /* The array we work on. */
+  SHA1_CONTEXT *hd = ctx;
 
-  /* Loop over all blocks.  */
-  for ( ;nblocks; nblocks--)
+  do
     {
-#ifdef WORDS_BIGENDIAN
-      memcpy (x, data, 64);
-      data += 64;
-#else
-      {
-        int i;
-        unsigned char *p;
-
-        for(i=0, p=(unsigned char*)x; i < 16; i++, p += 4 )
-          {
-            p[3] = *data++;
-            p[2] = *data++;
-            p[1] = *data++;
-            p[0] = *data++;
-          }
-      }
-#endif
+      const u32 *idata = (const void *)data;
+      u32 a, b, c, d, e; /* Local copies of the chaining variables.  */
+      u32 tm;            /* Helper.  */
+      u32 x[16];         /* The array we work on. */
+
+#define I(i) (x[i] = buf_get_be32(idata + i))
+
       /* Get the values of the chaining variables. */
       a = hd->h0;
       b = hd->h1;
@@ -139,22 +421,22 @@ transform (SHA1_CONTEXT *hd, const unsigned char *data, 
size_t nblocks)
       e = hd->h4;
 
       /* Transform. */
-      R( a, b, c, d, e, F1, K1, x[ 0] );
-      R( e, a, b, c, d, F1, K1, x[ 1] );
-      R( d, e, a, b, c, F1, K1, x[ 2] );
-      R( c, d, e, a, b, F1, K1, x[ 3] );
-      R( b, c, d, e, a, F1, K1, x[ 4] );
-      R( a, b, c, d, e, F1, K1, x[ 5] );
-      R( e, a, b, c, d, F1, K1, x[ 6] );
-      R( d, e, a, b, c, F1, K1, x[ 7] );
-      R( c, d, e, a, b, F1, K1, x[ 8] );
-      R( b, c, d, e, a, F1, K1, x[ 9] );
-      R( a, b, c, d, e, F1, K1, x[10] );
-      R( e, a, b, c, d, F1, K1, x[11] );
-      R( d, e, a, b, c, F1, K1, x[12] );
-      R( c, d, e, a, b, F1, K1, x[13] );
-      R( b, c, d, e, a, F1, K1, x[14] );
-      R( a, b, c, d, e, F1, K1, x[15] );
+      R( a, b, c, d, e, F1, K1, I( 0) );
+      R( e, a, b, c, d, F1, K1, I( 1) );
+      R( d, e, a, b, c, F1, K1, I( 2) );
+      R( c, d, e, a, b, F1, K1, I( 3) );
+      R( b, c, d, e, a, F1, K1, I( 4) );
+      R( a, b, c, d, e, F1, K1, I( 5) );
+      R( e, a, b, c, d, F1, K1, I( 6) );
+      R( d, e, a, b, c, F1, K1, I( 7) );
+      R( c, d, e, a, b, F1, K1, I( 8) );
+      R( b, c, d, e, a, F1, K1, I( 9) );
+      R( a, b, c, d, e, F1, K1, I(10) );
+      R( e, a, b, c, d, F1, K1, I(11) );
+      R( d, e, a, b, c, F1, K1, I(12) );
+      R( c, d, e, a, b, F1, K1, I(13) );
+      R( b, c, d, e, a, F1, K1, I(14) );
+      R( a, b, c, d, e, F1, K1, I(15) );
       R( e, a, b, c, d, F1, K1, M(16) );
       R( d, e, a, b, c, F1, K1, M(17) );
       R( c, d, e, a, b, F1, K1, M(18) );
@@ -226,53 +508,39 @@ transform (SHA1_CONTEXT *hd, const unsigned char *data, 
size_t nblocks)
       hd->h2 += c;
       hd->h3 += d;
       hd->h4 += e;
+
+      data += 64;
     }
+  while (--nblks);
+
+  return 88+4*sizeof(void*);
 }
 
 
-/* Update the message digest with the contents
- * of INBUF with length INLEN.
+/*
+ * Apply the SHA-1 transform function on the buffer BLOCKOF64BYTE
+ * which must have a length 64 bytes.  BLOCKOF64BYTE must be 32-bit
+ * aligned.  Updates the 20 bytes in BLOCKOF64BYTE with its mixed
+ * content.  Returns the number of bytes which should be burned on the
+ * stack.  You need to use _gcry_sha1_mixblock_init to initialize the
+ * context.
+ * WARNING: This is a special purpose function for exclusive use by
+ * random-csprng.c.
  */
-static void
-sha1_write( void *context, const void *inbuf_arg, size_t inlen)
+unsigned int
+_gcry_sha1_mixblock (SHA1_CONTEXT *hd, void *blockof64byte)
 {
-  const unsigned char *inbuf = inbuf_arg;
-  SHA1_CONTEXT *hd = context;
-  size_t nblocks;
-
-  if (hd->count == 64)  /* Flush the buffer. */
-    {
-      TRANSFORM( hd, hd->buf, 1 );
-      _gcry_burn_stack (88+4*sizeof(void*));
-      hd->count = 0;
-      hd->nblocks++;
-    }
-  if (!inbuf)
-    return;
-
-  if (hd->count)
-    {
-      for (; inlen && hd->count < 64; inlen--)
-        hd->buf[hd->count++] = *inbuf++;
-      sha1_write (hd, NULL, 0);
-      if (!inlen)
-        return;
-    }
+  u32 *p = blockof64byte;
+  unsigned int nburn;
 
-  nblocks = inlen / 64;
-  if (nblocks)
-    {
-      TRANSFORM (hd, inbuf, nblocks);
-      hd->count = 0;
-      hd->nblocks += nblocks;
-      inlen -= nblocks * 64;
-      inbuf += nblocks * 64;
-    }
-  _gcry_burn_stack (88+4*sizeof(void*));
+  nburn = (*hd->bctx.bwrite) (hd, blockof64byte, 1);
+  p[0] = hd->h0;
+  p[1] = hd->h1;
+  p[2] = hd->h2;
+  p[3] = hd->h3;
+  p[4] = hd->h4;
 
-  /* Save remaining bytes.  */
-  for (; inlen && hd->count < 64; inlen--)
-    hd->buf[hd->count++] = *inbuf++;
+  return nburn;
 }
 
 
@@ -287,19 +555,22 @@ static void
 sha1_final(void *context)
 {
   SHA1_CONTEXT *hd = context;
-
-  u32 t, msb, lsb;
+  u32 t, th, msb, lsb;
   unsigned char *p;
+  unsigned int burn;
 
-  sha1_write(hd, NULL, 0); /* flush */;
+  t = hd->bctx.nblocks;
+  if (sizeof t == sizeof hd->bctx.nblocks)
+    th = hd->bctx.nblocks_high;
+  else
+    th = hd->bctx.nblocks >> 32;
 
-  t = hd->nblocks;
   /* multiply by 64 to make a byte count */
   lsb = t << 6;
-  msb = t >> 26;
+  msb = (th << 6) | (t >> 26);
   /* add the count */
   t = lsb;
-  if( (lsb += hd->count) < t )
+  if( (lsb += hd->bctx.count) < t )
     msb++;
   /* multiply by 8 to make a bit count */
   t = lsb;
@@ -307,39 +578,39 @@ sha1_final(void *context)
   msb <<= 3;
   msb |= t >> 29;
 
-  if( hd->count < 56 )  /* enough room */
+  if (0)
+    { }
+#ifdef SHA1_USE_S390X_CRYPTO
+  else if (hd->use_s390x_crypto)
     {
-      hd->buf[hd->count++] = 0x80; /* pad */
-      while( hd->count < 56 )
-        hd->buf[hd->count++] = 0;  /* pad */
+      burn = do_sha1_final_s390x (hd, hd->bctx.buf, hd->bctx.count, msb, lsb);
+    }
+#endif
+  else if (hd->bctx.count < 56)  /* enough room */
+    {
+      hd->bctx.buf[hd->bctx.count++] = 0x80; /* pad */
+      if (hd->bctx.count < 56)
+       memset (&hd->bctx.buf[hd->bctx.count], 0, 56 - hd->bctx.count);
+
+      /* append the 64 bit count */
+      buf_put_be32(hd->bctx.buf + 56, msb);
+      buf_put_be32(hd->bctx.buf + 60, lsb);
+      burn = (*hd->bctx.bwrite) ( hd, hd->bctx.buf, 1 );
     }
   else  /* need one extra block */
     {
-      hd->buf[hd->count++] = 0x80; /* pad character */
-      while( hd->count < 64 )
-        hd->buf[hd->count++] = 0;
-      sha1_write(hd, NULL, 0);  /* flush */;
-      memset(hd->buf, 0, 56 ); /* fill next block with zeroes */
+      hd->bctx.buf[hd->bctx.count++] = 0x80; /* pad character */
+      /* fill pad and next block with zeroes */
+      memset (&hd->bctx.buf[hd->bctx.count], 0, 64 - hd->bctx.count + 56);
+
+      /* append the 64 bit count */
+      buf_put_be32(hd->bctx.buf + 64 + 56, msb);
+      buf_put_be32(hd->bctx.buf + 64 + 60, lsb);
+      burn = (*hd->bctx.bwrite) ( hd, hd->bctx.buf, 2 );
     }
-  /* append the 64 bit count */
-  hd->buf[56] = msb >> 24;
-  hd->buf[57] = msb >> 16;
-  hd->buf[58] = msb >>  8;
-  hd->buf[59] = msb       ;
-  hd->buf[60] = lsb >> 24;
-  hd->buf[61] = lsb >> 16;
-  hd->buf[62] = lsb >>  8;
-  hd->buf[63] = lsb       ;
-  TRANSFORM( hd, hd->buf, 1 );
-  _gcry_burn_stack (88+4*sizeof(void*));
-
-  p = hd->buf;
-#ifdef WORDS_BIGENDIAN
-#define X(a) do { *(u32*)p = hd->h##a ; p += 4; } while(0)
-#else /* little endian */
-#define X(a) do { *p++ = hd->h##a >> 24; *p++ = hd->h##a >> 16;         \
-                  *p++ = hd->h##a >> 8; *p++ = hd->h##a; } while(0)
-#endif
+
+  p = hd->bctx.buf;
+#define X(a) do { buf_put_be32(p, hd->h##a); p += 4; } while(0)
   X(0);
   X(1);
   X(2);
@@ -347,6 +618,9 @@ sha1_final(void *context)
   X(4);
 #undef X
 
+  hd->bctx.count = 0;
+
+  _gcry_burn_stack (burn);
 }
 
 static unsigned char *
@@ -354,22 +628,40 @@ sha1_read( void *context )
 {
   SHA1_CONTEXT *hd = context;
 
-  return hd->buf;
+  return hd->bctx.buf;
 }
 
+
 /****************
- * Shortcut functions which puts the hash value of the supplied buffer
+ * Shortcut functions which puts the hash value of the supplied buffer iov
  * into outbuf which must have a size of 20 bytes.
  */
-void
-_gcry_sha1_hash_buffer (void *outbuf, const void *buffer, size_t length)
+static void
+_gcry_sha1_hash_buffers (void *outbuf, size_t nbytes,
+                        const gcry_buffer_t *iov, int iovcnt)
 {
   SHA1_CONTEXT hd;
 
-  sha1_init (&hd);
-  sha1_write (&hd, buffer, length);
+  (void)nbytes;
+
+  sha1_init (&hd, 0);
+  for (;iovcnt > 0; iov++, iovcnt--)
+    _gcry_md_block_write (&hd,
+                          (const char*)iov[0].data + iov[0].off, iov[0].len);
   sha1_final (&hd);
-  memcpy (outbuf, hd.buf, 20);
+  memcpy (outbuf, hd.bctx.buf, 20);
+}
+
+/* Variant of the above shortcut function using a single buffer.  */
+void
+_gcry_sha1_hash_buffer (void *outbuf, const void *buffer, size_t length)
+{
+  gcry_buffer_t iov = { 0 };
+
+  iov.data = (void *)buffer;
+  iov.len = length;
+
+  _gcry_sha1_hash_buffers (outbuf, 20, &iov, 1);
 }
 
 
@@ -446,11 +738,11 @@ run_selftests (int algo, int extended, 
selftest_report_func_t report)
 
 
 
-static unsigned char asn[15] = /* Object ID is 1.3.14.3.2.26 */
+static const unsigned char asn[15] = /* Object ID is 1.3.14.3.2.26 */
   { 0x30, 0x21, 0x30, 0x09, 0x06, 0x05, 0x2b, 0x0e, 0x03,
     0x02, 0x1a, 0x05, 0x00, 0x04, 0x14 };
 
-static gcry_md_oid_spec_t oid_spec_sha1[] =
+static const gcry_md_oid_spec_t oid_spec_sha1[] =
   {
     /* iso.member-body.us.rsadsi.pkcs.pkcs-1.5 (sha1WithRSAEncryption) */
     { "1.2.840.113549.1.1.5" },
@@ -465,13 +757,12 @@ static gcry_md_oid_spec_t oid_spec_sha1[] =
     { NULL },
   };
 
-gcry_md_spec_t _gcry_digest_spec_sha1 =
+const gcry_md_spec_t _gcry_digest_spec_sha1 =
   {
+    GCRY_MD_SHA1, {0, 1},
     "SHA1", asn, DIM (asn), oid_spec_sha1, 20,
-    sha1_init, sha1_write, sha1_final, sha1_read,
-    sizeof (SHA1_CONTEXT)
-  };
-md_extra_spec_t _gcry_digest_extraspec_sha1 =
-  {
+    sha1_init, _gcry_md_block_write, sha1_final, sha1_read, NULL,
+    _gcry_sha1_hash_buffers,
+    sizeof (SHA1_CONTEXT),
     run_selftests
   };
diff --git a/grub-core/lib/libgcrypt/cipher/sha1.h 
b/grub-core/lib/libgcrypt/cipher/sha1.h
new file mode 100644
index 000000000..a35976584
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/sha1.h
@@ -0,0 +1,47 @@
+/* sha1.h - SHA-1 context definition
+ * Copyright (C) 1998, 2001, 2002, 2003, 2008 Free Software Foundation, Inc.
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef GCRY_SHA1_H
+#define GCRY_SHA1_H
+
+#include "hash-common.h"
+
+
+/* SHA1_USE_S390X_CRYPTO indicates whether to enable zSeries code. */
+#undef SHA1_USE_S390X_CRYPTO
+#if defined(HAVE_GCC_INLINE_ASM_S390X)
+# define SHA1_USE_S390X_CRYPTO 1
+#endif /* SHA1_USE_S390X_CRYPTO */
+
+
+/* We need this here for direct use by random-csprng.c. */
+typedef struct
+{
+  gcry_md_block_ctx_t bctx;
+  u32          h0,h1,h2,h3,h4;
+#ifdef SHA1_USE_S390X_CRYPTO
+  u32          final_len_msb, final_len_lsb; /* needs to be right after h4. */
+  int          use_s390x_crypto;
+#endif
+} SHA1_CONTEXT;
+
+
+void _gcry_sha1_mixblock_init (SHA1_CONTEXT *hd);
+unsigned int _gcry_sha1_mixblock (SHA1_CONTEXT *hd, void *blockof64byte);
+
+#endif /*GCRY_SHA1_H*/
diff --git a/grub-core/lib/libgcrypt/cipher/sha256-armv8-aarch32-ce.S 
b/grub-core/lib/libgcrypt/cipher/sha256-armv8-aarch32-ce.S
new file mode 100644
index 000000000..95778b40e
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/sha256-armv8-aarch32-ce.S
@@ -0,0 +1,231 @@
+/* sha256-armv8-aarch32-ce.S - ARM/CE accelerated SHA-256 transform function
+ * Copyright (C) 2016 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+
+#if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) && \
+    defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) && \
+    defined(HAVE_GCC_INLINE_ASM_AARCH32_CRYPTO) && defined(USE_SHA256)
+
+.syntax unified
+.arch armv8-a
+.fpu crypto-neon-fp-armv8
+.arm
+
+.text
+
+#ifdef __PIC__
+#  define GET_DATA_POINTER(reg, name, rtmp) \
+               ldr reg, 1f; \
+               ldr rtmp, 2f; \
+               b 3f; \
+       1:      .word _GLOBAL_OFFSET_TABLE_-(3f+8); \
+       2:      .word name(GOT); \
+       3:      add reg, pc, reg; \
+               ldr reg, [reg, rtmp];
+#else
+#  define GET_DATA_POINTER(reg, name, rtmp) ldr reg, =name
+#endif
+
+
+/* Constants */
+
+.align 4
+gcry_sha256_aarch32_ce_K:
+.LK:
+  .long 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5
+  .long 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5
+  .long 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3
+  .long 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174
+  .long 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc
+  .long 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da
+  .long 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7
+  .long 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967
+  .long 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13
+  .long 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85
+  .long 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3
+  .long 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070
+  .long 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5
+  .long 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3
+  .long 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208
+  .long 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
+
+
+/* Register macros */
+
+#define qH0123 q0
+#define qH4567 q1
+
+#define qABCD0 q2
+#define qABCD1 q3
+#define qEFGH  q4
+
+#define qT0 q5
+#define qT1 q6
+
+#define qW0 q8
+#define qW1 q9
+#define qW2 q10
+#define qW3 q11
+
+#define qK0 q12
+#define qK1 q13
+#define qK2 q14
+#define qK3 q15
+
+
+/* Round macros */
+
+#define _(...) /*_*/
+
+#define do_loadk(nk0, nk1) vld1.32 {nk0-nk1},[lr]!;
+#define do_add(a, b) vadd.u32 a, a, b;
+#define do_sha256su0(w0, w1) sha256su0.32 w0, w1;
+#define do_sha256su1(w0, w2, w3) sha256su1.32 w0, w2, w3;
+
+#define do_rounds(k, nk0, nk1, w0, w1, w2, w3, loadk_fn, add_fn, su0_fn, 
su1_fn) \
+        loadk_fn(   nk0, nk1     ); \
+        su0_fn(     w0, w1       ); \
+        vmov        qABCD1, qABCD0; \
+        sha256h.32  qABCD0, qEFGH, k; \
+        sha256h2.32 qEFGH, qABCD1, k; \
+        add_fn(     nk0, w2      ); \
+        su1_fn(     w0, w2, w3   );
+
+
+/* Other functional macros */
+
+#define CLEAR_REG(reg) vmov.i8 reg, #0;
+
+
+/*
+ * unsigned int
+ * _gcry_sha256_transform_armv8_ce (u32 state[8], const void *input_data,
+ *                                  size_t num_blks)
+ */
+.align 3
+.globl _gcry_sha256_transform_armv8_ce
+.type  _gcry_sha256_transform_armv8_ce,%function;
+_gcry_sha256_transform_armv8_ce:
+  /* input:
+   *   r0: ctx, CTX
+   *   r1: data (64*nblks bytes)
+   *   r2: nblks
+   */
+
+  cmp r2, #0;
+  push {r4,lr};
+  beq .Ldo_nothing;
+
+  vpush {q4-q7};
+
+  GET_DATA_POINTER(r4, .LK, lr);
+  mov lr, r4
+
+  vld1.32 {qH0123-qH4567}, [r0]  /* load state */
+
+  vld1.8 {qW0-qW1}, [r1]!
+  do_loadk(qK0, qK1)
+  vld1.8 {qW2-qW3}, [r1]!
+  vmov qABCD0, qH0123
+  vmov qEFGH, qH4567
+
+  vrev32.8 qW0, qW0
+  vrev32.8 qW1, qW1
+  vrev32.8 qW2, qW2
+  do_add(qK0, qW0)
+  vrev32.8 qW3, qW3
+  do_add(qK1, qW1)
+
+.Loop:
+  do_rounds(qK0, qK2, qK3, qW0, qW1, qW2, qW3, do_loadk, do_add, do_sha256su0, 
do_sha256su1)
+  subs r2,r2,#1
+  do_rounds(qK1, qK3, _  , qW1, qW2, qW3, qW0, _       , do_add, do_sha256su0, 
do_sha256su1)
+  do_rounds(qK2, qK0, qK1, qW2, qW3, qW0, qW1, do_loadk, do_add, do_sha256su0, 
do_sha256su1)
+  do_rounds(qK3, qK1, _  , qW3, qW0, qW1, qW2, _       , do_add, do_sha256su0, 
do_sha256su1)
+
+  do_rounds(qK0, qK2, qK3, qW0, qW1, qW2, qW3, do_loadk, do_add, do_sha256su0, 
do_sha256su1)
+  do_rounds(qK1, qK3, _  , qW1, qW2, qW3, qW0, _       , do_add, do_sha256su0, 
do_sha256su1)
+  do_rounds(qK2, qK0, qK1, qW2, qW3, qW0, qW1, do_loadk, do_add, do_sha256su0, 
do_sha256su1)
+  do_rounds(qK3, qK1, _  , qW3, qW0, qW1, qW2, _       , do_add, do_sha256su0, 
do_sha256su1)
+
+  do_rounds(qK0, qK2, qK3, qW0, qW1, qW2, qW3, do_loadk, do_add, do_sha256su0, 
do_sha256su1)
+  do_rounds(qK1, qK3, _  , qW1, qW2, qW3, qW0, _       , do_add, do_sha256su0, 
do_sha256su1)
+  do_rounds(qK2, qK0, qK1, qW2, qW3, qW0, qW1, do_loadk, do_add, do_sha256su0, 
do_sha256su1)
+  do_rounds(qK3, qK1, _  , qW3, qW0, qW1, qW2, _       , do_add, do_sha256su0, 
do_sha256su1)
+
+  beq .Lend
+
+  do_rounds(qK0, qK2, qK3, qW0, _  , qW2, qW3, do_loadk, do_add, _, _)
+  vld1.8 {qW0}, [r1]!
+  mov lr, r4
+  do_rounds(qK1, qK3, _  , qW1, _  , qW3, _  , _       , do_add, _, _)
+  vld1.8 {qW1}, [r1]!
+  vrev32.8 qW0, qW0
+  do_rounds(qK2, qK0, qK1, qW2, _  , qW0, _  , do_loadk, do_add, _, _)
+  vrev32.8 qW1, qW1
+  vld1.8 {qW2}, [r1]!
+  do_rounds(qK3, qK1, _  , qW3, _  , qW1, _  , _       , do_add, _, _)
+  vld1.8 {qW3}, [r1]!
+
+  vadd.u32 qH0123, qABCD0
+  vadd.u32 qH4567, qEFGH
+
+  vrev32.8 qW2, qW2
+  vmov qABCD0, qH0123
+  vrev32.8 qW3, qW3
+  vmov qEFGH, qH4567
+
+  b .Loop
+
+.Lend:
+
+  do_rounds(qK0, qK2, qK3, qW0, _  , qW2, qW3, do_loadk, do_add, _, _)
+  do_rounds(qK1, qK3, _  , qW1, _  , qW3, _  , _       , do_add, _, _)
+  do_rounds(qK2, _  , _  , qW2, _  , _  , _  , _       , _, _, _)
+  do_rounds(qK3, _  , _  , qW3, _  , _  , _  , _       , _, _, _)
+
+  CLEAR_REG(qW0)
+  CLEAR_REG(qW1)
+  CLEAR_REG(qW2)
+  CLEAR_REG(qW3)
+  CLEAR_REG(qK0)
+  CLEAR_REG(qK1)
+  CLEAR_REG(qK2)
+  CLEAR_REG(qK3)
+
+  vadd.u32 qH0123, qABCD0
+  vadd.u32 qH4567, qEFGH
+
+  CLEAR_REG(qABCD0)
+  CLEAR_REG(qABCD1)
+  CLEAR_REG(qEFGH)
+
+  vst1.32 {qH0123-qH4567}, [r0] /* store state */
+
+  CLEAR_REG(qH0123)
+  CLEAR_REG(qH4567)
+  vpop {q4-q7}
+
+.Ldo_nothing:
+  mov r0, #0
+  pop {r4,pc}
+.size _gcry_sha256_transform_armv8_ce,.-_gcry_sha256_transform_armv8_ce;
+
+#endif
diff --git a/grub-core/lib/libgcrypt/cipher/sha256-armv8-aarch64-ce.S 
b/grub-core/lib/libgcrypt/cipher/sha256-armv8-aarch64-ce.S
new file mode 100644
index 000000000..d0fa62857
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/sha256-armv8-aarch64-ce.S
@@ -0,0 +1,215 @@
+/* sha256-armv8-aarch64-ce.S - ARM/CE accelerated SHA-256 transform function
+ * Copyright (C) 2016 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "asm-common-aarch64.h"
+
+#if defined(__AARCH64EL__) && \
+    defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) && \
+    defined(HAVE_GCC_INLINE_ASM_AARCH64_CRYPTO) && defined(USE_SHA256)
+
+.cpu generic+simd+crypto
+
+.text
+
+
+/* Constants */
+
+.align 4
+gcry_sha256_aarch64_ce_K:
+.LK:
+  .long 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5
+  .long 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5
+  .long 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3
+  .long 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174
+  .long 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc
+  .long 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da
+  .long 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7
+  .long 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967
+  .long 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13
+  .long 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85
+  .long 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3
+  .long 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070
+  .long 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5
+  .long 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3
+  .long 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208
+  .long 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
+
+
+/* Register macros */
+
+#define vH0123 v0
+#define vH4567 v1
+
+#define vABCD0 v2
+#define qABCD0 q2
+#define vABCD1 v3
+#define qABCD1 q3
+#define vEFGH  v4
+#define qEFGH  q4
+
+#define vT0 v5
+#define vT1 v6
+
+#define vW0 v16
+#define vW1 v17
+#define vW2 v18
+#define vW3 v19
+
+#define vK0 v20
+#define vK1 v21
+#define vK2 v22
+#define vK3 v23
+
+
+/* Round macros */
+
+#define _(...) /*_*/
+
+#define do_loadk(nk0, nk1) ld1 {nk0.16b-nk1.16b},[x3],#32;
+#define do_add(a, b) add a.4s, a.4s, b.4s;
+#define do_sha256su0(w0, w1) sha256su0 w0.4s, w1.4s;
+#define do_sha256su1(w0, w2, w3) sha256su1 w0.4s, w2.4s, w3.4s;
+
+#define do_rounds(k, nk0, nk1, w0, w1, w2, w3, loadk_fn, add_fn, su0_fn, 
su1_fn) \
+        loadk_fn(   v##nk0, v##nk1     ); \
+        su0_fn(     v##w0, v##w1       ); \
+        mov         vABCD1.16b, vABCD0.16b; \
+        sha256h     qABCD0, qEFGH, v##k.4s; \
+        sha256h2    qEFGH, qABCD1, v##k.4s; \
+        add_fn(     v##nk0, v##w2      ); \
+        su1_fn(     v##w0, v##w2, v##w3   );
+
+
+/* Other functional macros */
+
+#define CLEAR_REG(reg) movi reg.16b, #0;
+
+
+/*
+ * unsigned int
+ * _gcry_sha256_transform_armv8_ce (u32 state[8], const void *input_data,
+ *                                  size_t num_blks)
+ */
+.align 3
+.globl _gcry_sha256_transform_armv8_ce
+ELF(.type  _gcry_sha256_transform_armv8_ce,%function;)
+_gcry_sha256_transform_armv8_ce:
+  /* input:
+   *   r0: ctx, CTX
+   *   r1: data (64*nblks bytes)
+   *   r2: nblks
+   */
+  CFI_STARTPROC();
+
+  cbz x2, .Ldo_nothing;
+
+  GET_DATA_POINTER(x3, .LK);
+  mov x4, x3
+
+  ld1 {vH0123.4s-vH4567.4s}, [x0]  /* load state */
+
+  ld1 {vW0.16b-vW1.16b}, [x1], #32
+  do_loadk(vK0, vK1)
+  ld1 {vW2.16b-vW3.16b}, [x1], #32
+  mov vABCD0.16b, vH0123.16b
+  mov vEFGH.16b, vH4567.16b
+
+  rev32 vW0.16b, vW0.16b
+  rev32 vW1.16b, vW1.16b
+  rev32 vW2.16b, vW2.16b
+  do_add(vK0, vW0)
+  rev32 vW3.16b, vW3.16b
+  do_add(vK1, vW1)
+
+.Loop:
+  do_rounds(K0, K2, K3, W0, W1, W2, W3, do_loadk, do_add, do_sha256su0, 
do_sha256su1)
+  sub x2,x2,#1
+  do_rounds(K1, K3, _ , W1, W2, W3, W0, _       , do_add, do_sha256su0, 
do_sha256su1)
+  do_rounds(K2, K0, K1, W2, W3, W0, W1, do_loadk, do_add, do_sha256su0, 
do_sha256su1)
+  do_rounds(K3, K1, _ , W3, W0, W1, W2, _       , do_add, do_sha256su0, 
do_sha256su1)
+
+  do_rounds(K0, K2, K3, W0, W1, W2, W3, do_loadk, do_add, do_sha256su0, 
do_sha256su1)
+  do_rounds(K1, K3, _ , W1, W2, W3, W0, _       , do_add, do_sha256su0, 
do_sha256su1)
+  do_rounds(K2, K0, K1, W2, W3, W0, W1, do_loadk, do_add, do_sha256su0, 
do_sha256su1)
+  do_rounds(K3, K1, _ , W3, W0, W1, W2, _       , do_add, do_sha256su0, 
do_sha256su1)
+
+  do_rounds(K0, K2, K3, W0, W1, W2, W3, do_loadk, do_add, do_sha256su0, 
do_sha256su1)
+  do_rounds(K1, K3, _ , W1, W2, W3, W0, _       , do_add, do_sha256su0, 
do_sha256su1)
+  do_rounds(K2, K0, K1, W2, W3, W0, W1, do_loadk, do_add, do_sha256su0, 
do_sha256su1)
+  do_rounds(K3, K1, _ , W3, W0, W1, W2, _       , do_add, do_sha256su0, 
do_sha256su1)
+
+  cbz x2, .Lend
+
+  do_rounds(K0, K2, K3, W0, _  , W2, W3, do_loadk, do_add, _, _)
+  ld1 {vW0.16b}, [x1], #16
+  mov x3, x4
+  do_rounds(K1, K3, _ , W1, _  , W3, _  , _       , do_add, _, _)
+  ld1 {vW1.16b}, [x1], #16
+  rev32 vW0.16b, vW0.16b
+  do_rounds(K2, K0, K1, W2, _  , W0, _  , do_loadk, do_add, _, _)
+  rev32 vW1.16b, vW1.16b
+  ld1 {vW2.16b}, [x1], #16
+  do_rounds(K3, K1, _ , W3, _  , W1, _  , _       , do_add, _, _)
+  ld1 {vW3.16b}, [x1], #16
+
+  do_add(vH0123, vABCD0)
+  do_add(vH4567, vEFGH)
+
+  rev32 vW2.16b, vW2.16b
+  mov vABCD0.16b, vH0123.16b
+  rev32 vW3.16b, vW3.16b
+  mov vEFGH.16b, vH4567.16b
+
+  b .Loop
+
+.Lend:
+
+  do_rounds(K0, K2, K3, W0, _  , W2, W3, do_loadk, do_add, _, _)
+  do_rounds(K1, K3, _ , W1, _  , W3, _  , _       , do_add, _, _)
+  do_rounds(K2, _ , _ , W2, _  , _  , _  , _       , _, _, _)
+  do_rounds(K3, _ , _ , W3, _  , _  , _  , _       , _, _, _)
+
+  CLEAR_REG(vW0)
+  CLEAR_REG(vW1)
+  CLEAR_REG(vW2)
+  CLEAR_REG(vW3)
+  CLEAR_REG(vK0)
+  CLEAR_REG(vK1)
+  CLEAR_REG(vK2)
+  CLEAR_REG(vK3)
+
+  do_add(vH0123, vABCD0)
+  do_add(vH4567, vEFGH)
+
+  CLEAR_REG(vABCD0)
+  CLEAR_REG(vABCD1)
+  CLEAR_REG(vEFGH)
+
+  st1 {vH0123.4s-vH4567.4s}, [x0] /* store state */
+
+  CLEAR_REG(vH0123)
+  CLEAR_REG(vH4567)
+
+.Ldo_nothing:
+  mov x0, #0
+  ret_spec_stop
+  CFI_ENDPROC();
+ELF(.size _gcry_sha256_transform_armv8_ce,.-_gcry_sha256_transform_armv8_ce;)
+
+#endif
diff --git a/grub-core/lib/libgcrypt/cipher/sha256-avx-amd64.S 
b/grub-core/lib/libgcrypt/cipher/sha256-avx-amd64.S
new file mode 100644
index 000000000..be8a799df
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/sha256-avx-amd64.S
@@ -0,0 +1,506 @@
+/*
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright (c) 2012, Intel Corporation
+;
+; All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions are
+; met:
+;
+; * Redistributions of source code must retain the above copyright
+;   notice, this list of conditions and the following disclaimer.
+;
+; * Redistributions in binary form must reproduce the above copyright
+;   notice, this list of conditions and the following disclaimer in the
+;   documentation and/or other materials provided with the
+;   distribution.
+;
+; * Neither the name of the Intel Corporation nor the names of its
+;   contributors may be used to endorse or promote products derived from
+;   this software without specific prior written permission.
+;
+;
+; THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION "AS IS" AND ANY
+; EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+; PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR
+; CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;
+; This code is described in an Intel White-Paper:
+; "Fast SHA-256 Implementations on Intel Architecture Processors"
+;
+; To find it, surf to http://www.intel.com/p/en_US/embedded
+; and search for that title.
+; The paper is expected to be released roughly at the end of April, 2012
+;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; This code schedules 1 blocks at a time, with 4 lanes per block
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+*/
+/*
+ * Conversion to GAS assembly and integration to libgcrypt
+ *  by Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * Note: Based on the SSSE3 implementation.
+ */
+
+#ifdef __x86_64
+#include <config.h>
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
+    defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \
+    defined(HAVE_GCC_INLINE_ASM_AVX) && defined(USE_SHA256)
+
+#include "asm-common-amd64.h"
+
+.intel_syntax noprefix
+
+#define        VMOVDQ vmovdqu /* assume buffers not aligned */
+
+#define ROR(p1, p2) \
+       /* shld is faster than ror on Intel Sandybridge */ \
+       shld    p1, p1, (32 - p2);
+
+/*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; Define Macros*/
+
+/* addm [mem], reg
+ * Add reg to mem using reg-mem add and store */
+#define addm(p1, p2) \
+       add     p2, p1; \
+       mov     p1, p2;
+
+/*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;*/
+
+/* COPY_XMM_AND_BSWAP xmm, [mem], byte_flip_mask
+ * Load xmm with mem and byte swap each dword */
+#define COPY_XMM_AND_BSWAP(p1, p2, p3) \
+       VMOVDQ p1, p2; \
+       vpshufb p1, p1, p3;
+
+/*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;*/
+
+#define X0 xmm4
+#define X1 xmm5
+#define X2 xmm6
+#define X3 xmm7
+
+#define XTMP0 xmm0
+#define XTMP1 xmm1
+#define XTMP2 xmm2
+#define XTMP3 xmm3
+#define XTMP4 xmm8
+#define XFER xmm9
+
+#define SHUF_00BA xmm10 /* shuffle xBxA -> 00BA */
+#define SHUF_DC00 xmm11 /* shuffle xDxC -> DC00 */
+#define BYTE_FLIP_MASK xmm12
+
+#define NUM_BLKS rdx   /* 3rd arg */
+#define CTX rsi        /* 2nd arg */
+#define INP rdi        /* 1st arg */
+
+#define SRND rdi       /* clobbers INP */
+#define c ecx
+#define d r8d
+#define e edx
+
+#define TBL rbp
+#define a eax
+#define b ebx
+
+#define f r9d
+#define g r10d
+#define h r11d
+
+#define y0 r13d
+#define y1 r14d
+#define y2 r15d
+
+
+
+#define _INP_END_SIZE  8
+#define _INP_SIZE      8
+#define _XFER_SIZE     8
+#define _XMM_SAVE_SIZE 0
+/* STACK_SIZE plus pushes must be an odd multiple of 8 */
+#define _ALIGN_SIZE    8
+
+#define _INP_END       0
+#define _INP           (_INP_END  + _INP_END_SIZE)
+#define _XFER          (_INP      + _INP_SIZE)
+#define _XMM_SAVE      (_XFER     + _XFER_SIZE + _ALIGN_SIZE)
+#define STACK_SIZE     (_XMM_SAVE + _XMM_SAVE_SIZE)
+
+
+#define FOUR_ROUNDS_AND_SCHED_0(X0, X1, X2, X3, a, b, c, d, e, f, g, h) \
+               /* compute s0 four at a time and s1 two at a time */; \
+               /* compute W[-16] + W[-7] 4 at a time */; \
+       mov     y0, e           /* y0 = e */; \
+       ROR(    y0, (25-11))    /* y0 = e >> (25-11) */; \
+       mov     y1, a           /* y1 = a */; \
+               vpalignr        XTMP0, X3, X2, 4        /* XTMP0 = W[-7] */; \
+       ROR(    y1, (22-13))    /* y1 = a >> (22-13) */; \
+       xor     y0, e           /* y0 = e ^ (e >> (25-11)) */; \
+       mov     y2, f           /* y2 = f */; \
+       ROR(    y0, (11-6))     /* y0 = (e >> (11-6)) ^ (e >> (25-6)) */; \
+       xor     y1, a           /* y1 = a ^ (a >> (22-13) */; \
+       xor     y2, g           /* y2 = f^g */; \
+               vpaddd  XTMP0, XTMP0, X0        /* XTMP0 = W[-7] + W[-16] */; \
+       xor     y0, e           /* y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) */; \
+       and     y2, e           /* y2 = (f^g)&e */; \
+       ROR(    y1, (13-2))     /* y1 = (a >> (13-2)) ^ (a >> (22-2)) */; \
+               /* compute s0 */; \
+               vpalignr        XTMP1, X1, X0, 4        /* XTMP1 = W[-15] */; \
+       xor     y1, a           /* y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) */; \
+       ROR(    y0, 6)          /* y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) */; \
+       xor     y2, g           /* y2 = CH = ((f^g)&e)^g */; \
+       ROR(    y1, 2)          /* y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) */; \
+       add     y2, y0          /* y2 = S1 + CH */; \
+       add     y2, [rsp + _XFER + 0*4] /* y2 = k + w + S1 + CH */; \
+       mov     y0, a           /* y0 = a */; \
+       add     h, y2           /* h = h + S1 + CH + k + w */; \
+       mov     y2, a           /* y2 = a */; \
+               vpslld  XTMP2, XTMP1, (32-7); \
+       or      y0, c           /* y0 = a|c */; \
+       add     d, h            /* d = d + h + S1 + CH + k + w */; \
+       and     y2, c           /* y2 = a&c */; \
+               vpsrld  XTMP3, XTMP1, 7; \
+       and     y0, b           /* y0 = (a|c)&b */; \
+       add     h, y1           /* h = h + S1 + CH + k + w + S0 */; \
+               vpor    XTMP3, XTMP3, XTMP2     /* XTMP1 = W[-15] ror 7 */; \
+       or      y0, y2          /* y0 = MAJ = (a|c)&b)|(a&c) */; \
+       lea     h, [h + y0]     /* h = h + S1 + CH + k + w + S0 + MAJ */
+
+#define FOUR_ROUNDS_AND_SCHED_1(X0, X1, X2, X3, a, b, c, d, e, f, g, h) \
+       mov     y0, e           /* y0 = e */; \
+       mov     y1, a           /* y1 = a */; \
+       ROR(    y0, (25-11))    /* y0 = e >> (25-11) */; \
+       xor     y0, e           /* y0 = e ^ (e >> (25-11)) */; \
+       mov     y2, f           /* y2 = f */; \
+       ROR(    y1, (22-13))    /* y1 = a >> (22-13) */; \
+               vpslld  XTMP2, XTMP1, (32-18); \
+       xor     y1, a           /* y1 = a ^ (a >> (22-13) */; \
+       ROR(    y0, (11-6))     /* y0 = (e >> (11-6)) ^ (e >> (25-6)) */; \
+       xor     y2, g           /* y2 = f^g */; \
+               vpsrld  XTMP4, XTMP1, 18; \
+       ROR(    y1, (13-2))     /* y1 = (a >> (13-2)) ^ (a >> (22-2)) */; \
+       xor     y0, e           /* y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) */; \
+       and     y2, e           /* y2 = (f^g)&e */; \
+       ROR(    y0, 6)          /* y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) */; \
+               vpxor   XTMP4, XTMP4, XTMP3; \
+       xor     y1, a           /* y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) */; \
+       xor     y2, g           /* y2 = CH = ((f^g)&e)^g */; \
+               vpsrld  XTMP1, XTMP1, 3 /* XTMP4 = W[-15] >> 3 */; \
+       add     y2, y0          /* y2 = S1 + CH */; \
+       add     y2, [rsp + _XFER + 1*4] /* y2 = k + w + S1 + CH */; \
+       ROR(    y1, 2)          /* y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) */; \
+               vpxor   XTMP1, XTMP1, XTMP2     /* XTMP1 = W[-15] ror 7 ^ 
W[-15] ror 18 */; \
+       mov     y0, a           /* y0 = a */; \
+       add     h, y2           /* h = h + S1 + CH + k + w */; \
+       mov     y2, a           /* y2 = a */; \
+               vpxor   XTMP1, XTMP1, XTMP4     /* XTMP1 = s0 */; \
+       or      y0, c           /* y0 = a|c */; \
+       add     d, h            /* d = d + h + S1 + CH + k + w */; \
+       and     y2, c           /* y2 = a&c */; \
+               /* compute low s1 */; \
+               vpshufd XTMP2, X3, 0b11111010   /* XTMP2 = W[-2] {BBAA} */; \
+       and     y0, b           /* y0 = (a|c)&b */; \
+       add     h, y1           /* h = h + S1 + CH + k + w + S0 */; \
+               vpaddd  XTMP0, XTMP0, XTMP1     /* XTMP0 = W[-16] + W[-7] + s0 
*/; \
+       or      y0, y2          /* y0 = MAJ = (a|c)&b)|(a&c) */; \
+       lea     h, [h + y0]     /* h = h + S1 + CH + k + w + S0 + MAJ */
+
+#define FOUR_ROUNDS_AND_SCHED_2(X0, X1, X2, X3, a, b, c, d, e, f, g, h) \
+       mov     y0, e           /* y0 = e */; \
+       mov     y1, a           /* y1 = a */; \
+       ROR(    y0, (25-11))    /* y0 = e >> (25-11) */; \
+       xor     y0, e           /* y0 = e ^ (e >> (25-11)) */; \
+       ROR(    y1, (22-13))    /* y1 = a >> (22-13) */; \
+       mov     y2, f           /* y2 = f */; \
+       xor     y1, a           /* y1 = a ^ (a >> (22-13) */; \
+       ROR(    y0, (11-6))     /* y0 = (e >> (11-6)) ^ (e >> (25-6)) */; \
+               vpsrlq  XTMP3, XTMP2, 17        /* XTMP2 = W[-2] ror 17 {xBxA} 
*/; \
+       xor     y2, g           /* y2 = f^g */; \
+               vpsrlq  XTMP4, XTMP2, 19        /* XTMP3 = W[-2] ror 19 {xBxA} 
*/; \
+       xor     y0, e           /* y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) */; \
+       and     y2, e           /* y2 = (f^g)&e */; \
+               vpsrld  XTMP2, XTMP2, 10        /* XTMP4 = W[-2] >> 10 {BBAA} 
*/; \
+       ROR(    y1, (13-2))     /* y1 = (a >> (13-2)) ^ (a >> (22-2)) */; \
+       xor     y1, a           /* y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) */; \
+       xor     y2, g           /* y2 = CH = ((f^g)&e)^g */; \
+       ROR(    y0, 6)          /* y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) */; \
+               vpxor   XTMP2, XTMP2, XTMP3; \
+       add     y2, y0          /* y2 = S1 + CH */; \
+       ROR(    y1, 2)          /* y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) */; \
+       add     y2, [rsp + _XFER + 2*4] /* y2 = k + w + S1 + CH */; \
+               vpxor   XTMP4, XTMP4, XTMP2     /* XTMP4 = s1 {xBxA} */; \
+       mov     y0, a           /* y0 = a */; \
+       add     h, y2           /* h = h + S1 + CH + k + w */; \
+       mov     y2, a           /* y2 = a */; \
+               vpshufb XTMP4, XTMP4, SHUF_00BA /* XTMP4 = s1 {00BA} */; \
+       or      y0, c           /* y0 = a|c */; \
+       add     d, h            /* d = d + h + S1 + CH + k + w */; \
+       and     y2, c           /* y2 = a&c */; \
+               vpaddd  XTMP0, XTMP0, XTMP4     /* XTMP0 = {..., ..., W[1], 
W[0]} */; \
+       and     y0, b           /* y0 = (a|c)&b */; \
+       add     h, y1           /* h = h + S1 + CH + k + w + S0 */; \
+               /* compute high s1 */; \
+               vpshufd XTMP2, XTMP0, 0b01010000 /* XTMP2 = W[-2] {DDCC} */; \
+       or      y0, y2          /* y0 = MAJ = (a|c)&b)|(a&c) */; \
+       lea     h, [h + y0]     /* h = h + S1 + CH + k + w + S0 + MAJ */
+
+#define FOUR_ROUNDS_AND_SCHED_3(X0, X1, X2, X3, a, b, c, d, e, f, g, h) \
+       mov     y0, e           /* y0 = e */; \
+       ROR(    y0, (25-11))    /* y0 = e >> (25-11) */; \
+       mov     y1, a           /* y1 = a */; \
+       ROR(    y1, (22-13))    /* y1 = a >> (22-13) */; \
+       xor     y0, e           /* y0 = e ^ (e >> (25-11)) */; \
+       mov     y2, f           /* y2 = f */; \
+       ROR(    y0, (11-6))     /* y0 = (e >> (11-6)) ^ (e >> (25-6)) */; \
+               vpsrlq  XTMP3, XTMP2, 17        /* XTMP2 = W[-2] ror 17 {xDxC} 
*/; \
+       xor     y1, a           /* y1 = a ^ (a >> (22-13) */; \
+       xor     y2, g           /* y2 = f^g */; \
+               vpsrlq  X0, XTMP2, 19   /* XTMP3 = W[-2] ror 19 {xDxC} */; \
+       xor     y0, e           /* y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) */; \
+       and     y2, e           /* y2 = (f^g)&e */; \
+       ROR(    y1, (13-2))     /* y1 = (a >> (13-2)) ^ (a >> (22-2)) */; \
+               vpsrld  XTMP2, XTMP2,    10     /* X0 = W[-2] >> 10 {DDCC} */; \
+       xor     y1, a           /* y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) */; \
+       ROR(    y0, 6)          /* y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) */; \
+       xor     y2, g           /* y2 = CH = ((f^g)&e)^g */; \
+               vpxor   XTMP2, XTMP2, XTMP3; \
+       ROR(    y1, 2)          /* y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) */; \
+       add     y2, y0          /* y2 = S1 + CH */; \
+       add     y2, [rsp + _XFER + 3*4] /* y2 = k + w + S1 + CH */; \
+               vpxor   X0, X0, XTMP2   /* X0 = s1 {xDxC} */; \
+       mov     y0, a           /* y0 = a */; \
+       add     h, y2           /* h = h + S1 + CH + k + w */; \
+       mov     y2, a           /* y2 = a */; \
+               vpshufb X0, X0, SHUF_DC00       /* X0 = s1 {DC00} */; \
+       or      y0, c           /* y0 = a|c */; \
+       add     d, h            /* d = d + h + S1 + CH + k + w */; \
+       and     y2, c           /* y2 = a&c */; \
+               vpaddd  X0, X0, XTMP0   /* X0 = {W[3], W[2], W[1], W[0]} */; \
+       and     y0, b           /* y0 = (a|c)&b */; \
+       add     h, y1           /* h = h + S1 + CH + k + w + S0 */; \
+       or      y0, y2          /* y0 = MAJ = (a|c)&b)|(a&c) */; \
+       lea     h, [h + y0]     /* h = h + S1 + CH + k + w + S0 + MAJ */
+
+#define FOUR_ROUNDS_AND_SCHED(X0, X1, X2, X3, a, b, c, d, e, f, g, h) \
+       FOUR_ROUNDS_AND_SCHED_0(X0, X1, X2, X3, a, b, c, d, e, f, g, h); \
+       FOUR_ROUNDS_AND_SCHED_1(X0, X1, X2, X3, h, a, b, c, d, e, f, g); \
+       FOUR_ROUNDS_AND_SCHED_2(X0, X1, X2, X3, g, h, a, b, c, d, e, f); \
+       FOUR_ROUNDS_AND_SCHED_3(X0, X1, X2, X3, f, g, h, a, b, c, d, e);
+
+/* input is [rsp + _XFER + %1 * 4] */
+#define DO_ROUND(i1, a, b, c, d, e, f, g, h) \
+       mov     y0, e           /* y0 = e */; \
+       ROR(    y0, (25-11))    /* y0 = e >> (25-11) */; \
+       mov     y1, a           /* y1 = a */; \
+       xor     y0, e           /* y0 = e ^ (e >> (25-11)) */; \
+       ROR(    y1, (22-13))    /* y1 = a >> (22-13) */; \
+       mov     y2, f           /* y2 = f */; \
+       xor     y1, a           /* y1 = a ^ (a >> (22-13) */; \
+       ROR(    y0, (11-6))     /* y0 = (e >> (11-6)) ^ (e >> (25-6)) */; \
+       xor     y2, g           /* y2 = f^g */; \
+       xor     y0, e           /* y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) */; \
+       ROR(    y1, (13-2))     /* y1 = (a >> (13-2)) ^ (a >> (22-2)) */; \
+       and     y2, e           /* y2 = (f^g)&e */; \
+       xor     y1, a           /* y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) */; \
+       ROR(    y0, 6)          /* y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) */; \
+       xor     y2, g           /* y2 = CH = ((f^g)&e)^g */; \
+       add     y2, y0          /* y2 = S1 + CH */; \
+       ROR(    y1, 2)          /* y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) */; \
+       add     y2, [rsp + _XFER + i1 * 4]      /* y2 = k + w + S1 + CH */; \
+       mov     y0, a           /* y0 = a */; \
+       add     h, y2           /* h = h + S1 + CH + k + w */; \
+       mov     y2, a           /* y2 = a */; \
+       or      y0, c           /* y0 = a|c */; \
+       add     d, h            /* d = d + h + S1 + CH + k + w */; \
+       and     y2, c           /* y2 = a&c */; \
+       and     y0, b           /* y0 = (a|c)&b */; \
+       add     h, y1           /* h = h + S1 + CH + k + w + S0 */; \
+       or      y0, y2          /* y0 = MAJ = (a|c)&b)|(a&c) */; \
+       lea     h, [h + y0]     /* h = h + S1 + CH + k + w + S0 + MAJ */
+
+/*
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; void sha256_avx(void *input_data, UINT32 digest[8], UINT64 num_blks)
+;; arg 1 : pointer to input data
+;; arg 2 : pointer to digest
+;; arg 3 : Num blocks
+*/
+.text
+.globl _gcry_sha256_transform_amd64_avx
+ELF(.type  _gcry_sha256_transform_amd64_avx,@function;)
+.align 16
+_gcry_sha256_transform_amd64_avx:
+       CFI_STARTPROC()
+       vzeroupper
+
+       push    rbx
+       CFI_PUSH(rbx)
+       push    rbp
+       CFI_PUSH(rbp)
+       push    r13
+       CFI_PUSH(r13)
+       push    r14
+       CFI_PUSH(r14)
+       push    r15
+       CFI_PUSH(r15)
+
+       sub     rsp, STACK_SIZE
+       CFI_ADJUST_CFA_OFFSET(STACK_SIZE);
+
+       shl     NUM_BLKS, 6     /* convert to bytes */
+       jz      .Ldone_hash
+       add     NUM_BLKS, INP   /* pointer to end of data */
+       mov     [rsp + _INP_END], NUM_BLKS
+
+       /* load initial digest */
+       mov     a,[4*0 + CTX]
+       mov     b,[4*1 + CTX]
+       mov     c,[4*2 + CTX]
+       mov     d,[4*3 + CTX]
+       mov     e,[4*4 + CTX]
+       mov     f,[4*5 + CTX]
+       mov     g,[4*6 + CTX]
+       mov     h,[4*7 + CTX]
+
+       vmovdqa BYTE_FLIP_MASK, [.LPSHUFFLE_BYTE_FLIP_MASK ADD_RIP]
+       vmovdqa SHUF_00BA, [.L_SHUF_00BA ADD_RIP]
+       vmovdqa SHUF_DC00, [.L_SHUF_DC00 ADD_RIP]
+
+.Loop0:
+       lea     TBL, [.LK256 ADD_RIP]
+
+       /* byte swap first 16 dwords */
+       COPY_XMM_AND_BSWAP(X0, [INP + 0*16], BYTE_FLIP_MASK)
+       COPY_XMM_AND_BSWAP(X1, [INP + 1*16], BYTE_FLIP_MASK)
+       COPY_XMM_AND_BSWAP(X2, [INP + 2*16], BYTE_FLIP_MASK)
+       COPY_XMM_AND_BSWAP(X3, [INP + 3*16], BYTE_FLIP_MASK)
+
+       mov     [rsp + _INP], INP
+
+       /* schedule 48 input dwords, by doing 3 rounds of 16 each */
+       mov     SRND, 3
+.align 16
+.Loop1:
+       vpaddd  XFER, X0, [TBL + 0*16]
+       vmovdqa [rsp + _XFER], XFER
+       FOUR_ROUNDS_AND_SCHED(X0, X1, X2, X3, a, b, c, d, e, f, g, h)
+
+       vpaddd  XFER, X1, [TBL + 1*16]
+       vmovdqa [rsp + _XFER], XFER
+       FOUR_ROUNDS_AND_SCHED(X1, X2, X3, X0, e, f, g, h, a, b, c, d)
+
+       vpaddd  XFER, X2, [TBL + 2*16]
+       vmovdqa [rsp + _XFER], XFER
+       FOUR_ROUNDS_AND_SCHED(X2, X3, X0, X1, a, b, c, d, e, f, g, h)
+
+       vpaddd  XFER, X3, [TBL + 3*16]
+       vmovdqa [rsp + _XFER], XFER
+       add     TBL, 4*16
+       FOUR_ROUNDS_AND_SCHED(X3, X0, X1, X2, e, f, g, h, a, b, c, d)
+
+       sub     SRND, 1
+       jne     .Loop1
+
+       mov     SRND, 2
+.Loop2:
+       vpaddd  X0, X0, [TBL + 0*16]
+       vmovdqa [rsp + _XFER], X0
+       DO_ROUND(0, a, b, c, d, e, f, g, h)
+       DO_ROUND(1, h, a, b, c, d, e, f, g)
+       DO_ROUND(2, g, h, a, b, c, d, e, f)
+       DO_ROUND(3, f, g, h, a, b, c, d, e)
+       vpaddd  X1, X1, [TBL + 1*16]
+       vmovdqa [rsp + _XFER], X1
+       add     TBL, 2*16
+       DO_ROUND(0, e, f, g, h, a, b, c, d)
+       DO_ROUND(1, d, e, f, g, h, a, b, c)
+       DO_ROUND(2, c, d, e, f, g, h, a, b)
+       DO_ROUND(3, b, c, d, e, f, g, h, a)
+
+       vmovdqa X0, X2
+       vmovdqa X1, X3
+
+       sub     SRND, 1
+       jne     .Loop2
+
+       addm([4*0 + CTX],a)
+       addm([4*1 + CTX],b)
+       addm([4*2 + CTX],c)
+       addm([4*3 + CTX],d)
+       addm([4*4 + CTX],e)
+       addm([4*5 + CTX],f)
+       addm([4*6 + CTX],g)
+       addm([4*7 + CTX],h)
+
+       mov     INP, [rsp + _INP]
+       add     INP, 64
+       cmp     INP, [rsp + _INP_END]
+       jne     .Loop0
+
+.Ldone_hash:
+       vzeroall
+
+       vmovdqa [rsp + _XFER], XFER
+       xor     eax, eax
+
+       add     rsp, STACK_SIZE
+       CFI_ADJUST_CFA_OFFSET(-STACK_SIZE);
+
+       pop     r15
+       CFI_POP(r15)
+       pop     r14
+       CFI_POP(r14)
+       pop     r13
+       CFI_POP(r13)
+       pop     rbp
+       CFI_POP(rbp)
+       pop     rbx
+       CFI_POP(rbx)
+
+       ret_spec_stop
+       CFI_ENDPROC()
+
+
+.align 16
+.LK256:
+       .long   0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
+       .long   0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
+       .long   0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
+       .long   0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
+       .long   0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
+       .long   0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
+       .long   0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
+       .long   0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
+       .long   0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
+       .long   0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
+       .long   0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
+       .long   0xd192e819,0xd6990624,0xf40e3585,0x106aa070
+       .long   0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
+       .long   0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
+       .long   0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
+       .long   0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
+
+.LPSHUFFLE_BYTE_FLIP_MASK: .octa 0x0c0d0e0f08090a0b0405060700010203
+
+/* shuffle xBxA -> 00BA */
+.L_SHUF_00BA:              .octa 0xFFFFFFFFFFFFFFFF0b0a090803020100
+
+/* shuffle xDxC -> DC00 */
+.L_SHUF_DC00:              .octa 0x0b0a090803020100FFFFFFFFFFFFFFFF
+
+#endif
+#endif
diff --git a/grub-core/lib/libgcrypt/cipher/sha256-avx2-bmi2-amd64.S 
b/grub-core/lib/libgcrypt/cipher/sha256-avx2-bmi2-amd64.S
new file mode 100644
index 000000000..60ad442c0
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/sha256-avx2-bmi2-amd64.S
@@ -0,0 +1,527 @@
+/*
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright (c) 2012, Intel Corporation
+;
+; All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions are
+; met:
+;
+; * Redistributions of source code must retain the above copyright
+;   notice, this list of conditions and the following disclaimer.
+;
+; * Redistributions in binary form must reproduce the above copyright
+;   notice, this list of conditions and the following disclaimer in the
+;   documentation and/or other materials provided with the
+;   distribution.
+;
+; * Neither the name of the Intel Corporation nor the names of its
+;   contributors may be used to endorse or promote products derived from
+;   this software without specific prior written permission.
+;
+;
+; THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION "AS IS" AND ANY
+; EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+; PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR
+; CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;
+; This code is described in an Intel White-Paper:
+; "Fast SHA-256 Implementations on Intel Architecture Processors"
+;
+; To find it, surf to http://www.intel.com/p/en_US/embedded
+; and search for that title.
+; The paper is expected to be released roughly at the end of April, 2012
+;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; This code schedules 2 blocks at a time, with 4 lanes per block
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+*/
+/*
+ * Conversion to GAS assembly and integration to libgcrypt
+ *  by Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ */
+
+#ifdef __x86_64
+#include <config.h>
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
+    defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \
+    defined(HAVE_GCC_INLINE_ASM_AVX2) && defined(HAVE_GCC_INLINE_ASM_BMI2) && \
+    defined(USE_SHA256)
+
+#include "asm-common-amd64.h"
+
+.intel_syntax noprefix
+
+#define        VMOVDQ vmovdqu /* ; assume buffers not aligned  */
+
+/* ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; Define Macros */
+
+/*  addm [mem], reg */
+/*  Add reg to mem using reg-mem add and store */
+#define addm(p1, p2) \
+       add     p2, p1; \
+       mov     p1, p2;
+
+/* ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; */
+
+#define X0 ymm4
+#define X1 ymm5
+#define X2 ymm6
+#define X3 ymm7
+
+/*  XMM versions of above */
+#define XWORD0 xmm4
+#define XWORD1 xmm5
+#define XWORD2 xmm6
+#define XWORD3 xmm7
+
+#define XTMP0 ymm0
+#define XTMP1 ymm1
+#define XTMP2 ymm2
+#define XTMP3 ymm3
+#define XTMP4 ymm8
+#define XFER ymm9
+#define XTMP5 ymm11
+
+#define SHUF_00BA ymm10 /*  shuffle xBxA -> 00BA */
+#define SHUF_DC00 ymm12 /*  shuffle xDxC -> DC00 */
+#define BYTE_FLIP_MASK ymm13
+
+#define X_BYTE_FLIP_MASK xmm13 /*  XMM version of BYTE_FLIP_MASK */
+
+#define NUM_BLKS rdx /*  3rd arg */
+#define CTX rsi      /*  2nd arg */
+#define INP rdi      /*  1st arg */
+#define c ecx
+#define d r8d
+#define e edx        /*  clobbers NUM_BLKS */
+#define y3 edi       /*  clobbers INP */
+
+#define TBL rbp
+#define SRND CTX     /*  SRND is same register as CTX */
+
+#define a eax
+#define b ebx
+#define f r9d
+#define g r10d
+#define h r11d
+#define old_h r11d
+
+#define T1 r12d
+#define y0 r13d
+#define y1 r14d
+#define y2 r15d
+
+
+#define _XFER_SIZE 2*64*4      /*  2 blocks, 64 rounds, 4 bytes/round */
+#define _XMM_SAVE_SIZE 0
+#define _INP_END_SIZE 8
+#define _INP_SIZE 8
+#define _CTX_SIZE 8
+#define _RSP_SIZE 8
+
+#define _XFER 0
+#define _XMM_SAVE  _XFER     + _XFER_SIZE
+#define _INP_END   _XMM_SAVE + _XMM_SAVE_SIZE
+#define _INP       _INP_END  + _INP_END_SIZE
+#define _CTX       _INP      + _INP_SIZE
+#define _RSP       _CTX      + _CTX_SIZE
+#define STACK_SIZE _RSP      + _RSP_SIZE
+
+#define ONE_ROUND_PART1(XFERIN, a, b, c, d, e, f, g, h) \
+       /* h += Sum1 (e) + Ch (e, f, g) + (k[t] + w[0]); */ \
+       /* d += h; */ \
+       /* h += Sum0 (a) + Maj (a, b, c); */ \
+       \
+       /* Ch(x, y, z) => ((x & y) + (~x & z)) */ \
+       /* Maj(x, y, z) => ((x & y) + (z & (x ^ y))) */ \
+       \
+       mov y3, e; \
+       add h, [XFERIN]; \
+       and y3, f; \
+       rorx y0, e, 25; \
+       rorx y1, e, 11; \
+       lea h, [h + y3]; \
+       andn y3, e, g; \
+       rorx T1, a, 13; \
+       xor y0, y1; \
+       lea h, [h + y3]
+
+#define ONE_ROUND_PART2(a, b, c, d, e, f, g, h) \
+       rorx y2, a, 22; \
+       rorx y1, e, 6; \
+       mov y3, a; \
+       xor T1, y2; \
+       xor y0, y1; \
+       xor y3, b; \
+       lea h, [h + y0]; \
+       mov y0, a; \
+       rorx y2, a, 2; \
+       add d, h; \
+       and y3, c; \
+       xor T1, y2; \
+       lea h, [h + y3]; \
+       lea h, [h + T1]; \
+       and y0, b; \
+       lea h, [h + y0]
+
+#define ONE_ROUND(XFER, a, b, c, d, e, f, g, h) \
+       ONE_ROUND_PART1(XFER, a, b, c, d, e, f, g, h); \
+       ONE_ROUND_PART2(a, b, c, d, e, f, g, h)
+
+#define FOUR_ROUNDS_AND_SCHED(XFERIN, XFEROUT, X0, X1, X2, X3, a, b, c, d, e, 
f, g, h) \
+       /* ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; RND N + 0 
;;;;;;;;;;;;;;;;;;;;;;;;;;;; */; \
+               vpalignr        XTMP0, X3, X2, 4        /*  XTMP0 = W[-7] */; \
+               vpaddd  XTMP0, XTMP0, X0        /*  XTMP0 = W[-7] + W[-16]; y1 
= (e >> 6); S1 */; \
+               vpalignr        XTMP1, X1, X0, 4        /*  XTMP1 = W[-15] */; \
+               vpsrld  XTMP2, XTMP1, 7; \
+               vpslld  XTMP3, XTMP1, (32-7); \
+               vpor    XTMP3, XTMP3, XTMP2     /*  XTMP3 = W[-15] ror 7 */; \
+               vpsrld  XTMP2, XTMP1,18; \
+       \
+       ONE_ROUND(0*4+XFERIN, a, b, c, d, e, f, g, h); \
+       \
+       /* ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; RND N + 1 
;;;;;;;;;;;;;;;;;;;;;;;;;;;; */; \
+               vpsrld  XTMP4, XTMP1, 3 /*  XTMP4 = W[-15] >> 3 */; \
+               vpslld  XTMP1, XTMP1, (32-18); \
+               vpxor   XTMP3, XTMP3, XTMP1; \
+               vpxor   XTMP3, XTMP3, XTMP2     /*  XTMP3 = W[-15] ror 7 ^ 
W[-15] ror 18 */; \
+               vpxor   XTMP1, XTMP3, XTMP4     /*  XTMP1 = s0 */; \
+               vpshufd XTMP2, X3, 0b11111010   /*  XTMP2 = W[-2] {BBAA} */; \
+               vpaddd  XTMP0, XTMP0, XTMP1     /*  XTMP0 = W[-16] + W[-7] + s0 
*/; \
+               vpsrld  XTMP4, XTMP2, 10        /*  XTMP4 = W[-2] >> 10 {BBAA} 
*/; \
+       \
+       ONE_ROUND(1*4+XFERIN, h, a, b, c, d, e, f, g); \
+       \
+       /* ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; RND N + 2 
;;;;;;;;;;;;;;;;;;;;;;;;;;;; */; \
+               vpsrlq  XTMP3, XTMP2, 19        /*  XTMP3 = W[-2] ror 19 {xBxA} 
*/; \
+               vpsrlq  XTMP2, XTMP2, 17        /*  XTMP2 = W[-2] ror 17 {xBxA} 
*/; \
+               vpxor   XTMP2, XTMP2, XTMP3; \
+               vpxor   XTMP4, XTMP4, XTMP2     /*  XTMP4 = s1 {xBxA} */; \
+               vpshufb XTMP4, XTMP4, SHUF_00BA /*  XTMP4 = s1 {00BA} */; \
+               vpaddd  XTMP0, XTMP0, XTMP4     /*  XTMP0 = {..., ..., W[1], 
W[0]} */; \
+               vpshufd XTMP2, XTMP0, 0b1010000 /*  XTMP2 = W[-2] {DDCC} */; \
+       \
+       ONE_ROUND(2*4+XFERIN, g, h, a, b, c, d, e, f); \
+       \
+       /* ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; RND N + 3 
;;;;;;;;;;;;;;;;;;;;;;;;;;;; */; \
+               vpsrld  XTMP5, XTMP2,   10      /*  XTMP5 = W[-2] >> 10 {DDCC} 
*/; \
+               vpsrlq  XTMP3, XTMP2, 19        /*  XTMP3 = W[-2] ror 19 {xDxC} 
*/; \
+               vpsrlq  XTMP2, XTMP2, 17        /*  XTMP2 = W[-2] ror 17 {xDxC} 
*/; \
+               vpxor   XTMP2, XTMP2, XTMP3; \
+               vpxor   XTMP5, XTMP5, XTMP2     /*  XTMP5 = s1 {xDxC} */; \
+               vpshufb XTMP5, XTMP5, SHUF_DC00 /*  XTMP5 = s1 {DC00} */; \
+               vpaddd  X0, XTMP5, XTMP0        /*  X0 = {W[3], W[2], W[1], 
W[0]} */; \
+               vpaddd  XFER, X0, [TBL + XFEROUT]; \
+       \
+       ONE_ROUND_PART1(3*4+XFERIN, f, g, h, a, b, c, d, e); \
+               vmovdqa [rsp + _XFER + XFEROUT], XFER; \
+       ONE_ROUND_PART2(f, g, h, a, b, c, d, e);
+
+#define DO_4ROUNDS(XFERIN, a, b, c, d, e, f, g, h) \
+       ONE_ROUND(0*4+XFERIN, a, b, c, d, e, f, g, h); \
+       ONE_ROUND(1*4+XFERIN, h, a, b, c, d, e, f, g); \
+       ONE_ROUND(2*4+XFERIN, g, h, a, b, c, d, e, f); \
+       ONE_ROUND(3*4+XFERIN, f, g, h, a, b, c, d, e)
+
+/*
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; void sha256_rorx(void *input_data, UINT32 digest[8], UINT64 num_blks)
+;; arg 1 : pointer to input data
+;; arg 2 : pointer to digest
+;; arg 3 : Num blocks
+*/
+.text
+.globl _gcry_sha256_transform_amd64_avx2
+ELF(.type _gcry_sha256_transform_amd64_avx2,@function)
+.align 32
+_gcry_sha256_transform_amd64_avx2:
+       CFI_STARTPROC()
+       xor eax, eax
+
+       cmp rdx, 0
+       je .Lnowork
+
+       push    rbx
+       CFI_PUSH(rbx)
+       push    rbp
+       CFI_PUSH(rbp)
+       push    r12
+       CFI_PUSH(r12)
+       push    r13
+       CFI_PUSH(r13)
+       push    r14
+       CFI_PUSH(r14)
+       push    r15
+       CFI_PUSH(r15)
+
+       vzeroupper
+
+       vmovdqa BYTE_FLIP_MASK, [.LPSHUFFLE_BYTE_FLIP_MASK ADD_RIP]
+       vmovdqa SHUF_00BA, [.L_SHUF_00BA ADD_RIP]
+       vmovdqa SHUF_DC00, [.L_SHUF_DC00 ADD_RIP]
+
+       mov     rax, rsp
+       CFI_DEF_CFA_REGISTER(rax);
+       sub     rsp, STACK_SIZE
+       and     rsp, ~63
+       mov     [rsp + _RSP], rax
+       CFI_CFA_ON_STACK(_RSP, 6 * 8)
+
+       shl     NUM_BLKS, 6     /*  convert to bytes */
+       lea     NUM_BLKS, [NUM_BLKS + INP - 64] /*  pointer to last block */
+       mov     [rsp + _INP_END], NUM_BLKS
+
+       /* Check if only one block of input. Note: Loading initial digest
+        * only uses 'mov' instruction and does not change condition
+        * flags. */
+       cmp     NUM_BLKS, INP
+
+       /* ; load initial digest */
+       mov     a,[4*0 + CTX]
+       mov     b,[4*1 + CTX]
+       mov     c,[4*2 + CTX]
+       mov     d,[4*3 + CTX]
+       mov     e,[4*4 + CTX]
+       mov     f,[4*5 + CTX]
+       mov     g,[4*6 + CTX]
+       mov     h,[4*7 + CTX]
+
+       mov     [rsp + _CTX], CTX
+
+       je      .Ldo_last_block
+
+.Loop0:
+       lea     TBL, [.LK256 ADD_RIP]
+
+       /* ; Load first 16 dwords from two blocks */
+       VMOVDQ  XTMP0, [INP + 0*32]
+       VMOVDQ  XTMP1, [INP + 1*32]
+       VMOVDQ  XTMP2, [INP + 2*32]
+       VMOVDQ  XTMP3, [INP + 3*32]
+
+       /* ; byte swap data */
+       vpshufb XTMP0, XTMP0, BYTE_FLIP_MASK
+       vpshufb XTMP1, XTMP1, BYTE_FLIP_MASK
+       vpshufb XTMP2, XTMP2, BYTE_FLIP_MASK
+       vpshufb XTMP3, XTMP3, BYTE_FLIP_MASK
+
+       /* ; transpose data into high/low halves */
+       vperm2i128      X0, XTMP0, XTMP2, 0x20
+       vperm2i128      X1, XTMP0, XTMP2, 0x31
+       vperm2i128      X2, XTMP1, XTMP3, 0x20
+       vperm2i128      X3, XTMP1, XTMP3, 0x31
+
+.Last_block_enter:
+       add     INP, 64
+       mov     [rsp + _INP], INP
+
+       /* ; schedule 48 input dwords, by doing 3 rounds of 12 each */
+       xor     SRND, SRND
+
+       vpaddd  XFER, X0, [TBL + 0*32]
+       vmovdqa [rsp + _XFER + 0*32], XFER
+       vpaddd  XFER, X1, [TBL + 1*32]
+       vmovdqa [rsp + _XFER + 1*32], XFER
+       vpaddd  XFER, X2, [TBL + 2*32]
+       vmovdqa [rsp + _XFER + 2*32], XFER
+       vpaddd  XFER, X3, [TBL + 3*32]
+       vmovdqa [rsp + _XFER + 3*32], XFER
+
+.align 16
+.Loop1:
+       FOUR_ROUNDS_AND_SCHED(rsp + _XFER + SRND + 0*32, SRND + 4*32, X0, X1, 
X2, X3, a, b, c, d, e, f, g, h)
+       FOUR_ROUNDS_AND_SCHED(rsp + _XFER + SRND + 1*32, SRND + 5*32, X1, X2, 
X3, X0, e, f, g, h, a, b, c, d)
+       FOUR_ROUNDS_AND_SCHED(rsp + _XFER + SRND + 2*32, SRND + 6*32, X2, X3, 
X0, X1, a, b, c, d, e, f, g, h)
+       FOUR_ROUNDS_AND_SCHED(rsp + _XFER + SRND + 3*32, SRND + 7*32, X3, X0, 
X1, X2, e, f, g, h, a, b, c, d)
+
+       add     SRND, 4*32
+       cmp     SRND, 3 * 4*32
+       jb      .Loop1
+
+       /* ; Do last 16 rounds with no scheduling */
+       DO_4ROUNDS(rsp + _XFER + (3*4*32 + 0*32), a, b, c, d, e, f, g, h)
+       DO_4ROUNDS(rsp + _XFER + (3*4*32 + 1*32), e, f, g, h, a, b, c, d)
+       DO_4ROUNDS(rsp + _XFER + (3*4*32 + 2*32), a, b, c, d, e, f, g, h)
+       DO_4ROUNDS(rsp + _XFER + (3*4*32 + 3*32), e, f, g, h, a, b, c, d)
+
+       mov     CTX, [rsp + _CTX]
+       mov     INP, [rsp + _INP]
+
+       addm([4*0 + CTX],a)
+       addm([4*1 + CTX],b)
+       addm([4*2 + CTX],c)
+       addm([4*3 + CTX],d)
+       addm([4*4 + CTX],e)
+       addm([4*5 + CTX],f)
+       addm([4*6 + CTX],g)
+       addm([4*7 + CTX],h)
+
+       cmp     INP, [rsp + _INP_END]
+       ja      .Ldone_hash
+
+       /* ;;; Do second block using previously scheduled results */
+       xor     SRND, SRND
+.align 16
+.Loop3:
+       DO_4ROUNDS(rsp + _XFER + SRND + 0*32 + 16, a, b, c, d, e, f, g, h)
+       DO_4ROUNDS(rsp + _XFER + SRND + 1*32 + 16, e, f, g, h, a, b, c, d)
+       add     SRND, 2*32
+       cmp     SRND, 4 * 4*32
+       jb .Loop3
+
+       mov     CTX, [rsp + _CTX]
+       mov     INP, [rsp + _INP]
+       add     INP, 64
+
+       addm([4*0 + CTX],a)
+       addm([4*1 + CTX],b)
+       addm([4*2 + CTX],c)
+       addm([4*3 + CTX],d)
+       addm([4*4 + CTX],e)
+       addm([4*5 + CTX],f)
+       addm([4*6 + CTX],g)
+       addm([4*7 + CTX],h)
+
+       cmp     INP, [rsp + _INP_END]
+       jb      .Loop0
+       ja      .Ldone_hash
+
+.Ldo_last_block:
+       /* ;;; do last block */
+       lea     TBL, [.LK256 ADD_RIP]
+
+       VMOVDQ  XWORD0, [INP + 0*16]
+       VMOVDQ  XWORD1, [INP + 1*16]
+       VMOVDQ  XWORD2, [INP + 2*16]
+       VMOVDQ  XWORD3, [INP + 3*16]
+
+       vpshufb XWORD0, XWORD0, X_BYTE_FLIP_MASK
+       vpshufb XWORD1, XWORD1, X_BYTE_FLIP_MASK
+       vpshufb XWORD2, XWORD2, X_BYTE_FLIP_MASK
+       vpshufb XWORD3, XWORD3, X_BYTE_FLIP_MASK
+
+       jmp     .Last_block_enter
+
+.Lonly_one_block:
+
+       /* ; load initial digest */
+       mov     a,[4*0 + CTX]
+       mov     b,[4*1 + CTX]
+       mov     c,[4*2 + CTX]
+       mov     d,[4*3 + CTX]
+       mov     e,[4*4 + CTX]
+       mov     f,[4*5 + CTX]
+       mov     g,[4*6 + CTX]
+       mov     h,[4*7 + CTX]
+
+       vmovdqa BYTE_FLIP_MASK, [.LPSHUFFLE_BYTE_FLIP_MASK ADD_RIP]
+       vmovdqa SHUF_00BA, [.L_SHUF_00BA ADD_RIP]
+       vmovdqa SHUF_DC00, [.L_SHUF_DC00 ADD_RIP]
+
+       mov     [rsp + _CTX], CTX
+       jmp     .Ldo_last_block
+
+.Ldone_hash:
+       vzeroall
+
+       /* burn stack */
+       vmovdqa [rsp + _XFER + 0 * 32], ymm0
+       vmovdqa [rsp + _XFER + 1 * 32], ymm0
+       vmovdqa [rsp + _XFER + 2 * 32], ymm0
+       vmovdqa [rsp + _XFER + 3 * 32], ymm0
+       vmovdqa [rsp + _XFER + 4 * 32], ymm0
+       vmovdqa [rsp + _XFER + 5 * 32], ymm0
+       vmovdqa [rsp + _XFER + 6 * 32], ymm0
+       vmovdqa [rsp + _XFER + 7 * 32], ymm0
+       vmovdqa [rsp + _XFER + 8 * 32], ymm0
+       vmovdqa [rsp + _XFER + 9 * 32], ymm0
+       vmovdqa [rsp + _XFER + 10 * 32], ymm0
+       vmovdqa [rsp + _XFER + 11 * 32], ymm0
+       vmovdqa [rsp + _XFER + 12 * 32], ymm0
+       vmovdqa [rsp + _XFER + 13 * 32], ymm0
+       vmovdqa [rsp + _XFER + 14 * 32], ymm0
+       vmovdqa [rsp + _XFER + 15 * 32], ymm0
+       xor     eax, eax
+
+       mov     rsp, [rsp + _RSP]
+       CFI_DEF_CFA_REGISTER(rsp)
+
+       pop     r15
+       CFI_POP(r15)
+       pop     r14
+       CFI_POP(r14)
+       pop     r13
+       CFI_POP(r13)
+       pop     r12
+       CFI_POP(r12)
+       pop     rbp
+       CFI_POP(rbp)
+       pop     rbx
+       CFI_POP(rbx)
+
+.Lnowork:
+       ret_spec_stop
+       CFI_ENDPROC()
+
+.align 64
+.LK256:
+       .long   0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
+       .long   0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
+       .long   0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
+       .long   0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
+       .long   0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
+       .long   0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
+       .long   0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
+       .long   0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
+       .long   0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
+       .long   0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
+       .long   0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
+       .long   0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
+       .long   0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
+       .long   0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
+       .long   0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
+       .long   0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
+       .long   0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
+       .long   0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
+       .long   0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
+       .long   0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
+       .long   0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
+       .long   0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
+       .long   0xd192e819,0xd6990624,0xf40e3585,0x106aa070
+       .long   0xd192e819,0xd6990624,0xf40e3585,0x106aa070
+       .long   0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
+       .long   0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
+       .long   0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
+       .long   0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
+       .long   0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
+       .long   0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
+       .long   0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
+       .long   0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
+
+.LPSHUFFLE_BYTE_FLIP_MASK:
+       .octa 
0x0c0d0e0f08090a0b0405060700010203,0x0c0d0e0f08090a0b0405060700010203
+
+/*  shuffle xBxA -> 00BA */
+.L_SHUF_00BA:
+       .octa 
0xFFFFFFFFFFFFFFFF0b0a090803020100,0xFFFFFFFFFFFFFFFF0b0a090803020100
+
+/*  shuffle xDxC -> DC00 */
+.L_SHUF_DC00:
+       .octa 
0x0b0a090803020100FFFFFFFFFFFFFFFF,0x0b0a090803020100FFFFFFFFFFFFFFFF
+
+#endif
+#endif
diff --git a/grub-core/lib/libgcrypt/cipher/sha256-intel-shaext.c 
b/grub-core/lib/libgcrypt/cipher/sha256-intel-shaext.c
new file mode 100644
index 000000000..48c09eefe
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/sha256-intel-shaext.c
@@ -0,0 +1,363 @@
+/* sha256-intel-shaext.S - SHAEXT accelerated SHA-256 transform function
+ * Copyright (C) 2018 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+
+#include "types.h"
+
+#if defined(HAVE_GCC_INLINE_ASM_SHAEXT) && \
+    defined(HAVE_GCC_INLINE_ASM_SSE41) && defined(USE_SHA256) && \
+    defined(ENABLE_SHAEXT_SUPPORT)
+
+#if _GCRY_GCC_VERSION >= 40400 /* 4.4 */
+/* Prevent compiler from issuing SSE instructions between asm blocks. */
+#  pragma GCC target("no-sse")
+#endif
+#if __clang__
+#  pragma clang attribute push (__attribute__((target("no-sse"))), apply_to = 
function)
+#endif
+
+#define NO_INSTRUMENT_FUNCTION __attribute__((no_instrument_function))
+
+#define ASM_FUNC_ATTR NO_INSTRUMENT_FUNCTION
+
+/* Two macros to be called prior and after the use of SHA-EXT
+   instructions.  There should be no external function calls between
+   the use of these macros.  There purpose is to make sure that the
+   SSE regsiters are cleared and won't reveal any information about
+   the key or the data.  */
+#ifdef __WIN64__
+/* XMM6-XMM15 are callee-saved registers on WIN64. */
+# define shaext_prepare_variable char win64tmp[2*16]
+# define shaext_prepare_variable_size sizeof(win64tmp)
+# define shaext_prepare()                                               \
+   do { asm volatile ("movdqu %%xmm6, (%0)\n"                           \
+                      "movdqu %%xmm7, (%1)\n"                           \
+                      :                                                 \
+                      : "r" (&win64tmp[0]), "r" (&win64tmp[16])         \
+                      : "memory");                                      \
+   } while (0)
+# define shaext_cleanup(tmp0,tmp1)                                      \
+   do { asm volatile ("movdqu (%0), %%xmm6\n"                           \
+                      "movdqu (%1), %%xmm7\n"                           \
+                      "pxor %%xmm0, %%xmm0\n"                           \
+                      "pxor %%xmm1, %%xmm1\n"                           \
+                      "pxor %%xmm2, %%xmm2\n"                           \
+                      "pxor %%xmm3, %%xmm3\n"                           \
+                      "pxor %%xmm4, %%xmm4\n"                           \
+                      "pxor %%xmm5, %%xmm5\n"                           \
+                      "movdqa %%xmm0, (%2)\n\t"                         \
+                      "movdqa %%xmm0, (%3)\n\t"                         \
+                      :                                                 \
+                      : "r" (&win64tmp[0]), "r" (&win64tmp[16]),        \
+                        "r" (tmp0), "r" (tmp1)                          \
+                      : "memory");                                      \
+   } while (0)
+#else
+# define shaext_prepare_variable
+# define shaext_prepare_variable_size 0
+# define shaext_prepare() do { } while (0)
+# define shaext_cleanup(tmp0,tmp1)                                      \
+   do { asm volatile ("pxor %%xmm0, %%xmm0\n"                           \
+                      "pxor %%xmm1, %%xmm1\n"                           \
+                      "pxor %%xmm2, %%xmm2\n"                           \
+                      "pxor %%xmm3, %%xmm3\n"                           \
+                      "pxor %%xmm4, %%xmm4\n"                           \
+                      "pxor %%xmm5, %%xmm5\n"                           \
+                      "pxor %%xmm6, %%xmm6\n"                           \
+                      "pxor %%xmm7, %%xmm7\n"                           \
+                      "movdqa %%xmm0, (%0)\n\t"                         \
+                      "movdqa %%xmm0, (%1)\n\t"                         \
+                      :                                                 \
+                      : "r" (tmp0), "r" (tmp1)                          \
+                      : "memory");                                      \
+   } while (0)
+#endif
+
+typedef struct u128_s
+{
+  u32 a, b, c, d;
+} u128_t;
+
+/*
+ * Transform nblks*64 bytes (nblks*16 32-bit words) at DATA.
+ */
+unsigned int ASM_FUNC_ATTR
+_gcry_sha256_transform_intel_shaext(u32 state[8], const unsigned char *data,
+                                    size_t nblks)
+{
+  static const unsigned char bshuf_mask[16] __attribute__ ((aligned (16))) =
+    { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 };
+  static const u128_t K[16] __attribute__ ((aligned (16))) =
+  {
+    { 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5 },
+    { 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5 },
+    { 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3 },
+    { 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174 },
+    { 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc },
+    { 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da },
+    { 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7 },
+    { 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967 },
+    { 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13 },
+    { 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85 },
+    { 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3 },
+    { 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070 },
+    { 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5 },
+    { 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3 },
+    { 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208 },
+    { 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 }
+  };
+  char save_buf[2 * 16 + 15];
+  char *abef_save;
+  char *cdgh_save;
+  shaext_prepare_variable;
+
+  if (nblks == 0)
+    return 0;
+
+  shaext_prepare ();
+
+  asm volatile ("" : "=r" (abef_save) : "0" (save_buf) : "memory");
+  abef_save = abef_save + (-(uintptr_t)abef_save & 15);
+  cdgh_save = abef_save + 16;
+
+  /* byteswap mask => XMM7 */
+  asm volatile ("movdqa %[mask], %%xmm7\n\t" /* Preload mask */
+                :
+                : [mask] "m" (*bshuf_mask)
+                : "memory");
+
+  /* Load state.. ABEF_SAVE => STATE0 XMM1, CDGH_STATE => STATE1 XMM2 */
+  asm volatile ("movups 16(%[state]), %%xmm1\n\t" /* HGFE (xmm=EFGH) */
+                "movups  0(%[state]), %%xmm0\n\t" /* DCBA (xmm=ABCD) */
+                "movaps %%xmm1, %%xmm2\n\t"
+                "shufps $0x11, %%xmm0, %%xmm1\n\t" /* ABEF (xmm=FEBA) */
+                "shufps $0xbb, %%xmm0, %%xmm2\n\t" /* CDGH (xmm=HGDC) */
+                :
+                : [state] "r" (state)
+                : "memory" );
+
+  /* Load message */
+  asm volatile ("movdqu 0*16(%[data]), %%xmm3\n\t"
+                "movdqu 1*16(%[data]), %%xmm4\n\t"
+                "movdqu 2*16(%[data]), %%xmm5\n\t"
+                "movdqu 3*16(%[data]), %%xmm6\n\t"
+                "pshufb %%xmm7, %%xmm3\n\t"
+                "pshufb %%xmm7, %%xmm4\n\t"
+                "pshufb %%xmm7, %%xmm5\n\t"
+                "pshufb %%xmm7, %%xmm6\n\t"
+                :
+                : [data] "r" (data)
+                : "memory" );
+  data += 64;
+
+  do
+    {
+      /* Save state */
+      asm volatile ("movdqa %%xmm1, (%[abef_save])\n\t"
+                    "movdqa %%xmm2, (%[cdgh_save])\n\t"
+                    :
+                    : [abef_save] "r" (abef_save), [cdgh_save] "r" (cdgh_save)
+                    : "memory" );
+
+      /* Round 0..3 */
+      asm volatile ("movdqa %%xmm3, %%xmm0\n\t"
+                      "paddd %[constants], %%xmm0\n\t"
+                      "sha256rnds2 %%xmm1, %%xmm2\n\t"
+                      "psrldq $8, %%xmm0\n\t"
+                      "sha256rnds2 %%xmm2, %%xmm1\n\t"
+                    :
+                    : [constants] "m" (K[0].a)
+                    : "memory" );
+
+      /* Round 4..7 */
+      asm volatile ("movdqa %%xmm4, %%xmm0\n\t"
+                      "paddd %[constants], %%xmm0\n\t"
+                      "sha256rnds2 %%xmm1, %%xmm2\n\t"
+                      "psrldq $8, %%xmm0\n\t"
+                      "sha256rnds2 %%xmm2, %%xmm1\n\t"
+                    "sha256msg1 %%xmm4, %%xmm3\n\t"
+                    :
+                    : [constants] "m" (K[1].a)
+                    : "memory" );
+
+      /* Round 8..11 */
+      asm volatile ("movdqa %%xmm5, %%xmm0\n\t"
+                      "paddd %[constants], %%xmm0\n\t"
+                      "sha256rnds2 %%xmm1, %%xmm2\n\t"
+                      "psrldq $8, %%xmm0\n\t"
+                      "sha256rnds2 %%xmm2, %%xmm1\n\t"
+                    "sha256msg1 %%xmm5, %%xmm4\n\t"
+                    :
+                    : [constants] "m" (K[2].a)
+                    : "memory" );
+
+#define ROUND(k, MSG0, MSG1, MSG2, MSG3) \
+      asm volatile ("movdqa %%"MSG0", %%xmm0\n\t" \
+                      "paddd %[constants], %%xmm0\n\t" \
+                      "sha256rnds2 %%xmm1, %%xmm2\n\t" \
+                    "movdqa %%"MSG0", %%xmm7\n\t" \
+                    "palignr $4, %%"MSG3", %%xmm7\n\t" \
+                    "paddd %%xmm7, %%"MSG1"\n\t" \
+                    "sha256msg2 %%"MSG0", %%"MSG1"\n\t" \
+                      "psrldq $8, %%xmm0\n\t" \
+                      "sha256rnds2 %%xmm2, %%xmm1\n\t" \
+                    "sha256msg1 %%"MSG0", %%"MSG3"\n\t" \
+                    : \
+                    : [constants] "m" (K[k].a) \
+                    : "memory" )
+
+      /* Rounds 12..15 to 48..51 */
+      ROUND(3, "xmm6", "xmm3", "xmm4", "xmm5");
+      ROUND(4, "xmm3", "xmm4", "xmm5", "xmm6");
+      ROUND(5, "xmm4", "xmm5", "xmm6", "xmm3");
+      ROUND(6, "xmm5", "xmm6", "xmm3", "xmm4");
+      ROUND(7, "xmm6", "xmm3", "xmm4", "xmm5");
+      ROUND(8, "xmm3", "xmm4", "xmm5", "xmm6");
+      ROUND(9, "xmm4", "xmm5", "xmm6", "xmm3");
+      ROUND(10, "xmm5", "xmm6", "xmm3", "xmm4");
+      ROUND(11, "xmm6", "xmm3", "xmm4", "xmm5");
+      ROUND(12, "xmm3", "xmm4", "xmm5", "xmm6");
+
+      if (--nblks == 0)
+        break;
+
+      /* Round 52..55 */
+      asm volatile ("movdqa %%xmm4, %%xmm0\n\t"
+                      "paddd %[constants], %%xmm0\n\t"
+                      "sha256rnds2 %%xmm1, %%xmm2\n\t"
+                    "movdqa %%xmm4, %%xmm7\n\t"
+                    "palignr $4, %%xmm3, %%xmm7\n\t"
+                    "movdqu 0*16(%[data]), %%xmm3\n\t"
+                    "paddd %%xmm7, %%xmm5\n\t"
+                    "sha256msg2 %%xmm4, %%xmm5\n\t"
+                      "psrldq $8, %%xmm0\n\t"
+                      "sha256rnds2 %%xmm2, %%xmm1\n\t"
+                    :
+                    : [constants] "m" (K[13].a), [data] "r" (data)
+                    : "memory" );
+
+      /* Round 56..59 */
+      asm volatile ("movdqa %%xmm5, %%xmm0\n\t"
+                      "paddd %[constants], %%xmm0\n\t"
+                      "sha256rnds2 %%xmm1, %%xmm2\n\t"
+                    "movdqa %%xmm5, %%xmm7\n\t"
+                    "palignr $4, %%xmm4, %%xmm7\n\t"
+                    "movdqu 1*16(%[data]), %%xmm4\n\t"
+                    "paddd %%xmm7, %%xmm6\n\t"
+                    "movdqa %[mask], %%xmm7\n\t" /* Reload mask */
+                    "sha256msg2 %%xmm5, %%xmm6\n\t"
+                    "movdqu 2*16(%[data]), %%xmm5\n\t"
+                      "psrldq $8, %%xmm0\n\t"
+                      "sha256rnds2 %%xmm2, %%xmm1\n\t"
+                    :
+                    : [constants] "m" (K[14].a), [mask] "m" (*bshuf_mask),
+                      [data] "r" (data)
+                    : "memory" );
+
+      /* Round 60..63 */
+      asm volatile ("movdqa %%xmm6, %%xmm0\n\t"
+                    "pshufb %%xmm7, %%xmm3\n\t"
+                    "movdqu 3*16(%[data]), %%xmm6\n\t"
+                      "paddd %[constants], %%xmm0\n\t"
+                    "pshufb %%xmm7, %%xmm4\n\t"
+                      "sha256rnds2 %%xmm1, %%xmm2\n\t"
+                      "psrldq $8, %%xmm0\n\t"
+                    "pshufb %%xmm7, %%xmm5\n\t"
+                      "sha256rnds2 %%xmm2, %%xmm1\n\t"
+                    :
+                    : [constants] "m" (K[15].a), [data] "r" (data)
+                    : "memory" );
+      data += 64;
+
+      /* Merge states */
+      asm volatile ("paddd (%[abef_save]), %%xmm1\n\t"
+                    "paddd (%[cdgh_save]), %%xmm2\n\t"
+                    "pshufb %%xmm7, %%xmm6\n\t"
+                    :
+                    : [abef_save] "r" (abef_save), [cdgh_save] "r" (cdgh_save)
+                    : "memory" );
+    }
+  while (1);
+
+  /* Round 52..55 */
+  asm volatile ("movdqa %%xmm4, %%xmm0\n\t"
+                  "paddd %[constants], %%xmm0\n\t"
+                  "sha256rnds2 %%xmm1, %%xmm2\n\t"
+                "movdqa %%xmm4, %%xmm7\n\t"
+                "palignr $4, %%xmm3, %%xmm7\n\t"
+                "paddd %%xmm7, %%xmm5\n\t"
+                "sha256msg2 %%xmm4, %%xmm5\n\t"
+                  "psrldq $8, %%xmm0\n\t"
+                  "sha256rnds2 %%xmm2, %%xmm1\n\t"
+                :
+                : [constants] "m" (K[13].a)
+                : "memory" );
+
+  /* Round 56..59 */
+  asm volatile ("movdqa %%xmm5, %%xmm0\n\t"
+                  "paddd %[constants], %%xmm0\n\t"
+                  "sha256rnds2 %%xmm1, %%xmm2\n\t"
+                "movdqa %%xmm5, %%xmm7\n\t"
+                "palignr $4, %%xmm4, %%xmm7\n\t"
+                "paddd %%xmm7, %%xmm6\n\t"
+                "movdqa %[mask], %%xmm7\n\t" /* Reload mask */
+                "sha256msg2 %%xmm5, %%xmm6\n\t"
+                  "psrldq $8, %%xmm0\n\t"
+                  "sha256rnds2 %%xmm2, %%xmm1\n\t"
+                :
+                : [constants] "m" (K[14].a), [mask] "m" (*bshuf_mask)
+                : "memory" );
+
+  /* Round 60..63 */
+  asm volatile ("movdqa %%xmm6, %%xmm0\n\t"
+                  "paddd %[constants], %%xmm0\n\t"
+                  "sha256rnds2 %%xmm1, %%xmm2\n\t"
+                  "psrldq $8, %%xmm0\n\t"
+                  "sha256rnds2 %%xmm2, %%xmm1\n\t"
+                :
+                : [constants] "m" (K[15].a)
+                : "memory" );
+
+  /* Merge states */
+  asm volatile ("paddd (%[abef_save]), %%xmm1\n\t"
+                "paddd (%[cdgh_save]), %%xmm2\n\t"
+                :
+                : [abef_save] "r" (abef_save), [cdgh_save] "r" (cdgh_save)
+                : "memory" );
+
+  /* Save state (XMM1=FEBA, XMM2=HGDC) */
+  asm volatile ("movaps %%xmm1, %%xmm0\n\t"
+                "shufps $0x11, %%xmm2, %%xmm1\n\t" /* xmm=ABCD */
+                "shufps $0xbb, %%xmm2, %%xmm0\n\t" /* xmm=EFGH */
+                "movups %%xmm1, 16(%[state])\n\t"
+                "movups %%xmm0,  0(%[state])\n\t"
+                :
+                : [state] "r" (state)
+                : "memory" );
+
+  shaext_cleanup (abef_save, cdgh_save);
+  return 0;
+}
+
+#if __clang__
+#  pragma clang attribute pop
+#endif
+
+#endif /* HAVE_GCC_INLINE_ASM_SHA_EXT */
diff --git a/grub-core/lib/libgcrypt/cipher/sha256-ppc.c 
b/grub-core/lib/libgcrypt/cipher/sha256-ppc.c
new file mode 100644
index 000000000..a9b59714d
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/sha256-ppc.c
@@ -0,0 +1,795 @@
+/* sha256-ppc.c - PowerPC vcrypto implementation of SHA-256 transform
+ * Copyright (C) 2019 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+
+#if defined(ENABLE_PPC_CRYPTO_SUPPORT) && \
+    defined(HAVE_COMPATIBLE_CC_PPC_ALTIVEC) && \
+    defined(HAVE_GCC_INLINE_ASM_PPC_ALTIVEC) && \
+    defined(USE_SHA256) && \
+    __GNUC__ >= 4
+
+#include <altivec.h>
+#include "bufhelp.h"
+
+
+typedef vector unsigned char vector16x_u8;
+typedef vector unsigned int vector4x_u32;
+typedef vector unsigned long long vector2x_u64;
+
+
+#define ALWAYS_INLINE inline __attribute__((always_inline))
+#define NO_INLINE __attribute__((noinline))
+#define NO_INSTRUMENT_FUNCTION __attribute__((no_instrument_function))
+
+#define ASM_FUNC_ATTR          NO_INSTRUMENT_FUNCTION
+#define ASM_FUNC_ATTR_INLINE   ASM_FUNC_ATTR ALWAYS_INLINE
+#define ASM_FUNC_ATTR_NOINLINE ASM_FUNC_ATTR NO_INLINE
+
+
+static const u32 K[64] =
+  {
+#define TBL(v) v
+    TBL(0x428a2f98), TBL(0x71374491), TBL(0xb5c0fbcf), TBL(0xe9b5dba5),
+    TBL(0x3956c25b), TBL(0x59f111f1), TBL(0x923f82a4), TBL(0xab1c5ed5),
+    TBL(0xd807aa98), TBL(0x12835b01), TBL(0x243185be), TBL(0x550c7dc3),
+    TBL(0x72be5d74), TBL(0x80deb1fe), TBL(0x9bdc06a7), TBL(0xc19bf174),
+    TBL(0xe49b69c1), TBL(0xefbe4786), TBL(0x0fc19dc6), TBL(0x240ca1cc),
+    TBL(0x2de92c6f), TBL(0x4a7484aa), TBL(0x5cb0a9dc), TBL(0x76f988da),
+    TBL(0x983e5152), TBL(0xa831c66d), TBL(0xb00327c8), TBL(0xbf597fc7),
+    TBL(0xc6e00bf3), TBL(0xd5a79147), TBL(0x06ca6351), TBL(0x14292967),
+    TBL(0x27b70a85), TBL(0x2e1b2138), TBL(0x4d2c6dfc), TBL(0x53380d13),
+    TBL(0x650a7354), TBL(0x766a0abb), TBL(0x81c2c92e), TBL(0x92722c85),
+    TBL(0xa2bfe8a1), TBL(0xa81a664b), TBL(0xc24b8b70), TBL(0xc76c51a3),
+    TBL(0xd192e819), TBL(0xd6990624), TBL(0xf40e3585), TBL(0x106aa070),
+    TBL(0x19a4c116), TBL(0x1e376c08), TBL(0x2748774c), TBL(0x34b0bcb5),
+    TBL(0x391c0cb3), TBL(0x4ed8aa4a), TBL(0x5b9cca4f), TBL(0x682e6ff3),
+    TBL(0x748f82ee), TBL(0x78a5636f), TBL(0x84c87814), TBL(0x8cc70208),
+    TBL(0x90befffa), TBL(0xa4506ceb), TBL(0xbef9a3f7), TBL(0xc67178f2)
+#undef TBL
+  };
+
+
+static ASM_FUNC_ATTR_INLINE vector4x_u32
+vec_rol_elems(vector4x_u32 v, unsigned int idx)
+{
+#ifndef WORDS_BIGENDIAN
+  return vec_sld (v, v, (16 - (4 * idx)) & 15);
+#else
+  return vec_sld (v, v, (4 * idx) & 15);
+#endif
+}
+
+
+static ASM_FUNC_ATTR_INLINE vector4x_u32
+vec_merge_idx0_elems(vector4x_u32 v0, vector4x_u32 v1,
+                    vector4x_u32 v2, vector4x_u32 v3)
+{
+  return (vector4x_u32)vec_mergeh ((vector2x_u64) vec_mergeh(v0, v1),
+                                  (vector2x_u64) vec_mergeh(v2, v3));
+}
+
+
+static ASM_FUNC_ATTR_INLINE vector4x_u32
+vec_ror_u32(vector4x_u32 v, unsigned int shift)
+{
+  return (v >> (shift & 31)) ^ (v << ((32 - shift) & 31));
+}
+
+
+static ASM_FUNC_ATTR_INLINE vector4x_u32
+vec_vshasigma_u32(vector4x_u32 v, unsigned int a, unsigned int b)
+{
+  asm ("vshasigmaw %0,%1,%2,%3"
+       : "=v" (v)
+       : "v" (v), "g" (a), "g" (b)
+       : "memory");
+  return v;
+}
+
+
+/* SHA2 round in vector registers */
+#define R(a,b,c,d,e,f,g,h,k,w) do                             \
+    {                                                         \
+      t1  = (h);                                              \
+      t1 += ((k) + (w));                                      \
+      t1 += Cho((e),(f),(g));                                 \
+      t1 += Sum1((e));                                        \
+      t2  = Sum0((a));                                        \
+      t2 += Maj((a),(b),(c));                                 \
+      d  += t1;                                               \
+      h   = t1 + t2;                                          \
+    } while (0)
+
+#define Cho(b, c, d)  (vec_sel(d, c, b))
+
+#define Maj(c, d, b)  (vec_sel(c, b, c ^ d))
+
+#define Sum0(x)       (vec_vshasigma_u32(x, 1, 0))
+
+#define Sum1(x)       (vec_vshasigma_u32(x, 1, 15))
+
+
+/* Message expansion on general purpose registers */
+#define S0(x) (ror ((x), 7) ^ ror ((x), 18) ^ ((x) >> 3))
+#define S1(x) (ror ((x), 17) ^ ror ((x), 19) ^ ((x) >> 10))
+
+#define I(i) ( w[i] = buf_get_be32(data + i * 4) )
+#define W(i) ({ w[i&0x0f] +=    w[(i-7) &0x0f];  \
+               w[i&0x0f] += S0(w[(i-15)&0x0f]); \
+               w[i&0x0f] += S1(w[(i-2) &0x0f]); \
+               w[i&0x0f]; })
+
+#define I2(i) ( w2[i] = buf_get_be32(64 + data + i * 4), I(i) )
+#define W2(i) ({ w2[i]  = w2[i-7];       \
+                w2[i] += S1(w2[i-2]);   \
+                w2[i] += S0(w2[i-15]);  \
+                w2[i] += w2[i-16];      \
+                W(i); })
+#define R2(i) ( w2[i] )
+
+
+unsigned int ASM_FUNC_ATTR
+_gcry_sha256_transform_ppc8(u32 state[8], const unsigned char *data,
+                           size_t nblks)
+{
+  /* GPRs used for message expansion as vector intrinsics based generates
+   * slower code. */
+  vector4x_u32 h0, h1, h2, h3, h4, h5, h6, h7;
+  vector4x_u32 h0_h3, h4_h7;
+  vector4x_u32 a, b, c, d, e, f, g, h, t1, t2;
+  u32 w[16];
+  u32 w2[64];
+
+  h0_h3 = vec_vsx_ld (4 * 0, state);
+  h4_h7 = vec_vsx_ld (4 * 4, state);
+
+  h0 = h0_h3;
+  h1 = vec_rol_elems (h0_h3, 1);
+  h2 = vec_rol_elems (h0_h3, 2);
+  h3 = vec_rol_elems (h0_h3, 3);
+  h4 = h4_h7;
+  h5 = vec_rol_elems (h4_h7, 1);
+  h6 = vec_rol_elems (h4_h7, 2);
+  h7 = vec_rol_elems (h4_h7, 3);
+
+  while (nblks >= 2)
+    {
+      a = h0;
+      b = h1;
+      c = h2;
+      d = h3;
+      e = h4;
+      f = h5;
+      g = h6;
+      h = h7;
+
+      R(a, b, c, d, e, f, g, h, K[0], I2(0));
+      R(h, a, b, c, d, e, f, g, K[1], I2(1));
+      R(g, h, a, b, c, d, e, f, K[2], I2(2));
+      R(f, g, h, a, b, c, d, e, K[3], I2(3));
+      R(e, f, g, h, a, b, c, d, K[4], I2(4));
+      R(d, e, f, g, h, a, b, c, K[5], I2(5));
+      R(c, d, e, f, g, h, a, b, K[6], I2(6));
+      R(b, c, d, e, f, g, h, a, K[7], I2(7));
+      R(a, b, c, d, e, f, g, h, K[8], I2(8));
+      R(h, a, b, c, d, e, f, g, K[9], I2(9));
+      R(g, h, a, b, c, d, e, f, K[10], I2(10));
+      R(f, g, h, a, b, c, d, e, K[11], I2(11));
+      R(e, f, g, h, a, b, c, d, K[12], I2(12));
+      R(d, e, f, g, h, a, b, c, K[13], I2(13));
+      R(c, d, e, f, g, h, a, b, K[14], I2(14));
+      R(b, c, d, e, f, g, h, a, K[15], I2(15));
+      data += 64 * 2;
+
+      R(a, b, c, d, e, f, g, h, K[16], W2(16));
+      R(h, a, b, c, d, e, f, g, K[17], W2(17));
+      R(g, h, a, b, c, d, e, f, K[18], W2(18));
+      R(f, g, h, a, b, c, d, e, K[19], W2(19));
+      R(e, f, g, h, a, b, c, d, K[20], W2(20));
+      R(d, e, f, g, h, a, b, c, K[21], W2(21));
+      R(c, d, e, f, g, h, a, b, K[22], W2(22));
+      R(b, c, d, e, f, g, h, a, K[23], W2(23));
+      R(a, b, c, d, e, f, g, h, K[24], W2(24));
+      R(h, a, b, c, d, e, f, g, K[25], W2(25));
+      R(g, h, a, b, c, d, e, f, K[26], W2(26));
+      R(f, g, h, a, b, c, d, e, K[27], W2(27));
+      R(e, f, g, h, a, b, c, d, K[28], W2(28));
+      R(d, e, f, g, h, a, b, c, K[29], W2(29));
+      R(c, d, e, f, g, h, a, b, K[30], W2(30));
+      R(b, c, d, e, f, g, h, a, K[31], W2(31));
+
+      R(a, b, c, d, e, f, g, h, K[32], W2(32));
+      R(h, a, b, c, d, e, f, g, K[33], W2(33));
+      R(g, h, a, b, c, d, e, f, K[34], W2(34));
+      R(f, g, h, a, b, c, d, e, K[35], W2(35));
+      R(e, f, g, h, a, b, c, d, K[36], W2(36));
+      R(d, e, f, g, h, a, b, c, K[37], W2(37));
+      R(c, d, e, f, g, h, a, b, K[38], W2(38));
+      R(b, c, d, e, f, g, h, a, K[39], W2(39));
+      R(a, b, c, d, e, f, g, h, K[40], W2(40));
+      R(h, a, b, c, d, e, f, g, K[41], W2(41));
+      R(g, h, a, b, c, d, e, f, K[42], W2(42));
+      R(f, g, h, a, b, c, d, e, K[43], W2(43));
+      R(e, f, g, h, a, b, c, d, K[44], W2(44));
+      R(d, e, f, g, h, a, b, c, K[45], W2(45));
+      R(c, d, e, f, g, h, a, b, K[46], W2(46));
+      R(b, c, d, e, f, g, h, a, K[47], W2(47));
+
+      R(a, b, c, d, e, f, g, h, K[48], W2(48));
+      R(h, a, b, c, d, e, f, g, K[49], W2(49));
+      R(g, h, a, b, c, d, e, f, K[50], W2(50));
+      R(f, g, h, a, b, c, d, e, K[51], W2(51));
+      R(e, f, g, h, a, b, c, d, K[52], W2(52));
+      R(d, e, f, g, h, a, b, c, K[53], W2(53));
+      R(c, d, e, f, g, h, a, b, K[54], W2(54));
+      R(b, c, d, e, f, g, h, a, K[55], W2(55));
+      R(a, b, c, d, e, f, g, h, K[56], W2(56));
+      R(h, a, b, c, d, e, f, g, K[57], W2(57));
+      R(g, h, a, b, c, d, e, f, K[58], W2(58));
+      R(f, g, h, a, b, c, d, e, K[59], W2(59));
+      R(e, f, g, h, a, b, c, d, K[60], W2(60));
+      R(d, e, f, g, h, a, b, c, K[61], W2(61));
+      R(c, d, e, f, g, h, a, b, K[62], W2(62));
+      R(b, c, d, e, f, g, h, a, K[63], W2(63));
+
+      h0 += a;
+      h1 += b;
+      h2 += c;
+      h3 += d;
+      h4 += e;
+      h5 += f;
+      h6 += g;
+      h7 += h;
+
+      a = h0;
+      b = h1;
+      c = h2;
+      d = h3;
+      e = h4;
+      f = h5;
+      g = h6;
+      h = h7;
+
+      R(a, b, c, d, e, f, g, h, K[0], R2(0));
+      R(h, a, b, c, d, e, f, g, K[1], R2(1));
+      R(g, h, a, b, c, d, e, f, K[2], R2(2));
+      R(f, g, h, a, b, c, d, e, K[3], R2(3));
+      R(e, f, g, h, a, b, c, d, K[4], R2(4));
+      R(d, e, f, g, h, a, b, c, K[5], R2(5));
+      R(c, d, e, f, g, h, a, b, K[6], R2(6));
+      R(b, c, d, e, f, g, h, a, K[7], R2(7));
+      R(a, b, c, d, e, f, g, h, K[8], R2(8));
+      R(h, a, b, c, d, e, f, g, K[9], R2(9));
+      R(g, h, a, b, c, d, e, f, K[10], R2(10));
+      R(f, g, h, a, b, c, d, e, K[11], R2(11));
+      R(e, f, g, h, a, b, c, d, K[12], R2(12));
+      R(d, e, f, g, h, a, b, c, K[13], R2(13));
+      R(c, d, e, f, g, h, a, b, K[14], R2(14));
+      R(b, c, d, e, f, g, h, a, K[15], R2(15));
+
+      R(a, b, c, d, e, f, g, h, K[16], R2(16));
+      R(h, a, b, c, d, e, f, g, K[17], R2(17));
+      R(g, h, a, b, c, d, e, f, K[18], R2(18));
+      R(f, g, h, a, b, c, d, e, K[19], R2(19));
+      R(e, f, g, h, a, b, c, d, K[20], R2(20));
+      R(d, e, f, g, h, a, b, c, K[21], R2(21));
+      R(c, d, e, f, g, h, a, b, K[22], R2(22));
+      R(b, c, d, e, f, g, h, a, K[23], R2(23));
+      R(a, b, c, d, e, f, g, h, K[24], R2(24));
+      R(h, a, b, c, d, e, f, g, K[25], R2(25));
+      R(g, h, a, b, c, d, e, f, K[26], R2(26));
+      R(f, g, h, a, b, c, d, e, K[27], R2(27));
+      R(e, f, g, h, a, b, c, d, K[28], R2(28));
+      R(d, e, f, g, h, a, b, c, K[29], R2(29));
+      R(c, d, e, f, g, h, a, b, K[30], R2(30));
+      R(b, c, d, e, f, g, h, a, K[31], R2(31));
+
+      R(a, b, c, d, e, f, g, h, K[32], R2(32));
+      R(h, a, b, c, d, e, f, g, K[33], R2(33));
+      R(g, h, a, b, c, d, e, f, K[34], R2(34));
+      R(f, g, h, a, b, c, d, e, K[35], R2(35));
+      R(e, f, g, h, a, b, c, d, K[36], R2(36));
+      R(d, e, f, g, h, a, b, c, K[37], R2(37));
+      R(c, d, e, f, g, h, a, b, K[38], R2(38));
+      R(b, c, d, e, f, g, h, a, K[39], R2(39));
+      R(a, b, c, d, e, f, g, h, K[40], R2(40));
+      R(h, a, b, c, d, e, f, g, K[41], R2(41));
+      R(g, h, a, b, c, d, e, f, K[42], R2(42));
+      R(f, g, h, a, b, c, d, e, K[43], R2(43));
+      R(e, f, g, h, a, b, c, d, K[44], R2(44));
+      R(d, e, f, g, h, a, b, c, K[45], R2(45));
+      R(c, d, e, f, g, h, a, b, K[46], R2(46));
+      R(b, c, d, e, f, g, h, a, K[47], R2(47));
+
+      R(a, b, c, d, e, f, g, h, K[48], R2(48));
+      R(h, a, b, c, d, e, f, g, K[49], R2(49));
+      R(g, h, a, b, c, d, e, f, K[50], R2(50));
+      R(f, g, h, a, b, c, d, e, K[51], R2(51));
+      R(e, f, g, h, a, b, c, d, K[52], R2(52));
+      R(d, e, f, g, h, a, b, c, K[53], R2(53));
+      R(c, d, e, f, g, h, a, b, K[54], R2(54));
+      R(b, c, d, e, f, g, h, a, K[55], R2(55));
+      R(a, b, c, d, e, f, g, h, K[56], R2(56));
+      R(h, a, b, c, d, e, f, g, K[57], R2(57));
+      R(g, h, a, b, c, d, e, f, K[58], R2(58));
+      R(f, g, h, a, b, c, d, e, K[59], R2(59));
+      R(e, f, g, h, a, b, c, d, K[60], R2(60));
+      R(d, e, f, g, h, a, b, c, K[61], R2(61));
+      R(c, d, e, f, g, h, a, b, K[62], R2(62));
+      R(b, c, d, e, f, g, h, a, K[63], R2(63));
+
+      h0 += a;
+      h1 += b;
+      h2 += c;
+      h3 += d;
+      h4 += e;
+      h5 += f;
+      h6 += g;
+      h7 += h;
+
+      nblks -= 2;
+    }
+
+  while (nblks)
+    {
+      a = h0;
+      b = h1;
+      c = h2;
+      d = h3;
+      e = h4;
+      f = h5;
+      g = h6;
+      h = h7;
+
+      R(a, b, c, d, e, f, g, h, K[0], I(0));
+      R(h, a, b, c, d, e, f, g, K[1], I(1));
+      R(g, h, a, b, c, d, e, f, K[2], I(2));
+      R(f, g, h, a, b, c, d, e, K[3], I(3));
+      R(e, f, g, h, a, b, c, d, K[4], I(4));
+      R(d, e, f, g, h, a, b, c, K[5], I(5));
+      R(c, d, e, f, g, h, a, b, K[6], I(6));
+      R(b, c, d, e, f, g, h, a, K[7], I(7));
+      R(a, b, c, d, e, f, g, h, K[8], I(8));
+      R(h, a, b, c, d, e, f, g, K[9], I(9));
+      R(g, h, a, b, c, d, e, f, K[10], I(10));
+      R(f, g, h, a, b, c, d, e, K[11], I(11));
+      R(e, f, g, h, a, b, c, d, K[12], I(12));
+      R(d, e, f, g, h, a, b, c, K[13], I(13));
+      R(c, d, e, f, g, h, a, b, K[14], I(14));
+      R(b, c, d, e, f, g, h, a, K[15], I(15));
+      data += 64;
+
+      R(a, b, c, d, e, f, g, h, K[16], W(16));
+      R(h, a, b, c, d, e, f, g, K[17], W(17));
+      R(g, h, a, b, c, d, e, f, K[18], W(18));
+      R(f, g, h, a, b, c, d, e, K[19], W(19));
+      R(e, f, g, h, a, b, c, d, K[20], W(20));
+      R(d, e, f, g, h, a, b, c, K[21], W(21));
+      R(c, d, e, f, g, h, a, b, K[22], W(22));
+      R(b, c, d, e, f, g, h, a, K[23], W(23));
+      R(a, b, c, d, e, f, g, h, K[24], W(24));
+      R(h, a, b, c, d, e, f, g, K[25], W(25));
+      R(g, h, a, b, c, d, e, f, K[26], W(26));
+      R(f, g, h, a, b, c, d, e, K[27], W(27));
+      R(e, f, g, h, a, b, c, d, K[28], W(28));
+      R(d, e, f, g, h, a, b, c, K[29], W(29));
+      R(c, d, e, f, g, h, a, b, K[30], W(30));
+      R(b, c, d, e, f, g, h, a, K[31], W(31));
+
+      R(a, b, c, d, e, f, g, h, K[32], W(32));
+      R(h, a, b, c, d, e, f, g, K[33], W(33));
+      R(g, h, a, b, c, d, e, f, K[34], W(34));
+      R(f, g, h, a, b, c, d, e, K[35], W(35));
+      R(e, f, g, h, a, b, c, d, K[36], W(36));
+      R(d, e, f, g, h, a, b, c, K[37], W(37));
+      R(c, d, e, f, g, h, a, b, K[38], W(38));
+      R(b, c, d, e, f, g, h, a, K[39], W(39));
+      R(a, b, c, d, e, f, g, h, K[40], W(40));
+      R(h, a, b, c, d, e, f, g, K[41], W(41));
+      R(g, h, a, b, c, d, e, f, K[42], W(42));
+      R(f, g, h, a, b, c, d, e, K[43], W(43));
+      R(e, f, g, h, a, b, c, d, K[44], W(44));
+      R(d, e, f, g, h, a, b, c, K[45], W(45));
+      R(c, d, e, f, g, h, a, b, K[46], W(46));
+      R(b, c, d, e, f, g, h, a, K[47], W(47));
+
+      R(a, b, c, d, e, f, g, h, K[48], W(48));
+      R(h, a, b, c, d, e, f, g, K[49], W(49));
+      R(g, h, a, b, c, d, e, f, K[50], W(50));
+      R(f, g, h, a, b, c, d, e, K[51], W(51));
+      R(e, f, g, h, a, b, c, d, K[52], W(52));
+      R(d, e, f, g, h, a, b, c, K[53], W(53));
+      R(c, d, e, f, g, h, a, b, K[54], W(54));
+      R(b, c, d, e, f, g, h, a, K[55], W(55));
+      R(a, b, c, d, e, f, g, h, K[56], W(56));
+      R(h, a, b, c, d, e, f, g, K[57], W(57));
+      R(g, h, a, b, c, d, e, f, K[58], W(58));
+      R(f, g, h, a, b, c, d, e, K[59], W(59));
+      R(e, f, g, h, a, b, c, d, K[60], W(60));
+      R(d, e, f, g, h, a, b, c, K[61], W(61));
+      R(c, d, e, f, g, h, a, b, K[62], W(62));
+      R(b, c, d, e, f, g, h, a, K[63], W(63));
+
+      h0 += a;
+      h1 += b;
+      h2 += c;
+      h3 += d;
+      h4 += e;
+      h5 += f;
+      h6 += g;
+      h7 += h;
+
+      nblks--;
+    }
+
+  h0_h3 = vec_merge_idx0_elems (h0, h1, h2, h3);
+  h4_h7 = vec_merge_idx0_elems (h4, h5, h6, h7);
+  vec_vsx_st (h0_h3, 4 * 0, state);
+  vec_vsx_st (h4_h7, 4 * 4, state);
+
+  return sizeof(w2) + sizeof(w);
+}
+#undef R
+#undef Cho
+#undef Maj
+#undef Sum0
+#undef Sum1
+#undef S0
+#undef S1
+#undef I
+#undef W
+#undef I2
+#undef W2
+#undef R2
+
+
+/* SHA2 round in general purpose registers */
+#define R(a,b,c,d,e,f,g,h,k,w) do                                 \
+          {                                                       \
+            t1 = (h) + Sum1((e)) + Cho((e),(f),(g)) + ((k) + (w));\
+            t2 = Sum0((a)) + Maj((a),(b),(c));                    \
+            d += t1;                                              \
+            h  = t1 + t2;                                         \
+          } while (0)
+
+#define Cho(x, y, z)  ((x & y) + (~x & z))
+
+#define Maj(z, x, y)  ((x & y) + (z & (x ^ y)))
+
+#define Sum0(x)       (ror (x, 2) ^ ror (x ^ ror (x, 22-13), 13))
+
+#define Sum1(x)       (ror (x, 6) ^ ror (x, 11) ^ ror (x, 25))
+
+
+/* Message expansion on general purpose registers */
+#define S0(x) (ror ((x), 7) ^ ror ((x), 18) ^ ((x) >> 3))
+#define S1(x) (ror ((x), 17) ^ ror ((x), 19) ^ ((x) >> 10))
+
+#define I(i) ( w[i] = buf_get_be32(data + i * 4) )
+#define WN(i) ({ w[i&0x0f] +=    w[(i-7) &0x0f];  \
+                w[i&0x0f] += S0(w[(i-15)&0x0f]); \
+                w[i&0x0f] += S1(w[(i-2) &0x0f]); \
+                w[i&0x0f]; })
+#define W(i) ({ u32 r = w[i&0x0f]; WN(i); r; })
+#define L(i) w[i&0x0f]
+
+
+unsigned int ASM_FUNC_ATTR
+_gcry_sha256_transform_ppc9(u32 state[8], const unsigned char *data,
+                           size_t nblks)
+{
+  /* GPRs used for round function and message expansion as vector intrinsics
+   * based generates slower code for POWER9. */
+  u32 a, b, c, d, e, f, g, h, t1, t2;
+  u32 w[16];
+
+  a = state[0];
+  b = state[1];
+  c = state[2];
+  d = state[3];
+  e = state[4];
+  f = state[5];
+  g = state[6];
+  h = state[7];
+
+  while (nblks >= 2)
+    {
+      I(0); I(1); I(2); I(3);
+      I(4); I(5); I(6); I(7);
+      I(8); I(9); I(10); I(11);
+      I(12); I(13); I(14); I(15);
+      data += 64;
+      R(a, b, c, d, e, f, g, h, K[0], W(0));
+      R(h, a, b, c, d, e, f, g, K[1], W(1));
+      R(g, h, a, b, c, d, e, f, K[2], W(2));
+      R(f, g, h, a, b, c, d, e, K[3], W(3));
+      R(e, f, g, h, a, b, c, d, K[4], W(4));
+      R(d, e, f, g, h, a, b, c, K[5], W(5));
+      R(c, d, e, f, g, h, a, b, K[6], W(6));
+      R(b, c, d, e, f, g, h, a, K[7], W(7));
+      R(a, b, c, d, e, f, g, h, K[8], W(8));
+      R(h, a, b, c, d, e, f, g, K[9], W(9));
+      R(g, h, a, b, c, d, e, f, K[10], W(10));
+      R(f, g, h, a, b, c, d, e, K[11], W(11));
+      R(e, f, g, h, a, b, c, d, K[12], W(12));
+      R(d, e, f, g, h, a, b, c, K[13], W(13));
+      R(c, d, e, f, g, h, a, b, K[14], W(14));
+      R(b, c, d, e, f, g, h, a, K[15], W(15));
+
+      R(a, b, c, d, e, f, g, h, K[16], W(16));
+      R(h, a, b, c, d, e, f, g, K[17], W(17));
+      R(g, h, a, b, c, d, e, f, K[18], W(18));
+      R(f, g, h, a, b, c, d, e, K[19], W(19));
+      R(e, f, g, h, a, b, c, d, K[20], W(20));
+      R(d, e, f, g, h, a, b, c, K[21], W(21));
+      R(c, d, e, f, g, h, a, b, K[22], W(22));
+      R(b, c, d, e, f, g, h, a, K[23], W(23));
+      R(a, b, c, d, e, f, g, h, K[24], W(24));
+      R(h, a, b, c, d, e, f, g, K[25], W(25));
+      R(g, h, a, b, c, d, e, f, K[26], W(26));
+      R(f, g, h, a, b, c, d, e, K[27], W(27));
+      R(e, f, g, h, a, b, c, d, K[28], W(28));
+      R(d, e, f, g, h, a, b, c, K[29], W(29));
+      R(c, d, e, f, g, h, a, b, K[30], W(30));
+      R(b, c, d, e, f, g, h, a, K[31], W(31));
+
+      R(a, b, c, d, e, f, g, h, K[32], W(32));
+      R(h, a, b, c, d, e, f, g, K[33], W(33));
+      R(g, h, a, b, c, d, e, f, K[34], W(34));
+      R(f, g, h, a, b, c, d, e, K[35], W(35));
+      R(e, f, g, h, a, b, c, d, K[36], W(36));
+      R(d, e, f, g, h, a, b, c, K[37], W(37));
+      R(c, d, e, f, g, h, a, b, K[38], W(38));
+      R(b, c, d, e, f, g, h, a, K[39], W(39));
+      R(a, b, c, d, e, f, g, h, K[40], W(40));
+      R(h, a, b, c, d, e, f, g, K[41], W(41));
+      R(g, h, a, b, c, d, e, f, K[42], W(42));
+      R(f, g, h, a, b, c, d, e, K[43], W(43));
+      R(e, f, g, h, a, b, c, d, K[44], W(44));
+      R(d, e, f, g, h, a, b, c, K[45], W(45));
+      R(c, d, e, f, g, h, a, b, K[46], W(46));
+      R(b, c, d, e, f, g, h, a, K[47], W(47));
+
+      R(a, b, c, d, e, f, g, h, K[48], L(48));
+      R(h, a, b, c, d, e, f, g, K[49], L(49));
+      R(g, h, a, b, c, d, e, f, K[50], L(50));
+      R(f, g, h, a, b, c, d, e, K[51], L(51));
+      I(0); I(1); I(2); I(3);
+      R(e, f, g, h, a, b, c, d, K[52], L(52));
+      R(d, e, f, g, h, a, b, c, K[53], L(53));
+      R(c, d, e, f, g, h, a, b, K[54], L(54));
+      R(b, c, d, e, f, g, h, a, K[55], L(55));
+      I(4); I(5); I(6); I(7);
+      R(a, b, c, d, e, f, g, h, K[56], L(56));
+      R(h, a, b, c, d, e, f, g, K[57], L(57));
+      R(g, h, a, b, c, d, e, f, K[58], L(58));
+      R(f, g, h, a, b, c, d, e, K[59], L(59));
+      I(8); I(9); I(10); I(11);
+      R(e, f, g, h, a, b, c, d, K[60], L(60));
+      R(d, e, f, g, h, a, b, c, K[61], L(61));
+      R(c, d, e, f, g, h, a, b, K[62], L(62));
+      R(b, c, d, e, f, g, h, a, K[63], L(63));
+      I(12); I(13); I(14); I(15);
+      data += 64;
+
+      a += state[0];
+      b += state[1];
+      c += state[2];
+      d += state[3];
+      e += state[4];
+      f += state[5];
+      g += state[6];
+      h += state[7];
+      state[0] = a;
+      state[1] = b;
+      state[2] = c;
+      state[3] = d;
+      state[4] = e;
+      state[5] = f;
+      state[6] = g;
+      state[7] = h;
+
+      R(a, b, c, d, e, f, g, h, K[0], W(0));
+      R(h, a, b, c, d, e, f, g, K[1], W(1));
+      R(g, h, a, b, c, d, e, f, K[2], W(2));
+      R(f, g, h, a, b, c, d, e, K[3], W(3));
+      R(e, f, g, h, a, b, c, d, K[4], W(4));
+      R(d, e, f, g, h, a, b, c, K[5], W(5));
+      R(c, d, e, f, g, h, a, b, K[6], W(6));
+      R(b, c, d, e, f, g, h, a, K[7], W(7));
+      R(a, b, c, d, e, f, g, h, K[8], W(8));
+      R(h, a, b, c, d, e, f, g, K[9], W(9));
+      R(g, h, a, b, c, d, e, f, K[10], W(10));
+      R(f, g, h, a, b, c, d, e, K[11], W(11));
+      R(e, f, g, h, a, b, c, d, K[12], W(12));
+      R(d, e, f, g, h, a, b, c, K[13], W(13));
+      R(c, d, e, f, g, h, a, b, K[14], W(14));
+      R(b, c, d, e, f, g, h, a, K[15], W(15));
+
+      R(a, b, c, d, e, f, g, h, K[16], W(16));
+      R(h, a, b, c, d, e, f, g, K[17], W(17));
+      R(g, h, a, b, c, d, e, f, K[18], W(18));
+      R(f, g, h, a, b, c, d, e, K[19], W(19));
+      R(e, f, g, h, a, b, c, d, K[20], W(20));
+      R(d, e, f, g, h, a, b, c, K[21], W(21));
+      R(c, d, e, f, g, h, a, b, K[22], W(22));
+      R(b, c, d, e, f, g, h, a, K[23], W(23));
+      R(a, b, c, d, e, f, g, h, K[24], W(24));
+      R(h, a, b, c, d, e, f, g, K[25], W(25));
+      R(g, h, a, b, c, d, e, f, K[26], W(26));
+      R(f, g, h, a, b, c, d, e, K[27], W(27));
+      R(e, f, g, h, a, b, c, d, K[28], W(28));
+      R(d, e, f, g, h, a, b, c, K[29], W(29));
+      R(c, d, e, f, g, h, a, b, K[30], W(30));
+      R(b, c, d, e, f, g, h, a, K[31], W(31));
+
+      R(a, b, c, d, e, f, g, h, K[32], W(32));
+      R(h, a, b, c, d, e, f, g, K[33], W(33));
+      R(g, h, a, b, c, d, e, f, K[34], W(34));
+      R(f, g, h, a, b, c, d, e, K[35], W(35));
+      R(e, f, g, h, a, b, c, d, K[36], W(36));
+      R(d, e, f, g, h, a, b, c, K[37], W(37));
+      R(c, d, e, f, g, h, a, b, K[38], W(38));
+      R(b, c, d, e, f, g, h, a, K[39], W(39));
+      R(a, b, c, d, e, f, g, h, K[40], W(40));
+      R(h, a, b, c, d, e, f, g, K[41], W(41));
+      R(g, h, a, b, c, d, e, f, K[42], W(42));
+      R(f, g, h, a, b, c, d, e, K[43], W(43));
+      R(e, f, g, h, a, b, c, d, K[44], W(44));
+      R(d, e, f, g, h, a, b, c, K[45], W(45));
+      R(c, d, e, f, g, h, a, b, K[46], W(46));
+      R(b, c, d, e, f, g, h, a, K[47], W(47));
+
+      R(a, b, c, d, e, f, g, h, K[48], L(48));
+      R(h, a, b, c, d, e, f, g, K[49], L(49));
+      R(g, h, a, b, c, d, e, f, K[50], L(50));
+      R(f, g, h, a, b, c, d, e, K[51], L(51));
+      R(e, f, g, h, a, b, c, d, K[52], L(52));
+      R(d, e, f, g, h, a, b, c, K[53], L(53));
+      R(c, d, e, f, g, h, a, b, K[54], L(54));
+      R(b, c, d, e, f, g, h, a, K[55], L(55));
+      R(a, b, c, d, e, f, g, h, K[56], L(56));
+      R(h, a, b, c, d, e, f, g, K[57], L(57));
+      R(g, h, a, b, c, d, e, f, K[58], L(58));
+      R(f, g, h, a, b, c, d, e, K[59], L(59));
+      R(e, f, g, h, a, b, c, d, K[60], L(60));
+      R(d, e, f, g, h, a, b, c, K[61], L(61));
+      R(c, d, e, f, g, h, a, b, K[62], L(62));
+      R(b, c, d, e, f, g, h, a, K[63], L(63));
+
+      a += state[0];
+      b += state[1];
+      c += state[2];
+      d += state[3];
+      e += state[4];
+      f += state[5];
+      g += state[6];
+      h += state[7];
+      state[0] = a;
+      state[1] = b;
+      state[2] = c;
+      state[3] = d;
+      state[4] = e;
+      state[5] = f;
+      state[6] = g;
+      state[7] = h;
+
+      nblks -= 2;
+    }
+
+  while (nblks)
+    {
+      I(0); I(1); I(2); I(3);
+      I(4); I(5); I(6); I(7);
+      I(8); I(9); I(10); I(11);
+      I(12); I(13); I(14); I(15);
+      data += 64;
+      R(a, b, c, d, e, f, g, h, K[0], W(0));
+      R(h, a, b, c, d, e, f, g, K[1], W(1));
+      R(g, h, a, b, c, d, e, f, K[2], W(2));
+      R(f, g, h, a, b, c, d, e, K[3], W(3));
+      R(e, f, g, h, a, b, c, d, K[4], W(4));
+      R(d, e, f, g, h, a, b, c, K[5], W(5));
+      R(c, d, e, f, g, h, a, b, K[6], W(6));
+      R(b, c, d, e, f, g, h, a, K[7], W(7));
+      R(a, b, c, d, e, f, g, h, K[8], W(8));
+      R(h, a, b, c, d, e, f, g, K[9], W(9));
+      R(g, h, a, b, c, d, e, f, K[10], W(10));
+      R(f, g, h, a, b, c, d, e, K[11], W(11));
+      R(e, f, g, h, a, b, c, d, K[12], W(12));
+      R(d, e, f, g, h, a, b, c, K[13], W(13));
+      R(c, d, e, f, g, h, a, b, K[14], W(14));
+      R(b, c, d, e, f, g, h, a, K[15], W(15));
+
+      R(a, b, c, d, e, f, g, h, K[16], W(16));
+      R(h, a, b, c, d, e, f, g, K[17], W(17));
+      R(g, h, a, b, c, d, e, f, K[18], W(18));
+      R(f, g, h, a, b, c, d, e, K[19], W(19));
+      R(e, f, g, h, a, b, c, d, K[20], W(20));
+      R(d, e, f, g, h, a, b, c, K[21], W(21));
+      R(c, d, e, f, g, h, a, b, K[22], W(22));
+      R(b, c, d, e, f, g, h, a, K[23], W(23));
+      R(a, b, c, d, e, f, g, h, K[24], W(24));
+      R(h, a, b, c, d, e, f, g, K[25], W(25));
+      R(g, h, a, b, c, d, e, f, K[26], W(26));
+      R(f, g, h, a, b, c, d, e, K[27], W(27));
+      R(e, f, g, h, a, b, c, d, K[28], W(28));
+      R(d, e, f, g, h, a, b, c, K[29], W(29));
+      R(c, d, e, f, g, h, a, b, K[30], W(30));
+      R(b, c, d, e, f, g, h, a, K[31], W(31));
+
+      R(a, b, c, d, e, f, g, h, K[32], W(32));
+      R(h, a, b, c, d, e, f, g, K[33], W(33));
+      R(g, h, a, b, c, d, e, f, K[34], W(34));
+      R(f, g, h, a, b, c, d, e, K[35], W(35));
+      R(e, f, g, h, a, b, c, d, K[36], W(36));
+      R(d, e, f, g, h, a, b, c, K[37], W(37));
+      R(c, d, e, f, g, h, a, b, K[38], W(38));
+      R(b, c, d, e, f, g, h, a, K[39], W(39));
+      R(a, b, c, d, e, f, g, h, K[40], W(40));
+      R(h, a, b, c, d, e, f, g, K[41], W(41));
+      R(g, h, a, b, c, d, e, f, K[42], W(42));
+      R(f, g, h, a, b, c, d, e, K[43], W(43));
+      R(e, f, g, h, a, b, c, d, K[44], W(44));
+      R(d, e, f, g, h, a, b, c, K[45], W(45));
+      R(c, d, e, f, g, h, a, b, K[46], W(46));
+      R(b, c, d, e, f, g, h, a, K[47], W(47));
+
+      R(a, b, c, d, e, f, g, h, K[48], L(48));
+      R(h, a, b, c, d, e, f, g, K[49], L(49));
+      R(g, h, a, b, c, d, e, f, K[50], L(50));
+      R(f, g, h, a, b, c, d, e, K[51], L(51));
+      R(e, f, g, h, a, b, c, d, K[52], L(52));
+      R(d, e, f, g, h, a, b, c, K[53], L(53));
+      R(c, d, e, f, g, h, a, b, K[54], L(54));
+      R(b, c, d, e, f, g, h, a, K[55], L(55));
+      R(a, b, c, d, e, f, g, h, K[56], L(56));
+      R(h, a, b, c, d, e, f, g, K[57], L(57));
+      R(g, h, a, b, c, d, e, f, K[58], L(58));
+      R(f, g, h, a, b, c, d, e, K[59], L(59));
+      R(e, f, g, h, a, b, c, d, K[60], L(60));
+      R(d, e, f, g, h, a, b, c, K[61], L(61));
+      R(c, d, e, f, g, h, a, b, K[62], L(62));
+      R(b, c, d, e, f, g, h, a, K[63], L(63));
+
+      a += state[0];
+      b += state[1];
+      c += state[2];
+      d += state[3];
+      e += state[4];
+      f += state[5];
+      g += state[6];
+      h += state[7];
+      state[0] = a;
+      state[1] = b;
+      state[2] = c;
+      state[3] = d;
+      state[4] = e;
+      state[5] = f;
+      state[6] = g;
+      state[7] = h;
+
+      nblks--;
+    }
+
+  return sizeof(w);
+}
+
+#endif /* ENABLE_PPC_CRYPTO_SUPPORT */
diff --git a/grub-core/lib/libgcrypt/cipher/sha256-ssse3-amd64.S 
b/grub-core/lib/libgcrypt/cipher/sha256-ssse3-amd64.S
new file mode 100644
index 000000000..401ff6f44
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/sha256-ssse3-amd64.S
@@ -0,0 +1,528 @@
+/*
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright (c) 2012, Intel Corporation
+;
+; All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions are
+; met:
+;
+; * Redistributions of source code must retain the above copyright
+;   notice, this list of conditions and the following disclaimer.
+;
+; * Redistributions in binary form must reproduce the above copyright
+;   notice, this list of conditions and the following disclaimer in the
+;   documentation and/or other materials provided with the
+;   distribution.
+;
+; * Neither the name of the Intel Corporation nor the names of its
+;   contributors may be used to endorse or promote products derived from
+;   this software without specific prior written permission.
+;
+;
+; THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION "AS IS" AND ANY
+; EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+; PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR
+; CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;
+; This code is described in an Intel White-Paper:
+; "Fast SHA-256 Implementations on Intel Architecture Processors"
+;
+; To find it, surf to http://www.intel.com/p/en_US/embedded
+; and search for that title.
+; The paper is expected to be released roughly at the end of April, 2012
+;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; This code schedules 1 blocks at a time, with 4 lanes per block
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+*/
+/*
+ * Conversion to GAS assembly and integration to libgcrypt
+ *  by Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * Note: original implementation was named as SHA256-SSE4. However, only SSSE3
+ *       is required.
+ */
+
+#ifdef __x86_64
+#include <config.h>
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
+    defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \
+    defined(HAVE_GCC_INLINE_ASM_SSSE3) && defined(USE_SHA256)
+
+#include "asm-common-amd64.h"
+
+.intel_syntax noprefix
+
+#define        MOVDQ movdqu /* assume buffers not aligned */
+
+/*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; Define Macros*/
+
+/* addm [mem], reg
+ * Add reg to mem using reg-mem add and store */
+#define addm(p1, p2) \
+       add     p2, p1; \
+       mov     p1, p2;
+
+/*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;*/
+
+/* COPY_XMM_AND_BSWAP xmm, [mem], byte_flip_mask
+ * Load xmm with mem and byte swap each dword */
+#define COPY_XMM_AND_BSWAP(p1, p2, p3) \
+       MOVDQ p1, p2; \
+       pshufb p1, p3;
+
+/*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;*/
+
+#define X0 xmm4
+#define X1 xmm5
+#define X2 xmm6
+#define X3 xmm7
+
+#define XTMP0 xmm0
+#define XTMP1 xmm1
+#define XTMP2 xmm2
+#define XTMP3 xmm3
+#define XTMP4 xmm8
+#define XFER xmm9
+
+#define SHUF_00BA xmm10 /* shuffle xBxA -> 00BA */
+#define SHUF_DC00 xmm11 /* shuffle xDxC -> DC00 */
+#define BYTE_FLIP_MASK xmm12
+
+#define NUM_BLKS rdx   /* 3rd arg */
+#define CTX rsi        /* 2nd arg */
+#define INP rdi        /* 1st arg */
+
+#define SRND rdi       /* clobbers INP */
+#define c ecx
+#define d r8d
+#define e edx
+
+#define TBL rbp
+#define a eax
+#define b ebx
+
+#define f r9d
+#define g r10d
+#define h r11d
+
+#define y0 r13d
+#define y1 r14d
+#define y2 r15d
+
+
+
+#define _INP_END_SIZE  8
+#define _INP_SIZE      8
+#define _XFER_SIZE     8
+#define _XMM_SAVE_SIZE 0
+/* STACK_SIZE plus pushes must be an odd multiple of 8 */
+#define _ALIGN_SIZE    8
+
+#define _INP_END       0
+#define _INP           (_INP_END  + _INP_END_SIZE)
+#define _XFER          (_INP      + _INP_SIZE)
+#define _XMM_SAVE      (_XFER     + _XFER_SIZE + _ALIGN_SIZE)
+#define STACK_SIZE     (_XMM_SAVE + _XMM_SAVE_SIZE)
+
+
+#define FOUR_ROUNDS_AND_SCHED_0(X0, X1, X2, X3, a, b, c, d, e, f, g, h) \
+               /* compute s0 four at a time and s1 two at a time */; \
+               /* compute W[-16] + W[-7] 4 at a time */; \
+               movdqa  XTMP0, X3; \
+       mov     y0, e           /* y0 = e */; \
+       ror     y0, (25-11)     /* y0 = e >> (25-11) */; \
+       mov     y1, a           /* y1 = a */; \
+               palignr XTMP0, X2, 4    /* XTMP0 = W[-7] */; \
+       ror     y1, (22-13)     /* y1 = a >> (22-13) */; \
+       xor     y0, e           /* y0 = e ^ (e >> (25-11)) */; \
+       mov     y2, f           /* y2 = f */; \
+       ror     y0, (11-6)      /* y0 = (e >> (11-6)) ^ (e >> (25-6)) */; \
+               movdqa  XTMP1, X1; \
+       xor     y1, a           /* y1 = a ^ (a >> (22-13) */; \
+       xor     y2, g           /* y2 = f^g */; \
+               paddd   XTMP0, X0       /* XTMP0 = W[-7] + W[-16] */; \
+       xor     y0, e           /* y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) */; \
+       and     y2, e           /* y2 = (f^g)&e */; \
+       ror     y1, (13-2)      /* y1 = (a >> (13-2)) ^ (a >> (22-2)) */; \
+               /* compute s0 */; \
+               palignr XTMP1, X0, 4    /* XTMP1 = W[-15] */; \
+       xor     y1, a           /* y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) */; \
+       ror     y0, 6           /* y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) */; \
+       xor     y2, g           /* y2 = CH = ((f^g)&e)^g */; \
+               movdqa  XTMP2, XTMP1    /* XTMP2 = W[-15] */; \
+       ror     y1, 2           /* y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) */; \
+       add     y2, y0          /* y2 = S1 + CH */; \
+       add     y2, [rsp + _XFER + 0*4] /* y2 = k + w + S1 + CH */; \
+               movdqa  XTMP3, XTMP1    /* XTMP3 = W[-15] */; \
+       mov     y0, a           /* y0 = a */; \
+       add     h, y2           /* h = h + S1 + CH + k + w */; \
+       mov     y2, a           /* y2 = a */; \
+               pslld   XTMP1, (32-7); \
+       or      y0, c           /* y0 = a|c */; \
+       add     d, h            /* d = d + h + S1 + CH + k + w */; \
+       and     y2, c           /* y2 = a&c */; \
+               psrld   XTMP2, 7; \
+       and     y0, b           /* y0 = (a|c)&b */; \
+       add     h, y1           /* h = h + S1 + CH + k + w + S0 */; \
+               por     XTMP1, XTMP2    /* XTMP1 = W[-15] ror 7 */; \
+       or      y0, y2          /* y0 = MAJ = (a|c)&b)|(a&c) */; \
+       lea     h, [h + y0]     /* h = h + S1 + CH + k + w + S0 + MAJ */
+
+#define FOUR_ROUNDS_AND_SCHED_1(X0, X1, X2, X3, a, b, c, d, e, f, g, h) \
+               movdqa  XTMP2, XTMP3    /* XTMP2 = W[-15] */; \
+       mov     y0, e           /* y0 = e */; \
+       mov     y1, a           /* y1 = a */; \
+               movdqa  XTMP4, XTMP3    /* XTMP4 = W[-15] */; \
+       ror     y0, (25-11)     /* y0 = e >> (25-11) */; \
+       xor     y0, e           /* y0 = e ^ (e >> (25-11)) */; \
+       mov     y2, f           /* y2 = f */; \
+       ror     y1, (22-13)     /* y1 = a >> (22-13) */; \
+               pslld   XTMP3, (32-18); \
+       xor     y1, a           /* y1 = a ^ (a >> (22-13) */; \
+       ror     y0, (11-6)      /* y0 = (e >> (11-6)) ^ (e >> (25-6)) */; \
+       xor     y2, g           /* y2 = f^g */; \
+               psrld   XTMP2, 18; \
+       ror     y1, (13-2)      /* y1 = (a >> (13-2)) ^ (a >> (22-2)) */; \
+       xor     y0, e           /* y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) */; \
+       and     y2, e           /* y2 = (f^g)&e */; \
+       ror     y0, 6           /* y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) */; \
+               pxor    XTMP1, XTMP3; \
+       xor     y1, a           /* y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) */; \
+       xor     y2, g           /* y2 = CH = ((f^g)&e)^g */; \
+               psrld   XTMP4, 3        /* XTMP4 = W[-15] >> 3 */; \
+       add     y2, y0          /* y2 = S1 + CH */; \
+       add     y2, [rsp + _XFER + 1*4] /* y2 = k + w + S1 + CH */; \
+       ror     y1, 2           /* y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) */; \
+               pxor    XTMP1, XTMP2    /* XTMP1 = W[-15] ror 7 ^ W[-15] ror 18 
*/; \
+       mov     y0, a           /* y0 = a */; \
+       add     h, y2           /* h = h + S1 + CH + k + w */; \
+       mov     y2, a           /* y2 = a */; \
+               pxor    XTMP1, XTMP4    /* XTMP1 = s0 */; \
+       or      y0, c           /* y0 = a|c */; \
+       add     d, h            /* d = d + h + S1 + CH + k + w */; \
+       and     y2, c           /* y2 = a&c */; \
+               /* compute low s1 */; \
+               pshufd  XTMP2, X3, 0b11111010   /* XTMP2 = W[-2] {BBAA} */; \
+       and     y0, b           /* y0 = (a|c)&b */; \
+       add     h, y1           /* h = h + S1 + CH + k + w + S0 */; \
+               paddd   XTMP0, XTMP1    /* XTMP0 = W[-16] + W[-7] + s0 */; \
+       or      y0, y2          /* y0 = MAJ = (a|c)&b)|(a&c) */; \
+       lea     h, [h + y0]     /* h = h + S1 + CH + k + w + S0 + MAJ */
+
+#define FOUR_ROUNDS_AND_SCHED_2(X0, X1, X2, X3, a, b, c, d, e, f, g, h) \
+               movdqa  XTMP3, XTMP2    /* XTMP3 = W[-2] {BBAA} */; \
+       mov     y0, e           /* y0 = e */; \
+       mov     y1, a           /* y1 = a */; \
+       ror     y0, (25-11)     /* y0 = e >> (25-11) */; \
+               movdqa  XTMP4, XTMP2    /* XTMP4 = W[-2] {BBAA} */; \
+       xor     y0, e           /* y0 = e ^ (e >> (25-11)) */; \
+       ror     y1, (22-13)     /* y1 = a >> (22-13) */; \
+       mov     y2, f           /* y2 = f */; \
+       xor     y1, a           /* y1 = a ^ (a >> (22-13) */; \
+       ror     y0, (11-6)      /* y0 = (e >> (11-6)) ^ (e >> (25-6)) */; \
+               psrlq   XTMP2, 17       /* XTMP2 = W[-2] ror 17 {xBxA} */; \
+       xor     y2, g           /* y2 = f^g */; \
+               psrlq   XTMP3, 19       /* XTMP3 = W[-2] ror 19 {xBxA} */; \
+       xor     y0, e           /* y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) */; \
+       and     y2, e           /* y2 = (f^g)&e */; \
+               psrld   XTMP4, 10       /* XTMP4 = W[-2] >> 10 {BBAA} */; \
+       ror     y1, (13-2)      /* y1 = (a >> (13-2)) ^ (a >> (22-2)) */; \
+       xor     y1, a           /* y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) */; \
+       xor     y2, g           /* y2 = CH = ((f^g)&e)^g */; \
+       ror     y0, 6           /* y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) */; \
+               pxor    XTMP2, XTMP3; \
+       add     y2, y0          /* y2 = S1 + CH */; \
+       ror     y1, 2           /* y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) */; \
+       add     y2, [rsp + _XFER + 2*4] /* y2 = k + w + S1 + CH */; \
+               pxor    XTMP4, XTMP2    /* XTMP4 = s1 {xBxA} */; \
+       mov     y0, a           /* y0 = a */; \
+       add     h, y2           /* h = h + S1 + CH + k + w */; \
+       mov     y2, a           /* y2 = a */; \
+               pshufb  XTMP4, SHUF_00BA        /* XTMP4 = s1 {00BA} */; \
+       or      y0, c           /* y0 = a|c */; \
+       add     d, h            /* d = d + h + S1 + CH + k + w */; \
+       and     y2, c           /* y2 = a&c */; \
+               paddd   XTMP0, XTMP4    /* XTMP0 = {..., ..., W[1], W[0]} */; \
+       and     y0, b           /* y0 = (a|c)&b */; \
+       add     h, y1           /* h = h + S1 + CH + k + w + S0 */; \
+               /* compute high s1 */; \
+               pshufd  XTMP2, XTMP0, 0b01010000 /* XTMP2 = W[-2] {DDCC} */; \
+       or      y0, y2          /* y0 = MAJ = (a|c)&b)|(a&c) */; \
+       lea     h, [h + y0]     /* h = h + S1 + CH + k + w + S0 + MAJ */
+
+#define FOUR_ROUNDS_AND_SCHED_3(X0, X1, X2, X3, a, b, c, d, e, f, g, h) \
+               movdqa  XTMP3, XTMP2    /* XTMP3 = W[-2] {DDCC} */; \
+       mov     y0, e           /* y0 = e */; \
+       ror     y0, (25-11)     /* y0 = e >> (25-11) */; \
+       mov     y1, a           /* y1 = a */; \
+               movdqa  X0,    XTMP2    /* X0    = W[-2] {DDCC} */; \
+       ror     y1, (22-13)     /* y1 = a >> (22-13) */; \
+       xor     y0, e           /* y0 = e ^ (e >> (25-11)) */; \
+       mov     y2, f           /* y2 = f */; \
+       ror     y0, (11-6)      /* y0 = (e >> (11-6)) ^ (e >> (25-6)) */; \
+               psrlq   XTMP2, 17       /* XTMP2 = W[-2] ror 17 {xDxC} */; \
+       xor     y1, a           /* y1 = a ^ (a >> (22-13) */; \
+       xor     y2, g           /* y2 = f^g */; \
+               psrlq   XTMP3, 19       /* XTMP3 = W[-2] ror 19 {xDxC} */; \
+       xor     y0, e           /* y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) */; \
+       and     y2, e           /* y2 = (f^g)&e */; \
+       ror     y1, (13-2)      /* y1 = (a >> (13-2)) ^ (a >> (22-2)) */; \
+               psrld   X0,    10       /* X0 = W[-2] >> 10 {DDCC} */; \
+       xor     y1, a           /* y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) */; \
+       ror     y0, 6           /* y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) */; \
+       xor     y2, g           /* y2 = CH = ((f^g)&e)^g */; \
+               pxor    XTMP2, XTMP3; \
+       ror     y1, 2           /* y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) */; \
+       add     y2, y0          /* y2 = S1 + CH */; \
+       add     y2, [rsp + _XFER + 3*4] /* y2 = k + w + S1 + CH */; \
+               pxor    X0, XTMP2       /* X0 = s1 {xDxC} */; \
+       mov     y0, a           /* y0 = a */; \
+       add     h, y2           /* h = h + S1 + CH + k + w */; \
+       mov     y2, a           /* y2 = a */; \
+               pshufb  X0, SHUF_DC00   /* X0 = s1 {DC00} */; \
+       or      y0, c           /* y0 = a|c */; \
+       add     d, h            /* d = d + h + S1 + CH + k + w */; \
+       and     y2, c           /* y2 = a&c */; \
+               paddd   X0, XTMP0       /* X0 = {W[3], W[2], W[1], W[0]} */; \
+       and     y0, b           /* y0 = (a|c)&b */; \
+       add     h, y1           /* h = h + S1 + CH + k + w + S0 */; \
+       or      y0, y2          /* y0 = MAJ = (a|c)&b)|(a&c) */; \
+       lea     h, [h + y0]     /* h = h + S1 + CH + k + w + S0 + MAJ */
+
+#define FOUR_ROUNDS_AND_SCHED(X0, X1, X2, X3, a, b, c, d, e, f, g, h) \
+       FOUR_ROUNDS_AND_SCHED_0(X0, X1, X2, X3, a, b, c, d, e, f, g, h); \
+       FOUR_ROUNDS_AND_SCHED_1(X0, X1, X2, X3, h, a, b, c, d, e, f, g); \
+       FOUR_ROUNDS_AND_SCHED_2(X0, X1, X2, X3, g, h, a, b, c, d, e, f); \
+       FOUR_ROUNDS_AND_SCHED_3(X0, X1, X2, X3, f, g, h, a, b, c, d, e);
+
+/* input is [rsp + _XFER + %1 * 4] */
+#define DO_ROUND(i1, a, b, c, d, e, f, g, h) \
+       mov     y0, e           /* y0 = e */; \
+       ror     y0, (25-11)     /* y0 = e >> (25-11) */; \
+       mov     y1, a           /* y1 = a */; \
+       xor     y0, e           /* y0 = e ^ (e >> (25-11)) */; \
+       ror     y1, (22-13)     /* y1 = a >> (22-13) */; \
+       mov     y2, f           /* y2 = f */; \
+       xor     y1, a           /* y1 = a ^ (a >> (22-13) */; \
+       ror     y0, (11-6)      /* y0 = (e >> (11-6)) ^ (e >> (25-6)) */; \
+       xor     y2, g           /* y2 = f^g */; \
+       xor     y0, e           /* y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) */; \
+       ror     y1, (13-2)      /* y1 = (a >> (13-2)) ^ (a >> (22-2)) */; \
+       and     y2, e           /* y2 = (f^g)&e */; \
+       xor     y1, a           /* y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) */; \
+       ror     y0, 6           /* y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) */; \
+       xor     y2, g           /* y2 = CH = ((f^g)&e)^g */; \
+       add     y2, y0          /* y2 = S1 + CH */; \
+       ror     y1, 2           /* y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) */; \
+       add     y2, [rsp + _XFER + i1 * 4]      /* y2 = k + w + S1 + CH */; \
+       mov     y0, a           /* y0 = a */; \
+       add     h, y2           /* h = h + S1 + CH + k + w */; \
+       mov     y2, a           /* y2 = a */; \
+       or      y0, c           /* y0 = a|c */; \
+       add     d, h            /* d = d + h + S1 + CH + k + w */; \
+       and     y2, c           /* y2 = a&c */; \
+       and     y0, b           /* y0 = (a|c)&b */; \
+       add     h, y1           /* h = h + S1 + CH + k + w + S0 */; \
+       or      y0, y2          /* y0 = MAJ = (a|c)&b)|(a&c) */; \
+       lea     h, [h + y0]     /* h = h + S1 + CH + k + w + S0 + MAJ */
+
+/*
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; void sha256_sse4(void *input_data, UINT32 digest[8], UINT64 num_blks)
+;; arg 1 : pointer to input data
+;; arg 2 : pointer to digest
+;; arg 3 : Num blocks
+*/
+.text
+.globl _gcry_sha256_transform_amd64_ssse3
+ELF(.type  _gcry_sha256_transform_amd64_ssse3,@function;)
+.align 16
+_gcry_sha256_transform_amd64_ssse3:
+       CFI_STARTPROC()
+       push    rbx
+       CFI_PUSH(rbx)
+       push    rbp
+       CFI_PUSH(rbp)
+       push    r13
+       CFI_PUSH(r13)
+       push    r14
+       CFI_PUSH(r14)
+       push    r15
+       CFI_PUSH(r15)
+
+       sub     rsp, STACK_SIZE
+       CFI_ADJUST_CFA_OFFSET(STACK_SIZE);
+
+       shl     NUM_BLKS, 6     /* convert to bytes */
+       jz      .Ldone_hash
+       add     NUM_BLKS, INP   /* pointer to end of data */
+       mov     [rsp + _INP_END], NUM_BLKS
+
+       /* load initial digest */
+       mov     a,[4*0 + CTX]
+       mov     b,[4*1 + CTX]
+       mov     c,[4*2 + CTX]
+       mov     d,[4*3 + CTX]
+       mov     e,[4*4 + CTX]
+       mov     f,[4*5 + CTX]
+       mov     g,[4*6 + CTX]
+       mov     h,[4*7 + CTX]
+
+       movdqa  BYTE_FLIP_MASK, [.LPSHUFFLE_BYTE_FLIP_MASK ADD_RIP]
+       movdqa  SHUF_00BA, [.L_SHUF_00BA ADD_RIP]
+       movdqa  SHUF_DC00, [.L_SHUF_DC00 ADD_RIP]
+
+.Loop0:
+       lea     TBL, [.LK256 ADD_RIP]
+
+       /* byte swap first 16 dwords */
+       COPY_XMM_AND_BSWAP(X0, [INP + 0*16], BYTE_FLIP_MASK)
+       COPY_XMM_AND_BSWAP(X1, [INP + 1*16], BYTE_FLIP_MASK)
+       COPY_XMM_AND_BSWAP(X2, [INP + 2*16], BYTE_FLIP_MASK)
+       COPY_XMM_AND_BSWAP(X3, [INP + 3*16], BYTE_FLIP_MASK)
+
+       mov     [rsp + _INP], INP
+
+       /* schedule 48 input dwords, by doing 3 rounds of 16 each */
+       mov     SRND, 3
+.align 16
+.Loop1:
+       movdqa  XFER, [TBL + 0*16]
+       paddd   XFER, X0
+       movdqa  [rsp + _XFER], XFER
+       FOUR_ROUNDS_AND_SCHED(X0, X1, X2, X3, a, b, c, d, e, f, g, h)
+
+       movdqa  XFER, [TBL + 1*16]
+       paddd   XFER, X1
+       movdqa  [rsp + _XFER], XFER
+       FOUR_ROUNDS_AND_SCHED(X1, X2, X3, X0, e, f, g, h, a, b, c, d)
+
+       movdqa  XFER, [TBL + 2*16]
+       paddd   XFER, X2
+       movdqa  [rsp + _XFER], XFER
+       FOUR_ROUNDS_AND_SCHED(X2, X3, X0, X1, a, b, c, d, e, f, g, h)
+
+       movdqa  XFER, [TBL + 3*16]
+       paddd   XFER, X3
+       movdqa  [rsp + _XFER], XFER
+       add     TBL, 4*16
+       FOUR_ROUNDS_AND_SCHED(X3, X0, X1, X2, e, f, g, h, a, b, c, d)
+
+       sub     SRND, 1
+       jne     .Loop1
+
+       mov     SRND, 2
+.Loop2:
+       paddd   X0, [TBL + 0*16]
+       movdqa  [rsp + _XFER], X0
+       DO_ROUND(0, a, b, c, d, e, f, g, h)
+       DO_ROUND(1, h, a, b, c, d, e, f, g)
+       DO_ROUND(2, g, h, a, b, c, d, e, f)
+       DO_ROUND(3, f, g, h, a, b, c, d, e)
+       paddd   X1, [TBL + 1*16]
+       movdqa  [rsp + _XFER], X1
+       add     TBL, 2*16
+       DO_ROUND(0, e, f, g, h, a, b, c, d)
+       DO_ROUND(1, d, e, f, g, h, a, b, c)
+       DO_ROUND(2, c, d, e, f, g, h, a, b)
+       DO_ROUND(3, b, c, d, e, f, g, h, a)
+
+       movdqa  X0, X2
+       movdqa  X1, X3
+
+       sub     SRND, 1
+       jne     .Loop2
+
+       addm([4*0 + CTX],a)
+       addm([4*1 + CTX],b)
+       addm([4*2 + CTX],c)
+       addm([4*3 + CTX],d)
+       addm([4*4 + CTX],e)
+       addm([4*5 + CTX],f)
+       addm([4*6 + CTX],g)
+       addm([4*7 + CTX],h)
+
+       mov     INP, [rsp + _INP]
+       add     INP, 64
+       cmp     INP, [rsp + _INP_END]
+       jne     .Loop0
+
+       pxor    xmm0, xmm0
+       pxor    xmm1, xmm1
+       pxor    xmm2, xmm2
+       pxor    xmm3, xmm3
+       pxor    xmm4, xmm4
+       pxor    xmm5, xmm5
+       pxor    xmm6, xmm6
+       pxor    xmm7, xmm7
+       pxor    xmm8, xmm8
+       pxor    xmm9, xmm9
+       pxor    xmm10, xmm10
+       pxor    xmm11, xmm11
+       pxor    xmm12, xmm12
+
+.Ldone_hash:
+       pxor    XFER, XFER
+       movdqa  [rsp + _XFER], XFER
+       xor     eax, eax
+
+       add     rsp, STACK_SIZE
+       CFI_ADJUST_CFA_OFFSET(-STACK_SIZE);
+
+       pop     r15
+       CFI_POP(r15)
+       pop     r14
+       CFI_POP(r14)
+       pop     r13
+       CFI_POP(r13)
+       pop     rbp
+       CFI_POP(rbp)
+       pop     rbx
+       CFI_POP(rbx)
+
+       ret_spec_stop
+       CFI_ENDPROC()
+
+
+.align 16
+.LK256:
+       .long   0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
+       .long   0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
+       .long   0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
+       .long   0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
+       .long   0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
+       .long   0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
+       .long   0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
+       .long   0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
+       .long   0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
+       .long   0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
+       .long   0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
+       .long   0xd192e819,0xd6990624,0xf40e3585,0x106aa070
+       .long   0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
+       .long   0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
+       .long   0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
+       .long   0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
+
+.LPSHUFFLE_BYTE_FLIP_MASK: .octa 0x0c0d0e0f08090a0b0405060700010203
+
+/* shuffle xBxA -> 00BA */
+.L_SHUF_00BA:              .octa 0xFFFFFFFFFFFFFFFF0b0a090803020100
+
+/* shuffle xDxC -> DC00 */
+.L_SHUF_DC00:              .octa 0x0b0a090803020100FFFFFFFFFFFFFFFF
+
+#endif
+#endif
diff --git a/grub-core/lib/libgcrypt/cipher/sha256.c 
b/grub-core/lib/libgcrypt/cipher/sha256.c
index 309fa3b98..24cab566d 100644
--- a/grub-core/lib/libgcrypt/cipher/sha256.c
+++ b/grub-core/lib/libgcrypt/cipher/sha256.c
@@ -42,106 +42,380 @@
 
 #include "g10lib.h"
 #include "bithelp.h"
+#include "bufhelp.h"
 #include "cipher.h"
 #include "hash-common.h"
 
+
+/* USE_SSSE3 indicates whether to compile with Intel SSSE3 code. */
+#undef USE_SSSE3
+#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_SSSE3) && \
+    defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \
+    (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+# define USE_SSSE3 1
+#endif
+
+/* USE_AVX indicates whether to compile with Intel AVX code. */
+#undef USE_AVX
+#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX) && \
+    defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \
+    (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+# define USE_AVX 1
+#endif
+
+/* USE_AVX2 indicates whether to compile with Intel AVX2/BMI2 code. */
+#undef USE_AVX2
+#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX2) && \
+    defined(HAVE_GCC_INLINE_ASM_BMI2) && \
+    defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \
+    (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+# define USE_AVX2 1
+#endif
+
+/* USE_SHAEXT indicates whether to compile with Intel SHA Extension code. */
+#undef USE_SHAEXT
+#if defined(HAVE_GCC_INLINE_ASM_SHAEXT) && \
+    defined(HAVE_GCC_INLINE_ASM_SSE41) && \
+    defined(ENABLE_SHAEXT_SUPPORT)
+# define USE_SHAEXT 1
+#endif
+
+/* USE_ARM_CE indicates whether to enable ARMv8 Crypto Extension assembly
+ * code. */
+#undef USE_ARM_CE
+#ifdef ENABLE_ARM_CRYPTO_SUPPORT
+# if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) \
+     && defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) \
+     && defined(HAVE_GCC_INLINE_ASM_AARCH32_CRYPTO)
+#  define USE_ARM_CE 1
+# elif defined(__AARCH64EL__) \
+       && defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) \
+       && defined(HAVE_GCC_INLINE_ASM_AARCH64_CRYPTO)
+#  define USE_ARM_CE 1
+# endif
+#endif
+
+/* USE_PPC_CRYPTO indicates whether to enable PowerPC vector crypto
+ * accelerated code. */
+#undef USE_PPC_CRYPTO
+#ifdef ENABLE_PPC_CRYPTO_SUPPORT
+# if defined(HAVE_COMPATIBLE_CC_PPC_ALTIVEC) && \
+     defined(HAVE_GCC_INLINE_ASM_PPC_ALTIVEC)
+#  if __GNUC__ >= 4
+#   define USE_PPC_CRYPTO 1
+#  endif
+# endif
+#endif
+
+/* USE_S390X_CRYPTO indicates whether to enable zSeries code. */
+#undef USE_S390X_CRYPTO
+#if defined(HAVE_GCC_INLINE_ASM_S390X)
+# define USE_S390X_CRYPTO 1
+#endif /* USE_S390X_CRYPTO */
+
+
 typedef struct {
-  u32  h0,h1,h2,h3,h4,h5,h6,h7;
-  u32  nblocks;
-  byte buf[64];
-  int  count;
+  gcry_md_block_ctx_t bctx;
+  u32  h[8];
+#ifdef USE_S390X_CRYPTO
+  u32  final_len_msb, final_len_lsb; /* needs to be right after h[7]. */
+  int  use_s390x_crypto;
+#endif
 } SHA256_CONTEXT;
 
 
+/* Assembly implementations use SystemV ABI, ABI conversion and additional
+ * stack to store XMM6-XMM15 needed on Win64. */
+#undef ASM_FUNC_ABI
+#undef ASM_EXTRA_STACK
+#if defined(USE_SSSE3) || defined(USE_AVX) || defined(USE_AVX2) || \
+    defined(USE_SHAEXT)
+# ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+#  define ASM_FUNC_ABI __attribute__((sysv_abi))
+#  define ASM_EXTRA_STACK (10 * 16 + sizeof(void *) * 4)
+# else
+#  define ASM_FUNC_ABI
+#  define ASM_EXTRA_STACK 0
+# endif
+#endif
+
+
+#ifdef USE_SSSE3
+unsigned int _gcry_sha256_transform_amd64_ssse3(const void *input_data,
+                                                u32 state[8],
+                                                size_t num_blks) ASM_FUNC_ABI;
+
+static unsigned int
+do_sha256_transform_amd64_ssse3(void *ctx, const unsigned char *data,
+                                size_t nblks)
+{
+  SHA256_CONTEXT *hd = ctx;
+  return _gcry_sha256_transform_amd64_ssse3 (data, hd->h, nblks)
+         + ASM_EXTRA_STACK;
+}
+#endif
+
+#ifdef USE_AVX
+unsigned int _gcry_sha256_transform_amd64_avx(const void *input_data,
+                                              u32 state[8],
+                                              size_t num_blks) ASM_FUNC_ABI;
+
+static unsigned int
+do_sha256_transform_amd64_avx(void *ctx, const unsigned char *data,
+                              size_t nblks)
+{
+  SHA256_CONTEXT *hd = ctx;
+  return _gcry_sha256_transform_amd64_avx (data, hd->h, nblks)
+         + ASM_EXTRA_STACK;
+}
+#endif
+
+#ifdef USE_AVX2
+unsigned int _gcry_sha256_transform_amd64_avx2(const void *input_data,
+                                               u32 state[8],
+                                               size_t num_blks) ASM_FUNC_ABI;
+
+static unsigned int
+do_sha256_transform_amd64_avx2(void *ctx, const unsigned char *data,
+                               size_t nblks)
+{
+  SHA256_CONTEXT *hd = ctx;
+  return _gcry_sha256_transform_amd64_avx2 (data, hd->h, nblks)
+         + ASM_EXTRA_STACK;
+}
+#endif
+
+#ifdef USE_SHAEXT
+/* Does not need ASM_FUNC_ABI */
+unsigned int
+_gcry_sha256_transform_intel_shaext(u32 state[8],
+                                    const unsigned char *input_data,
+                                    size_t num_blks);
+
+static unsigned int
+do_sha256_transform_intel_shaext(void *ctx, const unsigned char *data,
+                                 size_t nblks)
+{
+  SHA256_CONTEXT *hd = ctx;
+  return _gcry_sha256_transform_intel_shaext (hd->h, data, nblks);
+}
+#endif
+
+#ifdef USE_ARM_CE
+unsigned int _gcry_sha256_transform_armv8_ce(u32 state[8],
+                                             const void *input_data,
+                                             size_t num_blks);
+
+static unsigned int
+do_sha256_transform_armv8_ce(void *ctx, const unsigned char *data,
+                             size_t nblks)
+{
+  SHA256_CONTEXT *hd = ctx;
+  return _gcry_sha256_transform_armv8_ce (hd->h, data, nblks);
+}
+#endif
+
+#ifdef USE_PPC_CRYPTO
+unsigned int _gcry_sha256_transform_ppc8(u32 state[8],
+                                        const unsigned char *input_data,
+                                        size_t num_blks);
+
+unsigned int _gcry_sha256_transform_ppc9(u32 state[8],
+                                        const unsigned char *input_data,
+                                        size_t num_blks);
+
+static unsigned int
+do_sha256_transform_ppc8(void *ctx, const unsigned char *data, size_t nblks)
+{
+  SHA256_CONTEXT *hd = ctx;
+  return _gcry_sha256_transform_ppc8 (hd->h, data, nblks);
+}
+
+static unsigned int
+do_sha256_transform_ppc9(void *ctx, const unsigned char *data, size_t nblks)
+{
+  SHA256_CONTEXT *hd = ctx;
+  return _gcry_sha256_transform_ppc9 (hd->h, data, nblks);
+}
+#endif
+
+#ifdef USE_S390X_CRYPTO
+#include "asm-inline-s390x.h"
+
+static unsigned int
+do_sha256_transform_s390x (void *ctx, const unsigned char *data, size_t nblks)
+{
+  SHA256_CONTEXT *hd = ctx;
+
+  kimd_execute (KMID_FUNCTION_SHA256, hd->h, data, nblks * 64);
+  return 0;
+}
+
+static unsigned int
+do_sha256_final_s390x (void *ctx, const unsigned char *data, size_t datalen,
+                      u32 len_msb, u32 len_lsb)
+{
+  SHA256_CONTEXT *hd = ctx;
+
+  /* Make sure that 'final_len' is positioned at correct offset relative
+   * to 'h[0]'. This is because we are passing 'h[0]' pointer as start of
+   * parameter block to 'klmd' instruction. */
+
+  gcry_assert (offsetof (SHA256_CONTEXT, final_len_msb)
+              - offsetof (SHA256_CONTEXT, h[0]) == 8 * sizeof(u32));
+  gcry_assert (offsetof (SHA256_CONTEXT, final_len_lsb)
+              - offsetof (SHA256_CONTEXT, final_len_msb) == 1 * sizeof(u32));
+
+  hd->final_len_msb = len_msb;
+  hd->final_len_lsb = len_lsb;
+
+  klmd_execute (KMID_FUNCTION_SHA256, hd->h, data, datalen);
+  return 0;
+}
+#endif
+
+
+static unsigned int
+do_transform_generic (void *ctx, const unsigned char *data, size_t nblks);
+
+
+static void
+sha256_common_init (SHA256_CONTEXT *hd)
+{
+  unsigned int features = _gcry_get_hw_features ();
+
+  hd->bctx.nblocks = 0;
+  hd->bctx.nblocks_high = 0;
+  hd->bctx.count = 0;
+  hd->bctx.blocksize_shift = _gcry_ctz(64);
+
+  /* Order of feature checks is important here; last match will be
+   * selected.  Keep slower implementations at the top and faster at
+   * the bottom.  */
+  hd->bctx.bwrite = do_transform_generic;
+#ifdef USE_SSSE3
+  if ((features & HWF_INTEL_SSSE3) != 0)
+    hd->bctx.bwrite = do_sha256_transform_amd64_ssse3;
+#endif
+#ifdef USE_AVX
+  /* AVX implementation uses SHLD which is known to be slow on non-Intel CPUs.
+   * Therefore use this implementation on Intel CPUs only. */
+  if ((features & HWF_INTEL_AVX) && (features & HWF_INTEL_FAST_SHLD))
+    hd->bctx.bwrite = do_sha256_transform_amd64_avx;
+#endif
+#ifdef USE_AVX2
+  if ((features & HWF_INTEL_AVX2) && (features & HWF_INTEL_BMI2))
+    hd->bctx.bwrite = do_sha256_transform_amd64_avx2;
+#endif
+#ifdef USE_SHAEXT
+  if ((features & HWF_INTEL_SHAEXT) && (features & HWF_INTEL_SSE4_1))
+    hd->bctx.bwrite = do_sha256_transform_intel_shaext;
+#endif
+#ifdef USE_ARM_CE
+  if ((features & HWF_ARM_SHA2) != 0)
+    hd->bctx.bwrite = do_sha256_transform_armv8_ce;
+#endif
+#ifdef USE_PPC_CRYPTO
+  if ((features & HWF_PPC_VCRYPTO) != 0)
+    hd->bctx.bwrite = do_sha256_transform_ppc8;
+  if ((features & HWF_PPC_VCRYPTO) != 0 && (features & HWF_PPC_ARCH_3_00) != 0)
+    hd->bctx.bwrite = do_sha256_transform_ppc9;
+#endif
+#ifdef USE_S390X_CRYPTO
+  hd->use_s390x_crypto = 0;
+  if ((features & HWF_S390X_MSA) != 0)
+    {
+      if ((kimd_query () & km_function_to_mask (KMID_FUNCTION_SHA256)) &&
+         (klmd_query () & km_function_to_mask (KMID_FUNCTION_SHA256)))
+       {
+         hd->bctx.bwrite = do_sha256_transform_s390x;
+         hd->use_s390x_crypto = 1;
+       }
+    }
+#endif
+  (void)features;
+}
+
+
 static void
-sha256_init (void *context)
+sha256_init (void *context, unsigned int flags)
 {
   SHA256_CONTEXT *hd = context;
 
-  hd->h0 = 0x6a09e667;
-  hd->h1 = 0xbb67ae85;
-  hd->h2 = 0x3c6ef372;
-  hd->h3 = 0xa54ff53a;
-  hd->h4 = 0x510e527f;
-  hd->h5 = 0x9b05688c;
-  hd->h6 = 0x1f83d9ab;
-  hd->h7 = 0x5be0cd19;
-
-  hd->nblocks = 0;
-  hd->count = 0;
+  (void)flags;
+
+  hd->h[0] = 0x6a09e667;
+  hd->h[1] = 0xbb67ae85;
+  hd->h[2] = 0x3c6ef372;
+  hd->h[3] = 0xa54ff53a;
+  hd->h[4] = 0x510e527f;
+  hd->h[5] = 0x9b05688c;
+  hd->h[6] = 0x1f83d9ab;
+  hd->h[7] = 0x5be0cd19;
+
+  sha256_common_init (hd);
 }
 
 
 static void
-sha224_init (void *context)
+sha224_init (void *context, unsigned int flags)
 {
   SHA256_CONTEXT *hd = context;
 
-  hd->h0 = 0xc1059ed8;
-  hd->h1 = 0x367cd507;
-  hd->h2 = 0x3070dd17;
-  hd->h3 = 0xf70e5939;
-  hd->h4 = 0xffc00b31;
-  hd->h5 = 0x68581511;
-  hd->h6 = 0x64f98fa7;
-  hd->h7 = 0xbefa4fa4;
-
-  hd->nblocks = 0;
-  hd->count = 0;
+  (void)flags;
+
+  hd->h[0] = 0xc1059ed8;
+  hd->h[1] = 0x367cd507;
+  hd->h[2] = 0x3070dd17;
+  hd->h[3] = 0xf70e5939;
+  hd->h[4] = 0xffc00b31;
+  hd->h[5] = 0x68581511;
+  hd->h[6] = 0x64f98fa7;
+  hd->h[7] = 0xbefa4fa4;
+
+  sha256_common_init (hd);
 }
 
 
 /*
   Transform the message X which consists of 16 32-bit-words. See FIPS
   180-2 for details.  */
-#define S0(x) (ror ((x), 7) ^ ror ((x), 18) ^ ((x) >> 3))       /* (4.6) */
-#define S1(x) (ror ((x), 17) ^ ror ((x), 19) ^ ((x) >> 10))     /* (4.7) */
 #define R(a,b,c,d,e,f,g,h,k,w) do                                 \
           {                                                       \
             t1 = (h) + Sum1((e)) + Cho((e),(f),(g)) + (k) + (w);  \
             t2 = Sum0((a)) + Maj((a),(b),(c));                    \
-            h = g;                                                \
-            g = f;                                                \
-            f = e;                                                \
-            e = d + t1;                                           \
-            d = c;                                                \
-            c = b;                                                \
-            b = a;                                                \
-            a = t1 + t2;                                          \
+            d += t1;                                              \
+            h  = t1 + t2;                                         \
           } while (0)
 
 /* (4.2) same as SHA-1's F1.  */
-static inline u32
-Cho (u32 x, u32 y, u32 z)
-{
-  return (z ^ (x & (y ^ z)));
-}
+#define Cho(x, y, z)  (z ^ (x & (y ^ z)))
 
 /* (4.3) same as SHA-1's F3 */
-static inline u32
-Maj (u32 x, u32 y, u32 z)
-{
-  return ((x & y) | (z & (x|y)));
-}
+#define Maj(x, y, z)  ((x & y) + (z & (x ^ y)))
 
 /* (4.4) */
-static inline u32
-Sum0 (u32 x)
-{
-  return (ror (x, 2) ^ ror (x, 13) ^ ror (x, 22));
-}
+#define Sum0(x)       (ror (x, 2) ^ ror (x, 13) ^ ror (x, 22))
 
 /* (4.5) */
-static inline u32
-Sum1 (u32 x)
-{
-  return (ror (x, 6) ^ ror (x, 11) ^ ror (x, 25));
-}
+#define Sum1(x)       (ror (x, 6) ^ ror (x, 11) ^ ror (x, 25))
 
-
-static void
-transform (SHA256_CONTEXT *hd, const unsigned char *data)
+/* Message expansion */
+#define S0(x) (ror ((x), 7) ^ ror ((x), 18) ^ ((x) >> 3))       /* (4.6) */
+#define S1(x) (ror ((x), 17) ^ ror ((x), 19) ^ ((x) >> 10))     /* (4.7) */
+#define I(i) ( w[i] = buf_get_be32(data + i * 4) )
+#define W(i) ( w[i&0x0f] =    S1(w[(i-2) &0x0f]) \
+                            +    w[(i-7) &0x0f]  \
+                            + S0(w[(i-15)&0x0f]) \
+                            +    w[(i-16)&0x0f] )
+
+static unsigned int
+do_transform_generic (void *ctx, const unsigned char *data, size_t nblks)
 {
+  SHA256_CONTEXT *hd = ctx;
   static const u32 K[64] = {
     0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
     0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
@@ -161,145 +435,110 @@ transform (SHA256_CONTEXT *hd, const unsigned char 
*data)
     0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
   };
 
-  u32 a,b,c,d,e,f,g,h,t1,t2;
-  u32 x[16];
-  u32 w[64];
-  int i;
-
-  a = hd->h0;
-  b = hd->h1;
-  c = hd->h2;
-  d = hd->h3;
-  e = hd->h4;
-  f = hd->h5;
-  g = hd->h6;
-  h = hd->h7;
-
-#ifdef WORDS_BIGENDIAN
-  memcpy (x, data, 64);
-#else
-  {
-    byte *p2;
-
-    for (i=0, p2=(byte*)x; i < 16; i++, p2 += 4 )
-      {
-        p2[3] = *data++;
-        p2[2] = *data++;
-        p2[1] = *data++;
-        p2[0] = *data++;
-      }
-  }
-#endif
-
-  for (i=0; i < 16; i++)
-    w[i] = x[i];
-  for (; i < 64; i++)
-    w[i] = S1(w[i-2]) + w[i-7] + S0(w[i-15]) + w[i-16];
-
-  for (i=0; i < 64;)
+  do
     {
-#if 0
-      R(a,b,c,d,e,f,g,h,K[i],w[i]);
-      i++;
-#else
-      t1 = h + Sum1 (e) + Cho (e, f, g) + K[i] + w[i];
-      t2 = Sum0 (a) + Maj (a, b, c);
-      d += t1;
-      h  = t1 + t2;
-
-      t1 = g + Sum1 (d) + Cho (d, e, f) + K[i+1] + w[i+1];
-      t2 = Sum0 (h) + Maj (h, a, b);
-      c += t1;
-      g  = t1 + t2;
-
-      t1 = f + Sum1 (c) + Cho (c, d, e) + K[i+2] + w[i+2];
-      t2 = Sum0 (g) + Maj (g, h, a);
-      b += t1;
-      f  = t1 + t2;
-
-      t1 = e + Sum1 (b) + Cho (b, c, d) + K[i+3] + w[i+3];
-      t2 = Sum0 (f) + Maj (f, g, h);
-      a += t1;
-      e  = t1 + t2;
-
-      t1 = d + Sum1 (a) + Cho (a, b, c) + K[i+4] + w[i+4];
-      t2 = Sum0 (e) + Maj (e, f, g);
-      h += t1;
-      d  = t1 + t2;
-
-      t1 = c + Sum1 (h) + Cho (h, a, b) + K[i+5] + w[i+5];
-      t2 = Sum0 (d) + Maj (d, e, f);
-      g += t1;
-      c  = t1 + t2;
-
-      t1 = b + Sum1 (g) + Cho (g, h, a) + K[i+6] + w[i+6];
-      t2 = Sum0 (c) + Maj (c, d, e);
-      f += t1;
-      b  = t1 + t2;
-
-      t1 = a + Sum1 (f) + Cho (f, g, h) + K[i+7] + w[i+7];
-      t2 = Sum0 (b) + Maj (b, c, d);
-      e += t1;
-      a  = t1 + t2;
-
-      i += 8;
-#endif
+
+      u32 a,b,c,d,e,f,g,h,t1,t2;
+      u32 w[16];
+
+      a = hd->h[0];
+      b = hd->h[1];
+      c = hd->h[2];
+      d = hd->h[3];
+      e = hd->h[4];
+      f = hd->h[5];
+      g = hd->h[6];
+      h = hd->h[7];
+
+      R(a, b, c, d, e, f, g, h, K[0], I(0));
+      R(h, a, b, c, d, e, f, g, K[1], I(1));
+      R(g, h, a, b, c, d, e, f, K[2], I(2));
+      R(f, g, h, a, b, c, d, e, K[3], I(3));
+      R(e, f, g, h, a, b, c, d, K[4], I(4));
+      R(d, e, f, g, h, a, b, c, K[5], I(5));
+      R(c, d, e, f, g, h, a, b, K[6], I(6));
+      R(b, c, d, e, f, g, h, a, K[7], I(7));
+      R(a, b, c, d, e, f, g, h, K[8], I(8));
+      R(h, a, b, c, d, e, f, g, K[9], I(9));
+      R(g, h, a, b, c, d, e, f, K[10], I(10));
+      R(f, g, h, a, b, c, d, e, K[11], I(11));
+      R(e, f, g, h, a, b, c, d, K[12], I(12));
+      R(d, e, f, g, h, a, b, c, K[13], I(13));
+      R(c, d, e, f, g, h, a, b, K[14], I(14));
+      R(b, c, d, e, f, g, h, a, K[15], I(15));
+
+      R(a, b, c, d, e, f, g, h, K[16], W(16));
+      R(h, a, b, c, d, e, f, g, K[17], W(17));
+      R(g, h, a, b, c, d, e, f, K[18], W(18));
+      R(f, g, h, a, b, c, d, e, K[19], W(19));
+      R(e, f, g, h, a, b, c, d, K[20], W(20));
+      R(d, e, f, g, h, a, b, c, K[21], W(21));
+      R(c, d, e, f, g, h, a, b, K[22], W(22));
+      R(b, c, d, e, f, g, h, a, K[23], W(23));
+      R(a, b, c, d, e, f, g, h, K[24], W(24));
+      R(h, a, b, c, d, e, f, g, K[25], W(25));
+      R(g, h, a, b, c, d, e, f, K[26], W(26));
+      R(f, g, h, a, b, c, d, e, K[27], W(27));
+      R(e, f, g, h, a, b, c, d, K[28], W(28));
+      R(d, e, f, g, h, a, b, c, K[29], W(29));
+      R(c, d, e, f, g, h, a, b, K[30], W(30));
+      R(b, c, d, e, f, g, h, a, K[31], W(31));
+
+      R(a, b, c, d, e, f, g, h, K[32], W(32));
+      R(h, a, b, c, d, e, f, g, K[33], W(33));
+      R(g, h, a, b, c, d, e, f, K[34], W(34));
+      R(f, g, h, a, b, c, d, e, K[35], W(35));
+      R(e, f, g, h, a, b, c, d, K[36], W(36));
+      R(d, e, f, g, h, a, b, c, K[37], W(37));
+      R(c, d, e, f, g, h, a, b, K[38], W(38));
+      R(b, c, d, e, f, g, h, a, K[39], W(39));
+      R(a, b, c, d, e, f, g, h, K[40], W(40));
+      R(h, a, b, c, d, e, f, g, K[41], W(41));
+      R(g, h, a, b, c, d, e, f, K[42], W(42));
+      R(f, g, h, a, b, c, d, e, K[43], W(43));
+      R(e, f, g, h, a, b, c, d, K[44], W(44));
+      R(d, e, f, g, h, a, b, c, K[45], W(45));
+      R(c, d, e, f, g, h, a, b, K[46], W(46));
+      R(b, c, d, e, f, g, h, a, K[47], W(47));
+
+      R(a, b, c, d, e, f, g, h, K[48], W(48));
+      R(h, a, b, c, d, e, f, g, K[49], W(49));
+      R(g, h, a, b, c, d, e, f, K[50], W(50));
+      R(f, g, h, a, b, c, d, e, K[51], W(51));
+      R(e, f, g, h, a, b, c, d, K[52], W(52));
+      R(d, e, f, g, h, a, b, c, K[53], W(53));
+      R(c, d, e, f, g, h, a, b, K[54], W(54));
+      R(b, c, d, e, f, g, h, a, K[55], W(55));
+      R(a, b, c, d, e, f, g, h, K[56], W(56));
+      R(h, a, b, c, d, e, f, g, K[57], W(57));
+      R(g, h, a, b, c, d, e, f, K[58], W(58));
+      R(f, g, h, a, b, c, d, e, K[59], W(59));
+      R(e, f, g, h, a, b, c, d, K[60], W(60));
+      R(d, e, f, g, h, a, b, c, K[61], W(61));
+      R(c, d, e, f, g, h, a, b, K[62], W(62));
+      R(b, c, d, e, f, g, h, a, K[63], W(63));
+
+      hd->h[0] += a;
+      hd->h[1] += b;
+      hd->h[2] += c;
+      hd->h[3] += d;
+      hd->h[4] += e;
+      hd->h[5] += f;
+      hd->h[6] += g;
+      hd->h[7] += h;
+
+      data += 64;
     }
+  while (--nblks);
 
-  hd->h0 += a;
-  hd->h1 += b;
-  hd->h2 += c;
-  hd->h3 += d;
-  hd->h4 += e;
-  hd->h5 += f;
-  hd->h6 += g;
-  hd->h7 += h;
+  return 26*4 + 32 + 3 * sizeof(void*);
 }
+
 #undef S0
 #undef S1
 #undef R
 
 
-/* Update the message digest with the contents of INBUF with length
-  INLEN.  */
-static void
-sha256_write (void *context, const void *inbuf_arg, size_t inlen)
-{
-  const unsigned char *inbuf = inbuf_arg;
-  SHA256_CONTEXT *hd = context;
-
-  if (hd->count == 64)
-    { /* flush the buffer */
-      transform (hd, hd->buf);
-      _gcry_burn_stack (74*4+32);
-      hd->count = 0;
-      hd->nblocks++;
-    }
-  if (!inbuf)
-    return;
-  if (hd->count)
-    {
-      for (; inlen && hd->count < 64; inlen--)
-        hd->buf[hd->count++] = *inbuf++;
-      sha256_write (hd, NULL, 0);
-      if (!inlen)
-        return;
-    }
-
-  while (inlen >= 64)
-    {
-      transform (hd, inbuf);
-      hd->count = 0;
-      hd->nblocks++;
-      inlen -= 64;
-      inbuf += 64;
-    }
-  _gcry_burn_stack (74*4+32);
-  for (; inlen && hd->count < 64; inlen--)
-    hd->buf[hd->count++] = *inbuf++;
-}
-
-
 /*
    The routine finally terminates the computation and returns the
    digest.  The handle is prepared for a new cycle, but adding bytes
@@ -309,18 +548,22 @@ static void
 sha256_final(void *context)
 {
   SHA256_CONTEXT *hd = context;
-  u32 t, msb, lsb;
+  u32 t, th, msb, lsb;
   byte *p;
+  unsigned int burn;
 
-  sha256_write (hd, NULL, 0); /* flush */;
+  t = hd->bctx.nblocks;
+  if (sizeof t == sizeof hd->bctx.nblocks)
+    th = hd->bctx.nblocks_high;
+  else
+    th = hd->bctx.nblocks >> 32;
 
-  t = hd->nblocks;
   /* multiply by 64 to make a byte count */
   lsb = t << 6;
-  msb = t >> 26;
+  msb = (th << 6) | (t >> 26);
   /* add the count */
   t = lsb;
-  if ((lsb += hd->count) < t)
+  if ((lsb += hd->bctx.count) < t)
     msb++;
   /* multiply by 8 to make a bit count */
   t = lsb;
@@ -328,39 +571,39 @@ sha256_final(void *context)
   msb <<= 3;
   msb |= t >> 29;
 
-  if (hd->count < 56)
-    { /* enough room */
-      hd->buf[hd->count++] = 0x80; /* pad */
-      while (hd->count < 56)
-        hd->buf[hd->count++] = 0;  /* pad */
-    }
-  else
-    { /* need one extra block */
-      hd->buf[hd->count++] = 0x80; /* pad character */
-      while (hd->count < 64)
-        hd->buf[hd->count++] = 0;
-      sha256_write (hd, NULL, 0);  /* flush */;
-      memset (hd->buf, 0, 56 ); /* fill next block with zeroes */
+  if (0)
+    { }
+#ifdef USE_S390X_CRYPTO
+  else if (hd->use_s390x_crypto)
+    {
+      burn = do_sha256_final_s390x (hd, hd->bctx.buf, hd->bctx.count, msb, 
lsb);
     }
-  /* append the 64 bit count */
-  hd->buf[56] = msb >> 24;
-  hd->buf[57] = msb >> 16;
-  hd->buf[58] = msb >>  8;
-  hd->buf[59] = msb;
-  hd->buf[60] = lsb >> 24;
-  hd->buf[61] = lsb >> 16;
-  hd->buf[62] = lsb >>  8;
-  hd->buf[63] = lsb;
-  transform (hd, hd->buf);
-  _gcry_burn_stack (74*4+32);
-
-  p = hd->buf;
-#ifdef WORDS_BIGENDIAN
-#define X(a) do { *(u32*)p = hd->h##a ; p += 4; } while(0)
-#else /* little endian */
-#define X(a) do { *p++ = hd->h##a >> 24; *p++ = hd->h##a >> 16;         \
-                 *p++ = hd->h##a >> 8; *p++ = hd->h##a; } while(0)
 #endif
+  else if (hd->bctx.count < 56)  /* enough room */
+    {
+      hd->bctx.buf[hd->bctx.count++] = 0x80; /* pad */
+      if (hd->bctx.count < 56)
+       memset (&hd->bctx.buf[hd->bctx.count], 0, 56 - hd->bctx.count);
+
+      /* append the 64 bit count */
+      buf_put_be32(hd->bctx.buf + 56, msb);
+      buf_put_be32(hd->bctx.buf + 60, lsb);
+      burn = (*hd->bctx.bwrite) (hd, hd->bctx.buf, 1);
+    }
+  else  /* need one extra block */
+    {
+      hd->bctx.buf[hd->bctx.count++] = 0x80; /* pad character */
+      /* fill pad and next block with zeroes */
+      memset (&hd->bctx.buf[hd->bctx.count], 0, 64 - hd->bctx.count + 56);
+
+      /* append the 64 bit count */
+      buf_put_be32(hd->bctx.buf + 64 + 56, msb);
+      buf_put_be32(hd->bctx.buf + 64 + 60, lsb);
+      burn = (*hd->bctx.bwrite) (hd, hd->bctx.buf, 2);
+    }
+
+  p = hd->bctx.buf;
+#define X(a) do { buf_put_be32(p, hd->h[a]); p += 4; } while(0)
   X(0);
   X(1);
   X(2);
@@ -370,6 +613,10 @@ sha256_final(void *context)
   X(6);
   X(7);
 #undef X
+
+  hd->bctx.count = 0;
+
+  _gcry_burn_stack (burn);
 }
 
 static byte *
@@ -377,7 +624,45 @@ sha256_read (void *context)
 {
   SHA256_CONTEXT *hd = context;
 
-  return hd->buf;
+  return hd->bctx.buf;
+}
+
+
+/* Shortcut functions which puts the hash value of the supplied buffer iov
+ * into outbuf which must have a size of 32 bytes.  */
+static void
+_gcry_sha256_hash_buffers (void *outbuf, size_t nbytes,
+                          const gcry_buffer_t *iov, int iovcnt)
+{
+  SHA256_CONTEXT hd;
+
+  (void)nbytes;
+
+  sha256_init (&hd, 0);
+  for (;iovcnt > 0; iov++, iovcnt--)
+    _gcry_md_block_write (&hd,
+                          (const char*)iov[0].data + iov[0].off, iov[0].len);
+  sha256_final (&hd);
+  memcpy (outbuf, hd.bctx.buf, 32);
+}
+
+
+/* Shortcut functions which puts the hash value of the supplied buffer iov
+ * into outbuf which must have a size of 28 bytes.  */
+static void
+_gcry_sha224_hash_buffers (void *outbuf, size_t nbytes,
+                          const gcry_buffer_t *iov, int iovcnt)
+{
+  SHA256_CONTEXT hd;
+
+  (void)nbytes;
+
+  sha224_init (&hd, 0);
+  for (;iovcnt > 0; iov++, iovcnt--)
+    _gcry_md_block_write (&hd,
+                          (const char*)iov[0].data + iov[0].off, iov[0].len);
+  sha256_final (&hd);
+  memcpy (outbuf, hd.bctx.buf, 28);
 }
 
 
@@ -503,52 +788,54 @@ run_selftests (int algo, int extended, 
selftest_report_func_t report)
 
 
 
-static byte asn224[19] = /* Object ID is 2.16.840.1.101.3.4.2.4 */
+static const byte asn224[19] = /* Object ID is 2.16.840.1.101.3.4.2.4 */
   { 0x30, 0x2D, 0x30, 0x0d, 0x06, 0x09, 0x60, 0x86, 0x48,
     0x01, 0x65, 0x03, 0x04, 0x02, 0x04, 0x05, 0x00, 0x04,
     0x1C
   };
 
-static gcry_md_oid_spec_t oid_spec_sha224[] =
+static const gcry_md_oid_spec_t oid_spec_sha224[] =
   {
     /* From RFC3874, Section 4 */
     { "2.16.840.1.101.3.4.2.4" },
+    /* ANSI X9.62  ecdsaWithSHA224 */
+    { "1.2.840.10045.4.3.1" },
     { NULL },
   };
 
-static byte asn256[19] = /* Object ID is  2.16.840.1.101.3.4.2.1 */
+static const byte asn256[19] = /* Object ID is  2.16.840.1.101.3.4.2.1 */
   { 0x30, 0x31, 0x30, 0x0d, 0x06, 0x09, 0x60, 0x86,
     0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x01, 0x05,
     0x00, 0x04, 0x20 };
 
-static gcry_md_oid_spec_t oid_spec_sha256[] =
+static const gcry_md_oid_spec_t oid_spec_sha256[] =
   {
     /* According to the OpenPGP draft rfc2440-bis06 */
     { "2.16.840.1.101.3.4.2.1" },
     /* PKCS#1 sha256WithRSAEncryption */
     { "1.2.840.113549.1.1.11" },
+    /* ANSI X9.62  ecdsaWithSHA256 */
+    { "1.2.840.10045.4.3.2" },
 
     { NULL },
   };
 
-gcry_md_spec_t _gcry_digest_spec_sha224 =
+const gcry_md_spec_t _gcry_digest_spec_sha224 =
   {
+    GCRY_MD_SHA224, {0, 1},
     "SHA224", asn224, DIM (asn224), oid_spec_sha224, 28,
-    sha224_init, sha256_write, sha256_final, sha256_read,
-    sizeof (SHA256_CONTEXT)
-  };
-md_extra_spec_t _gcry_digest_extraspec_sha224 =
-  {
+    sha224_init, _gcry_md_block_write, sha256_final, sha256_read, NULL,
+    _gcry_sha224_hash_buffers,
+    sizeof (SHA256_CONTEXT),
     run_selftests
   };
 
-gcry_md_spec_t _gcry_digest_spec_sha256 =
+const gcry_md_spec_t _gcry_digest_spec_sha256 =
   {
+    GCRY_MD_SHA256, {0, 1},
     "SHA256", asn256, DIM (asn256), oid_spec_sha256, 32,
-    sha256_init, sha256_write, sha256_final, sha256_read,
-    sizeof (SHA256_CONTEXT)
-  };
-md_extra_spec_t _gcry_digest_extraspec_sha256 =
-  {
+    sha256_init, _gcry_md_block_write, sha256_final, sha256_read, NULL,
+    _gcry_sha256_hash_buffers,
+    sizeof (SHA256_CONTEXT),
     run_selftests
   };
diff --git a/grub-core/lib/libgcrypt/cipher/sha512-arm.S 
b/grub-core/lib/libgcrypt/cipher/sha512-arm.S
new file mode 100644
index 000000000..94ec0141e
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/sha512-arm.S
@@ -0,0 +1,464 @@
+/* sha512-arm.S  -  ARM assembly implementation of SHA-512 transform
+ *
+ * Copyright (C) 2016 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#include <config.h>
+
+#if defined(__ARMEL__)
+#ifdef HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS
+
+.text
+
+.syntax unified
+.arm
+
+/* structure of SHA512_CONTEXT */
+#define hd_a 0
+#define hd_b ((hd_a) + 8)
+#define hd_c ((hd_b) + 8)
+#define hd_d ((hd_c) + 8)
+#define hd_e ((hd_d) + 8)
+#define hd_f ((hd_e) + 8)
+#define hd_g ((hd_f) + 8)
+#define hd_h ((hd_g) + 8)
+
+/* register macros */
+#define RK    %r2
+
+#define RElo %r0
+#define REhi %r1
+
+#define RT1lo %r3
+#define RT1hi %r4
+#define RT2lo %r5
+#define RT2hi %r6
+#define RWlo  %r7
+#define RWhi  %r8
+#define RT3lo %r9
+#define RT3hi %r10
+#define RT4lo %r11
+#define RT4hi %ip
+
+#define RRND  %lr
+
+/* variable offsets in stack */
+#define ctx (0)
+#define data ((ctx) + 4)
+#define nblks ((data) + 4)
+#define _a ((nblks) + 4)
+#define _b ((_a) + 8)
+#define _c ((_b) + 8)
+#define _d ((_c) + 8)
+#define _e ((_d) + 8)
+#define _f ((_e) + 8)
+#define _g ((_f) + 8)
+#define _h ((_g) + 8)
+
+#define w(i) ((_h) + 8 + ((i) % 16) * 8)
+
+#define STACK_MAX (w(15) + 8)
+
+/* helper macros */
+#define ldr_unaligned_be(rout, rsrc, offs, rtmp) \
+    ldrb rout, [rsrc, #((offs) + 3)]; \
+    ldrb rtmp, [rsrc, #((offs) + 2)]; \
+    orr rout, rout, rtmp, lsl #8; \
+    ldrb rtmp, [rsrc, #((offs) + 1)]; \
+    orr rout, rout, rtmp, lsl #16; \
+    ldrb rtmp, [rsrc, #((offs) + 0)]; \
+    orr rout, rout, rtmp, lsl #24;
+
+#ifdef __ARMEL__
+    /* bswap on little-endian */
+#ifdef HAVE_ARM_ARCH_V6
+    #define be_to_host(reg, rtmp) \
+       rev reg, reg;
+#else
+    #define be_to_host(reg, rtmp) \
+       eor rtmp, reg, reg, ror #16; \
+       mov rtmp, rtmp, lsr #8; \
+       bic rtmp, rtmp, #65280; \
+       eor reg, rtmp, reg, ror #8;
+#endif
+#else
+    /* nop on big-endian */
+    #define be_to_host(reg, rtmp) /*_*/
+#endif
+
+#define host_to_host(x, y) /*_*/
+
+#define read_u64_aligned_4(rin, offs, lo0, hi0, lo1, hi1, lo2, hi2, lo3, hi3, 
convert, rtmp) \
+    ldr lo0, [rin, #((offs) + 0 * 8 + 4)]; \
+    ldr hi0, [rin, #((offs) + 0 * 8 + 0)]; \
+    ldr lo1, [rin, #((offs) + 1 * 8 + 4)]; \
+    ldr hi1, [rin, #((offs) + 1 * 8 + 0)]; \
+    ldr lo2, [rin, #((offs) + 2 * 8 + 4)]; \
+    convert(lo0, rtmp); \
+    ldr hi2, [rin, #((offs) + 2 * 8 + 0)]; \
+    convert(hi0, rtmp); \
+    ldr lo3, [rin, #((offs) + 3 * 8 + 4)]; \
+    convert(lo1, rtmp); \
+    ldr hi3, [rin, #((offs) + 3 * 8 + 0)]; \
+    convert(hi1, rtmp); \
+    convert(lo2, rtmp); \
+    convert(hi2, rtmp); \
+    convert(lo3, rtmp); \
+    convert(hi3, rtmp);
+
+#define read_be64_aligned_4(rin, offs, lo0, hi0, lo1, hi1, lo2, hi2, lo3, hi3, 
rtmp0) \
+    read_u64_aligned_4(rin, offs, lo0, hi0, lo1, hi1, lo2, hi2, lo3, hi3, 
be_to_host, rtmp0)
+
+/* need to handle unaligned reads by byte reads */
+#define read_be64_unaligned_4(rin, offs, lo0, hi0, lo1, hi1, lo2, hi2, lo3, 
hi3, rtmp0) \
+    ldr_unaligned_be(lo0, rin, (offs) + 0 * 8 + 4, rtmp0); \
+    ldr_unaligned_be(hi0, rin, (offs) + 0 * 8 + 0, rtmp0); \
+    ldr_unaligned_be(lo1, rin, (offs) + 1 * 8 + 4, rtmp0); \
+    ldr_unaligned_be(hi1, rin, (offs) + 1 * 8 + 0, rtmp0); \
+    ldr_unaligned_be(lo2, rin, (offs) + 2 * 8 + 4, rtmp0); \
+    ldr_unaligned_be(hi2, rin, (offs) + 2 * 8 + 0, rtmp0); \
+    ldr_unaligned_be(lo3, rin, (offs) + 3 * 8 + 4, rtmp0); \
+    ldr_unaligned_be(hi3, rin, (offs) + 3 * 8 + 0, rtmp0);
+
+/***********************************************************************
+ * ARM assembly implementation of sha512 transform
+ ***********************************************************************/
+
+/* Round function */
+
+#define R(_a,_b,_c,_d,_e,_f,_g,_h,W,wi) \
+    /* Message expansion, t1 = _h + w[i] */ \
+    W(_a,_h,wi); \
+    \
+    /* w = Sum1(_e) */ \
+    mov RWlo, RElo, lsr#14; \
+    ldm RK!, {RT2lo-RT2hi}; \
+    mov RWhi, REhi, lsr#14; \
+    eor RWlo, RWlo, RElo, lsr#18; \
+    eor RWhi, RWhi, REhi, lsr#18; \
+    ldr RT3lo, [%sp, #(_f)]; \
+    adds RT1lo, RT2lo; /* t1 += K */ \
+    ldr RT3hi, [%sp, #(_f) + 4]; \
+    adc RT1hi, RT2hi; \
+    ldr RT4lo, [%sp, #(_g)]; \
+    eor RWlo, RWlo, RElo, lsl#23; \
+    ldr RT4hi, [%sp, #(_g) + 4]; \
+    eor RWhi, RWhi, REhi, lsl#23; \
+    eor RWlo, RWlo, REhi, lsl#18; \
+    eor RWhi, RWhi, RElo, lsl#18; \
+    eor RWlo, RWlo, REhi, lsl#14; \
+    eor RWhi, RWhi, RElo, lsl#14; \
+    eor RWlo, RWlo, REhi, lsr#9; \
+    eor RWhi, RWhi, RElo, lsr#9; \
+    \
+    /* Cho(_e,_f,_g) => (_e & _f) ^ (~_e & _g) */ \
+    adds RT1lo, RWlo; /* t1 += Sum1(_e) */ \
+    and RT3lo, RT3lo, RElo; \
+    adc RT1hi, RWhi; \
+    and RT3hi, RT3hi, REhi; \
+    bic RT4lo, RT4lo, RElo; \
+    bic RT4hi, RT4hi, REhi; \
+    eor RT3lo, RT3lo, RT4lo; \
+    eor RT3hi, RT3hi, RT4hi; \
+    \
+    /* Load D */ \
+    /* t1 += Cho(_e,_f,_g) */ \
+    ldr RElo, [%sp, #(_d)]; \
+    adds RT1lo, RT3lo; \
+    ldr REhi, [%sp, #(_d) + 4]; \
+    adc RT1hi, RT3hi; \
+    \
+    /* Load A */ \
+    ldr RT3lo, [%sp, #(_a)]; \
+    \
+    /* _d += t1 */ \
+    adds RElo, RT1lo; \
+    ldr RT3hi, [%sp, #(_a) + 4]; \
+    adc REhi, RT1hi; \
+    \
+    /* Store D */ \
+    str RElo, [%sp, #(_d)]; \
+    \
+    /* t2 = Sum0(_a) */ \
+    mov RT2lo, RT3lo, lsr#28; \
+    str REhi, [%sp, #(_d) + 4]; \
+    mov RT2hi, RT3hi, lsr#28; \
+    ldr RWlo, [%sp, #(_b)]; \
+    eor RT2lo, RT2lo, RT3lo, lsl#30; \
+    ldr RWhi, [%sp, #(_b) + 4]; \
+    eor RT2hi, RT2hi, RT3hi, lsl#30; \
+    eor RT2lo, RT2lo, RT3lo, lsl#25; \
+    eor RT2hi, RT2hi, RT3hi, lsl#25; \
+    eor RT2lo, RT2lo, RT3hi, lsl#4; \
+    eor RT2hi, RT2hi, RT3lo, lsl#4; \
+    eor RT2lo, RT2lo, RT3hi, lsr#2; \
+    eor RT2hi, RT2hi, RT3lo, lsr#2; \
+    eor RT2lo, RT2lo, RT3hi, lsr#7; \
+    eor RT2hi, RT2hi, RT3lo, lsr#7; \
+    \
+    /* t2 += t1 */ \
+    adds RT2lo, RT1lo; \
+    ldr RT1lo, [%sp, #(_c)]; \
+    adc RT2hi, RT1hi; \
+    \
+    /* Maj(_a,_b,_c) => ((_a & _b) ^ (_c & (_a ^ _b))) */ \
+    ldr RT1hi, [%sp, #(_c) + 4]; \
+    and RT4lo, RWlo, RT3lo; \
+    and RT4hi, RWhi, RT3hi; \
+    eor RWlo, RWlo, RT3lo; \
+    eor RWhi, RWhi, RT3hi; \
+    and RWlo, RWlo, RT1lo; \
+    and RWhi, RWhi, RT1hi; \
+    eor RWlo, RWlo, RT4lo; \
+    eor RWhi, RWhi, RT4hi; \
+
+/* Message expansion */
+
+#define W_0_63(_a,_h,i) \
+    ldr RT3lo, [%sp, #(w(i-2))]; \
+    adds RT2lo, RWlo; /* _h = t2 + Maj(_a,_b,_c) */ \
+    ldr RT3hi, [%sp, #(w(i-2)) + 4]; \
+    adc RT2hi, RWhi; \
+    /* nw = S1(w[i-2]) */ \
+    ldr RT1lo, [%sp, #(_h)]; /* Load H */ \
+    mov RWlo, RT3lo, lsr#19; \
+    str RT2lo, [%sp, #(_a)]; \
+    eor RWlo, RWlo, RT3lo, lsl#3; \
+    ldr RT1hi, [%sp, #(_h) + 4]; \
+    mov RWhi, RT3hi, lsr#19; \
+    ldr RT2lo, [%sp, #(w(i-7))]; \
+    eor RWhi, RWhi, RT3hi, lsl#3; \
+    str RT2hi, [%sp, #(_a) + 4]; \
+    eor RWlo, RWlo, RT3lo, lsr#6; \
+    ldr RT2hi, [%sp, #(w(i-7)) + 4]; \
+    eor RWhi, RWhi, RT3hi, lsr#6; \
+    eor RWlo, RWlo, RT3hi, lsl#13; \
+    eor RWhi, RWhi, RT3lo, lsl#13; \
+    eor RWlo, RWlo, RT3hi, lsr#29; \
+    eor RWhi, RWhi, RT3lo, lsr#29; \
+    ldr RT3lo, [%sp, #(w(i-15))]; \
+    eor RWlo, RWlo, RT3hi, lsl#26; \
+    ldr RT3hi, [%sp, #(w(i-15)) + 4]; \
+    \
+    adds RT2lo, RWlo; /* nw += w[i-7] */ \
+    ldr RWlo, [%sp, #(w(i-16))]; \
+    adc RT2hi, RWhi; \
+    mov RT4lo, RT3lo, lsr#1; /* S0(w[i-15]) */ \
+    ldr RWhi, [%sp, #(w(i-16)) + 4]; \
+    mov RT4hi, RT3hi, lsr#1; \
+    adds RT2lo, RWlo; /* nw += w[i-16] */ \
+    eor RT4lo, RT4lo, RT3lo, lsr#8; \
+    eor RT4hi, RT4hi, RT3hi, lsr#8; \
+    eor RT4lo, RT4lo, RT3lo, lsr#7; \
+    eor RT4hi, RT4hi, RT3hi, lsr#7; \
+    eor RT4lo, RT4lo, RT3hi, lsl#31; \
+    eor RT4hi, RT4hi, RT3lo, lsl#31; \
+    eor RT4lo, RT4lo, RT3hi, lsl#24; \
+    eor RT4hi, RT4hi, RT3lo, lsl#24; \
+    eor RT4lo, RT4lo, RT3hi, lsl#25; \
+    adc RT2hi, RWhi; \
+    \
+    /* nw += S0(w[i-15]) */ \
+    adds RT2lo, RT4lo; \
+    adc RT2hi, RT4hi; \
+    \
+    /* w[0] = nw */ \
+    str RT2lo, [%sp, #(w(i))]; \
+    adds RT1lo, RWlo; \
+    str RT2hi, [%sp, #(w(i)) + 4]; \
+    adc RT1hi, RWhi;
+
+#define W_64_79(_a,_h,i) \
+    adds RT2lo, RWlo; /* _h = t2 + Maj(_a,_b,_c) */ \
+    ldr RWlo, [%sp, #(w(i-16))]; \
+    adc RT2hi, RWhi; \
+    ldr RWhi, [%sp, #(w(i-16)) + 4]; \
+    ldr RT1lo, [%sp, #(_h)]; /* Load H */ \
+    ldr RT1hi, [%sp, #(_h) + 4]; \
+    str RT2lo, [%sp, #(_a)]; \
+    str RT2hi, [%sp, #(_a) + 4]; \
+    adds RT1lo, RWlo; \
+    adc RT1hi, RWhi;
+
+.align 3
+.globl _gcry_sha512_transform_arm
+.type  _gcry_sha512_transform_arm,%function;
+
+_gcry_sha512_transform_arm:
+       /* Input:
+        *      %r0: SHA512_CONTEXT
+        *      %r1: data
+        *      %r2: u64 k[] constants
+        *      %r3: nblks
+        */
+       push {%r4-%r11, %ip, %lr};
+       sub %sp, %sp, #STACK_MAX;
+       movs RWlo, %r3;
+       str %r0, [%sp, #(ctx)];
+
+       beq .Ldone;
+
+.Loop_blocks:
+       str RWlo, [%sp, #nblks];
+
+       /* Load context to stack */
+       add RWhi, %sp, #(_a);
+       ldm %r0!,  {RT1lo,RT1hi,RT2lo,RT2hi,RT3lo,RT3hi,RT4lo,RT4hi}
+       stm RWhi!, {RT1lo,RT1hi,RT2lo,RT2hi,RT3lo,RT3hi,RT4lo,RT4hi}
+       ldm %r0,  {RT1lo,RT1hi,RT2lo,RT2hi,RT3lo,RT3hi,RT4lo,RT4hi}
+       stm RWhi, {RT1lo,RT1hi,RT2lo,RT2hi,RT3lo,RT3hi,RT4lo,RT4hi}
+
+       /* Load input to w[16] */
+
+       /* test if data is unaligned */
+       tst %r1, #3;
+       beq 1f;
+
+       /* unaligned load */
+       add RWhi, %sp, #(w(0));
+       read_be64_unaligned_4(%r1, 0 * 8, RT1lo, RT1hi, RT2lo, RT2hi, RT3lo, 
RT3hi, RT4lo, RT4hi, RWlo);
+       stm RWhi!, {RT1lo,RT1hi,RT2lo,RT2hi,RT3lo,RT3hi,RT4lo,RT4hi}
+
+       read_be64_unaligned_4(%r1, 4 * 8, RT1lo, RT1hi, RT2lo, RT2hi, RT3lo, 
RT3hi, RT4lo, RT4hi, RWlo);
+       stm RWhi!, {RT1lo,RT1hi,RT2lo,RT2hi,RT3lo,RT3hi,RT4lo,RT4hi}
+
+       read_be64_unaligned_4(%r1, 8 * 8, RT1lo, RT1hi, RT2lo, RT2hi, RT3lo, 
RT3hi, RT4lo, RT4hi, RWlo);
+       stm RWhi!, {RT1lo,RT1hi,RT2lo,RT2hi,RT3lo,RT3hi,RT4lo,RT4hi}
+
+       read_be64_unaligned_4(%r1, 12 * 8, RT1lo, RT1hi, RT2lo, RT2hi, RT3lo, 
RT3hi, RT4lo, RT4hi, RWlo);
+       b 2f;
+1:
+       /* aligned load */
+       add RWhi, %sp, #(w(0));
+       read_be64_aligned_4(%r1, 0 * 8, RT1lo, RT1hi, RT2lo, RT2hi, RT3lo, 
RT3hi, RT4lo, RT4hi, RWlo);
+       stm RWhi!, {RT1lo,RT1hi,RT2lo,RT2hi,RT3lo,RT3hi,RT4lo,RT4hi}
+
+       read_be64_aligned_4(%r1, 4 * 8, RT1lo, RT1hi, RT2lo, RT2hi, RT3lo, 
RT3hi, RT4lo, RT4hi, RWlo);
+       stm RWhi!, {RT1lo,RT1hi,RT2lo,RT2hi,RT3lo,RT3hi,RT4lo,RT4hi}
+
+       read_be64_aligned_4(%r1, 8 * 8, RT1lo, RT1hi, RT2lo, RT2hi, RT3lo, 
RT3hi, RT4lo, RT4hi, RWlo);
+       stm RWhi!, {RT1lo,RT1hi,RT2lo,RT2hi,RT3lo,RT3hi,RT4lo,RT4hi}
+
+       read_be64_aligned_4(%r1, 12 * 8, RT1lo, RT1hi, RT2lo, RT2hi, RT3lo, 
RT3hi, RT4lo, RT4hi, RWlo);
+2:
+       add %r1, #(16 * 8);
+       stm RWhi, {RT1lo,RT1hi,RT2lo,RT2hi,RT3lo,RT3hi,RT4lo,RT4hi}
+       str %r1, [%sp, #(data)];
+
+       /* preload E & A */
+       ldr RElo, [%sp, #(_e)];
+       ldr REhi, [%sp, #(_e) + 4];
+       mov RWlo, #0;
+       ldr RT2lo, [%sp, #(_a)];
+       mov RRND, #(80-16);
+       ldr RT2hi, [%sp, #(_a) + 4];
+       mov RWhi, #0;
+
+.Loop_rounds:
+       R(_a, _b, _c, _d, _e, _f, _g, _h, W_0_63, 16);
+       R(_h, _a, _b, _c, _d, _e, _f, _g, W_0_63, 17);
+       R(_g, _h, _a, _b, _c, _d, _e, _f, W_0_63, 18);
+       R(_f, _g, _h, _a, _b, _c, _d, _e, W_0_63, 19);
+       R(_e, _f, _g, _h, _a, _b, _c, _d, W_0_63, 20);
+       R(_d, _e, _f, _g, _h, _a, _b, _c, W_0_63, 21);
+       R(_c, _d, _e, _f, _g, _h, _a, _b, W_0_63, 22);
+       R(_b, _c, _d, _e, _f, _g, _h, _a, W_0_63, 23);
+       R(_a, _b, _c, _d, _e, _f, _g, _h, W_0_63, 24);
+       R(_h, _a, _b, _c, _d, _e, _f, _g, W_0_63, 25);
+       R(_g, _h, _a, _b, _c, _d, _e, _f, W_0_63, 26);
+       R(_f, _g, _h, _a, _b, _c, _d, _e, W_0_63, 27);
+       R(_e, _f, _g, _h, _a, _b, _c, _d, W_0_63, 28);
+       R(_d, _e, _f, _g, _h, _a, _b, _c, W_0_63, 29);
+       R(_c, _d, _e, _f, _g, _h, _a, _b, W_0_63, 30);
+       R(_b, _c, _d, _e, _f, _g, _h, _a, W_0_63, 31);
+
+       subs RRND, #16;
+       bne .Loop_rounds;
+
+       R(_a, _b, _c, _d, _e, _f, _g, _h, W_64_79, 16);
+       R(_h, _a, _b, _c, _d, _e, _f, _g, W_64_79, 17);
+       R(_g, _h, _a, _b, _c, _d, _e, _f, W_64_79, 18);
+       R(_f, _g, _h, _a, _b, _c, _d, _e, W_64_79, 19);
+       R(_e, _f, _g, _h, _a, _b, _c, _d, W_64_79, 20);
+       R(_d, _e, _f, _g, _h, _a, _b, _c, W_64_79, 21);
+       R(_c, _d, _e, _f, _g, _h, _a, _b, W_64_79, 22);
+       R(_b, _c, _d, _e, _f, _g, _h, _a, W_64_79, 23);
+       R(_a, _b, _c, _d, _e, _f, _g, _h, W_64_79, 24);
+       R(_h, _a, _b, _c, _d, _e, _f, _g, W_64_79, 25);
+       R(_g, _h, _a, _b, _c, _d, _e, _f, W_64_79, 26);
+       R(_f, _g, _h, _a, _b, _c, _d, _e, W_64_79, 27);
+       R(_e, _f, _g, _h, _a, _b, _c, _d, W_64_79, 28);
+       R(_d, _e, _f, _g, _h, _a, _b, _c, W_64_79, 29);
+       R(_c, _d, _e, _f, _g, _h, _a, _b, W_64_79, 30);
+       R(_b, _c, _d, _e, _f, _g, _h, _a, W_64_79, 31);
+
+       ldr %r0, [%sp, #(ctx)];
+       adds RT2lo, RWlo; /* _h = t2 + Maj(_a,_b,_c) */
+       ldr %r1, [%sp, #(data)];
+       adc RT2hi, RWhi;
+
+       ldm %r0, {RT1lo,RT1hi,RWlo,RWhi,RT3lo,RT3hi,RT4lo,RT4hi}
+       adds RT1lo, RT2lo;
+       ldr RT2lo, [%sp, #(_b + 0)];
+       adc  RT1hi, RT2hi;
+       ldr RT2hi, [%sp, #(_b + 4)];
+       adds RWlo, RT2lo;
+       ldr RT2lo, [%sp, #(_c + 0)];
+       adc  RWhi, RT2hi;
+       ldr RT2hi, [%sp, #(_c + 4)];
+       adds RT3lo, RT2lo;
+       ldr RT2lo, [%sp, #(_d + 0)];
+       adc  RT3hi, RT2hi;
+       ldr RT2hi, [%sp, #(_d + 4)];
+       adds RT4lo, RT2lo;
+       ldr RT2lo, [%sp, #(_e + 0)];
+       adc  RT4hi, RT2hi;
+       stm %r0!, {RT1lo,RT1hi,RWlo,RWhi,RT3lo,RT3hi,RT4lo,RT4hi}
+
+       ldr RT2hi, [%sp, #(_e + 4)];
+       ldm %r0, {RT1lo,RT1hi,RWlo,RWhi,RT3lo,RT3hi,RT4lo,RT4hi}
+       adds RT1lo, RT2lo;
+       ldr RT2lo, [%sp, #(_f + 0)];
+       adc  RT1hi, RT2hi;
+       ldr RT2hi, [%sp, #(_f + 4)];
+       adds RWlo, RT2lo;
+       ldr RT2lo, [%sp, #(_g + 0)];
+       adc  RWhi, RT2hi;
+       ldr RT2hi, [%sp, #(_g + 4)];
+       adds RT3lo, RT2lo;
+       ldr RT2lo, [%sp, #(_h + 0)];
+       adc  RT3hi, RT2hi;
+       ldr RT2hi, [%sp, #(_h + 4)];
+       adds RT4lo, RT2lo;
+       adc  RT4hi, RT2hi;
+       stm %r0, {RT1lo,RT1hi,RWlo,RWhi,RT3lo,RT3hi,RT4lo,RT4hi}
+       sub %r0, %r0, #(4 * 8);
+       ldr RWlo, [%sp, #nblks];
+
+       sub RK, #(80 * 8);
+       subs RWlo, #1;
+       bne .Loop_blocks;
+
+.Ldone:
+       mov %r0, #STACK_MAX;
+__out:
+       add %sp, %sp, #STACK_MAX;
+       pop {%r4-%r11, %ip, %pc};
+.size _gcry_sha512_transform_arm,.-_gcry_sha512_transform_arm;
+
+#endif
+#endif
diff --git a/grub-core/lib/libgcrypt/cipher/sha512-armv7-neon.S 
b/grub-core/lib/libgcrypt/cipher/sha512-armv7-neon.S
new file mode 100644
index 000000000..2b186b477
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/sha512-armv7-neon.S
@@ -0,0 +1,452 @@
+/* sha512-armv7-neon.S  -  ARM/NEON assembly implementation of SHA-512 
transform
+ *
+ * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+
+#if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) && \
+    defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) && \
+    defined(HAVE_GCC_INLINE_ASM_NEON)
+
+.text
+
+.syntax unified
+.fpu neon
+.arm
+
+/* structure of SHA512_CONTEXT */
+#define hd_a 0
+#define hd_b ((hd_a) + 8)
+#define hd_c ((hd_b) + 8)
+#define hd_d ((hd_c) + 8)
+#define hd_e ((hd_d) + 8)
+#define hd_f ((hd_e) + 8)
+#define hd_g ((hd_f) + 8)
+
+/* register macros */
+#define RK %r2
+
+#define RA d0
+#define RB d1
+#define RC d2
+#define RD d3
+#define RE d4
+#define RF d5
+#define RG d6
+#define RH d7
+
+#define RT0 d8
+#define RT1 d9
+#define RT2 d10
+#define RT3 d11
+#define RT4 d12
+#define RT5 d13
+#define RT6 d14
+#define RT7 d15
+
+#define RT01q q4
+#define RT23q q5
+#define RT45q q6
+#define RT67q q7
+
+#define RW0 d16
+#define RW1 d17
+#define RW2 d18
+#define RW3 d19
+#define RW4 d20
+#define RW5 d21
+#define RW6 d22
+#define RW7 d23
+#define RW8 d24
+#define RW9 d25
+#define RW10 d26
+#define RW11 d27
+#define RW12 d28
+#define RW13 d29
+#define RW14 d30
+#define RW15 d31
+
+#define RW01q q8
+#define RW23q q9
+#define RW45q q10
+#define RW67q q11
+#define RW89q q12
+#define RW1011q q13
+#define RW1213q q14
+#define RW1415q q15
+
+#define CLEAR_REG(reg) vmov.i8 reg, #0;
+
+/***********************************************************************
+ * ARM assembly implementation of sha512 transform
+ ***********************************************************************/
+#define rounds2_0_63(ra, rb, rc, rd, re, rf, rg, rh, rw0, rw1, rw01q, rw2, 
rw23q, rw1415q, rw9, rw10, interleave_op, arg1) \
+       /* t1 = h + Sum1 (e) + Ch (e, f, g) + k[t] + w[t]; */ \
+       vshr.u64 RT2, re, #14; \
+       vshl.u64 RT3, re, #64 - 14; \
+       interleave_op(arg1); \
+       vshr.u64 RT4, re, #18; \
+       vshl.u64 RT5, re, #64 - 18; \
+       vld1.64 {RT0}, [RK]!; \
+       veor.64 RT23q, RT23q, RT45q; \
+       vshr.u64 RT4, re, #41; \
+       vshl.u64 RT5, re, #64 - 41; \
+       vadd.u64 RT0, RT0, rw0; \
+       veor.64 RT23q, RT23q, RT45q; \
+       vmov.64 RT7, re; \
+       veor.64 RT1, RT2, RT3; \
+       vbsl.64 RT7, rf, rg; \
+       \
+       vadd.u64 RT1, RT1, rh; \
+       vshr.u64 RT2, ra, #28; \
+       vshl.u64 RT3, ra, #64 - 28; \
+       vadd.u64 RT1, RT1, RT0; \
+       vshr.u64 RT4, ra, #34; \
+       vshl.u64 RT5, ra, #64 - 34; \
+       vadd.u64 RT1, RT1, RT7; \
+       \
+       /* h = Sum0 (a) + Maj (a, b, c); */ \
+       veor.64 RT23q, RT23q, RT45q; \
+       vshr.u64 RT4, ra, #39; \
+       vshl.u64 RT5, ra, #64 - 39; \
+       veor.64 RT0, ra, rb; \
+       veor.64 RT23q, RT23q, RT45q; \
+       vbsl.64 RT0, rc, rb; \
+       vadd.u64 rd, rd, RT1; /* d+=t1; */ \
+       veor.64 rh, RT2, RT3; \
+       \
+       /* t1 = g + Sum1 (d) + Ch (d, e, f) + k[t] + w[t]; */ \
+       vshr.u64 RT2, rd, #14; \
+       vshl.u64 RT3, rd, #64 - 14; \
+       vadd.u64 rh, rh, RT0; \
+       vshr.u64 RT4, rd, #18; \
+       vshl.u64 RT5, rd, #64 - 18; \
+       vadd.u64 rh, rh, RT1; /* h+=t1; */ \
+       vld1.64 {RT0}, [RK]!; \
+       veor.64 RT23q, RT23q, RT45q; \
+       vshr.u64 RT4, rd, #41; \
+       vshl.u64 RT5, rd, #64 - 41; \
+       vadd.u64 RT0, RT0, rw1; \
+       veor.64 RT23q, RT23q, RT45q; \
+       vmov.64 RT7, rd; \
+       veor.64 RT1, RT2, RT3; \
+       vbsl.64 RT7, re, rf; \
+       \
+       vadd.u64 RT1, RT1, rg; \
+       vshr.u64 RT2, rh, #28; \
+       vshl.u64 RT3, rh, #64 - 28; \
+       vadd.u64 RT1, RT1, RT0; \
+       vshr.u64 RT4, rh, #34; \
+       vshl.u64 RT5, rh, #64 - 34; \
+       vadd.u64 RT1, RT1, RT7; \
+       \
+       /* g = Sum0 (h) + Maj (h, a, b); */ \
+       veor.64 RT23q, RT23q, RT45q; \
+       vshr.u64 RT4, rh, #39; \
+       vshl.u64 RT5, rh, #64 - 39; \
+       veor.64 RT0, rh, ra; \
+       veor.64 RT23q, RT23q, RT45q; \
+       vbsl.64 RT0, rb, ra; \
+       vadd.u64 rc, rc, RT1; /* c+=t1; */ \
+       veor.64 rg, RT2, RT3; \
+       \
+       /* w[0] += S1 (w[14]) + w[9] + S0 (w[1]); */ \
+       /* w[1] += S1 (w[15]) + w[10] + S0 (w[2]); */ \
+       \
+       /**** S0(w[1:2]) */ \
+       \
+       /* w[0:1] += w[9:10] */ \
+       /* RT23q = rw1:rw2 */ \
+       vext.u64 RT23q, rw01q, rw23q, #1; \
+       vadd.u64 rw0, rw9; \
+       vadd.u64 rg, rg, RT0; \
+       vadd.u64 rw1, rw10;\
+       vadd.u64 rg, rg, RT1; /* g+=t1; */ \
+       \
+       vshr.u64 RT45q, RT23q, #1; \
+       vshl.u64 RT67q, RT23q, #64 - 1; \
+       vshr.u64 RT01q, RT23q, #8; \
+       veor.u64 RT45q, RT45q, RT67q; \
+       vshl.u64 RT67q, RT23q, #64 - 8; \
+       veor.u64 RT45q, RT45q, RT01q; \
+       vshr.u64 RT01q, RT23q, #7; \
+       veor.u64 RT45q, RT45q, RT67q; \
+       \
+       /**** S1(w[14:15]) */ \
+       vshr.u64 RT23q, rw1415q, #6; \
+       veor.u64 RT01q, RT01q, RT45q; \
+       vshr.u64 RT45q, rw1415q, #19; \
+       vshl.u64 RT67q, rw1415q, #64 - 19; \
+       veor.u64 RT23q, RT23q, RT45q; \
+       vshr.u64 RT45q, rw1415q, #61; \
+       veor.u64 RT23q, RT23q, RT67q; \
+       vshl.u64 RT67q, rw1415q, #64 - 61; \
+       veor.u64 RT23q, RT23q, RT45q; \
+       vadd.u64 rw01q, RT01q; /* w[0:1] += S(w[1:2]) */ \
+       veor.u64 RT01q, RT23q, RT67q;
+#define vadd_RT01q(rw01q) \
+       /* w[0:1] += S(w[14:15]) */ \
+       vadd.u64 rw01q, RT01q;
+
+#define dummy(_) /*_*/
+
+#define rounds2_64_79(ra, rb, rc, rd, re, rf, rg, rh, rw0, rw1, 
interleave_op1, arg1, interleave_op2, arg2) \
+       /* t1 = h + Sum1 (e) + Ch (e, f, g) + k[t] + w[t]; */ \
+       vshr.u64 RT2, re, #14; \
+       vshl.u64 RT3, re, #64 - 14; \
+       interleave_op1(arg1); \
+       vshr.u64 RT4, re, #18; \
+       vshl.u64 RT5, re, #64 - 18; \
+       interleave_op2(arg2); \
+       vld1.64 {RT0}, [RK]!; \
+       veor.64 RT23q, RT23q, RT45q; \
+       vshr.u64 RT4, re, #41; \
+       vshl.u64 RT5, re, #64 - 41; \
+       vadd.u64 RT0, RT0, rw0; \
+       veor.64 RT23q, RT23q, RT45q; \
+       vmov.64 RT7, re; \
+       veor.64 RT1, RT2, RT3; \
+       vbsl.64 RT7, rf, rg; \
+       \
+       vadd.u64 RT1, RT1, rh; \
+       vshr.u64 RT2, ra, #28; \
+       vshl.u64 RT3, ra, #64 - 28; \
+       vadd.u64 RT1, RT1, RT0; \
+       vshr.u64 RT4, ra, #34; \
+       vshl.u64 RT5, ra, #64 - 34; \
+       vadd.u64 RT1, RT1, RT7; \
+       \
+       /* h = Sum0 (a) + Maj (a, b, c); */ \
+       veor.64 RT23q, RT23q, RT45q; \
+       vshr.u64 RT4, ra, #39; \
+       vshl.u64 RT5, ra, #64 - 39; \
+       veor.64 RT0, ra, rb; \
+       veor.64 RT23q, RT23q, RT45q; \
+       vbsl.64 RT0, rc, rb; \
+       vadd.u64 rd, rd, RT1; /* d+=t1; */ \
+       veor.64 rh, RT2, RT3; \
+       \
+       /* t1 = g + Sum1 (d) + Ch (d, e, f) + k[t] + w[t]; */ \
+       vshr.u64 RT2, rd, #14; \
+       vshl.u64 RT3, rd, #64 - 14; \
+       vadd.u64 rh, rh, RT0; \
+       vshr.u64 RT4, rd, #18; \
+       vshl.u64 RT5, rd, #64 - 18; \
+       vadd.u64 rh, rh, RT1; /* h+=t1; */ \
+       vld1.64 {RT0}, [RK]!; \
+       veor.64 RT23q, RT23q, RT45q; \
+       vshr.u64 RT4, rd, #41; \
+       vshl.u64 RT5, rd, #64 - 41; \
+       vadd.u64 RT0, RT0, rw1; \
+       veor.64 RT23q, RT23q, RT45q; \
+       vmov.64 RT7, rd; \
+       veor.64 RT1, RT2, RT3; \
+       vbsl.64 RT7, re, rf; \
+       \
+       vadd.u64 RT1, RT1, rg; \
+       vshr.u64 RT2, rh, #28; \
+       vshl.u64 RT3, rh, #64 - 28; \
+       vadd.u64 RT1, RT1, RT0; \
+       vshr.u64 RT4, rh, #34; \
+       vshl.u64 RT5, rh, #64 - 34; \
+       vadd.u64 RT1, RT1, RT7; \
+       \
+       /* g = Sum0 (h) + Maj (h, a, b); */ \
+       veor.64 RT23q, RT23q, RT45q; \
+       vshr.u64 RT4, rh, #39; \
+       vshl.u64 RT5, rh, #64 - 39; \
+       veor.64 RT0, rh, ra; \
+       veor.64 RT23q, RT23q, RT45q; \
+       vbsl.64 RT0, rb, ra; \
+       vadd.u64 rc, rc, RT1; /* c+=t1; */ \
+       veor.64 rg, RT2, RT3;
+#define vadd_rg_RT0(rg) \
+       vadd.u64 rg, rg, RT0;
+#define vadd_rg_RT1(rg) \
+       vadd.u64 rg, rg, RT1; /* g+=t1; */
+
+.align 3
+.globl _gcry_sha512_transform_armv7_neon
+.type  _gcry_sha512_transform_armv7_neon,%function;
+
+_gcry_sha512_transform_armv7_neon:
+       /* Input:
+        *      %r0: SHA512_CONTEXT
+        *      %r1: data
+        *      %r2: u64 k[] constants
+        *      %r3: nblks
+        */
+       push {%lr};
+
+       mov %lr, #0;
+
+       /* Load context to d0-d7 */
+       vld1.64 {RA-RD}, [%r0]!;
+       vld1.64 {RE-RH}, [%r0];
+       sub %r0, #(4*8);
+
+       /* Load input to w[16], d16-d31 */
+       /* NOTE: Assumes that on ARMv7 unaligned accesses are always allowed. */
+       vld1.64 {RW0-RW3}, [%r1]!;
+       vld1.64 {RW4-RW7}, [%r1]!;
+       vld1.64 {RW8-RW11}, [%r1]!;
+       vld1.64 {RW12-RW15}, [%r1]!;
+#ifdef __ARMEL__
+       /* byteswap */
+       vrev64.8 RW01q, RW01q;
+       vrev64.8 RW23q, RW23q;
+       vrev64.8 RW45q, RW45q;
+       vrev64.8 RW67q, RW67q;
+       vrev64.8 RW89q, RW89q;
+       vrev64.8 RW1011q, RW1011q;
+       vrev64.8 RW1213q, RW1213q;
+       vrev64.8 RW1415q, RW1415q;
+#endif
+
+       /* EABI says that d8-d15 must be preserved by callee. */
+       vpush {RT0-RT7};
+
+.Loop:
+       rounds2_0_63(RA, RB, RC, RD, RE, RF, RG, RH, RW0, RW1, RW01q, RW2, 
RW23q, RW1415q, RW9, RW10, dummy, _);
+       b .Lenter_rounds;
+
+.Loop_rounds:
+       rounds2_0_63(RA, RB, RC, RD, RE, RF, RG, RH, RW0, RW1, RW01q, RW2, 
RW23q, RW1415q, RW9, RW10, vadd_RT01q, RW1415q);
+.Lenter_rounds:
+       rounds2_0_63(RG, RH, RA, RB, RC, RD, RE, RF, RW2, RW3, RW23q, RW4, 
RW45q, RW01q, RW11, RW12, vadd_RT01q, RW01q);
+       rounds2_0_63(RE, RF, RG, RH, RA, RB, RC, RD, RW4, RW5, RW45q, RW6, 
RW67q, RW23q, RW13, RW14, vadd_RT01q, RW23q);
+       rounds2_0_63(RC, RD, RE, RF, RG, RH, RA, RB, RW6, RW7, RW67q, RW8, 
RW89q, RW45q, RW15, RW0, vadd_RT01q, RW45q);
+       rounds2_0_63(RA, RB, RC, RD, RE, RF, RG, RH, RW8, RW9, RW89q, RW10, 
RW1011q, RW67q, RW1, RW2, vadd_RT01q, RW67q);
+       rounds2_0_63(RG, RH, RA, RB, RC, RD, RE, RF, RW10, RW11, RW1011q, RW12, 
RW1213q, RW89q, RW3, RW4, vadd_RT01q, RW89q);
+       add %lr, #16;
+       rounds2_0_63(RE, RF, RG, RH, RA, RB, RC, RD, RW12, RW13, RW1213q, RW14, 
RW1415q, RW1011q, RW5, RW6, vadd_RT01q, RW1011q);
+       cmp %lr, #64;
+       rounds2_0_63(RC, RD, RE, RF, RG, RH, RA, RB, RW14, RW15, RW1415q, RW0, 
RW01q, RW1213q, RW7, RW8, vadd_RT01q, RW1213q);
+       bne .Loop_rounds;
+
+       subs %r3, #1;
+
+       rounds2_64_79(RA, RB, RC, RD, RE, RF, RG, RH, RW0, RW1, vadd_RT01q, 
RW1415q, dummy, _);
+       rounds2_64_79(RG, RH, RA, RB, RC, RD, RE, RF, RW2, RW3, vadd_rg_RT0, 
RG, vadd_rg_RT1, RG);
+       beq .Lhandle_tail;
+       vld1.64 {RW0-RW3}, [%r1]!;
+       rounds2_64_79(RE, RF, RG, RH, RA, RB, RC, RD, RW4, RW5, vadd_rg_RT0, 
RE, vadd_rg_RT1, RE);
+       rounds2_64_79(RC, RD, RE, RF, RG, RH, RA, RB, RW6, RW7, vadd_rg_RT0, 
RC, vadd_rg_RT1, RC);
+#ifdef __ARMEL__
+       vrev64.8 RW01q, RW01q;
+       vrev64.8 RW23q, RW23q;
+#endif
+       vld1.64 {RW4-RW7}, [%r1]!;
+       rounds2_64_79(RA, RB, RC, RD, RE, RF, RG, RH, RW8, RW9, vadd_rg_RT0, 
RA, vadd_rg_RT1, RA);
+       rounds2_64_79(RG, RH, RA, RB, RC, RD, RE, RF, RW10, RW11, vadd_rg_RT0, 
RG, vadd_rg_RT1, RG);
+#ifdef __ARMEL__
+       vrev64.8 RW45q, RW45q;
+       vrev64.8 RW67q, RW67q;
+#endif
+       vld1.64 {RW8-RW11}, [%r1]!;
+       rounds2_64_79(RE, RF, RG, RH, RA, RB, RC, RD, RW12, RW13, vadd_rg_RT0, 
RE, vadd_rg_RT1, RE);
+       rounds2_64_79(RC, RD, RE, RF, RG, RH, RA, RB, RW14, RW15, vadd_rg_RT0, 
RC, vadd_rg_RT1, RC);
+#ifdef __ARMEL__
+       vrev64.8 RW89q, RW89q;
+       vrev64.8 RW1011q, RW1011q;
+#endif
+       vld1.64 {RW12-RW15}, [%r1]!;
+       vadd_rg_RT0(RA);
+       vadd_rg_RT1(RA);
+
+       /* Load context */
+       vld1.64 {RT0-RT3}, [%r0]!;
+       vld1.64 {RT4-RT7}, [%r0];
+       sub %r0, #(4*8);
+
+#ifdef __ARMEL__
+       vrev64.8 RW1213q, RW1213q;
+       vrev64.8 RW1415q, RW1415q;
+#endif
+
+       vadd.u64 RA, RT0;
+       vadd.u64 RB, RT1;
+       vadd.u64 RC, RT2;
+       vadd.u64 RD, RT3;
+       vadd.u64 RE, RT4;
+       vadd.u64 RF, RT5;
+       vadd.u64 RG, RT6;
+       vadd.u64 RH, RT7;
+
+       /* Store the first half of context */
+       vst1.64 {RA-RD}, [%r0]!;
+       sub RK, $(8*80);
+       vst1.64 {RE-RH}, [%r0]; /* Store the last half of context */
+       mov %lr, #0;
+       sub %r0, #(4*8);
+
+       b .Loop;
+.ltorg
+
+.Lhandle_tail:
+       rounds2_64_79(RE, RF, RG, RH, RA, RB, RC, RD, RW4, RW5, vadd_rg_RT0, 
RE, vadd_rg_RT1, RE);
+       rounds2_64_79(RC, RD, RE, RF, RG, RH, RA, RB, RW6, RW7, vadd_rg_RT0, 
RC, vadd_rg_RT1, RC);
+       rounds2_64_79(RA, RB, RC, RD, RE, RF, RG, RH, RW8, RW9, vadd_rg_RT0, 
RA, vadd_rg_RT1, RA);
+       rounds2_64_79(RG, RH, RA, RB, RC, RD, RE, RF, RW10, RW11, vadd_rg_RT0, 
RG, vadd_rg_RT1, RG);
+       rounds2_64_79(RE, RF, RG, RH, RA, RB, RC, RD, RW12, RW13, vadd_rg_RT0, 
RE, vadd_rg_RT1, RE);
+       rounds2_64_79(RC, RD, RE, RF, RG, RH, RA, RB, RW14, RW15, vadd_rg_RT0, 
RC, vadd_rg_RT1, RC);
+
+       /* Load context to d16-d23 */
+       vld1.64 {RW0-RW3}, [%r0]!;
+       vadd_rg_RT0(RA);
+       vld1.64 {RW4-RW7}, [%r0];
+       vadd_rg_RT1(RA);
+       sub %r0, #(4*8);
+
+       vadd.u64 RA, RW0;
+       vadd.u64 RB, RW1;
+       vadd.u64 RC, RW2;
+       vadd.u64 RD, RW3;
+       vadd.u64 RE, RW4;
+       vadd.u64 RF, RW5;
+       vadd.u64 RG, RW6;
+       vadd.u64 RH, RW7;
+
+       /* Store the first half of context */
+       vst1.64 {RA-RD}, [%r0]!;
+
+       /* Clear used registers */
+       /* d16-d31 */
+       CLEAR_REG(RW01q);
+       CLEAR_REG(RW23q);
+       CLEAR_REG(RW45q);
+       CLEAR_REG(RW67q);
+       vst1.64 {RE-RH}, [%r0]; /* Store the last half of context */
+       CLEAR_REG(RW89q);
+       CLEAR_REG(RW1011q);
+       CLEAR_REG(RW1213q);
+       CLEAR_REG(RW1415q);
+       /* d8-d15 */
+       vpop {RT0-RT7};
+       /* d0-d7 (q0-q3) */
+       CLEAR_REG(%q0);
+       CLEAR_REG(%q1);
+       CLEAR_REG(%q2);
+       CLEAR_REG(%q3);
+
+       eor %r0, %r0;
+       pop {%pc};
+.size _gcry_sha512_transform_armv7_neon,.-_gcry_sha512_transform_armv7_neon;
+
+#endif
diff --git a/grub-core/lib/libgcrypt/cipher/sha512-avx-amd64.S 
b/grub-core/lib/libgcrypt/cipher/sha512-avx-amd64.S
new file mode 100644
index 000000000..bfc4435d5
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/sha512-avx-amd64.S
@@ -0,0 +1,461 @@
+/*
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright (c) 2012, Intel Corporation
+;
+; All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions are
+; met:
+;
+; * Redistributions of source code must retain the above copyright
+;   notice, this list of conditions and the following disclaimer.
+;
+; * Redistributions in binary form must reproduce the above copyright
+;   notice, this list of conditions and the following disclaimer in the
+;   documentation and/or other materials provided with the
+;   distribution.
+;
+; * Neither the name of the Intel Corporation nor the names of its
+;   contributors may be used to endorse or promote products derived from
+;   this software without specific prior written permission.
+;
+;
+; THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION "AS IS" AND ANY
+; EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+; PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR
+; CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+*/
+/*
+ * Conversion to GAS assembly and integration to libgcrypt
+ *  by Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ */
+
+#ifdef __x86_64
+#include <config.h>
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
+    defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \
+    defined(HAVE_GCC_INLINE_ASM_AVX) && defined(USE_SHA512)
+
+#include "asm-common-amd64.h"
+
+.intel_syntax noprefix
+
+.text
+
+/* Virtual Registers */
+#define msg rdi /* ARG1 */
+#define digest rsi /* ARG2 */
+#define msglen rdx /* ARG3 */
+#define T1 rcx
+#define T2 r8
+#define a_64 r9
+#define b_64 r10
+#define c_64 r11
+#define d_64 r12
+#define e_64 r13
+#define f_64 r14
+#define g_64 r15
+#define h_64 rbx
+#define tmp0 rax
+
+/*
+; Local variables (stack frame)
+; Note: frame_size must be an odd multiple of 8 bytes to XMM align RSP
+*/
+#define frame_W 0 /* Message Schedule */
+#define frame_W_size (80 * 8)
+#define frame_WK ((frame_W) + (frame_W_size)) /* W[t] + K[t] | W[t+1] + K[t+1] 
*/
+#define frame_WK_size (2 * 8)
+#define frame_GPRSAVE ((frame_WK) + (frame_WK_size))
+#define frame_GPRSAVE_size (5 * 8)
+#define frame_size ((frame_GPRSAVE) + (frame_GPRSAVE_size))
+
+
+/* Useful QWORD "arrays" for simpler memory references */
+#define MSG(i)    msg    + 8*(i)               /* Input message (arg1) */
+#define DIGEST(i) digest + 8*(i)               /* Output Digest (arg2) */
+#define K_t(i)    .LK512   + 8*(i) ADD_RIP     /* SHA Constants (static mem) */
+#define W_t(i)    rsp + frame_W  + 8*(i)       /* Message Schedule (stack 
frame) */
+#define WK_2(i)   rsp + frame_WK + 8*((i) % 2) /* W[t]+K[t] (stack frame) */
+/* MSG, DIGEST, K_t, W_t are arrays */
+/* WK_2(t) points to 1 of 2 qwords at frame.WK depdending on t being odd/even 
*/
+
+#define RORQ(p1, p2) \
+       /* shld is faster than ror on Intel Sandybridge */ \
+       shld    p1, p1, (64 - p2)
+
+#define SHA512_Round(t, a, b, c, d, e, f, g, h) \
+       /* Compute Round %%t */; \
+       mov     T1,   f        /* T1 = f */; \
+       mov     tmp0, e        /* tmp = e */; \
+       xor     T1,   g        /* T1 = f ^ g */; \
+       RORQ(   tmp0, 23) /* 41     ; tmp = e ror 23 */; \
+       and     T1,   e        /* T1 = (f ^ g) & e */; \
+       xor     tmp0, e        /* tmp = (e ror 23) ^ e */; \
+       xor     T1,   g        /* T1 = ((f ^ g) & e) ^ g = CH(e,f,g) */; \
+       add     T1,   [WK_2(t)] /* W[t] + K[t] from message scheduler */; \
+       RORQ(   tmp0, 4) /* 18      ; tmp = ((e ror 23) ^ e) ror 4 */; \
+       xor     tmp0, e        /* tmp = (((e ror 23) ^ e) ror 4) ^ e */; \
+       mov     T2,   a        /* T2 = a */; \
+       add     T1,   h        /* T1 = CH(e,f,g) + W[t] + K[t] + h */; \
+       RORQ(   tmp0, 14) /* 14     ; tmp = ((((e ror23)^e)ror4)^e)ror14 = 
S1(e) */; \
+       add     T1,   tmp0        /* T1 = CH(e,f,g) + W[t] + K[t] + S1(e) */; \
+       mov     tmp0, a        /* tmp = a */; \
+       xor     T2,   c        /* T2 = a ^ c */; \
+       and     tmp0, c        /* tmp = a & c */; \
+       and     T2,   b        /* T2 = (a ^ c) & b */; \
+       xor     T2,   tmp0        /* T2 = ((a ^ c) & b) ^ (a & c) = Maj(a,b,c) 
*/; \
+       mov     tmp0, a        /* tmp = a */; \
+       RORQ(   tmp0, 5) /* 39      ; tmp = a ror 5 */; \
+       xor     tmp0, a        /* tmp = (a ror 5) ^ a */; \
+       add     d, T1          /* e(next_state) = d + T1  */; \
+       RORQ(   tmp0, 6) /* 34      ; tmp = ((a ror 5) ^ a) ror 6 */; \
+       xor     tmp0, a        /* tmp = (((a ror 5) ^ a) ror 6) ^ a */; \
+       lea     h, [T1 + T2]   /* a(next_state) = T1 + Maj(a,b,c) */; \
+       RORQ(   tmp0, 28) /* 28     ; tmp = ((((a ror5)^a)ror6)^a)ror28 = S0(a) 
*/; \
+       add     h, tmp0        /* a(next_state) = T1 + Maj(a,b,c) S0(a) */
+
+#define SHA512_2Sched_2Round_avx_PART1(t, a, b, c, d, e, f, g, h) \
+       /* \
+       ; Compute rounds %%t-2 and %%t-1 \
+       ; Compute message schedule QWORDS %%t and %%t+1 \
+       ; \
+       ;   Two rounds are computed based on the values for K[t-2]+W[t-2] and \
+       ; K[t-1]+W[t-1] which were previously stored at WK_2 by the message \
+       ; scheduler. \
+       ;   The two new schedule QWORDS are stored at [W_t(%%t)] and 
[W_t(%%t+1)]. \
+       ; They are then added to their respective SHA512 constants at \
+       ; [K_t(%%t)] and [K_t(%%t+1)] and stored at dqword [WK_2(%%t)] \
+       ;   For brievity, the comments following vectored instructions only 
refer to \
+       ; the first of a pair of QWORDS. \
+       ; Eg. XMM4=W[t-2] really means XMM4={W[t-2]|W[t-1]} \
+       ;   The computation of the message schedule and the rounds are tightly \
+       ; stitched to take advantage of instruction-level parallelism. \
+       ; For clarity, integer instructions (for the rounds calculation) are 
indented \
+       ; by one tab. Vectored instructions (for the message scheduler) are 
indented \
+       ; by two tabs. \
+       */ \
+       \
+               vmovdqa xmm4, [W_t(t-2)]   /* XMM4 = W[t-2] */; \
+               vmovdqu xmm5, [W_t(t-15)]  /* XMM5 = W[t-15] */; \
+       mov     T1,   f; \
+               vpsrlq  xmm0, xmm4, 61       /* XMM0 = W[t-2]>>61 */; \
+       mov     tmp0, e; \
+               vpsrlq  xmm6, xmm5, 1        /* XMM6 = W[t-15]>>1 */; \
+       xor     T1,   g; \
+       RORQ(   tmp0, 23) /* 41 */; \
+               vpsrlq  xmm1, xmm4, 19       /* XMM1 = W[t-2]>>19 */; \
+       and     T1,   e; \
+       xor     tmp0, e; \
+               vpxor   xmm0, xmm0, xmm1           /* XMM0 = W[t-2]>>61 ^ 
W[t-2]>>19 */; \
+       xor     T1,   g; \
+       add     T1,   [WK_2(t)]; \
+               vpsrlq  xmm7, xmm5, 8        /* XMM7 = W[t-15]>>8 */; \
+       RORQ(   tmp0, 4) /* 18 */; \
+               vpsrlq  xmm2, xmm4, 6        /* XMM2 = W[t-2]>>6 */; \
+       xor     tmp0, e; \
+       mov     T2,   a; \
+       add     T1,   h; \
+               vpxor   xmm6, xmm6, xmm7           /* XMM6 = W[t-15]>>1 ^ 
W[t-15]>>8 */; \
+       RORQ(   tmp0, 14) /* 14 */; \
+       add     T1,   tmp0; \
+               vpsrlq  xmm8, xmm5, 7        /* XMM8 = W[t-15]>>7 */; \
+       mov     tmp0, a; \
+       xor     T2,   c; \
+               vpsllq  xmm3, xmm4, (64-61)  /* XMM3 = W[t-2]<<3 */; \
+       and     tmp0, c; \
+       and     T2,   b; \
+               vpxor   xmm2, xmm2, xmm3           /* XMM2 = W[t-2]>>6 ^ 
W[t-2]<<3 */; \
+       xor     T2,   tmp0; \
+       mov     tmp0, a; \
+               vpsllq  xmm9, xmm5, (64-1)   /* XMM9 = W[t-15]<<63 */; \
+       RORQ(   tmp0, 5) /* 39 */; \
+               vpxor   xmm8, xmm8, xmm9           /* XMM8 = W[t-15]>>7 ^ 
W[t-15]<<63 */; \
+       xor     tmp0, a; \
+       add     d, T1; \
+       RORQ(   tmp0, 6) /* 34 */; \
+       xor     tmp0, a; \
+               vpxor   xmm6, xmm6, xmm8           /* XMM6 = W[t-15]>>1 ^ 
W[t-15]>>8 ^ W[t-15]>>7 ^ W[t-15]<<63 */; \
+       lea     h, [T1 + T2]; \
+       RORQ(   tmp0, 28) /* 28 */; \
+               vpsllq  xmm4, xmm4, (64-19)        /* XMM4 = W[t-2]<<25 */; \
+       add     h, tmp0
+
+#define SHA512_2Sched_2Round_avx_PART2(t, a, b, c, d, e, f, g, h) \
+               vpxor   xmm0, xmm0, xmm4           /* XMM0 = W[t-2]>>61 ^ 
W[t-2]>>19 ^ W[t-2]<<25 */; \
+       mov     T1, f; \
+               vpxor   xmm0, xmm0, xmm2           /* XMM0 = s1(W[t-2]) */; \
+       mov     tmp0, e; \
+       xor     T1,   g; \
+               vpaddq  xmm0, xmm0, [W_t(t-16)]  /* XMM0 = s1(W[t-2]) + W[t-16] 
*/; \
+               vmovdqu xmm1, [W_t(t- 7)]  /* XMM1 = W[t-7] */; \
+       RORQ(   tmp0, 23) /* 41 */; \
+       and     T1,   e; \
+       xor     tmp0, e; \
+       xor     T1,   g; \
+               vpsllq  xmm5, xmm5, (64-8)         /* XMM5 = W[t-15]<<56 */; \
+       add     T1,   [WK_2(t+1)]; \
+               vpxor   xmm6, xmm6, xmm5           /* XMM6 = s0(W[t-15]) */; \
+       RORQ(   tmp0, 4) /* 18 */; \
+               vpaddq  xmm0, xmm0, xmm6           /* XMM0 = s1(W[t-2]) + 
W[t-16] + s0(W[t-15]) */; \
+       xor     tmp0, e; \
+               vpaddq  xmm0, xmm0, xmm1           /* XMM0 = W[t] = s1(W[t-2]) 
+ W[t-7] + s0(W[t-15]) + W[t-16] */; \
+       mov     T2,   a; \
+       add     T1,   h; \
+       RORQ(   tmp0, 14) /* 14 */; \
+       add     T1,   tmp0; \
+               vmovdqa [W_t(t)], xmm0      /* Store W[t] */; \
+               vpaddq  xmm0, xmm0, [K_t(t)]        /* Compute W[t]+K[t] */; \
+               vmovdqa [WK_2(t)], xmm0       /* Store W[t]+K[t] for next 
rounds */; \
+       mov     tmp0, a; \
+       xor     T2,   c; \
+       and     tmp0, c; \
+       and     T2,   b; \
+       xor     T2,   tmp0; \
+       mov     tmp0, a; \
+       RORQ(   tmp0, 5) /* 39 */; \
+       xor     tmp0, a; \
+       add     d, T1; \
+       RORQ(   tmp0, 6) /* 34 */; \
+       xor     tmp0, a; \
+       lea     h, [T1 + T2]; \
+       RORQ(   tmp0, 28) /* 28 */; \
+       add     h, tmp0
+
+#define SHA512_2Sched_2Round_avx(t, a, b, c, d, e, f, g, h) \
+       SHA512_2Sched_2Round_avx_PART1(t, a, b, c, d, e, f, g, h); \
+       SHA512_2Sched_2Round_avx_PART2(t, h, a, b, c, d, e, f, g)
+
+/*
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; void sha512_avx(const void* M, void* D, uint64_t L);
+; Purpose: Updates the SHA512 digest stored at D with the message stored in M.
+; The size of the message pointed to by M must be an integer multiple of SHA512
+;   message blocks.
+; L is the message length in SHA512 blocks
+*/
+.globl _gcry_sha512_transform_amd64_avx
+ELF(.type _gcry_sha512_transform_amd64_avx,@function;)
+.align 16
+_gcry_sha512_transform_amd64_avx:
+       CFI_STARTPROC()
+       xor eax, eax
+
+       cmp     msglen, 0
+       je      .Lnowork
+
+       vzeroupper
+
+       /* Allocate Stack Space */
+       sub     rsp, frame_size
+       CFI_ADJUST_CFA_OFFSET(frame_size);
+
+       /* Save GPRs */
+       mov     [rsp + frame_GPRSAVE + 8 * 0], rbx
+       mov     [rsp + frame_GPRSAVE + 8 * 1], r12
+       mov     [rsp + frame_GPRSAVE + 8 * 2], r13
+       mov     [rsp + frame_GPRSAVE + 8 * 3], r14
+       mov     [rsp + frame_GPRSAVE + 8 * 4], r15
+       CFI_REL_OFFSET(rbx, frame_GPRSAVE + 8 * 0);
+       CFI_REL_OFFSET(r12, frame_GPRSAVE + 8 * 1);
+       CFI_REL_OFFSET(r13, frame_GPRSAVE + 8 * 2);
+       CFI_REL_OFFSET(r14, frame_GPRSAVE + 8 * 3);
+       CFI_REL_OFFSET(r15, frame_GPRSAVE + 8 * 4);
+
+.Lupdateblock:
+
+       /* Load state variables */
+       mov     a_64, [DIGEST(0)]
+       mov     b_64, [DIGEST(1)]
+       mov     c_64, [DIGEST(2)]
+       mov     d_64, [DIGEST(3)]
+       mov     e_64, [DIGEST(4)]
+       mov     f_64, [DIGEST(5)]
+       mov     g_64, [DIGEST(6)]
+       mov     h_64, [DIGEST(7)]
+
+       /* BSWAP 2 QWORDS */
+       vmovdqa xmm1, [.LXMM_QWORD_BSWAP ADD_RIP]
+       vmovdqu xmm0, [MSG(0)]
+       vpshufb xmm0, xmm0, xmm1     /* BSWAP */
+       vmovdqa [W_t(0)], xmm0       /* Store Scheduled Pair */
+       vpaddq  xmm0, xmm0, [K_t(0)] /* Compute W[t]+K[t] */
+       vmovdqa [WK_2(0)], xmm0      /* Store into WK for rounds */
+
+       #define T_2_14(t, a, b, c, d, e, f, g, h) \
+               /* BSWAP 2 QWORDS, Compute 2 Rounds */; \
+               vmovdqu xmm0, [MSG(t)]; \
+               vpshufb xmm0, xmm0, xmm1     /* BSWAP */; \
+               SHA512_Round(((t) - 2), a##_64, b##_64, c##_64, d##_64, \
+                                       e##_64, f##_64, g##_64, h##_64); \
+               vmovdqa [W_t(t)], xmm0       /* Store Scheduled Pair */; \
+               vpaddq  xmm0, xmm0, [K_t(t)] /* Compute W[t]+K[t] */; \
+               SHA512_Round(((t) - 1), h##_64, a##_64, b##_64, c##_64, \
+                                       d##_64, e##_64, f##_64, g##_64); \
+               vmovdqa [WK_2(t)], xmm0      /* W[t]+K[t] into WK */
+
+       #define T_16_78(t, a, b, c, d, e, f, g, h) \
+               SHA512_2Sched_2Round_avx((t), a##_64, b##_64, c##_64, d##_64, \
+                                             e##_64, f##_64, g##_64, h##_64)
+
+       #define T_80(t, a, b, c, d, e, f, g, h) \
+               /* Compute 2 Rounds */; \
+               SHA512_Round((t - 2), a##_64, b##_64, c##_64, d##_64, \
+                                     e##_64, f##_64, g##_64, h##_64); \
+               SHA512_Round((t - 1), h##_64, a##_64, b##_64, c##_64, \
+                                     d##_64, e##_64, f##_64, g##_64)
+
+       T_2_14(2, a, b, c, d, e, f, g, h)
+       T_2_14(4, g, h, a, b, c, d, e, f)
+       T_2_14(6, e, f, g, h, a, b, c, d)
+       T_2_14(8, c, d, e, f, g, h, a, b)
+       T_2_14(10, a, b, c, d, e, f, g, h)
+       T_2_14(12, g, h, a, b, c, d, e, f)
+       T_2_14(14, e, f, g, h, a, b, c, d)
+       T_16_78(16, c, d, e, f, g, h, a, b)
+       T_16_78(18, a, b, c, d, e, f, g, h)
+       T_16_78(20, g, h, a, b, c, d, e, f)
+       T_16_78(22, e, f, g, h, a, b, c, d)
+       T_16_78(24, c, d, e, f, g, h, a, b)
+       T_16_78(26, a, b, c, d, e, f, g, h)
+       T_16_78(28, g, h, a, b, c, d, e, f)
+       T_16_78(30, e, f, g, h, a, b, c, d)
+       T_16_78(32, c, d, e, f, g, h, a, b)
+       T_16_78(34, a, b, c, d, e, f, g, h)
+       T_16_78(36, g, h, a, b, c, d, e, f)
+       T_16_78(38, e, f, g, h, a, b, c, d)
+       T_16_78(40, c, d, e, f, g, h, a, b)
+       T_16_78(42, a, b, c, d, e, f, g, h)
+       T_16_78(44, g, h, a, b, c, d, e, f)
+       T_16_78(46, e, f, g, h, a, b, c, d)
+       T_16_78(48, c, d, e, f, g, h, a, b)
+       T_16_78(50, a, b, c, d, e, f, g, h)
+       T_16_78(52, g, h, a, b, c, d, e, f)
+       T_16_78(54, e, f, g, h, a, b, c, d)
+       T_16_78(56, c, d, e, f, g, h, a, b)
+       T_16_78(58, a, b, c, d, e, f, g, h)
+       T_16_78(60, g, h, a, b, c, d, e, f)
+       T_16_78(62, e, f, g, h, a, b, c, d)
+       T_16_78(64, c, d, e, f, g, h, a, b)
+       T_16_78(66, a, b, c, d, e, f, g, h)
+       T_16_78(68, g, h, a, b, c, d, e, f)
+       T_16_78(70, e, f, g, h, a, b, c, d)
+       T_16_78(72, c, d, e, f, g, h, a, b)
+       T_16_78(74, a, b, c, d, e, f, g, h)
+       T_16_78(76, g, h, a, b, c, d, e, f)
+       T_16_78(78, e, f, g, h, a, b, c, d)
+       T_80(80, c, d, e, f, g, h, a, b)
+
+       /* Update digest */
+       add     [DIGEST(0)], a_64
+       add     [DIGEST(1)], b_64
+       add     [DIGEST(2)], c_64
+       add     [DIGEST(3)], d_64
+       add     [DIGEST(4)], e_64
+       add     [DIGEST(5)], f_64
+       add     [DIGEST(6)], g_64
+       add     [DIGEST(7)], h_64
+
+       /* Advance to next message block */
+       add     msg, 16*8
+       dec     msglen
+       jnz     .Lupdateblock
+
+       /* Restore GPRs */
+       mov     rbx, [rsp + frame_GPRSAVE + 8 * 0]
+       mov     r12, [rsp + frame_GPRSAVE + 8 * 1]
+       mov     r13, [rsp + frame_GPRSAVE + 8 * 2]
+       mov     r14, [rsp + frame_GPRSAVE + 8 * 3]
+       mov     r15, [rsp + frame_GPRSAVE + 8 * 4]
+       CFI_RESTORE(rbx)
+       CFI_RESTORE(r12)
+       CFI_RESTORE(r13)
+       CFI_RESTORE(r14)
+       CFI_RESTORE(r15)
+
+       vzeroall
+
+       /* Burn stack */
+       mov eax, 0
+.Lerase_stack:
+       vmovdqu [rsp + rax], ymm0
+       add eax, 32
+       cmp eax, frame_W_size
+       jne .Lerase_stack
+       vmovdqu [rsp + frame_WK], xmm0
+       xor     eax, eax
+
+       /* Restore Stack Pointer */
+       add     rsp, frame_size
+       CFI_ADJUST_CFA_OFFSET(-frame_size);
+
+.Lnowork:
+       ret_spec_stop
+       CFI_ENDPROC()
+
+/*
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;; Binary Data
+*/
+
+.align 16
+
+/* Mask for byte-swapping a couple of qwords in an XMM register using 
(v)pshufb. */
+.LXMM_QWORD_BSWAP:
+       .octa 0x08090a0b0c0d0e0f0001020304050607
+
+/* K[t] used in SHA512 hashing */
+.LK512:
+       .quad 0x428a2f98d728ae22,0x7137449123ef65cd
+       .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
+       .quad 0x3956c25bf348b538,0x59f111f1b605d019
+       .quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118
+       .quad 0xd807aa98a3030242,0x12835b0145706fbe
+       .quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
+       .quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1
+       .quad 0x9bdc06a725c71235,0xc19bf174cf692694
+       .quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3
+       .quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
+       .quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483
+       .quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5
+       .quad 0x983e5152ee66dfab,0xa831c66d2db43210
+       .quad 0xb00327c898fb213f,0xbf597fc7beef0ee4
+       .quad 0xc6e00bf33da88fc2,0xd5a79147930aa725
+       .quad 0x06ca6351e003826f,0x142929670a0e6e70
+       .quad 0x27b70a8546d22ffc,0x2e1b21385c26c926
+       .quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df
+       .quad 0x650a73548baf63de,0x766a0abb3c77b2a8
+       .quad 0x81c2c92e47edaee6,0x92722c851482353b
+       .quad 0xa2bfe8a14cf10364,0xa81a664bbc423001
+       .quad 0xc24b8b70d0f89791,0xc76c51a30654be30
+       .quad 0xd192e819d6ef5218,0xd69906245565a910
+       .quad 0xf40e35855771202a,0x106aa07032bbd1b8
+       .quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53
+       .quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
+       .quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
+       .quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
+       .quad 0x748f82ee5defb2fc,0x78a5636f43172f60
+       .quad 0x84c87814a1f0ab72,0x8cc702081a6439ec
+       .quad 0x90befffa23631e28,0xa4506cebde82bde9
+       .quad 0xbef9a3f7b2c67915,0xc67178f2e372532b
+       .quad 0xca273eceea26619c,0xd186b8c721c0c207
+       .quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
+       .quad 0x06f067aa72176fba,0x0a637dc5a2c898a6
+       .quad 0x113f9804bef90dae,0x1b710b35131c471b
+       .quad 0x28db77f523047d84,0x32caab7b40c72493
+       .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
+       .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a
+       .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817
+
+#endif
+#endif
diff --git a/grub-core/lib/libgcrypt/cipher/sha512-avx2-bmi2-amd64.S 
b/grub-core/lib/libgcrypt/cipher/sha512-avx2-bmi2-amd64.S
new file mode 100644
index 000000000..a431e196a
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/sha512-avx2-bmi2-amd64.S
@@ -0,0 +1,502 @@
+/*
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright (c) 2012, Intel Corporation
+;
+; All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions are
+; met:
+;
+; * Redistributions of source code must retain the above copyright
+;   notice, this list of conditions and the following disclaimer.
+;
+; * Redistributions in binary form must reproduce the above copyright
+;   notice, this list of conditions and the following disclaimer in the
+;   documentation and/or other materials provided with the
+;   distribution.
+;
+; * Neither the name of the Intel Corporation nor the names of its
+;   contributors may be used to endorse or promote products derived from
+;   this software without specific prior written permission.
+;
+;
+; THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION "AS IS" AND ANY
+; EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+; PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR
+; CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; This code schedules 1 blocks at a time, with 4 lanes per block
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+*/
+/*
+ * Conversion to GAS assembly and integration to libgcrypt
+ *  by Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ */
+
+#ifdef __x86_64
+#include <config.h>
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
+    defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \
+    defined(HAVE_GCC_INLINE_ASM_AVX2) && defined(HAVE_GCC_INLINE_ASM_BMI2) && \
+    defined(USE_SHA512)
+
+#include "asm-common-amd64.h"
+
+.intel_syntax noprefix
+
+.text
+
+/* Virtual Registers */
+#define Y_0 ymm4
+#define Y_1 ymm5
+#define Y_2 ymm6
+#define Y_3 ymm7
+
+#define YTMP0 ymm0
+#define YTMP1 ymm1
+#define YTMP2 ymm2
+#define YTMP3 ymm3
+#define YTMP4 ymm8
+#define XFER YTMP0
+
+#define BYTE_FLIP_MASK ymm9
+#define MASK_YMM_LO ymm10
+#define MASK_YMM_LOx xmm10
+
+#define INP rdi /* 1st arg */
+#define CTX rsi /* 2nd arg */
+#define NUM_BLKS rdx /* 3rd arg */
+#define c rcx
+#define d r8
+#define e rdx
+#define y3 rdi
+
+#define TBL rbp
+
+#define a rax
+#define b rbx
+
+#define f r9
+#define g r10
+#define h r11
+
+#define T1 r12
+#define y0 r13
+#define y1 r14
+#define y2 r15
+
+#define y4 r12
+
+/* Local variables (stack frame) */
+#define frame_XFER      0
+#define frame_XFER_size (4*4*8)
+#define frame_SRND      (frame_XFER + frame_XFER_size)
+#define frame_SRND_size (1*8)
+#define frame_INP      (frame_SRND + frame_SRND_size)
+#define frame_INP_size (1*8)
+#define frame_NBLKS      (frame_INP + frame_INP_size)
+#define frame_NBLKS_size (1*8)
+#define frame_RSPSAVE      (frame_NBLKS + frame_NBLKS_size)
+#define frame_RSPSAVE_size (1*8)
+#define frame_GPRSAVE      (frame_RSPSAVE + frame_RSPSAVE_size)
+#define frame_GPRSAVE_size (6*8)
+#define frame_size (frame_GPRSAVE + frame_GPRSAVE_size)
+
+#define        VMOVDQ vmovdqu /*; assume buffers not aligned  */
+
+/* addm [mem], reg */
+/* Add reg to mem using reg-mem add and store */
+#define addm(p1, p2) \
+       add     p2, p1; \
+       mov     p1, p2;
+
+
+/* COPY_YMM_AND_BSWAP ymm, [mem], byte_flip_mask */
+/* Load ymm with mem and byte swap each dword */
+#define COPY_YMM_AND_BSWAP(p1, p2, p3) \
+       VMOVDQ p1, p2; \
+       vpshufb p1, p1, p3
+
+/* %macro MY_VPALIGNR  YDST, YSRC1, YSRC2, RVAL */
+/* YDST = {YSRC1, YSRC2} >> RVAL*8 */
+#define MY_VPALIGNR(YDST, YSRC1, YSRC2, RVAL) \
+       vperm2i128 YDST, YSRC1, YSRC2, 0x3 /* YDST = {YS1_LO, YS2_HI} */; \
+       vpalignr   YDST, YDST, YSRC2, RVAL /* YDST = {YDS1, YS2} >> RVAL*8 */
+
+#define ONE_ROUND_PART1(XFERIN, a, b, c, d, e, f, g, h) \
+       /* h += Sum1 (e) + Ch (e, f, g) + (k[t] + w[0]); \
+        * d += h; \
+        * h += Sum0 (a) + Maj (a, b, c); \
+        * \
+        * Ch(x, y, z) => ((x & y) + (~x & z)) \
+        * Maj(x, y, z) => ((x & y) + (z & (x ^ y))) \
+        */ \
+       \
+       mov y3, e; \
+       add h, [XFERIN]; \
+       and y3, f; \
+       rorx y0, e, 41; \
+       rorx y1, e, 18; \
+       lea h, [h + y3]; \
+       andn y3, e, g; \
+       rorx T1, a, 34; \
+       xor y0, y1; \
+       lea h, [h + y3]
+
+#define ONE_ROUND_PART2(a, b, c, d, e, f, g, h) \
+       rorx y2, a, 39; \
+       rorx y1, e, 14; \
+       mov y3, a; \
+       xor T1, y2; \
+       xor y0, y1; \
+       xor y3, b; \
+       lea h, [h + y0]; \
+       mov y0, a; \
+       rorx y2, a, 28; \
+       add d, h; \
+       and y3, c; \
+       xor T1, y2; \
+       lea h, [h + y3]; \
+       lea h, [h + T1]; \
+       and y0, b; \
+       lea h, [h + y0]
+
+#define ONE_ROUND(XFERIN, a, b, c, d, e, f, g, h) \
+       ONE_ROUND_PART1(XFERIN, a, b, c, d, e, f, g, h); \
+       ONE_ROUND_PART2(a, b, c, d, e, f, g, h)
+
+#define FOUR_ROUNDS_AND_SCHED(X, Y_0, Y_1, Y_2, Y_3, a, b, c, d, e, f, g, h) \
+       /*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; RND N + 0 
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; */; \
+               /* Extract w[t-7] */; \
+               MY_VPALIGNR(    YTMP0, Y_3, Y_2, 8)             /* YTMP0 = 
W[-7] */; \
+               /* Calculate w[t-16] + w[t-7] */; \
+               vpaddq          YTMP0, YTMP0, Y_0               /* YTMP0 = 
W[-7] + W[-16] */; \
+               /* Extract w[t-15] */; \
+               MY_VPALIGNR(    YTMP1, Y_1, Y_0, 8)             /* YTMP1 = 
W[-15] */; \
+               \
+               /* Calculate sigma0 */; \
+               \
+               /* Calculate w[t-15] ror 1 */; \
+               vpsrlq          YTMP2, YTMP1, 1; \
+               vpsllq          YTMP3, YTMP1, (64-1); \
+               vpor            YTMP3, YTMP3, YTMP2             /* YTMP3 = 
W[-15] ror 1 */; \
+               /* Calculate w[t-15] shr 7 */; \
+               vpsrlq          YTMP4, YTMP1, 7                 /* YTMP4 = 
W[-15] >> 7 */; \
+       \
+       ONE_ROUND(rsp+frame_XFER+0*8+X*32, a, b, c, d, e, f, g, h); \
+       \
+       /*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; RND N + 1 
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; */; \
+               /* Calculate w[t-15] ror 8 */; \
+               vpsrlq          YTMP2, YTMP1, 8; \
+               vpsllq          YTMP1, YTMP1, (64-8); \
+               vpor            YTMP1, YTMP1, YTMP2             /* YTMP1 = 
W[-15] ror 8 */; \
+               /* XOR the three components */; \
+               vpxor           YTMP3, YTMP3, YTMP4             /* YTMP3 = 
W[-15] ror 1 ^ W[-15] >> 7 */; \
+               vpxor           YTMP1, YTMP3, YTMP1             /* YTMP1 = s0 
*/; \
+               \
+               /* Add three components, w[t-16], w[t-7] and sigma0 */; \
+               vpaddq          YTMP0, YTMP0, YTMP1             /* YTMP0 = 
W[-16] + W[-7] + s0 */; \
+               /* Move to appropriate lanes for calculating w[16] and w[17] 
*/; \
+               vperm2i128      Y_0, YTMP0, YTMP0, 0x0          /* Y_0 = W[-16] 
+ W[-7] + s0 {BABA} */; \
+               /* Move to appropriate lanes for calculating w[18] and w[19] 
*/; \
+               vpand           YTMP0, YTMP0, MASK_YMM_LO       /* YTMP0 = 
W[-16] + W[-7] + s0 {DC00} */; \
+               \
+               /* Calculate w[16] and w[17] in both 128 bit lanes */; \
+               \
+               /* Calculate sigma1 for w[16] and w[17] on both 128 bit lanes 
*/; \
+               vperm2i128      YTMP2, Y_3, Y_3, 0x11           /* YTMP2 = 
W[-2] {BABA} */; \
+               vpsrlq          YTMP4, YTMP2, 6                 /* YTMP4 = 
W[-2] >> 6 {BABA} */; \
+       \
+       ONE_ROUND(rsp+frame_XFER+1*8+X*32, h, a, b, c, d, e, f, g); \
+       \
+       /*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; RND N + 2 
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; */; \
+               vpsrlq          YTMP3, YTMP2, 19                /* YTMP3 = 
W[-2] >> 19 {BABA} */; \
+               vpsllq          YTMP1, YTMP2, (64-19)           /* YTMP1 = 
W[-2] << 19 {BABA} */; \
+               vpor            YTMP3, YTMP3, YTMP1             /* YTMP3 = 
W[-2] ror 19 {BABA} */; \
+               vpxor           YTMP4, YTMP4, YTMP3             /* YTMP4 = 
W[-2] ror 19 ^ W[-2] >> 6 {BABA} */; \
+               vpsrlq          YTMP3, YTMP2, 61                /* YTMP3 = 
W[-2] >> 61 {BABA} */; \
+               vpsllq          YTMP1, YTMP2, (64-61)           /* YTMP1 = 
W[-2] << 61 {BABA} */; \
+               vpor            YTMP3, YTMP3, YTMP1             /* YTMP3 = 
W[-2] ror 61 {BABA} */; \
+               vpxor           YTMP4, YTMP4, YTMP3             /* YTMP4 = s1 = 
(W[-2] ror 19) ^ (W[-2] ror 61) ^ (W[-2] >> 6) {BABA} */; \
+               \
+               /* Add sigma1 to the other compunents to get w[16] and w[17] 
*/; \
+               vpaddq          Y_0, Y_0, YTMP4                 /* Y_0 = {W[1], 
W[0], W[1], W[0]} */; \
+               \
+               /* Calculate sigma1 for w[18] and w[19] for upper 128 bit lane 
*/; \
+               vpsrlq          YTMP4, Y_0, 6                   /* YTMP4 = 
W[-2] >> 6 {DC--} */; \
+       \
+       ONE_ROUND(rsp+frame_XFER+2*8+X*32, g, h, a, b, c, d, e, f); \
+       \
+       /*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; RND N + 3 
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; */; \
+               vpsrlq          YTMP3, Y_0, 19                  /* YTMP3 = 
W[-2] >> 19 {DC--} */; \
+               vpsllq          YTMP1, Y_0, (64-19)             /* YTMP1 = 
W[-2] << 19 {DC--} */; \
+               vpor            YTMP3, YTMP3, YTMP1             /* YTMP3 = 
W[-2] ror 19 {DC--} */; \
+               vpxor           YTMP4, YTMP4, YTMP3             /* YTMP4 = 
W[-2] ror 19 ^ W[-2] >> 6 {DC--} */; \
+               vpsrlq          YTMP3, Y_0, 61                  /* YTMP3 = 
W[-2] >> 61 {DC--} */; \
+               vpsllq          YTMP1, Y_0, (64-61)             /* YTMP1 = 
W[-2] << 61 {DC--} */; \
+               vpor            YTMP3, YTMP3, YTMP1             /* YTMP3 = 
W[-2] ror 61 {DC--} */; \
+               vpxor           YTMP4, YTMP4, YTMP3             /* YTMP4 = s1 = 
(W[-2] ror 19) ^ (W[-2] ror 61) ^ (W[-2] >> 6) {DC--} */; \
+               \
+               /* Add the sigma0 + w[t-7] + w[t-16] for w[18] and w[19] to 
newly calculated sigma1 to get w[18] and w[19] */; \
+               vpaddq          YTMP2, YTMP0, YTMP4             /* YTMP2 = 
{W[3], W[2], --, --} */; \
+               \
+               /* Form w[19, w[18], w17], w[16] */; \
+               vpblendd        Y_0, Y_0, YTMP2, 0xF0           /* Y_0 = {W[3], 
W[2], W[1], W[0]} */; \
+       \
+       ONE_ROUND_PART1(rsp+frame_XFER+3*8+X*32, f, g, h, a, b, c, d, e); \
+               vpaddq          XFER, Y_0, [TBL + (4+X)*32]; \
+               vmovdqa         [rsp + frame_XFER + X*32], XFER; \
+       ONE_ROUND_PART2(f, g, h, a, b, c, d, e)
+
+#define DO_4ROUNDS(X, a, b, c, d, e, f, g, h) \
+       ONE_ROUND(rsp+frame_XFER+0*8+X*32, a, b, c, d, e, f, g, h); \
+       ONE_ROUND(rsp+frame_XFER+1*8+X*32, h, a, b, c, d, e, f, g); \
+       ONE_ROUND(rsp+frame_XFER+2*8+X*32, g, h, a, b, c, d, e, f); \
+       ONE_ROUND(rsp+frame_XFER+3*8+X*32, f, g, h, a, b, c, d, e)
+
+/*
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; void sha512_rorx(const void* M, void* D, uint64_t L);
+; Purpose: Updates the SHA512 digest stored at D with the message stored in M.
+; The size of the message pointed to by M must be an integer multiple of SHA512
+;   message blocks.
+; L is the message length in SHA512 blocks
+*/
+.globl _gcry_sha512_transform_amd64_avx2
+ELF(.type _gcry_sha512_transform_amd64_avx2,@function;)
+.align 16
+_gcry_sha512_transform_amd64_avx2:
+       CFI_STARTPROC()
+       xor eax, eax
+
+       cmp rdx, 0
+       je .Lnowork
+
+       vzeroupper
+
+       /* Allocate Stack Space */
+       mov     rax, rsp
+       CFI_DEF_CFA_REGISTER(rax);
+       sub     rsp, frame_size
+       and     rsp, ~(0x40 - 1)
+       mov     [rsp + frame_RSPSAVE], rax
+       CFI_CFA_ON_STACK(frame_RSPSAVE, 0)
+
+       /* Save GPRs */
+       mov     [rsp + frame_GPRSAVE + 8 * 0], rbp
+       mov     [rsp + frame_GPRSAVE + 8 * 1], rbx
+       mov     [rsp + frame_GPRSAVE + 8 * 2], r12
+       mov     [rsp + frame_GPRSAVE + 8 * 3], r13
+       mov     [rsp + frame_GPRSAVE + 8 * 4], r14
+       mov     [rsp + frame_GPRSAVE + 8 * 5], r15
+       CFI_REG_ON_STACK(rbp, frame_GPRSAVE + 8 * 0)
+       CFI_REG_ON_STACK(rbx, frame_GPRSAVE + 8 * 1)
+       CFI_REG_ON_STACK(r12, frame_GPRSAVE + 8 * 2)
+       CFI_REG_ON_STACK(r13, frame_GPRSAVE + 8 * 3)
+       CFI_REG_ON_STACK(r14, frame_GPRSAVE + 8 * 4)
+       CFI_REG_ON_STACK(r15, frame_GPRSAVE + 8 * 5)
+
+       mov     [rsp + frame_NBLKS], NUM_BLKS
+
+       /*; load initial digest */
+       mov     a,[8*0 + CTX]
+       mov     b,[8*1 + CTX]
+       mov     c,[8*2 + CTX]
+       mov     d,[8*3 + CTX]
+       mov     e,[8*4 + CTX]
+       mov     f,[8*5 + CTX]
+       mov     g,[8*6 + CTX]
+       mov     h,[8*7 + CTX]
+
+       vmovdqa BYTE_FLIP_MASK, [.LPSHUFFLE_BYTE_FLIP_MASK ADD_RIP]
+       vmovdqa MASK_YMM_LO, [.LMASK_YMM_LO ADD_RIP]
+
+       lea     TBL,[.LK512 ADD_RIP]
+
+       /*; byte swap first 16 dwords */
+       COPY_YMM_AND_BSWAP(Y_0, [INP + 0*32], BYTE_FLIP_MASK)
+       COPY_YMM_AND_BSWAP(Y_1, [INP + 1*32], BYTE_FLIP_MASK)
+       COPY_YMM_AND_BSWAP(Y_2, [INP + 2*32], BYTE_FLIP_MASK)
+       COPY_YMM_AND_BSWAP(Y_3, [INP + 3*32], BYTE_FLIP_MASK)
+
+       add     INP, 128
+       mov     [rsp + frame_INP], INP
+
+       vpaddq  XFER, Y_0, [TBL + 0*32]
+       vmovdqa [rsp + frame_XFER + 0*32], XFER
+       vpaddq  XFER, Y_1, [TBL + 1*32]
+       vmovdqa [rsp + frame_XFER + 1*32], XFER
+       vpaddq  XFER, Y_2, [TBL + 2*32]
+       vmovdqa [rsp + frame_XFER + 2*32], XFER
+       vpaddq  XFER, Y_3, [TBL + 3*32]
+       vmovdqa [rsp + frame_XFER + 3*32], XFER
+
+       /*; schedule 64 input dwords, by doing 12 rounds of 4 each */
+       mov     qword ptr [rsp + frame_SRND], 4
+
+.align 16
+.Loop0:
+       FOUR_ROUNDS_AND_SCHED(0, Y_0, Y_1, Y_2, Y_3, a, b, c, d, e, f, g, h)
+       FOUR_ROUNDS_AND_SCHED(1, Y_1, Y_2, Y_3, Y_0, e, f, g, h, a, b, c, d)
+       FOUR_ROUNDS_AND_SCHED(2, Y_2, Y_3, Y_0, Y_1, a, b, c, d, e, f, g, h)
+       FOUR_ROUNDS_AND_SCHED(3, Y_3, Y_0, Y_1, Y_2, e, f, g, h, a, b, c, d)
+       add     TBL, 4*32
+
+       sub     qword ptr [rsp + frame_SRND], 1
+       jne     .Loop0
+
+       sub     qword ptr [rsp + frame_NBLKS], 1
+       je      .Ldone_hash
+
+       mov     INP, [rsp + frame_INP]
+
+       lea     TBL,[.LK512 ADD_RIP]
+
+       /* load next block and byte swap */
+       COPY_YMM_AND_BSWAP(Y_0, [INP + 0*32], BYTE_FLIP_MASK)
+       COPY_YMM_AND_BSWAP(Y_1, [INP + 1*32], BYTE_FLIP_MASK)
+       COPY_YMM_AND_BSWAP(Y_2, [INP + 2*32], BYTE_FLIP_MASK)
+       COPY_YMM_AND_BSWAP(Y_3, [INP + 3*32], BYTE_FLIP_MASK)
+
+       add     INP, 128
+       mov     [rsp + frame_INP], INP
+
+       DO_4ROUNDS(0, a, b, c, d, e, f, g, h)
+       vpaddq  XFER, Y_0, [TBL + 0*32]
+       vmovdqa [rsp + frame_XFER + 0*32], XFER
+       DO_4ROUNDS(1, e, f, g, h, a, b, c, d)
+       vpaddq  XFER, Y_1, [TBL + 1*32]
+       vmovdqa [rsp + frame_XFER + 1*32], XFER
+       DO_4ROUNDS(2, a, b, c, d, e, f, g, h)
+       vpaddq  XFER, Y_2, [TBL + 2*32]
+       vmovdqa [rsp + frame_XFER + 2*32], XFER
+       DO_4ROUNDS(3, e, f, g, h, a, b, c, d)
+       vpaddq  XFER, Y_3, [TBL + 3*32]
+       vmovdqa [rsp + frame_XFER + 3*32], XFER
+
+       addm([8*0 + CTX],a)
+       addm([8*1 + CTX],b)
+       addm([8*2 + CTX],c)
+       addm([8*3 + CTX],d)
+       addm([8*4 + CTX],e)
+       addm([8*5 + CTX],f)
+       addm([8*6 + CTX],g)
+       addm([8*7 + CTX],h)
+
+       /*; schedule 64 input dwords, by doing 12 rounds of 4 each */
+       mov     qword ptr [rsp + frame_SRND],4
+
+       jmp     .Loop0
+
+.Ldone_hash:
+       vzeroall
+
+       DO_4ROUNDS(0, a, b, c, d, e, f, g, h)
+       vmovdqa [rsp + frame_XFER + 0*32], ymm0 /* burn stack */
+       DO_4ROUNDS(1, e, f, g, h, a, b, c, d)
+       vmovdqa [rsp + frame_XFER + 1*32], ymm0 /* burn stack */
+       DO_4ROUNDS(2, a, b, c, d, e, f, g, h)
+       vmovdqa [rsp + frame_XFER + 2*32], ymm0 /* burn stack */
+       DO_4ROUNDS(3, e, f, g, h, a, b, c, d)
+       vmovdqa [rsp + frame_XFER + 3*32], ymm0 /* burn stack */
+
+       addm([8*0 + CTX],a)
+       xor     eax, eax /* burn stack */
+       addm([8*1 + CTX],b)
+       addm([8*2 + CTX],c)
+       addm([8*3 + CTX],d)
+       addm([8*4 + CTX],e)
+       addm([8*5 + CTX],f)
+       addm([8*6 + CTX],g)
+       addm([8*7 + CTX],h)
+
+       /* Restore GPRs */
+       mov     rbp, [rsp + frame_GPRSAVE + 8 * 0]
+       mov     rbx, [rsp + frame_GPRSAVE + 8 * 1]
+       mov     r12, [rsp + frame_GPRSAVE + 8 * 2]
+       mov     r13, [rsp + frame_GPRSAVE + 8 * 3]
+       mov     r14, [rsp + frame_GPRSAVE + 8 * 4]
+       mov     r15, [rsp + frame_GPRSAVE + 8 * 5]
+       CFI_RESTORE(rbp)
+       CFI_RESTORE(rbx)
+       CFI_RESTORE(r12)
+       CFI_RESTORE(r13)
+       CFI_RESTORE(r14)
+       CFI_RESTORE(r15)
+
+       /* Restore Stack Pointer */
+       mov     rsp, [rsp + frame_RSPSAVE]
+       CFI_DEF_CFA_REGISTER(rsp)
+
+.Lnowork:
+       ret_spec_stop
+       CFI_ENDPROC()
+
+/*;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; */
+/*;; Binary Data */
+
+.align 64
+/* K[t] used in SHA512 hashing */
+.LK512:
+       .quad   0x428a2f98d728ae22,0x7137449123ef65cd
+       .quad   0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
+       .quad   0x3956c25bf348b538,0x59f111f1b605d019
+       .quad   0x923f82a4af194f9b,0xab1c5ed5da6d8118
+       .quad   0xd807aa98a3030242,0x12835b0145706fbe
+       .quad   0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
+       .quad   0x72be5d74f27b896f,0x80deb1fe3b1696b1
+       .quad   0x9bdc06a725c71235,0xc19bf174cf692694
+       .quad   0xe49b69c19ef14ad2,0xefbe4786384f25e3
+       .quad   0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
+       .quad   0x2de92c6f592b0275,0x4a7484aa6ea6e483
+       .quad   0x5cb0a9dcbd41fbd4,0x76f988da831153b5
+       .quad   0x983e5152ee66dfab,0xa831c66d2db43210
+       .quad   0xb00327c898fb213f,0xbf597fc7beef0ee4
+       .quad   0xc6e00bf33da88fc2,0xd5a79147930aa725
+       .quad   0x06ca6351e003826f,0x142929670a0e6e70
+       .quad   0x27b70a8546d22ffc,0x2e1b21385c26c926
+       .quad   0x4d2c6dfc5ac42aed,0x53380d139d95b3df
+       .quad   0x650a73548baf63de,0x766a0abb3c77b2a8
+       .quad   0x81c2c92e47edaee6,0x92722c851482353b
+       .quad   0xa2bfe8a14cf10364,0xa81a664bbc423001
+       .quad   0xc24b8b70d0f89791,0xc76c51a30654be30
+       .quad   0xd192e819d6ef5218,0xd69906245565a910
+       .quad   0xf40e35855771202a,0x106aa07032bbd1b8
+       .quad   0x19a4c116b8d2d0c8,0x1e376c085141ab53
+       .quad   0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
+       .quad   0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
+       .quad   0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
+       .quad   0x748f82ee5defb2fc,0x78a5636f43172f60
+       .quad   0x84c87814a1f0ab72,0x8cc702081a6439ec
+       .quad   0x90befffa23631e28,0xa4506cebde82bde9
+       .quad   0xbef9a3f7b2c67915,0xc67178f2e372532b
+       .quad   0xca273eceea26619c,0xd186b8c721c0c207
+       .quad   0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
+       .quad   0x06f067aa72176fba,0x0a637dc5a2c898a6
+       .quad   0x113f9804bef90dae,0x1b710b35131c471b
+       .quad   0x28db77f523047d84,0x32caab7b40c72493
+       .quad   0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
+       .quad   0x4cc5d4becb3e42b6,0x597f299cfc657e2a
+       .quad   0x5fcb6fab3ad6faec,0x6c44198c4a475817
+
+.align 32
+
+/* Mask for byte-swapping a couple of qwords in an XMM register using 
(v)pshufb. */
+.LPSHUFFLE_BYTE_FLIP_MASK: .octa 0x08090a0b0c0d0e0f0001020304050607
+                          .octa 0x18191a1b1c1d1e1f1011121314151617
+
+.LMASK_YMM_LO:            .octa 0x00000000000000000000000000000000
+                          .octa 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
+
+#endif
+#endif
diff --git a/grub-core/lib/libgcrypt/cipher/sha512-ppc.c 
b/grub-core/lib/libgcrypt/cipher/sha512-ppc.c
new file mode 100644
index 000000000..31ea25bf9
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/sha512-ppc.c
@@ -0,0 +1,969 @@
+/* sha512-ppc.c - PowerPC vcrypto implementation of SHA-512 transform
+ * Copyright (C) 2019 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+
+#if defined(ENABLE_PPC_CRYPTO_SUPPORT) && \
+    defined(HAVE_COMPATIBLE_CC_PPC_ALTIVEC) && \
+    defined(HAVE_GCC_INLINE_ASM_PPC_ALTIVEC) && \
+    defined(USE_SHA512) && \
+    __GNUC__ >= 4
+
+#include <altivec.h>
+#include "bufhelp.h"
+
+
+typedef vector unsigned char vector16x_u8;
+typedef vector unsigned long long vector2x_u64;
+
+
+#define ALWAYS_INLINE inline __attribute__((always_inline))
+#define NO_INLINE __attribute__((noinline))
+#define NO_INSTRUMENT_FUNCTION __attribute__((no_instrument_function))
+
+#define ASM_FUNC_ATTR          NO_INSTRUMENT_FUNCTION
+#define ASM_FUNC_ATTR_INLINE   ASM_FUNC_ATTR ALWAYS_INLINE
+#define ASM_FUNC_ATTR_NOINLINE ASM_FUNC_ATTR NO_INLINE
+
+
+static const u64 K[80] =
+  {
+    U64_C(0x428a2f98d728ae22), U64_C(0x7137449123ef65cd),
+    U64_C(0xb5c0fbcfec4d3b2f), U64_C(0xe9b5dba58189dbbc),
+    U64_C(0x3956c25bf348b538), U64_C(0x59f111f1b605d019),
+    U64_C(0x923f82a4af194f9b), U64_C(0xab1c5ed5da6d8118),
+    U64_C(0xd807aa98a3030242), U64_C(0x12835b0145706fbe),
+    U64_C(0x243185be4ee4b28c), U64_C(0x550c7dc3d5ffb4e2),
+    U64_C(0x72be5d74f27b896f), U64_C(0x80deb1fe3b1696b1),
+    U64_C(0x9bdc06a725c71235), U64_C(0xc19bf174cf692694),
+    U64_C(0xe49b69c19ef14ad2), U64_C(0xefbe4786384f25e3),
+    U64_C(0x0fc19dc68b8cd5b5), U64_C(0x240ca1cc77ac9c65),
+    U64_C(0x2de92c6f592b0275), U64_C(0x4a7484aa6ea6e483),
+    U64_C(0x5cb0a9dcbd41fbd4), U64_C(0x76f988da831153b5),
+    U64_C(0x983e5152ee66dfab), U64_C(0xa831c66d2db43210),
+    U64_C(0xb00327c898fb213f), U64_C(0xbf597fc7beef0ee4),
+    U64_C(0xc6e00bf33da88fc2), U64_C(0xd5a79147930aa725),
+    U64_C(0x06ca6351e003826f), U64_C(0x142929670a0e6e70),
+    U64_C(0x27b70a8546d22ffc), U64_C(0x2e1b21385c26c926),
+    U64_C(0x4d2c6dfc5ac42aed), U64_C(0x53380d139d95b3df),
+    U64_C(0x650a73548baf63de), U64_C(0x766a0abb3c77b2a8),
+    U64_C(0x81c2c92e47edaee6), U64_C(0x92722c851482353b),
+    U64_C(0xa2bfe8a14cf10364), U64_C(0xa81a664bbc423001),
+    U64_C(0xc24b8b70d0f89791), U64_C(0xc76c51a30654be30),
+    U64_C(0xd192e819d6ef5218), U64_C(0xd69906245565a910),
+    U64_C(0xf40e35855771202a), U64_C(0x106aa07032bbd1b8),
+    U64_C(0x19a4c116b8d2d0c8), U64_C(0x1e376c085141ab53),
+    U64_C(0x2748774cdf8eeb99), U64_C(0x34b0bcb5e19b48a8),
+    U64_C(0x391c0cb3c5c95a63), U64_C(0x4ed8aa4ae3418acb),
+    U64_C(0x5b9cca4f7763e373), U64_C(0x682e6ff3d6b2b8a3),
+    U64_C(0x748f82ee5defb2fc), U64_C(0x78a5636f43172f60),
+    U64_C(0x84c87814a1f0ab72), U64_C(0x8cc702081a6439ec),
+    U64_C(0x90befffa23631e28), U64_C(0xa4506cebde82bde9),
+    U64_C(0xbef9a3f7b2c67915), U64_C(0xc67178f2e372532b),
+    U64_C(0xca273eceea26619c), U64_C(0xd186b8c721c0c207),
+    U64_C(0xeada7dd6cde0eb1e), U64_C(0xf57d4f7fee6ed178),
+    U64_C(0x06f067aa72176fba), U64_C(0x0a637dc5a2c898a6),
+    U64_C(0x113f9804bef90dae), U64_C(0x1b710b35131c471b),
+    U64_C(0x28db77f523047d84), U64_C(0x32caab7b40c72493),
+    U64_C(0x3c9ebe0a15c9bebc), U64_C(0x431d67c49c100d4c),
+    U64_C(0x4cc5d4becb3e42b6), U64_C(0x597f299cfc657e2a),
+    U64_C(0x5fcb6fab3ad6faec), U64_C(0x6c44198c4a475817)
+  };
+
+
+static ASM_FUNC_ATTR_INLINE u64
+ror64 (u64 v, u64 shift)
+{
+  return (v >> (shift & 63)) ^ (v << ((64 - shift) & 63));
+}
+
+
+static ASM_FUNC_ATTR_INLINE vector2x_u64
+vec_rol_elems(vector2x_u64 v, unsigned int idx)
+{
+#ifndef WORDS_BIGENDIAN
+  return vec_sld (v, v, (16 - (8 * idx)) & 15);
+#else
+  return vec_sld (v, v, (8 * idx) & 15);
+#endif
+}
+
+
+static ASM_FUNC_ATTR_INLINE vector2x_u64
+vec_merge_idx0_elems(vector2x_u64 v0, vector2x_u64 v1)
+{
+  return vec_mergeh (v0, v1);
+}
+
+
+static ASM_FUNC_ATTR_INLINE vector2x_u64
+vec_vshasigma_u64(vector2x_u64 v, unsigned int a, unsigned int b)
+{
+  __asm__ ("vshasigmad %0,%1,%2,%3"
+          : "=v" (v)
+          : "v" (v), "g" (a), "g" (b)
+          : "memory");
+  return v;
+}
+
+
+static ASM_FUNC_ATTR_INLINE vector2x_u64
+vec_u64_load(unsigned long offset, const void *ptr)
+{
+  vector2x_u64 vecu64;
+#if __GNUC__ >= 4
+  if (__builtin_constant_p (offset) && offset == 0)
+    __asm__ ("lxvd2x %x0,0,%1\n\t"
+            : "=wa" (vecu64)
+            : "r" ((uintptr_t)ptr)
+            : "memory");
+  else
+#endif
+    __asm__ ("lxvd2x %x0,%1,%2\n\t"
+            : "=wa" (vecu64)
+            : "r" (offset), "r" ((uintptr_t)ptr)
+            : "memory", "r0");
+#ifndef WORDS_BIGENDIAN
+  __asm__ ("xxswapd %x0, %x1"
+          : "=wa" (vecu64)
+          : "wa" (vecu64));
+#endif
+  return vecu64;
+}
+
+
+static ASM_FUNC_ATTR_INLINE void
+vec_u64_store(vector2x_u64 vecu64, unsigned long offset, void *ptr)
+{
+#ifndef WORDS_BIGENDIAN
+  __asm__ ("xxswapd %x0, %x1"
+          : "=wa" (vecu64)
+          : "wa" (vecu64));
+#endif
+#if __GNUC__ >= 4
+  if (__builtin_constant_p (offset) && offset == 0)
+    __asm__ ("stxvd2x %x0,0,%1\n\t"
+            :
+            : "wa" (vecu64), "r" ((uintptr_t)ptr)
+            : "memory");
+  else
+#endif
+    __asm__ ("stxvd2x %x0,%1,%2\n\t"
+            :
+            : "wa" (vecu64), "r" (offset), "r" ((uintptr_t)ptr)
+            : "memory", "r0");
+}
+
+
+/* SHA2 round in vector registers */
+#define R(a,b,c,d,e,f,g,h,k,w) do                             \
+    {                                                         \
+      t1  = (h);                                              \
+      t1 += ((k) + (w));                                      \
+      t1 += Cho((e),(f),(g));                                 \
+      t1 += Sum1((e));                                        \
+      t2  = Sum0((a));                                        \
+      t2 += Maj((a),(b),(c));                                 \
+      d  += t1;                                               \
+      h   = t1 + t2;                                          \
+    } while (0)
+
+#define Cho(b, c, d)  (vec_sel(d, c, b))
+
+#define Maj(c, d, b)  (vec_sel(c, b, c ^ d))
+
+#define Sum0(x)       (vec_vshasigma_u64(x, 1, 0))
+
+#define Sum1(x)       (vec_vshasigma_u64(x, 1, 15))
+
+
+/* Message expansion on general purpose registers */
+#define S0(x) (ror64 ((x), 1) ^ ror64 ((x), 8) ^ ((x) >> 7))
+#define S1(x) (ror64 ((x), 19) ^ ror64 ((x), 61) ^ ((x) >> 6))
+
+#define I(i) ( w[i] = buf_get_be64(data + i * 8) )
+#define WN(i) ({ w[i&0x0f] +=    w[(i-7) &0x0f];  \
+                w[i&0x0f] += S0(w[(i-15)&0x0f]); \
+                w[i&0x0f] += S1(w[(i-2) &0x0f]); \
+                w[i&0x0f]; })
+#define W(i) ({ u64 r = w[i&0x0f]; WN(i); r; })
+#define L(i) w[i&0x0f]
+
+
+unsigned int ASM_FUNC_ATTR
+_gcry_sha512_transform_ppc8(u64 state[8],
+                           const unsigned char *data, size_t nblks)
+{
+  /* GPRs used for message expansion as vector intrinsics based generates
+   * slower code. */
+  vector2x_u64 h0, h1, h2, h3, h4, h5, h6, h7;
+  vector2x_u64 a, b, c, d, e, f, g, h, t1, t2;
+  u64 w[16];
+
+  h0 = vec_u64_load (8 * 0, (unsigned long long *)state);
+  h1 = vec_rol_elems (h0, 1);
+  h2 = vec_u64_load (8 * 2, (unsigned long long *)state);
+  h3 = vec_rol_elems (h2, 1);
+  h4 = vec_u64_load (8 * 4, (unsigned long long *)state);
+  h5 = vec_rol_elems (h4, 1);
+  h6 = vec_u64_load (8 * 6, (unsigned long long *)state);
+  h7 = vec_rol_elems (h6, 1);
+
+  while (nblks >= 2)
+    {
+      a = h0;
+      b = h1;
+      c = h2;
+      d = h3;
+      e = h4;
+      f = h5;
+      g = h6;
+      h = h7;
+
+      I(0); I(1); I(2); I(3);
+      I(4); I(5); I(6); I(7);
+      I(8); I(9); I(10); I(11);
+      I(12); I(13); I(14); I(15);
+      data += 128;
+      R(a, b, c, d, e, f, g, h, K[0], W(0));
+      R(h, a, b, c, d, e, f, g, K[1], W(1));
+      R(g, h, a, b, c, d, e, f, K[2], W(2));
+      R(f, g, h, a, b, c, d, e, K[3], W(3));
+      R(e, f, g, h, a, b, c, d, K[4], W(4));
+      R(d, e, f, g, h, a, b, c, K[5], W(5));
+      R(c, d, e, f, g, h, a, b, K[6], W(6));
+      R(b, c, d, e, f, g, h, a, K[7], W(7));
+      R(a, b, c, d, e, f, g, h, K[8], W(8));
+      R(h, a, b, c, d, e, f, g, K[9], W(9));
+      R(g, h, a, b, c, d, e, f, K[10], W(10));
+      R(f, g, h, a, b, c, d, e, K[11], W(11));
+      R(e, f, g, h, a, b, c, d, K[12], W(12));
+      R(d, e, f, g, h, a, b, c, K[13], W(13));
+      R(c, d, e, f, g, h, a, b, K[14], W(14));
+      R(b, c, d, e, f, g, h, a, K[15], W(15));
+
+      R(a, b, c, d, e, f, g, h, K[16], W(16));
+      R(h, a, b, c, d, e, f, g, K[17], W(17));
+      R(g, h, a, b, c, d, e, f, K[18], W(18));
+      R(f, g, h, a, b, c, d, e, K[19], W(19));
+      R(e, f, g, h, a, b, c, d, K[20], W(20));
+      R(d, e, f, g, h, a, b, c, K[21], W(21));
+      R(c, d, e, f, g, h, a, b, K[22], W(22));
+      R(b, c, d, e, f, g, h, a, K[23], W(23));
+      R(a, b, c, d, e, f, g, h, K[24], W(24));
+      R(h, a, b, c, d, e, f, g, K[25], W(25));
+      R(g, h, a, b, c, d, e, f, K[26], W(26));
+      R(f, g, h, a, b, c, d, e, K[27], W(27));
+      R(e, f, g, h, a, b, c, d, K[28], W(28));
+      R(d, e, f, g, h, a, b, c, K[29], W(29));
+      R(c, d, e, f, g, h, a, b, K[30], W(30));
+      R(b, c, d, e, f, g, h, a, K[31], W(31));
+
+      R(a, b, c, d, e, f, g, h, K[32], W(32));
+      R(h, a, b, c, d, e, f, g, K[33], W(33));
+      R(g, h, a, b, c, d, e, f, K[34], W(34));
+      R(f, g, h, a, b, c, d, e, K[35], W(35));
+      R(e, f, g, h, a, b, c, d, K[36], W(36));
+      R(d, e, f, g, h, a, b, c, K[37], W(37));
+      R(c, d, e, f, g, h, a, b, K[38], W(38));
+      R(b, c, d, e, f, g, h, a, K[39], W(39));
+      R(a, b, c, d, e, f, g, h, K[40], W(40));
+      R(h, a, b, c, d, e, f, g, K[41], W(41));
+      R(g, h, a, b, c, d, e, f, K[42], W(42));
+      R(f, g, h, a, b, c, d, e, K[43], W(43));
+      R(e, f, g, h, a, b, c, d, K[44], W(44));
+      R(d, e, f, g, h, a, b, c, K[45], W(45));
+      R(c, d, e, f, g, h, a, b, K[46], W(46));
+      R(b, c, d, e, f, g, h, a, K[47], W(47));
+
+      R(a, b, c, d, e, f, g, h, K[48], W(48));
+      R(h, a, b, c, d, e, f, g, K[49], W(49));
+      R(g, h, a, b, c, d, e, f, K[50], W(50));
+      R(f, g, h, a, b, c, d, e, K[51], W(51));
+      R(e, f, g, h, a, b, c, d, K[52], W(52));
+      R(d, e, f, g, h, a, b, c, K[53], W(53));
+      R(c, d, e, f, g, h, a, b, K[54], W(54));
+      R(b, c, d, e, f, g, h, a, K[55], W(55));
+      R(a, b, c, d, e, f, g, h, K[56], W(56));
+      R(h, a, b, c, d, e, f, g, K[57], W(57));
+      R(g, h, a, b, c, d, e, f, K[58], W(58));
+      R(f, g, h, a, b, c, d, e, K[59], W(59));
+      R(e, f, g, h, a, b, c, d, K[60], W(60));
+      R(d, e, f, g, h, a, b, c, K[61], W(61));
+      R(c, d, e, f, g, h, a, b, K[62], W(62));
+      R(b, c, d, e, f, g, h, a, K[63], W(63));
+
+      R(a, b, c, d, e, f, g, h, K[64], L(64));
+      R(h, a, b, c, d, e, f, g, K[65], L(65));
+      R(g, h, a, b, c, d, e, f, K[66], L(66));
+      R(f, g, h, a, b, c, d, e, K[67], L(67));
+      I(0); I(1); I(2); I(3);
+      R(e, f, g, h, a, b, c, d, K[68], L(68));
+      R(d, e, f, g, h, a, b, c, K[69], L(69));
+      R(c, d, e, f, g, h, a, b, K[70], L(70));
+      R(b, c, d, e, f, g, h, a, K[71], L(71));
+      I(4); I(5); I(6); I(7);
+      R(a, b, c, d, e, f, g, h, K[72], L(72));
+      R(h, a, b, c, d, e, f, g, K[73], L(73));
+      R(g, h, a, b, c, d, e, f, K[74], L(74));
+      R(f, g, h, a, b, c, d, e, K[75], L(75));
+      I(8); I(9); I(10); I(11);
+      R(e, f, g, h, a, b, c, d, K[76], L(76));
+      R(d, e, f, g, h, a, b, c, K[77], L(77));
+      R(c, d, e, f, g, h, a, b, K[78], L(78));
+      R(b, c, d, e, f, g, h, a, K[79], L(79));
+      I(12); I(13); I(14); I(15);
+      data += 128;
+
+      h0 += a;
+      h1 += b;
+      h2 += c;
+      h3 += d;
+      h4 += e;
+      h5 += f;
+      h6 += g;
+      h7 += h;
+      a = h0;
+      b = h1;
+      c = h2;
+      d = h3;
+      e = h4;
+      f = h5;
+      g = h6;
+      h = h7;
+
+      R(a, b, c, d, e, f, g, h, K[0], W(0));
+      R(h, a, b, c, d, e, f, g, K[1], W(1));
+      R(g, h, a, b, c, d, e, f, K[2], W(2));
+      R(f, g, h, a, b, c, d, e, K[3], W(3));
+      R(e, f, g, h, a, b, c, d, K[4], W(4));
+      R(d, e, f, g, h, a, b, c, K[5], W(5));
+      R(c, d, e, f, g, h, a, b, K[6], W(6));
+      R(b, c, d, e, f, g, h, a, K[7], W(7));
+      R(a, b, c, d, e, f, g, h, K[8], W(8));
+      R(h, a, b, c, d, e, f, g, K[9], W(9));
+      R(g, h, a, b, c, d, e, f, K[10], W(10));
+      R(f, g, h, a, b, c, d, e, K[11], W(11));
+      R(e, f, g, h, a, b, c, d, K[12], W(12));
+      R(d, e, f, g, h, a, b, c, K[13], W(13));
+      R(c, d, e, f, g, h, a, b, K[14], W(14));
+      R(b, c, d, e, f, g, h, a, K[15], W(15));
+
+      R(a, b, c, d, e, f, g, h, K[16], W(16));
+      R(h, a, b, c, d, e, f, g, K[17], W(17));
+      R(g, h, a, b, c, d, e, f, K[18], W(18));
+      R(f, g, h, a, b, c, d, e, K[19], W(19));
+      R(e, f, g, h, a, b, c, d, K[20], W(20));
+      R(d, e, f, g, h, a, b, c, K[21], W(21));
+      R(c, d, e, f, g, h, a, b, K[22], W(22));
+      R(b, c, d, e, f, g, h, a, K[23], W(23));
+      R(a, b, c, d, e, f, g, h, K[24], W(24));
+      R(h, a, b, c, d, e, f, g, K[25], W(25));
+      R(g, h, a, b, c, d, e, f, K[26], W(26));
+      R(f, g, h, a, b, c, d, e, K[27], W(27));
+      R(e, f, g, h, a, b, c, d, K[28], W(28));
+      R(d, e, f, g, h, a, b, c, K[29], W(29));
+      R(c, d, e, f, g, h, a, b, K[30], W(30));
+      R(b, c, d, e, f, g, h, a, K[31], W(31));
+
+      R(a, b, c, d, e, f, g, h, K[32], W(32));
+      R(h, a, b, c, d, e, f, g, K[33], W(33));
+      R(g, h, a, b, c, d, e, f, K[34], W(34));
+      R(f, g, h, a, b, c, d, e, K[35], W(35));
+      R(e, f, g, h, a, b, c, d, K[36], W(36));
+      R(d, e, f, g, h, a, b, c, K[37], W(37));
+      R(c, d, e, f, g, h, a, b, K[38], W(38));
+      R(b, c, d, e, f, g, h, a, K[39], W(39));
+      R(a, b, c, d, e, f, g, h, K[40], W(40));
+      R(h, a, b, c, d, e, f, g, K[41], W(41));
+      R(g, h, a, b, c, d, e, f, K[42], W(42));
+      R(f, g, h, a, b, c, d, e, K[43], W(43));
+      R(e, f, g, h, a, b, c, d, K[44], W(44));
+      R(d, e, f, g, h, a, b, c, K[45], W(45));
+      R(c, d, e, f, g, h, a, b, K[46], W(46));
+      R(b, c, d, e, f, g, h, a, K[47], W(47));
+
+      R(a, b, c, d, e, f, g, h, K[48], W(48));
+      R(h, a, b, c, d, e, f, g, K[49], W(49));
+      R(g, h, a, b, c, d, e, f, K[50], W(50));
+      R(f, g, h, a, b, c, d, e, K[51], W(51));
+      R(e, f, g, h, a, b, c, d, K[52], W(52));
+      R(d, e, f, g, h, a, b, c, K[53], W(53));
+      R(c, d, e, f, g, h, a, b, K[54], W(54));
+      R(b, c, d, e, f, g, h, a, K[55], W(55));
+      R(a, b, c, d, e, f, g, h, K[56], W(56));
+      R(h, a, b, c, d, e, f, g, K[57], W(57));
+      R(g, h, a, b, c, d, e, f, K[58], W(58));
+      R(f, g, h, a, b, c, d, e, K[59], W(59));
+      R(e, f, g, h, a, b, c, d, K[60], W(60));
+      R(d, e, f, g, h, a, b, c, K[61], W(61));
+      R(c, d, e, f, g, h, a, b, K[62], W(62));
+      R(b, c, d, e, f, g, h, a, K[63], W(63));
+
+      R(a, b, c, d, e, f, g, h, K[64], L(64));
+      R(h, a, b, c, d, e, f, g, K[65], L(65));
+      R(g, h, a, b, c, d, e, f, K[66], L(66));
+      R(f, g, h, a, b, c, d, e, K[67], L(67));
+      R(e, f, g, h, a, b, c, d, K[68], L(68));
+      R(d, e, f, g, h, a, b, c, K[69], L(69));
+      R(c, d, e, f, g, h, a, b, K[70], L(70));
+      R(b, c, d, e, f, g, h, a, K[71], L(71));
+      R(a, b, c, d, e, f, g, h, K[72], L(72));
+      R(h, a, b, c, d, e, f, g, K[73], L(73));
+      R(g, h, a, b, c, d, e, f, K[74], L(74));
+      R(f, g, h, a, b, c, d, e, K[75], L(75));
+      R(e, f, g, h, a, b, c, d, K[76], L(76));
+      R(d, e, f, g, h, a, b, c, K[77], L(77));
+      R(c, d, e, f, g, h, a, b, K[78], L(78));
+      R(b, c, d, e, f, g, h, a, K[79], L(79));
+
+      h0 += a;
+      h1 += b;
+      h2 += c;
+      h3 += d;
+      h4 += e;
+      h5 += f;
+      h6 += g;
+      h7 += h;
+
+      nblks -= 2;
+    }
+
+  while (nblks)
+    {
+      a = h0;
+      b = h1;
+      c = h2;
+      d = h3;
+      e = h4;
+      f = h5;
+      g = h6;
+      h = h7;
+
+      I(0); I(1); I(2); I(3);
+      I(4); I(5); I(6); I(7);
+      I(8); I(9); I(10); I(11);
+      I(12); I(13); I(14); I(15);
+      data += 128;
+      R(a, b, c, d, e, f, g, h, K[0], W(0));
+      R(h, a, b, c, d, e, f, g, K[1], W(1));
+      R(g, h, a, b, c, d, e, f, K[2], W(2));
+      R(f, g, h, a, b, c, d, e, K[3], W(3));
+      R(e, f, g, h, a, b, c, d, K[4], W(4));
+      R(d, e, f, g, h, a, b, c, K[5], W(5));
+      R(c, d, e, f, g, h, a, b, K[6], W(6));
+      R(b, c, d, e, f, g, h, a, K[7], W(7));
+      R(a, b, c, d, e, f, g, h, K[8], W(8));
+      R(h, a, b, c, d, e, f, g, K[9], W(9));
+      R(g, h, a, b, c, d, e, f, K[10], W(10));
+      R(f, g, h, a, b, c, d, e, K[11], W(11));
+      R(e, f, g, h, a, b, c, d, K[12], W(12));
+      R(d, e, f, g, h, a, b, c, K[13], W(13));
+      R(c, d, e, f, g, h, a, b, K[14], W(14));
+      R(b, c, d, e, f, g, h, a, K[15], W(15));
+
+      R(a, b, c, d, e, f, g, h, K[16], W(16));
+      R(h, a, b, c, d, e, f, g, K[17], W(17));
+      R(g, h, a, b, c, d, e, f, K[18], W(18));
+      R(f, g, h, a, b, c, d, e, K[19], W(19));
+      R(e, f, g, h, a, b, c, d, K[20], W(20));
+      R(d, e, f, g, h, a, b, c, K[21], W(21));
+      R(c, d, e, f, g, h, a, b, K[22], W(22));
+      R(b, c, d, e, f, g, h, a, K[23], W(23));
+      R(a, b, c, d, e, f, g, h, K[24], W(24));
+      R(h, a, b, c, d, e, f, g, K[25], W(25));
+      R(g, h, a, b, c, d, e, f, K[26], W(26));
+      R(f, g, h, a, b, c, d, e, K[27], W(27));
+      R(e, f, g, h, a, b, c, d, K[28], W(28));
+      R(d, e, f, g, h, a, b, c, K[29], W(29));
+      R(c, d, e, f, g, h, a, b, K[30], W(30));
+      R(b, c, d, e, f, g, h, a, K[31], W(31));
+
+      R(a, b, c, d, e, f, g, h, K[32], W(32));
+      R(h, a, b, c, d, e, f, g, K[33], W(33));
+      R(g, h, a, b, c, d, e, f, K[34], W(34));
+      R(f, g, h, a, b, c, d, e, K[35], W(35));
+      R(e, f, g, h, a, b, c, d, K[36], W(36));
+      R(d, e, f, g, h, a, b, c, K[37], W(37));
+      R(c, d, e, f, g, h, a, b, K[38], W(38));
+      R(b, c, d, e, f, g, h, a, K[39], W(39));
+      R(a, b, c, d, e, f, g, h, K[40], W(40));
+      R(h, a, b, c, d, e, f, g, K[41], W(41));
+      R(g, h, a, b, c, d, e, f, K[42], W(42));
+      R(f, g, h, a, b, c, d, e, K[43], W(43));
+      R(e, f, g, h, a, b, c, d, K[44], W(44));
+      R(d, e, f, g, h, a, b, c, K[45], W(45));
+      R(c, d, e, f, g, h, a, b, K[46], W(46));
+      R(b, c, d, e, f, g, h, a, K[47], W(47));
+
+      R(a, b, c, d, e, f, g, h, K[48], W(48));
+      R(h, a, b, c, d, e, f, g, K[49], W(49));
+      R(g, h, a, b, c, d, e, f, K[50], W(50));
+      R(f, g, h, a, b, c, d, e, K[51], W(51));
+      R(e, f, g, h, a, b, c, d, K[52], W(52));
+      R(d, e, f, g, h, a, b, c, K[53], W(53));
+      R(c, d, e, f, g, h, a, b, K[54], W(54));
+      R(b, c, d, e, f, g, h, a, K[55], W(55));
+      R(a, b, c, d, e, f, g, h, K[56], W(56));
+      R(h, a, b, c, d, e, f, g, K[57], W(57));
+      R(g, h, a, b, c, d, e, f, K[58], W(58));
+      R(f, g, h, a, b, c, d, e, K[59], W(59));
+      R(e, f, g, h, a, b, c, d, K[60], W(60));
+      R(d, e, f, g, h, a, b, c, K[61], W(61));
+      R(c, d, e, f, g, h, a, b, K[62], W(62));
+      R(b, c, d, e, f, g, h, a, K[63], W(63));
+
+      R(a, b, c, d, e, f, g, h, K[64], L(64));
+      R(h, a, b, c, d, e, f, g, K[65], L(65));
+      R(g, h, a, b, c, d, e, f, K[66], L(66));
+      R(f, g, h, a, b, c, d, e, K[67], L(67));
+      R(e, f, g, h, a, b, c, d, K[68], L(68));
+      R(d, e, f, g, h, a, b, c, K[69], L(69));
+      R(c, d, e, f, g, h, a, b, K[70], L(70));
+      R(b, c, d, e, f, g, h, a, K[71], L(71));
+      R(a, b, c, d, e, f, g, h, K[72], L(72));
+      R(h, a, b, c, d, e, f, g, K[73], L(73));
+      R(g, h, a, b, c, d, e, f, K[74], L(74));
+      R(f, g, h, a, b, c, d, e, K[75], L(75));
+      R(e, f, g, h, a, b, c, d, K[76], L(76));
+      R(d, e, f, g, h, a, b, c, K[77], L(77));
+      R(c, d, e, f, g, h, a, b, K[78], L(78));
+      R(b, c, d, e, f, g, h, a, K[79], L(79));
+
+      h0 += a;
+      h1 += b;
+      h2 += c;
+      h3 += d;
+      h4 += e;
+      h5 += f;
+      h6 += g;
+      h7 += h;
+
+      nblks--;
+    }
+
+  h0 = vec_merge_idx0_elems (h0, h1);
+  h2 = vec_merge_idx0_elems (h2, h3);
+  h4 = vec_merge_idx0_elems (h4, h5);
+  h6 = vec_merge_idx0_elems (h6, h7);
+  vec_u64_store (h0, 8 * 0, (unsigned long long *)state);
+  vec_u64_store (h2, 8 * 2, (unsigned long long *)state);
+  vec_u64_store (h4, 8 * 4, (unsigned long long *)state);
+  vec_u64_store (h6, 8 * 6, (unsigned long long *)state);
+
+  return sizeof(w);
+}
+#undef R
+#undef Cho
+#undef Maj
+#undef Sum0
+#undef Sum1
+#undef S0
+#undef S1
+#undef I
+#undef W
+#undef I2
+#undef W2
+#undef R2
+
+
+/* SHA2 round in general purpose registers */
+#define R(a,b,c,d,e,f,g,h,k,w) do                                 \
+          {                                                       \
+            t1 = (h) + Sum1((e)) + Cho((e),(f),(g)) + ((k) + (w));\
+            t2 = Sum0((a)) + Maj((a),(b),(c));                    \
+            d += t1;                                              \
+            h  = t1 + t2;                                         \
+          } while (0)
+
+#define Cho(x, y, z)  ((x & y) + (~x & z))
+
+#define Maj(z, x, y)  ((x & y) + (z & (x ^ y)))
+
+#define Sum0(x)       (ror64(x, 28) ^ ror64(x ^ ror64(x, 39-34), 34))
+
+#define Sum1(x)       (ror64(x, 14) ^ ror64(x, 18) ^ ror64(x, 41))
+
+
+/* Message expansion on general purpose registers */
+#define S0(x) (ror64 ((x), 1) ^ ror64 ((x), 8) ^ ((x) >> 7))
+#define S1(x) (ror64 ((x), 19) ^ ror64 ((x), 61) ^ ((x) >> 6))
+
+#define I(i) ( w[i] = buf_get_be64(data + i * 8) )
+#define WN(i) ({ w[i&0x0f] +=    w[(i-7) &0x0f];  \
+                w[i&0x0f] += S0(w[(i-15)&0x0f]); \
+                w[i&0x0f] += S1(w[(i-2) &0x0f]); \
+                w[i&0x0f]; })
+#define W(i) ({ u64 r = w[i&0x0f]; WN(i); r; })
+#define L(i) w[i&0x0f]
+
+
+unsigned int ASM_FUNC_ATTR
+_gcry_sha512_transform_ppc9(u64 state[8], const unsigned char *data,
+                           size_t nblks)
+{
+  /* GPRs used for round function and message expansion as vector intrinsics
+   * based generates slower code for POWER9. */
+  u64 a, b, c, d, e, f, g, h, t1, t2;
+  u64 w[16];
+
+  a = state[0];
+  b = state[1];
+  c = state[2];
+  d = state[3];
+  e = state[4];
+  f = state[5];
+  g = state[6];
+  h = state[7];
+
+  while (nblks >= 2)
+    {
+      I(0); I(1); I(2); I(3);
+      I(4); I(5); I(6); I(7);
+      I(8); I(9); I(10); I(11);
+      I(12); I(13); I(14); I(15);
+      data += 128;
+      R(a, b, c, d, e, f, g, h, K[0], W(0));
+      R(h, a, b, c, d, e, f, g, K[1], W(1));
+      R(g, h, a, b, c, d, e, f, K[2], W(2));
+      R(f, g, h, a, b, c, d, e, K[3], W(3));
+      R(e, f, g, h, a, b, c, d, K[4], W(4));
+      R(d, e, f, g, h, a, b, c, K[5], W(5));
+      R(c, d, e, f, g, h, a, b, K[6], W(6));
+      R(b, c, d, e, f, g, h, a, K[7], W(7));
+      R(a, b, c, d, e, f, g, h, K[8], W(8));
+      R(h, a, b, c, d, e, f, g, K[9], W(9));
+      R(g, h, a, b, c, d, e, f, K[10], W(10));
+      R(f, g, h, a, b, c, d, e, K[11], W(11));
+      R(e, f, g, h, a, b, c, d, K[12], W(12));
+      R(d, e, f, g, h, a, b, c, K[13], W(13));
+      R(c, d, e, f, g, h, a, b, K[14], W(14));
+      R(b, c, d, e, f, g, h, a, K[15], W(15));
+
+      R(a, b, c, d, e, f, g, h, K[16], W(16));
+      R(h, a, b, c, d, e, f, g, K[17], W(17));
+      R(g, h, a, b, c, d, e, f, K[18], W(18));
+      R(f, g, h, a, b, c, d, e, K[19], W(19));
+      R(e, f, g, h, a, b, c, d, K[20], W(20));
+      R(d, e, f, g, h, a, b, c, K[21], W(21));
+      R(c, d, e, f, g, h, a, b, K[22], W(22));
+      R(b, c, d, e, f, g, h, a, K[23], W(23));
+      R(a, b, c, d, e, f, g, h, K[24], W(24));
+      R(h, a, b, c, d, e, f, g, K[25], W(25));
+      R(g, h, a, b, c, d, e, f, K[26], W(26));
+      R(f, g, h, a, b, c, d, e, K[27], W(27));
+      R(e, f, g, h, a, b, c, d, K[28], W(28));
+      R(d, e, f, g, h, a, b, c, K[29], W(29));
+      R(c, d, e, f, g, h, a, b, K[30], W(30));
+      R(b, c, d, e, f, g, h, a, K[31], W(31));
+
+      R(a, b, c, d, e, f, g, h, K[32], W(32));
+      R(h, a, b, c, d, e, f, g, K[33], W(33));
+      R(g, h, a, b, c, d, e, f, K[34], W(34));
+      R(f, g, h, a, b, c, d, e, K[35], W(35));
+      R(e, f, g, h, a, b, c, d, K[36], W(36));
+      R(d, e, f, g, h, a, b, c, K[37], W(37));
+      R(c, d, e, f, g, h, a, b, K[38], W(38));
+      R(b, c, d, e, f, g, h, a, K[39], W(39));
+      R(a, b, c, d, e, f, g, h, K[40], W(40));
+      R(h, a, b, c, d, e, f, g, K[41], W(41));
+      R(g, h, a, b, c, d, e, f, K[42], W(42));
+      R(f, g, h, a, b, c, d, e, K[43], W(43));
+      R(e, f, g, h, a, b, c, d, K[44], W(44));
+      R(d, e, f, g, h, a, b, c, K[45], W(45));
+      R(c, d, e, f, g, h, a, b, K[46], W(46));
+      R(b, c, d, e, f, g, h, a, K[47], W(47));
+
+      R(a, b, c, d, e, f, g, h, K[48], W(48));
+      R(h, a, b, c, d, e, f, g, K[49], W(49));
+      R(g, h, a, b, c, d, e, f, K[50], W(50));
+      R(f, g, h, a, b, c, d, e, K[51], W(51));
+      R(e, f, g, h, a, b, c, d, K[52], W(52));
+      R(d, e, f, g, h, a, b, c, K[53], W(53));
+      R(c, d, e, f, g, h, a, b, K[54], W(54));
+      R(b, c, d, e, f, g, h, a, K[55], W(55));
+      R(a, b, c, d, e, f, g, h, K[56], W(56));
+      R(h, a, b, c, d, e, f, g, K[57], W(57));
+      R(g, h, a, b, c, d, e, f, K[58], W(58));
+      R(f, g, h, a, b, c, d, e, K[59], W(59));
+      R(e, f, g, h, a, b, c, d, K[60], W(60));
+      R(d, e, f, g, h, a, b, c, K[61], W(61));
+      R(c, d, e, f, g, h, a, b, K[62], W(62));
+      R(b, c, d, e, f, g, h, a, K[63], W(63));
+
+      R(a, b, c, d, e, f, g, h, K[64], L(64));
+      R(h, a, b, c, d, e, f, g, K[65], L(65));
+      R(g, h, a, b, c, d, e, f, K[66], L(66));
+      R(f, g, h, a, b, c, d, e, K[67], L(67));
+      I(0); I(1); I(2); I(3);
+      R(e, f, g, h, a, b, c, d, K[68], L(68));
+      R(d, e, f, g, h, a, b, c, K[69], L(69));
+      R(c, d, e, f, g, h, a, b, K[70], L(70));
+      R(b, c, d, e, f, g, h, a, K[71], L(71));
+      I(4); I(5); I(6); I(7);
+      R(a, b, c, d, e, f, g, h, K[72], L(72));
+      R(h, a, b, c, d, e, f, g, K[73], L(73));
+      R(g, h, a, b, c, d, e, f, K[74], L(74));
+      R(f, g, h, a, b, c, d, e, K[75], L(75));
+      I(8); I(9); I(10); I(11);
+      R(e, f, g, h, a, b, c, d, K[76], L(76));
+      R(d, e, f, g, h, a, b, c, K[77], L(77));
+      R(c, d, e, f, g, h, a, b, K[78], L(78));
+      R(b, c, d, e, f, g, h, a, K[79], L(79));
+      I(12); I(13); I(14); I(15);
+      data += 128;
+
+      a += state[0];
+      b += state[1];
+      c += state[2];
+      d += state[3];
+      e += state[4];
+      f += state[5];
+      g += state[6];
+      h += state[7];
+      state[0] = a;
+      state[1] = b;
+      state[2] = c;
+      state[3] = d;
+      state[4] = e;
+      state[5] = f;
+      state[6] = g;
+      state[7] = h;
+
+      R(a, b, c, d, e, f, g, h, K[0], W(0));
+      R(h, a, b, c, d, e, f, g, K[1], W(1));
+      R(g, h, a, b, c, d, e, f, K[2], W(2));
+      R(f, g, h, a, b, c, d, e, K[3], W(3));
+      R(e, f, g, h, a, b, c, d, K[4], W(4));
+      R(d, e, f, g, h, a, b, c, K[5], W(5));
+      R(c, d, e, f, g, h, a, b, K[6], W(6));
+      R(b, c, d, e, f, g, h, a, K[7], W(7));
+      R(a, b, c, d, e, f, g, h, K[8], W(8));
+      R(h, a, b, c, d, e, f, g, K[9], W(9));
+      R(g, h, a, b, c, d, e, f, K[10], W(10));
+      R(f, g, h, a, b, c, d, e, K[11], W(11));
+      R(e, f, g, h, a, b, c, d, K[12], W(12));
+      R(d, e, f, g, h, a, b, c, K[13], W(13));
+      R(c, d, e, f, g, h, a, b, K[14], W(14));
+      R(b, c, d, e, f, g, h, a, K[15], W(15));
+
+      R(a, b, c, d, e, f, g, h, K[16], W(16));
+      R(h, a, b, c, d, e, f, g, K[17], W(17));
+      R(g, h, a, b, c, d, e, f, K[18], W(18));
+      R(f, g, h, a, b, c, d, e, K[19], W(19));
+      R(e, f, g, h, a, b, c, d, K[20], W(20));
+      R(d, e, f, g, h, a, b, c, K[21], W(21));
+      R(c, d, e, f, g, h, a, b, K[22], W(22));
+      R(b, c, d, e, f, g, h, a, K[23], W(23));
+      R(a, b, c, d, e, f, g, h, K[24], W(24));
+      R(h, a, b, c, d, e, f, g, K[25], W(25));
+      R(g, h, a, b, c, d, e, f, K[26], W(26));
+      R(f, g, h, a, b, c, d, e, K[27], W(27));
+      R(e, f, g, h, a, b, c, d, K[28], W(28));
+      R(d, e, f, g, h, a, b, c, K[29], W(29));
+      R(c, d, e, f, g, h, a, b, K[30], W(30));
+      R(b, c, d, e, f, g, h, a, K[31], W(31));
+
+      R(a, b, c, d, e, f, g, h, K[32], W(32));
+      R(h, a, b, c, d, e, f, g, K[33], W(33));
+      R(g, h, a, b, c, d, e, f, K[34], W(34));
+      R(f, g, h, a, b, c, d, e, K[35], W(35));
+      R(e, f, g, h, a, b, c, d, K[36], W(36));
+      R(d, e, f, g, h, a, b, c, K[37], W(37));
+      R(c, d, e, f, g, h, a, b, K[38], W(38));
+      R(b, c, d, e, f, g, h, a, K[39], W(39));
+      R(a, b, c, d, e, f, g, h, K[40], W(40));
+      R(h, a, b, c, d, e, f, g, K[41], W(41));
+      R(g, h, a, b, c, d, e, f, K[42], W(42));
+      R(f, g, h, a, b, c, d, e, K[43], W(43));
+      R(e, f, g, h, a, b, c, d, K[44], W(44));
+      R(d, e, f, g, h, a, b, c, K[45], W(45));
+      R(c, d, e, f, g, h, a, b, K[46], W(46));
+      R(b, c, d, e, f, g, h, a, K[47], W(47));
+
+      R(a, b, c, d, e, f, g, h, K[48], W(48));
+      R(h, a, b, c, d, e, f, g, K[49], W(49));
+      R(g, h, a, b, c, d, e, f, K[50], W(50));
+      R(f, g, h, a, b, c, d, e, K[51], W(51));
+      R(e, f, g, h, a, b, c, d, K[52], W(52));
+      R(d, e, f, g, h, a, b, c, K[53], W(53));
+      R(c, d, e, f, g, h, a, b, K[54], W(54));
+      R(b, c, d, e, f, g, h, a, K[55], W(55));
+      R(a, b, c, d, e, f, g, h, K[56], W(56));
+      R(h, a, b, c, d, e, f, g, K[57], W(57));
+      R(g, h, a, b, c, d, e, f, K[58], W(58));
+      R(f, g, h, a, b, c, d, e, K[59], W(59));
+      R(e, f, g, h, a, b, c, d, K[60], W(60));
+      R(d, e, f, g, h, a, b, c, K[61], W(61));
+      R(c, d, e, f, g, h, a, b, K[62], W(62));
+      R(b, c, d, e, f, g, h, a, K[63], W(63));
+
+      R(a, b, c, d, e, f, g, h, K[64], L(64));
+      R(h, a, b, c, d, e, f, g, K[65], L(65));
+      R(g, h, a, b, c, d, e, f, K[66], L(66));
+      R(f, g, h, a, b, c, d, e, K[67], L(67));
+      R(e, f, g, h, a, b, c, d, K[68], L(68));
+      R(d, e, f, g, h, a, b, c, K[69], L(69));
+      R(c, d, e, f, g, h, a, b, K[70], L(70));
+      R(b, c, d, e, f, g, h, a, K[71], L(71));
+      R(a, b, c, d, e, f, g, h, K[72], L(72));
+      R(h, a, b, c, d, e, f, g, K[73], L(73));
+      R(g, h, a, b, c, d, e, f, K[74], L(74));
+      R(f, g, h, a, b, c, d, e, K[75], L(75));
+      R(e, f, g, h, a, b, c, d, K[76], L(76));
+      R(d, e, f, g, h, a, b, c, K[77], L(77));
+      R(c, d, e, f, g, h, a, b, K[78], L(78));
+      R(b, c, d, e, f, g, h, a, K[79], L(79));
+
+      a += state[0];
+      b += state[1];
+      c += state[2];
+      d += state[3];
+      e += state[4];
+      f += state[5];
+      g += state[6];
+      h += state[7];
+      state[0] = a;
+      state[1] = b;
+      state[2] = c;
+      state[3] = d;
+      state[4] = e;
+      state[5] = f;
+      state[6] = g;
+      state[7] = h;
+
+      nblks -= 2;
+    }
+
+  while (nblks)
+    {
+      I(0); I(1); I(2); I(3);
+      I(4); I(5); I(6); I(7);
+      I(8); I(9); I(10); I(11);
+      I(12); I(13); I(14); I(15);
+      data += 128;
+      R(a, b, c, d, e, f, g, h, K[0], W(0));
+      R(h, a, b, c, d, e, f, g, K[1], W(1));
+      R(g, h, a, b, c, d, e, f, K[2], W(2));
+      R(f, g, h, a, b, c, d, e, K[3], W(3));
+      R(e, f, g, h, a, b, c, d, K[4], W(4));
+      R(d, e, f, g, h, a, b, c, K[5], W(5));
+      R(c, d, e, f, g, h, a, b, K[6], W(6));
+      R(b, c, d, e, f, g, h, a, K[7], W(7));
+      R(a, b, c, d, e, f, g, h, K[8], W(8));
+      R(h, a, b, c, d, e, f, g, K[9], W(9));
+      R(g, h, a, b, c, d, e, f, K[10], W(10));
+      R(f, g, h, a, b, c, d, e, K[11], W(11));
+      R(e, f, g, h, a, b, c, d, K[12], W(12));
+      R(d, e, f, g, h, a, b, c, K[13], W(13));
+      R(c, d, e, f, g, h, a, b, K[14], W(14));
+      R(b, c, d, e, f, g, h, a, K[15], W(15));
+
+      R(a, b, c, d, e, f, g, h, K[16], W(16));
+      R(h, a, b, c, d, e, f, g, K[17], W(17));
+      R(g, h, a, b, c, d, e, f, K[18], W(18));
+      R(f, g, h, a, b, c, d, e, K[19], W(19));
+      R(e, f, g, h, a, b, c, d, K[20], W(20));
+      R(d, e, f, g, h, a, b, c, K[21], W(21));
+      R(c, d, e, f, g, h, a, b, K[22], W(22));
+      R(b, c, d, e, f, g, h, a, K[23], W(23));
+      R(a, b, c, d, e, f, g, h, K[24], W(24));
+      R(h, a, b, c, d, e, f, g, K[25], W(25));
+      R(g, h, a, b, c, d, e, f, K[26], W(26));
+      R(f, g, h, a, b, c, d, e, K[27], W(27));
+      R(e, f, g, h, a, b, c, d, K[28], W(28));
+      R(d, e, f, g, h, a, b, c, K[29], W(29));
+      R(c, d, e, f, g, h, a, b, K[30], W(30));
+      R(b, c, d, e, f, g, h, a, K[31], W(31));
+
+      R(a, b, c, d, e, f, g, h, K[32], W(32));
+      R(h, a, b, c, d, e, f, g, K[33], W(33));
+      R(g, h, a, b, c, d, e, f, K[34], W(34));
+      R(f, g, h, a, b, c, d, e, K[35], W(35));
+      R(e, f, g, h, a, b, c, d, K[36], W(36));
+      R(d, e, f, g, h, a, b, c, K[37], W(37));
+      R(c, d, e, f, g, h, a, b, K[38], W(38));
+      R(b, c, d, e, f, g, h, a, K[39], W(39));
+      R(a, b, c, d, e, f, g, h, K[40], W(40));
+      R(h, a, b, c, d, e, f, g, K[41], W(41));
+      R(g, h, a, b, c, d, e, f, K[42], W(42));
+      R(f, g, h, a, b, c, d, e, K[43], W(43));
+      R(e, f, g, h, a, b, c, d, K[44], W(44));
+      R(d, e, f, g, h, a, b, c, K[45], W(45));
+      R(c, d, e, f, g, h, a, b, K[46], W(46));
+      R(b, c, d, e, f, g, h, a, K[47], W(47));
+
+      R(a, b, c, d, e, f, g, h, K[48], W(48));
+      R(h, a, b, c, d, e, f, g, K[49], W(49));
+      R(g, h, a, b, c, d, e, f, K[50], W(50));
+      R(f, g, h, a, b, c, d, e, K[51], W(51));
+      R(e, f, g, h, a, b, c, d, K[52], W(52));
+      R(d, e, f, g, h, a, b, c, K[53], W(53));
+      R(c, d, e, f, g, h, a, b, K[54], W(54));
+      R(b, c, d, e, f, g, h, a, K[55], W(55));
+      R(a, b, c, d, e, f, g, h, K[56], W(56));
+      R(h, a, b, c, d, e, f, g, K[57], W(57));
+      R(g, h, a, b, c, d, e, f, K[58], W(58));
+      R(f, g, h, a, b, c, d, e, K[59], W(59));
+      R(e, f, g, h, a, b, c, d, K[60], W(60));
+      R(d, e, f, g, h, a, b, c, K[61], W(61));
+      R(c, d, e, f, g, h, a, b, K[62], W(62));
+      R(b, c, d, e, f, g, h, a, K[63], W(63));
+
+      R(a, b, c, d, e, f, g, h, K[64], L(64));
+      R(h, a, b, c, d, e, f, g, K[65], L(65));
+      R(g, h, a, b, c, d, e, f, K[66], L(66));
+      R(f, g, h, a, b, c, d, e, K[67], L(67));
+      R(e, f, g, h, a, b, c, d, K[68], L(68));
+      R(d, e, f, g, h, a, b, c, K[69], L(69));
+      R(c, d, e, f, g, h, a, b, K[70], L(70));
+      R(b, c, d, e, f, g, h, a, K[71], L(71));
+      R(a, b, c, d, e, f, g, h, K[72], L(72));
+      R(h, a, b, c, d, e, f, g, K[73], L(73));
+      R(g, h, a, b, c, d, e, f, K[74], L(74));
+      R(f, g, h, a, b, c, d, e, K[75], L(75));
+      R(e, f, g, h, a, b, c, d, K[76], L(76));
+      R(d, e, f, g, h, a, b, c, K[77], L(77));
+      R(c, d, e, f, g, h, a, b, K[78], L(78));
+      R(b, c, d, e, f, g, h, a, K[79], L(79));
+
+      a += state[0];
+      b += state[1];
+      c += state[2];
+      d += state[3];
+      e += state[4];
+      f += state[5];
+      g += state[6];
+      h += state[7];
+      state[0] = a;
+      state[1] = b;
+      state[2] = c;
+      state[3] = d;
+      state[4] = e;
+      state[5] = f;
+      state[6] = g;
+      state[7] = h;
+
+      nblks--;
+    }
+
+  return sizeof(w);
+}
+
+#endif /* ENABLE_PPC_CRYPTO_SUPPORT */
diff --git a/grub-core/lib/libgcrypt/cipher/sha512-ssse3-amd64.S 
b/grub-core/lib/libgcrypt/cipher/sha512-ssse3-amd64.S
new file mode 100644
index 000000000..9cc308920
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/sha512-ssse3-amd64.S
@@ -0,0 +1,467 @@
+/*
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright (c) 2012, Intel Corporation
+;
+; All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions are
+; met:
+;
+; * Redistributions of source code must retain the above copyright
+;   notice, this list of conditions and the following disclaimer.
+;
+; * Redistributions in binary form must reproduce the above copyright
+;   notice, this list of conditions and the following disclaimer in the
+;   documentation and/or other materials provided with the
+;   distribution.
+;
+; * Neither the name of the Intel Corporation nor the names of its
+;   contributors may be used to endorse or promote products derived from
+;   this software without specific prior written permission.
+;
+;
+; THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION "AS IS" AND ANY
+; EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+; IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+; PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR
+; CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+*/
+/*
+ * Conversion to GAS assembly and integration to libgcrypt
+ *  by Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * Note: original implementation was named as SHA512-SSE4. However, only SSSE3
+ *       is required.
+ */
+
+#ifdef __x86_64
+#include <config.h>
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
+    defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \
+    defined(HAVE_GCC_INLINE_ASM_SSSE3) && defined(USE_SHA512)
+
+#include "asm-common-amd64.h"
+
+.intel_syntax noprefix
+
+.text
+
+/* Virtual Registers */
+#define msg rdi /* ARG1 */
+#define digest rsi /* ARG2 */
+#define msglen rdx /* ARG3 */
+#define T1 rcx
+#define T2 r8
+#define a_64 r9
+#define b_64 r10
+#define c_64 r11
+#define d_64 r12
+#define e_64 r13
+#define f_64 r14
+#define g_64 r15
+#define h_64 rbx
+#define tmp0 rax
+
+/*
+; Local variables (stack frame)
+; Note: frame_size must be an odd multiple of 8 bytes to XMM align RSP
+*/
+#define frame_W 0 /* Message Schedule */
+#define frame_W_size (80 * 8)
+#define frame_WK ((frame_W) + (frame_W_size)) /* W[t] + K[t] | W[t+1] + K[t+1] 
*/
+#define frame_WK_size (2 * 8)
+#define frame_GPRSAVE ((frame_WK) + (frame_WK_size))
+#define frame_GPRSAVE_size (5 * 8)
+#define frame_size ((frame_GPRSAVE) + (frame_GPRSAVE_size))
+
+
+/* Useful QWORD "arrays" for simpler memory references */
+#define MSG(i)    msg    + 8*(i)               /* Input message (arg1) */
+#define DIGEST(i) digest + 8*(i)               /* Output Digest (arg2) */
+#define K_t(i)    .LK512   + 8*(i) ADD_RIP     /* SHA Constants (static mem) */
+#define W_t(i)    rsp + frame_W  + 8*(i)       /* Message Schedule (stack 
frame) */
+#define WK_2(i)   rsp + frame_WK + 8*((i) % 2) /* W[t]+K[t] (stack frame) */
+/* MSG, DIGEST, K_t, W_t are arrays */
+/* WK_2(t) points to 1 of 2 qwords at frame.WK depdending on t being odd/even 
*/
+
+#define SHA512_Round(t, a, b, c, d, e, f, g, h) \
+       /* Compute Round %%t */; \
+       mov     T1,   f        /* T1 = f */; \
+       mov     tmp0, e        /* tmp = e */; \
+       xor     T1,   g        /* T1 = f ^ g */; \
+       ror     tmp0, 23 /* 41     ; tmp = e ror 23 */; \
+       and     T1,   e        /* T1 = (f ^ g) & e */; \
+       xor     tmp0, e        /* tmp = (e ror 23) ^ e */; \
+       xor     T1,   g        /* T1 = ((f ^ g) & e) ^ g = CH(e,f,g) */; \
+       add     T1,   [WK_2(t)] /* W[t] + K[t] from message scheduler */; \
+       ror     tmp0, 4 /* 18      ; tmp = ((e ror 23) ^ e) ror 4 */; \
+       xor     tmp0, e        /* tmp = (((e ror 23) ^ e) ror 4) ^ e */; \
+       mov     T2,   a        /* T2 = a */; \
+       add     T1,   h        /* T1 = CH(e,f,g) + W[t] + K[t] + h */; \
+       ror     tmp0, 14 /* 14     ; tmp = ((((e ror23)^e)ror4)^e)ror14 = S1(e) 
*/; \
+       add     T1,   tmp0     /* T1 = CH(e,f,g) + W[t] + K[t] + S1(e) */; \
+       mov     tmp0, a        /* tmp = a */; \
+       xor     T2,   c        /* T2 = a ^ c */; \
+       and     tmp0, c        /* tmp = a & c */; \
+       and     T2,   b        /* T2 = (a ^ c) & b */; \
+       xor     T2,   tmp0     /* T2 = ((a ^ c) & b) ^ (a & c) = Maj(a,b,c) */; 
\
+       mov     tmp0, a        /* tmp = a */; \
+       ror     tmp0, 5 /* 39      ; tmp = a ror 5 */; \
+       xor     tmp0, a        /* tmp = (a ror 5) ^ a */; \
+       add     d, T1          /* e(next_state) = d + T1  */; \
+       ror     tmp0, 6 /* 34      ; tmp = ((a ror 5) ^ a) ror 6 */; \
+       xor     tmp0, a        /* tmp = (((a ror 5) ^ a) ror 6) ^ a */; \
+       lea     h, [T1 + T2]   /* a(next_state) = T1 + Maj(a,b,c) */; \
+       ror     tmp0, 28 /* 28     ; tmp = ((((a ror5)^a)ror6)^a)ror28 = S0(a) 
*/; \
+       add     h, tmp0        /* a(next_state) = T1 + Maj(a,b,c) S0(a) */
+
+#define SHA512_2Sched_2Round_sse_PART1(t, a, b, c, d, e, f, g, h) \
+       /* \
+       ; Compute rounds %%t-2 and %%t-1 \
+       ; Compute message schedule QWORDS %%t and %%t+1 \
+       ; \
+       ;   Two rounds are computed based on the values for K[t-2]+W[t-2] and \
+       ; K[t-1]+W[t-1] which were previously stored at WK_2 by the message \
+       ; scheduler. \
+       ;   The two new schedule QWORDS are stored at [W_t(%%t)] and 
[W_t(%%t+1)]. \
+       ; They are then added to their respective SHA512 constants at \
+       ; [K_t(%%t)] and [K_t(%%t+1)] and stored at dqword [WK_2(%%t)] \
+       ;   For brievity, the comments following vectored instructions only 
refer to \
+       ; the first of a pair of QWORDS. \
+       ; Eg. XMM2=W[t-2] really means XMM2={W[t-2]|W[t-1]} \
+       ;   The computation of the message schedule and the rounds are tightly \
+       ; stitched to take advantage of instruction-level parallelism. \
+       ; For clarity, integer instructions (for the rounds calculation) are 
indented \
+       ; by one tab. Vectored instructions (for the message scheduler) are 
indented \
+       ; by two tabs. \
+       */ \
+       \
+       mov     T1, f; \
+               movdqa  xmm2, [W_t(t-2)]  /* XMM2 = W[t-2] */; \
+       xor     T1,   g; \
+       and     T1,   e; \
+               movdqa  xmm0, xmm2          /* XMM0 = W[t-2] */; \
+       xor     T1,   g; \
+       add     T1,   [WK_2(t)]; \
+               movdqu  xmm5, [W_t(t-15)] /* XMM5 = W[t-15] */; \
+       mov     tmp0, e; \
+       ror     tmp0, 23 /* 41 */; \
+               movdqa  xmm3, xmm5          /* XMM3 = W[t-15] */; \
+       xor     tmp0, e; \
+       ror     tmp0, 4 /* 18 */; \
+               psrlq   xmm0, 61 - 19       /* XMM0 = W[t-2] >> 42 */; \
+       xor     tmp0, e; \
+       ror     tmp0, 14 /* 14 */; \
+               psrlq   xmm3, (8 - 7)       /* XMM3 = W[t-15] >> 1 */; \
+       add     T1,   tmp0; \
+       add     T1,   h; \
+               pxor    xmm0, xmm2          /* XMM0 = (W[t-2] >> 42) ^ W[t-2] 
*/; \
+       mov     T2,   a; \
+       xor     T2,   c; \
+               pxor    xmm3, xmm5          /* XMM3 = (W[t-15] >> 1) ^ W[t-15] 
*/; \
+       and     T2,   b; \
+       mov     tmp0, a; \
+               psrlq   xmm0, 19 - 6        /* XMM0 = ((W[t-2]>>42)^W[t-2])>>13 
*/; \
+       and     tmp0, c; \
+       xor     T2,   tmp0; \
+               psrlq   xmm3, (7 - 1)       /* XMM3 = ((W[t-15]>>1)^W[t-15])>>6 
*/; \
+       mov     tmp0, a; \
+       ror     tmp0, 5 /* 39 */; \
+               pxor    xmm0, xmm2          /* XMM0 = 
(((W[t-2]>>42)^W[t-2])>>13)^W[t-2] */; \
+       xor     tmp0, a; \
+       ror     tmp0, 6 /* 34 */; \
+               pxor    xmm3, xmm5          /* XMM3 = 
(((W[t-15]>>1)^W[t-15])>>6)^W[t-15] */; \
+       xor     tmp0, a; \
+       ror     tmp0, 28 /* 28 */; \
+               psrlq   xmm0, 6             /* XMM0 = 
((((W[t-2]>>42)^W[t-2])>>13)^W[t-2])>>6 */; \
+       add     T2,   tmp0; \
+       add     d, T1; \
+               psrlq   xmm3, 1             /* XMM3 = 
(((W[t-15]>>1)^W[t-15])>>6)^W[t-15]>>1 */; \
+       lea     h, [T1 + T2]
+
+#define SHA512_2Sched_2Round_sse_PART2(t, a, b, c, d, e, f, g, h) \
+               movdqa  xmm1, xmm2          /* XMM1 = W[t-2] */; \
+       mov     T1,   f; \
+       xor     T1,   g; \
+               movdqa  xmm4, xmm5          /* XMM4 = W[t-15] */; \
+       and     T1,   e; \
+       xor     T1,   g; \
+               psllq   xmm1, (64 - 19) - (64 - 61) /* XMM1 = W[t-2] << 42 */; \
+       add     T1,   [WK_2(t+1)]; \
+       mov     tmp0, e; \
+               psllq   xmm4, (64 - 1) - (64 - 8) /* XMM4 = W[t-15] << 7 */; \
+       ror     tmp0, 23 /* 41 */; \
+       xor     tmp0, e; \
+               pxor    xmm1, xmm2          /* XMM1 = (W[t-2] << 42)^W[t-2] */; 
\
+       ror     tmp0, 4 /* 18 */; \
+       xor     tmp0, e; \
+               pxor    xmm4, xmm5          /* XMM4 = (W[t-15]<<7)^W[t-15] */; \
+       ror     tmp0, 14 /* 14 */; \
+       add     T1,   tmp0; \
+               psllq   xmm1, (64 - 61)     /* XMM1 = ((W[t-2] << 
42)^W[t-2])<<3 */; \
+       add     T1,   h; \
+       mov     T2,   a; \
+               psllq   xmm4, (64 - 8)      /* XMM4 = 
((W[t-15]<<7)^W[t-15])<<56 */; \
+       xor     T2,   c; \
+       and     T2,   b; \
+               pxor    xmm0, xmm1          /* XMM0 = s1(W[t-2]) */; \
+       mov     tmp0, a; \
+       and     tmp0, c; \
+               movdqu  xmm1, [W_t(t- 7)] /* XMM1 = W[t-7] */; \
+       xor     T2,   tmp0; \
+               pxor    xmm3, xmm4          /* XMM3 = s0(W[t-15]) */; \
+       mov     tmp0, a; \
+               paddq   xmm0, xmm3          /* XMM0 = s1(W[t-2]) + s0(W[t-15]) 
*/; \
+       ror     tmp0, 5 /* 39 */; \
+               paddq   xmm0, [W_t(t-16)] /* XMM0 = s1(W[t-2]) + s0(W[t-15]) + 
W[t-16] */; \
+       xor     tmp0, a; \
+               paddq   xmm0, xmm1          /* XMM0 = s1(W[t-2]) + W[t-7] + 
s0(W[t-15]) + W[t-16] */; \
+       ror     tmp0, 6 /* 34 */; \
+               movdqa  [W_t(t)], xmm0     /* Store scheduled qwords */; \
+       xor     tmp0, a; \
+               paddq   xmm0, [K_t(t)]      /* Compute W[t]+K[t] */; \
+       ror     tmp0, 28 /* 28 */; \
+               movdqa  [WK_2(t)], xmm0     /* Store W[t]+K[t] for next rounds 
*/; \
+       add     T2,   tmp0; \
+       add     d, T1; \
+       lea     h, [T1 + T2]
+
+#define SHA512_2Sched_2Round_sse(t, a, b, c, d, e, f, g, h) \
+       SHA512_2Sched_2Round_sse_PART1(t, a, b, c, d, e, f, g, h); \
+       SHA512_2Sched_2Round_sse_PART2(t, h, a, b, c, d, e, f, g)
+
+/*
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; void sha512_sse4(const void* M, void* D, uint64_t L);
+; Purpose: Updates the SHA512 digest stored at D with the message stored in M.
+; The size of the message pointed to by M must be an integer multiple of SHA512
+;   message blocks.
+; L is the message length in SHA512 blocks.
+*/
+.globl _gcry_sha512_transform_amd64_ssse3
+ELF(.type _gcry_sha512_transform_amd64_ssse3,@function;)
+.align 16
+_gcry_sha512_transform_amd64_ssse3:
+       CFI_STARTPROC()
+       xor eax, eax
+
+       cmp msglen, 0
+       je .Lnowork
+
+       /* Allocate Stack Space */
+       sub     rsp, frame_size
+       CFI_ADJUST_CFA_OFFSET(frame_size);
+
+       /* Save GPRs */
+       mov     [rsp + frame_GPRSAVE + 8 * 0], rbx
+       mov     [rsp + frame_GPRSAVE + 8 * 1], r12
+       mov     [rsp + frame_GPRSAVE + 8 * 2], r13
+       mov     [rsp + frame_GPRSAVE + 8 * 3], r14
+       mov     [rsp + frame_GPRSAVE + 8 * 4], r15
+       CFI_REL_OFFSET(rbx, frame_GPRSAVE + 8 * 0);
+       CFI_REL_OFFSET(r12, frame_GPRSAVE + 8 * 1);
+       CFI_REL_OFFSET(r13, frame_GPRSAVE + 8 * 2);
+       CFI_REL_OFFSET(r14, frame_GPRSAVE + 8 * 3);
+       CFI_REL_OFFSET(r15, frame_GPRSAVE + 8 * 4);
+
+.Lupdateblock:
+
+       /* Load state variables */
+       mov     a_64, [DIGEST(0)]
+       mov     b_64, [DIGEST(1)]
+       mov     c_64, [DIGEST(2)]
+       mov     d_64, [DIGEST(3)]
+       mov     e_64, [DIGEST(4)]
+       mov     f_64, [DIGEST(5)]
+       mov     g_64, [DIGEST(6)]
+       mov     h_64, [DIGEST(7)]
+
+       /* BSWAP 2 QWORDS */
+       movdqa  xmm1, [.LXMM_QWORD_BSWAP ADD_RIP]
+       movdqu  xmm0, [MSG(0)]
+       pshufb  xmm0, xmm1      /* BSWAP */
+       movdqa  [W_t(0)], xmm0  /* Store Scheduled Pair */
+       paddq   xmm0, [K_t(0)]  /* Compute W[t]+K[t] */
+       movdqa  [WK_2(0)], xmm0 /* Store into WK for rounds */
+
+       #define T_2_14(t, a, b, c, d, e, f, g, h) \
+               /* BSWAP 2 QWORDS; Compute 2 Rounds */; \
+               movdqu  xmm0, [MSG(t)]; \
+               pshufb  xmm0, xmm1      /* BSWAP */; \
+               SHA512_Round(((t) - 2), a##_64, b##_64, c##_64, d##_64, \
+                                       e##_64, f##_64, g##_64, h##_64); \
+               movdqa  [W_t(t)], xmm0  /* Store Scheduled Pair */; \
+               paddq   xmm0, [K_t(t)]  /* Compute W[t]+K[t] */; \
+               SHA512_Round(((t) - 1), h##_64, a##_64, b##_64, c##_64, \
+                                       d##_64, e##_64, f##_64, g##_64); \
+               movdqa  [WK_2(t)], xmm0 /* Store W[t]+K[t] into WK */
+
+       #define T_16_78(t, a, b, c, d, e, f, g, h) \
+               SHA512_2Sched_2Round_sse((t), a##_64, b##_64, c##_64, d##_64, \
+                                             e##_64, f##_64, g##_64, h##_64)
+
+       #define T_80(t, a, b, c, d, e, f, g, h) \
+               /* Compute 2 Rounds */; \
+               SHA512_Round((t - 2), a##_64, b##_64, c##_64, d##_64, \
+                                     e##_64, f##_64, g##_64, h##_64); \
+               SHA512_Round((t - 1), h##_64, a##_64, b##_64, c##_64, \
+                                     d##_64, e##_64, f##_64, g##_64)
+
+       T_2_14(2, a, b, c, d, e, f, g, h)
+       T_2_14(4, g, h, a, b, c, d, e, f)
+       T_2_14(6, e, f, g, h, a, b, c, d)
+       T_2_14(8, c, d, e, f, g, h, a, b)
+       T_2_14(10, a, b, c, d, e, f, g, h)
+       T_2_14(12, g, h, a, b, c, d, e, f)
+       T_2_14(14, e, f, g, h, a, b, c, d)
+       T_16_78(16, c, d, e, f, g, h, a, b)
+       T_16_78(18, a, b, c, d, e, f, g, h)
+       T_16_78(20, g, h, a, b, c, d, e, f)
+       T_16_78(22, e, f, g, h, a, b, c, d)
+       T_16_78(24, c, d, e, f, g, h, a, b)
+       T_16_78(26, a, b, c, d, e, f, g, h)
+       T_16_78(28, g, h, a, b, c, d, e, f)
+       T_16_78(30, e, f, g, h, a, b, c, d)
+       T_16_78(32, c, d, e, f, g, h, a, b)
+       T_16_78(34, a, b, c, d, e, f, g, h)
+       T_16_78(36, g, h, a, b, c, d, e, f)
+       T_16_78(38, e, f, g, h, a, b, c, d)
+       T_16_78(40, c, d, e, f, g, h, a, b)
+       T_16_78(42, a, b, c, d, e, f, g, h)
+       T_16_78(44, g, h, a, b, c, d, e, f)
+       T_16_78(46, e, f, g, h, a, b, c, d)
+       T_16_78(48, c, d, e, f, g, h, a, b)
+       T_16_78(50, a, b, c, d, e, f, g, h)
+       T_16_78(52, g, h, a, b, c, d, e, f)
+       T_16_78(54, e, f, g, h, a, b, c, d)
+       T_16_78(56, c, d, e, f, g, h, a, b)
+       T_16_78(58, a, b, c, d, e, f, g, h)
+       T_16_78(60, g, h, a, b, c, d, e, f)
+       T_16_78(62, e, f, g, h, a, b, c, d)
+       T_16_78(64, c, d, e, f, g, h, a, b)
+       T_16_78(66, a, b, c, d, e, f, g, h)
+       T_16_78(68, g, h, a, b, c, d, e, f)
+       T_16_78(70, e, f, g, h, a, b, c, d)
+       T_16_78(72, c, d, e, f, g, h, a, b)
+       T_16_78(74, a, b, c, d, e, f, g, h)
+       T_16_78(76, g, h, a, b, c, d, e, f)
+       T_16_78(78, e, f, g, h, a, b, c, d)
+       T_80(80, c, d, e, f, g, h, a, b)
+
+       /* Update digest */
+       add     [DIGEST(0)], a_64
+       add     [DIGEST(1)], b_64
+       add     [DIGEST(2)], c_64
+       add     [DIGEST(3)], d_64
+       add     [DIGEST(4)], e_64
+       add     [DIGEST(5)], f_64
+       add     [DIGEST(6)], g_64
+       add     [DIGEST(7)], h_64
+
+       /* Advance to next message block */
+       add     msg, 16*8
+       dec     msglen
+       jnz     .Lupdateblock
+
+       /* Restore GPRs */
+       mov     rbx, [rsp + frame_GPRSAVE + 8 * 0]
+       mov     r12, [rsp + frame_GPRSAVE + 8 * 1]
+       mov     r13, [rsp + frame_GPRSAVE + 8 * 2]
+       mov     r14, [rsp + frame_GPRSAVE + 8 * 3]
+       mov     r15, [rsp + frame_GPRSAVE + 8 * 4]
+       CFI_RESTORE(rbx)
+       CFI_RESTORE(r12)
+       CFI_RESTORE(r13)
+       CFI_RESTORE(r14)
+       CFI_RESTORE(r15)
+
+       pxor    xmm0, xmm0
+       pxor    xmm1, xmm1
+       pxor    xmm2, xmm2
+       pxor    xmm3, xmm3
+       pxor    xmm4, xmm4
+       pxor    xmm5, xmm5
+
+       /* Burn stack */
+       mov eax, 0
+.Lerase_stack:
+       movdqu [rsp + rax], xmm0
+       add eax, 16
+       cmp eax, frame_W_size
+       jne .Lerase_stack
+       movdqu [rsp + frame_WK], xmm0
+       xor     eax, eax
+
+       /* Restore Stack Pointer */
+       add     rsp, frame_size
+       CFI_ADJUST_CFA_OFFSET(-frame_size);
+
+.Lnowork:
+       ret_spec_stop
+       CFI_ENDPROC()
+
+/*
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;; Binary Data
+*/
+
+.align 16
+
+/* Mask for byte-swapping a couple of qwords in an XMM register using 
(v)pshufb. */
+.LXMM_QWORD_BSWAP:
+       .octa 0x08090a0b0c0d0e0f0001020304050607
+
+/* K[t] used in SHA512 hashing */
+.LK512:
+       .quad 0x428a2f98d728ae22,0x7137449123ef65cd
+       .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
+       .quad 0x3956c25bf348b538,0x59f111f1b605d019
+       .quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118
+       .quad 0xd807aa98a3030242,0x12835b0145706fbe
+       .quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
+       .quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1
+       .quad 0x9bdc06a725c71235,0xc19bf174cf692694
+       .quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3
+       .quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
+       .quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483
+       .quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5
+       .quad 0x983e5152ee66dfab,0xa831c66d2db43210
+       .quad 0xb00327c898fb213f,0xbf597fc7beef0ee4
+       .quad 0xc6e00bf33da88fc2,0xd5a79147930aa725
+       .quad 0x06ca6351e003826f,0x142929670a0e6e70
+       .quad 0x27b70a8546d22ffc,0x2e1b21385c26c926
+       .quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df
+       .quad 0x650a73548baf63de,0x766a0abb3c77b2a8
+       .quad 0x81c2c92e47edaee6,0x92722c851482353b
+       .quad 0xa2bfe8a14cf10364,0xa81a664bbc423001
+       .quad 0xc24b8b70d0f89791,0xc76c51a30654be30
+       .quad 0xd192e819d6ef5218,0xd69906245565a910
+       .quad 0xf40e35855771202a,0x106aa07032bbd1b8
+       .quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53
+       .quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
+       .quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
+       .quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
+       .quad 0x748f82ee5defb2fc,0x78a5636f43172f60
+       .quad 0x84c87814a1f0ab72,0x8cc702081a6439ec
+       .quad 0x90befffa23631e28,0xa4506cebde82bde9
+       .quad 0xbef9a3f7b2c67915,0xc67178f2e372532b
+       .quad 0xca273eceea26619c,0xd186b8c721c0c207
+       .quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
+       .quad 0x06f067aa72176fba,0x0a637dc5a2c898a6
+       .quad 0x113f9804bef90dae,0x1b710b35131c471b
+       .quad 0x28db77f523047d84,0x32caab7b40c72493
+       .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
+       .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a
+       .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817
+
+#endif
+#endif
diff --git a/grub-core/lib/libgcrypt/cipher/sha512-ssse3-i386.c 
b/grub-core/lib/libgcrypt/cipher/sha512-ssse3-i386.c
new file mode 100644
index 000000000..0fc98d8ed
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/sha512-ssse3-i386.c
@@ -0,0 +1,404 @@
+/* sha512-ssse3-i386.c - i386/SSSE3 implementation of SHA-512 transform
+ * Copyright (C) 2019 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * SHA512 Message Expansion (I2 and W2 macros) based on implementation
+ * from file "sha512-ssse3-amd64.s":
+ ************************************************************************
+ * Copyright (c) 2012, Intel Corporation
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in the
+ *   documentation and/or other materials provided with the
+ *   distribution.
+ *
+ * * Neither the name of the Intel Corporation nor the names of its
+ *   contributors may be used to endorse or promote products derived from
+ *   this software without specific prior written permission.
+ *
+ *
+ * THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ ************************************************************************
+ */
+
+#include <config.h>
+
+#if defined(__i386__) && SIZEOF_UNSIGNED_LONG == 4 && __GNUC__ >= 4 && \
+    defined(HAVE_GCC_INLINE_ASM_SSSE3) && defined(USE_SHA512)
+
+#include "bufhelp.h"
+
+
+#if _GCRY_GCC_VERSION >= 40400 /* 4.4 */
+/* Prevent compiler from issuing SSE/MMX instructions between asm blocks. */
+#  pragma GCC target("no-sse")
+#  pragma GCC target("no-mmx")
+#endif
+#if __clang__
+#  pragma clang attribute push (__attribute__((target("no-sse"))), apply_to = 
function)
+#  pragma clang attribute push (__attribute__((target("no-mmx"))), apply_to = 
function)
+#endif
+
+
+#define ALWAYS_INLINE inline __attribute__((always_inline))
+#define NO_INLINE __attribute__((noinline))
+#define NO_INSTRUMENT_FUNCTION __attribute__((no_instrument_function))
+
+#define ASM_FUNC_ATTR          NO_INSTRUMENT_FUNCTION
+#define ASM_FUNC_ATTR_INLINE   ASM_FUNC_ATTR ALWAYS_INLINE
+#define ASM_FUNC_ATTR_NOINLINE ASM_FUNC_ATTR NO_INLINE
+
+
+static const u64 K[80] __attribute__ ((aligned (16))) =
+  {
+    U64_C(0x428a2f98d728ae22), U64_C(0x7137449123ef65cd),
+    U64_C(0xb5c0fbcfec4d3b2f), U64_C(0xe9b5dba58189dbbc),
+    U64_C(0x3956c25bf348b538), U64_C(0x59f111f1b605d019),
+    U64_C(0x923f82a4af194f9b), U64_C(0xab1c5ed5da6d8118),
+    U64_C(0xd807aa98a3030242), U64_C(0x12835b0145706fbe),
+    U64_C(0x243185be4ee4b28c), U64_C(0x550c7dc3d5ffb4e2),
+    U64_C(0x72be5d74f27b896f), U64_C(0x80deb1fe3b1696b1),
+    U64_C(0x9bdc06a725c71235), U64_C(0xc19bf174cf692694),
+    U64_C(0xe49b69c19ef14ad2), U64_C(0xefbe4786384f25e3),
+    U64_C(0x0fc19dc68b8cd5b5), U64_C(0x240ca1cc77ac9c65),
+    U64_C(0x2de92c6f592b0275), U64_C(0x4a7484aa6ea6e483),
+    U64_C(0x5cb0a9dcbd41fbd4), U64_C(0x76f988da831153b5),
+    U64_C(0x983e5152ee66dfab), U64_C(0xa831c66d2db43210),
+    U64_C(0xb00327c898fb213f), U64_C(0xbf597fc7beef0ee4),
+    U64_C(0xc6e00bf33da88fc2), U64_C(0xd5a79147930aa725),
+    U64_C(0x06ca6351e003826f), U64_C(0x142929670a0e6e70),
+    U64_C(0x27b70a8546d22ffc), U64_C(0x2e1b21385c26c926),
+    U64_C(0x4d2c6dfc5ac42aed), U64_C(0x53380d139d95b3df),
+    U64_C(0x650a73548baf63de), U64_C(0x766a0abb3c77b2a8),
+    U64_C(0x81c2c92e47edaee6), U64_C(0x92722c851482353b),
+    U64_C(0xa2bfe8a14cf10364), U64_C(0xa81a664bbc423001),
+    U64_C(0xc24b8b70d0f89791), U64_C(0xc76c51a30654be30),
+    U64_C(0xd192e819d6ef5218), U64_C(0xd69906245565a910),
+    U64_C(0xf40e35855771202a), U64_C(0x106aa07032bbd1b8),
+    U64_C(0x19a4c116b8d2d0c8), U64_C(0x1e376c085141ab53),
+    U64_C(0x2748774cdf8eeb99), U64_C(0x34b0bcb5e19b48a8),
+    U64_C(0x391c0cb3c5c95a63), U64_C(0x4ed8aa4ae3418acb),
+    U64_C(0x5b9cca4f7763e373), U64_C(0x682e6ff3d6b2b8a3),
+    U64_C(0x748f82ee5defb2fc), U64_C(0x78a5636f43172f60),
+    U64_C(0x84c87814a1f0ab72), U64_C(0x8cc702081a6439ec),
+    U64_C(0x90befffa23631e28), U64_C(0xa4506cebde82bde9),
+    U64_C(0xbef9a3f7b2c67915), U64_C(0xc67178f2e372532b),
+    U64_C(0xca273eceea26619c), U64_C(0xd186b8c721c0c207),
+    U64_C(0xeada7dd6cde0eb1e), U64_C(0xf57d4f7fee6ed178),
+    U64_C(0x06f067aa72176fba), U64_C(0x0a637dc5a2c898a6),
+    U64_C(0x113f9804bef90dae), U64_C(0x1b710b35131c471b),
+    U64_C(0x28db77f523047d84), U64_C(0x32caab7b40c72493),
+    U64_C(0x3c9ebe0a15c9bebc), U64_C(0x431d67c49c100d4c),
+    U64_C(0x4cc5d4becb3e42b6), U64_C(0x597f299cfc657e2a),
+    U64_C(0x5fcb6fab3ad6faec), U64_C(0x6c44198c4a475817)
+  };
+
+static const unsigned char bshuf_mask[16] __attribute__ ((aligned (16))) =
+  { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 };
+
+
+/* SHA2 round */
+#define RA "%%mm0"
+#define RB "%%mm1"
+#define RC "%%mm2"
+#define RD "%%mm3"
+#define RE "%%mm4"
+#define RF "%%mm5"
+#define RG "%%mm6"
+#define RH "%%mm7"
+
+#define Rx(a,b,c,d,e,f,g,h,wk) \
+       asm volatile (/* Cho + Sum1 */                                  \
+                     "movq2dq "a", %%xmm2;\n\t"                        \
+                     "movq "e", "a";\n\t"                              \
+                     "movq2dq "c", %%xmm3;\n\t"                        \
+                     "movq "e", "c";\n\t"                              \
+                     "movq2dq "b", %%xmm4;\n\t"                        \
+                     "movq "e", "b";\n\t"                              \
+                     "psrlq $(41-18), "c";\n\t"                        \
+                     "pandn "g", "a";\n\t"                             \
+                     "pxor "e", "c";\n\t"                              \
+                     "pand "f", "b";\n\t"                              \
+                     "psrlq $(18-14), "c";\n\t"                        \
+                     "paddq "a", "h";\n\t"                             \
+                     wk(a)                                             \
+                     "pxor "e", "c";\n\t"                              \
+                     "paddq "b", "h";\n\t"                             \
+                     "psrlq $(14), "c";\n\t"                           \
+                     "movq "e", "b";\n\t"                              \
+                     "psllq $(50-46), "b";\n\t"                        \
+                     "paddq "a", "h";\n\t"                             \
+                     "movdq2q %%xmm2, "a";\n\t"                        \
+                     "pxor "e", "b";\n\t"                              \
+                     "psllq $(46-23), "b";\n\t"                        \
+                     "pxor "e", "b";\n\t"                              \
+                     "psllq $(23), "b";\n\t"                           \
+                     "pxor "b", "c";\n\t"                              \
+                     "movdq2q %%xmm4, "b";\n\t"                        \
+                     "paddq "c", "h";\n\t"                             \
+                     "movdq2q %%xmm3, "c";\n\t"                        \
+                     \
+                     /* Maj + Sum0 */ \
+                     "movq2dq "e", %%xmm2;\n\t"                        \
+                     "movq "a", "e";\n\t"                              \
+                     "movq2dq "g", %%xmm3;\n\t"                        \
+                     "movq "a", "g";\n\t"                              \
+                     "movq2dq "f", %%xmm4;\n\t"                        \
+                     "movq "a", "f";\n\t"                              \
+                     "psrlq $(39-34), "g";\n\t"                        \
+                     "pxor "b", "e";\n\t"                              \
+                     "pxor "a", "g";\n\t"                              \
+                     "pand "b", "f";\n\t"                              \
+                     "psrlq $(34-28), "g";\n\t"                        \
+                     "pand "c", "e";\n\t"                              \
+                     "pxor "a", "g";\n\t"                              \
+                     "paddq "h", "d";\n\t"                             \
+                     "paddq "f", "h";\n\t"                             \
+                     "movdq2q %%xmm4, "f";\n\t"                        \
+                     "psrlq $28, "g";\n\t"                             \
+                     "paddq "e", "h";\n\t"                             \
+                     "movq "a", "e";\n\t"                              \
+                     "psllq $(36-30), "e";\n\t"                        \
+                     "pxor "a", "e";\n\t"                              \
+                     "psllq $(30-25), "e";\n\t"                        \
+                     "pxor "a", "e";\n\t"                              \
+                     "psllq $(25), "e";\n\t"                           \
+                     "pxor "e", "g";\n\t"                              \
+                     "movdq2q %%xmm2, "e";\n\t"                        \
+                     "paddq "g", "h";\n\t"                             \
+                     "movdq2q %%xmm3, "g";\n\t"                        \
+                     \
+                     :                                                 \
+                     :                                                 \
+                     : "memory" )
+
+#define WK0(tmp)      "movdq2q %%xmm0, "tmp";\n\t"                     \
+                     "pshufd $0xee, %%xmm0, %%xmm0;\n\t"
+
+#define WK1(tmp)      "movdq2q %%xmm0, "tmp";\n\t"
+
+/* Message expansion */
+#define I2(i) \
+       asm volatile ("movdqu %[inbuf], %%xmm0;\n\t"                    \
+                     "pshufb %%xmm6, %%xmm0;\n\t"                      \
+                     "movdqu %%xmm0, %[w];\n\t"                        \
+                     "paddq %[k], %%xmm0;\n\t"                         \
+                     :                                                 \
+                     : [k] "m" (K[i]),                                 \
+                       [w] "m" (w[i]),                                 \
+                       [inbuf] "m" (data[(i)*8])                       \
+                     : "memory" )
+
+#define W2(i) \
+       asm volatile ("movdqu %[w_t_m_2], %%xmm2;\n\t"                  \
+                     "movdqa %%xmm2, %%xmm0;\n\t"                      \
+                     "movdqu %[w_t_m_15], %%xmm5;\n\t"                 \
+                     :                                                 \
+                     : [w_t_m_2] "m" (w[(i)-2]),                       \
+                       [w_t_m_15] "m" (w[(i)-15])                      \
+                     : "memory" );                                     \
+       asm volatile ("movdqa %%xmm5, %%xmm3;\n\t"                      \
+                     "psrlq $(61-19), %%xmm0;\n\t"                     \
+                     "psrlq $(8-7), %%xmm3;\n\t"                       \
+                     "pxor %%xmm2, %%xmm0;\n\t"                        \
+                     "pxor %%xmm5, %%xmm3;\n\t"                        \
+                     "psrlq $(19-6), %%xmm0;\n\t"                      \
+                     "psrlq $(7-1), %%xmm3;\n\t"                       \
+                     "pxor %%xmm2, %%xmm0;\n\t"                        \
+                     "pxor %%xmm5, %%xmm3;\n\t"                        \
+                     "psrlq $6, %%xmm0;\n\t"                           \
+                     "psrlq $1, %%xmm3;\n\t"                           \
+                     "movdqa %%xmm2, %%xmm1;\n\t"                      \
+                     "movdqa %%xmm5, %%xmm4;\n\t"                      \
+                     "psllq $(61-19), %%xmm1;\n\t"                     \
+                     "psllq $(8-1), %%xmm4;\n\t"                       \
+                     "pxor %%xmm2, %%xmm1;\n\t"                        \
+                     "pxor %%xmm5, %%xmm4;\n\t"                        \
+                     "psllq $(64-61), %%xmm1;\n\t"                     \
+                     "psllq $(64-8), %%xmm4;\n\t"                      \
+                     "pxor %%xmm1, %%xmm0;\n\t"                        \
+                     "movdqu %[w_t_m_16], %%xmm2;\n\t"                 \
+                     "pxor %%xmm4, %%xmm3;\n\t"                        \
+                     "movdqu %[w_t_m_7], %%xmm1;\n\t"                  \
+                     :                                                 \
+                     : [w_t_m_7] "m" (w[(i)-7]),                       \
+                       [w_t_m_16] "m" (w[(i)-16])                      \
+                     : "memory" );                                     \
+       asm volatile ("paddq %%xmm3, %%xmm0;\n\t"                       \
+                     "paddq %%xmm2, %%xmm0;\n\t"                       \
+                     "paddq %%xmm1, %%xmm0;\n\t"                       \
+                     "movdqu %%xmm0, %[w_t_m_0];\n\t"                  \
+                     "paddq %[k], %%xmm0;\n\t"                         \
+                     : [w_t_m_0] "=m" (w[(i)-0])                       \
+                     : [k] "m" (K[i])                                  \
+                     : "memory" )
+
+unsigned int ASM_FUNC_ATTR
+_gcry_sha512_transform_i386_ssse3(u64 state[8], const unsigned char *data,
+                                 size_t nblks)
+{
+  unsigned int t;
+  u64 w[80];
+
+  /* Load state to MMX registers. */
+  asm volatile ("movq 8*0(%[state]), "RA";\n\t"
+               "movq 8*1(%[state]), "RB";\n\t"
+               "movq 8*2(%[state]), "RC";\n\t"
+               "movq 8*3(%[state]), "RD";\n\t"
+               "movq 8*4(%[state]), "RE";\n\t"
+               "movq 8*5(%[state]), "RF";\n\t"
+               "movq 8*6(%[state]), "RG";\n\t"
+               "movq 8*7(%[state]), "RH";\n\t"
+               :
+               : [state] "r" (state)
+               : "memory" );
+
+  asm volatile ("movdqa %[bshuf_mask], %%xmm6;\n\t"
+               :
+               : [bshuf_mask] "m" (*bshuf_mask)
+               : "memory" );
+
+  while (nblks)
+    {
+      I2(0);
+      Rx(RA, RB, RC, RD, RE, RF, RG, RH, WK0);
+      Rx(RH, RA, RB, RC, RD, RE, RF, RG, WK1);
+      I2(2);
+      Rx(RG, RH, RA, RB, RC, RD, RE, RF, WK0);
+      Rx(RF, RG, RH, RA, RB, RC, RD, RE, WK1);
+      I2(4);
+      Rx(RE, RF, RG, RH, RA, RB, RC, RD, WK0);
+      Rx(RD, RE, RF, RG, RH, RA, RB, RC, WK1);
+      I2(6);
+      Rx(RC, RD, RE, RF, RG, RH, RA, RB, WK0);
+      Rx(RB, RC, RD, RE, RF, RG, RH, RA, WK1);
+      I2(8);
+      Rx(RA, RB, RC, RD, RE, RF, RG, RH, WK0);
+      Rx(RH, RA, RB, RC, RD, RE, RF, RG, WK1);
+      I2(10);
+      Rx(RG, RH, RA, RB, RC, RD, RE, RF, WK0);
+      Rx(RF, RG, RH, RA, RB, RC, RD, RE, WK1);
+      I2(12);
+      Rx(RE, RF, RG, RH, RA, RB, RC, RD, WK0);
+      Rx(RD, RE, RF, RG, RH, RA, RB, RC, WK1);
+      I2(14);
+      Rx(RC, RD, RE, RF, RG, RH, RA, RB, WK0);
+      Rx(RB, RC, RD, RE, RF, RG, RH, RA, WK1);
+      data += 128;
+
+      for (t = 16; t < 80; t += 16)
+       {
+         W2(t + 0);
+         Rx(RA, RB, RC, RD, RE, RF, RG, RH, WK0);
+         Rx(RH, RA, RB, RC, RD, RE, RF, RG, WK1);
+         W2(t + 2);
+         Rx(RG, RH, RA, RB, RC, RD, RE, RF, WK0);
+         Rx(RF, RG, RH, RA, RB, RC, RD, RE, WK1);
+         W2(t + 4);
+         Rx(RE, RF, RG, RH, RA, RB, RC, RD, WK0);
+         Rx(RD, RE, RF, RG, RH, RA, RB, RC, WK1);
+         W2(t + 6);
+         Rx(RC, RD, RE, RF, RG, RH, RA, RB, WK0);
+         Rx(RB, RC, RD, RE, RF, RG, RH, RA, WK1);
+         W2(t + 8);
+         Rx(RA, RB, RC, RD, RE, RF, RG, RH, WK0);
+         Rx(RH, RA, RB, RC, RD, RE, RF, RG, WK1);
+         W2(t + 10);
+         Rx(RG, RH, RA, RB, RC, RD, RE, RF, WK0);
+         Rx(RF, RG, RH, RA, RB, RC, RD, RE, WK1);
+         W2(t + 12);
+         Rx(RE, RF, RG, RH, RA, RB, RC, RD, WK0);
+         Rx(RD, RE, RF, RG, RH, RA, RB, RC, WK1);
+         W2(t + 14);
+         Rx(RC, RD, RE, RF, RG, RH, RA, RB, WK0);
+         Rx(RB, RC, RD, RE, RF, RG, RH, RA, WK1);
+       }
+
+      asm volatile ("paddq 8*0(%[state]), "RA";\n\t"
+                   "paddq 8*1(%[state]), "RB";\n\t"
+                   "paddq 8*2(%[state]), "RC";\n\t"
+                   "paddq 8*3(%[state]), "RD";\n\t"
+                   "paddq 8*4(%[state]), "RE";\n\t"
+                   "paddq 8*5(%[state]), "RF";\n\t"
+                   "paddq 8*6(%[state]), "RG";\n\t"
+                   "paddq 8*7(%[state]), "RH";\n\t"
+                   "movq "RA", 8*0(%[state]);\n\t"
+                   "movq "RB", 8*1(%[state]);\n\t"
+                   "movq "RC", 8*2(%[state]);\n\t"
+                   "movq "RD", 8*3(%[state]);\n\t"
+                   "movq "RE", 8*4(%[state]);\n\t"
+                   "movq "RF", 8*5(%[state]);\n\t"
+                   "movq "RG", 8*6(%[state]);\n\t"
+                   "movq "RH", 8*7(%[state]);\n\t"
+                   :
+                   : [state] "r" (state)
+                   : "memory" );
+
+      nblks--;
+    }
+
+  /* Clear registers */
+  asm volatile ("pxor %%xmm0, %%xmm0;\n\t"
+               "pxor %%xmm1, %%xmm1;\n\t"
+               "pxor %%xmm2, %%xmm2;\n\t"
+               "pxor %%xmm3, %%xmm3;\n\t"
+               "pxor %%xmm4, %%xmm4;\n\t"
+               "pxor %%xmm5, %%xmm5;\n\t"
+               "pxor %%xmm6, %%xmm6;\n\t"
+               "pxor %%mm0, %%mm0;\n\t"
+               "pxor %%mm1, %%mm1;\n\t"
+               "pxor %%mm2, %%mm2;\n\t"
+               "pxor %%mm3, %%mm3;\n\t"
+               "pxor %%mm4, %%mm4;\n\t"
+               "pxor %%mm5, %%mm5;\n\t"
+               "pxor %%mm6, %%mm6;\n\t"
+               "pxor %%mm7, %%mm7;\n\t"
+               "emms;\n\t"
+              :
+              :
+              : "memory" );
+
+  return sizeof(w);
+}
+
+#if __clang__
+#  pragma clang attribute pop
+#  pragma clang attribute pop
+#endif
+
+#endif
diff --git a/grub-core/lib/libgcrypt/cipher/sha512.c 
b/grub-core/lib/libgcrypt/cipher/sha512.c
index 2163e6009..57a74664a 100644
--- a/grub-core/lib/libgcrypt/cipher/sha512.c
+++ b/grub-core/lib/libgcrypt/cipher/sha512.c
@@ -50,53 +50,452 @@
 #include <string.h>
 #include "g10lib.h"
 #include "bithelp.h"
+#include "bufhelp.h"
 #include "cipher.h"
 #include "hash-common.h"
 
+
+/* USE_ARM_NEON_ASM indicates whether to enable ARM NEON assembly code. */
+#undef USE_ARM_NEON_ASM
+#ifdef ENABLE_NEON_SUPPORT
+# if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) \
+     && defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) \
+     && defined(HAVE_GCC_INLINE_ASM_NEON)
+#  define USE_ARM_NEON_ASM 1
+# endif
+#endif /*ENABLE_NEON_SUPPORT*/
+
+
+/* USE_ARM_ASM indicates whether to enable ARM assembly code. */
+#undef USE_ARM_ASM
+#if defined(__ARMEL__) && defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS)
+# define USE_ARM_ASM 1
+#endif
+
+
+/* USE_SSSE3 indicates whether to compile with Intel SSSE3 code. */
+#undef USE_SSSE3
+#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_SSSE3) && \
+    defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \
+    (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+# define USE_SSSE3 1
+#endif
+
+
+/* USE_AVX indicates whether to compile with Intel AVX code. */
+#undef USE_AVX
+#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX) && \
+    defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \
+    (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+# define USE_AVX 1
+#endif
+
+
+/* USE_AVX2 indicates whether to compile with Intel AVX2/rorx code. */
+#undef USE_AVX2
+#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX2) && \
+    defined(HAVE_GCC_INLINE_ASM_BMI2) && \
+    defined(HAVE_INTEL_SYNTAX_PLATFORM_AS) && \
+    (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+# define USE_AVX2 1
+#endif
+
+
+/* USE_SSSE3_I386 indicates whether to compile with Intel SSSE3/i386 code. */
+#undef USE_SSSE3_I386
+#if defined(__i386__) && SIZEOF_UNSIGNED_LONG == 4 && __GNUC__ >= 4 && \
+    defined(HAVE_GCC_INLINE_ASM_SSSE3)
+# define USE_SSSE3_I386 1
+#endif
+
+
+/* USE_PPC_CRYPTO indicates whether to enable PowerPC vector crypto
+ * accelerated code. */
+#undef USE_PPC_CRYPTO
+#ifdef ENABLE_PPC_CRYPTO_SUPPORT
+# if defined(HAVE_COMPATIBLE_CC_PPC_ALTIVEC) && \
+     defined(HAVE_GCC_INLINE_ASM_PPC_ALTIVEC)
+#  if __GNUC__ >= 4
+#   define USE_PPC_CRYPTO 1
+#  endif
+# endif
+#endif
+
+
+/* USE_S390X_CRYPTO indicates whether to enable zSeries code. */
+#undef USE_S390X_CRYPTO
+#if defined(HAVE_GCC_INLINE_ASM_S390X)
+# define USE_S390X_CRYPTO 1
+#endif /* USE_S390X_CRYPTO */
+
+
 typedef struct
 {
-  u64 h0, h1, h2, h3, h4, h5, h6, h7;
-  u64 nblocks;
-  byte buf[128];
-  int count;
+  u64 h[8];
+} SHA512_STATE;
+
+typedef struct
+{
+  gcry_md_block_ctx_t bctx;
+  SHA512_STATE state;
+#ifdef USE_S390X_CRYPTO
+  u64 final_len_msb, final_len_lsb; /* needs to be right after state.h[7]. */
+  int use_s390x_crypto;
+#endif
 } SHA512_CONTEXT;
 
-static void
-sha512_init (void *context)
+
+static const u64 k[] =
+  {
+    U64_C(0x428a2f98d728ae22), U64_C(0x7137449123ef65cd),
+    U64_C(0xb5c0fbcfec4d3b2f), U64_C(0xe9b5dba58189dbbc),
+    U64_C(0x3956c25bf348b538), U64_C(0x59f111f1b605d019),
+    U64_C(0x923f82a4af194f9b), U64_C(0xab1c5ed5da6d8118),
+    U64_C(0xd807aa98a3030242), U64_C(0x12835b0145706fbe),
+    U64_C(0x243185be4ee4b28c), U64_C(0x550c7dc3d5ffb4e2),
+    U64_C(0x72be5d74f27b896f), U64_C(0x80deb1fe3b1696b1),
+    U64_C(0x9bdc06a725c71235), U64_C(0xc19bf174cf692694),
+    U64_C(0xe49b69c19ef14ad2), U64_C(0xefbe4786384f25e3),
+    U64_C(0x0fc19dc68b8cd5b5), U64_C(0x240ca1cc77ac9c65),
+    U64_C(0x2de92c6f592b0275), U64_C(0x4a7484aa6ea6e483),
+    U64_C(0x5cb0a9dcbd41fbd4), U64_C(0x76f988da831153b5),
+    U64_C(0x983e5152ee66dfab), U64_C(0xa831c66d2db43210),
+    U64_C(0xb00327c898fb213f), U64_C(0xbf597fc7beef0ee4),
+    U64_C(0xc6e00bf33da88fc2), U64_C(0xd5a79147930aa725),
+    U64_C(0x06ca6351e003826f), U64_C(0x142929670a0e6e70),
+    U64_C(0x27b70a8546d22ffc), U64_C(0x2e1b21385c26c926),
+    U64_C(0x4d2c6dfc5ac42aed), U64_C(0x53380d139d95b3df),
+    U64_C(0x650a73548baf63de), U64_C(0x766a0abb3c77b2a8),
+    U64_C(0x81c2c92e47edaee6), U64_C(0x92722c851482353b),
+    U64_C(0xa2bfe8a14cf10364), U64_C(0xa81a664bbc423001),
+    U64_C(0xc24b8b70d0f89791), U64_C(0xc76c51a30654be30),
+    U64_C(0xd192e819d6ef5218), U64_C(0xd69906245565a910),
+    U64_C(0xf40e35855771202a), U64_C(0x106aa07032bbd1b8),
+    U64_C(0x19a4c116b8d2d0c8), U64_C(0x1e376c085141ab53),
+    U64_C(0x2748774cdf8eeb99), U64_C(0x34b0bcb5e19b48a8),
+    U64_C(0x391c0cb3c5c95a63), U64_C(0x4ed8aa4ae3418acb),
+    U64_C(0x5b9cca4f7763e373), U64_C(0x682e6ff3d6b2b8a3),
+    U64_C(0x748f82ee5defb2fc), U64_C(0x78a5636f43172f60),
+    U64_C(0x84c87814a1f0ab72), U64_C(0x8cc702081a6439ec),
+    U64_C(0x90befffa23631e28), U64_C(0xa4506cebde82bde9),
+    U64_C(0xbef9a3f7b2c67915), U64_C(0xc67178f2e372532b),
+    U64_C(0xca273eceea26619c), U64_C(0xd186b8c721c0c207),
+    U64_C(0xeada7dd6cde0eb1e), U64_C(0xf57d4f7fee6ed178),
+    U64_C(0x06f067aa72176fba), U64_C(0x0a637dc5a2c898a6),
+    U64_C(0x113f9804bef90dae), U64_C(0x1b710b35131c471b),
+    U64_C(0x28db77f523047d84), U64_C(0x32caab7b40c72493),
+    U64_C(0x3c9ebe0a15c9bebc), U64_C(0x431d67c49c100d4c),
+    U64_C(0x4cc5d4becb3e42b6), U64_C(0x597f299cfc657e2a),
+    U64_C(0x5fcb6fab3ad6faec), U64_C(0x6c44198c4a475817)
+  };
+
+
+/* AMD64 assembly implementations use SystemV ABI, ABI conversion and 
additional
+ * stack to store XMM6-XMM15 needed on Win64. */
+#undef ASM_FUNC_ABI
+#undef ASM_EXTRA_STACK
+#if defined(USE_SSSE3) || defined(USE_AVX) || defined(USE_AVX2)
+# ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+#  define ASM_FUNC_ABI __attribute__((sysv_abi))
+#  define ASM_EXTRA_STACK (10 * 16 + 4 * sizeof(void *))
+# else
+#  define ASM_FUNC_ABI
+#  define ASM_EXTRA_STACK 0
+# endif
+#endif
+
+
+#ifdef USE_ARM_NEON_ASM
+unsigned int _gcry_sha512_transform_armv7_neon (SHA512_STATE *hd,
+                                                const unsigned char *data,
+                                                const u64 k[], size_t 
num_blks);
+
+static unsigned int
+do_sha512_transform_armv7_neon(void *ctx, const unsigned char *data,
+                               size_t nblks)
+{
+  SHA512_CONTEXT *hd = ctx;
+  return _gcry_sha512_transform_armv7_neon (&hd->state, data, k, nblks);
+}
+#endif
+
+#ifdef USE_SSSE3
+unsigned int _gcry_sha512_transform_amd64_ssse3(const void *input_data,
+                                                void *state,
+                                                size_t num_blks) ASM_FUNC_ABI;
+
+static unsigned int
+do_sha512_transform_amd64_ssse3(void *ctx, const unsigned char *data,
+                                size_t nblks)
+{
+  SHA512_CONTEXT *hd = ctx;
+  return _gcry_sha512_transform_amd64_ssse3 (data, &hd->state, nblks)
+         + ASM_EXTRA_STACK;
+}
+#endif
+
+#ifdef USE_AVX
+unsigned int _gcry_sha512_transform_amd64_avx(const void *input_data,
+                                              void *state,
+                                              size_t num_blks) ASM_FUNC_ABI;
+
+static unsigned int
+do_sha512_transform_amd64_avx(void *ctx, const unsigned char *data,
+                              size_t nblks)
+{
+  SHA512_CONTEXT *hd = ctx;
+  return _gcry_sha512_transform_amd64_avx (data, &hd->state, nblks)
+         + ASM_EXTRA_STACK;
+}
+#endif
+
+#ifdef USE_AVX2
+unsigned int _gcry_sha512_transform_amd64_avx2(const void *input_data,
+                                               void *state,
+                                               size_t num_blks) ASM_FUNC_ABI;
+
+static unsigned int
+do_sha512_transform_amd64_avx2(void *ctx, const unsigned char *data,
+                               size_t nblks)
+{
+  SHA512_CONTEXT *hd = ctx;
+  return _gcry_sha512_transform_amd64_avx2 (data, &hd->state, nblks)
+         + ASM_EXTRA_STACK;
+}
+#endif
+
+#ifdef USE_SSSE3_I386
+unsigned int _gcry_sha512_transform_i386_ssse3(u64 state[8],
+                                              const unsigned char *input_data,
+                                              size_t num_blks);
+
+static unsigned int
+do_sha512_transform_i386_ssse3(void *ctx, const unsigned char *data,
+                              size_t nblks)
+{
+  SHA512_CONTEXT *hd = ctx;
+  return _gcry_sha512_transform_i386_ssse3 (hd->state.h, data, nblks);
+}
+#endif
+
+
+#ifdef USE_ARM_ASM
+unsigned int _gcry_sha512_transform_arm (SHA512_STATE *hd,
+                                        const unsigned char *data,
+                                        const u64 k[], size_t num_blks);
+
+static unsigned int
+do_transform_generic (void *context, const unsigned char *data, size_t nblks)
 {
   SHA512_CONTEXT *hd = context;
+  return _gcry_sha512_transform_arm (&hd->state, data, k, nblks);
+}
+#else
+static unsigned int
+do_transform_generic (void *context, const unsigned char *data, size_t nblks);
+#endif
+
+
+#ifdef USE_PPC_CRYPTO
+unsigned int _gcry_sha512_transform_ppc8(u64 state[8],
+                                        const unsigned char *input_data,
+                                        size_t num_blks);
+
+unsigned int _gcry_sha512_transform_ppc9(u64 state[8],
+                                        const unsigned char *input_data,
+                                        size_t num_blks);
 
-  hd->h0 = U64_C(0x6a09e667f3bcc908);
-  hd->h1 = U64_C(0xbb67ae8584caa73b);
-  hd->h2 = U64_C(0x3c6ef372fe94f82b);
-  hd->h3 = U64_C(0xa54ff53a5f1d36f1);
-  hd->h4 = U64_C(0x510e527fade682d1);
-  hd->h5 = U64_C(0x9b05688c2b3e6c1f);
-  hd->h6 = U64_C(0x1f83d9abfb41bd6b);
-  hd->h7 = U64_C(0x5be0cd19137e2179);
+static unsigned int
+do_sha512_transform_ppc8(void *ctx, const unsigned char *data, size_t nblks)
+{
+  SHA512_CONTEXT *hd = ctx;
+  return _gcry_sha512_transform_ppc8 (hd->state.h, data, nblks);
+}
+
+static unsigned int
+do_sha512_transform_ppc9(void *ctx, const unsigned char *data, size_t nblks)
+{
+  SHA512_CONTEXT *hd = ctx;
+  return _gcry_sha512_transform_ppc9 (hd->state.h, data, nblks);
+}
+#endif
+
+
+#ifdef USE_S390X_CRYPTO
+#include "asm-inline-s390x.h"
+
+static unsigned int
+do_sha512_transform_s390x (void *ctx, const unsigned char *data, size_t nblks)
+{
+  SHA512_CONTEXT *hd = ctx;
 
-  hd->nblocks = 0;
-  hd->count = 0;
+  kimd_execute (KMID_FUNCTION_SHA512, hd->state.h, data, nblks * 128);
+  return 0;
 }
 
+static unsigned int
+do_sha512_final_s390x (void *ctx, const unsigned char *data, size_t datalen,
+                      u64 len_msb, u64 len_lsb)
+{
+  SHA512_CONTEXT *hd = ctx;
+
+  /* Make sure that 'final_len' is positioned at correct offset relative
+   * to 'state.h[0]'. This is because we are passing 'state.h[0]' pointer as
+   * start of parameter block to 'klmd' instruction. */
+
+  gcry_assert (offsetof (SHA512_CONTEXT, final_len_msb)
+              - offsetof (SHA512_CONTEXT, state.h[0]) == 8 * sizeof(u64));
+  gcry_assert (offsetof (SHA512_CONTEXT, final_len_lsb)
+              - offsetof (SHA512_CONTEXT, final_len_msb) == 1 * sizeof(u64));
+
+  hd->final_len_msb = len_msb;
+  hd->final_len_lsb = len_lsb;
+
+  klmd_execute (KMID_FUNCTION_SHA512, hd->state.h, data, datalen);
+  return 0;
+}
+#endif
+
+
 static void
-sha384_init (void *context)
+sha512_init_common (SHA512_CONTEXT *ctx, unsigned int flags)
 {
-  SHA512_CONTEXT *hd = context;
+  unsigned int features = _gcry_get_hw_features ();
+
+  (void)flags;
+  (void)k;
+
+  ctx->bctx.nblocks = 0;
+  ctx->bctx.nblocks_high = 0;
+  ctx->bctx.count = 0;
+  ctx->bctx.blocksize_shift = _gcry_ctz(128);
+
+  /* Order of feature checks is important here; last match will be
+   * selected.  Keep slower implementations at the top and faster at
+   * the bottom.  */
+  ctx->bctx.bwrite = do_transform_generic;
+#ifdef USE_ARM_NEON_ASM
+  if ((features & HWF_ARM_NEON) != 0)
+    ctx->bctx.bwrite = do_sha512_transform_armv7_neon;
+#endif
+#ifdef USE_SSSE3
+  if ((features & HWF_INTEL_SSSE3) != 0)
+    ctx->bctx.bwrite = do_sha512_transform_amd64_ssse3;
+#endif
+#ifdef USE_AVX
+  if ((features & HWF_INTEL_AVX) && (features & HWF_INTEL_FAST_SHLD))
+    ctx->bctx.bwrite = do_sha512_transform_amd64_avx;
+#endif
+#ifdef USE_AVX2
+  if ((features & HWF_INTEL_AVX2) && (features & HWF_INTEL_BMI2))
+    ctx->bctx.bwrite = do_sha512_transform_amd64_avx2;
+#endif
+#ifdef USE_PPC_CRYPTO
+  if ((features & HWF_PPC_VCRYPTO) != 0)
+    ctx->bctx.bwrite = do_sha512_transform_ppc8;
+  if ((features & HWF_PPC_VCRYPTO) != 0 && (features & HWF_PPC_ARCH_3_00) != 0)
+    ctx->bctx.bwrite = do_sha512_transform_ppc9;
+#endif
+#ifdef USE_SSSE3_I386
+  if ((features & HWF_INTEL_SSSE3) != 0)
+    ctx->bctx.bwrite = do_sha512_transform_i386_ssse3;
+#endif
+#ifdef USE_S390X_CRYPTO
+  ctx->use_s390x_crypto = 0;
+  if ((features & HWF_S390X_MSA) != 0)
+    {
+      if ((kimd_query () & km_function_to_mask (KMID_FUNCTION_SHA512)) &&
+         (klmd_query () & km_function_to_mask (KMID_FUNCTION_SHA512)))
+       {
+         ctx->bctx.bwrite = do_sha512_transform_s390x;
+         ctx->use_s390x_crypto = 1;
+       }
+    }
+#endif
+  (void)features;
+}
 
-  hd->h0 = U64_C(0xcbbb9d5dc1059ed8);
-  hd->h1 = U64_C(0x629a292a367cd507);
-  hd->h2 = U64_C(0x9159015a3070dd17);
-  hd->h3 = U64_C(0x152fecd8f70e5939);
-  hd->h4 = U64_C(0x67332667ffc00b31);
-  hd->h5 = U64_C(0x8eb44a8768581511);
-  hd->h6 = U64_C(0xdb0c2e0d64f98fa7);
-  hd->h7 = U64_C(0x47b5481dbefa4fa4);
 
-  hd->nblocks = 0;
-  hd->count = 0;
+static void
+sha512_init (void *context, unsigned int flags)
+{
+  SHA512_CONTEXT *ctx = context;
+  SHA512_STATE *hd = &ctx->state;
+
+  hd->h[0] = U64_C(0x6a09e667f3bcc908);
+  hd->h[1] = U64_C(0xbb67ae8584caa73b);
+  hd->h[2] = U64_C(0x3c6ef372fe94f82b);
+  hd->h[3] = U64_C(0xa54ff53a5f1d36f1);
+  hd->h[4] = U64_C(0x510e527fade682d1);
+  hd->h[5] = U64_C(0x9b05688c2b3e6c1f);
+  hd->h[6] = U64_C(0x1f83d9abfb41bd6b);
+  hd->h[7] = U64_C(0x5be0cd19137e2179);
+
+  sha512_init_common (ctx, flags);
 }
 
+static void
+sha384_init (void *context, unsigned int flags)
+{
+  SHA512_CONTEXT *ctx = context;
+  SHA512_STATE *hd = &ctx->state;
+
+  hd->h[0] = U64_C(0xcbbb9d5dc1059ed8);
+  hd->h[1] = U64_C(0x629a292a367cd507);
+  hd->h[2] = U64_C(0x9159015a3070dd17);
+  hd->h[3] = U64_C(0x152fecd8f70e5939);
+  hd->h[4] = U64_C(0x67332667ffc00b31);
+  hd->h[5] = U64_C(0x8eb44a8768581511);
+  hd->h[6] = U64_C(0xdb0c2e0d64f98fa7);
+  hd->h[7] = U64_C(0x47b5481dbefa4fa4);
+
+  sha512_init_common (ctx, flags);
+}
+
+
+static void
+sha512_256_init (void *context, unsigned int flags)
+{
+  SHA512_CONTEXT *ctx = context;
+  SHA512_STATE *hd = &ctx->state;
+
+  hd->h[0] = U64_C(0x22312194fc2bf72c);
+  hd->h[1] = U64_C(0x9f555fa3c84c64c2);
+  hd->h[2] = U64_C(0x2393b86b6f53b151);
+  hd->h[3] = U64_C(0x963877195940eabd);
+  hd->h[4] = U64_C(0x96283ee2a88effe3);
+  hd->h[5] = U64_C(0xbe5e1e2553863992);
+  hd->h[6] = U64_C(0x2b0199fc2c85b8aa);
+  hd->h[7] = U64_C(0x0eb72ddc81c52ca2);
+
+  sha512_init_common (ctx, flags);
+}
+
+
+static void
+sha512_224_init (void *context, unsigned int flags)
+{
+  SHA512_CONTEXT *ctx = context;
+  SHA512_STATE *hd = &ctx->state;
+
+  hd->h[0] = U64_C(0x8c3d37c819544da2);
+  hd->h[1] = U64_C(0x73e1996689dcd4d6);
+  hd->h[2] = U64_C(0x1dfab7ae32ff9c82);
+  hd->h[3] = U64_C(0x679dd514582f9fcf);
+  hd->h[4] = U64_C(0x0f6d2b697bd44da8);
+  hd->h[5] = U64_C(0x77e36f7304c48942);
+  hd->h[6] = U64_C(0x3f9d85a86a1d36c8);
+  hd->h[7] = U64_C(0x1112e6ad91d692a1);
+
+  sha512_init_common (ctx, flags);
+}
+
+
+
+#ifndef USE_ARM_ASM
 
 static inline u64
 ROTR (u64 x, u64 n)
@@ -131,214 +530,241 @@ Sum1 (u64 x)
 /****************
  * Transform the message W which consists of 16 64-bit-words
  */
-static void
-transform (SHA512_CONTEXT *hd, const unsigned char *data)
+static unsigned int
+do_transform_generic (void *context, const unsigned char *data, size_t nblks)
 {
-  u64 a, b, c, d, e, f, g, h;
-  u64 w[80];
-  int t;
-  static const u64 k[] =
+  SHA512_CONTEXT *ctx = context;
+  SHA512_STATE *hd = &ctx->state;
+
+  do
     {
-      U64_C(0x428a2f98d728ae22), U64_C(0x7137449123ef65cd),
-      U64_C(0xb5c0fbcfec4d3b2f), U64_C(0xe9b5dba58189dbbc),
-      U64_C(0x3956c25bf348b538), U64_C(0x59f111f1b605d019),
-      U64_C(0x923f82a4af194f9b), U64_C(0xab1c5ed5da6d8118),
-      U64_C(0xd807aa98a3030242), U64_C(0x12835b0145706fbe),
-      U64_C(0x243185be4ee4b28c), U64_C(0x550c7dc3d5ffb4e2),
-      U64_C(0x72be5d74f27b896f), U64_C(0x80deb1fe3b1696b1),
-      U64_C(0x9bdc06a725c71235), U64_C(0xc19bf174cf692694),
-      U64_C(0xe49b69c19ef14ad2), U64_C(0xefbe4786384f25e3),
-      U64_C(0x0fc19dc68b8cd5b5), U64_C(0x240ca1cc77ac9c65),
-      U64_C(0x2de92c6f592b0275), U64_C(0x4a7484aa6ea6e483),
-      U64_C(0x5cb0a9dcbd41fbd4), U64_C(0x76f988da831153b5),
-      U64_C(0x983e5152ee66dfab), U64_C(0xa831c66d2db43210),
-      U64_C(0xb00327c898fb213f), U64_C(0xbf597fc7beef0ee4),
-      U64_C(0xc6e00bf33da88fc2), U64_C(0xd5a79147930aa725),
-      U64_C(0x06ca6351e003826f), U64_C(0x142929670a0e6e70),
-      U64_C(0x27b70a8546d22ffc), U64_C(0x2e1b21385c26c926),
-      U64_C(0x4d2c6dfc5ac42aed), U64_C(0x53380d139d95b3df),
-      U64_C(0x650a73548baf63de), U64_C(0x766a0abb3c77b2a8),
-      U64_C(0x81c2c92e47edaee6), U64_C(0x92722c851482353b),
-      U64_C(0xa2bfe8a14cf10364), U64_C(0xa81a664bbc423001),
-      U64_C(0xc24b8b70d0f89791), U64_C(0xc76c51a30654be30),
-      U64_C(0xd192e819d6ef5218), U64_C(0xd69906245565a910),
-      U64_C(0xf40e35855771202a), U64_C(0x106aa07032bbd1b8),
-      U64_C(0x19a4c116b8d2d0c8), U64_C(0x1e376c085141ab53),
-      U64_C(0x2748774cdf8eeb99), U64_C(0x34b0bcb5e19b48a8),
-      U64_C(0x391c0cb3c5c95a63), U64_C(0x4ed8aa4ae3418acb),
-      U64_C(0x5b9cca4f7763e373), U64_C(0x682e6ff3d6b2b8a3),
-      U64_C(0x748f82ee5defb2fc), U64_C(0x78a5636f43172f60),
-      U64_C(0x84c87814a1f0ab72), U64_C(0x8cc702081a6439ec),
-      U64_C(0x90befffa23631e28), U64_C(0xa4506cebde82bde9),
-      U64_C(0xbef9a3f7b2c67915), U64_C(0xc67178f2e372532b),
-      U64_C(0xca273eceea26619c), U64_C(0xd186b8c721c0c207),
-      U64_C(0xeada7dd6cde0eb1e), U64_C(0xf57d4f7fee6ed178),
-      U64_C(0x06f067aa72176fba), U64_C(0x0a637dc5a2c898a6),
-      U64_C(0x113f9804bef90dae), U64_C(0x1b710b35131c471b),
-      U64_C(0x28db77f523047d84), U64_C(0x32caab7b40c72493),
-      U64_C(0x3c9ebe0a15c9bebc), U64_C(0x431d67c49c100d4c),
-      U64_C(0x4cc5d4becb3e42b6), U64_C(0x597f299cfc657e2a),
-      U64_C(0x5fcb6fab3ad6faec), U64_C(0x6c44198c4a475817)
-    };
-
-  /* get values from the chaining vars */
-  a = hd->h0;
-  b = hd->h1;
-  c = hd->h2;
-  d = hd->h3;
-  e = hd->h4;
-  f = hd->h5;
-  g = hd->h6;
-  h = hd->h7;
-
-#ifdef WORDS_BIGENDIAN
-  memcpy (w, data, 128);
-#else
-  {
-    int i;
-    byte *p2;
-
-    for (i = 0, p2 = (byte *) w; i < 16; i++, p2 += 8)
-      {
-       p2[7] = *data++;
-       p2[6] = *data++;
-       p2[5] = *data++;
-       p2[4] = *data++;
-       p2[3] = *data++;
-       p2[2] = *data++;
-       p2[1] = *data++;
-       p2[0] = *data++;
-      }
-  }
-#endif
+      u64 a, b, c, d, e, f, g, h;
+      u64 w[16];
+      int t;
+
+      /* get values from the chaining vars */
+      a = hd->h[0];
+      b = hd->h[1];
+      c = hd->h[2];
+      d = hd->h[3];
+      e = hd->h[4];
+      f = hd->h[5];
+      g = hd->h[6];
+      h = hd->h[7];
+
+      for ( t = 0; t < 16; t++ )
+        w[t] = buf_get_be64(data + t * 8);
 
 #define S0(x) (ROTR((x),1) ^ ROTR((x),8) ^ ((x)>>7))
 #define S1(x) (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
 
-  for (t = 16; t < 80; t++)
-    w[t] = S1 (w[t - 2]) + w[t - 7] + S0 (w[t - 15]) + w[t - 16];
-
-
-  for (t = 0; t < 80; )
-    {
-      u64 t1, t2;
-
-      /* Performance on a AMD Athlon(tm) Dual Core Processor 4050e
-         with gcc 4.3.3 using gcry_md_hash_buffer of each 10000 bytes
-         initialized to 0,1,2,3...255,0,... and 1000 iterations:
-
-         Not unrolled with macros:  440ms
-         Unrolled with macros:      350ms
-         Unrolled with inline:      330ms
-      */
-#if 0 /* Not unrolled.  */
-      t1 = h + Sum1 (e) + Ch (e, f, g) + k[t] + w[t];
-      t2 = Sum0 (a) + Maj (a, b, c);
-      h = g;
-      g = f;
-      f = e;
-      e = d + t1;
-      d = c;
-      c = b;
-      b = a;
-      a = t1 + t2;
-      t++;
-#else /* Unrolled to interweave the chain variables.  */
-      t1 = h + Sum1 (e) + Ch (e, f, g) + k[t] + w[t];
-      t2 = Sum0 (a) + Maj (a, b, c);
-      d += t1;
-      h  = t1 + t2;
-
-      t1 = g + Sum1 (d) + Ch (d, e, f) + k[t+1] + w[t+1];
-      t2 = Sum0 (h) + Maj (h, a, b);
-      c += t1;
-      g  = t1 + t2;
-
-      t1 = f + Sum1 (c) + Ch (c, d, e) + k[t+2] + w[t+2];
-      t2 = Sum0 (g) + Maj (g, h, a);
-      b += t1;
-      f  = t1 + t2;
-
-      t1 = e + Sum1 (b) + Ch (b, c, d) + k[t+3] + w[t+3];
-      t2 = Sum0 (f) + Maj (f, g, h);
-      a += t1;
-      e  = t1 + t2;
-
-      t1 = d + Sum1 (a) + Ch (a, b, c) + k[t+4] + w[t+4];
-      t2 = Sum0 (e) + Maj (e, f, g);
-      h += t1;
-      d  = t1 + t2;
-
-      t1 = c + Sum1 (h) + Ch (h, a, b) + k[t+5] + w[t+5];
-      t2 = Sum0 (d) + Maj (d, e, f);
-      g += t1;
-      c  = t1 + t2;
-
-      t1 = b + Sum1 (g) + Ch (g, h, a) + k[t+6] + w[t+6];
-      t2 = Sum0 (c) + Maj (c, d, e);
-      f += t1;
-      b  = t1 + t2;
-
-      t1 = a + Sum1 (f) + Ch (f, g, h) + k[t+7] + w[t+7];
-      t2 = Sum0 (b) + Maj (b, c, d);
-      e += t1;
-      a  = t1 + t2;
-
-      t += 8;
-#endif
-    }
-
-  /* Update chaining vars.  */
-  hd->h0 += a;
-  hd->h1 += b;
-  hd->h2 += c;
-  hd->h3 += d;
-  hd->h4 += e;
-  hd->h5 += f;
-  hd->h6 += g;
-  hd->h7 += h;
-}
-
-
-/* Update the message digest with the contents
- * of INBUF with length INLEN.
- */
-static void
-sha512_write (void *context, const void *inbuf_arg, size_t inlen)
-{
-  const unsigned char *inbuf = inbuf_arg;
-  SHA512_CONTEXT *hd = context;
-
-  if (hd->count == 128)
-    {                          /* flush the buffer */
-      transform (hd, hd->buf);
-      _gcry_burn_stack (768);
-      hd->count = 0;
-      hd->nblocks++;
-    }
-  if (!inbuf)
-    return;
-  if (hd->count)
-    {
-      for (; inlen && hd->count < 128; inlen--)
-       hd->buf[hd->count++] = *inbuf++;
-      sha512_write (context, NULL, 0);
-      if (!inlen)
-       return;
+      for (t = 0; t < 80 - 16; )
+        {
+          u64 t1, t2;
+
+          t1 = h + Sum1 (e) + Ch (e, f, g) + k[t] + w[0];
+          w[0] += S1 (w[14]) + w[9] + S0 (w[1]);
+          t2 = Sum0 (a) + Maj (a, b, c);
+          d += t1;
+          h = t1 + t2;
+
+          t1 = g + Sum1 (d) + Ch (d, e, f) + k[t+1] + w[1];
+          w[1] += S1 (w[15]) + w[10] + S0 (w[2]);
+          t2 = Sum0 (h) + Maj (h, a, b);
+          c += t1;
+          g  = t1 + t2;
+
+          t1 = f + Sum1 (c) + Ch (c, d, e) + k[t+2] + w[2];
+          w[2] += S1 (w[0]) + w[11] + S0 (w[3]);
+          t2 = Sum0 (g) + Maj (g, h, a);
+          b += t1;
+          f  = t1 + t2;
+
+          t1 = e + Sum1 (b) + Ch (b, c, d) + k[t+3] + w[3];
+          w[3] += S1 (w[1]) + w[12] + S0 (w[4]);
+          t2 = Sum0 (f) + Maj (f, g, h);
+          a += t1;
+          e  = t1 + t2;
+
+          t1 = d + Sum1 (a) + Ch (a, b, c) + k[t+4] + w[4];
+          w[4] += S1 (w[2]) + w[13] + S0 (w[5]);
+          t2 = Sum0 (e) + Maj (e, f, g);
+          h += t1;
+          d  = t1 + t2;
+
+          t1 = c + Sum1 (h) + Ch (h, a, b) + k[t+5] + w[5];
+          w[5] += S1 (w[3]) + w[14] + S0 (w[6]);
+          t2 = Sum0 (d) + Maj (d, e, f);
+          g += t1;
+          c  = t1 + t2;
+
+          t1 = b + Sum1 (g) + Ch (g, h, a) + k[t+6] + w[6];
+          w[6] += S1 (w[4]) + w[15] + S0 (w[7]);
+          t2 = Sum0 (c) + Maj (c, d, e);
+          f += t1;
+          b  = t1 + t2;
+
+          t1 = a + Sum1 (f) + Ch (f, g, h) + k[t+7] + w[7];
+          w[7] += S1 (w[5]) + w[0] + S0 (w[8]);
+          t2 = Sum0 (b) + Maj (b, c, d);
+          e += t1;
+          a  = t1 + t2;
+
+          t1 = h + Sum1 (e) + Ch (e, f, g) + k[t+8] + w[8];
+          w[8] += S1 (w[6]) + w[1] + S0 (w[9]);
+          t2 = Sum0 (a) + Maj (a, b, c);
+          d += t1;
+          h  = t1 + t2;
+
+          t1 = g + Sum1 (d) + Ch (d, e, f) + k[t+9] + w[9];
+          w[9] += S1 (w[7]) + w[2] + S0 (w[10]);
+          t2 = Sum0 (h) + Maj (h, a, b);
+          c += t1;
+          g  = t1 + t2;
+
+          t1 = f + Sum1 (c) + Ch (c, d, e) + k[t+10] + w[10];
+          w[10] += S1 (w[8]) + w[3] + S0 (w[11]);
+          t2 = Sum0 (g) + Maj (g, h, a);
+          b += t1;
+          f  = t1 + t2;
+
+          t1 = e + Sum1 (b) + Ch (b, c, d) + k[t+11] + w[11];
+          w[11] += S1 (w[9]) + w[4] + S0 (w[12]);
+          t2 = Sum0 (f) + Maj (f, g, h);
+          a += t1;
+          e  = t1 + t2;
+
+          t1 = d + Sum1 (a) + Ch (a, b, c) + k[t+12] + w[12];
+          w[12] += S1 (w[10]) + w[5] + S0 (w[13]);
+          t2 = Sum0 (e) + Maj (e, f, g);
+          h += t1;
+          d  = t1 + t2;
+
+          t1 = c + Sum1 (h) + Ch (h, a, b) + k[t+13] + w[13];
+          w[13] += S1 (w[11]) + w[6] + S0 (w[14]);
+          t2 = Sum0 (d) + Maj (d, e, f);
+          g += t1;
+          c  = t1 + t2;
+
+          t1 = b + Sum1 (g) + Ch (g, h, a) + k[t+14] + w[14];
+          w[14] += S1 (w[12]) + w[7] + S0 (w[15]);
+          t2 = Sum0 (c) + Maj (c, d, e);
+          f += t1;
+          b  = t1 + t2;
+
+          t1 = a + Sum1 (f) + Ch (f, g, h) + k[t+15] + w[15];
+          w[15] += S1 (w[13]) + w[8] + S0 (w[0]);
+          t2 = Sum0 (b) + Maj (b, c, d);
+          e += t1;
+          a  = t1 + t2;
+
+          t += 16;
+        }
+
+      for (; t < 80; )
+        {
+          u64 t1, t2;
+
+          t1 = h + Sum1 (e) + Ch (e, f, g) + k[t] + w[0];
+          t2 = Sum0 (a) + Maj (a, b, c);
+          d += t1;
+          h  = t1 + t2;
+
+          t1 = g + Sum1 (d) + Ch (d, e, f) + k[t+1] + w[1];
+          t2 = Sum0 (h) + Maj (h, a, b);
+          c += t1;
+          g  = t1 + t2;
+
+          t1 = f + Sum1 (c) + Ch (c, d, e) + k[t+2] + w[2];
+          t2 = Sum0 (g) + Maj (g, h, a);
+          b += t1;
+          f  = t1 + t2;
+
+          t1 = e + Sum1 (b) + Ch (b, c, d) + k[t+3] + w[3];
+          t2 = Sum0 (f) + Maj (f, g, h);
+          a += t1;
+          e  = t1 + t2;
+
+          t1 = d + Sum1 (a) + Ch (a, b, c) + k[t+4] + w[4];
+          t2 = Sum0 (e) + Maj (e, f, g);
+          h += t1;
+          d  = t1 + t2;
+
+          t1 = c + Sum1 (h) + Ch (h, a, b) + k[t+5] + w[5];
+          t2 = Sum0 (d) + Maj (d, e, f);
+          g += t1;
+          c  = t1 + t2;
+
+          t1 = b + Sum1 (g) + Ch (g, h, a) + k[t+6] + w[6];
+          t2 = Sum0 (c) + Maj (c, d, e);
+          f += t1;
+          b  = t1 + t2;
+
+          t1 = a + Sum1 (f) + Ch (f, g, h) + k[t+7] + w[7];
+          t2 = Sum0 (b) + Maj (b, c, d);
+          e += t1;
+          a  = t1 + t2;
+
+          t1 = h + Sum1 (e) + Ch (e, f, g) + k[t+8] + w[8];
+          t2 = Sum0 (a) + Maj (a, b, c);
+          d += t1;
+          h  = t1 + t2;
+
+          t1 = g + Sum1 (d) + Ch (d, e, f) + k[t+9] + w[9];
+          t2 = Sum0 (h) + Maj (h, a, b);
+          c += t1;
+          g  = t1 + t2;
+
+          t1 = f + Sum1 (c) + Ch (c, d, e) + k[t+10] + w[10];
+          t2 = Sum0 (g) + Maj (g, h, a);
+          b += t1;
+          f  = t1 + t2;
+
+          t1 = e + Sum1 (b) + Ch (b, c, d) + k[t+11] + w[11];
+          t2 = Sum0 (f) + Maj (f, g, h);
+          a += t1;
+          e  = t1 + t2;
+
+          t1 = d + Sum1 (a) + Ch (a, b, c) + k[t+12] + w[12];
+          t2 = Sum0 (e) + Maj (e, f, g);
+          h += t1;
+          d  = t1 + t2;
+
+          t1 = c + Sum1 (h) + Ch (h, a, b) + k[t+13] + w[13];
+          t2 = Sum0 (d) + Maj (d, e, f);
+          g += t1;
+          c  = t1 + t2;
+
+          t1 = b + Sum1 (g) + Ch (g, h, a) + k[t+14] + w[14];
+          t2 = Sum0 (c) + Maj (c, d, e);
+          f += t1;
+          b  = t1 + t2;
+
+          t1 = a + Sum1 (f) + Ch (f, g, h) + k[t+15] + w[15];
+          t2 = Sum0 (b) + Maj (b, c, d);
+          e += t1;
+          a  = t1 + t2;
+
+          t += 16;
+        }
+
+      /* Update chaining vars.  */
+      hd->h[0] += a;
+      hd->h[1] += b;
+      hd->h[2] += c;
+      hd->h[3] += d;
+      hd->h[4] += e;
+      hd->h[5] += f;
+      hd->h[6] += g;
+      hd->h[7] += h;
+
+      data += 128;
     }
+  while (--nblks);
 
-  while (inlen >= 128)
-    {
-      transform (hd, inbuf);
-      hd->count = 0;
-      hd->nblocks++;
-      inlen -= 128;
-      inbuf += 128;
-    }
-  _gcry_burn_stack (768);
-  for (; inlen && hd->count < 128; inlen--)
-    hd->buf[hd->count++] = *inbuf++;
+  return (8 + 16) * sizeof(u64) + sizeof(u32) + 3 * sizeof(void*);
 }
+#endif /*!USE_ARM_ASM*/
 
 
 /* The routine final terminates the computation and
@@ -353,18 +779,22 @@ static void
 sha512_final (void *context)
 {
   SHA512_CONTEXT *hd = context;
-  u64 t, msb, lsb;
+  unsigned int burn;
+  u64 t, th, msb, lsb;
   byte *p;
 
-  sha512_write (context, NULL, 0); /* flush */ ;
+  t = hd->bctx.nblocks;
+  /* if (sizeof t == sizeof hd->bctx.nblocks) */
+  th = hd->bctx.nblocks_high;
+  /* else */
+  /*   th = hd->bctx.nblocks >> 64; In case we ever use u128  */
 
-  t = hd->nblocks;
   /* multiply by 128 to make a byte count */
   lsb = t << 7;
-  msb = t >> 57;
+  msb = (th << 7) | (t >> 57);
   /* add the count */
   t = lsb;
-  if ((lsb += hd->count) < t)
+  if ((lsb += hd->bctx.count) < t)
     msb++;
   /* multiply by 8 to make a bit count */
   t = lsb;
@@ -372,50 +802,41 @@ sha512_final (void *context)
   msb <<= 3;
   msb |= t >> 61;
 
-  if (hd->count < 112)
-    {                          /* enough room */
-      hd->buf[hd->count++] = 0x80;     /* pad */
-      while (hd->count < 112)
-       hd->buf[hd->count++] = 0;       /* pad */
+  if (0)
+    { }
+#ifdef USE_S390X_CRYPTO
+  else if (hd->use_s390x_crypto)
+    {
+      burn = do_sha512_final_s390x (hd, hd->bctx.buf, hd->bctx.count, msb, 
lsb);
     }
+#endif
   else
-    {                          /* need one extra block */
-      hd->buf[hd->count++] = 0x80;     /* pad character */
-      while (hd->count < 128)
-       hd->buf[hd->count++] = 0;
-      sha512_write (context, NULL, 0); /* flush */ ;
-      memset (hd->buf, 0, 112);        /* fill next block with zeroes */
+    {
+      if (hd->bctx.count < 112)
+       {
+         /* enough room */
+         hd->bctx.buf[hd->bctx.count++] = 0x80;  /* pad */
+         if (hd->bctx.count < 112)
+           memset (&hd->bctx.buf[hd->bctx.count], 0, 112 - hd->bctx.count);
+       }
+      else
+       {
+         /* need one extra block */
+         hd->bctx.buf[hd->bctx.count++] = 0x80;  /* pad character */
+         if (hd->bctx.count < 128)
+           memset (&hd->bctx.buf[hd->bctx.count], 0, 128 - hd->bctx.count);
+         hd->bctx.count = 128;
+         _gcry_md_block_write (context, NULL, 0); /* flush */
+         memset (hd->bctx.buf, 0, 112);  /* fill next block with zeroes */
+       }
+      /* append the 128 bit count */
+      buf_put_be64(hd->bctx.buf + 112, msb);
+      buf_put_be64(hd->bctx.buf + 120, lsb);
+      burn = (*hd->bctx.bwrite) (hd, hd->bctx.buf, 1);
     }
-  /* append the 128 bit count */
-  hd->buf[112] = msb >> 56;
-  hd->buf[113] = msb >> 48;
-  hd->buf[114] = msb >> 40;
-  hd->buf[115] = msb >> 32;
-  hd->buf[116] = msb >> 24;
-  hd->buf[117] = msb >> 16;
-  hd->buf[118] = msb >> 8;
-  hd->buf[119] = msb;
-
-  hd->buf[120] = lsb >> 56;
-  hd->buf[121] = lsb >> 48;
-  hd->buf[122] = lsb >> 40;
-  hd->buf[123] = lsb >> 32;
-  hd->buf[124] = lsb >> 24;
-  hd->buf[125] = lsb >> 16;
-  hd->buf[126] = lsb >> 8;
-  hd->buf[127] = lsb;
-  transform (hd, hd->buf);
-  _gcry_burn_stack (768);
-
-  p = hd->buf;
-#ifdef WORDS_BIGENDIAN
-#define X(a) do { *(u64*)p = hd->h##a ; p += 8; } while (0)
-#else /* little endian */
-#define X(a) do { *p++ = hd->h##a >> 56; *p++ = hd->h##a >> 48;              \
-                  *p++ = hd->h##a >> 40; *p++ = hd->h##a >> 32;              \
-                  *p++ = hd->h##a >> 24; *p++ = hd->h##a >> 16;              \
-                  *p++ = hd->h##a >> 8;  *p++ = hd->h##a; } while (0)
-#endif
+
+  p = hd->bctx.buf;
+#define X(a) do { buf_put_be64(p, hd->state.h[a]); p += 8; } while (0)
   X (0);
   X (1);
   X (2);
@@ -427,13 +848,96 @@ sha512_final (void *context)
   X (6);
   X (7);
 #undef X
+
+  hd->bctx.count = 0;
+
+  _gcry_burn_stack (burn);
 }
 
 static byte *
 sha512_read (void *context)
 {
   SHA512_CONTEXT *hd = (SHA512_CONTEXT *) context;
-  return hd->buf;
+  return hd->bctx.buf;
+}
+
+
+/* Shortcut functions which puts the hash value of the supplied buffer iov
+ * into outbuf which must have a size of 64 bytes.  */
+static void
+_gcry_sha512_hash_buffers (void *outbuf, size_t nbytes,
+                          const gcry_buffer_t *iov, int iovcnt)
+{
+  SHA512_CONTEXT hd;
+
+  (void)nbytes;
+
+  sha512_init (&hd, 0);
+  for (;iovcnt > 0; iov++, iovcnt--)
+    _gcry_md_block_write (&hd,
+                          (const char*)iov[0].data + iov[0].off, iov[0].len);
+  sha512_final (&hd);
+  memcpy (outbuf, hd.bctx.buf, 64);
+}
+
+
+
+/* Shortcut functions which puts the hash value of the supplied buffer iov
+ * into outbuf which must have a size of 48 bytes.  */
+static void
+_gcry_sha384_hash_buffers (void *outbuf, size_t nbytes,
+                          const gcry_buffer_t *iov, int iovcnt)
+{
+  SHA512_CONTEXT hd;
+
+  (void)nbytes;
+
+  sha384_init (&hd, 0);
+  for (;iovcnt > 0; iov++, iovcnt--)
+    _gcry_md_block_write (&hd,
+                          (const char*)iov[0].data + iov[0].off, iov[0].len);
+  sha512_final (&hd);
+  memcpy (outbuf, hd.bctx.buf, 48);
+}
+
+
+
+/* Shortcut functions which puts the hash value of the supplied buffer iov
+ * into outbuf which must have a size of 32 bytes.  */
+static void
+_gcry_sha512_256_hash_buffers (void *outbuf, size_t nbytes,
+                              const gcry_buffer_t *iov, int iovcnt)
+{
+  SHA512_CONTEXT hd;
+
+  (void)nbytes;
+
+  sha512_256_init (&hd, 0);
+  for (;iovcnt > 0; iov++, iovcnt--)
+    _gcry_md_block_write (&hd,
+                          (const char*)iov[0].data + iov[0].off, iov[0].len);
+  sha512_final (&hd);
+  memcpy (outbuf, hd.bctx.buf, 32);
+}
+
+
+
+/* Shortcut functions which puts the hash value of the supplied buffer iov
+ * into outbuf which must have a size of 28 bytes.  */
+static void
+_gcry_sha512_224_hash_buffers (void *outbuf, size_t nbytes,
+                              const gcry_buffer_t *iov, int iovcnt)
+{
+  SHA512_CONTEXT hd;
+
+  (void)nbytes;
+
+  sha512_224_init (&hd, 0);
+  for (;iovcnt > 0; iov++, iovcnt--)
+    _gcry_md_block_write (&hd,
+                          (const char*)iov[0].data + iov[0].off, iov[0].len);
+  sha512_final (&hd);
+  memcpy (outbuf, hd.bctx.buf, 28);
 }
 
 
@@ -546,6 +1050,102 @@ selftests_sha512 (int extended, selftest_report_func_t 
report)
   return GPG_ERR_SELFTEST_FAILED;
 }
 
+static gpg_err_code_t
+selftests_sha512_224 (int extended, selftest_report_func_t report)
+{
+  const char *what;
+  const char *errtxt;
+
+  what = "short string";
+  errtxt = _gcry_hash_selftest_check_one
+    (GCRY_MD_SHA512_224, 0,
+     "abc", 3,
+     "\x46\x34\x27\x0F\x70\x7B\x6A\x54\xDA\xAE\x75\x30\x46\x08\x42\xE2"
+     "\x0E\x37\xED\x26\x5C\xEE\xE9\xA4\x3E\x89\x24\xAA",
+     28);
+  if (errtxt)
+    goto failed;
+
+  if (extended)
+    {
+      what = "long string";
+      errtxt = _gcry_hash_selftest_check_one
+        (GCRY_MD_SHA512_224, 0,
+         "abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmn"
+         "hijklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu", 112,
+         "\x23\xFE\xC5\xBB\x94\xD6\x0B\x23\x30\x81\x92\x64\x0B\x0C\x45\x33"
+         "\x35\xD6\x64\x73\x4F\xE4\x0E\x72\x68\x67\x4A\xF9",
+         28);
+      if (errtxt)
+        goto failed;
+
+      what = "one million \"a\"";
+      errtxt = _gcry_hash_selftest_check_one
+        (GCRY_MD_SHA512_224, 1,
+         NULL, 0,
+         "\x37\xab\x33\x1d\x76\xf0\xd3\x6d\xe4\x22\xbd\x0e\xde\xb2\x2a\x28"
+         "\xac\xcd\x48\x7b\x7a\x84\x53\xae\x96\x5d\xd2\x87",
+         28);
+      if (errtxt)
+        goto failed;
+    }
+
+  return 0; /* Succeeded. */
+
+ failed:
+  if (report)
+    report ("digest", GCRY_MD_SHA512_224, what, errtxt);
+  return GPG_ERR_SELFTEST_FAILED;
+}
+
+static gpg_err_code_t
+selftests_sha512_256 (int extended, selftest_report_func_t report)
+{
+  const char *what;
+  const char *errtxt;
+
+  what = "short string";
+  errtxt = _gcry_hash_selftest_check_one
+    (GCRY_MD_SHA512_256, 0,
+     "abc", 3,
+     "\x53\x04\x8E\x26\x81\x94\x1E\xF9\x9B\x2E\x29\xB7\x6B\x4C\x7D\xAB"
+     "\xE4\xC2\xD0\xC6\x34\xFC\x6D\x46\xE0\xE2\xF1\x31\x07\xE7\xAF\x23",
+     32);
+  if (errtxt)
+    goto failed;
+
+  if (extended)
+    {
+      what = "long string";
+      errtxt = _gcry_hash_selftest_check_one
+        (GCRY_MD_SHA512_256, 0,
+         "abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmn"
+         "hijklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu", 112,
+         "\x39\x28\xE1\x84\xFB\x86\x90\xF8\x40\xDA\x39\x88\x12\x1D\x31\xBE"
+         "\x65\xCB\x9D\x3E\xF8\x3E\xE6\x14\x6F\xEA\xC8\x61\xE1\x9B\x56\x3A",
+         32);
+      if (errtxt)
+        goto failed;
+
+      what = "one million \"a\"";
+      errtxt = _gcry_hash_selftest_check_one
+        (GCRY_MD_SHA512_256, 1,
+         NULL, 0,
+         "\x9a\x59\xa0\x52\x93\x01\x87\xa9\x70\x38\xca\xe6\x92\xf3\x07\x08"
+         "\xaa\x64\x91\x92\x3e\xf5\x19\x43\x94\xdc\x68\xd5\x6c\x74\xfb\x21",
+         32);
+      if (errtxt)
+        goto failed;
+    }
+
+  return 0; /* Succeeded. */
+
+ failed:
+  if (report)
+    report ("digest", GCRY_MD_SHA512_256, what, errtxt);
+  return GPG_ERR_SELFTEST_FAILED;
+}
+
 
 /* Run a full self-test for ALGO and return 0 on success.  */
 static gpg_err_code_t
@@ -561,6 +1161,12 @@ run_selftests (int algo, int extended, 
selftest_report_func_t report)
     case GCRY_MD_SHA512:
       ec = selftests_sha512 (extended, report);
       break;
+    case GCRY_MD_SHA512_224:
+      ec = selftests_sha512_224 (extended, report);
+      break;
+    case GCRY_MD_SHA512_256:
+      ec = selftests_sha512_256 (extended, report);
+      break;
     default:
       ec = GPG_ERR_DIGEST_ALGO;
       break;
@@ -572,58 +1178,112 @@ run_selftests (int algo, int extended, 
selftest_report_func_t report)
 
 
 
-static byte sha512_asn[] =     /* Object ID is 2.16.840.1.101.3.4.2.3 */
+static const byte sha512_asn[] =       /* Object ID is 2.16.840.1.101.3.4.2.3 
*/
   {
     0x30, 0x51, 0x30, 0x0d, 0x06, 0x09, 0x60, 0x86,
     0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x03, 0x05,
     0x00, 0x04, 0x40
   };
 
-static gcry_md_oid_spec_t oid_spec_sha512[] =
+static const gcry_md_oid_spec_t oid_spec_sha512[] =
   {
     { "2.16.840.1.101.3.4.2.3" },
 
     /* PKCS#1 sha512WithRSAEncryption */
     { "1.2.840.113549.1.1.13" },
+    /* ANSI X9.62  ecdsaWithSHA512 */
+    { "1.2.840.10045.4.3.4" },
 
     { NULL }
   };
 
-gcry_md_spec_t _gcry_digest_spec_sha512 =
+const gcry_md_spec_t _gcry_digest_spec_sha512 =
   {
+    GCRY_MD_SHA512, {0, 1},
     "SHA512", sha512_asn, DIM (sha512_asn), oid_spec_sha512, 64,
-    sha512_init, sha512_write, sha512_final, sha512_read,
+    sha512_init, _gcry_md_block_write, sha512_final, sha512_read, NULL,
+    _gcry_sha512_hash_buffers,
     sizeof (SHA512_CONTEXT),
-  };
-md_extra_spec_t _gcry_digest_extraspec_sha512 =
-  {
     run_selftests
   };
 
-static byte sha384_asn[] =     /* Object ID is 2.16.840.1.101.3.4.2.2 */
+static const byte sha384_asn[] =       /* Object ID is 2.16.840.1.101.3.4.2.2 
*/
   {
     0x30, 0x41, 0x30, 0x0d, 0x06, 0x09, 0x60, 0x86,
     0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x02, 0x05,
     0x00, 0x04, 0x30
   };
 
-static gcry_md_oid_spec_t oid_spec_sha384[] =
+static const gcry_md_oid_spec_t oid_spec_sha384[] =
   {
     { "2.16.840.1.101.3.4.2.2" },
 
     /* PKCS#1 sha384WithRSAEncryption */
     { "1.2.840.113549.1.1.12" },
 
+    /* SHA384WithECDSA: RFC 7427 (A.3.3.) */
+    { "1.2.840.10045.4.3.3" },
+
+    /* ANSI X9.62  ecdsaWithSHA384 */
+    { "1.2.840.10045.4.3.3" },
+
     { NULL },
   };
 
-gcry_md_spec_t _gcry_digest_spec_sha384 =
+const gcry_md_spec_t _gcry_digest_spec_sha384 =
   {
+    GCRY_MD_SHA384, {0, 1},
     "SHA384", sha384_asn, DIM (sha384_asn), oid_spec_sha384, 48,
-    sha384_init, sha512_write, sha512_final, sha512_read,
+    sha384_init, _gcry_md_block_write, sha512_final, sha512_read, NULL,
+    _gcry_sha384_hash_buffers,
+    sizeof (SHA512_CONTEXT),
+    run_selftests
+  };
+
+static const byte sha512_256_asn[] =
+  {
+    0x30, 0x31, 0x30, 0x0d, 0x06, 0x09, 0x60, 0x86,
+    0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x06, 0x05,
+    0x00, 0x04, 0x20
+  };
+
+static const gcry_md_oid_spec_t oid_spec_sha512_256[] =
+  {
+    { "2.16.840.1.101.3.4.2.6" },
+
+    { NULL },
+  };
+
+const gcry_md_spec_t _gcry_digest_spec_sha512_256 =
+  {
+    GCRY_MD_SHA512_256, {0, 1},
+    "SHA512_256", sha512_256_asn, DIM (sha512_256_asn), oid_spec_sha512_256, 
32,
+    sha512_256_init, _gcry_md_block_write, sha512_final, sha512_read, NULL,
+    _gcry_sha512_256_hash_buffers,
     sizeof (SHA512_CONTEXT),
+    run_selftests
   };
-md_extra_spec_t _gcry_digest_extraspec_sha384 =
+
+static const byte sha512_224_asn[] =
   {
+    0x30, 0x2d, 0x30, 0x0d, 0x06, 0x09, 0x60, 0x86,
+    0x48, 0x01, 0x65, 0x03, 0x04, 0x02, 0x05, 0x05,
+    0x00, 0x04, 0x1c
+  };
+
+static const gcry_md_oid_spec_t oid_spec_sha512_224[] =
+  {
+    { "2.16.840.1.101.3.4.2.5" },
+
+    { NULL },
+  };
+
+const gcry_md_spec_t _gcry_digest_spec_sha512_224 =
+  {
+    GCRY_MD_SHA512_224, {0, 1},
+    "SHA512_224", sha512_224_asn, DIM (sha512_224_asn), oid_spec_sha512_224, 
28,
+    sha512_224_init, _gcry_md_block_write, sha512_final, sha512_read, NULL,
+    _gcry_sha512_224_hash_buffers,
+    sizeof (SHA512_CONTEXT),
     run_selftests
   };
diff --git a/grub-core/lib/libgcrypt/cipher/sm3-aarch64.S 
b/grub-core/lib/libgcrypt/cipher/sm3-aarch64.S
new file mode 100644
index 000000000..3fb890063
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/sm3-aarch64.S
@@ -0,0 +1,657 @@
+/* sm3-aarch64.S - ARMv8/AArch64 accelerated SM3 transform function
+ *
+ * Copyright (C) 2021 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "asm-common-aarch64.h"
+
+#if defined(__AARCH64EL__) && \
+    defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) && \
+    defined(HAVE_GCC_INLINE_ASM_AARCH64_NEON) && \
+    defined(USE_SM3)
+
+.cpu generic+simd
+
+/* Constants */
+
+.text
+.align 4
+ELF(.type _gcry_sm3_aarch64_consts,@object)
+_gcry_sm3_aarch64_consts:
+.LKtable:
+  .long 0x79cc4519, 0xf3988a32, 0xe7311465, 0xce6228cb
+  .long 0x9cc45197, 0x3988a32f, 0x7311465e, 0xe6228cbc
+  .long 0xcc451979, 0x988a32f3, 0x311465e7, 0x6228cbce
+  .long 0xc451979c, 0x88a32f39, 0x11465e73, 0x228cbce6
+  .long 0x9d8a7a87, 0x3b14f50f, 0x7629ea1e, 0xec53d43c
+  .long 0xd8a7a879, 0xb14f50f3, 0x629ea1e7, 0xc53d43ce
+  .long 0x8a7a879d, 0x14f50f3b, 0x29ea1e76, 0x53d43cec
+  .long 0xa7a879d8, 0x4f50f3b1, 0x9ea1e762, 0x3d43cec5
+  .long 0x7a879d8a, 0xf50f3b14, 0xea1e7629, 0xd43cec53
+  .long 0xa879d8a7, 0x50f3b14f, 0xa1e7629e, 0x43cec53d
+  .long 0x879d8a7a, 0x0f3b14f5, 0x1e7629ea, 0x3cec53d4
+  .long 0x79d8a7a8, 0xf3b14f50, 0xe7629ea1, 0xcec53d43
+  .long 0x9d8a7a87, 0x3b14f50f, 0x7629ea1e, 0xec53d43c
+  .long 0xd8a7a879, 0xb14f50f3, 0x629ea1e7, 0xc53d43ce
+  .long 0x8a7a879d, 0x14f50f3b, 0x29ea1e76, 0x53d43cec
+  .long 0xa7a879d8, 0x4f50f3b1, 0x9ea1e762, 0x3d43cec5
+ELF(.size _gcry_sm3_aarch64_consts,.-_gcry_sm3_aarch64_consts)
+
+/* Context structure */
+
+#define state_h0 0
+#define state_h1 4
+#define state_h2 8
+#define state_h3 12
+#define state_h4 16
+#define state_h5 20
+#define state_h6 24
+#define state_h7 28
+
+/* Stack structure */
+
+#define STACK_W_SIZE        (32 * 2 * 3)
+
+#define STACK_W             (0)
+#define STACK_SIZE          (STACK_W + STACK_W_SIZE)
+
+/* Register macros */
+
+#define RSTATE x0
+#define RDATA  x1
+#define RNBLKS x2
+#define RKPTR  x28
+#define RFRAME x29
+
+#define ra w3
+#define rb w4
+#define rc w5
+#define rd w6
+#define re w7
+#define rf w8
+#define rg w9
+#define rh w10
+
+#define t0 w11
+#define t1 w12
+#define t2 w13
+#define t3 w14
+#define t4 w15
+#define t5 w16
+#define t6 w17
+
+#define k_even w19
+#define k_odd w20
+
+#define addr0 x21
+#define addr1 x22
+
+#define s0 w23
+#define s1 w24
+#define s2 w25
+#define s3 w26
+
+#define W0 v0
+#define W1 v1
+#define W2 v2
+#define W3 v3
+#define W4 v4
+#define W5 v5
+
+#define XTMP0 v6
+#define XTMP1 v7
+#define XTMP2 v16
+#define XTMP3 v17
+#define XTMP4 v18
+#define XTMP5 v19
+#define XTMP6 v20
+
+/* Helper macros. */
+
+#define _(...) /*_*/
+
+#define clear_vec(x) \
+        movi x.8h, #0;
+
+#define rolw(o, a, n) \
+        ror o, a, #(32 - n);
+
+/* Round function macros. */
+
+#define GG1_1(x, y, z, o, t) \
+        eor o, x, y;
+#define GG1_2(x, y, z, o, t) \
+        eor o, o, z;
+#define GG1_3(x, y, z, o, t)
+
+#define FF1_1(x, y, z, o, t) GG1_1(x, y, z, o, t)
+#define FF1_2(x, y, z, o, t)
+#define FF1_3(x, y, z, o, t) GG1_2(x, y, z, o, t)
+
+#define GG2_1(x, y, z, o, t) \
+        bic o, z, x;
+#define GG2_2(x, y, z, o, t) \
+        and t, y, x;
+#define GG2_3(x, y, z, o, t) \
+        eor o, o, t;
+
+#define FF2_1(x, y, z, o, t) \
+        eor o, x, y;
+#define FF2_2(x, y, z, o, t) \
+        and t, x, y; \
+        and o, o, z;
+#define FF2_3(x, y, z, o, t) \
+        eor o, o, t;
+
+#define R(i, a, b, c, d, e, f, g, h, k, K_LOAD, round, widx, wtype, IOP, 
iop_param) \
+        K_LOAD(round); \
+        ldr t5, [sp, #(wtype##_W1_ADDR(round, widx))]; \
+        rolw(t0, a, 12);                              /* rol(a, 12) => t0 */ \
+      IOP(1, iop_param); \
+        FF##i##_1(a, b, c, t1, t2); \
+        ldr t6, [sp, #(wtype##_W1W2_ADDR(round, widx))]; \
+        add k, k, e; \
+      IOP(2, iop_param); \
+        GG##i##_1(e, f, g, t3, t4); \
+        FF##i##_2(a, b, c, t1, t2); \
+      IOP(3, iop_param); \
+        add k, k, t0; \
+        add h, h, t5; \
+        add d, d, t6;                                 /* w1w2 + d => d */ \
+      IOP(4, iop_param); \
+        rolw(k, k, 7);                                /* rol (t0 + e + t), 7) 
=> k */ \
+        GG##i##_2(e, f, g, t3, t4); \
+        add h, h, k;                                  /* h + w1 + k => h */ \
+      IOP(5, iop_param); \
+        FF##i##_3(a, b, c, t1, t2); \
+        eor t0, t0, k;                                /* k ^ t0 => t0 */ \
+        GG##i##_3(e, f, g, t3, t4); \
+        add d, d, t1;                                 /* FF(a,b,c) + d => d */ 
\
+      IOP(6, iop_param); \
+        add t3, t3, h;                                /* GG(e,f,g) + h => t3 
*/ \
+        rolw(b, b, 9);                                /* rol(b, 9) => b */ \
+        eor h, t3, t3, ror #(32-9); \
+      IOP(7, iop_param); \
+        add d, d, t0;                                 /* t0 + d => d */ \
+        rolw(f, f, 19);                               /* rol(f, 19) => f */ \
+      IOP(8, iop_param); \
+        eor h, h, t3, ror #(32-17);                   /* P0(t3) => h */ \
+
+#define R1(a, b, c, d, e, f, g, h, k, K_LOAD, round, widx, wtype, IOP, 
iop_param) \
+        R(1, ##a, ##b, ##c, ##d, ##e, ##f, ##g, ##h, ##k, K_LOAD, round, widx, 
wtype, IOP, iop_param)
+
+#define R2(a, b, c, d, e, f, g, h, k, K_LOAD, round, widx, wtype, IOP, 
iop_param) \
+        R(2, ##a, ##b, ##c, ##d, ##e, ##f, ##g, ##h, ##k, K_LOAD, round, widx, 
wtype, IOP, iop_param)
+
+#define KL(round) \
+        ldp k_even, k_odd, [RKPTR, #(4*(round))];
+
+/* Input expansion macros. */
+
+/* Byte-swapped input address. */
+#define IW_W_ADDR(round, widx, offs) \
+        (STACK_W + ((round) / 4) * 64 + (offs) + ((widx) * 4))
+
+/* Expanded input address. */
+#define XW_W_ADDR(round, widx, offs) \
+        (STACK_W + ((((round) / 3) - 4) % 2) * 64 + (offs) + ((widx) * 4))
+
+/* Rounds 1-12, byte-swapped input block addresses. */
+#define IW_W1_ADDR(round, widx)   IW_W_ADDR(round, widx, 32)
+#define IW_W1W2_ADDR(round, widx) IW_W_ADDR(round, widx, 48)
+
+/* Rounds 1-12, expanded input block addresses. */
+#define XW_W1_ADDR(round, widx)   XW_W_ADDR(round, widx, 0)
+#define XW_W1W2_ADDR(round, widx) XW_W_ADDR(round, widx, 16)
+
+/* Input block loading.
+ * Interleaving within round function needed for in-order CPUs. */
+#define LOAD_W_VEC_1_1() \
+        add addr0, sp, #IW_W1_ADDR(0, 0);
+#define LOAD_W_VEC_1_2() \
+        add addr1, sp, #IW_W1_ADDR(4, 0);
+#define LOAD_W_VEC_1_3() \
+        ld1 {W0.16b}, [RDATA], #16;
+#define LOAD_W_VEC_1_4() \
+        ld1 {W1.16b}, [RDATA], #16;
+#define LOAD_W_VEC_1_5() \
+        ld1 {W2.16b}, [RDATA], #16;
+#define LOAD_W_VEC_1_6() \
+        ld1 {W3.16b}, [RDATA], #16;
+#define LOAD_W_VEC_1_7() \
+        rev32 XTMP0.16b, W0.16b;
+#define LOAD_W_VEC_1_8() \
+        rev32 XTMP1.16b, W1.16b;
+#define LOAD_W_VEC_2_1() \
+        rev32 XTMP2.16b, W2.16b;
+#define LOAD_W_VEC_2_2() \
+        rev32 XTMP3.16b, W3.16b;
+#define LOAD_W_VEC_2_3() \
+        eor XTMP4.16b, XTMP1.16b, XTMP0.16b;
+#define LOAD_W_VEC_2_4() \
+        eor XTMP5.16b, XTMP2.16b, XTMP1.16b;
+#define LOAD_W_VEC_2_5() \
+        st1 {XTMP0.16b}, [addr0], #16;
+#define LOAD_W_VEC_2_6() \
+        st1 {XTMP4.16b}, [addr0]; \
+        add addr0, sp, #IW_W1_ADDR(8, 0);
+#define LOAD_W_VEC_2_7() \
+        eor XTMP6.16b, XTMP3.16b, XTMP2.16b;
+#define LOAD_W_VEC_2_8() \
+        ext W0.16b, XTMP0.16b, XTMP0.16b, #8;  /* W0: xx, w0, xx, xx */
+#define LOAD_W_VEC_3_1() \
+        mov W2.16b, XTMP1.16b;                 /* W2: xx, w6, w5, w4 */
+#define LOAD_W_VEC_3_2() \
+        st1 {XTMP1.16b}, [addr1], #16;
+#define LOAD_W_VEC_3_3() \
+        st1 {XTMP5.16b}, [addr1]; \
+        ext W1.16b, XTMP0.16b, XTMP0.16b, #4;  /* W1: xx, w3, w2, w1 */
+#define LOAD_W_VEC_3_4() \
+        ext W3.16b, XTMP1.16b, XTMP2.16b, #12; /* W3: xx, w9, w8, w7 */
+#define LOAD_W_VEC_3_5() \
+        ext W4.16b, XTMP2.16b, XTMP3.16b, #8;  /* W4: xx, w12, w11, w10 */
+#define LOAD_W_VEC_3_6() \
+        st1 {XTMP2.16b}, [addr0], #16;
+#define LOAD_W_VEC_3_7() \
+        st1 {XTMP6.16b}, [addr0];
+#define LOAD_W_VEC_3_8() \
+        ext W5.16b, XTMP3.16b, XTMP3.16b, #4;  /* W5: xx, w15, w14, w13 */
+
+#define LOAD_W_VEC_1(iop_num, ...) \
+        LOAD_W_VEC_1_##iop_num()
+#define LOAD_W_VEC_2(iop_num, ...) \
+        LOAD_W_VEC_2_##iop_num()
+#define LOAD_W_VEC_3(iop_num, ...) \
+        LOAD_W_VEC_3_##iop_num()
+
+/* Message scheduling. Note: 3 words per vector register.
+ * Interleaving within round function needed for in-order CPUs. */
+#define SCHED_W_1_1(round, w0, w1, w2, w3, w4, w5) \
+        /* Load (w[i - 16]) => XTMP0 */ \
+        /* Load (w[i - 13]) => XTMP5 */ \
+        ext XTMP0.16b, w0.16b, w0.16b, #12;    /* XTMP0: w0, xx, xx, xx */
+#define SCHED_W_1_2(round, w0, w1, w2, w3, w4, w5) \
+        ext XTMP5.16b, w1.16b, w1.16b, #12;
+#define SCHED_W_1_3(round, w0, w1, w2, w3, w4, w5) \
+        ext XTMP0.16b, XTMP0.16b, w1.16b, #12; /* XTMP0: xx, w2, w1, w0 */
+#define SCHED_W_1_4(round, w0, w1, w2, w3, w4, w5) \
+        ext XTMP5.16b, XTMP5.16b, w2.16b, #12;
+#define SCHED_W_1_5(round, w0, w1, w2, w3, w4, w5) \
+        /* w[i - 9] == w3 */ \
+        /* W3 ^ XTMP0 => XTMP0 */ \
+        eor XTMP0.16b, XTMP0.16b, w3.16b;
+#define SCHED_W_1_6(round, w0, w1, w2, w3, w4, w5) \
+        /* w[i - 3] == w5 */ \
+        /* rol(XMM5, 15) ^ XTMP0 => XTMP0 */ \
+        /* rol(XTMP5, 7) => XTMP1 */ \
+        add addr0, sp, #XW_W1_ADDR((round), 0); \
+        shl XTMP2.4s, w5.4s, #15;
+#define SCHED_W_1_7(round, w0, w1, w2, w3, w4, w5) \
+        shl XTMP1.4s, XTMP5.4s, #7;
+#define SCHED_W_1_8(round, w0, w1, w2, w3, w4, w5) \
+        sri XTMP2.4s, w5.4s, #(32-15);
+#define SCHED_W_2_1(round, w0, w1, w2, w3, w4, w5) \
+        sri XTMP1.4s, XTMP5.4s, #(32-7);
+#define SCHED_W_2_2(round, w0, w1, w2, w3, w4, w5) \
+        eor XTMP0.16b, XTMP0.16b, XTMP2.16b;
+#define SCHED_W_2_3(round, w0, w1, w2, w3, w4, w5) \
+        /* w[i - 6] == W4 */ \
+        /* W4 ^ XTMP1 => XTMP1 */ \
+        eor XTMP1.16b, XTMP1.16b, w4.16b;
+#define SCHED_W_2_4(round, w0, w1, w2, w3, w4, w5) \
+        /* P1(XTMP0) ^ XTMP1 => W0 */ \
+        shl XTMP3.4s, XTMP0.4s, #15;
+#define SCHED_W_2_5(round, w0, w1, w2, w3, w4, w5) \
+        shl XTMP4.4s, XTMP0.4s, #23;
+#define SCHED_W_2_6(round, w0, w1, w2, w3, w4, w5) \
+        eor w0.16b, XTMP1.16b, XTMP0.16b;
+#define SCHED_W_2_7(round, w0, w1, w2, w3, w4, w5) \
+        sri XTMP3.4s, XTMP0.4s, #(32-15);
+#define SCHED_W_2_8(round, w0, w1, w2, w3, w4, w5) \
+        sri XTMP4.4s, XTMP0.4s, #(32-23);
+#define SCHED_W_3_1(round, w0, w1, w2, w3, w4, w5) \
+        eor w0.16b, w0.16b, XTMP3.16b;
+#define SCHED_W_3_2(round, w0, w1, w2, w3, w4, w5) \
+        /* Load (w[i - 3]) => XTMP2 */ \
+        ext XTMP2.16b, w4.16b, w4.16b, #12;
+#define SCHED_W_3_3(round, w0, w1, w2, w3, w4, w5) \
+        eor w0.16b, w0.16b, XTMP4.16b;
+#define SCHED_W_3_4(round, w0, w1, w2, w3, w4, w5) \
+        ext XTMP2.16b, XTMP2.16b, w5.16b, #12;
+#define SCHED_W_3_5(round, w0, w1, w2, w3, w4, w5) \
+        /* W1 ^ W2 => XTMP3 */ \
+        eor XTMP3.16b, XTMP2.16b, w0.16b;
+#define SCHED_W_3_6(round, w0, w1, w2, w3, w4, w5)
+#define SCHED_W_3_7(round, w0, w1, w2, w3, w4, w5) \
+        st1 { XTMP2.16b-XTMP3.16b }, [addr0];
+#define SCHED_W_3_8(round, w0, w1, w2, w3, w4, w5)
+
+#define SCHED_W_W0W1W2W3W4W5_1(iop_num, round) \
+        SCHED_W_1_##iop_num(round, W0, W1, W2, W3, W4, W5)
+#define SCHED_W_W0W1W2W3W4W5_2(iop_num, round) \
+        SCHED_W_2_##iop_num(round, W0, W1, W2, W3, W4, W5)
+#define SCHED_W_W0W1W2W3W4W5_3(iop_num, round) \
+        SCHED_W_3_##iop_num(round, W0, W1, W2, W3, W4, W5)
+
+#define SCHED_W_W1W2W3W4W5W0_1(iop_num, round) \
+        SCHED_W_1_##iop_num(round, W1, W2, W3, W4, W5, W0)
+#define SCHED_W_W1W2W3W4W5W0_2(iop_num, round) \
+        SCHED_W_2_##iop_num(round, W1, W2, W3, W4, W5, W0)
+#define SCHED_W_W1W2W3W4W5W0_3(iop_num, round) \
+        SCHED_W_3_##iop_num(round, W1, W2, W3, W4, W5, W0)
+
+#define SCHED_W_W2W3W4W5W0W1_1(iop_num, round) \
+        SCHED_W_1_##iop_num(round, W2, W3, W4, W5, W0, W1)
+#define SCHED_W_W2W3W4W5W0W1_2(iop_num, round) \
+        SCHED_W_2_##iop_num(round, W2, W3, W4, W5, W0, W1)
+#define SCHED_W_W2W3W4W5W0W1_3(iop_num, round) \
+        SCHED_W_3_##iop_num(round, W2, W3, W4, W5, W0, W1)
+
+#define SCHED_W_W3W4W5W0W1W2_1(iop_num, round) \
+        SCHED_W_1_##iop_num(round, W3, W4, W5, W0, W1, W2)
+#define SCHED_W_W3W4W5W0W1W2_2(iop_num, round) \
+        SCHED_W_2_##iop_num(round, W3, W4, W5, W0, W1, W2)
+#define SCHED_W_W3W4W5W0W1W2_3(iop_num, round) \
+        SCHED_W_3_##iop_num(round, W3, W4, W5, W0, W1, W2)
+
+#define SCHED_W_W4W5W0W1W2W3_1(iop_num, round) \
+        SCHED_W_1_##iop_num(round, W4, W5, W0, W1, W2, W3)
+#define SCHED_W_W4W5W0W1W2W3_2(iop_num, round) \
+        SCHED_W_2_##iop_num(round, W4, W5, W0, W1, W2, W3)
+#define SCHED_W_W4W5W0W1W2W3_3(iop_num, round) \
+        SCHED_W_3_##iop_num(round, W4, W5, W0, W1, W2, W3)
+
+#define SCHED_W_W5W0W1W2W3W4_1(iop_num, round) \
+        SCHED_W_1_##iop_num(round, W5, W0, W1, W2, W3, W4)
+#define SCHED_W_W5W0W1W2W3W4_2(iop_num, round) \
+        SCHED_W_2_##iop_num(round, W5, W0, W1, W2, W3, W4)
+#define SCHED_W_W5W0W1W2W3W4_3(iop_num, round) \
+        SCHED_W_3_##iop_num(round, W5, W0, W1, W2, W3, W4)
+
+/*
+ * Transform nblks*64 bytes (nblks*16 32-bit words) at DATA.
+ *
+ * unsigned int
+ * _gcry_sm3_transform_aarch64 (void *ctx, const unsigned char *data,
+ *                              size_t nblks)
+ */
+.align 3
+.globl _gcry_sm3_transform_aarch64
+ELF(.type _gcry_sm3_transform_aarch64,%function;)
+_gcry_sm3_transform_aarch64:
+  CFI_STARTPROC();
+
+  ldp ra, rb, [RSTATE, #0];
+  ldp rc, rd, [RSTATE, #8];
+  ldp re, rf, [RSTATE, #16];
+  ldp rg, rh, [RSTATE, #24];
+
+  stp x28, x29, [sp, #-16]!;
+  CFI_ADJUST_CFA_OFFSET(16);
+  CFI_REG_ON_STACK(28, 0);
+  CFI_REG_ON_STACK(29, 8);
+  stp x19, x20, [sp, #-16]!;
+  CFI_ADJUST_CFA_OFFSET(16);
+  CFI_REG_ON_STACK(19, 0);
+  CFI_REG_ON_STACK(20, 8);
+  stp x21, x22, [sp, #-16]!;
+  CFI_ADJUST_CFA_OFFSET(16);
+  CFI_REG_ON_STACK(21, 0);
+  CFI_REG_ON_STACK(22, 8);
+  stp x23, x24, [sp, #-16]!;
+  CFI_ADJUST_CFA_OFFSET(16);
+  CFI_REG_ON_STACK(23, 0);
+  CFI_REG_ON_STACK(24, 8);
+  stp x25, x26, [sp, #-16]!;
+  CFI_ADJUST_CFA_OFFSET(16);
+  CFI_REG_ON_STACK(25, 0);
+  CFI_REG_ON_STACK(26, 8);
+  mov RFRAME, sp;
+  CFI_DEF_CFA_REGISTER(RFRAME);
+
+  sub addr0, sp, #STACK_SIZE;
+  GET_DATA_POINTER(RKPTR, .LKtable);
+  and sp, addr0, #(~63);
+
+  /* Preload first block. */
+  LOAD_W_VEC_1(1, 0);
+  LOAD_W_VEC_1(2, 0);
+  LOAD_W_VEC_1(3, 0);
+  LOAD_W_VEC_1(4, 0);
+  LOAD_W_VEC_1(5, 0);
+  LOAD_W_VEC_1(6, 0);
+  LOAD_W_VEC_1(7, 0);
+  LOAD_W_VEC_1(8, 0);
+  LOAD_W_VEC_2(1, 0);
+  LOAD_W_VEC_2(2, 0);
+  LOAD_W_VEC_2(3, 0);
+  LOAD_W_VEC_2(4, 0);
+  LOAD_W_VEC_2(5, 0);
+  LOAD_W_VEC_2(6, 0);
+  LOAD_W_VEC_2(7, 0);
+  LOAD_W_VEC_2(8, 0);
+  LOAD_W_VEC_3(1, 0);
+  LOAD_W_VEC_3(2, 0);
+  LOAD_W_VEC_3(3, 0);
+  LOAD_W_VEC_3(4, 0);
+  LOAD_W_VEC_3(5, 0);
+  LOAD_W_VEC_3(6, 0);
+  LOAD_W_VEC_3(7, 0);
+  LOAD_W_VEC_3(8, 0);
+
+.balign 16
+.Loop:
+  /* Transform 0-3 */
+  R1(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 0, 0, IW, _, 0);
+  R1(rd, ra, rb, rc, rh, re, rf, rg, k_odd,  _,  1, 1, IW, _, 0);
+  R1(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 2, 2, IW, _, 0);
+  R1(rb, rc, rd, ra, rf, rg, rh, re, k_odd,  _,  3, 3, IW, _, 0);
+
+  /* Transform 4-7 + Precalc 12-14 */
+  R1(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 4, 0, IW, _, 0);
+  R1(rd, ra, rb, rc, rh, re, rf, rg, k_odd,  _,  5, 1, IW, _, 0);
+  R1(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 6, 2, IW, 
SCHED_W_W0W1W2W3W4W5_1, 12);
+  R1(rb, rc, rd, ra, rf, rg, rh, re, k_odd,  _,  7, 3, IW, 
SCHED_W_W0W1W2W3W4W5_2, 12);
+
+  /* Transform 8-11 + Precalc 12-17 */
+  R1(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 8, 0, IW, 
SCHED_W_W0W1W2W3W4W5_3, 12);
+  R1(rd, ra, rb, rc, rh, re, rf, rg, k_odd,  _,  9, 1, IW, 
SCHED_W_W1W2W3W4W5W0_1, 15);
+  R1(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 10, 2, IW, 
SCHED_W_W1W2W3W4W5W0_2, 15);
+  R1(rb, rc, rd, ra, rf, rg, rh, re, k_odd,  _,  11, 3, IW, 
SCHED_W_W1W2W3W4W5W0_3, 15);
+
+  /* Transform 12-14 + Precalc 18-20 */
+  R1(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 12, 0, XW, 
SCHED_W_W2W3W4W5W0W1_1, 18);
+  R1(rd, ra, rb, rc, rh, re, rf, rg, k_odd,  _,  13, 1, XW, 
SCHED_W_W2W3W4W5W0W1_2, 18);
+  R1(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 14, 2, XW, 
SCHED_W_W2W3W4W5W0W1_3, 18);
+
+  /* Transform 15-17 + Precalc 21-23 */
+  R1(rb, rc, rd, ra, rf, rg, rh, re, k_odd,  _,  15, 0, XW, 
SCHED_W_W3W4W5W0W1W2_1, 21);
+  R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 16, 1, XW, 
SCHED_W_W3W4W5W0W1W2_2, 21);
+  R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd,  _,  17, 2, XW, 
SCHED_W_W3W4W5W0W1W2_3, 21);
+
+  /* Transform 18-20 + Precalc 24-26 */
+  R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 18, 0, XW, 
SCHED_W_W4W5W0W1W2W3_1, 24)
+  R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd,  _,  19, 1, XW, 
SCHED_W_W4W5W0W1W2W3_2, 24)
+  R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 20, 2, XW, 
SCHED_W_W4W5W0W1W2W3_3, 24)
+
+  /* Transform 21-23 + Precalc 27-29 */
+  R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd,  _,  21, 0, XW, 
SCHED_W_W5W0W1W2W3W4_1, 27)
+  R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 22, 1, XW, 
SCHED_W_W5W0W1W2W3W4_2, 27)
+  R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd,  _,  23, 2, XW, 
SCHED_W_W5W0W1W2W3W4_3, 27)
+
+  /* Transform 24-26 + Precalc 30-32 */
+  R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 24, 0, XW, 
SCHED_W_W0W1W2W3W4W5_1, 30)
+  R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd,  _,  25, 1, XW, 
SCHED_W_W0W1W2W3W4W5_2, 30)
+  R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 26, 2, XW, 
SCHED_W_W0W1W2W3W4W5_3, 30)
+
+  /* Transform 27-29 + Precalc 33-35 */
+  R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd,  _,  27, 0, XW, 
SCHED_W_W1W2W3W4W5W0_1, 33)
+  R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 28, 1, XW, 
SCHED_W_W1W2W3W4W5W0_2, 33)
+  R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd,  _,  29, 2, XW, 
SCHED_W_W1W2W3W4W5W0_3, 33)
+
+  /* Transform 30-32 + Precalc 36-38 */
+  R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 30, 0, XW, 
SCHED_W_W2W3W4W5W0W1_1, 36)
+  R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd,  _,  31, 1, XW, 
SCHED_W_W2W3W4W5W0W1_2, 36)
+  R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 32, 2, XW, 
SCHED_W_W2W3W4W5W0W1_3, 36)
+
+  /* Transform 33-35 + Precalc 39-41 */
+  R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd,  _,  33, 0, XW, 
SCHED_W_W3W4W5W0W1W2_1, 39)
+  R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 34, 1, XW, 
SCHED_W_W3W4W5W0W1W2_2, 39)
+  R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd,  _,  35, 2, XW, 
SCHED_W_W3W4W5W0W1W2_3, 39)
+
+  /* Transform 36-38 + Precalc 42-44 */
+  R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 36, 0, XW, 
SCHED_W_W4W5W0W1W2W3_1, 42)
+  R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd,  _,  37, 1, XW, 
SCHED_W_W4W5W0W1W2W3_2, 42)
+  R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 38, 2, XW, 
SCHED_W_W4W5W0W1W2W3_3, 42)
+
+  /* Transform 39-41 + Precalc 45-47 */
+  R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd,  _,  39, 0, XW, 
SCHED_W_W5W0W1W2W3W4_1, 45)
+  R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 40, 1, XW, 
SCHED_W_W5W0W1W2W3W4_2, 45)
+  R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd,  _,  41, 2, XW, 
SCHED_W_W5W0W1W2W3W4_3, 45)
+
+  /* Transform 42-44 + Precalc 48-50 */
+  R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 42, 0, XW, 
SCHED_W_W0W1W2W3W4W5_1, 48)
+  R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd,  _,  43, 1, XW, 
SCHED_W_W0W1W2W3W4W5_2, 48)
+  R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 44, 2, XW, 
SCHED_W_W0W1W2W3W4W5_3, 48)
+
+  /* Transform 45-47 + Precalc 51-53 */
+  R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd,  _,  45, 0, XW, 
SCHED_W_W1W2W3W4W5W0_1, 51)
+  R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 46, 1, XW, 
SCHED_W_W1W2W3W4W5W0_2, 51)
+  R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd,  _,  47, 2, XW, 
SCHED_W_W1W2W3W4W5W0_3, 51)
+
+  /* Transform 48-50 + Precalc 54-56 */
+  R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 48, 0, XW, 
SCHED_W_W2W3W4W5W0W1_1, 54)
+  R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd,  _,  49, 1, XW, 
SCHED_W_W2W3W4W5W0W1_2, 54)
+  R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 50, 2, XW, 
SCHED_W_W2W3W4W5W0W1_3, 54)
+
+  /* Transform 51-53 + Precalc 57-59 */
+  R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd,  _,  51, 0, XW, 
SCHED_W_W3W4W5W0W1W2_1, 57)
+  R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 52, 1, XW, 
SCHED_W_W3W4W5W0W1W2_2, 57)
+  R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd,  _,  53, 2, XW, 
SCHED_W_W3W4W5W0W1W2_3, 57)
+
+  /* Transform 54-56 + Precalc 60-62 */
+  R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 54, 0, XW, 
SCHED_W_W4W5W0W1W2W3_1, 60)
+  R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd,  _,  55, 1, XW, 
SCHED_W_W4W5W0W1W2W3_2, 60)
+  R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 56, 2, XW, 
SCHED_W_W4W5W0W1W2W3_3, 60)
+
+  /* Transform 57-59 + Precalc 63 */
+  R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd,  _,  57, 0, XW, 
SCHED_W_W5W0W1W2W3W4_1, 63)
+  R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 58, 1, XW, 
SCHED_W_W5W0W1W2W3W4_2, 63)
+  R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd,  _,  59, 2, XW, 
SCHED_W_W5W0W1W2W3W4_3, 63)
+
+  /* Transform 60 */
+  R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 60, 0, XW, _, _);
+  subs RNBLKS, RNBLKS, #1;
+  b.eq .Lend;
+
+  /* Transform 61-63 + Preload next block */
+  R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd,  _,  61, 1, XW, LOAD_W_VEC_1, _);
+  ldp s0, s1, [RSTATE, #0];
+  R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 62, 2, XW, LOAD_W_VEC_2, _);
+  ldp s2, s3, [RSTATE, #8];
+  R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd,  _,  63, 0, XW, LOAD_W_VEC_3, _);
+
+  /* Update the chaining variables. */
+  eor ra, ra, s0;
+  eor rb, rb, s1;
+  ldp s0, s1, [RSTATE, #16];
+  eor rc, rc, s2;
+  ldp k_even, k_odd, [RSTATE, #24];
+  eor rd, rd, s3;
+  eor re, re, s0;
+  stp ra, rb, [RSTATE, #0];
+  eor rf, rf, s1;
+  stp rc, rd, [RSTATE, #8];
+  eor rg, rg, k_even;
+  stp re, rf, [RSTATE, #16];
+  eor rh, rh, k_odd;
+  stp rg, rh, [RSTATE, #24];
+  b .Loop;
+
+.Lend:
+  /* Transform 61-63 */
+  R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd,  _,  61, 1, XW, _, _);
+  ldp s0, s1, [RSTATE, #0];
+  R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 62, 2, XW, _, _);
+  ldp s2, s3, [RSTATE, #8];
+  R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd,  _,  63, 0, XW, _, _);
+
+  /* Update the chaining variables. */
+  eor ra, ra, s0;
+  clear_vec(W0);
+  eor rb, rb, s1;
+  clear_vec(W1);
+  ldp s0, s1, [RSTATE, #16];
+  clear_vec(W2);
+  eor rc, rc, s2;
+  clear_vec(W3);
+  ldp k_even, k_odd, [RSTATE, #24];
+  clear_vec(W4);
+  eor rd, rd, s3;
+  clear_vec(W5);
+  eor re, re, s0;
+  clear_vec(XTMP0);
+  stp ra, rb, [RSTATE, #0];
+  clear_vec(XTMP1);
+  eor rf, rf, s1;
+  clear_vec(XTMP2);
+  stp rc, rd, [RSTATE, #8];
+  clear_vec(XTMP3);
+  eor rg, rg, k_even;
+  clear_vec(XTMP4);
+  stp re, rf, [RSTATE, #16];
+  clear_vec(XTMP5);
+  eor rh, rh, k_odd;
+  clear_vec(XTMP6);
+  stp rg, rh, [RSTATE, #24];
+
+  /* Clear message expansion area */
+  add addr0, sp, #STACK_W;
+  eor x0, x0, x0; // stack burned
+  st1 {W0.16b-W3.16b}, [addr0], #64;
+  st1 {W0.16b-W3.16b}, [addr0], #64;
+  st1 {W0.16b-W3.16b}, [addr0];
+
+  mov sp, RFRAME;
+  CFI_DEF_CFA_REGISTER(sp);
+
+  ldp x25, x26, [sp], #16;
+  CFI_ADJUST_CFA_OFFSET(-16);
+  CFI_RESTORE(x25);
+  CFI_RESTORE(x26);
+  ldp x23, x24, [sp], #16;
+  CFI_ADJUST_CFA_OFFSET(-16);
+  CFI_RESTORE(x23);
+  CFI_RESTORE(x24);
+  ldp x21, x22, [sp], #16;
+  CFI_ADJUST_CFA_OFFSET(-16);
+  CFI_RESTORE(x21);
+  CFI_RESTORE(x22);
+  ldp x19, x20, [sp], #16;
+  CFI_ADJUST_CFA_OFFSET(-16);
+  CFI_RESTORE(x19);
+  CFI_RESTORE(x20);
+  ldp x28, x29, [sp], #16;
+  CFI_ADJUST_CFA_OFFSET(-16);
+  CFI_RESTORE(x28);
+  CFI_RESTORE(x29);
+  ret_spec_stop
+  CFI_ENDPROC();
+ELF(.size _gcry_sm3_transform_aarch64, .-_gcry_sm3_transform_aarch64;)
+
+#endif
diff --git a/grub-core/lib/libgcrypt/cipher/sm3-avx-bmi2-amd64.S 
b/grub-core/lib/libgcrypt/cipher/sm3-avx-bmi2-amd64.S
new file mode 100644
index 000000000..d9b6206a8
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/sm3-avx-bmi2-amd64.S
@@ -0,0 +1,553 @@
+/* sm3-avx-bmi2-amd64.S - Intel AVX/BMI2 accelerated SM3 transform function
+ * Copyright (C) 2021 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifdef __x86_64__
+#include <config.h>
+
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
+    defined(HAVE_GCC_INLINE_ASM_AVX) && defined(HAVE_GCC_INLINE_ASM_BMI2) && \
+    defined(USE_SM3)
+
+#include "asm-common-amd64.h"
+
+
+/* Context structure */
+
+#define state_h0 0
+#define state_h1 4
+#define state_h2 8
+#define state_h3 12
+#define state_h4 16
+#define state_h5 20
+#define state_h6 24
+#define state_h7 28
+
+/* Constants */
+
+.text
+.align 16
+ELF(.type _gcry_sm3_avx2_consts,@object)
+_gcry_sm3_avx2_consts:
+.Lbe32mask:
+  .long 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f
+ELF(.size _gcry_sm3_avx2_consts,.-_gcry_sm3_avx2_consts)
+
+/* Round constant macros */
+
+#define K0   2043430169  /* 0x79cc4519 */
+#define K1   -208106958  /* 0xf3988a32 */
+#define K2   -416213915  /* 0xe7311465 */
+#define K3   -832427829  /* 0xce6228cb */
+#define K4  -1664855657  /* 0x9cc45197 */
+#define K5    965255983  /* 0x3988a32f */
+#define K6   1930511966  /* 0x7311465e */
+#define K7   -433943364  /* 0xe6228cbc */
+#define K8   -867886727  /* 0xcc451979 */
+#define K9  -1735773453  /* 0x988a32f3 */
+#define K10   823420391  /* 0x311465e7 */
+#define K11  1646840782  /* 0x6228cbce */
+#define K12 -1001285732  /* 0xc451979c */
+#define K13 -2002571463  /* 0x88a32f39 */
+#define K14   289824371  /* 0x11465e73 */
+#define K15   579648742  /* 0x228cbce6 */
+#define K16 -1651869049  /* 0x9d8a7a87 */
+#define K17   991229199  /* 0x3b14f50f */
+#define K18  1982458398  /* 0x7629ea1e */
+#define K19  -330050500  /* 0xec53d43c */
+#define K20  -660100999  /* 0xd8a7a879 */
+#define K21 -1320201997  /* 0xb14f50f3 */
+#define K22  1654563303  /* 0x629ea1e7 */
+#define K23  -985840690  /* 0xc53d43ce */
+#define K24 -1971681379  /* 0x8a7a879d */
+#define K25   351604539  /* 0x14f50f3b */
+#define K26   703209078  /* 0x29ea1e76 */
+#define K27  1406418156  /* 0x53d43cec */
+#define K28 -1482130984  /* 0xa7a879d8 */
+#define K29  1330705329  /* 0x4f50f3b1 */
+#define K30 -1633556638  /* 0x9ea1e762 */
+#define K31  1027854021  /* 0x3d43cec5 */
+#define K32  2055708042  /* 0x7a879d8a */
+#define K33  -183551212  /* 0xf50f3b14 */
+#define K34  -367102423  /* 0xea1e7629 */
+#define K35  -734204845  /* 0xd43cec53 */
+#define K36 -1468409689  /* 0xa879d8a7 */
+#define K37  1358147919  /* 0x50f3b14f */
+#define K38 -1578671458  /* 0xa1e7629e */
+#define K39  1137624381  /* 0x43cec53d */
+#define K40 -2019718534  /* 0x879d8a7a */
+#define K41   255530229  /* 0x0f3b14f5 */
+#define K42   511060458  /* 0x1e7629ea */
+#define K43  1022120916  /* 0x3cec53d4 */
+#define K44  2044241832  /* 0x79d8a7a8 */
+#define K45  -206483632  /* 0xf3b14f50 */
+#define K46  -412967263  /* 0xe7629ea1 */
+#define K47  -825934525  /* 0xcec53d43 */
+#define K48 -1651869049  /* 0x9d8a7a87 */
+#define K49   991229199  /* 0x3b14f50f */
+#define K50  1982458398  /* 0x7629ea1e */
+#define K51  -330050500  /* 0xec53d43c */
+#define K52  -660100999  /* 0xd8a7a879 */
+#define K53 -1320201997  /* 0xb14f50f3 */
+#define K54  1654563303  /* 0x629ea1e7 */
+#define K55  -985840690  /* 0xc53d43ce */
+#define K56 -1971681379  /* 0x8a7a879d */
+#define K57   351604539  /* 0x14f50f3b */
+#define K58   703209078  /* 0x29ea1e76 */
+#define K59  1406418156  /* 0x53d43cec */
+#define K60 -1482130984  /* 0xa7a879d8 */
+#define K61  1330705329  /* 0x4f50f3b1 */
+#define K62 -1633556638  /* 0x9ea1e762 */
+#define K63  1027854021  /* 0x3d43cec5 */
+
+/* Register macros */
+
+#define RSTATE %rdi
+#define RDATA  %rsi
+#define RNBLKS %rdx
+
+#define t0 %eax
+#define t1 %ebx
+#define t2 %ecx
+
+#define a %r8d
+#define b %r9d
+#define c %r10d
+#define d %r11d
+#define e %r12d
+#define f %r13d
+#define g %r14d
+#define h %r15d
+
+#define W0 %xmm0
+#define W1 %xmm1
+#define W2 %xmm2
+#define W3 %xmm3
+#define W4 %xmm4
+#define W5 %xmm5
+
+#define XTMP0 %xmm6
+#define XTMP1 %xmm7
+#define XTMP2 %xmm8
+#define XTMP3 %xmm9
+#define XTMP4 %xmm10
+#define XTMP5 %xmm11
+#define XTMP6 %xmm12
+
+#define BSWAP_REG %xmm15
+
+/* Stack structure */
+
+#define STACK_W_SIZE        (32 * 2 * 3)
+#define STACK_REG_SAVE_SIZE (64)
+
+#define STACK_W             (0)
+#define STACK_REG_SAVE      (STACK_W + STACK_W_SIZE)
+#define STACK_SIZE          (STACK_REG_SAVE + STACK_REG_SAVE_SIZE)
+
+/* Instruction helpers. */
+
+#define roll2(v, reg) \
+        roll $(v), reg;
+
+#define roll3mov(v, src, dst) \
+        movl src, dst; \
+        roll $(v), dst;
+
+#define roll3(v, src, dst) \
+        rorxl $(32-(v)), src, dst;
+
+#define addl2(a, out) \
+        leal (a, out), out;
+
+/* Round function macros. */
+
+#define GG1(x, y, z, o, t) \
+        movl x, o; \
+        xorl y, o; \
+        xorl z, o;
+
+#define FF1(x, y, z, o, t) GG1(x, y, z, o, t)
+
+#define GG2(x, y, z, o, t) \
+        andnl z, x, o; \
+        movl y, t; \
+        andl x, t; \
+        addl2(t, o);
+
+#define FF2(x, y, z, o, t) \
+        movl y, o; \
+        xorl x, o; \
+        movl y, t; \
+        andl x, t; \
+        andl z, o; \
+        xorl t, o;
+
+#define R(i, a, b, c, d, e, f, g, h, round, widx, wtype) \
+        /* rol(a, 12) => t0 */ \
+          roll3mov(12, a, t0); /* rorxl here would reduce perf by 6% on zen3 
*/ \
+        /* rol (t0 + e + t), 7) => t1 */ \
+          leal K##round(t0, e, 1), t1; \
+          roll2(7, t1); \
+        /* h + w1 => h */ \
+          addl wtype##_W1_ADDR(round, widx), h; \
+        /* h + t1 => h */ \
+          addl2(t1, h); \
+        /* t1 ^ t0 => t0 */ \
+          xorl t1, t0; \
+        /* w1w2 + d => d */ \
+          addl wtype##_W1W2_ADDR(round, widx), d; \
+        /* FF##i(a,b,c) => t1 */ \
+          FF##i(a, b, c, t1, t2); \
+        /* d + t1 => d */ \
+          addl2(t1, d); \
+        /* GG#i(e,f,g) => t2 */ \
+          GG##i(e, f, g, t2, t1); \
+        /* h + t2 => h */ \
+          addl2(t2, h); \
+        /* rol (f, 19) => f */ \
+          roll2(19, f); \
+        /* d + t0 => d */ \
+          addl2(t0, d); \
+        /* rol (b, 9) => b */ \
+          roll2(9, b); \
+        /* P0(h) => h */ \
+          roll3(9, h, t2); \
+          roll3(17, h, t1); \
+          xorl t2, h; \
+          xorl t1, h;
+
+#define R1(a, b, c, d, e, f, g, h, round, widx, wtype) \
+        R(1, a, b, c, d, e, f, g, h, round, widx, wtype)
+
+#define R2(a, b, c, d, e, f, g, h, round, widx, wtype) \
+        R(2, a, b, c, d, e, f, g, h, round, widx, wtype)
+
+/* Input expansion macros. */
+
+/* Byte-swapped input address. */
+#define IW_W_ADDR(round, widx, offs) \
+        (STACK_W + ((round) / 4) * 64 + (offs) + ((widx) * 4))(%rsp)
+
+/* Expanded input address. */
+#define XW_W_ADDR(round, widx, offs) \
+        (STACK_W + ((((round) / 3) - 4) % 2) * 64 + (offs) + ((widx) * 
4))(%rsp)
+
+/* Rounds 1-12, byte-swapped input block addresses. */
+#define IW_W1_ADDR(round, widx)   IW_W_ADDR(round, widx, 0)
+#define IW_W1W2_ADDR(round, widx) IW_W_ADDR(round, widx, 32)
+
+/* Rounds 1-12, expanded input block addresses. */
+#define XW_W1_ADDR(round, widx)   XW_W_ADDR(round, widx, 0)
+#define XW_W1W2_ADDR(round, widx) XW_W_ADDR(round, widx, 32)
+
+/* Input block loading. */
+#define LOAD_W_XMM_1() \
+        vmovdqu 0*16(RDATA), XTMP0; /* XTMP0: w3, w2, w1, w0 */ \
+        vmovdqu 1*16(RDATA), XTMP1; /* XTMP1: w7, w6, w5, w4 */ \
+        vmovdqu 2*16(RDATA), XTMP2; /* XTMP2: w11, w10, w9, w8 */ \
+        vmovdqu 3*16(RDATA), XTMP3; /* XTMP3: w15, w14, w13, w12 */\
+        vpshufb BSWAP_REG, XTMP0, XTMP0; \
+        vpshufb BSWAP_REG, XTMP1, XTMP1; \
+        vpshufb BSWAP_REG, XTMP2, XTMP2; \
+        vpshufb BSWAP_REG, XTMP3, XTMP3; \
+        vpxor XTMP0, XTMP1, XTMP4; \
+        vpxor XTMP1, XTMP2, XTMP5; \
+        vpxor XTMP2, XTMP3, XTMP6; \
+        leaq 64(RDATA), RDATA; \
+        vmovdqa XTMP0, IW_W1_ADDR(0, 0); \
+        vmovdqa XTMP4, IW_W1W2_ADDR(0, 0); \
+        vmovdqa XTMP1, IW_W1_ADDR(4, 0); \
+        vmovdqa XTMP5, IW_W1W2_ADDR(4, 0);
+
+#define LOAD_W_XMM_2() \
+        vmovdqa XTMP2, IW_W1_ADDR(8, 0); \
+        vmovdqa XTMP6, IW_W1W2_ADDR(8, 0);
+
+#define LOAD_W_XMM_3() \
+        vpshufd $0b00000000, XTMP0, W0; /* W0: xx, w0, xx, xx */ \
+        vpshufd $0b11111001, XTMP0, W1; /* W1: xx, w3, w2, w1 */ \
+        vmovdqa XTMP1, W2;              /* W2: xx, w6, w5, w4 */ \
+        vpalignr $12, XTMP1, XTMP2, W3; /* W3: xx, w9, w8, w7 */ \
+        vpalignr $8, XTMP2, XTMP3, W4;  /* W4: xx, w12, w11, w10 */ \
+        vpshufd $0b11111001, XTMP3, W5; /* W5: xx, w15, w14, w13 */
+
+/* Message scheduling. Note: 3 words per XMM register. */
+#define SCHED_W_0(round, w0, w1, w2, w3, w4, w5) \
+        /* Load (w[i - 16]) => XTMP0 */ \
+        vpshufd $0b10111111, w0, XTMP0; \
+        vpalignr $12, XTMP0, w1, XTMP0; /* XTMP0: xx, w2, w1, w0 */ \
+        /* Load (w[i - 13]) => XTMP1 */ \
+        vpshufd $0b10111111, w1, XTMP1; \
+        vpalignr $12, XTMP1, w2, XTMP1; \
+        /* w[i - 9] == w3 */ \
+        /* XMM3 ^ XTMP0 => XTMP0 */ \
+        vpxor w3, XTMP0, XTMP0;
+
+#define SCHED_W_1(round, w0, w1, w2, w3, w4, w5) \
+        /* w[i - 3] == w5 */ \
+        /* rol(XMM5, 15) ^ XTMP0 => XTMP0 */ \
+        vpslld $15, w5, XTMP2; \
+        vpsrld $(32-15), w5, XTMP3; \
+        vpxor XTMP2, XTMP3, XTMP3; \
+        vpxor XTMP3, XTMP0, XTMP0; \
+        /* rol(XTMP1, 7) => XTMP1 */ \
+        vpslld $7, XTMP1, XTMP5; \
+        vpsrld $(32-7), XTMP1, XTMP1; \
+        vpxor XTMP5, XTMP1, XTMP1; \
+        /* XMM4 ^ XTMP1 => XTMP1 */ \
+        vpxor w4, XTMP1, XTMP1; \
+        /* w[i - 6] == XMM4 */ \
+        /* P1(XTMP0) ^ XTMP1 => XMM0 */ \
+        vpslld $15, XTMP0, XTMP5; \
+        vpsrld $(32-15), XTMP0, XTMP6; \
+        vpslld $23, XTMP0, XTMP2; \
+        vpsrld $(32-23), XTMP0, XTMP3; \
+        vpxor XTMP0, XTMP1, XTMP1; \
+        vpxor XTMP6, XTMP5, XTMP5; \
+        vpxor XTMP3, XTMP2, XTMP2; \
+        vpxor XTMP2, XTMP5, XTMP5; \
+        vpxor XTMP5, XTMP1, w0;
+
+#define SCHED_W_2(round, w0, w1, w2, w3, w4, w5) \
+        /* W1 in XMM12 */ \
+        vpshufd $0b10111111, w4, XTMP4; \
+        vpalignr $12, XTMP4, w5, XTMP4; \
+        vmovdqa XTMP4, XW_W1_ADDR((round), 0); \
+        /* W1 ^ W2 => XTMP1 */ \
+        vpxor w0, XTMP4, XTMP1; \
+        vmovdqa XTMP1, XW_W1W2_ADDR((round), 0);
+
+/*
+ * Transform nblks*64 bytes (nblks*16 32-bit words) at DATA.
+ *
+ * unsigned int
+ * _gcry_sm3_transform_amd64_avx_bmi2 (void *ctx, const unsigned char *data,
+ *                                     size_t nblks)
+ */
+.globl _gcry_sm3_transform_amd64_avx_bmi2
+ELF(.type _gcry_sm3_transform_amd64_avx_bmi2,@function)
+.align 16
+_gcry_sm3_transform_amd64_avx_bmi2:
+  /* input:
+   *   %rdi: ctx, CTX
+   *   %rsi: data (64*nblks bytes)
+   *   %rdx: nblks
+   */
+  CFI_STARTPROC();
+
+  vzeroupper;
+
+  pushq %rbp;
+  CFI_PUSH(%rbp);
+  movq %rsp, %rbp;
+  CFI_DEF_CFA_REGISTER(%rbp);
+
+  movq %rdx, RNBLKS;
+
+  subq $STACK_SIZE, %rsp;
+  andq $(~63), %rsp;
+
+  movq %rbx, (STACK_REG_SAVE + 0 * 8)(%rsp);
+  CFI_REL_OFFSET(%rbx, STACK_REG_SAVE + 0 * 8);
+  movq %r15, (STACK_REG_SAVE + 1 * 8)(%rsp);
+  CFI_REL_OFFSET(%r15, STACK_REG_SAVE + 1 * 8);
+  movq %r14, (STACK_REG_SAVE + 2 * 8)(%rsp);
+  CFI_REL_OFFSET(%r14, STACK_REG_SAVE + 2 * 8);
+  movq %r13, (STACK_REG_SAVE + 3 * 8)(%rsp);
+  CFI_REL_OFFSET(%r13, STACK_REG_SAVE + 3 * 8);
+  movq %r12, (STACK_REG_SAVE + 4 * 8)(%rsp);
+  CFI_REL_OFFSET(%r12, STACK_REG_SAVE + 4 * 8);
+
+  vmovdqa .Lbe32mask rRIP, BSWAP_REG;
+
+  /* Get the values of the chaining variables. */
+  movl state_h0(RSTATE), a;
+  movl state_h1(RSTATE), b;
+  movl state_h2(RSTATE), c;
+  movl state_h3(RSTATE), d;
+  movl state_h4(RSTATE), e;
+  movl state_h5(RSTATE), f;
+  movl state_h6(RSTATE), g;
+  movl state_h7(RSTATE), h;
+
+.align 16
+.Loop:
+  /* Load data part1. */
+  LOAD_W_XMM_1();
+
+  leaq -1(RNBLKS), RNBLKS;
+
+  /* Transform 0-3 + Load data part2. */
+  R1(a, b, c, d, e, f, g, h, 0, 0, IW); LOAD_W_XMM_2();
+  R1(d, a, b, c, h, e, f, g, 1, 1, IW);
+  R1(c, d, a, b, g, h, e, f, 2, 2, IW);
+  R1(b, c, d, a, f, g, h, e, 3, 3, IW); LOAD_W_XMM_3();
+
+  /* Transform 4-7 + Precalc 12-14. */
+  R1(a, b, c, d, e, f, g, h, 4, 0, IW);
+  R1(d, a, b, c, h, e, f, g, 5, 1, IW);
+  R1(c, d, a, b, g, h, e, f, 6, 2, IW); SCHED_W_0(12, W0, W1, W2, W3, W4, W5);
+  R1(b, c, d, a, f, g, h, e, 7, 3, IW); SCHED_W_1(12, W0, W1, W2, W3, W4, W5);
+
+  /* Transform 8-11 + Precalc 12-17. */
+  R1(a, b, c, d, e, f, g, h, 8, 0, IW); SCHED_W_2(12, W0, W1, W2, W3, W4, W5);
+  R1(d, a, b, c, h, e, f, g, 9, 1, IW); SCHED_W_0(15, W1, W2, W3, W4, W5, W0);
+  R1(c, d, a, b, g, h, e, f, 10, 2, IW); SCHED_W_1(15, W1, W2, W3, W4, W5, W0);
+  R1(b, c, d, a, f, g, h, e, 11, 3, IW); SCHED_W_2(15, W1, W2, W3, W4, W5, W0);
+
+  /* Transform 12-14 + Precalc 18-20 */
+  R1(a, b, c, d, e, f, g, h, 12, 0, XW); SCHED_W_0(18, W2, W3, W4, W5, W0, W1);
+  R1(d, a, b, c, h, e, f, g, 13, 1, XW); SCHED_W_1(18, W2, W3, W4, W5, W0, W1);
+  R1(c, d, a, b, g, h, e, f, 14, 2, XW); SCHED_W_2(18, W2, W3, W4, W5, W0, W1);
+
+  /* Transform 15-17 + Precalc 21-23 */
+  R1(b, c, d, a, f, g, h, e, 15, 0, XW); SCHED_W_0(21, W3, W4, W5, W0, W1, W2);
+  R2(a, b, c, d, e, f, g, h, 16, 1, XW); SCHED_W_1(21, W3, W4, W5, W0, W1, W2);
+  R2(d, a, b, c, h, e, f, g, 17, 2, XW); SCHED_W_2(21, W3, W4, W5, W0, W1, W2);
+
+  /* Transform 18-20 + Precalc 24-26 */
+  R2(c, d, a, b, g, h, e, f, 18, 0, XW); SCHED_W_0(24, W4, W5, W0, W1, W2, W3);
+  R2(b, c, d, a, f, g, h, e, 19, 1, XW); SCHED_W_1(24, W4, W5, W0, W1, W2, W3);
+  R2(a, b, c, d, e, f, g, h, 20, 2, XW); SCHED_W_2(24, W4, W5, W0, W1, W2, W3);
+
+  /* Transform 21-23 + Precalc 27-29 */
+  R2(d, a, b, c, h, e, f, g, 21, 0, XW); SCHED_W_0(27, W5, W0, W1, W2, W3, W4);
+  R2(c, d, a, b, g, h, e, f, 22, 1, XW); SCHED_W_1(27, W5, W0, W1, W2, W3, W4);
+  R2(b, c, d, a, f, g, h, e, 23, 2, XW); SCHED_W_2(27, W5, W0, W1, W2, W3, W4);
+
+  /* Transform 24-26 + Precalc 30-32 */
+  R2(a, b, c, d, e, f, g, h, 24, 0, XW); SCHED_W_0(30, W0, W1, W2, W3, W4, W5);
+  R2(d, a, b, c, h, e, f, g, 25, 1, XW); SCHED_W_1(30, W0, W1, W2, W3, W4, W5);
+  R2(c, d, a, b, g, h, e, f, 26, 2, XW); SCHED_W_2(30, W0, W1, W2, W3, W4, W5);
+
+  /* Transform 27-29 + Precalc 33-35 */
+  R2(b, c, d, a, f, g, h, e, 27, 0, XW); SCHED_W_0(33, W1, W2, W3, W4, W5, W0);
+  R2(a, b, c, d, e, f, g, h, 28, 1, XW); SCHED_W_1(33, W1, W2, W3, W4, W5, W0);
+  R2(d, a, b, c, h, e, f, g, 29, 2, XW); SCHED_W_2(33, W1, W2, W3, W4, W5, W0);
+
+  /* Transform 30-32 + Precalc 36-38 */
+  R2(c, d, a, b, g, h, e, f, 30, 0, XW); SCHED_W_0(36, W2, W3, W4, W5, W0, W1);
+  R2(b, c, d, a, f, g, h, e, 31, 1, XW); SCHED_W_1(36, W2, W3, W4, W5, W0, W1);
+  R2(a, b, c, d, e, f, g, h, 32, 2, XW); SCHED_W_2(36, W2, W3, W4, W5, W0, W1);
+
+  /* Transform 33-35 + Precalc 39-41 */
+  R2(d, a, b, c, h, e, f, g, 33, 0, XW); SCHED_W_0(39, W3, W4, W5, W0, W1, W2);
+  R2(c, d, a, b, g, h, e, f, 34, 1, XW); SCHED_W_1(39, W3, W4, W5, W0, W1, W2);
+  R2(b, c, d, a, f, g, h, e, 35, 2, XW); SCHED_W_2(39, W3, W4, W5, W0, W1, W2);
+
+  /* Transform 36-38 + Precalc 42-44 */
+  R2(a, b, c, d, e, f, g, h, 36, 0, XW); SCHED_W_0(42, W4, W5, W0, W1, W2, W3);
+  R2(d, a, b, c, h, e, f, g, 37, 1, XW); SCHED_W_1(42, W4, W5, W0, W1, W2, W3);
+  R2(c, d, a, b, g, h, e, f, 38, 2, XW); SCHED_W_2(42, W4, W5, W0, W1, W2, W3);
+
+  /* Transform 39-41 + Precalc 45-47 */
+  R2(b, c, d, a, f, g, h, e, 39, 0, XW); SCHED_W_0(45, W5, W0, W1, W2, W3, W4);
+  R2(a, b, c, d, e, f, g, h, 40, 1, XW); SCHED_W_1(45, W5, W0, W1, W2, W3, W4);
+  R2(d, a, b, c, h, e, f, g, 41, 2, XW); SCHED_W_2(45, W5, W0, W1, W2, W3, W4);
+
+  /* Transform 42-44 + Precalc 48-50 */
+  R2(c, d, a, b, g, h, e, f, 42, 0, XW); SCHED_W_0(48, W0, W1, W2, W3, W4, W5);
+  R2(b, c, d, a, f, g, h, e, 43, 1, XW); SCHED_W_1(48, W0, W1, W2, W3, W4, W5);
+  R2(a, b, c, d, e, f, g, h, 44, 2, XW); SCHED_W_2(48, W0, W1, W2, W3, W4, W5);
+
+  /* Transform 45-47 + Precalc 51-53 */
+  R2(d, a, b, c, h, e, f, g, 45, 0, XW); SCHED_W_0(51, W1, W2, W3, W4, W5, W0);
+  R2(c, d, a, b, g, h, e, f, 46, 1, XW); SCHED_W_1(51, W1, W2, W3, W4, W5, W0);
+  R2(b, c, d, a, f, g, h, e, 47, 2, XW); SCHED_W_2(51, W1, W2, W3, W4, W5, W0);
+
+  /* Transform 48-50 + Precalc 54-56 */
+  R2(a, b, c, d, e, f, g, h, 48, 0, XW); SCHED_W_0(54, W2, W3, W4, W5, W0, W1);
+  R2(d, a, b, c, h, e, f, g, 49, 1, XW); SCHED_W_1(54, W2, W3, W4, W5, W0, W1);
+  R2(c, d, a, b, g, h, e, f, 50, 2, XW); SCHED_W_2(54, W2, W3, W4, W5, W0, W1);
+
+  /* Transform 51-53 + Precalc 57-59 */
+  R2(b, c, d, a, f, g, h, e, 51, 0, XW); SCHED_W_0(57, W3, W4, W5, W0, W1, W2);
+  R2(a, b, c, d, e, f, g, h, 52, 1, XW); SCHED_W_1(57, W3, W4, W5, W0, W1, W2);
+  R2(d, a, b, c, h, e, f, g, 53, 2, XW); SCHED_W_2(57, W3, W4, W5, W0, W1, W2);
+
+  /* Transform 54-56 + Precalc 60-62 */
+  R2(c, d, a, b, g, h, e, f, 54, 0, XW); SCHED_W_0(60, W4, W5, W0, W1, W2, W3);
+  R2(b, c, d, a, f, g, h, e, 55, 1, XW); SCHED_W_1(60, W4, W5, W0, W1, W2, W3);
+  R2(a, b, c, d, e, f, g, h, 56, 2, XW); SCHED_W_2(60, W4, W5, W0, W1, W2, W3);
+
+  /* Transform 57-59 + Precalc 63 */
+  R2(d, a, b, c, h, e, f, g, 57, 0, XW); SCHED_W_0(63, W5, W0, W1, W2, W3, W4);
+  R2(c, d, a, b, g, h, e, f, 58, 1, XW);
+  R2(b, c, d, a, f, g, h, e, 59, 2, XW); SCHED_W_1(63, W5, W0, W1, W2, W3, W4);
+
+  /* Transform 60-62 + Precalc 63 */
+  R2(a, b, c, d, e, f, g, h, 60, 0, XW);
+  R2(d, a, b, c, h, e, f, g, 61, 1, XW); SCHED_W_2(63, W5, W0, W1, W2, W3, W4);
+  R2(c, d, a, b, g, h, e, f, 62, 2, XW);
+
+  /* Transform 63 */
+  R2(b, c, d, a, f, g, h, e, 63, 0, XW);
+
+  /* Update the chaining variables. */
+  xorl state_h0(RSTATE), a;
+  xorl state_h1(RSTATE), b;
+  xorl state_h2(RSTATE), c;
+  xorl state_h3(RSTATE), d;
+  movl a, state_h0(RSTATE);
+  movl b, state_h1(RSTATE);
+  movl c, state_h2(RSTATE);
+  movl d, state_h3(RSTATE);
+  xorl state_h4(RSTATE), e;
+  xorl state_h5(RSTATE), f;
+  xorl state_h6(RSTATE), g;
+  xorl state_h7(RSTATE), h;
+  movl e, state_h4(RSTATE);
+  movl f, state_h5(RSTATE);
+  movl g, state_h6(RSTATE);
+  movl h, state_h7(RSTATE);
+
+  cmpq $0, RNBLKS;
+  jne .Loop;
+
+  vzeroall;
+
+  movq (STACK_REG_SAVE + 0 * 8)(%rsp), %rbx;
+  CFI_RESTORE(%rbx);
+  movq (STACK_REG_SAVE + 1 * 8)(%rsp), %r15;
+  CFI_RESTORE(%r15);
+  movq (STACK_REG_SAVE + 2 * 8)(%rsp), %r14;
+  CFI_RESTORE(%r14);
+  movq (STACK_REG_SAVE + 3 * 8)(%rsp), %r13;
+  CFI_RESTORE(%r13);
+  movq (STACK_REG_SAVE + 4 * 8)(%rsp), %r12;
+  CFI_RESTORE(%r12);
+
+  vmovdqa %xmm0, IW_W1_ADDR(0, 0);
+  vmovdqa %xmm0, IW_W1W2_ADDR(0, 0);
+  vmovdqa %xmm0, IW_W1_ADDR(4, 0);
+  vmovdqa %xmm0, IW_W1W2_ADDR(4, 0);
+  vmovdqa %xmm0, IW_W1_ADDR(8, 0);
+  vmovdqa %xmm0, IW_W1W2_ADDR(8, 0);
+  xorl %eax, %eax; /* stack burned */
+
+  leave;
+  CFI_LEAVE();
+  ret_spec_stop;
+  CFI_ENDPROC();
+ELF(.size _gcry_sm3_transform_amd64_avx_bmi2,
+          .-_gcry_sm3_transform_amd64_avx_bmi2;)
+
+#endif
+#endif
diff --git a/grub-core/lib/libgcrypt/cipher/sm3.c 
b/grub-core/lib/libgcrypt/cipher/sm3.c
new file mode 100644
index 000000000..0ab5f5067
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/sm3.c
@@ -0,0 +1,537 @@
+/* sm3.c - SM3 hash function
+ * Copyright (C) 2017 Jia Zhang
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+/*  Test vectors:
+
+    "abc"
+    SM3: 66c7f0f4 62eeedd9 d1f2d46b dc10e4e2 4167c487 5cf2f7a2 297da02b 
8f4ba8e0
+
+    "abcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcd"
+    SM3: debe9ff9 2275b8a1 38604889 c18e5a4d 6fdb70e5 387e5765 293dcba3 
9c0c5732
+
+    "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq"
+    SM3: 639b6cc5 e64d9e37 a390b192 df4fa1ea 0720ab74 7ff692b9 f38c4e66 
ad7b8c05
+
+    "a" one million times
+    SM3: c8aaf894 29554029 e231941a 2acc0ad6 1ff2a5ac d8fadd25 847a3a73 
2b3b02c3
+
+ */
+
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "g10lib.h"
+#include "bithelp.h"
+#include "bufhelp.h"
+#include "cipher.h"
+#include "hash-common.h"
+
+
+/* USE_AVX_BMI2 indicates whether to compile with Intel AVX/BMI2 code. */
+#undef USE_AVX_BMI2
+#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX) && \
+    defined(HAVE_GCC_INLINE_ASM_BMI2) && \
+    (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+# define USE_AVX_BMI2 1
+#endif
+
+/* USE_AARCH64_SIMD indicates whether to enable ARMv8 SIMD assembly
+ * code. */
+#undef USE_AARCH64_SIMD
+#ifdef ENABLE_NEON_SUPPORT
+# if defined(__AARCH64EL__) \
+       && defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) \
+       && defined(HAVE_GCC_INLINE_ASM_AARCH64_NEON)
+#  define USE_AARCH64_SIMD 1
+# endif
+#endif
+
+
+typedef struct {
+  gcry_md_block_ctx_t bctx;
+  u32 h[8];
+} SM3_CONTEXT;
+
+
+/* AMD64 assembly implementations use SystemV ABI, ABI conversion and 
additional
+ * stack to store XMM6-XMM15 needed on Win64. */
+#undef ASM_FUNC_ABI
+#undef ASM_EXTRA_STACK
+#if defined(USE_AVX_BMI2)
+# ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+#  define ASM_FUNC_ABI __attribute__((sysv_abi))
+#  define ASM_EXTRA_STACK (10 * 16 + 4 * sizeof(void *))
+# else
+#  define ASM_FUNC_ABI
+#  define ASM_EXTRA_STACK 0
+# endif
+#endif
+
+
+#ifdef USE_AVX_BMI2
+unsigned int _gcry_sm3_transform_amd64_avx_bmi2(void *state,
+                                                const void *input_data,
+                                                size_t num_blks) ASM_FUNC_ABI;
+
+static unsigned int
+do_sm3_transform_amd64_avx_bmi2(void *context, const unsigned char *data,
+                                size_t nblks)
+{
+  SM3_CONTEXT *hd = context;
+  unsigned int nburn = _gcry_sm3_transform_amd64_avx_bmi2 (hd->h, data, nblks);
+  nburn += nburn ? ASM_EXTRA_STACK : 0;
+  return nburn;
+}
+#endif /* USE_AVX_BMI2 */
+
+#ifdef USE_AARCH64_SIMD
+unsigned int _gcry_sm3_transform_aarch64(void *state, const void *input_data,
+                                         size_t num_blks);
+
+static unsigned int
+do_sm3_transform_aarch64(void *context, const unsigned char *data, size_t 
nblks)
+{
+  SM3_CONTEXT *hd = context;
+  return _gcry_sm3_transform_aarch64 (hd->h, data, nblks);
+}
+#endif /* USE_AARCH64_SIMD */
+
+
+static unsigned int
+transform (void *c, const unsigned char *data, size_t nblks);
+
+
+static void
+sm3_init (void *context, unsigned int flags)
+{
+  SM3_CONTEXT *hd = context;
+  unsigned int features = _gcry_get_hw_features ();
+
+  (void)flags;
+
+  hd->h[0] = 0x7380166f;
+  hd->h[1] = 0x4914b2b9;
+  hd->h[2] = 0x172442d7;
+  hd->h[3] = 0xda8a0600;
+  hd->h[4] = 0xa96f30bc;
+  hd->h[5] = 0x163138aa;
+  hd->h[6] = 0xe38dee4d;
+  hd->h[7] = 0xb0fb0e4e;
+
+  hd->bctx.nblocks = 0;
+  hd->bctx.nblocks_high = 0;
+  hd->bctx.count = 0;
+  hd->bctx.blocksize_shift = _gcry_ctz(64);
+  hd->bctx.bwrite = transform;
+
+#ifdef USE_AVX_BMI2
+  if ((features & HWF_INTEL_AVX2) && (features & HWF_INTEL_BMI2))
+    hd->bctx.bwrite = do_sm3_transform_amd64_avx_bmi2;
+#endif
+#ifdef USE_AARCH64_SIMD
+  if (features & HWF_ARM_NEON)
+    hd->bctx.bwrite = do_sm3_transform_aarch64;
+#endif
+
+  (void)features;
+}
+
+
+/*
+  Transform the message X which consists of 16 32-bit-words. See
+  GM/T 004-2012 for details.  */
+#define R(i,a,b,c,d,e,f,g,h,t,w1,w2) do                               \
+          {                                                           \
+            ss1 = rol ((rol ((a), 12) + (e) + (t)), 7);               \
+            ss2 = ss1 ^ rol ((a), 12);                                \
+            d += FF##i(a,b,c) + ss2 + ((w1) ^ (w2));                  \
+            h += GG##i(e,f,g) + ss1 + (w1);                           \
+            b = rol ((b), 9);                                         \
+            f = rol ((f), 19);                                        \
+            h = P0 ((h));                                             \
+          } while (0)
+
+#define R1(a,b,c,d,e,f,g,h,t,w1,w2) R(1,a,b,c,d,e,f,g,h,t,w1,w2)
+#define R2(a,b,c,d,e,f,g,h,t,w1,w2) R(2,a,b,c,d,e,f,g,h,t,w1,w2)
+
+#define FF1(x, y, z)  (x ^ y ^ z)
+
+#define FF2(x, y, z)  ((x & y) | (x & z) | (y & z))
+
+#define GG1(x, y, z)  (x ^ y ^ z)
+
+#define GG2(x, y, z)  ((x & y) | ( ~x & z))
+
+/* Message expansion */
+#define P0(x) ((x) ^ rol ((x), 9) ^ rol ((x), 17))
+#define P1(x) ((x) ^ rol ((x), 15) ^ rol ((x), 23))
+#define I(i)  ( w[i] = buf_get_be32(data + i * 4) )
+#define W1(i) ( w[i&0x0f] )
+#define W2(i) ( w[i&0x0f] =   P1(w[i    &0x0f] \
+                               ^ w[(i-9)&0x0f] \
+                               ^ rol (w[(i-3)&0x0f], 15)) \
+                            ^ rol (w[(i-13)&0x0f], 7) \
+                            ^ w[(i-6)&0x0f] )
+
+static unsigned int
+transform_blk (void *ctx, const unsigned char *data)
+{
+  SM3_CONTEXT *hd = ctx;
+  static const u32 K[64] = {
+    0x79cc4519, 0xf3988a32, 0xe7311465, 0xce6228cb,
+    0x9cc45197, 0x3988a32f, 0x7311465e, 0xe6228cbc,
+    0xcc451979, 0x988a32f3, 0x311465e7, 0x6228cbce,
+    0xc451979c, 0x88a32f39, 0x11465e73, 0x228cbce6,
+    0x9d8a7a87, 0x3b14f50f, 0x7629ea1e, 0xec53d43c,
+    0xd8a7a879, 0xb14f50f3, 0x629ea1e7, 0xc53d43ce,
+    0x8a7a879d, 0x14f50f3b, 0x29ea1e76, 0x53d43cec,
+    0xa7a879d8, 0x4f50f3b1, 0x9ea1e762, 0x3d43cec5,
+    0x7a879d8a, 0xf50f3b14, 0xea1e7629, 0xd43cec53,
+    0xa879d8a7, 0x50f3b14f, 0xa1e7629e, 0x43cec53d,
+    0x879d8a7a, 0x0f3b14f5, 0x1e7629ea, 0x3cec53d4,
+    0x79d8a7a8, 0xf3b14f50, 0xe7629ea1, 0xcec53d43,
+    0x9d8a7a87, 0x3b14f50f, 0x7629ea1e, 0xec53d43c,
+    0xd8a7a879, 0xb14f50f3, 0x629ea1e7, 0xc53d43ce,
+    0x8a7a879d, 0x14f50f3b, 0x29ea1e76, 0x53d43cec,
+    0xa7a879d8, 0x4f50f3b1, 0x9ea1e762, 0x3d43cec5
+  };
+
+  u32 a,b,c,d,e,f,g,h,ss1,ss2;
+  u32 w[16];
+
+  a = hd->h[0];
+  b = hd->h[1];
+  c = hd->h[2];
+  d = hd->h[3];
+  e = hd->h[4];
+  f = hd->h[5];
+  g = hd->h[6];
+  h = hd->h[7];
+
+  R1(a, b, c, d, e, f, g, h, K[0], I(0), I(4));
+  R1(d, a, b, c, h, e, f, g, K[1], I(1), I(5));
+  R1(c, d, a, b, g, h, e, f, K[2], I(2), I(6));
+  R1(b, c, d, a, f, g, h, e, K[3], I(3), I(7));
+  R1(a, b, c, d, e, f, g, h, K[4], W1(4), I(8));
+  R1(d, a, b, c, h, e, f, g, K[5], W1(5), I(9));
+  R1(c, d, a, b, g, h, e, f, K[6], W1(6), I(10));
+  R1(b, c, d, a, f, g, h, e, K[7], W1(7), I(11));
+  R1(a, b, c, d, e, f, g, h, K[8], W1(8), I(12));
+  R1(d, a, b, c, h, e, f, g, K[9], W1(9), I(13));
+  R1(c, d, a, b, g, h, e, f, K[10], W1(10), I(14));
+  R1(b, c, d, a, f, g, h, e, K[11], W1(11), I(15));
+  R1(a, b, c, d, e, f, g, h, K[12], W1(12), W2(16));
+  R1(d, a, b, c, h, e, f, g, K[13], W1(13), W2(17));
+  R1(c, d, a, b, g, h, e, f, K[14], W1(14), W2(18));
+  R1(b, c, d, a, f, g, h, e, K[15], W1(15), W2(19));
+
+  R2(a, b, c, d, e, f, g, h, K[16], W1(16), W2(20));
+  R2(d, a, b, c, h, e, f, g, K[17], W1(17), W2(21));
+  R2(c, d, a, b, g, h, e, f, K[18], W1(18), W2(22));
+  R2(b, c, d, a, f, g, h, e, K[19], W1(19), W2(23));
+  R2(a, b, c, d, e, f, g, h, K[20], W1(20), W2(24));
+  R2(d, a, b, c, h, e, f, g, K[21], W1(21), W2(25));
+  R2(c, d, a, b, g, h, e, f, K[22], W1(22), W2(26));
+  R2(b, c, d, a, f, g, h, e, K[23], W1(23), W2(27));
+  R2(a, b, c, d, e, f, g, h, K[24], W1(24), W2(28));
+  R2(d, a, b, c, h, e, f, g, K[25], W1(25), W2(29));
+  R2(c, d, a, b, g, h, e, f, K[26], W1(26), W2(30));
+  R2(b, c, d, a, f, g, h, e, K[27], W1(27), W2(31));
+  R2(a, b, c, d, e, f, g, h, K[28], W1(28), W2(32));
+  R2(d, a, b, c, h, e, f, g, K[29], W1(29), W2(33));
+  R2(c, d, a, b, g, h, e, f, K[30], W1(30), W2(34));
+  R2(b, c, d, a, f, g, h, e, K[31], W1(31), W2(35));
+
+  R2(a, b, c, d, e, f, g, h, K[32], W1(32), W2(36));
+  R2(d, a, b, c, h, e, f, g, K[33], W1(33), W2(37));
+  R2(c, d, a, b, g, h, e, f, K[34], W1(34), W2(38));
+  R2(b, c, d, a, f, g, h, e, K[35], W1(35), W2(39));
+  R2(a, b, c, d, e, f, g, h, K[36], W1(36), W2(40));
+  R2(d, a, b, c, h, e, f, g, K[37], W1(37), W2(41));
+  R2(c, d, a, b, g, h, e, f, K[38], W1(38), W2(42));
+  R2(b, c, d, a, f, g, h, e, K[39], W1(39), W2(43));
+  R2(a, b, c, d, e, f, g, h, K[40], W1(40), W2(44));
+  R2(d, a, b, c, h, e, f, g, K[41], W1(41), W2(45));
+  R2(c, d, a, b, g, h, e, f, K[42], W1(42), W2(46));
+  R2(b, c, d, a, f, g, h, e, K[43], W1(43), W2(47));
+  R2(a, b, c, d, e, f, g, h, K[44], W1(44), W2(48));
+  R2(d, a, b, c, h, e, f, g, K[45], W1(45), W2(49));
+  R2(c, d, a, b, g, h, e, f, K[46], W1(46), W2(50));
+  R2(b, c, d, a, f, g, h, e, K[47], W1(47), W2(51));
+
+  R2(a, b, c, d, e, f, g, h, K[48], W1(48), W2(52));
+  R2(d, a, b, c, h, e, f, g, K[49], W1(49), W2(53));
+  R2(c, d, a, b, g, h, e, f, K[50], W1(50), W2(54));
+  R2(b, c, d, a, f, g, h, e, K[51], W1(51), W2(55));
+  R2(a, b, c, d, e, f, g, h, K[52], W1(52), W2(56));
+  R2(d, a, b, c, h, e, f, g, K[53], W1(53), W2(57));
+  R2(c, d, a, b, g, h, e, f, K[54], W1(54), W2(58));
+  R2(b, c, d, a, f, g, h, e, K[55], W1(55), W2(59));
+  R2(a, b, c, d, e, f, g, h, K[56], W1(56), W2(60));
+  R2(d, a, b, c, h, e, f, g, K[57], W1(57), W2(61));
+  R2(c, d, a, b, g, h, e, f, K[58], W1(58), W2(62));
+  R2(b, c, d, a, f, g, h, e, K[59], W1(59), W2(63));
+  R2(a, b, c, d, e, f, g, h, K[60], W1(60), W2(64));
+  R2(d, a, b, c, h, e, f, g, K[61], W1(61), W2(65));
+  R2(c, d, a, b, g, h, e, f, K[62], W1(62), W2(66));
+  R2(b, c, d, a, f, g, h, e, K[63], W1(63), W2(67));
+
+  hd->h[0] ^= a;
+  hd->h[1] ^= b;
+  hd->h[2] ^= c;
+  hd->h[3] ^= d;
+  hd->h[4] ^= e;
+  hd->h[5] ^= f;
+  hd->h[6] ^= g;
+  hd->h[7] ^= h;
+
+  return /*burn_stack*/ 26*4+32;
+}
+#undef P0
+#undef P1
+#undef R
+#undef R1
+#undef R2
+
+static unsigned int
+transform (void *ctx, const unsigned char *data, size_t nblks)
+{
+  SM3_CONTEXT *hd = ctx;
+  unsigned int burn;
+
+  do
+    {
+      burn = transform_blk (hd, data);
+      data += 64;
+    }
+  while (--nblks);
+
+  return burn;
+}
+
+
+/*
+   The routine finally terminates the computation and returns the
+   digest.  The handle is prepared for a new cycle, but adding bytes
+   to the handle will the destroy the returned buffer.  Returns: 32
+   bytes with the message the digest.  */
+static void
+sm3_final(void *context)
+{
+  SM3_CONTEXT *hd = context;
+  u32 t, th, msb, lsb;
+  byte *p;
+  unsigned int burn;
+
+  t = hd->bctx.nblocks;
+  if (sizeof t == sizeof hd->bctx.nblocks)
+    th = hd->bctx.nblocks_high;
+  else
+    th = hd->bctx.nblocks >> 32;
+
+  /* multiply by 64 to make a byte count */
+  lsb = t << 6;
+  msb = (th << 6) | (t >> 26);
+  /* add the count */
+  t = lsb;
+  if ((lsb += hd->bctx.count) < t)
+    msb++;
+  /* multiply by 8 to make a bit count */
+  t = lsb;
+  lsb <<= 3;
+  msb <<= 3;
+  msb |= t >> 29;
+
+  if (hd->bctx.count < 56)  /* enough room */
+    {
+      hd->bctx.buf[hd->bctx.count++] = 0x80; /* pad */
+      if (hd->bctx.count < 56)
+       memset (&hd->bctx.buf[hd->bctx.count], 0, 56 - hd->bctx.count);
+
+      /* append the 64 bit count */
+      buf_put_be32(hd->bctx.buf + 56, msb);
+      buf_put_be32(hd->bctx.buf + 60, lsb);
+      burn = (*hd->bctx.bwrite) ( hd, hd->bctx.buf, 1 );
+    }
+  else  /* need one extra block */
+    {
+      hd->bctx.buf[hd->bctx.count++] = 0x80; /* pad character */
+      /* fill pad and next block with zeroes */
+      memset (&hd->bctx.buf[hd->bctx.count], 0, 64 - hd->bctx.count + 56);
+
+      /* append the 64 bit count */
+      buf_put_be32(hd->bctx.buf + 64 + 56, msb);
+      buf_put_be32(hd->bctx.buf + 64 + 60, lsb);
+      burn = (*hd->bctx.bwrite) ( hd, hd->bctx.buf, 2 );
+    }
+
+  p = hd->bctx.buf;
+#define X(a) do { buf_put_be32(p, hd->h[a]); p += 4; } while(0)
+  X(0);
+  X(1);
+  X(2);
+  X(3);
+  X(4);
+  X(5);
+  X(6);
+  X(7);
+#undef X
+
+  hd->bctx.count = 0;
+
+  _gcry_burn_stack (burn);
+}
+
+static byte *
+sm3_read (void *context)
+{
+  SM3_CONTEXT *hd = context;
+
+  return hd->bctx.buf;
+}
+
+
+/* Shortcut functions which puts the hash value of the supplied buffer iov
+ * into outbuf which must have a size of 32 bytes.  */
+static void
+_gcry_sm3_hash_buffers (void *outbuf, size_t nbytes,
+                       const gcry_buffer_t *iov, int iovcnt)
+{
+  SM3_CONTEXT hd;
+
+  (void)nbytes;
+
+  sm3_init (&hd, 0);
+  for (;iovcnt > 0; iov++, iovcnt--)
+    _gcry_md_block_write (&hd,
+                          (const char*)iov[0].data + iov[0].off, iov[0].len);
+  sm3_final (&hd);
+  memcpy (outbuf, hd.bctx.buf, 32);
+}
+
+
+
+/*
+     Self-test section.
+ */
+
+
+static gpg_err_code_t
+selftests_sm3 (int extended, selftest_report_func_t report)
+{
+  const char *what;
+  const char *errtxt;
+
+  what = "short string (spec example 1)";
+  errtxt = _gcry_hash_selftest_check_one
+    (GCRY_MD_SM3, 0,
+     "abc", 3,
+     "\x66\xc7\xf0\xf4\x62\xee\xed\xd9\xd1\xf2\xd4\x6b\xdc\x10\xe4\xe2"
+     "\x41\x67\xc4\x87\x5c\xf2\xf7\xa2\x29\x7d\xa0\x2b\x8f\x4b\xa8\xe0", 32);
+  if (errtxt)
+    goto failed;
+
+  if (extended)
+    {
+      what = "long string (spec example 2)";
+      errtxt = _gcry_hash_selftest_check_one
+        (GCRY_MD_SM3, 0,
+         "abcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcd", 
64,
+         "\xde\xbe\x9f\xf9\x22\x75\xb8\xa1\x38\x60\x48\x89\xc1\x8e\x5a\x4d"
+         "\x6f\xdb\x70\xe5\x38\x7e\x57\x65\x29\x3d\xcb\xa3\x9c\x0c\x57\x32",
+         32);
+      if (errtxt)
+        goto failed;
+
+      what = "long string";
+      errtxt = _gcry_hash_selftest_check_one
+        (GCRY_MD_SM3, 0,
+         "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq", 56,
+         "\x63\x9b\x6c\xc5\xe6\x4d\x9e\x37\xa3\x90\xb1\x92\xdf\x4f\xa1\xea"
+         "\x07\x20\xab\x74\x7f\xf6\x92\xb9\xf3\x8c\x4e\x66\xad\x7b\x8c\x05",
+         32);
+      if (errtxt)
+        goto failed;
+
+      what = "one million \"a\"";
+      errtxt = _gcry_hash_selftest_check_one
+        (GCRY_MD_SM3, 1,
+         NULL, 0,
+         "\xc8\xaa\xf8\x94\x29\x55\x40\x29\xe2\x31\x94\x1a\x2a\xcc\x0a\xd6"
+         "\x1f\xf2\xa5\xac\xd8\xfa\xdd\x25\x84\x7a\x3a\x73\x2b\x3b\x02\xc3",
+         32);
+      if (errtxt)
+        goto failed;
+    }
+
+  return 0; /* Succeeded. */
+
+ failed:
+  if (report)
+    report ("digest", GCRY_MD_SM3, what, errtxt);
+  return GPG_ERR_SELFTEST_FAILED;
+}
+
+
+/* Run a full self-test for ALGO and return 0 on success.  */
+static gpg_err_code_t
+run_selftests (int algo, int extended, selftest_report_func_t report)
+{
+  gpg_err_code_t ec;
+
+  switch (algo)
+    {
+    case GCRY_MD_SM3:
+      ec = selftests_sm3 (extended, report);
+      break;
+    default:
+      ec = GPG_ERR_DIGEST_ALGO;
+      break;
+
+    }
+  return ec;
+}
+
+static const byte asn_sm3[] = /* Object ID is 1.2.156.10197.401 */
+  { 0x30, 0x2F, 0x30, 0x0B, 0x06, 0x07, 0x2A, 0x81,
+    0x1C, 0xCF, 0x55, 0x83, 0x11, 0x05, 0x00, 0x04,
+    0x20 };
+
+static const gcry_md_oid_spec_t oid_spec_sm3[] =
+  {
+    /* China Electronics Standardization Instutute,
+       OID White paper (2015), Table 6 */
+    { "1.2.156.10197.401" },
+    { NULL },
+  };
+
+const gcry_md_spec_t _gcry_digest_spec_sm3 =
+  {
+    GCRY_MD_SM3, {0, 0},
+    "SM3", asn_sm3, DIM (asn_sm3), oid_spec_sm3, 32,
+    sm3_init, _gcry_md_block_write, sm3_final, sm3_read, NULL,
+    _gcry_sm3_hash_buffers,
+    sizeof (SM3_CONTEXT),
+    run_selftests
+  };
diff --git a/grub-core/lib/libgcrypt/cipher/sm4-aesni-avx-amd64.S 
b/grub-core/lib/libgcrypt/cipher/sm4-aesni-avx-amd64.S
new file mode 100644
index 000000000..7a99e070d
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/sm4-aesni-avx-amd64.S
@@ -0,0 +1,987 @@
+/* sm4-avx-aesni-amd64.S  -  AES-NI/AVX implementation of SM4 cipher
+ *
+ * Copyright (C) 2020 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/* Based on SM4 AES-NI work by Markku-Juhani O. Saarinen at:
+ *  https://github.com/mjosaarinen/sm4ni
+ */
+
+#include <config.h>
+
+#ifdef __x86_64
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
+    defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX_SUPPORT)
+
+#include "asm-common-amd64.h"
+
+/* vector registers */
+#define RX0          %xmm0
+#define RX1          %xmm1
+#define MASK_4BIT    %xmm2
+#define RTMP0        %xmm3
+#define RTMP1        %xmm4
+#define RTMP2        %xmm5
+#define RTMP3        %xmm6
+#define RTMP4        %xmm7
+
+#define RA0          %xmm8
+#define RA1          %xmm9
+#define RA2          %xmm10
+#define RA3          %xmm11
+
+#define RB0          %xmm12
+#define RB1          %xmm13
+#define RB2          %xmm14
+#define RB3          %xmm15
+
+#define RNOT         %xmm0
+#define RBSWAP       %xmm1
+
+/**********************************************************************
+  helper macros
+ **********************************************************************/
+
+/* Transpose four 32-bit words between 128-bit vectors. */
+#define transpose_4x4(x0, x1, x2, x3, t1, t2) \
+       vpunpckhdq x1, x0, t2; \
+       vpunpckldq x1, x0, x0; \
+       \
+       vpunpckldq x3, x2, t1; \
+       vpunpckhdq x3, x2, x2; \
+       \
+       vpunpckhqdq t1, x0, x1; \
+       vpunpcklqdq t1, x0, x0; \
+       \
+       vpunpckhqdq x2, t2, x3; \
+       vpunpcklqdq x2, t2, x2;
+
+/* post-SubByte transform. */
+#define transform_pre(x, lo_t, hi_t, mask4bit, tmp0) \
+       vpand x, mask4bit, tmp0; \
+       vpandn x, mask4bit, x; \
+       vpsrld $4, x, x; \
+       \
+       vpshufb tmp0, lo_t, tmp0; \
+       vpshufb x, hi_t, x; \
+       vpxor tmp0, x, x;
+
+/* post-SubByte transform. Note: x has been XOR'ed with mask4bit by
+ * 'vaeslastenc' instruction. */
+#define transform_post(x, lo_t, hi_t, mask4bit, tmp0) \
+       vpandn mask4bit, x, tmp0; \
+       vpsrld $4, x, x; \
+       vpand x, mask4bit, x; \
+       \
+       vpshufb tmp0, lo_t, tmp0; \
+       vpshufb x, hi_t, x; \
+       vpxor tmp0, x, x;
+
+/**********************************************************************
+  4-way && 8-way SM4 with AES-NI and AVX
+ **********************************************************************/
+
+.text
+.align 16
+
+/*
+ * Following four affine transform look-up tables are from work by
+ * Markku-Juhani O. Saarinen, at https://github.com/mjosaarinen/sm4ni
+ *
+ * These allow exposing SM4 S-Box from AES SubByte.
+ */
+
+/* pre-SubByte affine transform, from SM4 field to AES field. */
+.Lpre_tf_lo_s:
+       .quad 0x9197E2E474720701, 0xC7C1B4B222245157
+.Lpre_tf_hi_s:
+       .quad 0xE240AB09EB49A200, 0xF052B91BF95BB012
+
+/* post-SubByte affine transform, from AES field to SM4 field. */
+.Lpost_tf_lo_s:
+       .quad 0x5B67F2CEA19D0834, 0xEDD14478172BBE82
+.Lpost_tf_hi_s:
+       .quad 0xAE7201DD73AFDC00, 0x11CDBE62CC1063BF
+
+/* For isolating SubBytes from AESENCLAST, inverse shift row */
+.Linv_shift_row:
+       .byte 0x00, 0x0d, 0x0a, 0x07, 0x04, 0x01, 0x0e, 0x0b
+       .byte 0x08, 0x05, 0x02, 0x0f, 0x0c, 0x09, 0x06, 0x03
+
+/* Inverse shift row + Rotate left by 8 bits on 32-bit words with vpshufb */
+.Linv_shift_row_rol_8:
+       .byte 0x07, 0x00, 0x0d, 0x0a, 0x0b, 0x04, 0x01, 0x0e
+       .byte 0x0f, 0x08, 0x05, 0x02, 0x03, 0x0c, 0x09, 0x06
+
+/* Inverse shift row + Rotate left by 16 bits on 32-bit words with vpshufb */
+.Linv_shift_row_rol_16:
+       .byte 0x0a, 0x07, 0x00, 0x0d, 0x0e, 0x0b, 0x04, 0x01
+       .byte 0x02, 0x0f, 0x08, 0x05, 0x06, 0x03, 0x0c, 0x09
+
+/* Inverse shift row + Rotate left by 24 bits on 32-bit words with vpshufb */
+.Linv_shift_row_rol_24:
+       .byte 0x0d, 0x0a, 0x07, 0x00, 0x01, 0x0e, 0x0b, 0x04
+       .byte 0x05, 0x02, 0x0f, 0x08, 0x09, 0x06, 0x03, 0x0c
+
+/* For CTR-mode IV byteswap */
+.Lbswap128_mask:
+       .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
+
+/* For input word byte-swap */
+.Lbswap32_mask:
+       .byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
+
+.align 4
+/* 4-bit mask */
+.L0f0f0f0f:
+       .long 0x0f0f0f0f
+
+.align 8
+.globl _gcry_sm4_aesni_avx_expand_key
+ELF(.type   _gcry_sm4_aesni_avx_expand_key,@function;)
+_gcry_sm4_aesni_avx_expand_key:
+       /* input:
+        *      %rdi: 128-bit key
+        *      %rsi: rkey_enc
+        *      %rdx: rkey_dec
+        *      %rcx: fk array
+        *      %r8: ck array
+        */
+       CFI_STARTPROC();
+
+       vmovd 0*4(%rdi), RA0;
+       vmovd 1*4(%rdi), RA1;
+       vmovd 2*4(%rdi), RA2;
+       vmovd 3*4(%rdi), RA3;
+
+       vmovdqa .Lbswap32_mask rRIP, RTMP2;
+       vpshufb RTMP2, RA0, RA0;
+       vpshufb RTMP2, RA1, RA1;
+       vpshufb RTMP2, RA2, RA2;
+       vpshufb RTMP2, RA3, RA3;
+
+       vmovd 0*4(%rcx), RB0;
+       vmovd 1*4(%rcx), RB1;
+       vmovd 2*4(%rcx), RB2;
+       vmovd 3*4(%rcx), RB3;
+       vpxor RB0, RA0, RA0;
+       vpxor RB1, RA1, RA1;
+       vpxor RB2, RA2, RA2;
+       vpxor RB3, RA3, RA3;
+
+       vbroadcastss .L0f0f0f0f rRIP, MASK_4BIT;
+       vmovdqa .Lpre_tf_lo_s rRIP, RTMP4;
+       vmovdqa .Lpre_tf_hi_s rRIP, RB0;
+       vmovdqa .Lpost_tf_lo_s rRIP, RB1;
+       vmovdqa .Lpost_tf_hi_s rRIP, RB2;
+       vmovdqa .Linv_shift_row rRIP, RB3;
+
+#define ROUND(round, s0, s1, s2, s3) \
+       vbroadcastss (4*(round))(%r8), RX0; \
+       vpxor s1, RX0, RX0; \
+       vpxor s2, RX0, RX0; \
+       vpxor s3, RX0, RX0; /* s1 ^ s2 ^ s3 ^ rk */ \
+       \
+       /* sbox, non-linear part */ \
+       transform_pre(RX0, RTMP4, RB0, MASK_4BIT, RTMP0); \
+       vaesenclast MASK_4BIT, RX0, RX0; \
+       transform_post(RX0, RB1, RB2, MASK_4BIT, RTMP0); \
+       \
+       /* linear part */ \
+       vpshufb RB3, RX0, RX0; \
+       vpxor RX0, s0, s0; /* s0 ^ x */ \
+       vpslld $13, RX0, RTMP0; \
+       vpsrld $19, RX0, RTMP1; \
+       vpslld $23, RX0, RTMP2; \
+       vpsrld $9, RX0, RTMP3; \
+       vpxor RTMP0, RTMP1, RTMP1;  \
+       vpxor RTMP2, RTMP3, RTMP3;  \
+       vpxor RTMP1, s0, s0; /* s0 ^ x ^ rol(x,13) */ \
+       vpxor RTMP3, s0, s0; /* s0 ^ x ^ rol(x,13) ^ rol(x,23) */
+
+       leaq (32*4)(%r8), %rax;
+       leaq (32*4)(%rdx), %rdx;
+.align 16
+.Lroundloop_expand_key:
+       leaq (-4*4)(%rdx), %rdx;
+       ROUND(0, RA0, RA1, RA2, RA3);
+       ROUND(1, RA1, RA2, RA3, RA0);
+       ROUND(2, RA2, RA3, RA0, RA1);
+       ROUND(3, RA3, RA0, RA1, RA2);
+       leaq (4*4)(%r8), %r8;
+       vmovd RA0, (0*4)(%rsi);
+       vmovd RA1, (1*4)(%rsi);
+       vmovd RA2, (2*4)(%rsi);
+       vmovd RA3, (3*4)(%rsi);
+       vmovd RA0, (3*4)(%rdx);
+       vmovd RA1, (2*4)(%rdx);
+       vmovd RA2, (1*4)(%rdx);
+       vmovd RA3, (0*4)(%rdx);
+       leaq (4*4)(%rsi), %rsi;
+       cmpq %rax, %r8;
+       jne .Lroundloop_expand_key;
+
+#undef ROUND
+
+       vzeroall;
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size _gcry_sm4_aesni_avx_expand_key,.-_gcry_sm4_aesni_avx_expand_key;)
+
+.align 8
+ELF(.type   sm4_aesni_avx_crypt_blk1_4,@function;)
+sm4_aesni_avx_crypt_blk1_4:
+       /* input:
+        *      %rdi: round key array, CTX
+        *      %rsi: dst (1..4 blocks)
+        *      %rdx: src (1..4 blocks)
+        *      %rcx: num blocks (1..4)
+        */
+       CFI_STARTPROC();
+
+       vmovdqu 0*16(%rdx), RA0;
+       vmovdqa RA0, RA1;
+       vmovdqa RA0, RA2;
+       vmovdqa RA0, RA3;
+       cmpq $2, %rcx;
+       jb .Lblk4_load_input_done;
+       vmovdqu 1*16(%rdx), RA1;
+       je .Lblk4_load_input_done;
+       vmovdqu 2*16(%rdx), RA2;
+       cmpq $3, %rcx;
+       je .Lblk4_load_input_done;
+       vmovdqu 3*16(%rdx), RA3;
+
+.Lblk4_load_input_done:
+
+       vmovdqa .Lbswap32_mask rRIP, RTMP2;
+       vpshufb RTMP2, RA0, RA0;
+       vpshufb RTMP2, RA1, RA1;
+       vpshufb RTMP2, RA2, RA2;
+       vpshufb RTMP2, RA3, RA3;
+
+       vbroadcastss .L0f0f0f0f rRIP, MASK_4BIT;
+       vmovdqa .Lpre_tf_lo_s rRIP, RTMP4;
+       vmovdqa .Lpre_tf_hi_s rRIP, RB0;
+       vmovdqa .Lpost_tf_lo_s rRIP, RB1;
+       vmovdqa .Lpost_tf_hi_s rRIP, RB2;
+       vmovdqa .Linv_shift_row rRIP, RB3;
+       vmovdqa .Linv_shift_row_rol_8 rRIP, RTMP2;
+       vmovdqa .Linv_shift_row_rol_16 rRIP, RTMP3;
+       transpose_4x4(RA0, RA1, RA2, RA3, RTMP0, RTMP1);
+
+#define ROUND(round, s0, s1, s2, s3) \
+       vbroadcastss (4*(round))(%rdi), RX0; \
+       vpxor s1, RX0, RX0; \
+       vpxor s2, RX0, RX0; \
+       vpxor s3, RX0, RX0; /* s1 ^ s2 ^ s3 ^ rk */ \
+       \
+       /* sbox, non-linear part */ \
+       transform_pre(RX0, RTMP4, RB0, MASK_4BIT, RTMP0); \
+       vaesenclast MASK_4BIT, RX0, RX0; \
+       transform_post(RX0, RB1, RB2, MASK_4BIT, RTMP0); \
+       \
+       /* linear part */ \
+       vpshufb RB3, RX0, RTMP0; \
+       vpxor RTMP0, s0, s0; /* s0 ^ x */ \
+       vpshufb RTMP2, RX0, RTMP1; \
+       vpxor RTMP1, RTMP0, RTMP0; /* x ^ rol(x,8) */ \
+       vpshufb RTMP3, RX0, RTMP1; \
+       vpxor RTMP1, RTMP0, RTMP0; /* x ^ rol(x,8) ^ rol(x,16) */ \
+       vpshufb .Linv_shift_row_rol_24 rRIP, RX0, RTMP1; \
+       vpxor RTMP1, s0, s0; /* s0 ^ x ^ rol(x,24) */ \
+       vpslld $2, RTMP0, RTMP1; \
+       vpsrld $30, RTMP0, RTMP0; \
+       vpxor RTMP0, s0, s0;  \
+       vpxor RTMP1, s0, s0; /* s0 ^ x ^ rol(x,2) ^ rol(x,10) ^ rol(x,18) ^ 
rol(x,24) */
+
+       leaq (32*4)(%rdi), %rax;
+.align 16
+.Lroundloop_blk4:
+       ROUND(0, RA0, RA1, RA2, RA3);
+       ROUND(1, RA1, RA2, RA3, RA0);
+       ROUND(2, RA2, RA3, RA0, RA1);
+       ROUND(3, RA3, RA0, RA1, RA2);
+       leaq (4*4)(%rdi), %rdi;
+       cmpq %rax, %rdi;
+       jne .Lroundloop_blk4;
+
+#undef ROUND
+
+       vmovdqa .Lbswap128_mask rRIP, RTMP2;
+
+       transpose_4x4(RA0, RA1, RA2, RA3, RTMP0, RTMP1);
+       vpshufb RTMP2, RA0, RA0;
+       vpshufb RTMP2, RA1, RA1;
+       vpshufb RTMP2, RA2, RA2;
+       vpshufb RTMP2, RA3, RA3;
+
+       vmovdqu RA0, 0*16(%rsi);
+       cmpq $2, %rcx;
+       jb .Lblk4_store_output_done;
+       vmovdqu RA1, 1*16(%rsi);
+       je .Lblk4_store_output_done;
+       vmovdqu RA2, 2*16(%rsi);
+       cmpq $3, %rcx;
+       je .Lblk4_store_output_done;
+       vmovdqu RA3, 3*16(%rsi);
+
+.Lblk4_store_output_done:
+       vzeroall;
+       xorl %eax, %eax;
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size sm4_aesni_avx_crypt_blk1_4,.-sm4_aesni_avx_crypt_blk1_4;)
+
+.align 8
+ELF(.type __sm4_crypt_blk8,@function;)
+__sm4_crypt_blk8:
+       /* input:
+        *      %rdi: round key array, CTX
+        *      RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: eight parallel
+        *                                              ciphertext blocks
+        * output:
+        *      RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: eight parallel plaintext
+        *                                              blocks
+        */
+       CFI_STARTPROC();
+
+       vmovdqa .Lbswap32_mask rRIP, RTMP2;
+       vpshufb RTMP2, RA0, RA0;
+       vpshufb RTMP2, RA1, RA1;
+       vpshufb RTMP2, RA2, RA2;
+       vpshufb RTMP2, RA3, RA3;
+       vpshufb RTMP2, RB0, RB0;
+       vpshufb RTMP2, RB1, RB1;
+       vpshufb RTMP2, RB2, RB2;
+       vpshufb RTMP2, RB3, RB3;
+
+       vbroadcastss .L0f0f0f0f rRIP, MASK_4BIT;
+       transpose_4x4(RA0, RA1, RA2, RA3, RTMP0, RTMP1);
+       transpose_4x4(RB0, RB1, RB2, RB3, RTMP0, RTMP1);
+
+#define ROUND(round, s0, s1, s2, s3, r0, r1, r2, r3) \
+       vbroadcastss (4*(round))(%rdi), RX0; \
+       vmovdqa .Lpre_tf_lo_s rRIP, RTMP4; \
+       vmovdqa .Lpre_tf_hi_s rRIP, RTMP1; \
+       vmovdqa RX0, RX1; \
+       vpxor s1, RX0, RX0; \
+       vpxor s2, RX0, RX0; \
+       vpxor s3, RX0, RX0; /* s1 ^ s2 ^ s3 ^ rk */ \
+           vmovdqa .Lpost_tf_lo_s rRIP, RTMP2; \
+           vmovdqa .Lpost_tf_hi_s rRIP, RTMP3; \
+           vpxor r1, RX1, RX1; \
+           vpxor r2, RX1, RX1; \
+           vpxor r3, RX1, RX1; /* r1 ^ r2 ^ r3 ^ rk */ \
+       \
+       /* sbox, non-linear part */ \
+       transform_pre(RX0, RTMP4, RTMP1, MASK_4BIT, RTMP0); \
+           transform_pre(RX1, RTMP4, RTMP1, MASK_4BIT, RTMP0); \
+           vmovdqa .Linv_shift_row rRIP, RTMP4; \
+       vaesenclast MASK_4BIT, RX0, RX0; \
+           vaesenclast MASK_4BIT, RX1, RX1; \
+       transform_post(RX0, RTMP2, RTMP3, MASK_4BIT, RTMP0); \
+           transform_post(RX1, RTMP2, RTMP3, MASK_4BIT, RTMP0); \
+       \
+       /* linear part */ \
+       vpshufb RTMP4, RX0, RTMP0; \
+       vpxor RTMP0, s0, s0; /* s0 ^ x */ \
+           vpshufb RTMP4, RX1, RTMP2; \
+           vmovdqa .Linv_shift_row_rol_8 rRIP, RTMP4; \
+           vpxor RTMP2, r0, r0; /* r0 ^ x */ \
+       vpshufb RTMP4, RX0, RTMP1; \
+       vpxor RTMP1, RTMP0, RTMP0; /* x ^ rol(x,8) */ \
+           vpshufb RTMP4, RX1, RTMP3; \
+           vmovdqa .Linv_shift_row_rol_16 rRIP, RTMP4; \
+           vpxor RTMP3, RTMP2, RTMP2; /* x ^ rol(x,8) */ \
+       vpshufb RTMP4, RX0, RTMP1; \
+       vpxor RTMP1, RTMP0, RTMP0; /* x ^ rol(x,8) ^ rol(x,16) */ \
+           vpshufb RTMP4, RX1, RTMP3; \
+           vmovdqa .Linv_shift_row_rol_24 rRIP, RTMP4; \
+           vpxor RTMP3, RTMP2, RTMP2; /* x ^ rol(x,8) ^ rol(x,16) */ \
+       vpshufb RTMP4, RX0, RTMP1; \
+       vpxor RTMP1, s0, s0; /* s0 ^ x ^ rol(x,24) */ \
+       vpslld $2, RTMP0, RTMP1; \
+       vpsrld $30, RTMP0, RTMP0; \
+       vpxor RTMP0, s0, s0;  \
+       vpxor RTMP1, s0, s0; /* s0 ^ x ^ rol(x,2) ^ rol(x,10) ^ rol(x,18) ^ 
rol(x,24) */ \
+           vpshufb RTMP4, RX1, RTMP3; \
+           vpxor RTMP3, r0, r0; /* r0 ^ x ^ rol(x,24) */ \
+           vpslld $2, RTMP2, RTMP3; \
+           vpsrld $30, RTMP2, RTMP2; \
+           vpxor RTMP2, r0, r0;  \
+           vpxor RTMP3, r0, r0; /* r0 ^ x ^ rol(x,2) ^ rol(x,10) ^ rol(x,18) ^ 
rol(x,24) */
+
+       leaq (32*4)(%rdi), %rax;
+.align 16
+.Lroundloop_blk8:
+       ROUND(0, RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3);
+       ROUND(1, RA1, RA2, RA3, RA0, RB1, RB2, RB3, RB0);
+       ROUND(2, RA2, RA3, RA0, RA1, RB2, RB3, RB0, RB1);
+       ROUND(3, RA3, RA0, RA1, RA2, RB3, RB0, RB1, RB2);
+       leaq (4*4)(%rdi), %rdi;
+       cmpq %rax, %rdi;
+       jne .Lroundloop_blk8;
+
+#undef ROUND
+
+       vmovdqa .Lbswap128_mask rRIP, RTMP2;
+
+       transpose_4x4(RA0, RA1, RA2, RA3, RTMP0, RTMP1);
+       transpose_4x4(RB0, RB1, RB2, RB3, RTMP0, RTMP1);
+       vpshufb RTMP2, RA0, RA0;
+       vpshufb RTMP2, RA1, RA1;
+       vpshufb RTMP2, RA2, RA2;
+       vpshufb RTMP2, RA3, RA3;
+       vpshufb RTMP2, RB0, RB0;
+       vpshufb RTMP2, RB1, RB1;
+       vpshufb RTMP2, RB2, RB2;
+       vpshufb RTMP2, RB3, RB3;
+
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size __sm4_crypt_blk8,.-__sm4_crypt_blk8;)
+
+.align 8
+.globl _gcry_sm4_aesni_avx_crypt_blk1_8
+ELF(.type   _gcry_sm4_aesni_avx_crypt_blk1_8,@function;)
+_gcry_sm4_aesni_avx_crypt_blk1_8:
+       /* input:
+        *      %rdi: round key array, CTX
+        *      %rsi: dst (1..8 blocks)
+        *      %rdx: src (1..8 blocks)
+        *      %rcx: num blocks (1..8)
+        */
+       CFI_STARTPROC();
+
+       cmpq $5, %rcx;
+       jb sm4_aesni_avx_crypt_blk1_4;
+       vmovdqu (0 * 16)(%rdx), RA0;
+       vmovdqu (1 * 16)(%rdx), RA1;
+       vmovdqu (2 * 16)(%rdx), RA2;
+       vmovdqu (3 * 16)(%rdx), RA3;
+       vmovdqu (4 * 16)(%rdx), RB0;
+       vmovdqa RB0, RB1;
+       vmovdqa RB0, RB2;
+       vmovdqa RB0, RB3;
+       je .Lblk8_load_input_done;
+       vmovdqu (5 * 16)(%rdx), RB1;
+       cmpq $7, %rcx;
+       jb .Lblk8_load_input_done;
+       vmovdqu (6 * 16)(%rdx), RB2;
+       je .Lblk8_load_input_done;
+       vmovdqu (7 * 16)(%rdx), RB3;
+
+.Lblk8_load_input_done:
+       call __sm4_crypt_blk8;
+
+       cmpq $6, %rcx;
+       vmovdqu RA0, (0 * 16)(%rsi);
+       vmovdqu RA1, (1 * 16)(%rsi);
+       vmovdqu RA2, (2 * 16)(%rsi);
+       vmovdqu RA3, (3 * 16)(%rsi);
+       vmovdqu RB0, (4 * 16)(%rsi);
+       jb .Lblk8_store_output_done;
+       vmovdqu RB1, (5 * 16)(%rsi);
+       je .Lblk8_store_output_done;
+       vmovdqu RB2, (6 * 16)(%rsi);
+       cmpq $7, %rcx;
+       je .Lblk8_store_output_done;
+       vmovdqu RB3, (7 * 16)(%rsi);
+
+.Lblk8_store_output_done:
+       vzeroall;
+       xorl %eax, %eax;
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size _gcry_sm4_aesni_avx_crypt_blk1_8,.-_gcry_sm4_aesni_avx_crypt_blk1_8;)
+
+.align 8
+.globl _gcry_sm4_aesni_avx_ctr_enc
+ELF(.type   _gcry_sm4_aesni_avx_ctr_enc,@function;)
+_gcry_sm4_aesni_avx_ctr_enc:
+       /* input:
+        *      %rdi: round key array, CTX
+        *      %rsi: dst (8 blocks)
+        *      %rdx: src (8 blocks)
+        *      %rcx: iv (big endian, 128bit)
+        */
+       CFI_STARTPROC();
+
+       /* load IV and byteswap */
+       vmovdqu (%rcx), RA0;
+
+       vmovdqa .Lbswap128_mask rRIP, RBSWAP;
+       vpshufb RBSWAP, RA0, RTMP0; /* be => le */
+
+       vpcmpeqd RNOT, RNOT, RNOT;
+       vpsrldq $8, RNOT, RNOT; /* low: -1, high: 0 */
+
+#define inc_le128(x, minus_one, tmp) \
+       vpcmpeqq minus_one, x, tmp; \
+       vpsubq minus_one, x, x; \
+       vpslldq $8, tmp, tmp; \
+       vpsubq tmp, x, x;
+
+       /* construct IVs */
+       inc_le128(RTMP0, RNOT, RTMP2); /* +1 */
+       vpshufb RBSWAP, RTMP0, RA1;
+       inc_le128(RTMP0, RNOT, RTMP2); /* +2 */
+       vpshufb RBSWAP, RTMP0, RA2;
+       inc_le128(RTMP0, RNOT, RTMP2); /* +3 */
+       vpshufb RBSWAP, RTMP0, RA3;
+       inc_le128(RTMP0, RNOT, RTMP2); /* +4 */
+       vpshufb RBSWAP, RTMP0, RB0;
+       inc_le128(RTMP0, RNOT, RTMP2); /* +5 */
+       vpshufb RBSWAP, RTMP0, RB1;
+       inc_le128(RTMP0, RNOT, RTMP2); /* +6 */
+       vpshufb RBSWAP, RTMP0, RB2;
+       inc_le128(RTMP0, RNOT, RTMP2); /* +7 */
+       vpshufb RBSWAP, RTMP0, RB3;
+       inc_le128(RTMP0, RNOT, RTMP2); /* +8 */
+       vpshufb RBSWAP, RTMP0, RTMP1;
+
+       /* store new IV */
+       vmovdqu RTMP1, (%rcx);
+
+       call __sm4_crypt_blk8;
+
+       vpxor (0 * 16)(%rdx), RA0, RA0;
+       vpxor (1 * 16)(%rdx), RA1, RA1;
+       vpxor (2 * 16)(%rdx), RA2, RA2;
+       vpxor (3 * 16)(%rdx), RA3, RA3;
+       vpxor (4 * 16)(%rdx), RB0, RB0;
+       vpxor (5 * 16)(%rdx), RB1, RB1;
+       vpxor (6 * 16)(%rdx), RB2, RB2;
+       vpxor (7 * 16)(%rdx), RB3, RB3;
+
+       vmovdqu RA0, (0 * 16)(%rsi);
+       vmovdqu RA1, (1 * 16)(%rsi);
+       vmovdqu RA2, (2 * 16)(%rsi);
+       vmovdqu RA3, (3 * 16)(%rsi);
+       vmovdqu RB0, (4 * 16)(%rsi);
+       vmovdqu RB1, (5 * 16)(%rsi);
+       vmovdqu RB2, (6 * 16)(%rsi);
+       vmovdqu RB3, (7 * 16)(%rsi);
+
+       vzeroall;
+
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size _gcry_sm4_aesni_avx_ctr_enc,.-_gcry_sm4_aesni_avx_ctr_enc;)
+
+.align 8
+.globl _gcry_sm4_aesni_avx_cbc_dec
+ELF(.type   _gcry_sm4_aesni_avx_cbc_dec,@function;)
+_gcry_sm4_aesni_avx_cbc_dec:
+       /* input:
+        *      %rdi: round key array, CTX
+        *      %rsi: dst (8 blocks)
+        *      %rdx: src (8 blocks)
+        *      %rcx: iv
+        */
+       CFI_STARTPROC();
+
+       vmovdqu (0 * 16)(%rdx), RA0;
+       vmovdqu (1 * 16)(%rdx), RA1;
+       vmovdqu (2 * 16)(%rdx), RA2;
+       vmovdqu (3 * 16)(%rdx), RA3;
+       vmovdqu (4 * 16)(%rdx), RB0;
+       vmovdqu (5 * 16)(%rdx), RB1;
+       vmovdqu (6 * 16)(%rdx), RB2;
+       vmovdqu (7 * 16)(%rdx), RB3;
+
+       call __sm4_crypt_blk8;
+
+       vmovdqu (7 * 16)(%rdx), RNOT;
+       vpxor (%rcx), RA0, RA0;
+       vpxor (0 * 16)(%rdx), RA1, RA1;
+       vpxor (1 * 16)(%rdx), RA2, RA2;
+       vpxor (2 * 16)(%rdx), RA3, RA3;
+       vpxor (3 * 16)(%rdx), RB0, RB0;
+       vpxor (4 * 16)(%rdx), RB1, RB1;
+       vpxor (5 * 16)(%rdx), RB2, RB2;
+       vpxor (6 * 16)(%rdx), RB3, RB3;
+       vmovdqu RNOT, (%rcx); /* store new IV */
+
+       vmovdqu RA0, (0 * 16)(%rsi);
+       vmovdqu RA1, (1 * 16)(%rsi);
+       vmovdqu RA2, (2 * 16)(%rsi);
+       vmovdqu RA3, (3 * 16)(%rsi);
+       vmovdqu RB0, (4 * 16)(%rsi);
+       vmovdqu RB1, (5 * 16)(%rsi);
+       vmovdqu RB2, (6 * 16)(%rsi);
+       vmovdqu RB3, (7 * 16)(%rsi);
+
+       vzeroall;
+
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size _gcry_sm4_aesni_avx_cbc_dec,.-_gcry_sm4_aesni_avx_cbc_dec;)
+
+.align 8
+.globl _gcry_sm4_aesni_avx_cfb_dec
+ELF(.type   _gcry_sm4_aesni_avx_cfb_dec,@function;)
+_gcry_sm4_aesni_avx_cfb_dec:
+       /* input:
+        *      %rdi: round key array, CTX
+        *      %rsi: dst (8 blocks)
+        *      %rdx: src (8 blocks)
+        *      %rcx: iv
+        */
+       CFI_STARTPROC();
+
+       /* Load input */
+       vmovdqu (%rcx), RA0;
+       vmovdqu 0 * 16(%rdx), RA1;
+       vmovdqu 1 * 16(%rdx), RA2;
+       vmovdqu 2 * 16(%rdx), RA3;
+       vmovdqu 3 * 16(%rdx), RB0;
+       vmovdqu 4 * 16(%rdx), RB1;
+       vmovdqu 5 * 16(%rdx), RB2;
+       vmovdqu 6 * 16(%rdx), RB3;
+
+       /* Update IV */
+       vmovdqu 7 * 16(%rdx), RNOT;
+       vmovdqu RNOT, (%rcx);
+
+       call __sm4_crypt_blk8;
+
+       vpxor (0 * 16)(%rdx), RA0, RA0;
+       vpxor (1 * 16)(%rdx), RA1, RA1;
+       vpxor (2 * 16)(%rdx), RA2, RA2;
+       vpxor (3 * 16)(%rdx), RA3, RA3;
+       vpxor (4 * 16)(%rdx), RB0, RB0;
+       vpxor (5 * 16)(%rdx), RB1, RB1;
+       vpxor (6 * 16)(%rdx), RB2, RB2;
+       vpxor (7 * 16)(%rdx), RB3, RB3;
+
+       vmovdqu RA0, (0 * 16)(%rsi);
+       vmovdqu RA1, (1 * 16)(%rsi);
+       vmovdqu RA2, (2 * 16)(%rsi);
+       vmovdqu RA3, (3 * 16)(%rsi);
+       vmovdqu RB0, (4 * 16)(%rsi);
+       vmovdqu RB1, (5 * 16)(%rsi);
+       vmovdqu RB2, (6 * 16)(%rsi);
+       vmovdqu RB3, (7 * 16)(%rsi);
+
+       vzeroall;
+
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size _gcry_sm4_aesni_avx_cfb_dec,.-_gcry_sm4_aesni_avx_cfb_dec;)
+
+.align 8
+.globl _gcry_sm4_aesni_avx_ocb_enc
+ELF(.type _gcry_sm4_aesni_avx_ocb_enc,@function;)
+
+_gcry_sm4_aesni_avx_ocb_enc:
+       /* input:
+        *      %rdi: round key array, CTX
+        *      %rsi: dst (8 blocks)
+        *      %rdx: src (8 blocks)
+        *      %rcx: offset
+        *      %r8 : checksum
+        *      %r9 : L pointers (void *L[8])
+        */
+       CFI_STARTPROC();
+
+       subq $(4 * 8), %rsp;
+       CFI_ADJUST_CFA_OFFSET(4 * 8);
+
+       movq %r10, (0 * 8)(%rsp);
+       movq %r11, (1 * 8)(%rsp);
+       movq %r12, (2 * 8)(%rsp);
+       movq %r13, (3 * 8)(%rsp);
+       CFI_REL_OFFSET(%r10, 0 * 8);
+       CFI_REL_OFFSET(%r11, 1 * 8);
+       CFI_REL_OFFSET(%r12, 2 * 8);
+       CFI_REL_OFFSET(%r13, 3 * 8);
+
+       vmovdqu (%rcx), RTMP0;
+       vmovdqu (%r8), RTMP1;
+
+       /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+       /* Checksum_i = Checksum_{i-1} xor P_i  */
+       /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i)  */
+
+#define OCB_INPUT(n, lreg, xreg) \
+         vmovdqu (n * 16)(%rdx), xreg; \
+         vpxor (lreg), RTMP0, RTMP0; \
+         vpxor xreg, RTMP1, RTMP1; \
+         vpxor RTMP0, xreg, xreg; \
+         vmovdqu RTMP0, (n * 16)(%rsi);
+       movq (0 * 8)(%r9), %r10;
+       movq (1 * 8)(%r9), %r11;
+       movq (2 * 8)(%r9), %r12;
+       movq (3 * 8)(%r9), %r13;
+       OCB_INPUT(0, %r10, RA0);
+       OCB_INPUT(1, %r11, RA1);
+       OCB_INPUT(2, %r12, RA2);
+       OCB_INPUT(3, %r13, RA3);
+       movq (4 * 8)(%r9), %r10;
+       movq (5 * 8)(%r9), %r11;
+       movq (6 * 8)(%r9), %r12;
+       movq (7 * 8)(%r9), %r13;
+       OCB_INPUT(4, %r10, RB0);
+       OCB_INPUT(5, %r11, RB1);
+       OCB_INPUT(6, %r12, RB2);
+       OCB_INPUT(7, %r13, RB3);
+#undef OCB_INPUT
+
+       vmovdqu RTMP0, (%rcx);
+       vmovdqu RTMP1, (%r8);
+
+       movq (0 * 8)(%rsp), %r10;
+       CFI_RESTORE(%r10);
+       movq (1 * 8)(%rsp), %r11;
+       CFI_RESTORE(%r11);
+       movq (2 * 8)(%rsp), %r12;
+       CFI_RESTORE(%r12);
+       movq (3 * 8)(%rsp), %r13;
+       CFI_RESTORE(%r13);
+
+       call __sm4_crypt_blk8;
+
+       addq $(4 * 8), %rsp;
+       CFI_ADJUST_CFA_OFFSET(-4 * 8);
+
+       vpxor (0 * 16)(%rsi), RA0, RA0;
+       vpxor (1 * 16)(%rsi), RA1, RA1;
+       vpxor (2 * 16)(%rsi), RA2, RA2;
+       vpxor (3 * 16)(%rsi), RA3, RA3;
+       vpxor (4 * 16)(%rsi), RB0, RB0;
+       vpxor (5 * 16)(%rsi), RB1, RB1;
+       vpxor (6 * 16)(%rsi), RB2, RB2;
+       vpxor (7 * 16)(%rsi), RB3, RB3;
+
+       vmovdqu RA0, (0 * 16)(%rsi);
+       vmovdqu RA1, (1 * 16)(%rsi);
+       vmovdqu RA2, (2 * 16)(%rsi);
+       vmovdqu RA3, (3 * 16)(%rsi);
+       vmovdqu RB0, (4 * 16)(%rsi);
+       vmovdqu RB1, (5 * 16)(%rsi);
+       vmovdqu RB2, (6 * 16)(%rsi);
+       vmovdqu RB3, (7 * 16)(%rsi);
+
+       vzeroall;
+
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size _gcry_sm4_aesni_avx_ocb_enc,.-_gcry_sm4_aesni_avx_ocb_enc;)
+
+.align 8
+.globl _gcry_sm4_aesni_avx_ocb_dec
+ELF(.type _gcry_sm4_aesni_avx_ocb_dec,@function;)
+
+_gcry_sm4_aesni_avx_ocb_dec:
+       /* input:
+        *      %rdi: round key array, CTX
+        *      %rsi: dst (8 blocks)
+        *      %rdx: src (8 blocks)
+        *      %rcx: offset
+        *      %r8 : checksum
+        *      %r9 : L pointers (void *L[8])
+        */
+       CFI_STARTPROC();
+
+       subq $(4 * 8), %rsp;
+       CFI_ADJUST_CFA_OFFSET(4 * 8);
+
+       movq %r10, (0 * 8)(%rsp);
+       movq %r11, (1 * 8)(%rsp);
+       movq %r12, (2 * 8)(%rsp);
+       movq %r13, (3 * 8)(%rsp);
+       CFI_REL_OFFSET(%r10, 0 * 8);
+       CFI_REL_OFFSET(%r11, 1 * 8);
+       CFI_REL_OFFSET(%r12, 2 * 8);
+       CFI_REL_OFFSET(%r13, 3 * 8);
+
+       movdqu (%rcx), RTMP0;
+
+       /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+       /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i)  */
+
+#define OCB_INPUT(n, lreg, xreg) \
+         vmovdqu (n * 16)(%rdx), xreg; \
+         vpxor (lreg), RTMP0, RTMP0; \
+         vpxor RTMP0, xreg, xreg; \
+         vmovdqu RTMP0, (n * 16)(%rsi);
+       movq (0 * 8)(%r9), %r10;
+       movq (1 * 8)(%r9), %r11;
+       movq (2 * 8)(%r9), %r12;
+       movq (3 * 8)(%r9), %r13;
+       OCB_INPUT(0, %r10, RA0);
+       OCB_INPUT(1, %r11, RA1);
+       OCB_INPUT(2, %r12, RA2);
+       OCB_INPUT(3, %r13, RA3);
+       movq (4 * 8)(%r9), %r10;
+       movq (5 * 8)(%r9), %r11;
+       movq (6 * 8)(%r9), %r12;
+       movq (7 * 8)(%r9), %r13;
+       OCB_INPUT(4, %r10, RB0);
+       OCB_INPUT(5, %r11, RB1);
+       OCB_INPUT(6, %r12, RB2);
+       OCB_INPUT(7, %r13, RB3);
+#undef OCB_INPUT
+
+       vmovdqu RTMP0, (%rcx);
+
+       movq (0 * 8)(%rsp), %r10;
+       CFI_RESTORE(%r10);
+       movq (1 * 8)(%rsp), %r11;
+       CFI_RESTORE(%r11);
+       movq (2 * 8)(%rsp), %r12;
+       CFI_RESTORE(%r12);
+       movq (3 * 8)(%rsp), %r13;
+       CFI_RESTORE(%r13);
+
+       call __sm4_crypt_blk8;
+
+       addq $(4 * 8), %rsp;
+       CFI_ADJUST_CFA_OFFSET(-4 * 8);
+
+       vmovdqu (%r8), RTMP0;
+
+       vpxor (0 * 16)(%rsi), RA0, RA0;
+       vpxor (1 * 16)(%rsi), RA1, RA1;
+       vpxor (2 * 16)(%rsi), RA2, RA2;
+       vpxor (3 * 16)(%rsi), RA3, RA3;
+       vpxor (4 * 16)(%rsi), RB0, RB0;
+       vpxor (5 * 16)(%rsi), RB1, RB1;
+       vpxor (6 * 16)(%rsi), RB2, RB2;
+       vpxor (7 * 16)(%rsi), RB3, RB3;
+
+       /* Checksum_i = Checksum_{i-1} xor P_i  */
+
+       vmovdqu RA0, (0 * 16)(%rsi);
+       vpxor RA0, RTMP0, RTMP0;
+       vmovdqu RA1, (1 * 16)(%rsi);
+       vpxor RA1, RTMP0, RTMP0;
+       vmovdqu RA2, (2 * 16)(%rsi);
+       vpxor RA2, RTMP0, RTMP0;
+       vmovdqu RA3, (3 * 16)(%rsi);
+       vpxor RA3, RTMP0, RTMP0;
+       vmovdqu RB0, (4 * 16)(%rsi);
+       vpxor RB0, RTMP0, RTMP0;
+       vmovdqu RB1, (5 * 16)(%rsi);
+       vpxor RB1, RTMP0, RTMP0;
+       vmovdqu RB2, (6 * 16)(%rsi);
+       vpxor RB2, RTMP0, RTMP0;
+       vmovdqu RB3, (7 * 16)(%rsi);
+       vpxor RB3, RTMP0, RTMP0;
+
+       vmovdqu RTMP0, (%r8);
+
+       vzeroall;
+
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size _gcry_sm4_aesni_avx_ocb_dec,.-_gcry_sm4_aesni_avx_ocb_dec;)
+
+.align 8
+.globl _gcry_sm4_aesni_avx_ocb_auth
+ELF(.type _gcry_sm4_aesni_avx_ocb_auth,@function;)
+
+_gcry_sm4_aesni_avx_ocb_auth:
+       /* input:
+        *      %rdi: round key array, CTX
+        *      %rsi: abuf (8 blocks)
+        *      %rdx: offset
+        *      %rcx: checksum
+        *      %r8 : L pointers (void *L[8])
+        */
+       CFI_STARTPROC();
+
+       subq $(4 * 8), %rsp;
+       CFI_ADJUST_CFA_OFFSET(4 * 8);
+
+       movq %r10, (0 * 8)(%rsp);
+       movq %r11, (1 * 8)(%rsp);
+       movq %r12, (2 * 8)(%rsp);
+       movq %r13, (3 * 8)(%rsp);
+       CFI_REL_OFFSET(%r10, 0 * 8);
+       CFI_REL_OFFSET(%r11, 1 * 8);
+       CFI_REL_OFFSET(%r12, 2 * 8);
+       CFI_REL_OFFSET(%r13, 3 * 8);
+
+       vmovdqu (%rdx), RTMP0;
+
+       /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+       /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i)  */
+
+#define OCB_INPUT(n, lreg, xreg) \
+         vmovdqu (n * 16)(%rsi), xreg; \
+         vpxor (lreg), RTMP0, RTMP0; \
+         vpxor RTMP0, xreg, xreg;
+       movq (0 * 8)(%r8), %r10;
+       movq (1 * 8)(%r8), %r11;
+       movq (2 * 8)(%r8), %r12;
+       movq (3 * 8)(%r8), %r13;
+       OCB_INPUT(0, %r10, RA0);
+       OCB_INPUT(1, %r11, RA1);
+       OCB_INPUT(2, %r12, RA2);
+       OCB_INPUT(3, %r13, RA3);
+       movq (4 * 8)(%r8), %r10;
+       movq (5 * 8)(%r8), %r11;
+       movq (6 * 8)(%r8), %r12;
+       movq (7 * 8)(%r8), %r13;
+       OCB_INPUT(4, %r10, RB0);
+       OCB_INPUT(5, %r11, RB1);
+       OCB_INPUT(6, %r12, RB2);
+       OCB_INPUT(7, %r13, RB3);
+#undef OCB_INPUT
+
+       vmovdqu RTMP0, (%rdx);
+
+       movq (0 * 8)(%rsp), %r10;
+       CFI_RESTORE(%r10);
+       movq (1 * 8)(%rsp), %r11;
+       CFI_RESTORE(%r11);
+       movq (2 * 8)(%rsp), %r12;
+       CFI_RESTORE(%r12);
+       movq (3 * 8)(%rsp), %r13;
+       CFI_RESTORE(%r13);
+
+       call __sm4_crypt_blk8;
+
+       addq $(4 * 8), %rsp;
+       CFI_ADJUST_CFA_OFFSET(-4 * 8);
+
+       vmovdqu (%rcx), RTMP0;
+       vpxor RB0, RA0, RA0;
+       vpxor RB1, RA1, RA1;
+       vpxor RB2, RA2, RA2;
+       vpxor RB3, RA3, RA3;
+
+       vpxor RTMP0, RA3, RA3;
+       vpxor RA2, RA0, RA0;
+       vpxor RA3, RA1, RA1;
+
+       vpxor RA1, RA0, RA0;
+       vmovdqu RA0, (%rcx);
+
+       vzeroall;
+
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size _gcry_sm4_aesni_avx_ocb_auth,.-_gcry_sm4_aesni_avx_ocb_auth;)
+
+#endif /*defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX_SUPPORT)*/
+#endif /*__x86_64*/
diff --git a/grub-core/lib/libgcrypt/cipher/sm4-aesni-avx2-amd64.S 
b/grub-core/lib/libgcrypt/cipher/sm4-aesni-avx2-amd64.S
new file mode 100644
index 000000000..7a8b9558f
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/sm4-aesni-avx2-amd64.S
@@ -0,0 +1,851 @@
+/* sm4-avx2-amd64.S  -  AVX2 implementation of SM4 cipher
+ *
+ * Copyright (C) 2020 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/* Based on SM4 AES-NI work by Markku-Juhani O. Saarinen at:
+ *  https://github.com/mjosaarinen/sm4ni
+ */
+
+#include <config.h>
+
+#ifdef __x86_64
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
+    defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX2_SUPPORT)
+
+#include "asm-common-amd64.h"
+
+/* vector registers */
+#define RX0          %ymm0
+#define RX1          %ymm1
+#define MASK_4BIT    %ymm2
+#define RTMP0        %ymm3
+#define RTMP1        %ymm4
+#define RTMP2        %ymm5
+#define RTMP3        %ymm6
+#define RTMP4        %ymm7
+
+#define RA0          %ymm8
+#define RA1          %ymm9
+#define RA2          %ymm10
+#define RA3          %ymm11
+
+#define RB0          %ymm12
+#define RB1          %ymm13
+#define RB2          %ymm14
+#define RB3          %ymm15
+
+#define RNOT         %ymm0
+#define RBSWAP       %ymm1
+
+#define RX0x         %xmm0
+#define RX1x         %xmm1
+#define MASK_4BITx   %xmm2
+
+#define RNOTx        %xmm0
+#define RBSWAPx      %xmm1
+
+#define RTMP0x       %xmm3
+#define RTMP1x       %xmm4
+#define RTMP2x       %xmm5
+#define RTMP3x       %xmm6
+#define RTMP4x       %xmm7
+
+/**********************************************************************
+  helper macros
+ **********************************************************************/
+
+/* Transpose four 32-bit words between 128-bit vector lanes. */
+#define transpose_4x4(x0, x1, x2, x3, t1, t2) \
+       vpunpckhdq x1, x0, t2; \
+       vpunpckldq x1, x0, x0; \
+       \
+       vpunpckldq x3, x2, t1; \
+       vpunpckhdq x3, x2, x2; \
+       \
+       vpunpckhqdq t1, x0, x1; \
+       vpunpcklqdq t1, x0, x0; \
+       \
+       vpunpckhqdq x2, t2, x3; \
+       vpunpcklqdq x2, t2, x2;
+
+/* post-SubByte transform. */
+#define transform_pre(x, lo_t, hi_t, mask4bit, tmp0) \
+       vpand x, mask4bit, tmp0; \
+       vpandn x, mask4bit, x; \
+       vpsrld $4, x, x; \
+       \
+       vpshufb tmp0, lo_t, tmp0; \
+       vpshufb x, hi_t, x; \
+       vpxor tmp0, x, x;
+
+/* post-SubByte transform. Note: x has been XOR'ed with mask4bit by
+ * 'vaeslastenc' instruction. */
+#define transform_post(x, lo_t, hi_t, mask4bit, tmp0) \
+       vpandn mask4bit, x, tmp0; \
+       vpsrld $4, x, x; \
+       vpand x, mask4bit, x; \
+       \
+       vpshufb tmp0, lo_t, tmp0; \
+       vpshufb x, hi_t, x; \
+       vpxor tmp0, x, x;
+
+/**********************************************************************
+  16-way SM4 with AES-NI and AVX
+ **********************************************************************/
+
+.text
+.align 16
+
+/*
+ * Following four affine transform look-up tables are from work by
+ * Markku-Juhani O. Saarinen, at https://github.com/mjosaarinen/sm4ni
+ *
+ * These allow exposing SM4 S-Box from AES SubByte.
+ */
+
+/* pre-SubByte affine transform, from SM4 field to AES field. */
+.Lpre_tf_lo_s:
+       .quad 0x9197E2E474720701, 0xC7C1B4B222245157
+.Lpre_tf_hi_s:
+       .quad 0xE240AB09EB49A200, 0xF052B91BF95BB012
+
+/* post-SubByte affine transform, from AES field to SM4 field. */
+.Lpost_tf_lo_s:
+       .quad 0x5B67F2CEA19D0834, 0xEDD14478172BBE82
+.Lpost_tf_hi_s:
+       .quad 0xAE7201DD73AFDC00, 0x11CDBE62CC1063BF
+
+/* For isolating SubBytes from AESENCLAST, inverse shift row */
+.Linv_shift_row:
+       .byte 0x00, 0x0d, 0x0a, 0x07, 0x04, 0x01, 0x0e, 0x0b
+       .byte 0x08, 0x05, 0x02, 0x0f, 0x0c, 0x09, 0x06, 0x03
+
+/* Inverse shift row + Rotate left by 8 bits on 32-bit words with vpshufb */
+.Linv_shift_row_rol_8:
+       .byte 0x07, 0x00, 0x0d, 0x0a, 0x0b, 0x04, 0x01, 0x0e
+       .byte 0x0f, 0x08, 0x05, 0x02, 0x03, 0x0c, 0x09, 0x06
+
+/* Inverse shift row + Rotate left by 16 bits on 32-bit words with vpshufb */
+.Linv_shift_row_rol_16:
+       .byte 0x0a, 0x07, 0x00, 0x0d, 0x0e, 0x0b, 0x04, 0x01
+       .byte 0x02, 0x0f, 0x08, 0x05, 0x06, 0x03, 0x0c, 0x09
+
+/* Inverse shift row + Rotate left by 24 bits on 32-bit words with vpshufb */
+.Linv_shift_row_rol_24:
+       .byte 0x0d, 0x0a, 0x07, 0x00, 0x01, 0x0e, 0x0b, 0x04
+       .byte 0x05, 0x02, 0x0f, 0x08, 0x09, 0x06, 0x03, 0x0c
+
+/* For CTR-mode IV byteswap */
+.Lbswap128_mask:
+       .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
+
+/* For input word byte-swap */
+.Lbswap32_mask:
+       .byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
+
+.align 4
+/* 4-bit mask */
+.L0f0f0f0f:
+       .long 0x0f0f0f0f
+
+.align 8
+ELF(.type   __sm4_crypt_blk16,@function;)
+__sm4_crypt_blk16:
+       /* input:
+        *      %rdi: ctx, CTX
+        *      RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: sixteen parallel
+        *                                              plaintext blocks
+        * output:
+        *      RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3: sixteen parallel
+        *                                              ciphertext blocks
+        */
+       CFI_STARTPROC();
+
+       vbroadcasti128 .Lbswap32_mask rRIP, RTMP2;
+       vpshufb RTMP2, RA0, RA0;
+       vpshufb RTMP2, RA1, RA1;
+       vpshufb RTMP2, RA2, RA2;
+       vpshufb RTMP2, RA3, RA3;
+       vpshufb RTMP2, RB0, RB0;
+       vpshufb RTMP2, RB1, RB1;
+       vpshufb RTMP2, RB2, RB2;
+       vpshufb RTMP2, RB3, RB3;
+
+       vpbroadcastd .L0f0f0f0f rRIP, MASK_4BIT;
+       transpose_4x4(RA0, RA1, RA2, RA3, RTMP0, RTMP1);
+       transpose_4x4(RB0, RB1, RB2, RB3, RTMP0, RTMP1);
+
+#define ROUND(round, s0, s1, s2, s3, r0, r1, r2, r3) \
+       vpbroadcastd (4*(round))(%rdi), RX0; \
+       vbroadcasti128 .Lpre_tf_lo_s rRIP, RTMP4; \
+       vbroadcasti128 .Lpre_tf_hi_s rRIP, RTMP1; \
+       vmovdqa RX0, RX1; \
+       vpxor s1, RX0, RX0; \
+       vpxor s2, RX0, RX0; \
+       vpxor s3, RX0, RX0; /* s1 ^ s2 ^ s3 ^ rk */ \
+           vbroadcasti128 .Lpost_tf_lo_s rRIP, RTMP2; \
+           vbroadcasti128 .Lpost_tf_hi_s rRIP, RTMP3; \
+           vpxor r1, RX1, RX1; \
+           vpxor r2, RX1, RX1; \
+           vpxor r3, RX1, RX1; /* r1 ^ r2 ^ r3 ^ rk */ \
+       \
+       /* sbox, non-linear part */ \
+       transform_pre(RX0, RTMP4, RTMP1, MASK_4BIT, RTMP0); \
+           transform_pre(RX1, RTMP4, RTMP1, MASK_4BIT, RTMP0); \
+       vextracti128 $1, RX0, RTMP4x; \
+           vextracti128 $1, RX1, RTMP0x; \
+       vaesenclast MASK_4BITx, RX0x, RX0x; \
+       vaesenclast MASK_4BITx, RTMP4x, RTMP4x; \
+           vaesenclast MASK_4BITx, RX1x, RX1x; \
+           vaesenclast MASK_4BITx, RTMP0x, RTMP0x; \
+       vinserti128 $1, RTMP4x, RX0, RX0; \
+       vbroadcasti128 .Linv_shift_row rRIP, RTMP4; \
+           vinserti128 $1, RTMP0x, RX1, RX1; \
+       transform_post(RX0, RTMP2, RTMP3, MASK_4BIT, RTMP0); \
+           transform_post(RX1, RTMP2, RTMP3, MASK_4BIT, RTMP0); \
+       \
+       /* linear part */ \
+       vpshufb RTMP4, RX0, RTMP0; \
+       vpxor RTMP0, s0, s0; /* s0 ^ x */ \
+           vpshufb RTMP4, RX1, RTMP2; \
+           vbroadcasti128 .Linv_shift_row_rol_8 rRIP, RTMP4; \
+           vpxor RTMP2, r0, r0; /* r0 ^ x */ \
+       vpshufb RTMP4, RX0, RTMP1; \
+       vpxor RTMP1, RTMP0, RTMP0; /* x ^ rol(x,8) */ \
+           vpshufb RTMP4, RX1, RTMP3; \
+           vbroadcasti128 .Linv_shift_row_rol_16 rRIP, RTMP4; \
+           vpxor RTMP3, RTMP2, RTMP2; /* x ^ rol(x,8) */ \
+       vpshufb RTMP4, RX0, RTMP1; \
+       vpxor RTMP1, RTMP0, RTMP0; /* x ^ rol(x,8) ^ rol(x,16) */ \
+           vpshufb RTMP4, RX1, RTMP3; \
+           vbroadcasti128 .Linv_shift_row_rol_24 rRIP, RTMP4; \
+           vpxor RTMP3, RTMP2, RTMP2; /* x ^ rol(x,8) ^ rol(x,16) */ \
+       vpshufb RTMP4, RX0, RTMP1; \
+       vpxor RTMP1, s0, s0; /* s0 ^ x ^ rol(x,24) */ \
+       vpslld $2, RTMP0, RTMP1; \
+       vpsrld $30, RTMP0, RTMP0; \
+       vpxor RTMP0, s0, s0;  \
+       vpxor RTMP1, s0, s0; /* s0 ^ x ^ rol(x,2) ^ rol(x,10) ^ rol(x,18) ^ 
rol(x,24) */ \
+           vpshufb RTMP4, RX1, RTMP3; \
+           vpxor RTMP3, r0, r0; /* r0 ^ x ^ rol(x,24) */ \
+           vpslld $2, RTMP2, RTMP3; \
+           vpsrld $30, RTMP2, RTMP2; \
+           vpxor RTMP2, r0, r0;  \
+           vpxor RTMP3, r0, r0; /* r0 ^ x ^ rol(x,2) ^ rol(x,10) ^ rol(x,18) ^ 
rol(x,24) */
+
+       leaq (32*4)(%rdi), %rax;
+.align 16
+.Lroundloop_blk8:
+       ROUND(0, RA0, RA1, RA2, RA3, RB0, RB1, RB2, RB3);
+       ROUND(1, RA1, RA2, RA3, RA0, RB1, RB2, RB3, RB0);
+       ROUND(2, RA2, RA3, RA0, RA1, RB2, RB3, RB0, RB1);
+       ROUND(3, RA3, RA0, RA1, RA2, RB3, RB0, RB1, RB2);
+       leaq (4*4)(%rdi), %rdi;
+       cmpq %rax, %rdi;
+       jne .Lroundloop_blk8;
+
+#undef ROUND
+
+       vbroadcasti128 .Lbswap128_mask rRIP, RTMP2;
+
+       transpose_4x4(RA0, RA1, RA2, RA3, RTMP0, RTMP1);
+       transpose_4x4(RB0, RB1, RB2, RB3, RTMP0, RTMP1);
+       vpshufb RTMP2, RA0, RA0;
+       vpshufb RTMP2, RA1, RA1;
+       vpshufb RTMP2, RA2, RA2;
+       vpshufb RTMP2, RA3, RA3;
+       vpshufb RTMP2, RB0, RB0;
+       vpshufb RTMP2, RB1, RB1;
+       vpshufb RTMP2, RB2, RB2;
+       vpshufb RTMP2, RB3, RB3;
+
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size __sm4_crypt_blk16,.-__sm4_crypt_blk16;)
+
+#define inc_le128(x, minus_one, tmp) \
+       vpcmpeqq minus_one, x, tmp; \
+       vpsubq minus_one, x, x; \
+       vpslldq $8, tmp, tmp; \
+       vpsubq tmp, x, x;
+
+.align 8
+.globl _gcry_sm4_aesni_avx2_ctr_enc
+ELF(.type   _gcry_sm4_aesni_avx2_ctr_enc,@function;)
+_gcry_sm4_aesni_avx2_ctr_enc:
+       /* input:
+        *      %rdi: ctx, CTX
+        *      %rsi: dst (16 blocks)
+        *      %rdx: src (16 blocks)
+        *      %rcx: iv (big endian, 128bit)
+        */
+       CFI_STARTPROC();
+
+       movq 8(%rcx), %rax;
+       bswapq %rax;
+
+       vzeroupper;
+
+       vbroadcasti128 .Lbswap128_mask rRIP, RTMP3;
+       vpcmpeqd RNOT, RNOT, RNOT;
+       vpsrldq $8, RNOT, RNOT;   /* ab: -1:0 ; cd: -1:0 */
+       vpaddq RNOT, RNOT, RTMP2; /* ab: -2:0 ; cd: -2:0 */
+
+       /* load IV and byteswap */
+       vmovdqu (%rcx), RTMP4x;
+       vpshufb RTMP3x, RTMP4x, RTMP4x;
+       vmovdqa RTMP4x, RTMP0x;
+       inc_le128(RTMP4x, RNOTx, RTMP1x);
+       vinserti128 $1, RTMP4x, RTMP0, RTMP0;
+       vpshufb RTMP3, RTMP0, RA0; /* +1 ; +0 */
+
+       /* check need for handling 64-bit overflow and carry */
+       cmpq $(0xffffffffffffffff - 16), %rax;
+       ja .Lhandle_ctr_carry;
+
+       /* construct IVs */
+       vpsubq RTMP2, RTMP0, RTMP0; /* +3 ; +2 */
+       vpshufb RTMP3, RTMP0, RA1;
+       vpsubq RTMP2, RTMP0, RTMP0; /* +5 ; +4 */
+       vpshufb RTMP3, RTMP0, RA2;
+       vpsubq RTMP2, RTMP0, RTMP0; /* +7 ; +6 */
+       vpshufb RTMP3, RTMP0, RA3;
+       vpsubq RTMP2, RTMP0, RTMP0; /* +9 ; +8 */
+       vpshufb RTMP3, RTMP0, RB0;
+       vpsubq RTMP2, RTMP0, RTMP0; /* +11 ; +10 */
+       vpshufb RTMP3, RTMP0, RB1;
+       vpsubq RTMP2, RTMP0, RTMP0; /* +13 ; +12 */
+       vpshufb RTMP3, RTMP0, RB2;
+       vpsubq RTMP2, RTMP0, RTMP0; /* +15 ; +14 */
+       vpshufb RTMP3, RTMP0, RB3;
+       vpsubq RTMP2, RTMP0, RTMP0; /* +16 */
+       vpshufb RTMP3x, RTMP0x, RTMP0x;
+
+       jmp .Lctr_carry_done;
+
+.Lhandle_ctr_carry:
+       /* construct IVs */
+       inc_le128(RTMP0, RNOT, RTMP1);
+       inc_le128(RTMP0, RNOT, RTMP1);
+       vpshufb RTMP3, RTMP0, RA1; /* +3 ; +2 */
+       inc_le128(RTMP0, RNOT, RTMP1);
+       inc_le128(RTMP0, RNOT, RTMP1);
+       vpshufb RTMP3, RTMP0, RA2; /* +5 ; +4 */
+       inc_le128(RTMP0, RNOT, RTMP1);
+       inc_le128(RTMP0, RNOT, RTMP1);
+       vpshufb RTMP3, RTMP0, RA3; /* +7 ; +6 */
+       inc_le128(RTMP0, RNOT, RTMP1);
+       inc_le128(RTMP0, RNOT, RTMP1);
+       vpshufb RTMP3, RTMP0, RB0; /* +9 ; +8 */
+       inc_le128(RTMP0, RNOT, RTMP1);
+       inc_le128(RTMP0, RNOT, RTMP1);
+       vpshufb RTMP3, RTMP0, RB1; /* +11 ; +10 */
+       inc_le128(RTMP0, RNOT, RTMP1);
+       inc_le128(RTMP0, RNOT, RTMP1);
+       vpshufb RTMP3, RTMP0, RB2; /* +13 ; +12 */
+       inc_le128(RTMP0, RNOT, RTMP1);
+       inc_le128(RTMP0, RNOT, RTMP1);
+       vpshufb RTMP3, RTMP0, RB3; /* +15 ; +14 */
+       inc_le128(RTMP0, RNOT, RTMP1);
+       vextracti128 $1, RTMP0, RTMP0x;
+       vpshufb RTMP3x, RTMP0x, RTMP0x; /* +16 */
+
+.align 4
+.Lctr_carry_done:
+       /* store new IV */
+       vmovdqu RTMP0x, (%rcx);
+
+       call __sm4_crypt_blk16;
+
+       vpxor (0 * 32)(%rdx), RA0, RA0;
+       vpxor (1 * 32)(%rdx), RA1, RA1;
+       vpxor (2 * 32)(%rdx), RA2, RA2;
+       vpxor (3 * 32)(%rdx), RA3, RA3;
+       vpxor (4 * 32)(%rdx), RB0, RB0;
+       vpxor (5 * 32)(%rdx), RB1, RB1;
+       vpxor (6 * 32)(%rdx), RB2, RB2;
+       vpxor (7 * 32)(%rdx), RB3, RB3;
+
+       vmovdqu RA0, (0 * 32)(%rsi);
+       vmovdqu RA1, (1 * 32)(%rsi);
+       vmovdqu RA2, (2 * 32)(%rsi);
+       vmovdqu RA3, (3 * 32)(%rsi);
+       vmovdqu RB0, (4 * 32)(%rsi);
+       vmovdqu RB1, (5 * 32)(%rsi);
+       vmovdqu RB2, (6 * 32)(%rsi);
+       vmovdqu RB3, (7 * 32)(%rsi);
+
+       vzeroall;
+
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size _gcry_sm4_aesni_avx2_ctr_enc,.-_gcry_sm4_aesni_avx2_ctr_enc;)
+
+.align 8
+.globl _gcry_sm4_aesni_avx2_cbc_dec
+ELF(.type   _gcry_sm4_aesni_avx2_cbc_dec,@function;)
+_gcry_sm4_aesni_avx2_cbc_dec:
+       /* input:
+        *      %rdi: ctx, CTX
+        *      %rsi: dst (16 blocks)
+        *      %rdx: src (16 blocks)
+        *      %rcx: iv
+        */
+       CFI_STARTPROC();
+
+       vzeroupper;
+
+       vmovdqu (0 * 32)(%rdx), RA0;
+       vmovdqu (1 * 32)(%rdx), RA1;
+       vmovdqu (2 * 32)(%rdx), RA2;
+       vmovdqu (3 * 32)(%rdx), RA3;
+       vmovdqu (4 * 32)(%rdx), RB0;
+       vmovdqu (5 * 32)(%rdx), RB1;
+       vmovdqu (6 * 32)(%rdx), RB2;
+       vmovdqu (7 * 32)(%rdx), RB3;
+
+       call __sm4_crypt_blk16;
+
+       vmovdqu (%rcx), RNOTx;
+       vinserti128 $1, (%rdx), RNOT, RNOT;
+       vpxor RNOT, RA0, RA0;
+       vpxor (0 * 32 + 16)(%rdx), RA1, RA1;
+       vpxor (1 * 32 + 16)(%rdx), RA2, RA2;
+       vpxor (2 * 32 + 16)(%rdx), RA3, RA3;
+       vpxor (3 * 32 + 16)(%rdx), RB0, RB0;
+       vpxor (4 * 32 + 16)(%rdx), RB1, RB1;
+       vpxor (5 * 32 + 16)(%rdx), RB2, RB2;
+       vpxor (6 * 32 + 16)(%rdx), RB3, RB3;
+       vmovdqu (7 * 32 + 16)(%rdx), RNOTx;
+       vmovdqu RNOTx, (%rcx); /* store new IV */
+
+       vmovdqu RA0, (0 * 32)(%rsi);
+       vmovdqu RA1, (1 * 32)(%rsi);
+       vmovdqu RA2, (2 * 32)(%rsi);
+       vmovdqu RA3, (3 * 32)(%rsi);
+       vmovdqu RB0, (4 * 32)(%rsi);
+       vmovdqu RB1, (5 * 32)(%rsi);
+       vmovdqu RB2, (6 * 32)(%rsi);
+       vmovdqu RB3, (7 * 32)(%rsi);
+
+       vzeroall;
+
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size _gcry_sm4_aesni_avx2_cbc_dec,.-_gcry_sm4_aesni_avx2_cbc_dec;)
+
+.align 8
+.globl _gcry_sm4_aesni_avx2_cfb_dec
+ELF(.type   _gcry_sm4_aesni_avx2_cfb_dec,@function;)
+_gcry_sm4_aesni_avx2_cfb_dec:
+       /* input:
+        *      %rdi: ctx, CTX
+        *      %rsi: dst (16 blocks)
+        *      %rdx: src (16 blocks)
+        *      %rcx: iv
+        */
+       CFI_STARTPROC();
+
+       vzeroupper;
+
+       /* Load input */
+       vmovdqu (%rcx), RNOTx;
+       vinserti128 $1, (%rdx), RNOT, RA0;
+       vmovdqu (0 * 32 + 16)(%rdx), RA1;
+       vmovdqu (1 * 32 + 16)(%rdx), RA2;
+       vmovdqu (2 * 32 + 16)(%rdx), RA3;
+       vmovdqu (3 * 32 + 16)(%rdx), RB0;
+       vmovdqu (4 * 32 + 16)(%rdx), RB1;
+       vmovdqu (5 * 32 + 16)(%rdx), RB2;
+       vmovdqu (6 * 32 + 16)(%rdx), RB3;
+
+       /* Update IV */
+       vmovdqu (7 * 32 + 16)(%rdx), RNOTx;
+       vmovdqu RNOTx, (%rcx);
+
+       call __sm4_crypt_blk16;
+
+       vpxor (0 * 32)(%rdx), RA0, RA0;
+       vpxor (1 * 32)(%rdx), RA1, RA1;
+       vpxor (2 * 32)(%rdx), RA2, RA2;
+       vpxor (3 * 32)(%rdx), RA3, RA3;
+       vpxor (4 * 32)(%rdx), RB0, RB0;
+       vpxor (5 * 32)(%rdx), RB1, RB1;
+       vpxor (6 * 32)(%rdx), RB2, RB2;
+       vpxor (7 * 32)(%rdx), RB3, RB3;
+
+       vmovdqu RA0, (0 * 32)(%rsi);
+       vmovdqu RA1, (1 * 32)(%rsi);
+       vmovdqu RA2, (2 * 32)(%rsi);
+       vmovdqu RA3, (3 * 32)(%rsi);
+       vmovdqu RB0, (4 * 32)(%rsi);
+       vmovdqu RB1, (5 * 32)(%rsi);
+       vmovdqu RB2, (6 * 32)(%rsi);
+       vmovdqu RB3, (7 * 32)(%rsi);
+
+       vzeroall;
+
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size _gcry_sm4_aesni_avx2_cfb_dec,.-_gcry_sm4_aesni_avx2_cfb_dec;)
+
+.align 8
+.globl _gcry_sm4_aesni_avx2_ocb_enc
+ELF(.type _gcry_sm4_aesni_avx2_ocb_enc,@function;)
+
+_gcry_sm4_aesni_avx2_ocb_enc:
+       /* input:
+        *      %rdi: ctx, CTX
+        *      %rsi: dst (16 blocks)
+        *      %rdx: src (16 blocks)
+        *      %rcx: offset
+        *      %r8 : checksum
+        *      %r9 : L pointers (void *L[16])
+        */
+       CFI_STARTPROC();
+
+       vzeroupper;
+
+       subq $(4 * 8), %rsp;
+       CFI_ADJUST_CFA_OFFSET(4 * 8);
+
+       movq %r10, (0 * 8)(%rsp);
+       movq %r11, (1 * 8)(%rsp);
+       movq %r12, (2 * 8)(%rsp);
+       movq %r13, (3 * 8)(%rsp);
+       CFI_REL_OFFSET(%r10, 0 * 8);
+       CFI_REL_OFFSET(%r11, 1 * 8);
+       CFI_REL_OFFSET(%r12, 2 * 8);
+       CFI_REL_OFFSET(%r13, 3 * 8);
+
+       vmovdqu (%rcx), RTMP0x;
+       vmovdqu (%r8), RTMP1x;
+
+       /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+       /* Checksum_i = Checksum_{i-1} xor P_i  */
+       /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i)  */
+
+#define OCB_INPUT(n, l0reg, l1reg, yreg) \
+         vmovdqu (n * 32)(%rdx), yreg; \
+         vpxor (l0reg), RTMP0x, RNOTx; \
+         vpxor (l1reg), RNOTx, RTMP0x; \
+         vinserti128 $1, RTMP0x, RNOT, RNOT; \
+         vpxor yreg, RTMP1, RTMP1; \
+         vpxor yreg, RNOT, yreg; \
+         vmovdqu RNOT, (n * 32)(%rsi);
+
+       movq (0 * 8)(%r9), %r10;
+       movq (1 * 8)(%r9), %r11;
+       movq (2 * 8)(%r9), %r12;
+       movq (3 * 8)(%r9), %r13;
+       OCB_INPUT(0, %r10, %r11, RA0);
+       OCB_INPUT(1, %r12, %r13, RA1);
+       movq (4 * 8)(%r9), %r10;
+       movq (5 * 8)(%r9), %r11;
+       movq (6 * 8)(%r9), %r12;
+       movq (7 * 8)(%r9), %r13;
+       OCB_INPUT(2, %r10, %r11, RA2);
+       OCB_INPUT(3, %r12, %r13, RA3);
+       movq (8 * 8)(%r9), %r10;
+       movq (9 * 8)(%r9), %r11;
+       movq (10 * 8)(%r9), %r12;
+       movq (11 * 8)(%r9), %r13;
+       OCB_INPUT(4, %r10, %r11, RB0);
+       OCB_INPUT(5, %r12, %r13, RB1);
+       movq (12 * 8)(%r9), %r10;
+       movq (13 * 8)(%r9), %r11;
+       movq (14 * 8)(%r9), %r12;
+       movq (15 * 8)(%r9), %r13;
+       OCB_INPUT(6, %r10, %r11, RB2);
+       OCB_INPUT(7, %r12, %r13, RB3);
+#undef OCB_INPUT
+
+       vextracti128 $1, RTMP1, RNOTx;
+       vmovdqu RTMP0x, (%rcx);
+       vpxor RNOTx, RTMP1x, RTMP1x;
+       vmovdqu RTMP1x, (%r8);
+
+       movq (0 * 8)(%rsp), %r10;
+       movq (1 * 8)(%rsp), %r11;
+       movq (2 * 8)(%rsp), %r12;
+       movq (3 * 8)(%rsp), %r13;
+       CFI_RESTORE(%r10);
+       CFI_RESTORE(%r11);
+       CFI_RESTORE(%r12);
+       CFI_RESTORE(%r13);
+
+       call __sm4_crypt_blk16;
+
+       addq $(4 * 8), %rsp;
+       CFI_ADJUST_CFA_OFFSET(-4 * 8);
+
+       vpxor (0 * 32)(%rsi), RA0, RA0;
+       vpxor (1 * 32)(%rsi), RA1, RA1;
+       vpxor (2 * 32)(%rsi), RA2, RA2;
+       vpxor (3 * 32)(%rsi), RA3, RA3;
+       vpxor (4 * 32)(%rsi), RB0, RB0;
+       vpxor (5 * 32)(%rsi), RB1, RB1;
+       vpxor (6 * 32)(%rsi), RB2, RB2;
+       vpxor (7 * 32)(%rsi), RB3, RB3;
+
+       vmovdqu RA0, (0 * 32)(%rsi);
+       vmovdqu RA1, (1 * 32)(%rsi);
+       vmovdqu RA2, (2 * 32)(%rsi);
+       vmovdqu RA3, (3 * 32)(%rsi);
+       vmovdqu RB0, (4 * 32)(%rsi);
+       vmovdqu RB1, (5 * 32)(%rsi);
+       vmovdqu RB2, (6 * 32)(%rsi);
+       vmovdqu RB3, (7 * 32)(%rsi);
+
+       vzeroall;
+
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size _gcry_sm4_aesni_avx2_ocb_enc,.-_gcry_sm4_aesni_avx2_ocb_enc;)
+
+.align 8
+.globl _gcry_sm4_aesni_avx2_ocb_dec
+ELF(.type _gcry_sm4_aesni_avx2_ocb_dec,@function;)
+
+_gcry_sm4_aesni_avx2_ocb_dec:
+       /* input:
+        *      %rdi: ctx, CTX
+        *      %rsi: dst (16 blocks)
+        *      %rdx: src (16 blocks)
+        *      %rcx: offset
+        *      %r8 : checksum
+        *      %r9 : L pointers (void *L[16])
+        */
+       CFI_STARTPROC();
+
+       vzeroupper;
+
+       subq $(4 * 8), %rsp;
+       CFI_ADJUST_CFA_OFFSET(4 * 8);
+
+       movq %r10, (0 * 8)(%rsp);
+       movq %r11, (1 * 8)(%rsp);
+       movq %r12, (2 * 8)(%rsp);
+       movq %r13, (3 * 8)(%rsp);
+       CFI_REL_OFFSET(%r10, 0 * 8);
+       CFI_REL_OFFSET(%r11, 1 * 8);
+       CFI_REL_OFFSET(%r12, 2 * 8);
+       CFI_REL_OFFSET(%r13, 3 * 8);
+
+       vmovdqu (%rcx), RTMP0x;
+
+       /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+       /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i)  */
+
+#define OCB_INPUT(n, l0reg, l1reg, yreg) \
+         vmovdqu (n * 32)(%rdx), yreg; \
+         vpxor (l0reg), RTMP0x, RNOTx; \
+         vpxor (l1reg), RNOTx, RTMP0x; \
+         vinserti128 $1, RTMP0x, RNOT, RNOT; \
+         vpxor yreg, RNOT, yreg; \
+         vmovdqu RNOT, (n * 32)(%rsi);
+
+       movq (0 * 8)(%r9), %r10;
+       movq (1 * 8)(%r9), %r11;
+       movq (2 * 8)(%r9), %r12;
+       movq (3 * 8)(%r9), %r13;
+       OCB_INPUT(0, %r10, %r11, RA0);
+       OCB_INPUT(1, %r12, %r13, RA1);
+       movq (4 * 8)(%r9), %r10;
+       movq (5 * 8)(%r9), %r11;
+       movq (6 * 8)(%r9), %r12;
+       movq (7 * 8)(%r9), %r13;
+       OCB_INPUT(2, %r10, %r11, RA2);
+       OCB_INPUT(3, %r12, %r13, RA3);
+       movq (8 * 8)(%r9), %r10;
+       movq (9 * 8)(%r9), %r11;
+       movq (10 * 8)(%r9), %r12;
+       movq (11 * 8)(%r9), %r13;
+       OCB_INPUT(4, %r10, %r11, RB0);
+       OCB_INPUT(5, %r12, %r13, RB1);
+       movq (12 * 8)(%r9), %r10;
+       movq (13 * 8)(%r9), %r11;
+       movq (14 * 8)(%r9), %r12;
+       movq (15 * 8)(%r9), %r13;
+       OCB_INPUT(6, %r10, %r11, RB2);
+       OCB_INPUT(7, %r12, %r13, RB3);
+#undef OCB_INPUT
+
+       vmovdqu RTMP0x, (%rcx);
+
+       movq (0 * 8)(%rsp), %r10;
+       movq (1 * 8)(%rsp), %r11;
+       movq (2 * 8)(%rsp), %r12;
+       movq (3 * 8)(%rsp), %r13;
+       CFI_RESTORE(%r10);
+       CFI_RESTORE(%r11);
+       CFI_RESTORE(%r12);
+       CFI_RESTORE(%r13);
+
+       call __sm4_crypt_blk16;
+
+       addq $(4 * 8), %rsp;
+       CFI_ADJUST_CFA_OFFSET(-4 * 8);
+
+       vmovdqu (%r8), RTMP1x;
+
+       vpxor (0 * 32)(%rsi), RA0, RA0;
+       vpxor (1 * 32)(%rsi), RA1, RA1;
+       vpxor (2 * 32)(%rsi), RA2, RA2;
+       vpxor (3 * 32)(%rsi), RA3, RA3;
+       vpxor (4 * 32)(%rsi), RB0, RB0;
+       vpxor (5 * 32)(%rsi), RB1, RB1;
+       vpxor (6 * 32)(%rsi), RB2, RB2;
+       vpxor (7 * 32)(%rsi), RB3, RB3;
+
+       /* Checksum_i = Checksum_{i-1} xor P_i  */
+
+       vmovdqu RA0, (0 * 32)(%rsi);
+       vpxor RA0, RTMP1, RTMP1;
+       vmovdqu RA1, (1 * 32)(%rsi);
+       vpxor RA1, RTMP1, RTMP1;
+       vmovdqu RA2, (2 * 32)(%rsi);
+       vpxor RA2, RTMP1, RTMP1;
+       vmovdqu RA3, (3 * 32)(%rsi);
+       vpxor RA3, RTMP1, RTMP1;
+       vmovdqu RB0, (4 * 32)(%rsi);
+       vpxor RB0, RTMP1, RTMP1;
+       vmovdqu RB1, (5 * 32)(%rsi);
+       vpxor RB1, RTMP1, RTMP1;
+       vmovdqu RB2, (6 * 32)(%rsi);
+       vpxor RB2, RTMP1, RTMP1;
+       vmovdqu RB3, (7 * 32)(%rsi);
+       vpxor RB3, RTMP1, RTMP1;
+
+       vextracti128 $1, RTMP1, RNOTx;
+       vpxor RNOTx, RTMP1x, RTMP1x;
+       vmovdqu RTMP1x, (%r8);
+
+       vzeroall;
+
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size _gcry_sm4_aesni_avx2_ocb_dec,.-_gcry_sm4_aesni_avx2_ocb_dec;)
+
+.align 8
+.globl _gcry_sm4_aesni_avx2_ocb_auth
+ELF(.type _gcry_sm4_aesni_avx2_ocb_auth,@function;)
+
+_gcry_sm4_aesni_avx2_ocb_auth:
+       /* input:
+        *      %rdi: ctx, CTX
+        *      %rsi: abuf (16 blocks)
+        *      %rdx: offset
+        *      %rcx: checksum
+        *      %r8 : L pointers (void *L[16])
+        */
+       CFI_STARTPROC();
+
+       vzeroupper;
+
+       subq $(4 * 8), %rsp;
+       CFI_ADJUST_CFA_OFFSET(4 * 8);
+
+       movq %r10, (0 * 8)(%rsp);
+       movq %r11, (1 * 8)(%rsp);
+       movq %r12, (2 * 8)(%rsp);
+       movq %r13, (3 * 8)(%rsp);
+       CFI_REL_OFFSET(%r10, 0 * 8);
+       CFI_REL_OFFSET(%r11, 1 * 8);
+       CFI_REL_OFFSET(%r12, 2 * 8);
+       CFI_REL_OFFSET(%r13, 3 * 8);
+
+       vmovdqu (%rdx), RTMP0x;
+
+       /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+       /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i)  */
+
+#define OCB_INPUT(n, l0reg, l1reg, yreg) \
+         vmovdqu (n * 32)(%rsi), yreg; \
+         vpxor (l0reg), RTMP0x, RNOTx; \
+         vpxor (l1reg), RNOTx, RTMP0x; \
+         vinserti128 $1, RTMP0x, RNOT, RNOT; \
+         vpxor yreg, RNOT, yreg;
+
+       movq (0 * 8)(%r8), %r10;
+       movq (1 * 8)(%r8), %r11;
+       movq (2 * 8)(%r8), %r12;
+       movq (3 * 8)(%r8), %r13;
+       OCB_INPUT(0, %r10, %r11, RA0);
+       OCB_INPUT(1, %r12, %r13, RA1);
+       movq (4 * 8)(%r8), %r10;
+       movq (5 * 8)(%r8), %r11;
+       movq (6 * 8)(%r8), %r12;
+       movq (7 * 8)(%r8), %r13;
+       OCB_INPUT(2, %r10, %r11, RA2);
+       OCB_INPUT(3, %r12, %r13, RA3);
+       movq (8 * 8)(%r8), %r10;
+       movq (9 * 8)(%r8), %r11;
+       movq (10 * 8)(%r8), %r12;
+       movq (11 * 8)(%r8), %r13;
+       OCB_INPUT(4, %r10, %r11, RB0);
+       OCB_INPUT(5, %r12, %r13, RB1);
+       movq (12 * 8)(%r8), %r10;
+       movq (13 * 8)(%r8), %r11;
+       movq (14 * 8)(%r8), %r12;
+       movq (15 * 8)(%r8), %r13;
+       OCB_INPUT(6, %r10, %r11, RB2);
+       OCB_INPUT(7, %r12, %r13, RB3);
+#undef OCB_INPUT
+
+       vmovdqu RTMP0x, (%rdx);
+
+       movq (0 * 8)(%rsp), %r10;
+       movq (1 * 8)(%rsp), %r11;
+       movq (2 * 8)(%rsp), %r12;
+       movq (3 * 8)(%rsp), %r13;
+       CFI_RESTORE(%r10);
+       CFI_RESTORE(%r11);
+       CFI_RESTORE(%r12);
+       CFI_RESTORE(%r13);
+
+       call __sm4_crypt_blk16;
+
+       addq $(4 * 8), %rsp;
+       CFI_ADJUST_CFA_OFFSET(-4 * 8);
+
+       vpxor RA0, RB0, RA0;
+       vpxor RA1, RB1, RA1;
+       vpxor RA2, RB2, RA2;
+       vpxor RA3, RB3, RA3;
+
+       vpxor RA1, RA0, RA0;
+       vpxor RA3, RA2, RA2;
+
+       vpxor RA2, RA0, RTMP1;
+
+       vextracti128 $1, RTMP1, RNOTx;
+       vpxor (%rcx), RTMP1x, RTMP1x;
+       vpxor RNOTx, RTMP1x, RTMP1x;
+       vmovdqu RTMP1x, (%rcx);
+
+       vzeroall;
+
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size _gcry_sm4_aesni_avx2_ocb_auth,.-_gcry_sm4_aesni_avx2_ocb_auth;)
+
+#endif /*defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX_SUPPORT)*/
+#endif /*__x86_64*/
diff --git a/grub-core/lib/libgcrypt/cipher/sm4.c 
b/grub-core/lib/libgcrypt/cipher/sm4.c
new file mode 100644
index 000000000..816629886
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/sm4.c
@@ -0,0 +1,1251 @@
+/* sm4.c  -  SM4 Cipher Algorithm
+ * Copyright (C) 2020 Alibaba Group.
+ * Copyright (C) 2020 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
+ * Copyright (C) 2020 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "types.h"  /* for byte and u32 typedefs */
+#include "bithelp.h"
+#include "g10lib.h"
+#include "cipher.h"
+#include "bufhelp.h"
+#include "cipher-internal.h"
+#include "cipher-selftest.h"
+
+/* Helper macro to force alignment to 64 bytes.  */
+#ifdef HAVE_GCC_ATTRIBUTE_ALIGNED
+# define ATTR_ALIGNED_64  __attribute__ ((aligned (64)))
+#else
+# define ATTR_ALIGNED_64
+#endif
+
+/* USE_AESNI_AVX inidicates whether to compile with Intel AES-NI/AVX code. */
+#undef USE_AESNI_AVX
+#if defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX_SUPPORT)
+# if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || 
\
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+#  define USE_AESNI_AVX 1
+# endif
+#endif
+
+/* USE_AESNI_AVX inidicates whether to compile with Intel AES-NI/AVX2 code. */
+#undef USE_AESNI_AVX2
+#if defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX2_SUPPORT)
+# if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || 
\
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+#  define USE_AESNI_AVX2 1
+# endif
+#endif
+
+/* Assembly implementations use SystemV ABI, ABI conversion and additional
+ * stack to store XMM6-XMM15 needed on Win64. */
+#undef ASM_FUNC_ABI
+#if defined(USE_AESNI_AVX) || defined(USE_AESNI_AVX2)
+# ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+#  define ASM_FUNC_ABI __attribute__((sysv_abi))
+# else
+#  define ASM_FUNC_ABI
+# endif
+#endif
+
+static const char *sm4_selftest (void);
+
+static void _gcry_sm4_ctr_enc (void *context, unsigned char *ctr,
+                              void *outbuf_arg, const void *inbuf_arg,
+                              size_t nblocks);
+static void _gcry_sm4_cbc_dec (void *context, unsigned char *iv,
+                              void *outbuf_arg, const void *inbuf_arg,
+                              size_t nblocks);
+static void _gcry_sm4_cfb_dec (void *context, unsigned char *iv,
+                              void *outbuf_arg, const void *inbuf_arg,
+                              size_t nblocks);
+static size_t _gcry_sm4_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
+                                  const void *inbuf_arg, size_t nblocks,
+                                  int encrypt);
+static size_t _gcry_sm4_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
+                                 size_t nblocks);
+
+typedef struct
+{
+  u32 rkey_enc[32];
+  u32 rkey_dec[32];
+#ifdef USE_AESNI_AVX
+  unsigned int use_aesni_avx:1;
+#endif
+#ifdef USE_AESNI_AVX2
+  unsigned int use_aesni_avx2:1;
+#endif
+} SM4_context;
+
+static const u32 fk[4] =
+{
+  0xa3b1bac6, 0x56aa3350, 0x677d9197, 0xb27022dc
+};
+
+static struct
+{
+  volatile u32 counter_head;
+  u32 cacheline_align[64 / 4 - 1];
+  byte S[256];
+  volatile u32 counter_tail;
+} sbox_table ATTR_ALIGNED_64 =
+  {
+    0,
+    { 0, },
+    {
+      0xd6, 0x90, 0xe9, 0xfe, 0xcc, 0xe1, 0x3d, 0xb7,
+      0x16, 0xb6, 0x14, 0xc2, 0x28, 0xfb, 0x2c, 0x05,
+      0x2b, 0x67, 0x9a, 0x76, 0x2a, 0xbe, 0x04, 0xc3,
+      0xaa, 0x44, 0x13, 0x26, 0x49, 0x86, 0x06, 0x99,
+      0x9c, 0x42, 0x50, 0xf4, 0x91, 0xef, 0x98, 0x7a,
+      0x33, 0x54, 0x0b, 0x43, 0xed, 0xcf, 0xac, 0x62,
+      0xe4, 0xb3, 0x1c, 0xa9, 0xc9, 0x08, 0xe8, 0x95,
+      0x80, 0xdf, 0x94, 0xfa, 0x75, 0x8f, 0x3f, 0xa6,
+      0x47, 0x07, 0xa7, 0xfc, 0xf3, 0x73, 0x17, 0xba,
+      0x83, 0x59, 0x3c, 0x19, 0xe6, 0x85, 0x4f, 0xa8,
+      0x68, 0x6b, 0x81, 0xb2, 0x71, 0x64, 0xda, 0x8b,
+      0xf8, 0xeb, 0x0f, 0x4b, 0x70, 0x56, 0x9d, 0x35,
+      0x1e, 0x24, 0x0e, 0x5e, 0x63, 0x58, 0xd1, 0xa2,
+      0x25, 0x22, 0x7c, 0x3b, 0x01, 0x21, 0x78, 0x87,
+      0xd4, 0x00, 0x46, 0x57, 0x9f, 0xd3, 0x27, 0x52,
+      0x4c, 0x36, 0x02, 0xe7, 0xa0, 0xc4, 0xc8, 0x9e,
+      0xea, 0xbf, 0x8a, 0xd2, 0x40, 0xc7, 0x38, 0xb5,
+      0xa3, 0xf7, 0xf2, 0xce, 0xf9, 0x61, 0x15, 0xa1,
+      0xe0, 0xae, 0x5d, 0xa4, 0x9b, 0x34, 0x1a, 0x55,
+      0xad, 0x93, 0x32, 0x30, 0xf5, 0x8c, 0xb1, 0xe3,
+      0x1d, 0xf6, 0xe2, 0x2e, 0x82, 0x66, 0xca, 0x60,
+      0xc0, 0x29, 0x23, 0xab, 0x0d, 0x53, 0x4e, 0x6f,
+      0xd5, 0xdb, 0x37, 0x45, 0xde, 0xfd, 0x8e, 0x2f,
+      0x03, 0xff, 0x6a, 0x72, 0x6d, 0x6c, 0x5b, 0x51,
+      0x8d, 0x1b, 0xaf, 0x92, 0xbb, 0xdd, 0xbc, 0x7f,
+      0x11, 0xd9, 0x5c, 0x41, 0x1f, 0x10, 0x5a, 0xd8,
+      0x0a, 0xc1, 0x31, 0x88, 0xa5, 0xcd, 0x7b, 0xbd,
+      0x2d, 0x74, 0xd0, 0x12, 0xb8, 0xe5, 0xb4, 0xb0,
+      0x89, 0x69, 0x97, 0x4a, 0x0c, 0x96, 0x77, 0x7e,
+      0x65, 0xb9, 0xf1, 0x09, 0xc5, 0x6e, 0xc6, 0x84,
+      0x18, 0xf0, 0x7d, 0xec, 0x3a, 0xdc, 0x4d, 0x20,
+      0x79, 0xee, 0x5f, 0x3e, 0xd7, 0xcb, 0x39, 0x48
+    },
+    0
+  };
+
+static const u32 ck[] =
+{
+  0x00070e15, 0x1c232a31, 0x383f464d, 0x545b6269,
+  0x70777e85, 0x8c939aa1, 0xa8afb6bd, 0xc4cbd2d9,
+  0xe0e7eef5, 0xfc030a11, 0x181f262d, 0x343b4249,
+  0x50575e65, 0x6c737a81, 0x888f969d, 0xa4abb2b9,
+  0xc0c7ced5, 0xdce3eaf1, 0xf8ff060d, 0x141b2229,
+  0x30373e45, 0x4c535a61, 0x686f767d, 0x848b9299,
+  0xa0a7aeb5, 0xbcc3cad1, 0xd8dfe6ed, 0xf4fb0209,
+  0x10171e25, 0x2c333a41, 0x484f565d, 0x646b7279
+};
+
+#ifdef USE_AESNI_AVX
+extern void _gcry_sm4_aesni_avx_expand_key(const byte *key, u32 *rk_enc,
+                                          u32 *rk_dec, const u32 *fk,
+                                          const u32 *ck) ASM_FUNC_ABI;
+
+extern void _gcry_sm4_aesni_avx_ctr_enc(const u32 *rk_enc, byte *out,
+                                       const byte *in, byte *ctr) ASM_FUNC_ABI;
+
+extern void _gcry_sm4_aesni_avx_cbc_dec(const u32 *rk_dec, byte *out,
+                                       const byte *in, byte *iv) ASM_FUNC_ABI;
+
+extern void _gcry_sm4_aesni_avx_cfb_dec(const u32 *rk_enc, byte *out,
+                                       const byte *in, byte *iv) ASM_FUNC_ABI;
+
+extern void _gcry_sm4_aesni_avx_ocb_enc(const u32 *rk_enc,
+                                       unsigned char *out,
+                                       const unsigned char *in,
+                                       unsigned char *offset,
+                                       unsigned char *checksum,
+                                       const u64 Ls[8]) ASM_FUNC_ABI;
+
+extern void _gcry_sm4_aesni_avx_ocb_dec(const u32 *rk_dec,
+                                       unsigned char *out,
+                                       const unsigned char *in,
+                                       unsigned char *offset,
+                                       unsigned char *checksum,
+                                       const u64 Ls[8]) ASM_FUNC_ABI;
+
+extern void _gcry_sm4_aesni_avx_ocb_auth(const u32 *rk_enc,
+                                        const unsigned char *abuf,
+                                        unsigned char *offset,
+                                        unsigned char *checksum,
+                                        const u64 Ls[8]) ASM_FUNC_ABI;
+
+extern unsigned int
+_gcry_sm4_aesni_avx_crypt_blk1_8(const u32 *rk, byte *out, const byte *in,
+                                unsigned int num_blks) ASM_FUNC_ABI;
+
+static inline unsigned int
+sm4_aesni_avx_crypt_blk1_8(const u32 *rk, byte *out, const byte *in,
+                          unsigned int num_blks)
+{
+  return _gcry_sm4_aesni_avx_crypt_blk1_8(rk, out, in, num_blks);
+}
+
+#endif /* USE_AESNI_AVX */
+
+#ifdef USE_AESNI_AVX2
+extern void _gcry_sm4_aesni_avx2_ctr_enc(const u32 *rk_enc, byte *out,
+                                        const byte *in,
+                                        byte *ctr) ASM_FUNC_ABI;
+
+extern void _gcry_sm4_aesni_avx2_cbc_dec(const u32 *rk_dec, byte *out,
+                                        const byte *in,
+                                        byte *iv) ASM_FUNC_ABI;
+
+extern void _gcry_sm4_aesni_avx2_cfb_dec(const u32 *rk_enc, byte *out,
+                                        const byte *in,
+                                        byte *iv) ASM_FUNC_ABI;
+
+extern void _gcry_sm4_aesni_avx2_ocb_enc(const u32 *rk_enc,
+                                        unsigned char *out,
+                                        const unsigned char *in,
+                                        unsigned char *offset,
+                                        unsigned char *checksum,
+                                        const u64 Ls[16]) ASM_FUNC_ABI;
+
+extern void _gcry_sm4_aesni_avx2_ocb_dec(const u32 *rk_dec,
+                                        unsigned char *out,
+                                        const unsigned char *in,
+                                        unsigned char *offset,
+                                        unsigned char *checksum,
+                                        const u64 Ls[16]) ASM_FUNC_ABI;
+
+extern void _gcry_sm4_aesni_avx2_ocb_auth(const u32 *rk_enc,
+                                         const unsigned char *abuf,
+                                         unsigned char *offset,
+                                         unsigned char *checksum,
+                                         const u64 Ls[16]) ASM_FUNC_ABI;
+#endif /* USE_AESNI_AVX2 */
+
+static inline void prefetch_sbox_table(void)
+{
+  const volatile byte *vtab = (void *)&sbox_table;
+
+  /* Modify counters to trigger copy-on-write and unsharing if physical pages
+   * of look-up table are shared between processes.  Modifying counters also
+   * causes checksums for pages to change and hint same-page merging algorithm
+   * that these pages are frequently changing.  */
+  sbox_table.counter_head++;
+  sbox_table.counter_tail++;
+
+  /* Prefetch look-up table to cache.  */
+  (void)vtab[0 * 32];
+  (void)vtab[1 * 32];
+  (void)vtab[2 * 32];
+  (void)vtab[3 * 32];
+  (void)vtab[4 * 32];
+  (void)vtab[5 * 32];
+  (void)vtab[6 * 32];
+  (void)vtab[7 * 32];
+  (void)vtab[8 * 32 - 1];
+}
+
+static inline u32 sm4_t_non_lin_sub(u32 x)
+{
+  u32 out;
+
+  out  = (u32)sbox_table.S[(x >> 0) & 0xff] << 0;
+  out |= (u32)sbox_table.S[(x >> 8) & 0xff] << 8;
+  out |= (u32)sbox_table.S[(x >> 16) & 0xff] << 16;
+  out |= (u32)sbox_table.S[(x >> 24) & 0xff] << 24;
+
+  return out;
+}
+
+static inline u32 sm4_key_lin_sub(u32 x)
+{
+  return x ^ rol(x, 13) ^ rol(x, 23);
+}
+
+static inline u32 sm4_enc_lin_sub(u32 x)
+{
+  u32 xrol2 = rol(x, 2);
+  return x ^ xrol2 ^ rol(xrol2, 8) ^ rol(xrol2, 16) ^ rol(x, 24);
+}
+
+static inline u32 sm4_key_sub(u32 x)
+{
+  return sm4_key_lin_sub(sm4_t_non_lin_sub(x));
+}
+
+static inline u32 sm4_enc_sub(u32 x)
+{
+  return sm4_enc_lin_sub(sm4_t_non_lin_sub(x));
+}
+
+static inline u32
+sm4_round(const u32 x0, const u32 x1, const u32 x2, const u32 x3, const u32 rk)
+{
+  return x0 ^ sm4_enc_sub(x1 ^ x2 ^ x3 ^ rk);
+}
+
+static void
+sm4_expand_key (SM4_context *ctx, const byte *key)
+{
+  u32 rk[4];
+  int i;
+
+#ifdef USE_AESNI_AVX
+  if (ctx->use_aesni_avx)
+    {
+      _gcry_sm4_aesni_avx_expand_key (key, ctx->rkey_enc, ctx->rkey_dec,
+                                     fk, ck);
+      return;
+    }
+#endif
+
+  rk[0] = buf_get_be32(key + 4 * 0) ^ fk[0];
+  rk[1] = buf_get_be32(key + 4 * 1) ^ fk[1];
+  rk[2] = buf_get_be32(key + 4 * 2) ^ fk[2];
+  rk[3] = buf_get_be32(key + 4 * 3) ^ fk[3];
+
+  for (i = 0; i < 32; i += 4)
+    {
+      rk[0] = rk[0] ^ sm4_key_sub(rk[1] ^ rk[2] ^ rk[3] ^ ck[i + 0]);
+      rk[1] = rk[1] ^ sm4_key_sub(rk[2] ^ rk[3] ^ rk[0] ^ ck[i + 1]);
+      rk[2] = rk[2] ^ sm4_key_sub(rk[3] ^ rk[0] ^ rk[1] ^ ck[i + 2]);
+      rk[3] = rk[3] ^ sm4_key_sub(rk[0] ^ rk[1] ^ rk[2] ^ ck[i + 3]);
+      ctx->rkey_enc[i + 0] = rk[0];
+      ctx->rkey_enc[i + 1] = rk[1];
+      ctx->rkey_enc[i + 2] = rk[2];
+      ctx->rkey_enc[i + 3] = rk[3];
+      ctx->rkey_dec[31 - i - 0] = rk[0];
+      ctx->rkey_dec[31 - i - 1] = rk[1];
+      ctx->rkey_dec[31 - i - 2] = rk[2];
+      ctx->rkey_dec[31 - i - 3] = rk[3];
+    }
+
+  wipememory (rk, sizeof(rk));
+}
+
+static gcry_err_code_t
+sm4_setkey (void *context, const byte *key, const unsigned keylen,
+            cipher_bulk_ops_t *bulk_ops)
+{
+  SM4_context *ctx = context;
+  static int init = 0;
+  static const char *selftest_failed = NULL;
+  unsigned int hwf = _gcry_get_hw_features ();
+
+  (void)hwf;
+
+  if (!init)
+    {
+      init = 1;
+      selftest_failed = sm4_selftest();
+      if (selftest_failed)
+       log_error("%s\n", selftest_failed);
+    }
+  if (selftest_failed)
+    return GPG_ERR_SELFTEST_FAILED;
+
+  if (keylen != 16)
+    return GPG_ERR_INV_KEYLEN;
+
+#ifdef USE_AESNI_AVX
+  ctx->use_aesni_avx = (hwf & HWF_INTEL_AESNI) && (hwf & HWF_INTEL_AVX);
+#endif
+#ifdef USE_AESNI_AVX2
+  ctx->use_aesni_avx2 = (hwf & HWF_INTEL_AESNI) && (hwf & HWF_INTEL_AVX2);
+#endif
+
+  /* Setup bulk encryption routines.  */
+  memset (bulk_ops, 0, sizeof(*bulk_ops));
+  bulk_ops->cbc_dec = _gcry_sm4_cbc_dec;
+  bulk_ops->cfb_dec = _gcry_sm4_cfb_dec;
+  bulk_ops->ctr_enc = _gcry_sm4_ctr_enc;
+  bulk_ops->ocb_crypt = _gcry_sm4_ocb_crypt;
+  bulk_ops->ocb_auth  = _gcry_sm4_ocb_auth;
+
+  sm4_expand_key (ctx, key);
+  return 0;
+}
+
+static unsigned int
+sm4_do_crypt (const u32 *rk, byte *out, const byte *in)
+{
+  u32 x[4];
+  int i;
+
+  x[0] = buf_get_be32(in + 0 * 4);
+  x[1] = buf_get_be32(in + 1 * 4);
+  x[2] = buf_get_be32(in + 2 * 4);
+  x[3] = buf_get_be32(in + 3 * 4);
+
+  for (i = 0; i < 32; i += 4)
+    {
+      x[0] = sm4_round(x[0], x[1], x[2], x[3], rk[i + 0]);
+      x[1] = sm4_round(x[1], x[2], x[3], x[0], rk[i + 1]);
+      x[2] = sm4_round(x[2], x[3], x[0], x[1], rk[i + 2]);
+      x[3] = sm4_round(x[3], x[0], x[1], x[2], rk[i + 3]);
+    }
+
+  buf_put_be32(out + 0 * 4, x[3 - 0]);
+  buf_put_be32(out + 1 * 4, x[3 - 1]);
+  buf_put_be32(out + 2 * 4, x[3 - 2]);
+  buf_put_be32(out + 3 * 4, x[3 - 3]);
+
+  return /*burn_stack*/ 4*6+sizeof(void*)*4;
+}
+
+static unsigned int
+sm4_encrypt (void *context, byte *outbuf, const byte *inbuf)
+{
+  SM4_context *ctx = context;
+
+  prefetch_sbox_table ();
+
+  return sm4_do_crypt (ctx->rkey_enc, outbuf, inbuf);
+}
+
+static unsigned int
+sm4_decrypt (void *context, byte *outbuf, const byte *inbuf)
+{
+  SM4_context *ctx = context;
+
+  prefetch_sbox_table ();
+
+  return sm4_do_crypt (ctx->rkey_dec, outbuf, inbuf);
+}
+
+static unsigned int
+sm4_do_crypt_blks2 (const u32 *rk, byte *out, const byte *in)
+{
+  u32 x[4];
+  u32 y[4];
+  u32 k;
+  int i;
+
+  /* Encrypts/Decrypts two blocks for higher instruction level
+   * parallelism. */
+
+  x[0] = buf_get_be32(in + 0 * 4);
+  x[1] = buf_get_be32(in + 1 * 4);
+  x[2] = buf_get_be32(in + 2 * 4);
+  x[3] = buf_get_be32(in + 3 * 4);
+  y[0] = buf_get_be32(in + 4 * 4);
+  y[1] = buf_get_be32(in + 5 * 4);
+  y[2] = buf_get_be32(in + 6 * 4);
+  y[3] = buf_get_be32(in + 7 * 4);
+
+  for (i = 0; i < 32; i += 4)
+    {
+      k = rk[i + 0];
+      x[0] = sm4_round(x[0], x[1], x[2], x[3], k);
+      y[0] = sm4_round(y[0], y[1], y[2], y[3], k);
+      k = rk[i + 1];
+      x[1] = sm4_round(x[1], x[2], x[3], x[0], k);
+      y[1] = sm4_round(y[1], y[2], y[3], y[0], k);
+      k = rk[i + 2];
+      x[2] = sm4_round(x[2], x[3], x[0], x[1], k);
+      y[2] = sm4_round(y[2], y[3], y[0], y[1], k);
+      k = rk[i + 3];
+      x[3] = sm4_round(x[3], x[0], x[1], x[2], k);
+      y[3] = sm4_round(y[3], y[0], y[1], y[2], k);
+    }
+
+  buf_put_be32(out + 0 * 4, x[3 - 0]);
+  buf_put_be32(out + 1 * 4, x[3 - 1]);
+  buf_put_be32(out + 2 * 4, x[3 - 2]);
+  buf_put_be32(out + 3 * 4, x[3 - 3]);
+  buf_put_be32(out + 4 * 4, y[3 - 0]);
+  buf_put_be32(out + 5 * 4, y[3 - 1]);
+  buf_put_be32(out + 6 * 4, y[3 - 2]);
+  buf_put_be32(out + 7 * 4, y[3 - 3]);
+
+  return /*burn_stack*/ 4*10+sizeof(void*)*4;
+}
+
+static unsigned int
+sm4_crypt_blocks (const u32 *rk, byte *out, const byte *in,
+                 unsigned int num_blks)
+{
+  unsigned int burn_depth = 0;
+  unsigned int nburn;
+
+  while (num_blks >= 2)
+    {
+      nburn = sm4_do_crypt_blks2 (rk, out, in);
+      burn_depth = nburn > burn_depth ? nburn : burn_depth;
+      out += 2 * 16;
+      in += 2 * 16;
+      num_blks -= 2;
+    }
+
+  while (num_blks)
+    {
+      nburn = sm4_do_crypt (rk, out, in);
+      burn_depth = nburn > burn_depth ? nburn : burn_depth;
+      out += 16;
+      in += 16;
+      num_blks--;
+    }
+
+  if (burn_depth)
+    burn_depth += sizeof(void *) * 5;
+  return burn_depth;
+}
+
+/* Bulk encryption of complete blocks in CTR mode.  This function is only
+   intended for the bulk encryption feature of cipher.c.  CTR is expected to be
+   of size 16. */
+static void
+_gcry_sm4_ctr_enc(void *context, unsigned char *ctr,
+                  void *outbuf_arg, const void *inbuf_arg,
+                  size_t nblocks)
+{
+  SM4_context *ctx = context;
+  byte *outbuf = outbuf_arg;
+  const byte *inbuf = inbuf_arg;
+  int burn_stack_depth = 0;
+
+#ifdef USE_AESNI_AVX2
+  if (ctx->use_aesni_avx2)
+    {
+      /* Process data in 16 block chunks. */
+      while (nblocks >= 16)
+        {
+          _gcry_sm4_aesni_avx2_ctr_enc(ctx->rkey_enc, outbuf, inbuf, ctr);
+
+          nblocks -= 16;
+          outbuf += 16 * 16;
+          inbuf += 16 * 16;
+        }
+    }
+#endif
+
+#ifdef USE_AESNI_AVX
+  if (ctx->use_aesni_avx)
+    {
+      /* Process data in 8 block chunks. */
+      while (nblocks >= 8)
+        {
+          _gcry_sm4_aesni_avx_ctr_enc(ctx->rkey_enc, outbuf, inbuf, ctr);
+
+          nblocks -= 8;
+          outbuf += 8 * 16;
+          inbuf += 8 * 16;
+        }
+    }
+#endif
+
+  /* Process remaining blocks. */
+  if (nblocks)
+    {
+      unsigned int (*crypt_blk1_8)(const u32 *rk, byte *out, const byte *in,
+                                  unsigned int num_blks);
+      byte tmpbuf[16 * 8];
+      unsigned int tmp_used = 16;
+
+      if (0)
+       ;
+#ifdef USE_AESNI_AVX
+      else if (ctx->use_aesni_avx)
+       {
+         crypt_blk1_8 = sm4_aesni_avx_crypt_blk1_8;
+       }
+#endif
+      else
+       {
+         prefetch_sbox_table ();
+         crypt_blk1_8 = sm4_crypt_blocks;
+       }
+
+      /* Process remaining blocks. */
+      while (nblocks)
+       {
+         size_t curr_blks = nblocks > 8 ? 8 : nblocks;
+         size_t i;
+
+         if (curr_blks * 16 > tmp_used)
+           tmp_used = curr_blks * 16;
+
+         cipher_block_cpy (tmpbuf + 0 * 16, ctr, 16);
+         for (i = 1; i < curr_blks; i++)
+           {
+             cipher_block_cpy (&tmpbuf[i * 16], ctr, 16);
+             cipher_block_add (&tmpbuf[i * 16], i, 16);
+           }
+         cipher_block_add (ctr, curr_blks, 16);
+
+         burn_stack_depth = crypt_blk1_8 (ctx->rkey_enc, tmpbuf, tmpbuf,
+                                          curr_blks);
+
+         for (i = 0; i < curr_blks; i++)
+           {
+             cipher_block_xor (outbuf, &tmpbuf[i * 16], inbuf, 16);
+             outbuf += 16;
+             inbuf += 16;
+           }
+
+         nblocks -= curr_blks;
+       }
+
+      wipememory(tmpbuf, tmp_used);
+    }
+
+  if (burn_stack_depth)
+    _gcry_burn_stack(burn_stack_depth);
+}
+
+/* Bulk decryption of complete blocks in CBC mode.  This function is only
+   intended for the bulk encryption feature of cipher.c. */
+static void
+_gcry_sm4_cbc_dec(void *context, unsigned char *iv,
+                  void *outbuf_arg, const void *inbuf_arg,
+                  size_t nblocks)
+{
+  SM4_context *ctx = context;
+  unsigned char *outbuf = outbuf_arg;
+  const unsigned char *inbuf = inbuf_arg;
+  int burn_stack_depth = 0;
+
+#ifdef USE_AESNI_AVX2
+  if (ctx->use_aesni_avx2)
+    {
+      /* Process data in 16 block chunks. */
+      while (nblocks >= 16)
+        {
+          _gcry_sm4_aesni_avx2_cbc_dec(ctx->rkey_dec, outbuf, inbuf, iv);
+
+          nblocks -= 16;
+          outbuf += 16 * 16;
+          inbuf += 16 * 16;
+        }
+    }
+#endif
+
+#ifdef USE_AESNI_AVX
+  if (ctx->use_aesni_avx)
+    {
+      /* Process data in 8 block chunks. */
+      while (nblocks >= 8)
+        {
+          _gcry_sm4_aesni_avx_cbc_dec(ctx->rkey_dec, outbuf, inbuf, iv);
+
+          nblocks -= 8;
+          outbuf += 8 * 16;
+          inbuf += 8 * 16;
+        }
+    }
+#endif
+
+  /* Process remaining blocks. */
+  if (nblocks)
+    {
+      unsigned int (*crypt_blk1_8)(const u32 *rk, byte *out, const byte *in,
+                                  unsigned int num_blks);
+      unsigned char savebuf[16 * 8];
+      unsigned int tmp_used = 16;
+
+      if (0)
+       ;
+#ifdef USE_AESNI_AVX
+      else if (ctx->use_aesni_avx)
+       {
+         crypt_blk1_8 = sm4_aesni_avx_crypt_blk1_8;
+       }
+#endif
+      else
+       {
+         prefetch_sbox_table ();
+         crypt_blk1_8 = sm4_crypt_blocks;
+       }
+
+      /* Process remaining blocks. */
+      while (nblocks)
+       {
+         size_t curr_blks = nblocks > 8 ? 8 : nblocks;
+         size_t i;
+
+         if (curr_blks * 16 > tmp_used)
+           tmp_used = curr_blks * 16;
+
+         burn_stack_depth = crypt_blk1_8 (ctx->rkey_dec, savebuf, inbuf,
+                                          curr_blks);
+
+         for (i = 0; i < curr_blks; i++)
+           {
+             cipher_block_xor_n_copy_2(outbuf, &savebuf[i * 16], iv, inbuf,
+                                       16);
+             outbuf += 16;
+             inbuf += 16;
+           }
+
+         nblocks -= curr_blks;
+       }
+
+      wipememory(savebuf, tmp_used);
+    }
+
+  if (burn_stack_depth)
+    _gcry_burn_stack(burn_stack_depth);
+}
+
+/* Bulk decryption of complete blocks in CFB mode.  This function is only
+   intended for the bulk encryption feature of cipher.c. */
+static void
+_gcry_sm4_cfb_dec(void *context, unsigned char *iv,
+                  void *outbuf_arg, const void *inbuf_arg,
+                  size_t nblocks)
+{
+  SM4_context *ctx = context;
+  unsigned char *outbuf = outbuf_arg;
+  const unsigned char *inbuf = inbuf_arg;
+  int burn_stack_depth = 0;
+
+#ifdef USE_AESNI_AVX2
+  if (ctx->use_aesni_avx2)
+    {
+      /* Process data in 16 block chunks. */
+      while (nblocks >= 16)
+        {
+          _gcry_sm4_aesni_avx2_cfb_dec(ctx->rkey_enc, outbuf, inbuf, iv);
+
+          nblocks -= 16;
+          outbuf += 16 * 16;
+          inbuf += 16 * 16;
+        }
+    }
+#endif
+
+#ifdef USE_AESNI_AVX
+  if (ctx->use_aesni_avx)
+    {
+      /* Process data in 8 block chunks. */
+      while (nblocks >= 8)
+        {
+          _gcry_sm4_aesni_avx_cfb_dec(ctx->rkey_enc, outbuf, inbuf, iv);
+
+          nblocks -= 8;
+          outbuf += 8 * 16;
+          inbuf += 8 * 16;
+        }
+    }
+#endif
+
+  /* Process remaining blocks. */
+  if (nblocks)
+    {
+      unsigned int (*crypt_blk1_8)(const u32 *rk, byte *out, const byte *in,
+                                  unsigned int num_blks);
+      unsigned char ivbuf[16 * 8];
+      unsigned int tmp_used = 16;
+
+      if (0)
+       ;
+#ifdef USE_AESNI_AVX
+      else if (ctx->use_aesni_avx)
+       {
+         crypt_blk1_8 = sm4_aesni_avx_crypt_blk1_8;
+       }
+#endif
+      else
+       {
+         prefetch_sbox_table ();
+         crypt_blk1_8 = sm4_crypt_blocks;
+       }
+
+      /* Process remaining blocks. */
+      while (nblocks)
+       {
+         size_t curr_blks = nblocks > 8 ? 8 : nblocks;
+         size_t i;
+
+         if (curr_blks * 16 > tmp_used)
+           tmp_used = curr_blks * 16;
+
+         cipher_block_cpy (&ivbuf[0 * 16], iv, 16);
+         for (i = 1; i < curr_blks; i++)
+           cipher_block_cpy (&ivbuf[i * 16], &inbuf[(i - 1) * 16], 16);
+         cipher_block_cpy (iv, &inbuf[(i - 1) * 16], 16);
+
+         burn_stack_depth = crypt_blk1_8 (ctx->rkey_enc, ivbuf, ivbuf,
+                                          curr_blks);
+
+         for (i = 0; i < curr_blks; i++)
+           {
+             cipher_block_xor (outbuf, inbuf, &ivbuf[i * 16], 16);
+             outbuf += 16;
+             inbuf += 16;
+           }
+
+         nblocks -= curr_blks;
+       }
+
+      wipememory(ivbuf, tmp_used);
+    }
+
+  if (burn_stack_depth)
+    _gcry_burn_stack(burn_stack_depth);
+}
+
+/* Bulk encryption/decryption of complete blocks in OCB mode. */
+static size_t
+_gcry_sm4_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
+                    const void *inbuf_arg, size_t nblocks, int encrypt)
+{
+  SM4_context *ctx = (void *)&c->context.c;
+  unsigned char *outbuf = outbuf_arg;
+  const unsigned char *inbuf = inbuf_arg;
+  u64 blkn = c->u_mode.ocb.data_nblocks;
+  int burn_stack_depth = 0;
+
+#ifdef USE_AESNI_AVX2
+  if (ctx->use_aesni_avx2)
+    {
+      u64 Ls[16];
+      unsigned int n = 16 - (blkn % 16);
+      u64 *l;
+      int i;
+
+      if (nblocks >= 16)
+       {
+         for (i = 0; i < 16; i += 8)
+           {
+             /* Use u64 to store pointers for x32 support (assembly function
+              * assumes 64-bit pointers). */
+             Ls[(i + 0 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+             Ls[(i + 1 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
+             Ls[(i + 2 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+             Ls[(i + 3 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[2];
+             Ls[(i + 4 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+             Ls[(i + 5 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
+             Ls[(i + 6 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+           }
+
+         Ls[(7 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
+         l = &Ls[(15 + n) % 16];
+
+         /* Process data in 16 block chunks. */
+         while (nblocks >= 16)
+           {
+             blkn += 16;
+             *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 16);
+
+             if (encrypt)
+               _gcry_sm4_aesni_avx2_ocb_enc(ctx->rkey_enc, outbuf, inbuf,
+                                            c->u_iv.iv, c->u_ctr.ctr, Ls);
+             else
+               _gcry_sm4_aesni_avx2_ocb_dec(ctx->rkey_dec, outbuf, inbuf,
+                                            c->u_iv.iv, c->u_ctr.ctr, Ls);
+
+             nblocks -= 16;
+             outbuf += 16 * 16;
+             inbuf += 16 * 16;
+           }
+       }
+    }
+#endif
+
+#ifdef USE_AESNI_AVX
+  if (ctx->use_aesni_avx)
+    {
+      u64 Ls[8];
+      unsigned int n = 8 - (blkn % 8);
+      u64 *l;
+
+      if (nblocks >= 8)
+       {
+         /* Use u64 to store pointers for x32 support (assembly function
+          * assumes 64-bit pointers). */
+         Ls[(0 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+         Ls[(1 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
+         Ls[(2 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+         Ls[(3 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[2];
+         Ls[(4 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+         Ls[(5 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
+         Ls[(6 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+         Ls[(7 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
+         l = &Ls[(7 + n) % 8];
+
+         /* Process data in 8 block chunks. */
+         while (nblocks >= 8)
+           {
+             blkn += 8;
+             *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 8);
+
+             if (encrypt)
+               _gcry_sm4_aesni_avx_ocb_enc(ctx->rkey_enc, outbuf, inbuf,
+                                           c->u_iv.iv, c->u_ctr.ctr, Ls);
+             else
+               _gcry_sm4_aesni_avx_ocb_dec(ctx->rkey_dec, outbuf, inbuf,
+                                           c->u_iv.iv, c->u_ctr.ctr, Ls);
+
+             nblocks -= 8;
+             outbuf += 8 * 16;
+             inbuf += 8 * 16;
+           }
+       }
+    }
+#endif
+
+  if (nblocks)
+    {
+      unsigned int (*crypt_blk1_8)(const u32 *rk, byte *out, const byte *in,
+                                  unsigned int num_blks);
+      const u32 *rk = encrypt ? ctx->rkey_enc : ctx->rkey_dec;
+      unsigned char tmpbuf[16 * 8];
+      unsigned int tmp_used = 16;
+
+      if (0)
+       ;
+#ifdef USE_AESNI_AVX
+      else if (ctx->use_aesni_avx)
+       {
+         crypt_blk1_8 = sm4_aesni_avx_crypt_blk1_8;
+       }
+#endif
+      else
+       {
+         prefetch_sbox_table ();
+         crypt_blk1_8 = sm4_crypt_blocks;
+       }
+
+      while (nblocks)
+       {
+         size_t curr_blks = nblocks > 8 ? 8 : nblocks;
+         size_t i;
+
+         if (curr_blks * 16 > tmp_used)
+           tmp_used = curr_blks * 16;
+
+         for (i = 0; i < curr_blks; i++)
+           {
+             const unsigned char *l = ocb_get_l(c, ++blkn);
+
+             /* Checksum_i = Checksum_{i-1} xor P_i  */
+             if (encrypt)
+               cipher_block_xor_1(c->u_ctr.ctr, &inbuf[i * 16], 16);
+
+             /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+             cipher_block_xor_2dst (&tmpbuf[i * 16], c->u_iv.iv, l, 16);
+             cipher_block_xor (&outbuf[i * 16], &inbuf[i * 16],
+                               c->u_iv.iv, 16);
+           }
+
+         /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i)  */
+         crypt_blk1_8 (rk, outbuf, outbuf, curr_blks);
+
+         for (i = 0; i < curr_blks; i++)
+           {
+             cipher_block_xor_1 (&outbuf[i * 16], &tmpbuf[i * 16], 16);
+
+             /* Checksum_i = Checksum_{i-1} xor P_i  */
+             if (!encrypt)
+                 cipher_block_xor_1(c->u_ctr.ctr, &outbuf[i * 16], 16);
+           }
+
+         outbuf += curr_blks * 16;
+         inbuf  += curr_blks * 16;
+         nblocks -= curr_blks;
+       }
+
+      wipememory(tmpbuf, tmp_used);
+    }
+
+  c->u_mode.ocb.data_nblocks = blkn;
+
+  if (burn_stack_depth)
+    _gcry_burn_stack(burn_stack_depth);
+
+  return 0;
+}
+
+/* Bulk authentication of complete blocks in OCB mode. */
+static size_t
+_gcry_sm4_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks)
+{
+  SM4_context *ctx = (void *)&c->context.c;
+  const unsigned char *abuf = abuf_arg;
+  u64 blkn = c->u_mode.ocb.aad_nblocks;
+
+#ifdef USE_AESNI_AVX2
+  if (ctx->use_aesni_avx2)
+    {
+      u64 Ls[16];
+      unsigned int n = 16 - (blkn % 16);
+      u64 *l;
+      int i;
+
+      if (nblocks >= 16)
+       {
+         for (i = 0; i < 16; i += 8)
+           {
+             /* Use u64 to store pointers for x32 support (assembly function
+              * assumes 64-bit pointers). */
+             Ls[(i + 0 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+             Ls[(i + 1 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
+             Ls[(i + 2 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+             Ls[(i + 3 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[2];
+             Ls[(i + 4 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+             Ls[(i + 5 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
+             Ls[(i + 6 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+           }
+
+         Ls[(7 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
+         l = &Ls[(15 + n) % 16];
+
+         /* Process data in 16 block chunks. */
+         while (nblocks >= 16)
+           {
+             blkn += 16;
+             *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 16);
+
+             _gcry_sm4_aesni_avx2_ocb_auth(ctx->rkey_enc, abuf,
+                                           c->u_mode.ocb.aad_offset,
+                                           c->u_mode.ocb.aad_sum, Ls);
+
+             nblocks -= 16;
+             abuf += 16 * 16;
+           }
+       }
+    }
+#endif
+
+#ifdef USE_AESNI_AVX
+  if (ctx->use_aesni_avx)
+    {
+      u64 Ls[8];
+      unsigned int n = 8 - (blkn % 8);
+      u64 *l;
+
+      if (nblocks >= 8)
+       {
+         /* Use u64 to store pointers for x32 support (assembly function
+           * assumes 64-bit pointers). */
+         Ls[(0 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+         Ls[(1 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
+         Ls[(2 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+         Ls[(3 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[2];
+         Ls[(4 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+         Ls[(5 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
+         Ls[(6 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+         Ls[(7 + n) % 8] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
+         l = &Ls[(7 + n) % 8];
+
+         /* Process data in 8 block chunks. */
+         while (nblocks >= 8)
+           {
+             blkn += 8;
+             *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 8);
+
+             _gcry_sm4_aesni_avx_ocb_auth(ctx->rkey_enc, abuf,
+                                          c->u_mode.ocb.aad_offset,
+                                          c->u_mode.ocb.aad_sum, Ls);
+
+             nblocks -= 8;
+             abuf += 8 * 16;
+           }
+       }
+    }
+#endif
+
+  if (nblocks)
+    {
+      unsigned int (*crypt_blk1_8)(const u32 *rk, byte *out, const byte *in,
+                                  unsigned int num_blks);
+      unsigned char tmpbuf[16 * 8];
+      unsigned int tmp_used = 16;
+
+      if (0)
+       ;
+#ifdef USE_AESNI_AVX
+      else if (ctx->use_aesni_avx)
+       {
+         crypt_blk1_8 = sm4_aesni_avx_crypt_blk1_8;
+       }
+#endif
+      else
+       {
+         prefetch_sbox_table ();
+         crypt_blk1_8 = sm4_crypt_blocks;
+       }
+
+      while (nblocks)
+       {
+         size_t curr_blks = nblocks > 8 ? 8 : nblocks;
+         size_t i;
+
+         if (curr_blks * 16 > tmp_used)
+           tmp_used = curr_blks * 16;
+
+         for (i = 0; i < curr_blks; i++)
+           {
+             const unsigned char *l = ocb_get_l(c, ++blkn);
+
+             /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+             cipher_block_xor_2dst (&tmpbuf[i * 16],
+                                    c->u_mode.ocb.aad_offset, l, 16);
+             cipher_block_xor_1 (&tmpbuf[i * 16], &abuf[i * 16], 16);
+           }
+
+         /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i)  */
+         crypt_blk1_8 (ctx->rkey_enc, tmpbuf, tmpbuf, curr_blks);
+
+         for (i = 0; i < curr_blks; i++)
+           {
+             cipher_block_xor_1 (c->u_mode.ocb.aad_sum, &tmpbuf[i * 16], 16);
+           }
+
+         abuf += curr_blks * 16;
+         nblocks -= curr_blks;
+       }
+
+      wipememory(tmpbuf, tmp_used);
+    }
+
+  c->u_mode.ocb.aad_nblocks = blkn;
+
+  return 0;
+}
+
+/* Run the self-tests for SM4-CTR, tests IV increment of bulk CTR
+   encryption.  Returns NULL on success. */
+static const char*
+selftest_ctr_128 (void)
+{
+  const int nblocks = 16 - 1;
+  const int blocksize = 16;
+  const int context_size = sizeof(SM4_context);
+
+  return _gcry_selftest_helper_ctr("SM4", &sm4_setkey,
+           &sm4_encrypt, nblocks, blocksize, context_size);
+}
+
+/* Run the self-tests for SM4-CBC, tests bulk CBC decryption.
+   Returns NULL on success. */
+static const char*
+selftest_cbc_128 (void)
+{
+  const int nblocks = 16 - 1;
+  const int blocksize = 16;
+  const int context_size = sizeof(SM4_context);
+
+  return _gcry_selftest_helper_cbc("SM4", &sm4_setkey,
+           &sm4_encrypt, nblocks, blocksize, context_size);
+}
+
+/* Run the self-tests for SM4-CFB, tests bulk CFB decryption.
+   Returns NULL on success. */
+static const char*
+selftest_cfb_128 (void)
+{
+  const int nblocks = 16 - 1;
+  const int blocksize = 16;
+  const int context_size = sizeof(SM4_context);
+
+  return _gcry_selftest_helper_cfb("SM4", &sm4_setkey,
+           &sm4_encrypt, nblocks, blocksize, context_size);
+}
+
+static const char *
+sm4_selftest (void)
+{
+  SM4_context ctx;
+  byte scratch[16];
+  const char *r;
+
+  static const byte plaintext[16] = {
+    0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF,
+    0xFE, 0xDC, 0xBA, 0x98, 0x76, 0x54, 0x32, 0x10,
+  };
+  static const byte key[16] = {
+    0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF,
+    0xFE, 0xDC, 0xBA, 0x98, 0x76, 0x54, 0x32, 0x10,
+  };
+  static const byte ciphertext[16] = {
+    0x68, 0x1E, 0xDF, 0x34, 0xD2, 0x06, 0x96, 0x5E,
+    0x86, 0xB3, 0xE9, 0x4F, 0x53, 0x6E, 0x42, 0x46
+  };
+
+  memset (&ctx, 0, sizeof(ctx));
+
+  sm4_expand_key (&ctx, key);
+  sm4_encrypt (&ctx, scratch, plaintext);
+  if (memcmp (scratch, ciphertext, sizeof (ciphertext)))
+    return "SM4 test encryption failed.";
+  sm4_decrypt (&ctx, scratch, scratch);
+  if (memcmp (scratch, plaintext, sizeof (plaintext)))
+    return "SM4 test decryption failed.";
+
+  if ( (r = selftest_ctr_128 ()) )
+    return r;
+
+  if ( (r = selftest_cbc_128 ()) )
+    return r;
+
+  if ( (r = selftest_cfb_128 ()) )
+    return r;
+
+  return NULL;
+}
+
+static gpg_err_code_t
+run_selftests (int algo, int extended, selftest_report_func_t report)
+{
+  const char *what;
+  const char *errtxt;
+
+  (void)extended;
+
+  if (algo != GCRY_CIPHER_SM4)
+    return GPG_ERR_CIPHER_ALGO;
+
+  what = "selftest";
+  errtxt = sm4_selftest ();
+  if (errtxt)
+    goto failed;
+
+  return 0;
+
+ failed:
+  if (report)
+    report ("cipher", GCRY_CIPHER_SM4, what, errtxt);
+  return GPG_ERR_SELFTEST_FAILED;
+}
+
+
+static const gcry_cipher_oid_spec_t sm4_oids[] =
+  {
+    { "1.2.156.10197.1.104.1", GCRY_CIPHER_MODE_ECB },
+    { "1.2.156.10197.1.104.2", GCRY_CIPHER_MODE_CBC },
+    { "1.2.156.10197.1.104.3", GCRY_CIPHER_MODE_OFB },
+    { "1.2.156.10197.1.104.4", GCRY_CIPHER_MODE_CFB },
+    { "1.2.156.10197.1.104.7", GCRY_CIPHER_MODE_CTR },
+    { NULL }
+  };
+
+gcry_cipher_spec_t _gcry_cipher_spec_sm4 =
+  {
+    GCRY_CIPHER_SM4, {0, 0},
+    "SM4", NULL, sm4_oids, 16, 128,
+    sizeof (SM4_context),
+    sm4_setkey, sm4_encrypt, sm4_decrypt,
+    NULL, NULL,
+    run_selftests
+  };
diff --git a/grub-core/lib/libgcrypt/cipher/stribog.c 
b/grub-core/lib/libgcrypt/cipher/stribog.c
new file mode 100644
index 000000000..21e385ae6
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/stribog.c
@@ -0,0 +1,1362 @@
+/* stribog.c - GOST R 34.11-2012 (Stribog) hash function
+ * Copyright (C) 2013 Dmitry Eremin-Solenikov
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "g10lib.h"
+#include "bithelp.h"
+#include "bufhelp.h"
+#include "cipher.h"
+#include "hash-common.h"
+
+
+typedef struct
+{
+  gcry_md_block_ctx_t bctx;
+  union
+  {
+    u64 h[8];
+    unsigned char result[64];
+  };
+  u64 N[8];
+  u64 Sigma[8];
+} STRIBOG_CONTEXT;
+
+
+/* Pre-computed results of multiplication of bytes on A and reordered with
+   Pi[]. */
+static const u64 stribog_table[8][256] =
+{
+  /* 0 */
+  { U64_C(0xd01f715b5c7ef8e6), U64_C(0x16fa240980778325),
+    U64_C(0xa8a42e857ee049c8), U64_C(0x6ac1068fa186465b),
+    U64_C(0x6e417bd7a2e9320b), U64_C(0x665c8167a437daab),
+    U64_C(0x7666681aa89617f6), U64_C(0x4b959163700bdcf5),
+    U64_C(0xf14be6b78df36248), U64_C(0xc585bd689a625cff),
+    U64_C(0x9557d7fca67d82cb), U64_C(0x89f0b969af6dd366),
+    U64_C(0xb0833d48749f6c35), U64_C(0xa1998c23b1ecbc7c),
+    U64_C(0x8d70c431ac02a736), U64_C(0xd6dfbc2fd0a8b69e),
+    U64_C(0x37aeb3e551fa198b), U64_C(0x0b7d128a40b5cf9c),
+    U64_C(0x5a8f2008b5780cbc), U64_C(0xedec882284e333e5),
+    U64_C(0xd25fc177d3c7c2ce), U64_C(0x5e0f5d50b61778ec),
+    U64_C(0x1d873683c0c24cb9), U64_C(0xad040bcbb45d208c),
+    U64_C(0x2f89a0285b853c76), U64_C(0x5732fff6791b8d58),
+    U64_C(0x3e9311439ef6ec3f), U64_C(0xc9183a809fd3c00f),
+    U64_C(0x83adf3f5260a01ee), U64_C(0xa6791941f4e8ef10),
+    U64_C(0x103ae97d0ca1cd5d), U64_C(0x2ce948121dee1b4a),
+    U64_C(0x39738421dbf2bf53), U64_C(0x093da2a6cf0cf5b4),
+    U64_C(0xcd9847d89cbcb45f), U64_C(0xf9561c078b2d8ae8),
+    U64_C(0x9c6a755a6971777f), U64_C(0xbc1ebaa0712ef0c5),
+    U64_C(0x72e61542abf963a6), U64_C(0x78bb5fde229eb12e),
+    U64_C(0x14ba94250fceb90d), U64_C(0x844d6697630e5282),
+    U64_C(0x98ea08026a1e032f), U64_C(0xf06bbea144217f5c),
+    U64_C(0xdb6263d11ccb377a), U64_C(0x641c314b2b8ee083),
+    U64_C(0x320e96ab9b4770cf), U64_C(0x1ee7deb986a96b85),
+    U64_C(0xe96cf57a878c47b5), U64_C(0xfdd6615f8842feb8),
+    U64_C(0xc83862965601dd1b), U64_C(0x2ea9f83e92572162),
+    U64_C(0xf876441142ff97fc), U64_C(0xeb2c455608357d9d),
+    U64_C(0x5612a7e0b0c9904c), U64_C(0x6c01cbfb2d500823),
+    U64_C(0x4548a6a7fa037a2d), U64_C(0xabc4c6bf388b6ef4),
+    U64_C(0xbade77d4fdf8bebd), U64_C(0x799b07c8eb4cac3a),
+    U64_C(0x0c9d87e805b19cf0), U64_C(0xcb588aac106afa27),
+    U64_C(0xea0c1d40c1e76089), U64_C(0x2869354a1e816f1a),
+    U64_C(0xff96d17307fbc490), U64_C(0x9f0a9d602f1a5043),
+    U64_C(0x96373fc6e016a5f7), U64_C(0x5292dab8b3a6e41c),
+    U64_C(0x9b8ae0382c752413), U64_C(0x4f15ec3b7364a8a5),
+    U64_C(0x3fb349555724f12b), U64_C(0xc7c50d4415db66d7),
+    U64_C(0x92b7429ee379d1a7), U64_C(0xd37f99611a15dfda),
+    U64_C(0x231427c05e34a086), U64_C(0xa439a96d7b51d538),
+    U64_C(0xb403401077f01865), U64_C(0xdda2aea5901d7902),
+    U64_C(0x0a5d4a9c8967d288), U64_C(0xc265280adf660f93),
+    U64_C(0x8bb0094520d4e94e), U64_C(0x2a29856691385532),
+    U64_C(0x42a833c5bf072941), U64_C(0x73c64d54622b7eb2),
+    U64_C(0x07e095624504536c), U64_C(0x8a905153e906f45a),
+    U64_C(0x6f6123c16b3b2f1f), U64_C(0xc6e55552dc097bc3),
+    U64_C(0x4468feb133d16739), U64_C(0xe211e7f0c7398829),
+    U64_C(0xa2f96419f7879b40), U64_C(0x19074bdbc3ad38e9),
+    U64_C(0xf4ebc3f9474e0b0c), U64_C(0x43886bd376d53455),
+    U64_C(0xd8028beb5aa01046), U64_C(0x51f23282f5cdc320),
+    U64_C(0xe7b1c2be0d84e16d), U64_C(0x081dfab006dee8a0),
+    U64_C(0x3b33340d544b857b), U64_C(0x7f5bcabc679ae242),
+    U64_C(0x0edd37c48a08a6d8), U64_C(0x81ed43d9a9b33bc6),
+    U64_C(0xb1a3655ebd4d7121), U64_C(0x69a1eeb5e7ed6167),
+    U64_C(0xf6ab73d5c8f73124), U64_C(0x1a67a3e185c61fd5),
+    U64_C(0x2dc91004d43c065e), U64_C(0x0240b02c8fb93a28),
+    U64_C(0x90f7f2b26cc0eb8f), U64_C(0x3cd3a16f114fd617),
+    U64_C(0xaae49ea9f15973e0), U64_C(0x06c0cd748cd64e78),
+    U64_C(0xda423bc7d5192a6e), U64_C(0xc345701c16b41287),
+    U64_C(0x6d2193ede4821537), U64_C(0xfcf639494190e3ac),
+    U64_C(0x7c3b228621f1c57e), U64_C(0xfb16ac2b0494b0c0),
+    U64_C(0xbf7e529a3745d7f9), U64_C(0x6881b6a32e3f7c73),
+    U64_C(0xca78d2bad9b8e733), U64_C(0xbbfe2fc2342aa3a9),
+    U64_C(0x0dbddffecc6381e4), U64_C(0x70a6a56e2440598e),
+    U64_C(0xe4d12a844befc651), U64_C(0x8c509c2765d0ba22),
+    U64_C(0xee8c6018c28814d9), U64_C(0x17da7c1f49a59e31),
+    U64_C(0x609c4c1328e194d3), U64_C(0xb3e3d57232f44b09),
+    U64_C(0x91d7aaa4a512f69b), U64_C(0x0ffd6fd243dabbcc),
+    U64_C(0x50d26a943c1fde34), U64_C(0x6be15e9968545b4f),
+    U64_C(0x94778fea6faf9fdf), U64_C(0x2b09dd7058ea4826),
+    U64_C(0x677cd9716de5c7bf), U64_C(0x49d5214fffb2e6dd),
+    U64_C(0x0360e83a466b273c), U64_C(0x1fc786af4f7b7691),
+    U64_C(0xa0b9d435783ea168), U64_C(0xd49f0c035f118cb6),
+    U64_C(0x01205816c9d21d14), U64_C(0xac2453dd7d8f3d98),
+    U64_C(0x545217cc3f70aa64), U64_C(0x26b4028e9489c9c2),
+    U64_C(0xdec2469fd6765e3e), U64_C(0x04807d58036f7450),
+    U64_C(0xe5f17292823ddb45), U64_C(0xf30b569b024a5860),
+    U64_C(0x62dcfc3fa758aefb), U64_C(0xe84cad6c4e5e5aa1),
+    U64_C(0xccb81fce556ea94b), U64_C(0x53b282ae7a74f908),
+    U64_C(0x1b47fbf74c1402c1), U64_C(0x368eebf39828049f),
+    U64_C(0x7afbeff2ad278b06), U64_C(0xbe5e0a8cfe97caed),
+    U64_C(0xcfd8f7f413058e77), U64_C(0xf78b2bc301252c30),
+    U64_C(0x4d555c17fcdd928d), U64_C(0x5f2f05467fc565f8),
+    U64_C(0x24f4b2a21b30f3ea), U64_C(0x860dd6bbecb768aa),
+    U64_C(0x4c750401350f8f99), U64_C(0x0000000000000000),
+    U64_C(0xecccd0344d312ef1), U64_C(0xb5231806be220571),
+    U64_C(0xc105c030990d28af), U64_C(0x653c695de25cfd97),
+    U64_C(0x159acc33c61ca419), U64_C(0xb89ec7f872418495),
+    U64_C(0xa9847693b73254dc), U64_C(0x58cf90243ac13694),
+    U64_C(0x59efc832f3132b80), U64_C(0x5c4fed7c39ae42c4),
+    U64_C(0x828dabe3efd81cfa), U64_C(0xd13f294d95ace5f2),
+    U64_C(0x7d1b7a90e823d86a), U64_C(0xb643f03cf849224d),
+    U64_C(0x3df3f979d89dcb03), U64_C(0x7426d836272f2dde),
+    U64_C(0xdfe21e891fa4432a), U64_C(0x3a136c1b9d99986f),
+    U64_C(0xfa36f43dcd46add4), U64_C(0xc025982650df35bb),
+    U64_C(0x856d3e81aadc4f96), U64_C(0xc4a5e57e53b041eb),
+    U64_C(0x4708168b75ba4005), U64_C(0xaf44bbe73be41aa4),
+    U64_C(0x971767d029c4b8e3), U64_C(0xb9be9feebb939981),
+    U64_C(0x215497ecd18d9aae), U64_C(0x316e7e91dd2c57f3),
+    U64_C(0xcef8afe2dad79363), U64_C(0x3853dc371220a247),
+    U64_C(0x35ee03c9de4323a3), U64_C(0xe6919aa8c456fc79),
+    U64_C(0xe05157dc4880b201), U64_C(0x7bdbb7e464f59612),
+    U64_C(0x127a59518318f775), U64_C(0x332ecebd52956ddb),
+    U64_C(0x8f30741d23bb9d1e), U64_C(0xd922d3fd93720d52),
+    U64_C(0x7746300c61440ae2), U64_C(0x25d4eab4d2e2eefe),
+    U64_C(0x75068020eefd30ca), U64_C(0x135a01474acaea61),
+    U64_C(0x304e268714fe4ae7), U64_C(0xa519f17bb283c82c),
+    U64_C(0xdc82f6b359cf6416), U64_C(0x5baf781e7caa11a8),
+    U64_C(0xb2c38d64fb26561d), U64_C(0x34ce5bdf17913eb7),
+    U64_C(0x5d6fb56af07c5fd0), U64_C(0x182713cd0a7f25fd),
+    U64_C(0x9e2ac576e6c84d57), U64_C(0x9aaab82ee5a73907),
+    U64_C(0xa3d93c0f3e558654), U64_C(0x7e7b92aaae48ff56),
+    U64_C(0x872d8ead256575be), U64_C(0x41c8dbfff96c0e7d),
+    U64_C(0x99ca5014a3cc1e3b), U64_C(0x40e883e930be1369),
+    U64_C(0x1ca76e95091051ad), U64_C(0x4e35b42dbab6b5b1),
+    U64_C(0x05a0254ecabd6944), U64_C(0xe1710fca8152af15),
+    U64_C(0xf22b0e8dcb984574), U64_C(0xb763a82a319b3f59),
+    U64_C(0x63fca4296e8ab3ef), U64_C(0x9d4a2d4ca0a36a6b),
+    U64_C(0xe331bfe60eeb953d), U64_C(0xd5bf541596c391a2),
+    U64_C(0xf5cb9bef8e9c1618), U64_C(0x46284e9dbc685d11),
+    U64_C(0x2074cffa185f87ba), U64_C(0xbd3ee2b6b8fcedd1),
+    U64_C(0xae64e3f1f23607b0), U64_C(0xfeb68965ce29d984),
+    U64_C(0x55724fdaf6a2b770), U64_C(0x29496d5cd753720e),
+    U64_C(0xa75941573d3af204), U64_C(0x8e102c0bea69800a),
+    U64_C(0x111ab16bc573d049), U64_C(0xd7ffe439197aab8a),
+    U64_C(0xefac380e0b5a09cd), U64_C(0x48f579593660fbc9),
+    U64_C(0x22347fd697e6bd92), U64_C(0x61bc1405e13389c7),
+    U64_C(0x4ab5c975b9d9c1e1), U64_C(0x80cd1bcf606126d2),
+    U64_C(0x7186fd78ed92449a), U64_C(0x93971a882aabccb3),
+    U64_C(0x88d0e17f66bfce72), U64_C(0x27945a985d5bd4d6) },
+  /* 1 */
+  { U64_C(0xde553f8c05a811c8), U64_C(0x1906b59631b4f565),
+    U64_C(0x436e70d6b1964ff7), U64_C(0x36d343cb8b1e9d85),
+    U64_C(0x843dfacc858aab5a), U64_C(0xfdfc95c299bfc7f9),
+    U64_C(0x0f634bdea1d51fa2), U64_C(0x6d458b3b76efb3cd),
+    U64_C(0x85c3f77cf8593f80), U64_C(0x3c91315fbe737cb2),
+    U64_C(0x2148b03366ace398), U64_C(0x18f8b8264c6761bf),
+    U64_C(0xc830c1c495c9fb0f), U64_C(0x981a76102086a0aa),
+    U64_C(0xaa16012142f35760), U64_C(0x35cc54060c763cf6),
+    U64_C(0x42907d66cc45db2d), U64_C(0x8203d44b965af4bc),
+    U64_C(0x3d6f3cefc3a0e868), U64_C(0xbc73ff69d292bda7),
+    U64_C(0x8722ed0102e20a29), U64_C(0x8f8185e8cd34deb7),
+    U64_C(0x9b0561dda7ee01d9), U64_C(0x5335a0193227fad6),
+    U64_C(0xc9cecc74e81a6fd5), U64_C(0x54f5832e5c2431ea),
+    U64_C(0x99e47ba05d553470), U64_C(0xf7bee756acd226ce),
+    U64_C(0x384e05a5571816fd), U64_C(0xd1367452a47d0e6a),
+    U64_C(0xf29fde1c386ad85b), U64_C(0x320c77316275f7ca),
+    U64_C(0xd0c879e2d9ae9ab0), U64_C(0xdb7406c69110ef5d),
+    U64_C(0x45505e51a2461011), U64_C(0xfc029872e46c5323),
+    U64_C(0xfa3cb6f5f7bc0cc5), U64_C(0x031f17cd8768a173),
+    U64_C(0xbd8df2d9af41297d), U64_C(0x9d3b4f5ab43e5e3f),
+    U64_C(0x4071671b36feee84), U64_C(0x716207e7d3e3b83d),
+    U64_C(0x48d20ff2f9283a1a), U64_C(0x27769eb4757cbc7e),
+    U64_C(0x5c56ebc793f2e574), U64_C(0xa48b474f9ef5dc18),
+    U64_C(0x52cbada94ff46e0c), U64_C(0x60c7da982d8199c6),
+    U64_C(0x0e9d466edc068b78), U64_C(0x4eec2175eaf865fc),
+    U64_C(0x550b8e9e21f7a530), U64_C(0x6b7ba5bc653fec2b),
+    U64_C(0x5eb7f1ba6949d0dd), U64_C(0x57ea94e3db4c9099),
+    U64_C(0xf640eae6d101b214), U64_C(0xdd4a284182c0b0bb),
+    U64_C(0xff1d8fbf6304f250), U64_C(0xb8accb933bf9d7e8),
+    U64_C(0xe8867c478eb68c4d), U64_C(0x3f8e2692391bddc1),
+    U64_C(0xcb2fd60912a15a7c), U64_C(0xaec935dbab983d2f),
+    U64_C(0xf55ffd2b56691367), U64_C(0x80e2ce366ce1c115),
+    U64_C(0x179bf3f8edb27e1d), U64_C(0x01fe0db07dd394da),
+    U64_C(0xda8a0b76ecc37b87), U64_C(0x44ae53e1df9584cb),
+    U64_C(0xb310b4b77347a205), U64_C(0xdfab323c787b8512),
+    U64_C(0x3b511268d070b78e), U64_C(0x65e6e3d2b9396753),
+    U64_C(0x6864b271e2574d58), U64_C(0x259784c98fc789d7),
+    U64_C(0x02e11a7dfabb35a9), U64_C(0x8841a6dfa337158b),
+    U64_C(0x7ade78c39b5dcdd0), U64_C(0xb7cf804d9a2cc84a),
+    U64_C(0x20b6bd831b7f7742), U64_C(0x75bd331d3a88d272),
+    U64_C(0x418f6aab4b2d7a5e), U64_C(0xd9951cbb6babdaf4),
+    U64_C(0xb6318dfde7ff5c90), U64_C(0x1f389b112264aa83),
+    U64_C(0x492c024284fbaec0), U64_C(0xe33a0363c608f9a0),
+    U64_C(0x2688930408af28a4), U64_C(0xc7538a1a341ce4ad),
+    U64_C(0x5da8e677ee2171ae), U64_C(0x8c9e92254a5c7fc4),
+    U64_C(0x63d8cd55aae938b5), U64_C(0x29ebd8daa97a3706),
+    U64_C(0x959827b37be88aa1), U64_C(0x1484e4356adadf6e),
+    U64_C(0xa7945082199d7d6b), U64_C(0xbf6ce8a455fa1cd4),
+    U64_C(0x9cc542eac9edcae5), U64_C(0x79c16f0e1c356ca3),
+    U64_C(0x89bfab6fdee48151), U64_C(0xd4174d1830c5f0ff),
+    U64_C(0x9258048415eb419d), U64_C(0x6139d72850520d1c),
+    U64_C(0x6a85a80c18ec78f1), U64_C(0xcd11f88e0171059a),
+    U64_C(0xcceff53e7ca29140), U64_C(0xd229639f2315af19),
+    U64_C(0x90b91ef9ef507434), U64_C(0x5977d28d074a1be1),
+    U64_C(0x311360fce51d56b9), U64_C(0xc093a92d5a1f2f91),
+    U64_C(0x1a19a25bb6dc5416), U64_C(0xeb996b8a09de2d3e),
+    U64_C(0xfee3820f1ed7668a), U64_C(0xd7085ad5b7ad518c),
+    U64_C(0x7fff41890fe53345), U64_C(0xec5948bd67dde602),
+    U64_C(0x2fd5f65dbaaa68e0), U64_C(0xa5754affe32648c2),
+    U64_C(0xf8ddac880d07396c), U64_C(0x6fa491468c548664),
+    U64_C(0x0c7c5c1326bdbed1), U64_C(0x4a33158f03930fb3),
+    U64_C(0x699abfc19f84d982), U64_C(0xe4fa2054a80b329c),
+    U64_C(0x6707f9af438252fa), U64_C(0x08a368e9cfd6d49e),
+    U64_C(0x47b1442c58fd25b8), U64_C(0xbbb3dc5ebc91769b),
+    U64_C(0x1665fe489061eac7), U64_C(0x33f27a811fa66310),
+    U64_C(0x93a609346838d547), U64_C(0x30ed6d4c98cec263),
+    U64_C(0x1dd9816cd8df9f2a), U64_C(0x94662a03063b1e7b),
+    U64_C(0x83fdd9fbeb896066), U64_C(0x7b207573e68e590a),
+    U64_C(0x5f49fc0a149a4407), U64_C(0x343259b671a5a82c),
+    U64_C(0xfbc2bb458a6f981f), U64_C(0xc272b350a0a41a38),
+    U64_C(0x3aaf1fd8ada32354), U64_C(0x6cbb868b0b3c2717),
+    U64_C(0xa2b569c88d2583fe), U64_C(0xf180c9d1bf027928),
+    U64_C(0xaf37386bd64ba9f5), U64_C(0x12bacab2790a8088),
+    U64_C(0x4c0d3b0810435055), U64_C(0xb2eeb9070e9436df),
+    U64_C(0xc5b29067cea7d104), U64_C(0xdcb425f1ff132461),
+    U64_C(0x4f122cc5972bf126), U64_C(0xac282fa651230886),
+    U64_C(0xe7e537992f6393ef), U64_C(0xe61b3a2952b00735),
+    U64_C(0x709c0a57ae302ce7), U64_C(0xe02514ae416058d3),
+    U64_C(0xc44c9dd7b37445de), U64_C(0x5a68c5408022ba92),
+    U64_C(0x1c278cdca50c0bf0), U64_C(0x6e5a9cf6f18712be),
+    U64_C(0x86dce0b17f319ef3), U64_C(0x2d34ec2040115d49),
+    U64_C(0x4bcd183f7e409b69), U64_C(0x2815d56ad4a9a3dc),
+    U64_C(0x24698979f2141d0d), U64_C(0x0000000000000000),
+    U64_C(0x1ec696a15fb73e59), U64_C(0xd86b110b16784e2e),
+    U64_C(0x8e7f8858b0e74a6d), U64_C(0x063e2e8713d05fe6),
+    U64_C(0xe2c40ed3bbdb6d7a), U64_C(0xb1f1aeca89fc97ac),
+    U64_C(0xe1db191e3cb3cc09), U64_C(0x6418ee62c4eaf389),
+    U64_C(0xc6ad87aa49cf7077), U64_C(0xd6f65765ca7ec556),
+    U64_C(0x9afb6c6dda3d9503), U64_C(0x7ce05644888d9236),
+    U64_C(0x8d609f95378feb1e), U64_C(0x23a9aa4e9c17d631),
+    U64_C(0x6226c0e5d73aac6f), U64_C(0x56149953a69f0443),
+    U64_C(0xeeb852c09d66d3ab), U64_C(0x2b0ac2a753c102af),
+    U64_C(0x07c023376e03cb3c), U64_C(0x2ccae1903dc2c993),
+    U64_C(0xd3d76e2f5ec63bc3), U64_C(0x9e2458973356ff4c),
+    U64_C(0xa66a5d32644ee9b1), U64_C(0x0a427294356de137),
+    U64_C(0x783f62be61e6f879), U64_C(0x1344c70204d91452),
+    U64_C(0x5b96c8f0fdf12e48), U64_C(0xa90916ecc59bf613),
+    U64_C(0xbe92e5142829880e), U64_C(0x727d102a548b194e),
+    U64_C(0x1be7afebcb0fc0cc), U64_C(0x3e702b2244c8491b),
+    U64_C(0xd5e940a84d166425), U64_C(0x66f9f41f3e51c620),
+    U64_C(0xabe80c913f20c3ba), U64_C(0xf07ec461c2d1edf2),
+    U64_C(0xf361d3ac45b94c81), U64_C(0x0521394a94b8fe95),
+    U64_C(0xadd622162cf09c5c), U64_C(0xe97871f7f3651897),
+    U64_C(0xf4a1f09b2bba87bd), U64_C(0x095d6559b2054044),
+    U64_C(0x0bbc7f2448be75ed), U64_C(0x2af4cf172e129675),
+    U64_C(0x157ae98517094bb4), U64_C(0x9fda55274e856b96),
+    U64_C(0x914713499283e0ee), U64_C(0xb952c623462a4332),
+    U64_C(0x74433ead475b46a8), U64_C(0x8b5eb112245fb4f8),
+    U64_C(0xa34b6478f0f61724), U64_C(0x11a5dd7ffe6221fb),
+    U64_C(0xc16da49d27ccbb4b), U64_C(0x76a224d0bde07301),
+    U64_C(0x8aa0bca2598c2022), U64_C(0x4df336b86d90c48f),
+    U64_C(0xea67663a740db9e4), U64_C(0xef465f70e0b54771),
+    U64_C(0x39b008152acb8227), U64_C(0x7d1e5bf4f55e06ec),
+    U64_C(0x105bd0cf83b1b521), U64_C(0x775c2960c033e7db),
+    U64_C(0x7e014c397236a79f), U64_C(0x811cc386113255cf),
+    U64_C(0xeda7450d1a0e72d8), U64_C(0x5889df3d7a998f3b),
+    U64_C(0x2e2bfbedc779fc3a), U64_C(0xce0eef438619a4e9),
+    U64_C(0x372d4e7bf6cd095f), U64_C(0x04df34fae96b6a4f),
+    U64_C(0xf923a13870d4adb6), U64_C(0xa1aa7e050a4d228d),
+    U64_C(0xa8f71b5cb84862c9), U64_C(0xb52e9a306097fde3),
+    U64_C(0x0d8251a35b6e2a0b), U64_C(0x2257a7fee1c442eb),
+    U64_C(0x73831d9a29588d94), U64_C(0x51d4ba64c89ccf7f),
+    U64_C(0x502ab7d4b54f5ba5), U64_C(0x97793dce8153bf08),
+    U64_C(0xe5042de4d5d8a646), U64_C(0x9687307efc802bd2),
+    U64_C(0xa05473b5779eb657), U64_C(0xb4d097801d446939),
+    U64_C(0xcff0e2f3fbca3033), U64_C(0xc38cbee0dd778ee2),
+    U64_C(0x464f499c252eb162), U64_C(0xcad1dbb96f72cea6),
+    U64_C(0xba4dd1eec142e241), U64_C(0xb00fa37af42f0376) },
+  /* 2 */
+  { U64_C(0xcce4cd3aa968b245), U64_C(0x089d5484e80b7faf),
+    U64_C(0x638246c1b3548304), U64_C(0xd2fe0ec8c2355492),
+    U64_C(0xa7fbdf7ff2374eee), U64_C(0x4df1600c92337a16),
+    U64_C(0x84e503ea523b12fb), U64_C(0x0790bbfd53ab0c4a),
+    U64_C(0x198a780f38f6ea9d), U64_C(0x2ab30c8f55ec48cb),
+    U64_C(0xe0f7fed6b2c49db5), U64_C(0xb6ecf3f422cadbdc),
+    U64_C(0x409c9a541358df11), U64_C(0xd3ce8a56dfde3fe3),
+    U64_C(0xc3e9224312c8c1a0), U64_C(0x0d6dfa58816ba507),
+    U64_C(0xddf3e1b179952777), U64_C(0x04c02a42748bb1d9),
+    U64_C(0x94c2abff9f2decb8), U64_C(0x4f91752da8f8acf4),
+    U64_C(0x78682befb169bf7b), U64_C(0xe1c77a48af2ff6c4),
+    U64_C(0x0c5d7ec69c80ce76), U64_C(0x4cc1e4928fd81167),
+    U64_C(0xfeed3d24d9997b62), U64_C(0x518bb6dfc3a54a23),
+    U64_C(0x6dbf2d26151f9b90), U64_C(0xb5bc624b05ea664f),
+    U64_C(0xe86aaa525acfe21a), U64_C(0x4801ced0fb53a0be),
+    U64_C(0xc91463e6c00868ed), U64_C(0x1027a815cd16fe43),
+    U64_C(0xf67069a0319204cd), U64_C(0xb04ccc976c8abce7),
+    U64_C(0xc0b9b3fc35e87c33), U64_C(0xf380c77c58f2de65),
+    U64_C(0x50bb3241de4e2152), U64_C(0xdf93f490435ef195),
+    U64_C(0xf1e0d25d62390887), U64_C(0xaf668bfb1a3c3141),
+    U64_C(0xbc11b251f00a7291), U64_C(0x73a5eed47e427d47),
+    U64_C(0x25bee3f6ee4c3b2e), U64_C(0x43cc0beb34786282),
+    U64_C(0xc824e778dde3039c), U64_C(0xf97d86d98a327728),
+    U64_C(0xf2b043e24519b514), U64_C(0xe297ebf7880f4b57),
+    U64_C(0x3a94a49a98fab688), U64_C(0x868516cb68f0c419),
+    U64_C(0xeffa11af0964ee50), U64_C(0xa4ab4ec0d517f37d),
+    U64_C(0xa9c6b498547c567a), U64_C(0x8e18424f80fbbbb6),
+    U64_C(0x0bcdc53bcf2bc23c), U64_C(0x137739aaea3643d0),
+    U64_C(0x2c1333ec1bac2ff0), U64_C(0x8d48d3f0a7db0625),
+    U64_C(0x1e1ac3f26b5de6d7), U64_C(0xf520f81f16b2b95e),
+    U64_C(0x9f0f6ec450062e84), U64_C(0x0130849e1deb6b71),
+    U64_C(0xd45e31ab8c7533a9), U64_C(0x652279a2fd14e43f),
+    U64_C(0x3209f01e70f1c927), U64_C(0xbe71a770cac1a473),
+    U64_C(0x0e3d6be7a64b1894), U64_C(0x7ec8148cff29d840),
+    U64_C(0xcb7476c7fac3be0f), U64_C(0x72956a4a63a91636),
+    U64_C(0x37f95ec21991138f), U64_C(0x9e3fea5a4ded45f5),
+    U64_C(0x7b38ba50964902e8), U64_C(0x222e580bbde73764),
+    U64_C(0x61e253e0899f55e6), U64_C(0xfc8d2805e352ad80),
+    U64_C(0x35994be3235ac56d), U64_C(0x09add01af5e014de),
+    U64_C(0x5e8659a6780539c6), U64_C(0xb17c48097161d796),
+    U64_C(0x026015213acbd6e2), U64_C(0xd1ae9f77e515e901),
+    U64_C(0xb7dc776a3f21b0ad), U64_C(0xaba6a1b96eb78098),
+    U64_C(0x9bcf4486248d9f5d), U64_C(0x582666c536455efd),
+    U64_C(0xfdbdac9bfeb9c6f1), U64_C(0xc47999be4163cdea),
+    U64_C(0x765540081722a7ef), U64_C(0x3e548ed8ec710751),
+    U64_C(0x3d041f67cb51bac2), U64_C(0x7958af71ac82d40a),
+    U64_C(0x36c9da5c047a78fe), U64_C(0xed9a048e33af38b2),
+    U64_C(0x26ee7249c96c86bd), U64_C(0x900281bdeba65d61),
+    U64_C(0x11172c8bd0fd9532), U64_C(0xea0abf73600434f8),
+    U64_C(0x42fc8f75299309f3), U64_C(0x34a9cf7d3eb1ae1c),
+    U64_C(0x2b838811480723ba), U64_C(0x5ce64c8742ceef24),
+    U64_C(0x1adae9b01fd6570e), U64_C(0x3c349bf9d6bad1b3),
+    U64_C(0x82453c891c7b75c0), U64_C(0x97923a40b80d512b),
+    U64_C(0x4a61dbf1c198765c), U64_C(0xb48ce6d518010d3e),
+    U64_C(0xcfb45c858e480fd6), U64_C(0xd933cbf30d1e96ae),
+    U64_C(0xd70ea014ab558e3a), U64_C(0xc189376228031742),
+    U64_C(0x9262949cd16d8b83), U64_C(0xeb3a3bed7def5f89),
+    U64_C(0x49314a4ee6b8cbcf), U64_C(0xdcc3652f647e4c06),
+    U64_C(0xda635a4c2a3e2b3d), U64_C(0x470c21a940f3d35b),
+    U64_C(0x315961a157d174b4), U64_C(0x6672e81dda3459ac),
+    U64_C(0x5b76f77a1165e36e), U64_C(0x445cb01667d36ec8),
+    U64_C(0xc5491d205c88a69b), U64_C(0x456c34887a3805b9),
+    U64_C(0xffddb9bac4721013), U64_C(0x99af51a71e4649bf),
+    U64_C(0xa15be01cbc7729d5), U64_C(0x52db2760e485f7b0),
+    U64_C(0x8c78576eba306d54), U64_C(0xae560f6507d75a30),
+    U64_C(0x95f22f6182c687c9), U64_C(0x71c5fbf54489aba5),
+    U64_C(0xca44f259e728d57e), U64_C(0x88b87d2ccebbdc8d),
+    U64_C(0xbab18d32be4a15aa), U64_C(0x8be8ec93e99b611e),
+    U64_C(0x17b713e89ebdf209), U64_C(0xb31c5d284baa0174),
+    U64_C(0xeeca9531148f8521), U64_C(0xb8d198138481c348),
+    U64_C(0x8988f9b2d350b7fc), U64_C(0xb9e11c8d996aa839),
+    U64_C(0x5a4673e40c8e881f), U64_C(0x1687977683569978),
+    U64_C(0xbf4123eed72acf02), U64_C(0x4ea1f1b3b513c785),
+    U64_C(0xe767452be16f91ff), U64_C(0x7505d1b730021a7c),
+    U64_C(0xa59bca5ec8fc980c), U64_C(0xad069eda20f7e7a3),
+    U64_C(0x38f4b1bba231606a), U64_C(0x60d2d77e94743e97),
+    U64_C(0x9affc0183966f42c), U64_C(0x248e6768f3a7505f),
+    U64_C(0xcdd449a4b483d934), U64_C(0x87b59255751baf68),
+    U64_C(0x1bea6d2e023d3c7f), U64_C(0x6b1f12455b5ffcab),
+    U64_C(0x743555292de9710d), U64_C(0xd8034f6d10f5fddf),
+    U64_C(0xc6198c9f7ba81b08), U64_C(0xbb8109aca3a17edb),
+    U64_C(0xfa2d1766ad12cabb), U64_C(0xc729080166437079),
+    U64_C(0x9c5fff7b77269317), U64_C(0x0000000000000000),
+    U64_C(0x15d706c9a47624eb), U64_C(0x6fdf38072fd44d72),
+    U64_C(0x5fb6dd3865ee52b7), U64_C(0xa33bf53d86bcff37),
+    U64_C(0xe657c1b5fc84fa8e), U64_C(0xaa962527735cebe9),
+    U64_C(0x39c43525bfda0b1b), U64_C(0x204e4d2a872ce186),
+    U64_C(0x7a083ece8ba26999), U64_C(0x554b9c9db72efbfa),
+    U64_C(0xb22cd9b656416a05), U64_C(0x96a2bedea5e63a5a),
+    U64_C(0x802529a826b0a322), U64_C(0x8115ad363b5bc853),
+    U64_C(0x8375b81701901eb1), U64_C(0x3069e53f4a3a1fc5),
+    U64_C(0xbd2136cfede119e0), U64_C(0x18bafc91251d81ec),
+    U64_C(0x1d4a524d4c7d5b44), U64_C(0x05f0aedc6960daa8),
+    U64_C(0x29e39d3072ccf558), U64_C(0x70f57f6b5962c0d4),
+    U64_C(0x989fd53903ad22ce), U64_C(0xf84d024797d91c59),
+    U64_C(0x547b1803aac5908b), U64_C(0xf0d056c37fd263f6),
+    U64_C(0xd56eb535919e58d8), U64_C(0x1c7ad6d351963035),
+    U64_C(0x2e7326cd2167f912), U64_C(0xac361a443d1c8cd2),
+    U64_C(0x697f076461942a49), U64_C(0x4b515f6fdc731d2d),
+    U64_C(0x8ad8680df4700a6f), U64_C(0x41ac1eca0eb3b460),
+    U64_C(0x7d988533d80965d3), U64_C(0xa8f6300649973d0b),
+    U64_C(0x7765c4960ac9cc9e), U64_C(0x7ca801adc5e20ea2),
+    U64_C(0xdea3700e5eb59ae4), U64_C(0xa06b6482a19c42a4),
+    U64_C(0x6a2f96db46b497da), U64_C(0x27def6d7d487edcc),
+    U64_C(0x463ca5375d18b82a), U64_C(0xa6cb5be1efdc259f),
+    U64_C(0x53eba3fef96e9cc1), U64_C(0xce84d81b93a364a7),
+    U64_C(0xf4107c810b59d22f), U64_C(0x333974806d1aa256),
+    U64_C(0x0f0def79bba073e5), U64_C(0x231edc95a00c5c15),
+    U64_C(0xe437d494c64f2c6c), U64_C(0x91320523f64d3610),
+    U64_C(0x67426c83c7df32dd), U64_C(0x6eefbc99323f2603),
+    U64_C(0x9d6f7be56acdf866), U64_C(0x5916e25b2bae358c),
+    U64_C(0x7ff89012e2c2b331), U64_C(0x035091bf2720bd93),
+    U64_C(0x561b0d22900e4669), U64_C(0x28d319ae6f279e29),
+    U64_C(0x2f43a2533c8c9263), U64_C(0xd09e1be9f8fe8270),
+    U64_C(0xf740ed3e2c796fbc), U64_C(0xdb53ded237d5404c),
+    U64_C(0x62b2c25faebfe875), U64_C(0x0afd41a5d2c0a94d),
+    U64_C(0x6412fd3ce0ff8f4e), U64_C(0xe3a76f6995e42026),
+    U64_C(0x6c8fa9b808f4f0e1), U64_C(0xc2d9a6dd0f23aad1),
+    U64_C(0x8f28c6d19d10d0c7), U64_C(0x85d587744fd0798a),
+    U64_C(0xa20b71a39b579446), U64_C(0x684f83fa7c7f4138),
+    U64_C(0xe507500adba4471d), U64_C(0x3f640a46f19a6c20),
+    U64_C(0x1247bd34f7dd28a1), U64_C(0x2d23b77206474481),
+    U64_C(0x93521002cc86e0f2), U64_C(0x572b89bc8de52d18),
+    U64_C(0xfb1d93f8b0f9a1ca), U64_C(0xe95a2ecc4724896b),
+    U64_C(0x3ba420048511ddf9), U64_C(0xd63e248ab6bee54b),
+    U64_C(0x5dd6c8195f258455), U64_C(0x06a03f634e40673b),
+    U64_C(0x1f2a476c76b68da6), U64_C(0x217ec9b49ac78af7),
+    U64_C(0xecaa80102e4453c3), U64_C(0x14e78257b99d4f9a) },
+  /* 3 */
+  { U64_C(0x20329b2cc87bba05), U64_C(0x4f5eb6f86546a531),
+    U64_C(0xd4f44775f751b6b1), U64_C(0x8266a47b850dfa8b),
+    U64_C(0xbb986aa15a6ca985), U64_C(0xc979eb08f9ae0f99),
+    U64_C(0x2da6f447a2375ea1), U64_C(0x1e74275dcd7d8576),
+    U64_C(0xbc20180a800bc5f8), U64_C(0xb4a2f701b2dc65be),
+    U64_C(0xe726946f981b6d66), U64_C(0x48e6c453bf21c94c),
+    U64_C(0x42cad9930f0a4195), U64_C(0xefa47b64aacccd20),
+    U64_C(0x71180a8960409a42), U64_C(0x8bb3329bf6a44e0c),
+    U64_C(0xd34c35de2d36dacc), U64_C(0xa92f5b7cbc23dc96),
+    U64_C(0xb31a85aa68bb09c3), U64_C(0x13e04836a73161d2),
+    U64_C(0xb24dfc4129c51d02), U64_C(0x8ae44b70b7da5acd),
+    U64_C(0xe671ed84d96579a7), U64_C(0xa4bb3417d66f3832),
+    U64_C(0x4572ab38d56d2de8), U64_C(0xb1b47761ea47215c),
+    U64_C(0xe81c09cf70aba15d), U64_C(0xffbdb872ce7f90ac),
+    U64_C(0xa8782297fd5dc857), U64_C(0x0d946f6b6a4ce4a4),
+    U64_C(0xe4df1f4f5b995138), U64_C(0x9ebc71edca8c5762),
+    U64_C(0x0a2c1dc0b02b88d9), U64_C(0x3b503c115d9d7b91),
+    U64_C(0xc64376a8111ec3a2), U64_C(0xcec199a323c963e4),
+    U64_C(0xdc76a87ec58616f7), U64_C(0x09d596e073a9b487),
+    U64_C(0x14583a9d7d560daf), U64_C(0xf4c6dc593f2a0cb4),
+    U64_C(0xdd21d19584f80236), U64_C(0x4a4836983ddde1d3),
+    U64_C(0xe58866a41ae745f9), U64_C(0xf591a5b27e541875),
+    U64_C(0x891dc05074586693), U64_C(0x5b068c651810a89e),
+    U64_C(0xa30346bc0c08544f), U64_C(0x3dbf3751c684032d),
+    U64_C(0x2a1e86ec785032dc), U64_C(0xf73f5779fca830ea),
+    U64_C(0xb60c05ca30204d21), U64_C(0x0cc316802b32f065),
+    U64_C(0x8770241bdd96be69), U64_C(0xb861e18199ee95db),
+    U64_C(0xf805cad91418fcd1), U64_C(0x29e70dccbbd20e82),
+    U64_C(0xc7140f435060d763), U64_C(0x0f3a9da0e8b0cc3b),
+    U64_C(0xa2543f574d76408e), U64_C(0xbd7761e1c175d139),
+    U64_C(0x4b1f4f737ca3f512), U64_C(0x6dc2df1f2fc137ab),
+    U64_C(0xf1d05c3967b14856), U64_C(0xa742bf3715ed046c),
+    U64_C(0x654030141d1697ed), U64_C(0x07b872abda676c7d),
+    U64_C(0x3ce84eba87fa17ec), U64_C(0xc1fb0403cb79afdf),
+    U64_C(0x3e46bc7105063f73), U64_C(0x278ae987121cd678),
+    U64_C(0xa1adb4778ef47cd0), U64_C(0x26dd906c5362c2b9),
+    U64_C(0x05168060589b44e2), U64_C(0xfbfc41f9d79ac08f),
+    U64_C(0x0e6de44ba9ced8fa), U64_C(0x9feb08068bf243a3),
+    U64_C(0x7b341749d06b129b), U64_C(0x229c69e74a87929a),
+    U64_C(0xe09ee6c4427c011b), U64_C(0x5692e30e725c4c3a),
+    U64_C(0xda99a33e5e9f6e4b), U64_C(0x353dd85af453a36b),
+    U64_C(0x25241b4c90e0fee7), U64_C(0x5de987258309d022),
+    U64_C(0xe230140fc0802984), U64_C(0x93281e86a0c0b3c6),
+    U64_C(0xf229d719a4337408), U64_C(0x6f6c2dd4ad3d1f34),
+    U64_C(0x8ea5b2fbae3f0aee), U64_C(0x8331dd90c473ee4a),
+    U64_C(0x346aa1b1b52db7aa), U64_C(0xdf8f235e06042aa9),
+    U64_C(0xcc6f6b68a1354b7b), U64_C(0x6c95a6f46ebf236a),
+    U64_C(0x52d31a856bb91c19), U64_C(0x1a35ded6d498d555),
+    U64_C(0xf37eaef2e54d60c9), U64_C(0x72e181a9a3c2a61c),
+    U64_C(0x98537aad51952fde), U64_C(0x16f6c856ffaa2530),
+    U64_C(0xd960281e9d1d5215), U64_C(0x3a0745fa1ce36f50),
+    U64_C(0x0b7b642bf1559c18), U64_C(0x59a87eae9aec8001),
+    U64_C(0x5e100c05408bec7c), U64_C(0x0441f98b19e55023),
+    U64_C(0xd70dcc5534d38aef), U64_C(0x927f676de1bea707),
+    U64_C(0x9769e70db925e3e5), U64_C(0x7a636ea29115065a),
+    U64_C(0x468b201816ef11b6), U64_C(0xab81a9b73edff409),
+    U64_C(0xc0ac7de88a07bb1e), U64_C(0x1f235eb68c0391b7),
+    U64_C(0x6056b074458dd30f), U64_C(0xbe8eeac102f7ed67),
+    U64_C(0xcd381283e04b5fba), U64_C(0x5cbefecec277c4e3),
+    U64_C(0xd21b4c356c48ce0d), U64_C(0x1019c31664b35d8c),
+    U64_C(0x247362a7d19eea26), U64_C(0xebe582efb3299d03),
+    U64_C(0x02aef2cb82fc289f), U64_C(0x86275df09ce8aaa8),
+    U64_C(0x28b07427faac1a43), U64_C(0x38a9b7319e1f47cf),
+    U64_C(0xc82e92e3b8d01b58), U64_C(0x06ef0b409b1978bc),
+    U64_C(0x62f842bfc771fb90), U64_C(0x9904034610eb3b1f),
+    U64_C(0xded85ab5477a3e68), U64_C(0x90d195a663428f98),
+    U64_C(0x5384636e2ac708d8), U64_C(0xcbd719c37b522706),
+    U64_C(0xae9729d76644b0eb), U64_C(0x7c8c65e20a0c7ee6),
+    U64_C(0x80c856b007f1d214), U64_C(0x8c0b40302cc32271),
+    U64_C(0xdbcedad51fe17a8a), U64_C(0x740e8ae938dbdea0),
+    U64_C(0xa615c6dc549310ad), U64_C(0x19cc55f6171ae90b),
+    U64_C(0x49b1bdb8fe5fdd8d), U64_C(0xed0a89af2830e5bf),
+    U64_C(0x6a7aadb4f5a65bd6), U64_C(0x7e22972988f05679),
+    U64_C(0xf952b3325566e810), U64_C(0x39fecedadf61530e),
+    U64_C(0x6101c99f04f3c7ce), U64_C(0x2e5f7f6761b562ff),
+    U64_C(0xf08725d226cf5c97), U64_C(0x63af3b54860fef51),
+    U64_C(0x8ff2cb10ef411e2f), U64_C(0x884ab9bb35267252),
+    U64_C(0x4df04433e7ba8dae), U64_C(0x9afd8866d3690741),
+    U64_C(0x66b9bb34de94abb3), U64_C(0x9baaf18d92171380),
+    U64_C(0x543c11c5f0a064a5), U64_C(0x17a1b1bdbed431f1),
+    U64_C(0xb5f58eeaf3a2717f), U64_C(0xc355f6c849858740),
+    U64_C(0xec5df044694ef17e), U64_C(0xd83751f5dc6346d4),
+    U64_C(0xfc4433520dfdacf2), U64_C(0x0000000000000000),
+    U64_C(0x5a51f58e596ebc5f), U64_C(0x3285aaf12e34cf16),
+    U64_C(0x8d5c39db6dbd36b0), U64_C(0x12b731dde64f7513),
+    U64_C(0x94906c2d7aa7dfbb), U64_C(0x302b583aacc8e789),
+    U64_C(0x9d45facd090e6b3c), U64_C(0x2165e2c78905aec4),
+    U64_C(0x68d45f7f775a7349), U64_C(0x189b2c1d5664fdca),
+    U64_C(0xe1c99f2f030215da), U64_C(0x6983269436246788),
+    U64_C(0x8489af3b1e148237), U64_C(0xe94b702431d5b59c),
+    U64_C(0x33d2d31a6f4adbd7), U64_C(0xbfd9932a4389f9a6),
+    U64_C(0xb0e30e8aab39359d), U64_C(0xd1e2c715afcaf253),
+    U64_C(0x150f43763c28196e), U64_C(0xc4ed846393e2eb3d),
+    U64_C(0x03f98b20c3823c5e), U64_C(0xfd134ab94c83b833),
+    U64_C(0x556b682eb1de7064), U64_C(0x36c4537a37d19f35),
+    U64_C(0x7559f30279a5ca61), U64_C(0x799ae58252973a04),
+    U64_C(0x9c12832648707ffd), U64_C(0x78cd9c6913e92ec5),
+    U64_C(0x1d8dac7d0effb928), U64_C(0x439da0784e745554),
+    U64_C(0x413352b3cc887dcb), U64_C(0xbacf134a1b12bd44),
+    U64_C(0x114ebafd25cd494d), U64_C(0x2f08068c20cb763e),
+    U64_C(0x76a07822ba27f63f), U64_C(0xeab2fb04f25789c2),
+    U64_C(0xe3676de481fe3d45), U64_C(0x1b62a73d95e6c194),
+    U64_C(0x641749ff5c68832c), U64_C(0xa5ec4dfc97112cf3),
+    U64_C(0xf6682e92bdd6242b), U64_C(0x3f11c59a44782bb2),
+    U64_C(0x317c21d1edb6f348), U64_C(0xd65ab5be75ad9e2e),
+    U64_C(0x6b2dd45fb4d84f17), U64_C(0xfaab381296e4d44e),
+    U64_C(0xd0b5befeeeb4e692), U64_C(0x0882ef0b32d7a046),
+    U64_C(0x512a91a5a83b2047), U64_C(0x963e9ee6f85bf724),
+    U64_C(0x4e09cf132438b1f0), U64_C(0x77f701c9fb59e2fe),
+    U64_C(0x7ddb1c094b726a27), U64_C(0x5f4775ee01f5f8bd),
+    U64_C(0x9186ec4d223c9b59), U64_C(0xfeeac1998f01846d),
+    U64_C(0xac39db1ce4b89874), U64_C(0xb75b7c21715e59e0),
+    U64_C(0xafc0503c273aa42a), U64_C(0x6e3b543fec430bf5),
+    U64_C(0x704f7362213e8e83), U64_C(0x58ff0745db9294c0),
+    U64_C(0x67eec2df9feabf72), U64_C(0xa0facd9ccf8a6811),
+    U64_C(0xb936986ad890811a), U64_C(0x95c715c63bd9cb7a),
+    U64_C(0xca8060283a2c33c7), U64_C(0x507de84ee9453486),
+    U64_C(0x85ded6d05f6a96f6), U64_C(0x1cdad5964f81ade9),
+    U64_C(0xd5a33e9eb62fa270), U64_C(0x40642b588df6690a),
+    U64_C(0x7f75eec2c98e42b8), U64_C(0x2cf18dace3494a60),
+    U64_C(0x23cb100c0bf9865b), U64_C(0xeef3028febb2d9e1),
+    U64_C(0x4425d2d394133929), U64_C(0xaad6d05c7fa1e0c8),
+    U64_C(0xad6ea2f7a5c68cb5), U64_C(0xc2028f2308fb9381),
+    U64_C(0x819f2f5b468fc6d5), U64_C(0xc5bafd88d29cfffc),
+    U64_C(0x47dc59f357910577), U64_C(0x2b49ff07392e261d),
+    U64_C(0x57c59ae5332258fb), U64_C(0x73b6f842e2bcb2dd),
+    U64_C(0xcf96e04862b77725), U64_C(0x4ca73dd8a6c4996f),
+    U64_C(0x015779eb417e14c1), U64_C(0x37932a9176af8bf4) },
+  /* 4 */
+  { U64_C(0x190a2c9b249df23e), U64_C(0x2f62f8b62263e1e9),
+    U64_C(0x7a7f754740993655), U64_C(0x330b7ba4d5564d9f),
+    U64_C(0x4c17a16a46672582), U64_C(0xb22f08eb7d05f5b8),
+    U64_C(0x535f47f40bc148cc), U64_C(0x3aec5d27d4883037),
+    U64_C(0x10ed0a1825438f96), U64_C(0x516101f72c233d17),
+    U64_C(0x13cc6f949fd04eae), U64_C(0x739853c441474bfd),
+    U64_C(0x653793d90d3f5b1b), U64_C(0x5240647b96b0fc2f),
+    U64_C(0x0c84890ad27623e0), U64_C(0xd7189b32703aaea3),
+    U64_C(0x2685de3523bd9c41), U64_C(0x99317c5b11bffefa),
+    U64_C(0x0d9baa854f079703), U64_C(0x70b93648fbd48ac5),
+    U64_C(0xa80441fce30bc6be), U64_C(0x7287704bdc36ff1e),
+    U64_C(0xb65384ed33dc1f13), U64_C(0xd36417343ee34408),
+    U64_C(0x39cd38ab6e1bf10f), U64_C(0x5ab861770a1f3564),
+    U64_C(0x0ebacf09f594563b), U64_C(0xd04572b884708530),
+    U64_C(0x3cae9722bdb3af47), U64_C(0x4a556b6f2f5cbaf2),
+    U64_C(0xe1704f1f76c4bd74), U64_C(0x5ec4ed7144c6dfcf),
+    U64_C(0x16afc01d4c7810e6), U64_C(0x283f113cd629ca7a),
+    U64_C(0xaf59a8761741ed2d), U64_C(0xeed5a3991e215fac),
+    U64_C(0x3bf37ea849f984d4), U64_C(0xe413e096a56ce33c),
+    U64_C(0x2c439d3a98f020d1), U64_C(0x637559dc6404c46b),
+    U64_C(0x9e6c95d1e5f5d569), U64_C(0x24bb9836045fe99a),
+    U64_C(0x44efa466dac8ecc9), U64_C(0xc6eab2a5c80895d6),
+    U64_C(0x803b50c035220cc4), U64_C(0x0321658cba93c138),
+    U64_C(0x8f9ebc465dc7ee1c), U64_C(0xd15a5137190131d3),
+    U64_C(0x0fa5ec8668e5e2d8), U64_C(0x91c979578d1037b1),
+    U64_C(0x0642ca05693b9f70), U64_C(0xefca80168350eb4f),
+    U64_C(0x38d21b24f36a45ec), U64_C(0xbeab81e1af73d658),
+    U64_C(0x8cbfd9cae7542f24), U64_C(0xfd19cc0d81f11102),
+    U64_C(0x0ac6430fbb4dbc90), U64_C(0x1d76a09d6a441895),
+    U64_C(0x2a01573ff1cbbfa1), U64_C(0xb572e161894fde2b),
+    U64_C(0x8124734fa853b827), U64_C(0x614b1fdf43e6b1b0),
+    U64_C(0x68ac395c4238cc18), U64_C(0x21d837bfd7f7b7d2),
+    U64_C(0x20c714304a860331), U64_C(0x5cfaab726324aa14),
+    U64_C(0x74c5ba4eb50d606e), U64_C(0xf3a3030474654739),
+    U64_C(0x23e671bcf015c209), U64_C(0x45f087e947b9582a),
+    U64_C(0xd8bd77b418df4c7b), U64_C(0xe06f6c90ebb50997),
+    U64_C(0x0bd96080263c0873), U64_C(0x7e03f9410e40dcfe),
+    U64_C(0xb8e94be4c6484928), U64_C(0xfb5b0608e8ca8e72),
+    U64_C(0x1a2b49179e0e3306), U64_C(0x4e29e76961855059),
+    U64_C(0x4f36c4e6fcf4e4ba), U64_C(0x49740ee395cf7bca),
+    U64_C(0xc2963ea386d17f7d), U64_C(0x90d65ad810618352),
+    U64_C(0x12d34c1b02a1fa4d), U64_C(0xfa44258775bb3a91),
+    U64_C(0x18150f14b9ec46dd), U64_C(0x1491861e6b9a653d),
+    U64_C(0x9a1019d7ab2c3fc2), U64_C(0x3668d42d06fe13d7),
+    U64_C(0xdcc1fbb25606a6d0), U64_C(0x969490dd795a1c22),
+    U64_C(0x3549b1a1bc6dd2ef), U64_C(0xc94f5e23a0ed770e),
+    U64_C(0xb9f6686b5b39fdcb), U64_C(0xc4d4f4a6efeae00d),
+    U64_C(0xe732851a1fff2204), U64_C(0x94aad6de5eb869f9),
+    U64_C(0x3f8ff2ae07206e7f), U64_C(0xfe38a9813b62d03a),
+    U64_C(0xa7a1ad7a8bee2466), U64_C(0x7b6056c8dde882b6),
+    U64_C(0x302a1e286fc58ca7), U64_C(0x8da0fa457a259bc7),
+    U64_C(0xb3302b64e074415b), U64_C(0x5402ae7eff8b635f),
+    U64_C(0x08f8050c9cafc94b), U64_C(0xae468bf98a3059ce),
+    U64_C(0x88c355cca98dc58f), U64_C(0xb10e6d67c7963480),
+    U64_C(0xbad70de7e1aa3cf3), U64_C(0xbfb4a26e320262bb),
+    U64_C(0xcb711820870f02d5), U64_C(0xce12b7a954a75c9d),
+    U64_C(0x563ce87dd8691684), U64_C(0x9f73b65e7884618a),
+    U64_C(0x2b1e74b06cba0b42), U64_C(0x47cec1ea605b2df1),
+    U64_C(0x1c698312f735ac76), U64_C(0x5fdbcefed9b76b2c),
+    U64_C(0x831a354c8fb1cdfc), U64_C(0x820516c312c0791f),
+    U64_C(0xb74ca762aeadabf0), U64_C(0xfc06ef821c80a5e1),
+    U64_C(0x5723cbf24518a267), U64_C(0x9d4df05d5f661451),
+    U64_C(0x588627742dfd40bf), U64_C(0xda8331b73f3d39a0),
+    U64_C(0x17b0e392d109a405), U64_C(0xf965400bcf28fba9),
+    U64_C(0x7c3dbf4229a2a925), U64_C(0x023e460327e275db),
+    U64_C(0x6cd0b55a0ce126b3), U64_C(0xe62da695828e96e7),
+    U64_C(0x42ad6e63b3f373b9), U64_C(0xe50cc319381d57df),
+    U64_C(0xc5cbd729729b54ee), U64_C(0x46d1e265fd2a9912),
+    U64_C(0x6428b056904eeff8), U64_C(0x8be23040131e04b7),
+    U64_C(0x6709d5da2add2ec0), U64_C(0x075de98af44a2b93),
+    U64_C(0x8447dcc67bfbe66f), U64_C(0x6616f655b7ac9a23),
+    U64_C(0xd607b8bded4b1a40), U64_C(0x0563af89d3a85e48),
+    U64_C(0x3db1b4ad20c21ba4), U64_C(0x11f22997b8323b75),
+    U64_C(0x292032b34b587e99), U64_C(0x7f1cdace9331681d),
+    U64_C(0x8e819fc9c0b65aff), U64_C(0xa1e3677fe2d5bb16),
+    U64_C(0xcd33d225ee349da5), U64_C(0xd9a2543b85aef898),
+    U64_C(0x795e10cbfa0af76d), U64_C(0x25a4bbb9992e5d79),
+    U64_C(0x78413344677b438e), U64_C(0xf0826688cef68601),
+    U64_C(0xd27b34bba392f0eb), U64_C(0x551d8df162fad7bc),
+    U64_C(0x1e57c511d0d7d9ad), U64_C(0xdeffbdb171e4d30b),
+    U64_C(0xf4feea8e802f6caa), U64_C(0xa480c8f6317de55e),
+    U64_C(0xa0fc44f07fa40ff5), U64_C(0x95b5f551c3c9dd1a),
+    U64_C(0x22f952336d6476ea), U64_C(0x0000000000000000),
+    U64_C(0xa6be8ef5169f9085), U64_C(0xcc2cf1aa73452946),
+    U64_C(0x2e7ddb39bf12550a), U64_C(0xd526dd3157d8db78),
+    U64_C(0x486b2d6c08becf29), U64_C(0x9b0f3a58365d8b21),
+    U64_C(0xac78cdfaadd22c15), U64_C(0xbc95c7e28891a383),
+    U64_C(0x6a927f5f65dab9c3), U64_C(0xc3891d2c1ba0cb9e),
+    U64_C(0xeaa92f9f50f8b507), U64_C(0xcf0d9426c9d6e87e),
+    U64_C(0xca6e3baf1a7eb636), U64_C(0xab25247059980786),
+    U64_C(0x69b31ad3df4978fb), U64_C(0xe2512a93cc577c4c),
+    U64_C(0xff278a0ea61364d9), U64_C(0x71a615c766a53e26),
+    U64_C(0x89dc764334fc716c), U64_C(0xf87a638452594f4a),
+    U64_C(0xf2bc208be914f3da), U64_C(0x8766b94ac1682757),
+    U64_C(0xbbc82e687cdb8810), U64_C(0x626a7a53f9757088),
+    U64_C(0xa2c202f358467a2e), U64_C(0x4d0882e5db169161),
+    U64_C(0x09e7268301de7da8), U64_C(0xe897699c771ac0dc),
+    U64_C(0xc8507dac3d9cc3ed), U64_C(0xc0a878a0a1330aa6),
+    U64_C(0x978bb352e42ba8c1), U64_C(0xe9884a13ea6b743f),
+    U64_C(0x279afdbabecc28a2), U64_C(0x047c8c064ed9eaab),
+    U64_C(0x507e2278b15289f4), U64_C(0x599904fbb08cf45c),
+    U64_C(0xbd8ae46d15e01760), U64_C(0x31353da7f2b43844),
+    U64_C(0x8558ff49e68a528c), U64_C(0x76fbfc4d92ef15b5),
+    U64_C(0x3456922e211c660c), U64_C(0x86799ac55c1993b4),
+    U64_C(0x3e90d1219a51da9c), U64_C(0x2d5cbeb505819432),
+    U64_C(0x982e5fd48cce4a19), U64_C(0xdb9c1238a24c8d43),
+    U64_C(0xd439febecaa96f9b), U64_C(0x418c0bef0960b281),
+    U64_C(0x158ea591f6ebd1de), U64_C(0x1f48e69e4da66d4e),
+    U64_C(0x8afd13cf8e6fb054), U64_C(0xf5e1c9011d5ed849),
+    U64_C(0xe34e091c5126c8af), U64_C(0xad67ee7530a398f6),
+    U64_C(0x43b24dec2e82c75a), U64_C(0x75da99c1287cd48d),
+    U64_C(0x92e81cdb3783f689), U64_C(0xa3dd217cc537cecd),
+    U64_C(0x60543c50de970553), U64_C(0x93f73f54aaf2426a),
+    U64_C(0xa91b62737e7a725d), U64_C(0xf19d4507538732e2),
+    U64_C(0x77e4dfc20f9ea156), U64_C(0x7d229ccdb4d31dc6),
+    U64_C(0x1b346a98037f87e5), U64_C(0xedf4c615a4b29e94),
+    U64_C(0x4093286094110662), U64_C(0xb0114ee85ae78063),
+    U64_C(0x6ff1d0d6b672e78b), U64_C(0x6dcf96d591909250),
+    U64_C(0xdfe09e3eec9567e8), U64_C(0x3214582b4827f97c),
+    U64_C(0xb46dc2ee143e6ac8), U64_C(0xf6c0ac8da7cd1971),
+    U64_C(0xebb60c10cd8901e4), U64_C(0xf7df8f023abcad92),
+    U64_C(0x9c52d3d2c217a0b2), U64_C(0x6b8d5cd0f8ab0d20),
+    U64_C(0x3777f7a29b8fa734), U64_C(0x011f238f9d71b4e3),
+    U64_C(0xc1b75b2f3c42be45), U64_C(0x5de588fdfe551ef7),
+    U64_C(0x6eeef3592b035368), U64_C(0xaa3a07ffc4e9b365),
+    U64_C(0xecebe59a39c32a77), U64_C(0x5ba742f8976e8187),
+    U64_C(0x4b4a48e0b22d0e11), U64_C(0xddded83dcb771233),
+    U64_C(0xa59feb79ac0c51bd), U64_C(0xc7f5912a55792135) },
+  /* 5 */
+  { U64_C(0x6d6ae04668a9b08a), U64_C(0x3ab3f04b0be8c743),
+    U64_C(0xe51e166b54b3c908), U64_C(0xbe90a9eb35c2f139),
+    U64_C(0xb2c7066637f2bec1), U64_C(0xaa6945613392202c),
+    U64_C(0x9a28c36f3b5201eb), U64_C(0xddce5a93ab536994),
+    U64_C(0x0e34133ef6382827), U64_C(0x52a02ba1ec55048b),
+    U64_C(0xa2f88f97c4b2a177), U64_C(0x8640e513ca2251a5),
+    U64_C(0xcdf1d36258137622), U64_C(0xfe6cb708dedf8ddb),
+    U64_C(0x8a174a9ec8121e5d), U64_C(0x679896036b81560e),
+    U64_C(0x59ed033395795fee), U64_C(0x1dd778ab8b74edaf),
+    U64_C(0xee533ef92d9f926d), U64_C(0x2a8c79baf8a8d8f5),
+    U64_C(0x6bcf398e69b119f6), U64_C(0xe20491742fafdd95),
+    U64_C(0x276488e0809c2aec), U64_C(0xea955b82d88f5cce),
+    U64_C(0x7102c63a99d9e0c4), U64_C(0xf9763017a5c39946),
+    U64_C(0x429fa2501f151b3d), U64_C(0x4659c72bea05d59e),
+    U64_C(0x984b7fdccf5a6634), U64_C(0xf742232953fbb161),
+    U64_C(0x3041860e08c021c7), U64_C(0x747bfd9616cd9386),
+    U64_C(0x4bb1367192312787), U64_C(0x1b72a1638a6c44d3),
+    U64_C(0x4a0e68a6e8359a66), U64_C(0x169a5039f258b6ca),
+    U64_C(0xb98a2ef44edee5a4), U64_C(0xd9083fe85e43a737),
+    U64_C(0x967f6ce239624e13), U64_C(0x8874f62d3c1a7982),
+    U64_C(0x3c1629830af06e3f), U64_C(0x9165ebfd427e5a8e),
+    U64_C(0xb5dd81794ceeaa5c), U64_C(0x0de8f15a7834f219),
+    U64_C(0x70bd98ede3dd5d25), U64_C(0xaccc9ca9328a8950),
+    U64_C(0x56664eda1945ca28), U64_C(0x221db34c0f8859ae),
+    U64_C(0x26dbd637fa98970d), U64_C(0x1acdffb4f068f932),
+    U64_C(0x4585254f64090fa0), U64_C(0x72de245e17d53afa),
+    U64_C(0x1546b25d7c546cf4), U64_C(0x207e0ffffb803e71),
+    U64_C(0xfaaad2732bcf4378), U64_C(0xb462dfae36ea17bd),
+    U64_C(0xcf926fd1ac1b11fd), U64_C(0xe0672dc7dba7ba4a),
+    U64_C(0xd3fa49ad5d6b41b3), U64_C(0x8ba81449b216a3bc),
+    U64_C(0x14f9ec8a0650d115), U64_C(0x40fc1ee3eb1d7ce2),
+    U64_C(0x23a2ed9b758ce44f), U64_C(0x782c521b14fddc7e),
+    U64_C(0x1c68267cf170504e), U64_C(0xbcf31558c1ca96e6),
+    U64_C(0xa781b43b4ba6d235), U64_C(0xf6fd7dfe29ff0c80),
+    U64_C(0xb0a4bad5c3fad91e), U64_C(0xd199f51ea963266c),
+    U64_C(0x414340349119c103), U64_C(0x5405f269ed4dadf7),
+    U64_C(0xabd61bb649969dcd), U64_C(0x6813dbeae7bdc3c8),
+    U64_C(0x65fb2ab09f8931d1), U64_C(0xf1e7fae152e3181d),
+    U64_C(0xc1a67cef5a2339da), U64_C(0x7a4feea8e0f5bba1),
+    U64_C(0x1e0b9acf05783791), U64_C(0x5b8ebf8061713831),
+    U64_C(0x80e53cdbcb3af8d9), U64_C(0x7e898bd315e57502),
+    U64_C(0xc6bcfbf0213f2d47), U64_C(0x95a38e86b76e942d),
+    U64_C(0x092e94218d243cba), U64_C(0x8339debf453622e7),
+    U64_C(0xb11be402b9fe64ff), U64_C(0x57d9100d634177c9),
+    U64_C(0xcc4e8db52217cbc3), U64_C(0x3b0cae9c71ec7aa2),
+    U64_C(0xfb158ca451cbfe99), U64_C(0x2b33276d82ac6514),
+    U64_C(0x01bf5ed77a04bde1), U64_C(0xc5601994af33f779),
+    U64_C(0x75c4a3416cc92e67), U64_C(0xf3844652a6eb7fc2),
+    U64_C(0x3487e375fdd0ef64), U64_C(0x18ae430704609eed),
+    U64_C(0x4d14efb993298efb), U64_C(0x815a620cb13e4538),
+    U64_C(0x125c354207487869), U64_C(0x9eeea614ce42cf48),
+    U64_C(0xce2d3106d61fac1c), U64_C(0xbbe99247bad6827b),
+    U64_C(0x071a871f7b1c149d), U64_C(0x2e4a1cc10db81656),
+    U64_C(0x77a71ff298c149b8), U64_C(0x06a5d9c80118a97c),
+    U64_C(0xad73c27e488e34b1), U64_C(0x443a7b981e0db241),
+    U64_C(0xe3bbcfa355ab6074), U64_C(0x0af276450328e684),
+    U64_C(0x73617a896dd1871b), U64_C(0x58525de4ef7de20f),
+    U64_C(0xb7be3dcab8e6cd83), U64_C(0x19111dd07e64230c),
+    U64_C(0x842359a03e2a367a), U64_C(0x103f89f1f3401fb6),
+    U64_C(0xdc710444d157d475), U64_C(0xb835702334da5845),
+    U64_C(0x4320fc876511a6dc), U64_C(0xd026abc9d3679b8d),
+    U64_C(0x17250eee885c0b2b), U64_C(0x90dab52a387ae76f),
+    U64_C(0x31fed8d972c49c26), U64_C(0x89cba8fa461ec463),
+    U64_C(0x2ff5421677bcabb7), U64_C(0x396f122f85e41d7d),
+    U64_C(0xa09b332430bac6a8), U64_C(0xc888e8ced7070560),
+    U64_C(0xaeaf201ac682ee8f), U64_C(0x1180d7268944a257),
+    U64_C(0xf058a43628e7a5fc), U64_C(0xbd4c4b8fbbce2b07),
+    U64_C(0xa1246df34abe7b49), U64_C(0x7d5569b79be9af3c),
+    U64_C(0xa9b5a705bd9efa12), U64_C(0xdb6b835baa4bc0e8),
+    U64_C(0x05793bac8f147342), U64_C(0x21c1512881848390),
+    U64_C(0xfdb0556c50d357e5), U64_C(0x613d4fcb6a99ff72),
+    U64_C(0x03dce2648e0cda3e), U64_C(0xe949b9e6568386f0),
+    U64_C(0xfc0f0bbb2ad7ea04), U64_C(0x6a70675913b5a417),
+    U64_C(0x7f36d5046fe1c8e3), U64_C(0x0c57af8d02304ff8),
+    U64_C(0x32223abdfcc84618), U64_C(0x0891caf6f720815b),
+    U64_C(0xa63eeaec31a26fd4), U64_C(0x2507345374944d33),
+    U64_C(0x49d28ac266394058), U64_C(0xf5219f9aa7f3d6be),
+    U64_C(0x2d96fea583b4cc68), U64_C(0x5a31e1571b7585d0),
+    U64_C(0x8ed12fe53d02d0fe), U64_C(0xdfade6205f5b0e4b),
+    U64_C(0x4cabb16ee92d331a), U64_C(0x04c6657bf510cea3),
+    U64_C(0xd73c2cd6a87b8f10), U64_C(0xe1d87310a1a307ab),
+    U64_C(0x6cd5be9112ad0d6b), U64_C(0x97c032354366f3f2),
+    U64_C(0xd4e0ceb22677552e), U64_C(0x0000000000000000),
+    U64_C(0x29509bde76a402cb), U64_C(0xc27a9e8bd42fe3e4),
+    U64_C(0x5ef7842cee654b73), U64_C(0xaf107ecdbc86536e),
+    U64_C(0x3fcacbe784fcb401), U64_C(0xd55f90655c73e8cf),
+    U64_C(0xe6c2f40fdabf1336), U64_C(0xe8f6e7312c873b11),
+    U64_C(0xeb2a0555a28be12f), U64_C(0xe4a148bc2eb774e9),
+    U64_C(0x9b979db84156bc0a), U64_C(0x6eb60222e6a56ab4),
+    U64_C(0x87ffbbc4b026ec44), U64_C(0xc703a5275b3b90a6),
+    U64_C(0x47e699fc9001687f), U64_C(0x9c8d1aa73a4aa897),
+    U64_C(0x7cea3760e1ed12dd), U64_C(0x4ec80ddd1d2554c5),
+    U64_C(0x13e36b957d4cc588), U64_C(0x5d2b66486069914d),
+    U64_C(0x92b90999cc7280b0), U64_C(0x517cc9c56259deb5),
+    U64_C(0xc937b619ad03b881), U64_C(0xec30824ad997f5b2),
+    U64_C(0xa45d565fc5aa080b), U64_C(0xd6837201d27f32f1),
+    U64_C(0x635ef3789e9198ad), U64_C(0x531f75769651b96a),
+    U64_C(0x4f77530a6721e924), U64_C(0x486dd4151c3dfdb9),
+    U64_C(0x5f48dafb9461f692), U64_C(0x375b011173dc355a),
+    U64_C(0x3da9775470f4d3de), U64_C(0x8d0dcd81b30e0ac0),
+    U64_C(0x36e45fc609d888bb), U64_C(0x55baacbe97491016),
+    U64_C(0x8cb29356c90ab721), U64_C(0x76184125e2c5f459),
+    U64_C(0x99f4210bb55edbd5), U64_C(0x6f095cf59ca1d755),
+    U64_C(0x9f51f8c3b44672a9), U64_C(0x3538bda287d45285),
+    U64_C(0x50c39712185d6354), U64_C(0xf23b1885dcefc223),
+    U64_C(0x79930ccc6ef9619f), U64_C(0xed8fdc9da3934853),
+    U64_C(0xcb540aaa590bdf5e), U64_C(0x5c94389f1a6d2cac),
+    U64_C(0xe77daad8a0bbaed7), U64_C(0x28efc5090ca0bf2a),
+    U64_C(0xbf2ff73c4fc64cd8), U64_C(0xb37858b14df60320),
+    U64_C(0xf8c96ec0dfc724a7), U64_C(0x828680683f329f06),
+    U64_C(0x941cd051cd6a29cc), U64_C(0xc3c5c05cae2b5e05),
+    U64_C(0xb601631dc2e27062), U64_C(0xc01922382027843b),
+    U64_C(0x24b86a840e90f0d2), U64_C(0xd245177a276ffc52),
+    U64_C(0x0f8b4de98c3c95c6), U64_C(0x3e759530fef809e0),
+    U64_C(0x0b4d2892792c5b65), U64_C(0xc4df4743d5374a98),
+    U64_C(0xa5e20888bfaeb5ea), U64_C(0xba56cc90c0d23f9a),
+    U64_C(0x38d04cf8ffe0a09c), U64_C(0x62e1adafe495254c),
+    U64_C(0x0263bcb3f40867df), U64_C(0xcaeb547d230f62bf),
+    U64_C(0x6082111c109d4293), U64_C(0xdad4dd8cd04f7d09),
+    U64_C(0xefec602e579b2f8c), U64_C(0x1fb4c4187f7c8a70),
+    U64_C(0xffd3e9dfa4db303a), U64_C(0x7bf0b07f9af10640),
+    U64_C(0xf49ec14dddf76b5f), U64_C(0x8f6e713247066d1f),
+    U64_C(0x339d646a86ccfbf9), U64_C(0x64447467e58d8c30),
+    U64_C(0x2c29a072f9b07189), U64_C(0xd8b7613f24471ad6),
+    U64_C(0x6627c8d41185ebef), U64_C(0xa347d140beb61c96),
+    U64_C(0xde12b8f7255fb3aa), U64_C(0x9d324470404e1576),
+    U64_C(0x9306574eb6763d51), U64_C(0xa80af9d2c79a47f3),
+    U64_C(0x859c0777442e8b9b), U64_C(0x69ac853d9db97e29) },
+  /* 6 */
+  { U64_C(0xc3407dfc2de6377e), U64_C(0x5b9e93eea4256f77),
+    U64_C(0xadb58fdd50c845e0), U64_C(0x5219ff11a75bed86),
+    U64_C(0x356b61cfd90b1de9), U64_C(0xfb8f406e25abe037),
+    U64_C(0x7a5a0231c0f60796), U64_C(0x9d3cd216e1f5020b),
+    U64_C(0x0c6550fb6b48d8f3), U64_C(0xf57508c427ff1c62),
+    U64_C(0x4ad35ffa71cb407d), U64_C(0x6290a2da1666aa6d),
+    U64_C(0xe284ec2349355f9f), U64_C(0xb3c307c53d7c84ec),
+    U64_C(0x05e23c0468365a02), U64_C(0x190bac4d6c9ebfa8),
+    U64_C(0x94bbbee9e28b80fa), U64_C(0xa34fc777529cb9b5),
+    U64_C(0xcc7b39f095bcd978), U64_C(0x2426addb0ce532e3),
+    U64_C(0x7e79329312ce4fc7), U64_C(0xab09a72eebec2917),
+    U64_C(0xf8d15499f6b9d6c2), U64_C(0x1a55b8babf8c895d),
+    U64_C(0xdb8add17fb769a85), U64_C(0xb57f2f368658e81b),
+    U64_C(0x8acd36f18f3f41f6), U64_C(0x5ce3b7bba50f11d3),
+    U64_C(0x114dcc14d5ee2f0a), U64_C(0xb91a7fcded1030e8),
+    U64_C(0x81d5425fe55de7a1), U64_C(0xb6213bc1554adeee),
+    U64_C(0x80144ef95f53f5f2), U64_C(0x1e7688186db4c10c),
+    U64_C(0x3b912965db5fe1bc), U64_C(0xc281715a97e8252d),
+    U64_C(0x54a5d7e21c7f8171), U64_C(0x4b12535ccbc5522e),
+    U64_C(0x1d289cefbea6f7f9), U64_C(0x6ef5f2217d2e729e),
+    U64_C(0xe6a7dc819b0d17ce), U64_C(0x1b94b41c05829b0e),
+    U64_C(0x33d7493c622f711e), U64_C(0xdcf7f942fa5ce421),
+    U64_C(0x600fba8b7f7a8ecb), U64_C(0x46b60f011a83988e),
+    U64_C(0x235b898e0dcf4c47), U64_C(0x957ab24f588592a9),
+    U64_C(0x4354330572b5c28c), U64_C(0xa5f3ef84e9b8d542),
+    U64_C(0x8c711e02341b2d01), U64_C(0x0b1874ae6a62a657),
+    U64_C(0x1213d8e306fc19ff), U64_C(0xfe6d7c6a4d9dba35),
+    U64_C(0x65ed868f174cd4c9), U64_C(0x88522ea0e6236550),
+    U64_C(0x899322065c2d7703), U64_C(0xc01e690bfef4018b),
+    U64_C(0x915982ed8abddaf8), U64_C(0xbe675b98ec3a4e4c),
+    U64_C(0xa996bf7f82f00db1), U64_C(0xe1daf8d49a27696a),
+    U64_C(0x2effd5d3dc8986e7), U64_C(0xd153a51f2b1a2e81),
+    U64_C(0x18caa0ebd690adfb), U64_C(0x390e3134b243c51a),
+    U64_C(0x2778b92cdff70416), U64_C(0x029f1851691c24a6),
+    U64_C(0x5e7cafeacc133575), U64_C(0xfa4e4cc89fa5f264),
+    U64_C(0x5a5f9f481e2b7d24), U64_C(0x484c47ab18d764db),
+    U64_C(0x400a27f2a1a7f479), U64_C(0xaeeb9b2a83da7315),
+    U64_C(0x721c626879869734), U64_C(0x042330a2d2384851),
+    U64_C(0x85f672fd3765aff0), U64_C(0xba446b3a3e02061d),
+    U64_C(0x73dd6ecec3888567), U64_C(0xffac70ccf793a866),
+    U64_C(0xdfa9edb5294ed2d4), U64_C(0x6c6aea7014325638),
+    U64_C(0x834a5a0e8c41c307), U64_C(0xcdba35562fb2cb2b),
+    U64_C(0x0ad97808d06cb404), U64_C(0x0f3b440cb85aee06),
+    U64_C(0xe5f9c876481f213b), U64_C(0x98deee1289c35809),
+    U64_C(0x59018bbfcd394bd1), U64_C(0xe01bf47220297b39),
+    U64_C(0xde68e1139340c087), U64_C(0x9fa3ca4788e926ad),
+    U64_C(0xbb85679c840c144e), U64_C(0x53d8f3b71d55ffd5),
+    U64_C(0x0da45c5dd146caa0), U64_C(0x6f34fe87c72060cd),
+    U64_C(0x57fbc315cf6db784), U64_C(0xcee421a1fca0fdde),
+    U64_C(0x3d2d0196607b8d4b), U64_C(0x642c8a29ad42c69a),
+    U64_C(0x14aff010bdd87508), U64_C(0xac74837beac657b3),
+    U64_C(0x3216459ad821634d), U64_C(0x3fb219c70967a9ed),
+    U64_C(0x06bc28f3bb246cf7), U64_C(0xf2082c9126d562c6),
+    U64_C(0x66b39278c45ee23c), U64_C(0xbd394f6f3f2878b9),
+    U64_C(0xfd33689d9e8f8cc0), U64_C(0x37f4799eb017394f),
+    U64_C(0x108cc0b26fe03d59), U64_C(0xda4bd1b1417888d6),
+    U64_C(0xb09d1332ee6eb219), U64_C(0x2f3ed975668794b4),
+    U64_C(0x58c0871977375982), U64_C(0x7561463d78ace990),
+    U64_C(0x09876cff037e82f1), U64_C(0x7fb83e35a8c05d94),
+    U64_C(0x26b9b58a65f91645), U64_C(0xef20b07e9873953f),
+    U64_C(0x3148516d0b3355b8), U64_C(0x41cb2b541ba9e62a),
+    U64_C(0x790416c613e43163), U64_C(0xa011d380818e8f40),
+    U64_C(0x3a5025c36151f3ef), U64_C(0xd57095bdf92266d0),
+    U64_C(0x498d4b0da2d97688), U64_C(0x8b0c3a57353153a5),
+    U64_C(0x21c491df64d368e1), U64_C(0x8f2f0af5e7091bf4),
+    U64_C(0x2da1c1240f9bb012), U64_C(0xc43d59a92ccc49da),
+    U64_C(0xbfa6573e56345c1f), U64_C(0x828b56a8364fd154),
+    U64_C(0x9a41f643e0df7caf), U64_C(0xbcf843c985266aea),
+    U64_C(0x2b1de9d7b4bfdce5), U64_C(0x20059d79dedd7ab2),
+    U64_C(0x6dabe6d6ae3c446b), U64_C(0x45e81bf6c991ae7b),
+    U64_C(0x6351ae7cac68b83e), U64_C(0xa432e32253b6c711),
+    U64_C(0xd092a9b991143cd2), U64_C(0xcac711032e98b58f),
+    U64_C(0xd8d4c9e02864ac70), U64_C(0xc5fc550f96c25b89),
+    U64_C(0xd7ef8dec903e4276), U64_C(0x67729ede7e50f06f),
+    U64_C(0xeac28c7af045cf3d), U64_C(0xb15c1f945460a04a),
+    U64_C(0x9cfddeb05bfb1058), U64_C(0x93c69abce3a1fe5e),
+    U64_C(0xeb0380dc4a4bdd6e), U64_C(0xd20db1e8f8081874),
+    U64_C(0x229a8528b7c15e14), U64_C(0x44291750739fbc28),
+    U64_C(0xd3ccbd4e42060a27), U64_C(0xf62b1c33f4ed2a97),
+    U64_C(0x86a8660ae4779905), U64_C(0xd62e814a2a305025),
+    U64_C(0x477703a7a08d8add), U64_C(0x7b9b0e977af815c5),
+    U64_C(0x78c51a60a9ea2330), U64_C(0xa6adfb733aaae3b7),
+    U64_C(0x97e5aa1e3199b60f), U64_C(0x0000000000000000),
+    U64_C(0xf4b404629df10e31), U64_C(0x5564db44a6719322),
+    U64_C(0x9207961a59afec0d), U64_C(0x9624a6b88b97a45c),
+    U64_C(0x363575380a192b1c), U64_C(0x2c60cd82b595a241),
+    U64_C(0x7d272664c1dc7932), U64_C(0x7142769faa94a1c1),
+    U64_C(0xa1d0df263b809d13), U64_C(0x1630e841d4c451ae),
+    U64_C(0xc1df65ad44fa13d8), U64_C(0x13d2d445bcf20bac),
+    U64_C(0xd915c546926abe23), U64_C(0x38cf3d92084dd749),
+    U64_C(0xe766d0272103059d), U64_C(0xc7634d5effde7f2f),
+    U64_C(0x077d2455012a7ea4), U64_C(0xedbfa82ff16fb199),
+    U64_C(0xaf2a978c39d46146), U64_C(0x42953fa3c8bbd0df),
+    U64_C(0xcb061da59496a7dc), U64_C(0x25e7a17db6eb20b0),
+    U64_C(0x34aa6d6963050fba), U64_C(0xa76cf7d580a4f1e4),
+    U64_C(0xf7ea10954ee338c4), U64_C(0xfcf2643b24819e93),
+    U64_C(0xcf252d0746aeef8d), U64_C(0x4ef06f58a3f3082c),
+    U64_C(0x563acfb37563a5d7), U64_C(0x5086e740ce47c920),
+    U64_C(0x2982f186dda3f843), U64_C(0x87696aac5e798b56),
+    U64_C(0x5d22bb1d1f010380), U64_C(0x035e14f7d31236f5),
+    U64_C(0x3cec0d30da759f18), U64_C(0xf3c920379cdb7095),
+    U64_C(0xb8db736b571e22bb), U64_C(0xdd36f5e44052f672),
+    U64_C(0xaac8ab8851e23b44), U64_C(0xa857b3d938fe1fe2),
+    U64_C(0x17f1e4e76eca43fd), U64_C(0xec7ea4894b61a3ca),
+    U64_C(0x9e62c6e132e734fe), U64_C(0xd4b1991b432c7483),
+    U64_C(0x6ad6c283af163acf), U64_C(0x1ce9904904a8e5aa),
+    U64_C(0x5fbda34c761d2726), U64_C(0xf910583f4cb7c491),
+    U64_C(0xc6a241f845d06d7c), U64_C(0x4f3163fe19fd1a7f),
+    U64_C(0xe99c988d2357f9c8), U64_C(0x8eee06535d0709a7),
+    U64_C(0x0efa48aa0254fc55), U64_C(0xb4be23903c56fa48),
+    U64_C(0x763f52caabbedf65), U64_C(0xeee1bcd8227d876c),
+    U64_C(0xe345e085f33b4dcc), U64_C(0x3e731561b369bbbe),
+    U64_C(0x2843fd2067adea10), U64_C(0x2adce5710eb1ceb6),
+    U64_C(0xb7e03767ef44ccbd), U64_C(0x8db012a48e153f52),
+    U64_C(0x61ceb62dc5749c98), U64_C(0xe85d942b9959eb9b),
+    U64_C(0x4c6f7709caef2c8a), U64_C(0x84377e5b8d6bbda3),
+    U64_C(0x30895dcbb13d47eb), U64_C(0x74a04a9bc2a2fbc3),
+    U64_C(0x6b17ce251518289c), U64_C(0xe438c4d0f2113368),
+    U64_C(0x1fb784bed7bad35f), U64_C(0x9b80fae55ad16efc),
+    U64_C(0x77fe5e6c11b0cd36), U64_C(0xc858095247849129),
+    U64_C(0x08466059b97090a2), U64_C(0x01c10ca6ba0e1253),
+    U64_C(0x6988d6747c040c3a), U64_C(0x6849dad2c60a1e69),
+    U64_C(0x5147ebe67449db73), U64_C(0xc99905f4fd8a837a),
+    U64_C(0x991fe2b433cd4a5a), U64_C(0xf09734c04fc94660),
+    U64_C(0xa28ecbd1e892abe6), U64_C(0xf1563866f5c75433),
+    U64_C(0x4dae7baf70e13ed9), U64_C(0x7ce62ac27bd26b61),
+    U64_C(0x70837a39109ab392), U64_C(0x90988e4b30b3c8ab),
+    U64_C(0xb2020b63877296bf), U64_C(0x156efcb607d6675b) },
+  /* 7 */
+  { U64_C(0xe63f55ce97c331d0), U64_C(0x25b506b0015bba16),
+    U64_C(0xc8706e29e6ad9ba8), U64_C(0x5b43d3775d521f6a),
+    U64_C(0x0bfa3d577035106e), U64_C(0xab95fc172afb0e66),
+    U64_C(0xf64b63979e7a3276), U64_C(0xf58b4562649dad4b),
+    U64_C(0x48f7c3dbae0c83f1), U64_C(0xff31916642f5c8c5),
+    U64_C(0xcbb048dc1c4a0495), U64_C(0x66b8f83cdf622989),
+    U64_C(0x35c130e908e2b9b0), U64_C(0x7c761a61f0b34fa1),
+    U64_C(0x3601161cf205268d), U64_C(0x9e54ccfe2219b7d6),
+    U64_C(0x8b7d90a538940837), U64_C(0x9cd403588ea35d0b),
+    U64_C(0xbc3c6fea9ccc5b5a), U64_C(0xe5ff733b6d24aeed),
+    U64_C(0xceed22de0f7eb8d2), U64_C(0xec8581cab1ab545e),
+    U64_C(0xb96105e88ff8e71d), U64_C(0x8ca03501871a5ead),
+    U64_C(0x76ccce65d6db2a2f), U64_C(0x5883f582a7b58057),
+    U64_C(0x3f7be4ed2e8adc3e), U64_C(0x0fe7be06355cd9c9),
+    U64_C(0xee054e6c1d11be83), U64_C(0x1074365909b903a6),
+    U64_C(0x5dde9f80b4813c10), U64_C(0x4a770c7d02b6692c),
+    U64_C(0x5379c8d5d7809039), U64_C(0xb4067448161ed409),
+    U64_C(0x5f5e5026183bd6cd), U64_C(0xe898029bf4c29df9),
+    U64_C(0x7fb63c940a54d09c), U64_C(0xc5171f897f4ba8bc),
+    U64_C(0xa6f28db7b31d3d72), U64_C(0x2e4f3be7716eaa78),
+    U64_C(0x0d6771a099e63314), U64_C(0x82076254e41bf284),
+    U64_C(0x2f0fd2b42733df98), U64_C(0x5c9e76d3e2dc49f0),
+    U64_C(0x7aeb569619606cdb), U64_C(0x83478b07b2468764),
+    U64_C(0xcfadcb8d5923cd32), U64_C(0x85dac7f05b95a41e),
+    U64_C(0xb5469d1b4043a1e9), U64_C(0xb821ecbbd9a592fd),
+    U64_C(0x1b8e0b0e798c13c8), U64_C(0x62a57b6d9a0be02e),
+    U64_C(0xfcf1b793b81257f8), U64_C(0x9d94ea0bd8fe28eb),
+    U64_C(0x4cea408aeb654a56), U64_C(0x23284a47e888996c),
+    U64_C(0x2d8f1d128b893545), U64_C(0xf4cbac3132c0d8ab),
+    U64_C(0xbd7c86b9ca912eba), U64_C(0x3a268eef3dbe6079),
+    U64_C(0xf0d62f6077a9110c), U64_C(0x2735c916ade150cb),
+    U64_C(0x89fd5f03942ee2ea), U64_C(0x1acee25d2fd16628),
+    U64_C(0x90f39bab41181bff), U64_C(0x430dfe8cde39939f),
+    U64_C(0xf70b8ac4c8274796), U64_C(0x1c53aeaac6024552),
+    U64_C(0x13b410acf35e9c9b), U64_C(0xa532ab4249faa24f),
+    U64_C(0x2b1251e5625a163f), U64_C(0xd7e3e676da4841c7),
+    U64_C(0xa7b264e4e5404892), U64_C(0xda8497d643ae72d3),
+    U64_C(0x861ae105a1723b23), U64_C(0x38a6414991048aa4),
+    U64_C(0x6578dec92585b6b4), U64_C(0x0280cfa6acbaeadd),
+    U64_C(0x88bdb650c273970a), U64_C(0x9333bd5ebbff84c2),
+    U64_C(0x4e6a8f2c47dfa08b), U64_C(0x321c954db76cef2a),
+    U64_C(0x418d312a72837942), U64_C(0xb29b38bfffcdf773),
+    U64_C(0x6c022c38f90a4c07), U64_C(0x5a033a240b0f6a8a),
+    U64_C(0x1f93885f3ce5da6f), U64_C(0xc38a537e96988bc6),
+    U64_C(0x39e6a81ac759ff44), U64_C(0x29929e43cee0fce2),
+    U64_C(0x40cdd87924de0ca2), U64_C(0xe9d8ebc8a29fe819),
+    U64_C(0x0c2798f3cfbb46f4), U64_C(0x55e484223e53b343),
+    U64_C(0x4650948ecd0d2fd8), U64_C(0x20e86cb2126f0651),
+    U64_C(0x6d42c56baf5739e7), U64_C(0xa06fc1405ace1e08),
+    U64_C(0x7babbfc54f3d193b), U64_C(0x424d17df8864e67f),
+    U64_C(0xd8045870ef14980e), U64_C(0xc6d7397c85ac3781),
+    U64_C(0x21a885e1443273b1), U64_C(0x67f8116f893f5c69),
+    U64_C(0x24f5efe35706cff6), U64_C(0xd56329d076f2ab1a),
+    U64_C(0x5e1eb9754e66a32d), U64_C(0x28d2771098bd8902),
+    U64_C(0x8f6013f47dfdc190), U64_C(0x17a993fdb637553c),
+    U64_C(0xe0a219397e1012aa), U64_C(0x786b9930b5da8606),
+    U64_C(0x6e82e39e55b0a6da), U64_C(0x875a0856f72f4ec3),
+    U64_C(0x3741ff4fa458536d), U64_C(0xac4859b3957558fc),
+    U64_C(0x7ef6d5c75c09a57c), U64_C(0xc04a758b6c7f14fb),
+    U64_C(0xf9acdd91ab26ebbf), U64_C(0x7391a467c5ef9668),
+    U64_C(0x335c7c1ee1319aca), U64_C(0xa91533b18641e4bb),
+    U64_C(0xe4bf9a683b79db0d), U64_C(0x8e20faa72ba0b470),
+    U64_C(0x51f907737b3a7ae4), U64_C(0x2268a314bed5ec8c),
+    U64_C(0xd944b123b949edee), U64_C(0x31dcb3b84d8b7017),
+    U64_C(0xd3fe65279f218860), U64_C(0x097af2f1dc8ffab3),
+    U64_C(0x9b09a6fc312d0b91), U64_C(0xcc6ded78a3c4520f),
+    U64_C(0x3481d9ba5ebfcc50), U64_C(0x4f2a667f1182d56b),
+    U64_C(0xdfd9fdd4509ace94), U64_C(0x26752045fbbc252b),
+    U64_C(0xbffc491f662bc467), U64_C(0xdd593272fc202449),
+    U64_C(0x3cbbc218d46d4303), U64_C(0x91b372f817456e1f),
+    U64_C(0x681faf69bc6385a0), U64_C(0xb686bbeebaa43ed4),
+    U64_C(0x1469b5084cd0ca01), U64_C(0x98c98009cbca94ac),
+    U64_C(0x6438379a73d8c354), U64_C(0xc2caba2dc0c5fe26),
+    U64_C(0x3e3b0dbe78d7a9de), U64_C(0x50b9ee202d670f04),
+    U64_C(0x4590b27b37eab0e5), U64_C(0x6025b4cb36b10af3),
+    U64_C(0xfb2c1237079c0162), U64_C(0xa12f28130c936be8),
+    U64_C(0x4b37e52e54eb1ccc), U64_C(0x083a1ba28ad28f53),
+    U64_C(0xc10a9cd83a22611b), U64_C(0x9f1425ad7444c236),
+    U64_C(0x069d4cf7e9d3237a), U64_C(0xedc56899e7f621be),
+    U64_C(0x778c273680865fcf), U64_C(0x309c5aeb1bd605f7),
+    U64_C(0x8de0dc52d1472b4d), U64_C(0xf8ec34c2fd7b9e5f),
+    U64_C(0xea18cd3d58787724), U64_C(0xaad515447ca67b86),
+    U64_C(0x9989695a9d97e14c), U64_C(0x0000000000000000),
+    U64_C(0xf196c63321f464ec), U64_C(0x71116bc169557cb5),
+    U64_C(0xaf887f466f92c7c1), U64_C(0x972e3e0ffe964d65),
+    U64_C(0x190ec4a8d536f915), U64_C(0x95aef1a9522ca7b8),
+    U64_C(0xdc19db21aa7d51a9), U64_C(0x94ee18fa0471d258),
+    U64_C(0x8087adf248a11859), U64_C(0xc457f6da2916dd5c),
+    U64_C(0xfa6cfb6451c17482), U64_C(0xf256e0c6db13fbd1),
+    U64_C(0x6a9f60cf10d96f7d), U64_C(0x4daaa9d9bd383fb6),
+    U64_C(0x03c026f5fae79f3d), U64_C(0xde99148706c7bb74),
+    U64_C(0x2a52b8b6340763df), U64_C(0x6fc20acd03edd33a),
+    U64_C(0xd423c08320afdefa), U64_C(0xbbe1ca4e23420dc0),
+    U64_C(0x966ed75ca8cb3885), U64_C(0xeb58246e0e2502c4),
+    U64_C(0x055d6a021334bc47), U64_C(0xa47242111fa7d7af),
+    U64_C(0xe3623fcc84f78d97), U64_C(0x81c744a11efc6db9),
+    U64_C(0xaec8961539cfb221), U64_C(0xf31609958d4e8e31),
+    U64_C(0x63e5923ecc5695ce), U64_C(0x47107ddd9b505a38),
+    U64_C(0xa3afe7b5a0298135), U64_C(0x792b7063e387f3e6),
+    U64_C(0x0140e953565d75e0), U64_C(0x12f4f9ffa503e97b),
+    U64_C(0x750ce8902c3cb512), U64_C(0xdbc47e8515f30733),
+    U64_C(0x1ed3610c6ab8af8f), U64_C(0x5239218681dde5d9),
+    U64_C(0xe222d69fd2aaf877), U64_C(0xfe71783514a8bd25),
+    U64_C(0xcaf0a18f4a177175), U64_C(0x61655d9860ec7f13),
+    U64_C(0xe77fbc9dc19e4430), U64_C(0x2ccff441ddd440a5),
+    U64_C(0x16e97aaee06a20dc), U64_C(0xa855dae2d01c915b),
+    U64_C(0x1d1347f9905f30b2), U64_C(0xb7c652bdecf94b34),
+    U64_C(0xd03e43d265c6175d), U64_C(0xfdb15ec0ee4f2218),
+    U64_C(0x57644b8492e9599e), U64_C(0x07dda5a4bf8e569a),
+    U64_C(0x54a46d71680ec6a3), U64_C(0x5624a2d7c4b42c7e),
+    U64_C(0xbebca04c3076b187), U64_C(0x7d36f332a6ee3a41),
+    U64_C(0x3b6667bc6be31599), U64_C(0x695f463aea3ef040),
+    U64_C(0xad08b0e0c3282d1c), U64_C(0xb15b1e4a052a684e),
+    U64_C(0x44d05b2861b7c505), U64_C(0x15295c5b1a8dbfe1),
+    U64_C(0x744c01c37a61c0f2), U64_C(0x59c31cd1f1e8f5b7),
+    U64_C(0xef45a73f4b4ccb63), U64_C(0x6bdf899c46841a9d),
+    U64_C(0x3dfb2b4b823036e3), U64_C(0xa2ef0ee6f674f4d5),
+    U64_C(0x184e2dfb836b8cf5), U64_C(0x1134df0a5fe47646),
+    U64_C(0xbaa1231d751f7820), U64_C(0xd17eaa81339b62bd),
+    U64_C(0xb01bf71953771dae), U64_C(0x849a2ea30dc8d1fe),
+    U64_C(0x705182923f080955), U64_C(0x0ea757556301ac29),
+    U64_C(0x041d83514569c9a7), U64_C(0x0abad4042668658e),
+    U64_C(0x49b72a88f851f611), U64_C(0x8a3d79f66ec97dd7),
+    U64_C(0xcd2d042bf59927ef), U64_C(0xc930877ab0f0ee48),
+    U64_C(0x9273540deda2f122), U64_C(0xc797d02fd3f14261),
+    U64_C(0xe1e2f06a284d674a), U64_C(0xd2be8c74c97cfd80),
+    U64_C(0x9a494faf67707e71), U64_C(0xb3dbd1eca9908293),
+    U64_C(0x72d14d3493b2e388), U64_C(0xd6a30f258c153427) },
+};
+
+static const u64 C16[12][8] =
+{
+  { U64_C(0xdd806559f2a64507), U64_C(0x05767436cc744d23),
+    U64_C(0xa2422a08a460d315), U64_C(0x4b7ce09192676901),
+    U64_C(0x714eb88d7585c4fc), U64_C(0x2f6a76432e45d016),
+    U64_C(0xebcb2f81c0657c1f), U64_C(0xb1085bda1ecadae9) },
+  { U64_C(0xe679047021b19bb7), U64_C(0x55dda21bd7cbcd56),
+    U64_C(0x5cb561c2db0aa7ca), U64_C(0x9ab5176b12d69958),
+    U64_C(0x61d55e0f16b50131), U64_C(0xf3feea720a232b98),
+    U64_C(0x4fe39d460f70b5d7), U64_C(0x6fa3b58aa99d2f1a) },
+  { U64_C(0x991e96f50aba0ab2), U64_C(0xc2b6f443867adb31),
+    U64_C(0xc1c93a376062db09), U64_C(0xd3e20fe490359eb1),
+    U64_C(0xf2ea7514b1297b7b), U64_C(0x06f15e5f529c1f8b),
+    U64_C(0x0a39fc286a3d8435), U64_C(0xf574dcac2bce2fc7) },
+  { U64_C(0x220cbebc84e3d12e), U64_C(0x3453eaa193e837f1),
+    U64_C(0xd8b71333935203be), U64_C(0xa9d72c82ed03d675),
+    U64_C(0x9d721cad685e353f), U64_C(0x488e857e335c3c7d),
+    U64_C(0xf948e1a05d71e4dd), U64_C(0xef1fdfb3e81566d2) },
+  { U64_C(0x601758fd7c6cfe57), U64_C(0x7a56a27ea9ea63f5),
+    U64_C(0xdfff00b723271a16), U64_C(0xbfcd1747253af5a3),
+    U64_C(0x359e35d7800fffbd), U64_C(0x7f151c1f1686104a),
+    U64_C(0x9a3f410c6ca92363), U64_C(0x4bea6bacad474799) },
+  { U64_C(0xfa68407a46647d6e), U64_C(0xbf71c57236904f35),
+    U64_C(0x0af21f66c2bec6b6), U64_C(0xcffaa6b71c9ab7b4),
+    U64_C(0x187f9ab49af08ec6), U64_C(0x2d66c4f95142a46c),
+    U64_C(0x6fa4c33b7a3039c0), U64_C(0xae4faeae1d3ad3d9) },
+  { U64_C(0x8886564d3a14d493), U64_C(0x3517454ca23c4af3),
+    U64_C(0x06476983284a0504), U64_C(0x0992abc52d822c37),
+    U64_C(0xd3473e33197a93c9), U64_C(0x399ec6c7e6bf87c9),
+    U64_C(0x51ac86febf240954), U64_C(0xf4c70e16eeaac5ec) },
+  { U64_C(0xa47f0dd4bf02e71e), U64_C(0x36acc2355951a8d9),
+    U64_C(0x69d18d2bd1a5c42f), U64_C(0xf4892bcb929b0690),
+    U64_C(0x89b4443b4ddbc49a), U64_C(0x4eb7f8719c36de1e),
+    U64_C(0x03e7aa020c6e4141), U64_C(0x9b1f5b424d93c9a7) },
+  { U64_C(0x7261445183235adb), U64_C(0x0e38dc92cb1f2a60),
+    U64_C(0x7b2b8a9aa6079c54), U64_C(0x800a440bdbb2ceb1),
+    U64_C(0x3cd955b7e00d0984), U64_C(0x3a7d3a1b25894224),
+    U64_C(0x944c9ad8ec165fde), U64_C(0x378f5a541631229b) },
+  { U64_C(0x74b4c7fb98459ced), U64_C(0x3698fad1153bb6c3),
+    U64_C(0x7a1e6c303b7652f4), U64_C(0x9fe76702af69334b),
+    U64_C(0x1fffe18a1b336103), U64_C(0x8941e71cff8a78db),
+    U64_C(0x382ae548b2e4f3f3), U64_C(0xabbedea680056f52) },
+  { U64_C(0x6bcaa4cd81f32d1b), U64_C(0xdea2594ac06fd85d),
+    U64_C(0xefbacd1d7d476e98), U64_C(0x8a1d71efea48b9ca),
+    U64_C(0x2001802114846679), U64_C(0xd8fa6bbbebab0761),
+    U64_C(0x3002c6cd635afe94), U64_C(0x7bcd9ed0efc889fb) },
+  { U64_C(0x48bc924af11bd720), U64_C(0xfaf417d5d9b21b99),
+    U64_C(0xe71da4aa88e12852), U64_C(0x5d80ef9d1891cc86),
+    U64_C(0xf82012d430219f9b), U64_C(0xcda43c32bcdf1d77),
+    U64_C(0xd21380b00449b17a), U64_C(0x378ee767f11631ba) },
+};
+
+
+#define strido(out, temp, i) do { \
+       u64 t; \
+       t  = stribog_table[0][(temp[0] >> (i * 8)) & 0xff]; \
+       t ^= stribog_table[1][(temp[1] >> (i * 8)) & 0xff]; \
+       t ^= stribog_table[2][(temp[2] >> (i * 8)) & 0xff]; \
+       t ^= stribog_table[3][(temp[3] >> (i * 8)) & 0xff]; \
+       t ^= stribog_table[4][(temp[4] >> (i * 8)) & 0xff]; \
+       t ^= stribog_table[5][(temp[5] >> (i * 8)) & 0xff]; \
+       t ^= stribog_table[6][(temp[6] >> (i * 8)) & 0xff]; \
+       t ^= stribog_table[7][(temp[7] >> (i * 8)) & 0xff]; \
+       out[i] = t; } while(0)
+
+static void LPSX (u64 *out, const u64 *a, const u64 *b)
+{
+  u64 temp[8];
+  temp[0] = a[0] ^ b[0];
+  temp[1] = a[1] ^ b[1];
+  temp[2] = a[2] ^ b[2];
+  temp[3] = a[3] ^ b[3];
+  temp[4] = a[4] ^ b[4];
+  temp[5] = a[5] ^ b[5];
+  temp[6] = a[6] ^ b[6];
+  temp[7] = a[7] ^ b[7];
+  strido (out, temp, 0);
+  strido (out, temp, 1);
+  strido (out, temp, 2);
+  strido (out, temp, 3);
+  strido (out, temp, 4);
+  strido (out, temp, 5);
+  strido (out, temp, 6);
+  strido (out, temp, 7);
+}
+
+static inline void g (u64 *h, u64 *m, u64 *N)
+{
+  u64 K[8];
+  u64 T[8];
+  int i;
+
+  LPSX (K, h, N);
+
+  LPSX (T, K, m);
+  LPSX (K, K, C16[0]);
+  for (i = 1; i < 12; i++)
+    {
+      LPSX (T, K, T);
+      LPSX (K, K, C16[i]);
+    }
+
+  h[0] ^= T[0] ^ K[0] ^ m[0];
+  h[1] ^= T[1] ^ K[1] ^ m[1];
+  h[2] ^= T[2] ^ K[2] ^ m[2];
+  h[3] ^= T[3] ^ K[3] ^ m[3];
+  h[4] ^= T[4] ^ K[4] ^ m[4];
+  h[5] ^= T[5] ^ K[5] ^ m[5];
+  h[6] ^= T[6] ^ K[6] ^ m[6];
+  h[7] ^= T[7] ^ K[7] ^ m[7];
+}
+
+
+static unsigned int
+transform (void *context, const unsigned char *inbuf_arg, size_t datalen);
+
+
+static void
+stribog_init_512 (void *context, unsigned int flags)
+{
+  STRIBOG_CONTEXT *hd = context;
+
+  (void)flags;
+
+  memset (hd, 0, sizeof (*hd));
+
+  hd->bctx.blocksize_shift = _gcry_ctz(64);
+  hd->bctx.bwrite = transform;
+}
+
+static void
+stribog_init_256 (void *context, unsigned int flags)
+{
+  STRIBOG_CONTEXT *hd = context;
+
+  stribog_init_512 (context, flags);
+  memset (hd->h, 1, 64);
+}
+
+static void
+transform_bits (STRIBOG_CONTEXT *hd, const unsigned char *data, unsigned count)
+{
+  u64 M[8];
+  u64 l, cf;
+  int i;
+
+  for (i = 0; i < 8; i++)
+    M[i] = buf_get_le64(data + i * 8);
+
+  g (hd->h, M, hd->N);
+  l = hd->N[0];
+  hd->N[0] += count;
+  if (hd->N[0] < l)
+    { /* overflow */
+      for (i = 1; i < 8; i++)
+        {
+          hd->N[i]++;
+          if (hd->N[i] != 0)
+            break;
+        }
+    }
+
+  hd->Sigma[0] += M[0];
+  cf = 0;
+  for (i = 1; i < 8; i++)
+    {
+      if (hd->Sigma[i-1] != M[i-1])
+       cf = (hd->Sigma[i-1] < M[i-1]);
+      hd->Sigma[i] += M[i] + cf;
+    }
+}
+
+static unsigned int
+transform_blk (void *context, const unsigned char *inbuf_arg)
+{
+  STRIBOG_CONTEXT *hd = context;
+
+  transform_bits (hd, inbuf_arg, 64 * 8);
+
+  return /* burn_stack */ 768;
+}
+
+static unsigned int
+transform ( void *c, const unsigned char *data, size_t nblks )
+{
+  unsigned int burn;
+
+  do
+    {
+      burn = transform_blk (c, data);
+      data += 64;
+    }
+  while (--nblks);
+
+  return burn;
+}
+
+/*
+   The routine finally terminates the computation and returns the
+   digest.  The handle is prepared for a new cycle, but adding bytes
+   to the handle will the destroy the returned buffer.  Returns: 32
+   bytes with the message the digest.  */
+static void
+stribog_final (void *context)
+{
+  STRIBOG_CONTEXT *hd = context;
+  u64 Z[8] = {};
+  int i;
+
+  /* PAD. It does not count towards message length */
+  i = hd->bctx.count;
+  /* After flush we have at least one byte free) */
+  hd->bctx.buf[i++] = 1;
+  if (i < 64)
+    memset (&hd->bctx.buf[i], 0, 64 - i);
+  i = 64;
+  transform_bits (hd, hd->bctx.buf, hd->bctx.count * 8);
+
+  g (hd->h, hd->N, Z);
+  g (hd->h, hd->Sigma, Z);
+
+  for (i = 0; i < 8; i++)
+    hd->h[i] = le_bswap64(hd->h[i]);
+
+  hd->bctx.count = 0;
+
+  _gcry_burn_stack (768);
+}
+
+static byte *
+stribog_read_512 (void *context)
+{
+  STRIBOG_CONTEXT *hd = context;
+
+  return hd->result;
+}
+
+static byte *
+stribog_read_256 (void *context)
+{
+  STRIBOG_CONTEXT *hd = context;
+
+  return hd->result + 32;
+}
+
+static const gcry_md_oid_spec_t oid_spec_stribog256[] =
+  {
+    /* id-tc26-signwithdigest-gost3410-12-256 */
+    { "1.2.643.7.1.1.3.2" },
+    /* id-tc26-gost3411-12-256 */
+    { "1.2.643.7.1.1.2.2" },
+    { NULL },
+  };
+
+static const gcry_md_oid_spec_t oid_spec_stribog512[] =
+  {
+    /* id-tc26-signwithdigest-gost3410-12-512 */
+    { "1.2.643.7.1.1.3.3" },
+    /* id-tc26-gost3411-12-512 */
+    { "1.2.643.7.1.1.2.3" },
+    { NULL },
+  };
+
+const gcry_md_spec_t _gcry_digest_spec_stribog_256 =
+  {
+    GCRY_MD_STRIBOG256, {0, 0},
+    "STRIBOG256", NULL, 0, oid_spec_stribog256, 32,
+    stribog_init_256, _gcry_md_block_write, stribog_final, stribog_read_256,
+    NULL, NULL,
+    sizeof (STRIBOG_CONTEXT)
+  };
+
+const gcry_md_spec_t _gcry_digest_spec_stribog_512 =
+  {
+    GCRY_MD_STRIBOG512, {0, 0},
+    "STRIBOG512", NULL, 0, oid_spec_stribog512, 64,
+    stribog_init_512, _gcry_md_block_write, stribog_final, stribog_read_512,
+    NULL, NULL,
+    sizeof (STRIBOG_CONTEXT)
+  };
diff --git a/grub-core/lib/libgcrypt/cipher/test-getrusage.c 
b/grub-core/lib/libgcrypt/cipher/test-getrusage.c
deleted file mode 100644
index 978cf2de9..000000000
--- a/grub-core/lib/libgcrypt/cipher/test-getrusage.c
+++ /dev/null
@@ -1,105 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <sys/resource.h>
-
-int
-main (int argc, char **argv)
-{
-  struct rusage buf;
-
-  if (argc > 1)
-    {
-      system (argv[1]);
-
-      if (getrusage (RUSAGE_CHILDREN, &buf ))
-        {
-          perror ("getrusage");
-          return 1;
-        }
-    }
-  else
-    {
-      if (getrusage (RUSAGE_SELF, &buf ))
-        {
-          perror ("getrusage");
-          return 1;
-        }
-    }
-
-  printf ("ru_utime   = %ld.%06ld\n",
-          buf.ru_utime.tv_sec, buf.ru_utime.tv_usec);
-  printf ("ru_stime   = %ld.%06ld\n",
-          buf.ru_stime.tv_sec, buf.ru_stime.tv_usec);
-  printf ("ru_maxrss  = %ld\n", buf.ru_maxrss   );
-  printf ("ru_ixrss   = %ld\n", buf.ru_ixrss    );
-  printf ("ru_idrss   = %ld\n", buf.ru_idrss    );
-  printf ("ru_isrss   = %ld\n", buf.ru_isrss    );
-  printf ("ru_minflt  = %ld\n", buf.ru_minflt   );
-  printf ("ru_majflt  = %ld\n", buf.ru_majflt   );
-  printf ("ru_nswap   = %ld\n", buf.ru_nswap    );
-  printf ("ru_inblock = %ld\n", buf.ru_inblock  );
-  printf ("ru_oublock = %ld\n", buf.ru_oublock  );
-  printf ("ru_msgsnd  = %ld\n", buf.ru_msgsnd   );
-  printf ("ru_msgrcv  = %ld\n", buf.ru_msgrcv   );
-  printf ("ru_nsignals= %ld\n", buf.ru_nsignals );
-  printf ("ru_nvcsw   = %ld\n", buf.ru_nvcsw    );
-  printf ("ru_nivcsw  = %ld\n", buf.ru_nivcsw   );
-
-  fprintf (stderr, "ru_utime   ru_stime   ru_minflt  ru_nccsw  ru_nivcsw\n");
-  fprintf (stderr, "%ld.%06ld  %ld.%06ld  %5ld       %5ld      %5ld\n");
-
-
-  return 0;
-}
-
-
-/* Codesnippet for debugging in random.c. */
-#if 0
-static void
-collect_rusage_stats (struct rusage *rb)
-{
-  static int idx;
-  static struct rusage buf[100];
-
-  if (!rb)
-    {
-      int i;
-
-      fprintf (stderr, "ru_utime   ru_stime   ru_minflt  ru_nvcsw  
ru_nivcsw\n");
-      for (i=0; i < idx; i++)
-        fprintf (stderr, "%ld.%06ld   %ld.%06ld %5ld       %5ld      %5ld\n",
-                 buf[i].ru_utime.tv_sec, buf[i].ru_utime.tv_usec,
-                 buf[i].ru_stime.tv_sec, buf[i].ru_stime.tv_usec,
-                 buf[i].ru_minflt,
-                 buf[i].ru_nvcsw,
-                 buf[i].ru_nivcsw);
-    }
-  else if (idx < DIM(buf))
-    {
-      buf[idx++] = *rb;
-    }
-}
-#endif
-/*
- void
- _gcry_random_dump_stats()
- {
-@@ -233,8 +261,11 @@
-                  rndstats.naddbytes, rndstats.addbytes,
-        rndstats.mixkey, rndstats.ngetbytes1, rndstats.getbytes1,
-                    rndstats.ngetbytes2, rndstats.getbytes2 );
-+
-+    collect_rusage_stats (NULL);
- }
-
-========
-
-     getrusage (RUSAGE_SELF, &buf );
-+    collect_rusage_stats (&buf);
-     add_randomness( &buf, sizeof buf, 1 );
-     memset( &buf, 0, sizeof buf );
-   }
-
-*/
-
-
diff --git a/grub-core/lib/libgcrypt/cipher/tiger.c 
b/grub-core/lib/libgcrypt/cipher/tiger.c
index d4ad514a9..ae55359ca 100644
--- a/grub-core/lib/libgcrypt/cipher/tiger.c
+++ b/grub-core/lib/libgcrypt/cipher/tiger.c
@@ -27,16 +27,14 @@
 
 #include "g10lib.h"
 #include "cipher.h"
-
-/* We really need a 64 bit type for this code.  */
-#ifdef HAVE_U64_TYPEDEF
+#include "hash-common.h"
+#include "bithelp.h"
+#include "bufhelp.h"
 
 typedef struct
 {
+  gcry_md_block_ctx_t bctx;
   u64  a, b, c;
-  byte buf[64];
-  int  count;
-  u32  nblocks;
   int  variant;  /* 0 = old code, 1 = fixed code, 2 - TIGER2.  */
 } TIGER_CONTEXT;
 
@@ -588,6 +586,9 @@ static u64 sbox4[256] = {
   U64_C(0xc83223f1720aef96) /* 1022 */, U64_C(0xc3a0396f7363a51f) /* 1023 */
 };
 
+static unsigned int
+transform ( void *ctx, const unsigned char *data, size_t nblks );
+
 static void
 do_init (void *context, int variant)
 {
@@ -596,130 +597,103 @@ do_init (void *context, int variant)
   hd->a = 0x0123456789abcdefLL;
   hd->b = 0xfedcba9876543210LL;
   hd->c = 0xf096a5b4c3b2e187LL;
-  hd->nblocks = 0;
-  hd->count = 0;
+
+  hd->bctx.nblocks = 0;
+  hd->bctx.nblocks_high = 0;
+  hd->bctx.count = 0;
+  hd->bctx.blocksize_shift = _gcry_ctz(64);
+  hd->bctx.bwrite = transform;
   hd->variant = variant;
 }
 
 static void
-tiger_init (void *context)
+tiger_init (void *context, unsigned int flags)
 {
+  (void)flags;
+
   do_init (context, 0);
 }
 
 static void
-tiger1_init (void *context)
+tiger1_init (void *context, unsigned int flags)
 {
-  do_init (context, 1);
-}
+  (void)flags;
 
-static void
-tiger2_init (void *context)
-{
-  do_init (context, 2);
+  do_init (context, 1);
 }
 
 static void
-tiger_round( u64 *ra, u64 *rb, u64 *rc, u64 x, int mul )
+tiger2_init (void *context, unsigned int flags)
 {
-  u64 a = *ra;
-  u64 b = *rb;
-  u64 c = *rc;
-
-  c ^= x;
-  a -= (  sbox1[  c        & 0xff ] ^ sbox2[ (c >> 16) & 0xff ]
-        ^ sbox3[ (c >> 32) & 0xff ] ^ sbox4[ (c >> 48) & 0xff ]);
-  b += (  sbox4[ (c >>  8) & 0xff ] ^ sbox3[ (c >> 24) & 0xff ]
-        ^ sbox2[ (c >> 40) & 0xff ] ^ sbox1[ (c >> 56) & 0xff ]);
-  b *= mul;
-
-  *ra = a;
-  *rb = b;
-  *rc = c;
-}
+  (void)flags;
 
-
-static void
-pass( u64 *ra, u64 *rb, u64 *rc, u64 *x, int mul )
-{
-  u64 a = *ra;
-  u64 b = *rb;
-  u64 c = *rc;
-
-  tiger_round( &a, &b, &c, x[0], mul );
-  tiger_round( &b, &c, &a, x[1], mul );
-  tiger_round( &c, &a, &b, x[2], mul );
-  tiger_round( &a, &b, &c, x[3], mul );
-  tiger_round( &b, &c, &a, x[4], mul );
-  tiger_round( &c, &a, &b, x[5], mul );
-  tiger_round( &a, &b, &c, x[6], mul );
-  tiger_round( &b, &c, &a, x[7], mul );
-
-  *ra = a;
-  *rb = b;
-  *rc = c;
+  do_init (context, 2);
 }
 
 
-static void
-key_schedule( u64 *x )
-{
-  x[0] -= x[7] ^ 0xa5a5a5a5a5a5a5a5LL;
-  x[1] ^= x[0];
-  x[2] += x[1];
-  x[3] -= x[2] ^ ((~x[1]) << 19 );
-  x[4] ^= x[3];
-  x[5] += x[4];
-  x[6] -= x[5] ^ ((~x[4]) >> 23 );
-  x[7] ^= x[6];
-  x[0] += x[7];
-  x[1] -= x[0] ^ ((~x[7]) << 19 );
-  x[2] ^= x[1];
-  x[3] += x[2];
-  x[4] -= x[3] ^ ((~x[2]) >> 23 );
-  x[5] ^= x[4];
-  x[6] += x[5];
-  x[7] -= x[6] ^ 0x0123456789abcdefLL;
-}
+#define tiger_round(xa, xb, xc, xx, xmul) { \
+  xc ^= xx; \
+  xa -= (  sbox1[  (xc)        & 0xff ] ^ sbox2[ ((xc) >> 16) & 0xff ] \
+         ^ sbox3[ ((xc) >> 32) & 0xff ] ^ sbox4[ ((xc) >> 48) & 0xff ]); \
+  xb += (  sbox4[ ((xc) >>  8) & 0xff ] ^ sbox3[ ((xc) >> 24) & 0xff ] \
+         ^ sbox2[ ((xc) >> 40) & 0xff ] ^ sbox1[ ((xc) >> 56) & 0xff ]); \
+  xb *= xmul; }
+
+
+#define pass(ya, yb, yc, yx, ymul) { \
+  tiger_round( ya, yb, yc, yx[0], ymul ); \
+  tiger_round( yb, yc, ya, yx[1], ymul ); \
+  tiger_round( yc, ya, yb, yx[2], ymul ); \
+  tiger_round( ya, yb, yc, yx[3], ymul ); \
+  tiger_round( yb, yc, ya, yx[4], ymul ); \
+  tiger_round( yc, ya, yb, yx[5], ymul ); \
+  tiger_round( ya, yb, yc, yx[6], ymul ); \
+  tiger_round( yb, yc, ya, yx[7], ymul ); }
+
+
+#define key_schedule(x) { \
+  x[0] -= x[7] ^ 0xa5a5a5a5a5a5a5a5LL; \
+  x[1] ^= x[0]; \
+  x[2] += x[1]; \
+  x[3] -= x[2] ^ ((~x[1]) << 19 ); \
+  x[4] ^= x[3]; \
+  x[5] += x[4]; \
+  x[6] -= x[5] ^ ((~x[4]) >> 23 ); \
+  x[7] ^= x[6]; \
+  x[0] += x[7]; \
+  x[1] -= x[0] ^ ((~x[7]) << 19 ); \
+  x[2] ^= x[1]; \
+  x[3] += x[2]; \
+  x[4] -= x[3] ^ ((~x[2]) >> 23 ); \
+  x[5] ^= x[4]; \
+  x[6] += x[5]; \
+  x[7] -= x[6] ^ 0x0123456789abcdefLL; }
 
 
 /****************
  * Transform the message DATA which consists of 512 bytes (8 words)
  */
-static void
-transform ( TIGER_CONTEXT *hd, const unsigned char *data )
+static unsigned int
+transform_blk ( void *ctx, const unsigned char *data )
 {
+  TIGER_CONTEXT *hd = ctx;
   u64 a,b,c,aa,bb,cc;
   u64 x[8];
-#ifdef WORDS_BIGENDIAN
-#define MKWORD(d,n) \
-               (  ((u64)(d)[8*(n)+7]) << 56 | ((u64)(d)[8*(n)+6]) << 48  \
-                | ((u64)(d)[8*(n)+5]) << 40 | ((u64)(d)[8*(n)+4]) << 32  \
-                | ((u64)(d)[8*(n)+3]) << 24 | ((u64)(d)[8*(n)+2]) << 16  \
-                | ((u64)(d)[8*(n)+1]) << 8  | ((u64)(d)[8*(n)  ])       )
-  x[0] = MKWORD(data, 0);
-  x[1] = MKWORD(data, 1);
-  x[2] = MKWORD(data, 2);
-  x[3] = MKWORD(data, 3);
-  x[4] = MKWORD(data, 4);
-  x[5] = MKWORD(data, 5);
-  x[6] = MKWORD(data, 6);
-  x[7] = MKWORD(data, 7);
-#undef MKWORD
-#else
-  memcpy( &x[0], data, 64 );
-#endif
+  int i;
+
+  for ( i = 0; i < 8; i++ )
+    x[i] = buf_get_le64(data + i * 8);
 
   /* save */
   a = aa = hd->a;
   b = bb = hd->b;
   c = cc = hd->c;
 
-  pass( &a, &b, &c, x, 5);
+  pass( a, b, c, x, 5);
   key_schedule( x );
-  pass( &c, &a, &b, x, 7);
+  pass( c, a, b, x, 7);
   key_schedule( x );
-  pass( &b, &c, &a, x, 9);
+  pass( b, c, a, x, 9);
 
   /* feedforward */
   a ^= aa;
@@ -729,48 +703,24 @@ transform ( TIGER_CONTEXT *hd, const unsigned char *data )
   hd->a = a;
   hd->b = b;
   hd->c = c;
-}
 
+  return /*burn_stack*/ 21*8+11*sizeof(void*);
+}
 
 
-/* Update the message digest with the contents
- * of INBUF with length INLEN.
- */
-static void
-tiger_write ( void *context, const void *inbuf_arg, size_t inlen)
+static unsigned int
+transform ( void *c, const unsigned char *data, size_t nblks )
 {
-  const unsigned char *inbuf = inbuf_arg;
-  TIGER_CONTEXT *hd = context;
+  unsigned int burn;
 
-  if( hd->count == 64 ) /* flush the buffer */
+  do
     {
-      transform( hd, hd->buf );
-      _gcry_burn_stack (21*8+11*sizeof(void*));
-      hd->count = 0;
-      hd->nblocks++;
-    }
-  if( !inbuf )
-    return;
-  if( hd->count )
-    {
-      for( ; inlen && hd->count < 64; inlen-- )
-        hd->buf[hd->count++] = *inbuf++;
-      tiger_write( hd, NULL, 0 );
-      if( !inlen )
-        return;
+      burn = transform_blk (c, data);
+      data += 64;
     }
+  while (--nblks);
 
-  while( inlen >= 64 )
-    {
-      transform( hd, inbuf );
-      hd->count = 0;
-      hd->nblocks++;
-      inlen -= 64;
-      inbuf += 64;
-    }
-  _gcry_burn_stack (21*8+11*sizeof(void*));
-  for( ; inlen && hd->count < 64; inlen-- )
-    hd->buf[hd->count++] = *inbuf++;
+  return burn;
 }
 
 
@@ -781,19 +731,23 @@ static void
 tiger_final( void *context )
 {
   TIGER_CONTEXT *hd = context;
-  u32 t, msb, lsb;
+  u32 t, th, msb, lsb;
   byte *p;
+  unsigned int burn;
   byte pad = hd->variant == 2? 0x80 : 0x01;
 
-  tiger_write(hd, NULL, 0); /* flush */;
+  t = hd->bctx.nblocks;
+  if (sizeof t == sizeof hd->bctx.nblocks)
+    th = hd->bctx.nblocks_high;
+  else
+    th = hd->bctx.nblocks >> 32;
 
-  t = hd->nblocks;
   /* multiply by 64 to make a byte count */
   lsb = t << 6;
-  msb = t >> 26;
+  msb = (th << 6) | (t >> 26);
   /* add the count */
   t = lsb;
-  if( (lsb += hd->count) < t )
+  if( (lsb += hd->bctx.count) < t )
     msb++;
   /* multiply by 8 to make a bit count */
   t = lsb;
@@ -801,45 +755,32 @@ tiger_final( void *context )
   msb <<= 3;
   msb |= t >> 29;
 
-  if( hd->count < 56 )  /* enough room */
+  if( hd->bctx.count < 56 )  /* enough room */
     {
-      hd->buf[hd->count++] = pad;
-      while( hd->count < 56 )
-        hd->buf[hd->count++] = 0;  /* pad */
+      hd->bctx.buf[hd->bctx.count++] = pad;
+      if (hd->bctx.count < 56)
+       memset (&hd->bctx.buf[hd->bctx.count], 0, 56 - hd->bctx.count);
+
+      /* append the 64 bit count */
+      buf_put_le32(hd->bctx.buf + 56, lsb);
+      buf_put_le32(hd->bctx.buf + 60, msb);
+      burn = transform( hd, hd->bctx.buf, 1 );
     }
   else  /* need one extra block */
     {
-      hd->buf[hd->count++] = pad; /* pad character */
-      while( hd->count < 64 )
-        hd->buf[hd->count++] = 0;
-      tiger_write(hd, NULL, 0);  /* flush */;
-      memset(hd->buf, 0, 56 ); /* fill next block with zeroes */
+      hd->bctx.buf[hd->bctx.count++] = pad; /* pad character */
+      /* fill pad and next block with zeroes */
+      memset (&hd->bctx.buf[hd->bctx.count], 0, 64 - hd->bctx.count + 56);
+
+      /* append the 64 bit count */
+      buf_put_le32(hd->bctx.buf + 64 + 56, lsb);
+      buf_put_le32(hd->bctx.buf + 64 + 60, msb);
+      burn = transform( hd, hd->bctx.buf, 2 );
     }
-  /* append the 64 bit count */
-  hd->buf[56] = lsb       ;
-  hd->buf[57] = lsb >>  8;
-  hd->buf[58] = lsb >> 16;
-  hd->buf[59] = lsb >> 24;
-  hd->buf[60] = msb       ;
-  hd->buf[61] = msb >>  8;
-  hd->buf[62] = msb >> 16;
-  hd->buf[63] = msb >> 24;
-  transform( hd, hd->buf );
-  _gcry_burn_stack (21*8+11*sizeof(void*));
-
-  p = hd->buf;
-#ifdef WORDS_BIGENDIAN
-#define X(a) do { *(u64*)p = hd->a ; p += 8; } while(0)
-#else /* little endian */
-#define X(a) do { *p++ = hd->a >> 56; *p++ = hd->a >> 48; \
-                 *p++ = hd->a >> 40; *p++ = hd->a >> 32; \
-                 *p++ = hd->a >> 24; *p++ = hd->a >> 16; \
-                 *p++ = hd->a >>  8; *p++ = hd->a;       } while(0)
-#endif
-#define Y(a) do { *p++ = hd->a      ; *p++ = hd->a >> 8;  \
-                 *p++ = hd->a >> 16; *p++ = hd->a >> 24; \
-                 *p++ = hd->a >> 32; *p++ = hd->a >> 40; \
-                 *p++ = hd->a >> 48; *p++ = hd->a >> 56; } while(0)
+
+  p = hd->bctx.buf;
+#define X(a) do { buf_put_be64(p, hd->a); p += 8; } while(0)
+#define Y(a) do { buf_put_le64(p, hd->a); p += 8; } while(0)
   if (hd->variant == 0)
     {
       X(a);
@@ -854,6 +795,10 @@ tiger_final( void *context )
     }
 #undef X
 #undef Y
+
+  hd->bctx.count = 0;
+
+  _gcry_burn_stack (burn);
 }
 
 static byte *
@@ -861,7 +806,7 @@ tiger_read( void *context )
 {
   TIGER_CONTEXT *hd = context;
 
-  return hd->buf;
+  return hd->bctx.buf;
 }
 
 
@@ -869,43 +814,47 @@ tiger_read( void *context )
 /* This is the old TIGER variant based on the unfixed reference
    implementation.  IT was used in GnupG up to 1.3.2.  We don't provide
    an OID anymore because that would not be correct.  */
-gcry_md_spec_t _gcry_digest_spec_tiger =
+const gcry_md_spec_t _gcry_digest_spec_tiger =
   {
+    GCRY_MD_TIGER, {0, 0},
     "TIGER192", NULL, 0, NULL, 24,
-    tiger_init, tiger_write, tiger_final, tiger_read,
+    tiger_init, _gcry_md_block_write, tiger_final, tiger_read, NULL,
+    NULL,
     sizeof (TIGER_CONTEXT)
   };
 
 
 
 /* This is the fixed TIGER implementation.  */
-static byte asn1[19] = /* Object ID is 1.3.6.1.4.1.11591.12.2 */
+static const byte asn1[19] = /* Object ID is 1.3.6.1.4.1.11591.12.2 */
   { 0x30, 0x29, 0x30, 0x0d, 0x06, 0x09, 0x2b, 0x06,
     0x01, 0x04, 0x01, 0xda, 0x47, 0x0c, 0x02,
     0x05, 0x00, 0x04, 0x18 };
 
-static gcry_md_oid_spec_t oid_spec_tiger1[] =
+static const gcry_md_oid_spec_t oid_spec_tiger1[] =
   {
     /* GNU.digestAlgorithm TIGER */
     { "1.3.6.1.4.1.11591.12.2" },
     { NULL }
   };
 
-gcry_md_spec_t _gcry_digest_spec_tiger1 =
+const gcry_md_spec_t _gcry_digest_spec_tiger1 =
   {
+    GCRY_MD_TIGER1, {0, 0},
     "TIGER", asn1, DIM (asn1), oid_spec_tiger1, 24,
-    tiger1_init, tiger_write, tiger_final, tiger_read,
+    tiger1_init, _gcry_md_block_write, tiger_final, tiger_read, NULL,
+    NULL,
     sizeof (TIGER_CONTEXT)
   };
 
 
 
 /* This is TIGER2 which usues a changed padding algorithm.  */
-gcry_md_spec_t _gcry_digest_spec_tiger2 =
+const gcry_md_spec_t _gcry_digest_spec_tiger2 =
   {
+    GCRY_MD_TIGER2, {0, 0},
     "TIGER2", NULL, 0, NULL, 24,
-    tiger2_init, tiger_write, tiger_final, tiger_read,
+    tiger2_init, _gcry_md_block_write, tiger_final, tiger_read, NULL,
+    NULL,
     sizeof (TIGER_CONTEXT)
   };
-
-#endif /* HAVE_U64_TYPEDEF */
diff --git a/grub-core/lib/libgcrypt/cipher/twofish-aarch64.S 
b/grub-core/lib/libgcrypt/cipher/twofish-aarch64.S
new file mode 100644
index 000000000..7941fe3ac
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/twofish-aarch64.S
@@ -0,0 +1,321 @@
+/* twofish-aarch64.S  -  ARMv8/AArch64 assembly implementation of Twofish 
cipher
+ *
+ * Copyright (C) 2016 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "asm-common-aarch64.h"
+
+#if defined(__AARCH64EL__)
+#ifdef HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS
+
+.text
+
+/* structure of TWOFISH_context: */
+#define s0 0
+#define s1 ((s0) + 4 * 256)
+#define s2 ((s1) + 4 * 256)
+#define s3 ((s2) + 4 * 256)
+#define w  ((s3) + 4 * 256)
+#define k  ((w) + 4 * 8)
+
+/* register macros */
+#define CTX x0
+#define RDST x1
+#define RSRC x2
+#define CTXs0 CTX
+#define CTXs1 x3
+#define CTXs2 x4
+#define CTXs3 x5
+#define CTXw x17
+
+#define RA w6
+#define RB w7
+#define RC w8
+#define RD w9
+
+#define RX w10
+#define RY w11
+
+#define xRX x10
+#define xRY x11
+
+#define RMASK w12
+
+#define RT0 w13
+#define RT1 w14
+#define RT2 w15
+#define RT3 w16
+
+#define xRT0 x13
+#define xRT1 x14
+#define xRT2 x15
+#define xRT3 x16
+
+/* helper macros */
+#ifndef __AARCH64EL__
+       /* bswap on big-endian */
+       #define host_to_le(reg) \
+               rev reg, reg;
+       #define le_to_host(reg) \
+               rev reg, reg;
+#else
+       /* nop on little-endian */
+       #define host_to_le(reg) /*_*/
+       #define le_to_host(reg) /*_*/
+#endif
+
+#define ldr_input_aligned_le(rin, a, b, c, d) \
+       ldr a, [rin, #0]; \
+       ldr b, [rin, #4]; \
+       le_to_host(a); \
+       ldr c, [rin, #8]; \
+       le_to_host(b); \
+       ldr d, [rin, #12]; \
+       le_to_host(c); \
+       le_to_host(d);
+
+#define str_output_aligned_le(rout, a, b, c, d) \
+       le_to_host(a); \
+       le_to_host(b); \
+       str a, [rout, #0]; \
+       le_to_host(c); \
+       str b, [rout, #4]; \
+       le_to_host(d); \
+       str c, [rout, #8]; \
+       str d, [rout, #12];
+
+/* unaligned word reads/writes allowed */
+#define ldr_input_le(rin, ra, rb, rc, rd, rtmp) \
+       ldr_input_aligned_le(rin, ra, rb, rc, rd)
+
+#define str_output_le(rout, ra, rb, rc, rd, rtmp0, rtmp1) \
+       str_output_aligned_le(rout, ra, rb, rc, rd)
+
+/**********************************************************************
+  1-way twofish
+ **********************************************************************/
+#define encrypt_round(a, b, rc, rd, n, ror_a, adj_a) \
+       and RT0, RMASK, b, lsr#(8 - 2); \
+       and RY, RMASK, b, lsr#(16 - 2); \
+       and RT1, RMASK, b, lsr#(24 - 2); \
+       ldr RY, [CTXs3, xRY]; \
+       and RT2, RMASK, b, lsl#(2); \
+       ldr RT0, [CTXs2, xRT0]; \
+       and RT3, RMASK, a, lsr#(16 - 2 + (adj_a)); \
+       ldr RT1, [CTXs0, xRT1]; \
+       and RX, RMASK, a, lsr#(8 - 2 + (adj_a)); \
+       ldr RT2, [CTXs1, xRT2]; \
+       ldr RX, [CTXs1, xRX]; \
+       ror_a(a); \
+       \
+       eor RY, RY, RT0; \
+       ldr RT3, [CTXs2, xRT3]; \
+       and RT0, RMASK, a, lsl#(2); \
+       eor RY, RY, RT1; \
+       and RT1, RMASK, a, lsr#(24 - 2); \
+       eor RY, RY, RT2; \
+       ldr RT0, [CTXs0, xRT0]; \
+       eor RX, RX, RT3; \
+       ldr RT1, [CTXs3, xRT1]; \
+       eor RX, RX, RT0; \
+       \
+       ldr RT3, [CTXs3, #(k - s3 + 8 * (n) + 4)]; \
+       eor RX, RX, RT1; \
+       ldr RT2, [CTXs3, #(k - s3 + 8 * (n))]; \
+       \
+       add RT0, RX, RY, lsl #1; \
+       add RX, RX, RY; \
+       add RT0, RT0, RT3; \
+       add RX, RX, RT2; \
+       eor rd, RT0, rd, ror #31; \
+       eor rc, rc, RX;
+
+#define dummy(x) /*_*/
+
+#define ror1(r) \
+       ror r, r, #1;
+
+#define decrypt_round(a, b, rc, rd, n, ror_b, adj_b) \
+       and RT3, RMASK, b, lsl#(2 - (adj_b)); \
+       and RT1, RMASK, b, lsr#(8 - 2 + (adj_b)); \
+       ror_b(b); \
+       and RT2, RMASK, a, lsl#(2); \
+       and RT0, RMASK, a, lsr#(8 - 2); \
+       \
+       ldr RY, [CTXs1, xRT3]; \
+       ldr RX, [CTXs0, xRT2]; \
+       and RT3, RMASK, b, lsr#(16 - 2); \
+       ldr RT1, [CTXs2, xRT1]; \
+       and RT2, RMASK, a, lsr#(16 - 2); \
+       ldr RT0, [CTXs1, xRT0]; \
+       \
+       ldr RT3, [CTXs3, xRT3]; \
+       eor RY, RY, RT1; \
+       \
+       and RT1, RMASK, b, lsr#(24 - 2); \
+       eor RX, RX, RT0; \
+       ldr RT2, [CTXs2, xRT2]; \
+       and RT0, RMASK, a, lsr#(24 - 2); \
+       \
+       ldr RT1, [CTXs0, xRT1]; \
+       \
+       eor RY, RY, RT3; \
+       ldr RT0, [CTXs3, xRT0]; \
+       eor RX, RX, RT2; \
+       eor RY, RY, RT1; \
+       \
+       ldr RT1, [CTXs3, #(k - s3 + 8 * (n) + 4)]; \
+       eor RX, RX, RT0; \
+       ldr RT2, [CTXs3, #(k - s3 + 8 * (n))]; \
+       \
+       add RT0, RX, RY, lsl #1; \
+       add RX, RX, RY; \
+       add RT0, RT0, RT1; \
+       add RX, RX, RT2; \
+       eor rd, rd, RT0; \
+       eor rc, RX, rc, ror #31;
+
+#define first_encrypt_cycle(nc) \
+       encrypt_round(RA, RB, RC, RD, (nc) * 2, dummy, 0); \
+       encrypt_round(RC, RD, RA, RB, (nc) * 2 + 1, ror1, 1);
+
+#define encrypt_cycle(nc) \
+       encrypt_round(RA, RB, RC, RD, (nc) * 2, ror1, 1); \
+       encrypt_round(RC, RD, RA, RB, (nc) * 2 + 1, ror1, 1);
+
+#define last_encrypt_cycle(nc) \
+       encrypt_round(RA, RB, RC, RD, (nc) * 2, ror1, 1); \
+       encrypt_round(RC, RD, RA, RB, (nc) * 2 + 1, ror1, 1); \
+       ror1(RA);
+
+#define first_decrypt_cycle(nc) \
+       decrypt_round(RC, RD, RA, RB, (nc) * 2 + 1, dummy, 0); \
+       decrypt_round(RA, RB, RC, RD, (nc) * 2, ror1, 1);
+
+#define decrypt_cycle(nc) \
+       decrypt_round(RC, RD, RA, RB, (nc) * 2 + 1, ror1, 1); \
+       decrypt_round(RA, RB, RC, RD, (nc) * 2, ror1, 1);
+
+#define last_decrypt_cycle(nc) \
+       decrypt_round(RC, RD, RA, RB, (nc) * 2 + 1, ror1, 1); \
+       decrypt_round(RA, RB, RC, RD, (nc) * 2, ror1, 1); \
+       ror1(RD);
+
+.globl _gcry_twofish_arm_encrypt_block
+ELF(.type   _gcry_twofish_arm_encrypt_block,%function;)
+
+_gcry_twofish_arm_encrypt_block:
+       /* input:
+        *      x0: ctx
+        *      x1: dst
+        *      x2: src
+        */
+       CFI_STARTPROC();
+
+       add CTXw, CTX, #(w);
+
+       ldr_input_le(RSRC, RA, RB, RC, RD, RT0);
+
+       /* Input whitening */
+       ldp RT0, RT1, [CTXw, #(0*8)];
+       ldp RT2, RT3, [CTXw, #(1*8)];
+       add CTXs3, CTX, #(s3);
+       add CTXs2, CTX, #(s2);
+       add CTXs1, CTX, #(s1);
+       mov RMASK, #(0xff << 2);
+       eor RA, RA, RT0;
+       eor RB, RB, RT1;
+       eor RC, RC, RT2;
+       eor RD, RD, RT3;
+
+       first_encrypt_cycle(0);
+       encrypt_cycle(1);
+       encrypt_cycle(2);
+       encrypt_cycle(3);
+       encrypt_cycle(4);
+       encrypt_cycle(5);
+       encrypt_cycle(6);
+       last_encrypt_cycle(7);
+
+       /* Output whitening */
+       ldp RT0, RT1, [CTXw, #(2*8)];
+       ldp RT2, RT3, [CTXw, #(3*8)];
+       eor RC, RC, RT0;
+       eor RD, RD, RT1;
+       eor RA, RA, RT2;
+       eor RB, RB, RT3;
+
+       str_output_le(RDST, RC, RD, RA, RB, RT0, RT1);
+
+       ret_spec_stop;
+       CFI_ENDPROC();
+.ltorg
+ELF(.size _gcry_twofish_arm_encrypt_block,.-_gcry_twofish_arm_encrypt_block;)
+
+.globl _gcry_twofish_arm_decrypt_block
+ELF(.type   _gcry_twofish_arm_decrypt_block,%function;)
+
+_gcry_twofish_arm_decrypt_block:
+       /* input:
+        *      %r0: ctx
+        *      %r1: dst
+        *      %r2: src
+        */
+       CFI_STARTPROC();
+
+       add CTXw, CTX, #(w);
+
+       ldr_input_le(RSRC, RC, RD, RA, RB, RT0);
+
+       /* Input whitening */
+       ldp RT0, RT1, [CTXw, #(2*8)];
+       ldp RT2, RT3, [CTXw, #(3*8)];
+       add CTXs3, CTX, #(s3);
+       add CTXs2, CTX, #(s2);
+       add CTXs1, CTX, #(s1);
+       mov RMASK, #(0xff << 2);
+       eor RC, RC, RT0;
+       eor RD, RD, RT1;
+       eor RA, RA, RT2;
+       eor RB, RB, RT3;
+
+       first_decrypt_cycle(7);
+       decrypt_cycle(6);
+       decrypt_cycle(5);
+       decrypt_cycle(4);
+       decrypt_cycle(3);
+       decrypt_cycle(2);
+       decrypt_cycle(1);
+       last_decrypt_cycle(0);
+
+       /* Output whitening */
+       ldp RT0, RT1, [CTXw, #(0*8)];
+       ldp RT2, RT3, [CTXw, #(1*8)];
+       eor RA, RA, RT0;
+       eor RB, RB, RT1;
+       eor RC, RC, RT2;
+       eor RD, RD, RT3;
+
+       str_output_le(RDST, RA, RB, RC, RD, RT0, RT1);
+
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size _gcry_twofish_arm_decrypt_block,.-_gcry_twofish_arm_decrypt_block;)
+
+#endif /*HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS*/
+#endif /*__AARCH64EL__*/
diff --git a/grub-core/lib/libgcrypt/cipher/twofish-amd64.S 
b/grub-core/lib/libgcrypt/cipher/twofish-amd64.S
new file mode 100644
index 000000000..a7a605533
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/twofish-amd64.S
@@ -0,0 +1,1184 @@
+/* twofish-amd64.S  -  AMD64 assembly implementation of Twofish cipher
+ *
+ * Copyright (C) 2013-2015 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifdef __x86_64
+#include <config.h>
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+    defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && defined(USE_TWOFISH)
+
+#include "asm-common-amd64.h"
+
+.text
+
+/* structure of TWOFISH_context: */
+#define s0 0
+#define s1 ((s0) + 4 * 256)
+#define s2 ((s1) + 4 * 256)
+#define s3 ((s2) + 4 * 256)
+#define w  ((s3) + 4 * 256)
+#define k  ((w) + 4 * 8)
+
+/* register macros */
+#define CTX    %rdi
+
+#define RA     %rax
+#define RB     %rbx
+#define RC     %rcx
+#define RD     %rdx
+
+#define RAd    %eax
+#define RBd    %ebx
+#define RCd    %ecx
+#define RDd    %edx
+
+#define RAbl   %al
+#define RBbl   %bl
+#define RCbl   %cl
+#define RDbl   %dl
+
+#define RAbh   %ah
+#define RBbh   %bh
+#define RCbh   %ch
+#define RDbh   %dh
+
+#define RX     %r8
+#define RY     %r9
+
+#define RXd    %r8d
+#define RYd    %r9d
+
+#define RT0    %rsi
+#define RT1    %rbp
+#define RT2    %r10
+#define RT3    %r11
+
+#define RT0d   %esi
+#define RT1d   %ebp
+#define RT2d   %r10d
+#define RT3d   %r11d
+
+/***********************************************************************
+ * AMD64 assembly implementation of the Twofish cipher
+ ***********************************************************************/
+#define enc_g1_2(a, b, x, y) \
+       movzbl b ## bl, RT3d; \
+       movzbl b ## bh, RT1d; \
+       movzbl a ## bl, RT2d; \
+       movzbl a ## bh, RT0d; \
+       rorl $16, b ## d; \
+       rorl $16, a ## d; \
+       movl s1(CTX, RT3, 4), RYd; \
+       movzbl b ## bl, RT3d; \
+       movl s0(CTX, RT2, 4), RXd; \
+       movzbl a ## bl, RT2d; \
+       xorl s2(CTX, RT1, 4), RYd; \
+       movzbl b ## bh, RT1d; \
+       xorl s1(CTX, RT0, 4), RXd; \
+       movzbl a ## bh, RT0d; \
+       rorl $16, b ## d; \
+       rorl $16, a ## d; \
+       xorl s3(CTX, RT3, 4), RYd; \
+       xorl s2(CTX, RT2, 4), RXd; \
+       xorl s0(CTX, RT1, 4), RYd; \
+       xorl s3(CTX, RT0, 4), RXd;
+
+#define dec_g1_2(a, b, x, y) \
+       movzbl a ## bl, RT2d; \
+       movzbl a ## bh, RT0d; \
+       movzbl b ## bl, RT3d; \
+       movzbl b ## bh, RT1d; \
+       rorl $16, a ## d; \
+       rorl $16, b ## d; \
+       movl s0(CTX, RT2, 4), RXd; \
+       movzbl a ## bl, RT2d; \
+       movl s1(CTX, RT3, 4), RYd; \
+       movzbl b ## bl, RT3d; \
+       xorl s1(CTX, RT0, 4), RXd; \
+       movzbl a ## bh, RT0d; \
+       xorl s2(CTX, RT1, 4), RYd; \
+       movzbl b ## bh, RT1d; \
+       rorl $16, a ## d; \
+       rorl $16, b ## d; \
+       xorl s2(CTX, RT2, 4), RXd; \
+       xorl s3(CTX, RT3, 4), RYd; \
+       xorl s3(CTX, RT0, 4), RXd; \
+       xorl s0(CTX, RT1, 4), RYd;
+
+#define encrypt_round(ra, rb, rc, rd, n) \
+       enc_g1_2(##ra, ##rb, RX, RY); \
+       \
+       leal (RXd, RYd, 2), RT0d; \
+       addl RYd, RXd; \
+       addl (k + 8 * (n) + 4)(CTX), RT0d; \
+       roll $1, rd ## d; \
+       addl (k + 8 * (n))(CTX), RXd; \
+       xorl RT0d, rd ## d; \
+       xorl RXd, rc ## d; \
+       rorl $1, rc ## d;
+
+#define decrypt_round(ra, rb, rc, rd, n) \
+       dec_g1_2(##ra, ##rb, RX, RY); \
+       \
+       leal (RXd, RYd, 2), RT0d; \
+       addl RYd, RXd; \
+       addl (k + 8 * (n) + 4)(CTX), RT0d; \
+       roll $1, rc ## d; \
+       addl (k + 8 * (n))(CTX), RXd; \
+       xorl RXd, rc ## d; \
+       xorl RT0d, rd ## d; \
+       rorl $1, rd ## d;
+
+#define encrypt_cycle(a, b, c, d, nc) \
+       encrypt_round(##a, ##b, ##c, ##d, (nc) * 2); \
+       encrypt_round(##c, ##d, ##a, ##b, (nc) * 2 + 1);
+
+#define decrypt_cycle(a, b, c, d, nc) \
+       decrypt_round(##c, ##d, ##a, ##b, (nc) * 2 + 1); \
+       decrypt_round(##a, ##b, ##c, ##d, (nc) * 2);
+
+#define inpack(in, n, x, m) \
+       movl (4 * (n))(in), x; \
+       xorl (w + 4 * (m))(CTX), x;
+
+#define outunpack(out, n, x, m) \
+       xorl (w + 4 * (m))(CTX), x; \
+       movl x, (4 * (n))(out);
+
+.align 8
+.globl _gcry_twofish_amd64_encrypt_block
+ELF(.type   _gcry_twofish_amd64_encrypt_block,@function;)
+
+_gcry_twofish_amd64_encrypt_block:
+       /* input:
+        *      %rdi: context, CTX
+        *      %rsi: dst
+        *      %rdx: src
+        */
+       CFI_STARTPROC();
+       ENTER_SYSV_FUNC_PARAMS_0_4
+
+       subq $(3 * 8), %rsp;
+       CFI_ADJUST_CFA_OFFSET(3 * 8);
+       movq %rsi, (0 * 8)(%rsp);
+       movq %rbp, (1 * 8)(%rsp);
+       movq %rbx, (2 * 8)(%rsp);
+       CFI_REL_OFFSET(%rbp, 1 * 8);
+       CFI_REL_OFFSET(%rbx, 2 * 8);
+
+       movq %rdx, RX;
+       inpack(RX, 0, RAd, 0);
+       inpack(RX, 1, RBd, 1);
+       inpack(RX, 2, RCd, 2);
+       inpack(RX, 3, RDd, 3);
+
+       encrypt_cycle(RA, RB, RC, RD, 0);
+       encrypt_cycle(RA, RB, RC, RD, 1);
+       encrypt_cycle(RA, RB, RC, RD, 2);
+       encrypt_cycle(RA, RB, RC, RD, 3);
+       encrypt_cycle(RA, RB, RC, RD, 4);
+       encrypt_cycle(RA, RB, RC, RD, 5);
+       encrypt_cycle(RA, RB, RC, RD, 6);
+       encrypt_cycle(RA, RB, RC, RD, 7);
+
+       movq (0 * 8)(%rsp), RX; /*dst*/
+       outunpack(RX, 0, RCd, 4);
+       outunpack(RX, 1, RDd, 5);
+       outunpack(RX, 2, RAd, 6);
+       outunpack(RX, 3, RBd, 7);
+
+       movq (2 * 8)(%rsp), %rbx;
+       movq (1 * 8)(%rsp), %rbp;
+       CFI_RESTORE(%rbx);
+       CFI_RESTORE(%rbp);
+       addq $(3 * 8), %rsp;
+       CFI_ADJUST_CFA_OFFSET(-3 * 8);
+
+       EXIT_SYSV_FUNC
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size 
_gcry_twofish_amd64_encrypt_block,.-_gcry_twofish_amd64_encrypt_block;)
+
+.align 8
+.globl _gcry_twofish_amd64_decrypt_block
+ELF(.type   _gcry_twofish_amd64_decrypt_block,@function;)
+
+_gcry_twofish_amd64_decrypt_block:
+       /* input:
+        *      %rdi: context, CTX
+        *      %rsi: dst
+        *      %rdx: src
+        */
+       CFI_STARTPROC();
+       ENTER_SYSV_FUNC_PARAMS_0_4
+
+       subq $(3 * 8), %rsp;
+       CFI_ADJUST_CFA_OFFSET(3 * 8);
+       movq %rsi, (0 * 8)(%rsp);
+       movq %rbp, (1 * 8)(%rsp);
+       movq %rbx, (2 * 8)(%rsp);
+       CFI_REL_OFFSET(%rbp, 1 * 8);
+       CFI_REL_OFFSET(%rbx, 2 * 8);
+
+       movq %rdx, RX;
+       inpack(RX, 0, RCd, 4);
+       inpack(RX, 1, RDd, 5);
+       inpack(RX, 2, RAd, 6);
+       inpack(RX, 3, RBd, 7);
+
+       decrypt_cycle(RA, RB, RC, RD, 7);
+       decrypt_cycle(RA, RB, RC, RD, 6);
+       decrypt_cycle(RA, RB, RC, RD, 5);
+       decrypt_cycle(RA, RB, RC, RD, 4);
+       decrypt_cycle(RA, RB, RC, RD, 3);
+       decrypt_cycle(RA, RB, RC, RD, 2);
+       decrypt_cycle(RA, RB, RC, RD, 1);
+       decrypt_cycle(RA, RB, RC, RD, 0);
+
+       movq (0 * 8)(%rsp), RX; /*dst*/
+       outunpack(RX, 0, RAd, 0);
+       outunpack(RX, 1, RBd, 1);
+       outunpack(RX, 2, RCd, 2);
+       outunpack(RX, 3, RDd, 3);
+
+       movq (2 * 8)(%rsp), %rbx;
+       movq (1 * 8)(%rsp), %rbp;
+       CFI_RESTORE(%rbx);
+       CFI_RESTORE(%rbp);
+       addq $(3 * 8), %rsp;
+       CFI_ADJUST_CFA_OFFSET(-3 * 8);
+
+       EXIT_SYSV_FUNC
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size 
_gcry_twofish_amd64_encrypt_block,.-_gcry_twofish_amd64_encrypt_block;)
+
+#undef CTX
+
+#undef RA
+#undef RB
+#undef RC
+#undef RD
+
+#undef RAd
+#undef RBd
+#undef RCd
+#undef RDd
+
+#undef RAbl
+#undef RBbl
+#undef RCbl
+#undef RDbl
+
+#undef RAbh
+#undef RBbh
+#undef RCbh
+#undef RDbh
+
+#undef RX
+#undef RY
+
+#undef RXd
+#undef RYd
+
+#undef RT0
+#undef RT1
+#undef RT2
+#undef RT3
+
+#undef RT0d
+#undef RT1d
+#undef RT2d
+#undef RT3d
+
+/***********************************************************************
+ * AMD64 assembly implementation of the Twofish cipher, 3-way parallel
+ ***********************************************************************/
+#define CTX %rdi
+#define RIO %rdx
+
+#define RAB0 %rax
+#define RAB1 %rbx
+#define RAB2 %rcx
+
+#define RAB0d %eax
+#define RAB1d %ebx
+#define RAB2d %ecx
+
+#define RAB0bh %ah
+#define RAB1bh %bh
+#define RAB2bh %ch
+
+#define RAB0bl %al
+#define RAB1bl %bl
+#define RAB2bl %cl
+
+#define RCD0 %r8
+#define RCD1 %r9
+#define RCD2 %r10
+
+#define RCD0d %r8d
+#define RCD1d %r9d
+#define RCD2d %r10d
+
+#define RX0 %rbp
+#define RX1 %r11
+#define RX2 %r12
+
+#define RX0d %ebp
+#define RX1d %r11d
+#define RX2d %r12d
+
+#define RY0 %r13
+#define RY1 %r14
+#define RY2 %r15
+
+#define RY0d %r13d
+#define RY1d %r14d
+#define RY2d %r15d
+
+#define RT0 %rdx
+#define RT1 %rsi
+
+#define RT0d %edx
+#define RT1d %esi
+
+#define do16bit_ror(rot, op1, op2, T0, T1, tmp1, tmp2, ab, dst) \
+       movzbl ab ## bl,                tmp2 ## d; \
+       movzbl ab ## bh,                tmp1 ## d; \
+       rorq $(rot),                    ab; \
+       op1##l T0(CTX, tmp2, 4),        dst ## d; \
+       op2##l T1(CTX, tmp1, 4),        dst ## d;
+
+/*
+ * Combined G1 & G2 function. Reordered with help of rotates to have moves
+ * at beginning.
+ */
+#define g1g2_3(ab, cd, Tx0, Tx1, Tx2, Tx3, Ty0, Ty1, Ty2, Ty3, x, y) \
+       /* G1,1 && G2,1 */ \
+       do16bit_ror(32, mov, xor, Tx0, Tx1, RT0, x ## 0, ab ## 0, x ## 0); \
+       do16bit_ror(48, mov, xor, Ty1, Ty2, RT0, y ## 0, ab ## 0, y ## 0); \
+       \
+       do16bit_ror(32, mov, xor, Tx0, Tx1, RT0, x ## 1, ab ## 1, x ## 1); \
+       do16bit_ror(48, mov, xor, Ty1, Ty2, RT0, y ## 1, ab ## 1, y ## 1); \
+       \
+       do16bit_ror(32, mov, xor, Tx0, Tx1, RT0, x ## 2, ab ## 2, x ## 2); \
+       do16bit_ror(48, mov, xor, Ty1, Ty2, RT0, y ## 2, ab ## 2, y ## 2); \
+       \
+       /* G1,2 && G2,2 */ \
+       do16bit_ror(32, xor, xor, Tx2, Tx3, RT0, RT1, ab ## 0, x ## 0); \
+       do16bit_ror(16, xor, xor, Ty3, Ty0, RT0, RT1, ab ## 0, y ## 0); \
+       movq ab ## 0, RT0; \
+       movq cd ## 0, ab ## 0; \
+       movq RT0, cd ## 0; \
+       \
+       do16bit_ror(32, xor, xor, Tx2, Tx3, RT0, RT1, ab ## 1, x ## 1); \
+       do16bit_ror(16, xor, xor, Ty3, Ty0, RT0, RT1, ab ## 1, y ## 1); \
+       movq ab ## 1, RT0; \
+       movq cd ## 1, ab ## 1; \
+       movq RT0, cd ## 1; \
+       \
+       do16bit_ror(32, xor, xor, Tx2, Tx3, RT0, RT1, ab ## 2, x ## 2); \
+       do16bit_ror(16, xor, xor, Ty3, Ty0, RT0, RT1, ab ## 2, y ## 2); \
+       movq ab ## 2, RT0; \
+       movq cd ## 2, ab ## 2; \
+       movq RT0, cd ## 2;
+
+#define enc_round_end(ab, x, y, n) \
+       addl y ## d,                    x ## d; \
+       addl x ## d,                    y ## d; \
+       addl k+4*(2*(n))(CTX),          x ## d; \
+       xorl ab ## d,                   x ## d; \
+       addl k+4*(2*(n)+1)(CTX),        y ## d; \
+       shrq $32,                       ab; \
+       roll $1,                        ab ## d; \
+       xorl y ## d,                    ab ## d; \
+       shlq $32,                       ab; \
+       rorl $1,                        x ## d; \
+       orq x,                          ab;
+
+#define dec_round_end(ba, x, y, n) \
+       addl y ## d,                    x ## d; \
+       addl x ## d,                    y ## d; \
+       addl k+4*(2*(n))(CTX),          x ## d; \
+       addl k+4*(2*(n)+1)(CTX),        y ## d; \
+       xorl ba ## d,                   y ## d; \
+       shrq $32,                       ba; \
+       roll $1,                        ba ## d; \
+       xorl x ## d,                    ba ## d; \
+       shlq $32,                       ba; \
+       rorl $1,                        y ## d; \
+       orq y,                          ba;
+
+#define encrypt_round3(ab, cd, n) \
+       g1g2_3(ab, cd, s0, s1, s2, s3, s0, s1, s2, s3, RX, RY); \
+       \
+       enc_round_end(ab ## 0, RX0, RY0, n); \
+       enc_round_end(ab ## 1, RX1, RY1, n); \
+       enc_round_end(ab ## 2, RX2, RY2, n);
+
+#define decrypt_round3(ba, dc, n) \
+       g1g2_3(ba, dc, s1, s2, s3, s0, s3, s0, s1, s2, RY, RX); \
+       \
+       dec_round_end(ba ## 0, RX0, RY0, n); \
+       dec_round_end(ba ## 1, RX1, RY1, n); \
+       dec_round_end(ba ## 2, RX2, RY2, n);
+
+#define encrypt_cycle3(ab, cd, n) \
+       encrypt_round3(ab, cd, n*2); \
+       encrypt_round3(ab, cd, (n*2)+1);
+
+#define decrypt_cycle3(ba, dc, n) \
+       decrypt_round3(ba, dc, (n*2)+1); \
+       decrypt_round3(ba, dc, (n*2));
+
+#define inpack3(xy, m) \
+       xorq w+4*m(CTX),                xy ## 0; \
+       xorq w+4*m(CTX),                xy ## 1; \
+       xorq w+4*m(CTX),                xy ## 2;
+
+#define outunpack3(xy, m) \
+       xorq w+4*m(CTX),                xy ## 0; \
+       xorq w+4*m(CTX),                xy ## 1; \
+       xorq w+4*m(CTX),                xy ## 2;
+
+#define inpack_enc3() \
+       inpack3(RAB, 0); \
+       inpack3(RCD, 2);
+
+#define outunpack_enc3() \
+       outunpack3(RAB, 6); \
+       outunpack3(RCD, 4);
+
+#define inpack_dec3() \
+       inpack3(RAB, 4); \
+       rorq $32,                       RAB0; \
+       rorq $32,                       RAB1; \
+       rorq $32,                       RAB2; \
+       inpack3(RCD, 6); \
+       rorq $32,                       RCD0; \
+       rorq $32,                       RCD1; \
+       rorq $32,                       RCD2;
+
+#define outunpack_dec3() \
+       rorq $32,                       RCD0; \
+       rorq $32,                       RCD1; \
+       rorq $32,                       RCD2; \
+       outunpack3(RCD, 0); \
+       rorq $32,                       RAB0; \
+       rorq $32,                       RAB1; \
+       rorq $32,                       RAB2; \
+       outunpack3(RAB, 2);
+
+.align 8
+ELF(.type __twofish_enc_blk3,@function;)
+
+__twofish_enc_blk3:
+       /* input:
+        *      %rdi: ctx, CTX
+        *      RAB0,RCD0,RAB1,RCD1,RAB2,RCD2: three plaintext blocks
+        * output:
+        *      RCD0,RAB0,RCD1,RAB1,RCD2,RAB2: three ciphertext blocks
+        */
+       CFI_STARTPROC();
+
+       inpack_enc3();
+
+       encrypt_cycle3(RAB, RCD, 0);
+       encrypt_cycle3(RAB, RCD, 1);
+       encrypt_cycle3(RAB, RCD, 2);
+       encrypt_cycle3(RAB, RCD, 3);
+       encrypt_cycle3(RAB, RCD, 4);
+       encrypt_cycle3(RAB, RCD, 5);
+       encrypt_cycle3(RAB, RCD, 6);
+       encrypt_cycle3(RAB, RCD, 7);
+
+       outunpack_enc3();
+
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size __twofish_enc_blk3,.-__twofish_enc_blk3;)
+
+.align 8
+ELF(.type  __twofish_dec_blk3,@function;)
+
+__twofish_dec_blk3:
+       /* input:
+        *      %rdi: ctx, CTX
+        *      RAB0,RCD0,RAB1,RCD1,RAB2,RCD2: three ciphertext blocks
+        * output:
+        *      RCD0,RAB0,RCD1,RAB1,RCD2,RAB2: three plaintext blocks
+        */
+       CFI_STARTPROC();
+
+       inpack_dec3();
+
+       decrypt_cycle3(RAB, RCD, 7);
+       decrypt_cycle3(RAB, RCD, 6);
+       decrypt_cycle3(RAB, RCD, 5);
+       decrypt_cycle3(RAB, RCD, 4);
+       decrypt_cycle3(RAB, RCD, 3);
+       decrypt_cycle3(RAB, RCD, 2);
+       decrypt_cycle3(RAB, RCD, 1);
+       decrypt_cycle3(RAB, RCD, 0);
+
+       outunpack_dec3();
+
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size __twofish_dec_blk3,.-__twofish_dec_blk3;)
+
+.align 8
+.globl _gcry_twofish_amd64_ctr_enc
+ELF(.type   _gcry_twofish_amd64_ctr_enc,@function;)
+_gcry_twofish_amd64_ctr_enc:
+       /* input:
+        *      %rdi: ctx, CTX
+        *      %rsi: dst (3 blocks)
+        *      %rdx: src (3 blocks)
+        *      %rcx: iv (big endian, 128bit)
+        */
+       CFI_STARTPROC();
+       ENTER_SYSV_FUNC_PARAMS_0_4
+
+       subq $(8 * 8), %rsp;
+       CFI_ADJUST_CFA_OFFSET(8 * 8);
+       movq %rbp, (0 * 8)(%rsp);
+       movq %rbx, (1 * 8)(%rsp);
+       movq %r12, (2 * 8)(%rsp);
+       movq %r13, (3 * 8)(%rsp);
+       movq %r14, (4 * 8)(%rsp);
+       movq %r15, (5 * 8)(%rsp);
+       CFI_REL_OFFSET(%rbp, 0 * 8);
+       CFI_REL_OFFSET(%rbx, 1 * 8);
+       CFI_REL_OFFSET(%r12, 2 * 8);
+       CFI_REL_OFFSET(%r13, 3 * 8);
+       CFI_REL_OFFSET(%r14, 4 * 8);
+       CFI_REL_OFFSET(%r15, 5 * 8);
+
+       movq %rsi, (6 * 8)(%rsp);
+       movq %rdx, (7 * 8)(%rsp);
+       movq %rcx, RX0;
+
+       /* load IV and byteswap */
+       movq 8(RX0), RT0;
+       movq 0(RX0), RT1;
+       movq RT0, RCD0;
+       movq RT1, RAB0;
+       bswapq RT0;
+       bswapq RT1;
+
+       /* construct IVs */
+       movq RT0, RCD1;
+       movq RT1, RAB1;
+       movq RT0, RCD2;
+       movq RT1, RAB2;
+       addq $1, RCD1;
+       adcq $0, RAB1;
+       bswapq RCD1;
+       bswapq RAB1;
+       addq $2, RCD2;
+       adcq $0, RAB2;
+       bswapq RCD2;
+       bswapq RAB2;
+       addq $3, RT0;
+       adcq $0, RT1;
+       bswapq RT0;
+       bswapq RT1;
+
+       /* store new IV */
+       movq RT0, 8(RX0);
+       movq RT1, 0(RX0);
+
+       call __twofish_enc_blk3;
+
+       movq (7 * 8)(%rsp), RX0; /*src*/
+       movq (6 * 8)(%rsp), RX1; /*dst*/
+
+       /* XOR key-stream with plaintext */
+       xorq (0 * 8)(RX0), RCD0;
+       xorq (1 * 8)(RX0), RAB0;
+       xorq (2 * 8)(RX0), RCD1;
+       xorq (3 * 8)(RX0), RAB1;
+       xorq (4 * 8)(RX0), RCD2;
+       xorq (5 * 8)(RX0), RAB2;
+       movq RCD0, (0 * 8)(RX1);
+       movq RAB0, (1 * 8)(RX1);
+       movq RCD1, (2 * 8)(RX1);
+       movq RAB1, (3 * 8)(RX1);
+       movq RCD2, (4 * 8)(RX1);
+       movq RAB2, (5 * 8)(RX1);
+
+       movq (0 * 8)(%rsp), %rbp;
+       movq (1 * 8)(%rsp), %rbx;
+       movq (2 * 8)(%rsp), %r12;
+       movq (3 * 8)(%rsp), %r13;
+       movq (4 * 8)(%rsp), %r14;
+       movq (5 * 8)(%rsp), %r15;
+       CFI_RESTORE(%rbp);
+       CFI_RESTORE(%rbx);
+       CFI_RESTORE(%r12);
+       CFI_RESTORE(%r13);
+       CFI_RESTORE(%r14);
+       CFI_RESTORE(%r15);
+       addq $(8 * 8), %rsp;
+       CFI_ADJUST_CFA_OFFSET(-8 * 8);
+
+       EXIT_SYSV_FUNC
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size _gcry_twofish_amd64_ctr_enc,.-_gcry_twofish_amd64_ctr_enc;)
+
+.align 8
+.globl _gcry_twofish_amd64_cbc_dec
+ELF(.type   _gcry_twofish_amd64_cbc_dec,@function;)
+_gcry_twofish_amd64_cbc_dec:
+       /* input:
+        *      %rdi: ctx, CTX
+        *      %rsi: dst (3 blocks)
+        *      %rdx: src (3 blocks)
+        *      %rcx: iv (128bit)
+        */
+       CFI_STARTPROC();
+       ENTER_SYSV_FUNC_PARAMS_0_4
+
+       subq $(9 * 8), %rsp;
+       CFI_ADJUST_CFA_OFFSET(9 * 8);
+       movq %rbp, (0 * 8)(%rsp);
+       movq %rbx, (1 * 8)(%rsp);
+       movq %r12, (2 * 8)(%rsp);
+       movq %r13, (3 * 8)(%rsp);
+       movq %r14, (4 * 8)(%rsp);
+       movq %r15, (5 * 8)(%rsp);
+       CFI_REL_OFFSET(%rbp, 0 * 8);
+       CFI_REL_OFFSET(%rbx, 1 * 8);
+       CFI_REL_OFFSET(%r12, 2 * 8);
+       CFI_REL_OFFSET(%r13, 3 * 8);
+       CFI_REL_OFFSET(%r14, 4 * 8);
+       CFI_REL_OFFSET(%r15, 5 * 8);
+
+       movq %rsi, (6 * 8)(%rsp);
+       movq %rdx, (7 * 8)(%rsp);
+       movq %rcx, (8 * 8)(%rsp);
+       movq %rdx, RX0;
+
+       /* load input */
+       movq (0 * 8)(RX0), RAB0;
+       movq (1 * 8)(RX0), RCD0;
+       movq (2 * 8)(RX0), RAB1;
+       movq (3 * 8)(RX0), RCD1;
+       movq (4 * 8)(RX0), RAB2;
+       movq (5 * 8)(RX0), RCD2;
+
+       call __twofish_dec_blk3;
+
+       movq (8 * 8)(%rsp), RT0; /*iv*/
+       movq (7 * 8)(%rsp), RX0; /*src*/
+       movq (6 * 8)(%rsp), RX1; /*dst*/
+
+       movq (4 * 8)(RX0), RY0;
+       movq (5 * 8)(RX0), RY1;
+       xorq (0 * 8)(RT0), RCD0;
+       xorq (1 * 8)(RT0), RAB0;
+       xorq (0 * 8)(RX0), RCD1;
+       xorq (1 * 8)(RX0), RAB1;
+       xorq (2 * 8)(RX0), RCD2;
+       xorq (3 * 8)(RX0), RAB2;
+       movq RY0, (0 * 8)(RT0);
+       movq RY1, (1 * 8)(RT0);
+
+       movq RCD0, (0 * 8)(RX1);
+       movq RAB0, (1 * 8)(RX1);
+       movq RCD1, (2 * 8)(RX1);
+       movq RAB1, (3 * 8)(RX1);
+       movq RCD2, (4 * 8)(RX1);
+       movq RAB2, (5 * 8)(RX1);
+
+       movq (0 * 8)(%rsp), %rbp;
+       movq (1 * 8)(%rsp), %rbx;
+       movq (2 * 8)(%rsp), %r12;
+       movq (3 * 8)(%rsp), %r13;
+       movq (4 * 8)(%rsp), %r14;
+       movq (5 * 8)(%rsp), %r15;
+       CFI_RESTORE(%rbp);
+       CFI_RESTORE(%rbx);
+       CFI_RESTORE(%r12);
+       CFI_RESTORE(%r13);
+       CFI_RESTORE(%r14);
+       CFI_RESTORE(%r15);
+       addq $(9 * 8), %rsp;
+       CFI_ADJUST_CFA_OFFSET(-9 * 8);
+
+       EXIT_SYSV_FUNC
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size _gcry_twofish_amd64_cbc_dec,.-_gcry_twofish_amd64_cbc_dec;)
+
+.align 8
+.globl _gcry_twofish_amd64_cfb_dec
+ELF(.type   _gcry_twofish_amd64_cfb_dec,@function;)
+_gcry_twofish_amd64_cfb_dec:
+       /* input:
+        *      %rdi: ctx, CTX
+        *      %rsi: dst (3 blocks)
+        *      %rdx: src (3 blocks)
+        *      %rcx: iv (128bit)
+        */
+       CFI_STARTPROC();
+       ENTER_SYSV_FUNC_PARAMS_0_4
+
+       subq $(8 * 8), %rsp;
+       CFI_ADJUST_CFA_OFFSET(8 * 8);
+       movq %rbp, (0 * 8)(%rsp);
+       movq %rbx, (1 * 8)(%rsp);
+       movq %r12, (2 * 8)(%rsp);
+       movq %r13, (3 * 8)(%rsp);
+       movq %r14, (4 * 8)(%rsp);
+       movq %r15, (5 * 8)(%rsp);
+       CFI_REL_OFFSET(%rbp, 0 * 8);
+       CFI_REL_OFFSET(%rbx, 1 * 8);
+       CFI_REL_OFFSET(%r12, 2 * 8);
+       CFI_REL_OFFSET(%r13, 3 * 8);
+       CFI_REL_OFFSET(%r14, 4 * 8);
+       CFI_REL_OFFSET(%r15, 5 * 8);
+
+       movq %rsi, (6 * 8)(%rsp);
+       movq %rdx, (7 * 8)(%rsp);
+       movq %rdx, RX0;
+       movq %rcx, RX1;
+
+       /* load input */
+       movq (0 * 8)(RX1), RAB0;
+       movq (1 * 8)(RX1), RCD0;
+       movq (0 * 8)(RX0), RAB1;
+       movq (1 * 8)(RX0), RCD1;
+       movq (2 * 8)(RX0), RAB2;
+       movq (3 * 8)(RX0), RCD2;
+
+       /* Update IV */
+       movq (4 * 8)(RX0), RY0;
+       movq (5 * 8)(RX0), RY1;
+       movq RY0, (0 * 8)(RX1);
+       movq RY1, (1 * 8)(RX1);
+
+       call __twofish_enc_blk3;
+
+       movq (7 * 8)(%rsp), RX0; /*src*/
+       movq (6 * 8)(%rsp), RX1; /*dst*/
+
+       xorq (0 * 8)(RX0), RCD0;
+       xorq (1 * 8)(RX0), RAB0;
+       xorq (2 * 8)(RX0), RCD1;
+       xorq (3 * 8)(RX0), RAB1;
+       xorq (4 * 8)(RX0), RCD2;
+       xorq (5 * 8)(RX0), RAB2;
+       movq RCD0, (0 * 8)(RX1);
+       movq RAB0, (1 * 8)(RX1);
+       movq RCD1, (2 * 8)(RX1);
+       movq RAB1, (3 * 8)(RX1);
+       movq RCD2, (4 * 8)(RX1);
+       movq RAB2, (5 * 8)(RX1);
+
+       movq (0 * 8)(%rsp), %rbp;
+       movq (1 * 8)(%rsp), %rbx;
+       movq (2 * 8)(%rsp), %r12;
+       movq (3 * 8)(%rsp), %r13;
+       movq (4 * 8)(%rsp), %r14;
+       movq (5 * 8)(%rsp), %r15;
+       CFI_RESTORE(%rbp);
+       CFI_RESTORE(%rbx);
+       CFI_RESTORE(%r12);
+       CFI_RESTORE(%r13);
+       CFI_RESTORE(%r14);
+       CFI_RESTORE(%r15);
+       addq $(8 * 8), %rsp;
+       CFI_ADJUST_CFA_OFFSET(-8 * 8);
+
+       EXIT_SYSV_FUNC
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size _gcry_twofish_amd64_cfb_dec,.-_gcry_twofish_amd64_cfb_dec;)
+
+.align 8
+.globl _gcry_twofish_amd64_ocb_enc
+ELF(.type   _gcry_twofish_amd64_ocb_enc,@function;)
+_gcry_twofish_amd64_ocb_enc:
+       /* input:
+        *      %rdi: ctx, CTX
+        *      %rsi: dst (3 blocks)
+        *      %rdx: src (3 blocks)
+        *      %rcx: offset
+        *      %r8 : checksum
+        *      %r9 : L pointers (void *L[3])
+        */
+       CFI_STARTPROC();
+       ENTER_SYSV_FUNC_PARAMS_6
+
+       subq $(8 * 8), %rsp;
+       CFI_ADJUST_CFA_OFFSET(8 * 8);
+       movq %rbp, (0 * 8)(%rsp);
+       movq %rbx, (1 * 8)(%rsp);
+       movq %r12, (2 * 8)(%rsp);
+       movq %r13, (3 * 8)(%rsp);
+       movq %r14, (4 * 8)(%rsp);
+       movq %r15, (5 * 8)(%rsp);
+       CFI_REL_OFFSET(%rbp, 0 * 8);
+       CFI_REL_OFFSET(%rbx, 1 * 8);
+       CFI_REL_OFFSET(%r12, 2 * 8);
+       CFI_REL_OFFSET(%r13, 3 * 8);
+       CFI_REL_OFFSET(%r14, 4 * 8);
+       CFI_REL_OFFSET(%r15, 5 * 8);
+
+       movq %rsi, (6 * 8)(%rsp);
+       movq %rdx, RX0;
+       movq %rcx, RX1;
+       movq %r8, RX2;
+       movq %r9, RY0;
+       movq %rsi, RY1;
+
+       /* Load offset */
+       movq (0 * 8)(RX1), RT0;
+       movq (1 * 8)(RX1), RT1;
+
+       /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+       movq (RY0), RY2;
+       xorq (0 * 8)(RY2), RT0;
+       xorq (1 * 8)(RY2), RT1;
+       movq (0 * 8)(RX0), RAB0;
+       movq (1 * 8)(RX0), RCD0;
+       /* Store Offset_i */
+       movq RT0, (0 * 8)(RY1);
+       movq RT1, (1 * 8)(RY1);
+       /* Checksum_i = Checksum_{i-1} xor P_i  */
+       xor RAB0, (0 * 8)(RX2);
+       xor RCD0, (1 * 8)(RX2);
+       /* PX_i = P_i xor Offset_i */
+       xorq RT0, RAB0;
+       xorq RT1, RCD0;
+
+       /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+       movq 8(RY0), RY2;
+       xorq (0 * 8)(RY2), RT0;
+       xorq (1 * 8)(RY2), RT1;
+       movq (2 * 8)(RX0), RAB1;
+       movq (3 * 8)(RX0), RCD1;
+       /* Store Offset_i */
+       movq RT0, (2 * 8)(RY1);
+       movq RT1, (3 * 8)(RY1);
+       /* Checksum_i = Checksum_{i-1} xor P_i  */
+       xor RAB1, (0 * 8)(RX2);
+       xor RCD1, (1 * 8)(RX2);
+       /* PX_i = P_i xor Offset_i */
+       xorq RT0, RAB1;
+       xorq RT1, RCD1;
+
+       /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+       movq 16(RY0), RY2;
+       xorq (0 * 8)(RY2), RT0;
+       xorq (1 * 8)(RY2), RT1;
+       movq (4 * 8)(RX0), RAB2;
+       movq (5 * 8)(RX0), RCD2;
+       /* Store Offset_i */
+       movq RT0, (4 * 8)(RY1);
+       movq RT1, (5 * 8)(RY1);
+       /* Checksum_i = Checksum_{i-1} xor P_i  */
+       xor RAB2, (0 * 8)(RX2);
+       xor RCD2, (1 * 8)(RX2);
+       /* PX_i = P_i xor Offset_i */
+       xorq RT0, RAB2;
+       xorq RT1, RCD2;
+
+       /* Store offset */
+       movq RT0, (0 * 8)(RX1);
+       movq RT1, (1 * 8)(RX1);
+
+       /* CX_i = ENCIPHER(K, PX_i)  */
+       call __twofish_enc_blk3;
+
+       movq (6 * 8)(%rsp), RX1; /*dst*/
+
+       /* C_i = CX_i xor Offset_i  */
+       xorq RCD0, (0 * 8)(RX1);
+       xorq RAB0, (1 * 8)(RX1);
+       xorq RCD1, (2 * 8)(RX1);
+       xorq RAB1, (3 * 8)(RX1);
+       xorq RCD2, (4 * 8)(RX1);
+       xorq RAB2, (5 * 8)(RX1);
+
+       movq (0 * 8)(%rsp), %rbp;
+       movq (1 * 8)(%rsp), %rbx;
+       movq (2 * 8)(%rsp), %r12;
+       movq (3 * 8)(%rsp), %r13;
+       movq (4 * 8)(%rsp), %r14;
+       movq (5 * 8)(%rsp), %r15;
+       CFI_RESTORE(%rbp);
+       CFI_RESTORE(%rbx);
+       CFI_RESTORE(%r12);
+       CFI_RESTORE(%r13);
+       CFI_RESTORE(%r14);
+       CFI_RESTORE(%r15);
+       addq $(8 * 8), %rsp;
+       CFI_ADJUST_CFA_OFFSET(-8 * 8);
+
+       EXIT_SYSV_FUNC
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size _gcry_twofish_amd64_ocb_enc,.-_gcry_twofish_amd64_ocb_enc;)
+
+.align 8
+.globl _gcry_twofish_amd64_ocb_dec
+ELF(.type   _gcry_twofish_amd64_ocb_dec,@function;)
+_gcry_twofish_amd64_ocb_dec:
+       /* input:
+        *      %rdi: ctx, CTX
+        *      %rsi: dst (3 blocks)
+        *      %rdx: src (3 blocks)
+        *      %rcx: offset
+        *      %r8 : checksum
+        *      %r9 : L pointers (void *L[3])
+        */
+       CFI_STARTPROC();
+       ENTER_SYSV_FUNC_PARAMS_6
+
+       subq $(8 * 8), %rsp;
+       CFI_ADJUST_CFA_OFFSET(8 * 8);
+       movq %rbp, (0 * 8)(%rsp);
+       movq %rbx, (1 * 8)(%rsp);
+       movq %r12, (2 * 8)(%rsp);
+       movq %r13, (3 * 8)(%rsp);
+       movq %r14, (4 * 8)(%rsp);
+       movq %r15, (5 * 8)(%rsp);
+       CFI_REL_OFFSET(%rbp, 0 * 8);
+       CFI_REL_OFFSET(%rbx, 1 * 8);
+       CFI_REL_OFFSET(%r12, 2 * 8);
+       CFI_REL_OFFSET(%r13, 3 * 8);
+       CFI_REL_OFFSET(%r14, 4 * 8);
+       CFI_REL_OFFSET(%r15, 5 * 8);
+
+       movq %rsi, (6 * 8)(%rsp);
+       movq %r8,  (7 * 8)(%rsp);
+       movq %rdx, RX0;
+       movq %rcx, RX1;
+       movq %r9, RY0;
+       movq %rsi, RY1;
+
+       /* Load offset */
+       movq (0 * 8)(RX1), RT0;
+       movq (1 * 8)(RX1), RT1;
+
+       /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+       movq (RY0), RY2;
+       xorq (0 * 8)(RY2), RT0;
+       xorq (1 * 8)(RY2), RT1;
+       movq (0 * 8)(RX0), RAB0;
+       movq (1 * 8)(RX0), RCD0;
+       /* Store Offset_i */
+       movq RT0, (0 * 8)(RY1);
+       movq RT1, (1 * 8)(RY1);
+       /* CX_i = C_i xor Offset_i */
+       xorq RT0, RAB0;
+       xorq RT1, RCD0;
+
+       /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+       movq 8(RY0), RY2;
+       xorq (0 * 8)(RY2), RT0;
+       xorq (1 * 8)(RY2), RT1;
+       movq (2 * 8)(RX0), RAB1;
+       movq (3 * 8)(RX0), RCD1;
+       /* Store Offset_i */
+       movq RT0, (2 * 8)(RY1);
+       movq RT1, (3 * 8)(RY1);
+       /* PX_i = P_i xor Offset_i */
+       xorq RT0, RAB1;
+       xorq RT1, RCD1;
+
+       /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+       movq 16(RY0), RY2;
+       xorq (0 * 8)(RY2), RT0;
+       xorq (1 * 8)(RY2), RT1;
+       movq (4 * 8)(RX0), RAB2;
+       movq (5 * 8)(RX0), RCD2;
+       /* Store Offset_i */
+       movq RT0, (4 * 8)(RY1);
+       movq RT1, (5 * 8)(RY1);
+       /* PX_i = P_i xor Offset_i */
+       xorq RT0, RAB2;
+       xorq RT1, RCD2;
+
+       /* Store offset */
+       movq RT0, (0 * 8)(RX1);
+       movq RT1, (1 * 8)(RX1);
+
+       /* PX_i = DECIPHER(K, CX_i)  */
+       call __twofish_dec_blk3;
+
+       movq (7 * 8)(%rsp), RX2; /*checksum*/
+       movq (6 * 8)(%rsp), RX1; /*dst*/
+
+       /* Load checksum */
+       movq (0 * 8)(RX2), RT0;
+       movq (1 * 8)(RX2), RT1;
+
+       /* P_i = PX_i xor Offset_i  */
+       xorq RCD0, (0 * 8)(RX1);
+       xorq RAB0, (1 * 8)(RX1);
+       xorq RCD1, (2 * 8)(RX1);
+       xorq RAB1, (3 * 8)(RX1);
+       xorq RCD2, (4 * 8)(RX1);
+       xorq RAB2, (5 * 8)(RX1);
+
+       /* Checksum_i = Checksum_{i-1} xor P_i  */
+       xorq (0 * 8)(RX1), RT0;
+       xorq (1 * 8)(RX1), RT1;
+       xorq (2 * 8)(RX1), RT0;
+       xorq (3 * 8)(RX1), RT1;
+       xorq (4 * 8)(RX1), RT0;
+       xorq (5 * 8)(RX1), RT1;
+
+       /* Store checksum */
+       movq RT0, (0 * 8)(RX2);
+       movq RT1, (1 * 8)(RX2);
+
+       movq (0 * 8)(%rsp), %rbp;
+       movq (1 * 8)(%rsp), %rbx;
+       movq (2 * 8)(%rsp), %r12;
+       movq (3 * 8)(%rsp), %r13;
+       movq (4 * 8)(%rsp), %r14;
+       movq (5 * 8)(%rsp), %r15;
+       CFI_RESTORE(%rbp);
+       CFI_RESTORE(%rbx);
+       CFI_RESTORE(%r12);
+       CFI_RESTORE(%r13);
+       CFI_RESTORE(%r14);
+       CFI_RESTORE(%r15);
+       addq $(8 * 8), %rsp;
+       CFI_ADJUST_CFA_OFFSET(-8 * 8);
+
+       EXIT_SYSV_FUNC
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size _gcry_twofish_amd64_ocb_dec,.-_gcry_twofish_amd64_ocb_dec;)
+
+.align 8
+.globl _gcry_twofish_amd64_ocb_auth
+ELF(.type   _gcry_twofish_amd64_ocb_auth,@function;)
+_gcry_twofish_amd64_ocb_auth:
+       /* input:
+        *      %rdi: ctx, CTX
+        *      %rsi: abuf (3 blocks)
+        *      %rdx: offset
+        *      %rcx: checksum
+        *      %r8 : L pointers (void *L[3])
+        */
+       CFI_STARTPROC();
+       ENTER_SYSV_FUNC_PARAMS_5
+
+       subq $(8 * 8), %rsp;
+       CFI_ADJUST_CFA_OFFSET(8 * 8);
+       movq %rbp, (0 * 8)(%rsp);
+       movq %rbx, (1 * 8)(%rsp);
+       movq %r12, (2 * 8)(%rsp);
+       movq %r13, (3 * 8)(%rsp);
+       movq %r14, (4 * 8)(%rsp);
+       movq %r15, (5 * 8)(%rsp);
+       CFI_REL_OFFSET(%rbp, 0 * 8);
+       CFI_REL_OFFSET(%rbx, 1 * 8);
+       CFI_REL_OFFSET(%r12, 2 * 8);
+       CFI_REL_OFFSET(%r13, 3 * 8);
+       CFI_REL_OFFSET(%r14, 4 * 8);
+       CFI_REL_OFFSET(%r15, 5 * 8);
+
+       movq %rcx, (6 * 8)(%rsp);
+       movq %rsi, RX0;
+       movq %rdx, RX1;
+       movq %r8, RY0;
+
+       /* Load offset */
+       movq (0 * 8)(RX1), RT0;
+       movq (1 * 8)(RX1), RT1;
+
+       /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+       movq (RY0), RY2;
+       xorq (0 * 8)(RY2), RT0;
+       xorq (1 * 8)(RY2), RT1;
+       movq (0 * 8)(RX0), RAB0;
+       movq (1 * 8)(RX0), RCD0;
+       /* PX_i = P_i xor Offset_i */
+       xorq RT0, RAB0;
+       xorq RT1, RCD0;
+
+       /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+       movq 8(RY0), RY2;
+       xorq (0 * 8)(RY2), RT0;
+       xorq (1 * 8)(RY2), RT1;
+       movq (2 * 8)(RX0), RAB1;
+       movq (3 * 8)(RX0), RCD1;
+       /* PX_i = P_i xor Offset_i */
+       xorq RT0, RAB1;
+       xorq RT1, RCD1;
+
+       /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+       movq 16(RY0), RY2;
+       xorq (0 * 8)(RY2), RT0;
+       xorq (1 * 8)(RY2), RT1;
+       movq (4 * 8)(RX0), RAB2;
+       movq (5 * 8)(RX0), RCD2;
+       /* PX_i = P_i xor Offset_i */
+       xorq RT0, RAB2;
+       xorq RT1, RCD2;
+
+       /* Store offset */
+       movq RT0, (0 * 8)(RX1);
+       movq RT1, (1 * 8)(RX1);
+
+       /* C_i = ENCIPHER(K, PX_i)  */
+       call __twofish_enc_blk3;
+
+       movq (6 * 8)(%rsp), RX1; /*checksum*/
+
+       /* Checksum_i = C_i xor Checksum_i  */
+       xorq RCD0, RCD1;
+       xorq RAB0, RAB1;
+       xorq RCD1, RCD2;
+       xorq RAB1, RAB2;
+       xorq RCD2, (0 * 8)(RX1);
+       xorq RAB2, (1 * 8)(RX1);
+
+       movq (0 * 8)(%rsp), %rbp;
+       movq (1 * 8)(%rsp), %rbx;
+       movq (2 * 8)(%rsp), %r12;
+       movq (3 * 8)(%rsp), %r13;
+       movq (4 * 8)(%rsp), %r14;
+       movq (5 * 8)(%rsp), %r15;
+       CFI_RESTORE(%rbp);
+       CFI_RESTORE(%rbx);
+       CFI_RESTORE(%r12);
+       CFI_RESTORE(%r13);
+       CFI_RESTORE(%r14);
+       CFI_RESTORE(%r15);
+       addq $(8 * 8), %rsp;
+       CFI_ADJUST_CFA_OFFSET(-8 * 8);
+
+       EXIT_SYSV_FUNC
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size _gcry_twofish_amd64_ocb_auth,.-_gcry_twofish_amd64_ocb_auth;)
+
+#endif /*USE_TWOFISH*/
+#endif /*__x86_64*/
diff --git a/grub-core/lib/libgcrypt/cipher/twofish-arm.S 
b/grub-core/lib/libgcrypt/cipher/twofish-arm.S
new file mode 100644
index 000000000..2e1da6cd1
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/twofish-arm.S
@@ -0,0 +1,363 @@
+/* twofish-arm.S  -  ARM assembly implementation of Twofish cipher
+ *
+ * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+
+#if defined(__ARMEL__)
+#ifdef HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS
+
+.text
+
+.syntax unified
+.arm
+
+/* structure of TWOFISH_context: */
+#define s0 0
+#define s1 ((s0) + 4 * 256)
+#define s2 ((s1) + 4 * 256)
+#define s3 ((s2) + 4 * 256)
+#define w  ((s3) + 4 * 256)
+#define k  ((w) + 4 * 8)
+
+/* register macros */
+#define CTX %r0
+#define CTXs0 %r0
+#define CTXs1 %r1
+#define CTXs3 %r7
+
+#define RA %r3
+#define RB %r4
+#define RC %r5
+#define RD %r6
+
+#define RX %r2
+#define RY %ip
+
+#define RMASK %lr
+
+#define RT0 %r8
+#define RT1 %r9
+#define RT2 %r10
+#define RT3 %r11
+
+/* helper macros */
+#define ldr_unaligned_le(rout, rsrc, offs, rtmp) \
+       ldrb rout, [rsrc, #((offs) + 0)]; \
+       ldrb rtmp, [rsrc, #((offs) + 1)]; \
+       orr rout, rout, rtmp, lsl #8; \
+       ldrb rtmp, [rsrc, #((offs) + 2)]; \
+       orr rout, rout, rtmp, lsl #16; \
+       ldrb rtmp, [rsrc, #((offs) + 3)]; \
+       orr rout, rout, rtmp, lsl #24;
+
+#define str_unaligned_le(rin, rdst, offs, rtmp0, rtmp1) \
+       mov rtmp0, rin, lsr #8; \
+       strb rin, [rdst, #((offs) + 0)]; \
+       mov rtmp1, rin, lsr #16; \
+       strb rtmp0, [rdst, #((offs) + 1)]; \
+       mov rtmp0, rin, lsr #24; \
+       strb rtmp1, [rdst, #((offs) + 2)]; \
+       strb rtmp0, [rdst, #((offs) + 3)];
+
+#ifndef __ARMEL__
+       /* bswap on big-endian */
+       #define host_to_le(reg) \
+               rev reg, reg;
+       #define le_to_host(reg) \
+               rev reg, reg;
+#else
+       /* nop on little-endian */
+       #define host_to_le(reg) /*_*/
+       #define le_to_host(reg) /*_*/
+#endif
+
+#define ldr_input_aligned_le(rin, a, b, c, d) \
+       ldr a, [rin, #0]; \
+       ldr b, [rin, #4]; \
+       le_to_host(a); \
+       ldr c, [rin, #8]; \
+       le_to_host(b); \
+       ldr d, [rin, #12]; \
+       le_to_host(c); \
+       le_to_host(d);
+
+#define str_output_aligned_le(rout, a, b, c, d) \
+       le_to_host(a); \
+       le_to_host(b); \
+       str a, [rout, #0]; \
+       le_to_host(c); \
+       str b, [rout, #4]; \
+       le_to_host(d); \
+       str c, [rout, #8]; \
+       str d, [rout, #12];
+
+#ifdef __ARM_FEATURE_UNALIGNED
+       /* unaligned word reads/writes allowed */
+       #define ldr_input_le(rin, ra, rb, rc, rd, rtmp) \
+               ldr_input_aligned_le(rin, ra, rb, rc, rd)
+
+       #define str_output_le(rout, ra, rb, rc, rd, rtmp0, rtmp1) \
+               str_output_aligned_le(rout, ra, rb, rc, rd)
+#else
+       /* need to handle unaligned reads/writes by byte reads */
+       #define ldr_input_le(rin, ra, rb, rc, rd, rtmp0) \
+               tst rin, #3; \
+               beq 1f; \
+                       ldr_unaligned_le(ra, rin, 0, rtmp0); \
+                       ldr_unaligned_le(rb, rin, 4, rtmp0); \
+                       ldr_unaligned_le(rc, rin, 8, rtmp0); \
+                       ldr_unaligned_le(rd, rin, 12, rtmp0); \
+                       b 2f; \
+               1:;\
+                       ldr_input_aligned_le(rin, ra, rb, rc, rd); \
+               2:;
+
+       #define str_output_le(rout, ra, rb, rc, rd, rtmp0, rtmp1) \
+               tst rout, #3; \
+               beq 1f; \
+                       str_unaligned_le(ra, rout, 0, rtmp0, rtmp1); \
+                       str_unaligned_le(rb, rout, 4, rtmp0, rtmp1); \
+                       str_unaligned_le(rc, rout, 8, rtmp0, rtmp1); \
+                       str_unaligned_le(rd, rout, 12, rtmp0, rtmp1); \
+                       b 2f; \
+               1:;\
+                       str_output_aligned_le(rout, ra, rb, rc, rd); \
+               2:;
+#endif
+
+/**********************************************************************
+  1-way twofish
+ **********************************************************************/
+#define encrypt_round(a, b, rc, rd, n, ror_a, adj_a) \
+       and RT0, RMASK, b, lsr#(8 - 2); \
+       and RY, RMASK, b, lsr#(16 - 2); \
+       add RT0, RT0, #(s2 - s1); \
+       and RT1, RMASK, b, lsr#(24 - 2); \
+       ldr RY, [CTXs3, RY]; \
+       and RT2, RMASK, b, lsl#(2); \
+       ldr RT0, [CTXs1, RT0]; \
+       and RT3, RMASK, a, lsr#(16 - 2 + (adj_a)); \
+       ldr RT1, [CTXs0, RT1]; \
+       and RX, RMASK, a, lsr#(8 - 2 + (adj_a)); \
+       ldr RT2, [CTXs1, RT2]; \
+       add RT3, RT3, #(s2 - s1); \
+       ldr RX, [CTXs1, RX]; \
+       ror_a(a); \
+       \
+       eor RY, RY, RT0; \
+       ldr RT3, [CTXs1, RT3]; \
+       and RT0, RMASK, a, lsl#(2); \
+       eor RY, RY, RT1; \
+       and RT1, RMASK, a, lsr#(24 - 2); \
+       eor RY, RY, RT2; \
+       ldr RT0, [CTXs0, RT0]; \
+       eor RX, RX, RT3; \
+       ldr RT1, [CTXs3, RT1]; \
+       eor RX, RX, RT0; \
+       \
+       ldr RT3, [CTXs3, #(k - s3 + 8 * (n) + 4)]; \
+       eor RX, RX, RT1; \
+       ldr RT2, [CTXs3, #(k - s3 + 8 * (n))]; \
+       \
+       add RT0, RX, RY, lsl #1; \
+       add RX, RX, RY; \
+       add RT0, RT0, RT3; \
+       add RX, RX, RT2; \
+       eor rd, RT0, rd, ror #31; \
+       eor rc, rc, RX;
+
+#define dummy(x) /*_*/
+
+#define ror1(r) \
+       ror r, r, #1;
+
+#define decrypt_round(a, b, rc, rd, n, ror_b, adj_b) \
+       and RT3, RMASK, b, lsl#(2 - (adj_b)); \
+       and RT1, RMASK, b, lsr#(8 - 2 + (adj_b)); \
+       ror_b(b); \
+       and RT2, RMASK, a, lsl#(2); \
+       and RT0, RMASK, a, lsr#(8 - 2); \
+       \
+       ldr RY, [CTXs1, RT3]; \
+       add RT1, RT1, #(s2 - s1); \
+       ldr RX, [CTXs0, RT2]; \
+       and RT3, RMASK, b, lsr#(16 - 2); \
+       ldr RT1, [CTXs1, RT1]; \
+       and RT2, RMASK, a, lsr#(16 - 2); \
+       ldr RT0, [CTXs1, RT0]; \
+       \
+       add RT2, RT2, #(s2 - s1); \
+       ldr RT3, [CTXs3, RT3]; \
+       eor RY, RY, RT1; \
+       \
+       and RT1, RMASK, b, lsr#(24 - 2); \
+       eor RX, RX, RT0; \
+       ldr RT2, [CTXs1, RT2]; \
+       and RT0, RMASK, a, lsr#(24 - 2); \
+       \
+       ldr RT1, [CTXs0, RT1]; \
+       \
+       eor RY, RY, RT3; \
+       ldr RT0, [CTXs3, RT0]; \
+       eor RX, RX, RT2; \
+       eor RY, RY, RT1; \
+       \
+       ldr RT1, [CTXs3, #(k - s3 + 8 * (n) + 4)]; \
+       eor RX, RX, RT0; \
+       ldr RT2, [CTXs3, #(k - s3 + 8 * (n))]; \
+       \
+       add RT0, RX, RY, lsl #1; \
+       add RX, RX, RY; \
+       add RT0, RT0, RT1; \
+       add RX, RX, RT2; \
+       eor rd, rd, RT0; \
+       eor rc, RX, rc, ror #31;
+
+#define first_encrypt_cycle(nc) \
+       encrypt_round(RA, RB, RC, RD, (nc) * 2, dummy, 0); \
+       encrypt_round(RC, RD, RA, RB, (nc) * 2 + 1, ror1, 1);
+
+#define encrypt_cycle(nc) \
+       encrypt_round(RA, RB, RC, RD, (nc) * 2, ror1, 1); \
+       encrypt_round(RC, RD, RA, RB, (nc) * 2 + 1, ror1, 1);
+
+#define last_encrypt_cycle(nc) \
+       encrypt_round(RA, RB, RC, RD, (nc) * 2, ror1, 1); \
+       encrypt_round(RC, RD, RA, RB, (nc) * 2 + 1, ror1, 1); \
+       ror1(RA);
+
+#define first_decrypt_cycle(nc) \
+       decrypt_round(RC, RD, RA, RB, (nc) * 2 + 1, dummy, 0); \
+       decrypt_round(RA, RB, RC, RD, (nc) * 2, ror1, 1);
+
+#define decrypt_cycle(nc) \
+       decrypt_round(RC, RD, RA, RB, (nc) * 2 + 1, ror1, 1); \
+       decrypt_round(RA, RB, RC, RD, (nc) * 2, ror1, 1);
+
+#define last_decrypt_cycle(nc) \
+       decrypt_round(RC, RD, RA, RB, (nc) * 2 + 1, ror1, 1); \
+       decrypt_round(RA, RB, RC, RD, (nc) * 2, ror1, 1); \
+       ror1(RD);
+
+.align 3
+.globl _gcry_twofish_arm_encrypt_block
+.type   _gcry_twofish_arm_encrypt_block,%function;
+
+_gcry_twofish_arm_encrypt_block:
+       /* input:
+        *      %r0: ctx
+        *      %r1: dst
+        *      %r2: src
+        */
+       push {%r1, %r4-%r11, %ip, %lr};
+
+       add RY, CTXs0, #w;
+
+       ldr_input_le(%r2, RA, RB, RC, RD, RT0);
+
+       /* Input whitening */
+       ldm RY, {RT0, RT1, RT2, RT3};
+       add CTXs3, CTXs0, #(s3 - s0);
+       add CTXs1, CTXs0, #(s1 - s0);
+       mov RMASK, #(0xff << 2);
+       eor RA, RA, RT0;
+       eor RB, RB, RT1;
+       eor RC, RC, RT2;
+       eor RD, RD, RT3;
+
+       first_encrypt_cycle(0);
+       encrypt_cycle(1);
+       encrypt_cycle(2);
+       encrypt_cycle(3);
+       encrypt_cycle(4);
+       encrypt_cycle(5);
+       encrypt_cycle(6);
+       last_encrypt_cycle(7);
+
+       add RY, CTXs3, #(w + 4*4 - s3);
+       pop {%r1}; /* dst */
+
+       /* Output whitening */
+       ldm RY, {RT0, RT1, RT2, RT3};
+       eor RC, RC, RT0;
+       eor RD, RD, RT1;
+       eor RA, RA, RT2;
+       eor RB, RB, RT3;
+
+       str_output_le(%r1, RC, RD, RA, RB, RT0, RT1);
+
+       pop {%r4-%r11, %ip, %pc};
+.ltorg
+.size _gcry_twofish_arm_encrypt_block,.-_gcry_twofish_arm_encrypt_block;
+
+.align 3
+.globl _gcry_twofish_arm_decrypt_block
+.type   _gcry_twofish_arm_decrypt_block,%function;
+
+_gcry_twofish_arm_decrypt_block:
+       /* input:
+        *      %r0: ctx
+        *      %r1: dst
+        *      %r2: src
+        */
+       push {%r1, %r4-%r11, %ip, %lr};
+
+       add CTXs3, CTXs0, #(s3 - s0);
+
+       ldr_input_le(%r2, RC, RD, RA, RB, RT0);
+
+       add RY, CTXs3, #(w + 4*4 - s3);
+       add CTXs3, CTXs0, #(s3 - s0);
+
+       /* Input whitening */
+       ldm RY, {RT0, RT1, RT2, RT3};
+       add CTXs1, CTXs0, #(s1 - s0);
+       mov RMASK, #(0xff << 2);
+       eor RC, RC, RT0;
+       eor RD, RD, RT1;
+       eor RA, RA, RT2;
+       eor RB, RB, RT3;
+
+       first_decrypt_cycle(7);
+       decrypt_cycle(6);
+       decrypt_cycle(5);
+       decrypt_cycle(4);
+       decrypt_cycle(3);
+       decrypt_cycle(2);
+       decrypt_cycle(1);
+       last_decrypt_cycle(0);
+
+       add RY, CTXs0, #w;
+       pop {%r1}; /* dst */
+
+       /* Output whitening */
+       ldm RY, {RT0, RT1, RT2, RT3};
+       eor RA, RA, RT0;
+       eor RB, RB, RT1;
+       eor RC, RC, RT2;
+       eor RD, RD, RT3;
+
+       str_output_le(%r1, RA, RB, RC, RD, RT0, RT1);
+
+       pop {%r4-%r11, %ip, %pc};
+.size _gcry_twofish_arm_decrypt_block,.-_gcry_twofish_arm_decrypt_block;
+
+#endif /*HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS*/
+#endif /*__ARMEL__*/
diff --git a/grub-core/lib/libgcrypt/cipher/twofish-avx2-amd64.S 
b/grub-core/lib/libgcrypt/cipher/twofish-avx2-amd64.S
new file mode 100644
index 000000000..930ac792c
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/twofish-avx2-amd64.S
@@ -0,0 +1,1048 @@
+/* twofish-avx2-amd64.S  -  AMD64/AVX2 assembly implementation of Twofish 
cipher
+ *
+ * Copyright (C) 2013-2017 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifdef __x86_64
+#include <config.h>
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+    defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && defined(USE_TWOFISH) && 
\
+    defined(ENABLE_AVX2_SUPPORT)
+
+#include "asm-common-amd64.h"
+
+.text
+
+/* structure of TWOFISH_context: */
+#define s0     0
+#define s1     ((s0) + 4 * 256)
+#define s2     ((s1) + 4 * 256)
+#define s3     ((s2) + 4 * 256)
+#define w      ((s3) + 4 * 256)
+#define k      ((w) + 4 * 8)
+
+/* register macros */
+#define CTX    %rdi
+
+#define RROUND  %rbp
+#define RROUNDd %ebp
+#define RS0    CTX
+#define RS1    %r8
+#define RS2    %r9
+#define RS3    %r10
+#define RK     %r11
+#define RW     %rax
+
+#define RA0    %ymm8
+#define RB0    %ymm9
+#define RC0    %ymm10
+#define RD0    %ymm11
+#define RA1    %ymm12
+#define RB1    %ymm13
+#define RC1    %ymm14
+#define RD1    %ymm15
+
+/* temp regs */
+#define RX0    %ymm0
+#define RY0    %ymm1
+#define RX1    %ymm2
+#define RY1    %ymm3
+#define RT0    %ymm4
+#define RIDX   %ymm5
+
+#define RX0x   %xmm0
+#define RY0x   %xmm1
+#define RX1x   %xmm2
+#define RY1x   %xmm3
+#define RT0x   %xmm4
+#define RIDXx  %xmm5
+
+#define RTMP0   RX0
+#define RTMP0x  RX0x
+#define RTMP1   RX1
+#define RTMP1x  RX1x
+#define RTMP2   RY0
+#define RTMP2x  RY0x
+#define RTMP3   RY1
+#define RTMP3x  RY1x
+#define RTMP4   RIDX
+#define RTMP4x  RIDXx
+
+/* vpgatherdd mask and '-1' */
+#define RNOT   %ymm6
+#define RNOTx  %xmm6
+
+/* byte mask, (-1 >> 24) */
+#define RBYTE  %ymm7
+
+/**********************************************************************
+  16-way AVX2 twofish
+ **********************************************************************/
+#define init_round_constants() \
+       vpcmpeqd RNOT, RNOT, RNOT; \
+       leaq k(CTX), RK; \
+       leaq w(CTX), RW; \
+       vpsrld $24, RNOT, RBYTE; \
+       leaq s1(CTX), RS1; \
+       leaq s2(CTX), RS2; \
+       leaq s3(CTX), RS3; \
+
+#define g16(ab, rs0, rs1, rs2, rs3, xy) \
+       vpand RBYTE, ab ## 0, RIDX; \
+       vpgatherdd RNOT, (rs0, RIDX, 4), xy ## 0; \
+       vpcmpeqd RNOT, RNOT, RNOT; \
+               \
+               vpand RBYTE, ab ## 1, RIDX; \
+               vpgatherdd RNOT, (rs0, RIDX, 4), xy ## 1; \
+               vpcmpeqd RNOT, RNOT, RNOT; \
+       \
+       vpsrld $8, ab ## 0, RIDX; \
+       vpand RBYTE, RIDX, RIDX; \
+       vpgatherdd RNOT, (rs1, RIDX, 4), RT0; \
+       vpcmpeqd RNOT, RNOT, RNOT; \
+       vpxor RT0, xy ## 0, xy ## 0; \
+               \
+               vpsrld $8, ab ## 1, RIDX; \
+               vpand RBYTE, RIDX, RIDX; \
+               vpgatherdd RNOT, (rs1, RIDX, 4), RT0; \
+               vpcmpeqd RNOT, RNOT, RNOT; \
+               vpxor RT0, xy ## 1, xy ## 1; \
+       \
+       vpsrld $16, ab ## 0, RIDX; \
+       vpand RBYTE, RIDX, RIDX; \
+       vpgatherdd RNOT, (rs2, RIDX, 4), RT0; \
+       vpcmpeqd RNOT, RNOT, RNOT; \
+       vpxor RT0, xy ## 0, xy ## 0; \
+               \
+               vpsrld $16, ab ## 1, RIDX; \
+               vpand RBYTE, RIDX, RIDX; \
+               vpgatherdd RNOT, (rs2, RIDX, 4), RT0; \
+               vpcmpeqd RNOT, RNOT, RNOT; \
+               vpxor RT0, xy ## 1, xy ## 1; \
+       \
+       vpsrld $24, ab ## 0, RIDX; \
+       vpgatherdd RNOT, (rs3, RIDX, 4), RT0; \
+       vpcmpeqd RNOT, RNOT, RNOT; \
+       vpxor RT0, xy ## 0, xy ## 0; \
+               \
+               vpsrld $24, ab ## 1, RIDX; \
+               vpgatherdd RNOT, (rs3, RIDX, 4), RT0; \
+               vpcmpeqd RNOT, RNOT, RNOT; \
+               vpxor RT0, xy ## 1, xy ## 1;
+
+#define g1_16(a, x) \
+       g16(a, RS0, RS1, RS2, RS3, x);
+
+#define g2_16(b, y) \
+       g16(b, RS1, RS2, RS3, RS0, y);
+
+#define encrypt_round_end16(a, b, c, d, nk, r) \
+       vpaddd RY0, RX0, RX0; \
+       vpaddd RX0, RY0, RY0; \
+       vpbroadcastd ((nk)+((r)*8))(RK), RT0; \
+       vpaddd RT0, RX0, RX0; \
+       vpbroadcastd 4+((nk)+((r)*8))(RK), RT0; \
+       vpaddd RT0, RY0, RY0; \
+       \
+       vpxor RY0, d ## 0, d ## 0; \
+       \
+       vpxor RX0, c ## 0, c ## 0; \
+       vpsrld $1, c ## 0, RT0; \
+       vpslld $31, c ## 0, c ## 0; \
+       vpor RT0, c ## 0, c ## 0; \
+       \
+               vpaddd RY1, RX1, RX1; \
+               vpaddd RX1, RY1, RY1; \
+               vpbroadcastd ((nk)+((r)*8))(RK), RT0; \
+               vpaddd RT0, RX1, RX1; \
+               vpbroadcastd 4+((nk)+((r)*8))(RK), RT0; \
+               vpaddd RT0, RY1, RY1; \
+               \
+               vpxor RY1, d ## 1, d ## 1; \
+               \
+               vpxor RX1, c ## 1, c ## 1; \
+               vpsrld $1, c ## 1, RT0; \
+               vpslld $31, c ## 1, c ## 1; \
+               vpor RT0, c ## 1, c ## 1; \
+
+#define encrypt_round16(a, b, c, d, nk, r) \
+       g2_16(b, RY); \
+       \
+       vpslld $1, b ## 0, RT0; \
+       vpsrld $31, b ## 0, b ## 0; \
+       vpor RT0, b ## 0, b ## 0; \
+       \
+               vpslld $1, b ## 1, RT0; \
+               vpsrld $31, b ## 1, b ## 1; \
+               vpor RT0, b ## 1, b ## 1; \
+       \
+       g1_16(a, RX); \
+       \
+       encrypt_round_end16(a, b, c, d, nk, r);
+
+#define encrypt_round_first16(a, b, c, d, nk, r) \
+       vpslld $1, d ## 0, RT0; \
+       vpsrld $31, d ## 0, d ## 0; \
+       vpor RT0, d ## 0, d ## 0; \
+       \
+               vpslld $1, d ## 1, RT0; \
+               vpsrld $31, d ## 1, d ## 1; \
+               vpor RT0, d ## 1, d ## 1; \
+       \
+       encrypt_round16(a, b, c, d, nk, r);
+
+#define encrypt_round_last16(a, b, c, d, nk, r) \
+       g2_16(b, RY); \
+       \
+       g1_16(a, RX); \
+       \
+       encrypt_round_end16(a, b, c, d, nk, r);
+
+#define decrypt_round_end16(a, b, c, d, nk, r) \
+       vpaddd RY0, RX0, RX0; \
+       vpaddd RX0, RY0, RY0; \
+       vpbroadcastd ((nk)+((r)*8))(RK), RT0; \
+       vpaddd RT0, RX0, RX0; \
+       vpbroadcastd 4+((nk)+((r)*8))(RK), RT0; \
+       vpaddd RT0, RY0, RY0; \
+       \
+       vpxor RX0, c ## 0, c ## 0; \
+       \
+       vpxor RY0, d ## 0, d ## 0; \
+       vpsrld $1, d ## 0, RT0; \
+       vpslld $31, d ## 0, d ## 0; \
+       vpor RT0, d ## 0, d ## 0; \
+       \
+               vpaddd RY1, RX1, RX1; \
+               vpaddd RX1, RY1, RY1; \
+               vpbroadcastd ((nk)+((r)*8))(RK), RT0; \
+               vpaddd RT0, RX1, RX1; \
+               vpbroadcastd 4+((nk)+((r)*8))(RK), RT0; \
+               vpaddd RT0, RY1, RY1; \
+               \
+               vpxor RX1, c ## 1, c ## 1; \
+               \
+               vpxor RY1, d ## 1, d ## 1; \
+               vpsrld $1, d ## 1, RT0; \
+               vpslld $31, d ## 1, d ## 1; \
+               vpor RT0, d ## 1, d ## 1;
+
+#define decrypt_round16(a, b, c, d, nk, r) \
+       g1_16(a, RX); \
+       \
+       vpslld $1, a ## 0, RT0; \
+       vpsrld $31, a ## 0, a ## 0; \
+       vpor RT0, a ## 0, a ## 0; \
+       \
+               vpslld $1, a ## 1, RT0; \
+               vpsrld $31, a ## 1, a ## 1; \
+               vpor RT0, a ## 1, a ## 1; \
+       \
+       g2_16(b, RY); \
+       \
+       decrypt_round_end16(a, b, c, d, nk, r);
+
+#define decrypt_round_first16(a, b, c, d, nk, r) \
+       vpslld $1, c ## 0, RT0; \
+       vpsrld $31, c ## 0, c ## 0; \
+       vpor RT0, c ## 0, c ## 0; \
+       \
+               vpslld $1, c ## 1, RT0; \
+               vpsrld $31, c ## 1, c ## 1; \
+               vpor RT0, c ## 1, c ## 1; \
+       \
+       decrypt_round16(a, b, c, d, nk, r)
+
+#define decrypt_round_last16(a, b, c, d, nk, r) \
+       g1_16(a, RX); \
+       \
+       g2_16(b, RY); \
+       \
+       decrypt_round_end16(a, b, c, d, nk, r);
+
+#define encrypt_cycle16(r) \
+       encrypt_round16(RA, RB, RC, RD, 0, r); \
+       encrypt_round16(RC, RD, RA, RB, 8, r);
+
+#define encrypt_cycle_first16(r) \
+       encrypt_round_first16(RA, RB, RC, RD, 0, r); \
+       encrypt_round16(RC, RD, RA, RB, 8, r);
+
+#define encrypt_cycle_last16(r) \
+       encrypt_round16(RA, RB, RC, RD, 0, r); \
+       encrypt_round_last16(RC, RD, RA, RB, 8, r);
+
+#define decrypt_cycle16(r) \
+       decrypt_round16(RC, RD, RA, RB, 8, r); \
+       decrypt_round16(RA, RB, RC, RD, 0, r);
+
+#define decrypt_cycle_first16(r) \
+       decrypt_round_first16(RC, RD, RA, RB, 8, r); \
+       decrypt_round16(RA, RB, RC, RD, 0, r);
+
+#define decrypt_cycle_last16(r) \
+       decrypt_round16(RC, RD, RA, RB, 8, r); \
+       decrypt_round_last16(RA, RB, RC, RD, 0, r);
+
+#define transpose_4x4(x0,x1,x2,x3,t1,t2) \
+       vpunpckhdq x1, x0, t2; \
+       vpunpckldq x1, x0, x0; \
+       \
+       vpunpckldq x3, x2, t1; \
+       vpunpckhdq x3, x2, x2; \
+       \
+       vpunpckhqdq t1, x0, x1; \
+       vpunpcklqdq t1, x0, x0; \
+       \
+       vpunpckhqdq x2, t2, x3; \
+       vpunpcklqdq x2, t2, x2;
+
+#define read_blocks8(offs,a,b,c,d) \
+       vmovdqu 16*offs(RIO), a; \
+       vmovdqu 16*offs+32(RIO), b; \
+       vmovdqu 16*offs+64(RIO), c; \
+       vmovdqu 16*offs+96(RIO), d; \
+       \
+       transpose_4x4(a, b, c, d, RX0, RY0);
+
+#define write_blocks8(offs,a,b,c,d) \
+       transpose_4x4(a, b, c, d, RX0, RY0); \
+       \
+       vmovdqu a, 16*offs(RIO); \
+       vmovdqu b, 16*offs+32(RIO); \
+       vmovdqu c, 16*offs+64(RIO); \
+       vmovdqu d, 16*offs+96(RIO);
+
+#define inpack_enc8(a,b,c,d) \
+       vpbroadcastd 4*0(RW), RT0; \
+       vpxor RT0, a, a; \
+       \
+       vpbroadcastd 4*1(RW), RT0; \
+       vpxor RT0, b, b; \
+       \
+       vpbroadcastd 4*2(RW), RT0; \
+       vpxor RT0, c, c; \
+       \
+       vpbroadcastd 4*3(RW), RT0; \
+       vpxor RT0, d, d;
+
+#define outunpack_enc8(a,b,c,d) \
+       vpbroadcastd 4*4(RW), RX0; \
+       vpbroadcastd 4*5(RW), RY0; \
+       vpxor RX0, c, RX0; \
+       vpxor RY0, d, RY0; \
+       \
+       vpbroadcastd 4*6(RW), RT0; \
+       vpxor RT0, a, c; \
+       vpbroadcastd 4*7(RW), RT0; \
+       vpxor RT0, b, d; \
+       \
+       vmovdqa RX0, a; \
+       vmovdqa RY0, b;
+
+#define inpack_dec8(a,b,c,d) \
+       vpbroadcastd 4*4(RW), RX0; \
+       vpbroadcastd 4*5(RW), RY0; \
+       vpxor RX0, a, RX0; \
+       vpxor RY0, b, RY0; \
+       \
+       vpbroadcastd 4*6(RW), RT0; \
+       vpxor RT0, c, a; \
+       vpbroadcastd 4*7(RW), RT0; \
+       vpxor RT0, d, b; \
+       \
+       vmovdqa RX0, c; \
+       vmovdqa RY0, d;
+
+#define outunpack_dec8(a,b,c,d) \
+       vpbroadcastd 4*0(RW), RT0; \
+       vpxor RT0, a, a; \
+       \
+       vpbroadcastd 4*1(RW), RT0; \
+       vpxor RT0, b, b; \
+       \
+       vpbroadcastd 4*2(RW), RT0; \
+       vpxor RT0, c, c; \
+       \
+       vpbroadcastd 4*3(RW), RT0; \
+       vpxor RT0, d, d;
+
+#define transpose4x4_16(a,b,c,d) \
+       transpose_4x4(a ## 0, b ## 0, c ## 0, d ## 0, RX0, RY0); \
+       transpose_4x4(a ## 1, b ## 1, c ## 1, d ## 1, RX0, RY0);
+
+#define inpack_enc16(a,b,c,d) \
+       inpack_enc8(a ## 0, b ## 0, c ## 0, d ## 0); \
+       inpack_enc8(a ## 1, b ## 1, c ## 1, d ## 1);
+
+#define outunpack_enc16(a,b,c,d) \
+       outunpack_enc8(a ## 0, b ## 0, c ## 0, d ## 0); \
+       outunpack_enc8(a ## 1, b ## 1, c ## 1, d ## 1);
+
+#define inpack_dec16(a,b,c,d) \
+       inpack_dec8(a ## 0, b ## 0, c ## 0, d ## 0); \
+       inpack_dec8(a ## 1, b ## 1, c ## 1, d ## 1);
+
+#define outunpack_dec16(a,b,c,d) \
+       outunpack_dec8(a ## 0, b ## 0, c ## 0, d ## 0); \
+       outunpack_dec8(a ## 1, b ## 1, c ## 1, d ## 1);
+
+.align 8
+ELF(.type __twofish_enc_blk16,@function;)
+__twofish_enc_blk16:
+       /* input:
+        *      %rdi: ctx, CTX
+        *      RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1: sixteen parallel
+        *                                              plaintext blocks
+        * output:
+        *      RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1: sixteen parallel
+        *                                              ciphertext blocks
+        */
+       CFI_STARTPROC();
+       init_round_constants();
+
+       transpose4x4_16(RA, RB, RC, RD);
+       inpack_enc16(RA, RB, RC, RD);
+
+       encrypt_cycle_first16(0);
+       encrypt_cycle16(2);
+       encrypt_cycle16(4);
+       encrypt_cycle16(6);
+       encrypt_cycle16(8);
+       encrypt_cycle16(10);
+       encrypt_cycle16(12);
+       encrypt_cycle_last16(14);
+
+       outunpack_enc16(RA, RB, RC, RD);
+       transpose4x4_16(RA, RB, RC, RD);
+
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size __twofish_enc_blk16,.-__twofish_enc_blk16;)
+
+.align 8
+ELF(.type __twofish_dec_blk16,@function;)
+__twofish_dec_blk16:
+       /* input:
+        *      %rdi: ctx, CTX
+        *      RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1: sixteen parallel
+        *                                              plaintext blocks
+        * output:
+        *      RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1: sixteen parallel
+        *                                              ciphertext blocks
+        */
+       CFI_STARTPROC();
+       init_round_constants();
+
+       transpose4x4_16(RA, RB, RC, RD);
+       inpack_dec16(RA, RB, RC, RD);
+
+       decrypt_cycle_first16(14);
+       decrypt_cycle16(12);
+       decrypt_cycle16(10);
+       decrypt_cycle16(8);
+       decrypt_cycle16(6);
+       decrypt_cycle16(4);
+       decrypt_cycle16(2);
+       decrypt_cycle_last16(0);
+
+       outunpack_dec16(RA, RB, RC, RD);
+       transpose4x4_16(RA, RB, RC, RD);
+
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size __twofish_dec_blk16,.-__twofish_dec_blk16;)
+
+#define inc_le128(x, minus_one, tmp) \
+       vpcmpeqq minus_one, x, tmp; \
+       vpsubq minus_one, x, x; \
+       vpslldq $8, tmp, tmp; \
+       vpsubq tmp, x, x;
+
+.align 8
+.globl _gcry_twofish_avx2_ctr_enc
+ELF(.type   _gcry_twofish_avx2_ctr_enc,@function;)
+_gcry_twofish_avx2_ctr_enc:
+       /* input:
+        *      %rdi: ctx, CTX
+        *      %rsi: dst (16 blocks)
+        *      %rdx: src (16 blocks)
+        *      %rcx: iv (big endian, 128bit)
+        */
+       CFI_STARTPROC();
+
+       movq 8(%rcx), %rax;
+       bswapq %rax;
+
+       vzeroupper;
+
+       vbroadcasti128 .Lbswap128_mask rRIP, RTMP3;
+       vpcmpeqd RNOT, RNOT, RNOT;
+       vpsrldq $8, RNOT, RNOT;   /* ab: -1:0 ; cd: -1:0 */
+       vpaddq RNOT, RNOT, RTMP2; /* ab: -2:0 ; cd: -2:0 */
+
+       /* load IV and byteswap */
+       vmovdqu (%rcx), RTMP4x;
+       vpshufb RTMP3x, RTMP4x, RTMP4x;
+       vmovdqa RTMP4x, RTMP0x;
+       inc_le128(RTMP4x, RNOTx, RTMP1x);
+       vinserti128 $1, RTMP4x, RTMP0, RTMP0;
+       vpshufb RTMP3, RTMP0, RA0; /* +1 ; +0 */
+
+       /* check need for handling 64-bit overflow and carry */
+       cmpq $(0xffffffffffffffff - 16), %rax;
+       ja .Lhandle_ctr_carry;
+
+       /* construct IVs */
+       vpsubq RTMP2, RTMP0, RTMP0; /* +3 ; +2 */
+       vpshufb RTMP3, RTMP0, RB0;
+       vpsubq RTMP2, RTMP0, RTMP0; /* +5 ; +4 */
+       vpshufb RTMP3, RTMP0, RC0;
+       vpsubq RTMP2, RTMP0, RTMP0; /* +7 ; +6 */
+       vpshufb RTMP3, RTMP0, RD0;
+       vpsubq RTMP2, RTMP0, RTMP0; /* +9 ; +8 */
+       vpshufb RTMP3, RTMP0, RA1;
+       vpsubq RTMP2, RTMP0, RTMP0; /* +11 ; +10 */
+       vpshufb RTMP3, RTMP0, RB1;
+       vpsubq RTMP2, RTMP0, RTMP0; /* +13 ; +12 */
+       vpshufb RTMP3, RTMP0, RC1;
+       vpsubq RTMP2, RTMP0, RTMP0; /* +15 ; +14 */
+       vpshufb RTMP3, RTMP0, RD1;
+       vpsubq RTMP2, RTMP0, RTMP0; /* +16 */
+       vpshufb RTMP3x, RTMP0x, RTMP0x;
+
+       jmp .Lctr_carry_done;
+
+.Lhandle_ctr_carry:
+       /* construct IVs */
+       inc_le128(RTMP0, RNOT, RTMP1);
+       inc_le128(RTMP0, RNOT, RTMP1);
+       vpshufb RTMP3, RTMP0, RB0; /* +3 ; +2 */
+       inc_le128(RTMP0, RNOT, RTMP1);
+       inc_le128(RTMP0, RNOT, RTMP1);
+       vpshufb RTMP3, RTMP0, RC0; /* +5 ; +4 */
+       inc_le128(RTMP0, RNOT, RTMP1);
+       inc_le128(RTMP0, RNOT, RTMP1);
+       vpshufb RTMP3, RTMP0, RD0; /* +7 ; +6 */
+       inc_le128(RTMP0, RNOT, RTMP1);
+       inc_le128(RTMP0, RNOT, RTMP1);
+       vpshufb RTMP3, RTMP0, RA1; /* +9 ; +8 */
+       inc_le128(RTMP0, RNOT, RTMP1);
+       inc_le128(RTMP0, RNOT, RTMP1);
+       vpshufb RTMP3, RTMP0, RB1; /* +11 ; +10 */
+       inc_le128(RTMP0, RNOT, RTMP1);
+       inc_le128(RTMP0, RNOT, RTMP1);
+       vpshufb RTMP3, RTMP0, RC1; /* +13 ; +12 */
+       inc_le128(RTMP0, RNOT, RTMP1);
+       inc_le128(RTMP0, RNOT, RTMP1);
+       vpshufb RTMP3, RTMP0, RD1; /* +15 ; +14 */
+       inc_le128(RTMP0, RNOT, RTMP1);
+       vextracti128 $1, RTMP0, RTMP0x;
+       vpshufb RTMP3x, RTMP0x, RTMP0x; /* +16 */
+
+.align 4
+.Lctr_carry_done:
+       /* store new IV */
+       vmovdqu RTMP0x, (%rcx);
+
+       call __twofish_enc_blk16;
+
+       vpxor (0 * 32)(%rdx), RA0, RA0;
+       vpxor (1 * 32)(%rdx), RB0, RB0;
+       vpxor (2 * 32)(%rdx), RC0, RC0;
+       vpxor (3 * 32)(%rdx), RD0, RD0;
+       vpxor (4 * 32)(%rdx), RA1, RA1;
+       vpxor (5 * 32)(%rdx), RB1, RB1;
+       vpxor (6 * 32)(%rdx), RC1, RC1;
+       vpxor (7 * 32)(%rdx), RD1, RD1;
+
+       vmovdqu RA0, (0 * 32)(%rsi);
+       vmovdqu RB0, (1 * 32)(%rsi);
+       vmovdqu RC0, (2 * 32)(%rsi);
+       vmovdqu RD0, (3 * 32)(%rsi);
+       vmovdqu RA1, (4 * 32)(%rsi);
+       vmovdqu RB1, (5 * 32)(%rsi);
+       vmovdqu RC1, (6 * 32)(%rsi);
+       vmovdqu RD1, (7 * 32)(%rsi);
+
+       vzeroall;
+
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size _gcry_twofish_avx2_ctr_enc,.-_gcry_twofish_avx2_ctr_enc;)
+
+.align 8
+.globl _gcry_twofish_avx2_cbc_dec
+ELF(.type   _gcry_twofish_avx2_cbc_dec,@function;)
+_gcry_twofish_avx2_cbc_dec:
+       /* input:
+        *      %rdi: ctx, CTX
+        *      %rsi: dst (16 blocks)
+        *      %rdx: src (16 blocks)
+        *      %rcx: iv
+        */
+       CFI_STARTPROC();
+
+       vzeroupper;
+
+       vmovdqu (0 * 32)(%rdx), RA0;
+       vmovdqu (1 * 32)(%rdx), RB0;
+       vmovdqu (2 * 32)(%rdx), RC0;
+       vmovdqu (3 * 32)(%rdx), RD0;
+       vmovdqu (4 * 32)(%rdx), RA1;
+       vmovdqu (5 * 32)(%rdx), RB1;
+       vmovdqu (6 * 32)(%rdx), RC1;
+       vmovdqu (7 * 32)(%rdx), RD1;
+
+       call __twofish_dec_blk16;
+
+       vmovdqu (%rcx), RNOTx;
+       vinserti128 $1, (%rdx), RNOT, RNOT;
+       vpxor RNOT, RA0, RA0;
+       vpxor (0 * 32 + 16)(%rdx), RB0, RB0;
+       vpxor (1 * 32 + 16)(%rdx), RC0, RC0;
+       vpxor (2 * 32 + 16)(%rdx), RD0, RD0;
+       vpxor (3 * 32 + 16)(%rdx), RA1, RA1;
+       vpxor (4 * 32 + 16)(%rdx), RB1, RB1;
+       vpxor (5 * 32 + 16)(%rdx), RC1, RC1;
+       vpxor (6 * 32 + 16)(%rdx), RD1, RD1;
+       vmovdqu (7 * 32 + 16)(%rdx), RNOTx;
+       vmovdqu RNOTx, (%rcx); /* store new IV */
+
+       vmovdqu RA0, (0 * 32)(%rsi);
+       vmovdqu RB0, (1 * 32)(%rsi);
+       vmovdqu RC0, (2 * 32)(%rsi);
+       vmovdqu RD0, (3 * 32)(%rsi);
+       vmovdqu RA1, (4 * 32)(%rsi);
+       vmovdqu RB1, (5 * 32)(%rsi);
+       vmovdqu RC1, (6 * 32)(%rsi);
+       vmovdqu RD1, (7 * 32)(%rsi);
+
+       vzeroall;
+
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size _gcry_twofish_avx2_cbc_dec,.-_gcry_twofish_avx2_cbc_dec;)
+
+.align 8
+.globl _gcry_twofish_avx2_cfb_dec
+ELF(.type   _gcry_twofish_avx2_cfb_dec,@function;)
+_gcry_twofish_avx2_cfb_dec:
+       /* input:
+        *      %rdi: ctx, CTX
+        *      %rsi: dst (16 blocks)
+        *      %rdx: src (16 blocks)
+        *      %rcx: iv
+        */
+       CFI_STARTPROC();
+
+       vzeroupper;
+
+       /* Load input */
+       vmovdqu (%rcx), RNOTx;
+       vinserti128 $1, (%rdx), RNOT, RA0;
+       vmovdqu (0 * 32 + 16)(%rdx), RB0;
+       vmovdqu (1 * 32 + 16)(%rdx), RC0;
+       vmovdqu (2 * 32 + 16)(%rdx), RD0;
+       vmovdqu (3 * 32 + 16)(%rdx), RA1;
+       vmovdqu (4 * 32 + 16)(%rdx), RB1;
+       vmovdqu (5 * 32 + 16)(%rdx), RC1;
+       vmovdqu (6 * 32 + 16)(%rdx), RD1;
+
+       /* Update IV */
+       vmovdqu (7 * 32 + 16)(%rdx), RNOTx;
+       vmovdqu RNOTx, (%rcx);
+
+       call __twofish_enc_blk16;
+
+       vpxor (0 * 32)(%rdx), RA0, RA0;
+       vpxor (1 * 32)(%rdx), RB0, RB0;
+       vpxor (2 * 32)(%rdx), RC0, RC0;
+       vpxor (3 * 32)(%rdx), RD0, RD0;
+       vpxor (4 * 32)(%rdx), RA1, RA1;
+       vpxor (5 * 32)(%rdx), RB1, RB1;
+       vpxor (6 * 32)(%rdx), RC1, RC1;
+       vpxor (7 * 32)(%rdx), RD1, RD1;
+
+       vmovdqu RA0, (0 * 32)(%rsi);
+       vmovdqu RB0, (1 * 32)(%rsi);
+       vmovdqu RC0, (2 * 32)(%rsi);
+       vmovdqu RD0, (3 * 32)(%rsi);
+       vmovdqu RA1, (4 * 32)(%rsi);
+       vmovdqu RB1, (5 * 32)(%rsi);
+       vmovdqu RC1, (6 * 32)(%rsi);
+       vmovdqu RD1, (7 * 32)(%rsi);
+
+       vzeroall;
+
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size _gcry_twofish_avx2_cfb_dec,.-_gcry_twofish_avx2_cfb_dec;)
+
+.align 8
+.globl _gcry_twofish_avx2_ocb_enc
+ELF(.type _gcry_twofish_avx2_ocb_enc,@function;)
+
+_gcry_twofish_avx2_ocb_enc:
+       /* input:
+        *      %rdi: ctx, CTX
+        *      %rsi: dst (16 blocks)
+        *      %rdx: src (16 blocks)
+        *      %rcx: offset
+        *      %r8 : checksum
+        *      %r9 : L pointers (void *L[16])
+        */
+       CFI_STARTPROC();
+
+       vzeroupper;
+
+       subq $(4 * 8), %rsp;
+       CFI_ADJUST_CFA_OFFSET(4 * 8);
+
+       movq %r10, (0 * 8)(%rsp);
+       movq %r11, (1 * 8)(%rsp);
+       movq %r12, (2 * 8)(%rsp);
+       movq %r13, (3 * 8)(%rsp);
+       CFI_REL_OFFSET(%r10, 0 * 8);
+       CFI_REL_OFFSET(%r11, 1 * 8);
+       CFI_REL_OFFSET(%r12, 2 * 8);
+       CFI_REL_OFFSET(%r13, 3 * 8);
+
+       vmovdqu (%rcx), RTMP0x;
+       vmovdqu (%r8), RTMP1x;
+
+       /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+       /* Checksum_i = Checksum_{i-1} xor P_i  */
+       /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i)  */
+
+#define OCB_INPUT(n, l0reg, l1reg, yreg) \
+         vmovdqu (n * 32)(%rdx), yreg; \
+         vpxor (l0reg), RTMP0x, RNOTx; \
+         vpxor (l1reg), RNOTx, RTMP0x; \
+         vinserti128 $1, RTMP0x, RNOT, RNOT; \
+         vpxor yreg, RTMP1, RTMP1; \
+         vpxor yreg, RNOT, yreg; \
+         vmovdqu RNOT, (n * 32)(%rsi);
+
+       movq (0 * 8)(%r9), %r10;
+       movq (1 * 8)(%r9), %r11;
+       movq (2 * 8)(%r9), %r12;
+       movq (3 * 8)(%r9), %r13;
+       OCB_INPUT(0, %r10, %r11, RA0);
+       OCB_INPUT(1, %r12, %r13, RB0);
+       movq (4 * 8)(%r9), %r10;
+       movq (5 * 8)(%r9), %r11;
+       movq (6 * 8)(%r9), %r12;
+       movq (7 * 8)(%r9), %r13;
+       OCB_INPUT(2, %r10, %r11, RC0);
+       OCB_INPUT(3, %r12, %r13, RD0);
+       movq (8 * 8)(%r9), %r10;
+       movq (9 * 8)(%r9), %r11;
+       movq (10 * 8)(%r9), %r12;
+       movq (11 * 8)(%r9), %r13;
+       OCB_INPUT(4, %r10, %r11, RA1);
+       OCB_INPUT(5, %r12, %r13, RB1);
+       movq (12 * 8)(%r9), %r10;
+       movq (13 * 8)(%r9), %r11;
+       movq (14 * 8)(%r9), %r12;
+       movq (15 * 8)(%r9), %r13;
+       OCB_INPUT(6, %r10, %r11, RC1);
+       OCB_INPUT(7, %r12, %r13, RD1);
+#undef OCB_INPUT
+
+       vextracti128 $1, RTMP1, RNOTx;
+       vmovdqu RTMP0x, (%rcx);
+       vpxor RNOTx, RTMP1x, RTMP1x;
+       vmovdqu RTMP1x, (%r8);
+
+       movq (0 * 8)(%rsp), %r10;
+       movq (1 * 8)(%rsp), %r11;
+       movq (2 * 8)(%rsp), %r12;
+       movq (3 * 8)(%rsp), %r13;
+       CFI_RESTORE(%r10);
+       CFI_RESTORE(%r11);
+       CFI_RESTORE(%r12);
+       CFI_RESTORE(%r13);
+
+       call __twofish_enc_blk16;
+
+       addq $(4 * 8), %rsp;
+       CFI_ADJUST_CFA_OFFSET(-4 * 8);
+
+       vpxor (0 * 32)(%rsi), RA0, RA0;
+       vpxor (1 * 32)(%rsi), RB0, RB0;
+       vpxor (2 * 32)(%rsi), RC0, RC0;
+       vpxor (3 * 32)(%rsi), RD0, RD0;
+       vpxor (4 * 32)(%rsi), RA1, RA1;
+       vpxor (5 * 32)(%rsi), RB1, RB1;
+       vpxor (6 * 32)(%rsi), RC1, RC1;
+       vpxor (7 * 32)(%rsi), RD1, RD1;
+
+       vmovdqu RA0, (0 * 32)(%rsi);
+       vmovdqu RB0, (1 * 32)(%rsi);
+       vmovdqu RC0, (2 * 32)(%rsi);
+       vmovdqu RD0, (3 * 32)(%rsi);
+       vmovdqu RA1, (4 * 32)(%rsi);
+       vmovdqu RB1, (5 * 32)(%rsi);
+       vmovdqu RC1, (6 * 32)(%rsi);
+       vmovdqu RD1, (7 * 32)(%rsi);
+
+       vzeroall;
+
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size _gcry_twofish_avx2_ocb_enc,.-_gcry_twofish_avx2_ocb_enc;)
+
+.align 8
+.globl _gcry_twofish_avx2_ocb_dec
+ELF(.type _gcry_twofish_avx2_ocb_dec,@function;)
+
+_gcry_twofish_avx2_ocb_dec:
+       /* input:
+        *      %rdi: ctx, CTX
+        *      %rsi: dst (16 blocks)
+        *      %rdx: src (16 blocks)
+        *      %rcx: offset
+        *      %r8 : checksum
+        *      %r9 : L pointers (void *L[16])
+        */
+       CFI_STARTPROC();
+
+       vzeroupper;
+
+       subq $(4 * 8), %rsp;
+       CFI_ADJUST_CFA_OFFSET(4 * 8);
+
+       movq %r10, (0 * 8)(%rsp);
+       movq %r11, (1 * 8)(%rsp);
+       movq %r12, (2 * 8)(%rsp);
+       movq %r13, (3 * 8)(%rsp);
+       CFI_REL_OFFSET(%r10, 0 * 8);
+       CFI_REL_OFFSET(%r11, 1 * 8);
+       CFI_REL_OFFSET(%r12, 2 * 8);
+       CFI_REL_OFFSET(%r13, 3 * 8);
+
+       vmovdqu (%rcx), RTMP0x;
+
+       /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+       /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i)  */
+
+#define OCB_INPUT(n, l0reg, l1reg, yreg) \
+         vmovdqu (n * 32)(%rdx), yreg; \
+         vpxor (l0reg), RTMP0x, RNOTx; \
+         vpxor (l1reg), RNOTx, RTMP0x; \
+         vinserti128 $1, RTMP0x, RNOT, RNOT; \
+         vpxor yreg, RNOT, yreg; \
+         vmovdqu RNOT, (n * 32)(%rsi);
+
+       movq (0 * 8)(%r9), %r10;
+       movq (1 * 8)(%r9), %r11;
+       movq (2 * 8)(%r9), %r12;
+       movq (3 * 8)(%r9), %r13;
+       OCB_INPUT(0, %r10, %r11, RA0);
+       OCB_INPUT(1, %r12, %r13, RB0);
+       movq (4 * 8)(%r9), %r10;
+       movq (5 * 8)(%r9), %r11;
+       movq (6 * 8)(%r9), %r12;
+       movq (7 * 8)(%r9), %r13;
+       OCB_INPUT(2, %r10, %r11, RC0);
+       OCB_INPUT(3, %r12, %r13, RD0);
+       movq (8 * 8)(%r9), %r10;
+       movq (9 * 8)(%r9), %r11;
+       movq (10 * 8)(%r9), %r12;
+       movq (11 * 8)(%r9), %r13;
+       OCB_INPUT(4, %r10, %r11, RA1);
+       OCB_INPUT(5, %r12, %r13, RB1);
+       movq (12 * 8)(%r9), %r10;
+       movq (13 * 8)(%r9), %r11;
+       movq (14 * 8)(%r9), %r12;
+       movq (15 * 8)(%r9), %r13;
+       OCB_INPUT(6, %r10, %r11, RC1);
+       OCB_INPUT(7, %r12, %r13, RD1);
+#undef OCB_INPUT
+
+       vmovdqu RTMP0x, (%rcx);
+       mov %r8, %rcx
+
+       movq (0 * 8)(%rsp), %r10;
+       movq (1 * 8)(%rsp), %r11;
+       movq (2 * 8)(%rsp), %r12;
+       movq (3 * 8)(%rsp), %r13;
+       CFI_RESTORE(%r10);
+       CFI_RESTORE(%r11);
+       CFI_RESTORE(%r12);
+       CFI_RESTORE(%r13);
+
+       call __twofish_dec_blk16;
+
+       vmovdqu (%rcx), RTMP1x;
+
+       vpxor (0 * 32)(%rsi), RA0, RA0;
+       vpxor (1 * 32)(%rsi), RB0, RB0;
+       vpxor (2 * 32)(%rsi), RC0, RC0;
+       vpxor (3 * 32)(%rsi), RD0, RD0;
+       vpxor (4 * 32)(%rsi), RA1, RA1;
+       vpxor (5 * 32)(%rsi), RB1, RB1;
+       vpxor (6 * 32)(%rsi), RC1, RC1;
+       vpxor (7 * 32)(%rsi), RD1, RD1;
+
+       addq $(4 * 8), %rsp;
+       CFI_ADJUST_CFA_OFFSET(-4 * 8);
+
+       /* Checksum_i = Checksum_{i-1} xor P_i  */
+
+       vmovdqu RA0, (0 * 32)(%rsi);
+       vpxor RA0, RTMP1, RTMP1;
+       vmovdqu RB0, (1 * 32)(%rsi);
+       vpxor RB0, RTMP1, RTMP1;
+       vmovdqu RC0, (2 * 32)(%rsi);
+       vpxor RC0, RTMP1, RTMP1;
+       vmovdqu RD0, (3 * 32)(%rsi);
+       vpxor RD0, RTMP1, RTMP1;
+       vmovdqu RA1, (4 * 32)(%rsi);
+       vpxor RA1, RTMP1, RTMP1;
+       vmovdqu RB1, (5 * 32)(%rsi);
+       vpxor RB1, RTMP1, RTMP1;
+       vmovdqu RC1, (6 * 32)(%rsi);
+       vpxor RC1, RTMP1, RTMP1;
+       vmovdqu RD1, (7 * 32)(%rsi);
+       vpxor RD1, RTMP1, RTMP1;
+
+       vextracti128 $1, RTMP1, RNOTx;
+       vpxor RNOTx, RTMP1x, RTMP1x;
+       vmovdqu RTMP1x, (%rcx);
+
+       vzeroall;
+
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size _gcry_twofish_avx2_ocb_dec,.-_gcry_twofish_avx2_ocb_dec;)
+
+.align 8
+.globl _gcry_twofish_avx2_ocb_auth
+ELF(.type _gcry_twofish_avx2_ocb_auth,@function;)
+
+_gcry_twofish_avx2_ocb_auth:
+       /* input:
+        *      %rdi: ctx, CTX
+        *      %rsi: abuf (16 blocks)
+        *      %rdx: offset
+        *      %rcx: checksum
+        *      %r8 : L pointers (void *L[16])
+        */
+       CFI_STARTPROC();
+
+       vzeroupper;
+
+       subq $(4 * 8), %rsp;
+       CFI_ADJUST_CFA_OFFSET(4 * 8);
+
+       movq %r10, (0 * 8)(%rsp);
+       movq %r11, (1 * 8)(%rsp);
+       movq %r12, (2 * 8)(%rsp);
+       movq %r13, (3 * 8)(%rsp);
+       CFI_REL_OFFSET(%r10, 0 * 8);
+       CFI_REL_OFFSET(%r11, 1 * 8);
+       CFI_REL_OFFSET(%r12, 2 * 8);
+       CFI_REL_OFFSET(%r13, 3 * 8);
+
+       vmovdqu (%rdx), RTMP0x;
+
+       /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+       /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i)  */
+
+#define OCB_INPUT(n, l0reg, l1reg, yreg) \
+         vmovdqu (n * 32)(%rsi), yreg; \
+         vpxor (l0reg), RTMP0x, RNOTx; \
+         vpxor (l1reg), RNOTx, RTMP0x; \
+         vinserti128 $1, RTMP0x, RNOT, RNOT; \
+         vpxor yreg, RNOT, yreg;
+
+       movq (0 * 8)(%r8), %r10;
+       movq (1 * 8)(%r8), %r11;
+       movq (2 * 8)(%r8), %r12;
+       movq (3 * 8)(%r8), %r13;
+       OCB_INPUT(0, %r10, %r11, RA0);
+       OCB_INPUT(1, %r12, %r13, RB0);
+       movq (4 * 8)(%r8), %r10;
+       movq (5 * 8)(%r8), %r11;
+       movq (6 * 8)(%r8), %r12;
+       movq (7 * 8)(%r8), %r13;
+       OCB_INPUT(2, %r10, %r11, RC0);
+       OCB_INPUT(3, %r12, %r13, RD0);
+       movq (8 * 8)(%r8), %r10;
+       movq (9 * 8)(%r8), %r11;
+       movq (10 * 8)(%r8), %r12;
+       movq (11 * 8)(%r8), %r13;
+       OCB_INPUT(4, %r10, %r11, RA1);
+       OCB_INPUT(5, %r12, %r13, RB1);
+       movq (12 * 8)(%r8), %r10;
+       movq (13 * 8)(%r8), %r11;
+       movq (14 * 8)(%r8), %r12;
+       movq (15 * 8)(%r8), %r13;
+       OCB_INPUT(6, %r10, %r11, RC1);
+       OCB_INPUT(7, %r12, %r13, RD1);
+#undef OCB_INPUT
+
+       vmovdqu RTMP0x, (%rdx);
+
+       movq (0 * 8)(%rsp), %r10;
+       movq (1 * 8)(%rsp), %r11;
+       movq (2 * 8)(%rsp), %r12;
+       movq (3 * 8)(%rsp), %r13;
+       CFI_RESTORE(%r10);
+       CFI_RESTORE(%r11);
+       CFI_RESTORE(%r12);
+       CFI_RESTORE(%r13);
+
+       call __twofish_enc_blk16;
+
+       vpxor RA0, RB0, RA0;
+       vpxor RC0, RD0, RC0;
+       vpxor RA1, RB1, RA1;
+       vpxor RC1, RD1, RC1;
+
+       vpxor RA0, RC0, RA0;
+       vpxor RA1, RC1, RA1;
+
+       addq $(4 * 8), %rsp;
+       CFI_ADJUST_CFA_OFFSET(-4 * 8);
+
+       vpxor RA1, RA0, RTMP1;
+
+       vextracti128 $1, RTMP1, RNOTx;
+       vpxor (%rcx), RTMP1x, RTMP1x;
+       vpxor RNOTx, RTMP1x, RTMP1x;
+       vmovdqu RTMP1x, (%rcx);
+
+       vzeroall;
+
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size _gcry_twofish_avx2_ocb_auth,.-_gcry_twofish_avx2_ocb_auth;)
+
+.align 16
+
+/* For CTR-mode IV byteswap */
+ _gcry_twofish_bswap128_mask:
+.Lbswap128_mask:
+       .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
+ELF(.size _gcry_twofish_bswap128_mask,.-_gcry_twofish_bswap128_mask;)
+
+#endif /*defined(USE_TWOFISH) && defined(ENABLE_AVX2_SUPPORT)*/
+#endif /*__x86_64*/
diff --git a/grub-core/lib/libgcrypt/cipher/twofish.c 
b/grub-core/lib/libgcrypt/cipher/twofish.c
index f1a93ca88..d19e07904 100644
--- a/grub-core/lib/libgcrypt/cipher/twofish.c
+++ b/grub-core/lib/libgcrypt/cipher/twofish.c
@@ -44,18 +44,90 @@
 #include "types.h"  /* for byte and u32 typedefs */
 #include "g10lib.h"
 #include "cipher.h"
+#include "bufhelp.h"
+#include "cipher-internal.h"
+#include "cipher-selftest.h"
+
+
+#define TWOFISH_BLOCKSIZE 16
+
+
+/* USE_AMD64_ASM indicates whether to use AMD64 assembly code. */
+#undef USE_AMD64_ASM
+#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+    defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+# define USE_AMD64_ASM 1
+#endif
+
+/* USE_ARM_ASM indicates whether to use ARM assembly code. */
+#undef USE_ARM_ASM
+#if defined(__ARMEL__)
+# if defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS)
+#  define USE_ARM_ASM 1
+# endif
+#endif
+# if defined(__AARCH64EL__)
+#  ifdef HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS
+#   define USE_ARM_ASM 1
+#  endif
+# endif
+
+/* USE_AVX2 indicates whether to compile with AMD64 AVX2 code. */
+#undef USE_AVX2
+#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+    defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+# if defined(ENABLE_AVX2_SUPPORT)
+#  define USE_AVX2 1
+# endif
+#endif
+
 
 /* Prototype for the self-test function. */
 static const char *selftest(void);
 
+
+/* Prototypes for the bulk functions. */
+static void _gcry_twofish_ctr_enc (void *context, unsigned char *ctr,
+                                  void *outbuf_arg, const void *inbuf_arg,
+                                  size_t nblocks);
+static void _gcry_twofish_cbc_dec (void *context, unsigned char *iv,
+                                  void *outbuf_arg, const void *inbuf_arg,
+                                  size_t nblocks);
+static void _gcry_twofish_cfb_dec (void *context, unsigned char *iv,
+                                  void *outbuf_arg, const void *inbuf_arg,
+                                  size_t nblocks);
+static size_t _gcry_twofish_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
+                                      const void *inbuf_arg, size_t nblocks,
+                                      int encrypt);
+static size_t _gcry_twofish_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
+                                     size_t nblocks);
+
+
 /* Structure for an expanded Twofish key.  s contains the key-dependent
  * S-boxes composed with the MDS matrix; w contains the eight "whitening"
  * subkeys, K[0] through K[7]. k holds the remaining, "round" subkeys.  Note
  * that k[i] corresponds to what the Twofish paper calls K[i+8]. */
 typedef struct {
    u32 s[4][256], w[8], k[32];
+
+#ifdef USE_AVX2
+  int use_avx2;
+#endif
 } TWOFISH_context;
 
+
+/* Assembly implementations use SystemV ABI, ABI conversion and additional
+ * stack to store XMM6-XMM15 needed on Win64. */
+#undef ASM_FUNC_ABI
+#if defined(USE_AVX2)
+# ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+#  define ASM_FUNC_ABI __attribute__((sysv_abi))
+# else
+#  define ASM_FUNC_ABI
+# endif
+#endif
+
+
 /* These two tables are the q0 and q1 permutations, exactly as described in
  * the Twofish paper. */
 
@@ -333,7 +405,8 @@ static const u32 mds[4][256] = {
  * see a non-horrible way of avoiding them, and I did manage to group the
  * statements so that each if covers four group multiplications. */
 
-static const byte poly_to_exp[255] = {
+static const u16 poly_to_exp[256] = {
+   492,
    0x00, 0x01, 0x17, 0x02, 0x2E, 0x18, 0x53, 0x03, 0x6A, 0x2F, 0x93, 0x19,
    0x34, 0x54, 0x45, 0x04, 0x5C, 0x6B, 0xB6, 0x30, 0xA6, 0x94, 0x4B, 0x1A,
    0x8C, 0x35, 0x81, 0x55, 0xAA, 0x46, 0x0D, 0x05, 0x24, 0x5D, 0x87, 0x6C,
@@ -358,7 +431,7 @@ static const byte poly_to_exp[255] = {
    0x85, 0xC8, 0xA1
 };
 
-static const byte exp_to_poly[492] = {
+static const byte exp_to_poly[492 + 256] = {
    0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x4D, 0x9A, 0x79, 0xF2,
    0xA9, 0x1F, 0x3E, 0x7C, 0xF8, 0xBD, 0x37, 0x6E, 0xDC, 0xF5, 0xA7, 0x03,
    0x06, 0x0C, 0x18, 0x30, 0x60, 0xC0, 0xCD, 0xD7, 0xE3, 0x8B, 0x5B, 0xB6,
@@ -399,7 +472,7 @@ static const byte exp_to_poly[492] = {
    0x3F, 0x7E, 0xFC, 0xB5, 0x27, 0x4E, 0x9C, 0x75, 0xEA, 0x99, 0x7F, 0xFE,
    0xB1, 0x2F, 0x5E, 0xBC, 0x35, 0x6A, 0xD4, 0xE5, 0x87, 0x43, 0x86, 0x41,
    0x82, 0x49, 0x92, 0x69, 0xD2, 0xE9, 0x9F, 0x73, 0xE6, 0x81, 0x4F, 0x9E,
-   0x71, 0xE2, 0x89, 0x5F, 0xBE, 0x31, 0x62, 0xC4, 0xC5, 0xC7, 0xC3, 0xCB
+   0x71, 0xE2, 0x89, 0x5F, 0xBE, 0x31, 0x62, 0xC4, 0xC5, 0xC7, 0xC3, 0xCB,
 };
 
 
@@ -471,14 +544,15 @@ static byte calc_sb_tbl[512] = {
     0x6F, 0x16, 0x9D, 0x25, 0x36, 0x86, 0x42, 0x56,
     0x4A, 0x55, 0x5E, 0x09, 0xC1, 0xBE, 0xE0, 0x91
 };
+
 /* Macro to perform one column of the RS matrix multiplication.  The
  * parameters a, b, c, and d are the four bytes of output; i is the index
  * of the key bytes, and w, x, y, and z, are the column of constants from
  * the RS matrix, preprocessed through the poly_to_exp table. */
 
 #define CALC_S(a, b, c, d, i, w, x, y, z) \
-   if (key[i]) { \
-      tmp = poly_to_exp[key[i] - 1]; \
+   { \
+      tmp = poly_to_exp[key[i]]; \
       (a) ^= exp_to_poly[tmp + (w)]; \
       (b) ^= exp_to_poly[tmp + (x)]; \
       (c) ^= exp_to_poly[tmp + (y)]; \
@@ -577,7 +651,7 @@ do_twofish_setkey (TWOFISH_context *ctx, const byte *key, 
const unsigned keylen)
   byte si = 0, sj = 0, sk = 0, sl = 0, sm = 0, sn = 0, so = 0, sp = 0;
 
   /* Temporary for CALC_S. */
-  byte tmp;
+  unsigned int tmp;
 
   /* Flags for self-test. */
   static int initialized = 0;
@@ -645,28 +719,15 @@ do_twofish_setkey (TWOFISH_context *ctx, const byte *key, 
const unsigned keylen)
           CALC_SB256_2( i, calc_sb_tbl[j], calc_sb_tbl[k] );
        }
 
-      /* Calculate whitening and round subkeys.  The constants are
-       * indices of subkeys, preprocessed through q0 and q1. */
-      CALC_K256 (w, 0, 0xA9, 0x75, 0x67, 0xF3);
-      CALC_K256 (w, 2, 0xB3, 0xC6, 0xE8, 0xF4);
-      CALC_K256 (w, 4, 0x04, 0xDB, 0xFD, 0x7B);
-      CALC_K256 (w, 6, 0xA3, 0xFB, 0x76, 0xC8);
-      CALC_K256 (k, 0, 0x9A, 0x4A, 0x92, 0xD3);
-      CALC_K256 (k, 2, 0x80, 0xE6, 0x78, 0x6B);
-      CALC_K256 (k, 4, 0xE4, 0x45, 0xDD, 0x7D);
-      CALC_K256 (k, 6, 0xD1, 0xE8, 0x38, 0x4B);
-      CALC_K256 (k, 8, 0x0D, 0xD6, 0xC6, 0x32);
-      CALC_K256 (k, 10, 0x35, 0xD8, 0x98, 0xFD);
-      CALC_K256 (k, 12, 0x18, 0x37, 0xF7, 0x71);
-      CALC_K256 (k, 14, 0xEC, 0xF1, 0x6C, 0xE1);
-      CALC_K256 (k, 16, 0x43, 0x30, 0x75, 0x0F);
-      CALC_K256 (k, 18, 0x37, 0xF8, 0x26, 0x1B);
-      CALC_K256 (k, 20, 0xFA, 0x87, 0x13, 0xFA);
-      CALC_K256 (k, 22, 0x94, 0x06, 0x48, 0x3F);
-      CALC_K256 (k, 24, 0xF2, 0x5E, 0xD0, 0xBA);
-      CALC_K256 (k, 26, 0x8B, 0xAE, 0x30, 0x5B);
-      CALC_K256 (k, 28, 0x84, 0x8A, 0x54, 0x00);
-      CALC_K256 (k, 30, 0xDF, 0xBC, 0x23, 0x9D);
+      /* Calculate whitening and round subkeys. */
+      for (i = 0; i < 8; i += 2)
+       {
+         CALC_K256 ( w, i, q0[i], q1[i], q0[i + 1], q1[i + 1] );
+       }
+      for (j = 0; j < 32; j += 2, i += 2)
+       {
+         CALC_K256 ( k, j, q0[i], q1[i], q0[i + 1], q1[i + 1] );
+       }
     }
   else
     {
@@ -676,44 +737,190 @@ do_twofish_setkey (TWOFISH_context *ctx, const byte 
*key, const unsigned keylen)
           CALC_SB_2( i, calc_sb_tbl[j], calc_sb_tbl[k] );
         }
 
-      /* Calculate whitening and round subkeys.  The constants are
-       * indices of subkeys, preprocessed through q0 and q1. */
-      CALC_K (w, 0, 0xA9, 0x75, 0x67, 0xF3);
-      CALC_K (w, 2, 0xB3, 0xC6, 0xE8, 0xF4);
-      CALC_K (w, 4, 0x04, 0xDB, 0xFD, 0x7B);
-      CALC_K (w, 6, 0xA3, 0xFB, 0x76, 0xC8);
-      CALC_K (k, 0, 0x9A, 0x4A, 0x92, 0xD3);
-      CALC_K (k, 2, 0x80, 0xE6, 0x78, 0x6B);
-      CALC_K (k, 4, 0xE4, 0x45, 0xDD, 0x7D);
-      CALC_K (k, 6, 0xD1, 0xE8, 0x38, 0x4B);
-      CALC_K (k, 8, 0x0D, 0xD6, 0xC6, 0x32);
-      CALC_K (k, 10, 0x35, 0xD8, 0x98, 0xFD);
-      CALC_K (k, 12, 0x18, 0x37, 0xF7, 0x71);
-      CALC_K (k, 14, 0xEC, 0xF1, 0x6C, 0xE1);
-      CALC_K (k, 16, 0x43, 0x30, 0x75, 0x0F);
-      CALC_K (k, 18, 0x37, 0xF8, 0x26, 0x1B);
-      CALC_K (k, 20, 0xFA, 0x87, 0x13, 0xFA);
-      CALC_K (k, 22, 0x94, 0x06, 0x48, 0x3F);
-      CALC_K (k, 24, 0xF2, 0x5E, 0xD0, 0xBA);
-      CALC_K (k, 26, 0x8B, 0xAE, 0x30, 0x5B);
-      CALC_K (k, 28, 0x84, 0x8A, 0x54, 0x00);
-      CALC_K (k, 30, 0xDF, 0xBC, 0x23, 0x9D);
+      /* Calculate whitening and round subkeys. */
+      for (i = 0; i < 8; i += 2)
+       {
+         CALC_K ( w, i, q0[i], q1[i], q0[i + 1], q1[i + 1] );
+       }
+      for (j = 0; j < 32; j += 2, i += 2)
+       {
+         CALC_K ( k, j, q0[i], q1[i], q0[i + 1], q1[i + 1] );
+       }
     }
 
   return 0;
 }
 
 static gcry_err_code_t
-twofish_setkey (void *context, const byte *key, unsigned int keylen)
+twofish_setkey (void *context, const byte *key, unsigned int keylen,
+                cipher_bulk_ops_t *bulk_ops)
 {
   TWOFISH_context *ctx = context;
-  int rc = do_twofish_setkey (ctx, key, keylen);
+  unsigned int hwfeatures = _gcry_get_hw_features ();
+  int rc;
+
+  rc = do_twofish_setkey (ctx, key, keylen);
+
+#ifdef USE_AVX2
+  ctx->use_avx2 = 0;
+  if ((hwfeatures & HWF_INTEL_AVX2) && (hwfeatures & HWF_INTEL_FAST_VPGATHER))
+    {
+      ctx->use_avx2 = 1;
+    }
+#endif
+
+  /* Setup bulk encryption routines.  */
+  memset (bulk_ops, 0, sizeof(*bulk_ops));
+  bulk_ops->cbc_dec = _gcry_twofish_cbc_dec;
+  bulk_ops->cfb_dec = _gcry_twofish_cfb_dec;
+  bulk_ops->ctr_enc = _gcry_twofish_ctr_enc;
+  bulk_ops->ocb_crypt = _gcry_twofish_ocb_crypt;
+  bulk_ops->ocb_auth  = _gcry_twofish_ocb_auth;
+
+  (void)hwfeatures;
+
   _gcry_burn_stack (23+6*sizeof(void*));
   return rc;
 }
 
 
+#ifdef USE_AVX2
+/* Assembler implementations of Twofish using AVX2.  Process 16 block in
+   parallel.
+ */
+extern void _gcry_twofish_avx2_ctr_enc(const TWOFISH_context *ctx,
+                                      unsigned char *out,
+                                      const unsigned char *in,
+                                      unsigned char *ctr) ASM_FUNC_ABI;
+
+extern void _gcry_twofish_avx2_cbc_dec(const TWOFISH_context *ctx,
+                                      unsigned char *out,
+                                      const unsigned char *in,
+                                      unsigned char *iv) ASM_FUNC_ABI;
+
+extern void _gcry_twofish_avx2_cfb_dec(const TWOFISH_context *ctx,
+                                      unsigned char *out,
+                                      const unsigned char *in,
+                                      unsigned char *iv) ASM_FUNC_ABI;
+
+extern void _gcry_twofish_avx2_ocb_enc(const TWOFISH_context *ctx,
+                                      unsigned char *out,
+                                      const unsigned char *in,
+                                      unsigned char *offset,
+                                      unsigned char *checksum,
+                                      const u64 Ls[16]) ASM_FUNC_ABI;
+
+extern void _gcry_twofish_avx2_ocb_dec(const TWOFISH_context *ctx,
+                                      unsigned char *out,
+                                      const unsigned char *in,
+                                      unsigned char *offset,
+                                      unsigned char *checksum,
+                                      const u64 Ls[16]) ASM_FUNC_ABI;
+
+extern void _gcry_twofish_avx2_ocb_auth(const TWOFISH_context *ctx,
+                                       const unsigned char *abuf,
+                                       unsigned char *offset,
+                                       unsigned char *checksum,
+                                       const u64 Ls[16]) ASM_FUNC_ABI;
+#endif
+
 
+#ifdef USE_AMD64_ASM
+
+/* Assembly implementations of Twofish. */
+extern void _gcry_twofish_amd64_encrypt_block(const TWOFISH_context *c,
+                                             byte *out, const byte *in);
+
+extern void _gcry_twofish_amd64_decrypt_block(const TWOFISH_context *c,
+                                             byte *out, const byte *in);
+
+/* These assembly implementations process three blocks in parallel. */
+extern void _gcry_twofish_amd64_ctr_enc(const TWOFISH_context *c, byte *out,
+                                       const byte *in, byte *ctr);
+
+extern void _gcry_twofish_amd64_cbc_dec(const TWOFISH_context *c, byte *out,
+                                       const byte *in, byte *iv);
+
+extern void _gcry_twofish_amd64_cfb_dec(const TWOFISH_context *c, byte *out,
+                                       const byte *in, byte *iv);
+
+extern void _gcry_twofish_amd64_ocb_enc(const TWOFISH_context *ctx, byte *out,
+                                       const byte *in, byte *offset,
+                                       byte *checksum, const u64 Ls[3]);
+
+extern void _gcry_twofish_amd64_ocb_dec(const TWOFISH_context *ctx, byte *out,
+                                       const byte *in, byte *offset,
+                                       byte *checksum, const u64 Ls[3]);
+
+extern void _gcry_twofish_amd64_ocb_auth(const TWOFISH_context *ctx,
+                                        const byte *abuf, byte *offset,
+                                        byte *checksum, const u64 Ls[3]);
+
+static inline void
+twofish_amd64_encrypt_block(const TWOFISH_context *c, byte *out, const byte 
*in)
+{
+  _gcry_twofish_amd64_encrypt_block(c, out, in);
+}
+
+static inline void
+twofish_amd64_decrypt_block(const TWOFISH_context *c, byte *out, const byte 
*in)
+{
+  _gcry_twofish_amd64_decrypt_block(c, out, in);
+}
+
+static inline void
+twofish_amd64_ctr_enc(const TWOFISH_context *c, byte *out, const byte *in,
+                      byte *ctr)
+{
+  _gcry_twofish_amd64_ctr_enc(c, out, in, ctr);
+}
+
+static inline void
+twofish_amd64_cbc_dec(const TWOFISH_context *c, byte *out, const byte *in,
+                      byte *iv)
+{
+  _gcry_twofish_amd64_cbc_dec(c, out, in, iv);
+}
+
+static inline void
+twofish_amd64_cfb_dec(const TWOFISH_context *c, byte *out, const byte *in,
+                      byte *iv)
+{
+  _gcry_twofish_amd64_cfb_dec(c, out, in, iv);
+}
+
+static inline void
+twofish_amd64_ocb_enc(const TWOFISH_context *ctx, byte *out, const byte *in,
+                     byte *offset, byte *checksum, const u64 Ls[3])
+{
+  _gcry_twofish_amd64_ocb_enc(ctx, out, in, offset, checksum, Ls);
+}
+
+static inline void
+twofish_amd64_ocb_dec(const TWOFISH_context *ctx, byte *out, const byte *in,
+                     byte *offset, byte *checksum, const u64 Ls[3])
+{
+  _gcry_twofish_amd64_ocb_dec(ctx, out, in, offset, checksum, Ls);
+}
+
+static inline void
+twofish_amd64_ocb_auth(const TWOFISH_context *ctx, const byte *abuf,
+                      byte *offset, byte *checksum, const u64 Ls[3])
+{
+  _gcry_twofish_amd64_ocb_auth(ctx, abuf, offset, checksum, Ls);
+}
+
+#elif defined(USE_ARM_ASM)
+
+/* Assembly implementations of Twofish. */
+extern void _gcry_twofish_arm_encrypt_block(const TWOFISH_context *c,
+                                             byte *out, const byte *in);
+
+extern void _gcry_twofish_arm_decrypt_block(const TWOFISH_context *c,
+                                             byte *out, const byte *in);
+
+#else /*!USE_AMD64_ASM && !USE_ARM_ASM*/
+
 /* Macros to compute the g() function in the encryption and decryption
  * rounds.  G1 is the straight g() function; G2 includes the 8-bit
  * rotation for the high 32-bit word. */
@@ -764,16 +971,40 @@ twofish_setkey (void *context, const byte *key, unsigned 
int keylen)
  * whitening subkey number m. */
 
 #define INPACK(n, x, m) \
-   x = in[4 * (n)] ^ (in[4 * (n) + 1] << 8) \
-     ^ (in[4 * (n) + 2] << 16) ^ (in[4 * (n) + 3] << 24) ^ ctx->w[m]
+   x = buf_get_le32(in + (n) * 4); \
+   x ^= ctx->w[m]
 
 #define OUTUNPACK(n, x, m) \
    x ^= ctx->w[m]; \
-   out[4 * (n)] = x; out[4 * (n) + 1] = x >> 8; \
-   out[4 * (n) + 2] = x >> 16; out[4 * (n) + 3] = x >> 24
+   buf_put_le32(out + (n) * 4, x)
+
+#endif /*!USE_AMD64_ASM*/
+
 
 /* Encrypt one block.  in and out may be the same. */
 
+#ifdef USE_AMD64_ASM
+
+static unsigned int
+twofish_encrypt (void *context, byte *out, const byte *in)
+{
+  TWOFISH_context *ctx = context;
+  twofish_amd64_encrypt_block(ctx, out, in);
+  return /*burn_stack*/ (4*sizeof (void*));
+}
+
+#elif defined(USE_ARM_ASM)
+
+static unsigned int
+twofish_encrypt (void *context, byte *out, const byte *in)
+{
+  TWOFISH_context *ctx = context;
+  _gcry_twofish_arm_encrypt_block(ctx, out, in);
+  return /*burn_stack*/ (4*sizeof (void*));
+}
+
+#else /*!USE_AMD64_ASM && !USE_ARM_ASM*/
+
 static void
 do_twofish_encrypt (const TWOFISH_context *ctx, byte *out, const byte *in)
 {
@@ -806,17 +1037,41 @@ do_twofish_encrypt (const TWOFISH_context *ctx, byte 
*out, const byte *in)
   OUTUNPACK (3, b, 7);
 }
 
-static void
+static unsigned int
 twofish_encrypt (void *context, byte *out, const byte *in)
 {
   TWOFISH_context *ctx = context;
   do_twofish_encrypt (ctx, out, in);
-  _gcry_burn_stack (24+3*sizeof (void*));
+  return /*burn_stack*/ (24+3*sizeof (void*));
 }
 
+#endif /*!USE_AMD64_ASM && !USE_ARM_ASM*/
+
 
 /* Decrypt one block.  in and out may be the same. */
 
+#ifdef USE_AMD64_ASM
+
+static unsigned int
+twofish_decrypt (void *context, byte *out, const byte *in)
+{
+  TWOFISH_context *ctx = context;
+  twofish_amd64_decrypt_block(ctx, out, in);
+  return /*burn_stack*/ (4*sizeof (void*));
+}
+
+#elif defined(USE_ARM_ASM)
+
+static unsigned int
+twofish_decrypt (void *context, byte *out, const byte *in)
+{
+  TWOFISH_context *ctx = context;
+  _gcry_twofish_arm_decrypt_block(ctx, out, in);
+  return /*burn_stack*/ (4*sizeof (void*));
+}
+
+#else /*!USE_AMD64_ASM && !USE_ARM_ASM*/
+
 static void
 do_twofish_decrypt (const TWOFISH_context *ctx, byte *out, const byte *in)
 {
@@ -849,13 +1104,499 @@ do_twofish_decrypt (const TWOFISH_context *ctx, byte 
*out, const byte *in)
   OUTUNPACK (3, d, 3);
 }
 
-static void
+static unsigned int
 twofish_decrypt (void *context, byte *out, const byte *in)
 {
   TWOFISH_context *ctx = context;
 
   do_twofish_decrypt (ctx, out, in);
-  _gcry_burn_stack (24+3*sizeof (void*));
+  return /*burn_stack*/ (24+3*sizeof (void*));
+}
+
+#endif /*!USE_AMD64_ASM && !USE_ARM_ASM*/
+
+
+
+/* Bulk encryption of complete blocks in CTR mode.  This function is only
+   intended for the bulk encryption feature of cipher.c.  CTR is expected to be
+   of size TWOFISH_BLOCKSIZE. */
+static void
+_gcry_twofish_ctr_enc(void *context, unsigned char *ctr, void *outbuf_arg,
+                     const void *inbuf_arg, size_t nblocks)
+{
+  TWOFISH_context *ctx = context;
+  unsigned char *outbuf = outbuf_arg;
+  const unsigned char *inbuf = inbuf_arg;
+  unsigned char tmpbuf[TWOFISH_BLOCKSIZE];
+  unsigned int burn, burn_stack_depth = 0;
+
+#ifdef USE_AVX2
+  if (ctx->use_avx2)
+    {
+      int did_use_avx2 = 0;
+
+      /* Process data in 16 block chunks. */
+      while (nblocks >= 16)
+        {
+          _gcry_twofish_avx2_ctr_enc(ctx, outbuf, inbuf, ctr);
+
+          nblocks -= 16;
+          outbuf += 16 * TWOFISH_BLOCKSIZE;
+          inbuf  += 16 * TWOFISH_BLOCKSIZE;
+          did_use_avx2 = 1;
+        }
+
+      if (did_use_avx2)
+        {
+          /* twofish-avx2 assembly code does not use stack */
+          if (nblocks == 0)
+            burn_stack_depth = 0;
+        }
+    }
+#endif
+
+#ifdef USE_AMD64_ASM
+  {
+    /* Process data in 3 block chunks. */
+    while (nblocks >= 3)
+      {
+        twofish_amd64_ctr_enc(ctx, outbuf, inbuf, ctr);
+
+        nblocks -= 3;
+        outbuf += 3 * TWOFISH_BLOCKSIZE;
+        inbuf += 3 * TWOFISH_BLOCKSIZE;
+
+        burn = 8 * sizeof(void*);
+        if (burn > burn_stack_depth)
+          burn_stack_depth = burn;
+      }
+
+    /* Use generic code to handle smaller chunks... */
+    /* TODO: use caching instead? */
+  }
+#endif
+
+  for ( ;nblocks; nblocks-- )
+    {
+      /* Encrypt the counter. */
+      burn = twofish_encrypt(ctx, tmpbuf, ctr);
+      if (burn > burn_stack_depth)
+        burn_stack_depth = burn;
+
+      /* XOR the input with the encrypted counter and store in output.  */
+      cipher_block_xor(outbuf, tmpbuf, inbuf, TWOFISH_BLOCKSIZE);
+      outbuf += TWOFISH_BLOCKSIZE;
+      inbuf  += TWOFISH_BLOCKSIZE;
+      /* Increment the counter.  */
+      cipher_block_add(ctr, 1, TWOFISH_BLOCKSIZE);
+    }
+
+  wipememory(tmpbuf, sizeof(tmpbuf));
+  _gcry_burn_stack(burn_stack_depth);
+}
+
+
+/* Bulk decryption of complete blocks in CBC mode.  This function is only
+   intended for the bulk encryption feature of cipher.c. */
+static void
+_gcry_twofish_cbc_dec(void *context, unsigned char *iv, void *outbuf_arg,
+                     const void *inbuf_arg, size_t nblocks)
+{
+  TWOFISH_context *ctx = context;
+  unsigned char *outbuf = outbuf_arg;
+  const unsigned char *inbuf = inbuf_arg;
+  unsigned char savebuf[TWOFISH_BLOCKSIZE];
+  unsigned int burn, burn_stack_depth = 0;
+
+#ifdef USE_AVX2
+  if (ctx->use_avx2)
+    {
+      int did_use_avx2 = 0;
+
+      /* Process data in 16 block chunks. */
+      while (nblocks >= 16)
+        {
+          _gcry_twofish_avx2_cbc_dec(ctx, outbuf, inbuf, iv);
+
+          nblocks -= 16;
+          outbuf += 16 * TWOFISH_BLOCKSIZE;
+          inbuf  += 16 * TWOFISH_BLOCKSIZE;
+          did_use_avx2 = 1;
+        }
+
+      if (did_use_avx2)
+        {
+          /* twofish-avx2 assembly code does not use stack */
+          if (nblocks == 0)
+            burn_stack_depth = 0;
+        }
+    }
+#endif
+
+#ifdef USE_AMD64_ASM
+  {
+    /* Process data in 3 block chunks. */
+    while (nblocks >= 3)
+      {
+        twofish_amd64_cbc_dec(ctx, outbuf, inbuf, iv);
+
+        nblocks -= 3;
+        outbuf += 3 * TWOFISH_BLOCKSIZE;
+        inbuf += 3 * TWOFISH_BLOCKSIZE;
+
+        burn = 9 * sizeof(void*);
+        if (burn > burn_stack_depth)
+          burn_stack_depth = burn;
+      }
+
+    /* Use generic code to handle smaller chunks... */
+  }
+#endif
+
+  for ( ;nblocks; nblocks-- )
+    {
+      /* INBUF is needed later and it may be identical to OUTBUF, so store
+         the intermediate result to SAVEBUF.  */
+      burn = twofish_decrypt (ctx, savebuf, inbuf);
+      if (burn > burn_stack_depth)
+        burn_stack_depth = burn;
+
+      cipher_block_xor_n_copy_2(outbuf, savebuf, iv, inbuf, TWOFISH_BLOCKSIZE);
+      inbuf += TWOFISH_BLOCKSIZE;
+      outbuf += TWOFISH_BLOCKSIZE;
+    }
+
+  wipememory(savebuf, sizeof(savebuf));
+  _gcry_burn_stack(burn_stack_depth);
+}
+
+
+/* Bulk decryption of complete blocks in CFB mode.  This function is only
+   intended for the bulk encryption feature of cipher.c. */
+static void
+_gcry_twofish_cfb_dec(void *context, unsigned char *iv, void *outbuf_arg,
+                   const void *inbuf_arg, size_t nblocks)
+{
+  TWOFISH_context *ctx = context;
+  unsigned char *outbuf = outbuf_arg;
+  const unsigned char *inbuf = inbuf_arg;
+  unsigned int burn, burn_stack_depth = 0;
+
+#ifdef USE_AVX2
+  if (ctx->use_avx2)
+    {
+      int did_use_avx2 = 0;
+
+      /* Process data in 16 block chunks. */
+      while (nblocks >= 16)
+        {
+          _gcry_twofish_avx2_cfb_dec(ctx, outbuf, inbuf, iv);
+
+          nblocks -= 16;
+          outbuf += 16 * TWOFISH_BLOCKSIZE;
+          inbuf  += 16 * TWOFISH_BLOCKSIZE;
+          did_use_avx2 = 1;
+        }
+
+      if (did_use_avx2)
+        {
+          /* twofish-avx2 assembly code does not use stack */
+          if (nblocks == 0)
+            burn_stack_depth = 0;
+        }
+    }
+#endif
+
+#ifdef USE_AMD64_ASM
+  {
+    /* Process data in 3 block chunks. */
+    while (nblocks >= 3)
+      {
+        twofish_amd64_cfb_dec(ctx, outbuf, inbuf, iv);
+
+        nblocks -= 3;
+        outbuf += 3 * TWOFISH_BLOCKSIZE;
+        inbuf += 3 * TWOFISH_BLOCKSIZE;
+
+        burn = 8 * sizeof(void*);
+        if (burn > burn_stack_depth)
+          burn_stack_depth = burn;
+      }
+
+    /* Use generic code to handle smaller chunks... */
+  }
+#endif
+
+  for ( ;nblocks; nblocks-- )
+    {
+      burn = twofish_encrypt(ctx, iv, iv);
+      if (burn > burn_stack_depth)
+        burn_stack_depth = burn;
+
+      cipher_block_xor_n_copy(outbuf, iv, inbuf, TWOFISH_BLOCKSIZE);
+      outbuf += TWOFISH_BLOCKSIZE;
+      inbuf += TWOFISH_BLOCKSIZE;
+    }
+
+  _gcry_burn_stack(burn_stack_depth);
+}
+
+/* Bulk encryption/decryption of complete blocks in OCB mode. */
+static size_t
+_gcry_twofish_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
+                       const void *inbuf_arg, size_t nblocks, int encrypt)
+{
+#ifdef USE_AMD64_ASM
+  TWOFISH_context *ctx = (void *)&c->context.c;
+  unsigned char *outbuf = outbuf_arg;
+  const unsigned char *inbuf = inbuf_arg;
+  unsigned int burn, burn_stack_depth = 0;
+  u64 blkn = c->u_mode.ocb.data_nblocks;
+
+#ifdef USE_AVX2
+  if (ctx->use_avx2)
+    {
+      int did_use_avx2 = 0;
+      u64 Ls[16];
+      unsigned int n = 16 - (blkn % 16);
+      u64 *l;
+      int i;
+
+      if (nblocks >= 16)
+       {
+         for (i = 0; i < 16; i += 8)
+           {
+             /* Use u64 to store pointers for x32 support (assembly function
+              * assumes 64-bit pointers). */
+             Ls[(i + 0 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+             Ls[(i + 1 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
+             Ls[(i + 2 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+             Ls[(i + 3 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[2];
+             Ls[(i + 4 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+             Ls[(i + 5 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
+             Ls[(i + 6 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+           }
+
+         Ls[(7 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
+         l = &Ls[(15 + n) % 16];
+
+         /* Process data in 16 block chunks. */
+         while (nblocks >= 16)
+           {
+             blkn += 16;
+             *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 16);
+
+             if (encrypt)
+               _gcry_twofish_avx2_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv,
+                                         c->u_ctr.ctr, Ls);
+             else
+               _gcry_twofish_avx2_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv,
+                                         c->u_ctr.ctr, Ls);
+
+             nblocks -= 16;
+             outbuf += 16 * TWOFISH_BLOCKSIZE;
+             inbuf  += 16 * TWOFISH_BLOCKSIZE;
+             did_use_avx2 = 1;
+           }
+       }
+
+      if (did_use_avx2)
+       {
+         /* twofish-avx2 assembly code does not use stack */
+         if (nblocks == 0)
+           burn_stack_depth = 0;
+       }
+    }
+#endif
+
+  {
+    /* Use u64 to store pointers for x32 support (assembly function
+      * assumes 64-bit pointers). */
+    u64 Ls[3];
+
+    /* Process data in 3 block chunks. */
+    while (nblocks >= 3)
+      {
+       Ls[0] = (uintptr_t)(const void *)ocb_get_l(c, blkn + 1);
+       Ls[1] = (uintptr_t)(const void *)ocb_get_l(c, blkn + 2);
+       Ls[2] = (uintptr_t)(const void *)ocb_get_l(c, blkn + 3);
+       blkn += 3;
+
+       if (encrypt)
+         twofish_amd64_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv, c->u_ctr.ctr,
+                               Ls);
+       else
+         twofish_amd64_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv, c->u_ctr.ctr,
+                               Ls);
+
+       nblocks -= 3;
+       outbuf += 3 * TWOFISH_BLOCKSIZE;
+       inbuf  += 3 * TWOFISH_BLOCKSIZE;
+
+       burn = 8 * sizeof(void*);
+       if (burn > burn_stack_depth)
+         burn_stack_depth = burn;
+      }
+
+    /* Use generic code to handle smaller chunks... */
+  }
+
+  c->u_mode.ocb.data_nblocks = blkn;
+
+  if (burn_stack_depth)
+    _gcry_burn_stack (burn_stack_depth + 4 * sizeof(void *));
+#else
+  (void)c;
+  (void)outbuf_arg;
+  (void)inbuf_arg;
+  (void)encrypt;
+#endif
+
+  return nblocks;
+}
+
+/* Bulk authentication of complete blocks in OCB mode. */
+static size_t
+_gcry_twofish_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
+                       size_t nblocks)
+{
+#ifdef USE_AMD64_ASM
+  TWOFISH_context *ctx = (void *)&c->context.c;
+  const unsigned char *abuf = abuf_arg;
+  unsigned int burn, burn_stack_depth = 0;
+  u64 blkn = c->u_mode.ocb.aad_nblocks;
+
+#ifdef USE_AVX2
+  if (ctx->use_avx2)
+    {
+      int did_use_avx2 = 0;
+      u64 Ls[16];
+      unsigned int n = 16 - (blkn % 16);
+      u64 *l;
+      int i;
+
+      if (nblocks >= 16)
+       {
+         for (i = 0; i < 16; i += 8)
+           {
+             /* Use u64 to store pointers for x32 support (assembly function
+              * assumes 64-bit pointers). */
+             Ls[(i + 0 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+             Ls[(i + 1 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
+             Ls[(i + 2 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+             Ls[(i + 3 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[2];
+             Ls[(i + 4 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+             Ls[(i + 5 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1];
+             Ls[(i + 6 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0];
+           }
+
+         Ls[(7 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[3];
+         l = &Ls[(15 + n) % 16];
+
+         /* Process data in 16 block chunks. */
+         while (nblocks >= 16)
+           {
+             blkn += 16;
+             *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 16);
+
+             _gcry_twofish_avx2_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset,
+                                         c->u_mode.ocb.aad_sum, Ls);
+
+             nblocks -= 16;
+             abuf += 16 * TWOFISH_BLOCKSIZE;
+             did_use_avx2 = 1;
+           }
+       }
+
+      if (did_use_avx2)
+       {
+         /* twofish-avx2 assembly code does not use stack */
+         if (nblocks == 0)
+           burn_stack_depth = 0;
+       }
+
+      /* Use generic code to handle smaller chunks... */
+    }
+#endif
+
+  {
+    /* Use u64 to store pointers for x32 support (assembly function
+      * assumes 64-bit pointers). */
+    u64 Ls[3];
+
+    /* Process data in 3 block chunks. */
+    while (nblocks >= 3)
+      {
+       Ls[0] = (uintptr_t)(const void *)ocb_get_l(c, blkn + 1);
+       Ls[1] = (uintptr_t)(const void *)ocb_get_l(c, blkn + 2);
+       Ls[2] = (uintptr_t)(const void *)ocb_get_l(c, blkn + 3);
+       blkn += 3;
+
+       twofish_amd64_ocb_auth(ctx, abuf, c->u_mode.ocb.aad_offset,
+                             c->u_mode.ocb.aad_sum, Ls);
+
+       nblocks -= 3;
+       abuf += 3 * TWOFISH_BLOCKSIZE;
+
+       burn = 8 * sizeof(void*);
+       if (burn > burn_stack_depth)
+         burn_stack_depth = burn;
+      }
+
+    /* Use generic code to handle smaller chunks... */
+  }
+
+  c->u_mode.ocb.aad_nblocks = blkn;
+
+  if (burn_stack_depth)
+    _gcry_burn_stack (burn_stack_depth + 4 * sizeof(void *));
+#else
+  (void)c;
+  (void)abuf_arg;
+#endif
+
+  return nblocks;
+}
+
+
+
+/* Run the self-tests for TWOFISH-CTR, tests IV increment of bulk CTR
+   encryption.  Returns NULL on success. */
+static const char *
+selftest_ctr (void)
+{
+  const int nblocks = 16+1;
+  const int blocksize = TWOFISH_BLOCKSIZE;
+  const int context_size = sizeof(TWOFISH_context);
+
+  return _gcry_selftest_helper_ctr("TWOFISH", &twofish_setkey,
+           &twofish_encrypt, nblocks, blocksize, context_size);
+}
+
+/* Run the self-tests for TWOFISH-CBC, tests bulk CBC decryption.
+   Returns NULL on success. */
+static const char *
+selftest_cbc (void)
+{
+  const int nblocks = 16+2;
+  const int blocksize = TWOFISH_BLOCKSIZE;
+  const int context_size = sizeof(TWOFISH_context);
+
+  return _gcry_selftest_helper_cbc("TWOFISH", &twofish_setkey,
+           &twofish_encrypt, nblocks, blocksize, context_size);
+}
+
+/* Run the self-tests for TWOFISH-CFB, tests bulk CBC decryption.
+   Returns NULL on success. */
+static const char *
+selftest_cfb (void)
+{
+  const int nblocks = 16+2;
+  const int blocksize = TWOFISH_BLOCKSIZE;
+  const int context_size = sizeof(TWOFISH_context);
+
+  return _gcry_selftest_helper_cfb("TWOFISH", &twofish_setkey,
+           &twofish_encrypt, nblocks, blocksize, context_size);
 }
 
 
@@ -865,7 +1606,9 @@ static const char*
 selftest (void)
 {
   TWOFISH_context ctx; /* Expanded key. */
-  byte scratch[16];    /* Encryption/decryption result buffer. */
+  byte scratch[16];    /* Encryption/decryption result buffer. */
+  cipher_bulk_ops_t bulk_ops;
+  const char *r;
 
   /* Test vectors for single encryption/decryption.  Note that I am using
    * the vectors from the Twofish paper's "known answer test", I=3 for
@@ -899,7 +1642,7 @@ selftest (void)
     0x05, 0x93, 0x1C, 0xB6, 0xD4, 0x08, 0xE7, 0xFA
   };
 
-  twofish_setkey (&ctx, key, sizeof(key));
+  twofish_setkey (&ctx, key, sizeof(key), &bulk_ops);
   twofish_encrypt (&ctx, scratch, plaintext);
   if (memcmp (scratch, ciphertext, sizeof (ciphertext)))
     return "Twofish-128 test encryption failed.";
@@ -907,7 +1650,7 @@ selftest (void)
   if (memcmp (scratch, plaintext, sizeof (plaintext)))
     return "Twofish-128 test decryption failed.";
 
-  twofish_setkey (&ctx, key_256, sizeof(key_256));
+  twofish_setkey (&ctx, key_256, sizeof(key_256), &bulk_ops);
   twofish_encrypt (&ctx, scratch, plaintext_256);
   if (memcmp (scratch, ciphertext_256, sizeof (ciphertext_256)))
     return "Twofish-256 test encryption failed.";
@@ -915,6 +1658,13 @@ selftest (void)
   if (memcmp (scratch, plaintext_256, sizeof (plaintext_256)))
     return "Twofish-256 test decryption failed.";
 
+  if ((r = selftest_ctr()) != NULL)
+    return r;
+  if ((r = selftest_cbc()) != NULL)
+    return r;
+  if ((r = selftest_cfb()) != NULL)
+    return r;
+
   return NULL;
 }
 
@@ -935,7 +1685,8 @@ int
 main()
 {
   TWOFISH_context ctx;     /* Expanded key. */
-  int i, j;                /* Loop counters. */
+  int i, j;                /* Loop counters. */
+  cipher_bulk_ops_t bulk_ops;
 
   const char *encrypt_msg; /* Message to print regarding encryption test;
                             * the printf is done outside the loop to avoid
@@ -982,13 +1733,13 @@ main()
   /* Encryption test. */
   for (i = 0; i < 125; i++)
     {
-      twofish_setkey (&ctx, buffer[0], sizeof (buffer[0]));
+      twofish_setkey (&ctx, buffer[0], sizeof (buffer[0]), &bulk_ops);
       for (j = 0; j < 1000; j++)
         twofish_encrypt (&ctx, buffer[2], buffer[2]);
-      twofish_setkey (&ctx, buffer[1], sizeof (buffer[1]));
+      twofish_setkey (&ctx, buffer[1], sizeof (buffer[1]), &bulk_ops);
       for (j = 0; j < 1000; j++)
         twofish_encrypt (&ctx, buffer[3], buffer[3]);
-      twofish_setkey (&ctx, buffer[2], sizeof (buffer[2])*2);
+      twofish_setkey (&ctx, buffer[2], sizeof (buffer[2])*2, &bulk_ops);
       for (j = 0; j < 1000; j++) {
         twofish_encrypt (&ctx, buffer[0], buffer[0]);
         twofish_encrypt (&ctx, buffer[1], buffer[1]);
@@ -1000,15 +1751,15 @@ main()
   /* Decryption test. */
   for (i = 0; i < 125; i++)
     {
-      twofish_setkey (&ctx, buffer[2], sizeof (buffer[2])*2);
+      twofish_setkey (&ctx, buffer[2], sizeof (buffer[2])*2, &bulk_ops);
       for (j = 0; j < 1000; j++) {
         twofish_decrypt (&ctx, buffer[0], buffer[0]);
         twofish_decrypt (&ctx, buffer[1], buffer[1]);
       }
-      twofish_setkey (&ctx, buffer[1], sizeof (buffer[1]));
+      twofish_setkey (&ctx, buffer[1], sizeof (buffer[1]), &bulk_ops);
       for (j = 0; j < 1000; j++)
         twofish_decrypt (&ctx, buffer[3], buffer[3]);
-      twofish_setkey (&ctx, buffer[0], sizeof (buffer[0]));
+      twofish_setkey (&ctx, buffer[0], sizeof (buffer[0]), &bulk_ops);
       for (j = 0; j < 1000; j++)
         twofish_decrypt (&ctx, buffer[2], buffer[2]);
     }
@@ -1029,12 +1780,14 @@ main()
 
 gcry_cipher_spec_t _gcry_cipher_spec_twofish =
   {
+    GCRY_CIPHER_TWOFISH, {0, 0},
     "TWOFISH", NULL, NULL, 16, 256, sizeof (TWOFISH_context),
     twofish_setkey, twofish_encrypt, twofish_decrypt
   };
 
 gcry_cipher_spec_t _gcry_cipher_spec_twofish128 =
   {
+    GCRY_CIPHER_TWOFISH128, {0, 0},
     "TWOFISH128", NULL, NULL, 16, 128, sizeof (TWOFISH_context),
     twofish_setkey, twofish_encrypt, twofish_decrypt
   };
diff --git a/grub-core/lib/libgcrypt/cipher/whirlpool-sse2-amd64.S 
b/grub-core/lib/libgcrypt/cipher/whirlpool-sse2-amd64.S
new file mode 100644
index 000000000..37648faa3
--- /dev/null
+++ b/grub-core/lib/libgcrypt/cipher/whirlpool-sse2-amd64.S
@@ -0,0 +1,348 @@
+/* whirlpool-sse2-amd64.S  -  AMD64 assembly implementation of Whirlpool
+ *
+ * Copyright (C) 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifdef __x86_64
+#include <config.h>
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && defined(USE_WHIRLPOOL)
+
+#include "asm-common-amd64.h"
+
+.text
+
+/* look-up table offsets on RTAB */
+#define RC (0)
+#define C0 (RC + (8 * 10))
+#define C1 (C0 + (8 * 256))
+#define C2 (C1 + (8 * 256))
+#define C3 (C2 + (8 * 256))
+#define C4 (C3 + (8 * 256))
+#define C5 (C4 + (8 * 256))
+#define C6 (C5 + (8 * 256))
+#define C7 (C6 + (8 * 256))
+
+/* stack variables */
+#define STACK_DATAP  (0)
+#define STACK_STATEP (STACK_DATAP + 8)
+#define STACK_ROUNDS (STACK_STATEP + 8)
+#define STACK_NBLKS  (STACK_ROUNDS + 8)
+#define STACK_RBP    (STACK_NBLKS + 8)
+#define STACK_RBX    (STACK_RBP + 8)
+#define STACK_R12    (STACK_RBX + 8)
+#define STACK_R13    (STACK_R12 + 8)
+#define STACK_R14    (STACK_R13 + 8)
+#define STACK_R15    (STACK_R14 + 8)
+#define STACK_MAX    (STACK_R15 + 8)
+
+/* register macros */
+#define RTAB   %rbp
+
+#define RI1    %rax
+#define RI2    %rbx
+#define RI3    %rcx
+#define RI4    %rdx
+
+#define RI1d   %eax
+#define RI2d   %ebx
+#define RI3d   %ecx
+#define RI4d   %edx
+
+#define RI1bl  %al
+#define RI2bl  %bl
+#define RI3bl  %cl
+#define RI4bl  %dl
+
+#define RI1bh  %ah
+#define RI2bh  %bh
+#define RI3bh  %ch
+#define RI4bh  %dh
+
+#define RB0    %r8
+#define RB1    %r9
+#define RB2    %r10
+#define RB3    %r11
+#define RB4    %r12
+#define RB5    %r13
+#define RB6    %r14
+#define RB7    %r15
+
+#define RT0    %rsi
+#define RT1    %rdi
+
+#define RT0d   %esi
+#define RT1d   %edi
+
+#define XKEY0  %xmm0
+#define XKEY1  %xmm1
+#define XKEY2  %xmm2
+#define XKEY3  %xmm3
+#define XKEY4  %xmm4
+#define XKEY5  %xmm5
+#define XKEY6  %xmm6
+#define XKEY7  %xmm7
+
+#define XSTATE0        %xmm8
+#define XSTATE1        %xmm9
+#define XSTATE2        %xmm10
+#define XSTATE3        %xmm11
+#define XSTATE4        %xmm12
+#define XSTATE5        %xmm13
+#define XSTATE6        %xmm14
+#define XSTATE7        %xmm15
+
+/***********************************************************************
+ * AMD64 assembly implementation of Whirlpool.
+ *  - Using table-lookups
+ *  - Store state in XMM registers
+ ***********************************************************************/
+#define __do_whirl(op, ri, \
+                  b0, b1, b2, b3, b4, b5, b6, b7, \
+                  load_ri, load_arg) \
+       movzbl          ri ## bl,       RT0d; \
+       movzbl          ri ## bh,       RT1d; \
+       shrq            $16,            ri; \
+       op ## q         C7(RTAB,RT0,8), b7; \
+       op ## q         C6(RTAB,RT1,8), b6; \
+       movzbl          ri ## bl,       RT0d; \
+       movzbl          ri ## bh,       RT1d; \
+       shrq            $16,            ri; \
+       op ## q         C5(RTAB,RT0,8), b5; \
+       op ## q         C4(RTAB,RT1,8), b4; \
+       movzbl          ri ## bl,       RT0d; \
+       movzbl          ri ## bh,       RT1d; \
+       shrl            $16,            ri ## d; \
+       op ## q         C3(RTAB,RT0,8), b3; \
+       op ## q         C2(RTAB,RT1,8), b2; \
+       movzbl          ri ## bl,       RT0d; \
+       movzbl          ri ## bh,       RT1d; \
+       load_ri(        load_arg,       ri); \
+       op ## q         C1(RTAB,RT0,8), b1; \
+       op ## q         C0(RTAB,RT1,8), b0;
+
+#define do_whirl(op, ri, rb_add, load_ri, load_arg) \
+       __do_whirl(op, ##ri, rb_add, load_ri, load_arg)
+
+#define dummy(...) /*_*/
+
+#define do_movq(src, dst) movq src, dst;
+
+#define RB_ADD0 RB0, RB1, RB2, RB3, RB4, RB5, RB6, RB7
+#define RB_ADD1 RB1, RB2, RB3, RB4, RB5, RB6, RB7, RB0
+#define RB_ADD2 RB2, RB3, RB4, RB5, RB6, RB7, RB0, RB1
+#define RB_ADD3 RB3, RB4, RB5, RB6, RB7, RB0, RB1, RB2
+#define RB_ADD4 RB4, RB5, RB6, RB7, RB0, RB1, RB2, RB3
+#define RB_ADD5 RB5, RB6, RB7, RB0, RB1, RB2, RB3, RB4
+#define RB_ADD6 RB6, RB7, RB0, RB1, RB2, RB3, RB4, RB5
+#define RB_ADD7 RB7, RB0, RB1, RB2, RB3, RB4, RB5, RB6
+
+.align 8
+.globl _gcry_whirlpool_transform_amd64
+ELF(.type  _gcry_whirlpool_transform_amd64,@function;)
+
+_gcry_whirlpool_transform_amd64:
+       /* input:
+        *      %rdi: state
+        *      %rsi: inblk
+        *      %rdx: nblks
+        *      %rcx: look-up tables
+        */
+       CFI_STARTPROC();
+       cmp $0, %rdx;
+       je .Lskip;
+
+       subq $STACK_MAX, %rsp;
+       CFI_ADJUST_CFA_OFFSET(STACK_MAX);
+       movq %rbp, STACK_RBP(%rsp);
+       movq %rbx, STACK_RBX(%rsp);
+       movq %r12, STACK_R12(%rsp);
+       movq %r13, STACK_R13(%rsp);
+       movq %r14, STACK_R14(%rsp);
+       movq %r15, STACK_R15(%rsp);
+       CFI_REL_OFFSET(%rbp, STACK_RBP);
+       CFI_REL_OFFSET(%rbx, STACK_RBX);
+       CFI_REL_OFFSET(%r12, STACK_R12);
+       CFI_REL_OFFSET(%r13, STACK_R13);
+       CFI_REL_OFFSET(%r14, STACK_R14);
+       CFI_REL_OFFSET(%r15, STACK_R15);
+
+       movq %rdx, STACK_NBLKS(%rsp);
+       movq %rdi, STACK_STATEP(%rsp);
+       movq %rsi, STACK_DATAP(%rsp);
+
+       movq %rcx, RTAB;
+
+       jmp .Lfirst_block;
+
+.align 8
+.Lblock_loop:
+       movq STACK_DATAP(%rsp), %rsi;
+       movq RI1, %rdi;
+
+.Lfirst_block:
+       /* load data_block */
+       movq 0*8(%rsi), RB0;
+       movq 1*8(%rsi), RB1;
+       bswapq RB0;
+       movq 2*8(%rsi), RB2;
+       bswapq RB1;
+       movq 3*8(%rsi), RB3;
+       bswapq RB2;
+       movq 4*8(%rsi), RB4;
+       bswapq RB3;
+       movq 5*8(%rsi), RB5;
+       bswapq RB4;
+       movq RB0, XSTATE0;
+       movq 6*8(%rsi), RB6;
+       bswapq RB5;
+       movq RB1, XSTATE1;
+       movq 7*8(%rsi), RB7;
+       bswapq RB6;
+       movq RB2, XSTATE2;
+       bswapq RB7;
+       movq RB3, XSTATE3;
+       movq RB4, XSTATE4;
+       movq RB5, XSTATE5;
+       movq RB6, XSTATE6;
+       movq RB7, XSTATE7;
+
+       /* load key */
+       movq 0*8(%rdi), XKEY0;
+       movq 1*8(%rdi), XKEY1;
+       movq 2*8(%rdi), XKEY2;
+       movq 3*8(%rdi), XKEY3;
+       movq 4*8(%rdi), XKEY4;
+       movq 5*8(%rdi), XKEY5;
+       movq 6*8(%rdi), XKEY6;
+       movq 7*8(%rdi), XKEY7;
+
+       movq XKEY0, RI1;
+       movq XKEY1, RI2;
+       movq XKEY2, RI3;
+       movq XKEY3, RI4;
+
+       /* prepare and store state */
+       pxor XKEY0, XSTATE0;
+       pxor XKEY1, XSTATE1;
+       pxor XKEY2, XSTATE2;
+       pxor XKEY3, XSTATE3;
+       pxor XKEY4, XSTATE4;
+       pxor XKEY5, XSTATE5;
+       pxor XKEY6, XSTATE6;
+       pxor XKEY7, XSTATE7;
+
+       movq XSTATE0, 0*8(%rdi);
+       movq XSTATE1, 1*8(%rdi);
+       movq XSTATE2, 2*8(%rdi);
+       movq XSTATE3, 3*8(%rdi);
+       movq XSTATE4, 4*8(%rdi);
+       movq XSTATE5, 5*8(%rdi);
+       movq XSTATE6, 6*8(%rdi);
+       movq XSTATE7, 7*8(%rdi);
+
+       addq $64, STACK_DATAP(%rsp);
+       movl $(0), STACK_ROUNDS(%rsp);
+.align 8
+.Lround_loop:
+       do_whirl(mov, RI1 /*XKEY0*/, RB_ADD0, do_movq, XKEY4);
+       do_whirl(xor, RI2 /*XKEY1*/, RB_ADD1, do_movq, XKEY5);
+       do_whirl(xor, RI3 /*XKEY2*/, RB_ADD2, do_movq, XKEY6);
+       do_whirl(xor, RI4 /*XKEY3*/, RB_ADD3, do_movq, XKEY7);
+       do_whirl(xor, RI1 /*XKEY0*/, RB_ADD4, do_movq, XSTATE0);
+       do_whirl(xor, RI2 /*XKEY1*/, RB_ADD5, do_movq, XSTATE1);
+       do_whirl(xor, RI3 /*XKEY2*/, RB_ADD6, do_movq, XSTATE2);
+       do_whirl(xor, RI4 /*XKEY3*/, RB_ADD7, do_movq, XSTATE3);
+
+       movl STACK_ROUNDS(%rsp), RT0d;
+       movq RB1, XKEY1;
+       addl $1, STACK_ROUNDS(%rsp);
+       movq RB2, XKEY2;
+       movq RB3, XKEY3;
+       xorq RC(RTAB,RT0,8), RB0; /* Add round constant */
+       movq RB4, XKEY4;
+       movq RB5, XKEY5;
+       movq RB0, XKEY0;
+       movq RB6, XKEY6;
+       movq RB7, XKEY7;
+
+       do_whirl(xor, RI1 /*XSTATE0*/, RB_ADD0, do_movq, XSTATE4);
+       do_whirl(xor, RI2 /*XSTATE1*/, RB_ADD1, do_movq, XSTATE5);
+       do_whirl(xor, RI3 /*XSTATE2*/, RB_ADD2, do_movq, XSTATE6);
+       do_whirl(xor, RI4 /*XSTATE3*/, RB_ADD3, do_movq, XSTATE7);
+
+       cmpl $10, STACK_ROUNDS(%rsp);
+       je .Lis_last_round;
+
+       do_whirl(xor, RI1 /*XSTATE4*/, RB_ADD4, do_movq, XKEY0);
+       do_whirl(xor, RI2 /*XSTATE5*/, RB_ADD5, do_movq, XKEY1);
+       do_whirl(xor, RI3 /*XSTATE6*/, RB_ADD6, do_movq, XKEY2);
+       do_whirl(xor, RI4 /*XSTATE7*/, RB_ADD7, do_movq, XKEY3);
+       movq RB0, XSTATE0;
+       movq RB1, XSTATE1;
+       movq RB2, XSTATE2;
+       movq RB3, XSTATE3;
+       movq RB4, XSTATE4;
+       movq RB5, XSTATE5;
+       movq RB6, XSTATE6;
+       movq RB7, XSTATE7;
+
+       jmp .Lround_loop;
+.align 8
+.Lis_last_round:
+       do_whirl(xor, RI1 /*XSTATE4*/, RB_ADD4, dummy, _);
+       movq STACK_STATEP(%rsp), RI1;
+       do_whirl(xor, RI2 /*XSTATE5*/, RB_ADD5, dummy, _);
+       do_whirl(xor, RI3 /*XSTATE6*/, RB_ADD6, dummy, _);
+       do_whirl(xor, RI4 /*XSTATE7*/, RB_ADD7, dummy, _);
+
+       /* store state */
+       xorq RB0, 0*8(RI1);
+       xorq RB1, 1*8(RI1);
+       xorq RB2, 2*8(RI1);
+       xorq RB3, 3*8(RI1);
+       xorq RB4, 4*8(RI1);
+       xorq RB5, 5*8(RI1);
+       xorq RB6, 6*8(RI1);
+       xorq RB7, 7*8(RI1);
+
+       subq $1, STACK_NBLKS(%rsp);
+       jnz .Lblock_loop;
+
+       movq STACK_RBP(%rsp), %rbp;
+       movq STACK_RBX(%rsp), %rbx;
+       movq STACK_R12(%rsp), %r12;
+       movq STACK_R13(%rsp), %r13;
+       movq STACK_R14(%rsp), %r14;
+       movq STACK_R15(%rsp), %r15;
+       CFI_RESTORE(%rbp);
+       CFI_RESTORE(%rbx);
+       CFI_RESTORE(%r12);
+       CFI_RESTORE(%r13);
+       CFI_RESTORE(%r14);
+       CFI_RESTORE(%r15);
+       addq $STACK_MAX, %rsp;
+       CFI_ADJUST_CFA_OFFSET(-STACK_MAX);
+.Lskip:
+       movl $(STACK_MAX + 8), %eax;
+       ret_spec_stop;
+       CFI_ENDPROC();
+ELF(.size _gcry_whirlpool_transform_amd64,.-_gcry_whirlpool_transform_amd64;)
+
+#endif
+#endif
diff --git a/grub-core/lib/libgcrypt/cipher/whirlpool.c 
b/grub-core/lib/libgcrypt/cipher/whirlpool.c
index c89a57292..ecb9a416f 100644
--- a/grub-core/lib/libgcrypt/cipher/whirlpool.c
+++ b/grub-core/lib/libgcrypt/cipher/whirlpool.c
@@ -14,20 +14,19 @@
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
- *
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 /* This is an implementation of the Whirlpool hashing algorithm, which
    has been developed by Vincent Rijmen and Paulo S. L. M. Barreto;
    it's homepage is located at:
-   http://planeta.terra.com.br/informatica/paulobarreto/WhirlpoolPage.html.
+   http://www.larc.usp.br/~pbarreto/WhirlpoolPage.html
 
    The S-Boxes and the structure of the main transformation function,
    which implements an optimized version of the algorithm, is taken
    from the reference implementation available from
-   http://planeta.terra.com.br/informatica/paulobarreto/whirlpool.zip.  */
+   http://www.larc.usp.br/~pbarreto/whirlpool.zip
+ */
 
 #include <config.h>
 #include <stdio.h>
@@ -38,7 +37,17 @@
 #include "g10lib.h"
 #include "cipher.h"
 
-#include "bithelp.h"
+#include "bufhelp.h"
+#include "hash-common.h"
+
+/* USE_AMD64_ASM indicates whether to use AMD64 assembly code. */
+#undef USE_AMD64_ASM
+#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+    defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+# define USE_AMD64_ASM 1
+#endif
+
+
 
 /* Size of a whirlpool block (in bytes).  */
 #define BLOCK_SIZE 64
@@ -52,10 +61,13 @@
 typedef u64 whirlpool_block_t[BLOCK_SIZE / 8];
 
 typedef struct {
+  gcry_md_block_ctx_t bctx;
   whirlpool_block_t hash_state;
-  unsigned char buffer[BLOCK_SIZE];
-  size_t count;
-  unsigned char length[32];
+  int use_bugemu;
+  struct {
+    size_t count;
+    unsigned char length[32];
+  } bugemu;
 } whirlpool_context_t;
 
 
@@ -66,30 +78,13 @@ typedef struct {
    counter.  */
 #define buffer_to_block(buffer, block, i) \
   for (i = 0; i < 8; i++) \
-    (block)[i] = ((u64) (0 \
-                         | (((u64) (buffer)[i * 8 + 0]) << 56) \
-                         | (((u64) (buffer)[i * 8 + 1]) << 48) \
-                         | (((u64) (buffer)[i * 8 + 2]) << 40) \
-                         | (((u64) (buffer)[i * 8 + 3]) << 32) \
-                         | (((u64) (buffer)[i * 8 + 4]) << 24) \
-                         | (((u64) (buffer)[i * 8 + 5]) << 16) \
-                         | (((u64) (buffer)[i * 8 + 6]) <<  8) \
-                         | (((u64) (buffer)[i * 8 + 7]) <<  0)));
+    (block)[i] = buf_get_be64((buffer) + i * 8);
 
 /* Convert the block BLOCK into a buffer BUFFER, using I as
    counter.  */
 #define block_to_buffer(buffer, block, i) \
   for (i = 0; i < 8; i++) \
-    { \
-      (buffer)[i * 8 + 0] = (block[i] >> 56) & 0xFF; \
-      (buffer)[i * 8 + 1] = (block[i] >> 48) & 0xFF; \
-      (buffer)[i * 8 + 2] = (block[i] >> 40) & 0xFF; \
-      (buffer)[i * 8 + 3] = (block[i] >> 32) & 0xFF; \
-      (buffer)[i * 8 + 4] = (block[i] >> 24) & 0xFF; \
-      (buffer)[i * 8 + 5] = (block[i] >> 16) & 0xFF; \
-      (buffer)[i * 8 + 6] = (block[i] >>  8) & 0xFF; \
-      (buffer)[i * 8 + 7] = (block[i] >>  0) & 0xFF; \
-    }
+    buf_put_be64((buffer) + i * 8, (block)[i]);
 
 /* Copy the block BLOCK_SRC to BLOCK_DST, using I as counter.  */
 #define block_copy(block_dst, block_src, i) \
@@ -103,8 +98,15 @@ typedef struct {
 
 
 
+
+struct whirlpool_tables_s {
+  u64 RC[R];
+  u64 C[8][256];
+};
+
+static const struct whirlpool_tables_s tab =
+{
 /* Round constants.  */
-static const u64 rc[R] =
   {
     U64_C (0x1823c6e887b8014f),
     U64_C (0x36a6d2f5796f9152),
@@ -116,13 +118,9 @@ static const u64 rc[R] =
     U64_C (0xe427418ba77d95d8),
     U64_C (0xfbee7c66dd17479e),
     U64_C (0xca2dbf07ad5a8333),
-  };
-
-
-
+  },
 /* Main lookup boxes.  */
-static const u64 C0[256] =
-  {
+  { {
     U64_C (0x18186018c07830d8), U64_C (0x23238c2305af4626),
     U64_C (0xc6c63fc67ef991b8), U64_C (0xe8e887e8136fcdfb),
     U64_C (0x878726874ca113cb), U64_C (0xb8b8dab8a9626d11),
@@ -251,10 +249,7 @@ static const u64 C0[256] =
     U64_C (0x98985a98b4c22d2c), U64_C (0xa4a4aaa4490e55ed),
     U64_C (0x2828a0285d885075), U64_C (0x5c5c6d5cda31b886),
     U64_C (0xf8f8c7f8933fed6b), U64_C (0x8686228644a411c2),
-  };
-
-static const u64 C1[256] =
-  {
+  }, {
     U64_C (0xd818186018c07830), U64_C (0x2623238c2305af46),
     U64_C (0xb8c6c63fc67ef991), U64_C (0xfbe8e887e8136fcd),
     U64_C (0xcb878726874ca113), U64_C (0x11b8b8dab8a9626d),
@@ -383,10 +378,7 @@ static const u64 C1[256] =
     U64_C (0x2c98985a98b4c22d), U64_C (0xeda4a4aaa4490e55),
     U64_C (0x752828a0285d8850), U64_C (0x865c5c6d5cda31b8),
     U64_C (0x6bf8f8c7f8933fed), U64_C (0xc28686228644a411),
-  };
-
-static const u64 C2[256] =
-  {
+  }, {
     U64_C (0x30d818186018c078), U64_C (0x462623238c2305af),
     U64_C (0x91b8c6c63fc67ef9), U64_C (0xcdfbe8e887e8136f),
     U64_C (0x13cb878726874ca1), U64_C (0x6d11b8b8dab8a962),
@@ -515,10 +507,7 @@ static const u64 C2[256] =
     U64_C (0x2d2c98985a98b4c2), U64_C (0x55eda4a4aaa4490e),
     U64_C (0x50752828a0285d88), U64_C (0xb8865c5c6d5cda31),
     U64_C (0xed6bf8f8c7f8933f), U64_C (0x11c28686228644a4),
-  };
-
-static const u64 C3[256] =
-  {
+  }, {
     U64_C (0x7830d818186018c0), U64_C (0xaf462623238c2305),
     U64_C (0xf991b8c6c63fc67e), U64_C (0x6fcdfbe8e887e813),
     U64_C (0xa113cb878726874c), U64_C (0x626d11b8b8dab8a9),
@@ -647,10 +636,7 @@ static const u64 C3[256] =
     U64_C (0xc22d2c98985a98b4), U64_C (0x0e55eda4a4aaa449),
     U64_C (0x8850752828a0285d), U64_C (0x31b8865c5c6d5cda),
     U64_C (0x3fed6bf8f8c7f893), U64_C (0xa411c28686228644),
-  };
-
-static const u64 C4[256] =
-  {
+  }, {
     U64_C (0xc07830d818186018), U64_C (0x05af462623238c23),
     U64_C (0x7ef991b8c6c63fc6), U64_C (0x136fcdfbe8e887e8),
     U64_C (0x4ca113cb87872687), U64_C (0xa9626d11b8b8dab8),
@@ -779,10 +765,7 @@ static const u64 C4[256] =
     U64_C (0xb4c22d2c98985a98), U64_C (0x490e55eda4a4aaa4),
     U64_C (0x5d8850752828a028), U64_C (0xda31b8865c5c6d5c),
     U64_C (0x933fed6bf8f8c7f8), U64_C (0x44a411c286862286),
-  };
-
-static const u64 C5[256] =
-  {
+  }, {
     U64_C (0x18c07830d8181860), U64_C (0x2305af462623238c),
     U64_C (0xc67ef991b8c6c63f), U64_C (0xe8136fcdfbe8e887),
     U64_C (0x874ca113cb878726), U64_C (0xb8a9626d11b8b8da),
@@ -911,10 +894,7 @@ static const u64 C5[256] =
     U64_C (0x98b4c22d2c98985a), U64_C (0xa4490e55eda4a4aa),
     U64_C (0x285d8850752828a0), U64_C (0x5cda31b8865c5c6d),
     U64_C (0xf8933fed6bf8f8c7), U64_C (0x8644a411c2868622),
-  };
-
-static const u64 C6[256] =
-  {
+  }, {
     U64_C (0x6018c07830d81818), U64_C (0x8c2305af46262323),
     U64_C (0x3fc67ef991b8c6c6), U64_C (0x87e8136fcdfbe8e8),
     U64_C (0x26874ca113cb8787), U64_C (0xdab8a9626d11b8b8),
@@ -1043,10 +1023,7 @@ static const u64 C6[256] =
     U64_C (0x5a98b4c22d2c9898), U64_C (0xaaa4490e55eda4a4),
     U64_C (0xa0285d8850752828), U64_C (0x6d5cda31b8865c5c),
     U64_C (0xc7f8933fed6bf8f8), U64_C (0x228644a411c28686),
-  };
-
-static const u64 C7[256] =
-  {
+  }, {
     U64_C (0x186018c07830d818), U64_C (0x238c2305af462623),
     U64_C (0xc63fc67ef991b8c6), U64_C (0xe887e8136fcdfbe8),
     U64_C (0x8726874ca113cb87), U64_C (0xb8dab8a9626d11b8),
@@ -1175,25 +1152,77 @@ static const u64 C7[256] =
     U64_C (0x985a98b4c22d2c98), U64_C (0xa4aaa4490e55eda4),
     U64_C (0x28a0285d88507528), U64_C (0x5c6d5cda31b8865c),
     U64_C (0xf8c7f8933fed6bf8), U64_C (0x86228644a411c286),
-  };
+  } }
+};
+#define C tab.C
+#define C0 C[0]
+#define C1 C[1]
+#define C2 C[2]
+#define C3 C[3]
+#define C4 C[4]
+#define C5 C[5]
+#define C6 C[6]
+#define C7 C[7]
+#define rc tab.RC
+
+
+
+static unsigned int
+whirlpool_transform (void *ctx, const unsigned char *data, size_t nblks);
 
 
 
 static void
-whirlpool_init (void *ctx)
+whirlpool_init (void *ctx, unsigned int flags)
 {
   whirlpool_context_t *context = ctx;
 
   memset (context, 0, sizeof (*context));
+
+  context->bctx.blocksize_shift = _gcry_ctz(BLOCK_SIZE);
+  context->bctx.bwrite = whirlpool_transform;
+  if ((flags & GCRY_MD_FLAG_BUGEMU1))
+    {
+      memset (&context->bugemu, 0, sizeof context->bugemu);
+      context->use_bugemu = 1;
+    }
+  else
+    context->use_bugemu = 0;
 }
 
 
+#ifdef USE_AMD64_ASM
+
+#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+# define ASM_FUNC_ABI __attribute__((sysv_abi))
+# define ASM_EXTRA_STACK (10 * 16)
+#else
+# define ASM_FUNC_ABI
+# define ASM_EXTRA_STACK 0
+#endif
+
+extern unsigned int
+_gcry_whirlpool_transform_amd64(u64 *state, const unsigned char *data,
+    size_t nblks, const struct whirlpool_tables_s *tables) ASM_FUNC_ABI;
+
+static unsigned int
+whirlpool_transform (void *ctx, const unsigned char *data, size_t nblks)
+{
+  whirlpool_context_t *context = ctx;
+
+  return _gcry_whirlpool_transform_amd64(
+               context->hash_state, data, nblks, &tab) + ASM_EXTRA_STACK;
+}
+
+#else /* USE_AMD64_ASM */
+
 /*
  * Transform block.
  */
-static void
-whirlpool_transform (whirlpool_context_t *context, const unsigned char *data)
+static unsigned int
+whirlpool_transform_blk (void *ctx, const unsigned char *data)
 {
+  whirlpool_context_t *context = ctx;
   whirlpool_block_t data_block;
   whirlpool_block_t key;
   whirlpool_block_t state;
@@ -1285,11 +1314,33 @@ whirlpool_transform (whirlpool_context_t *context, 
const unsigned char *data)
 
   block_xor (context->hash_state, data_block, i);
   block_xor (context->hash_state, state, i);
+
+  return /*burn_stack*/ 4 * sizeof(whirlpool_block_t) + 2 * sizeof(int) +
+                        4 * sizeof(void*);
 }
 
+static unsigned int
+whirlpool_transform ( void *c, const unsigned char *data, size_t nblks )
+{
+  unsigned int burn;
+
+  do
+    {
+      burn = whirlpool_transform_blk (c, data);
+      data += BLOCK_SIZE;
+    }
+  while (--nblks);
+
+  return burn;
+}
+
+#endif /* !USE_AMD64_ASM */
+
+
+/* Bug compatibility Whirlpool version.  */
 static void
-whirlpool_add (whirlpool_context_t *context,
-              const void *buffer_arg, size_t buffer_n)
+whirlpool_add_bugemu (whirlpool_context_t *context,
+                      const void *buffer_arg, size_t buffer_n)
 {
   const unsigned char *buffer = buffer_arg;
   u64 buffer_size;
@@ -1298,40 +1349,37 @@ whirlpool_add (whirlpool_context_t *context,
 
   buffer_size = buffer_n;
 
-  if (context->count == BLOCK_SIZE)
+  if (context->bugemu.count == BLOCK_SIZE)
     {
       /* Flush the buffer.  */
-      whirlpool_transform (context, context->buffer);
-      /*_gcry_burn_stack (80+6*sizeof(void*));*/ /* FIXME */
-      context->count = 0;
+      whirlpool_transform (context, context->bctx.buf, 1);
+      context->bugemu.count = 0;
     }
   if (! buffer)
     return; /* Nothing to add.  */
 
-  if (context->count)
+  if (context->bugemu.count)
     {
-      while (buffer_n && (context->count < BLOCK_SIZE))
+      while (buffer_n && (context->bugemu.count < BLOCK_SIZE))
        {
-         context->buffer[context->count++] = *buffer++;
+         context->bctx.buf[context->bugemu.count++] = *buffer++;
          buffer_n--;
        }
-      whirlpool_add (context, NULL, 0);
+      whirlpool_add_bugemu (context, NULL, 0);
       if (!buffer_n)
-       /* Done.  */
-        return;
+        return; /* Done.  This is the bug we emulate.  */
     }
-  /*_gcry_burn_stack (80+6*sizeof(void*));*/ /* FIXME */
 
   while (buffer_n >= BLOCK_SIZE)
     {
-      whirlpool_transform (context, buffer);
-      context->count = 0;
+      whirlpool_transform (context, buffer, 1);
+      context->bugemu.count = 0;
       buffer_n -= BLOCK_SIZE;
       buffer += BLOCK_SIZE;
     }
-  while (buffer_n && (context->count < BLOCK_SIZE))
+  while (buffer_n && (context->bugemu.count < BLOCK_SIZE))
     {
-      context->buffer[context->count++] = *buffer++;
+      context->bctx.buf[context->bugemu.count++] = *buffer++;
       buffer_n--;
     }
 
@@ -1343,20 +1391,65 @@ whirlpool_add (whirlpool_context_t *context,
       if (! (buffer_size || carry))
        break;
 
-      carry += context->length[32 - i] + (buffer_size & 0xFF);
-      context->length[32 - i] = carry;
+      carry += context->bugemu.length[32 - i] + (buffer_size & 0xFF);
+      context->bugemu.length[32 - i] = carry;
       buffer_size >>= 8;
       carry >>= 8;
     }
   gcry_assert (! (buffer_size || carry));
 }
 
+
+/* Bug compatibility Whirlpool version.  */
+static void
+whirlpool_final_bugemu (void *ctx)
+{
+  whirlpool_context_t *context = ctx;
+  unsigned int i;
+
+  /* Flush.  */
+  whirlpool_add_bugemu (context, NULL, 0);
+
+  /* Pad.  */
+  context->bctx.buf[context->bugemu.count++] = 0x80;
+
+  if (context->bugemu.count > 32)
+    {
+      /* An extra block is necessary.  */
+      while (context->bugemu.count < 64)
+       context->bctx.buf[context->bugemu.count++] = 0;
+      whirlpool_add_bugemu (context, NULL, 0);
+    }
+  while (context->bugemu.count < 32)
+    context->bctx.buf[context->bugemu.count++] = 0;
+
+  /* Add length of message.  */
+  memcpy (context->bctx.buf + context->bugemu.count,
+          context->bugemu.length, 32);
+  context->bugemu.count += 32;
+  whirlpool_add_bugemu (context, NULL, 0);
+
+  block_to_buffer (context->bctx.buf, context->hash_state, i);
+}
+
+
 static void
 whirlpool_write (void *ctx, const void *buffer, size_t buffer_n)
 {
   whirlpool_context_t *context = ctx;
 
-  whirlpool_add (context, buffer, buffer_n);
+  if (context->use_bugemu)
+    {
+      whirlpool_add_bugemu (context, buffer, buffer_n);
+    }
+  else
+    {
+      u64 old_nblocks = context->bctx.nblocks;
+
+      _gcry_md_block_write (context, buffer, buffer_n);
+
+      gcry_assert (old_nblocks <= context->bctx.nblocks);
+    }
 }
 
 static void
@@ -1364,29 +1457,64 @@ whirlpool_final (void *ctx)
 {
   whirlpool_context_t *context = ctx;
   unsigned int i;
+  u64 t, th, lsb, msb;
+  unsigned char *length;
+
+  if (context->use_bugemu)
+    {
+      whirlpool_final_bugemu (ctx);
+      return;
+    }
+
+  t = context->bctx.nblocks;
+  /* if (sizeof t == sizeof context->bctx.nblocks) */
+  th = context->bctx.nblocks_high;
+  /* else */
+  /*   th = context->bctx.nblocks >> 64; In case we ever use u128 */
+
+  /* multiply by 64 to make a byte count */
+  lsb = t << 6;
+  msb = (th << 6) | (t >> 58);
+  /* add the count */
+  t = lsb;
+  if ((lsb += context->bctx.count) < t)
+    msb++;
+  /* multiply by 8 to make a bit count */
+  t = lsb;
+  lsb <<= 3;
+  msb <<= 3;
+  msb |= t >> 61;
 
   /* Flush.  */
-  whirlpool_add (context, NULL, 0);
+  whirlpool_write (context, NULL, 0);
 
   /* Pad.  */
-  context->buffer[context->count++] = 0x80;
+  context->bctx.buf[context->bctx.count++] = 0x80;
 
-  if (context->count > 32)
+  if (context->bctx.count > 32)
     {
       /* An extra block is necessary.  */
-      while (context->count < 64)
-       context->buffer[context->count++] = 0;
-      whirlpool_add (context, NULL, 0);
+      if (context->bctx.count < 64)
+       memset (&context->bctx.buf[context->bctx.count], 0,
+               64 - context->bctx.count);
+      context->bctx.count = 64;
+      whirlpool_write (context, NULL, 0);
     }
-  while (context->count < 32)
-    context->buffer[context->count++] = 0;
+  if (context->bctx.count < 32)
+    memset (&context->bctx.buf[context->bctx.count], 0,
+           32 - context->bctx.count);
+  context->bctx.count = 32;
 
   /* Add length of message.  */
-  memcpy (context->buffer + context->count, context->length, 32);
-  context->count += 32;
-  whirlpool_add (context, NULL, 0);
+  length = context->bctx.buf + context->bctx.count;
+  buf_put_be64(&length[0 * 8], 0);
+  buf_put_be64(&length[1 * 8], 0);
+  buf_put_be64(&length[2 * 8], msb);
+  buf_put_be64(&length[3 * 8], lsb);
+  context->bctx.count += 32;
+  whirlpool_write (context, NULL, 0);
 
-  block_to_buffer (context->buffer, context->hash_state, i);
+  block_to_buffer (context->bctx.buf, context->hash_state, i);
 }
 
 static byte *
@@ -1394,12 +1522,14 @@ whirlpool_read (void *ctx)
 {
   whirlpool_context_t *context = ctx;
 
-  return context->buffer;
+  return context->bctx.buf;
 }
 
-gcry_md_spec_t _gcry_digest_spec_whirlpool =
+const gcry_md_spec_t _gcry_digest_spec_whirlpool =
   {
+    GCRY_MD_WHIRLPOOL, {0, 0},
     "WHIRLPOOL", NULL, 0, NULL, 64,
-    whirlpool_init, whirlpool_write, whirlpool_final, whirlpool_read,
+    whirlpool_init, whirlpool_write, whirlpool_final, whirlpool_read, NULL,
+    NULL,
     sizeof (whirlpool_context_t)
   };
diff --git a/grub-core/lib/libgcrypt/compat/Makefile.am 
b/grub-core/lib/libgcrypt/compat/Makefile.am
new file mode 100644
index 000000000..f0ddf34df
--- /dev/null
+++ b/grub-core/lib/libgcrypt/compat/Makefile.am
@@ -0,0 +1,48 @@
+# Makefile for compat directory
+# Copyright (C) 2010 Free Software Foundation, Inc.
+#
+# This file is part of Libgcrypt.
+#
+# Libgcrypt is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as
+# published by the Free Software Foundation; either version 2.1 of
+# the License, or (at your option) any later version.
+#
+# Libgcrypt is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+# Process this file with automake to produce Makefile.in
+
+# We use this libcompat to work around problems with LIBOBJ stuff.
+# For example, we need some of the compat files also in tests/ but the
+# suggested way to do this (using the automake option subdir-objects)
+# leads to problems with "make distclean": The distclean target in
+# tests is run before the one src and it removes the deps files of the
+# libobj files which are in src.  Now when it comes to run make in src
+# the icnluded files are gone - bummer.  Instead of try to fix this
+# issue it seems better not to use subdir-objects but build them all
+# into a compat library and always link against that library.  This
+# also avoids the problem that a dependency on LTLIBOBJ is not setup
+# if -- disable-static was used.
+
+# Need to include ../src in addition to top_srcdir because gcrypt.h is
+# a built header.
+AM_CPPFLAGS = -I../src -I$(top_srcdir)/src $(GPG_ERROR_CFLAGS)
+
+noinst_LTLIBRARIES = libcompat.la
+
+# We only need one file so that the library is guaranteed to have at
+# least one member.
+libcompat_la_SOURCES = compat.c libcompat.h
+libcompat_la_DEPENDENCIES = @LTLIBOBJS@
+libcompat_la_LIBADD =       @LTLIBOBJS@
+
+# AC_LIBOBJ files are:
+#    getpid.c
+#    clock.c
+#
diff --git a/grub-core/lib/libgcrypt/compat/clock.c 
b/grub-core/lib/libgcrypt/compat/clock.c
new file mode 100644
index 000000000..2a2c205f3
--- /dev/null
+++ b/grub-core/lib/libgcrypt/compat/clock.c
@@ -0,0 +1,36 @@
+/* clock.c - Replacement for WindowsCE
+   Copyright (C) 2010 Free Software Foundation, Inc.
+
+   This file is free software; as a special exception the author gives
+   unlimited permission to copy and/or distribute it, with or without
+   modifications, as long as this notice is preserved.
+
+   This file is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY, to the extent permitted by law; without even
+   the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+   PURPOSE.  */
+
+#if HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#ifdef HAVE_W32CE_SYSTEM
+#include <windows.h>
+#include <time.h>
+#include <assert.h>
+
+clock_t
+_gcry_clock (void)
+{
+  assert (CLOCKS_PER_SEC == 1000);
+#warning Replace by a correct implementation.
+  /* It seems that GetProcessTimes is available in the kernel but
+     without a declaration.  If that fails we would need to walk over
+     all threads and tally up the GetThreadTimes.  */
+
+  return GetTickCount ();
+}
+
+#else
+# error No replacement function for clock known
+#endif
diff --git a/grub-core/lib/libgcrypt/compat/compat.c 
b/grub-core/lib/libgcrypt/compat/compat.c
new file mode 100644
index 000000000..d7ca1b5d4
--- /dev/null
+++ b/grub-core/lib/libgcrypt/compat/compat.c
@@ -0,0 +1,40 @@
+/* compat.c - Dummy file to avoid an empty library.
+ * Copyright (C) 2010  Free Software Foundation, Inc.
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+
+#include "../src/g10lib.h"
+
+
+const char *
+_gcry_compat_identification (void)
+{
+  /* For complete list of copyright holders see the file AUTHORS in
+     the source distribution.  */
+  static const char blurb[] =
+    "\n\n"
+    "This is Libgcrypt " PACKAGE_VERSION " - The GNU Crypto Library\n"
+    "Copyright (C) 2012-2022 g10 Code GmbH\n"
+    "Copyright (C) 2013-2022 Jussi Kivilinna\n"
+    "Copyright (C) 2000-2018 Free Software Foundation, Inc.\n"
+    "\n"
+    "(" BUILD_REVISION " " BUILD_TIMESTAMP ")\n"
+    "\n\n";
+  return blurb;
+}
diff --git a/grub-core/lib/libgcrypt/compat/getpid.c 
b/grub-core/lib/libgcrypt/compat/getpid.c
new file mode 100644
index 000000000..032387c36
--- /dev/null
+++ b/grub-core/lib/libgcrypt/compat/getpid.c
@@ -0,0 +1,29 @@
+/* getpid.c - Replacement for WindowsCE
+   Copyright (C) 2010 Free Software Foundation, Inc.
+
+   This file is free software; as a special exception the author gives
+   unlimited permission to copy and/or distribute it, with or without
+   modifications, as long as this notice is preserved.
+
+   This file is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY, to the extent permitted by law; without even
+   the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+   PURPOSE.  */
+
+#if HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#ifdef HAVE_W32CE_SYSTEM
+#include <windows.h>
+#include <sys/types.h>
+
+pid_t
+_gcry_getpid (void)
+{
+  return GetCurrentProcessId ();
+}
+
+#else
+# error No replacement function for getpid known
+#endif
diff --git a/grub-core/lib/libgcrypt/compat/libcompat.h 
b/grub-core/lib/libgcrypt/compat/libcompat.h
new file mode 100644
index 000000000..b5a764912
--- /dev/null
+++ b/grub-core/lib/libgcrypt/compat/libcompat.h
@@ -0,0 +1,37 @@
+/* libcomapt.h - Prototypes for AC_REPLACE_FUNCtions.
+ * Copyright (C) 2010 Free Software Foundation, Inc.
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser general Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef GCRY_LIBCOMPAT_H
+#define GCRY_LIBCOMPAT_H
+
+const char *_gcry_compat_identification (void);
+
+
+#ifndef HAVE_GETPID
+pid_t _gcry_getpid (void);
+#define getpid() _gcry_getpid ()
+#endif
+
+#ifndef HAVE_CLOCK
+clock_t _gcry_clock (void);
+#define clock() _gcry_clock ()
+#endif
+
+
+#endif /*GCRY_LIBCOMPAT_H*/
diff --git a/grub-core/lib/libgcrypt/config.h.in 
b/grub-core/lib/libgcrypt/config.h.in
new file mode 100644
index 000000000..883969db2
--- /dev/null
+++ b/grub-core/lib/libgcrypt/config.h.in
@@ -0,0 +1,823 @@
+/* config.h.in.  Generated from configure.ac by autoheader.  */
+
+
+#ifndef _GCRYPT_CONFIG_H_INCLUDED
+#define _GCRYPT_CONFIG_H_INCLUDED
+
+/* Enable gpg-error's strerror macro for W32CE.  */
+#define GPG_ERR_ENABLE_ERRNO_MACROS 1
+
+
+/* Define if building universal (internal helper macro) */
+#undef AC_APPLE_UNIVERSAL_BUILD
+
+/* Defined if --disable-asm was used to configure */
+#undef ASM_DISABLED
+
+/* GIT commit id revision used to build this package */
+#undef BUILD_REVISION
+
+/* The time this package was configured for a build */
+#undef BUILD_TIMESTAMP
+
+/* configure did not test for endianness */
+#undef DISABLED_ENDIAN_CHECK
+
+/* Define if you don't want the default EGD socket name. For details see
+   cipher/rndegd.c */
+#undef EGD_SOCKET_NAME
+
+/* Enable support for Intel AES-NI instructions. */
+#undef ENABLE_AESNI_SUPPORT
+
+/* Enable support for ARMv8 Crypto Extension instructions. */
+#undef ENABLE_ARM_CRYPTO_SUPPORT
+
+/* Enable support for Intel AVX2 instructions. */
+#undef ENABLE_AVX2_SUPPORT
+
+/* Enable support for Intel AVX instructions. */
+#undef ENABLE_AVX_SUPPORT
+
+/* Enable support for Intel DRNG (RDRAND instruction). */
+#undef ENABLE_DRNG_SUPPORT
+
+/* Enable forcing 'soft' HW feature bits on (for testing). */
+#undef ENABLE_FORCE_SOFT_HWFEATURES
+
+/* Define to support an HMAC based integrity check */
+#undef ENABLE_HMAC_BINARY_CHECK
+
+/* Enable support for the jitter entropy collector. */
+#undef ENABLE_JENT_SUPPORT
+
+/* Enable support for ARM NEON instructions. */
+#undef ENABLE_NEON_SUPPORT
+
+/* Enable support for the PadLock engine. */
+#undef ENABLE_PADLOCK_SUPPORT
+
+/* Enable support for Intel PCLMUL instructions. */
+#undef ENABLE_PCLMUL_SUPPORT
+
+/* Enable support for POWER 8 (PowerISA 2.07) crypto extension. */
+#undef ENABLE_PPC_CRYPTO_SUPPORT
+
+/* Enable support for Intel SHAEXT instructions. */
+#undef ENABLE_SHAEXT_SUPPORT
+
+/* Enable support for Intel SSE4.1 instructions. */
+#undef ENABLE_SSE41_SUPPORT
+
+/* Define FIPS module version for certification */
+#undef FIPS_MODULE_VERSION
+
+/* Define to use the GNU C visibility attribute. */
+#undef GCRY_USE_VISIBILITY
+
+/* The default error source for libgcrypt. */
+#undef GPG_ERR_SOURCE_DEFAULT
+
+/* Defined if ARM architecture is v6 or newer */
+#undef HAVE_ARM_ARCH_V6
+
+/* Define to 1 if you have the `atexit' function. */
+#undef HAVE_ATEXIT
+
+/* Defined if the mlock() call does not work */
+#undef HAVE_BROKEN_MLOCK
+
+/* Defined if compiler has '__builtin_bswap32' intrinsic */
+#undef HAVE_BUILTIN_BSWAP32
+
+/* Defined if compiler has '__builtin_bswap64' intrinsic */
+#undef HAVE_BUILTIN_BSWAP64
+
+/* Defined if compiler has '__builtin_clz' intrinsic */
+#undef HAVE_BUILTIN_CLZ
+
+/* Defined if compiler has '__builtin_clzl' intrinsic */
+#undef HAVE_BUILTIN_CLZL
+
+/* Defined if compiler has '__builtin_ctz' intrinsic */
+#undef HAVE_BUILTIN_CTZ
+
+/* Defined if compiler has '__builtin_ctzl' intrinsic */
+#undef HAVE_BUILTIN_CTZL
+
+/* Define to 1 if the system has the type `byte'. */
+#undef HAVE_BYTE
+
+/* Define to 1 if you have the `clock' function. */
+#undef HAVE_CLOCK
+
+/* Define to 1 if you have the `clock_gettime' function. */
+#undef HAVE_CLOCK_GETTIME
+
+/* Defined if underlying compiler supports PowerPC AltiVec/VSX/crypto
+   intrinsics */
+#undef HAVE_COMPATIBLE_CC_PPC_ALTIVEC
+
+/* Defined if underlying compiler supports PowerPC AltiVec/VSX/crypto
+   intrinsics with extra GCC flags */
+#undef HAVE_COMPATIBLE_CC_PPC_ALTIVEC_WITH_CFLAGS
+
+/* Defined if underlying assembler is compatible with ARMv8/Aarch64 assembly
+   implementations */
+#undef HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS
+
+/* Defined if underlying assembler is compatible with amd64 assembly
+   implementations */
+#undef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS
+
+/* Defined if underlying assembler is compatible with ARM assembly
+   implementations */
+#undef HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS
+
+/* Defined if underlying assembler is compatible with WIN64 assembly
+   implementations */
+#undef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+
+/* Defined for Alpha platforms */
+#undef HAVE_CPU_ARCH_ALPHA
+
+/* Defined for ARM AArch64 platforms */
+#undef HAVE_CPU_ARCH_ARM
+
+/* Defined for M68k platforms */
+#undef HAVE_CPU_ARCH_M68K
+
+/* Defined for MIPS platforms */
+#undef HAVE_CPU_ARCH_MIPS
+
+/* Defined for PPC platforms */
+#undef HAVE_CPU_ARCH_PPC
+
+/* Defined for s390x/zSeries platforms */
+#undef HAVE_CPU_ARCH_S390X
+
+/* Defined for SPARC platforms */
+#undef HAVE_CPU_ARCH_SPARC
+
+/* Defined for the x86 platforms */
+#undef HAVE_CPU_ARCH_X86
+
+/* defined if the system supports a random device */
+#undef HAVE_DEV_RANDOM
+
+/* Define to 1 if you have the <dlfcn.h> header file. */
+#undef HAVE_DLFCN_H
+
+/* Define to 1 if you don't have `vprintf' but do have `_doprnt.' */
+#undef HAVE_DOPRNT
+
+/* defined if we run on some of the PCDOS like systems (DOS, Windoze. OS/2)
+   with special properties like no file modes */
+#undef HAVE_DOSISH_SYSTEM
+
+/* defined if we must run on a stupid file system */
+#undef HAVE_DRIVE_LETTERS
+
+/* Define to 1 if you have the `elf_aux_info' function. */
+#undef HAVE_ELF_AUX_INFO
+
+/* Define to 1 if you have the `explicit_bzero' function. */
+#undef HAVE_EXPLICIT_BZERO
+
+/* Define to 1 if you have the `explicit_memset' function. */
+#undef HAVE_EXPLICIT_MEMSET
+
+/* Define to 1 if you have the `fcntl' function. */
+#undef HAVE_FCNTL
+
+/* Define to 1 if you have the `flockfile' function. */
+#undef HAVE_FLOCKFILE
+
+/* Define to 1 if you have the `ftruncate' function. */
+#undef HAVE_FTRUNCATE
+
+/* Defined if underlying assembler supports for CFI directives */
+#undef HAVE_GCC_ASM_CFI_DIRECTIVES
+
+/* Defined if underlying assembler supports for ELF directives */
+#undef HAVE_GCC_ASM_ELF_DIRECTIVES
+
+/* Define if inline asm memory barrier is supported */
+#undef HAVE_GCC_ASM_VOLATILE_MEMORY
+
+/* Defined if a GCC style "__attribute__ ((aligned (n))" is supported */
+#undef HAVE_GCC_ATTRIBUTE_ALIGNED
+
+/* Defined if a GCC style "__attribute__ ((may_alias))" is supported */
+#undef HAVE_GCC_ATTRIBUTE_MAY_ALIAS
+
+/* Defined if compiler supports "__attribute__ ((ms_abi))" function attribute
+   */
+#undef HAVE_GCC_ATTRIBUTE_MS_ABI
+
+/* Defined if a GCC style "__attribute__ ((packed))" is supported */
+#undef HAVE_GCC_ATTRIBUTE_PACKED
+
+/* Defined if compiler supports "__attribute__ ((sysv_abi))" function
+   attribute */
+#undef HAVE_GCC_ATTRIBUTE_SYSV_ABI
+
+/* Defined if default calling convention is 'ms_abi' */
+#undef HAVE_GCC_DEFAULT_ABI_IS_MS_ABI
+
+/* Defined if default calling convention is 'sysv_abi' */
+#undef HAVE_GCC_DEFAULT_ABI_IS_SYSV_ABI
+
+/* Defined if inline assembler supports AArch32 Crypto Extension instructions
+   */
+#undef HAVE_GCC_INLINE_ASM_AARCH32_CRYPTO
+
+/* Defined if inline assembler supports AArch64 Crypto Extension instructions
+   */
+#undef HAVE_GCC_INLINE_ASM_AARCH64_CRYPTO
+
+/* Defined if inline assembler supports AArch64 NEON instructions */
+#undef HAVE_GCC_INLINE_ASM_AARCH64_NEON
+
+/* Defined if inline assembler supports AVX instructions */
+#undef HAVE_GCC_INLINE_ASM_AVX
+
+/* Defined if inline assembler supports AVX2 instructions */
+#undef HAVE_GCC_INLINE_ASM_AVX2
+
+/* Defined if inline assembler supports BMI2 instructions */
+#undef HAVE_GCC_INLINE_ASM_BMI2
+
+/* Defined if inline assembler supports NEON instructions */
+#undef HAVE_GCC_INLINE_ASM_NEON
+
+/* Defined if inline assembler supports PCLMUL instructions */
+#undef HAVE_GCC_INLINE_ASM_PCLMUL
+
+/* Defined if inline assembler supports PowerPC AltiVec/VSX/crypto
+   instructions */
+#undef HAVE_GCC_INLINE_ASM_PPC_ALTIVEC
+
+/* Defined if inline assembler supports PowerISA 3.00 instructions */
+#undef HAVE_GCC_INLINE_ASM_PPC_ARCH_3_00
+
+/* Defined if inline assembler supports zSeries instructions */
+#undef HAVE_GCC_INLINE_ASM_S390X
+
+/* Defined if inline assembler supports zSeries vector instructions */
+#undef HAVE_GCC_INLINE_ASM_S390X_VX
+
+/* Defined if inline assembler supports SHA Extensions instructions */
+#undef HAVE_GCC_INLINE_ASM_SHAEXT
+
+/* Defined if inline assembler supports SSE4.1 instructions */
+#undef HAVE_GCC_INLINE_ASM_SSE41
+
+/* Defined if inline assembler supports SSSE3 instructions */
+#undef HAVE_GCC_INLINE_ASM_SSSE3
+
+/* Defined if inline assembler supports VAES and VPCLMUL instructions */
+#undef HAVE_GCC_INLINE_ASM_VAES_VPCLMUL
+
+/* Define to 1 if you have the `getauxval' function. */
+#undef HAVE_GETAUXVAL
+
+/* Define to 1 if you have the `getentropy' function. */
+#undef HAVE_GETENTROPY
+
+/* Define to 1 if you have the `gethrtime' function. */
+#undef HAVE_GETHRTIME
+
+/* Define to 1 if you have the `getpagesize' function. */
+#undef HAVE_GETPAGESIZE
+
+/* Define to 1 if you have the `getpid' function. */
+#undef HAVE_GETPID
+
+/* Define to 1 if you have the `getrusage' function. */
+#undef HAVE_GETRUSAGE
+
+/* Define to 1 if you have the `gettimeofday' function. */
+#undef HAVE_GETTIMEOFDAY
+
+/* Defined if underlying assembler is compatible with Intel syntax assembly
+   implementations */
+#undef HAVE_INTEL_SYNTAX_PLATFORM_AS
+
+/* Define to 1 if you have the <inttypes.h> header file. */
+#undef HAVE_INTTYPES_H
+
+/* Define to 1 if you have the `rt' library (-lrt). */
+#undef HAVE_LIBRT
+
+/* Define to 1 if you have the `memmove' function. */
+#undef HAVE_MEMMOVE
+
+/* Define to 1 if you have the <minix/config.h> header file. */
+#undef HAVE_MINIX_CONFIG_H
+
+/* Defined if the system supports an mlock() call */
+#undef HAVE_MLOCK
+
+/* Define to 1 if you have the `mmap' function. */
+#undef HAVE_MMAP
+
+/* Defined if the GNU Pth is available */
+#undef HAVE_PTH
+
+/* Define if we have pthread. */
+#undef HAVE_PTHREAD
+
+/* Define to 1 if you have the `raise' function. */
+#undef HAVE_RAISE
+
+/* Define to 1 if you have the `rand' function. */
+#undef HAVE_RAND
+
+/* Define to 1 if you have the <spawn.h> header file. */
+#undef HAVE_SPAWN_H
+
+/* Define to 1 if you have the <stdint.h> header file. */
+#undef HAVE_STDINT_H
+
+/* Define to 1 if you have the <stdio.h> header file. */
+#undef HAVE_STDIO_H
+
+/* Define to 1 if you have the <stdlib.h> header file. */
+#undef HAVE_STDLIB_H
+
+/* Define to 1 if you have the `stpcpy' function. */
+#undef HAVE_STPCPY
+
+/* Define to 1 if you have the `strcasecmp' function. */
+#undef HAVE_STRCASECMP
+
+/* Define to 1 if you have the `strerror' function. */
+#undef HAVE_STRERROR
+
+/* Define to 1 if you have the `stricmp' function. */
+#undef HAVE_STRICMP
+
+/* Define to 1 if you have the <strings.h> header file. */
+#undef HAVE_STRINGS_H
+
+/* Define to 1 if you have the <string.h> header file. */
+#undef HAVE_STRING_H
+
+/* Define to 1 if you have the `strtoul' function. */
+#undef HAVE_STRTOUL
+
+/* Defined if compiler has '__sync_synchronize' intrinsic */
+#undef HAVE_SYNC_SYNCHRONIZE
+
+/* Define to 1 if you have the `syscall' function. */
+#undef HAVE_SYSCALL
+
+/* Define to 1 if you have the `sysconf' function. */
+#undef HAVE_SYSCONF
+
+/* Define to 1 if you have the `syslog' function. */
+#undef HAVE_SYSLOG
+
+/* Define to 1 if you have the <sys/auxv.h> header file. */
+#undef HAVE_SYS_AUXV_H
+
+/* Define to 1 if you have the <sys/capability.h> header file. */
+#undef HAVE_SYS_CAPABILITY_H
+
+/* Define to 1 if you have the <sys/mman.h> header file. */
+#undef HAVE_SYS_MMAN_H
+
+/* Define to 1 if you have the <sys/random.h> header file. */
+#undef HAVE_SYS_RANDOM_H
+
+/* Define to 1 if you have the <sys/socket.h> header file. */
+#undef HAVE_SYS_SOCKET_H
+
+/* Define to 1 if you have the <sys/stat.h> header file. */
+#undef HAVE_SYS_STAT_H
+
+/* Define to 1 if you have the <sys/types.h> header file. */
+#undef HAVE_SYS_TYPES_H
+
+/* Define to 1 if the system has the type `u16'. */
+#undef HAVE_U16
+
+/* Define to 1 if the system has the type `u32'. */
+#undef HAVE_U32
+
+/* Define to 1 if the system has the type `u64'. */
+#undef HAVE_U64
+
+/* Define to 1 if the system has the type `uintptr_t'. */
+#undef HAVE_UINTPTR_T
+
+/* Define to 1 if you have the <unistd.h> header file. */
+#undef HAVE_UNISTD_H
+
+/* Define to 1 if the system has the type `ushort'. */
+#undef HAVE_USHORT
+
+/* Defined if variable length arrays are supported */
+#undef HAVE_VLA
+
+/* Define to 1 if you have the `vprintf' function. */
+#undef HAVE_VPRINTF
+
+/* Defined if we run on WindowsCE */
+#undef HAVE_W32CE_SYSTEM
+
+/* Defined if we run on a W32 API based system */
+#undef HAVE_W32_SYSTEM
+
+/* Define to 1 if you have the `wait4' function. */
+#undef HAVE_WAIT4
+
+/* Define to 1 if you have the `waitpid' function. */
+#undef HAVE_WAITPID
+
+/* Define to 1 if you have the <wchar.h> header file. */
+#undef HAVE_WCHAR_H
+
+/* Define to 1 if you have the <ws2tcpip.h> header file. */
+#undef HAVE_WS2TCPIP_H
+
+/* Defined if this is not a regular release */
+#undef IS_DEVELOPMENT_VERSION
+
+/* List of available cipher algorithms */
+#undef LIBGCRYPT_CIPHERS
+
+/* List of available digest algorithms */
+#undef LIBGCRYPT_DIGESTS
+
+/* List of available KDF algorithms */
+#undef LIBGCRYPT_KDFS
+
+/* List of available public key cipher algorithms */
+#undef LIBGCRYPT_PUBKEY_CIPHERS
+
+/* Define to the sub-directory in which libtool stores uninstalled libraries.
+   */
+#undef LT_OBJDIR
+
+/* Define to use the (obsolete) malloc guarding feature */
+#undef M_GUARD
+
+/* defined to the name of the strong random device */
+#undef NAME_OF_DEV_RANDOM
+
+/* defined to the name of the weaker random device */
+#undef NAME_OF_DEV_URANDOM
+
+/* Name of package */
+#undef PACKAGE
+
+/* Define to the address where bug reports for this package should be sent. */
+#undef PACKAGE_BUGREPORT
+
+/* Define to the full name of this package. */
+#undef PACKAGE_NAME
+
+/* Define to the full name and version of this package. */
+#undef PACKAGE_STRING
+
+/* Define to the one symbol short name of this package. */
+#undef PACKAGE_TARNAME
+
+/* Define to the home page for this package. */
+#undef PACKAGE_URL
+
+/* Define to the version of this package. */
+#undef PACKAGE_VERSION
+
+/* A human readable text with the name of the OS */
+#undef PRINTABLE_OS_NAME
+
+/* The size of `uint64_t', as computed by sizeof. */
+#undef SIZEOF_UINT64_T
+
+/* The size of `unsigned int', as computed by sizeof. */
+#undef SIZEOF_UNSIGNED_INT
+
+/* The size of `unsigned long', as computed by sizeof. */
+#undef SIZEOF_UNSIGNED_LONG
+
+/* The size of `unsigned long long', as computed by sizeof. */
+#undef SIZEOF_UNSIGNED_LONG_LONG
+
+/* The size of `unsigned short', as computed by sizeof. */
+#undef SIZEOF_UNSIGNED_SHORT
+
+/* The size of `void *', as computed by sizeof. */
+#undef SIZEOF_VOID_P
+
+/* Define to 1 if all of the C90 standard headers exist (not just the ones
+   required in a freestanding environment). This macro is provided for
+   backward compatibility; new code need not use it. */
+#undef STDC_HEADERS
+
+/* Defined if this module should be included */
+#undef USE_AES
+
+/* Defined if this module should be included */
+#undef USE_ARCFOUR
+
+/* Defined if this module should be included */
+#undef USE_BLAKE2
+
+/* Defined if this module should be included */
+#undef USE_BLOWFISH
+
+/* Defined if this module should be included */
+#undef USE_CAMELLIA
+
+/* define if capabilities should be used */
+#undef USE_CAPABILITIES
+
+/* Defined if this module should be included */
+#undef USE_CAST5
+
+/* Defined if this module should be included */
+#undef USE_CHACHA20
+
+/* Defined if this module should be included */
+#undef USE_CRC
+
+/* Defined if this module should be included */
+#undef USE_DES
+
+/* Defined if this module should be included */
+#undef USE_DSA
+
+/* Defined if this module should be included */
+#undef USE_ECC
+
+/* Defined if this module should be included */
+#undef USE_ELGAMAL
+
+/* Defined if the GNU Portable Thread Library should be used */
+#undef USE_GNU_PTH
+
+/* Defined if this module should be included */
+#undef USE_GOST28147
+
+/* Defined if this module should be included */
+#undef USE_GOST_R_3411_12
+
+/* Defined if this module should be included */
+#undef USE_GOST_R_3411_94
+
+/* Defined if this module should be included */
+#undef USE_IDEA
+
+/* Defined if this module should be included */
+#undef USE_MD2
+
+/* Defined if this module should be included */
+#undef USE_MD4
+
+/* Defined if this module should be included */
+#undef USE_MD5
+
+/* set this to limit filenames to the 8.3 format */
+#undef USE_ONLY_8DOT3
+
+/* defined if we use posix_spawn in test program */
+#undef USE_POSIX_SPAWN_FOR_TESTS
+
+/* Defined if this module should be included */
+#undef USE_RFC2268
+
+/* Defined if this module should be included */
+#undef USE_RMD160
+
+/* Defined if the EGD based RNG should be used. */
+#undef USE_RNDEGD
+
+/* Defined if the getentropy RNG should be used. */
+#undef USE_RNDGETENTROPY
+
+/* Defined if the /dev/random RNG should be used. */
+#undef USE_RNDOLDLINUX
+
+/* Defined if the default Unix RNG should be used. */
+#undef USE_RNDUNIX
+
+/* Defined if the Windows specific RNG should be used. */
+#undef USE_RNDW32
+
+/* Defined if the WindowsCE specific RNG should be used. */
+#undef USE_RNDW32CE
+
+/* Defined if this module should be included */
+#undef USE_RSA
+
+/* Defined if this module should be included */
+#undef USE_SALSA20
+
+/* Defined if this module should be included */
+#undef USE_SCRYPT
+
+/* Defined if this module should be included */
+#undef USE_SEED
+
+/* Defined if this module should be included */
+#undef USE_SERPENT
+
+/* Defined if this module should be included */
+#undef USE_SHA1
+
+/* Defined if this module should be included */
+#undef USE_SHA256
+
+/* Defined if this module should be included */
+#undef USE_SHA3
+
+/* Defined if this module should be included */
+#undef USE_SHA512
+
+/* Defined if this module should be included */
+#undef USE_SM3
+
+/* Defined if this module should be included */
+#undef USE_SM4
+
+/* Enable extensions on AIX 3, Interix.  */
+#ifndef _ALL_SOURCE
+# undef _ALL_SOURCE
+#endif
+/* Enable general extensions on macOS.  */
+#ifndef _DARWIN_C_SOURCE
+# undef _DARWIN_C_SOURCE
+#endif
+/* Enable general extensions on Solaris.  */
+#ifndef __EXTENSIONS__
+# undef __EXTENSIONS__
+#endif
+/* Enable GNU extensions on systems that have them.  */
+#ifndef _GNU_SOURCE
+# undef _GNU_SOURCE
+#endif
+/* Enable X/Open compliant socket functions that do not require linking
+   with -lxnet on HP-UX 11.11.  */
+#ifndef _HPUX_ALT_XOPEN_SOCKET_API
+# undef _HPUX_ALT_XOPEN_SOCKET_API
+#endif
+/* Identify the host operating system as Minix.
+   This macro does not affect the system headers' behavior.
+   A future release of Autoconf may stop defining this macro.  */
+#ifndef _MINIX
+# undef _MINIX
+#endif
+/* Enable general extensions on NetBSD.
+   Enable NetBSD compatibility extensions on Minix.  */
+#ifndef _NETBSD_SOURCE
+# undef _NETBSD_SOURCE
+#endif
+/* Enable OpenBSD compatibility extensions on NetBSD.
+   Oddly enough, this does nothing on OpenBSD.  */
+#ifndef _OPENBSD_SOURCE
+# undef _OPENBSD_SOURCE
+#endif
+/* Define to 1 if needed for POSIX-compatible behavior.  */
+#ifndef _POSIX_SOURCE
+# undef _POSIX_SOURCE
+#endif
+/* Define to 2 if needed for POSIX-compatible behavior.  */
+#ifndef _POSIX_1_SOURCE
+# undef _POSIX_1_SOURCE
+#endif
+/* Enable POSIX-compatible threading on Solaris.  */
+#ifndef _POSIX_PTHREAD_SEMANTICS
+# undef _POSIX_PTHREAD_SEMANTICS
+#endif
+/* Enable extensions specified by ISO/IEC TS 18661-5:2014.  */
+#ifndef __STDC_WANT_IEC_60559_ATTRIBS_EXT__
+# undef __STDC_WANT_IEC_60559_ATTRIBS_EXT__
+#endif
+/* Enable extensions specified by ISO/IEC TS 18661-1:2014.  */
+#ifndef __STDC_WANT_IEC_60559_BFP_EXT__
+# undef __STDC_WANT_IEC_60559_BFP_EXT__
+#endif
+/* Enable extensions specified by ISO/IEC TS 18661-2:2015.  */
+#ifndef __STDC_WANT_IEC_60559_DFP_EXT__
+# undef __STDC_WANT_IEC_60559_DFP_EXT__
+#endif
+/* Enable extensions specified by ISO/IEC TS 18661-4:2015.  */
+#ifndef __STDC_WANT_IEC_60559_FUNCS_EXT__
+# undef __STDC_WANT_IEC_60559_FUNCS_EXT__
+#endif
+/* Enable extensions specified by ISO/IEC TS 18661-3:2015.  */
+#ifndef __STDC_WANT_IEC_60559_TYPES_EXT__
+# undef __STDC_WANT_IEC_60559_TYPES_EXT__
+#endif
+/* Enable extensions specified by ISO/IEC TR 24731-2:2010.  */
+#ifndef __STDC_WANT_LIB_EXT2__
+# undef __STDC_WANT_LIB_EXT2__
+#endif
+/* Enable extensions specified by ISO/IEC 24747:2009.  */
+#ifndef __STDC_WANT_MATH_SPEC_FUNCS__
+# undef __STDC_WANT_MATH_SPEC_FUNCS__
+#endif
+/* Enable extensions on HP NonStop.  */
+#ifndef _TANDEM_SOURCE
+# undef _TANDEM_SOURCE
+#endif
+/* Enable X/Open extensions.  Define to 500 only if necessary
+   to make mbstate_t available.  */
+#ifndef _XOPEN_SOURCE
+# undef _XOPEN_SOURCE
+#endif
+
+
+/* Defined if this module should be included */
+#undef USE_TIGER
+
+/* Defined if this module should be included */
+#undef USE_TWOFISH
+
+/* Defined if this module should be included */
+#undef USE_WHIRLPOOL
+
+/* Version number of package */
+#undef VERSION
+
+/* Defined if compiled symbols have a leading underscore */
+#undef WITH_SYMBOL_UNDERSCORE
+
+/* Define WORDS_BIGENDIAN to 1 if your processor stores words with the most
+   significant byte first (like Motorola and SPARC, unlike Intel). */
+#if defined AC_APPLE_UNIVERSAL_BUILD
+# if defined __BIG_ENDIAN__
+#  define WORDS_BIGENDIAN 1
+# endif
+#else
+# ifndef WORDS_BIGENDIAN
+#  undef WORDS_BIGENDIAN
+# endif
+#endif
+
+/* Expose all libc features (__DARWIN_C_FULL). */
+#undef _DARWIN_C_SOURCE
+
+/* To allow the use of Libgcrypt in multithreaded programs we have to use
+    special features from the library. */
+#ifndef _REENTRANT
+# define _REENTRANT 1
+#endif
+
+
+/* Define to supported assembler block keyword, if plain 'asm' was not
+   supported */
+#undef asm
+
+/* Define to empty if `const' does not conform to ANSI C. */
+#undef const
+
+/* Define to `__inline__' or `__inline' if that's what the C compiler
+   calls it, or to nothing if 'inline' is not supported under any name.  */
+#ifndef __cplusplus
+#undef inline
+#endif
+
+/* Define as a signed integer type capable of holding a process identifier. */
+#undef pid_t
+
+/* Define to `unsigned int' if <sys/types.h> does not define. */
+#undef size_t
+
+/* type to use in place of socklen_t if not defined */
+#undef socklen_t
+
+/* Define to the type of an unsigned integer type wide enough to hold a
+   pointer, if such a type exists, and if the system does not define it. */
+#undef uintptr_t
+
+
+#define _GCRYPT_IN_LIBGCRYPT 1
+
+/* Add .note.gnu.property section for Intel CET in assembler sources
+   when CET is enabled.  */
+#if defined(__ASSEMBLER__) && defined(__CET__)
+# include <cet.h>
+#endif
+
+/* If the configure check for endianness has been disabled, get it from
+   OS macros.  This is intended for making fat binary builds on OS X.  */
+#ifdef DISABLED_ENDIAN_CHECK
+# if defined(__BIG_ENDIAN__)
+#  define WORDS_BIGENDIAN 1
+# elif defined(__LITTLE_ENDIAN__)
+#  undef WORDS_BIGENDIAN
+# else
+#  error "No endianness found"
+# endif
+#endif /*DISABLED_ENDIAN_CHECK*/
+
+/* We basically use the original Camellia source.  Make sure the symbols
+   properly prefixed.  */
+#define CAMELLIA_EXT_SYM_PREFIX _gcry_
+
+#endif /*_GCRYPT_CONFIG_H_INCLUDED*/
+
diff --git a/grub-core/lib/libgcrypt/configure 
b/grub-core/lib/libgcrypt/configure
new file mode 100755
index 000000000..131114c68
--- /dev/null
+++ b/grub-core/lib/libgcrypt/configure
@@ -0,0 +1,25141 @@
+#! /bin/sh
+# From configure.ac Revision.
+# Guess values for system-dependent variables and create Makefiles.
+# Generated by GNU Autoconf 2.71 for libgcrypt 1.10.3.
+#
+# Report bugs to <https://bugs.gnupg.org>.
+#
+#
+# Copyright (C) 1992-1996, 1998-2017, 2020-2021 Free Software Foundation,
+# Inc.
+#
+#
+# This configure script is free software; the Free Software Foundation
+# gives unlimited permission to copy, distribute and modify it.
+## -------------------- ##
+## M4sh Initialization. ##
+## -------------------- ##
+
+# Be more Bourne compatible
+DUALCASE=1; export DUALCASE # for MKS sh
+as_nop=:
+if test ${ZSH_VERSION+y} && (emulate sh) >/dev/null 2>&1
+then :
+  emulate sh
+  NULLCMD=:
+  # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which
+  # is contrary to our usage.  Disable this feature.
+  alias -g '${1+"$@"}'='"$@"'
+  setopt NO_GLOB_SUBST
+else $as_nop
+  case `(set -o) 2>/dev/null` in #(
+  *posix*) :
+    set -o posix ;; #(
+  *) :
+     ;;
+esac
+fi
+
+
+
+# Reset variables that may have inherited troublesome values from
+# the environment.
+
+# IFS needs to be set, to space, tab, and newline, in precisely that order.
+# (If _AS_PATH_WALK were called with IFS unset, it would have the
+# side effect of setting IFS to empty, thus disabling word splitting.)
+# Quoting is to prevent editors from complaining about space-tab.
+as_nl='
+'
+export as_nl
+IFS=" ""       $as_nl"
+
+PS1='$ '
+PS2='> '
+PS4='+ '
+
+# Ensure predictable behavior from utilities with locale-dependent output.
+LC_ALL=C
+export LC_ALL
+LANGUAGE=C
+export LANGUAGE
+
+# We cannot yet rely on "unset" to work, but we need these variables
+# to be unset--not just set to an empty or harmless value--now, to
+# avoid bugs in old shells (e.g. pre-3.0 UWIN ksh).  This construct
+# also avoids known problems related to "unset" and subshell syntax
+# in other old shells (e.g. bash 2.01 and pdksh 5.2.14).
+for as_var in BASH_ENV ENV MAIL MAILPATH CDPATH
+do eval test \${$as_var+y} \
+  && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || :
+done
+
+# Ensure that fds 0, 1, and 2 are open.
+if (exec 3>&0) 2>/dev/null; then :; else exec 0</dev/null; fi
+if (exec 3>&1) 2>/dev/null; then :; else exec 1>/dev/null; fi
+if (exec 3>&2)            ; then :; else exec 2>/dev/null; fi
+
+# The user is always right.
+if ${PATH_SEPARATOR+false} :; then
+  PATH_SEPARATOR=:
+  (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && {
+    (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 ||
+      PATH_SEPARATOR=';'
+  }
+fi
+
+
+# Find who we are.  Look in the path if we contain no directory separator.
+as_myself=
+case $0 in #((
+  *[\\/]* ) as_myself=$0 ;;
+  *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  case $as_dir in #(((
+    '') as_dir=./ ;;
+    */) ;;
+    *) as_dir=$as_dir/ ;;
+  esac
+    test -r "$as_dir$0" && as_myself=$as_dir$0 && break
+  done
+IFS=$as_save_IFS
+
+     ;;
+esac
+# We did not find ourselves, most probably we were run as `sh COMMAND'
+# in which case we are not to be found in the path.
+if test "x$as_myself" = x; then
+  as_myself=$0
+fi
+if test ! -f "$as_myself"; then
+  printf "%s\n" "$as_myself: error: cannot find myself; rerun with an absolute 
file name" >&2
+  exit 1
+fi
+
+
+# Use a proper internal environment variable to ensure we don't fall
+  # into an infinite loop, continuously re-executing ourselves.
+  if test x"${_as_can_reexec}" != xno && test "x$CONFIG_SHELL" != x; then
+    _as_can_reexec=no; export _as_can_reexec;
+    # We cannot yet assume a decent shell, so we have to provide a
+# neutralization value for shells without unset; and this also
+# works around shells that cannot unset nonexistent variables.
+# Preserve -v and -x to the replacement shell.
+BASH_ENV=/dev/null
+ENV=/dev/null
+(unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV
+case $- in # ((((
+  *v*x* | *x*v* ) as_opts=-vx ;;
+  *v* ) as_opts=-v ;;
+  *x* ) as_opts=-x ;;
+  * ) as_opts= ;;
+esac
+exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"}
+# Admittedly, this is quite paranoid, since all the known shells bail
+# out after a failed `exec'.
+printf "%s\n" "$0: could not re-execute with $CONFIG_SHELL" >&2
+exit 255
+  fi
+  # We don't want this to propagate to other subprocesses.
+          { _as_can_reexec=; unset _as_can_reexec;}
+if test "x$CONFIG_SHELL" = x; then
+  as_bourne_compatible="as_nop=:
+if test \${ZSH_VERSION+y} && (emulate sh) >/dev/null 2>&1
+then :
+  emulate sh
+  NULLCMD=:
+  # Pre-4.2 versions of Zsh do word splitting on \${1+\"\$@\"}, which
+  # is contrary to our usage.  Disable this feature.
+  alias -g '\${1+\"\$@\"}'='\"\$@\"'
+  setopt NO_GLOB_SUBST
+else \$as_nop
+  case \`(set -o) 2>/dev/null\` in #(
+  *posix*) :
+    set -o posix ;; #(
+  *) :
+     ;;
+esac
+fi
+"
+  as_required="as_fn_return () { (exit \$1); }
+as_fn_success () { as_fn_return 0; }
+as_fn_failure () { as_fn_return 1; }
+as_fn_ret_success () { return 0; }
+as_fn_ret_failure () { return 1; }
+
+exitcode=0
+as_fn_success || { exitcode=1; echo as_fn_success failed.; }
+as_fn_failure && { exitcode=1; echo as_fn_failure succeeded.; }
+as_fn_ret_success || { exitcode=1; echo as_fn_ret_success failed.; }
+as_fn_ret_failure && { exitcode=1; echo as_fn_ret_failure succeeded.; }
+if ( set x; as_fn_ret_success y && test x = \"\$1\" )
+then :
+
+else \$as_nop
+  exitcode=1; echo positional parameters were not saved.
+fi
+test x\$exitcode = x0 || exit 1
+blah=\$(echo \$(echo blah))
+test x\"\$blah\" = xblah || exit 1
+test -x / || exit 1"
+  as_suggested="  
as_lineno_1=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" 
as_lineno_1a=\$LINENO
+  as_lineno_2=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" 
as_lineno_2a=\$LINENO
+  eval 'test \"x\$as_lineno_1'\$as_run'\" != \"x\$as_lineno_2'\$as_run'\" &&
+  test \"x\`expr \$as_lineno_1'\$as_run' + 1\`\" = 
\"x\$as_lineno_2'\$as_run'\"' || exit 1
+
+  test -n \"\${ZSH_VERSION+set}\${BASH_VERSION+set}\" || (
+    
ECHO='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\'
+    ECHO=\$ECHO\$ECHO\$ECHO\$ECHO\$ECHO
+    ECHO=\$ECHO\$ECHO\$ECHO\$ECHO\$ECHO\$ECHO
+    PATH=/empty FPATH=/empty; export PATH FPATH
+    test \"X\`printf %s \$ECHO\`\" = \"X\$ECHO\" \\
+      || test \"X\`print -r -- \$ECHO\`\" = \"X\$ECHO\" ) || exit 1
+test \$(( 1 + 1 )) = 2 || exit 1"
+  if (eval "$as_required") 2>/dev/null
+then :
+  as_have_required=yes
+else $as_nop
+  as_have_required=no
+fi
+  if test x$as_have_required = xyes && (eval "$as_suggested") 2>/dev/null
+then :
+
+else $as_nop
+  as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+as_found=false
+for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH
+do
+  IFS=$as_save_IFS
+  case $as_dir in #(((
+    '') as_dir=./ ;;
+    */) ;;
+    *) as_dir=$as_dir/ ;;
+  esac
+  as_found=:
+  case $as_dir in #(
+        /*)
+          for as_base in sh bash ksh sh5; do
+            # Try only shells that exist, to save several forks.
+            as_shell=$as_dir$as_base
+            if { test -f "$as_shell" || test -f "$as_shell.exe"; } &&
+                   as_run=a "$as_shell" -c 
"$as_bourne_compatible""$as_required" 2>/dev/null
+then :
+  CONFIG_SHELL=$as_shell as_have_required=yes
+                  if as_run=a "$as_shell" -c 
"$as_bourne_compatible""$as_suggested" 2>/dev/null
+then :
+  break 2
+fi
+fi
+          done;;
+       esac
+  as_found=false
+done
+IFS=$as_save_IFS
+if $as_found
+then :
+
+else $as_nop
+  if { test -f "$SHELL" || test -f "$SHELL.exe"; } &&
+             as_run=a "$SHELL" -c "$as_bourne_compatible""$as_required" 
2>/dev/null
+then :
+  CONFIG_SHELL=$SHELL as_have_required=yes
+fi
+fi
+
+
+      if test "x$CONFIG_SHELL" != x
+then :
+  export CONFIG_SHELL
+             # We cannot yet assume a decent shell, so we have to provide a
+# neutralization value for shells without unset; and this also
+# works around shells that cannot unset nonexistent variables.
+# Preserve -v and -x to the replacement shell.
+BASH_ENV=/dev/null
+ENV=/dev/null
+(unset BASH_ENV) >/dev/null 2>&1 && unset BASH_ENV ENV
+case $- in # ((((
+  *v*x* | *x*v* ) as_opts=-vx ;;
+  *v* ) as_opts=-v ;;
+  *x* ) as_opts=-x ;;
+  * ) as_opts= ;;
+esac
+exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"}
+# Admittedly, this is quite paranoid, since all the known shells bail
+# out after a failed `exec'.
+printf "%s\n" "$0: could not re-execute with $CONFIG_SHELL" >&2
+exit 255
+fi
+
+    if test x$as_have_required = xno
+then :
+  printf "%s\n" "$0: This script requires a shell more modern than all"
+  printf "%s\n" "$0: the shells that I found on your system."
+  if test ${ZSH_VERSION+y} ; then
+    printf "%s\n" "$0: In particular, zsh $ZSH_VERSION has bugs and should"
+    printf "%s\n" "$0: be upgraded to zsh 4.3.4 or later."
+  else
+    printf "%s\n" "$0: Please tell bug-autoconf@gnu.org and
+$0: https://bugs.gnupg.org about your system, including any
+$0: error possibly output before this message. Then install
+$0: a modern shell, or manually run the script under such a
+$0: shell if you do have one."
+  fi
+  exit 1
+fi
+fi
+fi
+SHELL=${CONFIG_SHELL-/bin/sh}
+export SHELL
+# Unset more variables known to interfere with behavior of common tools.
+CLICOLOR_FORCE= GREP_OPTIONS=
+unset CLICOLOR_FORCE GREP_OPTIONS
+
+## --------------------- ##
+## M4sh Shell Functions. ##
+## --------------------- ##
+# as_fn_unset VAR
+# ---------------
+# Portably unset VAR.
+as_fn_unset ()
+{
+  { eval $1=; unset $1;}
+}
+as_unset=as_fn_unset
+
+
+# as_fn_set_status STATUS
+# -----------------------
+# Set $? to STATUS, without forking.
+as_fn_set_status ()
+{
+  return $1
+} # as_fn_set_status
+
+# as_fn_exit STATUS
+# -----------------
+# Exit the shell with STATUS, even in a "trap 0" or "set -e" context.
+as_fn_exit ()
+{
+  set +e
+  as_fn_set_status $1
+  exit $1
+} # as_fn_exit
+# as_fn_nop
+# ---------
+# Do nothing but, unlike ":", preserve the value of $?.
+as_fn_nop ()
+{
+  return $?
+}
+as_nop=as_fn_nop
+
+# as_fn_mkdir_p
+# -------------
+# Create "$as_dir" as a directory, including parents if necessary.
+as_fn_mkdir_p ()
+{
+
+  case $as_dir in #(
+  -*) as_dir=./$as_dir;;
+  esac
+  test -d "$as_dir" || eval $as_mkdir_p || {
+    as_dirs=
+    while :; do
+      case $as_dir in #(
+      *\'*) as_qdir=`printf "%s\n" "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'(
+      *) as_qdir=$as_dir;;
+      esac
+      as_dirs="'$as_qdir' $as_dirs"
+      as_dir=`$as_dirname -- "$as_dir" ||
+$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+        X"$as_dir" : 'X\(//\)[^/]' \| \
+        X"$as_dir" : 'X\(//\)$' \| \
+        X"$as_dir" : 'X\(/\)' \| . 2>/dev/null ||
+printf "%s\n" X"$as_dir" |
+    sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
+           s//\1/
+           q
+         }
+         /^X\(\/\/\)[^/].*/{
+           s//\1/
+           q
+         }
+         /^X\(\/\/\)$/{
+           s//\1/
+           q
+         }
+         /^X\(\/\).*/{
+           s//\1/
+           q
+         }
+         s/.*/./; q'`
+      test -d "$as_dir" && break
+    done
+    test -z "$as_dirs" || eval "mkdir $as_dirs"
+  } || test -d "$as_dir" || as_fn_error $? "cannot create directory $as_dir"
+
+
+} # as_fn_mkdir_p
+
+# as_fn_executable_p FILE
+# -----------------------
+# Test if FILE is an executable regular file.
+as_fn_executable_p ()
+{
+  test -f "$1" && test -x "$1"
+} # as_fn_executable_p
+# as_fn_append VAR VALUE
+# ----------------------
+# Append the text in VALUE to the end of the definition contained in VAR. Take
+# advantage of any shell optimizations that allow amortized linear growth over
+# repeated appends, instead of the typical quadratic growth present in naive
+# implementations.
+if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null
+then :
+  eval 'as_fn_append ()
+  {
+    eval $1+=\$2
+  }'
+else $as_nop
+  as_fn_append ()
+  {
+    eval $1=\$$1\$2
+  }
+fi # as_fn_append
+
+# as_fn_arith ARG...
+# ------------------
+# Perform arithmetic evaluation on the ARGs, and store the result in the
+# global $as_val. Take advantage of shells that can avoid forks. The arguments
+# must be portable across $(()) and expr.
+if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null
+then :
+  eval 'as_fn_arith ()
+  {
+    as_val=$(( $* ))
+  }'
+else $as_nop
+  as_fn_arith ()
+  {
+    as_val=`expr "$@" || test $? -eq 1`
+  }
+fi # as_fn_arith
+
+# as_fn_nop
+# ---------
+# Do nothing but, unlike ":", preserve the value of $?.
+as_fn_nop ()
+{
+  return $?
+}
+as_nop=as_fn_nop
+
+# as_fn_error STATUS ERROR [LINENO LOG_FD]
+# ----------------------------------------
+# Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are
+# provided, also output the error to LOG_FD, referencing LINENO. Then exit the
+# script with STATUS, using 1 if that was 0.
+as_fn_error ()
+{
+  as_status=$1; test $as_status -eq 0 && as_status=1
+  if test "$4"; then
+    as_lineno=${as_lineno-"$3"} 
as_lineno_stack=as_lineno_stack=$as_lineno_stack
+    printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: $2" >&$4
+  fi
+  printf "%s\n" "$as_me: error: $2" >&2
+  as_fn_exit $as_status
+} # as_fn_error
+
+if expr a : '\(a\)' >/dev/null 2>&1 &&
+   test "X`expr 00001 : '.*\(...\)'`" = X001; then
+  as_expr=expr
+else
+  as_expr=false
+fi
+
+if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then
+  as_basename=basename
+else
+  as_basename=false
+fi
+
+if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then
+  as_dirname=dirname
+else
+  as_dirname=false
+fi
+
+as_me=`$as_basename -- "$0" ||
+$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \
+        X"$0" : 'X\(//\)$' \| \
+        X"$0" : 'X\(/\)' \| . 2>/dev/null ||
+printf "%s\n" X/"$0" |
+    sed '/^.*\/\([^/][^/]*\)\/*$/{
+           s//\1/
+           q
+         }
+         /^X\/\(\/\/\)$/{
+           s//\1/
+           q
+         }
+         /^X\/\(\/\).*/{
+           s//\1/
+           q
+         }
+         s/.*/./; q'`
+
+# Avoid depending upon Character Ranges.
+as_cr_letters='abcdefghijklmnopqrstuvwxyz'
+as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+as_cr_Letters=$as_cr_letters$as_cr_LETTERS
+as_cr_digits='0123456789'
+as_cr_alnum=$as_cr_Letters$as_cr_digits
+
+
+  as_lineno_1=$LINENO as_lineno_1a=$LINENO
+  as_lineno_2=$LINENO as_lineno_2a=$LINENO
+  eval 'test "x$as_lineno_1'$as_run'" != "x$as_lineno_2'$as_run'" &&
+  test "x`expr $as_lineno_1'$as_run' + 1`" = "x$as_lineno_2'$as_run'"' || {
+  # Blame Lee E. McMahon (1931-1989) for sed's syntax.  :-)
+  sed -n '
+    p
+    /[$]LINENO/=
+  ' <$as_myself |
+    sed '
+      s/[$]LINENO.*/&-/
+      t lineno
+      b
+      :lineno
+      N
+      :loop
+      s/[$]LINENO\([^'$as_cr_alnum'_].*\n\)\(.*\)/\2\1\2/
+      t loop
+      s/-\n.*//
+    ' >$as_me.lineno &&
+  chmod +x "$as_me.lineno" ||
+    { printf "%s\n" "$as_me: error: cannot create $as_me.lineno; rerun with a 
POSIX shell" >&2; as_fn_exit 1; }
+
+  # If we had to re-execute with $CONFIG_SHELL, we're ensured to have
+  # already done that, so ensure we don't try to do so again and fall
+  # in an infinite loop.  This has already happened in practice.
+  _as_can_reexec=no; export _as_can_reexec
+  # Don't try to exec as it changes $[0], causing all sort of problems
+  # (the dirname of $[0] is not the place where we might find the
+  # original and so on.  Autoconf is especially sensitive to this).
+  . "./$as_me.lineno"
+  # Exit status is that of the last command.
+  exit
+}
+
+
+# Determine whether it's possible to make 'echo' print without a newline.
+# These variables are no longer used directly by Autoconf, but are AC_SUBSTed
+# for compatibility with existing Makefiles.
+ECHO_C= ECHO_N= ECHO_T=
+case `echo -n x` in #(((((
+-n*)
+  case `echo 'xy\c'` in
+  *c*) ECHO_T='        ';;     # ECHO_T is single tab character.
+  xy)  ECHO_C='\c';;
+  *)   echo `echo ksh88 bug on AIX 6.1` > /dev/null
+       ECHO_T='        ';;
+  esac;;
+*)
+  ECHO_N='-n';;
+esac
+
+# For backward compatibility with old third-party macros, we provide
+# the shell variables $as_echo and $as_echo_n.  New code should use
+# AS_ECHO(["message"]) and AS_ECHO_N(["message"]), respectively.
+as_echo='printf %s\n'
+as_echo_n='printf %s'
+
+
+rm -f conf$$ conf$$.exe conf$$.file
+if test -d conf$$.dir; then
+  rm -f conf$$.dir/conf$$.file
+else
+  rm -f conf$$.dir
+  mkdir conf$$.dir 2>/dev/null
+fi
+if (echo >conf$$.file) 2>/dev/null; then
+  if ln -s conf$$.file conf$$ 2>/dev/null; then
+    as_ln_s='ln -s'
+    # ... but there are two gotchas:
+    # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail.
+    # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable.
+    # In both cases, we have to default to `cp -pR'.
+    ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe ||
+      as_ln_s='cp -pR'
+  elif ln conf$$.file conf$$ 2>/dev/null; then
+    as_ln_s=ln
+  else
+    as_ln_s='cp -pR'
+  fi
+else
+  as_ln_s='cp -pR'
+fi
+rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file
+rmdir conf$$.dir 2>/dev/null
+
+if mkdir -p . 2>/dev/null; then
+  as_mkdir_p='mkdir -p "$as_dir"'
+else
+  test -d ./-p && rmdir ./-p
+  as_mkdir_p=false
+fi
+
+as_test_x='test -x'
+as_executable_p=as_fn_executable_p
+
+# Sed expression to map a string onto a valid CPP name.
+as_tr_cpp="eval sed 
'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'"
+
+# Sed expression to map a string onto a valid variable name.
+as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'"
+
+SHELL=${CONFIG_SHELL-/bin/sh}
+
+
+test -n "$DJDIR" || exec 7<&0 </dev/null
+exec 6>&1
+
+# Name of the host.
+# hostname on some systems (SVR3.2, old GNU/Linux) returns a bogus exit status,
+# so uname gets run too.
+ac_hostname=`(hostname || uname -n) 2>/dev/null | sed 1q`
+
+#
+# Initializations.
+#
+ac_default_prefix=/usr/local
+ac_clean_files=
+ac_config_libobj_dir=.
+LIBOBJS=
+cross_compiling=no
+subdirs=
+MFLAGS=
+MAKEFLAGS=
+
+# Identity of this package.
+PACKAGE_NAME='libgcrypt'
+PACKAGE_TARNAME='libgcrypt'
+PACKAGE_VERSION='1.10.3'
+PACKAGE_STRING='libgcrypt 1.10.3'
+PACKAGE_BUGREPORT='https://bugs.gnupg.org'
+PACKAGE_URL=''
+
+ac_unique_file="src/libgcrypt.vers"
+ac_config_libobj_dir=compat
+# Factoring default headers for most tests.
+ac_includes_default="\
+#include <stddef.h>
+#ifdef HAVE_STDIO_H
+# include <stdio.h>
+#endif
+#ifdef HAVE_STDLIB_H
+# include <stdlib.h>
+#endif
+#ifdef HAVE_STRING_H
+# include <string.h>
+#endif
+#ifdef HAVE_INTTYPES_H
+# include <inttypes.h>
+#endif
+#ifdef HAVE_STDINT_H
+# include <stdint.h>
+#endif
+#ifdef HAVE_STRINGS_H
+# include <strings.h>
+#endif
+#ifdef HAVE_SYS_TYPES_H
+# include <sys/types.h>
+#endif
+#ifdef HAVE_SYS_STAT_H
+# include <sys/stat.h>
+#endif
+#ifdef HAVE_UNISTD_H
+# include <unistd.h>
+#endif"
+
+ac_header_c_list=
+ac_func_c_list=
+ac_subst_vars='am__EXEEXT_FALSE
+am__EXEEXT_TRUE
+LTLIBOBJS
+BUILD_TIMESTAMP
+BUILD_FILEVERSION
+BUILD_VERSION
+BUILD_REVISION
+BUILD_DOC_FALSE
+BUILD_DOC_TRUE
+GCRYPT_HWF_MODULES
+LIBGCRYPT_DIGESTS
+LIBGCRYPT_PUBKEY_CIPHERS
+LIBGCRYPT_CIPHERS
+GCRYPT_RANDOM
+GCRYPT_KDFS
+GCRYPT_DIGESTS
+GCRYPT_PUBKEY_CIPHERS
+GCRYPT_CIPHERS
+USE_ECC_FALSE
+USE_ECC_TRUE
+USE_ELGAMAL_FALSE
+USE_ELGAMAL_TRUE
+USE_RSA_FALSE
+USE_RSA_TRUE
+USE_DSA_FALSE
+USE_DSA_TRUE
+LIBGCRYPT_THREAD_MODULES
+LIBGCRYPT_CONFIG_HOST
+LIBGCRYPT_CONFIG_CFLAGS
+LIBGCRYPT_CONFIG_LIBS
+LIBGCRYPT_CONFIG_API_VERSION
+NOEXECSTACK_FLAGS
+CROSS_COMPILING_FALSE
+CROSS_COMPILING_TRUE
+DL_LIBS
+LIBOBJS
+ENABLE_PPC_VCRYPTO_EXTRA_CFLAGS_FALSE
+ENABLE_PPC_VCRYPTO_EXTRA_CFLAGS_TRUE
+MPI_MOD_C_UDIV_QRNND_FALSE
+MPI_MOD_C_UDIV_QRNND_TRUE
+MPI_MOD_C_UDIV_FALSE
+MPI_MOD_C_UDIV_TRUE
+MPI_MOD_C_MPIH_RSHIFT_FALSE
+MPI_MOD_C_MPIH_RSHIFT_TRUE
+MPI_MOD_C_MPIH_LSHIFT_FALSE
+MPI_MOD_C_MPIH_LSHIFT_TRUE
+MPI_MOD_C_MPIH_MUL3_FALSE
+MPI_MOD_C_MPIH_MUL3_TRUE
+MPI_MOD_C_MPIH_MUL2_FALSE
+MPI_MOD_C_MPIH_MUL2_TRUE
+MPI_MOD_C_MPIH_MUL1_FALSE
+MPI_MOD_C_MPIH_MUL1_TRUE
+MPI_MOD_C_MPIH_SUB1_FALSE
+MPI_MOD_C_MPIH_SUB1_TRUE
+MPI_MOD_C_MPIH_ADD1_FALSE
+MPI_MOD_C_MPIH_ADD1_TRUE
+MPI_MOD_ASM_UDIV_QRNND_FALSE
+MPI_MOD_ASM_UDIV_QRNND_TRUE
+MPI_MOD_ASM_UDIV_FALSE
+MPI_MOD_ASM_UDIV_TRUE
+MPI_MOD_ASM_MPIH_RSHIFT_FALSE
+MPI_MOD_ASM_MPIH_RSHIFT_TRUE
+MPI_MOD_ASM_MPIH_LSHIFT_FALSE
+MPI_MOD_ASM_MPIH_LSHIFT_TRUE
+MPI_MOD_ASM_MPIH_MUL3_FALSE
+MPI_MOD_ASM_MPIH_MUL3_TRUE
+MPI_MOD_ASM_MPIH_MUL2_FALSE
+MPI_MOD_ASM_MPIH_MUL2_TRUE
+MPI_MOD_ASM_MPIH_MUL1_FALSE
+MPI_MOD_ASM_MPIH_MUL1_TRUE
+MPI_MOD_ASM_MPIH_SUB1_FALSE
+MPI_MOD_ASM_MPIH_SUB1_TRUE
+MPI_MOD_ASM_MPIH_ADD1_FALSE
+MPI_MOD_ASM_MPIH_ADD1_TRUE
+MPI_SFLAGS
+PTH_LIBS
+PTH_CFLAGS
+PTH_CONFIG
+GPG_ERROR_MT_LIBS
+GPG_ERROR_MT_CFLAGS
+GPG_ERROR_LIBS
+GPG_ERROR_CFLAGS
+GPGRT_CONFIG
+GPG_ERROR_CONFIG
+HAVE_LD_VERSION_SCRIPT_FALSE
+HAVE_LD_VERSION_SCRIPT_TRUE
+ENABLE_INSTRUMENTATION_MUNGING_FALSE
+ENABLE_INSTRUMENTATION_MUNGING_TRUE
+ENABLE_O_FLAG_MUNGING_FALSE
+ENABLE_O_FLAG_MUNGING_TRUE
+DEF_HMAC_BINARY_CHECK
+USE_HMAC_BINARY_CHECK_FALSE
+USE_HMAC_BINARY_CHECK_TRUE
+READELF
+OBJCOPY
+RUN_LARGE_DATA_TESTS
+ENABLE_RANDOM_DAEMON_FALSE
+ENABLE_RANDOM_DAEMON_TRUE
+emacs_local_vars_end
+emacs_local_vars_read_only
+emacs_local_vars_begin
+HAVE_W32CE_SYSTEM_FALSE
+HAVE_W32CE_SYSTEM_TRUE
+HAVE_W32_SYSTEM_FALSE
+HAVE_W32_SYSTEM_TRUE
+RC
+OTOOL64
+OTOOL
+LIPO
+NMEDIT
+DSYMUTIL
+MANIFEST_TOOL
+RANLIB
+ac_ct_AR
+AR
+LN_S
+NM
+ac_ct_DUMPBIN
+DUMPBIN
+LD
+FGREP
+EGREP
+GREP
+SED
+LIBTOOL
+OBJDUMP
+DLLTOOL
+AS
+EXEEXT_FOR_BUILD
+CC_FOR_BUILD
+VERSION_NUMBER
+LDADD_FOR_TESTS_KLUDGE
+am__fastdepCCAS_FALSE
+am__fastdepCCAS_TRUE
+CCASDEPMODE
+CCASFLAGS
+CCAS
+CPP
+SYSROOT
+am__fastdepCC_FALSE
+am__fastdepCC_TRUE
+CCDEPMODE
+am__nodep
+AMDEPBACKSLASH
+AMDEP_FALSE
+AMDEP_TRUE
+am__include
+DEPDIR
+OBJEXT
+EXEEXT
+ac_ct_CC
+CPPFLAGS
+LDFLAGS
+CFLAGS
+CC
+MAINT
+MAINTAINER_MODE_FALSE
+MAINTAINER_MODE_TRUE
+host_os
+host_vendor
+host_cpu
+host
+build_os
+build_vendor
+build_cpu
+build
+AM_BACKSLASH
+AM_DEFAULT_VERBOSITY
+AM_DEFAULT_V
+AM_V
+CSCOPE
+ETAGS
+CTAGS
+am__untar
+am__tar
+AMTAR
+am__leading_dot
+SET_MAKE
+AWK
+mkdir_p
+MKDIR_P
+INSTALL_STRIP_PROGRAM
+STRIP
+install_sh
+MAKEINFO
+AUTOHEADER
+AUTOMAKE
+AUTOCONF
+ACLOCAL
+VERSION
+PACKAGE
+CYGPATH_W
+am__isrc
+INSTALL_DATA
+INSTALL_SCRIPT
+INSTALL_PROGRAM
+LIBGCRYPT_LT_REVISION
+LIBGCRYPT_LT_AGE
+LIBGCRYPT_LT_CURRENT
+target_alias
+host_alias
+build_alias
+LIBS
+ECHO_T
+ECHO_N
+ECHO_C
+DEFS
+mandir
+localedir
+libdir
+psdir
+pdfdir
+dvidir
+htmldir
+infodir
+docdir
+oldincludedir
+includedir
+runstatedir
+localstatedir
+sharedstatedir
+sysconfdir
+datadir
+datarootdir
+libexecdir
+sbindir
+bindir
+program_transform_name
+prefix
+exec_prefix
+PACKAGE_URL
+PACKAGE_BUGREPORT
+PACKAGE_STRING
+PACKAGE_VERSION
+PACKAGE_TARNAME
+PACKAGE_NAME
+PATH_SEPARATOR
+SHELL
+am__quote'
+ac_subst_files=''
+ac_user_opts='
+enable_option_checking
+enable_silent_rules
+enable_maintainer_mode
+enable_dependency_tracking
+enable_static
+enable_shared
+with_pic
+enable_fast_install
+with_gnu_ld
+with_sysroot
+enable_libtool_lock
+enable_endian_check
+enable_ciphers
+enable_pubkey_ciphers
+enable_digests
+enable_kdfs
+enable_random
+enable_dev_random
+with_egd_socket
+enable_random_daemon
+enable_asm
+enable_m_guard
+enable_large_data_tests
+enable_force_soft_hwfeatures
+with_capabilities
+enable_hmac_binary_check
+with_fips_module_version
+enable_jent_support
+enable_padlock_support
+enable_aesni_support
+enable_shaext_support
+enable_pclmul_support
+enable_sse41_support
+enable_drng_support
+enable_avx_support
+enable_avx2_support
+enable_neon_support
+enable_arm_crypto_support
+enable_ppc_crypto_support
+enable_O_flag_munging
+enable_instrumentation_munging
+enable_amd64_as_feature_detection
+enable_ld_version_script
+with_libtool_modification
+with_libgpg_error_prefix
+with_gpg_error_prefix
+with_pth_prefix
+enable_mpi_path
+enable_optimization
+enable_noexecstack
+enable_doc
+enable_build_timestamp
+'
+      ac_precious_vars='build_alias
+host_alias
+target_alias
+CC
+CFLAGS
+LDFLAGS
+LIBS
+CPPFLAGS
+SYSROOT
+CPP
+CCAS
+CCASFLAGS'
+
+
+# Initialize some variables set by options.
+ac_init_help=
+ac_init_version=false
+ac_unrecognized_opts=
+ac_unrecognized_sep=
+# The variables have the same names as the options, with
+# dashes changed to underlines.
+cache_file=/dev/null
+exec_prefix=NONE
+no_create=
+no_recursion=
+prefix=NONE
+program_prefix=NONE
+program_suffix=NONE
+program_transform_name=s,x,x,
+silent=
+site=
+srcdir=
+verbose=
+x_includes=NONE
+x_libraries=NONE
+
+# Installation directory options.
+# These are left unexpanded so users can "make install exec_prefix=/foo"
+# and all the variables that are supposed to be based on exec_prefix
+# by default will actually change.
+# Use braces instead of parens because sh, perl, etc. also accept them.
+# (The list follows the same order as the GNU Coding Standards.)
+bindir='${exec_prefix}/bin'
+sbindir='${exec_prefix}/sbin'
+libexecdir='${exec_prefix}/libexec'
+datarootdir='${prefix}/share'
+datadir='${datarootdir}'
+sysconfdir='${prefix}/etc'
+sharedstatedir='${prefix}/com'
+localstatedir='${prefix}/var'
+runstatedir='${localstatedir}/run'
+includedir='${prefix}/include'
+oldincludedir='/usr/include'
+docdir='${datarootdir}/doc/${PACKAGE_TARNAME}'
+infodir='${datarootdir}/info'
+htmldir='${docdir}'
+dvidir='${docdir}'
+pdfdir='${docdir}'
+psdir='${docdir}'
+libdir='${exec_prefix}/lib'
+localedir='${datarootdir}/locale'
+mandir='${datarootdir}/man'
+
+ac_prev=
+ac_dashdash=
+for ac_option
+do
+  # If the previous option needs an argument, assign it.
+  if test -n "$ac_prev"; then
+    eval $ac_prev=\$ac_option
+    ac_prev=
+    continue
+  fi
+
+  case $ac_option in
+  *=?*) ac_optarg=`expr "X$ac_option" : '[^=]*=\(.*\)'` ;;
+  *=)   ac_optarg= ;;
+  *)    ac_optarg=yes ;;
+  esac
+
+  case $ac_dashdash$ac_option in
+  --)
+    ac_dashdash=yes ;;
+
+  -bindir | --bindir | --bindi | --bind | --bin | --bi)
+    ac_prev=bindir ;;
+  -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*)
+    bindir=$ac_optarg ;;
+
+  -build | --build | --buil | --bui | --bu)
+    ac_prev=build_alias ;;
+  -build=* | --build=* | --buil=* | --bui=* | --bu=*)
+    build_alias=$ac_optarg ;;
+
+  -cache-file | --cache-file | --cache-fil | --cache-fi \
+  | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c)
+    ac_prev=cache_file ;;
+  -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \
+  | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* | --c=*)
+    cache_file=$ac_optarg ;;
+
+  --config-cache | -C)
+    cache_file=config.cache ;;
+
+  -datadir | --datadir | --datadi | --datad)
+    ac_prev=datadir ;;
+  -datadir=* | --datadir=* | --datadi=* | --datad=*)
+    datadir=$ac_optarg ;;
+
+  -datarootdir | --datarootdir | --datarootdi | --datarootd | --dataroot \
+  | --dataroo | --dataro | --datar)
+    ac_prev=datarootdir ;;
+  -datarootdir=* | --datarootdir=* | --datarootdi=* | --datarootd=* \
+  | --dataroot=* | --dataroo=* | --dataro=* | --datar=*)
+    datarootdir=$ac_optarg ;;
+
+  -disable-* | --disable-*)
+    ac_useropt=`expr "x$ac_option" : 'x-*disable-\(.*\)'`
+    # Reject names that are not valid shell variable names.
+    expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null &&
+      as_fn_error $? "invalid feature name: \`$ac_useropt'"
+    ac_useropt_orig=$ac_useropt
+    ac_useropt=`printf "%s\n" "$ac_useropt" | sed 's/[-+.]/_/g'`
+    case $ac_user_opts in
+      *"
+"enable_$ac_useropt"
+"*) ;;
+      *) 
ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--disable-$ac_useropt_orig"
+        ac_unrecognized_sep=', ';;
+    esac
+    eval enable_$ac_useropt=no ;;
+
+  -docdir | --docdir | --docdi | --doc | --do)
+    ac_prev=docdir ;;
+  -docdir=* | --docdir=* | --docdi=* | --doc=* | --do=*)
+    docdir=$ac_optarg ;;
+
+  -dvidir | --dvidir | --dvidi | --dvid | --dvi | --dv)
+    ac_prev=dvidir ;;
+  -dvidir=* | --dvidir=* | --dvidi=* | --dvid=* | --dvi=* | --dv=*)
+    dvidir=$ac_optarg ;;
+
+  -enable-* | --enable-*)
+    ac_useropt=`expr "x$ac_option" : 'x-*enable-\([^=]*\)'`
+    # Reject names that are not valid shell variable names.
+    expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null &&
+      as_fn_error $? "invalid feature name: \`$ac_useropt'"
+    ac_useropt_orig=$ac_useropt
+    ac_useropt=`printf "%s\n" "$ac_useropt" | sed 's/[-+.]/_/g'`
+    case $ac_user_opts in
+      *"
+"enable_$ac_useropt"
+"*) ;;
+      *) 
ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--enable-$ac_useropt_orig"
+        ac_unrecognized_sep=', ';;
+    esac
+    eval enable_$ac_useropt=\$ac_optarg ;;
+
+  -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \
+  | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- \
+  | --exec | --exe | --ex)
+    ac_prev=exec_prefix ;;
+  -exec-prefix=* | --exec_prefix=* | --exec-prefix=* | --exec-prefi=* \
+  | --exec-pref=* | --exec-pre=* | --exec-pr=* | --exec-p=* | --exec-=* \
+  | --exec=* | --exe=* | --ex=*)
+    exec_prefix=$ac_optarg ;;
+
+  -gas | --gas | --ga | --g)
+    # Obsolete; use --with-gas.
+    with_gas=yes ;;
+
+  -help | --help | --hel | --he | -h)
+    ac_init_help=long ;;
+  -help=r* | --help=r* | --hel=r* | --he=r* | -hr*)
+    ac_init_help=recursive ;;
+  -help=s* | --help=s* | --hel=s* | --he=s* | -hs*)
+    ac_init_help=short ;;
+
+  -host | --host | --hos | --ho)
+    ac_prev=host_alias ;;
+  -host=* | --host=* | --hos=* | --ho=*)
+    host_alias=$ac_optarg ;;
+
+  -htmldir | --htmldir | --htmldi | --htmld | --html | --htm | --ht)
+    ac_prev=htmldir ;;
+  -htmldir=* | --htmldir=* | --htmldi=* | --htmld=* | --html=* | --htm=* \
+  | --ht=*)
+    htmldir=$ac_optarg ;;
+
+  -includedir | --includedir | --includedi | --included | --include \
+  | --includ | --inclu | --incl | --inc)
+    ac_prev=includedir ;;
+  -includedir=* | --includedir=* | --includedi=* | --included=* | --include=* \
+  | --includ=* | --inclu=* | --incl=* | --inc=*)
+    includedir=$ac_optarg ;;
+
+  -infodir | --infodir | --infodi | --infod | --info | --inf)
+    ac_prev=infodir ;;
+  -infodir=* | --infodir=* | --infodi=* | --infod=* | --info=* | --inf=*)
+    infodir=$ac_optarg ;;
+
+  -libdir | --libdir | --libdi | --libd)
+    ac_prev=libdir ;;
+  -libdir=* | --libdir=* | --libdi=* | --libd=*)
+    libdir=$ac_optarg ;;
+
+  -libexecdir | --libexecdir | --libexecdi | --libexecd | --libexec \
+  | --libexe | --libex | --libe)
+    ac_prev=libexecdir ;;
+  -libexecdir=* | --libexecdir=* | --libexecdi=* | --libexecd=* | --libexec=* \
+  | --libexe=* | --libex=* | --libe=*)
+    libexecdir=$ac_optarg ;;
+
+  -localedir | --localedir | --localedi | --localed | --locale)
+    ac_prev=localedir ;;
+  -localedir=* | --localedir=* | --localedi=* | --localed=* | --locale=*)
+    localedir=$ac_optarg ;;
+
+  -localstatedir | --localstatedir | --localstatedi | --localstated \
+  | --localstate | --localstat | --localsta | --localst | --locals)
+    ac_prev=localstatedir ;;
+  -localstatedir=* | --localstatedir=* | --localstatedi=* | --localstated=* \
+  | --localstate=* | --localstat=* | --localsta=* | --localst=* | --locals=*)
+    localstatedir=$ac_optarg ;;
+
+  -mandir | --mandir | --mandi | --mand | --man | --ma | --m)
+    ac_prev=mandir ;;
+  -mandir=* | --mandir=* | --mandi=* | --mand=* | --man=* | --ma=* | --m=*)
+    mandir=$ac_optarg ;;
+
+  -nfp | --nfp | --nf)
+    # Obsolete; use --without-fp.
+    with_fp=no ;;
+
+  -no-create | --no-create | --no-creat | --no-crea | --no-cre \
+  | --no-cr | --no-c | -n)
+    no_create=yes ;;
+
+  -no-recursion | --no-recursion | --no-recursio | --no-recursi \
+  | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r)
+    no_recursion=yes ;;
+
+  -oldincludedir | --oldincludedir | --oldincludedi | --oldincluded \
+  | --oldinclude | --oldinclud | --oldinclu | --oldincl | --oldinc \
+  | --oldin | --oldi | --old | --ol | --o)
+    ac_prev=oldincludedir ;;
+  -oldincludedir=* | --oldincludedir=* | --oldincludedi=* | --oldincluded=* \
+  | --oldinclude=* | --oldinclud=* | --oldinclu=* | --oldincl=* | --oldinc=* \
+  | --oldin=* | --oldi=* | --old=* | --ol=* | --o=*)
+    oldincludedir=$ac_optarg ;;
+
+  -prefix | --prefix | --prefi | --pref | --pre | --pr | --p)
+    ac_prev=prefix ;;
+  -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*)
+    prefix=$ac_optarg ;;
+
+  -program-prefix | --program-prefix | --program-prefi | --program-pref \
+  | --program-pre | --program-pr | --program-p)
+    ac_prev=program_prefix ;;
+  -program-prefix=* | --program-prefix=* | --program-prefi=* \
+  | --program-pref=* | --program-pre=* | --program-pr=* | --program-p=*)
+    program_prefix=$ac_optarg ;;
+
+  -program-suffix | --program-suffix | --program-suffi | --program-suff \
+  | --program-suf | --program-su | --program-s)
+    ac_prev=program_suffix ;;
+  -program-suffix=* | --program-suffix=* | --program-suffi=* \
+  | --program-suff=* | --program-suf=* | --program-su=* | --program-s=*)
+    program_suffix=$ac_optarg ;;
+
+  -program-transform-name | --program-transform-name \
+  | --program-transform-nam | --program-transform-na \
+  | --program-transform-n | --program-transform- \
+  | --program-transform | --program-transfor \
+  | --program-transfo | --program-transf \
+  | --program-trans | --program-tran \
+  | --progr-tra | --program-tr | --program-t)
+    ac_prev=program_transform_name ;;
+  -program-transform-name=* | --program-transform-name=* \
+  | --program-transform-nam=* | --program-transform-na=* \
+  | --program-transform-n=* | --program-transform-=* \
+  | --program-transform=* | --program-transfor=* \
+  | --program-transfo=* | --program-transf=* \
+  | --program-trans=* | --program-tran=* \
+  | --progr-tra=* | --program-tr=* | --program-t=*)
+    program_transform_name=$ac_optarg ;;
+
+  -pdfdir | --pdfdir | --pdfdi | --pdfd | --pdf | --pd)
+    ac_prev=pdfdir ;;
+  -pdfdir=* | --pdfdir=* | --pdfdi=* | --pdfd=* | --pdf=* | --pd=*)
+    pdfdir=$ac_optarg ;;
+
+  -psdir | --psdir | --psdi | --psd | --ps)
+    ac_prev=psdir ;;
+  -psdir=* | --psdir=* | --psdi=* | --psd=* | --ps=*)
+    psdir=$ac_optarg ;;
+
+  -q | -quiet | --quiet | --quie | --qui | --qu | --q \
+  | -silent | --silent | --silen | --sile | --sil)
+    silent=yes ;;
+
+  -runstatedir | --runstatedir | --runstatedi | --runstated \
+  | --runstate | --runstat | --runsta | --runst | --runs \
+  | --run | --ru | --r)
+    ac_prev=runstatedir ;;
+  -runstatedir=* | --runstatedir=* | --runstatedi=* | --runstated=* \
+  | --runstate=* | --runstat=* | --runsta=* | --runst=* | --runs=* \
+  | --run=* | --ru=* | --r=*)
+    runstatedir=$ac_optarg ;;
+
+  -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb)
+    ac_prev=sbindir ;;
+  -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \
+  | --sbi=* | --sb=*)
+    sbindir=$ac_optarg ;;
+
+  -sharedstatedir | --sharedstatedir | --sharedstatedi \
+  | --sharedstated | --sharedstate | --sharedstat | --sharedsta \
+  | --sharedst | --shareds | --shared | --share | --shar \
+  | --sha | --sh)
+    ac_prev=sharedstatedir ;;
+  -sharedstatedir=* | --sharedstatedir=* | --sharedstatedi=* \
+  | --sharedstated=* | --sharedstate=* | --sharedstat=* | --sharedsta=* \
+  | --sharedst=* | --shareds=* | --shared=* | --share=* | --shar=* \
+  | --sha=* | --sh=*)
+    sharedstatedir=$ac_optarg ;;
+
+  -site | --site | --sit)
+    ac_prev=site ;;
+  -site=* | --site=* | --sit=*)
+    site=$ac_optarg ;;
+
+  -srcdir | --srcdir | --srcdi | --srcd | --src | --sr)
+    ac_prev=srcdir ;;
+  -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*)
+    srcdir=$ac_optarg ;;
+
+  -sysconfdir | --sysconfdir | --sysconfdi | --sysconfd | --sysconf \
+  | --syscon | --sysco | --sysc | --sys | --sy)
+    ac_prev=sysconfdir ;;
+  -sysconfdir=* | --sysconfdir=* | --sysconfdi=* | --sysconfd=* | --sysconf=* \
+  | --syscon=* | --sysco=* | --sysc=* | --sys=* | --sy=*)
+    sysconfdir=$ac_optarg ;;
+
+  -target | --target | --targe | --targ | --tar | --ta | --t)
+    ac_prev=target_alias ;;
+  -target=* | --target=* | --targe=* | --targ=* | --tar=* | --ta=* | --t=*)
+    target_alias=$ac_optarg ;;
+
+  -v | -verbose | --verbose | --verbos | --verbo | --verb)
+    verbose=yes ;;
+
+  -version | --version | --versio | --versi | --vers | -V)
+    ac_init_version=: ;;
+
+  -with-* | --with-*)
+    ac_useropt=`expr "x$ac_option" : 'x-*with-\([^=]*\)'`
+    # Reject names that are not valid shell variable names.
+    expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null &&
+      as_fn_error $? "invalid package name: \`$ac_useropt'"
+    ac_useropt_orig=$ac_useropt
+    ac_useropt=`printf "%s\n" "$ac_useropt" | sed 's/[-+.]/_/g'`
+    case $ac_user_opts in
+      *"
+"with_$ac_useropt"
+"*) ;;
+      *) 
ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--with-$ac_useropt_orig"
+        ac_unrecognized_sep=', ';;
+    esac
+    eval with_$ac_useropt=\$ac_optarg ;;
+
+  -without-* | --without-*)
+    ac_useropt=`expr "x$ac_option" : 'x-*without-\(.*\)'`
+    # Reject names that are not valid shell variable names.
+    expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null &&
+      as_fn_error $? "invalid package name: \`$ac_useropt'"
+    ac_useropt_orig=$ac_useropt
+    ac_useropt=`printf "%s\n" "$ac_useropt" | sed 's/[-+.]/_/g'`
+    case $ac_user_opts in
+      *"
+"with_$ac_useropt"
+"*) ;;
+      *) 
ac_unrecognized_opts="$ac_unrecognized_opts$ac_unrecognized_sep--without-$ac_useropt_orig"
+        ac_unrecognized_sep=', ';;
+    esac
+    eval with_$ac_useropt=no ;;
+
+  --x)
+    # Obsolete; use --with-x.
+    with_x=yes ;;
+
+  -x-includes | --x-includes | --x-include | --x-includ | --x-inclu \
+  | --x-incl | --x-inc | --x-in | --x-i)
+    ac_prev=x_includes ;;
+  -x-includes=* | --x-includes=* | --x-include=* | --x-includ=* | --x-inclu=* \
+  | --x-incl=* | --x-inc=* | --x-in=* | --x-i=*)
+    x_includes=$ac_optarg ;;
+
+  -x-libraries | --x-libraries | --x-librarie | --x-librari \
+  | --x-librar | --x-libra | --x-libr | --x-lib | --x-li | --x-l)
+    ac_prev=x_libraries ;;
+  -x-libraries=* | --x-libraries=* | --x-librarie=* | --x-librari=* \
+  | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*)
+    x_libraries=$ac_optarg ;;
+
+  -*) as_fn_error $? "unrecognized option: \`$ac_option'
+Try \`$0 --help' for more information"
+    ;;
+
+  *=*)
+    ac_envvar=`expr "x$ac_option" : 'x\([^=]*\)='`
+    # Reject names that are not valid shell variable names.
+    case $ac_envvar in #(
+      '' | [0-9]* | *[!_$as_cr_alnum]* )
+      as_fn_error $? "invalid variable name: \`$ac_envvar'" ;;
+    esac
+    eval $ac_envvar=\$ac_optarg
+    export $ac_envvar ;;
+
+  *)
+    # FIXME: should be removed in autoconf 3.0.
+    printf "%s\n" "$as_me: WARNING: you should use --build, --host, --target" 
>&2
+    expr "x$ac_option" : ".*[^-._$as_cr_alnum]" >/dev/null &&
+      printf "%s\n" "$as_me: WARNING: invalid host type: $ac_option" >&2
+    : "${build_alias=$ac_option} ${host_alias=$ac_option} 
${target_alias=$ac_option}"
+    ;;
+
+  esac
+done
+
+if test -n "$ac_prev"; then
+  ac_option=--`echo $ac_prev | sed 's/_/-/g'`
+  as_fn_error $? "missing argument to $ac_option"
+fi
+
+if test -n "$ac_unrecognized_opts"; then
+  case $enable_option_checking in
+    no) ;;
+    fatal) as_fn_error $? "unrecognized options: $ac_unrecognized_opts" ;;
+    *)     printf "%s\n" "$as_me: WARNING: unrecognized options: 
$ac_unrecognized_opts" >&2 ;;
+  esac
+fi
+
+# Check all directory arguments for consistency.
+for ac_var in  exec_prefix prefix bindir sbindir libexecdir datarootdir \
+               datadir sysconfdir sharedstatedir localstatedir includedir \
+               oldincludedir docdir infodir htmldir dvidir pdfdir psdir \
+               libdir localedir mandir runstatedir
+do
+  eval ac_val=\$$ac_var
+  # Remove trailing slashes.
+  case $ac_val in
+    */ )
+      ac_val=`expr "X$ac_val" : 'X\(.*[^/]\)' \| "X$ac_val" : 'X\(.*\)'`
+      eval $ac_var=\$ac_val;;
+  esac
+  # Be sure to have absolute directory names.
+  case $ac_val in
+    [\\/$]* | ?:[\\/]* )  continue;;
+    NONE | '' ) case $ac_var in *prefix ) continue;; esac;;
+  esac
+  as_fn_error $? "expected an absolute directory name for --$ac_var: $ac_val"
+done
+
+# There might be people who depend on the old broken behavior: `$host'
+# used to hold the argument of --host etc.
+# FIXME: To remove some day.
+build=$build_alias
+host=$host_alias
+target=$target_alias
+
+# FIXME: To remove some day.
+if test "x$host_alias" != x; then
+  if test "x$build_alias" = x; then
+    cross_compiling=maybe
+  elif test "x$build_alias" != "x$host_alias"; then
+    cross_compiling=yes
+  fi
+fi
+
+ac_tool_prefix=
+test -n "$host_alias" && ac_tool_prefix=$host_alias-
+
+test "$silent" = yes && exec 6>/dev/null
+
+
+ac_pwd=`pwd` && test -n "$ac_pwd" &&
+ac_ls_di=`ls -di .` &&
+ac_pwd_ls_di=`cd "$ac_pwd" && ls -di .` ||
+  as_fn_error $? "working directory cannot be determined"
+test "X$ac_ls_di" = "X$ac_pwd_ls_di" ||
+  as_fn_error $? "pwd does not report name of working directory"
+
+
+# Find the source files, if location was not specified.
+if test -z "$srcdir"; then
+  ac_srcdir_defaulted=yes
+  # Try the directory containing this script, then the parent directory.
+  ac_confdir=`$as_dirname -- "$as_myself" ||
+$as_expr X"$as_myself" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+        X"$as_myself" : 'X\(//\)[^/]' \| \
+        X"$as_myself" : 'X\(//\)$' \| \
+        X"$as_myself" : 'X\(/\)' \| . 2>/dev/null ||
+printf "%s\n" X"$as_myself" |
+    sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
+           s//\1/
+           q
+         }
+         /^X\(\/\/\)[^/].*/{
+           s//\1/
+           q
+         }
+         /^X\(\/\/\)$/{
+           s//\1/
+           q
+         }
+         /^X\(\/\).*/{
+           s//\1/
+           q
+         }
+         s/.*/./; q'`
+  srcdir=$ac_confdir
+  if test ! -r "$srcdir/$ac_unique_file"; then
+    srcdir=..
+  fi
+else
+  ac_srcdir_defaulted=no
+fi
+if test ! -r "$srcdir/$ac_unique_file"; then
+  test "$ac_srcdir_defaulted" = yes && srcdir="$ac_confdir or .."
+  as_fn_error $? "cannot find sources ($ac_unique_file) in $srcdir"
+fi
+ac_msg="sources are in $srcdir, but \`cd $srcdir' does not work"
+ac_abs_confdir=`(
+       cd "$srcdir" && test -r "./$ac_unique_file" || as_fn_error $? "$ac_msg"
+       pwd)`
+# When building in place, set srcdir=.
+if test "$ac_abs_confdir" = "$ac_pwd"; then
+  srcdir=.
+fi
+# Remove unnecessary trailing slashes from srcdir.
+# Double slashes in file names in object file debugging info
+# mess up M-x gdb in Emacs.
+case $srcdir in
+*/) srcdir=`expr "X$srcdir" : 'X\(.*[^/]\)' \| "X$srcdir" : 'X\(.*\)'`;;
+esac
+for ac_var in $ac_precious_vars; do
+  eval ac_env_${ac_var}_set=\${${ac_var}+set}
+  eval ac_env_${ac_var}_value=\$${ac_var}
+  eval ac_cv_env_${ac_var}_set=\${${ac_var}+set}
+  eval ac_cv_env_${ac_var}_value=\$${ac_var}
+done
+
+#
+# Report the --help message.
+#
+if test "$ac_init_help" = "long"; then
+  # Omit some internal or obsolete options to make the list less imposing.
+  # This message is too long to be a string in the A/UX 3.1 sh.
+  cat <<_ACEOF
+\`configure' configures libgcrypt 1.10.3 to adapt to many kinds of systems.
+
+Usage: $0 [OPTION]... [VAR=VALUE]...
+
+To assign environment variables (e.g., CC, CFLAGS...), specify them as
+VAR=VALUE.  See below for descriptions of some of the useful variables.
+
+Defaults for the options are specified in brackets.
+
+Configuration:
+  -h, --help              display this help and exit
+      --help=short        display options specific to this package
+      --help=recursive    display the short help of all the included packages
+  -V, --version           display version information and exit
+  -q, --quiet, --silent   do not print \`checking ...' messages
+      --cache-file=FILE   cache test results in FILE [disabled]
+  -C, --config-cache      alias for \`--cache-file=config.cache'
+  -n, --no-create         do not create output files
+      --srcdir=DIR        find the sources in DIR [configure dir or \`..']
+
+Installation directories:
+  --prefix=PREFIX         install architecture-independent files in PREFIX
+                          [$ac_default_prefix]
+  --exec-prefix=EPREFIX   install architecture-dependent files in EPREFIX
+                          [PREFIX]
+
+By default, \`make install' will install all the files in
+\`$ac_default_prefix/bin', \`$ac_default_prefix/lib' etc.  You can specify
+an installation prefix other than \`$ac_default_prefix' using \`--prefix',
+for instance \`--prefix=\$HOME'.
+
+For better control, use the options below.
+
+Fine tuning of the installation directories:
+  --bindir=DIR            user executables [EPREFIX/bin]
+  --sbindir=DIR           system admin executables [EPREFIX/sbin]
+  --libexecdir=DIR        program executables [EPREFIX/libexec]
+  --sysconfdir=DIR        read-only single-machine data [PREFIX/etc]
+  --sharedstatedir=DIR    modifiable architecture-independent data [PREFIX/com]
+  --localstatedir=DIR     modifiable single-machine data [PREFIX/var]
+  --runstatedir=DIR       modifiable per-process data [LOCALSTATEDIR/run]
+  --libdir=DIR            object code libraries [EPREFIX/lib]
+  --includedir=DIR        C header files [PREFIX/include]
+  --oldincludedir=DIR     C header files for non-gcc [/usr/include]
+  --datarootdir=DIR       read-only arch.-independent data root [PREFIX/share]
+  --datadir=DIR           read-only architecture-independent data [DATAROOTDIR]
+  --infodir=DIR           info documentation [DATAROOTDIR/info]
+  --localedir=DIR         locale-dependent data [DATAROOTDIR/locale]
+  --mandir=DIR            man documentation [DATAROOTDIR/man]
+  --docdir=DIR            documentation root [DATAROOTDIR/doc/libgcrypt]
+  --htmldir=DIR           html documentation [DOCDIR]
+  --dvidir=DIR            dvi documentation [DOCDIR]
+  --pdfdir=DIR            pdf documentation [DOCDIR]
+  --psdir=DIR             ps documentation [DOCDIR]
+_ACEOF
+
+  cat <<\_ACEOF
+
+Program names:
+  --program-prefix=PREFIX            prepend PREFIX to installed program names
+  --program-suffix=SUFFIX            append SUFFIX to installed program names
+  --program-transform-name=PROGRAM   run sed PROGRAM on installed program names
+
+System types:
+  --build=BUILD     configure for building on BUILD [guessed]
+  --host=HOST       cross-compile to build programs to run on HOST [BUILD]
+_ACEOF
+fi
+
+if test -n "$ac_init_help"; then
+  case $ac_init_help in
+     short | recursive ) echo "Configuration of libgcrypt 1.10.3:";;
+   esac
+  cat <<\_ACEOF
+
+Optional Features:
+  --disable-option-checking  ignore unrecognized --enable/--with options
+  --disable-FEATURE       do not include FEATURE (same as --enable-FEATURE=no)
+  --enable-FEATURE[=ARG]  include FEATURE [ARG=yes]
+  --enable-silent-rules   less verbose build output (undo: "make V=1")
+  --disable-silent-rules  verbose build output (undo: "make V=0")
+  --enable-maintainer-mode
+                          enable make rules and dependencies not useful (and
+                          sometimes confusing) to the casual installer
+  --enable-dependency-tracking
+                          do not reject slow dependency extractors
+  --disable-dependency-tracking
+                          speeds up one-time build
+  --enable-static[=PKGS]  build static libraries [default=no]
+  --enable-shared[=PKGS]  build shared libraries [default=yes]
+  --enable-fast-install[=PKGS]
+                          optimize for fast installation [default=yes]
+  --disable-libtool-lock  avoid locking (might break parallel builds)
+  --disable-endian-check  disable the endian check and trust the OS provided
+                          macros
+  --enable-ciphers=ciphers
+                          select the symmetric ciphers to include
+  --enable-pubkey-ciphers=ciphers
+                          select the public-key ciphers to include
+  --enable-digests=digests
+                          select the message digests to include
+  --enable-kfds=kdfs      select the KDFs to include
+  --enable-random=name    select which random number generator to use
+  --disable-dev-random    disable the use of dev random
+  --enable-random-daemon  Build the experimental gcryptrnd
+  --disable-asm           Disable MPI and cipher assembler modules
+  --enable-m-guard        Enable memory guard facility
+  --enable-large-data-tests
+                          Enable the real long ruinning large data tests
+  --enable-force-soft-hwfeatures
+                          Enable forcing 'soft' HW feature bits on
+  --enable-hmac-binary-check
+                          Enable library integrity check
+  --disable-jent-support  Disable support for the Jitter entropy collector
+  --disable-padlock-support
+                          Disable support for the PadLock Engine of VIA
+                          processors
+  --disable-aesni-support Disable support for the Intel AES-NI instructions
+  --disable-shaext-support
+                          Disable support for the Intel SHAEXT instructions
+  --disable-pclmul-support
+                          Disable support for the Intel PCLMUL instructions
+  --disable-sse41-support Disable support for the Intel SSE4.1 instructions
+  --disable-drng-support  Disable support for the Intel DRNG (RDRAND
+                          instruction)
+  --disable-avx-support   Disable support for the Intel AVX instructions
+  --disable-avx2-support  Disable support for the Intel AVX2 instructions
+  --disable-neon-support  Disable support for the ARM NEON instructions
+  --disable-arm-crypto-support
+                          Disable support for the ARMv8 Crypto Extension
+                          instructions
+  --disable-ppc-crypto-support
+                          Disable support for the PPC crypto instructions
+                          introduced in POWER 8 (PowerISA 2.07)
+  --disable-O-flag-munging
+                          Disable modification of the cc -O flag
+  --disable-instrumentation-munging
+                          Disable modification of the cc instrumentation
+                          options
+  --disable-amd64-as-feature-detection
+                          Disable the auto-detection of AMD64 as(1) features
+  --enable-ld-version-script
+                          enable/disable use of linker version script.
+                          (default is system dependent)
+  --enable-mpi-path=EXTRA_PATH
+                          prepend EXTRA_PATH to list of CPU specific
+                          optimizations
+  --disable-optimization  disable compiler optimization
+  --disable-noexecstack   disable non executable stack support
+  --disable-doc           do not build the documentation
+  --enable-build-timestamp
+                          set an explicit build timestamp for reproducibility.
+                          (default is the current time in ISO-8601 format)
+
+Optional Packages:
+  --with-PACKAGE[=ARG]    use PACKAGE [ARG=yes]
+  --without-PACKAGE       do not use PACKAGE (same as --with-PACKAGE=no)
+  --with-pic[=PKGS]       try to use only PIC/non-PIC objects [default=use
+                          both]
+  --with-gnu-ld           assume the C compiler uses GNU ld [default=no]
+  --with-sysroot=DIR Search for dependent libraries within DIR
+                        (or the compiler's sysroot if not specified).
+  --with-egd-socket=NAME  Use NAME for the EGD socket)
+  --with-capabilities     Use linux capabilities [default=no]
+  --with-fips-module-version=VERSION
+                          Specify the FIPS module version for the build
+  --with-libtool-modification=apply|never|try
+                          how to handle libtool modification (default=never)
+  --with-libgpg-error-prefix=PFX
+                          prefix where GPG Error is installed (optional)
+
+  --with-pth-prefix=PFX   prefix where GNU Pth is installed (optional)
+
+Some influential environment variables:
+  CC          C compiler command
+  CFLAGS      C compiler flags
+  LDFLAGS     linker flags, e.g. -L<lib dir> if you have libraries in a
+              nonstandard directory <lib dir>
+  LIBS        libraries to pass to the linker, e.g. -l<library>
+  CPPFLAGS    (Objective) C/C++ preprocessor flags, e.g. -I<include dir> if
+              you have headers in a nonstandard directory <include dir>
+  SYSROOT     locate config scripts also below that directory
+  CPP         C preprocessor
+  CCAS        assembler compiler command (defaults to CC)
+  CCASFLAGS   assembler compiler flags (defaults to CFLAGS)
+
+Use these variables to override the choices made by `configure' or to help
+it to find libraries and programs with nonstandard names/locations.
+
+Report bugs to <https://bugs.gnupg.org>.
+_ACEOF
+ac_status=$?
+fi
+
+if test "$ac_init_help" = "recursive"; then
+  # If there are subdirs, report their specific --help.
+  for ac_dir in : $ac_subdirs_all; do test "x$ac_dir" = x: && continue
+    test -d "$ac_dir" ||
+      { cd "$srcdir" && ac_pwd=`pwd` && srcdir=. && test -d "$ac_dir"; } ||
+      continue
+    ac_builddir=.
+
+case "$ac_dir" in
+.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;;
+*)
+  ac_dir_suffix=/`printf "%s\n" "$ac_dir" | sed 's|^\.[\\/]||'`
+  # A ".." for each directory in $ac_dir_suffix.
+  ac_top_builddir_sub=`printf "%s\n" "$ac_dir_suffix" | sed 
's|/[^\\/]*|/..|g;s|/||'`
+  case $ac_top_builddir_sub in
+  "") ac_top_builddir_sub=. ac_top_build_prefix= ;;
+  *)  ac_top_build_prefix=$ac_top_builddir_sub/ ;;
+  esac ;;
+esac
+ac_abs_top_builddir=$ac_pwd
+ac_abs_builddir=$ac_pwd$ac_dir_suffix
+# for backward compatibility:
+ac_top_builddir=$ac_top_build_prefix
+
+case $srcdir in
+  .)  # We are building in place.
+    ac_srcdir=.
+    ac_top_srcdir=$ac_top_builddir_sub
+    ac_abs_top_srcdir=$ac_pwd ;;
+  [\\/]* | ?:[\\/]* )  # Absolute name.
+    ac_srcdir=$srcdir$ac_dir_suffix;
+    ac_top_srcdir=$srcdir
+    ac_abs_top_srcdir=$srcdir ;;
+  *) # Relative name.
+    ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix
+    ac_top_srcdir=$ac_top_build_prefix$srcdir
+    ac_abs_top_srcdir=$ac_pwd/$srcdir ;;
+esac
+ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix
+
+    cd "$ac_dir" || { ac_status=$?; continue; }
+    # Check for configure.gnu first; this name is used for a wrapper for
+    # Metaconfig's "Configure" on case-insensitive file systems.
+    if test -f "$ac_srcdir/configure.gnu"; then
+      echo &&
+      $SHELL "$ac_srcdir/configure.gnu" --help=recursive
+    elif test -f "$ac_srcdir/configure"; then
+      echo &&
+      $SHELL "$ac_srcdir/configure" --help=recursive
+    else
+      printf "%s\n" "$as_me: WARNING: no configuration information is in 
$ac_dir" >&2
+    fi || ac_status=$?
+    cd "$ac_pwd" || { ac_status=$?; break; }
+  done
+fi
+
+test -n "$ac_init_help" && exit $ac_status
+if $ac_init_version; then
+  cat <<\_ACEOF
+libgcrypt configure 1.10.3
+generated by GNU Autoconf 2.71
+
+Copyright (C) 2021 Free Software Foundation, Inc.
+This configure script is free software; the Free Software Foundation
+gives unlimited permission to copy, distribute and modify it.
+_ACEOF
+  exit
+fi
+
+## ------------------------ ##
+## Autoconf initialization. ##
+## ------------------------ ##
+
+# ac_fn_c_try_compile LINENO
+# --------------------------
+# Try to compile conftest.$ac_ext, and return whether this succeeded.
+ac_fn_c_try_compile ()
+{
+  as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+  rm -f conftest.$ac_objext conftest.beam
+  if { { ac_try="$ac_compile"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+printf "%s\n" "$ac_try_echo"; } >&5
+  (eval "$ac_compile") 2>conftest.err
+  ac_status=$?
+  if test -s conftest.err; then
+    grep -v '^ *+' conftest.err >conftest.er1
+    cat conftest.er1 >&5
+    mv -f conftest.er1 conftest.err
+  fi
+  printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; } && {
+        test -z "$ac_c_werror_flag" ||
+        test ! -s conftest.err
+       } && test -s conftest.$ac_objext
+then :
+  ac_retval=0
+else $as_nop
+  printf "%s\n" "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+       ac_retval=1
+fi
+  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
+  as_fn_set_status $ac_retval
+
+} # ac_fn_c_try_compile
+
+# ac_fn_c_check_header_compile LINENO HEADER VAR INCLUDES
+# -------------------------------------------------------
+# Tests whether HEADER exists and can be compiled using the include files in
+# INCLUDES, setting the cache variable VAR accordingly.
+ac_fn_c_check_header_compile ()
+{
+  as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
+printf %s "checking for $2... " >&6; }
+if eval test \${$3+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+$4
+#include <$2>
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"
+then :
+  eval "$3=yes"
+else $as_nop
+  eval "$3=no"
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext
+fi
+eval ac_res=\$$3
+              { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_res" 
>&5
+printf "%s\n" "$ac_res" >&6; }
+  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
+
+} # ac_fn_c_check_header_compile
+
+# ac_fn_c_try_cpp LINENO
+# ----------------------
+# Try to preprocess conftest.$ac_ext, and return whether this succeeded.
+ac_fn_c_try_cpp ()
+{
+  as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+  if { { ac_try="$ac_cpp conftest.$ac_ext"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+printf "%s\n" "$ac_try_echo"; } >&5
+  (eval "$ac_cpp conftest.$ac_ext") 2>conftest.err
+  ac_status=$?
+  if test -s conftest.err; then
+    grep -v '^ *+' conftest.err >conftest.er1
+    cat conftest.er1 >&5
+    mv -f conftest.er1 conftest.err
+  fi
+  printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; } > conftest.i && {
+        test -z "$ac_c_preproc_warn_flag$ac_c_werror_flag" ||
+        test ! -s conftest.err
+       }
+then :
+  ac_retval=0
+else $as_nop
+  printf "%s\n" "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+    ac_retval=1
+fi
+  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
+  as_fn_set_status $ac_retval
+
+} # ac_fn_c_try_cpp
+
+# ac_fn_c_try_link LINENO
+# -----------------------
+# Try to link conftest.$ac_ext, and return whether this succeeded.
+ac_fn_c_try_link ()
+{
+  as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+  rm -f conftest.$ac_objext conftest.beam conftest$ac_exeext
+  if { { ac_try="$ac_link"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+printf "%s\n" "$ac_try_echo"; } >&5
+  (eval "$ac_link") 2>conftest.err
+  ac_status=$?
+  if test -s conftest.err; then
+    grep -v '^ *+' conftest.err >conftest.er1
+    cat conftest.er1 >&5
+    mv -f conftest.er1 conftest.err
+  fi
+  printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; } && {
+        test -z "$ac_c_werror_flag" ||
+        test ! -s conftest.err
+       } && test -s conftest$ac_exeext && {
+        test "$cross_compiling" = yes ||
+        test -x conftest$ac_exeext
+       }
+then :
+  ac_retval=0
+else $as_nop
+  printf "%s\n" "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+       ac_retval=1
+fi
+  # Delete the IPA/IPO (Inter Procedural Analysis/Optimization) information
+  # created by the PGI compiler (conftest_ipa8_conftest.oo), as it would
+  # interfere with the next link command; also delete a directory that is
+  # left behind by Apple's compiler.  We do this before executing the actions.
+  rm -rf conftest.dSYM conftest_ipa8_conftest.oo
+  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
+  as_fn_set_status $ac_retval
+
+} # ac_fn_c_try_link
+
+# ac_fn_c_check_func LINENO FUNC VAR
+# ----------------------------------
+# Tests whether FUNC exists, setting the cache variable VAR accordingly
+ac_fn_c_check_func ()
+{
+  as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
+printf %s "checking for $2... " >&6; }
+if eval test \${$3+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+/* Define $2 to an innocuous variant, in case <limits.h> declares $2.
+   For example, HP-UX 11i <limits.h> declares gettimeofday.  */
+#define $2 innocuous_$2
+
+/* System header to define __stub macros and hopefully few prototypes,
+   which can conflict with char $2 (); below.  */
+
+#include <limits.h>
+#undef $2
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char $2 ();
+/* The GNU C library defines this for functions which it implements
+    to always fail with ENOSYS.  Some functions are actually named
+    something starting with __ and the normal name is an alias.  */
+#if defined __stub_$2 || defined __stub___$2
+choke me
+#endif
+
+int
+main (void)
+{
+return $2 ();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"
+then :
+  eval "$3=yes"
+else $as_nop
+  eval "$3=no"
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam \
+    conftest$ac_exeext conftest.$ac_ext
+fi
+eval ac_res=\$$3
+              { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_res" 
>&5
+printf "%s\n" "$ac_res" >&6; }
+  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
+
+} # ac_fn_c_check_func
+
+# ac_fn_c_try_run LINENO
+# ----------------------
+# Try to run conftest.$ac_ext, and return whether this succeeded. Assumes that
+# executables *can* be run.
+ac_fn_c_try_run ()
+{
+  as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+  if { { ac_try="$ac_link"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+printf "%s\n" "$ac_try_echo"; } >&5
+  (eval "$ac_link") 2>&5
+  ac_status=$?
+  printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; } && { ac_try='./conftest$ac_exeext'
+  { { case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+printf "%s\n" "$ac_try_echo"; } >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; }
+then :
+  ac_retval=0
+else $as_nop
+  printf "%s\n" "$as_me: program exited with status $ac_status" >&5
+       printf "%s\n" "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+       ac_retval=$ac_status
+fi
+  rm -rf conftest.dSYM conftest_ipa8_conftest.oo
+  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
+  as_fn_set_status $ac_retval
+
+} # ac_fn_c_try_run
+
+# ac_fn_c_compute_int LINENO EXPR VAR INCLUDES
+# --------------------------------------------
+# Tries to find the compile-time value of EXPR in a program that includes
+# INCLUDES, setting VAR accordingly. Returns whether the value could be
+# computed
+ac_fn_c_compute_int ()
+{
+  as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+  if test "$cross_compiling" = yes; then
+    # Depending upon the size, compute the lo and hi bounds.
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+$4
+int
+main (void)
+{
+static int test_array [1 - 2 * !(($2) >= 0)];
+test_array [0] = 0;
+return test_array [0];
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"
+then :
+  ac_lo=0 ac_mid=0
+  while :; do
+    cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+$4
+int
+main (void)
+{
+static int test_array [1 - 2 * !(($2) <= $ac_mid)];
+test_array [0] = 0;
+return test_array [0];
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"
+then :
+  ac_hi=$ac_mid; break
+else $as_nop
+  as_fn_arith $ac_mid + 1 && ac_lo=$as_val
+                       if test $ac_lo -le $ac_mid; then
+                         ac_lo= ac_hi=
+                         break
+                       fi
+                       as_fn_arith 2 '*' $ac_mid + 1 && ac_mid=$as_val
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext
+  done
+else $as_nop
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+$4
+int
+main (void)
+{
+static int test_array [1 - 2 * !(($2) < 0)];
+test_array [0] = 0;
+return test_array [0];
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"
+then :
+  ac_hi=-1 ac_mid=-1
+  while :; do
+    cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+$4
+int
+main (void)
+{
+static int test_array [1 - 2 * !(($2) >= $ac_mid)];
+test_array [0] = 0;
+return test_array [0];
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"
+then :
+  ac_lo=$ac_mid; break
+else $as_nop
+  as_fn_arith '(' $ac_mid ')' - 1 && ac_hi=$as_val
+                       if test $ac_mid -le $ac_hi; then
+                         ac_lo= ac_hi=
+                         break
+                       fi
+                       as_fn_arith 2 '*' $ac_mid && ac_mid=$as_val
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext
+  done
+else $as_nop
+  ac_lo= ac_hi=
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext
+# Binary search between lo and hi bounds.
+while test "x$ac_lo" != "x$ac_hi"; do
+  as_fn_arith '(' $ac_hi - $ac_lo ')' / 2 + $ac_lo && ac_mid=$as_val
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+$4
+int
+main (void)
+{
+static int test_array [1 - 2 * !(($2) <= $ac_mid)];
+test_array [0] = 0;
+return test_array [0];
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"
+then :
+  ac_hi=$ac_mid
+else $as_nop
+  as_fn_arith '(' $ac_mid ')' + 1 && ac_lo=$as_val
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext
+done
+case $ac_lo in #((
+?*) eval "$3=\$ac_lo"; ac_retval=0 ;;
+'') ac_retval=1 ;;
+esac
+  else
+    cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+$4
+static long int longval (void) { return $2; }
+static unsigned long int ulongval (void) { return $2; }
+#include <stdio.h>
+#include <stdlib.h>
+int
+main (void)
+{
+
+  FILE *f = fopen ("conftest.val", "w");
+  if (! f)
+    return 1;
+  if (($2) < 0)
+    {
+      long int i = longval ();
+      if (i != ($2))
+       return 1;
+      fprintf (f, "%ld", i);
+    }
+  else
+    {
+      unsigned long int i = ulongval ();
+      if (i != ($2))
+       return 1;
+      fprintf (f, "%lu", i);
+    }
+  /* Do not output a trailing newline, as this causes \r\n confusion
+     on some platforms.  */
+  return ferror (f) || fclose (f) != 0;
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_run "$LINENO"
+then :
+  echo >>conftest.val; read $3 <conftest.val; ac_retval=0
+else $as_nop
+  ac_retval=1
+fi
+rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
+  conftest.$ac_objext conftest.beam conftest.$ac_ext
+rm -f conftest.val
+
+  fi
+  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
+  as_fn_set_status $ac_retval
+
+} # ac_fn_c_compute_int
+
+# ac_fn_c_check_type LINENO TYPE VAR INCLUDES
+# -------------------------------------------
+# Tests whether TYPE exists after having included INCLUDES, setting cache
+# variable VAR accordingly.
+ac_fn_c_check_type ()
+{
+  as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $2" >&5
+printf %s "checking for $2... " >&6; }
+if eval test \${$3+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  eval "$3=no"
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+$4
+int
+main (void)
+{
+if (sizeof ($2))
+        return 0;
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"
+then :
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+$4
+int
+main (void)
+{
+if (sizeof (($2)))
+           return 0;
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"
+then :
+
+else $as_nop
+  eval "$3=yes"
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext
+fi
+eval ac_res=\$$3
+              { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_res" 
>&5
+printf "%s\n" "$ac_res" >&6; }
+  eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno
+
+} # ac_fn_c_check_type
+ac_configure_args_raw=
+for ac_arg
+do
+  case $ac_arg in
+  *\'*)
+    ac_arg=`printf "%s\n" "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;;
+  esac
+  as_fn_append ac_configure_args_raw " '$ac_arg'"
+done
+
+case $ac_configure_args_raw in
+  *$as_nl*)
+    ac_safe_unquote= ;;
+  *)
+    ac_unsafe_z='|&;<>()$`\\"*?[ ''    ' # This string ends in space, tab.
+    ac_unsafe_a="$ac_unsafe_z#~"
+    ac_safe_unquote="s/ '\\([^$ac_unsafe_a][^$ac_unsafe_z]*\\)'/ \\1/g"
+    ac_configure_args_raw=`      printf "%s\n" "$ac_configure_args_raw" | sed 
"$ac_safe_unquote"`;;
+esac
+
+cat >config.log <<_ACEOF
+This file contains any messages produced by compilers while
+running configure, to aid debugging if configure makes a mistake.
+
+It was created by libgcrypt $as_me 1.10.3, which was
+generated by GNU Autoconf 2.71.  Invocation command line was
+
+  $ $0$ac_configure_args_raw
+
+_ACEOF
+exec 5>>config.log
+{
+cat <<_ASUNAME
+## --------- ##
+## Platform. ##
+## --------- ##
+
+hostname = `(hostname || uname -n) 2>/dev/null | sed 1q`
+uname -m = `(uname -m) 2>/dev/null || echo unknown`
+uname -r = `(uname -r) 2>/dev/null || echo unknown`
+uname -s = `(uname -s) 2>/dev/null || echo unknown`
+uname -v = `(uname -v) 2>/dev/null || echo unknown`
+
+/usr/bin/uname -p = `(/usr/bin/uname -p) 2>/dev/null || echo unknown`
+/bin/uname -X     = `(/bin/uname -X) 2>/dev/null     || echo unknown`
+
+/bin/arch              = `(/bin/arch) 2>/dev/null              || echo unknown`
+/usr/bin/arch -k       = `(/usr/bin/arch -k) 2>/dev/null       || echo unknown`
+/usr/convex/getsysinfo = `(/usr/convex/getsysinfo) 2>/dev/null || echo unknown`
+/usr/bin/hostinfo      = `(/usr/bin/hostinfo) 2>/dev/null      || echo unknown`
+/bin/machine           = `(/bin/machine) 2>/dev/null           || echo unknown`
+/usr/bin/oslevel       = `(/usr/bin/oslevel) 2>/dev/null       || echo unknown`
+/bin/universe          = `(/bin/universe) 2>/dev/null          || echo unknown`
+
+_ASUNAME
+
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  case $as_dir in #(((
+    '') as_dir=./ ;;
+    */) ;;
+    *) as_dir=$as_dir/ ;;
+  esac
+    printf "%s\n" "PATH: $as_dir"
+  done
+IFS=$as_save_IFS
+
+} >&5
+
+cat >&5 <<_ACEOF
+
+
+## ----------- ##
+## Core tests. ##
+## ----------- ##
+
+_ACEOF
+
+
+# Keep a trace of the command line.
+# Strip out --no-create and --no-recursion so they do not pile up.
+# Strip out --silent because we don't want to record it for future runs.
+# Also quote any args containing shell meta-characters.
+# Make two passes to allow for proper duplicate-argument suppression.
+ac_configure_args=
+ac_configure_args0=
+ac_configure_args1=
+ac_must_keep_next=false
+for ac_pass in 1 2
+do
+  for ac_arg
+  do
+    case $ac_arg in
+    -no-create | --no-c* | -n | -no-recursion | --no-r*) continue ;;
+    -q | -quiet | --quiet | --quie | --qui | --qu | --q \
+    | -silent | --silent | --silen | --sile | --sil)
+      continue ;;
+    *\'*)
+      ac_arg=`printf "%s\n" "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;;
+    esac
+    case $ac_pass in
+    1) as_fn_append ac_configure_args0 " '$ac_arg'" ;;
+    2)
+      as_fn_append ac_configure_args1 " '$ac_arg'"
+      if test $ac_must_keep_next = true; then
+       ac_must_keep_next=false # Got value, back to normal.
+      else
+       case $ac_arg in
+         *=* | --config-cache | -C | -disable-* | --disable-* \
+         | -enable-* | --enable-* | -gas | --g* | -nfp | --nf* \
+         | -q | -quiet | --q* | -silent | --sil* | -v | -verb* \
+         | -with-* | --with-* | -without-* | --without-* | --x)
+           case "$ac_configure_args0 " in
+             "$ac_configure_args1"*" '$ac_arg' "* ) continue ;;
+           esac
+           ;;
+         -* ) ac_must_keep_next=true ;;
+       esac
+      fi
+      as_fn_append ac_configure_args " '$ac_arg'"
+      ;;
+    esac
+  done
+done
+{ ac_configure_args0=; unset ac_configure_args0;}
+{ ac_configure_args1=; unset ac_configure_args1;}
+
+# When interrupted or exit'd, cleanup temporary files, and complete
+# config.log.  We remove comments because anyway the quotes in there
+# would cause problems or look ugly.
+# WARNING: Use '\'' to represent an apostrophe within the trap.
+# WARNING: Do not start the trap code with a newline, due to a FreeBSD 4.0 bug.
+trap 'exit_status=$?
+  # Sanitize IFS.
+  IFS=" ""     $as_nl"
+  # Save into config.log some information that might help in debugging.
+  {
+    echo
+
+    printf "%s\n" "## ---------------- ##
+## Cache variables. ##
+## ---------------- ##"
+    echo
+    # The following way of writing the cache mishandles newlines in values,
+(
+  for ac_var in `(set) 2>&1 | sed -n 
'\''s/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'\''`; do
+    eval ac_val=\$$ac_var
+    case $ac_val in #(
+    *${as_nl}*)
+      case $ac_var in #(
+      *_cv_*) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: cache 
variable $ac_var contains a newline" >&5
+printf "%s\n" "$as_me: WARNING: cache variable $ac_var contains a newline" 
>&2;} ;;
+      esac
+      case $ac_var in #(
+      _ | IFS | as_nl) ;; #(
+      BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #(
+      *) { eval $ac_var=; unset $ac_var;} ;;
+      esac ;;
+    esac
+  done
+  (set) 2>&1 |
+    case $as_nl`(ac_space='\'' '\''; set) 2>&1` in #(
+    *${as_nl}ac_space=\ *)
+      sed -n \
+       "s/'\''/'\''\\\\'\'''\''/g;
+         
s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\''\\2'\''/p"
+      ;; #(
+    *)
+      sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p"
+      ;;
+    esac |
+    sort
+)
+    echo
+
+    printf "%s\n" "## ----------------- ##
+## Output variables. ##
+## ----------------- ##"
+    echo
+    for ac_var in $ac_subst_vars
+    do
+      eval ac_val=\$$ac_var
+      case $ac_val in
+      *\'\''*) ac_val=`printf "%s\n" "$ac_val" | sed 
"s/'\''/'\''\\\\\\\\'\'''\''/g"`;;
+      esac
+      printf "%s\n" "$ac_var='\''$ac_val'\''"
+    done | sort
+    echo
+
+    if test -n "$ac_subst_files"; then
+      printf "%s\n" "## ------------------- ##
+## File substitutions. ##
+## ------------------- ##"
+      echo
+      for ac_var in $ac_subst_files
+      do
+       eval ac_val=\$$ac_var
+       case $ac_val in
+       *\'\''*) ac_val=`printf "%s\n" "$ac_val" | sed 
"s/'\''/'\''\\\\\\\\'\'''\''/g"`;;
+       esac
+       printf "%s\n" "$ac_var='\''$ac_val'\''"
+      done | sort
+      echo
+    fi
+
+    if test -s confdefs.h; then
+      printf "%s\n" "## ----------- ##
+## confdefs.h. ##
+## ----------- ##"
+      echo
+      cat confdefs.h
+      echo
+    fi
+    test "$ac_signal" != 0 &&
+      printf "%s\n" "$as_me: caught signal $ac_signal"
+    printf "%s\n" "$as_me: exit $exit_status"
+  } >&5
+  rm -f core *.core core.conftest.* &&
+    rm -f -r conftest* confdefs* conf$$* $ac_clean_files &&
+    exit $exit_status
+' 0
+for ac_signal in 1 2 13 15; do
+  trap 'ac_signal='$ac_signal'; as_fn_exit 1' $ac_signal
+done
+ac_signal=0
+
+# confdefs.h avoids OS command line length limits that DEFS can exceed.
+rm -f -r conftest* confdefs.h
+
+printf "%s\n" "/* confdefs.h */" > confdefs.h
+
+# Predefined preprocessor variables.
+
+printf "%s\n" "#define PACKAGE_NAME \"$PACKAGE_NAME\"" >>confdefs.h
+
+printf "%s\n" "#define PACKAGE_TARNAME \"$PACKAGE_TARNAME\"" >>confdefs.h
+
+printf "%s\n" "#define PACKAGE_VERSION \"$PACKAGE_VERSION\"" >>confdefs.h
+
+printf "%s\n" "#define PACKAGE_STRING \"$PACKAGE_STRING\"" >>confdefs.h
+
+printf "%s\n" "#define PACKAGE_BUGREPORT \"$PACKAGE_BUGREPORT\"" >>confdefs.h
+
+printf "%s\n" "#define PACKAGE_URL \"$PACKAGE_URL\"" >>confdefs.h
+
+
+# Let the site file select an alternate cache file if it wants to.
+# Prefer an explicitly selected file to automatically selected ones.
+if test -n "$CONFIG_SITE"; then
+  ac_site_files="$CONFIG_SITE"
+elif test "x$prefix" != xNONE; then
+  ac_site_files="$prefix/share/config.site $prefix/etc/config.site"
+else
+  ac_site_files="$ac_default_prefix/share/config.site 
$ac_default_prefix/etc/config.site"
+fi
+
+for ac_site_file in $ac_site_files
+do
+  case $ac_site_file in #(
+  */*) :
+     ;; #(
+  *) :
+    ac_site_file=./$ac_site_file ;;
+esac
+  if test -f "$ac_site_file" && test -r "$ac_site_file"; then
+    { printf "%s\n" "$as_me:${as_lineno-$LINENO}: loading site script 
$ac_site_file" >&5
+printf "%s\n" "$as_me: loading site script $ac_site_file" >&6;}
+    sed 's/^/| /' "$ac_site_file" >&5
+    . "$ac_site_file" \
+      || { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in 
\`$ac_pwd':" >&5
+printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "failed to load site script $ac_site_file
+See \`config.log' for more details" "$LINENO" 5; }
+  fi
+done
+
+if test -r "$cache_file"; then
+  # Some versions of bash will fail to source /dev/null (special files
+  # actually), so we avoid doing that.  DJGPP emulates it as a regular file.
+  if test /dev/null != "$cache_file" && test -f "$cache_file"; then
+    { printf "%s\n" "$as_me:${as_lineno-$LINENO}: loading cache $cache_file" 
>&5
+printf "%s\n" "$as_me: loading cache $cache_file" >&6;}
+    case $cache_file in
+      [\\/]* | ?:[\\/]* ) . "$cache_file";;
+      *)                      . "./$cache_file";;
+    esac
+  fi
+else
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: creating cache $cache_file" >&5
+printf "%s\n" "$as_me: creating cache $cache_file" >&6;}
+  >$cache_file
+fi
+
+as_fn_append ac_header_c_list " stdio.h stdio_h HAVE_STDIO_H"
+# Test code for whether the C compiler supports C89 (global declarations)
+ac_c_conftest_c89_globals='
+/* Does the compiler advertise C89 conformance?
+   Do not test the value of __STDC__, because some compilers set it to 0
+   while being otherwise adequately conformant. */
+#if !defined __STDC__
+# error "Compiler does not advertise C89 conformance"
+#endif
+
+#include <stddef.h>
+#include <stdarg.h>
+struct stat;
+/* Most of the following tests are stolen from RCS 5.7 src/conf.sh.  */
+struct buf { int x; };
+struct buf * (*rcsopen) (struct buf *, struct stat *, int);
+static char *e (p, i)
+     char **p;
+     int i;
+{
+  return p[i];
+}
+static char *f (char * (*g) (char **, int), char **p, ...)
+{
+  char *s;
+  va_list v;
+  va_start (v,p);
+  s = g (p, va_arg (v,int));
+  va_end (v);
+  return s;
+}
+
+/* OSF 4.0 Compaq cc is some sort of almost-ANSI by default.  It has
+   function prototypes and stuff, but not \xHH hex character constants.
+   These do not provoke an error unfortunately, instead are silently treated
+   as an "x".  The following induces an error, until -std is added to get
+   proper ANSI mode.  Curiously \x00 != x always comes out true, for an
+   array size at least.  It is necessary to write \x00 == 0 to get something
+   that is true only with -std.  */
+int osf4_cc_array ['\''\x00'\'' == 0 ? 1 : -1];
+
+/* IBM C 6 for AIX is almost-ANSI by default, but it replaces macro parameters
+   inside strings and character constants.  */
+#define FOO(x) '\''x'\''
+int xlc6_cc_array[FOO(a) == '\''x'\'' ? 1 : -1];
+
+int test (int i, double x);
+struct s1 {int (*f) (int a);};
+struct s2 {int (*f) (double a);};
+int pairnames (int, char **, int *(*)(struct buf *, struct stat *, int),
+               int, int);'
+
+# Test code for whether the C compiler supports C89 (body of main).
+ac_c_conftest_c89_main='
+ok |= (argc == 0 || f (e, argv, 0) != argv[0] || f (e, argv, 1) != argv[1]);
+'
+
+# Test code for whether the C compiler supports C99 (global declarations)
+ac_c_conftest_c99_globals='
+// Does the compiler advertise C99 conformance?
+#if !defined __STDC_VERSION__ || __STDC_VERSION__ < 199901L
+# error "Compiler does not advertise C99 conformance"
+#endif
+
+#include <stdbool.h>
+extern int puts (const char *);
+extern int printf (const char *, ...);
+extern int dprintf (int, const char *, ...);
+extern void *malloc (size_t);
+
+// Check varargs macros.  These examples are taken from C99 6.10.3.5.
+// dprintf is used instead of fprintf to avoid needing to declare
+// FILE and stderr.
+#define debug(...) dprintf (2, __VA_ARGS__)
+#define showlist(...) puts (#__VA_ARGS__)
+#define report(test,...) ((test) ? puts (#test) : printf (__VA_ARGS__))
+static void
+test_varargs_macros (void)
+{
+  int x = 1234;
+  int y = 5678;
+  debug ("Flag");
+  debug ("X = %d\n", x);
+  showlist (The first, second, and third items.);
+  report (x>y, "x is %d but y is %d", x, y);
+}
+
+// Check long long types.
+#define BIG64 18446744073709551615ull
+#define BIG32 4294967295ul
+#define BIG_OK (BIG64 / BIG32 == 4294967297ull && BIG64 % BIG32 == 0)
+#if !BIG_OK
+  #error "your preprocessor is broken"
+#endif
+#if BIG_OK
+#else
+  #error "your preprocessor is broken"
+#endif
+static long long int bignum = -9223372036854775807LL;
+static unsigned long long int ubignum = BIG64;
+
+struct incomplete_array
+{
+  int datasize;
+  double data[];
+};
+
+struct named_init {
+  int number;
+  const wchar_t *name;
+  double average;
+};
+
+typedef const char *ccp;
+
+static inline int
+test_restrict (ccp restrict text)
+{
+  // See if C++-style comments work.
+  // Iterate through items via the restricted pointer.
+  // Also check for declarations in for loops.
+  for (unsigned int i = 0; *(text+i) != '\''\0'\''; ++i)
+    continue;
+  return 0;
+}
+
+// Check varargs and va_copy.
+static bool
+test_varargs (const char *format, ...)
+{
+  va_list args;
+  va_start (args, format);
+  va_list args_copy;
+  va_copy (args_copy, args);
+
+  const char *str = "";
+  int number = 0;
+  float fnumber = 0;
+
+  while (*format)
+    {
+      switch (*format++)
+       {
+       case '\''s'\'': // string
+         str = va_arg (args_copy, const char *);
+         break;
+       case '\''d'\'': // int
+         number = va_arg (args_copy, int);
+         break;
+       case '\''f'\'': // float
+         fnumber = va_arg (args_copy, double);
+         break;
+       default:
+         break;
+       }
+    }
+  va_end (args_copy);
+  va_end (args);
+
+  return *str && number && fnumber;
+}
+'
+
+# Test code for whether the C compiler supports C99 (body of main).
+ac_c_conftest_c99_main='
+  // Check bool.
+  _Bool success = false;
+  success |= (argc != 0);
+
+  // Check restrict.
+  if (test_restrict ("String literal") == 0)
+    success = true;
+  char *restrict newvar = "Another string";
+
+  // Check varargs.
+  success &= test_varargs ("s, d'\'' f .", "string", 65, 34.234);
+  test_varargs_macros ();
+
+  // Check flexible array members.
+  struct incomplete_array *ia =
+    malloc (sizeof (struct incomplete_array) + (sizeof (double) * 10));
+  ia->datasize = 10;
+  for (int i = 0; i < ia->datasize; ++i)
+    ia->data[i] = i * 1.234;
+
+  // Check named initializers.
+  struct named_init ni = {
+    .number = 34,
+    .name = L"Test wide string",
+    .average = 543.34343,
+  };
+
+  ni.number = 58;
+
+  int dynamic_array[ni.number];
+  dynamic_array[0] = argv[0][0];
+  dynamic_array[ni.number - 1] = 543;
+
+  // work around unused variable warnings
+  ok |= (!success || bignum == 0LL || ubignum == 0uLL || newvar[0] == '\''x'\''
+        || dynamic_array[ni.number - 1] != 543);
+'
+
+# Test code for whether the C compiler supports C11 (global declarations)
+ac_c_conftest_c11_globals='
+// Does the compiler advertise C11 conformance?
+#if !defined __STDC_VERSION__ || __STDC_VERSION__ < 201112L
+# error "Compiler does not advertise C11 conformance"
+#endif
+
+// Check _Alignas.
+char _Alignas (double) aligned_as_double;
+char _Alignas (0) no_special_alignment;
+extern char aligned_as_int;
+char _Alignas (0) _Alignas (int) aligned_as_int;
+
+// Check _Alignof.
+enum
+{
+  int_alignment = _Alignof (int),
+  int_array_alignment = _Alignof (int[100]),
+  char_alignment = _Alignof (char)
+};
+_Static_assert (0 < -_Alignof (int), "_Alignof is signed");
+
+// Check _Noreturn.
+int _Noreturn does_not_return (void) { for (;;) continue; }
+
+// Check _Static_assert.
+struct test_static_assert
+{
+  int x;
+  _Static_assert (sizeof (int) <= sizeof (long int),
+                  "_Static_assert does not work in struct");
+  long int y;
+};
+
+// Check UTF-8 literals.
+#define u8 syntax error!
+char const utf8_literal[] = u8"happens to be ASCII" "another string";
+
+// Check duplicate typedefs.
+typedef long *long_ptr;
+typedef long int *long_ptr;
+typedef long_ptr long_ptr;
+
+// Anonymous structures and unions -- taken from C11 6.7.2.1 Example 1.
+struct anonymous
+{
+  union {
+    struct { int i; int j; };
+    struct { int k; long int l; } w;
+  };
+  int m;
+} v1;
+'
+
+# Test code for whether the C compiler supports C11 (body of main).
+ac_c_conftest_c11_main='
+  _Static_assert ((offsetof (struct anonymous, i)
+                  == offsetof (struct anonymous, w.k)),
+                 "Anonymous union alignment botch");
+  v1.i = 2;
+  v1.w.k = 5;
+  ok |= v1.i != 5;
+'
+
+# Test code for whether the C compiler supports C11 (complete).
+ac_c_conftest_c11_program="${ac_c_conftest_c89_globals}
+${ac_c_conftest_c99_globals}
+${ac_c_conftest_c11_globals}
+
+int
+main (int argc, char **argv)
+{
+  int ok = 0;
+  ${ac_c_conftest_c89_main}
+  ${ac_c_conftest_c99_main}
+  ${ac_c_conftest_c11_main}
+  return ok;
+}
+"
+
+# Test code for whether the C compiler supports C99 (complete).
+ac_c_conftest_c99_program="${ac_c_conftest_c89_globals}
+${ac_c_conftest_c99_globals}
+
+int
+main (int argc, char **argv)
+{
+  int ok = 0;
+  ${ac_c_conftest_c89_main}
+  ${ac_c_conftest_c99_main}
+  return ok;
+}
+"
+
+# Test code for whether the C compiler supports C89 (complete).
+ac_c_conftest_c89_program="${ac_c_conftest_c89_globals}
+
+int
+main (int argc, char **argv)
+{
+  int ok = 0;
+  ${ac_c_conftest_c89_main}
+  return ok;
+}
+"
+
+as_fn_append ac_header_c_list " stdlib.h stdlib_h HAVE_STDLIB_H"
+as_fn_append ac_header_c_list " string.h string_h HAVE_STRING_H"
+as_fn_append ac_header_c_list " inttypes.h inttypes_h HAVE_INTTYPES_H"
+as_fn_append ac_header_c_list " stdint.h stdint_h HAVE_STDINT_H"
+as_fn_append ac_header_c_list " strings.h strings_h HAVE_STRINGS_H"
+as_fn_append ac_header_c_list " sys/stat.h sys_stat_h HAVE_SYS_STAT_H"
+as_fn_append ac_header_c_list " sys/types.h sys_types_h HAVE_SYS_TYPES_H"
+as_fn_append ac_header_c_list " unistd.h unistd_h HAVE_UNISTD_H"
+as_fn_append ac_header_c_list " wchar.h wchar_h HAVE_WCHAR_H"
+as_fn_append ac_header_c_list " minix/config.h minix_config_h 
HAVE_MINIX_CONFIG_H"
+as_fn_append ac_header_c_list " sys/socket.h sys_socket_h HAVE_SYS_SOCKET_H"
+as_fn_append ac_func_c_list " vprintf HAVE_VPRINTF"
+
+# Auxiliary files required by this configure script.
+ac_aux_files="ltmain.sh compile config.guess config.sub missing install-sh"
+
+# Locations in which to look for auxiliary files.
+ac_aux_dir_candidates="${srcdir}/build-aux"
+
+# Search for a directory containing all of the required auxiliary files,
+# $ac_aux_files, from the $PATH-style list $ac_aux_dir_candidates.
+# If we don't find one directory that contains all the files we need,
+# we report the set of missing files from the *first* directory in
+# $ac_aux_dir_candidates and give up.
+ac_missing_aux_files=""
+ac_first_candidate=:
+printf "%s\n" "$as_me:${as_lineno-$LINENO}: looking for aux files: 
$ac_aux_files" >&5
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+as_found=false
+for as_dir in $ac_aux_dir_candidates
+do
+  IFS=$as_save_IFS
+  case $as_dir in #(((
+    '') as_dir=./ ;;
+    */) ;;
+    *) as_dir=$as_dir/ ;;
+  esac
+  as_found=:
+
+  printf "%s\n" "$as_me:${as_lineno-$LINENO}:  trying $as_dir" >&5
+  ac_aux_dir_found=yes
+  ac_install_sh=
+  for ac_aux in $ac_aux_files
+  do
+    # As a special case, if "install-sh" is required, that requirement
+    # can be satisfied by any of "install-sh", "install.sh", or "shtool",
+    # and $ac_install_sh is set appropriately for whichever one is found.
+    if test x"$ac_aux" = x"install-sh"
+    then
+      if test -f "${as_dir}install-sh"; then
+        printf "%s\n" "$as_me:${as_lineno-$LINENO}:   ${as_dir}install-sh 
found" >&5
+        ac_install_sh="${as_dir}install-sh -c"
+      elif test -f "${as_dir}install.sh"; then
+        printf "%s\n" "$as_me:${as_lineno-$LINENO}:   ${as_dir}install.sh 
found" >&5
+        ac_install_sh="${as_dir}install.sh -c"
+      elif test -f "${as_dir}shtool"; then
+        printf "%s\n" "$as_me:${as_lineno-$LINENO}:   ${as_dir}shtool found" 
>&5
+        ac_install_sh="${as_dir}shtool install -c"
+      else
+        ac_aux_dir_found=no
+        if $ac_first_candidate; then
+          ac_missing_aux_files="${ac_missing_aux_files} install-sh"
+        else
+          break
+        fi
+      fi
+    else
+      if test -f "${as_dir}${ac_aux}"; then
+        printf "%s\n" "$as_me:${as_lineno-$LINENO}:   ${as_dir}${ac_aux} 
found" >&5
+      else
+        ac_aux_dir_found=no
+        if $ac_first_candidate; then
+          ac_missing_aux_files="${ac_missing_aux_files} ${ac_aux}"
+        else
+          break
+        fi
+      fi
+    fi
+  done
+  if test "$ac_aux_dir_found" = yes; then
+    ac_aux_dir="$as_dir"
+    break
+  fi
+  ac_first_candidate=false
+
+  as_found=false
+done
+IFS=$as_save_IFS
+if $as_found
+then :
+
+else $as_nop
+  as_fn_error $? "cannot find required auxiliary files:$ac_missing_aux_files" 
"$LINENO" 5
+fi
+
+
+# These three variables are undocumented and unsupported,
+# and are intended to be withdrawn in a future Autoconf release.
+# They can cause serious problems if a builder's source tree is in a directory
+# whose full name contains unusual characters.
+if test -f "${ac_aux_dir}config.guess"; then
+  ac_config_guess="$SHELL ${ac_aux_dir}config.guess"
+fi
+if test -f "${ac_aux_dir}config.sub"; then
+  ac_config_sub="$SHELL ${ac_aux_dir}config.sub"
+fi
+if test -f "$ac_aux_dir/configure"; then
+  ac_configure="$SHELL ${ac_aux_dir}configure"
+fi
+
+# Check that the precious variables saved in the cache have kept the same
+# value.
+ac_cache_corrupted=false
+for ac_var in $ac_precious_vars; do
+  eval ac_old_set=\$ac_cv_env_${ac_var}_set
+  eval ac_new_set=\$ac_env_${ac_var}_set
+  eval ac_old_val=\$ac_cv_env_${ac_var}_value
+  eval ac_new_val=\$ac_env_${ac_var}_value
+  case $ac_old_set,$ac_new_set in
+    set,)
+      { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was set 
to \`$ac_old_val' in the previous run" >&5
+printf "%s\n" "$as_me: error: \`$ac_var' was set to \`$ac_old_val' in the 
previous run" >&2;}
+      ac_cache_corrupted=: ;;
+    ,set)
+      { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was not 
set in the previous run" >&5
+printf "%s\n" "$as_me: error: \`$ac_var' was not set in the previous run" >&2;}
+      ac_cache_corrupted=: ;;
+    ,);;
+    *)
+      if test "x$ac_old_val" != "x$ac_new_val"; then
+       # differences in whitespace do not lead to failure.
+       ac_old_val_w=`echo x $ac_old_val`
+       ac_new_val_w=`echo x $ac_new_val`
+       if test "$ac_old_val_w" != "$ac_new_val_w"; then
+         { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' has 
changed since the previous run:" >&5
+printf "%s\n" "$as_me: error: \`$ac_var' has changed since the previous run:" 
>&2;}
+         ac_cache_corrupted=:
+       else
+         { printf "%s\n" "$as_me:${as_lineno-$LINENO}: warning: ignoring 
whitespace changes in \`$ac_var' since the previous run:" >&5
+printf "%s\n" "$as_me: warning: ignoring whitespace changes in \`$ac_var' 
since the previous run:" >&2;}
+         eval $ac_var=\$ac_old_val
+       fi
+       { printf "%s\n" "$as_me:${as_lineno-$LINENO}:   former value:  
\`$ac_old_val'" >&5
+printf "%s\n" "$as_me:   former value:  \`$ac_old_val'" >&2;}
+       { printf "%s\n" "$as_me:${as_lineno-$LINENO}:   current value: 
\`$ac_new_val'" >&5
+printf "%s\n" "$as_me:   current value: \`$ac_new_val'" >&2;}
+      fi;;
+  esac
+  # Pass precious variables to config.status.
+  if test "$ac_new_set" = set; then
+    case $ac_new_val in
+    *\'*) ac_arg=$ac_var=`printf "%s\n" "$ac_new_val" | sed 
"s/'/'\\\\\\\\''/g"` ;;
+    *) ac_arg=$ac_var=$ac_new_val ;;
+    esac
+    case " $ac_configure_args " in
+      *" '$ac_arg' "*) ;; # Avoid dups.  Use of quotes ensures accuracy.
+      *) as_fn_append ac_configure_args " '$ac_arg'" ;;
+    esac
+  fi
+done
+if $ac_cache_corrupted; then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;}
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: changes in the 
environment can compromise the build" >&5
+printf "%s\n" "$as_me: error: changes in the environment can compromise the 
build" >&2;}
+  as_fn_error $? "run \`${MAKE-make} distclean' and/or \`rm $cache_file'
+           and start over" "$LINENO" 5
+fi
+## -------------------- ##
+## Main body of script. ##
+## -------------------- ##
+
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext 
$LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+
+
+# LT Version numbers, remember to change them just *before* a release.
+#   (Code changed:                     REVISION++)
+#   (Interfaces added/removed/changed: CURRENT++, REVISION=0)
+#   (Interfaces added:                 AGE++)
+#   (Interfaces removed:               AGE=0)
+#
+#   (Interfaces removed:    CURRENT++, AGE=0, REVISION=0)
+#   (Interfaces added:      CURRENT++, AGE++, REVISION=0)
+#   (No interfaces changed:                   REVISION++)
+LIBGCRYPT_LT_CURRENT=24
+LIBGCRYPT_LT_AGE=4
+LIBGCRYPT_LT_REVISION=3
+################################################
+
+
+
+
+
+# If the API is changed in an incompatible way: increment the next counter.
+#
+# 1.6: ABI and API change but the change is to most users irrelevant
+#      and thus the API version number has not been incremented.
+LIBGCRYPT_CONFIG_API_VERSION=1
+
+# If you change the required gpg-error version, please remove
+# unnecessary error code defines in src/gcrypt-int.h.
+NEED_GPG_ERROR_VERSION=1.27
+
+
+
+am__api_version='1.16'
+
+
+
+  # Find a good install program.  We prefer a C program (faster),
+# so one script is as good as another.  But avoid the broken or
+# incompatible versions:
+# SysV /etc/install, /usr/sbin/install
+# SunOS /usr/etc/install
+# IRIX /sbin/install
+# AIX /bin/install
+# AmigaOS /C/install, which installs bootblocks on floppy discs
+# AIX 4 /usr/bin/installbsd, which doesn't work without a -g flag
+# AFS /usr/afsws/bin/install, which mishandles nonexistent args
+# SVR4 /usr/ucb/install, which tries to use the nonexistent group "staff"
+# OS/2's system install, which has a completely different semantic
+# ./install, which can be erroneously created by make from ./install.sh.
+# Reject install programs that cannot install multiple files.
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for a BSD-compatible 
install" >&5
+printf %s "checking for a BSD-compatible install... " >&6; }
+if test -z "$INSTALL"; then
+if test ${ac_cv_path_install+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  case $as_dir in #(((
+    '') as_dir=./ ;;
+    */) ;;
+    *) as_dir=$as_dir/ ;;
+  esac
+    # Account for fact that we put trailing slashes in our PATH walk.
+case $as_dir in #((
+  ./ | /[cC]/* | \
+  /etc/* | /usr/sbin/* | /usr/etc/* | /sbin/* | /usr/afsws/bin/* | \
+  ?:[\\/]os2[\\/]install[\\/]* | ?:[\\/]OS2[\\/]INSTALL[\\/]* | \
+  /usr/ucb/* ) ;;
+  *)
+    # OSF1 and SCO ODT 3.0 have their own names for install.
+    # Don't use installbsd from OSF since it installs stuff as root
+    # by default.
+    for ac_prog in ginstall scoinst install; do
+      for ac_exec_ext in '' $ac_executable_extensions; do
+       if as_fn_executable_p "$as_dir$ac_prog$ac_exec_ext"; then
+         if test $ac_prog = install &&
+           grep dspmsg "$as_dir$ac_prog$ac_exec_ext" >/dev/null 2>&1; then
+           # AIX install.  It has an incompatible calling convention.
+           :
+         elif test $ac_prog = install &&
+           grep pwplus "$as_dir$ac_prog$ac_exec_ext" >/dev/null 2>&1; then
+           # program-specific install script used by HP pwplus--don't use.
+           :
+         else
+           rm -rf conftest.one conftest.two conftest.dir
+           echo one > conftest.one
+           echo two > conftest.two
+           mkdir conftest.dir
+           if "$as_dir$ac_prog$ac_exec_ext" -c conftest.one conftest.two 
"`pwd`/conftest.dir/" &&
+             test -s conftest.one && test -s conftest.two &&
+             test -s conftest.dir/conftest.one &&
+             test -s conftest.dir/conftest.two
+           then
+             ac_cv_path_install="$as_dir$ac_prog$ac_exec_ext -c"
+             break 3
+           fi
+         fi
+       fi
+      done
+    done
+    ;;
+esac
+
+  done
+IFS=$as_save_IFS
+
+rm -rf conftest.one conftest.two conftest.dir
+
+fi
+  if test ${ac_cv_path_install+y}; then
+    INSTALL=$ac_cv_path_install
+  else
+    # As a last resort, use the slow shell script.  Don't cache a
+    # value for INSTALL within a source directory, because that will
+    # break other packages using the cache if that directory is
+    # removed, or if the value is a relative name.
+    INSTALL=$ac_install_sh
+  fi
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $INSTALL" >&5
+printf "%s\n" "$INSTALL" >&6; }
+
+# Use test -z because SunOS4 sh mishandles braces in ${var-val}.
+# It thinks the first close brace ends the variable substitution.
+test -z "$INSTALL_PROGRAM" && INSTALL_PROGRAM='${INSTALL}'
+
+test -z "$INSTALL_SCRIPT" && INSTALL_SCRIPT='${INSTALL}'
+
+test -z "$INSTALL_DATA" && INSTALL_DATA='${INSTALL} -m 644'
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether build 
environment is sane" >&5
+printf %s "checking whether build environment is sane... " >&6; }
+# Reject unsafe characters in $srcdir or the absolute working directory
+# name.  Accept space and tab only in the latter.
+am_lf='
+'
+case `pwd` in
+  *[\\\"\#\$\&\'\`$am_lf]*)
+    as_fn_error $? "unsafe absolute working directory name" "$LINENO" 5;;
+esac
+case $srcdir in
+  *[\\\"\#\$\&\'\`$am_lf\ \    ]*)
+    as_fn_error $? "unsafe srcdir value: '$srcdir'" "$LINENO" 5;;
+esac
+
+# Do 'set' in a subshell so we don't clobber the current shell's
+# arguments.  Must try -L first in case configure is actually a
+# symlink; some systems play weird games with the mod time of symlinks
+# (eg FreeBSD returns the mod time of the symlink's containing
+# directory).
+if (
+   am_has_slept=no
+   for am_try in 1 2; do
+     echo "timestamp, slept: $am_has_slept" > conftest.file
+     set X `ls -Lt "$srcdir/configure" conftest.file 2> /dev/null`
+     if test "$*" = "X"; then
+       # -L didn't work.
+       set X `ls -t "$srcdir/configure" conftest.file`
+     fi
+     if test "$*" != "X $srcdir/configure conftest.file" \
+       && test "$*" != "X conftest.file $srcdir/configure"; then
+
+       # If neither matched, then we have a broken ls.  This can happen
+       # if, for instance, CONFIG_SHELL is bash and it inherits a
+       # broken ls alias from the environment.  This has actually
+       # happened.  Such a system could not be considered "sane".
+       as_fn_error $? "ls -t appears to fail.  Make sure there is not a broken
+  alias in your environment" "$LINENO" 5
+     fi
+     if test "$2" = conftest.file || test $am_try -eq 2; then
+       break
+     fi
+     # Just in case.
+     sleep 1
+     am_has_slept=yes
+   done
+   test "$2" = conftest.file
+   )
+then
+   # Ok.
+   :
+else
+   as_fn_error $? "newly created file is older than distributed files!
+Check your system clock" "$LINENO" 5
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+printf "%s\n" "yes" >&6; }
+# If we didn't sleep, we still need to ensure time stamps of config.status and
+# generated files are strictly newer.
+am_sleep_pid=
+if grep 'slept: no' conftest.file >/dev/null 2>&1; then
+  ( sleep 1 ) &
+  am_sleep_pid=$!
+fi
+
+rm -f conftest.file
+
+test "$program_prefix" != NONE &&
+  program_transform_name="s&^&$program_prefix&;$program_transform_name"
+# Use a double $ so make ignores it.
+test "$program_suffix" != NONE &&
+  program_transform_name="s&\$&$program_suffix&;$program_transform_name"
+# Double any \ or $.
+# By default was `s,x,x', remove it if useless.
+ac_script='s/[\\$]/&&/g;s/;s,x,x,$//'
+program_transform_name=`printf "%s\n" "$program_transform_name" | sed 
"$ac_script"`
+
+
+# Expand $ac_aux_dir to an absolute path.
+am_aux_dir=`cd "$ac_aux_dir" && pwd`
+
+
+  if test x"${MISSING+set}" != xset; then
+  MISSING="\${SHELL} '$am_aux_dir/missing'"
+fi
+# Use eval to expand $SHELL
+if eval "$MISSING --is-lightweight"; then
+  am_missing_run="$MISSING "
+else
+  am_missing_run=
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: 'missing' script is 
too old or missing" >&5
+printf "%s\n" "$as_me: WARNING: 'missing' script is too old or missing" >&2;}
+fi
+
+if test x"${install_sh+set}" != xset; then
+  case $am_aux_dir in
+  *\ * | *\    *)
+    install_sh="\${SHELL} '$am_aux_dir/install-sh'" ;;
+  *)
+    install_sh="\${SHELL} $am_aux_dir/install-sh"
+  esac
+fi
+
+# Installed binaries are usually stripped using 'strip' when the user
+# run "make install-strip".  However 'strip' might not be the right
+# tool to use in cross-compilation environments, therefore Automake
+# will honor the 'STRIP' environment variable to overrule this program.
+if test "$cross_compiling" != no; then
+  if test -n "$ac_tool_prefix"; then
+  # Extract the first word of "${ac_tool_prefix}strip", so it can be a program 
name with args.
+set dummy ${ac_tool_prefix}strip; ac_word=$2
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+printf %s "checking for $ac_word... " >&6; }
+if test ${ac_cv_prog_STRIP+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test -n "$STRIP"; then
+  ac_cv_prog_STRIP="$STRIP" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  case $as_dir in #(((
+    '') as_dir=./ ;;
+    */) ;;
+    *) as_dir=$as_dir/ ;;
+  esac
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then
+    ac_cv_prog_STRIP="${ac_tool_prefix}strip"
+    printf "%s\n" "$as_me:${as_lineno-$LINENO}: found 
$as_dir$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+STRIP=$ac_cv_prog_STRIP
+if test -n "$STRIP"; then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $STRIP" >&5
+printf "%s\n" "$STRIP" >&6; }
+else
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+fi
+
+
+fi
+if test -z "$ac_cv_prog_STRIP"; then
+  ac_ct_STRIP=$STRIP
+  # Extract the first word of "strip", so it can be a program name with args.
+set dummy strip; ac_word=$2
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+printf %s "checking for $ac_word... " >&6; }
+if test ${ac_cv_prog_ac_ct_STRIP+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test -n "$ac_ct_STRIP"; then
+  ac_cv_prog_ac_ct_STRIP="$ac_ct_STRIP" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  case $as_dir in #(((
+    '') as_dir=./ ;;
+    */) ;;
+    *) as_dir=$as_dir/ ;;
+  esac
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then
+    ac_cv_prog_ac_ct_STRIP="strip"
+    printf "%s\n" "$as_me:${as_lineno-$LINENO}: found 
$as_dir$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_STRIP=$ac_cv_prog_ac_ct_STRIP
+if test -n "$ac_ct_STRIP"; then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_STRIP" >&5
+printf "%s\n" "$ac_ct_STRIP" >&6; }
+else
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+fi
+
+  if test "x$ac_ct_STRIP" = x; then
+    STRIP=":"
+  else
+    case $cross_compiling:$ac_tool_warned in
+yes:)
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not 
prefixed with host triplet" >&5
+printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host 
triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    STRIP=$ac_ct_STRIP
+  fi
+else
+  STRIP="$ac_cv_prog_STRIP"
+fi
+
+fi
+INSTALL_STRIP_PROGRAM="\$(install_sh) -c -s"
+
+
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for a race-free mkdir 
-p" >&5
+printf %s "checking for a race-free mkdir -p... " >&6; }
+if test -z "$MKDIR_P"; then
+  if test ${ac_cv_path_mkdir+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH$PATH_SEPARATOR/opt/sfw/bin
+do
+  IFS=$as_save_IFS
+  case $as_dir in #(((
+    '') as_dir=./ ;;
+    */) ;;
+    *) as_dir=$as_dir/ ;;
+  esac
+    for ac_prog in mkdir gmkdir; do
+        for ac_exec_ext in '' $ac_executable_extensions; do
+          as_fn_executable_p "$as_dir$ac_prog$ac_exec_ext" || continue
+          case `"$as_dir$ac_prog$ac_exec_ext" --version 2>&1` in #(
+            'mkdir ('*'coreutils) '* | \
+            'BusyBox '* | \
+            'mkdir (fileutils) '4.1*)
+              ac_cv_path_mkdir=$as_dir$ac_prog$ac_exec_ext
+              break 3;;
+          esac
+        done
+       done
+  done
+IFS=$as_save_IFS
+
+fi
+
+  test -d ./--version && rmdir ./--version
+  if test ${ac_cv_path_mkdir+y}; then
+    MKDIR_P="$ac_cv_path_mkdir -p"
+  else
+    # As a last resort, use the slow shell script.  Don't cache a
+    # value for MKDIR_P within a source directory, because that will
+    # break other packages using the cache if that directory is
+    # removed, or if the value is a relative name.
+    MKDIR_P="$ac_install_sh -d"
+  fi
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $MKDIR_P" >&5
+printf "%s\n" "$MKDIR_P" >&6; }
+
+for ac_prog in gawk mawk nawk awk
+do
+  # Extract the first word of "$ac_prog", so it can be a program name with 
args.
+set dummy $ac_prog; ac_word=$2
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+printf %s "checking for $ac_word... " >&6; }
+if test ${ac_cv_prog_AWK+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test -n "$AWK"; then
+  ac_cv_prog_AWK="$AWK" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  case $as_dir in #(((
+    '') as_dir=./ ;;
+    */) ;;
+    *) as_dir=$as_dir/ ;;
+  esac
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then
+    ac_cv_prog_AWK="$ac_prog"
+    printf "%s\n" "$as_me:${as_lineno-$LINENO}: found 
$as_dir$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+AWK=$ac_cv_prog_AWK
+if test -n "$AWK"; then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $AWK" >&5
+printf "%s\n" "$AWK" >&6; }
+else
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+fi
+
+
+  test -n "$AWK" && break
+done
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether ${MAKE-make} 
sets \$(MAKE)" >&5
+printf %s "checking whether ${MAKE-make} sets \$(MAKE)... " >&6; }
+set x ${MAKE-make}
+ac_make=`printf "%s\n" "$2" | sed 's/+/p/g; s/[^a-zA-Z0-9_]/_/g'`
+if eval test \${ac_cv_prog_make_${ac_make}_set+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  cat >conftest.make <<\_ACEOF
+SHELL = /bin/sh
+all:
+       @echo '@@@%%%=$(MAKE)=@@@%%%'
+_ACEOF
+# GNU make sometimes prints "make[1]: Entering ...", which would confuse us.
+case `${MAKE-make} -f conftest.make 2>/dev/null` in
+  *@@@%%%=?*=@@@%%%*)
+    eval ac_cv_prog_make_${ac_make}_set=yes;;
+  *)
+    eval ac_cv_prog_make_${ac_make}_set=no;;
+esac
+rm -f conftest.make
+fi
+if eval test \$ac_cv_prog_make_${ac_make}_set = yes; then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+printf "%s\n" "yes" >&6; }
+  SET_MAKE=
+else
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+  SET_MAKE="MAKE=${MAKE-make}"
+fi
+
+rm -rf .tst 2>/dev/null
+mkdir .tst 2>/dev/null
+if test -d .tst; then
+  am__leading_dot=.
+else
+  am__leading_dot=_
+fi
+rmdir .tst 2>/dev/null
+
+# Check whether --enable-silent-rules was given.
+if test ${enable_silent_rules+y}
+then :
+  enableval=$enable_silent_rules;
+fi
+
+case $enable_silent_rules in # (((
+  yes) AM_DEFAULT_VERBOSITY=0;;
+   no) AM_DEFAULT_VERBOSITY=1;;
+    *) AM_DEFAULT_VERBOSITY=1;;
+esac
+am_make=${MAKE-make}
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether $am_make 
supports nested variables" >&5
+printf %s "checking whether $am_make supports nested variables... " >&6; }
+if test ${am_cv_make_support_nested_variables+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if printf "%s\n" 'TRUE=$(BAR$(V))
+BAR0=false
+BAR1=true
+V=1
+am__doit:
+       @$(TRUE)
+.PHONY: am__doit' | $am_make -f - >/dev/null 2>&1; then
+  am_cv_make_support_nested_variables=yes
+else
+  am_cv_make_support_nested_variables=no
+fi
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: 
$am_cv_make_support_nested_variables" >&5
+printf "%s\n" "$am_cv_make_support_nested_variables" >&6; }
+if test $am_cv_make_support_nested_variables = yes; then
+    AM_V='$(V)'
+  AM_DEFAULT_V='$(AM_DEFAULT_VERBOSITY)'
+else
+  AM_V=$AM_DEFAULT_VERBOSITY
+  AM_DEFAULT_V=$AM_DEFAULT_VERBOSITY
+fi
+AM_BACKSLASH='\'
+
+if test "`cd $srcdir && pwd`" != "`pwd`"; then
+  # Use -I$(srcdir) only when $(srcdir) != ., so that make's output
+  # is not polluted with repeated "-I."
+  am__isrc=' -I$(srcdir)'
+  # test to see if srcdir already configured
+  if test -f $srcdir/config.status; then
+    as_fn_error $? "source directory already configured; run \"make 
distclean\" there first" "$LINENO" 5
+  fi
+fi
+
+# test whether we have cygpath
+if test -z "$CYGPATH_W"; then
+  if (cygpath --version) >/dev/null 2>/dev/null; then
+    CYGPATH_W='cygpath -w'
+  else
+    CYGPATH_W=echo
+  fi
+fi
+
+
+# Define the identity of the package.
+ PACKAGE='libgcrypt'
+ VERSION='1.10.3'
+
+
+printf "%s\n" "#define PACKAGE \"$PACKAGE\"" >>confdefs.h
+
+
+printf "%s\n" "#define VERSION \"$VERSION\"" >>confdefs.h
+
+# Some tools Automake needs.
+
+ACLOCAL=${ACLOCAL-"${am_missing_run}aclocal-${am__api_version}"}
+
+
+AUTOCONF=${AUTOCONF-"${am_missing_run}autoconf"}
+
+
+AUTOMAKE=${AUTOMAKE-"${am_missing_run}automake-${am__api_version}"}
+
+
+AUTOHEADER=${AUTOHEADER-"${am_missing_run}autoheader"}
+
+
+MAKEINFO=${MAKEINFO-"${am_missing_run}makeinfo"}
+
+# For better backward compatibility.  To be removed once Automake 1.9.x
+# dies out for good.  For more background, see:
+# <https://lists.gnu.org/archive/html/automake/2012-07/msg00001.html>
+# <https://lists.gnu.org/archive/html/automake/2012-07/msg00014.html>
+mkdir_p='$(MKDIR_P)'
+
+# We need awk for the "check" target (and possibly the TAP driver).  The
+# system "awk" is bad on some platforms.
+# Always define AMTAR for backward compatibility.  Yes, it's still used
+# in the wild :-(  We should find a proper way to deprecate it ...
+AMTAR='$${TAR-tar}'
+
+
+# We'll loop over all known methods to create a tar archive until one works.
+_am_tools='gnutar  pax cpio none'
+
+am__tar='$${TAR-tar} chof - "$$tardir"' am__untar='$${TAR-tar} xf -'
+
+
+
+
+
+# Variables for tags utilities; see am/tags.am
+if test -z "$CTAGS"; then
+  CTAGS=ctags
+fi
+
+if test -z "$ETAGS"; then
+  ETAGS=etags
+fi
+
+if test -z "$CSCOPE"; then
+  CSCOPE=cscope
+fi
+
+
+
+# POSIX will say in a future version that running "rm -f" with no argument
+# is OK; and we want to be able to make that assumption in our Makefile
+# recipes.  So use an aggressive probe to check that the usage we want is
+# actually supported "in the wild" to an acceptable degree.
+# See automake bug#10828.
+# To make any issue more visible, cause the running configure to be aborted
+# by default if the 'rm' program in use doesn't match our expectations; the
+# user can still override this though.
+if rm -f && rm -fr && rm -rf; then : OK; else
+  cat >&2 <<'END'
+Oops!
+
+Your 'rm' program seems unable to run without file operands specified
+on the command line, even when the '-f' option is present.  This is contrary
+to the behaviour of most rm programs out there, and not conforming with
+the upcoming POSIX standard: <http://austingroupbugs.net/view.php?id=542>
+
+Please tell bug-automake@gnu.org about your system, including the value
+of your $PATH and any error possibly output before this message.  This
+can help us improve future automake versions.
+
+END
+  if test x"$ACCEPT_INFERIOR_RM_PROGRAM" = x"yes"; then
+    echo 'Configuration will proceed anyway, since you have set the' >&2
+    echo 'ACCEPT_INFERIOR_RM_PROGRAM variable to "yes"' >&2
+    echo >&2
+  else
+    cat >&2 <<'END'
+Aborting the configuration process, to ensure you take notice of the issue.
+
+You can download and install GNU coreutils to get an 'rm' implementation
+that behaves properly: <https://www.gnu.org/software/coreutils/>.
+
+If you want to complete the configuration process using your problematic
+'rm' anyway, export the environment variable ACCEPT_INFERIOR_RM_PROGRAM
+to "yes", and re-run configure.
+
+END
+    as_fn_error $? "Your 'rm' program is bad, sorry." "$LINENO" 5
+  fi
+fi
+
+ac_config_headers="$ac_config_headers config.h"
+
+
+
+
+
+  # Make sure we can run config.sub.
+$SHELL "${ac_aux_dir}config.sub" sun4 >/dev/null 2>&1 ||
+  as_fn_error $? "cannot run $SHELL ${ac_aux_dir}config.sub" "$LINENO" 5
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking build system type" >&5
+printf %s "checking build system type... " >&6; }
+if test ${ac_cv_build+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  ac_build_alias=$build_alias
+test "x$ac_build_alias" = x &&
+  ac_build_alias=`$SHELL "${ac_aux_dir}config.guess"`
+test "x$ac_build_alias" = x &&
+  as_fn_error $? "cannot guess build type; you must specify one" "$LINENO" 5
+ac_cv_build=`$SHELL "${ac_aux_dir}config.sub" $ac_build_alias` ||
+  as_fn_error $? "$SHELL ${ac_aux_dir}config.sub $ac_build_alias failed" 
"$LINENO" 5
+
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_build" >&5
+printf "%s\n" "$ac_cv_build" >&6; }
+case $ac_cv_build in
+*-*-*) ;;
+*) as_fn_error $? "invalid value of canonical build" "$LINENO" 5;;
+esac
+build=$ac_cv_build
+ac_save_IFS=$IFS; IFS='-'
+set x $ac_cv_build
+shift
+build_cpu=$1
+build_vendor=$2
+shift; shift
+# Remember, the first character of IFS is used to create $*,
+# except with old shells:
+build_os=$*
+IFS=$ac_save_IFS
+case $build_os in *\ *) build_os=`echo "$build_os" | sed 's/ /-/g'`;; esac
+
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking host system type" >&5
+printf %s "checking host system type... " >&6; }
+if test ${ac_cv_host+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test "x$host_alias" = x; then
+  ac_cv_host=$ac_cv_build
+else
+  ac_cv_host=`$SHELL "${ac_aux_dir}config.sub" $host_alias` ||
+    as_fn_error $? "$SHELL ${ac_aux_dir}config.sub $host_alias failed" 
"$LINENO" 5
+fi
+
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_host" >&5
+printf "%s\n" "$ac_cv_host" >&6; }
+case $ac_cv_host in
+*-*-*) ;;
+*) as_fn_error $? "invalid value of canonical host" "$LINENO" 5;;
+esac
+host=$ac_cv_host
+ac_save_IFS=$IFS; IFS='-'
+set x $ac_cv_host
+shift
+host_cpu=$1
+host_vendor=$2
+shift; shift
+# Remember, the first character of IFS is used to create $*,
+# except with old shells:
+host_os=$*
+IFS=$ac_save_IFS
+case $host_os in *\ *) host_os=`echo "$host_os" | sed 's/ /-/g'`;; esac
+
+
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether to enable 
maintainer-specific portions of Makefiles" >&5
+printf %s "checking whether to enable maintainer-specific portions of 
Makefiles... " >&6; }
+    # Check whether --enable-maintainer-mode was given.
+if test ${enable_maintainer_mode+y}
+then :
+  enableval=$enable_maintainer_mode; USE_MAINTAINER_MODE=$enableval
+else $as_nop
+  USE_MAINTAINER_MODE=no
+fi
+
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $USE_MAINTAINER_MODE" 
>&5
+printf "%s\n" "$USE_MAINTAINER_MODE" >&6; }
+   if test $USE_MAINTAINER_MODE = yes; then
+  MAINTAINER_MODE_TRUE=
+  MAINTAINER_MODE_FALSE='#'
+else
+  MAINTAINER_MODE_TRUE='#'
+  MAINTAINER_MODE_FALSE=
+fi
+
+  MAINT=$MAINTAINER_MODE_TRUE
+
+
+# Check whether --enable-silent-rules was given.
+if test ${enable_silent_rules+y}
+then :
+  enableval=$enable_silent_rules;
+fi
+
+case $enable_silent_rules in # (((
+  yes) AM_DEFAULT_VERBOSITY=0;;
+   no) AM_DEFAULT_VERBOSITY=1;;
+    *) AM_DEFAULT_VERBOSITY=1;;
+esac
+am_make=${MAKE-make}
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether $am_make 
supports nested variables" >&5
+printf %s "checking whether $am_make supports nested variables... " >&6; }
+if test ${am_cv_make_support_nested_variables+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if printf "%s\n" 'TRUE=$(BAR$(V))
+BAR0=false
+BAR1=true
+V=1
+am__doit:
+       @$(TRUE)
+.PHONY: am__doit' | $am_make -f - >/dev/null 2>&1; then
+  am_cv_make_support_nested_variables=yes
+else
+  am_cv_make_support_nested_variables=no
+fi
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: 
$am_cv_make_support_nested_variables" >&5
+printf "%s\n" "$am_cv_make_support_nested_variables" >&6; }
+if test $am_cv_make_support_nested_variables = yes; then
+    AM_V='$(V)'
+  AM_DEFAULT_V='$(AM_DEFAULT_VERBOSITY)'
+else
+  AM_V=$AM_DEFAULT_VERBOSITY
+  AM_DEFAULT_V=$AM_DEFAULT_VERBOSITY
+fi
+AM_BACKSLASH='\'
+
+
+
+
+
+
+
+
+
+
+DEPDIR="${am__leading_dot}deps"
+
+ac_config_commands="$ac_config_commands depfiles"
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether ${MAKE-make} 
supports the include directive" >&5
+printf %s "checking whether ${MAKE-make} supports the include directive... " 
>&6; }
+cat > confinc.mk << 'END'
+am__doit:
+       @echo this is the am__doit target >confinc.out
+.PHONY: am__doit
+END
+am__include="#"
+am__quote=
+# BSD make does it like this.
+echo '.include "confinc.mk" # ignored' > confmf.BSD
+# Other make implementations (GNU, Solaris 10, AIX) do it like this.
+echo 'include confinc.mk # ignored' > confmf.GNU
+_am_result=no
+for s in GNU BSD; do
+  { echo "$as_me:$LINENO: ${MAKE-make} -f confmf.$s && cat confinc.out" >&5
+   (${MAKE-make} -f confmf.$s && cat confinc.out) >&5 2>&5
+   ac_status=$?
+   echo "$as_me:$LINENO: \$? = $ac_status" >&5
+   (exit $ac_status); }
+  case $?:`cat confinc.out 2>/dev/null` in #(
+  '0:this is the am__doit target') :
+    case $s in #(
+  BSD) :
+    am__include='.include' am__quote='"' ;; #(
+  *) :
+    am__include='include' am__quote='' ;;
+esac ;; #(
+  *) :
+     ;;
+esac
+  if test "$am__include" != "#"; then
+    _am_result="yes ($s style)"
+    break
+  fi
+done
+rm -f confinc.* confmf.*
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: ${_am_result}" >&5
+printf "%s\n" "${_am_result}" >&6; }
+
+# Check whether --enable-dependency-tracking was given.
+if test ${enable_dependency_tracking+y}
+then :
+  enableval=$enable_dependency_tracking;
+fi
+
+if test "x$enable_dependency_tracking" != xno; then
+  am_depcomp="$ac_aux_dir/depcomp"
+  AMDEPBACKSLASH='\'
+  am__nodep='_no'
+fi
+ if test "x$enable_dependency_tracking" != xno; then
+  AMDEP_TRUE=
+  AMDEP_FALSE='#'
+else
+  AMDEP_TRUE='#'
+  AMDEP_FALSE=
+fi
+
+
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext 
$LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+if test -n "$ac_tool_prefix"; then
+  # Extract the first word of "${ac_tool_prefix}gcc", so it can be a program 
name with args.
+set dummy ${ac_tool_prefix}gcc; ac_word=$2
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+printf %s "checking for $ac_word... " >&6; }
+if test ${ac_cv_prog_CC+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test -n "$CC"; then
+  ac_cv_prog_CC="$CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  case $as_dir in #(((
+    '') as_dir=./ ;;
+    */) ;;
+    *) as_dir=$as_dir/ ;;
+  esac
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then
+    ac_cv_prog_CC="${ac_tool_prefix}gcc"
+    printf "%s\n" "$as_me:${as_lineno-$LINENO}: found 
$as_dir$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+CC=$ac_cv_prog_CC
+if test -n "$CC"; then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $CC" >&5
+printf "%s\n" "$CC" >&6; }
+else
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+fi
+
+
+fi
+if test -z "$ac_cv_prog_CC"; then
+  ac_ct_CC=$CC
+  # Extract the first word of "gcc", so it can be a program name with args.
+set dummy gcc; ac_word=$2
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+printf %s "checking for $ac_word... " >&6; }
+if test ${ac_cv_prog_ac_ct_CC+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test -n "$ac_ct_CC"; then
+  ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  case $as_dir in #(((
+    '') as_dir=./ ;;
+    */) ;;
+    *) as_dir=$as_dir/ ;;
+  esac
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then
+    ac_cv_prog_ac_ct_CC="gcc"
+    printf "%s\n" "$as_me:${as_lineno-$LINENO}: found 
$as_dir$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_CC=$ac_cv_prog_ac_ct_CC
+if test -n "$ac_ct_CC"; then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5
+printf "%s\n" "$ac_ct_CC" >&6; }
+else
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+fi
+
+  if test "x$ac_ct_CC" = x; then
+    CC=""
+  else
+    case $cross_compiling:$ac_tool_warned in
+yes:)
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not 
prefixed with host triplet" >&5
+printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host 
triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    CC=$ac_ct_CC
+  fi
+else
+  CC="$ac_cv_prog_CC"
+fi
+
+if test -z "$CC"; then
+          if test -n "$ac_tool_prefix"; then
+    # Extract the first word of "${ac_tool_prefix}cc", so it can be a program 
name with args.
+set dummy ${ac_tool_prefix}cc; ac_word=$2
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+printf %s "checking for $ac_word... " >&6; }
+if test ${ac_cv_prog_CC+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test -n "$CC"; then
+  ac_cv_prog_CC="$CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  case $as_dir in #(((
+    '') as_dir=./ ;;
+    */) ;;
+    *) as_dir=$as_dir/ ;;
+  esac
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then
+    ac_cv_prog_CC="${ac_tool_prefix}cc"
+    printf "%s\n" "$as_me:${as_lineno-$LINENO}: found 
$as_dir$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+CC=$ac_cv_prog_CC
+if test -n "$CC"; then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $CC" >&5
+printf "%s\n" "$CC" >&6; }
+else
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+fi
+
+
+  fi
+fi
+if test -z "$CC"; then
+  # Extract the first word of "cc", so it can be a program name with args.
+set dummy cc; ac_word=$2
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+printf %s "checking for $ac_word... " >&6; }
+if test ${ac_cv_prog_CC+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test -n "$CC"; then
+  ac_cv_prog_CC="$CC" # Let the user override the test.
+else
+  ac_prog_rejected=no
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  case $as_dir in #(((
+    '') as_dir=./ ;;
+    */) ;;
+    *) as_dir=$as_dir/ ;;
+  esac
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then
+    if test "$as_dir$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then
+       ac_prog_rejected=yes
+       continue
+     fi
+    ac_cv_prog_CC="cc"
+    printf "%s\n" "$as_me:${as_lineno-$LINENO}: found 
$as_dir$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+if test $ac_prog_rejected = yes; then
+  # We found a bogon in the path, so make sure we never use it.
+  set dummy $ac_cv_prog_CC
+  shift
+  if test $# != 0; then
+    # We chose a different compiler from the bogus one.
+    # However, it has the same basename, so the bogon will be chosen
+    # first if we set CC to just the basename; use the full file name.
+    shift
+    ac_cv_prog_CC="$as_dir$ac_word${1+' '}$@"
+  fi
+fi
+fi
+fi
+CC=$ac_cv_prog_CC
+if test -n "$CC"; then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $CC" >&5
+printf "%s\n" "$CC" >&6; }
+else
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+fi
+
+
+fi
+if test -z "$CC"; then
+  if test -n "$ac_tool_prefix"; then
+  for ac_prog in cl.exe
+  do
+    # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a 
program name with args.
+set dummy $ac_tool_prefix$ac_prog; ac_word=$2
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+printf %s "checking for $ac_word... " >&6; }
+if test ${ac_cv_prog_CC+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test -n "$CC"; then
+  ac_cv_prog_CC="$CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  case $as_dir in #(((
+    '') as_dir=./ ;;
+    */) ;;
+    *) as_dir=$as_dir/ ;;
+  esac
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then
+    ac_cv_prog_CC="$ac_tool_prefix$ac_prog"
+    printf "%s\n" "$as_me:${as_lineno-$LINENO}: found 
$as_dir$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+CC=$ac_cv_prog_CC
+if test -n "$CC"; then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $CC" >&5
+printf "%s\n" "$CC" >&6; }
+else
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+fi
+
+
+    test -n "$CC" && break
+  done
+fi
+if test -z "$CC"; then
+  ac_ct_CC=$CC
+  for ac_prog in cl.exe
+do
+  # Extract the first word of "$ac_prog", so it can be a program name with 
args.
+set dummy $ac_prog; ac_word=$2
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+printf %s "checking for $ac_word... " >&6; }
+if test ${ac_cv_prog_ac_ct_CC+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test -n "$ac_ct_CC"; then
+  ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  case $as_dir in #(((
+    '') as_dir=./ ;;
+    */) ;;
+    *) as_dir=$as_dir/ ;;
+  esac
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then
+    ac_cv_prog_ac_ct_CC="$ac_prog"
+    printf "%s\n" "$as_me:${as_lineno-$LINENO}: found 
$as_dir$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_CC=$ac_cv_prog_ac_ct_CC
+if test -n "$ac_ct_CC"; then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5
+printf "%s\n" "$ac_ct_CC" >&6; }
+else
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+fi
+
+
+  test -n "$ac_ct_CC" && break
+done
+
+  if test "x$ac_ct_CC" = x; then
+    CC=""
+  else
+    case $cross_compiling:$ac_tool_warned in
+yes:)
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not 
prefixed with host triplet" >&5
+printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host 
triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    CC=$ac_ct_CC
+  fi
+fi
+
+fi
+if test -z "$CC"; then
+  if test -n "$ac_tool_prefix"; then
+  # Extract the first word of "${ac_tool_prefix}clang", so it can be a program 
name with args.
+set dummy ${ac_tool_prefix}clang; ac_word=$2
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+printf %s "checking for $ac_word... " >&6; }
+if test ${ac_cv_prog_CC+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test -n "$CC"; then
+  ac_cv_prog_CC="$CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  case $as_dir in #(((
+    '') as_dir=./ ;;
+    */) ;;
+    *) as_dir=$as_dir/ ;;
+  esac
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then
+    ac_cv_prog_CC="${ac_tool_prefix}clang"
+    printf "%s\n" "$as_me:${as_lineno-$LINENO}: found 
$as_dir$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+CC=$ac_cv_prog_CC
+if test -n "$CC"; then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $CC" >&5
+printf "%s\n" "$CC" >&6; }
+else
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+fi
+
+
+fi
+if test -z "$ac_cv_prog_CC"; then
+  ac_ct_CC=$CC
+  # Extract the first word of "clang", so it can be a program name with args.
+set dummy clang; ac_word=$2
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+printf %s "checking for $ac_word... " >&6; }
+if test ${ac_cv_prog_ac_ct_CC+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test -n "$ac_ct_CC"; then
+  ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  case $as_dir in #(((
+    '') as_dir=./ ;;
+    */) ;;
+    *) as_dir=$as_dir/ ;;
+  esac
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then
+    ac_cv_prog_ac_ct_CC="clang"
+    printf "%s\n" "$as_me:${as_lineno-$LINENO}: found 
$as_dir$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_CC=$ac_cv_prog_ac_ct_CC
+if test -n "$ac_ct_CC"; then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5
+printf "%s\n" "$ac_ct_CC" >&6; }
+else
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+fi
+
+  if test "x$ac_ct_CC" = x; then
+    CC=""
+  else
+    case $cross_compiling:$ac_tool_warned in
+yes:)
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not 
prefixed with host triplet" >&5
+printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host 
triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    CC=$ac_ct_CC
+  fi
+else
+  CC="$ac_cv_prog_CC"
+fi
+
+fi
+
+
+test -z "$CC" && { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in 
\`$ac_pwd':" >&5
+printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "no acceptable C compiler found in \$PATH
+See \`config.log' for more details" "$LINENO" 5; }
+
+# Provide some information about the compiler.
+printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for C compiler version" 
>&5
+set X $ac_compile
+ac_compiler=$2
+for ac_option in --version -v -V -qversion -version; do
+  { { ac_try="$ac_compiler $ac_option >&5"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+printf "%s\n" "$ac_try_echo"; } >&5
+  (eval "$ac_compiler $ac_option >&5") 2>conftest.err
+  ac_status=$?
+  if test -s conftest.err; then
+    sed '10a\
+... rest of stderr output deleted ...
+         10q' conftest.err >conftest.er1
+    cat conftest.er1 >&5
+  fi
+  rm -f conftest.er1 conftest.err
+  printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }
+done
+
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main (void)
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+ac_clean_files_save=$ac_clean_files
+ac_clean_files="$ac_clean_files a.out a.out.dSYM a.exe b.out"
+# Try to create an executable without -o first, disregard a.out.
+# It will help us diagnose broken compilers, and finding out an intuition
+# of exeext.
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the C compiler 
works" >&5
+printf %s "checking whether the C compiler works... " >&6; }
+ac_link_default=`printf "%s\n" "$ac_link" | sed 's/ -o *conftest[^ ]*//'`
+
+# The possible output files:
+ac_files="a.out conftest.exe conftest a.exe a_out.exe b.out conftest.*"
+
+ac_rmfiles=
+for ac_file in $ac_files
+do
+  case $ac_file in
+    *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map 
| *.inf | *.dSYM | *.o | *.obj ) ;;
+    * ) ac_rmfiles="$ac_rmfiles $ac_file";;
+  esac
+done
+rm -f $ac_rmfiles
+
+if { { ac_try="$ac_link_default"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+printf "%s\n" "$ac_try_echo"; } >&5
+  (eval "$ac_link_default") 2>&5
+  ac_status=$?
+  printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }
+then :
+  # Autoconf-2.13 could set the ac_cv_exeext variable to `no'.
+# So ignore a value of `no', otherwise this would lead to `EXEEXT = no'
+# in a Makefile.  We should not override ac_cv_exeext if it was cached,
+# so that the user can short-circuit this test for compilers unknown to
+# Autoconf.
+for ac_file in $ac_files ''
+do
+  test -f "$ac_file" || continue
+  case $ac_file in
+    *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map 
| *.inf | *.dSYM | *.o | *.obj )
+       ;;
+    [ab].out )
+       # We found the default executable, but exeext='' is most
+       # certainly right.
+       break;;
+    *.* )
+       if test ${ac_cv_exeext+y} && test "$ac_cv_exeext" != no;
+       then :; else
+          ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'`
+       fi
+       # We set ac_cv_exeext here because the later test for it is not
+       # safe: cross compilers may not add the suffix if given an `-o'
+       # argument, so we may need to know it at that point already.
+       # Even if this section looks crufty: it has the advantage of
+       # actually working.
+       break;;
+    * )
+       break;;
+  esac
+done
+test "$ac_cv_exeext" = no && ac_cv_exeext=
+
+else $as_nop
+  ac_file=''
+fi
+if test -z "$ac_file"
+then :
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+printf "%s\n" "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+{ { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error 77 "C compiler cannot create executables
+See \`config.log' for more details" "$LINENO" 5; }
+else $as_nop
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+printf "%s\n" "yes" >&6; }
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for C compiler default 
output file name" >&5
+printf %s "checking for C compiler default output file name... " >&6; }
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_file" >&5
+printf "%s\n" "$ac_file" >&6; }
+ac_exeext=$ac_cv_exeext
+
+rm -f -r a.out a.out.dSYM a.exe conftest$ac_cv_exeext b.out
+ac_clean_files=$ac_clean_files_save
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for suffix of 
executables" >&5
+printf %s "checking for suffix of executables... " >&6; }
+if { { ac_try="$ac_link"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+printf "%s\n" "$ac_try_echo"; } >&5
+  (eval "$ac_link") 2>&5
+  ac_status=$?
+  printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }
+then :
+  # If both `conftest.exe' and `conftest' are `present' (well, observable)
+# catch `conftest.exe'.  For instance with Cygwin, `ls conftest' will
+# work properly (i.e., refer to `conftest.exe'), while it won't with
+# `rm'.
+for ac_file in conftest.exe conftest conftest.*; do
+  test -f "$ac_file" || continue
+  case $ac_file in
+    *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map 
| *.inf | *.dSYM | *.o | *.obj ) ;;
+    *.* ) ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'`
+         break;;
+    * ) break;;
+  esac
+done
+else $as_nop
+  { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "cannot compute suffix of executables: cannot compile and link
+See \`config.log' for more details" "$LINENO" 5; }
+fi
+rm -f conftest conftest$ac_cv_exeext
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_exeext" >&5
+printf "%s\n" "$ac_cv_exeext" >&6; }
+
+rm -f conftest.$ac_ext
+EXEEXT=$ac_cv_exeext
+ac_exeext=$EXEEXT
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <stdio.h>
+int
+main (void)
+{
+FILE *f = fopen ("conftest.out", "w");
+ return ferror (f) || fclose (f) != 0;
+
+  ;
+  return 0;
+}
+_ACEOF
+ac_clean_files="$ac_clean_files conftest.out"
+# Check that the compiler produces executables we can run.  If not, either
+# the compiler is broken, or we cross compile.
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether we are cross 
compiling" >&5
+printf %s "checking whether we are cross compiling... " >&6; }
+if test "$cross_compiling" != yes; then
+  { { ac_try="$ac_link"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+printf "%s\n" "$ac_try_echo"; } >&5
+  (eval "$ac_link") 2>&5
+  ac_status=$?
+  printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }
+  if { ac_try='./conftest$ac_cv_exeext'
+  { { case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+printf "%s\n" "$ac_try_echo"; } >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; }; then
+    cross_compiling=no
+  else
+    if test "$cross_compiling" = maybe; then
+       cross_compiling=yes
+    else
+       { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" 
>&5
+printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error 77 "cannot run C compiled programs.
+If you meant to cross compile, use \`--host'.
+See \`config.log' for more details" "$LINENO" 5; }
+    fi
+  fi
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $cross_compiling" >&5
+printf "%s\n" "$cross_compiling" >&6; }
+
+rm -f conftest.$ac_ext conftest$ac_cv_exeext conftest.out
+ac_clean_files=$ac_clean_files_save
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for suffix of object 
files" >&5
+printf %s "checking for suffix of object files... " >&6; }
+if test ${ac_cv_objext+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main (void)
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+rm -f conftest.o conftest.obj
+if { { ac_try="$ac_compile"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+printf "%s\n" "$ac_try_echo"; } >&5
+  (eval "$ac_compile") 2>&5
+  ac_status=$?
+  printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }
+then :
+  for ac_file in conftest.o conftest.obj conftest.*; do
+  test -f "$ac_file" || continue;
+  case $ac_file in
+    *.$ac_ext | *.xcoff | *.tds | *.d | *.pdb | *.xSYM | *.bb | *.bbg | *.map 
| *.inf | *.dSYM ) ;;
+    *) ac_cv_objext=`expr "$ac_file" : '.*\.\(.*\)'`
+       break;;
+  esac
+done
+else $as_nop
+  printf "%s\n" "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+{ { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "cannot compute suffix of object files: cannot compile
+See \`config.log' for more details" "$LINENO" 5; }
+fi
+rm -f conftest.$ac_cv_objext conftest.$ac_ext
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_objext" >&5
+printf "%s\n" "$ac_cv_objext" >&6; }
+OBJEXT=$ac_cv_objext
+ac_objext=$OBJEXT
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the compiler 
supports GNU C" >&5
+printf %s "checking whether the compiler supports GNU C... " >&6; }
+if test ${ac_cv_c_compiler_gnu+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main (void)
+{
+#ifndef __GNUC__
+       choke me
+#endif
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"
+then :
+  ac_compiler_gnu=yes
+else $as_nop
+  ac_compiler_gnu=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext
+ac_cv_c_compiler_gnu=$ac_compiler_gnu
+
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_compiler_gnu" 
>&5
+printf "%s\n" "$ac_cv_c_compiler_gnu" >&6; }
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+if test $ac_compiler_gnu = yes; then
+  GCC=yes
+else
+  GCC=
+fi
+ac_test_CFLAGS=${CFLAGS+y}
+ac_save_CFLAGS=$CFLAGS
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether $CC accepts -g" 
>&5
+printf %s "checking whether $CC accepts -g... " >&6; }
+if test ${ac_cv_prog_cc_g+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  ac_save_c_werror_flag=$ac_c_werror_flag
+   ac_c_werror_flag=yes
+   ac_cv_prog_cc_g=no
+   CFLAGS="-g"
+   cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main (void)
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"
+then :
+  ac_cv_prog_cc_g=yes
+else $as_nop
+  CFLAGS=""
+      cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main (void)
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"
+then :
+
+else $as_nop
+  ac_c_werror_flag=$ac_save_c_werror_flag
+        CFLAGS="-g"
+        cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main (void)
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"
+then :
+  ac_cv_prog_cc_g=yes
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext
+   ac_c_werror_flag=$ac_save_c_werror_flag
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_g" >&5
+printf "%s\n" "$ac_cv_prog_cc_g" >&6; }
+if test $ac_test_CFLAGS; then
+  CFLAGS=$ac_save_CFLAGS
+elif test $ac_cv_prog_cc_g = yes; then
+  if test "$GCC" = yes; then
+    CFLAGS="-g -O2"
+  else
+    CFLAGS="-g"
+  fi
+else
+  if test "$GCC" = yes; then
+    CFLAGS="-O2"
+  else
+    CFLAGS=
+  fi
+fi
+ac_prog_cc_stdc=no
+if test x$ac_prog_cc_stdc = xno
+then :
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CC option to 
enable C11 features" >&5
+printf %s "checking for $CC option to enable C11 features... " >&6; }
+if test ${ac_cv_prog_cc_c11+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  ac_cv_prog_cc_c11=no
+ac_save_CC=$CC
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+$ac_c_conftest_c11_program
+_ACEOF
+for ac_arg in '' -std=gnu11
+do
+  CC="$ac_save_CC $ac_arg"
+  if ac_fn_c_try_compile "$LINENO"
+then :
+  ac_cv_prog_cc_c11=$ac_arg
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam
+  test "x$ac_cv_prog_cc_c11" != "xno" && break
+done
+rm -f conftest.$ac_ext
+CC=$ac_save_CC
+fi
+
+if test "x$ac_cv_prog_cc_c11" = xno
+then :
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5
+printf "%s\n" "unsupported" >&6; }
+else $as_nop
+  if test "x$ac_cv_prog_cc_c11" = x
+then :
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: none needed" >&5
+printf "%s\n" "none needed" >&6; }
+else $as_nop
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c11" >&5
+printf "%s\n" "$ac_cv_prog_cc_c11" >&6; }
+     CC="$CC $ac_cv_prog_cc_c11"
+fi
+  ac_cv_prog_cc_stdc=$ac_cv_prog_cc_c11
+  ac_prog_cc_stdc=c11
+fi
+fi
+if test x$ac_prog_cc_stdc = xno
+then :
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CC option to 
enable C99 features" >&5
+printf %s "checking for $CC option to enable C99 features... " >&6; }
+if test ${ac_cv_prog_cc_c99+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  ac_cv_prog_cc_c99=no
+ac_save_CC=$CC
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+$ac_c_conftest_c99_program
+_ACEOF
+for ac_arg in '' -std=gnu99 -std=c99 -c99 -qlanglvl=extc1x -qlanglvl=extc99 
-AC99 -D_STDC_C99=
+do
+  CC="$ac_save_CC $ac_arg"
+  if ac_fn_c_try_compile "$LINENO"
+then :
+  ac_cv_prog_cc_c99=$ac_arg
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam
+  test "x$ac_cv_prog_cc_c99" != "xno" && break
+done
+rm -f conftest.$ac_ext
+CC=$ac_save_CC
+fi
+
+if test "x$ac_cv_prog_cc_c99" = xno
+then :
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5
+printf "%s\n" "unsupported" >&6; }
+else $as_nop
+  if test "x$ac_cv_prog_cc_c99" = x
+then :
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: none needed" >&5
+printf "%s\n" "none needed" >&6; }
+else $as_nop
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c99" >&5
+printf "%s\n" "$ac_cv_prog_cc_c99" >&6; }
+     CC="$CC $ac_cv_prog_cc_c99"
+fi
+  ac_cv_prog_cc_stdc=$ac_cv_prog_cc_c99
+  ac_prog_cc_stdc=c99
+fi
+fi
+if test x$ac_prog_cc_stdc = xno
+then :
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CC option to 
enable C89 features" >&5
+printf %s "checking for $CC option to enable C89 features... " >&6; }
+if test ${ac_cv_prog_cc_c89+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  ac_cv_prog_cc_c89=no
+ac_save_CC=$CC
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+$ac_c_conftest_c89_program
+_ACEOF
+for ac_arg in '' -qlanglvl=extc89 -qlanglvl=ansi -std -Ae "-Aa -D_HPUX_SOURCE" 
"-Xc -D__EXTENSIONS__"
+do
+  CC="$ac_save_CC $ac_arg"
+  if ac_fn_c_try_compile "$LINENO"
+then :
+  ac_cv_prog_cc_c89=$ac_arg
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam
+  test "x$ac_cv_prog_cc_c89" != "xno" && break
+done
+rm -f conftest.$ac_ext
+CC=$ac_save_CC
+fi
+
+if test "x$ac_cv_prog_cc_c89" = xno
+then :
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5
+printf "%s\n" "unsupported" >&6; }
+else $as_nop
+  if test "x$ac_cv_prog_cc_c89" = x
+then :
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: none needed" >&5
+printf "%s\n" "none needed" >&6; }
+else $as_nop
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c89" >&5
+printf "%s\n" "$ac_cv_prog_cc_c89" >&6; }
+     CC="$CC $ac_cv_prog_cc_c89"
+fi
+  ac_cv_prog_cc_stdc=$ac_cv_prog_cc_c89
+  ac_prog_cc_stdc=c89
+fi
+fi
+
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext 
$LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+
+  ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext 
$LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether $CC understands 
-c and -o together" >&5
+printf %s "checking whether $CC understands -c and -o together... " >&6; }
+if test ${am_cv_prog_cc_c_o+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main (void)
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+  # Make sure it works both with $CC and with simple cc.
+  # Following AC_PROG_CC_C_O, we do the test twice because some
+  # compilers refuse to overwrite an existing .o file with -o,
+  # though they will create one.
+  am_cv_prog_cc_c_o=yes
+  for am_i in 1 2; do
+    if { echo "$as_me:$LINENO: $CC -c conftest.$ac_ext -o 
conftest2.$ac_objext" >&5
+   ($CC -c conftest.$ac_ext -o conftest2.$ac_objext) >&5 2>&5
+   ac_status=$?
+   echo "$as_me:$LINENO: \$? = $ac_status" >&5
+   (exit $ac_status); } \
+         && test -f conftest2.$ac_objext; then
+      : OK
+    else
+      am_cv_prog_cc_c_o=no
+      break
+    fi
+  done
+  rm -f core conftest*
+  unset am_i
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $am_cv_prog_cc_c_o" >&5
+printf "%s\n" "$am_cv_prog_cc_c_o" >&6; }
+if test "$am_cv_prog_cc_c_o" != yes; then
+   # Losing compiler, so override with the script.
+   # FIXME: It is wrong to rewrite CC.
+   # But if we don't then we get into trouble of one sort or another.
+   # A longer-term fix would be to have automake use am__CC in this case,
+   # and then we could set am__CC="\$(top_srcdir)/compile \$(CC)"
+   CC="$am_aux_dir/compile $CC"
+fi
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext 
$LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+
+depcc="$CC"   am_compiler_list=
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking dependency style of 
$depcc" >&5
+printf %s "checking dependency style of $depcc... " >&6; }
+if test ${am_cv_CC_dependencies_compiler_type+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then
+  # We make a subdir and do the tests there.  Otherwise we can end up
+  # making bogus files that we don't know about and never remove.  For
+  # instance it was reported that on HP-UX the gcc test will end up
+  # making a dummy file named 'D' -- because '-MD' means "put the output
+  # in D".
+  rm -rf conftest.dir
+  mkdir conftest.dir
+  # Copy depcomp to subdir because otherwise we won't find it if we're
+  # using a relative directory.
+  cp "$am_depcomp" conftest.dir
+  cd conftest.dir
+  # We will build objects and dependencies in a subdirectory because
+  # it helps to detect inapplicable dependency modes.  For instance
+  # both Tru64's cc and ICC support -MD to output dependencies as a
+  # side effect of compilation, but ICC will put the dependencies in
+  # the current directory while Tru64 will put them in the object
+  # directory.
+  mkdir sub
+
+  am_cv_CC_dependencies_compiler_type=none
+  if test "$am_compiler_list" = ""; then
+     am_compiler_list=`sed -n 's/^#*\([a-zA-Z0-9]*\))$/\1/p' < ./depcomp`
+  fi
+  am__universal=false
+  case " $depcc " in #(
+     *\ -arch\ *\ -arch\ *) am__universal=true ;;
+     esac
+
+  for depmode in $am_compiler_list; do
+    # Setup a source with many dependencies, because some compilers
+    # like to wrap large dependency lists on column 80 (with \), and
+    # we should not choose a depcomp mode which is confused by this.
+    #
+    # We need to recreate these files for each test, as the compiler may
+    # overwrite some of them when testing with obscure command lines.
+    # This happens at least with the AIX C compiler.
+    : > sub/conftest.c
+    for i in 1 2 3 4 5 6; do
+      echo '#include "conftst'$i'.h"' >> sub/conftest.c
+      # Using ": > sub/conftst$i.h" creates only sub/conftst1.h with
+      # Solaris 10 /bin/sh.
+      echo '/* dummy */' > sub/conftst$i.h
+    done
+    echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf
+
+    # We check with '-c' and '-o' for the sake of the "dashmstdout"
+    # mode.  It turns out that the SunPro C++ compiler does not properly
+    # handle '-M -o', and we need to detect this.  Also, some Intel
+    # versions had trouble with output in subdirs.
+    am__obj=sub/conftest.${OBJEXT-o}
+    am__minus_obj="-o $am__obj"
+    case $depmode in
+    gcc)
+      # This depmode causes a compiler race in universal mode.
+      test "$am__universal" = false || continue
+      ;;
+    nosideeffect)
+      # After this tag, mechanisms are not by side-effect, so they'll
+      # only be used when explicitly requested.
+      if test "x$enable_dependency_tracking" = xyes; then
+       continue
+      else
+       break
+      fi
+      ;;
+    msvc7 | msvc7msys | msvisualcpp | msvcmsys)
+      # This compiler won't grok '-c -o', but also, the minuso test has
+      # not run yet.  These depmodes are late enough in the game, and
+      # so weak that their functioning should not be impacted.
+      am__obj=conftest.${OBJEXT-o}
+      am__minus_obj=
+      ;;
+    none) break ;;
+    esac
+    if depmode=$depmode \
+       source=sub/conftest.c object=$am__obj \
+       depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \
+       $SHELL ./depcomp $depcc -c $am__minus_obj sub/conftest.c \
+         >/dev/null 2>conftest.err &&
+       grep sub/conftst1.h sub/conftest.Po > /dev/null 2>&1 &&
+       grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 &&
+       grep $am__obj sub/conftest.Po > /dev/null 2>&1 &&
+       ${MAKE-make} -s -f confmf > /dev/null 2>&1; then
+      # icc doesn't choke on unknown options, it will just issue warnings
+      # or remarks (even with -Werror).  So we grep stderr for any message
+      # that says an option was ignored or not supported.
+      # When given -MP, icc 7.0 and 7.1 complain thusly:
+      #   icc: Command line warning: ignoring option '-M'; no argument required
+      # The diagnosis changed in icc 8.0:
+      #   icc: Command line remark: option '-MP' not supported
+      if (grep 'ignoring option' conftest.err ||
+          grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else
+        am_cv_CC_dependencies_compiler_type=$depmode
+        break
+      fi
+    fi
+  done
+
+  cd ..
+  rm -rf conftest.dir
+else
+  am_cv_CC_dependencies_compiler_type=none
+fi
+
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: 
$am_cv_CC_dependencies_compiler_type" >&5
+printf "%s\n" "$am_cv_CC_dependencies_compiler_type" >&6; }
+CCDEPMODE=depmode=$am_cv_CC_dependencies_compiler_type
+
+ if
+  test "x$enable_dependency_tracking" != xno \
+  && test "$am_cv_CC_dependencies_compiler_type" = gcc3; then
+  am__fastdepCC_TRUE=
+  am__fastdepCC_FALSE='#'
+else
+  am__fastdepCC_TRUE='#'
+  am__fastdepCC_FALSE=
+fi
+
+
+
+ac_header= ac_cache=
+for ac_item in $ac_header_c_list
+do
+  if test $ac_cache; then
+    ac_fn_c_check_header_compile "$LINENO" $ac_header ac_cv_header_$ac_cache 
"$ac_includes_default"
+    if eval test \"x\$ac_cv_header_$ac_cache\" = xyes; then
+      printf "%s\n" "#define $ac_item 1" >> confdefs.h
+    fi
+    ac_header= ac_cache=
+  elif test $ac_header; then
+    ac_cache=$ac_item
+  else
+    ac_header=$ac_item
+  fi
+done
+
+
+
+
+
+
+
+
+if test $ac_cv_header_stdlib_h = yes && test $ac_cv_header_string_h = yes
+then :
+
+printf "%s\n" "#define STDC_HEADERS 1" >>confdefs.h
+
+fi
+
+
+
+
+
+
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether it is safe to 
define __EXTENSIONS__" >&5
+printf %s "checking whether it is safe to define __EXTENSIONS__... " >&6; }
+if test ${ac_cv_safe_to_define___extensions__+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+#         define __EXTENSIONS__ 1
+          $ac_includes_default
+int
+main (void)
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"
+then :
+  ac_cv_safe_to_define___extensions__=yes
+else $as_nop
+  ac_cv_safe_to_define___extensions__=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: 
$ac_cv_safe_to_define___extensions__" >&5
+printf "%s\n" "$ac_cv_safe_to_define___extensions__" >&6; }
+
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether _XOPEN_SOURCE 
should be defined" >&5
+printf %s "checking whether _XOPEN_SOURCE should be defined... " >&6; }
+if test ${ac_cv_should_define__xopen_source+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  ac_cv_should_define__xopen_source=no
+    if test $ac_cv_header_wchar_h = yes
+then :
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+          #include <wchar.h>
+          mbstate_t x;
+int
+main (void)
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"
+then :
+
+else $as_nop
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+            #define _XOPEN_SOURCE 500
+            #include <wchar.h>
+            mbstate_t x;
+int
+main (void)
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"
+then :
+  ac_cv_should_define__xopen_source=yes
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext
+fi
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: 
$ac_cv_should_define__xopen_source" >&5
+printf "%s\n" "$ac_cv_should_define__xopen_source" >&6; }
+
+  printf "%s\n" "#define _ALL_SOURCE 1" >>confdefs.h
+
+  printf "%s\n" "#define _DARWIN_C_SOURCE 1" >>confdefs.h
+
+  printf "%s\n" "#define _GNU_SOURCE 1" >>confdefs.h
+
+  printf "%s\n" "#define _HPUX_ALT_XOPEN_SOCKET_API 1" >>confdefs.h
+
+  printf "%s\n" "#define _NETBSD_SOURCE 1" >>confdefs.h
+
+  printf "%s\n" "#define _OPENBSD_SOURCE 1" >>confdefs.h
+
+  printf "%s\n" "#define _POSIX_PTHREAD_SEMANTICS 1" >>confdefs.h
+
+  printf "%s\n" "#define __STDC_WANT_IEC_60559_ATTRIBS_EXT__ 1" >>confdefs.h
+
+  printf "%s\n" "#define __STDC_WANT_IEC_60559_BFP_EXT__ 1" >>confdefs.h
+
+  printf "%s\n" "#define __STDC_WANT_IEC_60559_DFP_EXT__ 1" >>confdefs.h
+
+  printf "%s\n" "#define __STDC_WANT_IEC_60559_FUNCS_EXT__ 1" >>confdefs.h
+
+  printf "%s\n" "#define __STDC_WANT_IEC_60559_TYPES_EXT__ 1" >>confdefs.h
+
+  printf "%s\n" "#define __STDC_WANT_LIB_EXT2__ 1" >>confdefs.h
+
+  printf "%s\n" "#define __STDC_WANT_MATH_SPEC_FUNCS__ 1" >>confdefs.h
+
+  printf "%s\n" "#define _TANDEM_SOURCE 1" >>confdefs.h
+
+  if test $ac_cv_header_minix_config_h = yes
+then :
+  MINIX=yes
+    printf "%s\n" "#define _MINIX 1" >>confdefs.h
+
+    printf "%s\n" "#define _POSIX_SOURCE 1" >>confdefs.h
+
+    printf "%s\n" "#define _POSIX_1_SOURCE 2" >>confdefs.h
+
+else $as_nop
+  MINIX=
+fi
+  if test $ac_cv_safe_to_define___extensions__ = yes
+then :
+  printf "%s\n" "#define __EXTENSIONS__ 1" >>confdefs.h
+
+fi
+  if test $ac_cv_should_define__xopen_source = yes
+then :
+  printf "%s\n" "#define _XOPEN_SOURCE 500" >>confdefs.h
+
+fi
+
+
+
+
+
+
+
+
+
+
+
+######################
+##  Basic checks.  ### (we need some results later on (e.g. $GCC)
+######################
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether ${MAKE-make} 
sets \$(MAKE)" >&5
+printf %s "checking whether ${MAKE-make} sets \$(MAKE)... " >&6; }
+set x ${MAKE-make}
+ac_make=`printf "%s\n" "$2" | sed 's/+/p/g; s/[^a-zA-Z0-9_]/_/g'`
+if eval test \${ac_cv_prog_make_${ac_make}_set+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  cat >conftest.make <<\_ACEOF
+SHELL = /bin/sh
+all:
+       @echo '@@@%%%=$(MAKE)=@@@%%%'
+_ACEOF
+# GNU make sometimes prints "make[1]: Entering ...", which would confuse us.
+case `${MAKE-make} -f conftest.make 2>/dev/null` in
+  *@@@%%%=?*=@@@%%%*)
+    eval ac_cv_prog_make_${ac_make}_set=yes;;
+  *)
+    eval ac_cv_prog_make_${ac_make}_set=no;;
+esac
+rm -f conftest.make
+fi
+if eval test \$ac_cv_prog_make_${ac_make}_set = yes; then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+printf "%s\n" "yes" >&6; }
+  SET_MAKE=
+else
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+  SET_MAKE="MAKE=${MAKE-make}"
+fi
+
+missing_dir=`cd $ac_aux_dir && pwd`
+
+ACLOCAL=${ACLOCAL-"${am_missing_run}aclocal"}
+
+
+AUTOCONF=${AUTOCONF-"${am_missing_run}autoconf"}
+
+
+AUTOMAKE=${AUTOMAKE-"${am_missing_run}automake"}
+
+
+AUTOHEADER=${AUTOHEADER-"${am_missing_run}autoheader"}
+
+# AM_MISSING_PROG(MAKEINFO, makeinfo, $missing_dir)
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext 
$LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+if test -n "$ac_tool_prefix"; then
+  # Extract the first word of "${ac_tool_prefix}gcc", so it can be a program 
name with args.
+set dummy ${ac_tool_prefix}gcc; ac_word=$2
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+printf %s "checking for $ac_word... " >&6; }
+if test ${ac_cv_prog_CC+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test -n "$CC"; then
+  ac_cv_prog_CC="$CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  case $as_dir in #(((
+    '') as_dir=./ ;;
+    */) ;;
+    *) as_dir=$as_dir/ ;;
+  esac
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then
+    ac_cv_prog_CC="${ac_tool_prefix}gcc"
+    printf "%s\n" "$as_me:${as_lineno-$LINENO}: found 
$as_dir$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+CC=$ac_cv_prog_CC
+if test -n "$CC"; then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $CC" >&5
+printf "%s\n" "$CC" >&6; }
+else
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+fi
+
+
+fi
+if test -z "$ac_cv_prog_CC"; then
+  ac_ct_CC=$CC
+  # Extract the first word of "gcc", so it can be a program name with args.
+set dummy gcc; ac_word=$2
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+printf %s "checking for $ac_word... " >&6; }
+if test ${ac_cv_prog_ac_ct_CC+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test -n "$ac_ct_CC"; then
+  ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  case $as_dir in #(((
+    '') as_dir=./ ;;
+    */) ;;
+    *) as_dir=$as_dir/ ;;
+  esac
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then
+    ac_cv_prog_ac_ct_CC="gcc"
+    printf "%s\n" "$as_me:${as_lineno-$LINENO}: found 
$as_dir$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_CC=$ac_cv_prog_ac_ct_CC
+if test -n "$ac_ct_CC"; then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5
+printf "%s\n" "$ac_ct_CC" >&6; }
+else
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+fi
+
+  if test "x$ac_ct_CC" = x; then
+    CC=""
+  else
+    case $cross_compiling:$ac_tool_warned in
+yes:)
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not 
prefixed with host triplet" >&5
+printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host 
triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    CC=$ac_ct_CC
+  fi
+else
+  CC="$ac_cv_prog_CC"
+fi
+
+if test -z "$CC"; then
+          if test -n "$ac_tool_prefix"; then
+    # Extract the first word of "${ac_tool_prefix}cc", so it can be a program 
name with args.
+set dummy ${ac_tool_prefix}cc; ac_word=$2
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+printf %s "checking for $ac_word... " >&6; }
+if test ${ac_cv_prog_CC+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test -n "$CC"; then
+  ac_cv_prog_CC="$CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  case $as_dir in #(((
+    '') as_dir=./ ;;
+    */) ;;
+    *) as_dir=$as_dir/ ;;
+  esac
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then
+    ac_cv_prog_CC="${ac_tool_prefix}cc"
+    printf "%s\n" "$as_me:${as_lineno-$LINENO}: found 
$as_dir$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+CC=$ac_cv_prog_CC
+if test -n "$CC"; then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $CC" >&5
+printf "%s\n" "$CC" >&6; }
+else
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+fi
+
+
+  fi
+fi
+if test -z "$CC"; then
+  # Extract the first word of "cc", so it can be a program name with args.
+set dummy cc; ac_word=$2
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+printf %s "checking for $ac_word... " >&6; }
+if test ${ac_cv_prog_CC+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test -n "$CC"; then
+  ac_cv_prog_CC="$CC" # Let the user override the test.
+else
+  ac_prog_rejected=no
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  case $as_dir in #(((
+    '') as_dir=./ ;;
+    */) ;;
+    *) as_dir=$as_dir/ ;;
+  esac
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then
+    if test "$as_dir$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then
+       ac_prog_rejected=yes
+       continue
+     fi
+    ac_cv_prog_CC="cc"
+    printf "%s\n" "$as_me:${as_lineno-$LINENO}: found 
$as_dir$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+if test $ac_prog_rejected = yes; then
+  # We found a bogon in the path, so make sure we never use it.
+  set dummy $ac_cv_prog_CC
+  shift
+  if test $# != 0; then
+    # We chose a different compiler from the bogus one.
+    # However, it has the same basename, so the bogon will be chosen
+    # first if we set CC to just the basename; use the full file name.
+    shift
+    ac_cv_prog_CC="$as_dir$ac_word${1+' '}$@"
+  fi
+fi
+fi
+fi
+CC=$ac_cv_prog_CC
+if test -n "$CC"; then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $CC" >&5
+printf "%s\n" "$CC" >&6; }
+else
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+fi
+
+
+fi
+if test -z "$CC"; then
+  if test -n "$ac_tool_prefix"; then
+  for ac_prog in cl.exe
+  do
+    # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a 
program name with args.
+set dummy $ac_tool_prefix$ac_prog; ac_word=$2
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+printf %s "checking for $ac_word... " >&6; }
+if test ${ac_cv_prog_CC+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test -n "$CC"; then
+  ac_cv_prog_CC="$CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  case $as_dir in #(((
+    '') as_dir=./ ;;
+    */) ;;
+    *) as_dir=$as_dir/ ;;
+  esac
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then
+    ac_cv_prog_CC="$ac_tool_prefix$ac_prog"
+    printf "%s\n" "$as_me:${as_lineno-$LINENO}: found 
$as_dir$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+CC=$ac_cv_prog_CC
+if test -n "$CC"; then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $CC" >&5
+printf "%s\n" "$CC" >&6; }
+else
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+fi
+
+
+    test -n "$CC" && break
+  done
+fi
+if test -z "$CC"; then
+  ac_ct_CC=$CC
+  for ac_prog in cl.exe
+do
+  # Extract the first word of "$ac_prog", so it can be a program name with 
args.
+set dummy $ac_prog; ac_word=$2
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+printf %s "checking for $ac_word... " >&6; }
+if test ${ac_cv_prog_ac_ct_CC+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test -n "$ac_ct_CC"; then
+  ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  case $as_dir in #(((
+    '') as_dir=./ ;;
+    */) ;;
+    *) as_dir=$as_dir/ ;;
+  esac
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then
+    ac_cv_prog_ac_ct_CC="$ac_prog"
+    printf "%s\n" "$as_me:${as_lineno-$LINENO}: found 
$as_dir$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_CC=$ac_cv_prog_ac_ct_CC
+if test -n "$ac_ct_CC"; then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5
+printf "%s\n" "$ac_ct_CC" >&6; }
+else
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+fi
+
+
+  test -n "$ac_ct_CC" && break
+done
+
+  if test "x$ac_ct_CC" = x; then
+    CC=""
+  else
+    case $cross_compiling:$ac_tool_warned in
+yes:)
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not 
prefixed with host triplet" >&5
+printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host 
triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    CC=$ac_ct_CC
+  fi
+fi
+
+fi
+if test -z "$CC"; then
+  if test -n "$ac_tool_prefix"; then
+  # Extract the first word of "${ac_tool_prefix}clang", so it can be a program 
name with args.
+set dummy ${ac_tool_prefix}clang; ac_word=$2
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+printf %s "checking for $ac_word... " >&6; }
+if test ${ac_cv_prog_CC+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test -n "$CC"; then
+  ac_cv_prog_CC="$CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  case $as_dir in #(((
+    '') as_dir=./ ;;
+    */) ;;
+    *) as_dir=$as_dir/ ;;
+  esac
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then
+    ac_cv_prog_CC="${ac_tool_prefix}clang"
+    printf "%s\n" "$as_me:${as_lineno-$LINENO}: found 
$as_dir$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+CC=$ac_cv_prog_CC
+if test -n "$CC"; then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $CC" >&5
+printf "%s\n" "$CC" >&6; }
+else
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+fi
+
+
+fi
+if test -z "$ac_cv_prog_CC"; then
+  ac_ct_CC=$CC
+  # Extract the first word of "clang", so it can be a program name with args.
+set dummy clang; ac_word=$2
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+printf %s "checking for $ac_word... " >&6; }
+if test ${ac_cv_prog_ac_ct_CC+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test -n "$ac_ct_CC"; then
+  ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  case $as_dir in #(((
+    '') as_dir=./ ;;
+    */) ;;
+    *) as_dir=$as_dir/ ;;
+  esac
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then
+    ac_cv_prog_ac_ct_CC="clang"
+    printf "%s\n" "$as_me:${as_lineno-$LINENO}: found 
$as_dir$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_CC=$ac_cv_prog_ac_ct_CC
+if test -n "$ac_ct_CC"; then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5
+printf "%s\n" "$ac_ct_CC" >&6; }
+else
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+fi
+
+  if test "x$ac_ct_CC" = x; then
+    CC=""
+  else
+    case $cross_compiling:$ac_tool_warned in
+yes:)
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not 
prefixed with host triplet" >&5
+printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host 
triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    CC=$ac_ct_CC
+  fi
+else
+  CC="$ac_cv_prog_CC"
+fi
+
+fi
+
+
+test -z "$CC" && { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in 
\`$ac_pwd':" >&5
+printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "no acceptable C compiler found in \$PATH
+See \`config.log' for more details" "$LINENO" 5; }
+
+# Provide some information about the compiler.
+printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for C compiler version" 
>&5
+set X $ac_compile
+ac_compiler=$2
+for ac_option in --version -v -V -qversion -version; do
+  { { ac_try="$ac_compiler $ac_option >&5"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\""
+printf "%s\n" "$ac_try_echo"; } >&5
+  (eval "$ac_compiler $ac_option >&5") 2>conftest.err
+  ac_status=$?
+  if test -s conftest.err; then
+    sed '10a\
+... rest of stderr output deleted ...
+         10q' conftest.err >conftest.er1
+    cat conftest.er1 >&5
+  fi
+  rm -f conftest.er1 conftest.err
+  printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }
+done
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the compiler 
supports GNU C" >&5
+printf %s "checking whether the compiler supports GNU C... " >&6; }
+if test ${ac_cv_c_compiler_gnu+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main (void)
+{
+#ifndef __GNUC__
+       choke me
+#endif
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"
+then :
+  ac_compiler_gnu=yes
+else $as_nop
+  ac_compiler_gnu=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext
+ac_cv_c_compiler_gnu=$ac_compiler_gnu
+
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_compiler_gnu" 
>&5
+printf "%s\n" "$ac_cv_c_compiler_gnu" >&6; }
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+if test $ac_compiler_gnu = yes; then
+  GCC=yes
+else
+  GCC=
+fi
+ac_test_CFLAGS=${CFLAGS+y}
+ac_save_CFLAGS=$CFLAGS
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether $CC accepts -g" 
>&5
+printf %s "checking whether $CC accepts -g... " >&6; }
+if test ${ac_cv_prog_cc_g+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  ac_save_c_werror_flag=$ac_c_werror_flag
+   ac_c_werror_flag=yes
+   ac_cv_prog_cc_g=no
+   CFLAGS="-g"
+   cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main (void)
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"
+then :
+  ac_cv_prog_cc_g=yes
+else $as_nop
+  CFLAGS=""
+      cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main (void)
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"
+then :
+
+else $as_nop
+  ac_c_werror_flag=$ac_save_c_werror_flag
+        CFLAGS="-g"
+        cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main (void)
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"
+then :
+  ac_cv_prog_cc_g=yes
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext
+   ac_c_werror_flag=$ac_save_c_werror_flag
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_g" >&5
+printf "%s\n" "$ac_cv_prog_cc_g" >&6; }
+if test $ac_test_CFLAGS; then
+  CFLAGS=$ac_save_CFLAGS
+elif test $ac_cv_prog_cc_g = yes; then
+  if test "$GCC" = yes; then
+    CFLAGS="-g -O2"
+  else
+    CFLAGS="-g"
+  fi
+else
+  if test "$GCC" = yes; then
+    CFLAGS="-O2"
+  else
+    CFLAGS=
+  fi
+fi
+ac_prog_cc_stdc=no
+if test x$ac_prog_cc_stdc = xno
+then :
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CC option to 
enable C11 features" >&5
+printf %s "checking for $CC option to enable C11 features... " >&6; }
+if test ${ac_cv_prog_cc_c11+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  ac_cv_prog_cc_c11=no
+ac_save_CC=$CC
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+$ac_c_conftest_c11_program
+_ACEOF
+for ac_arg in '' -std=gnu11
+do
+  CC="$ac_save_CC $ac_arg"
+  if ac_fn_c_try_compile "$LINENO"
+then :
+  ac_cv_prog_cc_c11=$ac_arg
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam
+  test "x$ac_cv_prog_cc_c11" != "xno" && break
+done
+rm -f conftest.$ac_ext
+CC=$ac_save_CC
+fi
+
+if test "x$ac_cv_prog_cc_c11" = xno
+then :
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5
+printf "%s\n" "unsupported" >&6; }
+else $as_nop
+  if test "x$ac_cv_prog_cc_c11" = x
+then :
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: none needed" >&5
+printf "%s\n" "none needed" >&6; }
+else $as_nop
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c11" >&5
+printf "%s\n" "$ac_cv_prog_cc_c11" >&6; }
+     CC="$CC $ac_cv_prog_cc_c11"
+fi
+  ac_cv_prog_cc_stdc=$ac_cv_prog_cc_c11
+  ac_prog_cc_stdc=c11
+fi
+fi
+if test x$ac_prog_cc_stdc = xno
+then :
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CC option to 
enable C99 features" >&5
+printf %s "checking for $CC option to enable C99 features... " >&6; }
+if test ${ac_cv_prog_cc_c99+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  ac_cv_prog_cc_c99=no
+ac_save_CC=$CC
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+$ac_c_conftest_c99_program
+_ACEOF
+for ac_arg in '' -std=gnu99 -std=c99 -c99 -qlanglvl=extc1x -qlanglvl=extc99 
-AC99 -D_STDC_C99=
+do
+  CC="$ac_save_CC $ac_arg"
+  if ac_fn_c_try_compile "$LINENO"
+then :
+  ac_cv_prog_cc_c99=$ac_arg
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam
+  test "x$ac_cv_prog_cc_c99" != "xno" && break
+done
+rm -f conftest.$ac_ext
+CC=$ac_save_CC
+fi
+
+if test "x$ac_cv_prog_cc_c99" = xno
+then :
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5
+printf "%s\n" "unsupported" >&6; }
+else $as_nop
+  if test "x$ac_cv_prog_cc_c99" = x
+then :
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: none needed" >&5
+printf "%s\n" "none needed" >&6; }
+else $as_nop
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c99" >&5
+printf "%s\n" "$ac_cv_prog_cc_c99" >&6; }
+     CC="$CC $ac_cv_prog_cc_c99"
+fi
+  ac_cv_prog_cc_stdc=$ac_cv_prog_cc_c99
+  ac_prog_cc_stdc=c99
+fi
+fi
+if test x$ac_prog_cc_stdc = xno
+then :
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CC option to 
enable C89 features" >&5
+printf %s "checking for $CC option to enable C89 features... " >&6; }
+if test ${ac_cv_prog_cc_c89+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  ac_cv_prog_cc_c89=no
+ac_save_CC=$CC
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+$ac_c_conftest_c89_program
+_ACEOF
+for ac_arg in '' -qlanglvl=extc89 -qlanglvl=ansi -std -Ae "-Aa -D_HPUX_SOURCE" 
"-Xc -D__EXTENSIONS__"
+do
+  CC="$ac_save_CC $ac_arg"
+  if ac_fn_c_try_compile "$LINENO"
+then :
+  ac_cv_prog_cc_c89=$ac_arg
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam
+  test "x$ac_cv_prog_cc_c89" != "xno" && break
+done
+rm -f conftest.$ac_ext
+CC=$ac_save_CC
+fi
+
+if test "x$ac_cv_prog_cc_c89" = xno
+then :
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5
+printf "%s\n" "unsupported" >&6; }
+else $as_nop
+  if test "x$ac_cv_prog_cc_c89" = x
+then :
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: none needed" >&5
+printf "%s\n" "none needed" >&6; }
+else $as_nop
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c89" >&5
+printf "%s\n" "$ac_cv_prog_cc_c89" >&6; }
+     CC="$CC $ac_cv_prog_cc_c89"
+fi
+  ac_cv_prog_cc_stdc=$ac_cv_prog_cc_c89
+  ac_prog_cc_stdc=c89
+fi
+fi
+
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext 
$LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+
+  ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext 
$LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether $CC understands 
-c and -o together" >&5
+printf %s "checking whether $CC understands -c and -o together... " >&6; }
+if test ${am_cv_prog_cc_c_o+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main (void)
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+  # Make sure it works both with $CC and with simple cc.
+  # Following AC_PROG_CC_C_O, we do the test twice because some
+  # compilers refuse to overwrite an existing .o file with -o,
+  # though they will create one.
+  am_cv_prog_cc_c_o=yes
+  for am_i in 1 2; do
+    if { echo "$as_me:$LINENO: $CC -c conftest.$ac_ext -o 
conftest2.$ac_objext" >&5
+   ($CC -c conftest.$ac_ext -o conftest2.$ac_objext) >&5 2>&5
+   ac_status=$?
+   echo "$as_me:$LINENO: \$? = $ac_status" >&5
+   (exit $ac_status); } \
+         && test -f conftest2.$ac_objext; then
+      : OK
+    else
+      am_cv_prog_cc_c_o=no
+      break
+    fi
+  done
+  rm -f core conftest*
+  unset am_i
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $am_cv_prog_cc_c_o" >&5
+printf "%s\n" "$am_cv_prog_cc_c_o" >&6; }
+if test "$am_cv_prog_cc_c_o" != yes; then
+   # Losing compiler, so override with the script.
+   # FIXME: It is wrong to rewrite CC.
+   # But if we don't then we get into trouble of one sort or another.
+   # A longer-term fix would be to have automake use am__CC in this case,
+   # and then we could set am__CC="\$(top_srcdir)/compile \$(CC)"
+   CC="$am_aux_dir/compile $CC"
+fi
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext 
$LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+
+depcc="$CC"   am_compiler_list=
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking dependency style of 
$depcc" >&5
+printf %s "checking dependency style of $depcc... " >&6; }
+if test ${am_cv_CC_dependencies_compiler_type+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then
+  # We make a subdir and do the tests there.  Otherwise we can end up
+  # making bogus files that we don't know about and never remove.  For
+  # instance it was reported that on HP-UX the gcc test will end up
+  # making a dummy file named 'D' -- because '-MD' means "put the output
+  # in D".
+  rm -rf conftest.dir
+  mkdir conftest.dir
+  # Copy depcomp to subdir because otherwise we won't find it if we're
+  # using a relative directory.
+  cp "$am_depcomp" conftest.dir
+  cd conftest.dir
+  # We will build objects and dependencies in a subdirectory because
+  # it helps to detect inapplicable dependency modes.  For instance
+  # both Tru64's cc and ICC support -MD to output dependencies as a
+  # side effect of compilation, but ICC will put the dependencies in
+  # the current directory while Tru64 will put them in the object
+  # directory.
+  mkdir sub
+
+  am_cv_CC_dependencies_compiler_type=none
+  if test "$am_compiler_list" = ""; then
+     am_compiler_list=`sed -n 's/^#*\([a-zA-Z0-9]*\))$/\1/p' < ./depcomp`
+  fi
+  am__universal=false
+  case " $depcc " in #(
+     *\ -arch\ *\ -arch\ *) am__universal=true ;;
+     esac
+
+  for depmode in $am_compiler_list; do
+    # Setup a source with many dependencies, because some compilers
+    # like to wrap large dependency lists on column 80 (with \), and
+    # we should not choose a depcomp mode which is confused by this.
+    #
+    # We need to recreate these files for each test, as the compiler may
+    # overwrite some of them when testing with obscure command lines.
+    # This happens at least with the AIX C compiler.
+    : > sub/conftest.c
+    for i in 1 2 3 4 5 6; do
+      echo '#include "conftst'$i'.h"' >> sub/conftest.c
+      # Using ": > sub/conftst$i.h" creates only sub/conftst1.h with
+      # Solaris 10 /bin/sh.
+      echo '/* dummy */' > sub/conftst$i.h
+    done
+    echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf
+
+    # We check with '-c' and '-o' for the sake of the "dashmstdout"
+    # mode.  It turns out that the SunPro C++ compiler does not properly
+    # handle '-M -o', and we need to detect this.  Also, some Intel
+    # versions had trouble with output in subdirs.
+    am__obj=sub/conftest.${OBJEXT-o}
+    am__minus_obj="-o $am__obj"
+    case $depmode in
+    gcc)
+      # This depmode causes a compiler race in universal mode.
+      test "$am__universal" = false || continue
+      ;;
+    nosideeffect)
+      # After this tag, mechanisms are not by side-effect, so they'll
+      # only be used when explicitly requested.
+      if test "x$enable_dependency_tracking" = xyes; then
+       continue
+      else
+       break
+      fi
+      ;;
+    msvc7 | msvc7msys | msvisualcpp | msvcmsys)
+      # This compiler won't grok '-c -o', but also, the minuso test has
+      # not run yet.  These depmodes are late enough in the game, and
+      # so weak that their functioning should not be impacted.
+      am__obj=conftest.${OBJEXT-o}
+      am__minus_obj=
+      ;;
+    none) break ;;
+    esac
+    if depmode=$depmode \
+       source=sub/conftest.c object=$am__obj \
+       depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \
+       $SHELL ./depcomp $depcc -c $am__minus_obj sub/conftest.c \
+         >/dev/null 2>conftest.err &&
+       grep sub/conftst1.h sub/conftest.Po > /dev/null 2>&1 &&
+       grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 &&
+       grep $am__obj sub/conftest.Po > /dev/null 2>&1 &&
+       ${MAKE-make} -s -f confmf > /dev/null 2>&1; then
+      # icc doesn't choke on unknown options, it will just issue warnings
+      # or remarks (even with -Werror).  So we grep stderr for any message
+      # that says an option was ignored or not supported.
+      # When given -MP, icc 7.0 and 7.1 complain thusly:
+      #   icc: Command line warning: ignoring option '-M'; no argument required
+      # The diagnosis changed in icc 8.0:
+      #   icc: Command line remark: option '-MP' not supported
+      if (grep 'ignoring option' conftest.err ||
+          grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else
+        am_cv_CC_dependencies_compiler_type=$depmode
+        break
+      fi
+    fi
+  done
+
+  cd ..
+  rm -rf conftest.dir
+else
+  am_cv_CC_dependencies_compiler_type=none
+fi
+
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: 
$am_cv_CC_dependencies_compiler_type" >&5
+printf "%s\n" "$am_cv_CC_dependencies_compiler_type" >&6; }
+CCDEPMODE=depmode=$am_cv_CC_dependencies_compiler_type
+
+ if
+  test "x$enable_dependency_tracking" != xno \
+  && test "$am_cv_CC_dependencies_compiler_type" = gcc3; then
+  am__fastdepCC_TRUE=
+  am__fastdepCC_FALSE='#'
+else
+  am__fastdepCC_TRUE='#'
+  am__fastdepCC_FALSE=
+fi
+
+
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext 
$LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking how to run the C 
preprocessor" >&5
+printf %s "checking how to run the C preprocessor... " >&6; }
+# On Suns, sometimes $CPP names a directory.
+if test -n "$CPP" && test -d "$CPP"; then
+  CPP=
+fi
+if test -z "$CPP"; then
+  if test ${ac_cv_prog_CPP+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+      # Double quotes because $CC needs to be expanded
+    for CPP in "$CC -E" "$CC -E -traditional-cpp" cpp /lib/cpp
+    do
+      ac_preproc_ok=false
+for ac_c_preproc_warn_flag in '' yes
+do
+  # Use a header file that comes with gcc, so configuring glibc
+  # with a fresh cross-compiler works.
+  # On the NeXT, cc -E runs the code through the compiler's parser,
+  # not just through cpp. "Syntax error" is here to catch this case.
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <limits.h>
+                    Syntax error
+_ACEOF
+if ac_fn_c_try_cpp "$LINENO"
+then :
+
+else $as_nop
+  # Broken: fails on valid input.
+continue
+fi
+rm -f conftest.err conftest.i conftest.$ac_ext
+
+  # OK, works on sane cases.  Now check whether nonexistent headers
+  # can be detected and how.
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <ac_nonexistent.h>
+_ACEOF
+if ac_fn_c_try_cpp "$LINENO"
+then :
+  # Broken: success on invalid input.
+continue
+else $as_nop
+  # Passes both tests.
+ac_preproc_ok=:
+break
+fi
+rm -f conftest.err conftest.i conftest.$ac_ext
+
+done
+# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped.
+rm -f conftest.i conftest.err conftest.$ac_ext
+if $ac_preproc_ok
+then :
+  break
+fi
+
+    done
+    ac_cv_prog_CPP=$CPP
+
+fi
+  CPP=$ac_cv_prog_CPP
+else
+  ac_cv_prog_CPP=$CPP
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $CPP" >&5
+printf "%s\n" "$CPP" >&6; }
+ac_preproc_ok=false
+for ac_c_preproc_warn_flag in '' yes
+do
+  # Use a header file that comes with gcc, so configuring glibc
+  # with a fresh cross-compiler works.
+  # On the NeXT, cc -E runs the code through the compiler's parser,
+  # not just through cpp. "Syntax error" is here to catch this case.
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <limits.h>
+                    Syntax error
+_ACEOF
+if ac_fn_c_try_cpp "$LINENO"
+then :
+
+else $as_nop
+  # Broken: fails on valid input.
+continue
+fi
+rm -f conftest.err conftest.i conftest.$ac_ext
+
+  # OK, works on sane cases.  Now check whether nonexistent headers
+  # can be detected and how.
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <ac_nonexistent.h>
+_ACEOF
+if ac_fn_c_try_cpp "$LINENO"
+then :
+  # Broken: success on invalid input.
+continue
+else $as_nop
+  # Passes both tests.
+ac_preproc_ok=:
+break
+fi
+rm -f conftest.err conftest.i conftest.$ac_ext
+
+done
+# Because of `break', _AC_PREPROC_IFELSE's cleaning code was skipped.
+rm -f conftest.i conftest.err conftest.$ac_ext
+if $ac_preproc_ok
+then :
+
+else $as_nop
+  { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "C preprocessor \"$CPP\" fails sanity check
+See \`config.log' for more details" "$LINENO" 5; }
+fi
+
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext 
$LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+
+# By default we simply use the C compiler to build assembly code.
+
+test "${CCAS+set}" = set || CCAS=$CC
+test "${CCASFLAGS+set}" = set || CCASFLAGS=$CFLAGS
+
+
+
+depcc="$CCAS"   am_compiler_list=
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking dependency style of 
$depcc" >&5
+printf %s "checking dependency style of $depcc... " >&6; }
+if test ${am_cv_CCAS_dependencies_compiler_type+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test -z "$AMDEP_TRUE" && test -f "$am_depcomp"; then
+  # We make a subdir and do the tests there.  Otherwise we can end up
+  # making bogus files that we don't know about and never remove.  For
+  # instance it was reported that on HP-UX the gcc test will end up
+  # making a dummy file named 'D' -- because '-MD' means "put the output
+  # in D".
+  rm -rf conftest.dir
+  mkdir conftest.dir
+  # Copy depcomp to subdir because otherwise we won't find it if we're
+  # using a relative directory.
+  cp "$am_depcomp" conftest.dir
+  cd conftest.dir
+  # We will build objects and dependencies in a subdirectory because
+  # it helps to detect inapplicable dependency modes.  For instance
+  # both Tru64's cc and ICC support -MD to output dependencies as a
+  # side effect of compilation, but ICC will put the dependencies in
+  # the current directory while Tru64 will put them in the object
+  # directory.
+  mkdir sub
+
+  am_cv_CCAS_dependencies_compiler_type=none
+  if test "$am_compiler_list" = ""; then
+     am_compiler_list=`sed -n 's/^#*\([a-zA-Z0-9]*\))$/\1/p' < ./depcomp`
+  fi
+  am__universal=false
+
+
+  for depmode in $am_compiler_list; do
+    # Setup a source with many dependencies, because some compilers
+    # like to wrap large dependency lists on column 80 (with \), and
+    # we should not choose a depcomp mode which is confused by this.
+    #
+    # We need to recreate these files for each test, as the compiler may
+    # overwrite some of them when testing with obscure command lines.
+    # This happens at least with the AIX C compiler.
+    : > sub/conftest.c
+    for i in 1 2 3 4 5 6; do
+      echo '#include "conftst'$i'.h"' >> sub/conftest.c
+      # Using ": > sub/conftst$i.h" creates only sub/conftst1.h with
+      # Solaris 10 /bin/sh.
+      echo '/* dummy */' > sub/conftst$i.h
+    done
+    echo "${am__include} ${am__quote}sub/conftest.Po${am__quote}" > confmf
+
+    # We check with '-c' and '-o' for the sake of the "dashmstdout"
+    # mode.  It turns out that the SunPro C++ compiler does not properly
+    # handle '-M -o', and we need to detect this.  Also, some Intel
+    # versions had trouble with output in subdirs.
+    am__obj=sub/conftest.${OBJEXT-o}
+    am__minus_obj="-o $am__obj"
+    case $depmode in
+    gcc)
+      # This depmode causes a compiler race in universal mode.
+      test "$am__universal" = false || continue
+      ;;
+    nosideeffect)
+      # After this tag, mechanisms are not by side-effect, so they'll
+      # only be used when explicitly requested.
+      if test "x$enable_dependency_tracking" = xyes; then
+       continue
+      else
+       break
+      fi
+      ;;
+    msvc7 | msvc7msys | msvisualcpp | msvcmsys)
+      # This compiler won't grok '-c -o', but also, the minuso test has
+      # not run yet.  These depmodes are late enough in the game, and
+      # so weak that their functioning should not be impacted.
+      am__obj=conftest.${OBJEXT-o}
+      am__minus_obj=
+      ;;
+    none) break ;;
+    esac
+    if depmode=$depmode \
+       source=sub/conftest.c object=$am__obj \
+       depfile=sub/conftest.Po tmpdepfile=sub/conftest.TPo \
+       $SHELL ./depcomp $depcc -c $am__minus_obj sub/conftest.c \
+         >/dev/null 2>conftest.err &&
+       grep sub/conftst1.h sub/conftest.Po > /dev/null 2>&1 &&
+       grep sub/conftst6.h sub/conftest.Po > /dev/null 2>&1 &&
+       grep $am__obj sub/conftest.Po > /dev/null 2>&1 &&
+       ${MAKE-make} -s -f confmf > /dev/null 2>&1; then
+      # icc doesn't choke on unknown options, it will just issue warnings
+      # or remarks (even with -Werror).  So we grep stderr for any message
+      # that says an option was ignored or not supported.
+      # When given -MP, icc 7.0 and 7.1 complain thusly:
+      #   icc: Command line warning: ignoring option '-M'; no argument required
+      # The diagnosis changed in icc 8.0:
+      #   icc: Command line remark: option '-MP' not supported
+      if (grep 'ignoring option' conftest.err ||
+          grep 'not supported' conftest.err) >/dev/null 2>&1; then :; else
+        am_cv_CCAS_dependencies_compiler_type=$depmode
+        break
+      fi
+    fi
+  done
+
+  cd ..
+  rm -rf conftest.dir
+else
+  am_cv_CCAS_dependencies_compiler_type=none
+fi
+
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: 
$am_cv_CCAS_dependencies_compiler_type" >&5
+printf "%s\n" "$am_cv_CCAS_dependencies_compiler_type" >&6; }
+CCASDEPMODE=depmode=$am_cv_CCAS_dependencies_compiler_type
+
+ if
+  test "x$enable_dependency_tracking" != xno \
+  && test "$am_cv_CCAS_dependencies_compiler_type" = gcc3; then
+  am__fastdepCCAS_TRUE=
+  am__fastdepCCAS_FALSE='#'
+else
+  am__fastdepCCAS_TRUE='#'
+  am__fastdepCCAS_FALSE=
+fi
+
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for library containing 
strerror" >&5
+printf %s "checking for library containing strerror... " >&6; }
+if test ${ac_cv_search_strerror+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  ac_func_search_save_LIBS=$LIBS
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+char strerror ();
+int
+main (void)
+{
+return strerror ();
+  ;
+  return 0;
+}
+_ACEOF
+for ac_lib in '' cposix
+do
+  if test -z "$ac_lib"; then
+    ac_res="none required"
+  else
+    ac_res=-l$ac_lib
+    LIBS="-l$ac_lib  $ac_func_search_save_LIBS"
+  fi
+  if ac_fn_c_try_link "$LINENO"
+then :
+  ac_cv_search_strerror=$ac_res
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam \
+    conftest$ac_exeext
+  if test ${ac_cv_search_strerror+y}
+then :
+  break
+fi
+done
+if test ${ac_cv_search_strerror+y}
+then :
+
+else $as_nop
+  ac_cv_search_strerror=no
+fi
+rm conftest.$ac_ext
+LIBS=$ac_func_search_save_LIBS
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_strerror" 
>&5
+printf "%s\n" "$ac_cv_search_strerror" >&6; }
+ac_res=$ac_cv_search_strerror
+if test "$ac_res" != no
+then :
+  test "$ac_res" = "none required" || LIBS="$ac_res $LIBS"
+
+fi
+
+
+for ac_prog in gawk mawk nawk awk
+do
+  # Extract the first word of "$ac_prog", so it can be a program name with 
args.
+set dummy $ac_prog; ac_word=$2
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+printf %s "checking for $ac_word... " >&6; }
+if test ${ac_cv_prog_AWK+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test -n "$AWK"; then
+  ac_cv_prog_AWK="$AWK" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  case $as_dir in #(((
+    '') as_dir=./ ;;
+    */) ;;
+    *) as_dir=$as_dir/ ;;
+  esac
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then
+    ac_cv_prog_AWK="$ac_prog"
+    printf "%s\n" "$as_me:${as_lineno-$LINENO}: found 
$as_dir$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+AWK=$ac_cv_prog_AWK
+if test -n "$AWK"; then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $AWK" >&5
+printf "%s\n" "$AWK" >&6; }
+else
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+fi
+
+
+  test -n "$AWK" && break
+done
+
+
+# Taken from mpfr-4.0.1, then modified for LDADD_FOR_TESTS_KLUDGE
+case $host in
+  *-*-linux*)
+    if test -n "$LD_LIBRARY_PATH"; then
+      saved_LDFLAGS="$LDFLAGS"
+      LDADD_FOR_TESTS_KLUDGE="-Wl,--disable-new-dtags"
+      LDFLAGS="$LDFLAGS $LDADD_FOR_TESTS_KLUDGE"
+      { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether 
--disable-new-dtags is supported by the linker" >&5
+printf %s "checking whether --disable-new-dtags is supported by the linker... 
" >&6; }
+      cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int main (void) { return 0; }
+
+_ACEOF
+if ac_fn_c_try_link "$LINENO"
+then :
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes (use it since 
LD_LIBRARY_PATH is set)" >&5
+printf "%s\n" "yes (use it since LD_LIBRARY_PATH is set)" >&6; }
+else $as_nop
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+       LDADD_FOR_TESTS_KLUDGE=""
+
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam \
+    conftest$ac_exeext conftest.$ac_ext
+      LDFLAGS="$saved_LDFLAGS"
+    fi
+    ;;
+esac
+
+
+VERSION_NUMBER=0x010a03
+
+
+# We need to compile and run a program on the build machine.
+# Put a plausible default for CC_FOR_BUILD in Makefile.
+if test -z "$CC_FOR_BUILD"; then
+  if test "x$cross_compiling" = "xno"; then
+    CC_FOR_BUILD='$(CC)'
+  else
+    CC_FOR_BUILD=gcc
+  fi
+fi
+
+# Also set EXEEXT_FOR_BUILD.
+if test "x$cross_compiling" = "xno"; then
+  EXEEXT_FOR_BUILD='$(EXEEXT)'
+else
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for build system 
executable suffix" >&5
+printf %s "checking for build system executable suffix... " >&6; }
+if test ${bfd_cv_build_exeext+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  rm -f conftest*
+     echo 'int main (void) { return 0; }' > conftest.c
+     bfd_cv_build_exeext=
+     ${CC_FOR_BUILD} -o conftest conftest.c 1>&5 2>&5
+     for file in conftest.*; do
+       case $file in
+       *.c | *.o | *.obj | *.ilk | *.pdb) ;;
+       *) bfd_cv_build_exeext=`echo $file | sed -e s/conftest//` ;;
+       esac
+     done
+     rm -f conftest*
+     test x"${bfd_cv_build_exeext}" = x && bfd_cv_build_exeext=no
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $bfd_cv_build_exeext" >&5
+printf "%s\n" "$bfd_cv_build_exeext" >&6; }
+  EXEEXT_FOR_BUILD=""
+  test x"${bfd_cv_build_exeext}" != xno && 
EXEEXT_FOR_BUILD=${bfd_cv_build_exeext}
+fi
+
+
+
+
+case `pwd` in
+  *\ * | *\    *)
+    { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: Libtool does not 
cope well with whitespace in \`pwd\`" >&5
+printf "%s\n" "$as_me: WARNING: Libtool does not cope well with whitespace in 
\`pwd\`" >&2;} ;;
+esac
+
+
+
+macro_version='2.4.2'
+macro_revision='1.3337'
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ltmain="$ac_aux_dir/ltmain.sh"
+
+# Backslashify metacharacters that are still active within
+# double-quoted strings.
+sed_quote_subst='s/\(["`$\\]\)/\\\1/g'
+
+# Same as above, but do not quote variable references.
+double_quote_subst='s/\(["`\\]\)/\\\1/g'
+
+# Sed substitution to delay expansion of an escaped shell variable in a
+# double_quote_subst'ed string.
+delay_variable_subst='s/\\\\\\\\\\\$/\\\\\\$/g'
+
+# Sed substitution to delay expansion of an escaped single quote.
+delay_single_quote_subst='s/'\''/'\'\\\\\\\'\''/g'
+
+# Sed substitution to avoid accidental globbing in evaled expressions
+no_glob_subst='s/\*/\\\*/g'
+
+ECHO='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\'
+ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO
+ECHO=$ECHO$ECHO$ECHO$ECHO$ECHO$ECHO
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking how to print strings" 
>&5
+printf %s "checking how to print strings... " >&6; }
+# Test print first, because it will be a builtin if present.
+if test "X`( print -r -- -n ) 2>/dev/null`" = X-n && \
+   test "X`print -r -- $ECHO 2>/dev/null`" = "X$ECHO"; then
+  ECHO='print -r --'
+elif test "X`printf %s $ECHO 2>/dev/null`" = "X$ECHO"; then
+  ECHO='printf %s\n'
+else
+  # Use this function as a fallback that always works.
+  func_fallback_echo ()
+  {
+    eval 'cat <<_LTECHO_EOF
+$1
+_LTECHO_EOF'
+  }
+  ECHO='func_fallback_echo'
+fi
+
+# func_echo_all arg...
+# Invoke $ECHO with all args, space-separated.
+func_echo_all ()
+{
+    $ECHO ""
+}
+
+case "$ECHO" in
+  printf*) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: printf" >&5
+printf "%s\n" "printf" >&6; } ;;
+  print*) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: print -r" >&5
+printf "%s\n" "print -r" >&6; } ;;
+  *) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: cat" >&5
+printf "%s\n" "cat" >&6; } ;;
+esac
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for a sed that does not 
truncate output" >&5
+printf %s "checking for a sed that does not truncate output... " >&6; }
+if test ${ac_cv_path_SED+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+            
ac_script=s/aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa/bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb/
+     for ac_i in 1 2 3 4 5 6 7; do
+       ac_script="$ac_script$as_nl$ac_script"
+     done
+     echo "$ac_script" 2>/dev/null | sed 99q >conftest.sed
+     { ac_script=; unset ac_script;}
+     if test -z "$SED"; then
+  ac_path_SED_found=false
+  # Loop through the user's path and test for each of PROGNAME-LIST
+  as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  case $as_dir in #(((
+    '') as_dir=./ ;;
+    */) ;;
+    *) as_dir=$as_dir/ ;;
+  esac
+    for ac_prog in sed gsed
+   do
+    for ac_exec_ext in '' $ac_executable_extensions; do
+      ac_path_SED="$as_dir$ac_prog$ac_exec_ext"
+      as_fn_executable_p "$ac_path_SED" || continue
+# Check for GNU ac_path_SED and select it if it is found.
+  # Check for GNU $ac_path_SED
+case `"$ac_path_SED" --version 2>&1` in
+*GNU*)
+  ac_cv_path_SED="$ac_path_SED" ac_path_SED_found=:;;
+*)
+  ac_count=0
+  printf %s 0123456789 >"conftest.in"
+  while :
+  do
+    cat "conftest.in" "conftest.in" >"conftest.tmp"
+    mv "conftest.tmp" "conftest.in"
+    cp "conftest.in" "conftest.nl"
+    printf "%s\n" '' >> "conftest.nl"
+    "$ac_path_SED" -f conftest.sed < "conftest.nl" >"conftest.out" 2>/dev/null 
|| break
+    diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break
+    as_fn_arith $ac_count + 1 && ac_count=$as_val
+    if test $ac_count -gt ${ac_path_SED_max-0}; then
+      # Best one so far, save it but keep looking for a better one
+      ac_cv_path_SED="$ac_path_SED"
+      ac_path_SED_max=$ac_count
+    fi
+    # 10*(2^10) chars as input seems more than enough
+    test $ac_count -gt 10 && break
+  done
+  rm -f conftest.in conftest.tmp conftest.nl conftest.out;;
+esac
+
+      $ac_path_SED_found && break 3
+    done
+  done
+  done
+IFS=$as_save_IFS
+  if test -z "$ac_cv_path_SED"; then
+    as_fn_error $? "no acceptable sed could be found in \$PATH" "$LINENO" 5
+  fi
+else
+  ac_cv_path_SED=$SED
+fi
+
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_SED" >&5
+printf "%s\n" "$ac_cv_path_SED" >&6; }
+ SED="$ac_cv_path_SED"
+  rm -f conftest.sed
+
+test -z "$SED" && SED=sed
+Xsed="$SED -e 1s/^X//"
+
+
+
+
+
+
+
+
+
+
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for grep that handles 
long lines and -e" >&5
+printf %s "checking for grep that handles long lines and -e... " >&6; }
+if test ${ac_cv_path_GREP+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test -z "$GREP"; then
+  ac_path_GREP_found=false
+  # Loop through the user's path and test for each of PROGNAME-LIST
+  as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin
+do
+  IFS=$as_save_IFS
+  case $as_dir in #(((
+    '') as_dir=./ ;;
+    */) ;;
+    *) as_dir=$as_dir/ ;;
+  esac
+    for ac_prog in grep ggrep
+   do
+    for ac_exec_ext in '' $ac_executable_extensions; do
+      ac_path_GREP="$as_dir$ac_prog$ac_exec_ext"
+      as_fn_executable_p "$ac_path_GREP" || continue
+# Check for GNU ac_path_GREP and select it if it is found.
+  # Check for GNU $ac_path_GREP
+case `"$ac_path_GREP" --version 2>&1` in
+*GNU*)
+  ac_cv_path_GREP="$ac_path_GREP" ac_path_GREP_found=:;;
+*)
+  ac_count=0
+  printf %s 0123456789 >"conftest.in"
+  while :
+  do
+    cat "conftest.in" "conftest.in" >"conftest.tmp"
+    mv "conftest.tmp" "conftest.in"
+    cp "conftest.in" "conftest.nl"
+    printf "%s\n" 'GREP' >> "conftest.nl"
+    "$ac_path_GREP" -e 'GREP$' -e '-(cannot match)-' < "conftest.nl" 
>"conftest.out" 2>/dev/null || break
+    diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break
+    as_fn_arith $ac_count + 1 && ac_count=$as_val
+    if test $ac_count -gt ${ac_path_GREP_max-0}; then
+      # Best one so far, save it but keep looking for a better one
+      ac_cv_path_GREP="$ac_path_GREP"
+      ac_path_GREP_max=$ac_count
+    fi
+    # 10*(2^10) chars as input seems more than enough
+    test $ac_count -gt 10 && break
+  done
+  rm -f conftest.in conftest.tmp conftest.nl conftest.out;;
+esac
+
+      $ac_path_GREP_found && break 3
+    done
+  done
+  done
+IFS=$as_save_IFS
+  if test -z "$ac_cv_path_GREP"; then
+    as_fn_error $? "no acceptable grep could be found in 
$PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5
+  fi
+else
+  ac_cv_path_GREP=$GREP
+fi
+
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_GREP" >&5
+printf "%s\n" "$ac_cv_path_GREP" >&6; }
+ GREP="$ac_cv_path_GREP"
+
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for egrep" >&5
+printf %s "checking for egrep... " >&6; }
+if test ${ac_cv_path_EGREP+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if echo a | $GREP -E '(a|b)' >/dev/null 2>&1
+   then ac_cv_path_EGREP="$GREP -E"
+   else
+     if test -z "$EGREP"; then
+  ac_path_EGREP_found=false
+  # Loop through the user's path and test for each of PROGNAME-LIST
+  as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin
+do
+  IFS=$as_save_IFS
+  case $as_dir in #(((
+    '') as_dir=./ ;;
+    */) ;;
+    *) as_dir=$as_dir/ ;;
+  esac
+    for ac_prog in egrep
+   do
+    for ac_exec_ext in '' $ac_executable_extensions; do
+      ac_path_EGREP="$as_dir$ac_prog$ac_exec_ext"
+      as_fn_executable_p "$ac_path_EGREP" || continue
+# Check for GNU ac_path_EGREP and select it if it is found.
+  # Check for GNU $ac_path_EGREP
+case `"$ac_path_EGREP" --version 2>&1` in
+*GNU*)
+  ac_cv_path_EGREP="$ac_path_EGREP" ac_path_EGREP_found=:;;
+*)
+  ac_count=0
+  printf %s 0123456789 >"conftest.in"
+  while :
+  do
+    cat "conftest.in" "conftest.in" >"conftest.tmp"
+    mv "conftest.tmp" "conftest.in"
+    cp "conftest.in" "conftest.nl"
+    printf "%s\n" 'EGREP' >> "conftest.nl"
+    "$ac_path_EGREP" 'EGREP$' < "conftest.nl" >"conftest.out" 2>/dev/null || 
break
+    diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break
+    as_fn_arith $ac_count + 1 && ac_count=$as_val
+    if test $ac_count -gt ${ac_path_EGREP_max-0}; then
+      # Best one so far, save it but keep looking for a better one
+      ac_cv_path_EGREP="$ac_path_EGREP"
+      ac_path_EGREP_max=$ac_count
+    fi
+    # 10*(2^10) chars as input seems more than enough
+    test $ac_count -gt 10 && break
+  done
+  rm -f conftest.in conftest.tmp conftest.nl conftest.out;;
+esac
+
+      $ac_path_EGREP_found && break 3
+    done
+  done
+  done
+IFS=$as_save_IFS
+  if test -z "$ac_cv_path_EGREP"; then
+    as_fn_error $? "no acceptable egrep could be found in 
$PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5
+  fi
+else
+  ac_cv_path_EGREP=$EGREP
+fi
+
+   fi
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_EGREP" >&5
+printf "%s\n" "$ac_cv_path_EGREP" >&6; }
+ EGREP="$ac_cv_path_EGREP"
+
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for fgrep" >&5
+printf %s "checking for fgrep... " >&6; }
+if test ${ac_cv_path_FGREP+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if echo 'ab*c' | $GREP -F 'ab*c' >/dev/null 2>&1
+   then ac_cv_path_FGREP="$GREP -F"
+   else
+     if test -z "$FGREP"; then
+  ac_path_FGREP_found=false
+  # Loop through the user's path and test for each of PROGNAME-LIST
+  as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH$PATH_SEPARATOR/usr/xpg4/bin
+do
+  IFS=$as_save_IFS
+  case $as_dir in #(((
+    '') as_dir=./ ;;
+    */) ;;
+    *) as_dir=$as_dir/ ;;
+  esac
+    for ac_prog in fgrep
+   do
+    for ac_exec_ext in '' $ac_executable_extensions; do
+      ac_path_FGREP="$as_dir$ac_prog$ac_exec_ext"
+      as_fn_executable_p "$ac_path_FGREP" || continue
+# Check for GNU ac_path_FGREP and select it if it is found.
+  # Check for GNU $ac_path_FGREP
+case `"$ac_path_FGREP" --version 2>&1` in
+*GNU*)
+  ac_cv_path_FGREP="$ac_path_FGREP" ac_path_FGREP_found=:;;
+*)
+  ac_count=0
+  printf %s 0123456789 >"conftest.in"
+  while :
+  do
+    cat "conftest.in" "conftest.in" >"conftest.tmp"
+    mv "conftest.tmp" "conftest.in"
+    cp "conftest.in" "conftest.nl"
+    printf "%s\n" 'FGREP' >> "conftest.nl"
+    "$ac_path_FGREP" FGREP < "conftest.nl" >"conftest.out" 2>/dev/null || break
+    diff "conftest.out" "conftest.nl" >/dev/null 2>&1 || break
+    as_fn_arith $ac_count + 1 && ac_count=$as_val
+    if test $ac_count -gt ${ac_path_FGREP_max-0}; then
+      # Best one so far, save it but keep looking for a better one
+      ac_cv_path_FGREP="$ac_path_FGREP"
+      ac_path_FGREP_max=$ac_count
+    fi
+    # 10*(2^10) chars as input seems more than enough
+    test $ac_count -gt 10 && break
+  done
+  rm -f conftest.in conftest.tmp conftest.nl conftest.out;;
+esac
+
+      $ac_path_FGREP_found && break 3
+    done
+  done
+  done
+IFS=$as_save_IFS
+  if test -z "$ac_cv_path_FGREP"; then
+    as_fn_error $? "no acceptable fgrep could be found in 
$PATH$PATH_SEPARATOR/usr/xpg4/bin" "$LINENO" 5
+  fi
+else
+  ac_cv_path_FGREP=$FGREP
+fi
+
+   fi
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_path_FGREP" >&5
+printf "%s\n" "$ac_cv_path_FGREP" >&6; }
+ FGREP="$ac_cv_path_FGREP"
+
+
+test -z "$GREP" && GREP=grep
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+# Check whether --with-gnu-ld was given.
+if test ${with_gnu_ld+y}
+then :
+  withval=$with_gnu_ld; test "$withval" = no || with_gnu_ld=yes
+else $as_nop
+  with_gnu_ld=no
+fi
+
+ac_prog=ld
+if test "$GCC" = yes; then
+  # Check if gcc -print-prog-name=ld gives a path.
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for ld used by $CC" 
>&5
+printf %s "checking for ld used by $CC... " >&6; }
+  case $host in
+  *-*-mingw*)
+    # gcc leaves a trailing carriage return which upsets mingw
+    ac_prog=`($CC -print-prog-name=ld) 2>&5 | tr -d '\015'` ;;
+  *)
+    ac_prog=`($CC -print-prog-name=ld) 2>&5` ;;
+  esac
+  case $ac_prog in
+    # Accept absolute paths.
+    [\\/]* | ?:[\\/]*)
+      re_direlt='/[^/][^/]*/\.\./'
+      # Canonicalize the pathname of ld
+      ac_prog=`$ECHO "$ac_prog"| $SED 's%\\\\%/%g'`
+      while $ECHO "$ac_prog" | $GREP "$re_direlt" > /dev/null 2>&1; do
+       ac_prog=`$ECHO $ac_prog| $SED "s%$re_direlt%/%"`
+      done
+      test -z "$LD" && LD="$ac_prog"
+      ;;
+  "")
+    # If it fails, then pretend we aren't using GCC.
+    ac_prog=ld
+    ;;
+  *)
+    # If it is relative, then search for the first ld in PATH.
+    with_gnu_ld=unknown
+    ;;
+  esac
+elif test "$with_gnu_ld" = yes; then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for GNU ld" >&5
+printf %s "checking for GNU ld... " >&6; }
+else
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for non-GNU ld" >&5
+printf %s "checking for non-GNU ld... " >&6; }
+fi
+if test ${lt_cv_path_LD+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test -z "$LD"; then
+  lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR
+  for ac_dir in $PATH; do
+    IFS="$lt_save_ifs"
+    test -z "$ac_dir" && ac_dir=.
+    if test -f "$ac_dir/$ac_prog" || test -f "$ac_dir/$ac_prog$ac_exeext"; then
+      lt_cv_path_LD="$ac_dir/$ac_prog"
+      # Check to see if the program is GNU ld.  I'd rather use --version,
+      # but apparently some variants of GNU ld only accept -v.
+      # Break only if it was the GNU/non-GNU ld that we prefer.
+      case `"$lt_cv_path_LD" -v 2>&1 </dev/null` in
+      *GNU* | *'with BFD'*)
+       test "$with_gnu_ld" != no && break
+       ;;
+      *)
+       test "$with_gnu_ld" != yes && break
+       ;;
+      esac
+    fi
+  done
+  IFS="$lt_save_ifs"
+else
+  lt_cv_path_LD="$LD" # Let the user override the test with a path.
+fi
+fi
+
+LD="$lt_cv_path_LD"
+if test -n "$LD"; then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $LD" >&5
+printf "%s\n" "$LD" >&6; }
+else
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+fi
+test -z "$LD" && as_fn_error $? "no acceptable ld found in \$PATH" "$LINENO" 5
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if the linker ($LD) is 
GNU ld" >&5
+printf %s "checking if the linker ($LD) is GNU ld... " >&6; }
+if test ${lt_cv_prog_gnu_ld+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  # I'd rather use --version here, but apparently some GNU lds only accept -v.
+case `$LD -v 2>&1 </dev/null` in
+*GNU* | *'with BFD'*)
+  lt_cv_prog_gnu_ld=yes
+  ;;
+*)
+  lt_cv_prog_gnu_ld=no
+  ;;
+esac
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_gnu_ld" >&5
+printf "%s\n" "$lt_cv_prog_gnu_ld" >&6; }
+with_gnu_ld=$lt_cv_prog_gnu_ld
+
+
+
+
+
+
+
+
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for BSD- or 
MS-compatible name lister (nm)" >&5
+printf %s "checking for BSD- or MS-compatible name lister (nm)... " >&6; }
+if test ${lt_cv_path_NM+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test -n "$NM"; then
+  # Let the user override the test.
+  lt_cv_path_NM="$NM"
+else
+  lt_nm_to_check="${ac_tool_prefix}nm"
+  if test -n "$ac_tool_prefix" && test "$build" = "$host"; then
+    lt_nm_to_check="$lt_nm_to_check nm"
+  fi
+  for lt_tmp_nm in $lt_nm_to_check; do
+    lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR
+    for ac_dir in $PATH /usr/ccs/bin/elf /usr/ccs/bin /usr/ucb /bin; do
+      IFS="$lt_save_ifs"
+      test -z "$ac_dir" && ac_dir=.
+      tmp_nm="$ac_dir/$lt_tmp_nm"
+      if test -f "$tmp_nm" || test -f "$tmp_nm$ac_exeext" ; then
+       # Check to see if the nm accepts a BSD-compat flag.
+       # Adding the `sed 1q' prevents false positives on HP-UX, which says:
+       #   nm: unknown option "B" ignored
+       # Tru64's nm complains that /dev/null is an invalid object file
+       case `"$tmp_nm" -B /dev/null 2>&1 | sed '1q'` in
+       */dev/null* | *'Invalid file or object type'*)
+         lt_cv_path_NM="$tmp_nm -B"
+         break
+         ;;
+       *)
+         case `"$tmp_nm" -p /dev/null 2>&1 | sed '1q'` in
+         */dev/null*)
+           lt_cv_path_NM="$tmp_nm -p"
+           break
+           ;;
+         *)
+           lt_cv_path_NM=${lt_cv_path_NM="$tmp_nm"} # keep the first match, but
+           continue # so that we can try to find one that supports BSD flags
+           ;;
+         esac
+         ;;
+       esac
+      fi
+    done
+    IFS="$lt_save_ifs"
+  done
+  : ${lt_cv_path_NM=no}
+fi
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_path_NM" >&5
+printf "%s\n" "$lt_cv_path_NM" >&6; }
+if test "$lt_cv_path_NM" != "no"; then
+  NM="$lt_cv_path_NM"
+else
+  # Didn't find any BSD compatible name lister, look for dumpbin.
+  if test -n "$DUMPBIN"; then :
+    # Let the user override the test.
+  else
+    if test -n "$ac_tool_prefix"; then
+  for ac_prog in dumpbin "link -dump"
+  do
+    # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a 
program name with args.
+set dummy $ac_tool_prefix$ac_prog; ac_word=$2
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+printf %s "checking for $ac_word... " >&6; }
+if test ${ac_cv_prog_DUMPBIN+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test -n "$DUMPBIN"; then
+  ac_cv_prog_DUMPBIN="$DUMPBIN" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  case $as_dir in #(((
+    '') as_dir=./ ;;
+    */) ;;
+    *) as_dir=$as_dir/ ;;
+  esac
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then
+    ac_cv_prog_DUMPBIN="$ac_tool_prefix$ac_prog"
+    printf "%s\n" "$as_me:${as_lineno-$LINENO}: found 
$as_dir$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+DUMPBIN=$ac_cv_prog_DUMPBIN
+if test -n "$DUMPBIN"; then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $DUMPBIN" >&5
+printf "%s\n" "$DUMPBIN" >&6; }
+else
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+fi
+
+
+    test -n "$DUMPBIN" && break
+  done
+fi
+if test -z "$DUMPBIN"; then
+  ac_ct_DUMPBIN=$DUMPBIN
+  for ac_prog in dumpbin "link -dump"
+do
+  # Extract the first word of "$ac_prog", so it can be a program name with 
args.
+set dummy $ac_prog; ac_word=$2
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+printf %s "checking for $ac_word... " >&6; }
+if test ${ac_cv_prog_ac_ct_DUMPBIN+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test -n "$ac_ct_DUMPBIN"; then
+  ac_cv_prog_ac_ct_DUMPBIN="$ac_ct_DUMPBIN" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  case $as_dir in #(((
+    '') as_dir=./ ;;
+    */) ;;
+    *) as_dir=$as_dir/ ;;
+  esac
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then
+    ac_cv_prog_ac_ct_DUMPBIN="$ac_prog"
+    printf "%s\n" "$as_me:${as_lineno-$LINENO}: found 
$as_dir$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_DUMPBIN=$ac_cv_prog_ac_ct_DUMPBIN
+if test -n "$ac_ct_DUMPBIN"; then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_DUMPBIN" >&5
+printf "%s\n" "$ac_ct_DUMPBIN" >&6; }
+else
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+fi
+
+
+  test -n "$ac_ct_DUMPBIN" && break
+done
+
+  if test "x$ac_ct_DUMPBIN" = x; then
+    DUMPBIN=":"
+  else
+    case $cross_compiling:$ac_tool_warned in
+yes:)
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not 
prefixed with host triplet" >&5
+printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host 
triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    DUMPBIN=$ac_ct_DUMPBIN
+  fi
+fi
+
+    case `$DUMPBIN -symbols /dev/null 2>&1 | sed '1q'` in
+    *COFF*)
+      DUMPBIN="$DUMPBIN -symbols"
+      ;;
+    *)
+      DUMPBIN=:
+      ;;
+    esac
+  fi
+
+  if test "$DUMPBIN" != ":"; then
+    NM="$DUMPBIN"
+  fi
+fi
+test -z "$NM" && NM=nm
+
+
+
+
+
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking the name lister ($NM) 
interface" >&5
+printf %s "checking the name lister ($NM) interface... " >&6; }
+if test ${lt_cv_nm_interface+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  lt_cv_nm_interface="BSD nm"
+  echo "int some_variable = 0;" > conftest.$ac_ext
+  (eval echo "\"\$as_me:$LINENO: $ac_compile\"" >&5)
+  (eval "$ac_compile" 2>conftest.err)
+  cat conftest.err >&5
+  (eval echo "\"\$as_me:$LINENO: $NM \\\"conftest.$ac_objext\\\"\"" >&5)
+  (eval "$NM \"conftest.$ac_objext\"" 2>conftest.err > conftest.out)
+  cat conftest.err >&5
+  (eval echo "\"\$as_me:$LINENO: output\"" >&5)
+  cat conftest.out >&5
+  if $GREP 'External.*some_variable' conftest.out > /dev/null; then
+    lt_cv_nm_interface="MS dumpbin"
+  fi
+  rm -f conftest*
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_nm_interface" >&5
+printf "%s\n" "$lt_cv_nm_interface" >&6; }
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether ln -s works" >&5
+printf %s "checking whether ln -s works... " >&6; }
+LN_S=$as_ln_s
+if test "$LN_S" = "ln -s"; then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+printf "%s\n" "yes" >&6; }
+else
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no, using $LN_S" >&5
+printf "%s\n" "no, using $LN_S" >&6; }
+fi
+
+# find the maximum length of command line arguments
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking the maximum length of 
command line arguments" >&5
+printf %s "checking the maximum length of command line arguments... " >&6; }
+if test ${lt_cv_sys_max_cmd_len+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+    i=0
+  teststring="ABCD"
+
+  case $build_os in
+  msdosdjgpp*)
+    # On DJGPP, this test can blow up pretty badly due to problems in libc
+    # (any single argument exceeding 2000 bytes causes a buffer overrun
+    # during glob expansion).  Even if it were fixed, the result of this
+    # check would be larger than it should be.
+    lt_cv_sys_max_cmd_len=12288;    # 12K is about right
+    ;;
+
+  gnu*)
+    # Under GNU Hurd, this test is not required because there is
+    # no limit to the length of command line arguments.
+    # Libtool will interpret -1 as no limit whatsoever
+    lt_cv_sys_max_cmd_len=-1;
+    ;;
+
+  cygwin* | mingw* | cegcc*)
+    # On Win9x/ME, this test blows up -- it succeeds, but takes
+    # about 5 minutes as the teststring grows exponentially.
+    # Worse, since 9x/ME are not pre-emptively multitasking,
+    # you end up with a "frozen" computer, even though with patience
+    # the test eventually succeeds (with a max line length of 256k).
+    # Instead, let's just punt: use the minimum linelength reported by
+    # all of the supported platforms: 8192 (on NT/2K/XP).
+    lt_cv_sys_max_cmd_len=8192;
+    ;;
+
+  mint*)
+    # On MiNT this can take a long time and run out of memory.
+    lt_cv_sys_max_cmd_len=8192;
+    ;;
+
+  amigaos*)
+    # On AmigaOS with pdksh, this test takes hours, literally.
+    # So we just punt and use a minimum line length of 8192.
+    lt_cv_sys_max_cmd_len=8192;
+    ;;
+
+  netbsd* | freebsd* | openbsd* | darwin* | dragonfly*)
+    # This has been around since 386BSD, at least.  Likely further.
+    if test -x /sbin/sysctl; then
+      lt_cv_sys_max_cmd_len=`/sbin/sysctl -n kern.argmax`
+    elif test -x /usr/sbin/sysctl; then
+      lt_cv_sys_max_cmd_len=`/usr/sbin/sysctl -n kern.argmax`
+    else
+      lt_cv_sys_max_cmd_len=65536      # usable default for all BSDs
+    fi
+    # And add a safety zone
+    lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 4`
+    lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \* 3`
+    ;;
+
+  interix*)
+    # We know the value 262144 and hardcode it with a safety zone (like BSD)
+    lt_cv_sys_max_cmd_len=196608
+    ;;
+
+  os2*)
+    # The test takes a long time on OS/2.
+    lt_cv_sys_max_cmd_len=8192
+    ;;
+
+  osf*)
+    # Dr. Hans Ekkehard Plesser reports seeing a kernel panic running configure
+    # due to this test when exec_disable_arg_limit is 1 on Tru64. It is not
+    # nice to cause kernel panics so lets avoid the loop below.
+    # First set a reasonable default.
+    lt_cv_sys_max_cmd_len=16384
+    #
+    if test -x /sbin/sysconfig; then
+      case `/sbin/sysconfig -q proc exec_disable_arg_limit` in
+        *1*) lt_cv_sys_max_cmd_len=-1 ;;
+      esac
+    fi
+    ;;
+  sco3.2v5*)
+    lt_cv_sys_max_cmd_len=102400
+    ;;
+  sysv5* | sco5v6* | sysv4.2uw2*)
+    kargmax=`grep ARG_MAX /etc/conf/cf.d/stune 2>/dev/null`
+    if test -n "$kargmax"; then
+      lt_cv_sys_max_cmd_len=`echo $kargmax | sed 's/.*[         ]//'`
+    else
+      lt_cv_sys_max_cmd_len=32768
+    fi
+    ;;
+  *)
+    lt_cv_sys_max_cmd_len=`(getconf ARG_MAX) 2> /dev/null`
+    if test -n "$lt_cv_sys_max_cmd_len"; then
+      lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 4`
+      lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \* 3`
+    else
+      # Make teststring a little bigger before we do anything with it.
+      # a 1K string should be a reasonable start.
+      for i in 1 2 3 4 5 6 7 8 ; do
+        teststring=$teststring$teststring
+      done
+      SHELL=${SHELL-${CONFIG_SHELL-/bin/sh}}
+      # If test is not a shell built-in, we'll probably end up computing a
+      # maximum length that is only half of the actual maximum length, but
+      # we can't tell.
+      while { test "X"`env echo "$teststring$teststring" 2>/dev/null` \
+                = "X$teststring$teststring"; } >/dev/null 2>&1 &&
+             test $i != 17 # 1/2 MB should be enough
+      do
+        i=`expr $i + 1`
+        teststring=$teststring$teststring
+      done
+      # Only check the string length outside the loop.
+      lt_cv_sys_max_cmd_len=`expr "X$teststring" : ".*" 2>&1`
+      teststring=
+      # Add a significant safety factor because C++ compilers can tack on
+      # massive amounts of additional arguments before passing them to the
+      # linker.  It appears as though 1/2 is a usable value.
+      lt_cv_sys_max_cmd_len=`expr $lt_cv_sys_max_cmd_len \/ 2`
+    fi
+    ;;
+  esac
+
+fi
+
+if test -n $lt_cv_sys_max_cmd_len ; then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: 
$lt_cv_sys_max_cmd_len" >&5
+printf "%s\n" "$lt_cv_sys_max_cmd_len" >&6; }
+else
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: none" >&5
+printf "%s\n" "none" >&6; }
+fi
+max_cmd_len=$lt_cv_sys_max_cmd_len
+
+
+
+
+
+
+: ${CP="cp -f"}
+: ${MV="mv -f"}
+: ${RM="rm -f"}
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the shell 
understands some XSI constructs" >&5
+printf %s "checking whether the shell understands some XSI constructs... " 
>&6; }
+# Try some XSI features
+xsi_shell=no
+( _lt_dummy="a/b/c"
+  test 
"${_lt_dummy##*/},${_lt_dummy%/*},${_lt_dummy#??}"${_lt_dummy%"$_lt_dummy"}, \
+      = c,a/b,b/c, \
+    && eval 'test $(( 1 + 1 )) -eq 2 \
+    && test "${#_lt_dummy}" -eq 5' ) >/dev/null 2>&1 \
+  && xsi_shell=yes
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $xsi_shell" >&5
+printf "%s\n" "$xsi_shell" >&6; }
+
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the shell 
understands \"+=\"" >&5
+printf %s "checking whether the shell understands \"+=\"... " >&6; }
+lt_shell_append=no
+( foo=bar; set foo baz; eval "$1+=\$2" && test "$foo" = barbaz ) \
+    >/dev/null 2>&1 \
+  && lt_shell_append=yes
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_shell_append" >&5
+printf "%s\n" "$lt_shell_append" >&6; }
+
+
+if ( (MAIL=60; unset MAIL) || exit) >/dev/null 2>&1; then
+  lt_unset=unset
+else
+  lt_unset=false
+fi
+
+
+
+
+
+# test EBCDIC or ASCII
+case `echo X|tr X '\101'` in
+ A) # ASCII based system
+    # \n is not interpreted correctly by Solaris 8 /usr/ucb/tr
+  lt_SP2NL='tr \040 \012'
+  lt_NL2SP='tr \015\012 \040\040'
+  ;;
+ *) # EBCDIC based system
+  lt_SP2NL='tr \100 \n'
+  lt_NL2SP='tr \r\n \100\100'
+  ;;
+esac
+
+
+
+
+
+
+
+
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking how to convert $build 
file names to $host format" >&5
+printf %s "checking how to convert $build file names to $host format... " >&6; 
}
+if test ${lt_cv_to_host_file_cmd+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  case $host in
+  *-*-mingw* )
+    case $build in
+      *-*-mingw* ) # actually msys
+        lt_cv_to_host_file_cmd=func_convert_file_msys_to_w32
+        ;;
+      *-*-cygwin* )
+        lt_cv_to_host_file_cmd=func_convert_file_cygwin_to_w32
+        ;;
+      * ) # otherwise, assume *nix
+        lt_cv_to_host_file_cmd=func_convert_file_nix_to_w32
+        ;;
+    esac
+    ;;
+  *-*-cygwin* )
+    case $build in
+      *-*-mingw* ) # actually msys
+        lt_cv_to_host_file_cmd=func_convert_file_msys_to_cygwin
+        ;;
+      *-*-cygwin* )
+        lt_cv_to_host_file_cmd=func_convert_file_noop
+        ;;
+      * ) # otherwise, assume *nix
+        lt_cv_to_host_file_cmd=func_convert_file_nix_to_cygwin
+        ;;
+    esac
+    ;;
+  * ) # unhandled hosts (and "normal" native builds)
+    lt_cv_to_host_file_cmd=func_convert_file_noop
+    ;;
+esac
+
+fi
+
+to_host_file_cmd=$lt_cv_to_host_file_cmd
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_to_host_file_cmd" 
>&5
+printf "%s\n" "$lt_cv_to_host_file_cmd" >&6; }
+
+
+
+
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking how to convert $build 
file names to toolchain format" >&5
+printf %s "checking how to convert $build file names to toolchain format... " 
>&6; }
+if test ${lt_cv_to_tool_file_cmd+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  #assume ordinary cross tools, or native build.
+lt_cv_to_tool_file_cmd=func_convert_file_noop
+case $host in
+  *-*-mingw* )
+    case $build in
+      *-*-mingw* ) # actually msys
+        lt_cv_to_tool_file_cmd=func_convert_file_msys_to_w32
+        ;;
+    esac
+    ;;
+esac
+
+fi
+
+to_tool_file_cmd=$lt_cv_to_tool_file_cmd
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_to_tool_file_cmd" 
>&5
+printf "%s\n" "$lt_cv_to_tool_file_cmd" >&6; }
+
+
+
+
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $LD option to 
reload object files" >&5
+printf %s "checking for $LD option to reload object files... " >&6; }
+if test ${lt_cv_ld_reload_flag+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  lt_cv_ld_reload_flag='-r'
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_ld_reload_flag" 
>&5
+printf "%s\n" "$lt_cv_ld_reload_flag" >&6; }
+reload_flag=$lt_cv_ld_reload_flag
+case $reload_flag in
+"" | " "*) ;;
+*) reload_flag=" $reload_flag" ;;
+esac
+reload_cmds='$LD$reload_flag -o $output$reload_objs'
+case $host_os in
+  cygwin* | mingw* | pw32* | cegcc*)
+    if test "$GCC" != yes; then
+      reload_cmds=false
+    fi
+    ;;
+  darwin*)
+    if test "$GCC" = yes; then
+      reload_cmds='$LTCC $LTCFLAGS -nostdlib ${wl}-r -o $output$reload_objs'
+    else
+      reload_cmds='$LD$reload_flag -o $output$reload_objs'
+    fi
+    ;;
+esac
+
+
+
+
+
+
+
+
+
+if test -n "$ac_tool_prefix"; then
+  # Extract the first word of "${ac_tool_prefix}objdump", so it can be a 
program name with args.
+set dummy ${ac_tool_prefix}objdump; ac_word=$2
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+printf %s "checking for $ac_word... " >&6; }
+if test ${ac_cv_prog_OBJDUMP+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test -n "$OBJDUMP"; then
+  ac_cv_prog_OBJDUMP="$OBJDUMP" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  case $as_dir in #(((
+    '') as_dir=./ ;;
+    */) ;;
+    *) as_dir=$as_dir/ ;;
+  esac
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then
+    ac_cv_prog_OBJDUMP="${ac_tool_prefix}objdump"
+    printf "%s\n" "$as_me:${as_lineno-$LINENO}: found 
$as_dir$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+OBJDUMP=$ac_cv_prog_OBJDUMP
+if test -n "$OBJDUMP"; then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $OBJDUMP" >&5
+printf "%s\n" "$OBJDUMP" >&6; }
+else
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+fi
+
+
+fi
+if test -z "$ac_cv_prog_OBJDUMP"; then
+  ac_ct_OBJDUMP=$OBJDUMP
+  # Extract the first word of "objdump", so it can be a program name with args.
+set dummy objdump; ac_word=$2
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+printf %s "checking for $ac_word... " >&6; }
+if test ${ac_cv_prog_ac_ct_OBJDUMP+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test -n "$ac_ct_OBJDUMP"; then
+  ac_cv_prog_ac_ct_OBJDUMP="$ac_ct_OBJDUMP" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  case $as_dir in #(((
+    '') as_dir=./ ;;
+    */) ;;
+    *) as_dir=$as_dir/ ;;
+  esac
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then
+    ac_cv_prog_ac_ct_OBJDUMP="objdump"
+    printf "%s\n" "$as_me:${as_lineno-$LINENO}: found 
$as_dir$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_OBJDUMP=$ac_cv_prog_ac_ct_OBJDUMP
+if test -n "$ac_ct_OBJDUMP"; then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_OBJDUMP" >&5
+printf "%s\n" "$ac_ct_OBJDUMP" >&6; }
+else
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+fi
+
+  if test "x$ac_ct_OBJDUMP" = x; then
+    OBJDUMP="false"
+  else
+    case $cross_compiling:$ac_tool_warned in
+yes:)
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not 
prefixed with host triplet" >&5
+printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host 
triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    OBJDUMP=$ac_ct_OBJDUMP
+  fi
+else
+  OBJDUMP="$ac_cv_prog_OBJDUMP"
+fi
+
+test -z "$OBJDUMP" && OBJDUMP=objdump
+
+
+
+
+
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking how to recognize 
dependent libraries" >&5
+printf %s "checking how to recognize dependent libraries... " >&6; }
+if test ${lt_cv_deplibs_check_method+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  lt_cv_file_magic_cmd='$MAGIC_CMD'
+lt_cv_file_magic_test_file=
+lt_cv_deplibs_check_method='unknown'
+# Need to set the preceding variable on all platforms that support
+# interlibrary dependencies.
+# 'none' -- dependencies not supported.
+# `unknown' -- same as none, but documents that we really don't know.
+# 'pass_all' -- all dependencies passed with no checks.
+# 'test_compile' -- check by making test program.
+# 'file_magic [[regex]]' -- check by looking for files in library path
+# which responds to the $file_magic_cmd with a given extended regex.
+# If you have `file' or equivalent on your system and you're not sure
+# whether `pass_all' will *always* work, you probably want this one.
+
+case $host_os in
+aix[4-9]*)
+  lt_cv_deplibs_check_method=pass_all
+  ;;
+
+beos*)
+  lt_cv_deplibs_check_method=pass_all
+  ;;
+
+bsdi[45]*)
+  lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [ML]SB (shared 
object|dynamic lib)'
+  lt_cv_file_magic_cmd='/usr/bin/file -L'
+  lt_cv_file_magic_test_file=/shlib/libc.so
+  ;;
+
+cygwin*)
+  # func_win32_libid is a shell function defined in ltmain.sh
+  lt_cv_deplibs_check_method='file_magic ^x86 archive import|^x86 DLL'
+  lt_cv_file_magic_cmd='func_win32_libid'
+  ;;
+
+mingw* | pw32*)
+  # Base MSYS/MinGW do not provide the 'file' command needed by
+  # func_win32_libid shell function, so use a weaker test based on 'objdump',
+  # unless we find 'file', for example because we are cross-compiling.
+  # func_win32_libid assumes BSD nm, so disallow it if using MS dumpbin.
+  if ( test "$lt_cv_nm_interface" = "BSD nm" && file / ) >/dev/null 2>&1; then
+    lt_cv_deplibs_check_method='file_magic ^x86 archive import|^x86 DLL'
+    lt_cv_file_magic_cmd='func_win32_libid'
+  else
+    # Keep this pattern in sync with the one in func_win32_libid.
+    lt_cv_deplibs_check_method='file_magic file format 
(pei*-i386(.*architecture: i386)?|pe-arm-wince|pe-x86-64)'
+    lt_cv_file_magic_cmd='$OBJDUMP -f'
+  fi
+  ;;
+
+cegcc*)
+  # use the weaker test based on 'objdump'. See mingw*.
+  lt_cv_deplibs_check_method='file_magic file format 
pe-arm-.*little(.*architecture: arm)?'
+  lt_cv_file_magic_cmd='$OBJDUMP -f'
+  ;;
+
+darwin* | rhapsody*)
+  lt_cv_deplibs_check_method=pass_all
+  ;;
+
+freebsd* | dragonfly*)
+  if echo __ELF__ | $CC -E - | $GREP __ELF__ > /dev/null; then
+    case $host_cpu in
+    i*86 )
+      # Not sure whether the presence of OpenBSD here was a mistake.
+      # Let's accept both of them until this is cleared up.
+      lt_cv_deplibs_check_method='file_magic 
(FreeBSD|OpenBSD|DragonFly)/i[3-9]86 (compact )?demand paged shared library'
+      lt_cv_file_magic_cmd=/usr/bin/file
+      lt_cv_file_magic_test_file=`echo /usr/lib/libc.so.*`
+      ;;
+    esac
+  else
+    lt_cv_deplibs_check_method=pass_all
+  fi
+  ;;
+
+gnu*)
+  lt_cv_deplibs_check_method=pass_all
+  ;;
+
+haiku*)
+  lt_cv_deplibs_check_method=pass_all
+  ;;
+
+hpux10.20* | hpux11*)
+  lt_cv_file_magic_cmd=/usr/bin/file
+  case $host_cpu in
+  ia64*)
+    lt_cv_deplibs_check_method='file_magic (s[0-9][0-9][0-9]|ELF-[0-9][0-9]) 
shared object file - IA64'
+    lt_cv_file_magic_test_file=/usr/lib/hpux32/libc.so
+    ;;
+  hppa*64*)
+    lt_cv_deplibs_check_method='file_magic (s[0-9][0-9][0-9]|ELF[ 
-][0-9][0-9])(-bit)?( [LM]SB)? shared object( file)?[, -]* PA-RISC [0-9]\.[0-9]'
+    lt_cv_file_magic_test_file=/usr/lib/pa20_64/libc.sl
+    ;;
+  *)
+    lt_cv_deplibs_check_method='file_magic 
(s[0-9][0-9][0-9]|PA-RISC[0-9]\.[0-9]) shared library'
+    lt_cv_file_magic_test_file=/usr/lib/libc.sl
+    ;;
+  esac
+  ;;
+
+interix[3-9]*)
+  # PIC code is broken on Interix 3.x, that's why |\.a not |_pic\.a here
+  lt_cv_deplibs_check_method='match_pattern /lib[^/]+(\.so|\.a)$'
+  ;;
+
+irix5* | irix6* | nonstopux*)
+  case $LD in
+  *-32|*"-32 ") libmagic=32-bit;;
+  *-n32|*"-n32 ") libmagic=N32;;
+  *-64|*"-64 ") libmagic=64-bit;;
+  *) libmagic=never-match;;
+  esac
+  lt_cv_deplibs_check_method=pass_all
+  ;;
+
+# This must be glibc/ELF.
+linux* | k*bsd*-gnu | kopensolaris*-gnu)
+  lt_cv_deplibs_check_method=pass_all
+  ;;
+
+netbsd* | netbsdelf*-gnu)
+  if echo __ELF__ | $CC -E - | $GREP __ELF__ > /dev/null; then
+    lt_cv_deplibs_check_method='match_pattern 
/lib[^/]+(\.so\.[0-9]+\.[0-9]+|_pic\.a)$'
+  else
+    lt_cv_deplibs_check_method='match_pattern /lib[^/]+(\.so|_pic\.a)$'
+  fi
+  ;;
+
+newos6*)
+  lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [ML]SB 
(executable|dynamic lib)'
+  lt_cv_file_magic_cmd=/usr/bin/file
+  lt_cv_file_magic_test_file=/usr/lib/libnls.so
+  ;;
+
+*nto* | *qnx*)
+  lt_cv_deplibs_check_method=pass_all
+  ;;
+
+openbsd*)
+  if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`" || test 
"$host_os-$host_cpu" = "openbsd2.8-powerpc"; then
+    lt_cv_deplibs_check_method='match_pattern 
/lib[^/]+(\.so\.[0-9]+\.[0-9]+|\.so|_pic\.a)$'
+  else
+    lt_cv_deplibs_check_method='match_pattern 
/lib[^/]+(\.so\.[0-9]+\.[0-9]+|_pic\.a)$'
+  fi
+  ;;
+
+osf3* | osf4* | osf5*)
+  lt_cv_deplibs_check_method=pass_all
+  ;;
+
+rdos*)
+  lt_cv_deplibs_check_method=pass_all
+  ;;
+
+solaris*)
+  lt_cv_deplibs_check_method=pass_all
+  ;;
+
+sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*)
+  lt_cv_deplibs_check_method=pass_all
+  ;;
+
+sysv4 | sysv4.3*)
+  case $host_vendor in
+  motorola)
+    lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [ML]SB (shared 
object|dynamic lib) M[0-9][0-9]* Version [0-9]'
+    lt_cv_file_magic_test_file=`echo /usr/lib/libc.so*`
+    ;;
+  ncr)
+    lt_cv_deplibs_check_method=pass_all
+    ;;
+  sequent)
+    lt_cv_file_magic_cmd='/bin/file'
+    lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [LM]SB (shared 
object|dynamic lib )'
+    ;;
+  sni)
+    lt_cv_file_magic_cmd='/bin/file'
+    lt_cv_deplibs_check_method="file_magic ELF [0-9][0-9]*-bit [LM]SB dynamic 
lib"
+    lt_cv_file_magic_test_file=/lib/libc.so
+    ;;
+  siemens)
+    lt_cv_deplibs_check_method=pass_all
+    ;;
+  pc)
+    lt_cv_deplibs_check_method=pass_all
+    ;;
+  esac
+  ;;
+
+tpf*)
+  lt_cv_deplibs_check_method=pass_all
+  ;;
+esac
+
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: 
$lt_cv_deplibs_check_method" >&5
+printf "%s\n" "$lt_cv_deplibs_check_method" >&6; }
+
+file_magic_glob=
+want_nocaseglob=no
+if test "$build" = "$host"; then
+  case $host_os in
+  mingw* | pw32*)
+    if ( shopt | grep nocaseglob ) >/dev/null 2>&1; then
+      want_nocaseglob=yes
+    else
+      file_magic_glob=`echo 
aAbBcCdDeEfFgGhHiIjJkKlLmMnNoOpPqQrRsStTuUvVwWxXyYzZ | $SED -e 
"s/\(..\)/s\/[\1]\/[\1]\/g;/g"`
+    fi
+    ;;
+  esac
+fi
+
+file_magic_cmd=$lt_cv_file_magic_cmd
+deplibs_check_method=$lt_cv_deplibs_check_method
+test -z "$deplibs_check_method" && deplibs_check_method=unknown
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+if test -n "$ac_tool_prefix"; then
+  # Extract the first word of "${ac_tool_prefix}dlltool", so it can be a 
program name with args.
+set dummy ${ac_tool_prefix}dlltool; ac_word=$2
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+printf %s "checking for $ac_word... " >&6; }
+if test ${ac_cv_prog_DLLTOOL+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test -n "$DLLTOOL"; then
+  ac_cv_prog_DLLTOOL="$DLLTOOL" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  case $as_dir in #(((
+    '') as_dir=./ ;;
+    */) ;;
+    *) as_dir=$as_dir/ ;;
+  esac
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then
+    ac_cv_prog_DLLTOOL="${ac_tool_prefix}dlltool"
+    printf "%s\n" "$as_me:${as_lineno-$LINENO}: found 
$as_dir$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+DLLTOOL=$ac_cv_prog_DLLTOOL
+if test -n "$DLLTOOL"; then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $DLLTOOL" >&5
+printf "%s\n" "$DLLTOOL" >&6; }
+else
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+fi
+
+
+fi
+if test -z "$ac_cv_prog_DLLTOOL"; then
+  ac_ct_DLLTOOL=$DLLTOOL
+  # Extract the first word of "dlltool", so it can be a program name with args.
+set dummy dlltool; ac_word=$2
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+printf %s "checking for $ac_word... " >&6; }
+if test ${ac_cv_prog_ac_ct_DLLTOOL+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test -n "$ac_ct_DLLTOOL"; then
+  ac_cv_prog_ac_ct_DLLTOOL="$ac_ct_DLLTOOL" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  case $as_dir in #(((
+    '') as_dir=./ ;;
+    */) ;;
+    *) as_dir=$as_dir/ ;;
+  esac
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then
+    ac_cv_prog_ac_ct_DLLTOOL="dlltool"
+    printf "%s\n" "$as_me:${as_lineno-$LINENO}: found 
$as_dir$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_DLLTOOL=$ac_cv_prog_ac_ct_DLLTOOL
+if test -n "$ac_ct_DLLTOOL"; then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_DLLTOOL" >&5
+printf "%s\n" "$ac_ct_DLLTOOL" >&6; }
+else
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+fi
+
+  if test "x$ac_ct_DLLTOOL" = x; then
+    DLLTOOL="false"
+  else
+    case $cross_compiling:$ac_tool_warned in
+yes:)
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not 
prefixed with host triplet" >&5
+printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host 
triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    DLLTOOL=$ac_ct_DLLTOOL
+  fi
+else
+  DLLTOOL="$ac_cv_prog_DLLTOOL"
+fi
+
+test -z "$DLLTOOL" && DLLTOOL=dlltool
+
+
+
+
+
+
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking how to associate 
runtime and link libraries" >&5
+printf %s "checking how to associate runtime and link libraries... " >&6; }
+if test ${lt_cv_sharedlib_from_linklib_cmd+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  lt_cv_sharedlib_from_linklib_cmd='unknown'
+
+case $host_os in
+cygwin* | mingw* | pw32* | cegcc*)
+  # two different shell functions defined in ltmain.sh
+  # decide which to use based on capabilities of $DLLTOOL
+  case `$DLLTOOL --help 2>&1` in
+  *--identify-strict*)
+    lt_cv_sharedlib_from_linklib_cmd=func_cygming_dll_for_implib
+    ;;
+  *)
+    lt_cv_sharedlib_from_linklib_cmd=func_cygming_dll_for_implib_fallback
+    ;;
+  esac
+  ;;
+*)
+  # fallback: assume linklib IS sharedlib
+  lt_cv_sharedlib_from_linklib_cmd="$ECHO"
+  ;;
+esac
+
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: 
$lt_cv_sharedlib_from_linklib_cmd" >&5
+printf "%s\n" "$lt_cv_sharedlib_from_linklib_cmd" >&6; }
+sharedlib_from_linklib_cmd=$lt_cv_sharedlib_from_linklib_cmd
+test -z "$sharedlib_from_linklib_cmd" && sharedlib_from_linklib_cmd=$ECHO
+
+
+
+
+
+
+
+if test -n "$ac_tool_prefix"; then
+  for ac_prog in ar
+  do
+    # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a 
program name with args.
+set dummy $ac_tool_prefix$ac_prog; ac_word=$2
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+printf %s "checking for $ac_word... " >&6; }
+if test ${ac_cv_prog_AR+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test -n "$AR"; then
+  ac_cv_prog_AR="$AR" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  case $as_dir in #(((
+    '') as_dir=./ ;;
+    */) ;;
+    *) as_dir=$as_dir/ ;;
+  esac
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then
+    ac_cv_prog_AR="$ac_tool_prefix$ac_prog"
+    printf "%s\n" "$as_me:${as_lineno-$LINENO}: found 
$as_dir$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+AR=$ac_cv_prog_AR
+if test -n "$AR"; then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $AR" >&5
+printf "%s\n" "$AR" >&6; }
+else
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+fi
+
+
+    test -n "$AR" && break
+  done
+fi
+if test -z "$AR"; then
+  ac_ct_AR=$AR
+  for ac_prog in ar
+do
+  # Extract the first word of "$ac_prog", so it can be a program name with 
args.
+set dummy $ac_prog; ac_word=$2
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+printf %s "checking for $ac_word... " >&6; }
+if test ${ac_cv_prog_ac_ct_AR+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test -n "$ac_ct_AR"; then
+  ac_cv_prog_ac_ct_AR="$ac_ct_AR" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  case $as_dir in #(((
+    '') as_dir=./ ;;
+    */) ;;
+    *) as_dir=$as_dir/ ;;
+  esac
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then
+    ac_cv_prog_ac_ct_AR="$ac_prog"
+    printf "%s\n" "$as_me:${as_lineno-$LINENO}: found 
$as_dir$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_AR=$ac_cv_prog_ac_ct_AR
+if test -n "$ac_ct_AR"; then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_AR" >&5
+printf "%s\n" "$ac_ct_AR" >&6; }
+else
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+fi
+
+
+  test -n "$ac_ct_AR" && break
+done
+
+  if test "x$ac_ct_AR" = x; then
+    AR="false"
+  else
+    case $cross_compiling:$ac_tool_warned in
+yes:)
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not 
prefixed with host triplet" >&5
+printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host 
triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    AR=$ac_ct_AR
+  fi
+fi
+
+: ${AR=ar}
+: ${AR_FLAGS=cru}
+
+
+
+
+
+
+
+
+
+
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for archiver @FILE 
support" >&5
+printf %s "checking for archiver @FILE support... " >&6; }
+if test ${lt_cv_ar_at_file+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  lt_cv_ar_at_file=no
+   cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main (void)
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"
+then :
+  echo conftest.$ac_objext > conftest.lst
+      lt_ar_try='$AR $AR_FLAGS libconftest.a @conftest.lst >&5'
+      { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$lt_ar_try\""; } >&5
+  (eval $lt_ar_try) 2>&5
+  ac_status=$?
+  printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }
+      if test "$ac_status" -eq 0; then
+       # Ensure the archiver fails upon bogus file names.
+       rm -f conftest.$ac_objext libconftest.a
+       { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$lt_ar_try\""; } >&5
+  (eval $lt_ar_try) 2>&5
+  ac_status=$?
+  printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }
+       if test "$ac_status" -ne 0; then
+          lt_cv_ar_at_file=@
+        fi
+      fi
+      rm -f conftest.* libconftest.a
+
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext
+
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_ar_at_file" >&5
+printf "%s\n" "$lt_cv_ar_at_file" >&6; }
+
+if test "x$lt_cv_ar_at_file" = xno; then
+  archiver_list_spec=
+else
+  archiver_list_spec=$lt_cv_ar_at_file
+fi
+
+
+
+
+
+
+
+if test -n "$ac_tool_prefix"; then
+  # Extract the first word of "${ac_tool_prefix}strip", so it can be a program 
name with args.
+set dummy ${ac_tool_prefix}strip; ac_word=$2
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+printf %s "checking for $ac_word... " >&6; }
+if test ${ac_cv_prog_STRIP+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test -n "$STRIP"; then
+  ac_cv_prog_STRIP="$STRIP" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  case $as_dir in #(((
+    '') as_dir=./ ;;
+    */) ;;
+    *) as_dir=$as_dir/ ;;
+  esac
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then
+    ac_cv_prog_STRIP="${ac_tool_prefix}strip"
+    printf "%s\n" "$as_me:${as_lineno-$LINENO}: found 
$as_dir$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+STRIP=$ac_cv_prog_STRIP
+if test -n "$STRIP"; then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $STRIP" >&5
+printf "%s\n" "$STRIP" >&6; }
+else
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+fi
+
+
+fi
+if test -z "$ac_cv_prog_STRIP"; then
+  ac_ct_STRIP=$STRIP
+  # Extract the first word of "strip", so it can be a program name with args.
+set dummy strip; ac_word=$2
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+printf %s "checking for $ac_word... " >&6; }
+if test ${ac_cv_prog_ac_ct_STRIP+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test -n "$ac_ct_STRIP"; then
+  ac_cv_prog_ac_ct_STRIP="$ac_ct_STRIP" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  case $as_dir in #(((
+    '') as_dir=./ ;;
+    */) ;;
+    *) as_dir=$as_dir/ ;;
+  esac
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then
+    ac_cv_prog_ac_ct_STRIP="strip"
+    printf "%s\n" "$as_me:${as_lineno-$LINENO}: found 
$as_dir$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_STRIP=$ac_cv_prog_ac_ct_STRIP
+if test -n "$ac_ct_STRIP"; then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_STRIP" >&5
+printf "%s\n" "$ac_ct_STRIP" >&6; }
+else
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+fi
+
+  if test "x$ac_ct_STRIP" = x; then
+    STRIP=":"
+  else
+    case $cross_compiling:$ac_tool_warned in
+yes:)
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not 
prefixed with host triplet" >&5
+printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host 
triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    STRIP=$ac_ct_STRIP
+  fi
+else
+  STRIP="$ac_cv_prog_STRIP"
+fi
+
+test -z "$STRIP" && STRIP=:
+
+
+
+
+
+
+if test -n "$ac_tool_prefix"; then
+  # Extract the first word of "${ac_tool_prefix}ranlib", so it can be a 
program name with args.
+set dummy ${ac_tool_prefix}ranlib; ac_word=$2
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+printf %s "checking for $ac_word... " >&6; }
+if test ${ac_cv_prog_RANLIB+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test -n "$RANLIB"; then
+  ac_cv_prog_RANLIB="$RANLIB" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  case $as_dir in #(((
+    '') as_dir=./ ;;
+    */) ;;
+    *) as_dir=$as_dir/ ;;
+  esac
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then
+    ac_cv_prog_RANLIB="${ac_tool_prefix}ranlib"
+    printf "%s\n" "$as_me:${as_lineno-$LINENO}: found 
$as_dir$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+RANLIB=$ac_cv_prog_RANLIB
+if test -n "$RANLIB"; then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $RANLIB" >&5
+printf "%s\n" "$RANLIB" >&6; }
+else
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+fi
+
+
+fi
+if test -z "$ac_cv_prog_RANLIB"; then
+  ac_ct_RANLIB=$RANLIB
+  # Extract the first word of "ranlib", so it can be a program name with args.
+set dummy ranlib; ac_word=$2
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+printf %s "checking for $ac_word... " >&6; }
+if test ${ac_cv_prog_ac_ct_RANLIB+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test -n "$ac_ct_RANLIB"; then
+  ac_cv_prog_ac_ct_RANLIB="$ac_ct_RANLIB" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  case $as_dir in #(((
+    '') as_dir=./ ;;
+    */) ;;
+    *) as_dir=$as_dir/ ;;
+  esac
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then
+    ac_cv_prog_ac_ct_RANLIB="ranlib"
+    printf "%s\n" "$as_me:${as_lineno-$LINENO}: found 
$as_dir$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_RANLIB=$ac_cv_prog_ac_ct_RANLIB
+if test -n "$ac_ct_RANLIB"; then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_RANLIB" >&5
+printf "%s\n" "$ac_ct_RANLIB" >&6; }
+else
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+fi
+
+  if test "x$ac_ct_RANLIB" = x; then
+    RANLIB=":"
+  else
+    case $cross_compiling:$ac_tool_warned in
+yes:)
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not 
prefixed with host triplet" >&5
+printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host 
triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    RANLIB=$ac_ct_RANLIB
+  fi
+else
+  RANLIB="$ac_cv_prog_RANLIB"
+fi
+
+test -z "$RANLIB" && RANLIB=:
+
+
+
+
+
+
+# Determine commands to create old-style static archives.
+old_archive_cmds='$AR $AR_FLAGS $oldlib$oldobjs'
+old_postinstall_cmds='chmod 644 $oldlib'
+old_postuninstall_cmds=
+
+if test -n "$RANLIB"; then
+  case $host_os in
+  openbsd*)
+    old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB -t \$tool_oldlib"
+    ;;
+  *)
+    old_postinstall_cmds="$old_postinstall_cmds~\$RANLIB \$tool_oldlib"
+    ;;
+  esac
+  old_archive_cmds="$old_archive_cmds~\$RANLIB \$tool_oldlib"
+fi
+
+case $host_os in
+  darwin*)
+    lock_old_archive_extraction=yes ;;
+  *)
+    lock_old_archive_extraction=no ;;
+esac
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+# If no C compiler was specified, use CC.
+LTCC=${LTCC-"$CC"}
+
+# If no C compiler flags were specified, use CFLAGS.
+LTCFLAGS=${LTCFLAGS-"$CFLAGS"}
+
+# Allow CC to be a program name with arguments.
+compiler=$CC
+
+
+# Check for command to grab the raw symbol name followed by C symbol from nm.
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking command to parse $NM 
output from $compiler object" >&5
+printf %s "checking command to parse $NM output from $compiler object... " 
>&6; }
+if test ${lt_cv_sys_global_symbol_pipe+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+
+# These are sane defaults that work on at least a few old systems.
+# [They come from Ultrix.  What could be older than Ultrix?!! ;)]
+
+# Character class describing NM global symbol codes.
+symcode='[BCDEGRST]'
+
+# Regexp to match symbols that can be accessed directly from C.
+sympat='\([_A-Za-z][_A-Za-z0-9]*\)'
+
+# Define system-specific variables.
+case $host_os in
+aix*)
+  symcode='[BCDT]'
+  ;;
+cygwin* | mingw* | pw32* | cegcc*)
+  symcode='[ABCDGISTW]'
+  ;;
+hpux*)
+  if test "$host_cpu" = ia64; then
+    symcode='[ABCDEGRST]'
+  fi
+  ;;
+irix* | nonstopux*)
+  symcode='[BCDEGRST]'
+  ;;
+osf*)
+  symcode='[BCDEGQRST]'
+  ;;
+solaris*)
+  symcode='[BDRT]'
+  ;;
+sco3.2v5*)
+  symcode='[DT]'
+  ;;
+sysv4.2uw2*)
+  symcode='[DT]'
+  ;;
+sysv5* | sco5v6* | unixware* | OpenUNIX*)
+  symcode='[ABDT]'
+  ;;
+sysv4)
+  symcode='[DFNSTU]'
+  ;;
+esac
+
+# If we're using GNU nm, then use its standard symbol codes.
+case `$NM -V 2>&1` in
+*GNU* | *'with BFD'*)
+  symcode='[ABCDGIRSTW]' ;;
+esac
+
+# Transform an extracted symbol line into a proper C declaration.
+# Some systems (esp. on ia64) link data and code symbols differently,
+# so use this general approach.
+lt_cv_sys_global_symbol_to_cdecl="sed -n -e 's/^T .* \(.*\)$/extern int 
\1();/p' -e 's/^$symcode* .* \(.*\)$/extern char \1;/p'"
+
+# Transform an extracted symbol line into symbol name and symbol address
+lt_cv_sys_global_symbol_to_c_name_address="sed -n -e 's/^: \([^ ]*\)[ ]*$/  
{\\\"\1\\\", (void *) 0},/p' -e 's/^$symcode* \([^ ]*\) \([^ ]*\)$/  {\"\2\", 
(void *) \&\2},/p'"
+lt_cv_sys_global_symbol_to_c_name_address_lib_prefix="sed -n -e 's/^: \([^ 
]*\)[ ]*$/  {\\\"\1\\\", (void *) 0},/p' -e 's/^$symcode* \([^ ]*\) \(lib[^ 
]*\)$/  {\"\2\", (void *) \&\2},/p' -e 's/^$symcode* \([^ ]*\) \([^ ]*\)$/  
{\"lib\2\", (void *) \&\2},/p'"
+
+# Handle CRLF in mingw tool chain
+opt_cr=
+case $build_os in
+mingw*)
+  opt_cr=`$ECHO 'x\{0,1\}' | tr x '\015'` # option cr in regexp
+  ;;
+esac
+
+# Try without a prefix underscore, then with it.
+for ac_symprfx in "" "_"; do
+
+  # Transform symcode, sympat, and symprfx into a raw symbol and a C symbol.
+  symxfrm="\\1 $ac_symprfx\\2 \\2"
+
+  # Write the raw and C identifiers.
+  if test "$lt_cv_nm_interface" = "MS dumpbin"; then
+    # Fake it for dumpbin and say T for any non-static function
+    # and D for any global variable.
+    # Also find C++ and __fastcall symbols from MSVC++,
+    # which start with @ or ?.
+    lt_cv_sys_global_symbol_pipe="$AWK '"\
+"     {last_section=section; section=\$ 3};"\
+"     /^COFF SYMBOL TABLE/{for(i in hide) delete hide[i]};"\
+"     /Section length .*#relocs.*(pick any)/{hide[last_section]=1};"\
+"     \$ 0!~/External *\|/{next};"\
+"     / 0+ UNDEF /{next}; / UNDEF \([^|]\)*()/{next};"\
+"     {if(hide[section]) next};"\
+"     {f=0}; \$ 0~/\(\).*\|/{f=1}; {printf f ? \"T \" : \"D \"};"\
+"     {split(\$ 0, a, /\||\r/); split(a[2], s)};"\
+"     s[1]~/^[@?]/{print s[1], s[1]; next};"\
+"     s[1]~prfx {split(s[1],t,\"@\"); print t[1], substr(t[1],length(prfx))}"\
+"     ' prfx=^$ac_symprfx"
+  else
+    lt_cv_sys_global_symbol_pipe="sed -n -e 's/^.*[     
]\($symcode$symcode*\)[         ][      
]*$ac_symprfx$sympat$opt_cr$/$symxfrm/p'"
+  fi
+  lt_cv_sys_global_symbol_pipe="$lt_cv_sys_global_symbol_pipe | sed '/ 
__gnu_lto/d'"
+
+  # Check to see that the pipe works correctly.
+  pipe_works=no
+
+  rm -f conftest*
+  cat > conftest.$ac_ext <<_LT_EOF
+#ifdef __cplusplus
+extern "C" {
+#endif
+char nm_test_var;
+void nm_test_func(void);
+void nm_test_func(void){}
+#ifdef __cplusplus
+}
+#endif
+int main(){nm_test_var='a';nm_test_func();return(0);}
+_LT_EOF
+
+  if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5
+  (eval $ac_compile) 2>&5
+  ac_status=$?
+  printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+    # Now try to grab the symbols.
+    nlist=conftest.nm
+    if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$NM 
conftest.$ac_objext \| "$lt_cv_sys_global_symbol_pipe" \> $nlist\""; } >&5
+  (eval $NM conftest.$ac_objext \| "$lt_cv_sys_global_symbol_pipe" \> $nlist) 
2>&5
+  ac_status=$?
+  printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; } && test -s "$nlist"; then
+      # Try sorting and uniquifying the output.
+      if sort "$nlist" | uniq > "$nlist"T; then
+       mv -f "$nlist"T "$nlist"
+      else
+       rm -f "$nlist"T
+      fi
+
+      # Make sure that we snagged all the symbols we need.
+      if $GREP ' nm_test_var$' "$nlist" >/dev/null; then
+       if $GREP ' nm_test_func$' "$nlist" >/dev/null; then
+         cat <<_LT_EOF > conftest.$ac_ext
+/* Keep this code in sync between libtool.m4, ltmain, lt_system.h, and tests.  
*/
+#if defined(_WIN32) || defined(__CYGWIN__) || defined(_WIN32_WCE)
+/* DATA imports from DLLs on WIN32 con't be const, because runtime
+   relocations are performed -- see ld's documentation on pseudo-relocs.  */
+# define LT_DLSYM_CONST
+#elif defined(__osf__)
+/* This system does not cope well with relocations in const data.  */
+# define LT_DLSYM_CONST
+#else
+# define LT_DLSYM_CONST const
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+_LT_EOF
+         # Now generate the symbol file.
+         eval "$lt_cv_sys_global_symbol_to_cdecl"' < "$nlist" | $GREP -v main 
>> conftest.$ac_ext'
+
+         cat <<_LT_EOF >> conftest.$ac_ext
+
+/* The mapping between symbol names and symbols.  */
+LT_DLSYM_CONST struct {
+  const char *name;
+  void       *address;
+}
+lt__PROGRAM__LTX_preloaded_symbols[] =
+{
+  { "@PROGRAM@", (void *) 0 },
+_LT_EOF
+         $SED "s/^$symcode$symcode* \(.*\) \(.*\)$/  {\"\2\", (void *) 
\&\2},/" < "$nlist" | $GREP -v main >> conftest.$ac_ext
+         cat <<\_LT_EOF >> conftest.$ac_ext
+  {0, (void *) 0}
+};
+
+/* This works around a problem in FreeBSD linker */
+#ifdef FREEBSD_WORKAROUND
+static const void *lt_preloaded_setup() {
+  return lt__PROGRAM__LTX_preloaded_symbols;
+}
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+_LT_EOF
+         # Now try linking the two files.
+         mv conftest.$ac_objext conftstm.$ac_objext
+         lt_globsym_save_LIBS=$LIBS
+         lt_globsym_save_CFLAGS=$CFLAGS
+         LIBS="conftstm.$ac_objext"
+         CFLAGS="$CFLAGS$lt_prog_compiler_no_builtin_flag"
+         if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_link\""; } 
>&5
+  (eval $ac_link) 2>&5
+  ac_status=$?
+  printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; } && test -s conftest${ac_exeext}; then
+           pipe_works=yes
+         fi
+         LIBS=$lt_globsym_save_LIBS
+         CFLAGS=$lt_globsym_save_CFLAGS
+       else
+         echo "cannot find nm_test_func in $nlist" >&5
+       fi
+      else
+       echo "cannot find nm_test_var in $nlist" >&5
+      fi
+    else
+      echo "cannot run $lt_cv_sys_global_symbol_pipe" >&5
+    fi
+  else
+    echo "$progname: failed program was:" >&5
+    cat conftest.$ac_ext >&5
+  fi
+  rm -rf conftest* conftst*
+
+  # Do not use the global_symbol_pipe unless it works.
+  if test "$pipe_works" = yes; then
+    break
+  else
+    lt_cv_sys_global_symbol_pipe=
+  fi
+done
+
+fi
+
+if test -z "$lt_cv_sys_global_symbol_pipe"; then
+  lt_cv_sys_global_symbol_to_cdecl=
+fi
+if test -z "$lt_cv_sys_global_symbol_pipe$lt_cv_sys_global_symbol_to_cdecl"; 
then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: failed" >&5
+printf "%s\n" "failed" >&6; }
+else
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: ok" >&5
+printf "%s\n" "ok" >&6; }
+fi
+
+# Response file support.
+if test "$lt_cv_nm_interface" = "MS dumpbin"; then
+  nm_file_list_spec='@'
+elif $NM --help 2>/dev/null | grep '[@]FILE' >/dev/null; then
+  nm_file_list_spec='@'
+fi
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for sysroot" >&5
+printf %s "checking for sysroot... " >&6; }
+
+# Check whether --with-sysroot was given.
+if test ${with_sysroot+y}
+then :
+  withval=$with_sysroot;
+else $as_nop
+  with_sysroot=no
+fi
+
+
+lt_sysroot=
+case ${with_sysroot} in #(
+ yes)
+   if test "$GCC" = yes; then
+     lt_sysroot=`$CC --print-sysroot 2>/dev/null`
+   fi
+   ;; #(
+ /*)
+   lt_sysroot=`echo "$with_sysroot" | sed -e "$sed_quote_subst"`
+   ;; #(
+ no|'')
+   ;; #(
+ *)
+   { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: ${with_sysroot}" >&5
+printf "%s\n" "${with_sysroot}" >&6; }
+   as_fn_error $? "The sysroot must be an absolute path." "$LINENO" 5
+   ;;
+esac
+
+ { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: ${lt_sysroot:-no}" >&5
+printf "%s\n" "${lt_sysroot:-no}" >&6; }
+
+
+
+
+
+# Check whether --enable-libtool-lock was given.
+if test ${enable_libtool_lock+y}
+then :
+  enableval=$enable_libtool_lock;
+fi
+
+test "x$enable_libtool_lock" != xno && enable_libtool_lock=yes
+
+# Some flags need to be propagated to the compiler or linker for good
+# libtool support.
+case $host in
+ia64-*-hpux*)
+  # Find out which ABI we are using.
+  echo 'int i;' > conftest.$ac_ext
+  if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5
+  (eval $ac_compile) 2>&5
+  ac_status=$?
+  printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+    case `/usr/bin/file conftest.$ac_objext` in
+      *ELF-32*)
+       HPUX_IA64_MODE="32"
+       ;;
+      *ELF-64*)
+       HPUX_IA64_MODE="64"
+       ;;
+    esac
+  fi
+  rm -rf conftest*
+  ;;
+*-*-irix6*)
+  # Find out which ABI we are using.
+  echo '#line '$LINENO' "configure"' > conftest.$ac_ext
+  if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5
+  (eval $ac_compile) 2>&5
+  ac_status=$?
+  printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+    if test "$lt_cv_prog_gnu_ld" = yes; then
+      case `/usr/bin/file conftest.$ac_objext` in
+       *32-bit*)
+         LD="${LD-ld} -melf32bsmip"
+         ;;
+       *N32*)
+         LD="${LD-ld} -melf32bmipn32"
+         ;;
+       *64-bit*)
+         LD="${LD-ld} -melf64bmip"
+       ;;
+      esac
+    else
+      case `/usr/bin/file conftest.$ac_objext` in
+       *32-bit*)
+         LD="${LD-ld} -32"
+         ;;
+       *N32*)
+         LD="${LD-ld} -n32"
+         ;;
+       *64-bit*)
+         LD="${LD-ld} -64"
+         ;;
+      esac
+    fi
+  fi
+  rm -rf conftest*
+  ;;
+
+x86_64-*kfreebsd*-gnu|x86_64-*linux*|ppc*-*linux*|powerpc*-*linux*| \
+s390*-*linux*|s390*-*tpf*|sparc*-*linux*)
+  # Find out which ABI we are using.
+  echo 'int i;' > conftest.$ac_ext
+  if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5
+  (eval $ac_compile) 2>&5
+  ac_status=$?
+  printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+    case `/usr/bin/file conftest.o` in
+      *32-bit*)
+       case $host in
+         x86_64-*kfreebsd*-gnu)
+           LD="${LD-ld} -m elf_i386_fbsd"
+           ;;
+         x86_64-*linux*)
+           LD="${LD-ld} -m elf_i386"
+           ;;
+         powerpc64le-*)
+           LD="${LD-ld} -m elf32lppclinux"
+           ;;
+         powerpc64-*)
+           LD="${LD-ld} -m elf32ppclinux"
+           ;;
+         s390x-*linux*)
+           LD="${LD-ld} -m elf_s390"
+           ;;
+         sparc64-*linux*)
+           LD="${LD-ld} -m elf32_sparc"
+           ;;
+       esac
+       ;;
+      *64-bit*)
+       case $host in
+         x86_64-*kfreebsd*-gnu)
+           LD="${LD-ld} -m elf_x86_64_fbsd"
+           ;;
+         x86_64-*linux*)
+           LD="${LD-ld} -m elf_x86_64"
+           ;;
+         powerpcle-*)
+           LD="${LD-ld} -m elf64lppc"
+           ;;
+         powerpc-*)
+           LD="${LD-ld} -m elf64ppc"
+           ;;
+         s390*-*linux*|s390*-*tpf*)
+           LD="${LD-ld} -m elf64_s390"
+           ;;
+         sparc*-*linux*)
+           LD="${LD-ld} -m elf64_sparc"
+           ;;
+       esac
+       ;;
+    esac
+  fi
+  rm -rf conftest*
+  ;;
+
+*-*-sco3.2v5*)
+  # On SCO OpenServer 5, we need -belf to get full-featured binaries.
+  SAVE_CFLAGS="$CFLAGS"
+  CFLAGS="$CFLAGS -belf"
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the C 
compiler needs -belf" >&5
+printf %s "checking whether the C compiler needs -belf... " >&6; }
+if test ${lt_cv_cc_needs_belf+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext 
$LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+     cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main (void)
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"
+then :
+  lt_cv_cc_needs_belf=yes
+else $as_nop
+  lt_cv_cc_needs_belf=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam \
+    conftest$ac_exeext conftest.$ac_ext
+     ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext 
$LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_cc_needs_belf" >&5
+printf "%s\n" "$lt_cv_cc_needs_belf" >&6; }
+  if test x"$lt_cv_cc_needs_belf" != x"yes"; then
+    # this is probably gcc 2.8.0, egcs 1.0 or newer; no need for -belf
+    CFLAGS="$SAVE_CFLAGS"
+  fi
+  ;;
+*-*solaris*)
+  # Find out which ABI we are using.
+  echo 'int i;' > conftest.$ac_ext
+  if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5
+  (eval $ac_compile) 2>&5
+  ac_status=$?
+  printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+    case `/usr/bin/file conftest.o` in
+    *64-bit*)
+      case $lt_cv_prog_gnu_ld in
+      yes*)
+        case $host in
+        i?86-*-solaris*)
+          LD="${LD-ld} -m elf_x86_64"
+          ;;
+        sparc*-*-solaris*)
+          LD="${LD-ld} -m elf64_sparc"
+          ;;
+        esac
+        # GNU ld 2.21 introduced _sol2 emulations.  Use them if available.
+        if ${LD-ld} -V | grep _sol2 >/dev/null 2>&1; then
+          LD="${LD-ld}_sol2"
+        fi
+        ;;
+      *)
+       if ${LD-ld} -64 -r -o conftest2.o conftest.o >/dev/null 2>&1; then
+         LD="${LD-ld} -64"
+       fi
+       ;;
+      esac
+      ;;
+    esac
+  fi
+  rm -rf conftest*
+  ;;
+esac
+
+need_locks="$enable_libtool_lock"
+
+if test -n "$ac_tool_prefix"; then
+  # Extract the first word of "${ac_tool_prefix}mt", so it can be a program 
name with args.
+set dummy ${ac_tool_prefix}mt; ac_word=$2
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+printf %s "checking for $ac_word... " >&6; }
+if test ${ac_cv_prog_MANIFEST_TOOL+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test -n "$MANIFEST_TOOL"; then
+  ac_cv_prog_MANIFEST_TOOL="$MANIFEST_TOOL" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  case $as_dir in #(((
+    '') as_dir=./ ;;
+    */) ;;
+    *) as_dir=$as_dir/ ;;
+  esac
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then
+    ac_cv_prog_MANIFEST_TOOL="${ac_tool_prefix}mt"
+    printf "%s\n" "$as_me:${as_lineno-$LINENO}: found 
$as_dir$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+MANIFEST_TOOL=$ac_cv_prog_MANIFEST_TOOL
+if test -n "$MANIFEST_TOOL"; then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $MANIFEST_TOOL" >&5
+printf "%s\n" "$MANIFEST_TOOL" >&6; }
+else
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+fi
+
+
+fi
+if test -z "$ac_cv_prog_MANIFEST_TOOL"; then
+  ac_ct_MANIFEST_TOOL=$MANIFEST_TOOL
+  # Extract the first word of "mt", so it can be a program name with args.
+set dummy mt; ac_word=$2
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+printf %s "checking for $ac_word... " >&6; }
+if test ${ac_cv_prog_ac_ct_MANIFEST_TOOL+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test -n "$ac_ct_MANIFEST_TOOL"; then
+  ac_cv_prog_ac_ct_MANIFEST_TOOL="$ac_ct_MANIFEST_TOOL" # Let the user 
override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  case $as_dir in #(((
+    '') as_dir=./ ;;
+    */) ;;
+    *) as_dir=$as_dir/ ;;
+  esac
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then
+    ac_cv_prog_ac_ct_MANIFEST_TOOL="mt"
+    printf "%s\n" "$as_me:${as_lineno-$LINENO}: found 
$as_dir$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_MANIFEST_TOOL=$ac_cv_prog_ac_ct_MANIFEST_TOOL
+if test -n "$ac_ct_MANIFEST_TOOL"; then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_MANIFEST_TOOL" 
>&5
+printf "%s\n" "$ac_ct_MANIFEST_TOOL" >&6; }
+else
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+fi
+
+  if test "x$ac_ct_MANIFEST_TOOL" = x; then
+    MANIFEST_TOOL=":"
+  else
+    case $cross_compiling:$ac_tool_warned in
+yes:)
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not 
prefixed with host triplet" >&5
+printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host 
triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    MANIFEST_TOOL=$ac_ct_MANIFEST_TOOL
+  fi
+else
+  MANIFEST_TOOL="$ac_cv_prog_MANIFEST_TOOL"
+fi
+
+test -z "$MANIFEST_TOOL" && MANIFEST_TOOL=mt
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if $MANIFEST_TOOL is a 
manifest tool" >&5
+printf %s "checking if $MANIFEST_TOOL is a manifest tool... " >&6; }
+if test ${lt_cv_path_mainfest_tool+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  lt_cv_path_mainfest_tool=no
+  echo "$as_me:$LINENO: $MANIFEST_TOOL '-?'" >&5
+  $MANIFEST_TOOL '-?' 2>conftest.err > conftest.out
+  cat conftest.err >&5
+  if $GREP 'Manifest Tool' conftest.out > /dev/null; then
+    lt_cv_path_mainfest_tool=yes
+  fi
+  rm -f conftest*
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: 
$lt_cv_path_mainfest_tool" >&5
+printf "%s\n" "$lt_cv_path_mainfest_tool" >&6; }
+if test "x$lt_cv_path_mainfest_tool" != xyes; then
+  MANIFEST_TOOL=:
+fi
+
+
+
+
+
+
+  case $host_os in
+    rhapsody* | darwin*)
+    if test -n "$ac_tool_prefix"; then
+  # Extract the first word of "${ac_tool_prefix}dsymutil", so it can be a 
program name with args.
+set dummy ${ac_tool_prefix}dsymutil; ac_word=$2
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+printf %s "checking for $ac_word... " >&6; }
+if test ${ac_cv_prog_DSYMUTIL+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test -n "$DSYMUTIL"; then
+  ac_cv_prog_DSYMUTIL="$DSYMUTIL" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  case $as_dir in #(((
+    '') as_dir=./ ;;
+    */) ;;
+    *) as_dir=$as_dir/ ;;
+  esac
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then
+    ac_cv_prog_DSYMUTIL="${ac_tool_prefix}dsymutil"
+    printf "%s\n" "$as_me:${as_lineno-$LINENO}: found 
$as_dir$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+DSYMUTIL=$ac_cv_prog_DSYMUTIL
+if test -n "$DSYMUTIL"; then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $DSYMUTIL" >&5
+printf "%s\n" "$DSYMUTIL" >&6; }
+else
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+fi
+
+
+fi
+if test -z "$ac_cv_prog_DSYMUTIL"; then
+  ac_ct_DSYMUTIL=$DSYMUTIL
+  # Extract the first word of "dsymutil", so it can be a program name with 
args.
+set dummy dsymutil; ac_word=$2
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+printf %s "checking for $ac_word... " >&6; }
+if test ${ac_cv_prog_ac_ct_DSYMUTIL+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test -n "$ac_ct_DSYMUTIL"; then
+  ac_cv_prog_ac_ct_DSYMUTIL="$ac_ct_DSYMUTIL" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  case $as_dir in #(((
+    '') as_dir=./ ;;
+    */) ;;
+    *) as_dir=$as_dir/ ;;
+  esac
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then
+    ac_cv_prog_ac_ct_DSYMUTIL="dsymutil"
+    printf "%s\n" "$as_me:${as_lineno-$LINENO}: found 
$as_dir$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_DSYMUTIL=$ac_cv_prog_ac_ct_DSYMUTIL
+if test -n "$ac_ct_DSYMUTIL"; then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_DSYMUTIL" >&5
+printf "%s\n" "$ac_ct_DSYMUTIL" >&6; }
+else
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+fi
+
+  if test "x$ac_ct_DSYMUTIL" = x; then
+    DSYMUTIL=":"
+  else
+    case $cross_compiling:$ac_tool_warned in
+yes:)
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not 
prefixed with host triplet" >&5
+printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host 
triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    DSYMUTIL=$ac_ct_DSYMUTIL
+  fi
+else
+  DSYMUTIL="$ac_cv_prog_DSYMUTIL"
+fi
+
+    if test -n "$ac_tool_prefix"; then
+  # Extract the first word of "${ac_tool_prefix}nmedit", so it can be a 
program name with args.
+set dummy ${ac_tool_prefix}nmedit; ac_word=$2
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+printf %s "checking for $ac_word... " >&6; }
+if test ${ac_cv_prog_NMEDIT+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test -n "$NMEDIT"; then
+  ac_cv_prog_NMEDIT="$NMEDIT" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  case $as_dir in #(((
+    '') as_dir=./ ;;
+    */) ;;
+    *) as_dir=$as_dir/ ;;
+  esac
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then
+    ac_cv_prog_NMEDIT="${ac_tool_prefix}nmedit"
+    printf "%s\n" "$as_me:${as_lineno-$LINENO}: found 
$as_dir$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+NMEDIT=$ac_cv_prog_NMEDIT
+if test -n "$NMEDIT"; then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $NMEDIT" >&5
+printf "%s\n" "$NMEDIT" >&6; }
+else
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+fi
+
+
+fi
+if test -z "$ac_cv_prog_NMEDIT"; then
+  ac_ct_NMEDIT=$NMEDIT
+  # Extract the first word of "nmedit", so it can be a program name with args.
+set dummy nmedit; ac_word=$2
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+printf %s "checking for $ac_word... " >&6; }
+if test ${ac_cv_prog_ac_ct_NMEDIT+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test -n "$ac_ct_NMEDIT"; then
+  ac_cv_prog_ac_ct_NMEDIT="$ac_ct_NMEDIT" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  case $as_dir in #(((
+    '') as_dir=./ ;;
+    */) ;;
+    *) as_dir=$as_dir/ ;;
+  esac
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then
+    ac_cv_prog_ac_ct_NMEDIT="nmedit"
+    printf "%s\n" "$as_me:${as_lineno-$LINENO}: found 
$as_dir$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_NMEDIT=$ac_cv_prog_ac_ct_NMEDIT
+if test -n "$ac_ct_NMEDIT"; then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_NMEDIT" >&5
+printf "%s\n" "$ac_ct_NMEDIT" >&6; }
+else
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+fi
+
+  if test "x$ac_ct_NMEDIT" = x; then
+    NMEDIT=":"
+  else
+    case $cross_compiling:$ac_tool_warned in
+yes:)
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not 
prefixed with host triplet" >&5
+printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host 
triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    NMEDIT=$ac_ct_NMEDIT
+  fi
+else
+  NMEDIT="$ac_cv_prog_NMEDIT"
+fi
+
+    if test -n "$ac_tool_prefix"; then
+  # Extract the first word of "${ac_tool_prefix}lipo", so it can be a program 
name with args.
+set dummy ${ac_tool_prefix}lipo; ac_word=$2
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+printf %s "checking for $ac_word... " >&6; }
+if test ${ac_cv_prog_LIPO+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test -n "$LIPO"; then
+  ac_cv_prog_LIPO="$LIPO" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  case $as_dir in #(((
+    '') as_dir=./ ;;
+    */) ;;
+    *) as_dir=$as_dir/ ;;
+  esac
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then
+    ac_cv_prog_LIPO="${ac_tool_prefix}lipo"
+    printf "%s\n" "$as_me:${as_lineno-$LINENO}: found 
$as_dir$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+LIPO=$ac_cv_prog_LIPO
+if test -n "$LIPO"; then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $LIPO" >&5
+printf "%s\n" "$LIPO" >&6; }
+else
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+fi
+
+
+fi
+if test -z "$ac_cv_prog_LIPO"; then
+  ac_ct_LIPO=$LIPO
+  # Extract the first word of "lipo", so it can be a program name with args.
+set dummy lipo; ac_word=$2
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+printf %s "checking for $ac_word... " >&6; }
+if test ${ac_cv_prog_ac_ct_LIPO+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test -n "$ac_ct_LIPO"; then
+  ac_cv_prog_ac_ct_LIPO="$ac_ct_LIPO" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  case $as_dir in #(((
+    '') as_dir=./ ;;
+    */) ;;
+    *) as_dir=$as_dir/ ;;
+  esac
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then
+    ac_cv_prog_ac_ct_LIPO="lipo"
+    printf "%s\n" "$as_me:${as_lineno-$LINENO}: found 
$as_dir$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_LIPO=$ac_cv_prog_ac_ct_LIPO
+if test -n "$ac_ct_LIPO"; then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_LIPO" >&5
+printf "%s\n" "$ac_ct_LIPO" >&6; }
+else
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+fi
+
+  if test "x$ac_ct_LIPO" = x; then
+    LIPO=":"
+  else
+    case $cross_compiling:$ac_tool_warned in
+yes:)
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not 
prefixed with host triplet" >&5
+printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host 
triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    LIPO=$ac_ct_LIPO
+  fi
+else
+  LIPO="$ac_cv_prog_LIPO"
+fi
+
+    if test -n "$ac_tool_prefix"; then
+  # Extract the first word of "${ac_tool_prefix}otool", so it can be a program 
name with args.
+set dummy ${ac_tool_prefix}otool; ac_word=$2
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+printf %s "checking for $ac_word... " >&6; }
+if test ${ac_cv_prog_OTOOL+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test -n "$OTOOL"; then
+  ac_cv_prog_OTOOL="$OTOOL" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  case $as_dir in #(((
+    '') as_dir=./ ;;
+    */) ;;
+    *) as_dir=$as_dir/ ;;
+  esac
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then
+    ac_cv_prog_OTOOL="${ac_tool_prefix}otool"
+    printf "%s\n" "$as_me:${as_lineno-$LINENO}: found 
$as_dir$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+OTOOL=$ac_cv_prog_OTOOL
+if test -n "$OTOOL"; then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $OTOOL" >&5
+printf "%s\n" "$OTOOL" >&6; }
+else
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+fi
+
+
+fi
+if test -z "$ac_cv_prog_OTOOL"; then
+  ac_ct_OTOOL=$OTOOL
+  # Extract the first word of "otool", so it can be a program name with args.
+set dummy otool; ac_word=$2
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+printf %s "checking for $ac_word... " >&6; }
+if test ${ac_cv_prog_ac_ct_OTOOL+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test -n "$ac_ct_OTOOL"; then
+  ac_cv_prog_ac_ct_OTOOL="$ac_ct_OTOOL" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  case $as_dir in #(((
+    '') as_dir=./ ;;
+    */) ;;
+    *) as_dir=$as_dir/ ;;
+  esac
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then
+    ac_cv_prog_ac_ct_OTOOL="otool"
+    printf "%s\n" "$as_me:${as_lineno-$LINENO}: found 
$as_dir$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_OTOOL=$ac_cv_prog_ac_ct_OTOOL
+if test -n "$ac_ct_OTOOL"; then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_OTOOL" >&5
+printf "%s\n" "$ac_ct_OTOOL" >&6; }
+else
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+fi
+
+  if test "x$ac_ct_OTOOL" = x; then
+    OTOOL=":"
+  else
+    case $cross_compiling:$ac_tool_warned in
+yes:)
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not 
prefixed with host triplet" >&5
+printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host 
triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    OTOOL=$ac_ct_OTOOL
+  fi
+else
+  OTOOL="$ac_cv_prog_OTOOL"
+fi
+
+    if test -n "$ac_tool_prefix"; then
+  # Extract the first word of "${ac_tool_prefix}otool64", so it can be a 
program name with args.
+set dummy ${ac_tool_prefix}otool64; ac_word=$2
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+printf %s "checking for $ac_word... " >&6; }
+if test ${ac_cv_prog_OTOOL64+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test -n "$OTOOL64"; then
+  ac_cv_prog_OTOOL64="$OTOOL64" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  case $as_dir in #(((
+    '') as_dir=./ ;;
+    */) ;;
+    *) as_dir=$as_dir/ ;;
+  esac
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then
+    ac_cv_prog_OTOOL64="${ac_tool_prefix}otool64"
+    printf "%s\n" "$as_me:${as_lineno-$LINENO}: found 
$as_dir$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+OTOOL64=$ac_cv_prog_OTOOL64
+if test -n "$OTOOL64"; then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $OTOOL64" >&5
+printf "%s\n" "$OTOOL64" >&6; }
+else
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+fi
+
+
+fi
+if test -z "$ac_cv_prog_OTOOL64"; then
+  ac_ct_OTOOL64=$OTOOL64
+  # Extract the first word of "otool64", so it can be a program name with args.
+set dummy otool64; ac_word=$2
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+printf %s "checking for $ac_word... " >&6; }
+if test ${ac_cv_prog_ac_ct_OTOOL64+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test -n "$ac_ct_OTOOL64"; then
+  ac_cv_prog_ac_ct_OTOOL64="$ac_ct_OTOOL64" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  case $as_dir in #(((
+    '') as_dir=./ ;;
+    */) ;;
+    *) as_dir=$as_dir/ ;;
+  esac
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then
+    ac_cv_prog_ac_ct_OTOOL64="otool64"
+    printf "%s\n" "$as_me:${as_lineno-$LINENO}: found 
$as_dir$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_OTOOL64=$ac_cv_prog_ac_ct_OTOOL64
+if test -n "$ac_ct_OTOOL64"; then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_OTOOL64" >&5
+printf "%s\n" "$ac_ct_OTOOL64" >&6; }
+else
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+fi
+
+  if test "x$ac_ct_OTOOL64" = x; then
+    OTOOL64=":"
+  else
+    case $cross_compiling:$ac_tool_warned in
+yes:)
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not 
prefixed with host triplet" >&5
+printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host 
triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    OTOOL64=$ac_ct_OTOOL64
+  fi
+else
+  OTOOL64="$ac_cv_prog_OTOOL64"
+fi
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+    { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for -single_module 
linker flag" >&5
+printf %s "checking for -single_module linker flag... " >&6; }
+if test ${lt_cv_apple_cc_single_mod+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  lt_cv_apple_cc_single_mod=no
+      if test -z "${LT_MULTI_MODULE}"; then
+       # By default we will add the -single_module flag. You can override
+       # by either setting the environment variable LT_MULTI_MODULE
+       # non-empty at configure time, or by adding -multi_module to the
+       # link flags.
+       rm -rf libconftest.dylib*
+       echo "int foo(void){return 1;}" > conftest.c
+       echo "$LTCC $LTCFLAGS $LDFLAGS -o libconftest.dylib \
+-dynamiclib -Wl,-single_module conftest.c" >&5
+       $LTCC $LTCFLAGS $LDFLAGS -o libconftest.dylib \
+         -dynamiclib -Wl,-single_module conftest.c 2>conftest.err
+        _lt_result=$?
+       # If there is a non-empty error log, and "single_module"
+       # appears in it, assume the flag caused a linker warning
+        if test -s conftest.err && $GREP single_module conftest.err; then
+         cat conftest.err >&5
+       # Otherwise, if the output was created with a 0 exit code from
+       # the compiler, it worked.
+       elif test -f libconftest.dylib && test $_lt_result -eq 0; then
+         lt_cv_apple_cc_single_mod=yes
+       else
+         cat conftest.err >&5
+       fi
+       rm -rf libconftest.dylib*
+       rm -f conftest.*
+      fi
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: 
$lt_cv_apple_cc_single_mod" >&5
+printf "%s\n" "$lt_cv_apple_cc_single_mod" >&6; }
+
+    { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for 
-exported_symbols_list linker flag" >&5
+printf %s "checking for -exported_symbols_list linker flag... " >&6; }
+if test ${lt_cv_ld_exported_symbols_list+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  lt_cv_ld_exported_symbols_list=no
+      save_LDFLAGS=$LDFLAGS
+      echo "_main" > conftest.sym
+      LDFLAGS="$LDFLAGS -Wl,-exported_symbols_list,conftest.sym"
+      cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main (void)
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"
+then :
+  lt_cv_ld_exported_symbols_list=yes
+else $as_nop
+  lt_cv_ld_exported_symbols_list=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam \
+    conftest$ac_exeext conftest.$ac_ext
+       LDFLAGS="$save_LDFLAGS"
+
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: 
$lt_cv_ld_exported_symbols_list" >&5
+printf "%s\n" "$lt_cv_ld_exported_symbols_list" >&6; }
+
+    { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for -force_load 
linker flag" >&5
+printf %s "checking for -force_load linker flag... " >&6; }
+if test ${lt_cv_ld_force_load+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  lt_cv_ld_force_load=no
+      cat > conftest.c << _LT_EOF
+int forced_loaded() { return 2;}
+_LT_EOF
+      echo "$LTCC $LTCFLAGS -c -o conftest.o conftest.c" >&5
+      $LTCC $LTCFLAGS -c -o conftest.o conftest.c 2>&5
+      echo "$AR cru libconftest.a conftest.o" >&5
+      $AR cru libconftest.a conftest.o 2>&5
+      echo "$RANLIB libconftest.a" >&5
+      $RANLIB libconftest.a 2>&5
+      cat > conftest.c << _LT_EOF
+int main() { return 0;}
+_LT_EOF
+      echo "$LTCC $LTCFLAGS $LDFLAGS -o conftest conftest.c 
-Wl,-force_load,./libconftest.a" >&5
+      $LTCC $LTCFLAGS $LDFLAGS -o conftest conftest.c 
-Wl,-force_load,./libconftest.a 2>conftest.err
+      _lt_result=$?
+      if test -s conftest.err && $GREP force_load conftest.err; then
+       cat conftest.err >&5
+      elif test -f conftest && test $_lt_result -eq 0 && $GREP forced_load 
conftest >/dev/null 2>&1 ; then
+       lt_cv_ld_force_load=yes
+      else
+       cat conftest.err >&5
+      fi
+        rm -f conftest.err libconftest.a conftest conftest.c
+        rm -rf conftest.dSYM
+
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_ld_force_load" >&5
+printf "%s\n" "$lt_cv_ld_force_load" >&6; }
+    case $host_os in
+    rhapsody* | darwin1.[012])
+      _lt_dar_allow_undefined='${wl}-undefined ${wl}suppress' ;;
+    darwin1.*)
+      _lt_dar_allow_undefined='${wl}-flat_namespace ${wl}-undefined 
${wl}suppress' ;;
+    darwin*)
+      case ${MACOSX_DEPLOYMENT_TARGET},$host in
+       10.[012]*,*|,*powerpc*)
+         _lt_dar_allow_undefined='${wl}-flat_namespace ${wl}-undefined 
${wl}suppress' ;;
+       *)
+         _lt_dar_allow_undefined='${wl}-undefined ${wl}dynamic_lookup' ;;
+      esac
+    ;;
+  esac
+    if test "$lt_cv_apple_cc_single_mod" = "yes"; then
+      _lt_dar_single_mod='$single_module'
+    fi
+    if test "$lt_cv_ld_exported_symbols_list" = "yes"; then
+      _lt_dar_export_syms=' 
${wl}-exported_symbols_list,$output_objdir/${libname}-symbols.expsym'
+    else
+      _lt_dar_export_syms='~$NMEDIT -s 
$output_objdir/${libname}-symbols.expsym ${lib}'
+    fi
+    if test "$DSYMUTIL" != ":" && test "$lt_cv_ld_force_load" = "no"; then
+      _lt_dsymutil='~$DSYMUTIL $lib || :'
+    else
+      _lt_dsymutil=
+    fi
+    ;;
+  esac
+
+ac_fn_c_check_header_compile "$LINENO" "dlfcn.h" "ac_cv_header_dlfcn_h" 
"$ac_includes_default
+"
+if test "x$ac_cv_header_dlfcn_h" = xyes
+then :
+  printf "%s\n" "#define HAVE_DLFCN_H 1" >>confdefs.h
+
+fi
+
+
+
+
+
+# Set options
+enable_win32_dll=yes
+
+case $host in
+*-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-cegcc*)
+  if test -n "$ac_tool_prefix"; then
+  # Extract the first word of "${ac_tool_prefix}as", so it can be a program 
name with args.
+set dummy ${ac_tool_prefix}as; ac_word=$2
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+printf %s "checking for $ac_word... " >&6; }
+if test ${ac_cv_prog_AS+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test -n "$AS"; then
+  ac_cv_prog_AS="$AS" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  case $as_dir in #(((
+    '') as_dir=./ ;;
+    */) ;;
+    *) as_dir=$as_dir/ ;;
+  esac
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then
+    ac_cv_prog_AS="${ac_tool_prefix}as"
+    printf "%s\n" "$as_me:${as_lineno-$LINENO}: found 
$as_dir$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+AS=$ac_cv_prog_AS
+if test -n "$AS"; then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $AS" >&5
+printf "%s\n" "$AS" >&6; }
+else
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+fi
+
+
+fi
+if test -z "$ac_cv_prog_AS"; then
+  ac_ct_AS=$AS
+  # Extract the first word of "as", so it can be a program name with args.
+set dummy as; ac_word=$2
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+printf %s "checking for $ac_word... " >&6; }
+if test ${ac_cv_prog_ac_ct_AS+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test -n "$ac_ct_AS"; then
+  ac_cv_prog_ac_ct_AS="$ac_ct_AS" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  case $as_dir in #(((
+    '') as_dir=./ ;;
+    */) ;;
+    *) as_dir=$as_dir/ ;;
+  esac
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then
+    ac_cv_prog_ac_ct_AS="as"
+    printf "%s\n" "$as_me:${as_lineno-$LINENO}: found 
$as_dir$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_AS=$ac_cv_prog_ac_ct_AS
+if test -n "$ac_ct_AS"; then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_AS" >&5
+printf "%s\n" "$ac_ct_AS" >&6; }
+else
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+fi
+
+  if test "x$ac_ct_AS" = x; then
+    AS="false"
+  else
+    case $cross_compiling:$ac_tool_warned in
+yes:)
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not 
prefixed with host triplet" >&5
+printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host 
triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    AS=$ac_ct_AS
+  fi
+else
+  AS="$ac_cv_prog_AS"
+fi
+
+  if test -n "$ac_tool_prefix"; then
+  # Extract the first word of "${ac_tool_prefix}dlltool", so it can be a 
program name with args.
+set dummy ${ac_tool_prefix}dlltool; ac_word=$2
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+printf %s "checking for $ac_word... " >&6; }
+if test ${ac_cv_prog_DLLTOOL+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test -n "$DLLTOOL"; then
+  ac_cv_prog_DLLTOOL="$DLLTOOL" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  case $as_dir in #(((
+    '') as_dir=./ ;;
+    */) ;;
+    *) as_dir=$as_dir/ ;;
+  esac
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then
+    ac_cv_prog_DLLTOOL="${ac_tool_prefix}dlltool"
+    printf "%s\n" "$as_me:${as_lineno-$LINENO}: found 
$as_dir$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+DLLTOOL=$ac_cv_prog_DLLTOOL
+if test -n "$DLLTOOL"; then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $DLLTOOL" >&5
+printf "%s\n" "$DLLTOOL" >&6; }
+else
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+fi
+
+
+fi
+if test -z "$ac_cv_prog_DLLTOOL"; then
+  ac_ct_DLLTOOL=$DLLTOOL
+  # Extract the first word of "dlltool", so it can be a program name with args.
+set dummy dlltool; ac_word=$2
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+printf %s "checking for $ac_word... " >&6; }
+if test ${ac_cv_prog_ac_ct_DLLTOOL+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test -n "$ac_ct_DLLTOOL"; then
+  ac_cv_prog_ac_ct_DLLTOOL="$ac_ct_DLLTOOL" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  case $as_dir in #(((
+    '') as_dir=./ ;;
+    */) ;;
+    *) as_dir=$as_dir/ ;;
+  esac
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then
+    ac_cv_prog_ac_ct_DLLTOOL="dlltool"
+    printf "%s\n" "$as_me:${as_lineno-$LINENO}: found 
$as_dir$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_DLLTOOL=$ac_cv_prog_ac_ct_DLLTOOL
+if test -n "$ac_ct_DLLTOOL"; then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_DLLTOOL" >&5
+printf "%s\n" "$ac_ct_DLLTOOL" >&6; }
+else
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+fi
+
+  if test "x$ac_ct_DLLTOOL" = x; then
+    DLLTOOL="false"
+  else
+    case $cross_compiling:$ac_tool_warned in
+yes:)
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not 
prefixed with host triplet" >&5
+printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host 
triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    DLLTOOL=$ac_ct_DLLTOOL
+  fi
+else
+  DLLTOOL="$ac_cv_prog_DLLTOOL"
+fi
+
+  if test -n "$ac_tool_prefix"; then
+  # Extract the first word of "${ac_tool_prefix}objdump", so it can be a 
program name with args.
+set dummy ${ac_tool_prefix}objdump; ac_word=$2
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+printf %s "checking for $ac_word... " >&6; }
+if test ${ac_cv_prog_OBJDUMP+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test -n "$OBJDUMP"; then
+  ac_cv_prog_OBJDUMP="$OBJDUMP" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  case $as_dir in #(((
+    '') as_dir=./ ;;
+    */) ;;
+    *) as_dir=$as_dir/ ;;
+  esac
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then
+    ac_cv_prog_OBJDUMP="${ac_tool_prefix}objdump"
+    printf "%s\n" "$as_me:${as_lineno-$LINENO}: found 
$as_dir$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+OBJDUMP=$ac_cv_prog_OBJDUMP
+if test -n "$OBJDUMP"; then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $OBJDUMP" >&5
+printf "%s\n" "$OBJDUMP" >&6; }
+else
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+fi
+
+
+fi
+if test -z "$ac_cv_prog_OBJDUMP"; then
+  ac_ct_OBJDUMP=$OBJDUMP
+  # Extract the first word of "objdump", so it can be a program name with args.
+set dummy objdump; ac_word=$2
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+printf %s "checking for $ac_word... " >&6; }
+if test ${ac_cv_prog_ac_ct_OBJDUMP+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test -n "$ac_ct_OBJDUMP"; then
+  ac_cv_prog_ac_ct_OBJDUMP="$ac_ct_OBJDUMP" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  case $as_dir in #(((
+    '') as_dir=./ ;;
+    */) ;;
+    *) as_dir=$as_dir/ ;;
+  esac
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then
+    ac_cv_prog_ac_ct_OBJDUMP="objdump"
+    printf "%s\n" "$as_me:${as_lineno-$LINENO}: found 
$as_dir$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_OBJDUMP=$ac_cv_prog_ac_ct_OBJDUMP
+if test -n "$ac_ct_OBJDUMP"; then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_OBJDUMP" >&5
+printf "%s\n" "$ac_ct_OBJDUMP" >&6; }
+else
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+fi
+
+  if test "x$ac_ct_OBJDUMP" = x; then
+    OBJDUMP="false"
+  else
+    case $cross_compiling:$ac_tool_warned in
+yes:)
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not 
prefixed with host triplet" >&5
+printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host 
triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    OBJDUMP=$ac_ct_OBJDUMP
+  fi
+else
+  OBJDUMP="$ac_cv_prog_OBJDUMP"
+fi
+
+  ;;
+esac
+
+test -z "$AS" && AS=as
+
+
+
+
+
+test -z "$DLLTOOL" && DLLTOOL=dlltool
+
+
+
+
+
+test -z "$OBJDUMP" && OBJDUMP=objdump
+
+
+
+
+# Check whether --enable-static was given.
+if test ${enable_static+y}
+then :
+  enableval=$enable_static; p=${PACKAGE-default}
+    case $enableval in
+    yes) enable_static=yes ;;
+    no) enable_static=no ;;
+    *)
+     enable_static=no
+      # Look at the argument we got.  We use all the common list separators.
+      lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR,"
+      for pkg in $enableval; do
+       IFS="$lt_save_ifs"
+       if test "X$pkg" = "X$p"; then
+         enable_static=yes
+       fi
+      done
+      IFS="$lt_save_ifs"
+      ;;
+    esac
+else $as_nop
+  enable_static=no
+fi
+
+
+
+
+
+
+
+
+
+
+        enable_dlopen=no
+
+
+
+            # Check whether --enable-shared was given.
+if test ${enable_shared+y}
+then :
+  enableval=$enable_shared; p=${PACKAGE-default}
+    case $enableval in
+    yes) enable_shared=yes ;;
+    no) enable_shared=no ;;
+    *)
+      enable_shared=no
+      # Look at the argument we got.  We use all the common list separators.
+      lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR,"
+      for pkg in $enableval; do
+       IFS="$lt_save_ifs"
+       if test "X$pkg" = "X$p"; then
+         enable_shared=yes
+       fi
+      done
+      IFS="$lt_save_ifs"
+      ;;
+    esac
+else $as_nop
+  enable_shared=yes
+fi
+
+
+
+
+
+
+
+
+
+
+
+# Check whether --with-pic was given.
+if test ${with_pic+y}
+then :
+  withval=$with_pic; lt_p=${PACKAGE-default}
+    case $withval in
+    yes|no) pic_mode=$withval ;;
+    *)
+      pic_mode=default
+      # Look at the argument we got.  We use all the common list separators.
+      lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR,"
+      for lt_pkg in $withval; do
+       IFS="$lt_save_ifs"
+       if test "X$lt_pkg" = "X$lt_p"; then
+         pic_mode=yes
+       fi
+      done
+      IFS="$lt_save_ifs"
+      ;;
+    esac
+else $as_nop
+  pic_mode=default
+fi
+
+
+test -z "$pic_mode" && pic_mode=default
+
+
+
+
+
+
+
+  # Check whether --enable-fast-install was given.
+if test ${enable_fast_install+y}
+then :
+  enableval=$enable_fast_install; p=${PACKAGE-default}
+    case $enableval in
+    yes) enable_fast_install=yes ;;
+    no) enable_fast_install=no ;;
+    *)
+      enable_fast_install=no
+      # Look at the argument we got.  We use all the common list separators.
+      lt_save_ifs="$IFS"; IFS="${IFS}$PATH_SEPARATOR,"
+      for pkg in $enableval; do
+       IFS="$lt_save_ifs"
+       if test "X$pkg" = "X$p"; then
+         enable_fast_install=yes
+       fi
+      done
+      IFS="$lt_save_ifs"
+      ;;
+    esac
+else $as_nop
+  enable_fast_install=yes
+fi
+
+
+
+
+
+
+
+
+
+
+
+# This can be used to rebuild libtool when needed
+LIBTOOL_DEPS="$ltmain"
+
+# Always use our own libtool.
+LIBTOOL='$(SHELL) $(top_builddir)/libtool'
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+test -z "$LN_S" && LN_S="ln -s"
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+if test -n "${ZSH_VERSION+set}" ; then
+   setopt NO_GLOB_SUBST
+fi
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for objdir" >&5
+printf %s "checking for objdir... " >&6; }
+if test ${lt_cv_objdir+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  rm -f .libs 2>/dev/null
+mkdir .libs 2>/dev/null
+if test -d .libs; then
+  lt_cv_objdir=.libs
+else
+  # MS-DOS does not allow filenames that begin with a dot.
+  lt_cv_objdir=_libs
+fi
+rmdir .libs 2>/dev/null
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_objdir" >&5
+printf "%s\n" "$lt_cv_objdir" >&6; }
+objdir=$lt_cv_objdir
+
+
+
+
+
+printf "%s\n" "#define LT_OBJDIR \"$lt_cv_objdir/\"" >>confdefs.h
+
+
+
+
+case $host_os in
+aix3*)
+  # AIX sometimes has problems with the GCC collect2 program.  For some
+  # reason, if we set the COLLECT_NAMES environment variable, the problems
+  # vanish in a puff of smoke.
+  if test "X${COLLECT_NAMES+set}" != Xset; then
+    COLLECT_NAMES=
+    export COLLECT_NAMES
+  fi
+  ;;
+esac
+
+# Global variables:
+ofile=libtool
+can_build_shared=yes
+
+# All known linkers require a `.a' archive for static linking (except MSVC,
+# which needs '.lib').
+libext=a
+
+with_gnu_ld="$lt_cv_prog_gnu_ld"
+
+old_CC="$CC"
+old_CFLAGS="$CFLAGS"
+
+# Set sane defaults for various variables
+test -z "$CC" && CC=cc
+test -z "$LTCC" && LTCC=$CC
+test -z "$LTCFLAGS" && LTCFLAGS=$CFLAGS
+test -z "$LD" && LD=ld
+test -z "$ac_objext" && ac_objext=o
+
+for cc_temp in $compiler""; do
+  case $cc_temp in
+    compile | *[\\/]compile | ccache | *[\\/]ccache ) ;;
+    distcc | *[\\/]distcc | purify | *[\\/]purify ) ;;
+    \-*) ;;
+    *) break;;
+  esac
+done
+cc_basename=`$ECHO "$cc_temp" | $SED "s%.*/%%; s%^$host_alias-%%"`
+
+
+# Only perform the check for file, if the check method requires it
+test -z "$MAGIC_CMD" && MAGIC_CMD=file
+case $deplibs_check_method in
+file_magic*)
+  if test "$file_magic_cmd" = '$MAGIC_CMD'; then
+    { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for 
${ac_tool_prefix}file" >&5
+printf %s "checking for ${ac_tool_prefix}file... " >&6; }
+if test ${lt_cv_path_MAGIC_CMD+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  case $MAGIC_CMD in
+[\\/*] |  ?:[\\/]*)
+  lt_cv_path_MAGIC_CMD="$MAGIC_CMD" # Let the user override the test with a 
path.
+  ;;
+*)
+  lt_save_MAGIC_CMD="$MAGIC_CMD"
+  lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR
+  ac_dummy="/usr/bin$PATH_SEPARATOR$PATH"
+  for ac_dir in $ac_dummy; do
+    IFS="$lt_save_ifs"
+    test -z "$ac_dir" && ac_dir=.
+    if test -f $ac_dir/${ac_tool_prefix}file; then
+      lt_cv_path_MAGIC_CMD="$ac_dir/${ac_tool_prefix}file"
+      if test -n "$file_magic_test_file"; then
+       case $deplibs_check_method in
+       "file_magic "*)
+         file_magic_regex=`expr "$deplibs_check_method" : "file_magic \(.*\)"`
+         MAGIC_CMD="$lt_cv_path_MAGIC_CMD"
+         if eval $file_magic_cmd \$file_magic_test_file 2> /dev/null |
+           $EGREP "$file_magic_regex" > /dev/null; then
+           :
+         else
+           cat <<_LT_EOF 1>&2
+
+*** Warning: the command libtool uses to detect shared libraries,
+*** $file_magic_cmd, produces output that libtool cannot recognize.
+*** The result is that libtool may fail to recognize shared libraries
+*** as such.  This will affect the creation of libtool libraries that
+*** depend on shared libraries, but programs linked with such libtool
+*** libraries will work regardless of this problem.  Nevertheless, you
+*** may want to report the problem to your system manager and/or to
+*** bug-libtool@gnu.org
+
+_LT_EOF
+         fi ;;
+       esac
+      fi
+      break
+    fi
+  done
+  IFS="$lt_save_ifs"
+  MAGIC_CMD="$lt_save_MAGIC_CMD"
+  ;;
+esac
+fi
+
+MAGIC_CMD="$lt_cv_path_MAGIC_CMD"
+if test -n "$MAGIC_CMD"; then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $MAGIC_CMD" >&5
+printf "%s\n" "$MAGIC_CMD" >&6; }
+else
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+fi
+
+
+
+
+
+if test -z "$lt_cv_path_MAGIC_CMD"; then
+  if test -n "$ac_tool_prefix"; then
+    { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for file" >&5
+printf %s "checking for file... " >&6; }
+if test ${lt_cv_path_MAGIC_CMD+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  case $MAGIC_CMD in
+[\\/*] |  ?:[\\/]*)
+  lt_cv_path_MAGIC_CMD="$MAGIC_CMD" # Let the user override the test with a 
path.
+  ;;
+*)
+  lt_save_MAGIC_CMD="$MAGIC_CMD"
+  lt_save_ifs="$IFS"; IFS=$PATH_SEPARATOR
+  ac_dummy="/usr/bin$PATH_SEPARATOR$PATH"
+  for ac_dir in $ac_dummy; do
+    IFS="$lt_save_ifs"
+    test -z "$ac_dir" && ac_dir=.
+    if test -f $ac_dir/file; then
+      lt_cv_path_MAGIC_CMD="$ac_dir/file"
+      if test -n "$file_magic_test_file"; then
+       case $deplibs_check_method in
+       "file_magic "*)
+         file_magic_regex=`expr "$deplibs_check_method" : "file_magic \(.*\)"`
+         MAGIC_CMD="$lt_cv_path_MAGIC_CMD"
+         if eval $file_magic_cmd \$file_magic_test_file 2> /dev/null |
+           $EGREP "$file_magic_regex" > /dev/null; then
+           :
+         else
+           cat <<_LT_EOF 1>&2
+
+*** Warning: the command libtool uses to detect shared libraries,
+*** $file_magic_cmd, produces output that libtool cannot recognize.
+*** The result is that libtool may fail to recognize shared libraries
+*** as such.  This will affect the creation of libtool libraries that
+*** depend on shared libraries, but programs linked with such libtool
+*** libraries will work regardless of this problem.  Nevertheless, you
+*** may want to report the problem to your system manager and/or to
+*** bug-libtool@gnu.org
+
+_LT_EOF
+         fi ;;
+       esac
+      fi
+      break
+    fi
+  done
+  IFS="$lt_save_ifs"
+  MAGIC_CMD="$lt_save_MAGIC_CMD"
+  ;;
+esac
+fi
+
+MAGIC_CMD="$lt_cv_path_MAGIC_CMD"
+if test -n "$MAGIC_CMD"; then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $MAGIC_CMD" >&5
+printf "%s\n" "$MAGIC_CMD" >&6; }
+else
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+fi
+
+
+  else
+    MAGIC_CMD=:
+  fi
+fi
+
+  fi
+  ;;
+esac
+
+# Use C for the default configuration in the libtool script
+
+lt_save_CC="$CC"
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext 
$LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+
+# Source file extension for C test sources.
+ac_ext=c
+
+# Object file extension for compiled C test sources.
+objext=o
+objext=$objext
+
+# Code to be used in simple compile tests
+lt_simple_compile_test_code="int some_variable = 0;"
+
+# Code to be used in simple link tests
+lt_simple_link_test_code='int main(){return(0);}'
+
+
+
+
+
+
+
+# If no C compiler was specified, use CC.
+LTCC=${LTCC-"$CC"}
+
+# If no C compiler flags were specified, use CFLAGS.
+LTCFLAGS=${LTCFLAGS-"$CFLAGS"}
+
+# Allow CC to be a program name with arguments.
+compiler=$CC
+
+# Save the default compiler, since it gets overwritten when the other
+# tags are being tested, and _LT_TAGVAR(compiler, []) is a NOP.
+compiler_DEFAULT=$CC
+
+# save warnings/boilerplate of simple test code
+ac_outfile=conftest.$ac_objext
+echo "$lt_simple_compile_test_code" >conftest.$ac_ext
+eval "$ac_compile" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err
+_lt_compiler_boilerplate=`cat conftest.err`
+$RM conftest*
+
+ac_outfile=conftest.$ac_objext
+echo "$lt_simple_link_test_code" >conftest.$ac_ext
+eval "$ac_link" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err
+_lt_linker_boilerplate=`cat conftest.err`
+$RM -r conftest*
+
+
+## CAVEAT EMPTOR:
+## There is no encapsulation within the following macros, do not change
+## the running order or otherwise move them around unless you know exactly
+## what you are doing...
+if test -n "$compiler"; then
+
+lt_prog_compiler_no_builtin_flag=
+
+if test "$GCC" = yes; then
+  case $cc_basename in
+  nvcc*)
+    lt_prog_compiler_no_builtin_flag=' -Xcompiler -fno-builtin' ;;
+  *)
+    lt_prog_compiler_no_builtin_flag=' -fno-builtin' ;;
+  esac
+
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if $compiler supports 
-fno-rtti -fno-exceptions" >&5
+printf %s "checking if $compiler supports -fno-rtti -fno-exceptions... " >&6; }
+if test ${lt_cv_prog_compiler_rtti_exceptions+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  lt_cv_prog_compiler_rtti_exceptions=no
+   ac_outfile=conftest.$ac_objext
+   echo "$lt_simple_compile_test_code" > conftest.$ac_ext
+   lt_compiler_flag="-fno-rtti -fno-exceptions"
+   # Insert the option either (1) after the last *FLAGS variable, or
+   # (2) before a word containing "conftest.", or (3) at the end.
+   # Note that $ac_compile itself does not contain backslashes and begins
+   # with a dollar sign (not a hyphen), so the echo should work correctly.
+   # The option is referenced via a variable to avoid confusing sed.
+   lt_compile=`echo "$ac_compile" | $SED \
+   -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
+   -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
+   -e 's:$: $lt_compiler_flag:'`
+   (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5)
+   (eval "$lt_compile" 2>conftest.err)
+   ac_status=$?
+   cat conftest.err >&5
+   echo "$as_me:$LINENO: \$? = $ac_status" >&5
+   if (exit $ac_status) && test -s "$ac_outfile"; then
+     # The compiler can only warn and ignore the option if not recognized
+     # So say no if there are warnings other than the usual output.
+     $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' >conftest.exp
+     $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2
+     if test ! -s conftest.er2 || diff conftest.exp conftest.er2 >/dev/null; 
then
+       lt_cv_prog_compiler_rtti_exceptions=yes
+     fi
+   fi
+   $RM conftest*
+
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: 
$lt_cv_prog_compiler_rtti_exceptions" >&5
+printf "%s\n" "$lt_cv_prog_compiler_rtti_exceptions" >&6; }
+
+if test x"$lt_cv_prog_compiler_rtti_exceptions" = xyes; then
+    lt_prog_compiler_no_builtin_flag="$lt_prog_compiler_no_builtin_flag 
-fno-rtti -fno-exceptions"
+else
+    :
+fi
+
+fi
+
+
+
+
+
+
+  lt_prog_compiler_wl=
+lt_prog_compiler_pic=
+lt_prog_compiler_static=
+
+
+  if test "$GCC" = yes; then
+    lt_prog_compiler_wl='-Wl,'
+    lt_prog_compiler_static='-static'
+
+    case $host_os in
+      aix*)
+      # All AIX code is PIC.
+      if test "$host_cpu" = ia64; then
+       # AIX 5 now supports IA64 processor
+       lt_prog_compiler_static='-Bstatic'
+      fi
+      ;;
+
+    amigaos*)
+      case $host_cpu in
+      powerpc)
+            # see comment about AmigaOS4 .so support
+            lt_prog_compiler_pic='-fPIC'
+        ;;
+      m68k)
+            # FIXME: we need at least 68020 code to build shared libraries, but
+            # adding the `-m68020' flag to GCC prevents building anything 
better,
+            # like `-m68040'.
+            lt_prog_compiler_pic='-m68020 -resident32 -malways-restore-a4'
+        ;;
+      esac
+      ;;
+
+    beos* | irix5* | irix6* | nonstopux* | osf3* | osf4* | osf5*)
+      # PIC is the default for these OSes.
+      ;;
+
+    mingw* | cygwin* | pw32* | os2* | cegcc*)
+      # This hack is so that the source file can tell whether it is being
+      # built for inclusion in a dll (and should export symbols for example).
+      # Although the cygwin gcc ignores -fPIC, still need this for old-style
+      # (--disable-auto-import) libraries
+      lt_prog_compiler_pic='-DDLL_EXPORT'
+      ;;
+
+    darwin* | rhapsody*)
+      # PIC is the default on this platform
+      # Common symbols not allowed in MH_DYLIB files
+      lt_prog_compiler_pic='-fno-common'
+      ;;
+
+    haiku*)
+      # PIC is the default for Haiku.
+      # The "-static" flag exists, but is broken.
+      lt_prog_compiler_static=
+      ;;
+
+    hpux*)
+      # PIC is the default for 64-bit PA HP-UX, but not for 32-bit
+      # PA HP-UX.  On IA64 HP-UX, PIC is the default but the pic flag
+      # sets the default TLS model and affects inlining.
+      case $host_cpu in
+      hppa*64*)
+       # +Z the default
+       ;;
+      *)
+       lt_prog_compiler_pic='-fPIC'
+       ;;
+      esac
+      ;;
+
+    interix[3-9]*)
+      # Interix 3.x gcc -fpic/-fPIC options generate broken code.
+      # Instead, we relocate shared libraries at runtime.
+      ;;
+
+    msdosdjgpp*)
+      # Just because we use GCC doesn't mean we suddenly get shared libraries
+      # on systems that don't support them.
+      lt_prog_compiler_can_build_shared=no
+      enable_shared=no
+      ;;
+
+    *nto* | *qnx*)
+      # QNX uses GNU C++, but need to define -shared option too, otherwise
+      # it will coredump.
+      lt_prog_compiler_pic='-fPIC -shared'
+      ;;
+
+    sysv4*MP*)
+      if test -d /usr/nec; then
+       lt_prog_compiler_pic=-Kconform_pic
+      fi
+      ;;
+
+    *)
+      lt_prog_compiler_pic='-fPIC'
+      ;;
+    esac
+
+    case $cc_basename in
+    nvcc*) # Cuda Compiler Driver 2.2
+      lt_prog_compiler_wl='-Xlinker '
+      if test -n "$lt_prog_compiler_pic"; then
+        lt_prog_compiler_pic="-Xcompiler $lt_prog_compiler_pic"
+      fi
+      ;;
+    esac
+  else
+    # PORTME Check for flag to pass linker flags through the system compiler.
+    case $host_os in
+    aix*)
+      lt_prog_compiler_wl='-Wl,'
+      if test "$host_cpu" = ia64; then
+       # AIX 5 now supports IA64 processor
+       lt_prog_compiler_static='-Bstatic'
+      else
+       lt_prog_compiler_static='-bnso -bI:/lib/syscalls.exp'
+      fi
+      ;;
+
+    mingw* | cygwin* | pw32* | os2* | cegcc*)
+      # This hack is so that the source file can tell whether it is being
+      # built for inclusion in a dll (and should export symbols for example).
+      lt_prog_compiler_pic='-DDLL_EXPORT'
+      ;;
+
+    hpux9* | hpux10* | hpux11*)
+      lt_prog_compiler_wl='-Wl,'
+      # PIC is the default for IA64 HP-UX and 64-bit HP-UX, but
+      # not for PA HP-UX.
+      case $host_cpu in
+      hppa*64*|ia64*)
+       # +Z the default
+       ;;
+      *)
+       lt_prog_compiler_pic='+Z'
+       ;;
+      esac
+      # Is there a better lt_prog_compiler_static that works with the bundled 
CC?
+      lt_prog_compiler_static='${wl}-a ${wl}archive'
+      ;;
+
+    irix5* | irix6* | nonstopux*)
+      lt_prog_compiler_wl='-Wl,'
+      # PIC (with -KPIC) is the default.
+      lt_prog_compiler_static='-non_shared'
+      ;;
+
+    linux* | k*bsd*-gnu | kopensolaris*-gnu)
+      case $cc_basename in
+      # old Intel for x86_64 which still supported -KPIC.
+      ecc*)
+       lt_prog_compiler_wl='-Wl,'
+       lt_prog_compiler_pic='-KPIC'
+       lt_prog_compiler_static='-static'
+        ;;
+      # icc used to be incompatible with GCC.
+      # ICC 10 doesn't accept -KPIC any more.
+      icc* | ifort*)
+       lt_prog_compiler_wl='-Wl,'
+       lt_prog_compiler_pic='-fPIC'
+       lt_prog_compiler_static='-static'
+        ;;
+      # Lahey Fortran 8.1.
+      lf95*)
+       lt_prog_compiler_wl='-Wl,'
+       lt_prog_compiler_pic='--shared'
+       lt_prog_compiler_static='--static'
+       ;;
+      nagfor*)
+       # NAG Fortran compiler
+       lt_prog_compiler_wl='-Wl,-Wl,,'
+       lt_prog_compiler_pic='-PIC'
+       lt_prog_compiler_static='-Bstatic'
+       ;;
+      pgcc* | pgf77* | pgf90* | pgf95* | pgfortran*)
+        # Portland Group compilers (*not* the Pentium gcc compiler,
+       # which looks to be a dead project)
+       lt_prog_compiler_wl='-Wl,'
+       lt_prog_compiler_pic='-fpic'
+       lt_prog_compiler_static='-Bstatic'
+        ;;
+      ccc*)
+        lt_prog_compiler_wl='-Wl,'
+        # All Alpha code is PIC.
+        lt_prog_compiler_static='-non_shared'
+        ;;
+      xl* | bgxl* | bgf* | mpixl*)
+       # IBM XL C 8.0/Fortran 10.1, 11.1 on PPC and BlueGene
+       lt_prog_compiler_wl='-Wl,'
+       lt_prog_compiler_pic='-qpic'
+       lt_prog_compiler_static='-qstaticlink'
+       ;;
+      *)
+       case `$CC -V 2>&1 | sed 5q` in
+       *Sun\ Ceres\ Fortran* | *Sun*Fortran*\ [1-7].* | *Sun*Fortran*\ 
8.[0-3]*)
+         # Sun Fortran 8.3 passes all unrecognized flags to the linker
+         lt_prog_compiler_pic='-KPIC'
+         lt_prog_compiler_static='-Bstatic'
+         lt_prog_compiler_wl=''
+         ;;
+       *Sun\ F* | *Sun*Fortran*)
+         lt_prog_compiler_pic='-KPIC'
+         lt_prog_compiler_static='-Bstatic'
+         lt_prog_compiler_wl='-Qoption ld '
+         ;;
+       *Sun\ C*)
+         # Sun C 5.9
+         lt_prog_compiler_pic='-KPIC'
+         lt_prog_compiler_static='-Bstatic'
+         lt_prog_compiler_wl='-Wl,'
+         ;;
+        *Intel*\ [CF]*Compiler*)
+         lt_prog_compiler_wl='-Wl,'
+         lt_prog_compiler_pic='-fPIC'
+         lt_prog_compiler_static='-static'
+         ;;
+       *Portland\ Group*)
+         lt_prog_compiler_wl='-Wl,'
+         lt_prog_compiler_pic='-fpic'
+         lt_prog_compiler_static='-Bstatic'
+         ;;
+       esac
+       ;;
+      esac
+      ;;
+
+    newsos6)
+      lt_prog_compiler_pic='-KPIC'
+      lt_prog_compiler_static='-Bstatic'
+      ;;
+
+    *nto* | *qnx*)
+      # QNX uses GNU C++, but need to define -shared option too, otherwise
+      # it will coredump.
+      lt_prog_compiler_pic='-fPIC -shared'
+      ;;
+
+    osf3* | osf4* | osf5*)
+      lt_prog_compiler_wl='-Wl,'
+      # All OSF/1 code is PIC.
+      lt_prog_compiler_static='-non_shared'
+      ;;
+
+    rdos*)
+      lt_prog_compiler_static='-non_shared'
+      ;;
+
+    solaris*)
+      lt_prog_compiler_pic='-KPIC'
+      lt_prog_compiler_static='-Bstatic'
+      case $cc_basename in
+      f77* | f90* | f95* | sunf77* | sunf90* | sunf95*)
+       lt_prog_compiler_wl='-Qoption ld ';;
+      *)
+       lt_prog_compiler_wl='-Wl,';;
+      esac
+      ;;
+
+    sunos4*)
+      lt_prog_compiler_wl='-Qoption ld '
+      lt_prog_compiler_pic='-PIC'
+      lt_prog_compiler_static='-Bstatic'
+      ;;
+
+    sysv4 | sysv4.2uw2* | sysv4.3*)
+      lt_prog_compiler_wl='-Wl,'
+      lt_prog_compiler_pic='-KPIC'
+      lt_prog_compiler_static='-Bstatic'
+      ;;
+
+    sysv4*MP*)
+      if test -d /usr/nec ;then
+       lt_prog_compiler_pic='-Kconform_pic'
+       lt_prog_compiler_static='-Bstatic'
+      fi
+      ;;
+
+    sysv5* | unixware* | sco3.2v5* | sco5v6* | OpenUNIX*)
+      lt_prog_compiler_wl='-Wl,'
+      lt_prog_compiler_pic='-KPIC'
+      lt_prog_compiler_static='-Bstatic'
+      ;;
+
+    unicos*)
+      lt_prog_compiler_wl='-Wl,'
+      lt_prog_compiler_can_build_shared=no
+      ;;
+
+    uts4*)
+      lt_prog_compiler_pic='-pic'
+      lt_prog_compiler_static='-Bstatic'
+      ;;
+
+    *)
+      lt_prog_compiler_can_build_shared=no
+      ;;
+    esac
+  fi
+
+case $host_os in
+  # For platforms which do not support PIC, -DPIC is meaningless:
+  *djgpp*)
+    lt_prog_compiler_pic=
+    ;;
+  *)
+    lt_prog_compiler_pic="$lt_prog_compiler_pic -DPIC"
+    ;;
+esac
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $compiler option to 
produce PIC" >&5
+printf %s "checking for $compiler option to produce PIC... " >&6; }
+if test ${lt_cv_prog_compiler_pic+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  lt_cv_prog_compiler_pic=$lt_prog_compiler_pic
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: 
$lt_cv_prog_compiler_pic" >&5
+printf "%s\n" "$lt_cv_prog_compiler_pic" >&6; }
+lt_prog_compiler_pic=$lt_cv_prog_compiler_pic
+
+#
+# Check to make sure the PIC flag actually works.
+#
+if test -n "$lt_prog_compiler_pic"; then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if $compiler PIC flag 
$lt_prog_compiler_pic works" >&5
+printf %s "checking if $compiler PIC flag $lt_prog_compiler_pic works... " 
>&6; }
+if test ${lt_cv_prog_compiler_pic_works+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  lt_cv_prog_compiler_pic_works=no
+   ac_outfile=conftest.$ac_objext
+   echo "$lt_simple_compile_test_code" > conftest.$ac_ext
+   lt_compiler_flag="$lt_prog_compiler_pic -DPIC"
+   # Insert the option either (1) after the last *FLAGS variable, or
+   # (2) before a word containing "conftest.", or (3) at the end.
+   # Note that $ac_compile itself does not contain backslashes and begins
+   # with a dollar sign (not a hyphen), so the echo should work correctly.
+   # The option is referenced via a variable to avoid confusing sed.
+   lt_compile=`echo "$ac_compile" | $SED \
+   -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
+   -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
+   -e 's:$: $lt_compiler_flag:'`
+   (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5)
+   (eval "$lt_compile" 2>conftest.err)
+   ac_status=$?
+   cat conftest.err >&5
+   echo "$as_me:$LINENO: \$? = $ac_status" >&5
+   if (exit $ac_status) && test -s "$ac_outfile"; then
+     # The compiler can only warn and ignore the option if not recognized
+     # So say no if there are warnings other than the usual output.
+     $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' >conftest.exp
+     $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2
+     if test ! -s conftest.er2 || diff conftest.exp conftest.er2 >/dev/null; 
then
+       lt_cv_prog_compiler_pic_works=yes
+     fi
+   fi
+   $RM conftest*
+
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: 
$lt_cv_prog_compiler_pic_works" >&5
+printf "%s\n" "$lt_cv_prog_compiler_pic_works" >&6; }
+
+if test x"$lt_cv_prog_compiler_pic_works" = xyes; then
+    case $lt_prog_compiler_pic in
+     "" | " "*) ;;
+     *) lt_prog_compiler_pic=" $lt_prog_compiler_pic" ;;
+     esac
+else
+    lt_prog_compiler_pic=
+     lt_prog_compiler_can_build_shared=no
+fi
+
+fi
+
+
+
+
+
+
+
+
+
+
+
+#
+# Check to make sure the static flag actually works.
+#
+wl=$lt_prog_compiler_wl eval lt_tmp_static_flag=\"$lt_prog_compiler_static\"
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if $compiler static 
flag $lt_tmp_static_flag works" >&5
+printf %s "checking if $compiler static flag $lt_tmp_static_flag works... " 
>&6; }
+if test ${lt_cv_prog_compiler_static_works+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  lt_cv_prog_compiler_static_works=no
+   save_LDFLAGS="$LDFLAGS"
+   LDFLAGS="$LDFLAGS $lt_tmp_static_flag"
+   echo "$lt_simple_link_test_code" > conftest.$ac_ext
+   if (eval $ac_link 2>conftest.err) && test -s conftest$ac_exeext; then
+     # The linker can only warn and ignore the option if not recognized
+     # So say no if there are warnings
+     if test -s conftest.err; then
+       # Append any errors to the config.log.
+       cat conftest.err 1>&5
+       $ECHO "$_lt_linker_boilerplate" | $SED '/^$/d' > conftest.exp
+       $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2
+       if diff conftest.exp conftest.er2 >/dev/null; then
+         lt_cv_prog_compiler_static_works=yes
+       fi
+     else
+       lt_cv_prog_compiler_static_works=yes
+     fi
+   fi
+   $RM -r conftest*
+   LDFLAGS="$save_LDFLAGS"
+
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: 
$lt_cv_prog_compiler_static_works" >&5
+printf "%s\n" "$lt_cv_prog_compiler_static_works" >&6; }
+
+if test x"$lt_cv_prog_compiler_static_works" = xyes; then
+    :
+else
+    lt_prog_compiler_static=
+fi
+
+
+
+
+
+
+
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if $compiler supports 
-c -o file.$ac_objext" >&5
+printf %s "checking if $compiler supports -c -o file.$ac_objext... " >&6; }
+if test ${lt_cv_prog_compiler_c_o+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  lt_cv_prog_compiler_c_o=no
+   $RM -r conftest 2>/dev/null
+   mkdir conftest
+   cd conftest
+   mkdir out
+   echo "$lt_simple_compile_test_code" > conftest.$ac_ext
+
+   lt_compiler_flag="-o out/conftest2.$ac_objext"
+   # Insert the option either (1) after the last *FLAGS variable, or
+   # (2) before a word containing "conftest.", or (3) at the end.
+   # Note that $ac_compile itself does not contain backslashes and begins
+   # with a dollar sign (not a hyphen), so the echo should work correctly.
+   lt_compile=`echo "$ac_compile" | $SED \
+   -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
+   -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
+   -e 's:$: $lt_compiler_flag:'`
+   (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5)
+   (eval "$lt_compile" 2>out/conftest.err)
+   ac_status=$?
+   cat out/conftest.err >&5
+   echo "$as_me:$LINENO: \$? = $ac_status" >&5
+   if (exit $ac_status) && test -s out/conftest2.$ac_objext
+   then
+     # The compiler can only warn and ignore the option if not recognized
+     # So say no if there are warnings
+     $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' > out/conftest.exp
+     $SED '/^$/d; /^ *+/d' out/conftest.err >out/conftest.er2
+     if test ! -s out/conftest.er2 || diff out/conftest.exp out/conftest.er2 
>/dev/null; then
+       lt_cv_prog_compiler_c_o=yes
+     fi
+   fi
+   chmod u+w . 2>&5
+   $RM conftest*
+   # SGI C++ compiler will create directory out/ii_files/ for
+   # template instantiation
+   test -d out/ii_files && $RM out/ii_files/* && rmdir out/ii_files
+   $RM out/* && rmdir out
+   cd ..
+   $RM -r conftest
+   $RM conftest*
+
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: 
$lt_cv_prog_compiler_c_o" >&5
+printf "%s\n" "$lt_cv_prog_compiler_c_o" >&6; }
+
+
+
+
+
+
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if $compiler supports 
-c -o file.$ac_objext" >&5
+printf %s "checking if $compiler supports -c -o file.$ac_objext... " >&6; }
+if test ${lt_cv_prog_compiler_c_o+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  lt_cv_prog_compiler_c_o=no
+   $RM -r conftest 2>/dev/null
+   mkdir conftest
+   cd conftest
+   mkdir out
+   echo "$lt_simple_compile_test_code" > conftest.$ac_ext
+
+   lt_compiler_flag="-o out/conftest2.$ac_objext"
+   # Insert the option either (1) after the last *FLAGS variable, or
+   # (2) before a word containing "conftest.", or (3) at the end.
+   # Note that $ac_compile itself does not contain backslashes and begins
+   # with a dollar sign (not a hyphen), so the echo should work correctly.
+   lt_compile=`echo "$ac_compile" | $SED \
+   -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
+   -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
+   -e 's:$: $lt_compiler_flag:'`
+   (eval echo "\"\$as_me:$LINENO: $lt_compile\"" >&5)
+   (eval "$lt_compile" 2>out/conftest.err)
+   ac_status=$?
+   cat out/conftest.err >&5
+   echo "$as_me:$LINENO: \$? = $ac_status" >&5
+   if (exit $ac_status) && test -s out/conftest2.$ac_objext
+   then
+     # The compiler can only warn and ignore the option if not recognized
+     # So say no if there are warnings
+     $ECHO "$_lt_compiler_boilerplate" | $SED '/^$/d' > out/conftest.exp
+     $SED '/^$/d; /^ *+/d' out/conftest.err >out/conftest.er2
+     if test ! -s out/conftest.er2 || diff out/conftest.exp out/conftest.er2 
>/dev/null; then
+       lt_cv_prog_compiler_c_o=yes
+     fi
+   fi
+   chmod u+w . 2>&5
+   $RM conftest*
+   # SGI C++ compiler will create directory out/ii_files/ for
+   # template instantiation
+   test -d out/ii_files && $RM out/ii_files/* && rmdir out/ii_files
+   $RM out/* && rmdir out
+   cd ..
+   $RM -r conftest
+   $RM conftest*
+
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: 
$lt_cv_prog_compiler_c_o" >&5
+printf "%s\n" "$lt_cv_prog_compiler_c_o" >&6; }
+
+
+
+
+hard_links="nottested"
+if test "$lt_cv_prog_compiler_c_o" = no && test "$need_locks" != no; then
+  # do not overwrite the value of need_locks provided by the user
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if we can lock with 
hard links" >&5
+printf %s "checking if we can lock with hard links... " >&6; }
+  hard_links=yes
+  $RM conftest*
+  ln conftest.a conftest.b 2>/dev/null && hard_links=no
+  touch conftest.a
+  ln conftest.a conftest.b 2>&5 || hard_links=no
+  ln conftest.a conftest.b 2>/dev/null && hard_links=no
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $hard_links" >&5
+printf "%s\n" "$hard_links" >&6; }
+  if test "$hard_links" = no; then
+    { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: \`$CC' does not 
support \`-c -o', so \`make -j' may be unsafe" >&5
+printf "%s\n" "$as_me: WARNING: \`$CC' does not support \`-c -o', so \`make 
-j' may be unsafe" >&2;}
+    need_locks=warn
+  fi
+else
+  need_locks=no
+fi
+
+
+
+
+
+
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the $compiler 
linker ($LD) supports shared libraries" >&5
+printf %s "checking whether the $compiler linker ($LD) supports shared 
libraries... " >&6; }
+
+  runpath_var=
+  allow_undefined_flag=
+  always_export_symbols=no
+  archive_cmds=
+  archive_expsym_cmds=
+  compiler_needs_object=no
+  enable_shared_with_static_runtimes=no
+  export_dynamic_flag_spec=
+  export_symbols_cmds='$NM $libobjs $convenience | $global_symbol_pipe | $SED 
'\''s/.* //'\'' | sort | uniq > $export_symbols'
+  hardcode_automatic=no
+  hardcode_direct=no
+  hardcode_direct_absolute=no
+  hardcode_libdir_flag_spec=
+  hardcode_libdir_separator=
+  hardcode_minus_L=no
+  hardcode_shlibpath_var=unsupported
+  inherit_rpath=no
+  link_all_deplibs=unknown
+  module_cmds=
+  module_expsym_cmds=
+  old_archive_from_new_cmds=
+  old_archive_from_expsyms_cmds=
+  thread_safe_flag_spec=
+  whole_archive_flag_spec=
+  # include_expsyms should be a list of space-separated symbols to be *always*
+  # included in the symbol list
+  include_expsyms=
+  # exclude_expsyms can be an extended regexp of symbols to exclude
+  # it will be wrapped by ` (' and `)$', so one must not match beginning or
+  # end of line.  Example: `a|bc|.*d.*' will exclude the symbols `a' and `bc',
+  # as well as any symbol that contains `d'.
+  exclude_expsyms='_GLOBAL_OFFSET_TABLE_|_GLOBAL__F[ID]_.*'
+  # Although _GLOBAL_OFFSET_TABLE_ is a valid symbol C name, most a.out
+  # platforms (ab)use it in PIC code, but their linkers get confused if
+  # the symbol is explicitly referenced.  Since portable code cannot
+  # rely on this symbol name, it's probably fine to never include it in
+  # preloaded symbol tables.
+  # Exclude shared library initialization/finalization symbols.
+  extract_expsyms_cmds=
+
+  case $host_os in
+  cygwin* | mingw* | pw32* | cegcc*)
+    # FIXME: the MSVC++ port hasn't been tested in a loooong time
+    # When not using gcc, we currently assume that we are using
+    # Microsoft Visual C++.
+    if test "$GCC" != yes; then
+      with_gnu_ld=no
+    fi
+    ;;
+  interix*)
+    # we just hope/assume this is gcc and not c89 (= MSVC++)
+    with_gnu_ld=yes
+    ;;
+  openbsd*)
+    with_gnu_ld=no
+    ;;
+  linux* | k*bsd*-gnu | gnu*)
+    link_all_deplibs=no
+    ;;
+  esac
+
+  ld_shlibs=yes
+
+  # On some targets, GNU ld is compatible enough with the native linker
+  # that we're better off using the native interface for both.
+  lt_use_gnu_ld_interface=no
+  if test "$with_gnu_ld" = yes; then
+    case $host_os in
+      aix*)
+       # The AIX port of GNU ld has always aspired to compatibility
+       # with the native linker.  However, as the warning in the GNU ld
+       # block says, versions before 2.19.5* couldn't really create working
+       # shared libraries, regardless of the interface used.
+       case `$LD -v 2>&1` in
+         *\ \(GNU\ Binutils\)\ 2.19.5*) ;;
+         *\ \(GNU\ Binutils\)\ 2.[2-9]*) ;;
+         *\ \(GNU\ Binutils\)\ [3-9]*) ;;
+         *)
+           lt_use_gnu_ld_interface=yes
+           ;;
+       esac
+       ;;
+      *)
+       lt_use_gnu_ld_interface=yes
+       ;;
+    esac
+  fi
+
+  if test "$lt_use_gnu_ld_interface" = yes; then
+    # If archive_cmds runs LD, not CC, wlarc should be empty
+    wlarc='${wl}'
+
+    # Set some defaults for GNU ld with shared library support. These
+    # are reset later if shared libraries are not supported. Putting them
+    # here allows them to be overridden if necessary.
+    runpath_var=LD_RUN_PATH
+    hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir'
+    export_dynamic_flag_spec='${wl}--export-dynamic'
+    # ancient GNU ld didn't support --whole-archive et. al.
+    if $LD --help 2>&1 | $GREP 'no-whole-archive' > /dev/null; then
+      whole_archive_flag_spec="$wlarc"'--whole-archive$convenience 
'"$wlarc"'--no-whole-archive'
+    else
+      whole_archive_flag_spec=
+    fi
+    supports_anon_versioning=no
+    case `$LD -v 2>&1` in
+      *GNU\ gold*) supports_anon_versioning=yes ;;
+      *\ [01].* | *\ 2.[0-9].* | *\ 2.10.*) ;; # catch versions < 2.11
+      *\ 2.11.93.0.2\ *) supports_anon_versioning=yes ;; # RH7.3 ...
+      *\ 2.11.92.0.12\ *) supports_anon_versioning=yes ;; # Mandrake 8.2 ...
+      *\ 2.11.*) ;; # other 2.11 versions
+      *) supports_anon_versioning=yes ;;
+    esac
+
+    # See if GNU ld supports shared libraries.
+    case $host_os in
+    aix[3-9]*)
+      # On AIX/PPC, the GNU linker is very broken
+      if test "$host_cpu" != ia64; then
+       ld_shlibs=no
+       cat <<_LT_EOF 1>&2
+
+*** Warning: the GNU linker, at least up to release 2.19, is reported
+*** to be unable to reliably create shared libraries on AIX.
+*** Therefore, libtool is disabling shared libraries support.  If you
+*** really care for shared libraries, you may want to install binutils
+*** 2.20 or above, or modify your PATH so that a non-GNU linker is found.
+*** You will then need to restart the configuration process.
+
+_LT_EOF
+      fi
+      ;;
+
+    amigaos*)
+      case $host_cpu in
+      powerpc)
+            # see comment about AmigaOS4 .so support
+            archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags 
${wl}-soname $wl$soname -o $lib'
+            archive_expsym_cmds=''
+        ;;
+      m68k)
+            archive_cmds='$RM $output_objdir/a2ixlibrary.data~$ECHO "#define 
NAME $libname" > $output_objdir/a2ixlibrary.data~$ECHO "#define LIBRARY_ID 1" 
>> $output_objdir/a2ixlibrary.data~$ECHO "#define VERSION $major" >> 
$output_objdir/a2ixlibrary.data~$ECHO "#define REVISION $revision" >> 
$output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd 
$output_objdir && a2ixlibrary -32)'
+            hardcode_libdir_flag_spec='-L$libdir'
+            hardcode_minus_L=yes
+        ;;
+      esac
+      ;;
+
+    beos*)
+      if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then
+       allow_undefined_flag=unsupported
+       # Joseph Beckenbach <jrb3@best.com> says some releases of gcc
+       # support --undefined.  This deserves some investigation.  FIXME
+       archive_cmds='$CC -nostart $libobjs $deplibs $compiler_flags 
${wl}-soname $wl$soname -o $lib'
+      else
+       ld_shlibs=no
+      fi
+      ;;
+
+    cygwin* | mingw* | pw32* | cegcc*)
+      # _LT_TAGVAR(hardcode_libdir_flag_spec, ) is actually meaningless,
+      # as there is no search path for DLLs.
+      hardcode_libdir_flag_spec='-L$libdir'
+      export_dynamic_flag_spec='${wl}--export-all-symbols'
+      allow_undefined_flag=unsupported
+      always_export_symbols=no
+      enable_shared_with_static_runtimes=yes
+      export_symbols_cmds='$NM $libobjs $convenience | $global_symbol_pipe | 
$SED -e '\''/^[BCDGRS][ ]/s/.*[ ]\([^ ]*\)/\1 DATA/;s/^.*[ ]__nm__\([^ ]*\)[ 
][^ ]*/\1 DATA/;/^I[ ]/d;/^[AITW][ ]/s/.* //'\'' | sort | uniq > 
$export_symbols'
+      
exclude_expsyms='[_]+GLOBAL_OFFSET_TABLE_|[_]+GLOBAL__[FID]_.*|[_]+head_[A-Za-z0-9_]+_dll|[A-Za-z0-9_]+_dll_iname'
+
+      if $LD --help 2>&1 | $GREP 'auto-import' > /dev/null; then
+        archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags -o 
$output_objdir/$soname ${wl}--enable-auto-image-base -Xlinker --out-implib 
-Xlinker $lib'
+       # If the export-symbols file already is a .def file (1st line
+       # is EXPORTS), use it as is; otherwise, prepend...
+       archive_expsym_cmds='if test "x`$SED \"$sed_uncomment_deffile\" 
$export_symbols | $SED 1q`" = xEXPORTS; then
+         cp $export_symbols $output_objdir/$soname.def;
+       else
+         echo EXPORTS > $output_objdir/$soname.def;
+         cat $export_symbols >> $output_objdir/$soname.def;
+       fi~
+       $CC -shared $output_objdir/$soname.def $libobjs $deplibs 
$compiler_flags -o $output_objdir/$soname ${wl}--enable-auto-image-base 
-Xlinker --out-implib -Xlinker $lib'
+      else
+       ld_shlibs=no
+      fi
+      ;;
+
+    haiku*)
+      archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname 
$wl$soname -o $lib'
+      link_all_deplibs=yes
+      ;;
+
+    interix[3-9]*)
+      hardcode_direct=no
+      hardcode_shlibpath_var=no
+      hardcode_libdir_flag_spec='${wl}-rpath,$libdir'
+      export_dynamic_flag_spec='${wl}-E'
+      # Hack: On Interix 3.x, we cannot compile PIC because of a broken gcc.
+      # Instead, shared libraries are loaded at an image base (0x10000000 by
+      # default) and relocated if they conflict, which is a slow very memory
+      # consuming and fragmenting process.  To avoid this, we pick a random,
+      # 256 KiB-aligned image base between 0x50000000 and 0x6FFC0000 at link
+      # time.  Moving up from 0x10000000 also allows more sbrk(2) space.
+      archive_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags 
${wl}-h,$soname ${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 
1342177280` -o $lib'
+      archive_expsym_cmds='sed "s,^,_," $export_symbols 
>$output_objdir/$soname.expsym~$CC -shared $pic_flag $libobjs $deplibs 
$compiler_flags ${wl}-h,$soname 
${wl}--retain-symbols-file,$output_objdir/$soname.expsym 
${wl}--image-base,`expr ${RANDOM-$$} % 4096 / 2 \* 262144 + 1342177280` -o $lib'
+      ;;
+
+    gnu* | linux* | tpf* | k*bsd*-gnu | kopensolaris*-gnu)
+      tmp_diet=no
+      if test "$host_os" = linux-dietlibc; then
+       case $cc_basename in
+         diet\ *) tmp_diet=yes;;       # linux-dietlibc with static linking 
(!diet-dyn)
+       esac
+      fi
+      if $LD --help 2>&1 | $EGREP ': supported targets:.* elf' > /dev/null \
+        && test "$tmp_diet" = no
+      then
+       tmp_addflag=' $pic_flag'
+       tmp_sharedflag='-shared'
+       case $cc_basename,$host_cpu in
+        pgcc*)                         # Portland Group C compiler
+         whole_archive_flag_spec='${wl}--whole-archive`for conv in 
$convenience\"\"; do test  -n \"$conv\" && 
new_convenience=\"$new_convenience,$conv\"; done; func_echo_all 
\"$new_convenience\"` ${wl}--no-whole-archive'
+         tmp_addflag=' $pic_flag'
+         ;;
+       pgf77* | pgf90* | pgf95* | pgfortran*)
+                                       # Portland Group f77 and f90 compilers
+         whole_archive_flag_spec='${wl}--whole-archive`for conv in 
$convenience\"\"; do test  -n \"$conv\" && 
new_convenience=\"$new_convenience,$conv\"; done; func_echo_all 
\"$new_convenience\"` ${wl}--no-whole-archive'
+         tmp_addflag=' $pic_flag -Mnomain' ;;
+       ecc*,ia64* | icc*,ia64*)        # Intel C compiler on ia64
+         tmp_addflag=' -i_dynamic' ;;
+       efc*,ia64* | ifort*,ia64*)      # Intel Fortran compiler on ia64
+         tmp_addflag=' -i_dynamic -nofor_main' ;;
+       ifc* | ifort*)                  # Intel Fortran compiler
+         tmp_addflag=' -nofor_main' ;;
+       lf95*)                          # Lahey Fortran 8.1
+         whole_archive_flag_spec=
+         tmp_sharedflag='--shared' ;;
+       xl[cC]* | bgxl[cC]* | mpixl[cC]*) # IBM XL C 8.0 on PPC (deal with xlf 
below)
+         tmp_sharedflag='-qmkshrobj'
+         tmp_addflag= ;;
+       nvcc*)  # Cuda Compiler Driver 2.2
+         whole_archive_flag_spec='${wl}--whole-archive`for conv in 
$convenience\"\"; do test  -n \"$conv\" && 
new_convenience=\"$new_convenience,$conv\"; done; func_echo_all 
\"$new_convenience\"` ${wl}--no-whole-archive'
+         compiler_needs_object=yes
+         ;;
+       esac
+       case `$CC -V 2>&1 | sed 5q` in
+       *Sun\ C*)                       # Sun C 5.9
+         whole_archive_flag_spec='${wl}--whole-archive`new_convenience=; for 
conv in $convenience\"\"; do test -z \"$conv\" || 
new_convenience=\"$new_convenience,$conv\"; done; func_echo_all 
\"$new_convenience\"` ${wl}--no-whole-archive'
+         compiler_needs_object=yes
+         tmp_sharedflag='-G' ;;
+       *Sun\ F*)                       # Sun Fortran 8.3
+         tmp_sharedflag='-G' ;;
+       esac
+       archive_cmds='$CC '"$tmp_sharedflag""$tmp_addflag"' $libobjs $deplibs 
$compiler_flags ${wl}-soname $wl$soname -o $lib'
+
+        if test "x$supports_anon_versioning" = xyes; then
+          archive_expsym_cmds='echo "{ global:" > $output_objdir/$libname.ver~
+           cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> 
$output_objdir/$libname.ver~
+           echo "local: *; };" >> $output_objdir/$libname.ver~
+           $CC '"$tmp_sharedflag""$tmp_addflag"' $libobjs $deplibs 
$compiler_flags ${wl}-soname $wl$soname ${wl}-version-script 
${wl}$output_objdir/$libname.ver -o $lib'
+        fi
+
+       case $cc_basename in
+       xlf* | bgf* | bgxlf* | mpixlf*)
+         # IBM XL Fortran 10.1 on PPC cannot create shared libs itself
+         whole_archive_flag_spec='--whole-archive$convenience 
--no-whole-archive'
+         hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir'
+         archive_cmds='$LD -shared $libobjs $deplibs $linker_flags -soname 
$soname -o $lib'
+         if test "x$supports_anon_versioning" = xyes; then
+           archive_expsym_cmds='echo "{ global:" > $output_objdir/$libname.ver~
+             cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> 
$output_objdir/$libname.ver~
+             echo "local: *; };" >> $output_objdir/$libname.ver~
+             $LD -shared $libobjs $deplibs $linker_flags -soname $soname 
-version-script $output_objdir/$libname.ver -o $lib'
+         fi
+         ;;
+       esac
+      else
+        ld_shlibs=no
+      fi
+      ;;
+
+    netbsd* | netbsdelf*-gnu)
+      if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then
+       archive_cmds='$LD -Bshareable $libobjs $deplibs $linker_flags -o $lib'
+       wlarc=
+      else
+       archive_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags 
${wl}-soname $wl$soname -o $lib'
+       archive_expsym_cmds='$CC -shared $pic_flag $libobjs $deplibs 
$compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file 
$wl$export_symbols -o $lib'
+      fi
+      ;;
+
+    solaris*)
+      if $LD -v 2>&1 | $GREP 'BFD 2\.8' > /dev/null; then
+       ld_shlibs=no
+       cat <<_LT_EOF 1>&2
+
+*** Warning: The releases 2.8.* of the GNU linker cannot reliably
+*** create shared libraries on Solaris systems.  Therefore, libtool
+*** is disabling shared libraries support.  We urge you to upgrade GNU
+*** binutils to release 2.9.1 or newer.  Another option is to modify
+*** your PATH or compiler configuration so that the native linker is
+*** used, and then restart.
+
+_LT_EOF
+      elif $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; 
then
+       archive_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags 
${wl}-soname $wl$soname -o $lib'
+       archive_expsym_cmds='$CC -shared $pic_flag $libobjs $deplibs 
$compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file 
$wl$export_symbols -o $lib'
+      else
+       ld_shlibs=no
+      fi
+      ;;
+
+    sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX*)
+      case `$LD -v 2>&1` in
+        *\ [01].* | *\ 2.[0-9].* | *\ 2.1[0-5].*)
+       ld_shlibs=no
+       cat <<_LT_EOF 1>&2
+
+*** Warning: Releases of the GNU linker prior to 2.16.91.0.3 can not
+*** reliably create shared libraries on SCO systems.  Therefore, libtool
+*** is disabling shared libraries support.  We urge you to upgrade GNU
+*** binutils to release 2.16.91.0.3 or newer.  Another option is to modify
+*** your PATH or compiler configuration so that the native linker is
+*** used, and then restart.
+
+_LT_EOF
+       ;;
+       *)
+         # For security reasons, it is highly recommended that you always
+         # use absolute paths for naming shared libraries, and exclude the
+         # DT_RUNPATH tag from executables and libraries.  But doing so
+         # requires that you compile everything twice, which is a pain.
+         if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; 
then
+           hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir'
+           archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags 
${wl}-soname $wl$soname -o $lib'
+           archive_expsym_cmds='$CC -shared $libobjs $deplibs $compiler_flags 
${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib'
+         else
+           ld_shlibs=no
+         fi
+       ;;
+      esac
+      ;;
+
+    sunos4*)
+      archive_cmds='$LD -assert pure-text -Bshareable -o $lib $libobjs 
$deplibs $linker_flags'
+      wlarc=
+      hardcode_direct=yes
+      hardcode_shlibpath_var=no
+      ;;
+
+    *)
+      if $LD --help 2>&1 | $GREP ': supported targets:.* elf' > /dev/null; then
+       archive_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags 
${wl}-soname $wl$soname -o $lib'
+       archive_expsym_cmds='$CC -shared $pic_flag $libobjs $deplibs 
$compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file 
$wl$export_symbols -o $lib'
+      else
+       ld_shlibs=no
+      fi
+      ;;
+    esac
+
+    if test "$ld_shlibs" = no; then
+      runpath_var=
+      hardcode_libdir_flag_spec=
+      export_dynamic_flag_spec=
+      whole_archive_flag_spec=
+    fi
+  else
+    # PORTME fill in a description of your system's linker (not GNU ld)
+    case $host_os in
+    aix3*)
+      allow_undefined_flag=unsupported
+      always_export_symbols=yes
+      archive_expsym_cmds='$LD -o $output_objdir/$soname $libobjs $deplibs 
$linker_flags -bE:$export_symbols -T512 -H512 -bM:SRE~$AR $AR_FLAGS $lib 
$output_objdir/$soname'
+      # Note: this linker hardcodes the directories in LIBPATH if there
+      # are no directories specified by -L.
+      hardcode_minus_L=yes
+      if test "$GCC" = yes && test -z "$lt_prog_compiler_static"; then
+       # Neither direct hardcoding nor static linking is supported with a
+       # broken collect2.
+       hardcode_direct=unsupported
+      fi
+      ;;
+
+    aix[4-9]*)
+      if test "$host_cpu" = ia64; then
+       # On IA64, the linker does run time linking by default, so we don't
+       # have to do anything special.
+       aix_use_runtimelinking=no
+       exp_sym_flag='-Bexport'
+       no_entry_flag=""
+      else
+       # If we're using GNU nm, then we don't want the "-C" option.
+       # -C means demangle to AIX nm, but means don't demangle with GNU nm
+       # Also, AIX nm treats weak defined symbols like other global
+       # defined symbols, whereas GNU nm marks them as "W".
+       if $NM -V 2>&1 | $GREP 'GNU' > /dev/null; then
+         export_symbols_cmds='$NM -Bpg $libobjs $convenience | awk '\''{ if 
(((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "W")) && 
(substr(\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols'
+       else
+         export_symbols_cmds='$NM -BCpg $libobjs $convenience | awk '\''{ if 
(((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B")) && (substr(\$ 3,1,1) != 
".")) { print \$ 3 } }'\'' | sort -u > $export_symbols'
+       fi
+       aix_use_runtimelinking=no
+
+       # Test if we are trying to use run time linking or normal
+       # AIX style linking. If -brtl is somewhere in LDFLAGS, we
+       # need to do runtime linking.
+       case $host_os in aix4.[23]|aix4.[23].*|aix[5-9]*)
+         for ld_flag in $LDFLAGS; do
+         if (test $ld_flag = "-brtl" || test $ld_flag = "-Wl,-brtl"); then
+           aix_use_runtimelinking=yes
+           break
+         fi
+         done
+         ;;
+       esac
+
+       exp_sym_flag='-bexport'
+       no_entry_flag='-bnoentry'
+      fi
+
+      # When large executables or shared objects are built, AIX ld can
+      # have problems creating the table of contents.  If linking a library
+      # or program results in "error TOC overflow" add -mminimal-toc to
+      # CXXFLAGS/CFLAGS for g++/gcc.  In the cases where that is not
+      # enough to fix the problem, add -Wl,-bbigtoc to LDFLAGS.
+
+      archive_cmds=''
+      hardcode_direct=yes
+      hardcode_direct_absolute=yes
+      hardcode_libdir_separator=':'
+      link_all_deplibs=yes
+      file_list_spec='${wl}-f,'
+
+      if test "$GCC" = yes; then
+       case $host_os in aix4.[012]|aix4.[012].*)
+       # We only want to do this on AIX 4.2 and lower, the check
+       # below for broken collect2 doesn't work under 4.3+
+         collect2name=`${CC} -print-prog-name=collect2`
+         if test -f "$collect2name" &&
+          strings "$collect2name" | $GREP resolve_lib_name >/dev/null
+         then
+         # We have reworked collect2
+         :
+         else
+         # We have old collect2
+         hardcode_direct=unsupported
+         # It fails to find uninstalled libraries when the uninstalled
+         # path is not listed in the libpath.  Setting hardcode_minus_L
+         # to unsupported forces relinking
+         hardcode_minus_L=yes
+         hardcode_libdir_flag_spec='-L$libdir'
+         hardcode_libdir_separator=
+         fi
+         ;;
+       esac
+       shared_flag='-shared'
+       if test "$aix_use_runtimelinking" = yes; then
+         shared_flag="$shared_flag "'${wl}-G'
+       fi
+       link_all_deplibs=no
+      else
+       # not using gcc
+       if test "$host_cpu" = ia64; then
+       # VisualAge C++, Version 5.5 for AIX 5L for IA-64, Beta 3 Release
+       # chokes on -Wl,-G. The following line is correct:
+         shared_flag='-G'
+       else
+         if test "$aix_use_runtimelinking" = yes; then
+           shared_flag='${wl}-G'
+         else
+           shared_flag='${wl}-bM:SRE'
+         fi
+       fi
+      fi
+
+      export_dynamic_flag_spec='${wl}-bexpall'
+      # It seems that -bexpall does not export symbols beginning with
+      # underscore (_), so it is better to generate a list of symbols to 
export.
+      always_export_symbols=yes
+      if test "$aix_use_runtimelinking" = yes; then
+       # Warning - without using the other runtime loading flags (-brtl),
+       # -berok will link without error, but may produce a broken library.
+       allow_undefined_flag='-berok'
+        # Determine the default libpath from the value encoded in an
+        # empty executable.
+        if test "${lt_cv_aix_libpath+set}" = set; then
+  aix_libpath=$lt_cv_aix_libpath
+else
+  if test ${lt_cv_aix_libpath_+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main (void)
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"
+then :
+
+  lt_aix_libpath_sed='
+      /Import File Strings/,/^$/ {
+         /^0/ {
+             s/^0  *\([^ ]*\) *$/\1/
+             p
+         }
+      }'
+  lt_cv_aix_libpath_=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e 
"$lt_aix_libpath_sed"`
+  # Check for a 64-bit object if we didn't find anything.
+  if test -z "$lt_cv_aix_libpath_"; then
+    lt_cv_aix_libpath_=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e 
"$lt_aix_libpath_sed"`
+  fi
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam \
+    conftest$ac_exeext conftest.$ac_ext
+  if test -z "$lt_cv_aix_libpath_"; then
+    lt_cv_aix_libpath_="/usr/lib:/lib"
+  fi
+
+fi
+
+  aix_libpath=$lt_cv_aix_libpath_
+fi
+
+        hardcode_libdir_flag_spec='${wl}-blibpath:$libdir:'"$aix_libpath"
+        archive_expsym_cmds='$CC -o $output_objdir/$soname $libobjs $deplibs 
'"\${wl}$no_entry_flag"' $compiler_flags `if test "x${allow_undefined_flag}" != 
"x"; then func_echo_all "${wl}${allow_undefined_flag}"; else :; fi` 
'"\${wl}$exp_sym_flag:\$export_symbols $shared_flag"
+      else
+       if test "$host_cpu" = ia64; then
+         hardcode_libdir_flag_spec='${wl}-R $libdir:/usr/lib:/lib'
+         allow_undefined_flag="-z nodefs"
+         archive_expsym_cmds="\$CC $shared_flag"' -o $output_objdir/$soname 
$libobjs $deplibs '"\${wl}$no_entry_flag"' $compiler_flags 
${wl}${allow_undefined_flag} '"\${wl}$exp_sym_flag:\$export_symbols"
+       else
+        # Determine the default libpath from the value encoded in an
+        # empty executable.
+        if test "${lt_cv_aix_libpath+set}" = set; then
+  aix_libpath=$lt_cv_aix_libpath
+else
+  if test ${lt_cv_aix_libpath_+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main (void)
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"
+then :
+
+  lt_aix_libpath_sed='
+      /Import File Strings/,/^$/ {
+         /^0/ {
+             s/^0  *\([^ ]*\) *$/\1/
+             p
+         }
+      }'
+  lt_cv_aix_libpath_=`dump -H conftest$ac_exeext 2>/dev/null | $SED -n -e 
"$lt_aix_libpath_sed"`
+  # Check for a 64-bit object if we didn't find anything.
+  if test -z "$lt_cv_aix_libpath_"; then
+    lt_cv_aix_libpath_=`dump -HX64 conftest$ac_exeext 2>/dev/null | $SED -n -e 
"$lt_aix_libpath_sed"`
+  fi
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam \
+    conftest$ac_exeext conftest.$ac_ext
+  if test -z "$lt_cv_aix_libpath_"; then
+    lt_cv_aix_libpath_="/usr/lib:/lib"
+  fi
+
+fi
+
+  aix_libpath=$lt_cv_aix_libpath_
+fi
+
+        hardcode_libdir_flag_spec='${wl}-blibpath:$libdir:'"$aix_libpath"
+         # Warning - without using the other run time loading flags,
+         # -berok will link without error, but may produce a broken library.
+         no_undefined_flag=' ${wl}-bernotok'
+         allow_undefined_flag=' ${wl}-berok'
+         if test "$with_gnu_ld" = yes; then
+           # We only use this code for GNU lds that support --whole-archive.
+           whole_archive_flag_spec='${wl}--whole-archive$convenience 
${wl}--no-whole-archive'
+         else
+           # Exported symbols can be pulled into shared objects from archives
+           whole_archive_flag_spec='$convenience'
+         fi
+         archive_cmds_need_lc=yes
+         # This is similar to how AIX traditionally builds its shared 
libraries.
+         archive_expsym_cmds="\$CC $shared_flag"' -o $output_objdir/$soname 
$libobjs $deplibs ${wl}-bnoentry $compiler_flags 
${wl}-bE:$export_symbols${allow_undefined_flag}~$AR $AR_FLAGS 
$output_objdir/$libname$release.a $output_objdir/$soname'
+       fi
+      fi
+      ;;
+
+    amigaos*)
+      case $host_cpu in
+      powerpc)
+            # see comment about AmigaOS4 .so support
+            archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags 
${wl}-soname $wl$soname -o $lib'
+            archive_expsym_cmds=''
+        ;;
+      m68k)
+            archive_cmds='$RM $output_objdir/a2ixlibrary.data~$ECHO "#define 
NAME $libname" > $output_objdir/a2ixlibrary.data~$ECHO "#define LIBRARY_ID 1" 
>> $output_objdir/a2ixlibrary.data~$ECHO "#define VERSION $major" >> 
$output_objdir/a2ixlibrary.data~$ECHO "#define REVISION $revision" >> 
$output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd 
$output_objdir && a2ixlibrary -32)'
+            hardcode_libdir_flag_spec='-L$libdir'
+            hardcode_minus_L=yes
+        ;;
+      esac
+      ;;
+
+    bsdi[45]*)
+      export_dynamic_flag_spec=-rdynamic
+      ;;
+
+    cygwin* | mingw* | pw32* | cegcc*)
+      # When not using gcc, we currently assume that we are using
+      # Microsoft Visual C++.
+      # hardcode_libdir_flag_spec is actually meaningless, as there is
+      # no search path for DLLs.
+      case $cc_basename in
+      cl*)
+       # Native MSVC
+       hardcode_libdir_flag_spec=' '
+       allow_undefined_flag=unsupported
+       always_export_symbols=yes
+       file_list_spec='@'
+       # Tell ltmain to make .lib files, not .a files.
+       libext=lib
+       # Tell ltmain to make .dll files, not .so files.
+       shrext_cmds=".dll"
+       # FIXME: Setting linknames here is a bad hack.
+       archive_cmds='$CC -o $output_objdir/$soname $libobjs $compiler_flags 
$deplibs -Wl,-dll~linknames='
+       archive_expsym_cmds='if test "x`$SED \"$sed_uncomment_deffile\" 
$export_symbols | $SED 1q`" = xEXPORTS; then
+           sed -n -e 's/\\\\\\\(.*\\\\\\\)/-link\\\ -EXPORT:\\\\\\\1/' -e 
'1\\\!p' < $export_symbols > $output_objdir/$soname.exp;
+         else
+           sed -e 's/\\\\\\\(.*\\\\\\\)/-link\\\ -EXPORT:\\\\\\\1/' < 
$export_symbols > $output_objdir/$soname.exp;
+         fi~
+         $CC -o $tool_output_objdir$soname $libobjs $compiler_flags $deplibs 
"@$tool_output_objdir$soname.exp" 
-Wl,-DLL,-IMPLIB:"$tool_output_objdir$libname.dll.lib"~
+         linknames='
+       # The linker will not automatically build a static lib if we build a 
DLL.
+       # _LT_TAGVAR(old_archive_from_new_cmds, )='true'
+       enable_shared_with_static_runtimes=yes
+       exclude_expsyms='_NULL_IMPORT_DESCRIPTOR|_IMPORT_DESCRIPTOR_.*'
+       export_symbols_cmds='$NM $libobjs $convenience | $global_symbol_pipe | 
$SED -e '\''/^[BCDGRS][ ]/s/.*[ ]\([^ ]*\)/\1,DATA/'\'' | $SED -e '\''/^[AITW][ 
]/s/.*[ ]//'\'' | sort | uniq > $export_symbols'
+       # Don't use ranlib
+       old_postinstall_cmds='chmod 644 $oldlib'
+       postlink_cmds='lt_outputfile="@OUTPUT@"~
+         lt_tool_outputfile="@TOOL_OUTPUT@"~
+         case $lt_outputfile in
+           *.exe|*.EXE) ;;
+           *)
+             lt_outputfile="$lt_outputfile.exe"
+             lt_tool_outputfile="$lt_tool_outputfile.exe"
+             ;;
+         esac~
+         if test "$MANIFEST_TOOL" != ":" && test -f "$lt_outputfile.manifest"; 
then
+           $MANIFEST_TOOL -manifest "$lt_tool_outputfile.manifest" 
-outputresource:"$lt_tool_outputfile" || exit 1;
+           $RM "$lt_outputfile.manifest";
+         fi'
+       ;;
+      *)
+       # Assume MSVC wrapper
+       hardcode_libdir_flag_spec=' '
+       allow_undefined_flag=unsupported
+       # Tell ltmain to make .lib files, not .a files.
+       libext=lib
+       # Tell ltmain to make .dll files, not .so files.
+       shrext_cmds=".dll"
+       # FIXME: Setting linknames here is a bad hack.
+       archive_cmds='$CC -o $lib $libobjs $compiler_flags `func_echo_all 
"$deplibs" | $SED '\''s/ -lc$//'\''` -link -dll~linknames='
+       # The linker will automatically build a .lib file if we build a DLL.
+       old_archive_from_new_cmds='true'
+       # FIXME: Should let the user specify the lib program.
+       old_archive_cmds='lib -OUT:$oldlib$oldobjs$old_deplibs'
+       enable_shared_with_static_runtimes=yes
+       ;;
+      esac
+      ;;
+
+    darwin* | rhapsody*)
+
+
+  archive_cmds_need_lc=no
+  hardcode_direct=no
+  hardcode_automatic=yes
+  hardcode_shlibpath_var=unsupported
+  if test "$lt_cv_ld_force_load" = "yes"; then
+    whole_archive_flag_spec='`for conv in $convenience\"\"; do test  -n 
\"$conv\" && new_convenience=\"$new_convenience ${wl}-force_load,$conv\"; done; 
func_echo_all \"$new_convenience\"`'
+
+  else
+    whole_archive_flag_spec=''
+  fi
+  link_all_deplibs=yes
+  allow_undefined_flag="$_lt_dar_allow_undefined"
+  case $cc_basename in
+     ifort*) _lt_dar_can_shared=yes ;;
+     *) _lt_dar_can_shared=$GCC ;;
+  esac
+  if test "$_lt_dar_can_shared" = "yes"; then
+    output_verbose_link_cmd=func_echo_all
+    archive_cmds="\$CC -dynamiclib \$allow_undefined_flag -o \$lib \$libobjs 
\$deplibs \$compiler_flags -install_name \$rpath/\$soname \$verstring 
$_lt_dar_single_mod${_lt_dsymutil}"
+    module_cmds="\$CC \$allow_undefined_flag -o \$lib -bundle \$libobjs 
\$deplibs \$compiler_flags${_lt_dsymutil}"
+    archive_expsym_cmds="sed 's,^,_,' < \$export_symbols > 
\$output_objdir/\${libname}-symbols.expsym~\$CC -dynamiclib 
\$allow_undefined_flag -o \$lib \$libobjs \$deplibs \$compiler_flags 
-install_name \$rpath/\$soname \$verstring 
${_lt_dar_single_mod}${_lt_dar_export_syms}${_lt_dsymutil}"
+    module_expsym_cmds="sed -e 's,^,_,' < \$export_symbols > 
\$output_objdir/\${libname}-symbols.expsym~\$CC \$allow_undefined_flag -o \$lib 
-bundle \$libobjs \$deplibs 
\$compiler_flags${_lt_dar_export_syms}${_lt_dsymutil}"
+
+  else
+  ld_shlibs=no
+  fi
+
+      ;;
+
+    dgux*)
+      archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
+      hardcode_libdir_flag_spec='-L$libdir'
+      hardcode_shlibpath_var=no
+      ;;
+
+    # FreeBSD 2.2.[012] allows us to include c++rt0.o to get C++ constructor
+    # support.  Future versions do this automatically, but an explicit c++rt0.o
+    # does not break anything, and helps significantly (at the cost of a little
+    # extra space).
+    freebsd2.2*)
+      archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags 
/usr/lib/c++rt0.o'
+      hardcode_libdir_flag_spec='-R$libdir'
+      hardcode_direct=yes
+      hardcode_shlibpath_var=no
+      ;;
+
+    # Unfortunately, older versions of FreeBSD 2 do not have this feature.
+    freebsd2.*)
+      archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags'
+      hardcode_direct=yes
+      hardcode_minus_L=yes
+      hardcode_shlibpath_var=no
+      ;;
+
+    # FreeBSD 3 and greater uses gcc -shared to do shared libraries.
+    freebsd* | dragonfly*)
+      archive_cmds='$CC -shared $pic_flag -o $lib $libobjs $deplibs 
$compiler_flags'
+      hardcode_libdir_flag_spec='-R$libdir'
+      hardcode_direct=yes
+      hardcode_shlibpath_var=no
+      ;;
+
+    hpux9*)
+      if test "$GCC" = yes; then
+       archive_cmds='$RM $output_objdir/$soname~$CC -shared $pic_flag ${wl}+b 
${wl}$install_libdir -o $output_objdir/$soname $libobjs $deplibs 
$compiler_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname 
$lib'
+      else
+       archive_cmds='$RM $output_objdir/$soname~$LD -b +b $install_libdir -o 
$output_objdir/$soname $libobjs $deplibs $linker_flags~test 
$output_objdir/$soname = $lib || mv $output_objdir/$soname $lib'
+      fi
+      hardcode_libdir_flag_spec='${wl}+b ${wl}$libdir'
+      hardcode_libdir_separator=:
+      hardcode_direct=yes
+
+      # hardcode_minus_L: Not really in the search PATH,
+      # but as the default location of the library.
+      hardcode_minus_L=yes
+      export_dynamic_flag_spec='${wl}-E'
+      ;;
+
+    hpux10*)
+      if test "$GCC" = yes && test "$with_gnu_ld" = no; then
+       archive_cmds='$CC -shared $pic_flag ${wl}+h ${wl}$soname ${wl}+b 
${wl}$install_libdir -o $lib $libobjs $deplibs $compiler_flags'
+      else
+       archive_cmds='$LD -b +h $soname +b $install_libdir -o $lib $libobjs 
$deplibs $linker_flags'
+      fi
+      if test "$with_gnu_ld" = no; then
+       hardcode_libdir_flag_spec='${wl}+b ${wl}$libdir'
+       hardcode_libdir_separator=:
+       hardcode_direct=yes
+       hardcode_direct_absolute=yes
+       export_dynamic_flag_spec='${wl}-E'
+       # hardcode_minus_L: Not really in the search PATH,
+       # but as the default location of the library.
+       hardcode_minus_L=yes
+      fi
+      ;;
+
+    hpux11*)
+      if test "$GCC" = yes && test "$with_gnu_ld" = no; then
+       case $host_cpu in
+       hppa*64*)
+         archive_cmds='$CC -shared ${wl}+h ${wl}$soname -o $lib $libobjs 
$deplibs $compiler_flags'
+         ;;
+       ia64*)
+         archive_cmds='$CC -shared $pic_flag ${wl}+h ${wl}$soname 
${wl}+nodefaultrpath -o $lib $libobjs $deplibs $compiler_flags'
+         ;;
+       *)
+         archive_cmds='$CC -shared $pic_flag ${wl}+h ${wl}$soname ${wl}+b 
${wl}$install_libdir -o $lib $libobjs $deplibs $compiler_flags'
+         ;;
+       esac
+      else
+       case $host_cpu in
+       hppa*64*)
+         archive_cmds='$CC -b ${wl}+h ${wl}$soname -o $lib $libobjs $deplibs 
$compiler_flags'
+         ;;
+       ia64*)
+         archive_cmds='$CC -b ${wl}+h ${wl}$soname ${wl}+nodefaultrpath -o 
$lib $libobjs $deplibs $compiler_flags'
+         ;;
+       *)
+
+         # Older versions of the 11.00 compiler do not understand -b yet
+         # (HP92453-01 A.11.01.20 doesn't, HP92453-01 B.11.X.35175-35176.GP 
does)
+         { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if $CC 
understands -b" >&5
+printf %s "checking if $CC understands -b... " >&6; }
+if test ${lt_cv_prog_compiler__b+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  lt_cv_prog_compiler__b=no
+   save_LDFLAGS="$LDFLAGS"
+   LDFLAGS="$LDFLAGS -b"
+   echo "$lt_simple_link_test_code" > conftest.$ac_ext
+   if (eval $ac_link 2>conftest.err) && test -s conftest$ac_exeext; then
+     # The linker can only warn and ignore the option if not recognized
+     # So say no if there are warnings
+     if test -s conftest.err; then
+       # Append any errors to the config.log.
+       cat conftest.err 1>&5
+       $ECHO "$_lt_linker_boilerplate" | $SED '/^$/d' > conftest.exp
+       $SED '/^$/d; /^ *+/d' conftest.err >conftest.er2
+       if diff conftest.exp conftest.er2 >/dev/null; then
+         lt_cv_prog_compiler__b=yes
+       fi
+     else
+       lt_cv_prog_compiler__b=yes
+     fi
+   fi
+   $RM -r conftest*
+   LDFLAGS="$save_LDFLAGS"
+
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_prog_compiler__b" 
>&5
+printf "%s\n" "$lt_cv_prog_compiler__b" >&6; }
+
+if test x"$lt_cv_prog_compiler__b" = xyes; then
+    archive_cmds='$CC -b ${wl}+h ${wl}$soname ${wl}+b ${wl}$install_libdir -o 
$lib $libobjs $deplibs $compiler_flags'
+else
+    archive_cmds='$LD -b +h $soname +b $install_libdir -o $lib $libobjs 
$deplibs $linker_flags'
+fi
+
+         ;;
+       esac
+      fi
+      if test "$with_gnu_ld" = no; then
+       hardcode_libdir_flag_spec='${wl}+b ${wl}$libdir'
+       hardcode_libdir_separator=:
+
+       case $host_cpu in
+       hppa*64*|ia64*)
+         hardcode_direct=no
+         hardcode_shlibpath_var=no
+         ;;
+       *)
+         hardcode_direct=yes
+         hardcode_direct_absolute=yes
+         export_dynamic_flag_spec='${wl}-E'
+
+         # hardcode_minus_L: Not really in the search PATH,
+         # but as the default location of the library.
+         hardcode_minus_L=yes
+         ;;
+       esac
+      fi
+      ;;
+
+    irix5* | irix6* | nonstopux*)
+      if test "$GCC" = yes; then
+       archive_cmds='$CC -shared $pic_flag $libobjs $deplibs $compiler_flags 
${wl}-soname ${wl}$soname `test -n "$verstring" && func_echo_all 
"${wl}-set_version ${wl}$verstring"` ${wl}-update_registry 
${wl}${output_objdir}/so_locations -o $lib'
+       # Try to use the -exported_symbol ld option, if it does not
+       # work, assume that -exports_file does not work either and
+       # implicitly export all symbols.
+       # This should be the same for all languages, so no per-tag cache 
variable.
+       { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the 
$host_os linker accepts -exported_symbol" >&5
+printf %s "checking whether the $host_os linker accepts -exported_symbol... " 
>&6; }
+if test ${lt_cv_irix_exported_symbol+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  save_LDFLAGS="$LDFLAGS"
+          LDFLAGS="$LDFLAGS -shared ${wl}-exported_symbol ${wl}foo 
${wl}-update_registry ${wl}/dev/null"
+          cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+int foo (void) { return 0; }
+_ACEOF
+if ac_fn_c_try_link "$LINENO"
+then :
+  lt_cv_irix_exported_symbol=yes
+else $as_nop
+  lt_cv_irix_exported_symbol=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam \
+    conftest$ac_exeext conftest.$ac_ext
+           LDFLAGS="$save_LDFLAGS"
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: 
$lt_cv_irix_exported_symbol" >&5
+printf "%s\n" "$lt_cv_irix_exported_symbol" >&6; }
+       if test "$lt_cv_irix_exported_symbol" = yes; then
+          archive_expsym_cmds='$CC -shared $pic_flag $libobjs $deplibs 
$compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && 
func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry 
${wl}${output_objdir}/so_locations ${wl}-exports_file ${wl}$export_symbols -o 
$lib'
+       fi
+      else
+       archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags -soname 
$soname `test -n "$verstring" && func_echo_all "-set_version $verstring"` 
-update_registry ${output_objdir}/so_locations -o $lib'
+       archive_expsym_cmds='$CC -shared $libobjs $deplibs $compiler_flags 
-soname $soname `test -n "$verstring" && func_echo_all "-set_version 
$verstring"` -update_registry ${output_objdir}/so_locations -exports_file 
$export_symbols -o $lib'
+      fi
+      archive_cmds_need_lc='no'
+      hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir'
+      hardcode_libdir_separator=:
+      inherit_rpath=yes
+      link_all_deplibs=yes
+      ;;
+
+    netbsd* | netbsdelf*-gnu)
+      if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then
+       archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags'  
# a.out
+      else
+       archive_cmds='$LD -shared -o $lib $libobjs $deplibs $linker_flags'      
# ELF
+      fi
+      hardcode_libdir_flag_spec='-R$libdir'
+      hardcode_direct=yes
+      hardcode_shlibpath_var=no
+      ;;
+
+    newsos6)
+      archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
+      hardcode_direct=yes
+      hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir'
+      hardcode_libdir_separator=:
+      hardcode_shlibpath_var=no
+      ;;
+
+    *nto* | *qnx*)
+      ;;
+
+    openbsd*)
+      if test -f /usr/libexec/ld.so; then
+       hardcode_direct=yes
+       hardcode_shlibpath_var=no
+       hardcode_direct_absolute=yes
+       if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`" || test 
"$host_os-$host_cpu" = "openbsd2.8-powerpc"; then
+         archive_cmds='$CC -shared $pic_flag -o $lib $libobjs $deplibs 
$compiler_flags'
+         archive_expsym_cmds='$CC -shared $pic_flag -o $lib $libobjs $deplibs 
$compiler_flags ${wl}-retain-symbols-file,$export_symbols'
+         hardcode_libdir_flag_spec='${wl}-rpath,$libdir'
+         export_dynamic_flag_spec='${wl}-E'
+       else
+         case $host_os in
+          openbsd[01].* | openbsd2.[0-7] | openbsd2.[0-7].*)
+            archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs 
$linker_flags'
+            hardcode_libdir_flag_spec='-R$libdir'
+            ;;
+          *)
+            archive_cmds='$CC -shared $pic_flag -o $lib $libobjs $deplibs 
$compiler_flags'
+            hardcode_libdir_flag_spec='${wl}-rpath,$libdir'
+            ;;
+         esac
+       fi
+      else
+       ld_shlibs=no
+      fi
+      ;;
+
+    os2*)
+      hardcode_libdir_flag_spec='-L$libdir'
+      hardcode_minus_L=yes
+      allow_undefined_flag=unsupported
+      archive_cmds='$ECHO "LIBRARY $libname INITINSTANCE" > 
$output_objdir/$libname.def~$ECHO "DESCRIPTION \"$libname\"" >> 
$output_objdir/$libname.def~echo DATA >> $output_objdir/$libname.def~echo " 
SINGLE NONSHARED" >> $output_objdir/$libname.def~echo EXPORTS >> 
$output_objdir/$libname.def~emxexp $libobjs >> $output_objdir/$libname.def~$CC 
-Zdll -Zcrtdll -o $lib $libobjs $deplibs $compiler_flags 
$output_objdir/$libname.def'
+      old_archive_from_new_cmds='emximp -o $output_objdir/$libname.a 
$output_objdir/$libname.def'
+      ;;
+
+    osf3*)
+      if test "$GCC" = yes; then
+       allow_undefined_flag=' ${wl}-expect_unresolved ${wl}\*'
+       archive_cmds='$CC -shared${allow_undefined_flag} $libobjs $deplibs 
$compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && 
func_echo_all "${wl}-set_version ${wl}$verstring"` ${wl}-update_registry 
${wl}${output_objdir}/so_locations -o $lib'
+      else
+       allow_undefined_flag=' -expect_unresolved \*'
+       archive_cmds='$CC -shared${allow_undefined_flag} $libobjs $deplibs 
$compiler_flags -soname $soname `test -n "$verstring" && func_echo_all 
"-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o 
$lib'
+      fi
+      archive_cmds_need_lc='no'
+      hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir'
+      hardcode_libdir_separator=:
+      ;;
+
+    osf4* | osf5*)     # as osf3* with the addition of -msym flag
+      if test "$GCC" = yes; then
+       allow_undefined_flag=' ${wl}-expect_unresolved ${wl}\*'
+       archive_cmds='$CC -shared${allow_undefined_flag} $pic_flag $libobjs 
$deplibs $compiler_flags ${wl}-msym ${wl}-soname ${wl}$soname `test -n 
"$verstring" && func_echo_all "${wl}-set_version ${wl}$verstring"` 
${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib'
+       hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir'
+      else
+       allow_undefined_flag=' -expect_unresolved \*'
+       archive_cmds='$CC -shared${allow_undefined_flag} $libobjs $deplibs 
$compiler_flags -msym -soname $soname `test -n "$verstring" && func_echo_all 
"-set_version $verstring"` -update_registry ${output_objdir}/so_locations -o 
$lib'
+       archive_expsym_cmds='for i in `cat $export_symbols`; do printf "%s 
%s\\n" -exported_symbol "\$i" >> $lib.exp; done; printf "%s\\n" "-hidden">> 
$lib.exp~
+       $CC -shared${allow_undefined_flag} ${wl}-input ${wl}$lib.exp 
$compiler_flags $libobjs $deplibs -soname $soname `test -n "$verstring" && 
$ECHO "-set_version $verstring"` -update_registry ${output_objdir}/so_locations 
-o $lib~$RM $lib.exp'
+
+       # Both c and cxx compiler support -rpath directly
+       hardcode_libdir_flag_spec='-rpath $libdir'
+      fi
+      archive_cmds_need_lc='no'
+      hardcode_libdir_separator=:
+      ;;
+
+    solaris*)
+      no_undefined_flag=' -z defs'
+      if test "$GCC" = yes; then
+       wlarc='${wl}'
+       archive_cmds='$CC -shared $pic_flag ${wl}-z ${wl}text ${wl}-h 
${wl}$soname -o $lib $libobjs $deplibs $compiler_flags'
+       archive_expsym_cmds='echo "{ global:" > $lib.exp~cat $export_symbols | 
$SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~
+         $CC -shared $pic_flag ${wl}-z ${wl}text ${wl}-M ${wl}$lib.exp ${wl}-h 
${wl}$soname -o $lib $libobjs $deplibs $compiler_flags~$RM $lib.exp'
+      else
+       case `$CC -V 2>&1` in
+       *"Compilers 5.0"*)
+         wlarc=''
+         archive_cmds='$LD -G${allow_undefined_flag} -h $soname -o $lib 
$libobjs $deplibs $linker_flags'
+         archive_expsym_cmds='echo "{ global:" > $lib.exp~cat $export_symbols 
| $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~
+         $LD -G${allow_undefined_flag} -M $lib.exp -h $soname -o $lib $libobjs 
$deplibs $linker_flags~$RM $lib.exp'
+         ;;
+       *)
+         wlarc='${wl}'
+         archive_cmds='$CC -G${allow_undefined_flag} -h $soname -o $lib 
$libobjs $deplibs $compiler_flags'
+         archive_expsym_cmds='echo "{ global:" > $lib.exp~cat $export_symbols 
| $SED -e "s/\(.*\)/\1;/" >> $lib.exp~echo "local: *; };" >> $lib.exp~
+         $CC -G${allow_undefined_flag} -M $lib.exp -h $soname -o $lib $libobjs 
$deplibs $compiler_flags~$RM $lib.exp'
+         ;;
+       esac
+      fi
+      hardcode_libdir_flag_spec='-R$libdir'
+      hardcode_shlibpath_var=no
+      case $host_os in
+      solaris2.[0-5] | solaris2.[0-5].*) ;;
+      *)
+       # The compiler driver will combine and reorder linker options,
+       # but understands `-z linker_flag'.  GCC discards it without `$wl',
+       # but is careful enough not to reorder.
+       # Supported since Solaris 2.6 (maybe 2.5.1?)
+       if test "$GCC" = yes; then
+         whole_archive_flag_spec='${wl}-z ${wl}allextract$convenience ${wl}-z 
${wl}defaultextract'
+       else
+         whole_archive_flag_spec='-z allextract$convenience -z defaultextract'
+       fi
+       ;;
+      esac
+      link_all_deplibs=yes
+      ;;
+
+    sunos4*)
+      if test "x$host_vendor" = xsequent; then
+       # Use $CC to link under sequent, because it throws in some extra .o
+       # files that make .init and .fini sections work.
+       archive_cmds='$CC -G ${wl}-h $soname -o $lib $libobjs $deplibs 
$compiler_flags'
+      else
+       archive_cmds='$LD -assert pure-text -Bstatic -o $lib $libobjs $deplibs 
$linker_flags'
+      fi
+      hardcode_libdir_flag_spec='-L$libdir'
+      hardcode_direct=yes
+      hardcode_minus_L=yes
+      hardcode_shlibpath_var=no
+      ;;
+
+    sysv4)
+      case $host_vendor in
+       sni)
+         archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs 
$linker_flags'
+         hardcode_direct=yes # is this really true???
+       ;;
+       siemens)
+         ## LD is ld it makes a PLAMLIB
+         ## CC just makes a GrossModule.
+         archive_cmds='$LD -G -o $lib $libobjs $deplibs $linker_flags'
+         reload_cmds='$CC -r -o $output$reload_objs'
+         hardcode_direct=no
+        ;;
+       motorola)
+         archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs 
$linker_flags'
+         hardcode_direct=no #Motorola manual says yes, but my tests say they 
lie
+       ;;
+      esac
+      runpath_var='LD_RUN_PATH'
+      hardcode_shlibpath_var=no
+      ;;
+
+    sysv4.3*)
+      archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
+      hardcode_shlibpath_var=no
+      export_dynamic_flag_spec='-Bexport'
+      ;;
+
+    sysv4*MP*)
+      if test -d /usr/nec; then
+       archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
+       hardcode_shlibpath_var=no
+       runpath_var=LD_RUN_PATH
+       hardcode_runpath_var=yes
+       ld_shlibs=yes
+      fi
+      ;;
+
+    sysv4*uw2* | sysv5OpenUNIX* | sysv5UnixWare7.[01].[10]* | unixware7* | 
sco3.2v5.0.[024]*)
+      no_undefined_flag='${wl}-z,text'
+      archive_cmds_need_lc=no
+      hardcode_shlibpath_var=no
+      runpath_var='LD_RUN_PATH'
+
+      if test "$GCC" = yes; then
+       archive_cmds='$CC -shared ${wl}-h,$soname -o $lib $libobjs $deplibs 
$compiler_flags'
+       archive_expsym_cmds='$CC -shared ${wl}-Bexport:$export_symbols 
${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
+      else
+       archive_cmds='$CC -G ${wl}-h,$soname -o $lib $libobjs $deplibs 
$compiler_flags'
+       archive_expsym_cmds='$CC -G ${wl}-Bexport:$export_symbols 
${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
+      fi
+      ;;
+
+    sysv5* | sco3.2v5* | sco5v6*)
+      # Note: We can NOT use -z defs as we might desire, because we do not
+      # link with -lc, and that would cause any symbols used from libc to
+      # always be unresolved, which means just about no library would
+      # ever link correctly.  If we're not using GNU ld we use -z text
+      # though, which does catch some bad symbols but isn't as heavy-handed
+      # as -z defs.
+      no_undefined_flag='${wl}-z,text'
+      allow_undefined_flag='${wl}-z,nodefs'
+      archive_cmds_need_lc=no
+      hardcode_shlibpath_var=no
+      hardcode_libdir_flag_spec='${wl}-R,$libdir'
+      hardcode_libdir_separator=':'
+      link_all_deplibs=yes
+      export_dynamic_flag_spec='${wl}-Bexport'
+      runpath_var='LD_RUN_PATH'
+
+      if test "$GCC" = yes; then
+       archive_cmds='$CC -shared ${wl}-h,$soname -o $lib $libobjs $deplibs 
$compiler_flags'
+       archive_expsym_cmds='$CC -shared ${wl}-Bexport:$export_symbols 
${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
+      else
+       archive_cmds='$CC -G ${wl}-h,$soname -o $lib $libobjs $deplibs 
$compiler_flags'
+       archive_expsym_cmds='$CC -G ${wl}-Bexport:$export_symbols 
${wl}-h,$soname -o $lib $libobjs $deplibs $compiler_flags'
+      fi
+      ;;
+
+    uts4*)
+      archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags'
+      hardcode_libdir_flag_spec='-L$libdir'
+      hardcode_shlibpath_var=no
+      ;;
+
+    *)
+      ld_shlibs=no
+      ;;
+    esac
+
+    if test x$host_vendor = xsni; then
+      case $host in
+      sysv4 | sysv4.2uw2* | sysv4.3* | sysv5*)
+       export_dynamic_flag_spec='${wl}-Blargedynsym'
+       ;;
+      esac
+    fi
+  fi
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ld_shlibs" >&5
+printf "%s\n" "$ld_shlibs" >&6; }
+test "$ld_shlibs" = no && can_build_shared=no
+
+with_gnu_ld=$with_gnu_ld
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#
+# Do we need to explicitly link libc?
+#
+case "x$archive_cmds_need_lc" in
+x|xyes)
+  # Assume -lc should be added
+  archive_cmds_need_lc=yes
+
+  if test "$enable_shared" = yes && test "$GCC" = yes; then
+    case $archive_cmds in
+    *'~'*)
+      # FIXME: we may have to deal with multi-command sequences.
+      ;;
+    '$CC '*)
+      # Test whether the compiler implicitly links with -lc since on some
+      # systems, -lgcc has to come before -lc. If gcc already passes -lc
+      # to ld, don't add -lc before -lgcc.
+      { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether -lc 
should be explicitly linked in" >&5
+printf %s "checking whether -lc should be explicitly linked in... " >&6; }
+if test ${lt_cv_archive_cmds_need_lc+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  $RM conftest*
+       echo "$lt_simple_compile_test_code" > conftest.$ac_ext
+
+       if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } 
>&5
+  (eval $ac_compile) 2>&5
+  ac_status=$?
+  printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; } 2>conftest.err; then
+         soname=conftest
+         lib=conftest
+         libobjs=conftest.$ac_objext
+         deplibs=
+         wl=$lt_prog_compiler_wl
+         pic_flag=$lt_prog_compiler_pic
+         compiler_flags=-v
+         linker_flags=-v
+         verstring=
+         output_objdir=.
+         libname=conftest
+         lt_save_allow_undefined_flag=$allow_undefined_flag
+         allow_undefined_flag=
+         if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$archive_cmds 
2\>\&1 \| $GREP \" -lc \" \>/dev/null 2\>\&1\""; } >&5
+  (eval $archive_cmds 2\>\&1 \| $GREP \" -lc \" \>/dev/null 2\>\&1) 2>&5
+  ac_status=$?
+  printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }
+         then
+           lt_cv_archive_cmds_need_lc=no
+         else
+           lt_cv_archive_cmds_need_lc=yes
+         fi
+         allow_undefined_flag=$lt_save_allow_undefined_flag
+       else
+         cat conftest.err 1>&5
+       fi
+       $RM conftest*
+
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: 
$lt_cv_archive_cmds_need_lc" >&5
+printf "%s\n" "$lt_cv_archive_cmds_need_lc" >&6; }
+      archive_cmds_need_lc=$lt_cv_archive_cmds_need_lc
+      ;;
+    esac
+  fi
+  ;;
+esac
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking dynamic linker 
characteristics" >&5
+printf %s "checking dynamic linker characteristics... " >&6; }
+
+if test "$GCC" = yes; then
+  case $host_os in
+    darwin*) lt_awk_arg="/^libraries:/,/LR/" ;;
+    *) lt_awk_arg="/^libraries:/" ;;
+  esac
+  case $host_os in
+    mingw* | cegcc*) lt_sed_strip_eq="s,=\([A-Za-z]:\),\1,g" ;;
+    *) lt_sed_strip_eq="s,=/,/,g" ;;
+  esac
+  lt_search_path_spec=`$CC -print-search-dirs | awk $lt_awk_arg | $SED -e 
"s/^libraries://" -e $lt_sed_strip_eq`
+  case $lt_search_path_spec in
+  *\;*)
+    # if the path contains ";" then we assume it to be the separator
+    # otherwise default to the standard path separator (i.e. ":") - it is
+    # assumed that no part of a normal pathname contains ";" but that should
+    # okay in the real world where ";" in dirpaths is itself problematic.
+    lt_search_path_spec=`$ECHO "$lt_search_path_spec" | $SED 's/;/ /g'`
+    ;;
+  *)
+    lt_search_path_spec=`$ECHO "$lt_search_path_spec" | $SED 
"s/$PATH_SEPARATOR/ /g"`
+    ;;
+  esac
+  # Ok, now we have the path, separated by spaces, we can step through it
+  # and add multilib dir if necessary.
+  lt_tmp_lt_search_path_spec=
+  lt_multi_os_dir=`$CC $CPPFLAGS $CFLAGS $LDFLAGS -print-multi-os-directory 
2>/dev/null`
+  for lt_sys_path in $lt_search_path_spec; do
+    if test -d "$lt_sys_path/$lt_multi_os_dir"; then
+      lt_tmp_lt_search_path_spec="$lt_tmp_lt_search_path_spec 
$lt_sys_path/$lt_multi_os_dir"
+    else
+      test -d "$lt_sys_path" && \
+       lt_tmp_lt_search_path_spec="$lt_tmp_lt_search_path_spec $lt_sys_path"
+    fi
+  done
+  lt_search_path_spec=`$ECHO "$lt_tmp_lt_search_path_spec" | awk '
+BEGIN {RS=" "; FS="/|\n";} {
+  lt_foo="";
+  lt_count=0;
+  for (lt_i = NF; lt_i > 0; lt_i--) {
+    if ($lt_i != "" && $lt_i != ".") {
+      if ($lt_i == "..") {
+        lt_count++;
+      } else {
+        if (lt_count == 0) {
+          lt_foo="/" $lt_i lt_foo;
+        } else {
+          lt_count--;
+        }
+      }
+    }
+  }
+  if (lt_foo != "") { lt_freq[lt_foo]++; }
+  if (lt_freq[lt_foo] == 1) { print lt_foo; }
+}'`
+  # AWK program above erroneously prepends '/' to C:/dos/paths
+  # for these hosts.
+  case $host_os in
+    mingw* | cegcc*) lt_search_path_spec=`$ECHO "$lt_search_path_spec" |\
+      $SED 's,/\([A-Za-z]:\),\1,g'` ;;
+  esac
+  sys_lib_search_path_spec=`$ECHO "$lt_search_path_spec" | $lt_NL2SP`
+else
+  sys_lib_search_path_spec="/lib /usr/lib /usr/local/lib"
+fi
+library_names_spec=
+libname_spec='lib$name'
+soname_spec=
+shrext_cmds=".so"
+postinstall_cmds=
+postuninstall_cmds=
+finish_cmds=
+finish_eval=
+shlibpath_var=
+shlibpath_overrides_runpath=unknown
+version_type=none
+dynamic_linker="$host_os ld.so"
+sys_lib_dlsearch_path_spec="/lib /usr/lib"
+need_lib_prefix=unknown
+hardcode_into_libs=no
+
+# when you set need_version to no, make sure it does not cause -set_version
+# flags to be left without arguments
+need_version=unknown
+
+case $host_os in
+aix3*)
+  version_type=linux # correct to gnu/linux during the next big refactor
+  library_names_spec='${libname}${release}${shared_ext}$versuffix $libname.a'
+  shlibpath_var=LIBPATH
+
+  # AIX 3 has no versioning support, so we append a major version to the name.
+  soname_spec='${libname}${release}${shared_ext}$major'
+  ;;
+
+aix[4-9]*)
+  version_type=linux # correct to gnu/linux during the next big refactor
+  need_lib_prefix=no
+  need_version=no
+  hardcode_into_libs=yes
+  if test "$host_cpu" = ia64; then
+    # AIX 5 supports IA64
+    library_names_spec='${libname}${release}${shared_ext}$major 
${libname}${release}${shared_ext}$versuffix $libname${shared_ext}'
+    shlibpath_var=LD_LIBRARY_PATH
+  else
+    # With GCC up to 2.95.x, collect2 would create an import file
+    # for dependence libraries.  The import file would start with
+    # the line `#! .'.  This would cause the generated library to
+    # depend on `.', always an invalid library.  This was fixed in
+    # development snapshots of GCC prior to 3.0.
+    case $host_os in
+      aix4 | aix4.[01] | aix4.[01].*)
+      if { echo '#if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 97)'
+          echo ' yes '
+          echo '#endif'; } | ${CC} -E - | $GREP yes > /dev/null; then
+       :
+      else
+       can_build_shared=no
+      fi
+      ;;
+    esac
+    # AIX (on Power*) has no versioning support, so currently we can not 
hardcode correct
+    # soname into executable. Probably we can add versioning support to
+    # collect2, so additional links can be useful in future.
+    if test "$aix_use_runtimelinking" = yes; then
+      # If using run time linking (on AIX 4.2 or later) use lib<name>.so
+      # instead of lib<name>.a to let people know that these are not
+      # typical AIX shared libraries.
+      library_names_spec='${libname}${release}${shared_ext}$versuffix 
${libname}${release}${shared_ext}$major $libname${shared_ext}'
+    else
+      # We preserve .a as extension for shared libraries through AIX4.2
+      # and later when we are not doing run time linking.
+      library_names_spec='${libname}${release}.a $libname.a'
+      soname_spec='${libname}${release}${shared_ext}$major'
+    fi
+    shlibpath_var=LIBPATH
+  fi
+  ;;
+
+amigaos*)
+  case $host_cpu in
+  powerpc)
+    # Since July 2007 AmigaOS4 officially supports .so libraries.
+    # When compiling the executable, add -use-dynld -Lsobjs: to the 
compileline.
+    library_names_spec='${libname}${release}${shared_ext}$versuffix 
${libname}${release}${shared_ext}$major $libname${shared_ext}'
+    ;;
+  m68k)
+    library_names_spec='$libname.ixlibrary $libname.a'
+    # Create ${libname}_ixlibrary.a entries in /sys/libs.
+    finish_eval='for lib in `ls $libdir/*.ixlibrary 2>/dev/null`; do 
libname=`func_echo_all "$lib" | $SED '\''s%^.*/\([^/]*\)\.ixlibrary$%\1%'\''`; 
test $RM /sys/libs/${libname}_ixlibrary.a; $show "cd /sys/libs && $LN_S $lib 
${libname}_ixlibrary.a"; cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a || 
exit 1; done'
+    ;;
+  esac
+  ;;
+
+beos*)
+  library_names_spec='${libname}${shared_ext}'
+  dynamic_linker="$host_os ld.so"
+  shlibpath_var=LIBRARY_PATH
+  ;;
+
+bsdi[45]*)
+  version_type=linux # correct to gnu/linux during the next big refactor
+  need_version=no
+  library_names_spec='${libname}${release}${shared_ext}$versuffix 
${libname}${release}${shared_ext}$major $libname${shared_ext}'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  finish_cmds='PATH="\$PATH:/sbin" ldconfig $libdir'
+  shlibpath_var=LD_LIBRARY_PATH
+  sys_lib_search_path_spec="/shlib /usr/lib /usr/X11/lib /usr/contrib/lib /lib 
/usr/local/lib"
+  sys_lib_dlsearch_path_spec="/shlib /usr/lib /usr/local/lib"
+  # the default ld.so.conf also contains /usr/contrib/lib and
+  # /usr/X11R6/lib (/usr/X11 is a link to /usr/X11R6), but let us allow
+  # libtool to hard-code these into programs
+  ;;
+
+cygwin* | mingw* | pw32* | cegcc*)
+  version_type=windows
+  shrext_cmds=".dll"
+  need_version=no
+  need_lib_prefix=no
+
+  case $GCC,$cc_basename in
+  yes,*)
+    # gcc
+    library_names_spec='$libname.dll.a'
+    # DLL is installed to $(libdir)/../bin by postinstall_cmds
+    postinstall_cmds='base_file=`basename \${file}`~
+      dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\${base_file}'\''i; echo 
\$dlname'\''`~
+      dldir=$destdir/`dirname \$dlpath`~
+      test -d \$dldir || mkdir -p \$dldir~
+      $install_prog $dir/$dlname \$dldir/$dlname~
+      chmod a+x \$dldir/$dlname~
+      if test -n '\''$stripme'\'' && test -n '\''$striplib'\''; then
+        eval '\''$striplib \$dldir/$dlname'\'' || exit \$?;
+      fi'
+    postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. $file; echo \$dlname'\''`~
+      dlpath=$dir/\$dldll~
+       $RM \$dlpath'
+    shlibpath_overrides_runpath=yes
+
+    case $host_os in
+    cygwin*)
+      # Cygwin DLLs use 'cyg' prefix rather than 'lib'
+      soname_spec='`echo ${libname} | sed -e 's/^lib/cyg/'``echo ${release} | 
$SED -e 's/[.]/-/g'`${versuffix}${shared_ext}'
+
+      sys_lib_search_path_spec="$sys_lib_search_path_spec /usr/lib/w32api"
+      ;;
+    mingw* | cegcc*)
+      # MinGW DLLs use traditional 'lib' prefix
+      soname_spec='${libname}`echo ${release} | $SED -e 
's/[.]/-/g'`${versuffix}${shared_ext}'
+      ;;
+    pw32*)
+      # pw32 DLLs use 'pw' prefix rather than 'lib'
+      library_names_spec='`echo ${libname} | sed -e 's/^lib/pw/'``echo 
${release} | $SED -e 's/[.]/-/g'`${versuffix}${shared_ext}'
+      ;;
+    esac
+    dynamic_linker='Win32 ld.exe'
+    ;;
+
+  *,cl*)
+    # Native MSVC
+    libname_spec='$name'
+    soname_spec='${libname}`echo ${release} | $SED -e 
's/[.]/-/g'`${versuffix}${shared_ext}'
+    library_names_spec='${libname}.dll.lib'
+
+    case $build_os in
+    mingw*)
+      sys_lib_search_path_spec=
+      lt_save_ifs=$IFS
+      IFS=';'
+      for lt_path in $LIB
+      do
+        IFS=$lt_save_ifs
+        # Let DOS variable expansion print the short 8.3 style file name.
+        lt_path=`cd "$lt_path" 2>/dev/null && cmd //C "for %i in (".") do 
@echo %~si"`
+        sys_lib_search_path_spec="$sys_lib_search_path_spec $lt_path"
+      done
+      IFS=$lt_save_ifs
+      # Convert to MSYS style.
+      sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | sed -e 
's|\\\\|/|g' -e 's| \\([a-zA-Z]\\):| /\\1|g' -e 's|^ ||'`
+      ;;
+    cygwin*)
+      # Convert to unix form, then to dos form, then back to unix form
+      # but this time dos style (no spaces!) so that the unix form looks
+      # like /cygdrive/c/PROGRA~1:/cygdr...
+      sys_lib_search_path_spec=`cygpath --path --unix "$LIB"`
+      sys_lib_search_path_spec=`cygpath --path --dos 
"$sys_lib_search_path_spec" 2>/dev/null`
+      sys_lib_search_path_spec=`cygpath --path --unix 
"$sys_lib_search_path_spec" | $SED -e "s/$PATH_SEPARATOR/ /g"`
+      ;;
+    *)
+      sys_lib_search_path_spec="$LIB"
+      if $ECHO "$sys_lib_search_path_spec" | $GREP ';[c-zC-Z]:/' >/dev/null; 
then
+        # It is most probably a Windows format PATH.
+        sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e 
's/;/ /g'`
+      else
+        sys_lib_search_path_spec=`$ECHO "$sys_lib_search_path_spec" | $SED -e 
"s/$PATH_SEPARATOR/ /g"`
+      fi
+      # FIXME: find the short name or the path components, as spaces are
+      # common. (e.g. "Program Files" -> "PROGRA~1")
+      ;;
+    esac
+
+    # DLL is installed to $(libdir)/../bin by postinstall_cmds
+    postinstall_cmds='base_file=`basename \${file}`~
+      dlpath=`$SHELL 2>&1 -c '\''. $dir/'\''\${base_file}'\''i; echo 
\$dlname'\''`~
+      dldir=$destdir/`dirname \$dlpath`~
+      test -d \$dldir || mkdir -p \$dldir~
+      $install_prog $dir/$dlname \$dldir/$dlname'
+    postuninstall_cmds='dldll=`$SHELL 2>&1 -c '\''. $file; echo \$dlname'\''`~
+      dlpath=$dir/\$dldll~
+       $RM \$dlpath'
+    shlibpath_overrides_runpath=yes
+    dynamic_linker='Win32 link.exe'
+    ;;
+
+  *)
+    # Assume MSVC wrapper
+    library_names_spec='${libname}`echo ${release} | $SED -e 
's/[.]/-/g'`${versuffix}${shared_ext} $libname.lib'
+    dynamic_linker='Win32 ld.exe'
+    ;;
+  esac
+  # FIXME: first we should search . and the directory the executable is in
+  shlibpath_var=PATH
+  ;;
+
+darwin* | rhapsody*)
+  dynamic_linker="$host_os dyld"
+  version_type=darwin
+  need_lib_prefix=no
+  need_version=no
+  library_names_spec='${libname}${release}${major}$shared_ext 
${libname}$shared_ext'
+  soname_spec='${libname}${release}${major}$shared_ext'
+  shlibpath_overrides_runpath=yes
+  shlibpath_var=DYLD_LIBRARY_PATH
+  shrext_cmds='`test .$module = .yes && echo .so || echo .dylib`'
+
+  sys_lib_search_path_spec="$sys_lib_search_path_spec /usr/local/lib"
+  sys_lib_dlsearch_path_spec='/usr/local/lib /lib /usr/lib'
+  ;;
+
+dgux*)
+  version_type=linux # correct to gnu/linux during the next big refactor
+  need_lib_prefix=no
+  need_version=no
+  library_names_spec='${libname}${release}${shared_ext}$versuffix 
${libname}${release}${shared_ext}$major $libname$shared_ext'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  shlibpath_var=LD_LIBRARY_PATH
+  ;;
+
+freebsd* | dragonfly*)
+  # DragonFly does not have aout.  When/if they implement a new
+  # versioning mechanism, adjust this.
+  if test -x /usr/bin/objformat; then
+    objformat=`/usr/bin/objformat`
+  else
+    case $host_os in
+    freebsd[23].*) objformat=aout ;;
+    *) objformat=elf ;;
+    esac
+  fi
+  version_type=freebsd-$objformat
+  case $version_type in
+    freebsd-elf*)
+      library_names_spec='${libname}${release}${shared_ext}$versuffix 
${libname}${release}${shared_ext} $libname${shared_ext}'
+      need_version=no
+      need_lib_prefix=no
+      ;;
+    freebsd-*)
+      library_names_spec='${libname}${release}${shared_ext}$versuffix 
$libname${shared_ext}$versuffix'
+      need_version=yes
+      ;;
+  esac
+  shlibpath_var=LD_LIBRARY_PATH
+  case $host_os in
+  freebsd2.*)
+    shlibpath_overrides_runpath=yes
+    ;;
+  freebsd3.[01]* | freebsdelf3.[01]*)
+    shlibpath_overrides_runpath=yes
+    hardcode_into_libs=yes
+    ;;
+  freebsd3.[2-9]* | freebsdelf3.[2-9]* | \
+  freebsd4.[0-5] | freebsdelf4.[0-5] | freebsd4.1.1 | freebsdelf4.1.1)
+    shlibpath_overrides_runpath=no
+    hardcode_into_libs=yes
+    ;;
+  *) # from 4.6 on, and DragonFly
+    shlibpath_overrides_runpath=yes
+    hardcode_into_libs=yes
+    ;;
+  esac
+  ;;
+
+gnu*)
+  version_type=linux # correct to gnu/linux during the next big refactor
+  need_lib_prefix=no
+  need_version=no
+  library_names_spec='${libname}${release}${shared_ext}$versuffix 
${libname}${release}${shared_ext}${major} ${libname}${shared_ext}'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=no
+  hardcode_into_libs=yes
+  ;;
+
+haiku*)
+  version_type=linux # correct to gnu/linux during the next big refactor
+  need_lib_prefix=no
+  need_version=no
+  dynamic_linker="$host_os runtime_loader"
+  library_names_spec='${libname}${release}${shared_ext}$versuffix 
${libname}${release}${shared_ext}${major} ${libname}${shared_ext}'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  shlibpath_var=LIBRARY_PATH
+  shlibpath_overrides_runpath=yes
+  sys_lib_dlsearch_path_spec='/boot/home/config/lib /boot/common/lib 
/boot/system/lib'
+  hardcode_into_libs=yes
+  ;;
+
+hpux9* | hpux10* | hpux11*)
+  # Give a soname corresponding to the major version so that dld.sl refuses to
+  # link against other versions.
+  version_type=sunos
+  need_lib_prefix=no
+  need_version=no
+  case $host_cpu in
+  ia64*)
+    shrext_cmds='.so'
+    hardcode_into_libs=yes
+    dynamic_linker="$host_os dld.so"
+    shlibpath_var=LD_LIBRARY_PATH
+    shlibpath_overrides_runpath=yes # Unless +noenvvar is specified.
+    library_names_spec='${libname}${release}${shared_ext}$versuffix 
${libname}${release}${shared_ext}$major $libname${shared_ext}'
+    soname_spec='${libname}${release}${shared_ext}$major'
+    if test "X$HPUX_IA64_MODE" = X32; then
+      sys_lib_search_path_spec="/usr/lib/hpux32 /usr/local/lib/hpux32 
/usr/local/lib"
+    else
+      sys_lib_search_path_spec="/usr/lib/hpux64 /usr/local/lib/hpux64"
+    fi
+    sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec
+    ;;
+  hppa*64*)
+    shrext_cmds='.sl'
+    hardcode_into_libs=yes
+    dynamic_linker="$host_os dld.sl"
+    shlibpath_var=LD_LIBRARY_PATH # How should we handle SHLIB_PATH
+    shlibpath_overrides_runpath=yes # Unless +noenvvar is specified.
+    library_names_spec='${libname}${release}${shared_ext}$versuffix 
${libname}${release}${shared_ext}$major $libname${shared_ext}'
+    soname_spec='${libname}${release}${shared_ext}$major'
+    sys_lib_search_path_spec="/usr/lib/pa20_64 /usr/ccs/lib/pa20_64"
+    sys_lib_dlsearch_path_spec=$sys_lib_search_path_spec
+    ;;
+  *)
+    shrext_cmds='.sl'
+    dynamic_linker="$host_os dld.sl"
+    shlibpath_var=SHLIB_PATH
+    shlibpath_overrides_runpath=no # +s is required to enable SHLIB_PATH
+    library_names_spec='${libname}${release}${shared_ext}$versuffix 
${libname}${release}${shared_ext}$major $libname${shared_ext}'
+    soname_spec='${libname}${release}${shared_ext}$major'
+    ;;
+  esac
+  # HP-UX runs *really* slowly unless shared libraries are mode 555, ...
+  postinstall_cmds='chmod 555 $lib'
+  # or fails outright, so override atomically:
+  install_override_mode=555
+  ;;
+
+interix[3-9]*)
+  version_type=linux # correct to gnu/linux during the next big refactor
+  need_lib_prefix=no
+  need_version=no
+  library_names_spec='${libname}${release}${shared_ext}$versuffix 
${libname}${release}${shared_ext}$major ${libname}${shared_ext}'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  dynamic_linker='Interix 3.x ld.so.1 (PE, like ELF)'
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=no
+  hardcode_into_libs=yes
+  ;;
+
+irix5* | irix6* | nonstopux*)
+  case $host_os in
+    nonstopux*) version_type=nonstopux ;;
+    *)
+       if test "$lt_cv_prog_gnu_ld" = yes; then
+               version_type=linux # correct to gnu/linux during the next big 
refactor
+       else
+               version_type=irix
+       fi ;;
+  esac
+  need_lib_prefix=no
+  need_version=no
+  soname_spec='${libname}${release}${shared_ext}$major'
+  library_names_spec='${libname}${release}${shared_ext}$versuffix 
${libname}${release}${shared_ext}$major ${libname}${release}${shared_ext} 
$libname${shared_ext}'
+  case $host_os in
+  irix5* | nonstopux*)
+    libsuff= shlibsuff=
+    ;;
+  *)
+    case $LD in # libtool.m4 will add one of these switches to LD
+    *-32|*"-32 "|*-melf32bsmip|*"-melf32bsmip ")
+      libsuff= shlibsuff= libmagic=32-bit;;
+    *-n32|*"-n32 "|*-melf32bmipn32|*"-melf32bmipn32 ")
+      libsuff=32 shlibsuff=N32 libmagic=N32;;
+    *-64|*"-64 "|*-melf64bmip|*"-melf64bmip ")
+      libsuff=64 shlibsuff=64 libmagic=64-bit;;
+    *) libsuff= shlibsuff= libmagic=never-match;;
+    esac
+    ;;
+  esac
+  shlibpath_var=LD_LIBRARY${shlibsuff}_PATH
+  shlibpath_overrides_runpath=no
+  sys_lib_search_path_spec="/usr/lib${libsuff} /lib${libsuff} 
/usr/local/lib${libsuff}"
+  sys_lib_dlsearch_path_spec="/usr/lib${libsuff} /lib${libsuff}"
+  hardcode_into_libs=yes
+  ;;
+
+# No shared lib support for Linux oldld, aout, or coff.
+linux*oldld* | linux*aout* | linux*coff*)
+  dynamic_linker=no
+  ;;
+
+linux*android*)
+  version_type=none # Android doesn't support versioned libraries.
+  need_lib_prefix=no
+  need_version=no
+  library_names_spec='$libname$release$shared_ext'
+  soname_spec='$libname$release$shared_ext'
+  finish_cmds=
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=yes
+
+  # This implies no fast_install, which is unacceptable.
+  # Some rework will be needed to allow for fast_install
+  # before this can be enabled.
+  hardcode_into_libs=yes
+
+  dynamic_linker='Android linker'
+  # Don't embed -rpath directories since the linker doesn't support them.
+  hardcode_libdir_flag_spec='-L$libdir'
+  ;;
+
+# This must be glibc/ELF.
+linux* | k*bsd*-gnu | kopensolaris*-gnu)
+  version_type=linux # correct to gnu/linux during the next big refactor
+  need_lib_prefix=no
+  need_version=no
+  library_names_spec='${libname}${release}${shared_ext}$versuffix 
${libname}${release}${shared_ext}$major $libname${shared_ext}'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  finish_cmds='PATH="\$PATH:/sbin" ldconfig -n $libdir'
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=no
+
+  # Some binutils ld are patched to set DT_RUNPATH
+  if test ${lt_cv_shlibpath_overrides_runpath+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  lt_cv_shlibpath_overrides_runpath=no
+    save_LDFLAGS=$LDFLAGS
+    save_libdir=$libdir
+    eval "libdir=/foo; wl=\"$lt_prog_compiler_wl\"; \
+        LDFLAGS=\"\$LDFLAGS $hardcode_libdir_flag_spec\""
+    cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main (void)
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"
+then :
+  if  ($OBJDUMP -p conftest$ac_exeext) 2>/dev/null | grep "RUNPATH.*$libdir" 
>/dev/null
+then :
+  lt_cv_shlibpath_overrides_runpath=yes
+fi
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam \
+    conftest$ac_exeext conftest.$ac_ext
+    LDFLAGS=$save_LDFLAGS
+    libdir=$save_libdir
+
+fi
+
+  shlibpath_overrides_runpath=$lt_cv_shlibpath_overrides_runpath
+
+  # This implies no fast_install, which is unacceptable.
+  # Some rework will be needed to allow for fast_install
+  # before this can be enabled.
+  hardcode_into_libs=yes
+
+  # Append ld.so.conf contents to the search path
+  if test -f /etc/ld.so.conf; then
+    lt_ld_extra=`awk '/^include / { system(sprintf("cd /etc; cat %s 
2>/dev/null", \$2)); skip = 1; } { if (!skip) print \$0; skip = 0; }' < 
/etc/ld.so.conf | $SED -e 's/#.*//;/^[      ]*hwcap[        ]/d;s/[:,      ]/ 
/g;s/=[^=]*$//;s/=[^= ]* / /g;s/"//g;/^$/d' | tr '\n' ' '`
+    sys_lib_dlsearch_path_spec="/lib /usr/lib $lt_ld_extra"
+  fi
+
+  # We used to test for /lib/ld.so.1 and disable shared libraries on
+  # powerpc, because MkLinux only supported shared libraries with the
+  # GNU dynamic linker.  Since this was broken with cross compilers,
+  # most powerpc-linux boxes support dynamic linking these days and
+  # people can always --disable-shared, the test was removed, and we
+  # assume the GNU/Linux dynamic linker is in use.
+  dynamic_linker='GNU/Linux ld.so'
+  ;;
+
+netbsdelf*-gnu)
+  version_type=linux
+  need_lib_prefix=no
+  need_version=no
+  library_names_spec='${libname}${release}${shared_ext}$versuffix 
${libname}${release}${shared_ext}$major ${libname}${shared_ext}'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=no
+  hardcode_into_libs=yes
+  dynamic_linker='NetBSD ld.elf_so'
+  ;;
+
+netbsd*)
+  version_type=sunos
+  need_lib_prefix=no
+  need_version=no
+  if echo __ELF__ | $CC -E - | $GREP __ELF__ >/dev/null; then
+    library_names_spec='${libname}${release}${shared_ext}$versuffix 
${libname}${shared_ext}$versuffix'
+    finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir'
+    dynamic_linker='NetBSD (a.out) ld.so'
+  else
+    library_names_spec='${libname}${release}${shared_ext}$versuffix 
${libname}${release}${shared_ext}$major ${libname}${shared_ext}'
+    soname_spec='${libname}${release}${shared_ext}$major'
+    dynamic_linker='NetBSD ld.elf_so'
+  fi
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=yes
+  hardcode_into_libs=yes
+  ;;
+
+newsos6)
+  version_type=linux # correct to gnu/linux during the next big refactor
+  library_names_spec='${libname}${release}${shared_ext}$versuffix 
${libname}${release}${shared_ext}$major $libname${shared_ext}'
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=yes
+  ;;
+
+*nto* | *qnx*)
+  version_type=qnx
+  need_lib_prefix=no
+  need_version=no
+  library_names_spec='${libname}${release}${shared_ext}$versuffix 
${libname}${release}${shared_ext}$major $libname${shared_ext}'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=no
+  hardcode_into_libs=yes
+  dynamic_linker='ldqnx.so'
+  ;;
+
+openbsd*)
+  version_type=sunos
+  sys_lib_dlsearch_path_spec="/usr/lib"
+  need_lib_prefix=no
+  # Some older versions of OpenBSD (3.3 at least) *do* need versioned libs.
+  case $host_os in
+    openbsd3.3 | openbsd3.3.*) need_version=yes ;;
+    *)                         need_version=no  ;;
+  esac
+  library_names_spec='${libname}${release}${shared_ext}$versuffix 
${libname}${shared_ext}$versuffix'
+  finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir'
+  shlibpath_var=LD_LIBRARY_PATH
+  if test -z "`echo __ELF__ | $CC -E - | $GREP __ELF__`" || test 
"$host_os-$host_cpu" = "openbsd2.8-powerpc"; then
+    case $host_os in
+      openbsd2.[89] | openbsd2.[89].*)
+       shlibpath_overrides_runpath=no
+       ;;
+      *)
+       shlibpath_overrides_runpath=yes
+       ;;
+      esac
+  else
+    shlibpath_overrides_runpath=yes
+  fi
+  ;;
+
+os2*)
+  libname_spec='$name'
+  shrext_cmds=".dll"
+  need_lib_prefix=no
+  library_names_spec='$libname${shared_ext} $libname.a'
+  dynamic_linker='OS/2 ld.exe'
+  shlibpath_var=LIBPATH
+  ;;
+
+osf3* | osf4* | osf5*)
+  version_type=osf
+  need_lib_prefix=no
+  need_version=no
+  soname_spec='${libname}${release}${shared_ext}$major'
+  library_names_spec='${libname}${release}${shared_ext}$versuffix 
${libname}${release}${shared_ext}$major $libname${shared_ext}'
+  shlibpath_var=LD_LIBRARY_PATH
+  sys_lib_search_path_spec="/usr/shlib /usr/ccs/lib /usr/lib/cmplrs/cc 
/usr/lib /usr/local/lib /var/shlib"
+  sys_lib_dlsearch_path_spec="$sys_lib_search_path_spec"
+  ;;
+
+rdos*)
+  dynamic_linker=no
+  ;;
+
+solaris*)
+  version_type=linux # correct to gnu/linux during the next big refactor
+  need_lib_prefix=no
+  need_version=no
+  library_names_spec='${libname}${release}${shared_ext}$versuffix 
${libname}${release}${shared_ext}$major $libname${shared_ext}'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=yes
+  hardcode_into_libs=yes
+  # ldd complains unless libraries are executable
+  postinstall_cmds='chmod +x $lib'
+  ;;
+
+sunos4*)
+  version_type=sunos
+  library_names_spec='${libname}${release}${shared_ext}$versuffix 
${libname}${shared_ext}$versuffix'
+  finish_cmds='PATH="\$PATH:/usr/etc" ldconfig $libdir'
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=yes
+  if test "$with_gnu_ld" = yes; then
+    need_lib_prefix=no
+  fi
+  need_version=yes
+  ;;
+
+sysv4 | sysv4.3*)
+  version_type=linux # correct to gnu/linux during the next big refactor
+  library_names_spec='${libname}${release}${shared_ext}$versuffix 
${libname}${release}${shared_ext}$major $libname${shared_ext}'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  shlibpath_var=LD_LIBRARY_PATH
+  case $host_vendor in
+    sni)
+      shlibpath_overrides_runpath=no
+      need_lib_prefix=no
+      runpath_var=LD_RUN_PATH
+      ;;
+    siemens)
+      need_lib_prefix=no
+      ;;
+    motorola)
+      need_lib_prefix=no
+      need_version=no
+      shlibpath_overrides_runpath=no
+      sys_lib_search_path_spec='/lib /usr/lib /usr/ccs/lib'
+      ;;
+  esac
+  ;;
+
+sysv4*MP*)
+  if test -d /usr/nec ;then
+    version_type=linux # correct to gnu/linux during the next big refactor
+    library_names_spec='$libname${shared_ext}.$versuffix 
$libname${shared_ext}.$major $libname${shared_ext}'
+    soname_spec='$libname${shared_ext}.$major'
+    shlibpath_var=LD_LIBRARY_PATH
+  fi
+  ;;
+
+sysv5* | sco3.2v5* | sco5v6* | unixware* | OpenUNIX* | sysv4*uw2*)
+  version_type=freebsd-elf
+  need_lib_prefix=no
+  need_version=no
+  library_names_spec='${libname}${release}${shared_ext}$versuffix 
${libname}${release}${shared_ext} $libname${shared_ext}'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=yes
+  hardcode_into_libs=yes
+  if test "$with_gnu_ld" = yes; then
+    sys_lib_search_path_spec='/usr/local/lib /usr/gnu/lib /usr/ccs/lib 
/usr/lib /lib'
+  else
+    sys_lib_search_path_spec='/usr/ccs/lib /usr/lib'
+    case $host_os in
+      sco3.2v5*)
+        sys_lib_search_path_spec="$sys_lib_search_path_spec /lib"
+       ;;
+    esac
+  fi
+  sys_lib_dlsearch_path_spec='/usr/lib'
+  ;;
+
+tpf*)
+  # TPF is a cross-target only.  Preferred cross-host = GNU/Linux.
+  version_type=linux # correct to gnu/linux during the next big refactor
+  need_lib_prefix=no
+  need_version=no
+  library_names_spec='${libname}${release}${shared_ext}$versuffix 
${libname}${release}${shared_ext}$major $libname${shared_ext}'
+  shlibpath_var=LD_LIBRARY_PATH
+  shlibpath_overrides_runpath=no
+  hardcode_into_libs=yes
+  ;;
+
+uts4*)
+  version_type=linux # correct to gnu/linux during the next big refactor
+  library_names_spec='${libname}${release}${shared_ext}$versuffix 
${libname}${release}${shared_ext}$major $libname${shared_ext}'
+  soname_spec='${libname}${release}${shared_ext}$major'
+  shlibpath_var=LD_LIBRARY_PATH
+  ;;
+
+*)
+  dynamic_linker=no
+  ;;
+esac
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $dynamic_linker" >&5
+printf "%s\n" "$dynamic_linker" >&6; }
+test "$dynamic_linker" = no && can_build_shared=no
+
+variables_saved_for_relink="PATH $shlibpath_var $runpath_var"
+if test "$GCC" = yes; then
+  variables_saved_for_relink="$variables_saved_for_relink GCC_EXEC_PREFIX 
COMPILER_PATH LIBRARY_PATH"
+fi
+
+if test "${lt_cv_sys_lib_search_path_spec+set}" = set; then
+  sys_lib_search_path_spec="$lt_cv_sys_lib_search_path_spec"
+fi
+if test "${lt_cv_sys_lib_dlsearch_path_spec+set}" = set; then
+  sys_lib_dlsearch_path_spec="$lt_cv_sys_lib_dlsearch_path_spec"
+fi
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking how to hardcode 
library paths into programs" >&5
+printf %s "checking how to hardcode library paths into programs... " >&6; }
+hardcode_action=
+if test -n "$hardcode_libdir_flag_spec" ||
+   test -n "$runpath_var" ||
+   test "X$hardcode_automatic" = "Xyes" ; then
+
+  # We can hardcode non-existent directories.
+  if test "$hardcode_direct" != no &&
+     # If the only mechanism to avoid hardcoding is shlibpath_var, we
+     # have to relink, otherwise we might link with an installed library
+     # when we should be linking with a yet-to-be-installed one
+     ## test "$_LT_TAGVAR(hardcode_shlibpath_var, )" != no &&
+     test "$hardcode_minus_L" != no; then
+    # Linking always hardcodes the temporary library directory.
+    hardcode_action=relink
+  else
+    # We can link without hardcoding, and we can hardcode nonexisting dirs.
+    hardcode_action=immediate
+  fi
+else
+  # We cannot hardcode anything, or else we can only hardcode existing
+  # directories.
+  hardcode_action=unsupported
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $hardcode_action" >&5
+printf "%s\n" "$hardcode_action" >&6; }
+
+if test "$hardcode_action" = relink ||
+   test "$inherit_rpath" = yes; then
+  # Fast installation is not supported
+  enable_fast_install=no
+elif test "$shlibpath_overrides_runpath" = yes ||
+     test "$enable_shared" = no; then
+  # Fast installation is not necessary
+  enable_fast_install=needless
+fi
+
+
+
+
+
+
+  if test "x$enable_dlopen" != xyes; then
+  enable_dlopen=unknown
+  enable_dlopen_self=unknown
+  enable_dlopen_self_static=unknown
+else
+  lt_cv_dlopen=no
+  lt_cv_dlopen_libs=
+
+  case $host_os in
+  beos*)
+    lt_cv_dlopen="load_add_on"
+    lt_cv_dlopen_libs=
+    lt_cv_dlopen_self=yes
+    ;;
+
+  mingw* | pw32* | cegcc*)
+    lt_cv_dlopen="LoadLibrary"
+    lt_cv_dlopen_libs=
+    ;;
+
+  cygwin*)
+    lt_cv_dlopen="dlopen"
+    lt_cv_dlopen_libs=
+    ;;
+
+  darwin*)
+  # if libdl is installed we need to link against it
+    { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for dlopen in -ldl" 
>&5
+printf %s "checking for dlopen in -ldl... " >&6; }
+if test ${ac_cv_lib_dl_dlopen+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  ac_check_lib_save_LIBS=$LIBS
+LIBS="-ldl  $LIBS"
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+char dlopen ();
+int
+main (void)
+{
+return dlopen ();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"
+then :
+  ac_cv_lib_dl_dlopen=yes
+else $as_nop
+  ac_cv_lib_dl_dlopen=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam \
+    conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dl_dlopen" >&5
+printf "%s\n" "$ac_cv_lib_dl_dlopen" >&6; }
+if test "x$ac_cv_lib_dl_dlopen" = xyes
+then :
+  lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-ldl"
+else $as_nop
+
+    lt_cv_dlopen="dyld"
+    lt_cv_dlopen_libs=
+    lt_cv_dlopen_self=yes
+
+fi
+
+    ;;
+
+  *)
+    ac_fn_c_check_func "$LINENO" "shl_load" "ac_cv_func_shl_load"
+if test "x$ac_cv_func_shl_load" = xyes
+then :
+  lt_cv_dlopen="shl_load"
+else $as_nop
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for shl_load in 
-ldld" >&5
+printf %s "checking for shl_load in -ldld... " >&6; }
+if test ${ac_cv_lib_dld_shl_load+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  ac_check_lib_save_LIBS=$LIBS
+LIBS="-ldld  $LIBS"
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+char shl_load ();
+int
+main (void)
+{
+return shl_load ();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"
+then :
+  ac_cv_lib_dld_shl_load=yes
+else $as_nop
+  ac_cv_lib_dld_shl_load=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam \
+    conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dld_shl_load" 
>&5
+printf "%s\n" "$ac_cv_lib_dld_shl_load" >&6; }
+if test "x$ac_cv_lib_dld_shl_load" = xyes
+then :
+  lt_cv_dlopen="shl_load" lt_cv_dlopen_libs="-ldld"
+else $as_nop
+  ac_fn_c_check_func "$LINENO" "dlopen" "ac_cv_func_dlopen"
+if test "x$ac_cv_func_dlopen" = xyes
+then :
+  lt_cv_dlopen="dlopen"
+else $as_nop
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for dlopen in -ldl" 
>&5
+printf %s "checking for dlopen in -ldl... " >&6; }
+if test ${ac_cv_lib_dl_dlopen+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  ac_check_lib_save_LIBS=$LIBS
+LIBS="-ldl  $LIBS"
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+char dlopen ();
+int
+main (void)
+{
+return dlopen ();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"
+then :
+  ac_cv_lib_dl_dlopen=yes
+else $as_nop
+  ac_cv_lib_dl_dlopen=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam \
+    conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dl_dlopen" >&5
+printf "%s\n" "$ac_cv_lib_dl_dlopen" >&6; }
+if test "x$ac_cv_lib_dl_dlopen" = xyes
+then :
+  lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-ldl"
+else $as_nop
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for dlopen in -lsvld" 
>&5
+printf %s "checking for dlopen in -lsvld... " >&6; }
+if test ${ac_cv_lib_svld_dlopen+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  ac_check_lib_save_LIBS=$LIBS
+LIBS="-lsvld  $LIBS"
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+char dlopen ();
+int
+main (void)
+{
+return dlopen ();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"
+then :
+  ac_cv_lib_svld_dlopen=yes
+else $as_nop
+  ac_cv_lib_svld_dlopen=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam \
+    conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_svld_dlopen" 
>&5
+printf "%s\n" "$ac_cv_lib_svld_dlopen" >&6; }
+if test "x$ac_cv_lib_svld_dlopen" = xyes
+then :
+  lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-lsvld"
+else $as_nop
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for dld_link in 
-ldld" >&5
+printf %s "checking for dld_link in -ldld... " >&6; }
+if test ${ac_cv_lib_dld_dld_link+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  ac_check_lib_save_LIBS=$LIBS
+LIBS="-ldld  $LIBS"
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+char dld_link ();
+int
+main (void)
+{
+return dld_link ();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"
+then :
+  ac_cv_lib_dld_dld_link=yes
+else $as_nop
+  ac_cv_lib_dld_dld_link=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam \
+    conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_dld_dld_link" 
>&5
+printf "%s\n" "$ac_cv_lib_dld_dld_link" >&6; }
+if test "x$ac_cv_lib_dld_dld_link" = xyes
+then :
+  lt_cv_dlopen="dld_link" lt_cv_dlopen_libs="-ldld"
+fi
+
+
+fi
+
+
+fi
+
+
+fi
+
+
+fi
+
+
+fi
+
+    ;;
+  esac
+
+  if test "x$lt_cv_dlopen" != xno; then
+    enable_dlopen=yes
+  else
+    enable_dlopen=no
+  fi
+
+  case $lt_cv_dlopen in
+  dlopen)
+    save_CPPFLAGS="$CPPFLAGS"
+    test "x$ac_cv_header_dlfcn_h" = xyes && CPPFLAGS="$CPPFLAGS -DHAVE_DLFCN_H"
+
+    save_LDFLAGS="$LDFLAGS"
+    wl=$lt_prog_compiler_wl eval LDFLAGS=\"\$LDFLAGS 
$export_dynamic_flag_spec\"
+
+    save_LIBS="$LIBS"
+    LIBS="$lt_cv_dlopen_libs $LIBS"
+
+    { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether a program 
can dlopen itself" >&5
+printf %s "checking whether a program can dlopen itself... " >&6; }
+if test ${lt_cv_dlopen_self+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+         if test "$cross_compiling" = yes; then :
+  lt_cv_dlopen_self=cross
+else
+  lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
+  lt_status=$lt_dlunknown
+  cat > conftest.$ac_ext <<_LT_EOF
+#line $LINENO "configure"
+#include "confdefs.h"
+
+#if HAVE_DLFCN_H
+#include <dlfcn.h>
+#endif
+
+#include <stdio.h>
+
+#ifdef RTLD_GLOBAL
+#  define LT_DLGLOBAL          RTLD_GLOBAL
+#else
+#  ifdef DL_GLOBAL
+#    define LT_DLGLOBAL                DL_GLOBAL
+#  else
+#    define LT_DLGLOBAL                0
+#  endif
+#endif
+
+/* We may have to define LT_DLLAZY_OR_NOW in the command line if we
+   find out it does not work in some platform. */
+#ifndef LT_DLLAZY_OR_NOW
+#  ifdef RTLD_LAZY
+#    define LT_DLLAZY_OR_NOW           RTLD_LAZY
+#  else
+#    ifdef DL_LAZY
+#      define LT_DLLAZY_OR_NOW         DL_LAZY
+#    else
+#      ifdef RTLD_NOW
+#        define LT_DLLAZY_OR_NOW       RTLD_NOW
+#      else
+#        ifdef DL_NOW
+#          define LT_DLLAZY_OR_NOW     DL_NOW
+#        else
+#          define LT_DLLAZY_OR_NOW     0
+#        endif
+#      endif
+#    endif
+#  endif
+#endif
+
+/* When -fvisbility=hidden is used, assume the code has been annotated
+   correspondingly for the symbols needed.  */
+#if defined(__GNUC__) && (((__GNUC__ == 3) && (__GNUC_MINOR__ >= 3)) || 
(__GNUC__ > 3))
+int fnord () __attribute__((visibility("default")));
+#endif
+
+int fnord () { return 42; }
+int main ()
+{
+  void *self = dlopen (0, LT_DLGLOBAL|LT_DLLAZY_OR_NOW);
+  int status = $lt_dlunknown;
+
+  if (self)
+    {
+      if (dlsym (self,"fnord"))       status = $lt_dlno_uscore;
+      else
+        {
+         if (dlsym( self,"_fnord"))  status = $lt_dlneed_uscore;
+          else puts (dlerror ());
+       }
+      /* dlclose (self); */
+    }
+  else
+    puts (dlerror ());
+
+  return status;
+}
+_LT_EOF
+  if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_link\""; } >&5
+  (eval $ac_link) 2>&5
+  ac_status=$?
+  printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; } && test -s conftest${ac_exeext} 2>/dev/null; then
+    (./conftest; exit; ) >&5 2>/dev/null
+    lt_status=$?
+    case x$lt_status in
+      x$lt_dlno_uscore) lt_cv_dlopen_self=yes ;;
+      x$lt_dlneed_uscore) lt_cv_dlopen_self=yes ;;
+      x$lt_dlunknown|x*) lt_cv_dlopen_self=no ;;
+    esac
+  else :
+    # compilation failed
+    lt_cv_dlopen_self=no
+  fi
+fi
+rm -fr conftest*
+
+
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $lt_cv_dlopen_self" >&5
+printf "%s\n" "$lt_cv_dlopen_self" >&6; }
+
+    if test "x$lt_cv_dlopen_self" = xyes; then
+      wl=$lt_prog_compiler_wl eval LDFLAGS=\"\$LDFLAGS 
$lt_prog_compiler_static\"
+      { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether a 
statically linked program can dlopen itself" >&5
+printf %s "checking whether a statically linked program can dlopen itself... " 
>&6; }
+if test ${lt_cv_dlopen_self_static+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+         if test "$cross_compiling" = yes; then :
+  lt_cv_dlopen_self_static=cross
+else
+  lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
+  lt_status=$lt_dlunknown
+  cat > conftest.$ac_ext <<_LT_EOF
+#line $LINENO "configure"
+#include "confdefs.h"
+
+#if HAVE_DLFCN_H
+#include <dlfcn.h>
+#endif
+
+#include <stdio.h>
+
+#ifdef RTLD_GLOBAL
+#  define LT_DLGLOBAL          RTLD_GLOBAL
+#else
+#  ifdef DL_GLOBAL
+#    define LT_DLGLOBAL                DL_GLOBAL
+#  else
+#    define LT_DLGLOBAL                0
+#  endif
+#endif
+
+/* We may have to define LT_DLLAZY_OR_NOW in the command line if we
+   find out it does not work in some platform. */
+#ifndef LT_DLLAZY_OR_NOW
+#  ifdef RTLD_LAZY
+#    define LT_DLLAZY_OR_NOW           RTLD_LAZY
+#  else
+#    ifdef DL_LAZY
+#      define LT_DLLAZY_OR_NOW         DL_LAZY
+#    else
+#      ifdef RTLD_NOW
+#        define LT_DLLAZY_OR_NOW       RTLD_NOW
+#      else
+#        ifdef DL_NOW
+#          define LT_DLLAZY_OR_NOW     DL_NOW
+#        else
+#          define LT_DLLAZY_OR_NOW     0
+#        endif
+#      endif
+#    endif
+#  endif
+#endif
+
+/* When -fvisbility=hidden is used, assume the code has been annotated
+   correspondingly for the symbols needed.  */
+#if defined(__GNUC__) && (((__GNUC__ == 3) && (__GNUC_MINOR__ >= 3)) || 
(__GNUC__ > 3))
+int fnord () __attribute__((visibility("default")));
+#endif
+
+int fnord () { return 42; }
+int main ()
+{
+  void *self = dlopen (0, LT_DLGLOBAL|LT_DLLAZY_OR_NOW);
+  int status = $lt_dlunknown;
+
+  if (self)
+    {
+      if (dlsym (self,"fnord"))       status = $lt_dlno_uscore;
+      else
+        {
+         if (dlsym( self,"_fnord"))  status = $lt_dlneed_uscore;
+          else puts (dlerror ());
+       }
+      /* dlclose (self); */
+    }
+  else
+    puts (dlerror ());
+
+  return status;
+}
+_LT_EOF
+  if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_link\""; } >&5
+  (eval $ac_link) 2>&5
+  ac_status=$?
+  printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; } && test -s conftest${ac_exeext} 2>/dev/null; then
+    (./conftest; exit; ) >&5 2>/dev/null
+    lt_status=$?
+    case x$lt_status in
+      x$lt_dlno_uscore) lt_cv_dlopen_self_static=yes ;;
+      x$lt_dlneed_uscore) lt_cv_dlopen_self_static=yes ;;
+      x$lt_dlunknown|x*) lt_cv_dlopen_self_static=no ;;
+    esac
+  else :
+    # compilation failed
+    lt_cv_dlopen_self_static=no
+  fi
+fi
+rm -fr conftest*
+
+
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: 
$lt_cv_dlopen_self_static" >&5
+printf "%s\n" "$lt_cv_dlopen_self_static" >&6; }
+    fi
+
+    CPPFLAGS="$save_CPPFLAGS"
+    LDFLAGS="$save_LDFLAGS"
+    LIBS="$save_LIBS"
+    ;;
+  esac
+
+  case $lt_cv_dlopen_self in
+  yes|no) enable_dlopen_self=$lt_cv_dlopen_self ;;
+  *) enable_dlopen_self=unknown ;;
+  esac
+
+  case $lt_cv_dlopen_self_static in
+  yes|no) enable_dlopen_self_static=$lt_cv_dlopen_self_static ;;
+  *) enable_dlopen_self_static=unknown ;;
+  esac
+fi
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+striplib=
+old_striplib=
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether stripping 
libraries is possible" >&5
+printf %s "checking whether stripping libraries is possible... " >&6; }
+if test -n "$STRIP" && $STRIP -V 2>&1 | $GREP "GNU strip" >/dev/null; then
+  test -z "$old_striplib" && old_striplib="$STRIP --strip-debug"
+  test -z "$striplib" && striplib="$STRIP --strip-unneeded"
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+printf "%s\n" "yes" >&6; }
+else
+# FIXME - insert some real tests, host_os isn't really good enough
+  case $host_os in
+  darwin*)
+    if test -n "$STRIP" ; then
+      striplib="$STRIP -x"
+      old_striplib="$STRIP -S"
+      { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+printf "%s\n" "yes" >&6; }
+    else
+      { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+    fi
+    ;;
+  *)
+    { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+    ;;
+  esac
+fi
+
+
+
+
+
+
+
+
+
+
+
+
+  # Report which library types will actually be built
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if libtool supports 
shared libraries" >&5
+printf %s "checking if libtool supports shared libraries... " >&6; }
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $can_build_shared" >&5
+printf "%s\n" "$can_build_shared" >&6; }
+
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether to build 
shared libraries" >&5
+printf %s "checking whether to build shared libraries... " >&6; }
+  test "$can_build_shared" = "no" && enable_shared=no
+
+  # On AIX, shared libraries and static libraries use the same namespace, and
+  # are all built from PIC.
+  case $host_os in
+  aix3*)
+    test "$enable_shared" = yes && enable_static=no
+    if test -n "$RANLIB"; then
+      archive_cmds="$archive_cmds~\$RANLIB \$lib"
+      postinstall_cmds='$RANLIB $lib'
+    fi
+    ;;
+
+  aix[4-9]*)
+    if test "$host_cpu" != ia64 && test "$aix_use_runtimelinking" = no ; then
+      test "$enable_shared" = yes && enable_static=no
+    fi
+    ;;
+  esac
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_shared" >&5
+printf "%s\n" "$enable_shared" >&6; }
+
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether to build 
static libraries" >&5
+printf %s "checking whether to build static libraries... " >&6; }
+  # Make sure either enable_shared or enable_static is yes.
+  test "$enable_shared" = yes || enable_static=yes
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_static" >&5
+printf "%s\n" "$enable_static" >&6; }
+
+
+
+
+fi
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext 
$LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+CC="$lt_save_CC"
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+        ac_config_commands="$ac_config_commands libtool"
+
+
+
+
+# Only expand once:
+
+
+if test -n "$ac_tool_prefix"; then
+  # Extract the first word of "${ac_tool_prefix}windres", so it can be a 
program name with args.
+set dummy ${ac_tool_prefix}windres; ac_word=$2
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+printf %s "checking for $ac_word... " >&6; }
+if test ${ac_cv_prog_RC+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test -n "$RC"; then
+  ac_cv_prog_RC="$RC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  case $as_dir in #(((
+    '') as_dir=./ ;;
+    */) ;;
+    *) as_dir=$as_dir/ ;;
+  esac
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then
+    ac_cv_prog_RC="${ac_tool_prefix}windres"
+    printf "%s\n" "$as_me:${as_lineno-$LINENO}: found 
$as_dir$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+RC=$ac_cv_prog_RC
+if test -n "$RC"; then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $RC" >&5
+printf "%s\n" "$RC" >&6; }
+else
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+fi
+
+
+fi
+if test -z "$ac_cv_prog_RC"; then
+  ac_ct_RC=$RC
+  # Extract the first word of "windres", so it can be a program name with args.
+set dummy windres; ac_word=$2
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+printf %s "checking for $ac_word... " >&6; }
+if test ${ac_cv_prog_ac_ct_RC+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test -n "$ac_ct_RC"; then
+  ac_cv_prog_ac_ct_RC="$ac_ct_RC" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  case $as_dir in #(((
+    '') as_dir=./ ;;
+    */) ;;
+    *) as_dir=$as_dir/ ;;
+  esac
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then
+    ac_cv_prog_ac_ct_RC="windres"
+    printf "%s\n" "$as_me:${as_lineno-$LINENO}: found 
$as_dir$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_RC=$ac_cv_prog_ac_ct_RC
+if test -n "$ac_ct_RC"; then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_RC" >&5
+printf "%s\n" "$ac_ct_RC" >&6; }
+else
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+fi
+
+  if test "x$ac_ct_RC" = x; then
+    RC=""
+  else
+    case $cross_compiling:$ac_tool_warned in
+yes:)
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not 
prefixed with host triplet" >&5
+printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host 
triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    RC=$ac_ct_RC
+  fi
+else
+  RC="$ac_cv_prog_RC"
+fi
+
+
+
+
+# Source file extension for RC test sources.
+ac_ext=rc
+
+# Object file extension for compiled RC test sources.
+objext=o
+objext_RC=$objext
+
+# Code to be used in simple compile tests
+lt_simple_compile_test_code='sample MENU { MENUITEM "&Soup", 100, CHECKED }'
+
+# Code to be used in simple link tests
+lt_simple_link_test_code="$lt_simple_compile_test_code"
+
+# ltmain only uses $CC for tagged configurations so make sure $CC is set.
+
+
+
+
+
+
+# If no C compiler was specified, use CC.
+LTCC=${LTCC-"$CC"}
+
+# If no C compiler flags were specified, use CFLAGS.
+LTCFLAGS=${LTCFLAGS-"$CFLAGS"}
+
+# Allow CC to be a program name with arguments.
+compiler=$CC
+
+
+# save warnings/boilerplate of simple test code
+ac_outfile=conftest.$ac_objext
+echo "$lt_simple_compile_test_code" >conftest.$ac_ext
+eval "$ac_compile" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err
+_lt_compiler_boilerplate=`cat conftest.err`
+$RM conftest*
+
+ac_outfile=conftest.$ac_objext
+echo "$lt_simple_link_test_code" >conftest.$ac_ext
+eval "$ac_link" 2>&1 >/dev/null | $SED '/^$/d; /^ *+/d' >conftest.err
+_lt_linker_boilerplate=`cat conftest.err`
+$RM -r conftest*
+
+
+# Allow CC to be a program name with arguments.
+lt_save_CC="$CC"
+lt_save_CFLAGS=$CFLAGS
+lt_save_GCC=$GCC
+GCC=
+CC=${RC-"windres"}
+CFLAGS=
+compiler=$CC
+compiler_RC=$CC
+for cc_temp in $compiler""; do
+  case $cc_temp in
+    compile | *[\\/]compile | ccache | *[\\/]ccache ) ;;
+    distcc | *[\\/]distcc | purify | *[\\/]purify ) ;;
+    \-*) ;;
+    *) break;;
+  esac
+done
+cc_basename=`$ECHO "$cc_temp" | $SED "s%.*/%%; s%^$host_alias-%%"`
+
+lt_cv_prog_compiler_c_o_RC=yes
+
+if test -n "$compiler"; then
+  :
+
+
+
+fi
+
+GCC=$lt_save_GCC
+ac_ext=c
+ac_cpp='$CPP $CPPFLAGS'
+ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext 
$LIBS >&5'
+ac_compiler_gnu=$ac_cv_c_compiler_gnu
+
+CC=$lt_save_CC
+CFLAGS=$lt_save_CFLAGS
+
+
+
+##########################
+## General definitions. ##
+##########################
+
+# Used by libgcrypt-config
+LIBGCRYPT_CONFIG_LIBS="-lgcrypt"
+LIBGCRYPT_CONFIG_CFLAGS=""
+LIBGCRYPT_CONFIG_HOST="$host"
+
+# Definitions for symmetric ciphers.
+available_ciphers="arcfour blowfish cast5 des aes twofish serpent rfc2268 seed"
+available_ciphers="$available_ciphers camellia idea salsa20 gost28147 chacha20"
+available_ciphers="$available_ciphers sm4"
+enabled_ciphers=""
+
+# Definitions for public-key ciphers.
+available_pubkey_ciphers="dsa elgamal rsa ecc"
+enabled_pubkey_ciphers=""
+
+# Definitions for message digests.
+available_digests="crc gostr3411-94 md2 md4 md5 rmd160 sha1 sha256 sha512"
+available_digests="$available_digests sha3 tiger whirlpool stribog blake2"
+available_digests="$available_digests sm3"
+enabled_digests=""
+
+# Definitions for kdfs (optional ones)
+available_kdfs="s2k pkdf2 scrypt"
+enabled_kdfs=""
+
+# Definitions for random modules.
+available_random_modules="getentropy linux egd unix"
+auto_random_modules="$available_random_modules"
+
+# Supported thread backends.
+LIBGCRYPT_THREAD_MODULES=""
+
+# Other definitions.
+have_w32_system=no
+have_w32ce_system=no
+have_pthread=no
+
+
+# Setup some stuff depending on host.
+case "${host}" in
+    *-*-mingw32*)
+      ac_cv_have_dev_random=no
+      have_w32_system=yes
+      case "${host}" in
+        *-mingw32ce*)
+            have_w32ce_system=yes
+            available_random_modules="w32ce"
+            ;;
+        *)
+            available_random_modules="w32"
+            ;;
+      esac
+
+printf "%s\n" "#define USE_ONLY_8DOT3 1" >>confdefs.h
+
+
+printf "%s\n" "#define HAVE_DRIVE_LETTERS 1" >>confdefs.h
+
+
+printf "%s\n" "#define HAVE_DOSISH_SYSTEM 1" >>confdefs.h
+
+      ;;
+
+    i?86-emx-os2 | i?86-*-os2*emx)
+        # OS/2 with the EMX environment
+        ac_cv_have_dev_random=no
+        printf "%s\n" "#define HAVE_DRIVE_LETTERS 1" >>confdefs.h
+
+        printf "%s\n" "#define HAVE_DOSISH_SYSTEM 1" >>confdefs.h
+
+        ;;
+
+    i?86-*-msdosdjgpp*)
+        # DOS with the DJGPP environment
+        ac_cv_have_dev_random=no
+        printf "%s\n" "#define HAVE_DRIVE_LETTERS 1" >>confdefs.h
+
+        printf "%s\n" "#define HAVE_DOSISH_SYSTEM 1" >>confdefs.h
+
+        ;;
+
+    *-*-hpux*)
+        if test -z "$GCC" ; then
+            CFLAGS="$CFLAGS -Ae -D_HPUX_SOURCE"
+        fi
+        ;;
+    *-dec-osf4*)
+        if test -z "$GCC" ; then
+            # Suppress all warnings
+            # to get rid of the unsigned/signed char mismatch warnings.
+            CFLAGS="$CFLAGS -w"
+        fi
+        ;;
+    m68k-atari-mint)
+        ;;
+    *-apple-darwin*)
+
+printf "%s\n" "#define _DARWIN_C_SOURCE 1" >>confdefs.h
+
+
+printf "%s\n" "#define USE_POSIX_SPAWN_FOR_TESTS 1" >>confdefs.h
+
+        ac_fn_c_check_header_compile "$LINENO" "spawn.h" 
"ac_cv_header_spawn_h" "$ac_includes_default"
+if test "x$ac_cv_header_spawn_h" = xyes
+then :
+  printf "%s\n" "#define HAVE_SPAWN_H 1" >>confdefs.h
+
+fi
+
+        ;;
+    *)
+      ;;
+esac
+
+if test "$have_w32_system" = yes; then
+
+printf "%s\n" "#define HAVE_W32_SYSTEM 1" >>confdefs.h
+
+   if test "$have_w32ce_system" = yes; then
+
+printf "%s\n" "#define HAVE_W32CE_SYSTEM 1" >>confdefs.h
+
+   fi
+fi
+ if test "$have_w32_system" = yes; then
+  HAVE_W32_SYSTEM_TRUE=
+  HAVE_W32_SYSTEM_FALSE='#'
+else
+  HAVE_W32_SYSTEM_TRUE='#'
+  HAVE_W32_SYSTEM_FALSE=
+fi
+
+ if test "$have_w32ce_system" = yes; then
+  HAVE_W32CE_SYSTEM_TRUE=
+  HAVE_W32CE_SYSTEM_FALSE='#'
+else
+  HAVE_W32CE_SYSTEM_TRUE='#'
+  HAVE_W32CE_SYSTEM_FALSE=
+fi
+
+
+
+
+# A printable OS Name is sometimes useful.
+case "${host}" in
+    *-*-mingw32ce*)
+        PRINTABLE_OS_NAME="W32CE"
+        ;;
+
+    *-*-mingw32*)
+        PRINTABLE_OS_NAME="W32"
+        ;;
+
+    i?86-emx-os2 | i?86-*-os2*emx )
+        PRINTABLE_OS_NAME="OS/2"
+        ;;
+
+    i?86-*-msdosdjgpp*)
+        PRINTABLE_OS_NAME="MSDOS/DJGPP"
+        ;;
+
+    *-linux*)
+        PRINTABLE_OS_NAME="GNU/Linux"
+        ;;
+
+    *)
+        PRINTABLE_OS_NAME=`uname -s || echo "Unknown"`
+        ;;
+esac
+
+NAME_OF_DEV_RANDOM="/dev/random"
+NAME_OF_DEV_URANDOM="/dev/urandom"
+
+# Check whether --enable-endian-check was given.
+if test ${enable_endian_check+y}
+then :
+  enableval=$enable_endian_check; endiancheck=$enableval
+else $as_nop
+  endiancheck=yes
+fi
+
+if test x"$endiancheck" = xyes ; then
+   { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether byte 
ordering is bigendian" >&5
+printf %s "checking whether byte ordering is bigendian... " >&6; }
+if test ${ac_cv_c_bigendian+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  ac_cv_c_bigendian=unknown
+    # See if we're dealing with a universal compiler.
+    cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#ifndef __APPLE_CC__
+              not a universal capable compiler
+            #endif
+            typedef int dummy;
+
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"
+then :
+
+       # Check for potential -arch flags.  It is not universal unless
+       # there are at least two -arch flags with different values.
+       ac_arch=
+       ac_prev=
+       for ac_word in $CC $CFLAGS $CPPFLAGS $LDFLAGS; do
+        if test -n "$ac_prev"; then
+          case $ac_word in
+            i?86 | x86_64 | ppc | ppc64)
+              if test -z "$ac_arch" || test "$ac_arch" = "$ac_word"; then
+                ac_arch=$ac_word
+              else
+                ac_cv_c_bigendian=universal
+                break
+              fi
+              ;;
+          esac
+          ac_prev=
+        elif test "x$ac_word" = "x-arch"; then
+          ac_prev=arch
+        fi
+       done
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext
+    if test $ac_cv_c_bigendian = unknown; then
+      # See if sys/param.h defines the BYTE_ORDER macro.
+      cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <sys/types.h>
+            #include <sys/param.h>
+
+int
+main (void)
+{
+#if ! (defined BYTE_ORDER && defined BIG_ENDIAN \
+                    && defined LITTLE_ENDIAN && BYTE_ORDER && BIG_ENDIAN \
+                    && LITTLE_ENDIAN)
+             bogus endian macros
+            #endif
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"
+then :
+  # It does; now see whether it defined to BIG_ENDIAN or not.
+        cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <sys/types.h>
+               #include <sys/param.h>
+
+int
+main (void)
+{
+#if BYTE_ORDER != BIG_ENDIAN
+                not big endian
+               #endif
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"
+then :
+  ac_cv_c_bigendian=yes
+else $as_nop
+  ac_cv_c_bigendian=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext
+    fi
+    if test $ac_cv_c_bigendian = unknown; then
+      # See if <limits.h> defines _LITTLE_ENDIAN or _BIG_ENDIAN (e.g., 
Solaris).
+      cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <limits.h>
+
+int
+main (void)
+{
+#if ! (defined _LITTLE_ENDIAN || defined _BIG_ENDIAN)
+             bogus endian macros
+            #endif
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"
+then :
+  # It does; now see whether it defined to _BIG_ENDIAN or not.
+        cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <limits.h>
+
+int
+main (void)
+{
+#ifndef _BIG_ENDIAN
+                not big endian
+               #endif
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"
+then :
+  ac_cv_c_bigendian=yes
+else $as_nop
+  ac_cv_c_bigendian=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext
+    fi
+    if test $ac_cv_c_bigendian = unknown; then
+      # Compile a test program.
+      if test "$cross_compiling" = yes
+then :
+  # Try to guess by grepping values from an object file.
+        cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+unsigned short int ascii_mm[] =
+                 { 0x4249, 0x4765, 0x6E44, 0x6961, 0x6E53, 0x7953, 0 };
+               unsigned short int ascii_ii[] =
+                 { 0x694C, 0x5454, 0x656C, 0x6E45, 0x6944, 0x6E61, 0 };
+               int use_ascii (int i) {
+                 return ascii_mm[i] + ascii_ii[i];
+               }
+               unsigned short int ebcdic_ii[] =
+                 { 0x89D3, 0xE3E3, 0x8593, 0x95C5, 0x89C4, 0x9581, 0 };
+               unsigned short int ebcdic_mm[] =
+                 { 0xC2C9, 0xC785, 0x95C4, 0x8981, 0x95E2, 0xA8E2, 0 };
+               int use_ebcdic (int i) {
+                 return ebcdic_mm[i] + ebcdic_ii[i];
+               }
+               extern int foo;
+
+int
+main (void)
+{
+return use_ascii (foo) == use_ebcdic (foo);
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"
+then :
+  if grep BIGenDianSyS conftest.$ac_objext >/dev/null; then
+             ac_cv_c_bigendian=yes
+           fi
+           if grep LiTTleEnDian conftest.$ac_objext >/dev/null ; then
+             if test "$ac_cv_c_bigendian" = unknown; then
+               ac_cv_c_bigendian=no
+             else
+               # finding both strings is unlikely to happen, but who knows?
+               ac_cv_c_bigendian=unknown
+             fi
+           fi
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext
+else $as_nop
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+$ac_includes_default
+int
+main (void)
+{
+
+            /* Are we little or big endian?  From Harbison&Steele.  */
+            union
+            {
+              long int l;
+              char c[sizeof (long int)];
+            } u;
+            u.l = 1;
+            return u.c[sizeof (long int) - 1] == 1;
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_run "$LINENO"
+then :
+  ac_cv_c_bigendian=no
+else $as_nop
+  ac_cv_c_bigendian=yes
+fi
+rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
+  conftest.$ac_objext conftest.beam conftest.$ac_ext
+fi
+
+    fi
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_bigendian" >&5
+printf "%s\n" "$ac_cv_c_bigendian" >&6; }
+ case $ac_cv_c_bigendian in #(
+   yes)
+     printf "%s\n" "#define WORDS_BIGENDIAN 1" >>confdefs.h
+;; #(
+   no)
+      ;; #(
+   universal)
+
+printf "%s\n" "#define AC_APPLE_UNIVERSAL_BUILD 1" >>confdefs.h
+
+     ;; #(
+   *)
+     as_fn_error $? "unknown endianness
+ presetting ac_cv_c_bigendian=no (or yes) will help" "$LINENO" 5 ;;
+ esac
+
+else
+
+printf "%s\n" "#define DISABLED_ENDIAN_CHECK 1" >>confdefs.h
+
+fi
+
+# The cast to long int works around a bug in the HP C Compiler
+# version HP92453-01 B.11.11.23709.GP, which incorrectly rejects
+# declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'.
+# This bug is HP SR number 8606223364.
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking size of unsigned short" 
>&5
+printf %s "checking size of unsigned short... " >&6; }
+if test ${ac_cv_sizeof_unsigned_short+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if ac_fn_c_compute_int "$LINENO" "(long int) (sizeof (unsigned short))" 
"ac_cv_sizeof_unsigned_short"        "$ac_includes_default"
+then :
+
+else $as_nop
+  if test "$ac_cv_type_unsigned_short" = yes; then
+     { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error 77 "cannot compute sizeof (unsigned short)
+See \`config.log' for more details" "$LINENO" 5; }
+   else
+     ac_cv_sizeof_unsigned_short=0
+   fi
+fi
+
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: 
$ac_cv_sizeof_unsigned_short" >&5
+printf "%s\n" "$ac_cv_sizeof_unsigned_short" >&6; }
+
+
+
+printf "%s\n" "#define SIZEOF_UNSIGNED_SHORT $ac_cv_sizeof_unsigned_short" 
>>confdefs.h
+
+
+# The cast to long int works around a bug in the HP C Compiler
+# version HP92453-01 B.11.11.23709.GP, which incorrectly rejects
+# declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'.
+# This bug is HP SR number 8606223364.
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking size of unsigned int" 
>&5
+printf %s "checking size of unsigned int... " >&6; }
+if test ${ac_cv_sizeof_unsigned_int+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if ac_fn_c_compute_int "$LINENO" "(long int) (sizeof (unsigned int))" 
"ac_cv_sizeof_unsigned_int"        "$ac_includes_default"
+then :
+
+else $as_nop
+  if test "$ac_cv_type_unsigned_int" = yes; then
+     { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error 77 "cannot compute sizeof (unsigned int)
+See \`config.log' for more details" "$LINENO" 5; }
+   else
+     ac_cv_sizeof_unsigned_int=0
+   fi
+fi
+
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: 
$ac_cv_sizeof_unsigned_int" >&5
+printf "%s\n" "$ac_cv_sizeof_unsigned_int" >&6; }
+
+
+
+printf "%s\n" "#define SIZEOF_UNSIGNED_INT $ac_cv_sizeof_unsigned_int" 
>>confdefs.h
+
+
+# The cast to long int works around a bug in the HP C Compiler
+# version HP92453-01 B.11.11.23709.GP, which incorrectly rejects
+# declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'.
+# This bug is HP SR number 8606223364.
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking size of unsigned long" 
>&5
+printf %s "checking size of unsigned long... " >&6; }
+if test ${ac_cv_sizeof_unsigned_long+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if ac_fn_c_compute_int "$LINENO" "(long int) (sizeof (unsigned long))" 
"ac_cv_sizeof_unsigned_long"        "$ac_includes_default"
+then :
+
+else $as_nop
+  if test "$ac_cv_type_unsigned_long" = yes; then
+     { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error 77 "cannot compute sizeof (unsigned long)
+See \`config.log' for more details" "$LINENO" 5; }
+   else
+     ac_cv_sizeof_unsigned_long=0
+   fi
+fi
+
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: 
$ac_cv_sizeof_unsigned_long" >&5
+printf "%s\n" "$ac_cv_sizeof_unsigned_long" >&6; }
+
+
+
+printf "%s\n" "#define SIZEOF_UNSIGNED_LONG $ac_cv_sizeof_unsigned_long" 
>>confdefs.h
+
+
+# The cast to long int works around a bug in the HP C Compiler
+# version HP92453-01 B.11.11.23709.GP, which incorrectly rejects
+# declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'.
+# This bug is HP SR number 8606223364.
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking size of unsigned long 
long" >&5
+printf %s "checking size of unsigned long long... " >&6; }
+if test ${ac_cv_sizeof_unsigned_long_long+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if ac_fn_c_compute_int "$LINENO" "(long int) (sizeof (unsigned long long))" 
"ac_cv_sizeof_unsigned_long_long"        "$ac_includes_default"
+then :
+
+else $as_nop
+  if test "$ac_cv_type_unsigned_long_long" = yes; then
+     { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error 77 "cannot compute sizeof (unsigned long long)
+See \`config.log' for more details" "$LINENO" 5; }
+   else
+     ac_cv_sizeof_unsigned_long_long=0
+   fi
+fi
+
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: 
$ac_cv_sizeof_unsigned_long_long" >&5
+printf "%s\n" "$ac_cv_sizeof_unsigned_long_long" >&6; }
+
+
+
+printf "%s\n" "#define SIZEOF_UNSIGNED_LONG_LONG 
$ac_cv_sizeof_unsigned_long_long" >>confdefs.h
+
+
+# The cast to long int works around a bug in the HP C Compiler
+# version HP92453-01 B.11.11.23709.GP, which incorrectly rejects
+# declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'.
+# This bug is HP SR number 8606223364.
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking size of void *" >&5
+printf %s "checking size of void *... " >&6; }
+if test ${ac_cv_sizeof_void_p+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if ac_fn_c_compute_int "$LINENO" "(long int) (sizeof (void *))" 
"ac_cv_sizeof_void_p"        "$ac_includes_default"
+then :
+
+else $as_nop
+  if test "$ac_cv_type_void_p" = yes; then
+     { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error 77 "cannot compute sizeof (void *)
+See \`config.log' for more details" "$LINENO" 5; }
+   else
+     ac_cv_sizeof_void_p=0
+   fi
+fi
+
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_sizeof_void_p" >&5
+printf "%s\n" "$ac_cv_sizeof_void_p" >&6; }
+
+
+
+printf "%s\n" "#define SIZEOF_VOID_P $ac_cv_sizeof_void_p" >>confdefs.h
+
+
+
+
+  ac_fn_c_check_type "$LINENO" "uintptr_t" "ac_cv_type_uintptr_t" 
"$ac_includes_default"
+if test "x$ac_cv_type_uintptr_t" = xyes
+then :
+
+printf "%s\n" "#define HAVE_UINTPTR_T 1" >>confdefs.h
+
+else $as_nop
+  for ac_type in 'unsigned int' 'unsigned long int' \
+       'unsigned long long int'; do
+       cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+$ac_includes_default
+int
+main (void)
+{
+static int test_array [1 - 2 * !(sizeof (void *) <= sizeof ($ac_type))];
+test_array [0] = 0;
+return test_array [0];
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"
+then :
+
+printf "%s\n" "#define uintptr_t $ac_type" >>confdefs.h
+
+         ac_type=
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext
+       test -z "$ac_type" && break
+     done
+fi
+
+
+
+if test "$ac_cv_sizeof_unsigned_short" = "0" \
+   || test "$ac_cv_sizeof_unsigned_int" = "0" \
+   || test "$ac_cv_sizeof_unsigned_long" = "0"; then
+    { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: Hmmm, something is 
wrong with the sizes - using defaults" >&5
+printf "%s\n" "$as_me: WARNING: Hmmm, something is wrong with the sizes - 
using defaults" >&2;};
+fi
+
+# Ensure that we have UINT64_C before we bother to check for uint64_t
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for UINT64_C" >&5
+printf %s "checking for UINT64_C... " >&6; }
+if test ${gnupg_cv_uint64_c_works+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <inttypes.h>
+int
+main (void)
+{
+uint64_t foo=UINT64_C(42);
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"
+then :
+  gnupg_cv_uint64_c_works=yes
+else $as_nop
+  gnupg_cv_uint64_c_works=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: 
$gnupg_cv_uint64_c_works" >&5
+printf "%s\n" "$gnupg_cv_uint64_c_works" >&6; }
+if test "$gnupg_cv_uint64_c_works" = "yes" ; then
+   # The cast to long int works around a bug in the HP C Compiler
+# version HP92453-01 B.11.11.23709.GP, which incorrectly rejects
+# declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'.
+# This bug is HP SR number 8606223364.
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking size of uint64_t" >&5
+printf %s "checking size of uint64_t... " >&6; }
+if test ${ac_cv_sizeof_uint64_t+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if ac_fn_c_compute_int "$LINENO" "(long int) (sizeof (uint64_t))" 
"ac_cv_sizeof_uint64_t"        "$ac_includes_default"
+then :
+
+else $as_nop
+  if test "$ac_cv_type_uint64_t" = yes; then
+     { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error 77 "cannot compute sizeof (uint64_t)
+See \`config.log' for more details" "$LINENO" 5; }
+   else
+     ac_cv_sizeof_uint64_t=0
+   fi
+fi
+
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_sizeof_uint64_t" 
>&5
+printf "%s\n" "$ac_cv_sizeof_uint64_t" >&6; }
+
+
+
+printf "%s\n" "#define SIZEOF_UINT64_T $ac_cv_sizeof_uint64_t" >>confdefs.h
+
+
+fi
+
+# Do we have any 64-bit data types?
+if test "$ac_cv_sizeof_unsigned_int" != "8" \
+   && test "$ac_cv_sizeof_unsigned_long" != "8" \
+   && test "$ac_cv_sizeof_unsigned_long_long" != "8" \
+   && test "$ac_cv_sizeof_uint64_t" != "8"; then
+    as_fn_error $? "
+***
+*** No 64-bit integer type available.
+*** It is not possible to build Libgcrypt on this platform.
+***" "$LINENO" 5
+fi
+
+
+# If not specified otherwise, all available algorithms will be
+# included.
+default_ciphers="$available_ciphers"
+default_pubkey_ciphers="$available_pubkey_ciphers"
+default_digests="$available_digests"
+default_kdfs="$available_kdfs"
+# Blacklist MD2 by default
+default_digests=`echo $default_digests | sed -e 's/md2//g'`
+
+# Substitutions to set generated files in a Emacs buffer to read-only.
+emacs_local_vars_begin='Local Variables:'
+
+emacs_local_vars_read_only='buffer-read-only: t'
+
+emacs_local_vars_end='End:'
+
+
+############################
+## Command line switches. ##
+############################
+
+# Implementation of the --enable-ciphers switch.
+# Check whether --enable-ciphers was given.
+if test ${enable_ciphers+y}
+then :
+  enableval=$enable_ciphers; enabled_ciphers=`echo $enableval | tr ',:' '  ' | 
tr 'A-Z' 'a-z'`
+else $as_nop
+  enabled_ciphers=""
+fi
+
+if test "x$enabled_ciphers" = "x" \
+   -o "$enabled_ciphers" = "yes"  \
+   -o "$enabled_ciphers" = "no"; then
+   enabled_ciphers=$default_ciphers
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking which symmetric ciphers 
to include" >&5
+printf %s "checking which symmetric ciphers to include... " >&6; }
+for cipher in $enabled_ciphers; do
+
+name=$cipher
+list=$available_ciphers
+found=0
+
+for n in $list; do
+  if test "x$name" = "x$n"; then
+    found=1
+  fi
+done
+
+    if test "$found" = "0"; then
+       as_fn_error $? "unsupported cipher \"$cipher\" specified" "$LINENO" 5
+    fi
+done
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enabled_ciphers" >&5
+printf "%s\n" "$enabled_ciphers" >&6; }
+
+# Implementation of the --enable-pubkey-ciphers switch.
+# Check whether --enable-pubkey-ciphers was given.
+if test ${enable_pubkey_ciphers+y}
+then :
+  enableval=$enable_pubkey_ciphers; enabled_pubkey_ciphers=`echo $enableval | 
tr ',:' '  ' | tr 'A-Z' 'a-z'`
+else $as_nop
+  enabled_pubkey_ciphers=""
+fi
+
+if test "x$enabled_pubkey_ciphers" = "x" \
+   -o "$enabled_pubkey_ciphers" = "yes"  \
+   -o "$enabled_pubkey_ciphers" = "no"; then
+   enabled_pubkey_ciphers=$default_pubkey_ciphers
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking which public-key 
ciphers to include" >&5
+printf %s "checking which public-key ciphers to include... " >&6; }
+for cipher in $enabled_pubkey_ciphers; do
+
+name=$cipher
+list=$available_pubkey_ciphers
+found=0
+
+for n in $list; do
+  if test "x$name" = "x$n"; then
+    found=1
+  fi
+done
+
+    if test "$found" = "0"; then
+       as_fn_error $? "unsupported public-key cipher specified" "$LINENO" 5
+    fi
+done
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enabled_pubkey_ciphers" 
>&5
+printf "%s\n" "$enabled_pubkey_ciphers" >&6; }
+
+# Implementation of the --enable-digests switch.
+# Check whether --enable-digests was given.
+if test ${enable_digests+y}
+then :
+  enableval=$enable_digests; enabled_digests=`echo $enableval | tr ',:' '  ' | 
tr 'A-Z' 'a-z'`
+else $as_nop
+  enabled_digests=""
+fi
+
+if test "x$enabled_digests" = "x" \
+   -o "$enabled_digests" = "yes"  \
+   -o "$enabled_digests" = "no"; then
+   enabled_digests=$default_digests
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking which message digests 
to include" >&5
+printf %s "checking which message digests to include... " >&6; }
+for digest in $enabled_digests; do
+
+name=$digest
+list=$available_digests
+found=0
+
+for n in $list; do
+  if test "x$name" = "x$n"; then
+    found=1
+  fi
+done
+
+    if test "$found" = "0"; then
+       as_fn_error $? "unsupported message digest specified" "$LINENO" 5
+    fi
+done
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enabled_digests" >&5
+printf "%s\n" "$enabled_digests" >&6; }
+
+# Implementation of the --enable-kdfs switch.
+# Check whether --enable-kdfs was given.
+if test ${enable_kdfs+y}
+then :
+  enableval=$enable_kdfs; enabled_kdfs=`echo $enableval | tr ',:' '  ' | tr 
'A-Z' 'a-z'`
+else $as_nop
+  enabled_kdfs=""
+fi
+
+if test "x$enabled_kdfs" = "x" \
+   -o "$enabled_kdfs" = "yes"  \
+   -o "$enabled_kdfs" = "no"; then
+   enabled_kdfs=$default_kdfs
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking which key derivation 
functions to include" >&5
+printf %s "checking which key derivation functions to include... " >&6; }
+for kdf in $enabled_kdfs; do
+
+name=$kdf
+list=$available_kdfs
+found=0
+
+for n in $list; do
+  if test "x$name" = "x$n"; then
+    found=1
+  fi
+done
+
+    if test "$found" = "0"; then
+       as_fn_error $? "unsupported key derivation function specified" 
"$LINENO" 5
+    fi
+done
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enabled_kdfs" >&5
+printf "%s\n" "$enabled_kdfs" >&6; }
+
+# Implementation of the --enable-random switch.
+# Check whether --enable-random was given.
+if test ${enable_random+y}
+then :
+  enableval=$enable_random; random=`echo $enableval | tr 'A-Z' 'a-z'`
+fi
+
+if test "x$random" = "x" -o "$random" = "yes" -o "$random" = "no"; then
+    random=default
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking which random module to 
use" >&5
+printf %s "checking which random module to use... " >&6; }
+if test "$random" != "default" -a "$random" != "auto"; then
+
+name=$random
+list=$available_random_modules
+found=0
+
+for n in $list; do
+  if test "x$name" = "x$n"; then
+    found=1
+  fi
+done
+
+    if test "$found" = "0"; then
+       as_fn_error $? "unsupported random module specified" "$LINENO" 5
+    fi
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $random" >&5
+printf "%s\n" "$random" >&6; }
+
+# Implementation of the --disable-dev-random switch.
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether use of 
/dev/random is requested" >&5
+printf %s "checking whether use of /dev/random is requested... " >&6; }
+# Check whether --enable-dev-random was given.
+if test ${enable_dev_random+y}
+then :
+  enableval=$enable_dev_random; try_dev_random=$enableval
+else $as_nop
+  try_dev_random=yes
+fi
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $try_dev_random" >&5
+printf "%s\n" "$try_dev_random" >&6; }
+
+# Implementation of the --with-egd-socket switch.
+
+# Check whether --with-egd-socket was given.
+if test ${with_egd_socket+y}
+then :
+  withval=$with_egd_socket; egd_socket_name="$withval"
+else $as_nop
+  egd_socket_name=""
+fi
+
+
+printf "%s\n" "#define EGD_SOCKET_NAME \"$egd_socket_name\"" >>confdefs.h
+
+
+# Implementation of the --enable-random-daemon
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the 
experimental random daemon is requested" >&5
+printf %s "checking whether the experimental random daemon is requested... " 
>&6; }
+# Check whether --enable-random-daemon was given.
+if test ${enable_random_daemon+y}
+then :
+  enableval=$enable_random_daemon; enable_random_daemon=$enableval
+else $as_nop
+  enable_random_daemon=no
+fi
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_random_daemon" 
>&5
+printf "%s\n" "$enable_random_daemon" >&6; }
+ if test x$enable_random_daemon = xyes; then
+  ENABLE_RANDOM_DAEMON_TRUE=
+  ENABLE_RANDOM_DAEMON_FALSE='#'
+else
+  ENABLE_RANDOM_DAEMON_TRUE='#'
+  ENABLE_RANDOM_DAEMON_FALSE=
+fi
+
+
+
+# Implementation of --disable-asm.
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether MPI and cipher 
assembler modules are requested" >&5
+printf %s "checking whether MPI and cipher assembler modules are requested... 
" >&6; }
+# Check whether --enable-asm was given.
+if test ${enable_asm+y}
+then :
+  enableval=$enable_asm; try_asm_modules=$enableval
+else $as_nop
+  try_asm_modules=yes
+fi
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $try_asm_modules" >&5
+printf "%s\n" "$try_asm_modules" >&6; }
+if test "$try_asm_modules" != yes ; then
+
+printf "%s\n" "#define ASM_DISABLED 1" >>confdefs.h
+
+fi
+
+# Implementation of the --enable-m-guard switch.
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether memory guard is 
requested" >&5
+printf %s "checking whether memory guard is requested... " >&6; }
+# Check whether --enable-m-guard was given.
+if test ${enable_m_guard+y}
+then :
+  enableval=$enable_m_guard; use_m_guard=$enableval
+else $as_nop
+  use_m_guard=no
+fi
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $use_m_guard" >&5
+printf "%s\n" "$use_m_guard" >&6; }
+if test "$use_m_guard" = yes ; then
+
+printf "%s\n" "#define M_GUARD 1" >>confdefs.h
+
+fi
+
+# Implementation of the --enable-large-data-tests switch.
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether to run large 
data tests" >&5
+printf %s "checking whether to run large data tests... " >&6; }
+# Check whether --enable-large-data-tests was given.
+if test ${enable_large_data_tests+y}
+then :
+  enableval=$enable_large_data_tests; large_data_tests=$enableval
+else $as_nop
+  large_data_tests=no
+fi
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $large_data_tests" >&5
+printf "%s\n" "$large_data_tests" >&6; }
+RUN_LARGE_DATA_TESTS=$large_data_tests
+
+
+# Implementation of --enable-force-soft-hwfeatures
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether 'soft' HW 
feature bits are forced on" >&5
+printf %s "checking whether 'soft' HW feature bits are forced on... " >&6; }
+# Check whether --enable-force-soft-hwfeatures was given.
+if test ${enable_force_soft_hwfeatures+y}
+then :
+  enableval=$enable_force_soft_hwfeatures; force_soft_hwfeatures=$enableval
+else $as_nop
+  force_soft_hwfeatures=no
+fi
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $force_soft_hwfeatures" 
>&5
+printf "%s\n" "$force_soft_hwfeatures" >&6; }
+
+
+# Implementation of the --with-capabilities switch.
+# Check whether we want to use Linux capabilities
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether use of 
capabilities is requested" >&5
+printf %s "checking whether use of capabilities is requested... " >&6; }
+
+# Check whether --with-capabilities was given.
+if test ${with_capabilities+y}
+then :
+  withval=$with_capabilities; use_capabilities="$withval"
+else $as_nop
+  use_capabilities=no
+fi
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $use_capabilities" >&5
+printf "%s\n" "$use_capabilities" >&6; }
+
+# Implementation of the --enable-hmac-binary-check.
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether a HMAC binary 
check is requested" >&5
+printf %s "checking whether a HMAC binary check is requested... " >&6; }
+# Check whether --enable-hmac-binary-check was given.
+if test ${enable_hmac_binary_check+y}
+then :
+  enableval=$enable_hmac_binary_check; use_hmac_binary_check="$enableval"
+else $as_nop
+  use_hmac_binary_check=no
+fi
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $use_hmac_binary_check" 
>&5
+printf "%s\n" "$use_hmac_binary_check" >&6; }
+if test "$use_hmac_binary_check" = no ; then
+    DEF_HMAC_BINARY_CHECK=''
+else
+
+printf "%s\n" "#define ENABLE_HMAC_BINARY_CHECK 1" >>confdefs.h
+
+    if test -n "$ac_tool_prefix"; then
+  # Extract the first word of "${ac_tool_prefix}objcopy", so it can be a 
program name with args.
+set dummy ${ac_tool_prefix}objcopy; ac_word=$2
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+printf %s "checking for $ac_word... " >&6; }
+if test ${ac_cv_prog_OBJCOPY+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test -n "$OBJCOPY"; then
+  ac_cv_prog_OBJCOPY="$OBJCOPY" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  case $as_dir in #(((
+    '') as_dir=./ ;;
+    */) ;;
+    *) as_dir=$as_dir/ ;;
+  esac
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then
+    ac_cv_prog_OBJCOPY="${ac_tool_prefix}objcopy"
+    printf "%s\n" "$as_me:${as_lineno-$LINENO}: found 
$as_dir$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+OBJCOPY=$ac_cv_prog_OBJCOPY
+if test -n "$OBJCOPY"; then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $OBJCOPY" >&5
+printf "%s\n" "$OBJCOPY" >&6; }
+else
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+fi
+
+
+fi
+if test -z "$ac_cv_prog_OBJCOPY"; then
+  ac_ct_OBJCOPY=$OBJCOPY
+  # Extract the first word of "objcopy", so it can be a program name with args.
+set dummy objcopy; ac_word=$2
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+printf %s "checking for $ac_word... " >&6; }
+if test ${ac_cv_prog_ac_ct_OBJCOPY+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test -n "$ac_ct_OBJCOPY"; then
+  ac_cv_prog_ac_ct_OBJCOPY="$ac_ct_OBJCOPY" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  case $as_dir in #(((
+    '') as_dir=./ ;;
+    */) ;;
+    *) as_dir=$as_dir/ ;;
+  esac
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then
+    ac_cv_prog_ac_ct_OBJCOPY="objcopy"
+    printf "%s\n" "$as_me:${as_lineno-$LINENO}: found 
$as_dir$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_OBJCOPY=$ac_cv_prog_ac_ct_OBJCOPY
+if test -n "$ac_ct_OBJCOPY"; then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_OBJCOPY" >&5
+printf "%s\n" "$ac_ct_OBJCOPY" >&6; }
+else
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+fi
+
+  if test "x$ac_ct_OBJCOPY" = x; then
+    OBJCOPY=""
+  else
+    case $cross_compiling:$ac_tool_warned in
+yes:)
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not 
prefixed with host triplet" >&5
+printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host 
triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    OBJCOPY=$ac_ct_OBJCOPY
+  fi
+else
+  OBJCOPY="$ac_cv_prog_OBJCOPY"
+fi
+
+    if test -n "$ac_tool_prefix"; then
+  # Extract the first word of "${ac_tool_prefix}readelf", so it can be a 
program name with args.
+set dummy ${ac_tool_prefix}readelf; ac_word=$2
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+printf %s "checking for $ac_word... " >&6; }
+if test ${ac_cv_prog_READELF+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test -n "$READELF"; then
+  ac_cv_prog_READELF="$READELF" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  case $as_dir in #(((
+    '') as_dir=./ ;;
+    */) ;;
+    *) as_dir=$as_dir/ ;;
+  esac
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then
+    ac_cv_prog_READELF="${ac_tool_prefix}readelf"
+    printf "%s\n" "$as_me:${as_lineno-$LINENO}: found 
$as_dir$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+READELF=$ac_cv_prog_READELF
+if test -n "$READELF"; then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $READELF" >&5
+printf "%s\n" "$READELF" >&6; }
+else
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+fi
+
+
+fi
+if test -z "$ac_cv_prog_READELF"; then
+  ac_ct_READELF=$READELF
+  # Extract the first word of "readelf", so it can be a program name with args.
+set dummy readelf; ac_word=$2
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+printf %s "checking for $ac_word... " >&6; }
+if test ${ac_cv_prog_ac_ct_READELF+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test -n "$ac_ct_READELF"; then
+  ac_cv_prog_ac_ct_READELF="$ac_ct_READELF" # Let the user override the test.
+else
+as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  case $as_dir in #(((
+    '') as_dir=./ ;;
+    */) ;;
+    *) as_dir=$as_dir/ ;;
+  esac
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then
+    ac_cv_prog_ac_ct_READELF="readelf"
+    printf "%s\n" "$as_me:${as_lineno-$LINENO}: found 
$as_dir$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+fi
+fi
+ac_ct_READELF=$ac_cv_prog_ac_ct_READELF
+if test -n "$ac_ct_READELF"; then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_READELF" >&5
+printf "%s\n" "$ac_ct_READELF" >&6; }
+else
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+fi
+
+  if test "x$ac_ct_READELF" = x; then
+    READELF=""
+  else
+    case $cross_compiling:$ac_tool_warned in
+yes:)
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not 
prefixed with host triplet" >&5
+printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host 
triplet" >&2;}
+ac_tool_warned=yes ;;
+esac
+    READELF=$ac_ct_READELF
+  fi
+else
+  READELF="$ac_cv_prog_READELF"
+fi
+
+    if test "$use_hmac_binary_check" != yes ; then
+        
DEF_HMAC_BINARY_CHECK=-DKEY_FOR_BINARY_CHECK="'\"$use_hmac_binary_check\"'"
+    fi
+fi
+ if test "x$use_hmac_binary_check" != xno; then
+  USE_HMAC_BINARY_CHECK_TRUE=
+  USE_HMAC_BINARY_CHECK_FALSE='#'
+else
+  USE_HMAC_BINARY_CHECK_TRUE='#'
+  USE_HMAC_BINARY_CHECK_FALSE=
+fi
+
+
+
+# Implementation of the --with-fips-module-version.
+
+# Check whether --with-fips-module-version was given.
+if test ${with_fips_module_version+y}
+then :
+  withval=$with_fips_module_version; fips_module_version="$withval"
+else $as_nop
+  fips_module_version=""
+fi
+
+
+printf "%s\n" "#define FIPS_MODULE_VERSION \"$fips_module_version\"" 
>>confdefs.h
+
+
+# Implementation of the --disable-jent-support switch.
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether jitter entropy 
support is requested" >&5
+printf %s "checking whether jitter entropy support is requested... " >&6; }
+# Check whether --enable-jent-support was given.
+if test ${enable_jent_support+y}
+then :
+  enableval=$enable_jent_support; jentsupport=$enableval
+else $as_nop
+  jentsupport=yes
+fi
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $jentsupport" >&5
+printf "%s\n" "$jentsupport" >&6; }
+
+# Implementation of the --disable-padlock-support switch.
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether padlock support 
is requested" >&5
+printf %s "checking whether padlock support is requested... " >&6; }
+# Check whether --enable-padlock-support was given.
+if test ${enable_padlock_support+y}
+then :
+  enableval=$enable_padlock_support; padlocksupport=$enableval
+else $as_nop
+  padlocksupport=yes
+fi
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $padlocksupport" >&5
+printf "%s\n" "$padlocksupport" >&6; }
+
+# Implementation of the --disable-aesni-support switch.
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether AESNI support 
is requested" >&5
+printf %s "checking whether AESNI support is requested... " >&6; }
+# Check whether --enable-aesni-support was given.
+if test ${enable_aesni_support+y}
+then :
+  enableval=$enable_aesni_support; aesnisupport=$enableval
+else $as_nop
+  aesnisupport=yes
+fi
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $aesnisupport" >&5
+printf "%s\n" "$aesnisupport" >&6; }
+
+# Implementation of the --disable-shaext-support switch.
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether SHAEXT support 
is requested" >&5
+printf %s "checking whether SHAEXT support is requested... " >&6; }
+# Check whether --enable-shaext-support was given.
+if test ${enable_shaext_support+y}
+then :
+  enableval=$enable_shaext_support; shaextsupport=$enableval
+else $as_nop
+  shaextsupport=yes
+fi
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $shaextsupport" >&5
+printf "%s\n" "$shaextsupport" >&6; }
+
+# Implementation of the --disable-pclmul-support switch.
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether PCLMUL support 
is requested" >&5
+printf %s "checking whether PCLMUL support is requested... " >&6; }
+# Check whether --enable-pclmul-support was given.
+if test ${enable_pclmul_support+y}
+then :
+  enableval=$enable_pclmul_support; pclmulsupport=$enableval
+else $as_nop
+  pclmulsupport=yes
+fi
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $pclmulsupport" >&5
+printf "%s\n" "$pclmulsupport" >&6; }
+
+# Implementation of the --disable-sse41-support switch.
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether SSE4.1 support 
is requested" >&5
+printf %s "checking whether SSE4.1 support is requested... " >&6; }
+# Check whether --enable-sse41-support was given.
+if test ${enable_sse41_support+y}
+then :
+  enableval=$enable_sse41_support; sse41support=$enableval
+else $as_nop
+  sse41support=yes
+fi
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $sse41support" >&5
+printf "%s\n" "$sse41support" >&6; }
+
+# Implementation of the --disable-drng-support switch.
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether DRNG support is 
requested" >&5
+printf %s "checking whether DRNG support is requested... " >&6; }
+# Check whether --enable-drng-support was given.
+if test ${enable_drng_support+y}
+then :
+  enableval=$enable_drng_support; drngsupport=$enableval
+else $as_nop
+  drngsupport=yes
+fi
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $drngsupport" >&5
+printf "%s\n" "$drngsupport" >&6; }
+
+# Implementation of the --disable-avx-support switch.
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether AVX support is 
requested" >&5
+printf %s "checking whether AVX support is requested... " >&6; }
+# Check whether --enable-avx-support was given.
+if test ${enable_avx_support+y}
+then :
+  enableval=$enable_avx_support; avxsupport=$enableval
+else $as_nop
+  avxsupport=yes
+fi
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $avxsupport" >&5
+printf "%s\n" "$avxsupport" >&6; }
+
+# Implementation of the --disable-avx2-support switch.
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether AVX2 support is 
requested" >&5
+printf %s "checking whether AVX2 support is requested... " >&6; }
+# Check whether --enable-avx2-support was given.
+if test ${enable_avx2_support+y}
+then :
+  enableval=$enable_avx2_support; avx2support=$enableval
+else $as_nop
+  avx2support=yes
+fi
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $avx2support" >&5
+printf "%s\n" "$avx2support" >&6; }
+
+# Implementation of the --disable-neon-support switch.
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether NEON support is 
requested" >&5
+printf %s "checking whether NEON support is requested... " >&6; }
+# Check whether --enable-neon-support was given.
+if test ${enable_neon_support+y}
+then :
+  enableval=$enable_neon_support; neonsupport=$enableval
+else $as_nop
+  neonsupport=yes
+fi
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $neonsupport" >&5
+printf "%s\n" "$neonsupport" >&6; }
+
+# Implementation of the --disable-arm-crypto-support switch.
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether ARMv8 Crypto 
Extension support is requested" >&5
+printf %s "checking whether ARMv8 Crypto Extension support is requested... " 
>&6; }
+# Check whether --enable-arm-crypto-support was given.
+if test ${enable_arm_crypto_support+y}
+then :
+  enableval=$enable_arm_crypto_support; armcryptosupport=$enableval
+else $as_nop
+  armcryptosupport=yes
+fi
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $armcryptosupport" >&5
+printf "%s\n" "$armcryptosupport" >&6; }
+
+# Implementation of the --disable-ppc-crypto-support switch.
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether PPC crypto 
support is requested" >&5
+printf %s "checking whether PPC crypto support is requested... " >&6; }
+# Check whether --enable-ppc-crypto-support was given.
+if test ${enable_ppc_crypto_support+y}
+then :
+  enableval=$enable_ppc_crypto_support; ppccryptosupport=$enableval
+else $as_nop
+  ppccryptosupport=yes
+fi
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ppccryptosupport" >&5
+printf "%s\n" "$ppccryptosupport" >&6; }
+
+# Implementation of the --disable-O-flag-munging switch.
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether a -O flag 
munging is requested" >&5
+printf %s "checking whether a -O flag munging is requested... " >&6; }
+# Check whether --enable-O-flag-munging was given.
+if test ${enable_O_flag_munging+y}
+then :
+  enableval=$enable_O_flag_munging; enable_o_flag_munging=$enableval
+else $as_nop
+  enable_o_flag_munging=yes
+fi
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $enable_o_flag_munging" 
>&5
+printf "%s\n" "$enable_o_flag_munging" >&6; }
+ if test "$enable_o_flag_munging" = "yes"; then
+  ENABLE_O_FLAG_MUNGING_TRUE=
+  ENABLE_O_FLAG_MUNGING_FALSE='#'
+else
+  ENABLE_O_FLAG_MUNGING_TRUE='#'
+  ENABLE_O_FLAG_MUNGING_FALSE=
+fi
+
+
+# Implementation of the --disable-instrumentation-munging switch.
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether a 
instrumentation (-fprofile, -fsanitize) munging is requested" >&5
+printf %s "checking whether a instrumentation (-fprofile, -fsanitize) munging 
is requested... " >&6; }
+# Check whether --enable-instrumentation-munging was given.
+if test ${enable_instrumentation_munging+y}
+then :
+  enableval=$enable_instrumentation_munging; 
enable_instrumentation_munging=$enableval
+else $as_nop
+  enable_instrumentation_munging=yes
+fi
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: 
$enable_instrumentation_munging" >&5
+printf "%s\n" "$enable_instrumentation_munging" >&6; }
+ if test "$enable_instrumentation_munging" = "yes"; then
+  ENABLE_INSTRUMENTATION_MUNGING_TRUE=
+  ENABLE_INSTRUMENTATION_MUNGING_FALSE='#'
+else
+  ENABLE_INSTRUMENTATION_MUNGING_TRUE='#'
+  ENABLE_INSTRUMENTATION_MUNGING_FALSE=
+fi
+
+
+# Implementation of the --disable-amd64-as-feature-detection switch.
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether to enable AMD64 
as(1) feature detection" >&5
+printf %s "checking whether to enable AMD64 as(1) feature detection... " >&6; }
+# Check whether --enable-amd64-as-feature-detection was given.
+if test ${enable_amd64_as_feature_detection+y}
+then :
+  enableval=$enable_amd64_as_feature_detection; 
amd64_as_feature_detection=$enableval
+else $as_nop
+  amd64_as_feature_detection=yes
+fi
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: 
$amd64_as_feature_detection" >&5
+printf "%s\n" "$amd64_as_feature_detection" >&6; }
+
+
+
+printf "%s\n" "#define PRINTABLE_OS_NAME \"$PRINTABLE_OS_NAME\"" >>confdefs.h
+
+
+# For some systems we know that we have ld_version scripts.
+# Use it then as default.
+have_ld_version_script=no
+case "${host}" in
+    *-*-linux*)
+       have_ld_version_script=yes
+        ;;
+    *-*-gnu*)
+       have_ld_version_script=yes
+        ;;
+esac
+# Check whether --enable-ld-version-script was given.
+if test ${enable_ld_version_script+y}
+then :
+  enableval=$enable_ld_version_script; have_ld_version_script=$enableval
+else $as_nop
+   :
+fi
+
+ if test "$have_ld_version_script" = "yes"; then
+  HAVE_LD_VERSION_SCRIPT_TRUE=
+  HAVE_LD_VERSION_SCRIPT_FALSE='#'
+else
+  HAVE_LD_VERSION_SCRIPT_TRUE='#'
+  HAVE_LD_VERSION_SCRIPT_FALSE=
+fi
+
+
+
+printf "%s\n" "#define NAME_OF_DEV_RANDOM \"$NAME_OF_DEV_RANDOM\"" >>confdefs.h
+
+
+printf "%s\n" "#define NAME_OF_DEV_URANDOM \"$NAME_OF_DEV_URANDOM\"" 
>>confdefs.h
+
+
+#
+# Specify how we support our local modification of libtool for Windows
+# 64-bit.  Options are:
+#
+# (1) apply: when appying patch fails, it results failure of entire build
+# (2) never: never apply the patch (no try)
+# (3) try: use patched if it goes well, use original if fails
+#
+
+# Check whether --with-libtool-modification was given.
+if test ${with_libtool_modification+y}
+then :
+  withval=$with_libtool_modification; build_libtool_modification=$withval
+else $as_nop
+  build_libtool_modification=never
+fi
+
+
+#
+# Apply a patch (locally maintained one of ours) to libtool
+#
+case $host in
+  x86_64-*mingw32*)
+ac_config_commands="$ac_config_commands libtool-patch"
+
+  ;;
+  *)
+  ;;
+esac
+
+###############################
+#### Checks for libraries. ####
+###############################
+
+#
+# gpg-error is required.
+#
+  gpg_error_config_prefix=""
+
+# Check whether --with-libgpg-error-prefix was given.
+if test ${with_libgpg_error_prefix+y}
+then :
+  withval=$with_libgpg_error_prefix; gpg_error_config_prefix="$withval"
+fi
+
+
+
+# Check whether --with-gpg-error-prefix was given.
+if test ${with_gpg_error_prefix+y}
+then :
+  withval=$with_gpg_error_prefix; gpg_error_config_prefix="$withval"
+fi
+
+
+  if test x"${GPG_ERROR_CONFIG}" = x ; then
+     if test x"${gpg_error_config_prefix}" != x ; then
+        GPG_ERROR_CONFIG="${gpg_error_config_prefix}/bin/gpg-error-config"
+     else
+       case "${SYSROOT}" in
+         /*)
+           if test -x "${SYSROOT}/bin/gpg-error-config" ; then
+             GPG_ERROR_CONFIG="${SYSROOT}/bin/gpg-error-config"
+           fi
+           ;;
+         '')
+           ;;
+          *)
+           { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: Ignoring 
\$SYSROOT as it is not an absolute path." >&5
+printf "%s\n" "$as_me: WARNING: Ignoring \$SYSROOT as it is not an absolute 
path." >&2;}
+           ;;
+       esac
+     fi
+  fi
+
+  # Extract the first word of "gpg-error-config", so it can be a program name 
with args.
+set dummy gpg-error-config; ac_word=$2
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+printf %s "checking for $ac_word... " >&6; }
+if test ${ac_cv_path_GPG_ERROR_CONFIG+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  case $GPG_ERROR_CONFIG in
+  [\\/]* | ?:[\\/]*)
+  ac_cv_path_GPG_ERROR_CONFIG="$GPG_ERROR_CONFIG" # Let the user override the 
test with a path.
+  ;;
+  *)
+  as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  case $as_dir in #(((
+    '') as_dir=./ ;;
+    */) ;;
+    *) as_dir=$as_dir/ ;;
+  esac
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then
+    ac_cv_path_GPG_ERROR_CONFIG="$as_dir$ac_word$ac_exec_ext"
+    printf "%s\n" "$as_me:${as_lineno-$LINENO}: found 
$as_dir$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+  test -z "$ac_cv_path_GPG_ERROR_CONFIG" && ac_cv_path_GPG_ERROR_CONFIG="no"
+  ;;
+esac
+fi
+GPG_ERROR_CONFIG=$ac_cv_path_GPG_ERROR_CONFIG
+if test -n "$GPG_ERROR_CONFIG"; then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $GPG_ERROR_CONFIG" >&5
+printf "%s\n" "$GPG_ERROR_CONFIG" >&6; }
+else
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+fi
+
+
+
+  # Extract the first word of "gpgrt-config", so it can be a program name with 
args.
+set dummy gpgrt-config; ac_word=$2
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+printf %s "checking for $ac_word... " >&6; }
+if test ${ac_cv_path_GPGRT_CONFIG+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  case $GPGRT_CONFIG in
+  [\\/]* | ?:[\\/]*)
+  ac_cv_path_GPGRT_CONFIG="$GPGRT_CONFIG" # Let the user override the test 
with a path.
+  ;;
+  *)
+  as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+as_dummy="$prefix/bin:$PATH"
+for as_dir in $as_dummy
+do
+  IFS=$as_save_IFS
+  case $as_dir in #(((
+    '') as_dir=./ ;;
+    */) ;;
+    *) as_dir=$as_dir/ ;;
+  esac
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then
+    ac_cv_path_GPGRT_CONFIG="$as_dir$ac_word$ac_exec_ext"
+    printf "%s\n" "$as_me:${as_lineno-$LINENO}: found 
$as_dir$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+  test -z "$ac_cv_path_GPGRT_CONFIG" && ac_cv_path_GPGRT_CONFIG="no"
+  ;;
+esac
+fi
+GPGRT_CONFIG=$ac_cv_path_GPGRT_CONFIG
+if test -n "$GPGRT_CONFIG"; then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $GPGRT_CONFIG" >&5
+printf "%s\n" "$GPGRT_CONFIG" >&6; }
+else
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+fi
+
+
+  if test "$GPGRT_CONFIG" != "no"; then
+    # Determine gpgrt_libdir
+    #
+    # Get the prefix of gpgrt-config assuming it's something like:
+    #   <PREFIX>/bin/gpgrt-config
+    gpgrt_prefix=${GPGRT_CONFIG%/*/*}
+    possible_libdir1=${gpgrt_prefix}/lib
+    # Determine by using system libdir-format with CC, it's like:
+    #   Normal style: /usr/lib
+    #   GNU cross style: /usr/<triplet>/lib
+    #   Debian style: /usr/lib/<multiarch-name>
+    #   Fedora/openSUSE style: /usr/lib, /usr/lib32 or /usr/lib64
+    # It is assumed that CC is specified to the one of host on cross build.
+    if libdir_candidates=$(${CC:-cc} -print-search-dirs | \
+          sed -n -e "/^libraries/{s/libraries: =//;s/:/\\
+/g;p;}"); then
+      # From the output of -print-search-dirs, select valid pkgconfig dirs.
+      libdir_candidates=$(for dir in $libdir_candidates; do
+        if p=$(cd $dir 2>/dev/null && pwd); then
+          test -d "$p/pkgconfig" && echo $p;
+        fi
+      done)
+
+      for possible_libdir0 in $libdir_candidates; do
+        # possible_libdir0:
+        #   Fallback candidate, the one of system-installed (by $CC)
+        #   (/usr/<triplet>/lib, /usr/lib/<multiarch-name> or /usr/lib32)
+        # possible_libdir1:
+        #   Another candidate, user-locally-installed
+        #   (<gpgrt_prefix>/lib)
+        # possible_libdir2
+        #   Most preferred
+        #   (<gpgrt_prefix>/<triplet>/lib,
+        #    <gpgrt_prefix>/lib/<multiarch-name> or <gpgrt_prefix>/lib32)
+        if test "${possible_libdir0##*/}" = "lib"; then
+          possible_prefix0=${possible_libdir0%/lib}
+          possible_prefix0_triplet=${possible_prefix0##*/}
+          if test -z "$possible_prefix0_triplet"; then
+            continue
+          fi
+          possible_libdir2=${gpgrt_prefix}/$possible_prefix0_triplet/lib
+        else
+          possible_prefix0=${possible_libdir0%%/lib*}
+          possible_libdir2=${gpgrt_prefix}${possible_libdir0#$possible_prefix0}
+        fi
+        if test -f ${possible_libdir2}/pkgconfig/gpg-error.pc; then
+          gpgrt_libdir=${possible_libdir2}
+        elif test -f ${possible_libdir1}/pkgconfig/gpg-error.pc; then
+          gpgrt_libdir=${possible_libdir1}
+        elif test -f ${possible_libdir0}/pkgconfig/gpg-error.pc; then
+          gpgrt_libdir=${possible_libdir0}
+        fi
+        if test -n "$gpgrt_libdir"; then break; fi
+      done
+    fi
+    if test -z "$gpgrt_libdir"; then
+      # No valid pkgconfig dir in any of the system directories, fallback
+      gpgrt_libdir=${possible_libdir1}
+    fi
+  else
+    unset GPGRT_CONFIG
+  fi
+
+  if test -n "$gpgrt_libdir"; then
+    GPGRT_CONFIG="$GPGRT_CONFIG --libdir=$gpgrt_libdir"
+    if $GPGRT_CONFIG gpg-error >/dev/null 2>&1; then
+      GPG_ERROR_CONFIG="$GPGRT_CONFIG gpg-error"
+      { printf "%s\n" "$as_me:${as_lineno-$LINENO}: Use gpgrt-config with 
$gpgrt_libdir as gpg-error-config" >&5
+printf "%s\n" "$as_me: Use gpgrt-config with $gpgrt_libdir as 
gpg-error-config" >&6;}
+      gpg_error_config_version=`$GPG_ERROR_CONFIG --modversion`
+    else
+      gpg_error_config_version=`$GPG_ERROR_CONFIG --version`
+      unset GPGRT_CONFIG
+    fi
+  elif test "$GPG_ERROR_CONFIG" != "no"; then
+    gpg_error_config_version=`$GPG_ERROR_CONFIG --version`
+    unset GPGRT_CONFIG
+  fi
+
+  min_gpg_error_version="$NEED_GPG_ERROR_VERSION"
+  ok=no
+  if test "$GPG_ERROR_CONFIG" != "no"; then
+    req_major=`echo $min_gpg_error_version | \
+               sed 's/\([0-9]*\)\.\([0-9]*\)/\1/'`
+    req_minor=`echo $min_gpg_error_version | \
+               sed 's/\([0-9]*\)\.\([0-9]*\)/\2/'`
+    major=`echo $gpg_error_config_version | \
+               sed 's/\([0-9]*\)\.\([0-9]*\).*/\1/'`
+    minor=`echo $gpg_error_config_version | \
+               sed 's/\([0-9]*\)\.\([0-9]*\).*/\2/'`
+    if test "$major" -gt "$req_major"; then
+        ok=yes
+    else
+        if test "$major" -eq "$req_major"; then
+            if test "$minor" -ge "$req_minor"; then
+               ok=yes
+            fi
+        fi
+    fi
+  fi
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for GPG Error - 
version >= $min_gpg_error_version" >&5
+printf %s "checking for GPG Error - version >= $min_gpg_error_version... " 
>&6; }
+  if test $ok = yes; then
+    GPG_ERROR_CFLAGS=`$GPG_ERROR_CONFIG --cflags`
+    GPG_ERROR_LIBS=`$GPG_ERROR_CONFIG --libs`
+    if test -z "$GPGRT_CONFIG"; then
+      GPG_ERROR_MT_CFLAGS=`$GPG_ERROR_CONFIG --mt --cflags 2>/dev/null`
+      GPG_ERROR_MT_LIBS=`$GPG_ERROR_CONFIG --mt --libs 2>/dev/null`
+    else
+      GPG_ERROR_MT_CFLAGS=`$GPG_ERROR_CONFIG --variable=mtcflags 2>/dev/null`
+      GPG_ERROR_MT_CFLAGS="$GPG_ERROR_CFLAGS${GPG_ERROR_CFLAGS:+ 
}$GPG_ERROR_MT_CFLAGS"
+      GPG_ERROR_MT_LIBS=`$GPG_ERROR_CONFIG --variable=mtlibs 2>/dev/null`
+      GPG_ERROR_MT_LIBS="$GPG_ERROR_LIBS${GPG_ERROR_LIBS:+ }$GPG_ERROR_MT_LIBS"
+    fi
+    { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes 
($gpg_error_config_version)" >&5
+printf "%s\n" "yes ($gpg_error_config_version)" >&6; }
+    :
+    if test -z "$GPGRT_CONFIG"; then
+      gpg_error_config_host=`$GPG_ERROR_CONFIG --host 2>/dev/null || echo none`
+    else
+      gpg_error_config_host=`$GPG_ERROR_CONFIG --variable=host 2>/dev/null || 
echo none`
+    fi
+    if test x"$gpg_error_config_host" != xnone ; then
+      if test x"$gpg_error_config_host" != x"$host" ; then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING:
+***
+*** The config script \"$GPG_ERROR_CONFIG\" was
+*** built for $gpg_error_config_host and thus may not match the
+*** used host $host.
+*** You may want to use the configure option --with-libgpg-error-prefix
+*** to specify a matching config script or use \$SYSROOT.
+***" >&5
+printf "%s\n" "$as_me: WARNING:
+***
+*** The config script \"$GPG_ERROR_CONFIG\" was
+*** built for $gpg_error_config_host and thus may not match the
+*** used host $host.
+*** You may want to use the configure option --with-libgpg-error-prefix
+*** to specify a matching config script or use \$SYSROOT.
+***" >&2;}
+        gpg_config_script_warn="$gpg_config_script_warn libgpg-error"
+      fi
+    fi
+  else
+    GPG_ERROR_CFLAGS=""
+    GPG_ERROR_LIBS=""
+    GPG_ERROR_MT_CFLAGS=""
+    GPG_ERROR_MT_LIBS=""
+    { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+    :
+  fi
+
+
+
+
+
+if test "x$GPG_ERROR_LIBS" = "x"; then
+  as_fn_error $? "libgpg-error is needed.
+                See ftp://ftp.gnupg.org/gcrypt/libgpg-error/ ." "$LINENO" 5
+fi
+
+
+printf "%s\n" "#define GPG_ERR_SOURCE_DEFAULT GPG_ERR_SOURCE_GCRYPT" 
>>confdefs.h
+
+
+#
+# Check whether the GNU Pth library is available.  We require this
+# to build the optional gcryptrnd program.
+#
+
+# Check whether --with-pth-prefix was given.
+if test ${with_pth_prefix+y}
+then :
+  withval=$with_pth_prefix; pth_config_prefix="$withval"
+else $as_nop
+  pth_config_prefix=""
+fi
+
+if test x$pth_config_prefix != x ; then
+   PTH_CONFIG="$pth_config_prefix/bin/pth-config"
+fi
+if test "$enable_random_daemon" = "yes"; then
+  # Extract the first word of "pth-config", so it can be a program name with 
args.
+set dummy pth-config; ac_word=$2
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5
+printf %s "checking for $ac_word... " >&6; }
+if test ${ac_cv_path_PTH_CONFIG+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  case $PTH_CONFIG in
+  [\\/]* | ?:[\\/]*)
+  ac_cv_path_PTH_CONFIG="$PTH_CONFIG" # Let the user override the test with a 
path.
+  ;;
+  *)
+  as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  case $as_dir in #(((
+    '') as_dir=./ ;;
+    */) ;;
+    *) as_dir=$as_dir/ ;;
+  esac
+    for ac_exec_ext in '' $ac_executable_extensions; do
+  if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then
+    ac_cv_path_PTH_CONFIG="$as_dir$ac_word$ac_exec_ext"
+    printf "%s\n" "$as_me:${as_lineno-$LINENO}: found 
$as_dir$ac_word$ac_exec_ext" >&5
+    break 2
+  fi
+done
+  done
+IFS=$as_save_IFS
+
+  test -z "$ac_cv_path_PTH_CONFIG" && ac_cv_path_PTH_CONFIG="no"
+  ;;
+esac
+fi
+PTH_CONFIG=$ac_cv_path_PTH_CONFIG
+if test -n "$PTH_CONFIG"; then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $PTH_CONFIG" >&5
+printf "%s\n" "$PTH_CONFIG" >&6; }
+else
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+fi
+
+
+  if test "$PTH_CONFIG" = "no"; then
+    { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING:
+***
+*** To build the Libgcrypt's random number daemon
+*** we need the support of the GNU Portable Threads Library.
+*** Download it from ftp://ftp.gnu.org/gnu/pth/
+*** On a Debian GNU/Linux system you might want to try
+***   apt-get install libpth-dev
+***" >&5
+printf "%s\n" "$as_me: WARNING:
+***
+*** To build the Libgcrypt's random number daemon
+*** we need the support of the GNU Portable Threads Library.
+*** Download it from ftp://ftp.gnu.org/gnu/pth/
+*** On a Debian GNU/Linux system you might want to try
+***   apt-get install libpth-dev
+***" >&2;}
+  else
+
+    _pth_version=`$PTH_CONFIG --version | awk 'NR==1 {print $3}'`
+    _req_version="1.3.7"
+
+    { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for PTH - version 
>= $_req_version" >&5
+printf %s "checking for PTH - version >= $_req_version... " >&6; }
+    for _var in _pth_version _req_version; do
+        eval "_val=\"\$${_var}\""
+        _major=`echo $_val | sed 
's/\([0-9]*\)\.\([0-9]*\)\([ab.]\)\([0-9]*\)/\1/'`
+        _minor=`echo $_val | sed 
's/\([0-9]*\)\.\([0-9]*\)\([ab.]\)\([0-9]*\)/\2/'`
+        _rtype=`echo $_val | sed 
's/\([0-9]*\)\.\([0-9]*\)\([ab.]\)\([0-9]*\)/\3/'`
+        _micro=`echo $_val | sed 
's/\([0-9]*\)\.\([0-9]*\)\([ab.]\)\([0-9]*\)/\4/'`
+        case $_rtype in
+            "a" ) _rtype=0 ;;
+            "b" ) _rtype=1 ;;
+            "." ) _rtype=2 ;;
+        esac
+        _hex=`echo dummy | awk '{ printf("%d%02d%1d%02d", major, minor, rtype, 
micro); }' \
+              "major=$_major" "minor=$_minor" "rtype=$_rtype" "micro=$_micro"`
+        eval "${_var}_hex=\"\$_hex\""
+    done
+    have_pth=no
+    if test ".$_pth_version_hex" != .; then
+        if test ".$_req_version_hex" != .; then
+            if test $_pth_version_hex -ge $_req_version_hex; then
+                have_pth=yes
+            fi
+        fi
+    fi
+    if test $have_pth = yes; then
+       { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+printf "%s\n" "yes" >&6; }
+       { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether PTH 
installation is sane" >&5
+printf %s "checking whether PTH installation is sane... " >&6; }
+       if test ${gnupg_cv_pth_is_sane+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+
+         _gnupg_pth_save_cflags=$CFLAGS
+         _gnupg_pth_save_ldflags=$LDFLAGS
+         _gnupg_pth_save_libs=$LIBS
+         CFLAGS="$CFLAGS `$PTH_CONFIG --cflags`"
+         LDFLAGS="$LDFLAGS `$PTH_CONFIG --ldflags`"
+         LIBS="$LIBS `$PTH_CONFIG --libs`"
+         cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <pth.h>
+
+int
+main (void)
+{
+ pth_init ();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"
+then :
+  gnupg_cv_pth_is_sane=yes
+else $as_nop
+  gnupg_cv_pth_is_sane=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam \
+    conftest$ac_exeext conftest.$ac_ext
+         CFLAGS=$_gnupg_pth_save_cflags
+         LDFLAGS=$_gnupg_pth_save_ldflags
+         LIBS=$_gnupg_pth_save_libs
+
+fi
+
+       if test $gnupg_cv_pth_is_sane != yes; then
+          have_pth=no
+       fi
+       { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: 
$gnupg_cv_pth_is_sane" >&5
+printf "%s\n" "$gnupg_cv_pth_is_sane" >&6; }
+    else
+       { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+    fi
+
+    if test $have_pth = yes; then
+       PTH_CFLAGS=`$PTH_CONFIG --cflags`
+       PTH_LIBS=`$PTH_CONFIG --ldflags`
+       PTH_LIBS="$PTH_LIBS `$PTH_CONFIG --libs --all`"
+
+printf "%s\n" "#define USE_GNU_PTH 1" >>confdefs.h
+
+
+printf "%s\n" "#define HAVE_PTH 1" >>confdefs.h
+
+    fi
+  fi
+fi
+
+
+
+#
+# Check whether pthreads is available
+#
+if test "$have_w32_system" != yes; then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for pthread_create in 
-lpthread" >&5
+printf %s "checking for pthread_create in -lpthread... " >&6; }
+if test ${ac_cv_lib_pthread_pthread_create+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  ac_check_lib_save_LIBS=$LIBS
+LIBS="-lpthread  $LIBS"
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+char pthread_create ();
+int
+main (void)
+{
+return pthread_create ();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"
+then :
+  ac_cv_lib_pthread_pthread_create=yes
+else $as_nop
+  ac_cv_lib_pthread_pthread_create=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam \
+    conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: 
$ac_cv_lib_pthread_pthread_create" >&5
+printf "%s\n" "$ac_cv_lib_pthread_pthread_create" >&6; }
+if test "x$ac_cv_lib_pthread_pthread_create" = xyes
+then :
+  have_pthread=yes
+fi
+
+  if test "$have_pthread" = yes; then
+
+printf "%s\n" "#define HAVE_PTHREAD 1 " >>confdefs.h
+
+  fi
+fi
+
+
+# Solaris needs -lsocket and -lnsl. Unisys system includes
+# gethostbyname in libsocket but needs libnsl for socket.
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for library containing 
setsockopt" >&5
+printf %s "checking for library containing setsockopt... " >&6; }
+if test ${ac_cv_search_setsockopt+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  ac_func_search_save_LIBS=$LIBS
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+char setsockopt ();
+int
+main (void)
+{
+return setsockopt ();
+  ;
+  return 0;
+}
+_ACEOF
+for ac_lib in '' socket
+do
+  if test -z "$ac_lib"; then
+    ac_res="none required"
+  else
+    ac_res=-l$ac_lib
+    LIBS="-l$ac_lib  $ac_func_search_save_LIBS"
+  fi
+  if ac_fn_c_try_link "$LINENO"
+then :
+  ac_cv_search_setsockopt=$ac_res
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam \
+    conftest$ac_exeext
+  if test ${ac_cv_search_setsockopt+y}
+then :
+  break
+fi
+done
+if test ${ac_cv_search_setsockopt+y}
+then :
+
+else $as_nop
+  ac_cv_search_setsockopt=no
+fi
+rm conftest.$ac_ext
+LIBS=$ac_func_search_save_LIBS
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: 
$ac_cv_search_setsockopt" >&5
+printf "%s\n" "$ac_cv_search_setsockopt" >&6; }
+ac_res=$ac_cv_search_setsockopt
+if test "$ac_res" != no
+then :
+  test "$ac_res" = "none required" || LIBS="$ac_res $LIBS"
+
+else $as_nop
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for library 
containing setsockopt" >&5
+printf %s "checking for library containing setsockopt... " >&6; }
+if test ${ac_cv_search_setsockopt+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  ac_func_search_save_LIBS=$LIBS
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+char setsockopt ();
+int
+main (void)
+{
+return setsockopt ();
+  ;
+  return 0;
+}
+_ACEOF
+for ac_lib in '' socket
+do
+  if test -z "$ac_lib"; then
+    ac_res="none required"
+  else
+    ac_res=-l$ac_lib
+    LIBS="-l$ac_lib -lnsl $ac_func_search_save_LIBS"
+  fi
+  if ac_fn_c_try_link "$LINENO"
+then :
+  ac_cv_search_setsockopt=$ac_res
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam \
+    conftest$ac_exeext
+  if test ${ac_cv_search_setsockopt+y}
+then :
+  break
+fi
+done
+if test ${ac_cv_search_setsockopt+y}
+then :
+
+else $as_nop
+  ac_cv_search_setsockopt=no
+fi
+rm conftest.$ac_ext
+LIBS=$ac_func_search_save_LIBS
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: 
$ac_cv_search_setsockopt" >&5
+printf "%s\n" "$ac_cv_search_setsockopt" >&6; }
+ac_res=$ac_cv_search_setsockopt
+if test "$ac_res" != no
+then :
+  test "$ac_res" = "none required" || LIBS="$ac_res $LIBS"
+
+fi
+
+fi
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for library containing 
setsockopt" >&5
+printf %s "checking for library containing setsockopt... " >&6; }
+if test ${ac_cv_search_setsockopt+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  ac_func_search_save_LIBS=$LIBS
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+char setsockopt ();
+int
+main (void)
+{
+return setsockopt ();
+  ;
+  return 0;
+}
+_ACEOF
+for ac_lib in '' nsl
+do
+  if test -z "$ac_lib"; then
+    ac_res="none required"
+  else
+    ac_res=-l$ac_lib
+    LIBS="-l$ac_lib  $ac_func_search_save_LIBS"
+  fi
+  if ac_fn_c_try_link "$LINENO"
+then :
+  ac_cv_search_setsockopt=$ac_res
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam \
+    conftest$ac_exeext
+  if test ${ac_cv_search_setsockopt+y}
+then :
+  break
+fi
+done
+if test ${ac_cv_search_setsockopt+y}
+then :
+
+else $as_nop
+  ac_cv_search_setsockopt=no
+fi
+rm conftest.$ac_ext
+LIBS=$ac_func_search_save_LIBS
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: 
$ac_cv_search_setsockopt" >&5
+printf "%s\n" "$ac_cv_search_setsockopt" >&6; }
+ac_res=$ac_cv_search_setsockopt
+if test "$ac_res" != no
+then :
+  test "$ac_res" = "none required" || LIBS="$ac_res $LIBS"
+
+fi
+
+
+##################################
+#### Checks for header files. ####
+##################################
+
+ac_fn_c_check_header_compile "$LINENO" "unistd.h" "ac_cv_header_unistd_h" 
"$ac_includes_default"
+if test "x$ac_cv_header_unistd_h" = xyes
+then :
+  printf "%s\n" "#define HAVE_UNISTD_H 1" >>confdefs.h
+
+fi
+ac_fn_c_check_header_compile "$LINENO" "sys/auxv.h" "ac_cv_header_sys_auxv_h" 
"$ac_includes_default"
+if test "x$ac_cv_header_sys_auxv_h" = xyes
+then :
+  printf "%s\n" "#define HAVE_SYS_AUXV_H 1" >>confdefs.h
+
+fi
+ac_fn_c_check_header_compile "$LINENO" "sys/random.h" 
"ac_cv_header_sys_random_h" "$ac_includes_default"
+if test "x$ac_cv_header_sys_random_h" = xyes
+then :
+  printf "%s\n" "#define HAVE_SYS_RANDOM_H 1" >>confdefs.h
+
+fi
+
+
+
+##########################################
+#### Checks for typedefs, structures, ####
+####  and compiler characteristics.   ####
+##########################################
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for an ANSI 
C-conforming const" >&5
+printf %s "checking for an ANSI C-conforming const... " >&6; }
+if test ${ac_cv_c_const+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main (void)
+{
+
+#ifndef __cplusplus
+  /* Ultrix mips cc rejects this sort of thing.  */
+  typedef int charset[2];
+  const charset cs = { 0, 0 };
+  /* SunOS 4.1.1 cc rejects this.  */
+  char const *const *pcpcc;
+  char **ppc;
+  /* NEC SVR4.0.2 mips cc rejects this.  */
+  struct point {int x, y;};
+  static struct point const zero = {0,0};
+  /* IBM XL C 1.02.0.0 rejects this.
+     It does not let you subtract one const X* pointer from another in
+     an arm of an if-expression whose if-part is not a constant
+     expression */
+  const char *g = "string";
+  pcpcc = &g + (g ? g-g : 0);
+  /* HPUX 7.0 cc rejects these. */
+  ++pcpcc;
+  ppc = (char**) pcpcc;
+  pcpcc = (char const *const *) ppc;
+  { /* SCO 3.2v4 cc rejects this sort of thing.  */
+    char tx;
+    char *t = &tx;
+    char const *s = 0 ? (char *) 0 : (char const *) 0;
+
+    *t++ = 0;
+    if (s) return 0;
+  }
+  { /* Someone thinks the Sun supposedly-ANSI compiler will reject this.  */
+    int x[] = {25, 17};
+    const int *foo = &x[0];
+    ++foo;
+  }
+  { /* Sun SC1.0 ANSI compiler rejects this -- but not the above. */
+    typedef const int *iptr;
+    iptr p = 0;
+    ++p;
+  }
+  { /* IBM XL C 1.02.0.0 rejects this sort of thing, saying
+       "k.c", line 2.27: 1506-025 (S) Operand must be a modifiable lvalue. */
+    struct s { int j; const int *ap[3]; } bx;
+    struct s *b = &bx; b->j = 5;
+  }
+  { /* ULTRIX-32 V3.1 (Rev 9) vcc rejects this */
+    const int foo = 10;
+    if (!foo) return 0;
+  }
+  return !cs[0] && !zero.x;
+#endif
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"
+then :
+  ac_cv_c_const=yes
+else $as_nop
+  ac_cv_c_const=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_const" >&5
+printf "%s\n" "$ac_cv_c_const" >&6; }
+if test $ac_cv_c_const = no; then
+
+printf "%s\n" "#define const /**/" >>confdefs.h
+
+fi
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for inline" >&5
+printf %s "checking for inline... " >&6; }
+if test ${ac_cv_c_inline+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  ac_cv_c_inline=no
+for ac_kw in inline __inline__ __inline; do
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#ifndef __cplusplus
+typedef int foo_t;
+static $ac_kw foo_t static_foo (void) {return 0; }
+$ac_kw foo_t foo (void) {return 0; }
+#endif
+
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"
+then :
+  ac_cv_c_inline=$ac_kw
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext
+  test "$ac_cv_c_inline" != no && break
+done
+
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_inline" >&5
+printf "%s\n" "$ac_cv_c_inline" >&6; }
+
+case $ac_cv_c_inline in
+  inline | yes) ;;
+  *)
+    case $ac_cv_c_inline in
+      no) ac_val=;;
+      *) ac_val=$ac_cv_c_inline;;
+    esac
+    cat >>confdefs.h <<_ACEOF
+#ifndef __cplusplus
+#define inline $ac_val
+#endif
+_ACEOF
+    ;;
+esac
+
+ac_fn_c_check_type "$LINENO" "size_t" "ac_cv_type_size_t" 
"$ac_includes_default"
+if test "x$ac_cv_type_size_t" = xyes
+then :
+
+else $as_nop
+
+printf "%s\n" "#define size_t unsigned int" >>confdefs.h
+
+fi
+
+
+  ac_fn_c_check_type "$LINENO" "pid_t" "ac_cv_type_pid_t" "$ac_includes_default
+"
+if test "x$ac_cv_type_pid_t" = xyes
+then :
+
+else $as_nop
+                                          cat confdefs.h - <<_ACEOF 
>conftest.$ac_ext
+/* end confdefs.h.  */
+
+          #if defined _WIN64 && !defined __CYGWIN__
+          LLP64
+          #endif
+
+int
+main (void)
+{
+
+  ;
+  return 0;
+}
+
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"
+then :
+  ac_pid_type='int'
+else $as_nop
+  ac_pid_type='__int64'
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext
+
+printf "%s\n" "#define pid_t $ac_pid_type" >>confdefs.h
+
+
+fi
+
+
+
+ac_fn_c_check_type "$LINENO" "byte" "ac_cv_type_byte" "$ac_includes_default"
+if test "x$ac_cv_type_byte" = xyes
+then :
+
+printf "%s\n" "#define HAVE_BYTE 1" >>confdefs.h
+
+
+fi
+ac_fn_c_check_type "$LINENO" "ushort" "ac_cv_type_ushort" 
"$ac_includes_default"
+if test "x$ac_cv_type_ushort" = xyes
+then :
+
+printf "%s\n" "#define HAVE_USHORT 1" >>confdefs.h
+
+
+fi
+ac_fn_c_check_type "$LINENO" "u16" "ac_cv_type_u16" "$ac_includes_default"
+if test "x$ac_cv_type_u16" = xyes
+then :
+
+printf "%s\n" "#define HAVE_U16 1" >>confdefs.h
+
+
+fi
+ac_fn_c_check_type "$LINENO" "u32" "ac_cv_type_u32" "$ac_includes_default"
+if test "x$ac_cv_type_u32" = xyes
+then :
+
+printf "%s\n" "#define HAVE_U32 1" >>confdefs.h
+
+
+fi
+ac_fn_c_check_type "$LINENO" "u64" "ac_cv_type_u64" "$ac_includes_default"
+if test "x$ac_cv_type_u64" = xyes
+then :
+
+printf "%s\n" "#define HAVE_U64 1" >>confdefs.h
+
+
+fi
+
+
+
+
+   if test $ac_cv_header_sys_socket_h = no; then
+                         ac_fn_c_check_header_compile "$LINENO" "ws2tcpip.h" 
"ac_cv_header_ws2tcpip_h" "$ac_includes_default"
+if test "x$ac_cv_header_ws2tcpip_h" = xyes
+then :
+  printf "%s\n" "#define HAVE_WS2TCPIP_H 1" >>confdefs.h
+
+fi
+
+   fi
+
+   ac_fn_c_check_type "$LINENO" "socklen_t" "ac_cv_type_socklen_t" "
+/* <sys/types.h> is not needed according to POSIX, but the
+   <sys/socket.h> in i386-unknown-freebsd4.10 and
+   powerpc-apple-darwin5.5 required it. */
+#include <sys/types.h>
+#if HAVE_SYS_SOCKET_H
+# include <sys/socket.h>
+#elif HAVE_WS2TCPIP_H
+# include <ws2tcpip.h>
+#endif
+
+"
+if test "x$ac_cv_type_socklen_t" = xyes
+then :
+
+else $as_nop
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for socklen_t 
equivalent" >&5
+printf %s "checking for socklen_t equivalent... " >&6; }
+if test ${gl_cv_socklen_t_equiv+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  # Systems have either "struct sockaddr *" or
+         # "void *" as the second argument to getpeername
+         gl_cv_socklen_t_equiv=
+         for arg2 in "struct sockaddr" void; do
+           for t in int size_t "unsigned int" "long int" "unsigned long int"; 
do
+             cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <sys/types.h>
+                   #include <sys/socket.h>
+
+                   int getpeername (int, $arg2 *, $t *);
+int
+main (void)
+{
+$t len;
+                  getpeername (0, 0, &len);
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"
+then :
+  gl_cv_socklen_t_equiv="$t"
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext
+             test "$gl_cv_socklen_t_equiv" != "" && break
+           done
+           test "$gl_cv_socklen_t_equiv" != "" && break
+         done
+         if test "$gl_cv_socklen_t_equiv" = ""; then
+           as_fn_error $? "Cannot find a type to use in place of socklen_t" 
"$LINENO" 5
+         fi
+
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $gl_cv_socklen_t_equiv" 
>&5
+printf "%s\n" "$gl_cv_socklen_t_equiv" >&6; }
+
+printf "%s\n" "#define socklen_t $gl_cv_socklen_t_equiv" >>confdefs.h
+
+fi
+
+
+#
+# Check for __builtin_bswap32 intrinsic.
+#
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for __builtin_bswap32" 
>&5
+printf %s "checking for __builtin_bswap32... " >&6; }
+if test ${gcry_cv_have_builtin_bswap32+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  gcry_cv_have_builtin_bswap32=no
+        cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main (void)
+{
+int x = 0; int y = __builtin_bswap32(x); return y;
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"
+then :
+  gcry_cv_have_builtin_bswap32=yes
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam \
+    conftest$ac_exeext conftest.$ac_ext
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: 
$gcry_cv_have_builtin_bswap32" >&5
+printf "%s\n" "$gcry_cv_have_builtin_bswap32" >&6; }
+if test "$gcry_cv_have_builtin_bswap32" = "yes" ; then
+
+printf "%s\n" "#define HAVE_BUILTIN_BSWAP32 1" >>confdefs.h
+
+fi
+
+
+#
+# Check for __builtin_bswap64 intrinsic.
+#
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for __builtin_bswap64" 
>&5
+printf %s "checking for __builtin_bswap64... " >&6; }
+if test ${gcry_cv_have_builtin_bswap64+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  gcry_cv_have_builtin_bswap64=no
+        cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main (void)
+{
+long long x = 0; long long y = __builtin_bswap64(x); return y;
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"
+then :
+  gcry_cv_have_builtin_bswap64=yes
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam \
+    conftest$ac_exeext conftest.$ac_ext
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: 
$gcry_cv_have_builtin_bswap64" >&5
+printf "%s\n" "$gcry_cv_have_builtin_bswap64" >&6; }
+if test "$gcry_cv_have_builtin_bswap64" = "yes" ; then
+
+printf "%s\n" "#define HAVE_BUILTIN_BSWAP64 1" >>confdefs.h
+
+fi
+
+
+#
+# Check for __builtin_ctz intrinsic.
+#
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for __builtin_ctz" >&5
+printf %s "checking for __builtin_ctz... " >&6; }
+if test ${gcry_cv_have_builtin_ctz+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  gcry_cv_have_builtin_ctz=no
+        cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main (void)
+{
+unsigned int x = 0; int y = __builtin_ctz(x); return y;
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"
+then :
+  gcry_cv_have_builtin_ctz=yes
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam \
+    conftest$ac_exeext conftest.$ac_ext
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: 
$gcry_cv_have_builtin_ctz" >&5
+printf "%s\n" "$gcry_cv_have_builtin_ctz" >&6; }
+if test "$gcry_cv_have_builtin_ctz" = "yes" ; then
+
+printf "%s\n" "#define HAVE_BUILTIN_CTZ 1" >>confdefs.h
+
+fi
+
+
+#
+# Check for __builtin_ctzl intrinsic.
+#
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for __builtin_ctzl" >&5
+printf %s "checking for __builtin_ctzl... " >&6; }
+if test ${gcry_cv_have_builtin_ctzl+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  gcry_cv_have_builtin_ctzl=no
+        cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main (void)
+{
+unsigned long x = 0; long y = __builtin_ctzl(x); return y;
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"
+then :
+  gcry_cv_have_builtin_ctzl=yes
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam \
+    conftest$ac_exeext conftest.$ac_ext
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: 
$gcry_cv_have_builtin_ctzl" >&5
+printf "%s\n" "$gcry_cv_have_builtin_ctzl" >&6; }
+if test "$gcry_cv_have_builtin_ctzl" = "yes" ; then
+
+printf "%s\n" "#define HAVE_BUILTIN_CTZL 1" >>confdefs.h
+
+fi
+
+
+#
+# Check for __builtin_clz intrinsic.
+#
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for __builtin_clz" >&5
+printf %s "checking for __builtin_clz... " >&6; }
+if test ${gcry_cv_have_builtin_clz+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  gcry_cv_have_builtin_clz=no
+        cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main (void)
+{
+unsigned int x = 0; int y = __builtin_clz(x); return y;
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"
+then :
+  gcry_cv_have_builtin_clz=yes
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam \
+    conftest$ac_exeext conftest.$ac_ext
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: 
$gcry_cv_have_builtin_clz" >&5
+printf "%s\n" "$gcry_cv_have_builtin_clz" >&6; }
+if test "$gcry_cv_have_builtin_clz" = "yes" ; then
+
+printf "%s\n" "#define HAVE_BUILTIN_CLZ 1" >>confdefs.h
+
+fi
+
+
+#
+# Check for __builtin_clzl intrinsic.
+#
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for __builtin_clzl" >&5
+printf %s "checking for __builtin_clzl... " >&6; }
+if test ${gcry_cv_have_builtin_clzl+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  gcry_cv_have_builtin_clzl=no
+        cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main (void)
+{
+unsigned long x = 0; long y = __builtin_clzl(x); return y;
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"
+then :
+  gcry_cv_have_builtin_clzl=yes
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam \
+    conftest$ac_exeext conftest.$ac_ext
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: 
$gcry_cv_have_builtin_clzl" >&5
+printf "%s\n" "$gcry_cv_have_builtin_clzl" >&6; }
+if test "$gcry_cv_have_builtin_clzl" = "yes" ; then
+
+printf "%s\n" "#define HAVE_BUILTIN_CLZL 1" >>confdefs.h
+
+fi
+
+
+#
+# Check for __sync_synchronize intrinsic.
+#
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for __sync_synchronize" 
>&5
+printf %s "checking for __sync_synchronize... " >&6; }
+if test ${gcry_cv_have_sync_synchronize+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  gcry_cv_have_sync_synchronize=no
+        cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main (void)
+{
+__sync_synchronize(); return 0;
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"
+then :
+  gcry_cv_have_sync_synchronize=yes
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam \
+    conftest$ac_exeext conftest.$ac_ext
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: 
$gcry_cv_have_sync_synchronize" >&5
+printf "%s\n" "$gcry_cv_have_sync_synchronize" >&6; }
+if test "$gcry_cv_have_sync_synchronize" = "yes" ; then
+
+printf "%s\n" "#define HAVE_SYNC_SYNCHRONIZE 1" >>confdefs.h
+
+fi
+
+
+#
+# Check for VLA support (variable length arrays).
+#
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the variable 
length arrays are supported" >&5
+printf %s "checking whether the variable length arrays are supported... " >&6; 
}
+if test ${gcry_cv_have_vla+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  gcry_cv_have_vla=no
+        cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+void f1(char *, int);
+            char foo(int i) {
+              char b[(i < 0 ? 0 : i) + 1];
+              f1(b, sizeof b); return b[0];}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"
+then :
+  gcry_cv_have_vla=yes
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $gcry_cv_have_vla" >&5
+printf "%s\n" "$gcry_cv_have_vla" >&6; }
+if test "$gcry_cv_have_vla" = "yes" ; then
+
+printf "%s\n" "#define HAVE_VLA 1" >>confdefs.h
+
+fi
+
+
+#
+# Check for ELF visibility support.
+#
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the visibility 
attribute is supported" >&5
+printf %s "checking whether the visibility attribute is supported... " >&6; }
+if test ${gcry_cv_visibility_attribute+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  gcry_cv_visibility_attribute=no
+        cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+int foo __attribute__ ((visibility ("hidden"))) = 1;
+            int bar __attribute__ ((visibility ("protected"))) = 1;
+
+_ACEOF
+
+        if ${CC-cc} -Werror -S conftest.c -o conftest.s \
+                  1>&5 2>&5 ; then
+            if grep '\.hidden.*foo' conftest.s >/dev/null 2>&1 ; then
+                if grep '\.protected.*bar' conftest.s >/dev/null 2>&1; then
+                    gcry_cv_visibility_attribute=yes
+                fi
+            fi
+        fi
+
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: 
$gcry_cv_visibility_attribute" >&5
+printf "%s\n" "$gcry_cv_visibility_attribute" >&6; }
+if test "$gcry_cv_visibility_attribute" = "yes"; then
+    { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for broken 
visibility attribute" >&5
+printf %s "checking for broken visibility attribute... " >&6; }
+if test ${gcry_cv_broken_visibility_attribute+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  gcry_cv_broken_visibility_attribute=yes
+        cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+int foo (int x);
+            int bar (int x) __asm__ ("foo")
+                            __attribute__ ((visibility ("hidden")));
+            int bar (int x) { return x; }
+
+_ACEOF
+
+        if ${CC-cc} -Werror -S conftest.c -o conftest.s \
+                  1>&5 2>&5 ; then
+           if grep '\.hidden[  _]foo' conftest.s >/dev/null 2>&1;
+            then
+               gcry_cv_broken_visibility_attribute=no
+           fi
+        fi
+
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: 
$gcry_cv_broken_visibility_attribute" >&5
+printf "%s\n" "$gcry_cv_broken_visibility_attribute" >&6; }
+fi
+if test "$gcry_cv_visibility_attribute" = "yes"; then
+    { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for broken alias 
attribute" >&5
+printf %s "checking for broken alias attribute... " >&6; }
+if test ${gcry_cv_broken_alias_attribute+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  gcry_cv_broken_alias_attribute=yes
+        cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+extern int foo (int x) __asm ("xyzzy");
+            int bar (int x) { return x; }
+            extern __typeof (bar) foo __attribute ((weak, alias ("bar")));
+            extern int dfoo;
+            extern __typeof (dfoo) dfoo __asm ("abccb");
+            int dfoo = 1;
+
+_ACEOF
+
+        if ${CC-cc} -Werror -S conftest.c -o conftest.s \
+                  1>&5 2>&5 ; then
+           if grep 'xyzzy' conftest.s >/dev/null 2>&1 && \
+              grep 'abccb' conftest.s >/dev/null 2>&1; then
+              gcry_cv_broken_alias_attribute=no
+           fi
+        fi
+
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: 
$gcry_cv_broken_alias_attribute" >&5
+printf "%s\n" "$gcry_cv_broken_alias_attribute" >&6; }
+fi
+if test "$gcry_cv_visibility_attribute" = "yes"; then
+    { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if gcc supports 
-fvisibility=hidden" >&5
+printf %s "checking if gcc supports -fvisibility=hidden... " >&6; }
+if test ${gcry_cv_gcc_has_f_visibility+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  gcry_cv_gcc_has_f_visibility=no
+        _gcc_cflags_save=$CFLAGS
+        CFLAGS="-fvisibility=hidden"
+        cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main (void)
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"
+then :
+  gcry_cv_gcc_has_f_visibility=yes
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext
+        CFLAGS=$_gcc_cflags_save;
+
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: 
$gcry_cv_gcc_has_f_visibility" >&5
+printf "%s\n" "$gcry_cv_gcc_has_f_visibility" >&6; }
+fi
+if test "$gcry_cv_visibility_attribute" = "yes" \
+   && test "$gcry_cv_broken_visibility_attribute" != "yes" \
+   && test "$gcry_cv_broken_alias_attribute" != "yes" \
+   && test "$gcry_cv_gcc_has_f_visibility" = "yes"
+ then
+
+printf "%s\n" "#define GCRY_USE_VISIBILITY 1" >>confdefs.h
+
+   CFLAGS="$CFLAGS -fvisibility=hidden"
+fi
+
+
+# Following attribute tests depend on warnings to cause compile to fail,
+# so set -Werror temporarily.
+_gcc_cflags_save=$CFLAGS
+CFLAGS="$CFLAGS -Werror"
+
+
+#
+# Check whether the compiler supports the GCC style aligned attribute
+#
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the GCC style 
aligned attribute is supported" >&5
+printf %s "checking whether the GCC style aligned attribute is supported... " 
>&6; }
+if test ${gcry_cv_gcc_attribute_aligned+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  gcry_cv_gcc_attribute_aligned=no
+        cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+struct { int a; } foo __attribute__ ((aligned (16)));
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"
+then :
+  gcry_cv_gcc_attribute_aligned=yes
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: 
$gcry_cv_gcc_attribute_aligned" >&5
+printf "%s\n" "$gcry_cv_gcc_attribute_aligned" >&6; }
+if test "$gcry_cv_gcc_attribute_aligned" = "yes" ; then
+
+printf "%s\n" "#define HAVE_GCC_ATTRIBUTE_ALIGNED 1" >>confdefs.h
+
+fi
+
+
+#
+# Check whether the compiler supports the GCC style packed attribute
+#
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the GCC style 
packed attribute is supported" >&5
+printf %s "checking whether the GCC style packed attribute is supported... " 
>&6; }
+if test ${gcry_cv_gcc_attribute_packed+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  gcry_cv_gcc_attribute_packed=no
+        cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+struct foolong_s { long b; } __attribute__ ((packed));
+            struct foo_s { char a; struct foolong_s b; }
+              __attribute__ ((packed));
+            enum bar {
+              FOO = 1 / (sizeof(struct foo_s) == (sizeof(char) + 
sizeof(long))),
+            };
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"
+then :
+  gcry_cv_gcc_attribute_packed=yes
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: 
$gcry_cv_gcc_attribute_packed" >&5
+printf "%s\n" "$gcry_cv_gcc_attribute_packed" >&6; }
+if test "$gcry_cv_gcc_attribute_packed" = "yes" ; then
+
+printf "%s\n" "#define HAVE_GCC_ATTRIBUTE_PACKED 1" >>confdefs.h
+
+fi
+
+
+#
+# Check whether the compiler supports the GCC style may_alias attribute
+#
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the GCC style 
may_alias attribute is supported" >&5
+printf %s "checking whether the GCC style may_alias attribute is supported... 
" >&6; }
+if test ${gcry_cv_gcc_attribute_may_alias+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  gcry_cv_gcc_attribute_may_alias=no
+        cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+typedef struct foo_s { int a; }
+            __attribute__ ((may_alias)) foo_t;
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"
+then :
+  gcry_cv_gcc_attribute_may_alias=yes
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: 
$gcry_cv_gcc_attribute_may_alias" >&5
+printf "%s\n" "$gcry_cv_gcc_attribute_may_alias" >&6; }
+if test "$gcry_cv_gcc_attribute_may_alias" = "yes" ; then
+
+printf "%s\n" "#define HAVE_GCC_ATTRIBUTE_MAY_ALIAS 1" >>confdefs.h
+
+fi
+
+
+# Restore flags.
+CFLAGS=$_gcc_cflags_save;
+
+
+#
+# Check whether the compiler supports 'asm' or '__asm__' keyword for
+# assembler blocks.
+#
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether 'asm' assembler 
keyword is supported" >&5
+printf %s "checking whether 'asm' assembler keyword is supported... " >&6; }
+if test ${gcry_cv_have_asm+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  gcry_cv_have_asm=no
+        cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+void a(void) { asm("":::"memory"); }
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"
+then :
+  gcry_cv_have_asm=yes
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $gcry_cv_have_asm" >&5
+printf "%s\n" "$gcry_cv_have_asm" >&6; }
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether '__asm__' 
assembler keyword is supported" >&5
+printf %s "checking whether '__asm__' assembler keyword is supported... " >&6; 
}
+if test ${gcry_cv_have___asm__+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  gcry_cv_have___asm__=no
+        cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+void a(void) { __asm__("":::"memory"); }
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"
+then :
+  gcry_cv_have___asm__=yes
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $gcry_cv_have___asm__" 
>&5
+printf "%s\n" "$gcry_cv_have___asm__" >&6; }
+if test "$gcry_cv_have_asm" = "no" ; then
+   if test "$gcry_cv_have___asm__" = "yes" ; then
+
+printf "%s\n" "#define asm __asm__" >>confdefs.h
+
+   fi
+fi
+
+
+#
+# Check whether the compiler supports inline assembly memory barrier.
+#
+if test "$gcry_cv_have_asm" = "no" ; then
+   if test "$gcry_cv_have___asm__" = "yes" ; then
+      { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether inline 
assembly memory barrier is supported" >&5
+printf %s "checking whether inline assembly memory barrier is supported... " 
>&6; }
+if test ${gcry_cv_have_asm_volatile_memory+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  gcry_cv_have_asm_volatile_memory=no
+           cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+void a(int x)
+               {
+                 __asm__ volatile("":::"memory");
+                 __asm__ volatile("":"+r"(x)::"memory");
+               }
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"
+then :
+  gcry_cv_have_asm_volatile_memory=yes
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: 
$gcry_cv_have_asm_volatile_memory" >&5
+printf "%s\n" "$gcry_cv_have_asm_volatile_memory" >&6; }
+   fi
+else
+   { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether inline 
assembly memory barrier is supported" >&5
+printf %s "checking whether inline assembly memory barrier is supported... " 
>&6; }
+if test ${gcry_cv_have_asm_volatile_memory+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  gcry_cv_have_asm_volatile_memory=no
+        cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+void a(int x)
+            {
+              asm volatile("":::"memory");
+              asm volatile("":"+r"(x)::"memory"); }
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"
+then :
+  gcry_cv_have_asm_volatile_memory=yes
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: 
$gcry_cv_have_asm_volatile_memory" >&5
+printf "%s\n" "$gcry_cv_have_asm_volatile_memory" >&6; }
+fi
+if test "$gcry_cv_have_asm_volatile_memory" = "yes" ; then
+
+printf "%s\n" "#define HAVE_GCC_ASM_VOLATILE_MEMORY 1" >>confdefs.h
+
+fi
+
+
+#
+# Check whether GCC assembler supports features needed for our ARM
+# implementations.  This needs to be done before setting up the
+# assembler stuff.
+#
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether GCC assembler 
is compatible for ARM assembly implementations" >&5
+printf %s "checking whether GCC assembler is compatible for ARM assembly 
implementations... " >&6; }
+if test ${gcry_cv_gcc_arm_platform_as_ok+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test "$try_asm_modules" != "yes" ; then
+          gcry_cv_gcc_arm_platform_as_ok="n/a"
+        else
+          gcry_cv_gcc_arm_platform_as_ok=no
+          cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+__asm__(
+                /* Test if assembler supports UAL syntax.  */
+                ".syntax unified\n\t"
+                ".arm\n\t" /* our assembly code is in ARM mode  */
+                ".text\n\t"
+                /* Following causes error if assembler ignored '.syntax 
unified'.  */
+                "asmfunc:\n\t"
+                "add %r0, %r0, %r4, ror #12;\n\t"
+
+                /* Test if '.type' and '.size' are supported.  */
+                ".size asmfunc,.-asmfunc;\n\t"
+                ".type asmfunc,%function;\n\t"
+              );
+              void asmfunc(void);
+int
+main (void)
+{
+ asmfunc();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"
+then :
+  gcry_cv_gcc_arm_platform_as_ok=yes
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam \
+    conftest$ac_exeext conftest.$ac_ext
+        fi
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: 
$gcry_cv_gcc_arm_platform_as_ok" >&5
+printf "%s\n" "$gcry_cv_gcc_arm_platform_as_ok" >&6; }
+if test "$gcry_cv_gcc_arm_platform_as_ok" = "yes" ; then
+
+printf "%s\n" "#define HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS 1" >>confdefs.h
+
+fi
+
+
+#
+# Check whether GCC assembler supports features needed for our ARMv8/Aarch64
+# implementations.  This needs to be done before setting up the
+# assembler stuff.
+#
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether GCC assembler 
is compatible for ARMv8/Aarch64 assembly implementations" >&5
+printf %s "checking whether GCC assembler is compatible for ARMv8/Aarch64 
assembly implementations... " >&6; }
+if test ${gcry_cv_gcc_aarch64_platform_as_ok+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test "$try_asm_modules" != "yes" ; then
+          gcry_cv_gcc_aarch64_platform_as_ok="n/a"
+        else
+          gcry_cv_gcc_aarch64_platform_as_ok=no
+          cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+__asm__(
+                ".text\n\t"
+                "asmfunc:\n\t"
+                "eor x0, x0, x30, ror #12;\n\t"
+                "add x0, x0, x30, asr #12;\n\t"
+                "eor v0.16b, v0.16b, v31.16b;\n\t"
+              );
+              void asmfunc(void);
+int
+main (void)
+{
+ asmfunc();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"
+then :
+  gcry_cv_gcc_aarch64_platform_as_ok=yes
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam \
+    conftest$ac_exeext conftest.$ac_ext
+        fi
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: 
$gcry_cv_gcc_aarch64_platform_as_ok" >&5
+printf "%s\n" "$gcry_cv_gcc_aarch64_platform_as_ok" >&6; }
+if test "$gcry_cv_gcc_aarch64_platform_as_ok" = "yes" ; then
+
+printf "%s\n" "#define HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS 1" >>confdefs.h
+
+fi
+
+#
+# Check whether GCC assembler supports for CFI directives.
+#
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether GCC assembler 
supports for CFI directives" >&5
+printf %s "checking whether GCC assembler supports for CFI directives... " 
>&6; }
+if test ${gcry_cv_gcc_asm_cfi_directives+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  gcry_cv_gcc_asm_cfi_directives=no
+        cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+__asm__(
+                ".text\n\t"
+                "ac_test:\n\t"
+                ".cfi_startproc\n\t"
+                ".cfi_remember_state\n\t"
+                ".cfi_adjust_cfa_offset 8\n\t"
+                ".cfi_rel_offset 0, 8\n\t"
+                ".cfi_def_cfa_register 1\n\t"
+                ".cfi_register 2, 3\n\t"
+                ".cfi_restore 2\n\t"
+                ".cfi_escape 0x0f, 0x02, 0x11, 0x00\n\t"
+                ".cfi_restore_state\n\t"
+                ".long 0\n\t"
+                ".cfi_endproc\n\t"
+            );
+            void asmfunc(void)
+int
+main (void)
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"
+then :
+  gcry_cv_gcc_asm_cfi_directives=yes
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam \
+    conftest$ac_exeext conftest.$ac_ext
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: 
$gcry_cv_gcc_asm_cfi_directives" >&5
+printf "%s\n" "$gcry_cv_gcc_asm_cfi_directives" >&6; }
+if test "$gcry_cv_gcc_asm_cfi_directives" = "yes" ; then
+
+printf "%s\n" "#define HAVE_GCC_ASM_CFI_DIRECTIVES 1" >>confdefs.h
+
+fi
+
+
+#
+# Check whether GCC assembler supports for ELF directives.
+#
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether GCC assembler 
supports for ELF directives" >&5
+printf %s "checking whether GCC assembler supports for ELF directives... " 
>&6; }
+if test ${gcry_cv_gcc_asm_elf_directives+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  gcry_cv_gcc_asm_elf_directives=no
+        cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+__asm__(
+                /* Test if ELF directives '.type' and '.size' are supported. */
+                ".text\n\t"
+                "asmfunc:\n\t"
+                ".size asmfunc,.-asmfunc;\n\t"
+                ".type asmfunc,STT_FUNC;\n\t"
+            );
+int
+main (void)
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"
+then :
+  gcry_cv_gcc_asm_elf_directives=yes
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam \
+    conftest$ac_exeext conftest.$ac_ext
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: 
$gcry_cv_gcc_asm_elf_directives" >&5
+printf "%s\n" "$gcry_cv_gcc_asm_elf_directives" >&6; }
+if test "$gcry_cv_gcc_asm_elf_directives" = "yes" ; then
+
+printf "%s\n" "#define HAVE_GCC_ASM_ELF_DIRECTIVES 1" >>confdefs.h
+
+fi
+
+
+#
+# Check whether underscores in symbols are required.  This needs to be
+# done before setting up the assembler stuff.
+#
+
+tmp_do_check="no"
+case "${host}" in
+    i?86-mingw32* | i?86-*-mingw32*)
+        ac_cv_sys_symbol_underscore=yes
+        ;;
+    x86_64-*-mingw32*)
+        ac_cv_sys_symbol_underscore=no
+        ;;
+    i386-emx-os2 | i345686-pc-os2*emx | i386-pc-msdosdjgpp)
+        ac_cv_sys_symbol_underscore=yes
+        ;;
+    *)
+      if test "$cross_compiling" != yes; then
+         tmp_do_check="yes"
+      fi
+      ;;
+esac
+if test "$tmp_do_check" = "yes"; then
+
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for _ prefix in 
compiled symbols" >&5
+printf %s "checking for _ prefix in compiled symbols... " >&6; }
+  if test ${ac_cv_sys_symbol_underscore+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  ac_cv_sys_symbol_underscore=no
+   cat > conftest.$ac_ext <<EOF
+      void nm_test_func(){}
+      int main(){nm_test_func;return 0;}
+EOF
+  if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5
+  (eval $ac_compile) 2>&5
+  ac_status=$?
+  printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+    # Now try to grab the symbols.
+    ac_nlist=conftest.nm
+    if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$NM 
conftest.$ac_objext \| $lt_cv_sys_global_symbol_pipe \| cut -d \' \' -f 2 \> 
$ac_nlist\""; } >&5
+  (eval $NM conftest.$ac_objext \| $lt_cv_sys_global_symbol_pipe \| cut -d \' 
\' -f 2 \> $ac_nlist) 2>&5
+  ac_status=$?
+  printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; } && test -s "$ac_nlist"; then
+      # See whether the symbols have a leading underscore.
+      if egrep '^_nm_test_func' "$ac_nlist" >/dev/null; then
+        ac_cv_sys_symbol_underscore=yes
+      else
+        if egrep '^nm_test_func ' "$ac_nlist" >/dev/null; then
+          :
+        else
+          echo "configure: cannot find nm_test_func in $ac_nlist" >&5
+        fi
+      fi
+    else
+      echo "configure: cannot run $lt_cv_sys_global_symbol_pipe" >&5
+    fi
+  else
+    echo "configure: failed program was:" >&5
+    cat conftest.c >&5
+  fi
+  rm -rf conftest*
+
+fi
+
+  else
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for _ prefix in 
compiled symbols" >&5
+printf %s "checking for _ prefix in compiled symbols... " >&6; }
+  fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: 
$ac_cv_sys_symbol_underscore" >&5
+printf "%s\n" "$ac_cv_sys_symbol_underscore" >&6; }
+if test x$ac_cv_sys_symbol_underscore = xyes; then
+
+printf "%s\n" "#define WITH_SYMBOL_UNDERSCORE 1" >>confdefs.h
+
+fi
+
+
+
+#################################
+####                         ####
+#### Setup assembler stuff.  ####
+#### Define mpi_cpu_arch.    ####
+####                         ####
+#################################
+# Check whether --enable-mpi-path was given.
+if test ${enable_mpi_path+y}
+then :
+  enableval=$enable_mpi_path; mpi_extra_path="$enableval"
+else $as_nop
+  mpi_extra_path=""
+fi
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking architecture and mpi 
assembler functions" >&5
+printf %s "checking architecture and mpi assembler functions... " >&6; }
+if test -f $srcdir/mpi/config.links ; then
+    . $srcdir/mpi/config.links
+    ac_config_links="$ac_config_links "$mpi_ln_list""
+
+    ac_cv_mpi_sflags="$mpi_sflags"
+    { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $mpi_cpu_arch" >&5
+printf "%s\n" "$mpi_cpu_arch" >&6; }
+else
+    { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: failed" >&5
+printf "%s\n" "failed" >&6; }
+    as_fn_error $? "mpi/config.links missing!" "$LINENO" 5
+fi
+MPI_SFLAGS="$ac_cv_mpi_sflags"
+
+
+ if test "$mpi_mod_asm_mpih_add1" = yes; then
+  MPI_MOD_ASM_MPIH_ADD1_TRUE=
+  MPI_MOD_ASM_MPIH_ADD1_FALSE='#'
+else
+  MPI_MOD_ASM_MPIH_ADD1_TRUE='#'
+  MPI_MOD_ASM_MPIH_ADD1_FALSE=
+fi
+
+ if test "$mpi_mod_asm_mpih_sub1" = yes; then
+  MPI_MOD_ASM_MPIH_SUB1_TRUE=
+  MPI_MOD_ASM_MPIH_SUB1_FALSE='#'
+else
+  MPI_MOD_ASM_MPIH_SUB1_TRUE='#'
+  MPI_MOD_ASM_MPIH_SUB1_FALSE=
+fi
+
+ if test "$mpi_mod_asm_mpih_mul1" = yes; then
+  MPI_MOD_ASM_MPIH_MUL1_TRUE=
+  MPI_MOD_ASM_MPIH_MUL1_FALSE='#'
+else
+  MPI_MOD_ASM_MPIH_MUL1_TRUE='#'
+  MPI_MOD_ASM_MPIH_MUL1_FALSE=
+fi
+
+ if test "$mpi_mod_asm_mpih_mul2" = yes; then
+  MPI_MOD_ASM_MPIH_MUL2_TRUE=
+  MPI_MOD_ASM_MPIH_MUL2_FALSE='#'
+else
+  MPI_MOD_ASM_MPIH_MUL2_TRUE='#'
+  MPI_MOD_ASM_MPIH_MUL2_FALSE=
+fi
+
+ if test "$mpi_mod_asm_mpih_mul3" = yes; then
+  MPI_MOD_ASM_MPIH_MUL3_TRUE=
+  MPI_MOD_ASM_MPIH_MUL3_FALSE='#'
+else
+  MPI_MOD_ASM_MPIH_MUL3_TRUE='#'
+  MPI_MOD_ASM_MPIH_MUL3_FALSE=
+fi
+
+ if test "$mpi_mod_asm_mpih_lshift" = yes; then
+  MPI_MOD_ASM_MPIH_LSHIFT_TRUE=
+  MPI_MOD_ASM_MPIH_LSHIFT_FALSE='#'
+else
+  MPI_MOD_ASM_MPIH_LSHIFT_TRUE='#'
+  MPI_MOD_ASM_MPIH_LSHIFT_FALSE=
+fi
+
+ if test "$mpi_mod_asm_mpih_rshift" = yes; then
+  MPI_MOD_ASM_MPIH_RSHIFT_TRUE=
+  MPI_MOD_ASM_MPIH_RSHIFT_FALSE='#'
+else
+  MPI_MOD_ASM_MPIH_RSHIFT_TRUE='#'
+  MPI_MOD_ASM_MPIH_RSHIFT_FALSE=
+fi
+
+ if test "$mpi_mod_asm_udiv" = yes; then
+  MPI_MOD_ASM_UDIV_TRUE=
+  MPI_MOD_ASM_UDIV_FALSE='#'
+else
+  MPI_MOD_ASM_UDIV_TRUE='#'
+  MPI_MOD_ASM_UDIV_FALSE=
+fi
+
+ if test "$mpi_mod_asm_udiv_qrnnd" = yes; then
+  MPI_MOD_ASM_UDIV_QRNND_TRUE=
+  MPI_MOD_ASM_UDIV_QRNND_FALSE='#'
+else
+  MPI_MOD_ASM_UDIV_QRNND_TRUE='#'
+  MPI_MOD_ASM_UDIV_QRNND_FALSE=
+fi
+
+ if test "$mpi_mod_c_mpih_add1" = yes; then
+  MPI_MOD_C_MPIH_ADD1_TRUE=
+  MPI_MOD_C_MPIH_ADD1_FALSE='#'
+else
+  MPI_MOD_C_MPIH_ADD1_TRUE='#'
+  MPI_MOD_C_MPIH_ADD1_FALSE=
+fi
+
+ if test "$mpi_mod_c_mpih_sub1" = yes; then
+  MPI_MOD_C_MPIH_SUB1_TRUE=
+  MPI_MOD_C_MPIH_SUB1_FALSE='#'
+else
+  MPI_MOD_C_MPIH_SUB1_TRUE='#'
+  MPI_MOD_C_MPIH_SUB1_FALSE=
+fi
+
+ if test "$mpi_mod_c_mpih_mul1" = yes; then
+  MPI_MOD_C_MPIH_MUL1_TRUE=
+  MPI_MOD_C_MPIH_MUL1_FALSE='#'
+else
+  MPI_MOD_C_MPIH_MUL1_TRUE='#'
+  MPI_MOD_C_MPIH_MUL1_FALSE=
+fi
+
+ if test "$mpi_mod_c_mpih_mul2" = yes; then
+  MPI_MOD_C_MPIH_MUL2_TRUE=
+  MPI_MOD_C_MPIH_MUL2_FALSE='#'
+else
+  MPI_MOD_C_MPIH_MUL2_TRUE='#'
+  MPI_MOD_C_MPIH_MUL2_FALSE=
+fi
+
+ if test "$mpi_mod_c_mpih_mul3" = yes; then
+  MPI_MOD_C_MPIH_MUL3_TRUE=
+  MPI_MOD_C_MPIH_MUL3_FALSE='#'
+else
+  MPI_MOD_C_MPIH_MUL3_TRUE='#'
+  MPI_MOD_C_MPIH_MUL3_FALSE=
+fi
+
+ if test "$mpi_mod_c_mpih_lshift" = yes; then
+  MPI_MOD_C_MPIH_LSHIFT_TRUE=
+  MPI_MOD_C_MPIH_LSHIFT_FALSE='#'
+else
+  MPI_MOD_C_MPIH_LSHIFT_TRUE='#'
+  MPI_MOD_C_MPIH_LSHIFT_FALSE=
+fi
+
+ if test "$mpi_mod_c_mpih_rshift" = yes; then
+  MPI_MOD_C_MPIH_RSHIFT_TRUE=
+  MPI_MOD_C_MPIH_RSHIFT_FALSE='#'
+else
+  MPI_MOD_C_MPIH_RSHIFT_TRUE='#'
+  MPI_MOD_C_MPIH_RSHIFT_FALSE=
+fi
+
+ if test "$mpi_mod_c_udiv" = yes; then
+  MPI_MOD_C_UDIV_TRUE=
+  MPI_MOD_C_UDIV_FALSE='#'
+else
+  MPI_MOD_C_UDIV_TRUE='#'
+  MPI_MOD_C_UDIV_FALSE=
+fi
+
+ if test "$mpi_mod_c_udiv_qrnnd" = yes; then
+  MPI_MOD_C_UDIV_QRNND_TRUE=
+  MPI_MOD_C_UDIV_QRNND_FALSE='#'
+else
+  MPI_MOD_C_UDIV_QRNND_TRUE='#'
+  MPI_MOD_C_UDIV_QRNND_FALSE=
+fi
+
+
+# Reset non applicable feature flags.
+if test "$mpi_cpu_arch" != "x86" ; then
+   aesnisupport="n/a"
+   shaextsupport="n/a"
+   pclmulsupport="n/a"
+   sse41support="n/a"
+   avxsupport="n/a"
+   avx2support="n/a"
+   padlocksupport="n/a"
+   drngsupport="n/a"
+fi
+
+if test "$mpi_cpu_arch" != "arm" ; then
+   if test "$mpi_cpu_arch" != "aarch64" ; then
+     neonsupport="n/a"
+     armcryptosupport="n/a"
+   fi
+fi
+
+if test "$mpi_cpu_arch" != "ppc"; then
+   ppccryptosupport="n/a"
+fi
+
+#############################################
+####                                     ####
+#### Platform specific compiler checks.  ####
+####                                     ####
+#############################################
+
+
+# Following tests depend on warnings to cause compile to fail, so set -Werror
+# temporarily.
+_gcc_cflags_save=$CFLAGS
+CFLAGS="$CFLAGS -Werror"
+
+
+#
+# Check whether compiler supports 'ms_abi' function attribute.
+#
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether compiler 
supports 'ms_abi' function attribute" >&5
+printf %s "checking whether compiler supports 'ms_abi' function attribute... " 
>&6; }
+if test ${gcry_cv_gcc_attribute_ms_abi+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  gcry_cv_gcc_attribute_ms_abi=no
+        cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+int __attribute__ ((ms_abi)) proto(int);
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"
+then :
+  gcry_cv_gcc_attribute_ms_abi=yes
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: 
$gcry_cv_gcc_attribute_ms_abi" >&5
+printf "%s\n" "$gcry_cv_gcc_attribute_ms_abi" >&6; }
+if test "$gcry_cv_gcc_attribute_ms_abi" = "yes" ; then
+
+printf "%s\n" "#define HAVE_GCC_ATTRIBUTE_MS_ABI 1" >>confdefs.h
+
+fi
+
+
+#
+# Check whether compiler supports 'sysv_abi' function attribute.
+#
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether compiler 
supports 'sysv_abi' function attribute" >&5
+printf %s "checking whether compiler supports 'sysv_abi' function attribute... 
" >&6; }
+if test ${gcry_cv_gcc_attribute_sysv_abi+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  gcry_cv_gcc_attribute_sysv_abi=no
+        cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+int __attribute__ ((sysv_abi)) proto(int);
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"
+then :
+  gcry_cv_gcc_attribute_sysv_abi=yes
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: 
$gcry_cv_gcc_attribute_sysv_abi" >&5
+printf "%s\n" "$gcry_cv_gcc_attribute_sysv_abi" >&6; }
+if test "$gcry_cv_gcc_attribute_sysv_abi" = "yes" ; then
+
+printf "%s\n" "#define HAVE_GCC_ATTRIBUTE_SYSV_ABI 1" >>confdefs.h
+
+fi
+
+
+#
+# Check whether default calling convention is 'ms_abi'.
+#
+if test "$gcry_cv_gcc_attribute_ms_abi" = "yes" ; then
+   { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether default 
calling convention is 'ms_abi'" >&5
+printf %s "checking whether default calling convention is 'ms_abi'... " >&6; }
+if test ${gcry_cv_gcc_default_abi_is_ms_abi+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  gcry_cv_gcc_default_abi_is_ms_abi=no
+           cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+void *test(void) {
+                 void *(*def_func)(void) = test;
+                 void *__attribute__((ms_abi))(*msabi_func)(void);
+                 /* warning on SysV abi targets, passes on Windows based 
targets */
+                 msabi_func = def_func;
+                 return msabi_func;
+             }
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"
+then :
+  gcry_cv_gcc_default_abi_is_ms_abi=yes
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: 
$gcry_cv_gcc_default_abi_is_ms_abi" >&5
+printf "%s\n" "$gcry_cv_gcc_default_abi_is_ms_abi" >&6; }
+   if test "$gcry_cv_gcc_default_abi_is_ms_abi" = "yes" ; then
+
+printf "%s\n" "#define HAVE_GCC_DEFAULT_ABI_IS_MS_ABI 1" >>confdefs.h
+
+   fi
+fi
+
+
+#
+# Check whether default calling convention is 'sysv_abi'.
+#
+if test "$gcry_cv_gcc_attribute_sysv_abi" = "yes" ; then
+   { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether default 
calling convention is 'sysv_abi'" >&5
+printf %s "checking whether default calling convention is 'sysv_abi'... " >&6; 
}
+if test ${gcry_cv_gcc_default_abi_is_sysv_abi+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  gcry_cv_gcc_default_abi_is_sysv_abi=no
+           cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+void *test(void) {
+                 void *(*def_func)(void) = test;
+                 void *__attribute__((sysv_abi))(*sysvabi_func)(void);
+                 /* warning on MS ABI targets, passes on SysV ABI targets */
+                 sysvabi_func = def_func;
+                 return sysvabi_func;
+             }
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"
+then :
+  gcry_cv_gcc_default_abi_is_sysv_abi=yes
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: 
$gcry_cv_gcc_default_abi_is_sysv_abi" >&5
+printf "%s\n" "$gcry_cv_gcc_default_abi_is_sysv_abi" >&6; }
+   if test "$gcry_cv_gcc_default_abi_is_sysv_abi" = "yes" ; then
+
+printf "%s\n" "#define HAVE_GCC_DEFAULT_ABI_IS_SYSV_ABI 1" >>confdefs.h
+
+   fi
+fi
+
+
+# Restore flags.
+CFLAGS=$_gcc_cflags_save;
+
+
+#
+# Check whether GCC inline assembler supports SSSE3 instructions
+# This is required for the AES-NI instructions.
+#
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether GCC inline 
assembler supports SSSE3 instructions" >&5
+printf %s "checking whether GCC inline assembler supports SSSE3 
instructions... " >&6; }
+if test ${gcry_cv_gcc_inline_asm_ssse3+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test "$mpi_cpu_arch" != "x86" ||
+           test "$try_asm_modules" != "yes" ; then
+          gcry_cv_gcc_inline_asm_ssse3="n/a"
+        else
+          gcry_cv_gcc_inline_asm_ssse3=no
+          cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+static unsigned char be_mask[16] __attribute__ ((aligned (16))) =
+              { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
+            void a(void) {
+              __asm__("pshufb %[mask], %%xmm2\n\t"::[mask]"m"(*be_mask):);
+            }
+int
+main (void)
+{
+ a();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"
+then :
+  gcry_cv_gcc_inline_asm_ssse3=yes
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam \
+    conftest$ac_exeext conftest.$ac_ext
+        fi
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: 
$gcry_cv_gcc_inline_asm_ssse3" >&5
+printf "%s\n" "$gcry_cv_gcc_inline_asm_ssse3" >&6; }
+if test "$gcry_cv_gcc_inline_asm_ssse3" = "yes" ; then
+
+printf "%s\n" "#define HAVE_GCC_INLINE_ASM_SSSE3 1" >>confdefs.h
+
+fi
+
+
+#
+# Check whether GCC inline assembler supports PCLMUL instructions.
+#
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether GCC inline 
assembler supports PCLMUL instructions" >&5
+printf %s "checking whether GCC inline assembler supports PCLMUL 
instructions... " >&6; }
+if test ${gcry_cv_gcc_inline_asm_pclmul+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test "$mpi_cpu_arch" != "x86" ||
+           test "$try_asm_modules" != "yes" ; then
+          gcry_cv_gcc_inline_asm_pclmul="n/a"
+        else
+          gcry_cv_gcc_inline_asm_pclmul=no
+          cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+void a(void) {
+              __asm__("pclmulqdq \$0, %%xmm1, %%xmm3\n\t":::"cc");
+            }
+int
+main (void)
+{
+ a();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"
+then :
+  gcry_cv_gcc_inline_asm_pclmul=yes
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam \
+    conftest$ac_exeext conftest.$ac_ext
+        fi
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: 
$gcry_cv_gcc_inline_asm_pclmul" >&5
+printf "%s\n" "$gcry_cv_gcc_inline_asm_pclmul" >&6; }
+if test "$gcry_cv_gcc_inline_asm_pclmul" = "yes" ; then
+
+printf "%s\n" "#define HAVE_GCC_INLINE_ASM_PCLMUL 1" >>confdefs.h
+
+fi
+
+
+#
+# Check whether GCC inline assembler supports SHA Extensions instructions.
+#
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether GCC inline 
assembler supports SHA Extensions instructions" >&5
+printf %s "checking whether GCC inline assembler supports SHA Extensions 
instructions... " >&6; }
+if test ${gcry_cv_gcc_inline_asm_shaext+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test "$mpi_cpu_arch" != "x86" ||
+           test "$try_asm_modules" != "yes" ; then
+          gcry_cv_gcc_inline_asm_shaext="n/a"
+        else
+          gcry_cv_gcc_inline_asm_shaext=no
+          cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+void a(void) {
+              __asm__("sha1rnds4 \$0, %%xmm1, %%xmm3\n\t":::"cc");
+              __asm__("sha1nexte %%xmm1, %%xmm3\n\t":::"cc");
+              __asm__("sha1msg1 %%xmm1, %%xmm3\n\t":::"cc");
+              __asm__("sha1msg2 %%xmm1, %%xmm3\n\t":::"cc");
+              __asm__("sha256rnds2 %%xmm0, %%xmm1, %%xmm3\n\t":::"cc");
+              __asm__("sha256msg1 %%xmm1, %%xmm3\n\t":::"cc");
+              __asm__("sha256msg2 %%xmm1, %%xmm3\n\t":::"cc");
+            }
+int
+main (void)
+{
+ a();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"
+then :
+  gcry_cv_gcc_inline_asm_shaext=yes
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam \
+    conftest$ac_exeext conftest.$ac_ext
+        fi
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: 
$gcry_cv_gcc_inline_asm_shaext" >&5
+printf "%s\n" "$gcry_cv_gcc_inline_asm_shaext" >&6; }
+if test "$gcry_cv_gcc_inline_asm_shaext" = "yes" ; then
+
+printf "%s\n" "#define HAVE_GCC_INLINE_ASM_SHAEXT 1" >>confdefs.h
+
+fi
+
+
+#
+# Check whether GCC inline assembler supports SSE4.1 instructions.
+#
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether GCC inline 
assembler supports SSE4.1 instructions" >&5
+printf %s "checking whether GCC inline assembler supports SSE4.1 
instructions... " >&6; }
+if test ${gcry_cv_gcc_inline_asm_sse41+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test "$mpi_cpu_arch" != "x86" ||
+           test "$try_asm_modules" != "yes" ; then
+          gcry_cv_gcc_inline_asm_sse41="n/a"
+        else
+          gcry_cv_gcc_inline_asm_sse41=no
+          cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+void a(void) {
+              int i;
+              __asm__("pextrd \$2, %%xmm0, %[out]\n\t" : [out] "=m" (i));
+            }
+int
+main (void)
+{
+ a();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"
+then :
+  gcry_cv_gcc_inline_asm_sse41=yes
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam \
+    conftest$ac_exeext conftest.$ac_ext
+        fi
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: 
$gcry_cv_gcc_inline_asm_sse41" >&5
+printf "%s\n" "$gcry_cv_gcc_inline_asm_sse41" >&6; }
+if test "$gcry_cv_gcc_inline_asm_sse41" = "yes" ; then
+
+printf "%s\n" "#define HAVE_GCC_INLINE_ASM_SSE41 1" >>confdefs.h
+
+fi
+
+
+#
+# Check whether GCC inline assembler supports AVX instructions
+#
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether GCC inline 
assembler supports AVX instructions" >&5
+printf %s "checking whether GCC inline assembler supports AVX instructions... 
" >&6; }
+if test ${gcry_cv_gcc_inline_asm_avx+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test "$mpi_cpu_arch" != "x86" ||
+           test "$try_asm_modules" != "yes" ; then
+          gcry_cv_gcc_inline_asm_avx="n/a"
+        else
+          gcry_cv_gcc_inline_asm_avx=no
+          cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+void a(void) {
+              __asm__("xgetbv; vaesdeclast 
(%[mem]),%%xmm0,%%xmm7\n\t"::[mem]"r"(0):);
+            }
+int
+main (void)
+{
+ a();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"
+then :
+  gcry_cv_gcc_inline_asm_avx=yes
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam \
+    conftest$ac_exeext conftest.$ac_ext
+        fi
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: 
$gcry_cv_gcc_inline_asm_avx" >&5
+printf "%s\n" "$gcry_cv_gcc_inline_asm_avx" >&6; }
+if test "$gcry_cv_gcc_inline_asm_avx" = "yes" ; then
+
+printf "%s\n" "#define HAVE_GCC_INLINE_ASM_AVX 1" >>confdefs.h
+
+fi
+
+
+#
+# Check whether GCC inline assembler supports AVX2 instructions
+#
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether GCC inline 
assembler supports AVX2 instructions" >&5
+printf %s "checking whether GCC inline assembler supports AVX2 instructions... 
" >&6; }
+if test ${gcry_cv_gcc_inline_asm_avx2+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test "$mpi_cpu_arch" != "x86" ||
+           test "$try_asm_modules" != "yes" ; then
+          gcry_cv_gcc_inline_asm_avx2="n/a"
+        else
+          gcry_cv_gcc_inline_asm_avx2=no
+          cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+void a(void) {
+              __asm__("xgetbv; vpbroadcastb %%xmm7,%%ymm1\n\t":::"cc");
+            }
+int
+main (void)
+{
+ a();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"
+then :
+  gcry_cv_gcc_inline_asm_avx2=yes
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam \
+    conftest$ac_exeext conftest.$ac_ext
+        fi
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: 
$gcry_cv_gcc_inline_asm_avx2" >&5
+printf "%s\n" "$gcry_cv_gcc_inline_asm_avx2" >&6; }
+if test "$gcry_cv_gcc_inline_asm_avx2" = "yes" ; then
+
+printf "%s\n" "#define HAVE_GCC_INLINE_ASM_AVX2 1" >>confdefs.h
+
+fi
+
+
+#
+# Check whether GCC inline assembler supports VAES and VPCLMUL instructions
+#
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether GCC inline 
assembler supports VAES and VPCLMUL instructions" >&5
+printf %s "checking whether GCC inline assembler supports VAES and VPCLMUL 
instructions... " >&6; }
+if test ${gcry_cv_gcc_inline_asm_vaes_vpclmul+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test "$mpi_cpu_arch" != "x86" ||
+           test "$try_asm_modules" != "yes" ; then
+          gcry_cv_gcc_inline_asm_vaes_vpclmul="n/a"
+        else
+          gcry_cv_gcc_inline_asm_vaes_vpclmul=no
+          cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+void a(void) {
+              __asm__("vaesenclast 
%%ymm7,%%ymm7,%%ymm1\n\t":::"cc");/*256-bit*/
+              __asm__("vaesenclast 
%%zmm7,%%zmm7,%%zmm1\n\t":::"cc");/*512-bit*/
+              __asm__("vpclmulqdq 
\$0,%%ymm7,%%ymm7,%%ymm1\n\t":::"cc");/*256-bit*/
+              __asm__("vpclmulqdq 
\$0,%%zmm7,%%zmm7,%%zmm1\n\t":::"cc");/*512-bit*/
+            }
+int
+main (void)
+{
+ a();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"
+then :
+  gcry_cv_gcc_inline_asm_vaes_vpclmul=yes
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam \
+    conftest$ac_exeext conftest.$ac_ext
+        fi
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: 
$gcry_cv_gcc_inline_asm_vaes_vpclmul" >&5
+printf "%s\n" "$gcry_cv_gcc_inline_asm_vaes_vpclmul" >&6; }
+if test "$gcry_cv_gcc_inline_asm_vaes_vpclmul" = "yes" ; then
+
+printf "%s\n" "#define HAVE_GCC_INLINE_ASM_VAES_VPCLMUL 1" >>confdefs.h
+
+fi
+
+
+#
+# Check whether GCC inline assembler supports BMI2 instructions
+#
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether GCC inline 
assembler supports BMI2 instructions" >&5
+printf %s "checking whether GCC inline assembler supports BMI2 instructions... 
" >&6; }
+if test ${gcry_cv_gcc_inline_asm_bmi2+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test "$mpi_cpu_arch" != "x86" ||
+           test "$try_asm_modules" != "yes" ; then
+          gcry_cv_gcc_inline_asm_bmi2="n/a"
+        else
+          gcry_cv_gcc_inline_asm_bmi2=no
+          cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+unsigned int a(unsigned int x, unsigned int y) {
+              unsigned int tmp1, tmp2;
+              asm ("rorxl %2, %1, %0"
+                   : "=r" (tmp1)
+                   : "rm0" (x), "J" (32 - ((23) & 31)));
+              asm ("andnl %2, %1, %0"
+                   : "=r" (tmp2)
+                   : "r0" (x), "rm" (y));
+              return tmp1 + tmp2;
+            }
+int
+main (void)
+{
+ a(1, 2);
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"
+then :
+  gcry_cv_gcc_inline_asm_bmi2=yes
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam \
+    conftest$ac_exeext conftest.$ac_ext
+        fi
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: 
$gcry_cv_gcc_inline_asm_bmi2" >&5
+printf "%s\n" "$gcry_cv_gcc_inline_asm_bmi2" >&6; }
+if test "$gcry_cv_gcc_inline_asm_bmi2" = "yes" ; then
+
+printf "%s\n" "#define HAVE_GCC_INLINE_ASM_BMI2 1" >>confdefs.h
+
+fi
+
+
+#
+# Check whether GCC assembler needs "-Wa,--divide" to correctly handle
+# constant division
+#
+if test $amd64_as_feature_detection = yes; then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether GCC assembler 
handles division correctly" >&5
+printf %s "checking whether GCC assembler handles division correctly... " >&6; 
}
+if test ${gcry_cv_gcc_as_const_division_ok+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  gcry_cv_gcc_as_const_division_ok=no
+        cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+__asm__(".text\n\tfn:\n\t xorl \$(123456789/12345678), %ebp;\n\t");
+            void fn(void);
+int
+main (void)
+{
+fn();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"
+then :
+  gcry_cv_gcc_as_const_division_ok=yes
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam \
+    conftest$ac_exeext conftest.$ac_ext
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: 
$gcry_cv_gcc_as_const_division_ok" >&5
+printf "%s\n" "$gcry_cv_gcc_as_const_division_ok" >&6; }
+  if test "$gcry_cv_gcc_as_const_division_ok" = "no" ; then
+    #
+    # Add '-Wa,--divide' to CPPFLAGS and try check again.
+    #
+    _gcc_cppflags_save="$CPPFLAGS"
+    CPPFLAGS="$CPPFLAGS -Wa,--divide"
+    { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether GCC 
assembler handles division correctly with \"-Wa,--divide\"" >&5
+printf %s "checking whether GCC assembler handles division correctly with 
\"-Wa,--divide\"... " >&6; }
+if test ${gcry_cv_gcc_as_const_division_with_wadivide_ok+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  gcry_cv_gcc_as_const_division_with_wadivide_ok=no
+          cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+__asm__(".text\n\tfn:\n\t xorl \$(123456789/12345678), %ebp;\n\t");
+              void fn(void);
+int
+main (void)
+{
+fn();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"
+then :
+  gcry_cv_gcc_as_const_division_with_wadivide_ok=yes
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam \
+    conftest$ac_exeext conftest.$ac_ext
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: 
$gcry_cv_gcc_as_const_division_with_wadivide_ok" >&5
+printf "%s\n" "$gcry_cv_gcc_as_const_division_with_wadivide_ok" >&6; }
+    if test "$gcry_cv_gcc_as_const_division_with_wadivide_ok" = "no" ; then
+      # '-Wa,--divide' did not work, restore old flags.
+      CPPFLAGS="$_gcc_cppflags_save"
+    fi
+  fi
+fi
+
+
+#
+# Check whether GCC assembler supports features needed for our amd64
+# implementations
+#
+if test $amd64_as_feature_detection = yes; then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether GCC assembler 
is compatible for amd64 assembly implementations" >&5
+printf %s "checking whether GCC assembler is compatible for amd64 assembly 
implementations... " >&6; }
+if test ${gcry_cv_gcc_amd64_platform_as_ok+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test "$mpi_cpu_arch" != "x86" ||
+           test "$try_asm_modules" != "yes" ; then
+          gcry_cv_gcc_amd64_platform_as_ok="n/a"
+        else
+          gcry_cv_gcc_amd64_platform_as_ok=no
+          cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+__asm__(
+                /* Test if '.type' and '.size' are supported.  */
+                /* These work only on ELF targets. */
+                ".text\n\t"
+                "asmfunc:\n\t"
+                ".size asmfunc,.-asmfunc;\n\t"
+                ".type asmfunc,@function;\n\t"
+                /* Test if assembler allows use of '/' for constant division
+                 * (Solaris/x86 issue). If previous constant division check
+                 * and "-Wa,--divide" workaround failed, this causes assembly
+                 * to be disable on this machine. */
+                 "xorl \$(123456789/12345678), %ebp;\n\t"
+            );
+            void asmfunc(void);
+int
+main (void)
+{
+ asmfunc();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"
+then :
+  gcry_cv_gcc_amd64_platform_as_ok=yes
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam \
+    conftest$ac_exeext conftest.$ac_ext
+        fi
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: 
$gcry_cv_gcc_amd64_platform_as_ok" >&5
+printf "%s\n" "$gcry_cv_gcc_amd64_platform_as_ok" >&6; }
+  if test "$gcry_cv_gcc_amd64_platform_as_ok" = "yes" ; then
+
+printf "%s\n" "#define HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS 1" >>confdefs.h
+
+  fi
+  if test "$gcry_cv_gcc_amd64_platform_as_ok" = "no" &&
+     test "$gcry_cv_gcc_attribute_sysv_abi" = "yes" &&
+     test "$gcry_cv_gcc_default_abi_is_ms_abi" = "yes"; then
+    { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether GCC 
assembler is compatible for WIN64 assembly implementations" >&5
+printf %s "checking whether GCC assembler is compatible for WIN64 assembly 
implementations... " >&6; }
+if test ${gcry_cv_gcc_win64_platform_as_ok+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  gcry_cv_gcc_win64_platform_as_ok=no
+      cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+__asm__(
+              ".text\n\t"
+              ".globl asmfunc\n\t"
+              "asmfunc:\n\t"
+              "xorq \$(1234), %rbp;\n\t"
+          );
+          void asmfunc(void);
+int
+main (void)
+{
+ asmfunc();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"
+then :
+  gcry_cv_gcc_win64_platform_as_ok=yes
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam \
+    conftest$ac_exeext conftest.$ac_ext
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: 
$gcry_cv_gcc_win64_platform_as_ok" >&5
+printf "%s\n" "$gcry_cv_gcc_win64_platform_as_ok" >&6; }
+    if test "$gcry_cv_gcc_win64_platform_as_ok" = "yes" ; then
+
+printf "%s\n" "#define HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS 1" >>confdefs.h
+
+    fi
+  fi
+fi
+
+
+#
+# Check whether GCC assembler supports features needed for assembly
+# implementations that use Intel syntax
+#
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether GCC assembler 
is compatible for Intel syntax assembly implementations" >&5
+printf %s "checking whether GCC assembler is compatible for Intel syntax 
assembly implementations... " >&6; }
+if test ${gcry_cv_gcc_platform_as_ok_for_intel_syntax+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test "$mpi_cpu_arch" != "x86" ||
+           test "$try_asm_modules" != "yes" ; then
+          gcry_cv_gcc_platform_as_ok_for_intel_syntax="n/a"
+        else
+          gcry_cv_gcc_platform_as_ok_for_intel_syntax=no
+          cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+__asm__(
+                ".intel_syntax noprefix\n\t"
+                ".text\n\t"
+                "actest:\n\t"
+                "pxor xmm1, xmm7;\n\t"
+                "vperm2i128 ymm2, ymm3, ymm0, 1;\n\t"
+                "add eax, ebp;\n\t"
+                "rorx eax, ebp, 1;\n\t"
+                "sub eax, [esp + 4];\n\t"
+                "add dword ptr [esp + eax], 0b10101;\n\t"
+                ".att_syntax prefix\n\t"
+            );
+            void actest(void);
+int
+main (void)
+{
+ actest();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"
+then :
+  gcry_cv_gcc_platform_as_ok_for_intel_syntax=yes
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam \
+    conftest$ac_exeext conftest.$ac_ext
+        fi
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: 
$gcry_cv_gcc_platform_as_ok_for_intel_syntax" >&5
+printf "%s\n" "$gcry_cv_gcc_platform_as_ok_for_intel_syntax" >&6; }
+if test "$gcry_cv_gcc_platform_as_ok_for_intel_syntax" = "yes" ; then
+
+printf "%s\n" "#define HAVE_INTEL_SYNTAX_PLATFORM_AS 1" >>confdefs.h
+
+fi
+
+
+#
+# Check whether compiler is configured for ARMv6 or newer architecture
+#
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether compiler is 
configured for ARMv6 or newer architecture" >&5
+printf %s "checking whether compiler is configured for ARMv6 or newer 
architecture... " >&6; }
+if test ${gcry_cv_cc_arm_arch_is_v6+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test "$mpi_cpu_arch" != "arm" ||
+           test "$try_asm_modules" != "yes" ; then
+          gcry_cv_cc_arm_arch_is_v6="n/a"
+        else
+          gcry_cv_cc_arm_arch_is_v6=no
+          cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+           #if defined(__arm__) && \
+             ((defined(__ARM_ARCH) && __ARM_ARCH >= 6) \
+             || defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \
+             || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) \
+             || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6T2__) \
+             || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \
+             || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \
+             || defined(__ARM_ARCH_7EM__))
+             /* empty */
+           #else
+             /* fail compile if not ARMv6. */
+             not_armv6 not_armv6 = (not_armv6)not_armv6;
+           #endif
+
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"
+then :
+  gcry_cv_cc_arm_arch_is_v6=yes
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext
+        fi
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: 
$gcry_cv_cc_arm_arch_is_v6" >&5
+printf "%s\n" "$gcry_cv_cc_arm_arch_is_v6" >&6; }
+if test "$gcry_cv_cc_arm_arch_is_v6" = "yes" ; then
+
+printf "%s\n" "#define HAVE_ARM_ARCH_V6 1" >>confdefs.h
+
+fi
+
+
+#
+# Check whether GCC inline assembler supports NEON instructions
+#
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether GCC inline 
assembler supports NEON instructions" >&5
+printf %s "checking whether GCC inline assembler supports NEON instructions... 
" >&6; }
+if test ${gcry_cv_gcc_inline_asm_neon+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test "$mpi_cpu_arch" != "arm" ||
+           test "$try_asm_modules" != "yes" ; then
+          gcry_cv_gcc_inline_asm_neon="n/a"
+        else
+          gcry_cv_gcc_inline_asm_neon=no
+          cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+__asm__(
+                ".syntax unified\n\t"
+                ".arm\n\t"
+                ".fpu neon\n\t"
+                ".text\n\t"
+                "testfn:\n\t"
+                "vld1.64 {%q0-%q1}, [%r0]!;\n\t"
+                "vrev64.8 %q0, %q3;\n\t"
+                "vadd.u64 %q0, %q1;\n\t"
+                "vadd.s64 %d3, %d2, %d3;\n\t"
+                );
+            void testfn(void);
+
+int
+main (void)
+{
+ testfn();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"
+then :
+  gcry_cv_gcc_inline_asm_neon=yes
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam \
+    conftest$ac_exeext conftest.$ac_ext
+        fi
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: 
$gcry_cv_gcc_inline_asm_neon" >&5
+printf "%s\n" "$gcry_cv_gcc_inline_asm_neon" >&6; }
+if test "$gcry_cv_gcc_inline_asm_neon" = "yes" ; then
+
+printf "%s\n" "#define HAVE_GCC_INLINE_ASM_NEON 1" >>confdefs.h
+
+fi
+
+
+#
+# Check whether GCC inline assembler supports AArch32 Crypto Extension 
instructions
+#
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether GCC inline 
assembler supports AArch32 Crypto Extension instructions" >&5
+printf %s "checking whether GCC inline assembler supports AArch32 Crypto 
Extension instructions... " >&6; }
+if test ${gcry_cv_gcc_inline_asm_aarch32_crypto+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test "$mpi_cpu_arch" != "arm" ||
+           test "$try_asm_modules" != "yes" ; then
+          gcry_cv_gcc_inline_asm_aarch32_crypto="n/a"
+        else
+          gcry_cv_gcc_inline_asm_aarch32_crypto=no
+          cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+__asm__(
+                ".syntax unified\n\t"
+                ".arch armv8-a\n\t"
+                ".arm\n\t"
+                ".fpu crypto-neon-fp-armv8\n\t"
+                ".text\n\t"
+
+                "testfn:\n\t"
+                "sha1h.32 q0, q0;\n\t"
+                "sha1c.32 q0, q0, q0;\n\t"
+                "sha1p.32 q0, q0, q0;\n\t"
+                "sha1su0.32 q0, q0, q0;\n\t"
+                "sha1su1.32 q0, q0;\n\t"
+
+                "sha256h.32 q0, q0, q0;\n\t"
+                "sha256h2.32 q0, q0, q0;\n\t"
+                "sha1p.32 q0, q0, q0;\n\t"
+                "sha256su0.32 q0, q0;\n\t"
+                "sha256su1.32 q0, q0, q15;\n\t"
+
+                "aese.8 q0, q0;\n\t"
+                "aesd.8 q0, q0;\n\t"
+                "aesmc.8 q0, q0;\n\t"
+                "aesimc.8 q0, q0;\n\t"
+
+                "vmull.p64 q0, d0, d0;\n\t"
+                );
+            void testfn(void);
+
+int
+main (void)
+{
+ testfn();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"
+then :
+  gcry_cv_gcc_inline_asm_aarch32_crypto=yes
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam \
+    conftest$ac_exeext conftest.$ac_ext
+        fi
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: 
$gcry_cv_gcc_inline_asm_aarch32_crypto" >&5
+printf "%s\n" "$gcry_cv_gcc_inline_asm_aarch32_crypto" >&6; }
+if test "$gcry_cv_gcc_inline_asm_aarch32_crypto" = "yes" ; then
+
+printf "%s\n" "#define HAVE_GCC_INLINE_ASM_AARCH32_CRYPTO 1" >>confdefs.h
+
+fi
+
+
+#
+# Check whether GCC inline assembler supports AArch64 NEON instructions
+#
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether GCC inline 
assembler supports AArch64 NEON instructions" >&5
+printf %s "checking whether GCC inline assembler supports AArch64 NEON 
instructions... " >&6; }
+if test ${gcry_cv_gcc_inline_asm_aarch64_neon+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test "$mpi_cpu_arch" != "aarch64" ||
+           test "$try_asm_modules" != "yes" ; then
+          gcry_cv_gcc_inline_asm_aarch64_neon="n/a"
+        else
+          gcry_cv_gcc_inline_asm_aarch64_neon=no
+          cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+__asm__(
+                ".cpu generic+simd\n\t"
+                ".text\n\t"
+                "testfn:\n\t"
+                "mov w0, \#42;\n\t"
+                "dup v0.8b, w0;\n\t"
+                "ld4 {v0.8b,v1.8b,v2.8b,v3.8b},[x0],\#32;\n\t"
+                );
+            void testfn(void);
+
+int
+main (void)
+{
+ testfn();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"
+then :
+  gcry_cv_gcc_inline_asm_aarch64_neon=yes
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam \
+    conftest$ac_exeext conftest.$ac_ext
+        fi
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: 
$gcry_cv_gcc_inline_asm_aarch64_neon" >&5
+printf "%s\n" "$gcry_cv_gcc_inline_asm_aarch64_neon" >&6; }
+if test "$gcry_cv_gcc_inline_asm_aarch64_neon" = "yes" ; then
+
+printf "%s\n" "#define HAVE_GCC_INLINE_ASM_AARCH64_NEON 1" >>confdefs.h
+
+fi
+
+
+#
+# Check whether GCC inline assembler supports AArch64 Crypto Extension 
instructions
+#
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether GCC inline 
assembler supports AArch64 Crypto Extension instructions" >&5
+printf %s "checking whether GCC inline assembler supports AArch64 Crypto 
Extension instructions... " >&6; }
+if test ${gcry_cv_gcc_inline_asm_aarch64_crypto+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test "$mpi_cpu_arch" != "aarch64" ||
+           test "$try_asm_modules" != "yes" ; then
+          gcry_cv_gcc_inline_asm_aarch64_crypto="n/a"
+        else
+          gcry_cv_gcc_inline_asm_aarch64_crypto=no
+          cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+__asm__(
+                ".cpu generic+simd+crypto\n\t"
+                ".text\n\t"
+                "testfn:\n\t"
+                "mov w0, \#42;\n\t"
+                "dup v0.8b, w0;\n\t"
+                "ld4 {v0.8b,v1.8b,v2.8b,v3.8b},[x0],\#32;\n\t"
+
+                "sha1h s0, s0;\n\t"
+                "sha1c q0, s0, v0.4s;\n\t"
+                "sha1p q0, s0, v0.4s;\n\t"
+                "sha1su0 v0.4s, v0.4s, v0.4s;\n\t"
+                "sha1su1 v0.4s, v0.4s;\n\t"
+
+                "sha256h q0, q0, v0.4s;\n\t"
+                "sha256h2 q0, q0, v0.4s;\n\t"
+                "sha1p q0, s0, v0.4s;\n\t"
+                "sha256su0 v0.4s, v0.4s;\n\t"
+                "sha256su1 v0.4s, v0.4s, v31.4s;\n\t"
+
+                "aese v0.16b, v0.16b;\n\t"
+                "aesd v0.16b, v0.16b;\n\t"
+                "aesmc v0.16b, v0.16b;\n\t"
+                "aesimc v0.16b, v0.16b;\n\t"
+
+                "pmull v0.1q, v0.1d, v31.1d;\n\t"
+                "pmull2 v0.1q, v0.2d, v31.2d;\n\t"
+                );
+            void testfn(void);
+
+int
+main (void)
+{
+ testfn();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"
+then :
+  gcry_cv_gcc_inline_asm_aarch64_crypto=yes
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam \
+    conftest$ac_exeext conftest.$ac_ext
+        fi
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: 
$gcry_cv_gcc_inline_asm_aarch64_crypto" >&5
+printf "%s\n" "$gcry_cv_gcc_inline_asm_aarch64_crypto" >&6; }
+if test "$gcry_cv_gcc_inline_asm_aarch64_crypto" = "yes" ; then
+
+printf "%s\n" "#define HAVE_GCC_INLINE_ASM_AARCH64_CRYPTO 1" >>confdefs.h
+
+fi
+
+
+#
+# Check whether PowerPC AltiVec/VSX intrinsics
+#
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether compiler 
supports PowerPC AltiVec/VSX intrinsics" >&5
+printf %s "checking whether compiler supports PowerPC AltiVec/VSX 
intrinsics... " >&6; }
+if test ${gcry_cv_cc_ppc_altivec+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test "$mpi_cpu_arch" != "ppc" ||
+         test "$try_asm_modules" != "yes" ; then
+       gcry_cv_cc_ppc_altivec="n/a"
+      else
+       gcry_cv_cc_ppc_altivec=no
+       cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <altivec.h>
+         typedef vector unsigned char block;
+         typedef vector unsigned int vecu32;
+         block fn(block in)
+         {
+           block t = vec_perm (in, in, vec_vsx_ld (0, (unsigned char*)0));
+           vecu32 y = vec_vsx_ld (0, (unsigned int*)0);
+           return vec_cipher_be (t, in) ^ (block)y;
+         }
+
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"
+then :
+  gcry_cv_cc_ppc_altivec=yes
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext
+      fi
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $gcry_cv_cc_ppc_altivec" 
>&5
+printf "%s\n" "$gcry_cv_cc_ppc_altivec" >&6; }
+if test "$gcry_cv_cc_ppc_altivec" = "yes" ; then
+
+printf "%s\n" "#define HAVE_COMPATIBLE_CC_PPC_ALTIVEC 1" >>confdefs.h
+
+fi
+
+_gcc_cflags_save=$CFLAGS
+CFLAGS="$CFLAGS -maltivec -mvsx -mcrypto"
+
+if test "$gcry_cv_cc_ppc_altivec" = "no" &&
+    test "$mpi_cpu_arch" = "ppc" &&
+    test "$try_asm_modules" == "yes" ; then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether compiler 
supports PowerPC AltiVec/VSX/crypto intrinsics with extra GCC flags" >&5
+printf %s "checking whether compiler supports PowerPC AltiVec/VSX/crypto 
intrinsics with extra GCC flags... " >&6; }
+if test ${gcry_cv_cc_ppc_altivec_cflags+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  gcry_cv_cc_ppc_altivec_cflags=no
+    cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <altivec.h>
+       typedef vector unsigned char block;
+       typedef vector unsigned int vecu32;
+       block fn(block in)
+       {
+         block t = vec_perm (in, in, vec_vsx_ld (0, (unsigned char*)0));
+         vecu32 y = vec_vsx_ld (0, (unsigned int*)0);
+         return vec_cipher_be (t, in) ^ (block)y;
+       }
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"
+then :
+  gcry_cv_cc_ppc_altivec_cflags=yes
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: 
$gcry_cv_cc_ppc_altivec_cflags" >&5
+printf "%s\n" "$gcry_cv_cc_ppc_altivec_cflags" >&6; }
+  if test "$gcry_cv_cc_ppc_altivec_cflags" = "yes" ; then
+
+printf "%s\n" "#define HAVE_COMPATIBLE_CC_PPC_ALTIVEC 1" >>confdefs.h
+
+
+printf "%s\n" "#define HAVE_COMPATIBLE_CC_PPC_ALTIVEC_WITH_CFLAGS 1" 
>>confdefs.h
+
+  fi
+fi
+
+ if test "$gcry_cv_cc_ppc_altivec_cflags" = "yes"; then
+  ENABLE_PPC_VCRYPTO_EXTRA_CFLAGS_TRUE=
+  ENABLE_PPC_VCRYPTO_EXTRA_CFLAGS_FALSE='#'
+else
+  ENABLE_PPC_VCRYPTO_EXTRA_CFLAGS_TRUE='#'
+  ENABLE_PPC_VCRYPTO_EXTRA_CFLAGS_FALSE=
+fi
+
+
+# Restore flags.
+CFLAGS=$_gcc_cflags_save;
+
+
+#
+# Check whether GCC inline assembler supports PowerPC AltiVec/VSX/crypto 
instructions
+#
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether GCC inline 
assembler supports PowerPC AltiVec/VSX/crypto instructions" >&5
+printf %s "checking whether GCC inline assembler supports PowerPC 
AltiVec/VSX/crypto instructions... " >&6; }
+if test ${gcry_cv_gcc_inline_asm_ppc_altivec+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test "$mpi_cpu_arch" != "ppc" ||
+           test "$try_asm_modules" != "yes" ; then
+          gcry_cv_gcc_inline_asm_ppc_altivec="n/a"
+        else
+          gcry_cv_gcc_inline_asm_ppc_altivec=no
+          cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+__asm__(".globl testfn;\n"
+                    ".text\n\t"
+                    "testfn:\n"
+                    "stvx %v31,%r12,%r0;\n"
+                    "lvx  %v20,%r12,%r0;\n"
+                    "vcipher %v0, %v1, %v22;\n"
+                    "lxvw4x %vs32, %r0, %r1;\n"
+                    "vadduwm %v0, %v1, %v22;\n"
+                    "vshasigmaw %v0, %v1, 0, 15;\n"
+                    "vshasigmad %v0, %v1, 0, 15;\n"
+                    "vpmsumd %v11, %v11, %v11;\n"
+                  );
+            void testfn(void);
+
+int
+main (void)
+{
+ testfn();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"
+then :
+  gcry_cv_gcc_inline_asm_ppc_altivec=yes
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam \
+    conftest$ac_exeext conftest.$ac_ext
+        fi
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: 
$gcry_cv_gcc_inline_asm_ppc_altivec" >&5
+printf "%s\n" "$gcry_cv_gcc_inline_asm_ppc_altivec" >&6; }
+if test "$gcry_cv_gcc_inline_asm_ppc_altivec" = "yes" ; then
+
+printf "%s\n" "#define HAVE_GCC_INLINE_ASM_PPC_ALTIVEC 1" >>confdefs.h
+
+fi
+
+
+#
+# Check whether GCC inline assembler supports PowerISA 3.00 instructions
+#
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether GCC inline 
assembler supports PowerISA 3.00 instructions" >&5
+printf %s "checking whether GCC inline assembler supports PowerISA 3.00 
instructions... " >&6; }
+if test ${gcry_cv_gcc_inline_asm_ppc_arch_3_00+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test "$mpi_cpu_arch" != "ppc" ||
+           test "$try_asm_modules" != "yes" ; then
+          gcry_cv_gcc_inline_asm_ppc_arch_3_00="n/a"
+        else
+          gcry_cv_gcc_inline_asm_ppc_arch_3_00=no
+          cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+__asm__(".text\n\t"
+                    ".globl testfn;\n"
+                    "testfn:\n"
+                    "stxvb16x %r1,%v12,%v30;\n"
+                  );
+            void testfn(void);
+
+int
+main (void)
+{
+ testfn();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"
+then :
+  gcry_cv_gcc_inline_asm_ppc_arch_3_00=yes
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam \
+    conftest$ac_exeext conftest.$ac_ext
+        fi
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: 
$gcry_cv_gcc_inline_asm_ppc_arch_3_00" >&5
+printf "%s\n" "$gcry_cv_gcc_inline_asm_ppc_arch_3_00" >&6; }
+if test "$gcry_cv_gcc_inline_asm_ppc_arch_3_00" = "yes" ; then
+
+printf "%s\n" "#define HAVE_GCC_INLINE_ASM_PPC_ARCH_3_00 1" >>confdefs.h
+
+fi
+
+
+#
+# Check whether GCC inline assembler supports zSeries instructions
+#
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether GCC inline 
assembler supports zSeries instructions" >&5
+printf %s "checking whether GCC inline assembler supports zSeries 
instructions... " >&6; }
+if test ${gcry_cv_gcc_inline_asm_s390x+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test "$mpi_cpu_arch" != "s390x" ||
+         test "$try_asm_modules" != "yes" ; then
+         gcry_cv_gcc_inline_asm_s390x="n/a"
+       else
+         gcry_cv_gcc_inline_asm_s390x=no
+         cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+typedef unsigned int u128_t __attribute__ ((mode (TI)));
+           unsigned int testfunc(unsigned int x, void *y, unsigned int z)
+           {
+             unsigned long fac[8];
+             register unsigned long reg0 asm("0") = 0;
+             register unsigned long reg1 asm("1") = x;
+             u128_t r1 = ((u128_t)(unsigned long)y << 64) | (unsigned long)z;
+             u128_t r2 = 0;
+             u128_t r3 = 0;
+             asm volatile (".insn rre,0xb92e << 16, %[r1], %[r2]\n\t"
+                           : [r1] "+a" (r1), [r2] "+a" (r2)
+                           : "r" (reg0), "r" (reg1)
+                           : "cc", "memory");
+             asm volatile (".insn rrf,0xb929 << 16, %[r1], %[r2], %[r3], 0\n\t"
+                           : [r1] "+a" (r1), [r2] "+a" (r2), [r3] "+a" (r3)
+                           : "r" (reg0), "r" (reg1)
+                           : "cc", "memory");
+             reg0 = 8 - 1;
+             asm ("stfle %1\n\t"
+                  : "+d" (reg0), "=Q" (fac[0])
+                  :
+                  : "cc", "memory");
+             asm volatile ("mvc 0(16, %0), 0(%1)\n\t"
+                           :
+                           : "a" (y), "a" (fac)
+                           : "memory");
+             asm volatile ("xc 0(16, %0), 0(%0)\n\t"
+                           :
+                           : "a" (fac)
+                           : "memory");
+             asm volatile ("risbgn %%r11, %%r11, 0, 129, 0\n\t"
+                           :
+                           :
+                           : "memory", "r11");
+             asm volatile ("algrk %%r14, %%r14, %%r14\n\t"
+                           :
+                           :
+                           : "memory", "r14");
+             return (unsigned int)r1 ^ reg0;
+           }
+
+int
+main (void)
+{
+ testfunc(0, 0, 0);
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"
+then :
+  gcry_cv_gcc_inline_asm_s390x=yes
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam \
+    conftest$ac_exeext conftest.$ac_ext
+       fi
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: 
$gcry_cv_gcc_inline_asm_s390x" >&5
+printf "%s\n" "$gcry_cv_gcc_inline_asm_s390x" >&6; }
+if test "$gcry_cv_gcc_inline_asm_s390x" = "yes" ; then
+
+printf "%s\n" "#define HAVE_GCC_INLINE_ASM_S390X 1" >>confdefs.h
+
+fi
+
+
+#
+# Check whether GCC inline assembler supports zSeries vector instructions
+#
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether GCC inline 
assembler supports zSeries vector instructions" >&5
+printf %s "checking whether GCC inline assembler supports zSeries vector 
instructions... " >&6; }
+if test ${gcry_cv_gcc_inline_asm_s390x_vx+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test "$mpi_cpu_arch" != "s390x" ||
+         test "$try_asm_modules" != "yes" ; then
+         gcry_cv_gcc_inline_asm_s390x_vx="n/a"
+       else
+         gcry_cv_gcc_inline_asm_s390x_vx=no
+         if test "$gcry_cv_gcc_inline_asm_s390x" = "yes" ; then
+           cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+void testfunc(void)
+             {
+               asm volatile (".machine \"z13+vx\"\n\t"
+                             "vx %%v0, %%v1, %%v31\n\t"
+                             "verllf %%v11, %%v11, (16)(0)\n\t"
+                             :
+                             :
+                             : "memory");
+             }
+
+int
+main (void)
+{
+ testfunc();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"
+then :
+  gcry_cv_gcc_inline_asm_s390x_vx=yes
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam \
+    conftest$ac_exeext conftest.$ac_ext
+         fi
+       fi
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: 
$gcry_cv_gcc_inline_asm_s390x_vx" >&5
+printf "%s\n" "$gcry_cv_gcc_inline_asm_s390x_vx" >&6; }
+if test "$gcry_cv_gcc_inline_asm_s390x_vx" = "yes" ; then
+
+printf "%s\n" "#define HAVE_GCC_INLINE_ASM_S390X_VX 1" >>confdefs.h
+
+fi
+
+
+#######################################
+#### Checks for library functions. ####
+#######################################
+
+ac_func=
+for ac_item in $ac_func_c_list
+do
+  if test $ac_func; then
+    ac_fn_c_check_func "$LINENO" $ac_func ac_cv_func_$ac_func
+    if eval test \"x\$ac_cv_func_$ac_func\" = xyes; then
+      echo "#define $ac_item 1" >> confdefs.h
+    fi
+    ac_func=
+  else
+    ac_func=$ac_item
+  fi
+done
+
+if test "x$ac_cv_func_vprintf" = xno
+then :
+  ac_fn_c_check_func "$LINENO" "_doprnt" "ac_cv_func__doprnt"
+if test "x$ac_cv_func__doprnt" = xyes
+then :
+
+printf "%s\n" "#define HAVE_DOPRNT 1" >>confdefs.h
+
+fi
+
+fi
+# We have replacements for these in src/missing-string.c
+ac_fn_c_check_func "$LINENO" "stpcpy" "ac_cv_func_stpcpy"
+if test "x$ac_cv_func_stpcpy" = xyes
+then :
+  printf "%s\n" "#define HAVE_STPCPY 1" >>confdefs.h
+
+fi
+ac_fn_c_check_func "$LINENO" "strcasecmp" "ac_cv_func_strcasecmp"
+if test "x$ac_cv_func_strcasecmp" = xyes
+then :
+  printf "%s\n" "#define HAVE_STRCASECMP 1" >>confdefs.h
+
+fi
+
+# We have replacements for these in src/g10lib.h
+ac_fn_c_check_func "$LINENO" "strtoul" "ac_cv_func_strtoul"
+if test "x$ac_cv_func_strtoul" = xyes
+then :
+  printf "%s\n" "#define HAVE_STRTOUL 1" >>confdefs.h
+
+fi
+ac_fn_c_check_func "$LINENO" "memmove" "ac_cv_func_memmove"
+if test "x$ac_cv_func_memmove" = xyes
+then :
+  printf "%s\n" "#define HAVE_MEMMOVE 1" >>confdefs.h
+
+fi
+ac_fn_c_check_func "$LINENO" "stricmp" "ac_cv_func_stricmp"
+if test "x$ac_cv_func_stricmp" = xyes
+then :
+  printf "%s\n" "#define HAVE_STRICMP 1" >>confdefs.h
+
+fi
+ac_fn_c_check_func "$LINENO" "atexit" "ac_cv_func_atexit"
+if test "x$ac_cv_func_atexit" = xyes
+then :
+  printf "%s\n" "#define HAVE_ATEXIT 1" >>confdefs.h
+
+fi
+ac_fn_c_check_func "$LINENO" "raise" "ac_cv_func_raise"
+if test "x$ac_cv_func_raise" = xyes
+then :
+  printf "%s\n" "#define HAVE_RAISE 1" >>confdefs.h
+
+fi
+
+# Other checks
+ac_fn_c_check_func "$LINENO" "strerror" "ac_cv_func_strerror"
+if test "x$ac_cv_func_strerror" = xyes
+then :
+  printf "%s\n" "#define HAVE_STRERROR 1" >>confdefs.h
+
+fi
+ac_fn_c_check_func "$LINENO" "rand" "ac_cv_func_rand"
+if test "x$ac_cv_func_rand" = xyes
+then :
+  printf "%s\n" "#define HAVE_RAND 1" >>confdefs.h
+
+fi
+ac_fn_c_check_func "$LINENO" "mmap" "ac_cv_func_mmap"
+if test "x$ac_cv_func_mmap" = xyes
+then :
+  printf "%s\n" "#define HAVE_MMAP 1" >>confdefs.h
+
+fi
+ac_fn_c_check_func "$LINENO" "getpagesize" "ac_cv_func_getpagesize"
+if test "x$ac_cv_func_getpagesize" = xyes
+then :
+  printf "%s\n" "#define HAVE_GETPAGESIZE 1" >>confdefs.h
+
+fi
+ac_fn_c_check_func "$LINENO" "sysconf" "ac_cv_func_sysconf"
+if test "x$ac_cv_func_sysconf" = xyes
+then :
+  printf "%s\n" "#define HAVE_SYSCONF 1" >>confdefs.h
+
+fi
+ac_fn_c_check_func "$LINENO" "waitpid" "ac_cv_func_waitpid"
+if test "x$ac_cv_func_waitpid" = xyes
+then :
+  printf "%s\n" "#define HAVE_WAITPID 1" >>confdefs.h
+
+fi
+ac_fn_c_check_func "$LINENO" "wait4" "ac_cv_func_wait4"
+if test "x$ac_cv_func_wait4" = xyes
+then :
+  printf "%s\n" "#define HAVE_WAIT4 1" >>confdefs.h
+
+fi
+
+ac_fn_c_check_func "$LINENO" "gettimeofday" "ac_cv_func_gettimeofday"
+if test "x$ac_cv_func_gettimeofday" = xyes
+then :
+  printf "%s\n" "#define HAVE_GETTIMEOFDAY 1" >>confdefs.h
+
+fi
+ac_fn_c_check_func "$LINENO" "getrusage" "ac_cv_func_getrusage"
+if test "x$ac_cv_func_getrusage" = xyes
+then :
+  printf "%s\n" "#define HAVE_GETRUSAGE 1" >>confdefs.h
+
+fi
+ac_fn_c_check_func "$LINENO" "gethrtime" "ac_cv_func_gethrtime"
+if test "x$ac_cv_func_gethrtime" = xyes
+then :
+  printf "%s\n" "#define HAVE_GETHRTIME 1" >>confdefs.h
+
+fi
+ac_fn_c_check_func "$LINENO" "clock_gettime" "ac_cv_func_clock_gettime"
+if test "x$ac_cv_func_clock_gettime" = xyes
+then :
+  printf "%s\n" "#define HAVE_CLOCK_GETTIME 1" >>confdefs.h
+
+fi
+ac_fn_c_check_func "$LINENO" "syslog" "ac_cv_func_syslog"
+if test "x$ac_cv_func_syslog" = xyes
+then :
+  printf "%s\n" "#define HAVE_SYSLOG 1" >>confdefs.h
+
+fi
+
+ac_fn_c_check_func "$LINENO" "syscall" "ac_cv_func_syscall"
+if test "x$ac_cv_func_syscall" = xyes
+then :
+  printf "%s\n" "#define HAVE_SYSCALL 1" >>confdefs.h
+
+fi
+ac_fn_c_check_func "$LINENO" "fcntl" "ac_cv_func_fcntl"
+if test "x$ac_cv_func_fcntl" = xyes
+then :
+  printf "%s\n" "#define HAVE_FCNTL 1" >>confdefs.h
+
+fi
+ac_fn_c_check_func "$LINENO" "ftruncate" "ac_cv_func_ftruncate"
+if test "x$ac_cv_func_ftruncate" = xyes
+then :
+  printf "%s\n" "#define HAVE_FTRUNCATE 1" >>confdefs.h
+
+fi
+ac_fn_c_check_func "$LINENO" "flockfile" "ac_cv_func_flockfile"
+if test "x$ac_cv_func_flockfile" = xyes
+then :
+  printf "%s\n" "#define HAVE_FLOCKFILE 1" >>confdefs.h
+
+fi
+ac_fn_c_check_func "$LINENO" "getauxval" "ac_cv_func_getauxval"
+if test "x$ac_cv_func_getauxval" = xyes
+then :
+  printf "%s\n" "#define HAVE_GETAUXVAL 1" >>confdefs.h
+
+fi
+ac_fn_c_check_func "$LINENO" "elf_aux_info" "ac_cv_func_elf_aux_info"
+if test "x$ac_cv_func_elf_aux_info" = xyes
+then :
+  printf "%s\n" "#define HAVE_ELF_AUX_INFO 1" >>confdefs.h
+
+fi
+
+ac_fn_c_check_func "$LINENO" "explicit_bzero" "ac_cv_func_explicit_bzero"
+if test "x$ac_cv_func_explicit_bzero" = xyes
+then :
+  printf "%s\n" "#define HAVE_EXPLICIT_BZERO 1" >>confdefs.h
+
+fi
+ac_fn_c_check_func "$LINENO" "explicit_memset" "ac_cv_func_explicit_memset"
+if test "x$ac_cv_func_explicit_memset" = xyes
+then :
+  printf "%s\n" "#define HAVE_EXPLICIT_MEMSET 1" >>confdefs.h
+
+fi
+ac_fn_c_check_func "$LINENO" "getentropy" "ac_cv_func_getentropy"
+if test "x$ac_cv_func_getentropy" = xyes
+then :
+  printf "%s\n" "#define HAVE_GETENTROPY 1" >>confdefs.h
+
+fi
+
+
+ ac_fn_c_check_func "$LINENO" "mlock" "ac_cv_func_mlock"
+if test "x$ac_cv_func_mlock" = xyes
+then :
+  printf "%s\n" "#define HAVE_MLOCK 1" >>confdefs.h
+
+fi
+
+    if test "$ac_cv_func_mlock" = "no"; then
+        ac_fn_c_check_header_compile "$LINENO" "sys/mman.h" 
"ac_cv_header_sys_mman_h" "$ac_includes_default"
+if test "x$ac_cv_header_sys_mman_h" = xyes
+then :
+  printf "%s\n" "#define HAVE_SYS_MMAN_H 1" >>confdefs.h
+
+fi
+
+        if test "$ac_cv_header_sys_mman_h" = "yes"; then
+            # Add librt to LIBS:
+            { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for memlk 
in -lrt" >&5
+printf %s "checking for memlk in -lrt... " >&6; }
+if test ${ac_cv_lib_rt_memlk+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  ac_check_lib_save_LIBS=$LIBS
+LIBS="-lrt  $LIBS"
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+char memlk ();
+int
+main (void)
+{
+return memlk ();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"
+then :
+  ac_cv_lib_rt_memlk=yes
+else $as_nop
+  ac_cv_lib_rt_memlk=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam \
+    conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_rt_memlk" >&5
+printf "%s\n" "$ac_cv_lib_rt_memlk" >&6; }
+if test "x$ac_cv_lib_rt_memlk" = xyes
+then :
+  printf "%s\n" "#define HAVE_LIBRT 1" >>confdefs.h
+
+  LIBS="-lrt $LIBS"
+
+fi
+
+            { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether 
mlock is in sys/mman.h" >&5
+printf %s "checking whether mlock is in sys/mman.h... " >&6; }
+if test ${gnupg_cv_mlock_is_in_sys_mman+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+                    #include <assert.h>
+                    #ifdef HAVE_SYS_MMAN_H
+                    #include <sys/mman.h>
+                    #endif
+
+int
+main (void)
+{
+
+int i;
+
+/* glibc defines this for functions which it implements
+ * to always fail with ENOSYS.  Some functions are actually
+ * named something starting with __ and the normal name
+ * is an alias.  */
+#if defined (__stub_mlock) || defined (__stub___mlock)
+choke me
+#else
+mlock(&i, 4);
+#endif
+; return 0;
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"
+then :
+  gnupg_cv_mlock_is_in_sys_mman=yes
+else $as_nop
+  gnupg_cv_mlock_is_in_sys_mman=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam \
+    conftest$ac_exeext conftest.$ac_ext
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: 
$gnupg_cv_mlock_is_in_sys_mman" >&5
+printf "%s\n" "$gnupg_cv_mlock_is_in_sys_mman" >&6; }
+            if test "$gnupg_cv_mlock_is_in_sys_mman" = "yes"; then
+
+printf "%s\n" "#define HAVE_MLOCK 1" >>confdefs.h
+
+            fi
+        fi
+    fi
+    if test "$ac_cv_func_mlock" = "yes"; then
+        ac_fn_c_check_func "$LINENO" "sysconf" "ac_cv_func_sysconf"
+if test "x$ac_cv_func_sysconf" = xyes
+then :
+  printf "%s\n" "#define HAVE_SYSCONF 1" >>confdefs.h
+
+fi
+ac_fn_c_check_func "$LINENO" "getpagesize" "ac_cv_func_getpagesize"
+if test "x$ac_cv_func_getpagesize" = xyes
+then :
+  printf "%s\n" "#define HAVE_GETPAGESIZE 1" >>confdefs.h
+
+fi
+
+        { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether mlock 
is broken" >&5
+printf %s "checking whether mlock is broken... " >&6; }
+          if test ${gnupg_cv_have_broken_mlock+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test "$cross_compiling" = yes
+then :
+  gnupg_cv_have_broken_mlock="assume-no"
+
+else $as_nop
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <fcntl.h>
+
+int main()
+{
+    char *pool;
+    int err;
+    long int pgsize;
+
+#if defined(HAVE_SYSCONF) && defined(_SC_PAGESIZE)
+    pgsize = sysconf (_SC_PAGESIZE);
+#elif defined (HAVE_GETPAGESIZE)
+    pgsize = getpagesize();
+#else
+    pgsize = -1;
+#endif
+
+    if (pgsize == -1)
+      pgsize = 4096;
+
+    pool = malloc( 4096 + pgsize );
+    if( !pool )
+        return 2;
+    pool += (pgsize - ((size_t)pool % pgsize));
+
+    err = mlock( pool, 4096 );
+    if( !err || errno == EPERM || errno == EAGAIN)
+        return 0; /* okay */
+
+    return 1;  /* hmmm */
+}
+
+_ACEOF
+if ac_fn_c_try_run "$LINENO"
+then :
+  gnupg_cv_have_broken_mlock="no"
+else $as_nop
+  gnupg_cv_have_broken_mlock="yes"
+fi
+rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \
+  conftest.$ac_objext conftest.beam conftest.$ac_ext
+fi
+
+
+fi
+
+         if test "$gnupg_cv_have_broken_mlock" = "yes"; then
+
+printf "%s\n" "#define HAVE_BROKEN_MLOCK 1" >>confdefs.h
+
+             { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+printf "%s\n" "yes" >&6; }
+         else
+            if test "$gnupg_cv_have_broken_mlock" = "no"; then
+                { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5
+printf "%s\n" "no" >&6; }
+            else
+                { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: assuming 
no" >&5
+printf "%s\n" "assuming no" >&6; }
+            fi
+         fi
+    fi
+
+
+#
+# Replacement functions.
+#
+ac_fn_c_check_func "$LINENO" "getpid" "ac_cv_func_getpid"
+if test "x$ac_cv_func_getpid" = xyes
+then :
+  printf "%s\n" "#define HAVE_GETPID 1" >>confdefs.h
+
+else $as_nop
+  case " $LIBOBJS " in
+  *" getpid.$ac_objext "* ) ;;
+  *) LIBOBJS="$LIBOBJS getpid.$ac_objext"
+ ;;
+esac
+
+fi
+ac_fn_c_check_func "$LINENO" "clock" "ac_cv_func_clock"
+if test "x$ac_cv_func_clock" = xyes
+then :
+  printf "%s\n" "#define HAVE_CLOCK 1" >>confdefs.h
+
+else $as_nop
+  case " $LIBOBJS " in
+  *" clock.$ac_objext "* ) ;;
+  *) LIBOBJS="$LIBOBJS clock.$ac_objext"
+ ;;
+esac
+
+fi
+
+
+
+#
+# Check whether it is necessary to link against libdl.
+#
+DL_LIBS=""
+if test "$use_hmac_binary_check" != no ; then
+  _gcry_save_libs="$LIBS"
+  LIBS=""
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for library 
containing dlopen" >&5
+printf %s "checking for library containing dlopen... " >&6; }
+if test ${ac_cv_search_dlopen+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  ac_func_search_save_LIBS=$LIBS
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+char dlopen ();
+int
+main (void)
+{
+return dlopen ();
+  ;
+  return 0;
+}
+_ACEOF
+for ac_lib in '' c dl
+do
+  if test -z "$ac_lib"; then
+    ac_res="none required"
+  else
+    ac_res=-l$ac_lib
+    LIBS="-l$ac_lib  $ac_func_search_save_LIBS"
+  fi
+  if ac_fn_c_try_link "$LINENO"
+then :
+  ac_cv_search_dlopen=$ac_res
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam \
+    conftest$ac_exeext
+  if test ${ac_cv_search_dlopen+y}
+then :
+  break
+fi
+done
+if test ${ac_cv_search_dlopen+y}
+then :
+
+else $as_nop
+  ac_cv_search_dlopen=no
+fi
+rm conftest.$ac_ext
+LIBS=$ac_func_search_save_LIBS
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_search_dlopen" >&5
+printf "%s\n" "$ac_cv_search_dlopen" >&6; }
+ac_res=$ac_cv_search_dlopen
+if test "$ac_res" != no
+then :
+  test "$ac_res" = "none required" || LIBS="$ac_res $LIBS"
+
+fi
+
+  DL_LIBS=$LIBS
+  LIBS="$_gcry_save_libs"
+fi
+
+
+
+#
+# Check whether we can use Linux capabilities as requested.
+#
+if test "$use_capabilities" = "yes" ; then
+use_capabilities=no
+ac_fn_c_check_header_compile "$LINENO" "sys/capability.h" 
"ac_cv_header_sys_capability_h" "$ac_includes_default"
+if test "x$ac_cv_header_sys_capability_h" = xyes
+then :
+  printf "%s\n" "#define HAVE_SYS_CAPABILITY_H 1" >>confdefs.h
+
+fi
+
+if test "$ac_cv_header_sys_capability_h" = "yes" ; then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for cap_init in 
-lcap" >&5
+printf %s "checking for cap_init in -lcap... " >&6; }
+if test ${ac_cv_lib_cap_cap_init+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  ac_check_lib_save_LIBS=$LIBS
+LIBS="-lcap  $LIBS"
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+char cap_init ();
+int
+main (void)
+{
+return cap_init ();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"
+then :
+  ac_cv_lib_cap_cap_init=yes
+else $as_nop
+  ac_cv_lib_cap_cap_init=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam \
+    conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_cap_cap_init" 
>&5
+printf "%s\n" "$ac_cv_lib_cap_cap_init" >&6; }
+if test "x$ac_cv_lib_cap_cap_init" = xyes
+then :
+  ac_need_libcap=1
+fi
+
+  if test "$ac_cv_lib_cap_cap_init" = "yes"; then
+
+printf "%s\n" "#define USE_CAPABILITIES 1" >>confdefs.h
+
+     LIBS="$LIBS -lcap"
+     use_capabilities=yes
+  fi
+fi
+if test "$use_capabilities" = "no" ; then
+    { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING:
+***
+*** The use of capabilities on this system is not possible.
+*** You need a recent Linux kernel and some patches:
+***   fcaps-2.2.9-990610.patch      (kernel patch for 2.2.9)
+***   fcap-module-990613.tar.gz     (kernel module)
+***   libcap-1.92.tar.gz            (user mode library and utilities)
+*** And you have to configure the kernel with CONFIG_VFS_CAP_PLUGIN
+*** set (filesystems menu). Be warned: This code is *really* ALPHA.
+***" >&5
+printf "%s\n" "$as_me: WARNING:
+***
+*** The use of capabilities on this system is not possible.
+*** You need a recent Linux kernel and some patches:
+***   fcaps-2.2.9-990610.patch      (kernel patch for 2.2.9)
+***   fcap-module-990613.tar.gz     (kernel module)
+***   libcap-1.92.tar.gz            (user mode library and utilities)
+*** And you have to configure the kernel with CONFIG_VFS_CAP_PLUGIN
+*** set (filesystems menu). Be warned: This code is *really* ALPHA.
+***" >&2;}
+fi
+fi
+
+# Check whether a random device is available.
+if test "$try_dev_random" = yes ; then
+    { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for random device" 
>&5
+printf %s "checking for random device... " >&6; }
+if test ${ac_cv_have_dev_random+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+  if test -r "$NAME_OF_DEV_RANDOM" && test -r "$NAME_OF_DEV_URANDOM" ; then
+      ac_cv_have_dev_random=yes; else ac_cv_have_dev_random=no; fi
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_have_dev_random" 
>&5
+printf "%s\n" "$ac_cv_have_dev_random" >&6; }
+    if test "$ac_cv_have_dev_random" = yes; then
+
+printf "%s\n" "#define HAVE_DEV_RANDOM 1" >>confdefs.h
+
+    fi
+else
+    { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for random device" 
>&5
+printf %s "checking for random device... " >&6; }
+    ac_cv_have_dev_random=no
+    { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: has been disabled" 
>&5
+printf "%s\n" "has been disabled" >&6; }
+fi
+
+# Figure out the random modules for this configuration.
+if test "$random" = "default"; then
+
+    # Select default value.
+    if test "$ac_cv_func_getentropy" = yes; then
+        random_modules="getentropy"
+    elif test "$ac_cv_have_dev_random" = yes; then
+        # Try Linuxish random device.
+        random_modules="linux"
+    else
+        case "${host}" in
+        *-*-mingw32ce*)
+          # WindowsCE random device.
+          random_modules="w32ce"
+          ;;
+        *-*-mingw32*|*-*-cygwin*)
+          # Windows random device.
+          random_modules="w32"
+          ;;
+        *)
+          # Build everything, allow to select at runtime.
+          random_modules="$auto_random_modules"
+          ;;
+        esac
+    fi
+else
+    if test "$random" = "auto"; then
+        # Build everything, allow to select at runtime.
+        random_modules="$auto_random_modules"
+    else
+        random_modules="$random"
+    fi
+fi
+
+
+#
+# Other defines
+#
+if test mym4_isgit = "yes"; then
+
+printf "%s\n" "#define IS_DEVELOPMENT_VERSION 1" >>confdefs.h
+
+fi
+
+
+ if test x$cross_compiling = xyes; then
+  CROSS_COMPILING_TRUE=
+  CROSS_COMPILING_FALSE='#'
+else
+  CROSS_COMPILING_TRUE='#'
+  CROSS_COMPILING_FALSE=
+fi
+
+
+
+# This is handy for debugging so the compiler doesn't rearrange
+# things and eliminate variables.
+# Check whether --enable-optimization was given.
+if test ${enable_optimization+y}
+then :
+  enableval=$enable_optimization; if test $enableval = no ; then
+                         CFLAGS=`echo $CFLAGS | sed 's/-O[0-9]//'`
+                       fi
+fi
+
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for cc features" >&5
+printf "%s\n" "$as_me: checking for cc features" >&6;}
+# CFLAGS mangling when using gcc.
+if test "$GCC" = yes; then
+    { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if gcc supports 
-fno-delete-null-pointer-checks" >&5
+printf %s "checking if gcc supports -fno-delete-null-pointer-checks... " >&6; }
+    _gcc_cflags_save=$CFLAGS
+    CFLAGS="-fno-delete-null-pointer-checks"
+    cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main (void)
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"
+then :
+  _gcc_wopt=yes
+else $as_nop
+  _gcc_wopt=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext
+    { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $_gcc_wopt" >&5
+printf "%s\n" "$_gcc_wopt" >&6; }
+    CFLAGS=$_gcc_cflags_save;
+    if test x"$_gcc_wopt" = xyes ; then
+       CFLAGS="$CFLAGS -fno-delete-null-pointer-checks"
+    fi
+
+    CFLAGS="$CFLAGS -Wall"
+    if test "$USE_MAINTAINER_MODE" = "yes"; then
+        CFLAGS="$CFLAGS -Wcast-align -Wshadow -Wstrict-prototypes"
+        CFLAGS="$CFLAGS -Wformat -Wno-format-y2k -Wformat-security"
+
+        # If -Wno-missing-field-initializers is supported we can enable a
+        # a bunch of really useful warnings.
+        { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if gcc supports 
-Wno-missing-field-initializers" >&5
+printf %s "checking if gcc supports -Wno-missing-field-initializers... " >&6; }
+        _gcc_cflags_save=$CFLAGS
+        CFLAGS="-Wno-missing-field-initializers"
+        cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main (void)
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"
+then :
+  _gcc_wopt=yes
+else $as_nop
+  _gcc_wopt=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext
+        { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $_gcc_wopt" >&5
+printf "%s\n" "$_gcc_wopt" >&6; }
+        CFLAGS=$_gcc_cflags_save;
+        if test x"$_gcc_wopt" = xyes ; then
+          CFLAGS="$CFLAGS -W -Wextra -Wbad-function-cast"
+          CFLAGS="$CFLAGS -Wwrite-strings"
+          CFLAGS="$CFLAGS -Wdeclaration-after-statement"
+          CFLAGS="$CFLAGS -Wno-missing-field-initializers"
+          CFLAGS="$CFLAGS -Wno-sign-compare"
+        fi
+
+        { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking if gcc supports 
-Wpointer-arith" >&5
+printf %s "checking if gcc supports -Wpointer-arith... " >&6; }
+        _gcc_cflags_save=$CFLAGS
+        CFLAGS="-Wpointer-arith"
+        cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main (void)
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"
+then :
+  _gcc_wopt=yes
+else $as_nop
+  _gcc_wopt=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext
+        { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $_gcc_wopt" >&5
+printf "%s\n" "$_gcc_wopt" >&6; }
+        CFLAGS=$_gcc_cflags_save;
+        if test x"$_gcc_wopt" = xyes ; then
+          CFLAGS="$CFLAGS -Wpointer-arith"
+        fi
+    fi
+fi
+
+# Check whether as(1) supports a noeexecstack feature.  This test
+# includes an override option.
+
+
+
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether non excutable 
stack support is requested" >&5
+printf %s "checking whether non excutable stack support is requested... " >&6; 
}
+# Check whether --enable-noexecstack was given.
+if test ${enable_noexecstack+y}
+then :
+  enableval=$enable_noexecstack; noexecstack_support=$enableval
+else $as_nop
+  noexecstack_support=yes
+fi
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $noexecstack_support" >&5
+printf "%s\n" "$noexecstack_support" >&6; }
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether assembler 
supports --noexecstack option" >&5
+printf %s "checking whether assembler supports --noexecstack option... " >&6; }
+if test ${cl_cv_as_noexecstack+y}
+then :
+  printf %s "(cached) " >&6
+else $as_nop
+    cat > conftest.c <<EOF
+void foo(void) {}
+EOF
+  if { ac_try='${CC} $CFLAGS $CPPFLAGS
+                     -S -o conftest.s conftest.c >/dev/null'
+  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+  (eval $ac_try) 2>&5
+  ac_status=$?
+  printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; } \
+     && grep .note.GNU-stack conftest.s >/dev/null \
+     && { ac_try='${CCAS} $CCASFLAGS $CPPFLAGS -Wa,--noexecstack
+                       -c -o conftest.o conftest.s >/dev/null'
+  { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5
+  (eval $ac_try) 2>&5
+  ac_status=$?
+  printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; }
+  then
+    cl_cv_as_noexecstack=yes
+  else
+    cl_cv_as_noexecstack=no
+  fi
+  rm -f conftest*
+fi
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $cl_cv_as_noexecstack" 
>&5
+printf "%s\n" "$cl_cv_as_noexecstack" >&6; }
+  if test "$noexecstack_support" = yes -a "$cl_cv_as_noexecstack" = yes; then
+       NOEXECSTACK_FLAGS="-Wa,--noexecstack"
+  else
+        NOEXECSTACK_FLAGS=
+  fi
+
+
+
+
+
+
+
+
+
+
+ac_config_commands="$ac_config_commands gcrypt-conf"
+
+
+#####################
+#### Conclusion. ####
+#####################
+
+# Check that requested feature can actually be used and define
+# ENABLE_foo_SUPPORT macros.
+
+if test x"$aesnisupport" = xyes ; then
+  if test "$gcry_cv_gcc_inline_asm_ssse3" != "yes" ; then
+    aesnisupport="no (unsupported by compiler)"
+  fi
+fi
+if test x"$shaextsupport" = xyes ; then
+  if test "$gcry_cv_gcc_inline_asm_shaext" != "yes" ; then
+    shaextsupport="no (unsupported by compiler)"
+  fi
+fi
+if test x"$pclmulsupport" = xyes ; then
+  if test "$gcry_cv_gcc_inline_asm_pclmul" != "yes" ; then
+    pclmulsupport="no (unsupported by compiler)"
+  fi
+fi
+if test x"$sse41support" = xyes ; then
+  if test "$gcry_cv_gcc_inline_asm_sse41" != "yes" ; then
+    sse41support="no (unsupported by compiler)"
+  fi
+fi
+if test x"$avxsupport" = xyes ; then
+  if test "$gcry_cv_gcc_inline_asm_avx" != "yes" ; then
+    avxsupport="no (unsupported by compiler)"
+  fi
+fi
+if test x"$avx2support" = xyes ; then
+  if test "$gcry_cv_gcc_inline_asm_avx2" != "yes" ; then
+    avx2support="no (unsupported by compiler)"
+  fi
+fi
+if test x"$neonsupport" = xyes ; then
+  if test "$gcry_cv_gcc_inline_asm_neon" != "yes" ; then
+    if test "$gcry_cv_gcc_inline_asm_aarch64_neon" != "yes" ; then
+      neonsupport="no (unsupported by compiler)"
+    fi
+  fi
+fi
+if test x"$armcryptosupport" = xyes ; then
+  if test "$gcry_cv_gcc_inline_asm_aarch32_crypto" != "yes" ; then
+    if test "$gcry_cv_gcc_inline_asm_aarch64_crypto" != "yes" ; then
+      neonsupport="no (unsupported by compiler)"
+    fi
+  fi
+fi
+
+if test x"$aesnisupport" = xyes ; then
+
+printf "%s\n" "#define ENABLE_AESNI_SUPPORT 1" >>confdefs.h
+
+fi
+if test x"$shaextsupport" = xyes ; then
+
+printf "%s\n" "#define ENABLE_SHAEXT_SUPPORT 1" >>confdefs.h
+
+fi
+if test x"$pclmulsupport" = xyes ; then
+
+printf "%s\n" "#define ENABLE_PCLMUL_SUPPORT 1" >>confdefs.h
+
+fi
+if test x"$sse41support" = xyes ; then
+
+printf "%s\n" "#define ENABLE_SSE41_SUPPORT 1" >>confdefs.h
+
+fi
+if test x"$avxsupport" = xyes ; then
+
+printf "%s\n" "#define ENABLE_AVX_SUPPORT 1" >>confdefs.h
+
+fi
+if test x"$avx2support" = xyes ; then
+
+printf "%s\n" "#define ENABLE_AVX2_SUPPORT 1" >>confdefs.h
+
+fi
+if test x"$neonsupport" = xyes ; then
+
+printf "%s\n" "#define ENABLE_NEON_SUPPORT 1" >>confdefs.h
+
+fi
+if test x"$armcryptosupport" = xyes ; then
+
+printf "%s\n" "#define ENABLE_ARM_CRYPTO_SUPPORT 1" >>confdefs.h
+
+fi
+if test x"$ppccryptosupport" = xyes ; then
+
+printf "%s\n" "#define ENABLE_PPC_CRYPTO_SUPPORT 1" >>confdefs.h
+
+fi
+if test x"$jentsupport" = xyes ; then
+
+printf "%s\n" "#define ENABLE_JENT_SUPPORT 1" >>confdefs.h
+
+fi
+if test x"$padlocksupport" = xyes ; then
+
+printf "%s\n" "#define ENABLE_PADLOCK_SUPPORT 1" >>confdefs.h
+
+fi
+if test x"$drngsupport" = xyes ; then
+
+printf "%s\n" "#define ENABLE_DRNG_SUPPORT 1" >>confdefs.h
+
+fi
+
+
+if test x"$force_soft_hwfeatures" = xyes ; then
+
+printf "%s\n" "#define ENABLE_FORCE_SOFT_HWFEATURES 1" >>confdefs.h
+
+fi
+
+# Define conditional sources and config.h symbols depending on the
+# selected ciphers, pubkey-ciphers, digests, kdfs, and random modules.
+
+
+name=arcfour
+list=$enabled_ciphers
+found=0
+
+for n in $list; do
+  if test "x$name" = "x$n"; then
+    found=1
+  fi
+done
+
+if test "$found" = "1"; then
+   GCRYPT_CIPHERS="$GCRYPT_CIPHERS arcfour.lo"
+
+printf "%s\n" "#define USE_ARCFOUR 1" >>confdefs.h
+
+
+   case "${host}" in
+      x86_64-*-*)
+         # Build with the assembly implementation
+         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS arcfour-amd64.lo"
+      ;;
+   esac
+fi
+
+
+name=blowfish
+list=$enabled_ciphers
+found=0
+
+for n in $list; do
+  if test "x$name" = "x$n"; then
+    found=1
+  fi
+done
+
+if test "$found" = "1" ; then
+   GCRYPT_CIPHERS="$GCRYPT_CIPHERS blowfish.lo"
+
+printf "%s\n" "#define USE_BLOWFISH 1" >>confdefs.h
+
+
+   case "${host}" in
+      x86_64-*-*)
+         # Build with the assembly implementation
+         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS blowfish-amd64.lo"
+      ;;
+      arm*-*-*)
+         # Build with the assembly implementation
+         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS blowfish-arm.lo"
+      ;;
+   esac
+fi
+
+
+name=cast5
+list=$enabled_ciphers
+found=0
+
+for n in $list; do
+  if test "x$name" = "x$n"; then
+    found=1
+  fi
+done
+
+if test "$found" = "1" ; then
+   GCRYPT_CIPHERS="$GCRYPT_CIPHERS cast5.lo"
+
+printf "%s\n" "#define USE_CAST5 1" >>confdefs.h
+
+
+   case "${host}" in
+      x86_64-*-*)
+         # Build with the assembly implementation
+         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS cast5-amd64.lo"
+      ;;
+      arm*-*-*)
+         # Build with the assembly implementation
+         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS cast5-arm.lo"
+      ;;
+   esac
+fi
+
+
+name=des
+list=$enabled_ciphers
+found=0
+
+for n in $list; do
+  if test "x$name" = "x$n"; then
+    found=1
+  fi
+done
+
+if test "$found" = "1" ; then
+   GCRYPT_CIPHERS="$GCRYPT_CIPHERS des.lo"
+
+printf "%s\n" "#define USE_DES 1" >>confdefs.h
+
+
+   case "${host}" in
+      x86_64-*-*)
+         # Build with the assembly implementation
+         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS des-amd64.lo"
+      ;;
+   esac
+fi
+
+
+name=aes
+list=$enabled_ciphers
+found=0
+
+for n in $list; do
+  if test "x$name" = "x$n"; then
+    found=1
+  fi
+done
+
+if test "$found" = "1" ; then
+   GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael.lo"
+
+printf "%s\n" "#define USE_AES 1" >>confdefs.h
+
+
+   case "${host}" in
+      x86_64-*-*)
+         # Build with the assembly implementation
+         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-amd64.lo"
+
+         # Build with the SSSE3 implementation
+         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-ssse3-amd64.lo"
+         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-ssse3-amd64-asm.lo"
+
+         # Build with the VAES/AVX2 implementation
+         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-vaes.lo"
+         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-vaes-avx2-amd64.lo"
+      ;;
+      arm*-*-*)
+         # Build with the assembly implementation
+         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-arm.lo"
+
+         # Build with the ARMv8/AArch32 CE implementation
+         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-armv8-ce.lo"
+         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-armv8-aarch32-ce.lo"
+      ;;
+      aarch64-*-*)
+         # Build with the assembly implementation
+         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-aarch64.lo"
+
+         # Build with the ARMv8/AArch64 CE implementation
+         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-armv8-ce.lo"
+         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-armv8-aarch64-ce.lo"
+      ;;
+      powerpc64le-*-*)
+         # Build with the crypto extension implementation
+         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-ppc.lo"
+         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-ppc9le.lo"
+
+         if test "$gcry_cv_gcc_inline_asm_ppc_altivec" = "yes" &&
+            test "$gcry_cv_gcc_inline_asm_ppc_arch_3_00" = "yes" ; then
+            # Build with AES-GCM bulk implementation for P10
+            GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-gcm-p10le.lo"
+            GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-p10le.lo"
+         fi
+      ;;
+      powerpc64-*-*)
+         # Big-Endian.
+         # Build with the crypto extension implementation
+         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-ppc.lo"
+      ;;
+      powerpc-*-*)
+         # Big-Endian.
+         # Build with the crypto extension implementation
+         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-ppc.lo"
+      ;;
+      s390x-*-*)
+         # Big-Endian.
+         # Build with the crypto extension implementation
+         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-s390x.lo"
+      ;;
+   esac
+
+   case "$mpi_cpu_arch" in
+     x86)
+         # Build with the AES-NI implementation
+         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-aesni.lo"
+
+         # Build with the Padlock implementation
+         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-padlock.lo"
+      ;;
+   esac
+fi
+
+
+name=twofish
+list=$enabled_ciphers
+found=0
+
+for n in $list; do
+  if test "x$name" = "x$n"; then
+    found=1
+  fi
+done
+
+if test "$found" = "1" ; then
+   GCRYPT_CIPHERS="$GCRYPT_CIPHERS twofish.lo"
+
+printf "%s\n" "#define USE_TWOFISH 1" >>confdefs.h
+
+
+   case "${host}" in
+      x86_64-*-*)
+         # Build with the assembly implementation
+         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS twofish-amd64.lo"
+
+         if test x"$avx2support" = xyes ; then
+            # Build with the AVX2 implementation
+            GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS twofish-avx2-amd64.lo"
+         fi
+      ;;
+      arm*-*-*)
+         # Build with the assembly implementation
+         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS twofish-arm.lo"
+      ;;
+      aarch64-*-*)
+         # Build with the assembly implementation
+         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS twofish-aarch64.lo"
+      ;;
+   esac
+fi
+
+
+name=serpent
+list=$enabled_ciphers
+found=0
+
+for n in $list; do
+  if test "x$name" = "x$n"; then
+    found=1
+  fi
+done
+
+if test "$found" = "1" ; then
+   GCRYPT_CIPHERS="$GCRYPT_CIPHERS serpent.lo"
+
+printf "%s\n" "#define USE_SERPENT 1" >>confdefs.h
+
+
+   case "${host}" in
+      x86_64-*-*)
+         # Build with the SSE2 implementation
+         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS serpent-sse2-amd64.lo"
+      ;;
+   esac
+
+   if test x"$avx2support" = xyes ; then
+      # Build with the AVX2 implementation
+      GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS serpent-avx2-amd64.lo"
+   fi
+
+   if test x"$neonsupport" = xyes ; then
+      # Build with the NEON implementation
+      GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS serpent-armv7-neon.lo"
+   fi
+fi
+
+
+name=rfc2268
+list=$enabled_ciphers
+found=0
+
+for n in $list; do
+  if test "x$name" = "x$n"; then
+    found=1
+  fi
+done
+
+if test "$found" = "1" ; then
+   GCRYPT_CIPHERS="$GCRYPT_CIPHERS rfc2268.lo"
+
+printf "%s\n" "#define USE_RFC2268 1" >>confdefs.h
+
+fi
+
+
+name=seed
+list=$enabled_ciphers
+found=0
+
+for n in $list; do
+  if test "x$name" = "x$n"; then
+    found=1
+  fi
+done
+
+if test "$found" = "1" ; then
+   GCRYPT_CIPHERS="$GCRYPT_CIPHERS seed.lo"
+
+printf "%s\n" "#define USE_SEED 1" >>confdefs.h
+
+fi
+
+
+name=camellia
+list=$enabled_ciphers
+found=0
+
+for n in $list; do
+  if test "x$name" = "x$n"; then
+    found=1
+  fi
+done
+
+if test "$found" = "1" ; then
+   GCRYPT_CIPHERS="$GCRYPT_CIPHERS camellia.lo camellia-glue.lo"
+
+printf "%s\n" "#define USE_CAMELLIA 1" >>confdefs.h
+
+
+   case "${host}" in
+      arm*-*-*)
+         # Build with the assembly implementation
+         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS camellia-arm.lo"
+      ;;
+      aarch64-*-*)
+         # Build with the assembly implementation
+         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS camellia-aarch64.lo"
+      ;;
+   esac
+
+   if test x"$avxsupport" = xyes ; then
+      if test x"$aesnisupport" = xyes ; then
+        # Build with the AES-NI/AVX implementation
+        GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS camellia-aesni-avx-amd64.lo"
+      fi
+   fi
+
+   if test x"$avx2support" = xyes ; then
+      if test x"$aesnisupport" = xyes ; then
+        # Build with the AES-NI/AVX2 implementation
+        GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS camellia-aesni-avx2-amd64.lo"
+
+        # Build with the VAES/AVX2 implementation
+        GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS camellia-vaes-avx2-amd64.lo"
+      fi
+   fi
+fi
+
+
+name=idea
+list=$enabled_ciphers
+found=0
+
+for n in $list; do
+  if test "x$name" = "x$n"; then
+    found=1
+  fi
+done
+
+if test "$found" = "1" ; then
+   GCRYPT_CIPHERS="$GCRYPT_CIPHERS idea.lo"
+
+printf "%s\n" "#define USE_IDEA 1" >>confdefs.h
+
+fi
+
+
+name=salsa20
+list=$enabled_ciphers
+found=0
+
+for n in $list; do
+  if test "x$name" = "x$n"; then
+    found=1
+  fi
+done
+
+if test "$found" = "1" ; then
+   GCRYPT_CIPHERS="$GCRYPT_CIPHERS salsa20.lo"
+
+printf "%s\n" "#define USE_SALSA20 1" >>confdefs.h
+
+
+   case "${host}" in
+      x86_64-*-*)
+         # Build with the assembly implementation
+         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS salsa20-amd64.lo"
+      ;;
+   esac
+
+   if test x"$neonsupport" = xyes ; then
+     # Build with the NEON implementation
+     GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS salsa20-armv7-neon.lo"
+   fi
+fi
+
+
+name=gost28147
+list=$enabled_ciphers
+found=0
+
+for n in $list; do
+  if test "x$name" = "x$n"; then
+    found=1
+  fi
+done
+
+if test "$found" = "1" ; then
+   GCRYPT_CIPHERS="$GCRYPT_CIPHERS gost28147.lo"
+
+printf "%s\n" "#define USE_GOST28147 1" >>confdefs.h
+
+fi
+
+
+name=chacha20
+list=$enabled_ciphers
+found=0
+
+for n in $list; do
+  if test "x$name" = "x$n"; then
+    found=1
+  fi
+done
+
+if test "$found" = "1" ; then
+   GCRYPT_CIPHERS="$GCRYPT_CIPHERS chacha20.lo"
+
+printf "%s\n" "#define USE_CHACHA20 1" >>confdefs.h
+
+
+   case "${host}" in
+      x86_64-*-*)
+         # Build with the assembly implementation
+         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS chacha20-amd64-ssse3.lo"
+         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS chacha20-amd64-avx2.lo"
+      ;;
+      aarch64-*-*)
+         # Build with the assembly implementation
+         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS chacha20-aarch64.lo"
+      ;;
+      powerpc64le-*-*)
+         # Build with the ppc8 vector implementation
+         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS chacha20-ppc.lo"
+      ;;
+      powerpc64-*-*)
+         # Build with the ppc8 vector implementation
+         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS chacha20-ppc.lo"
+      ;;
+      powerpc-*-*)
+         # Build with the ppc8 vector implementation
+         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS chacha20-ppc.lo"
+      ;;
+      s390x-*-*)
+         # Build with the s390x/zSeries vector implementation
+         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS chacha20-s390x.lo"
+      ;;
+   esac
+
+   if test x"$neonsupport" = xyes ; then
+     # Build with the NEON implementation
+     GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS chacha20-armv7-neon.lo"
+   fi
+fi
+
+
+name=sm4
+list=$enabled_ciphers
+found=0
+
+for n in $list; do
+  if test "x$name" = "x$n"; then
+    found=1
+  fi
+done
+
+if test "$found" = "1" ; then
+   GCRYPT_CIPHERS="$GCRYPT_CIPHERS sm4.lo"
+
+printf "%s\n" "#define USE_SM4 1" >>confdefs.h
+
+
+   case "${host}" in
+      x86_64-*-*)
+         # Build with the assembly implementation
+         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS sm4-aesni-avx-amd64.lo"
+         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS sm4-aesni-avx2-amd64.lo"
+      ;;
+   esac
+fi
+
+
+name=dsa
+list=$enabled_pubkey_ciphers
+found=0
+
+for n in $list; do
+  if test "x$name" = "x$n"; then
+    found=1
+  fi
+done
+
+ if test "$found" = "1"; then
+  USE_DSA_TRUE=
+  USE_DSA_FALSE='#'
+else
+  USE_DSA_TRUE='#'
+  USE_DSA_FALSE=
+fi
+
+if test "$found" = "1" ; then
+   GCRYPT_PUBKEY_CIPHERS="$GCRYPT_PUBKEY_CIPHERS dsa.lo"
+
+printf "%s\n" "#define USE_DSA 1" >>confdefs.h
+
+fi
+
+
+name=rsa
+list=$enabled_pubkey_ciphers
+found=0
+
+for n in $list; do
+  if test "x$name" = "x$n"; then
+    found=1
+  fi
+done
+
+ if test "$found" = "1"; then
+  USE_RSA_TRUE=
+  USE_RSA_FALSE='#'
+else
+  USE_RSA_TRUE='#'
+  USE_RSA_FALSE=
+fi
+
+if test "$found" = "1" ; then
+   GCRYPT_PUBKEY_CIPHERS="$GCRYPT_PUBKEY_CIPHERS rsa.lo"
+
+printf "%s\n" "#define USE_RSA 1" >>confdefs.h
+
+fi
+
+
+name=elgamal
+list=$enabled_pubkey_ciphers
+found=0
+
+for n in $list; do
+  if test "x$name" = "x$n"; then
+    found=1
+  fi
+done
+
+ if test "$found" = "1"; then
+  USE_ELGAMAL_TRUE=
+  USE_ELGAMAL_FALSE='#'
+else
+  USE_ELGAMAL_TRUE='#'
+  USE_ELGAMAL_FALSE=
+fi
+
+if test "$found" = "1" ; then
+   GCRYPT_PUBKEY_CIPHERS="$GCRYPT_PUBKEY_CIPHERS elgamal.lo"
+
+printf "%s\n" "#define USE_ELGAMAL 1" >>confdefs.h
+
+fi
+
+
+name=ecc
+list=$enabled_pubkey_ciphers
+found=0
+
+for n in $list; do
+  if test "x$name" = "x$n"; then
+    found=1
+  fi
+done
+
+ if test "$found" = "1"; then
+  USE_ECC_TRUE=
+  USE_ECC_FALSE='#'
+else
+  USE_ECC_TRUE='#'
+  USE_ECC_FALSE=
+fi
+
+if test "$found" = "1" ; then
+   GCRYPT_PUBKEY_CIPHERS="$GCRYPT_PUBKEY_CIPHERS \
+                          ecc.lo ecc-curves.lo ecc-misc.lo \
+                          ecc-ecdh.lo ecc-ecdsa.lo ecc-eddsa.lo ecc-gost.lo \
+                          ecc-sm2.lo"
+
+printf "%s\n" "#define USE_ECC 1" >>confdefs.h
+
+fi
+
+
+name=crc
+list=$enabled_digests
+found=0
+
+for n in $list; do
+  if test "x$name" = "x$n"; then
+    found=1
+  fi
+done
+
+if test "$found" = "1" ; then
+   GCRYPT_DIGESTS="$GCRYPT_DIGESTS crc.lo"
+
+printf "%s\n" "#define USE_CRC 1" >>confdefs.h
+
+
+   case "${host}" in
+      i?86-*-* | x86_64-*-*)
+         # Build with the assembly implementation
+         GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS crc-intel-pclmul.lo"
+      ;;
+      aarch64-*-*)
+         # Build with the assembly implementation
+         GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS crc-armv8-ce.lo"
+         GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS crc-armv8-aarch64-ce.lo"
+      ;;
+      powerpc64le-*-*)
+         GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS crc-ppc.lo"
+      ;;
+      powerpc64-*-*)
+         GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS crc-ppc.lo"
+      ;;
+      powerpc-*-*)
+         GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS crc-ppc.lo"
+      ;;
+   esac
+fi
+
+
+name=gostr3411-94
+list=$enabled_digests
+found=0
+
+for n in $list; do
+  if test "x$name" = "x$n"; then
+    found=1
+  fi
+done
+
+if test "$found" = "1" ; then
+   # GOST R 34.11-94 internally uses GOST 28147-89
+
+name=gost28147
+list=$enabled_ciphers
+found=0
+
+for n in $list; do
+  if test "x$name" = "x$n"; then
+    found=1
+  fi
+done
+
+   if test "$found" = "1" ; then
+      GCRYPT_DIGESTS="$GCRYPT_DIGESTS gostr3411-94.lo"
+
+printf "%s\n" "#define USE_GOST_R_3411_94 1" >>confdefs.h
+
+   fi
+fi
+
+
+name=stribog
+list=$enabled_digests
+found=0
+
+for n in $list; do
+  if test "x$name" = "x$n"; then
+    found=1
+  fi
+done
+
+if test "$found" = "1" ; then
+   GCRYPT_DIGESTS="$GCRYPT_DIGESTS stribog.lo"
+
+printf "%s\n" "#define USE_GOST_R_3411_12 1" >>confdefs.h
+
+fi
+
+
+name=md2
+list=$enabled_digests
+found=0
+
+for n in $list; do
+  if test "x$name" = "x$n"; then
+    found=1
+  fi
+done
+
+if test "$found" = "1" ; then
+   GCRYPT_DIGESTS="$GCRYPT_DIGESTS md2.lo"
+
+printf "%s\n" "#define USE_MD2 1" >>confdefs.h
+
+fi
+
+
+name=md4
+list=$enabled_digests
+found=0
+
+for n in $list; do
+  if test "x$name" = "x$n"; then
+    found=1
+  fi
+done
+
+if test "$found" = "1" ; then
+   GCRYPT_DIGESTS="$GCRYPT_DIGESTS md4.lo"
+
+printf "%s\n" "#define USE_MD4 1" >>confdefs.h
+
+fi
+
+
+name=md5
+list=$enabled_digests
+found=0
+
+for n in $list; do
+  if test "x$name" = "x$n"; then
+    found=1
+  fi
+done
+
+if test "$found" = "1" ; then
+   GCRYPT_DIGESTS="$GCRYPT_DIGESTS md5.lo"
+
+printf "%s\n" "#define USE_MD5 1" >>confdefs.h
+
+fi
+
+
+name=rmd160
+list=$enabled_digests
+found=0
+
+for n in $list; do
+  if test "x$name" = "x$n"; then
+    found=1
+  fi
+done
+
+if test "$found" = "1" ; then
+   GCRYPT_DIGESTS="$GCRYPT_DIGESTS rmd160.lo"
+
+printf "%s\n" "#define USE_RMD160 1" >>confdefs.h
+
+fi
+
+
+name=sha256
+list=$enabled_digests
+found=0
+
+for n in $list; do
+  if test "x$name" = "x$n"; then
+    found=1
+  fi
+done
+
+if test "$found" = "1" ; then
+   GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha256.lo"
+
+printf "%s\n" "#define USE_SHA256 1" >>confdefs.h
+
+
+   case "${host}" in
+      x86_64-*-*)
+         # Build with the assembly implementation
+         GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha256-ssse3-amd64.lo"
+         GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha256-avx-amd64.lo"
+         GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha256-avx2-bmi2-amd64.lo"
+      ;;
+      arm*-*-*)
+         # Build with the assembly implementation
+         GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha256-armv8-aarch32-ce.lo"
+      ;;
+      aarch64-*-*)
+         # Build with the assembly implementation
+         GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha256-armv8-aarch64-ce.lo"
+      ;;
+      powerpc64le-*-*)
+         # Build with the crypto extension implementation
+         GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha256-ppc.lo"
+      ;;
+      powerpc64-*-*)
+         # Big-Endian.
+         # Build with the crypto extension implementation
+         GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha256-ppc.lo"
+      ;;
+      powerpc-*-*)
+         # Big-Endian.
+         # Build with the crypto extension implementation
+         GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha256-ppc.lo"
+   esac
+
+   case "$mpi_cpu_arch" in
+     x86)
+       # Build with the SHAEXT implementation
+       GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha256-intel-shaext.lo"
+     ;;
+   esac
+fi
+
+
+name=sha512
+list=$enabled_digests
+found=0
+
+for n in $list; do
+  if test "x$name" = "x$n"; then
+    found=1
+  fi
+done
+
+if test "$found" = "1" ; then
+   GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha512.lo"
+
+printf "%s\n" "#define USE_SHA512 1" >>confdefs.h
+
+
+   case "${host}" in
+      x86_64-*-*)
+         # Build with the assembly implementation
+         GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha512-ssse3-amd64.lo"
+         GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha512-avx-amd64.lo"
+         GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha512-avx2-bmi2-amd64.lo"
+      ;;
+      i?86-*-*)
+         # Build with the assembly implementation
+         GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha512-ssse3-i386.lo"
+      ;;
+      arm*-*-*)
+         # Build with the assembly implementation
+         GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha512-arm.lo"
+      ;;
+      powerpc64le-*-*)
+         # Build with the crypto extension implementation
+         GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha512-ppc.lo"
+      ;;
+      powerpc64-*-*)
+         # Big-Endian.
+         # Build with the crypto extension implementation
+         GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha512-ppc.lo"
+      ;;
+      powerpc-*-*)
+         # Big-Endian.
+         # Build with the crypto extension implementation
+         GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha512-ppc.lo"
+   esac
+
+   if test x"$neonsupport" = xyes ; then
+     # Build with the NEON implementation
+     GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha512-armv7-neon.lo"
+   fi
+fi
+
+
+name=sha3
+list=$enabled_digests
+found=0
+
+for n in $list; do
+  if test "x$name" = "x$n"; then
+    found=1
+  fi
+done
+
+if test "$found" = "1" ; then
+   GCRYPT_DIGESTS="$GCRYPT_DIGESTS keccak.lo"
+
+printf "%s\n" "#define USE_SHA3 1" >>confdefs.h
+
+
+   case "${host}" in
+      x86_64-*-*)
+         # Build with the assembly implementation
+         :
+      ;;
+   esac
+
+   if test x"$neonsupport" = xyes ; then
+     # Build with the NEON implementation
+     GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS keccak-armv7-neon.lo"
+   fi
+fi
+
+
+name=tiger
+list=$enabled_digests
+found=0
+
+for n in $list; do
+  if test "x$name" = "x$n"; then
+    found=1
+  fi
+done
+
+if test "$found" = "1" ; then
+   GCRYPT_DIGESTS="$GCRYPT_DIGESTS tiger.lo"
+
+printf "%s\n" "#define USE_TIGER 1" >>confdefs.h
+
+fi
+
+
+name=whirlpool
+list=$enabled_digests
+found=0
+
+for n in $list; do
+  if test "x$name" = "x$n"; then
+    found=1
+  fi
+done
+
+if test "$found" = "1" ; then
+   GCRYPT_DIGESTS="$GCRYPT_DIGESTS whirlpool.lo"
+
+printf "%s\n" "#define USE_WHIRLPOOL 1" >>confdefs.h
+
+
+   case "${host}" in
+      x86_64-*-*)
+         # Build with the assembly implementation
+         GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS whirlpool-sse2-amd64.lo"
+      ;;
+   esac
+fi
+
+
+name=blake2
+list=$enabled_digests
+found=0
+
+for n in $list; do
+  if test "x$name" = "x$n"; then
+    found=1
+  fi
+done
+
+if test "$found" = "1" ; then
+   GCRYPT_DIGESTS="$GCRYPT_DIGESTS blake2.lo"
+
+printf "%s\n" "#define USE_BLAKE2 1" >>confdefs.h
+
+
+   case "${host}" in
+      x86_64-*-*)
+         # Build with the assembly implementation
+         GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS blake2b-amd64-avx2.lo"
+         GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS blake2s-amd64-avx.lo"
+      ;;
+   esac
+fi
+
+
+name=sm3
+list=$enabled_digests
+found=0
+
+for n in $list; do
+  if test "x$name" = "x$n"; then
+    found=1
+  fi
+done
+
+if test "$found" = "1" ; then
+   GCRYPT_DIGESTS="$GCRYPT_DIGESTS sm3.lo"
+
+printf "%s\n" "#define USE_SM3 1" >>confdefs.h
+
+
+   case "${host}" in
+     x86_64-*-*)
+        # Build with the assembly implementation
+        GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sm3-avx-bmi2-amd64.lo"
+     ;;
+     aarch64-*-*)
+        # Build with the assembly implementation
+        GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sm3-aarch64.lo"
+     ;;
+   esac
+fi
+
+# SHA-1 needs to be included always for example because it is used by
+# random-csprng.c.
+GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha1.lo"
+
+printf "%s\n" "#define USE_SHA1 1" >>confdefs.h
+
+
+case "${host}" in
+  x86_64-*-*)
+    # Build with the assembly implementation
+    GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha1-ssse3-amd64.lo"
+    GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha1-avx-amd64.lo"
+    GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha1-avx-bmi2-amd64.lo"
+    GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha1-avx2-bmi2-amd64.lo"
+  ;;
+  arm*-*-*)
+    # Build with the assembly implementation
+    GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha1-armv7-neon.lo"
+    GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha1-armv8-aarch32-ce.lo"
+  ;;
+  aarch64-*-*)
+    # Build with the assembly implementation
+    GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha1-armv8-aarch64-ce.lo"
+  ;;
+esac
+
+case "$mpi_cpu_arch" in
+  x86)
+    # Build with the SHAEXT implementation
+    GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha1-intel-shaext.lo"
+  ;;
+esac
+
+# Arch specific GCM implementations
+case "${host}" in
+  i?86-*-* | x86_64-*-*)
+    GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS cipher-gcm-intel-pclmul.lo"
+  ;;
+  arm*-*-*)
+    GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS cipher-gcm-armv7-neon.lo"
+    GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS cipher-gcm-armv8-aarch32-ce.lo"
+  ;;
+  aarch64-*-*)
+    GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS cipher-gcm-armv8-aarch64-ce.lo"
+  ;;
+  powerpc64le-*-* | powerpc64-*-* | powerpc-*-*)
+    GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS cipher-gcm-ppc.lo"
+  ;;
+esac
+
+# Arch specific MAC implementations
+case "${host}" in
+  s390x-*-*)
+    GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS poly1305-s390x.lo"
+  ;;
+esac
+
+
+name=scrypt
+list=$enabled_kdfs
+found=0
+
+for n in $list; do
+  if test "x$name" = "x$n"; then
+    found=1
+  fi
+done
+
+if test "$found" = "1" ; then
+   GCRYPT_KDFS="$GCRYPT_KDFS scrypt.lo"
+
+printf "%s\n" "#define USE_SCRYPT 1" >>confdefs.h
+
+fi
+
+
+name=getentropy
+list=$random_modules
+found=0
+
+for n in $list; do
+  if test "x$name" = "x$n"; then
+    found=1
+  fi
+done
+
+if test "$found" = "1" ; then
+   GCRYPT_RANDOM="$GCRYPT_RANDOM rndgetentropy.lo"
+
+printf "%s\n" "#define USE_RNDGETENTROPY 1" >>confdefs.h
+
+fi
+
+
+name=linux
+list=$random_modules
+found=0
+
+for n in $list; do
+  if test "x$name" = "x$n"; then
+    found=1
+  fi
+done
+
+if test "$found" = "1" ; then
+   GCRYPT_RANDOM="$GCRYPT_RANDOM rndoldlinux.lo"
+
+printf "%s\n" "#define USE_RNDOLDLINUX 1" >>confdefs.h
+
+fi
+
+
+name=unix
+list=$random_modules
+found=0
+
+for n in $list; do
+  if test "x$name" = "x$n"; then
+    found=1
+  fi
+done
+
+if test "$found" = "1" ; then
+   GCRYPT_RANDOM="$GCRYPT_RANDOM rndunix.lo"
+
+printf "%s\n" "#define USE_RNDUNIX 1" >>confdefs.h
+
+fi
+
+
+name=egd
+list=$random_modules
+found=0
+
+for n in $list; do
+  if test "x$name" = "x$n"; then
+    found=1
+  fi
+done
+
+if test "$found" = "1" ; then
+   GCRYPT_RANDOM="$GCRYPT_RANDOM rndegd.lo"
+
+printf "%s\n" "#define USE_RNDEGD 1" >>confdefs.h
+
+fi
+
+
+name=w32
+list=$random_modules
+found=0
+
+for n in $list; do
+  if test "x$name" = "x$n"; then
+    found=1
+  fi
+done
+
+if test "$found" = "1" ; then
+   GCRYPT_RANDOM="$GCRYPT_RANDOM rndw32.lo"
+
+printf "%s\n" "#define USE_RNDW32 1" >>confdefs.h
+
+fi
+
+
+name=w32ce
+list=$random_modules
+found=0
+
+for n in $list; do
+  if test "x$name" = "x$n"; then
+    found=1
+  fi
+done
+
+if test "$found" = "1" ; then
+   GCRYPT_RANDOM="$GCRYPT_RANDOM rndw32ce.lo"
+
+printf "%s\n" "#define USE_RNDW32CE 1" >>confdefs.h
+
+fi
+
+if test "$try_asm_modules" = yes ; then
+  # Build with assembly implementations
+  GCRYPT_CIPHERS="$GCRYPT_CIPHERS $GCRYPT_ASM_CIPHERS"
+  GCRYPT_DIGESTS="$GCRYPT_DIGESTS $GCRYPT_ASM_DIGESTS"
+fi
+
+
+
+
+
+
+
+LIBGCRYPT_CIPHERS=$enabled_ciphers
+
+LIBGCRYPT_PUBKEY_CIPHERS=$enabled_pubkey_ciphers
+
+LIBGCRYPT_DIGESTS=$enabled_digests
+
+
+# For printing the configuration we need a colon separated list of
+# algorithm names.
+tmp=`echo "$enabled_ciphers" | tr ' ' : `
+
+printf "%s\n" "#define LIBGCRYPT_CIPHERS \"$tmp\"" >>confdefs.h
+
+tmp=`echo "$enabled_pubkey_ciphers" | tr ' ' : `
+
+printf "%s\n" "#define LIBGCRYPT_PUBKEY_CIPHERS \"$tmp\"" >>confdefs.h
+
+tmp=`echo "$enabled_digests" | tr ' ' : `
+
+printf "%s\n" "#define LIBGCRYPT_DIGESTS \"$tmp\"" >>confdefs.h
+
+tmp=`echo "$enabled_kdfs" | tr ' ' : `
+
+printf "%s\n" "#define LIBGCRYPT_KDFS \"$tmp\"" >>confdefs.h
+
+
+
+#
+# Define conditional sources depending on the used hardware platform.
+# Note that all possible modules must also be listed in
+# src/Makefile.am (EXTRA_libgcrypt_la_SOURCES).
+#
+GCRYPT_HWF_MODULES=
+case "$mpi_cpu_arch" in
+     x86)
+
+printf "%s\n" "#define HAVE_CPU_ARCH_X86 1" >>confdefs.h
+
+        GCRYPT_HWF_MODULES="libgcrypt_la-hwf-x86.lo"
+        ;;
+     alpha)
+
+printf "%s\n" "#define HAVE_CPU_ARCH_ALPHA 1" >>confdefs.h
+
+        ;;
+     sparc)
+
+printf "%s\n" "#define HAVE_CPU_ARCH_SPARC 1" >>confdefs.h
+
+        ;;
+     mips)
+
+printf "%s\n" "#define HAVE_CPU_ARCH_MIPS 1" >>confdefs.h
+
+        ;;
+     m68k)
+
+printf "%s\n" "#define HAVE_CPU_ARCH_M68K 1" >>confdefs.h
+
+        ;;
+     ppc)
+
+printf "%s\n" "#define HAVE_CPU_ARCH_PPC 1" >>confdefs.h
+
+        GCRYPT_HWF_MODULES="libgcrypt_la-hwf-ppc.lo"
+        ;;
+     arm)
+
+printf "%s\n" "#define HAVE_CPU_ARCH_ARM 1" >>confdefs.h
+
+        GCRYPT_HWF_MODULES="libgcrypt_la-hwf-arm.lo"
+        ;;
+     aarch64)
+
+printf "%s\n" "#define HAVE_CPU_ARCH_ARM 1" >>confdefs.h
+
+        GCRYPT_HWF_MODULES="libgcrypt_la-hwf-arm.lo"
+        ;;
+     s390x)
+
+printf "%s\n" "#define HAVE_CPU_ARCH_S390X 1" >>confdefs.h
+
+        GCRYPT_HWF_MODULES="libgcrypt_la-hwf-s390x.lo"
+        ;;
+esac
+
+
+
+#
+# Option to disable building of doc file
+#
+build_doc=yes
+# Check whether --enable-doc was given.
+if test ${enable_doc+y}
+then :
+  enableval=$enable_doc; build_doc=$enableval
+else $as_nop
+  build_doc=yes
+fi
+
+ if test "x$build_doc" != xno; then
+  BUILD_DOC_TRUE=
+  BUILD_DOC_FALSE='#'
+else
+  BUILD_DOC_TRUE='#'
+  BUILD_DOC_FALSE=
+fi
+
+
+
+#
+# Provide information about the build.
+#
+BUILD_REVISION="aa161086"
+
+
+printf "%s\n" "#define BUILD_REVISION \"$BUILD_REVISION\"" >>confdefs.h
+
+
+BUILD_VERSION=`echo "$PACKAGE_VERSION" | sed 's/\([0-9.]*\).*/\1./'`
+BUILD_VERSION="${BUILD_VERSION}43542"
+BUILD_FILEVERSION=`echo "${BUILD_VERSION}" | tr . ,`
+
+
+
+# Check whether --enable-build-timestamp was given.
+if test ${enable_build_timestamp+y}
+then :
+  enableval=$enable_build_timestamp; if test "$enableval" = "yes"; then
+        BUILD_TIMESTAMP=`date -u +%Y-%m-%dT%H:%M+0000 2>/dev/null || date`
+      else
+        BUILD_TIMESTAMP="$enableval"
+      fi
+else $as_nop
+  BUILD_TIMESTAMP="<none>"
+fi
+
+
+
+printf "%s\n" "#define BUILD_TIMESTAMP \"$BUILD_TIMESTAMP\"" >>confdefs.h
+
+
+
+# And create the files.
+ac_config_files="$ac_config_files Makefile m4/Makefile compat/Makefile 
mpi/Makefile cipher/Makefile random/Makefile doc/Makefile src/Makefile 
src/gcrypt.h src/libgcrypt-config src/libgcrypt.pc src/versioninfo.rc 
tests/Makefile"
+
+ac_config_files="$ac_config_files tests/hashtest-256g"
+
+ac_config_files="$ac_config_files tests/basic-disable-all-hwf"
+
+cat >confcache <<\_ACEOF
+# This file is a shell script that caches the results of configure
+# tests run on this system so they can be shared between configure
+# scripts and configure runs, see configure's option --config-cache.
+# It is not useful on other systems.  If it contains results you don't
+# want to keep, you may remove or edit it.
+#
+# config.status only pays attention to the cache file if you give it
+# the --recheck option to rerun configure.
+#
+# `ac_cv_env_foo' variables (set or unset) will be overridden when
+# loading this file, other *unset* `ac_cv_foo' will be assigned the
+# following values.
+
+_ACEOF
+
+# The following way of writing the cache mishandles newlines in values,
+# but we know of no workaround that is simple, portable, and efficient.
+# So, we kill variables containing newlines.
+# Ultrix sh set writes to stderr and can't be redirected directly,
+# and sets the high bit in the cache file unless we assign to the vars.
+(
+  for ac_var in `(set) 2>&1 | sed -n 's/^\([a-zA-Z_][a-zA-Z0-9_]*\)=.*/\1/p'`; 
do
+    eval ac_val=\$$ac_var
+    case $ac_val in #(
+    *${as_nl}*)
+      case $ac_var in #(
+      *_cv_*) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: cache 
variable $ac_var contains a newline" >&5
+printf "%s\n" "$as_me: WARNING: cache variable $ac_var contains a newline" 
>&2;} ;;
+      esac
+      case $ac_var in #(
+      _ | IFS | as_nl) ;; #(
+      BASH_ARGV | BASH_SOURCE) eval $ac_var= ;; #(
+      *) { eval $ac_var=; unset $ac_var;} ;;
+      esac ;;
+    esac
+  done
+
+  (set) 2>&1 |
+    case $as_nl`(ac_space=' '; set) 2>&1` in #(
+    *${as_nl}ac_space=\ *)
+      # `set' does not quote correctly, so add quotes: double-quote
+      # substitution turns \\\\ into \\, and sed turns \\ into \.
+      sed -n \
+       "s/'/'\\\\''/g;
+         s/^\\([_$as_cr_alnum]*_cv_[_$as_cr_alnum]*\\)=\\(.*\\)/\\1='\\2'/p"
+      ;; #(
+    *)
+      # `set' quotes correctly as required by POSIX, so do not add quotes.
+      sed -n "/^[_$as_cr_alnum]*_cv_[_$as_cr_alnum]*=/p"
+      ;;
+    esac |
+    sort
+) |
+  sed '
+     /^ac_cv_env_/b end
+     t clear
+     :clear
+     s/^\([^=]*\)=\(.*[{}].*\)$/test ${\1+y} || &/
+     t end
+     s/^\([^=]*\)=\(.*\)$/\1=${\1=\2}/
+     :end' >>confcache
+if diff "$cache_file" confcache >/dev/null 2>&1; then :; else
+  if test -w "$cache_file"; then
+    if test "x$cache_file" != "x/dev/null"; then
+      { printf "%s\n" "$as_me:${as_lineno-$LINENO}: updating cache 
$cache_file" >&5
+printf "%s\n" "$as_me: updating cache $cache_file" >&6;}
+      if test ! -f "$cache_file" || test -h "$cache_file"; then
+       cat confcache >"$cache_file"
+      else
+        case $cache_file in #(
+        */* | ?:*)
+         mv -f confcache "$cache_file"$$ &&
+         mv -f "$cache_file"$$ "$cache_file" ;; #(
+        *)
+         mv -f confcache "$cache_file" ;;
+       esac
+      fi
+    fi
+  else
+    { printf "%s\n" "$as_me:${as_lineno-$LINENO}: not updating unwritable 
cache $cache_file" >&5
+printf "%s\n" "$as_me: not updating unwritable cache $cache_file" >&6;}
+  fi
+fi
+rm -f confcache
+
+test "x$prefix" = xNONE && prefix=$ac_default_prefix
+# Let make expand exec_prefix.
+test "x$exec_prefix" = xNONE && exec_prefix='${prefix}'
+
+DEFS=-DHAVE_CONFIG_H
+
+ac_libobjs=
+ac_ltlibobjs=
+U=
+for ac_i in : $LIBOBJS; do test "x$ac_i" = x: && continue
+  # 1. Remove the extension, and $U if already installed.
+  ac_script='s/\$U\././;s/\.o$//;s/\.obj$//'
+  ac_i=`printf "%s\n" "$ac_i" | sed "$ac_script"`
+  # 2. Prepend LIBOBJDIR.  When used with automake>=1.10 LIBOBJDIR
+  #    will be set to the directory where LIBOBJS objects are built.
+  as_fn_append ac_libobjs " \${LIBOBJDIR}$ac_i\$U.$ac_objext"
+  as_fn_append ac_ltlibobjs " \${LIBOBJDIR}$ac_i"'$U.lo'
+done
+LIBOBJS=$ac_libobjs
+
+LTLIBOBJS=$ac_ltlibobjs
+
+
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking that generated files 
are newer than configure" >&5
+printf %s "checking that generated files are newer than configure... " >&6; }
+   if test -n "$am_sleep_pid"; then
+     # Hide warnings about reused PIDs.
+     wait $am_sleep_pid 2>/dev/null
+   fi
+   { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: done" >&5
+printf "%s\n" "done" >&6; }
+ if test -n "$EXEEXT"; then
+  am__EXEEXT_TRUE=
+  am__EXEEXT_FALSE='#'
+else
+  am__EXEEXT_TRUE='#'
+  am__EXEEXT_FALSE=
+fi
+
+if test -z "${MAINTAINER_MODE_TRUE}" && test -z "${MAINTAINER_MODE_FALSE}"; 
then
+  as_fn_error $? "conditional \"MAINTAINER_MODE\" was never defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
+if test -z "${AMDEP_TRUE}" && test -z "${AMDEP_FALSE}"; then
+  as_fn_error $? "conditional \"AMDEP\" was never defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
+if test -z "${am__fastdepCC_TRUE}" && test -z "${am__fastdepCC_FALSE}"; then
+  as_fn_error $? "conditional \"am__fastdepCC\" was never defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
+if test -z "${am__fastdepCC_TRUE}" && test -z "${am__fastdepCC_FALSE}"; then
+  as_fn_error $? "conditional \"am__fastdepCC\" was never defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
+if test -z "${am__fastdepCCAS_TRUE}" && test -z "${am__fastdepCCAS_FALSE}"; 
then
+  as_fn_error $? "conditional \"am__fastdepCCAS\" was never defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
+if test -z "${HAVE_W32_SYSTEM_TRUE}" && test -z "${HAVE_W32_SYSTEM_FALSE}"; 
then
+  as_fn_error $? "conditional \"HAVE_W32_SYSTEM\" was never defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
+if test -z "${HAVE_W32CE_SYSTEM_TRUE}" && test -z 
"${HAVE_W32CE_SYSTEM_FALSE}"; then
+  as_fn_error $? "conditional \"HAVE_W32CE_SYSTEM\" was never defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
+
+if test -z "${ENABLE_RANDOM_DAEMON_TRUE}" && test -z 
"${ENABLE_RANDOM_DAEMON_FALSE}"; then
+  as_fn_error $? "conditional \"ENABLE_RANDOM_DAEMON\" was never defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
+if test -z "${USE_HMAC_BINARY_CHECK_TRUE}" && test -z 
"${USE_HMAC_BINARY_CHECK_FALSE}"; then
+  as_fn_error $? "conditional \"USE_HMAC_BINARY_CHECK\" was never defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
+if test -z "${ENABLE_O_FLAG_MUNGING_TRUE}" && test -z 
"${ENABLE_O_FLAG_MUNGING_FALSE}"; then
+  as_fn_error $? "conditional \"ENABLE_O_FLAG_MUNGING\" was never defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
+if test -z "${ENABLE_INSTRUMENTATION_MUNGING_TRUE}" && test -z 
"${ENABLE_INSTRUMENTATION_MUNGING_FALSE}"; then
+  as_fn_error $? "conditional \"ENABLE_INSTRUMENTATION_MUNGING\" was never 
defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
+if test -z "${HAVE_LD_VERSION_SCRIPT_TRUE}" && test -z 
"${HAVE_LD_VERSION_SCRIPT_FALSE}"; then
+  as_fn_error $? "conditional \"HAVE_LD_VERSION_SCRIPT\" was never defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
+if test -z "${MPI_MOD_ASM_MPIH_ADD1_TRUE}" && test -z 
"${MPI_MOD_ASM_MPIH_ADD1_FALSE}"; then
+  as_fn_error $? "conditional \"MPI_MOD_ASM_MPIH_ADD1\" was never defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
+if test -z "${MPI_MOD_ASM_MPIH_SUB1_TRUE}" && test -z 
"${MPI_MOD_ASM_MPIH_SUB1_FALSE}"; then
+  as_fn_error $? "conditional \"MPI_MOD_ASM_MPIH_SUB1\" was never defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
+if test -z "${MPI_MOD_ASM_MPIH_MUL1_TRUE}" && test -z 
"${MPI_MOD_ASM_MPIH_MUL1_FALSE}"; then
+  as_fn_error $? "conditional \"MPI_MOD_ASM_MPIH_MUL1\" was never defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
+if test -z "${MPI_MOD_ASM_MPIH_MUL2_TRUE}" && test -z 
"${MPI_MOD_ASM_MPIH_MUL2_FALSE}"; then
+  as_fn_error $? "conditional \"MPI_MOD_ASM_MPIH_MUL2\" was never defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
+if test -z "${MPI_MOD_ASM_MPIH_MUL3_TRUE}" && test -z 
"${MPI_MOD_ASM_MPIH_MUL3_FALSE}"; then
+  as_fn_error $? "conditional \"MPI_MOD_ASM_MPIH_MUL3\" was never defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
+if test -z "${MPI_MOD_ASM_MPIH_LSHIFT_TRUE}" && test -z 
"${MPI_MOD_ASM_MPIH_LSHIFT_FALSE}"; then
+  as_fn_error $? "conditional \"MPI_MOD_ASM_MPIH_LSHIFT\" was never defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
+if test -z "${MPI_MOD_ASM_MPIH_RSHIFT_TRUE}" && test -z 
"${MPI_MOD_ASM_MPIH_RSHIFT_FALSE}"; then
+  as_fn_error $? "conditional \"MPI_MOD_ASM_MPIH_RSHIFT\" was never defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
+if test -z "${MPI_MOD_ASM_UDIV_TRUE}" && test -z "${MPI_MOD_ASM_UDIV_FALSE}"; 
then
+  as_fn_error $? "conditional \"MPI_MOD_ASM_UDIV\" was never defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
+if test -z "${MPI_MOD_ASM_UDIV_QRNND_TRUE}" && test -z 
"${MPI_MOD_ASM_UDIV_QRNND_FALSE}"; then
+  as_fn_error $? "conditional \"MPI_MOD_ASM_UDIV_QRNND\" was never defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
+if test -z "${MPI_MOD_C_MPIH_ADD1_TRUE}" && test -z 
"${MPI_MOD_C_MPIH_ADD1_FALSE}"; then
+  as_fn_error $? "conditional \"MPI_MOD_C_MPIH_ADD1\" was never defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
+if test -z "${MPI_MOD_C_MPIH_SUB1_TRUE}" && test -z 
"${MPI_MOD_C_MPIH_SUB1_FALSE}"; then
+  as_fn_error $? "conditional \"MPI_MOD_C_MPIH_SUB1\" was never defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
+if test -z "${MPI_MOD_C_MPIH_MUL1_TRUE}" && test -z 
"${MPI_MOD_C_MPIH_MUL1_FALSE}"; then
+  as_fn_error $? "conditional \"MPI_MOD_C_MPIH_MUL1\" was never defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
+if test -z "${MPI_MOD_C_MPIH_MUL2_TRUE}" && test -z 
"${MPI_MOD_C_MPIH_MUL2_FALSE}"; then
+  as_fn_error $? "conditional \"MPI_MOD_C_MPIH_MUL2\" was never defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
+if test -z "${MPI_MOD_C_MPIH_MUL3_TRUE}" && test -z 
"${MPI_MOD_C_MPIH_MUL3_FALSE}"; then
+  as_fn_error $? "conditional \"MPI_MOD_C_MPIH_MUL3\" was never defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
+if test -z "${MPI_MOD_C_MPIH_LSHIFT_TRUE}" && test -z 
"${MPI_MOD_C_MPIH_LSHIFT_FALSE}"; then
+  as_fn_error $? "conditional \"MPI_MOD_C_MPIH_LSHIFT\" was never defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
+if test -z "${MPI_MOD_C_MPIH_RSHIFT_TRUE}" && test -z 
"${MPI_MOD_C_MPIH_RSHIFT_FALSE}"; then
+  as_fn_error $? "conditional \"MPI_MOD_C_MPIH_RSHIFT\" was never defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
+if test -z "${MPI_MOD_C_UDIV_TRUE}" && test -z "${MPI_MOD_C_UDIV_FALSE}"; then
+  as_fn_error $? "conditional \"MPI_MOD_C_UDIV\" was never defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
+if test -z "${MPI_MOD_C_UDIV_QRNND_TRUE}" && test -z 
"${MPI_MOD_C_UDIV_QRNND_FALSE}"; then
+  as_fn_error $? "conditional \"MPI_MOD_C_UDIV_QRNND\" was never defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
+if test -z "${ENABLE_PPC_VCRYPTO_EXTRA_CFLAGS_TRUE}" && test -z 
"${ENABLE_PPC_VCRYPTO_EXTRA_CFLAGS_FALSE}"; then
+  as_fn_error $? "conditional \"ENABLE_PPC_VCRYPTO_EXTRA_CFLAGS\" was never 
defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
+if test -z "${CROSS_COMPILING_TRUE}" && test -z "${CROSS_COMPILING_FALSE}"; 
then
+  as_fn_error $? "conditional \"CROSS_COMPILING\" was never defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
+if test -z "${USE_DSA_TRUE}" && test -z "${USE_DSA_FALSE}"; then
+  as_fn_error $? "conditional \"USE_DSA\" was never defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
+if test -z "${USE_RSA_TRUE}" && test -z "${USE_RSA_FALSE}"; then
+  as_fn_error $? "conditional \"USE_RSA\" was never defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
+if test -z "${USE_ELGAMAL_TRUE}" && test -z "${USE_ELGAMAL_FALSE}"; then
+  as_fn_error $? "conditional \"USE_ELGAMAL\" was never defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
+if test -z "${USE_ECC_TRUE}" && test -z "${USE_ECC_FALSE}"; then
+  as_fn_error $? "conditional \"USE_ECC\" was never defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
+if test -z "${BUILD_DOC_TRUE}" && test -z "${BUILD_DOC_FALSE}"; then
+  as_fn_error $? "conditional \"BUILD_DOC\" was never defined.
+Usually this means the macro was only invoked conditionally." "$LINENO" 5
+fi
+
+: "${CONFIG_STATUS=./config.status}"
+ac_write_fail=0
+ac_clean_files_save=$ac_clean_files
+ac_clean_files="$ac_clean_files $CONFIG_STATUS"
+{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: creating $CONFIG_STATUS" >&5
+printf "%s\n" "$as_me: creating $CONFIG_STATUS" >&6;}
+as_write_fail=0
+cat >$CONFIG_STATUS <<_ASEOF || as_write_fail=1
+#! $SHELL
+# Generated by $as_me.
+# Run this file to recreate the current configuration.
+# Compiler output produced by configure, useful for debugging
+# configure, is in config.log if it exists.
+
+debug=false
+ac_cs_recheck=false
+ac_cs_silent=false
+
+SHELL=\${CONFIG_SHELL-$SHELL}
+export SHELL
+_ASEOF
+cat >>$CONFIG_STATUS <<\_ASEOF || as_write_fail=1
+## -------------------- ##
+## M4sh Initialization. ##
+## -------------------- ##
+
+# Be more Bourne compatible
+DUALCASE=1; export DUALCASE # for MKS sh
+as_nop=:
+if test ${ZSH_VERSION+y} && (emulate sh) >/dev/null 2>&1
+then :
+  emulate sh
+  NULLCMD=:
+  # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which
+  # is contrary to our usage.  Disable this feature.
+  alias -g '${1+"$@"}'='"$@"'
+  setopt NO_GLOB_SUBST
+else $as_nop
+  case `(set -o) 2>/dev/null` in #(
+  *posix*) :
+    set -o posix ;; #(
+  *) :
+     ;;
+esac
+fi
+
+
+
+# Reset variables that may have inherited troublesome values from
+# the environment.
+
+# IFS needs to be set, to space, tab, and newline, in precisely that order.
+# (If _AS_PATH_WALK were called with IFS unset, it would have the
+# side effect of setting IFS to empty, thus disabling word splitting.)
+# Quoting is to prevent editors from complaining about space-tab.
+as_nl='
+'
+export as_nl
+IFS=" ""       $as_nl"
+
+PS1='$ '
+PS2='> '
+PS4='+ '
+
+# Ensure predictable behavior from utilities with locale-dependent output.
+LC_ALL=C
+export LC_ALL
+LANGUAGE=C
+export LANGUAGE
+
+# We cannot yet rely on "unset" to work, but we need these variables
+# to be unset--not just set to an empty or harmless value--now, to
+# avoid bugs in old shells (e.g. pre-3.0 UWIN ksh).  This construct
+# also avoids known problems related to "unset" and subshell syntax
+# in other old shells (e.g. bash 2.01 and pdksh 5.2.14).
+for as_var in BASH_ENV ENV MAIL MAILPATH CDPATH
+do eval test \${$as_var+y} \
+  && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || :
+done
+
+# Ensure that fds 0, 1, and 2 are open.
+if (exec 3>&0) 2>/dev/null; then :; else exec 0</dev/null; fi
+if (exec 3>&1) 2>/dev/null; then :; else exec 1>/dev/null; fi
+if (exec 3>&2)            ; then :; else exec 2>/dev/null; fi
+
+# The user is always right.
+if ${PATH_SEPARATOR+false} :; then
+  PATH_SEPARATOR=:
+  (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && {
+    (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 ||
+      PATH_SEPARATOR=';'
+  }
+fi
+
+
+# Find who we are.  Look in the path if we contain no directory separator.
+as_myself=
+case $0 in #((
+  *[\\/]* ) as_myself=$0 ;;
+  *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR
+for as_dir in $PATH
+do
+  IFS=$as_save_IFS
+  case $as_dir in #(((
+    '') as_dir=./ ;;
+    */) ;;
+    *) as_dir=$as_dir/ ;;
+  esac
+    test -r "$as_dir$0" && as_myself=$as_dir$0 && break
+  done
+IFS=$as_save_IFS
+
+     ;;
+esac
+# We did not find ourselves, most probably we were run as `sh COMMAND'
+# in which case we are not to be found in the path.
+if test "x$as_myself" = x; then
+  as_myself=$0
+fi
+if test ! -f "$as_myself"; then
+  printf "%s\n" "$as_myself: error: cannot find myself; rerun with an absolute 
file name" >&2
+  exit 1
+fi
+
+
+
+# as_fn_error STATUS ERROR [LINENO LOG_FD]
+# ----------------------------------------
+# Output "`basename $0`: error: ERROR" to stderr. If LINENO and LOG_FD are
+# provided, also output the error to LOG_FD, referencing LINENO. Then exit the
+# script with STATUS, using 1 if that was 0.
+as_fn_error ()
+{
+  as_status=$1; test $as_status -eq 0 && as_status=1
+  if test "$4"; then
+    as_lineno=${as_lineno-"$3"} 
as_lineno_stack=as_lineno_stack=$as_lineno_stack
+    printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: $2" >&$4
+  fi
+  printf "%s\n" "$as_me: error: $2" >&2
+  as_fn_exit $as_status
+} # as_fn_error
+
+
+
+# as_fn_set_status STATUS
+# -----------------------
+# Set $? to STATUS, without forking.
+as_fn_set_status ()
+{
+  return $1
+} # as_fn_set_status
+
+# as_fn_exit STATUS
+# -----------------
+# Exit the shell with STATUS, even in a "trap 0" or "set -e" context.
+as_fn_exit ()
+{
+  set +e
+  as_fn_set_status $1
+  exit $1
+} # as_fn_exit
+
+# as_fn_unset VAR
+# ---------------
+# Portably unset VAR.
+as_fn_unset ()
+{
+  { eval $1=; unset $1;}
+}
+as_unset=as_fn_unset
+
+# as_fn_append VAR VALUE
+# ----------------------
+# Append the text in VALUE to the end of the definition contained in VAR. Take
+# advantage of any shell optimizations that allow amortized linear growth over
+# repeated appends, instead of the typical quadratic growth present in naive
+# implementations.
+if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null
+then :
+  eval 'as_fn_append ()
+  {
+    eval $1+=\$2
+  }'
+else $as_nop
+  as_fn_append ()
+  {
+    eval $1=\$$1\$2
+  }
+fi # as_fn_append
+
+# as_fn_arith ARG...
+# ------------------
+# Perform arithmetic evaluation on the ARGs, and store the result in the
+# global $as_val. Take advantage of shells that can avoid forks. The arguments
+# must be portable across $(()) and expr.
+if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null
+then :
+  eval 'as_fn_arith ()
+  {
+    as_val=$(( $* ))
+  }'
+else $as_nop
+  as_fn_arith ()
+  {
+    as_val=`expr "$@" || test $? -eq 1`
+  }
+fi # as_fn_arith
+
+
+if expr a : '\(a\)' >/dev/null 2>&1 &&
+   test "X`expr 00001 : '.*\(...\)'`" = X001; then
+  as_expr=expr
+else
+  as_expr=false
+fi
+
+if (basename -- /) >/dev/null 2>&1 && test "X`basename -- / 2>&1`" = "X/"; then
+  as_basename=basename
+else
+  as_basename=false
+fi
+
+if (as_dir=`dirname -- /` && test "X$as_dir" = X/) >/dev/null 2>&1; then
+  as_dirname=dirname
+else
+  as_dirname=false
+fi
+
+as_me=`$as_basename -- "$0" ||
+$as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \
+        X"$0" : 'X\(//\)$' \| \
+        X"$0" : 'X\(/\)' \| . 2>/dev/null ||
+printf "%s\n" X/"$0" |
+    sed '/^.*\/\([^/][^/]*\)\/*$/{
+           s//\1/
+           q
+         }
+         /^X\/\(\/\/\)$/{
+           s//\1/
+           q
+         }
+         /^X\/\(\/\).*/{
+           s//\1/
+           q
+         }
+         s/.*/./; q'`
+
+# Avoid depending upon Character Ranges.
+as_cr_letters='abcdefghijklmnopqrstuvwxyz'
+as_cr_LETTERS='ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+as_cr_Letters=$as_cr_letters$as_cr_LETTERS
+as_cr_digits='0123456789'
+as_cr_alnum=$as_cr_Letters$as_cr_digits
+
+
+# Determine whether it's possible to make 'echo' print without a newline.
+# These variables are no longer used directly by Autoconf, but are AC_SUBSTed
+# for compatibility with existing Makefiles.
+ECHO_C= ECHO_N= ECHO_T=
+case `echo -n x` in #(((((
+-n*)
+  case `echo 'xy\c'` in
+  *c*) ECHO_T='        ';;     # ECHO_T is single tab character.
+  xy)  ECHO_C='\c';;
+  *)   echo `echo ksh88 bug on AIX 6.1` > /dev/null
+       ECHO_T='        ';;
+  esac;;
+*)
+  ECHO_N='-n';;
+esac
+
+# For backward compatibility with old third-party macros, we provide
+# the shell variables $as_echo and $as_echo_n.  New code should use
+# AS_ECHO(["message"]) and AS_ECHO_N(["message"]), respectively.
+as_echo='printf %s\n'
+as_echo_n='printf %s'
+
+rm -f conf$$ conf$$.exe conf$$.file
+if test -d conf$$.dir; then
+  rm -f conf$$.dir/conf$$.file
+else
+  rm -f conf$$.dir
+  mkdir conf$$.dir 2>/dev/null
+fi
+if (echo >conf$$.file) 2>/dev/null; then
+  if ln -s conf$$.file conf$$ 2>/dev/null; then
+    as_ln_s='ln -s'
+    # ... but there are two gotchas:
+    # 1) On MSYS, both `ln -s file dir' and `ln file dir' fail.
+    # 2) DJGPP < 2.04 has no symlinks; `ln -s' creates a wrapper executable.
+    # In both cases, we have to default to `cp -pR'.
+    ln -s conf$$.file conf$$.dir 2>/dev/null && test ! -f conf$$.exe ||
+      as_ln_s='cp -pR'
+  elif ln conf$$.file conf$$ 2>/dev/null; then
+    as_ln_s=ln
+  else
+    as_ln_s='cp -pR'
+  fi
+else
+  as_ln_s='cp -pR'
+fi
+rm -f conf$$ conf$$.exe conf$$.dir/conf$$.file conf$$.file
+rmdir conf$$.dir 2>/dev/null
+
+
+# as_fn_mkdir_p
+# -------------
+# Create "$as_dir" as a directory, including parents if necessary.
+as_fn_mkdir_p ()
+{
+
+  case $as_dir in #(
+  -*) as_dir=./$as_dir;;
+  esac
+  test -d "$as_dir" || eval $as_mkdir_p || {
+    as_dirs=
+    while :; do
+      case $as_dir in #(
+      *\'*) as_qdir=`printf "%s\n" "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'(
+      *) as_qdir=$as_dir;;
+      esac
+      as_dirs="'$as_qdir' $as_dirs"
+      as_dir=`$as_dirname -- "$as_dir" ||
+$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+        X"$as_dir" : 'X\(//\)[^/]' \| \
+        X"$as_dir" : 'X\(//\)$' \| \
+        X"$as_dir" : 'X\(/\)' \| . 2>/dev/null ||
+printf "%s\n" X"$as_dir" |
+    sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
+           s//\1/
+           q
+         }
+         /^X\(\/\/\)[^/].*/{
+           s//\1/
+           q
+         }
+         /^X\(\/\/\)$/{
+           s//\1/
+           q
+         }
+         /^X\(\/\).*/{
+           s//\1/
+           q
+         }
+         s/.*/./; q'`
+      test -d "$as_dir" && break
+    done
+    test -z "$as_dirs" || eval "mkdir $as_dirs"
+  } || test -d "$as_dir" || as_fn_error $? "cannot create directory $as_dir"
+
+
+} # as_fn_mkdir_p
+if mkdir -p . 2>/dev/null; then
+  as_mkdir_p='mkdir -p "$as_dir"'
+else
+  test -d ./-p && rmdir ./-p
+  as_mkdir_p=false
+fi
+
+
+# as_fn_executable_p FILE
+# -----------------------
+# Test if FILE is an executable regular file.
+as_fn_executable_p ()
+{
+  test -f "$1" && test -x "$1"
+} # as_fn_executable_p
+as_test_x='test -x'
+as_executable_p=as_fn_executable_p
+
+# Sed expression to map a string onto a valid CPP name.
+as_tr_cpp="eval sed 
'y%*$as_cr_letters%P$as_cr_LETTERS%;s%[^_$as_cr_alnum]%_%g'"
+
+# Sed expression to map a string onto a valid variable name.
+as_tr_sh="eval sed 'y%*+%pp%;s%[^_$as_cr_alnum]%_%g'"
+
+
+exec 6>&1
+## ----------------------------------- ##
+## Main body of $CONFIG_STATUS script. ##
+## ----------------------------------- ##
+_ASEOF
+test $as_write_fail = 0 && chmod +x $CONFIG_STATUS || ac_write_fail=1
+
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+# Save the log message, to keep $0 and so on meaningful, and to
+# report actual input values of CONFIG_FILES etc. instead of their
+# values after options handling.
+ac_log="
+This file was extended by libgcrypt $as_me 1.10.3, which was
+generated by GNU Autoconf 2.71.  Invocation command line was
+
+  CONFIG_FILES    = $CONFIG_FILES
+  CONFIG_HEADERS  = $CONFIG_HEADERS
+  CONFIG_LINKS    = $CONFIG_LINKS
+  CONFIG_COMMANDS = $CONFIG_COMMANDS
+  $ $0 $@
+
+on `(hostname || uname -n) 2>/dev/null | sed 1q`
+"
+
+_ACEOF
+
+case $ac_config_files in *"
+"*) set x $ac_config_files; shift; ac_config_files=$*;;
+esac
+
+case $ac_config_headers in *"
+"*) set x $ac_config_headers; shift; ac_config_headers=$*;;
+esac
+
+
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+# Files that config.status was made for.
+config_files="$ac_config_files"
+config_headers="$ac_config_headers"
+config_links="$ac_config_links"
+config_commands="$ac_config_commands"
+
+_ACEOF
+
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+ac_cs_usage="\
+\`$as_me' instantiates files and other configuration actions
+from templates according to the current configuration.  Unless the files
+and actions are specified as TAGs, all are instantiated by default.
+
+Usage: $0 [OPTION]... [TAG]...
+
+  -h, --help       print this help, then exit
+  -V, --version    print version number and configuration settings, then exit
+      --config     print configuration, then exit
+  -q, --quiet, --silent
+                   do not print progress messages
+  -d, --debug      don't remove temporary files
+      --recheck    update $as_me by reconfiguring in the same conditions
+      --file=FILE[:TEMPLATE]
+                   instantiate the configuration file FILE
+      --header=FILE[:TEMPLATE]
+                   instantiate the configuration header FILE
+
+Configuration files:
+$config_files
+
+Configuration headers:
+$config_headers
+
+Configuration links:
+$config_links
+
+Configuration commands:
+$config_commands
+
+Report bugs to <https://bugs.gnupg.org>."
+
+_ACEOF
+ac_cs_config=`printf "%s\n" "$ac_configure_args" | sed "$ac_safe_unquote"`
+ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; 
s/'/'\\\\\\\\''/g"`
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+ac_cs_config='$ac_cs_config_escaped'
+ac_cs_version="\\
+libgcrypt config.status 1.10.3
+configured by $0, generated by GNU Autoconf 2.71,
+  with options \\"\$ac_cs_config\\"
+
+Copyright (C) 2021 Free Software Foundation, Inc.
+This config.status script is free software; the Free Software Foundation
+gives unlimited permission to copy, distribute and modify it."
+
+ac_pwd='$ac_pwd'
+srcdir='$srcdir'
+INSTALL='$INSTALL'
+MKDIR_P='$MKDIR_P'
+AWK='$AWK'
+test -n "\$AWK" || AWK=awk
+_ACEOF
+
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+# The default lists apply if the user does not specify any file.
+ac_need_defaults=:
+while test $# != 0
+do
+  case $1 in
+  --*=?*)
+    ac_option=`expr "X$1" : 'X\([^=]*\)='`
+    ac_optarg=`expr "X$1" : 'X[^=]*=\(.*\)'`
+    ac_shift=:
+    ;;
+  --*=)
+    ac_option=`expr "X$1" : 'X\([^=]*\)='`
+    ac_optarg=
+    ac_shift=:
+    ;;
+  *)
+    ac_option=$1
+    ac_optarg=$2
+    ac_shift=shift
+    ;;
+  esac
+
+  case $ac_option in
+  # Handling of the options.
+  -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r)
+    ac_cs_recheck=: ;;
+  --version | --versio | --versi | --vers | --ver | --ve | --v | -V )
+    printf "%s\n" "$ac_cs_version"; exit ;;
+  --config | --confi | --conf | --con | --co | --c )
+    printf "%s\n" "$ac_cs_config"; exit ;;
+  --debug | --debu | --deb | --de | --d | -d )
+    debug=: ;;
+  --file | --fil | --fi | --f )
+    $ac_shift
+    case $ac_optarg in
+    *\'*) ac_optarg=`printf "%s\n" "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;;
+    '') as_fn_error $? "missing file argument" ;;
+    esac
+    as_fn_append CONFIG_FILES " '$ac_optarg'"
+    ac_need_defaults=false;;
+  --header | --heade | --head | --hea )
+    $ac_shift
+    case $ac_optarg in
+    *\'*) ac_optarg=`printf "%s\n" "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;;
+    esac
+    as_fn_append CONFIG_HEADERS " '$ac_optarg'"
+    ac_need_defaults=false;;
+  --he | --h)
+    # Conflict between --help and --header
+    as_fn_error $? "ambiguous option: \`$1'
+Try \`$0 --help' for more information.";;
+  --help | --hel | -h )
+    printf "%s\n" "$ac_cs_usage"; exit ;;
+  -q | -quiet | --quiet | --quie | --qui | --qu | --q \
+  | -silent | --silent | --silen | --sile | --sil | --si | --s)
+    ac_cs_silent=: ;;
+
+  # This is an error.
+  -*) as_fn_error $? "unrecognized option: \`$1'
+Try \`$0 --help' for more information." ;;
+
+  *) as_fn_append ac_config_targets " $1"
+     ac_need_defaults=false ;;
+
+  esac
+  shift
+done
+
+ac_configure_extra_args=
+
+if $ac_cs_silent; then
+  exec 6>/dev/null
+  ac_configure_extra_args="$ac_configure_extra_args --silent"
+fi
+
+_ACEOF
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+if \$ac_cs_recheck; then
+  set X $SHELL '$0' $ac_configure_args \$ac_configure_extra_args --no-create 
--no-recursion
+  shift
+  \printf "%s\n" "running CONFIG_SHELL=$SHELL \$*" >&6
+  CONFIG_SHELL='$SHELL'
+  export CONFIG_SHELL
+  exec "\$@"
+fi
+
+_ACEOF
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+exec 5>>config.log
+{
+  echo
+  sed 'h;s/./-/g;s/^.../## /;s/...$/ ##/;p;x;p;x' <<_ASBOX
+## Running $as_me. ##
+_ASBOX
+  printf "%s\n" "$ac_log"
+} >&5
+
+_ACEOF
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+#
+# INIT-COMMANDS
+#
+AMDEP_TRUE="$AMDEP_TRUE" MAKE="${MAKE-make}"
+
+
+# The HP-UX ksh and POSIX shell print the target directory to stdout
+# if CDPATH is set.
+(unset CDPATH) >/dev/null 2>&1 && unset CDPATH
+
+sed_quote_subst='$sed_quote_subst'
+double_quote_subst='$double_quote_subst'
+delay_variable_subst='$delay_variable_subst'
+macro_version='`$ECHO "$macro_version" | $SED "$delay_single_quote_subst"`'
+macro_revision='`$ECHO "$macro_revision" | $SED "$delay_single_quote_subst"`'
+AS='`$ECHO "$AS" | $SED "$delay_single_quote_subst"`'
+DLLTOOL='`$ECHO "$DLLTOOL" | $SED "$delay_single_quote_subst"`'
+OBJDUMP='`$ECHO "$OBJDUMP" | $SED "$delay_single_quote_subst"`'
+enable_static='`$ECHO "$enable_static" | $SED "$delay_single_quote_subst"`'
+enable_shared='`$ECHO "$enable_shared" | $SED "$delay_single_quote_subst"`'
+pic_mode='`$ECHO "$pic_mode" | $SED "$delay_single_quote_subst"`'
+enable_fast_install='`$ECHO "$enable_fast_install" | $SED 
"$delay_single_quote_subst"`'
+SHELL='`$ECHO "$SHELL" | $SED "$delay_single_quote_subst"`'
+ECHO='`$ECHO "$ECHO" | $SED "$delay_single_quote_subst"`'
+PATH_SEPARATOR='`$ECHO "$PATH_SEPARATOR" | $SED "$delay_single_quote_subst"`'
+host_alias='`$ECHO "$host_alias" | $SED "$delay_single_quote_subst"`'
+host='`$ECHO "$host" | $SED "$delay_single_quote_subst"`'
+host_os='`$ECHO "$host_os" | $SED "$delay_single_quote_subst"`'
+build_alias='`$ECHO "$build_alias" | $SED "$delay_single_quote_subst"`'
+build='`$ECHO "$build" | $SED "$delay_single_quote_subst"`'
+build_os='`$ECHO "$build_os" | $SED "$delay_single_quote_subst"`'
+SED='`$ECHO "$SED" | $SED "$delay_single_quote_subst"`'
+Xsed='`$ECHO "$Xsed" | $SED "$delay_single_quote_subst"`'
+GREP='`$ECHO "$GREP" | $SED "$delay_single_quote_subst"`'
+EGREP='`$ECHO "$EGREP" | $SED "$delay_single_quote_subst"`'
+FGREP='`$ECHO "$FGREP" | $SED "$delay_single_quote_subst"`'
+LD='`$ECHO "$LD" | $SED "$delay_single_quote_subst"`'
+NM='`$ECHO "$NM" | $SED "$delay_single_quote_subst"`'
+LN_S='`$ECHO "$LN_S" | $SED "$delay_single_quote_subst"`'
+max_cmd_len='`$ECHO "$max_cmd_len" | $SED "$delay_single_quote_subst"`'
+ac_objext='`$ECHO "$ac_objext" | $SED "$delay_single_quote_subst"`'
+exeext='`$ECHO "$exeext" | $SED "$delay_single_quote_subst"`'
+lt_unset='`$ECHO "$lt_unset" | $SED "$delay_single_quote_subst"`'
+lt_SP2NL='`$ECHO "$lt_SP2NL" | $SED "$delay_single_quote_subst"`'
+lt_NL2SP='`$ECHO "$lt_NL2SP" | $SED "$delay_single_quote_subst"`'
+lt_cv_to_host_file_cmd='`$ECHO "$lt_cv_to_host_file_cmd" | $SED 
"$delay_single_quote_subst"`'
+lt_cv_to_tool_file_cmd='`$ECHO "$lt_cv_to_tool_file_cmd" | $SED 
"$delay_single_quote_subst"`'
+reload_flag='`$ECHO "$reload_flag" | $SED "$delay_single_quote_subst"`'
+reload_cmds='`$ECHO "$reload_cmds" | $SED "$delay_single_quote_subst"`'
+deplibs_check_method='`$ECHO "$deplibs_check_method" | $SED 
"$delay_single_quote_subst"`'
+file_magic_cmd='`$ECHO "$file_magic_cmd" | $SED "$delay_single_quote_subst"`'
+file_magic_glob='`$ECHO "$file_magic_glob" | $SED "$delay_single_quote_subst"`'
+want_nocaseglob='`$ECHO "$want_nocaseglob" | $SED "$delay_single_quote_subst"`'
+sharedlib_from_linklib_cmd='`$ECHO "$sharedlib_from_linklib_cmd" | $SED 
"$delay_single_quote_subst"`'
+AR='`$ECHO "$AR" | $SED "$delay_single_quote_subst"`'
+AR_FLAGS='`$ECHO "$AR_FLAGS" | $SED "$delay_single_quote_subst"`'
+archiver_list_spec='`$ECHO "$archiver_list_spec" | $SED 
"$delay_single_quote_subst"`'
+STRIP='`$ECHO "$STRIP" | $SED "$delay_single_quote_subst"`'
+RANLIB='`$ECHO "$RANLIB" | $SED "$delay_single_quote_subst"`'
+old_postinstall_cmds='`$ECHO "$old_postinstall_cmds" | $SED 
"$delay_single_quote_subst"`'
+old_postuninstall_cmds='`$ECHO "$old_postuninstall_cmds" | $SED 
"$delay_single_quote_subst"`'
+old_archive_cmds='`$ECHO "$old_archive_cmds" | $SED 
"$delay_single_quote_subst"`'
+lock_old_archive_extraction='`$ECHO "$lock_old_archive_extraction" | $SED 
"$delay_single_quote_subst"`'
+CC='`$ECHO "$CC" | $SED "$delay_single_quote_subst"`'
+CFLAGS='`$ECHO "$CFLAGS" | $SED "$delay_single_quote_subst"`'
+compiler='`$ECHO "$compiler" | $SED "$delay_single_quote_subst"`'
+GCC='`$ECHO "$GCC" | $SED "$delay_single_quote_subst"`'
+lt_cv_sys_global_symbol_pipe='`$ECHO "$lt_cv_sys_global_symbol_pipe" | $SED 
"$delay_single_quote_subst"`'
+lt_cv_sys_global_symbol_to_cdecl='`$ECHO "$lt_cv_sys_global_symbol_to_cdecl" | 
$SED "$delay_single_quote_subst"`'
+lt_cv_sys_global_symbol_to_c_name_address='`$ECHO 
"$lt_cv_sys_global_symbol_to_c_name_address" | $SED 
"$delay_single_quote_subst"`'
+lt_cv_sys_global_symbol_to_c_name_address_lib_prefix='`$ECHO 
"$lt_cv_sys_global_symbol_to_c_name_address_lib_prefix" | $SED 
"$delay_single_quote_subst"`'
+nm_file_list_spec='`$ECHO "$nm_file_list_spec" | $SED 
"$delay_single_quote_subst"`'
+lt_sysroot='`$ECHO "$lt_sysroot" | $SED "$delay_single_quote_subst"`'
+objdir='`$ECHO "$objdir" | $SED "$delay_single_quote_subst"`'
+MAGIC_CMD='`$ECHO "$MAGIC_CMD" | $SED "$delay_single_quote_subst"`'
+lt_prog_compiler_no_builtin_flag='`$ECHO "$lt_prog_compiler_no_builtin_flag" | 
$SED "$delay_single_quote_subst"`'
+lt_prog_compiler_pic='`$ECHO "$lt_prog_compiler_pic" | $SED 
"$delay_single_quote_subst"`'
+lt_prog_compiler_wl='`$ECHO "$lt_prog_compiler_wl" | $SED 
"$delay_single_quote_subst"`'
+lt_prog_compiler_static='`$ECHO "$lt_prog_compiler_static" | $SED 
"$delay_single_quote_subst"`'
+lt_cv_prog_compiler_c_o='`$ECHO "$lt_cv_prog_compiler_c_o" | $SED 
"$delay_single_quote_subst"`'
+need_locks='`$ECHO "$need_locks" | $SED "$delay_single_quote_subst"`'
+MANIFEST_TOOL='`$ECHO "$MANIFEST_TOOL" | $SED "$delay_single_quote_subst"`'
+DSYMUTIL='`$ECHO "$DSYMUTIL" | $SED "$delay_single_quote_subst"`'
+NMEDIT='`$ECHO "$NMEDIT" | $SED "$delay_single_quote_subst"`'
+LIPO='`$ECHO "$LIPO" | $SED "$delay_single_quote_subst"`'
+OTOOL='`$ECHO "$OTOOL" | $SED "$delay_single_quote_subst"`'
+OTOOL64='`$ECHO "$OTOOL64" | $SED "$delay_single_quote_subst"`'
+libext='`$ECHO "$libext" | $SED "$delay_single_quote_subst"`'
+shrext_cmds='`$ECHO "$shrext_cmds" | $SED "$delay_single_quote_subst"`'
+extract_expsyms_cmds='`$ECHO "$extract_expsyms_cmds" | $SED 
"$delay_single_quote_subst"`'
+archive_cmds_need_lc='`$ECHO "$archive_cmds_need_lc" | $SED 
"$delay_single_quote_subst"`'
+enable_shared_with_static_runtimes='`$ECHO 
"$enable_shared_with_static_runtimes" | $SED "$delay_single_quote_subst"`'
+export_dynamic_flag_spec='`$ECHO "$export_dynamic_flag_spec" | $SED 
"$delay_single_quote_subst"`'
+whole_archive_flag_spec='`$ECHO "$whole_archive_flag_spec" | $SED 
"$delay_single_quote_subst"`'
+compiler_needs_object='`$ECHO "$compiler_needs_object" | $SED 
"$delay_single_quote_subst"`'
+old_archive_from_new_cmds='`$ECHO "$old_archive_from_new_cmds" | $SED 
"$delay_single_quote_subst"`'
+old_archive_from_expsyms_cmds='`$ECHO "$old_archive_from_expsyms_cmds" | $SED 
"$delay_single_quote_subst"`'
+archive_cmds='`$ECHO "$archive_cmds" | $SED "$delay_single_quote_subst"`'
+archive_expsym_cmds='`$ECHO "$archive_expsym_cmds" | $SED 
"$delay_single_quote_subst"`'
+module_cmds='`$ECHO "$module_cmds" | $SED "$delay_single_quote_subst"`'
+module_expsym_cmds='`$ECHO "$module_expsym_cmds" | $SED 
"$delay_single_quote_subst"`'
+with_gnu_ld='`$ECHO "$with_gnu_ld" | $SED "$delay_single_quote_subst"`'
+allow_undefined_flag='`$ECHO "$allow_undefined_flag" | $SED 
"$delay_single_quote_subst"`'
+no_undefined_flag='`$ECHO "$no_undefined_flag" | $SED 
"$delay_single_quote_subst"`'
+hardcode_libdir_flag_spec='`$ECHO "$hardcode_libdir_flag_spec" | $SED 
"$delay_single_quote_subst"`'
+hardcode_libdir_separator='`$ECHO "$hardcode_libdir_separator" | $SED 
"$delay_single_quote_subst"`'
+hardcode_direct='`$ECHO "$hardcode_direct" | $SED "$delay_single_quote_subst"`'
+hardcode_direct_absolute='`$ECHO "$hardcode_direct_absolute" | $SED 
"$delay_single_quote_subst"`'
+hardcode_minus_L='`$ECHO "$hardcode_minus_L" | $SED 
"$delay_single_quote_subst"`'
+hardcode_shlibpath_var='`$ECHO "$hardcode_shlibpath_var" | $SED 
"$delay_single_quote_subst"`'
+hardcode_automatic='`$ECHO "$hardcode_automatic" | $SED 
"$delay_single_quote_subst"`'
+inherit_rpath='`$ECHO "$inherit_rpath" | $SED "$delay_single_quote_subst"`'
+link_all_deplibs='`$ECHO "$link_all_deplibs" | $SED 
"$delay_single_quote_subst"`'
+always_export_symbols='`$ECHO "$always_export_symbols" | $SED 
"$delay_single_quote_subst"`'
+export_symbols_cmds='`$ECHO "$export_symbols_cmds" | $SED 
"$delay_single_quote_subst"`'
+exclude_expsyms='`$ECHO "$exclude_expsyms" | $SED "$delay_single_quote_subst"`'
+include_expsyms='`$ECHO "$include_expsyms" | $SED "$delay_single_quote_subst"`'
+prelink_cmds='`$ECHO "$prelink_cmds" | $SED "$delay_single_quote_subst"`'
+postlink_cmds='`$ECHO "$postlink_cmds" | $SED "$delay_single_quote_subst"`'
+file_list_spec='`$ECHO "$file_list_spec" | $SED "$delay_single_quote_subst"`'
+variables_saved_for_relink='`$ECHO "$variables_saved_for_relink" | $SED 
"$delay_single_quote_subst"`'
+need_lib_prefix='`$ECHO "$need_lib_prefix" | $SED "$delay_single_quote_subst"`'
+need_version='`$ECHO "$need_version" | $SED "$delay_single_quote_subst"`'
+version_type='`$ECHO "$version_type" | $SED "$delay_single_quote_subst"`'
+runpath_var='`$ECHO "$runpath_var" | $SED "$delay_single_quote_subst"`'
+shlibpath_var='`$ECHO "$shlibpath_var" | $SED "$delay_single_quote_subst"`'
+shlibpath_overrides_runpath='`$ECHO "$shlibpath_overrides_runpath" | $SED 
"$delay_single_quote_subst"`'
+libname_spec='`$ECHO "$libname_spec" | $SED "$delay_single_quote_subst"`'
+library_names_spec='`$ECHO "$library_names_spec" | $SED 
"$delay_single_quote_subst"`'
+soname_spec='`$ECHO "$soname_spec" | $SED "$delay_single_quote_subst"`'
+install_override_mode='`$ECHO "$install_override_mode" | $SED 
"$delay_single_quote_subst"`'
+postinstall_cmds='`$ECHO "$postinstall_cmds" | $SED 
"$delay_single_quote_subst"`'
+postuninstall_cmds='`$ECHO "$postuninstall_cmds" | $SED 
"$delay_single_quote_subst"`'
+finish_cmds='`$ECHO "$finish_cmds" | $SED "$delay_single_quote_subst"`'
+finish_eval='`$ECHO "$finish_eval" | $SED "$delay_single_quote_subst"`'
+hardcode_into_libs='`$ECHO "$hardcode_into_libs" | $SED 
"$delay_single_quote_subst"`'
+sys_lib_search_path_spec='`$ECHO "$sys_lib_search_path_spec" | $SED 
"$delay_single_quote_subst"`'
+sys_lib_dlsearch_path_spec='`$ECHO "$sys_lib_dlsearch_path_spec" | $SED 
"$delay_single_quote_subst"`'
+hardcode_action='`$ECHO "$hardcode_action" | $SED "$delay_single_quote_subst"`'
+enable_dlopen='`$ECHO "$enable_dlopen" | $SED "$delay_single_quote_subst"`'
+enable_dlopen_self='`$ECHO "$enable_dlopen_self" | $SED 
"$delay_single_quote_subst"`'
+enable_dlopen_self_static='`$ECHO "$enable_dlopen_self_static" | $SED 
"$delay_single_quote_subst"`'
+old_striplib='`$ECHO "$old_striplib" | $SED "$delay_single_quote_subst"`'
+striplib='`$ECHO "$striplib" | $SED "$delay_single_quote_subst"`'
+LD_RC='`$ECHO "$LD_RC" | $SED "$delay_single_quote_subst"`'
+reload_flag_RC='`$ECHO "$reload_flag_RC" | $SED "$delay_single_quote_subst"`'
+reload_cmds_RC='`$ECHO "$reload_cmds_RC" | $SED "$delay_single_quote_subst"`'
+old_archive_cmds_RC='`$ECHO "$old_archive_cmds_RC" | $SED 
"$delay_single_quote_subst"`'
+compiler_RC='`$ECHO "$compiler_RC" | $SED "$delay_single_quote_subst"`'
+GCC_RC='`$ECHO "$GCC_RC" | $SED "$delay_single_quote_subst"`'
+lt_prog_compiler_no_builtin_flag_RC='`$ECHO 
"$lt_prog_compiler_no_builtin_flag_RC" | $SED "$delay_single_quote_subst"`'
+lt_prog_compiler_pic_RC='`$ECHO "$lt_prog_compiler_pic_RC" | $SED 
"$delay_single_quote_subst"`'
+lt_prog_compiler_wl_RC='`$ECHO "$lt_prog_compiler_wl_RC" | $SED 
"$delay_single_quote_subst"`'
+lt_prog_compiler_static_RC='`$ECHO "$lt_prog_compiler_static_RC" | $SED 
"$delay_single_quote_subst"`'
+lt_cv_prog_compiler_c_o_RC='`$ECHO "$lt_cv_prog_compiler_c_o_RC" | $SED 
"$delay_single_quote_subst"`'
+archive_cmds_need_lc_RC='`$ECHO "$archive_cmds_need_lc_RC" | $SED 
"$delay_single_quote_subst"`'
+enable_shared_with_static_runtimes_RC='`$ECHO 
"$enable_shared_with_static_runtimes_RC" | $SED "$delay_single_quote_subst"`'
+export_dynamic_flag_spec_RC='`$ECHO "$export_dynamic_flag_spec_RC" | $SED 
"$delay_single_quote_subst"`'
+whole_archive_flag_spec_RC='`$ECHO "$whole_archive_flag_spec_RC" | $SED 
"$delay_single_quote_subst"`'
+compiler_needs_object_RC='`$ECHO "$compiler_needs_object_RC" | $SED 
"$delay_single_quote_subst"`'
+old_archive_from_new_cmds_RC='`$ECHO "$old_archive_from_new_cmds_RC" | $SED 
"$delay_single_quote_subst"`'
+old_archive_from_expsyms_cmds_RC='`$ECHO "$old_archive_from_expsyms_cmds_RC" | 
$SED "$delay_single_quote_subst"`'
+archive_cmds_RC='`$ECHO "$archive_cmds_RC" | $SED "$delay_single_quote_subst"`'
+archive_expsym_cmds_RC='`$ECHO "$archive_expsym_cmds_RC" | $SED 
"$delay_single_quote_subst"`'
+module_cmds_RC='`$ECHO "$module_cmds_RC" | $SED "$delay_single_quote_subst"`'
+module_expsym_cmds_RC='`$ECHO "$module_expsym_cmds_RC" | $SED 
"$delay_single_quote_subst"`'
+with_gnu_ld_RC='`$ECHO "$with_gnu_ld_RC" | $SED "$delay_single_quote_subst"`'
+allow_undefined_flag_RC='`$ECHO "$allow_undefined_flag_RC" | $SED 
"$delay_single_quote_subst"`'
+no_undefined_flag_RC='`$ECHO "$no_undefined_flag_RC" | $SED 
"$delay_single_quote_subst"`'
+hardcode_libdir_flag_spec_RC='`$ECHO "$hardcode_libdir_flag_spec_RC" | $SED 
"$delay_single_quote_subst"`'
+hardcode_libdir_separator_RC='`$ECHO "$hardcode_libdir_separator_RC" | $SED 
"$delay_single_quote_subst"`'
+hardcode_direct_RC='`$ECHO "$hardcode_direct_RC" | $SED 
"$delay_single_quote_subst"`'
+hardcode_direct_absolute_RC='`$ECHO "$hardcode_direct_absolute_RC" | $SED 
"$delay_single_quote_subst"`'
+hardcode_minus_L_RC='`$ECHO "$hardcode_minus_L_RC" | $SED 
"$delay_single_quote_subst"`'
+hardcode_shlibpath_var_RC='`$ECHO "$hardcode_shlibpath_var_RC" | $SED 
"$delay_single_quote_subst"`'
+hardcode_automatic_RC='`$ECHO "$hardcode_automatic_RC" | $SED 
"$delay_single_quote_subst"`'
+inherit_rpath_RC='`$ECHO "$inherit_rpath_RC" | $SED 
"$delay_single_quote_subst"`'
+link_all_deplibs_RC='`$ECHO "$link_all_deplibs_RC" | $SED 
"$delay_single_quote_subst"`'
+always_export_symbols_RC='`$ECHO "$always_export_symbols_RC" | $SED 
"$delay_single_quote_subst"`'
+export_symbols_cmds_RC='`$ECHO "$export_symbols_cmds_RC" | $SED 
"$delay_single_quote_subst"`'
+exclude_expsyms_RC='`$ECHO "$exclude_expsyms_RC" | $SED 
"$delay_single_quote_subst"`'
+include_expsyms_RC='`$ECHO "$include_expsyms_RC" | $SED 
"$delay_single_quote_subst"`'
+prelink_cmds_RC='`$ECHO "$prelink_cmds_RC" | $SED "$delay_single_quote_subst"`'
+postlink_cmds_RC='`$ECHO "$postlink_cmds_RC" | $SED 
"$delay_single_quote_subst"`'
+file_list_spec_RC='`$ECHO "$file_list_spec_RC" | $SED 
"$delay_single_quote_subst"`'
+hardcode_action_RC='`$ECHO "$hardcode_action_RC" | $SED 
"$delay_single_quote_subst"`'
+
+LTCC='$LTCC'
+LTCFLAGS='$LTCFLAGS'
+compiler='$compiler_DEFAULT'
+
+# A function that is used when there is no print builtin or printf.
+func_fallback_echo ()
+{
+  eval 'cat <<_LTECHO_EOF
+\$1
+_LTECHO_EOF'
+}
+
+# Quote evaled strings.
+for var in AS \
+DLLTOOL \
+OBJDUMP \
+SHELL \
+ECHO \
+PATH_SEPARATOR \
+SED \
+GREP \
+EGREP \
+FGREP \
+LD \
+NM \
+LN_S \
+lt_SP2NL \
+lt_NL2SP \
+reload_flag \
+deplibs_check_method \
+file_magic_cmd \
+file_magic_glob \
+want_nocaseglob \
+sharedlib_from_linklib_cmd \
+AR \
+AR_FLAGS \
+archiver_list_spec \
+STRIP \
+RANLIB \
+CC \
+CFLAGS \
+compiler \
+lt_cv_sys_global_symbol_pipe \
+lt_cv_sys_global_symbol_to_cdecl \
+lt_cv_sys_global_symbol_to_c_name_address \
+lt_cv_sys_global_symbol_to_c_name_address_lib_prefix \
+nm_file_list_spec \
+lt_prog_compiler_no_builtin_flag \
+lt_prog_compiler_pic \
+lt_prog_compiler_wl \
+lt_prog_compiler_static \
+lt_cv_prog_compiler_c_o \
+need_locks \
+MANIFEST_TOOL \
+DSYMUTIL \
+NMEDIT \
+LIPO \
+OTOOL \
+OTOOL64 \
+shrext_cmds \
+export_dynamic_flag_spec \
+whole_archive_flag_spec \
+compiler_needs_object \
+with_gnu_ld \
+allow_undefined_flag \
+no_undefined_flag \
+hardcode_libdir_flag_spec \
+hardcode_libdir_separator \
+exclude_expsyms \
+include_expsyms \
+file_list_spec \
+variables_saved_for_relink \
+libname_spec \
+library_names_spec \
+soname_spec \
+install_override_mode \
+finish_eval \
+old_striplib \
+striplib \
+LD_RC \
+reload_flag_RC \
+compiler_RC \
+lt_prog_compiler_no_builtin_flag_RC \
+lt_prog_compiler_pic_RC \
+lt_prog_compiler_wl_RC \
+lt_prog_compiler_static_RC \
+lt_cv_prog_compiler_c_o_RC \
+export_dynamic_flag_spec_RC \
+whole_archive_flag_spec_RC \
+compiler_needs_object_RC \
+with_gnu_ld_RC \
+allow_undefined_flag_RC \
+no_undefined_flag_RC \
+hardcode_libdir_flag_spec_RC \
+hardcode_libdir_separator_RC \
+exclude_expsyms_RC \
+include_expsyms_RC \
+file_list_spec_RC; do
+    case \`eval \\\\\$ECHO \\\\""\\\\\$\$var"\\\\"\` in
+    *[\\\\\\\`\\"\\\$]*)
+      eval "lt_\$var=\\\\\\"\\\`\\\$ECHO \\"\\\$\$var\\" | \\\$SED 
\\"\\\$sed_quote_subst\\"\\\`\\\\\\""
+      ;;
+    *)
+      eval "lt_\$var=\\\\\\"\\\$\$var\\\\\\""
+      ;;
+    esac
+done
+
+# Double-quote double-evaled strings.
+for var in reload_cmds \
+old_postinstall_cmds \
+old_postuninstall_cmds \
+old_archive_cmds \
+extract_expsyms_cmds \
+old_archive_from_new_cmds \
+old_archive_from_expsyms_cmds \
+archive_cmds \
+archive_expsym_cmds \
+module_cmds \
+module_expsym_cmds \
+export_symbols_cmds \
+prelink_cmds \
+postlink_cmds \
+postinstall_cmds \
+postuninstall_cmds \
+finish_cmds \
+sys_lib_search_path_spec \
+sys_lib_dlsearch_path_spec \
+reload_cmds_RC \
+old_archive_cmds_RC \
+old_archive_from_new_cmds_RC \
+old_archive_from_expsyms_cmds_RC \
+archive_cmds_RC \
+archive_expsym_cmds_RC \
+module_cmds_RC \
+module_expsym_cmds_RC \
+export_symbols_cmds_RC \
+prelink_cmds_RC \
+postlink_cmds_RC; do
+    case \`eval \\\\\$ECHO \\\\""\\\\\$\$var"\\\\"\` in
+    *[\\\\\\\`\\"\\\$]*)
+      eval "lt_\$var=\\\\\\"\\\`\\\$ECHO \\"\\\$\$var\\" | \\\$SED -e 
\\"\\\$double_quote_subst\\" -e \\"\\\$sed_quote_subst\\" -e 
\\"\\\$delay_variable_subst\\"\\\`\\\\\\""
+      ;;
+    *)
+      eval "lt_\$var=\\\\\\"\\\$\$var\\\\\\""
+      ;;
+    esac
+done
+
+ac_aux_dir='$ac_aux_dir'
+xsi_shell='$xsi_shell'
+lt_shell_append='$lt_shell_append'
+
+# See if we are running on zsh, and set the options which allow our
+# commands through without removal of \ escapes INIT.
+if test -n "\${ZSH_VERSION+set}" ; then
+   setopt NO_GLOB_SUBST
+fi
+
+
+    PACKAGE='$PACKAGE'
+    VERSION='$VERSION'
+    TIMESTAMP='$TIMESTAMP'
+    RM='$RM'
+    ofile='$ofile'
+
+
+
+
+
+build_selection=$build_libtool_modification
+
+prefix=$prefix
+exec_prefix=$exec_prefix
+libdir=$libdir
+datadir=$datadir
+DATADIRNAME=$DATADIRNAME
+
+
+_ACEOF
+
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+
+# Handling of arguments.
+for ac_config_target in $ac_config_targets
+do
+  case $ac_config_target in
+    "config.h") CONFIG_HEADERS="$CONFIG_HEADERS config.h" ;;
+    "depfiles") CONFIG_COMMANDS="$CONFIG_COMMANDS depfiles" ;;
+    "libtool") CONFIG_COMMANDS="$CONFIG_COMMANDS libtool" ;;
+    "libtool-patch") CONFIG_COMMANDS="$CONFIG_COMMANDS libtool-patch" ;;
+    ""$mpi_ln_list"") CONFIG_LINKS="$CONFIG_LINKS "$mpi_ln_list"" ;;
+    "gcrypt-conf") CONFIG_COMMANDS="$CONFIG_COMMANDS gcrypt-conf" ;;
+    "Makefile") CONFIG_FILES="$CONFIG_FILES Makefile" ;;
+    "m4/Makefile") CONFIG_FILES="$CONFIG_FILES m4/Makefile" ;;
+    "compat/Makefile") CONFIG_FILES="$CONFIG_FILES compat/Makefile" ;;
+    "mpi/Makefile") CONFIG_FILES="$CONFIG_FILES mpi/Makefile" ;;
+    "cipher/Makefile") CONFIG_FILES="$CONFIG_FILES cipher/Makefile" ;;
+    "random/Makefile") CONFIG_FILES="$CONFIG_FILES random/Makefile" ;;
+    "doc/Makefile") CONFIG_FILES="$CONFIG_FILES doc/Makefile" ;;
+    "src/Makefile") CONFIG_FILES="$CONFIG_FILES src/Makefile" ;;
+    "src/gcrypt.h") CONFIG_FILES="$CONFIG_FILES src/gcrypt.h" ;;
+    "src/libgcrypt-config") CONFIG_FILES="$CONFIG_FILES src/libgcrypt-config" 
;;
+    "src/libgcrypt.pc") CONFIG_FILES="$CONFIG_FILES src/libgcrypt.pc" ;;
+    "src/versioninfo.rc") CONFIG_FILES="$CONFIG_FILES src/versioninfo.rc" ;;
+    "tests/Makefile") CONFIG_FILES="$CONFIG_FILES tests/Makefile" ;;
+    "tests/hashtest-256g") CONFIG_FILES="$CONFIG_FILES tests/hashtest-256g" ;;
+    "tests/basic-disable-all-hwf") CONFIG_FILES="$CONFIG_FILES 
tests/basic-disable-all-hwf" ;;
+
+  *) as_fn_error $? "invalid argument: \`$ac_config_target'" "$LINENO" 5;;
+  esac
+done
+
+
+# If the user did not use the arguments to specify the items to instantiate,
+# then the envvar interface is used.  Set only those that are not.
+# We use the long form for the default assignment because of an extremely
+# bizarre bug on SunOS 4.1.3.
+if $ac_need_defaults; then
+  test ${CONFIG_FILES+y} || CONFIG_FILES=$config_files
+  test ${CONFIG_HEADERS+y} || CONFIG_HEADERS=$config_headers
+  test ${CONFIG_LINKS+y} || CONFIG_LINKS=$config_links
+  test ${CONFIG_COMMANDS+y} || CONFIG_COMMANDS=$config_commands
+fi
+
+# Have a temporary directory for convenience.  Make it in the build tree
+# simply because there is no reason against having it here, and in addition,
+# creating and moving files from /tmp can sometimes cause problems.
+# Hook for its removal unless debugging.
+# Note that there is a small window in which the directory will not be cleaned:
+# after its creation but before its name has been assigned to `$tmp'.
+$debug ||
+{
+  tmp= ac_tmp=
+  trap 'exit_status=$?
+  : "${ac_tmp:=$tmp}"
+  { test ! -d "$ac_tmp" || rm -fr "$ac_tmp"; } && exit $exit_status
+' 0
+  trap 'as_fn_exit 1' 1 2 13 15
+}
+# Create a (secure) tmp directory for tmp files.
+
+{
+  tmp=`(umask 077 && mktemp -d "./confXXXXXX") 2>/dev/null` &&
+  test -d "$tmp"
+}  ||
+{
+  tmp=./conf$$-$RANDOM
+  (umask 077 && mkdir "$tmp")
+} || as_fn_error $? "cannot create a temporary directory in ." "$LINENO" 5
+ac_tmp=$tmp
+
+# Set up the scripts for CONFIG_FILES section.
+# No need to generate them if there are no CONFIG_FILES.
+# This happens for instance with `./config.status config.h'.
+if test -n "$CONFIG_FILES"; then
+
+
+ac_cr=`echo X | tr X '\015'`
+# On cygwin, bash can eat \r inside `` if the user requested igncr.
+# But we know of no other shell where ac_cr would be empty at this
+# point, so we can use a bashism as a fallback.
+if test "x$ac_cr" = x; then
+  eval ac_cr=\$\'\\r\'
+fi
+ac_cs_awk_cr=`$AWK 'BEGIN { print "a\rb" }' </dev/null 2>/dev/null`
+if test "$ac_cs_awk_cr" = "a${ac_cr}b"; then
+  ac_cs_awk_cr='\\r'
+else
+  ac_cs_awk_cr=$ac_cr
+fi
+
+echo 'BEGIN {' >"$ac_tmp/subs1.awk" &&
+_ACEOF
+
+
+{
+  echo "cat >conf$$subs.awk <<_ACEOF" &&
+  echo "$ac_subst_vars" | sed 's/.*/&!$&$ac_delim/' &&
+  echo "_ACEOF"
+} >conf$$subs.sh ||
+  as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5
+ac_delim_num=`echo "$ac_subst_vars" | grep -c '^'`
+ac_delim='%!_!# '
+for ac_last_try in false false false false false :; do
+  . ./conf$$subs.sh ||
+    as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5
+
+  ac_delim_n=`sed -n "s/.*$ac_delim\$/X/p" conf$$subs.awk | grep -c X`
+  if test $ac_delim_n = $ac_delim_num; then
+    break
+  elif $ac_last_try; then
+    as_fn_error $? "could not make $CONFIG_STATUS" "$LINENO" 5
+  else
+    ac_delim="$ac_delim!$ac_delim _$ac_delim!! "
+  fi
+done
+rm -f conf$$subs.sh
+
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+cat >>"\$ac_tmp/subs1.awk" <<\\_ACAWK &&
+_ACEOF
+sed -n '
+h
+s/^/S["/; s/!.*/"]=/
+p
+g
+s/^[^!]*!//
+:repl
+t repl
+s/'"$ac_delim"'$//
+t delim
+:nl
+h
+s/\(.\{148\}\)..*/\1/
+t more1
+s/["\\]/\\&/g; s/^/"/; s/$/\\n"\\/
+p
+n
+b repl
+:more1
+s/["\\]/\\&/g; s/^/"/; s/$/"\\/
+p
+g
+s/.\{148\}//
+t nl
+:delim
+h
+s/\(.\{148\}\)..*/\1/
+t more2
+s/["\\]/\\&/g; s/^/"/; s/$/"/
+p
+b
+:more2
+s/["\\]/\\&/g; s/^/"/; s/$/"\\/
+p
+g
+s/.\{148\}//
+t delim
+' <conf$$subs.awk | sed '
+/^[^""]/{
+  N
+  s/\n//
+}
+' >>$CONFIG_STATUS || ac_write_fail=1
+rm -f conf$$subs.awk
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+_ACAWK
+cat >>"\$ac_tmp/subs1.awk" <<_ACAWK &&
+  for (key in S) S_is_set[key] = 1
+  FS = ""
+
+}
+{
+  line = $ 0
+  nfields = split(line, field, "@")
+  substed = 0
+  len = length(field[1])
+  for (i = 2; i < nfields; i++) {
+    key = field[i]
+    keylen = length(key)
+    if (S_is_set[key]) {
+      value = S[key]
+      line = substr(line, 1, len) "" value "" substr(line, len + keylen + 3)
+      len += length(value) + length(field[++i])
+      substed = 1
+    } else
+      len += 1 + keylen
+  }
+
+  print line
+}
+
+_ACAWK
+_ACEOF
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+if sed "s/$ac_cr//" < /dev/null > /dev/null 2>&1; then
+  sed "s/$ac_cr\$//; s/$ac_cr/$ac_cs_awk_cr/g"
+else
+  cat
+fi < "$ac_tmp/subs1.awk" > "$ac_tmp/subs.awk" \
+  || as_fn_error $? "could not setup config files machinery" "$LINENO" 5
+_ACEOF
+
+# VPATH may cause trouble with some makes, so we remove sole $(srcdir),
+# ${srcdir} and @srcdir@ entries from VPATH if srcdir is ".", strip leading and
+# trailing colons and then remove the whole line if VPATH becomes empty
+# (actually we leave an empty line to preserve line numbers).
+if test "x$srcdir" = x.; then
+  ac_vpsub='/^[         ]*VPATH[        ]*=[    ]*/{
+h
+s///
+s/^/:/
+s/[     ]*$/:/
+s/:\$(srcdir):/:/g
+s/:\${srcdir}:/:/g
+s/:@srcdir@:/:/g
+s/^:*//
+s/:*$//
+x
+s/\(=[  ]*\).*/\1/
+G
+s/\n//
+s/^[^=]*=[      ]*$//
+}'
+fi
+
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+fi # test -n "$CONFIG_FILES"
+
+# Set up the scripts for CONFIG_HEADERS section.
+# No need to generate them if there are no CONFIG_HEADERS.
+# This happens for instance with `./config.status Makefile'.
+if test -n "$CONFIG_HEADERS"; then
+cat >"$ac_tmp/defines.awk" <<\_ACAWK ||
+BEGIN {
+_ACEOF
+
+# Transform confdefs.h into an awk script `defines.awk', embedded as
+# here-document in config.status, that substitutes the proper values into
+# config.h.in to produce config.h.
+
+# Create a delimiter string that does not exist in confdefs.h, to ease
+# handling of long lines.
+ac_delim='%!_!# '
+for ac_last_try in false false :; do
+  ac_tt=`sed -n "/$ac_delim/p" confdefs.h`
+  if test -z "$ac_tt"; then
+    break
+  elif $ac_last_try; then
+    as_fn_error $? "could not make $CONFIG_HEADERS" "$LINENO" 5
+  else
+    ac_delim="$ac_delim!$ac_delim _$ac_delim!! "
+  fi
+done
+
+# For the awk script, D is an array of macro values keyed by name,
+# likewise P contains macro parameters if any.  Preserve backslash
+# newline sequences.
+
+ac_word_re=[_$as_cr_Letters][_$as_cr_alnum]*
+sed -n '
+s/.\{148\}/&'"$ac_delim"'/g
+t rset
+:rset
+s/^[    ]*#[    ]*define[       ][      ]*/ /
+t def
+d
+:def
+s/\\$//
+t bsnl
+s/["\\]/\\&/g
+s/^ \('"$ac_word_re"'\)\(([^()]*)\)[    ]*\(.*\)/P["\1"]="\2"\
+D["\1"]=" \3"/p
+s/^ \('"$ac_word_re"'\)[        ]*\(.*\)/D["\1"]=" \2"/p
+d
+:bsnl
+s/["\\]/\\&/g
+s/^ \('"$ac_word_re"'\)\(([^()]*)\)[    ]*\(.*\)/P["\1"]="\2"\
+D["\1"]=" \3\\\\\\n"\\/p
+t cont
+s/^ \('"$ac_word_re"'\)[        ]*\(.*\)/D["\1"]=" \2\\\\\\n"\\/p
+t cont
+d
+:cont
+n
+s/.\{148\}/&'"$ac_delim"'/g
+t clear
+:clear
+s/\\$//
+t bsnlc
+s/["\\]/\\&/g; s/^/"/; s/$/"/p
+d
+:bsnlc
+s/["\\]/\\&/g; s/^/"/; s/$/\\\\\\n"\\/p
+b cont
+' <confdefs.h | sed '
+s/'"$ac_delim"'/"\\\
+"/g' >>$CONFIG_STATUS || ac_write_fail=1
+
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+  for (key in D) D_is_set[key] = 1
+  FS = ""
+}
+/^[\t ]*#[\t ]*(define|undef)[\t ]+$ac_word_re([\t (]|\$)/ {
+  line = \$ 0
+  split(line, arg, " ")
+  if (arg[1] == "#") {
+    defundef = arg[2]
+    mac1 = arg[3]
+  } else {
+    defundef = substr(arg[1], 2)
+    mac1 = arg[2]
+  }
+  split(mac1, mac2, "(") #)
+  macro = mac2[1]
+  prefix = substr(line, 1, index(line, defundef) - 1)
+  if (D_is_set[macro]) {
+    # Preserve the white space surrounding the "#".
+    print prefix "define", macro P[macro] D[macro]
+    next
+  } else {
+    # Replace #undef with comments.  This is necessary, for example,
+    # in the case of _POSIX_SOURCE, which is predefined and required
+    # on some systems where configure will not decide to define it.
+    if (defundef == "undef") {
+      print "/*", prefix defundef, macro, "*/"
+      next
+    }
+  }
+}
+{ print }
+_ACAWK
+_ACEOF
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+  as_fn_error $? "could not setup config headers machinery" "$LINENO" 5
+fi # test -n "$CONFIG_HEADERS"
+
+
+eval set X "  :F $CONFIG_FILES  :H $CONFIG_HEADERS  :L $CONFIG_LINKS  :C 
$CONFIG_COMMANDS"
+shift
+for ac_tag
+do
+  case $ac_tag in
+  :[FHLC]) ac_mode=$ac_tag; continue;;
+  esac
+  case $ac_mode$ac_tag in
+  :[FHL]*:*);;
+  :L* | :C*:*) as_fn_error $? "invalid tag \`$ac_tag'" "$LINENO" 5;;
+  :[FH]-) ac_tag=-:-;;
+  :[FH]*) ac_tag=$ac_tag:$ac_tag.in;;
+  esac
+  ac_save_IFS=$IFS
+  IFS=:
+  set x $ac_tag
+  IFS=$ac_save_IFS
+  shift
+  ac_file=$1
+  shift
+
+  case $ac_mode in
+  :L) ac_source=$1;;
+  :[FH])
+    ac_file_inputs=
+    for ac_f
+    do
+      case $ac_f in
+      -) ac_f="$ac_tmp/stdin";;
+      *) # Look for the file first in the build tree, then in the source tree
+        # (if the path is not absolute).  The absolute path cannot be 
DOS-style,
+        # because $ac_f cannot contain `:'.
+        test -f "$ac_f" ||
+          case $ac_f in
+          [\\/$]*) false;;
+          *) test -f "$srcdir/$ac_f" && ac_f="$srcdir/$ac_f";;
+          esac ||
+          as_fn_error 1 "cannot find input file: \`$ac_f'" "$LINENO" 5;;
+      esac
+      case $ac_f in *\'*) ac_f=`printf "%s\n" "$ac_f" | sed 
"s/'/'\\\\\\\\''/g"`;; esac
+      as_fn_append ac_file_inputs " '$ac_f'"
+    done
+
+    # Let's still pretend it is `configure' which instantiates (i.e., don't
+    # use $as_me), people would be surprised to read:
+    #    /* config.h.  Generated by config.status.  */
+    configure_input='Generated from '`
+         printf "%s\n" "$*" | sed 's|^[^:]*/||;s|:[^:]*/|, |g'
+       `' by configure.'
+    if test x"$ac_file" != x-; then
+      configure_input="$ac_file.  $configure_input"
+      { printf "%s\n" "$as_me:${as_lineno-$LINENO}: creating $ac_file" >&5
+printf "%s\n" "$as_me: creating $ac_file" >&6;}
+    fi
+    # Neutralize special characters interpreted by sed in replacement strings.
+    case $configure_input in #(
+    *\&* | *\|* | *\\* )
+       ac_sed_conf_input=`printf "%s\n" "$configure_input" |
+       sed 's/[\\\\&|]/\\\\&/g'`;; #(
+    *) ac_sed_conf_input=$configure_input;;
+    esac
+
+    case $ac_tag in
+    *:-:* | *:-) cat >"$ac_tmp/stdin" \
+      || as_fn_error $? "could not create $ac_file" "$LINENO" 5 ;;
+    esac
+    ;;
+  esac
+
+  ac_dir=`$as_dirname -- "$ac_file" ||
+$as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+        X"$ac_file" : 'X\(//\)[^/]' \| \
+        X"$ac_file" : 'X\(//\)$' \| \
+        X"$ac_file" : 'X\(/\)' \| . 2>/dev/null ||
+printf "%s\n" X"$ac_file" |
+    sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
+           s//\1/
+           q
+         }
+         /^X\(\/\/\)[^/].*/{
+           s//\1/
+           q
+         }
+         /^X\(\/\/\)$/{
+           s//\1/
+           q
+         }
+         /^X\(\/\).*/{
+           s//\1/
+           q
+         }
+         s/.*/./; q'`
+  as_dir="$ac_dir"; as_fn_mkdir_p
+  ac_builddir=.
+
+case "$ac_dir" in
+.) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;;
+*)
+  ac_dir_suffix=/`printf "%s\n" "$ac_dir" | sed 's|^\.[\\/]||'`
+  # A ".." for each directory in $ac_dir_suffix.
+  ac_top_builddir_sub=`printf "%s\n" "$ac_dir_suffix" | sed 
's|/[^\\/]*|/..|g;s|/||'`
+  case $ac_top_builddir_sub in
+  "") ac_top_builddir_sub=. ac_top_build_prefix= ;;
+  *)  ac_top_build_prefix=$ac_top_builddir_sub/ ;;
+  esac ;;
+esac
+ac_abs_top_builddir=$ac_pwd
+ac_abs_builddir=$ac_pwd$ac_dir_suffix
+# for backward compatibility:
+ac_top_builddir=$ac_top_build_prefix
+
+case $srcdir in
+  .)  # We are building in place.
+    ac_srcdir=.
+    ac_top_srcdir=$ac_top_builddir_sub
+    ac_abs_top_srcdir=$ac_pwd ;;
+  [\\/]* | ?:[\\/]* )  # Absolute name.
+    ac_srcdir=$srcdir$ac_dir_suffix;
+    ac_top_srcdir=$srcdir
+    ac_abs_top_srcdir=$srcdir ;;
+  *) # Relative name.
+    ac_srcdir=$ac_top_build_prefix$srcdir$ac_dir_suffix
+    ac_top_srcdir=$ac_top_build_prefix$srcdir
+    ac_abs_top_srcdir=$ac_pwd/$srcdir ;;
+esac
+ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix
+
+
+  case $ac_mode in
+  :F)
+  #
+  # CONFIG_FILE
+  #
+
+  case $INSTALL in
+  [\\/$]* | ?:[\\/]* ) ac_INSTALL=$INSTALL ;;
+  *) ac_INSTALL=$ac_top_build_prefix$INSTALL ;;
+  esac
+  ac_MKDIR_P=$MKDIR_P
+  case $MKDIR_P in
+  [\\/$]* | ?:[\\/]* ) ;;
+  */*) ac_MKDIR_P=$ac_top_build_prefix$MKDIR_P ;;
+  esac
+_ACEOF
+
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+# If the template does not know about datarootdir, expand it.
+# FIXME: This hack should be removed a few years after 2.60.
+ac_datarootdir_hack=; ac_datarootdir_seen=
+ac_sed_dataroot='
+/datarootdir/ {
+  p
+  q
+}
+/@datadir@/p
+/@docdir@/p
+/@infodir@/p
+/@localedir@/p
+/@mandir@/p'
+case `eval "sed -n \"\$ac_sed_dataroot\" $ac_file_inputs"` in
+*datarootdir*) ac_datarootdir_seen=yes;;
+*@datadir@*|*@docdir@*|*@infodir@*|*@localedir@*|*@mandir@*)
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file_inputs seems 
to ignore the --datarootdir setting" >&5
+printf "%s\n" "$as_me: WARNING: $ac_file_inputs seems to ignore the 
--datarootdir setting" >&2;}
+_ACEOF
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+  ac_datarootdir_hack='
+  s&@datadir@&$datadir&g
+  s&@docdir@&$docdir&g
+  s&@infodir@&$infodir&g
+  s&@localedir@&$localedir&g
+  s&@mandir@&$mandir&g
+  s&\\\${datarootdir}&$datarootdir&g' ;;
+esac
+_ACEOF
+
+# Neutralize VPATH when `$srcdir' = `.'.
+# Shell code in configure.ac might set extrasub.
+# FIXME: do we really want to maintain this feature?
+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
+ac_sed_extra="$ac_vpsub
+$extrasub
+_ACEOF
+cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
+:t
+/@[a-zA-Z_][a-zA-Z_0-9]*@/!b
+s|@configure_input@|$ac_sed_conf_input|;t t
+s&@top_builddir@&$ac_top_builddir_sub&;t t
+s&@top_build_prefix@&$ac_top_build_prefix&;t t
+s&@srcdir@&$ac_srcdir&;t t
+s&@abs_srcdir@&$ac_abs_srcdir&;t t
+s&@top_srcdir@&$ac_top_srcdir&;t t
+s&@abs_top_srcdir@&$ac_abs_top_srcdir&;t t
+s&@builddir@&$ac_builddir&;t t
+s&@abs_builddir@&$ac_abs_builddir&;t t
+s&@abs_top_builddir@&$ac_abs_top_builddir&;t t
+s&@INSTALL@&$ac_INSTALL&;t t
+s&@MKDIR_P@&$ac_MKDIR_P&;t t
+$ac_datarootdir_hack
+"
+eval sed \"\$ac_sed_extra\" "$ac_file_inputs" | $AWK -f "$ac_tmp/subs.awk" \
+  >$ac_tmp/out || as_fn_error $? "could not create $ac_file" "$LINENO" 5
+
+test -z "$ac_datarootdir_hack$ac_datarootdir_seen" &&
+  { ac_out=`sed -n '/\${datarootdir}/p' "$ac_tmp/out"`; test -n "$ac_out"; } &&
+  { ac_out=`sed -n '/^[         ]*datarootdir[  ]*:*=/p' \
+      "$ac_tmp/out"`; test -z "$ac_out"; } &&
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file contains a 
reference to the variable \`datarootdir'
+which seems to be undefined.  Please make sure it is defined" >&5
+printf "%s\n" "$as_me: WARNING: $ac_file contains a reference to the variable 
\`datarootdir'
+which seems to be undefined.  Please make sure it is defined" >&2;}
+
+  rm -f "$ac_tmp/stdin"
+  case $ac_file in
+  -) cat "$ac_tmp/out" && rm -f "$ac_tmp/out";;
+  *) rm -f "$ac_file" && mv "$ac_tmp/out" "$ac_file";;
+  esac \
+  || as_fn_error $? "could not create $ac_file" "$LINENO" 5
+ ;;
+  :H)
+  #
+  # CONFIG_HEADER
+  #
+  if test x"$ac_file" != x-; then
+    {
+      printf "%s\n" "/* $configure_input  */" >&1 \
+      && eval '$AWK -f "$ac_tmp/defines.awk"' "$ac_file_inputs"
+    } >"$ac_tmp/config.h" \
+      || as_fn_error $? "could not create $ac_file" "$LINENO" 5
+    if diff "$ac_file" "$ac_tmp/config.h" >/dev/null 2>&1; then
+      { printf "%s\n" "$as_me:${as_lineno-$LINENO}: $ac_file is unchanged" >&5
+printf "%s\n" "$as_me: $ac_file is unchanged" >&6;}
+    else
+      rm -f "$ac_file"
+      mv "$ac_tmp/config.h" "$ac_file" \
+       || as_fn_error $? "could not create $ac_file" "$LINENO" 5
+    fi
+  else
+    printf "%s\n" "/* $configure_input  */" >&1 \
+      && eval '$AWK -f "$ac_tmp/defines.awk"' "$ac_file_inputs" \
+      || as_fn_error $? "could not create -" "$LINENO" 5
+  fi
+# Compute "$ac_file"'s index in $config_headers.
+_am_arg="$ac_file"
+_am_stamp_count=1
+for _am_header in $config_headers :; do
+  case $_am_header in
+    $_am_arg | $_am_arg:* )
+      break ;;
+    * )
+      _am_stamp_count=`expr $_am_stamp_count + 1` ;;
+  esac
+done
+echo "timestamp for $_am_arg" >`$as_dirname -- "$_am_arg" ||
+$as_expr X"$_am_arg" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+        X"$_am_arg" : 'X\(//\)[^/]' \| \
+        X"$_am_arg" : 'X\(//\)$' \| \
+        X"$_am_arg" : 'X\(/\)' \| . 2>/dev/null ||
+printf "%s\n" X"$_am_arg" |
+    sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
+           s//\1/
+           q
+         }
+         /^X\(\/\/\)[^/].*/{
+           s//\1/
+           q
+         }
+         /^X\(\/\/\)$/{
+           s//\1/
+           q
+         }
+         /^X\(\/\).*/{
+           s//\1/
+           q
+         }
+         s/.*/./; q'`/stamp-h$_am_stamp_count
+ ;;
+  :L)
+  #
+  # CONFIG_LINK
+  #
+
+  if test "$ac_source" = "$ac_file" && test "$srcdir" = '.'; then
+    :
+  else
+    # Prefer the file from the source tree if names are identical.
+    if test "$ac_source" = "$ac_file" || test ! -r "$ac_source"; then
+      ac_source=$srcdir/$ac_source
+    fi
+
+    { printf "%s\n" "$as_me:${as_lineno-$LINENO}: linking $ac_source to 
$ac_file" >&5
+printf "%s\n" "$as_me: linking $ac_source to $ac_file" >&6;}
+
+    if test ! -r "$ac_source"; then
+      as_fn_error $? "$ac_source: file not found" "$LINENO" 5
+    fi
+    rm -f "$ac_file"
+
+    # Try a relative symlink, then a hard link, then a copy.
+    case $ac_source in
+    [\\/$]* | ?:[\\/]* ) ac_rel_source=$ac_source ;;
+       *) ac_rel_source=$ac_top_build_prefix$ac_source ;;
+    esac
+    ln -s "$ac_rel_source" "$ac_file" 2>/dev/null ||
+      ln "$ac_source" "$ac_file" 2>/dev/null ||
+      cp -p "$ac_source" "$ac_file" ||
+      as_fn_error $? "cannot link or copy $ac_source to $ac_file" "$LINENO" 5
+  fi
+ ;;
+  :C)  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: executing $ac_file 
commands" >&5
+printf "%s\n" "$as_me: executing $ac_file commands" >&6;}
+ ;;
+  esac
+
+
+  case $ac_file$ac_mode in
+    "depfiles":C) test x"$AMDEP_TRUE" != x"" || {
+  # Older Autoconf quotes --file arguments for eval, but not when files
+  # are listed without --file.  Let's play safe and only enable the eval
+  # if we detect the quoting.
+  # TODO: see whether this extra hack can be removed once we start
+  # requiring Autoconf 2.70 or later.
+  case $CONFIG_FILES in #(
+  *\'*) :
+    eval set x "$CONFIG_FILES" ;; #(
+  *) :
+    set x $CONFIG_FILES ;; #(
+  *) :
+     ;;
+esac
+  shift
+  # Used to flag and report bootstrapping failures.
+  am_rc=0
+  for am_mf
+  do
+    # Strip MF so we end up with the name of the file.
+    am_mf=`printf "%s\n" "$am_mf" | sed -e 's/:.*$//'`
+    # Check whether this is an Automake generated Makefile which includes
+    # dependency-tracking related rules and includes.
+    # Grep'ing the whole file directly is not great: AIX grep has a line
+    # limit of 2048, but all sed's we know have understand at least 4000.
+    sed -n 's,^am--depfiles:.*,X,p' "$am_mf" | grep X >/dev/null 2>&1 \
+      || continue
+    am_dirpart=`$as_dirname -- "$am_mf" ||
+$as_expr X"$am_mf" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
+        X"$am_mf" : 'X\(//\)[^/]' \| \
+        X"$am_mf" : 'X\(//\)$' \| \
+        X"$am_mf" : 'X\(/\)' \| . 2>/dev/null ||
+printf "%s\n" X"$am_mf" |
+    sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
+           s//\1/
+           q
+         }
+         /^X\(\/\/\)[^/].*/{
+           s//\1/
+           q
+         }
+         /^X\(\/\/\)$/{
+           s//\1/
+           q
+         }
+         /^X\(\/\).*/{
+           s//\1/
+           q
+         }
+         s/.*/./; q'`
+    am_filepart=`$as_basename -- "$am_mf" ||
+$as_expr X/"$am_mf" : '.*/\([^/][^/]*\)/*$' \| \
+        X"$am_mf" : 'X\(//\)$' \| \
+        X"$am_mf" : 'X\(/\)' \| . 2>/dev/null ||
+printf "%s\n" X/"$am_mf" |
+    sed '/^.*\/\([^/][^/]*\)\/*$/{
+           s//\1/
+           q
+         }
+         /^X\/\(\/\/\)$/{
+           s//\1/
+           q
+         }
+         /^X\/\(\/\).*/{
+           s//\1/
+           q
+         }
+         s/.*/./; q'`
+    { echo "$as_me:$LINENO: cd "$am_dirpart" \
+      && sed -e '/# am--include-marker/d' "$am_filepart" \
+        | $MAKE -f - am--depfiles" >&5
+   (cd "$am_dirpart" \
+      && sed -e '/# am--include-marker/d' "$am_filepart" \
+        | $MAKE -f - am--depfiles) >&5 2>&5
+   ac_status=$?
+   echo "$as_me:$LINENO: \$? = $ac_status" >&5
+   (exit $ac_status); } || am_rc=$?
+  done
+  if test $am_rc -ne 0; then
+    { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "Something went wrong bootstrapping makefile fragments
+    for automatic dependency tracking.  If GNU make was not used, consider
+    re-running the configure script with MAKE=\"gmake\" (or whatever is
+    necessary).  You can also try re-running configure with the
+    '--disable-dependency-tracking' option to at least be able to build
+    the package (albeit without support for automatic dependency tracking).
+See \`config.log' for more details" "$LINENO" 5; }
+  fi
+  { am_dirpart=; unset am_dirpart;}
+  { am_filepart=; unset am_filepart;}
+  { am_mf=; unset am_mf;}
+  { am_rc=; unset am_rc;}
+  rm -f conftest-deps.mk
+}
+ ;;
+    "libtool":C)
+
+    # See if we are running on zsh, and set the options which allow our
+    # commands through without removal of \ escapes.
+    if test -n "${ZSH_VERSION+set}" ; then
+      setopt NO_GLOB_SUBST
+    fi
+
+    cfgfile="${ofile}T"
+    trap "$RM \"$cfgfile\"; exit 1" 1 2 15
+    $RM "$cfgfile"
+
+    cat <<_LT_EOF >> "$cfgfile"
+#! $SHELL
+
+# `$ECHO "$ofile" | sed 's%^.*/%%'` - Provide generalized library-building 
support services.
+# Generated automatically by $as_me ($PACKAGE$TIMESTAMP) $VERSION
+# Libtool was configured on host `(hostname || uname -n) 2>/dev/null | sed 1q`:
+# NOTE: Changes made to this file will be lost: look at ltmain.sh.
+#
+#   Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004, 2005,
+#                 2006, 2007, 2008, 2009, 2010, 2011 Free Software
+#                 Foundation, Inc.
+#   Written by Gordon Matzigkeit, 1996
+#
+#   This file is part of GNU Libtool.
+#
+# GNU Libtool is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 2 of
+# the License, or (at your option) any later version.
+#
+# As a special exception to the GNU General Public License,
+# if you distribute this file as part of a program or library that
+# is built using GNU Libtool, you may include this file under the
+# same distribution terms that you use for the rest of that program.
+#
+# GNU Libtool is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GNU Libtool; see the file COPYING.  If not, a copy
+# can be downloaded from https://www.gnu.org/licenses/gpl.html, or
+# obtained by writing to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+
+# The names of the tagged configurations supported by this script.
+available_tags="RC "
+
+# ### BEGIN LIBTOOL CONFIG
+
+# Which release of libtool.m4 was used?
+macro_version=$macro_version
+macro_revision=$macro_revision
+
+# Assembler program.
+AS=$lt_AS
+
+# DLL creation program.
+DLLTOOL=$lt_DLLTOOL
+
+# Object dumper program.
+OBJDUMP=$lt_OBJDUMP
+
+# Whether or not to build static libraries.
+build_old_libs=$enable_static
+
+# Whether or not to build shared libraries.
+build_libtool_libs=$enable_shared
+
+# What type of objects to build.
+pic_mode=$pic_mode
+
+# Whether or not to optimize for fast installation.
+fast_install=$enable_fast_install
+
+# Shell to use when invoking shell scripts.
+SHELL=$lt_SHELL
+
+# An echo program that protects backslashes.
+ECHO=$lt_ECHO
+
+# The PATH separator for the build system.
+PATH_SEPARATOR=$lt_PATH_SEPARATOR
+
+# The host system.
+host_alias=$host_alias
+host=$host
+host_os=$host_os
+
+# The build system.
+build_alias=$build_alias
+build=$build
+build_os=$build_os
+
+# A sed program that does not truncate output.
+SED=$lt_SED
+
+# Sed that helps us avoid accidentally triggering echo(1) options like -n.
+Xsed="\$SED -e 1s/^X//"
+
+# A grep program that handles long lines.
+GREP=$lt_GREP
+
+# An ERE matcher.
+EGREP=$lt_EGREP
+
+# A literal string matcher.
+FGREP=$lt_FGREP
+
+# A BSD- or MS-compatible name lister.
+NM=$lt_NM
+
+# Whether we need soft or hard links.
+LN_S=$lt_LN_S
+
+# What is the maximum length of a command?
+max_cmd_len=$max_cmd_len
+
+# Object file suffix (normally "o").
+objext=$ac_objext
+
+# Executable file suffix (normally "").
+exeext=$exeext
+
+# whether the shell understands "unset".
+lt_unset=$lt_unset
+
+# turn spaces into newlines.
+SP2NL=$lt_lt_SP2NL
+
+# turn newlines into spaces.
+NL2SP=$lt_lt_NL2SP
+
+# convert \$build file names to \$host format.
+to_host_file_cmd=$lt_cv_to_host_file_cmd
+
+# convert \$build files to toolchain format.
+to_tool_file_cmd=$lt_cv_to_tool_file_cmd
+
+# Method to check whether dependent libraries are shared objects.
+deplibs_check_method=$lt_deplibs_check_method
+
+# Command to use when deplibs_check_method = "file_magic".
+file_magic_cmd=$lt_file_magic_cmd
+
+# How to find potential files when deplibs_check_method = "file_magic".
+file_magic_glob=$lt_file_magic_glob
+
+# Find potential files using nocaseglob when deplibs_check_method = 
"file_magic".
+want_nocaseglob=$lt_want_nocaseglob
+
+# Command to associate shared and link libraries.
+sharedlib_from_linklib_cmd=$lt_sharedlib_from_linklib_cmd
+
+# The archiver.
+AR=$lt_AR
+
+# Flags to create an archive.
+AR_FLAGS=$lt_AR_FLAGS
+
+# How to feed a file listing to the archiver.
+archiver_list_spec=$lt_archiver_list_spec
+
+# A symbol stripping program.
+STRIP=$lt_STRIP
+
+# Commands used to install an old-style archive.
+RANLIB=$lt_RANLIB
+old_postinstall_cmds=$lt_old_postinstall_cmds
+old_postuninstall_cmds=$lt_old_postuninstall_cmds
+
+# Whether to use a lock for old archive extraction.
+lock_old_archive_extraction=$lock_old_archive_extraction
+
+# A C compiler.
+LTCC=$lt_CC
+
+# LTCC compiler flags.
+LTCFLAGS=$lt_CFLAGS
+
+# Take the output of nm and produce a listing of raw symbols and C names.
+global_symbol_pipe=$lt_lt_cv_sys_global_symbol_pipe
+
+# Transform the output of nm in a proper C declaration.
+global_symbol_to_cdecl=$lt_lt_cv_sys_global_symbol_to_cdecl
+
+# Transform the output of nm in a C name address pair.
+global_symbol_to_c_name_address=$lt_lt_cv_sys_global_symbol_to_c_name_address
+
+# Transform the output of nm in a C name address pair when lib prefix is 
needed.
+global_symbol_to_c_name_address_lib_prefix=$lt_lt_cv_sys_global_symbol_to_c_name_address_lib_prefix
+
+# Specify filename containing input files for \$NM.
+nm_file_list_spec=$lt_nm_file_list_spec
+
+# The root where to search for dependent libraries,and in which our libraries 
should be installed.
+lt_sysroot=$lt_sysroot
+
+# The name of the directory that contains temporary libtool files.
+objdir=$objdir
+
+# Used to examine libraries when file_magic_cmd begins with "file".
+MAGIC_CMD=$MAGIC_CMD
+
+# Must we lock files when doing compilation?
+need_locks=$lt_need_locks
+
+# Manifest tool.
+MANIFEST_TOOL=$lt_MANIFEST_TOOL
+
+# Tool to manipulate archived DWARF debug symbol files on Mac OS X.
+DSYMUTIL=$lt_DSYMUTIL
+
+# Tool to change global to local symbols on Mac OS X.
+NMEDIT=$lt_NMEDIT
+
+# Tool to manipulate fat objects and archives on Mac OS X.
+LIPO=$lt_LIPO
+
+# ldd/readelf like tool for Mach-O binaries on Mac OS X.
+OTOOL=$lt_OTOOL
+
+# ldd/readelf like tool for 64 bit Mach-O binaries on Mac OS X 10.4.
+OTOOL64=$lt_OTOOL64
+
+# Old archive suffix (normally "a").
+libext=$libext
+
+# Shared library suffix (normally ".so").
+shrext_cmds=$lt_shrext_cmds
+
+# The commands to extract the exported symbol list from a shared archive.
+extract_expsyms_cmds=$lt_extract_expsyms_cmds
+
+# Variables whose values should be saved in libtool wrapper scripts and
+# restored at link time.
+variables_saved_for_relink=$lt_variables_saved_for_relink
+
+# Do we need the "lib" prefix for modules?
+need_lib_prefix=$need_lib_prefix
+
+# Do we need a version for libraries?
+need_version=$need_version
+
+# Library versioning type.
+version_type=$version_type
+
+# Shared library runtime path variable.
+runpath_var=$runpath_var
+
+# Shared library path variable.
+shlibpath_var=$shlibpath_var
+
+# Is shlibpath searched before the hard-coded library search path?
+shlibpath_overrides_runpath=$shlibpath_overrides_runpath
+
+# Format of library name prefix.
+libname_spec=$lt_libname_spec
+
+# List of archive names.  First name is the real one, the rest are links.
+# The last name is the one that the linker finds with -lNAME
+library_names_spec=$lt_library_names_spec
+
+# The coded name of the library, if different from the real name.
+soname_spec=$lt_soname_spec
+
+# Permission mode override for installation of shared libraries.
+install_override_mode=$lt_install_override_mode
+
+# Command to use after installation of a shared archive.
+postinstall_cmds=$lt_postinstall_cmds
+
+# Command to use after uninstallation of a shared archive.
+postuninstall_cmds=$lt_postuninstall_cmds
+
+# Commands used to finish a libtool library installation in a directory.
+finish_cmds=$lt_finish_cmds
+
+# As "finish_cmds", except a single script fragment to be evaled but
+# not shown.
+finish_eval=$lt_finish_eval
+
+# Whether we should hardcode library paths into libraries.
+hardcode_into_libs=$hardcode_into_libs
+
+# Compile-time system search path for libraries.
+sys_lib_search_path_spec=$lt_sys_lib_search_path_spec
+
+# Run-time system search path for libraries.
+sys_lib_dlsearch_path_spec=$lt_sys_lib_dlsearch_path_spec
+
+# Whether dlopen is supported.
+dlopen_support=$enable_dlopen
+
+# Whether dlopen of programs is supported.
+dlopen_self=$enable_dlopen_self
+
+# Whether dlopen of statically linked programs is supported.
+dlopen_self_static=$enable_dlopen_self_static
+
+# Commands to strip libraries.
+old_striplib=$lt_old_striplib
+striplib=$lt_striplib
+
+
+# The linker used to build libraries.
+LD=$lt_LD
+
+# How to create reloadable object files.
+reload_flag=$lt_reload_flag
+reload_cmds=$lt_reload_cmds
+
+# Commands used to build an old-style archive.
+old_archive_cmds=$lt_old_archive_cmds
+
+# A language specific compiler.
+CC=$lt_compiler
+
+# Is the compiler the GNU compiler?
+with_gcc=$GCC
+
+# Compiler flag to turn off builtin functions.
+no_builtin_flag=$lt_lt_prog_compiler_no_builtin_flag
+
+# Additional compiler flags for building library objects.
+pic_flag=$lt_lt_prog_compiler_pic
+
+# How to pass a linker flag through the compiler.
+wl=$lt_lt_prog_compiler_wl
+
+# Compiler flag to prevent dynamic linking.
+link_static_flag=$lt_lt_prog_compiler_static
+
+# Does compiler simultaneously support -c and -o options?
+compiler_c_o=$lt_lt_cv_prog_compiler_c_o
+
+# Whether or not to add -lc for building shared libraries.
+build_libtool_need_lc=$archive_cmds_need_lc
+
+# Whether or not to disallow shared libs when runtime libs are static.
+allow_libtool_libs_with_static_runtimes=$enable_shared_with_static_runtimes
+
+# Compiler flag to allow reflexive dlopens.
+export_dynamic_flag_spec=$lt_export_dynamic_flag_spec
+
+# Compiler flag to generate shared objects directly from archives.
+whole_archive_flag_spec=$lt_whole_archive_flag_spec
+
+# Whether the compiler copes with passing no objects directly.
+compiler_needs_object=$lt_compiler_needs_object
+
+# Create an old-style archive from a shared archive.
+old_archive_from_new_cmds=$lt_old_archive_from_new_cmds
+
+# Create a temporary old-style archive to link instead of a shared archive.
+old_archive_from_expsyms_cmds=$lt_old_archive_from_expsyms_cmds
+
+# Commands used to build a shared archive.
+archive_cmds=$lt_archive_cmds
+archive_expsym_cmds=$lt_archive_expsym_cmds
+
+# Commands used to build a loadable module if different from building
+# a shared archive.
+module_cmds=$lt_module_cmds
+module_expsym_cmds=$lt_module_expsym_cmds
+
+# Whether we are building with GNU ld or not.
+with_gnu_ld=$lt_with_gnu_ld
+
+# Flag that allows shared libraries with undefined symbols to be built.
+allow_undefined_flag=$lt_allow_undefined_flag
+
+# Flag that enforces no undefined symbols.
+no_undefined_flag=$lt_no_undefined_flag
+
+# Flag to hardcode \$libdir into a binary during linking.
+# This must work even if \$libdir does not exist
+hardcode_libdir_flag_spec=$lt_hardcode_libdir_flag_spec
+
+# Whether we need a single "-rpath" flag with a separated argument.
+hardcode_libdir_separator=$lt_hardcode_libdir_separator
+
+# Set to "yes" if using DIR/libNAME\${shared_ext} during linking hardcodes
+# DIR into the resulting binary.
+hardcode_direct=$hardcode_direct
+
+# Set to "yes" if using DIR/libNAME\${shared_ext} during linking hardcodes
+# DIR into the resulting binary and the resulting library dependency is
+# "absolute",i.e impossible to change by setting \${shlibpath_var} if the
+# library is relocated.
+hardcode_direct_absolute=$hardcode_direct_absolute
+
+# Set to "yes" if using the -LDIR flag during linking hardcodes DIR
+# into the resulting binary.
+hardcode_minus_L=$hardcode_minus_L
+
+# Set to "yes" if using SHLIBPATH_VAR=DIR during linking hardcodes DIR
+# into the resulting binary.
+hardcode_shlibpath_var=$hardcode_shlibpath_var
+
+# Set to "yes" if building a shared library automatically hardcodes DIR
+# into the library and all subsequent libraries and executables linked
+# against it.
+hardcode_automatic=$hardcode_automatic
+
+# Set to yes if linker adds runtime paths of dependent libraries
+# to runtime path list.
+inherit_rpath=$inherit_rpath
+
+# Whether libtool must link a program against all its dependency libraries.
+link_all_deplibs=$link_all_deplibs
+
+# Set to "yes" if exported symbols are required.
+always_export_symbols=$always_export_symbols
+
+# The commands to list exported symbols.
+export_symbols_cmds=$lt_export_symbols_cmds
+
+# Symbols that should not be listed in the preloaded symbols.
+exclude_expsyms=$lt_exclude_expsyms
+
+# Symbols that must always be exported.
+include_expsyms=$lt_include_expsyms
+
+# Commands necessary for linking programs (against libraries) with templates.
+prelink_cmds=$lt_prelink_cmds
+
+# Commands necessary for finishing linking programs.
+postlink_cmds=$lt_postlink_cmds
+
+# Specify filename containing input files.
+file_list_spec=$lt_file_list_spec
+
+# How to hardcode a shared library path into an executable.
+hardcode_action=$hardcode_action
+
+# ### END LIBTOOL CONFIG
+
+_LT_EOF
+
+  case $host_os in
+  aix3*)
+    cat <<\_LT_EOF >> "$cfgfile"
+# AIX sometimes has problems with the GCC collect2 program.  For some
+# reason, if we set the COLLECT_NAMES environment variable, the problems
+# vanish in a puff of smoke.
+if test "X${COLLECT_NAMES+set}" != Xset; then
+  COLLECT_NAMES=
+  export COLLECT_NAMES
+fi
+_LT_EOF
+    ;;
+  esac
+
+
+
+ltmain="$ac_aux_dir/ltmain.sh"
+
+
+  # We use sed instead of cat because bash on DJGPP gets confused if
+  # if finds mixed CR/LF and LF-only lines.  Since sed operates in
+  # text mode, it properly converts lines to CR/LF.  This bash problem
+  # is reportedly fixed, but why not run on old versions too?
+  sed '$q' "$ltmain" >> "$cfgfile" \
+     || (rm -f "$cfgfile"; exit 1)
+
+  if test x"$xsi_shell" = xyes; then
+  sed -e '/^func_dirname ()$/,/^} # func_dirname /c\
+func_dirname ()\
+{\
+\    case ${1} in\
+\      */*) func_dirname_result="${1%/*}${2}" ;;\
+\      *  ) func_dirname_result="${3}" ;;\
+\    esac\
+} # Extended-shell func_dirname implementation' "$cfgfile" > $cfgfile.tmp \
+  && mv -f "$cfgfile.tmp" "$cfgfile" \
+    || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f 
"$cfgfile.tmp")
+test 0 -eq $? || _lt_function_replace_fail=:
+
+
+  sed -e '/^func_basename ()$/,/^} # func_basename /c\
+func_basename ()\
+{\
+\    func_basename_result="${1##*/}"\
+} # Extended-shell func_basename implementation' "$cfgfile" > $cfgfile.tmp \
+  && mv -f "$cfgfile.tmp" "$cfgfile" \
+    || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f 
"$cfgfile.tmp")
+test 0 -eq $? || _lt_function_replace_fail=:
+
+
+  sed -e '/^func_dirname_and_basename ()$/,/^} # func_dirname_and_basename /c\
+func_dirname_and_basename ()\
+{\
+\    case ${1} in\
+\      */*) func_dirname_result="${1%/*}${2}" ;;\
+\      *  ) func_dirname_result="${3}" ;;\
+\    esac\
+\    func_basename_result="${1##*/}"\
+} # Extended-shell func_dirname_and_basename implementation' "$cfgfile" > 
$cfgfile.tmp \
+  && mv -f "$cfgfile.tmp" "$cfgfile" \
+    || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f 
"$cfgfile.tmp")
+test 0 -eq $? || _lt_function_replace_fail=:
+
+
+  sed -e '/^func_stripname ()$/,/^} # func_stripname /c\
+func_stripname ()\
+{\
+\    # pdksh 5.2.14 does not do ${X%$Y} correctly if both X and Y are\
+\    # positional parameters, so assign one to ordinary parameter first.\
+\    func_stripname_result=${3}\
+\    func_stripname_result=${func_stripname_result#"${1}"}\
+\    func_stripname_result=${func_stripname_result%"${2}"}\
+} # Extended-shell func_stripname implementation' "$cfgfile" > $cfgfile.tmp \
+  && mv -f "$cfgfile.tmp" "$cfgfile" \
+    || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f 
"$cfgfile.tmp")
+test 0 -eq $? || _lt_function_replace_fail=:
+
+
+  sed -e '/^func_split_long_opt ()$/,/^} # func_split_long_opt /c\
+func_split_long_opt ()\
+{\
+\    func_split_long_opt_name=${1%%=*}\
+\    func_split_long_opt_arg=${1#*=}\
+} # Extended-shell func_split_long_opt implementation' "$cfgfile" > 
$cfgfile.tmp \
+  && mv -f "$cfgfile.tmp" "$cfgfile" \
+    || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f 
"$cfgfile.tmp")
+test 0 -eq $? || _lt_function_replace_fail=:
+
+
+  sed -e '/^func_split_short_opt ()$/,/^} # func_split_short_opt /c\
+func_split_short_opt ()\
+{\
+\    func_split_short_opt_arg=${1#??}\
+\    func_split_short_opt_name=${1%"$func_split_short_opt_arg"}\
+} # Extended-shell func_split_short_opt implementation' "$cfgfile" > 
$cfgfile.tmp \
+  && mv -f "$cfgfile.tmp" "$cfgfile" \
+    || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f 
"$cfgfile.tmp")
+test 0 -eq $? || _lt_function_replace_fail=:
+
+
+  sed -e '/^func_lo2o ()$/,/^} # func_lo2o /c\
+func_lo2o ()\
+{\
+\    case ${1} in\
+\      *.lo) func_lo2o_result=${1%.lo}.${objext} ;;\
+\      *)    func_lo2o_result=${1} ;;\
+\    esac\
+} # Extended-shell func_lo2o implementation' "$cfgfile" > $cfgfile.tmp \
+  && mv -f "$cfgfile.tmp" "$cfgfile" \
+    || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f 
"$cfgfile.tmp")
+test 0 -eq $? || _lt_function_replace_fail=:
+
+
+  sed -e '/^func_xform ()$/,/^} # func_xform /c\
+func_xform ()\
+{\
+    func_xform_result=${1%.*}.lo\
+} # Extended-shell func_xform implementation' "$cfgfile" > $cfgfile.tmp \
+  && mv -f "$cfgfile.tmp" "$cfgfile" \
+    || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f 
"$cfgfile.tmp")
+test 0 -eq $? || _lt_function_replace_fail=:
+
+
+  sed -e '/^func_arith ()$/,/^} # func_arith /c\
+func_arith ()\
+{\
+    func_arith_result=$(( $* ))\
+} # Extended-shell func_arith implementation' "$cfgfile" > $cfgfile.tmp \
+  && mv -f "$cfgfile.tmp" "$cfgfile" \
+    || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f 
"$cfgfile.tmp")
+test 0 -eq $? || _lt_function_replace_fail=:
+
+
+  sed -e '/^func_len ()$/,/^} # func_len /c\
+func_len ()\
+{\
+    func_len_result=${#1}\
+} # Extended-shell func_len implementation' "$cfgfile" > $cfgfile.tmp \
+  && mv -f "$cfgfile.tmp" "$cfgfile" \
+    || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f 
"$cfgfile.tmp")
+test 0 -eq $? || _lt_function_replace_fail=:
+
+fi
+
+if test x"$lt_shell_append" = xyes; then
+  sed -e '/^func_append ()$/,/^} # func_append /c\
+func_append ()\
+{\
+    eval "${1}+=\\${2}"\
+} # Extended-shell func_append implementation' "$cfgfile" > $cfgfile.tmp \
+  && mv -f "$cfgfile.tmp" "$cfgfile" \
+    || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f 
"$cfgfile.tmp")
+test 0 -eq $? || _lt_function_replace_fail=:
+
+
+  sed -e '/^func_append_quoted ()$/,/^} # func_append_quoted /c\
+func_append_quoted ()\
+{\
+\    func_quote_for_eval "${2}"\
+\    eval "${1}+=\\\\ \\$func_quote_for_eval_result"\
+} # Extended-shell func_append_quoted implementation' "$cfgfile" > 
$cfgfile.tmp \
+  && mv -f "$cfgfile.tmp" "$cfgfile" \
+    || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f 
"$cfgfile.tmp")
+test 0 -eq $? || _lt_function_replace_fail=:
+
+
+  # Save a `func_append' function call where possible by direct use of '+='
+  sed -e 's%func_append \([a-zA-Z_]\{1,\}\) "%\1+="%g' $cfgfile > $cfgfile.tmp 
\
+    && mv -f "$cfgfile.tmp" "$cfgfile" \
+      || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f 
"$cfgfile.tmp")
+  test 0 -eq $? || _lt_function_replace_fail=:
+else
+  # Save a `func_append' function call even when '+=' is not available
+  sed -e 's%func_append \([a-zA-Z_]\{1,\}\) "%\1="$\1%g' $cfgfile > 
$cfgfile.tmp \
+    && mv -f "$cfgfile.tmp" "$cfgfile" \
+      || (rm -f "$cfgfile" && cp "$cfgfile.tmp" "$cfgfile" && rm -f 
"$cfgfile.tmp")
+  test 0 -eq $? || _lt_function_replace_fail=:
+fi
+
+if test x"$_lt_function_replace_fail" = x":"; then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: Unable to substitute 
extended shell functions in $ofile" >&5
+printf "%s\n" "$as_me: WARNING: Unable to substitute extended shell functions 
in $ofile" >&2;}
+fi
+
+
+   mv -f "$cfgfile" "$ofile" ||
+    (rm -f "$ofile" && cp "$cfgfile" "$ofile" && rm -f "$cfgfile")
+  chmod +x "$ofile"
+
+
+    cat <<_LT_EOF >> "$ofile"
+
+# ### BEGIN LIBTOOL TAG CONFIG: RC
+
+# The linker used to build libraries.
+LD=$lt_LD_RC
+
+# How to create reloadable object files.
+reload_flag=$lt_reload_flag_RC
+reload_cmds=$lt_reload_cmds_RC
+
+# Commands used to build an old-style archive.
+old_archive_cmds=$lt_old_archive_cmds_RC
+
+# A language specific compiler.
+CC=$lt_compiler_RC
+
+# Is the compiler the GNU compiler?
+with_gcc=$GCC_RC
+
+# Compiler flag to turn off builtin functions.
+no_builtin_flag=$lt_lt_prog_compiler_no_builtin_flag_RC
+
+# Additional compiler flags for building library objects.
+pic_flag=$lt_lt_prog_compiler_pic_RC
+
+# How to pass a linker flag through the compiler.
+wl=$lt_lt_prog_compiler_wl_RC
+
+# Compiler flag to prevent dynamic linking.
+link_static_flag=$lt_lt_prog_compiler_static_RC
+
+# Does compiler simultaneously support -c and -o options?
+compiler_c_o=$lt_lt_cv_prog_compiler_c_o_RC
+
+# Whether or not to add -lc for building shared libraries.
+build_libtool_need_lc=$archive_cmds_need_lc_RC
+
+# Whether or not to disallow shared libs when runtime libs are static.
+allow_libtool_libs_with_static_runtimes=$enable_shared_with_static_runtimes_RC
+
+# Compiler flag to allow reflexive dlopens.
+export_dynamic_flag_spec=$lt_export_dynamic_flag_spec_RC
+
+# Compiler flag to generate shared objects directly from archives.
+whole_archive_flag_spec=$lt_whole_archive_flag_spec_RC
+
+# Whether the compiler copes with passing no objects directly.
+compiler_needs_object=$lt_compiler_needs_object_RC
+
+# Create an old-style archive from a shared archive.
+old_archive_from_new_cmds=$lt_old_archive_from_new_cmds_RC
+
+# Create a temporary old-style archive to link instead of a shared archive.
+old_archive_from_expsyms_cmds=$lt_old_archive_from_expsyms_cmds_RC
+
+# Commands used to build a shared archive.
+archive_cmds=$lt_archive_cmds_RC
+archive_expsym_cmds=$lt_archive_expsym_cmds_RC
+
+# Commands used to build a loadable module if different from building
+# a shared archive.
+module_cmds=$lt_module_cmds_RC
+module_expsym_cmds=$lt_module_expsym_cmds_RC
+
+# Whether we are building with GNU ld or not.
+with_gnu_ld=$lt_with_gnu_ld_RC
+
+# Flag that allows shared libraries with undefined symbols to be built.
+allow_undefined_flag=$lt_allow_undefined_flag_RC
+
+# Flag that enforces no undefined symbols.
+no_undefined_flag=$lt_no_undefined_flag_RC
+
+# Flag to hardcode \$libdir into a binary during linking.
+# This must work even if \$libdir does not exist
+hardcode_libdir_flag_spec=$lt_hardcode_libdir_flag_spec_RC
+
+# Whether we need a single "-rpath" flag with a separated argument.
+hardcode_libdir_separator=$lt_hardcode_libdir_separator_RC
+
+# Set to "yes" if using DIR/libNAME\${shared_ext} during linking hardcodes
+# DIR into the resulting binary.
+hardcode_direct=$hardcode_direct_RC
+
+# Set to "yes" if using DIR/libNAME\${shared_ext} during linking hardcodes
+# DIR into the resulting binary and the resulting library dependency is
+# "absolute",i.e impossible to change by setting \${shlibpath_var} if the
+# library is relocated.
+hardcode_direct_absolute=$hardcode_direct_absolute_RC
+
+# Set to "yes" if using the -LDIR flag during linking hardcodes DIR
+# into the resulting binary.
+hardcode_minus_L=$hardcode_minus_L_RC
+
+# Set to "yes" if using SHLIBPATH_VAR=DIR during linking hardcodes DIR
+# into the resulting binary.
+hardcode_shlibpath_var=$hardcode_shlibpath_var_RC
+
+# Set to "yes" if building a shared library automatically hardcodes DIR
+# into the library and all subsequent libraries and executables linked
+# against it.
+hardcode_automatic=$hardcode_automatic_RC
+
+# Set to yes if linker adds runtime paths of dependent libraries
+# to runtime path list.
+inherit_rpath=$inherit_rpath_RC
+
+# Whether libtool must link a program against all its dependency libraries.
+link_all_deplibs=$link_all_deplibs_RC
+
+# Set to "yes" if exported symbols are required.
+always_export_symbols=$always_export_symbols_RC
+
+# The commands to list exported symbols.
+export_symbols_cmds=$lt_export_symbols_cmds_RC
+
+# Symbols that should not be listed in the preloaded symbols.
+exclude_expsyms=$lt_exclude_expsyms_RC
+
+# Symbols that must always be exported.
+include_expsyms=$lt_include_expsyms_RC
+
+# Commands necessary for linking programs (against libraries) with templates.
+prelink_cmds=$lt_prelink_cmds_RC
+
+# Commands necessary for finishing linking programs.
+postlink_cmds=$lt_postlink_cmds_RC
+
+# Specify filename containing input files.
+file_list_spec=$lt_file_list_spec_RC
+
+# How to hardcode a shared library path into an executable.
+hardcode_action=$hardcode_action_RC
+
+# ### END LIBTOOL TAG CONFIG: RC
+_LT_EOF
+
+ ;;
+    "libtool-patch":C)
+  if test "$build_selection" = never; then
+    echo "patch not applied"
+  elif (mv -f libtool libtool.orig; \
+        sed -f $srcdir/build-aux/libtool-patch.sed libtool.orig >libtool); then
+    echo "applied successfully"
+  elif test "$build_selection" = try; then
+    mv -f libtool.orig libtool
+    echo "patch failed, thus, using original"
+  else
+    echo "patch failed"
+    as_fn_exit 1
+  fi
+ ;;
+    "gcrypt-conf":C)
+chmod +x src/libgcrypt-config
+ ;;
+    "tests/hashtest-256g":F) chmod +x tests/hashtest-256g ;;
+    "tests/basic-disable-all-hwf":F) chmod +x tests/basic-disable-all-hwf ;;
+
+  esac
+done # for ac_tag
+
+
+as_fn_exit 0
+_ACEOF
+ac_clean_files=$ac_clean_files_save
+
+test $ac_write_fail = 0 ||
+  as_fn_error $? "write failure creating $CONFIG_STATUS" "$LINENO" 5
+
+
+# configure is writing to config.log, and then calls config.status.
+# config.status does its own redirection, appending to config.log.
+# Unfortunately, on DOS this fails, as config.log is still kept open
+# by configure, so config.status won't be able to write to it; its
+# output is simply discarded.  So we exec the FD to /dev/null,
+# effectively closing config.log, so it can be properly (re)opened and
+# appended to by config.status.  When coming back to configure, we
+# need to make the FD available again.
+if test "$no_create" != yes; then
+  ac_cs_success=:
+  ac_config_status_args=
+  test "$silent" = yes &&
+    ac_config_status_args="$ac_config_status_args --quiet"
+  exec 5>/dev/null
+  $SHELL $CONFIG_STATUS $ac_config_status_args || ac_cs_success=false
+  exec 5>>config.log
+  # Use ||, not &&, to avoid exiting from the if with $? = 1, which
+  # would make configure fail if this is the last instruction.
+  $ac_cs_success || as_fn_exit 1
+fi
+if test -n "$ac_unrecognized_opts" && test "$enable_option_checking" != no; 
then
+  { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: unrecognized options: 
$ac_unrecognized_opts" >&5
+printf "%s\n" "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" 
>&2;}
+fi
+
+
+
+detection_module="${GCRYPT_HWF_MODULES%.lo}"
+test -n "$detection_module" || detection_module="none"
+
+# Give some feedback
+
+     echo "         " 1>&6
+
+
+     echo "        Libgcrypt v${VERSION} has been configured as follows:" 1>&6
+
+
+     echo "         " 1>&6
+
+
+     echo "        Platform:                  $PRINTABLE_OS_NAME ($host)" 1>&6
+
+
+     echo "        Hardware detection module: $detection_module" 1>&6
+
+
+    tmp="        Enabled cipher algorithms:"
+    tmpi="abc"
+    if test "${#tmpi}" -ne 3 >/dev/null 2>&1 ; then
+            echo "$tmp $enabled_ciphers" 1>&6
+    else
+      tmpi=`echo "$tmp"| sed 's/./ /g'`
+      echo $enabled_ciphers EOF | tr ' ' '\n' | \
+        while read word; do
+          if test "${#tmp}" -gt 70 ; then
+            echo "$tmp" 1>&6
+            tmp="$tmpi"
+          fi
+          if test "$word" = "EOF" ; then
+            echo "$tmp" 1>&6
+          else
+            tmp="$tmp $word"
+          fi
+        done
+    fi
+
+
+    tmp="        Enabled digest algorithms:"
+    tmpi="abc"
+    if test "${#tmpi}" -ne 3 >/dev/null 2>&1 ; then
+            echo "$tmp $enabled_digests" 1>&6
+    else
+      tmpi=`echo "$tmp"| sed 's/./ /g'`
+      echo $enabled_digests EOF | tr ' ' '\n' | \
+        while read word; do
+          if test "${#tmp}" -gt 70 ; then
+            echo "$tmp" 1>&6
+            tmp="$tmpi"
+          fi
+          if test "$word" = "EOF" ; then
+            echo "$tmp" 1>&6
+          else
+            tmp="$tmp $word"
+          fi
+        done
+    fi
+
+
+    tmp="        Enabled kdf algorithms:   "
+    tmpi="abc"
+    if test "${#tmpi}" -ne 3 >/dev/null 2>&1 ; then
+            echo "$tmp $enabled_kdfs" 1>&6
+    else
+      tmpi=`echo "$tmp"| sed 's/./ /g'`
+      echo $enabled_kdfs EOF | tr ' ' '\n' | \
+        while read word; do
+          if test "${#tmp}" -gt 70 ; then
+            echo "$tmp" 1>&6
+            tmp="$tmpi"
+          fi
+          if test "$word" = "EOF" ; then
+            echo "$tmp" 1>&6
+          else
+            tmp="$tmp $word"
+          fi
+        done
+    fi
+
+
+    tmp="        Enabled pubkey algorithms:"
+    tmpi="abc"
+    if test "${#tmpi}" -ne 3 >/dev/null 2>&1 ; then
+            echo "$tmp $enabled_pubkey_ciphers" 1>&6
+    else
+      tmpi=`echo "$tmp"| sed 's/./ /g'`
+      echo $enabled_pubkey_ciphers EOF | tr ' ' '\n' | \
+        while read word; do
+          if test "${#tmp}" -gt 70 ; then
+            echo "$tmp" 1>&6
+            tmp="$tmpi"
+          fi
+          if test "$word" = "EOF" ; then
+            echo "$tmp" 1>&6
+          else
+            tmp="$tmp $word"
+          fi
+        done
+    fi
+
+
+     echo "        Random number generator:   $random" 1>&6
+
+
+     echo "        Try using jitter entropy:  $jentsupport" 1>&6
+
+
+     echo "        Using linux capabilities:  $use_capabilities" 1>&6
+
+
+     echo "        FIPS module version:       $fips_module_version" 1>&6
+
+
+     echo "        Try using Padlock crypto:  $padlocksupport" 1>&6
+
+
+     echo "        Try using AES-NI crypto:   $aesnisupport" 1>&6
+
+
+     echo "        Try using Intel SHAEXT:    $shaextsupport" 1>&6
+
+
+     echo "        Try using Intel PCLMUL:    $pclmulsupport" 1>&6
+
+
+     echo "        Try using Intel SSE4.1:    $sse41support" 1>&6
+
+
+     echo "        Try using DRNG (RDRAND):   $drngsupport" 1>&6
+
+
+     echo "        Try using Intel AVX:       $avxsupport" 1>&6
+
+
+     echo "        Try using Intel AVX2:      $avx2support" 1>&6
+
+
+     echo "        Try using ARM NEON:        $neonsupport" 1>&6
+
+
+     echo "        Try using ARMv8 crypto:    $armcryptosupport" 1>&6
+
+
+     echo "        Try using PPC crypto:      $ppccryptosupport" 1>&6
+
+
+     echo "         " 1>&6
+
+
+if test "x${gpg_config_script_warn}" != x; then
+cat <<G10EOF
+        Mismatches between the target platform and the to
+        be used libraries have been been detected for:
+         ${gpg_config_script_warn}
+        Please check above for warning messages.
+
+G10EOF
+fi
+
+if test "$gcry_cv_gcc_attribute_aligned" != "yes" ; then
+cat <<G10EOF
+   Please note that your compiler does not support the GCC style
+   aligned attribute. Using this software may evoke bus errors.
+
+G10EOF
+fi
+
+if test -n "$gpl"; then
+  echo "Please note that you are building a version of Libgcrypt with"
+  echo "  $gpl"
+  echo "included.  These parts are licensed under the GPL and thus the"
+  echo "use of this library has to comply with the conditions of the GPL."
+  echo ""
+fi
+
diff --git a/grub-core/lib/libgcrypt/configure.ac 
b/grub-core/lib/libgcrypt/configure.ac
new file mode 100644
index 000000000..6aef89781
--- /dev/null
+++ b/grub-core/lib/libgcrypt/configure.ac
@@ -0,0 +1,3394 @@
+# Configure.ac script for Libgcrypt
+# Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2006,
+#               2007, 2008, 2009, 2011 Free Software Foundation, Inc.
+# Copyright (C) 2012-2021  g10 Code GmbH
+#
+# This file is part of Libgcrypt.
+#
+# Libgcrypt is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as
+# published by the Free Software Foundation; either version 2.1 of
+# the License, or (at your option) any later version.
+#
+# Libgcrypt is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this program; if not, see <http://www.gnu.org/licenses/>.
+
+# (Process this file with autoconf to produce a configure script.)
+AC_REVISION($Revision$)
+AC_PREREQ([2.69])
+min_automake_version="1.14"
+
+# To build a release you need to create a tag with the version number
+# (git tag -s libgcrypt-n.m.k) and run "./autogen.sh --force".  Please
+# bump the version number immediately after the release and do another
+# commit and push so that the git magic is able to work.  See below
+# for the LT versions.
+m4_define([mym4_package],[libgcrypt])
+m4_define([mym4_major], [1])
+m4_define([mym4_minor], [10])
+m4_define([mym4_micro], [3])
+
+# Below is m4 magic to extract and compute the git revision number,
+# the decimalized short revision number, a beta version string and a
+# flag indicating a development version (mym4_isbeta).  Note that the
+# m4 processing is done by autoconf and not during the configure run.
+m4_define([mym4_verslist], m4_split(m4_esyscmd([./autogen.sh --find-version] \
+                           mym4_package mym4_major mym4_minor mym4_micro),[:]))
+m4_define([mym4_isbeta],       m4_argn(2, mym4_verslist))
+m4_define([mym4_version],      m4_argn(4, mym4_verslist))
+m4_define([mym4_revision],     m4_argn(7, mym4_verslist))
+m4_define([mym4_revision_dec], m4_argn(8, mym4_verslist))
+m4_esyscmd([echo ]mym4_version[>VERSION])
+AC_INIT([mym4_package],[mym4_version],[https://bugs.gnupg.org])
+
+# LT Version numbers, remember to change them just *before* a release.
+#   (Code changed:                     REVISION++)
+#   (Interfaces added/removed/changed: CURRENT++, REVISION=0)
+#   (Interfaces added:                 AGE++)
+#   (Interfaces removed:               AGE=0)
+#
+#   (Interfaces removed:    CURRENT++, AGE=0, REVISION=0)
+#   (Interfaces added:      CURRENT++, AGE++, REVISION=0)
+#   (No interfaces changed:                   REVISION++)
+LIBGCRYPT_LT_CURRENT=24
+LIBGCRYPT_LT_AGE=4
+LIBGCRYPT_LT_REVISION=3
+################################################
+
+AC_SUBST(LIBGCRYPT_LT_CURRENT)
+AC_SUBST(LIBGCRYPT_LT_AGE)
+AC_SUBST(LIBGCRYPT_LT_REVISION)
+
+# If the API is changed in an incompatible way: increment the next counter.
+#
+# 1.6: ABI and API change but the change is to most users irrelevant
+#      and thus the API version number has not been incremented.
+LIBGCRYPT_CONFIG_API_VERSION=1
+
+# If you change the required gpg-error version, please remove
+# unnecessary error code defines in src/gcrypt-int.h.
+NEED_GPG_ERROR_VERSION=1.27
+
+AC_CONFIG_AUX_DIR([build-aux])
+AC_CONFIG_SRCDIR([src/libgcrypt.vers])
+AM_INIT_AUTOMAKE([serial-tests dist-bzip2])
+AC_CONFIG_HEADERS([config.h])
+AC_CONFIG_MACRO_DIR([m4])
+AC_CONFIG_LIBOBJ_DIR([compat])
+AC_CANONICAL_HOST
+AM_MAINTAINER_MODE
+AM_SILENT_RULES
+AC_USE_SYSTEM_EXTENSIONS
+
+AC_ARG_VAR(SYSROOT,[locate config scripts also below that directory])
+
+AH_TOP([
+#ifndef _GCRYPT_CONFIG_H_INCLUDED
+#define _GCRYPT_CONFIG_H_INCLUDED
+
+/* Enable gpg-error's strerror macro for W32CE.  */
+#define GPG_ERR_ENABLE_ERRNO_MACROS 1
+])
+
+AH_BOTTOM([
+#define _GCRYPT_IN_LIBGCRYPT 1
+
+/* Add .note.gnu.property section for Intel CET in assembler sources
+   when CET is enabled.  */
+#if defined(__ASSEMBLER__) && defined(__CET__)
+# include <cet.h>
+#endif
+
+/* If the configure check for endianness has been disabled, get it from
+   OS macros.  This is intended for making fat binary builds on OS X.  */
+#ifdef DISABLED_ENDIAN_CHECK
+# if defined(__BIG_ENDIAN__)
+#  define WORDS_BIGENDIAN 1
+# elif defined(__LITTLE_ENDIAN__)
+#  undef WORDS_BIGENDIAN
+# else
+#  error "No endianness found"
+# endif
+#endif /*DISABLED_ENDIAN_CHECK*/
+
+/* We basically use the original Camellia source.  Make sure the symbols
+   properly prefixed.  */
+#define CAMELLIA_EXT_SYM_PREFIX _gcry_
+
+#endif /*_GCRYPT_CONFIG_H_INCLUDED*/
+])
+
+AH_VERBATIM([_REENTRANT],
+[/* To allow the use of Libgcrypt in multithreaded programs we have to use
+    special features from the library. */
+#ifndef _REENTRANT
+# define _REENTRANT 1
+#endif
+])
+
+
+######################
+##  Basic checks.  ### (we need some results later on (e.g. $GCC)
+######################
+
+AC_PROG_MAKE_SET
+missing_dir=`cd $ac_aux_dir && pwd`
+AM_MISSING_PROG(ACLOCAL, aclocal, $missing_dir)
+AM_MISSING_PROG(AUTOCONF, autoconf, $missing_dir)
+AM_MISSING_PROG(AUTOMAKE, automake, $missing_dir)
+AM_MISSING_PROG(AUTOHEADER, autoheader, $missing_dir)
+# AM_MISSING_PROG(MAKEINFO, makeinfo, $missing_dir)
+AC_PROG_CC
+AC_PROG_CPP
+AM_PROG_CC_C_O
+AM_PROG_AS
+AC_SEARCH_LIBS([strerror],[cposix])
+AC_PROG_INSTALL
+AC_PROG_AWK
+
+# Taken from mpfr-4.0.1, then modified for LDADD_FOR_TESTS_KLUDGE
+dnl Under Linux, make sure that the old dtags are used if LD_LIBRARY_PATH
+dnl is defined. The issue is that with the new dtags, LD_LIBRARY_PATH has
+dnl the precedence over the run path, so that if a compatible MPFR library
+dnl is installed in some directory from $LD_LIBRARY_PATH, then the tested
+dnl MPFR library will be this library instead of the MPFR library from the
+dnl build tree. Other OS with the same issue might be added later.
+dnl
+dnl References:
+dnl   https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=859732
+dnl   http://lists.gnu.org/archive/html/libtool/2017-05/msg00000.html
+dnl
+dnl We need to check whether --disable-new-dtags is supported as alternate
+dnl linkers may be used (e.g., with tcc: CC=tcc LD=tcc).
+dnl
+case $host in
+  *-*-linux*)
+    if test -n "$LD_LIBRARY_PATH"; then
+      saved_LDFLAGS="$LDFLAGS"
+      LDADD_FOR_TESTS_KLUDGE="-Wl,--disable-new-dtags"
+      LDFLAGS="$LDFLAGS $LDADD_FOR_TESTS_KLUDGE"
+      AC_MSG_CHECKING(whether --disable-new-dtags is supported by the linker)
+      AC_LINK_IFELSE([AC_LANG_SOURCE([[
+int main (void) { return 0; }
+      ]])],
+      [AC_MSG_RESULT(yes (use it since LD_LIBRARY_PATH is set))],
+      [AC_MSG_RESULT(no)
+       LDADD_FOR_TESTS_KLUDGE=""
+      ])
+      LDFLAGS="$saved_LDFLAGS"
+    fi
+    ;;
+esac
+AC_SUBST([LDADD_FOR_TESTS_KLUDGE])
+
+VERSION_NUMBER=m4_esyscmd(printf "0x%02x%02x%02x" mym4_major \
+                          mym4_minor mym4_micro)
+AC_SUBST(VERSION_NUMBER)
+
+# We need to compile and run a program on the build machine.
+AX_CC_FOR_BUILD
+
+
+LT_PREREQ([2.2.6])
+LT_INIT([win32-dll disable-static])
+LT_LANG([Windows Resource])
+
+
+##########################
+## General definitions. ##
+##########################
+
+# Used by libgcrypt-config
+LIBGCRYPT_CONFIG_LIBS="-lgcrypt"
+LIBGCRYPT_CONFIG_CFLAGS=""
+LIBGCRYPT_CONFIG_HOST="$host"
+
+# Definitions for symmetric ciphers.
+available_ciphers="arcfour blowfish cast5 des aes twofish serpent rfc2268 seed"
+available_ciphers="$available_ciphers camellia idea salsa20 gost28147 chacha20"
+available_ciphers="$available_ciphers sm4"
+enabled_ciphers=""
+
+# Definitions for public-key ciphers.
+available_pubkey_ciphers="dsa elgamal rsa ecc"
+enabled_pubkey_ciphers=""
+
+# Definitions for message digests.
+available_digests="crc gostr3411-94 md2 md4 md5 rmd160 sha1 sha256 sha512"
+available_digests="$available_digests sha3 tiger whirlpool stribog blake2"
+available_digests="$available_digests sm3"
+enabled_digests=""
+
+# Definitions for kdfs (optional ones)
+available_kdfs="s2k pkdf2 scrypt"
+enabled_kdfs=""
+
+# Definitions for random modules.
+available_random_modules="getentropy linux egd unix"
+auto_random_modules="$available_random_modules"
+
+# Supported thread backends.
+LIBGCRYPT_THREAD_MODULES=""
+
+# Other definitions.
+have_w32_system=no
+have_w32ce_system=no
+have_pthread=no
+
+
+# Setup some stuff depending on host.
+case "${host}" in
+    *-*-mingw32*)
+      ac_cv_have_dev_random=no
+      have_w32_system=yes
+      case "${host}" in
+        *-mingw32ce*)
+            have_w32ce_system=yes
+            available_random_modules="w32ce"
+            ;;
+        *)
+            available_random_modules="w32"
+            ;;
+      esac
+      AC_DEFINE(USE_ONLY_8DOT3,1,
+                [set this to limit filenames to the 8.3 format])
+      AC_DEFINE(HAVE_DRIVE_LETTERS,1,
+                [defined if we must run on a stupid file system])
+      AC_DEFINE(HAVE_DOSISH_SYSTEM,1,
+                [defined if we run on some of the PCDOS like systems
+                 (DOS, Windoze. OS/2) with special properties like
+                  no file modes])
+      ;;
+
+    i?86-emx-os2 | i?86-*-os2*emx)
+        # OS/2 with the EMX environment
+        ac_cv_have_dev_random=no
+        AC_DEFINE(HAVE_DRIVE_LETTERS)
+        AC_DEFINE(HAVE_DOSISH_SYSTEM)
+        ;;
+
+    i?86-*-msdosdjgpp*)
+        # DOS with the DJGPP environment
+        ac_cv_have_dev_random=no
+        AC_DEFINE(HAVE_DRIVE_LETTERS)
+        AC_DEFINE(HAVE_DOSISH_SYSTEM)
+        ;;
+
+    *-*-hpux*)
+        if test -z "$GCC" ; then
+            CFLAGS="$CFLAGS -Ae -D_HPUX_SOURCE"
+        fi
+        ;;
+    *-dec-osf4*)
+        if test -z "$GCC" ; then
+            # Suppress all warnings
+            # to get rid of the unsigned/signed char mismatch warnings.
+            CFLAGS="$CFLAGS -w"
+        fi
+        ;;
+    m68k-atari-mint)
+        ;;
+    *-apple-darwin*)
+        AC_DEFINE(_DARWIN_C_SOURCE, 1,
+                  Expose all libc features (__DARWIN_C_FULL).)
+        AC_DEFINE(USE_POSIX_SPAWN_FOR_TESTS, 1,
+                  [defined if we use posix_spawn in test program])
+        AC_CHECK_HEADERS(spawn.h)
+        ;;
+    *)
+      ;;
+esac
+
+if test "$have_w32_system" = yes; then
+   AC_DEFINE(HAVE_W32_SYSTEM,1, [Defined if we run on a W32 API based system])
+   if test "$have_w32ce_system" = yes; then
+     AC_DEFINE(HAVE_W32CE_SYSTEM,1,[Defined if we run on WindowsCE])
+   fi
+fi
+AM_CONDITIONAL(HAVE_W32_SYSTEM, test "$have_w32_system" = yes)
+AM_CONDITIONAL(HAVE_W32CE_SYSTEM, test "$have_w32ce_system" = yes)
+
+
+
+# A printable OS Name is sometimes useful.
+case "${host}" in
+    *-*-mingw32ce*)
+        PRINTABLE_OS_NAME="W32CE"
+        ;;
+
+    *-*-mingw32*)
+        PRINTABLE_OS_NAME="W32"
+        ;;
+
+    i?86-emx-os2 | i?86-*-os2*emx )
+        PRINTABLE_OS_NAME="OS/2"
+        ;;
+
+    i?86-*-msdosdjgpp*)
+        PRINTABLE_OS_NAME="MSDOS/DJGPP"
+        ;;
+
+    *-linux*)
+        PRINTABLE_OS_NAME="GNU/Linux"
+        ;;
+
+    *)
+        PRINTABLE_OS_NAME=`uname -s || echo "Unknown"`
+        ;;
+esac
+
+NAME_OF_DEV_RANDOM="/dev/random"
+NAME_OF_DEV_URANDOM="/dev/urandom"
+
+AC_ARG_ENABLE(endian-check,
+              AS_HELP_STRING([--disable-endian-check],
+              [disable the endian check and trust the OS provided macros]),
+             endiancheck=$enableval,endiancheck=yes)
+if test x"$endiancheck" = xyes ; then
+  AC_C_BIGENDIAN
+else
+  AC_DEFINE(DISABLED_ENDIAN_CHECK,1,[configure did not test for endianness])
+fi
+
+AC_CHECK_SIZEOF(unsigned short, 2)
+AC_CHECK_SIZEOF(unsigned int, 4)
+AC_CHECK_SIZEOF(unsigned long, 4)
+AC_CHECK_SIZEOF(unsigned long long, 0)
+AC_CHECK_SIZEOF(void *, 0)
+
+AC_TYPE_UINTPTR_T
+
+if test "$ac_cv_sizeof_unsigned_short" = "0" \
+   || test "$ac_cv_sizeof_unsigned_int" = "0" \
+   || test "$ac_cv_sizeof_unsigned_long" = "0"; then
+    AC_MSG_WARN([Hmmm, something is wrong with the sizes - using defaults]);
+fi
+
+# Ensure that we have UINT64_C before we bother to check for uint64_t
+AC_CACHE_CHECK([for UINT64_C],[gnupg_cv_uint64_c_works],
+   AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[#include <inttypes.h>]],
+       [[uint64_t foo=UINT64_C(42);]])],
+     gnupg_cv_uint64_c_works=yes,gnupg_cv_uint64_c_works=no))
+if test "$gnupg_cv_uint64_c_works" = "yes" ; then
+   AC_CHECK_SIZEOF(uint64_t)
+fi
+
+# Do we have any 64-bit data types?
+if test "$ac_cv_sizeof_unsigned_int" != "8" \
+   && test "$ac_cv_sizeof_unsigned_long" != "8" \
+   && test "$ac_cv_sizeof_unsigned_long_long" != "8" \
+   && test "$ac_cv_sizeof_uint64_t" != "8"; then
+    AC_MSG_ERROR([[
+***
+*** No 64-bit integer type available.
+*** It is not possible to build Libgcrypt on this platform.
+***]])
+fi
+
+
+# If not specified otherwise, all available algorithms will be
+# included.
+default_ciphers="$available_ciphers"
+default_pubkey_ciphers="$available_pubkey_ciphers"
+default_digests="$available_digests"
+default_kdfs="$available_kdfs"
+# Blacklist MD2 by default
+default_digests=`echo $default_digests | sed -e 's/md2//g'`
+
+# Substitutions to set generated files in a Emacs buffer to read-only.
+AC_SUBST(emacs_local_vars_begin, ['Local Variables:'])
+AC_SUBST(emacs_local_vars_read_only, ['buffer-read-only: t'])
+AC_SUBST(emacs_local_vars_end, ['End:'])
+
+############################
+## Command line switches. ##
+############################
+
+# Implementation of the --enable-ciphers switch.
+AC_ARG_ENABLE(ciphers,
+             AS_HELP_STRING([--enable-ciphers=ciphers],
+                             [select the symmetric ciphers to include]),
+             [enabled_ciphers=`echo $enableval | tr ',:' '  ' | tr '[A-Z]' 
'[a-z]'`],
+             [enabled_ciphers=""])
+if test "x$enabled_ciphers" = "x" \
+   -o "$enabled_ciphers" = "yes"  \
+   -o "$enabled_ciphers" = "no"; then
+   enabled_ciphers=$default_ciphers
+fi
+AC_MSG_CHECKING([which symmetric ciphers to include])
+for cipher in $enabled_ciphers; do
+    LIST_MEMBER($cipher, $available_ciphers)
+    if test "$found" = "0"; then
+       AC_MSG_ERROR([unsupported cipher "$cipher" specified])
+    fi
+done
+AC_MSG_RESULT([$enabled_ciphers])
+
+# Implementation of the --enable-pubkey-ciphers switch.
+AC_ARG_ENABLE(pubkey-ciphers,
+             AS_HELP_STRING([--enable-pubkey-ciphers=ciphers],
+                             [select the public-key ciphers to include]),
+             [enabled_pubkey_ciphers=`echo $enableval | tr ',:' '  ' | tr 
'[A-Z]' '[a-z]'`],
+             [enabled_pubkey_ciphers=""])
+if test "x$enabled_pubkey_ciphers" = "x" \
+   -o "$enabled_pubkey_ciphers" = "yes"  \
+   -o "$enabled_pubkey_ciphers" = "no"; then
+   enabled_pubkey_ciphers=$default_pubkey_ciphers
+fi
+AC_MSG_CHECKING([which public-key ciphers to include])
+for cipher in $enabled_pubkey_ciphers; do
+    LIST_MEMBER($cipher, $available_pubkey_ciphers)
+    if test "$found" = "0"; then
+       AC_MSG_ERROR([unsupported public-key cipher specified])
+    fi
+done
+AC_MSG_RESULT([$enabled_pubkey_ciphers])
+
+# Implementation of the --enable-digests switch.
+AC_ARG_ENABLE(digests,
+             AS_HELP_STRING([--enable-digests=digests],
+                             [select the message digests to include]),
+             [enabled_digests=`echo $enableval | tr ',:' '  ' | tr '[A-Z]' 
'[a-z]'`],
+             [enabled_digests=""])
+if test "x$enabled_digests" = "x" \
+   -o "$enabled_digests" = "yes"  \
+   -o "$enabled_digests" = "no"; then
+   enabled_digests=$default_digests
+fi
+AC_MSG_CHECKING([which message digests to include])
+for digest in $enabled_digests; do
+    LIST_MEMBER($digest, $available_digests)
+    if test "$found" = "0"; then
+       AC_MSG_ERROR([unsupported message digest specified])
+    fi
+done
+AC_MSG_RESULT([$enabled_digests])
+
+# Implementation of the --enable-kdfs switch.
+AC_ARG_ENABLE(kdfs,
+      AS_HELP_STRING([--enable-kfds=kdfs],
+                     [select the KDFs to include]),
+      [enabled_kdfs=`echo $enableval | tr ',:' '  ' | tr '[A-Z]' '[a-z]'`],
+      [enabled_kdfs=""])
+if test "x$enabled_kdfs" = "x" \
+   -o "$enabled_kdfs" = "yes"  \
+   -o "$enabled_kdfs" = "no"; then
+   enabled_kdfs=$default_kdfs
+fi
+AC_MSG_CHECKING([which key derivation functions to include])
+for kdf in $enabled_kdfs; do
+    LIST_MEMBER($kdf, $available_kdfs)
+    if test "$found" = "0"; then
+       AC_MSG_ERROR([unsupported key derivation function specified])
+    fi
+done
+AC_MSG_RESULT([$enabled_kdfs])
+
+# Implementation of the --enable-random switch.
+AC_ARG_ENABLE(random,
+             AS_HELP_STRING([--enable-random=name],
+                             [select which random number generator to use]),
+             [random=`echo $enableval | tr '[A-Z]' '[a-z]'`],
+             [])
+if test "x$random" = "x" -o "$random" = "yes" -o "$random" = "no"; then
+    random=default
+fi
+AC_MSG_CHECKING([which random module to use])
+if test "$random" != "default" -a "$random" != "auto"; then
+    LIST_MEMBER($random, $available_random_modules)
+    if test "$found" = "0"; then
+       AC_MSG_ERROR([unsupported random module specified])
+    fi
+fi
+AC_MSG_RESULT($random)
+
+# Implementation of the --disable-dev-random switch.
+AC_MSG_CHECKING([whether use of /dev/random is requested])
+AC_ARG_ENABLE(dev-random,
+[  --disable-dev-random    disable the use of dev random],
+    try_dev_random=$enableval, try_dev_random=yes)
+AC_MSG_RESULT($try_dev_random)
+
+# Implementation of the --with-egd-socket switch.
+AC_ARG_WITH(egd-socket,
+    [  --with-egd-socket=NAME  Use NAME for the EGD socket)],
+            egd_socket_name="$withval", egd_socket_name="" )
+AC_DEFINE_UNQUOTED(EGD_SOCKET_NAME, "$egd_socket_name",
+                   [Define if you don't want the default EGD socket name.
+                    For details see cipher/rndegd.c])
+
+# Implementation of the --enable-random-daemon
+AC_MSG_CHECKING([whether the experimental random daemon is requested])
+AC_ARG_ENABLE([random-daemon],
+              AS_HELP_STRING([--enable-random-daemon],
+                             [Build the experimental gcryptrnd]),
+              [enable_random_daemon=$enableval],
+              [enable_random_daemon=no])
+AC_MSG_RESULT($enable_random_daemon)
+AM_CONDITIONAL(ENABLE_RANDOM_DAEMON, test x$enable_random_daemon = xyes)
+
+
+# Implementation of --disable-asm.
+AC_MSG_CHECKING([whether MPI and cipher assembler modules are requested])
+AC_ARG_ENABLE([asm],
+              AS_HELP_STRING([--disable-asm],
+                             [Disable MPI and cipher assembler modules]),
+              [try_asm_modules=$enableval],
+              [try_asm_modules=yes])
+AC_MSG_RESULT($try_asm_modules)
+if test "$try_asm_modules" != yes ; then
+    AC_DEFINE(ASM_DISABLED,1,[Defined if --disable-asm was used to configure])
+fi
+
+# Implementation of the --enable-m-guard switch.
+AC_MSG_CHECKING([whether memory guard is requested])
+AC_ARG_ENABLE(m-guard,
+              AS_HELP_STRING([--enable-m-guard],
+                             [Enable memory guard facility]),
+              [use_m_guard=$enableval], [use_m_guard=no])
+AC_MSG_RESULT($use_m_guard)
+if test "$use_m_guard" = yes ; then
+    AC_DEFINE(M_GUARD,1,[Define to use the (obsolete) malloc guarding feature])
+fi
+
+# Implementation of the --enable-large-data-tests switch.
+AC_MSG_CHECKING([whether to run large data tests])
+AC_ARG_ENABLE(large-data-tests,
+              AS_HELP_STRING([--enable-large-data-tests],
+                 [Enable the real long ruinning large data tests]),
+             large_data_tests=$enableval,large_data_tests=no)
+AC_MSG_RESULT($large_data_tests)
+AC_SUBST(RUN_LARGE_DATA_TESTS, $large_data_tests)
+
+# Implementation of --enable-force-soft-hwfeatures
+AC_MSG_CHECKING([whether 'soft' HW feature bits are forced on])
+AC_ARG_ENABLE([force-soft-hwfeatures],
+              AS_HELP_STRING([--enable-force-soft-hwfeatures],
+                             [Enable forcing 'soft' HW feature bits on]),
+              [force_soft_hwfeatures=$enableval],
+              [force_soft_hwfeatures=no])
+AC_MSG_RESULT($force_soft_hwfeatures)
+
+
+# Implementation of the --with-capabilities switch.
+# Check whether we want to use Linux capabilities
+AC_MSG_CHECKING([whether use of capabilities is requested])
+AC_ARG_WITH(capabilities,
+            AS_HELP_STRING([--with-capabilities],
+                           [Use linux capabilities [default=no]]),
+            [use_capabilities="$withval"],[use_capabilities=no])
+AC_MSG_RESULT($use_capabilities)
+
+# Implementation of the --enable-hmac-binary-check.
+AC_MSG_CHECKING([whether a HMAC binary check is requested])
+AC_ARG_ENABLE(hmac-binary-check,
+              AS_HELP_STRING([--enable-hmac-binary-check],
+                             [Enable library integrity check]),
+              [use_hmac_binary_check="$enableval"],
+              [use_hmac_binary_check=no])
+AC_MSG_RESULT($use_hmac_binary_check)
+if test "$use_hmac_binary_check" = no ; then
+    DEF_HMAC_BINARY_CHECK=''
+else
+    AC_DEFINE(ENABLE_HMAC_BINARY_CHECK,1,
+              [Define to support an HMAC based integrity check])
+    AC_CHECK_TOOL(OBJCOPY, [objcopy])
+    AC_CHECK_TOOL(READELF, [readelf])
+    if test "$use_hmac_binary_check" != yes ; then
+        
DEF_HMAC_BINARY_CHECK=-DKEY_FOR_BINARY_CHECK="'\"$use_hmac_binary_check\"'"
+    fi
+fi
+AM_CONDITIONAL(USE_HMAC_BINARY_CHECK, test "x$use_hmac_binary_check" != xno)
+AC_SUBST(DEF_HMAC_BINARY_CHECK)
+
+# Implementation of the --with-fips-module-version.
+AC_ARG_WITH(fips-module-version,
+            AS_HELP_STRING([--with-fips-module-version=VERSION],
+                           [Specify the FIPS module version for the build]),
+            fips_module_version="$withval", fips_module_version="" )
+AC_DEFINE_UNQUOTED(FIPS_MODULE_VERSION, "$fips_module_version",
+                   [Define FIPS module version for certification])
+
+# Implementation of the --disable-jent-support switch.
+AC_MSG_CHECKING([whether jitter entropy support is requested])
+AC_ARG_ENABLE(jent-support,
+              AS_HELP_STRING([--disable-jent-support],
+                        [Disable support for the Jitter entropy collector]),
+             jentsupport=$enableval,jentsupport=yes)
+AC_MSG_RESULT($jentsupport)
+
+# Implementation of the --disable-padlock-support switch.
+AC_MSG_CHECKING([whether padlock support is requested])
+AC_ARG_ENABLE(padlock-support,
+              AS_HELP_STRING([--disable-padlock-support],
+                        [Disable support for the PadLock Engine of VIA 
processors]),
+             padlocksupport=$enableval,padlocksupport=yes)
+AC_MSG_RESULT($padlocksupport)
+
+# Implementation of the --disable-aesni-support switch.
+AC_MSG_CHECKING([whether AESNI support is requested])
+AC_ARG_ENABLE(aesni-support,
+              AS_HELP_STRING([--disable-aesni-support],
+                 [Disable support for the Intel AES-NI instructions]),
+             aesnisupport=$enableval,aesnisupport=yes)
+AC_MSG_RESULT($aesnisupport)
+
+# Implementation of the --disable-shaext-support switch.
+AC_MSG_CHECKING([whether SHAEXT support is requested])
+AC_ARG_ENABLE(shaext-support,
+              AS_HELP_STRING([--disable-shaext-support],
+                 [Disable support for the Intel SHAEXT instructions]),
+              shaextsupport=$enableval,shaextsupport=yes)
+AC_MSG_RESULT($shaextsupport)
+
+# Implementation of the --disable-pclmul-support switch.
+AC_MSG_CHECKING([whether PCLMUL support is requested])
+AC_ARG_ENABLE(pclmul-support,
+              AS_HELP_STRING([--disable-pclmul-support],
+                 [Disable support for the Intel PCLMUL instructions]),
+             pclmulsupport=$enableval,pclmulsupport=yes)
+AC_MSG_RESULT($pclmulsupport)
+
+# Implementation of the --disable-sse41-support switch.
+AC_MSG_CHECKING([whether SSE4.1 support is requested])
+AC_ARG_ENABLE(sse41-support,
+              AS_HELP_STRING([--disable-sse41-support],
+                 [Disable support for the Intel SSE4.1 instructions]),
+             sse41support=$enableval,sse41support=yes)
+AC_MSG_RESULT($sse41support)
+
+# Implementation of the --disable-drng-support switch.
+AC_MSG_CHECKING([whether DRNG support is requested])
+AC_ARG_ENABLE(drng-support,
+              AS_HELP_STRING([--disable-drng-support],
+                 [Disable support for the Intel DRNG (RDRAND instruction)]),
+             drngsupport=$enableval,drngsupport=yes)
+AC_MSG_RESULT($drngsupport)
+
+# Implementation of the --disable-avx-support switch.
+AC_MSG_CHECKING([whether AVX support is requested])
+AC_ARG_ENABLE(avx-support,
+              AS_HELP_STRING([--disable-avx-support],
+                 [Disable support for the Intel AVX instructions]),
+             avxsupport=$enableval,avxsupport=yes)
+AC_MSG_RESULT($avxsupport)
+
+# Implementation of the --disable-avx2-support switch.
+AC_MSG_CHECKING([whether AVX2 support is requested])
+AC_ARG_ENABLE(avx2-support,
+              AS_HELP_STRING([--disable-avx2-support],
+                 [Disable support for the Intel AVX2 instructions]),
+             avx2support=$enableval,avx2support=yes)
+AC_MSG_RESULT($avx2support)
+
+# Implementation of the --disable-neon-support switch.
+AC_MSG_CHECKING([whether NEON support is requested])
+AC_ARG_ENABLE(neon-support,
+              AS_HELP_STRING([--disable-neon-support],
+                 [Disable support for the ARM NEON instructions]),
+             neonsupport=$enableval,neonsupport=yes)
+AC_MSG_RESULT($neonsupport)
+
+# Implementation of the --disable-arm-crypto-support switch.
+AC_MSG_CHECKING([whether ARMv8 Crypto Extension support is requested])
+AC_ARG_ENABLE(arm-crypto-support,
+              AS_HELP_STRING([--disable-arm-crypto-support],
+                 [Disable support for the ARMv8 Crypto Extension 
instructions]),
+             armcryptosupport=$enableval,armcryptosupport=yes)
+AC_MSG_RESULT($armcryptosupport)
+
+# Implementation of the --disable-ppc-crypto-support switch.
+AC_MSG_CHECKING([whether PPC crypto support is requested])
+AC_ARG_ENABLE(ppc-crypto-support,
+              AS_HELP_STRING([--disable-ppc-crypto-support],
+                 [Disable support for the PPC crypto instructions introduced 
in POWER 8 (PowerISA 2.07)]),
+              ppccryptosupport=$enableval,ppccryptosupport=yes)
+AC_MSG_RESULT($ppccryptosupport)
+
+# Implementation of the --disable-O-flag-munging switch.
+AC_MSG_CHECKING([whether a -O flag munging is requested])
+AC_ARG_ENABLE([O-flag-munging],
+              AS_HELP_STRING([--disable-O-flag-munging],
+                 [Disable modification of the cc -O flag]),
+              [enable_o_flag_munging=$enableval],
+              [enable_o_flag_munging=yes])
+AC_MSG_RESULT($enable_o_flag_munging)
+AM_CONDITIONAL(ENABLE_O_FLAG_MUNGING, test "$enable_o_flag_munging" = "yes")
+
+# Implementation of the --disable-instrumentation-munging switch.
+AC_MSG_CHECKING([whether a instrumentation (-fprofile, -fsanitize) munging is 
requested])
+AC_ARG_ENABLE([instrumentation-munging],
+              AS_HELP_STRING([--disable-instrumentation-munging],
+                 [Disable modification of the cc instrumentation options]),
+              [enable_instrumentation_munging=$enableval],
+              [enable_instrumentation_munging=yes])
+AC_MSG_RESULT($enable_instrumentation_munging)
+AM_CONDITIONAL(ENABLE_INSTRUMENTATION_MUNGING,
+              test "$enable_instrumentation_munging" = "yes")
+
+# Implementation of the --disable-amd64-as-feature-detection switch.
+AC_MSG_CHECKING([whether to enable AMD64 as(1) feature detection])
+AC_ARG_ENABLE(amd64-as-feature-detection,
+              AS_HELP_STRING([--disable-amd64-as-feature-detection],
+                 [Disable the auto-detection of AMD64 as(1) features]),
+             amd64_as_feature_detection=$enableval,
+              amd64_as_feature_detection=yes)
+AC_MSG_RESULT($amd64_as_feature_detection)
+
+
+AC_DEFINE_UNQUOTED(PRINTABLE_OS_NAME, "$PRINTABLE_OS_NAME",
+                   [A human readable text with the name of the OS])
+
+# For some systems we know that we have ld_version scripts.
+# Use it then as default.
+have_ld_version_script=no
+case "${host}" in
+    *-*-linux*)
+       have_ld_version_script=yes
+        ;;
+    *-*-gnu*)
+       have_ld_version_script=yes
+        ;;
+esac
+AC_ARG_ENABLE([ld-version-script],
+              AS_HELP_STRING([--enable-ld-version-script],
+                             [enable/disable use of linker version script.
+                              (default is system dependent)]),
+              [have_ld_version_script=$enableval],
+              [ : ] )
+AM_CONDITIONAL(HAVE_LD_VERSION_SCRIPT, test "$have_ld_version_script" = "yes")
+
+AC_DEFINE_UNQUOTED(NAME_OF_DEV_RANDOM, "$NAME_OF_DEV_RANDOM",
+                   [defined to the name of the strong random device])
+AC_DEFINE_UNQUOTED(NAME_OF_DEV_URANDOM, "$NAME_OF_DEV_URANDOM",
+                   [defined to the name of the weaker random device])
+
+#
+# Specify how we support our local modification of libtool for Windows
+# 64-bit.  Options are:
+#
+# (1) apply: when appying patch fails, it results failure of entire build
+# (2) never: never apply the patch (no try)
+# (3) try: use patched if it goes well, use original if fails
+#
+AC_ARG_WITH([libtool-modification],
+  AS_HELP_STRING([--with-libtool-modification=apply|never|try],
+                 [how to handle libtool modification (default=never)]),
+                 build_libtool_modification=$withval,
+                 build_libtool_modification=never)
+
+#
+# Apply a patch (locally maintained one of ours) to libtool
+#
+case $host in
+  x86_64-*mingw32*)
+AC_CONFIG_COMMANDS([libtool-patch],[[
+  if test "$build_selection" = never; then
+    echo "patch not applied"
+  elif (mv -f libtool libtool.orig; \
+        sed -f $srcdir/build-aux/libtool-patch.sed libtool.orig >libtool); then
+    echo "applied successfully"
+  elif test "$build_selection" = try; then
+    mv -f libtool.orig libtool
+    echo "patch failed, thus, using original"
+  else
+    echo "patch failed"
+    as_fn_exit 1
+  fi
+]],[build_selection=$build_libtool_modification])
+  ;;
+  *)
+  ;;
+esac
+
+###############################
+#### Checks for libraries. ####
+###############################
+
+#
+# gpg-error is required.
+#
+AM_PATH_GPG_ERROR("$NEED_GPG_ERROR_VERSION")
+if test "x$GPG_ERROR_LIBS" = "x"; then
+  AC_MSG_ERROR([libgpg-error is needed.
+                See ftp://ftp.gnupg.org/gcrypt/libgpg-error/ .])
+fi
+
+AC_DEFINE(GPG_ERR_SOURCE_DEFAULT, GPG_ERR_SOURCE_GCRYPT,
+          [The default error source for libgcrypt.])
+
+#
+# Check whether the GNU Pth library is available.  We require this
+# to build the optional gcryptrnd program.
+#
+AC_ARG_WITH(pth-prefix,
+            AS_HELP_STRING([--with-pth-prefix=PFX],
+                           [prefix where GNU Pth is installed (optional)]),
+     pth_config_prefix="$withval", pth_config_prefix="")
+if test x$pth_config_prefix != x ; then
+   PTH_CONFIG="$pth_config_prefix/bin/pth-config"
+fi
+if test "$enable_random_daemon" = "yes"; then
+  AC_PATH_PROG(PTH_CONFIG, pth-config, no)
+  if test "$PTH_CONFIG" = "no"; then
+    AC_MSG_WARN([[
+***
+*** To build the Libgcrypt's random number daemon
+*** we need the support of the GNU Portable Threads Library.
+*** Download it from ftp://ftp.gnu.org/gnu/pth/
+*** On a Debian GNU/Linux system you might want to try
+***   apt-get install libpth-dev
+***]])
+  else
+    GNUPG_PTH_VERSION_CHECK([1.3.7])
+    if test $have_pth = yes; then
+       PTH_CFLAGS=`$PTH_CONFIG --cflags`
+       PTH_LIBS=`$PTH_CONFIG --ldflags`
+       PTH_LIBS="$PTH_LIBS `$PTH_CONFIG --libs --all`"
+       AC_DEFINE(USE_GNU_PTH, 1,
+                [Defined if the GNU Portable Thread Library should be used])
+       AC_DEFINE(HAVE_PTH, 1,
+                [Defined if the GNU Pth is available])
+    fi
+  fi
+fi
+AC_SUBST(PTH_CFLAGS)
+AC_SUBST(PTH_LIBS)
+
+#
+# Check whether pthreads is available
+#
+if test "$have_w32_system" != yes; then
+  AC_CHECK_LIB(pthread,pthread_create,have_pthread=yes)
+  if test "$have_pthread" = yes; then
+    AC_DEFINE(HAVE_PTHREAD, 1 ,[Define if we have pthread.])
+  fi
+fi
+
+
+# Solaris needs -lsocket and -lnsl. Unisys system includes
+# gethostbyname in libsocket but needs libnsl for socket.
+AC_SEARCH_LIBS(setsockopt, [socket], ,
+       [AC_SEARCH_LIBS(setsockopt, [socket], , , [-lnsl])])
+AC_SEARCH_LIBS(setsockopt, [nsl])
+
+##################################
+#### Checks for header files. ####
+##################################
+
+AC_CHECK_HEADERS(unistd.h sys/auxv.h sys/random.h)
+
+
+##########################################
+#### Checks for typedefs, structures, ####
+####  and compiler characteristics.   ####
+##########################################
+
+AC_C_CONST
+AC_C_INLINE
+AC_TYPE_SIZE_T
+AC_TYPE_PID_T
+
+AC_CHECK_TYPES([byte, ushort, u16, u32, u64])
+
+gl_TYPE_SOCKLEN_T
+
+#
+# Check for __builtin_bswap32 intrinsic.
+#
+AC_CACHE_CHECK(for __builtin_bswap32,
+       [gcry_cv_have_builtin_bswap32],
+       [gcry_cv_have_builtin_bswap32=no
+        AC_LINK_IFELSE([AC_LANG_PROGRAM([],
+          [int x = 0; int y = __builtin_bswap32(x); return y;])],
+          [gcry_cv_have_builtin_bswap32=yes])])
+if test "$gcry_cv_have_builtin_bswap32" = "yes" ; then
+   AC_DEFINE(HAVE_BUILTIN_BSWAP32,1,
+             [Defined if compiler has '__builtin_bswap32' intrinsic])
+fi
+
+
+#
+# Check for __builtin_bswap64 intrinsic.
+#
+AC_CACHE_CHECK(for __builtin_bswap64,
+       [gcry_cv_have_builtin_bswap64],
+       [gcry_cv_have_builtin_bswap64=no
+        AC_LINK_IFELSE([AC_LANG_PROGRAM([],
+          [long long x = 0; long long y = __builtin_bswap64(x); return y;])],
+          [gcry_cv_have_builtin_bswap64=yes])])
+if test "$gcry_cv_have_builtin_bswap64" = "yes" ; then
+   AC_DEFINE(HAVE_BUILTIN_BSWAP64,1,
+             [Defined if compiler has '__builtin_bswap64' intrinsic])
+fi
+
+
+#
+# Check for __builtin_ctz intrinsic.
+#
+AC_CACHE_CHECK(for __builtin_ctz,
+       [gcry_cv_have_builtin_ctz],
+       [gcry_cv_have_builtin_ctz=no
+        AC_LINK_IFELSE([AC_LANG_PROGRAM([],
+          [unsigned int x = 0; int y = __builtin_ctz(x); return y;])],
+          [gcry_cv_have_builtin_ctz=yes])])
+if test "$gcry_cv_have_builtin_ctz" = "yes" ; then
+   AC_DEFINE(HAVE_BUILTIN_CTZ, 1,
+             [Defined if compiler has '__builtin_ctz' intrinsic])
+fi
+
+
+#
+# Check for __builtin_ctzl intrinsic.
+#
+AC_CACHE_CHECK(for __builtin_ctzl,
+       [gcry_cv_have_builtin_ctzl],
+       [gcry_cv_have_builtin_ctzl=no
+        AC_LINK_IFELSE([AC_LANG_PROGRAM([],
+          [unsigned long x = 0; long y = __builtin_ctzl(x); return y;])],
+          [gcry_cv_have_builtin_ctzl=yes])])
+if test "$gcry_cv_have_builtin_ctzl" = "yes" ; then
+   AC_DEFINE(HAVE_BUILTIN_CTZL, 1,
+             [Defined if compiler has '__builtin_ctzl' intrinsic])
+fi
+
+
+#
+# Check for __builtin_clz intrinsic.
+#
+AC_CACHE_CHECK(for __builtin_clz,
+       [gcry_cv_have_builtin_clz],
+       [gcry_cv_have_builtin_clz=no
+        AC_LINK_IFELSE([AC_LANG_PROGRAM([],
+          [unsigned int x = 0; int y = __builtin_clz(x); return y;])],
+          [gcry_cv_have_builtin_clz=yes])])
+if test "$gcry_cv_have_builtin_clz" = "yes" ; then
+   AC_DEFINE(HAVE_BUILTIN_CLZ, 1,
+             [Defined if compiler has '__builtin_clz' intrinsic])
+fi
+
+
+#
+# Check for __builtin_clzl intrinsic.
+#
+AC_CACHE_CHECK(for __builtin_clzl,
+       [gcry_cv_have_builtin_clzl],
+       [gcry_cv_have_builtin_clzl=no
+        AC_LINK_IFELSE([AC_LANG_PROGRAM([],
+          [unsigned long x = 0; long y = __builtin_clzl(x); return y;])],
+          [gcry_cv_have_builtin_clzl=yes])])
+if test "$gcry_cv_have_builtin_clzl" = "yes" ; then
+   AC_DEFINE(HAVE_BUILTIN_CLZL, 1,
+             [Defined if compiler has '__builtin_clzl' intrinsic])
+fi
+
+
+#
+# Check for __sync_synchronize intrinsic.
+#
+AC_CACHE_CHECK(for __sync_synchronize,
+       [gcry_cv_have_sync_synchronize],
+       [gcry_cv_have_sync_synchronize=no
+        AC_LINK_IFELSE([AC_LANG_PROGRAM([],
+          [__sync_synchronize(); return 0;])],
+          [gcry_cv_have_sync_synchronize=yes])])
+if test "$gcry_cv_have_sync_synchronize" = "yes" ; then
+   AC_DEFINE(HAVE_SYNC_SYNCHRONIZE, 1,
+             [Defined if compiler has '__sync_synchronize' intrinsic])
+fi
+
+
+#
+# Check for VLA support (variable length arrays).
+#
+AC_CACHE_CHECK(whether the variable length arrays are supported,
+       [gcry_cv_have_vla],
+       [gcry_cv_have_vla=no
+        AC_COMPILE_IFELSE([AC_LANG_SOURCE(
+          [[void f1(char *, int);
+            char foo(int i) {
+              char b[(i < 0 ? 0 : i) + 1];
+              f1(b, sizeof b); return b[0];}]])],
+          [gcry_cv_have_vla=yes])])
+if test "$gcry_cv_have_vla" = "yes" ; then
+   AC_DEFINE(HAVE_VLA,1, [Defined if variable length arrays are supported])
+fi
+
+
+#
+# Check for ELF visibility support.
+#
+AC_CACHE_CHECK(whether the visibility attribute is supported,
+       gcry_cv_visibility_attribute,
+       [gcry_cv_visibility_attribute=no
+        AC_LANG_CONFTEST([AC_LANG_SOURCE(
+          [[int foo __attribute__ ((visibility ("hidden"))) = 1;
+            int bar __attribute__ ((visibility ("protected"))) = 1;
+          ]])])
+
+        if ${CC-cc} -Werror -S conftest.c -o conftest.s \
+                  1>&AS_MESSAGE_LOG_FD 2>&AS_MESSAGE_LOG_FD ; then
+            if grep '\.hidden.*foo' conftest.s >/dev/null 2>&1 ; then
+                if grep '\.protected.*bar' conftest.s >/dev/null 2>&1; then
+                    gcry_cv_visibility_attribute=yes
+                fi
+            fi
+        fi
+       ])
+if test "$gcry_cv_visibility_attribute" = "yes"; then
+    AC_CACHE_CHECK(for broken visibility attribute,
+       gcry_cv_broken_visibility_attribute,
+       [gcry_cv_broken_visibility_attribute=yes
+        AC_LANG_CONFTEST([AC_LANG_SOURCE(
+          [[int foo (int x);
+            int bar (int x) __asm__ ("foo")
+                            __attribute__ ((visibility ("hidden")));
+            int bar (int x) { return x; }
+          ]])])
+
+        if ${CC-cc} -Werror -S conftest.c -o conftest.s \
+                  1>&AS_MESSAGE_LOG_FD 2>&AS_MESSAGE_LOG_FD ; then
+           if grep '\.hidden@<:@       _@:>@foo' conftest.s >/dev/null 2>&1;
+            then
+               gcry_cv_broken_visibility_attribute=no
+           fi
+        fi
+       ])
+fi
+if test "$gcry_cv_visibility_attribute" = "yes"; then
+    AC_CACHE_CHECK(for broken alias attribute,
+       gcry_cv_broken_alias_attribute,
+       [gcry_cv_broken_alias_attribute=yes
+        AC_LANG_CONFTEST([AC_LANG_SOURCE(
+          [[extern int foo (int x) __asm ("xyzzy");
+            int bar (int x) { return x; }
+            extern __typeof (bar) foo __attribute ((weak, alias ("bar")));
+            extern int dfoo;
+            extern __typeof (dfoo) dfoo __asm ("abccb");
+            int dfoo = 1;
+          ]])])
+
+        if ${CC-cc} -Werror -S conftest.c -o conftest.s \
+                  1>&AS_MESSAGE_LOG_FD 2>&AS_MESSAGE_LOG_FD ; then
+           if grep 'xyzzy' conftest.s >/dev/null 2>&1 && \
+              grep 'abccb' conftest.s >/dev/null 2>&1; then
+              gcry_cv_broken_alias_attribute=no
+           fi
+        fi
+        ])
+fi
+if test "$gcry_cv_visibility_attribute" = "yes"; then
+    AC_CACHE_CHECK(if gcc supports -fvisibility=hidden,
+       gcry_cv_gcc_has_f_visibility,
+       [gcry_cv_gcc_has_f_visibility=no
+        _gcc_cflags_save=$CFLAGS
+        CFLAGS="-fvisibility=hidden"
+        AC_COMPILE_IFELSE([AC_LANG_PROGRAM([],[])],
+                          gcry_cv_gcc_has_f_visibility=yes)
+        CFLAGS=$_gcc_cflags_save;
+       ])
+fi
+if test "$gcry_cv_visibility_attribute" = "yes" \
+   && test "$gcry_cv_broken_visibility_attribute" != "yes" \
+   && test "$gcry_cv_broken_alias_attribute" != "yes" \
+   && test "$gcry_cv_gcc_has_f_visibility" = "yes"
+ then
+   AC_DEFINE(GCRY_USE_VISIBILITY, 1,
+               [Define to use the GNU C visibility attribute.])
+   CFLAGS="$CFLAGS -fvisibility=hidden"
+fi
+
+
+# Following attribute tests depend on warnings to cause compile to fail,
+# so set -Werror temporarily.
+_gcc_cflags_save=$CFLAGS
+CFLAGS="$CFLAGS -Werror"
+
+
+#
+# Check whether the compiler supports the GCC style aligned attribute
+#
+AC_CACHE_CHECK([whether the GCC style aligned attribute is supported],
+       [gcry_cv_gcc_attribute_aligned],
+       [gcry_cv_gcc_attribute_aligned=no
+        AC_COMPILE_IFELSE([AC_LANG_SOURCE(
+          [[struct { int a; } foo __attribute__ ((aligned (16)));]])],
+          [gcry_cv_gcc_attribute_aligned=yes])])
+if test "$gcry_cv_gcc_attribute_aligned" = "yes" ; then
+   AC_DEFINE(HAVE_GCC_ATTRIBUTE_ALIGNED,1,
+     [Defined if a GCC style "__attribute__ ((aligned (n))" is supported])
+fi
+
+
+#
+# Check whether the compiler supports the GCC style packed attribute
+#
+AC_CACHE_CHECK([whether the GCC style packed attribute is supported],
+       [gcry_cv_gcc_attribute_packed],
+       [gcry_cv_gcc_attribute_packed=no
+        AC_COMPILE_IFELSE([AC_LANG_SOURCE(
+          [[struct foolong_s { long b; } __attribute__ ((packed));
+            struct foo_s { char a; struct foolong_s b; }
+              __attribute__ ((packed));
+            enum bar {
+              FOO = 1 / (sizeof(struct foo_s) == (sizeof(char) + 
sizeof(long))),
+            };]])],
+          [gcry_cv_gcc_attribute_packed=yes])])
+if test "$gcry_cv_gcc_attribute_packed" = "yes" ; then
+   AC_DEFINE(HAVE_GCC_ATTRIBUTE_PACKED,1,
+     [Defined if a GCC style "__attribute__ ((packed))" is supported])
+fi
+
+
+#
+# Check whether the compiler supports the GCC style may_alias attribute
+#
+AC_CACHE_CHECK([whether the GCC style may_alias attribute is supported],
+       [gcry_cv_gcc_attribute_may_alias],
+       [gcry_cv_gcc_attribute_may_alias=no
+        AC_COMPILE_IFELSE([AC_LANG_SOURCE(
+          [[typedef struct foo_s { int a; }
+            __attribute__ ((may_alias)) foo_t;]])],
+          [gcry_cv_gcc_attribute_may_alias=yes])])
+if test "$gcry_cv_gcc_attribute_may_alias" = "yes" ; then
+   AC_DEFINE(HAVE_GCC_ATTRIBUTE_MAY_ALIAS,1,
+     [Defined if a GCC style "__attribute__ ((may_alias))" is supported])
+fi
+
+
+# Restore flags.
+CFLAGS=$_gcc_cflags_save;
+
+
+#
+# Check whether the compiler supports 'asm' or '__asm__' keyword for
+# assembler blocks.
+#
+AC_CACHE_CHECK([whether 'asm' assembler keyword is supported],
+       [gcry_cv_have_asm],
+       [gcry_cv_have_asm=no
+        AC_COMPILE_IFELSE([AC_LANG_SOURCE(
+          [[void a(void) { asm("":::"memory"); }]])],
+          [gcry_cv_have_asm=yes])])
+AC_CACHE_CHECK([whether '__asm__' assembler keyword is supported],
+       [gcry_cv_have___asm__],
+       [gcry_cv_have___asm__=no
+        AC_COMPILE_IFELSE([AC_LANG_SOURCE(
+          [[void a(void) { __asm__("":::"memory"); }]])],
+          [gcry_cv_have___asm__=yes])])
+if test "$gcry_cv_have_asm" = "no" ; then
+   if test "$gcry_cv_have___asm__" = "yes" ; then
+      AC_DEFINE(asm,__asm__,
+        [Define to supported assembler block keyword, if plain 'asm' was not
+         supported])
+   fi
+fi
+
+
+#
+# Check whether the compiler supports inline assembly memory barrier.
+#
+if test "$gcry_cv_have_asm" = "no" ; then
+   if test "$gcry_cv_have___asm__" = "yes" ; then
+      AC_CACHE_CHECK([whether inline assembly memory barrier is supported],
+          [gcry_cv_have_asm_volatile_memory],
+          [gcry_cv_have_asm_volatile_memory=no
+           AC_COMPILE_IFELSE([AC_LANG_SOURCE(
+             [[void a(int x)
+               {
+                 __asm__ volatile("":::"memory");
+                 __asm__ volatile("":"+r"(x)::"memory");
+               }]])],
+             [gcry_cv_have_asm_volatile_memory=yes])])
+   fi
+else
+   AC_CACHE_CHECK([whether inline assembly memory barrier is supported],
+       [gcry_cv_have_asm_volatile_memory],
+       [gcry_cv_have_asm_volatile_memory=no
+        AC_COMPILE_IFELSE([AC_LANG_SOURCE(
+          [[void a(int x)
+            {
+              asm volatile("":::"memory");
+              asm volatile("":"+r"(x)::"memory"); }]])],
+          [gcry_cv_have_asm_volatile_memory=yes])])
+fi
+if test "$gcry_cv_have_asm_volatile_memory" = "yes" ; then
+   AC_DEFINE(HAVE_GCC_ASM_VOLATILE_MEMORY,1,
+     [Define if inline asm memory barrier is supported])
+fi
+
+
+#
+# Check whether GCC assembler supports features needed for our ARM
+# implementations.  This needs to be done before setting up the
+# assembler stuff.
+#
+AC_CACHE_CHECK([whether GCC assembler is compatible for ARM assembly 
implementations],
+       [gcry_cv_gcc_arm_platform_as_ok],
+       [if test "$try_asm_modules" != "yes" ; then
+          gcry_cv_gcc_arm_platform_as_ok="n/a"
+        else
+          gcry_cv_gcc_arm_platform_as_ok=no
+          AC_LINK_IFELSE([AC_LANG_PROGRAM(
+            [[__asm__(
+                /* Test if assembler supports UAL syntax.  */
+                ".syntax unified\n\t"
+                ".arm\n\t" /* our assembly code is in ARM mode  */
+                ".text\n\t"
+                /* Following causes error if assembler ignored '.syntax 
unified'.  */
+                "asmfunc:\n\t"
+                "add %r0, %r0, %r4, ror #12;\n\t"
+
+                /* Test if '.type' and '.size' are supported.  */
+                ".size asmfunc,.-asmfunc;\n\t"
+                ".type asmfunc,%function;\n\t"
+              );
+              void asmfunc(void);]], [ asmfunc(); ] )],
+            [gcry_cv_gcc_arm_platform_as_ok=yes])
+        fi])
+if test "$gcry_cv_gcc_arm_platform_as_ok" = "yes" ; then
+   AC_DEFINE(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS,1,
+     [Defined if underlying assembler is compatible with ARM assembly 
implementations])
+fi
+
+
+#
+# Check whether GCC assembler supports features needed for our ARMv8/Aarch64
+# implementations.  This needs to be done before setting up the
+# assembler stuff.
+#
+AC_CACHE_CHECK([whether GCC assembler is compatible for ARMv8/Aarch64 assembly 
implementations],
+       [gcry_cv_gcc_aarch64_platform_as_ok],
+       [if test "$try_asm_modules" != "yes" ; then
+          gcry_cv_gcc_aarch64_platform_as_ok="n/a"
+        else
+          gcry_cv_gcc_aarch64_platform_as_ok=no
+          AC_LINK_IFELSE([AC_LANG_PROGRAM(
+            [[__asm__(
+                ".text\n\t"
+                "asmfunc:\n\t"
+                "eor x0, x0, x30, ror #12;\n\t"
+                "add x0, x0, x30, asr #12;\n\t"
+                "eor v0.16b, v0.16b, v31.16b;\n\t"
+              );
+              void asmfunc(void);]], [ asmfunc(); ] )],
+            [gcry_cv_gcc_aarch64_platform_as_ok=yes])
+        fi])
+if test "$gcry_cv_gcc_aarch64_platform_as_ok" = "yes" ; then
+   AC_DEFINE(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS,1,
+     [Defined if underlying assembler is compatible with ARMv8/Aarch64 
assembly implementations])
+fi
+
+#
+# Check whether GCC assembler supports for CFI directives.
+#
+AC_CACHE_CHECK([whether GCC assembler supports for CFI directives],
+       [gcry_cv_gcc_asm_cfi_directives],
+       [gcry_cv_gcc_asm_cfi_directives=no
+        AC_LINK_IFELSE([AC_LANG_PROGRAM(
+          [[__asm__(
+                ".text\n\t"
+                "ac_test:\n\t"
+                ".cfi_startproc\n\t"
+                ".cfi_remember_state\n\t"
+                ".cfi_adjust_cfa_offset 8\n\t"
+                ".cfi_rel_offset 0, 8\n\t"
+                ".cfi_def_cfa_register 1\n\t"
+                ".cfi_register 2, 3\n\t"
+                ".cfi_restore 2\n\t"
+                ".cfi_escape 0x0f, 0x02, 0x11, 0x00\n\t"
+                ".cfi_restore_state\n\t"
+                ".long 0\n\t"
+                ".cfi_endproc\n\t"
+            );
+            void asmfunc(void)]])],
+          [gcry_cv_gcc_asm_cfi_directives=yes])])
+if test "$gcry_cv_gcc_asm_cfi_directives" = "yes" ; then
+   AC_DEFINE(HAVE_GCC_ASM_CFI_DIRECTIVES,1,
+             [Defined if underlying assembler supports for CFI directives])
+fi
+
+
+#
+# Check whether GCC assembler supports for ELF directives.
+#
+AC_CACHE_CHECK([whether GCC assembler supports for ELF directives],
+       [gcry_cv_gcc_asm_elf_directives],
+       [gcry_cv_gcc_asm_elf_directives=no
+        AC_LINK_IFELSE([AC_LANG_PROGRAM(
+          [[__asm__(
+                /* Test if ELF directives '.type' and '.size' are supported. */
+                ".text\n\t"
+                "asmfunc:\n\t"
+                ".size asmfunc,.-asmfunc;\n\t"
+                ".type asmfunc,STT_FUNC;\n\t"
+            );]])],
+          [gcry_cv_gcc_asm_elf_directives=yes])])
+if test "$gcry_cv_gcc_asm_elf_directives" = "yes" ; then
+   AC_DEFINE(HAVE_GCC_ASM_ELF_DIRECTIVES,1,
+             [Defined if underlying assembler supports for ELF directives])
+fi
+
+
+#
+# Check whether underscores in symbols are required.  This needs to be
+# done before setting up the assembler stuff.
+#
+GNUPG_SYS_SYMBOL_UNDERSCORE()
+
+
+#################################
+####                         ####
+#### Setup assembler stuff.  ####
+#### Define mpi_cpu_arch.    ####
+####                         ####
+#################################
+AC_ARG_ENABLE(mpi-path,
+              AS_HELP_STRING([--enable-mpi-path=EXTRA_PATH],
+              [prepend EXTRA_PATH to list of CPU specific optimizations]),
+             mpi_extra_path="$enableval",mpi_extra_path="")
+AC_MSG_CHECKING(architecture and mpi assembler functions)
+if test -f $srcdir/mpi/config.links ; then
+    . $srcdir/mpi/config.links
+    AC_CONFIG_LINKS("$mpi_ln_list")
+    ac_cv_mpi_sflags="$mpi_sflags"
+    AC_MSG_RESULT($mpi_cpu_arch)
+else
+    AC_MSG_RESULT(failed)
+    AC_MSG_ERROR([mpi/config.links missing!])
+fi
+MPI_SFLAGS="$ac_cv_mpi_sflags"
+AC_SUBST(MPI_SFLAGS)
+
+AM_CONDITIONAL(MPI_MOD_ASM_MPIH_ADD1, test "$mpi_mod_asm_mpih_add1" = yes)
+AM_CONDITIONAL(MPI_MOD_ASM_MPIH_SUB1, test "$mpi_mod_asm_mpih_sub1" = yes)
+AM_CONDITIONAL(MPI_MOD_ASM_MPIH_MUL1, test "$mpi_mod_asm_mpih_mul1" = yes)
+AM_CONDITIONAL(MPI_MOD_ASM_MPIH_MUL2, test "$mpi_mod_asm_mpih_mul2" = yes)
+AM_CONDITIONAL(MPI_MOD_ASM_MPIH_MUL3, test "$mpi_mod_asm_mpih_mul3" = yes)
+AM_CONDITIONAL(MPI_MOD_ASM_MPIH_LSHIFT, test "$mpi_mod_asm_mpih_lshift" = yes)
+AM_CONDITIONAL(MPI_MOD_ASM_MPIH_RSHIFT, test "$mpi_mod_asm_mpih_rshift" = yes)
+AM_CONDITIONAL(MPI_MOD_ASM_UDIV, test "$mpi_mod_asm_udiv" = yes)
+AM_CONDITIONAL(MPI_MOD_ASM_UDIV_QRNND, test "$mpi_mod_asm_udiv_qrnnd" = yes)
+AM_CONDITIONAL(MPI_MOD_C_MPIH_ADD1, test "$mpi_mod_c_mpih_add1" = yes)
+AM_CONDITIONAL(MPI_MOD_C_MPIH_SUB1, test "$mpi_mod_c_mpih_sub1" = yes)
+AM_CONDITIONAL(MPI_MOD_C_MPIH_MUL1, test "$mpi_mod_c_mpih_mul1" = yes)
+AM_CONDITIONAL(MPI_MOD_C_MPIH_MUL2, test "$mpi_mod_c_mpih_mul2" = yes)
+AM_CONDITIONAL(MPI_MOD_C_MPIH_MUL3, test "$mpi_mod_c_mpih_mul3" = yes)
+AM_CONDITIONAL(MPI_MOD_C_MPIH_LSHIFT, test "$mpi_mod_c_mpih_lshift" = yes)
+AM_CONDITIONAL(MPI_MOD_C_MPIH_RSHIFT, test "$mpi_mod_c_mpih_rshift" = yes)
+AM_CONDITIONAL(MPI_MOD_C_UDIV, test "$mpi_mod_c_udiv" = yes)
+AM_CONDITIONAL(MPI_MOD_C_UDIV_QRNND, test "$mpi_mod_c_udiv_qrnnd" = yes)
+
+# Reset non applicable feature flags.
+if test "$mpi_cpu_arch" != "x86" ; then
+   aesnisupport="n/a"
+   shaextsupport="n/a"
+   pclmulsupport="n/a"
+   sse41support="n/a"
+   avxsupport="n/a"
+   avx2support="n/a"
+   padlocksupport="n/a"
+   drngsupport="n/a"
+fi
+
+if test "$mpi_cpu_arch" != "arm" ; then
+   if test "$mpi_cpu_arch" != "aarch64" ; then
+     neonsupport="n/a"
+     armcryptosupport="n/a"
+   fi
+fi
+
+if test "$mpi_cpu_arch" != "ppc"; then
+   ppccryptosupport="n/a"
+fi
+
+#############################################
+####                                     ####
+#### Platform specific compiler checks.  ####
+####                                     ####
+#############################################
+
+
+# Following tests depend on warnings to cause compile to fail, so set -Werror
+# temporarily.
+_gcc_cflags_save=$CFLAGS
+CFLAGS="$CFLAGS -Werror"
+
+
+#
+# Check whether compiler supports 'ms_abi' function attribute.
+#
+AC_CACHE_CHECK([whether compiler supports 'ms_abi' function attribute],
+       [gcry_cv_gcc_attribute_ms_abi],
+       [gcry_cv_gcc_attribute_ms_abi=no
+        AC_COMPILE_IFELSE([AC_LANG_SOURCE(
+          [[int __attribute__ ((ms_abi)) proto(int);]])],
+          [gcry_cv_gcc_attribute_ms_abi=yes])])
+if test "$gcry_cv_gcc_attribute_ms_abi" = "yes" ; then
+   AC_DEFINE(HAVE_GCC_ATTRIBUTE_MS_ABI,1,
+     [Defined if compiler supports "__attribute__ ((ms_abi))" function 
attribute])
+fi
+
+
+#
+# Check whether compiler supports 'sysv_abi' function attribute.
+#
+AC_CACHE_CHECK([whether compiler supports 'sysv_abi' function attribute],
+       [gcry_cv_gcc_attribute_sysv_abi],
+       [gcry_cv_gcc_attribute_sysv_abi=no
+        AC_COMPILE_IFELSE([AC_LANG_SOURCE(
+          [[int __attribute__ ((sysv_abi)) proto(int);]])],
+          [gcry_cv_gcc_attribute_sysv_abi=yes])])
+if test "$gcry_cv_gcc_attribute_sysv_abi" = "yes" ; then
+   AC_DEFINE(HAVE_GCC_ATTRIBUTE_SYSV_ABI,1,
+     [Defined if compiler supports "__attribute__ ((sysv_abi))" function 
attribute])
+fi
+
+
+#
+# Check whether default calling convention is 'ms_abi'.
+#
+if test "$gcry_cv_gcc_attribute_ms_abi" = "yes" ; then
+   AC_CACHE_CHECK([whether default calling convention is 'ms_abi'],
+          [gcry_cv_gcc_default_abi_is_ms_abi],
+          [gcry_cv_gcc_default_abi_is_ms_abi=no
+           AC_COMPILE_IFELSE([AC_LANG_SOURCE(
+             [[void *test(void) {
+                 void *(*def_func)(void) = test;
+                 void *__attribute__((ms_abi))(*msabi_func)(void);
+                 /* warning on SysV abi targets, passes on Windows based 
targets */
+                 msabi_func = def_func;
+                 return msabi_func;
+             }]])],
+             [gcry_cv_gcc_default_abi_is_ms_abi=yes])])
+   if test "$gcry_cv_gcc_default_abi_is_ms_abi" = "yes" ; then
+      AC_DEFINE(HAVE_GCC_DEFAULT_ABI_IS_MS_ABI,1,
+        [Defined if default calling convention is 'ms_abi'])
+   fi
+fi
+
+
+#
+# Check whether default calling convention is 'sysv_abi'.
+#
+if test "$gcry_cv_gcc_attribute_sysv_abi" = "yes" ; then
+   AC_CACHE_CHECK([whether default calling convention is 'sysv_abi'],
+          [gcry_cv_gcc_default_abi_is_sysv_abi],
+          [gcry_cv_gcc_default_abi_is_sysv_abi=no
+           AC_COMPILE_IFELSE([AC_LANG_SOURCE(
+             [[void *test(void) {
+                 void *(*def_func)(void) = test;
+                 void *__attribute__((sysv_abi))(*sysvabi_func)(void);
+                 /* warning on MS ABI targets, passes on SysV ABI targets */
+                 sysvabi_func = def_func;
+                 return sysvabi_func;
+             }]])],
+             [gcry_cv_gcc_default_abi_is_sysv_abi=yes])])
+   if test "$gcry_cv_gcc_default_abi_is_sysv_abi" = "yes" ; then
+      AC_DEFINE(HAVE_GCC_DEFAULT_ABI_IS_SYSV_ABI,1,
+        [Defined if default calling convention is 'sysv_abi'])
+   fi
+fi
+
+
+# Restore flags.
+CFLAGS=$_gcc_cflags_save;
+
+
+#
+# Check whether GCC inline assembler supports SSSE3 instructions
+# This is required for the AES-NI instructions.
+#
+AC_CACHE_CHECK([whether GCC inline assembler supports SSSE3 instructions],
+       [gcry_cv_gcc_inline_asm_ssse3],
+       [if test "$mpi_cpu_arch" != "x86" ||
+           test "$try_asm_modules" != "yes" ; then
+          gcry_cv_gcc_inline_asm_ssse3="n/a"
+        else
+          gcry_cv_gcc_inline_asm_ssse3=no
+          AC_LINK_IFELSE([AC_LANG_PROGRAM(
+          [[static unsigned char be_mask[16] __attribute__ ((aligned (16))) =
+              { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
+            void a(void) {
+              __asm__("pshufb %[mask], %%xmm2\n\t"::[mask]"m"(*be_mask):);
+            }]], [ a(); ] )],
+          [gcry_cv_gcc_inline_asm_ssse3=yes])
+        fi])
+if test "$gcry_cv_gcc_inline_asm_ssse3" = "yes" ; then
+   AC_DEFINE(HAVE_GCC_INLINE_ASM_SSSE3,1,
+     [Defined if inline assembler supports SSSE3 instructions])
+fi
+
+
+#
+# Check whether GCC inline assembler supports PCLMUL instructions.
+#
+AC_CACHE_CHECK([whether GCC inline assembler supports PCLMUL instructions],
+       [gcry_cv_gcc_inline_asm_pclmul],
+       [if test "$mpi_cpu_arch" != "x86" ||
+           test "$try_asm_modules" != "yes" ; then
+          gcry_cv_gcc_inline_asm_pclmul="n/a"
+        else
+          gcry_cv_gcc_inline_asm_pclmul=no
+          AC_LINK_IFELSE([AC_LANG_PROGRAM(
+          [[void a(void) {
+              __asm__("pclmulqdq \$0, %%xmm1, %%xmm3\n\t":::"cc");
+            }]], [ a(); ] )],
+          [gcry_cv_gcc_inline_asm_pclmul=yes])
+        fi])
+if test "$gcry_cv_gcc_inline_asm_pclmul" = "yes" ; then
+   AC_DEFINE(HAVE_GCC_INLINE_ASM_PCLMUL,1,
+     [Defined if inline assembler supports PCLMUL instructions])
+fi
+
+
+#
+# Check whether GCC inline assembler supports SHA Extensions instructions.
+#
+AC_CACHE_CHECK([whether GCC inline assembler supports SHA Extensions 
instructions],
+       [gcry_cv_gcc_inline_asm_shaext],
+       [if test "$mpi_cpu_arch" != "x86" ||
+           test "$try_asm_modules" != "yes" ; then
+          gcry_cv_gcc_inline_asm_shaext="n/a"
+        else
+          gcry_cv_gcc_inline_asm_shaext=no
+          AC_LINK_IFELSE([AC_LANG_PROGRAM(
+          [[void a(void) {
+              __asm__("sha1rnds4 \$0, %%xmm1, %%xmm3\n\t":::"cc");
+              __asm__("sha1nexte %%xmm1, %%xmm3\n\t":::"cc");
+              __asm__("sha1msg1 %%xmm1, %%xmm3\n\t":::"cc");
+              __asm__("sha1msg2 %%xmm1, %%xmm3\n\t":::"cc");
+              __asm__("sha256rnds2 %%xmm0, %%xmm1, %%xmm3\n\t":::"cc");
+              __asm__("sha256msg1 %%xmm1, %%xmm3\n\t":::"cc");
+              __asm__("sha256msg2 %%xmm1, %%xmm3\n\t":::"cc");
+            }]], [ a(); ] )],
+          [gcry_cv_gcc_inline_asm_shaext=yes])
+        fi])
+if test "$gcry_cv_gcc_inline_asm_shaext" = "yes" ; then
+   AC_DEFINE(HAVE_GCC_INLINE_ASM_SHAEXT,1,
+     [Defined if inline assembler supports SHA Extensions instructions])
+fi
+
+
+#
+# Check whether GCC inline assembler supports SSE4.1 instructions.
+#
+AC_CACHE_CHECK([whether GCC inline assembler supports SSE4.1 instructions],
+       [gcry_cv_gcc_inline_asm_sse41],
+       [if test "$mpi_cpu_arch" != "x86" ||
+           test "$try_asm_modules" != "yes" ; then
+          gcry_cv_gcc_inline_asm_sse41="n/a"
+        else
+          gcry_cv_gcc_inline_asm_sse41=no
+          AC_LINK_IFELSE([AC_LANG_PROGRAM(
+          [[void a(void) {
+              int i;
+              __asm__("pextrd \$2, %%xmm0, %[out]\n\t" : [out] "=m" (i));
+            }]], [ a(); ] )],
+          [gcry_cv_gcc_inline_asm_sse41=yes])
+        fi])
+if test "$gcry_cv_gcc_inline_asm_sse41" = "yes" ; then
+   AC_DEFINE(HAVE_GCC_INLINE_ASM_SSE41,1,
+     [Defined if inline assembler supports SSE4.1 instructions])
+fi
+
+
+#
+# Check whether GCC inline assembler supports AVX instructions
+#
+AC_CACHE_CHECK([whether GCC inline assembler supports AVX instructions],
+       [gcry_cv_gcc_inline_asm_avx],
+       [if test "$mpi_cpu_arch" != "x86" ||
+           test "$try_asm_modules" != "yes" ; then
+          gcry_cv_gcc_inline_asm_avx="n/a"
+        else
+          gcry_cv_gcc_inline_asm_avx=no
+          AC_LINK_IFELSE([AC_LANG_PROGRAM(
+          [[void a(void) {
+              __asm__("xgetbv; vaesdeclast 
(%[mem]),%%xmm0,%%xmm7\n\t"::[mem]"r"(0):);
+            }]], [ a(); ] )],
+          [gcry_cv_gcc_inline_asm_avx=yes])
+        fi])
+if test "$gcry_cv_gcc_inline_asm_avx" = "yes" ; then
+   AC_DEFINE(HAVE_GCC_INLINE_ASM_AVX,1,
+     [Defined if inline assembler supports AVX instructions])
+fi
+
+
+#
+# Check whether GCC inline assembler supports AVX2 instructions
+#
+AC_CACHE_CHECK([whether GCC inline assembler supports AVX2 instructions],
+       [gcry_cv_gcc_inline_asm_avx2],
+       [if test "$mpi_cpu_arch" != "x86" ||
+           test "$try_asm_modules" != "yes" ; then
+          gcry_cv_gcc_inline_asm_avx2="n/a"
+        else
+          gcry_cv_gcc_inline_asm_avx2=no
+          AC_LINK_IFELSE([AC_LANG_PROGRAM(
+          [[void a(void) {
+              __asm__("xgetbv; vpbroadcastb %%xmm7,%%ymm1\n\t":::"cc");
+            }]], [ a(); ] )],
+          [gcry_cv_gcc_inline_asm_avx2=yes])
+        fi])
+if test "$gcry_cv_gcc_inline_asm_avx2" = "yes" ; then
+   AC_DEFINE(HAVE_GCC_INLINE_ASM_AVX2,1,
+     [Defined if inline assembler supports AVX2 instructions])
+fi
+
+
+#
+# Check whether GCC inline assembler supports VAES and VPCLMUL instructions
+#
+AC_CACHE_CHECK([whether GCC inline assembler supports VAES and VPCLMUL 
instructions],
+       [gcry_cv_gcc_inline_asm_vaes_vpclmul],
+       [if test "$mpi_cpu_arch" != "x86" ||
+           test "$try_asm_modules" != "yes" ; then
+          gcry_cv_gcc_inline_asm_vaes_vpclmul="n/a"
+        else
+          gcry_cv_gcc_inline_asm_vaes_vpclmul=no
+          AC_LINK_IFELSE([AC_LANG_PROGRAM(
+          [[void a(void) {
+              __asm__("vaesenclast 
%%ymm7,%%ymm7,%%ymm1\n\t":::"cc");/*256-bit*/
+              __asm__("vaesenclast 
%%zmm7,%%zmm7,%%zmm1\n\t":::"cc");/*512-bit*/
+              __asm__("vpclmulqdq 
\$0,%%ymm7,%%ymm7,%%ymm1\n\t":::"cc");/*256-bit*/
+              __asm__("vpclmulqdq 
\$0,%%zmm7,%%zmm7,%%zmm1\n\t":::"cc");/*512-bit*/
+            }]], [ a(); ] )],
+          [gcry_cv_gcc_inline_asm_vaes_vpclmul=yes])
+        fi])
+if test "$gcry_cv_gcc_inline_asm_vaes_vpclmul" = "yes" ; then
+   AC_DEFINE(HAVE_GCC_INLINE_ASM_VAES_VPCLMUL,1,
+     [Defined if inline assembler supports VAES and VPCLMUL instructions])
+fi
+
+
+#
+# Check whether GCC inline assembler supports BMI2 instructions
+#
+AC_CACHE_CHECK([whether GCC inline assembler supports BMI2 instructions],
+       [gcry_cv_gcc_inline_asm_bmi2],
+       [if test "$mpi_cpu_arch" != "x86" ||
+           test "$try_asm_modules" != "yes" ; then
+          gcry_cv_gcc_inline_asm_bmi2="n/a"
+        else
+          gcry_cv_gcc_inline_asm_bmi2=no
+          AC_LINK_IFELSE([AC_LANG_PROGRAM(
+          [[unsigned int a(unsigned int x, unsigned int y) {
+              unsigned int tmp1, tmp2;
+              asm ("rorxl %2, %1, %0"
+                   : "=r" (tmp1)
+                   : "rm0" (x), "J" (32 - ((23) & 31)));
+              asm ("andnl %2, %1, %0"
+                   : "=r" (tmp2)
+                   : "r0" (x), "rm" (y));
+              return tmp1 + tmp2;
+            }]], [ a(1, 2); ] )],
+          [gcry_cv_gcc_inline_asm_bmi2=yes])
+        fi])
+if test "$gcry_cv_gcc_inline_asm_bmi2" = "yes" ; then
+   AC_DEFINE(HAVE_GCC_INLINE_ASM_BMI2,1,
+     [Defined if inline assembler supports BMI2 instructions])
+fi
+
+
+#
+# Check whether GCC assembler needs "-Wa,--divide" to correctly handle
+# constant division
+#
+if test $amd64_as_feature_detection = yes; then
+  AC_CACHE_CHECK([whether GCC assembler handles division correctly],
+       [gcry_cv_gcc_as_const_division_ok],
+       [gcry_cv_gcc_as_const_division_ok=no
+        AC_LINK_IFELSE([AC_LANG_PROGRAM(
+          [[__asm__(".text\n\tfn:\n\t xorl \$(123456789/12345678), %ebp;\n\t");
+            void fn(void);]],
+            [fn();])],
+          [gcry_cv_gcc_as_const_division_ok=yes])])
+  if test "$gcry_cv_gcc_as_const_division_ok" = "no" ; then
+    #
+    # Add '-Wa,--divide' to CPPFLAGS and try check again.
+    #
+    _gcc_cppflags_save="$CPPFLAGS"
+    CPPFLAGS="$CPPFLAGS -Wa,--divide"
+    AC_CACHE_CHECK([whether GCC assembler handles division correctly with 
"-Wa,--divide"],
+         [gcry_cv_gcc_as_const_division_with_wadivide_ok],
+         [gcry_cv_gcc_as_const_division_with_wadivide_ok=no
+          AC_LINK_IFELSE([AC_LANG_PROGRAM(
+            [[__asm__(".text\n\tfn:\n\t xorl \$(123456789/12345678), 
%ebp;\n\t");
+              void fn(void);]],
+              [fn();])],
+            [gcry_cv_gcc_as_const_division_with_wadivide_ok=yes])])
+    if test "$gcry_cv_gcc_as_const_division_with_wadivide_ok" = "no" ; then
+      # '-Wa,--divide' did not work, restore old flags.
+      CPPFLAGS="$_gcc_cppflags_save"
+    fi
+  fi
+fi
+
+
+#
+# Check whether GCC assembler supports features needed for our amd64
+# implementations
+#
+if test $amd64_as_feature_detection = yes; then
+  AC_CACHE_CHECK([whether GCC assembler is compatible for amd64 assembly 
implementations],
+       [gcry_cv_gcc_amd64_platform_as_ok],
+       [if test "$mpi_cpu_arch" != "x86" ||
+           test "$try_asm_modules" != "yes" ; then
+          gcry_cv_gcc_amd64_platform_as_ok="n/a"
+        else
+          gcry_cv_gcc_amd64_platform_as_ok=no
+          AC_LINK_IFELSE([AC_LANG_PROGRAM(
+          [[__asm__(
+                /* Test if '.type' and '.size' are supported.  */
+                /* These work only on ELF targets. */
+                ".text\n\t"
+                "asmfunc:\n\t"
+                ".size asmfunc,.-asmfunc;\n\t"
+                ".type asmfunc,@function;\n\t"
+                /* Test if assembler allows use of '/' for constant division
+                 * (Solaris/x86 issue). If previous constant division check
+                 * and "-Wa,--divide" workaround failed, this causes assembly
+                 * to be disable on this machine. */
+                 "xorl \$(123456789/12345678), %ebp;\n\t"
+            );
+            void asmfunc(void);]], [ asmfunc(); ])],
+          [gcry_cv_gcc_amd64_platform_as_ok=yes])
+        fi])
+  if test "$gcry_cv_gcc_amd64_platform_as_ok" = "yes" ; then
+     AC_DEFINE(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS,1,
+              [Defined if underlying assembler is compatible with amd64 
assembly implementations])
+  fi
+  if test "$gcry_cv_gcc_amd64_platform_as_ok" = "no" &&
+     test "$gcry_cv_gcc_attribute_sysv_abi" = "yes" &&
+     test "$gcry_cv_gcc_default_abi_is_ms_abi" = "yes"; then
+    AC_CACHE_CHECK([whether GCC assembler is compatible for WIN64 assembly 
implementations],
+      [gcry_cv_gcc_win64_platform_as_ok],
+      [gcry_cv_gcc_win64_platform_as_ok=no
+      AC_LINK_IFELSE([AC_LANG_PROGRAM(
+        [[__asm__(
+              ".text\n\t"
+              ".globl asmfunc\n\t"
+              "asmfunc:\n\t"
+              "xorq \$(1234), %rbp;\n\t"
+          );
+          void asmfunc(void);]], [ asmfunc(); ])],
+        [gcry_cv_gcc_win64_platform_as_ok=yes])])
+    if test "$gcry_cv_gcc_win64_platform_as_ok" = "yes" ; then
+      AC_DEFINE(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS,1,
+                [Defined if underlying assembler is compatible with WIN64 
assembly implementations])
+    fi
+  fi
+fi
+
+
+#
+# Check whether GCC assembler supports features needed for assembly
+# implementations that use Intel syntax
+#
+AC_CACHE_CHECK([whether GCC assembler is compatible for Intel syntax assembly 
implementations],
+       [gcry_cv_gcc_platform_as_ok_for_intel_syntax],
+       [if test "$mpi_cpu_arch" != "x86" ||
+           test "$try_asm_modules" != "yes" ; then
+          gcry_cv_gcc_platform_as_ok_for_intel_syntax="n/a"
+        else
+          gcry_cv_gcc_platform_as_ok_for_intel_syntax=no
+          AC_LINK_IFELSE([AC_LANG_PROGRAM(
+          [[__asm__(
+                ".intel_syntax noprefix\n\t"
+                ".text\n\t"
+                "actest:\n\t"
+                "pxor xmm1, xmm7;\n\t"
+                "vperm2i128 ymm2, ymm3, ymm0, 1;\n\t"
+                "add eax, ebp;\n\t"
+                "rorx eax, ebp, 1;\n\t"
+                "sub eax, [esp + 4];\n\t"
+                "add dword ptr [esp + eax], 0b10101;\n\t"
+                ".att_syntax prefix\n\t"
+            );
+            void actest(void);]], [ actest(); ])],
+          [gcry_cv_gcc_platform_as_ok_for_intel_syntax=yes])
+        fi])
+if test "$gcry_cv_gcc_platform_as_ok_for_intel_syntax" = "yes" ; then
+  AC_DEFINE(HAVE_INTEL_SYNTAX_PLATFORM_AS,1,
+            [Defined if underlying assembler is compatible with Intel syntax 
assembly implementations])
+fi
+
+
+#
+# Check whether compiler is configured for ARMv6 or newer architecture
+#
+AC_CACHE_CHECK([whether compiler is configured for ARMv6 or newer 
architecture],
+       [gcry_cv_cc_arm_arch_is_v6],
+       [if test "$mpi_cpu_arch" != "arm" ||
+           test "$try_asm_modules" != "yes" ; then
+          gcry_cv_cc_arm_arch_is_v6="n/a"
+        else
+          gcry_cv_cc_arm_arch_is_v6=no
+          AC_COMPILE_IFELSE([AC_LANG_SOURCE(
+          [[
+           #if defined(__arm__) && \
+             ((defined(__ARM_ARCH) && __ARM_ARCH >= 6) \
+             || defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \
+             || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) \
+             || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6T2__) \
+             || defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \
+             || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \
+             || defined(__ARM_ARCH_7EM__))
+             /* empty */
+           #else
+             /* fail compile if not ARMv6. */
+             not_armv6 not_armv6 = (not_armv6)not_armv6;
+           #endif
+          ]])],
+          [gcry_cv_cc_arm_arch_is_v6=yes])
+        fi])
+if test "$gcry_cv_cc_arm_arch_is_v6" = "yes" ; then
+   AC_DEFINE(HAVE_ARM_ARCH_V6,1,
+     [Defined if ARM architecture is v6 or newer])
+fi
+
+
+#
+# Check whether GCC inline assembler supports NEON instructions
+#
+AC_CACHE_CHECK([whether GCC inline assembler supports NEON instructions],
+       [gcry_cv_gcc_inline_asm_neon],
+       [if test "$mpi_cpu_arch" != "arm" ||
+           test "$try_asm_modules" != "yes" ; then
+          gcry_cv_gcc_inline_asm_neon="n/a"
+        else
+          gcry_cv_gcc_inline_asm_neon=no
+          AC_LINK_IFELSE([AC_LANG_PROGRAM(
+          [[__asm__(
+                ".syntax unified\n\t"
+                ".arm\n\t"
+                ".fpu neon\n\t"
+                ".text\n\t"
+                "testfn:\n\t"
+                "vld1.64 {%q0-%q1}, [%r0]!;\n\t"
+                "vrev64.8 %q0, %q3;\n\t"
+                "vadd.u64 %q0, %q1;\n\t"
+                "vadd.s64 %d3, %d2, %d3;\n\t"
+                );
+            void testfn(void);
+            ]], [ testfn(); ])],
+          [gcry_cv_gcc_inline_asm_neon=yes])
+        fi])
+if test "$gcry_cv_gcc_inline_asm_neon" = "yes" ; then
+   AC_DEFINE(HAVE_GCC_INLINE_ASM_NEON,1,
+     [Defined if inline assembler supports NEON instructions])
+fi
+
+
+#
+# Check whether GCC inline assembler supports AArch32 Crypto Extension 
instructions
+#
+AC_CACHE_CHECK([whether GCC inline assembler supports AArch32 Crypto Extension 
instructions],
+       [gcry_cv_gcc_inline_asm_aarch32_crypto],
+       [if test "$mpi_cpu_arch" != "arm" ||
+           test "$try_asm_modules" != "yes" ; then
+          gcry_cv_gcc_inline_asm_aarch32_crypto="n/a"
+        else
+          gcry_cv_gcc_inline_asm_aarch32_crypto=no
+          AC_LINK_IFELSE([AC_LANG_PROGRAM(
+          [[__asm__(
+                ".syntax unified\n\t"
+                ".arch armv8-a\n\t"
+                ".arm\n\t"
+                ".fpu crypto-neon-fp-armv8\n\t"
+                ".text\n\t"
+
+                "testfn:\n\t"
+                "sha1h.32 q0, q0;\n\t"
+                "sha1c.32 q0, q0, q0;\n\t"
+                "sha1p.32 q0, q0, q0;\n\t"
+                "sha1su0.32 q0, q0, q0;\n\t"
+                "sha1su1.32 q0, q0;\n\t"
+
+                "sha256h.32 q0, q0, q0;\n\t"
+                "sha256h2.32 q0, q0, q0;\n\t"
+                "sha1p.32 q0, q0, q0;\n\t"
+                "sha256su0.32 q0, q0;\n\t"
+                "sha256su1.32 q0, q0, q15;\n\t"
+
+                "aese.8 q0, q0;\n\t"
+                "aesd.8 q0, q0;\n\t"
+                "aesmc.8 q0, q0;\n\t"
+                "aesimc.8 q0, q0;\n\t"
+
+                "vmull.p64 q0, d0, d0;\n\t"
+                );
+            void testfn(void);
+            ]], [ testfn(); ])],
+          [gcry_cv_gcc_inline_asm_aarch32_crypto=yes])
+        fi])
+if test "$gcry_cv_gcc_inline_asm_aarch32_crypto" = "yes" ; then
+   AC_DEFINE(HAVE_GCC_INLINE_ASM_AARCH32_CRYPTO,1,
+     [Defined if inline assembler supports AArch32 Crypto Extension 
instructions])
+fi
+
+
+#
+# Check whether GCC inline assembler supports AArch64 NEON instructions
+#
+AC_CACHE_CHECK([whether GCC inline assembler supports AArch64 NEON 
instructions],
+       [gcry_cv_gcc_inline_asm_aarch64_neon],
+       [if test "$mpi_cpu_arch" != "aarch64" ||
+           test "$try_asm_modules" != "yes" ; then
+          gcry_cv_gcc_inline_asm_aarch64_neon="n/a"
+        else
+          gcry_cv_gcc_inline_asm_aarch64_neon=no
+          AC_LINK_IFELSE([AC_LANG_PROGRAM(
+          [[__asm__(
+                ".cpu generic+simd\n\t"
+                ".text\n\t"
+                "testfn:\n\t"
+                "mov w0, \#42;\n\t"
+                "dup v0.8b, w0;\n\t"
+                "ld4 {v0.8b,v1.8b,v2.8b,v3.8b},[x0],\#32;\n\t"
+                );
+            void testfn(void);
+            ]], [ testfn(); ])],
+          [gcry_cv_gcc_inline_asm_aarch64_neon=yes])
+        fi])
+if test "$gcry_cv_gcc_inline_asm_aarch64_neon" = "yes" ; then
+   AC_DEFINE(HAVE_GCC_INLINE_ASM_AARCH64_NEON,1,
+     [Defined if inline assembler supports AArch64 NEON instructions])
+fi
+
+
+#
+# Check whether GCC inline assembler supports AArch64 Crypto Extension 
instructions
+#
+AC_CACHE_CHECK([whether GCC inline assembler supports AArch64 Crypto Extension 
instructions],
+       [gcry_cv_gcc_inline_asm_aarch64_crypto],
+       [if test "$mpi_cpu_arch" != "aarch64" ||
+           test "$try_asm_modules" != "yes" ; then
+          gcry_cv_gcc_inline_asm_aarch64_crypto="n/a"
+        else
+          gcry_cv_gcc_inline_asm_aarch64_crypto=no
+          AC_LINK_IFELSE([AC_LANG_PROGRAM(
+          [[__asm__(
+                ".cpu generic+simd+crypto\n\t"
+                ".text\n\t"
+                "testfn:\n\t"
+                "mov w0, \#42;\n\t"
+                "dup v0.8b, w0;\n\t"
+                "ld4 {v0.8b,v1.8b,v2.8b,v3.8b},[x0],\#32;\n\t"
+
+                "sha1h s0, s0;\n\t"
+                "sha1c q0, s0, v0.4s;\n\t"
+                "sha1p q0, s0, v0.4s;\n\t"
+                "sha1su0 v0.4s, v0.4s, v0.4s;\n\t"
+                "sha1su1 v0.4s, v0.4s;\n\t"
+
+                "sha256h q0, q0, v0.4s;\n\t"
+                "sha256h2 q0, q0, v0.4s;\n\t"
+                "sha1p q0, s0, v0.4s;\n\t"
+                "sha256su0 v0.4s, v0.4s;\n\t"
+                "sha256su1 v0.4s, v0.4s, v31.4s;\n\t"
+
+                "aese v0.16b, v0.16b;\n\t"
+                "aesd v0.16b, v0.16b;\n\t"
+                "aesmc v0.16b, v0.16b;\n\t"
+                "aesimc v0.16b, v0.16b;\n\t"
+
+                "pmull v0.1q, v0.1d, v31.1d;\n\t"
+                "pmull2 v0.1q, v0.2d, v31.2d;\n\t"
+                );
+            void testfn(void);
+            ]], [ testfn(); ])],
+          [gcry_cv_gcc_inline_asm_aarch64_crypto=yes])
+        fi])
+if test "$gcry_cv_gcc_inline_asm_aarch64_crypto" = "yes" ; then
+   AC_DEFINE(HAVE_GCC_INLINE_ASM_AARCH64_CRYPTO,1,
+     [Defined if inline assembler supports AArch64 Crypto Extension 
instructions])
+fi
+
+
+#
+# Check whether PowerPC AltiVec/VSX intrinsics
+#
+AC_CACHE_CHECK([whether compiler supports PowerPC AltiVec/VSX intrinsics],
+      [gcry_cv_cc_ppc_altivec],
+      [if test "$mpi_cpu_arch" != "ppc" ||
+         test "$try_asm_modules" != "yes" ; then
+       gcry_cv_cc_ppc_altivec="n/a"
+      else
+       gcry_cv_cc_ppc_altivec=no
+       AC_COMPILE_IFELSE([AC_LANG_SOURCE(
+       [[#include <altivec.h>
+         typedef vector unsigned char block;
+         typedef vector unsigned int vecu32;
+         block fn(block in)
+         {
+           block t = vec_perm (in, in, vec_vsx_ld (0, (unsigned char*)0));
+           vecu32 y = vec_vsx_ld (0, (unsigned int*)0);
+           return vec_cipher_be (t, in) ^ (block)y;
+         }
+         ]])],
+       [gcry_cv_cc_ppc_altivec=yes])
+      fi])
+if test "$gcry_cv_cc_ppc_altivec" = "yes" ; then
+    AC_DEFINE(HAVE_COMPATIBLE_CC_PPC_ALTIVEC,1,
+           [Defined if underlying compiler supports PowerPC AltiVec/VSX/crypto 
intrinsics])
+fi
+
+_gcc_cflags_save=$CFLAGS
+CFLAGS="$CFLAGS -maltivec -mvsx -mcrypto"
+
+if test "$gcry_cv_cc_ppc_altivec" = "no" &&
+    test "$mpi_cpu_arch" = "ppc" &&
+    test "$try_asm_modules" == "yes" ; then
+  AC_CACHE_CHECK([whether compiler supports PowerPC AltiVec/VSX/crypto 
intrinsics with extra GCC flags],
+    [gcry_cv_cc_ppc_altivec_cflags],
+    [gcry_cv_cc_ppc_altivec_cflags=no
+    AC_COMPILE_IFELSE([AC_LANG_SOURCE(
+      [[#include <altivec.h>
+       typedef vector unsigned char block;
+       typedef vector unsigned int vecu32;
+       block fn(block in)
+       {
+         block t = vec_perm (in, in, vec_vsx_ld (0, (unsigned char*)0));
+         vecu32 y = vec_vsx_ld (0, (unsigned int*)0);
+         return vec_cipher_be (t, in) ^ (block)y;
+       }]])],
+      [gcry_cv_cc_ppc_altivec_cflags=yes])])
+  if test "$gcry_cv_cc_ppc_altivec_cflags" = "yes" ; then
+    AC_DEFINE(HAVE_COMPATIBLE_CC_PPC_ALTIVEC,1,
+             [Defined if underlying compiler supports PowerPC 
AltiVec/VSX/crypto intrinsics])
+    AC_DEFINE(HAVE_COMPATIBLE_CC_PPC_ALTIVEC_WITH_CFLAGS,1,
+             [Defined if underlying compiler supports PowerPC 
AltiVec/VSX/crypto intrinsics with extra GCC flags])
+  fi
+fi
+
+AM_CONDITIONAL(ENABLE_PPC_VCRYPTO_EXTRA_CFLAGS,
+              test "$gcry_cv_cc_ppc_altivec_cflags" = "yes")
+
+# Restore flags.
+CFLAGS=$_gcc_cflags_save;
+
+
+#
+# Check whether GCC inline assembler supports PowerPC AltiVec/VSX/crypto 
instructions
+#
+AC_CACHE_CHECK([whether GCC inline assembler supports PowerPC 
AltiVec/VSX/crypto instructions],
+       [gcry_cv_gcc_inline_asm_ppc_altivec],
+       [if test "$mpi_cpu_arch" != "ppc" ||
+           test "$try_asm_modules" != "yes" ; then
+          gcry_cv_gcc_inline_asm_ppc_altivec="n/a"
+        else
+          gcry_cv_gcc_inline_asm_ppc_altivec=no
+          AC_LINK_IFELSE([AC_LANG_PROGRAM(
+          [[__asm__(".globl testfn;\n"
+                    ".text\n\t"
+                    "testfn:\n"
+                    "stvx %v31,%r12,%r0;\n"
+                    "lvx  %v20,%r12,%r0;\n"
+                    "vcipher %v0, %v1, %v22;\n"
+                    "lxvw4x %vs32, %r0, %r1;\n"
+                    "vadduwm %v0, %v1, %v22;\n"
+                    "vshasigmaw %v0, %v1, 0, 15;\n"
+                    "vshasigmad %v0, %v1, 0, 15;\n"
+                    "vpmsumd %v11, %v11, %v11;\n"
+                  );
+            void testfn(void);
+            ]], [ testfn(); ] )],
+          [gcry_cv_gcc_inline_asm_ppc_altivec=yes])
+        fi])
+if test "$gcry_cv_gcc_inline_asm_ppc_altivec" = "yes" ; then
+   AC_DEFINE(HAVE_GCC_INLINE_ASM_PPC_ALTIVEC,1,
+     [Defined if inline assembler supports PowerPC AltiVec/VSX/crypto 
instructions])
+fi
+
+
+#
+# Check whether GCC inline assembler supports PowerISA 3.00 instructions
+#
+AC_CACHE_CHECK([whether GCC inline assembler supports PowerISA 3.00 
instructions],
+       [gcry_cv_gcc_inline_asm_ppc_arch_3_00],
+       [if test "$mpi_cpu_arch" != "ppc" ||
+           test "$try_asm_modules" != "yes" ; then
+          gcry_cv_gcc_inline_asm_ppc_arch_3_00="n/a"
+        else
+          gcry_cv_gcc_inline_asm_ppc_arch_3_00=no
+          AC_LINK_IFELSE([AC_LANG_PROGRAM(
+          [[__asm__(".text\n\t"
+                    ".globl testfn;\n"
+                    "testfn:\n"
+                    "stxvb16x %r1,%v12,%v30;\n"
+                  );
+            void testfn(void);
+            ]], [ testfn(); ])],
+          [gcry_cv_gcc_inline_asm_ppc_arch_3_00=yes])
+        fi])
+if test "$gcry_cv_gcc_inline_asm_ppc_arch_3_00" = "yes" ; then
+   AC_DEFINE(HAVE_GCC_INLINE_ASM_PPC_ARCH_3_00,1,
+     [Defined if inline assembler supports PowerISA 3.00 instructions])
+fi
+
+
+#
+# Check whether GCC inline assembler supports zSeries instructions
+#
+AC_CACHE_CHECK([whether GCC inline assembler supports zSeries instructions],
+      [gcry_cv_gcc_inline_asm_s390x],
+      [if test "$mpi_cpu_arch" != "s390x" ||
+         test "$try_asm_modules" != "yes" ; then
+         gcry_cv_gcc_inline_asm_s390x="n/a"
+       else
+         gcry_cv_gcc_inline_asm_s390x=no
+         AC_LINK_IFELSE([AC_LANG_PROGRAM(
+         [[typedef unsigned int u128_t __attribute__ ((mode (TI)));
+           unsigned int testfunc(unsigned int x, void *y, unsigned int z)
+           {
+             unsigned long fac[8];
+             register unsigned long reg0 asm("0") = 0;
+             register unsigned long reg1 asm("1") = x;
+             u128_t r1 = ((u128_t)(unsigned long)y << 64) | (unsigned long)z;
+             u128_t r2 = 0;
+             u128_t r3 = 0;
+             asm volatile (".insn rre,0xb92e << 16, %[r1], %[r2]\n\t"
+                           : [r1] "+a" (r1), [r2] "+a" (r2)
+                           : "r" (reg0), "r" (reg1)
+                           : "cc", "memory");
+             asm volatile (".insn rrf,0xb929 << 16, %[r1], %[r2], %[r3], 0\n\t"
+                           : [r1] "+a" (r1), [r2] "+a" (r2), [r3] "+a" (r3)
+                           : "r" (reg0), "r" (reg1)
+                           : "cc", "memory");
+             reg0 = 8 - 1;
+             asm ("stfle %1\n\t"
+                  : "+d" (reg0), "=Q" (fac[0])
+                  :
+                  : "cc", "memory");
+             asm volatile ("mvc 0(16, %0), 0(%1)\n\t"
+                           :
+                           : "a" (y), "a" (fac)
+                           : "memory");
+             asm volatile ("xc 0(16, %0), 0(%0)\n\t"
+                           :
+                           : "a" (fac)
+                           : "memory");
+             asm volatile ("risbgn %%r11, %%r11, 0, 129, 0\n\t"
+                           :
+                           :
+                           : "memory", "r11");
+             asm volatile ("algrk %%r14, %%r14, %%r14\n\t"
+                           :
+                           :
+                           : "memory", "r14");
+             return (unsigned int)r1 ^ reg0;
+           }
+           ]] , [ testfunc(0, 0, 0); ])],
+         [gcry_cv_gcc_inline_asm_s390x=yes])
+       fi])
+if test "$gcry_cv_gcc_inline_asm_s390x" = "yes" ; then
+   AC_DEFINE(HAVE_GCC_INLINE_ASM_S390X,1,
+     [Defined if inline assembler supports zSeries instructions])
+fi
+
+
+#
+# Check whether GCC inline assembler supports zSeries vector instructions
+#
+AC_CACHE_CHECK([whether GCC inline assembler supports zSeries vector 
instructions],
+      [gcry_cv_gcc_inline_asm_s390x_vx],
+      [if test "$mpi_cpu_arch" != "s390x" ||
+         test "$try_asm_modules" != "yes" ; then
+         gcry_cv_gcc_inline_asm_s390x_vx="n/a"
+       else
+         gcry_cv_gcc_inline_asm_s390x_vx=no
+         if test "$gcry_cv_gcc_inline_asm_s390x" = "yes" ; then
+           AC_LINK_IFELSE([AC_LANG_PROGRAM(
+           [[void testfunc(void)
+             {
+               asm volatile (".machine \"z13+vx\"\n\t"
+                             "vx %%v0, %%v1, %%v31\n\t"
+                             "verllf %%v11, %%v11, (16)(0)\n\t"
+                             :
+                             :
+                             : "memory");
+             }
+             ]], [ testfunc(); ])],
+           [gcry_cv_gcc_inline_asm_s390x_vx=yes])
+         fi
+       fi])
+if test "$gcry_cv_gcc_inline_asm_s390x_vx" = "yes" ; then
+   AC_DEFINE(HAVE_GCC_INLINE_ASM_S390X_VX,1,
+     [Defined if inline assembler supports zSeries vector instructions])
+fi
+
+
+#######################################
+#### Checks for library functions. ####
+#######################################
+
+AC_FUNC_VPRINTF
+# We have replacements for these in src/missing-string.c
+AC_CHECK_FUNCS(stpcpy strcasecmp)
+# We have replacements for these in src/g10lib.h
+AC_CHECK_FUNCS(strtoul memmove stricmp atexit raise)
+# Other checks
+AC_CHECK_FUNCS(strerror rand mmap getpagesize sysconf waitpid wait4)
+AC_CHECK_FUNCS(gettimeofday getrusage gethrtime clock_gettime syslog)
+AC_CHECK_FUNCS(syscall fcntl ftruncate flockfile getauxval elf_aux_info)
+AC_CHECK_FUNCS(explicit_bzero explicit_memset getentropy)
+
+GNUPG_CHECK_MLOCK
+
+#
+# Replacement functions.
+#
+AC_REPLACE_FUNCS([getpid clock])
+
+
+#
+# Check whether it is necessary to link against libdl.
+#
+DL_LIBS=""
+if test "$use_hmac_binary_check" != no ; then
+  _gcry_save_libs="$LIBS"
+  LIBS=""
+  AC_SEARCH_LIBS(dlopen, c dl,,,)
+  DL_LIBS=$LIBS
+  LIBS="$_gcry_save_libs"
+fi
+AC_SUBST(DL_LIBS)
+
+
+#
+# Check whether we can use Linux capabilities as requested.
+#
+if test "$use_capabilities" = "yes" ; then
+use_capabilities=no
+AC_CHECK_HEADERS(sys/capability.h)
+if test "$ac_cv_header_sys_capability_h" = "yes" ; then
+  AC_CHECK_LIB(cap, cap_init, ac_need_libcap=1)
+  if test "$ac_cv_lib_cap_cap_init" = "yes"; then
+     AC_DEFINE(USE_CAPABILITIES,1,
+               [define if capabilities should be used])
+     LIBS="$LIBS -lcap"
+     use_capabilities=yes
+  fi
+fi
+if test "$use_capabilities" = "no" ; then
+    AC_MSG_WARN([[
+***
+*** The use of capabilities on this system is not possible.
+*** You need a recent Linux kernel and some patches:
+***   fcaps-2.2.9-990610.patch      (kernel patch for 2.2.9)
+***   fcap-module-990613.tar.gz     (kernel module)
+***   libcap-1.92.tar.gz            (user mode library and utilities)
+*** And you have to configure the kernel with CONFIG_VFS_CAP_PLUGIN
+*** set (filesystems menu). Be warned: This code is *really* ALPHA.
+***]])
+fi
+fi
+
+# Check whether a random device is available.
+if test "$try_dev_random" = yes ; then
+    AC_CACHE_CHECK(for random device, ac_cv_have_dev_random,
+    [if test -r "$NAME_OF_DEV_RANDOM" && test -r "$NAME_OF_DEV_URANDOM" ; then
+      ac_cv_have_dev_random=yes; else ac_cv_have_dev_random=no; fi])
+    if test "$ac_cv_have_dev_random" = yes; then
+        AC_DEFINE(HAVE_DEV_RANDOM,1,
+                 [defined if the system supports a random device] )
+    fi
+else
+    AC_MSG_CHECKING(for random device)
+    ac_cv_have_dev_random=no
+    AC_MSG_RESULT(has been disabled)
+fi
+
+# Figure out the random modules for this configuration.
+if test "$random" = "default"; then
+
+    # Select default value.
+    if test "$ac_cv_func_getentropy" = yes; then
+        random_modules="getentropy"
+    elif test "$ac_cv_have_dev_random" = yes; then
+        # Try Linuxish random device.
+        random_modules="linux"
+    else
+        case "${host}" in
+        *-*-mingw32ce*)
+          # WindowsCE random device.
+          random_modules="w32ce"
+          ;;
+        *-*-mingw32*|*-*-cygwin*)
+          # Windows random device.
+          random_modules="w32"
+          ;;
+        *)
+          # Build everything, allow to select at runtime.
+          random_modules="$auto_random_modules"
+          ;;
+        esac
+    fi
+else
+    if test "$random" = "auto"; then
+        # Build everything, allow to select at runtime.
+        random_modules="$auto_random_modules"
+    else
+        random_modules="$random"
+    fi
+fi
+
+
+#
+# Other defines
+#
+if test mym4_isgit = "yes"; then
+    AC_DEFINE(IS_DEVELOPMENT_VERSION,1,
+              [Defined if this is not a regular release])
+fi
+
+
+AM_CONDITIONAL(CROSS_COMPILING, test x$cross_compiling = xyes)
+
+
+# This is handy for debugging so the compiler doesn't rearrange
+# things and eliminate variables.
+AC_ARG_ENABLE(optimization,
+       AS_HELP_STRING([--disable-optimization],
+                      [disable compiler optimization]),
+                      [if test $enableval = no ; then
+                         CFLAGS=`echo $CFLAGS | sed 's/-O[[0-9]]//'`
+                       fi])
+
+AC_MSG_NOTICE([checking for cc features])
+# CFLAGS mangling when using gcc.
+if test "$GCC" = yes; then
+    AC_MSG_CHECKING([if gcc supports -fno-delete-null-pointer-checks])
+    _gcc_cflags_save=$CFLAGS
+    CFLAGS="-fno-delete-null-pointer-checks"
+    AC_COMPILE_IFELSE([AC_LANG_PROGRAM([],[])],_gcc_wopt=yes,_gcc_wopt=no)
+    AC_MSG_RESULT($_gcc_wopt)
+    CFLAGS=$_gcc_cflags_save;
+    if test x"$_gcc_wopt" = xyes ; then
+       CFLAGS="$CFLAGS -fno-delete-null-pointer-checks"
+    fi
+
+    CFLAGS="$CFLAGS -Wall"
+    if test "$USE_MAINTAINER_MODE" = "yes"; then
+        CFLAGS="$CFLAGS -Wcast-align -Wshadow -Wstrict-prototypes"
+        CFLAGS="$CFLAGS -Wformat -Wno-format-y2k -Wformat-security"
+
+        # If -Wno-missing-field-initializers is supported we can enable a
+        # a bunch of really useful warnings.
+        AC_MSG_CHECKING([if gcc supports -Wno-missing-field-initializers])
+        _gcc_cflags_save=$CFLAGS
+        CFLAGS="-Wno-missing-field-initializers"
+        AC_COMPILE_IFELSE([AC_LANG_PROGRAM([],[])],_gcc_wopt=yes,_gcc_wopt=no)
+        AC_MSG_RESULT($_gcc_wopt)
+        CFLAGS=$_gcc_cflags_save;
+        if test x"$_gcc_wopt" = xyes ; then
+          CFLAGS="$CFLAGS -W -Wextra -Wbad-function-cast"
+          CFLAGS="$CFLAGS -Wwrite-strings"
+          CFLAGS="$CFLAGS -Wdeclaration-after-statement"
+          CFLAGS="$CFLAGS -Wno-missing-field-initializers"
+          CFLAGS="$CFLAGS -Wno-sign-compare"
+        fi
+
+        AC_MSG_CHECKING([if gcc supports -Wpointer-arith])
+        _gcc_cflags_save=$CFLAGS
+        CFLAGS="-Wpointer-arith"
+        AC_COMPILE_IFELSE([AC_LANG_PROGRAM([],[])],_gcc_wopt=yes,_gcc_wopt=no)
+        AC_MSG_RESULT($_gcc_wopt)
+        CFLAGS=$_gcc_cflags_save;
+        if test x"$_gcc_wopt" = xyes ; then
+          CFLAGS="$CFLAGS -Wpointer-arith"
+        fi
+    fi
+fi
+
+# Check whether as(1) supports a noeexecstack feature.  This test
+# includes an override option.
+CL_AS_NOEXECSTACK
+
+
+AC_SUBST(LIBGCRYPT_CONFIG_API_VERSION)
+AC_SUBST(LIBGCRYPT_CONFIG_LIBS)
+AC_SUBST(LIBGCRYPT_CONFIG_CFLAGS)
+AC_SUBST(LIBGCRYPT_CONFIG_HOST)
+AC_SUBST(LIBGCRYPT_THREAD_MODULES)
+
+AC_CONFIG_COMMANDS([gcrypt-conf],[[
+chmod +x src/libgcrypt-config
+]],[[
+prefix=$prefix
+exec_prefix=$exec_prefix
+libdir=$libdir
+datadir=$datadir
+DATADIRNAME=$DATADIRNAME
+]])
+
+#####################
+#### Conclusion. ####
+#####################
+
+# Check that requested feature can actually be used and define
+# ENABLE_foo_SUPPORT macros.
+
+if test x"$aesnisupport" = xyes ; then
+  if test "$gcry_cv_gcc_inline_asm_ssse3" != "yes" ; then
+    aesnisupport="no (unsupported by compiler)"
+  fi
+fi
+if test x"$shaextsupport" = xyes ; then
+  if test "$gcry_cv_gcc_inline_asm_shaext" != "yes" ; then
+    shaextsupport="no (unsupported by compiler)"
+  fi
+fi
+if test x"$pclmulsupport" = xyes ; then
+  if test "$gcry_cv_gcc_inline_asm_pclmul" != "yes" ; then
+    pclmulsupport="no (unsupported by compiler)"
+  fi
+fi
+if test x"$sse41support" = xyes ; then
+  if test "$gcry_cv_gcc_inline_asm_sse41" != "yes" ; then
+    sse41support="no (unsupported by compiler)"
+  fi
+fi
+if test x"$avxsupport" = xyes ; then
+  if test "$gcry_cv_gcc_inline_asm_avx" != "yes" ; then
+    avxsupport="no (unsupported by compiler)"
+  fi
+fi
+if test x"$avx2support" = xyes ; then
+  if test "$gcry_cv_gcc_inline_asm_avx2" != "yes" ; then
+    avx2support="no (unsupported by compiler)"
+  fi
+fi
+if test x"$neonsupport" = xyes ; then
+  if test "$gcry_cv_gcc_inline_asm_neon" != "yes" ; then
+    if test "$gcry_cv_gcc_inline_asm_aarch64_neon" != "yes" ; then
+      neonsupport="no (unsupported by compiler)"
+    fi
+  fi
+fi
+if test x"$armcryptosupport" = xyes ; then
+  if test "$gcry_cv_gcc_inline_asm_aarch32_crypto" != "yes" ; then
+    if test "$gcry_cv_gcc_inline_asm_aarch64_crypto" != "yes" ; then
+      neonsupport="no (unsupported by compiler)"
+    fi
+  fi
+fi
+
+if test x"$aesnisupport" = xyes ; then
+  AC_DEFINE(ENABLE_AESNI_SUPPORT, 1,
+            [Enable support for Intel AES-NI instructions.])
+fi
+if test x"$shaextsupport" = xyes ; then
+  AC_DEFINE(ENABLE_SHAEXT_SUPPORT, 1,
+            [Enable support for Intel SHAEXT instructions.])
+fi
+if test x"$pclmulsupport" = xyes ; then
+  AC_DEFINE(ENABLE_PCLMUL_SUPPORT, 1,
+            [Enable support for Intel PCLMUL instructions.])
+fi
+if test x"$sse41support" = xyes ; then
+  AC_DEFINE(ENABLE_SSE41_SUPPORT, 1,
+            [Enable support for Intel SSE4.1 instructions.])
+fi
+if test x"$avxsupport" = xyes ; then
+  AC_DEFINE(ENABLE_AVX_SUPPORT,1,
+            [Enable support for Intel AVX instructions.])
+fi
+if test x"$avx2support" = xyes ; then
+  AC_DEFINE(ENABLE_AVX2_SUPPORT,1,
+            [Enable support for Intel AVX2 instructions.])
+fi
+if test x"$neonsupport" = xyes ; then
+  AC_DEFINE(ENABLE_NEON_SUPPORT,1,
+            [Enable support for ARM NEON instructions.])
+fi
+if test x"$armcryptosupport" = xyes ; then
+  AC_DEFINE(ENABLE_ARM_CRYPTO_SUPPORT,1,
+            [Enable support for ARMv8 Crypto Extension instructions.])
+fi
+if test x"$ppccryptosupport" = xyes ; then
+  AC_DEFINE(ENABLE_PPC_CRYPTO_SUPPORT,1,
+            [Enable support for POWER 8 (PowerISA 2.07) crypto extension.])
+fi
+if test x"$jentsupport" = xyes ; then
+  AC_DEFINE(ENABLE_JENT_SUPPORT, 1,
+            [Enable support for the jitter entropy collector.])
+fi
+if test x"$padlocksupport" = xyes ; then
+  AC_DEFINE(ENABLE_PADLOCK_SUPPORT, 1,
+            [Enable support for the PadLock engine.])
+fi
+if test x"$drngsupport" = xyes ; then
+  AC_DEFINE(ENABLE_DRNG_SUPPORT, 1,
+            [Enable support for Intel DRNG (RDRAND instruction).])
+fi
+
+
+if test x"$force_soft_hwfeatures" = xyes ; then
+  AC_DEFINE(ENABLE_FORCE_SOFT_HWFEATURES, 1,
+            [Enable forcing 'soft' HW feature bits on (for testing).])
+fi
+
+# Define conditional sources and config.h symbols depending on the
+# selected ciphers, pubkey-ciphers, digests, kdfs, and random modules.
+
+LIST_MEMBER(arcfour, $enabled_ciphers)
+if test "$found" = "1"; then
+   GCRYPT_CIPHERS="$GCRYPT_CIPHERS arcfour.lo"
+   AC_DEFINE(USE_ARCFOUR, 1, [Defined if this module should be included])
+
+   case "${host}" in
+      x86_64-*-*)
+         # Build with the assembly implementation
+         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS arcfour-amd64.lo"
+      ;;
+   esac
+fi
+
+LIST_MEMBER(blowfish, $enabled_ciphers)
+if test "$found" = "1" ; then
+   GCRYPT_CIPHERS="$GCRYPT_CIPHERS blowfish.lo"
+   AC_DEFINE(USE_BLOWFISH, 1, [Defined if this module should be included])
+
+   case "${host}" in
+      x86_64-*-*)
+         # Build with the assembly implementation
+         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS blowfish-amd64.lo"
+      ;;
+      arm*-*-*)
+         # Build with the assembly implementation
+         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS blowfish-arm.lo"
+      ;;
+   esac
+fi
+
+LIST_MEMBER(cast5, $enabled_ciphers)
+if test "$found" = "1" ; then
+   GCRYPT_CIPHERS="$GCRYPT_CIPHERS cast5.lo"
+   AC_DEFINE(USE_CAST5, 1, [Defined if this module should be included])
+
+   case "${host}" in
+      x86_64-*-*)
+         # Build with the assembly implementation
+         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS cast5-amd64.lo"
+      ;;
+      arm*-*-*)
+         # Build with the assembly implementation
+         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS cast5-arm.lo"
+      ;;
+   esac
+fi
+
+LIST_MEMBER(des, $enabled_ciphers)
+if test "$found" = "1" ; then
+   GCRYPT_CIPHERS="$GCRYPT_CIPHERS des.lo"
+   AC_DEFINE(USE_DES, 1, [Defined if this module should be included])
+
+   case "${host}" in
+      x86_64-*-*)
+         # Build with the assembly implementation
+         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS des-amd64.lo"
+      ;;
+   esac
+fi
+
+LIST_MEMBER(aes, $enabled_ciphers)
+if test "$found" = "1" ; then
+   GCRYPT_CIPHERS="$GCRYPT_CIPHERS rijndael.lo"
+   AC_DEFINE(USE_AES, 1, [Defined if this module should be included])
+
+   case "${host}" in
+      x86_64-*-*)
+         # Build with the assembly implementation
+         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-amd64.lo"
+
+         # Build with the SSSE3 implementation
+         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-ssse3-amd64.lo"
+         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-ssse3-amd64-asm.lo"
+
+         # Build with the VAES/AVX2 implementation
+         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-vaes.lo"
+         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-vaes-avx2-amd64.lo"
+      ;;
+      arm*-*-*)
+         # Build with the assembly implementation
+         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-arm.lo"
+
+         # Build with the ARMv8/AArch32 CE implementation
+         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-armv8-ce.lo"
+         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-armv8-aarch32-ce.lo"
+      ;;
+      aarch64-*-*)
+         # Build with the assembly implementation
+         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-aarch64.lo"
+
+         # Build with the ARMv8/AArch64 CE implementation
+         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-armv8-ce.lo"
+         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-armv8-aarch64-ce.lo"
+      ;;
+      powerpc64le-*-*)
+         # Build with the crypto extension implementation
+         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-ppc.lo"
+         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-ppc9le.lo"
+
+         if test "$gcry_cv_gcc_inline_asm_ppc_altivec" = "yes" &&
+            test "$gcry_cv_gcc_inline_asm_ppc_arch_3_00" = "yes" ; then
+            # Build with AES-GCM bulk implementation for P10
+            GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-gcm-p10le.lo"
+            GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-p10le.lo"
+         fi
+      ;;
+      powerpc64-*-*)
+         # Big-Endian.
+         # Build with the crypto extension implementation
+         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-ppc.lo"
+      ;;
+      powerpc-*-*)
+         # Big-Endian.
+         # Build with the crypto extension implementation
+         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-ppc.lo"
+      ;;
+      s390x-*-*)
+         # Big-Endian.
+         # Build with the crypto extension implementation
+         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-s390x.lo"
+      ;;
+   esac
+
+   case "$mpi_cpu_arch" in
+     x86)
+         # Build with the AES-NI implementation
+         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-aesni.lo"
+
+         # Build with the Padlock implementation
+         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS rijndael-padlock.lo"
+      ;;
+   esac
+fi
+
+LIST_MEMBER(twofish, $enabled_ciphers)
+if test "$found" = "1" ; then
+   GCRYPT_CIPHERS="$GCRYPT_CIPHERS twofish.lo"
+   AC_DEFINE(USE_TWOFISH, 1, [Defined if this module should be included])
+
+   case "${host}" in
+      x86_64-*-*)
+         # Build with the assembly implementation
+         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS twofish-amd64.lo"
+
+         if test x"$avx2support" = xyes ; then
+            # Build with the AVX2 implementation
+            GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS twofish-avx2-amd64.lo"
+         fi
+      ;;
+      arm*-*-*)
+         # Build with the assembly implementation
+         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS twofish-arm.lo"
+      ;;
+      aarch64-*-*)
+         # Build with the assembly implementation
+         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS twofish-aarch64.lo"
+      ;;
+   esac
+fi
+
+LIST_MEMBER(serpent, $enabled_ciphers)
+if test "$found" = "1" ; then
+   GCRYPT_CIPHERS="$GCRYPT_CIPHERS serpent.lo"
+   AC_DEFINE(USE_SERPENT, 1, [Defined if this module should be included])
+
+   case "${host}" in
+      x86_64-*-*)
+         # Build with the SSE2 implementation
+         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS serpent-sse2-amd64.lo"
+      ;;
+   esac
+
+   if test x"$avx2support" = xyes ; then
+      # Build with the AVX2 implementation
+      GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS serpent-avx2-amd64.lo"
+   fi
+
+   if test x"$neonsupport" = xyes ; then
+      # Build with the NEON implementation
+      GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS serpent-armv7-neon.lo"
+   fi
+fi
+
+LIST_MEMBER(rfc2268, $enabled_ciphers)
+if test "$found" = "1" ; then
+   GCRYPT_CIPHERS="$GCRYPT_CIPHERS rfc2268.lo"
+   AC_DEFINE(USE_RFC2268, 1, [Defined if this module should be included])
+fi
+
+LIST_MEMBER(seed, $enabled_ciphers)
+if test "$found" = "1" ; then
+   GCRYPT_CIPHERS="$GCRYPT_CIPHERS seed.lo"
+   AC_DEFINE(USE_SEED, 1, [Defined if this module should be included])
+fi
+
+LIST_MEMBER(camellia, $enabled_ciphers)
+if test "$found" = "1" ; then
+   GCRYPT_CIPHERS="$GCRYPT_CIPHERS camellia.lo camellia-glue.lo"
+   AC_DEFINE(USE_CAMELLIA, 1, [Defined if this module should be included])
+
+   case "${host}" in
+      arm*-*-*)
+         # Build with the assembly implementation
+         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS camellia-arm.lo"
+      ;;
+      aarch64-*-*)
+         # Build with the assembly implementation
+         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS camellia-aarch64.lo"
+      ;;
+   esac
+
+   if test x"$avxsupport" = xyes ; then
+      if test x"$aesnisupport" = xyes ; then
+        # Build with the AES-NI/AVX implementation
+        GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS camellia-aesni-avx-amd64.lo"
+      fi
+   fi
+
+   if test x"$avx2support" = xyes ; then
+      if test x"$aesnisupport" = xyes ; then
+        # Build with the AES-NI/AVX2 implementation
+        GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS camellia-aesni-avx2-amd64.lo"
+
+        # Build with the VAES/AVX2 implementation
+        GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS camellia-vaes-avx2-amd64.lo"
+      fi
+   fi
+fi
+
+LIST_MEMBER(idea, $enabled_ciphers)
+if test "$found" = "1" ; then
+   GCRYPT_CIPHERS="$GCRYPT_CIPHERS idea.lo"
+   AC_DEFINE(USE_IDEA, 1, [Defined if this module should be included])
+fi
+
+LIST_MEMBER(salsa20, $enabled_ciphers)
+if test "$found" = "1" ; then
+   GCRYPT_CIPHERS="$GCRYPT_CIPHERS salsa20.lo"
+   AC_DEFINE(USE_SALSA20, 1, [Defined if this module should be included])
+
+   case "${host}" in
+      x86_64-*-*)
+         # Build with the assembly implementation
+         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS salsa20-amd64.lo"
+      ;;
+   esac
+
+   if test x"$neonsupport" = xyes ; then
+     # Build with the NEON implementation
+     GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS salsa20-armv7-neon.lo"
+   fi
+fi
+
+LIST_MEMBER(gost28147, $enabled_ciphers)
+if test "$found" = "1" ; then
+   GCRYPT_CIPHERS="$GCRYPT_CIPHERS gost28147.lo"
+   AC_DEFINE(USE_GOST28147, 1, [Defined if this module should be included])
+fi
+
+LIST_MEMBER(chacha20, $enabled_ciphers)
+if test "$found" = "1" ; then
+   GCRYPT_CIPHERS="$GCRYPT_CIPHERS chacha20.lo"
+   AC_DEFINE(USE_CHACHA20, 1, [Defined if this module should be included])
+
+   case "${host}" in
+      x86_64-*-*)
+         # Build with the assembly implementation
+         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS chacha20-amd64-ssse3.lo"
+         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS chacha20-amd64-avx2.lo"
+      ;;
+      aarch64-*-*)
+         # Build with the assembly implementation
+         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS chacha20-aarch64.lo"
+      ;;
+      powerpc64le-*-*)
+         # Build with the ppc8 vector implementation
+         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS chacha20-ppc.lo"
+      ;;
+      powerpc64-*-*)
+         # Build with the ppc8 vector implementation
+         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS chacha20-ppc.lo"
+      ;;
+      powerpc-*-*)
+         # Build with the ppc8 vector implementation
+         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS chacha20-ppc.lo"
+      ;;
+      s390x-*-*)
+         # Build with the s390x/zSeries vector implementation
+         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS chacha20-s390x.lo"
+      ;;
+   esac
+
+   if test x"$neonsupport" = xyes ; then
+     # Build with the NEON implementation
+     GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS chacha20-armv7-neon.lo"
+   fi
+fi
+
+LIST_MEMBER(sm4, $enabled_ciphers)
+if test "$found" = "1" ; then
+   GCRYPT_CIPHERS="$GCRYPT_CIPHERS sm4.lo"
+   AC_DEFINE(USE_SM4, 1, [Defined if this module should be included])
+
+   case "${host}" in
+      x86_64-*-*)
+         # Build with the assembly implementation
+         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS sm4-aesni-avx-amd64.lo"
+         GCRYPT_ASM_CIPHERS="$GCRYPT_ASM_CIPHERS sm4-aesni-avx2-amd64.lo"
+      ;;
+   esac
+fi
+
+LIST_MEMBER(dsa, $enabled_pubkey_ciphers)
+AM_CONDITIONAL(USE_DSA, [test "$found" = "1"])
+if test "$found" = "1" ; then
+   GCRYPT_PUBKEY_CIPHERS="$GCRYPT_PUBKEY_CIPHERS dsa.lo"
+   AC_DEFINE(USE_DSA, 1, [Defined if this module should be included])
+fi
+
+LIST_MEMBER(rsa, $enabled_pubkey_ciphers)
+AM_CONDITIONAL(USE_RSA, [test "$found" = "1"])
+if test "$found" = "1" ; then
+   GCRYPT_PUBKEY_CIPHERS="$GCRYPT_PUBKEY_CIPHERS rsa.lo"
+   AC_DEFINE(USE_RSA, 1, [Defined if this module should be included])
+fi
+
+LIST_MEMBER(elgamal, $enabled_pubkey_ciphers)
+AM_CONDITIONAL(USE_ELGAMAL, [test "$found" = "1"])
+if test "$found" = "1" ; then
+   GCRYPT_PUBKEY_CIPHERS="$GCRYPT_PUBKEY_CIPHERS elgamal.lo"
+   AC_DEFINE(USE_ELGAMAL, 1, [Defined if this module should be included])
+fi
+
+LIST_MEMBER(ecc, $enabled_pubkey_ciphers)
+AM_CONDITIONAL(USE_ECC, [test "$found" = "1"])
+if test "$found" = "1" ; then
+   GCRYPT_PUBKEY_CIPHERS="$GCRYPT_PUBKEY_CIPHERS \
+                          ecc.lo ecc-curves.lo ecc-misc.lo \
+                          ecc-ecdh.lo ecc-ecdsa.lo ecc-eddsa.lo ecc-gost.lo \
+                          ecc-sm2.lo"
+   AC_DEFINE(USE_ECC, 1, [Defined if this module should be included])
+fi
+
+LIST_MEMBER(crc, $enabled_digests)
+if test "$found" = "1" ; then
+   GCRYPT_DIGESTS="$GCRYPT_DIGESTS crc.lo"
+   AC_DEFINE(USE_CRC, 1, [Defined if this module should be included])
+
+   case "${host}" in
+      i?86-*-* | x86_64-*-*)
+         # Build with the assembly implementation
+         GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS crc-intel-pclmul.lo"
+      ;;
+      aarch64-*-*)
+         # Build with the assembly implementation
+         GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS crc-armv8-ce.lo"
+         GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS crc-armv8-aarch64-ce.lo"
+      ;;
+      powerpc64le-*-*)
+         GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS crc-ppc.lo"
+      ;;
+      powerpc64-*-*)
+         GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS crc-ppc.lo"
+      ;;
+      powerpc-*-*)
+         GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS crc-ppc.lo"
+      ;;
+   esac
+fi
+
+LIST_MEMBER(gostr3411-94, $enabled_digests)
+if test "$found" = "1" ; then
+   # GOST R 34.11-94 internally uses GOST 28147-89
+   LIST_MEMBER(gost28147, $enabled_ciphers)
+   if test "$found" = "1" ; then
+      GCRYPT_DIGESTS="$GCRYPT_DIGESTS gostr3411-94.lo"
+      AC_DEFINE(USE_GOST_R_3411_94, 1, [Defined if this module should be 
included])
+   fi
+fi
+
+LIST_MEMBER(stribog, $enabled_digests)
+if test "$found" = "1" ; then
+   GCRYPT_DIGESTS="$GCRYPT_DIGESTS stribog.lo"
+   AC_DEFINE(USE_GOST_R_3411_12, 1, [Defined if this module should be 
included])
+fi
+
+LIST_MEMBER(md2, $enabled_digests)
+if test "$found" = "1" ; then
+   GCRYPT_DIGESTS="$GCRYPT_DIGESTS md2.lo"
+   AC_DEFINE(USE_MD2, 1, [Defined if this module should be included])
+fi
+
+LIST_MEMBER(md4, $enabled_digests)
+if test "$found" = "1" ; then
+   GCRYPT_DIGESTS="$GCRYPT_DIGESTS md4.lo"
+   AC_DEFINE(USE_MD4, 1, [Defined if this module should be included])
+fi
+
+LIST_MEMBER(md5, $enabled_digests)
+if test "$found" = "1" ; then
+   GCRYPT_DIGESTS="$GCRYPT_DIGESTS md5.lo"
+   AC_DEFINE(USE_MD5, 1, [Defined if this module should be included])
+fi
+
+LIST_MEMBER(rmd160, $enabled_digests)
+if test "$found" = "1" ; then
+   GCRYPT_DIGESTS="$GCRYPT_DIGESTS rmd160.lo"
+   AC_DEFINE(USE_RMD160, 1, [Defined if this module should be included])
+fi
+
+LIST_MEMBER(sha256, $enabled_digests)
+if test "$found" = "1" ; then
+   GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha256.lo"
+   AC_DEFINE(USE_SHA256, 1, [Defined if this module should be included])
+
+   case "${host}" in
+      x86_64-*-*)
+         # Build with the assembly implementation
+         GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha256-ssse3-amd64.lo"
+         GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha256-avx-amd64.lo"
+         GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha256-avx2-bmi2-amd64.lo"
+      ;;
+      arm*-*-*)
+         # Build with the assembly implementation
+         GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha256-armv8-aarch32-ce.lo"
+      ;;
+      aarch64-*-*)
+         # Build with the assembly implementation
+         GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha256-armv8-aarch64-ce.lo"
+      ;;
+      powerpc64le-*-*)
+         # Build with the crypto extension implementation
+         GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha256-ppc.lo"
+      ;;
+      powerpc64-*-*)
+         # Big-Endian.
+         # Build with the crypto extension implementation
+         GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha256-ppc.lo"
+      ;;
+      powerpc-*-*)
+         # Big-Endian.
+         # Build with the crypto extension implementation
+         GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha256-ppc.lo"
+   esac
+
+   case "$mpi_cpu_arch" in
+     x86)
+       # Build with the SHAEXT implementation
+       GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha256-intel-shaext.lo"
+     ;;
+   esac
+fi
+
+LIST_MEMBER(sha512, $enabled_digests)
+if test "$found" = "1" ; then
+   GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha512.lo"
+   AC_DEFINE(USE_SHA512, 1, [Defined if this module should be included])
+
+   case "${host}" in
+      x86_64-*-*)
+         # Build with the assembly implementation
+         GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha512-ssse3-amd64.lo"
+         GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha512-avx-amd64.lo"
+         GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha512-avx2-bmi2-amd64.lo"
+      ;;
+      i?86-*-*)
+         # Build with the assembly implementation
+         GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha512-ssse3-i386.lo"
+      ;;
+      arm*-*-*)
+         # Build with the assembly implementation
+         GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha512-arm.lo"
+      ;;
+      powerpc64le-*-*)
+         # Build with the crypto extension implementation
+         GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha512-ppc.lo"
+      ;;
+      powerpc64-*-*)
+         # Big-Endian.
+         # Build with the crypto extension implementation
+         GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha512-ppc.lo"
+      ;;
+      powerpc-*-*)
+         # Big-Endian.
+         # Build with the crypto extension implementation
+         GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha512-ppc.lo"
+   esac
+
+   if test x"$neonsupport" = xyes ; then
+     # Build with the NEON implementation
+     GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha512-armv7-neon.lo"
+   fi
+fi
+
+LIST_MEMBER(sha3, $enabled_digests)
+if test "$found" = "1" ; then
+   GCRYPT_DIGESTS="$GCRYPT_DIGESTS keccak.lo"
+   AC_DEFINE(USE_SHA3, 1, [Defined if this module should be included])
+
+   case "${host}" in
+      x86_64-*-*)
+         # Build with the assembly implementation
+         :
+      ;;
+   esac
+
+   if test x"$neonsupport" = xyes ; then
+     # Build with the NEON implementation
+     GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS keccak-armv7-neon.lo"
+   fi
+fi
+
+LIST_MEMBER(tiger, $enabled_digests)
+if test "$found" = "1" ; then
+   GCRYPT_DIGESTS="$GCRYPT_DIGESTS tiger.lo"
+   AC_DEFINE(USE_TIGER, 1, [Defined if this module should be included])
+fi
+
+LIST_MEMBER(whirlpool, $enabled_digests)
+if test "$found" = "1" ; then
+   GCRYPT_DIGESTS="$GCRYPT_DIGESTS whirlpool.lo"
+   AC_DEFINE(USE_WHIRLPOOL, 1, [Defined if this module should be included])
+
+   case "${host}" in
+      x86_64-*-*)
+         # Build with the assembly implementation
+         GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS whirlpool-sse2-amd64.lo"
+      ;;
+   esac
+fi
+
+LIST_MEMBER(blake2, $enabled_digests)
+if test "$found" = "1" ; then
+   GCRYPT_DIGESTS="$GCRYPT_DIGESTS blake2.lo"
+   AC_DEFINE(USE_BLAKE2, 1, [Defined if this module should be included])
+
+   case "${host}" in
+      x86_64-*-*)
+         # Build with the assembly implementation
+         GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS blake2b-amd64-avx2.lo"
+         GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS blake2s-amd64-avx.lo"
+      ;;
+   esac
+fi
+
+LIST_MEMBER(sm3, $enabled_digests)
+if test "$found" = "1" ; then
+   GCRYPT_DIGESTS="$GCRYPT_DIGESTS sm3.lo"
+   AC_DEFINE(USE_SM3, 1, [Defined if this module should be included])
+
+   case "${host}" in
+     x86_64-*-*)
+        # Build with the assembly implementation
+        GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sm3-avx-bmi2-amd64.lo"
+     ;;
+     aarch64-*-*)
+        # Build with the assembly implementation
+        GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sm3-aarch64.lo"
+     ;;
+   esac
+fi
+
+# SHA-1 needs to be included always for example because it is used by
+# random-csprng.c.
+GCRYPT_DIGESTS="$GCRYPT_DIGESTS sha1.lo"
+AC_DEFINE(USE_SHA1, 1,   [Defined if this module should be included])
+
+case "${host}" in
+  x86_64-*-*)
+    # Build with the assembly implementation
+    GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha1-ssse3-amd64.lo"
+    GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha1-avx-amd64.lo"
+    GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha1-avx-bmi2-amd64.lo"
+    GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha1-avx2-bmi2-amd64.lo"
+  ;;
+  arm*-*-*)
+    # Build with the assembly implementation
+    GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha1-armv7-neon.lo"
+    GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha1-armv8-aarch32-ce.lo"
+  ;;
+  aarch64-*-*)
+    # Build with the assembly implementation
+    GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha1-armv8-aarch64-ce.lo"
+  ;;
+esac
+
+case "$mpi_cpu_arch" in
+  x86)
+    # Build with the SHAEXT implementation
+    GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS sha1-intel-shaext.lo"
+  ;;
+esac
+
+# Arch specific GCM implementations
+case "${host}" in
+  i?86-*-* | x86_64-*-*)
+    GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS cipher-gcm-intel-pclmul.lo"
+  ;;
+  arm*-*-*)
+    GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS cipher-gcm-armv7-neon.lo"
+    GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS cipher-gcm-armv8-aarch32-ce.lo"
+  ;;
+  aarch64-*-*)
+    GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS cipher-gcm-armv8-aarch64-ce.lo"
+  ;;
+  powerpc64le-*-* | powerpc64-*-* | powerpc-*-*)
+    GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS cipher-gcm-ppc.lo"
+  ;;
+esac
+
+# Arch specific MAC implementations
+case "${host}" in
+  s390x-*-*)
+    GCRYPT_ASM_DIGESTS="$GCRYPT_ASM_DIGESTS poly1305-s390x.lo"
+  ;;
+esac
+
+LIST_MEMBER(scrypt, $enabled_kdfs)
+if test "$found" = "1" ; then
+   GCRYPT_KDFS="$GCRYPT_KDFS scrypt.lo"
+   AC_DEFINE(USE_SCRYPT, 1, [Defined if this module should be included])
+fi
+
+LIST_MEMBER(getentropy, $random_modules)
+if test "$found" = "1" ; then
+   GCRYPT_RANDOM="$GCRYPT_RANDOM rndgetentropy.lo"
+   AC_DEFINE(USE_RNDGETENTROPY, 1, [Defined if the getentropy RNG should be 
used.])
+fi
+
+LIST_MEMBER(linux, $random_modules)
+if test "$found" = "1" ; then
+   GCRYPT_RANDOM="$GCRYPT_RANDOM rndoldlinux.lo"
+   AC_DEFINE(USE_RNDOLDLINUX, 1, [Defined if the /dev/random RNG should be 
used.])
+fi
+
+LIST_MEMBER(unix, $random_modules)
+if test "$found" = "1" ; then
+   GCRYPT_RANDOM="$GCRYPT_RANDOM rndunix.lo"
+   AC_DEFINE(USE_RNDUNIX, 1, [Defined if the default Unix RNG should be used.])
+fi
+
+LIST_MEMBER(egd, $random_modules)
+if test "$found" = "1" ; then
+   GCRYPT_RANDOM="$GCRYPT_RANDOM rndegd.lo"
+   AC_DEFINE(USE_RNDEGD, 1, [Defined if the EGD based RNG should be used.])
+fi
+
+LIST_MEMBER(w32, $random_modules)
+if test "$found" = "1" ; then
+   GCRYPT_RANDOM="$GCRYPT_RANDOM rndw32.lo"
+   AC_DEFINE(USE_RNDW32, 1,
+             [Defined if the Windows specific RNG should be used.])
+fi
+
+LIST_MEMBER(w32ce, $random_modules)
+if test "$found" = "1" ; then
+   GCRYPT_RANDOM="$GCRYPT_RANDOM rndw32ce.lo"
+   AC_DEFINE(USE_RNDW32CE, 1,
+             [Defined if the WindowsCE specific RNG should be used.])
+fi
+
+if test "$try_asm_modules" = yes ; then
+  # Build with assembly implementations
+  GCRYPT_CIPHERS="$GCRYPT_CIPHERS $GCRYPT_ASM_CIPHERS"
+  GCRYPT_DIGESTS="$GCRYPT_DIGESTS $GCRYPT_ASM_DIGESTS"
+fi
+
+AC_SUBST([GCRYPT_CIPHERS])
+AC_SUBST([GCRYPT_PUBKEY_CIPHERS])
+AC_SUBST([GCRYPT_DIGESTS])
+AC_SUBST([GCRYPT_KDFS])
+AC_SUBST([GCRYPT_RANDOM])
+
+AC_SUBST(LIBGCRYPT_CIPHERS, $enabled_ciphers)
+AC_SUBST(LIBGCRYPT_PUBKEY_CIPHERS, $enabled_pubkey_ciphers)
+AC_SUBST(LIBGCRYPT_DIGESTS, $enabled_digests)
+
+# For printing the configuration we need a colon separated list of
+# algorithm names.
+tmp=`echo "$enabled_ciphers" | tr ' ' : `
+AC_DEFINE_UNQUOTED(LIBGCRYPT_CIPHERS, "$tmp",
+                   [List of available cipher algorithms])
+tmp=`echo "$enabled_pubkey_ciphers" | tr ' ' : `
+AC_DEFINE_UNQUOTED(LIBGCRYPT_PUBKEY_CIPHERS, "$tmp",
+                   [List of available public key cipher algorithms])
+tmp=`echo "$enabled_digests" | tr ' ' : `
+AC_DEFINE_UNQUOTED(LIBGCRYPT_DIGESTS, "$tmp",
+                   [List of available digest algorithms])
+tmp=`echo "$enabled_kdfs" | tr ' ' : `
+AC_DEFINE_UNQUOTED(LIBGCRYPT_KDFS, "$tmp",
+                   [List of available KDF algorithms])
+
+
+#
+# Define conditional sources depending on the used hardware platform.
+# Note that all possible modules must also be listed in
+# src/Makefile.am (EXTRA_libgcrypt_la_SOURCES).
+#
+GCRYPT_HWF_MODULES=
+case "$mpi_cpu_arch" in
+     x86)
+        AC_DEFINE(HAVE_CPU_ARCH_X86, 1,   [Defined for the x86 platforms])
+        GCRYPT_HWF_MODULES="libgcrypt_la-hwf-x86.lo"
+        ;;
+     alpha)
+        AC_DEFINE(HAVE_CPU_ARCH_ALPHA, 1, [Defined for Alpha platforms])
+        ;;
+     sparc)
+        AC_DEFINE(HAVE_CPU_ARCH_SPARC, 1, [Defined for SPARC platforms])
+        ;;
+     mips)
+        AC_DEFINE(HAVE_CPU_ARCH_MIPS, 1,  [Defined for MIPS platforms])
+        ;;
+     m68k)
+        AC_DEFINE(HAVE_CPU_ARCH_M68K, 1,  [Defined for M68k platforms])
+        ;;
+     ppc)
+        AC_DEFINE(HAVE_CPU_ARCH_PPC, 1,   [Defined for PPC platforms])
+        GCRYPT_HWF_MODULES="libgcrypt_la-hwf-ppc.lo"
+        ;;
+     arm)
+        AC_DEFINE(HAVE_CPU_ARCH_ARM, 1,   [Defined for ARM platforms])
+        GCRYPT_HWF_MODULES="libgcrypt_la-hwf-arm.lo"
+        ;;
+     aarch64)
+        AC_DEFINE(HAVE_CPU_ARCH_ARM, 1,   [Defined for ARM AArch64 platforms])
+        GCRYPT_HWF_MODULES="libgcrypt_la-hwf-arm.lo"
+        ;;
+     s390x)
+        AC_DEFINE(HAVE_CPU_ARCH_S390X, 1, [Defined for s390x/zSeries 
platforms])
+        GCRYPT_HWF_MODULES="libgcrypt_la-hwf-s390x.lo"
+        ;;
+esac
+AC_SUBST([GCRYPT_HWF_MODULES])
+
+
+#
+# Option to disable building of doc file
+#
+build_doc=yes
+AC_ARG_ENABLE([doc], AS_HELP_STRING([--disable-doc],
+                                    [do not build the documentation]),
+                     build_doc=$enableval, build_doc=yes)
+AM_CONDITIONAL([BUILD_DOC], [test "x$build_doc" != xno])
+
+
+#
+# Provide information about the build.
+#
+BUILD_REVISION="mym4_revision"
+AC_SUBST(BUILD_REVISION)
+AC_DEFINE_UNQUOTED(BUILD_REVISION, "$BUILD_REVISION",
+                   [GIT commit id revision used to build this package])
+
+changequote(,)dnl
+BUILD_VERSION=`echo "$PACKAGE_VERSION" | sed 's/\([0-9.]*\).*/\1./'`
+changequote([,])dnl
+BUILD_VERSION="${BUILD_VERSION}mym4_revision_dec"
+BUILD_FILEVERSION=`echo "${BUILD_VERSION}" | tr . ,`
+AC_SUBST(BUILD_VERSION)
+AC_SUBST(BUILD_FILEVERSION)
+
+AC_ARG_ENABLE([build-timestamp],
+  AS_HELP_STRING([--enable-build-timestamp],
+                 [set an explicit build timestamp for reproducibility.
+                  (default is the current time in ISO-8601 format)]),
+     [if test "$enableval" = "yes"; then
+        BUILD_TIMESTAMP=`date -u +%Y-%m-%dT%H:%M+0000 2>/dev/null || date`
+      else
+        BUILD_TIMESTAMP="$enableval"
+      fi],
+     [BUILD_TIMESTAMP="<none>"])
+AC_SUBST(BUILD_TIMESTAMP)
+AC_DEFINE_UNQUOTED(BUILD_TIMESTAMP, "$BUILD_TIMESTAMP",
+                   [The time this package was configured for a build])
+
+
+# And create the files.
+AC_CONFIG_FILES([
+Makefile
+m4/Makefile
+compat/Makefile
+mpi/Makefile
+cipher/Makefile
+random/Makefile
+doc/Makefile
+src/Makefile
+src/gcrypt.h
+src/libgcrypt-config
+src/libgcrypt.pc
+src/versioninfo.rc
+tests/Makefile
+])
+AC_CONFIG_FILES([tests/hashtest-256g], [chmod +x tests/hashtest-256g])
+AC_CONFIG_FILES([tests/basic-disable-all-hwf], [chmod +x 
tests/basic-disable-all-hwf])
+AC_OUTPUT
+
+
+detection_module="${GCRYPT_HWF_MODULES%.lo}"
+test -n "$detection_module" || detection_module="none"
+
+# Give some feedback
+GCRY_MSG_SHOW([],[])
+GCRY_MSG_SHOW([Libgcrypt],[v${VERSION} has been configured as follows:])
+GCRY_MSG_SHOW([],[])
+GCRY_MSG_SHOW([Platform:                 ],[$PRINTABLE_OS_NAME ($host)])
+GCRY_MSG_SHOW([Hardware detection module:],[$detection_module])
+GCRY_MSG_WRAP([Enabled cipher algorithms:],[$enabled_ciphers])
+GCRY_MSG_WRAP([Enabled digest algorithms:],[$enabled_digests])
+GCRY_MSG_WRAP([Enabled kdf algorithms:   ],[$enabled_kdfs])
+GCRY_MSG_WRAP([Enabled pubkey algorithms:],[$enabled_pubkey_ciphers])
+GCRY_MSG_SHOW([Random number generator:  ],[$random])
+GCRY_MSG_SHOW([Try using jitter entropy: ],[$jentsupport])
+GCRY_MSG_SHOW([Using linux capabilities: ],[$use_capabilities])
+GCRY_MSG_SHOW([FIPS module version:      ],[$fips_module_version])
+GCRY_MSG_SHOW([Try using Padlock crypto: ],[$padlocksupport])
+GCRY_MSG_SHOW([Try using AES-NI crypto:  ],[$aesnisupport])
+GCRY_MSG_SHOW([Try using Intel SHAEXT:   ],[$shaextsupport])
+GCRY_MSG_SHOW([Try using Intel PCLMUL:   ],[$pclmulsupport])
+GCRY_MSG_SHOW([Try using Intel SSE4.1:   ],[$sse41support])
+GCRY_MSG_SHOW([Try using DRNG (RDRAND):  ],[$drngsupport])
+GCRY_MSG_SHOW([Try using Intel AVX:      ],[$avxsupport])
+GCRY_MSG_SHOW([Try using Intel AVX2:     ],[$avx2support])
+GCRY_MSG_SHOW([Try using ARM NEON:       ],[$neonsupport])
+GCRY_MSG_SHOW([Try using ARMv8 crypto:   ],[$armcryptosupport])
+GCRY_MSG_SHOW([Try using PPC crypto:     ],[$ppccryptosupport])
+GCRY_MSG_SHOW([],[])
+
+if test "x${gpg_config_script_warn}" != x; then
+cat <<G10EOF
+        Mismatches between the target platform and the to
+        be used libraries have been been detected for:
+         ${gpg_config_script_warn}
+        Please check above for warning messages.
+
+G10EOF
+fi
+
+if test "$gcry_cv_gcc_attribute_aligned" != "yes" ; then
+cat <<G10EOF
+   Please note that your compiler does not support the GCC style
+   aligned attribute. Using this software may evoke bus errors.
+
+G10EOF
+fi
+
+if test -n "$gpl"; then
+  echo "Please note that you are building a version of Libgcrypt with"
+  echo "  $gpl"
+  echo "included.  These parts are licensed under the GPL and thus the"
+  echo "use of this library has to comply with the conditions of the GPL."
+  echo ""
+fi
diff --git a/grub-core/lib/libgcrypt/mkinstalldirs 
b/grub-core/lib/libgcrypt/mkinstalldirs
new file mode 100755
index 000000000..ef7e16fda
--- /dev/null
+++ b/grub-core/lib/libgcrypt/mkinstalldirs
@@ -0,0 +1,161 @@
+#! /bin/sh
+# mkinstalldirs --- make directory hierarchy
+
+scriptversion=2006-05-11.19
+
+# Original author: Noah Friedman <friedman@prep.ai.mit.edu>
+# Created: 1993-05-16
+# Public domain.
+#
+# This file is maintained in Automake, please report
+# bugs to <bug-automake@gnu.org> or send patches to
+# <automake-patches@gnu.org>.
+
+nl='
+'
+IFS=" ""       $nl"
+errstatus=0
+dirmode=
+
+usage="\
+Usage: mkinstalldirs [-h] [--help] [--version] [-m MODE] DIR ...
+
+Create each directory DIR (with mode MODE, if specified), including all
+leading file name components.
+
+Report bugs to <bug-automake@gnu.org>."
+
+# process command line arguments
+while test $# -gt 0 ; do
+  case $1 in
+    -h | --help | --h*)         # -h for help
+      echo "$usage"
+      exit $?
+      ;;
+    -m)                         # -m PERM arg
+      shift
+      test $# -eq 0 && { echo "$usage" 1>&2; exit 1; }
+      dirmode=$1
+      shift
+      ;;
+    --version)
+      echo "$0 $scriptversion"
+      exit $?
+      ;;
+    --)                         # stop option processing
+      shift
+      break
+      ;;
+    -*)                         # unknown option
+      echo "$usage" 1>&2
+      exit 1
+      ;;
+    *)                          # first non-opt arg
+      break
+      ;;
+  esac
+done
+
+for file
+do
+  if test -d "$file"; then
+    shift
+  else
+    break
+  fi
+done
+
+case $# in
+  0) exit 0 ;;
+esac
+
+# Solaris 8's mkdir -p isn't thread-safe.  If you mkdir -p a/b and
+# mkdir -p a/c at the same time, both will detect that a is missing,
+# one will create a, then the other will try to create a and die with
+# a "File exists" error.  This is a problem when calling mkinstalldirs
+# from a parallel make.  We use --version in the probe to restrict
+# ourselves to GNU mkdir, which is thread-safe.
+case $dirmode in
+  '')
+    if mkdir -p --version . >/dev/null 2>&1 && test ! -d ./--version; then
+      echo "mkdir -p -- $*"
+      exec mkdir -p -- "$@"
+    else
+      # On NextStep and OpenStep, the `mkdir' command does not
+      # recognize any option.  It will interpret all options as
+      # directories to create, and then abort because `.' already
+      # exists.
+      test -d ./-p && rmdir ./-p
+      test -d ./--version && rmdir ./--version
+    fi
+    ;;
+  *)
+    if mkdir -m "$dirmode" -p --version . >/dev/null 2>&1 &&
+       test ! -d ./--version; then
+      echo "mkdir -m $dirmode -p -- $*"
+      exec mkdir -m "$dirmode" -p -- "$@"
+    else
+      # Clean up after NextStep and OpenStep mkdir.
+      for d in ./-m ./-p ./--version "./$dirmode";
+      do
+        test -d $d && rmdir $d
+      done
+    fi
+    ;;
+esac
+
+for file
+do
+  case $file in
+    /*) pathcomp=/ ;;
+    *)  pathcomp= ;;
+  esac
+  oIFS=$IFS
+  IFS=/
+  set fnord $file
+  shift
+  IFS=$oIFS
+
+  for d
+  do
+    test "x$d" = x && continue
+
+    pathcomp=$pathcomp$d
+    case $pathcomp in
+      -*) pathcomp=./$pathcomp ;;
+    esac
+
+    if test ! -d "$pathcomp"; then
+      echo "mkdir $pathcomp"
+
+      mkdir "$pathcomp" || lasterr=$?
+
+      if test ! -d "$pathcomp"; then
+       errstatus=$lasterr
+      else
+       if test ! -z "$dirmode"; then
+         echo "chmod $dirmode $pathcomp"
+         lasterr=
+         chmod "$dirmode" "$pathcomp" || lasterr=$?
+
+         if test ! -z "$lasterr"; then
+           errstatus=$lasterr
+         fi
+       fi
+      fi
+    fi
+
+    pathcomp=$pathcomp/
+  done
+done
+
+exit $errstatus
+
+# Local Variables:
+# mode: shell-script
+# sh-indentation: 2
+# eval: (add-hook 'write-file-hooks 'time-stamp)
+# time-stamp-start: "scriptversion="
+# time-stamp-format: "%:y-%02m-%02d.%02H"
+# time-stamp-end: "$"
+# End:
diff --git a/grub-core/lib/libgcrypt/mpi/ChangeLog-2011 
b/grub-core/lib/libgcrypt/mpi/ChangeLog-2011
index 930717141..1e0787218 100644
--- a/grub-core/lib/libgcrypt/mpi/ChangeLog-2011
+++ b/grub-core/lib/libgcrypt/mpi/ChangeLog-2011
@@ -1,9 +1,14 @@
 2011-12-01  Werner Koch  <wk@g10code.com>
 
-        NB: ChangeLog files are no longer manually maintained.  Starting
-        on December 1st, 2011 we put change information only in the GIT
-        commit log, and generate a top-level ChangeLog file from logs at
-        "make dist".  See doc/HACKING for details.
+       NB: ChangeLog files are no longer manually maintained.  Starting
+       on December 1st, 2011 we put change information only in the GIT
+       commit log, and generate a top-level ChangeLog file from logs at
+       "make dist".  See doc/HACKING for details.
+
+2011-07-04  Werner Koch  <wk@g10code.com>
+
+       * longlong.h (add_ssaaaa) [__arm__]: Do no use asm if thumb code
+       generation is enabled.  This is bug#1202.  Reported for gpg 1.4.
 
 2011-03-28  Werner Koch  <wk@g10code.com>
 
@@ -820,3 +825,7 @@ Mon Feb 16 13:00:27 1998  Werner Koch  
(wk@isil.d.shuttle.de)
  This file is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY, to the extent permitted by law; without even the
  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+Local Variables:
+buffer-read-only: t
+End:
diff --git a/grub-core/lib/libgcrypt/mpi/Makefile.am 
b/grub-core/lib/libgcrypt/mpi/Makefile.am
index e900539a2..e1362c888 100644
--- a/grub-core/lib/libgcrypt/mpi/Makefile.am
+++ b/grub-core/lib/libgcrypt/mpi/Makefile.am
@@ -29,7 +29,7 @@ AM_CFLAGS = $(GPG_ERROR_CFLAGS)
 AM_ASFLAGS = $(MPI_SFLAGS)
 AM_CCASFLAGS = $(NOEXECSTACK_FLAGS)
 
-EXTRA_DIST = Manifest config.links
+EXTRA_DIST = config.links
 DISTCLEANFILES = mpi-asm-defs.h \
                  mpih-add1-asm.S mpih-mul1-asm.S mpih-mul2-asm.S 
mpih-mul3-asm.S  \
                 mpih-lshift-asm.S mpih-rshift-asm.S mpih-sub1-asm.S 
asm-syntax.h \
@@ -173,5 +173,8 @@ libmpi_la_SOURCES = longlong.h         \
              mpicoder.c     \
              mpih-div.c     \
              mpih-mul.c     \
-             mpiutil.c      \
-              ec.c
+             mpih-const-time.c \
+             mpiutil.c         \
+              ec.c ec-internal.h ec-ed25519.c ec-nist.c ec-inline.h \
+              ec-hw-s390x.c
+EXTRA_libmpi_la_SOURCES = asm-common-aarch64.h asm-common-amd64.h
diff --git a/grub-core/lib/libgcrypt/mpi/Manifest 
b/grub-core/lib/libgcrypt/mpi/Manifest
deleted file mode 100644
index 3b0d6733d..000000000
--- a/grub-core/lib/libgcrypt/mpi/Manifest
+++ /dev/null
@@ -1,41 +0,0 @@
-# Manifest - checksums of the mpi directory
-# Copyright 2003 Free Software Foundation, Inc.
-#
-# This file is part of Libgcrypt.
-#
-# Libgcrypt is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser general Public License as
-# published by the Free Software Foundation; either version 2.1 of
-# the License, or (at your option) any later version.
-#
-# Libgcrypt is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
-
-Makefile.am
-config.links
-longlong.h
-mpi-add.c
-mpi-bit.c
-mpi-cmp.c
-mpi-div.c
-mpi-gcd.c
-mpi-inline.c
-mpi-inline.h
-mpi-internal.h
-mpi-inv.c
-mpi-mpow.c
-mpi-mul.c
-mpi-pow.c
-mpi-scan.c
-mpicoder.c
-mpih-div.c
-mpih-mul.c
-mpiutil.c
-$names$ 
iQCVAwUAP+LmfDEAnp832S/7AQKZJQQAkR/gQITUM+6Ygy9WAOAO17btyKAlCtGTXp5XSZ+J3X0o/rYneRdSCW89IJvwFRJjAOcFJd52MXs6ZVFF/RQBC8MvJzuQChbEzvihK8o2VgK34YWjU+6XH9sFgRMIgzkHs/51ZZxeQUOPy1XF7TyKB0WE7YBUVisFiRaqB1qGIOs==Z3qB
-
diff --git a/grub-core/lib/libgcrypt/mpi/pentium4/sse2/distfiles 
b/grub-core/lib/libgcrypt/mpi/aarch64/distfiles
similarity index 80%
rename from grub-core/lib/libgcrypt/mpi/pentium4/sse2/distfiles
rename to grub-core/lib/libgcrypt/mpi/aarch64/distfiles
index 7252cd7e3..1327bd4a7 100644
--- a/grub-core/lib/libgcrypt/mpi/pentium4/sse2/distfiles
+++ b/grub-core/lib/libgcrypt/mpi/aarch64/distfiles
@@ -3,3 +3,4 @@ mpih-mul1.S
 mpih-mul2.S
 mpih-mul3.S
 mpih-sub1.S
+mpi-asm-defs.h
diff --git a/grub-core/lib/libgcrypt/mpi/aarch64/mpi-asm-defs.h 
b/grub-core/lib/libgcrypt/mpi/aarch64/mpi-asm-defs.h
new file mode 100644
index 000000000..65190653b
--- /dev/null
+++ b/grub-core/lib/libgcrypt/mpi/aarch64/mpi-asm-defs.h
@@ -0,0 +1,4 @@
+/* This file defines some basic constants for the MPI machinery.  We
+ * need to define the types on a per-CPU basis, so it is done with
+ * this file here.  */
+#define BYTES_PER_MPI_LIMB  (SIZEOF_UNSIGNED_LONG_LONG)
diff --git a/grub-core/lib/libgcrypt/mpi/aarch64/mpih-add1.S 
b/grub-core/lib/libgcrypt/mpi/aarch64/mpih-add1.S
new file mode 100644
index 000000000..24859b179
--- /dev/null
+++ b/grub-core/lib/libgcrypt/mpi/aarch64/mpih-add1.S
@@ -0,0 +1,74 @@
+/* ARM64 add_n -- Add two limb vectors of the same length > 0 and store
+ *                sum in a third limb vector.
+ *
+ *      Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+#include "asm-common-aarch64.h"
+
+/*******************
+ *  mpi_limb_t
+ *  _gcry_mpih_add_n( mpi_ptr_t res_ptr,       x0
+ *                mpi_ptr_t s1_ptr,            x1
+ *                mpi_ptr_t s2_ptr,            x2
+ *                mpi_size_t size)             w3
+ */
+
+.text
+
+.globl C_SYMBOL_NAME(_gcry_mpih_add_n)
+ELF(.type  C_SYMBOL_NAME(_gcry_mpih_add_n),%function)
+C_SYMBOL_NAME(_gcry_mpih_add_n):
+       CFI_STARTPROC()
+       and     w5, w3, #3;
+       adds    xzr, xzr, xzr; /* clear carry flag */
+
+       cbz     w5, .Large_loop;
+
+.Loop:
+       ldr     x4, [x1], #8;
+       sub     w3, w3, #1;
+       ldr     x11, [x2], #8;
+       and     w5, w3, #3;
+       adcs    x4, x4, x11;
+       str     x4, [x0], #8;
+       cbz     w3, .Lend;
+       cbnz    w5, .Loop;
+
+.Large_loop:
+       ldp     x4, x6, [x1], #16;
+       ldp     x5, x7, [x2], #16;
+       ldp     x8, x10, [x1], #16;
+       ldp     x9, x11, [x2], #16;
+       sub     w3, w3, #4;
+       adcs    x4, x4, x5;
+       adcs    x6, x6, x7;
+       adcs    x8, x8, x9;
+       adcs    x10, x10, x11;
+       stp     x4, x6, [x0], #16;
+       stp     x8, x10, [x0], #16;
+       cbnz    w3, .Large_loop;
+
+.Lend:
+       adc     x0, xzr, xzr;
+       ret_spec_stop;
+       CFI_ENDPROC()
+ELF(.size C_SYMBOL_NAME(_gcry_mpih_add_n),.-C_SYMBOL_NAME(_gcry_mpih_add_n);)
diff --git a/grub-core/lib/libgcrypt/mpi/aarch64/mpih-mul1.S 
b/grub-core/lib/libgcrypt/mpi/aarch64/mpih-mul1.S
new file mode 100644
index 000000000..f34c13c57
--- /dev/null
+++ b/grub-core/lib/libgcrypt/mpi/aarch64/mpih-mul1.S
@@ -0,0 +1,99 @@
+/* ARM64 mul_1 -- Multiply a limb vector with a limb and store the result in
+ *                a second limb vector.
+ *
+ *      Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+#include "asm-common-aarch64.h"
+
+/*******************
+ * mpi_limb_t
+ * _gcry_mpih_mul_1( mpi_ptr_t res_ptr,                x0
+ *               mpi_ptr_t s1_ptr,             x1
+ *               mpi_size_t s1_size,           w2
+ *               mpi_limb_t s2_limb)           x3
+ */
+
+.text
+
+.globl C_SYMBOL_NAME(_gcry_mpih_mul_1)
+ELF(.type  C_SYMBOL_NAME(_gcry_mpih_mul_1),%function)
+C_SYMBOL_NAME(_gcry_mpih_mul_1):
+       CFI_STARTPROC()
+       and     w5, w2, #3;
+       mov     x4, xzr;
+
+       cbz     w5, .Large_loop;
+
+.Loop:
+       ldr     x5, [x1], #8;
+       sub     w2, w2, #1;
+       mul     x9, x5, x3;
+       umulh   x10, x5, x3;
+       and     w5, w2, #3;
+       adds    x4, x4, x9;
+       str     x4, [x0], #8;
+       adc     x4, x10, xzr;
+
+       cbz     w2, .Lend;
+       cbnz    w5, .Loop;
+
+.Large_loop:
+       ldp     x5, x6, [x1];
+       sub     w2, w2, #4;
+
+       mul     x9, x5, x3;
+       ldp     x7, x8, [x1, #16];
+       umulh   x10, x5, x3;
+       add     x1, x1, #32;
+
+       adds    x4, x4, x9;
+       str     x4, [x0], #8;
+       mul     x11, x6, x3;
+       adc     x4, x10, xzr;
+
+       umulh   x12, x6, x3;
+
+       adds    x4, x4, x11;
+       str     x4, [x0], #8;
+       mul     x13, x7, x3;
+       adc     x4, x12, xzr;
+
+       umulh   x14, x7, x3;
+
+       adds    x4, x4, x13;
+       str     x4, [x0], #8;
+       mul     x15, x8, x3;
+       adc     x4, x14, xzr;
+
+       umulh   x16, x8, x3;
+
+       adds    x4, x4, x15;
+       str     x4, [x0], #8;
+       adc     x4, x16, xzr;
+
+       cbnz    w2, .Large_loop;
+
+.Lend:
+       mov     x0, x4;
+       ret_spec_stop;
+       CFI_ENDPROC()
+ELF(.size C_SYMBOL_NAME(_gcry_mpih_mul_1),.-C_SYMBOL_NAME(_gcry_mpih_mul_1);)
diff --git a/grub-core/lib/libgcrypt/mpi/aarch64/mpih-mul2.S 
b/grub-core/lib/libgcrypt/mpi/aarch64/mpih-mul2.S
new file mode 100644
index 000000000..1880999d4
--- /dev/null
+++ b/grub-core/lib/libgcrypt/mpi/aarch64/mpih-mul2.S
@@ -0,0 +1,111 @@
+/* ARM64 mul_2 -- Multiply a limb vector with a limb and add the result to
+ *                a second limb vector.
+ *
+ *      Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+#include "asm-common-aarch64.h"
+
+/*******************
+ * mpi_limb_t
+ * _gcry_mpih_addmul_1( mpi_ptr_t res_ptr,     x0
+ *                  mpi_ptr_t s1_ptr,          x1
+ *                  mpi_size_t s1_size,        w2
+ *                  mpi_limb_t s2_limb)        x3
+ */
+
+.text
+
+.globl C_SYMBOL_NAME(_gcry_mpih_addmul_1)
+ELF(.type  C_SYMBOL_NAME(_gcry_mpih_addmul_1),%function)
+C_SYMBOL_NAME(_gcry_mpih_addmul_1):
+       CFI_STARTPROC()
+       and     w5, w2, #3;
+       mov     x6, xzr;
+       mov     x7, xzr;
+
+       cbz     w5, .Large_loop;
+
+.Loop:
+       ldr     x5, [x1], #8;
+
+       mul     x12, x5, x3;
+       ldr     x4, [x0];
+       umulh   x13, x5, x3;
+       sub     w2, w2, #1;
+
+       adds    x12, x12, x4;
+       and     w5, w2, #3;
+       adc     x13, x13, x7;
+       adds    x12, x12, x6;
+       str     x12, [x0], #8;
+       adc     x6, x7, x13;
+
+       cbz     w2, .Lend;
+       cbnz    w5, .Loop;
+
+.Large_loop:
+       ldp     x5, x9, [x1], #16;
+       sub     w2, w2, #4;
+       ldp     x4, x8, [x0];
+
+       mul     x12, x5, x3;
+       umulh   x13, x5, x3;
+
+       adds    x12, x12, x4;
+       mul     x14, x9, x3;
+       adc     x13, x13, x7;
+       adds    x12, x12, x6;
+       umulh   x15, x9, x3;
+       str     x12, [x0], #8;
+       adc     x6, x7, x13;
+
+       adds    x14, x14, x8;
+       ldp     x5, x9, [x1], #16;
+       adc     x15, x15, x7;
+       adds    x14, x14, x6;
+       mul     x12, x5, x3;
+       str     x14, [x0], #8;
+       ldp     x4, x8, [x0];
+       umulh   x13, x5, x3;
+       adc     x6, x7, x15;
+
+       adds    x12, x12, x4;
+       mul     x14, x9, x3;
+       adc     x13, x13, x7;
+       adds    x12, x12, x6;
+       umulh   x15, x9, x3;
+       str     x12, [x0], #8;
+       adc     x6, x7, x13;
+
+       adds    x14, x14, x8;
+       adc     x15, x15, x7;
+       adds    x14, x14, x6;
+       str     x14, [x0], #8;
+       adc     x6, x7, x15;
+
+       cbnz    w2, .Large_loop;
+
+.Lend:
+       mov     x0, x6;
+       ret_spec_stop;
+       CFI_ENDPROC()
+ELF(.size 
C_SYMBOL_NAME(_gcry_mpih_addmul_1),.-C_SYMBOL_NAME(_gcry_mpih_addmul_1);)
diff --git a/grub-core/lib/libgcrypt/mpi/aarch64/mpih-mul3.S 
b/grub-core/lib/libgcrypt/mpi/aarch64/mpih-mul3.S
new file mode 100644
index 000000000..e5faeddcb
--- /dev/null
+++ b/grub-core/lib/libgcrypt/mpi/aarch64/mpih-mul3.S
@@ -0,0 +1,124 @@
+/* ARM mul_3 -- Multiply a limb vector with a limb and subtract the result
+ *              from a second limb vector.
+ *
+ *      Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+#include "asm-common-aarch64.h"
+
+/*******************
+ * mpi_limb_t
+ * _gcry_mpih_submul_1( mpi_ptr_t res_ptr,     x0
+ *                  mpi_ptr_t s1_ptr,          x1
+ *                  mpi_size_t s1_size,        w2
+ *                  mpi_limb_t s2_limb)        x3
+ */
+
+.text
+
+.globl C_SYMBOL_NAME(_gcry_mpih_submul_1)
+ELF(.type  C_SYMBOL_NAME(_gcry_mpih_submul_1),%function)
+C_SYMBOL_NAME(_gcry_mpih_submul_1):
+       CFI_STARTPROC()
+       and     w5, w2, #3;
+       mov     x7, xzr;
+       cbz     w5, .Large_loop;
+
+       subs    xzr, xzr, xzr;
+
+.Loop:
+       ldr     x4, [x1], #8;
+       cinc    x7, x7, cc;
+       ldr     x5, [x0];
+       sub     w2, w2, #1;
+
+       mul     x6, x4, x3;
+       subs    x5, x5, x7;
+       umulh   x4, x4, x3;
+       and     w10, w2, #3;
+
+       cset    x7, cc;
+       subs    x5, x5, x6;
+       add     x7, x7, x4;
+       str     x5, [x0], #8;
+
+       cbz     w2, .Loop_end;
+       cbnz    w10, .Loop;
+
+       cinc    x7, x7, cc;
+
+.Large_loop:
+       ldp     x4, x8, [x1], #16;
+       sub     w2, w2, #4;
+       ldp     x5, x9, [x0];
+
+       mul     x6, x4, x3;
+       subs    x5, x5, x7;
+       umulh   x4, x4, x3;
+
+       cset    x7, cc;
+       subs    x5, x5, x6;
+       mul     x6, x8, x3;
+       add     x7, x7, x4;
+       str     x5, [x0], #8;
+       cinc    x7, x7, cc;
+
+       umulh   x8, x8, x3;
+
+       subs    x9, x9, x7;
+       cset    x7, cc;
+       subs    x9, x9, x6;
+       ldp     x4, x10, [x1], #16;
+       str     x9, [x0], #8;
+       add     x7, x7, x8;
+       ldp     x5, x9, [x0];
+       cinc    x7, x7, cc;
+
+       mul     x6, x4, x3;
+       subs    x5, x5, x7;
+       umulh   x4, x4, x3;
+
+       cset    x7, cc;
+       subs    x5, x5, x6;
+       mul     x6, x10, x3;
+       add     x7, x7, x4;
+       str     x5, [x0], #8;
+       cinc    x7, x7, cc;
+
+       umulh   x10, x10, x3;
+
+       subs    x9, x9, x7;
+       cset    x7, cc;
+       subs    x9, x9, x6;
+       add     x7, x7, x10;
+       str     x9, [x0], #8;
+       cinc    x7, x7, cc;
+
+       cbnz    w2, .Large_loop;
+
+       mov     x0, x7;
+       ret_spec_stop;
+
+.Loop_end:
+       cinc    x0, x7, cc;
+       ret_spec_stop;
+       CFI_ENDPROC()
+ELF(.size 
C_SYMBOL_NAME(_gcry_mpih_submul_1),.-C_SYMBOL_NAME(_gcry_mpih_submul_1);)
diff --git a/grub-core/lib/libgcrypt/mpi/aarch64/mpih-sub1.S 
b/grub-core/lib/libgcrypt/mpi/aarch64/mpih-sub1.S
new file mode 100644
index 000000000..469082863
--- /dev/null
+++ b/grub-core/lib/libgcrypt/mpi/aarch64/mpih-sub1.S
@@ -0,0 +1,74 @@
+/* ARM64 sub_n -- Subtract two limb vectors of the same length > 0 and store
+ *                sum in a third limb vector.
+ *
+ *      Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+#include "asm-common-aarch64.h"
+
+/*******************
+ *  mpi_limb_t
+ *  _gcry_mpih_sub_n( mpi_ptr_t res_ptr,       x0
+ *                mpi_ptr_t s1_ptr,            x1
+ *                mpi_ptr_t s2_ptr,            x2
+ *                mpi_size_t size)             w3
+ */
+
+.text
+
+.globl C_SYMBOL_NAME(_gcry_mpih_sub_n)
+ELF(.type  C_SYMBOL_NAME(_gcry_mpih_sub_n),%function)
+C_SYMBOL_NAME(_gcry_mpih_sub_n):
+       CFI_STARTPROC()
+       and     w5, w3, #3;
+       subs    xzr, xzr, xzr; /* prepare carry flag for sub */
+
+       cbz     w5, .Large_loop;
+
+.Loop:
+       ldr     x4, [x1], #8;
+       sub     w3, w3, #1;
+       ldr     x11, [x2], #8;
+       and     w5, w3, #3;
+       sbcs    x4, x4, x11;
+       str     x4, [x0], #8;
+       cbz     w3, .Lend;
+       cbnz    w5, .Loop;
+
+.Large_loop:
+       ldp     x4, x6, [x1], #16;
+       ldp     x5, x7, [x2], #16;
+       ldp     x8, x10, [x1], #16;
+       ldp     x9, x11, [x2], #16;
+       sub     w3, w3, #4;
+       sbcs    x4, x4, x5;
+       sbcs    x6, x6, x7;
+       sbcs    x8, x8, x9;
+       sbcs    x10, x10, x11;
+       stp     x4, x6, [x0], #16;
+       stp     x8, x10, [x0], #16;
+       cbnz    w3, .Large_loop;
+
+.Lend:
+       cset    x0, cc;
+       ret_spec_stop;
+       CFI_ENDPROC()
+ELF(.size C_SYMBOL_NAME(_gcry_mpih_sub_n),.-C_SYMBOL_NAME(_gcry_mpih_sub_n);)
diff --git a/grub-core/lib/libgcrypt/mpi/alpha/README 
b/grub-core/lib/libgcrypt/mpi/alpha/README
index 55c0a2917..00addfd39 100644
--- a/grub-core/lib/libgcrypt/mpi/alpha/README
+++ b/grub-core/lib/libgcrypt/mpi/alpha/README
@@ -5,7 +5,7 @@ RELEVANT OPTIMIZATION ISSUES
 EV4
 
 1. This chip has very limited store bandwidth.  The on-chip L1 cache is
-write-through, and a cache line is transfered from the store buffer to the
+write-through, and a cache line is transferred from the store buffer to the
 off-chip L2 in as much 15 cycles on most systems.  This delay hurts
 mpn_add_n, mpn_sub_n, mpn_lshift, and mpn_rshift.
 
@@ -20,7 +20,7 @@ EV5
 
 1. The memory bandwidth of this chip seems excellent, both for loads and
 stores.  Even when the working set is larger than the on-chip L1 and L2
-caches, the perfromance remain almost unaffected.
+caches, the performance remain almost unaffected.
 
 2. mulq has a measured latency of 13 cycles and an issue rate of 1 each 8th
 cycle.  umulh has a measured latency of 15 cycles and an issue rate of 1
diff --git a/grub-core/lib/libgcrypt/mpi/amd64/distfiles 
b/grub-core/lib/libgcrypt/mpi/amd64/distfiles
index 634e36fb0..44aad5f82 100644
--- a/grub-core/lib/libgcrypt/mpi/amd64/distfiles
+++ b/grub-core/lib/libgcrypt/mpi/amd64/distfiles
@@ -1,3 +1,4 @@
+func_abi.h
 mpih-add1.S
 mpih-lshift.S
 mpih-mul1.S
diff --git a/grub-core/lib/libgcrypt/mpi/amd64/func_abi.h 
b/grub-core/lib/libgcrypt/mpi/amd64/func_abi.h
new file mode 100644
index 000000000..c3f2d026c
--- /dev/null
+++ b/grub-core/lib/libgcrypt/mpi/amd64/func_abi.h
@@ -0,0 +1,34 @@
+#include <config.h>
+
+#include "asm-common-amd64.h"
+
+#ifdef USE_MS_ABI
+ /* Store registers and move four first input arguments from MS ABI to
+  * SYSV ABI.  */
+ #define FUNC_ENTRY() \
+       CFI_STARTPROC(); \
+       pushq %rsi; \
+       CFI_PUSH(%rsi); \
+       pushq %rdi; \
+       CFI_PUSH(%rdi); \
+       movq %rdx, %rsi; \
+       movq %rcx, %rdi; \
+       movq %r8, %rdx; \
+       movq %r9, %rcx;
+
+ /* Restore registers.  */
+ #define FUNC_EXIT() \
+       popq %rdi; \
+       CFI_POP(%rdi); \
+       popq %rsi; \
+       CFI_POP(%rsi); \
+       ret_spec_stop; \
+       CFI_ENDPROC();
+#else
+ #define FUNC_ENTRY() \
+       CFI_STARTPROC();
+
+ #define FUNC_EXIT() \
+       ret_spec_stop; \
+       CFI_ENDPROC();
+#endif
diff --git a/grub-core/lib/libgcrypt/mpi/amd64/mpi-asm-defs.h 
b/grub-core/lib/libgcrypt/mpi/amd64/mpi-asm-defs.h
index 17de1c190..65190653b 100644
--- a/grub-core/lib/libgcrypt/mpi/amd64/mpi-asm-defs.h
+++ b/grub-core/lib/libgcrypt/mpi/amd64/mpi-asm-defs.h
@@ -1,4 +1,4 @@
 /* This file defines some basic constants for the MPI machinery.  We
  * need to define the types on a per-CPU basis, so it is done with
  * this file here.  */
-#define BYTES_PER_MPI_LIMB  8
+#define BYTES_PER_MPI_LIMB  (SIZEOF_UNSIGNED_LONG_LONG)
diff --git a/grub-core/lib/libgcrypt/mpi/amd64/mpih-add1.S 
b/grub-core/lib/libgcrypt/mpi/amd64/mpih-add1.S
index f0ec89cc6..39c00c524 100644
--- a/grub-core/lib/libgcrypt/mpi/amd64/mpih-add1.S
+++ b/grub-core/lib/libgcrypt/mpi/amd64/mpih-add1.S
@@ -43,6 +43,7 @@
 .text
        .globl C_SYMBOL_NAME(_gcry_mpih_add_n)
 C_SYMBOL_NAME(_gcry_mpih_add_n:)
+       FUNC_ENTRY()
        leaq    (%rsi,%rcx,8), %rsi
        leaq    (%rdi,%rcx,8), %rdi
        leaq    (%rdx,%rcx,8), %rdx
@@ -59,5 +60,4 @@ C_SYMBOL_NAME(_gcry_mpih_add_n:)
 
        movq    %rcx, %rax              /* zero %rax */
        adcq    %rax, %rax
-       ret
-       
\ No newline at end of file
+       FUNC_EXIT()
diff --git a/grub-core/lib/libgcrypt/mpi/amd64/mpih-lshift.S 
b/grub-core/lib/libgcrypt/mpi/amd64/mpih-lshift.S
index e87dd1a99..a9c7d7e1e 100644
--- a/grub-core/lib/libgcrypt/mpi/amd64/mpih-lshift.S
+++ b/grub-core/lib/libgcrypt/mpi/amd64/mpih-lshift.S
@@ -42,36 +42,37 @@
 .text
        .globl C_SYMBOL_NAME(_gcry_mpih_lshift)
 C_SYMBOL_NAME(_gcry_mpih_lshift:)
-       movq    -8(%rsi,%rdx,8), %mm7
-       movd    %ecx, %mm1
+       FUNC_ENTRY()
+       /* Note: %xmm6 and %xmm7 not used for WIN64 ABI compatibility. */
+       movq    -8(%rsi,%rdx,8), %xmm4
+       movd    %ecx, %xmm1
        movl    $64, %eax
        subl    %ecx, %eax
-       movd    %eax, %mm0
-       movq    %mm7, %mm3
-       psrlq   %mm0, %mm7
-       movd    %mm7, %rax
+       movd    %eax, %xmm0
+       movdqa  %xmm4, %xmm3
+       psrlq   %xmm0, %xmm4
+       movd    %xmm4, %rax
        subq    $2, %rdx
        jl      .Lendo
 
        ALIGN(4)                        /* minimal alignment for claimed speed 
*/
-.Loop: movq    (%rsi,%rdx,8), %mm6
-       movq    %mm6, %mm2
-       psrlq   %mm0, %mm6
-       psllq   %mm1, %mm3
-       por     %mm6, %mm3
-       movq    %mm3, 8(%rdi,%rdx,8)
+.Loop: movq    (%rsi,%rdx,8), %xmm5
+       movdqa  %xmm5, %xmm2
+       psrlq   %xmm0, %xmm5
+       psllq   %xmm1, %xmm3
+       por     %xmm5, %xmm3
+       movq    %xmm3, 8(%rdi,%rdx,8)
        je      .Lende
-       movq    -8(%rsi,%rdx,8), %mm7
-       movq    %mm7, %mm3
-       psrlq   %mm0, %mm7
-       psllq   %mm1, %mm2
-       por     %mm7, %mm2
-       movq    %mm2, (%rdi,%rdx,8)
+       movq    -8(%rsi,%rdx,8), %xmm4
+       movdqa  %xmm4, %xmm3
+       psrlq   %xmm0, %xmm4
+       psllq   %xmm1, %xmm2
+       por     %xmm4, %xmm2
+       movq    %xmm2, (%rdi,%rdx,8)
        subq    $2, %rdx
        jge     .Loop
 
-.Lendo:        movq    %mm3, %mm2
-.Lende:        psllq   %mm1, %mm2
-       movq    %mm2, (%rdi)
-       emms
-       ret
+.Lendo:        movdqa  %xmm3, %xmm2
+.Lende:        psllq   %xmm1, %xmm2
+       movq    %xmm2, (%rdi)
+       FUNC_EXIT()
diff --git a/grub-core/lib/libgcrypt/mpi/amd64/mpih-mul1.S 
b/grub-core/lib/libgcrypt/mpi/amd64/mpih-mul1.S
index 54b0ab489..dacb9d870 100644
--- a/grub-core/lib/libgcrypt/mpi/amd64/mpih-mul1.S
+++ b/grub-core/lib/libgcrypt/mpi/amd64/mpih-mul1.S
@@ -46,6 +46,7 @@
        GLOBL   C_SYMBOL_NAME(_gcry_mpih_mul_1)
 C_SYMBOL_NAME(_gcry_mpih_mul_1:)
 
+       FUNC_ENTRY()
        movq    %rdx, %r11
        leaq    (%rsi,%rdx,8), %rsi
        leaq    (%rdi,%rdx,8), %rdi
@@ -62,4 +63,4 @@ C_SYMBOL_NAME(_gcry_mpih_mul_1:)
        jne     .Loop
 
        movq    %r8, %rax
-       ret
+       FUNC_EXIT()
diff --git a/grub-core/lib/libgcrypt/mpi/amd64/mpih-mul2.S 
b/grub-core/lib/libgcrypt/mpi/amd64/mpih-mul2.S
index 1180f7602..07913586d 100644
--- a/grub-core/lib/libgcrypt/mpi/amd64/mpih-mul2.S
+++ b/grub-core/lib/libgcrypt/mpi/amd64/mpih-mul2.S
@@ -31,49 +31,6 @@
 #include "sysdep.h"
 #include "asm-syntax.h"
 
-
-/*******************
- * mpi_limb_t
- * _gcry_mpih_addmul_2( mpi_ptr_t res_ptr,      (sp + 4)
- *                  mpi_ptr_t s1_ptr,       (sp + 8)
- *                  mpi_size_t s1_size,     (sp + 12)
- *                  mpi_limb_t s2_limb)     (sp + 16)
- */
-
-       /* i80386 addmul_1 -- Multiply a limb vector with a limb and add
- *                   the result to a second limb vector.
- *
- *      Copyright (C) 1992, 1994, 1998,
- *                    2001, 2002 Free Software Foundation, Inc.
- *
- * This file is part of Libgcrypt.
- *
- * Libgcrypt is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as
- * published by the Free Software Foundation; either version 2.1 of
- * the License, or (at your option) any later version.
- *
- * Libgcrypt is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
- *
- * Note: This code is heavily based on the GNU MP Library.
- *      Actually it's the same code with only minor changes in the
- *      way the data is stored; this is to support the abstraction
- *      of an optional secure memory allocation which may be used
- *      to avoid revealing of sensitive data due to paging etc.
- */
-
-
-#include "sysdep.h"
-#include "asm-syntax.h"
-
-
 /*******************
  * mpi_limb_t
  * _gcry_mpih_addmul_1( mpi_ptr_t res_ptr,   (rdi)
@@ -84,6 +41,7 @@
        TEXT
        GLOBL   C_SYMBOL_NAME(_gcry_mpih_addmul_1)
 C_SYMBOL_NAME(_gcry_mpih_addmul_1:)
+       FUNC_ENTRY()
        movq    %rdx, %r11
        leaq    (%rsi,%rdx,8), %rsi
        leaq    (%rdi,%rdx,8), %rdi
@@ -104,4 +62,4 @@ C_SYMBOL_NAME(_gcry_mpih_addmul_1:)
        jne     .Loop
 
        movq    %r8, %rax
-       ret
+       FUNC_EXIT()
diff --git a/grub-core/lib/libgcrypt/mpi/amd64/mpih-mul3.S 
b/grub-core/lib/libgcrypt/mpi/amd64/mpih-mul3.S
index 4d458a794..f8889eb2a 100644
--- a/grub-core/lib/libgcrypt/mpi/amd64/mpih-mul3.S
+++ b/grub-core/lib/libgcrypt/mpi/amd64/mpih-mul3.S
@@ -42,7 +42,7 @@
        TEXT
        GLOBL   C_SYMBOL_NAME(_gcry_mpih_submul_1)
 C_SYMBOL_NAME(_gcry_mpih_submul_1:)
-
+       FUNC_ENTRY()
        movq    %rdx, %r11
        leaq    (%rsi,%r11,8), %rsi
        leaq    (%rdi,%r11,8), %rdi
@@ -63,4 +63,4 @@ C_SYMBOL_NAME(_gcry_mpih_submul_1:)
        jne     .Loop
 
        movq    %r8, %rax
-       ret
+       FUNC_EXIT()
diff --git a/grub-core/lib/libgcrypt/mpi/amd64/mpih-rshift.S 
b/grub-core/lib/libgcrypt/mpi/amd64/mpih-rshift.S
index 4cfc8f602..8ecf155f5 100644
--- a/grub-core/lib/libgcrypt/mpi/amd64/mpih-rshift.S
+++ b/grub-core/lib/libgcrypt/mpi/amd64/mpih-rshift.S
@@ -42,39 +42,40 @@
 .text
        .globl C_SYMBOL_NAME(_gcry_mpih_rshift)
 C_SYMBOL_NAME(_gcry_mpih_rshift:)
-       movq    (%rsi), %mm7
-       movd    %ecx, %mm1
+       FUNC_ENTRY()
+       /* Note: %xmm6 and %xmm7 not used for WIN64 ABI compatibility. */
+       movq    (%rsi), %xmm4
+       movd    %ecx, %xmm1
        movl    $64, %eax
        subl    %ecx, %eax
-       movd    %eax, %mm0
-       movq    %mm7, %mm3
-       psllq   %mm0, %mm7
-       movd    %mm7, %rax
+       movd    %eax, %xmm0
+       movdqa  %xmm4, %xmm3
+       psllq   %xmm0, %xmm4
+       movd    %xmm4, %rax
        leaq    (%rsi,%rdx,8), %rsi
        leaq    (%rdi,%rdx,8), %rdi
        negq    %rdx
        addq    $2, %rdx
        jg      .Lendo
 
-       ALIGN(8)                        /* minimal alignment for claimed speed 
*/
-.Loop: movq    -8(%rsi,%rdx,8), %mm6
-       movq    %mm6, %mm2
-       psllq   %mm0, %mm6
-       psrlq   %mm1, %mm3
-       por     %mm6, %mm3
-       movq    %mm3, -16(%rdi,%rdx,8)
+       ALIGN(4)                        /* minimal alignment for claimed speed 
*/
+.Loop: movq    -8(%rsi,%rdx,8), %xmm5
+       movdqa  %xmm5, %xmm2
+       psllq   %xmm0, %xmm5
+       psrlq   %xmm1, %xmm3
+       por     %xmm5, %xmm3
+       movq    %xmm3, -16(%rdi,%rdx,8)
        je      .Lende
-       movq    (%rsi,%rdx,8), %mm7
-       movq    %mm7, %mm3
-       psllq   %mm0, %mm7
-       psrlq   %mm1, %mm2
-       por     %mm7, %mm2
-       movq    %mm2, -8(%rdi,%rdx,8)
+       movq    (%rsi,%rdx,8), %xmm4
+       movdqa  %xmm4, %xmm3
+       psllq   %xmm0, %xmm4
+       psrlq   %xmm1, %xmm2
+       por     %xmm4, %xmm2
+       movq    %xmm2, -8(%rdi,%rdx,8)
        addq    $2, %rdx
        jle     .Loop
 
-.Lendo:        movq    %mm3, %mm2
-.Lende:        psrlq   %mm1, %mm2
-       movq    %mm2, -8(%rdi)
-       emms
-       ret
+.Lendo:        movdqa  %xmm3, %xmm2
+.Lende:        psrlq   %xmm1, %xmm2
+       movq    %xmm2, -8(%rdi)
+       FUNC_EXIT()
diff --git a/grub-core/lib/libgcrypt/mpi/amd64/mpih-sub1.S 
b/grub-core/lib/libgcrypt/mpi/amd64/mpih-sub1.S
index b3609b024..d60b58a5b 100644
--- a/grub-core/lib/libgcrypt/mpi/amd64/mpih-sub1.S
+++ b/grub-core/lib/libgcrypt/mpi/amd64/mpih-sub1.S
@@ -42,6 +42,7 @@
 .text
        .globl C_SYMBOL_NAME(_gcry_mpih_sub_n)
 C_SYMBOL_NAME(_gcry_mpih_sub_n:)
+       FUNC_ENTRY()
        leaq    (%rsi,%rcx,8), %rsi
        leaq    (%rdi,%rcx,8), %rdi
        leaq    (%rdx,%rcx,8), %rdx
@@ -58,4 +59,4 @@ C_SYMBOL_NAME(_gcry_mpih_sub_n:)
 
        movq    %rcx, %rax              /* zero %rax */
        adcq    %rax, %rax
-       ret
+       FUNC_EXIT()
diff --git a/grub-core/lib/libgcrypt/mpi/i586/distfiles 
b/grub-core/lib/libgcrypt/mpi/arm/distfiles
similarity index 57%
rename from grub-core/lib/libgcrypt/mpi/i586/distfiles
rename to grub-core/lib/libgcrypt/mpi/arm/distfiles
index 546f77768..27a2ca527 100644
--- a/grub-core/lib/libgcrypt/mpi/i586/distfiles
+++ b/grub-core/lib/libgcrypt/mpi/arm/distfiles
@@ -1,10 +1,6 @@
-Manifest
+mpi-asm-defs.h
 mpih-add1.S
 mpih-mul1.S
 mpih-mul2.S
 mpih-mul3.S
-mpih-lshift.S
-mpih-rshift.S
 mpih-sub1.S
-README
-
diff --git a/grub-core/lib/libgcrypt/mpi/arm/mpi-asm-defs.h 
b/grub-core/lib/libgcrypt/mpi/arm/mpi-asm-defs.h
new file mode 100644
index 000000000..047d1f5a7
--- /dev/null
+++ b/grub-core/lib/libgcrypt/mpi/arm/mpi-asm-defs.h
@@ -0,0 +1,4 @@
+/* This file defines some basic constants for the MPI machinery.  We
+ * need to define the types on a per-CPU basis, so it is done with
+ * this file here.  */
+#define BYTES_PER_MPI_LIMB  (SIZEOF_UNSIGNED_LONG)
diff --git a/grub-core/lib/libgcrypt/mpi/arm/mpih-add1.S 
b/grub-core/lib/libgcrypt/mpi/arm/mpih-add1.S
new file mode 100644
index 000000000..09e8b3b2b
--- /dev/null
+++ b/grub-core/lib/libgcrypt/mpi/arm/mpih-add1.S
@@ -0,0 +1,76 @@
+/* ARM add_n -- Add two limb vectors of the same length > 0 and store
+ *              sum in a third limb vector.
+ *
+ *      Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Note: This code is heavily based on the GNU MP Library (version 4.2.1).
+ */
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+.syntax unified
+.arm
+
+/*******************
+ *  mpi_limb_t
+ *  _gcry_mpih_add_n( mpi_ptr_t res_ptr,       %r0
+ *                mpi_ptr_t s1_ptr,            %r1
+ *                mpi_ptr_t s2_ptr,            %r2
+ *                mpi_size_t size)             %r3
+ */
+
+.text
+
+.globl _gcry_mpih_add_n
+.type  _gcry_mpih_add_n,%function
+_gcry_mpih_add_n:
+       push    {%r4, %r5, %r6, %r7, %r8, %r9, %r10, %lr};
+       cmn     %r0, #0; /* clear carry flag */
+
+       tst     %r3, #3;
+       beq     .Large_loop;
+
+.Loop:
+       ldr     %r4, [%r1], #4;
+       sub     %r3, #1;
+       ldr     %lr, [%r2], #4;
+       adcs    %r4, %lr;
+       tst     %r3, #3;
+       str     %r4, [%r0], #4;
+       bne     .Loop;
+
+       teq     %r3, #0;
+       beq     .Lend;
+
+.Large_loop:
+       ldm     %r1!, {%r4, %r6, %r8, %r10};
+       ldm     %r2!, {%r5, %r7, %r9, %lr};
+       sub     %r3, #4;
+       adcs    %r4, %r5;
+       adcs    %r6, %r7;
+       adcs    %r8, %r9;
+       adcs    %r10, %lr;
+       teq     %r3, #0;
+       stm     %r0!, {%r4, %r6, %r8, %r10};
+       bne     .Large_loop;
+
+.Lend:
+       adc     %r0, %r3, #0;
+       pop     {%r4, %r5, %r6, %r7, %r8, %r9, %r10, %pc};
+.size _gcry_mpih_add_n,.-_gcry_mpih_add_n;
diff --git a/grub-core/lib/libgcrypt/mpi/arm/mpih-mul1.S 
b/grub-core/lib/libgcrypt/mpi/arm/mpih-mul1.S
new file mode 100644
index 000000000..c2e2854bf
--- /dev/null
+++ b/grub-core/lib/libgcrypt/mpi/arm/mpih-mul1.S
@@ -0,0 +1,80 @@
+/* ARM mul_1 -- Multiply a limb vector with a limb and store the result in
+ *              a second limb vector.
+ *
+ *      Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Note: This code is heavily based on the GNU MP Library (version 4.2.1).
+ */
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+.syntax unified
+.arm
+
+/*******************
+ * mpi_limb_t
+ * _gcry_mpih_mul_1( mpi_ptr_t res_ptr,                %r0
+ *               mpi_ptr_t s1_ptr,             %r1
+ *               mpi_size_t s1_size,           %r2
+ *               mpi_limb_t s2_limb)           %r3
+ */
+
+.text
+
+.globl _gcry_mpih_mul_1
+.type  _gcry_mpih_mul_1,%function
+_gcry_mpih_mul_1:
+       push    {%r4, %r5, %r6, %r7, %r8, %r9, %r10, %r11, %lr};
+       mov     %r4, #0;
+
+       tst     %r2, #3;
+       beq     .Large_loop;
+
+.Loop:
+       ldr     %r5, [%r1], #4;
+       mov     %lr, #0;
+       umlal   %r4, %lr, %r5, %r3;
+       sub     %r2, #1;
+       str     %r4, [%r0], #4;
+       tst     %r2, #3;
+       mov     %r4, %lr;
+       bne     .Loop;
+
+       teq     %r2, #0;
+       beq     .Lend;
+
+.Large_loop:
+       ldm     %r1!, {%r5, %r6, %r7, %r8};
+       mov     %r9, #0;
+       mov     %r10, #0;
+       umlal   %r4, %r9, %r5, %r3;
+       mov     %r11, #0;
+       umlal   %r9, %r10, %r6, %r3;
+       str     %r4, [%r0], #4;
+       mov     %r4, #0;
+       umlal   %r10, %r11, %r7, %r3;
+       subs    %r2, #4;
+       umlal   %r11, %r4, %r8, %r3;
+       stm     %r0!, {%r9, %r10, %r11};
+       bne     .Large_loop;
+
+.Lend:
+       mov     %r0, %r4;
+       pop     {%r4, %r5, %r6, %r7, %r8, %r9, %r10, %r11, %pc};
+.size _gcry_mpih_mul_1,.-_gcry_mpih_mul_1;
diff --git a/grub-core/lib/libgcrypt/mpi/arm/mpih-mul2.S 
b/grub-core/lib/libgcrypt/mpi/arm/mpih-mul2.S
new file mode 100644
index 000000000..bce932e9b
--- /dev/null
+++ b/grub-core/lib/libgcrypt/mpi/arm/mpih-mul2.S
@@ -0,0 +1,94 @@
+/* ARM mul_2 -- Multiply a limb vector with a limb and add the result to
+ *              a second limb vector.
+ *
+ *      Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Note: This code is heavily based on the GNU MP Library (version 4.2.1).
+ */
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+.syntax unified
+.arm
+
+/*******************
+ * mpi_limb_t
+ * _gcry_mpih_addmul_1( mpi_ptr_t res_ptr,     %r0
+ *                  mpi_ptr_t s1_ptr,          %r1
+ *                  mpi_size_t s1_size,        %r2
+ *                  mpi_limb_t s2_limb)        %r3
+ */
+
+.text
+
+.globl _gcry_mpih_addmul_1
+.type  _gcry_mpih_addmul_1,%function
+_gcry_mpih_addmul_1:
+       push    {%r4, %r5, %r6, %r8, %r10, %lr};
+       mov     %lr, #0;
+       cmn     %r0, #0; /* clear carry flag */
+
+       tst     %r2, #3;
+       beq     .Large_loop;
+.Loop:
+       ldr     %r5, [%r1], #4;
+       ldr     %r4, [%r0];
+       sub     %r2, #1;
+       adcs    %r4, %lr;
+       mov     %lr, #0;
+       umlal   %r4, %lr, %r5, %r3;
+       tst     %r2, #3;
+       str     %r4, [%r0], #4;
+       bne     .Loop;
+
+       teq     %r2, #0;
+       beq     .Lend;
+
+.Large_loop:
+       ldr     %r5, [%r1], #4;
+       ldm     %r0, {%r4, %r6, %r8, %r10};
+
+       sub     %r2, #4;
+       adcs    %r4, %lr;
+       mov     %lr, #0;
+       umlal   %r4, %lr, %r5, %r3;
+
+       ldr     %r5, [%r1], #4;
+       adcs    %r6, %lr;
+       mov     %lr, #0;
+       umlal   %r6, %lr, %r5, %r3;
+
+       ldr     %r5, [%r1], #4;
+       adcs    %r8, %lr;
+       mov     %lr, #0;
+       umlal   %r8, %lr, %r5, %r3;
+
+       ldr     %r5, [%r1], #4;
+       adcs    %r10, %lr;
+       mov     %lr, #0;
+       umlal   %r10, %lr, %r5, %r3;
+
+       teq     %r2, #0;
+       stm     %r0!, {%r4, %r6, %r8, %r10};
+       bne     .Large_loop;
+
+.Lend:
+       adc     %r0, %lr, #0;
+       pop     {%r4, %r5, %r6, %r8, %r10, %pc};
+.size _gcry_mpih_addmul_1,.-_gcry_mpih_addmul_1;
diff --git a/grub-core/lib/libgcrypt/mpi/arm/mpih-mul3.S 
b/grub-core/lib/libgcrypt/mpi/arm/mpih-mul3.S
new file mode 100644
index 000000000..33326c787
--- /dev/null
+++ b/grub-core/lib/libgcrypt/mpi/arm/mpih-mul3.S
@@ -0,0 +1,100 @@
+/* ARM mul_3 -- Multiply a limb vector with a limb and subtract the result
+ *              from a second limb vector.
+ *
+ *      Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Note: This code is heavily based on the GNU MP Library (version 4.2.1).
+ */
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+.syntax unified
+.arm
+
+/*******************
+ * mpi_limb_t
+ * _gcry_mpih_submul_1( mpi_ptr_t res_ptr,     %r0
+ *                  mpi_ptr_t s1_ptr,          %r1
+ *                  mpi_size_t s1_size,        %r2
+ *                  mpi_limb_t s2_limb)        %r3
+ */
+
+.text
+
+.globl _gcry_mpih_submul_1
+.type  _gcry_mpih_submul_1,%function
+_gcry_mpih_submul_1:
+       push    {%r4, %r5, %r6, %r8, %r9, %r10, %lr};
+       mov     %lr, #0;
+       cmp     %r0, #0; /* prepare carry flag for sbc */
+
+       tst     %r2, #3;
+       beq     .Large_loop;
+.Loop:
+       ldr     %r5, [%r1], #4;
+       mov     %r4, %lr;
+       mov     %lr, #0;
+       ldr     %r6, [%r0];
+       umlal   %r4, %lr, %r5, %r3;
+       sub     %r2, #1;
+       sbcs    %r4, %r6, %r4;
+       tst     %r2, #3;
+       str     %r4, [%r0], #4;
+       bne     .Loop;
+
+       teq     %r2, #0;
+       beq     .Lend;
+
+.Large_loop:
+       ldr     %r5, [%r1], #4;
+       mov     %r9, #0;
+       ldr     %r4, [%r0, #0];
+
+       umlal   %lr, %r9, %r5, %r3;
+       ldr     %r6, [%r0, #4];
+       ldr     %r5, [%r1], #4;
+       sbcs    %r4, %r4, %lr;
+
+       mov     %lr, #0;
+       umlal   %r9, %lr, %r5, %r3;
+       ldr     %r8, [%r0, #8];
+       ldr     %r5, [%r1], #4;
+       sbcs    %r6, %r6, %r9;
+
+       mov     %r9, #0;
+       umlal   %lr, %r9, %r5, %r3;
+       ldr     %r10, [%r0, #12];
+       ldr     %r5, [%r1], #4;
+       sbcs    %r8, %r8, %lr;
+
+       mov     %lr, #0;
+       umlal   %r9, %lr, %r5, %r3;
+       sub     %r2, #4;
+       sbcs    %r10, %r10, %r9;
+
+       teq     %r2, #0;
+       stm     %r0!, {%r4, %r6, %r8, %r10};
+       bne     .Large_loop;
+
+.Lend:
+       it      cc
+       movcc   %r2, #1;
+       add     %r0, %lr, %r2;
+       pop     {%r4, %r5, %r6, %r8, %r9, %r10, %pc};
+.size _gcry_mpih_submul_1,.-_gcry_mpih_submul_1;
diff --git a/grub-core/lib/libgcrypt/mpi/arm/mpih-sub1.S 
b/grub-core/lib/libgcrypt/mpi/arm/mpih-sub1.S
new file mode 100644
index 000000000..593e3cded
--- /dev/null
+++ b/grub-core/lib/libgcrypt/mpi/arm/mpih-sub1.S
@@ -0,0 +1,77 @@
+/* ARM sub_n -- Subtract two limb vectors of the same length > 0 and store
+ *              sum in a third limb vector.
+ *
+ *      Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Note: This code is heavily based on the GNU MP Library (version 4.2.1).
+ */
+
+#include "sysdep.h"
+#include "asm-syntax.h"
+
+.syntax unified
+.arm
+
+/*******************
+ *  mpi_limb_t
+ *  _gcry_mpih_sub_n( mpi_ptr_t res_ptr,       %r0
+ *                mpi_ptr_t s1_ptr,            %r1
+ *                mpi_ptr_t s2_ptr,            %r2
+ *                mpi_size_t size)             %r3
+ */
+
+.text
+
+.globl _gcry_mpih_sub_n
+.type  _gcry_mpih_sub_n,%function
+_gcry_mpih_sub_n:
+       push    {%r4, %r5, %r6, %r7, %r8, %r9, %r10, %lr};
+       cmp     %r0, #0; /* prepare carry flag for sub */
+
+       tst     %r3, #3;
+       beq     .Large_loop;
+
+.Loop:
+       ldr     %r4, [%r1], #4;
+       sub     %r3, #1;
+       ldr     %lr, [%r2], #4;
+       sbcs    %r4, %lr;
+       tst     %r3, #3;
+       str     %r4, [%r0], #4;
+       bne     .Loop;
+
+       teq     %r3, #0;
+       beq     .Lend;
+
+.Large_loop:
+       ldm     %r1!, {%r4, %r6, %r8, %r10};
+       sub     %r3, #4;
+       ldm     %r2!, {%r5, %r7, %r9, %lr};
+       sbcs    %r4, %r5;
+       sbcs    %r6, %r7;
+       sbcs    %r8, %r9;
+       sbcs    %r10, %lr;
+       teq     %r3, #0;
+       stm     %r0!, {%r4, %r6, %r8, %r10};
+       bne     .Large_loop;
+
+.Lend:
+       sbc     %r0, %r3, #0;
+       neg     %r0, %r0;
+       pop     {%r4, %r5, %r6, %r7, %r8, %r9, %r10, %pc};
+.size _gcry_mpih_sub_n,.-_gcry_mpih_sub_n;
diff --git a/grub-core/lib/libgcrypt/mpi/asm-common-aarch64.h 
b/grub-core/lib/libgcrypt/mpi/asm-common-aarch64.h
new file mode 100644
index 000000000..cf4bdb852
--- /dev/null
+++ b/grub-core/lib/libgcrypt/mpi/asm-common-aarch64.h
@@ -0,0 +1,26 @@
+/* asm-common-aarch64.h  -  Common macros for AArch64 assembly
+ *
+ * Copyright (C) 2018 Martin Storsjö <martin@martin.st>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef MPI_ASM_COMMON_AARCH64_H
+#define MPI_ASM_COMMON_AARCH64_H
+
+#include "../cipher/asm-common-aarch64.h"
+
+#endif /* MPI_ASM_COMMON_AARCH64_H */
diff --git a/grub-core/lib/libgcrypt/mpi/asm-common-amd64.h 
b/grub-core/lib/libgcrypt/mpi/asm-common-amd64.h
new file mode 100644
index 000000000..ad0e8e62f
--- /dev/null
+++ b/grub-core/lib/libgcrypt/mpi/asm-common-amd64.h
@@ -0,0 +1,26 @@
+/* asm-common-amd64.h  -  Common macros for AMD64 assembly
+ *
+ * Copyright (C) 2022 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef MPI_ASM_COMMON_AMD64_H
+#define MPI_ASM_COMMON_AMD64_H
+
+#include "../cipher/asm-common-amd64.h"
+
+#endif /* MPI_ASM_COMMON_AMD64_H */
diff --git a/grub-core/lib/libgcrypt/mpi/config.links 
b/grub-core/lib/libgcrypt/mpi/config.links
index 7e910ee37..8cd6657e6 100644
--- a/grub-core/lib/libgcrypt/mpi/config.links
+++ b/grub-core/lib/libgcrypt/mpi/config.links
@@ -1,5 +1,6 @@
 # config.links - helper for ../configure             -*- mode: sh -*-
 # Copyright (C) 1998, 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
+# Copyright (C) 2012  g10 Code GmbH
 #
 # This file is part of Libgcrypt.
 #
@@ -20,9 +21,14 @@
 # sourced by ../configure to get the list of files to link
 # this should set $mpi_ln_list.
 # Note: this is called from the above directory.
+#
+# Reguired variables:
+#  $ac_cv_sys_symbol_underscore
+#  $gcry_cv_gcc_arm_platform_as_ok
 
 mpi_sflags=
 mpi_extra_modules=
+mpi_cpu_arch=
 
 test -d ./mpi || mkdir ./mpi
 
@@ -37,152 +43,179 @@ mpi_optional_modules=`$AWK 
'/^#BEGIN_ASM_LIST/,/^#END_ASM_LIST/ {
 echo '/* created by config.links - do not edit */' >./mpi/asm-syntax.h
 echo "/* Host: ${host} */" >>./mpi/asm-syntax.h
 
-if test "$try_asm_modules" = "yes" ; then
 case "${host}" in
-    powerpc-apple-darwin*          | \
     i[34567]86*-*-openbsd[12]*     | \
     i[34567]86*-*-openbsd3.[0123]*)
        echo '/* No working assembler modules available */' >>./mpi/asm-syntax.h
        path=""
+       mpi_cpu_arch="x86"
        ;;
-    i[3467]86*-*-openbsd*      | \
-    i[3467]86*-*-freebsd*-elf  | \
-    i[3467]86*-*-freebsd[3-9]* | \
-    i[3467]86*-*-freebsdelf*   | \
-    i[3467]86*-*-netbsd*       | \
-    i[3467]86*-*-k*bsd*)
-       echo '#define ELF_SYNTAX' >>./mpi/asm-syntax.h
-       cat  $srcdir/mpi/i386/syntax.h     >>./mpi/asm-syntax.h
-       path="i386"
-       ;;
-    i586*-*-openbsd*         | \
-    i586*-*-freebsd*-elf     | \
-    i586*-*-freebsd[3-9]*    | \
-    i586*-*-freebsdelf*      | \
-    i586*-*-netbsd*         | \
-    i586*-*-k*bsd*          | \
-    pentium-*-netbsd*       | \
+    i[34567]86*-*-openbsd*         | \
+    i[34567]86*-*-freebsd*-elf     | \
+    i[34567]86*-*-freebsd[3-9]*    | \
+    i[34567]86*-*-freebsd[12][0-9]*| \
+    i[34567]86*-*-freebsdelf*      | \
+    i[34567]86*-*-netbsd*          | \
+    i[34567]86*-*-k*bsd*           | \
+    pentium-*-netbsd*             | \
     pentiumpro-*-netbsd*)
        echo '#define ELF_SYNTAX' >>./mpi/asm-syntax.h
        cat  $srcdir/mpi/i386/syntax.h     >>./mpi/asm-syntax.h
-       path="i586 i386"
+       path="i386"
+       mpi_cpu_arch="x86"
        ;;
     i[34]86*-*-bsdi4*)
        echo '#define ELF_SYNTAX' >>./mpi/asm-syntax.h
        cat  $srcdir/mpi/i386/syntax.h   >>./mpi/asm-syntax.h
        path="i386"
+       mpi_cpu_arch="x86"
        ;;
-    i[3467]86*-*-linuxaout*  | \
-    i[3467]86*-*-linuxoldld* | \
-    i[3467]86*-*-*bsd*)
+    i[34567]86*-*-linuxaout*  | \
+    i[34567]86*-*-linuxoldld* | \
+    i[34567]86*-*-*bsd*)
        echo '#define BSD_SYNTAX' >>./mpi/asm-syntax.h
        echo '#define X86_BROKEN_ALIGN' >>./mpi/asm-syntax.h
        cat  $srcdir/mpi/i386/syntax.h      >>./mpi/asm-syntax.h
        path="i386"
+        mpi_cpu_arch="x86"
        ;;
-    i586*-*-linuxaout*  | \
-    i586*-*-linuxoldld* | \
-    i586*-*-*bsd*)
-       echo '#define BSD_SYNTAX' >>./mpi/asm-syntax.h
-       echo '#define X86_BROKEN_ALIGN' >>./mpi/asm-syntax.h
-       cat  $srcdir/mpi/i386/syntax.h      >>./mpi/asm-syntax.h
-       path="i586 i386"
-       ;;
-    i[3467]86*-msdosdjgpp* | \
-    i[34]86*-apple-darwin*)
+    i[34567]86*-msdosdjgpp* | \
+    i[34567]86*-apple-darwin*)
        echo '#define BSD_SYNTAX'        >>./mpi/asm-syntax.h
        cat  $srcdir/mpi/i386/syntax.h   >>./mpi/asm-syntax.h
        path="i386"
+        mpi_cpu_arch="x86"
        ;;
-    i586*-msdosdjgpp* | \
-    i[567]86*-apple-darwin*)
-       echo '#define BSD_SYNTAX'        >>./mpi/asm-syntax.h
-       cat  $srcdir/mpi/i386/syntax.h   >>./mpi/asm-syntax.h
-       path="i586 i386"
-       ;;
-    i[3467]86*-*-*)
+    i[34567]86*-*-* | \
+    pentium-*-*     | \
+    pentiumpro-*-*)
        echo '#define ELF_SYNTAX' >>./mpi/asm-syntax.h
        cat  $srcdir/mpi/i386/syntax.h      >>./mpi/asm-syntax.h
        path="i386"
+        mpi_cpu_arch="x86"
        ;;
-    i586*-*-*  | \
-    pentium-*-*   | \
-    pentiumpro-*-*)
-       echo '#define ELF_SYNTAX' >>./mpi/asm-syntax.h
+    x86_64-apple-darwin*)
+       echo '#define BSD_SYNTAX' >>./mpi/asm-syntax.h
        cat  $srcdir/mpi/i386/syntax.h      >>./mpi/asm-syntax.h
-       path="i586 i386"
+       cat  $srcdir/mpi/amd64/func_abi.h   >>./mpi/asm-syntax.h
+       path="amd64"
+        mpi_cpu_arch="x86"
        ;;
+    x86_64-*mingw32*)
+       echo '#define USE_MS_ABI' >>./mpi/asm-syntax.h
+       echo '#define ELF_SYNTAX' >>./mpi/asm-syntax.h
+       cat  $srcdir/mpi/i386/syntax.h      >>./mpi/asm-syntax.h
+       cat  $srcdir/mpi/amd64/func_abi.h   >>./mpi/asm-syntax.h
+       path="amd64"
+        mpi_cpu_arch="x86"
+        ;;
     x86_64-*-*)
        echo '#define ELF_SYNTAX' >>./mpi/asm-syntax.h
        cat  $srcdir/mpi/i386/syntax.h      >>./mpi/asm-syntax.h
+       cat  $srcdir/mpi/amd64/func_abi.h   >>./mpi/asm-syntax.h
        path="amd64"
+        mpi_cpu_arch="x86"
        ;;
     alpha*-*-*)
        echo '/* configured for alpha */' >>./mpi/asm-syntax.h
        path="alpha"
        mpi_extra_modules="udiv-qrnnd"
+        mpi_cpu_arch="alpha"
+       ;;
+    aarch64-*-*)
+       echo '/* configured for aarch64 */' >>./mpi/asm-syntax.h
+       path="aarch64"
+       mpi_cpu_arch="aarch64"
+       ;;
+    arm*-*-*)
+       mpi_cpu_arch="arm"
+       if test "$gcry_cv_gcc_arm_platform_as_ok" = "yes" ; then
+         echo '/* configured for arm */' >>./mpi/asm-syntax.h
+         path="arm"
+       else
+         echo '/* No assembler modules configured */' >>./mpi/asm-syntax.h
+         path=""
+       fi
        ;;
     hppa7000*-*-*)
        echo '/* configured for HPPA (pa7000) */' >>./mpi/asm-syntax.h
        path="hppa1.1 hppa"
        mpi_extra_modules="udiv-qrnnd"
+       mpi_cpu_arch="hppa"
        ;;
     hppa1.0*-*-*)
        echo '/* configured for HPPA 1.0 */' >>./mpi/asm-syntax.h
        path="hppa"
        mpi_extra_modules="udiv-qrnnd"
+       mpi_cpu_arch="hppa"
        ;;
     hppa*-*-*) # assume pa7100
        echo '/* configured for HPPA (pa7100) */' >>./mpi/asm-syntax.h
        path="pa7100 hppa1.1 hppa"
        mpi_extra_modules="udiv-qrnnd"
+       mpi_cpu_arch="hppa"
        ;;
     sparc64-*-linux-gnu)
         echo '/* No working assembler modules available */' 
>>./mpi/asm-syntax.h
        path=""
+        mpi_cpu_arch="sparc"
        ;;
     sparc64-sun-solaris2*)
         echo '/* No working assembler modules available */' 
>>./mpi/asm-syntax.h
        path=""
+        mpi_cpu_arch="sparc"
         ;;
     sparc64-*-netbsd* | sparc64-*-freebsd* | sparc64-*-openbsd*)
        # There are no sparc64 assembler modules that work on the
        # *BSDs, so use the generic C functions.
        echo '/* No working assembler modules available */' >>./mpi/asm-syntax.h
        path=""
+       mpi_cpu_arch="sparc"
        ;;
     sparc64*-*-*)
        echo '/* No working assembler modules available */' >>./mpi/asm-syntax.h
        path=""
+       mpi_cpu_arch="sparc"
        ;;
     sparc9*-*-*     | \
     ultrasparc*-*-* )
        echo '/* configured for sparc9 or higher */' >>./mpi/asm-syntax.h
        path="sparc32v8 sparc32"
+        mpi_cpu_arch="sparc"
        ;;
     sparc8*-*-*     | \
     microsparc*-*-*)
        echo '/* configured for sparc8 */' >>./mpi/asm-syntax.h
        path="sparc32v8 sparc32"
+        mpi_cpu_arch="sparc"
        ;;
     supersparc*-*-*)
        echo '/* configured for supersparc */' >>./mpi/asm-syntax.h
        path="supersparc sparc32v8 sparc32"
        mpi_extra_modules="udiv"
+        mpi_cpu_arch="sparc"
        ;;
     sparc*-*-*)
        echo '/* configured for sparc */' >>./mpi/asm-syntax.h
        path="sparc32"
        mpi_extra_modules="udiv"
+        mpi_cpu_arch="sparc"
        ;;
     mips[34]*-*-* | \
     mips*-*-irix6*)
        echo '/* configured for MIPS3 */' >>./mpi/asm-syntax.h
        path="mips3"
+       mpi_cpu_arch="mips"
        ;;
     mips*-*-*)
        echo '/* configured for MIPS2 */' >>./mpi/asm-syntax.h
        path="mips2"
+       mpi_cpu_arch="mips"
+       ;;
+    s390x*-*-*)
+       echo '/* No working assembler modules available */' >>./mpi/asm-syntax.h
+       path=""
+       mpi_cpu_arch="s390x"
        ;;
 
     # Motorola 68k configurations.  Let m68k mean 68020-68040.
@@ -192,58 +225,74 @@ case "${host}" in
        echo '#define MIT_SYNTAX'           >>./mpi/asm-syntax.h
        cat  $srcdir/mpi/m68k/syntax.h      >>./mpi/asm-syntax.h
        path="m68k/mc68020 m68k"
+        mpi_cpu_arch="m68k"
        ;;
     m68060*-*-linuxaout*)
        echo '#define MIT_SYNTAX'           >>./mpi/asm-syntax.h
        cat  $srcdir/mpi/m68k/syntax.h      >>./mpi/asm-syntax.h
        path="m68k"
+        mpi_cpu_arch="m68k"
        ;;
     m680[234]0*-*-linux* | \
     m68k*-*-linux*)
        echo '#define ELF_SYNTAX'           >>./mpi/asm-syntax.h
        cat  $srcdir/mpi/m68k/syntax.h      >>./mpi/asm-syntax.h
+        mpi_cpu_arch="m68k"
        ;;
     m68060*-*-linux*)
        echo '#define ELF_SYNTAX'           >>./mpi/asm-syntax.h
        cat  $srcdir/mpi/m68k/syntax.h      >>./mpi/asm-syntax.h
        path="m68k"
+        mpi_cpu_arch="m68k"
        ;;
     m68k-atari-mint)
        echo '#define MIT_SYNTAX'           >>./mpi/asm-syntax.h
        cat  $srcdir/mpi/m68k/syntax.h      >>./mpi/asm-syntax.h
-       path="m68k/mc68020 m68k"
+       path="m68k"
+        mpi_cpu_arch="m68k"
        ;;
     m68000*-*-* | \
     m68060*-*-*)
        echo '#define MIT_SYNTAX'           >>./mpi/asm-syntax.h
        cat  $srcdir/mpi/m68k/syntax.h      >>./mpi/asm-syntax.h
        path="m68k/mc68000"
+        mpi_cpu_arch="m68k"
        ;;
     m680[234]0*-*-* | \
     m68k*-*-*)
        echo '#define MIT_SYNTAX'           >>./mpi/asm-syntax.h
        cat  $srcdir/mpi/m68k/syntax.h      >>./mpi/asm-syntax.h
        path="m68k/mc68020 m68k"
+        mpi_cpu_arch="m68k"
        ;;
 
+    powerpc-apple-darwin*)
+       echo '/* No working assembler modules available */' >>./mpi/asm-syntax.h
+       path=""
+       mpi_cpu_arch="ppc"
+       ;;
+
     powerpc*-*-netbsd* | powerpc*-*-openbsd*)
        echo '/* configured {Open,Net}BSD on powerpc */' >>./mpi/asm-syntax.h
        echo '#define ELF_SYNTAX'                 >>./mpi/asm-syntax.h
        cat   $srcdir/mpi/powerpc32/syntax.h     >>./mpi/asm-syntax.h
        mpi_sflags="-Wa,-mppc"
        path="powerpc32"
+       mpi_cpu_arch="ppc"
        ;;
 
     ppc620-*-*     | \
     powerpc64*-*-*)
        mpi_sflags="-Wa,-mppc"
        path="powerpc64"
+        mpi_cpu_arch="ppc"
        ;;
     powerpc*-*-linux*)
        echo '/* configured for powerpc/ELF */' >>./mpi/asm-syntax.h
        echo '#define ELF_SYNTAX'               >>./mpi/asm-syntax.h
        cat   $srcdir/mpi/powerpc32/syntax.h    >>./mpi/asm-syntax.h
        path="powerpc32"
+       mpi_cpu_arch="ppc"
        ;;
 
     rs6000-*-aix[456789]*    | \
@@ -251,6 +300,7 @@ case "${host}" in
        mpi_sflags="-Wa,-mpwr"
        path="power"
        mpi_extra_modules="udiv-w-sdiv"
+       mpi_cpu_arch="ppc"
        ;;
     rs6000-*-* | \
     power-*-*  | \
@@ -258,6 +308,7 @@ case "${host}" in
        mpi_sflags="-Wa,-mppc"
        path="power"
        mpi_extra_modules="udiv-w-sdiv"
+        mpi_cpu_arch="ppc"
        ;;
     powerpc-ibm-aix4.2.* )
        # I am not sure about this one but a machine identified by
@@ -265,29 +316,54 @@ case "${host}" in
        mpi_sflags="-Wa,-mpwr"
        path="power"
        mpi_extra_modules="udiv-w-sdiv"
+        mpi_cpu_arch="ppc"
        ;;
     ppc601-*-*)
        mpi_sflags="-Wa,-mppc"
        path="power powerpc32"
+        mpi_cpu_arch="ppc"
        ;;
     ppc60[234]*-*-*)
        mpi_sflags="-Wa,-mppc"
        path="powerpc32"
+        mpi_cpu_arch="ppc"
        ;;
     powerpc*-*-*)
        mpi_sflags="-Wa,-mppc"
        path="powerpc32"
+        mpi_cpu_arch="ppc"
        ;;
     *)
-       echo '/* No assembler modules configured */' >>./mpi/asm-syntax.h
+       echo '/* Platform not known */' >>./mpi/asm-syntax.h
        path=""
        ;;
 esac
-else
-    echo '/* Assembler modules disabled on request */' >>./mpi/asm-syntax.h
+
+# If asm modules are disabled reset the found variables but keep
+# mpi_cpu_arch.
+if test "$try_asm_modules" != "yes" ; then
+    echo '/* Assembler modules disabled on request */' >./mpi/asm-syntax.h
     path=""
+    mpi_sflags=""
+    mpi_extra_modules=""
+    mpi_cpu_arch="disabled"
+fi
+
+# Make sure that mpi_cpu_arch is not the empty string.
+if test x"$mpi_cpu_arch" = x ; then
+    mpi_cpu_arch="unknown"
 fi
 
+# Add .note.gnu.property section for Intel CET in assembler sources
+# when CET is enabled.  */
+if test x"$mpi_cpu_arch" = xx86 ; then
+    cat <<EOF >> ./mpi/asm-syntax.h
+
+#if defined(__ASSEMBLER__) && defined(__CET__)
+# include <cet.h>
+#endif
+EOF
+fi
 
 # Make sysdep.h
 echo '/* created by config.links - do not edit */' >./mpi/sysdep.h
diff --git a/grub-core/lib/libgcrypt/cipher/rmd.h 
b/grub-core/lib/libgcrypt/mpi/ec-ed25519.c
similarity index 52%
rename from grub-core/lib/libgcrypt/cipher/rmd.h
rename to grub-core/lib/libgcrypt/mpi/ec-ed25519.c
index 6a9fe3135..acfe2a69f 100644
--- a/grub-core/lib/libgcrypt/cipher/rmd.h
+++ b/grub-core/lib/libgcrypt/mpi/ec-ed25519.c
@@ -1,5 +1,5 @@
-/* rmd.h - RIPE-MD hash functions
- *     Copyright (C) 1998, 2001, 2002 Free Software Foundation, Inc.
+/* ec-ed25519.c -  Ed25519 optimized elliptic curve functions
+ * Copyright (C) 2013 g10 Code GmbH
  *
  * This file is part of Libgcrypt.
  *
@@ -14,23 +14,24 @@
  * GNU Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
-#ifndef G10_RMD_H
-#define G10_RMD_H
 
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+
+#include "mpi-internal.h"
+#include "longlong.h"
+#include "g10lib.h"
+#include "context.h"
+#include "ec-context.h"
 
-/* We need this here because random.c must have direct access. */
-typedef struct
-{
-  u32  h0,h1,h2,h3,h4;
-  u32  nblocks;
-  byte buf[64];
-  int  count;
-} RMD160_CONTEXT;
 
-void _gcry_rmd160_init ( void *context );
-void _gcry_rmd160_mixblock ( RMD160_CONTEXT *hd, void *blockof64byte );
+void
+_gcry_mpi_ec_ed25519_mod (gcry_mpi_t a)
+{
+  (void)a;
 
-#endif /*G10_RMD_H*/
+}
diff --git a/grub-core/lib/libgcrypt/mpi/ec-hw-s390x.c 
b/grub-core/lib/libgcrypt/mpi/ec-hw-s390x.c
new file mode 100644
index 000000000..149a061d7
--- /dev/null
+++ b/grub-core/lib/libgcrypt/mpi/ec-hw-s390x.c
@@ -0,0 +1,412 @@
+/* ec-hw-s390x.c -  zSeries ECC acceleration
+ * Copyright (C) 2021 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+
+#ifdef HAVE_GCC_INLINE_ASM_S390X
+
+#include "mpi-internal.h"
+#include "g10lib.h"
+#include "context.h"
+#include "ec-context.h"
+#include "ec-internal.h"
+
+#include "../cipher/bufhelp.h"
+#include "../cipher/asm-inline-s390x.h"
+
+
+#define S390X_PCC_PARAM_BLOCK_SIZE 4096
+
+
+extern void reverse_buffer (unsigned char *buffer, unsigned int length);
+
+static int s390_mul_point_montgomery (mpi_point_t result, gcry_mpi_t scalar,
+                                     mpi_point_t point, mpi_ec_t ctx,
+                                     byte *param_block_buf);
+
+
+static int
+mpi_copy_to_raw(byte *raw, unsigned int raw_nbytes, gcry_mpi_t a)
+{
+  unsigned int num_to_zero;
+  unsigned int nbytes;
+  int i, j;
+
+  if (mpi_has_sign (a))
+    return -1;
+
+  if (mpi_get_flag (a, GCRYMPI_FLAG_OPAQUE))
+    {
+      unsigned int nbits;
+      byte *buf;
+
+      buf = mpi_get_opaque (a, &nbits);
+      nbytes = (nbits + 7) / 8;
+
+      if (raw_nbytes < nbytes)
+       return -1;
+
+      num_to_zero = raw_nbytes - nbytes;
+      if (num_to_zero > 0)
+        memset (raw, 0, num_to_zero);
+      if (nbytes > 0)
+       memcpy (raw + num_to_zero, buf, nbytes);
+
+      return 0;
+    }
+
+  nbytes = a->nlimbs * BYTES_PER_MPI_LIMB;
+  if (raw_nbytes < nbytes)
+    return -1;
+
+  num_to_zero = raw_nbytes - nbytes;
+  if (num_to_zero > 0)
+    memset (raw, 0, num_to_zero);
+
+  for (j = a->nlimbs - 1, i = 0; i < a->nlimbs; i++, j--)
+    {
+      buf_put_be64(raw + num_to_zero + i * BYTES_PER_MPI_LIMB, a->d[j]);
+    }
+
+  return 0;
+}
+
+int
+_gcry_s390x_ec_hw_mul_point (mpi_point_t result, gcry_mpi_t scalar,
+                            mpi_point_t point, mpi_ec_t ctx)
+{
+  byte param_block_buf[S390X_PCC_PARAM_BLOCK_SIZE];
+  byte *param_out_x = NULL;
+  byte *param_out_y = NULL;
+  byte *param_in_x = NULL;
+  byte *param_in_y = NULL;
+  byte *param_scalar = NULL;
+  unsigned int field_nbits;
+  unsigned int pcc_func;
+  gcry_mpi_t x, y;
+  gcry_mpi_t d = NULL;
+  int rc = -1;
+
+  if (ctx->name == NULL)
+    return -1;
+
+  if (!(_gcry_get_hw_features () & HWF_S390X_MSA_9))
+    return -1; /* ECC acceleration not supported by HW. */
+
+  if (ctx->model == MPI_EC_MONTGOMERY)
+    return s390_mul_point_montgomery (result, scalar, point, ctx,
+                                     param_block_buf);
+
+  if (ctx->model == MPI_EC_WEIERSTRASS && ctx->nbits == 256 &&
+      strcmp (ctx->name, "NIST P-256") == 0)
+    {
+      struct pcc_param_block_nistp256_s
+      {
+       byte out_x[256 / 8];
+       byte out_y[256 / 8];
+       byte in_x[256 / 8];
+       byte in_y[256 / 8];
+       byte scalar[256 / 8];
+       byte c_and_ribm[64];
+      } *params = (void *)param_block_buf;
+
+      memset (params->c_and_ribm, 0, sizeof(params->c_and_ribm));
+
+      pcc_func = PCC_FUNCTION_NIST_P256;
+      field_nbits = 256;
+      param_out_x = params->out_x;
+      param_out_y = params->out_y;
+      param_in_x = params->in_x;
+      param_in_y = params->in_y;
+      param_scalar = params->scalar;
+    }
+  else if (ctx->model == MPI_EC_WEIERSTRASS && ctx->nbits == 384 &&
+           strcmp (ctx->name, "NIST P-384") == 0)
+    {
+      struct pcc_param_block_nistp384_s
+      {
+       byte out_x[384 / 8];
+       byte out_y[384 / 8];
+       byte in_x[384 / 8];
+       byte in_y[384 / 8];
+       byte scalar[384 / 8];
+       byte c_and_ribm[64];
+      } *params = (void *)param_block_buf;
+
+      memset (params->c_and_ribm, 0, sizeof(params->c_and_ribm));
+
+      pcc_func = PCC_FUNCTION_NIST_P384;
+      field_nbits = 384;
+      param_out_x = params->out_x;
+      param_out_y = params->out_y;
+      param_in_x = params->in_x;
+      param_in_y = params->in_y;
+      param_scalar = params->scalar;
+    }
+  else if (ctx->model == MPI_EC_WEIERSTRASS && ctx->nbits == 521 &&
+           strcmp (ctx->name, "NIST P-521") == 0)
+    {
+      struct pcc_param_block_nistp521_s
+      {
+       byte out_x[640 / 8]; /* note: first 14 bytes not modified by pcc */
+       byte out_y[640 / 8]; /* note: first 14 bytes not modified by pcc */
+       byte in_x[640 / 8];
+       byte in_y[640 / 8];
+       byte scalar[640 / 8];
+       byte c_and_ribm[64];
+      } *params = (void *)param_block_buf;
+
+      memset (params->out_x, 0, 14);
+      memset (params->out_y, 0, 14);
+      memset (params->c_and_ribm, 0, sizeof(params->c_and_ribm));
+
+      pcc_func = PCC_FUNCTION_NIST_P521;
+      field_nbits = 640;
+      param_out_x = params->out_x;
+      param_out_y = params->out_y;
+      param_in_x = params->in_x;
+      param_in_y = params->in_y;
+      param_scalar = params->scalar;
+    }
+  else if (ctx->model == MPI_EC_EDWARDS && ctx->nbits == 255 &&
+           strcmp (ctx->name, "Ed25519") == 0)
+    {
+      struct pcc_param_block_ed25519_s
+      {
+       byte out_x[256 / 8];
+       byte out_y[256 / 8];
+       byte in_x[256 / 8];
+       byte in_y[256 / 8];
+       byte scalar[256 / 8];
+       byte c_and_ribm[64];
+      } *params = (void *)param_block_buf;
+
+      memset (params->c_and_ribm, 0, sizeof(params->c_and_ribm));
+
+      pcc_func = PCC_FUNCTION_ED25519;
+      field_nbits = 256;
+      param_out_x = params->out_x;
+      param_out_y = params->out_y;
+      param_in_x = params->in_x;
+      param_in_y = params->in_y;
+      param_scalar = params->scalar;
+    }
+  else if (ctx->model == MPI_EC_EDWARDS && ctx->nbits == 448 &&
+           strcmp (ctx->name, "Ed448") == 0)
+    {
+      struct pcc_param_block_ed448_s
+      {
+       byte out_x[512 / 8]; /* note: first 8 bytes not modified by pcc */
+       byte out_y[512 / 8]; /* note: first 8 bytes not modified by pcc */
+       byte in_x[512 / 8];
+       byte in_y[512 / 8];
+       byte scalar[512 / 8];
+       byte c_and_ribm[64];
+      } *params = (void *)param_block_buf;
+
+      memset (params->out_x, 0, 8);
+      memset (params->out_y, 0, 8);
+      memset (params->c_and_ribm, 0, sizeof(params->c_and_ribm));
+
+      pcc_func = PCC_FUNCTION_ED448;
+      field_nbits = 512;
+      param_out_x = params->out_x;
+      param_out_y = params->out_y;
+      param_in_x = params->in_x;
+      param_in_y = params->in_y;
+      param_scalar = params->scalar;
+    }
+
+  if (param_scalar == NULL)
+    return -1; /* No curve match. */
+
+  if (!(pcc_query () & km_function_to_mask (pcc_func)))
+    return -1; /* HW does not support acceleration for this curve. */
+
+  x = mpi_new (0);
+  y = mpi_new (0);
+
+  if (_gcry_mpi_ec_get_affine (x, y, point, ctx) < 0)
+    {
+      /* Point at infinity. */
+      goto out;
+    }
+
+  if (mpi_has_sign (scalar) || mpi_cmp (scalar, ctx->n) >= 0)
+    {
+      d = mpi_is_secure (scalar) ? mpi_snew (ctx->nbits) : mpi_new 
(ctx->nbits);
+      _gcry_mpi_mod (d, scalar, ctx->n);
+    }
+  else
+    {
+      d = scalar;
+    }
+
+  if (mpi_copy_to_raw (param_in_x, field_nbits / 8, x) < 0)
+    goto out;
+
+  if (mpi_copy_to_raw (param_in_y, field_nbits / 8, y) < 0)
+    goto out;
+
+  if (mpi_copy_to_raw (param_scalar, field_nbits / 8, d) < 0)
+    goto out;
+
+  if (pcc_scalar_multiply (pcc_func, param_block_buf) != 0)
+    goto out;
+
+  _gcry_mpi_set_buffer (result->x, param_out_x, field_nbits / 8, 0);
+  _gcry_mpi_set_buffer (result->y, param_out_y, field_nbits / 8, 0);
+  mpi_set_ui (result->z, 1);
+  mpi_normalize (result->x);
+  mpi_normalize (result->y);
+  if (ctx->model == MPI_EC_EDWARDS)
+    mpi_point_resize (result, ctx);
+
+  rc = 0;
+
+out:
+  if (d != scalar)
+    mpi_release (d);
+  mpi_release (y);
+  mpi_release (x);
+  wipememory (param_block_buf, S390X_PCC_PARAM_BLOCK_SIZE);
+
+  return rc;
+}
+
+
+static int
+s390_mul_point_montgomery (mpi_point_t result, gcry_mpi_t scalar,
+                          mpi_point_t point, mpi_ec_t ctx,
+                          byte *param_block_buf)
+{
+  byte *param_out_x = NULL;
+  byte *param_in_x = NULL;
+  byte *param_scalar = NULL;
+  unsigned int field_nbits;
+  unsigned int pcc_func;
+  gcry_mpi_t x;
+  gcry_mpi_t d = NULL;
+  int rc = -1;
+
+  if (ctx->nbits == 255 && strcmp (ctx->name, "Curve25519") == 0)
+    {
+      struct pcc_param_block_x25519_s
+      {
+       byte out_x[256 / 8];
+       byte in_x[256 / 8];
+       byte scalar[256 / 8];
+       byte c_and_ribm[64];
+      } *params = (void *)param_block_buf;
+
+      memset (params->c_and_ribm, 0, sizeof(params->c_and_ribm));
+
+      pcc_func = PCC_FUNCTION_X25519;
+      field_nbits = 256;
+      param_out_x = params->out_x;
+      param_in_x = params->in_x;
+      param_scalar = params->scalar;
+    }
+  else if (ctx->nbits == 448 && strcmp (ctx->name, "X448") == 0)
+    {
+      struct pcc_param_block_x448_s
+      {
+       byte out_x[512 / 8]; /* note: first 8 bytes not modified by pcc */
+       byte in_x[512 / 8];
+       byte scalar[512 / 8];
+       byte c_and_ribm[64];
+      } *params = (void *)param_block_buf;
+
+      memset (params->out_x, 0, 8);
+      memset (params->c_and_ribm, 0, sizeof(params->c_and_ribm));
+
+      pcc_func = PCC_FUNCTION_X448;
+      field_nbits = 512;
+      param_out_x = params->out_x;
+      param_in_x = params->in_x;
+      param_scalar = params->scalar;
+    }
+
+  if (param_scalar == NULL)
+    return -1; /* No curve match. */
+
+  if (!(pcc_query () & km_function_to_mask (pcc_func)))
+    return -1; /* HW does not support acceleration for this curve. */
+
+  x = mpi_new (0);
+
+  if (mpi_is_opaque (scalar))
+    {
+      const unsigned int pbits = ctx->nbits;
+      unsigned int n;
+      unsigned char *raw;
+
+      raw = _gcry_mpi_get_opaque_copy (scalar, &n);
+      if ((n + 7) / 8 != (pbits + 7) / 8)
+        log_fatal ("scalar size (%d) != prime size (%d)\n",
+                   (n + 7) / 8, (pbits + 7) / 8);
+
+      reverse_buffer (raw, (n + 7 ) / 8);
+      if ((pbits % 8))
+        raw[0] &= (1 << (pbits % 8)) - 1;
+      raw[0] |= (1 << ((pbits + 7) % 8));
+      raw[(pbits + 7) / 8 - 1] &= (256 - ctx->h);
+      d = mpi_is_secure (scalar) ? mpi_snew (pbits) : mpi_new (pbits);
+      _gcry_mpi_set_buffer (d, raw, (n + 7) / 8, 0);
+      xfree (raw);
+    }
+  else
+    {
+      d = scalar;
+    }
+
+  if (_gcry_mpi_ec_get_affine (x, NULL, point, ctx) < 0)
+    {
+      /* Point at infinity. */
+      goto out;
+    }
+
+  if (mpi_copy_to_raw (param_in_x, field_nbits / 8, x) < 0)
+    goto out;
+
+  if (mpi_copy_to_raw (param_scalar, field_nbits / 8, d) < 0)
+    goto out;
+
+  if (pcc_scalar_multiply (pcc_func, param_block_buf) != 0)
+    goto out;
+
+  _gcry_mpi_set_buffer (result->x, param_out_x, field_nbits / 8, 0);
+  mpi_set_ui (result->z, 1);
+  mpi_point_resize (result, ctx);
+
+  rc = 0;
+
+out:
+  if (d != scalar)
+    mpi_release (d);
+  mpi_release (x);
+  wipememory (param_block_buf, S390X_PCC_PARAM_BLOCK_SIZE);
+
+  return rc;
+}
+
+#endif /* HAVE_GCC_INLINE_ASM_S390X */
diff --git a/grub-core/lib/libgcrypt/mpi/ec-inline.h 
b/grub-core/lib/libgcrypt/mpi/ec-inline.h
new file mode 100644
index 000000000..a07826e39
--- /dev/null
+++ b/grub-core/lib/libgcrypt/mpi/ec-inline.h
@@ -0,0 +1,1065 @@
+/* ec-inline.h - EC inline addition/substraction helpers
+ * Copyright (C) 2021 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef GCRY_EC_INLINE_H
+#define GCRY_EC_INLINE_H
+
+#include "mpi-internal.h"
+#include "longlong.h"
+#include "ec-context.h"
+#include "../cipher/bithelp.h"
+#include "../cipher/bufhelp.h"
+
+
+#if BYTES_PER_MPI_LIMB == 8
+
+/* 64-bit limb definitions for 64-bit architectures.  */
+
+#define LIMBS_PER_LIMB64 1
+#define LOAD64(x, pos) ((x)[pos])
+#define STORE64(x, pos, v) ((x)[pos] = (mpi_limb_t)(v))
+#define LIMB_TO64(v) ((mpi_limb_t)(v))
+#define LIMB_FROM64(v) ((mpi_limb_t)(v))
+#define HIBIT_LIMB64(v) ((mpi_limb_t)(v) >> (BITS_PER_MPI_LIMB - 1))
+#define HI32_LIMB64(v) (u32)((mpi_limb_t)(v) >> (BITS_PER_MPI_LIMB - 32))
+#define LO32_LIMB64(v) ((u32)(v))
+#define LIMB64_C(hi, lo) (((mpi_limb_t)(u32)(hi) << 32) | (u32)(lo))
+#define MASK_AND64(mask, val) ((mask) & (val))
+#define LIMB_OR64(val1, val2) ((val1) | (val2))
+#define STORE64_COND(x, pos, mask1, val1, mask2, val2) \
+    ((x)[(pos)] = ((mask1) & (val1)) | ((mask2) & (val2)))
+
+typedef mpi_limb_t mpi_limb64_t;
+
+static inline u32
+LOAD32(mpi_ptr_t x, unsigned int pos)
+{
+  unsigned int shr = (pos % 2) * 32;
+  return (x[pos / 2] >> shr);
+}
+
+static inline mpi_limb64_t
+LIMB64_HILO(u32 hi, u32 lo)
+{
+  mpi_limb64_t v = hi;
+  return (v << 32) | lo;
+}
+
+
+/* x86-64 addition/subtraction helpers.  */
+#if defined (__x86_64__) && defined(HAVE_CPU_ARCH_X86) && __GNUC__ >= 4
+
+#define ADD3_LIMB64(A2, A1, A0, B2, B1, B0, C2, C1, C0) \
+  __asm__ ("addq %8, %2\n" \
+          "adcq %7, %1\n" \
+          "adcq %6, %0\n" \
+          : "=r" (A2), \
+            "=&r" (A1), \
+            "=&r" (A0) \
+          : "0" ((mpi_limb_t)(B2)), \
+            "1" ((mpi_limb_t)(B1)), \
+            "2" ((mpi_limb_t)(B0)), \
+            "rme" ((mpi_limb_t)(C2)), \
+            "rme" ((mpi_limb_t)(C1)), \
+            "rme" ((mpi_limb_t)(C0)) \
+          : "cc")
+
+#define SUB3_LIMB64(A3, A2, A1, A0, B2, B1, B0, C2, C1, C0) \
+  __asm__ ("subq %8, %2\n" \
+          "sbbq %7, %1\n" \
+          "sbbq %6, %0\n" \
+          : "=r" (A2), \
+            "=&r" (A1), \
+            "=&r" (A0) \
+          : "0" ((mpi_limb_t)(B2)), \
+            "1" ((mpi_limb_t)(B1)), \
+            "2" ((mpi_limb_t)(B0)), \
+            "rme" ((mpi_limb_t)(C2)), \
+            "rme" ((mpi_limb_t)(C1)), \
+            "rme" ((mpi_limb_t)(C0)) \
+          : "cc")
+
+#define ADD4_LIMB64(A3, A2, A1, A0, B3, B2, B1, B0, C3, C2, C1, C0) \
+  __asm__ ("addq %11, %3\n" \
+          "adcq %10, %2\n" \
+          "adcq %9, %1\n" \
+          "adcq %8, %0\n" \
+          : "=r" (A3), \
+            "=&r" (A2), \
+            "=&r" (A1), \
+            "=&r" (A0) \
+          : "0" ((mpi_limb_t)(B3)), \
+            "1" ((mpi_limb_t)(B2)), \
+            "2" ((mpi_limb_t)(B1)), \
+            "3" ((mpi_limb_t)(B0)), \
+            "rme" ((mpi_limb_t)(C3)), \
+            "rme" ((mpi_limb_t)(C2)), \
+            "rme" ((mpi_limb_t)(C1)), \
+            "rme" ((mpi_limb_t)(C0)) \
+          : "cc")
+
+#define SUB4_LIMB64(A3, A2, A1, A0, B3, B2, B1, B0, C3, C2, C1, C0) \
+  __asm__ ("subq %11, %3\n" \
+          "sbbq %10, %2\n" \
+          "sbbq %9, %1\n" \
+          "sbbq %8, %0\n" \
+          : "=r" (A3), \
+            "=&r" (A2), \
+            "=&r" (A1), \
+            "=&r" (A0) \
+          : "0" ((mpi_limb_t)(B3)), \
+            "1" ((mpi_limb_t)(B2)), \
+            "2" ((mpi_limb_t)(B1)), \
+            "3" ((mpi_limb_t)(B0)), \
+            "rme" ((mpi_limb_t)(C3)), \
+            "rme" ((mpi_limb_t)(C2)), \
+            "rme" ((mpi_limb_t)(C1)), \
+            "rme" ((mpi_limb_t)(C0)) \
+          : "cc")
+
+#define ADD5_LIMB64(A4, A3, A2, A1, A0, B4, B3, B2, B1, B0, \
+                    C4, C3, C2, C1, C0) \
+  __asm__ ("addq %14, %4\n" \
+          "adcq %13, %3\n" \
+          "adcq %12, %2\n" \
+          "adcq %11, %1\n" \
+          "adcq %10, %0\n" \
+          : "=r" (A4), \
+            "=&r" (A3), \
+            "=&r" (A2), \
+            "=&r" (A1), \
+            "=&r" (A0) \
+          : "0" ((mpi_limb_t)(B4)), \
+            "1" ((mpi_limb_t)(B3)), \
+            "2" ((mpi_limb_t)(B2)), \
+            "3" ((mpi_limb_t)(B1)), \
+            "4" ((mpi_limb_t)(B0)), \
+            "rme" ((mpi_limb_t)(C4)), \
+            "rme" ((mpi_limb_t)(C3)), \
+            "rme" ((mpi_limb_t)(C2)), \
+            "rme" ((mpi_limb_t)(C1)), \
+            "rme" ((mpi_limb_t)(C0)) \
+          : "cc")
+
+#define SUB5_LIMB64(A4, A3, A2, A1, A0, B4, B3, B2, B1, B0, \
+                    C4, C3, C2, C1, C0) \
+  __asm__ ("subq %14, %4\n" \
+          "sbbq %13, %3\n" \
+          "sbbq %12, %2\n" \
+          "sbbq %11, %1\n" \
+          "sbbq %10, %0\n" \
+          : "=r" (A4), \
+            "=&r" (A3), \
+            "=&r" (A2), \
+            "=&r" (A1), \
+            "=&r" (A0) \
+          : "0" ((mpi_limb_t)(B4)), \
+            "1" ((mpi_limb_t)(B3)), \
+            "2" ((mpi_limb_t)(B2)), \
+            "3" ((mpi_limb_t)(B1)), \
+            "4" ((mpi_limb_t)(B0)), \
+            "rme" ((mpi_limb_t)(C4)), \
+            "rme" ((mpi_limb_t)(C3)), \
+            "rme" ((mpi_limb_t)(C2)), \
+            "rme" ((mpi_limb_t)(C1)), \
+            "rme" ((mpi_limb_t)(C0)) \
+          : "cc")
+
+#endif /* __x86_64__ */
+
+
+/* ARM AArch64 addition/subtraction helpers.  */
+#if defined (__aarch64__) && defined(HAVE_CPU_ARCH_ARM) && __GNUC__ >= 4
+
+#define ADD3_LIMB64(A2, A1, A0, B2, B1, B0, C2, C1, C0) \
+  __asm__ ("adds %2, %5, %8\n" \
+          "adcs %1, %4, %7\n" \
+          "adc  %0, %3, %6\n" \
+          : "=r" (A2), \
+            "=&r" (A1), \
+            "=&r" (A0) \
+          : "r" ((mpi_limb_t)(B2)), \
+            "r" ((mpi_limb_t)(B1)), \
+            "r" ((mpi_limb_t)(B0)), \
+            "r" ((mpi_limb_t)(C2)), \
+            "r" ((mpi_limb_t)(C1)), \
+            "r" ((mpi_limb_t)(C0)) \
+          : "cc")
+
+#define SUB3_LIMB64(A2, A1, A0, B2, B1, B0, C2, C1, C0) \
+  __asm__ ("subs %2, %5, %8\n" \
+          "sbcs %1, %4, %7\n" \
+          "sbc  %0, %3, %6\n" \
+          : "=r" (A2), \
+            "=&r" (A1), \
+            "=&r" (A0) \
+          : "r" ((mpi_limb_t)(B2)), \
+            "r" ((mpi_limb_t)(B1)), \
+            "r" ((mpi_limb_t)(B0)), \
+            "r" ((mpi_limb_t)(C2)), \
+            "r" ((mpi_limb_t)(C1)), \
+            "r" ((mpi_limb_t)(C0)) \
+          : "cc")
+
+#define ADD4_LIMB64(A3, A2, A1, A0, B3, B2, B1, B0, C3, C2, C1, C0) \
+  __asm__ ("adds %3, %7, %11\n" \
+          "adcs %2, %6, %10\n" \
+          "adcs %1, %5, %9\n" \
+          "adc  %0, %4, %8\n" \
+          : "=r" (A3), \
+            "=&r" (A2), \
+            "=&r" (A1), \
+            "=&r" (A0) \
+          : "r" ((mpi_limb_t)(B3)), \
+            "r" ((mpi_limb_t)(B2)), \
+            "r" ((mpi_limb_t)(B1)), \
+            "r" ((mpi_limb_t)(B0)), \
+            "r" ((mpi_limb_t)(C3)), \
+            "r" ((mpi_limb_t)(C2)), \
+            "r" ((mpi_limb_t)(C1)), \
+            "r" ((mpi_limb_t)(C0)) \
+          : "cc")
+
+#define SUB4_LIMB64(A3, A2, A1, A0, B3, B2, B1, B0, C3, C2, C1, C0) \
+  __asm__ ("subs %3, %7, %11\n" \
+          "sbcs %2, %6, %10\n" \
+          "sbcs %1, %5, %9\n" \
+          "sbc  %0, %4, %8\n" \
+          : "=r" (A3), \
+            "=&r" (A2), \
+            "=&r" (A1), \
+            "=&r" (A0) \
+          : "r" ((mpi_limb_t)(B3)), \
+            "r" ((mpi_limb_t)(B2)), \
+            "r" ((mpi_limb_t)(B1)), \
+            "r" ((mpi_limb_t)(B0)), \
+            "r" ((mpi_limb_t)(C3)), \
+            "r" ((mpi_limb_t)(C2)), \
+            "r" ((mpi_limb_t)(C1)), \
+            "r" ((mpi_limb_t)(C0)) \
+          : "cc")
+
+#define ADD5_LIMB64(A4, A3, A2, A1, A0, B4, B3, B2, B1, B0, \
+                    C4, C3, C2, C1, C0) \
+  __asm__ ("adds %4, %9, %14\n" \
+          "adcs %3, %8, %13\n" \
+          "adcs %2, %7, %12\n" \
+          "adcs %1, %6, %11\n" \
+          "adc  %0, %5, %10\n" \
+          : "=r" (A4), \
+            "=&r" (A3), \
+            "=&r" (A2), \
+            "=&r" (A1), \
+            "=&r" (A0) \
+          : "r" ((mpi_limb_t)(B4)), \
+            "r" ((mpi_limb_t)(B3)), \
+            "r" ((mpi_limb_t)(B2)), \
+            "r" ((mpi_limb_t)(B1)), \
+            "r" ((mpi_limb_t)(B0)), \
+            "r" ((mpi_limb_t)(C4)), \
+            "r" ((mpi_limb_t)(C3)), \
+            "r" ((mpi_limb_t)(C2)), \
+            "r" ((mpi_limb_t)(C1)), \
+            "r" ((mpi_limb_t)(C0)) \
+          : "cc")
+
+#define SUB5_LIMB64(A4, A3, A2, A1, A0, B4, B3, B2, B1, B0, \
+                    C4, C3, C2, C1, C0) \
+  __asm__ ("subs %4, %9, %14\n" \
+          "sbcs %3, %8, %13\n" \
+          "sbcs %2, %7, %12\n" \
+          "sbcs %1, %6, %11\n" \
+          "sbc  %0, %5, %10\n" \
+          : "=r" (A4), \
+            "=&r" (A3), \
+            "=&r" (A2), \
+            "=&r" (A1), \
+            "=&r" (A0) \
+          : "r" ((mpi_limb_t)(B4)), \
+            "r" ((mpi_limb_t)(B3)), \
+            "r" ((mpi_limb_t)(B2)), \
+            "r" ((mpi_limb_t)(B1)), \
+            "r" ((mpi_limb_t)(B0)), \
+            "r" ((mpi_limb_t)(C4)), \
+            "r" ((mpi_limb_t)(C3)), \
+            "r" ((mpi_limb_t)(C2)), \
+            "r" ((mpi_limb_t)(C1)), \
+            "r" ((mpi_limb_t)(C0)) \
+          : "cc")
+
+#endif /* __aarch64__ */
+
+
+/* PowerPC64 addition/subtraction helpers.  */
+#if defined (__powerpc__) && defined(HAVE_CPU_ARCH_PPC) && __GNUC__ >= 4
+
+#define ADD3_LIMB64(A2, A1, A0, B2, B1, B0, C2, C1, C0) \
+  __asm__ ("addc %2, %8, %5\n" \
+          "adde %1, %7, %4\n" \
+          "adde %0, %6, %3\n" \
+          : "=r" (A2), \
+            "=&r" (A1), \
+            "=&r" (A0) \
+          : "r" ((mpi_limb_t)(B2)), \
+            "r" ((mpi_limb_t)(B1)), \
+            "r" ((mpi_limb_t)(B0)), \
+            "r" ((mpi_limb_t)(C2)), \
+            "r" ((mpi_limb_t)(C1)), \
+            "r" ((mpi_limb_t)(C0)) \
+          : "cc", "r0")
+
+#define SUB3_LIMB64(A2, A1, A0, B2, B1, B0, C2, C1, C0) \
+  __asm__ ("subfc %2, %8, %5\n" \
+          "subfe %1, %7, %4\n" \
+          "subfe %0, %6, %3\n" \
+          : "=r" (A2), \
+            "=&r" (A1), \
+            "=&r" (A0) \
+          : "r" ((mpi_limb_t)(B2)), \
+            "r" ((mpi_limb_t)(B1)), \
+            "r" ((mpi_limb_t)(B0)), \
+            "r" ((mpi_limb_t)(C2)), \
+            "r" ((mpi_limb_t)(C1)), \
+            "r" ((mpi_limb_t)(C0)) \
+          : "cc", "r0")
+
+#define ADD4_LIMB64(A3, A2, A1, A0, B3, B2, B1, B0, C3, C2, C1, C0) \
+  __asm__ ("addc %3, %11, %7\n" \
+          "adde %2, %10, %6\n" \
+          "adde %1, %9, %5\n" \
+          "adde %0, %8, %4\n" \
+          : "=r" (A3), \
+            "=&r" (A2), \
+            "=&r" (A1), \
+            "=&r" (A0) \
+          : "r" ((mpi_limb_t)(B3)), \
+            "r" ((mpi_limb_t)(B2)), \
+            "r" ((mpi_limb_t)(B1)), \
+            "r" ((mpi_limb_t)(B0)), \
+            "r" ((mpi_limb_t)(C3)), \
+            "r" ((mpi_limb_t)(C2)), \
+            "r" ((mpi_limb_t)(C1)), \
+            "r" ((mpi_limb_t)(C0)) \
+          : "cc")
+
+#define SUB4_LIMB64(A3, A2, A1, A0, B3, B2, B1, B0, C3, C2, C1, C0) \
+  __asm__ ("subfc %3, %11, %7\n" \
+          "subfe %2, %10, %6\n" \
+          "subfe %1, %9, %5\n" \
+          "subfe %0, %8, %4\n" \
+          : "=r" (A3), \
+            "=&r" (A2), \
+            "=&r" (A1), \
+            "=&r" (A0) \
+          : "r" ((mpi_limb_t)(B3)), \
+            "r" ((mpi_limb_t)(B2)), \
+            "r" ((mpi_limb_t)(B1)), \
+            "r" ((mpi_limb_t)(B0)), \
+            "r" ((mpi_limb_t)(C3)), \
+            "r" ((mpi_limb_t)(C2)), \
+            "r" ((mpi_limb_t)(C1)), \
+            "r" ((mpi_limb_t)(C0)) \
+          : "cc")
+
+#define ADD5_LIMB64(A4, A3, A2, A1, A0, B4, B3, B2, B1, B0, \
+                           C4, C3, C2, C1, C0) \
+  __asm__ ("addc %4, %14, %9\n" \
+          "adde %3, %13, %8\n" \
+          "adde %2, %12, %7\n" \
+          "adde %1, %11, %6\n" \
+          "adde %0, %10, %5\n" \
+          : "=r" (A4), \
+            "=&r" (A3), \
+            "=&r" (A2), \
+            "=&r" (A1), \
+            "=&r" (A0) \
+          : "r" ((mpi_limb_t)(B4)), \
+            "r" ((mpi_limb_t)(B3)), \
+            "r" ((mpi_limb_t)(B2)), \
+            "r" ((mpi_limb_t)(B1)), \
+            "r" ((mpi_limb_t)(B0)), \
+            "r" ((mpi_limb_t)(C4)), \
+            "r" ((mpi_limb_t)(C3)), \
+            "r" ((mpi_limb_t)(C2)), \
+            "r" ((mpi_limb_t)(C1)), \
+            "r" ((mpi_limb_t)(C0)) \
+          : "cc")
+
+#define SUB5_LIMB64(A4, A3, A2, A1, A0, B4, B3, B2, B1, B0, \
+                           C4, C3, C2, C1, C0) \
+  __asm__ ("subfc %4, %14, %9\n" \
+          "subfe %3, %13, %8\n" \
+          "subfe %2, %12, %7\n" \
+          "subfe %1, %11, %6\n" \
+          "subfe %0, %10, %5\n" \
+          : "=r" (A4), \
+            "=&r" (A3), \
+            "=&r" (A2), \
+            "=&r" (A1), \
+            "=&r" (A0) \
+          : "r" ((mpi_limb_t)(B4)), \
+            "r" ((mpi_limb_t)(B3)), \
+            "r" ((mpi_limb_t)(B2)), \
+            "r" ((mpi_limb_t)(B1)), \
+            "r" ((mpi_limb_t)(B0)), \
+            "r" ((mpi_limb_t)(C4)), \
+            "r" ((mpi_limb_t)(C3)), \
+            "r" ((mpi_limb_t)(C2)), \
+            "r" ((mpi_limb_t)(C1)), \
+            "r" ((mpi_limb_t)(C0)) \
+          : "cc")
+
+#endif /* __powerpc__ */
+
+
+/* s390x/zSeries addition/subtraction helpers.  */
+#if defined (__s390x__) && defined(HAVE_CPU_ARCH_S390X) && __GNUC__ >= 4
+
+#define ADD3_LIMB64(A2, A1, A0, B2, B1, B0, C2, C1, C0) \
+  __asm__ ("algr %2, %8\n" \
+          "alcgr %1, %7\n" \
+          "alcgr %0, %6\n" \
+          : "=r" (A2), \
+            "=&r" (A1), \
+            "=&r" (A0) \
+          : "0" ((mpi_limb_t)(B2)), \
+            "1" ((mpi_limb_t)(B1)), \
+            "2" ((mpi_limb_t)(B0)), \
+            "r" ((mpi_limb_t)(C2)), \
+            "r" ((mpi_limb_t)(C1)), \
+            "r" ((mpi_limb_t)(C0)) \
+          : "cc")
+
+#define SUB3_LIMB64(A3, A2, A1, A0, B2, B1, B0, C2, C1, C0) \
+  __asm__ ("slgr %2, %8\n" \
+          "slbgr %1, %7\n" \
+          "slbgr %0, %6\n" \
+          : "=r" (A2), \
+            "=&r" (A1), \
+            "=&r" (A0) \
+          : "0" ((mpi_limb_t)(B2)), \
+            "1" ((mpi_limb_t)(B1)), \
+            "2" ((mpi_limb_t)(B0)), \
+            "r" ((mpi_limb_t)(C2)), \
+            "r" ((mpi_limb_t)(C1)), \
+            "r" ((mpi_limb_t)(C0)) \
+          : "cc")
+
+#define ADD4_LIMB64(A3, A2, A1, A0, B3, B2, B1, B0, C3, C2, C1, C0) \
+  __asm__ ("algr %3, %11\n" \
+          "alcgr %2, %10\n" \
+          "alcgr %1, %9\n" \
+          "alcgr %0, %8\n" \
+          : "=r" (A3), \
+            "=&r" (A2), \
+            "=&r" (A1), \
+            "=&r" (A0) \
+          : "0" ((mpi_limb_t)(B3)), \
+            "1" ((mpi_limb_t)(B2)), \
+            "2" ((mpi_limb_t)(B1)), \
+            "3" ((mpi_limb_t)(B0)), \
+            "r" ((mpi_limb_t)(C3)), \
+            "r" ((mpi_limb_t)(C2)), \
+            "r" ((mpi_limb_t)(C1)), \
+            "r" ((mpi_limb_t)(C0)) \
+          : "cc")
+
+#define SUB4_LIMB64(A3, A2, A1, A0, B3, B2, B1, B0, C3, C2, C1, C0) \
+  __asm__ ("slgr %3, %11\n" \
+          "slbgr %2, %10\n" \
+          "slbgr %1, %9\n" \
+          "slbgr %0, %8\n" \
+          : "=r" (A3), \
+            "=&r" (A2), \
+            "=&r" (A1), \
+            "=&r" (A0) \
+          : "0" ((mpi_limb_t)(B3)), \
+            "1" ((mpi_limb_t)(B2)), \
+            "2" ((mpi_limb_t)(B1)), \
+            "3" ((mpi_limb_t)(B0)), \
+            "r" ((mpi_limb_t)(C3)), \
+            "r" ((mpi_limb_t)(C2)), \
+            "r" ((mpi_limb_t)(C1)), \
+            "r" ((mpi_limb_t)(C0)) \
+          : "cc")
+
+#define ADD5_LIMB64(A4, A3, A2, A1, A0, B4, B3, B2, B1, B0, \
+                    C4, C3, C2, C1, C0) \
+  __asm__ ("algr %4, %14\n" \
+          "alcgr %3, %13\n" \
+          "alcgr %2, %12\n" \
+          "alcgr %1, %11\n" \
+          "alcgr %0, %10\n" \
+          : "=r" (A4), \
+            "=&r" (A3), \
+            "=&r" (A2), \
+            "=&r" (A1), \
+            "=&r" (A0) \
+          : "0" ((mpi_limb_t)(B4)), \
+            "1" ((mpi_limb_t)(B3)), \
+            "2" ((mpi_limb_t)(B2)), \
+            "3" ((mpi_limb_t)(B1)), \
+            "4" ((mpi_limb_t)(B0)), \
+            "r" ((mpi_limb_t)(C4)), \
+            "r" ((mpi_limb_t)(C3)), \
+            "r" ((mpi_limb_t)(C2)), \
+            "r" ((mpi_limb_t)(C1)), \
+            "r" ((mpi_limb_t)(C0)) \
+          : "cc")
+
+#define SUB5_LIMB64(A4, A3, A2, A1, A0, B4, B3, B2, B1, B0, \
+                    C4, C3, C2, C1, C0) \
+  __asm__ ("slgr %4, %14\n" \
+          "slbgr %3, %13\n" \
+          "slbgr %2, %12\n" \
+          "slbgr %1, %11\n" \
+          "slbgr %0, %10\n" \
+          : "=r" (A4), \
+            "=&r" (A3), \
+            "=&r" (A2), \
+            "=&r" (A1), \
+            "=&r" (A0) \
+          : "0" ((mpi_limb_t)(B4)), \
+            "1" ((mpi_limb_t)(B3)), \
+            "2" ((mpi_limb_t)(B2)), \
+            "3" ((mpi_limb_t)(B1)), \
+            "4" ((mpi_limb_t)(B0)), \
+            "r" ((mpi_limb_t)(C4)), \
+            "r" ((mpi_limb_t)(C3)), \
+            "r" ((mpi_limb_t)(C2)), \
+            "r" ((mpi_limb_t)(C1)), \
+            "r" ((mpi_limb_t)(C0)) \
+          : "cc")
+
+#endif /* __s390x__ */
+
+
+/* Common 64-bit arch addition/subtraction macros.  */
+
+#define ADD2_LIMB64(A1, A0, B1, B0, C1, C0) \
+  add_ssaaaa(A1, A0, B1, B0, C1, C0)
+
+#define SUB2_LIMB64(A1, A0, B1, B0, C1, C0) \
+  sub_ddmmss(A1, A0, B1, B0, C1, C0)
+
+#endif /* BYTES_PER_MPI_LIMB == 8 */
+
+
+#if BYTES_PER_MPI_LIMB == 4
+
+/* 64-bit limb definitions for 32-bit architectures.  */
+
+#define LIMBS_PER_LIMB64 2
+#define LIMB_FROM64(v) ((v).lo)
+#define HIBIT_LIMB64(v) ((v).hi >> (BITS_PER_MPI_LIMB - 1))
+#define HI32_LIMB64(v) ((v).hi)
+#define LO32_LIMB64(v) ((v).lo)
+#define LOAD32(x, pos) ((x)[pos])
+#define LIMB64_C(hi, lo) { (lo), (hi) }
+
+typedef struct
+{
+  mpi_limb_t lo;
+  mpi_limb_t hi;
+} mpi_limb64_t;
+
+static inline mpi_limb64_t
+LOAD64(const mpi_ptr_t x, unsigned int pos)
+{
+  mpi_limb64_t v;
+  v.lo = x[pos * 2 + 0];
+  v.hi = x[pos * 2 + 1];
+  return v;
+}
+
+static inline void
+STORE64(mpi_ptr_t x, unsigned int pos, mpi_limb64_t v)
+{
+  x[pos * 2 + 0] = v.lo;
+  x[pos * 2 + 1] = v.hi;
+}
+
+static inline mpi_limb64_t
+MASK_AND64(mpi_limb_t mask, mpi_limb64_t val)
+{
+  val.lo &= mask;
+  val.hi &= mask;
+  return val;
+}
+
+static inline mpi_limb64_t
+LIMB_OR64(mpi_limb64_t val1, mpi_limb64_t val2)
+{
+  val1.lo |= val2.lo;
+  val1.hi |= val2.hi;
+  return val1;
+}
+
+static inline void
+STORE64_COND(mpi_ptr_t x, unsigned int pos, mpi_limb_t mask1,
+            mpi_limb64_t val1, mpi_limb_t mask2, mpi_limb64_t val2)
+{
+  x[pos * 2 + 0] = (mask1 & val1.lo) | (mask2 & val2.lo);
+  x[pos * 2 + 1] = (mask1 & val1.hi) | (mask2 & val2.hi);
+}
+
+static inline mpi_limb64_t
+LIMB_TO64(mpi_limb_t x)
+{
+  mpi_limb64_t v;
+  v.lo = x;
+  v.hi = 0;
+  return v;
+}
+
+static inline mpi_limb64_t
+LIMB64_HILO(mpi_limb_t hi, mpi_limb_t lo)
+{
+  mpi_limb64_t v;
+  v.lo = lo;
+  v.hi = hi;
+  return v;
+}
+
+
+/* i386 addition/subtraction helpers.  */
+#if defined (__i386__) && defined(HAVE_CPU_ARCH_X86) && __GNUC__ >= 4
+
+#define ADD4_LIMB32(a3, a2, a1, a0, b3, b2, b1, b0, c3, c2, c1, c0) \
+  __asm__ ("addl %11, %3\n" \
+          "adcl %10, %2\n" \
+          "adcl %9, %1\n" \
+          "adcl %8, %0\n" \
+          : "=r" (a3), \
+            "=&r" (a2), \
+            "=&r" (a1), \
+            "=&r" (a0) \
+          : "0" ((mpi_limb_t)(b3)), \
+            "1" ((mpi_limb_t)(b2)), \
+            "2" ((mpi_limb_t)(b1)), \
+            "3" ((mpi_limb_t)(b0)), \
+            "g" ((mpi_limb_t)(c3)), \
+            "g" ((mpi_limb_t)(c2)), \
+            "g" ((mpi_limb_t)(c1)), \
+            "g" ((mpi_limb_t)(c0)) \
+          : "cc")
+
+#define ADD6_LIMB32(a5, a4, a3, a2, a1, a0, b5, b4, b3, b2, b1, b0, \
+                   c5, c4, c3, c2, c1, c0) do { \
+    mpi_limb_t __carry6_32; \
+    __asm__ ("addl %10, %3\n" \
+            "adcl %9, %2\n" \
+            "adcl %8, %1\n" \
+            "sbbl %0, %0\n" \
+            : "=r" (__carry6_32), \
+              "=&r" (a2), \
+              "=&r" (a1), \
+              "=&r" (a0) \
+            : "0" ((mpi_limb_t)(0)), \
+              "1" ((mpi_limb_t)(b2)), \
+              "2" ((mpi_limb_t)(b1)), \
+              "3" ((mpi_limb_t)(b0)), \
+              "g" ((mpi_limb_t)(c2)), \
+              "g" ((mpi_limb_t)(c1)), \
+              "g" ((mpi_limb_t)(c0)) \
+            : "cc"); \
+    __asm__ ("addl $1, %3\n" \
+            "adcl %10, %2\n" \
+            "adcl %9, %1\n" \
+            "adcl %8, %0\n" \
+            : "=r" (a5), \
+              "=&r" (a4), \
+              "=&r" (a3), \
+              "=&r" (__carry6_32) \
+            : "0" ((mpi_limb_t)(b5)), \
+              "1" ((mpi_limb_t)(b4)), \
+              "2" ((mpi_limb_t)(b3)), \
+              "3" ((mpi_limb_t)(__carry6_32)), \
+              "g" ((mpi_limb_t)(c5)), \
+              "g" ((mpi_limb_t)(c4)), \
+              "g" ((mpi_limb_t)(c3)) \
+          : "cc"); \
+  } while (0)
+
+#define SUB4_LIMB32(a3, a2, a1, a0, b3, b2, b1, b0, c3, c2, c1, c0) \
+  __asm__ ("subl %11, %3\n" \
+          "sbbl %10, %2\n" \
+          "sbbl %9, %1\n" \
+          "sbbl %8, %0\n" \
+          : "=r" (a3), \
+            "=&r" (a2), \
+            "=&r" (a1), \
+            "=&r" (a0) \
+          : "0" ((mpi_limb_t)(b3)), \
+            "1" ((mpi_limb_t)(b2)), \
+            "2" ((mpi_limb_t)(b1)), \
+            "3" ((mpi_limb_t)(b0)), \
+            "g" ((mpi_limb_t)(c3)), \
+            "g" ((mpi_limb_t)(c2)), \
+            "g" ((mpi_limb_t)(c1)), \
+            "g" ((mpi_limb_t)(c0)) \
+          : "cc")
+
+#define SUB6_LIMB32(a5, a4, a3, a2, a1, a0, b5, b4, b3, b2, b1, b0, \
+                   c5, c4, c3, c2, c1, c0) do { \
+    mpi_limb_t __borrow6_32; \
+    __asm__ ("subl %10, %3\n" \
+            "sbbl %9, %2\n" \
+            "sbbl %8, %1\n" \
+            "sbbl %0, %0\n" \
+            : "=r" (__borrow6_32), \
+              "=&r" (a2), \
+              "=&r" (a1), \
+              "=&r" (a0) \
+            : "0" ((mpi_limb_t)(0)), \
+              "1" ((mpi_limb_t)(b2)), \
+              "2" ((mpi_limb_t)(b1)), \
+              "3" ((mpi_limb_t)(b0)), \
+              "g" ((mpi_limb_t)(c2)), \
+              "g" ((mpi_limb_t)(c1)), \
+              "g" ((mpi_limb_t)(c0)) \
+            : "cc"); \
+    __asm__ ("addl $1, %3\n" \
+            "sbbl %10, %2\n" \
+            "sbbl %9, %1\n" \
+            "sbbl %8, %0\n" \
+            : "=r" (a5), \
+              "=&r" (a4), \
+              "=&r" (a3), \
+              "=&r" (__borrow6_32) \
+            : "0" ((mpi_limb_t)(b5)), \
+              "1" ((mpi_limb_t)(b4)), \
+              "2" ((mpi_limb_t)(b3)), \
+              "3" ((mpi_limb_t)(__borrow6_32)), \
+              "g" ((mpi_limb_t)(c5)), \
+              "g" ((mpi_limb_t)(c4)), \
+              "g" ((mpi_limb_t)(c3)) \
+          : "cc"); \
+  } while (0)
+
+#endif /* __i386__ */
+
+
+/* ARM addition/subtraction helpers.  */
+#ifdef HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS
+
+#define ADD4_LIMB32(A3, A2, A1, A0, B3, B2, B1, B0, C3, C2, C1, C0) \
+  __asm__ ("adds %3, %7, %11\n" \
+          "adcs %2, %6, %10\n" \
+          "adcs %1, %5, %9\n" \
+          "adc  %0, %4, %8\n" \
+          : "=r" (A3), \
+            "=&r" (A2), \
+            "=&r" (A1), \
+            "=&r" (A0) \
+          : "r" ((mpi_limb_t)(B3)), \
+            "r" ((mpi_limb_t)(B2)), \
+            "r" ((mpi_limb_t)(B1)), \
+            "r" ((mpi_limb_t)(B0)), \
+            "Ir" ((mpi_limb_t)(C3)), \
+            "Ir" ((mpi_limb_t)(C2)), \
+            "Ir" ((mpi_limb_t)(C1)), \
+            "Ir" ((mpi_limb_t)(C0)) \
+          : "cc")
+
+#define ADD6_LIMB32(A5, A4, A3, A2, A1, A0, B5, B4, B3, B2, B1, B0, \
+                   C5, C4, C3, C2, C1, C0) do { \
+    mpi_limb_t __carry6_32; \
+    __asm__ ("adds %3, %7, %10\n" \
+            "adcs %2, %6, %9\n" \
+            "adcs %1, %5, %8\n" \
+            "adc  %0, %4, %4\n" \
+            : "=r" (__carry6_32), \
+              "=&r" (A2), \
+              "=&r" (A1), \
+              "=&r" (A0) \
+            : "r" ((mpi_limb_t)(0)), \
+              "r" ((mpi_limb_t)(B2)), \
+              "r" ((mpi_limb_t)(B1)), \
+              "r" ((mpi_limb_t)(B0)), \
+              "Ir" ((mpi_limb_t)(C2)), \
+              "Ir" ((mpi_limb_t)(C1)), \
+              "Ir" ((mpi_limb_t)(C0)) \
+            : "cc"); \
+    ADD4_LIMB32(A5, A4, A3, __carry6_32, B5, B4, B3, __carry6_32, \
+               C5, C4, C3, 0xffffffffU); \
+  } while (0)
+
+#define SUB4_LIMB32(A3, A2, A1, A0, B3, B2, B1, B0, C3, C2, C1, C0) \
+  __asm__ ("subs %3, %7, %11\n" \
+          "sbcs %2, %6, %10\n" \
+          "sbcs %1, %5, %9\n" \
+          "sbc  %0, %4, %8\n" \
+          : "=r" (A3), \
+            "=&r" (A2), \
+            "=&r" (A1), \
+            "=&r" (A0) \
+          : "r" ((mpi_limb_t)(B3)), \
+            "r" ((mpi_limb_t)(B2)), \
+            "r" ((mpi_limb_t)(B1)), \
+            "r" ((mpi_limb_t)(B0)), \
+            "Ir" ((mpi_limb_t)(C3)), \
+            "Ir" ((mpi_limb_t)(C2)), \
+            "Ir" ((mpi_limb_t)(C1)), \
+            "Ir" ((mpi_limb_t)(C0)) \
+          : "cc")
+
+
+#define SUB6_LIMB32(A5, A4, A3, A2, A1, A0, B5, B4, B3, B2, B1, B0, \
+                   C5, C4, C3, C2, C1, C0) do { \
+    mpi_limb_t __borrow6_32; \
+    __asm__ ("subs %3, %7, %10\n" \
+            "sbcs %2, %6, %9\n" \
+            "sbcs %1, %5, %8\n" \
+            "sbc  %0, %4, %4\n" \
+            : "=r" (__borrow6_32), \
+              "=&r" (A2), \
+              "=&r" (A1), \
+              "=&r" (A0) \
+            : "r" ((mpi_limb_t)(0)), \
+              "r" ((mpi_limb_t)(B2)), \
+              "r" ((mpi_limb_t)(B1)), \
+              "r" ((mpi_limb_t)(B0)), \
+              "Ir" ((mpi_limb_t)(C2)), \
+              "Ir" ((mpi_limb_t)(C1)), \
+              "Ir" ((mpi_limb_t)(C0)) \
+            : "cc"); \
+    SUB4_LIMB32(A5, A4, A3, __borrow6_32, B5, B4, B3, 0, \
+               C5, C4, C3, -__borrow6_32); \
+  } while (0)
+
+#endif /* HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS */
+
+
+/* Common 32-bit arch addition/subtraction macros.  */
+
+#if defined(ADD4_LIMB32)
+/* A[0..1] = B[0..1] + C[0..1] */
+#define ADD2_LIMB64(A1, A0, B1, B0, C1, C0) \
+       ADD4_LIMB32(A1.hi, A1.lo, A0.hi, A0.lo, \
+                   B1.hi, B1.lo, B0.hi, B0.lo, \
+                   C1.hi, C1.lo, C0.hi, C0.lo)
+#else
+/* A[0..1] = B[0..1] + C[0..1] */
+#define ADD2_LIMB64(A1, A0, B1, B0, C1, C0) do { \
+    mpi_limb_t __carry2_0, __carry2_1; \
+    add_ssaaaa(__carry2_0, A0.lo, 0, B0.lo, 0, C0.lo); \
+    add_ssaaaa(__carry2_1, A0.hi, 0, B0.hi, 0, C0.hi); \
+    add_ssaaaa(__carry2_1, A0.hi, __carry2_1, A0.hi, 0, __carry2_0); \
+    add_ssaaaa(A1.hi, A1.lo, B1.hi, B1.lo, C1.hi, C1.lo); \
+    add_ssaaaa(A1.hi, A1.lo, A1.hi, A1.lo, 0, __carry2_1); \
+  } while (0)
+#endif
+
+#if defined(ADD6_LIMB32)
+/* A[0..2] = B[0..2] + C[0..2] */
+#define ADD3_LIMB64(A2, A1, A0, B2, B1, B0, C2, C1, C0) \
+       ADD6_LIMB32(A2.hi, A2.lo, A1.hi, A1.lo, A0.hi, A0.lo, \
+                   B2.hi, B2.lo, B1.hi, B1.lo, B0.hi, B0.lo, \
+                   C2.hi, C2.lo, C1.hi, C1.lo, C0.hi, C0.lo)
+#endif
+
+#if defined(ADD6_LIMB32)
+/* A[0..3] = B[0..3] + C[0..3] */
+#define ADD4_LIMB64(A3, A2, A1, A0, B3, B2, B1, B0, C3, C2, C1, C0) do { \
+    mpi_limb_t __carry4; \
+    ADD6_LIMB32(__carry4, A2.lo, A1.hi, A1.lo, A0.hi, A0.lo, \
+               0, B2.lo, B1.hi, B1.lo, B0.hi, B0.lo, \
+               0, C2.lo, C1.hi, C1.lo, C0.hi, C0.lo); \
+    ADD4_LIMB32(A3.hi, A3.lo, A2.hi, __carry4, \
+               B3.hi, B3.lo, B2.hi, __carry4, \
+               C3.hi, C3.lo, C2.hi, 0xffffffffU); \
+  } while (0)
+#endif
+
+#if defined(SUB4_LIMB32)
+/* A[0..1] = B[0..1] - C[0..1] */
+#define SUB2_LIMB64(A1, A0, B1, B0, C1, C0) \
+       SUB4_LIMB32(A1.hi, A1.lo, A0.hi, A0.lo, \
+                   B1.hi, B1.lo, B0.hi, B0.lo, \
+                   C1.hi, C1.lo, C0.hi, C0.lo)
+#else
+/* A[0..1] = B[0..1] - C[0..1] */
+#define SUB2_LIMB64(A1, A0, B1, B0, C1, C0) do { \
+    mpi_limb_t __borrow2_0, __borrow2_1; \
+    sub_ddmmss(__borrow2_0, A0.lo, 0, B0.lo, 0, C0.lo); \
+    sub_ddmmss(__borrow2_1, A0.hi, 0, B0.hi, 0, C0.hi); \
+    sub_ddmmss(__borrow2_1, A0.hi, __borrow2_1, A0.hi, 0, -__borrow2_0); \
+    sub_ddmmss(A1.hi, A1.lo, B1.hi, B1.lo, C1.hi, C1.lo); \
+    sub_ddmmss(A1.hi, A1.lo, A1.hi, A1.lo, 0, -__borrow2_1); \
+  } while (0)
+#endif
+
+#if defined(SUB6_LIMB32)
+/* A[0..2] = B[0..2] - C[0..2] */
+#define SUB3_LIMB64(A2, A1, A0, B2, B1, B0, C2, C1, C0) \
+       SUB6_LIMB32(A2.hi, A2.lo, A1.hi, A1.lo, A0.hi, A0.lo, \
+                   B2.hi, B2.lo, B1.hi, B1.lo, B0.hi, B0.lo, \
+                   C2.hi, C2.lo, C1.hi, C1.lo, C0.hi, C0.lo)
+#endif
+
+#if defined(SUB6_LIMB32)
+/* A[0..3] = B[0..3] - C[0..3] */
+#define SUB4_LIMB64(A3, A2, A1, A0, B3, B2, B1, B0, C3, C2, C1, C0) do { \
+    mpi_limb_t __borrow4; \
+    SUB6_LIMB32(__borrow4, A2.lo, A1.hi, A1.lo, A0.hi, A0.lo, \
+               0, B2.lo, B1.hi, B1.lo, B0.hi, B0.lo, \
+               0, C2.lo, C1.hi, C1.lo, C0.hi, C0.lo); \
+    SUB4_LIMB32(A3.hi, A3.lo, A2.hi, __borrow4, \
+               B3.hi, B3.lo, B2.hi, 0, \
+               C3.hi, C3.lo, C2.hi, -__borrow4); \
+  } while (0)
+#endif
+
+#endif /* BYTES_PER_MPI_LIMB == 4 */
+
+
+/* Common definitions.  */
+#define BITS_PER_MPI_LIMB64 (BITS_PER_MPI_LIMB * LIMBS_PER_LIMB64)
+#define BYTES_PER_MPI_LIMB64 (BYTES_PER_MPI_LIMB * LIMBS_PER_LIMB64)
+
+
+/* Common addition/subtraction macros.  */
+
+#ifndef ADD3_LIMB64
+/* A[0..2] = B[0..2] + C[0..2] */
+#define ADD3_LIMB64(A2, A1, A0, B2, B1, B0, C2, C1, C0) do { \
+    mpi_limb64_t __carry3; \
+    ADD2_LIMB64(__carry3, A0, zero, B0, zero, C0); \
+    ADD2_LIMB64(A2, A1, B2, B1, C2, C1); \
+    ADD2_LIMB64(A2, A1, A2, A1, zero, __carry3); \
+  } while (0)
+#endif
+
+#ifndef ADD4_LIMB64
+/* A[0..3] = B[0..3] + C[0..3] */
+#define ADD4_LIMB64(A3, A2, A1, A0, B3, B2, B1, B0, C3, C2, C1, C0) do { \
+    mpi_limb64_t __carry4; \
+    ADD3_LIMB64(__carry4, A1, A0, zero, B1, B0, zero, C1, C0); \
+    ADD2_LIMB64(A3, A2, B3, B2, C3, C2); \
+    ADD2_LIMB64(A3, A2, A3, A2, zero, __carry4); \
+  } while (0)
+#endif
+
+#ifndef ADD5_LIMB64
+/* A[0..4] = B[0..4] + C[0..4] */
+#define ADD5_LIMB64(A4, A3, A2, A1, A0, B4, B3, B2, B1, B0, \
+                    C4, C3, C2, C1, C0) do { \
+    mpi_limb64_t __carry5; \
+    ADD4_LIMB64(__carry5, A2, A1, A0, zero, B2, B1, B0, zero, C2, C1, C0); \
+    ADD2_LIMB64(A4, A3, B4, B3, C4, C3); \
+    ADD2_LIMB64(A4, A3, A4, A3, zero, __carry5); \
+  } while (0)
+#endif
+
+#ifndef ADD7_LIMB64
+/* A[0..6] = B[0..6] + C[0..6] */
+#define ADD7_LIMB64(A6, A5, A4, A3, A2, A1, A0, B6, B5, B4, B3, B2, B1, B0, \
+                    C6, C5, C4, C3, C2, C1, C0) do { \
+    mpi_limb64_t __carry7; \
+    ADD4_LIMB64(__carry7, A2, A1, A0, zero, B2, B1, B0, \
+               zero, C2, C1, C0); \
+    ADD5_LIMB64(A6, A5, A4, A3, __carry7, B6, B5, B4, B3, \
+               __carry7, C6, C5, C4, C3, LIMB64_HILO(-1, -1)); \
+  } while (0)
+#endif
+
+#ifndef SUB3_LIMB64
+/* A[0..2] = B[0..2] - C[0..2] */
+#define SUB3_LIMB64(A2, A1, A0, B2, B1, B0, C2, C1, C0) do { \
+    mpi_limb64_t __borrow3; \
+    SUB2_LIMB64(__borrow3, A0, zero, B0, zero, C0); \
+    SUB2_LIMB64(A2, A1, B2, B1, C2, C1); \
+    SUB2_LIMB64(A2, A1, A2, A1, zero, LIMB_TO64(-LIMB_FROM64(__borrow3))); \
+  } while (0)
+#endif
+
+#ifndef SUB4_LIMB64
+/* A[0..3] = B[0..3] - C[0..3] */
+#define SUB4_LIMB64(A3, A2, A1, A0, B3, B2, B1, B0, C3, C2, C1, C0) do { \
+    mpi_limb64_t __borrow4; \
+    SUB3_LIMB64(__borrow4, A1, A0, zero, B1, B0, zero, C1, C0); \
+    SUB2_LIMB64(A3, A2, B3, B2, C3, C2); \
+    SUB2_LIMB64(A3, A2, A3, A2, zero, LIMB_TO64(-LIMB_FROM64(__borrow4))); \
+  } while (0)
+#endif
+
+#ifndef SUB5_LIMB64
+/* A[0..4] = B[0..4] - C[0..4] */
+#define SUB5_LIMB64(A4, A3, A2, A1, A0, B4, B3, B2, B1, B0, \
+                    C4, C3, C2, C1, C0) do { \
+    mpi_limb64_t __borrow5; \
+    SUB4_LIMB64(__borrow5, A2, A1, A0, zero, B2, B1, B0, zero, C2, C1, C0); \
+    SUB2_LIMB64(A4, A3, B4, B3, C4, C3); \
+    SUB2_LIMB64(A4, A3, A4, A3, zero, LIMB_TO64(-LIMB_FROM64(__borrow5))); \
+  } while (0)
+#endif
+
+#ifndef SUB7_LIMB64
+/* A[0..6] = B[0..6] - C[0..6] */
+#define SUB7_LIMB64(A6, A5, A4, A3, A2, A1, A0, B6, B5, B4, B3, B2, B1, B0, \
+                    C6, C5, C4, C3, C2, C1, C0) do { \
+    mpi_limb64_t __borrow7; \
+    SUB4_LIMB64(__borrow7, A2, A1, A0, zero, B2, B1, B0, \
+               zero, C2, C1, C0); \
+    SUB5_LIMB64(A6, A5, A4, A3, __borrow7, B6, B5, B4, B3, zero, \
+               C6, C5, C4, C3, LIMB_TO64(-LIMB_FROM64(__borrow7))); \
+  } while (0)
+#endif
+
+
+#if defined(WORDS_BIGENDIAN) || (BITS_PER_MPI_LIMB64 != BITS_PER_MPI_LIMB)
+#define LOAD64_UNALIGNED(x, pos) \
+  LIMB64_HILO(LOAD32(x, 2 * (pos) + 2), LOAD32(x, 2 * (pos) + 1))
+#else
+#define LOAD64_UNALIGNED(x, pos) \
+  buf_get_le64((const byte *)(&(x)[pos]) + 4)
+#endif
+
+
+/* Helper functions.  */
+
+static inline int
+mpi_nbits_more_than (gcry_mpi_t w, unsigned int nbits)
+{
+  unsigned int nbits_nlimbs;
+  mpi_limb_t wlimb;
+  unsigned int n;
+
+  nbits_nlimbs = (nbits + BITS_PER_MPI_LIMB - 1) / BITS_PER_MPI_LIMB;
+
+  /* Note: Assumes that 'w' is normalized. */
+
+  if (w->nlimbs > nbits_nlimbs)
+    return 1;
+  if (w->nlimbs < nbits_nlimbs)
+    return 0;
+  if ((nbits % BITS_PER_MPI_LIMB) == 0)
+    return 0;
+
+  wlimb = w->d[nbits_nlimbs - 1];
+  if (wlimb == 0)
+    log_bug ("mpi_nbits_more_than: input mpi not normalized\n");
+
+  count_leading_zeros (n, wlimb);
+
+  return (BITS_PER_MPI_LIMB - n) > (nbits % BITS_PER_MPI_LIMB);
+}
+
+#endif /* GCRY_EC_INLINE_H */
diff --git a/grub-core/lib/libgcrypt/mpi/ec-internal.h 
b/grub-core/lib/libgcrypt/mpi/ec-internal.h
new file mode 100644
index 000000000..3f948aa00
--- /dev/null
+++ b/grub-core/lib/libgcrypt/mpi/ec-internal.h
@@ -0,0 +1,49 @@
+/* ec-internal.h - Internal declarations of ec*.c
+ * Copyright (C) 2013 g10 Code GmbH
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef GCRY_EC_INTERNAL_H
+#define GCRY_EC_INTERNAL_H
+
+#include <config.h>
+
+void _gcry_mpi_ec_ed25519_mod (gcry_mpi_t a);
+
+#ifndef ASM_DISABLED
+void _gcry_mpi_ec_nist192_mod (gcry_mpi_t w, mpi_ec_t ctx);
+void _gcry_mpi_ec_nist224_mod (gcry_mpi_t w, mpi_ec_t ctx);
+void _gcry_mpi_ec_nist256_mod (gcry_mpi_t w, mpi_ec_t ctx);
+void _gcry_mpi_ec_nist384_mod (gcry_mpi_t w, mpi_ec_t ctx);
+void _gcry_mpi_ec_nist521_mod (gcry_mpi_t w, mpi_ec_t ctx);
+#else
+# define _gcry_mpi_ec_nist192_mod NULL
+# define _gcry_mpi_ec_nist224_mod NULL
+# define _gcry_mpi_ec_nist256_mod NULL
+# define _gcry_mpi_ec_nist384_mod NULL
+# define _gcry_mpi_ec_nist521_mod NULL
+#endif
+
+#ifdef HAVE_GCC_INLINE_ASM_S390X
+int _gcry_s390x_ec_hw_mul_point (mpi_point_t result, gcry_mpi_t scalar,
+                                mpi_point_t point, mpi_ec_t ctx);
+# define mpi_ec_hw_mul_point _gcry_s390x_ec_hw_mul_point
+#else
+# define mpi_ec_hw_mul_point(r,s,p,c) (-1)
+#endif
+
+#endif /*GCRY_EC_INTERNAL_H*/
diff --git a/grub-core/lib/libgcrypt/mpi/ec-nist.c 
b/grub-core/lib/libgcrypt/mpi/ec-nist.c
new file mode 100644
index 000000000..f792405c7
--- /dev/null
+++ b/grub-core/lib/libgcrypt/mpi/ec-nist.c
@@ -0,0 +1,817 @@
+/* ec-nist.c -  NIST optimized elliptic curve functions
+ * Copyright (C) 2021 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+
+
+#ifndef ASM_DISABLED
+
+
+#include "mpi-internal.h"
+#include "longlong.h"
+#include "g10lib.h"
+#include "context.h"
+#include "ec-context.h"
+#include "ec-inline.h"
+#include "const-time.h"
+
+
+static inline
+void prefetch(const void *tab, size_t len)
+{
+  const volatile byte *vtab = tab;
+
+  if (len > 0 * 64)
+    (void)vtab[0 * 64];
+  if (len > 1 * 64)
+    (void)vtab[1 * 64];
+  if (len > 2 * 64)
+    (void)vtab[2 * 64];
+  if (len > 3 * 64)
+    (void)vtab[3 * 64];
+  if (len > 4 * 64)
+    (void)vtab[4 * 64];
+  if (len > 5 * 64)
+    (void)vtab[5 * 64];
+  if (len > 6 * 64)
+    (void)vtab[6 * 64];
+  if (len > 7 * 64)
+    (void)vtab[7 * 64];
+  if (len > 8 * 64)
+    (void)vtab[8 * 64];
+  if (len > 9 * 64)
+    (void)vtab[9 * 64];
+  if (len > 10 * 64)
+    (void)vtab[10 * 64];
+  (void)vtab[len - 1];
+}
+
+
+/* Fast reduction routines for NIST curves.  */
+
+void
+_gcry_mpi_ec_nist192_mod (gcry_mpi_t w, mpi_ec_t ctx)
+{
+  static const mpi_limb64_t p_mult[3][4] =
+  {
+    { /* P * 1 */
+      LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0xffffffffU, 0xfffffffeU),
+      LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0x00000000U, 0x00000000U)
+    },
+    { /* P * 2 */
+      LIMB64_C(0xffffffffU, 0xfffffffeU), LIMB64_C(0xffffffffU, 0xfffffffdU),
+      LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0x00000000U, 0x00000001U)
+    },
+    { /* P * 3 */
+      LIMB64_C(0xffffffffU, 0xfffffffdU), LIMB64_C(0xffffffffU, 0xfffffffcU),
+      LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0x00000000U, 0x00000002U)
+    }
+  };
+  const mpi_limb64_t zero = LIMB_TO64(0);
+  mpi_ptr_t wp;
+  mpi_size_t wsize = 192 / BITS_PER_MPI_LIMB64;
+  mpi_limb64_t s[wsize + 1];
+  mpi_limb64_t o[wsize + 1];
+  mpi_limb_t mask1;
+  mpi_limb_t mask2;
+  mpi_limb_t s_is_negative;
+  int carry;
+
+  MPN_NORMALIZE (w->d, w->nlimbs);
+  if (mpi_nbits_more_than (w, 2 * 192))
+    log_bug ("W must be less than m^2\n");
+
+  RESIZE_AND_CLEAR_IF_NEEDED (w, wsize * 2 * LIMBS_PER_LIMB64);
+  RESIZE_AND_CLEAR_IF_NEEDED (ctx->p, wsize * LIMBS_PER_LIMB64);
+
+  wp = w->d;
+
+  prefetch (p_mult, sizeof(p_mult));
+
+  /* See "FIPS 186-4, D.2.1 Curve P-192". */
+
+  s[0] = LOAD64(wp, 3);
+  ADD3_LIMB64 (s[3],  s[2],          s[1],
+              zero,  zero,          LOAD64(wp, 3),
+              zero,  LOAD64(wp, 4), LOAD64(wp, 4));
+
+  ADD4_LIMB64 (s[3],  s[2],          s[1],          s[0],
+              s[3],  s[2],          s[1],          s[0],
+              zero,  LOAD64(wp, 5), LOAD64(wp, 5), LOAD64(wp, 5));
+
+  ADD4_LIMB64 (s[3],  s[2],          s[1],          s[0],
+              s[3],  s[2],          s[1],          s[0],
+              zero,  LOAD64(wp, 2), LOAD64(wp, 1), LOAD64(wp, 0));
+
+  /* mod p:
+   *  's[3]' holds carry value (0..2). Subtract (carry + 1) * p. Result will be
+   *  with in range -p...p. Handle result being negative with addition and
+   *  conditional store. */
+
+  carry = LO32_LIMB64(s[3]);
+
+  SUB4_LIMB64 (s[3], s[2], s[1], s[0],
+              s[3], s[2], s[1], s[0],
+              p_mult[carry][3], p_mult[carry][2],
+              p_mult[carry][1], p_mult[carry][0]);
+
+  ADD4_LIMB64 (o[3], o[2], o[1], o[0],
+              s[3], s[2], s[1], s[0],
+              zero,
+              p_mult[0][2], p_mult[0][1], p_mult[0][0]);
+
+  s_is_negative = LO32_LIMB64(s[3]) >> 31;
+
+  mask2 = ct_limb_gen_mask(s_is_negative);
+  mask1 = ct_limb_gen_inv_mask(s_is_negative);
+
+  STORE64_COND(wp, 0, mask2, o[0], mask1, s[0]);
+  STORE64_COND(wp, 1, mask2, o[1], mask1, s[1]);
+  STORE64_COND(wp, 2, mask2, o[2], mask1, s[2]);
+
+  w->nlimbs = 192 / BITS_PER_MPI_LIMB;
+  MPN_NORMALIZE (wp, w->nlimbs);
+}
+
+void
+_gcry_mpi_ec_nist224_mod (gcry_mpi_t w, mpi_ec_t ctx)
+{
+  static const mpi_limb64_t p_mult[5][4] =
+  {
+    { /* P * -1 */
+      LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0x00000000U, 0xffffffffU),
+      LIMB64_C(0x00000000U, 0x00000000U), LIMB64_C(0xffffffffU, 0x00000000U)
+    },
+    { /* P * 0 */
+      LIMB64_C(0x00000000U, 0x00000000U), LIMB64_C(0x00000000U, 0x00000000U),
+      LIMB64_C(0x00000000U, 0x00000000U), LIMB64_C(0x00000000U, 0x00000000U)
+    },
+    { /* P * 1 */
+      LIMB64_C(0x00000000U, 0x00000001U), LIMB64_C(0xffffffffU, 0x00000000U),
+      LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0x00000000U, 0xffffffffU)
+    },
+    { /* P * 2 */
+      LIMB64_C(0x00000000U, 0x00000002U), LIMB64_C(0xfffffffeU, 0x00000000U),
+      LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0x00000001U, 0xffffffffU)
+    },
+    { /* P * 3 */
+      LIMB64_C(0x00000000U, 0x00000003U), LIMB64_C(0xfffffffdU, 0x00000000U),
+      LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0x00000002U, 0xffffffffU)
+    }
+  };
+  const mpi_limb64_t zero = LIMB_TO64(0);
+  mpi_ptr_t wp;
+  mpi_size_t wsize = (224 + BITS_PER_MPI_LIMB64 - 1) / BITS_PER_MPI_LIMB64;
+  mpi_size_t psize = ctx->p->nlimbs;
+  mpi_limb64_t s[wsize];
+  mpi_limb64_t d[wsize];
+  mpi_limb_t mask1;
+  mpi_limb_t mask2;
+  mpi_limb_t s_is_negative;
+  int carry;
+
+  MPN_NORMALIZE (w->d, w->nlimbs);
+  if (mpi_nbits_more_than (w, 2 * 224))
+    log_bug ("W must be less than m^2\n");
+
+  RESIZE_AND_CLEAR_IF_NEEDED (w, wsize * 2 * LIMBS_PER_LIMB64);
+  RESIZE_AND_CLEAR_IF_NEEDED (ctx->p, wsize * LIMBS_PER_LIMB64);
+  ctx->p->nlimbs = psize;
+
+  wp = w->d;
+
+  prefetch (p_mult, sizeof(p_mult));
+
+  /* See "FIPS 186-4, D.2.2 Curve P-224". */
+
+  /* "S1 + S2" with 64-bit limbs:
+   *     [0:A10]:[ A9: A8]:[ A7:0]:[0:0]
+   *  +    [0:0]:[A13:A12]:[A11:0]:[0:0]
+   *  => s[3]:s[2]:s[1]:s[0]
+   */
+  s[0] = zero;
+  ADD3_LIMB64 (s[3], s[2], s[1],
+              LIMB64_HILO(0, LOAD32(wp, 10)),
+              LOAD64(wp, 8 / 2),
+              LIMB64_HILO(LOAD32(wp, 7), 0),
+              zero,
+              LOAD64(wp, 12 / 2),
+              LIMB64_HILO(LOAD32(wp, 11), 0));
+
+  /* "T + S1 + S2" */
+  ADD4_LIMB64 (s[3], s[2], s[1], s[0],
+              s[3], s[2], s[1], s[0],
+              LIMB64_HILO(0, LOAD32(wp, 6)),
+              LOAD64(wp, 4 / 2),
+              LOAD64(wp, 2 / 2),
+              LOAD64(wp, 0 / 2));
+
+  /* "D1 + D2" with 64-bit limbs:
+   *     [0:A13]:[A12:A11]:[A10: A9]:[ A8: A7]
+   *  +    [0:0]:[  0:  0]:[  0:A13]:[A12:A11]
+   *  => d[3]:d[2]:d[1]:d[0]
+   */
+  ADD4_LIMB64 (d[3], d[2], d[1], d[0],
+              LIMB64_HILO(0, LOAD32(wp, 13)),
+              LOAD64_UNALIGNED(wp, 11 / 2),
+              LOAD64_UNALIGNED(wp, 9 / 2),
+              LOAD64_UNALIGNED(wp, 7 / 2),
+              zero,
+              zero,
+              LIMB64_HILO(0, LOAD32(wp, 13)),
+              LOAD64_UNALIGNED(wp, 11 / 2));
+
+  /* "T + S1 + S2 - D1 - D2" */
+  SUB4_LIMB64 (s[3], s[2], s[1], s[0],
+              s[3], s[2], s[1], s[0],
+              d[3], d[2], d[1], d[0]);
+
+  /* mod p:
+   *  Upper 32-bits of 's[3]' holds carry value (-2..2).
+   *  Subtract (carry + 1) * p. Result will be with in range -p...p.
+   *  Handle result being negative with addition and conditional store. */
+
+  carry = HI32_LIMB64(s[3]);
+
+  SUB4_LIMB64 (s[3], s[2], s[1], s[0],
+              s[3], s[2], s[1], s[0],
+              p_mult[carry + 2][3], p_mult[carry + 2][2],
+              p_mult[carry + 2][1], p_mult[carry + 2][0]);
+
+  ADD4_LIMB64 (d[3], d[2], d[1], d[0],
+              s[3], s[2], s[1], s[0],
+              p_mult[0 + 2][3], p_mult[0 + 2][2],
+              p_mult[0 + 2][1], p_mult[0 + 2][0]);
+
+  s_is_negative = (HI32_LIMB64(s[3]) >> 31);
+
+  mask2 = ct_limb_gen_mask(s_is_negative);
+  mask1 = ct_limb_gen_inv_mask(s_is_negative);
+
+  STORE64_COND(wp, 0, mask2, d[0], mask1, s[0]);
+  STORE64_COND(wp, 1, mask2, d[1], mask1, s[1]);
+  STORE64_COND(wp, 2, mask2, d[2], mask1, s[2]);
+  STORE64_COND(wp, 3, mask2, d[3], mask1, s[3]);
+
+  w->nlimbs = wsize * LIMBS_PER_LIMB64;
+  MPN_NORMALIZE (wp, w->nlimbs);
+}
+
+void
+_gcry_mpi_ec_nist256_mod (gcry_mpi_t w, mpi_ec_t ctx)
+{
+  static const mpi_limb64_t p_mult[12][5] =
+  {
+    { /* P * -3 */
+      LIMB64_C(0x00000000U, 0x00000003U), LIMB64_C(0xfffffffdU, 0x00000000U),
+      LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0x00000002U, 0xfffffffcU),
+      LIMB64_C(0xffffffffU, 0xfffffffdU)
+    },
+    { /* P * -2 */
+      LIMB64_C(0x00000000U, 0x00000002U), LIMB64_C(0xfffffffeU, 0x00000000U),
+      LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0x00000001U, 0xfffffffdU),
+      LIMB64_C(0xffffffffU, 0xfffffffeU)
+    },
+    { /* P * -1 */
+      LIMB64_C(0x00000000U, 0x00000001U), LIMB64_C(0xffffffffU, 0x00000000U),
+      LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0x00000000U, 0xfffffffeU),
+      LIMB64_C(0xffffffffU, 0xffffffffU)
+    },
+    { /* P * 0 */
+      LIMB64_C(0x00000000U, 0x00000000U), LIMB64_C(0x00000000U, 0x00000000U),
+      LIMB64_C(0x00000000U, 0x00000000U), LIMB64_C(0x00000000U, 0x00000000U),
+      LIMB64_C(0x00000000U, 0x00000000U)
+    },
+    { /* P * 1 */
+      LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0x00000000U, 0xffffffffU),
+      LIMB64_C(0x00000000U, 0x00000000U), LIMB64_C(0xffffffffU, 0x00000001U),
+      LIMB64_C(0x00000000U, 0x00000000U)
+    },
+    { /* P * 2 */
+      LIMB64_C(0xffffffffU, 0xfffffffeU), LIMB64_C(0x00000001U, 0xffffffffU),
+      LIMB64_C(0x00000000U, 0x00000000U), LIMB64_C(0xfffffffeU, 0x00000002U),
+      LIMB64_C(0x00000000U, 0x00000001U)
+    },
+    { /* P * 3 */
+      LIMB64_C(0xffffffffU, 0xfffffffdU), LIMB64_C(0x00000002U, 0xffffffffU),
+      LIMB64_C(0x00000000U, 0x00000000U), LIMB64_C(0xfffffffdU, 0x00000003U),
+      LIMB64_C(0x00000000U, 0x00000002U)
+    },
+    { /* P * 4 */
+      LIMB64_C(0xffffffffU, 0xfffffffcU), LIMB64_C(0x00000003U, 0xffffffffU),
+      LIMB64_C(0x00000000U, 0x00000000U), LIMB64_C(0xfffffffcU, 0x00000004U),
+      LIMB64_C(0x00000000U, 0x00000003U)
+    },
+    { /* P * 5 */
+      LIMB64_C(0xffffffffU, 0xfffffffbU), LIMB64_C(0x00000004U, 0xffffffffU),
+      LIMB64_C(0x00000000U, 0x00000000U), LIMB64_C(0xfffffffbU, 0x00000005U),
+      LIMB64_C(0x00000000U, 0x00000004U)
+    },
+    { /* P * 6 */
+      LIMB64_C(0xffffffffU, 0xfffffffaU), LIMB64_C(0x00000005U, 0xffffffffU),
+      LIMB64_C(0x00000000U, 0x00000000U), LIMB64_C(0xfffffffaU, 0x00000006U),
+      LIMB64_C(0x00000000U, 0x00000005U)
+    },
+    { /* P * 7 */
+      LIMB64_C(0xffffffffU, 0xfffffff9U), LIMB64_C(0x00000006U, 0xffffffffU),
+      LIMB64_C(0x00000000U, 0x00000000U), LIMB64_C(0xfffffff9U, 0x00000007U),
+      LIMB64_C(0x00000000U, 0x00000006U)
+    }
+  };
+  const mpi_limb64_t zero = LIMB_TO64(0);
+  mpi_ptr_t wp;
+  mpi_size_t wsize = (256 + BITS_PER_MPI_LIMB64 - 1) / BITS_PER_MPI_LIMB64;
+  mpi_size_t psize = ctx->p->nlimbs;
+  mpi_limb64_t s[wsize + 1];
+  mpi_limb64_t t[wsize + 1];
+  mpi_limb64_t d[wsize + 1];
+  mpi_limb64_t e[wsize + 1];
+  mpi_limb_t mask1;
+  mpi_limb_t mask2;
+  mpi_limb_t mask3;
+  mpi_limb_t s_is_negative;
+  mpi_limb_t d_is_negative;
+  int carry;
+
+  MPN_NORMALIZE (w->d, w->nlimbs);
+  if (mpi_nbits_more_than (w, 2 * 256))
+    log_bug ("W must be less than m^2\n");
+
+  RESIZE_AND_CLEAR_IF_NEEDED (w, wsize * 2 * LIMBS_PER_LIMB64);
+  RESIZE_AND_CLEAR_IF_NEEDED (ctx->p, wsize * LIMBS_PER_LIMB64);
+  ctx->p->nlimbs = psize;
+
+  wp = w->d;
+
+  prefetch (p_mult, sizeof(p_mult));
+
+  /* See "FIPS 186-4, D.2.3 Curve P-256". */
+
+  /* "S1 + S2" with 64-bit limbs:
+   *     [A15:A14]:[A13:A12]:[A11:0]:[0:0]
+   *  +    [0:A15]:[A14:A13]:[A12:0]:[0:0]
+   *  => s[4]:s[3]:s[2]:s[1]:s[0]
+   */
+  s[0] = zero;
+  ADD4_LIMB64 (s[4], s[3], s[2], s[1],
+              zero,
+              LOAD64(wp, 14 / 2),
+              LOAD64(wp, 12 / 2),
+              LIMB64_HILO(LOAD32(wp, 11), 0),
+              zero,
+              LIMB64_HILO(0, LOAD32(wp, 15)),
+              LOAD64_UNALIGNED(wp, 13 / 2),
+              LIMB64_HILO(LOAD32(wp, 12), 0));
+
+  /* "S3 + S4" with 64-bit limbs:
+   *     [A15:A14]:[  0:  0]:[  0:A10]:[ A9:A8]
+   *  +   [A8:A13]:[A15:A14]:[A13:A11]:[A10:A9]
+   *  => t[4]:t[3]:t[2]:t[1]:t[0]
+   */
+  ADD5_LIMB64 (t[4], t[3], t[2], t[1], t[0],
+              zero,
+              LOAD64(wp, 14 / 2),
+              zero,
+              LIMB64_HILO(0, LOAD32(wp, 10)),
+              LOAD64(wp, 8 / 2),
+              zero,
+              LIMB64_HILO(LOAD32(wp, 8), LOAD32(wp, 13)),
+              LOAD64(wp, 14 / 2),
+              LIMB64_HILO(LOAD32(wp, 13), LOAD32(wp, 11)),
+              LOAD64_UNALIGNED(wp, 9 / 2));
+
+  /* "2*S1 + 2*S2" */
+  ADD5_LIMB64 (s[4], s[3], s[2], s[1], s[0],
+               s[4], s[3], s[2], s[1], s[0],
+               s[4], s[3], s[2], s[1], s[0]);
+
+  /* "T + S3 + S4" */
+  ADD5_LIMB64 (t[4], t[3], t[2], t[1], t[0],
+              t[4], t[3], t[2], t[1], t[0],
+              zero,
+              LOAD64(wp, 6 / 2),
+              LOAD64(wp, 4 / 2),
+              LOAD64(wp, 2 / 2),
+              LOAD64(wp, 0 / 2));
+
+  /* "2*S1 + 2*S2 - D3" with 64-bit limbs:
+   *    s[4]:    s[3]:    s[2]:    s[1]:     s[0]
+   *  -       [A12:0]:[A10:A9]:[A8:A15]:[A14:A13]
+   *  => s[4]:s[3]:s[2]:s[1]:s[0]
+   */
+  SUB5_LIMB64 (s[4], s[3], s[2], s[1], s[0],
+               s[4], s[3], s[2], s[1], s[0],
+              zero,
+              LIMB64_HILO(LOAD32(wp, 12), 0),
+              LOAD64_UNALIGNED(wp, 9 / 2),
+              LIMB64_HILO(LOAD32(wp, 8), LOAD32(wp, 15)),
+              LOAD64_UNALIGNED(wp, 13 / 2));
+
+  /* "T + 2*S1 + 2*S2 + S3 + S4 - D3" */
+  ADD5_LIMB64 (s[4], s[3], s[2], s[1], s[0],
+               s[4], s[3], s[2], s[1], s[0],
+               t[4], t[3], t[2], t[1], t[0]);
+
+  /* "D1 + D2" with 64-bit limbs:
+   *     [0:A13]:[A12:A11] + [A15:A14]:[A13:A12] => d[2]:d[1]:d[0]
+   *     [A10:A8] + [A11:A9] => d[4]:d[3]
+   */
+  ADD3_LIMB64 (d[2], d[1], d[0],
+              zero,
+              LIMB64_HILO(0, LOAD32(wp, 13)),
+              LOAD64_UNALIGNED(wp, 11 / 2),
+              zero,
+              LOAD64(wp, 14 / 2),
+              LOAD64(wp, 12 / 2));
+  ADD2_LIMB64 (d[4], d[3],
+              zero, LIMB64_HILO(LOAD32(wp, 10), LOAD32(wp, 8)),
+              zero, LIMB64_HILO(LOAD32(wp, 11), LOAD32(wp, 9)));
+
+  /* "D1 + D2 + D4" with 64-bit limbs:
+   *    d[4]:    d[3]:     d[2]:  d[1]:     d[0]
+   *  -       [A13:0]:[A11:A10]:[A9:0]:[A15:A14]
+   *  => d[4]:d[3]:d[2]:d[1]:d[0]
+   */
+  ADD5_LIMB64 (d[4], d[3], d[2], d[1], d[0],
+               d[4], d[3], d[2], d[1], d[0],
+              zero,
+              LIMB64_HILO(LOAD32(wp, 13), 0),
+              LOAD64(wp, 10 / 2),
+              LIMB64_HILO(LOAD32(wp, 9), 0),
+              LOAD64(wp, 14 / 2));
+
+  /* "T + 2*S1 + 2*S2 + S3 + S4 - D1 - D2 - D3 - D4" */
+  SUB5_LIMB64 (s[4], s[3], s[2], s[1], s[0],
+               s[4], s[3], s[2], s[1], s[0],
+               d[4], d[3], d[2], d[1], d[0]);
+
+  /* mod p:
+   *  's[4]' holds carry value (-4..6). Subtract (carry + 1) * p. Result
+   *  will be with in range -2*p...p. Handle result being negative with
+   *  addition and conditional store. */
+
+  carry = LO32_LIMB64(s[4]);
+
+  SUB5_LIMB64 (s[4], s[3], s[2], s[1], s[0],
+              s[4], s[3], s[2], s[1], s[0],
+              p_mult[carry + 4][4], p_mult[carry + 4][3],
+              p_mult[carry + 4][2], p_mult[carry + 4][1],
+              p_mult[carry + 4][0]);
+
+  /* Add 1*P */
+  ADD5_LIMB64 (d[4], d[3], d[2], d[1], d[0],
+              s[4], s[3], s[2], s[1], s[0],
+              zero,
+              p_mult[0 + 4][3], p_mult[0 + 4][2],
+              p_mult[0 + 4][1], p_mult[0 + 4][0]);
+
+  /* Add 2*P */
+  ADD5_LIMB64 (e[4], e[3], e[2], e[1], e[0],
+              s[4], s[3], s[2], s[1], s[0],
+              zero,
+              p_mult[1 + 4][3], p_mult[1 + 4][2],
+              p_mult[1 + 4][1], p_mult[1 + 4][0]);
+
+  s_is_negative = LO32_LIMB64(s[4]) >> 31;
+  d_is_negative = LO32_LIMB64(d[4]) >> 31;
+  mask3 = ct_limb_gen_mask(d_is_negative);
+  mask2 = ct_limb_gen_mask(s_is_negative) & ~mask3;
+  mask1 = ct_limb_gen_inv_mask(s_is_negative) & ~mask3;
+
+  s[0] = LIMB_OR64(MASK_AND64(mask2, d[0]), MASK_AND64(mask1, s[0]));
+  s[1] = LIMB_OR64(MASK_AND64(mask2, d[1]), MASK_AND64(mask1, s[1]));
+  s[2] = LIMB_OR64(MASK_AND64(mask2, d[2]), MASK_AND64(mask1, s[2]));
+  s[3] = LIMB_OR64(MASK_AND64(mask2, d[3]), MASK_AND64(mask1, s[3]));
+  s[0] = LIMB_OR64(MASK_AND64(mask3, e[0]), s[0]);
+  s[1] = LIMB_OR64(MASK_AND64(mask3, e[1]), s[1]);
+  s[2] = LIMB_OR64(MASK_AND64(mask3, e[2]), s[2]);
+  s[3] = LIMB_OR64(MASK_AND64(mask3, e[3]), s[3]);
+
+  STORE64(wp, 0, s[0]);
+  STORE64(wp, 1, s[1]);
+  STORE64(wp, 2, s[2]);
+  STORE64(wp, 3, s[3]);
+
+  w->nlimbs = wsize * LIMBS_PER_LIMB64;
+  MPN_NORMALIZE (wp, w->nlimbs);
+}
+
+void
+_gcry_mpi_ec_nist384_mod (gcry_mpi_t w, mpi_ec_t ctx)
+{
+  static const mpi_limb64_t p_mult[11][7] =
+  {
+    { /* P * -2 */
+      LIMB64_C(0xfffffffeU, 0x00000002U), LIMB64_C(0x00000001U, 0xffffffffU),
+      LIMB64_C(0x00000000U, 0x00000002U), LIMB64_C(0x00000000U, 0x00000000U),
+      LIMB64_C(0x00000000U, 0x00000000U), LIMB64_C(0x00000000U, 0x00000000U),
+      LIMB64_C(0xffffffffU, 0xfffffffeU)
+    },
+    { /* P * -1 */
+      LIMB64_C(0xffffffffU, 0x00000001U), LIMB64_C(0x00000000U, 0xffffffffU),
+      LIMB64_C(0x00000000U, 0x00000001U), LIMB64_C(0x00000000U, 0x00000000U),
+      LIMB64_C(0x00000000U, 0x00000000U), LIMB64_C(0x00000000U, 0x00000000U),
+      LIMB64_C(0xffffffffU, 0xffffffffU)
+    },
+    { /* P * 0 */
+      LIMB64_C(0x00000000U, 0x00000000U), LIMB64_C(0x00000000U, 0x00000000U),
+      LIMB64_C(0x00000000U, 0x00000000U), LIMB64_C(0x00000000U, 0x00000000U),
+      LIMB64_C(0x00000000U, 0x00000000U), LIMB64_C(0x00000000U, 0x00000000U),
+      LIMB64_C(0x00000000U, 0x00000000U)
+    },
+    { /* P * 1 */
+      LIMB64_C(0x00000000U, 0xffffffffU), LIMB64_C(0xffffffffU, 0x00000000U),
+      LIMB64_C(0xffffffffU, 0xfffffffeU), LIMB64_C(0xffffffffU, 0xffffffffU),
+      LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0xffffffffU, 0xffffffffU),
+      LIMB64_C(0x00000000U, 0x00000000U)
+    },
+    { /* P * 2 */
+      LIMB64_C(0x00000001U, 0xfffffffeU), LIMB64_C(0xfffffffeU, 0x00000000U),
+      LIMB64_C(0xffffffffU, 0xfffffffdU), LIMB64_C(0xffffffffU, 0xffffffffU),
+      LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0xffffffffU, 0xffffffffU),
+      LIMB64_C(0x00000000U, 0x00000001U)
+    },
+    { /* P * 3 */
+      LIMB64_C(0x00000002U, 0xfffffffdU), LIMB64_C(0xfffffffdU, 0x00000000U),
+      LIMB64_C(0xffffffffU, 0xfffffffcU), LIMB64_C(0xffffffffU, 0xffffffffU),
+      LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0xffffffffU, 0xffffffffU),
+      LIMB64_C(0x00000000U, 0x00000002U)
+    },
+    { /* P * 4 */
+      LIMB64_C(0x00000003U, 0xfffffffcU), LIMB64_C(0xfffffffcU, 0x00000000U),
+      LIMB64_C(0xffffffffU, 0xfffffffbU), LIMB64_C(0xffffffffU, 0xffffffffU),
+      LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0xffffffffU, 0xffffffffU),
+      LIMB64_C(0x00000000U, 0x00000003U)
+    },
+    { /* P * 5 */
+      LIMB64_C(0x00000004U, 0xfffffffbU), LIMB64_C(0xfffffffbU, 0x00000000U),
+      LIMB64_C(0xffffffffU, 0xfffffffaU), LIMB64_C(0xffffffffU, 0xffffffffU),
+      LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0xffffffffU, 0xffffffffU),
+      LIMB64_C(0x00000000U, 0x00000004U)
+    },
+    { /* P * 6 */
+      LIMB64_C(0x00000005U, 0xfffffffaU), LIMB64_C(0xfffffffaU, 0x00000000U),
+      LIMB64_C(0xffffffffU, 0xfffffff9U), LIMB64_C(0xffffffffU, 0xffffffffU),
+      LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0xffffffffU, 0xffffffffU),
+      LIMB64_C(0x00000000U, 0x00000005U)
+    },
+    { /* P * 7 */
+      LIMB64_C(0x00000006U, 0xfffffff9U), LIMB64_C(0xfffffff9U, 0x00000000U),
+      LIMB64_C(0xffffffffU, 0xfffffff8U), LIMB64_C(0xffffffffU, 0xffffffffU),
+      LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0xffffffffU, 0xffffffffU),
+      LIMB64_C(0x00000000U, 0x00000006U)
+    },
+    { /* P * 8 */
+      LIMB64_C(0x00000007U, 0xfffffff8U), LIMB64_C(0xfffffff8U, 0x00000000U),
+      LIMB64_C(0xffffffffU, 0xfffffff7U), LIMB64_C(0xffffffffU, 0xffffffffU),
+      LIMB64_C(0xffffffffU, 0xffffffffU), LIMB64_C(0xffffffffU, 0xffffffffU),
+      LIMB64_C(0x00000000U, 0x00000007U)
+    },
+  };
+  const mpi_limb64_t zero = LIMB_TO64(0);
+  mpi_ptr_t wp;
+  mpi_size_t wsize = (384 + BITS_PER_MPI_LIMB64 - 1) / BITS_PER_MPI_LIMB64;
+  mpi_size_t psize = ctx->p->nlimbs;
+#if (BITS_PER_MPI_LIMB64 == BITS_PER_MPI_LIMB) && defined(WORDS_BIGENDIAN)
+  mpi_limb_t wp_shr32[wsize * LIMBS_PER_LIMB64];
+#endif
+  mpi_limb64_t s[wsize + 1];
+  mpi_limb64_t t[wsize + 1];
+  mpi_limb64_t d[wsize + 1];
+  mpi_limb64_t x[wsize + 1];
+  mpi_limb_t mask1;
+  mpi_limb_t mask2;
+  mpi_limb_t s_is_negative;
+  int carry;
+
+  MPN_NORMALIZE (w->d, w->nlimbs);
+  if (mpi_nbits_more_than (w, 2 * 384))
+    log_bug ("W must be less than m^2\n");
+
+  RESIZE_AND_CLEAR_IF_NEEDED (w, wsize * 2 * LIMBS_PER_LIMB64);
+  RESIZE_AND_CLEAR_IF_NEEDED (ctx->p, wsize * LIMBS_PER_LIMB64);
+  ctx->p->nlimbs = psize;
+
+  wp = w->d;
+
+  prefetch (p_mult, sizeof(p_mult));
+
+  /* See "FIPS 186-4, D.2.4 Curve P-384". */
+
+#if BITS_PER_MPI_LIMB64 == BITS_PER_MPI_LIMB
+# ifdef WORDS_BIGENDIAN
+#  define LOAD64_SHR32(idx) LOAD64(wp_shr32, ((idx) / 2 - wsize))
+  _gcry_mpih_rshift (wp_shr32, wp + 384 / BITS_PER_MPI_LIMB,
+                    wsize * LIMBS_PER_LIMB64, 32);
+# else
+# define LOAD64_SHR32(idx) LOAD64_UNALIGNED(wp, idx / 2)
+#endif
+#else
+# define LOAD64_SHR32(idx) LIMB64_HILO(LOAD32(wp, (idx) + 1), LOAD32(wp, idx))
+#endif
+
+  /* "S1 + S1" with 64-bit limbs:
+   *     [0:A23]:[A22:A21]
+   *  +  [0:A23]:[A22:A21]
+   *  => s[3]:s[2]
+   */
+  ADD2_LIMB64 (s[3], s[2],
+              LIMB64_HILO(0, LOAD32(wp, 23)),
+              LOAD64_SHR32(21),
+              LIMB64_HILO(0, LOAD32(wp, 23)),
+              LOAD64_SHR32(21));
+
+  /* "S5 + S6" with 64-bit limbs:
+   *     [A23:A22]:[A21:A20]:[  0:0]:[0:  0]
+   *  +  [  0:  0]:[A23:A22]:[A21:0]:[0:A20]
+   *  => x[4]:x[3]:x[2]:x[1]:x[0]
+   */
+  x[0] = LIMB64_HILO(0, LOAD32(wp, 20));
+  x[1] = LIMB64_HILO(LOAD32(wp, 21), 0);
+  ADD3_LIMB64 (x[4], x[3], x[2],
+              zero, LOAD64(wp, 22 / 2), LOAD64(wp, 20 / 2),
+              zero, zero, LOAD64(wp, 22 / 2));
+
+  /* "D2 + D3" with 64-bit limbs:
+   *     [0:A23]:[A22:A21]:[A20:0]
+   *  +  [0:A23]:[A23:0]:[0:0]
+   *  => d[2]:d[1]:d[0]
+   */
+  d[0] = LIMB64_HILO(LOAD32(wp, 20), 0);
+  ADD2_LIMB64 (d[2], d[1],
+              LIMB64_HILO(0, LOAD32(wp, 23)),
+              LOAD64_SHR32(21),
+              LIMB64_HILO(0, LOAD32(wp, 23)),
+              LIMB64_HILO(LOAD32(wp, 23), 0));
+
+  /* "2*S1 + S5 + S6" with 64-bit limbs:
+   *     s[4]:s[3]:s[2]:s[1]:s[0]
+   *  +  x[4]:x[3]:x[2]:x[1]:x[0]
+   *  => s[4]:s[3]:s[2]:s[1]:s[0]
+   */
+  s[0] = x[0];
+  s[1] = x[1];
+  ADD3_LIMB64(s[4], s[3], s[2],
+             zero, s[3], s[2],
+             x[4], x[3], x[2]);
+
+  /* "T + S2" with 64-bit limbs:
+   *     [A11:A10]:[ A9: A8]:[ A7: A6]:[ A5: A4]:[ A3: A2]:[ A1: A0]
+   *  +  [A23:A22]:[A21:A20]:[A19:A18]:[A17:A16]:[A15:A14]:[A13:A12]
+   *  => t[6]:t[5]:t[4]:t[3]:t[2]:t[1]:t[0]
+   */
+  ADD7_LIMB64 (t[6], t[5], t[4], t[3], t[2], t[1], t[0],
+              zero,
+              LOAD64(wp, 10 / 2), LOAD64(wp, 8 / 2), LOAD64(wp, 6 / 2),
+              LOAD64(wp, 4 / 2), LOAD64(wp, 2 / 2), LOAD64(wp, 0 / 2),
+              zero,
+              LOAD64(wp, 22 / 2), LOAD64(wp, 20 / 2), LOAD64(wp, 18 / 2),
+              LOAD64(wp, 16 / 2), LOAD64(wp, 14 / 2), LOAD64(wp, 12 / 2));
+
+  /* "2*S1 + S4 + S5 + S6" with 64-bit limbs:
+   *     s[6]:     s[5]:     s[4]:     s[3]:     s[2]:   s[1]:   s[0]
+   *  +       [A19:A18]:[A17:A16]:[A15:A14]:[A13:A12]:[A20:0]:[A23:0]
+   *  => s[6]:s[5]:s[4]:s[3]:s[2]:s[1]:s[0]
+   */
+  ADD7_LIMB64 (s[6], s[5], s[4], s[3], s[2], s[1], s[0],
+              zero, zero, s[4], s[3], s[2], s[1], s[0],
+              zero,
+              LOAD64(wp, 18 / 2), LOAD64(wp, 16 / 2),
+              LOAD64(wp, 14 / 2), LOAD64(wp, 12 / 2),
+              LIMB64_HILO(LOAD32(wp, 20), 0),
+              LIMB64_HILO(LOAD32(wp, 23), 0));
+
+  /* "D1 + D2 + D3" with 64-bit limbs:
+   *     d[6]:     d[5]:     d[4]:     d[3]:     d[2]:     d[1]:     d[0]
+   *  +       [A22:A21]:[A20:A19]:[A18:A17]:[A16:A15]:[A14:A13]:[A12:A23]
+   *  => d[6]:d[5]:d[4]:d[3]:d[2]:d[1]:d[0]
+   */
+  ADD7_LIMB64 (d[6], d[5], d[4], d[3], d[2], d[1], d[0],
+              zero, zero, zero, zero, d[2], d[1], d[0],
+              zero,
+              LOAD64_SHR32(21),
+              LOAD64_SHR32(19),
+              LOAD64_SHR32(17),
+              LOAD64_SHR32(15),
+              LOAD64_SHR32(13),
+              LIMB64_HILO(LOAD32(wp, 12), LOAD32(wp, 23)));
+
+  /* "2*S1 + S3 + S4 + S5 + S6" with 64-bit limbs:
+   *     s[6]:     s[5]:     s[4]:     s[3]:     s[2]:     s[1]:     s[0]
+   *  +       [A20:A19]:[A18:A17]:[A16:A15]:[A14:A13]:[A12:A23]:[A22:A21]
+   *  => s[6]:s[5]:s[4]:s[3]:s[2]:s[1]:s[0]
+   */
+  ADD7_LIMB64 (s[6], s[5], s[4], s[3], s[2], s[1], s[0],
+              s[6], s[5], s[4], s[3], s[2], s[1], s[0],
+              zero,
+              LOAD64_SHR32(19),
+              LOAD64_SHR32(17),
+              LOAD64_SHR32(15),
+              LOAD64_SHR32(13),
+              LIMB64_HILO(LOAD32(wp, 12), LOAD32(wp, 23)),
+              LOAD64_SHR32(21));
+
+  /* "T + 2*S1 + S2 + S3 + S4 + S5 + S6" */
+  ADD7_LIMB64 (s[6], s[5], s[4], s[3], s[2], s[1], s[0],
+               s[6], s[5], s[4], s[3], s[2], s[1], s[0],
+               t[6], t[5], t[4], t[3], t[2], t[1], t[0]);
+
+  /* "T + 2*S1 + S2 + S3 + S4 + S5 + S6 - D1 - D2 - D3" */
+  SUB7_LIMB64 (s[6], s[5], s[4], s[3], s[2], s[1], s[0],
+               s[6], s[5], s[4], s[3], s[2], s[1], s[0],
+               d[6], d[5], d[4], d[3], d[2], d[1], d[0]);
+
+#undef LOAD64_SHR32
+
+  /* mod p:
+   *  's[6]' holds carry value (-3..7). Subtract (carry + 1) * p. Result
+   *  will be with in range -p...p. Handle result being negative with
+   *  addition and conditional store. */
+
+  carry = LO32_LIMB64(s[6]);
+
+  SUB7_LIMB64 (s[6], s[5], s[4], s[3], s[2], s[1], s[0],
+              s[6], s[5], s[4], s[3], s[2], s[1], s[0],
+              p_mult[carry + 3][6], p_mult[carry + 3][5],
+              p_mult[carry + 3][4], p_mult[carry + 3][3],
+              p_mult[carry + 3][2], p_mult[carry + 3][1],
+              p_mult[carry + 3][0]);
+
+  ADD7_LIMB64 (d[6], d[5], d[4], d[3], d[2], d[1], d[0],
+              s[6], s[5], s[4], s[3], s[2], s[1], s[0],
+              zero,
+              p_mult[0 + 3][5], p_mult[0 + 3][4],
+              p_mult[0 + 3][3], p_mult[0 + 3][2],
+              p_mult[0 + 3][1], p_mult[0 + 3][0]);
+
+  s_is_negative = LO32_LIMB64(s[6]) >> 31;
+  mask2 = ct_limb_gen_mask(s_is_negative);
+  mask1 = ct_limb_gen_inv_mask(s_is_negative);
+
+  STORE64_COND(wp, 0, mask2, d[0], mask1, s[0]);
+  STORE64_COND(wp, 1, mask2, d[1], mask1, s[1]);
+  STORE64_COND(wp, 2, mask2, d[2], mask1, s[2]);
+  STORE64_COND(wp, 3, mask2, d[3], mask1, s[3]);
+  STORE64_COND(wp, 4, mask2, d[4], mask1, s[4]);
+  STORE64_COND(wp, 5, mask2, d[5], mask1, s[5]);
+
+  w->nlimbs = wsize * LIMBS_PER_LIMB64;
+  MPN_NORMALIZE (wp, w->nlimbs);
+
+#if (BITS_PER_MPI_LIMB64 == BITS_PER_MPI_LIMB) && defined(WORDS_BIGENDIAN)
+  wipememory(wp_shr32, sizeof(wp_shr32));
+#endif
+}
+
+void
+_gcry_mpi_ec_nist521_mod (gcry_mpi_t w, mpi_ec_t ctx)
+{
+  mpi_size_t wsize = (521 + BITS_PER_MPI_LIMB - 1) / BITS_PER_MPI_LIMB;
+  mpi_limb_t s[wsize];
+  mpi_limb_t cy;
+  mpi_ptr_t wp;
+
+  MPN_NORMALIZE (w->d, w->nlimbs);
+  if (mpi_nbits_more_than (w, 2 * 521))
+    log_bug ("W must be less than m^2\n");
+
+  RESIZE_AND_CLEAR_IF_NEEDED (w, wsize * 2);
+
+  wp = w->d;
+
+  /* See "FIPS 186-4, D.2.5 Curve P-521". */
+
+  _gcry_mpih_rshift (s, wp + wsize - 1, wsize, 521 % BITS_PER_MPI_LIMB);
+  s[wsize - 1] &= (1 << (521 % BITS_PER_MPI_LIMB)) - 1;
+  wp[wsize - 1] &= (1 << (521 % BITS_PER_MPI_LIMB)) - 1;
+  _gcry_mpih_add_n (wp, wp, s, wsize);
+
+  /* "mod p" */
+  cy = _gcry_mpih_sub_n (wp, wp, ctx->p->d, wsize);
+  _gcry_mpih_add_n (s, wp, ctx->p->d, wsize);
+  mpih_set_cond (wp, s, wsize, mpih_limb_is_not_zero (cy));
+
+  w->nlimbs = wsize;
+  MPN_NORMALIZE (wp, w->nlimbs);
+}
+
+#endif /* !ASM_DISABLED */
diff --git a/grub-core/lib/libgcrypt/mpi/ec.c b/grub-core/lib/libgcrypt/mpi/ec.c
index fa00818fe..e8233ae89 100644
--- a/grub-core/lib/libgcrypt/mpi/ec.c
+++ b/grub-core/lib/libgcrypt/mpi/ec.c
@@ -1,73 +1,118 @@
 /* ec.c -  Elliptic Curve functions
-   Copyright (C) 2007 Free Software Foundation, Inc.
-
-   This file is part of Libgcrypt.
-
-   Libgcrypt is free software; you can redistribute it and/or modify
-   it under the terms of the GNU Lesser General Public License as
-   published by the Free Software Foundation; either version 2.1 of
-   the License, or (at your option) any later version.
-
-   Libgcrypt is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with this program; if not, write to the Free Software
-   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
-   USA.  */
-
+ * Copyright (C) 2007 Free Software Foundation, Inc.
+ * Copyright (C) 2013 g10 Code GmbH
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
 
 #include <config.h>
 #include <stdio.h>
 #include <stdlib.h>
+#include <errno.h>
 
 #include "mpi-internal.h"
 #include "longlong.h"
 #include "g10lib.h"
+#include "context.h"
+#include "ec-context.h"
+#include "ec-internal.h"
 
+extern void reverse_buffer (unsigned char *buffer, unsigned int length);
 
-#define point_init(a)  _gcry_mpi_ec_point_init ((a))
-#define point_free(a)  _gcry_mpi_ec_point_free ((a))
-
+#define point_init(a)  _gcry_mpi_point_init ((a))
+#define point_free(a)  _gcry_mpi_point_free_parts ((a))
 
-/* Object to represent a point in projective coordinates. */
-/* Currently defined in mpi.h */
 
-/* This context is used with all our EC functions. */
-struct mpi_ec_ctx_s
+/* Print a point using the log functions.  If CTX is not NULL affine
+   coordinates will be printed.  */
+void
+_gcry_mpi_point_log (const char *name, mpi_point_t point, mpi_ec_t ctx)
 {
-  /* Domain parameters.  */
-  gcry_mpi_t p;   /* Prime specifying the field GF(p).  */
-  gcry_mpi_t a;   /* First coefficient of the Weierstrass equation.  */
+  gcry_mpi_t x, y;
+  char buf[100];
+
+  if (!point)
+    {
+      snprintf (buf, sizeof buf - 1, "%s.*", name);
+      log_mpidump (buf, NULL);
+      return;
+    }
+  snprintf (buf, sizeof buf - 1, "%s.X", name);
+
+  if (ctx)
+    {
+      x = mpi_new (0);
+      y = mpi_new (0);
+    }
+  if (!ctx || _gcry_mpi_ec_get_affine (x, y, point, ctx))
+    {
+      log_mpidump (buf, point->x);
+      buf[strlen(buf)-1] = 'Y';
+      log_mpidump (buf, point->y);
+      buf[strlen(buf)-1] = 'Z';
+      log_mpidump (buf, point->z);
+    }
+  else
+    {
+      buf[strlen(buf)-1] = 'x';
+      log_mpidump (buf, x);
+      buf[strlen(buf)-1] = 'y';
+      log_mpidump (buf, y);
 
-  int a_is_pminus3;  /* True if A = P - 3. */
+    }
+  if (ctx)
+    {
+      _gcry_mpi_release (x);
+      _gcry_mpi_release (y);
+    }
+}
 
-  /* Some often used constants.  */
-  gcry_mpi_t one;
-  gcry_mpi_t two;
-  gcry_mpi_t three;
-  gcry_mpi_t four;
-  gcry_mpi_t eight;
-  gcry_mpi_t two_inv_p;
 
-  /* Scratch variables.  */
-  gcry_mpi_t scratch[11];
+/* Create a new point option.  NBITS gives the size in bits of one
+   coordinate; it is only used to pre-allocate some resources and
+   might also be passed as 0 to use a default value.  */
+mpi_point_t
+_gcry_mpi_point_new (unsigned int nbits)
+{
+  mpi_point_t p;
 
-  /* Helper for fast reduction.  */
-/*   int nist_nbits; /\* If this is a NIST curve, the number of bits.  *\/ */
-/*   gcry_mpi_t s[10]; */
-/*   gcry_mpi_t c; */
+  (void)nbits;  /* Currently not used.  */
+
+  p = xmalloc (sizeof *p);
+  _gcry_mpi_point_init (p);
+  return p;
+}
 
-};
 
+/* Release the point object P.  P may be NULL. */
+void
+_gcry_mpi_point_release (mpi_point_t p)
+{
+  if (p)
+    {
+      _gcry_mpi_point_free_parts (p);
+      xfree (p);
+    }
+}
 
 
-/* Initialized a point object.  gcry_mpi_ec_point_free shall be used
-   to release this object.  */
+/* Initialize the fields of a point object.  gcry_mpi_point_free_parts
+   may be used to release the fields.  */
 void
-_gcry_mpi_ec_point_init (mpi_point_t *p)
+_gcry_mpi_point_init (mpi_point_t p)
 {
   p->x = mpi_new (0);
   p->y = mpi_new (0);
@@ -75,18 +120,19 @@ _gcry_mpi_ec_point_init (mpi_point_t *p)
 }
 
 
-/* Release a point object. */
+/* Release the parts of a point object. */
 void
-_gcry_mpi_ec_point_free (mpi_point_t *p)
+_gcry_mpi_point_free_parts (mpi_point_t p)
 {
   mpi_free (p->x); p->x = NULL;
   mpi_free (p->y); p->y = NULL;
   mpi_free (p->z); p->z = NULL;
 }
 
+
 /* Set the value from S into D.  */
 static void
-point_set (mpi_point_t *d, mpi_point_t *s)
+point_set (mpi_point_t d, mpi_point_t s)
 {
   mpi_set (d->x, s->x);
   mpi_set (d->y, s->y);
@@ -94,132 +140,176 @@ point_set (mpi_point_t *d, mpi_point_t *s)
 }
 
 
+/* Return a copy of POINT. */
+gcry_mpi_point_t
+_gcry_mpi_point_copy (gcry_mpi_point_t point)
+{
+  mpi_point_t newpoint;
+
+  newpoint = _gcry_mpi_point_new (0);
+  if (point)
+    point_set (newpoint, point);
+
+  return newpoint;
+}
+
+
+void
+_gcry_mpi_ec_point_resize (mpi_point_t p, mpi_ec_t ctx)
+{
+  size_t nlimbs = ctx->p->nlimbs;
+
+  mpi_resize (p->x, nlimbs);
+  p->x->nlimbs = nlimbs;
+  mpi_resize (p->z, nlimbs);
+  p->z->nlimbs = nlimbs;
+
+  if (ctx->model != MPI_EC_MONTGOMERY)
+    {
+      mpi_resize (p->y, nlimbs);
+      p->y->nlimbs = nlimbs;
+    }
+}
+
+
+static void
+point_swap_cond (mpi_point_t d, mpi_point_t s, unsigned long swap,
+                 mpi_ec_t ctx)
+{
+  mpi_swap_cond (d->x, s->x, swap);
+  if (ctx->model != MPI_EC_MONTGOMERY)
+    mpi_swap_cond (d->y, s->y, swap);
+  mpi_swap_cond (d->z, s->z, swap);
+}
+
+
+/* Set the projective coordinates from POINT into X, Y, and Z.  If a
+   coordinate is not required, X, Y, or Z may be passed as NULL.  */
+void
+_gcry_mpi_point_get (gcry_mpi_t x, gcry_mpi_t y, gcry_mpi_t z,
+                     mpi_point_t point)
+{
+  if (x)
+    mpi_set (x, point->x);
+  if (y)
+    mpi_set (y, point->y);
+  if (z)
+    mpi_set (z, point->z);
+}
+
+
+/* Set the projective coordinates from POINT into X, Y, and Z and
+   release POINT.  If a coordinate is not required, X, Y, or Z may be
+   passed as NULL.  */
+void
+_gcry_mpi_point_snatch_get (gcry_mpi_t x, gcry_mpi_t y, gcry_mpi_t z,
+                            mpi_point_t point)
+{
+  mpi_snatch (x, point->x);
+  mpi_snatch (y, point->y);
+  mpi_snatch (z, point->z);
+  xfree (point);
+}
+
+
+/* Set the projective coordinates from X, Y, and Z into POINT.  If a
+   coordinate is given as NULL, the value 0 is stored into point.  If
+   POINT is given as NULL a new point object is allocated.  Returns
+   POINT or the newly allocated point object. */
+mpi_point_t
+_gcry_mpi_point_set (mpi_point_t point,
+                     gcry_mpi_t x, gcry_mpi_t y, gcry_mpi_t z)
+{
+  if (!point)
+    point = mpi_point_new (0);
+
+  if (x)
+    mpi_set (point->x, x);
+  else
+    mpi_clear (point->x);
+  if (y)
+    mpi_set (point->y, y);
+  else
+    mpi_clear (point->y);
+  if (z)
+    mpi_set (point->z, z);
+  else
+    mpi_clear (point->z);
+
+  return point;
+}
+
+
+/* Set the projective coordinates from X, Y, and Z into POINT.  If a
+   coordinate is given as NULL, the value 0 is stored into point.  If
+   POINT is given as NULL a new point object is allocated.  The
+   coordinates X, Y, and Z are released.  Returns POINT or the newly
+   allocated point object. */
+mpi_point_t
+_gcry_mpi_point_snatch_set (mpi_point_t point,
+                            gcry_mpi_t x, gcry_mpi_t y, gcry_mpi_t z)
+{
+  if (!point)
+    point = mpi_point_new (0);
+
+  if (x)
+    mpi_snatch (point->x, x);
+  else
+    mpi_clear (point->x);
+  if (y)
+    mpi_snatch (point->y, y);
+  else
+    mpi_clear (point->y);
+  if (z)
+    mpi_snatch (point->z, z);
+  else
+    mpi_clear (point->z);
+
+  return point;
+}
+
+
+/* W = W mod P.  */
+static void
+ec_mod (gcry_mpi_t w, mpi_ec_t ec)
+{
+  if (0 && ec->dialect == ECC_DIALECT_ED25519)
+    _gcry_mpi_ec_ed25519_mod (w);
+  else if (ec->t.p_barrett)
+    _gcry_mpi_mod_barrett (w, w, ec->t.p_barrett);
+  else
+    _gcry_mpi_mod (w, w, ec->p);
+}
 
 static void
 ec_addm (gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v, mpi_ec_t ctx)
 {
-  mpi_addm (w, u, v, ctx->p);
+  mpi_add (w, u, v);
+  ctx->mod (w, ctx);
 }
 
 static void
-ec_subm (gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v, mpi_ec_t ctx)
+ec_subm (gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v, mpi_ec_t ec)
 {
-  mpi_subm (w, u, v, ctx->p);
+  mpi_sub (w, u, v);
+  while (w->sign)
+    mpi_add (w, w, ec->p);
+  /*ctx->mod (w, ec);*/
 }
 
 static void
 ec_mulm (gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v, mpi_ec_t ctx)
 {
-#if 0
-  /* NOTE: This code works only for limb sizes of 32 bit.  */
-  mpi_limb_t *wp, *sp;
-
-  if (ctx->nist_nbits == 192)
-    {
-      mpi_mul (w, u, v);
-      mpi_resize (w, 12);
-      wp = w->d;
-
-      sp = ctx->s[0]->d;
-      sp[0*2+0] = wp[0*2+0];
-      sp[0*2+1] = wp[0*2+1];
-      sp[1*2+0] = wp[1*2+0];
-      sp[1*2+1] = wp[1*2+1];
-      sp[2*2+0] = wp[2*2+0];
-      sp[2*2+1] = wp[2*2+1];
-
-      sp = ctx->s[1]->d;
-      sp[0*2+0] = wp[3*2+0];
-      sp[0*2+1] = wp[3*2+1];
-      sp[1*2+0] = wp[3*2+0];
-      sp[1*2+1] = wp[3*2+1];
-      sp[2*2+0] = 0;
-      sp[2*2+1] = 0;
-
-      sp = ctx->s[2]->d;
-      sp[0*2+0] = 0;
-      sp[0*2+1] = 0;
-      sp[1*2+0] = wp[4*2+0];
-      sp[1*2+1] = wp[4*2+1];
-      sp[2*2+0] = wp[4*2+0];
-      sp[2*2+1] = wp[4*2+1];
-
-      sp = ctx->s[3]->d;
-      sp[0*2+0] = wp[5*2+0];
-      sp[0*2+1] = wp[5*2+1];
-      sp[1*2+0] = wp[5*2+0];
-      sp[1*2+1] = wp[5*2+1];
-      sp[2*2+0] = wp[5*2+0];
-      sp[2*2+1] = wp[5*2+1];
-
-      ctx->s[0]->nlimbs = 6;
-      ctx->s[1]->nlimbs = 6;
-      ctx->s[2]->nlimbs = 6;
-      ctx->s[3]->nlimbs = 6;
-
-      mpi_add (ctx->c, ctx->s[0], ctx->s[1]);
-      mpi_add (ctx->c, ctx->c, ctx->s[2]);
-      mpi_add (ctx->c, ctx->c, ctx->s[3]);
-
-      while ( mpi_cmp (ctx->c, ctx->p ) >= 0 )
-        mpi_sub ( ctx->c, ctx->c, ctx->p );
-      mpi_set (w, ctx->c);
-    }
-  else if (ctx->nist_nbits == 384)
-    {
-      int i;
-      mpi_mul (w, u, v);
-      mpi_resize (w, 24);
-      wp = w->d;
-
-#define NEXT(a) do { ctx->s[(a)]->nlimbs = 12; \
-                     sp = ctx->s[(a)]->d; \
-                     i = 0; } while (0)
-#define X(a) do { sp[i++] = wp[(a)];} while (0)
-#define X0(a) do { sp[i++] = 0; } while (0)
-      NEXT(0);
-      X(0);X(1);X(2);X(3);X(4);X(5);X(6);X(7);X(8);X(9);X(10);X(11);
-      NEXT(1);
-      X0();X0();X0();X0();X(21);X(22);X(23);X0();X0();X0();X0();X0();
-      NEXT(2);
-      X(12);X(13);X(14);X(15);X(16);X(17);X(18);X(19);X(20);X(21);X(22);X(23);
-      NEXT(3);
-      X(21);X(22);X(23);X(12);X(13);X(14);X(15);X(16);X(17);X(18);X(19);X(20);
-      NEXT(4);
-      X0();X(23);X0();X(20);X(12);X(13);X(14);X(15);X(16);X(17);X(18);X(19);
-      NEXT(5);
-      X0();X0();X0();X0();X(20);X(21);X(22);X(23);X0();X0();X0();X0();
-      NEXT(6);
-      X(20);X0();X0();X(21);X(22);X(23);X0();X0();X0();X0();X0();X0();
-      NEXT(7);
-      X(23);X(12);X(13);X(14);X(15);X(16);X(17);X(18);X(19);X(20);X(21);X(22);
-      NEXT(8);
-      X0();X(20);X(21);X(22);X(23);X0();X0();X0();X0();X0();X0();X0();
-      NEXT(9);
-      X0();X0();X0();X(23);X(23);X0();X0();X0();X0();X0();X0();X0();
-#undef X0
-#undef X
-#undef NEXT
-      mpi_add (ctx->c, ctx->s[0], ctx->s[1]);
-      mpi_add (ctx->c, ctx->c, ctx->s[1]);
-      mpi_add (ctx->c, ctx->c, ctx->s[2]);
-      mpi_add (ctx->c, ctx->c, ctx->s[3]);
-      mpi_add (ctx->c, ctx->c, ctx->s[4]);
-      mpi_add (ctx->c, ctx->c, ctx->s[5]);
-      mpi_add (ctx->c, ctx->c, ctx->s[6]);
-      mpi_sub (ctx->c, ctx->c, ctx->s[7]);
-      mpi_sub (ctx->c, ctx->c, ctx->s[8]);
-      mpi_sub (ctx->c, ctx->c, ctx->s[9]);
-
-      while ( mpi_cmp (ctx->c, ctx->p ) >= 0 )
-        mpi_sub ( ctx->c, ctx->c, ctx->p );
-      while ( ctx->c->sign )
-        mpi_add ( ctx->c, ctx->c, ctx->p );
-      mpi_set (w, ctx->c);
-    }
-  else
-#endif /*0*/
-    mpi_mulm (w, u, v, ctx->p);
+  mpi_mul (w, u, v);
+  ctx->mod (w, ctx);
+}
+
+/* W = 2 * U mod P.  */
+static void
+ec_mul2 (gcry_mpi_t w, gcry_mpi_t u, mpi_ec_t ctx)
+{
+  mpi_lshift (w, u, 1);
+  ctx->mod (w, ctx);
 }
 
 static void
@@ -227,58 +317,610 @@ ec_powm (gcry_mpi_t w, const gcry_mpi_t b, const 
gcry_mpi_t e,
          mpi_ec_t ctx)
 {
   mpi_powm (w, b, e, ctx->p);
+  /* _gcry_mpi_abs (w); */
 }
 
+
+/* Shortcut for
+     ec_powm (B, B, mpi_const (MPI_C_TWO), ctx);
+   for easier optimization.  */
+static void
+ec_pow2 (gcry_mpi_t w, const gcry_mpi_t b, mpi_ec_t ctx)
+{
+  /* Using mpi_mul is slightly faster (at least on amd64).  */
+  /* mpi_powm (w, b, mpi_const (MPI_C_TWO), ctx->p); */
+  ec_mulm (w, b, b, ctx);
+}
+
+
+/* Shortcut for
+     ec_powm (B, B, mpi_const (MPI_C_THREE), ctx);
+   for easier optimization.  */
+static void
+ec_pow3 (gcry_mpi_t w, const gcry_mpi_t b, mpi_ec_t ctx)
+{
+  mpi_powm (w, b, mpi_const (MPI_C_THREE), ctx->p);
+}
+
+
 static void
 ec_invm (gcry_mpi_t x, gcry_mpi_t a, mpi_ec_t ctx)
 {
-  mpi_invm (x, a, ctx->p);
+  if (!mpi_invm (x, a, ctx->p))
+    {
+      log_error ("ec_invm: inverse does not exist:\n");
+      log_mpidump ("  a", a);
+      log_mpidump ("  p", ctx->p);
+    }
 }
+
+/* Routines for 2^255 - 19.  */
 
+#define LIMB_SIZE_25519 ((256+BITS_PER_MPI_LIMB-1)/BITS_PER_MPI_LIMB)
 
+static void
+ec_addm_25519 (gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v, mpi_ec_t ctx)
+{
+  mpi_ptr_t wp, up, vp;
+  mpi_size_t wsize = LIMB_SIZE_25519;
+  mpi_limb_t n[LIMB_SIZE_25519];
+  mpi_limb_t borrow;
+
+  if (w->nlimbs != wsize || u->nlimbs != wsize || v->nlimbs != wsize)
+    log_bug ("addm_25519: different sizes\n");
+
+  up = u->d;
+  vp = v->d;
+  wp = w->d;
+
+  _gcry_mpih_add_n (wp, up, vp, wsize);
+  borrow = _gcry_mpih_sub_n (n, wp, ctx->p->d, wsize);
+  mpih_set_cond (wp, n, wsize, mpih_limb_is_zero (borrow));
+  wp[LIMB_SIZE_25519-1] &= ~((mpi_limb_t)1 << (255 % BITS_PER_MPI_LIMB));
+}
 
-/* This function returns a new context for elliptic curve based on the
-   field GF(p).  P is the prime specifying thuis field, A is the first
-   coefficient.
+static void
+ec_subm_25519 (gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v, mpi_ec_t ctx)
+{
+  mpi_ptr_t wp, up, vp;
+  mpi_size_t wsize = LIMB_SIZE_25519;
+  mpi_limb_t n[LIMB_SIZE_25519];
+  mpi_limb_t borrow;
+
+  if (w->nlimbs != wsize || u->nlimbs != wsize || v->nlimbs != wsize)
+    log_bug ("subm_25519: different sizes\n");
+
+  up = u->d;
+  vp = v->d;
+  wp = w->d;
+
+  borrow = _gcry_mpih_sub_n (wp, up, vp, wsize);
+  _gcry_mpih_add_n (n, wp, ctx->p->d, wsize);
+  mpih_set_cond (wp, n, wsize, mpih_limb_is_not_zero (borrow));
+  wp[LIMB_SIZE_25519-1] &= ~((mpi_limb_t)1 << (255 % BITS_PER_MPI_LIMB));
+}
 
-   This context needs to be released using _gcry_mpi_ec_free.  */
-mpi_ec_t
-_gcry_mpi_ec_init (gcry_mpi_t p, gcry_mpi_t a)
+static void
+ec_mulm_25519 (gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v, mpi_ec_t ctx)
+{
+  mpi_ptr_t wp, up, vp;
+  mpi_size_t wsize = LIMB_SIZE_25519;
+  mpi_limb_t n[LIMB_SIZE_25519*2];
+  mpi_limb_t cy;
+  int msb;
+
+  (void)ctx;
+  if (w->nlimbs != wsize || u->nlimbs != wsize || v->nlimbs != wsize)
+    log_bug ("mulm_25519: different sizes\n");
+
+  up = u->d;
+  vp = v->d;
+  wp = w->d;
+
+  _gcry_mpih_mul_n (n, up, vp, wsize);
+  memcpy (wp, n, wsize * BYTES_PER_MPI_LIMB);
+  wp[LIMB_SIZE_25519-1] &= ~((mpi_limb_t)1 << (255 % BITS_PER_MPI_LIMB));
+
+  _gcry_mpih_rshift (n, n+LIMB_SIZE_25519-1, LIMB_SIZE_25519+1,
+                    (255 % BITS_PER_MPI_LIMB));
+
+  cy = _gcry_mpih_addmul_1 (wp, n, wsize, 19);
+
+  memset (n, 0, wsize * BYTES_PER_MPI_LIMB);
+  msb = (wp[LIMB_SIZE_25519-1] >> (255 % BITS_PER_MPI_LIMB));
+  n[0] = (cy * 2 + msb) * 19;
+  wp[LIMB_SIZE_25519-1] &= ~((mpi_limb_t)1 << (255 % BITS_PER_MPI_LIMB));
+  _gcry_mpih_add_n (wp, wp, n, wsize);
+
+  cy = _gcry_mpih_sub_n (n, wp, ctx->p->d, wsize);
+  mpih_set_cond (wp, n, wsize, mpih_limb_is_zero (cy));
+}
+
+static void
+ec_mul2_25519 (gcry_mpi_t w, gcry_mpi_t u, mpi_ec_t ctx)
+{
+  ec_addm_25519 (w, u, u, ctx);
+}
+
+static void
+ec_pow2_25519 (gcry_mpi_t w, const gcry_mpi_t b, mpi_ec_t ctx)
+{
+  ec_mulm_25519 (w, b, b, ctx);
+}
+
+/* Routines for 2^448 - 2^224 - 1.  */
+
+#define LIMB_SIZE_448 ((448+BITS_PER_MPI_LIMB-1)/BITS_PER_MPI_LIMB)
+#define LIMB_SIZE_HALF_448 ((LIMB_SIZE_448+1)/2)
+
+static void
+ec_addm_448 (gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v, mpi_ec_t ctx)
+{
+  mpi_ptr_t wp, up, vp;
+  mpi_size_t wsize = LIMB_SIZE_448;
+  mpi_limb_t n[LIMB_SIZE_448];
+  mpi_limb_t cy;
+
+  if (w->nlimbs != wsize || u->nlimbs != wsize || v->nlimbs != wsize)
+    log_bug ("addm_448: different sizes\n");
+
+  up = u->d;
+  vp = v->d;
+  wp = w->d;
+
+  cy = _gcry_mpih_add_n (wp, up, vp, wsize);
+  _gcry_mpih_sub_n (n, wp, ctx->p->d, wsize);
+  mpih_set_cond (wp, n, wsize, mpih_limb_is_not_zero (cy));
+}
+
+static void
+ec_subm_448 (gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v, mpi_ec_t ctx)
+{
+  mpi_ptr_t wp, up, vp;
+  mpi_size_t wsize = LIMB_SIZE_448;
+  mpi_limb_t n[LIMB_SIZE_448];
+  mpi_limb_t borrow;
+
+  if (w->nlimbs != wsize || u->nlimbs != wsize || v->nlimbs != wsize)
+    log_bug ("subm_448: different sizes\n");
+
+  up = u->d;
+  vp = v->d;
+  wp = w->d;
+
+  borrow = _gcry_mpih_sub_n (wp, up, vp, wsize);
+  _gcry_mpih_add_n (n, wp, ctx->p->d, wsize);
+  mpih_set_cond (wp, n, wsize, mpih_limb_is_not_zero (borrow));
+}
+
+static void
+ec_mulm_448 (gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v, mpi_ec_t ctx)
+{
+  mpi_ptr_t wp, up, vp;
+  mpi_size_t wsize = LIMB_SIZE_448;
+  mpi_limb_t n[LIMB_SIZE_448*2];
+  mpi_limb_t a2[LIMB_SIZE_HALF_448];
+  mpi_limb_t a3[LIMB_SIZE_HALF_448];
+  mpi_limb_t b0[LIMB_SIZE_HALF_448];
+  mpi_limb_t b1[LIMB_SIZE_HALF_448];
+  mpi_limb_t cy;
+
+  if (w->nlimbs != wsize || u->nlimbs != wsize || v->nlimbs != wsize)
+    log_bug ("mulm_448: different sizes\n");
+
+  up = u->d;
+  vp = v->d;
+  wp = w->d;
+
+  _gcry_mpih_mul_n (n, up, vp, wsize);
+
+  memcpy (b0, n, LIMB_SIZE_HALF_448 * BYTES_PER_MPI_LIMB);
+  memcpy (a2, n + wsize, LIMB_SIZE_HALF_448 * BYTES_PER_MPI_LIMB);
+
+#if (LIMB_SIZE_HALF_448 > LIMB_SIZE_448/2)
+  b0[LIMB_SIZE_HALF_448-1] &= ((mpi_limb_t)1UL<<32)-1;
+  a2[LIMB_SIZE_HALF_448-1] &= ((mpi_limb_t)1UL<<32)-1;
+  _gcry_mpih_rshift (b1, n + wsize/2, LIMB_SIZE_HALF_448, 32);
+  _gcry_mpih_rshift (a3, n + wsize + wsize/2, LIMB_SIZE_HALF_448, 32);
+#else
+  memcpy (b1, n + wsize/2, LIMB_SIZE_HALF_448 * BYTES_PER_MPI_LIMB);
+  memcpy (a3, n + wsize + wsize/2, LIMB_SIZE_HALF_448 * BYTES_PER_MPI_LIMB);
+#endif
+
+  cy = _gcry_mpih_add_n (b0, b0, a2, LIMB_SIZE_HALF_448);
+  cy += _gcry_mpih_add_n (wp, b0, a3, LIMB_SIZE_HALF_448);
+#if (LIMB_SIZE_HALF_448 > LIMB_SIZE_448/2)
+  cy = wp[LIMB_SIZE_HALF_448-1] >> 32;
+  wp[LIMB_SIZE_HALF_448-1] &= (((mpi_limb_t)1UL <<32)-1);
+#endif
+  memset (b0, 0, LIMB_SIZE_HALF_448 * BYTES_PER_MPI_LIMB);
+  b0[0] = cy;
+
+  cy = _gcry_mpih_add_n (b1, b1, b0, LIMB_SIZE_HALF_448);
+  cy += _gcry_mpih_lshift (a3, a3, LIMB_SIZE_HALF_448, 1);
+  cy += _gcry_mpih_add_n (b1, b1, a2, LIMB_SIZE_HALF_448);
+  cy += _gcry_mpih_add_n (b1, b1, a3, LIMB_SIZE_HALF_448);
+#if (LIMB_SIZE_HALF_448 > LIMB_SIZE_448/2)
+  cy = _gcry_mpih_rshift (b1, b1, LIMB_SIZE_HALF_448, 32);
+  wp[LIMB_SIZE_HALF_448-1] |= cy;
+#endif
+  memcpy (wp + LIMB_SIZE_HALF_448, b1, (wsize / 2) * BYTES_PER_MPI_LIMB);
+
+#if (LIMB_SIZE_HALF_448 > LIMB_SIZE_448/2)
+  cy = b1[LIMB_SIZE_HALF_448-1];
+#endif
+
+  memset (n, 0, wsize * BYTES_PER_MPI_LIMB);
+
+#if (LIMB_SIZE_HALF_448 > LIMB_SIZE_448/2)
+  n[LIMB_SIZE_HALF_448-1] = cy << 32;
+#else
+  n[LIMB_SIZE_HALF_448] = cy;
+#endif
+  n[0] = cy;
+  _gcry_mpih_add_n (wp, wp, n, wsize);
+
+  cy = _gcry_mpih_sub_n (n, wp, ctx->p->d, wsize);
+  mpih_set_cond (wp, n, wsize, mpih_limb_is_zero (cy));
+}
+
+static void
+ec_mul2_448 (gcry_mpi_t w, gcry_mpi_t u, mpi_ec_t ctx)
+{
+  ec_addm_448 (w, u, u, ctx);
+}
+
+static void
+ec_pow2_448 (gcry_mpi_t w, const gcry_mpi_t b, mpi_ec_t ctx)
+{
+  ec_mulm_448 (w, b, b, ctx);
+}
+
+
+/* Fast reduction for secp256k1 */
+static void
+ec_secp256k1_mod (gcry_mpi_t w, mpi_ec_t ctx)
+{
+  mpi_size_t wsize = (256 + BITS_PER_MPI_LIMB - 1) / BITS_PER_MPI_LIMB;
+  mpi_limb_t n[wsize + 1];
+  mpi_limb_t s[wsize + 1];
+  mpi_limb_t cy, borrow;
+  mpi_ptr_t wp;
+
+  MPN_NORMALIZE (w->d, w->nlimbs);
+  if (w->nlimbs > 2 * 256 / BITS_PER_MPI_LIMB)
+    log_bug ("W must be less than m^2\n");
+
+  RESIZE_AND_CLEAR_IF_NEEDED (w, wsize * 2);
+
+  wp = w->d;
+
+  /* mod P (2^256 - 2^32 - 977) */
+
+  /* first pass of reduction */
+  memcpy (n, wp + wsize, wsize * BYTES_PER_MPI_LIMB);
+#if BITS_PER_MPI_LIMB == 64
+  s[wsize] = _gcry_mpih_lshift (s, wp + wsize, wsize, 32);
+#else
+  s[0] = 0;
+  memcpy (s + 1, wp + wsize, wsize * BYTES_PER_MPI_LIMB);
+#endif
+  wp[wsize] = _gcry_mpih_addmul_1 (wp, n, wsize, 977);
+  cy = _gcry_mpih_add_n (wp, wp, s, wsize + 1);
+
+  /* second pass of reduction */
+#if BITS_PER_MPI_LIMB == 64
+  /* cy == 0 */
+  memset (n + 1, 0, (wsize - 1) * BYTES_PER_MPI_LIMB);
+  umul_ppmm(n[1], n[0], wp[wsize], ((mpi_limb_t)1 << 32) + 977);
+#else
+  memset (n + 2, 0, (wsize - 2) * BYTES_PER_MPI_LIMB);
+  umul_ppmm(n[1], n[0], wp[wsize], 977);
+  add_ssaaaa(n[2], n[1], 0, n[1], 0, cy * 977);
+  add_ssaaaa(n[2], n[1], n[2], n[1], cy, wp[wsize]);
+#endif
+  cy = _gcry_mpih_add_n (wp, wp, n, wsize);
+
+  borrow = _gcry_mpih_sub_n (s, wp, ctx->p->d, wsize);
+  mpih_set_cond (wp, s, wsize,
+                mpih_limb_is_not_zero (cy) | mpih_limb_is_zero (borrow));
+
+  w->nlimbs = wsize;
+  MPN_NORMALIZE (wp, w->nlimbs);
+}
+
+
+struct field_table {
+  const char *p;
+
+  /* computation routines for the field.  */
+  void (* addm) (gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v, mpi_ec_t ctx);
+  void (* subm) (gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v, mpi_ec_t ctx);
+  void (* mulm) (gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v, mpi_ec_t ctx);
+  void (* mul2) (gcry_mpi_t w, gcry_mpi_t u, mpi_ec_t ctx);
+  void (* pow2) (gcry_mpi_t w, const gcry_mpi_t b, mpi_ec_t ctx);
+  void (* mod) (gcry_mpi_t w, mpi_ec_t ctx);
+};
+
+static const struct field_table field_table[] = {
+  {
+    "0x7FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFED",
+    ec_addm_25519,
+    ec_subm_25519,
+    ec_mulm_25519,
+    ec_mul2_25519,
+    ec_pow2_25519,
+    NULL
+  },
+  {
+   "0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFE"
+   "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF",
+    ec_addm_448,
+    ec_subm_448,
+    ec_mulm_448,
+    ec_mul2_448,
+    ec_pow2_448,
+    NULL
+  },
+  {
+    "0xfffffffffffffffffffffffffffffffeffffffffffffffff",
+    NULL,
+    NULL,
+    NULL,
+    NULL,
+    NULL,
+    _gcry_mpi_ec_nist192_mod
+  },
+  {
+    "0xffffffffffffffffffffffffffffffff000000000000000000000001",
+    NULL,
+    NULL,
+    NULL,
+    NULL,
+    NULL,
+    _gcry_mpi_ec_nist224_mod
+  },
+  {
+    "0xffffffff00000001000000000000000000000000ffffffffffffffffffffffff",
+    NULL,
+    NULL,
+    NULL,
+    NULL,
+    NULL,
+    _gcry_mpi_ec_nist256_mod
+  },
+  {
+    "0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffe"
+    "ffffffff0000000000000000ffffffff",
+    NULL,
+    NULL,
+    NULL,
+    NULL,
+    NULL,
+    _gcry_mpi_ec_nist384_mod
+  },
+  {
+    "0x01ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"
+    "ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff",
+    NULL,
+    NULL,
+    NULL,
+    NULL,
+    NULL,
+    _gcry_mpi_ec_nist521_mod
+  },
+  {
+    "0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFC2F",
+    NULL,
+    NULL,
+    NULL,
+    NULL,
+    NULL,
+    ec_secp256k1_mod
+  },
+  { NULL, NULL, NULL, NULL, NULL, NULL },
+};
+
+static gcry_mpi_t field_table_mpis[DIM(field_table)];
+
+
+/* Force recomputation of all helper variables.  */
+void
+_gcry_mpi_ec_get_reset (mpi_ec_t ec)
+{
+  ec->t.valid.a_is_pminus3 = 0;
+  ec->t.valid.two_inv_p = 0;
+}
+
+
+/* Accessor for helper variable.  */
+static int
+ec_get_a_is_pminus3 (mpi_ec_t ec)
 {
-  int i;
-  mpi_ec_t ctx;
   gcry_mpi_t tmp;
 
-  mpi_normalize (p);
-  mpi_normalize (a);
+  if (!ec->t.valid.a_is_pminus3)
+    {
+      ec->t.valid.a_is_pminus3 = 1;
+      tmp = mpi_alloc_like (ec->p);
+      mpi_sub_ui (tmp, ec->p, 3);
+      ec->t.a_is_pminus3 = !mpi_cmp (ec->a, tmp);
+      mpi_free (tmp);
+    }
+
+  return ec->t.a_is_pminus3;
+}
 
-  /* Fixme: Do we want to check some constraints? e.g.
-     a < p
-  */
 
-  ctx = gcry_xcalloc (1, sizeof *ctx);
+/* Accessor for helper variable.  */
+static gcry_mpi_t
+ec_get_two_inv_p (mpi_ec_t ec)
+{
+  if (!ec->t.valid.two_inv_p)
+    {
+      ec->t.valid.two_inv_p = 1;
+      if (!ec->t.two_inv_p)
+        ec->t.two_inv_p = mpi_alloc (0);
+      ec_invm (ec->t.two_inv_p, mpi_const (MPI_C_TWO), ec);
+    }
+  return ec->t.two_inv_p;
+}
+
+
+static const char *const curve25519_bad_points[] = {
+  "0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffed",
+  "0x0000000000000000000000000000000000000000000000000000000000000000",
+  "0x0000000000000000000000000000000000000000000000000000000000000001",
+  "0x00b8495f16056286fdb1329ceb8d09da6ac49ff1fae35616aeb8413b7c7aebe0",
+  "0x57119fd0dd4e22d8868e1c58c45c44045bef839c55b1d0b1248c50a3bc959c5f",
+  "0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffec",
+  "0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffee",
+  NULL
+};
+
+
+static const char *const curve448_bad_points[] = {
+  "0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffe"
+  "ffffffffffffffffffffffffffffffffffffffffffffffffffffffff",
+  "0x00000000000000000000000000000000000000000000000000000000"
+  "00000000000000000000000000000000000000000000000000000000",
+  "0x00000000000000000000000000000000000000000000000000000000"
+  "00000000000000000000000000000000000000000000000000000001",
+  "0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffe"
+  "fffffffffffffffffffffffffffffffffffffffffffffffffffffffe",
+  "0xffffffffffffffffffffffffffffffffffffffffffffffffffffffff"
+  "00000000000000000000000000000000000000000000000000000000",
+  NULL
+};
 
+static const char *const *bad_points_table[] = {
+  curve25519_bad_points,
+  curve448_bad_points,
+};
+
+static gcry_mpi_t
+scanval (const char *string)
+{
+  gpg_err_code_t rc;
+  gcry_mpi_t val;
+
+  rc = _gcry_mpi_scan (&val, GCRYMPI_FMT_HEX, string, 0, NULL);
+  if (rc)
+    log_fatal ("scanning ECC parameter failed: %s\n", gpg_strerror (rc));
+  return val;
+}
+
+
+/* This function initialized a context for elliptic curve based on the
+   field GF(p).  P is the prime specifying this field, A is the first
+   coefficient.  CTX is expected to be zeroized.  */
+static void
+ec_p_init (mpi_ec_t ctx, enum gcry_mpi_ec_models model,
+           enum ecc_dialects dialect,
+           int flags,
+           gcry_mpi_t p, gcry_mpi_t a, gcry_mpi_t b)
+{
+  int i;
+  static int use_barrett;
+
+  if (!use_barrett)
+    {
+      if (getenv ("GCRYPT_BARRETT"))
+        use_barrett = 1;
+      else
+        use_barrett = -1;
+    }
+
+  /* Fixme: Do we want to check some constraints? e.g.  a < p  */
+
+  ctx->model = model;
+  ctx->dialect = dialect;
+  ctx->flags = flags;
+  ctx->nbits = mpi_get_nbits (p);
   ctx->p = mpi_copy (p);
   ctx->a = mpi_copy (a);
+  ctx->b = mpi_copy (b);
+
+  ctx->t.p_barrett = use_barrett > 0? _gcry_mpi_barrett_init (ctx->p, 0):NULL;
+
+  _gcry_mpi_ec_get_reset (ctx);
 
-  tmp = mpi_alloc_like (ctx->p);
-  mpi_sub_ui (tmp, ctx->p, 3);
-  ctx->a_is_pminus3 = !mpi_cmp (ctx->a, tmp);
-  mpi_free (tmp);
+  if (model == MPI_EC_MONTGOMERY)
+    {
+      for (i=0; i< DIM(bad_points_table); i++)
+        {
+          gcry_mpi_t p_candidate = scanval (bad_points_table[i][0]);
+          int match_p = !mpi_cmp (ctx->p, p_candidate);
+          int j;
 
+          mpi_free (p_candidate);
+          if (!match_p)
+            continue;
 
-  /* Allocate constants.  */
-  ctx->one   = mpi_alloc_set_ui (1);
-  ctx->two   = mpi_alloc_set_ui (2);
-  ctx->three = mpi_alloc_set_ui (3);
-  ctx->four  = mpi_alloc_set_ui (4);
-  ctx->eight = mpi_alloc_set_ui (8);
-  ctx->two_inv_p = mpi_alloc (0);
-  ec_invm (ctx->two_inv_p, ctx->two, ctx);
+          for (j=0; i< DIM(ctx->t.scratch) && bad_points_table[i][j]; j++)
+            ctx->t.scratch[j] = scanval (bad_points_table[i][j]);
+        }
+    }
+  else
+    {
+      /* Allocate scratch variables.  */
+      for (i=0; i< DIM(ctx->t.scratch); i++)
+        ctx->t.scratch[i] = mpi_alloc_like (ctx->p);
+    }
+
+  ctx->addm = ec_addm;
+  ctx->subm = ec_subm;
+  ctx->mulm = ec_mulm;
+  ctx->mul2 = ec_mul2;
+  ctx->pow2 = ec_pow2;
+  ctx->mod = ec_mod;
+
+  for (i=0; field_table[i].p; i++)
+    {
+      gcry_mpi_t f_p;
+      gpg_err_code_t rc;
+
+      if (field_table_mpis[i] == NULL)
+       {
+         rc = _gcry_mpi_scan (&f_p, GCRYMPI_FMT_HEX, field_table[i].p, 0,
+                              NULL);
+         if (rc)
+           log_fatal ("scanning ECC parameter failed: %s\n",
+                      gpg_strerror (rc));
+         field_table_mpis[i] = f_p; /* cache */
+       }
+      else
+       {
+         f_p = field_table_mpis[i];
+       }
 
-  /* Allocate scratch variables.  */
-  for (i=0; i< DIM(ctx->scratch); i++)
-    ctx->scratch[i] = mpi_alloc_like (ctx->p);
+      if (!mpi_cmp (p, f_p))
+        {
+          ctx->addm = field_table[i].addm ? field_table[i].addm : ctx->addm;
+          ctx->subm = field_table[i].subm ? field_table[i].subm : ctx->subm;
+          ctx->mulm = field_table[i].mulm ? field_table[i].mulm : ctx->mulm;
+          ctx->mul2 = field_table[i].mul2 ? field_table[i].mul2 : ctx->mul2;
+          ctx->pow2 = field_table[i].pow2 ? field_table[i].pow2 : ctx->pow2;
+          ctx->mod = field_table[i].mod ? field_table[i].mod : ctx->mod;
+
+         if (ctx->a)
+           {
+             mpi_resize (ctx->a, ctx->p->nlimbs);
+             ctx->a->nlimbs = ctx->p->nlimbs;
+           }
+
+         if (ctx->b)
+           {
+             mpi_resize (ctx->b, ctx->p->nlimbs);
+             ctx->b->nlimbs = ctx->p->nlimbs;
+           }
+
+          for (i=0; i< DIM(ctx->t.scratch) && ctx->t.scratch[i]; i++)
+            ctx->t.scratch[i]->nlimbs = ctx->p->nlimbs;
+
+          break;
+        }
+    }
 
   /* Prepare for fast reduction.  */
   /* FIXME: need a test for NIST values.  However it does not gain us
@@ -297,31 +939,33 @@ _gcry_mpi_ec_init (gcry_mpi_t p, gcry_mpi_t a)
 /*         ctx->s[i] = mpi_new (384); */
 /*       ctx->c    = mpi_new (384*2); */
 /*     } */
-
-  return ctx;
 }
 
-void
-_gcry_mpi_ec_free (mpi_ec_t ctx)
+
+static void
+ec_deinit (void *opaque)
 {
+  mpi_ec_t ctx = opaque;
   int i;
 
-  if (!ctx)
-    return;
+  _gcry_mpi_barrett_free (ctx->t.p_barrett);
 
+  /* Domain parameter.  */
   mpi_free (ctx->p);
   mpi_free (ctx->a);
+  mpi_free (ctx->b);
+  _gcry_mpi_point_release (ctx->G);
+  mpi_free (ctx->n);
 
-  mpi_free (ctx->one);
-  mpi_free (ctx->two);
-  mpi_free (ctx->three);
-  mpi_free (ctx->four);
-  mpi_free (ctx->eight);
+  /* The key.  */
+  _gcry_mpi_point_release (ctx->Q);
+  mpi_free (ctx->d);
 
-  mpi_free (ctx->two_inv_p);
+  /* Private data of ec.c.  */
+  mpi_free (ctx->t.two_inv_p);
 
-  for (i=0; i< DIM(ctx->scratch); i++)
-    mpi_free (ctx->scratch[i]);
+  for (i=0; i< DIM(ctx->t.scratch); i++)
+    mpi_free (ctx->t.scratch[i]);
 
 /*   if (ctx->nist_nbits == 192) */
 /*     { */
@@ -335,61 +979,255 @@ _gcry_mpi_ec_free (mpi_ec_t ctx)
 /*         mpi_free (ctx->s[i]); */
 /*       mpi_free (ctx->c); */
 /*     } */
+}
+
+
+/* This function returns a new context for elliptic curve based on the
+   field GF(p).  P is the prime specifying this field, A is the first
+   coefficient, B is the second coefficient, and MODEL is the model
+   for the curve.  This function is only used within Libgcrypt and not
+   part of the public API.
+
+   This context needs to be released using _gcry_mpi_ec_free.  */
+mpi_ec_t
+_gcry_mpi_ec_p_internal_new (enum gcry_mpi_ec_models model,
+                             enum ecc_dialects dialect,
+                             int flags,
+                             gcry_mpi_t p, gcry_mpi_t a, gcry_mpi_t b)
+{
+  mpi_ec_t ctx;
+
+  ctx = xcalloc (1, sizeof *ctx);
+  ec_p_init (ctx, model, dialect, flags, p, a, b);
+
+  return ctx;
+}
+
+
+/* This is a variant of _gcry_mpi_ec_p_internal_new which returns an
+   public context and does some error checking on the supplied
+   arguments.  On success the new context is stored at R_CTX and 0 is
+   returned; on error NULL is stored at R_CTX and an error code is
+   returned.
+
+   The context needs to be released using gcry_ctx_release.  */
+gpg_err_code_t
+_gcry_mpi_ec_p_new (gcry_ctx_t *r_ctx,
+                    enum gcry_mpi_ec_models model,
+                    enum ecc_dialects dialect,
+                    int flags,
+                    gcry_mpi_t p, gcry_mpi_t a, gcry_mpi_t b)
+{
+  gcry_ctx_t ctx;
+  mpi_ec_t ec;
+
+  *r_ctx = NULL;
+  if (!p || !a)
+    return GPG_ERR_EINVAL;
+
+  ctx = _gcry_ctx_alloc (CONTEXT_TYPE_EC, sizeof *ec, ec_deinit);
+  if (!ctx)
+    return gpg_err_code_from_syserror ();
+  ec = _gcry_ctx_get_pointer (ctx, CONTEXT_TYPE_EC);
+  ec_p_init (ec, model, dialect, flags, p, a, b);
+
+  *r_ctx = ctx;
+  return 0;
+}
+
+
+void
+_gcry_mpi_ec_free (mpi_ec_t ctx)
+{
+  if (ctx)
+    {
+      ec_deinit (ctx);
+      xfree (ctx);
+    }
+}
+
+
+gcry_mpi_t
+_gcry_mpi_ec_get_mpi (const char *name, gcry_ctx_t ctx, int copy)
+{
+  mpi_ec_t ec = _gcry_ctx_get_pointer (ctx, CONTEXT_TYPE_EC);
 
-  gcry_free (ctx);
+  return _gcry_ecc_get_mpi (name, ec, copy);
 }
 
+
+gcry_mpi_point_t
+_gcry_mpi_ec_get_point (const char *name, gcry_ctx_t ctx, int copy)
+{
+  mpi_ec_t ec = _gcry_ctx_get_pointer (ctx, CONTEXT_TYPE_EC);
+
+  (void)copy;  /* Not used.  */
+
+  return _gcry_ecc_get_point (name, ec);
+}
+
+
+gpg_err_code_t
+_gcry_mpi_ec_set_mpi (const char *name, gcry_mpi_t newvalue,
+                      gcry_ctx_t ctx)
+{
+  mpi_ec_t ec = _gcry_ctx_get_pointer (ctx, CONTEXT_TYPE_EC);
+
+  return _gcry_ecc_set_mpi (name, newvalue, ec);
+}
+
+
+gpg_err_code_t
+_gcry_mpi_ec_set_point (const char *name, gcry_mpi_point_t newvalue,
+                        gcry_ctx_t ctx)
+{
+  mpi_ec_t ec = _gcry_ctx_get_pointer (ctx, CONTEXT_TYPE_EC);
+
+  return _gcry_ecc_set_point (name, newvalue, ec);
+}
+
+
+/* Given an encoded point in the MPI VALUE and a context EC, decode
+ * the point according to the context and store it in RESULT.  On
+ * error an error code is return but RESULT might have been changed.
+ * If no context is given the function tries to decode VALUE by
+ * assuming a 0x04 prefixed uncompressed encoding.  */
+gpg_err_code_t
+_gcry_mpi_ec_decode_point (mpi_point_t result, gcry_mpi_t value, mpi_ec_t ec)
+{
+  gpg_err_code_t rc;
+
+  if (ec
+      && (ec->dialect == ECC_DIALECT_ED25519
+          || (ec->model == MPI_EC_EDWARDS
+              && ec->dialect == ECC_DIALECT_SAFECURVE)))
+    rc = _gcry_ecc_eddsa_decodepoint (value, ec, result, NULL, NULL);
+  else if (ec && ec->model == MPI_EC_MONTGOMERY)
+    rc = _gcry_ecc_mont_decodepoint (value, ec, result);
+  else
+    rc = _gcry_ecc_sec_decodepoint (value, ec, result);
+
+  return rc;
+}
+
+
 /* Compute the affine coordinates from the projective coordinates in
    POINT.  Set them into X and Y.  If one coordinate is not required,
    X or Y may be passed as NULL.  CTX is the usual context. Returns: 0
    on success or !0 if POINT is at infinity.  */
 int
-_gcry_mpi_ec_get_affine (gcry_mpi_t x, gcry_mpi_t y, mpi_point_t *point,
+_gcry_mpi_ec_get_affine (gcry_mpi_t x, gcry_mpi_t y, mpi_point_t point,
                          mpi_ec_t ctx)
 {
-  gcry_mpi_t z1, z2, z3;
-
   if (!mpi_cmp_ui (point->z, 0))
     return -1;
 
-  z1 = mpi_new (0);
-  z2 = mpi_new (0);
-  ec_invm (z1, point->z, ctx);  /* z1 = z^(-1) mod p  */
-  ec_mulm (z2, z1, z1, ctx);    /* z2 = z^(-2) mod p  */
-
-  if (x)
-    ec_mulm (x, point->x, z2, ctx);
-
-  if (y)
+  switch (ctx->model)
     {
-      z3 = mpi_new (0);
-      ec_mulm (z3, z2, z1, ctx);      /* z3 = z^(-3) mod p  */
-      ec_mulm (y, point->y, z3, ctx);
-      mpi_free (z3);
+    case MPI_EC_WEIERSTRASS: /* Using Jacobian coordinates.  */
+      {
+        gcry_mpi_t z1, z2, z3;
+
+       if (!mpi_cmp_ui (point->z, 1))
+         {
+           if (x)
+             mpi_set (x, point->x);
+           if (y)
+             mpi_set (y, point->y);
+           return 0;
+         }
+
+        z1 = mpi_new (0);
+        z2 = mpi_new (0);
+        ec_invm (z1, point->z, ctx);  /* z1 = z^(-1) mod p  */
+        ec_mulm (z2, z1, z1, ctx);    /* z2 = z^(-2) mod p  */
+
+        if (x)
+          ec_mulm (x, point->x, z2, ctx);
+
+        if (y)
+          {
+            z3 = mpi_new (0);
+            ec_mulm (z3, z2, z1, ctx);      /* z3 = z^(-3) mod p  */
+            ec_mulm (y, point->y, z3, ctx);
+            mpi_free (z3);
+          }
+
+        mpi_free (z2);
+        mpi_free (z1);
+      }
+      return 0;
+
+    case MPI_EC_MONTGOMERY:
+      {
+        if (x)
+          mpi_set (x, point->x);
+
+        if (y)
+          {
+            log_fatal ("%s: Getting Y-coordinate on %s is not supported\n",
+                       "_gcry_mpi_ec_get_affine", "Montgomery");
+            return -1;
+          }
+      }
+      return 0;
+
+    case MPI_EC_EDWARDS:
+      {
+        gcry_mpi_t z;
+
+       if (!mpi_cmp_ui (point->z, 1))
+         {
+           if (x)
+             mpi_set (x, point->x);
+           if (y)
+             mpi_set (y, point->y);
+           return 0;
+         }
+
+        z = mpi_new (0);
+        ec_invm (z, point->z, ctx);
+
+        mpi_resize (z, ctx->p->nlimbs);
+        z->nlimbs = ctx->p->nlimbs;
+
+        if (x)
+          {
+            mpi_resize (x, ctx->p->nlimbs);
+            x->nlimbs = ctx->p->nlimbs;
+            ctx->mulm (x, point->x, z, ctx);
+          }
+        if (y)
+          {
+            mpi_resize (y, ctx->p->nlimbs);
+            y->nlimbs = ctx->p->nlimbs;
+            ctx->mulm (y, point->y, z, ctx);
+          }
+
+        _gcry_mpi_release (z);
+      }
+      return 0;
+
+    default:
+      return -1;
     }
-
-  mpi_free (z2);
-  mpi_free (z1);
-  return 0;
 }
 
 
-
-
 
-/*  RESULT = 2 * POINT  */
-void
-_gcry_mpi_ec_dup_point (mpi_point_t *result, mpi_point_t *point, mpi_ec_t ctx)
+/*  RESULT = 2 * POINT  (Weierstrass version). */
+static void
+dup_point_weierstrass (mpi_point_t result, mpi_point_t point, mpi_ec_t ctx)
 {
 #define x3 (result->x)
 #define y3 (result->y)
 #define z3 (result->z)
-#define t1 (ctx->scratch[0])
-#define t2 (ctx->scratch[1])
-#define t3 (ctx->scratch[2])
-#define l1 (ctx->scratch[3])
-#define l2 (ctx->scratch[4])
-#define l3 (ctx->scratch[5])
+#define t1 (ctx->t.scratch[0])
+#define t2 (ctx->t.scratch[1])
+#define t3 (ctx->t.scratch[2])
+#define l1 (ctx->t.scratch[3])
+#define l2 (ctx->t.scratch[4])
+#define l3 (ctx->t.scratch[5])
 
   if (!mpi_cmp_ui (point->y, 0) || !mpi_cmp_ui (point->z, 0))
     {
@@ -400,14 +1238,14 @@ _gcry_mpi_ec_dup_point (mpi_point_t *result, mpi_point_t 
*point, mpi_ec_t ctx)
     }
   else
     {
-      if (ctx->a_is_pminus3)  /* Use the faster case.  */
+      if (ec_get_a_is_pminus3 (ctx))  /* Use the faster case.  */
         {
           /* L1 = 3(X - Z^2)(X + Z^2) */
           /*                          T1: used for Z^2. */
           /*                          T2: used for the right term.  */
-          ec_powm (t1, point->z, ctx->two, ctx);
+          ec_pow2 (t1, point->z, ctx);
           ec_subm (l1, point->x, t1, ctx);
-          ec_mulm (l1, l1, ctx->three, ctx);
+          ec_mulm (l1, l1, mpi_const (MPI_C_THREE), ctx);
           ec_addm (t2, point->x, t1, ctx);
           ec_mulm (l1, l1, t2, ctx);
         }
@@ -415,32 +1253,32 @@ _gcry_mpi_ec_dup_point (mpi_point_t *result, mpi_point_t 
*point, mpi_ec_t ctx)
         {
           /* L1 = 3X^2 + aZ^4 */
           /*                          T1: used for aZ^4. */
-          ec_powm (l1, point->x, ctx->two, ctx);
-          ec_mulm (l1, l1, ctx->three, ctx);
-          ec_powm (t1, point->z, ctx->four, ctx);
+          ec_pow2 (l1, point->x, ctx);
+          ec_mulm (l1, l1, mpi_const (MPI_C_THREE), ctx);
+          ec_powm (t1, point->z, mpi_const (MPI_C_FOUR), ctx);
           ec_mulm (t1, t1, ctx->a, ctx);
           ec_addm (l1, l1, t1, ctx);
         }
       /* Z3 = 2YZ */
       ec_mulm (z3, point->y, point->z, ctx);
-      ec_mulm (z3, z3, ctx->two, ctx);
+      ec_mul2 (z3, z3, ctx);
 
       /* L2 = 4XY^2 */
       /*                              T2: used for Y2; required later. */
-      ec_powm (t2, point->y, ctx->two, ctx);
+      ec_pow2 (t2, point->y, ctx);
       ec_mulm (l2, t2, point->x, ctx);
-      ec_mulm (l2, l2, ctx->four, ctx);
+      ec_mulm (l2, l2, mpi_const (MPI_C_FOUR), ctx);
 
       /* X3 = L1^2 - 2L2 */
       /*                              T1: used for L2^2. */
-      ec_powm (x3, l1, ctx->two, ctx);
-      ec_mulm (t1, l2, ctx->two, ctx);
+      ec_pow2 (x3, l1, ctx);
+      ec_mul2 (t1, l2, ctx);
       ec_subm (x3, x3, t1, ctx);
 
       /* L3 = 8Y^4 */
       /*                              T2: taken from above. */
-      ec_powm (t2, t2, ctx->two, ctx);
-      ec_mulm (l3, t2, ctx->eight, ctx);
+      ec_pow2 (t2, t2, ctx);
+      ec_mulm (l3, t2, mpi_const (MPI_C_EIGHT), ctx);
 
       /* Y3 = L1(L2 - X3) - L3 */
       ec_subm (y3, l2, x3, ctx);
@@ -460,12 +1298,115 @@ _gcry_mpi_ec_dup_point (mpi_point_t *result, 
mpi_point_t *point, mpi_ec_t ctx)
 }
 
 
+/*  RESULT = 2 * POINT  (Montgomery version). */
+static void
+dup_point_montgomery (mpi_point_t result, mpi_point_t point, mpi_ec_t ctx)
+{
+  (void)result;
+  (void)point;
+  (void)ctx;
+  log_fatal ("%s: %s not yet supported\n",
+             "_gcry_mpi_ec_dup_point", "Montgomery");
+}
 
-/* RESULT = P1 + P2 */
+
+/*  RESULT = 2 * POINT  (Twisted Edwards version). */
+static void
+dup_point_edwards (mpi_point_t result, mpi_point_t point, mpi_ec_t ctx)
+{
+#define X1 (point->x)
+#define Y1 (point->y)
+#define Z1 (point->z)
+#define X3 (result->x)
+#define Y3 (result->y)
+#define Z3 (result->z)
+#define B (ctx->t.scratch[0])
+#define C (ctx->t.scratch[1])
+#define D (ctx->t.scratch[2])
+#define E (ctx->t.scratch[3])
+#define F (ctx->t.scratch[4])
+#define H (ctx->t.scratch[5])
+#define J (ctx->t.scratch[6])
+
+  /* Compute: (X_3 : Y_3 : Z_3) = 2( X_1 : Y_1 : Z_1 ) */
+
+  /* B = (X_1 + Y_1)^2  */
+  ctx->addm (B, X1, Y1, ctx);
+  ctx->pow2 (B, B, ctx);
+
+  /* C = X_1^2 */
+  /* D = Y_1^2 */
+  ctx->pow2 (C, X1, ctx);
+  ctx->pow2 (D, Y1, ctx);
+
+  /* E = aC */
+  if (ctx->dialect == ECC_DIALECT_ED25519)
+    ctx->subm (E, ctx->p, C, ctx);
+  else
+    ctx->mulm (E, ctx->a, C, ctx);
+
+  /* F = E + D */
+  ctx->addm (F, E, D, ctx);
+
+  /* H = Z_1^2 */
+  ctx->pow2 (H, Z1, ctx);
+
+  /* J = F - 2H */
+  ctx->mul2 (J, H, ctx);
+  ctx->subm (J, F, J, ctx);
+
+  /* X_3 = (B - C - D) · J */
+  ctx->subm (X3, B, C, ctx);
+  ctx->subm (X3, X3, D, ctx);
+  ctx->mulm (X3, X3, J, ctx);
+
+  /* Y_3 = F · (E - D) */
+  ctx->subm (Y3, E, D, ctx);
+  ctx->mulm (Y3, Y3, F, ctx);
+
+  /* Z_3 = F · J */
+  ctx->mulm (Z3, F, J, ctx);
+
+#undef X1
+#undef Y1
+#undef Z1
+#undef X3
+#undef Y3
+#undef Z3
+#undef B
+#undef C
+#undef D
+#undef E
+#undef F
+#undef H
+#undef J
+}
+
+
+/*  RESULT = 2 * POINT  */
 void
-_gcry_mpi_ec_add_points (mpi_point_t *result,
-                         mpi_point_t *p1, mpi_point_t *p2,
-                         mpi_ec_t ctx)
+_gcry_mpi_ec_dup_point (mpi_point_t result, mpi_point_t point, mpi_ec_t ctx)
+{
+  switch (ctx->model)
+    {
+    case MPI_EC_WEIERSTRASS:
+      dup_point_weierstrass (result, point, ctx);
+      break;
+    case MPI_EC_MONTGOMERY:
+      dup_point_montgomery (result, point, ctx);
+      break;
+    case MPI_EC_EDWARDS:
+      dup_point_edwards (result, point, ctx);
+      break;
+    }
+}
+
+
+/* RESULT = P1 + P2  (Weierstrass version).*/
+static void
+add_points_weierstrass (mpi_point_t result,
+                        mpi_point_t p1, mpi_point_t p2,
+                        mpi_ec_t ctx)
 {
 #define x1 (p1->x    )
 #define y1 (p1->y    )
@@ -476,17 +1417,17 @@ _gcry_mpi_ec_add_points (mpi_point_t *result,
 #define x3 (result->x)
 #define y3 (result->y)
 #define z3 (result->z)
-#define l1 (ctx->scratch[0])
-#define l2 (ctx->scratch[1])
-#define l3 (ctx->scratch[2])
-#define l4 (ctx->scratch[3])
-#define l5 (ctx->scratch[4])
-#define l6 (ctx->scratch[5])
-#define l7 (ctx->scratch[6])
-#define l8 (ctx->scratch[7])
-#define l9 (ctx->scratch[8])
-#define t1 (ctx->scratch[9])
-#define t2 (ctx->scratch[10])
+#define l1 (ctx->t.scratch[0])
+#define l2 (ctx->t.scratch[1])
+#define l3 (ctx->t.scratch[2])
+#define l4 (ctx->t.scratch[3])
+#define l5 (ctx->t.scratch[4])
+#define l6 (ctx->t.scratch[5])
+#define l7 (ctx->t.scratch[6])
+#define l8 (ctx->t.scratch[7])
+#define l9 (ctx->t.scratch[8])
+#define t1 (ctx->t.scratch[9])
+#define t2 (ctx->t.scratch[10])
 
   if ( (!mpi_cmp (x1, x2)) && (!mpi_cmp (y1, y2)) && (!mpi_cmp (z1, z2)) )
     {
@@ -518,23 +1459,23 @@ _gcry_mpi_ec_add_points (mpi_point_t *result,
         mpi_set (l1, x1);
       else
         {
-          ec_powm (l1, z2, ctx->two, ctx);
+          ec_pow2 (l1, z2, ctx);
           ec_mulm (l1, l1, x1, ctx);
         }
       if (z1_is_one)
         mpi_set (l2, x2);
       else
         {
-          ec_powm (l2, z1, ctx->two, ctx);
+          ec_pow2 (l2, z1, ctx);
           ec_mulm (l2, l2, x2, ctx);
         }
       /* l3 = l1 - l2 */
       ec_subm (l3, l1, l2, ctx);
       /* l4 = y1 z2^3  */
-      ec_powm (l4, z2, ctx->three, ctx);
+      ec_powm (l4, z2, mpi_const (MPI_C_THREE), ctx);
       ec_mulm (l4, l4, y1, ctx);
       /* l5 = y2 z1^3  */
-      ec_powm (l5, z1, ctx->three, ctx);
+      ec_powm (l5, z1, mpi_const (MPI_C_THREE), ctx);
       ec_mulm (l5, l5, y2, ctx);
       /* l6 = l4 - l5  */
       ec_subm (l6, l4, l5, ctx);
@@ -564,19 +1505,19 @@ _gcry_mpi_ec_add_points (mpi_point_t *result,
           ec_mulm (z3, z1, z2, ctx);
           ec_mulm (z3, z3, l3, ctx);
           /* x3 = l6^2 - l7 l3^2  */
-          ec_powm (t1, l6, ctx->two, ctx);
-          ec_powm (t2, l3, ctx->two, ctx);
+          ec_pow2 (t1, l6, ctx);
+          ec_pow2 (t2, l3, ctx);
           ec_mulm (t2, t2, l7, ctx);
           ec_subm (x3, t1, t2, ctx);
           /* l9 = l7 l3^2 - 2 x3  */
-          ec_mulm (t1, x3, ctx->two, ctx);
+          ec_mul2 (t1, x3, ctx);
           ec_subm (l9, t2, t1, ctx);
           /* y3 = (l9 l6 - l8 l3^3)/2  */
           ec_mulm (l9, l9, l6, ctx);
-          ec_powm (t1, l3, ctx->three, ctx); /* fixme: Use saved value*/
+          ec_powm (t1, l3, mpi_const (MPI_C_THREE), ctx); /* fixme: Use saved 
value*/
           ec_mulm (t1, t1, l8, ctx);
           ec_subm (y3, l9, t1, ctx);
-          ec_mulm (y3, y3, ctx->two_inv_p, ctx);
+          ec_mulm (y3, y3, ec_get_two_inv_p (ctx), ctx);
         }
     }
 
@@ -603,36 +1544,421 @@ _gcry_mpi_ec_add_points (mpi_point_t *result,
 }
 
 
+/* RESULT = P1 + P2  (Montgomery version).*/
+static void
+add_points_montgomery (mpi_point_t result,
+                       mpi_point_t p1, mpi_point_t p2,
+                       mpi_ec_t ctx)
+{
+  (void)result;
+  (void)p1;
+  (void)p2;
+  (void)ctx;
+  log_fatal ("%s: %s not yet supported\n",
+             "_gcry_mpi_ec_add_points", "Montgomery");
+}
 
-/* Scalar point multiplication - the main function for ECC.  If takes
-   an integer SCALAR and a POINT as well as the usual context CTX.
-   RESULT will be set to the resulting point. */
+
+/* RESULT = P1 + P2  (Twisted Edwards version).*/
+static void
+add_points_edwards (mpi_point_t result,
+                    mpi_point_t p1, mpi_point_t p2,
+                    mpi_ec_t ctx)
+{
+#define X1 (p1->x)
+#define Y1 (p1->y)
+#define Z1 (p1->z)
+#define X2 (p2->x)
+#define Y2 (p2->y)
+#define Z2 (p2->z)
+#define X3 (result->x)
+#define Y3 (result->y)
+#define Z3 (result->z)
+#define A (ctx->t.scratch[0])
+#define B (ctx->t.scratch[1])
+#define C (ctx->t.scratch[2])
+#define D (ctx->t.scratch[3])
+#define E (ctx->t.scratch[4])
+#define F (ctx->t.scratch[5])
+#define G (ctx->t.scratch[6])
+#define tmp (ctx->t.scratch[7])
+
+  mpi_point_resize (result, ctx);
+
+  /* Compute: (X_3 : Y_3 : Z_3) = (X_1 : Y_1 : Z_1) + (X_2 : Y_2 : Z_3)  */
+
+  /* A = Z1 · Z2 */
+  ctx->mulm (A, Z1, Z2, ctx);
+
+  /* B = A^2 */
+  ctx->pow2 (B, A, ctx);
+
+  /* C = X1 · X2 */
+  ctx->mulm (C, X1, X2, ctx);
+
+  /* D = Y1 · Y2 */
+  ctx->mulm (D, Y1, Y2, ctx);
+
+  /* E = d · C · D */
+  ctx->mulm (E, ctx->b, C, ctx);
+  ctx->mulm (E, E, D, ctx);
+
+  /* F = B - E */
+  ctx->subm (F, B, E, ctx);
+
+  /* G = B + E */
+  ctx->addm (G, B, E, ctx);
+
+  /* X_3 = A · F · ((X_1 + Y_1) · (X_2 + Y_2) - C - D) */
+  ctx->addm (tmp, X1, Y1, ctx);
+  ctx->addm (X3, X2, Y2, ctx);
+  ctx->mulm (X3, X3, tmp, ctx);
+  ctx->subm (X3, X3, C, ctx);
+  ctx->subm (X3, X3, D, ctx);
+  ctx->mulm (X3, X3, F, ctx);
+  ctx->mulm (X3, X3, A, ctx);
+
+  /* Y_3 = A · G · (D - aC) */
+  if (ctx->dialect == ECC_DIALECT_ED25519)
+    {
+      ctx->addm (Y3, D, C, ctx);
+    }
+  else
+    {
+      ctx->mulm (Y3, ctx->a, C, ctx);
+      ctx->subm (Y3, D, Y3, ctx);
+    }
+  ctx->mulm (Y3, Y3, G, ctx);
+  ctx->mulm (Y3, Y3, A, ctx);
+
+  /* Z_3 = F · G */
+  ctx->mulm (Z3, F, G, ctx);
+
+
+#undef X1
+#undef Y1
+#undef Z1
+#undef X2
+#undef Y2
+#undef Z2
+#undef X3
+#undef Y3
+#undef Z3
+#undef A
+#undef B
+#undef C
+#undef D
+#undef E
+#undef F
+#undef G
+#undef tmp
+}
+
+
+/* Compute a step of Montgomery Ladder (only use X and Z in the point).
+   Inputs:  P1, P2, and x-coordinate of DIF = P1 - P1.
+   Outputs: PRD = 2 * P1 and  SUM = P1 + P2. */
+static void
+montgomery_ladder (mpi_point_t prd, mpi_point_t sum,
+                   mpi_point_t p1, mpi_point_t p2, gcry_mpi_t dif_x,
+                   mpi_ec_t ctx)
+{
+  ctx->addm (sum->x, p2->x, p2->z, ctx);
+  ctx->subm (p2->z, p2->x, p2->z, ctx);
+  ctx->addm (prd->x, p1->x, p1->z, ctx);
+  ctx->subm (p1->z, p1->x, p1->z, ctx);
+  ctx->mulm (p2->x, p1->z, sum->x, ctx);
+  ctx->mulm (p2->z, prd->x, p2->z, ctx);
+  ctx->pow2 (p1->x, prd->x, ctx);
+  ctx->pow2 (p1->z, p1->z, ctx);
+  ctx->addm (sum->x, p2->x, p2->z, ctx);
+  ctx->subm (p2->z, p2->x, p2->z, ctx);
+  ctx->mulm (prd->x, p1->x, p1->z, ctx);
+  ctx->subm (p1->z, p1->x, p1->z, ctx);
+  ctx->pow2 (sum->x, sum->x, ctx);
+  ctx->pow2 (sum->z, p2->z, ctx);
+  ctx->mulm (prd->z, p1->z, ctx->a, ctx); /* CTX->A: (a-2)/4 */
+  ctx->mulm (sum->z, sum->z, dif_x, ctx);
+  ctx->addm (prd->z, p1->x, prd->z, ctx);
+  ctx->mulm (prd->z, prd->z, p1->z, ctx);
+}
+
+
+/* RESULT = P1 + P2 */
 void
-_gcry_mpi_ec_mul_point (mpi_point_t *result,
-                        gcry_mpi_t scalar, mpi_point_t *point,
+_gcry_mpi_ec_add_points (mpi_point_t result,
+                         mpi_point_t p1, mpi_point_t p2,
+                         mpi_ec_t ctx)
+{
+  switch (ctx->model)
+    {
+    case MPI_EC_WEIERSTRASS:
+      add_points_weierstrass (result, p1, p2, ctx);
+      break;
+    case MPI_EC_MONTGOMERY:
+      add_points_montgomery (result, p1, p2, ctx);
+      break;
+    case MPI_EC_EDWARDS:
+      add_points_edwards (result, p1, p2, ctx);
+      break;
+    }
+}
+
+
+/* RESULT = P1 - P2  (Weierstrass version).*/
+static void
+sub_points_weierstrass (mpi_point_t result,
+                        mpi_point_t p1, mpi_point_t p2,
                         mpi_ec_t ctx)
 {
-#if 0
-  /* Simple left to right binary method.  GECC Algorithm 3.27 */
-  unsigned int nbits;
-  int i;
+  (void)result;
+  (void)p1;
+  (void)p2;
+  (void)ctx;
+  log_fatal ("%s: %s not yet supported\n",
+             "_gcry_mpi_ec_sub_points", "Weierstrass");
+}
 
-  nbits = mpi_get_nbits (scalar);
-  mpi_set_ui (result->x, 1);
-  mpi_set_ui (result->y, 1);
-  mpi_set_ui (result->z, 0);
 
-  for (i=nbits-1; i >= 0; i--)
+/* RESULT = P1 - P2  (Montgomery version).*/
+static void
+sub_points_montgomery (mpi_point_t result,
+                       mpi_point_t p1, mpi_point_t p2,
+                       mpi_ec_t ctx)
+{
+  (void)result;
+  (void)p1;
+  (void)p2;
+  (void)ctx;
+  log_fatal ("%s: %s not yet supported\n",
+             "_gcry_mpi_ec_sub_points", "Montgomery");
+}
+
+
+/* RESULT = P1 - P2  (Twisted Edwards version).*/
+static void
+sub_points_edwards (mpi_point_t result,
+                    mpi_point_t p1, mpi_point_t p2,
+                    mpi_ec_t ctx)
+{
+  mpi_point_t p2i = _gcry_mpi_point_new (0);
+  point_set (p2i, p2);
+  ctx->subm (p2i->x, ctx->p, p2i->x, ctx);
+  add_points_edwards (result, p1, p2i, ctx);
+  _gcry_mpi_point_release (p2i);
+}
+
+
+/* RESULT = P1 - P2 */
+void
+_gcry_mpi_ec_sub_points (mpi_point_t result,
+                         mpi_point_t p1, mpi_point_t p2,
+                         mpi_ec_t ctx)
+{
+  switch (ctx->model)
     {
-      _gcry_mpi_ec_dup_point (result, result, ctx);
-      if (mpi_test_bit (scalar, i) == 1)
-        _gcry_mpi_ec_add_points (result, result, point, ctx);
+    case MPI_EC_WEIERSTRASS:
+      sub_points_weierstrass (result, p1, p2, ctx);
+      break;
+    case MPI_EC_MONTGOMERY:
+      sub_points_montgomery (result, p1, p2, ctx);
+      break;
+    case MPI_EC_EDWARDS:
+      sub_points_edwards (result, p1, p2, ctx);
+      break;
     }
+}
 
-#else
+
+/* Scalar point multiplication - the main function for ECC.  It takes
+   an integer SCALAR and a POINT as well as the usual context CTX.
+   RESULT will be set to the resulting point. */
+void
+_gcry_mpi_ec_mul_point (mpi_point_t result,
+                        gcry_mpi_t scalar, mpi_point_t point,
+                        mpi_ec_t ctx)
+{
   gcry_mpi_t x1, y1, z1, k, h, yy;
   unsigned int i, loops;
-  mpi_point_t p1, p2, p1inv;
+  mpi_point_struct p1, p2, p1inv;
+
+  /* First try HW accelerated scalar multiplications.  Error
+     is returned if acceleration is not supported or if HW
+     does not support acceleration of given input.  */
+  if (mpi_ec_hw_mul_point (result, scalar, point, ctx) >= 0)
+    {
+      return;
+    }
+
+  if (ctx->model == MPI_EC_EDWARDS
+      || (ctx->model == MPI_EC_WEIERSTRASS
+          && mpi_is_secure (scalar)))
+    {
+      /* Simple left to right binary method.  Algorithm 3.27 from
+       * {author={Hankerson, Darrel and Menezes, Alfred J. and Vanstone, 
Scott},
+       *  title = {Guide to Elliptic Curve Cryptography},
+       *  year = {2003}, isbn = {038795273X},
+       *  url = {http://www.cacr.math.uwaterloo.ca/ecc/},
+       *  publisher = {Springer-Verlag New York, Inc.}} */
+      unsigned int nbits;
+      int j;
+
+      if (mpi_cmp (scalar, ctx->p) >= 0)
+        nbits = mpi_get_nbits (scalar);
+      else
+        nbits = mpi_get_nbits (ctx->p);
+
+      if (ctx->model == MPI_EC_WEIERSTRASS)
+        {
+          mpi_set_ui (result->x, 1);
+          mpi_set_ui (result->y, 1);
+          mpi_set_ui (result->z, 0);
+        }
+      else
+        {
+          mpi_set_ui (result->x, 0);
+          mpi_set_ui (result->y, 1);
+          mpi_set_ui (result->z, 1);
+          mpi_point_resize (point, ctx);
+        }
+
+      if (mpi_is_secure (scalar))
+        {
+          /* If SCALAR is in secure memory we assume that it is the
+             secret key we use constant time operation.  */
+          mpi_point_struct tmppnt;
+
+          point_init (&tmppnt);
+          mpi_point_resize (result, ctx);
+          mpi_point_resize (&tmppnt, ctx);
+          for (j=nbits-1; j >= 0; j--)
+            {
+              _gcry_mpi_ec_dup_point (result, result, ctx);
+              _gcry_mpi_ec_add_points (&tmppnt, result, point, ctx);
+              point_swap_cond (result, &tmppnt, mpi_test_bit (scalar, j), ctx);
+            }
+          point_free (&tmppnt);
+        }
+      else
+        {
+          if (ctx->model == MPI_EC_EDWARDS)
+            {
+              mpi_point_resize (result, ctx);
+              mpi_point_resize (point, ctx);
+            }
+
+          for (j=nbits-1; j >= 0; j--)
+            {
+              _gcry_mpi_ec_dup_point (result, result, ctx);
+              if (mpi_test_bit (scalar, j))
+                _gcry_mpi_ec_add_points (result, result, point, ctx);
+            }
+        }
+      return;
+    }
+  else if (ctx->model == MPI_EC_MONTGOMERY)
+    {
+      unsigned int nbits;
+      int j;
+      mpi_point_struct p1_, p2_;
+      mpi_point_t q1, q2, prd, sum;
+      unsigned long sw;
+      mpi_size_t rsize;
+      int scalar_copied = 0;
+
+      /* Compute scalar point multiplication with Montgomery Ladder.
+         Note that we don't use Y-coordinate in the points at all.
+         RESULT->Y will be filled by zero.  */
+
+      nbits = mpi_get_nbits (scalar);
+      point_init (&p1);
+      point_init (&p2);
+      point_init (&p1_);
+      point_init (&p2_);
+      mpi_set_ui (p1.x, 1);
+      mpi_free (p2.x);
+      p2.x  = mpi_copy (point->x);
+      mpi_set_ui (p2.z, 1);
+
+      if (mpi_is_opaque (scalar))
+        {
+          const unsigned int pbits = ctx->nbits;
+          gcry_mpi_t a;
+          unsigned int n;
+          unsigned char *raw;
+
+          scalar_copied = 1;
+
+          raw = _gcry_mpi_get_opaque_copy (scalar, &n);
+          if ((n+7)/8 != (pbits+7)/8)
+            log_fatal ("scalar size (%d) != prime size (%d)\n",
+                       (n+7)/8, (pbits+7)/8);
+
+          reverse_buffer (raw, (n+7)/8);
+          if ((pbits % 8))
+            raw[0] &= (1 << (pbits % 8)) - 1;
+          raw[0] |= (1 << ((pbits + 7) % 8));
+          raw[(pbits+7)/8 - 1] &= (256 - ctx->h);
+          a = mpi_is_secure (scalar) ? mpi_snew (pbits): mpi_new (pbits);
+          _gcry_mpi_set_buffer (a, raw, (n+7)/8, 0);
+          xfree (raw);
+
+          scalar = a;
+        }
+
+      mpi_point_resize (&p1, ctx);
+      mpi_point_resize (&p2, ctx);
+      mpi_point_resize (&p1_, ctx);
+      mpi_point_resize (&p2_, ctx);
+
+      mpi_resize (point->x, ctx->p->nlimbs);
+      point->x->nlimbs = ctx->p->nlimbs;
+
+      q1 = &p1;
+      q2 = &p2;
+      prd = &p1_;
+      sum = &p2_;
+
+      for (j=nbits-1; j >= 0; j--)
+        {
+          mpi_point_t t;
+
+          sw = mpi_test_bit (scalar, j);
+          point_swap_cond (q1, q2, sw, ctx);
+          montgomery_ladder (prd, sum, q1, q2, point->x, ctx);
+          point_swap_cond (prd, sum, sw, ctx);
+          t = q1;  q1 = prd;  prd = t;
+          t = q2;  q2 = sum;  sum = t;
+        }
+
+      mpi_clear (result->y);
+      sw = (nbits & 1);
+      point_swap_cond (&p1, &p1_, sw, ctx);
+
+      rsize = p1.z->nlimbs;
+      MPN_NORMALIZE (p1.z->d, rsize);
+      if (rsize == 0)
+        {
+          mpi_set_ui (result->x, 1);
+          mpi_set_ui (result->z, 0);
+        }
+      else
+        {
+          z1 = mpi_new (0);
+          ec_invm (z1, p1.z, ctx);
+          ec_mulm (result->x, p1.x, z1, ctx);
+          mpi_set_ui (result->z, 1);
+          mpi_free (z1);
+        }
+
+      point_free (&p1);
+      point_free (&p2);
+      point_free (&p1_);
+      point_free (&p2_);
+      if (scalar_copied)
+        _gcry_mpi_release (scalar);
+      return;
+    }
 
   x1 = mpi_alloc_like (ctx->p);
   y1 = mpi_alloc_like (ctx->p);
@@ -640,7 +1966,7 @@ _gcry_mpi_ec_mul_point (mpi_point_t *result,
   k  = mpi_copy (scalar);
   yy = mpi_copy (point->y);
 
-  if ( mpi_is_neg (k) )
+  if ( mpi_has_sign (k) )
     {
       k->sign = 0;
       ec_invm (yy, yy, ctx);
@@ -666,9 +1992,9 @@ _gcry_mpi_ec_mul_point (mpi_point_t *result,
       mpi_free (z2);
       mpi_free (z3);
     }
-  z1 = mpi_copy (ctx->one);
+  z1 = mpi_copy (mpi_const (MPI_C_ONE));
 
-  mpi_mul (h, k, ctx->three); /* h = 3k */
+  mpi_mul (h, k, mpi_const (MPI_C_THREE)); /* h = 3k */
   loops = mpi_get_nbits (h);
   if (loops < 2)
     {
@@ -694,6 +2020,10 @@ _gcry_mpi_ec_mul_point (mpi_point_t *result,
   point_init (&p2);
   point_init (&p1inv);
 
+  /* Invert point: y = p - y mod p  */
+  point_set (&p1inv, &p1);
+  ec_subm (p1inv.y, ctx->p, p1inv.y, ctx);
+
   for (i=loops-2; i > 0; i--)
     {
       _gcry_mpi_ec_dup_point (result, result, ctx);
@@ -705,9 +2035,6 @@ _gcry_mpi_ec_mul_point (mpi_point_t *result,
       if (mpi_test_bit (h, i) == 0 && mpi_test_bit (k, i) == 1)
         {
           point_set (&p2, result);
-          /* Invert point: y = p - y mod p  */
-          point_set (&p1inv, &p1);
-          ec_subm (p1inv.y, ctx->p, p1inv.y, ctx);
           _gcry_mpi_ec_add_points (result, &p2, &p1inv, ctx);
         }
     }
@@ -717,5 +2044,127 @@ _gcry_mpi_ec_mul_point (mpi_point_t *result,
   point_free (&p1inv);
   mpi_free (h);
   mpi_free (k);
-#endif
+}
+
+
+/* Return true if POINT is on the curve described by CTX.  */
+int
+_gcry_mpi_ec_curve_point (gcry_mpi_point_t point, mpi_ec_t ctx)
+{
+  int res = 0;
+  gcry_mpi_t x, y, w;
+
+  x = mpi_new (0);
+  y = mpi_new (0);
+  w = mpi_new (0);
+
+  /* Check that the point is in range.  This needs to be done here and
+   * not after conversion to affine coordinates.  */
+  if (mpi_cmpabs (point->x, ctx->p) >= 0)
+    goto leave;
+  if (mpi_cmpabs (point->y, ctx->p) >= 0)
+    goto leave;
+  if (mpi_cmpabs (point->z, ctx->p) >= 0)
+    goto leave;
+
+  switch (ctx->model)
+    {
+    case MPI_EC_WEIERSTRASS:
+      {
+        gcry_mpi_t xxx;
+
+        if (_gcry_mpi_ec_get_affine (x, y, point, ctx))
+          goto leave;
+
+        xxx = mpi_new (0);
+
+        /* y^2 == x^3 + a·x + b */
+        ec_pow2 (y, y, ctx);
+
+        ec_pow3 (xxx, x, ctx);
+        ec_mulm (w, ctx->a, x, ctx);
+        ec_addm (w, w, ctx->b, ctx);
+        ec_addm (w, w, xxx, ctx);
+
+        if (!mpi_cmp (y, w))
+          res = 1;
+
+        _gcry_mpi_release (xxx);
+      }
+      break;
+    case MPI_EC_MONTGOMERY:
+      {
+#define xx y
+        /* With Montgomery curve, only X-coordinate is valid.  */
+        if (_gcry_mpi_ec_get_affine (x, NULL, point, ctx))
+          goto leave;
+
+        /* The equation is: b * y^2 == x^3 + a · x^2 + x */
+        /* We check if right hand is quadratic residue or not by
+           Euler's criterion.  */
+        /* CTX->A has (a-2)/4 and CTX->B has b^-1 */
+        ec_mulm (w, ctx->a, mpi_const (MPI_C_FOUR), ctx);
+        ec_addm (w, w, mpi_const (MPI_C_TWO), ctx);
+        ec_mulm (w, w, x, ctx);
+        ec_pow2 (xx, x, ctx);
+        ec_addm (w, w, xx, ctx);
+        ec_addm (w, w, mpi_const (MPI_C_ONE), ctx);
+        ec_mulm (w, w, x, ctx);
+        ec_mulm (w, w, ctx->b, ctx);
+#undef xx
+        /* Compute Euler's criterion: w^(p-1)/2 */
+#define p_minus1 y
+        ec_subm (p_minus1, ctx->p, mpi_const (MPI_C_ONE), ctx);
+        mpi_rshift (p_minus1, p_minus1, 1);
+        ec_powm (w, w, p_minus1, ctx);
+
+        res = !mpi_cmp_ui (w, 1);
+#undef p_minus1
+      }
+      break;
+    case MPI_EC_EDWARDS:
+      {
+        if (_gcry_mpi_ec_get_affine (x, y, point, ctx))
+          goto leave;
+
+        mpi_resize (w, ctx->p->nlimbs);
+        w->nlimbs = ctx->p->nlimbs;
+
+        /* a · x^2 + y^2 - 1 - b · x^2 · y^2 == 0 */
+        ctx->pow2 (x, x, ctx);
+        ctx->pow2 (y, y, ctx);
+        if (ctx->dialect == ECC_DIALECT_ED25519)
+          ctx->subm (w, ctx->p, x, ctx);
+        else
+          ctx->mulm (w, ctx->a, x, ctx);
+        ctx->addm (w, w, y, ctx);
+        ctx->mulm (x, x, y, ctx);
+        ctx->mulm (x, x, ctx->b, ctx);
+        ctx->subm (w, w, x, ctx);
+        if (!mpi_cmp_ui (w, 1))
+          res = 1;
+      }
+      break;
+    }
+
+ leave:
+  _gcry_mpi_release (w);
+  _gcry_mpi_release (x);
+  _gcry_mpi_release (y);
+
+  return res;
+}
+
+
+int
+_gcry_mpi_ec_bad_point (gcry_mpi_point_t point, mpi_ec_t ctx)
+{
+  int i;
+  gcry_mpi_t x_bad;
+
+  for (i = 0; (x_bad = ctx->t.scratch[i]); i++)
+    if (!mpi_cmp (point->x, x_bad))
+      return 1;
+
+  return 0;
 }
diff --git a/grub-core/lib/libgcrypt/mpi/generic/Manifest 
b/grub-core/lib/libgcrypt/mpi/generic/Manifest
deleted file mode 100644
index c429fde72..000000000
--- a/grub-core/lib/libgcrypt/mpi/generic/Manifest
+++ /dev/null
@@ -1,29 +0,0 @@
-# Manifest - checksums 
-# Copyright 2003 Free Software Foundation, Inc.
-#
-# This file is part of Libgcrypt.
-#
-# Libgcrypt is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser general Public License as
-# published by the Free Software Foundation; either version 2.1 of
-# the License, or (at your option) any later version.
-#
-# Libgcrypt is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
-
-mpih-add1.c 
iQCVAwUAP+Lj2DEAnp832S/7AQKn/AQAwQLWggl6zNQ5EZ+lE+jKV8W3FsogW3/6tp9T5rrSR5JnlWyoHQ9/Pu4knOcLjS6nIfVOiAEifu3nuIysQr9jDSSSJA2LylSUBSXKLKDamPsOCwXOLxiZODslJT3CCGAUtLvXJrWDbTZQrkEuwnLnjQFDzuA7iY9JLrG9kAoXD6Q==WoWm
-mpih-mul1.c 
iQCVAwUAP+LkCTEAnp832S/7AQKFVQP+MhBNjcY73JtnsHZfnaVZq3TiKwN151cWV51nDc1RnTaMhSIFeuNlj3vNML2W0Gn8n+GnyiWE2XXdQEaik6BL02eekUn9aq7I/rdpnTHuOjQPK1uwjuNl8RuJ9YrERBAxq4oB71f+iwMab8dsMSUlVC+NdeAocRqLLgnR/efkdLc==2Tkb
-mpih-mul2.c 
iQCVAwUAP+LkMjEAnp832S/7AQLPeAQAqmRzxFe/mDqTdZr/pTXT8RVyB1vKB0Ei2THV05BxmI4OPv39uysfFpLMt/INsX7AGqdOlj4jOZ/qNaFXR1ceMrlSXvo8u/epk6rCXFp82kM7Qs983LjoP//PrMCkYkXwblaVrgUGiBUCbuPMliWTK6qKkxxXtEfqZ7nVbEWdBx8==Kwhl
-mpih-mul3.c 
iQCVAwUAP+LkVDEAnp832S/7AQL91gP/Qd5iZWxRiN5DdEIVHAedoNvl23NPrT2UUdXvnSK49DpplTxkLiMBj0WqCayG/YIET2NpMRCeLvAZNcSt6lOm0bSZDYo1Hv/N+UoqD3V1McjY16REBv/nnPaMWMZcx7rl5yKTVZiX2PgV6oQOL7Yfrt5ZIOlrHBRs9S2/zcCaVz0==9BQe
-mpih-lshift.c 
iQCVAwUAP+LlATEAnp832S/7AQIACAQAhMrpx0SRXE/LN1NkjMO9n74nMrvmzYJyru0gw2O4BYrUPvD/LWGju2FZaggKV0IBjmi0cDoCrNeK9EGjKOO1lfgODbX2IZ1LUhr9jDuMj0QRqj6T9YkAFYTNUk4GfpwIf7T6Ybo7c78Jx93PidCJt7d39eMMEalooC7LZ4IU3NM==nZ4k
-mpih-rshift.c 
iQCVAwUAP+LlIjEAnp832S/7AQKiuAP/eYC2ZScd+taBx/kNzRvGjA0eAXvORMkMLV6Ot+OXVzVUi04eoP2yXdxSNFKwUj12p8GWXkdoMG3aOGBKg2a7bY5Q5RUho3hUWb9UsVYVUfXLf7IOTt/3a6MLh2CmV5dFPWJmSlbCyQRcn6n/fLDeJ3A2bWTS/BhqGfpOXUIU1ws==jCf8
-mpih-sub1.c 
iQCVAwUAP+LlZzEAnp832S/7AQIEPgP/dLHTDRbPrYJhsLp9SjGstU1M8/IC5XytcDtO3NQeu4mx6vaXjpujtsTvKIbX4QL5IahNntVVKv1xFLEm2yFg7L2ns0uD/mfwGgOhCG1j2o/SaTAWP5KxP7ae5UDcZl2w6NWvEuMj9t32zmziAZjP8W73A37FUspeRDYiL9sQzkI==QQzk
-udiv-w-sdiv.c 
iQCVAwUAP+Lk0TEAnp832S/7AQICXAQAsxe1SQD4+xZaZTqBC0V9Cyuo0mrdccnRFzthOtm0ARwKFXU2cuLW/ZBOkmeWOVmOFhBp22/I8dEGYnMA3gcfmOMCpNu9i9zk/XHfptdunA1MnOe3GsoWgfHL0rhpAyPhp/X043ICB41NElnnuxADuQQlD4Z1fca5ygYxMr2crJg==EI/6
-mpi-asm-defs.h 
iQCVAwUAP+LkgDEAnp832S/7AQK0FgQAxJZ7xvXhoZa33GWe23LRb3asrno/loZSyAIXrntqtVH8M3pEsCY0OyW4ry4hX2RnxpuhRCM/PdRNLG3xXyMSVIhkHU8WVRLqzF2LLjEkyU3cAmHnnTQ9aO/XpUWtJGTZ8q2bv7ZsAEi4aPl0p6KhPXcPgM9vQ2XcyOPn3Dl0d6Q==xpjI
-$names$ 
iQCVAwUAP+LmNDEAnp832S/7AQJa+gP+KQNJpbNOgc+s2UX+Ya2gDaOFcAROImIllhg3ej8EaBF8xxdHmWT1zaKwTwi3moEEleykMR104YAGWyQeMbFYiuPPBW+ohrT6KxRBVJpIA9auOOqqJMyglZyoR3Hv7gduVYUW1h/DebnqiKXKEfzQDFqYuT0ayuteoOR4B5NICbE==nLSh
diff --git a/grub-core/lib/libgcrypt/mpi/generic/distfiles 
b/grub-core/lib/libgcrypt/mpi/generic/distfiles
index 9810eef4d..649e829b7 100644
--- a/grub-core/lib/libgcrypt/mpi/generic/distfiles
+++ b/grub-core/lib/libgcrypt/mpi/generic/distfiles
@@ -1,4 +1,3 @@
-Manifest
 mpih-add1.c
 mpih-mul1.c
 mpih-mul2.c
diff --git a/grub-core/lib/libgcrypt/mpi/generic/mpi-asm-defs.h 
b/grub-core/lib/libgcrypt/mpi/generic/mpi-asm-defs.h
index 13424e280..e607806e1 100644
--- a/grub-core/lib/libgcrypt/mpi/generic/mpi-asm-defs.h
+++ b/grub-core/lib/libgcrypt/mpi/generic/mpi-asm-defs.h
@@ -1,10 +1,8 @@
-/* This file defines some basic constants for the MPI machinery.  We
- * need to define the types on a per-CPU basis, so it is done with
- * this file here.  */
+/* This file defines some basic constants for the MPI machinery.
+ * AMD64 compiled for the x32 ABI is special and thus we can't use the
+ * standard values for this ABI.  */
+#if __GNUC__ >= 3 && defined(__x86_64__) && defined(__ILP32__)
+#define BYTES_PER_MPI_LIMB 8
+#else
 #define BYTES_PER_MPI_LIMB  (SIZEOF_UNSIGNED_LONG)
-
-
-
-
-
-
+#endif
diff --git a/grub-core/lib/libgcrypt/mpi/generic/mpih-add1.c 
b/grub-core/lib/libgcrypt/mpi/generic/mpih-add1.c
index 4ffe0eb23..4a84df64d 100644
--- a/grub-core/lib/libgcrypt/mpi/generic/mpih-add1.c
+++ b/grub-core/lib/libgcrypt/mpi/generic/mpih-add1.c
@@ -1,5 +1,5 @@
 /* mpihelp-add_1.c  -  MPI helper functions
- * Copyright (C) 1994, 1996, 1997, 1998,
+ * Copyright (C) 1994, 1996, 1997, 1998, 
  *               2000, 2002 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
@@ -48,7 +48,7 @@ _gcry_mpih_add_n (mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
   res_ptr -= j;
 
   cy = 0;
-  do
+  do 
     {
       y = s2_ptr[j];
       x = s1_ptr[j];
@@ -57,7 +57,7 @@ _gcry_mpih_add_n (mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
       y += x;            /* add other addend */
       cy += y < x;       /* get out carry from that add, combine */
       res_ptr[j] = y;
-    }
+    } 
   while ( ++j );
 
   return cy;
diff --git a/grub-core/lib/libgcrypt/mpi/generic/mpih-lshift.c 
b/grub-core/lib/libgcrypt/mpi/generic/mpih-lshift.c
index 8c1d943b0..f48c12cd0 100644
--- a/grub-core/lib/libgcrypt/mpi/generic/mpih-lshift.c
+++ b/grub-core/lib/libgcrypt/mpi/generic/mpih-lshift.c
@@ -54,7 +54,7 @@ _gcry_mpih_lshift( mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t 
usize,
   low_limb = up[i];
   retval = low_limb >> sh_2;
   high_limb = low_limb;
-  while ( --i >= 0 )
+  while ( --i >= 0 ) 
     {
       low_limb = up[i];
       wp[i] = (high_limb << sh_1) | (low_limb >> sh_2);
diff --git a/grub-core/lib/libgcrypt/mpi/generic/mpih-mul1.c 
b/grub-core/lib/libgcrypt/mpi/generic/mpih-mul1.c
index 614646c43..0e8197d88 100644
--- a/grub-core/lib/libgcrypt/mpi/generic/mpih-mul1.c
+++ b/grub-core/lib/libgcrypt/mpi/generic/mpih-mul1.c
@@ -48,13 +48,13 @@ _gcry_mpih_mul_1( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, 
mpi_size_t s1_size,
   res_ptr -= j;
 
   cy_limb = 0;
-  do
+  do 
     {
       umul_ppmm( prod_high, prod_low, s1_ptr[j], s2_limb );
       prod_low += cy_limb;
       cy_limb = (prod_low < cy_limb?1:0) + prod_high;
       res_ptr[j] = prod_low;
-    }
+    } 
   while( ++j );
 
   return cy_limb;
diff --git a/grub-core/lib/libgcrypt/mpi/generic/mpih-mul2.c 
b/grub-core/lib/libgcrypt/mpi/generic/mpih-mul2.c
index 56979dfdb..3b7549605 100644
--- a/grub-core/lib/libgcrypt/mpi/generic/mpih-mul2.c
+++ b/grub-core/lib/libgcrypt/mpi/generic/mpih-mul2.c
@@ -48,7 +48,7 @@ _gcry_mpih_addmul_1( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
   s1_ptr -= j;
 
   cy_limb = 0;
-  do
+  do 
     {
       umul_ppmm( prod_high, prod_low, s1_ptr[j], s2_limb );
 
@@ -59,9 +59,9 @@ _gcry_mpih_addmul_1( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
       prod_low = x + prod_low;
       cy_limb += prod_low < x?1:0;
       res_ptr[j] = prod_low;
-    }
+    } 
   while ( ++j );
-
+    
   return cy_limb;
 }
 
diff --git a/grub-core/lib/libgcrypt/mpi/generic/mpih-mul3.c 
b/grub-core/lib/libgcrypt/mpi/generic/mpih-mul3.c
index 9b8df1a69..5e84f94f3 100644
--- a/grub-core/lib/libgcrypt/mpi/generic/mpih-mul3.c
+++ b/grub-core/lib/libgcrypt/mpi/generic/mpih-mul3.c
@@ -48,7 +48,7 @@ _gcry_mpih_submul_1( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
   s1_ptr -= j;
 
   cy_limb = 0;
-  do
+  do 
     {
       umul_ppmm( prod_high, prod_low, s1_ptr[j], s2_limb);
 
@@ -59,7 +59,7 @@ _gcry_mpih_submul_1( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
       prod_low = x - prod_low;
       cy_limb += prod_low > x?1:0;
       res_ptr[j] = prod_low;
-    }
+    } 
   while( ++j );
 
   return cy_limb;
diff --git a/grub-core/lib/libgcrypt/mpi/generic/mpih-sub1.c 
b/grub-core/lib/libgcrypt/mpi/generic/mpih-sub1.c
index 25b08af1c..e88821bfb 100644
--- a/grub-core/lib/libgcrypt/mpi/generic/mpih-sub1.c
+++ b/grub-core/lib/libgcrypt/mpi/generic/mpih-sub1.c
@@ -48,7 +48,7 @@ _gcry_mpih_sub_n( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
   res_ptr -= j;
 
   cy = 0;
-  do
+  do 
     {
       y = s2_ptr[j];
       x = s1_ptr[j];
@@ -57,7 +57,7 @@ _gcry_mpih_sub_n( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
       y = x - y;                 /* main subtract */
       cy += y > x;               /* get out carry from the subtract, combine */
       res_ptr[j] = y;
-    }
+    } 
   while( ++j );
 
   return cy;
diff --git a/grub-core/lib/libgcrypt/mpi/i386/Manifest 
b/grub-core/lib/libgcrypt/mpi/i386/Manifest
deleted file mode 100644
index 812bc8a5c..000000000
--- a/grub-core/lib/libgcrypt/mpi/i386/Manifest
+++ /dev/null
@@ -1,28 +0,0 @@
-# Manifest - checksums 
-# Copyright 2003 Free Software Foundation, Inc.
-#
-# This file is part of Libgcrypt.
-#
-# Libgcrypt is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser general Public License as
-# published by the Free Software Foundation; either version 2.1 of
-# the License, or (at your option) any later version.
-#
-# Libgcrypt is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
-
-mpih-add1.S
-mpih-mul1.S
-mpih-mul2.S
-mpih-mul3.S
-mpih-lshift.S
-mpih-rshift.S
-mpih-sub1.S
-syntax.h
-$names$ 
iQCVAwUAP+LmOTEAnp832S/7AQJZmgQA1+GIl7rXiEY00y5xD2kG5Lm2QD6c9aBME8hTl812OEcj0ul/QSpdv8E2NEKooifr4SiLVhEVfLNaLqAgN3cIsttn3rRX3/pMC5JwSKHDJPsUbpN9tzb5dr2YC9GG9m8xngAQrN11IQPnGfvFLJK+oDnEMIAeHDpOnX9NeQPDAQA==bnOy
diff --git a/grub-core/lib/libgcrypt/mpi/i386/distfiles 
b/grub-core/lib/libgcrypt/mpi/i386/distfiles
index 22b9979bd..88d2a30c7 100644
--- a/grub-core/lib/libgcrypt/mpi/i386/distfiles
+++ b/grub-core/lib/libgcrypt/mpi/i386/distfiles
@@ -1,4 +1,3 @@
-Manifest
 mpih-add1.S
 mpih-mul1.S
 mpih-mul2.S
diff --git a/grub-core/lib/libgcrypt/mpi/i386/mpih-add1.S 
b/grub-core/lib/libgcrypt/mpi/i386/mpih-add1.S
index 652b23218..95a75890c 100644
--- a/grub-core/lib/libgcrypt/mpi/i386/mpih-add1.S
+++ b/grub-core/lib/libgcrypt/mpi/i386/mpih-add1.S
@@ -44,14 +44,22 @@
        ALIGN (3)
        .globl C_SYMBOL_NAME(_gcry_mpih_add_n)
 C_SYMBOL_NAME(_gcry_mpih_add_n:)
+       CFI_STARTPROC()
        pushl %edi
+       CFI_PUSH(%edi)
        pushl %esi
+       CFI_PUSH(%esi)
 
        movl 12(%esp),%edi              /* res_ptr */
        movl 16(%esp),%esi              /* s1_ptr */
        movl 20(%esp),%edx              /* s2_ptr */
        movl 24(%esp),%ecx              /* size */
 
+#if defined __CET__ && (__CET__ & 1) != 0
+       pushl   %ebx
+       CFI_PUSH(%ebx)
+#endif
+
        movl    %ecx,%eax
        shrl    $3,%ecx                 /* compute count for unrolled loop */
        negl    %eax
@@ -63,41 +71,70 @@ C_SYMBOL_NAME(_gcry_mpih_add_n:)
        subl    %eax,%esi               /* ... by a constant when we ... */
        subl    %eax,%edx               /* ... enter the loop */
        shrl    $2,%eax                 /* restore previous value */
+#if defined __CET__ && (__CET__ & 1) != 0
+       leal    -4(,%eax,4),%ebx        /* Count for 4-byte endbr32 */
+#endif
 #ifdef PIC
 /* Calculate start address in loop for PIC.  Due to limitations in some
    assemblers, Loop-L0-3 cannot be put into the leal */
        call    L0
+       CFI_ADJUST_CFA_OFFSET(4)
 L0:    leal    (%eax,%eax,8),%eax
        addl    (%esp),%eax
        addl    $(Loop-L0-3),%eax
        addl    $4,%esp
+       CFI_ADJUST_CFA_OFFSET(-4)
 #else
 /* Calculate start address in loop for non-PIC.  */
        leal    (Loop - 3)(%eax,%eax,8),%eax
+#endif
+#if defined __CET__ && (__CET__ & 1) != 0
+       addl    %ebx,%eax               /* Adjust for endbr32 */
 #endif
        jmp     *%eax                   /* jump into loop */
        ALIGN (3)
 Loop:  movl    (%esi),%eax
        adcl    (%edx),%eax
        movl    %eax,(%edi)
+#ifdef _CET_ENDBR
+       _CET_ENDBR
+#endif
        movl    4(%esi),%eax
        adcl    4(%edx),%eax
        movl    %eax,4(%edi)
+#ifdef _CET_ENDBR
+       _CET_ENDBR
+#endif
        movl    8(%esi),%eax
        adcl    8(%edx),%eax
        movl    %eax,8(%edi)
+#ifdef _CET_ENDBR
+       _CET_ENDBR
+#endif
        movl    12(%esi),%eax
        adcl    12(%edx),%eax
        movl    %eax,12(%edi)
+#ifdef _CET_ENDBR
+       _CET_ENDBR
+#endif
        movl    16(%esi),%eax
        adcl    16(%edx),%eax
        movl    %eax,16(%edi)
+#ifdef _CET_ENDBR
+       _CET_ENDBR
+#endif
        movl    20(%esi),%eax
        adcl    20(%edx),%eax
        movl    %eax,20(%edi)
+#ifdef _CET_ENDBR
+       _CET_ENDBR
+#endif
        movl    24(%esi),%eax
        adcl    24(%edx),%eax
        movl    %eax,24(%edi)
+#ifdef _CET_ENDBR
+       _CET_ENDBR
+#endif
        movl    28(%esi),%eax
        adcl    28(%edx),%eax
        movl    %eax,28(%edi)
@@ -110,7 +147,15 @@ Loop:      movl    (%esi),%eax
        sbbl    %eax,%eax
        negl    %eax
 
+#if defined __CET__ && (__CET__ & 1) != 0
+       popl    %ebx
+       CFI_POP(%ebx)
+#endif
+
        popl %esi
+       CFI_POP(%esi)
        popl %edi
-       ret
+       CFI_POP(%edi)
+       ret_spec_stop
+       CFI_ENDPROC()
 
diff --git a/grub-core/lib/libgcrypt/mpi/i386/mpih-lshift.S 
b/grub-core/lib/libgcrypt/mpi/i386/mpih-lshift.S
index bf8ed9d4c..3404cf557 100644
--- a/grub-core/lib/libgcrypt/mpi/i386/mpih-lshift.S
+++ b/grub-core/lib/libgcrypt/mpi/i386/mpih-lshift.S
@@ -42,9 +42,13 @@
        ALIGN (3)
        .globl C_SYMBOL_NAME(_gcry_mpih_lshift)
 C_SYMBOL_NAME(_gcry_mpih_lshift:)
+       CFI_STARTPROC()
        pushl   %edi
+       CFI_PUSH(%edi)
        pushl   %esi
+       CFI_PUSH(%esi)
        pushl   %ebx
+       CFI_PUSH(%ebx)
 
        movl    16(%esp),%edi           /* res_ptr */
        movl    20(%esp),%esi           /* s_ptr */
@@ -82,13 +86,17 @@ L1: movl    (%esi,%edx,4),%eax
        popl    %ebx
        popl    %esi
        popl    %edi
-       ret
+       ret_spec_stop
 
 Lend:  shll    %cl,%ebx                /* compute least significant limb */
        movl    %ebx,(%edi)             /* store it */
 
        popl    %ebx
+       CFI_POP(%ebx)
        popl    %esi
+       CFI_POP(%esi)
        popl    %edi
-       ret
+       CFI_POP(%edi)
+       ret_spec_stop
+       CFI_ENDPROC()
 
diff --git a/grub-core/lib/libgcrypt/mpi/i386/mpih-mul1.S 
b/grub-core/lib/libgcrypt/mpi/i386/mpih-mul1.S
index c9760ef92..a672d052f 100644
--- a/grub-core/lib/libgcrypt/mpi/i386/mpih-mul1.S
+++ b/grub-core/lib/libgcrypt/mpi/i386/mpih-mul1.S
@@ -49,10 +49,15 @@
        GLOBL   C_SYMBOL_NAME(_gcry_mpih_mul_1)
 C_SYMBOL_NAME(_gcry_mpih_mul_1:)
 
+       CFI_STARTPROC()
        INSN1(push,l    ,R(edi))
+       CFI_PUSH(%edi)
        INSN1(push,l    ,R(esi))
+       CFI_PUSH(%esi)
        INSN1(push,l    ,R(ebx))
+       CFI_PUSH(%ebx)
        INSN1(push,l    ,R(ebp))
+       CFI_PUSH(%ebp)
 
        INSN2(mov,l     ,R(res_ptr),MEM_DISP(esp,20))
        INSN2(mov,l     ,R(s1_ptr),MEM_DISP(esp,24))
@@ -77,8 +82,13 @@ Loop:
        INSN2(mov,l     ,R(eax),R(ebx))
 
        INSN1(pop,l     ,R(ebp))
+       CFI_POP(%ebp)
        INSN1(pop,l     ,R(ebx))
+       CFI_POP(%ebx)
        INSN1(pop,l     ,R(esi))
+       CFI_POP(%esi)
        INSN1(pop,l     ,R(edi))
-       ret
+       CFI_POP(%edi)
+       ret_spec_stop
+       CFI_ENDPROC()
 
diff --git a/grub-core/lib/libgcrypt/mpi/i386/mpih-mul2.S 
b/grub-core/lib/libgcrypt/mpi/i386/mpih-mul2.S
index 9794e1108..e09c3f7c8 100644
--- a/grub-core/lib/libgcrypt/mpi/i386/mpih-mul2.S
+++ b/grub-core/lib/libgcrypt/mpi/i386/mpih-mul2.S
@@ -50,10 +50,15 @@
        GLOBL   C_SYMBOL_NAME(_gcry_mpih_addmul_1)
 C_SYMBOL_NAME(_gcry_mpih_addmul_1:)
 
+       CFI_STARTPROC()
        INSN1(push,l    ,R(edi))
+       CFI_PUSH(%edi)
        INSN1(push,l    ,R(esi))
+       CFI_PUSH(%esi)
        INSN1(push,l    ,R(ebx))
+       CFI_PUSH(%ebx)
        INSN1(push,l    ,R(ebp))
+       CFI_PUSH(%ebp)
 
        INSN2(mov,l     ,R(res_ptr),MEM_DISP(esp,20))
        INSN2(mov,l     ,R(s1_ptr),MEM_DISP(esp,24))
@@ -79,8 +84,13 @@ Loop:
        INSN2(mov,l     ,R(eax),R(ebx))
 
        INSN1(pop,l     ,R(ebp))
+       CFI_POP(%ebp)
        INSN1(pop,l     ,R(ebx))
+       CFI_POP(%ebx)
        INSN1(pop,l     ,R(esi))
+       CFI_POP(%esi)
        INSN1(pop,l     ,R(edi))
-       ret
+       CFI_POP(%edi)
+       ret_spec_stop
+       CFI_ENDPROC()
 
diff --git a/grub-core/lib/libgcrypt/mpi/i386/mpih-mul3.S 
b/grub-core/lib/libgcrypt/mpi/i386/mpih-mul3.S
index 6df201763..4112c6997 100644
--- a/grub-core/lib/libgcrypt/mpi/i386/mpih-mul3.S
+++ b/grub-core/lib/libgcrypt/mpi/i386/mpih-mul3.S
@@ -50,10 +50,15 @@
        GLOBL   C_SYMBOL_NAME(_gcry_mpih_submul_1)
 C_SYMBOL_NAME(_gcry_mpih_submul_1:)
 
+       CFI_STARTPROC()
        INSN1(push,l    ,R(edi))
+       CFI_PUSH(%edi)
        INSN1(push,l    ,R(esi))
+       CFI_PUSH(%esi)
        INSN1(push,l    ,R(ebx))
+       CFI_PUSH(%ebx)
        INSN1(push,l    ,R(ebp))
+       CFI_PUSH(%ebp)
 
        INSN2(mov,l     ,R(res_ptr),MEM_DISP(esp,20))
        INSN2(mov,l     ,R(s1_ptr),MEM_DISP(esp,24))
@@ -79,8 +84,13 @@ Loop:
        INSN2(mov,l     ,R(eax),R(ebx))
 
        INSN1(pop,l     ,R(ebp))
+       CFI_POP(%ebp)
        INSN1(pop,l     ,R(ebx))
+       CFI_POP(%ebx)
        INSN1(pop,l     ,R(esi))
+       CFI_POP(%esi)
        INSN1(pop,l     ,R(edi))
-       ret
+       CFI_POP(%edi)
+       ret_spec_stop
+       CFI_ENDPROC()
 
diff --git a/grub-core/lib/libgcrypt/mpi/i386/mpih-rshift.S 
b/grub-core/lib/libgcrypt/mpi/i386/mpih-rshift.S
index 2920e55d8..5d34696ca 100644
--- a/grub-core/lib/libgcrypt/mpi/i386/mpih-rshift.S
+++ b/grub-core/lib/libgcrypt/mpi/i386/mpih-rshift.S
@@ -43,9 +43,13 @@
        ALIGN (3)
        .globl C_SYMBOL_NAME(_gcry_mpih_rshift)
 C_SYMBOL_NAME(_gcry_mpih_rshift:)
+       CFI_STARTPROC()
        pushl   %edi
+       CFI_PUSH(%edi)
        pushl   %esi
+       CFI_PUSH(%esi)
        pushl   %ebx
+       CFI_PUSH(%ebx)
 
        movl    16(%esp),%edi           /* wp */
        movl    20(%esp),%esi           /* up */
@@ -67,7 +71,7 @@ C_SYMBOL_NAME(_gcry_mpih_rshift:)
        movl    %ebx,%eax
 
        ALIGN (3)
-Loop2:  movl    (%esi,%edx,4),%ebx      /* load next higher limb */
+Loop2: movl     (%esi,%edx,4),%ebx     /* load next higher limb */
        shrdl   %cl,%ebx,%eax           /* compute result limb */
        movl    %eax,(%edi,%edx,4)      /* store it */
        incl    %edx
@@ -85,13 +89,17 @@ L2: movl    (%esi,%edx,4),%eax
        popl    %ebx
        popl    %esi
        popl    %edi
-       ret
+       ret_spec_stop
 
 Lend2: shrl    %cl,%ebx                /* compute most significant limb */
        movl    %ebx,(%edi)             /* store it */
 
        popl    %ebx
+       CFI_POP(%ebx)
        popl    %esi
+       CFI_POP(%esi)
        popl    %edi
-       ret
+       CFI_POP(%edi)
+       ret_spec_stop
+       CFI_ENDPROC()
 
diff --git a/grub-core/lib/libgcrypt/mpi/i386/mpih-sub1.S 
b/grub-core/lib/libgcrypt/mpi/i386/mpih-sub1.S
index f447f7a66..49477ae34 100644
--- a/grub-core/lib/libgcrypt/mpi/i386/mpih-sub1.S
+++ b/grub-core/lib/libgcrypt/mpi/i386/mpih-sub1.S
@@ -45,14 +45,22 @@
        ALIGN (3)
        .globl C_SYMBOL_NAME(_gcry_mpih_sub_n)
 C_SYMBOL_NAME(_gcry_mpih_sub_n:)
+       CFI_STARTPROC()
        pushl %edi
+       CFI_PUSH(%edi)
        pushl %esi
+       CFI_PUSH(%esi)
 
        movl 12(%esp),%edi              /* res_ptr */
        movl 16(%esp),%esi              /* s1_ptr */
        movl 20(%esp),%edx              /* s2_ptr */
        movl 24(%esp),%ecx              /* size */
 
+#if defined __CET__ && (__CET__ & 1) != 0
+       pushl   %ebx
+       CFI_PUSH(%ebx)
+#endif
+
        movl    %ecx,%eax
        shrl    $3,%ecx                 /* compute count for unrolled loop */
        negl    %eax
@@ -64,41 +72,70 @@ C_SYMBOL_NAME(_gcry_mpih_sub_n:)
        subl    %eax,%esi               /* ... by a constant when we ... */
        subl    %eax,%edx               /* ... enter the loop */
        shrl    $2,%eax                 /* restore previous value */
+#if defined __CET__ && (__CET__ & 1) != 0
+       leal    -4(,%eax,4),%ebx        /* Count for 4-byte endbr32 */
+#endif
 #ifdef PIC
 /* Calculate start address in loop for PIC.  Due to limitations in some
    assemblers, Loop-L0-3 cannot be put into the leal */
        call    L0
+       CFI_ADJUST_CFA_OFFSET(4)
 L0:    leal    (%eax,%eax,8),%eax
        addl    (%esp),%eax
        addl    $(Loop-L0-3),%eax
        addl    $4,%esp
+       CFI_ADJUST_CFA_OFFSET(-4)
 #else
 /* Calculate start address in loop for non-PIC.  */
        leal    (Loop - 3)(%eax,%eax,8),%eax
+#endif
+#if defined __CET__ && (__CET__ & 1) != 0
+       addl    %ebx,%eax               /* Adjust for endbr32 */
 #endif
        jmp     *%eax                   /* jump into loop */
        ALIGN (3)
 Loop:  movl    (%esi),%eax
        sbbl    (%edx),%eax
        movl    %eax,(%edi)
+#ifdef _CET_ENDBR
+       _CET_ENDBR
+#endif
        movl    4(%esi),%eax
        sbbl    4(%edx),%eax
        movl    %eax,4(%edi)
+#ifdef _CET_ENDBR
+       _CET_ENDBR
+#endif
        movl    8(%esi),%eax
        sbbl    8(%edx),%eax
        movl    %eax,8(%edi)
+#ifdef _CET_ENDBR
+       _CET_ENDBR
+#endif
        movl    12(%esi),%eax
        sbbl    12(%edx),%eax
        movl    %eax,12(%edi)
+#ifdef _CET_ENDBR
+       _CET_ENDBR
+#endif
        movl    16(%esi),%eax
        sbbl    16(%edx),%eax
        movl    %eax,16(%edi)
+#ifdef _CET_ENDBR
+       _CET_ENDBR
+#endif
        movl    20(%esi),%eax
        sbbl    20(%edx),%eax
        movl    %eax,20(%edi)
+#ifdef _CET_ENDBR
+       _CET_ENDBR
+#endif
        movl    24(%esi),%eax
        sbbl    24(%edx),%eax
        movl    %eax,24(%edi)
+#ifdef _CET_ENDBR
+       _CET_ENDBR
+#endif
        movl    28(%esi),%eax
        sbbl    28(%edx),%eax
        movl    %eax,28(%edi)
@@ -111,7 +148,15 @@ Loop:      movl    (%esi),%eax
        sbbl    %eax,%eax
        negl    %eax
 
+#if defined __CET__ && (__CET__ & 1) != 0
+       popl    %ebx
+       CFI_POP(%ebx)
+#endif
+
        popl %esi
+       CFI_POP(%esi)
        popl %edi
-       ret
+       CFI_POP(%edi)
+       ret_spec_stop
+       CFI_ENDPROC()
 
diff --git a/grub-core/lib/libgcrypt/mpi/i386/syntax.h 
b/grub-core/lib/libgcrypt/mpi/i386/syntax.h
index 88845f28d..af4d9e805 100644
--- a/grub-core/lib/libgcrypt/mpi/i386/syntax.h
+++ b/grub-core/lib/libgcrypt/mpi/i386/syntax.h
@@ -1,6 +1,6 @@
 /* syntax.h -- Definitions for x86 syntax variations.
  *
- *       Copyright (C) 1992, 1994, 1995, 1998,
+ *       Copyright (C) 1992, 1994, 1995, 1998, 
  *                     2001, 2002 Free Software Foundation, Inc.
  *
  * This file is part of Libgcrypt.
@@ -26,6 +26,32 @@
  *      to avoid revealing of sensitive data due to paging etc.
  */
 
+#include <config.h>
+
+#ifdef __i386__
+#ifdef HAVE_GCC_ASM_CFI_DIRECTIVES
+# define CFI_STARTPROC()            .cfi_startproc
+# define CFI_ENDPROC()              .cfi_endproc
+# define CFI_ADJUST_CFA_OFFSET(off) .cfi_adjust_cfa_offset off
+# define CFI_REL_OFFSET(reg,off)    .cfi_rel_offset reg, off
+# define CFI_RESTORE(reg)           .cfi_restore reg
+
+# define CFI_PUSH(reg) \
+       CFI_ADJUST_CFA_OFFSET(4); CFI_REL_OFFSET(reg, 0)
+# define CFI_POP(reg) \
+       CFI_ADJUST_CFA_OFFSET(-4); CFI_RESTORE(reg)
+#else
+# define CFI_STARTPROC()
+# define CFI_ENDPROC()
+# define CFI_ADJUST_CFA_OFFSET(off)
+# define CFI_REL_OFFSET(reg,off)
+# define CFI_RESTORE(reg)
+
+# define CFI_PUSH(reg)
+# define CFI_POP(reg)
+#endif
+#endif
+
 #undef ALIGN
 
 #if defined (BSD_SYNTAX) || defined (ELF_SYNTAX)
@@ -66,3 +92,7 @@
 #undef ALIGN
 #define ALIGN(log) .align log,0x90
 #endif
+
+/* 'ret' instruction replacement for straight-line speculation mitigation */
+#define ret_spec_stop \
+       ret; int3;
diff --git a/grub-core/lib/libgcrypt/mpi/i586/Manifest 
b/grub-core/lib/libgcrypt/mpi/i586/Manifest
deleted file mode 100644
index 6d1d7f824..000000000
--- a/grub-core/lib/libgcrypt/mpi/i586/Manifest
+++ /dev/null
@@ -1,27 +0,0 @@
-# Manifest - checksums 
-# Copyright 2003 Free Software Foundation, Inc.
-#
-# This file is part of Libgcrypt.
-#
-# Libgcrypt is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser general Public License as
-# published by the Free Software Foundation; either version 2.1 of
-# the License, or (at your option) any later version.
-#
-# Libgcrypt is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
-
-mpih-add1.S
-mpih-mul1.S
-mpih-mul2.S
-mpih-mul3.S
-mpih-lshift.S
-mpih-rshift.S
-mpih-sub1.S
-$names$ 
iQCVAwUAP+LmQDEAnp832S/7AQKCmgQAhG+E7X0KB4qdVf3sMb6Qr+Iv5Jlehzoub/5vxTRgePKzRuOHidCnTzSSoyzA++UcHrOjHQQDMsXnO6PqpS1d/TKkxjnGN7rE8mvMYlFAT8RsawTozSfh14mCzI0HTDbaKL9Z8pcMJtadB3XqAuqWJNO8kyECJFwurt3DRWXSWS8==Rug5
diff --git a/grub-core/lib/libgcrypt/mpi/i586/README 
b/grub-core/lib/libgcrypt/mpi/i586/README
deleted file mode 100644
index d73b08268..000000000
--- a/grub-core/lib/libgcrypt/mpi/i586/README
+++ /dev/null
@@ -1,26 +0,0 @@
-This directory contains mpn functions optimized for Intel Pentium
-processors.
-
-RELEVANT OPTIMIZATION ISSUES
-
-1. Pentium doesn't allocate cache lines on writes, unlike most other modern
-processors.  Since the functions in the mpn class do array writes, we have to
-handle allocating the destination cache lines by reading a word from it in the
-loops, to achieve the best performance.
-
-2. Pairing of memory operations requires that the two issued operations refer
-to different cache banks.  The simplest way to insure this is to read/write
-two words from the same object.  If we make operations on different objects,
-they might or might not be to the same cache bank.
-
-STATUS
-
-1. mpn_lshift and mpn_rshift run at about 6 cycles/limb, but the Pentium
-documentation indicates that they should take only 43/8 = 5.375 cycles/limb,
-or 5 cycles/limb asymptotically.
-
-2. mpn_add_n and mpn_sub_n run at asymptotically 2 cycles/limb.  Due to loop
-overhead and other delays (cache refill?), they run at or near 2.5 cycles/limb.
-
-3. mpn_mul_1, mpn_addmul_1, mpn_submul_1 all run 1 cycle faster than they
-should...
diff --git a/grub-core/lib/libgcrypt/mpi/i586/mpih-add1.S 
b/grub-core/lib/libgcrypt/mpi/i586/mpih-add1.S
deleted file mode 100644
index 7436d5926..000000000
--- a/grub-core/lib/libgcrypt/mpi/i586/mpih-add1.S
+++ /dev/null
@@ -1,135 +0,0 @@
-/* i80586 add_n -- Add two limb vectors of the same length > 0 and store
- *                sum in a third limb vector.
- *
- *      Copyright (C) 1992, 1994, 1995, 1996, 1998,
- *                    2001, 2002 Free Software Foundation, Inc.
- *
- * This file is part of Libgcrypt.
- *
- * Libgcrypt is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as
- * published by the Free Software Foundation; either version 2.1 of
- * the License, or (at your option) any later version.
- *
- * Libgcrypt is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
- */
-
-
-#include "sysdep.h"
-#include "asm-syntax.h"
-
-
-/*******************
- *  mpi_limb_t
- *  _gcry_mpih_add_n( mpi_ptr_t res_ptr,       (sp + 4)
- *                mpi_ptr_t s1_ptr,    (sp + 8)
- *                mpi_ptr_t s2_ptr,    (sp + 12)
- *                mpi_size_t size)     (sp + 16)
- */
-
-.text
-       ALIGN (3)
-       .globl C_SYMBOL_NAME(_gcry_mpih_add_n)
-C_SYMBOL_NAME(_gcry_mpih_add_n:)
-       pushl   %edi
-       pushl   %esi
-       pushl   %ebx
-       pushl   %ebp
-
-       movl    20(%esp),%edi           /* res_ptr */
-       movl    24(%esp),%esi           /* s1_ptr */
-       movl    28(%esp),%ebp           /* s2_ptr */
-       movl    32(%esp),%ecx           /* size */
-
-       movl    (%ebp),%ebx
-
-       decl    %ecx
-       movl    %ecx,%edx
-       shrl    $3,%ecx
-       andl    $7,%edx
-       testl   %ecx,%ecx               /* zero carry flag */
-       jz      Lend
-       pushl   %edx
-
-       ALIGN (3)
-Loop:  movl    28(%edi),%eax           /* fetch destination cache line */
-       leal    32(%edi),%edi
-
-L1:    movl    (%esi),%eax
-       movl    4(%esi),%edx
-       adcl    %ebx,%eax
-       movl    4(%ebp),%ebx
-       adcl    %ebx,%edx
-       movl    8(%ebp),%ebx
-       movl    %eax,-32(%edi)
-       movl    %edx,-28(%edi)
-
-L2:    movl    8(%esi),%eax
-       movl    12(%esi),%edx
-       adcl    %ebx,%eax
-       movl    12(%ebp),%ebx
-       adcl    %ebx,%edx
-       movl    16(%ebp),%ebx
-       movl    %eax,-24(%edi)
-       movl    %edx,-20(%edi)
-
-L3:    movl    16(%esi),%eax
-       movl    20(%esi),%edx
-       adcl    %ebx,%eax
-       movl    20(%ebp),%ebx
-       adcl    %ebx,%edx
-       movl    24(%ebp),%ebx
-       movl    %eax,-16(%edi)
-       movl    %edx,-12(%edi)
-
-L4:    movl    24(%esi),%eax
-       movl    28(%esi),%edx
-       adcl    %ebx,%eax
-       movl    28(%ebp),%ebx
-       adcl    %ebx,%edx
-       movl    32(%ebp),%ebx
-       movl    %eax,-8(%edi)
-       movl    %edx,-4(%edi)
-
-       leal    32(%esi),%esi
-       leal    32(%ebp),%ebp
-       decl    %ecx
-       jnz     Loop
-
-       popl    %edx
-Lend:
-       decl    %edx                    /* test %edx w/o clobbering carry */
-       js      Lend2
-       incl    %edx
-Loop2:
-       leal    4(%edi),%edi
-       movl    (%esi),%eax
-       adcl    %ebx,%eax
-       movl    4(%ebp),%ebx
-       movl    %eax,-4(%edi)
-       leal    4(%esi),%esi
-       leal    4(%ebp),%ebp
-       decl    %edx
-       jnz     Loop2
-Lend2:
-       movl    (%esi),%eax
-       adcl    %ebx,%eax
-       movl    %eax,(%edi)
-
-       sbbl    %eax,%eax
-       negl    %eax
-
-       popl    %ebp
-       popl    %ebx
-       popl    %esi
-       popl    %edi
-       ret
-
-
diff --git a/grub-core/lib/libgcrypt/mpi/i586/mpih-lshift.S 
b/grub-core/lib/libgcrypt/mpi/i586/mpih-lshift.S
deleted file mode 100644
index 9d25fe9d7..000000000
--- a/grub-core/lib/libgcrypt/mpi/i586/mpih-lshift.S
+++ /dev/null
@@ -1,229 +0,0 @@
-/* i80586   lshift
- *
- *      Copyright (C) 1992, 1994, 1998, 
- *                    2001, 2002 Free Software Foundation, Inc.
- *
- * This file is part of Libgcrypt.
- *
- * Libgcrypt is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as
- * published by the Free Software Foundation; either version 2.1 of
- * the License, or (at your option) any later version.
- *
- * Libgcrypt is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
- *
- * Note: This code is heavily based on the GNU MP Library.
- *      Actually it's the same code with only minor changes in the
- *      way the data is stored; this is to support the abstraction
- *      of an optional secure memory allocation which may be used
- *      to avoid revealing of sensitive data due to paging etc.
- */
-
-
-#include "sysdep.h"
-#include "asm-syntax.h"
-
-
-/*******************
- * mpi_limb_t
- * _gcry_mpih_lshift( mpi_ptr_t wp,    (sp + 4)
- *                mpi_ptr_t up,        (sp + 8)
- *                mpi_size_t usize,    (sp + 12)
- *                unsigned cnt)        (sp + 16)
- */
-
-.text
-       ALIGN (3)
-       .globl C_SYMBOL_NAME(_gcry_mpih_lshift)
-C_SYMBOL_NAME(_gcry_mpih_lshift:)
-
-       pushl   %edi
-       pushl   %esi
-       pushl   %ebx
-       pushl   %ebp
-
-       movl    20(%esp),%edi           /* res_ptr */
-       movl    24(%esp),%esi           /* s_ptr */
-       movl    28(%esp),%ebp           /* size */
-       movl    32(%esp),%ecx           /* cnt */
-
-/* We can use faster code for shift-by-1 under certain conditions.  */
-       cmp     $1,%ecx
-       jne     Lnormal
-       leal    4(%esi),%eax
-       cmpl    %edi,%eax
-       jnc     Lspecial                /* jump if s_ptr + 1 >= res_ptr */
-       leal    (%esi,%ebp,4),%eax
-       cmpl    %eax,%edi
-       jnc     Lspecial                /* jump if res_ptr >= s_ptr + size */
-
-Lnormal:
-       leal    -4(%edi,%ebp,4),%edi
-       leal    -4(%esi,%ebp,4),%esi
-
-       movl    (%esi),%edx
-       subl    $4,%esi
-       xorl    %eax,%eax
-       shldl   %cl,%edx,%eax           /* compute carry limb */
-       pushl   %eax                    /* push carry limb onto stack */
-
-       decl    %ebp
-       pushl   %ebp
-       shrl    $3,%ebp
-       jz      Lend
-
-       movl    (%edi),%eax             /* fetch destination cache line */
-
-       ALIGN   (2)
-Loop:  movl    -28(%edi),%eax          /* fetch destination cache line */
-       movl    %edx,%ebx
-
-       movl    (%esi),%eax
-       movl    -4(%esi),%edx
-       shldl   %cl,%eax,%ebx
-       shldl   %cl,%edx,%eax
-       movl    %ebx,(%edi)
-       movl    %eax,-4(%edi)
-
-       movl    -8(%esi),%ebx
-       movl    -12(%esi),%eax
-       shldl   %cl,%ebx,%edx
-       shldl   %cl,%eax,%ebx
-       movl    %edx,-8(%edi)
-       movl    %ebx,-12(%edi)
-
-       movl    -16(%esi),%edx
-       movl    -20(%esi),%ebx
-       shldl   %cl,%edx,%eax
-       shldl   %cl,%ebx,%edx
-       movl    %eax,-16(%edi)
-       movl    %edx,-20(%edi)
-
-       movl    -24(%esi),%eax
-       movl    -28(%esi),%edx
-       shldl   %cl,%eax,%ebx
-       shldl   %cl,%edx,%eax
-       movl    %ebx,-24(%edi)
-       movl    %eax,-28(%edi)
-
-       subl    $32,%esi
-       subl    $32,%edi
-       decl    %ebp
-       jnz     Loop
-
-Lend:  popl    %ebp
-       andl    $7,%ebp
-       jz      Lend2
-Loop2: movl    (%esi),%eax
-       shldl   %cl,%eax,%edx
-       movl    %edx,(%edi)
-       movl    %eax,%edx
-       subl    $4,%esi
-       subl    $4,%edi
-       decl    %ebp
-       jnz     Loop2
-
-Lend2: shll    %cl,%edx                /* compute least significant limb */
-       movl    %edx,(%edi)             /* store it */
-
-       popl    %eax                    /* pop carry limb */
-
-       popl    %ebp
-       popl    %ebx
-       popl    %esi
-       popl    %edi
-       ret
-
-/* We loop from least significant end of the arrays, which is only
-   permissable if the source and destination don't overlap, since the
-   function is documented to work for overlapping source and destination.
-*/
-
-Lspecial:
-       movl    (%esi),%edx
-       addl    $4,%esi
-
-       decl    %ebp
-       pushl   %ebp
-       shrl    $3,%ebp
-
-       addl    %edx,%edx
-       incl    %ebp
-       decl    %ebp
-       jz      LLend
-
-       movl    (%edi),%eax             /* fetch destination cache line */
-
-       ALIGN   (2)
-LLoop: movl    28(%edi),%eax           /* fetch destination cache line */
-       movl    %edx,%ebx
-
-       movl    (%esi),%eax
-       movl    4(%esi),%edx
-       adcl    %eax,%eax
-       movl    %ebx,(%edi)
-       adcl    %edx,%edx
-       movl    %eax,4(%edi)
-
-       movl    8(%esi),%ebx
-       movl    12(%esi),%eax
-       adcl    %ebx,%ebx
-       movl    %edx,8(%edi)
-       adcl    %eax,%eax
-       movl    %ebx,12(%edi)
-
-       movl    16(%esi),%edx
-       movl    20(%esi),%ebx
-       adcl    %edx,%edx
-       movl    %eax,16(%edi)
-       adcl    %ebx,%ebx
-       movl    %edx,20(%edi)
-
-       movl    24(%esi),%eax
-       movl    28(%esi),%edx
-       adcl    %eax,%eax
-       movl    %ebx,24(%edi)
-       adcl    %edx,%edx
-       movl    %eax,28(%edi)
-
-       leal    32(%esi),%esi           /* use leal not to clobber carry */
-       leal    32(%edi),%edi
-       decl    %ebp
-       jnz     LLoop
-
-LLend: popl    %ebp
-       sbbl    %eax,%eax               /* save carry in %eax */
-       andl    $7,%ebp
-       jz      LLend2
-       addl    %eax,%eax               /* restore carry from eax */
-LLoop2: movl   %edx,%ebx
-       movl    (%esi),%edx
-       adcl    %edx,%edx
-       movl    %ebx,(%edi)
-
-       leal    4(%esi),%esi            /* use leal not to clobber carry */
-       leal    4(%edi),%edi
-       decl    %ebp
-       jnz     LLoop2
-
-       jmp     LL1
-LLend2: addl   %eax,%eax               /* restore carry from eax */
-LL1:   movl    %edx,(%edi)             /* store last limb */
-
-       sbbl    %eax,%eax
-       negl    %eax
-
-       popl    %ebp
-       popl    %ebx
-       popl    %esi
-       popl    %edi
-       ret
-
-
diff --git a/grub-core/lib/libgcrypt/mpi/i586/mpih-mul1.S 
b/grub-core/lib/libgcrypt/mpi/i586/mpih-mul1.S
deleted file mode 100644
index 3601d968b..000000000
--- a/grub-core/lib/libgcrypt/mpi/i586/mpih-mul1.S
+++ /dev/null
@@ -1,89 +0,0 @@
-/* i80586 mul_1 -- Multiply a limb vector with a limb and store
- *                      the result in a second limb vector.
- *
- *      Copyright (C) 1992, 1994, 1996, 1998,
- *                    2001, 2002 Free Software Foundation, Inc.
- *
- * This file is part of Libgcrypt.
- *
- * Libgcrypt is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as
- * published by the Free Software Foundation; either version 2.1 of
- * the License, or (at your option) any later version.
- *
- * Libgcrypt is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
- *
- * Note: This code is heavily based on the GNU MP Library.
- *      Actually it's the same code with only minor changes in the
- *      way the data is stored; this is to support the abstraction
- *      of an optional secure memory allocation which may be used
- *      to avoid revealing of sensitive data due to paging etc.
- */
-
-
-#include "sysdep.h"
-#include "asm-syntax.h"
-
-
-/*******************
- * mpi_limb_t
- * _gcry_mpih_mul_1( mpi_ptr_t res_ptr,        (sp + 4)
- *               mpi_ptr_t s1_ptr,     (sp + 8)
- *               mpi_size_t s1_size,   (sp + 12)
- *               mpi_limb_t s2_limb)   (sp + 16)
- */
-
-#define res_ptr edi
-#define s1_ptr esi
-#define size   ecx
-#define s2_limb ebp
-
-       TEXT
-       ALIGN (3)
-       GLOBL   C_SYMBOL_NAME(_gcry_mpih_mul_1)
-C_SYMBOL_NAME(_gcry_mpih_mul_1:)
-
-       INSN1(push,l    ,R(edi))
-       INSN1(push,l    ,R(esi))
-       INSN1(push,l    ,R(ebx))
-       INSN1(push,l    ,R(ebp))
-
-       INSN2(mov,l     ,R(res_ptr),MEM_DISP(esp,20))
-       INSN2(mov,l     ,R(s1_ptr),MEM_DISP(esp,24))
-       INSN2(mov,l     ,R(size),MEM_DISP(esp,28))
-       INSN2(mov,l     ,R(s2_limb),MEM_DISP(esp,32))
-
-       INSN2(lea,l     ,R(res_ptr),MEM_INDEX(res_ptr,size,4))
-       INSN2(lea,l     ,R(s1_ptr),MEM_INDEX(s1_ptr,size,4))
-       INSN1(neg,l     ,R(size))
-       INSN2(xor,l     ,R(ebx),R(ebx))
-       ALIGN (3)
-
-Loop:  INSN2(adc,l     ,R(ebx),$0)
-       INSN2(mov,l     ,R(eax),MEM_INDEX(s1_ptr,size,4))
-
-       INSN1(mul,l     ,R(s2_limb))
-
-       INSN2(add,l     ,R(ebx),R(eax))
-
-       INSN2(mov,l     ,MEM_INDEX(res_ptr,size,4),R(ebx))
-       INSN1(inc,l     ,R(size))
-
-       INSN2(mov,l     ,R(ebx),R(edx))
-       INSN1(jnz,      ,Loop)
-
-       INSN2(adc,l     ,R(ebx),$0)
-       INSN2(mov,l     ,R(eax),R(ebx))
-       INSN1(pop,l     ,R(ebp))
-       INSN1(pop,l     ,R(ebx))
-       INSN1(pop,l     ,R(esi))
-       INSN1(pop,l     ,R(edi))
-       ret
-
diff --git a/grub-core/lib/libgcrypt/mpi/i586/mpih-mul2.S 
b/grub-core/lib/libgcrypt/mpi/i586/mpih-mul2.S
deleted file mode 100644
index f32d363a7..000000000
--- a/grub-core/lib/libgcrypt/mpi/i586/mpih-mul2.S
+++ /dev/null
@@ -1,93 +0,0 @@
-/* i80586 addmul_1 -- Multiply a limb vector with a limb and add
- *                   the result to a second limb vector.
- *
- *      Copyright (C) 1992, 1994, 1998, 
- *                    2001, 2002 Free Software Foundation, Inc.
- *
- * This file is part of Libgcrypt.
- *
- * Libgcrypt is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as
- * published by the Free Software Foundation; either version 2.1 of
- * the License, or (at your option) any later version.
- *
- * Libgcrypt is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
- *
- * Note: This code is heavily based on the GNU MP Library.
- *      Actually it's the same code with only minor changes in the
- *      way the data is stored; this is to support the abstraction
- *      of an optional secure memory allocation which may be used
- *      to avoid revealing of sensitive data due to paging etc.
- */
-
-
-#include "sysdep.h"
-#include "asm-syntax.h"
-
-
-/*******************
- * mpi_limb_t
- * _gcry_mpih_addmul_1( mpi_ptr_t res_ptr,      (sp + 4)
- *                  mpi_ptr_t s1_ptr,       (sp + 8)
- *                  mpi_size_t s1_size,     (sp + 12)
- *                  mpi_limb_t s2_limb)     (sp + 16)
- */
-
-#define res_ptr edi
-#define s1_ptr esi
-#define size   ecx
-#define s2_limb ebp
-
-       TEXT
-       ALIGN (3)
-       GLOBL   C_SYMBOL_NAME(_gcry_mpih_addmul_1)
-C_SYMBOL_NAME(_gcry_mpih_addmul_1:)
-
-       INSN1(push,l    ,R(edi))
-       INSN1(push,l    ,R(esi))
-       INSN1(push,l    ,R(ebx))
-       INSN1(push,l    ,R(ebp))
-
-       INSN2(mov,l     ,R(res_ptr),MEM_DISP(esp,20))
-       INSN2(mov,l     ,R(s1_ptr),MEM_DISP(esp,24))
-       INSN2(mov,l     ,R(size),MEM_DISP(esp,28))
-       INSN2(mov,l     ,R(s2_limb),MEM_DISP(esp,32))
-
-       INSN2(lea,l     ,R(res_ptr),MEM_INDEX(res_ptr,size,4))
-       INSN2(lea,l     ,R(s1_ptr),MEM_INDEX(s1_ptr,size,4))
-       INSN1(neg,l     ,R(size))
-       INSN2(xor,l     ,R(ebx),R(ebx))
-       ALIGN (3)
-
-Loop:  INSN2(adc,l     ,R(ebx),$0)
-       INSN2(mov,l     ,R(eax),MEM_INDEX(s1_ptr,size,4))
-
-       INSN1(mul,l     ,R(s2_limb))
-
-       INSN2(add,l     ,R(eax),R(ebx))
-       INSN2(mov,l     ,R(ebx),MEM_INDEX(res_ptr,size,4))
-
-       INSN2(adc,l     ,R(edx),$0)
-       INSN2(add,l     ,R(ebx),R(eax))
-
-       INSN2(mov,l     ,MEM_INDEX(res_ptr,size,4),R(ebx))
-       INSN1(inc,l     ,R(size))
-
-       INSN2(mov,l     ,R(ebx),R(edx))
-       INSN1(jnz,      ,Loop)
-
-       INSN2(adc,l     ,R(ebx),$0)
-       INSN2(mov,l     ,R(eax),R(ebx))
-       INSN1(pop,l     ,R(ebp))
-       INSN1(pop,l     ,R(ebx))
-       INSN1(pop,l     ,R(esi))
-       INSN1(pop,l     ,R(edi))
-       ret
-
diff --git a/grub-core/lib/libgcrypt/mpi/i586/mpih-mul3.S 
b/grub-core/lib/libgcrypt/mpi/i586/mpih-mul3.S
deleted file mode 100644
index fa27d4e1a..000000000
--- a/grub-core/lib/libgcrypt/mpi/i586/mpih-mul3.S
+++ /dev/null
@@ -1,93 +0,0 @@
-/* i80586 submul_1 -- Multiply a limb vector with a limb and add
- *                   the result to a second limb vector.
- *
- *      Copyright (C) 1992, 1994, 1998,
- *                    2001, 2002 Free Software Foundation, Inc.
- *
- * This file is part of Libgcrypt.
- *
- * Libgcrypt is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as
- * published by the Free Software Foundation; either version 2.1 of
- * the License, or (at your option) any later version.
- *
- * Libgcrypt is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
- *
- * Note: This code is heavily based on the GNU MP Library.
- *      Actually it's the same code with only minor changes in the
- *      way the data is stored; this is to support the abstraction
- *      of an optional secure memory allocation which may be used
- *      to avoid revealing of sensitive data due to paging etc.
- */
-
-
-#include "sysdep.h"
-#include "asm-syntax.h"
-
-
-/*******************
- * mpi_limb_t
- * _gcry_mpih_submul_1( mpi_ptr_t res_ptr,      (sp + 4)
- *                  mpi_ptr_t s1_ptr,       (sp + 8)
- *                  mpi_size_t s1_size,     (sp + 12)
- *                  mpi_limb_t s2_limb)     (sp + 16)
- */
-
-#define res_ptr edi
-#define s1_ptr esi
-#define size   ecx
-#define s2_limb ebp
-
-       TEXT
-       ALIGN (3)
-       GLOBL   C_SYMBOL_NAME(_gcry_mpih_submul_1)
-C_SYMBOL_NAME(_gcry_mpih_submul_1:)
-
-       INSN1(push,l    ,R(edi))
-       INSN1(push,l    ,R(esi))
-       INSN1(push,l    ,R(ebx))
-       INSN1(push,l    ,R(ebp))
-
-       INSN2(mov,l     ,R(res_ptr),MEM_DISP(esp,20))
-       INSN2(mov,l     ,R(s1_ptr),MEM_DISP(esp,24))
-       INSN2(mov,l     ,R(size),MEM_DISP(esp,28))
-       INSN2(mov,l     ,R(s2_limb),MEM_DISP(esp,32))
-
-       INSN2(lea,l     ,R(res_ptr),MEM_INDEX(res_ptr,size,4))
-       INSN2(lea,l     ,R(s1_ptr),MEM_INDEX(s1_ptr,size,4))
-       INSN1(neg,l     ,R(size))
-       INSN2(xor,l     ,R(ebx),R(ebx))
-       ALIGN (3)
-
-Loop:  INSN2(adc,l     ,R(ebx),$0)
-       INSN2(mov,l     ,R(eax),MEM_INDEX(s1_ptr,size,4))
-
-       INSN1(mul,l     ,R(s2_limb))
-
-       INSN2(add,l     ,R(eax),R(ebx))
-       INSN2(mov,l     ,R(ebx),MEM_INDEX(res_ptr,size,4))
-
-       INSN2(adc,l     ,R(edx),$0)
-       INSN2(sub,l     ,R(ebx),R(eax))
-
-       INSN2(mov,l     ,MEM_INDEX(res_ptr,size,4),R(ebx))
-       INSN1(inc,l     ,R(size))
-
-       INSN2(mov,l     ,R(ebx),R(edx))
-       INSN1(jnz,      ,Loop)
-
-       INSN2(adc,l     ,R(ebx),$0)
-       INSN2(mov,l     ,R(eax),R(ebx))
-       INSN1(pop,l     ,R(ebp))
-       INSN1(pop,l     ,R(ebx))
-       INSN1(pop,l     ,R(esi))
-       INSN1(pop,l     ,R(edi))
-       ret
-
diff --git a/grub-core/lib/libgcrypt/mpi/i586/mpih-rshift.S 
b/grub-core/lib/libgcrypt/mpi/i586/mpih-rshift.S
deleted file mode 100644
index c661e3d3b..000000000
--- a/grub-core/lib/libgcrypt/mpi/i586/mpih-rshift.S
+++ /dev/null
@@ -1,228 +0,0 @@
-/* i80586   rshift
- *
- *      Copyright (C) 1992, 1994, 1998,
- *                    2001, 2002 Free Software Foundation, Inc.
- *
- * This file is part of Libgcrypt.
- *
- * Libgcrypt is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as
- * published by the Free Software Foundation; either version 2.1 of
- * the License, or (at your option) any later version.
- *
- * Libgcrypt is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
- *
- * Note: This code is heavily based on the GNU MP Library.
- *      Actually it's the same code with only minor changes in the
- *      way the data is stored; this is to support the abstraction
- *      of an optional secure memory allocation which may be used
- *      to avoid revealing of sensitive data due to paging etc.
- */
-
-
-#include "sysdep.h"
-#include "asm-syntax.h"
-
-
-
-/*******************
- * mpi_limb_t
- * _gcry_mpih_rshift( mpi_ptr_t wp,    (sp + 4)
- *                mpi_ptr_t up,        (sp + 8)
- *                mpi_size_t usize,    (sp + 12)
- *                unsigned cnt)        (sp + 16)
- */
-
-.text
-       ALIGN (3)
-       .globl C_SYMBOL_NAME(_gcry_mpih_rshift)
-C_SYMBOL_NAME(_gcry_mpih_rshift:)
-       pushl   %edi
-       pushl   %esi
-       pushl   %ebx
-       pushl   %ebp
-
-       movl    20(%esp),%edi           /* res_ptr */
-       movl    24(%esp),%esi           /* s_ptr */
-       movl    28(%esp),%ebp           /* size */
-       movl    32(%esp),%ecx           /* cnt */
-
-/* We can use faster code for shift-by-1 under certain conditions.  */
-       cmp     $1,%ecx
-       jne     Rnormal
-       leal    4(%edi),%eax
-       cmpl    %esi,%eax
-       jnc     Rspecial                /* jump if res_ptr + 1 >= s_ptr */
-       leal    (%edi,%ebp,4),%eax
-       cmpl    %eax,%esi
-       jnc     Rspecial                /* jump if s_ptr >= res_ptr + size */
-
-Rnormal:
-       movl    (%esi),%edx
-       addl    $4,%esi
-       xorl    %eax,%eax
-       shrdl   %cl,%edx,%eax           /* compute carry limb */
-       pushl   %eax                    /* push carry limb onto stack */
-
-       decl    %ebp
-       pushl   %ebp
-       shrl    $3,%ebp
-       jz      Rend
-
-       movl    (%edi),%eax             /* fetch destination cache line */
-
-       ALIGN   (2)
-Roop:  movl    28(%edi),%eax           /* fetch destination cache line */
-       movl    %edx,%ebx
-
-       movl    (%esi),%eax
-       movl    4(%esi),%edx
-       shrdl   %cl,%eax,%ebx
-       shrdl   %cl,%edx,%eax
-       movl    %ebx,(%edi)
-       movl    %eax,4(%edi)
-
-       movl    8(%esi),%ebx
-       movl    12(%esi),%eax
-       shrdl   %cl,%ebx,%edx
-       shrdl   %cl,%eax,%ebx
-       movl    %edx,8(%edi)
-       movl    %ebx,12(%edi)
-
-       movl    16(%esi),%edx
-       movl    20(%esi),%ebx
-       shrdl   %cl,%edx,%eax
-       shrdl   %cl,%ebx,%edx
-       movl    %eax,16(%edi)
-       movl    %edx,20(%edi)
-
-       movl    24(%esi),%eax
-       movl    28(%esi),%edx
-       shrdl   %cl,%eax,%ebx
-       shrdl   %cl,%edx,%eax
-       movl    %ebx,24(%edi)
-       movl    %eax,28(%edi)
-
-       addl    $32,%esi
-       addl    $32,%edi
-       decl    %ebp
-       jnz     Roop
-
-Rend:  popl    %ebp
-       andl    $7,%ebp
-       jz      Rend2
-Roop2: movl    (%esi),%eax
-       shrdl   %cl,%eax,%edx           /* compute result limb */
-       movl    %edx,(%edi)
-       movl    %eax,%edx
-       addl    $4,%esi
-       addl    $4,%edi
-       decl    %ebp
-       jnz     Roop2
-
-Rend2: shrl    %cl,%edx                /* compute most significant limb */
-       movl    %edx,(%edi)             /* store it */
-
-       popl    %eax                    /* pop carry limb */
-
-       popl    %ebp
-       popl    %ebx
-       popl    %esi
-       popl    %edi
-       ret
-
-/* We loop from least significant end of the arrays, which is only
-   permissable if the source and destination don't overlap, since the
-   function is documented to work for overlapping source and destination.
-*/
-
-Rspecial:
-       leal    -4(%edi,%ebp,4),%edi
-       leal    -4(%esi,%ebp,4),%esi
-
-       movl    (%esi),%edx
-       subl    $4,%esi
-
-       decl    %ebp
-       pushl   %ebp
-       shrl    $3,%ebp
-
-       shrl    $1,%edx
-       incl    %ebp
-       decl    %ebp
-       jz      RLend
-
-       movl    (%edi),%eax             /* fetch destination cache line */
-
-       ALIGN   (2)
-RLoop: movl    -28(%edi),%eax          /* fetch destination cache line */
-       movl    %edx,%ebx
-
-       movl    (%esi),%eax
-       movl    -4(%esi),%edx
-       rcrl    $1,%eax
-       movl    %ebx,(%edi)
-       rcrl    $1,%edx
-       movl    %eax,-4(%edi)
-
-       movl    -8(%esi),%ebx
-       movl    -12(%esi),%eax
-       rcrl    $1,%ebx
-       movl    %edx,-8(%edi)
-       rcrl    $1,%eax
-       movl    %ebx,-12(%edi)
-
-       movl    -16(%esi),%edx
-       movl    -20(%esi),%ebx
-       rcrl    $1,%edx
-       movl    %eax,-16(%edi)
-       rcrl    $1,%ebx
-       movl    %edx,-20(%edi)
-
-       movl    -24(%esi),%eax
-       movl    -28(%esi),%edx
-       rcrl    $1,%eax
-       movl    %ebx,-24(%edi)
-       rcrl    $1,%edx
-       movl    %eax,-28(%edi)
-
-       leal    -32(%esi),%esi          /* use leal not to clobber carry */
-       leal    -32(%edi),%edi
-       decl    %ebp
-       jnz     RLoop
-
-RLend: popl    %ebp
-       sbbl    %eax,%eax               /* save carry in %eax */
-       andl    $7,%ebp
-       jz      RLend2
-       addl    %eax,%eax               /* restore carry from eax */
-RLoop2: movl   %edx,%ebx
-       movl    (%esi),%edx
-       rcrl    $1,%edx
-       movl    %ebx,(%edi)
-
-       leal    -4(%esi),%esi           /* use leal not to clobber carry */
-       leal    -4(%edi),%edi
-       decl    %ebp
-       jnz     RLoop2
-
-       jmp     RL1
-RLend2: addl   %eax,%eax               /* restore carry from eax */
-RL1:   movl    %edx,(%edi)             /* store last limb */
-
-       movl    $0,%eax
-       rcrl    $1,%eax
-
-       popl    %ebp
-       popl    %ebx
-       popl    %esi
-       popl    %edi
-       ret
-
diff --git a/grub-core/lib/libgcrypt/mpi/i586/mpih-sub1.S 
b/grub-core/lib/libgcrypt/mpi/i586/mpih-sub1.S
deleted file mode 100644
index ef2d58074..000000000
--- a/grub-core/lib/libgcrypt/mpi/i586/mpih-sub1.S
+++ /dev/null
@@ -1,142 +0,0 @@
-/* i80586 sub_n -- Sub two limb vectors of the same length > 0 and store
- *                sum in a third limb vector.
- *
- *      Copyright (C) 1992, 1994, 1995, 1998, 
- *                    2001, 2002 Free Software Foundation, Inc.
- *
- * This file is part of Libgcrypt.
- *
- * Libgcrypt is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as
- * published by the Free Software Foundation; either version 2.1 of
- * the License, or (at your option) any later version.
- *
- * Libgcrypt is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
- *
- * Note: This code is heavily based on the GNU MP Library.
- *      Actually it's the same code with only minor changes in the
- *      way the data is stored; this is to support the abstraction
- *      of an optional secure memory allocation which may be used
- *      to avoid revealing of sensitive data due to paging etc.
- */
-
-
-#include "sysdep.h"
-#include "asm-syntax.h"
-
-
-/*******************
- *  mpi_limb_t
- *  _gcry_mpih_sub_n( mpi_ptr_t res_ptr,       (sp + 4)
- *                mpi_ptr_t s1_ptr,    (sp + 8)
- *                mpi_ptr_t s2_ptr,    (sp + 12)
- *                mpi_size_t size)     (sp + 16)
- */
-
-
-.text
-       ALIGN (3)
-       .globl C_SYMBOL_NAME(_gcry_mpih_sub_n)
-C_SYMBOL_NAME(_gcry_mpih_sub_n:)
-
-       pushl   %edi
-       pushl   %esi
-       pushl   %ebx
-       pushl   %ebp
-
-       movl    20(%esp),%edi           /* res_ptr */
-       movl    24(%esp),%esi           /* s1_ptr */
-       movl    28(%esp),%ebp           /* s2_ptr */
-       movl    32(%esp),%ecx           /* size */
-
-       movl    (%ebp),%ebx
-
-       decl    %ecx
-       movl    %ecx,%edx
-       shrl    $3,%ecx
-       andl    $7,%edx
-       testl   %ecx,%ecx               /* zero carry flag */
-       jz      Lend
-       pushl   %edx
-
-       ALIGN (3)
-Loop:  movl    28(%edi),%eax           /* fetch destination cache line */
-       leal    32(%edi),%edi
-
-L1:    movl    (%esi),%eax
-       movl    4(%esi),%edx
-       sbbl    %ebx,%eax
-       movl    4(%ebp),%ebx
-       sbbl    %ebx,%edx
-       movl    8(%ebp),%ebx
-       movl    %eax,-32(%edi)
-       movl    %edx,-28(%edi)
-
-L2:    movl    8(%esi),%eax
-       movl    12(%esi),%edx
-       sbbl    %ebx,%eax
-       movl    12(%ebp),%ebx
-       sbbl    %ebx,%edx
-       movl    16(%ebp),%ebx
-       movl    %eax,-24(%edi)
-       movl    %edx,-20(%edi)
-
-L3:    movl    16(%esi),%eax
-       movl    20(%esi),%edx
-       sbbl    %ebx,%eax
-       movl    20(%ebp),%ebx
-       sbbl    %ebx,%edx
-       movl    24(%ebp),%ebx
-       movl    %eax,-16(%edi)
-       movl    %edx,-12(%edi)
-
-L4:    movl    24(%esi),%eax
-       movl    28(%esi),%edx
-       sbbl    %ebx,%eax
-       movl    28(%ebp),%ebx
-       sbbl    %ebx,%edx
-       movl    32(%ebp),%ebx
-       movl    %eax,-8(%edi)
-       movl    %edx,-4(%edi)
-
-       leal    32(%esi),%esi
-       leal    32(%ebp),%ebp
-       decl    %ecx
-       jnz     Loop
-
-       popl    %edx
-Lend:
-       decl    %edx                    /* test %edx w/o clobbering carry */
-       js      Lend2
-       incl    %edx
-Loop2:
-       leal    4(%edi),%edi
-       movl    (%esi),%eax
-       sbbl    %ebx,%eax
-       movl    4(%ebp),%ebx
-       movl    %eax,-4(%edi)
-       leal    4(%esi),%esi
-       leal    4(%ebp),%ebp
-       decl    %edx
-       jnz     Loop2
-Lend2:
-       movl    (%esi),%eax
-       sbbl    %ebx,%eax
-       movl    %eax,(%edi)
-
-       sbbl    %eax,%eax
-       negl    %eax
-
-       popl    %ebp
-       popl    %ebx
-       popl    %esi
-       popl    %edi
-       ret
-
diff --git a/grub-core/lib/libgcrypt/mpi/longlong.h 
b/grub-core/lib/libgcrypt/mpi/longlong.h
index b3fce0958..c299534c3 100644
--- a/grub-core/lib/libgcrypt/mpi/longlong.h
+++ b/grub-core/lib/libgcrypt/mpi/longlong.h
@@ -1,5 +1,6 @@
 /* longlong.h -- definitions for mixed size 32/64 bit arithmetic.
-   Note: I added some stuff for use with gnupg
+   Note: This is the Libgcrypt version
+
 
 Copyright (C) 1991, 1992, 1993, 1994, 1996, 1998,
               2000, 2001, 2002, 2003, 2004, 2011 Free Software Foundation, Inc.
@@ -41,7 +42,7 @@ MA 02111-1307, USA. */
 /* This is used to make sure no undesirable sharing between different libraries
    that use this file takes place.  */
 #ifndef __MPN
-#define __MPN(x) __##x
+# define __MPN(x) __##x
 #endif
 
 /* Define auxiliary asm macros.
@@ -102,19 +103,22 @@ MA 02111-1307, USA. */
 /* We sometimes need to clobber "cc" with gcc2, but that would not be
    understood by gcc1. Use cpp to avoid major code duplication.  */
 #if __GNUC__ < 2
-#define __CLOBBER_CC
-#define __AND_CLOBBER_CC
+# define __CLOBBER_CC
+# define __AND_CLOBBER_CC
 #else /* __GNUC__ >= 2 */
-#define __CLOBBER_CC : "cc"
-#define __AND_CLOBBER_CC , "cc"
+# define __CLOBBER_CC : "cc"
+# define __AND_CLOBBER_CC , "cc"
 #endif /* __GNUC__ < 2 */
 
+/***************************************
+ ****  Begin CPU Specific Versions  ****
+ ***************************************/
 
 /***************************************
  **************  A29K  *****************
  ***************************************/
 #if (defined (__a29k__) || defined (_AM29K)) && W_TYPE_SIZE == 32
-#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+# define add_ssaaaa(sh, sl, ah, al, bh, bl) \
   __asm__ ("add %1,%4,%5\n"   \
            "addc %0,%2,%3"                                              \
           : "=r" ((USItype)(sh)),                                      \
@@ -123,7 +127,7 @@ MA 02111-1307, USA. */
             "rI" ((USItype)(bh)),                                      \
             "%r" ((USItype)(al)),                                      \
             "rI" ((USItype)(bl)))
-#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+# define sub_ddmmss(sh, sl, ah, al, bh, bl) \
   __asm__ ("sub %1,%4,%5\n"                                             \
           "subc %0,%2,%3"                                              \
           : "=r" ((USItype)(sh)),                                      \
@@ -132,7 +136,7 @@ MA 02111-1307, USA. */
             "rI" ((USItype)(bh)),                                      \
             "r" ((USItype)(al)),                                       \
             "rI" ((USItype)(bl)))
-#define umul_ppmm(xh, xl, m0, m1) \
+# define umul_ppmm(xh, xl, m0, m1) \
   do {                                                                 \
     USItype __m0 = (m0), __m1 = (m1);                                  \
     __asm__ ("multiplu %0,%1,%2"                                        \
@@ -144,41 +148,41 @@ MA 02111-1307, USA. */
             : "r" (__m0),                                              \
               "r" (__m1));                                             \
   } while (0)
-#define udiv_qrnnd(q, r, n1, n0, d) \
+# define udiv_qrnnd(q, r, n1, n0, d) \
   __asm__ ("dividu %0,%3,%4"                                            \
           : "=r" ((USItype)(q)),                                       \
             "=q" ((USItype)(r))                                        \
           : "1" ((USItype)(n1)),                                       \
             "r" ((USItype)(n0)),                                       \
             "r" ((USItype)(d)))
-#define count_leading_zeros(count, x) \
+# define count_leading_zeros(count, x) \
     __asm__ ("clz %0,%1"                                                \
             : "=r" ((USItype)(count))                                  \
             : "r" ((USItype)(x)))
-#define COUNT_LEADING_ZEROS_0 32
+# define COUNT_LEADING_ZEROS_0 32
 #endif /* __a29k__ */
 
 
 #if defined (__alpha) && W_TYPE_SIZE == 64
-#define umul_ppmm(ph, pl, m0, m1) \
+# define umul_ppmm(ph, pl, m0, m1) \
   do {                                                                 \
     UDItype __m0 = (m0), __m1 = (m1);                                  \
     __asm__ ("umulh %r1,%2,%0"                                          \
-            : "=r" ((UDItype) ph)                                      \
+            : "=r" ((UDItype)(ph))                                     \
             : "%rJ" (__m0),                                            \
               "rI" (__m1));                                            \
     (pl) = __m0 * __m1;                                                \
   } while (0)
-#define UMUL_TIME 46
-#ifndef LONGLONG_STANDALONE
-#define udiv_qrnnd(q, r, n1, n0, d) \
+# define UMUL_TIME 46
+# ifndef LONGLONG_STANDALONE
+#  define udiv_qrnnd(q, r, n1, n0, d) \
   do { UDItype __r;                                                    \
     (q) = __udiv_qrnnd (&__r, (n1), (n0), (d));                        \
     (r) = __r;                                                         \
   } while (0)
 extern UDItype __udiv_qrnnd ();
-#define UDIV_TIME 220
-#endif /* LONGLONG_STANDALONE */
+#  define UDIV_TIME 220
+# endif /* !LONGLONG_STANDALONE */
 #endif /* __alpha */
 
 /***************************************
@@ -187,30 +191,31 @@ extern UDItype __udiv_qrnnd ();
 #if defined (__arm__) && W_TYPE_SIZE == 32 && \
     (!defined (__thumb__) || defined (__thumb2__))
 /* The __ARM_ARCH define is provided by gcc 4.8.  Construct it otherwise.  */
-#ifndef __ARM_ARCH
-# ifdef __ARM_ARCH_2__
-#  define __ARM_ARCH 2
-# elif defined (__ARM_ARCH_3__) || defined (__ARM_ARCH_3M__)
-#  define __ARM_ARCH 3
-# elif defined (__ARM_ARCH_4__) || defined (__ARM_ARCH_4T__)
-#  define __ARM_ARCH 4
-# elif defined (__ARM_ARCH_5__) || defined (__ARM_ARCH_5E__) \
-       || defined(__ARM_ARCH_5T__) || defined(__ARM_ARCH_5TE__) \
-       || defined(__ARM_ARCH_5TEJ__)
-#  define __ARM_ARCH 5
-# elif defined (__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \
-       || defined (__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) \
-       || defined (__ARM_ARCH_6K__) || defined(__ARM_ARCH_6T2__)
-#  define __ARM_ARCH 6
-# elif defined (__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \
-       || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \
-       || defined(__ARM_ARCH_7EM__)
-#  define __ARM_ARCH 7
-# else
+# ifndef __ARM_ARCH
+#  ifdef __ARM_ARCH_2__
+#   define __ARM_ARCH 2
+#  elif defined (__ARM_ARCH_3__) || defined (__ARM_ARCH_3M__)
+#   define __ARM_ARCH 3
+#  elif defined (__ARM_ARCH_4__) || defined (__ARM_ARCH_4T__)
+#   define __ARM_ARCH 4
+#  elif defined (__ARM_ARCH_5__) || defined (__ARM_ARCH_5E__) \
+        || defined(__ARM_ARCH_5T__) || defined(__ARM_ARCH_5TE__) \
+        || defined(__ARM_ARCH_5TEJ__)
+#   define __ARM_ARCH 5
+#  elif defined (__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \
+        || defined (__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) \
+        || defined (__ARM_ARCH_6K__) || defined(__ARM_ARCH_6T2__)
+#   define __ARM_ARCH 6
+#  elif defined (__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \
+        || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \
+        || defined(__ARM_ARCH_7EM__)
+#   define __ARM_ARCH 7
+#  else
    /* could not detect? */
-# endif
-#endif
-#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+#  endif
+# endif /* !__ARM_ARCH */
+
+# define add_ssaaaa(sh, sl, ah, al, bh, bl) \
   __asm__ ("adds %1, %4, %5\n"                                          \
           "adc  %0, %2, %3"                                            \
           : "=r" ((sh)),                                               \
@@ -219,7 +224,7 @@ extern UDItype __udiv_qrnnd ();
             "rI" ((USItype)(bh)),                                      \
             "%r" ((USItype)(al)),                                      \
             "rI" ((USItype)(bl)) __CLOBBER_CC)
-#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+# define sub_ddmmss(sh, sl, ah, al, bh, bl) \
   __asm__ ("subs %1, %4, %5\n"                                          \
           "sbc  %0, %2, %3"                                            \
           : "=r" ((sh)),                                               \
@@ -228,8 +233,8 @@ extern UDItype __udiv_qrnnd ();
             "rI" ((USItype)(bh)),                                      \
             "r" ((USItype)(al)),                                       \
             "rI" ((USItype)(bl)) __CLOBBER_CC)
-#if (defined __ARM_ARCH && __ARM_ARCH <= 3)
-#define umul_ppmm(xh, xl, a, b) \
+# if (defined __ARM_ARCH && __ARM_ARCH <= 3)
+#  define umul_ppmm(xh, xl, a, b) \
   __asm__ ("@ Inlined umul_ppmm\n"                                      \
        "mov    %|r0, %2, lsr #16               @ AAAA\n"               \
        "mov    %|r2, %3, lsr #16               @ BBBB\n"               \
@@ -248,30 +253,72 @@ extern UDItype __udiv_qrnnd ();
           : "r" ((USItype)(a)),                                        \
             "r" ((USItype)(b))                                         \
           : "r0", "r1", "r2" __AND_CLOBBER_CC)
-#else /* __ARM_ARCH >= 4 */
-#define umul_ppmm(xh, xl, a, b)                                         \
+# else /* __ARM_ARCH >= 4 */
+#  define umul_ppmm(xh, xl, a, b)                                         \
   __asm__ ("@ Inlined umul_ppmm\n"                                      \
           "umull %1, %0, %2, %3"                                       \
                   : "=&r" ((xh)),                                      \
                     "=r" ((xl))                                        \
                   : "r" ((USItype)(a)),                                \
                     "r" ((USItype)(b)))
-#endif /* __ARM_ARCH >= 4 */
-#define UMUL_TIME 20
-#define UDIV_TIME 100
-#if (defined __ARM_ARCH && __ARM_ARCH >= 5)
-#define count_leading_zeros(count, x) \
+# endif /* __ARM_ARCH >= 4 */
+# define UMUL_TIME 20
+# define UDIV_TIME 100
+# if (defined __ARM_ARCH && __ARM_ARCH >= 5)
+#  define count_leading_zeros(count, x) \
   __asm__ ("clz %0, %1"                                                 \
                   : "=r" ((count))                                     \
                   : "r" ((USItype)(x)))
-#endif /* __ARM_ARCH >= 5 */
+# endif /* __ARM_ARCH >= 5 */
 #endif /* __arm__ */
 
+/***************************************
+ **********  ARM64 / Aarch64  **********
+ ***************************************/
+#if defined(__aarch64__) && W_TYPE_SIZE == 64
+# define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  __asm__ ("adds %1, %4, %5\n"                                          \
+           "adc  %0, %2, %3\n"                                          \
+           : "=r" ((sh)),                                               \
+             "=&r" ((sl))                                               \
+           : "r" ((UDItype)(ah)),                                       \
+             "r" ((UDItype)(bh)),                                       \
+             "r" ((UDItype)(al)),                                       \
+             "r" ((UDItype)(bl)) __CLOBBER_CC)
+# define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  __asm__ ("subs %1, %4, %5\n"                                          \
+           "sbc  %0, %2, %3\n"                                          \
+           : "=r" ((sh)),                                               \
+             "=&r" ((sl))                                               \
+           : "r" ((UDItype)(ah)),                                       \
+             "r" ((UDItype)(bh)),                                       \
+             "r" ((UDItype)(al)),                                       \
+             "r" ((UDItype)(bl)) __CLOBBER_CC)
+# define umul_ppmm(ph, pl, m0, m1) \
+  do {                                                                  \
+    UDItype __m0 = (m0), __m1 = (m1), __ph;                             \
+    (pl) = __m0 * __m1;                                                 \
+    __asm__ ("umulh %0,%1,%2"                                           \
+             : "=r" (__ph)                                              \
+             : "r" (__m0),                                              \
+               "r" (__m1));                                             \
+    (ph) = __ph; \
+  } while (0)
+# define count_leading_zeros(count, x) \
+  do {                                                                  \
+    UDItype __co;                                                       \
+    __asm__ ("clz %0, %1\n"                                             \
+             : "=r" (__co)                                              \
+             : "r" ((UDItype)(x)));                                     \
+    (count) = __co;                                                     \
+  } while (0)
+#endif /* __aarch64__ */
+
 /***************************************
  **************  CLIPPER  **************
  ***************************************/
 #if defined (__clipper__) && W_TYPE_SIZE == 32
-#define umul_ppmm(w1, w0, u, v) \
+# define umul_ppmm(w1, w0, u, v) \
   ({union {UDItype __ll;                                               \
           struct {USItype __l, __h;} __i;                              \
          } __xx;                                                       \
@@ -280,7 +327,7 @@ extern UDItype __udiv_qrnnd ();
           : "%0" ((USItype)(u)),                                       \
             "r" ((USItype)(v)));                                       \
   (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;})
-#define smul_ppmm(w1, w0, u, v) \
+# define smul_ppmm(w1, w0, u, v) \
   ({union {DItype __ll;                                                \
           struct {SItype __l, __h;} __i;                               \
          } __xx;                                                       \
@@ -289,7 +336,7 @@ extern UDItype __udiv_qrnnd ();
           : "%0" ((SItype)(u)),                                        \
             "r" ((SItype)(v)));                                        \
   (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;})
-#define __umulsidi3(u, v) \
+# define __umulsidi3(u, v) \
   ({UDItype __w;                                                       \
     __asm__ ("mulwux %2,%0"                                             \
             : "=r" (__w)                                               \
@@ -303,7 +350,7 @@ extern UDItype __udiv_qrnnd ();
  **************  GMICRO  ***************
  ***************************************/
 #if defined (__gmicro__) && W_TYPE_SIZE == 32
-#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+# define add_ssaaaa(sh, sl, ah, al, bh, bl) \
   __asm__ ("add.w %5,%1\n"                                              \
           "addx %3,%0"                                                 \
           : "=g" ((USItype)(sh)),                                      \
@@ -312,7 +359,7 @@ extern UDItype __udiv_qrnnd ();
             "g" ((USItype)(bh)),                                       \
             "%1" ((USItype)(al)),                                      \
             "g" ((USItype)(bl)))
-#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+# define sub_ddmmss(sh, sl, ah, al, bh, bl) \
   __asm__ ("sub.w %5,%1\n"                                              \
           "subx %3,%0"                                                 \
           : "=g" ((USItype)(sh)),                                      \
@@ -321,20 +368,20 @@ extern UDItype __udiv_qrnnd ();
             "g" ((USItype)(bh)),                                       \
             "1" ((USItype)(al)),                                       \
             "g" ((USItype)(bl)))
-#define umul_ppmm(ph, pl, m0, m1) \
+# define umul_ppmm(ph, pl, m0, m1) \
   __asm__ ("mulx %3,%0,%1"                                              \
           : "=g" ((USItype)(ph)),                                      \
             "=r" ((USItype)(pl))                                       \
           : "%0" ((USItype)(m0)),                                      \
             "g" ((USItype)(m1)))
-#define udiv_qrnnd(q, r, nh, nl, d) \
+# define udiv_qrnnd(q, r, nh, nl, d) \
   __asm__ ("divx %4,%0,%1"                                              \
           : "=g" ((USItype)(q)),                                       \
             "=r" ((USItype)(r))                                        \
           : "1" ((USItype)(nh)),                                       \
             "0" ((USItype)(nl)),                                       \
             "g" ((USItype)(d)))
-#define count_leading_zeros(count, x) \
+# define count_leading_zeros(count, x) \
   __asm__ ("bsch/1 %1,%0"                                               \
           : "=g" (count)                                               \
           : "g" ((USItype)(x)),                                        \
@@ -346,7 +393,7 @@ extern UDItype __udiv_qrnnd ();
  **************  HPPA  *****************
  ***************************************/
 #if defined (__hppa) && W_TYPE_SIZE == 32
-#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+# define add_ssaaaa(sh, sl, ah, al, bh, bl) \
   __asm__ ("   add %4,%5,%1\n"                                             \
           "    addc %2,%3,%0"                                              \
           : "=r" ((USItype)(sh)),                                      \
@@ -355,7 +402,7 @@ extern UDItype __udiv_qrnnd ();
             "rM" ((USItype)(bh)),                                      \
             "%rM" ((USItype)(al)),                                     \
             "rM" ((USItype)(bl)))
-#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+# define sub_ddmmss(sh, sl, ah, al, bh, bl) \
   __asm__ ("   sub %4,%5,%1\n"                                             \
           "    subb %2,%3,%0"                                              \
           : "=r" ((USItype)(sh)),                                      \
@@ -364,8 +411,8 @@ extern UDItype __udiv_qrnnd ();
             "rM" ((USItype)(bh)),                                      \
             "rM" ((USItype)(al)),                                      \
             "rM" ((USItype)(bl)))
-#if defined (_PA_RISC1_1)
-#define umul_ppmm(wh, wl, u, v) \
+# if defined (_PA_RISC1_1)
+#  define umul_ppmm(wh, wl, u, v) \
   do {                                                                 \
     union {UDItype __ll;                                               \
           struct {USItype __h, __l;} __i;                              \
@@ -377,21 +424,21 @@ extern UDItype __udiv_qrnnd ();
     (wh) = __xx.__i.__h;                                               \
     (wl) = __xx.__i.__l;                                               \
   } while (0)
-#define UMUL_TIME 8
-#define UDIV_TIME 60
-#else
-#define UMUL_TIME 40
-#define UDIV_TIME 80
-#endif
-#ifndef LONGLONG_STANDALONE
-#define udiv_qrnnd(q, r, n1, n0, d) \
+#  define UMUL_TIME 8
+#  define UDIV_TIME 60
+# else
+#  define UMUL_TIME 40
+#  define UDIV_TIME 80
+# endif
+# if !defined(LONGLONG_STANDALONE) && !defined(ASM_DISABLED)
+#  define udiv_qrnnd(q, r, n1, n0, d) \
   do { USItype __r;                                                    \
     (q) = __udiv_qrnnd (&__r, (n1), (n0), (d));                        \
     (r) = __r;                                                         \
   } while (0)
 extern USItype __udiv_qrnnd ();
-#endif /* LONGLONG_STANDALONE */
-#define count_leading_zeros(count, x) \
+# endif /* !LONGLONG_STANDALONE && !ASM_DISABLED */
+# define count_leading_zeros(count, x) \
   do {                                                                \
     USItype __tmp;                                                    \
     __asm__ (                                                         \
@@ -419,7 +466,7 @@ extern USItype __udiv_qrnnd ();
  **************  I370  *****************
  ***************************************/
 #if (defined (__i370__) || defined (__mvs__)) && W_TYPE_SIZE == 32
-#define umul_ppmm(xh, xl, m0, m1) \
+# define umul_ppmm(xh, xl, m0, m1) \
   do {                                                                 \
     union {UDItype __ll;                                               \
           struct {USItype __h, __l;} __i;                              \
@@ -434,7 +481,7 @@ extern USItype __udiv_qrnnd ();
     (xh) += ((((SItype) __m0 >> 31) & __m1)                            \
             + (((SItype) __m1 >> 31) & __m0));                         \
   } while (0)
-#define smul_ppmm(xh, xl, m0, m1) \
+# define smul_ppmm(xh, xl, m0, m1) \
   do {                                                                 \
     union {DItype __ll;                                                \
           struct {USItype __h, __l;} __i;                              \
@@ -446,12 +493,12 @@ extern USItype __udiv_qrnnd ();
               "r" (m1));                                               \
     (xh) = __xx.__i.__h; (xl) = __xx.__i.__l;                          \
   } while (0)
-#define sdiv_qrnnd(q, r, n1, n0, d) \
+# define sdiv_qrnnd(q, r, n1, n0, d) \
   do {                                                                 \
     union {DItype __ll;                                                \
           struct {USItype __h, __l;} __i;                              \
          } __xx;                                                       \
-    __xx.__i.__h = n1; __xx.__i.__l = n0;                              \
+    __xx.__i.__h = (n1); __xx.__i.__l = (n0);                          \
     __asm__ ("dr %0,%2"                                                 \
             : "=r" (__xx.__ll)                                         \
             : "0" (__xx.__ll), "r" (d));                               \
@@ -464,7 +511,7 @@ extern USItype __udiv_qrnnd ();
  **************  I386  *****************
  ***************************************/
 #if (defined (__i386__) || defined (__i486__)) && W_TYPE_SIZE == 32
-#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+# define add_ssaaaa(sh, sl, ah, al, bh, bl) \
   __asm__ ("addl %5,%1\n"                                               \
           "adcl %3,%0"                                                 \
           : "=r" ((sh)),                                               \
@@ -474,7 +521,7 @@ extern USItype __udiv_qrnnd ();
             "%1" ((USItype)(al)),                                      \
             "g" ((USItype)(bl))                                        \
           __CLOBBER_CC)
-#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+# define sub_ddmmss(sh, sl, ah, al, bh, bl) \
   __asm__ ("subl %5,%1\n"                                               \
           "sbbl %3,%0"                                                 \
           : "=r" ((sh)),                                               \
@@ -484,14 +531,14 @@ extern USItype __udiv_qrnnd ();
             "1" ((USItype)(al)),                                       \
             "g" ((USItype)(bl))                                        \
           __CLOBBER_CC)
-#define umul_ppmm(w1, w0, u, v) \
+# define umul_ppmm(w1, w0, u, v) \
   __asm__ ("mull %3"                                                    \
           : "=a" ((w0)),                                               \
             "=d" ((w1))                                                \
           : "%0" ((USItype)(u)),                                       \
             "rm" ((USItype)(v))                                        \
           __CLOBBER_CC)
-#define udiv_qrnnd(q, r, n1, n0, d) \
+# define udiv_qrnnd(q, r, n1, n0, d) \
   __asm__ ("divl %4"                                                    \
           : "=a" ((q)),                                                \
             "=d" ((r))                                                 \
@@ -499,7 +546,7 @@ extern USItype __udiv_qrnnd ();
             "1" ((USItype)(n1)),                                       \
             "rm" ((USItype)(d))                                        \
           __CLOBBER_CC)
-#define count_leading_zeros(count, x) \
+# define count_leading_zeros(count, x) \
   do {                                                                 \
     USItype __cbtmp;                                                   \
     __asm__ ("bsrl %1,%0"                                               \
@@ -507,22 +554,85 @@ extern USItype __udiv_qrnnd ();
             __CLOBBER_CC);                                             \
     (count) = __cbtmp ^ 31;                                            \
   } while (0)
-#define count_trailing_zeros(count, x) \
+# define count_trailing_zeros(count, x) \
   __asm__ ("bsfl %1,%0" : "=r" (count) : "rm" ((USItype)(x)) __CLOBBER_CC)
-#ifndef UMUL_TIME
-#define UMUL_TIME 40
-#endif
-#ifndef UDIV_TIME
-#define UDIV_TIME 40
-#endif
+# ifndef UMUL_TIME
+#  define UMUL_TIME 40
+# endif
+# ifndef UDIV_TIME
+#  define UDIV_TIME 40
+# endif
 #endif /* 80x86 */
 
+/***************************************
+ *********** AMD64 / x86-64 ************
+ ***************************************/
+#if defined(__x86_64) && W_TYPE_SIZE == 64
+# define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  __asm__ ("addq %5,%1\n"                                               \
+          "adcq %3,%0"                                                 \
+          : "=r" ((sh)),                                               \
+            "=&r" ((sl))                                               \
+          : "0" ((UDItype)(ah)),                                       \
+            "g"  ((UDItype)(bh)),                                      \
+            "1" ((UDItype)(al)),                                       \
+            "g"  ((UDItype)(bl))                                       \
+          __CLOBBER_CC)
+# define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  __asm__ ("subq %5,%1\n"                                               \
+          "sbbq %3,%0"                                                 \
+          : "=r" ((sh)),                                               \
+            "=&r" ((sl))                                               \
+          : "0" ((UDItype)(ah)),                                       \
+            "g" ((UDItype)(bh)),                                       \
+            "1" ((UDItype)(al)),                                       \
+            "g" ((UDItype)(bl))                                        \
+          __CLOBBER_CC)
+# define umul_ppmm(w1, w0, u, v) \
+  __asm__ ("mulq %3"                                                    \
+          : "=a" ((w0)),                                               \
+            "=d" ((w1))                                                \
+          : "0" ((UDItype)(u)),                                        \
+            "rm" ((UDItype)(v))                                        \
+          __CLOBBER_CC)
+# define udiv_qrnnd(q, r, n1, n0, d) \
+  __asm__ ("divq %4"                                                    \
+          : "=a" ((q)),                                                \
+            "=d" ((r))                                                 \
+          : "0" ((UDItype)(n0)),                                       \
+            "1" ((UDItype)(n1)),                                       \
+            "rm" ((UDItype)(d))                                        \
+          __CLOBBER_CC)
+# define count_leading_zeros(count, x) \
+  do {                                                                  \
+    UDItype __cbtmp;                                                    \
+    __asm__ ("bsrq %1,%0"                                               \
+             : "=r" (__cbtmp) : "rm" ((UDItype)(x))                     \
+             __CLOBBER_CC);                                             \
+    (count) = __cbtmp ^ 63;                                             \
+  } while (0)
+# define count_trailing_zeros(count, x) \
+  do {                                                                  \
+    UDItype __cbtmp;                                                    \
+    __asm__ ("bsfq %1,%0"                                               \
+             : "=r" (__cbtmp) : "rm" ((UDItype)(x))                     \
+             __CLOBBER_CC);                                             \
+    (count) = __cbtmp;                                                  \
+  } while (0)
+# ifndef UMUL_TIME
+#  define UMUL_TIME 40
+# endif
+# ifndef UDIV_TIME
+#  define UDIV_TIME 40
+# endif
+#endif /* __x86_64 */
+
 
 /***************************************
  **************  I860  *****************
  ***************************************/
 #if defined (__i860__) && W_TYPE_SIZE == 32
-#define rshift_rhlc(r,h,l,c) \
+# define rshift_rhlc(r,h,l,c) \
   __asm__ ("shr %3,r0,r0\n"  \
            "shrd %1,%2,%0"   \
           "=r" (r) : "r" (h), "r" (l), "rn" (c))
@@ -532,7 +642,7 @@ extern USItype __udiv_qrnnd ();
  **************  I960  *****************
  ***************************************/
 #if defined (__i960__) && W_TYPE_SIZE == 32
-#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+# define add_ssaaaa(sh, sl, ah, al, bh, bl) \
   __asm__ ("cmpo 1,0\n"      \
            "addc %5,%4,%1\n" \
            "addc %3,%2,%0"   \
@@ -542,7 +652,7 @@ extern USItype __udiv_qrnnd ();
             "dI" ((USItype)(bh)),                                      \
             "%dI" ((USItype)(al)),                                     \
             "dI" ((USItype)(bl)))
-#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+# define sub_ddmmss(sh, sl, ah, al, bh, bl) \
   __asm__ ("cmpo 0,0\n"      \
            "subc %5,%4,%1\n" \
            "subc %3,%2,%0"   \
@@ -552,7 +662,7 @@ extern USItype __udiv_qrnnd ();
             "dI" ((USItype)(bh)),                                      \
             "dI" ((USItype)(al)),                                      \
             "dI" ((USItype)(bl)))
-#define umul_ppmm(w1, w0, u, v) \
+# define umul_ppmm(w1, w0, u, v) \
   ({union {UDItype __ll;                                               \
           struct {USItype __l, __h;} __i;                              \
          } __xx;                                                       \
@@ -561,14 +671,14 @@ extern USItype __udiv_qrnnd ();
           : "%dI" ((USItype)(u)),                                      \
             "dI" ((USItype)(v)));                                      \
   (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;})
-#define __umulsidi3(u, v) \
+# define __umulsidi3(u, v) \
   ({UDItype __w;                                                       \
     __asm__ ("emul      %2,%1,%0"                                       \
             : "=d" (__w)                                               \
             : "%dI" ((USItype)(u)),                                    \
               "dI" ((USItype)(v)));                                    \
     __w; })
-#define udiv_qrnnd(q, r, nh, nl, d) \
+# define udiv_qrnnd(q, r, nh, nl, d) \
   do {                                                                 \
     union {UDItype __ll;                                               \
           struct {USItype __l, __h;} __i;                              \
@@ -580,7 +690,7 @@ extern USItype __udiv_qrnnd ();
             "dI" ((USItype)(d)));                                      \
     (r) = __rq.__i.__l; (q) = __rq.__i.__h;                            \
   } while (0)
-#define count_leading_zeros(count, x) \
+# define count_leading_zeros(count, x) \
   do {                                                                 \
     USItype __cbtmp;                                                   \
     __asm__ ("scanbit %1,%0"                                            \
@@ -588,9 +698,9 @@ extern USItype __udiv_qrnnd ();
             : "r" ((USItype)(x)));                                     \
     (count) = __cbtmp ^ 31;                                            \
   } while (0)
-#define COUNT_LEADING_ZEROS_0 (-32) /* sic */
-#if defined (__i960mx)         /* what is the proper symbol to test??? */
-#define rshift_rhlc(r,h,l,c) \
+# define COUNT_LEADING_ZEROS_0 (-32) /* sic */
+# if defined (__i960mx)  /* what is the proper symbol to test??? */
+#  define rshift_rhlc(r,h,l,c) \
   do {                                                                 \
     union {UDItype __ll;                                               \
           struct {USItype __l, __h;} __i;                              \
@@ -599,15 +709,16 @@ extern USItype __udiv_qrnnd ();
     __asm__ ("shre %2,%1,%0"                                            \
             : "=d" (r) : "dI" (__nn.__ll), "dI" (c));                  \
   }
-#endif /* i960mx */
+# endif /* i960mx */
 #endif /* i960 */
 
 
 /***************************************
  **************  68000 ****************
  ***************************************/
-#if (defined (__mc68000__) || defined (__mc68020__) || defined (__NeXT__) || 
defined(mc68020)) && W_TYPE_SIZE == 32
-#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+#if (defined (__mc68000__) || defined (__mc68020__)                     \
+     || defined (__NeXT__) || defined(mc68020)) && W_TYPE_SIZE == 32
+# define add_ssaaaa(sh, sl, ah, al, bh, bl) \
   __asm__ ("add%.l %5,%1\n"                                             \
           "addx%.l %3,%0"                                              \
           : "=d" ((USItype)(sh)),                                      \
@@ -616,7 +727,7 @@ extern USItype __udiv_qrnnd ();
             "d" ((USItype)(bh)),                                       \
             "%1" ((USItype)(al)),                                      \
             "g" ((USItype)(bl)))
-#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+# define sub_ddmmss(sh, sl, ah, al, bh, bl) \
   __asm__ ("sub%.l %5,%1\n"                                             \
           "subx%.l %3,%0"                                              \
           : "=d" ((USItype)(sh)),                                      \
@@ -625,36 +736,36 @@ extern USItype __udiv_qrnnd ();
             "d" ((USItype)(bh)),                                       \
             "1" ((USItype)(al)),                                       \
             "g" ((USItype)(bl)))
-#if (defined (__mc68020__) || defined (__NeXT__) || defined(mc68020))
-#define umul_ppmm(w1, w0, u, v) \
+# if (defined (__mc68020__) || defined (__NeXT__) || defined(mc68020))
+#  define umul_ppmm(w1, w0, u, v) \
   __asm__ ("mulu%.l %3,%1:%0"                                           \
           : "=d" ((USItype)(w0)),                                      \
             "=d" ((USItype)(w1))                                       \
           : "%0" ((USItype)(u)),                                       \
             "dmi" ((USItype)(v)))
-#define UMUL_TIME 45
-#define udiv_qrnnd(q, r, n1, n0, d) \
+#  define UMUL_TIME 45
+#  define udiv_qrnnd(q, r, n1, n0, d) \
   __asm__ ("divu%.l %4,%1:%0"                                           \
           : "=d" ((USItype)(q)),                                       \
             "=d" ((USItype)(r))                                        \
           : "0" ((USItype)(n0)),                                       \
             "1" ((USItype)(n1)),                                       \
             "dmi" ((USItype)(d)))
-#define UDIV_TIME 90
-#define sdiv_qrnnd(q, r, n1, n0, d) \
+#  define UDIV_TIME 90
+#  define sdiv_qrnnd(q, r, n1, n0, d) \
   __asm__ ("divs%.l %4,%1:%0"                                           \
           : "=d" ((USItype)(q)),                                       \
             "=d" ((USItype)(r))                                        \
           : "0" ((USItype)(n0)),                                       \
             "1" ((USItype)(n1)),                                       \
             "dmi" ((USItype)(d)))
-#define count_leading_zeros(count, x) \
+#  define count_leading_zeros(count, x) \
   __asm__ ("bfffo %1{%b2:%b2},%0"                                       \
           : "=d" ((USItype)(count))                                    \
           : "od" ((USItype)(x)), "n" (0))
-#define COUNT_LEADING_ZEROS_0 32
-#else /* not mc68020 */
-#define umul_ppmm(xh, xl, a, b) \
+#  define COUNT_LEADING_ZEROS_0 32
+# else /* not mc68020 */
+#  define umul_ppmm(xh, xl, a, b) \
   do { USItype __umul_tmp1, __umul_tmp2;                         \
        __asm__ ("| Inlined umul_ppmm                         \n" \
  "        move%.l %5,%3                                       \n" \
@@ -682,9 +793,9 @@ extern USItype __udiv_qrnnd ();
                "=d" (__umul_tmp1), "=&d" (__umul_tmp2)           \
              : "%2" ((USItype)(a)), "d" ((USItype)(b)));         \
   } while (0)
-#define UMUL_TIME 100
-#define UDIV_TIME 400
-#endif /* not mc68020 */
+#  define UMUL_TIME 100
+#  define UDIV_TIME 400
+# endif /* not mc68020 */
 #endif /* mc68000 */
 
 
@@ -692,7 +803,7 @@ extern USItype __udiv_qrnnd ();
  **************  88000 ****************
  ***************************************/
 #if defined (__m88000__) && W_TYPE_SIZE == 32
-#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+# define add_ssaaaa(sh, sl, ah, al, bh, bl) \
   __asm__ ("addu.co %1,%r4,%r5\n"                                       \
           "addu.ci %0,%r2,%r3"                                         \
           : "=r" ((USItype)(sh)),                                      \
@@ -701,7 +812,7 @@ extern USItype __udiv_qrnnd ();
             "rJ" ((USItype)(bh)),                                      \
             "%rJ" ((USItype)(al)),                                     \
             "rJ" ((USItype)(bl)))
-#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+# define sub_ddmmss(sh, sl, ah, al, bh, bl) \
   __asm__ ("subu.co %1,%r4,%r5\n"                                       \
           "subu.ci %0,%r2,%r3"                                         \
           : "=r" ((USItype)(sh)),                                      \
@@ -710,7 +821,7 @@ extern USItype __udiv_qrnnd ();
             "rJ" ((USItype)(bh)),                                      \
             "rJ" ((USItype)(al)),                                      \
             "rJ" ((USItype)(bl)))
-#define count_leading_zeros(count, x) \
+# define count_leading_zeros(count, x) \
   do {                                                                 \
     USItype __cbtmp;                                                   \
     __asm__ ("ff1 %0,%1"                                                \
@@ -718,9 +829,9 @@ extern USItype __udiv_qrnnd ();
             : "r" ((USItype)(x)));                                     \
     (count) = __cbtmp ^ 31;                                            \
   } while (0)
-#define COUNT_LEADING_ZEROS_0 63 /* sic */
-#if defined (__m88110__)
-#define umul_ppmm(wh, wl, u, v) \
+# define COUNT_LEADING_ZEROS_0 63 /* sic */
+# if defined (__m88110__)
+#  define umul_ppmm(wh, wl, u, v) \
   do {                                                                 \
     union {UDItype __ll;                                               \
           struct {USItype __h, __l;} __i;                              \
@@ -729,7 +840,7 @@ extern USItype __udiv_qrnnd ();
     (wh) = __x.__i.__h;                                                \
     (wl) = __x.__i.__l;                                                \
   } while (0)
-#define udiv_qrnnd(q, r, n1, n0, d) \
+#  define udiv_qrnnd(q, r, n1, n0, d) \
   ({union {UDItype __ll;                                               \
           struct {USItype __h, __l;} __i;                              \
          } __x, __q;                                                   \
@@ -737,79 +848,81 @@ extern USItype __udiv_qrnnd ();
   __asm__ ("divu.d %0,%1,%2"                                            \
           : "=r" (__q.__ll) : "r" (__x.__ll), "r" (d));                \
   (r) = (n0) - __q.__l * (d); (q) = __q.__l; })
-#define UMUL_TIME 5
-#define UDIV_TIME 25
-#else
-#define UMUL_TIME 17
-#define UDIV_TIME 150
-#endif /* __m88110__ */
+#  define UMUL_TIME 5
+#  define UDIV_TIME 25
+# else
+#  define UMUL_TIME 17
+#  define UDIV_TIME 150
+# endif /* __m88110__ */
 #endif /* __m88000__ */
 
 /***************************************
  **************  MIPS  *****************
  ***************************************/
 #if defined (__mips__) && W_TYPE_SIZE == 32
-#if defined (__clang__) || (__GNUC__ >= 5) || (__GNUC__ == 4 && __GNUC_MINOR__ 
>= 4)
-#define umul_ppmm(w1, w0, u, v) \
+# if defined (__clang__) || (__GNUC__ >= 5) || (__GNUC__ == 4 && \
+                                               __GNUC_MINOR__ >= 4)
+#  define umul_ppmm(w1, w0, u, v) \
   do {                                                                  \
-    UDItype _r;                                                         \
-    _r = (UDItype) u * v;                                               \
-    (w1) = _r >> 32;                                                    \
-    (w0) = (USItype) _r;                                                \
+    UDItype __r;                                                        \
+    __r = (UDItype)(u) * (v);                                           \
+    (w1) = __r >> 32;                                                   \
+    (w0) = (USItype) __r;                                               \
   } while (0)
-#elif __GNUC__ > 2 || __GNUC_MINOR__ >= 7
-#define umul_ppmm(w1, w0, u, v) \
+# elif __GNUC__ > 2 || __GNUC_MINOR__ >= 7
+#  define umul_ppmm(w1, w0, u, v) \
   __asm__ ("multu %2,%3"                                                \
           : "=l" ((USItype)(w0)),                                      \
             "=h" ((USItype)(w1))                                       \
           : "d" ((USItype)(u)),                                        \
             "d" ((USItype)(v)))
-#else
-#define umul_ppmm(w1, w0, u, v) \
+# else
+#  define umul_ppmm(w1, w0, u, v) \
   __asm__ ("multu %2,%3 \n" \
           "mflo %0 \n"     \
-          "mfhi %1"                                                        \
+          "mfhi %1"                                                    \
           : "=d" ((USItype)(w0)),                                      \
             "=d" ((USItype)(w1))                                       \
           : "d" ((USItype)(u)),                                        \
             "d" ((USItype)(v)))
-#endif
-#define UMUL_TIME 10
-#define UDIV_TIME 100
+# endif
+# define UMUL_TIME 10
+# define UDIV_TIME 100
 #endif /* __mips__ */
 
 /***************************************
  **************  MIPS/64  **************
  ***************************************/
 #if (defined (__mips) && __mips >= 3) && W_TYPE_SIZE == 64
-#if (__GNUC__ >= 5) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4)
+# if defined (__clang__) || (__GNUC__ >= 5) || (__GNUC__ == 4 && \
+                                               __GNUC_MINOR__ >= 4)
 typedef unsigned int UTItype __attribute__ ((mode (TI)));
-#define umul_ppmm(w1, w0, u, v) \
-  do {                                                                 \
-    UTItype _r;                                                        \
-    _r = (UTItype) u * v;                                              \
-    (w1) = _r >> 64;                                                   \
-    (w0) = (UDItype) _r;                                               \
+#  define umul_ppmm(w1, w0, u, v) \
+  do {                                                                  \
+    UTItype __r;                                                        \
+    __r = (UTItype)(u) * (v);                                           \
+    (w1) = __r >> 64;                                                   \
+    (w0) = (UDItype) __r;                                               \
   } while (0)
-#elif __GNUC__ > 2 || __GNUC_MINOR__ >= 7
-#define umul_ppmm(w1, w0, u, v) \
+# elif __GNUC__ > 2 || __GNUC_MINOR__ >= 7
+#  define umul_ppmm(w1, w0, u, v) \
   __asm__ ("dmultu %2,%3"                                               \
           : "=l" ((UDItype)(w0)),                                      \
             "=h" ((UDItype)(w1))                                       \
           : "d" ((UDItype)(u)),                                        \
             "d" ((UDItype)(v)))
-#else
-#define umul_ppmm(w1, w0, u, v) \
+# else
+#  define umul_ppmm(w1, w0, u, v) \
   __asm__ ("dmultu %2,%3 \n"    \
           "mflo %0 \n"         \
-          "mfhi %1"                                                        \
+          "mfhi %1"                                                    \
           : "=d" ((UDItype)(w0)),                                      \
             "=d" ((UDItype)(w1))                                       \
           : "d" ((UDItype)(u)),                                        \
             "d" ((UDItype)(v)))
-#endif
-#define UMUL_TIME 20
-#define UDIV_TIME 140
+# endif
+# define UMUL_TIME 20
+# define UDIV_TIME 140
 #endif /* __mips__ */
 
 
@@ -817,7 +930,7 @@ typedef unsigned int UTItype __attribute__ ((mode (TI)));
  **************  32000 ****************
  ***************************************/
 #if defined (__ns32000__) && W_TYPE_SIZE == 32
-#define umul_ppmm(w1, w0, u, v) \
+# define umul_ppmm(w1, w0, u, v) \
   ({union {UDItype __ll;                                               \
           struct {USItype __l, __h;} __i;                              \
          } __xx;                                                       \
@@ -826,14 +939,14 @@ typedef unsigned int UTItype __attribute__ ((mode (TI)));
           : "%0" ((USItype)(u)),                                       \
             "g" ((USItype)(v)));                                       \
   (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;})
-#define __umulsidi3(u, v) \
+# define __umulsidi3(u, v) \
   ({UDItype __w;                                                       \
     __asm__ ("meid %2,%0"                                               \
             : "=g" (__w)                                               \
             : "%0" ((USItype)(u)),                                     \
               "g" ((USItype)(v)));                                     \
     __w; })
-#define udiv_qrnnd(q, r, n1, n0, d) \
+# define udiv_qrnnd(q, r, n1, n0, d) \
   ({union {UDItype __ll;                                               \
           struct {USItype __l, __h;} __i;                              \
          } __xx;                                                       \
@@ -843,7 +956,7 @@ typedef unsigned int UTItype __attribute__ ((mode (TI)));
           : "0" (__xx.__ll),                                           \
             "g" ((USItype)(d)));                                       \
   (r) = __xx.__i.__l; (q) = __xx.__i.__h; })
-#define count_trailing_zeros(count,x) \
+# define count_trailing_zeros(count,x) \
   do {
     __asm__ ("ffsd      %2,%0"                                          \
             : "=r" ((USItype) (count))                                 \
@@ -857,7 +970,7 @@ typedef unsigned int UTItype __attribute__ ((mode (TI)));
  **************  PPC  ******************
  ***************************************/
 #if (defined (_ARCH_PPC) || defined (_IBMR2)) && W_TYPE_SIZE == 32
-#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+# define add_ssaaaa(sh, sl, ah, al, bh, bl) \
   do {                                                                 \
     if (__builtin_constant_p (bh) && (bh) == 0)                        \
       __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2"           \
@@ -882,7 +995,7 @@ typedef unsigned int UTItype __attribute__ ((mode (TI)));
               "%r" ((USItype)(al)),                                    \
               "rI" ((USItype)(bl)));                                   \
   } while (0)
-#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+# define sub_ddmmss(sh, sl, ah, al, bh, bl) \
   do {                                                                 \
     if (__builtin_constant_p (ah) && (ah) == 0)                        \
       __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2"       \
@@ -921,13 +1034,13 @@ typedef unsigned int UTItype __attribute__ ((mode (TI)));
                 "rI" ((USItype)(al)),                                  \
                 "r" ((USItype)(bl)));                                  \
   } while (0)
-#define count_leading_zeros(count, x) \
+# define count_leading_zeros(count, x) \
   __asm__ ("{cntlz|cntlzw} %0,%1"                                       \
           : "=r" ((count))                                             \
           : "r" ((USItype)(x)))
-#define COUNT_LEADING_ZEROS_0 32
-#if defined (_ARCH_PPC)
-#define umul_ppmm(ph, pl, m0, m1) \
+# define COUNT_LEADING_ZEROS_0 32
+# if defined (_ARCH_PPC)
+#  define umul_ppmm(ph, pl, m0, m1) \
   do {                                                                 \
     USItype __m0 = (m0), __m1 = (m1);                                  \
     __asm__ ("mulhwu %0,%1,%2"                                          \
@@ -936,8 +1049,8 @@ typedef unsigned int UTItype __attribute__ ((mode (TI)));
               "r" (__m1));                                             \
     (pl) = __m0 * __m1;                                                \
   } while (0)
-#define UMUL_TIME 15
-#define smul_ppmm(ph, pl, m0, m1) \
+#  define UMUL_TIME 15
+#  define smul_ppmm(ph, pl, m0, m1) \
   do {                                                                 \
     SItype __m0 = (m0), __m1 = (m1);                                   \
     __asm__ ("mulhw %0,%1,%2"                                           \
@@ -946,10 +1059,10 @@ typedef unsigned int UTItype __attribute__ ((mode (TI)));
               "r" (__m1));                                             \
     (pl) = __m0 * __m1;                                                \
   } while (0)
-#define SMUL_TIME 14
-#define UDIV_TIME 120
-#else
-#define umul_ppmm(xh, xl, m0, m1) \
+#  define SMUL_TIME 14
+#  define UDIV_TIME 120
+# else
+#  define umul_ppmm(xh, xl, m0, m1) \
   do {                                                                 \
     USItype __m0 = (m0), __m1 = (m1);                                  \
     __asm__ ("mul %0,%2,%3"                                             \
@@ -960,26 +1073,25 @@ typedef unsigned int UTItype __attribute__ ((mode (TI)));
     (xh) += ((((SItype) __m0 >> 31) & __m1)                            \
             + (((SItype) __m1 >> 31) & __m0));                         \
   } while (0)
-#define UMUL_TIME 8
-#define smul_ppmm(xh, xl, m0, m1) \
+#  define UMUL_TIME 8
+#  define smul_ppmm(xh, xl, m0, m1) \
   __asm__ ("mul %0,%2,%3"                                               \
           : "=r" ((SItype)(xh)),                                       \
             "=q" ((SItype)(xl))                                        \
           : "r" (m0),                                                  \
             "r" (m1))
-#define SMUL_TIME 4
-#define sdiv_qrnnd(q, r, nh, nl, d) \
+#  define SMUL_TIME 4
+#  define sdiv_qrnnd(q, r, nh, nl, d) \
   __asm__ ("div %0,%2,%4"                                               \
           : "=r" ((SItype)(q)), "=q" ((SItype)(r))                     \
           : "r" ((SItype)(nh)), "1" ((SItype)(nl)), "r" ((SItype)(d)))
-#define UDIV_TIME 100
-#endif
+#  define UDIV_TIME 100
+# endif
 #endif /* Power architecture variants. */
 
 /* Powerpc 64 bit support taken from gmp-4.1.2. */
 /* We should test _IBMR2 here when we add assembly support for the system
    vendor compilers.  */
-#if 0 /* Not yet enabled because we don't have hardware for a test. */
 #if (defined (_ARCH_PPC) || defined (__powerpc__)) && W_TYPE_SIZE == 64
 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
   do {                                                                 \
@@ -1032,13 +1144,12 @@ typedef unsigned int UTItype __attribute__ ((mode 
(TI)));
 #define SMUL_TIME 14  /* ??? */
 #define UDIV_TIME 120 /* ??? */
 #endif /* 64-bit PowerPC.  */
-#endif /* if 0 */
 
 /***************************************
  **************  PYR  ******************
  ***************************************/
 #if defined (__pyr__) && W_TYPE_SIZE == 32
-#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+# define add_ssaaaa(sh, sl, ah, al, bh, bl) \
   __asm__ ("addw        %5,%1 \n" \
           "addwc       %3,%0"                                          \
           : "=r" ((USItype)(sh)),                                      \
@@ -1047,7 +1158,7 @@ typedef unsigned int UTItype __attribute__ ((mode (TI)));
             "g" ((USItype)(bh)),                                       \
             "%1" ((USItype)(al)),                                      \
             "g" ((USItype)(bl)))
-#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+# define sub_ddmmss(sh, sl, ah, al, bh, bl) \
   __asm__ ("subw        %5,%1 \n" \
           "subwb       %3,%0"                                          \
           : "=r" ((USItype)(sh)),                                      \
@@ -1057,7 +1168,7 @@ typedef unsigned int UTItype __attribute__ ((mode (TI)));
             "1" ((USItype)(al)),                                       \
             "g" ((USItype)(bl)))
 /* This insn works on Pyramids with AP, XP, or MI CPUs, but not with SP.  */
-#define umul_ppmm(w1, w0, u, v) \
+# define umul_ppmm(w1, w0, u, v) \
   ({union {UDItype __ll;                                               \
           struct {USItype __h, __l;} __i;                              \
          } __xx;                                                       \
@@ -1074,7 +1185,7 @@ typedef unsigned int UTItype __attribute__ ((mode (TI)));
  **************  RT/ROMP  **************
  ***************************************/
 #if defined (__ibm032__) /* RT/ROMP */ && W_TYPE_SIZE == 32
-#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+# define add_ssaaaa(sh, sl, ah, al, bh, bl) \
   __asm__ ("a %1,%5 \n" \
           "ae %0,%3"                                                   \
           : "=r" ((USItype)(sh)),                                      \
@@ -1083,7 +1194,7 @@ typedef unsigned int UTItype __attribute__ ((mode (TI)));
             "r" ((USItype)(bh)),                                       \
             "%1" ((USItype)(al)),                                      \
             "r" ((USItype)(bl)))
-#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+# define sub_ddmmss(sh, sl, ah, al, bh, bl) \
   __asm__ ("s %1,%5\n" \
           "se %0,%3"                                                   \
           : "=r" ((USItype)(sh)),                                      \
@@ -1092,7 +1203,7 @@ typedef unsigned int UTItype __attribute__ ((mode (TI)));
             "r" ((USItype)(bh)),                                       \
             "1" ((USItype)(al)),                                       \
             "r" ((USItype)(bl)))
-#define umul_ppmm(ph, pl, m0, m1) \
+# define umul_ppmm(ph, pl, m0, m1) \
   do {                                                                 \
     USItype __m0 = (m0), __m1 = (m1);                                  \
     __asm__ (                                                          \
@@ -1124,9 +1235,9 @@ typedef unsigned int UTItype __attribute__ ((mode (TI)));
     (ph) += ((((SItype) __m0 >> 31) & __m1)                            \
             + (((SItype) __m1 >> 31) & __m0));                         \
   } while (0)
-#define UMUL_TIME 20
-#define UDIV_TIME 200
-#define count_leading_zeros(count, x) \
+# define UMUL_TIME 20
+# define UDIV_TIME 200
+# define count_leading_zeros(count, x) \
   do {                                                                 \
     if ((x) >= 0x10000)                                                \
       __asm__ ("clz     %0,%1"                                          \
@@ -1148,7 +1259,7 @@ typedef unsigned int UTItype __attribute__ ((mode (TI)));
  ***************************************/
 #if (defined (__sh2__) || defined(__sh3__) || defined(__SH4__) ) \
     && W_TYPE_SIZE == 32
-#define umul_ppmm(w1, w0, u, v) \
+# define umul_ppmm(w1, w0, u, v) \
   __asm__ (                                                            \
         "dmulu.l %2,%3\n"  \
        "sts    macl,%1\n" \
@@ -1158,14 +1269,14 @@ typedef unsigned int UTItype __attribute__ ((mode 
(TI)));
           : "r" ((USItype)(u)),                                        \
             "r" ((USItype)(v))                                         \
           : "macl", "mach")
-#define UMUL_TIME 5
+# define UMUL_TIME 5
 #endif
 
 /***************************************
  **************  SPARC ****************
  ***************************************/
 #if defined (__sparc__) && W_TYPE_SIZE == 32
-#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+# define add_ssaaaa(sh, sl, ah, al, bh, bl) \
   __asm__ ("addcc %r4,%5,%1\n" \
           "addx %r2,%3,%0"                                             \
           : "=r" ((USItype)(sh)),                                      \
@@ -1175,7 +1286,7 @@ typedef unsigned int UTItype __attribute__ ((mode (TI)));
             "%rJ" ((USItype)(al)),                                     \
             "rI" ((USItype)(bl))                                       \
           __CLOBBER_CC)
-#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+# define sub_ddmmss(sh, sl, ah, al, bh, bl) \
   __asm__ ("subcc %r4,%5,%1\n" \
           "subx %r2,%3,%0"                                             \
           : "=r" ((USItype)(sh)),                                      \
@@ -1185,20 +1296,20 @@ typedef unsigned int UTItype __attribute__ ((mode 
(TI)));
             "rJ" ((USItype)(al)),                                      \
             "rI" ((USItype)(bl))                                       \
           __CLOBBER_CC)
-#if defined (__sparc_v8__)
+# if defined (__sparc_v8__) || defined(__sparcv8)
 /* Don't match immediate range because, 1) it is not often useful,
    2) the 'I' flag thinks of the range as a 13 bit signed interval,
    while we want to match a 13 bit interval, sign extended to 32 bits,
    but INTERPRETED AS UNSIGNED.  */
-#define umul_ppmm(w1, w0, u, v) \
+#  define umul_ppmm(w1, w0, u, v) \
   __asm__ ("umul %2,%3,%1;rd %%y,%0"                                    \
           : "=r" ((USItype)(w1)),                                      \
             "=r" ((USItype)(w0))                                       \
           : "r" ((USItype)(u)),                                        \
             "r" ((USItype)(v)))
-#define UMUL_TIME 5
-#ifndef SUPERSPARC     /* SuperSPARC's udiv only handles 53 bit dividends */
-#define udiv_qrnnd(q, r, n1, n0, d) \
+#  define UMUL_TIME 5
+#  ifndef SUPERSPARC   /* SuperSPARC's udiv only handles 53 bit dividends */
+#   define udiv_qrnnd(q, r, n1, n0, d) \
   do {                                                                 \
     USItype __q;                                                       \
     __asm__ ("mov %1,%%y;nop;nop;nop;udiv %2,%3,%0"                     \
@@ -1209,20 +1320,20 @@ typedef unsigned int UTItype __attribute__ ((mode 
(TI)));
     (r) = (n0) - __q * (d);                                            \
     (q) = __q;                                                         \
   } while (0)
-#define UDIV_TIME 25
-#endif /* SUPERSPARC */
-#else /* ! __sparc_v8__ */
-#if defined (__sparclite__)
+#   define UDIV_TIME 25
+#  endif /*!SUPERSPARC */
+# else /* ! __sparc_v8__ */
+#  if defined (__sparclite__)
 /* This has hardware multiply but not divide.  It also has two additional
    instructions scan (ffs from high bit) and divscc.  */
-#define umul_ppmm(w1, w0, u, v) \
+#   define umul_ppmm(w1, w0, u, v) \
   __asm__ ("umul %2,%3,%1;rd %%y,%0"                                    \
           : "=r" ((USItype)(w1)),                                      \
             "=r" ((USItype)(w0))                                       \
           : "r" ((USItype)(u)),                                        \
             "r" ((USItype)(v)))
-#define UMUL_TIME 5
-#define udiv_qrnnd(q, r, n1, n0, d) \
+#   define UMUL_TIME 5
+#   define udiv_qrnnd(q, r, n1, n0, d) \
   __asm__ ("! Inlined udiv_qrnnd                                     \n" \
  "        wr   %%g0,%2,%%y     ! Not a delayed write for sparclite  \n" \
  "        tst  %%g0                                                 \n" \
@@ -1268,19 +1379,19 @@ typedef unsigned int UTItype __attribute__ ((mode 
(TI)));
             "r" ((USItype)(n0)),                                       \
             "rI" ((USItype)(d))                                        \
           : "%g1" __AND_CLOBBER_CC)
-#define UDIV_TIME 37
-#define count_leading_zeros(count, x) \
+#   define UDIV_TIME 37
+#   define count_leading_zeros(count, x) \
   __asm__ ("scan %1,0,%0"                                               \
           : "=r" ((USItype)(x))                                        \
           : "r" ((USItype)(count)))
 /* Early sparclites return 63 for an argument of 0, but they warn that future
    implementations might change this.  Therefore, leave COUNT_LEADING_ZEROS_0
    undefined.  */
-#endif /* __sparclite__ */
-#endif /* __sparc_v8__ */
+#  endif /* !__sparclite__ */
+# endif /* !__sparc_v8__ */
 /* Default to sparc v7 versions of umul_ppmm and udiv_qrnnd.  */
-#ifndef umul_ppmm
-#define umul_ppmm(w1, w0, u, v) \
+# ifndef umul_ppmm
+#  define umul_ppmm(w1, w0, u, v) \
   __asm__ ("! Inlined umul_ppmm                                        \n" \
  "        wr   %%g0,%2,%%y     ! SPARC has 0-3 delay insn after a wr  \n" \
  "        sra  %3,31,%%g2      ! Don't move this insn                 \n" \
@@ -1326,19 +1437,19 @@ typedef unsigned int UTItype __attribute__ ((mode 
(TI)));
           : "%rI" ((USItype)(u)),                                      \
             "r" ((USItype)(v))                                         \
           : "%g1", "%g2" __AND_CLOBBER_CC)
-#define UMUL_TIME 39           /* 39 instructions */
-#endif
-#ifndef udiv_qrnnd
-#ifndef LONGLONG_STANDALONE
-#define udiv_qrnnd(q, r, n1, n0, d) \
+#  define UMUL_TIME 39         /* 39 instructions */
+# endif /* umul_ppmm */
+# ifndef udiv_qrnnd
+#  ifndef LONGLONG_STANDALONE
+#   define udiv_qrnnd(q, r, n1, n0, d) \
   do { USItype __r;                                                    \
     (q) = __udiv_qrnnd (&__r, (n1), (n0), (d));                        \
     (r) = __r;                                                         \
   } while (0)
 extern USItype __udiv_qrnnd ();
-#define UDIV_TIME 140
-#endif /* LONGLONG_STANDALONE */
-#endif /* udiv_qrnnd */
+#   define UDIV_TIME 140
+#  endif /* LONGLONG_STANDALONE */
+# endif /* udiv_qrnnd */
 #endif /* __sparc__ */
 
 
@@ -1346,7 +1457,7 @@ extern USItype __udiv_qrnnd ();
  **************  VAX  ******************
  ***************************************/
 #if defined (__vax__) && W_TYPE_SIZE == 32
-#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+# define add_ssaaaa(sh, sl, ah, al, bh, bl) \
   __asm__ ("addl2 %5,%1\n" \
           "adwc %3,%0"                                                 \
           : "=g" ((USItype)(sh)),                                      \
@@ -1355,7 +1466,7 @@ extern USItype __udiv_qrnnd ();
             "g" ((USItype)(bh)),                                       \
             "%1" ((USItype)(al)),                                      \
             "g" ((USItype)(bl)))
-#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+# define sub_ddmmss(sh, sl, ah, al, bh, bl) \
   __asm__ ("subl2 %5,%1\n" \
           "sbwc %3,%0"                                                 \
           : "=g" ((USItype)(sh)),                                      \
@@ -1364,7 +1475,7 @@ extern USItype __udiv_qrnnd ();
             "g" ((USItype)(bh)),                                       \
             "1" ((USItype)(al)),                                       \
             "g" ((USItype)(bl)))
-#define umul_ppmm(xh, xl, m0, m1) \
+# define umul_ppmm(xh, xl, m0, m1) \
   do {                                                                 \
     union {UDItype __ll;                                               \
           struct {USItype __l, __h;} __i;                              \
@@ -1378,12 +1489,12 @@ extern USItype __udiv_qrnnd ();
     (xh) += ((((SItype) __m0 >> 31) & __m1)                            \
             + (((SItype) __m1 >> 31) & __m0));                         \
   } while (0)
-#define sdiv_qrnnd(q, r, n1, n0, d) \
+# define sdiv_qrnnd(q, r, n1, n0, d) \
   do {                                                                 \
     union {DItype __ll;                                                \
           struct {SItype __l, __h;} __i;                               \
          } __xx;                                                       \
-    __xx.__i.__h = n1; __xx.__i.__l = n0;                              \
+    __xx.__i.__h = (n1); __xx.__i.__l = (n0);                          \
     __asm__ ("ediv %3,%2,%0,%1"                                         \
             : "=g" (q), "=g" (r)                                       \
             : "g" (__xx.__ll), "g" (d));                               \
@@ -1395,7 +1506,7 @@ extern USItype __udiv_qrnnd ();
  **************  Z8000 ****************
  ***************************************/
 #if defined (__z8000__) && W_TYPE_SIZE == 16
-#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+# define add_ssaaaa(sh, sl, ah, al, bh, bl) \
   __asm__ ("add %H1,%H5\n\tadc  %H0,%H3"                                \
           : "=r" ((unsigned int)(sh)),                                 \
             "=&r" ((unsigned int)(sl))                                 \
@@ -1403,7 +1514,7 @@ extern USItype __udiv_qrnnd ();
             "r" ((unsigned int)(bh)),                                  \
             "%1" ((unsigned int)(al)),                                 \
             "rQR" ((unsigned int)(bl)))
-#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+# define sub_ddmmss(sh, sl, ah, al, bh, bl) \
   __asm__ ("sub %H1,%H5\n\tsbc  %H0,%H3"                                \
           : "=r" ((unsigned int)(sh)),                                 \
             "=&r" ((unsigned int)(sl))                                 \
@@ -1411,7 +1522,7 @@ extern USItype __udiv_qrnnd ();
             "r" ((unsigned int)(bh)),                                  \
             "1" ((unsigned int)(al)),                                  \
             "rQR" ((unsigned int)(bl)))
-#define umul_ppmm(xh, xl, m0, m1) \
+# define umul_ppmm(xh, xl, m0, m1) \
   do {                                                                 \
     union {long int __ll;                                              \
           struct {unsigned int __h, __l;} __i;                         \
@@ -1428,6 +1539,59 @@ extern USItype __udiv_qrnnd ();
   } while (0)
 #endif /* __z8000__ */
 
+
+/***************************************
+ *********** s390x/zSeries  ************
+ ***************************************/
+#if defined (__s390x__) && W_TYPE_SIZE == 64 && __GNUC__ >= 4
+# define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  __asm__ ("algr %1,%5\n"                                               \
+          "alcgr %0,%3\n"                                              \
+          : "=r" ((sh)),                                               \
+            "=&r" ((sl))                                               \
+          : "0" ((UDItype)(ah)),                                       \
+            "r"  ((UDItype)(bh)),                                      \
+            "1" ((UDItype)(al)),                                       \
+            "r"  ((UDItype)(bl))                                       \
+          __CLOBBER_CC)
+# define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  __asm__ ("slgr %1,%5\n"                                               \
+          "slbgr %0,%3\n"                                              \
+          : "=r" ((sh)),                                               \
+            "=&r" ((sl))                                               \
+          : "0" ((UDItype)(ah)),                                       \
+            "r" ((UDItype)(bh)),                                       \
+            "1" ((UDItype)(al)),                                       \
+            "r" ((UDItype)(bl))                                        \
+          __CLOBBER_CC)
+typedef unsigned int UTItype __attribute__ ((mode (TI)));
+#  define umul_ppmm(w1, w0, u, v) \
+  do {                                                                  \
+    UTItype ___r;                                                       \
+    __asm__ ("mlgr %0,%2"                                               \
+            : "=r" (___r)                                              \
+            : "0" ((UDItype)(u)),                                      \
+              "r" ((UDItype)(v)));                                     \
+    (w1) = ___r >> 64;                                                  \
+    (w0) = (UDItype) ___r;                                              \
+  } while (0)
+# define udiv_qrnnd(q, r, n1, n0, d) \
+  do {                                                                  \
+    UTItype ___r = ((UTItype)n1 << 64) | n0;                            \
+    __asm__ ("dlgr %0,%2"                                               \
+            : "=r" (___r)                                              \
+            : "0" (___r),                                              \
+              "r" ((UDItype)(d)));                                     \
+    (r) = ___r >> 64;                                                   \
+    (q) = (UDItype) ___r;                                               \
+  } while (0)
+#endif /* __s390x__ */
+
+
+/***************************************
+ *****  End CPU Specific Versions  *****
+ ***************************************/
+
 #endif /* __GNUC__ */
 #endif /* !__riscos__ */
 
@@ -1436,7 +1600,7 @@ extern USItype __udiv_qrnnd ();
  ***********  Generic Versions ********
  ***************************************/
 #if !defined (umul_ppmm) && defined (__umulsidi3)
-#define umul_ppmm(ph, pl, m0, m1) \
+#  define umul_ppmm(ph, pl, m0, m1) \
   {                                                                    \
     UDWtype __ll = __umulsidi3 (m0, m1);                               \
     ph = (UWtype) (__ll >> W_TYPE_SIZE);                               \
@@ -1445,7 +1609,7 @@ extern USItype __udiv_qrnnd ();
 #endif
 
 #if !defined (__umulsidi3)
-#define __umulsidi3(u, v) \
+#  define __umulsidi3(u, v) \
   ({UWtype __hi, __lo;                                                 \
     umul_ppmm (__hi, __lo, u, v);                                      \
     ((UDWtype) __hi << W_TYPE_SIZE) | __lo; })
@@ -1454,7 +1618,7 @@ extern USItype __udiv_qrnnd ();
 /* If this machine has no inline assembler, use C macros.  */
 
 #if !defined (add_ssaaaa)
-#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+#  define add_ssaaaa(sh, sl, ah, al, bh, bl) \
   do {                                                                 \
     UWtype __x;                                                        \
     __x = (al) + (bl);                                                 \
@@ -1464,7 +1628,7 @@ extern USItype __udiv_qrnnd ();
 #endif
 
 #if !defined (sub_ddmmss)
-#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+#  define sub_ddmmss(sh, sl, ah, al, bh, bl) \
   do {                                                                 \
     UWtype __x;                                                        \
     __x = (al) - (bl);                                                 \
@@ -1474,7 +1638,7 @@ extern USItype __udiv_qrnnd ();
 #endif
 
 #if !defined (umul_ppmm)
-#define umul_ppmm(w1, w0, u, v)                                        \
+#  define umul_ppmm(w1, w0, u, v)                                      \
   do {                                                                 \
     UWtype __x0, __x1, __x2, __x3;                                     \
     UHWtype __ul, __vl, __uh, __vh;                                    \
@@ -1501,7 +1665,7 @@ extern USItype __udiv_qrnnd ();
 #endif
 
 #if !defined (umul_ppmm)
-#define smul_ppmm(w1, w0, u, v)                                        \
+#  define smul_ppmm(w1, w0, u, v)                                      \
   do {                                                                 \
     UWtype __w1;                                                       \
     UWtype __m0 = (u), __m1 = (v);                                     \
@@ -1551,7 +1715,7 @@ extern USItype __udiv_qrnnd ();
 /* If the processor has no udiv_qrnnd but sdiv_qrnnd, go through
    __udiv_w_sdiv (defined in libgcc or elsewhere).  */
 #if !defined (udiv_qrnnd) && defined (sdiv_qrnnd)
-#define udiv_qrnnd(q, r, nh, nl, d) \
+#  define udiv_qrnnd(q, r, nh, nl, d) \
   do {                                                                 \
     UWtype __r;                                                        \
     (q) = __MPN(udiv_w_sdiv) (&__r, nh, nl, d);                        \
@@ -1561,18 +1725,38 @@ extern USItype __udiv_qrnnd ();
 
 /* If udiv_qrnnd was not defined for this processor, use __udiv_qrnnd_c.  */
 #if !defined (udiv_qrnnd)
-#define UDIV_NEEDS_NORMALIZATION 1
-#define udiv_qrnnd __udiv_qrnnd_c
+#  define UDIV_NEEDS_NORMALIZATION 1
+#  define udiv_qrnnd __udiv_qrnnd_c
+#endif
+
+#if !defined (count_leading_zeros)
+#  if defined (HAVE_BUILTIN_CLZL) && SIZEOF_UNSIGNED_LONG * 8 == W_TYPE_SIZE
+#    define count_leading_zeros(count, x) (count = __builtin_clzl(x))
+#    undef COUNT_LEADING_ZEROS_0 /* Input X=0 is undefined for the builtin. */
+#  elif defined (HAVE_BUILTIN_CLZ) && SIZEOF_UNSIGNED_INT * 8 == W_TYPE_SIZE
+#    define count_leading_zeros(count, x) (count = __builtin_clz(x))
+#    undef COUNT_LEADING_ZEROS_0 /* Input X=0 is undefined for the builtin. */
+#  endif
+#endif
+
+#if !defined (count_trailing_zeros)
+#  if defined (HAVE_BUILTIN_CTZL) && SIZEOF_UNSIGNED_LONG * 8 == W_TYPE_SIZE
+#    define count_trailing_zeros(count, x) (count = __builtin_ctzl(x))
+#    undef COUNT_LEADING_ZEROS_0 /* Input X=0 is undefined for the builtin. */
+#  elif defined (HAVE_BUILTIN_CTZ) && SIZEOF_UNSIGNED_INT * 8 == W_TYPE_SIZE
+#    define count_trailing_zeros(count, x) (count = __builtin_ctz(x))
+#    undef COUNT_LEADING_ZEROS_0 /* Input X=0 is undefined for the builtin. */
+#  endif
 #endif
 
 #if !defined (count_leading_zeros)
 extern
-#ifdef __STDC__
+#  ifdef __STDC__
 const
-#endif
+#  endif
 unsigned char _gcry_clz_tab[];
-#define MPI_INTERNAL_NEED_CLZ_TAB 1
-#define count_leading_zeros(count, x) \
+#  define MPI_INTERNAL_NEED_CLZ_TAB 1
+#  define count_leading_zeros(count, x) \
   do {                                                                 \
     UWtype __xr = (x);                                                 \
     UWtype __a;                                                        \
@@ -1593,21 +1777,25 @@ unsigned char _gcry_clz_tab[];
     (count) = W_TYPE_SIZE - (_gcry_clz_tab[__xr >> __a] + __a);                
\
   } while (0)
 /* This version gives a well-defined value for zero. */
-#define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE
-#endif
+#  define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE
+#endif /* !count_leading_zeros */
 
 #if !defined (count_trailing_zeros)
 /* Define count_trailing_zeros using count_leading_zeros.  The latter might be
    defined in asm, but if it is not, the C version above is good enough.  */
-#define count_trailing_zeros(count, x) \
+#  define count_trailing_zeros(count, x) \
   do {                                                                 \
     UWtype __ctz_x = (x);                                              \
     UWtype __ctz_c;                                                    \
     count_leading_zeros (__ctz_c, __ctz_x & -__ctz_x);                 \
     (count) = W_TYPE_SIZE - 1 - __ctz_c;                               \
   } while (0)
-#endif
+#endif /* !count_trailing_zeros */
 
 #ifndef UDIV_NEEDS_NORMALIZATION
-#define UDIV_NEEDS_NORMALIZATION 0
+#  define UDIV_NEEDS_NORMALIZATION 0
 #endif
+
+/***************************************
+ ******  longlong.h ends here  *********
+ ***************************************/
diff --git a/grub-core/lib/libgcrypt/mpi/m68k/Manifest 
b/grub-core/lib/libgcrypt/mpi/m68k/Manifest
deleted file mode 100644
index 8e0538adf..000000000
--- a/grub-core/lib/libgcrypt/mpi/m68k/Manifest
+++ /dev/null
@@ -1,25 +0,0 @@
-# Manifest - checksums 
-# Copyright 2003 Free Software Foundation, Inc.
-#
-# This file is part of Libgcrypt.
-#
-# Libgcrypt is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser general Public License as
-# published by the Free Software Foundation; either version 2.1 of
-# the License, or (at your option) any later version.
-#
-# Libgcrypt is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
-
-syntax.h
-mpih-lshift.S
-mpih-rshift.S
-mpih-add1.S
-mpih-sub1.S
-$names$ 
iQCVAwUAP+LmTDEAnp832S/7AQJHUAP/dxfq2U0pDc5ZLoEizoqgjjcnHIyb9EjMG3YjvgK6jQ62yoAOCuo/jFYlJS+Mdve6bgfdTzYMrnKV7BG2SEcwb263pVnIntS7ZhKQPiMCbFgXWR2VjN3+a1v8yjQDZtgqEgm8OlQ+u7jKBY13Oryiuq5nPNxsXZqJpelG6Zkdg9M==PIee
diff --git a/grub-core/lib/libgcrypt/mpi/m68k/distfiles 
b/grub-core/lib/libgcrypt/mpi/m68k/distfiles
index 1e2e36f05..4c0967b84 100644
--- a/grub-core/lib/libgcrypt/mpi/m68k/distfiles
+++ b/grub-core/lib/libgcrypt/mpi/m68k/distfiles
@@ -1,4 +1,3 @@
-Manifest
 syntax.h
 mpih-lshift.S
 mpih-rshift.S
diff --git a/grub-core/lib/libgcrypt/mpi/m68k/mc68020/Manifest 
b/grub-core/lib/libgcrypt/mpi/m68k/mc68020/Manifest
deleted file mode 100644
index bcb27681e..000000000
--- a/grub-core/lib/libgcrypt/mpi/m68k/mc68020/Manifest
+++ /dev/null
@@ -1,23 +0,0 @@
-# Manifest - checksums 
-# Copyright 2003 Free Software Foundation, Inc.
-#
-# This file is part of Libgcrypt.
-#
-# Libgcrypt is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser general Public License as
-# published by the Free Software Foundation; either version 2.1 of
-# the License, or (at your option) any later version.
-#
-# Libgcrypt is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
-
-mpih-mul1.S
-mpih-mul2.S
-mpih-mul3.S
-$names$ 
iQCVAwUAP+LmRTEAnp832S/7AQK3rwP/TyGBbii5HCrjDiLCVJHiDNeOdENx6AicRXnu4vuJmMmPZ0y+i7MPusDaeTbIUA0w6RaJx+Ep41nIvthmNDnFePY5Mw0pIUJcpI7AJR4vYqpwNQA6nlEdn/m1jg6sPLKZXUXNUkhroEzcHzoU+12BPS+nvSXlwSksg6rXEGOJ+Ms==XCXP
diff --git a/grub-core/lib/libgcrypt/mpi/m68k/mc68020/distfiles 
b/grub-core/lib/libgcrypt/mpi/m68k/mc68020/distfiles
index 6b96433af..fc7df9fa3 100644
--- a/grub-core/lib/libgcrypt/mpi/m68k/mc68020/distfiles
+++ b/grub-core/lib/libgcrypt/mpi/m68k/mc68020/distfiles
@@ -1,4 +1,3 @@
-Manifest
 mpih-mul1.S
 mpih-mul2.S
 mpih-mul3.S
diff --git a/grub-core/lib/libgcrypt/mpi/m68k/syntax.h 
b/grub-core/lib/libgcrypt/mpi/m68k/syntax.h
index 6a3fea130..e27de98b4 100644
--- a/grub-core/lib/libgcrypt/mpi/m68k/syntax.h
+++ b/grub-core/lib/libgcrypt/mpi/m68k/syntax.h
@@ -2,7 +2,7 @@
  *
  *      Copyright (C) 1992, 1994, 1996, 1998,
  *                    2001, 2002 Free Software Foundation, Inc.
- *
+ *       
  * This file is part of Libgcrypt.
  *
  * Libgcrypt is free software; you can redistribute it and/or modify
diff --git a/grub-core/lib/libgcrypt/mpi/mips3/Manifest 
b/grub-core/lib/libgcrypt/mpi/mips3/Manifest
deleted file mode 100644
index e191184f3..000000000
--- a/grub-core/lib/libgcrypt/mpi/mips3/Manifest
+++ /dev/null
@@ -1,28 +0,0 @@
-# Manifest - checksums 
-# Copyright 2003 Free Software Foundation, Inc.
-#
-# This file is part of Libgcrypt.
-#
-# Libgcrypt is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser general Public License as
-# published by the Free Software Foundation; either version 2.1 of
-# the License, or (at your option) any later version.
-#
-# Libgcrypt is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
-
-mpih-add1.S
-mpih-sub1.S
-mpih-mul1.S
-mpih-mul2.S
-mpih-mul3.S
-mpih-lshift.S
-mpih-rshift.S
-mpi-asm-defs.h
-$names$ 
iQCVAwUAP+LmUTEAnp832S/7AQLm/gP/RHR2aLMwHPxsq0mGO5H0kneVn8a9l9yDNEZBefkYcOJMb7MZGKxbGspyENiU04Mc2TFnA1wS9gjNHlRWtUYxxn/wyuV6BIRgfstXt2nXGgEQrK07GIz8ETFcYqcxu7JKiICIuXZgnIgdwBJswbBV1zaMUDXeg5B8vkkEeRWj8hQ==IQVO
diff --git a/grub-core/lib/libgcrypt/mpi/mips3/README 
b/grub-core/lib/libgcrypt/mpi/mips3/README
index e94b2c746..4ba4546d9 100644
--- a/grub-core/lib/libgcrypt/mpi/mips3/README
+++ b/grub-core/lib/libgcrypt/mpi/mips3/README
@@ -9,7 +9,7 @@ RELEVANT OPTIMIZATION ISSUES
 
    On the R4600, branches takes a single cycle
 
-   On the R8000, branches often take no noticable cycles, as they are
+   On the R8000, branches often take no noticeable cycles, as they are
    executed in a separate function unit..
 
 2. The R4000 and R4400 have a load latency of 4 cycles.
diff --git a/grub-core/lib/libgcrypt/mpi/mips3/distfiles 
b/grub-core/lib/libgcrypt/mpi/mips3/distfiles
index ef9b6fef3..85260fc8e 100644
--- a/grub-core/lib/libgcrypt/mpi/mips3/distfiles
+++ b/grub-core/lib/libgcrypt/mpi/mips3/distfiles
@@ -1,4 +1,3 @@
-Manifest
 README
 mpih-add1.S
 mpih-sub1.S
diff --git a/grub-core/lib/libgcrypt/mpi/mpi-add.c 
b/grub-core/lib/libgcrypt/mpi/mpi-add.c
index 98abc5650..38dd352f8 100644
--- a/grub-core/lib/libgcrypt/mpi/mpi-add.c
+++ b/grub-core/lib/libgcrypt/mpi/mpi-add.c
@@ -36,7 +36,7 @@
  * result in W. U and V may be the same.
  */
 void
-gcry_mpi_add_ui(gcry_mpi_t w, gcry_mpi_t u, unsigned long v )
+_gcry_mpi_add_ui (gcry_mpi_t w, gcry_mpi_t u, unsigned long v )
 {
     mpi_ptr_t wp, up;
     mpi_size_t usize, wsize;
@@ -85,7 +85,7 @@ gcry_mpi_add_ui(gcry_mpi_t w, gcry_mpi_t u, unsigned long v )
 
 
 void
-gcry_mpi_add(gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v)
+_gcry_mpi_add(gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v)
 {
     mpi_ptr_t wp, up, vp;
     mpi_size_t usize, vsize, wsize;
@@ -162,7 +162,7 @@ gcry_mpi_add(gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v)
  * result in W.
  */
 void
-gcry_mpi_sub_ui(gcry_mpi_t w, gcry_mpi_t u, unsigned long v )
+_gcry_mpi_sub_ui(gcry_mpi_t w, gcry_mpi_t u, unsigned long v )
 {
     mpi_ptr_t wp, up;
     mpi_size_t usize, wsize;
@@ -191,6 +191,7 @@ gcry_mpi_sub_ui(gcry_mpi_t w, gcry_mpi_t u, unsigned long v 
)
        cy = _gcry_mpih_add_1(wp, up, usize, v);
        wp[usize] = cy;
        wsize = usize + cy;
+       wsign = 1;
     }
     else {  /* The signs are different.  Need exact comparison to determine
             * which operand to subtract from which.  */
@@ -211,25 +212,25 @@ gcry_mpi_sub_ui(gcry_mpi_t w, gcry_mpi_t u, unsigned long 
v )
 }
 
 void
-gcry_mpi_sub(gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v)
+_gcry_mpi_sub(gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v)
 {
   gcry_mpi_t vv = mpi_copy (v);
   vv->sign = ! vv->sign;
-  gcry_mpi_add (w, u, vv);
+  mpi_add (w, u, vv);
   mpi_free (vv);
 }
 
 
 void
-gcry_mpi_addm( gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v, gcry_mpi_t m)
+_gcry_mpi_addm( gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v, gcry_mpi_t m)
 {
-    gcry_mpi_add(w, u, v);
-    _gcry_mpi_fdiv_r( w, w, m );
+  mpi_add (w, u, v);
+  mpi_mod (w, w, m);
 }
 
 void
-gcry_mpi_subm( gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v, gcry_mpi_t m)
+_gcry_mpi_subm( gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v, gcry_mpi_t m)
 {
-    gcry_mpi_sub(w, u, v);
-    _gcry_mpi_fdiv_r( w, w, m );
+  mpi_sub (w, u, v);
+  mpi_mod (w, w, m);
 }
diff --git a/grub-core/lib/libgcrypt/mpi/mpi-bit.c 
b/grub-core/lib/libgcrypt/mpi/mpi-bit.c
index cdc6b0b33..e2170401e 100644
--- a/grub-core/lib/libgcrypt/mpi/mpi-bit.c
+++ b/grub-core/lib/libgcrypt/mpi/mpi-bit.c
@@ -1,5 +1,6 @@
 /* mpi-bit.c  -  MPI bit level functions
  * Copyright (C) 1998, 1999, 2001, 2002, 2006 Free Software Foundation, Inc.
+ * Copyright (C) 2013  g10 Code GmbH
  *
  * This file is part of Libgcrypt.
  *
@@ -67,7 +68,7 @@ _gcry_mpi_normalize( gcry_mpi_t a )
  * Return the number of bits in A.
  */
 unsigned int
-gcry_mpi_get_nbits( gcry_mpi_t a )
+_gcry_mpi_get_nbits (gcry_mpi_t a)
 {
     unsigned n;
 
@@ -94,7 +95,7 @@ gcry_mpi_get_nbits( gcry_mpi_t a )
  * Test whether bit N is set.
  */
 int
-gcry_mpi_test_bit( gcry_mpi_t a, unsigned int n )
+_gcry_mpi_test_bit( gcry_mpi_t a, unsigned int n )
 {
     unsigned int limbno, bitno;
     mpi_limb_t limb;
@@ -113,15 +114,23 @@ gcry_mpi_test_bit( gcry_mpi_t a, unsigned int n )
  * Set bit N of A.
  */
 void
-gcry_mpi_set_bit( gcry_mpi_t a, unsigned int n )
+_gcry_mpi_set_bit( gcry_mpi_t a, unsigned int n )
 {
-  unsigned int limbno, bitno;
+  unsigned int i, limbno, bitno;
+
+  if (mpi_is_immutable (a))
+    {
+      mpi_immutable_failed ();
+      return;
+    }
 
   limbno = n / BITS_PER_MPI_LIMB;
   bitno  = n % BITS_PER_MPI_LIMB;
 
   if ( limbno >= a->nlimbs )
     {
+      for (i=a->nlimbs; i < a->alloced; i++)
+        a->d[i] = 0;
       mpi_resize (a, limbno+1 );
       a->nlimbs = limbno+1;
     }
@@ -132,15 +141,23 @@ gcry_mpi_set_bit( gcry_mpi_t a, unsigned int n )
  * Set bit N of A. and clear all bits above
  */
 void
-gcry_mpi_set_highbit( gcry_mpi_t a, unsigned int n )
+_gcry_mpi_set_highbit( gcry_mpi_t a, unsigned int n )
 {
-  unsigned int limbno, bitno;
+  unsigned int i, limbno, bitno;
+
+  if (mpi_is_immutable (a))
+    {
+      mpi_immutable_failed ();
+      return;
+    }
 
   limbno = n / BITS_PER_MPI_LIMB;
   bitno  = n % BITS_PER_MPI_LIMB;
 
   if ( limbno >= a->nlimbs )
     {
+      for (i=a->nlimbs; i < a->alloced; i++)
+        a->d[i] = 0;
       mpi_resize (a, limbno+1 );
       a->nlimbs = limbno+1;
     }
@@ -154,36 +171,47 @@ gcry_mpi_set_highbit( gcry_mpi_t a, unsigned int n )
  * clear bit N of A and all bits above
  */
 void
-gcry_mpi_clear_highbit( gcry_mpi_t a, unsigned int n )
+_gcry_mpi_clear_highbit( gcry_mpi_t a, unsigned int n )
 {
-    unsigned int limbno, bitno;
+  unsigned int limbno, bitno;
 
-    limbno = n / BITS_PER_MPI_LIMB;
-    bitno  = n % BITS_PER_MPI_LIMB;
+  if (mpi_is_immutable (a))
+    {
+      mpi_immutable_failed ();
+      return;
+    }
 
-    if( limbno >= a->nlimbs )
-       return; /* not allocated, therefore no need to clear bits
-                  :-) */
+  limbno = n / BITS_PER_MPI_LIMB;
+  bitno  = n % BITS_PER_MPI_LIMB;
 
-    for( ; bitno < BITS_PER_MPI_LIMB; bitno++ )
-       a->d[limbno] &= ~(A_LIMB_1 << bitno);
-    a->nlimbs = limbno+1;
+  if( limbno >= a->nlimbs )
+    return; /* not allocated, therefore no need to clear bits :-) */
+
+  for( ; bitno < BITS_PER_MPI_LIMB; bitno++ )
+    a->d[limbno] &= ~(A_LIMB_1 << bitno);
+  a->nlimbs = limbno+1;
 }
 
 /****************
  * Clear bit N of A.
  */
 void
-gcry_mpi_clear_bit( gcry_mpi_t a, unsigned int n )
+_gcry_mpi_clear_bit( gcry_mpi_t a, unsigned int n )
 {
-    unsigned int limbno, bitno;
+  unsigned int limbno, bitno;
 
-    limbno = n / BITS_PER_MPI_LIMB;
-    bitno  = n % BITS_PER_MPI_LIMB;
+  if (mpi_is_immutable (a))
+    {
+      mpi_immutable_failed ();
+      return;
+    }
 
-    if( limbno >= a->nlimbs )
-       return; /* don't need to clear this bit, it's to far to left */
-    a->d[limbno] &= ~(A_LIMB_1 << bitno);
+  limbno = n / BITS_PER_MPI_LIMB;
+  bitno  = n % BITS_PER_MPI_LIMB;
+
+  if (limbno >= a->nlimbs)
+    return; /* Don't need to clear this bit, it's far too left.  */
+  a->d[limbno] &= ~(A_LIMB_1 << bitno);
 }
 
 
@@ -194,19 +222,26 @@ gcry_mpi_clear_bit( gcry_mpi_t a, unsigned int n )
 void
 _gcry_mpi_rshift_limbs( gcry_mpi_t a, unsigned int count )
 {
-    mpi_ptr_t ap = a->d;
-    mpi_size_t n = a->nlimbs;
-    unsigned int i;
+  mpi_ptr_t ap = a->d;
+  mpi_size_t n = a->nlimbs;
+  unsigned int i;
 
-    if( count >= n ) {
-       a->nlimbs = 0;
-       return;
+  if (mpi_is_immutable (a))
+    {
+      mpi_immutable_failed ();
+      return;
     }
 
-    for( i = 0; i < n - count; i++ )
-       ap[i] = ap[i+count];
-    ap[i] = 0;
-    a->nlimbs -= count;
+  if (count >= n)
+    {
+      a->nlimbs = 0;
+      return;
+    }
+
+  for( i = 0; i < n - count; i++ )
+    ap[i] = ap[i+count];
+  ap[i] = 0;
+  a->nlimbs -= count;
 }
 
 
@@ -214,13 +249,19 @@ _gcry_mpi_rshift_limbs( gcry_mpi_t a, unsigned int count )
  * Shift A by N bits to the right.
  */
 void
-gcry_mpi_rshift ( gcry_mpi_t x, gcry_mpi_t a, unsigned int n )
+_gcry_mpi_rshift ( gcry_mpi_t x, gcry_mpi_t a, unsigned int n )
 {
   mpi_size_t xsize;
   unsigned int i;
   unsigned int nlimbs = (n/BITS_PER_MPI_LIMB);
   unsigned int nbits = (n%BITS_PER_MPI_LIMB);
 
+  if (mpi_is_immutable (x))
+    {
+      mpi_immutable_failed ();
+      return;
+    }
+
   if ( x == a )
     {
       /* In-place operation.  */
@@ -323,11 +364,17 @@ _gcry_mpi_lshift_limbs (gcry_mpi_t a, unsigned int count)
  * Shift A by N bits to the left.
  */
 void
-gcry_mpi_lshift ( gcry_mpi_t x, gcry_mpi_t a, unsigned int n )
+_gcry_mpi_lshift ( gcry_mpi_t x, gcry_mpi_t a, unsigned int n )
 {
   unsigned int nlimbs = (n/BITS_PER_MPI_LIMB);
   unsigned int nbits = (n%BITS_PER_MPI_LIMB);
 
+  if (mpi_is_immutable (x))
+    {
+      mpi_immutable_failed ();
+      return;
+    }
+
   if (x == a && !n)
     return;  /* In-place shift with an amount of zero.  */
 
@@ -357,7 +404,7 @@ gcry_mpi_lshift ( gcry_mpi_t x, gcry_mpi_t a, unsigned int 
n )
       /* We use a very dump approach: Shift left by the number of
          limbs plus one and than fix it up by an rshift.  */
       _gcry_mpi_lshift_limbs (x, nlimbs+1);
-      gcry_mpi_rshift (x, x, BITS_PER_MPI_LIMB - nbits);
+      mpi_rshift (x, x, BITS_PER_MPI_LIMB - nbits);
     }
 
   MPN_NORMALIZE (x->d, x->nlimbs);
diff --git a/grub-core/lib/libgcrypt/mpi/mpi-cmp.c 
b/grub-core/lib/libgcrypt/mpi/mpi-cmp.c
index 30e1fce93..8927fa0ec 100644
--- a/grub-core/lib/libgcrypt/mpi/mpi-cmp.c
+++ b/grub-core/lib/libgcrypt/mpi/mpi-cmp.c
@@ -24,7 +24,7 @@
 #include "mpi-internal.h"
 
 int
-gcry_mpi_cmp_ui (gcry_mpi_t u, unsigned long v)
+_gcry_mpi_cmp_ui (gcry_mpi_t u, unsigned long v)
 {
   mpi_limb_t limb = v;
 
@@ -54,15 +54,19 @@ gcry_mpi_cmp_ui (gcry_mpi_t u, unsigned long v)
 }
 
 
-int
-gcry_mpi_cmp (gcry_mpi_t u, gcry_mpi_t v)
+/* Helper for _gcry_mpi_cmp and _gcry_mpi_cmpabs.  */
+static int
+do_mpi_cmp (gcry_mpi_t u, gcry_mpi_t v, int absmode)
 {
   mpi_size_t usize;
   mpi_size_t vsize;
+  int usign;
+  int vsign;
   int cmp;
 
   if (mpi_is_opaque (u) || mpi_is_opaque (v))
     {
+      /* We have no signan and thus ABSMODE has no efeect here.  */
       if (mpi_is_opaque (u) && !mpi_is_opaque (v))
         return -1;
       if (!mpi_is_opaque (u) && mpi_is_opaque (v))
@@ -82,26 +86,45 @@ gcry_mpi_cmp (gcry_mpi_t u, gcry_mpi_t v)
 
       usize = u->nlimbs;
       vsize = v->nlimbs;
+      usign = absmode? 0 : u->sign;
+      vsign = absmode? 0 : v->sign;
 
-      /* Compare sign bits.  */
+      /* Special treatment for +0 == -0 */
+      if (!usize && !vsize)
+        return 0;
 
-      if (!u->sign && v->sign)
+      /* Compare sign bits.  */
+      if (!usign && vsign)
         return 1;
-      if (u->sign && !v->sign)
+      if (usign && !vsign)
         return -1;
 
       /* U and V are either both positive or both negative.  */
 
-      if (usize != vsize && !u->sign && !v->sign)
+      if (usize != vsize && !usign && !vsign)
         return usize - vsize;
-      if (usize != vsize && u->sign && v->sign)
+      if (usize != vsize && usign && vsign)
         return vsize + usize;
       if (!usize )
         return 0;
       if (!(cmp = _gcry_mpih_cmp (u->d, v->d, usize)))
         return 0;
-      if ((cmp < 0?1:0) == (u->sign?1:0))
+      if ((cmp < 0?1:0) == (usign?1:0))
         return 1;
     }
   return -1;
 }
+
+
+int
+_gcry_mpi_cmp (gcry_mpi_t u, gcry_mpi_t v)
+{
+  return do_mpi_cmp (u, v, 0);
+}
+
+/* Compare only the absolute values.  */
+int
+_gcry_mpi_cmpabs (gcry_mpi_t u, gcry_mpi_t v)
+{
+  return do_mpi_cmp (u, v, 1);
+}
diff --git a/grub-core/lib/libgcrypt/mpi/mpi-div.c 
b/grub-core/lib/libgcrypt/mpi/mpi-div.c
index a6ee3006e..166ab8751 100644
--- a/grub-core/lib/libgcrypt/mpi/mpi-div.c
+++ b/grub-core/lib/libgcrypt/mpi/mpi-div.c
@@ -50,7 +50,7 @@ _gcry_mpi_fdiv_r( gcry_mpi_t rem, gcry_mpi_t dividend, 
gcry_mpi_t divisor )
     _gcry_mpi_tdiv_r( rem, dividend, divisor );
 
     if( ((divisor_sign?1:0) ^ (dividend->sign?1:0)) && rem->nlimbs )
-       gcry_mpi_add( rem, rem, divisor);
+       mpi_add (rem, rem, divisor);
 
     if( temp_divisor )
        mpi_free(temp_divisor);
@@ -64,8 +64,9 @@ _gcry_mpi_fdiv_r( gcry_mpi_t rem, gcry_mpi_t dividend, 
gcry_mpi_t divisor )
  * rem is optional
  */
 
-ulong
-_gcry_mpi_fdiv_r_ui( gcry_mpi_t rem, gcry_mpi_t dividend, ulong divisor )
+unsigned long
+_gcry_mpi_fdiv_r_ui( gcry_mpi_t rem, gcry_mpi_t dividend,
+                     unsigned long divisor )
 {
     mpi_limb_t rlimb;
 
@@ -103,8 +104,8 @@ _gcry_mpi_fdiv_qr( gcry_mpi_t quot, gcry_mpi_t rem, 
gcry_mpi_t dividend, gcry_mp
     _gcry_mpi_tdiv_qr( quot, rem, dividend, divisor );
 
     if( (divisor_sign ^ dividend->sign) && rem->nlimbs ) {
-       gcry_mpi_sub_ui( quot, quot, 1 );
-       gcry_mpi_add( rem, rem, divisor);
+       mpi_sub_ui( quot, quot, 1 );
+       mpi_add( rem, rem, divisor);
     }
 
     if( temp_divisor )
@@ -166,6 +167,9 @@ _gcry_mpi_tdiv_qr( gcry_mpi_t quot, gcry_mpi_t rem, 
gcry_mpi_t num, gcry_mpi_t d
     if( quot )
        mpi_resize( quot, qsize);
 
+    if (!dsize)
+      _gcry_divide_by_zero();
+
     /* Read pointers here, when reallocation is finished.  */
     np = num->d;
     dp = den->d;
@@ -321,14 +325,15 @@ _gcry_mpi_tdiv_q_2exp( gcry_mpi_t w, gcry_mpi_t u, 
unsigned int count )
  * (note: divisor must fit into a limb)
  */
 int
-_gcry_mpi_divisible_ui(gcry_mpi_t dividend, ulong divisor )
+_gcry_mpi_divisible_ui(gcry_mpi_t dividend, unsigned long divisor )
 {
     return !_gcry_mpih_mod_1( dividend->d, dividend->nlimbs, divisor );
 }
 
 
 void
-gcry_mpi_div (gcry_mpi_t quot, gcry_mpi_t rem, gcry_mpi_t dividend, gcry_mpi_t 
divisor, int round)
+_gcry_mpi_div (gcry_mpi_t quot, gcry_mpi_t rem, gcry_mpi_t dividend,
+               gcry_mpi_t divisor, int round)
 {
   if (!round)
     {
diff --git a/grub-core/lib/libgcrypt/mpi/mpi-gcd.c 
b/grub-core/lib/libgcrypt/mpi/mpi-gcd.c
index 5cbefa121..77ca05a6f 100644
--- a/grub-core/lib/libgcrypt/mpi/mpi-gcd.c
+++ b/grub-core/lib/libgcrypt/mpi/mpi-gcd.c
@@ -28,7 +28,7 @@
  * Return: true if this 1, false in all other cases
  */
 int
-gcry_mpi_gcd( gcry_mpi_t g, gcry_mpi_t xa, gcry_mpi_t xb )
+_gcry_mpi_gcd (gcry_mpi_t g, gcry_mpi_t xa, gcry_mpi_t xb)
 {
     gcry_mpi_t a, b;
 
@@ -38,14 +38,15 @@ gcry_mpi_gcd( gcry_mpi_t g, gcry_mpi_t xa, gcry_mpi_t xb )
     /* TAOCP Vol II, 4.5.2, Algorithm A */
     a->sign = 0;
     b->sign = 0;
-    while( gcry_mpi_cmp_ui( b, 0 ) ) {
-       _gcry_mpi_fdiv_r( g, a, b ); /* g used as temorary variable */
+    while (mpi_cmp_ui (b, 0))
+      {
+       _gcry_mpi_fdiv_r( g, a, b ); /* G is used as temporary variable. */
        mpi_set(a,b);
        mpi_set(b,g);
-    }
+      }
     mpi_set(g, a);
 
     mpi_free(a);
     mpi_free(b);
-    return !gcry_mpi_cmp_ui( g, 1);
+    return !mpi_cmp_ui( g, 1);
 }
diff --git a/grub-core/lib/libgcrypt/mpi/mpi-internal.h 
b/grub-core/lib/libgcrypt/mpi/mpi-internal.h
index e75b7c6d7..58dc503a8 100644
--- a/grub-core/lib/libgcrypt/mpi/mpi-internal.h
+++ b/grub-core/lib/libgcrypt/mpi/mpi-internal.h
@@ -50,6 +50,7 @@
 #endif /*BITS_PER_MPI_LIMB*/
 
 #include "mpi.h"
+#include "const-time.h"
 
 /* If KARATSUBA_THRESHOLD is not already defined, define it to a
  * value which is good on most machines.  */
@@ -79,6 +80,11 @@ typedef int mpi_size_t;        /* (must be a signed type) */
        if( (a)->alloced < (b) )   \
            mpi_resize((a), (b));  \
     } while(0)
+#define RESIZE_AND_CLEAR_IF_NEEDED(a,b) \
+    do {                          \
+       if( (a)->nlimbs < (b) )   \
+           mpi_resize((a), (b));  \
+    } while(0)
 
 /* Copy N limbs from S to D.  */
 #define MPN_COPY( d, s, n) \
@@ -92,7 +98,7 @@ typedef int mpi_size_t;        /* (must be a signed type) */
     do {                               \
        mpi_size_t _i;                  \
        for( _i = 0; _i < (n); _i++ )   \
-           (d)[_i] = (d)[_i];          \
+           (d)[_i] = (s)[_i];          \
     } while (0)
 
 #define MPN_COPY_DECR( d, s, n ) \
@@ -145,7 +151,8 @@ typedef int mpi_size_t;        /* (must be a signed type) */
  */
 #define UDIV_QRNND_PREINV(q, r, nh, nl, d, di) \
     do {                                                           \
-       mpi_limb_t _q, _ql, _r;                                     \
+        mpi_limb_t _ql GCC_ATTR_UNUSED;                               \
+       mpi_limb_t _q, _r;                                          \
        mpi_limb_t _xh, _xl;                                        \
        umul_ppmm (_q, _ql, (nh), (di));                            \
        _q += (nh);     /* DI is 2**BITS_PER_MPI_LIMB too small */  \
@@ -253,6 +260,49 @@ mpi_limb_t _gcry_mpih_lshift( mpi_ptr_t wp, mpi_ptr_t up, 
mpi_size_t usize,
 mpi_limb_t _gcry_mpih_rshift( mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize,
                                                           unsigned cnt);
 
+/*-- mpih-const-time.c --*/
+#define mpih_set_cond(w,u,s,o) _gcry_mpih_set_cond ((w),(u),(s),(o))
+#define mpih_add_n_cond(w,u,v,s,o) _gcry_mpih_add_n_cond ((w),(u),(v),(s),(o))
+#define mpih_sub_n_cond(w,u,v,s,o) _gcry_mpih_sub_n_cond ((w),(u),(v),(s),(o))
+#define mpih_swap_cond(u,v,s,o) _gcry_mpih_swap_cond ((u),(v),(s),(o))
+#define mpih_abs_cond(w,u,s,o) _gcry_mpih_abs_cond ((w),(u),(s),(o))
+#define mpih_mod(v,vs,u,us) _gcry_mpih_mod ((v),(vs),(u),(us))
+
+DEFINE_CT_TYPE_GEN_MASK(limb, mpi_limb_t)
+DEFINE_CT_TYPE_GEN_INV_MASK(limb, mpi_limb_t)
+
+static inline int
+mpih_limb_is_zero (mpi_limb_t a)
+{
+  /* Sign bit set if A == 0. */
+  a = ~a & ~(-a);
+
+  return a >> (BITS_PER_MPI_LIMB - 1);
+}
+
+static inline int
+mpih_limb_is_not_zero (mpi_limb_t a)
+{
+  /* Sign bit set if A != 0. */
+  a = a | (-a);
+
+  return a >> (BITS_PER_MPI_LIMB - 1);
+}
+
+void _gcry_mpih_set_cond (mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize,
+                          unsigned long op_enable);
+mpi_limb_t _gcry_mpih_add_n_cond (mpi_ptr_t wp, mpi_ptr_t up, mpi_ptr_t vp,
+                                  mpi_size_t usize, unsigned long op_enable);
+mpi_limb_t _gcry_mpih_sub_n_cond (mpi_ptr_t wp, mpi_ptr_t up, mpi_ptr_t vp,
+                                  mpi_size_t usize, unsigned long op_enable);
+void _gcry_mpih_swap_cond (mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t usize,
+                           unsigned long op_enable);
+void _gcry_mpih_abs_cond (mpi_ptr_t wp, mpi_ptr_t up,
+                          mpi_size_t usize, unsigned long op_enable);
+mpi_ptr_t _gcry_mpih_mod (mpi_ptr_t vp, mpi_size_t vsize,
+                          mpi_ptr_t up, mpi_size_t usize);
+int _gcry_mpih_cmp_ui (mpi_ptr_t up, mpi_size_t usize, unsigned long v);
+
 
 /* Define stuff for longlong.h.  */
 #define W_TYPE_SIZE BITS_PER_MPI_LIMB
diff --git a/grub-core/lib/libgcrypt/mpi/mpi-inv.c 
b/grub-core/lib/libgcrypt/mpi/mpi-inv.c
index 5d269466e..7ce874666 100644
--- a/grub-core/lib/libgcrypt/mpi/mpi-inv.c
+++ b/grub-core/lib/libgcrypt/mpi/mpi-inv.c
@@ -23,17 +23,167 @@
 #include "mpi-internal.h"
 #include "g10lib.h"
 
+/*
+ * This uses a modular inversion algorithm designed by Niels Möller
+ * which was implemented in Nettle.  The same algorithm was later also
+ * adapted to GMP in mpn_sec_invert.
+ *
+ * For the description of the algorithm, see Algorithm 5 in Appendix A
+ * of "Fast Software Polynomial Multiplication on ARM Processors using
+ * the NEON Engine" by Danilo Câmara, Conrado P. L. Gouvêa, Julio
+ * López, and Ricardo Dahab:
+ *   https://hal.inria.fr/hal-01506572/document
+ *
+ * Note that in the reference above, at the line 2 of Algorithm 5,
+ * initial value of V was described as V:=1 wrongly.  It must be V:=0.
+ */
+static mpi_ptr_t
+mpih_invm_odd (mpi_ptr_t ap, mpi_ptr_t np, mpi_size_t nsize)
+{
+  int secure;
+  unsigned int iterations;
+  mpi_ptr_t n1hp;
+  mpi_ptr_t bp;
+  mpi_ptr_t up, vp;
+
+  secure = _gcry_is_secure (ap);
+  up = mpi_alloc_limb_space (nsize, secure);
+  MPN_ZERO (up, nsize);
+  up[0] = 1;
+
+  vp = mpi_alloc_limb_space (nsize, secure);
+  MPN_ZERO (vp, nsize);
+
+  secure = _gcry_is_secure (np);
+  bp = mpi_alloc_limb_space (nsize, secure);
+  MPN_COPY (bp, np, nsize);
+
+  n1hp = mpi_alloc_limb_space (nsize, secure);
+  MPN_COPY (n1hp, np, nsize);
+  _gcry_mpih_rshift (n1hp, n1hp, nsize, 1);
+  _gcry_mpih_add_1 (n1hp, n1hp, nsize, 1);
+
+  iterations = 2 * nsize * BITS_PER_MPI_LIMB;
+
+  while (iterations-- > 0)
+    {
+      mpi_limb_t odd_a, odd_u, underflow, borrow;
+
+      odd_a = ap[0] & 1;
+
+      underflow = mpih_sub_n_cond (ap, ap, bp, nsize, odd_a);
+      mpih_add_n_cond (bp, bp, ap, nsize, underflow);
+      mpih_abs_cond (ap, ap, nsize, underflow);
+      mpih_swap_cond (up, vp, nsize, underflow);
+
+      _gcry_mpih_rshift (ap, ap, nsize, 1);
+
+      borrow = mpih_sub_n_cond (up, up, vp, nsize, odd_a);
+      mpih_add_n_cond (up, up, np, nsize, borrow);
+
+      odd_u = _gcry_mpih_rshift (up, up, nsize, 1) != 0;
+      mpih_add_n_cond (up, up, n1hp, nsize, odd_u);
+    }
+
+  _gcry_mpi_free_limb_space (n1hp, nsize);
+  _gcry_mpi_free_limb_space (up, nsize);
+
+  if (_gcry_mpih_cmp_ui (bp, nsize, 1) == 0)
+    {
+      /* Inverse exists.  */
+      _gcry_mpi_free_limb_space (bp, nsize);
+      return vp;
+    }
+  else
+    {
+      _gcry_mpi_free_limb_space (bp, nsize);
+      _gcry_mpi_free_limb_space (vp, nsize);
+      return NULL;
+    }
+}
+
+
+/*
+ * Calculate the multiplicative inverse X of A mod 2^K
+ * A must be positive.
+ *
+ * See section 7 in "A New Algorithm for Inversion mod p^k" by Çetin
+ * Kaya Koç: https://eprint.iacr.org/2017/411.pdf
+ */
+static mpi_ptr_t
+mpih_invm_pow2 (mpi_ptr_t ap, mpi_size_t asize, unsigned int k)
+{
+  int secure = _gcry_is_secure (ap);
+  mpi_size_t i;
+  unsigned int iterations;
+  mpi_ptr_t xp, wp, up, vp;
+  mpi_size_t usize;
+
+  if (!(ap[0] & 1))
+    return NULL;
+
+  iterations = ((k + BITS_PER_MPI_LIMB - 1) / BITS_PER_MPI_LIMB)
+    * BITS_PER_MPI_LIMB;
+  usize = iterations / BITS_PER_MPI_LIMB;
+
+  up = mpi_alloc_limb_space (usize, secure);
+  MPN_ZERO (up, usize);
+  up[0] = 1;
+
+  vp = mpi_alloc_limb_space (usize, secure);
+  for (i = 0; i < (usize < asize ? usize : asize); i++)
+    vp[i] = ap[i];
+  for (; i < usize; i++)
+    vp[i] = 0;
+  if ((k % BITS_PER_MPI_LIMB))
+    for (i = k % BITS_PER_MPI_LIMB; i < BITS_PER_MPI_LIMB; i++)
+      vp[k/BITS_PER_MPI_LIMB] &= ~(((mpi_limb_t)1) << i);
+
+  wp = mpi_alloc_limb_space (usize, secure);
+  MPN_COPY (wp, up, usize);
+
+  xp = mpi_alloc_limb_space (usize, secure);
+  MPN_ZERO (xp, usize);
+
+  /*
+   * It can be considered that overflow at _gcry_mpih_sub_n results
+   * adding 2^(USIZE*BITS_PER_MPI_LIMB), which is no problem in modulo
+   * 2^K computation.
+   */
+  for (i = 0; i < iterations; i++)
+    {
+      int b0 = (up[0] & 1);
+
+      xp[i/BITS_PER_MPI_LIMB] |= ((mpi_limb_t)b0<<(i%BITS_PER_MPI_LIMB));
+      _gcry_mpih_sub_n (wp, up, vp, usize);
+      mpih_set_cond (up, wp, usize, b0);
+      _gcry_mpih_rshift (up, up, usize, 1);
+    }
+
+  if ((k % BITS_PER_MPI_LIMB))
+    for (i = k % BITS_PER_MPI_LIMB; i < BITS_PER_MPI_LIMB; i++)
+      xp[k/BITS_PER_MPI_LIMB] &= ~(((mpi_limb_t)1) << i);
+
+  _gcry_mpi_free_limb_space (up, usize);
+  _gcry_mpi_free_limb_space (vp, usize);
+  _gcry_mpi_free_limb_space (wp, usize);
+
+  return xp;
+}
+
+
 /****************
  * Calculate the multiplicative inverse X of A mod N
  * That is: Find the solution x for
  *             1 = (a*x) mod n
  */
-int
-gcry_mpi_invm( gcry_mpi_t x, gcry_mpi_t a, gcry_mpi_t n )
+static int
+mpi_invm_generic (gcry_mpi_t x, gcry_mpi_t a, gcry_mpi_t n)
 {
+    int is_gcd_one;
 #if 0
+    /* Extended Euclid's algorithm (See TAOCP Vol II, 4.5.2, Alg X) */
     gcry_mpi_t u, v, u1, u2, u3, v1, v2, v3, q, t1, t2, t3;
-    gcry_mpi_t ta, tb, tc;
 
     u = mpi_copy(a);
     v = mpi_copy(n);
@@ -63,6 +213,8 @@ gcry_mpi_invm( gcry_mpi_t x, gcry_mpi_t a, gcry_mpi_t n )
        log_mpidump("v2=", v2); */
     mpi_set(x, u1);
 
+    is_gcd_one = (mpi_cmp_ui (u3, 1) == 0);
+
     mpi_free(u1);
     mpi_free(u2);
     mpi_free(u3);
@@ -77,7 +229,11 @@ gcry_mpi_invm( gcry_mpi_t x, gcry_mpi_t a, gcry_mpi_t n )
     mpi_free(v);
 #elif 0
     /* Extended Euclid's algorithm (See TAOCP Vol II, 4.5.2, Alg X)
-     * modified according to Michael Penk's solution for Exercise 35 */
+     * modified according to Michael Penk's solution for Exercise 35
+     * (in the first edition)
+     * In the third edition, it's Exercise 39, and it is described in
+     * page 646 of ANSWERS TO EXERCISES chapter.
+     */
 
     /* FIXME: we can simplify this in most cases (see Knuth) */
     gcry_mpi_t u, v, u1, u2, u3, v1, v2, v3, t1, t2, t3;
@@ -144,7 +300,8 @@ gcry_mpi_invm( gcry_mpi_t x, gcry_mpi_t a, gcry_mpi_t n )
            mpi_sub(t2, t2, u);
        }
     } while( mpi_cmp_ui( t3, 0 ) ); /* while t3 != 0 */
-    /* mpi_lshift( u3, k ); */
+    /* mpi_lshift( u3, u3, k ); */
+    is_gcd_one = (k == 0 && mpi_cmp_ui (u3, 1) == 0);
     mpi_set(x, u1);
 
     mpi_free(u1);
@@ -160,6 +317,10 @@ gcry_mpi_invm( gcry_mpi_t x, gcry_mpi_t a, gcry_mpi_t n )
     /* Extended Euclid's algorithm (See TAOCP Vol II, 4.5.2, Alg X)
      * modified according to Michael Penk's solution for Exercise 35
      * with further enhancement */
+    /* The reference in the comment above is for the first edition.
+     * In the third edition, it's Exercise 39, and it is described in
+     * page 646 of ANSWERS TO EXERCISES chapter.
+     */
     gcry_mpi_t u, v, u1, u2=NULL, u3, v1, v2=NULL, v3, t1, t2=NULL, t3;
     unsigned k;
     int sign;
@@ -245,7 +406,8 @@ gcry_mpi_invm( gcry_mpi_t x, gcry_mpi_t a, gcry_mpi_t n )
                mpi_sub(t2, t2, u);
        }
     } while( mpi_cmp_ui( t3, 0 ) ); /* while t3 != 0 */
-    /* mpi_lshift( u3, k ); */
+    /* mpi_lshift( u3, u3, k ); */
+    is_gcd_one = (k == 0 && mpi_cmp_ui (u3, 1) == 0);
     mpi_set(x, u1);
 
     mpi_free(u1);
@@ -263,5 +425,141 @@ gcry_mpi_invm( gcry_mpi_t x, gcry_mpi_t a, gcry_mpi_t n )
     mpi_free(u);
     mpi_free(v);
 #endif
-    return 1;
+    return is_gcd_one;
+}
+
+
+/*
+ * Set X to the multiplicative inverse of A mod M.  Return true if the
+ * inverse exists.
+ */
+int
+_gcry_mpi_invm (gcry_mpi_t x, gcry_mpi_t a, gcry_mpi_t n)
+{
+  mpi_ptr_t ap, xp;
+
+  if (!mpi_cmp_ui (a, 0))
+    return 0; /* Inverse does not exists.  */
+  if (!mpi_cmp_ui (n, 1))
+    return 0; /* Inverse does not exists.  */
+
+  if (mpi_test_bit (n, 0))
+    {
+      if (a->nlimbs <= n->nlimbs)
+        {
+          ap = mpi_alloc_limb_space (n->nlimbs, _gcry_is_secure (a->d));
+          MPN_ZERO (ap, n->nlimbs);
+          MPN_COPY (ap, a->d, a->nlimbs);
+        }
+      else
+        ap = _gcry_mpih_mod (a->d, a->nlimbs, n->d, n->nlimbs);
+
+      xp = mpih_invm_odd (ap, n->d, n->nlimbs);
+      _gcry_mpi_free_limb_space (ap, n->nlimbs);
+
+      if (xp)
+        {
+          _gcry_mpi_assign_limb_space (x, xp, n->nlimbs);
+          x->nlimbs = n->nlimbs;
+          return 1;
+        }
+      else
+        return 0; /* Inverse does not exists.  */
+    }
+  else if (!a->sign && !n->sign)
+    {
+      unsigned int k = mpi_trailing_zeros (n);
+      mpi_size_t x1size = ((k + BITS_PER_MPI_LIMB - 1) / BITS_PER_MPI_LIMB);
+      mpi_size_t hsize;
+      gcry_mpi_t q;
+      mpi_ptr_t x1p, x2p, q_invp, hp, diffp;
+      mpi_size_t i;
+
+      if (k == _gcry_mpi_get_nbits (n) - 1)
+        {
+          x1p = mpih_invm_pow2 (a->d, a->nlimbs, k);
+
+          if (x1p)
+            {
+              _gcry_mpi_assign_limb_space (x, x1p, x1size);
+              x->nlimbs = x1size;
+              return 1;
+            }
+          else
+            return 0; /* Inverse does not exists.  */
+        }
+
+      /* N can be expressed as P * Q, where P = 2^K.  P and Q are coprime.  */
+      /*
+       * Compute X1 = invm (A, P) and X2 = invm (A, Q), and combine
+       * them by Garner's formula, to get X = invm (A, P*Q).
+       * A special case of Chinese Remainder Theorem.
+       */
+
+      /* X1 = invm (A, P) */
+      x1p = mpih_invm_pow2 (a->d, a->nlimbs, k);
+      if (!x1p)
+        return 0;               /* Inverse does not exists.  */
+
+      /* Q = N / P          */
+      q = mpi_new (0);
+      mpi_rshift (q, n, k);
+
+      /* X2 = invm (A%Q, Q) */
+      ap = _gcry_mpih_mod (a->d, a->nlimbs, q->d, q->nlimbs);
+      x2p = mpih_invm_odd (ap, q->d, q->nlimbs);
+      _gcry_mpi_free_limb_space (ap, q->nlimbs);
+      if (!x2p)
+        {
+          _gcry_mpi_free_limb_space (x1p, x1size);
+          mpi_free (q);
+          return 0;             /* Inverse does not exists.  */
+        }
+
+      /* Q_inv = Q^(-1) = invm (Q, P) */
+      q_invp = mpih_invm_pow2 (q->d, q->nlimbs, k);
+
+      /* H = (X1 - X2) * Q_inv % P */
+      diffp = mpi_alloc_limb_space (x1size, _gcry_is_secure (a->d));
+      if (x1size >= q->nlimbs)
+        _gcry_mpih_sub (diffp, x1p, x1size, x2p, q->nlimbs);
+      else
+       _gcry_mpih_sub_n (diffp, x1p, x2p, x1size);
+      _gcry_mpi_free_limb_space (x1p, x1size);
+      if ((k % BITS_PER_MPI_LIMB))
+        for (i = k % BITS_PER_MPI_LIMB; i < BITS_PER_MPI_LIMB; i++)
+          diffp[k/BITS_PER_MPI_LIMB] &= ~(((mpi_limb_t)1) << i);
+
+      hsize = x1size * 2;
+      hp = mpi_alloc_limb_space (hsize, _gcry_is_secure (a->d));
+      _gcry_mpih_mul_n (hp, diffp, q_invp, x1size);
+      _gcry_mpi_free_limb_space (diffp, x1size);
+      _gcry_mpi_free_limb_space (q_invp, x1size);
+
+      for (i = x1size; i < hsize; i++)
+        hp[i] = 0;
+      if ((k % BITS_PER_MPI_LIMB))
+        for (i = k % BITS_PER_MPI_LIMB; i < BITS_PER_MPI_LIMB; i++)
+          hp[k/BITS_PER_MPI_LIMB] &= ~(((mpi_limb_t)1) << i);
+
+      xp = mpi_alloc_limb_space (x1size + q->nlimbs, _gcry_is_secure (a->d));
+      if (x1size >= q->nlimbs)
+        _gcry_mpih_mul (xp, hp, x1size, q->d, q->nlimbs);
+      else
+        _gcry_mpih_mul (xp, q->d, q->nlimbs, hp, x1size);
+
+      _gcry_mpi_free_limb_space (hp, hsize);
+
+      _gcry_mpih_add (xp, xp, x1size + q->nlimbs, x2p, q->nlimbs);
+      _gcry_mpi_free_limb_space (x2p, q->nlimbs);
+
+      _gcry_mpi_assign_limb_space (x, xp, x1size + q->nlimbs);
+      x->nlimbs = x1size + q->nlimbs;
+
+      mpi_free (q);
+
+      return 1;
+    }
+  else
+    return mpi_invm_generic (x, a, n);
 }
diff --git a/grub-core/lib/libgcrypt/mpi/mpi-mod.c 
b/grub-core/lib/libgcrypt/mpi/mpi-mod.c
index 7ebfe6dca..88624720c 100644
--- a/grub-core/lib/libgcrypt/mpi/mpi-mod.c
+++ b/grub-core/lib/libgcrypt/mpi/mpi-mod.c
@@ -47,7 +47,6 @@ void
 _gcry_mpi_mod (gcry_mpi_t rem, gcry_mpi_t dividend, gcry_mpi_t divisor)
 {
   _gcry_mpi_fdiv_r (rem, dividend, divisor);
-  rem->sign = 0;
 }
 
 
@@ -63,7 +62,7 @@ _gcry_mpi_barrett_init (gcry_mpi_t m, int copy)
   gcry_mpi_t tmp;
 
   mpi_normalize (m);
-  ctx = gcry_xcalloc (1, sizeof *ctx);
+  ctx = xcalloc (1, sizeof *ctx);
 
   if (copy)
     {
@@ -100,7 +99,7 @@ _gcry_mpi_barrett_free (mpi_barrett_t ctx)
         mpi_free (ctx->r3);
       if (ctx->m_copied)
         mpi_free (ctx->m);
-      gcry_free (ctx);
+      xfree (ctx);
     }
 }
 
@@ -111,7 +110,7 @@ _gcry_mpi_barrett_free (mpi_barrett_t ctx)
    _gcry_mpi_barrett_init must have been called to do the
    precalculations.  CTX is the context created by this precalculation
    and also conveys M.  If the Barret reduction could no be done a
-   starightforward reduction method is used.
+   straightforward reduction method is used.
 
    We assume that these conditions are met:
    Input:  x =(x_2k-1 ...x_0)_b
@@ -126,6 +125,7 @@ _gcry_mpi_mod_barrett (gcry_mpi_t r, gcry_mpi_t x, 
mpi_barrett_t ctx)
   gcry_mpi_t y = ctx->y;
   gcry_mpi_t r1 = ctx->r1;
   gcry_mpi_t r2 = ctx->r2;
+  int sign;
 
   mpi_normalize (x);
   if (mpi_get_nlimbs (x) > 2*k )
@@ -134,6 +134,9 @@ _gcry_mpi_mod_barrett (gcry_mpi_t r, gcry_mpi_t x, 
mpi_barrett_t ctx)
       return;
     }
 
+  sign = x->sign;
+  x->sign = 0;
+
   /* 1. q1 = floor( x / b^k-1)
    *    q2 = q1 * y
    *    q3 = floor( q2 / b^k+1 )
@@ -157,7 +160,7 @@ _gcry_mpi_mod_barrett (gcry_mpi_t r, gcry_mpi_t x, 
mpi_barrett_t ctx)
     r2->nlimbs = k+1;
   mpi_sub ( r, r1, r2 );
 
-  if ( mpi_is_neg( r ) )
+  if ( mpi_has_sign ( r ) )
     {
       if (!ctx->r3)
         {
@@ -172,6 +175,7 @@ _gcry_mpi_mod_barrett (gcry_mpi_t r, gcry_mpi_t x, 
mpi_barrett_t ctx)
   while ( mpi_cmp( r, m ) >= 0 )
     mpi_sub ( r, r, m );
 
+  x->sign = sign;
 }
 
 
@@ -179,6 +183,6 @@ void
 _gcry_mpi_mul_barrett (gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v,
                        mpi_barrett_t ctx)
 {
-  gcry_mpi_mul (w, u, v);
+  mpi_mul (w, u, v);
   mpi_mod_barrett (w, w, ctx);
 }
diff --git a/grub-core/lib/libgcrypt/mpi/mpi-mpow.c 
b/grub-core/lib/libgcrypt/mpi/mpi-mpow.c
index ca5b3f184..43bd641fb 100644
--- a/grub-core/lib/libgcrypt/mpi/mpi-mpow.c
+++ b/grub-core/lib/libgcrypt/mpi/mpi-mpow.c
@@ -39,7 +39,7 @@ static void barrett_mulm( gcry_mpi_t w, gcry_mpi_t u, 
gcry_mpi_t v, gcry_mpi_t m
 static gcry_mpi_t init_barrett( gcry_mpi_t m, int *k, gcry_mpi_t *r1, 
gcry_mpi_t *r2 );
 static int calc_barrett( gcry_mpi_t r, gcry_mpi_t x, gcry_mpi_t m, gcry_mpi_t 
y, int k, gcry_mpi_t r1, gcry_mpi_t r2  );
 #else
-#define barrett_mulm( w, u, v, m, y, k, r1, r2 ) gcry_mpi_mulm( (w), (u), (v), 
(m) )
+#define barrett_mulm( w, u, v, m, y, k, r1, r2 ) _gcry_mpi_mulm( (w), (u), 
(v), (m) )
 #endif
 
 
@@ -89,7 +89,7 @@ _gcry_mpi_mulpowm( gcry_mpi_t res, gcry_mpi_t *basearray, 
gcry_mpi_t *exparray,
     gcry_assert (t);
     gcry_assert (k < 10);
 
-    G = gcry_xcalloc( (1<<k) , sizeof *G );
+    G = xcalloc( (1<<k) , sizeof *G );
 #ifdef USE_BARRETT
     barrett_y = init_barrett( m, &barrett_k, &barrett_r1, &barrett_r2 );
 #endif
@@ -130,7 +130,7 @@ _gcry_mpi_mulpowm( gcry_mpi_t res, gcry_mpi_t *basearray, 
gcry_mpi_t *exparray,
 #endif
     for(i=0; i < (1<<k); i++ )
        mpi_free(G[i]);
-    gcry_free(G);
+    xfree(G);
 }
 
 
@@ -204,7 +204,7 @@ calc_barrett( gcry_mpi_t r, gcry_mpi_t x, gcry_mpi_t m, 
gcry_mpi_t y, int k, gcr
        r2->nlimbs = k+1;
     mpi_sub( r, r1, r2 );
 
-    if( mpi_is_neg( r ) ) {
+    if( mpi_has_sign (r) ) {
        gcry_mpi_t tmp;
 
        tmp = mpi_alloc( k + 2 );
diff --git a/grub-core/lib/libgcrypt/mpi/mpi-mul.c 
b/grub-core/lib/libgcrypt/mpi/mpi-mul.c
index 9aefd217a..4f4d7096a 100644
--- a/grub-core/lib/libgcrypt/mpi/mpi-mul.c
+++ b/grub-core/lib/libgcrypt/mpi/mpi-mul.c
@@ -31,7 +31,7 @@
 
 
 void
-gcry_mpi_mul_ui( gcry_mpi_t prod, gcry_mpi_t mult, unsigned long small_mult )
+_gcry_mpi_mul_ui (gcry_mpi_t prod, gcry_mpi_t mult, unsigned long small_mult)
 {
     mpi_size_t size, prod_size;
     mpi_ptr_t  prod_ptr;
@@ -61,7 +61,7 @@ gcry_mpi_mul_ui( gcry_mpi_t prod, gcry_mpi_t mult, unsigned 
long small_mult )
 
 
 void
-gcry_mpi_mul_2exp( gcry_mpi_t w, gcry_mpi_t u, unsigned long cnt)
+_gcry_mpi_mul_2exp (gcry_mpi_t w, gcry_mpi_t u, unsigned long cnt)
 {
     mpi_size_t usize, wsize, limb_cnt;
     mpi_ptr_t wp;
@@ -107,7 +107,7 @@ gcry_mpi_mul_2exp( gcry_mpi_t w, gcry_mpi_t u, unsigned 
long cnt)
 
 
 void
-gcry_mpi_mul( gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v)
+_gcry_mpi_mul (gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v)
 {
     mpi_size_t usize, vsize, wsize;
     mpi_ptr_t up, vp, wp;
@@ -205,8 +205,8 @@ gcry_mpi_mul( gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v)
 
 
 void
-gcry_mpi_mulm( gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v, gcry_mpi_t m)
+_gcry_mpi_mulm (gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v, gcry_mpi_t m)
 {
-    gcry_mpi_mul(w, u, v);
-    _gcry_mpi_fdiv_r( w, w, m );
+  mpi_mul (w, u, v);
+  _gcry_mpi_tdiv_r (w, w, m);
 }
diff --git a/grub-core/lib/libgcrypt/mpi/mpi-pow.c 
b/grub-core/lib/libgcrypt/mpi/mpi-pow.c
index 58643fed2..62b4a8083 100644
--- a/grub-core/lib/libgcrypt/mpi/mpi-pow.c
+++ b/grub-core/lib/libgcrypt/mpi/mpi-pow.c
@@ -34,12 +34,20 @@
 #include "longlong.h"
 
 
+/*
+ * When you need old implementation, please add compilation option
+ * -DUSE_ALGORITHM_SIMPLE_EXPONENTIATION
+ * or expose this line:
+#define USE_ALGORITHM_SIMPLE_EXPONENTIATION 1
+ */
+
+#if defined(USE_ALGORITHM_SIMPLE_EXPONENTIATION)
 /****************
  * RES = BASE ^ EXPO mod MOD
  */
 void
-gcry_mpi_powm (gcry_mpi_t res,
-               gcry_mpi_t base, gcry_mpi_t expo, gcry_mpi_t mod)
+_gcry_mpi_powm (gcry_mpi_t res,
+                gcry_mpi_t base, gcry_mpi_t expo, gcry_mpi_t mod)
 {
   /* Pointer to the limbs of the arguments, their size and signs. */
   mpi_ptr_t  rp, ep, mp, bp;
@@ -75,9 +83,10 @@ gcry_mpi_powm (gcry_mpi_t res,
 
   rp = res->d;
   ep = expo->d;
+  MPN_NORMALIZE(ep, esize);
 
   if (!msize)
-    grub_fatal ("mpi division by zero");
+    _gcry_divide_by_zero();
 
   if (!esize)
     {
@@ -169,7 +178,7 @@ gcry_mpi_powm (gcry_mpi_t res,
     }
   MPN_COPY ( rp, bp, bsize );
   rsize = bsize;
-  rsign = bsign;
+  rsign = 0;
 
   /* Main processing.  */
   {
@@ -179,12 +188,18 @@ gcry_mpi_powm (gcry_mpi_t res,
     mpi_limb_t e;
     mpi_limb_t carry_limb;
     struct karatsuba_ctx karactx;
+    struct gcry_mpi w, u;
 
-    xp_nlimbs = msec? (2 * (msize + 1)):0;
-    xp = xp_marker = mpi_alloc_limb_space( 2 * (msize + 1), msec );
+    xp_nlimbs = msec? size:0;
+    xp = xp_marker = mpi_alloc_limb_space( size, msec );
+
+    w.sign = u.sign = 0;
+    w.flags = u.flags = 0;
+    w.alloced = w.nlimbs = size; /* RES->alloc may be longer.  */
+    u.alloced = u.nlimbs = size;
 
     memset( &karactx, 0, sizeof karactx );
-    negative_result = (ep[0] & 1) && base->sign;
+    negative_result = (ep[0] & 1) && bsign;
 
     i = esize - 1;
     e = ep[i];
@@ -258,11 +273,11 @@ gcry_mpi_powm (gcry_mpi_t res,
                     xsize = msize;
                   }
               }
-            if ( (mpi_limb_signed_t)e < 0 )
-              {
-                tp = rp; rp = xp; xp = tp;
-                rsize = xsize;
-              }
+
+            w.d = rp;
+            u.d = xp;
+            mpi_set_cond (&w, &u, ((mpi_limb_signed_t)e < 0));
+
             e <<= 1;
             c--;
           }
@@ -336,3 +351,422 @@ gcry_mpi_powm (gcry_mpi_t res,
   if (tspace)
     _gcry_mpi_free_limb_space( tspace, 0 );
 }
+#else
+/**
+ * Internal function to compute
+ *
+ *    X = R * S mod M
+ *
+ * and set the size of X at the pointer XSIZE_P.
+ * Use karatsuba structure at KARACTX_P.
+ *
+ * Condition:
+ *   RSIZE >= SSIZE
+ *   Enough space for X is allocated beforehand.
+ *
+ * For generic cases, we can/should use gcry_mpi_mulm.
+ * This function is use for specific internal case.
+ */
+static void
+mul_mod (mpi_ptr_t xp, mpi_size_t *xsize_p,
+         mpi_ptr_t rp, mpi_size_t rsize,
+         mpi_ptr_t sp, mpi_size_t ssize,
+         mpi_ptr_t mp, mpi_size_t msize,
+         struct karatsuba_ctx *karactx_p)
+{
+  if( ssize < KARATSUBA_THRESHOLD )
+    _gcry_mpih_mul ( xp, rp, rsize, sp, ssize );
+  else
+    _gcry_mpih_mul_karatsuba_case (xp, rp, rsize, sp, ssize, karactx_p);
+
+   if (rsize + ssize > msize)
+    {
+      _gcry_mpih_divrem (xp + msize, 0, xp, rsize + ssize, mp, msize);
+      *xsize_p = msize;
+    }
+   else
+     *xsize_p = rsize + ssize;
+}
+
+#define SIZE_PRECOMP ((1 << (5 - 1)))
+
+/****************
+ * RES = BASE ^ EXPO mod MOD
+ *
+ * To mitigate the Yarom/Falkner flush+reload cache side-channel
+ * attack on the RSA secret exponent, we don't use the square
+ * routine but multiplication.
+ *
+ * Reference:
+ *   Handbook of Applied Cryptography
+ *       Algorithm 14.83: Modified left-to-right k-ary exponentiation
+ */
+void
+_gcry_mpi_powm (gcry_mpi_t res,
+                gcry_mpi_t base, gcry_mpi_t expo, gcry_mpi_t mod)
+{
+  /* Pointer to the limbs of the arguments, their size and signs. */
+  mpi_ptr_t  rp, ep, mp, bp;
+  mpi_size_t esize, msize, bsize, rsize;
+  int               msign, bsign, rsign;
+  /* Flags telling the secure allocation status of the arguments.  */
+  int        esec,  msec,  bsec;
+  /* Size of the result including space for temporary values.  */
+  mpi_size_t size;
+  /* Helper.  */
+  int mod_shift_cnt;
+  int negative_result;
+  mpi_ptr_t mp_marker = NULL;
+  mpi_ptr_t bp_marker = NULL;
+  mpi_ptr_t ep_marker = NULL;
+  mpi_ptr_t xp_marker = NULL;
+  unsigned int mp_nlimbs = 0;
+  unsigned int bp_nlimbs = 0;
+  unsigned int ep_nlimbs = 0;
+  unsigned int xp_nlimbs = 0;
+  mpi_ptr_t precomp[SIZE_PRECOMP]; /* Pre-computed array: BASE^1, ^3, ^5, ... 
*/
+  mpi_size_t precomp_size[SIZE_PRECOMP];
+  mpi_size_t W;
+  mpi_ptr_t base_u;
+  mpi_size_t base_u_size;
+  mpi_size_t max_u_size;
+
+  esize = expo->nlimbs;
+  msize = mod->nlimbs;
+  size = 2 * msize;
+  msign = mod->sign;
+
+  ep = expo->d;
+  MPN_NORMALIZE(ep, esize);
+
+  if (esize * BITS_PER_MPI_LIMB > 512)
+    W = 5;
+  else if (esize * BITS_PER_MPI_LIMB > 256)
+    W = 4;
+  else if (esize * BITS_PER_MPI_LIMB > 128)
+    W = 3;
+  else if (esize * BITS_PER_MPI_LIMB > 64)
+    W = 2;
+  else
+    W = 1;
+
+  esec = mpi_is_secure(expo);
+  msec = mpi_is_secure(mod);
+  bsec = mpi_is_secure(base);
+
+  rp = res->d;
+
+  if (!msize)
+    _gcry_divide_by_zero();
+
+  if (!esize)
+    {
+      /* Exponent is zero, result is 1 mod MOD, i.e., 1 or 0 depending
+         on if MOD equals 1.  */
+      res->nlimbs = (msize == 1 && mod->d[0] == 1) ? 0 : 1;
+      if (res->nlimbs)
+        {
+          RESIZE_IF_NEEDED (res, 1);
+          rp = res->d;
+          rp[0] = 1;
+        }
+      res->sign = 0;
+      goto leave;
+    }
+
+  /* Normalize MOD (i.e. make its most significant bit set) as
+     required by mpn_divrem.  This will make the intermediate values
+     in the calculation slightly larger, but the correct result is
+     obtained after a final reduction using the original MOD value. */
+  mp_nlimbs = msec? msize:0;
+  mp = mp_marker = mpi_alloc_limb_space(msize, msec);
+  count_leading_zeros (mod_shift_cnt, mod->d[msize-1]);
+  if (mod_shift_cnt)
+    _gcry_mpih_lshift (mp, mod->d, msize, mod_shift_cnt);
+  else
+    MPN_COPY( mp, mod->d, msize );
+
+  bsize = base->nlimbs;
+  bsign = base->sign;
+  if (bsize > msize)
+    {
+      /* The base is larger than the module.  Reduce it.
+
+         Allocate (BSIZE + 1) with space for remainder and quotient.
+         (The quotient is (bsize - msize + 1) limbs.)  */
+      bp_nlimbs = bsec ? (bsize + 1):0;
+      bp = bp_marker = mpi_alloc_limb_space( bsize + 1, bsec );
+      MPN_COPY ( bp, base->d, bsize );
+      /* We don't care about the quotient, store it above the
+       * remainder, at BP + MSIZE.  */
+      _gcry_mpih_divrem( bp + msize, 0, bp, bsize, mp, msize );
+      bsize = msize;
+      /* Canonicalize the base, since we are going to multiply with it
+         quite a few times.  */
+      MPN_NORMALIZE( bp, bsize );
+    }
+  else
+    bp = base->d;
+
+  if (!bsize)
+    {
+      res->nlimbs = 0;
+      res->sign = 0;
+      goto leave;
+    }
+
+
+  /* Make BASE, EXPO not overlap with RES.  We don't need to check MOD
+     because that has already been copied to the MP var.  */
+  if ( rp == bp )
+    {
+      /* RES and BASE are identical.  Allocate temp. space for BASE.  */
+      gcry_assert (!bp_marker);
+      bp_nlimbs = bsec? bsize:0;
+      bp = bp_marker = mpi_alloc_limb_space( bsize, bsec );
+      MPN_COPY(bp, rp, bsize);
+    }
+  if ( rp == ep )
+    {
+      /* RES and EXPO are identical.  Allocate temp. space for EXPO.  */
+      ep_nlimbs = esec? esize:0;
+      ep = ep_marker = mpi_alloc_limb_space( esize, esec );
+      MPN_COPY(ep, rp, esize);
+    }
+
+  /* Copy base to the result.  */
+  if (res->alloced < size)
+    {
+      mpi_resize (res, size);
+      rp = res->d;
+    }
+
+  /* Main processing.  */
+  {
+    mpi_size_t i, j, k;
+    mpi_ptr_t xp;
+    mpi_size_t xsize;
+    int c;
+    mpi_limb_t e;
+    mpi_limb_t carry_limb;
+    struct karatsuba_ctx karactx;
+    mpi_ptr_t tp;
+
+    xp_nlimbs = msec? size:0;
+    xp = xp_marker = mpi_alloc_limb_space( size, msec );
+
+    memset( &karactx, 0, sizeof karactx );
+    negative_result = (ep[0] & 1) && bsign;
+
+    /* Precompute PRECOMP[], BASE^(2 * i + 1), BASE^1, ^3, ^5, ... */
+    if (W > 1)                  /* X := BASE^2 */
+      mul_mod (xp, &xsize, bp, bsize, bp, bsize, mp, msize, &karactx);
+    base_u = precomp[0] = mpi_alloc_limb_space (bsize, esec);
+    base_u_size = max_u_size = precomp_size[0] = bsize;
+    MPN_COPY (precomp[0], bp, bsize);
+    for (i = 1; i < (1 << (W - 1)); i++)
+      {                         /* PRECOMP[i] = BASE^(2 * i + 1) */
+        if (xsize >= base_u_size)
+          mul_mod (rp, &rsize, xp, xsize, base_u, base_u_size,
+                   mp, msize, &karactx);
+        else
+          mul_mod (rp, &rsize, base_u, base_u_size, xp, xsize,
+                   mp, msize, &karactx);
+        base_u = precomp[i] = mpi_alloc_limb_space (rsize, esec);
+        base_u_size = precomp_size[i] = rsize;
+        if (max_u_size < base_u_size)
+          max_u_size = base_u_size;
+        MPN_COPY (precomp[i], rp, rsize);
+      }
+
+    if (msize > max_u_size)
+      max_u_size = msize;
+    base_u = mpi_alloc_limb_space (max_u_size, esec);
+    MPN_ZERO (base_u, max_u_size);
+
+    i = esize - 1;
+
+    /* Main loop.
+
+       Make the result be pointed to alternately by XP and RP.  This
+       helps us avoid block copying, which would otherwise be
+       necessary with the overlap restrictions of
+       _gcry_mpih_divmod. With 50% probability the result after this
+       loop will be in the area originally pointed by RP (==RES->d),
+       and with 50% probability in the area originally pointed to by XP. */
+    rsign = 0;
+    if (W == 1)
+      {
+        rsize = bsize;
+      }
+    else
+      {
+        rsize = msize;
+        MPN_ZERO (rp, rsize);
+      }
+    MPN_COPY ( rp, bp, bsize );
+
+    e = ep[i];
+    count_leading_zeros (c, e);
+    e = (e << c) << 1;
+    c = BITS_PER_MPI_LIMB - 1 - c;
+
+    j = 0;
+
+    for (;;)
+      if (e == 0)
+        {
+          j += c;
+          if ( --i < 0 )
+            break;
+
+          e = ep[i];
+          c = BITS_PER_MPI_LIMB;
+        }
+      else
+        {
+          int c0;
+          mpi_limb_t e0;
+          struct gcry_mpi w, u;
+          w.sign = u.sign = 0;
+          w.flags = u.flags = 0;
+          w.d = base_u;
+
+          count_leading_zeros (c0, e);
+          e = (e << c0);
+          c -= c0;
+          j += c0;
+
+          e0 = (e >> (BITS_PER_MPI_LIMB - W));
+          if (c >= W)
+            c0 = 0;
+          else
+            {
+              if ( --i < 0 )
+                {
+                  e0 = (e >> (BITS_PER_MPI_LIMB - c));
+                  j += c - W;
+                  goto last_step;
+                }
+              else
+                {
+                  c0 = c;
+                  e = ep[i];
+                  c = BITS_PER_MPI_LIMB;
+                  e0 |= (e >> (BITS_PER_MPI_LIMB - (W - c0)));
+                }
+            }
+
+          e = e << (W - c0);
+          c -= (W - c0);
+
+        last_step:
+          count_trailing_zeros (c0, e0);
+          e0 = (e0 >> c0) >> 1;
+
+          for (j += W - c0; j >= 0; j--)
+            {
+
+              /*
+               *  base_u <= precomp[e0]
+               *  base_u_size <= precomp_size[e0]
+               */
+              base_u_size = 0;
+              for (k = 0; k < (1<< (W - 1)); k++)
+                {
+                  w.alloced = w.nlimbs = precomp_size[k];
+                  u.alloced = u.nlimbs = precomp_size[k];
+                  u.d = precomp[k];
+
+                  mpi_set_cond (&w, &u, k == e0);
+                  base_u_size |= ( precomp_size[k] & (0UL - (k == e0)) );
+                }
+
+              w.alloced = w.nlimbs = rsize;
+              u.alloced = u.nlimbs = rsize;
+              u.d = rp;
+              mpi_set_cond (&w, &u, j != 0);
+              base_u_size ^= ((base_u_size ^ rsize)  & (0UL - (j != 0)));
+
+              mul_mod (xp, &xsize, rp, rsize, base_u, base_u_size,
+                       mp, msize, &karactx);
+              tp = rp; rp = xp; xp = tp;
+              rsize = xsize;
+            }
+
+          j = c0;
+          if ( i < 0 )
+            break;
+        }
+
+    while (j--)
+      {
+        mul_mod (xp, &xsize, rp, rsize, rp, rsize, mp, msize, &karactx);
+        tp = rp; rp = xp; xp = tp;
+        rsize = xsize;
+      }
+
+    /* We shifted MOD, the modulo reduction argument, left
+       MOD_SHIFT_CNT steps.  Adjust the result by reducing it with the
+       original MOD.
+
+       Also make sure the result is put in RES->d (where it already
+       might be, see above).  */
+    if ( mod_shift_cnt )
+      {
+        carry_limb = _gcry_mpih_lshift( res->d, rp, rsize, mod_shift_cnt);
+        rp = res->d;
+        if ( carry_limb )
+          {
+            rp[rsize] = carry_limb;
+            rsize++;
+          }
+      }
+    else if (res->d != rp)
+      {
+        MPN_COPY (res->d, rp, rsize);
+        rp = res->d;
+      }
+
+    if ( rsize >= msize )
+      {
+        _gcry_mpih_divrem(rp + msize, 0, rp, rsize, mp, msize);
+        rsize = msize;
+      }
+
+    /* Remove any leading zero words from the result.  */
+    if ( mod_shift_cnt )
+      _gcry_mpih_rshift( rp, rp, rsize, mod_shift_cnt);
+    MPN_NORMALIZE (rp, rsize);
+
+    _gcry_mpih_release_karatsuba_ctx (&karactx );
+    for (i = 0; i < (1 << (W - 1)); i++)
+      _gcry_mpi_free_limb_space( precomp[i], esec ? precomp_size[i] : 0 );
+    _gcry_mpi_free_limb_space (base_u, esec ? max_u_size : 0);
+  }
+
+  /* Fixup for negative results.  */
+  if ( negative_result && rsize )
+    {
+      if ( mod_shift_cnt )
+        _gcry_mpih_rshift( mp, mp, msize, mod_shift_cnt);
+      _gcry_mpih_sub( rp, mp, msize, rp, rsize);
+      rsize = msize;
+      rsign = msign;
+      MPN_NORMALIZE(rp, rsize);
+    }
+  gcry_assert (res->d == rp);
+  res->nlimbs = rsize;
+  res->sign = rsign;
+
+ leave:
+  if (mp_marker)
+    _gcry_mpi_free_limb_space( mp_marker, mp_nlimbs );
+  if (bp_marker)
+    _gcry_mpi_free_limb_space( bp_marker, bp_nlimbs );
+  if (ep_marker)
+    _gcry_mpi_free_limb_space( ep_marker, ep_nlimbs );
+  if (xp_marker)
+    _gcry_mpi_free_limb_space( xp_marker, xp_nlimbs );
+}
+#endif
diff --git a/grub-core/lib/libgcrypt/mpi/mpi-scan.c 
b/grub-core/lib/libgcrypt/mpi/mpi-scan.c
index 2473cd9b7..e27f7faa9 100644
--- a/grub-core/lib/libgcrypt/mpi/mpi-scan.c
+++ b/grub-core/lib/libgcrypt/mpi/mpi-scan.c
@@ -31,79 +31,79 @@
  *
  * FIXME: This code is VERY ugly!
  */
-int
-_gcry_mpi_getbyte( gcry_mpi_t a, unsigned idx )
-{
-    int i, j;
-    unsigned n;
-    mpi_ptr_t ap;
-    mpi_limb_t limb;
+/* int */
+/* _gcry_mpi_getbyte( gcry_mpi_t a, unsigned idx ) */
+/* { */
+/*     int i, j; */
+/*     unsigned n; */
+/*     mpi_ptr_t ap; */
+/*     mpi_limb_t limb; */
 
-    ap = a->d;
-    for(n=0,i=0; i < a->nlimbs; i++ ) {
-       limb = ap[i];
-       for( j=0; j < BYTES_PER_MPI_LIMB; j++, n++ )
-           if( n == idx )
-               return (limb >> j*8) & 0xff;
-    }
-    return -1;
-}
+/*     ap = a->d; */
+/*     for(n=0,i=0; i < a->nlimbs; i++ ) { */
+/*     limb = ap[i]; */
+/*     for( j=0; j < BYTES_PER_MPI_LIMB; j++, n++ ) */
+/*         if( n == idx ) */
+/*             return (limb >> j*8) & 0xff; */
+/*     } */
+/*     return -1; */
+/* } */
 
 
 /****************
  * Put a value at position IDX into A. idx counts from lsb to msb
  */
-void
-_gcry_mpi_putbyte( gcry_mpi_t a, unsigned idx, int xc )
-{
-    int i, j;
-    unsigned n;
-    mpi_ptr_t ap;
-    mpi_limb_t limb, c;
+/* void */
+/* _gcry_mpi_putbyte( gcry_mpi_t a, unsigned idx, int xc ) */
+/* { */
+/*     int i, j; */
+/*     unsigned n; */
+/*     mpi_ptr_t ap; */
+/*     mpi_limb_t limb, c; */
 
-    c = xc & 0xff;
-    ap = a->d;
-    for(n=0,i=0; i < a->alloced; i++ ) {
-       limb = ap[i];
-       for( j=0; j < BYTES_PER_MPI_LIMB; j++, n++ )
-           if( n == idx ) {
-             #if BYTES_PER_MPI_LIMB == 4
-               if( j == 0 )
-                   limb = (limb & 0xffffff00) | c;
-               else if( j == 1 )
-                   limb = (limb & 0xffff00ff) | (c<<8);
-               else if( j == 2 )
-                   limb = (limb & 0xff00ffff) | (c<<16);
-               else
-                   limb = (limb & 0x00ffffff) | (c<<24);
-             #elif BYTES_PER_MPI_LIMB == 8
-               if( j == 0 )
-                   limb = (limb & 0xffffffffffffff00) | c;
-               else if( j == 1 )
-                   limb = (limb & 0xffffffffffff00ff) | (c<<8);
-               else if( j == 2 )
-                   limb = (limb & 0xffffffffff00ffff) | (c<<16);
-               else if( j == 3 )
-                   limb = (limb & 0xffffffff00ffffff) | (c<<24);
-               else if( j == 4 )
-                   limb = (limb & 0xffffff00ffffffff) | (c<<32);
-               else if( j == 5 )
-                   limb = (limb & 0xffff00ffffffffff) | (c<<40);
-               else if( j == 6 )
-                   limb = (limb & 0xff00ffffffffffff) | (c<<48);
-               else
-                   limb = (limb & 0x00ffffffffffffff) | (c<<56);
-             #else
-                #error please enhance this function, its ugly - i know.
-             #endif
-               if( a->nlimbs <= i )
-                   a->nlimbs = i+1;
-               ap[i] = limb;
-               return;
-           }
-    }
-    abort(); /* index out of range */
-}
+/*     c = xc & 0xff; */
+/*     ap = a->d; */
+/*     for(n=0,i=0; i < a->alloced; i++ ) { */
+/*     limb = ap[i]; */
+/*     for( j=0; j < BYTES_PER_MPI_LIMB; j++, n++ ) */
+/*         if( n == idx ) { */
+/*           #if BYTES_PER_MPI_LIMB == 4 */
+/*             if( j == 0 ) */
+/*                 limb = (limb & 0xffffff00) | c; */
+/*             else if( j == 1 ) */
+/*                 limb = (limb & 0xffff00ff) | (c<<8); */
+/*             else if( j == 2 ) */
+/*                 limb = (limb & 0xff00ffff) | (c<<16); */
+/*             else */
+/*                 limb = (limb & 0x00ffffff) | (c<<24); */
+/*           #elif BYTES_PER_MPI_LIMB == 8 */
+/*             if( j == 0 ) */
+/*                 limb = (limb & 0xffffffffffffff00) | c; */
+/*             else if( j == 1 ) */
+/*                 limb = (limb & 0xffffffffffff00ff) | (c<<8); */
+/*             else if( j == 2 ) */
+/*                 limb = (limb & 0xffffffffff00ffff) | (c<<16); */
+/*             else if( j == 3 ) */
+/*                 limb = (limb & 0xffffffff00ffffff) | (c<<24); */
+/*             else if( j == 4 ) */
+/*                 limb = (limb & 0xffffff00ffffffff) | (c<<32); */
+/*             else if( j == 5 ) */
+/*                 limb = (limb & 0xffff00ffffffffff) | (c<<40); */
+/*             else if( j == 6 ) */
+/*                 limb = (limb & 0xff00ffffffffffff) | (c<<48); */
+/*             else */
+/*                 limb = (limb & 0x00ffffffffffffff) | (c<<56); */
+/*           #else */
+/*              #error please enhance this function, its ugly - i know. */
+/*           #endif */
+/*             if( a->nlimbs <= i ) */
+/*                 a->nlimbs = i+1; */
+/*             ap[i] = limb; */
+/*             return; */
+/*         } */
+/*     } */
+/*     abort(); /\* index out of range *\/ */
+/* } */
 
 
 /****************
diff --git a/grub-core/lib/libgcrypt/mpi/mpicoder.c 
b/grub-core/lib/libgcrypt/mpi/mpicoder.c
index 6fe389165..830ee4e26 100644
--- a/grub-core/lib/libgcrypt/mpi/mpicoder.c
+++ b/grub-core/lib/libgcrypt/mpi/mpicoder.c
@@ -1,6 +1,7 @@
 /* mpicoder.c  -  Coder for the external representation of MPIs
  * Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003
  *               2008 Free Software Foundation, Inc.
+ * Copyright (C) 2013, 2014 g10 Code GmbH
  *
  * This file is part of Libgcrypt.
  *
@@ -25,9 +26,23 @@
 
 #include "mpi-internal.h"
 #include "g10lib.h"
-
+#include "../cipher/bufhelp.h"
+
+/* The maximum length we support in the functions converting an
+ * external representation to an MPI.  This limit is used to catch
+ * programming errors and to avoid DoS due to insane long allocations.
+ * The 16 MiB limit is actually ridiculous large but some of those PQC
+ * algorithms use quite large keys and they might end up using MPIs
+ * for that.  */
+#define MAX_EXTERN_SCAN_BYTES (16*1024*1024)
+
+/* The maximum length (in bits) we support for OpenPGP MPIs.  Note
+ * that OpenPGP's MPI format uses only two bytes and thus would be
+ * limited to 64k anyway.  Note that this limit matches that used by
+ * GnuPG.  */
 #define MAX_EXTERN_MPI_BITS 16384
 
+
 /* Helper used to scan PGP style MPIs.  Returns NULL on failure. */
 static gcry_mpi_t
 mpi_read_from_buffer (const unsigned char *buffer, unsigned *ret_nread,
@@ -37,8 +52,9 @@ mpi_read_from_buffer (const unsigned char *buffer, unsigned 
*ret_nread,
   unsigned int nbits, nbytes, nlimbs, nread=0;
   mpi_limb_t a;
   gcry_mpi_t val = MPI_NULL;
+  unsigned int max_nread = *ret_nread;
 
-  if ( *ret_nread < 2 )
+  if ( max_nread < 2 )
     goto leave;
   nbits = buffer[0] << 8 | buffer[1];
   if ( nbits > MAX_EXTERN_MPI_BITS )
@@ -59,9 +75,22 @@ mpi_read_from_buffer (const unsigned char *buffer, unsigned 
*ret_nread,
   for ( ; j > 0; j-- )
     {
       a = 0;
+      if (i == 0 && nread + BYTES_PER_MPI_LIMB <= max_nread)
+       {
+#if BYTES_PER_MPI_LIMB == 4
+         a = buf_get_be32 (buffer);
+#elif BYTES_PER_MPI_LIMB == 8
+         a = buf_get_be64 (buffer);
+#else
+#     error please implement for this limb size.
+#endif
+         buffer += BYTES_PER_MPI_LIMB;
+         nread += BYTES_PER_MPI_LIMB;
+         i += BYTES_PER_MPI_LIMB;
+       }
       for (; i < BYTES_PER_MPI_LIMB; i++ )
         {
-          if ( ++nread > *ret_nread )
+          if ( ++nread > max_nread )
             {
 /*               log_debug ("mpi larger than buffer"); */
               mpi_free (val);
@@ -85,8 +114,45 @@ mpi_read_from_buffer (const unsigned char *buffer, unsigned 
*ret_nread,
  * Fill the mpi VAL from the hex string in STR.
  */
 static int
-mpi_fromstr (gcry_mpi_t val, const char *str)
+mpi_fromstr (gcry_mpi_t val, const char *str, size_t slen)
 {
+  static const int hex2int[2][256] =
+  {
+    {
+      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0x00, 0x10, 0x20, 0x30,
+      0x40, 0x50, 0x60, 0x70, 0x80, 0x90, -1, -1, -1, -1, -1, -1, -1, 0xa0,
+      0xb0, 0xc0, 0xd0, 0xe0, 0xf0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0xa0,
+      0xb0, 0xc0, 0xd0, 0xe0, 0xf0, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
+    },
+    {
+      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0x00, 0x01, 0x02, 0x03,
+      0x04, 0x05, 0x06, 0x07, 0x08, 0x09, -1, -1, -1, -1, -1, -1, -1, 0x0a,
+      0x0b, 0x0c, 0x0d, 0x0e, 0x0f, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0x0a,
+      0x0b, 0x0c, 0x0d, 0x0e, 0x0f, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
+    }
+  };
   int sign = 0;
   int prepend_zero = 0;
   int i, j, c, c1, c2;
@@ -97,13 +163,17 @@ mpi_fromstr (gcry_mpi_t val, const char *str)
     {
       sign = 1;
       str++;
+      slen--;
     }
 
   /* Skip optional hex prefix.  */
   if ( *str == '0' && str[1] == 'x' )
-    str += 2;
+    {
+      str += 2;
+      slen -= 2;
+    }
 
-  nbits = 4 * strlen (str);
+  nbits = slen * 4;
   if ((nbits % 8))
     prepend_zero = 1;
 
@@ -120,6 +190,44 @@ mpi_fromstr (gcry_mpi_t val, const char *str)
   for (; j > 0; j--)
     {
       a = 0;
+
+      if (prepend_zero == 0 && (i & 31) == 0)
+       {
+         while (slen >= sizeof(u32) * 2)
+           {
+             u32 n, m;
+             u32 x, y;
+
+             x = buf_get_le32(str);
+             y = buf_get_le32(str + 4);
+             str += 8;
+             slen -= 8;
+
+             a <<= 31; /* Two step to avoid compiler warning on 32-bit. */
+             a <<= 1;
+
+             n = (hex2int[0][(x >> 0) & 0xff]
+                  | hex2int[1][(x >> 8) & 0xff]) << 8;
+             m = (hex2int[0][(y >> 0) & 0xff]
+                  | hex2int[1][(y >> 8) & 0xff]) << 8;
+             n |= hex2int[0][(x >> 16) & 0xff];
+             n |= hex2int[1][(x >> 24) & 0xff];
+             m |= hex2int[0][(y >> 16) & 0xff];
+             m |= hex2int[1][(y >> 24) & 0xff];
+
+             a |= (n << 16) | m;
+             i += 32;
+             if ((int)(n | m) < 0)
+               {
+                 /* Invalid character. */
+                 mpi_clear (val);
+                 return 1;  /* Error.  */
+               }
+             if (i == BITS_PER_MPI_LIMB)
+               break;
+           }
+       }
+
       for (; i < BYTES_PER_MPI_LIMB; i++)
         {
           if (prepend_zero)
@@ -128,7 +236,10 @@ mpi_fromstr (gcry_mpi_t val, const char *str)
               prepend_zero = 0;
            }
           else
-            c1 = *str++;
+           {
+             c1 = *str++;
+             slen--;
+           }
 
           if (!c1)
             {
@@ -136,30 +247,15 @@ mpi_fromstr (gcry_mpi_t val, const char *str)
               return 1;  /* Error.  */
            }
           c2 = *str++;
+         slen--;
           if (!c2)
             {
               mpi_clear (val);
               return 1;  /* Error.  */
            }
-          if ( c1 >= '0' && c1 <= '9' )
-            c = c1 - '0';
-          else if ( c1 >= 'a' && c1 <= 'f' )
-            c = c1 - 'a' + 10;
-          else if ( c1 >= 'A' && c1 <= 'F' )
-            c = c1 - 'A' + 10;
-          else
-            {
-              mpi_clear (val);
-              return 1;  /* Error.  */
-           }
-          c <<= 4;
-          if ( c2 >= '0' && c2 <= '9' )
-            c |= c2 - '0';
-          else if( c2 >= 'a' && c2 <= 'f' )
-            c |= c2 - 'a' + 10;
-          else if( c2 >= 'A' && c2 <= 'F' )
-            c |= c2 - 'A' + 10;
-          else
+         c = hex2int[0][c1 & 0xff];
+         c |= hex2int[1][c2 & 0xff];
+          if (c < 0)
             {
               mpi_clear(val);
               return 1;  /* Error. */
@@ -175,119 +271,142 @@ mpi_fromstr (gcry_mpi_t val, const char *str)
 }
 
 
-/* Dump the value of A in a format suitable for debugging to
-   Libgcrypt's logging stream.  Note that one leading space but no
-   trailing space or linefeed will be printed.  It is okay to pass
-   NULL for A. */
-void
-gcry_mpi_dump (const gcry_mpi_t a)
-{
-  int i;
-
-  log_printf (" ");
-  if (!a)
-    log_printf ("[MPI_NULL]");
-  else
-    {
-      if (a->sign)
-        log_printf ( "-");
-#if BYTES_PER_MPI_LIMB == 2
-# define X "4"
-#elif BYTES_PER_MPI_LIMB == 4
-# define X "8"
-#elif BYTES_PER_MPI_LIMB == 8
-# define X "16"
-#elif BYTES_PER_MPI_LIMB == 16
-# define X "32"
-#else
-# error please define the format here
-#endif
-      for (i=a->nlimbs; i > 0 ; i-- )
-        {
-          log_printf (i != a->nlimbs? "%0" X "lX":"%lX", (ulong)a->d[i-1]);
-        }
-#undef X
-      if (!a->nlimbs)
-        log_printf ("0");
-    }
-}
-
-/* Convience function used internally. */
-void
-_gcry_log_mpidump (const char *text, gcry_mpi_t a)
-{
-  log_printf ("%s:", text);
-  gcry_mpi_dump (a);
-  log_printf ("\n");
-}
-
-
 /* Return an allocated buffer with the MPI (msb first).  NBYTES
-   receives the length of this buffer.  Caller must free the return
-   string.  This function returns an allocated buffer with NBYTES set
-   to zero if the value of A is zero.  If sign is not NULL, it will be
-   set to the sign of the A.  On error NULL is returned and ERRNO set
-   appropriately.  */
+   receives the length of this buffer.  If FILL_LE is not 0, the
+   returned value is stored as little endian and right padded with
+   zeroes so that the returned buffer has at least FILL_LE bytes.
+
+   If EXTRAALLOC > 0 the returned buffer has these number of bytes
+   extra allocated at the end; if EXTRAALLOC < 0 the returned buffer
+   has the absolute value of EXTRAALLOC allocated at the begin of the
+   buffer (the are not initialized) and the MPI is stored right after
+   this.  This feature is useful to allow the caller to prefix the
+   returned value.  EXTRAALLOC is _not_ included in the value stored
+   at NBYTES.
+
+   Caller must free the return string.  This function returns an
+   allocated buffer with NBYTES set to zero if the value of A is zero.
+   If sign is not NULL, it will be set to the sign of the A.  On error
+   NULL is returned and ERRNO set appropriately.  */
 static unsigned char *
-do_get_buffer (gcry_mpi_t a, unsigned int *nbytes, int *sign, int force_secure)
+do_get_buffer (gcry_mpi_t a, unsigned int fill_le, int extraalloc,
+               unsigned int *nbytes, int *sign, int force_secure)
 {
-  unsigned char *p, *buffer;
+  unsigned char *p, *buffer, *retbuffer;
+  unsigned int length, tmp;
   mpi_limb_t alimb;
   int i;
-  size_t n;
+  size_t n, n2;
 
   if (sign)
     *sign = a->sign;
 
   *nbytes = a->nlimbs * BYTES_PER_MPI_LIMB;
   n = *nbytes? *nbytes:1; /* Allocate at least one byte.  */
-  p = buffer = (force_secure || mpi_is_secure(a))? gcry_malloc_secure (n)
-                                                : gcry_malloc (n);
-  if (!buffer)
+  if (n < fill_le)
+    n = fill_le;
+  if (extraalloc < 0)
+    n2 = n + -extraalloc;
+  else
+    n2 = n + extraalloc;
+
+  retbuffer = (force_secure || mpi_is_secure(a))? xtrymalloc_secure (n2)
+                                                : xtrymalloc (n2);
+  if (!retbuffer)
     return NULL;
+  if (extraalloc < 0)
+    buffer = retbuffer + -extraalloc;
+  else
+    buffer = retbuffer;
+  p = buffer;
 
   for (i=a->nlimbs-1; i >= 0; i--)
     {
       alimb = a->d[i];
 #if BYTES_PER_MPI_LIMB == 4
-      *p++ = alimb >> 24;
-      *p++ = alimb >> 16;
-      *p++ = alimb >>  8;
-      *p++ = alimb       ;
+      buf_put_be32 (p, alimb);
+      p += 4;
 #elif BYTES_PER_MPI_LIMB == 8
-      *p++ = alimb >> 56;
-      *p++ = alimb >> 48;
-      *p++ = alimb >> 40;
-      *p++ = alimb >> 32;
-      *p++ = alimb >> 24;
-      *p++ = alimb >> 16;
-      *p++ = alimb >>  8;
-      *p++ = alimb       ;
+      buf_put_be64 (p, alimb);
+      p += 8;
 #else
 #     error please implement for this limb size.
 #endif
     }
 
+  if (fill_le)
+    {
+      length = *nbytes;
+      /* Reverse buffer and pad with zeroes.  */
+      for (i = 0; i + 8 < length / 2; i += 8)
+       {
+         u64 head = buf_get_be64 (buffer + i);
+         u64 tail = buf_get_be64 (buffer + length - 8 - i);
+         buf_put_le64 (buffer + length - 8 - i, head);
+         buf_put_le64 (buffer + i, tail);
+       }
+      if (i + 4 < length / 2)
+       {
+         u32 head = buf_get_be32 (buffer + i);
+         u32 tail = buf_get_be32 (buffer + length - 4 - i);
+         buf_put_le32 (buffer + length - 4 - i, head);
+         buf_put_le32 (buffer + i, tail);
+         i += 4;
+       }
+      for (; i < length/2; i++)
+        {
+          tmp = buffer[i];
+          buffer[i] = buffer[length-1-i];
+          buffer[length-1-i] = tmp;
+        }
+      /* Pad with zeroes.  */
+      for (p = buffer + length; length < fill_le; length++)
+        *p++ = 0;
+      *nbytes = length;
+
+      return retbuffer;
+    }
+
   /* This is sub-optimal but we need to do the shift operation because
      the caller has to free the returned buffer.  */
   for (p=buffer; *nbytes && !*p; p++, --*nbytes)
     ;
   if (p != buffer)
-    memmove (buffer,p, *nbytes);
-  return buffer;
+    memmove (buffer, p, *nbytes);
+  return retbuffer;
 }
 
 
 byte *
-_gcry_mpi_get_buffer (gcry_mpi_t a, unsigned int *nbytes, int *sign)
+_gcry_mpi_get_buffer (gcry_mpi_t a, unsigned int fill_le,
+                      unsigned int *r_nbytes, int *sign)
 {
-  return do_get_buffer (a, nbytes, sign, 0);
+  if (mpi_get_flag (a, GCRYMPI_FLAG_OPAQUE))
+    {
+      unsigned int nbits;
+      byte *p = _gcry_mpi_get_opaque_copy (a, &nbits);
+
+      if (r_nbytes)
+        *r_nbytes = (nbits+7)/8;
+
+      return p;
+    }
+  else
+    return do_get_buffer (a, fill_le, 0, r_nbytes, sign, 0);
 }
 
 byte *
-_gcry_mpi_get_secure_buffer (gcry_mpi_t a, unsigned *nbytes, int *sign)
+_gcry_mpi_get_buffer_extra (gcry_mpi_t a, unsigned int fill_le, int extraalloc,
+                            unsigned int *r_nbytes, int *sign)
 {
-  return do_get_buffer (a, nbytes, sign, 1);
+  return do_get_buffer (a, fill_le, extraalloc, r_nbytes, sign, 0);
+}
+
+byte *
+_gcry_mpi_get_secure_buffer (gcry_mpi_t a, unsigned int fill_le,
+                             unsigned int *r_nbytes, int *sign)
+{
+  return do_get_buffer (a, fill_le, 0, r_nbytes, sign, 1);
 }
 
 
@@ -305,6 +424,12 @@ _gcry_mpi_set_buffer (gcry_mpi_t a, const void *buffer_arg,
   int nlimbs;
   int i;
 
+  if (mpi_is_immutable (a))
+    {
+      mpi_immutable_failed ();
+      return;
+    }
+
   nlimbs = (nbytes + BYTES_PER_MPI_LIMB - 1) / BYTES_PER_MPI_LIMB;
   RESIZE_IF_NEEDED(a, nlimbs);
   a->sign = sign;
@@ -312,53 +437,33 @@ _gcry_mpi_set_buffer (gcry_mpi_t a, const void 
*buffer_arg,
   for (i=0, p = buffer+nbytes-1; p >= buffer+BYTES_PER_MPI_LIMB; )
     {
 #if BYTES_PER_MPI_LIMB == 4
-      alimb  = *p--        ;
-      alimb |= *p-- <<  8 ;
-      alimb |= *p-- << 16 ;
-      alimb |= *p-- << 24 ;
+      alimb = buf_get_be32(p - 4 + 1);
+      p -= 4;
 #elif BYTES_PER_MPI_LIMB == 8
-      alimb  = (mpi_limb_t)*p--        ;
-      alimb |= (mpi_limb_t)*p-- <<  8 ;
-      alimb |= (mpi_limb_t)*p-- << 16 ;
-      alimb |= (mpi_limb_t)*p-- << 24 ;
-      alimb |= (mpi_limb_t)*p-- << 32 ;
-      alimb |= (mpi_limb_t)*p-- << 40 ;
-      alimb |= (mpi_limb_t)*p-- << 48 ;
-      alimb |= (mpi_limb_t)*p-- << 56 ;
+      alimb = buf_get_be64(p - 8 + 1);
+      p -= 8;
 #else
-#       error please implement for this limb size.
+#     error please implement for this limb size.
 #endif
       a->d[i++] = alimb;
     }
   if ( p >= buffer )
     {
+      byte last[BYTES_PER_MPI_LIMB] = { 0 };
+      unsigned int n = (p - buffer) + 1;
+
+      n = n > BYTES_PER_MPI_LIMB ? BYTES_PER_MPI_LIMB : n;
+      memcpy (last + BYTES_PER_MPI_LIMB - n, p - n + 1, n);
+      p -= n;
+
 #if BYTES_PER_MPI_LIMB == 4
-      alimb  = *p--;
-      if (p >= buffer)
-        alimb |= *p-- <<  8;
-      if (p >= buffer)
-        alimb |= *p-- << 16;
-      if (p >= buffer)
-        alimb |= *p-- << 24;
+      alimb = buf_get_be32(last);
 #elif BYTES_PER_MPI_LIMB == 8
-      alimb  = (mpi_limb_t)*p--;
-      if (p >= buffer)
-        alimb |= (mpi_limb_t)*p-- << 8;
-      if (p >= buffer)
-        alimb |= (mpi_limb_t)*p-- << 16;
-      if (p >= buffer)
-        alimb |= (mpi_limb_t)*p-- << 24;
-      if (p >= buffer)
-        alimb |= (mpi_limb_t)*p-- << 32;
-      if (p >= buffer)
-        alimb |= (mpi_limb_t)*p-- << 40;
-      if (p >= buffer)
-        alimb |= (mpi_limb_t)*p-- << 48;
-      if (p >= buffer)
-        alimb |= (mpi_limb_t)*p-- << 56;
+      alimb = buf_get_be64(last);
 #else
 #     error please implement for this limb size.
 #endif
+
       a->d[i++] = alimb;
     }
   a->nlimbs = i;
@@ -366,21 +471,85 @@ _gcry_mpi_set_buffer (gcry_mpi_t a, const void 
*buffer_arg,
 }
 
 
+static void
+onecompl (gcry_mpi_t a)
+{
+  mpi_ptr_t ap;
+  mpi_size_t n;
+  unsigned int i;
+  unsigned int nbits;
+
+  if (!a || mpi_is_immutable (a))
+    {
+      mpi_immutable_failed ();
+      return;
+    }
+
+  nbits = mpi_get_nbits (a);
+
+  mpi_normalize (a);
+  ap = a->d;
+  n = a->nlimbs;
+
+  for( i = 0; i < n; i++ )
+    ap[i] ^= (mpi_limb_t)(-1);
+
+  a->sign = 0;
+  mpi_clear_highbit (a, nbits-1);
+}
+
+
+/* Perform a two's complement operation on buffer P of size N bytes.  */
+static void
+twocompl (unsigned char *p, unsigned int n)
+{
+  int i;
+
+  for (i=n-1; i >= 0 && !p[i]; i--)
+    ;
+  if (i >= 0)
+    {
+      unsigned char pi = p[i];
+      unsigned int ntz = _gcry_ctz (pi);
+
+      p[i] = ((p[i] ^ (0xfe << ntz)) | (0x01 << ntz)) & (0xff << ntz);
+
+      for (i--; i >= 7; i -= 8)
+       {
+         buf_put_he64(&p[i-7], ~buf_get_he64(&p[i-7]));
+       }
+      if (i >= 3)
+       {
+         buf_put_he32(&p[i-3], ~buf_get_he32(&p[i-3]));
+         i -= 4;
+       }
+      for (; i >= 0; i--)
+       {
+         p[i] ^= 0xff;
+       }
+    }
+}
+
+
 /* Convert the external representation of an integer stored in BUFFER
-   with a length of BUFLEN into a newly create MPI returned in
-   RET_MPI.  If NBYTES is not NULL, it will receive the number of
-   bytes actually scanned after a successful operation.  */
-gcry_error_t
-gcry_mpi_scan (struct gcry_mpi **ret_mpi, enum gcry_mpi_format format,
-               const void *buffer_arg, size_t buflen, size_t *nscanned)
+ * with a length of BUFLEN into a newly create MPI returned in
+ * RET_MPI.  If NSCANNED is not NULL, it will receive the number of
+ * bytes actually scanned after a successful operation.  */
+gcry_err_code_t
+_gcry_mpi_scan (struct gcry_mpi **ret_mpi, enum gcry_mpi_format format,
+                const void *buffer_arg, size_t buflen, size_t *nscanned)
 {
   const unsigned char *buffer = (const unsigned char*)buffer_arg;
   struct gcry_mpi *a = NULL;
   unsigned int len;
-  int secure = (buffer && gcry_is_secure (buffer));
+  int secure = (buffer && _gcry_is_secure (buffer));
 
-  if (!buffer)
-    return gcry_error (GPG_ERR_INV_ARG);
+  if (buflen > MAX_EXTERN_SCAN_BYTES)
+    {
+      if (nscanned)
+        *nscanned = 0;
+      return GPG_ERR_INV_OBJ;
+    }
 
   if (format == GCRYMPI_FMT_SSH)
     len = 0;
@@ -396,15 +565,14 @@ gcry_mpi_scan (struct gcry_mpi **ret_mpi, enum 
gcry_mpi_format format,
                 : mpi_alloc ((len+BYTES_PER_MPI_LIMB-1)/BYTES_PER_MPI_LIMB);
       if (len)
         {
+          _gcry_mpi_set_buffer (a, s, len, 0);
           a->sign = !!(*s & 0x80);
           if (a->sign)
             {
-              /* FIXME: we have to convert from 2compl to magnitude format */
-              mpi_free (a);
-              return gcry_error (GPG_ERR_INTERNAL);
+              onecompl (a);
+              mpi_add_ui (a, a, 1);
+              a->sign = 1;
            }
-          else
-            _gcry_mpi_set_buffer (a, s, len, 0);
        }
       if (ret_mpi)
         {
@@ -413,6 +581,8 @@ gcry_mpi_scan (struct gcry_mpi **ret_mpi, enum 
gcry_mpi_format format,
        }
       else
         mpi_free(a);
+      if (nscanned)
+        *nscanned = len;
       return 0;
     }
   else if (format == GCRYMPI_FMT_USG)
@@ -430,6 +600,8 @@ gcry_mpi_scan (struct gcry_mpi **ret_mpi, enum 
gcry_mpi_format format,
        }
       else
         mpi_free(a);
+      if (nscanned)
+        *nscanned = len;
       return 0;
     }
   else if (format == GCRYMPI_FMT_PGP)
@@ -447,7 +619,7 @@ gcry_mpi_scan (struct gcry_mpi **ret_mpi, enum 
gcry_mpi_format format,
           mpi_free(a);
           a = NULL;
         }
-      return a? 0 : gcry_error (GPG_ERR_INV_OBJ);
+      return a? 0 : GPG_ERR_INV_OBJ;
     }
   else if (format == GCRYMPI_FMT_SSH)
     {
@@ -459,29 +631,28 @@ gcry_mpi_scan (struct gcry_mpi **ret_mpi, enum 
gcry_mpi_format format,
          allow the BUFLEN argument to act as a sanitiy check.  Same
          below. */
       if (len && len < 4)
-        return gcry_error (GPG_ERR_TOO_SHORT);
+        return GPG_ERR_TOO_SHORT;
 
-      n = ((size_t)s[0] << 24 | (size_t)s[1] << 16 | (size_t)s[2] << 8 | 
(size_t)s[3]);
+      n = buf_get_be32 (s);
       s += 4;
       if (len)
         len -= 4;
       if (len && n > len)
-        return gcry_error (GPG_ERR_TOO_LARGE);
+        return GPG_ERR_TOO_LARGE;
 
       a = secure? mpi_alloc_secure ((n+BYTES_PER_MPI_LIMB-1)
                                     /BYTES_PER_MPI_LIMB)
                 : mpi_alloc ((n+BYTES_PER_MPI_LIMB-1)/BYTES_PER_MPI_LIMB);
       if (n)
         {
+          _gcry_mpi_set_buffer( a, s, n, 0 );
           a->sign = !!(*s & 0x80);
           if (a->sign)
             {
-              /* FIXME: we have to convert from 2compl to magnitude format */
-              mpi_free(a);
-              return gcry_error (GPG_ERR_INTERNAL);
+              onecompl (a);
+              mpi_add_ui (a, a, 1);
+              a->sign = 1;
            }
-          else
-            _gcry_mpi_set_buffer( a, s, n, 0 );
        }
       if (nscanned)
         *nscanned = n+4;
@@ -496,15 +667,22 @@ gcry_mpi_scan (struct gcry_mpi **ret_mpi, enum 
gcry_mpi_format format,
     }
   else if (format == GCRYMPI_FMT_HEX)
     {
+      size_t slen;
       /* We can only handle C strings for now.  */
       if (buflen)
-        return gcry_error (GPG_ERR_INV_ARG);
-
-      a = secure? mpi_alloc_secure (0) : mpi_alloc(0);
-      if (mpi_fromstr (a, (const char *)buffer))
+        return GPG_ERR_INV_ARG;
+
+      slen = strlen ((const char *)buffer);
+      if (slen > MAX_EXTERN_SCAN_BYTES)
+       return GPG_ERR_INV_OBJ;
+      a = secure? mpi_alloc_secure ((((slen+1)/2)+BYTES_PER_MPI_LIMB-1)
+                                   /BYTES_PER_MPI_LIMB)
+               : mpi_alloc((((slen+1)/2)+BYTES_PER_MPI_LIMB-1)
+                           /BYTES_PER_MPI_LIMB);
+      if (mpi_fromstr (a, (const char *)buffer, slen))
         {
           mpi_free (a);
-          return gcry_error (GPG_ERR_INV_OBJ);
+          return GPG_ERR_INV_OBJ;
         }
       if (ret_mpi)
         {
@@ -513,10 +691,12 @@ gcry_mpi_scan (struct gcry_mpi **ret_mpi, enum 
gcry_mpi_format format,
        }
       else
         mpi_free(a);
+      if (nscanned)
+        *nscanned = strlen ((const char*)buffer);
       return 0;
     }
   else
-    return gcry_error (GPG_ERR_INV_ARG);
+    return GPG_ERR_INV_ARG;
 }
 
 
@@ -526,18 +706,29 @@ gcry_mpi_scan (struct gcry_mpi **ret_mpi, enum 
gcry_mpi_format format,
    receives the actual length of the external representation unless it
    has been passed as NULL.  BUFFER may be NULL to query the required
    length.  */
-gcry_error_t
-gcry_mpi_print (enum gcry_mpi_format format,
-                unsigned char *buffer, size_t buflen,
-                size_t *nwritten, struct gcry_mpi *a)
+gcry_err_code_t
+_gcry_mpi_print (enum gcry_mpi_format format,
+                 unsigned char *buffer, size_t buflen,
+                 size_t *nwritten, struct gcry_mpi *a)
 {
   unsigned int nbits = mpi_get_nbits (a);
   size_t len;
   size_t dummy_nwritten;
+  int negative;
 
   if (!nwritten)
     nwritten = &dummy_nwritten;
 
+  /* Libgcrypt does no always care to set clear the sign if the value
+     is 0.  For printing this is a bit of a surprise, in particular
+     because if some of the formats don't support negative numbers but
+     should be able to print a zero.  Thus we need this extra test
+     for a negative number.  */
+  if (a->sign && _gcry_mpi_cmp_ui (a, 0))
+    negative = 1;
+  else
+    negative = 0;
+
   len = buflen;
   *nwritten = 0;
   if (format == GCRYMPI_FMT_STD)
@@ -546,33 +737,46 @@ gcry_mpi_print (enum gcry_mpi_format format,
       int extra = 0;
       unsigned int n;
 
-      if (a->sign)
-        return gcry_error (GPG_ERR_INTERNAL); /* Can't handle it yet. */
-
-      tmp = _gcry_mpi_get_buffer (a, &n, NULL);
+      tmp = _gcry_mpi_get_buffer (a, 0, &n, NULL);
       if (!tmp)
-        return gpg_error_from_syserror ();
-      if (n && (*tmp & 0x80))
+        return gpg_err_code_from_syserror ();
+
+      if (negative)
         {
+          twocompl (tmp, n);
+          if (!(*tmp & 0x80))
+            {
+              /* Need to extend the sign.  */
+              n++;
+              extra = 2;
+            }
+        }
+      else if (n && (*tmp & 0x80))
+        {
+          /* Positive but the high bit of the returned buffer is set.
+             Thus we need to print an extra leading 0x00 so that the
+             output is interpreted as a positive number.  */
           n++;
-          extra=1;
+          extra = 1;
        }
 
       if (buffer && n > len)
         {
           /* The provided buffer is too short. */
-          gcry_free (tmp);
-          return gcry_error (GPG_ERR_TOO_SHORT);
+          xfree (tmp);
+          return GPG_ERR_TOO_SHORT;
        }
       if (buffer)
         {
           unsigned char *s = buffer;
 
-          if (extra)
+          if (extra == 1)
             *s++ = 0;
-          memcpy (s, tmp, n-extra);
+          else if (extra)
+            *s++ = 0xff;
+          memcpy (s, tmp, n-!!extra);
        }
-      gcry_free(tmp);
+      xfree (tmp);
       *nwritten = n;
       return 0;
     }
@@ -583,17 +787,18 @@ gcry_mpi_print (enum gcry_mpi_format format,
       /* Note:  We ignore the sign for this format.  */
       /* FIXME: for performance reasons we should put this into
         mpi_aprint because we can then use the buffer directly.  */
+
       if (buffer && n > len)
-        return gcry_error (GPG_ERR_TOO_SHORT);
+        return GPG_ERR_TOO_SHORT;
       if (buffer)
         {
           unsigned char *tmp;
 
-          tmp = _gcry_mpi_get_buffer (a, &n, NULL);
+          tmp = _gcry_mpi_get_buffer (a, 0, &n, NULL);
           if (!tmp)
-            return gpg_error_from_syserror ();
+            return gpg_err_code_from_syserror ();
           memcpy (buffer, tmp, n);
-          gcry_free (tmp);
+          xfree (tmp);
        }
       *nwritten = n;
       return 0;
@@ -603,11 +808,11 @@ gcry_mpi_print (enum gcry_mpi_format format,
       unsigned int n = (nbits + 7)/8;
 
       /* The PGP format can only handle unsigned integers.  */
-      if( a->sign )
-        return gcry_error (GPG_ERR_INV_ARG);
+      if (negative)
+        return GPG_ERR_INV_ARG;
 
       if (buffer && n+2 > len)
-        return gcry_error (GPG_ERR_TOO_SHORT);
+        return GPG_ERR_TOO_SHORT;
 
       if (buffer)
         {
@@ -617,11 +822,11 @@ gcry_mpi_print (enum gcry_mpi_format format,
           s[0] = nbits >> 8;
           s[1] = nbits;
 
-          tmp = _gcry_mpi_get_buffer (a, &n, NULL);
+          tmp = _gcry_mpi_get_buffer (a, 0, &n, NULL);
           if (!tmp)
-            return gpg_error_from_syserror ();
+            return gpg_err_code_from_syserror ();
           memcpy (s+2, tmp, n);
-          gcry_free (tmp);
+          xfree (tmp);
        }
       *nwritten = n+2;
       return 0;
@@ -632,13 +837,21 @@ gcry_mpi_print (enum gcry_mpi_format format,
       int extra = 0;
       unsigned int n;
 
-      if (a->sign)
-        return gcry_error (GPG_ERR_INTERNAL); /* Can't handle it yet.  */
-
-      tmp = _gcry_mpi_get_buffer (a, &n, NULL);
+      tmp = _gcry_mpi_get_buffer (a, 0, &n, NULL);
       if (!tmp)
-        return gpg_error_from_syserror ();
-      if (n && (*tmp & 0x80))
+        return gpg_err_code_from_syserror ();
+
+      if (negative)
+        {
+          twocompl (tmp, n);
+          if (!(*tmp & 0x80))
+            {
+              /* Need to extend the sign.  */
+              n++;
+              extra = 2;
+            }
+        }
+      else if (n && (*tmp & 0x80))
         {
           n++;
           extra=1;
@@ -646,24 +859,23 @@ gcry_mpi_print (enum gcry_mpi_format format,
 
       if (buffer && n+4 > len)
         {
-          gcry_free(tmp);
-          return gcry_error (GPG_ERR_TOO_SHORT);
+          xfree(tmp);
+          return GPG_ERR_TOO_SHORT;
        }
 
       if (buffer)
         {
           unsigned char *s = buffer;
 
-          *s++ = n >> 24;
-          *s++ = n >> 16;
-          *s++ = n >> 8;
-          *s++ = n;
-          if (extra)
+         buf_put_be32 (s, n);
+         s += 4;
+          if (extra == 1)
             *s++ = 0;
-
-          memcpy (s, tmp, n-extra);
+          else if (extra)
+            *s++ = 0xff;
+          memcpy (s, tmp, n-!!extra);
        }
-      gcry_free (tmp);
+      xfree (tmp);
       *nwritten = 4+n;
       return 0;
     }
@@ -674,22 +886,27 @@ gcry_mpi_print (enum gcry_mpi_format format,
       int extra = 0;
       unsigned int n = 0;
 
-      tmp = _gcry_mpi_get_buffer (a, &n, NULL);
+      tmp = _gcry_mpi_get_buffer (a, 0, &n, NULL);
       if (!tmp)
-        return gpg_error_from_syserror ();
+        return gpg_err_code_from_syserror ();
       if (!n || (*tmp & 0x80))
         extra = 2;
 
-      if (buffer && 2*n + extra + !!a->sign + 1 > len)
+      if (buffer && 2*n + extra + negative + 1 > len)
         {
-          gcry_free(tmp);
-          return gcry_error (GPG_ERR_TOO_SHORT);
+          xfree(tmp);
+          return GPG_ERR_TOO_SHORT;
        }
       if (buffer)
         {
+         static const u32 nibble2hex[] =
+         {
+           '0', '1', '2', '3', '4', '5', '6', '7',
+           '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'
+         };
           unsigned char *s = buffer;
 
-          if (a->sign)
+          if (negative)
             *s++ = '-';
           if (extra)
             {
@@ -697,26 +914,50 @@ gcry_mpi_print (enum gcry_mpi_format format,
               *s++ = '0';
            }
 
-          for (i=0; i < n; i++)
+         for (i = 0; i + 4 < n; i += 4)
+           {
+             u32 c = buf_get_be32(tmp + i);
+             u32 o1, o2;
+
+             o1 = nibble2hex[(c >> 28) & 0xF];
+             o1 <<= 8;
+             o1 |= nibble2hex[(c >> 24) & 0xF];
+             o1 <<= 8;
+             o1 |= nibble2hex[(c >> 20) & 0xF];
+             o1 <<= 8;
+             o1 |= nibble2hex[(c >> 16) & 0xF];
+
+             o2 = nibble2hex[(c >> 12) & 0xF];
+             o2 <<= 8;
+             o2 |= (u64)nibble2hex[(c >> 8) & 0xF];
+             o2 <<= 8;
+             o2 |= (u64)nibble2hex[(c >> 4) & 0xF];
+             o2 <<= 8;
+             o2 |= (u64)nibble2hex[(c >> 0) & 0xF];
+
+             buf_put_be32 (s + 0, o1);
+             buf_put_be32 (s + 4, o2);
+             s += 8;
+           }
+          for (; i < n; i++)
             {
               unsigned int c = tmp[i];
 
-              *s++ = (c >> 4) < 10? '0'+(c>>4) : 'A'+(c>>4)-10 ;
-              c &= 15;
-              *s++ = c < 10? '0'+c : 'A'+c-10 ;
+              *s++ = nibble2hex[c >> 4];
+              *s++ = nibble2hex[c & 0xF];
            }
           *s++ = 0;
           *nwritten = s - buffer;
        }
       else
         {
-          *nwritten = 2*n + extra + !!a->sign + 1;
+          *nwritten = 2*n + extra + negative + 1;
        }
-      gcry_free (tmp);
+      xfree (tmp);
       return 0;
     }
   else
-    return gcry_error (GPG_ERR_INV_ARG);
+    return GPG_ERR_INV_ARG;
 }
 
 
@@ -725,29 +966,89 @@ gcry_mpi_print (enum gcry_mpi_format format,
  * The caller has to supply the address of a pointer.  NWRITTEN may be
  * NULL.
  */
-gcry_error_t
-gcry_mpi_aprint (enum gcry_mpi_format format,
-                 unsigned char **buffer, size_t *nwritten,
-                struct gcry_mpi *a)
+gcry_err_code_t
+_gcry_mpi_aprint (enum gcry_mpi_format format,
+                  unsigned char **buffer, size_t *nwritten,
+                  struct gcry_mpi *a)
 {
   size_t n;
-  gcry_error_t rc;
+  gcry_err_code_t rc;
 
   *buffer = NULL;
-  rc = gcry_mpi_print (format, NULL, 0, &n, a);
+  rc = _gcry_mpi_print (format, NULL, 0, &n, a);
   if (rc)
     return rc;
 
-  *buffer = mpi_is_secure(a) ? gcry_malloc_secure (n) : gcry_malloc (n);
+  *buffer = mpi_is_secure(a) ? xtrymalloc_secure (n?n:1) : xtrymalloc (n?n:1);
   if (!*buffer)
-    return gpg_error_from_syserror ();
-  rc = gcry_mpi_print( format, *buffer, n, &n, a );
+    return gpg_err_code_from_syserror ();
+  /* If the returned buffer will have a length of 0, we nevertheless
+     allocated 1 byte (malloc needs it anyway) and store a 0.  */
+  if (!n)
+    **buffer = 0;
+  rc = _gcry_mpi_print( format, *buffer, n, &n, a );
   if (rc)
     {
-      gcry_free(*buffer);
+      xfree (*buffer);
       *buffer = NULL;
     }
   else if (nwritten)
     *nwritten = n;
   return rc;
 }
+
+
+/* Turn VALUE into an octet string and store it in an allocated buffer
+   at R_FRAME or - if R_RAME is NULL - copy it into the caller
+   provided buffer SPACE; either SPACE or R_FRAME may be used.  If
+   SPACE if not NULL, the caller must provide a buffer of at least
+   NBYTES.  If the resulting octet string is shorter than NBYTES pad
+   it to the left with zeroes.  If VALUE does not fit into NBYTES
+   return an error code.  */
+gpg_err_code_t
+_gcry_mpi_to_octet_string (unsigned char **r_frame, void *space,
+                           gcry_mpi_t value, size_t nbytes)
+{
+  gpg_err_code_t rc;
+  size_t nframe, noff, n;
+  unsigned char *frame;
+
+  if (!r_frame == !space)
+    return GPG_ERR_INV_ARG;  /* Only one may be used.  */
+
+  if (r_frame)
+    *r_frame = NULL;
+
+  rc = _gcry_mpi_print (GCRYMPI_FMT_USG, NULL, 0, &nframe, value);
+  if (rc)
+    return rc;
+  if (nframe > nbytes)
+    return GPG_ERR_TOO_LARGE; /* Value too long to fit into NBYTES.  */
+
+  noff = (nframe < nbytes)? nbytes - nframe : 0;
+  n = nframe + noff;
+  if (space)
+    frame = space;
+  else
+    {
+      frame = mpi_is_secure (value)? xtrymalloc_secure (n) : xtrymalloc (n);
+      if (!frame)
+        {
+          rc = gpg_err_code_from_syserror ();
+          return rc;
+        }
+    }
+  if (noff)
+    memset (frame, 0, noff);
+  nframe += noff;
+  rc = _gcry_mpi_print (GCRYMPI_FMT_USG, frame+noff, nframe-noff, NULL, value);
+  if (rc)
+    {
+      xfree (frame);
+      return rc;
+    }
+
+  if (r_frame)
+    *r_frame = frame;
+  return 0;
+}
diff --git a/grub-core/lib/libgcrypt/mpi/mpih-const-time.c 
b/grub-core/lib/libgcrypt/mpi/mpih-const-time.c
new file mode 100644
index 000000000..e54224091
--- /dev/null
+++ b/grub-core/lib/libgcrypt/mpi/mpih-const-time.c
@@ -0,0 +1,240 @@
+/* mpih-const-time.c  -  Constant-time MPI helper functions
+ *      Copyright (C) 2020  g10 Code GmbH
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include "mpi-internal.h"
+#include "g10lib.h"
+#include "const-time.h"
+#include "longlong.h"
+
+#define A_LIMB_1 ((mpi_limb_t)1)
+
+
+/*
+ * Return 1 if X > Y and otherwise return 0.
+ */
+static inline mpi_limb_t
+mpih_ct_limb_greater_than (mpi_limb_t x, mpi_limb_t y)
+{
+  mpi_limb_t diff_hi, diff_lo;
+  sub_ddmmss (diff_hi, diff_lo, 0, y, 0, x);
+  return diff_hi >> (BITS_PER_MPI_LIMB - 1);
+}
+
+
+/*
+ * Return 1 if X < Y and otherwise return 0.
+ */
+static inline mpi_limb_t
+mpih_ct_limb_less_than (mpi_limb_t x, mpi_limb_t y)
+{
+  return mpih_ct_limb_greater_than (y, x);
+}
+
+
+/*
+ *  W = U when OP_ENABLED=1
+ *  otherwise, W keeps old value
+ */
+void
+_gcry_mpih_set_cond (mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize,
+                     unsigned long op_enable)
+{
+  /* Note: dual mask with AND/OR used for EM leakage mitigation */
+  mpi_limb_t mask1 = ct_limb_gen_mask(op_enable);
+  mpi_limb_t mask2 = ct_limb_gen_inv_mask(op_enable);
+  mpi_size_t i;
+
+  for (i = 0; i < usize; i++)
+    {
+      wp[i] = (wp[i] & mask2) | (up[i] & mask1);
+    }
+}
+
+
+/*
+ *  W = U + V when OP_ENABLED=1
+ *  otherwise, W = U
+ */
+mpi_limb_t
+_gcry_mpih_add_n_cond (mpi_ptr_t wp, mpi_ptr_t up, mpi_ptr_t vp,
+                       mpi_size_t usize, unsigned long op_enable)
+{
+  /* Note: dual mask with AND/OR used for EM leakage mitigation */
+  mpi_limb_t mask1 = ct_limb_gen_mask(op_enable);
+  mpi_limb_t mask2 = ct_limb_gen_inv_mask(op_enable);
+  mpi_size_t i;
+  mpi_limb_t cy;
+
+  cy = 0;
+  for (i = 0; i < usize; i++)
+    {
+      mpi_limb_t u = up[i];
+      mpi_limb_t x = u + vp[i];
+      mpi_limb_t cy1 = mpih_ct_limb_less_than(x, u);
+      mpi_limb_t cy2;
+
+      x = x + cy;
+      cy2 = mpih_ct_limb_less_than(x, cy);
+      cy = cy1 | cy2;
+      wp[i] = (u & mask2) | (x & mask1);
+    }
+
+  return cy & mask1;
+}
+
+
+/*
+ *  W = U - V when OP_ENABLED=1
+ *  otherwise, W = U
+ */
+mpi_limb_t
+_gcry_mpih_sub_n_cond (mpi_ptr_t wp, mpi_ptr_t up, mpi_ptr_t vp,
+                       mpi_size_t usize, unsigned long op_enable)
+{
+  /* Note: dual mask with AND/OR used for EM leakage mitigation */
+  mpi_limb_t mask1 = ct_limb_gen_mask(op_enable);
+  mpi_limb_t mask2 = ct_limb_gen_inv_mask(op_enable);
+  mpi_size_t i;
+  mpi_limb_t cy;
+
+  cy = 0;
+  for (i = 0; i < usize; i++)
+    {
+      mpi_limb_t u = up[i];
+      mpi_limb_t x = u - vp[i];
+      mpi_limb_t cy1 = mpih_ct_limb_greater_than(x, u);
+      mpi_limb_t cy2;
+
+      cy2 = mpih_ct_limb_less_than(x, cy);
+      x = x - cy;
+      cy = cy1 | cy2;
+      wp[i] = (u & mask2) | (x & mask1);
+    }
+
+  return cy & mask1;
+}
+
+
+/*
+ *  Swap value of U and V when OP_ENABLED=1
+ *  otherwise, no change
+ */
+void
+_gcry_mpih_swap_cond (mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t usize,
+                      unsigned long op_enable)
+{
+  /* Note: dual mask with AND/OR used for EM leakage mitigation */
+  mpi_limb_t mask1 = ct_limb_gen_mask(op_enable);
+  mpi_limb_t mask2 = ct_limb_gen_inv_mask(op_enable);
+  mpi_size_t i;
+
+  for (i = 0; i < usize; i++)
+    {
+      mpi_limb_t u = up[i];
+      mpi_limb_t v = vp[i];
+      up[i] = (u & mask2) | (v & mask1);
+      vp[i] = (u & mask1) | (v & mask2);
+    }
+}
+
+
+/*
+ *  W = -U when OP_ENABLED=1
+ *  otherwise, W = U
+ */
+void
+_gcry_mpih_abs_cond (mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize,
+                     unsigned long op_enable)
+{
+  /* Note: dual mask with AND/OR used for EM leakage mitigation */
+  mpi_limb_t mask1 = ct_limb_gen_mask(op_enable);
+  mpi_limb_t mask2 = ct_limb_gen_inv_mask(op_enable);
+  mpi_limb_t cy = op_enable;
+  mpi_size_t i;
+
+  for (i = 0; i < usize; i++)
+    {
+      mpi_limb_t u = up[i];
+      mpi_limb_t x = ~u + cy;
+
+      cy = mpih_ct_limb_less_than(x, ~u);
+      wp[i] = (u & mask2) | (x & mask1);
+    }
+}
+
+
+/*
+ * Allocating memory for W,
+ * compute W = V % U, then return W
+ */
+mpi_ptr_t
+_gcry_mpih_mod (mpi_ptr_t vp, mpi_size_t vsize,
+                mpi_ptr_t up, mpi_size_t usize)
+{
+  int secure;
+  mpi_ptr_t rp;
+  mpi_size_t i;
+
+  secure = _gcry_is_secure (vp);
+  rp = mpi_alloc_limb_space (usize, secure);
+  MPN_ZERO (rp, usize);
+
+  for (i = 0; i < vsize * BITS_PER_MPI_LIMB; i++)
+    {
+      unsigned int j = vsize * BITS_PER_MPI_LIMB - 1 - i;
+      unsigned int limbno = j / BITS_PER_MPI_LIMB;
+      unsigned int bitno = j % BITS_PER_MPI_LIMB;
+      mpi_limb_t limb = vp[limbno];
+      unsigned int the_bit = (limb >> bitno) & 1;
+      mpi_limb_t underflow;
+      mpi_limb_t overflow;
+
+      overflow = _gcry_mpih_lshift (rp, rp, usize, 1);
+      rp[0] |= the_bit;
+
+      underflow = _gcry_mpih_sub_n (rp, rp, up, usize);
+      mpih_add_n_cond (rp, rp, up, usize, overflow ^ underflow);
+    }
+
+  return rp;
+}
+
+int
+_gcry_mpih_cmp_ui (mpi_ptr_t up, mpi_size_t usize, unsigned long v)
+{
+  int is_all_zero = 1;
+  mpi_size_t i;
+
+  for (i = 1; i < usize; i++)
+    is_all_zero &= mpih_limb_is_zero (up[i]);
+
+  if (is_all_zero)
+    {
+      if (up[0] < v)
+        return -1;
+      else if (up[0] > v)
+        return 1;
+      else
+        return 0;
+    }
+  return 1;
+}
diff --git a/grub-core/lib/libgcrypt/mpi/mpih-div.c 
b/grub-core/lib/libgcrypt/mpi/mpih-div.c
index 0b458fffd..57c1b5848 100644
--- a/grub-core/lib/libgcrypt/mpi/mpih-div.c
+++ b/grub-core/lib/libgcrypt/mpi/mpih-div.c
@@ -48,7 +48,7 @@ _gcry_mpih_mod_1(mpi_ptr_t dividend_ptr, mpi_size_t 
dividend_size,
 {
     mpi_size_t i;
     mpi_limb_t n1, n0, r;
-    int dummy;
+    mpi_limb_t dummy GCC_ATTR_UNUSED;
 
     /* Botch: Should this be handled at all?  Rely on callers? */
     if( !dividend_size )
@@ -212,10 +212,8 @@ _gcry_mpih_divrem( mpi_ptr_t qp, mpi_size_t qextra_limbs,
 
     switch(dsize) {
       case 0:
-       /* We are asked to divide by zero, so go ahead and do it!  (To make
-          the compiler not remove this statement, return the value.)  */
-       grub_fatal ("mpi division by zero");
-       return 0;
+       _gcry_divide_by_zero();
+       break;
 
       case 1:
        {
@@ -398,7 +396,7 @@ _gcry_mpih_divmod_1( mpi_ptr_t quot_ptr,
 {
     mpi_size_t i;
     mpi_limb_t n1, n0, r;
-    int dummy;
+    mpi_limb_t dummy GCC_ATTR_UNUSED;
 
     if( !dividend_size )
        return 0;
diff --git a/grub-core/lib/libgcrypt/mpi/mpih-mul.c 
b/grub-core/lib/libgcrypt/mpi/mpih-mul.c
index b8e056173..8b6f06a30 100644
--- a/grub-core/lib/libgcrypt/mpi/mpih-mul.c
+++ b/grub-core/lib/libgcrypt/mpi/mpih-mul.c
@@ -353,7 +353,7 @@ _gcry_mpih_mul_n( mpi_ptr_t prodp,
            _gcry_mpih_sqr_n_basecase( prodp, up, size );
        else {
            mpi_ptr_t tspace;
-           secure = gcry_is_secure( up );
+           secure = _gcry_is_secure( up );
            tspace = mpi_alloc_limb_space( 2 * size, secure );
            _gcry_mpih_sqr_n( prodp, up, size, tspace );
            _gcry_mpi_free_limb_space (tspace, 2 * size );
@@ -364,7 +364,7 @@ _gcry_mpih_mul_n( mpi_ptr_t prodp,
            mul_n_basecase( prodp, up, vp, size );
        else {
            mpi_ptr_t tspace;
-           secure = gcry_is_secure( up ) || gcry_is_secure( vp );
+           secure = _gcry_is_secure( up ) || _gcry_is_secure( vp );
            tspace = mpi_alloc_limb_space( 2 * size, secure );
            mul_n (prodp, up, vp, size, tspace);
            _gcry_mpi_free_limb_space (tspace, 2 * size );
@@ -386,9 +386,9 @@ _gcry_mpih_mul_karatsuba_case( mpi_ptr_t prodp,
        if( ctx->tspace )
            _gcry_mpi_free_limb_space( ctx->tspace, ctx->tspace_nlimbs );
         ctx->tspace_nlimbs = 2 * vsize;
-       ctx->tspace = mpi_alloc_limb_space( 2 * vsize,
-                                           (gcry_is_secure( up )
-                                            || gcry_is_secure( vp )) );
+       ctx->tspace = mpi_alloc_limb_space (2 * vsize,
+                                           (_gcry_is_secure (up)
+                                             || _gcry_is_secure (vp)));
        ctx->tspace_size = vsize;
     }
 
@@ -402,8 +402,9 @@ _gcry_mpih_mul_karatsuba_case( mpi_ptr_t prodp,
            if( ctx->tp )
                _gcry_mpi_free_limb_space( ctx->tp, ctx->tp_nlimbs );
             ctx->tp_nlimbs = 2 * vsize;
-           ctx->tp = mpi_alloc_limb_space( 2 * vsize, gcry_is_secure( up )
-                                                     || gcry_is_secure( vp ) );
+           ctx->tp = mpi_alloc_limb_space (2 * vsize,
+                                            (_gcry_is_secure (up)
+                                             || _gcry_is_secure (vp)));
            ctx->tp_size = vsize;
        }
 
@@ -423,7 +424,7 @@ _gcry_mpih_mul_karatsuba_case( mpi_ptr_t prodp,
        }
        else {
            if( !ctx->next ) {
-               ctx->next = gcry_xcalloc( 1, sizeof *ctx );
+               ctx->next = xcalloc( 1, sizeof *ctx );
            }
            _gcry_mpih_mul_karatsuba_case( ctx->tspace,
                                        vp, vsize,
@@ -452,7 +453,7 @@ _gcry_mpih_release_karatsuba_ctx( struct karatsuba_ctx *ctx 
)
             _gcry_mpi_free_limb_space( ctx->tp, ctx->tp_nlimbs );
        if( ctx->tspace )
            _gcry_mpi_free_limb_space( ctx->tspace, ctx->tspace_nlimbs );
-       gcry_free( ctx );
+       xfree( ctx );
     }
 }
 
diff --git a/grub-core/lib/libgcrypt/mpi/mpiutil.c 
b/grub-core/lib/libgcrypt/mpi/mpiutil.c
index 76630a629..3a372374f 100644
--- a/grub-core/lib/libgcrypt/mpi/mpiutil.c
+++ b/grub-core/lib/libgcrypt/mpi/mpiutil.c
@@ -1,6 +1,7 @@
 /* mpiutil.ac  -  Utility functions for MPI
  * Copyright (C) 1998, 2000, 2001, 2002, 2003,
  *               2007  Free Software Foundation, Inc.
+ * Copyright (C) 2013  g10 Code GmbH
  *
  * This file is part of Libgcrypt.
  *
@@ -26,6 +27,25 @@
 #include "g10lib.h"
 #include "mpi-internal.h"
 #include "mod-source-info.h"
+#include "const-time.h"
+
+
+#if SIZEOF_UNSIGNED_INT == 2
+# define MY_UINT_MAX 0xffff
+/* (visual check:      0123 ) */
+#elif SIZEOF_UNSIGNED_INT == 4
+# define MY_UINT_MAX 0xffffffff
+/* (visual check:      01234567 ) */
+#elif SIZEOF_UNSIGNED_INT == 8
+# define MY_UINT_MAX 0xffffffffffffffff
+/* (visual check:      0123456789abcdef ) */
+#else
+# error Need MY_UINT_MAX for this limb size
+#endif
+
+
+/* Constants allocated right away at startup.  */
+static gcry_mpi_t constants[MPI_NUMBER_OF_CONSTANTS];
 
 
 const char *
@@ -35,6 +55,34 @@ _gcry_mpi_get_hw_config (void)
 }
 
 
+/* Initialize the MPI subsystem.  This is called early and allows to
+   do some initialization without taking care of threading issues.  */
+gcry_err_code_t
+_gcry_mpi_init (void)
+{
+  int idx;
+  unsigned long value;
+
+  for (idx=0; idx < MPI_NUMBER_OF_CONSTANTS; idx++)
+    {
+      switch (idx)
+        {
+        case MPI_C_ZERO:  value = 0; break;
+        case MPI_C_ONE:   value = 1; break;
+        case MPI_C_TWO:   value = 2; break;
+        case MPI_C_THREE: value = 3; break;
+        case MPI_C_FOUR:  value = 4; break;
+        case MPI_C_EIGHT: value = 8; break;
+        default: log_bug ("invalid mpi_const selector %d\n", idx);
+        }
+      constants[idx] = mpi_alloc_set_ui (value);
+      constants[idx]->flags = (16|32);
+    }
+
+  return 0;
+}
+
+
 /****************
  * Note:  It was a bad idea to use the number of limbs to allocate
  *       because on a alpha the limbs are large but we normally need
@@ -48,7 +96,7 @@ _gcry_mpi_alloc( unsigned nlimbs )
 {
     gcry_mpi_t a;
 
-    a = gcry_xmalloc( sizeof *a );
+    a = xmalloc( sizeof *a );
     a->d = nlimbs? mpi_alloc_limb_space( nlimbs, 0 ) : NULL;
     a->alloced = nlimbs;
     a->nlimbs = 0;
@@ -69,7 +117,7 @@ _gcry_mpi_alloc_secure( unsigned nlimbs )
 {
     gcry_mpi_t a;
 
-    a = gcry_xmalloc( sizeof *a );
+    a = xmalloc( sizeof *a );
     a->d = nlimbs? mpi_alloc_limb_space( nlimbs, 1 ) : NULL;
     a->alloced = nlimbs;
     a->flags = 1;
@@ -87,7 +135,7 @@ _gcry_mpi_alloc_limb_space( unsigned int nlimbs, int secure )
     size_t len;
 
     len = (nlimbs ? nlimbs : 1) * sizeof (mpi_limb_t);
-    p = secure ? gcry_xmalloc_secure (len) : gcry_xmalloc (len);
+    p = secure ? xmalloc_secure (len) : xmalloc (len);
     if (! nlimbs)
       *p = 0;
 
@@ -107,7 +155,7 @@ _gcry_mpi_free_limb_space( mpi_ptr_t a, unsigned int nlimbs)
          implemented in user provided allocation functions. */
       if (len)
         wipememory (a, len);
-      gcry_free(a);
+      xfree(a);
     }
 }
 
@@ -143,18 +191,18 @@ _gcry_mpi_resize (gcry_mpi_t a, unsigned nlimbs)
   /* Actually resize the limb space.  */
   if (a->d)
     {
-      a->d = gcry_xrealloc (a->d, nlimbs * sizeof (mpi_limb_t));
-      for (i=a->alloced; i < nlimbs; i++)
+      a->d = xrealloc (a->d, nlimbs * sizeof (mpi_limb_t));
+      for (i=a->nlimbs; i < nlimbs; i++)
         a->d[i] = 0;
     }
   else
     {
       if (a->flags & 1)
        /* Secure memory is wanted.  */
-       a->d = gcry_xcalloc_secure (nlimbs , sizeof (mpi_limb_t));
+       a->d = xcalloc_secure (nlimbs , sizeof (mpi_limb_t));
       else
        /* Standard memory.  */
-       a->d = gcry_xcalloc (nlimbs , sizeof (mpi_limb_t));
+       a->d = xcalloc (nlimbs , sizeof (mpi_limb_t));
     }
   a->alloced = nlimbs;
 }
@@ -162,8 +210,13 @@ _gcry_mpi_resize (gcry_mpi_t a, unsigned nlimbs)
 void
 _gcry_mpi_clear( gcry_mpi_t a )
 {
-    a->nlimbs = 0;
-    a->flags = 0;
+  if (mpi_is_immutable (a))
+    {
+      mpi_immutable_failed ();
+      return;
+    }
+  a->nlimbs = 0;
+  a->flags = 0;
 }
 
 
@@ -172,17 +225,38 @@ _gcry_mpi_free( gcry_mpi_t a )
 {
   if (!a )
     return;
+  if ((a->flags & 32))
+  {
+#if GPGRT_VERSION_NUMBER >= 0x011600  /* 1.22 */
+    gpgrt_annotate_leaked_object(a);
+#endif
+    return; /* Never release a constant. */
+  }
   if ((a->flags & 4))
-    gcry_free( a->d );
+    xfree( a->d );
   else
     {
       _gcry_mpi_free_limb_space(a->d, a->alloced);
     }
-  if ((a->flags & ~7))
-    log_bug("invalid flag value in mpi\n");
-  gcry_free(a);
+  /* Check that the flags makes sense.  We better allow for bit 1
+     (value 2) for backward ABI compatibility.  */
+  if ((a->flags & ~(1|2|4|16
+                    |GCRYMPI_FLAG_USER1
+                    |GCRYMPI_FLAG_USER2
+                    |GCRYMPI_FLAG_USER3
+                    |GCRYMPI_FLAG_USER4)))
+    log_bug("invalid flag value in mpi_free\n");
+  xfree (a);
+}
+
+
+void
+_gcry_mpi_immutable_failed (void)
+{
+  log_info ("Warning: trying to change an immutable MPI\n");
 }
 
+
 static void
 mpi_set_secure( gcry_mpi_t a )
 {
@@ -197,7 +271,7 @@ mpi_set_secure( gcry_mpi_t a )
       gcry_assert (!ap);
       return;
     }
-  bp = mpi_alloc_limb_space (a->nlimbs, 1);
+  bp = mpi_alloc_limb_space (a->alloced, 1);
   MPN_COPY( bp, ap, a->nlimbs );
   a->d = bp;
   _gcry_mpi_free_limb_space (ap, a->alloced);
@@ -205,13 +279,19 @@ mpi_set_secure( gcry_mpi_t a )
 
 
 gcry_mpi_t
-gcry_mpi_set_opaque( gcry_mpi_t a, void *p, unsigned int nbits )
+_gcry_mpi_set_opaque (gcry_mpi_t a, void *p, unsigned int nbits)
 {
   if (!a)
     a = mpi_alloc(0);
 
+  if (mpi_is_immutable (a))
+    {
+      mpi_immutable_failed ();
+      return a;
+    }
+
   if( a->flags & 4 )
-    gcry_free( a->d );
+    xfree (a->d);
   else
     _gcry_mpi_free_limb_space (a->d, a->alloced);
 
@@ -219,13 +299,31 @@ gcry_mpi_set_opaque( gcry_mpi_t a, void *p, unsigned int 
nbits )
   a->alloced = 0;
   a->nlimbs = 0;
   a->sign  = nbits;
-  a->flags = 4;
+  a->flags = 4 | (a->flags & (GCRYMPI_FLAG_USER1|GCRYMPI_FLAG_USER2
+                              |GCRYMPI_FLAG_USER3|GCRYMPI_FLAG_USER4));
+  if (_gcry_is_secure (a->d))
+    a->flags |= 1;
   return a;
 }
 
 
+gcry_mpi_t
+_gcry_mpi_set_opaque_copy (gcry_mpi_t a, const void *p, unsigned int nbits)
+{
+  void *d;
+  unsigned int n;
+
+  n = (nbits+7)/8;
+  d = _gcry_is_secure (p)? xtrymalloc_secure (n) : xtrymalloc (n);
+  if (!d)
+    return NULL;
+  memcpy (d, p, n);
+  return mpi_set_opaque (a, d, nbits);
+}
+
+
 void *
-gcry_mpi_get_opaque( gcry_mpi_t a, unsigned int *nbits )
+_gcry_mpi_get_opaque (gcry_mpi_t a, unsigned int *nbits)
 {
     if( !(a->flags & 4) )
        log_bug("mpi_get_opaque on normal mpi\n");
@@ -235,21 +333,44 @@ gcry_mpi_get_opaque( gcry_mpi_t a, unsigned int *nbits )
 }
 
 
+void *
+_gcry_mpi_get_opaque_copy (gcry_mpi_t a, unsigned int *nbits)
+{
+  const void *s;
+  void *d;
+  unsigned int n;
+
+  s = mpi_get_opaque (a, nbits);
+  if (!s && nbits)
+    return NULL;
+  n = (*nbits+7)/8;
+  d = _gcry_is_secure (s)? xtrymalloc_secure (n) : xtrymalloc (n);
+  if (d)
+    memcpy (d, s, n);
+  return d;
+}
+
 /****************
  * Note: This copy function should not interpret the MPI
  *      but copy it transparently.
  */
 gcry_mpi_t
-gcry_mpi_copy( gcry_mpi_t a )
+_gcry_mpi_copy (gcry_mpi_t a)
 {
     int i;
     gcry_mpi_t b;
 
     if( a && (a->flags & 4) ) {
-       void *p = gcry_is_secure(a->d)? gcry_xmalloc_secure( (a->sign+7)/8 )
-                                    : gcry_xmalloc( (a->sign+7)/8 );
-       memcpy( p, a->d, (a->sign+7)/8 );
-       b = gcry_mpi_set_opaque( NULL, p, a->sign );
+        void *p = NULL;
+        if (a->sign) {
+            p = _gcry_is_secure(a->d)? xmalloc_secure ((a->sign+7)/8)
+                                     : xmalloc ((a->sign+7)/8);
+            if (a->d)
+                memcpy( p, a->d, (a->sign+7)/8 );
+        }
+        b = mpi_set_opaque( NULL, p, a->sign );
+        b->flags = a->flags;
+        b->flags &= ~(16|32); /* Reset the immutable and constant flags.  */
     }
     else if( a ) {
        b = mpi_is_secure(a)? mpi_alloc_secure( a->nlimbs )
@@ -257,6 +378,7 @@ gcry_mpi_copy( gcry_mpi_t a )
        b->nlimbs = a->nlimbs;
        b->sign = a->sign;
        b->flags  = a->flags;
+        b->flags &= ~(16|32); /* Reset the immutable and constant flags.  */
        for(i=0; i < b->nlimbs; i++ )
            b->d[i] = a->d[i];
     }
@@ -266,6 +388,47 @@ gcry_mpi_copy( gcry_mpi_t a )
 }
 
 
+/* Return true if A is negative.  */
+int
+_gcry_mpi_is_neg (gcry_mpi_t a)
+{
+  if (a->sign && _gcry_mpi_cmp_ui (a, 0))
+    return 1;
+  else
+    return 0;
+}
+
+
+/* W = - U */
+void
+_gcry_mpi_neg (gcry_mpi_t w, gcry_mpi_t u)
+{
+  if (w != u)
+    mpi_set (w, u);
+  else if (mpi_is_immutable (w))
+    {
+      mpi_immutable_failed ();
+      return;
+    }
+
+  w->sign = !u->sign;
+}
+
+
+/* W = [W] */
+void
+_gcry_mpi_abs (gcry_mpi_t w)
+{
+  if (mpi_is_immutable (w))
+    {
+      mpi_immutable_failed ();
+      return;
+    }
+
+  w->sign = 0;
+}
+
+
 /****************
  * This function allocates an MPI which is optimized to hold
  * a value as large as the one given in the argument and allocates it
@@ -278,10 +441,10 @@ _gcry_mpi_alloc_like( gcry_mpi_t a )
 
     if( a && (a->flags & 4) ) {
        int n = (a->sign+7)/8;
-       void *p = gcry_is_secure(a->d)? gcry_malloc_secure( n )
-                                    : gcry_malloc( n );
+       void *p = _gcry_is_secure(a->d)? xtrymalloc_secure (n)
+                                       : xtrymalloc (n);
        memcpy( p, a->d, n );
-       b = gcry_mpi_set_opaque( NULL, p, a->sign );
+       b = mpi_set_opaque( NULL, p, a->sign );
     }
     else if( a ) {
        b = mpi_is_secure(a)? mpi_alloc_secure( a->nlimbs )
@@ -296,8 +459,31 @@ _gcry_mpi_alloc_like( gcry_mpi_t a )
 }
 
 
+/* Set U into W and release U.  If W is NULL only U will be released. */
+void
+_gcry_mpi_snatch (gcry_mpi_t w, gcry_mpi_t u)
+{
+  if (w)
+    {
+      if (mpi_is_immutable (w))
+        {
+          mpi_immutable_failed ();
+          return;
+        }
+      _gcry_mpi_assign_limb_space (w, u->d, u->alloced);
+      w->nlimbs = u->nlimbs;
+      w->sign   = u->sign;
+      w->flags  = u->flags;
+      u->alloced = 0;
+      u->nlimbs = 0;
+      u->d = NULL;
+    }
+  _gcry_mpi_free (u);
+}
+
+
 gcry_mpi_t
-gcry_mpi_set( gcry_mpi_t w, gcry_mpi_t u)
+_gcry_mpi_set (gcry_mpi_t w, gcry_mpi_t u)
 {
   mpi_ptr_t wp, up;
   mpi_size_t usize = u->nlimbs;
@@ -305,24 +491,73 @@ gcry_mpi_set( gcry_mpi_t w, gcry_mpi_t u)
 
   if (!w)
     w = _gcry_mpi_alloc( mpi_get_nlimbs(u) );
+  if (mpi_is_immutable (w))
+    {
+      mpi_immutable_failed ();
+      return w;
+    }
   RESIZE_IF_NEEDED(w, usize);
   wp = w->d;
   up = u->d;
   MPN_COPY( wp, up, usize );
   w->nlimbs = usize;
   w->flags = u->flags;
+  w->flags &= ~(16|32); /* Reset the immutable and constant flags.  */
   w->sign = usign;
   return w;
 }
 
+/****************
+ * Set the value of W by the one of U, when SET is 1.
+ * Leave the value when SET is 0.
+ * This implementation should be constant-time regardless of SET.
+ */
+gcry_mpi_t
+_gcry_mpi_set_cond (gcry_mpi_t w, const gcry_mpi_t u, unsigned long set)
+{
+  /* Note: dual mask with AND/OR used for EM leakage mitigation */
+  mpi_limb_t mask1 = ct_limb_gen_mask(set);
+  mpi_limb_t mask2 = ct_limb_gen_inv_mask(set);
+  mpi_size_t i;
+  mpi_size_t nlimbs = u->alloced;
+  mpi_limb_t xu;
+  mpi_limb_t xw;
+  mpi_limb_t *uu = u->d;
+  mpi_limb_t *uw = w->d;
+
+  if (w->alloced != u->alloced)
+    log_bug ("mpi_set_cond: different sizes\n");
+
+  for (i = 0; i < nlimbs; i++)
+    {
+      xu = uu[i];
+      xw = uw[i];
+      uw[i] = (xw & mask2) | (xu & mask1);
+    }
+
+  xu = u->nlimbs;
+  xw = w->nlimbs;
+  w->nlimbs = (xw & mask2) | (xu & mask1);
+
+  xu = u->sign;
+  xw = w->sign;
+  w->sign = (xw & mask2) | (xu & mask1);
+  return w;
+}
+
 
 gcry_mpi_t
-gcry_mpi_set_ui( gcry_mpi_t w, unsigned long u)
+_gcry_mpi_set_ui (gcry_mpi_t w, unsigned long u)
 {
   if (!w)
     w = _gcry_mpi_alloc (1);
   /* FIXME: If U is 0 we have no need to resize and thus possible
      allocating the the limbs. */
+  if (mpi_is_immutable (w))
+    {
+      mpi_immutable_failed ();
+      return w;
+    }
   RESIZE_IF_NEEDED(w, 1);
   w->d[0] = u;
   w->nlimbs = u? 1:0;
@@ -331,34 +566,29 @@ gcry_mpi_set_ui( gcry_mpi_t w, unsigned long u)
   return w;
 }
 
+/* If U is non-negative and small enough store it as an unsigned int
+ * at W.  If the value does not fit into an unsigned int or is
+ * negative return GPG_ERR_ERANGE.  Note that we return an unsigned
+ * int so that the value can be used with the bit test functions; in
+ * contrast the other _ui functions take an unsigned long so that on
+ * some platforms they may accept a larger value.  On error the value
+ * at W is not changed. */
 gcry_err_code_t
-_gcry_mpi_get_ui (gcry_mpi_t w, unsigned long *u)
+_gcry_mpi_get_ui (unsigned int *w, gcry_mpi_t u)
 {
-  gcry_err_code_t err = GPG_ERR_NO_ERROR;
-  unsigned long x = 0;
+  mpi_limb_t x;
 
-  if (w->nlimbs > 1)
-    err = GPG_ERR_TOO_LARGE;
-  else if (w->nlimbs == 1)
-    x = w->d[0];
-  else
-    x = 0;
+  if (u->nlimbs > 1 || u->sign)
+    return GPG_ERR_ERANGE;
 
-  if (! err)
-    *u = x;
+  x = (u->nlimbs == 1) ? u->d[0] : 0;
+  if (sizeof (x) > sizeof (unsigned int) && x > MY_UINT_MAX)
+    return GPG_ERR_ERANGE;
 
-  return err;
+  *w = x;
+  return 0;
 }
 
-gcry_error_t
-gcry_mpi_get_ui (gcry_mpi_t w, unsigned long *u)
-{
-  gcry_err_code_t err = GPG_ERR_NO_ERROR;
-
-  err = _gcry_mpi_get_ui (w, u);
-
-  return gcry_error (err);
-}
 
 gcry_mpi_t
 _gcry_mpi_alloc_set_ui( unsigned long u)
@@ -371,7 +601,7 @@ _gcry_mpi_alloc_set_ui( unsigned long u)
 }
 
 void
-gcry_mpi_swap( gcry_mpi_t a, gcry_mpi_t b)
+_gcry_mpi_swap (gcry_mpi_t a, gcry_mpi_t b)
 {
     struct gcry_mpi tmp;
 
@@ -379,8 +609,70 @@ gcry_mpi_swap( gcry_mpi_t a, gcry_mpi_t b)
 }
 
 
+/****************
+ * Swap the value of A and B, when SWAP is 1.
+ * Leave the value when SWAP is 0.
+ * This implementation should be constant-time regardless of SWAP.
+ */
+void
+_gcry_mpi_swap_cond (gcry_mpi_t a, gcry_mpi_t b, unsigned long swap)
+{
+  /* Note: dual mask with AND/OR used for EM leakage mitigation */
+  mpi_limb_t mask1 = ct_limb_gen_mask(swap);
+  mpi_limb_t mask2 = ct_limb_gen_inv_mask(swap);
+  mpi_size_t i;
+  mpi_size_t nlimbs;
+  mpi_limb_t *ua = a->d;
+  mpi_limb_t *ub = b->d;
+  mpi_limb_t xa;
+  mpi_limb_t xb;
+
+  if (a->alloced > b->alloced)
+    nlimbs = b->alloced;
+  else
+    nlimbs = a->alloced;
+  if (a->nlimbs > nlimbs || b->nlimbs > nlimbs)
+    log_bug ("mpi_swap_cond: different sizes\n");
+
+  for (i = 0; i < nlimbs; i++)
+    {
+      xa = ua[i];
+      xb = ub[i];
+      ua[i] = (xa & mask2) | (xb & mask1);
+      ub[i] = (xa & mask1) | (xb & mask2);
+    }
+
+  xa = a->nlimbs;
+  xb = b->nlimbs;
+  a->nlimbs = (xa & mask2) | (xb & mask1);
+  b->nlimbs = (xa & mask1) | (xb & mask2);
+
+  xa = a->sign;
+  xb = b->sign;
+  a->sign = (xa & mask2) | (xb & mask1);
+  b->sign = (xa & mask1) | (xb & mask2);
+}
+
+
+/****************
+ * Set bit N of A, when SET is 1.
+ * This implementation should be constant-time regardless of SET.
+ */
+void
+_gcry_mpi_set_bit_cond (gcry_mpi_t a, unsigned int n, unsigned long set)
+{
+  unsigned int limbno, bitno;
+  mpi_limb_t set_the_bit = !!set;
+
+  limbno = n / BITS_PER_MPI_LIMB;
+  bitno  = n % BITS_PER_MPI_LIMB;
+
+  a->d[limbno] |= (set_the_bit<<bitno);
+}
+
+
 gcry_mpi_t
-gcry_mpi_new( unsigned int nbits )
+_gcry_mpi_new (unsigned int nbits)
 {
     return _gcry_mpi_alloc ( (nbits+BITS_PER_MPI_LIMB-1)
                              / BITS_PER_MPI_LIMB );
@@ -388,58 +680,85 @@ gcry_mpi_new( unsigned int nbits )
 
 
 gcry_mpi_t
-gcry_mpi_snew( unsigned int nbits )
+_gcry_mpi_snew (unsigned int nbits)
 {
   return _gcry_mpi_alloc_secure ( (nbits+BITS_PER_MPI_LIMB-1)
                                   / BITS_PER_MPI_LIMB );
 }
 
 void
-gcry_mpi_release( gcry_mpi_t a )
+_gcry_mpi_release( gcry_mpi_t a )
 {
     _gcry_mpi_free( a );
 }
 
 void
-gcry_mpi_randomize( gcry_mpi_t w,
-                   unsigned int nbits, enum gcry_random_level level )
+_gcry_mpi_randomize (gcry_mpi_t w,
+                     unsigned int nbits, enum gcry_random_level level)
 {
   unsigned char *p;
   size_t nbytes = (nbits+7)/8;
 
+  if (mpi_is_immutable (w))
+    {
+      mpi_immutable_failed ();
+      return;
+    }
   if (level == GCRY_WEAK_RANDOM)
     {
-      p = mpi_is_secure(w) ? gcry_xmalloc_secure (nbytes)
-                           : gcry_xmalloc (nbytes);
-      gcry_create_nonce (p, nbytes);
+      p = mpi_is_secure(w) ? xmalloc_secure (nbytes)
+                           : xmalloc (nbytes);
+      _gcry_create_nonce (p, nbytes);
     }
   else
     {
-      p = mpi_is_secure(w) ? gcry_random_bytes_secure (nbytes, level)
-                           : gcry_random_bytes (nbytes, level);
+      p = mpi_is_secure(w) ? _gcry_random_bytes_secure (nbytes, level)
+                           : _gcry_random_bytes (nbytes, level);
     }
   _gcry_mpi_set_buffer( w, p, nbytes, 0 );
-  gcry_free (p);
+  xfree (p);
 }
 
 
 void
-gcry_mpi_set_flag( gcry_mpi_t a, enum gcry_mpi_flag flag )
+_gcry_mpi_set_flag (gcry_mpi_t a, enum gcry_mpi_flag flag)
 {
-    switch( flag ) {
-      case GCRYMPI_FLAG_SECURE:  mpi_set_secure(a); break;
-      case GCRYMPI_FLAG_OPAQUE:
-      default: log_bug("invalid flag value\n");
+  switch (flag)
+    {
+    case GCRYMPI_FLAG_SECURE:     mpi_set_secure(a); break;
+    case GCRYMPI_FLAG_CONST:      a->flags |= (16|32); break;
+    case GCRYMPI_FLAG_IMMUTABLE:  a->flags |= 16; break;
+
+    case GCRYMPI_FLAG_USER1:
+    case GCRYMPI_FLAG_USER2:
+    case GCRYMPI_FLAG_USER3:
+    case GCRYMPI_FLAG_USER4:      a->flags |= flag; break;
+
+    case GCRYMPI_FLAG_OPAQUE:
+    default: log_bug("invalid flag value\n");
     }
 }
 
 void
-gcry_mpi_clear_flag( gcry_mpi_t a, enum gcry_mpi_flag flag )
+_gcry_mpi_clear_flag (gcry_mpi_t a, enum gcry_mpi_flag flag)
 {
   (void)a; /* Not yet used. */
 
   switch (flag)
     {
+    case GCRYMPI_FLAG_IMMUTABLE:
+      if (!(a->flags & 32))
+        a->flags &= ~16;
+      break;
+
+    case GCRYMPI_FLAG_USER1:
+    case GCRYMPI_FLAG_USER2:
+    case GCRYMPI_FLAG_USER3:
+    case GCRYMPI_FLAG_USER4:
+      a->flags &= ~flag;
+      break;
+
+    case GCRYMPI_FLAG_CONST:
     case GCRYMPI_FLAG_SECURE:
     case GCRYMPI_FLAG_OPAQUE:
     default: log_bug("invalid flag value\n");
@@ -447,14 +766,34 @@ gcry_mpi_clear_flag( gcry_mpi_t a, enum gcry_mpi_flag 
flag )
 }
 
 int
-gcry_mpi_get_flag( gcry_mpi_t a, enum gcry_mpi_flag flag )
+_gcry_mpi_get_flag (gcry_mpi_t a, enum gcry_mpi_flag flag)
 {
   switch (flag)
     {
-    case GCRYMPI_FLAG_SECURE: return (a->flags & 1);
-    case GCRYMPI_FLAG_OPAQUE: return (a->flags & 4);
+    case GCRYMPI_FLAG_SECURE:    return !!(a->flags & 1);
+    case GCRYMPI_FLAG_OPAQUE:    return !!(a->flags & 4);
+    case GCRYMPI_FLAG_IMMUTABLE: return !!(a->flags & 16);
+    case GCRYMPI_FLAG_CONST:     return !!(a->flags & 32);
+    case GCRYMPI_FLAG_USER1:
+    case GCRYMPI_FLAG_USER2:
+    case GCRYMPI_FLAG_USER3:
+    case GCRYMPI_FLAG_USER4:     return !!(a->flags & flag);
     default: log_bug("invalid flag value\n");
     }
   /*NOTREACHED*/
   return 0;
 }
+
+
+/* Return a constant MPI descripbed by NO which is one of the
+   MPI_C_xxx macros.  There is no need to copy this returned value; it
+   may be used directly.  */
+gcry_mpi_t
+_gcry_mpi_const (enum gcry_mpi_constants no)
+{
+  if ((int)no < 0 || no > MPI_NUMBER_OF_CONSTANTS)
+    log_bug("invalid mpi_const selector %d\n", no);
+  if (!constants[no])
+    log_bug("MPI subsystem not initialized\n");
+  return constants[no];
+}
diff --git a/grub-core/lib/libgcrypt/mpi/pa7100/Manifest 
b/grub-core/lib/libgcrypt/mpi/pa7100/Manifest
deleted file mode 100644
index f075ab056..000000000
--- a/grub-core/lib/libgcrypt/mpi/pa7100/Manifest
+++ /dev/null
@@ -1,22 +0,0 @@
-# Manifest - checksums 
-# Copyright 2003 Free Software Foundation, Inc.
-#
-# This file is part of Libgcrypt.
-#
-# Libgcrypt is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as
-# published by the Free Software Foundation; either version 2.1 of
-# the License, or (at your option) any later version.
-#
-# Libgcrypt is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
-
-mpih-lshift.S
-mpih-rshift.S
-$names$ 
iQCVAwUAP+LmVjEAnp832S/7AQKlEQQAv2+x/d+Z0t8FwwHlxKpIKOJDr9e+Y2i8y8orcIEa3dnwU5LMOH3EzFoNSD9crc31FMokgm/X5xeLjqRTdcmGHyJJQJDPJVJyuaOm6qHJaFzzfJjrfMW66nJxfNSXIiIm4DgpP20NmumaorLCkiIZ5Z81KGAc8FiRggbRVYx+wxo==Vjh9
diff --git a/grub-core/lib/libgcrypt/mpi/pa7100/distfiles 
b/grub-core/lib/libgcrypt/mpi/pa7100/distfiles
index e1cde4d57..fece94310 100644
--- a/grub-core/lib/libgcrypt/mpi/pa7100/distfiles
+++ b/grub-core/lib/libgcrypt/mpi/pa7100/distfiles
@@ -1,4 +1,3 @@
-Manifest
 mpih-lshift.S
 mpih-rshift.S
 
diff --git a/grub-core/lib/libgcrypt/mpi/pentium4/README 
b/grub-core/lib/libgcrypt/mpi/pentium4/README
deleted file mode 100644
index 215fc7f8b..000000000
--- a/grub-core/lib/libgcrypt/mpi/pentium4/README
+++ /dev/null
@@ -1,115 +0,0 @@
-Copyright 2001 Free Software Foundation, Inc.
-
-This file is part of the GNU MP Library.
-
-The GNU MP Library is free software; you can redistribute it and/or modify
-it under the terms of the GNU Lesser General Public License as published by
-the Free Software Foundation; either version 2.1 of the License, or (at your
-option) any later version.
-
-The GNU MP Library is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
-License for more details.
-
-You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
-the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
-02110-1301, USA.
-
-
-
-
-                   INTEL PENTIUM-4 MPN SUBROUTINES
-
-
-This directory contains mpn functions optimized for Intel Pentium-4.
-
-The mmx subdirectory has routines using MMX instructions, the sse2
-subdirectory has routines using SSE2 instructions.  All P4s have these, the
-separate directories are just so configure can omit that code if the
-assembler doesn't support it.
-
-
-STATUS
-
-                                cycles/limb
-
-       mpn_add_n/sub_n            4 normal, 6 in-place
-
-       mpn_mul_1                  4 normal, 6 in-place
-       mpn_addmul_1               6
-       mpn_submul_1               7
-
-       mpn_mul_basecase           6 cycles/crossproduct (approx)
-
-       mpn_sqr_basecase           3.5 cycles/crossproduct (approx)
-                                   or 7.0 cycles/triangleproduct (approx)
-
-       mpn_l/rshift               1.75
-
-
-
-The shifts ought to be able to go at 1.5 c/l, but not much effort has been
-applied to them yet.
-
-In-place operations, and all addmul, submul, mul_basecase and sqr_basecase
-calls, suffer from pipeline anomalies associated with write combining and
-movd reads and writes to the same or nearby locations.  The movq
-instructions do not trigger the same hardware problems.  Unfortunately,
-using movq and splitting/combining seems to require too many extra
-instructions to help.  Perhaps future chip steppings will be better.
-
-
-
-NOTES
-
-The Pentium-4 pipeline "Netburst", provides for quite a number of surprises.
-Many traditional x86 instructions run very slowly, requiring use of
-alterative instructions for acceptable performance.
-
-adcl and sbbl are quite slow at 8 cycles for reg->reg.  paddq of 32-bits
-within a 64-bit mmx register seems better, though the combination
-paddq/psrlq when propagating a carry is still a 4 cycle latency.
-
-incl and decl should be avoided, instead use add $1 and sub $1.  Apparently
-the carry flag is not separately renamed, so incl and decl depend on all
-previous flags-setting instructions.
-
-shll and shrl have a 4 cycle latency, or 8 times the latency of the fastest
-integer instructions (addl, subl, orl, andl, and some more).  shldl and
-shrdl seem to have 13 and 15 cycles latency, respectively.  Bizarre.
-
-movq mmx -> mmx does have 6 cycle latency, as noted in the documentation.
-pxor/por or similar combination at 2 cycles latency can be used instead.
-The movq however executes in the float unit, thereby saving MMX execution
-resources.  With the right juggling, data moves shouldn't be on a dependent
-chain.
-
-L1 is write-through, but the write-combining sounds like it does enough to
-not require explicit destination prefetching.
-
-xmm registers so far haven't found a use, but not much effort has been
-expended.  A configure test for whether the operating system knows
-fxsave/fxrestor will be needed if they're used.
-
-
-
-REFERENCES
-
-Intel Pentium-4 processor manuals,
-
-       http://developer.intel.com/design/pentium4/manuals
-
-"Intel Pentium 4 Processor Optimization Reference Manual", Intel, 2001,
-order number 248966.  Available on-line:
-
-       http://developer.intel.com/design/pentium4/manuals/248966.htm
-
-
-
-----------------
-Local variables:
-mode: text
-fill-column: 76
-End:
diff --git a/grub-core/lib/libgcrypt/mpi/pentium4/distfiles 
b/grub-core/lib/libgcrypt/mpi/pentium4/distfiles
deleted file mode 100644
index b419f85a9..000000000
--- a/grub-core/lib/libgcrypt/mpi/pentium4/distfiles
+++ /dev/null
@@ -1,3 +0,0 @@
-README
-
-
diff --git a/grub-core/lib/libgcrypt/mpi/pentium4/mmx/distfiles 
b/grub-core/lib/libgcrypt/mpi/pentium4/mmx/distfiles
deleted file mode 100644
index 8f0ea426d..000000000
--- a/grub-core/lib/libgcrypt/mpi/pentium4/mmx/distfiles
+++ /dev/null
@@ -1,2 +0,0 @@
-mpih-lshift.S
-mpih-rshift.S
diff --git a/grub-core/lib/libgcrypt/mpi/pentium4/mmx/mpih-lshift.S 
b/grub-core/lib/libgcrypt/mpi/pentium4/mmx/mpih-lshift.S
deleted file mode 100644
index e2dd184ba..000000000
--- a/grub-core/lib/libgcrypt/mpi/pentium4/mmx/mpih-lshift.S
+++ /dev/null
@@ -1,457 +0,0 @@
-/* Intel Pentium-4 mpn_lshift -- left shift.
- *
- * Copyright 2001, 2002 Free Software Foundation, Inc.
- *
- * This file is part of Libgcrypt.
- *
- * Libgcrypt is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as
- * published by the Free Software Foundation; either version 2.1 of
- * the License, or (at your option) any later version.
- *
- * Libgcrypt is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
- *
- * Note: This code is heavily based on the GNU MP Library.
- *      Actually it's the same code with only minor changes in the
- *      way the data is stored; this is to support the abstraction
- *      of an optional secure memory allocation which may be used
- *      to avoid revealing of sensitive data due to paging etc.
- */
-
-
-#include "sysdep.h"
-#include "asm-syntax.h"
-
-
-/*******************
- * mpi_limb_t
- * _gcry_mpih_lshift( mpi_ptr_t wp,    (sp + 4)
- *                mpi_ptr_t up,        (sp + 8)
- *                mpi_size_t usize,    (sp + 12)
- *                unsigned cnt)        (sp + 16)
- *
- * P4 Willamette, Northwood: 1.75 cycles/limb
- * P4 Prescott:                     2.0 cycles/limb
- */
-
-.text
-       ALIGN (3)
-       .globl C_SYMBOL_NAME(_gcry_mpih_lshift)
-C_SYMBOL_NAME(_gcry_mpih_lshift:)
-
-       
-       pushl   %ebx
-       pushl   %edi
-
-
-       movl    20(%esp), %eax
-       movl    12(%esp), %edx
-
-       movl    16(%esp), %ebx
-       movl    24(%esp), %ecx
-
-       cmp     $5, %eax
-       jae     .Lunroll
-
-       movl    -4(%ebx,%eax,4), %edi   
-       decl    %eax
-
-       jnz     .Lsimple
-
-       shldl   %cl, %edi, %eax 
-
-       shll    %cl, %edi
-
-       movl    %edi, (%edx)            
-       popl    %edi                    
-
-       popl    %ebx
-
-       ret
-
-
-
-
-
-.Lsimple:
-       
-       
-       
-       
-       
-       
-       
-
-
-       movd    (%ebx,%eax,4), %mm5     
-
-       movd    %ecx, %mm6              
-       negl    %ecx
-
-       psllq   %mm6, %mm5
-       addl    $32, %ecx
-
-       movd    %ecx, %mm7
-       psrlq   $32, %mm5               
-
-
-.Lsimple_top:
-       
-       
-       
-       
-       
-       
-       
-       
-       
-       
-       
-
-       movq    -4(%ebx,%eax,4), %mm0
-       decl    %eax
-
-       psrlq   %mm7, %mm0
-
-       
-
-       movd    %mm0, 4(%edx,%eax,4)
-       jnz     .Lsimple_top
-
-
-       movd    (%ebx), %mm0
-
-       movd    %mm5, %eax
-       psllq   %mm6, %mm0
-
-       popl    %edi
-       popl    %ebx
-
-       movd    %mm0, (%edx)
-
-       emms
-
-       ret
-
-
-
-
-
-       .align  8, 0x90
-.Lunroll:
-       
-       
-       
-       
-       
-       
-       
-
-
-       movd    -4(%ebx,%eax,4), %mm5   
-       leal    (%ebx,%eax,4), %edi
-
-       movd    %ecx, %mm6              
-       andl    $4, %edi
-
-       psllq   %mm6, %mm5
-       jz      .Lstart_src_aligned
-
-
-       
-       
-       
-       
-       
-       
-       
-       
-       
-       
-       
-       
-       
-       
-       
-       
-
-       movq    -8(%ebx,%eax,4), %mm0   
-
-       psllq   %mm6, %mm0
-       decl    %eax
-
-       psrlq   $32, %mm0
-
-       
-
-       movd    %mm0, (%edx,%eax,4)
-.Lstart_src_aligned:
-
-       movq    -8(%ebx,%eax,4), %mm1   
-       leal    (%edx,%eax,4), %edi
-
-       andl    $4, %edi
-       psrlq   $32, %mm5               
-
-       movq    -16(%ebx,%eax,4), %mm3  
-       jz      .Lstart_dst_aligned
-
-       
-       
-       
-       
-       
-       
-       
-       
-       
-       
-       
-       
-       
-       
-       
-       
-       
-       
-
-       movq    %mm1, %mm0
-       addl    $32, %ecx               
-
-       psllq   %mm6, %mm0
-
-       movd    %ecx, %mm6
-       psrlq   $32, %mm0
-
-       
-
-       movd    %mm0, -4(%edx,%eax,4)
-       subl    $4, %edx
-.Lstart_dst_aligned:
-
-
-       psllq   %mm6, %mm1
-       negl    %ecx                    
-
-       addl    $64, %ecx               
-       movq    %mm3, %mm2
-
-       movd    %ecx, %mm7
-       subl    $8, %eax                
-
-       psrlq   %mm7, %mm3
-
-       por     %mm1, %mm3              
-       jc      .Lfinish
-
-
-       
-
-       .align  8, 0x90
-.Lunroll_loop:
-       
-       
-       
-       
-       
-       
-       
-       
-       
-       
-       
-       
-       
-       
-       
-
-       movq    8(%ebx,%eax,4), %mm0
-       psllq   %mm6, %mm2
-
-       movq    %mm0, %mm1
-       psrlq   %mm7, %mm0
-
-       movq    %mm3, 24(%edx,%eax,4)   
-       por     %mm2, %mm0
-
-       movq    (%ebx,%eax,4), %mm3     
-       psllq   %mm6, %mm1              
-
-       movq    %mm0, 16(%edx,%eax,4)
-       movq    %mm3, %mm2              
-
-       psrlq   %mm7, %mm3              
-       subl    $4, %eax
-
-       por     %mm1, %mm3              
-       jnc     .Lunroll_loop
-
-
-
-.Lfinish:
-       
-
-       testb   $2, %al
-
-       jz      .Lfinish_no_two
-
-       movq    8(%ebx,%eax,4), %mm0
-       psllq   %mm6, %mm2
-
-       movq    %mm0, %mm1
-       psrlq   %mm7, %mm0
-
-       movq    %mm3, 24(%edx,%eax,4)   
-       por     %mm2, %mm0
-
-       movq    %mm1, %mm2
-       movq    %mm0, %mm3
-
-       subl    $2, %eax
-.Lfinish_no_two:
-
-
-       
-       
-       
-       
-
-       testb   $1, %al
-       movd    %mm5, %eax      
-
-       popl    %edi
-       jz      .Lfinish_zero
-
-
-       
-       
-       
-       
-       
-       
-       
-       
-       
-       
-       
-       
-       
-       
-
-
-       
-       
-       
-       
-       
-       
-       
-       
-       
-       
-       
-       
-       
-       
-
-
-       
-       
-       
-       
-
-
-       movd    (%ebx), %mm0
-       psllq   %mm6, %mm2
-
-       movq    %mm3, 12(%edx)
-       psllq   $32, %mm0
-
-       movq    %mm0, %mm1
-       psrlq   %mm7, %mm0
-
-       por     %mm2, %mm0
-       psllq   %mm6, %mm1
-
-       movq    %mm0, 4(%edx)
-       psrlq   $32, %mm1
-
-       andl    $32, %ecx
-       popl    %ebx
-
-       jz      .Lfinish_one_unaligned
-
-       movd    %mm1, (%edx)
-.Lfinish_one_unaligned:
-
-       emms
-
-       ret
-
-
-
-
-.Lfinish_zero:
-
-       
-       
-       
-       
-       
-       
-       
-       
-       
-       
-       
-       
-       
-       
-
-
-       
-       
-       
-       
-       
-       
-       
-       
-       
-       
-       
-       
-       
-       
-
-
-       
-       
-
-
-       movq    %mm3, 8(%edx)
-       andl    $32, %ecx
-
-       psllq   %mm6, %mm2
-       jz      .Lfinish_zero_unaligned
-
-       movq    %mm2, (%edx)
-.Lfinish_zero_unaligned:
-
-       psrlq   $32, %mm2
-       popl    %ebx
-
-       movd    %mm5, %eax      
-
-       movd    %mm2, 4(%edx)
-
-       emms
-
-       ret
diff --git a/grub-core/lib/libgcrypt/mpi/pentium4/mmx/mpih-rshift.S 
b/grub-core/lib/libgcrypt/mpi/pentium4/mmx/mpih-rshift.S
deleted file mode 100644
index e3374e3ba..000000000
--- a/grub-core/lib/libgcrypt/mpi/pentium4/mmx/mpih-rshift.S
+++ /dev/null
@@ -1,453 +0,0 @@
-/* Intel Pentium-4 mpn_rshift -- right shift.
- *
- * Copyright 2001, 2002 Free Software Foundation, Inc.
- *
- * This file is part of Libgcrypt.
- *
- * Libgcrypt is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as
- * published by the Free Software Foundation; either version 2.1 of
- * the License, or (at your option) any later version.
- *
- * Libgcrypt is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
- *
- * Note: This code is heavily based on the GNU MP Library.
- *      Actually it's the same code with only minor changes in the
- *      way the data is stored; this is to support the abstraction
- *      of an optional secure memory allocation which may be used
- *      to avoid revealing of sensitive data due to paging etc.
- */
-
-
-#include "sysdep.h"
-#include "asm-syntax.h"
-
-
-/*******************
- * mpi_limb_t
- * _gcry_mpih_rshift( mpi_ptr_t wp,    (sp + 4)
- *                mpi_ptr_t up,        (sp + 8)
- *                mpi_size_t usize,    (sp + 12)
- *                unsigned cnt)        (sp + 16)
- *
- * P4 Willamette, Northwood: 1.75 cycles/limb
- * P4 Prescott:                     2.0 cycles/limb
- */
-
-.text
-       ALIGN (3)
-       .globl C_SYMBOL_NAME(_gcry_mpih_rshift)
-C_SYMBOL_NAME(_gcry_mpih_rshift:)
-       pushl   %ebx
-       pushl   %edi
-
-
-       movl    20(%esp), %eax
-       movl    12(%esp), %edx
-
-       movl    16(%esp), %ebx
-       movl    24(%esp), %ecx
-
-       cmp     $5, %eax
-       jae     .Lunroll
-
-       decl    %eax
-       movl    (%ebx), %edi            
-
-       jnz     .Lsimple
-
-       shrdl   %cl, %edi, %eax 
-
-       shrl    %cl, %edi
-
-       movl    %edi, (%edx)            
-       popl    %edi                    
-
-       popl    %ebx
-
-       ret
-
-
-
-
-
-       .align  8, 0x90
-.Lsimple:
-       
-       
-       
-       
-       
-       
-       
-
-
-       movd    (%ebx), %mm5            
-       leal    (%ebx,%eax,4), %ebx     
-
-       movd    %ecx, %mm6              
-       leal    -4(%edx,%eax,4), %edx   
-
-       psllq   $32, %mm5
-       negl    %eax
-
-
-
-
-
-
-
-.Lsimple_top:
-       
-       
-       
-       
-       
-       
-       
-       
-
-       movq    (%ebx,%eax,4), %mm0
-       incl    %eax
-
-       psrlq   %mm6, %mm0
-
-       movd    %mm0, (%edx,%eax,4)
-       jnz     .Lsimple_top
-
-
-       movd    (%ebx), %mm0
-       psrlq   %mm6, %mm5              
-
-       psrlq   %mm6, %mm0
-       popl    %edi
-
-       movd    %mm5, %eax
-       popl    %ebx
-
-       movd    %mm0, 4(%edx)
-
-       emms
-
-       ret
-
-
-
-
-
-       .align  8, 0x90
-.Lunroll:
-       
-       
-       
-       
-       
-       
-       
-
-
-       movd    (%ebx), %mm5            
-       movl    $4, %edi
-
-       movd    %ecx, %mm6              
-       testl   %edi, %ebx
-
-       psllq   $32, %mm5
-       jz      .Lstart_src_aligned
-
-
-       
-       
-       
-       
-       
-       
-       
-       
-       
-       
-       
-       
-       
-
-       movq    (%ebx), %mm0            
-
-       psrlq   %mm6, %mm0
-       addl    $4, %ebx
-
-       decl    %eax
-
-       movd    %mm0, (%edx)
-       addl    $4, %edx
-.Lstart_src_aligned:
-
-
-       movq    (%ebx), %mm1
-       testl   %edi, %edx
-
-       psrlq   %mm6, %mm5              
-       jz      .Lstart_dst_aligned
-
-       
-       
-       
-       
-       
-       
-       
-       
-       
-       
-       
-       
-       
-       
-       
-
-       movq    %mm1, %mm0
-       addl    $32, %ecx               
-
-       psrlq   %mm6, %mm0
-
-       movd    %ecx, %mm6
-
-       movd    %mm0, (%edx)
-       addl    $4, %edx
-.Lstart_dst_aligned:
-
-
-       movq    8(%ebx), %mm3
-       negl    %ecx
-
-       movq    %mm3, %mm2              
-       addl    $64, %ecx
-
-       movd    %ecx, %mm7
-       psrlq   %mm6, %mm1
-
-       leal    -12(%ebx,%eax,4), %ebx
-       leal    -20(%edx,%eax,4), %edx
-
-       psllq   %mm7, %mm3
-       subl    $7, %eax                
-
-       por     %mm1, %mm3              
-       negl    %eax                    
-
-       jns     .Lfinish
-
-
-       
-       
-       
-       
-       
-       
-       
-       
-       
-       
-       
-       
-
-       .align  8, 0x90
-.Lunroll_loop:
-       
-       
-       
-       
-       
-       
-       
-       
-       
-       
-       
-       
-       
-       
-       
-
-       movq    (%ebx,%eax,4), %mm0
-       psrlq   %mm6, %mm2
-
-       movq    %mm0, %mm1
-       psllq   %mm7, %mm0
-
-       movq    %mm3, -8(%edx,%eax,4)   
-       por     %mm2, %mm0
-
-       movq    8(%ebx,%eax,4), %mm3    
-       psrlq   %mm6, %mm1              
-
-       movq    %mm0, (%edx,%eax,4)
-       movq    %mm3, %mm2              
-
-       psllq   %mm7, %mm3              
-       addl    $4, %eax
-
-       por     %mm1, %mm3              
-       js      .Lunroll_loop
-
-
-.Lfinish:
-       
-
-       testb   $2, %al
-
-       jnz     .Lfinish_no_two
-
-       movq    (%ebx,%eax,4), %mm0
-       psrlq   %mm6, %mm2
-
-       movq    %mm0, %mm1
-       psllq   %mm7, %mm0
-
-       movq    %mm3, -8(%edx,%eax,4)   
-       por     %mm2, %mm0
-
-       movq    %mm1, %mm2
-       movq    %mm0, %mm3
-
-       addl    $2, %eax
-.Lfinish_no_two:
-
-
-       
-       
-       
-       
-
-       testb   $1, %al
-       popl    %edi
-
-       movd    %mm5, %eax      
-       jnz     .Lfinish_zero
-
-
-       
-       
-       
-       
-       
-       
-       
-       
-       
-       
-       
-       
-       
-       
-
-
-       
-       
-       
-       
-       
-       
-       
-       
-       
-       
-       
-       
-       
-       
-
-
-       
-       
-       
-
-
-       movd    8(%ebx), %mm0
-       psrlq   %mm6, %mm2
-
-       movq    %mm0, %mm1
-       psllq   %mm7, %mm0
-
-       movq    %mm3, (%edx)
-       por     %mm2, %mm0
-
-       psrlq   %mm6, %mm1
-       andl    $32, %ecx
-
-       popl    %ebx
-       jz      .Lfinish_one_unaligned
-
-       
-       movd    %mm1, 16(%edx)
-.Lfinish_one_unaligned:
-
-       movq    %mm0, 8(%edx)
-
-       emms
-
-       ret
-
-
-
-
-.Lfinish_zero:
-
-       
-       
-       
-       
-       
-       
-       
-       
-       
-       
-       
-       
-       
-       
-
-
-       
-       
-       
-       
-       
-       
-       
-       
-       
-       
-       
-       
-       
-       
-
-
-       
-       
-       
-
-
-       movq    %mm3, 4(%edx)
-       psrlq   %mm6, %mm2
-
-       movd    %mm2, 12(%edx)
-       andl    $32, %ecx
-
-       popl    %ebx
-       jz      .Lfinish_zero_unaligned
-
-       movq    %mm2, 12(%edx)
-.Lfinish_zero_unaligned:
-
-       emms
-
-       ret
diff --git a/grub-core/lib/libgcrypt/mpi/pentium4/sse2/mpih-add1.S 
b/grub-core/lib/libgcrypt/mpi/pentium4/sse2/mpih-add1.S
deleted file mode 100644
index 55ed66303..000000000
--- a/grub-core/lib/libgcrypt/mpi/pentium4/sse2/mpih-add1.S
+++ /dev/null
@@ -1,91 +0,0 @@
-/* Intel Pentium-4 mpn_add_n -- mpn addition.
- *
- * Copyright 2001, 2002 Free Software Foundation, Inc.
- *
- * This file is part of Libgcrypt.
- *
- * Libgcrypt is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as
- * published by the Free Software Foundation; either version 2.1 of
- * the License, or (at your option) any later version.
- *
- * Libgcrypt is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
- *
- * Note: This code is heavily based on the GNU MP Library.
- *      Actually it's the same code with only minor changes in the
- *      way the data is stored; this is to support the abstraction
- *      of an optional secure memory allocation which may be used
- *      to avoid revealing of sensitive data due to paging etc.
- */
-
-
-#include "sysdep.h"
-#include "asm-syntax.h"
-
-
-       /*******************
- *  mpi_limb_t
- *  _gcry_mpih_add_n( mpi_ptr_t res_ptr,       (sp + 4)
- *                mpi_ptr_t s1_ptr,    (sp + 8)
- *                mpi_ptr_t s2_ptr,    (sp + 12)
- *                mpi_size_t size)     (sp + 16)
- *
- * P4 Willamette, Northwood: 4.0 cycles/limb if dst!=src1 and dst!=src2
- *                         6.0 cycles/limb if dst==src1 or dst==src2
- * P4 Prescott:                    >= 5 cycles/limb
- *
- * The 4 c/l achieved here isn't particularly good, but is better than 9 c/l
- * for a basic adc loop.
- */
-
-       TEXT
-       ALIGN (3)
-       GLOBL C_SYMBOL_NAME(_gcry_mpih_add_n)
-C_SYMBOL_NAME(_gcry_mpih_add_n:)
-
-       pxor    %mm0, %mm0
-       
-       movl    8(%esp), %eax           /* s1_ptr */
-       movl    %ebx, 8(%esp)           /* re-use parameter space */
-       movl    12(%esp), %ebx          /* res_ptr */
-       movl    4(%esp), %edx           /* s2_ptr */
-       movl    16(%esp), %ecx          /* size */
-
-       leal    (%eax,%ecx,4), %eax     /* src1 end */
-       leal    (%ebx,%ecx,4), %ebx     /* src2 end */
-       leal    (%edx,%ecx,4), %edx     /* dst end */
-       negl    %ecx                    /* -size */
-
-Ltop:
-/*
-       C eax   src1 end
-       C ebx   src2 end
-       C ecx   counter, limbs, negative
-       C edx   dst end
-       C mm0   carry bit
-*/
-
-       movd    (%eax,%ecx,4), %mm1
-       movd    (%ebx,%ecx,4), %mm2
-       paddq   %mm2, %mm1
-
-       paddq   %mm1, %mm0
-       movd    %mm0, (%edx,%ecx,4)
-
-       psrlq   $32, %mm0
-
-       addl    $1, %ecx
-       jnz     Ltop
-
-
-       movd    %mm0, %eax
-       movl    8(%esp), %ebx   /* restore saved EBX */
-       emms
-       ret
diff --git a/grub-core/lib/libgcrypt/mpi/pentium4/sse2/mpih-mul1.S 
b/grub-core/lib/libgcrypt/mpi/pentium4/sse2/mpih-mul1.S
deleted file mode 100644
index a0c98fb4d..000000000
--- a/grub-core/lib/libgcrypt/mpi/pentium4/sse2/mpih-mul1.S
+++ /dev/null
@@ -1,96 +0,0 @@
-/* Intel Pentium-4 mpn_mul_1 -- Multiply a limb vector with a limb and store
- * the result in a second limb vector.
- *
- * Copyright 2001, 2002, 2003, 2005 Free Software Foundation, Inc.
- *
- * This file is part of Libgcrypt.
- *
- * Libgcrypt is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as
- * published by the Free Software Foundation; either version 2.1 of
- * the License, or (at your option) any later version.
- *
- * Libgcrypt is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
- *
- * Note: This code is heavily based on the GNU MP Library.
- *      Actually it's the same code with only minor changes in the
- *      way the data is stored; this is to support the abstraction
- *      of an optional secure memory allocation which may be used
- *      to avoid revealing of sensitive data due to paging etc.
- */
-
-
-#include "sysdep.h"
-#include "asm-syntax.h"
-
-
-/*******************
- * mpi_limb_t
- * _gcry_mpih_mul_1( mpi_ptr_t res_ptr,        (sp + 4)
- *               mpi_ptr_t s1_ptr,     (sp + 8)
- *               mpi_size_t s1_size,   (sp + 12)
- *               mpi_limb_t s2_limb)   (sp + 16)
- *
- *                           src != dst      src == dst
- * P6 model 9  (Banias)          ?.?
- * P6 model 13 (Dothan)          4.75            4.75
- * P4 model 0  (Willamette)      4.0             6.0
- * P4 model 1  (?)               4.0             6.0
- * P4 model 2  (Northwood)       4.0             6.0
- * P4 model 3  (Prescott)        ?.?             ?.?
- * P4 model 4  (Nocona)          ?.?             ?.?
- * Unfortunately when src==dst the write-combining described in
- * pentium4/README takes us up to 6 c/l.
- *
- */
-
-       TEXT
-       ALIGN (3)
-       GLOBL   C_SYMBOL_NAME(_gcry_mpih_mul_1)
-C_SYMBOL_NAME(_gcry_mpih_mul_1:); 
-
-       pxor    %mm0, %mm0
-
-.Lstart_1c:
-       movl    8(%esp), %eax
-       movd    16(%esp), %mm7
-       movl    4(%esp), %edx
-       movl    12(%esp), %ecx
-
-.Ltop:
-
-/*
-       C eax   src, incrementing
-       C ebx
-       C ecx   counter, size iterations
-       C edx   dst, incrementing
-       C
-       C mm0   carry limb
-       C mm7   multiplier
-*/
-       
-       movd    (%eax), %mm1
-       addl    $4, %eax
-       pmuludq %mm7, %mm1
-
-       paddq   %mm1, %mm0
-       movd    %mm0, (%edx)
-       addl    $4, %edx
-
-       psrlq   $32, %mm0
-
-       subl    $1, %ecx
-       jnz     .Ltop
-
-
-       movd    %mm0, %eax
-       emms
-       ret
-
diff --git a/grub-core/lib/libgcrypt/mpi/pentium4/sse2/mpih-mul2.S 
b/grub-core/lib/libgcrypt/mpi/pentium4/sse2/mpih-mul2.S
deleted file mode 100644
index f975adfca..000000000
--- a/grub-core/lib/libgcrypt/mpi/pentium4/sse2/mpih-mul2.S
+++ /dev/null
@@ -1,136 +0,0 @@
-/* Intel Pentium-4 mpn_addmul_1 -- Multiply a limb vector with a limb and add
- * the result to a second limb vector.
- *
- * Copyright 2001, 2002, 2004, 2005 Free Software Foundation, Inc.
- *
- * This file is part of Libgcrypt.
- *
- * Libgcrypt is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as
- * published by the Free Software Foundation; either version 2.1 of
- * the License, or (at your option) any later version.
- *
- * Libgcrypt is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
- *
- * Note: This code is heavily based on the GNU MP Library.
- *      Actually it's the same code with only minor changes in the
- *      way the data is stored; this is to support the abstraction
- *      of an optional secure memory allocation which may be used
- *      to avoid revealing of sensitive data due to paging etc.
- */
-
-
-#include "sysdep.h"
-#include "asm-syntax.h"
-
-
-/*******************
- * mpi_limb_t
- * _gcry_mpih_addmul_1( mpi_ptr_t res_ptr,      (sp + 4)
- *                  mpi_ptr_t s1_ptr,       (sp + 8)
- *                  mpi_size_t s1_size,     (sp + 12)
- *                  mpi_limb_t s2_limb)     (sp + 16)
- *
- * P3 model 9  (Banias)          ?.?
- * P3 model 13 (Dothan)          5.8
- * P4 model 0  (Willamette)      5.5
- * P4 model 1  (?)               5.5
- * P4 model 2  (Northwood)       5.5
- * P4 model 3  (Prescott)        6.0
- * P4 model 4  (Nocona)
- *
- * Only the carry limb propagation is on the dependent chain, but some other
- * Pentium4 pipeline magic brings down performance to 6 cycles/l from the
- * ideal 4 cycles/l.
- */
-
-
-       TEXT
-       ALIGN (4)
-       GLOBL   C_SYMBOL_NAME(_gcry_mpih_addmul_1)
-C_SYMBOL_NAME(_gcry_mpih_addmul_1:)
-
-       pxor    %mm4, %mm4
-.Lstart_1c:
-       movl    8(%esp), %eax
-       movl    12(%esp), %ecx
-       movl    4(%esp), %edx
-       movd    16(%esp), %mm7
-
-/*
-       C eax   src, incrementing ; 5B
-       C ecx   loop counter, decrementing
-       C edx   dst, incrementing
-       C
-       C mm4   carry, low 32-bits
-       C mm7   multiplier
-*/
-
-       movd            (%eax), %mm2    
-       pmuludq         %mm7, %mm2
-
-       shrl    $1, %ecx
-       jnc     .Leven
-
-       leal            4(%eax), %eax
-       movd            (%edx), %mm1
-       paddq           %mm2, %mm1
-       paddq           %mm1, %mm4
-       movd            %mm4, (%edx)
-       psrlq           $32, %mm4
-
-       testl   %ecx, %ecx
-       jz      .Lrtn
-       leal    4(%edx), %edx
-
-       movd            (%eax), %mm2    
-       pmuludq         %mm7, %mm2
-.Leven:
-       movd            4(%eax), %mm0   
-       movd            (%edx), %mm1    
-       pmuludq         %mm7, %mm0
-
-       subl    $1, %ecx
-       jz      .Lend
-.Lloop:
-       paddq           %mm2, %mm1      
-       movd            8(%eax), %mm2   
-       paddq           %mm1, %mm4      
-       movd            4(%edx), %mm3   
-       pmuludq         %mm7, %mm2
-       movd            %mm4, (%edx)
-       psrlq           $32, %mm4
-
-       paddq           %mm0, %mm3      
-       movd            12(%eax), %mm0  
-       paddq           %mm3, %mm4      
-       movd            8(%edx), %mm1   
-       pmuludq         %mm7, %mm0
-       movd            %mm4, 4(%edx)
-       psrlq           $32, %mm4
-
-       leal    8(%eax), %eax
-       leal    8(%edx), %edx
-       subl    $1, %ecx
-       jnz     .Lloop
-.Lend:
-       paddq           %mm2, %mm1      
-       paddq           %mm1, %mm4      
-       movd            4(%edx), %mm3   
-       movd            %mm4, (%edx)
-       psrlq           $32, %mm4
-       paddq           %mm0, %mm3      
-       paddq           %mm3, %mm4      
-       movd            %mm4, 4(%edx)
-       psrlq           $32, %mm4
-.Lrtn:
-       movd    %mm4, %eax
-       emms
-       ret
diff --git a/grub-core/lib/libgcrypt/mpi/pentium4/sse2/mpih-mul3.S 
b/grub-core/lib/libgcrypt/mpi/pentium4/sse2/mpih-mul3.S
deleted file mode 100644
index ebcd2a68e..000000000
--- a/grub-core/lib/libgcrypt/mpi/pentium4/sse2/mpih-mul3.S
+++ /dev/null
@@ -1,127 +0,0 @@
-/* Intel Pentium-4 mpn_submul_1 -- Multiply a limb vector with a limb and
- * subtract the result from a second limb vector.
- *
- * Copyright 2001, 2002 Free Software Foundation, Inc.
- *
- * This file is part of Libgcrypt.
- *
- * Libgcrypt is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as
- * published by the Free Software Foundation; either version 2.1 of
- * the License, or (at your option) any later version.
- *
- * Libgcrypt is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
- *
- * Note: This code is heavily based on the GNU MP Library.
- *      Actually it's the same code with only minor changes in the
- *      way the data is stored; this is to support the abstraction
- *      of an optional secure memory allocation which may be used
- *      to avoid revealing of sensitive data due to paging etc.
- */
-
-
-#include "sysdep.h"
-#include "asm-syntax.h"
-
-
-/*******************
- * mpi_limb_t
- * _gcry_mpih_submul_1( mpi_ptr_t res_ptr,      (sp + 4)
- *                  mpi_ptr_t s1_ptr,       (sp + 8)
- *                  mpi_size_t s1_size,     (sp + 12)
- *                  mpi_limb_t s2_limb)     (sp + 16)
- *
- * P4: 7 cycles/limb, unstable timing, at least on early Pentium4 silicon
- *    (stepping 10).
- *
- * This code is not particularly good at 7 c/l.  The dependent chain is only
- * 4 c/l and there's only 4 MMX unit instructions, so it's not clear why that
- * speed isn't achieved.
- *
- * The arrangements made here to get a two instruction dependent chain are
- * slightly subtle.  In the loop the carry (or borrow rather) is a negative
- * so that a paddq can be used to give a low limb ready to store, and a high
- * limb ready to become the new carry after a psrlq.
- *
- * If the carry was a simple twos complement negative then the psrlq shift
- * would need to bring in 0 bits or 1 bits according to whether the high was
- * zero or non-zero, since a non-zero value would represent a negative
- * needing sign extension.  That wouldn't be particularly easy to arrange and
- * certainly would add an instruction to the dependent chain, so instead an
- * offset is applied so that the high limb will be 0xFFFFFFFF+c.  With c in
- * the range -0xFFFFFFFF to 0, the value 0xFFFFFFFF+c is in the range 0 to
- * 0xFFFFFFFF and is therefore always positive and can always have 0 bits
- * shifted in, which is what psrlq does.
- *
- * The extra 0xFFFFFFFF must be subtracted before c is used, but that can be
- * done off the dependent chain.  The total adjustment then is to add
- * 0xFFFFFFFF00000000 to offset the new carry, and subtract
- * 0x00000000FFFFFFFF to remove the offset from the current carry, for a net
- * add of 0xFFFFFFFE00000001.  In the code this is applied to the destination
- * limb when fetched.
- *
- * It's also possible to view the 0xFFFFFFFF adjustment as a ones-complement
- * negative, which is how it's undone for the return value, but that doesn't
- * seem as clear.
-*/
-
-       TEXT
-       ALIGN (4)
-       GLOBL   C_SYMBOL_NAME(_gcry_mpih_submul_1)
-C_SYMBOL_NAME(_gcry_mpih_submul_1:)
-
-       pxor    %mm1, %mm1              
-
-.Lstart_1c:
-       movl    8(%esp), %eax
-       pcmpeqd %mm0, %mm0
-
-       movd    16(%esp), %mm7
-       pcmpeqd %mm6, %mm6
-
-       movl    4(%esp), %edx
-       psrlq   $32, %mm0               
-
-       movl    12(%esp), %ecx
-       psllq   $32, %mm6               
-
-       psubq   %mm0, %mm6              
-
-       psubq   %mm1, %mm0              
-
-/*
-       C eax   src, incrementing
-       C ebx
-       C ecx   loop counter, decrementing
-       C edx   dst, incrementing
-       C
-       C mm0   0xFFFFFFFF - borrow
-       C mm6   0xFFFFFFFE00000001
-       C mm7   multiplier
-*/
-       
-.Lloop:
-       movd    (%eax), %mm1            
-       leal    4(%eax), %eax
-       movd    (%edx), %mm2            
-       paddq   %mm6, %mm2              
-       pmuludq %mm7, %mm1
-       psubq   %mm1, %mm2              
-       paddq   %mm2, %mm0              
-       subl    $1, %ecx
-       movd    %mm0, (%edx)            
-       psrlq   $32, %mm0
-       leal    4(%edx), %edx
-       jnz     .Lloop
-
-       movd    %mm0, %eax
-       notl    %eax
-       emms
-       ret
diff --git a/grub-core/lib/libgcrypt/mpi/pentium4/sse2/mpih-sub1.S 
b/grub-core/lib/libgcrypt/mpi/pentium4/sse2/mpih-sub1.S
deleted file mode 100644
index 33900c742..000000000
--- a/grub-core/lib/libgcrypt/mpi/pentium4/sse2/mpih-sub1.S
+++ /dev/null
@@ -1,112 +0,0 @@
-/* Intel Pentium-4 mpn_sub_n -- mpn subtraction.
- *
- * Copyright 2001, 2002 Free Software Foundation, Inc.
- *
- * This file is part of Libgcrypt.
- *
- * Libgcrypt is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser General Public License as
- * published by the Free Software Foundation; either version 2.1 of
- * the License, or (at your option) any later version.
- *
- * Libgcrypt is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
- *
- * Note: This code is heavily based on the GNU MP Library.
- *      Actually it's the same code with only minor changes in the
- *      way the data is stored; this is to support the abstraction
- *      of an optional secure memory allocation which may be used
- *      to avoid revealing of sensitive data due to paging etc.
- */
-
-
-#include "sysdep.h"
-#include "asm-syntax.h"
-
-
-/*******************
- *  mpi_limb_t
- *  _gcry_mpih_sub_n( mpi_ptr_t res_ptr,       (sp + 4)
- *                mpi_ptr_t s1_ptr,    (sp + 8)
- *                mpi_ptr_t s2_ptr,    (sp + 12)
- *                mpi_size_t size)     (sp + 16)
- *
- * P4 Willamette, Northwood: 4.0 cycles/limb if dst!=src1 and dst!=src2
- *                          6.0 cycles/limb if dst==src1 or dst==src2
- * P4 Prescott:                     >= 5 cycles/limb
- *
- * The main loop code is 2x unrolled so that the carry bit can alternate
- * between mm0 and mm1.
- */
-
-
-.text
-       ALIGN (3)
-       .globl C_SYMBOL_NAME(_gcry_mpih_sub_n)
-C_SYMBOL_NAME(_gcry_mpih_sub_n:)
-
-       pxor    %mm0, %mm0
-.Lstart_nc:
-       movl    8(%esp), %eax
-       movl    %ebx, 8(%esp)
-       movl    12(%esp), %ebx
-       movl    4(%esp), %edx
-       movl    16(%esp), %ecx
-
-       leal    (%eax,%ecx,4), %eax     
-       leal    (%ebx,%ecx,4), %ebx     
-       leal    (%edx,%ecx,4), %edx     
-       negl    %ecx                    
-
-.Ltop:
-/*
-       C eax   src1 end
-       C ebx   src2 end
-       C ecx   counter, limbs, negative
-       C edx   dst end
-       C mm0   carry bit
-*/
-
-       movd    (%eax,%ecx,4), %mm1
-       movd    (%ebx,%ecx,4), %mm2
-       psubq   %mm2, %mm1
-
-       psubq   %mm0, %mm1
-       movd    %mm1, (%edx,%ecx,4)
-
-       psrlq   $63, %mm1
-
-       addl    $1, %ecx
-       jz      .Ldone_mm1
-
-       movd    (%eax,%ecx,4), %mm0
-       movd    (%ebx,%ecx,4), %mm2
-       psubq   %mm2, %mm0
-
-       psubq   %mm1, %mm0
-       movd    %mm0, (%edx,%ecx,4)
-
-       psrlq   $63, %mm0
-
-       addl    $1, %ecx
-       jnz     .Ltop
-
-
-       movd    %mm0, %eax
-       movl    8(%esp), %ebx
-       emms
-       ret
-
-
-
-.Ldone_mm1:
-       movd    %mm1, %eax
-       movl    8(%esp), %ebx
-       emms
-       ret
diff --git a/grub-core/lib/libgcrypt/mpi/power/Manifest 
b/grub-core/lib/libgcrypt/mpi/power/Manifest
deleted file mode 100644
index c60fc23c2..000000000
--- a/grub-core/lib/libgcrypt/mpi/power/Manifest
+++ /dev/null
@@ -1,27 +0,0 @@
-# Manifest - checksums 
-# Copyright 2003 Free Software Foundation, Inc.
-#
-# This file is part of Libgcrypt.
-#
-# Libgcrypt is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as
-# published by the Free Software Foundation; either version 2.1 of
-# the License, or (at your option) any later version.
-#
-# Libgcrypt is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
-
-mpih-add1.S
-mpih-lshift.S
-mpih-mul1.S
-mpih-mul2.S
-mpih-mul3.S
-mpih-rshift.S
-mpih-sub1.S
-$names$ 
iQCVAwUAP+LmXTEAnp832S/7AQJ+ngP/XYr5Fvl/8WGVHcIKaehxvnKcSD2ILTWZNGubgnWp8ebIxVijjQCxYneTTy+zO0sNaB002neyscyiwaJj/JQIwZXfr06uGweIqlSpwpj9ndkoJc8E4/FZu+5NTO+E3RaBDAD+Tpo+MTfbC1s18p5i+an93VrSTgNck5PPYQrUcPA==sl3t
diff --git a/grub-core/lib/libgcrypt/mpi/power/distfiles 
b/grub-core/lib/libgcrypt/mpi/power/distfiles
index e1bc008bc..e664c8db6 100644
--- a/grub-core/lib/libgcrypt/mpi/power/distfiles
+++ b/grub-core/lib/libgcrypt/mpi/power/distfiles
@@ -1,4 +1,3 @@
-Manifest
 mpih-add1.S
 mpih-lshift.S
 mpih-mul1.S
diff --git a/grub-core/lib/libgcrypt/mpi/powerpc32/Manifest 
b/grub-core/lib/libgcrypt/mpi/powerpc32/Manifest
deleted file mode 100644
index 26ab6ea3e..000000000
--- a/grub-core/lib/libgcrypt/mpi/powerpc32/Manifest
+++ /dev/null
@@ -1,28 +0,0 @@
-# Manifest - checksums 
-# Copyright 2003 Free Software Foundation, Inc.
-#
-# This file is part of Libgcrypt.
-#
-# Libgcrypt is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as
-# published by the Free Software Foundation; either version 2.1 of
-# the License, or (at your option) any later version.
-#
-# Libgcrypt is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
-
-mpih-add1.S
-mpih-sub1.S
-mpih-mul1.S
-mpih-mul2.S
-mpih-mul3.S
-mpih-lshift.S
-mpih-rshift.S
-syntax.h
-$names$ 
iQCVAwUAP+LmYzEAnp832S/7AQI/cQP+Mcg9rF/c/bJTY48PE1/ARt7vCMtpIlv9alZSSSrU3WHzCtv9nVczFmwHU3DdKFawigY2DljQcK92dZ5ZlOfpFNMz4PKlVMWaKDk+jKlqm2dxvlHuqEvXPpjFAE2gHrhq5qLXS5ZHeMLJIEK84GYC6fjfLUMdZU3altXTUBvoXhA==Yax+
diff --git a/grub-core/lib/libgcrypt/mpi/powerpc32/distfiles 
b/grub-core/lib/libgcrypt/mpi/powerpc32/distfiles
index a08661489..af10d795b 100644
--- a/grub-core/lib/libgcrypt/mpi/powerpc32/distfiles
+++ b/grub-core/lib/libgcrypt/mpi/powerpc32/distfiles
@@ -1,4 +1,3 @@
-Manifest
 mpih-add1.S
 mpih-sub1.S
 mpih-mul1.S
diff --git a/grub-core/lib/libgcrypt/mpi/sparc32/Manifest 
b/grub-core/lib/libgcrypt/mpi/sparc32/Manifest
deleted file mode 100644
index d279229b0..000000000
--- a/grub-core/lib/libgcrypt/mpi/sparc32/Manifest
+++ /dev/null
@@ -1,24 +0,0 @@
-# Manifest - checksums 
-# Copyright 2003 Free Software Foundation, Inc.
-#
-# This file is part of Libgcrypt.
-#
-# Libgcrypt is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as
-# published by the Free Software Foundation; either version 2.1 of
-# the License, or (at your option) any later version.
-#
-# Libgcrypt is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
-
-mpih-lshift.S
-mpih-rshift.S
-mpih-add1.S
-udiv.S
-$names$ 
iQCVAwUAP+LmaDEAnp832S/7AQISHgP/Z5orU+CPKBeRFCogSQDm4p7J2VpDovU6mtfMTdjhqWuZG0U6y8WqH0aj3USfziOhtc8YjQHQ+97g3+EnIWZgLjKacWC6pScY/QbATEpF1D0Wrcea5rk3qR1t7isdBVVOrxedZ5vuj5Op2zx/0OlPI+wt6fTtW88BdG/a6w/ZU/8==Py6h
diff --git a/grub-core/lib/libgcrypt/mpi/sparc32/distfiles 
b/grub-core/lib/libgcrypt/mpi/sparc32/distfiles
index a20f18ead..51329dbdb 100644
--- a/grub-core/lib/libgcrypt/mpi/sparc32/distfiles
+++ b/grub-core/lib/libgcrypt/mpi/sparc32/distfiles
@@ -1,4 +1,3 @@
-Manifest
 mpih-lshift.S
 mpih-rshift.S
 mpih-add1.S
diff --git a/grub-core/lib/libgcrypt/mpi/sparc32v8/Manifest 
b/grub-core/lib/libgcrypt/mpi/sparc32v8/Manifest
deleted file mode 100644
index dc1ce6a8e..000000000
--- a/grub-core/lib/libgcrypt/mpi/sparc32v8/Manifest
+++ /dev/null
@@ -1,23 +0,0 @@
-# Manifest - checksums 
-# Copyright 2003 Free Software Foundation, Inc.
-#
-# This file is part of Libgcrypt.
-#
-# Libgcrypt is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as
-# published by the Free Software Foundation; either version 2.1 of
-# the License, or (at your option) any later version.
-#
-# Libgcrypt is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
-
-mpih-mul1.S
-mpih-mul2.S
-mpih-mul3.S
-$names$ 
iQCVAwUAP+LmbjEAnp832S/7AQKQ2gQAotpCpY9rOJUCdZHbDLXXB9i1UUMraRKbVWimtKq493Y2d2wcqXCK2WaGs1AePK3K6Qk6msxZ0PL5Ho7KgHMkzsZ+wG0EUziiuX0yZRTWNm0r3TYerP6SdWH5GOVdSXn7ckkppk2sVOokfQTy+Tmrnah3+dlYJoujan+fmXWN6Us==DolM
diff --git a/grub-core/lib/libgcrypt/mpi/sparc32v8/distfiles 
b/grub-core/lib/libgcrypt/mpi/sparc32v8/distfiles
index 6e9a53091..2fcb0d1aa 100644
--- a/grub-core/lib/libgcrypt/mpi/sparc32v8/distfiles
+++ b/grub-core/lib/libgcrypt/mpi/sparc32v8/distfiles
@@ -1,4 +1,3 @@
-Manifest
 mpih-mul1.S
 mpih-mul2.S
 mpih-mul3.S
diff --git a/grub-core/lib/libgcrypt/mpi/supersparc/Manifest 
b/grub-core/lib/libgcrypt/mpi/supersparc/Manifest
deleted file mode 100644
index 869b97bdb..000000000
--- a/grub-core/lib/libgcrypt/mpi/supersparc/Manifest
+++ /dev/null
@@ -1,21 +0,0 @@
-# Manifest - checksums 
-# Copyright 2003 Free Software Foundation, Inc.
-#
-# This file is part of Libgcrypt.
-#
-# Libgcrypt is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser General Public License as
-# published by the Free Software Foundation; either version 2.1 of
-# the License, or (at your option) any later version.
-#
-# Libgcrypt is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
-
-udiv.S
-$names$ 
iQCVAwUAP+LmdjEAnp832S/7AQIrUgQA3YmurZhK7r20DqRvg0gwNe9jMDcFfUY4ZPhW5HkGzMbmrxXtj5Dx50RIPteum72bXE+IhcngljQb/cskiN5Hi9oc2a2CPhyTqVFEeGyF+kJ170GI1pVfFOfzbVG0F4nEwm5lGHgv/nvFsvrjmmAXVW1v/yk5N35wbiLviOFrLOQ==byFc
diff --git a/grub-core/lib/libgcrypt/mpi/supersparc/distfiles 
b/grub-core/lib/libgcrypt/mpi/supersparc/distfiles
index ef7c0a538..550601cb5 100644
--- a/grub-core/lib/libgcrypt/mpi/supersparc/distfiles
+++ b/grub-core/lib/libgcrypt/mpi/supersparc/distfiles
@@ -1,3 +1,2 @@
-Manifest
 udiv.S
 
diff --git a/grub-core/lib/libgcrypt/src/ChangeLog-2011 
b/grub-core/lib/libgcrypt/src/ChangeLog-2011
index 796fea403..3571fb1e4 100644
--- a/grub-core/lib/libgcrypt/src/ChangeLog-2011
+++ b/grub-core/lib/libgcrypt/src/ChangeLog-2011
@@ -1,13 +1,72 @@
 2011-12-01  Werner Koch  <wk@g10code.com>
 
-        NB: ChangeLog files are no longer manually maintained.  Starting
-        on December 1st, 2011 we put change information only in the GIT
-        commit log, and generate a top-level ChangeLog file from logs at
-        "make dist".  See doc/HACKING for details.
+       NB: ChangeLog files are no longer manually maintained.  Starting
+       on December 1st, 2011 we put change information only in the GIT
+       commit log, and generate a top-level ChangeLog file from logs at
+       "make dist".  See doc/HACKING for details.
 
-2011-09-08  Werner Koch  <wk@g10code.com>
+2011-09-16  Werner Koch  <wk@g10code.com>
 
-       * gcrypt.h.in [GCRYPT_NO_DEPRECATED]: Exclude gcry_ac structures.
+       Change ATH code and turn the thread initialization callbacks in
+       the API into dummy functions.
+
+       * global.c (global_init): Call _gcry_pimegen_init.
+
+       * gcrypt.h.in (GCRY_THREAD_OPTI ON_VERSION): Bump to 1.
+       (GCRY_THREAD_OPTION_PTH_IMPL): Simplify.
+       (GCRY_THREAD_OPTION_PTHREAD_IMPL): Simplify.
+
+       * ath.c (ath_read, ath_write): Remove.  They are only used in the
+       optional random-daemon.
+       (ath_select, ath_waitpid, ath_accept, ath_connect, ath_sendmsg)
+       (ath_recvmsg): Remove.  They are not used.
+       * ath.h: Remove prototypes and corresponding structure fields.
+
+2011-03-11  Werner Koch  <wk@g10code.com>
+
+       * ath.c (mutex_init): Rename second arg to FORCE and invert
+       logic.  Change all callers.
+
+2011-09-15  Werner Koch  <wk@g10code.com>
+
+       * gcrypt.h.in (enum gcry_thread_option): Remove deprecated enum.
+       (gcry_md_start_debug, gcry_md_stop_debug): Remove deprecated these
+       macros.
+
+2011-09-15  Werner Koch  <wk@g10code.com>
+
+       Removal of the gcry_ac and the module register interfaces.
+
+       * Makefile.am (include_HEADERS): Remove gcrypt-module.h.
+       (libgcrypt_la_SOURCES): Add gcrypt-module.h which is now internal
+       header.
+       * gcrypt-module.h (gcry_md_register, gcry_md_unregister): Remove.
+       (gcry_pk_register, gcry_pk_unregister): Remove.
+       (gcry_cipher_register, gcry_cipher_unregister): Remove.
+       * visibility.h: Include gcrypt-module.h.
+       * gcrypt.h.in: Do not include gcrypt-module.h.
+       * gcrypt.h.in: Remove all gcry_ac symbols.
+       (gcry_pk_list, gcry_md_list, gcry_cipher_list): Remove.
+       * visibility.h: Remove all gcry_ac symbols.
+       (gcry_pk_list, gcry_md_list, gcry_cipher_list): Remove.
+       (gcry_cipher_register, gcry_cipher_unregister, gcry_pk_register)
+       (gcry_pk_unregister, gcry_md_register, gcry_md_unregister): Remove.
+       * visibility.c: Remove all gcry_ac wrappers.
+       (gcry_pk_list, gcry_cipher_list, gcry_md_list): Remove.
+       (gcry_cipher_register, gcry_cipher_unregister, gcry_pk_register)
+       (gcry_pk_unregister, gcry_md_register, gcry_md_unregister): Remove.
+       * libgcrypt.vers: Remove all gcry_ac symbols.
+       (GCRYPT_1.2): Rename to GCRYPT_1.6.
+       (gcry_pk_list, gcry_md_list, gcry_cipher_list): Remove.
+       * libgcrypt.def: Remove all gcry_ac symbols.
+       (gcry_pk_list, gcry_md_list, gcry_cipher_list): Remove.
+       * global.c (global_init): Remove comment code with a call to
+       _gcry_ac_init.
+
+2011-09-15  Werner Koch  <wk@g10code.com>
+
+       * hmac256.c (main): Fix endless loop when using pipe input and
+       option --binary.
 
 2011-06-10  Werner Koch  <wk@g10code.com>
 
@@ -2333,3 +2392,7 @@ Tue Dec  8 13:15:16 CET 1998  Werner Koch  
<wk@isil.d.shuttle.de>
  This file is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY, to the extent permitted by law; without even the
  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+
+Local Variables:
+buffer-read-only: t
+End:
diff --git a/grub-core/lib/libgcrypt/src/Makefile.am 
b/grub-core/lib/libgcrypt/src/Makefile.am
index 91680220e..ea265fc24 100644
--- a/grub-core/lib/libgcrypt/src/Makefile.am
+++ b/grub-core/lib/libgcrypt/src/Makefile.am
@@ -20,20 +20,23 @@
 
 ## Process this file with automake to produce Makefile.in
 
-EXTRA_DIST = Manifest libgcrypt-config.in libgcrypt.m4 libgcrypt.vers \
-             gcrypt.h.in libgcrypt.def
+pkgconfigdir = $(libdir)/pkgconfig
+pkgconfig_DATA = libgcrypt.pc
+
+EXTRA_DIST = libgcrypt-config.in libgcrypt.m4 libgcrypt.vers \
+             gcrypt.h.in libgcrypt.def libgcrypt.pc.in gen-note-integrity.sh
 
 bin_SCRIPTS = libgcrypt-config
 m4datadir = $(datadir)/aclocal
 m4data_DATA = libgcrypt.m4
-include_HEADERS = gcrypt.h gcrypt-module.h
+nodist_include_HEADERS = gcrypt.h
 
 lib_LTLIBRARIES = libgcrypt.la
-bin_PROGRAMS = dumpsexp hmac256
-if USE_RANDOM_DAEMON
+bin_PROGRAMS = dumpsexp hmac256 mpicalc
+if ENABLE_RANDOM_DAEMON
 sbin_PROGRAMS = gcryptrnd
 bin_PROGRAMS += getrandom
-endif USE_RANDOM_DAEMON
+endif ENABLE_RANDOM_DAEMON
 
 # Depending on the architecture some targets require libgpg-error.
 if HAVE_W32CE_SYSTEM
@@ -44,6 +47,8 @@ arch_gpg_error_cflags =
 arch_gpg_error_libs   =
 endif
 
+AM_CFLAGS = $(GPG_ERROR_CFLAGS)
+AM_CCASFLAGS = $(NOEXECSTACK_FLAGS)
 
 if HAVE_LD_VERSION_SCRIPT
   libgcrypt_version_script_cmd = -Wl,--version-script=$(srcdir)/libgcrypt.vers
@@ -51,14 +56,19 @@ else
   libgcrypt_version_script_cmd =
 endif
 
-libgcrypt_la_CFLAGS = $(GPG_ERROR_CFLAGS)
-libgcrypt_la_SOURCES = g10lib.h visibility.c visibility.h types.h \
-       cipher.h cipher-proto.h \
-       misc.c global.c sexp.c hwfeatures.c \
+libgcrypt_la_CFLAGS = $(GPG_ERROR_CFLAGS) @DEF_HMAC_BINARY_CHECK@
+libgcrypt_la_SOURCES = \
+        gcrypt-int.h g10lib.h visibility.c visibility.h types.h \
+       gcrypt-testapi.h cipher.h cipher-proto.h \
+       misc.c global.c sexp.c hwfeatures.c hwf-common.h \
        stdmem.c stdmem.h secmem.c secmem.h \
-       mpi.h missing-string.c module.c fips.c \
-       hmac256.c hmac256.h \
-       ath.h ath.c
+       mpi.h missing-string.c fips.c \
+       context.c context.h const-time.h const-time.c \
+       ec-context.h
+
+EXTRA_libgcrypt_la_SOURCES = hwf-x86.c hwf-arm.c hwf-ppc.c hwf-s390x.c
+gcrypt_hwf_modules = @GCRYPT_HWF_MODULES@
+
 
 if HAVE_W32_SYSTEM
 
@@ -74,8 +84,10 @@ SUFFIXES = .rc .lo
 gcrypt_res = versioninfo.lo
 no_undefined = -no-undefined
 export_symbols = -export-symbols $(srcdir)/libgcrypt.def
+extra_ltoptions = -XCClinker -static-libgcc
 
 install-def-file:
+       -$(INSTALL) -d $(DESTDIR)$(libdir)
        $(INSTALL) $(srcdir)/libgcrypt.def $(DESTDIR)$(libdir)/libgcrypt.def
 
 uninstall-def-file:
@@ -89,6 +101,7 @@ gcrypt_res =
 gcrypt_res_ldflag =
 no_undefined =
 export_symbols =
+extra_ltoptions =
 install-def-file:
 uninstall-def-file:
 
@@ -97,47 +110,65 @@ gcrypt_deps =
 endif !HAVE_W32_SYSTEM
 
 
-libgcrypt_la_LDFLAGS = $(no_undefined) $(export_symbols) \
+libgcrypt_la_LDFLAGS = $(no_undefined) $(export_symbols) $(extra_ltoptions) \
        $(libgcrypt_version_script_cmd) -version-info \
        @LIBGCRYPT_LT_CURRENT@:@LIBGCRYPT_LT_REVISION@:@LIBGCRYPT_LT_AGE@
 libgcrypt_la_DEPENDENCIES = \
+         $(gcrypt_hwf_modules) \
        ../cipher/libcipher.la \
        ../random/librandom.la \
        ../mpi/libmpi.la \
        ../compat/libcompat.la \
        $(srcdir)/libgcrypt.vers $(gcrypt_deps)
 libgcrypt_la_LIBADD = $(gcrypt_res) \
+        $(gcrypt_hwf_modules) \
        ../cipher/libcipher.la \
        ../random/librandom.la \
        ../mpi/libmpi.la \
-       ../compat/libcompat.la  $(GPG_ERROR_LIBS)
+       ../compat/libcompat.la $(DL_LIBS) $(GPG_ERROR_LIBS)
 
 
 dumpsexp_SOURCES = dumpsexp.c
 dumpsexp_CFLAGS = $(arch_gpg_error_cflags)
 dumpsexp_LDADD = $(arch_gpg_error_libs)
 
-hmac256_SOURCES = hmac256.c
-hmac256_CFLAGS = -DSTANDALONE $(arch_gpg_error_cflags)
+mpicalc_SOURCES = mpicalc.c
+mpicalc_CFLAGS = $(GPG_ERROR_CFLAGS)
+mpicalc_LDADD = libgcrypt.la $(GPG_ERROR_LIBS)
+EXTRA_mpicalc_DEPENDENCIES = libgcrypt.la.done
+
+hmac256_SOURCES = hmac256.c hmac256.h
+hmac256_CFLAGS = -DSTANDALONE @DEF_HMAC_BINARY_CHECK@ \
+       $(arch_gpg_error_cflags)
 hmac256_LDADD = $(arch_gpg_error_libs)
 
-if USE_RANDOM_DAEMON
+if ENABLE_RANDOM_DAEMON
 gcryptrnd_SOURCES = gcryptrnd.c
 gcryptrnd_CFLAGS = $(GPG_ERROR_CFLAGS) $(PTH_CFLAGS)
 gcryptrnd_LDADD = libgcrypt.la $(GPG_ERROR_LIBS) $(PTH_LIBS)
 
 getrandom_SOURCES = getrandom.c
-endif USE_RANDOM_DAEMON
-
+endif ENABLE_RANDOM_DAEMON
+
+CLEANFILES = libgcrypt.la.done
+if USE_HMAC_BINARY_CHECK
+CLEANFILES += libgcrypt.so.hmac
+
+libgcrypt.la.done: libgcrypt.so.hmac
+       $(OBJCOPY) --add-section .note.fdo.integrity=libgcrypt.so.hmac \
+         --set-section-flags .note.fdo.integrity=noload,readonly \
+         .libs/libgcrypt.so .libs/libgcrypt.so.new
+       mv -f .libs/libgcrypt.so.new .libs/libgcrypt.so.*.*
+       @touch libgcrypt.la.done
+
+libgcrypt.so.hmac: hmac256 libgcrypt.la
+       ECHO_N=$(ECHO_N) READELF=$(READELF) AWK=$(AWK) \
+       $(srcdir)/gen-note-integrity.sh > $@
+else !USE_HMAC_BINARY_CHECK
+libgcrypt.la.done: libgcrypt.la
+       @touch libgcrypt.la.done
+endif !USE_HMAC_BINARY_CHECK
 
 install-data-local: install-def-file
 
 uninstall-local: uninstall-def-file
-
-# FIXME: We need to figure out how to get the actual name (parsing
-# libgcrypt.la?) and how to create the hmac file already at link time
-# so that it can be used without installing libgcrypt first.
-#install-exec-hook:
-#      ./hmac256 "What am I, a doctor or a moonshuttle conductor?" \
-#          < $(DESTDIR)$(libdir)/libgcrypt.so.11.5.0 \
-#          > $(DESTDIR)$(libdir)/.libgcrypt.so.11.5.0.hmac
diff --git a/grub-core/lib/libgcrypt/src/Manifest 
b/grub-core/lib/libgcrypt/src/Manifest
deleted file mode 100644
index 2d003d83e..000000000
--- a/grub-core/lib/libgcrypt/src/Manifest
+++ /dev/null
@@ -1,58 +0,0 @@
-# Manifest - checksums of the src directory
-# Copyright 2004 Free Software Foundation, Inc.
-#
-# This file is part of Libgcrypt.
-#
-# Libgcrypt is free software; you can redistribute it and/or modify
-# it under the terms of the GNU Lesser general Public License as
-# published by the Free Software Foundation; either version 2.1 of
-# the License, or (at your option) any later version.
-#
-# Libgcrypt is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
-
-# Checksums for all source files in this directory. Format is
-# filename, blanks, base-64 part of an OpenPGP detached signature
-# without the header lines.  Blank lines and lines beginning with a
-# hash mark are ignored.  A tool to process this file is available by
-# cvs -d :pserver:anoncvs@cvs.gnupg.org:/cvs/wk co misc-scripts/manifest-tool
-#
-# The special entry "$names$" holds a signature over all sorted
-# filenames excluding itself.
-
-gcrypt.h 
iQCVAwUAQH5RsTEAnp832S/7AQK7xgP+Kc3NY9lipZkaAMrnHDkQVLdHYwTbZWuGOYdTLp8Xy7Auh9wtWV9hrWVUqs+kxDzT/2iF6XkO3WT3rf/PmQ/Q0TIGfOyjE3c/qvB/jVippaxoGda3tnGpODytdI3XPhfPS0Ss8nDzfCStPBGAEq0OVU7imnExrFzhRXt+Gljr0o0==Yagz
-gcrypt-module.h 
iQCVAwUAQH5UXzEAnp832S/7AQJMQgQAzumz9aaZelhw+FxTCeVadphBxt1bbNQvMrnddYYblyJv+AcxZ9ZxGz2oPeusN58Qg54DQcaW3lYhTgnWfXultsi+Ruxlz7400OUrzSXOl3At7KssdODAoscFzZIgh94G9lzQxEBr9lTXI9R3LsPFJP6muNG4frcNBAA42yckK7w==BBp5
-
-ath.c 
iQCVAwUAQH5E+DEAnp832S/7AQKFpgP+KSZHtVcnh9FFggIyHKbALUljW2FXauasZvFyN8Sk/mIMgKxyXFOG1THBAUzWLaKWIEWU+WkYU7uThqBtpnEImM5AenWzbQuJjftPC3gVHO8yjjmBWD4zmJj28htoKDoa/xDsoqumrHxae3FYcaCWtYGVjM/Pbl+OMRMOFAhp0ho==lQZ3
-ath.h 
iQCVAwUAQH5FODEAnp832S/7AQKiuQQAg4K+KOAn1LWBZN32MAhms4FeZKoce0fAuZW7BpyY4cCxIVgxqrtUC90CDykw8XegFfOyyYrgd0NmaMVdY7HZDncNOvIPxpgFQPCZrycsMOoAtoVwjK704RDeNo3zmeyxTKeDH+3M1J7JmLiafaEdSbOC8flX/W0icaV0Ol4dmBc==Ll6w
-
-cipher.h 
iQCVAwUAQH5FUzEAnp832S/7AQJKLgP9GSSk9f7EINIRqSQH1XKX+dYzt3phDHdqFTUGIfYNh7YzGdy0drvgFhG4k15nqDouKRuFVM/hKY3ZVY7JccmKXKGAH6+ZYShoG6LMFfIGgDX8zne0dNxc72PLfns3fVxNn/RlHmHBkrQ+ppjR9HnSthFmOqzbQaW1BKmc3Z2x5GU==lIeW
-g10lib.h 
iQCVAwUAQH5FejEAnp832S/7AQJ75wP/ZjOybwRix5eoXdfVeXPjoPygejzpYJJdMUGN3Y5UtkfBu9mPREsKfvZ6tH+Evjx+3xfeAb4bU/k2mRMp0tiWnk2koToS08vI9uxnioKQr9oulZH6r28S+NLSgMQuEGN1JNUky6RQ9TTNRndeTjKKSrEjZ7V6bv+rb8A1bYCKChs==P5mk
-mpi.h 
iQCVAwUAQH5FwzEAnp832S/7AQJJ4wP9E3jVkcO9M0YtSBHIbjG3hDWKWXzi86AlUh51qiE8/2XP0FfjA4TosyvmicZs7j48HitAByr9tHOSxnbeo7NBf17ICwAo6Eqty+wKDg+eyLeEGUy7VpVK3RJRQAA4H+kl3S2l3YMTKf3WJlbc7qkWSXZspdy5c9sAxeodCKrAubU==oALf
-
-global.c 
iQCVAwUAQH5HFzEAnp832S/7AQJc+QQAvi53ZkMCzLnVULHvhI6W+EX537zi9n8cplYguvIJqUhAZrP68yGAIyqyCONbZVDyB7wqeXdUMLzMk7W8fg+xuk5JSDpppAQf2m/bdQyze6XVqJso682eYBM8+b9z/IVEvLaFwhZcOKO1bcXudBlBCcJgVDpupfTtAWgPnewil9Q==Xwy1
-misc.c 
iQCVAwUAQH5IIjEAnp832S/7AQKNJAQAkEpyY3fCG7tvADJFAW9xA7DEQwLCa8YmiUhHvrEsWOI4YgvS7LUbWWc7VqK+ryORvXLKRAVieznbnHAuy0TKtqdnmA/kUmiurS0ah5SWqR/iuAeJtt0RGsmZaZ6oa2m4PZ2Y2GCHSTZqcclvwsetS9eq5AipxHxYFUltu5wGZNI==twM2
-missing-string.c 
iQCVAwUAQH5JfjEAnp832S/7AQI3ZQQAg55eEJbGQQHyBEJGxvt/FXpQiXcoDit3ZHzvdaQn/NUgdLjCHiWVzhyCXACGivLWMNModDaSaZk073NXxVkWfPcX9vkF//Wugwzidd5P3Bfu5k35o+Xxz82fsk5KuFGGq1mBUZ07xUYQ8KkKkhADUkr0QiQAuypp079Yq0uUC7Q==zvKn
-module.c 
iQCVAwUAQH5JvjEAnp832S/7AQKlMgQAjZYTXMpWb5kHxCMXzRi069Ku/4/xnWsD+S0dje1LiKzCnRpwTTxARzc/y10Y8OcygkMuR4unEaWedO+9syjjty3fBCcue/j7YlLitq5EC9UE4o23poWvWCuX9Tadm2DK5qf4p7smMJ22O22cLTYTVCyAoYTQ2xC8ajzBsBRkX80==yRRD
-secmem.c 
iQCVAwUAQH5LLDEAnp832S/7AQKtFwQAwY2wBr6WJC1cwqp/1DQoKzHx9C3plONxbZMazwR7VMI83NUbBAbv1mcxpeZWXmb2dRrnsR1VBbNPDSbJLN5T6czLQ2nIb6mnq9u8Ip4SAa+GCWfDV4AUtAJ4hN/yvWo8iEKu+KD5iJ6xJh31NdXjt5yk6vnk46SA6R4FkHdIEXc==UKVr
-secmem.h 
iQCVAwUAQH5LTDEAnp832S/7AQIsJwQAkZUu4hvmh9NXCLNm98+tGZFzWYvZO/NffC2wdPE8Q/OTa/m3g+oBbEhaV1ze3oY4t1F/p7ZHFx5CsIp4zVjyPkxlni8AAVMUOQr/LopyxouHn2OjKO+dVqecWQf01+nPWjklbL2FZ3mQ99k2qeWZlVSkz0nm8u39F3v7z3OTCss==AJqE
-sexp.c 
iQCVAwUAQH5LojEAnp832S/7AQKCTQQArlrj1KGwR2x93fcyN3M0iXuGkBq5R9KNu+1Bq04G4SLlpZ1RRY0OjV3L9To1BHTd01lXlO8MNz7NpRxWlG1Sw5FohbBlhWZQRcW8GdAawJPcfIY2Y8Ek6Yx8quZKbk9uD3bcBmStmg0P+TIA0nr20bmtfB3uX2KQVHQqWZQT5qU==P8FE
-stdmem.c 
iQCVAwUAQH5LzjEAnp832S/7AQLOUAP9FU16itXBBrkfRDGmhUjAOeEEKdd+brQ3XdT8xoLvP/IH/6U1Kq3ampP2/xcL4kwVdz2rw6NRzP7jlL/yM3tW722lSS/JPJkH+2+qUkcb0fYNoql/WYPMYp1/Mzu6ttXnjag1cQGlKIyYAD+G6h3FtpLwQy0hEJopnF9+Ovd8U7A==CkiZ
-stdmem.h 
iQCVAwUAQH5L8jEAnp832S/7AQIH0wP+Lyqh0tj++s2L79Tmf/gqgCK+HLMxTddcewF3XbsYf9T5FmLez1gz6Ggti4Ss9VjozOA3ti3trCiA/YNRmV9AYw4zLUPm+MsjJuveL/AgB9HdoD2v+RfJm0WwgSKiysp+8iyjg3Plopmhba4cGuOP5MJ3CWTqYwPmJVscUKC6g38==02MN
-
-types.h 
iQCVAwUAQH5MKTEAnp832S/7AQLqTAP6A3mUMD5MMkBkebq4bRY6Bq0KsgdKfZ8TLhc2o87gFay8YD0Uom3YJNG2LF/rAIct2ih4jYJaIb5dRfJ0KJoPi2ETd462J8OFCL4fjq9TaSjB2pXcB+kWoxzPasGNg2Ukk0dQ6lvF1tSYrtt32PVI7q/UaPsjTylgRmzLfX/VxrU==OMu3
-
-
-# Configuration
-Makefile.am 
iQCVAwUAQH5WVjEAnp832S/7AQLmsQP/bbI8/UWAC5yITVhGcCOCbN/FaMqXVKjxESzo6GTs02jxK1y3RuuaoNU1ssQZGAxpFiMJW8u933V3yTHFMxWpwHemDnEyv/a8YACxJBQ0tQgpgHS716BjMbHOfcuOis2WlCOOm0ErjhAYNa4NQ1q3jwkOvTDLFpdnqaWI2wWn08U==Yjun
-libgcrypt.m4 
iQCVAwUAQH5MbTEAnp832S/7AQJ1uAQA1C6xI7qXiKVtUeXawhPytAldosrzcXmqz34xi7JklQqw83d68WtWHFMBEUa7MKfi4WCbuQb7FjGUvMRw5z/T9ez7CoDekHc63+cIIZLQ23weUK8GaA1uQLoD0scmT41J5RkBlJbH7ck1zRd3d04o75rWNEUNit6KBvrQ4Pd8oQ8==uMgB
-libgcrypt-config.in 
iQCVAwUAQH5UbzEAnp832S/7AQJISgP+Nbd2AQnDM/k8sQLbvz8YZjwX3LigZM+AkF1VAwyAm6YOU3nrXnz5t+cXkQD2dkz4L2F0AAsIkFiJsrgmZgCp2h1L6LeFnH+hoId9RhbYw4NkDaHb+MC9JcalpcfFvvxq6vM/W37bSFimM78P+5RLKypXCytVQNAAaIRgZjVfXY8==IGDS
-libgcrypt.vers 
iQCVAwUAQH5MjTEAnp832S/7AQKCdQQAotG6Z3zdcePI0V33YY2sh91uYkLBNhQw+PzyE3BRRAVhMGLOBD1nSWJHJvE3eyCVOqFY0ZmvpVex51Fa0D/TwsJOO4RVxf1L9bbAncu9OuEXaGXKytLZp54TliDTAWGDq0lvtx1TvDDgtM8TbbaXvMbjfQ4wXBxdLvaenFCTlR4==kgHq
-
-$names$ 
iQCVAwUAQH5UhDEAnp832S/7AQK/jwP9H7A3mI99M1NGuhD+16C+2gJIITB8GJeYeUd3vm8kWQ5n76WyMCdeA62qn0JUddIBjAbagtfvTL5aesnD9MlhEGaNlHauU7SINTIJ8njKf87EAAfDZrhS/tGDziC2nakMPweRxXQCLDWHkBPjYfrspSLLohjdegqBvTNyVM76+KE==3p9Z
diff --git a/grub-core/lib/libgcrypt/src/ath.c 
b/grub-core/lib/libgcrypt/src/ath.c
deleted file mode 100644
index 656ed896f..000000000
--- a/grub-core/lib/libgcrypt/src/ath.c
+++ /dev/null
@@ -1,344 +0,0 @@
-/* ath.c - Thread-safeness library.
-   Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc.
-
-   This file is part of Libgcrypt.
-
-   Libgcrypt is free software; you can redistribute it and/or modify
-   it under the terms of the GNU Lesser General Public License as
-   published by the Free Software Foundation; either version 2.1 of
-   the License, or (at your option) any later version.
-
-   Libgcrypt is distributed in the hope that it will be useful, but
-   WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with Libgcrypt; if not, write to the Free Software
-   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
-   02111-1307, USA.  */
-
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <assert.h>  /* Right: We need to use assert and not gcry_assert.  */
-#include <unistd.h>
-#ifdef HAVE_SYS_SELECT_H
-# include <sys/select.h>
-#else
-# include <sys/time.h>
-#endif
-#include <sys/types.h>
-#ifndef _WIN32
-#include <sys/wait.h>
-#endif
-#include <errno.h>
-
-#include "ath.h"
-
-
-
-/* The interface table.  */
-static struct ath_ops ops;
-
-/* True if we should use the external callbacks.  */
-static int ops_set;
-
-
-/* For the dummy interface.  */
-#define MUTEX_UNLOCKED ((ath_mutex_t) 0)
-#define MUTEX_LOCKED   ((ath_mutex_t) 1)
-#define MUTEX_DESTROYED        ((ath_mutex_t) 2)
-
-
-/* Return the thread type from the option field. */
-#define GET_OPTION(a)    ((a) & 0xff)
-/* Return the version number from the option field.  */
-#define GET_VERSION(a)   (((a) >> 8)& 0xff)
-
-
-
-/* The lock we take while checking for lazy lock initialization.  */
-static ath_mutex_t check_init_lock = ATH_MUTEX_INITIALIZER;
-
-int
-ath_init (void)
-{
-  int err = 0;
-
-  if (ops_set)
-    {
-      if (ops.init)
-       err = (*ops.init) ();
-      if (err)
-       return err;
-      err = (*ops.mutex_init) (&check_init_lock);
-    }
-  return err;
-}
-
-
-/* Initialize the locking library.  Returns 0 if the operation was
-   successful, EINVAL if the operation table was invalid and EBUSY if
-   we already were initialized.  */
-gpg_err_code_t
-ath_install (struct ath_ops *ath_ops, int check_only)
-{
-  if (check_only)
-    {
-      unsigned int option = 0;
-
-      /* Check if the requested thread option is compatible to the
-        thread option we are already committed to.  */
-      if (ath_ops)
-       option = ath_ops->option;
-
-      if (!ops_set && GET_OPTION (option))
-       return GPG_ERR_NOT_SUPPORTED;
-
-      if (GET_OPTION (ops.option) == ATH_THREAD_OPTION_USER
-         || GET_OPTION (option) == ATH_THREAD_OPTION_USER
-         || GET_OPTION (ops.option) != GET_OPTION (option)
-          || GET_VERSION (ops.option) != GET_VERSION (option))
-       return GPG_ERR_NOT_SUPPORTED;
-
-      return 0;
-    }
-
-  if (ath_ops)
-    {
-      /* It is convenient to not require DESTROY.  */
-      if (!ath_ops->mutex_init || !ath_ops->mutex_lock
-         || !ath_ops->mutex_unlock)
-       return GPG_ERR_INV_ARG;
-
-      ops = *ath_ops;
-      ops_set = 1;
-    }
-  else
-    ops_set = 0;
-
-  return 0;
-}
-
-
-static int
-mutex_init (ath_mutex_t *lock, int just_check)
-{
-  int err = 0;
-
-  if (just_check)
-    (*ops.mutex_lock) (&check_init_lock);
-  if (*lock == ATH_MUTEX_INITIALIZER || !just_check)
-    err = (*ops.mutex_init) (lock);
-  if (just_check)
-    (*ops.mutex_unlock) (&check_init_lock);
-  return err;
-}
-
-
-int
-ath_mutex_init (ath_mutex_t *lock)
-{
-  if (ops_set)
-    return mutex_init (lock, 0);
-
-#ifndef NDEBUG
-  *lock = MUTEX_UNLOCKED;
-#endif
-  return 0;
-}
-
-
-int
-ath_mutex_destroy (ath_mutex_t *lock)
-{
-  if (ops_set)
-    {
-      if (!ops.mutex_destroy)
-       return 0;
-
-      (*ops.mutex_lock) (&check_init_lock);
-      if (*lock == ATH_MUTEX_INITIALIZER)
-       {
-         (*ops.mutex_unlock) (&check_init_lock);
-         return 0;
-       }
-      (*ops.mutex_unlock) (&check_init_lock);
-      return (*ops.mutex_destroy) (lock);
-    }
-
-#ifndef NDEBUG
-  assert (*lock == MUTEX_UNLOCKED);
-
-  *lock = MUTEX_DESTROYED;
-#endif
-  return 0;
-}
-
-
-int
-ath_mutex_lock (ath_mutex_t *lock)
-{
-  if (ops_set)
-    {
-      int ret = mutex_init (lock, 1);
-      if (ret)
-       return ret;
-      return (*ops.mutex_lock) (lock);
-    }
-
-#ifndef NDEBUG
-  assert (*lock == MUTEX_UNLOCKED);
-
-  *lock = MUTEX_LOCKED;
-#endif
-  return 0;
-}
-
-
-int
-ath_mutex_unlock (ath_mutex_t *lock)
-{
-  if (ops_set)
-    {
-      int ret = mutex_init (lock, 1);
-      if (ret)
-       return ret;
-      return (*ops.mutex_unlock) (lock);
-    }
-
-#ifndef NDEBUG
-  assert (*lock == MUTEX_LOCKED);
-
-  *lock = MUTEX_UNLOCKED;
-#endif
-  return 0;
-}
-
-
-ssize_t
-ath_read (int fd, void *buf, size_t nbytes)
-{
-  if (ops_set && ops.read)
-    return (*ops.read) (fd, buf, nbytes);
-  else
-    return read (fd, buf, nbytes);
-}
-
-
-ssize_t
-ath_write (int fd, const void *buf, size_t nbytes)
-{
-  if (ops_set && ops.write)
-    return (*ops.write) (fd, buf, nbytes);
-  else
-    return write (fd, buf, nbytes);
-}
-
-
-ssize_t
-#ifdef _WIN32
-ath_select (int nfd, void *rset, void *wset, void *eset,
-           struct timeval *timeout)
-#else
-ath_select (int nfd, fd_set *rset, fd_set *wset, fd_set *eset,
-           struct timeval *timeout)
-#endif
-{
-  if (ops_set && ops.select)
-    return (*ops.select) (nfd, rset, wset, eset, timeout);
-  else
-#ifdef _WIN32
-    return -1;
-#else
-    return select (nfd, rset, wset, eset, timeout);
-#endif
-}
-
-
-ssize_t
-ath_waitpid (pid_t pid, int *status, int options)
-{
-  if (ops_set && ops.waitpid)
-    return (*ops.waitpid) (pid, status, options);
-  else
-#ifdef _WIN32
-    return -1;
-#else
-    return waitpid (pid, status, options);
-#endif
-}
-
-
-int
-#ifdef _WIN32
-ath_accept (int s, void *addr, int *length_ptr)
-#else
-ath_accept (int s, struct sockaddr *addr, socklen_t *length_ptr)
-#endif
-{
-  if (ops_set && ops.accept)
-    return (*ops.accept) (s, addr, length_ptr);
-  else
-#ifdef _WIN32
-    return -1;
-#else
-    return accept (s, addr, length_ptr);
-#endif
-}
-
-
-int
-#ifdef _WIN32
-ath_connect (int s, void *addr, int length)
-#else
-ath_connect (int s, struct sockaddr *addr, socklen_t length)
-#endif
-{
-  if (ops_set && ops.connect)
-    return (*ops.connect) (s, addr, length);
-  else
-#ifdef _WIN32
-    return -1;
-#else
-    return connect (s, addr, length);
-#endif
-}
-
-
-int
-#ifdef _WIN32
-ath_sendmsg (int s, const void *msg, int flags)
-#else
-ath_sendmsg (int s, const struct msghdr *msg, int flags)
-#endif
-{
-  if (ops_set && ops.sendmsg)
-    return (*ops.sendmsg) (s, msg, flags);
-  else
-#ifdef _WIN32
-    return -1;
-#else
-    return sendmsg (s, msg, flags);
-#endif
-}
-
-
-int
-#ifdef _WIN32
-ath_recvmsg (int s, void *msg, int flags)
-#else
-ath_recvmsg (int s, struct msghdr *msg, int flags)
-#endif
-{
-  if (ops_set && ops.recvmsg)
-    return (*ops.recvmsg) (s, msg, flags);
-  else
-#ifdef _WIN32
-    return -1;
-#else
-    return recvmsg (s, msg, flags);
-#endif
-}
diff --git a/grub-core/lib/libgcrypt/src/ath.h 
b/grub-core/lib/libgcrypt/src/ath.h
deleted file mode 100644
index 8769551be..000000000
--- a/grub-core/lib/libgcrypt/src/ath.h
+++ /dev/null
@@ -1,147 +0,0 @@
-/* ath.h - Thread-safeness library.
-   Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc.
-
-   This file is part of Libgcrypt.
-
-   Libgcrypt is free software; you can redistribute it and/or modify
-   it under the terms of the GNU Lesser General Public License as
-   published by the Free Software Foundation; either version 2.1 of
-   the License, or (at your option) any later version.
-
-   Libgcrypt is distributed in the hope that it will be useful, but
-   WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with Libgcrypt; if not, write to the Free Software
-   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
-   02111-1307, USA.  */
-
-#ifndef ATH_H
-#define ATH_H
-
-#include <config.h>
-
-#ifdef _WIN32
-# include <windows.h>
-#else /* !_WIN32 */
-# ifdef HAVE_SYS_SELECT_H
-#  include <sys/select.h>
-# else
-#  include <sys/time.h>
-# endif
-# include <sys/types.h>
-# ifdef HAVE_SYS_MSG_H
-#  include <sys/msg.h>  /* (e.g. for zOS) */
-# endif
-# include <sys/socket.h>
-#endif /* !_WIN32 */
-#include <gpg-error.h>
-
-
-
-/* Define _ATH_EXT_SYM_PREFIX if you want to give all external symbols
-   a prefix.  */
-#define _ATH_EXT_SYM_PREFIX _gcry_
-
-#ifdef _ATH_EXT_SYM_PREFIX
-#define _ATH_PREFIX1(x,y) x ## y
-#define _ATH_PREFIX2(x,y) _ATH_PREFIX1(x,y)
-#define _ATH_PREFIX(x) _ATH_PREFIX2(_ATH_EXT_SYM_PREFIX,x)
-#define ath_install _ATH_PREFIX(ath_install)
-#define ath_init _ATH_PREFIX(ath_init)
-#define ath_mutex_init _ATH_PREFIX(ath_mutex_init)
-#define ath_mutex_destroy _ATH_PREFIX(ath_mutex_destroy)
-#define ath_mutex_lock _ATH_PREFIX(ath_mutex_lock)
-#define ath_mutex_unlock _ATH_PREFIX(ath_mutex_unlock)
-#define ath_read _ATH_PREFIX(ath_read)
-#define ath_write _ATH_PREFIX(ath_write)
-#define ath_select _ATH_PREFIX(ath_select)
-#define ath_waitpid _ATH_PREFIX(ath_waitpid)
-#define ath_connect _ATH_PREFIX(ath_connect)
-#define ath_accept _ATH_PREFIX(ath_accept)
-#define ath_sendmsg _ATH_PREFIX(ath_sendmsg)
-#define ath_recvmsg _ATH_PREFIX(ath_recvmsg)
-#endif
-
-
-enum ath_thread_option
-  {
-    ATH_THREAD_OPTION_DEFAULT = 0,
-    ATH_THREAD_OPTION_USER = 1,
-    ATH_THREAD_OPTION_PTH = 2,
-    ATH_THREAD_OPTION_PTHREAD = 3
-  };
-
-struct ath_ops
-{
-  /* The OPTION field encodes the thread model and the version number
-     of this structure.
-       Bits  7 - 0  are used for the thread model
-       Bits 15 - 8  are used for the version number.
-  */
-  unsigned int option;
-
-  int (*init) (void);
-  int (*mutex_init) (void **priv);
-  int (*mutex_destroy) (void *priv);
-  int (*mutex_lock) (void *priv);
-  int (*mutex_unlock) (void *priv);
-  ssize_t (*read) (int fd, void *buf, size_t nbytes);
-  ssize_t (*write) (int fd, const void *buf, size_t nbytes);
-#ifdef _WIN32
-  ssize_t (*select) (int nfd, void *rset, void *wset, void *eset,
-                    struct timeval *timeout);
-  ssize_t (*waitpid) (pid_t pid, int *status, int options);
-  int (*accept) (int s, void  *addr, int *length_ptr);
-  int (*connect) (int s, void *addr, int length);
-  int (*sendmsg) (int s, const void *msg, int flags);
-  int (*recvmsg) (int s, void *msg, int flags);
-#else
-  ssize_t (*select) (int nfd, fd_set *rset, fd_set *wset, fd_set *eset,
-                    struct timeval *timeout);
-  ssize_t (*waitpid) (pid_t pid, int *status, int options);
-  int (*accept) (int s, struct sockaddr *addr, socklen_t *length_ptr);
-  int (*connect) (int s, struct sockaddr *addr, socklen_t length);
-  int (*sendmsg) (int s, const struct msghdr *msg, int flags);
-  int (*recvmsg) (int s, struct msghdr *msg, int flags);
-#endif
-};
-
-gpg_err_code_t ath_install (struct ath_ops *ath_ops, int check_only);
-int ath_init (void);
-
-
-/* Functions for mutual exclusion.  */
-typedef void *ath_mutex_t;
-#define ATH_MUTEX_INITIALIZER 0
-
-int ath_mutex_init (ath_mutex_t *mutex);
-int ath_mutex_destroy (ath_mutex_t *mutex);
-int ath_mutex_lock (ath_mutex_t *mutex);
-int ath_mutex_unlock (ath_mutex_t *mutex);
-
-/* Replacement for the POSIX functions, which can be used to allow
-   other (user-level) threads to run.  */
-ssize_t ath_read (int fd, void *buf, size_t nbytes);
-ssize_t ath_write (int fd, const void *buf, size_t nbytes);
-#ifdef _WIN32
-ssize_t ath_select (int nfd, void *rset, void *wset, void *eset,
-                   struct timeval *timeout);
-ssize_t ath_waitpid (pid_t pid, int *status, int options);
-int ath_accept (int s, void *addr, int *length_ptr);
-int ath_connect (int s, void *addr, int length);
-int ath_sendmsg (int s, const void *msg, int flags);
-int ath_recvmsg (int s, void *msg, int flags);
-#else
-ssize_t ath_select (int nfd, fd_set *rset, fd_set *wset, fd_set *eset,
-                   struct timeval *timeout);
-ssize_t ath_waitpid (pid_t pid, int *status, int options);
-int ath_accept (int s, struct sockaddr *addr, socklen_t *length_ptr);
-int ath_connect (int s, struct sockaddr *addr, socklen_t length);
-int ath_sendmsg (int s, const struct msghdr *msg, int flags);
-int ath_recvmsg (int s, struct msghdr *msg, int flags);
-#endif
-
-#endif /* ATH_H */
diff --git a/grub-core/lib/libgcrypt/src/cipher-proto.h 
b/grub-core/lib/libgcrypt/src/cipher-proto.h
index 347681ffe..36729165d 100644
--- a/grub-core/lib/libgcrypt/src/cipher-proto.h
+++ b/grub-core/lib/libgcrypt/src/cipher-proto.h
@@ -23,6 +23,10 @@
 #ifndef G10_CIPHER_PROTO_H
 #define G10_CIPHER_PROTO_H
 
+
+enum pk_encoding;
+
+
 /* Definition of a function used to report selftest failures.
    DOMAIN is a string describing the function block:
           "cipher", "digest", "pubkey or "random",
@@ -38,77 +42,220 @@ typedef void (*selftest_report_func_t)(const char *domain,
 typedef gpg_err_code_t (*selftest_func_t)
      (int algo, int extended, selftest_report_func_t report);
 
+
+/*
+ *
+ * Public key related definitions.
+ *
+ */
 
-/* An extended type of the generate function.  */
-typedef gcry_err_code_t (*pk_ext_generate_t)
-     (int algo,
-      unsigned int nbits,
-      unsigned long evalue,
-      gcry_sexp_t genparms,
-      gcry_mpi_t *skey,
-      gcry_mpi_t **retfactors,
-      gcry_sexp_t *extrainfo);
+/* Type for the pk_generate function.  */
+typedef gcry_err_code_t (*gcry_pk_generate_t) (gcry_sexp_t genparms,
+                                               gcry_sexp_t *r_skey);
 
-/* The type used to compute the keygrip.  */
-typedef gpg_err_code_t (*pk_comp_keygrip_t)
-     (gcry_md_hd_t md, gcry_sexp_t keyparm);
+/* Type for the pk_check_secret_key function.  */
+typedef gcry_err_code_t (*gcry_pk_check_secret_key_t) (gcry_sexp_t keyparms);
 
-/* The type used to query ECC curve parameters.  */
-typedef gcry_err_code_t (*pk_get_param_t)
-     (const char *name, gcry_mpi_t *pkey);
+/* Type for the pk_encrypt function.  */
+typedef gcry_err_code_t (*gcry_pk_encrypt_t) (gcry_sexp_t *r_ciph,
+                                              gcry_sexp_t s_data,
+                                              gcry_sexp_t keyparms);
+
+/* Type for the pk_decrypt function.  */
+typedef gcry_err_code_t (*gcry_pk_decrypt_t) (gcry_sexp_t *r_plain,
+                                              gcry_sexp_t s_data,
+                                              gcry_sexp_t keyparms);
+
+/* Type for the pk_sign function.  */
+typedef gcry_err_code_t (*gcry_pk_sign_t) (gcry_sexp_t *r_sig,
+                                           gcry_sexp_t s_data,
+                                           gcry_sexp_t keyparms);
+
+/* Type for the pk_verify function.  */
+typedef gcry_err_code_t (*gcry_pk_verify_t) (gcry_sexp_t s_sig,
+                                             gcry_sexp_t s_data,
+                                             gcry_sexp_t keyparms);
+
+/* Type for the pk_get_nbits function.  */
+typedef unsigned (*gcry_pk_get_nbits_t) (gcry_sexp_t keyparms);
+
+
+/* The type used to compute the keygrip.  */
+typedef gpg_err_code_t (*pk_comp_keygrip_t) (gcry_md_hd_t md,
+                                             gcry_sexp_t keyparm);
 
 /* The type used to query an ECC curve name.  */
-typedef const char *(*pk_get_curve_t)(gcry_mpi_t *pkey, int iterator,
+typedef const char *(*pk_get_curve_t)(gcry_sexp_t keyparms, int iterator,
                                       unsigned int *r_nbits);
 
 /* The type used to query ECC curve parameters by name.  */
 typedef gcry_sexp_t (*pk_get_curve_param_t)(const char *name);
 
+
+/* Module specification structure for public key algorithms.  */
+typedef struct gcry_pk_spec
+{
+  int algo;
+  struct {
+    unsigned int disabled:1;
+    unsigned int fips:1;
+  } flags;
+  int use;
+  const char *name;
+  const char **aliases;
+  const char *elements_pkey;
+  const char *elements_skey;
+  const char *elements_enc;
+  const char *elements_sig;
+  const char *elements_grip;
+  gcry_pk_generate_t generate;
+  gcry_pk_check_secret_key_t check_secret_key;
+  gcry_pk_encrypt_t encrypt;
+  gcry_pk_decrypt_t decrypt;
+  gcry_pk_sign_t sign;
+  gcry_pk_verify_t verify;
+  gcry_pk_get_nbits_t get_nbits;
+  selftest_func_t selftest;
+  pk_comp_keygrip_t comp_keygrip;
+  pk_get_curve_t get_curve;
+  pk_get_curve_param_t get_curve_param;
+} gcry_pk_spec_t;
+
+
+
+/*
+ *
+ * Symmetric cipher related definitions.
+ *
+ */
+
+struct cipher_bulk_ops;
+
+/* Type for the cipher_setkey function.  */
+typedef gcry_err_code_t (*gcry_cipher_setkey_t) (void *c,
+                                                const unsigned char *key,
+                                                unsigned keylen,
+                                                struct cipher_bulk_ops 
*bulk_ops);
+
+/* Type for the cipher_encrypt function.  */
+typedef unsigned int (*gcry_cipher_encrypt_t) (void *c,
+                                              unsigned char *outbuf,
+                                              const unsigned char *inbuf);
+
+/* Type for the cipher_decrypt function.  */
+typedef unsigned int (*gcry_cipher_decrypt_t) (void *c,
+                                              unsigned char *outbuf,
+                                              const unsigned char *inbuf);
+
+/* Type for the cipher_stencrypt function.  */
+typedef void (*gcry_cipher_stencrypt_t) (void *c,
+                                        unsigned char *outbuf,
+                                        const unsigned char *inbuf,
+                                        size_t n);
+
+/* Type for the cipher_stdecrypt function.  */
+typedef void (*gcry_cipher_stdecrypt_t) (void *c,
+                                        unsigned char *outbuf,
+                                        const unsigned char *inbuf,
+                                        size_t n);
+
 /* The type used to convey additional information to a cipher.  */
 typedef gpg_err_code_t (*cipher_set_extra_info_t)
      (void *c, int what, const void *buffer, size_t buflen);
 
+/* The type used to set an IV directly in the algorithm module.  */
+typedef void (*cipher_setiv_func_t)(void *c, const byte *iv, size_t ivlen);
+
+/* A structure to map OIDs to encryption modes.  */
+typedef struct gcry_cipher_oid_spec
+{
+  const char *oid;
+  int mode;
+} gcry_cipher_oid_spec_t;
+
 
-/* Extra module specification structures.  These are used for internal
-   modules which provide more functions than available through the
-   public algorithm register APIs.  */
-typedef struct cipher_extra_spec
+/* Module specification structure for ciphers.  */
+typedef struct gcry_cipher_spec
 {
+  int algo;
+  struct {
+    unsigned int disabled:1;
+    unsigned int fips:1;
+  } flags;
+  const char *name;
+  const char **aliases;
+  const gcry_cipher_oid_spec_t *oids;
+  size_t blocksize;
+  size_t keylen;
+  size_t contextsize;
+  gcry_cipher_setkey_t setkey;
+  gcry_cipher_encrypt_t encrypt;
+  gcry_cipher_decrypt_t decrypt;
+  gcry_cipher_stencrypt_t stencrypt;
+  gcry_cipher_stdecrypt_t stdecrypt;
   selftest_func_t selftest;
   cipher_set_extra_info_t set_extra_info;
-} cipher_extra_spec_t;
+  cipher_setiv_func_t setiv;
+} gcry_cipher_spec_t;
+
+
+
+/*
+ *
+ * Message digest related definitions.
+ *
+ */
+
+/* Type for the md_init function.  */
+typedef void (*gcry_md_init_t) (void *c, unsigned int flags);
+
+/* Type for the md_write function.  */
+typedef void (*gcry_md_write_t) (void *c, const void *buf, size_t nbytes);
 
-typedef struct md_extra_spec
+/* Type for the md_final function.  */
+typedef void (*gcry_md_final_t) (void *c);
+
+/* Type for the md_read function.  */
+typedef unsigned char *(*gcry_md_read_t) (void *c);
+
+/* Type for the md_extract function.  */
+typedef void (*gcry_md_extract_t) (void *c, void *outbuf, size_t nbytes);
+
+/* Type for the md_hash_buffers function. */
+typedef void (*gcry_md_hash_buffers_t) (void *outbuf, size_t nbytes,
+                                       const gcry_buffer_t *iov,
+                                       int iovcnt);
+
+typedef struct gcry_md_oid_spec
 {
-  selftest_func_t selftest;
-} md_extra_spec_t;
+  const char *oidstring;
+} gcry_md_oid_spec_t;
 
-typedef struct pk_extra_spec
+/* Module specification structure for message digests.  */
+typedef struct gcry_md_spec
 {
+  int algo;
+  struct {
+    unsigned int disabled:1;
+    unsigned int fips:1;
+  } flags;
+  const char *name;
+  const unsigned char *asnoid;
+  int asnlen;
+  const gcry_md_oid_spec_t *oids;
+  int mdlen;
+  gcry_md_init_t init;
+  gcry_md_write_t write;
+  gcry_md_final_t final;
+  gcry_md_read_t read;
+  gcry_md_extract_t extract;
+  gcry_md_hash_buffers_t hash_buffers;
+  size_t contextsize; /* allocate this amount of context */
   selftest_func_t selftest;
-  pk_ext_generate_t ext_generate;
-  pk_comp_keygrip_t comp_keygrip;
-  pk_get_param_t get_param;
-  pk_get_curve_t get_curve;
-  pk_get_curve_param_t get_curve_param;
-} pk_extra_spec_t;
-
-
+} gcry_md_spec_t;
 
-/* The private register functions. */
-gcry_error_t _gcry_cipher_register (gcry_cipher_spec_t *cipher,
-                                    cipher_extra_spec_t *extraspec,
-                                    int *algorithm_id,
-                                    gcry_module_t *module);
-gcry_error_t _gcry_md_register (gcry_md_spec_t *cipher,
-                                md_extra_spec_t *extraspec,
-                                unsigned int *algorithm_id,
-                                gcry_module_t *module);
-gcry_error_t _gcry_pk_register (gcry_pk_spec_t *cipher,
-                                pk_extra_spec_t *extraspec,
-                                unsigned int *algorithm_id,
-                                gcry_module_t *module);
 
+
 /* The selftest functions.  */
 gcry_error_t _gcry_cipher_selftest (int algo, int extended,
                                     selftest_report_func_t report);
@@ -116,9 +263,14 @@ gcry_error_t _gcry_md_selftest (int algo, int extended,
                                 selftest_report_func_t report);
 gcry_error_t _gcry_pk_selftest (int algo, int extended,
                                 selftest_report_func_t report);
-gcry_error_t _gcry_hmac_selftest (int algo, int extended,
-                                  selftest_report_func_t report);
+gcry_error_t _gcry_mac_selftest (int algo, int extended,
+                                 selftest_report_func_t report);
+gcry_error_t _gcry_kdf_selftest (int algo, int extended,
+                                 selftest_report_func_t report);
 
 gcry_error_t _gcry_random_selftest (selftest_report_func_t report);
 
+
+
+
 #endif /*G10_CIPHER_PROTO_H*/
diff --git a/grub-core/lib/libgcrypt/src/cipher.h 
b/grub-core/lib/libgcrypt/src/cipher.h
index 48eeeda5e..87f8c4d04 100644
--- a/grub-core/lib/libgcrypt/src/cipher.h
+++ b/grub-core/lib/libgcrypt/src/cipher.h
@@ -20,13 +20,31 @@
 #ifndef G10_CIPHER_H
 #define G10_CIPHER_H
 
-#include <gcrypt.h>
+#include "gcrypt-int.h"
 
 #define DBG_CIPHER _gcry_get_debug_flag( 1 )
 
 #include "../random/random.h"
 
 #define PUBKEY_FLAG_NO_BLINDING    (1 << 0)
+#define PUBKEY_FLAG_RFC6979        (1 << 1)
+#define PUBKEY_FLAG_FIXEDLEN       (1 << 2)
+#define PUBKEY_FLAG_LEGACYRESULT   (1 << 3)
+#define PUBKEY_FLAG_RAW_FLAG       (1 << 4)
+#define PUBKEY_FLAG_TRANSIENT_KEY  (1 << 5)
+#define PUBKEY_FLAG_USE_X931       (1 << 6)
+#define PUBKEY_FLAG_USE_FIPS186    (1 << 7)
+#define PUBKEY_FLAG_USE_FIPS186_2  (1 << 8)
+#define PUBKEY_FLAG_PARAM          (1 << 9)
+#define PUBKEY_FLAG_COMP           (1 << 10)
+#define PUBKEY_FLAG_NOCOMP         (1 << 11)
+#define PUBKEY_FLAG_EDDSA          (1 << 12)
+#define PUBKEY_FLAG_GOST           (1 << 13)
+#define PUBKEY_FLAG_NO_KEYTEST     (1 << 14)
+#define PUBKEY_FLAG_DJB_TWEAK      (1 << 15)
+#define PUBKEY_FLAG_SM2            (1 << 16)
+#define PUBKEY_FLAG_PREHASH        (1 << 17)
+
 
 enum pk_operation
   {
@@ -40,6 +58,7 @@ enum pk_encoding
   {
     PUBKEY_ENC_RAW,
     PUBKEY_ENC_PKCS1,
+    PUBKEY_ENC_PKCS1_RAW,
     PUBKEY_ENC_OAEP,
     PUBKEY_ENC_PSS,
     PUBKEY_ENC_UNKNOWN
@@ -70,31 +89,39 @@ struct pk_encoding_ctx
 
 #include "cipher-proto.h"
 
+/* The internal encryption modes. */
+enum gcry_cipher_internal_modes
+  {
+    GCRY_CIPHER_MODE_INTERNAL = 0x10000,
+    GCRY_CIPHER_MODE_CMAC     = 0x10000 + 1   /* Cipher-based MAC. */
+  };
+
+
+/*-- cipher.c --*/
+gcry_err_code_t _gcry_cipher_open_internal (gcry_cipher_hd_t *handle,
+                                           int algo, int mode,
+                                           unsigned int flags);
+
+/*-- cipher-cmac.c --*/
+gcry_err_code_t _gcry_cipher_cmac_authenticate
+/*           */ (gcry_cipher_hd_t c, const unsigned char *abuf, size_t 
abuflen);
+gcry_err_code_t _gcry_cipher_cmac_get_tag
+/*           */ (gcry_cipher_hd_t c,
+                 unsigned char *outtag, size_t taglen);
+gcry_err_code_t _gcry_cipher_cmac_check_tag
+/*           */ (gcry_cipher_hd_t c,
+                 const unsigned char *intag, size_t taglen);
+gcry_err_code_t _gcry_cipher_cmac_set_subkeys
+/*           */ (gcry_cipher_hd_t c);
 
-/*-- rmd160.c --*/
-void _gcry_rmd160_hash_buffer (void *outbuf,
-                               const void *buffer, size_t length);
 /*-- sha1.c --*/
 void _gcry_sha1_hash_buffer (void *outbuf,
                              const void *buffer, size_t length);
 
-/*-- rijndael.c --*/
-void _gcry_aes_cfb_enc (void *context, unsigned char *iv,
-                        void *outbuf, const void *inbuf,
-                        unsigned int nblocks);
-void _gcry_aes_cfb_dec (void *context, unsigned char *iv,
-                        void *outbuf_arg, const void *inbuf_arg,
-                        unsigned int nblocks);
-void _gcry_aes_cbc_enc (void *context, unsigned char *iv,
-                        void *outbuf_arg, const void *inbuf_arg,
-                        unsigned int nblocks, int cbc_mac);
-void _gcry_aes_cbc_dec (void *context, unsigned char *iv,
-                        void *outbuf_arg, const void *inbuf_arg,
-                        unsigned int nblocks);
-void _gcry_aes_ctr_enc (void *context, unsigned char *ctr,
-                        void *outbuf_arg, const void *inbuf_arg,
-                        unsigned int nblocks);
-
+/*-- blake2.c --*/
+gcry_err_code_t _gcry_blake2_init_with_key(void *ctx, unsigned int flags,
+                                          const unsigned char *key,
+                                          size_t keylen, int algo);
 
 /*-- dsa.c --*/
 void _gcry_register_pk_dsa_progress (gcry_handler_progress_t cbc, void 
*cb_data);
@@ -114,7 +141,6 @@ void _gcry_register_primegen_progress 
(gcry_handler_progress_t cb,
                                        void *cb_data);
 
 /*-- pubkey.c --*/
-const char * _gcry_pk_aliased_algo_name (int algorithm);
 
 /* Declarations for the cipher specifications.  */
 extern gcry_cipher_spec_t _gcry_cipher_spec_blowfish;
@@ -131,52 +157,64 @@ extern gcry_cipher_spec_t _gcry_cipher_spec_serpent128;
 extern gcry_cipher_spec_t _gcry_cipher_spec_serpent192;
 extern gcry_cipher_spec_t _gcry_cipher_spec_serpent256;
 extern gcry_cipher_spec_t _gcry_cipher_spec_rfc2268_40;
+extern gcry_cipher_spec_t _gcry_cipher_spec_rfc2268_128;
 extern gcry_cipher_spec_t _gcry_cipher_spec_seed;
 extern gcry_cipher_spec_t _gcry_cipher_spec_camellia128;
 extern gcry_cipher_spec_t _gcry_cipher_spec_camellia192;
 extern gcry_cipher_spec_t _gcry_cipher_spec_camellia256;
 extern gcry_cipher_spec_t _gcry_cipher_spec_idea;
-
-extern cipher_extra_spec_t _gcry_cipher_extraspec_tripledes;
-extern cipher_extra_spec_t _gcry_cipher_extraspec_aes;
-extern cipher_extra_spec_t _gcry_cipher_extraspec_aes192;
-extern cipher_extra_spec_t _gcry_cipher_extraspec_aes256;
-
+extern gcry_cipher_spec_t _gcry_cipher_spec_salsa20;
+extern gcry_cipher_spec_t _gcry_cipher_spec_salsa20r12;
+extern gcry_cipher_spec_t _gcry_cipher_spec_gost28147;
+extern gcry_cipher_spec_t _gcry_cipher_spec_gost28147_mesh;
+extern gcry_cipher_spec_t _gcry_cipher_spec_chacha20;
+extern gcry_cipher_spec_t _gcry_cipher_spec_sm4;
 
 /* Declarations for the digest specifications.  */
-extern gcry_md_spec_t _gcry_digest_spec_crc32;
-extern gcry_md_spec_t _gcry_digest_spec_crc32_rfc1510;
-extern gcry_md_spec_t _gcry_digest_spec_crc24_rfc2440;
-extern gcry_md_spec_t _gcry_digest_spec_md4;
-extern gcry_md_spec_t _gcry_digest_spec_md5;
-extern gcry_md_spec_t _gcry_digest_spec_rmd160;
-extern gcry_md_spec_t _gcry_digest_spec_sha1;
-extern gcry_md_spec_t _gcry_digest_spec_sha224;
-extern gcry_md_spec_t _gcry_digest_spec_sha256;
-extern gcry_md_spec_t _gcry_digest_spec_sha512;
-extern gcry_md_spec_t _gcry_digest_spec_sha384;
-extern gcry_md_spec_t _gcry_digest_spec_tiger;
-extern gcry_md_spec_t _gcry_digest_spec_tiger1;
-extern gcry_md_spec_t _gcry_digest_spec_tiger2;
-extern gcry_md_spec_t _gcry_digest_spec_whirlpool;
-
-extern md_extra_spec_t _gcry_digest_extraspec_sha1;
-extern md_extra_spec_t _gcry_digest_extraspec_sha224;
-extern md_extra_spec_t _gcry_digest_extraspec_sha256;
-extern md_extra_spec_t _gcry_digest_extraspec_sha384;
-extern md_extra_spec_t _gcry_digest_extraspec_sha512;
+extern const gcry_md_spec_t _gcry_digest_spec_crc32;
+extern const gcry_md_spec_t _gcry_digest_spec_crc32_rfc1510;
+extern const gcry_md_spec_t _gcry_digest_spec_crc24_rfc2440;
+extern const gcry_md_spec_t _gcry_digest_spec_gost3411_94;
+extern const gcry_md_spec_t _gcry_digest_spec_gost3411_cp;
+extern const gcry_md_spec_t _gcry_digest_spec_stribog_256;
+extern const gcry_md_spec_t _gcry_digest_spec_stribog_512;
+extern const gcry_md_spec_t _gcry_digest_spec_md2;
+extern const gcry_md_spec_t _gcry_digest_spec_md4;
+extern const gcry_md_spec_t _gcry_digest_spec_md5;
+extern const gcry_md_spec_t _gcry_digest_spec_rmd160;
+extern const gcry_md_spec_t _gcry_digest_spec_sha1;
+extern const gcry_md_spec_t _gcry_digest_spec_sha224;
+extern const gcry_md_spec_t _gcry_digest_spec_sha256;
+extern const gcry_md_spec_t _gcry_digest_spec_sha384;
+extern const gcry_md_spec_t _gcry_digest_spec_sha512;
+extern const gcry_md_spec_t _gcry_digest_spec_sha512_224;
+extern const gcry_md_spec_t _gcry_digest_spec_sha512_256;
+extern const gcry_md_spec_t _gcry_digest_spec_sha3_224;
+extern const gcry_md_spec_t _gcry_digest_spec_sha3_256;
+extern const gcry_md_spec_t _gcry_digest_spec_sha3_512;
+extern const gcry_md_spec_t _gcry_digest_spec_sha3_384;
+extern const gcry_md_spec_t _gcry_digest_spec_shake128;
+extern const gcry_md_spec_t _gcry_digest_spec_shake256;
+extern const gcry_md_spec_t _gcry_digest_spec_tiger;
+extern const gcry_md_spec_t _gcry_digest_spec_tiger1;
+extern const gcry_md_spec_t _gcry_digest_spec_tiger2;
+extern const gcry_md_spec_t _gcry_digest_spec_whirlpool;
+extern const gcry_md_spec_t _gcry_digest_spec_blake2b_512;
+extern const gcry_md_spec_t _gcry_digest_spec_blake2b_384;
+extern const gcry_md_spec_t _gcry_digest_spec_blake2b_256;
+extern const gcry_md_spec_t _gcry_digest_spec_blake2b_160;
+extern const gcry_md_spec_t _gcry_digest_spec_blake2s_256;
+extern const gcry_md_spec_t _gcry_digest_spec_blake2s_224;
+extern const gcry_md_spec_t _gcry_digest_spec_blake2s_160;
+extern const gcry_md_spec_t _gcry_digest_spec_blake2s_128;
+extern const gcry_md_spec_t _gcry_digest_spec_sm3;
 
 /* Declarations for the pubkey cipher specifications.  */
 extern gcry_pk_spec_t _gcry_pubkey_spec_rsa;
 extern gcry_pk_spec_t _gcry_pubkey_spec_elg;
+extern gcry_pk_spec_t _gcry_pubkey_spec_elg_e;
 extern gcry_pk_spec_t _gcry_pubkey_spec_dsa;
-extern gcry_pk_spec_t _gcry_pubkey_spec_ecdsa;
-extern gcry_pk_spec_t _gcry_pubkey_spec_ecdh;
-
-extern pk_extra_spec_t _gcry_pubkey_extraspec_rsa;
-extern pk_extra_spec_t _gcry_pubkey_extraspec_dsa;
-extern pk_extra_spec_t _gcry_pubkey_extraspec_elg;
-extern pk_extra_spec_t _gcry_pubkey_extraspec_ecdsa;
+extern gcry_pk_spec_t _gcry_pubkey_spec_ecc;
 
 
 #endif /*G10_CIPHER_H*/
diff --git a/grub-core/lib/libgcrypt/src/const-time.c 
b/grub-core/lib/libgcrypt/src/const-time.c
new file mode 100644
index 000000000..0fb53a074
--- /dev/null
+++ b/grub-core/lib/libgcrypt/src/const-time.c
@@ -0,0 +1,88 @@
+/* const-time.c  -  Constant-time functions
+ *      Copyright (C) 2023  g10 Code GmbH
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see 
<https://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include "g10lib.h"
+#include "const-time.h"
+
+
+#ifndef HAVE_GCC_ASM_VOLATILE_MEMORY
+/* These variables are used to generate masks from conditional operation
+ * flag parameters.  Use of volatile prevents compiler optimizations from
+ * converting AND-masking to conditional branches.  */
+volatile unsigned int _gcry_ct_vzero = 0;
+volatile unsigned int _gcry_ct_vone = 1;
+#endif
+
+
+/*
+ * Compare byte arrays of length LEN, return 1 if it's not same,
+ * 0, otherwise.
+ */
+unsigned int
+_gcry_ct_not_memequal (const void *b1, const void *b2, size_t len)
+{
+  const byte *a = b1;
+  const byte *b = b2;
+  int ab, ba;
+  size_t i;
+
+  /* Constant-time compare. */
+  for (i = 0, ab = 0, ba = 0; i < len; i++)
+    {
+      /* If a[i] != b[i], either ab or ba will be negative. */
+      ab |= a[i] - b[i];
+      ba |= b[i] - a[i];
+    }
+
+  /* 'ab | ba' is negative when buffers are not equal, extract sign bit.  */
+  return ((unsigned int)(ab | ba) >> (sizeof(unsigned int) * 8 - 1)) & 1;
+}
+
+/*
+ * Compare byte arrays of length LEN, return 0 if it's not same,
+ * 1, otherwise.
+ */
+unsigned int
+_gcry_ct_memequal (const void *b1, const void *b2, size_t len)
+{
+  return _gcry_ct_not_memequal (b1, b2, len) ^ 1;
+}
+
+/*
+ * Copy LEN bytes from memory area SRC to memory area DST, when
+ * OP_ENABLED=1.  When DST <= SRC, the memory areas may overlap.  When
+ * DST > SRC, the memory areas must not overlap.
+ */
+void
+_gcry_ct_memmov_cond (void *dst, const void *src, size_t len,
+                     unsigned long op_enable)
+{
+  /* Note: dual mask with AND/OR used for EM leakage mitigation */
+  unsigned char mask1 = ct_ulong_gen_mask(op_enable);
+  unsigned char mask2 = ct_ulong_gen_inv_mask(op_enable);
+  unsigned char *b_dst = dst;
+  const unsigned char *b_src = src;
+  size_t i;
+
+  for (i = 0; i < len; i++)
+    b_dst[i] = (b_dst[i] & mask2) | (b_src[i] & mask1);
+}
diff --git a/grub-core/lib/libgcrypt/src/const-time.h 
b/grub-core/lib/libgcrypt/src/const-time.h
new file mode 100644
index 000000000..fe07cc7aa
--- /dev/null
+++ b/grub-core/lib/libgcrypt/src/const-time.h
@@ -0,0 +1,167 @@
+/* const-time.h  -  Constant-time functions
+ *      Copyright (C) 2023  g10 Code GmbH
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see 
<https://www.gnu.org/licenses/>.
+ */
+
+#ifndef GCRY_CONST_TIME_H
+#define GCRY_CONST_TIME_H
+
+#include "types.h"
+
+
+#define ct_not_memequal _gcry_ct_not_memequal
+#define ct_memequal _gcry_ct_memequal
+#define ct_memmov_cond _gcry_ct_memmov_cond
+
+
+#ifndef HAVE_GCC_ASM_VOLATILE_MEMORY
+extern volatile unsigned int _gcry_ct_vzero;
+extern volatile unsigned int _gcry_ct_vone;
+#endif
+
+
+/*
+ * Return 0 if A is 0 and return 1 otherwise.
+ */
+static inline unsigned int
+ct_is_not_zero (unsigned int a)
+{
+  /* Sign bit set if A != 0. */
+  a = a | (-a);
+
+  return a >> (sizeof(unsigned int) * 8 - 1);
+}
+
+/*
+ * Return 1 if A is 0 and return 0 otherwise.
+ */
+static inline unsigned int
+ct_is_zero (unsigned int a)
+{
+  /* Sign bit set if A == 0. */
+  a = ~a & ~(-a);
+
+  return a >> (sizeof(unsigned int) * 8 - 1);
+}
+
+/*
+ * Return 1 if it's not same, 0 if same.
+ */
+static inline unsigned int
+ct_not_equal_byte (unsigned char b0, unsigned char b1)
+{
+  unsigned int diff;
+
+  diff = b0;
+  diff ^= b1;
+
+  return (0U - diff) >> (sizeof (unsigned int)*8 - 1);
+}
+
+/* Compare byte-arrays of length LEN, return 1 if it's not same, 0
+   otherwise.  We use pointer of void *, so that it can be used with
+   any structure.  */
+unsigned int _gcry_ct_not_memequal (const void *b1, const void *b2, size_t 
len);
+
+/* Compare byte-arrays of length LEN, return 0 if it's not same, 1
+   otherwise.  We use pointer of void *, so that it can be used with
+   any structure.  */
+unsigned int _gcry_ct_memequal (const void *b1, const void *b2, size_t len);
+
+/*
+ * Return all bits set if A is 1 and return 0 otherwise.
+ */
+#ifdef HAVE_GCC_ASM_VOLATILE_MEMORY
+#  define DEFINE_CT_TYPE_GEN_MASK(name, type) \
+     static inline type \
+     ct_##name##_gen_mask (unsigned long op_enable) \
+     { \
+       type mask = -(type)op_enable; \
+       asm volatile ("\n" : "+r" (mask) :: "memory"); \
+       return mask; \
+     }
+#else
+#  define DEFINE_CT_TYPE_GEN_MASK(name, type) \
+     static inline type \
+     ct_##name##_gen_mask (unsigned long op_enable) \
+     { \
+       type mask = (type)_gcry_ct_vzero - (type)op_enable; \
+       return mask; \
+     }
+#endif
+DEFINE_CT_TYPE_GEN_MASK(uintptr, uintptr_t)
+DEFINE_CT_TYPE_GEN_MASK(ulong, unsigned long)
+
+/*
+ * Return all bits set if A is 0 and return 1 otherwise.
+ */
+#ifdef HAVE_GCC_ASM_VOLATILE_MEMORY
+#  define DEFINE_CT_TYPE_GEN_INV_MASK(name, type) \
+     static inline type \
+     ct_##name##_gen_inv_mask (unsigned long op_enable) \
+     { \
+       type mask = (type)op_enable - (type)1; \
+       asm volatile ("\n" : "+r" (mask) :: "memory"); \
+       return mask; \
+     }
+#else
+#  define DEFINE_CT_TYPE_GEN_INV_MASK(name, type) \
+     static inline type \
+     ct_##name##_gen_inv_mask (unsigned long op_enable) \
+     { \
+       type mask = (type)op_enable - (type)_gcry_ct_vone; \
+       return mask; \
+     }
+#endif
+DEFINE_CT_TYPE_GEN_INV_MASK(uintptr, uintptr_t)
+DEFINE_CT_TYPE_GEN_INV_MASK(ulong, unsigned long)
+
+/*
+ *  Return A when OP_ENABLED=1
+ *  otherwise, return B
+ */
+#define DEFINE_CT_TYPE_SELECT_FUNC(name, type) \
+  static inline type \
+  ct_##name##_select (type a, type b, unsigned long op_enable) \
+  { \
+    type mask_b = ct_##name##_gen_inv_mask(op_enable); \
+    type mask_a = ct_##name##_gen_mask(op_enable); \
+    return (mask_a & a) | (mask_b & b); \
+  }
+DEFINE_CT_TYPE_SELECT_FUNC(uintptr, uintptr_t)
+DEFINE_CT_TYPE_SELECT_FUNC(ulong, unsigned long)
+
+/*
+ *  Return NULL when OP_ENABLED=1
+ *  otherwise, return W
+ */
+static inline gcry_sexp_t
+sexp_null_cond (gcry_sexp_t w, unsigned long op_enable)
+{
+  uintptr_t o = ct_uintptr_select((uintptr_t)NULL, (uintptr_t)w, op_enable);
+  return (gcry_sexp_t)(void *)o;
+}
+
+/*
+ * Copy LEN bytes from memory area SRC to memory area DST, when
+ * OP_ENABLED=1.  When DST <= SRC, the memory areas may overlap.  When
+ * DST > SRC, the memory areas must not overlap.
+ */
+void _gcry_ct_memmov_cond (void *dst, const void *src, size_t len,
+                          unsigned long op_enable);
+
+#endif /*GCRY_CONST_TIME_H*/
diff --git a/grub-core/lib/libgcrypt/src/context.c 
b/grub-core/lib/libgcrypt/src/context.c
new file mode 100644
index 000000000..da9948a66
--- /dev/null
+++ b/grub-core/lib/libgcrypt/src/context.c
@@ -0,0 +1,139 @@
+/* context.c - Context management
+ * Copyright (C) 2013  g10 Code GmbH
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <unistd.h>
+
+#include "g10lib.h"
+#include "mpi.h"
+#include "context.h"
+
+#define CTX_MAGIC "cTx"
+#define CTX_MAGIC_LEN 3
+
+
+/* The definition of the generic context object.  The public typedef
+   gcry_ctx_t is used to access it.  */
+struct gcry_context
+{
+  char magic[CTX_MAGIC_LEN]; /* Magic value to cross check that this
+                                is really a context object. */
+  char type;     /* The type of the context (CONTEXT_TYPE_foo).  */
+
+  void (*deinit)(void*); /* Function used to free the private part. */
+  PROPERLY_ALIGNED_TYPE u;
+};
+
+
+/* Allocate a fresh generic context of contect TYPE and allocate
+   LENGTH extra bytes for private use of the type handler. DEINIT is a
+   function used called to deinitialize the private part; it may be
+   NULL if de-initialization is not required.  Returns NULL and sets
+   ERRNO if memory allocation failed.  */
+gcry_ctx_t
+_gcry_ctx_alloc (int type, size_t length, void (*deinit)(void*))
+{
+  gcry_ctx_t ctx;
+
+  switch (type)
+    {
+    case CONTEXT_TYPE_EC:
+    case CONTEXT_TYPE_RANDOM_OVERRIDE:
+      break;
+    default:
+      log_bug ("bad context type %d given to _gcry_ctx_alloc\n", type);
+      break;
+    }
+
+  if (length < sizeof (PROPERLY_ALIGNED_TYPE))
+    length = sizeof (PROPERLY_ALIGNED_TYPE);
+
+  ctx = xtrycalloc (1, sizeof *ctx - sizeof (PROPERLY_ALIGNED_TYPE) + length);
+  if (!ctx)
+    return NULL;
+  memcpy (ctx->magic, CTX_MAGIC, CTX_MAGIC_LEN);
+  ctx->type = type;
+  ctx->deinit = deinit;
+
+  return ctx;
+}
+
+
+/* Return a pointer to the private part of the context CTX.  TYPE is
+   the requested context type.  Using an explicit type allows to cross
+   check the type and eventually allows to store several private
+   contexts in one context object.  The function does not return an
+   error but aborts if the provided CTX is not valid.  */
+void *
+_gcry_ctx_get_pointer (gcry_ctx_t ctx, int type)
+{
+  if (!ctx || memcmp (ctx->magic, CTX_MAGIC, CTX_MAGIC_LEN))
+    log_fatal ("bad pointer %p passed to _gcry_ctx_get_pointer\n", ctx);
+  if (ctx->type != type)
+    log_fatal ("wrong context type %d request for context %p of type %d\n",
+               type, ctx, ctx->type);
+  return &ctx->u;
+}
+
+/* Return a pointer to the private part of the context CTX.  TYPE is
+   the requested context type.  Using an explicit type allows to cross
+   check the type and eventually allows to store several private
+   contexts in one context object.  In contrast to
+   _gcry_ctx_get_pointer, this function returns NULL if no context for
+   the given type was found.  If CTX is NULL the function does not
+   abort but returns NULL.  */
+void *
+_gcry_ctx_find_pointer (gcry_ctx_t ctx, int type)
+{
+  if (!ctx)
+    return NULL;
+  if (memcmp (ctx->magic, CTX_MAGIC, CTX_MAGIC_LEN))
+    log_fatal ("bad pointer %p passed to _gcry_ctx_get_pointer\n", ctx);
+  if (ctx->type != type)
+    return NULL;
+  return &ctx->u;
+}
+
+
+/* Release the generic context CTX.  */
+void
+_gcry_ctx_release (gcry_ctx_t ctx)
+{
+  if (!ctx)
+    return;
+  if (memcmp (ctx->magic, CTX_MAGIC, CTX_MAGIC_LEN))
+    log_fatal ("bad pointer %p passed to gcry_ctx_relase\n", ctx);
+  switch (ctx->type)
+    {
+    case CONTEXT_TYPE_EC:
+    case CONTEXT_TYPE_RANDOM_OVERRIDE:
+      break;
+    default:
+      log_fatal ("bad context type %d detected in gcry_ctx_relase\n",
+                 ctx->type);
+      break;
+    }
+  if (ctx->deinit)
+    ctx->deinit (&ctx->u);
+  xfree (ctx);
+}
diff --git a/grub-core/lib/libgcrypt/src/context.h 
b/grub-core/lib/libgcrypt/src/context.h
new file mode 100644
index 000000000..5be367b2f
--- /dev/null
+++ b/grub-core/lib/libgcrypt/src/context.h
@@ -0,0 +1,32 @@
+/* context.h - Declarations for the context management
+ * Copyright (C) 2013  g10 Code GmbH
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser general Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef GCRY_CONTEXT_H
+#define GCRY_CONTEXT_H
+
+/* Context types as used in struct gcry_context.  */
+#define CONTEXT_TYPE_EC 1  /* The context is used with EC functions.  */
+#define CONTEXT_TYPE_RANDOM_OVERRIDE 2  /* Used with pubkey functions.  */
+
+gcry_ctx_t _gcry_ctx_alloc (int type, size_t length, void (*deinit)(void*));
+void *_gcry_ctx_get_pointer (gcry_ctx_t ctx, int type);
+void *_gcry_ctx_find_pointer (gcry_ctx_t ctx, int type);
+
+
+#endif /*GCRY_CONTEXT_H*/
diff --git a/grub-core/lib/libgcrypt/src/dumpsexp.c 
b/grub-core/lib/libgcrypt/src/dumpsexp.c
index f6384d7d5..cedc4f4b4 100644
--- a/grub-core/lib/libgcrypt/src/dumpsexp.c
+++ b/grub-core/lib/libgcrypt/src/dumpsexp.c
@@ -1,18 +1,19 @@
 /* dumpsexp.c - Dump S-expressions.
  * Copyright (C) 2007, 2010 Free Software Foundation, Inc.
+ * Copyright (C) 2010 g10 Code GmbH.
  *
  * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published
- * by the Free Software Foundation; either version 3 of the License,
- * or (at your option) any later version.
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
  *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
  *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
  */
 
 #include <config.h>
@@ -43,8 +44,8 @@ print_version (int with_help)
 {
   fputs (MYVERSION_LINE "\n"
          "Copyright (C) 2010 Free Software Foundation, Inc.\n"
-         "License GPLv3+: GNU GPL version 3 or later "
-         "<http://gnu.org/licenses/gpl.html>\n"
+         "License LGPLv2.1+: GNU LGPL version 2.1 or later "
+         "<http://gnu.org/licenses/>\n"
          "This is free software: you are free to change and redistribute it.\n"
          "There is NO WARRANTY, to the extent permitted by law.\n",
          stdout);
@@ -546,6 +547,7 @@ parse_and_print (FILE *fp)
           state = IN_DATA;
           printctl ("begindata");
           init_data ();
+         /* fall through */
         case IN_DATA:
           if (datalen)
             {
diff --git a/grub-core/lib/libgcrypt/src/ec-context.h 
b/grub-core/lib/libgcrypt/src/ec-context.h
new file mode 100644
index 000000000..479862f6f
--- /dev/null
+++ b/grub-core/lib/libgcrypt/src/ec-context.h
@@ -0,0 +1,107 @@
+/* ec-context.h - Private definitions for CONTEXT_TYPE_EC.
+ * Copyright (C) 2013  g10 Code GmbH
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser general Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef GCRY_EC_CONTEXT_H
+#define GCRY_EC_CONTEXT_H
+
+/* This context is used with all our EC functions. */
+struct mpi_ec_ctx_s
+{
+  enum gcry_mpi_ec_models model; /* The model describing this curve.  */
+
+  enum ecc_dialects dialect;     /* The ECC dialect used with the curve.  */
+
+  int flags;                     /* Public key flags (not always used).  */
+
+  unsigned int nbits;            /* Number of bits.  */
+
+  /* Domain parameters.  Note that they may not all be set and if set
+     the MPIs may be flaged as constant. */
+  gcry_mpi_t p;         /* Prime specifying the field GF(p).  */
+  gcry_mpi_t a;         /* First coefficient of the Weierstrass equation.  */
+  gcry_mpi_t b;         /* Second coefficient of the Weierstrass equation.  */
+  gcry_mpi_point_t G;   /* Base point (generator).  */
+  gcry_mpi_t n;         /* Order of G.  */
+  unsigned int h;       /* Cofactor.  */
+
+  /* The actual key.  May not be set.  */
+  gcry_mpi_point_t Q;   /* Public key.   */
+  gcry_mpi_t d;         /* Private key.  */
+
+  const char *name;      /* Name of the curve.  */
+
+  /* This structure is private to mpi/ec.c! */
+  struct {
+    struct {
+      unsigned int a_is_pminus3:1;
+      unsigned int two_inv_p:1;
+    } valid; /* Flags to help setting the helper vars below.  */
+
+    int a_is_pminus3;  /* True if A = P - 3. */
+
+    gcry_mpi_t two_inv_p;
+
+    mpi_barrett_t p_barrett;
+
+    /* Scratch variables.  */
+    gcry_mpi_t scratch[11];
+
+    /* Helper for fast reduction.  */
+    /*   int nist_nbits; /\* If this is a NIST curve, the # of bits.  *\/ */
+    /*   gcry_mpi_t s[10]; */
+    /*   gcry_mpi_t c; */
+  } t;
+
+  /* Curve specific computation routines for the field.  */
+  void (* addm) (gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v, mpi_ec_t ctx);
+  void (* subm) (gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v, mpi_ec_t ec);
+  void (* mulm) (gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v, mpi_ec_t ctx);
+  void (* pow2) (gcry_mpi_t w, const gcry_mpi_t b, mpi_ec_t ctx);
+  void (* mul2) (gcry_mpi_t w, gcry_mpi_t u, mpi_ec_t ctx);
+  void (* mod) (gcry_mpi_t w, mpi_ec_t ctx);
+};
+
+
+/*-- mpi/ec.c --*/
+void _gcry_mpi_ec_get_reset (mpi_ec_t ec);
+
+
+/*-- cipher/ecc-curves.c --*/
+gcry_mpi_t       _gcry_ecc_get_mpi (const char *name, mpi_ec_t ec, int copy);
+gcry_mpi_point_t _gcry_ecc_get_point (const char *name, mpi_ec_t ec);
+gpg_err_code_t   _gcry_ecc_set_mpi (const char *name,
+                                    gcry_mpi_t newvalue, mpi_ec_t ec);
+gpg_err_code_t   _gcry_ecc_set_point (const char *name,
+                                      gcry_mpi_point_t newvalue, mpi_ec_t ec);
+
+/*-- cipher/ecc-misc.c --*/
+gpg_err_code_t _gcry_ecc_sec_decodepoint (gcry_mpi_t value, mpi_ec_t ec,
+                                          mpi_point_t result);
+gpg_err_code_t _gcry_ecc_mont_decodepoint (gcry_mpi_t pk, mpi_ec_t ctx,
+                                           mpi_point_t result);
+
+/*-- cipher/ecc-eddsa.c --*/
+gpg_err_code_t _gcry_ecc_eddsa_decodepoint (gcry_mpi_t pk, mpi_ec_t ctx,
+                                            mpi_point_t result,
+                                            unsigned char **r_encpk,
+                                            unsigned int *r_encpklen);
+
+
+
+#endif /*GCRY_EC_CONTEXT_H*/
diff --git a/grub-core/lib/libgcrypt/src/fips.c 
b/grub-core/lib/libgcrypt/src/fips.c
index c5737a78d..5d71b208e 100644
--- a/grub-core/lib/libgcrypt/src/fips.c
+++ b/grub-core/lib/libgcrypt/src/fips.c
@@ -25,20 +25,20 @@
 #include <string.h>
 #ifdef ENABLE_HMAC_BINARY_CHECK
 # include <dlfcn.h>
+# include <elf.h>
+# include <limits.h>
+# include <link.h>
 #endif
 #ifdef HAVE_SYSLOG
 # include <syslog.h>
 #endif /*HAVE_SYSLOG*/
 
-#include "g10lib.h"
-#include "ath.h"
-#include "cipher-proto.h"
-#include "hmac256.h"
-
-
-/* The name of the file used to foce libgcrypt into fips mode. */
+/* The name of the file used to force libgcrypt into fips mode. */
 #define FIPS_FORCE_FILE "/etc/gcrypt/fips_enabled"
 
+#include "g10lib.h"
+#include "cipher-proto.h"
+#include "../random/random.h"
 
 /* The states of the finite state machine used in fips mode.  */
 enum module_states
@@ -58,18 +58,10 @@ enum module_states
    that fips mode is the default unless changed by the initialization
    code. To check whether fips mode is enabled, use the function
    fips_mode()! */
-static int no_fips_mode_required;
-
-/* Flag to indicate that we are in the enforced FIPS mode.  */
-static int enforced_fips_mode;
-
-/* If this flag is set, the application may no longer assume that the
-   process is running in FIPS mode.  This flag is protected by the
-   FSM_LOCK.  */
-static int inactive_fips_mode;
+int _gcry_no_fips_mode_required;
 
 /* This is the lock we use to protect the FSM.  */
-static ath_mutex_t fsm_lock = ATH_MUTEX_INITIALIZER;
+GPGRT_LOCK_DEFINE (fsm_lock);
 
 /* The current state of the FSM.  The whole state machinery is only
    used while in fips mode. Change this only while holding fsm_lock. */
@@ -92,36 +84,18 @@ static void fips_new_state (enum module_states new_state);
 
 
 
-/* Check whether the OS is in FIPS mode and record that in a module
-   local variable.  If FORCE is passed as true, fips mode will be
-   enabled anyway. Note: This function is not thread-safe and should
-   be called before any threads are created.  This function may only
-   be called once.  */
-void
-_gcry_initialize_fips_mode (int force)
+/*
+ * Returns 1 if the FIPS mode is to be activated based on the
+ * environment variable LIBGCRYPT_FORCE_FIPS_MODE, the file defined by
+ * FIPS_FORCE_FILE, or /proc/sys/crypto/fips_enabled.
+ * This function aborts on misconfigured filesystems.
+ */
+static int
+check_fips_system_setting (void)
 {
-  static int done;
-  gpg_error_t err;
-
-  /* Make sure we are not accidently called twice.  */
-  if (done)
-    {
-      if ( fips_mode () )
-        {
-          fips_new_state (STATE_FATALERROR);
-          fips_noreturn ();
-        }
-      /* If not in fips mode an assert is sufficient.  */
-      gcry_assert (!done);
-    }
-  done = 1;
-
-  /* If the calling application explicitly requested fipsmode, do so.  */
-  if (force)
-    {
-      gcry_assert (!no_fips_mode_required);
-      goto leave;
-    }
+  /* Do we have the environment variable set?  */
+  if (getenv ("LIBGCRYPT_FORCE_FIPS_MODE"))
+    return 1;
 
   /* For testing the system it is useful to override the system
      provided detection of the FIPS mode and force FIPS mode using a
@@ -129,10 +103,7 @@ _gcry_initialize_fips_mode (int force)
      confusion on whether /etc/gcrypt/ or /usr/local/etc/gcrypt/ is
      actually used.  The file itself may be empty.  */
   if ( !access (FIPS_FORCE_FILE, F_OK) )
-    {
-      gcry_assert (!no_fips_mode_required);
-      goto leave;
-    }
+    return 1;
 
   /* Checking based on /proc file properties.  */
   {
@@ -149,8 +120,7 @@ _gcry_initialize_fips_mode (int force)
           {
             /* System is in fips mode.  */
             fclose (fp);
-            gcry_assert (!no_fips_mode_required);
-            goto leave;
+            return 1;
           }
         fclose (fp);
       }
@@ -171,49 +141,87 @@ _gcry_initialize_fips_mode (int force)
       }
   }
 
+  return 0;
+}
+
+/*
+ * Initial check if the FIPS mode should be activated on startup.
+ * Called by the constructor at the initialization of the library.
+ */
+int
+_gcry_fips_to_activate (void)
+{
+  return check_fips_system_setting ();
+}
+
+
+/* Check whether the OS is in FIPS mode and record that in a module
+   local variable.  If FORCE is passed as true, fips mode will be
+   enabled anyway. Note: This function is not thread-safe and should
+   be called before any threads are created.  This function may only
+   be called once.  */
+void
+_gcry_initialize_fips_mode (int force)
+{
+  static int done;
+  gpg_error_t err;
+
+  /* Make sure we are not accidentally called twice.  */
+  if (done)
+    {
+      if ( fips_mode () )
+        {
+          fips_new_state (STATE_FATALERROR);
+          fips_noreturn ();
+        }
+      /* If not in fips mode an assert is sufficient.  */
+      gcry_assert (!done);
+    }
+  done = 1;
+
+  /* If the calling application explicitly requested fipsmode, do so.  */
+  if (force)
+    {
+      gcry_assert (!_gcry_no_fips_mode_required);
+      goto leave;
+    }
+
+  /* If the system explicitly requested fipsmode, do so.  */
+  if (check_fips_system_setting ())
+    {
+      gcry_assert (!_gcry_no_fips_mode_required);
+      goto leave;
+    }
+
   /* Fips not not requested, set flag.  */
-  no_fips_mode_required = 1;
+  _gcry_no_fips_mode_required = 1;
 
  leave:
-  if (!no_fips_mode_required)
+  if (!_gcry_no_fips_mode_required)
     {
       /* Yes, we are in FIPS mode.  */
-      FILE *fp;
 
       /* Intitialize the lock to protect the FSM.  */
-      err = ath_mutex_init (&fsm_lock);
+      err = gpgrt_lock_init (&fsm_lock);
       if (err)
         {
           /* If that fails we can't do anything but abort the
              process. We need to use log_info so that the FSM won't
              get involved.  */
           log_info ("FATAL: failed to create the FSM lock in libgcrypt: %s\n",
-                     strerror (err));
+                    gpg_strerror (err));
 #ifdef HAVE_SYSLOG
           syslog (LOG_USER|LOG_ERR, "Libgcrypt error: "
                   "creating FSM lock failed: %s - abort",
-                  strerror (err));
+                  gpg_strerror (err));
 #endif /*HAVE_SYSLOG*/
           abort ();
         }
 
-
-      /* If the FIPS force files exists, is readable and has a number
-         != 0 on its first line, we enable the enforced fips mode.  */
-      fp = fopen (FIPS_FORCE_FILE, "r");
-      if (fp)
-        {
-          char line[256];
-
-          if (fgets (line, sizeof line, fp) && atoi (line))
-            enforced_fips_mode = 1;
-          fclose (fp);
-        }
-
       /* Now get us into the INIT state.  */
       fips_new_state (STATE_INIT);
-
     }
+
   return;
 }
 
@@ -222,15 +230,15 @@ lock_fsm (void)
 {
   gpg_error_t err;
 
-  err = ath_mutex_lock (&fsm_lock);
+  err = gpgrt_lock_lock (&fsm_lock);
   if (err)
     {
       log_info ("FATAL: failed to acquire the FSM lock in libgrypt: %s\n",
-                strerror (err));
+                gpg_strerror (err));
 #ifdef HAVE_SYSLOG
       syslog (LOG_USER|LOG_ERR, "Libgcrypt error: "
               "acquiring FSM lock failed: %s - abort",
-              strerror (err));
+              gpg_strerror (err));
 #endif /*HAVE_SYSLOG*/
       abort ();
     }
@@ -241,99 +249,21 @@ unlock_fsm (void)
 {
   gpg_error_t err;
 
-  err = ath_mutex_unlock (&fsm_lock);
+  err = gpgrt_lock_unlock (&fsm_lock);
   if (err)
     {
       log_info ("FATAL: failed to release the FSM lock in libgrypt: %s\n",
-                strerror (err));
+                gpg_strerror (err));
 #ifdef HAVE_SYSLOG
       syslog (LOG_USER|LOG_ERR, "Libgcrypt error: "
               "releasing FSM lock failed: %s - abort",
-              strerror (err));
+              gpg_strerror (err));
 #endif /*HAVE_SYSLOG*/
       abort ();
     }
 }
 
 
-/* This function returns true if fips mode is enabled.  This is
-   independent of the fips required finite state machine and only used
-   to enable fips specific code.  Please use the fips_mode macro
-   instead of calling this function directly. */
-int
-_gcry_fips_mode (void)
-{
-  /* No locking is required because we have the requirement that this
-     variable is only initialized once with no other threads
-     existing.  */
-  return !no_fips_mode_required;
-}
-
-
-/* Return a flag telling whether we are in the enforced fips mode.  */
-int
-_gcry_enforced_fips_mode (void)
-{
-  if (!_gcry_fips_mode ())
-    return 0;
-  return enforced_fips_mode;
-}
-
-/* Set a flag telling whether we are in the enforced fips mode.  */
-void
-_gcry_set_enforced_fips_mode (void)
-{
-  enforced_fips_mode = 1;
-}
-
-/* If we do not want to enforce the fips mode, we can set a flag so
-   that the application may check whether it is still in fips mode.
-   TEXT will be printed as part of a syslog message.  This function
-   may only be be called if in fips mode. */
-void
-_gcry_inactivate_fips_mode (const char *text)
-{
-  gcry_assert (_gcry_fips_mode ());
-
-  if (_gcry_enforced_fips_mode () )
-    {
-      /* Get us into the error state. */
-      fips_signal_error (text);
-      return;
-    }
-
-  lock_fsm ();
-  if (!inactive_fips_mode)
-    {
-      inactive_fips_mode = 1;
-      unlock_fsm ();
-#ifdef HAVE_SYSLOG
-      syslog (LOG_USER|LOG_WARNING, "Libgcrypt warning: "
-              "%s - FIPS mode inactivated", text);
-#endif /*HAVE_SYSLOG*/
-    }
-  else
-    unlock_fsm ();
-}
-
-
-/* Return the FIPS mode inactive flag.  If it is true the FIPS mode is
-   not anymore active.  */
-int
-_gcry_is_fips_mode_inactive (void)
-{
-  int flag;
-
-  if (!_gcry_fips_mode ())
-    return 0;
-  lock_fsm ();
-  flag = inactive_fips_mode;
-  unlock_fsm ();
-  return flag;
-}
-
-
-
 static const char *
 state2str (enum module_states state)
 {
@@ -378,10 +308,13 @@ _gcry_fips_is_operational (void)
              (GCRYCTL_INITIALIZATION_FINISHED) where the latter will
              run the selftests.  The drawback of these on-demand
              self-tests are a small chance that self-tests are
-             performed by severeal threads; that is no problem because
+             performed by several threads; that is no problem because
              our FSM make sure that we won't oversee any error. */
           unlock_fsm ();
           _gcry_fips_run_selftests (0);
+
+          /* Release resources for random.  */
+          _gcry_random_close_fds ();
           lock_fsm ();
         }
 
@@ -411,6 +344,166 @@ _gcry_fips_test_operational (void)
   return result;
 }
 
+int
+_gcry_fips_indicator_cipher (va_list arg_ptr)
+{
+  enum gcry_cipher_algos alg = va_arg (arg_ptr, enum gcry_cipher_algos);
+  enum gcry_cipher_modes mode;
+
+  switch (alg)
+    {
+    case GCRY_CIPHER_AES:
+    case GCRY_CIPHER_AES192:
+    case GCRY_CIPHER_AES256:
+      mode = va_arg (arg_ptr, enum gcry_cipher_modes);
+      switch (mode)
+        {
+        case GCRY_CIPHER_MODE_ECB:
+        case GCRY_CIPHER_MODE_CBC:
+        case GCRY_CIPHER_MODE_CFB:
+        case GCRY_CIPHER_MODE_CFB8:
+        case GCRY_CIPHER_MODE_OFB:
+        case GCRY_CIPHER_MODE_CTR:
+        case GCRY_CIPHER_MODE_CCM:
+        case GCRY_CIPHER_MODE_XTS:
+        case GCRY_CIPHER_MODE_AESWRAP:
+          return GPG_ERR_NO_ERROR;
+        default:
+          return GPG_ERR_NOT_SUPPORTED;
+        }
+    default:
+      return GPG_ERR_NOT_SUPPORTED;
+    }
+}
+
+int
+_gcry_fips_indicator_mac (va_list arg_ptr)
+{
+  enum gcry_mac_algos alg = va_arg (arg_ptr, enum gcry_mac_algos);
+
+  switch (alg)
+    {
+    case GCRY_MAC_CMAC_AES:
+    case GCRY_MAC_HMAC_SHA1:
+    case GCRY_MAC_HMAC_SHA224:
+    case GCRY_MAC_HMAC_SHA256:
+    case GCRY_MAC_HMAC_SHA384:
+    case GCRY_MAC_HMAC_SHA512:
+    case GCRY_MAC_HMAC_SHA512_224:
+    case GCRY_MAC_HMAC_SHA512_256:
+    case GCRY_MAC_HMAC_SHA3_224:
+    case GCRY_MAC_HMAC_SHA3_256:
+    case GCRY_MAC_HMAC_SHA3_384:
+    case GCRY_MAC_HMAC_SHA3_512:
+      return GPG_ERR_NO_ERROR;
+    default:
+      return GPG_ERR_NOT_SUPPORTED;
+    }
+}
+
+int
+_gcry_fips_indicator_md (va_list arg_ptr)
+{
+  enum gcry_md_algos alg = va_arg (arg_ptr, enum gcry_md_algos);
+
+  switch (alg)
+    {
+    case GCRY_MD_SHA1:
+    case GCRY_MD_SHA224:
+    case GCRY_MD_SHA256:
+    case GCRY_MD_SHA384:
+    case GCRY_MD_SHA512:
+    case GCRY_MD_SHA512_224:
+    case GCRY_MD_SHA512_256:
+    case GCRY_MD_SHA3_224:
+    case GCRY_MD_SHA3_256:
+    case GCRY_MD_SHA3_384:
+    case GCRY_MD_SHA3_512:
+    case GCRY_MD_SHAKE128:
+    case GCRY_MD_SHAKE256:
+      return GPG_ERR_NO_ERROR;
+    default:
+      return GPG_ERR_NOT_SUPPORTED;
+    }
+}
+
+int
+_gcry_fips_indicator_kdf (va_list arg_ptr)
+{
+  enum gcry_kdf_algos alg = va_arg (arg_ptr, enum gcry_kdf_algos);
+
+  switch (alg)
+    {
+    case GCRY_KDF_PBKDF2:
+      return GPG_ERR_NO_ERROR;
+    default:
+      return GPG_ERR_NOT_SUPPORTED;
+    }
+}
+
+int
+_gcry_fips_indicator_function (va_list arg_ptr)
+{
+  const char *function = va_arg (arg_ptr, const char *);
+
+  if (strcmp (function, "gcry_pk_sign") == 0 ||
+      strcmp (function, "gcry_pk_verify") == 0 ||
+      strcmp (function, "gcry_pk_encrypt") == 0 ||
+      strcmp (function, "gcry_pk_decrypt") == 0 ||
+      strcmp (function, "gcry_pk_random_override_new") == 0)
+    return GPG_ERR_NOT_SUPPORTED;
+
+  return GPG_ERR_NO_ERROR;
+}
+
+/* Note: the array should be sorted.  */
+static const char *valid_string_in_sexp[] = {
+  "curve",
+  "d",
+  "data",
+  "e",
+  "ecdsa",
+  "flags",
+  "genkey",
+  "hash",
+  "n",
+  "nbits",
+  "pkcs1",
+  "private-key",
+  "pss",
+  "public-key",
+  "q",
+  "r",
+  "raw",
+  "rsa",
+  "rsa-use-e",
+  "s",
+  "salt-length",
+  "sig-val",
+  "value"
+};
+
+static int
+compare_string (const void *v1, const void *v2)
+{
+  const char * const *p_str1 = v1;
+  const char * const *p_str2 = v2;
+
+  return strcmp (*p_str1, *p_str2);
+}
+
+int
+_gcry_fips_indicator_pk_flags (va_list arg_ptr)
+{
+  const char *flag = va_arg (arg_ptr, const char *);
+
+  if (bsearch (&flag, valid_string_in_sexp, DIM (valid_string_in_sexp),
+               sizeof (char *), compare_string))
+    return GPG_ERR_NO_ERROR;
+
+  return GPG_ERR_NOT_SUPPORTED;
+}
+
 
 /* This is a test on whether the library is in the error or
    operational state. */
@@ -456,7 +549,6 @@ run_cipher_selftests (int extended)
 {
   static int algos[] =
     {
-      GCRY_CIPHER_3DES,
       GCRY_CIPHER_AES128,
       GCRY_CIPHER_AES192,
       GCRY_CIPHER_AES256,
@@ -487,7 +579,9 @@ run_digest_selftests (int extended)
     {
       GCRY_MD_SHA1,
       GCRY_MD_SHA224,
+#ifndef ENABLE_HMAC_BINARY_CHECK
       GCRY_MD_SHA256,
+#endif
       GCRY_MD_SHA384,
       GCRY_MD_SHA512,
       0
@@ -508,17 +602,24 @@ run_digest_selftests (int extended)
 }
 
 
-/* Run self-tests for all HMAC algorithms.  Return 0 on success. */
+/* Run self-tests for MAC algorithms.  Return 0 on success. */
 static int
-run_hmac_selftests (int extended)
+run_mac_selftests (int extended)
 {
   static int algos[] =
     {
-      GCRY_MD_SHA1,
-      GCRY_MD_SHA224,
-      GCRY_MD_SHA256,
-      GCRY_MD_SHA384,
-      GCRY_MD_SHA512,
+      GCRY_MAC_HMAC_SHA1,
+      GCRY_MAC_HMAC_SHA224,
+#ifndef ENABLE_HMAC_BINARY_CHECK
+      GCRY_MAC_HMAC_SHA256,
+#endif
+      GCRY_MAC_HMAC_SHA384,
+      GCRY_MAC_HMAC_SHA512,
+      GCRY_MAC_HMAC_SHA3_224,
+      GCRY_MAC_HMAC_SHA3_256,
+      GCRY_MAC_HMAC_SHA3_384,
+      GCRY_MAC_HMAC_SHA3_512,
+      GCRY_MAC_CMAC_AES,
       0
     };
   int idx;
@@ -527,8 +628,8 @@ run_hmac_selftests (int extended)
 
   for (idx=0; algos[idx]; idx++)
     {
-      err = _gcry_hmac_selftest (algos[idx], extended, reporter);
-      reporter ("hmac", algos[idx], NULL,
+      err = _gcry_mac_selftest (algos[idx], extended, reporter);
+      reporter ("mac", algos[idx], NULL,
                 err? gpg_strerror (err):NULL);
       if (err)
         anyerr = 1;
@@ -536,6 +637,29 @@ run_hmac_selftests (int extended)
   return anyerr;
 }
 
+/* Run self-tests for all KDF algorithms.  Return 0 on success. */
+static int
+run_kdf_selftests (int extended)
+{
+  static int algos[] =
+    {
+      GCRY_KDF_PBKDF2,
+      0
+    };
+  int idx;
+  gpg_error_t err;
+  int anyerr = 0;
+
+  for (idx=0; algos[idx]; idx++)
+    {
+      err = _gcry_kdf_selftest (algos[idx], extended, reporter);
+      reporter ("kdf", algos[idx], NULL, err? gpg_strerror (err):NULL);
+      if (err)
+        anyerr = 1;
+    }
+  return anyerr;
+}
+
 
 /* Run self-tests for all required public key algorithms.  Return 0 on
    success. */
@@ -545,8 +669,7 @@ run_pubkey_selftests (int extended)
   static int algos[] =
     {
       GCRY_PK_RSA,
-      GCRY_PK_DSA,
-      /* GCRY_PK_ECDSA is not enabled in fips mode.  */
+      GCRY_PK_ECC,
       0
     };
   int idx;
@@ -578,96 +701,268 @@ run_random_selftests (void)
   return !!err;
 }
 
+#ifdef ENABLE_HMAC_BINARY_CHECK
+# ifndef KEY_FOR_BINARY_CHECK
+# define KEY_FOR_BINARY_CHECK "What am I, a doctor or a moonshuttle conductor?"
+# endif
+#define HMAC_LEN 32
+
+/*
+ * In the ELF file opened as FP, fill the ELF header to the pointer
+ * EHDR_P, determine the maximum offset of segments in R_OFFSET.
+ * Also, find the section which contains the hmac value and return it
+ * in HMAC.  Rewinds FP to the beginning on success.
+ */
+static gpg_error_t
+get_file_offset (FILE *fp, ElfW (Ehdr) *ehdr_p,
+                 unsigned long *r_offset, unsigned char hmac[HMAC_LEN])
+{
+  ElfW (Phdr) phdr;
+  ElfW (Shdr) shdr;
+  int i;
+  unsigned long off_segment = 0;
+
+  /* Read the ELF header */
+  if (fseek (fp, 0, SEEK_SET) != 0)
+    return gpg_error_from_syserror ();
+  if (fread (ehdr_p, sizeof (*ehdr_p), 1, fp) != 1)
+    return gpg_error_from_syserror ();
+
+  /* The program header entry size should match the size of the phdr struct */
+  if (ehdr_p->e_phentsize != sizeof (phdr))
+    return gpg_error (GPG_ERR_INV_OBJ);
+  if (ehdr_p->e_phoff == 0)
+    return gpg_error (GPG_ERR_INV_OBJ);
+
+  /* Jump to the first program header */
+  if (fseek (fp, ehdr_p->e_phoff, SEEK_SET) != 0)
+    return gpg_error_from_syserror ();
+
+  /* Iterate over the program headers, determine the last offset of
+     segments.  */
+  for (i = 0; i < ehdr_p->e_phnum; i++)
+    {
+      unsigned long off;
+
+      if (fread (&phdr, sizeof (phdr), 1, fp) != 1)
+        return gpg_error_from_syserror ();
+
+      off = phdr.p_offset + phdr.p_filesz;
+      if (off_segment < off)
+        off_segment = off;
+    }
+
+  if (!off_segment)
+    /* No segment found in the file */
+    return gpg_error (GPG_ERR_INV_OBJ);
+
+  /* The section header entry size should match the size of the shdr struct */
+  if (ehdr_p->e_shentsize != sizeof (shdr))
+    return gpg_error (GPG_ERR_INV_OBJ);
+  if (ehdr_p->e_shoff == 0)
+    return gpg_error (GPG_ERR_INV_OBJ);
+
+  /* Jump to the first section header */
+  if (fseek (fp, ehdr_p->e_shoff, SEEK_SET) != 0)
+    return gpg_error_from_syserror ();
+
+  /* Iterate over the section headers, determine the note section,
+     read the hmac value.  */
+  for (i = 0; i < ehdr_p->e_shnum; i++)
+    {
+      long off;
+
+      if (fread (&shdr, sizeof (shdr), 1, fp) != 1)
+        return gpg_error_from_syserror ();
+
+      off = ftell (fp);
+      if (off < 0)
+        return gpg_error_from_syserror ();
+      if (shdr.sh_type == SHT_NOTE && shdr.sh_flags == 0 && shdr.sh_size == 48)
+        {
+          const char header_of_the_note[] = {
+            0x04, 0x00, 0x00, 0x00,
+            0x20, 0x00, 0x00, 0x00,
+            0xca, 0xfe, 0x2a, 0x8e,
+            'F', 'D', 'O', 0x00
+          };
+          unsigned char header[16];
+
+          /* Jump to the note section.  */
+          if (fseek (fp, shdr.sh_offset, SEEK_SET) != 0)
+            return gpg_error_from_syserror ();
+
+          if (fread (header, sizeof (header), 1, fp) != 1)
+            return gpg_error_from_syserror ();
+
+          if (!memcmp (header, header_of_the_note, 16))
+            {
+              /* Found.  Read the hmac value into HMAC.  */
+              if (fread (hmac, HMAC_LEN, 1, fp) != 1)
+                return gpg_error_from_syserror ();
+              break;
+            }
+
+          /* Back to the next section header.  */
+          if (fseek (fp, off, SEEK_SET) != 0)
+            return gpg_error_from_syserror ();
+        }
+    }
+
+  if (i == ehdr_p->e_shnum)
+    /* The note section not found.  */
+    return gpg_error (GPG_ERR_INV_OBJ);
+
+  /* Fix up the ELF header, clean all section information.  */
+  ehdr_p->e_shoff = 0;
+  ehdr_p->e_shentsize = 0;
+  ehdr_p->e_shnum = 0;
+  ehdr_p->e_shstrndx = 0;
+
+  *r_offset = off_segment;
+  if (fseek (fp, 0, SEEK_SET) != 0)
+    return gpg_error_from_syserror ();
+
+  return 0;
+}
+
+static gpg_error_t
+hmac256_check (const char *filename, const char *key)
+{
+  gpg_error_t err;
+  FILE *fp;
+  gcry_md_hd_t hd;
+  const size_t buffer_size = 32768;
+  size_t nread;
+  char *buffer;
+  unsigned long offset = 0;
+  unsigned long pos = 0;
+  ElfW (Ehdr) ehdr;
+  unsigned char hmac[HMAC_LEN];
+
+  fp = fopen (filename, "rb");
+  if (!fp)
+    return gpg_error (GPG_ERR_INV_OBJ);
+
+  err = get_file_offset (fp, &ehdr, &offset, hmac);
+  if (err)
+    {
+      fclose (fp);
+      return err;
+    }
+
+  err = _gcry_md_open (&hd, GCRY_MD_SHA256, GCRY_MD_FLAG_HMAC);
+  if (err)
+    {
+      fclose (fp);
+      return err;
+    }
+
+  err = _gcry_md_setkey (hd, key, strlen (key));
+  if (err)
+    {
+      fclose (fp);
+      _gcry_md_close (hd);
+      return err;
+    }
+
+  buffer = xtrymalloc (buffer_size);
+  if (!buffer)
+    {
+      err = gpg_error_from_syserror ();
+      fclose (fp);
+      _gcry_md_close (hd);
+      return err;
+    }
+
+  while (1)
+    {
+      nread = fread (buffer, 1, buffer_size, fp);
+      if (pos + nread >= offset)
+        nread = offset - pos;
+
+      /* Copy the fixed ELF header at the beginning.  */
+      if (pos == 0)
+        memcpy (buffer, &ehdr, sizeof (ehdr));
+
+      _gcry_md_write (hd, buffer, nread);
+
+      if (nread < buffer_size)
+        break;
+
+      pos += nread;
+    }
+
+  if (ferror (fp))
+    err = gpg_error (GPG_ERR_INV_HANDLE);
+  else
+    {
+      unsigned char *digest;
+
+      digest = _gcry_md_read (hd, 0);
+      if (!memcmp (digest, hmac, HMAC_LEN))
+        /* Success.  */
+        err = 0;
+      else
+        err = gpg_error (GPG_ERR_CHECKSUM);
+    }
+
+  _gcry_md_close (hd);
+  xfree (buffer);
+  fclose (fp);
+
+  return err;
+}
+
 /* Run an integrity check on the binary.  Returns 0 on success.  */
 static int
 check_binary_integrity (void)
 {
-#ifdef ENABLE_HMAC_BINARY_CHECK
   gpg_error_t err;
   Dl_info info;
-  unsigned char digest[32];
-  int dlen;
-  char *fname = NULL;
-  const char key[] = "What am I, a doctor or a moonshuttle conductor?";
+  const char *key = KEY_FOR_BINARY_CHECK;
 
-  if (!dladdr ("gcry_check_version", &info))
+  if (!dladdr (hmac256_check, &info))
     err = gpg_error_from_syserror ();
   else
-    {
-      dlen = _gcry_hmac256_file (digest, sizeof digest, info.dli_fname,
-                                 key, strlen (key));
-      if (dlen < 0)
-        err = gpg_error_from_syserror ();
-      else if (dlen != 32)
-        err = gpg_error (GPG_ERR_INTERNAL);
-      else
-        {
-          fname = gcry_malloc (strlen (info.dli_fname) + 1 + 5 + 1 );
-          if (!fname)
-            err = gpg_error_from_syserror ();
-          else
-            {
-              FILE *fp;
-              char *p;
-
-              /* Prefix the basename with a dot.  */
-              strcpy (fname, info.dli_fname);
-              p = strrchr (fname, '/');
-              if (p)
-                p++;
-              else
-                p = fname;
-              memmove (p+1, p, strlen (p)+1);
-              *p = '.';
-              strcat (fname, ".hmac");
-
-              /* Open the file.  */
-              fp = fopen (fname, "r");
-              if (!fp)
-                err = gpg_error_from_syserror ();
-              else
-                {
-                  /* A buffer of 64 bytes plus one for a LF and one to
-                     detect garbage.  */
-                  unsigned char buffer[64+1+1];
-                  const unsigned char *s;
-                  int n;
-
-                  /* The HMAC files consists of lowercase hex digits
-                     only with an optional trailing linefeed.  Fail if
-                     there is any garbage.  */
-                  err = gpg_error (GPG_ERR_SELFTEST_FAILED);
-                  n = fread (buffer, 1, sizeof buffer, fp);
-                  if (n == 64 || (n == 65 && buffer[64] == '\n'))
-                    {
-                      buffer[64] = 0;
-                      for (n=0, s= buffer;
-                           n < 32 && loxdigit_p (s) && loxdigit_p (s+1);
-                           n++, s += 2)
-                        buffer[n] = loxtoi_2 (s);
-                      if ( n == 32 && !memcmp (digest, buffer, 32) )
-                        err = 0;
-                    }
-                  fclose (fp);
-                }
-            }
-        }
-    }
-  reporter ("binary", 0, fname, err? gpg_strerror (err):NULL);
+    err = hmac256_check (info.dli_fname, key);
+
+  reporter ("binary", 0, NULL, err? gpg_strerror (err):NULL);
 #ifdef HAVE_SYSLOG
   if (err)
     syslog (LOG_USER|LOG_ERR, "Libgcrypt error: "
-            "integrity check using `%s' failed: %s",
-            fname? fname:"[?]", gpg_strerror (err));
+            "integrity check failed: %s",
+            gpg_strerror (err));
 #endif /*HAVE_SYSLOG*/
-  gcry_free (fname);
   return !!err;
-#else
-  return 0;
-#endif
 }
 
 
+/* Run self-tests for HMAC-SHA256 algorithm before verifying library integrity.
+ * Return 0 on success. */
+static int
+run_hmac_sha256_selftests (int extended)
+{
+  gpg_error_t err;
+  int anyerr = 0;
+
+  err = _gcry_md_selftest (GCRY_MD_SHA256, extended, reporter);
+  reporter ("digest", GCRY_MD_SHA256, NULL,
+            err? gpg_strerror (err):NULL);
+  if (err)
+    anyerr = 1;
+
+  err = _gcry_mac_selftest (GCRY_MAC_HMAC_SHA256, extended, reporter);
+  reporter ("mac", GCRY_MAC_HMAC_SHA256, NULL,
+            err? gpg_strerror (err):NULL);
+  if (err)
+    anyerr = 1;
+
+  return anyerr;
+}
+#endif
+
+
 /* Run the self-tests.  If EXTENDED is true, extended versions of the
    selftest are run, that is more tests than required by FIPS.  */
 gpg_err_code_t
@@ -679,13 +974,29 @@ _gcry_fips_run_selftests (int extended)
   if (fips_mode ())
     fips_new_state (STATE_SELFTEST);
 
+#ifdef ENABLE_HMAC_BINARY_CHECK
+  if (run_hmac_sha256_selftests (extended))
+    goto leave;
+
+  if (fips_mode ())
+    {
+      /* Now check the integrity of the binary.  We do this this after
+         having checked the HMAC code.  */
+      if (check_binary_integrity ())
+        goto leave;
+    }
+#endif
+
   if (run_cipher_selftests (extended))
     goto leave;
 
   if (run_digest_selftests (extended))
     goto leave;
 
-  if (run_hmac_selftests (extended))
+  if (run_mac_selftests (extended))
+    goto leave;
+
+  if (run_kdf_selftests (extended))
     goto leave;
 
   /* Run random tests before the pubkey tests because the latter
@@ -696,11 +1007,6 @@ _gcry_fips_run_selftests (int extended)
   if (run_pubkey_selftests (extended))
     goto leave;
 
-  /* Now check the integrity of the binary.  We do this this after
-     having checked the HMAC code.  */
-  if (check_binary_integrity ())
-    goto leave;
-
   /* All selftests passed.  */
   result = STATE_OPERATIONAL;
   ec = 0;
diff --git a/grub-core/lib/libgcrypt/src/g10lib.h 
b/grub-core/lib/libgcrypt/src/g10lib.h
index 6bde20f1e..a7aee80d9 100644
--- a/grub-core/lib/libgcrypt/src/g10lib.h
+++ b/grub-core/lib/libgcrypt/src/g10lib.h
@@ -66,9 +66,36 @@
 #define GCC_ATTR_FORMAT_ARG(a)
 #endif
 
+/* I am not sure since when the unused attribute is really supported.
+   In any case it it only needed for gcc versions which print a
+   warning.  Thus let us require gcc >= 3.5.  */
+#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 5 )
+#define GCC_ATTR_UNUSED  __attribute__ ((unused))
+#else
+#define GCC_ATTR_UNUSED
+#endif
+
+#if __GNUC__ > 3
+#define NOINLINE_FUNC     __attribute__((noinline))
+#else
+#define NOINLINE_FUNC
+#endif
+
+#if __GNUC__ >= 3
+#define LIKELY(expr)      __builtin_expect( !!(expr), 1 )
+#define UNLIKELY(expr)    __builtin_expect( !!(expr), 0 )
+#define CONSTANT_P(expr)  __builtin_constant_p( expr )
+#else
+#define LIKELY(expr)      (!!(expr))
+#define UNLIKELY(expr)    (!!(expr))
+#define CONSTANT_P(expr)  (0)
+#endif
 
 /* Gettext macros.  */
 
+#define _(a)  _gcry_gettext(a)
+#define N_(a) (a)
+
 /* Some handy macros */
 #ifndef STR
 #define STR(v) #v
@@ -77,14 +104,52 @@
 #define DIM(v) (sizeof(v)/sizeof((v)[0]))
 #define DIMof(type,member)   DIM(((type *)0)->member)
 
+#define my_isascii(c) (!((c) & 0x80))
+
+
 
 
 /*-- src/global.c -*/
+extern int _gcry_global_any_init_done;
 int _gcry_global_is_operational (void);
-gcry_error_t _gcry_vcontrol (enum gcry_ctl_cmds cmd, va_list arg_ptr);
-void  _gcry_check_heap (const void *a);
+gcry_err_code_t _gcry_vcontrol (enum gcry_ctl_cmds cmd, va_list arg_ptr);
+void _gcry_check_heap (const void *a);
+void _gcry_pre_syscall (void);
+void _gcry_post_syscall (void);
 int _gcry_get_debug_flag (unsigned int mask);
 
+char *_gcry_get_config (int mode, const char *what);
+
+/* Malloc functions and common wrapper macros.  */
+void *_gcry_malloc (size_t n) _GCRY_GCC_ATTR_MALLOC;
+void *_gcry_calloc (size_t n, size_t m) _GCRY_GCC_ATTR_MALLOC;
+void *_gcry_malloc_secure (size_t n) _GCRY_GCC_ATTR_MALLOC;
+void *_gcry_calloc_secure (size_t n, size_t m) _GCRY_GCC_ATTR_MALLOC;
+void *_gcry_realloc (void *a, size_t n);
+char *_gcry_strdup (const char *string) _GCRY_GCC_ATTR_MALLOC;
+void *_gcry_xmalloc (size_t n) _GCRY_GCC_ATTR_MALLOC;
+void *_gcry_xcalloc (size_t n, size_t m) _GCRY_GCC_ATTR_MALLOC;
+void *_gcry_xmalloc_secure (size_t n) _GCRY_GCC_ATTR_MALLOC;
+void *_gcry_xcalloc_secure (size_t n, size_t m) _GCRY_GCC_ATTR_MALLOC;
+void *_gcry_xrealloc (void *a, size_t n);
+char *_gcry_xstrdup (const char * a) _GCRY_GCC_ATTR_MALLOC;
+void  _gcry_free (void *a);
+int   _gcry_is_secure (const void *a) _GCRY_GCC_ATTR_PURE;
+
+#define xtrymalloc(a)    _gcry_malloc ((a))
+#define xtrycalloc(a,b)  _gcry_calloc ((a),(b))
+#define xtrymalloc_secure(a)   _gcry_malloc_secure ((a))
+#define xtrycalloc_secure(a,b) _gcry_calloc_secure ((a),(b))
+#define xtryrealloc(a,b) _gcry_realloc ((a),(b))
+#define xtrystrdup(a)    _gcry_strdup ((a))
+#define xmalloc(a)       _gcry_xmalloc ((a))
+#define xcalloc(a,b)     _gcry_xcalloc ((a),(b))
+#define xmalloc_secure(a)   _gcry_xmalloc_secure ((a))
+#define xcalloc_secure(a,b) _gcry_xcalloc_secure ((a),(b))
+#define xrealloc(a,b)    _gcry_xrealloc ((a),(b))
+#define xstrdup(a)       _gcry_xstrdup ((a))
+#define xfree(a)         _gcry_free ((a))
+
 
 /*-- src/misc.c --*/
 
@@ -98,33 +163,38 @@ void _gcry_bug (const char *file, int line);
 void _gcry_assert_failed (const char *expr, const char *file, int line);
 #endif
 
+void _gcry_divide_by_zero (void) JNLIB_GCC_A_NR;
+
 const char *_gcry_gettext (const char *key) GCC_ATTR_FORMAT_ARG(1);
 void _gcry_fatal_error(int rc, const char *text ) JNLIB_GCC_A_NR;
+void _gcry_logv (int level,
+                 const char *fmt, va_list arg_ptr) JNLIB_GCC_A_PRINTF(2,0);
 void _gcry_log( int level, const char *fmt, ... ) JNLIB_GCC_A_PRINTF(2,3);
 void _gcry_log_bug( const char *fmt, ... )   JNLIB_GCC_A_NR_PRINTF(1,2);
 void _gcry_log_fatal( const char *fmt, ... ) JNLIB_GCC_A_NR_PRINTF(1,2);
 void _gcry_log_error( const char *fmt, ... ) JNLIB_GCC_A_PRINTF(1,2);
 void _gcry_log_info( const char *fmt, ... )  JNLIB_GCC_A_PRINTF(1,2);
-int  _gcry_log_info_with_dummy_fp (FILE *fp, const char *fmt, ... )
-                                             JNLIB_GCC_A_PRINTF(2,3);
 void _gcry_log_debug( const char *fmt, ... ) JNLIB_GCC_A_PRINTF(1,2);
 void _gcry_log_printf ( const char *fmt, ... ) JNLIB_GCC_A_PRINTF(1,2);
 void _gcry_log_printhex (const char *text, const void *buffer, size_t length);
+void _gcry_log_printmpi (const char *text, gcry_mpi_t mpi);
+void _gcry_log_printsxp (const char *text, gcry_sexp_t sexp);
 
 void _gcry_set_log_verbosity( int level );
 int _gcry_log_verbosity( int level );
 
+
 #ifdef JNLIB_GCC_M_FUNCTION
 #define BUG() _gcry_bug( __FILE__ , __LINE__, __FUNCTION__ )
-#define gcry_assert(expr) ((expr)? (void)0 \
+#define gcry_assert(expr) (LIKELY(expr)? (void)0 \
          : _gcry_assert_failed (STR(expr), __FILE__, __LINE__, __FUNCTION__))
 #elif __STDC_VERSION__ >= 199901L
 #define BUG() _gcry_bug( __FILE__ , __LINE__, __func__ )
-#define gcry_assert(expr) ((expr)? (void)0 \
+#define gcry_assert(expr) (LIKELY(expr)? (void)0 \
          : _gcry_assert_failed (STR(expr), __FILE__, __LINE__, __func__))
 #else
 #define BUG() _gcry_bug( __FILE__ , __LINE__ )
-#define gcry_assert(expr) ((expr)? (void)0 \
+#define gcry_assert(expr) (LIKELY(expr)? (void)0 \
          : _gcry_assert_failed (STR(expr), __FILE__, __LINE__))
 #endif
 
@@ -136,20 +206,68 @@ int _gcry_log_verbosity( int level );
 #define log_debug   _gcry_log_debug
 #define log_printf  _gcry_log_printf
 #define log_printhex _gcry_log_printhex
+#define log_printmpi _gcry_log_printmpi
+#define log_printsxp _gcry_log_printsxp
 
+/* Compatibility macro.  */
+#define log_mpidump _gcry_log_printmpi
+
+/* Tokeninze STRING and return a malloced array.  */
+char **_gcry_strtokenize (const char *string, const char *delim);
 
-/*-- src/hwfeatures.c --*/
-/* (Do not change these values unless synced with the asm code.)  */
-#define HWF_PADLOCK_RNG  1
-#define HWF_PADLOCK_AES  2
-#define HWF_PADLOCK_SHA  4
-#define HWF_PADLOCK_MMUL 8
 
-#define HWF_INTEL_AESNI  256
+/*-- src/hwfeatures.c --*/
+#if defined(HAVE_CPU_ARCH_X86)
+
+#define HWF_PADLOCK_RNG         (1 << 0)
+#define HWF_PADLOCK_AES         (1 << 1)
+#define HWF_PADLOCK_SHA         (1 << 2)
+#define HWF_PADLOCK_MMUL        (1 << 3)
+
+#define HWF_INTEL_CPU           (1 << 4)
+#define HWF_INTEL_FAST_SHLD     (1 << 5)
+#define HWF_INTEL_BMI2          (1 << 6)
+#define HWF_INTEL_SSSE3         (1 << 7)
+#define HWF_INTEL_SSE4_1        (1 << 8)
+#define HWF_INTEL_PCLMUL        (1 << 9)
+#define HWF_INTEL_AESNI         (1 << 10)
+#define HWF_INTEL_RDRAND        (1 << 11)
+#define HWF_INTEL_AVX           (1 << 12)
+#define HWF_INTEL_AVX2          (1 << 13)
+#define HWF_INTEL_FAST_VPGATHER (1 << 14)
+#define HWF_INTEL_RDTSC         (1 << 15)
+#define HWF_INTEL_SHAEXT        (1 << 16)
+#define HWF_INTEL_VAES_VPCLMUL  (1 << 17)
+
+#elif defined(HAVE_CPU_ARCH_ARM)
+
+#define HWF_ARM_NEON            (1 << 0)
+#define HWF_ARM_AES             (1 << 1)
+#define HWF_ARM_SHA1            (1 << 2)
+#define HWF_ARM_SHA2            (1 << 3)
+#define HWF_ARM_PMULL           (1 << 4)
+
+#elif defined(HAVE_CPU_ARCH_PPC)
+
+#define HWF_PPC_VCRYPTO         (1 << 0)
+#define HWF_PPC_ARCH_3_00       (1 << 1)
+#define HWF_PPC_ARCH_2_07       (1 << 2)
+#define HWF_PPC_ARCH_3_10       (1 << 3)
+
+#elif defined(HAVE_CPU_ARCH_S390X)
+
+#define HWF_S390X_MSA           (1 << 0)
+#define HWF_S390X_MSA_4         (1 << 1)
+#define HWF_S390X_MSA_8         (1 << 2)
+#define HWF_S390X_MSA_9         (1 << 3)
+#define HWF_S390X_VX            (1 << 4)
 
+#endif
 
+gpg_err_code_t _gcry_disable_hw_feature (const char *name);
+void _gcry_detect_hw_features (void);
 unsigned int _gcry_get_hw_features (void);
-void _gcry_detect_hw_features (unsigned int);
+const char *_gcry_enum_hw_features (int idx, unsigned int *r_feature);
 
 
 /*-- mpi/mpiutil.c --*/
@@ -164,6 +282,7 @@ const char *_gcry_mpi_get_hw_config (void);
 #endif
 
 /*-- primegen.c --*/
+gcry_err_code_t _gcry_primegen_init (void);
 gcry_mpi_t _gcry_generate_secret_prime (unsigned int nbits,
                                  gcry_random_level_t random_level,
                                  int (*extra_check)(void*, gcry_mpi_t),
@@ -172,9 +291,12 @@ gcry_mpi_t _gcry_generate_public_prime (unsigned int nbits,
                                  gcry_random_level_t random_level,
                                  int (*extra_check)(void*, gcry_mpi_t),
                                  void *extra_check_arg);
-gcry_mpi_t _gcry_generate_elg_prime (int mode,
-                                     unsigned int pbits, unsigned int qbits,
-                                     gcry_mpi_t g, gcry_mpi_t **factors);
+gcry_err_code_t _gcry_generate_elg_prime (int mode,
+                                          unsigned int pbits,
+                                          unsigned int qbits,
+                                          gcry_mpi_t g,
+                                          gcry_mpi_t *r_prime,
+                                          gcry_mpi_t **factors);
 gcry_mpi_t _gcry_derive_x931_prime (const gcry_mpi_t xp,
                                     const gcry_mpi_t xp1, const gcry_mpi_t xp2,
                                     const gcry_mpi_t e,
@@ -192,6 +314,9 @@ gpg_err_code_t _gcry_generate_fips186_3_prime
                   int *r_counter,
                   void **r_seed, size_t *r_seedlen, int *r_hashalgo);
 
+gpg_err_code_t _gcry_fips186_4_prime_check (const gcry_mpi_t x,
+                                            unsigned int bits);
+
 
 /* Replacements of missing functions (missing-string.c).  */
 #ifndef HAVE_STPCPY
@@ -201,6 +326,8 @@ char *stpcpy (char *a, const char *b);
 int strcasecmp (const char *a, const char *b) _GCRY_GCC_ATTR_PURE;
 #endif
 
+#include "../compat/libcompat.h"
+
 
 /* Macros used to rename missing functions.  */
 #ifndef HAVE_STRTOUL
@@ -222,19 +349,45 @@ int strcasecmp (const char *a, const char *b) 
_GCRY_GCC_ATTR_PURE;
 
 /* Stack burning.  */
 
-void _gcry_burn_stack (int bytes);
-
+#ifdef HAVE_GCC_ASM_VOLATILE_MEMORY
+#define  __gcry_burn_stack_dummy() asm volatile ("":::"memory")
+#else
+void __gcry_burn_stack_dummy (void);
+#endif
 
-/* To avoid that a compiler optimizes certain memset calls away, these
-   macros may be used instead. */
+void __gcry_burn_stack (unsigned int bytes);
+#define _gcry_burn_stack(bytes) \
+       do { __gcry_burn_stack (bytes); \
+            __gcry_burn_stack_dummy (); } while(0)
+
+/* To avoid that a compiler optimizes certain memset calls away, this
+   macro may be used instead.  For constant length buffers, memory
+   wiping is inlined.  Dead store elimination of inlined memset is
+   avoided here by using assembly block after memset.  For non-constant
+   length buffers, memory is wiped through _gcry_fast_wipememory.  */
+#ifdef HAVE_GCC_ASM_VOLATILE_MEMORY
+#define fast_wipememory2_inline(_ptr,_set,_len) do { \
+             memset((_ptr), (_set), (_len)); \
+             asm volatile ("\n" :: "r" (_ptr) : "memory"); \
+           } while(0)
+#else
+#define fast_wipememory2_inline(_ptr,_set,_len) \
+           _gcry_fast_wipememory2((void *)_ptr, _set, _len)
+#endif
 #define wipememory2(_ptr,_set,_len) do { \
-              volatile char *_vptr=(volatile char *)(_ptr); \
-              size_t _vlen=(_len); \
-              while(_vlen) { *_vptr=(_set); _vptr++; _vlen--; } \
-                  } while(0)
-#define wipememory(_ptr,_len) wipememory2(_ptr,0,_len)
-
-
+             if (!CONSTANT_P(_len) || !CONSTANT_P(_set)) { \
+               if (CONSTANT_P(_set) && (_set) == 0) \
+                 _gcry_fast_wipememory((void *)(_ptr), (_len)); \
+               else \
+                 _gcry_fast_wipememory2((void *)(_ptr), (_set), (_len)); \
+             } else { \
+               fast_wipememory2_inline((void *)(_ptr), (_set), (_len)); \
+             } \
+           } while(0)
+#define wipememory(_ptr,_len) wipememory2((_ptr),0,(_len))
+
+void _gcry_fast_wipememory(void *ptr, size_t len);
+void _gcry_fast_wipememory2(void *ptr, int set, size_t len);
 
 /* Digit predicates.  */
 
@@ -246,26 +399,43 @@ void _gcry_burn_stack (int bytes);
                       || (*(a) >= 'A' && *(a) <= 'F')  \
                       || (*(a) >= 'a' && *(a) <= 'f'))
 
+/* Init functions.  */
+
+gcry_err_code_t _gcry_cipher_init (void);
+gcry_err_code_t _gcry_md_init (void);
+gcry_err_code_t _gcry_mac_init (void);
+gcry_err_code_t _gcry_pk_init (void);
+gcry_err_code_t _gcry_secmem_module_init (void);
+gcry_err_code_t _gcry_mpi_init (void);
+
+/* Memory management.  */
+#define GCRY_ALLOC_FLAG_SECURE (1 << 0)
+#define GCRY_ALLOC_FLAG_XHINT  (1 << 1)  /* Called from xmalloc.  */
+
 
 /*-- sexp.c --*/
-gcry_error_t _gcry_sexp_vbuild (gcry_sexp_t *retsexp, size_t *erroff,
-                                const char *format, va_list arg_ptr);
+gcry_err_code_t _gcry_sexp_vbuild (gcry_sexp_t *retsexp, size_t *erroff,
+                                   const char *format, va_list arg_ptr);
 char *_gcry_sexp_nth_string (const gcry_sexp_t list, int number);
+gpg_err_code_t _gcry_sexp_vextract_param (gcry_sexp_t sexp, const char *path,
+                                          const char *list, va_list arg_ptr);
 
 
 /*-- fips.c --*/
 
-void _gcry_initialize_fips_mode (int force);
-
-int _gcry_fips_mode (void);
-#define fips_mode() _gcry_fips_mode ()
+extern int _gcry_no_fips_mode_required;
 
-int _gcry_enforced_fips_mode (void);
+void _gcry_initialize_fips_mode (int force);
+int _gcry_fips_to_activate (void);
 
-void _gcry_set_enforced_fips_mode (void);
+/* This macro returns true if fips mode is enabled.  This is
+   independent of the fips required finite state machine and only used
+   to enable fips specific code.
 
-void _gcry_inactivate_fips_mode (const char *text);
-int _gcry_is_fips_mode_inactive (void);
+   No locking is required because we have the requirement that this
+   variable is only initialized once with no other threads
+   existing.  */
+#define fips_mode() (!_gcry_no_fips_mode_required)
 
 
 void _gcry_fips_signal_error (const char *srcfile,
@@ -285,9 +455,22 @@ void _gcry_fips_signal_error (const char *srcfile,
            _gcry_fips_signal_error (__FILE__, __LINE__, NULL, 1, (a))
 #endif
 
+int _gcry_fips_indicator_cipher (va_list arg_ptr);
+int _gcry_fips_indicator_mac (va_list arg_ptr);
+int _gcry_fips_indicator_md (va_list arg_ptr);
+int _gcry_fips_indicator_kdf (va_list arg_ptr);
+int _gcry_fips_indicator_function (va_list arg_ptr);
+int _gcry_fips_indicator_pk_flags (va_list arg_ptr);
+
 int _gcry_fips_is_operational (void);
-#define fips_is_operational()   (_gcry_global_is_operational ())
-#define fips_not_operational()  (GCRY_GPG_ERR_NOT_OPERATIONAL)
+
+/* Return true if the library is in the operational state.  */
+#define fips_is_operational()   \
+        (!_gcry_global_any_init_done ? \
+                _gcry_global_is_operational() : \
+                (!fips_mode () || _gcry_global_is_operational ()))
+
+#define fips_not_operational()  (GPG_ERR_NOT_OPERATIONAL)
 
 int _gcry_fips_test_operational (void);
 int _gcry_fips_test_error_or_operational (void);
diff --git a/grub-core/lib/libgcrypt/src/gcrypt-int.h 
b/grub-core/lib/libgcrypt/src/gcrypt-int.h
new file mode 100644
index 000000000..3071b421e
--- /dev/null
+++ b/grub-core/lib/libgcrypt/src/gcrypt-int.h
@@ -0,0 +1,563 @@
+/* gcrypt-int.h - Internal version of gcrypt.h
+ * Copyright (C) 2013 g10 Code GmbH
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef GCRY_GCRYPT_INT_H
+#define GCRY_GCRYPT_INT_H
+
+#ifdef _GCRYPT_H
+#error  gcrypt.h already included
+#endif
+
+#include "gcrypt.h"
+#include "types.h"
+
+/* These error codes are used but not defined in the required
+ * libgpg-error N.MM.  Define them here.  [None right now.] */
+
+
+
+/* Context used with elliptic curve functions.  */
+struct mpi_ec_ctx_s;
+typedef struct mpi_ec_ctx_s *mpi_ec_t;
+
+
+
+/* Underscore prefixed internal versions of the public functions.
+   They return gpg_err_code_t and not gpg_error_t.  Some macros also
+   need an underscore prefixed internal version.
+
+   Note that the memory allocation functions and macros (xmalloc etc.)
+   are not defined here but in g10lib.h because this file here is
+   included by some test programs which define theie own xmalloc
+   macros.  */
+
+gpg_err_code_t _gcry_cipher_open (gcry_cipher_hd_t *handle,
+                                  int algo, int mode, unsigned int flags);
+void _gcry_cipher_close (gcry_cipher_hd_t h);
+gpg_err_code_t _gcry_cipher_ctl (gcry_cipher_hd_t h, int cmd, void *buffer,
+                             size_t buflen);
+gpg_err_code_t _gcry_cipher_info (gcry_cipher_hd_t h, int what, void *buffer,
+                                  size_t *nbytes);
+gpg_err_code_t _gcry_cipher_algo_info (int algo, int what, void *buffer,
+                                       size_t *nbytes);
+const char *_gcry_cipher_algo_name (int algorithm) _GCRY_GCC_ATTR_PURE;
+int _gcry_cipher_map_name (const char *name) _GCRY_GCC_ATTR_PURE;
+int _gcry_cipher_mode_from_oid (const char *string) _GCRY_GCC_ATTR_PURE;
+gpg_err_code_t _gcry_cipher_encrypt (gcry_cipher_hd_t h,
+                                     void *out, size_t outsize,
+                                     const void *in, size_t inlen);
+gpg_err_code_t _gcry_cipher_decrypt (gcry_cipher_hd_t h,
+                                     void *out, size_t outsize,
+                                     const void *in, size_t inlen);
+gcry_err_code_t _gcry_cipher_setkey (gcry_cipher_hd_t hd,
+                                     const void *key, size_t keylen);
+gcry_err_code_t _gcry_cipher_setiv (gcry_cipher_hd_t hd,
+                                    const void *iv, size_t ivlen);
+gpg_err_code_t _gcry_cipher_authenticate (gcry_cipher_hd_t hd, const void 
*abuf,
+                                          size_t abuflen);
+gpg_err_code_t _gcry_cipher_gettag (gcry_cipher_hd_t hd, void *outtag,
+                                    size_t taglen);
+gpg_err_code_t _gcry_cipher_checktag (gcry_cipher_hd_t hd, const void *intag,
+                                      size_t taglen);
+gpg_err_code_t _gcry_cipher_setctr (gcry_cipher_hd_t hd,
+                                    const void *ctr, size_t ctrlen);
+gpg_err_code_t _gcry_cipher_getctr (gcry_cipher_hd_t hd,
+                                    void *ctr, size_t ctrlen);
+size_t _gcry_cipher_get_algo_keylen (int algo);
+size_t _gcry_cipher_get_algo_blklen (int algo);
+
+#define _gcry_cipher_reset(h)  _gcry_cipher_ctl ((h), GCRYCTL_RESET, NULL, 0)
+
+
+
+
+gpg_err_code_t _gcry_pk_encrypt (gcry_sexp_t *result,
+                                 gcry_sexp_t data, gcry_sexp_t pkey);
+gpg_err_code_t _gcry_pk_decrypt (gcry_sexp_t *result,
+                                 gcry_sexp_t data, gcry_sexp_t skey);
+gpg_err_code_t _gcry_pk_sign (gcry_sexp_t *result,
+                              gcry_sexp_t data, gcry_sexp_t skey);
+gpg_err_code_t _gcry_pk_verify (gcry_sexp_t sigval,
+                                gcry_sexp_t data, gcry_sexp_t pkey);
+gpg_err_code_t _gcry_pk_testkey (gcry_sexp_t key);
+gpg_err_code_t _gcry_pk_genkey (gcry_sexp_t *r_key, gcry_sexp_t s_parms);
+gpg_err_code_t _gcry_pk_ctl (int cmd, void *buffer, size_t buflen);
+gpg_err_code_t _gcry_pk_algo_info (int algo, int what,
+                                   void *buffer, size_t *nbytes);
+const char *_gcry_pk_algo_name (int algorithm) _GCRY_GCC_ATTR_PURE;
+int _gcry_pk_map_name (const char* name) _GCRY_GCC_ATTR_PURE;
+unsigned int _gcry_pk_get_nbits (gcry_sexp_t key) _GCRY_GCC_ATTR_PURE;
+unsigned char *_gcry_pk_get_keygrip (gcry_sexp_t key, unsigned char *array);
+const char *_gcry_pk_get_curve (gcry_sexp_t key, int iterator,
+                                unsigned int *r_nbits);
+gcry_sexp_t _gcry_pk_get_param (int algo, const char *name);
+gpg_err_code_t _gcry_pubkey_get_sexp (gcry_sexp_t *r_sexp,
+                                      int mode, gcry_ctx_t ctx);
+unsigned int _gcry_ecc_get_algo_keylen (int algo);
+gpg_error_t _gcry_ecc_mul_point (int algo, unsigned char *result,
+                                 const unsigned char *scalar,
+                                 const unsigned char *point);
+gcry_err_code_t _gcry_pk_sign_md (gcry_sexp_t *r_sig, const char *tmpl,
+                                  gcry_md_hd_t hd, gcry_sexp_t s_skey,
+                                  gcry_ctx_t ctx);
+gcry_err_code_t _gcry_pk_verify_md (gcry_sexp_t s_sig, const char *tmpl,
+                                    gcry_md_hd_t hd, gcry_sexp_t s_pkey,
+                                    gcry_ctx_t ctx);
+gpg_err_code_t _gcry_pk_random_override_new (gcry_ctx_t *r_ctx,
+                                             const unsigned char *p,
+                                             size_t len);
+gpg_err_code_t _gcry_pk_get_random_override (gcry_ctx_t ctx,
+                                             const unsigned char **r_p,
+                                             size_t *r_len);
+
+gpg_err_code_t _gcry_md_open (gcry_md_hd_t *h, int algo, unsigned int flags);
+void _gcry_md_close (gcry_md_hd_t hd);
+gpg_err_code_t _gcry_md_enable (gcry_md_hd_t hd, int algo);
+gpg_err_code_t _gcry_md_copy (gcry_md_hd_t *bhd, gcry_md_hd_t ahd);
+void _gcry_md_reset (gcry_md_hd_t hd);
+gpg_err_code_t _gcry_md_ctl (gcry_md_hd_t hd, int cmd,
+                          void *buffer, size_t buflen);
+void _gcry_md_write (gcry_md_hd_t hd, const void *buffer, size_t length);
+unsigned char *_gcry_md_read (gcry_md_hd_t hd, int algo);
+gpg_err_code_t _gcry_md_extract (gcry_md_hd_t hd, int algo, void *buffer,
+                                 size_t length);
+void _gcry_md_hash_buffer (int algo, void *digest,
+                           const void *buffer, size_t length);
+gpg_err_code_t _gcry_md_hash_buffers_extract (int algo, unsigned int flags,
+                                              void *digest, int digestlen,
+                                              const gcry_buffer_t *iov,
+                                              int iovcnt);
+gpg_err_code_t _gcry_md_hash_buffers (int algo, unsigned int flags,
+                                      void *digest,
+                                      const gcry_buffer_t *iov, int iovcnt);
+int _gcry_md_get_algo (gcry_md_hd_t hd);
+unsigned int _gcry_md_get_algo_dlen (int algo);
+int _gcry_md_is_enabled (gcry_md_hd_t a, int algo);
+int _gcry_md_is_secure (gcry_md_hd_t a);
+gpg_err_code_t _gcry_md_info (gcry_md_hd_t h, int what, void *buffer,
+                          size_t *nbytes);
+gpg_err_code_t _gcry_md_algo_info (int algo, int what, void *buffer,
+                                size_t *nbytes);
+const char *_gcry_md_algo_name (int algo) _GCRY_GCC_ATTR_PURE;
+int _gcry_md_map_name (const char* name) _GCRY_GCC_ATTR_PURE;
+gpg_err_code_t _gcry_md_setkey (gcry_md_hd_t hd,
+                                const void *key, size_t keylen);
+void _gcry_md_debug (gcry_md_hd_t hd, const char *suffix);
+
+#define _gcry_md_test_algo(a) \
+            _gcry_md_algo_info ((a), GCRYCTL_TEST_ALGO, NULL, NULL)
+
+#define _gcry_md_final(a) \
+            _gcry_md_ctl ((a), GCRYCTL_FINALIZE, NULL, 0)
+
+#define _gcry_md_putc(h,c)  \
+            do {                                          \
+                gcry_md_hd_t h__ = (h);                   \
+                if( (h__)->bufpos == (h__)->bufsize )     \
+                    _gcry_md_write( (h__), NULL, 0 );     \
+                (h__)->buf[(h__)->bufpos++] = (c) & 0xff; \
+            } while(0)
+
+
+
+gpg_err_code_t _gcry_mac_open (gcry_mac_hd_t *handle, int algo,
+                            unsigned int flags, gcry_ctx_t ctx);
+void _gcry_mac_close (gcry_mac_hd_t h);
+gpg_err_code_t _gcry_mac_ctl (gcry_mac_hd_t h, int cmd, void *buffer,
+                           size_t buflen);
+gpg_err_code_t _gcry_mac_algo_info (int algo, int what, void *buffer,
+                                 size_t *nbytes);
+gpg_err_code_t _gcry_mac_setkey (gcry_mac_hd_t hd, const void *key,
+                              size_t keylen);
+gpg_err_code_t _gcry_mac_setiv (gcry_mac_hd_t hd, const void *iv,
+                             size_t ivlen);
+gpg_err_code_t _gcry_mac_write (gcry_mac_hd_t hd, const void *buffer,
+                             size_t length);
+gpg_err_code_t _gcry_mac_read (gcry_mac_hd_t hd, void *buffer, size_t *buflen);
+gpg_err_code_t _gcry_mac_verify (gcry_mac_hd_t hd, const void *buffer,
+                                 size_t buflen);
+int _gcry_mac_get_algo (gcry_mac_hd_t hd);
+unsigned int _gcry_mac_get_algo_maclen (int algo);
+unsigned int _gcry_mac_get_algo_keylen (int algo);
+const char *_gcry_mac_algo_name (int algorithm) _GCRY_GCC_ATTR_PURE;
+int _gcry_mac_map_name (const char *name) _GCRY_GCC_ATTR_PURE;
+
+#define _gcry_mac_reset(h)  _gcry_mac_ctl ((h), GCRYCTL_RESET, NULL, 0)
+
+
+gpg_err_code_t _gcry_kdf_derive (const void *passphrase, size_t passphraselen,
+                                 int algo, int subalgo,
+                                 const void *salt, size_t saltlen,
+                                 unsigned long iterations,
+                                 size_t keysize, void *keybuffer);
+
+gpg_err_code_t _gcry_kdf_open (gcry_kdf_hd_t *hd, int algo, int subalgo,
+                               const unsigned long *param,
+                               unsigned int paramlen,
+                               const void *passphrase, size_t passphraselen,
+                               const void *salt, size_t saltlen,
+                               const void *key, size_t keylen,
+                               const void *ad, size_t adlen);
+gcry_err_code_t _gcry_kdf_compute (gcry_kdf_hd_t h,
+                                   const struct gcry_kdf_thread_ops *ops);
+gpg_err_code_t _gcry_kdf_final (gcry_kdf_hd_t h, size_t resultlen, void 
*result);
+void _gcry_kdf_close (gcry_kdf_hd_t h);
+
+
+gpg_err_code_t _gcry_prime_generate (gcry_mpi_t *prime,
+                                     unsigned int prime_bits,
+                                     unsigned int factor_bits,
+                                     gcry_mpi_t **factors,
+                                     gcry_prime_check_func_t cb_func,
+                                     void *cb_arg,
+                                     gcry_random_level_t random_level,
+                                     unsigned int flags);
+gpg_err_code_t _gcry_prime_group_generator (gcry_mpi_t *r_g,
+                                            gcry_mpi_t prime,
+                                            gcry_mpi_t *factors,
+                                            gcry_mpi_t start_g);
+void _gcry_prime_release_factors (gcry_mpi_t *factors);
+gpg_err_code_t _gcry_prime_check (gcry_mpi_t x, unsigned int flags);
+
+
+void _gcry_randomize (void *buffer, size_t length,
+                      enum gcry_random_level level);
+gpg_err_code_t _gcry_random_add_bytes (const void *buffer, size_t length,
+                                    int quality);
+void *_gcry_random_bytes (size_t nbytes, enum gcry_random_level level)
+                         _GCRY_GCC_ATTR_MALLOC;
+void *_gcry_random_bytes_secure (size_t nbytes, enum gcry_random_level level)
+                                _GCRY_GCC_ATTR_MALLOC;
+void _gcry_mpi_randomize (gcry_mpi_t w,
+                         unsigned int nbits, enum gcry_random_level level);
+void _gcry_create_nonce (void *buffer, size_t length);
+
+
+void _gcry_ctx_release (gcry_ctx_t ctx);
+
+
+const char *_gcry_check_version (const char *req_version);
+
+void _gcry_set_allocation_handler (gcry_handler_alloc_t func_alloc,
+                                  gcry_handler_alloc_t func_alloc_secure,
+                                  gcry_handler_secure_check_t 
func_secure_check,
+                                  gcry_handler_realloc_t func_realloc,
+                                  gcry_handler_free_t func_free);
+void _gcry_set_outofcore_handler (gcry_handler_no_mem_t h, void *opaque);
+void _gcry_set_fatalerror_handler (gcry_handler_error_t fnc, void *opaque);
+void _gcry_set_log_handler (gcry_handler_log_t f, void *opaque);
+void _gcry_set_gettext_handler (const char *(*f)(const char*));
+void _gcry_set_progress_handler (gcry_handler_progress_t cb, void *cb_data);
+
+
+/* Return a pointer to a string containing a description of the error
+   code in the error value ERR.  */
+static inline const char *
+_gcry_strerror (gcry_error_t err)
+{
+  return gpg_strerror (err);
+}
+
+/* Return a pointer to a string containing a description of the error
+   source in the error value ERR.  */
+static inline const char *
+_gcry_strsource (gcry_error_t err)
+{
+  return gpg_strsource (err);
+}
+
+/* Retrieve the error code for the system error ERR.  This returns
+   GPG_ERR_UNKNOWN_ERRNO if the system error is not mapped (report
+   this).  */
+static inline gcry_err_code_t
+_gcry_err_code_from_errno (int err)
+{
+  return gpg_err_code_from_errno (err);
+}
+
+/* Retrieve the system error for the error code CODE.  This returns 0
+   if CODE is not a system error code.  */
+static inline int
+_gcry_err_code_to_errno (gcry_err_code_t code)
+{
+  return gpg_err_code_to_errno (code);
+}
+
+/* Return an error value with the error source SOURCE and the system
+   error ERR.  */
+static inline gcry_error_t
+_gcry_err_make_from_errno (gpg_err_source_t source, int err)
+{
+  return gpg_err_make_from_errno (source, err);
+}
+
+
+/* Return an error value with the system error ERR.  */
+static inline gcry_error_t
+_gcry_error_from_errno (int err)
+{
+  return gpg_error (gpg_err_code_from_errno (err));
+}
+
+
+
+gpg_err_code_t _gcry_sexp_new (gcry_sexp_t *retsexp,
+                               const void *buffer, size_t length,
+                               int autodetect);
+gpg_err_code_t _gcry_sexp_create (gcry_sexp_t *retsexp,
+                                  void *buffer, size_t length,
+                                  int autodetect, void (*freefnc) (void *));
+gpg_err_code_t _gcry_sexp_sscan (gcry_sexp_t *retsexp, size_t *erroff,
+                              const char *buffer, size_t length);
+gpg_err_code_t _gcry_sexp_build (gcry_sexp_t *retsexp, size_t *erroff,
+                                 const char *format, ...);
+gpg_err_code_t _gcry_sexp_build_array (gcry_sexp_t *retsexp, size_t *erroff,
+                                       const char *format, void **arg_list);
+void _gcry_sexp_release (gcry_sexp_t sexp);
+size_t _gcry_sexp_canon_len (const unsigned char *buffer, size_t length,
+                            size_t *erroff, gcry_err_code_t *errcode);
+size_t _gcry_sexp_sprint (gcry_sexp_t sexp, int mode, void *buffer,
+                          size_t maxlength);
+void _gcry_sexp_dump (const gcry_sexp_t a);
+gcry_sexp_t _gcry_sexp_cons (const gcry_sexp_t a, const gcry_sexp_t b);
+gcry_sexp_t _gcry_sexp_alist (const gcry_sexp_t *array);
+gcry_sexp_t _gcry_sexp_vlist (const gcry_sexp_t a, ...);
+gcry_sexp_t _gcry_sexp_append (const gcry_sexp_t a, const gcry_sexp_t n);
+gcry_sexp_t _gcry_sexp_prepend (const gcry_sexp_t a, const gcry_sexp_t n);
+gcry_sexp_t _gcry_sexp_find_token (gcry_sexp_t list,
+                                   const char *tok, size_t toklen);
+int _gcry_sexp_length (const gcry_sexp_t list);
+gcry_sexp_t _gcry_sexp_nth (const gcry_sexp_t list, int number);
+gcry_sexp_t _gcry_sexp_car (const gcry_sexp_t list);
+gcry_sexp_t _gcry_sexp_cdr (const gcry_sexp_t list);
+gcry_sexp_t _gcry_sexp_cadr (const gcry_sexp_t list);
+const char *_gcry_sexp_nth_data (const gcry_sexp_t list, int number,
+                                 size_t *datalen);
+void *_gcry_sexp_nth_buffer (const gcry_sexp_t list, int number,
+                             size_t *rlength);
+char *_gcry_sexp_nth_string (gcry_sexp_t list, int number);
+gcry_mpi_t _gcry_sexp_nth_mpi (gcry_sexp_t list, int number, int mpifmt);
+gpg_err_code_t _gcry_sexp_extract_param (gcry_sexp_t sexp,
+                                         const char *path,
+                                         const char *list,
+                                         ...) _GCRY_GCC_ATTR_SENTINEL(0);
+
+#define sexp_new(a, b, c, d)         _gcry_sexp_new ((a), (b), (c), (d))
+#define sexp_create(a, b, c, d, e)   _gcry_sexp_create ((a), (b), (c), (d), 
(e))
+#define sexp_sscan(a, b, c, d)       _gcry_sexp_sscan ((a), (b), (c), (d))
+#define sexp_build                   _gcry_sexp_build
+#define sexp_build_array(a, b, c, d) _gcry_sexp_build_array ((a), (b), (c), 
(d))
+#define sexp_release(a)              _gcry_sexp_release ((a))
+#define sexp_canon_len(a, b, c, d)   _gcry_sexp_canon_len ((a), (b), (c), (d))
+#define sexp_sprint(a, b, c, d)      _gcry_sexp_sprint ((a), (b), (c), (d))
+#define sexp_dump(a)                 _gcry_sexp_dump ((a))
+#define sexp_cons(a, b)              _gcry_sexp_cons ((a), (b))
+#define sexp_alist(a)                _gcry_sexp_alist ((a))
+#define sexp_vlist                   _gcry_sexp_vlist
+#define sexp_append(a, b)            _gcry_sexp_append ((a), (b))
+#define sexp_prepend(a, b)           _gcry_sexp_prepend ((a), (b))
+#define sexp_find_token(a, b, c)     _gcry_sexp_find_token ((a), (b), (c))
+#define sexp_length(a)               _gcry_sexp_length ((a))
+#define sexp_nth(a, b)               _gcry_sexp_nth ((a), (b))
+#define sexp_car(a)                  _gcry_sexp_car ((a))
+#define sexp_cdr(a)                  _gcry_sexp_cdr ((a))
+#define sexp_cadr(a)                 _gcry_sexp_cadr ((a))
+#define sexp_nth_data(a, b, c)       _gcry_sexp_nth_data ((a), (b), (c))
+#define sexp_nth_buffer(a, b, c)     _gcry_sexp_nth_buffer ((a), (b), (c))
+#define sexp_nth_string(a, b)        _gcry_sexp_nth_string ((a), (b))
+#define sexp_nth_mpi(a, b, c)        _gcry_sexp_nth_mpi ((a), (b), (c))
+#define sexp_extract_param           _gcry_sexp_extract_param
+
+
+
+gcry_mpi_t _gcry_mpi_new (unsigned int nbits);
+gcry_mpi_t _gcry_mpi_snew (unsigned int nbits);
+void _gcry_mpi_release (gcry_mpi_t a);
+gcry_mpi_t _gcry_mpi_copy (const gcry_mpi_t a);
+void _gcry_mpi_snatch (gcry_mpi_t w, gcry_mpi_t u);
+gcry_mpi_t _gcry_mpi_set (gcry_mpi_t w, const gcry_mpi_t u);
+gcry_mpi_t _gcry_mpi_set_ui (gcry_mpi_t w, unsigned long u);
+gcry_err_code_t _gcry_mpi_get_ui (unsigned int *w, gcry_mpi_t u);
+void _gcry_mpi_swap (gcry_mpi_t a, gcry_mpi_t b);
+int _gcry_mpi_is_neg (gcry_mpi_t a);
+void _gcry_mpi_neg (gcry_mpi_t w, gcry_mpi_t u);
+void _gcry_mpi_abs (gcry_mpi_t w);
+int _gcry_mpi_cmp (const gcry_mpi_t u, const gcry_mpi_t v);
+int _gcry_mpi_cmpabs (const gcry_mpi_t u, const gcry_mpi_t v);
+int _gcry_mpi_cmp_ui (const gcry_mpi_t u, unsigned long v);
+gpg_err_code_t _gcry_mpi_scan (gcry_mpi_t *ret_mpi, enum gcry_mpi_format 
format,
+                              const void *buffer, size_t buflen,
+                              size_t *nscanned);
+gpg_err_code_t _gcry_mpi_print (enum gcry_mpi_format format,
+                               unsigned char *buffer, size_t buflen,
+                               size_t *nwritten,
+                               const gcry_mpi_t a);
+gpg_err_code_t _gcry_mpi_aprint (enum gcry_mpi_format format,
+                                unsigned char **buffer, size_t *nwritten,
+                                const gcry_mpi_t a);
+void _gcry_mpi_dump (const gcry_mpi_t a);
+void _gcry_mpi_add (gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v);
+void _gcry_mpi_add_ui (gcry_mpi_t w, gcry_mpi_t u, unsigned long v);
+void _gcry_mpi_addm (gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v, gcry_mpi_t m);
+void _gcry_mpi_sub (gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v);
+void _gcry_mpi_sub_ui (gcry_mpi_t w, gcry_mpi_t u, unsigned long v );
+void _gcry_mpi_subm (gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v, gcry_mpi_t m);
+void _gcry_mpi_mul (gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v);
+void _gcry_mpi_mul_ui (gcry_mpi_t w, gcry_mpi_t u, unsigned long v );
+void _gcry_mpi_mulm (gcry_mpi_t w, gcry_mpi_t u, gcry_mpi_t v, gcry_mpi_t m);
+void _gcry_mpi_mul_2exp (gcry_mpi_t w, gcry_mpi_t u, unsigned long cnt);
+void _gcry_mpi_div (gcry_mpi_t q, gcry_mpi_t r,
+                   gcry_mpi_t dividend, gcry_mpi_t divisor, int round);
+void _gcry_mpi_mod (gcry_mpi_t r, gcry_mpi_t dividend, gcry_mpi_t divisor);
+void _gcry_mpi_powm (gcry_mpi_t w,
+                    const gcry_mpi_t b, const gcry_mpi_t e,
+                    const gcry_mpi_t m);
+int _gcry_mpi_gcd (gcry_mpi_t g, gcry_mpi_t a, gcry_mpi_t b);
+int _gcry_mpi_invm (gcry_mpi_t x, gcry_mpi_t a, gcry_mpi_t m);
+gcry_mpi_point_t _gcry_mpi_point_new (unsigned int nbits);
+void _gcry_mpi_point_release (gcry_mpi_point_t point);
+gcry_mpi_point_t _gcry_mpi_point_copy (gcry_mpi_point_t point);
+void _gcry_mpi_point_get (gcry_mpi_t x, gcry_mpi_t y, gcry_mpi_t z,
+                         gcry_mpi_point_t point);
+void _gcry_mpi_point_snatch_get (gcry_mpi_t x, gcry_mpi_t y, gcry_mpi_t z,
+                                gcry_mpi_point_t point);
+gcry_mpi_point_t _gcry_mpi_point_set (gcry_mpi_point_t point,
+                                     gcry_mpi_t x, gcry_mpi_t y, gcry_mpi_t z);
+gcry_mpi_point_t _gcry_mpi_point_snatch_set (gcry_mpi_point_t point,
+                                            gcry_mpi_t x, gcry_mpi_t y,
+                                            gcry_mpi_t z);
+
+gcry_mpi_t _gcry_mpi_ec_get_mpi (const char *name, gcry_ctx_t ctx, int copy);
+gcry_mpi_point_t _gcry_mpi_ec_get_point (const char *name,
+                                        gcry_ctx_t ctx, int copy);
+int _gcry_mpi_ec_get_affine (gcry_mpi_t x, gcry_mpi_t y, gcry_mpi_point_t 
point,
+                             mpi_ec_t ctx);
+void _gcry_mpi_ec_point_resize (gcry_mpi_point_t p, mpi_ec_t ctx);
+void _gcry_mpi_ec_dup (gcry_mpi_point_t w, gcry_mpi_point_t u, gcry_ctx_t ctx);
+void _gcry_mpi_ec_add (gcry_mpi_point_t w,
+                       gcry_mpi_point_t u, gcry_mpi_point_t v, mpi_ec_t ctx);
+void _gcry_mpi_ec_sub (gcry_mpi_point_t w,
+                       gcry_mpi_point_t u, gcry_mpi_point_t v, mpi_ec_t ctx);
+void _gcry_mpi_ec_mul (gcry_mpi_point_t w, gcry_mpi_t n, gcry_mpi_point_t u,
+                       mpi_ec_t ctx);
+int _gcry_mpi_ec_curve_point (gcry_mpi_point_t w, mpi_ec_t ctx);
+unsigned int _gcry_mpi_get_nbits (gcry_mpi_t a);
+int _gcry_mpi_test_bit (gcry_mpi_t a, unsigned int n);
+void _gcry_mpi_set_bit (gcry_mpi_t a, unsigned int n);
+void _gcry_mpi_clear_bit (gcry_mpi_t a, unsigned int n);
+void _gcry_mpi_set_highbit (gcry_mpi_t a, unsigned int n);
+void _gcry_mpi_clear_highbit (gcry_mpi_t a, unsigned int n);
+void _gcry_mpi_rshift (gcry_mpi_t x, gcry_mpi_t a, unsigned int n);
+void _gcry_mpi_lshift (gcry_mpi_t x, gcry_mpi_t a, unsigned int n);
+gcry_mpi_t _gcry_mpi_set_opaque (gcry_mpi_t a, void *p, unsigned int nbits);
+gcry_mpi_t _gcry_mpi_set_opaque_copy (gcry_mpi_t a,
+                                     const void *p, unsigned int nbits);
+void *_gcry_mpi_get_opaque (gcry_mpi_t a, unsigned int *nbits);
+void _gcry_mpi_set_flag (gcry_mpi_t a, enum gcry_mpi_flag flag);
+void _gcry_mpi_clear_flag (gcry_mpi_t a, enum gcry_mpi_flag flag);
+int _gcry_mpi_get_flag (gcry_mpi_t a, enum gcry_mpi_flag flag);
+
+
+/* Private function - do not use.  */
+/* gcry_mpi_t _gcry_mpi_get_const (int no); */
+
+/* We need our internal versions of the macros.  */
+#ifndef GCRYPT_NO_MPI_MACROS
+# error GCRYPT_NO_MPI_MACROS is not defined
+#endif
+
+#define mpi_new(n)             _gcry_mpi_new ((n))
+#define mpi_secure_new( n )    _gcry_mpi_snew ((n))
+#define mpi_snew(n)            _gcry_mpi_snew ((n))
+
+#define mpi_release(a)        \
+  do                          \
+    {                         \
+      _gcry_mpi_release ((a));\
+      (a) = NULL;             \
+    }                         \
+  while (0)
+
+#define mpi_snatch( w, u)      _gcry_mpi_snatch( (w), (u) )
+#define mpi_set( w, u)         _gcry_mpi_set( (w), (u) )
+#define mpi_set_ui( w, u)      _gcry_mpi_set_ui( (w), (u) )
+#define mpi_get_ui(w,u)        _gcry_mpi_get_ui( (w), (u) )
+#define mpi_swap(a,b)          _gcry_mpi_swap ((a),(b))
+#define mpi_abs( w )           _gcry_mpi_abs( (w) )
+#define mpi_neg( w, u)         _gcry_mpi_neg( (w), (u) )
+#define mpi_cmp( u, v )        _gcry_mpi_cmp( (u), (v) )
+#define mpi_cmpabs( u, v )     _gcry_mpi_cmpabs( (u), (v) )
+#define mpi_cmp_ui( u, v )     _gcry_mpi_cmp_ui( (u), (v) )
+#define mpi_is_neg( a )        _gcry_mpi_is_neg ((a))
+
+#define mpi_add_ui(w,u,v)      _gcry_mpi_add_ui((w),(u),(v))
+#define mpi_add(w,u,v)         _gcry_mpi_add ((w),(u),(v))
+#define mpi_addm(w,u,v,m)      _gcry_mpi_addm ((w),(u),(v),(m))
+#define mpi_sub_ui(w,u,v)      _gcry_mpi_sub_ui ((w),(u),(v))
+#define mpi_sub(w,u,v)         _gcry_mpi_sub ((w),(u),(v))
+#define mpi_subm(w,u,v,m)      _gcry_mpi_subm ((w),(u),(v),(m))
+#define mpi_mul_ui(w,u,v)      _gcry_mpi_mul_ui ((w),(u),(v))
+#define mpi_mul_2exp(w,u,v)    _gcry_mpi_mul_2exp ((w),(u),(v))
+#define mpi_mul(w,u,v)         _gcry_mpi_mul ((w),(u),(v))
+#define mpi_mulm(w,u,v,m)      _gcry_mpi_mulm ((w),(u),(v),(m))
+#define mpi_powm(w,b,e,m)      _gcry_mpi_powm ( (w), (b), (e), (m) )
+#define mpi_tdiv(q,r,a,m)      _gcry_mpi_div ( (q), (r), (a), (m), 0)
+#define mpi_fdiv(q,r,a,m)      _gcry_mpi_div ( (q), (r), (a), (m), -1)
+#define mpi_mod(r,a,m)         _gcry_mpi_mod ((r), (a), (m))
+#define mpi_gcd(g,a,b)         _gcry_mpi_gcd ( (g), (a), (b) )
+#define mpi_invm(g,a,b)        _gcry_mpi_invm ( (g), (a), (b) )
+
+#define mpi_point_new(n)       _gcry_mpi_point_new((n))
+
+#define mpi_point_release(p)                     \
+  do                                             \
+    {                                            \
+      _gcry_mpi_point_release ((p));             \
+      (p) = NULL;                                \
+    }                                            \
+  while (0)
+
+#define mpi_point_copy(p)      _gcry_mpi_point_copy((p))
+
+#define mpi_point_get(x,y,z,p)        _gcry_mpi_point_get((x),(y),(z),(p))
+#define mpi_point_snatch_get(x,y,z,p) _gcry_mpi_point_snatch_get((x),(y), \
+                                                                 (z),(p))
+#define mpi_point_set(p,x,y,z)        _gcry_mpi_point_set((p),(x),(y),(z))
+#define mpi_point_snatch_set(p,x,y,z) _gcry_mpi_point_snatch_set((p),(x), \
+                                                                 (y),(z))
+#define mpi_point_resize(p,ctx) _gcry_mpi_ec_point_resize (p, ctx)
+
+#define mpi_get_nbits(a)       _gcry_mpi_get_nbits ((a))
+#define mpi_test_bit(a,b)      _gcry_mpi_test_bit ((a),(b))
+#define mpi_set_bit(a,b)       _gcry_mpi_set_bit ((a),(b))
+#define mpi_set_highbit(a,b)   _gcry_mpi_set_highbit ((a),(b))
+#define mpi_clear_bit(a,b)     _gcry_mpi_clear_bit ((a),(b))
+#define mpi_clear_highbit(a,b) _gcry_mpi_clear_highbit ((a),(b))
+#define mpi_rshift(a,b,c)      _gcry_mpi_rshift ((a),(b),(c))
+#define mpi_lshift(a,b,c)      _gcry_mpi_lshift ((a),(b),(c))
+
+#define mpi_set_opaque(a,b,c)  _gcry_mpi_set_opaque ((a), (b), (c))
+#define mpi_get_opaque(a,b)    _gcry_mpi_get_opaque ((a), (b))
+#define mpi_set_flag(a,f)      _gcry_mpi_set_flag ((a), (f))
+#define mpi_set_flag(a,f)      _gcry_mpi_set_flag ((a), (f))
+#define mpi_clear_flag(a,f)    _gcry_mpi_clear_flag ((a), (f))
+#define mpi_get_flag(a,f)      _gcry_mpi_get_flag ((a), (f))
+
+
+#endif /*GCRY_GCRYPT_INT_H*/
diff --git a/grub-core/lib/libgcrypt/src/gcrypt-module.h 
b/grub-core/lib/libgcrypt/src/gcrypt-module.h
deleted file mode 100644
index f39e2b5c7..000000000
--- a/grub-core/lib/libgcrypt/src/gcrypt-module.h
+++ /dev/null
@@ -1,240 +0,0 @@
-/* gcrypt-module.h - GNU Cryptographic Library Interface
-   Copyright (C) 2003, 2007 Free Software Foundation, Inc.
-
-   This file is part of Libgcrypt.
-
-   Libgcrypt is free software; you can redistribute it and/or modify
-   it under the terms of the GNU Lesser General Public License as
-   published by the Free Software Foundation; either version 2.1 of
-   the License, or (at your option) any later version.
-
-   Libgcrypt is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with this program; if not, see <http://www.gnu.org/licenses/>.
- */
-
-/*
-   This file contains the necessary declarations/definitions for
-   working with Libgcrypt modules.
- */
-
-#ifndef _GCRYPT_MODULE_H
-#define _GCRYPT_MODULE_H
-
-#ifdef __cplusplus
-extern "C" {
-#if 0 /* keep Emacsens's auto-indent happy */
-}
-#endif
-#endif
-
-/* The interfaces using the module system reserve a certain range of
-   IDs for application use.  These IDs are not valid within Libgcrypt
-   but Libgcrypt makes sure never to allocate such a module ID.  */
-#define GCRY_MODULE_ID_USER      1024
-#define GCRY_MODULE_ID_USER_LAST 4095
-
-
-/* This type represents a `module'.  */
-typedef struct gcry_module *gcry_module_t;
-
-/* Check that the library fulfills the version requirement.  */
-
-/* Type for the cipher_setkey function.  */
-typedef gcry_err_code_t (*gcry_cipher_setkey_t) (void *c,
-                                                const unsigned char *key,
-                                                unsigned keylen);
-
-/* Type for the cipher_encrypt function.  */
-typedef void (*gcry_cipher_encrypt_t) (void *c,
-                                      unsigned char *outbuf,
-                                      const unsigned char *inbuf);
-
-/* Type for the cipher_decrypt function.  */
-typedef void (*gcry_cipher_decrypt_t) (void *c,
-                                      unsigned char *outbuf,
-                                      const unsigned char *inbuf);
-
-/* Type for the cipher_stencrypt function.  */
-typedef void (*gcry_cipher_stencrypt_t) (void *c,
-                                        unsigned char *outbuf,
-                                        const unsigned char *inbuf,
-                                        unsigned int n);
-
-/* Type for the cipher_stdecrypt function.  */
-typedef void (*gcry_cipher_stdecrypt_t) (void *c,
-                                        unsigned char *outbuf,
-                                        const unsigned char *inbuf,
-                                        unsigned int n);
-
-typedef struct gcry_cipher_oid_spec
-{
-  const char *oid;
-  int mode;
-} gcry_cipher_oid_spec_t;
-
-/* Module specification structure for ciphers.  */
-typedef struct gcry_cipher_spec
-{
-  const char *name;
-  const char **aliases;
-  gcry_cipher_oid_spec_t *oids;
-  size_t blocksize;
-  size_t keylen;
-  size_t contextsize;
-  gcry_cipher_setkey_t setkey;
-  gcry_cipher_encrypt_t encrypt;
-  gcry_cipher_decrypt_t decrypt;
-  gcry_cipher_stencrypt_t stencrypt;
-  gcry_cipher_stdecrypt_t stdecrypt;
-} gcry_cipher_spec_t;
-
-/* Register a new cipher module whose specification can be found in
-   CIPHER.  On success, a new algorithm ID is stored in ALGORITHM_ID
-   and a pointer representing this module is stored in MODULE.  */
-gcry_error_t gcry_cipher_register (gcry_cipher_spec_t *cipher,
-                                  int *algorithm_id,
-                                  gcry_module_t *module)
-  /* */  _GCRY_ATTR_INTERNAL;
-
-
-/* Unregister the cipher identified by MODULE, which must have been
-   registered with gcry_cipher_register.  */
-void gcry_cipher_unregister (gcry_module_t module)
-  /* */  _GCRY_ATTR_INTERNAL;
-
-/* ********************** */
-
-/* Type for the pk_generate function.  */
-typedef gcry_err_code_t (*gcry_pk_generate_t) (int algo,
-                                              unsigned int nbits,
-                                              unsigned long use_e,
-                                              gcry_mpi_t *skey,
-                                              gcry_mpi_t **retfactors);
-
-/* Type for the pk_check_secret_key function.  */
-typedef gcry_err_code_t (*gcry_pk_check_secret_key_t) (int algo,
-                                                      gcry_mpi_t *skey);
-
-/* Type for the pk_encrypt function.  */
-typedef gcry_err_code_t (*gcry_pk_encrypt_t) (int algo,
-                                             gcry_mpi_t *resarr,
-                                             gcry_mpi_t data,
-                                             gcry_mpi_t *pkey,
-                                             int flags);
-
-/* Type for the pk_decrypt function.  */
-typedef gcry_err_code_t (*gcry_pk_decrypt_t) (int algo,
-                                             gcry_mpi_t *result,
-                                             gcry_mpi_t *data,
-                                             gcry_mpi_t *skey,
-                                             int flags);
-
-/* Type for the pk_sign function.  */
-typedef gcry_err_code_t (*gcry_pk_sign_t) (int algo,
-                                          gcry_mpi_t *resarr,
-                                          gcry_mpi_t data,
-                                          gcry_mpi_t *skey);
-
-/* Type for the pk_verify function.  */
-typedef gcry_err_code_t (*gcry_pk_verify_t) (int algo,
-                                            gcry_mpi_t hash,
-                                            gcry_mpi_t *data,
-                                            gcry_mpi_t *pkey,
-                                            int (*cmp) (void *, gcry_mpi_t),
-                                            void *opaquev);
-
-/* Type for the pk_get_nbits function.  */
-typedef unsigned (*gcry_pk_get_nbits_t) (int algo, gcry_mpi_t *pkey);
-
-/* Module specification structure for message digests.  */
-typedef struct gcry_pk_spec
-{
-  const char *name;
-  const char **aliases;
-  const char *elements_pkey;
-  const char *elements_skey;
-  const char *elements_enc;
-  const char *elements_sig;
-  const char *elements_grip;
-  int use;
-  gcry_pk_generate_t generate;
-  gcry_pk_check_secret_key_t check_secret_key;
-  gcry_pk_encrypt_t encrypt;
-  gcry_pk_decrypt_t decrypt;
-  gcry_pk_sign_t sign;
-  gcry_pk_verify_t verify;
-  gcry_pk_get_nbits_t get_nbits;
-} gcry_pk_spec_t;
-
-/* Register a new pubkey module whose specification can be found in
-   PUBKEY.  On success, a new algorithm ID is stored in ALGORITHM_ID
-   and a pointer representhing this module is stored in MODULE.  */
-gcry_error_t gcry_pk_register (gcry_pk_spec_t *pubkey,
-                              unsigned int *algorithm_id,
-                              gcry_module_t *module)
-  /* */  _GCRY_ATTR_INTERNAL;
-
-/* Unregister the pubkey identified by ID, which must have been
-   registered with gcry_pk_register.  */
-void gcry_pk_unregister (gcry_module_t module)
-  /* */  _GCRY_ATTR_INTERNAL;
-
-/* ********************** */
-
-/* Type for the md_init function.  */
-typedef void (*gcry_md_init_t) (void *c);
-
-/* Type for the md_write function.  */
-typedef void (*gcry_md_write_t) (void *c, const void *buf, size_t nbytes);
-
-/* Type for the md_final function.  */
-typedef void (*gcry_md_final_t) (void *c);
-
-/* Type for the md_read function.  */
-typedef unsigned char *(*gcry_md_read_t) (void *c);
-
-typedef struct gcry_md_oid_spec
-{
-  const char *oidstring;
-} gcry_md_oid_spec_t;
-
-/* Module specification structure for message digests.  */
-typedef struct gcry_md_spec
-{
-  const char *name;
-  unsigned char *asnoid;
-  int asnlen;
-  gcry_md_oid_spec_t *oids;
-  int mdlen;
-  gcry_md_init_t init;
-  gcry_md_write_t write;
-  gcry_md_final_t final;
-  gcry_md_read_t read;
-  size_t contextsize; /* allocate this amount of context */
-} gcry_md_spec_t;
-
-/* Register a new digest module whose specification can be found in
-   DIGEST.  On success, a new algorithm ID is stored in ALGORITHM_ID
-   and a pointer representhing this module is stored in MODULE.  */
-gcry_error_t gcry_md_register (gcry_md_spec_t *digest,
-                              unsigned int *algorithm_id,
-                              gcry_module_t *module)
-  /* */  _GCRY_ATTR_INTERNAL;
-
-/* Unregister the digest identified by ID, which must have been
-   registered with gcry_digest_register.  */
-void gcry_md_unregister (gcry_module_t module)
-  /* */  _GCRY_ATTR_INTERNAL;
-
-#if 0 /* keep Emacsens's auto-indent happy */
-{
-#endif
-#ifdef __cplusplus
-}
-#endif
-#endif
diff --git a/grub-core/lib/libgcrypt/src/gcrypt-testapi.h 
b/grub-core/lib/libgcrypt/src/gcrypt-testapi.h
new file mode 100644
index 000000000..0417754f4
--- /dev/null
+++ b/grub-core/lib/libgcrypt/src/gcrypt-testapi.h
@@ -0,0 +1,69 @@
+/* gcrypt-testapi.h - Definitiona for the Regression test API
+ * Copyright (C) 2016 g10 Code GmbH
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * WARNING: This is a private API to be used by regression tests.  In
+ * particular this API does not constitute a well defined ABI.  The
+ * header may only be used with its matching Libgcrypt version.
+ */
+
+#ifndef GCRY_GCRYPT_TESTAPI_H
+#define GCRY_GCRYPT_TESTAPI_H
+
+/* For use with gcry_control:  */
+#define PRIV_CTL_INIT_EXTRNG_TEST   58
+#define PRIV_CTL_RUN_EXTRNG_TEST    59
+#define PRIV_CTL_DEINIT_EXTRNG_TEST 60
+#define PRIV_CTL_EXTERNAL_LOCK_TEST 61
+#define PRIV_CTL_DUMP_SECMEM_STATS  62
+
+#define EXTERNAL_LOCK_TEST_INIT       30111
+#define EXTERNAL_LOCK_TEST_LOCK       30112
+#define EXTERNAL_LOCK_TEST_UNLOCK     30113
+#define EXTERNAL_LOCK_TEST_DESTROY    30114
+
+/* For use with gcry_cipher_ctl:  */
+#define PRIV_CIPHERCTL_DISABLE_WEAK_KEY   61
+#define PRIV_CIPHERCTL_GET_INPUT_VECTOR   62
+
+
+/* Private interfaces for testing of random-drbg.c. */
+struct gcry_drbg_test_vector
+{
+  const char *flagstr;
+  unsigned char *entropy;
+  size_t entropylen;
+  unsigned char *entpra;
+  unsigned char *entprb;
+  size_t entprlen;
+  unsigned char *addtla;
+  unsigned char *addtlb;
+  size_t addtllen;
+  unsigned char *pers;
+  size_t perslen;
+  unsigned char *expected;
+  size_t expectedlen;
+  unsigned char *entropyreseed;
+  size_t entropyreseed_len;
+  unsigned char *addtl_reseed;
+  size_t addtl_reseed_len;
+};
+
+
+#endif /*GCRY_GCRYPT_TESTAPI_H*/
diff --git a/grub-core/lib/libgcrypt/src/gcrypt.h.in 
b/grub-core/lib/libgcrypt/src/gcrypt.h.in
index b65184e6b..58581daab 100644
--- a/grub-core/lib/libgcrypt/src/gcrypt.h.in
+++ b/grub-core/lib/libgcrypt/src/gcrypt.h.in
@@ -1,25 +1,25 @@
 /* gcrypt.h -  GNU Cryptographic Library Interface              -*- c -*-
-   Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2006
-                 2007, 2008, 2009, 2010, 2011,
-                 2012  Free Software Foundation, Inc.
-   Copyright (C) 2012, 2013  g10 Code GmbH
-
-   This file is part of Libgcrypt.
-
-   Libgcrypt is free software; you can redistribute it and/or modify
-   it under the terms of the GNU Lesser General Public License as
-   published by the Free Software Foundation; either version 2.1 of
-   the License, or (at your option) any later version.
-
-   Libgcrypt is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with this program; if not, see <http://www.gnu.org/licenses/>.
-
-   File: @configure_input@ */
+ * Copyright (C) 2012-2023 g10 Code GmbH
+ * Copyright (C) 2013-2023 Jussi Kivilinna
+ * Copyright (C) 1998-2018 Free Software Foundation, Inc.
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * File: @configure_input@
+ */
 
 #ifndef _GCRYPT_H
 #define _GCRYPT_H
@@ -33,20 +33,11 @@
 #include <sys/types.h>
 
 #if defined _WIN32 || defined __WIN32__
-# include <winsock2.h>
-# include <ws2tcpip.h>
-# include <time.h>
 # ifndef __GNUC__
   typedef long ssize_t;
   typedef int  pid_t;
 # endif /*!__GNUC__*/
-#else
-# include <sys/socket.h>
-# include <sys/time.h>
-#@INSERT_SYS_SELECT_H@
-#endif /*!_WIN32*/
-
-@FALLBACK_SOCKLEN_T@
+#endif /*_WIN32*/
 
 /* This is required for error code compatibility. */
 #define _GCRY_ERR_SOURCE_DEFAULT GPG_ERR_SOURCE_GCRYPT
@@ -99,6 +90,12 @@ extern "C" {
 #define _GCRY_GCC_ATTR_MALLOC  __attribute__ ((__malloc__))
 #endif
 
+#define _GCRY_GCC_ATTR_PRINTF(f,a)  __attribute__ ((format (printf,f,a)))
+
+#if _GCRY_GCC_VERSION >= 40000
+#define _GCRY_GCC_ATTR_SENTINEL(a) __attribute__ ((sentinel(a)))
+#endif
+
 #endif /*__GNUC__*/
 
 #ifndef _GCRY_GCC_ATTR_DEPRECATED
@@ -110,13 +107,19 @@ extern "C" {
 #ifndef _GCRY_GCC_ATTR_MALLOC
 #define _GCRY_GCC_ATTR_MALLOC
 #endif
+#ifndef _GCRY_GCC_ATTR_PRINTF
+#define _GCRY_GCC_ATTR_PRINTF(f,a)
+#endif
+#ifndef _GCRY_GCC_ATTR_SENTINEL
+#define _GCRY_GCC_ATTR_SENTINEL(a)
+#endif
 
 /* Make up an attribute to mark functions and types as deprecated but
    allow internal use by Libgcrypt.  */
 #ifdef _GCRYPT_IN_LIBGCRYPT
 #define _GCRY_ATTR_INTERNAL
 #else
-#define _GCRY_ATTR_INTERNAL    _GCRY_GCC_ATTR_DEPRECATED
+#define _GCRY_ATTR_INTERNAL     _GCRY_GCC_ATTR_DEPRECATED
 #endif
 
 /* Wrappers for the libgpg-error library.  */
@@ -178,18 +181,69 @@ int gcry_err_code_to_errno (gcry_err_code_t code);
 gcry_error_t gcry_err_make_from_errno (gcry_err_source_t source, int err);
 
 /* Return an error value with the system error ERR.  */
-gcry_err_code_t gcry_error_from_errno (int err);
+gcry_error_t gcry_error_from_errno (int err);
+
+
+/* NOTE: Since Libgcrypt 1.6 the thread callbacks are not anymore
+   used.  However we keep it to allow for some source code
+   compatibility if used in the standard way.  */
+
+/* Constants defining the thread model to use.  Used with the OPTION
+   field of the struct gcry_thread_cbs.  */
+#define GCRY_THREAD_OPTION_DEFAULT  0
+#define GCRY_THREAD_OPTION_USER     1
+#define GCRY_THREAD_OPTION_PTH      2
+#define GCRY_THREAD_OPTION_PTHREAD  3
+
+/* The version number encoded in the OPTION field of the struct
+   gcry_thread_cbs.  */
+#define GCRY_THREAD_OPTION_VERSION  1
+
+/* Wrapper for struct ath_ops.  */
+struct gcry_thread_cbs
+{
+  /* The OPTION field encodes the thread model and the version number
+     of this structure.
+       Bits  7 - 0  are used for the thread model
+       Bits 15 - 8  are used for the version number.  */
+  unsigned int option;
+} _GCRY_ATTR_INTERNAL;
+
+#define GCRY_THREAD_OPTION_PTH_IMPL                                     \
+  static struct gcry_thread_cbs gcry_threads_pth = {                    \
+    (GCRY_THREAD_OPTION_PTH | (GCRY_THREAD_OPTION_VERSION << 8))}
+
+#define GCRY_THREAD_OPTION_PTHREAD_IMPL                                 \
+  static struct gcry_thread_cbs gcry_threads_pthread = {                \
+    (GCRY_THREAD_OPTION_PTHREAD | (GCRY_THREAD_OPTION_VERSION << 8))}
+
 
 
-/* The data object used to hold a multi precision integer.  */
+/* A generic context object as used by some functions.  */
+struct gcry_context;
+typedef struct gcry_context *gcry_ctx_t;
+
+/* The data objects used to hold multi precision integers.  */
 struct gcry_mpi;
 typedef struct gcry_mpi *gcry_mpi_t;
+struct gcry_mpi_point;
+typedef struct gcry_mpi_point *gcry_mpi_point_t;
 
 #ifndef GCRYPT_NO_DEPRECATED
 typedef struct gcry_mpi *GCRY_MPI _GCRY_GCC_ATTR_DEPRECATED;
 typedef struct gcry_mpi *GcryMPI _GCRY_GCC_ATTR_DEPRECATED;
 #endif
 
+/* A structure used for scatter gather hashing.  */
+typedef struct
+{
+  size_t size;  /* The allocated size of the buffer or 0.  */
+  size_t off;   /* Offset into the buffer.  */
+  size_t len;   /* The used length of the buffer.  */
+  void *data;   /* The buffer.  */
+} gcry_buffer_t;
+
+
 
 
 /* Check that the library fulfills the version requirement.  */
@@ -200,8 +254,7 @@ const char *gcry_check_version (const char *req_version);
 /* Codes used with the gcry_control function. */
 enum gcry_ctl_cmds
   {
-    GCRYCTL_SET_KEY  = 1,
-    GCRYCTL_SET_IV   = 2,
+    /* Note: 1 .. 2 are not anymore used. */
     GCRYCTL_CFB_SYNC = 3,
     GCRYCTL_RESET    = 4,   /* e.g. for MDs */
     GCRYCTL_FINALIZE = 5,
@@ -241,7 +294,7 @@ enum gcry_ctl_cmds
     GCRYCTL_ANY_INITIALIZATION_P = 40,
     GCRYCTL_SET_CBC_CTS = 41,
     GCRYCTL_SET_CBC_MAC = 42,
-    GCRYCTL_SET_CTR = 43,
+    /* Note: 43 is not anymore used. */
     GCRYCTL_ENABLE_QUICK_RANDOM = 44,
     GCRYCTL_SET_RANDOM_SEED_FILE = 45,
     GCRYCTL_UPDATE_RANDOM_SEED_FILE = 46,
@@ -258,7 +311,30 @@ enum gcry_ctl_cmds
     GCRYCTL_SELFTEST = 57,
     /* Note: 58 .. 62 are used internally.  */
     GCRYCTL_DISABLE_HWF = 63,
-    GCRYCTL_SET_ENFORCED_FIPS_FLAG = 64
+    GCRYCTL_SET_ENFORCED_FIPS_FLAG = 64,
+    GCRYCTL_SET_PREFERRED_RNG_TYPE = 65,
+    GCRYCTL_GET_CURRENT_RNG_TYPE = 66,
+    GCRYCTL_DISABLE_LOCKED_SECMEM = 67,
+    GCRYCTL_DISABLE_PRIV_DROP = 68,
+    GCRYCTL_SET_CCM_LENGTHS = 69,
+    GCRYCTL_CLOSE_RANDOM_DEVICE = 70,
+    GCRYCTL_INACTIVATE_FIPS_FLAG = 71,
+    GCRYCTL_REACTIVATE_FIPS_FLAG = 72,
+    GCRYCTL_SET_SBOX = 73,
+    GCRYCTL_DRBG_REINIT = 74,
+    GCRYCTL_SET_TAGLEN = 75,
+    GCRYCTL_GET_TAGLEN = 76,
+    GCRYCTL_REINIT_SYSCALL_CLAMP = 77,
+    GCRYCTL_AUTO_EXPAND_SECMEM = 78,
+    GCRYCTL_SET_ALLOW_WEAK_KEY = 79,
+    GCRYCTL_SET_DECRYPTION_TAG = 80,
+    GCRYCTL_FIPS_SERVICE_INDICATOR_CIPHER = 81,
+    GCRYCTL_FIPS_SERVICE_INDICATOR_KDF = 82,
+    GCRYCTL_NO_FIPS_MODE = 83,
+    GCRYCTL_FIPS_SERVICE_INDICATOR_FUNCTION = 84,
+    GCRYCTL_FIPS_SERVICE_INDICATOR_MAC = 85,
+    GCRYCTL_FIPS_SERVICE_INDICATOR_MD = 86,
+    GCRYCTL_FIPS_SERVICE_INDICATOR_PK_FLAGS = 87
   };
 
 /* Perform various operations defined by CMD. */
@@ -312,12 +388,12 @@ gcry_error_t gcry_sexp_build (gcry_sexp_t *retsexp, 
size_t *erroff,
 /* Like gcry_sexp_build, but uses an array instead of variable
    function arguments.  */
 gcry_error_t gcry_sexp_build_array (gcry_sexp_t *retsexp, size_t *erroff,
-                                   const char *format, void **arg_list);
+                                    const char *format, void **arg_list);
 
 /* Release the S-expression object SEXP */
 void gcry_sexp_release (gcry_sexp_t sexp);
 
-/* Calculate the length of an canonized S-expresion in BUFFER and
+/* Calculate the length of an canonized S-expression in BUFFER and
    check for a valid encoding. */
 size_t gcry_sexp_canon_len (const unsigned char *buffer, size_t length,
                             size_t *erroff, gcry_error_t *errcode);
@@ -377,6 +453,13 @@ gcry_sexp_t gcry_sexp_cadr (const gcry_sexp_t list);
 const char *gcry_sexp_nth_data (const gcry_sexp_t list, int number,
                                 size_t *datalen);
 
+/* This function is used to get data from a LIST.  A malloced buffer to the
+   data with index NUMBER is returned and the length of this
+   data will be stored to RLENGTH.  If there is no data at the given
+   index or the index represents another list, `NULL' is returned.  */
+void *gcry_sexp_nth_buffer (const gcry_sexp_t list, int number,
+                            size_t *rlength);
+
 /* This function is used to get and convert data from a LIST.  The
    data is assumed to be a Nul terminated string.  The caller must
    release the returned value using `gcry_free'.  If there is no data
@@ -392,6 +475,55 @@ char *gcry_sexp_nth_string (gcry_sexp_t list, int number);
    value can't be converted to an MPI, `NULL' is returned.  */
 gcry_mpi_t gcry_sexp_nth_mpi (gcry_sexp_t list, int number, int mpifmt);
 
+/* Extract MPIs from an s-expression using a list of parameters.  The
+ * names of these parameters are given by the string LIST.  Some
+ * special characters may be given to control the conversion:
+ *
+ *    + :: Switch to unsigned integer format (default).
+ *    - :: Switch to standard signed format.
+ *    / :: Switch to opaque format.
+ *    & :: Switch to buffer descriptor mode - see below.
+ *    ? :: The previous parameter is optional.
+ *
+ * In general parameter names are single letters.  To use a string for
+ * a parameter name, enclose the name in single quotes.
+ *
+ * Unless in gcry_buffer_t mode for each parameter name a pointer to
+ * an MPI variable is expected that must be set to NULL prior to
+ * invoking this function, and finally a NULL is expected.  Example:
+ *
+ *   _gcry_sexp_extract_param (key, NULL, "n/x+ed",
+ *                             &mpi_n, &mpi_x, &mpi_e, NULL)
+ *
+ * This stores the parameter "N" from KEY as an unsigned MPI into
+ * MPI_N, the parameter "X" as an opaque MPI into MPI_X, and the
+ * parameter "E" again as an unsigned MPI into MPI_E.
+ *
+ * If in buffer descriptor mode a pointer to gcry_buffer_t descriptor
+ * is expected instead of a pointer to an MPI.  The caller may use two
+ * different operation modes: If the DATA field of the provided buffer
+ * descriptor is NULL, the function allocates a new buffer and stores
+ * it at DATA; the other fields are set accordingly with OFF being 0.
+ * If DATA is not NULL, the function assumes that DATA, SIZE, and OFF
+ * describe a buffer where to but the data; on return the LEN field
+ * receives the number of bytes copied to that buffer; if the buffer
+ * is too small, the function immediately returns with an error code
+ * (and LEN set to 0).
+ *
+ * PATH is an optional string used to locate a token.  The exclamation
+ * mark separated tokens are used to via gcry_sexp_find_token to find
+ * a start point inside SEXP.
+ *
+ * The function returns 0 on success.  On error an error code is
+ * returned, all passed MPIs that might have been allocated up to this
+ * point are deallocated and set to NULL, and all passed buffers are
+ * either truncated if the caller supplied the buffer, or deallocated
+ * if the function allocated the buffer.
+ */
+gpg_error_t gcry_sexp_extract_param (gcry_sexp_t sexp,
+                                     const char *path,
+                                     const char *list,
+                                     ...) _GCRY_GCC_ATTR_SENTINEL(0);
 
 
 /*******************************************
@@ -408,19 +540,33 @@ enum gcry_mpi_format
     GCRYMPI_FMT_PGP = 2,    /* As used by OpenPGP (unsigned only).  */
     GCRYMPI_FMT_SSH = 3,    /* As used by SSH (like STD but with length).  */
     GCRYMPI_FMT_HEX = 4,    /* Hex format. */
-    GCRYMPI_FMT_USG = 5     /* Like STD but unsigned. */
+    GCRYMPI_FMT_USG = 5,    /* Like STD but unsigned. */
+    GCRYMPI_FMT_OPAQUE = 8  /* Opaque format (some functions only).  */
   };
 
 /* Flags used for creating big integers.  */
 enum gcry_mpi_flag
   {
     GCRYMPI_FLAG_SECURE = 1,  /* Allocate the number in "secure" memory.  */
-    GCRYMPI_FLAG_OPAQUE = 2   /* The number is not a real one but just
+    GCRYMPI_FLAG_OPAQUE = 2,  /* The number is not a real one but just
                                  a way to store some bytes.  This is
                                  useful for encrypted big integers.  */
+    GCRYMPI_FLAG_IMMUTABLE = 4, /* Mark the MPI as immutable.  */
+    GCRYMPI_FLAG_CONST     = 8, /* Mark the MPI as a constant.  */
+    GCRYMPI_FLAG_USER1 = 0x0100,/* User flag 1.  */
+    GCRYMPI_FLAG_USER2 = 0x0200,/* User flag 2.  */
+    GCRYMPI_FLAG_USER3 = 0x0400,/* User flag 3.  */
+    GCRYMPI_FLAG_USER4 = 0x0800 /* User flag 4.  */
   };
 
 
+/* Macros to return pre-defined MPI constants.  */
+#define GCRYMPI_CONST_ONE   (_gcry_mpi_get_const (1))
+#define GCRYMPI_CONST_TWO   (_gcry_mpi_get_const (2))
+#define GCRYMPI_CONST_THREE (_gcry_mpi_get_const (3))
+#define GCRYMPI_CONST_FOUR  (_gcry_mpi_get_const (4))
+#define GCRYMPI_CONST_EIGHT (_gcry_mpi_get_const (8))
+
 /* Allocate a new big integer object, initialize it with 0 and
    initially allocate memory for a number of at least NBITS. */
 gcry_mpi_t gcry_mpi_new (unsigned int nbits);
@@ -434,15 +580,30 @@ void gcry_mpi_release (gcry_mpi_t a);
 /* Create a new number with the same value as A. */
 gcry_mpi_t gcry_mpi_copy (const gcry_mpi_t a);
 
+/* Store the big integer value U in W and release U.  */
+void gcry_mpi_snatch (gcry_mpi_t w, gcry_mpi_t u);
+
 /* Store the big integer value U in W. */
 gcry_mpi_t gcry_mpi_set (gcry_mpi_t w, const gcry_mpi_t u);
 
 /* Store the unsigned integer value U in W. */
 gcry_mpi_t gcry_mpi_set_ui (gcry_mpi_t w, unsigned long u);
 
+/* Store U as an unsigned int at W or return GPG_ERR_ERANGE. */
+gpg_error_t gcry_mpi_get_ui (unsigned int *w, gcry_mpi_t u);
+
 /* Swap the values of A and B. */
 void gcry_mpi_swap (gcry_mpi_t a, gcry_mpi_t b);
 
+/* Return 1 if A is negative; 0 if zero or positive.  */
+int gcry_mpi_is_neg (gcry_mpi_t a);
+
+/* W = - U */
+void gcry_mpi_neg (gcry_mpi_t w, gcry_mpi_t u);
+
+/* W = [W] */
+void gcry_mpi_abs (gcry_mpi_t w);
+
 /* Compare the big integer number U and V returning 0 for equality, a
    positive value for U > V and a negative for U < V. */
 int gcry_mpi_cmp (const gcry_mpi_t u, const gcry_mpi_t v);
@@ -470,7 +631,7 @@ gcry_error_t gcry_mpi_print (enum gcry_mpi_format format,
                              size_t *nwritten,
                              const gcry_mpi_t a);
 
-/* Convert the big integer A int the external representation described
+/* Convert the big integer A into the external representation described
    by FORMAT and store it in a newly allocated buffer which address
    will be put into BUFFER.  NWRITTEN receives the actual lengths of the
    external representation. */
@@ -536,6 +697,79 @@ int gcry_mpi_gcd (gcry_mpi_t g, gcry_mpi_t a, gcry_mpi_t 
b);
    Return true if the value exists. */
 int gcry_mpi_invm (gcry_mpi_t x, gcry_mpi_t a, gcry_mpi_t m);
 
+/* Create a new point object.  NBITS is usually 0.  */
+gcry_mpi_point_t gcry_mpi_point_new (unsigned int nbits);
+
+/* Release the object POINT.  POINT may be NULL. */
+void gcry_mpi_point_release (gcry_mpi_point_t point);
+
+/* Return a copy of POINT. */
+gcry_mpi_point_t gcry_mpi_point_copy (gcry_mpi_point_t point);
+
+/* Store the projective coordinates from POINT into X, Y, and Z.  */
+void gcry_mpi_point_get (gcry_mpi_t x, gcry_mpi_t y, gcry_mpi_t z,
+                         gcry_mpi_point_t point);
+
+/* Store the projective coordinates from POINT into X, Y, and Z and
+   release POINT.  */
+void gcry_mpi_point_snatch_get (gcry_mpi_t x, gcry_mpi_t y, gcry_mpi_t z,
+                                gcry_mpi_point_t point);
+
+/* Store the projective coordinates X, Y, and Z into POINT.  */
+gcry_mpi_point_t gcry_mpi_point_set (gcry_mpi_point_t point,
+                                     gcry_mpi_t x, gcry_mpi_t y, gcry_mpi_t z);
+
+/* Store the projective coordinates X, Y, and Z into POINT and release
+   X, Y, and Z.  */
+gcry_mpi_point_t gcry_mpi_point_snatch_set (gcry_mpi_point_t point,
+                                            gcry_mpi_t x, gcry_mpi_t y,
+                                            gcry_mpi_t z);
+
+/* Allocate a new context for elliptic curve operations based on the
+   parameters given by KEYPARAM or using CURVENAME.  */
+gpg_error_t gcry_mpi_ec_new (gcry_ctx_t *r_ctx,
+                             gcry_sexp_t keyparam, const char *curvename);
+
+/* Get a named MPI from an elliptic curve context.  */
+gcry_mpi_t gcry_mpi_ec_get_mpi (const char *name, gcry_ctx_t ctx, int copy);
+
+/* Get a named point from an elliptic curve context.  */
+gcry_mpi_point_t gcry_mpi_ec_get_point (const char *name,
+                                        gcry_ctx_t ctx, int copy);
+
+/* Store a named MPI into an elliptic curve context.  */
+gpg_error_t gcry_mpi_ec_set_mpi (const char *name, gcry_mpi_t newvalue,
+                                 gcry_ctx_t ctx);
+
+/* Store a named point into an elliptic curve context.  */
+gpg_error_t gcry_mpi_ec_set_point (const char *name, gcry_mpi_point_t newvalue,
+                                   gcry_ctx_t ctx);
+
+/* Decode and store VALUE into RESULT.  */
+gpg_error_t gcry_mpi_ec_decode_point (gcry_mpi_point_t result,
+                                      gcry_mpi_t value, gcry_ctx_t ctx);
+
+/* Store the affine coordinates of POINT into X and Y.  */
+int gcry_mpi_ec_get_affine (gcry_mpi_t x, gcry_mpi_t y, gcry_mpi_point_t point,
+                            gcry_ctx_t ctx);
+
+/* W = 2 * U.  */
+void gcry_mpi_ec_dup (gcry_mpi_point_t w, gcry_mpi_point_t u, gcry_ctx_t ctx);
+
+/* W = U + V.  */
+void gcry_mpi_ec_add (gcry_mpi_point_t w,
+                      gcry_mpi_point_t u, gcry_mpi_point_t v, gcry_ctx_t ctx);
+
+/* W = U - V.  */
+void gcry_mpi_ec_sub (gcry_mpi_point_t w,
+                      gcry_mpi_point_t u, gcry_mpi_point_t v, gcry_ctx_t ctx);
+
+/* W = N * U.  */
+void gcry_mpi_ec_mul (gcry_mpi_point_t w, gcry_mpi_t n, gcry_mpi_point_t u,
+                      gcry_ctx_t ctx);
+
+/* Return true if POINT is on the curve described by CTX.  */
+int gcry_mpi_ec_curve_point (gcry_mpi_point_t w, gcry_ctx_t ctx);
 
 /* Return the number of bits required to represent A. */
 unsigned int gcry_mpi_get_nbits (gcry_mpi_t a);
@@ -562,10 +796,18 @@ void     gcry_mpi_rshift (gcry_mpi_t x, gcry_mpi_t a, 
unsigned int n);
 void     gcry_mpi_lshift (gcry_mpi_t x, gcry_mpi_t a, unsigned int n);
 
 /* Store NBITS of the value P points to in A and mark A as an opaque
-   value.  WARNING: Never use an opaque MPI for anything thing else then
+   value.  On success A received the the ownership of the value P.
+   WARNING: Never use an opaque MPI for anything thing else than
    gcry_mpi_release, gcry_mpi_get_opaque. */
 gcry_mpi_t gcry_mpi_set_opaque (gcry_mpi_t a, void *p, unsigned int nbits);
 
+/* Store NBITS of the value P points to in A and mark A as an opaque
+   value.  The function takes a copy of the provided value P.
+   WARNING: Never use an opaque MPI for anything thing else than
+   gcry_mpi_release, gcry_mpi_get_opaque. */
+gcry_mpi_t gcry_mpi_set_opaque_copy (gcry_mpi_t a,
+                                     const void *p, unsigned int nbits);
+
 /* Return a pointer to an opaque value stored in A and return its size
    in NBITS.  Note that the returned pointer is still owned by A and
    that the function should never be used for an non-opaque MPI. */
@@ -580,9 +822,12 @@ void gcry_mpi_set_flag (gcry_mpi_t a, enum gcry_mpi_flag 
flag);
    currently useless as no flags are allowed. */
 void gcry_mpi_clear_flag (gcry_mpi_t a, enum gcry_mpi_flag flag);
 
-/* Return true when the FLAG is set for A. */
+/* Return true if the FLAG is set for A. */
 int gcry_mpi_get_flag (gcry_mpi_t a, enum gcry_mpi_flag flag);
 
+/* Private function - do not use.  */
+gcry_mpi_t _gcry_mpi_get_const (int no);
+
 /* Unless the GCRYPT_NO_MPI_MACROS is used, provide a couple of
    convenience macros for the big integer functions. */
 #ifndef GCRYPT_NO_MPI_MACROS
@@ -597,10 +842,15 @@ int gcry_mpi_get_flag (gcry_mpi_t a, enum gcry_mpi_flag 
flag);
   while (0)
 
 #define mpi_copy( a )          gcry_mpi_copy( (a) )
+#define mpi_snatch( w, u)      gcry_mpi_snatch( (w), (u) )
 #define mpi_set( w, u)         gcry_mpi_set( (w), (u) )
 #define mpi_set_ui( w, u)      gcry_mpi_set_ui( (w), (u) )
+#define mpi_get_ui( w, u)      gcry_mpi_get_ui( (w), (u) )
+#define mpi_abs( w )           gcry_mpi_abs( (w) )
+#define mpi_neg( w, u)         gcry_mpi_neg( (w), (u) )
 #define mpi_cmp( u, v )        gcry_mpi_cmp( (u), (v) )
 #define mpi_cmp_ui( u, v )     gcry_mpi_cmp_ui( (u), (v) )
+#define mpi_is_neg( a )        gcry_mpi_is_neg ((a))
 
 #define mpi_add_ui(w,u,v)      gcry_mpi_add_ui((w),(u),(v))
 #define mpi_add(w,u,v)         gcry_mpi_add ((w),(u),(v))
@@ -619,6 +869,20 @@ int gcry_mpi_get_flag (gcry_mpi_t a, enum gcry_mpi_flag 
flag);
 #define mpi_gcd(g,a,b)         gcry_mpi_gcd ( (g), (a), (b) )
 #define mpi_invm(g,a,b)        gcry_mpi_invm ( (g), (a), (b) )
 
+#define mpi_point_new(n)              gcry_mpi_point_new((n))
+#define mpi_point_release(p)                    \
+  do                                            \
+    {                                           \
+      gcry_mpi_point_release ((p));             \
+      (p) = NULL;                               \
+    }                                           \
+  while (0)
+#define mpi_point_copy(p)             gcry_mpi_point_copy((p))
+#define mpi_point_get(x,y,z,p)        gcry_mpi_point_get((x),(y),(z),(p))
+#define mpi_point_snatch_get(x,y,z,p) 
gcry_mpi_point_snatch_get((x),(y),(z),(p))
+#define mpi_point_set(p,x,y,z)        gcry_mpi_point_set((p),(x),(y),(z))
+#define mpi_point_snatch_set(p,x,y,z) 
gcry_mpi_point_snatch_set((p),(x),(y),(z))
+
 #define mpi_get_nbits(a)       gcry_mpi_get_nbits ((a))
 #define mpi_test_bit(a,b)      gcry_mpi_test_bit ((a),(b))
 #define mpi_set_bit(a,b)       gcry_mpi_set_bit ((a),(b))
@@ -677,7 +941,13 @@ enum gcry_cipher_algos
     GCRY_CIPHER_SEED        = 309,  /* 128 bit cipher described in RFC4269. */
     GCRY_CIPHER_CAMELLIA128 = 310,
     GCRY_CIPHER_CAMELLIA192 = 311,
-    GCRY_CIPHER_CAMELLIA256 = 312
+    GCRY_CIPHER_CAMELLIA256 = 312,
+    GCRY_CIPHER_SALSA20     = 313,
+    GCRY_CIPHER_SALSA20R12  = 314,
+    GCRY_CIPHER_GOST28147   = 315,
+    GCRY_CIPHER_CHACHA20    = 316,
+    GCRY_CIPHER_GOST28147_MESH   = 317, /* With CryptoPro key meshing.  */
+    GCRY_CIPHER_SM4         = 318
   };
 
 /* The Rijndael algorithm is basically AES, so provide some macros. */
@@ -691,14 +961,23 @@ enum gcry_cipher_algos
    supported for each algorithm. */
 enum gcry_cipher_modes
   {
-    GCRY_CIPHER_MODE_NONE   = 0,  /* Not yet specified. */
-    GCRY_CIPHER_MODE_ECB    = 1,  /* Electronic codebook. */
-    GCRY_CIPHER_MODE_CFB    = 2,  /* Cipher feedback. */
-    GCRY_CIPHER_MODE_CBC    = 3,  /* Cipher block chaining. */
-    GCRY_CIPHER_MODE_STREAM = 4,  /* Used with stream ciphers. */
-    GCRY_CIPHER_MODE_OFB    = 5,  /* Outer feedback. */
-    GCRY_CIPHER_MODE_CTR    = 6,  /* Counter. */
-    GCRY_CIPHER_MODE_AESWRAP= 7   /* AES-WRAP algorithm.  */
+    GCRY_CIPHER_MODE_NONE     = 0,   /* Not yet specified. */
+    GCRY_CIPHER_MODE_ECB      = 1,   /* Electronic codebook. */
+    GCRY_CIPHER_MODE_CFB      = 2,   /* Cipher feedback. */
+    GCRY_CIPHER_MODE_CBC      = 3,   /* Cipher block chaining. */
+    GCRY_CIPHER_MODE_STREAM   = 4,   /* Used with stream ciphers. */
+    GCRY_CIPHER_MODE_OFB      = 5,   /* Outer feedback. */
+    GCRY_CIPHER_MODE_CTR      = 6,   /* Counter. */
+    GCRY_CIPHER_MODE_AESWRAP  = 7,   /* AES-WRAP algorithm.  */
+    GCRY_CIPHER_MODE_CCM      = 8,   /* Counter with CBC-MAC.  */
+    GCRY_CIPHER_MODE_GCM      = 9,   /* Galois Counter Mode. */
+    GCRY_CIPHER_MODE_POLY1305 = 10,  /* Poly1305 based AEAD mode. */
+    GCRY_CIPHER_MODE_OCB      = 11,  /* OCB3 mode.  */
+    GCRY_CIPHER_MODE_CFB8     = 12,  /* Cipher feedback (8 bit mode). */
+    GCRY_CIPHER_MODE_XTS      = 13,  /* XTS mode.  */
+    GCRY_CIPHER_MODE_EAX      = 14,  /* EAX mode.  */
+    GCRY_CIPHER_MODE_SIV      = 15,  /* SIV mode.  */
+    GCRY_CIPHER_MODE_GCM_SIV  = 16   /* GCM-SIV mode.  */
   };
 
 /* Flags used with the open function. */
@@ -707,16 +986,31 @@ enum gcry_cipher_flags
     GCRY_CIPHER_SECURE      = 1,  /* Allocate in secure memory. */
     GCRY_CIPHER_ENABLE_SYNC = 2,  /* Enable CFB sync mode. */
     GCRY_CIPHER_CBC_CTS     = 4,  /* Enable CBC cipher text stealing (CTS). */
-    GCRY_CIPHER_CBC_MAC     = 8   /* Enable CBC message auth. code (MAC). */
+    GCRY_CIPHER_CBC_MAC     = 8,  /* Enable CBC message auth. code (MAC).  */
+    GCRY_CIPHER_EXTENDED    = 16  /* Enable extended AES-WRAP.  */
   };
 
+/* GCM works only with blocks of 128 bits */
+#define GCRY_GCM_BLOCK_LEN  (128 / 8)
+
+/* CCM works only with blocks of 128 bits.  */
+#define GCRY_CCM_BLOCK_LEN  (128 / 8)
+
+/* OCB works only with blocks of 128 bits.  */
+#define GCRY_OCB_BLOCK_LEN  (128 / 8)
+
+/* XTS works only with blocks of 128 bits.  */
+#define GCRY_XTS_BLOCK_LEN  (128 / 8)
+
+/* SIV and GCM-SIV works only with blocks of 128 bits */
+#define GCRY_SIV_BLOCK_LEN  (128 / 8)
 
 /* Create a handle for algorithm ALGO to be used in MODE.  FLAGS may
    be given as an bitwise OR of the gcry_cipher_flags values. */
 gcry_error_t gcry_cipher_open (gcry_cipher_hd_t *handle,
                               int algo, int mode, unsigned int flags);
 
-/* Close the cioher handle H and release all resource. */
+/* Close the cipher handle H and release all resource. */
 void gcry_cipher_close (gcry_cipher_hd_t h);
 
 /* Perform various operations on the cipher object H. */
@@ -767,6 +1061,17 @@ gcry_error_t gcry_cipher_setkey (gcry_cipher_hd_t hd,
 gcry_error_t gcry_cipher_setiv (gcry_cipher_hd_t hd,
                                 const void *iv, size_t ivlen);
 
+/* Provide additional authentication data for AEAD modes/ciphers.  */
+gcry_error_t gcry_cipher_authenticate (gcry_cipher_hd_t hd, const void *abuf,
+                                       size_t abuflen);
+
+/* Get authentication tag for AEAD modes/ciphers.  */
+gcry_error_t gcry_cipher_gettag (gcry_cipher_hd_t hd, void *outtag,
+                                 size_t taglen);
+
+/* Check authentication tag for AEAD modes/ciphers.  */
+gcry_error_t gcry_cipher_checktag (gcry_cipher_hd_t hd, const void *intag,
+                                   size_t taglen);
 
 /* Reset the handle to the state after open.  */
 #define gcry_cipher_reset(h)  gcry_cipher_ctl ((h), GCRYCTL_RESET, NULL, 0)
@@ -775,10 +1080,19 @@ gcry_error_t gcry_cipher_setiv (gcry_cipher_hd_t hd,
    cipher handle H. */
 #define gcry_cipher_sync(h)  gcry_cipher_ctl( (h), GCRYCTL_CFB_SYNC, NULL, 0)
 
-/* Enable or disable CTS in future calls to gcry_encrypt(). CBC mode only. */
+/* Enable or disable CTS in future calls to gcry_cipher_encrypt().
+ * CBC mode only. */
 #define gcry_cipher_cts(h,on)  gcry_cipher_ctl( (h), GCRYCTL_SET_CBC_CTS, \
                                                                    NULL, on )
 
+#define gcry_cipher_set_sbox(h,oid) gcry_cipher_ctl( (h), GCRYCTL_SET_SBOX, \
+                                                     (void *) oid, 0);
+
+/* Indicate to the encrypt and decrypt functions that the next call
+   provides the final data.  Only used with some modes.  */
+#define gcry_cipher_final(a) \
+            gcry_cipher_ctl ((a), GCRYCTL_FINALIZE, NULL, 0)
+
 /* Set counter for CTR mode.  (CTR,CTRLEN) must denote a buffer of
    block size length, or (NULL,0) to set the CTR to the all-zero block. */
 gpg_error_t gcry_cipher_setctr (gcry_cipher_hd_t hd,
@@ -794,13 +1108,10 @@ size_t gcry_cipher_get_algo_blklen (int algo);
 #define gcry_cipher_test_algo(a) \
             gcry_cipher_algo_info( (a), GCRYCTL_TEST_ALGO, NULL, NULL )
 
-/* Get a list consisting of the IDs of the loaded cipher modules.  If
-   LIST is zero, write the number of loaded cipher modules to
-   LIST_LENGTH and return.  If LIST is non-zero, the first
-   *LIST_LENGTH algorithm IDs are stored in LIST, which must be of
-   according size.  In case there are less cipher modules than
-   *LIST_LENGTH, *LIST_LENGTH is updated to the correct number.  */
-gcry_error_t gcry_cipher_list (int *list, int *list_length);
+/* Setup tag for decryption (for SIV and GCM-SIV mode). */
+#define gcry_cipher_set_decryption_tag(a, tag, taglen) \
+            gcry_cipher_ctl ((a), GCRYCTL_SET_DECRYPTION_TAG, \
+                             (void *)(tag), (taglen))
 
 
 /************************************
@@ -809,17 +1120,19 @@ gcry_error_t gcry_cipher_list (int *list, int 
*list_length);
  *                                  *
  ************************************/
 
-/* The algorithms and their IDs we support. */
+/* The algorithms and their IDs we support.  */
 enum gcry_pk_algos
   {
-    GCRY_PK_RSA   = 1,
-    GCRY_PK_RSA_E = 2,      /* (deprecated) */
-    GCRY_PK_RSA_S = 3,      /* (deprecated) */
-    GCRY_PK_ELG_E = 16,
-    GCRY_PK_DSA   = 17,
-    GCRY_PK_ELG   = 20,
-    GCRY_PK_ECDSA = 301,
-    GCRY_PK_ECDH  = 302
+    GCRY_PK_RSA   = 1,      /* RSA */
+    GCRY_PK_RSA_E = 2,      /* (deprecated: use 1).  */
+    GCRY_PK_RSA_S = 3,      /* (deprecated: use 1).  */
+    GCRY_PK_ELG_E = 16,     /* (deprecated: use 20). */
+    GCRY_PK_DSA   = 17,     /* Digital Signature Algorithm.  */
+    GCRY_PK_ECC   = 18,     /* Generic ECC.  */
+    GCRY_PK_ELG   = 20,     /* Elgamal       */
+    GCRY_PK_ECDSA = 301,    /* (only for external use).  */
+    GCRY_PK_ECDH  = 302,    /* (only for external use).  */
+    GCRY_PK_EDDSA = 303     /* (only for external use).  */
   };
 
 /* Flags describing usage capabilities of a PK algorithm. */
@@ -829,6 +1142,10 @@ enum gcry_pk_algos
 #define GCRY_PK_USAGE_AUTH 8   /* Good for authentication. */
 #define GCRY_PK_USAGE_UNKN 128 /* Unknown usage flag. */
 
+/* Modes used with gcry_pubkey_get_sexp.  */
+#define GCRY_PK_GET_PUBKEY 1
+#define GCRY_PK_GET_SECKEY 2
+
 /* Encrypt the DATA using the public key PKEY and store the result as
    a newly created S-expression at RESULT. */
 gcry_error_t gcry_pk_encrypt (gcry_sexp_t *result,
@@ -876,8 +1193,8 @@ int gcry_pk_map_name (const char* name) 
_GCRY_GCC_ATTR_PURE;
    public or private KEY.  */
 unsigned int gcry_pk_get_nbits (gcry_sexp_t key) _GCRY_GCC_ATTR_PURE;
 
-/* Please note that keygrip is still experimental and should not be
-   used without contacting the author. */
+/* Return the so called KEYGRIP which is the SHA-1 hash of the public
+   key parameters expressed in a way depending on the algorithm.  */
 unsigned char *gcry_pk_get_keygrip (gcry_sexp_t key, unsigned char *array);
 
 /* Return the name of the curve matching KEY.  */
@@ -892,13 +1209,31 @@ gcry_sexp_t gcry_pk_get_param (int algo, const char 
*name);
 #define gcry_pk_test_algo(a) \
             gcry_pk_algo_info( (a), GCRYCTL_TEST_ALGO, NULL, NULL )
 
-/* Get a list consisting of the IDs of the loaded pubkey modules.  If
-   LIST is zero, write the number of loaded pubkey modules to
-   LIST_LENGTH and return.  If LIST is non-zero, the first
-   *LIST_LENGTH algorithm IDs are stored in LIST, which must be of
-   according size.  In case there are less pubkey modules than
-   *LIST_LENGTH, *LIST_LENGTH is updated to the correct number.  */
-gcry_error_t gcry_pk_list (int *list, int *list_length);
+/* Return an S-expression representing the context CTX.  */
+gcry_error_t gcry_pubkey_get_sexp (gcry_sexp_t *r_sexp,
+                                   int mode, gcry_ctx_t ctx);
+
+/************************************
+ *                                  *
+ *    Modern ECC Functions          *
+ *                                  *
+ ************************************/
+
+/* The curves we support.  */
+enum gcry_ecc_curves
+  {
+    GCRY_ECC_CURVE25519 = 1,
+    GCRY_ECC_CURVE448   = 2
+  };
+
+/* Get the length of point to prepare buffer for the result.  */
+unsigned int gcry_ecc_get_algo_keylen (int curveid);
+
+/* Convenience function to compute scalar multiplication of the
+ * Montgomery form of curve.  */
+gpg_error_t gcry_ecc_mul_point (int curveid, unsigned char *result,
+                                const unsigned char *scalar,
+                                const unsigned char *point);
 
 
 
@@ -909,7 +1244,7 @@ gcry_error_t gcry_pk_list (int *list, int *list_length);
  ************************************/
 
 /* Algorithm IDs for the hash functions we know about. Not all of them
-   are implemnted. */
+   are implemented. */
 enum gcry_md_algos
   {
     GCRY_MD_NONE    = 0,
@@ -923,20 +1258,43 @@ enum gcry_md_algos
     GCRY_MD_SHA384  = 9,
     GCRY_MD_SHA512  = 10,
     GCRY_MD_SHA224  = 11,
-    GCRY_MD_MD4     = 301,
+
+    GCRY_MD_MD4           = 301,
     GCRY_MD_CRC32         = 302,
     GCRY_MD_CRC32_RFC1510 = 303,
     GCRY_MD_CRC24_RFC2440 = 304,
-    GCRY_MD_WHIRLPOOL = 305,
-    GCRY_MD_TIGER1  = 306, /* TIGER fixed.  */
-    GCRY_MD_TIGER2  = 307  /* TIGER2 variant.   */
+    GCRY_MD_WHIRLPOOL     = 305,
+    GCRY_MD_TIGER1        = 306, /* TIGER fixed.  */
+    GCRY_MD_TIGER2        = 307, /* TIGER2 variant.   */
+    GCRY_MD_GOSTR3411_94  = 308, /* GOST R 34.11-94.  */
+    GCRY_MD_STRIBOG256    = 309, /* GOST R 34.11-2012, 256 bit.  */
+    GCRY_MD_STRIBOG512    = 310, /* GOST R 34.11-2012, 512 bit.  */
+    GCRY_MD_GOSTR3411_CP  = 311, /* GOST R 34.11-94 with CryptoPro-A S-Box.  */
+    GCRY_MD_SHA3_224      = 312,
+    GCRY_MD_SHA3_256      = 313,
+    GCRY_MD_SHA3_384      = 314,
+    GCRY_MD_SHA3_512      = 315,
+    GCRY_MD_SHAKE128      = 316,
+    GCRY_MD_SHAKE256      = 317,
+    GCRY_MD_BLAKE2B_512   = 318,
+    GCRY_MD_BLAKE2B_384   = 319,
+    GCRY_MD_BLAKE2B_256   = 320,
+    GCRY_MD_BLAKE2B_160   = 321,
+    GCRY_MD_BLAKE2S_256   = 322,
+    GCRY_MD_BLAKE2S_224   = 323,
+    GCRY_MD_BLAKE2S_160   = 324,
+    GCRY_MD_BLAKE2S_128   = 325,
+    GCRY_MD_SM3           = 326,
+    GCRY_MD_SHA512_256    = 327,
+    GCRY_MD_SHA512_224    = 328
   };
 
 /* Flags used with the open function.  */
 enum gcry_md_flags
   {
     GCRY_MD_FLAG_SECURE = 1,  /* Allocate all buffers in "secure" memory.  */
-    GCRY_MD_FLAG_HMAC   = 2   /* Make an HMAC out of this algorithm.  */
+    GCRY_MD_FLAG_HMAC   = 2,  /* Make an HMAC out of this algorithm.  */
+    GCRY_MD_FLAG_BUGEMU1 = 0x0100
   };
 
 /* (Forward declaration.)  */
@@ -993,14 +1351,23 @@ void gcry_md_write (gcry_md_hd_t hd, const void *buffer, 
size_t length);
    algorithm ALGO. */
 unsigned char *gcry_md_read (gcry_md_hd_t hd, int algo);
 
+/* Read more output from algorithm ALGO to BUFFER of size LENGTH from
+ * digest object HD. Algorithm needs to be 'expendable-output function'. */
+gpg_error_t gcry_md_extract (gcry_md_hd_t hd, int algo, void *buffer,
+                             size_t length);
+
 /* Convenience function to calculate the hash from the data in BUFFER
-   of size LENGTH using the algorithm ALGO avoiding the creating of a
+   of size LENGTH using the algorithm ALGO avoiding the creation of a
    hash object.  The hash is returned in the caller provided buffer
    DIGEST which must be large enough to hold the digest of the given
    algorithm. */
 void gcry_md_hash_buffer (int algo, void *digest,
                           const void *buffer, size_t length);
 
+/* Convenience function to hash multiple buffers.  */
+gpg_error_t gcry_md_hash_buffers (int algo, unsigned int flags, void *digest,
+                                  const gcry_buffer_t *iov, int iovcnt);
+
 /* Retrieve the algorithm used with HD.  This does not work reliable
    if more than one algorithm is enabled in HD. */
 int gcry_md_get_algo (gcry_md_hd_t hd);
@@ -1016,9 +1383,9 @@ int gcry_md_is_enabled (gcry_md_hd_t a, int algo);
 /* Return true if the digest object A is allocated in "secure" memory. */
 int gcry_md_is_secure (gcry_md_hd_t a);
 
-/* Retrieve various information about the object H.  */
+/* Deprecated: Use gcry_md_is_enabled or gcry_md_is_secure.  */
 gcry_error_t gcry_md_info (gcry_md_hd_t h, int what, void *buffer,
-                          size_t *nbytes);
+                          size_t *nbytes) _GCRY_ATTR_INTERNAL;
 
 /* Retrieve various information about the algorithm ALGO.  */
 gcry_error_t gcry_md_algo_info (int algo, int what, void *buffer,
@@ -1069,438 +1436,150 @@ void gcry_md_debug (gcry_md_hd_t hd, const char 
*suffix);
 #define gcry_md_get_asnoid(a,b,n) \
             gcry_md_algo_info((a), GCRYCTL_GET_ASNOID, (b), (n))
 
-/* Enable debugging for digest object A; i.e. create files named
-   dbgmd-<n>.<string> while hashing.  B is a string used as the suffix
-   for the filename.  This macro is deprecated, use gcry_md_debug. */
-#ifndef GCRYPT_NO_DEPRECATED
-#define gcry_md_start_debug(a,b) \
-            gcry_md_ctl( (a), GCRYCTL_START_DUMP, (b), 0 )
-
-/* Disable the debugging of A.  This macro is deprecated, use
-   gcry_md_debug.  */
-#define gcry_md_stop_debug(a,b) \
-            gcry_md_ctl( (a), GCRYCTL_STOP_DUMP, (b), 0 )
-#endif
-
-/* Get a list consisting of the IDs of the loaded message digest
-   modules.  If LIST is zero, write the number of loaded message
-   digest modules to LIST_LENGTH and return.  If LIST is non-zero, the
-   first *LIST_LENGTH algorithm IDs are stored in LIST, which must be
-   of according size.  In case there are less message digest modules
-   than *LIST_LENGTH, *LIST_LENGTH is updated to the correct
-   number.  */
-gcry_error_t gcry_md_list (int *list, int *list_length);
-
 
-#if !defined(GCRYPT_NO_DEPRECATED) || defined(_GCRYPT_IN_LIBGCRYPT)
-/* Alternative interface for asymmetric cryptography.  This interface
-   is deprecated.  */
 
-/* The algorithm IDs. */
-typedef enum gcry_ac_id
-  {
-    GCRY_AC_RSA = 1,
-    GCRY_AC_DSA = 17,
-    GCRY_AC_ELG = 20,
-    GCRY_AC_ELG_E = 16
-  }
-gcry_ac_id_t _GCRY_ATTR_INTERNAL;
+/**********************************************
+ *                                            *
+ *   Message Authentication Code Functions    *
+ *                                            *
+ **********************************************/
 
-/* Key types.  */
-typedef enum gcry_ac_key_type
-  {
-    GCRY_AC_KEY_SECRET,
-    GCRY_AC_KEY_PUBLIC
-  }
-gcry_ac_key_type_t _GCRY_ATTR_INTERNAL;
+/* The data object used to hold a handle to an encryption object.  */
+struct gcry_mac_handle;
+typedef struct gcry_mac_handle *gcry_mac_hd_t;
 
-/* Encoding methods.  */
-typedef enum gcry_ac_em
+/* Algorithm IDs for the hash functions we know about. Not all of them
+   are implemented. */
+enum gcry_mac_algos
   {
-    GCRY_AC_EME_PKCS_V1_5,
-    GCRY_AC_EMSA_PKCS_V1_5
-  }
-gcry_ac_em_t _GCRY_ATTR_INTERNAL;
+    GCRY_MAC_NONE               = 0,
+    GCRY_MAC_GOST28147_IMIT     = 1,
+
+    GCRY_MAC_HMAC_SHA256        = 101,
+    GCRY_MAC_HMAC_SHA224        = 102,
+    GCRY_MAC_HMAC_SHA512        = 103,
+    GCRY_MAC_HMAC_SHA384        = 104,
+    GCRY_MAC_HMAC_SHA1          = 105,
+    GCRY_MAC_HMAC_MD5           = 106,
+    GCRY_MAC_HMAC_MD4           = 107,
+    GCRY_MAC_HMAC_RMD160        = 108,
+    GCRY_MAC_HMAC_TIGER1        = 109, /* The fixed TIGER variant */
+    GCRY_MAC_HMAC_WHIRLPOOL     = 110,
+    GCRY_MAC_HMAC_GOSTR3411_94  = 111,
+    GCRY_MAC_HMAC_STRIBOG256    = 112,
+    GCRY_MAC_HMAC_STRIBOG512    = 113,
+    GCRY_MAC_HMAC_MD2           = 114,
+    GCRY_MAC_HMAC_SHA3_224      = 115,
+    GCRY_MAC_HMAC_SHA3_256      = 116,
+    GCRY_MAC_HMAC_SHA3_384      = 117,
+    GCRY_MAC_HMAC_SHA3_512      = 118,
+    GCRY_MAC_HMAC_GOSTR3411_CP  = 119,
+    GCRY_MAC_HMAC_BLAKE2B_512   = 120,
+    GCRY_MAC_HMAC_BLAKE2B_384   = 121,
+    GCRY_MAC_HMAC_BLAKE2B_256   = 122,
+    GCRY_MAC_HMAC_BLAKE2B_160   = 123,
+    GCRY_MAC_HMAC_BLAKE2S_256   = 124,
+    GCRY_MAC_HMAC_BLAKE2S_224   = 125,
+    GCRY_MAC_HMAC_BLAKE2S_160   = 126,
+    GCRY_MAC_HMAC_BLAKE2S_128   = 127,
+    GCRY_MAC_HMAC_SM3           = 128,
+    GCRY_MAC_HMAC_SHA512_256    = 129,
+    GCRY_MAC_HMAC_SHA512_224    = 130,
+
+    GCRY_MAC_CMAC_AES           = 201,
+    GCRY_MAC_CMAC_3DES          = 202,
+    GCRY_MAC_CMAC_CAMELLIA      = 203,
+    GCRY_MAC_CMAC_CAST5         = 204,
+    GCRY_MAC_CMAC_BLOWFISH      = 205,
+    GCRY_MAC_CMAC_TWOFISH       = 206,
+    GCRY_MAC_CMAC_SERPENT       = 207,
+    GCRY_MAC_CMAC_SEED          = 208,
+    GCRY_MAC_CMAC_RFC2268       = 209,
+    GCRY_MAC_CMAC_IDEA          = 210,
+    GCRY_MAC_CMAC_GOST28147     = 211,
+    GCRY_MAC_CMAC_SM4           = 212,
+
+    GCRY_MAC_GMAC_AES           = 401,
+    GCRY_MAC_GMAC_CAMELLIA      = 402,
+    GCRY_MAC_GMAC_TWOFISH       = 403,
+    GCRY_MAC_GMAC_SERPENT       = 404,
+    GCRY_MAC_GMAC_SEED          = 405,
+
+    GCRY_MAC_POLY1305           = 501,
+    GCRY_MAC_POLY1305_AES       = 502,
+    GCRY_MAC_POLY1305_CAMELLIA  = 503,
+    GCRY_MAC_POLY1305_TWOFISH   = 504,
+    GCRY_MAC_POLY1305_SERPENT   = 505,
+    GCRY_MAC_POLY1305_SEED      = 506
+  };
 
-/* Encryption and Signature schemes.  */
-typedef enum gcry_ac_scheme
+/* Flags used with the open function.  */
+enum gcry_mac_flags
   {
-    GCRY_AC_ES_PKCS_V1_5,
-    GCRY_AC_SSA_PKCS_V1_5
-  }
-gcry_ac_scheme_t _GCRY_ATTR_INTERNAL;
+    GCRY_MAC_FLAG_SECURE = 1   /* Allocate all buffers in "secure" memory.  */
+  };
 
-/* AC data.  */
-#define GCRY_AC_FLAG_DEALLOC     (1 << 0)
-#define GCRY_AC_FLAG_COPY        (1 << 1)
-#define GCRY_AC_FLAG_NO_BLINDING (1 << 2)
+/* Create a MAC handle for algorithm ALGO.  FLAGS may be given as an bitwise OR
+   of the gcry_mac_flags values.  CTX maybe NULL or gcry_ctx_t object to be
+   associated with HANDLE.  */
+gcry_error_t gcry_mac_open (gcry_mac_hd_t *handle, int algo,
+                            unsigned int flags, gcry_ctx_t ctx);
 
-/* This type represents a `data set'.  */
-typedef struct gcry_ac_data *gcry_ac_data_t _GCRY_ATTR_INTERNAL;
+/* Close the MAC handle H and release all resource. */
+void gcry_mac_close (gcry_mac_hd_t h);
 
-/* This type represents a single `key', either a secret one or a
-   public one.  */
-typedef struct gcry_ac_key *gcry_ac_key_t _GCRY_ATTR_INTERNAL;
+/* Perform various operations on the MAC object H. */
+gcry_error_t gcry_mac_ctl (gcry_mac_hd_t h, int cmd, void *buffer,
+                           size_t buflen);
 
-/* This type represents a `key pair' containing a secret and a public
-   key.  */
-typedef struct gcry_ac_key_pair *gcry_ac_key_pair_t _GCRY_ATTR_INTERNAL;
+/* Retrieve various information about the MAC algorithm ALGO. */
+gcry_error_t gcry_mac_algo_info (int algo, int what, void *buffer,
+                                 size_t *nbytes);
 
-/* This type represents a `handle' that is needed by functions
-   performing cryptographic operations.  */
-typedef struct gcry_ac_handle *gcry_ac_handle_t _GCRY_ATTR_INTERNAL;
+/* Set KEY of length KEYLEN bytes for the MAC handle HD.  */
+gcry_error_t gcry_mac_setkey (gcry_mac_hd_t hd, const void *key,
+                              size_t keylen);
 
-typedef gpg_error_t (*gcry_ac_data_read_cb_t) (void *opaque,
-                                              unsigned char *buffer,
-                                              size_t *buffer_n)
-  /* */  _GCRY_ATTR_INTERNAL;
+/* Set initialization vector IV of length IVLEN for the MAC handle HD. */
+gcry_error_t gcry_mac_setiv (gcry_mac_hd_t hd, const void *iv,
+                             size_t ivlen);
 
-typedef gpg_error_t (*gcry_ac_data_write_cb_t) (void *opaque,
-                                               unsigned char *buffer,
-                                               size_t buffer_n)
-  /* */  _GCRY_ATTR_INTERNAL;
+/* Pass LENGTH bytes of data in BUFFER to the MAC object HD so that
+   it can update the MAC values.  */
+gcry_error_t gcry_mac_write (gcry_mac_hd_t hd, const void *buffer,
+                             size_t length);
 
-typedef enum
-  {
-    GCRY_AC_IO_READABLE,
-    GCRY_AC_IO_WRITABLE
-  }
-gcry_ac_io_mode_t _GCRY_ATTR_INTERNAL;
+/* Read out the final authentication code from the MAC object HD to BUFFER. */
+gcry_error_t gcry_mac_read (gcry_mac_hd_t hd, void *buffer, size_t *buflen);
 
-typedef enum
-  {
-    GCRY_AC_IO_STRING,
-    GCRY_AC_IO_CALLBACK
-  }
-gcry_ac_io_type_t _GCRY_ATTR_INTERNAL;
+/* Verify the final authentication code from the MAC object HD with BUFFER. */
+gcry_error_t gcry_mac_verify (gcry_mac_hd_t hd, const void *buffer,
+                              size_t buflen);
 
-typedef struct gcry_ac_io
-{
-  /* This is an INTERNAL structure, do NOT use manually.  */
-  gcry_ac_io_mode_t mode _GCRY_ATTR_INTERNAL;
-  gcry_ac_io_type_t type _GCRY_ATTR_INTERNAL;
-  union
-  {
-    union
-    {
-      struct
-      {
-       gcry_ac_data_read_cb_t cb;
-       void *opaque;
-      } callback;
-      struct
-      {
-       unsigned char *data;
-       size_t data_n;
-      } string;
-      void *opaque;
-    } readable;
-    union
-    {
-      struct
-      {
-       gcry_ac_data_write_cb_t cb;
-       void *opaque;
-      } callback;
-      struct
-      {
-       unsigned char **data;
-       size_t *data_n;
-      } string;
-      void *opaque;
-    } writable;
-  } io _GCRY_ATTR_INTERNAL;
-}
-gcry_ac_io_t _GCRY_ATTR_INTERNAL;
-
-/* The caller of gcry_ac_key_pair_generate can provide one of these
-   structures in order to influence the key generation process in an
-   algorithm-specific way.  */
-typedef struct gcry_ac_key_spec_rsa
-{
-  gcry_mpi_t e;                 /* E to use.  */
-} gcry_ac_key_spec_rsa_t _GCRY_ATTR_INTERNAL;
+/* Retrieve the algorithm used with MAC. */
+int gcry_mac_get_algo (gcry_mac_hd_t hd);
 
-/* Structure used for passing data to the implementation of the
-   `EME-PKCS-V1_5' encoding method.  */
-typedef struct gcry_ac_eme_pkcs_v1_5
-{
-  size_t key_size;
-} gcry_ac_eme_pkcs_v1_5_t _GCRY_ATTR_INTERNAL;
+/* Retrieve the length in bytes of the MAC yielded by algorithm ALGO. */
+unsigned int gcry_mac_get_algo_maclen (int algo);
 
-typedef enum gcry_md_algos gcry_md_algo_t _GCRY_ATTR_INTERNAL;
+/* Retrieve the default key length in bytes used with algorithm A. */
+unsigned int gcry_mac_get_algo_keylen (int algo);
 
-/* Structure used for passing data to the implementation of the
-   `EMSA-PKCS-V1_5' encoding method.  */
-typedef struct gcry_ac_emsa_pkcs_v1_5
-{
-  gcry_md_algo_t md;
-  size_t em_n;
-} gcry_ac_emsa_pkcs_v1_5_t _GCRY_ATTR_INTERNAL;
+/* Map the MAC algorithm whose ID is contained in ALGORITHM to a
+   string representation of the algorithm name.  For unknown algorithm
+   IDs this function returns "?".  */
+const char *gcry_mac_algo_name (int algorithm) _GCRY_GCC_ATTR_PURE;
 
-/* Structure used for passing data to the implementation of the
-   `SSA-PKCS-V1_5' signature scheme.  */
-typedef struct gcry_ac_ssa_pkcs_v1_5
-{
-  gcry_md_algo_t md;
-} gcry_ac_ssa_pkcs_v1_5_t _GCRY_ATTR_INTERNAL;
-#endif /* !GCRYPT_NO_DEPRECATED || !_GCRYPT_IN_LIBGCRYPT */
+/* Map the algorithm name NAME to an MAC algorithm ID.  Return 0 if
+   the algorithm name is not known. */
+int gcry_mac_map_name (const char *name) _GCRY_GCC_ATTR_PURE;
 
+/* Reset the handle to the state after open/setkey.  */
+#define gcry_mac_reset(h)  gcry_mac_ctl ((h), GCRYCTL_RESET, NULL, 0)
 
-#ifndef GCRYPT_NO_DEPRECATED
-/* Returns a new, empty data set in DATA.  */
-gcry_error_t gcry_ac_data_new (gcry_ac_data_t *data)
-  /* */                       _GCRY_ATTR_INTERNAL;
-
-/* Destroy the data set DATA.  */
-void gcry_ac_data_destroy (gcry_ac_data_t data)
-  /* */                       _GCRY_ATTR_INTERNAL;
-
-/* Create a copy of the data set DATA and store it in DATA_CP.  */
-gcry_error_t gcry_ac_data_copy (gcry_ac_data_t *data_cp,
-                                gcry_ac_data_t data)
-  /* */                       _GCRY_ATTR_INTERNAL;
-
-/* Return the number of named MPI values inside of the data set
-   DATA.  */
-unsigned int gcry_ac_data_length (gcry_ac_data_t data)
-  /* */                       _GCRY_ATTR_INTERNAL;
-
-/* Destroy any values contained in the data set DATA.  */
-void gcry_ac_data_clear (gcry_ac_data_t data)
-  /* */ _GCRY_ATTR_INTERNAL;
-
-/* Add the value MPI to DATA with the label NAME.  If FLAGS contains
-   GCRY_AC_FLAG_DATA_COPY, the data set will contain copies of NAME
-   and MPI.  If FLAGS contains GCRY_AC_FLAG_DATA_DEALLOC or
-   GCRY_AC_FLAG_DATA_COPY, the values contained in the data set will
-   be deallocated when they are to be removed from the data set.  */
-gcry_error_t gcry_ac_data_set (gcry_ac_data_t data, unsigned int flags,
-                               const char *name, gcry_mpi_t mpi)
-  /* */ _GCRY_ATTR_INTERNAL;
-
-/* Store the value labelled with NAME found in DATA in MPI.  If FLAGS
-   contains GCRY_AC_FLAG_COPY, store a copy of the MPI value contained
-   in the data set.  MPI may be NULL.  */
-gcry_error_t gcry_ac_data_get_name (gcry_ac_data_t data, unsigned int flags,
-                                    const char *name, gcry_mpi_t *mpi)
-  /* */ _GCRY_ATTR_INTERNAL;
-
-/* Stores in NAME and MPI the named MPI value contained in the data
-   set DATA with the index IDX.  If FLAGS contains GCRY_AC_FLAG_COPY,
-   store copies of the values contained in the data set. NAME or MPI
-   may be NULL.  */
-gcry_error_t gcry_ac_data_get_index (gcry_ac_data_t data, unsigned int flags,
-                                     unsigned int idx,
-                                     const char **name, gcry_mpi_t *mpi)
-  /* */ _GCRY_ATTR_INTERNAL;
-
-/* Convert the data set DATA into a new S-Expression, which is to be
-   stored in SEXP, according to the identifiers contained in
-   IDENTIFIERS.  */
-gcry_error_t gcry_ac_data_to_sexp (gcry_ac_data_t data, gcry_sexp_t *sexp,
-                                  const char **identifiers)
-  /* */ _GCRY_ATTR_INTERNAL;
-
-/* Create a new data set, which is to be stored in DATA_SET, from the
-   S-Expression SEXP, according to the identifiers contained in
-   IDENTIFIERS.  */
-gcry_error_t gcry_ac_data_from_sexp (gcry_ac_data_t *data, gcry_sexp_t sexp,
-                                    const char **identifiers)
-  /* */ _GCRY_ATTR_INTERNAL;
-
-/* Initialize AC_IO according to MODE, TYPE and the variable list of
-   arguments.  The list of variable arguments to specify depends on
-   the given TYPE.  */
-void gcry_ac_io_init (gcry_ac_io_t *ac_io, gcry_ac_io_mode_t mode,
-                     gcry_ac_io_type_t type, ...)
-  /* */ _GCRY_ATTR_INTERNAL;
-
-/* Initialize AC_IO according to MODE, TYPE and the variable list of
-   arguments AP.  The list of variable arguments to specify depends on
-   the given TYPE.  */
-void gcry_ac_io_init_va (gcry_ac_io_t *ac_io, gcry_ac_io_mode_t mode,
-                        gcry_ac_io_type_t type, va_list ap)
-  /* */ _GCRY_ATTR_INTERNAL;
-
-/* Create a new ac handle.  */
-gcry_error_t gcry_ac_open (gcry_ac_handle_t *handle,
-                           gcry_ac_id_t algorithm, unsigned int flags)
-  /* */ _GCRY_ATTR_INTERNAL;
-
-/* Destroy an ac handle.  */
-void gcry_ac_close (gcry_ac_handle_t handle)
-  /* */ _GCRY_ATTR_INTERNAL;
-
-/* Initialize a key from a given data set.  */
-gcry_error_t gcry_ac_key_init (gcry_ac_key_t *key, gcry_ac_handle_t handle,
-                               gcry_ac_key_type_t type, gcry_ac_data_t data)
-  /* */ _GCRY_ATTR_INTERNAL;
-
-/* Generates a new key pair via the handle HANDLE of NBITS bits and
-   stores it in KEY_PAIR.  In case non-standard settings are wanted, a
-   pointer to a structure of type gcry_ac_key_spec_<algorithm>_t,
-   matching the selected algorithm, can be given as KEY_SPEC.
-   MISC_DATA is not used yet.  */
-gcry_error_t gcry_ac_key_pair_generate (gcry_ac_handle_t handle,
-                                        unsigned int nbits, void *spec,
-                                        gcry_ac_key_pair_t *key_pair,
-                                        gcry_mpi_t **misc_data)
-  /* */ _GCRY_ATTR_INTERNAL;
-
-/* Returns the key of type WHICH out of the key pair KEY_PAIR.  */
-gcry_ac_key_t gcry_ac_key_pair_extract (gcry_ac_key_pair_t key_pair,
-                                        gcry_ac_key_type_t which)
-  /* */ _GCRY_ATTR_INTERNAL;
-
-/* Returns the data set contained in the key KEY.  */
-gcry_ac_data_t gcry_ac_key_data_get (gcry_ac_key_t key)
-  /* */ _GCRY_ATTR_INTERNAL;
-
-/* Verifies that the key KEY is sane via HANDLE.  */
-gcry_error_t gcry_ac_key_test (gcry_ac_handle_t handle, gcry_ac_key_t key)
-  /* */ _GCRY_ATTR_INTERNAL;
-
-/* Stores the number of bits of the key KEY in NBITS via HANDLE.  */
-gcry_error_t gcry_ac_key_get_nbits (gcry_ac_handle_t handle,
-                                    gcry_ac_key_t key, unsigned int *nbits)
-  /* */ _GCRY_ATTR_INTERNAL;
-
-/* Writes the 20 byte long key grip of the key KEY to KEY_GRIP via
-   HANDLE.  */
-gcry_error_t gcry_ac_key_get_grip (gcry_ac_handle_t handle, gcry_ac_key_t key,
-                                   unsigned char *key_grip)
-  /* */ _GCRY_ATTR_INTERNAL;
-
-/* Destroy a key.  */
-void gcry_ac_key_destroy (gcry_ac_key_t key)
-  /* */ _GCRY_ATTR_INTERNAL;
-
-/* Destroy a key pair.  */
-void gcry_ac_key_pair_destroy (gcry_ac_key_pair_t key_pair)
-  /* */ _GCRY_ATTR_INTERNAL;
-
-/* Encodes a message according to the encoding method METHOD.  OPTIONS
-   must be a pointer to a method-specific structure
-   (gcry_ac_em*_t).  */
-gcry_error_t gcry_ac_data_encode (gcry_ac_em_t method,
-                                 unsigned int flags, void *options,
-                                 gcry_ac_io_t *io_read,
-                                 gcry_ac_io_t *io_write)
-  /* */ _GCRY_ATTR_INTERNAL;
-
-/* Decodes a message according to the encoding method METHOD.  OPTIONS
-   must be a pointer to a method-specific structure
-   (gcry_ac_em*_t).  */
-gcry_error_t gcry_ac_data_decode (gcry_ac_em_t method,
-                                 unsigned int flags, void *options,
-                                 gcry_ac_io_t *io_read,
-                                 gcry_ac_io_t *io_write)
-  /* */ _GCRY_ATTR_INTERNAL;
-
-/* Encrypt the plain text MPI value DATA_PLAIN with the key KEY under
-   the control of the flags FLAGS and store the resulting data set
-   into DATA_ENCRYPTED.  */
-gcry_error_t gcry_ac_data_encrypt (gcry_ac_handle_t handle,
-                                   unsigned int flags,
-                                   gcry_ac_key_t key,
-                                   gcry_mpi_t data_plain,
-                                   gcry_ac_data_t *data_encrypted)
-  /* */ _GCRY_ATTR_INTERNAL;
-
-/* Decrypt the decrypted data contained in the data set DATA_ENCRYPTED
-   with the key KEY under the control of the flags FLAGS and store the
-   resulting plain text MPI value in DATA_PLAIN.  */
-gcry_error_t gcry_ac_data_decrypt (gcry_ac_handle_t handle,
-                                   unsigned int flags,
-                                   gcry_ac_key_t key,
-                                   gcry_mpi_t *data_plain,
-                                   gcry_ac_data_t data_encrypted)
-  /* */ _GCRY_ATTR_INTERNAL;
-
-/* Sign the data contained in DATA with the key KEY and store the
-   resulting signature in the data set DATA_SIGNATURE.  */
-gcry_error_t gcry_ac_data_sign (gcry_ac_handle_t handle,
-                                gcry_ac_key_t key,
-                                gcry_mpi_t data,
-                                gcry_ac_data_t *data_signature)
-  /* */ _GCRY_ATTR_INTERNAL;
-
-/* Verify that the signature contained in the data set DATA_SIGNATURE
-   is indeed the result of signing the data contained in DATA with the
-   secret key belonging to the public key KEY.  */
-gcry_error_t gcry_ac_data_verify (gcry_ac_handle_t handle,
-                                  gcry_ac_key_t key,
-                                  gcry_mpi_t data,
-                                  gcry_ac_data_t data_signature)
-  /* */ _GCRY_ATTR_INTERNAL;
-
-/* Encrypts the plain text readable from IO_MESSAGE through HANDLE
-   with the public key KEY according to SCHEME, FLAGS and OPTS.  If
-   OPTS is not NULL, it has to be a pointer to a structure specific to
-   the chosen scheme (gcry_ac_es_*_t).  The encrypted message is
-   written to IO_CIPHER. */
-gcry_error_t gcry_ac_data_encrypt_scheme (gcry_ac_handle_t handle,
-                                         gcry_ac_scheme_t scheme,
-                                         unsigned int flags, void *opts,
-                                         gcry_ac_key_t key,
-                                         gcry_ac_io_t *io_message,
-                                         gcry_ac_io_t *io_cipher)
-  /* */ _GCRY_ATTR_INTERNAL;
-
-/* Decrypts the cipher text readable from IO_CIPHER through HANDLE
-   with the secret key KEY according to SCHEME, @var{flags} and OPTS.
-   If OPTS is not NULL, it has to be a pointer to a structure specific
-   to the chosen scheme (gcry_ac_es_*_t).  The decrypted message is
-   written to IO_MESSAGE.  */
-gcry_error_t gcry_ac_data_decrypt_scheme (gcry_ac_handle_t handle,
-                                         gcry_ac_scheme_t scheme,
-                                         unsigned int flags, void *opts,
-                                         gcry_ac_key_t key,
-                                         gcry_ac_io_t *io_cipher,
-                                         gcry_ac_io_t *io_message)
-  /* */ _GCRY_ATTR_INTERNAL;
-
-/* Signs the message readable from IO_MESSAGE through HANDLE with the
-   secret key KEY according to SCHEME, FLAGS and OPTS.  If OPTS is not
-   NULL, it has to be a pointer to a structure specific to the chosen
-   scheme (gcry_ac_ssa_*_t).  The signature is written to
-   IO_SIGNATURE.  */
-gcry_error_t gcry_ac_data_sign_scheme (gcry_ac_handle_t handle,
-                                      gcry_ac_scheme_t scheme,
-                                      unsigned int flags, void *opts,
-                                      gcry_ac_key_t key,
-                                      gcry_ac_io_t *io_message,
-                                      gcry_ac_io_t *io_signature)
-  /* */ _GCRY_ATTR_INTERNAL;
-
-/* Verifies through HANDLE that the signature readable from
-   IO_SIGNATURE is indeed the result of signing the message readable
-   from IO_MESSAGE with the secret key belonging to the public key KEY
-   according to SCHEME and OPTS.  If OPTS is not NULL, it has to be an
-   anonymous structure (gcry_ac_ssa_*_t) specific to the chosen
-   scheme.  */
-gcry_error_t gcry_ac_data_verify_scheme (gcry_ac_handle_t handle,
-                                        gcry_ac_scheme_t scheme,
-                                        unsigned int flags, void *opts,
-                                        gcry_ac_key_t key,
-                                        gcry_ac_io_t *io_message,
-                                        gcry_ac_io_t *io_signature)
-  /* */ _GCRY_ATTR_INTERNAL;
-
-/* Store the textual representation of the algorithm whose id is given
-   in ALGORITHM in NAME.  This function is deprecated; use
-   gcry_pk_algo_name. */
-gcry_error_t gcry_ac_id_to_name (gcry_ac_id_t algorithm,
-                                 const char **name)
-     /* */                      _GCRY_GCC_ATTR_DEPRECATED;
-/* Store the numeric ID of the algorithm whose textual representation
-   is contained in NAME in ALGORITHM.  This function is deprecated;
-   use gcry_pk_map_name. */
-gcry_error_t gcry_ac_name_to_id (const char *name,
-                                 gcry_ac_id_t *algorithm)
-     /* */                      _GCRY_GCC_ATTR_DEPRECATED;
-#endif /*GCRYPT_NO_DEPRECATED*/
+/* Return 0 if the algorithm A is available for use. */
+#define gcry_mac_test_algo(a) \
+            gcry_mac_algo_info( (a), GCRYCTL_TEST_ALGO, NULL, NULL )
 
 
 /******************************
@@ -1517,7 +1596,17 @@ enum gcry_kdf_algos
     GCRY_KDF_SALTED_S2K = 17,
     GCRY_KDF_ITERSALTED_S2K = 19,
     GCRY_KDF_PBKDF1 = 33,
-    GCRY_KDF_PBKDF2 = 34
+    GCRY_KDF_PBKDF2 = 34,
+    GCRY_KDF_SCRYPT = 48,
+    GCRY_KDF_ARGON2   = 64,
+    GCRY_KDF_BALLOON  = 65
+  };
+
+enum gcry_kdf_subalgo_argon2
+  {
+    GCRY_KDF_ARGON2D  = 0,
+    GCRY_KDF_ARGON2I  = 1,
+    GCRY_KDF_ARGON2ID = 2
   };
 
 /* Derive a key from a passphrase.  */
@@ -1527,8 +1616,33 @@ gpg_error_t gcry_kdf_derive (const void *passphrase, 
size_t passphraselen,
                              unsigned long iterations,
                              size_t keysize, void *keybuffer);
 
+/* Another API to derive a key from a passphrase.  */
+typedef struct gcry_kdf_handle *gcry_kdf_hd_t;
 
+typedef void (*gcry_kdf_job_fn_t) (void *priv);
+typedef int (*gcry_kdf_dispatch_job_fn_t) (void *jobs_context,
+                                           gcry_kdf_job_fn_t job_fn,
+                                           void *job_priv);
+typedef int (*gcry_kdf_wait_all_jobs_fn_t) (void *jobs_context);
 
+/* Exposed structure for KDF computation to decouple thread functionality.  */
+typedef struct gcry_kdf_thread_ops
+{
+  void *jobs_context;
+  gcry_kdf_dispatch_job_fn_t dispatch_job;
+  gcry_kdf_wait_all_jobs_fn_t wait_all_jobs;
+} gcry_kdf_thread_ops_t;
+
+gcry_error_t gcry_kdf_open (gcry_kdf_hd_t *hd, int algo, int subalgo,
+                            const unsigned long *param, unsigned int paramlen,
+                            const void *passphrase, size_t passphraselen,
+                            const void *salt, size_t saltlen,
+                            const void *key, size_t keylen,
+                            const void *ad, size_t adlen);
+gcry_error_t gcry_kdf_compute (gcry_kdf_hd_t h,
+                               const gcry_kdf_thread_ops_t *ops);
+gcry_error_t gcry_kdf_final (gcry_kdf_hd_t h, size_t resultlen, void *result);
+void gcry_kdf_close (gcry_kdf_hd_t h);
 
 /************************************
  *                                  *
@@ -1536,6 +1650,14 @@ gpg_error_t gcry_kdf_derive (const void *passphrase, 
size_t passphraselen,
  *                                  *
  ************************************/
 
+/* The type of the random number generator.  */
+enum gcry_rng_types
+  {
+    GCRY_RNG_TYPE_STANDARD   = 1, /* The default CSPRNG generator.  */
+    GCRY_RNG_TYPE_FIPS       = 2, /* The FIPS X9.31 AES generator.  */
+    GCRY_RNG_TYPE_SYSTEM     = 3  /* The system's native generator. */
+  };
+
 /* The possible values for the random quality.  The rule of thumb is
    to use STRONG for session keys and VERY_STRONG for key material.
    WEAK is usually an alias for STRONG and should not be used anymore
@@ -1571,8 +1693,7 @@ void *gcry_random_bytes (size_t nbytes, enum 
gcry_random_level level)
                          _GCRY_GCC_ATTR_MALLOC;
 
 /* Return NBYTES of allocated random using a random numbers of quality
-   LEVEL.  The random numbers are created returned in "secure"
-   memory. */
+   LEVEL.  The random is returned in "secure" memory.  */
 void *gcry_random_bytes_secure (size_t nbytes, enum gcry_random_level level)
                                 _GCRY_GCC_ATTR_MALLOC;
 
@@ -1634,7 +1755,7 @@ gcry_error_t gcry_prime_generate (gcry_mpi_t *prime,
 /* Find a generator for PRIME where the factorization of (prime-1) is
    in the NULL terminated array FACTORS. Return the generator as a
    newly allocated MPI in R_G.  If START_G is not NULL, use this as
-   teh start for the search. */
+   the start for the search. */
 gcry_error_t gcry_prime_group_generator (gcry_mpi_t *r_g,
                                          gcry_mpi_t prime,
                                          gcry_mpi_t *factors,
@@ -1645,7 +1766,7 @@ gcry_error_t gcry_prime_group_generator (gcry_mpi_t *r_g,
 void gcry_prime_release_factors (gcry_mpi_t *factors);
 
 
-/* Check wether the number X is prime.  */
+/* Check whether the number X is prime.  */
 gcry_error_t gcry_prime_check (gcry_mpi_t x, unsigned int flags);
 
 
@@ -1656,6 +1777,19 @@ gcry_error_t gcry_prime_check (gcry_mpi_t x, unsigned 
int flags);
  *                                  *
  ************************************/
 
+/* Release the context object CTX.  */
+void gcry_ctx_release (gcry_ctx_t ctx);
+
+/* Log data using Libgcrypt's own log interface.  */
+void gcry_log_debug (const char *fmt, ...) _GCRY_GCC_ATTR_PRINTF(1,2);
+void gcry_log_debughex (const char *text, const void *buffer, size_t length);
+void gcry_log_debugmpi (const char *text, gcry_mpi_t mpi);
+void gcry_log_debugpnt (const char *text,
+                        gcry_mpi_point_t point, gcry_ctx_t ctx);
+void gcry_log_debugsxp (const char *text, gcry_sexp_t sexp);
+
+char *gcry_get_config (int mode, const char *what);
+
 /* Log levels used by the internal logging facility. */
 enum gcry_log_levels
   {
@@ -1742,9 +1876,32 @@ int gcry_is_secure (const void *a) _GCRY_GCC_ATTR_PURE;
 /* Return true if Libgcrypt is in FIPS mode.  */
 #define gcry_fips_mode_active()  !!gcry_control (GCRYCTL_FIPS_MODE_P, 0)
 
-
-/* Include support for Libgcrypt modules.  */
-#include <gcrypt-module.h>
+/* Variant of gcry_pk_sign which takes as additional parameter a HD
+ * handle for hash and an optional context.  The hash algorithm used by the
+ * handle needs to be enabled and input needs to be supplied beforehand.
+ * DATA-TMPL specifies a template to compose an S-expression to be signed.
+ * A template should include '(hash %s %b)' or '(hash ALGONAME %b)'.
+ * For the former case, '%s' is substituted by the string of algorithm
+ * of gcry_md_get_algo (HD) and when gcry_md_read is called, ALGO=0 is
+ * used internally.  For the latter case, hash algorithm by ALGONAME
+ * is used when gcry_md_read is called internally.
+ * The hash handle must not yet been finalized; the function
+ * takes a copy of the state and does a finalize on the copy.  This
+ * function shall be used if a policy requires that hashing and signing
+ * is done by the same function.  CTX is currently not used and should
+ * be passed as NULL.  */
+gcry_error_t gcry_pk_hash_sign (gcry_sexp_t *result,
+                                const char *data_tmpl, gcry_sexp_t skey,
+                                gcry_md_hd_t hd, gcry_ctx_t ctx);
+
+/* Variant of gcry_pk_verify which takes as additional parameter a HD
+ * handle for hash and an optional context.  Similar to gcry_pk_hash_sign.  */
+gcry_error_t gcry_pk_hash_verify (gcry_sexp_t sigval,
+                                  const char *data_tmpl, gcry_sexp_t pkey,
+                                  gcry_md_hd_t hd, gcry_ctx_t ctx);
+
+gcry_error_t gcry_pk_random_override_new (gcry_ctx_t *r_ctx,
+                                          const unsigned char *p, size_t len);
 
 #if 0 /* (Keep Emacsens' auto-indent happy.) */
 {
diff --git a/grub-core/lib/libgcrypt/src/gen-note-integrity.sh 
b/grub-core/lib/libgcrypt/src/gen-note-integrity.sh
new file mode 100755
index 000000000..50071bf5d
--- /dev/null
+++ b/grub-core/lib/libgcrypt/src/gen-note-integrity.sh
@@ -0,0 +1,123 @@
+#! /bin/sh
+
+#
+# gen-note-integrity.sh - Build tool to generate hmac hash section
+#
+# Copyright (C) 2022  g10 Code GmbH
+#
+# This file is part of libgcrypt.
+#
+# libgcrypt is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public License
+# as published by the Free Software Foundation; either version 2.1 of
+# the License, or (at your option) any later version.
+#
+# libgcrypt is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this program; if not, see <https://www.gnu.org/licenses/>.
+#
+
+set -e
+
+#
+# Following variables should be defined to invoke this script
+#
+#   READELF
+#   AWK
+#   ECHO_N
+#
+
+######## Emit ElfN_Nhdr for note.fdo.integrity ########
+
+NOTE_NAME="FDO"
+
+# n_namesz = 4 including NUL
+printf '%b' '\004'
+printf '%b' '\000'
+printf '%b' '\000'
+printf '%b' '\000'
+
+# n_descsz = 32
+printf '%b' '\040'
+printf '%b' '\000'
+printf '%b' '\000'
+printf '%b' '\000'
+
+# n_type: NT_FDO_INTEGRITY=0xCAFE2A8E
+printf '%b' '\312'
+printf '%b' '\376'
+printf '%b' '\052'
+printf '%b' '\216'
+
+# the name
+echo $ECHO_N $NOTE_NAME
+printf '%b' '\000'
+
+# Here comes the alignment.  As the size of name is 4, it's none.
+# NO PADDING HERE.
+
+######## Rest is to generate hmac hash ########
+
+AWK_VERSION_OUTPUT=$($AWK 'BEGIN { print PROCINFO["version"] }')
+if test -n "$AWK_VERSION_OUTPUT"; then
+    # It's GNU awk, which supports PROCINFO.
+    AWK_OPTION=--non-decimal-data
+fi
+
+FILE=.libs/libgcrypt.so
+
+#
+# Fixup the ELF header to clean up section information
+#
+BYTE002=$(printf '%b' '\002')
+CLASS_BYTE=$(dd ibs=1 skip=4 count=1 if=$FILE status=none)
+if test "$CLASS_BYTE" = "$BYTE002"; then
+    CLASS=64
+    HEADER_SIZE=64
+else
+    CLASS=32
+    HEADER_SIZE=52
+fi
+
+if test $CLASS -eq 64; then
+    dd ibs=1         count=40 if=$FILE     status=none
+    dd ibs=1         count=8  if=/dev/zero status=none
+    dd ibs=1 skip=48 count=10 if=$FILE     status=none
+    dd ibs=1         count=6  if=/dev/zero status=none
+else
+    dd ibs=1         count=32 if=$FILE     status=none
+    dd ibs=1         count=4  if=/dev/zero status=none
+    dd ibs=1 skip=36 count=10 if=$FILE     status=none
+    dd ibs=1         count=6  if=/dev/zero status=none
+fi > header-fixed.bin
+
+#
+# Compute the end of segments, and emit the COUNT to read
+# (For each segment in program headers, calculate the offset
+#  and select the maximum)
+#
+# This require computation in hexadecimal, and GNU awk needs
+# --non-decimal-data option
+#
+COUNT=$($READELF --wide --program-headers $FILE | \
+         $AWK $AWK_OPTION \
+"BEGIN { max_offset=0 }
+/^\$/ { if (program_headers_start) program_headers_end=1 }
+(program_headers_start && !program_headers_end) { offset = \$2 + \$5 }
+(max_offset < offset) { max_offset = offset }
+/^  Type/ { program_headers_start=1 }
+END { print max_offset- $HEADER_SIZE }")
+
+#
+# Feed the header fixed and all segments to HMAC256
+# to generate hmac hash of the FILE
+#
+(cat header-fixed.bin; \
+ dd ibs=1 skip=$HEADER_SIZE count=$COUNT if=$FILE status=none) \
+ | ./hmac256 --stdkey --binary
+
+rm -f header-fixed.bin
diff --git a/grub-core/lib/libgcrypt/src/global.c 
b/grub-core/lib/libgcrypt/src/global.c
index 9b9d531ad..9a356a041 100644
--- a/grub-core/lib/libgcrypt/src/global.c
+++ b/grub-core/lib/libgcrypt/src/global.c
@@ -1,6 +1,8 @@
 /* global.c  - global control functions
  * Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003
- *               2004, 2005, 2006, 2008, 2011  Free Software Foundation, Inc.
+ *               2004, 2005, 2006, 2008, 2011,
+ *               2012  Free Software Foundation, Inc.
+ * Copyright (C) 2013, 2014, 2017 g10 Code GmbH
  *
  * This file is part of Libgcrypt.
  *
@@ -33,10 +35,11 @@
 #endif /*HAVE_SYSLOG*/
 
 #include "g10lib.h"
+#include "gcrypt-testapi.h"
 #include "cipher.h"
 #include "stdmem.h" /* our own memory allocator */
 #include "secmem.h" /* our own secmem allocator */
-#include "ath.h"
+
 
 
 
@@ -51,26 +54,15 @@ static unsigned int debug_flags;
 static int force_fips_mode;
 
 /* Controlled by global_init().  */
-static int any_init_done;
+int _gcry_global_any_init_done;
 
-/* A table to map hardware features to a string.  */
-static struct
-{
-  unsigned int flag;
-  const char *desc;
-} hwflist[] =
-  {
-    { HWF_PADLOCK_RNG, "padlock-rng" },
-    { HWF_PADLOCK_AES, "padlock-aes" },
-    { HWF_PADLOCK_SHA, "padlock-sha" },
-    { HWF_PADLOCK_MMUL,"padlock-mmul"},
-    { HWF_INTEL_AESNI, "intel-aesni" },
-    { 0, NULL}
-  };
-
-/* A bit vector with the hardware features which shall not be used.
-   This variable must be set prior to any initialization.  */
-static unsigned int disabled_hw_features;
+/*
+ * Functions called before and after blocking syscalls.
+ * Initialized by global_init and used via
+ * _gcry_pre_syscall and _gcry_post_syscall.
+ */
+static void (*pre_syscall_func)(void);
+static void (*post_syscall_func)(void);
 
 
 /* Memory management. */
@@ -84,6 +76,8 @@ static gcry_handler_no_mem_t outofcore_handler;
 static void *outofcore_handler_value;
 static int no_secure_memory;
 
+/* Prototypes.  */
+static gpg_err_code_t external_lock_test (int cmd);
 
 
 
@@ -97,41 +91,48 @@ global_init (void)
 {
   gcry_error_t err = 0;
 
-  if (any_init_done)
+  if (_gcry_global_any_init_done)
     return;
-  any_init_done = 1;
+  _gcry_global_any_init_done = 1;
 
-  /* Initialize our portable thread/mutex wrapper.  */
-  err = ath_init ();
-  if (err)
-    goto fail;
+  /* Tell the random module that we have seen an init call.  */
+  _gcry_set_preferred_rng_type (0);
+
+  /* Get the system call clamp functions.  */
+  if (!pre_syscall_func)
+    gpgrt_get_syscall_clamp (&pre_syscall_func, &post_syscall_func);
 
   /* See whether the system is in FIPS mode.  This needs to come as
-     early as possible put after the ATH has been initialized.  */
+     early as possible but after ATH has been initialized.  */
   _gcry_initialize_fips_mode (force_fips_mode);
 
   /* Before we do any other initialization we need to test available
      hardware features.  */
-  _gcry_detect_hw_features (disabled_hw_features);
+  _gcry_detect_hw_features ();
 
+  /* Initialize the modules - this is mainly allocating some memory and
+     creating mutexes.  */
   err = _gcry_cipher_init ();
   if (err)
     goto fail;
   err = _gcry_md_init ();
+  if (err)
+    goto fail;
+  err = _gcry_mac_init ();
   if (err)
     goto fail;
   err = _gcry_pk_init ();
   if (err)
     goto fail;
-#if 0
-  /* Hmmm, as of now ac_init does nothing. */
-  if ( !fips_mode () )
-    {
-      err = _gcry_ac_init ();
-      if (err)
-        goto fail;
-    }
-#endif
+  err = _gcry_primegen_init ();
+  if (err)
+    goto fail;
+  err = _gcry_secmem_module_init ();
+  if (err)
+    goto fail;
+  err = _gcry_mpi_init ();
+  if (err)
+    goto fail;
 
   return;
 
@@ -139,6 +140,25 @@ global_init (void)
   BUG ();
 }
 
+#ifdef ENABLE_HMAC_BINARY_CHECK
+# if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 7 )
+# define GCC_ATTR_CONSTRUCTOR  __attribute__ ((__constructor__))
+
+static void GCC_ATTR_CONSTRUCTOR
+_gcry_global_constructor (void)
+{
+  force_fips_mode = _gcry_fips_to_activate ();
+  if (force_fips_mode)
+    {
+      no_secure_memory = 1;
+      global_init ();
+      _gcry_fips_run_selftests (0);
+      _gcry_random_close_fds ();
+      no_secure_memory = 0;
+    }
+}
+# endif
+#endif /* ENABLE_HMAC_BINARY_CHECK */
 
 /* This function is called by the macro fips_is_operational and makes
    sure that the minimal initialization has been done.  This is far
@@ -160,7 +180,7 @@ global_init (void)
 int
 _gcry_global_is_operational (void)
 {
-  if (!any_init_done)
+  if (!_gcry_global_any_init_done)
     {
 #ifdef HAVE_SYSLOG
       syslog (LOG_USER|LOG_WARNING, "Libgcrypt warning: "
@@ -229,7 +249,7 @@ parse_version_string( const char *s, int *major, int 
*minor, int *micro )
    If a NULL is passed to this function, no check is done, but the
    string representation of the library is simply returned.  */
 const char *
-gcry_check_version( const char *req_version )
+_gcry_check_version (const char *req_version)
 {
     const char *ver = VERSION;
     int my_major, my_minor, my_micro;
@@ -260,7 +280,8 @@ gcry_check_version( const char *req_version )
     /* Compare version numbers.  */
     if ( my_major > rq_major
        || (my_major == rq_major && my_minor > rq_minor)
-       || (my_major == rq_major && my_minor == rq_minor                        
                                 && my_micro > rq_micro)
+       || (my_major == rq_major && my_minor == rq_minor
+           && my_micro > rq_micro)
        || (my_major == rq_major && my_minor == rq_minor
                                  && my_micro == rq_micro))
       {
@@ -272,54 +293,232 @@ gcry_check_version( const char *req_version )
 
 
 static void
-print_config ( int (*fnc)(FILE *fp, const char *format, ...), FILE *fp)
+print_config (const char *what, gpgrt_stream_t fp)
 {
-  unsigned int hwf;
   int i;
+  const char *s;
+
+  if (!what || !strcmp (what, "version"))
+    {
+      gpgrt_fprintf (fp, "version:%s:%x:%s:%x:\n",
+                     VERSION, GCRYPT_VERSION_NUMBER,
+                     GPGRT_VERSION, GPGRT_VERSION_NUMBER);
+    }
+  if (!what || !strcmp (what, "cc"))
+    {
+      gpgrt_fprintf (fp, "cc:%d:%s:\n",
+#if GPGRT_VERSION_NUMBER >= 0x011b00 /* 1.27 */
+                     GPGRT_GCC_VERSION
+#else
+                     _GPG_ERR_GCC_VERSION /* Due to a bug in gpg-error.h.  */
+#endif
+                     ,
+#ifdef __clang__
+                     "clang:" __VERSION__
+#elif __GNUC__
+                     "gcc:" __VERSION__
+#else
+                     ":"
+#endif
+                     );
+    }
 
-  fnc (fp, "version:%s:\n", VERSION);
-  fnc (fp, "ciphers:%s:\n", LIBGCRYPT_CIPHERS);
-  fnc (fp, "pubkeys:%s:\n", LIBGCRYPT_PUBKEY_CIPHERS);
-  fnc (fp, "digests:%s:\n", LIBGCRYPT_DIGESTS);
-  fnc (fp, "rnd-mod:"
+  if (!what || !strcmp (what, "ciphers"))
+    gpgrt_fprintf (fp, "ciphers:%s:\n", LIBGCRYPT_CIPHERS);
+  if (!what || !strcmp (what, "pubkeys"))
+    gpgrt_fprintf (fp, "pubkeys:%s:\n", LIBGCRYPT_PUBKEY_CIPHERS);
+  if (!what || !strcmp (what, "digests"))
+    gpgrt_fprintf (fp, "digests:%s:\n", LIBGCRYPT_DIGESTS);
+
+  if (!what || !strcmp (what, "rnd-mod"))
+    {
+      gpgrt_fprintf (fp, "rnd-mod:"
 #if USE_RNDEGD
-                "egd:"
+                     "egd:"
 #endif
-#if USE_RNDLINUX
-                "linux:"
+#if USE_RNDGETENTROPY
+                     "getentropy:"
+#endif
+#if USE_RNDOLDLINUX
+                     "oldlinux:"
 #endif
 #if USE_RNDUNIX
-                "unix:"
+                     "unix:"
 #endif
 #if USE_RNDW32
-                "w32:"
+                     "w32:"
+#endif
+                     "\n");
+    }
+
+  if (!what || !strcmp (what, "cpu-arch"))
+    {
+      gpgrt_fprintf (fp, "cpu-arch:"
+#if defined(HAVE_CPU_ARCH_X86)
+                     "x86"
+#elif defined(HAVE_CPU_ARCH_ALPHA)
+                     "alpha"
+#elif defined(HAVE_CPU_ARCH_SPARC)
+                     "sparc"
+#elif defined(HAVE_CPU_ARCH_MIPS)
+                     "mips"
+#elif defined(HAVE_CPU_ARCH_M68K)
+                     "m68k"
+#elif defined(HAVE_CPU_ARCH_PPC)
+                     "ppc"
+#elif defined(HAVE_CPU_ARCH_ARM)
+                     "arm"
 #endif
-       "\n");
-  fnc (fp, "mpi-asm:%s:\n", _gcry_mpi_get_hw_config ());
-  hwf = _gcry_get_hw_features ();
-  fnc (fp, "hwflist:");
-  for (i=0; hwflist[i].desc; i++)
-    if ( (hwf & hwflist[i].flag) )
-      fnc (fp, "%s:", hwflist[i].desc);
-  fnc (fp, "\n");
-  /* We use y/n instead of 1/0 for the simple reason that Emacsen's
-     compile error parser would accidently flag that line when printed
-     during "make check" as an error.  */
-  fnc (fp, "fips-mode:%c:%c:\n",
-       fips_mode ()? 'y':'n',
-       _gcry_enforced_fips_mode ()? 'y':'n' );
+                     ":\n");
+    }
+
+  if (!what || !strcmp (what, "mpi-asm"))
+    gpgrt_fprintf (fp, "mpi-asm:%s:\n", _gcry_mpi_get_hw_config ());
+
+  if (!what || !strcmp (what, "hwflist"))
+    {
+      unsigned int hwfeatures, afeature;
+
+      hwfeatures = _gcry_get_hw_features ();
+      gpgrt_fprintf (fp, "hwflist:");
+      for (i=0; (s = _gcry_enum_hw_features (i, &afeature)); i++)
+        if ((hwfeatures & afeature))
+          gpgrt_fprintf (fp, "%s:", s);
+      gpgrt_fprintf (fp, "\n");
+    }
+
+  if (!what || !strcmp (what, "fips-mode"))
+    {
+      /* We use y/n instead of 1/0 for the stupid reason that
+       * Emacsen's compile error parser would accidentally flag that
+       * line when printed during "make check" as an error.  The
+       * second field is obsolete and thus empty (used to be used for
+       * a so-called enforced-fips-mode).  The third field has an
+       * option static string describing the module versions; this is
+       * an optional configure option.  */
+      gpgrt_fprintf (fp, "fips-mode:%c::%s:\n",
+                     fips_mode ()? 'y':'n',
+#ifdef FIPS_MODULE_VERSION
+                     fips_mode () ? FIPS_MODULE_VERSION : ""
+#else
+                     ""
+#endif /* FIPS_MODULE_VERSION */
+                     );
+    }
+
+  if (!what || !strcmp (what, "rng-type"))
+    {
+      /* The currently used RNG type.  */
+      unsigned int jver;
+      int active;
+
+      i = _gcry_get_rng_type (0);
+      switch (i)
+        {
+        case GCRY_RNG_TYPE_STANDARD: s = "standard"; break;
+        case GCRY_RNG_TYPE_FIPS:     s = "fips"; break;
+        case GCRY_RNG_TYPE_SYSTEM:   s = "system"; break;
+        default: BUG ();
+        }
+      jver = _gcry_rndjent_get_version (&active);
+      gpgrt_fprintf (fp, "rng-type:%s:%d:%u:%d:\n", s, i, jver, active);
+    }
+
+  if (!what || !strcmp (what, "compliance"))
+    {
+      /* Right now we have no certification for 1.9 so we return an
+       * empty string.  As soon as this version has been approved for
+       * VS-Nfd we will put the string "de-vs" into the second
+       * field. If further specifications are required they are added
+       * as parameters to that field.  Other certifications will go
+       * into field 3 and so on.
+       *  field 1: keyword "compliance"
+       *  field 2: German VS-Nfd is marked with "de-vs"
+       *  field 3: reserved for FIPS.
+       */
+      gpgrt_fprintf (fp, "compliance:%s::\n", "");
+    }
+}
+
+
+/* With a MODE of 0 return a malloced string with configured features.
+ * In that case a WHAT of NULL returns everything in the same way
+ * GCRYCTL_PRINT_CONFIG would do.  With a specific WHAT string only
+ * the requested feature is returned (w/o the trailing LF.  On error
+ * NULL is returned.  */
+char *
+_gcry_get_config (int mode, const char *what)
+{
+  gpgrt_stream_t fp;
+  int save_errno;
+  void *data;
+  char *p;
+
+  if (mode)
+    {
+      gpg_err_set_errno (EINVAL);
+      return NULL;
+    }
+
+  fp = gpgrt_fopenmem (0, "w+b,samethread");
+  if (!fp)
+    return NULL;
+
+  print_config (what, fp);
+
+  if (!what)
+    {
+      /* Null-terminate bulk output. */
+      gpgrt_fwrite ("\0", 1, 1, fp);
+    }
+
+  if (gpgrt_ferror (fp))
+    {
+      save_errno = errno;
+      gpgrt_fclose (fp);
+      gpg_err_set_errno (save_errno);
+      return NULL;
+    }
+
+  gpgrt_rewind (fp);
+  if (gpgrt_fclose_snatch (fp, &data, NULL))
+    {
+      save_errno = errno;
+      gpgrt_fclose (fp);
+      gpg_err_set_errno (save_errno);
+      return NULL;
+    }
+
+  if (!data)
+    {
+      /* Nothing was printed (unknown value for WHAT).  This is okay,
+       * so clear ERRNO to indicate this. */
+      gpg_err_set_errno (0);
+      return NULL;
+    }
+
+  /* Strip trailing LF.  */
+  if (what && (p = strchr (data, '\n')))
+    *p = 0;
+
+  return data;
 }
 
 
 
 
+#if _GCRY_GCC_VERSION >= 40200
+# pragma GCC diagnostic push
+# pragma GCC diagnostic ignored "-Wswitch"
+#endif
+
 /* Command dispatcher function, acting as general control
    function.  */
-gcry_error_t
+gcry_err_code_t
 _gcry_vcontrol (enum gcry_ctl_cmds cmd, va_list arg_ptr)
 {
   static int init_finished = 0;
-  gcry_err_code_t err = 0;
+  gcry_err_code_t rc = 0;
 
   switch (cmd)
     {
@@ -328,6 +527,7 @@ _gcry_vcontrol (enum gcry_ctl_cmds cmd, va_list arg_ptr)
       break;
 
     case GCRYCTL_ENABLE_QUICK_RANDOM:
+      _gcry_set_preferred_rng_type (0);
       _gcry_enable_quick_random_gen ();
       break;
 
@@ -335,7 +535,7 @@ _gcry_vcontrol (enum gcry_ctl_cmds cmd, va_list arg_ptr)
       /* Return an error if the RNG is faked one (e.g. enabled by
          ENABLE_QUICK_RANDOM. */
       if (_gcry_random_is_faked ())
-        err = GPG_ERR_GENERAL;  /* Use as TRUE value.  */
+        rc = GPG_ERR_GENERAL;  /* Use as TRUE value.  */
       break;
 
     case GCRYCTL_DUMP_RANDOM_STATS:
@@ -347,7 +547,7 @@ _gcry_vcontrol (enum gcry_ctl_cmds cmd, va_list arg_ptr)
       break;
 
     case GCRYCTL_DUMP_SECMEM_STATS:
-      _gcry_secmem_dump_stats ();
+      _gcry_secmem_dump_stats (0);
       break;
 
     case GCRYCTL_DROP_PRIVS:
@@ -357,14 +557,16 @@ _gcry_vcontrol (enum gcry_ctl_cmds cmd, va_list arg_ptr)
 
     case GCRYCTL_DISABLE_SECMEM:
       global_init ();
-      no_secure_memory = 1;
+      /* When FIPS enabled, no effect at all.  */
+      if (!fips_mode ())
+        no_secure_memory = 1;
       break;
 
     case GCRYCTL_INIT_SECMEM:
       global_init ();
       _gcry_secmem_init (va_arg (arg_ptr, unsigned int));
       if ((_gcry_secmem_get_flags () & GCRY_SECMEM_FLAG_NOT_LOCKED))
-        err = GPG_ERR_GENERAL;
+        rc = GPG_ERR_GENERAL;
       break;
 
     case GCRYCTL_TERM_SECMEM:
@@ -373,35 +575,45 @@ _gcry_vcontrol (enum gcry_ctl_cmds cmd, va_list arg_ptr)
       break;
 
     case GCRYCTL_DISABLE_SECMEM_WARN:
+      _gcry_set_preferred_rng_type (0);
       _gcry_secmem_set_flags ((_gcry_secmem_get_flags ()
                               | GCRY_SECMEM_FLAG_NO_WARNING));
       break;
 
     case GCRYCTL_SUSPEND_SECMEM_WARN:
+      _gcry_set_preferred_rng_type (0);
       _gcry_secmem_set_flags ((_gcry_secmem_get_flags ()
                               | GCRY_SECMEM_FLAG_SUSPEND_WARNING));
       break;
 
     case GCRYCTL_RESUME_SECMEM_WARN:
+      _gcry_set_preferred_rng_type (0);
       _gcry_secmem_set_flags ((_gcry_secmem_get_flags ()
                               & ~GCRY_SECMEM_FLAG_SUSPEND_WARNING));
       break;
 
+    case GCRYCTL_AUTO_EXPAND_SECMEM:
+      _gcry_secmem_set_auto_expand (va_arg (arg_ptr, unsigned int));
+      break;
+
     case GCRYCTL_USE_SECURE_RNDPOOL:
       global_init ();
       _gcry_secure_random_alloc (); /* Put random number into secure memory. */
       break;
 
     case GCRYCTL_SET_RANDOM_SEED_FILE:
+      _gcry_set_preferred_rng_type (0);
       _gcry_set_random_seed_file (va_arg (arg_ptr, const char *));
       break;
 
     case GCRYCTL_UPDATE_RANDOM_SEED_FILE:
+      _gcry_set_preferred_rng_type (0);
       if ( fips_is_operational () )
         _gcry_update_random_seed_file ();
       break;
 
     case GCRYCTL_SET_VERBOSITY:
+      _gcry_set_preferred_rng_type (0);
       _gcry_set_log_verbosity (va_arg (arg_ptr, int));
       break;
 
@@ -419,13 +631,13 @@ _gcry_vcontrol (enum gcry_ctl_cmds cmd, va_list arg_ptr)
       break;
 
     case GCRYCTL_ANY_INITIALIZATION_P:
-      if (any_init_done)
-       err = GPG_ERR_GENERAL;
+      if (_gcry_global_any_init_done)
+       rc = GPG_ERR_GENERAL;
       break;
 
     case GCRYCTL_INITIALIZATION_FINISHED_P:
       if (init_finished)
-       err = GPG_ERR_GENERAL; /* Yes.  */
+       rc = GPG_ERR_GENERAL; /* Yes.  */
       break;
 
     case GCRYCTL_INITIALIZATION_FINISHED:
@@ -447,12 +659,14 @@ _gcry_vcontrol (enum gcry_ctl_cmds cmd, va_list arg_ptr)
       break;
 
     case GCRYCTL_SET_THREAD_CBS:
-      err = ath_install (va_arg (arg_ptr, void *), any_init_done);
-      if (! err)
-       global_init ();
+      /* This is now a dummy call.  We used to install our own thread
+         library here. */
+      _gcry_set_preferred_rng_type (0);
+      global_init ();
       break;
 
     case GCRYCTL_FAST_POLL:
+      _gcry_set_preferred_rng_type (0);
       /* We need to do make sure that the random pool is really
          initialized so that the poll function is not a NOP. */
       _gcry_random_initialize (1);
@@ -463,46 +677,57 @@ _gcry_vcontrol (enum gcry_ctl_cmds cmd, va_list arg_ptr)
 
     case GCRYCTL_SET_RNDEGD_SOCKET:
 #if USE_RNDEGD
-      err = _gcry_rndegd_set_socket_name (va_arg (arg_ptr, const char *));
+      _gcry_set_preferred_rng_type (0);
+      rc = _gcry_rndegd_set_socket_name (va_arg (arg_ptr, const char *));
 #else
-      err = gpg_error (GPG_ERR_NOT_SUPPORTED);
+      rc = GPG_ERR_NOT_SUPPORTED;
 #endif
       break;
 
     case GCRYCTL_SET_RANDOM_DAEMON_SOCKET:
-      _gcry_set_random_daemon_socket (va_arg (arg_ptr, const char *));
+      rc = GPG_ERR_NOT_SUPPORTED;
       break;
 
     case GCRYCTL_USE_RANDOM_DAEMON:
-      /* We need to do make sure that the random pool is really
-         initialized so that the poll function is not a NOP. */
-      _gcry_random_initialize (1);
-      _gcry_use_random_daemon (!! va_arg (arg_ptr, int));
+      rc = GPG_ERR_NOT_SUPPORTED;
+      break;
+
+    case GCRYCTL_CLOSE_RANDOM_DEVICE:
+      _gcry_random_close_fds ();
       break;
 
       /* This command dumps information pertaining to the
          configuration of libgcrypt to the given stream.  It may be
          used before the initialization has been finished but not
-         before a gcry_version_check. */
+         before a gcry_version_check.  See also gcry_get_config.  */
     case GCRYCTL_PRINT_CONFIG:
       {
         FILE *fp = va_arg (arg_ptr, FILE *);
-        print_config (fp?fprintf:_gcry_log_info_with_dummy_fp, fp);
+        char *tmpstr;
+        _gcry_set_preferred_rng_type (0);
+        tmpstr = _gcry_get_config (0, NULL);
+        if (tmpstr)
+          {
+            if (fp)
+              fputs (tmpstr, fp);
+            else
+              log_info ("%s", tmpstr);
+            xfree (tmpstr);
+          }
       }
       break;
 
     case GCRYCTL_OPERATIONAL_P:
       /* Returns true if the library is in an operational state.  This
          is always true for non-fips mode.  */
+      _gcry_set_preferred_rng_type (0);
       if (_gcry_fips_test_operational ())
-        err = GPG_ERR_GENERAL; /* Used as TRUE value */
+        rc = GPG_ERR_GENERAL; /* Used as TRUE value */
       break;
 
     case GCRYCTL_FIPS_MODE_P:
-      if (fips_mode ()
-          && !_gcry_is_fips_mode_inactive ()
-          && !no_secure_memory)
-       err = GPG_ERR_GENERAL; /* Used as TRUE value */
+      if (fips_mode ())
+       rc = GPG_ERR_GENERAL; /* Used as TRUE value */
       break;
 
     case GCRYCTL_FORCE_FIPS_MODE:
@@ -510,9 +735,10 @@ _gcry_vcontrol (enum gcry_ctl_cmds cmd, va_list arg_ptr)
          the library has already been initialized into fips mode, a
          selftest is triggered.  It is not possible to put the libraty
          into fips mode after having passed the initialization. */
-      if (!any_init_done)
+      _gcry_set_preferred_rng_type (0);
+      if (!_gcry_global_any_init_done)
         {
-          /* Not yet intialized at all.  Set a flag so that we are put
+          /* Not yet initialized at all.  Set a flag so that we are put
              into fips mode during initialization.  */
           force_fips_mode = 1;
         }
@@ -524,165 +750,183 @@ _gcry_vcontrol (enum gcry_ctl_cmds cmd, va_list arg_ptr)
           if (_gcry_fips_test_error_or_operational ())
             _gcry_fips_run_selftests (1);
           if (_gcry_fips_is_operational ())
-            err = GPG_ERR_GENERAL; /* Used as TRUE value */
+            rc = GPG_ERR_GENERAL; /* Used as TRUE value */
       }
       break;
 
+    case GCRYCTL_NO_FIPS_MODE:
+      /* Performing this command puts the library into non-fips mode,
+         even if system has fips setting.  It is not possible to put
+         the libraty into non-fips mode after having passed the
+         initialization. */
+      _gcry_set_preferred_rng_type (0);
+      if (!_gcry_global_any_init_done)
+        {
+          /* Not yet initialized at all.  Set a flag so that we are put
+             into non-fips mode during initialization.  */
+          force_fips_mode = 0;
+        }
+      else if (!init_finished)
+        {
+          /* Already initialized.  */
+          _gcry_no_fips_mode_required = 1;
+        }
+      else
+       rc = GPG_ERR_GENERAL;
+      break;
+
     case GCRYCTL_SELFTEST:
       /* Run a selftest.  This works in fips mode as well as in
          standard mode.  In contrast to the power-up tests, we use an
          extended version of the selftests. Returns 0 on success or an
          error code. */
       global_init ();
-      err = _gcry_fips_run_selftests (1);
+      rc = _gcry_fips_run_selftests (1);
       break;
 
-#if _GCRY_GCC_VERSION >= 40600
-# pragma GCC diagnostic push
-# pragma GCC diagnostic ignored "-Wswitch"
-#endif
-    case 58:  /* Init external random test.  */
-      {
-        void **rctx        = va_arg (arg_ptr, void **);
-        unsigned int flags = va_arg (arg_ptr, unsigned int);
-        const void *key    = va_arg (arg_ptr, const void *);
-        size_t keylen      = va_arg (arg_ptr, size_t);
-        const void *seed   = va_arg (arg_ptr, const void *);
-        size_t seedlen     = va_arg (arg_ptr, size_t);
-        const void *dt     = va_arg (arg_ptr, const void *);
-        size_t dtlen       = va_arg (arg_ptr, size_t);
-        if (!fips_is_operational ())
-          err = fips_not_operational ();
-        else
-          err = _gcry_random_init_external_test (rctx, flags, key, keylen,
-                                                 seed, seedlen, dt, dtlen);
-      }
+    case GCRYCTL_FIPS_SERVICE_INDICATOR_CIPHER:
+      /* Get FIPS Service Indicator for a given symmetric algorithm and
+       * optional mode. Returns GPG_ERR_NO_ERROR if algorithm is allowed or
+       * GPG_ERR_NOT_SUPPORTED otherwise */
+      rc = _gcry_fips_indicator_cipher (arg_ptr);
+      break;
+
+    case GCRYCTL_FIPS_SERVICE_INDICATOR_MAC:
+      /* Get FIPS Service Indicator for a given message authentication code.
+       * Returns GPG_ERR_NO_ERROR if algorithm is allowed or
+       * GPG_ERR_NOT_SUPPORTED otherwise */
+      rc = _gcry_fips_indicator_mac (arg_ptr);
+      break;
+
+    case GCRYCTL_FIPS_SERVICE_INDICATOR_MD:
+      /* Get FIPS Service Indicator for a given message digest. Returns
+       * GPG_ERR_NO_ERROR if algorithm is allowed or GPG_ERR_NOT_SUPPORTED
+       * otherwise */
+      rc = _gcry_fips_indicator_md (arg_ptr);
+      break;
+
+    case GCRYCTL_FIPS_SERVICE_INDICATOR_KDF:
+      /* Get FIPS Service Indicator for a given KDF. Returns GPG_ERR_NO_ERROR
+       * if algorithm is allowed or GPG_ERR_NOT_SUPPORTED otherwise */
+      rc = _gcry_fips_indicator_kdf (arg_ptr);
       break;
-    case 59:  /* Run external random test.  */
+
+    case GCRYCTL_FIPS_SERVICE_INDICATOR_FUNCTION:
+      /* Get FIPS Service Indicator for a given function from the API.
+       * Returns GPG_ERR_NO_ERROR if the function is allowed or
+       * GPG_ERR_NOT_SUPPORTED otherwise */
+      rc = _gcry_fips_indicator_function (arg_ptr);
+      break;
+
+    case GCRYCTL_FIPS_SERVICE_INDICATOR_PK_FLAGS:
+      /* Get FIPS Service Indicator for a public key operation flags.
+       * Returns GPG_ERR_NO_ERROR if the flag is allowed to be used or
+       * GPG_ERR_NOT_SUPPORTED otherwise */
+      rc = _gcry_fips_indicator_pk_flags (arg_ptr);
+      break;
+
+    case PRIV_CTL_INIT_EXTRNG_TEST:  /* Init external random test.  */
+      rc = GPG_ERR_NOT_SUPPORTED;
+      break;
+    case PRIV_CTL_RUN_EXTRNG_TEST:  /* Run external DRBG test.  */
       {
-        void *ctx     = va_arg (arg_ptr, void *);
-        void *buffer  = va_arg (arg_ptr, void *);
-        size_t buflen = va_arg (arg_ptr, size_t);
-        if (!fips_is_operational ())
-          err = fips_not_operational ();
+        struct gcry_drbg_test_vector *test =
+         va_arg (arg_ptr, struct gcry_drbg_test_vector *);
+        unsigned char *buf = va_arg (arg_ptr, unsigned char *);
+
+        if (buf)
+          rc = _gcry_rngdrbg_cavs_test (test, buf);
         else
-          err = _gcry_random_run_external_test (ctx, buffer, buflen);
+          rc = _gcry_rngdrbg_healthcheck_one (test);
       }
       break;
-    case 60:  /* Deinit external random test.  */
-      {
-        void *ctx = va_arg (arg_ptr, void *);
-        _gcry_random_deinit_external_test (ctx);
-      }
+    case PRIV_CTL_DEINIT_EXTRNG_TEST:  /* Deinit external random test.  */
+      rc = GPG_ERR_NOT_SUPPORTED;
       break;
-    case 61:  /* RFU */
+    case PRIV_CTL_EXTERNAL_LOCK_TEST:  /* Run external lock test */
+      rc = external_lock_test (va_arg (arg_ptr, int));
       break;
-    case 62:  /* RFU */
+    case PRIV_CTL_DUMP_SECMEM_STATS:
+      _gcry_secmem_dump_stats (1);
       break;
-#if _GCRY_GCC_VERSION >= 40600
-# pragma GCC diagnostic pop
-#endif
 
     case GCRYCTL_DISABLE_HWF:
       {
         const char *name = va_arg (arg_ptr, const char *);
-        int i;
-
-        for (i=0; hwflist[i].desc; i++)
-          if (!strcmp (hwflist[i].desc, name))
-            {
-              disabled_hw_features |= hwflist[i].flag;
-              break;
-            }
-        if (!hwflist[i].desc)
-          err = GPG_ERR_INV_NAME;
+        rc = _gcry_disable_hw_feature (name);
       }
       break;
 
     case GCRYCTL_SET_ENFORCED_FIPS_FLAG:
-      if (!any_init_done)
-        {
-          /* Not yet intialized at all.  Set the enforced fips mode flag */
-          _gcry_set_enforced_fips_mode ();
-        }
-      else
-        err = GPG_ERR_GENERAL;
+      /* Obsolete - ignore  */
       break;
 
-    default:
-      err = GPG_ERR_INV_OP;
-    }
-
-  return gcry_error (err);
-}
-
-
-/* Command dispatcher function, acting as general control
-   function.  */
-gcry_error_t
-gcry_control (enum gcry_ctl_cmds cmd, ...)
-{
-  gcry_error_t err;
-  va_list arg_ptr;
-
-  va_start (arg_ptr, cmd);
-  err = _gcry_vcontrol (cmd, arg_ptr);
-  va_end(arg_ptr);
-  return err;
-}
-
+    case GCRYCTL_SET_PREFERRED_RNG_TYPE:
+      /* This may be called before gcry_check_version.  */
+      {
+        int i = va_arg (arg_ptr, int);
+        /* Note that we may not pass 0 to _gcry_set_preferred_rng_type.  */
+        if (i > 0)
+          _gcry_set_preferred_rng_type (i);
+      }
+      break;
 
+    case GCRYCTL_GET_CURRENT_RNG_TYPE:
+      {
+        int *ip = va_arg (arg_ptr, int*);
+        if (ip)
+          *ip = _gcry_get_rng_type (!_gcry_global_any_init_done);
+      }
+      break;
 
-/* Return a pointer to a string containing a description of the error
-   code in the error value ERR.  */
-const char *
-gcry_strerror (gcry_error_t err)
-{
-  return gpg_strerror (err);
-}
+    case GCRYCTL_DISABLE_LOCKED_SECMEM:
+      _gcry_set_preferred_rng_type (0);
+      _gcry_secmem_set_flags ((_gcry_secmem_get_flags ()
+                              | GCRY_SECMEM_FLAG_NO_MLOCK));
+      break;
 
-/* Return a pointer to a string containing a description of the error
-   source in the error value ERR.  */
-const char *
-gcry_strsource (gcry_error_t err)
-{
-  return gpg_strsource (err);
-}
+    case GCRYCTL_DISABLE_PRIV_DROP:
+      _gcry_set_preferred_rng_type (0);
+      _gcry_secmem_set_flags ((_gcry_secmem_get_flags ()
+                              | GCRY_SECMEM_FLAG_NO_PRIV_DROP));
+      break;
 
-/* Retrieve the error code for the system error ERR.  This returns
-   GPG_ERR_UNKNOWN_ERRNO if the system error is not mapped (report
-   this).  */
-gcry_err_code_t
-gcry_err_code_from_errno (int err)
-{
-  return gpg_err_code_from_errno (err);
-}
+    case GCRYCTL_INACTIVATE_FIPS_FLAG:
+    case GCRYCTL_REACTIVATE_FIPS_FLAG:
+      rc = GPG_ERR_NOT_IMPLEMENTED;
+      break;
 
+    case GCRYCTL_DRBG_REINIT:
+      {
+        const char *flagstr = va_arg (arg_ptr, const char *);
+        gcry_buffer_t *pers = va_arg (arg_ptr, gcry_buffer_t *);
+        int npers = va_arg (arg_ptr, int);
+        if (va_arg (arg_ptr, void *) || npers < 0)
+          rc = GPG_ERR_INV_ARG;
+        else if (_gcry_get_rng_type (!_gcry_global_any_init_done)
+                 != GCRY_RNG_TYPE_FIPS)
+          rc = GPG_ERR_NOT_SUPPORTED;
+        else
+          rc = _gcry_rngdrbg_reinit (flagstr, pers, npers);
+      }
+      break;
 
-/* Retrieve the system error for the error code CODE.  This returns 0
-   if CODE is not a system error code.  */
-int
-gcry_err_code_to_errno (gcry_err_code_t code)
-{
-  return gpg_err_code_from_errno (code);
-}
+    case GCRYCTL_REINIT_SYSCALL_CLAMP:
+      if (!pre_syscall_func)
+        gpgrt_get_syscall_clamp (&pre_syscall_func, &post_syscall_func);
+      break;
 
+    default:
+      _gcry_set_preferred_rng_type (0);
+      rc = GPG_ERR_INV_OP;
+    }
 
-/* Return an error value with the error source SOURCE and the system
-   error ERR.  */
-gcry_error_t
-gcry_err_make_from_errno (gpg_err_source_t source, int err)
-{
-  return gpg_err_make_from_errno (source, err);
+  return rc;
 }
 
-
-/* Return an error value with the system error ERR.  */
-gcry_err_code_t
-gcry_error_from_errno (int err)
-{
-  return gcry_error (gpg_err_code_from_errno (err));
-}
+#if _GCRY_GCC_VERSION >= 40200
+# pragma GCC diagnostic pop
+#endif
 
 
 /* Set custom allocation handlers.  This is in general not useful
@@ -690,20 +934,21 @@ gcry_error_from_errno (int err)
  * provide proper allocation handlers which zeroize memory if needed.
  * NOTE: All 5 functions should be set.  */
 void
-gcry_set_allocation_handler (gcry_handler_alloc_t new_alloc_func,
-                            gcry_handler_alloc_t new_alloc_secure_func,
-                            gcry_handler_secure_check_t new_is_secure_func,
-                            gcry_handler_realloc_t new_realloc_func,
-                            gcry_handler_free_t new_free_func)
+_gcry_set_allocation_handler (gcry_handler_alloc_t new_alloc_func,
+                              gcry_handler_alloc_t new_alloc_secure_func,
+                              gcry_handler_secure_check_t new_is_secure_func,
+                              gcry_handler_realloc_t new_realloc_func,
+                              gcry_handler_free_t new_free_func)
 {
   global_init ();
 
   if (fips_mode ())
     {
-      /* We do not want to enforce the fips mode, but merely set a
-         flag so that the application may check whether it is still in
-         fips mode.  */
-      _gcry_inactivate_fips_mode ("custom allocation handler");
+      /* In FIPS mode, we can not use custom allocation handlers because
+       * fips requires explicit zeroization and we can not guarantee that
+       * with custom free functions (and we can not do it transparently as
+       * in free we do not know the zize). */
+      return;
     }
 
   alloc_func = new_alloc_func;
@@ -730,35 +975,17 @@ gcry_set_allocation_handler (gcry_handler_alloc_t 
new_alloc_func,
  *     bit 0 set = secure memory has been requested.
  */
 void
-gcry_set_outofcore_handler( int (*f)( void*, size_t, unsigned int ),
-                                                       void *value )
+_gcry_set_outofcore_handler (int (*f)(void*, size_t, unsigned int), void 
*value)
 {
   global_init ();
 
-  if (fips_mode () )
-    {
-      log_info ("out of core handler ignored in FIPS mode\n");
-      return;
-    }
+  if (fips_mode ())
+    return;
 
   outofcore_handler = f;
   outofcore_handler_value = value;
 }
 
-/* Return the no_secure_memory flag.  */
-static int
-get_no_secure_memory (void)
-{
-  if (!no_secure_memory)
-    return 0;
-  if (_gcry_enforced_fips_mode ())
-    {
-      no_secure_memory = 0;
-      return 0;
-    }
-  return no_secure_memory;
-}
-
 
 static gcry_err_code_t
 do_malloc (size_t n, unsigned int flags, void **mem)
@@ -766,12 +993,12 @@ do_malloc (size_t n, unsigned int flags, void **mem)
   gcry_err_code_t err = 0;
   void *m;
 
-  if ((flags & GCRY_ALLOC_FLAG_SECURE) && !get_no_secure_memory ())
+  if ((flags & GCRY_ALLOC_FLAG_SECURE) && !no_secure_memory)
     {
       if (alloc_secure_func)
        m = (*alloc_secure_func) (n);
       else
-       m = _gcry_private_malloc_secure (n);
+       m = _gcry_private_malloc_secure (n, !!(flags & GCRY_ALLOC_FLAG_XHINT));
     }
   else
     {
@@ -796,7 +1023,7 @@ do_malloc (size_t n, unsigned int flags, void **mem)
 }
 
 void *
-gcry_malloc (size_t n)
+_gcry_malloc (size_t n)
 {
   void *mem = NULL;
 
@@ -805,20 +1032,27 @@ gcry_malloc (size_t n)
   return mem;
 }
 
-void *
-gcry_malloc_secure (size_t n)
+static void *
+_gcry_malloc_secure_core (size_t n, int xhint)
 {
   void *mem = NULL;
 
-  do_malloc (n, GCRY_ALLOC_FLAG_SECURE, &mem);
+  do_malloc (n, (GCRY_ALLOC_FLAG_SECURE | (xhint? GCRY_ALLOC_FLAG_XHINT:0)),
+             &mem);
 
   return mem;
 }
 
+void *
+_gcry_malloc_secure (size_t n)
+{
+  return _gcry_malloc_secure_core (n, 0);
+}
+
 int
-gcry_is_secure (const void *a)
+_gcry_is_secure (const void *a)
 {
-  if (get_no_secure_memory ())
+  if (no_secure_memory)
     return 0;
   if (is_secure_func)
     return is_secure_func (a) ;
@@ -839,32 +1073,40 @@ _gcry_check_heap( const void *a )
 #endif
 }
 
-void *
-gcry_realloc (void *a, size_t n)
+static void *
+_gcry_realloc_core (void *a, size_t n, int xhint)
 {
   void *p;
 
   /* To avoid problems with non-standard realloc implementations and
      our own secmem_realloc, we divert to malloc and free here.  */
   if (!a)
-    return gcry_malloc (n);
+    return _gcry_malloc (n);
   if (!n)
     {
-      gcry_free (a);
+      xfree (a);
       return NULL;
     }
 
   if (realloc_func)
     p = realloc_func (a, n);
   else
-    p =  _gcry_private_realloc (a, n);
+    p =  _gcry_private_realloc (a, n, xhint);
   if (!p && !errno)
     gpg_err_set_errno (ENOMEM);
   return p;
 }
 
+
+void *
+_gcry_realloc (void *a, size_t n)
+{
+  return _gcry_realloc_core (a, n, 0);
+}
+
+
 void
-gcry_free (void *p)
+_gcry_free (void *p)
 {
   int save_errno;
 
@@ -872,7 +1114,7 @@ gcry_free (void *p)
     return;
 
   /* In case ERRNO is set we better save it so that the free machinery
-     may not accidently change ERRNO.  We restore it only if it was
+     may not accidentally change ERRNO.  We restore it only if it was
      already set to comply with the usual C semantic for ERRNO.  */
   save_errno = errno;
   if (free_func)
@@ -880,12 +1122,12 @@ gcry_free (void *p)
   else
     _gcry_private_free (p);
 
-  if (save_errno)
+  if (save_errno && save_errno != errno)
     gpg_err_set_errno (save_errno);
 }
 
 void *
-gcry_calloc (size_t n, size_t m)
+_gcry_calloc (size_t n, size_t m)
 {
   size_t bytes;
   void *p;
@@ -898,14 +1140,14 @@ gcry_calloc (size_t n, size_t m)
       return NULL;
     }
 
-  p = gcry_malloc (bytes);
+  p = _gcry_malloc (bytes);
   if (p)
     memset (p, 0, bytes);
   return p;
 }
 
 void *
-gcry_calloc_secure (size_t n, size_t m)
+_gcry_calloc_secure (size_t n, size_t m)
 {
   size_t bytes;
   void *p;
@@ -918,29 +1160,25 @@ gcry_calloc_secure (size_t n, size_t m)
       return NULL;
     }
 
-  p = gcry_malloc_secure (bytes);
+  p = _gcry_malloc_secure (bytes);
   if (p)
     memset (p, 0, bytes);
   return p;
 }
 
 
-/* Create and return a copy of the null-terminated string STRING.  If
-   it is contained in secure memory, the copy will be contained in
-   secure memory as well.  In an out-of-memory condition, NULL is
-   returned.  */
-char *
-gcry_strdup (const char *string)
+static char *
+_gcry_strdup_core (const char *string, int xhint)
 {
   char *string_cp = NULL;
   size_t string_n = 0;
 
   string_n = strlen (string);
 
-  if (gcry_is_secure (string))
-    string_cp = gcry_malloc_secure (string_n + 1);
+  if (_gcry_is_secure (string))
+    string_cp = _gcry_malloc_secure_core (string_n + 1, xhint);
   else
-    string_cp = gcry_malloc (string_n + 1);
+    string_cp = _gcry_malloc (string_n + 1);
 
   if (string_cp)
     strcpy (string_cp, string);
@@ -948,13 +1186,22 @@ gcry_strdup (const char *string)
   return string_cp;
 }
 
+/* Create and return a copy of the null-terminated string STRING.  If
+ * it is contained in secure memory, the copy will be contained in
+ * secure memory as well.  In an out-of-memory condition, NULL is
+ * returned.  */
+char *
+_gcry_strdup (const char *string)
+{
+  return _gcry_strdup_core (string, 0);
+}
 
 void *
-gcry_xmalloc( size_t n )
+_gcry_xmalloc( size_t n )
 {
   void *p;
 
-  while ( !(p = gcry_malloc( n )) )
+  while ( !(p = _gcry_malloc( n )) )
     {
       if ( fips_mode ()
            || !outofcore_handler
@@ -967,16 +1214,16 @@ gcry_xmalloc( size_t n )
 }
 
 void *
-gcry_xrealloc( void *a, size_t n )
+_gcry_xrealloc( void *a, size_t n )
 {
   void *p;
 
-  while ( !(p = gcry_realloc( a, n )) )
+  while (!(p = _gcry_realloc_core (a, n, 1)))
     {
       if ( fips_mode ()
            || !outofcore_handler
            || !outofcore_handler (outofcore_handler_value, n,
-                                   gcry_is_secure(a)? 3:2 ) )
+                                  _gcry_is_secure(a)? 3:2))
         {
           _gcry_fatal_error (gpg_err_code_from_errno (errno), NULL );
        }
@@ -985,11 +1232,11 @@ gcry_xrealloc( void *a, size_t n )
 }
 
 void *
-gcry_xmalloc_secure( size_t n )
+_gcry_xmalloc_secure( size_t n )
 {
   void *p;
 
-  while ( !(p = gcry_malloc_secure( n )) )
+  while (!(p = _gcry_malloc_secure_core (n, 1)))
     {
       if ( fips_mode ()
            || !outofcore_handler
@@ -1004,7 +1251,7 @@ gcry_xmalloc_secure( size_t n )
 
 
 void *
-gcry_xcalloc( size_t n, size_t m )
+_gcry_xcalloc( size_t n, size_t m )
 {
   size_t nbytes;
   void *p;
@@ -1016,13 +1263,13 @@ gcry_xcalloc( size_t n, size_t m )
       _gcry_fatal_error(gpg_err_code_from_errno (errno), NULL );
     }
 
-  p = gcry_xmalloc ( nbytes );
+  p = _gcry_xmalloc ( nbytes );
   memset ( p, 0, nbytes );
   return p;
 }
 
 void *
-gcry_xcalloc_secure( size_t n, size_t m )
+_gcry_xcalloc_secure( size_t n, size_t m )
 {
   size_t nbytes;
   void *p;
@@ -1034,20 +1281,20 @@ gcry_xcalloc_secure( size_t n, size_t m )
       _gcry_fatal_error(gpg_err_code_from_errno (errno), NULL );
     }
 
-  p = gcry_xmalloc_secure ( nbytes );
+  p = _gcry_xmalloc_secure ( nbytes );
   memset ( p, 0, nbytes );
   return p;
 }
 
 char *
-gcry_xstrdup (const char *string)
+_gcry_xstrdup (const char *string)
 {
   char *p;
 
-  while ( !(p = gcry_strdup (string)) )
+  while ( !(p = _gcry_strdup_core (string, 1)) )
     {
       size_t n = strlen (string);
-      int is_sec = !!gcry_is_secure (string);
+      int is_sec = !!_gcry_is_secure (string);
 
       if (fips_mode ()
           || !outofcore_handler
@@ -1062,6 +1309,24 @@ gcry_xstrdup (const char *string)
 }
 
 
+/* Used before blocking system calls.  */
+void
+_gcry_pre_syscall (void)
+{
+  if (pre_syscall_func)
+    pre_syscall_func ();
+}
+
+
+/* Used after blocking system calls.  */
+void
+_gcry_post_syscall (void)
+{
+  if (post_syscall_func)
+    post_syscall_func ();
+}
+
+
 int
 _gcry_get_debug_flag (unsigned int mask)
 {
@@ -1109,8 +1374,8 @@ _gcry_get_debug_flag (unsigned int mask)
             Only used in debugging mode.
 */
 void
-gcry_set_progress_handler (void (*cb)(void *,const char*,int, int, int),
-                           void *cb_data)
+_gcry_set_progress_handler (void (*cb)(void *,const char*,int, int, int),
+                            void *cb_data)
 {
 #if USE_DSA
   _gcry_register_pk_dsa_progress (cb, cb_data);
@@ -1121,3 +1386,48 @@ gcry_set_progress_handler (void (*cb)(void *,const 
char*,int, int, int),
   _gcry_register_primegen_progress (cb, cb_data);
   _gcry_register_random_progress (cb, cb_data);
 }
+
+
+
+/* This is a helper for the regression test suite to test Libgcrypt's locks.
+   It works using a one test lock with CMD controlling what to do:
+
+     30111 - Allocate and init lock
+     30112 - Take lock
+     30113 - Release lock
+     30114 - Destroy lock.
+
+   This function is used by tests/t-lock.c - it is not part of the
+   public API!
+ */
+static gpg_err_code_t
+external_lock_test (int cmd)
+{
+  GPGRT_LOCK_DEFINE (testlock);
+  gpg_err_code_t rc = 0;
+
+  switch (cmd)
+    {
+    case 30111:  /* Init Lock.  */
+      rc = gpgrt_lock_init (&testlock);
+      break;
+
+    case 30112:  /* Take Lock.  */
+      rc = gpgrt_lock_lock (&testlock);
+      break;
+
+    case 30113:  /* Release Lock.  */
+      rc = gpgrt_lock_unlock (&testlock);
+      break;
+
+    case 30114:  /* Destroy Lock.  */
+      rc = gpgrt_lock_destroy (&testlock);
+      break;
+
+    default:
+      rc = GPG_ERR_INV_OP;
+      break;
+    }
+
+  return rc;
+}
diff --git a/grub-core/lib/libgcrypt/src/hmac256.c 
b/grub-core/lib/libgcrypt/src/hmac256.c
index f3bc09244..899e6d158 100644
--- a/grub-core/lib/libgcrypt/src/hmac256.c
+++ b/grub-core/lib/libgcrypt/src/hmac256.c
@@ -24,8 +24,7 @@
     internal consistency checks.  It should not be used for sensitive
     data because no mechanisms to clear the stack etc are used.
 
-    This module may be used standalone and requires only a few
-    standard definitions to be provided in a config.h file.
+    This module may be used standalone.
 
     Types:
 
@@ -36,7 +35,7 @@
      WORDS_BIGENDIAN       Defined to 1 on big endian systems.
      inline                If defined, it should yield the keyword used
                            to inline a function.
-     HAVE_U32_TYPEDEF      Defined if the u32 type is available.
+     HAVE_TYPE_U32         Defined if the u32 type is available.
      SIZEOF_UNSIGNED_INT   Defined to the size in bytes of an unsigned int.
      SIZEOF_UNSIGNED_LONG  Defined to the size in bytes of an unsigned long.
 
@@ -46,7 +45,22 @@
                            for testing this included module.
  */
 
+#ifdef STANDALONE
+# ifndef KEY_FOR_BINARY_CHECK
+# define KEY_FOR_BINARY_CHECK "What am I, a doctor or a moonshuttle conductor?"
+# endif
+#include <stdint.h>
+#define HAVE_TYPE_U32 1
+typedef uint32_t u32;
+#define VERSION "standalone"
+/* For GCC, we can detect endianness.  If not GCC, please define manually.  */
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+#define WORDS_BIGENDIAN 1
+#endif
+#else
 #include <config.h>
+#endif
+
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -56,20 +70,19 @@
 # include <fcntl.h> /* We need setmode().  */
 #endif
 
-/* For a native WindowsCE binary we need to include gpg-error.h to
-   provide a replacement for strerror.  In other cases we need a
-   replacement macro for gpg_err_set_errno.  */
-#ifdef __MINGW32CE__
-# include <gpg-error.h>
+#ifdef STANDALONE
+#define xtrymalloc(a) malloc((a))
+#define gpg_err_set_errno(a) (errno = (a))
+#define xfree(a) free((a))
 #else
-# define gpg_err_set_errno(a) (errno = (a))
+#include "g10lib.h"
 #endif
 
 #include "hmac256.h"
 
 
 
-#ifndef HAVE_U32_TYPEDEF
+#ifndef HAVE_TYPE_U32
 # undef u32 /* Undef a possible macro with that name.  */
 # if SIZEOF_UNSIGNED_INT == 4
    typedef unsigned int u32;
@@ -78,7 +91,7 @@
 # else
 #  error no typedef for u32
 # endif
-# define HAVE_U32_TYPEDEF
+# define HAVE_TYPE_U32
 #endif
 
 
@@ -98,18 +111,10 @@ struct hmac256_context
 
 
 /* Rotate a 32 bit word.  */
-#if defined(__GNUC__) && defined(__i386__)
-static inline u32
-ror(u32 x, int n)
+static inline u32 ror(u32 x, int n)
 {
-       __asm__("rorl %%cl,%0"
-               :"=r" (x)
-               :"0" (x),"c" (n));
-       return x;
+       return ( ((x) >> (n)) | ((x) << (32-(n))) );
 }
-#else
-#define ror(x,n) ( ((x) >> (n)) | ((x) << (32-(n))) )
-#endif
 
 #define my_wipememory2(_ptr,_set,_len) do { \
               volatile char *_vptr=(volatile char *)(_ptr); \
@@ -304,7 +309,7 @@ _gcry_hmac256_new (const void *key, size_t keylen)
 {
   hmac256_context_t hd;
 
-  hd = malloc (sizeof *hd);
+  hd = xtrymalloc (sizeof *hd);
   if (!hd)
     return NULL;
 
@@ -340,7 +345,7 @@ _gcry_hmac256_new (const void *key, size_t keylen)
           tmphd = _gcry_hmac256_new (NULL, 0);
           if (!tmphd)
             {
-              free (hd);
+              xfree (hd);
               return NULL;
             }
           _gcry_hmac256_update (tmphd, key, keylen);
@@ -372,7 +377,7 @@ _gcry_hmac256_release (hmac256_context_t ctx)
       /* Note: We need to take care not to modify errno.  */
       if (ctx->use_hmac)
         my_wipememory (ctx->opad, 64);
-      free (ctx);
+      xfree (ctx);
     }
 }
 
@@ -434,10 +439,8 @@ _gcry_hmac256_finalize (hmac256_context_t hd, size_t 
*r_dlen)
 
       tmphd = _gcry_hmac256_new (NULL, 0);
       if (!tmphd)
-        {
-          free (hd);
-          return NULL;
-        }
+       return NULL;
+
       _gcry_hmac256_update (tmphd, hd->opad, 64);
       _gcry_hmac256_update (tmphd, hd->buf, 32);
       finalize (tmphd);
@@ -479,7 +482,7 @@ _gcry_hmac256_file (void *result, size_t resultsize, const 
char *filename,
     }
 
   buffer_size = 32768;
-  buffer = malloc (buffer_size);
+  buffer = xtrymalloc (buffer_size);
   if (!buffer)
     {
       fclose (fp);
@@ -490,7 +493,7 @@ _gcry_hmac256_file (void *result, size_t resultsize, const 
char *filename,
   while ( (nread = fread (buffer, 1, buffer_size, fp)))
     _gcry_hmac256_update (hd, buffer, nread);
 
-  free (buffer);
+  xfree (buffer);
 
   if (ferror (fp))
     {
@@ -656,6 +659,7 @@ main (int argc, char **argv)
   size_t n, dlen, idx;
   int use_stdin = 0;
   int use_binary = 0;
+  int use_stdkey = 0;
 
   assert (sizeof (u32) == 4);
 #ifdef __WIN32
@@ -699,11 +703,16 @@ main (int argc, char **argv)
           argc--; argv++;
           use_binary = 1;
         }
+      else if (!strcmp (*argv, "--stdkey"))
+        {
+          argc--; argv++;
+          use_stdkey = 1;
+        }
     }
 
-  if (argc < 1)
+  if (argc < 1 && !use_stdkey)
     {
-      fprintf (stderr, "usage: %s [--binary] key [filename]\n", pgm);
+      fprintf (stderr, "usage: %s [--binary] [--stdkey|key] [filename]\n", 
pgm);
       exit (1);
     }
 
@@ -712,8 +721,13 @@ main (int argc, char **argv)
     setmode (fileno (stdout), O_BINARY);
 #endif
 
-  key = *argv;
-  argc--, argv++;
+  if (use_stdkey)
+    key = KEY_FOR_BINARY_CHECK;
+  else
+    {
+      key = *argv;
+      argc--, argv++;
+    }
   keylen = strlen (key);
   use_stdin = !argc;
 
@@ -766,6 +780,9 @@ main (int argc, char **argv)
                        pgm, strerror (errno));
               exit (1);
             }
+          _gcry_hmac256_release (hd);
+          if (use_stdin)
+            break;
         }
       else
         {
diff --git a/grub-core/lib/libgcrypt/src/hwf-arm.c 
b/grub-core/lib/libgcrypt/src/hwf-arm.c
new file mode 100644
index 000000000..411885834
--- /dev/null
+++ b/grub-core/lib/libgcrypt/src/hwf-arm.c
@@ -0,0 +1,393 @@
+/* hwf-arm.c - Detect hardware features - ARM part
+ * Copyright (C) 2013,2019 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <unistd.h>
+#include <errno.h>
+#if defined(HAVE_SYS_AUXV_H) && (defined(HAVE_GETAUXVAL) || \
+    defined(HAVE_ELF_AUX_INFO))
+#include <sys/auxv.h>
+#endif
+
+#include "g10lib.h"
+#include "hwf-common.h"
+
+#if !defined (__arm__) && !defined (__aarch64__)
+# error Module build for wrong CPU.
+#endif
+
+
+#if defined(HAVE_SYS_AUXV_H) && defined(HAVE_ELF_AUX_INFO) && \
+    !defined(HAVE_GETAUXVAL) && defined(AT_HWCAP)
+#define HAVE_GETAUXVAL
+static unsigned long getauxval(unsigned long type)
+{
+  unsigned long auxval = 0;
+  int err;
+
+  /* FreeBSD provides 'elf_aux_info' function that does the same as
+   * 'getauxval' on Linux. */
+
+  err = elf_aux_info (type, &auxval, sizeof(auxval));
+  if (err)
+    {
+      errno = err;
+      auxval = 0;
+    }
+
+  return auxval;
+}
+#endif
+
+
+#undef HAS_SYS_AT_HWCAP
+#if defined(__linux__) || \
+    (defined(HAVE_SYS_AUXV_H) && defined(HAVE_GETAUXVAL))
+#define HAS_SYS_AT_HWCAP 1
+
+struct feature_map_s {
+  unsigned int hwcap_flag;
+  unsigned int hwcap2_flag;
+  const char *feature_match;
+  unsigned int hwf_flag;
+};
+
+#ifdef __arm__
+
+/* Note: These macros have same values on Linux and FreeBSD. */
+#ifndef AT_HWCAP
+# define AT_HWCAP      16
+#endif
+#ifndef AT_HWCAP2
+# define AT_HWCAP2     26
+#endif
+
+#ifndef HWCAP_NEON
+# define HWCAP_NEON    4096
+#endif
+
+#ifndef HWCAP2_AES
+# define HWCAP2_AES    1
+#endif
+#ifndef HWCAP2_PMULL
+# define HWCAP2_PMULL  2
+#endif
+#ifndef HWCAP2_SHA1
+# define HWCAP2_SHA1   4
+#endif
+#ifndef HWCAP2_SHA2
+# define HWCAP2_SHA2   8
+#endif
+
+static const struct feature_map_s arm_features[] =
+  {
+#ifdef ENABLE_NEON_SUPPORT
+    { HWCAP_NEON, 0, " neon", HWF_ARM_NEON },
+#endif
+#ifdef ENABLE_ARM_CRYPTO_SUPPORT
+    { 0, HWCAP2_AES, " aes", HWF_ARM_AES },
+    { 0, HWCAP2_SHA1," sha1", HWF_ARM_SHA1 },
+    { 0, HWCAP2_SHA2, " sha2", HWF_ARM_SHA2 },
+    { 0, HWCAP2_PMULL, " pmull", HWF_ARM_PMULL },
+#endif
+  };
+
+#elif defined(__aarch64__)
+
+/* Note: These macros have same values on Linux and FreeBSD. */
+#ifndef AT_HWCAP
+# define AT_HWCAP    16
+#endif
+#ifndef AT_HWCAP2
+# define AT_HWCAP2   -1
+#endif
+
+#ifndef HWCAP_ASIMD
+# define HWCAP_ASIMD 2
+#endif
+#ifndef HWCAP_AES
+# define HWCAP_AES   8
+#endif
+#ifndef HWCAP_PMULL
+# define HWCAP_PMULL 16
+#endif
+#ifndef HWCAP_SHA1
+# define HWCAP_SHA1  32
+#endif
+#ifndef HWCAP_SHA2
+# define HWCAP_SHA2  64
+#endif
+
+static const struct feature_map_s arm_features[] =
+  {
+#ifdef ENABLE_NEON_SUPPORT
+    { HWCAP_ASIMD, 0, " asimd", HWF_ARM_NEON },
+#endif
+#ifdef ENABLE_ARM_CRYPTO_SUPPORT
+    { HWCAP_AES, 0, " aes", HWF_ARM_AES },
+    { HWCAP_SHA1, 0, " sha1", HWF_ARM_SHA1 },
+    { HWCAP_SHA2, 0, " sha2", HWF_ARM_SHA2 },
+    { HWCAP_PMULL, 0, " pmull", HWF_ARM_PMULL },
+#endif
+  };
+
+#endif
+
+static int
+get_hwcap(unsigned int *hwcap, unsigned int *hwcap2)
+{
+  struct { unsigned long a_type; unsigned long a_val; } auxv;
+  FILE *f;
+  int err = -1;
+  static int hwcap_initialized = 0;
+  static unsigned int stored_hwcap = 0;
+  static unsigned int stored_hwcap2 = 0;
+
+  if (hwcap_initialized)
+    {
+      *hwcap = stored_hwcap;
+      *hwcap2 = stored_hwcap2;
+      return 0;
+    }
+
+#if defined(HAVE_SYS_AUXV_H) && defined(HAVE_GETAUXVAL)
+  errno = 0;
+  auxv.a_val = getauxval (AT_HWCAP);
+  if (errno == 0)
+    {
+      stored_hwcap |= auxv.a_val;
+      hwcap_initialized = 1;
+    }
+
+  if (AT_HWCAP2 >= 0)
+    {
+      errno = 0;
+      auxv.a_val = getauxval (AT_HWCAP2);
+      if (errno == 0)
+       {
+         stored_hwcap2 |= auxv.a_val;
+         hwcap_initialized = 1;
+       }
+    }
+
+  if (hwcap_initialized && (stored_hwcap || stored_hwcap2))
+    {
+      *hwcap = stored_hwcap;
+      *hwcap2 = stored_hwcap2;
+      return 0;
+    }
+#endif
+
+  f = fopen("/proc/self/auxv", "r");
+  if (!f)
+    {
+      *hwcap = stored_hwcap;
+      *hwcap2 = stored_hwcap2;
+      return -1;
+    }
+
+  while (fread(&auxv, sizeof(auxv), 1, f) > 0)
+    {
+      if (auxv.a_type == AT_HWCAP)
+        {
+          stored_hwcap |= auxv.a_val;
+          hwcap_initialized = 1;
+        }
+
+      if (auxv.a_type == AT_HWCAP2)
+        {
+          stored_hwcap2 |= auxv.a_val;
+          hwcap_initialized = 1;
+        }
+    }
+
+  if (hwcap_initialized)
+    err = 0;
+
+  fclose(f);
+  *hwcap = stored_hwcap;
+  *hwcap2 = stored_hwcap2;
+  return err;
+}
+
+static unsigned int
+detect_arm_at_hwcap(void)
+{
+  unsigned int hwcap;
+  unsigned int hwcap2;
+  unsigned int features = 0;
+  unsigned int i;
+
+  if (get_hwcap(&hwcap, &hwcap2) < 0)
+    return features;
+
+  for (i = 0; i < DIM(arm_features); i++)
+    {
+      if (hwcap & arm_features[i].hwcap_flag)
+        features |= arm_features[i].hwf_flag;
+
+      if (hwcap2 & arm_features[i].hwcap2_flag)
+        features |= arm_features[i].hwf_flag;
+    }
+
+  return features;
+}
+
+#endif
+
+#undef HAS_PROC_CPUINFO
+#ifdef __linux__
+#define HAS_PROC_CPUINFO 1
+
+static unsigned int
+detect_arm_proc_cpuinfo(unsigned int *broken_hwfs)
+{
+  char buf[1024]; /* large enough */
+  char *str_features, *str_feat;
+  int cpu_implementer, cpu_arch, cpu_variant, cpu_part, cpu_revision;
+  FILE *f;
+  int readlen, i;
+  size_t mlen;
+  static int cpuinfo_initialized = 0;
+  static unsigned int stored_cpuinfo_features;
+  static unsigned int stored_broken_hwfs;
+  struct {
+    const char *name;
+    int *value;
+  } cpu_entries[5] = {
+    { "CPU implementer", &cpu_implementer },
+    { "CPU architecture", &cpu_arch },
+    { "CPU variant", &cpu_variant },
+    { "CPU part", &cpu_part },
+    { "CPU revision", &cpu_revision },
+  };
+
+  if (cpuinfo_initialized)
+    {
+      *broken_hwfs |= stored_broken_hwfs;
+      return stored_cpuinfo_features;
+    }
+
+  f = fopen("/proc/cpuinfo", "r");
+  if (!f)
+    return 0;
+
+  memset (buf, 0, sizeof(buf));
+  readlen = fread (buf, 1, sizeof(buf), f);
+  fclose (f);
+  if (readlen <= 0 || readlen > sizeof(buf))
+    return 0;
+
+  buf[sizeof(buf) - 1] = '\0';
+
+  cpuinfo_initialized = 1;
+  stored_cpuinfo_features = 0;
+  stored_broken_hwfs = 0;
+
+  /* Find features line. */
+  str_features = strstr(buf, "Features");
+  if (!str_features)
+    return stored_cpuinfo_features;
+
+  /* Find CPU version information. */
+  for (i = 0; i < DIM(cpu_entries); i++)
+    {
+      char *str;
+
+      *cpu_entries[i].value = -1;
+
+      str = strstr(buf, cpu_entries[i].name);
+      if (!str)
+        continue;
+
+      str = strstr(str, ": ");
+      if (!str)
+        continue;
+
+      str += 2;
+      if (strcmp(cpu_entries[i].name, "CPU architecture") == 0
+          && strcmp(str, "AArch64") == 0)
+        *cpu_entries[i].value = 8;
+      else
+        *cpu_entries[i].value = strtoul(str, NULL, 0);
+    }
+
+  /* Lines to strings. */
+  for (i = 0; i < sizeof(buf); i++)
+    if (buf[i] == '\n')
+      buf[i] = '\0';
+
+  /* Check features. */
+  for (i = 0; i < DIM(arm_features); i++)
+    {
+      str_feat = strstr(str_features, arm_features[i].feature_match);
+      if (str_feat)
+        {
+          mlen = strlen(arm_features[i].feature_match);
+          if (str_feat[mlen] == ' ' || str_feat[mlen] == '\0')
+            {
+              stored_cpuinfo_features |= arm_features[i].hwf_flag;
+            }
+        }
+    }
+
+  /* Check for CPUs with broken NEON implementation. See
+   * https://code.google.com/p/chromium/issues/detail?id=341598
+   */
+  if (cpu_implementer == 0x51
+      && cpu_arch == 7
+      && cpu_variant == 1
+      && cpu_part == 0x4d
+      && cpu_revision == 0)
+    {
+      stored_broken_hwfs = HWF_ARM_NEON;
+    }
+
+  *broken_hwfs |= stored_broken_hwfs;
+  return stored_cpuinfo_features;
+}
+
+#endif /* __linux__ */
+
+unsigned int
+_gcry_hwf_detect_arm (void)
+{
+  unsigned int ret = 0;
+  unsigned int broken_hwfs = 0;
+
+#if defined (HAS_SYS_AT_HWCAP)
+  ret |= detect_arm_at_hwcap ();
+#endif
+
+#if defined (HAS_PROC_CPUINFO)
+  ret |= detect_arm_proc_cpuinfo (&broken_hwfs);
+#endif
+
+#if defined(__ARM_NEON) && defined(ENABLE_NEON_SUPPORT)
+  ret |= HWF_ARM_NEON;
+#endif
+
+  ret &= ~broken_hwfs;
+
+  return ret;
+}
diff --git a/grub-core/lib/libgcrypt/src/hwf-common.h 
b/grub-core/lib/libgcrypt/src/hwf-common.h
new file mode 100644
index 000000000..b10f86be6
--- /dev/null
+++ b/grub-core/lib/libgcrypt/src/hwf-common.h
@@ -0,0 +1,28 @@
+/* hwf-common.h - Declarations for hwf-CPU.c modules
+ * Copyright (C) 2012  g10 Code GmbH
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser general Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef HWF_COMMON_H
+#define HWF_COMMON_H
+
+unsigned int _gcry_hwf_detect_x86 (void);
+unsigned int _gcry_hwf_detect_arm (void);
+unsigned int _gcry_hwf_detect_ppc (void);
+unsigned int _gcry_hwf_detect_s390x (void);
+
+#endif /*HWF_COMMON_H*/
diff --git a/grub-core/lib/libgcrypt/src/hwf-ppc.c 
b/grub-core/lib/libgcrypt/src/hwf-ppc.c
new file mode 100644
index 000000000..6f8c97464
--- /dev/null
+++ b/grub-core/lib/libgcrypt/src/hwf-ppc.c
@@ -0,0 +1,247 @@
+/* hwf-ppc.c - Detect hardware features - PPC part
+ * Copyright (C) 2013,2019 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ * Copyright (C) 2019 Shawn Landden <shawn@git.icu>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <unistd.h>
+#include <errno.h>
+#if defined(HAVE_SYS_AUXV_H) && (defined(HAVE_GETAUXVAL) || \
+    defined(HAVE_ELF_AUX_INFO))
+#include <sys/auxv.h>
+#endif
+
+#include "g10lib.h"
+#include "hwf-common.h"
+
+#if !defined (__powerpc__) && !defined (__powerpc64__)
+# error Module build for wrong CPU.
+#endif
+
+
+#if defined(HAVE_SYS_AUXV_H) && defined(HAVE_ELF_AUX_INFO) && \
+    !defined(HAVE_GETAUXVAL) && defined(AT_HWCAP)
+#define HAVE_GETAUXVAL
+static unsigned long getauxval(unsigned long type)
+{
+  unsigned long auxval = 0;
+  int err;
+
+  /* FreeBSD provides 'elf_aux_info' function that does the same as
+   * 'getauxval' on Linux. */
+
+  err = elf_aux_info (type, &auxval, sizeof(auxval));
+  if (err)
+    {
+      errno = err;
+      auxval = 0;
+    }
+
+  return auxval;
+}
+#endif
+
+
+#undef HAS_SYS_AT_HWCAP
+#if defined(__linux__) || \
+    (defined(HAVE_SYS_AUXV_H) && defined(HAVE_GETAUXVAL))
+#define HAS_SYS_AT_HWCAP 1
+
+struct feature_map_s
+  {
+    unsigned int hwcap_flag;
+    unsigned int hwcap2_flag;
+    unsigned int hwf_flag;
+  };
+
+#if defined(__powerpc__) || defined(__powerpc64__)
+
+/* Note: These macros have same values on Linux and FreeBSD. */
+#ifndef AT_HWCAP
+# define AT_HWCAP      16
+#endif
+#ifndef AT_HWCAP2
+# define AT_HWCAP2     26
+#endif
+
+#ifndef PPC_FEATURE2_ARCH_2_07
+# define PPC_FEATURE2_ARCH_2_07     0x80000000
+#endif
+#ifndef PPC_FEATURE2_VEC_CRYPTO
+# define PPC_FEATURE2_VEC_CRYPTO    0x02000000
+#endif
+#ifndef PPC_FEATURE2_ARCH_3_00
+# define PPC_FEATURE2_ARCH_3_00     0x00800000
+#endif
+#ifndef PPC_FEATURE2_ARCH_3_10
+# define PPC_FEATURE2_ARCH_3_10     0x00040000
+#endif
+
+static const struct feature_map_s ppc_features[] =
+  {
+    { 0, PPC_FEATURE2_ARCH_2_07, HWF_PPC_ARCH_2_07 },
+#ifdef ENABLE_PPC_CRYPTO_SUPPORT
+    { 0, PPC_FEATURE2_VEC_CRYPTO, HWF_PPC_VCRYPTO },
+#endif
+    { 0, PPC_FEATURE2_ARCH_3_00, HWF_PPC_ARCH_3_00 },
+    { 0, PPC_FEATURE2_ARCH_3_10, HWF_PPC_ARCH_3_10 },
+  };
+#endif
+
+static int
+get_hwcap(unsigned int *hwcap, unsigned int *hwcap2)
+{
+  struct { unsigned long a_type; unsigned long a_val; } auxv;
+  FILE *f;
+  int err = -1;
+  static int hwcap_initialized = 0;
+  static unsigned int stored_hwcap = 0;
+  static unsigned int stored_hwcap2 = 0;
+
+  if (hwcap_initialized)
+    {
+      *hwcap = stored_hwcap;
+      *hwcap2 = stored_hwcap2;
+      return 0;
+    }
+
+#if 0 /* TODO: configure.ac detection for __builtin_cpu_supports */
+      /* TODO: move to 'detect_ppc_builtin_cpu_supports' */
+#if defined(__GLIBC__) && defined(__GNUC__) && __GNUC__ >= 6
+  /* __builtin_cpu_supports returns 0 if glibc support doesn't exist, so
+   * we can only trust positive results. */
+#ifdef ENABLE_PPC_CRYPTO_SUPPORT
+  if (__builtin_cpu_supports("vcrypto")) /* TODO: Configure.ac */
+    {
+      stored_hwcap2 |= PPC_FEATURE2_VEC_CRYPTO;
+      hwcap_initialized = 1;
+    }
+#endif
+
+  if (__builtin_cpu_supports("arch_3_00")) /* TODO: Configure.ac */
+    {
+      stored_hwcap2 |= PPC_FEATURE2_ARCH_3_00;
+      hwcap_initialized = 1;
+    }
+#endif
+#endif
+
+#if defined(HAVE_SYS_AUXV_H) && defined(HAVE_GETAUXVAL)
+  errno = 0;
+  auxv.a_val = getauxval (AT_HWCAP);
+  if (errno == 0)
+    {
+      stored_hwcap |= auxv.a_val;
+      hwcap_initialized = 1;
+    }
+
+  if (AT_HWCAP2 >= 0)
+    {
+      errno = 0;
+      auxv.a_val = getauxval (AT_HWCAP2);
+      if (errno == 0)
+       {
+         stored_hwcap2 |= auxv.a_val;
+         hwcap_initialized = 1;
+       }
+    }
+
+  if (hwcap_initialized && (stored_hwcap || stored_hwcap2))
+    {
+      *hwcap = stored_hwcap;
+      *hwcap2 = stored_hwcap2;
+      return 0;
+    }
+#endif
+
+  f = fopen("/proc/self/auxv", "r");
+  if (!f)
+    {
+      *hwcap = stored_hwcap;
+      *hwcap2 = stored_hwcap2;
+      return -1;
+    }
+
+  while (fread(&auxv, sizeof(auxv), 1, f) > 0)
+    {
+      if (auxv.a_type == AT_HWCAP)
+        {
+          stored_hwcap |= auxv.a_val;
+          hwcap_initialized = 1;
+        }
+
+      if (auxv.a_type == AT_HWCAP2)
+        {
+          stored_hwcap2 |= auxv.a_val;
+          hwcap_initialized = 1;
+        }
+    }
+
+  if (hwcap_initialized)
+      err = 0;
+
+  fclose(f);
+
+  *hwcap = stored_hwcap;
+  *hwcap2 = stored_hwcap2;
+  return err;
+}
+
+static unsigned int
+detect_ppc_at_hwcap(void)
+{
+  unsigned int hwcap;
+  unsigned int hwcap2;
+  unsigned int features = 0;
+  unsigned int i;
+
+  if (get_hwcap(&hwcap, &hwcap2) < 0)
+      return features;
+
+  for (i = 0; i < DIM(ppc_features); i++)
+    {
+      if (hwcap & ppc_features[i].hwcap_flag)
+        features |= ppc_features[i].hwf_flag;
+
+      if (hwcap2 & ppc_features[i].hwcap2_flag)
+        features |= ppc_features[i].hwf_flag;
+    }
+
+  return features;
+}
+
+#endif
+
+unsigned int
+_gcry_hwf_detect_ppc (void)
+{
+  unsigned int ret = 0;
+  unsigned int broken_hwfs = 0;
+
+#if defined (HAS_SYS_AT_HWCAP)
+  ret |= detect_ppc_at_hwcap ();
+#endif
+
+  ret &= ~broken_hwfs;
+
+  return ret;
+}
diff --git a/grub-core/lib/libgcrypt/src/hwf-s390x.c 
b/grub-core/lib/libgcrypt/src/hwf-s390x.c
new file mode 100644
index 000000000..74590fc36
--- /dev/null
+++ b/grub-core/lib/libgcrypt/src/hwf-s390x.c
@@ -0,0 +1,231 @@
+/* hwf-s390x.c - Detect hardware features - s390x/zSeries part
+ * Copyright (C) 2020 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <unistd.h>
+#include <errno.h>
+#if defined(HAVE_SYS_AUXV_H) && (defined(HAVE_GETAUXVAL) || \
+    defined(HAVE_ELF_AUX_INFO))
+#include <sys/auxv.h>
+#endif
+
+#include "g10lib.h"
+#include "hwf-common.h"
+
+#if !defined (__s390x__)
+# error Module build for wrong CPU.
+#endif
+
+#undef HAVE_STFLE
+#ifdef HAVE_GCC_INLINE_ASM_S390X
+# define HAVE_STFLE 1
+#endif
+
+#ifndef AT_HWCAP
+# define AT_HWCAP         16
+#endif
+#ifndef HWCAP_S390_STFLE
+# define HWCAP_S390_STFLE 4
+#endif
+#ifndef HWCAP_S390_VXRS
+# define HWCAP_S390_VXRS  2048
+#endif
+
+struct feature_map_s
+  {
+    unsigned int facilities_bit;
+    unsigned int hwcap_flag;
+    unsigned int hwf_flag;
+  };
+
+static const struct feature_map_s s390x_features[] =
+  {
+    { 17,  0, HWF_S390X_MSA },
+    { 77,  0, HWF_S390X_MSA_4 },
+    { 146, 0, HWF_S390X_MSA_8 },
+    { 155, 0, HWF_S390X_MSA_9 },
+#ifdef HAVE_GCC_INLINE_ASM_S390X_VX
+    { 129, HWCAP_S390_VXRS, HWF_S390X_VX },
+#endif
+  };
+
+#if defined(HAVE_SYS_AUXV_H) && defined(HAVE_ELF_AUX_INFO) && \
+    !defined(HAVE_GETAUXVAL) && defined(AT_HWCAP)
+#define HAVE_GETAUXVAL
+static unsigned long getauxval(unsigned long type)
+{
+  unsigned long auxval = 0;
+  int err;
+
+  /* FreeBSD provides 'elf_aux_info' function that does the same as
+   * 'getauxval' on Linux. */
+
+  err = elf_aux_info (type, &auxval, sizeof(auxval));
+  if (err)
+    {
+      errno = err;
+      auxval = 0;
+    }
+
+  return auxval;
+}
+#endif
+
+
+#undef HAS_SYS_AT_HWCAP
+#if defined(__linux__) || \
+    (defined(HAVE_SYS_AUXV_H) && defined(HAVE_GETAUXVAL))
+#define HAS_SYS_AT_HWCAP 1
+
+struct facilities_s
+  {
+    u64 bits[3];
+  };
+
+static int
+get_hwcap(unsigned int *hwcap)
+{
+  struct { unsigned long a_type; unsigned long a_val; } auxv;
+  FILE *f;
+  int err = -1;
+  static int hwcap_initialized = 0;
+  static unsigned int stored_hwcap = 0;
+
+  if (hwcap_initialized)
+    {
+      *hwcap = stored_hwcap;
+      return 0;
+    }
+
+#if defined(HAVE_SYS_AUXV_H) && defined(HAVE_GETAUXVAL)
+  errno = 0;
+  auxv.a_val = getauxval (AT_HWCAP);
+  if (errno == 0)
+    {
+      stored_hwcap |= auxv.a_val;
+      hwcap_initialized = 1;
+    }
+
+  if (hwcap_initialized && stored_hwcap)
+    {
+      *hwcap = stored_hwcap;
+      return 0;
+    }
+#endif
+
+  f = fopen("/proc/self/auxv", "r");
+  if (!f)
+    {
+      *hwcap = stored_hwcap;
+      return -1;
+    }
+
+  while (fread(&auxv, sizeof(auxv), 1, f) > 0)
+    {
+      if (auxv.a_type == AT_HWCAP)
+        {
+          stored_hwcap |= auxv.a_val;
+          hwcap_initialized = 1;
+        }
+    }
+
+  if (hwcap_initialized)
+      err = 0;
+
+  fclose(f);
+
+  *hwcap = stored_hwcap;
+  return err;
+}
+#endif
+
+#ifdef HAVE_STFLE
+static void
+get_stfle(struct facilities_s *out)
+{
+  static int stfle_initialized = 0;
+  static struct facilities_s stored_facilities;
+
+  if (!stfle_initialized)
+    {
+      register unsigned long reg0 asm("0") = DIM(stored_facilities.bits) - 1;
+
+      asm ("stfle %1\n\t"
+          : "+d" (reg0),
+            "=Q" (stored_facilities.bits[0])
+          :
+          : "cc", "memory");
+
+      stfle_initialized = 1;
+    }
+
+  *out = stored_facilities;
+}
+#endif
+
+static unsigned int
+detect_s390x_features(void)
+{
+  struct facilities_s facilities = { { 0, } };
+  unsigned int hwcap = 0;
+  unsigned int features = 0;
+  unsigned int i;
+
+#if defined (HAS_SYS_AT_HWCAP)
+  if (get_hwcap(&hwcap) < 0)
+    return features;
+#endif
+
+  if ((hwcap & HWCAP_S390_STFLE) == 0)
+    return features;
+
+#ifdef HAVE_STFLE
+  get_stfle(&facilities);
+#endif
+
+  for (i = 0; i < DIM(s390x_features); i++)
+    {
+      if (s390x_features[i].hwcap_flag == 0 ||
+         (s390x_features[i].hwcap_flag & hwcap))
+       {
+         unsigned int idx = s390x_features[i].facilities_bit;
+         unsigned int u64_idx = idx / 64;
+         unsigned int u64_bit = 63 - (idx % 64);
+
+         if (facilities.bits[u64_idx] & (U64_C(1) << u64_bit))
+           features |= s390x_features[i].hwf_flag;
+       }
+    }
+
+  return features;
+}
+
+unsigned int
+_gcry_hwf_detect_s390x (void)
+{
+  unsigned int ret = 0;
+
+  ret |= detect_s390x_features ();
+
+  return ret;
+}
diff --git a/grub-core/lib/libgcrypt/src/hwf-x86.c 
b/grub-core/lib/libgcrypt/src/hwf-x86.c
new file mode 100644
index 000000000..a1aa02e78
--- /dev/null
+++ b/grub-core/lib/libgcrypt/src/hwf-x86.c
@@ -0,0 +1,414 @@
+/* hwf-x86.c - Detect hardware features - x86 part
+ * Copyright (C) 2007, 2011, 2012  Free Software Foundation, Inc.
+ * Copyright (C) 2012  Jussi Kivilinna
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <unistd.h>
+
+#include "g10lib.h"
+#include "hwf-common.h"
+
+#if !defined (__i386__) && !defined (__x86_64__)
+# error Module build for wrong CPU.
+#endif
+
+/* We use the next macro to decide whether we can test for certain
+   features.  */
+#undef HAS_X86_CPUID
+
+#if defined (__i386__) && SIZEOF_UNSIGNED_LONG == 4 && defined (__GNUC__)
+# define HAS_X86_CPUID 1
+
+#if _GCRY_GCC_VERSION >= 40700 /* 4.7 */
+# define FORCE_FUNC_FRAME_POINTER \
+       __attribute__ ((optimize("no-omit-frame-pointer")))
+#else
+# define FORCE_FUNC_FRAME_POINTER
+#endif
+
+static FORCE_FUNC_FRAME_POINTER int
+is_cpuid_available(void)
+{
+  int has_cpuid = 0;
+
+  /* Detect the CPUID feature by testing some undefined behaviour (16
+     vs 32 bit pushf/popf). */
+  asm volatile
+    ("pushf\n\t"                 /* Copy flags to EAX.  */
+     "popl %%eax\n\t"
+     "movl %%eax, %%ecx\n\t"     /* Save flags into ECX.  */
+     "xorl $0x200000, %%eax\n\t" /* Toggle ID bit and copy it to the flags.  */
+     "pushl %%eax\n\t"
+     "popf\n\t"
+     "pushf\n\t"                 /* Copy changed flags again to EAX.  */
+     "popl %%eax\n\t"
+     "pushl %%ecx\n\t"           /* Restore flags from ECX.  */
+     "popf\n\t"
+     "xorl %%eax, %%ecx\n\t"     /* Compare flags against saved flags.  */
+     "jz .Lno_cpuid%=\n\t"       /* Toggling did not work, thus no CPUID.  */
+     "movl $1, %0\n"             /* Worked. true -> HAS_CPUID.  */
+     ".Lno_cpuid%=:\n\t"
+     : "+r" (has_cpuid)
+     :
+     : "%eax", "%ecx", "cc", "memory"
+     );
+
+  return has_cpuid;
+}
+
+static void
+get_cpuid(unsigned int in, unsigned int *eax, unsigned int *ebx,
+          unsigned int *ecx, unsigned int *edx)
+{
+  unsigned int regs[4];
+
+  asm volatile
+    ("xchgl %%ebx, %1\n\t"     /* Save GOT register.  */
+     "cpuid\n\t"
+     "xchgl %%ebx, %1\n\t"     /* Restore GOT register. */
+     : "=a" (regs[0]), "=D" (regs[1]), "=c" (regs[2]), "=d" (regs[3])
+     : "0" (in), "1" (0), "2" (0), "3" (0)
+     : "cc"
+     );
+
+  if (eax)
+    *eax = regs[0];
+  if (ebx)
+    *ebx = regs[1];
+  if (ecx)
+    *ecx = regs[2];
+  if (edx)
+    *edx = regs[3];
+}
+
+#if defined(ENABLE_AVX_SUPPORT) || defined(ENABLE_AVX2_SUPPORT)
+static unsigned int
+get_xgetbv(void)
+{
+  unsigned int t_eax, t_edx;
+
+  asm volatile
+    ("xgetbv\n\t"
+     : "=a" (t_eax), "=d" (t_edx)
+     : "c" (0)
+    );
+
+  return t_eax;
+}
+#endif /* ENABLE_AVX_SUPPORT || ENABLE_AVX2_SUPPORT */
+
+#endif /* i386 && GNUC */
+
+
+#if defined (__x86_64__) && defined (__GNUC__)
+# define HAS_X86_CPUID 1
+
+static int
+is_cpuid_available(void)
+{
+  return 1;
+}
+
+static void
+get_cpuid(unsigned int in, unsigned int *eax, unsigned int *ebx,
+          unsigned int *ecx, unsigned int *edx)
+{
+  unsigned int regs[4];
+
+  asm volatile
+    ("cpuid\n\t"
+     : "=a" (regs[0]), "=b" (regs[1]), "=c" (regs[2]), "=d" (regs[3])
+     : "0" (in), "1" (0), "2" (0), "3" (0)
+     : "cc"
+     );
+
+  if (eax)
+    *eax = regs[0];
+  if (ebx)
+    *ebx = regs[1];
+  if (ecx)
+    *ecx = regs[2];
+  if (edx)
+    *edx = regs[3];
+}
+
+#if defined(ENABLE_AVX_SUPPORT) || defined(ENABLE_AVX2_SUPPORT)
+static unsigned int
+get_xgetbv(void)
+{
+  unsigned int t_eax, t_edx;
+
+  asm volatile
+    ("xgetbv\n\t"
+     : "=a" (t_eax), "=d" (t_edx)
+     : "c" (0)
+    );
+
+  return t_eax;
+}
+#endif /* ENABLE_AVX_SUPPORT || ENABLE_AVX2_SUPPORT */
+
+#endif /* x86-64 && GNUC */
+
+
+#ifdef HAS_X86_CPUID
+static unsigned int
+detect_x86_gnuc (void)
+{
+  union
+  {
+    char c[12+1];
+    unsigned int ui[3];
+  } vendor_id;
+  unsigned int features, features2;
+  unsigned int os_supports_avx_avx2_registers = 0;
+  unsigned int max_cpuid_level;
+  unsigned int fms, family, model;
+  unsigned int result = 0;
+  unsigned int avoid_vpgather = 0;
+
+  (void)os_supports_avx_avx2_registers;
+
+  if (!is_cpuid_available())
+    return 0;
+
+  get_cpuid(0, &max_cpuid_level, &vendor_id.ui[0], &vendor_id.ui[2],
+            &vendor_id.ui[1]);
+  vendor_id.c[12] = 0;
+
+  if (0)
+    ; /* Just to make "else if" and ifdef macros look pretty.  */
+#ifdef ENABLE_PADLOCK_SUPPORT
+  else if (!strcmp (vendor_id.c, "CentaurHauls"))
+    {
+      /* This is a VIA CPU.  Check what PadLock features we have.  */
+
+      /* Check for extended centaur (EAX).  */
+      get_cpuid(0xC0000000, &features, NULL, NULL, NULL);
+
+      /* Has extended centaur features? */
+      if (features > 0xC0000000)
+        {
+           /* Ask for the extended feature flags (EDX). */
+           get_cpuid(0xC0000001, NULL, NULL, NULL, &features);
+
+           /* Test bits 2 and 3 to see whether the RNG exists and is enabled. 
*/
+           if ((features & 0x0C) == 0x0C)
+             result |= HWF_PADLOCK_RNG;
+
+           /* Test bits 6 and 7 to see whether the ACE exists and is enabled. 
*/
+           if ((features & 0xC0) == 0xC0)
+             result |= HWF_PADLOCK_AES;
+
+           /* Test bits 10 and 11 to see whether the PHE exists and is
+              enabled.  */
+           if ((features & 0xC00) == 0xC00)
+             result |= HWF_PADLOCK_SHA;
+
+           /* Test bits 12 and 13 to see whether the MONTMUL exists and is
+              enabled.  */
+           if ((features & 0x3000) == 0x3000)
+             result |= HWF_PADLOCK_MMUL;
+        }
+    }
+#endif /*ENABLE_PADLOCK_SUPPORT*/
+  else if (!strcmp (vendor_id.c, "GenuineIntel"))
+    {
+      /* This is an Intel CPU.  */
+      result |= HWF_INTEL_CPU;
+    }
+  else if (!strcmp (vendor_id.c, "AuthenticAMD"))
+    {
+      /* This is an AMD CPU.  */
+    }
+
+  /* Detect Intel features, that might also be supported by other
+     vendors.  */
+
+  /* Get CPU family/model/stepping (EAX) and Intel feature flags (ECX, EDX).  
*/
+  get_cpuid(1, &fms, NULL, &features, &features2);
+
+  family = ((fms & 0xf00) >> 8) + ((fms & 0xff00000) >> 20);
+  model = ((fms & 0xf0) >> 4) + ((fms & 0xf0000) >> 12);
+
+  if ((result & HWF_INTEL_CPU) && family == 6)
+    {
+      /* These Intel Core processor models have SHLD/SHRD instruction that
+       * can do integer rotation faster actual ROL/ROR instructions. */
+      switch (model)
+       {
+       case 0x2A:
+       case 0x2D:
+       case 0x3A:
+       case 0x3C:
+       case 0x3F:
+       case 0x45:
+       case 0x46:
+       case 0x3D:
+       case 0x4F:
+       case 0x56:
+       case 0x47:
+       case 0x4E:
+       case 0x5E:
+       case 0x8E:
+       case 0x9E:
+       case 0x55:
+       case 0x66:
+         result |= HWF_INTEL_FAST_SHLD;
+         break;
+       }
+
+      /* These Intel Core processors that have AVX2 have slow VPGATHER and
+       * should be avoided for table-lookup use. */
+      switch (model)
+       {
+       case 0x3C:
+       case 0x3F:
+       case 0x45:
+       case 0x46:
+         /* Haswell */
+         avoid_vpgather |= 1;
+         break;
+       }
+    }
+  else
+    {
+      /* Avoid VPGATHER for non-Intel CPUs as testing is needed to
+       * make sure it is fast enough. */
+
+      avoid_vpgather |= 1;
+    }
+
+#ifdef ENABLE_FORCE_SOFT_HWFEATURES
+  /* Soft HW features mark functionality that is available on all systems
+   * but not feasible to use because of slow HW implementation. */
+
+  /* SHLD is faster at rotating register than actual ROR/ROL instructions
+   * on older Intel systems (~sandy-bridge era). However, SHLD is very
+   * slow on almost anything else and later Intel processors have faster
+   * ROR/ROL. Therefore in regular build HWF_INTEL_FAST_SHLD is enabled
+   * only for those Intel processors that benefit from the SHLD
+   * instruction. Enabled here unconditionally as requested. */
+  result |= HWF_INTEL_FAST_SHLD;
+
+  /* VPGATHER instructions are used for look-up table based
+   * implementations which require VPGATHER to be fast enough to beat
+   * regular parallelized look-up table implementations (see Twofish).
+   * So far, only Intel processors beginning with skylake have had
+   * VPGATHER fast enough to be enabled. AMD Zen3 comes close to
+   * being feasible, but not quite (where twofish-avx2 is few percent
+   * slower than twofish-3way). Enable VPGATHER here unconditionally
+   * as requested. */
+  avoid_vpgather = 0;
+#endif
+
+#ifdef ENABLE_PCLMUL_SUPPORT
+  /* Test bit 1 for PCLMUL.  */
+  if (features & 0x00000002)
+     result |= HWF_INTEL_PCLMUL;
+#endif
+  /* Test bit 9 for SSSE3.  */
+  if (features & 0x00000200)
+     result |= HWF_INTEL_SSSE3;
+  /* Test bit 19 for SSE4.1.  */
+  if (features & 0x00080000)
+     result |= HWF_INTEL_SSE4_1;
+#ifdef ENABLE_AESNI_SUPPORT
+  /* Test bit 25 for AES-NI.  */
+  if (features & 0x02000000)
+     result |= HWF_INTEL_AESNI;
+#endif /*ENABLE_AESNI_SUPPORT*/
+#if defined(ENABLE_AVX_SUPPORT) || defined(ENABLE_AVX2_SUPPORT)
+  /* Test bit 27 for OSXSAVE (required for AVX/AVX2).  */
+  if (features & 0x08000000)
+    {
+      /* Check that OS has enabled both XMM and YMM state support.  */
+      if ((get_xgetbv() & 0x6) == 0x6)
+        os_supports_avx_avx2_registers = 1;
+    }
+#endif
+#ifdef ENABLE_AVX_SUPPORT
+  /* Test bit 28 for AVX.  */
+  if (features & 0x10000000)
+    if (os_supports_avx_avx2_registers)
+      result |= HWF_INTEL_AVX;
+#endif /*ENABLE_AVX_SUPPORT*/
+#ifdef ENABLE_DRNG_SUPPORT
+  /* Test bit 30 for RDRAND.  */
+  if (features & 0x40000000)
+     result |= HWF_INTEL_RDRAND;
+#endif /*ENABLE_DRNG_SUPPORT*/
+
+  /* Test bit 4 of EDX for TSC.  */
+  if (features2 & 0x00000010)
+    result |= HWF_INTEL_RDTSC;
+
+  /* Check additional Intel feature flags.  Early Intel P5 processors report
+   * too high max_cpuid_level, so don't check level 7 if processor does not
+   * support SSE3 (as cpuid:7 contains only features for newer processors).
+   * Source: http://www.sandpile.org/x86/cpuid.htm  */
+  if (max_cpuid_level >= 7 && (features & 0x00000001))
+    {
+      /* Get CPUID:7 contains further Intel feature flags. */
+      get_cpuid(7, NULL, &features, &features2, NULL);
+
+      /* Test bit 8 for BMI2.  */
+      if (features & 0x00000100)
+          result |= HWF_INTEL_BMI2;
+
+#ifdef ENABLE_AVX2_SUPPORT
+      /* Test bit 5 for AVX2.  */
+      if (features & 0x00000020)
+        if (os_supports_avx_avx2_registers)
+          result |= HWF_INTEL_AVX2;
+
+      if ((result & HWF_INTEL_AVX2) && !avoid_vpgather)
+        result |= HWF_INTEL_FAST_VPGATHER;
+#endif /*ENABLE_AVX_SUPPORT*/
+
+      /* Test bit 29 for SHA Extensions. */
+      if (features & (1 << 29))
+        result |= HWF_INTEL_SHAEXT;
+
+#if defined(ENABLE_AVX2_SUPPORT) && defined(ENABLE_AESNI_SUPPORT) && \
+    defined(ENABLE_PCLMUL_SUPPORT)
+      /* Test bit 9 for VAES and bit 10 for VPCLMULDQD */
+      if ((features2 & 0x00000200) && (features2 & 0x00000400))
+        result |= HWF_INTEL_VAES_VPCLMUL;
+#endif
+    }
+
+  return result;
+}
+#endif /* HAS_X86_CPUID */
+
+
+unsigned int
+_gcry_hwf_detect_x86 (void)
+{
+#if defined (HAS_X86_CPUID)
+  return detect_x86_gnuc ();
+#else
+  return 0;
+#endif
+}
diff --git a/grub-core/lib/libgcrypt/src/hwfeatures.c 
b/grub-core/lib/libgcrypt/src/hwfeatures.c
index c3567989d..97e67b3c0 100644
--- a/grub-core/lib/libgcrypt/src/hwfeatures.c
+++ b/grub-core/lib/libgcrypt/src/hwfeatures.c
@@ -1,5 +1,6 @@
 /* hwfeatures.c - Detect hardware features.
  * Copyright (C) 2007, 2011  Free Software Foundation, Inc.
+ * Copyright (C) 2012  g10 Code GmbH
  *
  * This file is part of Libgcrypt.
  *
@@ -19,18 +20,115 @@
 
 #include <config.h>
 #include <stdio.h>
+#include <ctype.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdarg.h>
 #include <unistd.h>
+#ifdef HAVE_SYSLOG
+# include <syslog.h>
+#endif /*HAVE_SYSLOG*/
 
 #include "g10lib.h"
+#include "hwf-common.h"
+
+/* The name of a file used to globally disable selected features. */
+#define HWF_DENY_FILE "/etc/gcrypt/hwf.deny"
+
+/* A table to map hardware features to a string.
+ * Note: Remember to add new HW features to 'doc/gcrypt.texi'.  */
+static struct
+{
+  unsigned int flag;
+  const char *desc;
+} hwflist[] =
+  {
+#if defined(HAVE_CPU_ARCH_X86)
+    { HWF_PADLOCK_RNG,         "padlock-rng" },
+    { HWF_PADLOCK_AES,         "padlock-aes" },
+    { HWF_PADLOCK_SHA,         "padlock-sha" },
+    { HWF_PADLOCK_MMUL,        "padlock-mmul"},
+    { HWF_INTEL_CPU,           "intel-cpu" },
+    { HWF_INTEL_FAST_SHLD,     "intel-fast-shld" },
+    { HWF_INTEL_BMI2,          "intel-bmi2" },
+    { HWF_INTEL_SSSE3,         "intel-ssse3" },
+    { HWF_INTEL_SSE4_1,        "intel-sse4.1" },
+    { HWF_INTEL_PCLMUL,        "intel-pclmul" },
+    { HWF_INTEL_AESNI,         "intel-aesni" },
+    { HWF_INTEL_RDRAND,        "intel-rdrand" },
+    { HWF_INTEL_AVX,           "intel-avx" },
+    { HWF_INTEL_AVX2,          "intel-avx2" },
+    { HWF_INTEL_FAST_VPGATHER, "intel-fast-vpgather" },
+    { HWF_INTEL_RDTSC,         "intel-rdtsc" },
+    { HWF_INTEL_SHAEXT,        "intel-shaext" },
+    { HWF_INTEL_VAES_VPCLMUL,  "intel-vaes-vpclmul" },
+#elif defined(HAVE_CPU_ARCH_ARM)
+    { HWF_ARM_NEON,            "arm-neon" },
+    { HWF_ARM_AES,             "arm-aes" },
+    { HWF_ARM_SHA1,            "arm-sha1" },
+    { HWF_ARM_SHA2,            "arm-sha2" },
+    { HWF_ARM_PMULL,           "arm-pmull" },
+#elif defined(HAVE_CPU_ARCH_PPC)
+    { HWF_PPC_VCRYPTO,         "ppc-vcrypto" },
+    { HWF_PPC_ARCH_3_00,       "ppc-arch_3_00" },
+    { HWF_PPC_ARCH_2_07,       "ppc-arch_2_07" },
+    { HWF_PPC_ARCH_3_10,       "ppc-arch_3_10" },
+#elif defined(HAVE_CPU_ARCH_S390X)
+    { HWF_S390X_MSA,           "s390x-msa" },
+    { HWF_S390X_MSA_4,         "s390x-msa-4" },
+    { HWF_S390X_MSA_8,         "s390x-msa-8" },
+    { HWF_S390X_MSA_9,         "s390x-msa-9" },
+    { HWF_S390X_VX,            "s390x-vx" },
+#endif
+  };
+
+/* A bit vector with the hardware features which shall not be used.
+   This variable must be set prior to any initialization.  */
+static unsigned int disabled_hw_features;
 
 /* A bit vector describing the hardware features currently
    available. */
 static unsigned int hw_features;
 
 
+
+/* Disable a feature by name.  This function must be called *before*
+   _gcry_detect_hw_features is called.  */
+gpg_err_code_t
+_gcry_disable_hw_feature (const char *name)
+{
+  int i;
+  size_t n1, n2;
+
+  while (name && *name)
+    {
+      n1 = strcspn (name, ":,");
+      if (!n1)
+        ;
+      else if (n1 == 3 && !strncmp (name, "all", 3))
+        disabled_hw_features = ~0;
+      else
+        {
+          for (i=0; i < DIM (hwflist); i++)
+            {
+              n2 = strlen (hwflist[i].desc);
+              if (n1 == n2 && !strncmp (hwflist[i].desc, name, n2))
+                {
+                  disabled_hw_features |= hwflist[i].flag;
+                  break;
+                }
+            }
+          if (!(i < DIM (hwflist)))
+            return GPG_ERR_INV_NAME;
+        }
+      name += n1;
+      if (*name)
+        name++; /* Skip delimiter ':' or ','.  */
+    }
+  return 0;
+}
+
+
 /* Return a bit vector describing the available hardware features.
    The HWF_ constants are used to test for them. */
 unsigned int
@@ -40,153 +138,101 @@ _gcry_get_hw_features (void)
 }
 
 
-#if defined (__i386__) && SIZEOF_UNSIGNED_LONG == 4 && defined (__GNUC__)
+/* Enumerate all features.  The caller is expected to start with an
+   IDX of 0 and then increment IDX until NULL is returned.  */
+const char *
+_gcry_enum_hw_features (int idx, unsigned int *r_feature)
+{
+  if (idx < 0 || idx >= DIM (hwflist))
+    return NULL;
+  if (r_feature)
+    *r_feature = hwflist[idx].flag;
+  return hwflist[idx].desc;
+}
+
+
+/* Read a file with features which shall not be used.  The file is a
+   simple text file where empty lines and lines with the first non
+   white-space character being '#' are ignored.  */
 static void
-detect_ia32_gnuc (void)
+parse_hwf_deny_file (void)
 {
-  /* The code here is only useful for the PadLock engine thus we don't
-     build it if that support has been disabled.  */
-  int has_cpuid = 0;
-  char vendor_id[12+1];
-
-  /* Detect the CPUID feature by testing some undefined behaviour (16
-     vs 32 bit pushf/popf). */
-  asm volatile
-    ("pushf\n\t"                 /* Copy flags to EAX.  */
-     "popl %%eax\n\t"
-     "movl %%eax, %%ecx\n\t"     /* Save flags into ECX.  */
-     "xorl $0x200000, %%eax\n\t" /* Toggle ID bit and copy it to the flags.  */
-     "pushl %%eax\n\t"
-     "popf\n\t"
-     "pushf\n\t"                 /* Copy changed flags again to EAX.  */
-     "popl %%eax\n\t"
-     "pushl %%ecx\n\t"           /* Restore flags from ECX.  */
-     "popf\n\t"
-     "xorl %%eax, %%ecx\n\t"     /* Compare flags against saved flags.  */
-     "jz .Lno_cpuid%=\n\t"       /* Toggling did not work, thus no CPUID.  */
-     "movl $1, %0\n"             /* Worked. true -> HAS_CPUID.  */
-     ".Lno_cpuid%=:\n\t"
-     : "+r" (has_cpuid)
-     :
-     : "%eax", "%ecx", "cc"
-     );
-
-  if (!has_cpuid)
-    return;  /* No way.  */
-
-  asm volatile
-    ("pushl %%ebx\n\t"           /* Save GOT register.  */
-     "xorl  %%eax, %%eax\n\t"    /* 0 -> EAX.  */
-     "cpuid\n\t"                 /* Get vendor ID.  */
-     "movl  %%ebx, (%0)\n\t"     /* EBX,EDX,ECX -> VENDOR_ID.  */
-     "movl  %%edx, 4(%0)\n\t"
-     "movl  %%ecx, 8(%0)\n\t"
-     "popl  %%ebx\n"
-     :
-     : "S" (&vendor_id[0])
-     : "%eax", "%ecx", "%edx", "cc"
-     );
-  vendor_id[12] = 0;
-
-  if (0)
-    ; /* Just to make "else if" and ifdef macros look pretty.  */
-#ifdef ENABLE_PADLOCK_SUPPORT
-  else if (!strcmp (vendor_id, "CentaurHauls"))
-    {
-      /* This is a VIA CPU.  Check what PadLock features we have.  */
-      asm volatile
-        ("pushl %%ebx\n\t"             /* Save GOT register.  */
-         "movl $0xC0000000, %%eax\n\t"  /* Check for extended centaur  */
-         "cpuid\n\t"                    /* feature flags.              */
-         "popl %%ebx\n\t"              /* Restore GOT register. */
-         "cmpl $0xC0000001, %%eax\n\t"
-         "jb .Lready%=\n\t"             /* EAX < 0xC0000000 => no padlock.  */
-
-         "pushl %%ebx\n\t"             /* Save GOT register. */
-         "movl $0xC0000001, %%eax\n\t"  /* Ask for the extended */
-         "cpuid\n\t"                    /* feature flags.       */
-         "popl %%ebx\n\t"              /* Restore GOT register. */
-
-         "movl %%edx, %%eax\n\t"        /* Take copy of feature flags.  */
-         "andl $0x0C, %%eax\n\t"        /* Test bits 2 and 3 to see whether */
-         "cmpl $0x0C, %%eax\n\t"        /* the RNG exists and is enabled.   */
-         "jnz .Lno_rng%=\n\t"
-         "orl $1, %0\n"                 /* Set our HWF_PADLOCK_RNG bit.  */
-
-         ".Lno_rng%=:\n\t"
-         "movl %%edx, %%eax\n\t"        /* Take copy of feature flags.  */
-         "andl $0xC0, %%eax\n\t"        /* Test bits 6 and 7 to see whether */
-         "cmpl $0xC0, %%eax\n\t"        /* the ACE exists and is enabled.   */
-         "jnz .Lno_ace%=\n\t"
-         "orl $2, %0\n"                 /* Set our HWF_PADLOCK_AES bit.  */
-
-         ".Lno_ace%=:\n\t"
-         "movl %%edx, %%eax\n\t"        /* Take copy of feature flags.  */
-         "andl $0xC00, %%eax\n\t"       /* Test bits 10, 11 to see whether  */
-         "cmpl $0xC00, %%eax\n\t"       /* the PHE exists and is enabled.   */
-         "jnz .Lno_phe%=\n\t"
-         "orl $4, %0\n"                 /* Set our HWF_PADLOCK_SHA bit.  */
-
-         ".Lno_phe%=:\n\t"
-         "movl %%edx, %%eax\n\t"        /* Take copy of feature flags.  */
-         "andl $0x3000, %%eax\n\t"      /* Test bits 12, 13 to see whether  */
-         "cmpl $0x3000, %%eax\n\t"      /* MONTMUL exists and is enabled.   */
-         "jnz .Lready%=\n\t"
-         "orl $8, %0\n"                 /* Set our HWF_PADLOCK_MMUL bit.  */
-
-         ".Lready%=:\n"
-         : "+r" (hw_features)
-         :
-         : "%eax", "%edx", "cc"
-         );
-    }
-#endif /*ENABLE_PADLOCK_SUPPORT*/
-  else if (!strcmp (vendor_id, "GenuineIntel"))
-    {
-      /* This is an Intel CPU.  */
-      asm volatile
-        ("pushl %%ebx\n\t"             /* Save GOT register.  */
-         "movl $1, %%eax\n\t"           /* Get CPU info and feature flags.  */
-         "cpuid\n"
-         "popl %%ebx\n\t"              /* Restore GOT register. */
-         "testl $0x02000000, %%ecx\n\t" /* Test bit 25.  */
-         "jz .Lno_aes%=\n\t"            /* No AES support.  */
-         "orl $256, %0\n"               /* Set our HWF_INTEL_AES bit.  */
-
-         ".Lno_aes%=:\n"
-         : "+r" (hw_features)
-         :
-         : "%eax", "%ecx", "%edx", "cc"
-         );
-    }
-  else if (!strcmp (vendor_id, "AuthenticAMD"))
-    {
-      /* This is an AMD CPU.  */
+  const char *fname = HWF_DENY_FILE;
+  FILE *fp;
+  char buffer[256];
+  char *p, *pend;
+  int lnr = 0;
 
+  fp = fopen (fname, "r");
+  if (!fp)
+    return;
+
+  for (;;)
+    {
+      if (!fgets (buffer, sizeof buffer, fp))
+        {
+          if (!feof (fp))
+            {
+#ifdef HAVE_SYSLOG
+              syslog (LOG_USER|LOG_WARNING,
+                      "Libgcrypt warning: error reading '%s', line %d",
+                      fname, lnr);
+#endif /*HAVE_SYSLOG*/
+            }
+          fclose (fp);
+          return;
+        }
+      lnr++;
+      for (p=buffer; my_isascii (*p) && isspace (*p); p++)
+        ;
+      pend = strchr (p, '\n');
+      if (pend)
+        *pend = 0;
+      pend = p + (*p? (strlen (p)-1):0);
+      for ( ;pend > p; pend--)
+        if (my_isascii (*pend) && isspace (*pend))
+          *pend = 0;
+      if (!*p || *p == '#')
+        continue;
+
+      if (_gcry_disable_hw_feature (p) == GPG_ERR_INV_NAME)
+        {
+#ifdef HAVE_SYSLOG
+          syslog (LOG_USER|LOG_WARNING,
+                  "Libgcrypt warning: unknown feature in '%s', line %d",
+                  fname, lnr);
+#endif /*HAVE_SYSLOG*/
+        }
     }
 }
-#endif /* __i386__ && SIZEOF_UNSIGNED_LONG == 4 && __GNUC__ */
 
 
 /* Detect the available hardware features.  This function is called
    once right at startup and we assume that no other threads are
    running.  */
 void
-_gcry_detect_hw_features (unsigned int disabled_features)
+_gcry_detect_hw_features (void)
 {
   hw_features = 0;
 
-  if (fips_mode ())
-    return; /* Hardware support is not to be evaluated.  */
-
-#if defined (__i386__) && SIZEOF_UNSIGNED_LONG == 4
-#ifdef __GNUC__
-  detect_ia32_gnuc ();
+  parse_hwf_deny_file ();
+
+#if defined (HAVE_CPU_ARCH_X86)
+  {
+    hw_features = _gcry_hwf_detect_x86 ();
+  }
+#elif defined (HAVE_CPU_ARCH_ARM)
+  {
+    hw_features = _gcry_hwf_detect_arm ();
+  }
+#elif defined (HAVE_CPU_ARCH_PPC)
+  {
+    hw_features = _gcry_hwf_detect_ppc ();
+  }
+#elif defined (HAVE_CPU_ARCH_S390X)
+  {
+    hw_features = _gcry_hwf_detect_s390x ();
+  }
 #endif
-#elif defined (__i386__) && SIZEOF_UNSIGNED_LONG == 8
-#ifdef __GNUC__
-#endif
-#endif
-
-  hw_features &= ~disabled_features;
+  hw_features &= ~disabled_hw_features;
 }
diff --git a/grub-core/lib/libgcrypt/src/libgcrypt-config.in 
b/grub-core/lib/libgcrypt/src/libgcrypt-config.in
index c052638cd..6b3b35677 100644
--- a/grub-core/lib/libgcrypt/src/libgcrypt-config.in
+++ b/grub-core/lib/libgcrypt/src/libgcrypt-config.in
@@ -14,7 +14,7 @@
 # General.
 prefix="@prefix@"
 exec_prefix="@exec_prefix@"
-version="@VERSION@"
+version="@PACKAGE_VERSION@"
 includedir="@includedir@"
 libdir="@libdir@"
 gpg_error_libs="@GPG_ERROR_LIBS@"
@@ -96,7 +96,19 @@ while test $# -gt 0; do
        --exec-prefix)
            echo_exec_prefix=yes
            ;;
-       --version)
+        --variable=*)
+            case "${1#*=}" in
+                prefix) echo "$prefix" ;;
+                exec_prefix) echo "$exec_prefix" ;;
+                host) echo "$my_host" ;;
+                api_version) echo "$api_version" ;;
+               symmetric_ciphers) echo "$symmetric_ciphers" ;;
+               asymmetric_ciphers) echo "$asymmetric_ciphers" ;;
+               digests) echo "$digests" ;;
+            esac
+            exit 0
+            ;;
+       --modversion|--version)
            echo_version=yes
            ;;
         --api-version)
diff --git a/grub-core/lib/libgcrypt/src/libgcrypt.def 
b/grub-core/lib/libgcrypt/src/libgcrypt.def
index 031b9410b..d6de731f7 100644
--- a/grub-core/lib/libgcrypt/src/libgcrypt.def
+++ b/grub-core/lib/libgcrypt/src/libgcrypt.def
@@ -118,7 +118,7 @@ EXPORTS
       gcry_mpi_set_flag  @88
       gcry_mpi_clear_flag  @89
       gcry_mpi_get_flag  @90
-
+      gcry_mpi_get_ui    @91
 
       gcry_cipher_open  @92
       gcry_cipher_close  @93
@@ -132,7 +132,8 @@ EXPORTS
       gcry_cipher_decrypt  @101
       gcry_cipher_get_algo_keylen  @102
       gcry_cipher_get_algo_blklen  @103
-      gcry_cipher_list  @104
+
+;; @104 used to be part of the module register interface
 
       gcry_pk_encrypt  @105
       gcry_pk_decrypt  @106
@@ -146,33 +147,13 @@ EXPORTS
       gcry_pk_map_name  @114
       gcry_pk_get_nbits  @115
       gcry_pk_get_keygrip  @116
-      gcry_pk_list  @117
-
-      gcry_ac_data_new  @118
-      gcry_ac_data_destroy  @119
-      gcry_ac_data_set  @120
-      gcry_ac_data_copy  @121
-      gcry_ac_data_length  @122
-      gcry_ac_data_get_name  @123
-      gcry_ac_data_get_index  @124
-      gcry_ac_data_clear  @125
-      gcry_ac_open  @126
-      gcry_ac_close  @127
-      gcry_ac_key_init  @128
-      gcry_ac_key_pair_generate  @129
-      gcry_ac_key_pair_extract  @130
-      gcry_ac_key_data_get  @131
-      gcry_ac_key_test  @132
-      gcry_ac_key_get_nbits  @133
-      gcry_ac_key_get_grip  @134
-      gcry_ac_key_destroy  @135
-      gcry_ac_key_pair_destroy  @136
-      gcry_ac_data_encrypt  @137
-      gcry_ac_data_decrypt  @138
-      gcry_ac_data_sign  @139
-      gcry_ac_data_verify  @140
-      gcry_ac_id_to_name  @141
-      gcry_ac_name_to_id  @142
+
+;; @117 used to be part of the module register interface
+
+;;
+;; 118 to 142 were used in previous Libgcrypt versions for the gcry_ac
+;; interface
+;;
 
       gcry_md_open  @143
       gcry_md_close  @144
@@ -192,8 +173,7 @@ EXPORTS
       gcry_md_algo_name  @158
       gcry_md_map_name  @159
       gcry_md_setkey  @160
-      gcry_md_list  @161
-
+;; @161 used to be part of the module register interface
       gcry_randomize  @162
       gcry_random_add_bytes  @163
       gcry_random_bytes  @164
@@ -209,21 +189,15 @@ EXPORTS
 
       gcry_md_debug  @172
 
-      gcry_cipher_register  @173
-      gcry_cipher_unregister @174
-      gcry_md_register  @175
-      gcry_md_unregister @176
-      gcry_pk_register  @177
-      gcry_pk_unregister @178
-
-      gcry_ac_data_from_sexp  @179
-      gcry_ac_data_to_sexp  @180
-      gcry_ac_io_init  @181
-      gcry_ac_io_init_va  @182
-      gcry_ac_data_encrypt_scheme  @183
-      gcry_ac_data_decrypt_scheme  @184
-      gcry_ac_data_sign_scheme  @185
-      gcry_ac_data_verify_scheme  @186
+;; @173 used to be part of the module register interface
+;; @174 used to be part of the module register interface
+;; @175 used to be part of the module register interface
+;; @176 used to be part of the module register interface
+;; @177 used to be part of the module register interface
+;; @178 used to be part of the module register interface
+;;
+;; @179 to @186 used to be part of the removed gcry_ac interface
+;;
 
       gcry_sexp_nth_string  @187
 
@@ -237,3 +211,91 @@ EXPORTS
       gcry_pk_get_param     @193
 
       gcry_kdf_derive       @194
+
+      gcry_mpi_snatch       @195
+
+      gcry_mpi_point_new        @196
+      gcry_mpi_point_release    @197
+      gcry_mpi_point_get        @198
+      gcry_mpi_point_snatch_get @199
+      gcry_mpi_point_set        @200
+      gcry_mpi_point_snatch_set @201
+
+      gcry_ctx_release          @202
+
+      gcry_mpi_ec_new           @203
+      gcry_mpi_ec_get_mpi       @204
+      gcry_mpi_ec_get_point     @205
+      gcry_mpi_ec_set_mpi       @206
+      gcry_mpi_ec_set_point     @207
+      gcry_mpi_ec_get_affine    @208
+      gcry_mpi_ec_dup           @209
+      gcry_mpi_ec_add           @210
+      gcry_mpi_ec_mul           @211
+
+      gcry_pubkey_get_sexp      @212
+
+      _gcry_mpi_get_const       @213
+
+      gcry_sexp_nth_buffer      @214
+
+      gcry_mpi_is_neg           @215
+      gcry_mpi_neg              @216
+      gcry_mpi_abs              @217
+
+      gcry_mpi_ec_curve_point   @218
+
+      gcry_md_hash_buffers      @219
+
+      gcry_log_debug            @220
+      gcry_log_debughex         @221
+      gcry_log_debugmpi         @222
+      gcry_log_debugpnt         @223
+      gcry_log_debugsxp         @224
+
+      gcry_sexp_extract_param   @225
+
+      gcry_cipher_authenticate  @226
+      gcry_cipher_gettag        @227
+      gcry_cipher_checktag      @228
+
+      gcry_mpi_set_opaque_copy  @229
+
+      gcry_mac_algo_info        @230
+      gcry_mac_algo_name        @231
+      gcry_mac_map_name         @232
+      gcry_mac_get_algo_maclen  @233
+      gcry_mac_get_algo_keylen  @234
+      gcry_mac_open             @235
+      gcry_mac_close            @236
+      gcry_mac_setkey           @237
+      gcry_mac_setiv            @238
+      gcry_mac_write            @239
+      gcry_mac_read             @240
+      gcry_mac_verify           @241
+      gcry_mac_ctl              @242
+      gcry_mac_get_algo         @243
+
+      gcry_mpi_ec_sub           @244
+
+      gcry_md_extract           @245
+
+      gcry_mpi_ec_decode_point  @246
+
+      gcry_get_config           @247
+
+      gcry_mpi_point_copy       @248
+
+      gcry_ecc_get_algo_keylen  @249
+      gcry_ecc_mul_point        @250
+
+      gcry_pk_hash_sign         @255
+      gcry_pk_hash_verify       @256
+      gcry_pk_random_override_new @257
+
+      gcry_kdf_open             @258
+      gcry_kdf_compute          @259
+      gcry_kdf_final            @260
+      gcry_kdf_close            @261
+
+;; end of file with public symbols for Windows.
diff --git a/grub-core/lib/libgcrypt/src/libgcrypt.m4 
b/grub-core/lib/libgcrypt/src/libgcrypt.m4
index 831dc0c6f..cd4249e87 100644
--- a/grub-core/lib/libgcrypt/src/libgcrypt.m4
+++ b/grub-core/lib/libgcrypt/src/libgcrypt.m4
@@ -1,37 +1,70 @@
-dnl Autoconf macros for libgcrypt
-dnl       Copyright (C) 2002, 2004 Free Software Foundation, Inc.
-dnl
-dnl This file is free software; as a special exception the author gives
-dnl unlimited permission to copy and/or distribute it, with or without
-dnl modifications, as long as this notice is preserved.
-dnl
-dnl This file is distributed in the hope that it will be useful, but
-dnl WITHOUT ANY WARRANTY, to the extent permitted by law; without even the
-dnl implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+# libgcrypt.m4 - Autoconf macros to detect libgcrypt
+# Copyright (C) 2002, 2003, 2004, 2011, 2014, 2018, 2020 g10 Code GmbH
+#
+# This file is free software; as a special exception the author gives
+# unlimited permission to copy and/or distribute it, with or without
+# modifications, as long as this notice is preserved.
+#
+# This file is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY, to the extent permitted by law; without even the
+# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# Last-changed: 2022-11-01
 
 
 dnl AM_PATH_LIBGCRYPT([MINIMUM-VERSION,
 dnl                   [ACTION-IF-FOUND [, ACTION-IF-NOT-FOUND ]]])
 dnl Test for libgcrypt and define LIBGCRYPT_CFLAGS and LIBGCRYPT_LIBS.
-dnl MINIMUN-VERSION is a string with the version number optionalliy prefixed
+dnl MINIMUM-VERSION is a string with the version number optionally prefixed
 dnl with the API version to also check the API compatibility. Example:
-dnl a MINIMUN-VERSION of 1:1.2.5 won't pass the test unless the installed
+dnl a MINIMUM-VERSION of 1:1.2.5 won't pass the test unless the installed
 dnl version of libgcrypt is at least 1.2.5 *and* the API number is 1.  Using
 dnl this features allows to prevent build against newer versions of libgcrypt
 dnl with a changed API.
 dnl
+dnl If a prefix option is not used, the config script is first
+dnl searched in $SYSROOT/bin and then along $PATH.  If the used
+dnl config script does not match the host specification the script
+dnl is added to the gpg_config_script_warn variable.
+dnl
 AC_DEFUN([AM_PATH_LIBGCRYPT],
-[ AC_ARG_WITH(libgcrypt-prefix,
-            AC_HELP_STRING([--with-libgcrypt-prefix=PFX],
+[ AC_REQUIRE([AC_CANONICAL_HOST])
+  AC_ARG_WITH(libgcrypt-prefix,
+            AS_HELP_STRING([--with-libgcrypt-prefix=PFX],
                            [prefix where LIBGCRYPT is installed (optional)]),
      libgcrypt_config_prefix="$withval", libgcrypt_config_prefix="")
-  if test x$libgcrypt_config_prefix != x ; then
-     if test x${LIBGCRYPT_CONFIG+set} != xset ; then
-        LIBGCRYPT_CONFIG=$libgcrypt_config_prefix/bin/libgcrypt-config
+  if test x"${LIBGCRYPT_CONFIG}" = x ; then
+     if test x"${libgcrypt_config_prefix}" != x ; then
+        LIBGCRYPT_CONFIG="${libgcrypt_config_prefix}/bin/libgcrypt-config"
      fi
   fi
 
-  AC_PATH_TOOL(LIBGCRYPT_CONFIG, libgcrypt-config, no)
+  use_gpgrt_config=""
+  if test x"$GPGRT_CONFIG" != x -a "$GPGRT_CONFIG" != "no"; then
+    if $GPGRT_CONFIG libgcrypt --exists; then
+      LIBGCRYPT_CONFIG="$GPGRT_CONFIG libgcrypt"
+      AC_MSG_NOTICE([Use gpgrt-config as libgcrypt-config])
+      use_gpgrt_config=yes
+    fi
+  fi
+  if test -z "$use_gpgrt_config"; then
+    if test x"${LIBGCRYPT_CONFIG}" = x ; then
+      case "${SYSROOT}" in
+         /*)
+           if test -x "${SYSROOT}/bin/libgcrypt-config" ; then
+             LIBGCRYPT_CONFIG="${SYSROOT}/bin/libgcrypt-config"
+           fi
+           ;;
+         '')
+           ;;
+          *)
+           AC_MSG_WARN([Ignoring \$SYSROOT as it is not an absolute path.])
+           ;;
+      esac
+    fi
+    AC_PATH_PROG(LIBGCRYPT_CONFIG, libgcrypt-config, no)
+  fi
+
   tmp=ifelse([$1], ,1:1.2.0,$1)
   if echo "$tmp" | grep ':' >/dev/null 2>/dev/null ; then
      req_libgcrypt_api=`echo "$tmp"     | sed 's/\(.*\):\(.*\)/\1/'`
@@ -50,7 +83,11 @@ AC_DEFUN([AM_PATH_LIBGCRYPT],
                sed 's/\([[0-9]]*\)\.\([[0-9]]*\)\.\([[0-9]]*\)/\2/'`
     req_micro=`echo $min_libgcrypt_version | \
                sed 's/\([[0-9]]*\)\.\([[0-9]]*\)\.\([[0-9]]*\)/\3/'`
-    libgcrypt_config_version=`$LIBGCRYPT_CONFIG --version`
+    if test -z "$use_gpgrt_config"; then
+      libgcrypt_config_version=`$LIBGCRYPT_CONFIG --version`
+    else
+      libgcrypt_config_version=`$LIBGCRYPT_CONFIG --modversion`
+    fi
     major=`echo $libgcrypt_config_version | \
                sed 's/\([[0-9]]*\)\.\([[0-9]]*\)\.\([[0-9]]*\).*/\1/'`
     minor=`echo $libgcrypt_config_version | \
@@ -82,7 +119,11 @@ AC_DEFUN([AM_PATH_LIBGCRYPT],
      # If we have a recent libgcrypt, we should also check that the
      # API is compatible
      if test "$req_libgcrypt_api" -gt 0 ; then
-        tmp=`$LIBGCRYPT_CONFIG --api-version 2>/dev/null || echo 0`
+        if test -z "$use_gpgrt_config"; then
+           tmp=`$LIBGCRYPT_CONFIG --api-version 2>/dev/null || echo 0`
+       else
+           tmp=`$LIBGCRYPT_CONFIG --variable=api_version 2>/dev/null || echo 0`
+       fi
         if test "$tmp" -gt 0 ; then
            AC_MSG_CHECKING([LIBGCRYPT API version])
            if test "$req_libgcrypt_api" -eq "$tmp" ; then
@@ -98,19 +139,22 @@ AC_DEFUN([AM_PATH_LIBGCRYPT],
     LIBGCRYPT_CFLAGS=`$LIBGCRYPT_CONFIG --cflags`
     LIBGCRYPT_LIBS=`$LIBGCRYPT_CONFIG --libs`
     ifelse([$2], , :, [$2])
-    if test x"$host" != x ; then
+    if test -z "$use_gpgrt_config"; then
       libgcrypt_config_host=`$LIBGCRYPT_CONFIG --host 2>/dev/null || echo none`
-      if test x"$libgcrypt_config_host" != xnone ; then
-        if test x"$libgcrypt_config_host" != x"$host" ; then
+    else
+      libgcrypt_config_host=`$LIBGCRYPT_CONFIG --variable=host 2>/dev/null || 
echo none`
+    fi
+    if test x"$libgcrypt_config_host" != xnone ; then
+      if test x"$libgcrypt_config_host" != x"$host" ; then
   AC_MSG_WARN([[
 ***
-*** The config script $LIBGCRYPT_CONFIG was
+*** The config script "$LIBGCRYPT_CONFIG" was
 *** built for $libgcrypt_config_host and thus may not match the
 *** used host $host.
 *** You may want to use the configure option --with-libgcrypt-prefix
-*** to specify a matching config script.
+*** to specify a matching config script or use \$SYSROOT.
 ***]])
-        fi
+        gpg_config_script_warn="$gpg_config_script_warn libgcrypt"
       fi
     fi
   else
diff --git a/grub-core/lib/libgcrypt/src/libgcrypt.pc.in 
b/grub-core/lib/libgcrypt/src/libgcrypt.pc.in
new file mode 100644
index 000000000..325f5c2b6
--- /dev/null
+++ b/grub-core/lib/libgcrypt/src/libgcrypt.pc.in
@@ -0,0 +1,18 @@
+prefix=@prefix@
+exec_prefix=@exec_prefix@
+includedir=@includedir@
+libdir=@libdir@
+host=@LIBGCRYPT_CONFIG_HOST@
+api_version=@LIBGCRYPT_CONFIG_API_VERSION@
+symmetric_ciphers="@LIBGCRYPT_CIPHERS@"
+asymmetric_ciphers="@LIBGCRYPT_PUBKEY_CIPHERS@"
+digests="@LIBGCRYPT_DIGESTS@"
+
+Name: libgcrypt
+Description: General purpose cryptographic library
+Requires.private: gpg-error
+Version: @PACKAGE_VERSION@
+Cflags: -I${includedir} @LIBGCRYPT_CONFIG_CFLAGS@
+Libs: -L${libdir} @LIBGCRYPT_CONFIG_LIBS@
+Libs.private: @DL_LIBS@
+URL: https://www.gnupg.org/software/libgcrypt/index.html
diff --git a/grub-core/lib/libgcrypt/src/libgcrypt.vers 
b/grub-core/lib/libgcrypt/src/libgcrypt.vers
index 5a617ccbe..2e274f600 100644
--- a/grub-core/lib/libgcrypt/src/libgcrypt.vers
+++ b/grub-core/lib/libgcrypt/src/libgcrypt.vers
@@ -1,5 +1,5 @@
 # libgcrypt.vers  - What symbols to export                  -*- std -*-
-# Copyright (C) 2002, 2004, 2008 Free Software Foundation, Inc.
+# Copyright (C) 2002, 2004, 2008, 2011 Free Software Foundation, Inc.
 #
 # This file is part of Libgcrypt.
 #
@@ -20,7 +20,7 @@
 # NOTE: When adding new functions, please make sure to add them to
 # visibility.h and libgcrypt.def as well.
 
-GCRYPT_1.2 {
+GCRYPT_1.6 {
   global:
     gcry_check_version; gcry_control;
     gcry_set_allocation_handler; gcry_set_fatalerror_handler;
@@ -39,41 +39,36 @@ GCRYPT_1.2 {
     gcry_md_algo_info; gcry_md_algo_name; gcry_md_close;
     gcry_md_copy; gcry_md_ctl; gcry_md_enable; gcry_md_get;
     gcry_md_get_algo; gcry_md_get_algo_dlen; gcry_md_hash_buffer;
+    gcry_md_hash_buffers;
     gcry_md_info; gcry_md_is_enabled; gcry_md_is_secure;
-    gcry_md_list; gcry_md_map_name; gcry_md_open; gcry_md_read;
-    gcry_md_register; gcry_md_reset; gcry_md_setkey;
-    gcry_md_unregister; gcry_md_write; gcry_md_debug;
+    gcry_md_map_name; gcry_md_open; gcry_md_read; gcry_md_extract;
+    gcry_md_reset; gcry_md_setkey;
+    gcry_md_write; gcry_md_debug;
 
     gcry_cipher_algo_info; gcry_cipher_algo_name; gcry_cipher_close;
     gcry_cipher_ctl; gcry_cipher_decrypt; gcry_cipher_encrypt;
     gcry_cipher_get_algo_blklen; gcry_cipher_get_algo_keylen;
-    gcry_cipher_info; gcry_cipher_list; gcry_cipher_map_name;
+    gcry_cipher_info; gcry_cipher_map_name;
     gcry_cipher_mode_from_oid; gcry_cipher_open;
-    gcry_cipher_register; gcry_cipher_unregister;
     gcry_cipher_setkey; gcry_cipher_setiv; gcry_cipher_setctr;
+    gcry_cipher_authenticate; gcry_cipher_gettag; gcry_cipher_checktag;
+
+    gcry_mac_algo_info; gcry_mac_algo_name; gcry_mac_map_name;
+    gcry_mac_get_algo_maclen; gcry_mac_get_algo_keylen; gcry_mac_get_algo;
+    gcry_mac_open; gcry_mac_close; gcry_mac_setkey; gcry_mac_setiv;
+    gcry_mac_write; gcry_mac_read; gcry_mac_verify; gcry_mac_ctl;
 
     gcry_pk_algo_info; gcry_pk_algo_name; gcry_pk_ctl;
     gcry_pk_decrypt; gcry_pk_encrypt; gcry_pk_genkey;
-    gcry_pk_get_keygrip; gcry_pk_get_nbits; gcry_pk_list;
+    gcry_pk_get_keygrip; gcry_pk_get_nbits;
     gcry_pk_map_name; gcry_pk_register; gcry_pk_sign;
-    gcry_pk_testkey; gcry_pk_unregister; gcry_pk_verify;
+    gcry_pk_testkey; gcry_pk_verify;
     gcry_pk_get_curve; gcry_pk_get_param;
 
-    gcry_ac_data_new; gcry_ac_data_destroy; gcry_ac_data_copy;
-    gcry_ac_data_length; gcry_ac_data_clear; gcry_ac_data_set;
-    gcry_ac_data_get_name; gcry_ac_data_get_index; gcry_ac_open;
-    gcry_ac_close; gcry_ac_key_init; gcry_ac_key_pair_generate;
-    gcry_ac_key_pair_extract; gcry_ac_key_data_get; gcry_ac_key_test;
-    gcry_ac_key_get_nbits; gcry_ac_key_get_grip; gcry_ac_key_destroy;
-    gcry_ac_key_pair_destroy; gcry_ac_data_encrypt; gcry_ac_data_decrypt;
-    gcry_ac_data_sign; gcry_ac_data_verify; gcry_ac_id_to_name;
-    gcry_ac_name_to_id; gcry_ac_list; gcry_ac_data_encode;
-    gcry_ac_data_decode; gcry_ac_mpi_to_os; gcry_ac_mpi_to_os_alloc;
-    gcry_ac_os_to_mpi; gcry_ac_data_encrypt_scheme;
-    gcry_ac_data_decrypt_scheme;
-    gcry_ac_data_sign_scheme; gcry_ac_data_verify_scheme;
-    gcry_ac_data_to_sexp; gcry_ac_data_from_sexp;
-    gcry_ac_io_init; gcry_ac_io_init_va;
+    gcry_pubkey_get_sexp;
+
+    gcry_ecc_get_algo_keylen;
+    gcry_ecc_mul_point;
 
     gcry_kdf_derive;
 
@@ -87,11 +82,12 @@ GCRYPT_1.2 {
     gcry_sexp_build_array; gcry_sexp_cadr; gcry_sexp_canon_len;
     gcry_sexp_car; gcry_sexp_cdr; gcry_sexp_cons; gcry_sexp_create;
     gcry_sexp_dump; gcry_sexp_find_token; gcry_sexp_length;
-    gcry_sexp_new; gcry_sexp_nth; gcry_sexp_nth_data;
+    gcry_sexp_new; gcry_sexp_nth; gcry_sexp_nth_buffer; gcry_sexp_nth_data;
     gcry_sexp_nth_mpi; gcry_sexp_prepend; gcry_sexp_release;
     gcry_sexp_sprint; gcry_sexp_sscan; gcry_sexp_vlist;
-    gcry_sexp_nth_string;
+    gcry_sexp_nth_string; gcry_sexp_extract_param;
 
+    gcry_mpi_is_neg; gcry_mpi_neg; gcry_mpi_abs;
     gcry_mpi_add; gcry_mpi_add_ui; gcry_mpi_addm; gcry_mpi_aprint;
     gcry_mpi_clear_bit; gcry_mpi_clear_flag; gcry_mpi_clear_highbit;
     gcry_mpi_cmp; gcry_mpi_cmp_ui; gcry_mpi_copy; gcry_mpi_div;
@@ -100,11 +96,35 @@ GCRYPT_1.2 {
     gcry_mpi_mul_2exp; gcry_mpi_mul_ui; gcry_mpi_mulm; gcry_mpi_new;
     gcry_mpi_powm; gcry_mpi_print; gcry_mpi_randomize; gcry_mpi_release;
     gcry_mpi_rshift; gcry_mpi_scan; gcry_mpi_set; gcry_mpi_set_bit;
-    gcry_mpi_set_flag; gcry_mpi_set_highbit; gcry_mpi_set_opaque;
+    gcry_mpi_set_flag; gcry_mpi_set_highbit;
+    gcry_mpi_set_opaque; gcry_mpi_set_opaque_copy;
     gcry_mpi_set_ui; gcry_mpi_snew; gcry_mpi_sub; gcry_mpi_sub_ui;
     gcry_mpi_subm; gcry_mpi_swap; gcry_mpi_test_bit;
-    gcry_mpi_lshift;
+    gcry_mpi_lshift; gcry_mpi_snatch;
+    gcry_mpi_point_new; gcry_mpi_point_release;
+    gcry_mpi_point_get; gcry_mpi_point_snatch_get;
+    gcry_mpi_point_set; gcry_mpi_point_snatch_set;
+    gcry_mpi_ec_new;
+    gcry_mpi_ec_get_mpi; gcry_mpi_ec_get_point;
+    gcry_mpi_ec_set_mpi; gcry_mpi_ec_set_point;
+    gcry_mpi_ec_get_affine;
+    gcry_mpi_ec_dup; gcry_mpi_ec_add; gcry_mpi_ec_sub; gcry_mpi_ec_mul;
+    gcry_mpi_ec_curve_point; gcry_mpi_ec_decode_point;
+    gcry_mpi_point_copy;
+    gcry_mpi_get_ui;
+
+    gcry_log_debug;
+    gcry_log_debughex; gcry_log_debugmpi; gcry_log_debugpnt; gcry_log_debugsxp;
+
+    gcry_get_config;
+
+    _gcry_mpi_get_const;
+
+    gcry_ctx_release;
+
+    gcry_pk_hash_sign; gcry_pk_hash_verify; gcry_pk_random_override_new;
 
+    gcry_kdf_open; gcry_kdf_compute; gcry_kdf_final; gcry_kdf_close;
   local:
     *;
 
diff --git a/grub-core/lib/libgcrypt/src/misc.c 
b/grub-core/lib/libgcrypt/src/misc.c
index 17bd54671..4db2d9a4d 100644
--- a/grub-core/lib/libgcrypt/src/misc.c
+++ b/grub-core/lib/libgcrypt/src/misc.c
@@ -19,6 +19,7 @@
  */
 
 #include <config.h>
+#include <errno.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -27,9 +28,16 @@
 
 #include "g10lib.h"
 #include "secmem.h"
+#include "mpi.h"
 
 static int verbosity_level = 0;
 
+#ifndef HAVE_EXPLICIT_MEMSET
+/* Prevent compiler from optimizing away the call to memset by accessing
+   memset through volatile pointer. */
+static void *(*volatile memset_ptr)(void *, int, size_t) = (void *)memset;
+#endif
+
 static void (*fatal_error_handler)(void*,int, const char*) = NULL;
 static void *fatal_error_handler_value = 0;
 static void (*log_handler)(void*,int, const char*, va_list) = NULL;
@@ -38,7 +46,7 @@ static void *log_handler_value = 0;
 static const char *(*user_gettext_handler)( const char * ) = NULL;
 
 void
-gcry_set_gettext_handler( const char *(*f)(const char*) )
+_gcry_set_gettext_handler (const char *(*f)(const char*))
 {
     user_gettext_handler = f;
 }
@@ -54,7 +62,7 @@ _gcry_gettext( const char *key )
 }
 
 void
-gcry_set_fatalerror_handler( void (*fnc)(void*,int, const char*), void *value)
+_gcry_set_fatalerror_handler( void (*fnc)(void*,int, const char*), void *value)
 {
     fatal_error_handler_value = value;
     fatal_error_handler = fnc;
@@ -90,8 +98,7 @@ _gcry_fatal_error (int rc, const char *text)
 }
 
 void
-gcry_set_log_handler( void (*f)(void*,int, const char*, va_list ),
-                                                           void *opaque )
+_gcry_set_log_handler (void (*f)(void*,int, const char*, va_list), void 
*opaque)
 {
     log_handler = f;
     log_handler_value = opaque;
@@ -113,7 +120,7 @@ _gcry_log_verbosity( int level )
  * This is our log function which prints all log messages to stderr or
  * using the function defined with gcry_set_log_handler().
  */
-static void
+void
 _gcry_logv( int level, const char *fmt, va_list arg_ptr )
 {
   if (log_handler)
@@ -197,18 +204,6 @@ _gcry_log_info( const char *fmt, ... )
     va_end(arg_ptr);
 }
 
-int
-_gcry_log_info_with_dummy_fp (FILE *fp, const char *fmt, ... )
-{
-    va_list arg_ptr;
-
-    (void)fp;
-    va_start( arg_ptr, fmt ) ;
-    _gcry_logv( GCRY_LOG_INFO, fmt, arg_ptr );
-    va_end(arg_ptr);
-    return 0;
-}
-
 void
 _gcry_log_error( const char *fmt, ... )
 {
@@ -266,33 +261,319 @@ _gcry_log_printf (const char *fmt, ...)
     }
 }
 
+
+/* Helper for _gcry_log_printhex and _gcry_log_printmpi.  */
+static void
+do_printhex (const char *text, const char *text2,
+             const void *buffer, size_t length)
+{
+  int wrap = 0;
+  int cnt = 0;
+
+  if (text && *text)
+    {
+      wrap = 1;
+      log_debug ("%s:%s", text, text2);
+      if (text2[1] == '[' && length && buffer)
+        {
+          /* Start with a new line so that we get nice output for
+             opaque MPIS:
+               "value: [31 bit]"
+               "        01020300"  */
+          log_printf ("\n");
+          text2 = " ";
+          log_debug ("%*s  ", (int)strlen(text), "");
+        }
+    }
+  if (length && buffer)
+    {
+      const unsigned char *p = buffer;
+      for (; length--; p++)
+        {
+          log_printf ("%02x", *p);
+          if (wrap && ++cnt == 32 && length)
+            {
+              cnt = 0;
+              log_printf (" \\\n");
+              log_debug ("%*s %*s",
+                         (int)strlen(text), "", (int)strlen(text2), "");
+            }
+        }
+    }
+  if (text)
+    log_printf ("\n");
+}
+
+
 /* Print a hexdump of BUFFER.  With TEXT of NULL print just the raw
-   dump, with TEXT an empty string, print a trailing linefeed,
-   otherwise print an entire debug line. */
+   dump without any wrappping, with TEXT an empty string, print a
+   trailing linefeed, otherwise print an entire debug line. */
 void
 _gcry_log_printhex (const char *text, const void *buffer, size_t length)
 {
+  do_printhex (text, " ", buffer, length);
+}
+
+
+/* Print MPI in hex notation.  To make clear that the output is an MPI
+   a sign is always printed. With TEXT of NULL print just the raw dump
+   without any wrapping, with TEXT an empty string, print a trailing
+   linefeed, otherwise print an entire debug line. */
+void
+_gcry_log_printmpi (const char *text, gcry_mpi_t mpi)
+{
+  unsigned char *rawmpi;
+  unsigned int rawmpilen;
+  int sign;
+
+  if (!mpi)
+    do_printhex (text? text:" ", " (null)", NULL, 0);
+  else if (mpi_is_opaque (mpi))
+    {
+      unsigned int nbits;
+      const unsigned char *p;
+      char prefix[30];
+
+      p = mpi_get_opaque (mpi, &nbits);
+      snprintf (prefix, sizeof prefix, " [%u bit]", nbits);
+      do_printhex (text? text:" ", prefix, p, (nbits+7)/8);
+    }
+  else
+    {
+      rawmpi = _gcry_mpi_get_buffer (mpi, 0, &rawmpilen, &sign);
+      if (!rawmpi)
+        do_printhex (text? text:" ", " [out of core]", NULL, 0);
+      else
+        {
+          if (!rawmpilen)
+            do_printhex (text, sign? "-":"+", "", 1);
+          else
+            do_printhex (text, sign? "-":"+", rawmpi, rawmpilen);
+          xfree (rawmpi);
+        }
+    }
+}
+
+
+static int
+count_closing_parens (const char *p)
+{
+  int count = 0;
+
+  for (; *p; p++)
+    if (*p == ')')
+      count++;
+    else if (!strchr ("\n \t", *p))
+      return 0;
+
+  return count;
+}
+
+
+/* Print SEXP in human readabale format.  With TEXT of NULL print just the raw
+   dump without any wrappping, with TEXT an empty string, print a
+   trailing linefeed, otherwise print the full debug output. */
+void
+_gcry_log_printsxp (const char *text, gcry_sexp_t sexp)
+{
+  int with_lf = 0;
+
   if (text && *text)
-    log_debug ("%s ", text);
-  if (length)
     {
-      const unsigned char *p = buffer;
-      log_printf ("%02X", *p);
-      for (length--, p++; length--; p++)
-        log_printf (" %02X", *p);
+      if ((with_lf = !!strchr (text, '\n')))
+        log_debug ("%s", text);
+      else
+        log_debug ("%s: ", text);
     }
-  if (text)
+  if (sexp)
+    {
+      int any = 0;
+      int n_closing;
+      char *buf, *pend;
+      const char *p;
+      size_t size;
+
+      size = sexp_sprint (sexp, GCRYSEXP_FMT_ADVANCED, NULL, 0);
+      p = buf = xmalloc (size);
+      sexp_sprint (sexp, GCRYSEXP_FMT_ADVANCED, buf, size);
+
+      do
+        {
+          if (any && !with_lf)
+            log_debug ("%*s  ", text?(int)strlen(text):0, "");
+          else
+            any = 1;
+          pend = strchr (p, '\n');
+          size = pend? (pend - p) : strlen (p);
+          if (with_lf)
+            log_debug ("%.*s", (int)size, p);
+          else
+            log_printf ("%.*s", (int)size, p);
+          if (pend)
+            p = pend + 1;
+          else
+            p += size;
+          n_closing = count_closing_parens (p);
+          if (n_closing)
+            {
+              while (n_closing--)
+                log_printf (")");
+              p = "";
+            }
+          log_printf ("\n");
+        }
+      while (*p);
+      xfree (buf);
+    }
+  else if (text)
     log_printf ("\n");
 }
 
 
+/*
+ * Tokenize STRING using the set of delimiters in DELIM.  Leading
+ * white spaces are removed from all tokens.  The caller must xfree
+ * the result.
+ *
+ * Returns: A malloced and NULL delimited array with the tokens.  On
+ *          memory error NULL is returned and ERRNO is set.
+ */
+char **
+_gcry_strtokenize (const char *string, const char *delim)
+{
+  const char *s;
+  size_t fields;
+  size_t bytes, n;
+  char *buffer;
+  char *p, *px, *pend;
+  char **result;
+  char const ws[] = " \t\v\f\r\n";
+
+  if (!delim)
+    delim = ws;
+
+  /* Count the number of fields.  */
+  for (fields = 1, s = strpbrk (string, delim); s; s = strpbrk (s + 1, delim))
+    fields++;
+  fields++; /* Add one for the terminating NULL.  */
+
+  /* Allocate an array for all fields, a terminating NULL, and space
+     for a copy of the string.  */
+  bytes = fields * sizeof *result;
+  if (bytes / sizeof *result != fields)
+    {
+      gpg_err_set_errno (ENOMEM);
+      return NULL;
+    }
+  n = strlen (string) + 1;
+  bytes += n;
+  if (bytes < n)
+    {
+      gpg_err_set_errno (ENOMEM);
+      return NULL;
+    }
+  result = xtrymalloc (bytes);
+  if (!result)
+    return NULL;
+  buffer = (char*)(result + fields);
+
+  /* Copy and parse the string.  */
+  strcpy (buffer, string);
+  for (n = 0, p = buffer; (pend = strpbrk (p, delim)); p = pend + 1)
+    {
+      *pend = 0;
+      while (strchr (ws, *(byte*)p))
+        p++;
+      for (px = pend - 1; px >= p && strchr (ws, *(byte*)px); px--)
+        *px = 0;
+      result[n++] = p;
+    }
+  while (*p && strchr (ws, *(byte*)p))
+    p++;
+  for (px = p + strlen (p) - 1; px >= p && strchr (ws, *(byte*)px); px--)
+    *px = 0;
+  /* Traling spaces may result in an empty field.  We do not want to
+     store that.  */
+  result[n++] = *p? p : NULL;
+  result[n] = NULL;
+
+  gcry_assert ((char*)(result + n + 1) == buffer);
+
+  return result;
+}
+
+
+void
+_gcry_fast_wipememory (void *ptr, size_t len)
+{
+  /* Note: This function is called from wipememory/wipememory2 only if LEN
+     is large or unknown at compile time. New wipe function alternatives
+     need to be checked before adding to this function. New implementations
+     need to be faster than wipememory/wipememory2 macros in 'g10lib.h'.
+
+     Following implementations were found to have suboptimal performance:
+
+     - [_WIN32/mingw32] SecureZeroMemory; Inline function, equivalent to
+       volatile byte buffer set: while(buflen--) (volatile char *)(buf++)=set;
+   */
+#ifdef HAVE_EXPLICIT_BZERO
+  explicit_bzero (ptr, len);
+#elif defined(HAVE_EXPLICIT_MEMSET)
+  explicit_memset (ptr, 0, len);
+#else
+  memset_ptr (ptr, 0, len);
+#endif
+}
+
+
+void
+_gcry_fast_wipememory2 (void *ptr, int set, size_t len)
+{
+#ifdef HAVE_EXPLICIT_MEMSET
+  explicit_memset (ptr, set, len);
+#else
+#ifdef HAVE_EXPLICIT_BZERO
+  if (set == 0)
+    {
+      explicit_bzero (ptr, len);
+      return;
+    }
+#endif
+
+  memset_ptr (ptr, set, len);
+#endif
+}
+
+
+void NOINLINE_FUNC
+__gcry_burn_stack (unsigned int bytes)
+{
+#ifdef HAVE_VLA
+  /* (bytes == 0 ? 1 : bytes) == (!bytes + bytes) */
+  unsigned int buflen = ((!bytes + bytes) + 63) & ~63;
+  char buf[buflen];
+
+  _gcry_fast_wipememory (buf, buflen);
+#else
+  volatile char buf[64];
+
+  _gcry_fast_wipememory (buf, sizeof buf);
+
+  if (bytes > sizeof buf)
+      _gcry_burn_stack (bytes - sizeof buf);
+#endif
+}
+
+#ifndef HAVE_GCC_ASM_VOLATILE_MEMORY
 void
-_gcry_burn_stack (int bytes)
+__gcry_burn_stack_dummy (void)
 {
-    char buf[64];
+}
+#endif
 
-    wipememory (buf, sizeof buf);
-    bytes -= sizeof buf;
-    if (bytes > 0)
-        _gcry_burn_stack (bytes);
+void
+_gcry_divide_by_zero (void)
+{
+    gpg_err_set_errno (EDOM);
+    _gcry_fatal_error (gpg_err_code_from_errno (errno), "divide by zero");
 }
diff --git a/grub-core/lib/libgcrypt/src/module.c 
b/grub-core/lib/libgcrypt/src/module.c
deleted file mode 100644
index 32f668de4..000000000
--- a/grub-core/lib/libgcrypt/src/module.c
+++ /dev/null
@@ -1,212 +0,0 @@
-/* module.c - Module management for libgcrypt.
- * Copyright (C) 2003, 2008 Free Software Foundation, Inc.
- *
- * This file is part of Libgcrypt.
- *
- * Libgcrypt is free software; you can redistribute it and/or modify
- * it under the terms of the GNU Lesser general Public License as
- * published by the Free Software Foundation; either version 2.1 of
- * the License, or (at your option) any later version.
- *
- * Libgcrypt is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <config.h>
-#include <errno.h>
-#include "g10lib.h"
-
-/* Please match these numbers with the allocated algorithm
-   numbers.  */
-#define MODULE_ID_MIN 600
-#define MODULE_ID_LAST 65500
-#define MODULE_ID_USER GCRY_MODULE_ID_USER
-#define MODULE_ID_USER_LAST GCRY_MODULE_ID_USER_LAST
-
-#if MODULE_ID_MIN >= MODULE_ID_USER
-#error Need to implement a different search strategy
-#endif
-
-/* Internal function.  Generate a new, unique module ID for a module
-   that should be inserted into the module chain starting at
-   MODULES.  */
-static gcry_err_code_t
-_gcry_module_id_new (gcry_module_t modules, unsigned int *id_new)
-{
-  unsigned int mod_id;
-  gcry_err_code_t err = GPG_ERR_NO_ERROR;
-  gcry_module_t module;
-
-  /* Search for unused ID.  */
-  for (mod_id = MODULE_ID_MIN; mod_id < MODULE_ID_LAST; mod_id++)
-    {
-      if (mod_id == MODULE_ID_USER)
-        {
-          mod_id = MODULE_ID_USER_LAST;
-          continue;
-        }
-
-      /* Search for a module with the current ID.  */
-      for (module = modules; module; module = module->next)
-       if (mod_id == module->mod_id)
-         break;
-
-      if (! module)
-       /* None found -> the ID is available for use.  */
-       break;
-    }
-
-  if (mod_id < MODULE_ID_LAST)
-    /* Done.  */
-    *id_new = mod_id;
-  else
-    /* No free ID found.  */
-    err = GPG_ERR_INTERNAL;
-
-  return err;
-}
-
-/* Add a module specification to the list ENTRIES.  The new module has
-   it's use-counter set to one.  */
-gcry_err_code_t
-_gcry_module_add (gcry_module_t *entries, unsigned int mod_id,
-                 void *spec, void *extraspec, gcry_module_t *module)
-{
-  gcry_err_code_t err = 0;
-  gcry_module_t entry;
-
-  if (! mod_id)
-    err = _gcry_module_id_new (*entries, &mod_id);
-
-  if (! err)
-    {
-      entry = gcry_malloc (sizeof (struct gcry_module));
-      if (! entry)
-       err = gpg_err_code_from_errno (errno);
-    }
-
-  if (! err)
-    {
-      /* Fill new module entry.  */
-      entry->flags = 0;
-      entry->counter = 1;
-      entry->spec = spec;
-      entry->extraspec = extraspec;
-      entry->mod_id = mod_id;
-
-      /* Link it into the list.  */
-      entry->next = *entries;
-      entry->prevp = entries;
-      if (*entries)
-       (*entries)->prevp = &entry->next;
-      *entries = entry;
-
-      /* And give it to the caller.  */
-      if (module)
-       *module = entry;
-    }
-  return err;
-}
-
-/* Internal function.  Unlink CIPHER_ENTRY from the list of registered
-   ciphers and destroy it.  */
-static void
-_gcry_module_drop (gcry_module_t entry)
-{
-  *entry->prevp = entry->next;
-  if (entry->next)
-    entry->next->prevp = entry->prevp;
-
-  gcry_free (entry);
-}
-
-/* Lookup a module specification by it's ID.  After a successful
-   lookup, the module has it's resource counter incremented.  */
-gcry_module_t
-_gcry_module_lookup_id (gcry_module_t entries, unsigned int mod_id)
-{
-  gcry_module_t entry;
-
-  for (entry = entries; entry; entry = entry->next)
-    if (entry->mod_id == mod_id)
-      {
-       entry->counter++;
-       break;
-      }
-
-  return entry;
-}
-
-/* Lookup a module specification.  After a successful lookup, the
-   module has it's resource counter incremented.  FUNC is a function
-   provided by the caller, which is responsible for identifying the
-   wanted module.  */
-gcry_module_t
-_gcry_module_lookup (gcry_module_t entries, void *data,
-                    gcry_module_lookup_t func)
-{
-  gcry_module_t entry;
-
-  for (entry = entries; entry; entry = entry->next)
-    if ((*func) (entry->spec, data))
-      {
-       entry->counter++;
-       break;
-      }
-
-  return entry;
-}
-
-/* Release a module.  In case the use-counter reaches zero, destroy
-   the module.  Passing MODULE as NULL is a dummy operation (similar
-   to free()). */
-void
-_gcry_module_release (gcry_module_t module)
-{
-  if (module && ! --module->counter)
-    _gcry_module_drop (module);
-}
-
-/* Add a reference to a module.  */
-void
-_gcry_module_use (gcry_module_t module)
-{
-  ++module->counter;
-}
-
-/* If LIST is zero, write the number of modules identified by MODULES
-   to LIST_LENGTH and return.  If LIST is non-zero, the first
-   *LIST_LENGTH algorithm IDs are stored in LIST, which must be of
-   according size.  In case there are less cipher modules than
-   *LIST_LENGTH, *LIST_LENGTH is updated to the correct number.  */
-gcry_err_code_t
-_gcry_module_list (gcry_module_t modules,
-                  int *list, int *list_length)
-{
-  gcry_err_code_t err = GPG_ERR_NO_ERROR;
-  gcry_module_t module;
-  int length, i;
-
-  for (module = modules, length = 0; module; module = module->next, length++);
-
-  if (list)
-    {
-      if (length > *list_length)
-       length = *list_length;
-
-      for (module = modules, i = 0; i < length; module = module->next, i++)
-       list[i] = module->mod_id;
-
-      if (length < *list_length)
-       *list_length = length;
-    }
-  else
-    *list_length = length;
-
-  return err;
-}
diff --git a/grub-core/lib/libgcrypt/src/mpi.h 
b/grub-core/lib/libgcrypt/src/mpi.h
index 5883196b1..9e234eff7 100644
--- a/grub-core/lib/libgcrypt/src/mpi.h
+++ b/grub-core/lib/libgcrypt/src/mpi.h
@@ -69,14 +69,16 @@ struct gcry_mpi
   int sign;           /* Indicates a negative number and is also used
                          for opaque MPIs to store the length.  */
   unsigned int flags; /* Bit 0: Array to be allocated in secure memory space.*/
-                      /* Bit 2: the limb is a pointer to some m_alloced data.*/
+                      /* Bit 2: The limb is a pointer to some m_alloced data.*/
+                      /* Bit 4: Immutable MPI - the MPI may not be modified.  
*/
+                      /* Bit 5: Constant MPI - the MPI will not be freed.  */
   mpi_limb_t *d;      /* Array with the limbs */
 };
 
 #define MPI_NULL NULL
 
 #define mpi_get_nlimbs(a)     ((a)->nlimbs)
-#define mpi_is_neg(a)        ((a)->sign)
+#define mpi_has_sign(a)              ((a)->sign)
 
 /*-- mpiutil.c --*/
 
@@ -104,56 +106,73 @@ struct gcry_mpi
   gcry_mpi_t  _gcry_mpi_copy( gcry_mpi_t a );
 #endif
 
-#define gcry_mpi_copy _gcry_mpi_copy
+void _gcry_mpi_immutable_failed (void);
+#define mpi_immutable_failed() _gcry_mpi_immutable_failed ()
 
-#define mpi_is_opaque(a) ((a) && ((a)->flags&4))
-#define mpi_is_secure(a) ((a) && ((a)->flags&1))
+#define mpi_is_const(a)       ((a)->flags&32)
+#define mpi_is_immutable(a)   ((a)->flags&16)
+#define mpi_is_opaque(a)      ((a) && ((a)->flags&4))
+#define mpi_is_secure(a)      ((a) && ((a)->flags&1))
 #define mpi_clear(a)          _gcry_mpi_clear ((a))
 #define mpi_alloc_like(a)     _gcry_mpi_alloc_like((a))
-#define mpi_set(a,b)          gcry_mpi_set ((a),(b))
-#define mpi_set_ui(a,b)       gcry_mpi_set_ui ((a),(b))
-#define mpi_get_ui(a,b)       _gcry_mpi_get_ui ((a),(b))
+
 #define mpi_alloc_set_ui(a)   _gcry_mpi_alloc_set_ui ((a))
 #define mpi_m_check(a)        _gcry_mpi_m_check ((a))
-#define mpi_swap(a,b)         _gcry_mpi_swap ((a),(b))
-#define mpi_new(n)            _gcry_mpi_new ((n))
-#define mpi_snew(n)           _gcry_mpi_snew ((n))
+#define mpi_const(n)          _gcry_mpi_const ((n))
+#define mpi_swap_cond(a,b,sw)  _gcry_mpi_swap_cond ((a),(b),(sw))
+#define mpi_set_cond(w,u,set)  _gcry_mpi_set_cond ((w),(u),(set))
+#define mpi_set_bit_cond(a,n,set) _gcry_mpi_set_bit_cond ((a),(n),(set))
 
 void _gcry_mpi_clear( gcry_mpi_t a );
+gcry_mpi_t _gcry_mpi_set_cond (gcry_mpi_t w, const gcry_mpi_t u,
+                               unsigned long swap);
 gcry_mpi_t  _gcry_mpi_alloc_like( gcry_mpi_t a );
 gcry_mpi_t  _gcry_mpi_alloc_set_ui( unsigned long u);
-gcry_err_code_t _gcry_mpi_get_ui (gcry_mpi_t w, ulong *u);
-gcry_err_code_t gcry_mpi_get_ui (gcry_mpi_t w, ulong *u);
 void _gcry_mpi_m_check( gcry_mpi_t a );
 void _gcry_mpi_swap( gcry_mpi_t a, gcry_mpi_t b);
+void _gcry_mpi_swap_cond (gcry_mpi_t a, gcry_mpi_t b, unsigned long swap);
+void _gcry_mpi_set_bit_cond (gcry_mpi_t a, unsigned int n, unsigned long set);
 gcry_mpi_t _gcry_mpi_new (unsigned int nbits);
 gcry_mpi_t _gcry_mpi_snew (unsigned int nbits);
+gcry_mpi_t _gcry_mpi_set_opaque_copy (gcry_mpi_t a,
+                                      const void *p, unsigned int nbits);
+void *_gcry_mpi_get_opaque_copy (gcry_mpi_t a, unsigned int *nbits);
+int _gcry_mpi_is_neg (gcry_mpi_t a);
+void _gcry_mpi_neg (gcry_mpi_t w, gcry_mpi_t u);
+void _gcry_mpi_abs (gcry_mpi_t w);
+
+/* Constants used to return constant MPIs.  See _gcry_mpi_init if you
+   want to add more constants. */
+#define MPI_NUMBER_OF_CONSTANTS 6
+enum gcry_mpi_constants
+  {
+    MPI_C_ZERO,
+    MPI_C_ONE,
+    MPI_C_TWO,
+    MPI_C_THREE,
+    MPI_C_FOUR,
+    MPI_C_EIGHT
+  };
+
+
+gcry_mpi_t _gcry_mpi_const (enum gcry_mpi_constants no);
+
 
 /*-- mpicoder.c --*/
 void  _gcry_log_mpidump( const char *text, gcry_mpi_t a );
 u32   _gcry_mpi_get_keyid( gcry_mpi_t a, u32 *keyid );
-byte *_gcry_mpi_get_buffer( gcry_mpi_t a, unsigned *nbytes, int *sign );
-byte *_gcry_mpi_get_secure_buffer( gcry_mpi_t a, unsigned *nbytes, int *sign );
+byte *_gcry_mpi_get_buffer (gcry_mpi_t a, unsigned int fill_le,
+                            unsigned int *r_nbytes, int *sign);
+byte *_gcry_mpi_get_buffer_extra (gcry_mpi_t a, unsigned int fill_le,
+                                  int extraalloc,
+                                  unsigned int *r_nbytes, int *sign);
+byte *_gcry_mpi_get_secure_buffer (gcry_mpi_t a, unsigned int fill_le,
+                                   unsigned *r_nbytes, int *sign);
 void  _gcry_mpi_set_buffer ( gcry_mpi_t a, const void *buffer,
                              unsigned int nbytes, int sign );
-
-#define log_mpidump _gcry_log_mpidump
-
-/*-- mpi-add.c --*/
-#define mpi_add_ui(w,u,v) gcry_mpi_add_ui((w),(u),(v))
-#define mpi_add(w,u,v)    gcry_mpi_add ((w),(u),(v))
-#define mpi_addm(w,u,v,m) gcry_mpi_addm ((w),(u),(v),(m))
-#define mpi_sub_ui(w,u,v) gcry_mpi_sub_ui ((w),(u),(v))
-#define mpi_sub(w,u,v)    gcry_mpi_sub ((w),(u),(v))
-#define mpi_subm(w,u,v,m) gcry_mpi_subm ((w),(u),(v),(m))
-
-
-/*-- mpi-mul.c --*/
-#define mpi_mul_ui(w,u,v)   gcry_mpi_mul_ui ((w),(u),(v))
-#define mpi_mul_2exp(w,u,v) gcry_mpi_mul_2exp ((w),(u),(v))
-#define mpi_mul(w,u,v)      gcry_mpi_mul ((w),(u),(v))
-#define mpi_mulm(w,u,v,m)   gcry_mpi_mulm ((w),(u),(v),(m))
-
+gpg_err_code_t _gcry_mpi_to_octet_string (unsigned char **r_frame,
+                                          void *space,
+                                          gcry_mpi_t value, size_t nbytes);
 
 /*-- mpi-div.c --*/
 #define mpi_fdiv_r_ui(a,b,c)   _gcry_mpi_fdiv_r_ui((a),(b),(c))
@@ -165,25 +184,22 @@ void  _gcry_mpi_set_buffer ( gcry_mpi_t a, const void 
*buffer,
 #define mpi_tdiv_q_2exp(a,b,c) _gcry_mpi_tdiv_q_2exp((a),(b),(c))
 #define mpi_divisible_ui(a,b)  _gcry_mpi_divisible_ui((a),(b))
 
-ulong _gcry_mpi_fdiv_r_ui( gcry_mpi_t rem, gcry_mpi_t dividend, ulong divisor 
);
+unsigned long _gcry_mpi_fdiv_r_ui( gcry_mpi_t rem, gcry_mpi_t dividend, 
unsigned long divisor );
 void  _gcry_mpi_fdiv_r( gcry_mpi_t rem, gcry_mpi_t dividend, gcry_mpi_t 
divisor );
 void  _gcry_mpi_fdiv_q( gcry_mpi_t quot, gcry_mpi_t dividend, gcry_mpi_t 
divisor );
 void  _gcry_mpi_fdiv_qr( gcry_mpi_t quot, gcry_mpi_t rem, gcry_mpi_t dividend, 
gcry_mpi_t divisor );
 void  _gcry_mpi_tdiv_r( gcry_mpi_t rem, gcry_mpi_t num, gcry_mpi_t den);
 void  _gcry_mpi_tdiv_qr( gcry_mpi_t quot, gcry_mpi_t rem, gcry_mpi_t num, 
gcry_mpi_t den);
 void  _gcry_mpi_tdiv_q_2exp( gcry_mpi_t w, gcry_mpi_t u, unsigned count );
-int   _gcry_mpi_divisible_ui(gcry_mpi_t dividend, ulong divisor );
+int   _gcry_mpi_divisible_ui(gcry_mpi_t dividend, unsigned long divisor );
 
 
 /*-- mpi-mod.c --*/
-#define mpi_mod(r,a,m)            _gcry_mpi_mod ((r), (a), (m))
 #define mpi_barrett_init(m,f)     _gcry_mpi_barrett_init ((m),(f))
 #define mpi_barrett_free(c)       _gcry_mpi_barrett_free ((c))
 #define mpi_mod_barrett(r,a,c)    _gcry_mpi_mod_barrett ((r), (a), (c))
 #define mpi_mul_barrett(r,u,v,c)  _gcry_mpi_mul_barrett ((r), (u), (v), (c))
 
-void _gcry_mpi_mod (gcry_mpi_t r, gcry_mpi_t dividend, gcry_mpi_t divisor);
-
 /* Context used with Barrett reduction.  */
 struct barrett_ctx_s;
 typedef struct barrett_ctx_s *mpi_barrett_t;
@@ -195,19 +211,10 @@ void _gcry_mpi_mul_barrett (gcry_mpi_t w, gcry_mpi_t u, 
gcry_mpi_t v,
                             mpi_barrett_t ctx);
 
 
-
-/*-- mpi-gcd.c --*/
-
 /*-- mpi-mpow.c --*/
 #define mpi_mulpowm(a,b,c,d) _gcry_mpi_mulpowm ((a),(b),(c),(d))
 void _gcry_mpi_mulpowm( gcry_mpi_t res, gcry_mpi_t *basearray, gcry_mpi_t 
*exparray, gcry_mpi_t mod);
 
-/*-- mpi-cmp.c --*/
-#define mpi_cmp_ui(a,b) gcry_mpi_cmp_ui ((a),(b))
-#define mpi_cmp(a,b)    gcry_mpi_cmp ((a),(b))
-int gcry_mpi_cmp_ui( gcry_mpi_t u, ulong v );
-int gcry_mpi_cmp( gcry_mpi_t u, gcry_mpi_t v );
-
 /*-- mpi-scan.c --*/
 #define mpi_trailing_zeros(a) _gcry_mpi_trailing_zeros ((a))
 int      _gcry_mpi_getbyte( gcry_mpi_t a, unsigned idx );
@@ -216,50 +223,104 @@ unsigned _gcry_mpi_trailing_zeros( gcry_mpi_t a );
 
 /*-- mpi-bit.c --*/
 #define mpi_normalize(a)       _gcry_mpi_normalize ((a))
-#define mpi_get_nbits(a)       gcry_mpi_get_nbits ((a))
-#define mpi_test_bit(a,b)      gcry_mpi_test_bit ((a),(b))
-#define mpi_set_bit(a,b)       gcry_mpi_set_bit ((a),(b))
-#define mpi_set_highbit(a,b)   gcry_mpi_set_highbit ((a),(b))
-#define mpi_clear_bit(a,b)     gcry_mpi_clear_bit ((a),(b))
-#define mpi_clear_highbit(a,b) gcry_mpi_clear_highbit ((a),(b))
-#define mpi_rshift(a,b,c)      gcry_mpi_rshift ((a),(b),(c))
-#define mpi_lshift(a,b,c)      gcry_mpi_lshift ((a),(b),(c))
 
 void _gcry_mpi_normalize( gcry_mpi_t a );
 
-/*-- mpi-inv.c --*/
-#define mpi_invm(a,b,c) gcry_mpi_invm ((a),(b),(c))
-
 /*-- ec.c --*/
 
 /* Object to represent a point in projective coordinates. */
-struct mpi_point_s;
-typedef struct mpi_point_s mpi_point_t;
-struct mpi_point_s
+struct gcry_mpi_point
 {
   gcry_mpi_t x;
   gcry_mpi_t y;
   gcry_mpi_t z;
 };
-
-/* Context used with elliptic curve functions.  */
-struct mpi_ec_ctx_s;
-typedef struct mpi_ec_ctx_s *mpi_ec_t;
-
-void _gcry_mpi_ec_point_init (mpi_point_t *p);
-void _gcry_mpi_ec_point_free (mpi_point_t *p);
-mpi_ec_t _gcry_mpi_ec_init (gcry_mpi_t p, gcry_mpi_t a);
+typedef struct gcry_mpi_point mpi_point_struct;
+typedef struct gcry_mpi_point *mpi_point_t;
+
+void _gcry_mpi_point_init (mpi_point_t p);
+void _gcry_mpi_point_free_parts (mpi_point_t p);
+void _gcry_mpi_get_point (gcry_mpi_t x, gcry_mpi_t y, gcry_mpi_t z,
+                          mpi_point_t point);
+void _gcry_mpi_snatch_point (gcry_mpi_t x, gcry_mpi_t y, gcry_mpi_t z,
+                             mpi_point_t point);
+
+
+/* Models describing an elliptic curve.  */
+enum gcry_mpi_ec_models
+  {
+    /* The Short Weierstrass equation is
+          y^2 = x^3 + ax + b
+     */
+    MPI_EC_WEIERSTRASS = 0,
+    /* The Montgomery equation is
+          by^2 = x^3 + ax^2 + x
+     */
+    MPI_EC_MONTGOMERY,
+    /* The Twisted Edwards equation is
+          ax^2 + y^2 = 1 + bx^2y^2
+       Note that we use 'b' instead of the commonly used 'd'.  */
+    MPI_EC_EDWARDS
+  };
+
+/* Dialects used with elliptic curves.  It is easier to keep the
+   definition here than in ecc-common.h. */
+enum ecc_dialects
+  {
+    ECC_DIALECT_STANDARD = 0,
+    ECC_DIALECT_ED25519,
+    ECC_DIALECT_SAFECURVE
+  };
+
+
+void _gcry_mpi_point_log (const char *name, mpi_point_t point, mpi_ec_t ctx);
+#define log_printpnt(a,p,c) _gcry_mpi_point_log ((a), (p), (c))
+
+mpi_ec_t _gcry_mpi_ec_p_internal_new (enum gcry_mpi_ec_models model,
+                                      enum ecc_dialects dialect,
+                                      int flags,
+                                      gcry_mpi_t p, gcry_mpi_t a, gcry_mpi_t 
b);
+gpg_err_code_t _gcry_mpi_ec_p_new (gcry_ctx_t *r_ctx,
+                                   enum gcry_mpi_ec_models model,
+                                   enum ecc_dialects dialect,
+                                   int flags,
+                                   gcry_mpi_t p, gcry_mpi_t a, gcry_mpi_t b);
 void _gcry_mpi_ec_free (mpi_ec_t ctx);
-int _gcry_mpi_ec_get_affine (gcry_mpi_t x, gcry_mpi_t y, mpi_point_t *point,
-                             mpi_ec_t ctx);
-void _gcry_mpi_ec_dup_point (mpi_point_t *result,
-                             mpi_point_t *point, mpi_ec_t ctx);
-void _gcry_mpi_ec_add_points (mpi_point_t *result,
-                              mpi_point_t *p1, mpi_point_t *p2,
+
+void _gcry_mpi_ec_dup_point (mpi_point_t result,
+                             mpi_point_t point, mpi_ec_t ctx);
+void _gcry_mpi_ec_add_points (mpi_point_t result,
+                              mpi_point_t p1, mpi_point_t p2,
+                              mpi_ec_t ctx);
+void _gcry_mpi_ec_sub_points (mpi_point_t result,
+                              mpi_point_t p1, mpi_point_t p2,
                               mpi_ec_t ctx);
-void _gcry_mpi_ec_mul_point (mpi_point_t *result,
-                             gcry_mpi_t scalar, mpi_point_t *point,
+void _gcry_mpi_ec_mul_point (mpi_point_t result,
+                             gcry_mpi_t scalar, mpi_point_t point,
                              mpi_ec_t ctx);
+int  _gcry_mpi_ec_curve_point (gcry_mpi_point_t point, mpi_ec_t ctx);
+int _gcry_mpi_ec_bad_point (gcry_mpi_point_t point, mpi_ec_t ctx);
+
+gcry_mpi_t _gcry_mpi_ec_ec2os (gcry_mpi_point_t point, mpi_ec_t ectx);
+
+gcry_mpi_t _gcry_mpi_ec_get_mpi (const char *name, gcry_ctx_t ctx, int copy);
+gcry_mpi_point_t _gcry_mpi_ec_get_point (const char *name,
+                                         gcry_ctx_t ctx, int copy);
+gpg_err_code_t _gcry_mpi_ec_set_mpi (const char *name, gcry_mpi_t newvalue,
+                                     gcry_ctx_t ctx);
+gpg_err_code_t _gcry_mpi_ec_set_point (const char *name,
+                                       gcry_mpi_point_t newvalue,
+                                       gcry_ctx_t ctx);
+gpg_err_code_t _gcry_mpi_ec_decode_point (mpi_point_t result,
+                                          gcry_mpi_t value, mpi_ec_t ec);
+
+/*-- ecc-curves.c --*/
+gpg_err_code_t _gcry_mpi_ec_new (gcry_ctx_t *r_ctx,
+                                 gcry_sexp_t keyparam, const char *curvename);
+gpg_err_code_t _gcry_mpi_ec_internal_new (mpi_ec_t *r_ec, int *r_flags,
+                                          const char *name_op,
+                                          gcry_sexp_t keyparam,
+                                          const char *curvename);
 
 
 
diff --git a/grub-core/lib/libgcrypt/src/mpicalc.c 
b/grub-core/lib/libgcrypt/src/mpicalc.c
new file mode 100644
index 000000000..ca413cf41
--- /dev/null
+++ b/grub-core/lib/libgcrypt/src/mpicalc.c
@@ -0,0 +1,648 @@
+/* mpicalc.c - Simple RPN calculator using gcry_mpi functions
+ * Copyright (C) 1997, 1998, 1999, 2004, 2006, 2013  Werner Koch
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+   This program is a simple RPN calculator which was originally used
+   to develop the mpi functions of GnuPG.  Values must be given in
+   hex.  Operation is like dc(1) except that the input/output radix is
+   always 16 and you can use a '-' to prefix a negative number.
+   Addition operators: ++ and --.  All operators must be delimited by
+   a blank.
+ */
+
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+
+#ifdef _GCRYPT_IN_LIBGCRYPT
+# undef _GCRYPT_IN_LIBGCRYPT
+# include "gcrypt.h"
+#else
+# include <gcrypt.h>
+#endif
+
+
+#define MPICALC_VERSION "2.0"
+#define NEED_LIBGCRYPT_VERSION "1.6.0"
+
+#define STACKSIZE  500
+static gcry_mpi_t stack[STACKSIZE];
+static int stackidx;
+
+
+static int
+scan_mpi (gcry_mpi_t retval, const char *string)
+{
+  gpg_error_t err;
+  gcry_mpi_t val;
+
+  err = gcry_mpi_scan (&val, GCRYMPI_FMT_HEX, string, 0, NULL);
+  if (err)
+    {
+      fprintf (stderr, "scanning input failed: %s\n", gpg_strerror (err));
+      return -1;
+    }
+  mpi_set (retval, val);
+  mpi_release (val);
+  return 0;
+}
+
+
+static void
+print_mpi (gcry_mpi_t a)
+{
+  gpg_error_t err;
+  char *buf;
+  void *bufaddr = &buf;
+
+  err = gcry_mpi_aprint (GCRYMPI_FMT_HEX, bufaddr, NULL, a);
+  if (err)
+    fprintf (stderr, "[error printing number: %s]\n", gpg_strerror (err));
+  else
+    {
+      fputs (buf, stdout);
+      gcry_free (buf);
+    }
+}
+
+
+
+static void
+do_add (int usemod)
+{
+  if (stackidx < (usemod?3:2))
+    {
+      fputs ("stack underflow\n", stderr);
+      return;
+    }
+  if (usemod)
+    {
+      mpi_addm (stack[stackidx - 3], stack[stackidx - 3],
+                stack[stackidx - 2], stack[stackidx - 1]);
+      stackidx--;
+    }
+  else
+    mpi_add (stack[stackidx - 2], stack[stackidx - 2], stack[stackidx - 1]);
+  stackidx--;
+}
+
+static void
+do_sub (int usemod)
+{
+  if (stackidx < (usemod?3:2))
+    {
+      fputs ("stack underflow\n", stderr);
+      return;
+    }
+  if (usemod)
+    {
+      mpi_subm (stack[stackidx - 3], stack[stackidx - 3],
+                stack[stackidx - 2], stack[stackidx - 1]);
+      stackidx--;
+    }
+  else
+    mpi_sub (stack[stackidx - 2], stack[stackidx - 2], stack[stackidx - 1]);
+  stackidx--;
+}
+
+static void
+do_inc (void)
+{
+  if (stackidx < 1)
+    {
+      fputs ("stack underflow\n", stderr);
+      return;
+    }
+  mpi_add_ui (stack[stackidx - 1], stack[stackidx - 1], 1);
+}
+
+static void
+do_dec (void)
+{
+  if (stackidx < 1)
+    {
+      fputs ("stack underflow\n", stderr);
+      return;
+    }
+  /* mpi_sub_ui( stack[stackidx-1], stack[stackidx-1], 1 ); */
+}
+
+static void
+do_mul (void)
+{
+  if (stackidx < 2)
+    {
+      fputs ("stack underflow\n", stderr);
+      return;
+    }
+  mpi_mul (stack[stackidx - 2], stack[stackidx - 2], stack[stackidx - 1]);
+  stackidx--;
+}
+
+static void
+do_mulm (void)
+{
+  if (stackidx < 3)
+    {
+      fputs ("stack underflow\n", stderr);
+      return;
+    }
+  mpi_mulm (stack[stackidx - 3], stack[stackidx - 3],
+           stack[stackidx - 2], stack[stackidx - 1]);
+  stackidx -= 2;
+}
+
+static void
+do_div (void)
+{
+  if (stackidx < 2)
+    {
+      fputs ("stack underflow\n", stderr);
+      return;
+    }
+  mpi_fdiv (stack[stackidx - 2], NULL,
+            stack[stackidx - 2], stack[stackidx - 1]);
+  stackidx--;
+}
+
+static void
+do_rem (void)
+{
+  if (stackidx < 2)
+    {
+      fputs ("stack underflow\n", stderr);
+      return;
+    }
+  mpi_mod (stack[stackidx - 2],
+           stack[stackidx - 2], stack[stackidx - 1]);
+  stackidx--;
+}
+
+static void
+do_powm (void)
+{
+  gcry_mpi_t a;
+  if (stackidx < 3)
+    {
+      fputs ("stack underflow\n", stderr);
+      return;
+    }
+  a = mpi_new (0);
+  mpi_powm (a, stack[stackidx - 3], stack[stackidx - 2], stack[stackidx - 1]);
+  mpi_release (stack[stackidx - 3]);
+  stack[stackidx - 3] = a;
+  stackidx -= 2;
+}
+
+static void
+do_inv (void)
+{
+  gcry_mpi_t a;
+
+  if (stackidx < 2)
+    {
+      fputs ("stack underflow\n", stderr);
+      return;
+    }
+  a = mpi_new (0);
+  mpi_invm (a, stack[stackidx - 2], stack[stackidx - 1]);
+  mpi_set (stack[stackidx - 2], a);
+  mpi_release (a);
+  stackidx--;
+}
+
+static void
+do_gcd (void)
+{
+  gcry_mpi_t a;
+
+  if (stackidx < 2)
+    {
+      fputs ("stack underflow\n", stderr);
+      return;
+    }
+  a = mpi_new (0);
+  mpi_gcd (a, stack[stackidx - 2], stack[stackidx - 1]);
+  mpi_set (stack[stackidx - 2], a);
+  mpi_release (a);
+  stackidx--;
+}
+
+static void
+do_lshift (void)
+{
+  if (stackidx < 1)
+    {
+      fputs ("stack underflow\n", stderr);
+      return;
+    }
+  mpi_lshift (stack[stackidx - 1], stack[stackidx - 1], 1);
+}
+
+static void
+do_rshift (void)
+{
+  if (stackidx < 1)
+    {
+      fputs ("stack underflow\n", stderr);
+      return;
+    }
+  mpi_rshift (stack[stackidx - 1], stack[stackidx - 1], 1);
+}
+
+static void
+do_nbits (void)
+{
+  unsigned int n;
+
+  if (stackidx < 1)
+    {
+      fputs ("stack underflow\n", stderr);
+      return;
+    }
+  n = mpi_get_nbits (stack[stackidx - 1]);
+  mpi_set_ui (stack[stackidx - 1], n);
+}
+
+
+static void
+do_primecheck (void)
+{
+  gpg_error_t err;
+
+  if (stackidx < 1)
+    {
+      fputs ("stack underflow\n", stderr);
+      return;
+    }
+  err = gcry_prime_check (stack[stackidx - 1], 0);
+  mpi_set_ui (stack[stackidx - 1], !err);
+  if (err && gpg_err_code (err) != GPG_ERR_NO_PRIME)
+    fprintf (stderr, "checking prime failed: %s\n", gpg_strerror (err));
+}
+
+
+static int
+my_getc (void)
+{
+  static int shown;
+  int c;
+
+  for (;;)
+    {
+      if ((c = getc (stdin)) == EOF)
+        return EOF;
+      if (!(c & 0x80))
+        return c;
+
+      if (!shown)
+        {
+          shown = 1;
+          fputs ("note: Non ASCII characters are ignored\n", stderr);
+        }
+    }
+}
+
+
+static void
+print_help (void)
+{
+  fputs ("+   add           [0] := [1] + [0]          {-1}\n"
+         "-   subtract      [0] := [1] - [0]          {-1}\n"
+         "*   multiply      [0] := [1] * [0]          {-1}\n"
+         "/   divide        [0] := [1] / [0]          {-1}\n"
+         "%   modulo        [0] := [1] % [0]          {-1}\n"
+         "<   left shift    [0] := [0] << 1           {0}\n"
+         ">   right shift   [0] := [0] >> 1           {0}\n"
+         "++  increment     [0] := [0]++              {0}\n"
+         "--  decrement     [0] := [0]--              {0}\n"
+         "m   multiply mod  [0] := [2] * [1] mod [0]  {-2}\n"
+         "^   power mod     [0] := [2] ^ [1] mod [0]  {-2}\n"
+         "I   inverse mod   [0] := [1]^-1 mod [0]     {-1}\n"
+         "G   gcd           [0] := gcd([1],[0])       {-1}\n"
+         "i   remove item   [0] := [1]                {-1}\n"
+         "d   dup item      [-1] := [0]               {+1}\n"
+         "r   reverse       [0] := [1], [1] := [0]    {0}\n"
+         "b   # of bits     [0] := nbits([0])         {0}\n"
+         "P   prime check   [0] := is_prime([0])?1:0  {0}\n"
+         "M   use mod for next '+' and '-'\n"
+         "c   clear stack\n"
+         "p   print top item\n"
+         "f   print the stack\n"
+         "#   ignore until end of line\n"
+         "?   print this help\n"
+         , stdout);
+}
+
+
+
+int
+main (int argc, char **argv)
+{
+  const char *pgm;
+  int last_argc = -1;
+  int print_config = 0;
+  int i, c;
+  int state = 0;
+  char strbuf[4096];
+  int stridx = 0;
+  int usemod = 0;
+
+  if (argc)
+    {
+      pgm = strrchr (*argv, '/');
+      if (pgm)
+        pgm++;
+      else
+        pgm = *argv;
+      argc--; argv++;
+    }
+  else
+    pgm = "?";
+
+  while (argc && last_argc != argc )
+    {
+      last_argc = argc;
+      if (!strcmp (*argv, "--"))
+        {
+          argc--; argv++;
+          break;
+        }
+      else if (!strcmp (*argv, "--version")
+               || !strcmp (*argv, "--help"))
+        {
+          printf ("%s " MPICALC_VERSION "\n"
+                  "libgcrypt %s\n"
+                  "Copyright (C) 1997, 2013  Werner Koch\n"
+                  "License LGPLv2.1+: GNU LGPL version 2.1 or later "
+                  "<http://gnu.org/licenses/old-licenses/lgpl-2.1.html>\n"
+                  "This is free software: you are free to change and "
+                  "redistribute it.\n"
+                  "There is NO WARRANTY, to the extent permitted by law.\n"
+                  "\n"
+                  "Syntax: mpicalc [options]\n"
+                  "Simple interactive big integer RPN calculator\n"
+                  "\n"
+                  "Options:\n"
+                  "  --version           print version information\n"
+                  "  --print-config      print the Libgcrypt config\n"
+                  "  --disable-hwf NAME  disable feature NAME\n",
+                  pgm, gcry_check_version (NULL));
+          exit (0);
+        }
+      else if (!strcmp (*argv, "--print-config"))
+        {
+          argc--; argv++;
+          print_config = 1;
+        }
+      else if (!strcmp (*argv, "--disable-hwf"))
+        {
+          argc--; argv++;
+          if (argc)
+            {
+              if (gcry_control (GCRYCTL_DISABLE_HWF, *argv, NULL))
+                fprintf (stderr, "%s: unknown hardware feature `%s'"
+                         " - option ignored\n", pgm, *argv);
+              argc--; argv++;
+            }
+        }
+    }
+
+  if (argc)
+    {
+      fprintf (stderr, "usage: %s [options]  (--help for help)\n", pgm);
+      exit (1);
+    }
+
+  if (!gcry_check_version (NEED_LIBGCRYPT_VERSION))
+    {
+      fprintf (stderr, "%s: Libgcrypt is too old (need %s, have %s)\n",
+               pgm, NEED_LIBGCRYPT_VERSION, gcry_check_version (NULL) );
+      exit (1);
+    }
+  gcry_control (GCRYCTL_DISABLE_SECMEM, 0);
+  gcry_control (GCRYCTL_INITIALIZATION_FINISHED, 0);
+  if (print_config)
+    {
+      gcry_control (GCRYCTL_PRINT_CONFIG, stdout);
+      exit (0);
+    }
+
+  for (i = 0; i < STACKSIZE; i++)
+    stack[i] = NULL;
+  stackidx = 0;
+
+  while ((c = my_getc ()) != EOF)
+    {
+      if (!state) /* waiting */
+       {
+         if (isdigit (c))
+           {
+             state = 1;
+             ungetc (c, stdin);
+             strbuf[0] = '0';
+             strbuf[1] = 'x';
+             stridx = 2;
+           }
+         else if (isspace (c))
+           ;
+         else
+           {
+             switch (c)
+               {
+                case '#':
+                  state = 2;
+                  break;
+               case '+':
+                 if ((c = my_getc ()) == '+')
+                   do_inc ();
+                 else
+                   {
+                     ungetc (c, stdin);
+                     do_add (usemod);
+                      usemod = 0;
+                   }
+                 break;
+                case '-':
+                 if ((c = my_getc ()) == '-')
+                   do_dec ();
+                 else if (isdigit (c)
+                           || (c >= 'A' && c <= 'F')
+                           || (c >= 'a' && c <= 'f'))
+                   {
+                     state = 1;
+                     ungetc (c, stdin);
+                     strbuf[0] = '-';
+                     strbuf[1] = '0';
+                     strbuf[2] = 'x';
+                     stridx = 3;
+                   }
+                 else
+                   {
+                     ungetc (c, stdin);
+                     do_sub (usemod);
+                      usemod = 0;
+                   }
+                 break;
+               case '*':
+                 do_mul ();
+                 break;
+               case 'm':
+                 do_mulm ();
+                 break;
+               case '/':
+                 do_div ();
+                 break;
+               case '%':
+                 do_rem ();
+                 break;
+               case '^':
+                 do_powm ();
+                 break;
+               case '<':
+                 do_lshift ();
+                 break;
+               case '>':
+                 do_rshift ();
+                 break;
+               case 'I':
+                 do_inv ();
+                 break;
+               case 'G':
+                 do_gcd ();
+                 break;
+               case 'i':       /* dummy */
+                 if (!stackidx)
+                   fputs ("stack underflow\n", stderr);
+                 else
+                   {
+                     mpi_release (stack[stackidx - 1]);
+                     stackidx--;
+                   }
+                 break;
+               case 'd':       /* duplicate the tos */
+                 if (!stackidx)
+                   fputs ("stack underflow\n", stderr);
+                 else if (stackidx < STACKSIZE)
+                   {
+                     mpi_release (stack[stackidx]);
+                     stack[stackidx] = mpi_copy (stack[stackidx - 1]);
+                     stackidx++;
+                   }
+                 else
+                   fputs ("stack overflow\n", stderr);
+                 break;
+               case 'r':       /* swap top elements */
+                 if (stackidx < 2)
+                   fputs ("stack underflow\n", stderr);
+                 else if (stackidx < STACKSIZE)
+                   {
+                     gcry_mpi_t tmp = stack[stackidx-1];
+                      stack[stackidx-1] = stack[stackidx - 2];
+                      stack[stackidx-2] = tmp;
+                   }
+                 break;
+                case 'b':
+                  do_nbits ();
+                  break;
+                case 'P':
+                  do_primecheck ();
+                  break;
+                case 'M':
+                  usemod = 1;
+                  break;
+               case 'c':
+                 for (i = 0; i < stackidx; i++)
+                    {
+                      mpi_release (stack[i]); stack[i] = NULL;
+                    }
+                 stackidx = 0;
+                 break;
+               case 'p':       /* print the tos */
+                 if (!stackidx)
+                   puts ("stack is empty");
+                 else
+                   {
+                     print_mpi (stack[stackidx - 1]);
+                     putchar ('\n');
+                   }
+                 break;
+               case 'f':       /* print the stack */
+                 for (i = stackidx - 1; i >= 0; i--)
+                   {
+                     printf ("[%2d]: ", i);
+                     print_mpi (stack[i]);
+                     putchar ('\n');
+                   }
+                 break;
+                case '?':
+                  print_help ();
+                  break;
+               default:
+                 fputs ("invalid operator\n", stderr);
+               }
+           }
+       }
+      else if (state == 1) /* In a number. */
+       {
+         if (!isxdigit (c))
+           {
+              /* Store the number */
+             state = 0;
+             ungetc (c, stdin);
+             if (stridx < sizeof strbuf)
+               strbuf[stridx] = 0;
+
+             if (stackidx < STACKSIZE)
+               {
+                 if (!stack[stackidx])
+                   stack[stackidx] = mpi_new (0);
+                 if (scan_mpi (stack[stackidx], strbuf))
+                   fputs ("invalid number\n", stderr);
+                 else
+                   stackidx++;
+               }
+             else
+               fputs ("stack overflow\n", stderr);
+           }
+         else
+           { /* Store a digit.  */
+             if (stridx < sizeof strbuf - 1)
+               strbuf[stridx++] = c;
+             else if (stridx == sizeof strbuf - 1)
+               {
+                 strbuf[stridx] = 0;
+                 fputs ("input too large - truncated\n", stderr);
+                 stridx++;
+               }
+           }
+       }
+      else if (state == 2) /* In a comment. */
+        {
+          if (c == '\n')
+            state = 0;
+        }
+
+    }
+
+  for (i = 0; i < stackidx; i++)
+    mpi_release (stack[i]);
+  return 0;
+}
diff --git a/grub-core/lib/libgcrypt/src/secmem.c 
b/grub-core/lib/libgcrypt/src/secmem.c
index 2beb234ac..b36c44f6d 100644
--- a/grub-core/lib/libgcrypt/src/secmem.c
+++ b/grub-core/lib/libgcrypt/src/secmem.c
@@ -1,6 +1,7 @@
 /* secmem.c  - memory allocation from a secure heap
  * Copyright (C) 1998, 1999, 2000, 2001, 2002,
  *               2003, 2007 Free Software Foundation, Inc.
+ * Copyright (C) 2013, 2016 g10 Code GmbH
  *
  * This file is part of Libgcrypt.
  *
@@ -36,7 +37,6 @@
 #endif
 #endif
 
-#include "ath.h"
 #include "g10lib.h"
 #include "secmem.h"
 
@@ -59,35 +59,55 @@ typedef struct memblock
 /* This flag specifies that the memory block is in use.  */
 #define MB_FLAG_ACTIVE (1 << 0)
 
-/* The pool of secure memory.  */
-static void *pool;
+/* An object describing a memory pool.  */
+typedef struct pooldesc_s
+{
+  /* A link to the next pool.  This is used to connect the overflow
+   * pools.  */
+  struct pooldesc_s * volatile next;
+
+  /* A memory buffer used as allocation pool.  */
+  void *mem;
+
+  /* The allocated size of MEM. */
+  size_t size;
 
-/* Size of POOL in bytes.  */
-static size_t pool_size;
+  /* Flag indicating that this memory pool is ready for use.  May be
+   * checked in an atexit function.  */
+  volatile int okay;
 
-/* True, if the memory pool is ready for use.  May be checked in an
-   atexit function.  */
-static volatile int pool_okay;
+  /* Flag indicating whether MEM is mmapped.  */
+  volatile int is_mmapped;
 
-/* True, if the memory pool is mmapped.  */
-static volatile int pool_is_mmapped;
+  /* The number of allocated bytes and the number of used blocks in
+   * this pool.  */
+  unsigned int cur_alloced, cur_blocks;
+} pooldesc_t;
 
-/* FIXME?  */
+
+/* The pool of secure memory.  This is the head of a linked list with
+ * the first element being the standard mlock-ed pool and the
+ * following elements being the overflow pools. */
+static pooldesc_t mainpool;
+
+
+/* A couple of flags with some being set early.  */
 static int disable_secmem;
 static int show_warning;
 static int not_locked;
 static int no_warning;
 static int suspend_warning;
+static int no_mlock;
+static int no_priv_drop;
+static unsigned int auto_expand;
 
-/* Stats.  */
-static unsigned int cur_alloced, cur_blocks;
 
-/* Lock protecting accesses to the memory pool.  */
-static ath_mutex_t secmem_lock;
+/* Lock protecting accesses to the memory pools.  */
+GPGRT_LOCK_DEFINE (secmem_lock);
 
 /* Convenient macros.  */
-#define SECMEM_LOCK   ath_mutex_lock   (&secmem_lock)
-#define SECMEM_UNLOCK ath_mutex_unlock (&secmem_lock)
+#define SECMEM_LOCK   gpgrt_lock_lock   (&secmem_lock)
+#define SECMEM_UNLOCK gpgrt_lock_unlock (&secmem_lock)
 
 /* The size of the memblock structure; this does not include the
    memory that is available to the user.  */
@@ -96,48 +116,77 @@ static ath_mutex_t secmem_lock;
 
 /* Convert an address into the according memory block structure.  */
 #define ADDR_TO_BLOCK(addr) \
-  (memblock_t *) ((char *) addr - BLOCK_HEAD_SIZE)
+  (memblock_t *) (void *) ((char *) addr - BLOCK_HEAD_SIZE)
 
-/* Check whether P points into the pool.  */
-static int
-ptr_into_pool_p (const void *p)
+/* Prototypes. */
+static void secmem_dump_stats_internal (int extended);
+
+
+/*
+ * Functions
+ */
+
+/* Memory barrier */
+static inline void
+memory_barrier(void)
+{
+#ifdef HAVE_SYNC_SYNCHRONIZE
+#ifdef HAVE_GCC_ASM_VOLATILE_MEMORY
+  asm volatile ("":::"memory");
+#endif
+  /* Use GCC / clang intrinsic for memory barrier. */
+  __sync_synchronize();
+#else
+  /* Slow portable alternative, implement memory barrier by using mutex. */
+  gpgrt_lock_t tmp;
+  memset (&tmp, 0, sizeof(tmp));
+  gpgrt_lock_init (&tmp);
+  gpgrt_lock_lock (&tmp);
+  gpgrt_lock_unlock (&tmp);
+  gpgrt_lock_destroy (&tmp);
+#endif
+}
+
+
+/* Check whether P points into POOL.  */
+static inline int
+ptr_into_pool_p (pooldesc_t *pool, const void *p)
 {
   /* We need to convert pointers to addresses.  This is required by
-     C-99 6.5.8 to avoid undefined behaviour.  Using size_t is at
-     least only implementation defined.  See also
+     C-99 6.5.8 to avoid undefined behaviour.  See also
      http://lists.gnupg.org/pipermail/gcrypt-devel/2007-February/001102.html
   */
-  size_t p_addr = (size_t)p;
-  size_t pool_addr = (size_t)pool;
+  uintptr_t p_addr    = (uintptr_t)p;
+  uintptr_t pool_addr = (uintptr_t)pool->mem;
 
-  return p_addr >= pool_addr && p_addr <  pool_addr+pool_size;
+  return p_addr >= pool_addr && p_addr <  pool_addr + pool->size;
 }
 
 /* Update the stats.  */
 static void
-stats_update (size_t add, size_t sub)
+stats_update (pooldesc_t *pool, size_t add, size_t sub)
 {
   if (add)
     {
-      cur_alloced += add;
-      cur_blocks++;
+      pool->cur_alloced += add;
+      pool->cur_blocks++;
     }
   if (sub)
     {
-      cur_alloced -= sub;
-      cur_blocks--;
+      pool->cur_alloced -= sub;
+      pool->cur_blocks--;
     }
 }
 
 /* Return the block following MB or NULL, if MB is the last block.  */
 static memblock_t *
-mb_get_next (memblock_t *mb)
+mb_get_next (pooldesc_t *pool, memblock_t *mb)
 {
   memblock_t *mb_next;
 
-  mb_next = (memblock_t *) ((char *) mb + BLOCK_HEAD_SIZE + mb->size);
+  mb_next = (memblock_t *) (void *) ((char *) mb + BLOCK_HEAD_SIZE + mb->size);
 
-  if (! ptr_into_pool_p (mb_next))
+  if (! ptr_into_pool_p (pool, mb_next))
     mb_next = NULL;
 
   return mb_next;
@@ -146,18 +195,18 @@ mb_get_next (memblock_t *mb)
 /* Return the block preceding MB or NULL, if MB is the first
    block.  */
 static memblock_t *
-mb_get_prev (memblock_t *mb)
+mb_get_prev (pooldesc_t *pool, memblock_t *mb)
 {
   memblock_t *mb_prev, *mb_next;
 
-  if (mb == pool)
+  if (mb == pool->mem)
     mb_prev = NULL;
   else
     {
-      mb_prev = (memblock_t *) pool;
+      mb_prev = (memblock_t *) pool->mem;
       while (1)
        {
-         mb_next = mb_get_next (mb_prev);
+         mb_next = mb_get_next (pool, mb_prev);
          if (mb_next == mb)
            break;
          else
@@ -171,12 +220,12 @@ mb_get_prev (memblock_t *mb)
 /* If the preceding block of MB and/or the following block of MB
    exist and are not active, merge them to form a bigger block.  */
 static void
-mb_merge (memblock_t *mb)
+mb_merge (pooldesc_t *pool, memblock_t *mb)
 {
   memblock_t *mb_prev, *mb_next;
 
-  mb_prev = mb_get_prev (mb);
-  mb_next = mb_get_next (mb);
+  mb_prev = mb_get_prev (pool, mb);
+  mb_next = mb_get_next (pool, mb);
 
   if (mb_prev && (! (mb_prev->flags & MB_FLAG_ACTIVE)))
     {
@@ -189,11 +238,11 @@ mb_merge (memblock_t *mb)
 
 /* Return a new block, which can hold SIZE bytes.  */
 static memblock_t *
-mb_get_new (memblock_t *block, size_t size)
+mb_get_new (pooldesc_t *pool, memblock_t *block, size_t size)
 {
   memblock_t *mb, *mb_split;
 
-  for (mb = block; ptr_into_pool_p (mb); mb = mb_get_next (mb))
+  for (mb = block; ptr_into_pool_p (pool, mb); mb = mb_get_next (pool, mb))
     if (! (mb->flags & MB_FLAG_ACTIVE) && mb->size >= size)
       {
        /* Found a free block.  */
@@ -203,20 +252,21 @@ mb_get_new (memblock_t *block, size_t size)
          {
            /* Split block.  */
 
-           mb_split = (memblock_t *) (((char *) mb) + BLOCK_HEAD_SIZE + size);
+           mb_split = (memblock_t *) (void *) (((char *) mb) + BLOCK_HEAD_SIZE
+                                               + size);
            mb_split->size = mb->size - size - BLOCK_HEAD_SIZE;
            mb_split->flags = 0;
 
            mb->size = size;
 
-           mb_merge (mb_split);
+           mb_merge (pool, mb_split);
 
          }
 
        break;
       }
 
-  if (! ptr_into_pool_p (mb))
+  if (! ptr_into_pool_p (pool, mb))
     {
       gpg_err_set_errno (ENOMEM);
       mb = NULL;
@@ -233,30 +283,46 @@ print_warn (void)
     log_info (_("Warning: using insecure memory!\n"));
 }
 
-/* Lock the memory pages into core and drop privileges.  */
+
+/* Lock the memory pages of pool P of size N into core and drop
+ * privileges.  */
 static void
-lock_pool (void *p, size_t n)
+lock_pool_pages (void *p, size_t n)
 {
 #if defined(USE_CAPABILITIES) && defined(HAVE_MLOCK)
   int err;
 
-  cap_set_proc (cap_from_text ("cap_ipc_lock+ep"));
-  err = mlock (p, n);
-  if (err && errno)
-    err = errno;
-  cap_set_proc (cap_from_text ("cap_ipc_lock+p"));
+  {
+    cap_t cap;
+
+    if (!no_priv_drop)
+      {
+        cap = cap_from_text ("cap_ipc_lock+ep");
+        cap_set_proc (cap);
+        cap_free (cap);
+      }
+    err = no_mlock? 0 : mlock (p, n);
+    if (err && errno)
+      err = errno;
+    if (!no_priv_drop)
+      {
+        cap = cap_from_text ("cap_ipc_lock+p");
+        cap_set_proc (cap);
+        cap_free(cap);
+      }
+  }
 
   if (err)
     {
-      if (errno != EPERM
-#ifdef EAGAIN  /* OpenBSD returns this */
-         && errno != EAGAIN
+      if (err != EPERM
+#ifdef EAGAIN  /* BSD and also Linux may return EAGAIN */
+         && err != EAGAIN
 #endif
 #ifdef ENOSYS  /* Some SCOs return this (function not implemented) */
-         && errno != ENOSYS
+         && err != ENOSYS
 #endif
 #ifdef ENOMEM  /* Linux might return this. */
-            && errno != ENOMEM
+            && err != ENOMEM
 #endif
          )
        log_error ("can't lock memory: %s\n", strerror (err));
@@ -282,35 +348,40 @@ lock_pool (void *p, size_t n)
     }
   else
     {
-      err = mlock (p, n);
+      err = no_mlock? 0 : mlock (p, n);
       if (err && errno)
        err = errno;
     }
 #else /* !HAVE_BROKEN_MLOCK */
-  err = mlock (p, n);
+  err = no_mlock? 0 : mlock (p, n);
   if (err && errno)
     err = errno;
 #endif /* !HAVE_BROKEN_MLOCK */
 
+  /* Test whether we are running setuid(0).  */
   if (uid && ! geteuid ())
     {
-      /* check that we really dropped the privs.
-       * Note: setuid(0) should always fail */
-      if (setuid (uid) || getuid () != geteuid () || !setuid (0))
-       log_fatal ("failed to reset uid: %s\n", strerror (errno));
+      /* Yes, we are.  */
+      if (!no_priv_drop)
+        {
+          /* Check that we really dropped the privs.
+           * Note: setuid(0) should always fail */
+          if (setuid (uid) || getuid () != geteuid () || !setuid (0))
+            log_fatal ("failed to reset uid: %s\n", strerror (errno));
+        }
     }
 
   if (err)
     {
-      if (errno != EPERM
-#ifdef EAGAIN  /* OpenBSD returns this. */
-         && errno != EAGAIN
+      if (err != EPERM
+#ifdef EAGAIN  /* BSD and also Linux may return this. */
+         && err != EAGAIN
 #endif
 #ifdef ENOSYS  /* Some SCOs return this (function not implemented). */
-         && errno != ENOSYS
+         && err != ENOSYS
 #endif
 #ifdef ENOMEM  /* Linux might return this. */
-            && errno != ENOMEM
+            && err != ENOMEM
 #endif
          )
        log_error ("can't lock memory: %s\n", strerror (err));
@@ -339,81 +410,104 @@ lock_pool (void *p, size_t n)
 #else
   (void)p;
   (void)n;
-  log_info ("Please note that you don't have secure memory on this system\n");
+  if (!no_mlock)
+    log_info ("Please note that you don't have secure memory on this 
system\n");
 #endif
 }
 
 /* Initialize POOL.  */
 static void
-init_pool (size_t n)
+init_pool (pooldesc_t *pool, size_t n)
 {
-  size_t pgsize;
-  long int pgsize_val;
   memblock_t *mb;
 
-  pool_size = n;
+  pool->size = n;
 
   if (disable_secmem)
     log_bug ("secure memory is disabled");
 
-#if defined(HAVE_SYSCONF) && defined(_SC_PAGESIZE)
-  pgsize_val = sysconf (_SC_PAGESIZE);
-#elif defined(HAVE_GETPAGESIZE)
-  pgsize_val = getpagesize ();
-#else
-  pgsize_val = -1;
-#endif
-  pgsize = (pgsize_val != -1 && pgsize_val > 0)? pgsize_val:DEFAULT_PAGE_SIZE;
-
 
 #if HAVE_MMAP
-  pool_size = (pool_size + pgsize - 1) & ~(pgsize - 1);
-#ifdef MAP_ANONYMOUS
-  pool = mmap (0, pool_size, PROT_READ | PROT_WRITE,
-              MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
-#else /* map /dev/zero instead */
   {
-    int fd;
+    size_t pgsize;
+    long int pgsize_val;
+
+# if defined(HAVE_SYSCONF) && defined(_SC_PAGESIZE)
+    pgsize_val = sysconf (_SC_PAGESIZE);
+# elif defined(HAVE_GETPAGESIZE)
+    pgsize_val = getpagesize ();
+# else
+    pgsize_val = -1;
+# endif
+    pgsize = (pgsize_val > 0)? pgsize_val:DEFAULT_PAGE_SIZE;
+
+    pool->size = (pool->size + pgsize - 1) & ~(pgsize - 1);
+# ifdef MAP_ANONYMOUS
+    pool->mem = mmap (0, pool->size, PROT_READ | PROT_WRITE,
+                     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+# else /* map /dev/zero instead */
+    {
+      int fd;
 
-    fd = open ("/dev/zero", O_RDWR);
-    if (fd == -1)
-      {
-       log_error ("can't open /dev/zero: %s\n", strerror (errno));
-       pool = (void *) -1;
-      }
+      fd = open ("/dev/zero", O_RDWR);
+      if (fd == -1)
+        {
+          log_error ("can't open /dev/zero: %s\n", strerror (errno));
+          pool->mem = (void *) -1;
+        }
+      else
+        {
+          pool->mem = mmap (0, pool->size,
+                           (PROT_READ | PROT_WRITE), MAP_PRIVATE, fd, 0);
+          close (fd);
+        }
+    }
+# endif
+    if (pool->mem == (void *) -1)
+      log_info ("can't mmap pool of %u bytes: %s - using malloc\n",
+                (unsigned) pool->size, strerror (errno));
     else
       {
-       pool = mmap (0, pool_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
-        close (fd);
+        pool->is_mmapped = 1;
+        pool->okay = 1;
       }
   }
-#endif
-  if (pool == (void *) -1)
-    log_info ("can't mmap pool of %u bytes: %s - using malloc\n",
-             (unsigned) pool_size, strerror (errno));
-  else
-    {
-      pool_is_mmapped = 1;
-      pool_okay = 1;
-    }
+#endif /*HAVE_MMAP*/
 
-#endif
-  if (!pool_okay)
+  if (!pool->okay)
     {
-      pool = malloc (pool_size);
-      if (!pool)
+      pool->mem = malloc (pool->size);
+      if (!pool->mem)
        log_fatal ("can't allocate memory pool of %u bytes\n",
-                  (unsigned) pool_size);
+                  (unsigned) pool->size);
       else
-       pool_okay = 1;
+       pool->okay = 1;
     }
 
   /* Initialize first memory block.  */
-  mb = (memblock_t *) pool;
-  mb->size = pool_size;
+  mb = (memblock_t *) pool->mem;
+  mb->size = pool->size - BLOCK_HEAD_SIZE;
   mb->flags = 0;
 }
 
+
+/* Enable overflow pool allocation in all cases.  CHUNKSIZE is a hint
+ * on how large to allocate overflow pools.  */
+void
+_gcry_secmem_set_auto_expand (unsigned int chunksize)
+{
+  /* Round up to a multiple of the STANDARD_POOL_SIZE.  */
+  chunksize = ((chunksize + (2*STANDARD_POOL_SIZE) - 1)
+               / STANDARD_POOL_SIZE ) * STANDARD_POOL_SIZE;
+  if (chunksize < STANDARD_POOL_SIZE) /* In case of overflow.  */
+    chunksize = STANDARD_POOL_SIZE;
+
+  SECMEM_LOCK;
+  auto_expand = chunksize;
+  SECMEM_UNLOCK;
+}
+
+
 void
 _gcry_secmem_set_flags (unsigned flags)
 {
@@ -424,6 +518,8 @@ _gcry_secmem_set_flags (unsigned flags)
   was_susp = suspend_warning;
   no_warning = flags & GCRY_SECMEM_FLAG_NO_WARNING;
   suspend_warning = flags & GCRY_SECMEM_FLAG_SUSPEND_WARNING;
+  no_mlock      = flags & GCRY_SECMEM_FLAG_NO_MLOCK;
+  no_priv_drop = flags & GCRY_SECMEM_FLAG_NO_PRIV_DROP;
 
   /* and now issue the warning if it is not longer suspended */
   if (was_susp && !suspend_warning && show_warning)
@@ -445,6 +541,8 @@ _gcry_secmem_get_flags (void)
   flags = no_warning ? GCRY_SECMEM_FLAG_NO_WARNING : 0;
   flags |= suspend_warning ? GCRY_SECMEM_FLAG_SUSPEND_WARNING : 0;
   flags |= not_locked ? GCRY_SECMEM_FLAG_NOT_LOCKED : 0;
+  flags |= no_mlock ? GCRY_SECMEM_FLAG_NO_MLOCK : 0;
+  flags |= no_priv_drop ? GCRY_SECMEM_FLAG_NO_PRIV_DROP : 0;
 
   SECMEM_UNLOCK;
 
@@ -452,16 +550,26 @@ _gcry_secmem_get_flags (void)
 }
 
 
-/* See _gcry_secmem_init.  This function is expected to be called with
-   the secmem lock held. */
+/* This function initializes the main memory pool MAINPOOL.  It is
+ * expected to be called with the secmem lock held.  */
 static void
-secmem_init (size_t n)
+_gcry_secmem_init_internal (size_t n)
 {
+  pooldesc_t *pool;
+
+  pool = &mainpool;
   if (!n)
     {
 #ifdef USE_CAPABILITIES
       /* drop all capabilities */
-      cap_set_proc (cap_from_text ("all-eip"));
+      if (!no_priv_drop)
+        {
+          cap_t cap;
+
+          cap = cap_from_text ("all-eip");
+          cap_set_proc (cap);
+          cap_free (cap);
+        }
 
 #elif !defined(HAVE_DOSISH_SYSTEM)
       uid_t uid;
@@ -479,10 +587,10 @@ secmem_init (size_t n)
     {
       if (n < MINIMUM_POOL_SIZE)
        n = MINIMUM_POOL_SIZE;
-      if (! pool_okay)
+      if (! pool->okay)
        {
-         init_pool (n);
-         lock_pool (pool, n);
+         init_pool (pool, n);
+         lock_pool_pages (pool->mem, n);
        }
       else
        log_error ("Oops, secure memory pool already initialized\n");
@@ -500,22 +608,33 @@ _gcry_secmem_init (size_t n)
 {
   SECMEM_LOCK;
 
-  secmem_init (n);
+  _gcry_secmem_init_internal (n);
 
   SECMEM_UNLOCK;
 }
 
 
+gcry_err_code_t
+_gcry_secmem_module_init ()
+{
+  /* Not anymore needed.  */
+  return 0;
+}
+
+
 static void *
-_gcry_secmem_malloc_internal (size_t size)
+_gcry_secmem_malloc_internal (size_t size, int xhint)
 {
+  pooldesc_t *pool;
   memblock_t *mb;
 
-  if (!pool_okay)
+  pool = &mainpool;
+
+  if (!pool->okay)
     {
       /* Try to initialize the pool if the user forgot about it.  */
-      secmem_init (STANDARD_POOL_SIZE);
-      if (!pool_okay)
+      _gcry_secmem_init_internal (STANDARD_POOL_SIZE);
+      if (!pool->okay)
         {
           log_info (_("operation is not possible without "
                       "initialized secure memory\n"));
@@ -538,33 +657,108 @@ _gcry_secmem_malloc_internal (size_t size)
   /* Blocks are always a multiple of 32. */
   size = ((size + 31) / 32) * 32;
 
-  mb = mb_get_new ((memblock_t *) pool, size);
+  mb = mb_get_new (pool, (memblock_t *) pool->mem, size);
   if (mb)
-    stats_update (size, 0);
+    {
+      stats_update (pool, mb->size, 0);
+      return &mb->aligned.c;
+    }
+
+  /* If we are called from xmalloc style functions resort to the
+   * overflow pools to return memory.  We don't do this in FIPS mode,
+   * though.  If the auto-expand option is active we do the expanding
+   * also for the standard malloc functions.
+   *
+   * The idea of using them by default only for the xmalloc function
+   * is so that a user can control whether memory will be allocated in
+   * the initial created mlock protected secmem area or may also be
+   * allocated from the overflow pools.  */
+  if ((xhint || auto_expand) && !fips_mode ())
+    {
+      /* Check whether we can allocate from the overflow pools.  */
+      for (pool = pool->next; pool; pool = pool->next)
+        {
+          mb = mb_get_new (pool, (memblock_t *) pool->mem, size);
+          if (mb)
+            {
+              stats_update (pool, mb->size, 0);
+              return &mb->aligned.c;
+            }
+        }
+      /* Allocate a new overflow pool.  We put a new pool right after
+       * the mainpool so that the next allocation will happen in that
+       * pool and not in one of the older pools.  When this new pool
+       * gets full we will try to find space in the older pools.  */
+      pool = calloc (1, sizeof *pool);
+      if (!pool)
+        return NULL;  /* Not enough memory for a new pool descriptor.  */
+      pool->size = auto_expand? auto_expand : STANDARD_POOL_SIZE;
+      pool->mem = malloc (pool->size);
+      if (!pool->mem)
+        {
+          free (pool);
+          return NULL; /* Not enough memory available for a new pool.  */
+        }
+      /* Initialize first memory block.  */
+      mb = (memblock_t *) pool->mem;
+      mb->size = pool->size - BLOCK_HEAD_SIZE;
+      mb->flags = 0;
+
+      pool->okay = 1;
+
+      /* Take care: in _gcry_private_is_secure we do not lock and thus
+       * we assume that the second assignment below is atomic.  Memory
+       * barrier prevents reordering of stores to new pool structure after
+       * MAINPOOL.NEXT assigment and prevents _gcry_private_is_secure seeing
+       * non-initialized POOL->NEXT pointers.  */
+      pool->next = mainpool.next;
+      memory_barrier();
+      mainpool.next = pool;
+
+      /* After the first time we allocated an overflow pool, print a
+       * warning.  */
+      if (!pool->next)
+        print_warn ();
+
+      /* Allocate.  */
+      mb = mb_get_new (pool, (memblock_t *) pool->mem, size);
+      if (mb)
+        {
+          stats_update (pool, mb->size, 0);
+          return &mb->aligned.c;
+        }
+    }
 
-  return mb ? &mb->aligned.c : NULL;
+  return NULL;
 }
 
+
+/* Allocate a block from the secmem of SIZE.  With XHINT set assume
+ * that the caller is a xmalloc style function.  */
 void *
-_gcry_secmem_malloc (size_t size)
+_gcry_secmem_malloc (size_t size, int xhint)
 {
   void *p;
 
   SECMEM_LOCK;
-  p = _gcry_secmem_malloc_internal (size);
+  p = _gcry_secmem_malloc_internal (size, xhint);
   SECMEM_UNLOCK;
 
   return p;
 }
 
-static void
+static int
 _gcry_secmem_free_internal (void *a)
 {
+  pooldesc_t *pool;
   memblock_t *mb;
   int size;
 
-  if (!a)
-    return;
+  for (pool = &mainpool; pool; pool = pool->next)
+    if (pool->okay && ptr_into_pool_p (pool, a))
+      break;
+  if (!pool)
+    return 0; /* A does not belong to use.  */
 
   mb = ADDR_TO_BLOCK (a);
   size = mb->size;
@@ -572,42 +766,50 @@ _gcry_secmem_free_internal (void *a)
   /* This does not make much sense: probably this memory is held in the
    * cache. We do it anyway: */
 #define MB_WIPE_OUT(byte) \
-  wipememory2 ((memblock_t *) ((char *) mb + BLOCK_HEAD_SIZE), (byte), size);
+  wipememory2 (((char *) mb + BLOCK_HEAD_SIZE), (byte), size);
 
   MB_WIPE_OUT (0xff);
   MB_WIPE_OUT (0xaa);
   MB_WIPE_OUT (0x55);
   MB_WIPE_OUT (0x00);
 
-  stats_update (0, size);
+  /* Update stats.  */
+  stats_update (pool, 0, size);
 
   mb->flags &= ~MB_FLAG_ACTIVE;
 
-  /* Update stats.  */
+  mb_merge (pool, mb);
 
-  mb_merge (mb);
+  return 1; /* Freed.  */
 }
 
-/* Wipe out and release memory.  */
-void
+
+/* Wipe out and release memory.  Returns true if this function
+ * actually released A.  */
+int
 _gcry_secmem_free (void *a)
 {
+  int mine;
+
+  if (!a)
+    return 1; /* Tell caller that we handled it.  */
+
   SECMEM_LOCK;
-  _gcry_secmem_free_internal (a);
+  mine = _gcry_secmem_free_internal (a);
   SECMEM_UNLOCK;
+  return mine;
 }
 
-/* Realloc memory.  */
-void *
-_gcry_secmem_realloc (void *p, size_t newsize)
+
+static void *
+_gcry_secmem_realloc_internal (void *p, size_t newsize, int xhint)
 {
   memblock_t *mb;
   size_t size;
   void *a;
 
-  SECMEM_LOCK;
-
-  mb = (memblock_t *) ((char *) p - ((size_t) &((memblock_t *) 0)->aligned.c));
+  mb = (memblock_t *) (void *) ((char *) p
+                               - ((size_t) &((memblock_t *) 0)->aligned.c));
   size = mb->size;
   if (newsize < size)
     {
@@ -616,7 +818,7 @@ _gcry_secmem_realloc (void *p, size_t newsize)
     }
   else
     {
-      a = _gcry_secmem_malloc_internal (newsize);
+      a = _gcry_secmem_malloc_internal (newsize, xhint);
       if (a)
        {
          memcpy (a, p, size);
@@ -625,17 +827,43 @@ _gcry_secmem_realloc (void *p, size_t newsize)
        }
     }
 
+  return a;
+}
+
+
+/* Realloc memory.  With XHINT set assume that the caller is a xmalloc
+ * style function.  */
+void *
+_gcry_secmem_realloc (void *p, size_t newsize, int xhint)
+{
+  void *a;
+
+  SECMEM_LOCK;
+  a = _gcry_secmem_realloc_internal (p, newsize, xhint);
   SECMEM_UNLOCK;
 
   return a;
 }
 
 
-/* Return true if P points into the secure memory area.  */
+/* Return true if P points into the secure memory areas.  */
 int
 _gcry_private_is_secure (const void *p)
 {
-  return pool_okay && ptr_into_pool_p (p);
+  pooldesc_t *pool;
+
+  /* We do no lock here because once a pool is allocated it will not
+   * be removed anymore (except for gcry_secmem_term).  Further, as
+   * assigment of POOL->NEXT in new pool structure is visible in
+   * this thread before assigment of MAINPOOL.NEXT, pool list can be
+   * iterated locklessly.  This visiblity is ensured by memory barrier
+   * between POOL->NEXT and MAINPOOL.NEXT assignments in
+   * _gcry_secmem_malloc_internal. */
+  for (pool = &mainpool; pool; pool = pool->next)
+    if (pool->okay && ptr_into_pool_p (pool, p))
+      return 1;
+
+  return 0;
 }
 
 
@@ -650,47 +878,75 @@ _gcry_private_is_secure (const void *p)
 void
 _gcry_secmem_term ()
 {
-  if (!pool_okay)
-    return;
+  pooldesc_t *pool, *next;
 
-  wipememory2 (pool, 0xff, pool_size);
-  wipememory2 (pool, 0xaa, pool_size);
-  wipememory2 (pool, 0x55, pool_size);
-  wipememory2 (pool, 0x00, pool_size);
+  for (pool = &mainpool; pool; pool = next)
+    {
+      next = pool->next;
+      if (!pool->okay)
+        continue;
+
+      wipememory2 (pool->mem, 0xff, pool->size);
+      wipememory2 (pool->mem, 0xaa, pool->size);
+      wipememory2 (pool->mem, 0x55, pool->size);
+      wipememory2 (pool->mem, 0x00, pool->size);
+      if (0)
+        ;
 #if HAVE_MMAP
-  if (pool_is_mmapped)
-    munmap (pool, pool_size);
+      else if (pool->is_mmapped)
+        munmap (pool->mem, pool->size);
 #endif
-  pool = NULL;
-  pool_okay = 0;
-  pool_size = 0;
+      else
+        free (pool->mem);
+      pool->mem = NULL;
+      pool->okay = 0;
+      pool->size = 0;
+      if (pool != &mainpool)
+        free (pool);
+    }
+  mainpool.next = NULL;
   not_locked = 0;
 }
 
 
+/* Print stats of the secmem allocator.  With EXTENDED passwed as true
+ * a detiled listing is returned (used for testing).  */
 void
-_gcry_secmem_dump_stats ()
+_gcry_secmem_dump_stats (int extended)
 {
-#if 1
   SECMEM_LOCK;
-
- if (pool_okay)
-    log_info ("secmem usage: %u/%lu bytes in %u blocks\n",
-             cur_alloced, (unsigned long)pool_size, cur_blocks);
+  secmem_dump_stats_internal (extended);
   SECMEM_UNLOCK;
-#else
-  memblock_t *mb;
-  int i;
+}
 
-  SECMEM_LOCK;
 
-  for (i = 0, mb = (memblock_t *) pool;
-       ptr_into_pool_p (mb);
-       mb = mb_get_next (mb), i++)
-    log_info ("SECMEM: [%s] block: %i; size: %i\n",
-             (mb->flags & MB_FLAG_ACTIVE) ? "used" : "free",
-             i,
-             mb->size);
-  SECMEM_UNLOCK;
-#endif
+static void
+secmem_dump_stats_internal (int extended)
+{
+  pooldesc_t *pool;
+  memblock_t *mb;
+  int i, poolno;
+
+  for (pool = &mainpool, poolno = 0; pool; pool = pool->next, poolno++)
+    {
+      if (!extended)
+        {
+          if (pool->okay)
+            log_info ("%-13s %u/%lu bytes in %u blocks\n",
+                      pool == &mainpool? "secmem usage:":"",
+                      pool->cur_alloced, (unsigned long)pool->size,
+                      pool->cur_blocks);
+        }
+      else
+        {
+          for (i = 0, mb = (memblock_t *) pool->mem;
+               ptr_into_pool_p (pool, mb);
+               mb = mb_get_next (pool, mb), i++)
+            log_info ("SECMEM: pool %d %s block %i size %i\n",
+                      poolno,
+                      (mb->flags & MB_FLAG_ACTIVE) ? "used" : "free",
+                      i,
+                      mb->size);
+        }
+    }
 }
diff --git a/grub-core/lib/libgcrypt/src/secmem.h 
b/grub-core/lib/libgcrypt/src/secmem.h
index 29e151af4..8ad6ef1a3 100644
--- a/grub-core/lib/libgcrypt/src/secmem.h
+++ b/grub-core/lib/libgcrypt/src/secmem.h
@@ -23,10 +23,11 @@
 
 void _gcry_secmem_init (size_t npool);
 void _gcry_secmem_term (void);
-void *_gcry_secmem_malloc (size_t size) _GCRY_GCC_ATTR_MALLOC;
-void *_gcry_secmem_realloc (void *a, size_t newsize);
-void _gcry_secmem_free (void *a);
-void _gcry_secmem_dump_stats (void);
+void *_gcry_secmem_malloc (size_t size, int xhint) _GCRY_GCC_ATTR_MALLOC;
+void *_gcry_secmem_realloc (void *a, size_t newsize, int xhint);
+int  _gcry_secmem_free (void *a);
+void _gcry_secmem_dump_stats (int extended);
+void _gcry_secmem_set_auto_expand (unsigned int chunksize);
 void _gcry_secmem_set_flags (unsigned flags);
 unsigned _gcry_secmem_get_flags(void);
 int _gcry_private_is_secure (const void *p);
@@ -35,5 +36,7 @@ int _gcry_private_is_secure (const void *p);
 #define GCRY_SECMEM_FLAG_NO_WARNING      (1 << 0)
 #define GCRY_SECMEM_FLAG_SUSPEND_WARNING (1 << 1)
 #define GCRY_SECMEM_FLAG_NOT_LOCKED      (1 << 2)
+#define GCRY_SECMEM_FLAG_NO_MLOCK        (1 << 3)
+#define GCRY_SECMEM_FLAG_NO_PRIV_DROP    (1 << 4)
 
 #endif /* G10_SECMEM_H */
diff --git a/grub-core/lib/libgcrypt/src/sexp.c 
b/grub-core/lib/libgcrypt/src/sexp.c
index 78013fdf3..d15f1a790 100644
--- a/grub-core/lib/libgcrypt/src/sexp.c
+++ b/grub-core/lib/libgcrypt/src/sexp.c
@@ -1,6 +1,7 @@
 /* sexp.c  -  S-Expression handling
  * Copyright (C) 1999, 2000, 2001, 2002, 2003,
  *               2004, 2006, 2007, 2008, 2011  Free Software Foundation, Inc.
+ * Copyright (C) 2013, 2014, 2017 g10 Code GmbH
  *
  * This file is part of Libgcrypt.
  *
@@ -14,9 +15,9 @@
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses/>.
+ * SPDX-License-Identifier: LGPL-2.1+
  */
 
 
@@ -31,7 +32,55 @@
 #define GCRYPT_NO_MPI_MACROS 1
 #include "g10lib.h"
 
-typedef struct gcry_sexp *NODE;
+
+/* Notes on the internal memory layout.
+
+   We store an S-expression as one memory buffer with tags, length and
+   value.  The simplest list would thus be:
+
+   /----------+----------+---------+------+-----------+----------\
+   | open_tag | data_tag | datalen | data | close_tag | stop_tag |
+   \----------+----------+---------+------+-----------+----------/
+
+   Expressed more compact and with an example:
+
+   /----+----+----+---+----+----\
+   | OT | DT | DL | D | CT | ST |  "(foo)"
+   \----+----+----+---+----+----/
+
+   The open tag must always be the first tag of a list as requires by
+   the S-expression specs.  At least data element (data_tag, datalen,
+   data) is required as well.  The close_tag finishes the list and
+   would actually be sufficient.  For fail-safe reasons a final stop
+   tag is always the last byte in a buffer; it has a value of 0 so
+   that string function accidentally applied to an S-expression will
+   never access unallocated data.  We do not support display hints and
+   thus don't need to represent them.  A list may have more an
+   arbitrary number of data elements but at least one is required.
+   The length of each data must be greater than 0 and has a current
+   limit to 65535 bytes (by means of the DATALEN type).
+
+   A list with two data elements:
+
+   /----+----+----+---+----+----+---+----+----\
+   | OT | DT | DL | D | DT | DL | D | CT | ST |  "(foo bar)"
+   \----+----+----+---+----+----+---+----+----/
+
+   In the above example both DL fields have a value of 3.
+   A list of a list with one data element:
+
+   /----+----+----+----+---+----+----+----\
+   | OT | OT | DT | DL | D | CT | CT | ST |  "((foo))"
+   \----+----+----+----+---+----+----+----/
+
+   A list with one element followed by another list:
+
+   /----+----+----+---+----+----+----+---+----+----+----\
+   | OT | DT | DL | D | OT | DT | DL | D | CT | CT | ST |  "(foo (bar))"
+   \----+----+----+---+----+----+----+---+----+----+----/
+
+ */
+
 typedef unsigned short DATALEN;
 
 struct gcry_sexp
@@ -41,11 +90,11 @@ struct gcry_sexp
 
 #define ST_STOP  0
 #define ST_DATA  1  /* datalen follows */
-#define ST_HINT  2  /* datalen follows */
+/*#define ST_HINT  2   datalen follows (currently not used) */
 #define ST_OPEN  3
 #define ST_CLOSE 4
 
-/* the atoi macros assume that the buffer has only valid digits */
+/* The atoi macros assume that the buffer has only valid digits.  */
 #define atoi_1(p)   (*(p) - '0' )
 #define xtoi_1(p)   (*(p) <= '9'? (*(p)- '0'): \
                      *(p) <= 'F'? (*(p)-'A'+10):(*(p)-'a'+10))
@@ -53,15 +102,15 @@ struct gcry_sexp
 
 #define TOKEN_SPECIALS  "-./_:*+="
 
-static gcry_error_t
-vsexp_sscan (gcry_sexp_t *retsexp, size_t *erroff,
-            const char *buffer, size_t length, int argflag,
-            void **arg_list, va_list arg_ptr);
+static gcry_err_code_t
+do_vsexp_sscan (gcry_sexp_t *retsexp, size_t *erroff,
+                const char *buffer, size_t length, int argflag,
+                void **arg_list, va_list arg_ptr);
 
-static gcry_error_t
-sexp_sscan (gcry_sexp_t *retsexp, size_t *erroff,
-           const char *buffer, size_t length, int argflag,
-           void **arg_list, ...);
+static gcry_err_code_t
+do_sexp_sscan (gcry_sexp_t *retsexp, size_t *erroff,
+               const char *buffer, size_t length, int argflag,
+               void **arg_list, ...);
 
 /* Return true if P points to a byte containing a whitespace according
    to the S-expressions definition. */
@@ -122,7 +171,7 @@ dump_string (const byte *p, size_t n, int delim )
 
 
 void
-gcry_sexp_dump (const gcry_sexp_t a)
+_gcry_sexp_dump (const gcry_sexp_t a)
 {
   const byte *p;
   int indent = 0;
@@ -166,9 +215,10 @@ gcry_sexp_dump (const gcry_sexp_t a)
     }
 }
 
-/****************
- * Pass list through except when it is an empty list - in that case
- * return NULL and release the passed list.
+
+/* Pass list through except when it is an empty list - in that case
+ * return NULL and release the passed list.  This is used to make sure
+ * that no forbidden empty lists are created.
  */
 static gcry_sexp_t
 normalize ( gcry_sexp_t list )
@@ -181,13 +231,13 @@ normalize ( gcry_sexp_t list )
   if ( *p == ST_STOP )
     {
       /* this is "" */
-      gcry_sexp_release ( list );
+      sexp_release ( list );
       return NULL;
     }
   if ( *p == ST_OPEN && p[1] == ST_CLOSE )
     {
       /* this is "()" */
-      gcry_sexp_release ( list );
+      sexp_release ( list );
       return NULL;
     }
 
@@ -209,23 +259,23 @@ normalize ( gcry_sexp_t list )
 
    This function returns 0 and and the pointer to the new object in
    RETSEXP or an error code in which case RETSEXP is set to NULL.  */
-gcry_error_t
-gcry_sexp_create (gcry_sexp_t *retsexp, void *buffer, size_t length,
+gcry_err_code_t
+_gcry_sexp_create (gcry_sexp_t *retsexp, void *buffer, size_t length,
                   int autodetect, void (*freefnc)(void*) )
 {
-  gcry_error_t errcode;
+  gcry_err_code_t errcode;
   gcry_sexp_t se;
 
   if (!retsexp)
-    return gcry_error (GPG_ERR_INV_ARG);
+    return GPG_ERR_INV_ARG;
   *retsexp = NULL;
   if (autodetect < 0 || autodetect > 1 || !buffer)
-    return gcry_error (GPG_ERR_INV_ARG);
+    return GPG_ERR_INV_ARG;
 
   if (!length && !autodetect)
     { /* What a brave caller to assume that there is really a canonical
          encoded S-expression in buffer */
-      length = gcry_sexp_canon_len (buffer, 0, NULL, &errcode);
+      length = _gcry_sexp_canon_len (buffer, 0, NULL, &errcode);
       if (!length)
         return errcode;
     }
@@ -234,7 +284,7 @@ gcry_sexp_create (gcry_sexp_t *retsexp, void *buffer, 
size_t length,
       length = strlen ((char *)buffer);
     }
 
-  errcode = sexp_sscan (&se, NULL, buffer, length, 0, NULL);
+  errcode = do_sexp_sscan (&se, NULL, buffer, length, 0, NULL);
   if (errcode)
     return errcode;
 
@@ -248,15 +298,15 @@ gcry_sexp_create (gcry_sexp_t *retsexp, void *buffer, 
size_t length,
          GCRYSEXP object and use the BUFFER directly.  */
       freefnc (buffer);
     }
-  return gcry_error (GPG_ERR_NO_ERROR);
+  return 0;
 }
 
 /* Same as gcry_sexp_create but don't transfer ownership */
-gcry_error_t
-gcry_sexp_new (gcry_sexp_t *retsexp, const void *buffer, size_t length,
+gcry_err_code_t
+_gcry_sexp_new (gcry_sexp_t *retsexp, const void *buffer, size_t length,
                int autodetect)
 {
-  return gcry_sexp_create (retsexp, (void *)buffer, length, autodetect, NULL);
+  return _gcry_sexp_create (retsexp, (void *)buffer, length, autodetect, NULL);
 }
 
 
@@ -264,11 +314,11 @@ gcry_sexp_new (gcry_sexp_t *retsexp, const void *buffer, 
size_t length,
  * Release resource of the given SEXP object.
  */
 void
-gcry_sexp_release( gcry_sexp_t sexp )
+_gcry_sexp_release( gcry_sexp_t sexp )
 {
   if (sexp)
     {
-      if (gcry_is_secure (sexp))
+      if (_gcry_is_secure (sexp))
         {
           /* Extra paranoid wiping. */
           const byte *p = sexp->d;
@@ -297,7 +347,7 @@ gcry_sexp_release( gcry_sexp_t sexp )
             }
           wipememory (sexp->d, p - sexp->d);
         }
-      gcry_free ( sexp );
+      xfree ( sexp );
     }
 }
 
@@ -308,7 +358,7 @@ gcry_sexp_release( gcry_sexp_t sexp )
  * element straight into the new pair.
  */
 gcry_sexp_t
-gcry_sexp_cons( const gcry_sexp_t a, const gcry_sexp_t b )
+_gcry_sexp_cons( const gcry_sexp_t a, const gcry_sexp_t b )
 {
   (void)a;
   (void)b;
@@ -325,7 +375,7 @@ gcry_sexp_cons( const gcry_sexp_t a, const gcry_sexp_t b )
  * with a NULL.
  */
 gcry_sexp_t
-gcry_sexp_alist( const gcry_sexp_t *array )
+_gcry_sexp_alist( const gcry_sexp_t *array )
 {
   (void)array;
 
@@ -339,7 +389,7 @@ gcry_sexp_alist( const gcry_sexp_t *array )
  * Make a list from all items, the end of list is indicated by a NULL
  */
 gcry_sexp_t
-gcry_sexp_vlist( const gcry_sexp_t a, ... )
+_gcry_sexp_vlist( const gcry_sexp_t a, ... )
 {
   (void)a;
   /* NYI: Implementation should be quite easy with our new data
@@ -351,10 +401,10 @@ gcry_sexp_vlist( const gcry_sexp_t a, ... )
 
 /****************
  * Append n to the list a
- * Returns: a new ist (which maybe a)
+ * Returns: a new list (which maybe a)
  */
 gcry_sexp_t
-gcry_sexp_append( const gcry_sexp_t a, const gcry_sexp_t n )
+_gcry_sexp_append( const gcry_sexp_t a, const gcry_sexp_t n )
 {
   (void)a;
   (void)n;
@@ -365,7 +415,7 @@ gcry_sexp_append( const gcry_sexp_t a, const gcry_sexp_t n )
 }
 
 gcry_sexp_t
-gcry_sexp_prepend( const gcry_sexp_t a, const gcry_sexp_t n )
+_gcry_sexp_prepend( const gcry_sexp_t a, const gcry_sexp_t n )
 {
   (void)a;
   (void)n;
@@ -382,7 +432,7 @@ gcry_sexp_prepend( const gcry_sexp_t a, const gcry_sexp_t n 
)
  * Returns: A new list with this sublist or NULL if not found.
  */
 gcry_sexp_t
-gcry_sexp_find_token( const gcry_sexp_t list, const char *tok, size_t toklen )
+_gcry_sexp_find_token( const gcry_sexp_t list, const char *tok, size_t toklen )
 {
   const byte *p;
   DATALEN n;
@@ -433,7 +483,7 @@ gcry_sexp_find_token( const gcry_sexp_t list, const char 
*tok, size_t toklen )
                }
               n = p - head;
 
-              newlist = gcry_malloc ( sizeof *newlist + n );
+              newlist = xtrymalloc ( sizeof *newlist + n );
               if (!newlist)
                 {
                   /* No way to return an error code, so we can only
@@ -462,41 +512,45 @@ gcry_sexp_find_token( const gcry_sexp_t list, const char 
*tok, size_t toklen )
  * Return the length of the given list
  */
 int
-gcry_sexp_length( const gcry_sexp_t list )
+_gcry_sexp_length (const gcry_sexp_t list)
 {
-    const byte *p;
-    DATALEN n;
-    int type;
-    int length = 0;
-    int level = 0;
-
-    if ( !list )
-       return 0;
-
-    p = list->d;
-    while ( (type=*p) != ST_STOP ) {
-       p++;
-       if ( type == ST_DATA ) {
-           memcpy ( &n, p, sizeof n );
-           p += sizeof n + n;
-           if ( level == 1 )
-               length++;
+  const byte *p;
+  DATALEN n;
+  int type;
+  int length = 0;
+  int level = 0;
+
+  if (!list)
+    return 0;
+
+  p = list->d;
+  while ((type=*p) != ST_STOP)
+    {
+      p++;
+      if (type == ST_DATA)
+        {
+          memcpy (&n, p, sizeof n);
+          p += sizeof n + n;
+          if (level == 1)
+            length++;
        }
-       else if ( type == ST_OPEN ) {
-           if ( level == 1 )
-               length++;
-           level++;
+      else if (type == ST_OPEN)
+        {
+          if (level == 1)
+            length++;
+          level++;
        }
-       else if ( type == ST_CLOSE ) {
-           level--;
+      else if (type == ST_CLOSE)
+        {
+          level--;
        }
     }
-    return length;
+  return length;
 }
 
 
 /* Return the internal lengths offset of LIST.  That is the size of
-   the buffer from the first ST_OPEN, which is retruned at R_OFF, to
+   the buffer from the first ST_OPEN, which is returned at R_OFF, to
    the corresponding ST_CLOSE inclusive.  */
 static size_t
 get_internal_buffer (const gcry_sexp_t list, size_t *r_off)
@@ -537,112 +591,116 @@ get_internal_buffer (const gcry_sexp_t list, size_t 
*r_off)
 
 
 
-/* Extract the CAR of the given list.  May return NULL for bad lists
-   or memory failure.  */
+/* Extract the n-th element of the given LIST.  Returns NULL for
+   no-such-element, a corrupt list, or memory failure.  */
 gcry_sexp_t
-gcry_sexp_nth( const gcry_sexp_t list, int number )
+_gcry_sexp_nth (const gcry_sexp_t list, int number)
 {
-    const byte *p;
-    DATALEN n;
-    gcry_sexp_t newlist;
-    byte *d;
-    int level = 0;
-
-    if ( !list || list->d[0] != ST_OPEN )
-       return NULL;
-    p = list->d;
-
-    while ( number > 0 ) {
-       p++;
-       if ( *p == ST_DATA ) {
-           memcpy ( &n, ++p, sizeof n );
-           p += sizeof n + n;
-           p--;
-           if ( !level )
-               number--;
+  const byte *p;
+  DATALEN n;
+  gcry_sexp_t newlist;
+  byte *d;
+  int level = 0;
+
+  if (!list || list->d[0] != ST_OPEN)
+    return NULL;
+  p = list->d;
+
+  while (number > 0)
+    {
+      p++;
+      if (*p == ST_DATA)
+        {
+          memcpy (&n, ++p, sizeof n);
+          p += sizeof n + n;
+          p--;
+          if (!level)
+            number--;
        }
-       else if ( *p == ST_OPEN ) {
-           level++;
+      else if (*p == ST_OPEN)
+        {
+          level++;
        }
-       else if ( *p == ST_CLOSE ) {
-           level--;
-           if ( !level )
-               number--;
+      else if (*p == ST_CLOSE)
+        {
+          level--;
+          if ( !level )
+            number--;
        }
-       else if ( *p == ST_STOP ) {
-           return NULL;
+      else if (*p == ST_STOP)
+        {
+          return NULL;
        }
     }
-    p++;
+  p++;
 
-    if ( *p == ST_DATA ) {
-        memcpy ( &n, p, sizeof n );
-        /* Allocate 1 (=sizeof *newlist) byte for ST_OPEN
-                    1 byte for ST_DATA
-                    sizeof n byte for n
-                    n byte for the data
-                    1 byte for ST_CLOSE
-                    1 byte for ST_STOP */
-        newlist = gcry_malloc ( sizeof *newlist + 1 + sizeof n + n + 2 );
-        if (!newlist)
-            return NULL;
-        d = newlist->d;
-        *d = ST_OPEN;   /* Put the ST_OPEN flag */
-        d++;            /* Move forward */
-        /* Copy ST_DATA, n and the data from p to d */
-        memcpy ( d, p, 1 + sizeof n + n );
-        d += 1 + sizeof n + n;  /* Move after the data copied */
-        *d = ST_CLOSE;          /* Put the ST_CLOSE flag */
-        d++;                    /* Move forward */
-        *d = ST_STOP;           /* Put the ST_STOP flag */
+  if (*p == ST_DATA)
+    {
+      memcpy (&n, p+1, sizeof n);
+      newlist = xtrymalloc (sizeof *newlist + 1 + 1 + sizeof n + n + 1);
+      if (!newlist)
+        return NULL;
+      d = newlist->d;
+      *d++ = ST_OPEN;
+      memcpy (d, p, 1 + sizeof n + n);
+      d += 1 + sizeof n + n;
+      *d++ = ST_CLOSE;
+      *d = ST_STOP;
     }
-    else if ( *p == ST_OPEN ) {
-       const byte *head = p;
-
-       level = 1;
-       do {
-           p++;
-           if ( *p == ST_DATA ) {
-               memcpy ( &n, ++p, sizeof n );
-               p += sizeof n + n;
-               p--;
-           }
-           else if ( *p == ST_OPEN ) {
-               level++;
-           }
-           else if ( *p == ST_CLOSE ) {
-               level--;
-           }
-           else if ( *p == ST_STOP ) {
-               BUG ();
-           }
-       } while ( level );
-       n = p + 1 - head;
+  else if (*p == ST_OPEN)
+    {
+      const byte *head = p;
 
-       newlist = gcry_malloc ( sizeof *newlist + n );
-        if (!newlist)
-          return NULL;
-       d = newlist->d;
-       memcpy ( d, head, n ); d += n;
-       *d++ = ST_STOP;
+      level = 1;
+      do {
+        p++;
+        if (*p == ST_DATA)
+          {
+            memcpy (&n, ++p, sizeof n);
+            p += sizeof n + n;
+            p--;
+          }
+        else if (*p == ST_OPEN)
+          {
+            level++;
+          }
+        else if (*p == ST_CLOSE)
+          {
+            level--;
+          }
+        else if (*p == ST_STOP)
+          {
+            BUG ();
+          }
+      } while (level);
+      n = p + 1 - head;
+
+      newlist = xtrymalloc (sizeof *newlist + n);
+      if (!newlist)
+        return NULL;
+      d = newlist->d;
+      memcpy (d, head, n);
+      d += n;
+      *d++ = ST_STOP;
     }
-    else
-       newlist = NULL;
+  else
+    newlist = NULL;
 
-    return normalize (newlist);
+  return normalize (newlist);
 }
 
+
 gcry_sexp_t
-gcry_sexp_car( const gcry_sexp_t list )
+_gcry_sexp_car (const gcry_sexp_t list)
 {
-    return gcry_sexp_nth ( list, 0 );
+  return _gcry_sexp_nth (list, 0);
 }
 
 
 /* Helper to get data from the car.  The returned value is valid as
    long as the list is not modified. */
 static const char *
-sexp_nth_data (const gcry_sexp_t list, int number, size_t *datalen)
+do_sexp_nth_data (const gcry_sexp_t list, int number, size_t *datalen)
 {
   const byte *p;
   DATALEN n;
@@ -659,9 +717,9 @@ sexp_nth_data (const gcry_sexp_t list, int number, size_t 
*datalen)
     return NULL;     /* Not a list but N > 0 requested. */
 
   /* Skip over N elements. */
-  while ( number > 0 )
+  while (number > 0)
     {
-      if ( *p == ST_DATA )
+      if (*p == ST_DATA)
         {
           memcpy ( &n, ++p, sizeof n );
           p += sizeof n + n;
@@ -669,17 +727,17 @@ sexp_nth_data (const gcry_sexp_t list, int number, size_t 
*datalen)
           if ( !level )
             number--;
        }
-      else if ( *p == ST_OPEN )
+      else if (*p == ST_OPEN)
         {
           level++;
        }
-      else if ( *p == ST_CLOSE )
+      else if (*p == ST_CLOSE)
         {
           level--;
           if ( !level )
             number--;
        }
-      else if ( *p == ST_STOP )
+      else if (*p == ST_STOP)
         {
           return NULL;
        }
@@ -687,7 +745,7 @@ sexp_nth_data (const gcry_sexp_t list, int number, size_t 
*datalen)
     }
 
   /* If this is data, return it.  */
-  if ( *p == ST_DATA )
+  if (*p == ST_DATA)
     {
       memcpy ( &n, ++p, sizeof n );
       *datalen = n;
@@ -701,25 +759,49 @@ sexp_nth_data (const gcry_sexp_t list, int number, size_t 
*datalen)
 /* Get data from the car.  The returned value is valid as long as the
    list is not modified.  */
 const char *
-gcry_sexp_nth_data (const gcry_sexp_t list, int number, size_t *datalen )
+_gcry_sexp_nth_data (const gcry_sexp_t list, int number, size_t *datalen )
 {
-  return sexp_nth_data (list, number, datalen);
+  return do_sexp_nth_data (list, number, datalen);
+}
+
+
+/* Get the nth element of a list which needs to be a simple object.
+   The returned value is a malloced buffer and needs to be freed by
+   the caller.  This is basically the same as gcry_sexp_nth_data but
+   with an allocated result. */
+void *
+_gcry_sexp_nth_buffer (const gcry_sexp_t list, int number, size_t *rlength)
+{
+  const char *s;
+  size_t n;
+  char *buf;
+
+  *rlength = 0;
+  s = do_sexp_nth_data (list, number, &n);
+  if (!s || !n)
+    return NULL;
+  buf = xtrymalloc (n);
+  if (!buf)
+    return NULL;
+  memcpy (buf, s, n);
+  *rlength = n;
+  return buf;
 }
 
 
 /* Get a string from the car.  The returned value is a malloced string
    and needs to be freed by the caller.  */
 char *
-gcry_sexp_nth_string (const gcry_sexp_t list, int number)
+_gcry_sexp_nth_string (const gcry_sexp_t list, int number)
 {
   const char *s;
   size_t n;
   char *buf;
 
-  s = sexp_nth_data (list, number, &n);
+  s = do_sexp_nth_data (list, number, &n);
   if (!s || n < 1 || (n+1) < 1)
     return NULL;
-  buf = gcry_malloc (n+1);
+  buf = xtrymalloc (n+1);
   if (!buf)
     return NULL;
   memcpy (buf, s, n);
@@ -727,25 +809,44 @@ gcry_sexp_nth_string (const gcry_sexp_t list, int number)
   return buf;
 }
 
+
 /*
  * Get a MPI from the car
  */
 gcry_mpi_t
-gcry_sexp_nth_mpi( gcry_sexp_t list, int number, int mpifmt )
+_gcry_sexp_nth_mpi (gcry_sexp_t list, int number, int mpifmt)
 {
-  const char *s;
   size_t n;
   gcry_mpi_t a;
 
-  if ( !mpifmt )
-    mpifmt = GCRYMPI_FMT_STD;
+  if (mpifmt == GCRYMPI_FMT_OPAQUE)
+    {
+      char *p;
 
-  s = sexp_nth_data (list, number, &n);
-  if (!s)
-    return NULL;
+      p = _gcry_sexp_nth_buffer (list, number, &n);
+      if (!p)
+        return NULL;
 
-  if ( gcry_mpi_scan ( &a, mpifmt, s, n, NULL ) )
-    return NULL;
+      a = _gcry_is_secure (list)? _gcry_mpi_snew (0) : _gcry_mpi_new (0);
+      if (a)
+        mpi_set_opaque (a, p, n*8);
+      else
+        xfree (p);
+    }
+  else
+    {
+      const char *s;
+
+      if (!mpifmt)
+        mpifmt = GCRYMPI_FMT_STD;
+
+      s = do_sexp_nth_data (list, number, &n);
+      if (!s)
+        return NULL;
+
+      if (_gcry_mpi_scan (&a, mpifmt, s, n, NULL))
+        return NULL;
+    }
 
   return a;
 }
@@ -755,118 +856,121 @@ gcry_sexp_nth_mpi( gcry_sexp_t list, int number, int 
mpifmt )
  * Get the CDR
  */
 gcry_sexp_t
-gcry_sexp_cdr( const gcry_sexp_t list )
+_gcry_sexp_cdr(const gcry_sexp_t list)
 {
-    const byte *p;
-    const byte *head;
-    DATALEN n;
-    gcry_sexp_t newlist;
-    byte *d;
-    int level = 0;
-    int skip = 1;
-
-    if ( !list || list->d[0] != ST_OPEN )
-       return NULL;
-    p = list->d;
-
-    while ( skip > 0 ) {
-       p++;
-       if ( *p == ST_DATA ) {
-           memcpy ( &n, ++p, sizeof n );
-           p += sizeof n + n;
-           p--;
-           if ( !level )
-               skip--;
+  const byte *p;
+  const byte *head;
+  DATALEN n;
+  gcry_sexp_t newlist;
+  byte *d;
+  int level = 0;
+  int skip = 1;
+
+  if (!list || list->d[0] != ST_OPEN)
+    return NULL;
+  p = list->d;
+
+  while (skip > 0)
+    {
+      p++;
+      if (*p == ST_DATA)
+        {
+          memcpy ( &n, ++p, sizeof n );
+          p += sizeof n + n;
+          p--;
+          if ( !level )
+            skip--;
        }
-       else if ( *p == ST_OPEN ) {
-           level++;
+      else if (*p == ST_OPEN)
+        {
+          level++;
        }
-       else if ( *p == ST_CLOSE ) {
-           level--;
-           if ( !level )
-               skip--;
+      else if (*p == ST_CLOSE)
+        {
+          level--;
+          if ( !level )
+            skip--;
        }
-       else if ( *p == ST_STOP ) {
-           return NULL;
+      else if (*p == ST_STOP)
+        {
+          return NULL;
        }
     }
+  p++;
+
+  head = p;
+  level = 0;
+  do {
+    if (*p == ST_DATA)
+      {
+        memcpy ( &n, ++p, sizeof n );
+        p += sizeof n + n;
+        p--;
+      }
+    else if (*p == ST_OPEN)
+      {
+        level++;
+      }
+    else if (*p == ST_CLOSE)
+      {
+        level--;
+      }
+    else if (*p == ST_STOP)
+      {
+        return NULL;
+      }
     p++;
+  } while (level);
+  n = p - head;
 
-    head = p;
-    level = 0;
-    do {
-       if ( *p == ST_DATA ) {
-           memcpy ( &n, ++p, sizeof n );
-           p += sizeof n + n;
-           p--;
-       }
-       else if ( *p == ST_OPEN ) {
-           level++;
-       }
-       else if ( *p == ST_CLOSE ) {
-           level--;
-       }
-       else if ( *p == ST_STOP ) {
-           return NULL;
-       }
-       p++;
-    } while ( level );
-    n = p - head;
-
-    newlist = gcry_malloc ( sizeof *newlist + n + 2 );
-    if (!newlist)
-      return NULL;
-    d = newlist->d;
-    *d++ = ST_OPEN;
-    memcpy ( d, head, n ); d += n;
-    *d++ = ST_CLOSE;
-    *d++ = ST_STOP;
-
-    return normalize (newlist);
+  newlist = xtrymalloc (sizeof *newlist + n + 2);
+  if (!newlist)
+    return NULL;
+  d = newlist->d;
+  *d++ = ST_OPEN;
+  memcpy (d, head, n);
+  d += n;
+  *d++ = ST_CLOSE;
+  *d++ = ST_STOP;
+
+  return normalize (newlist);
 }
 
+
 gcry_sexp_t
-gcry_sexp_cadr ( const gcry_sexp_t list )
+_gcry_sexp_cadr ( const gcry_sexp_t list )
 {
-    gcry_sexp_t a, b;
+  gcry_sexp_t a, b;
 
-    a = gcry_sexp_cdr ( list );
-    b = gcry_sexp_car ( a );
-    gcry_sexp_release ( a );
-    return b;
+  a = _gcry_sexp_cdr (list);
+  b = _gcry_sexp_car (a);
+  sexp_release (a);
+  return b;
 }
 
 
-
-static int
-hextobyte( const byte *s )
+static GPG_ERR_INLINE int
+hextonibble (int s)
 {
-    int c=0;
-
-    if( *s >= '0' && *s <= '9' )
-       c = 16 * (*s - '0');
-    else if( *s >= 'A' && *s <= 'F' )
-       c = 16 * (10 + *s - 'A');
-    else if( *s >= 'a' && *s <= 'f' ) {
-       c = 16 * (10 + *s - 'a');
-    }
-    s++;
-    if( *s >= '0' && *s <= '9' )
-       c += *s - '0';
-    else if( *s >= 'A' && *s <= 'F' )
-       c += 10 + *s - 'A';
-    else if( *s >= 'a' && *s <= 'f' ) {
-       c += 10 + *s - 'a';
-    }
-    return c;
+  if (s >= '0' && s <= '9')
+    return s - '0';
+  else if (s >= 'A' && s <= 'F')
+    return 10 + s - 'A';
+  else if (s >= 'a' && s <= 'f')
+    return 10 + s - 'a';
+  else
+    return 0;
 }
 
-struct make_space_ctx {
-    gcry_sexp_t sexp;
-    size_t allocated;
-    byte *pos;
+
+struct make_space_ctx
+{
+  gcry_sexp_t sexp;
+  size_t allocated;
+  byte *pos;
 };
 
+
 static gpg_err_code_t
 make_space ( struct make_space_ctx *c, size_t n )
 {
@@ -881,7 +985,7 @@ make_space ( struct make_space_ctx *c, size_t n )
       newsize = c->allocated + 2*(n+sizeof(DATALEN)+1);
       if (newsize <= c->allocated)
         return GPG_ERR_TOO_LARGE;
-      newsexp = gcry_realloc ( c->sexp, sizeof *newsexp + newsize - 1);
+      newsexp = xtryrealloc ( c->sexp, sizeof *newsexp + newsize - 1);
       if (!newsexp)
         return gpg_err_code_from_errno (errno);
       c->allocated = newsize;
@@ -988,10 +1092,10 @@ unquote_string (const char *string, size_t length, 
unsigned char *buf)
  * common operation gcry_sexp_cdr_mpi() will always return a secure MPI
  * regardless whether it is needed or not.
  */
-static gcry_error_t
-vsexp_sscan (gcry_sexp_t *retsexp, size_t *erroff,
-            const char *buffer, size_t length, int argflag,
-            void **arg_list, va_list arg_ptr)
+static gpg_err_code_t
+do_vsexp_sscan (gcry_sexp_t *retsexp, size_t *erroff,
+                const char *buffer, size_t length, int argflag,
+                void **arg_list, va_list arg_ptr)
 {
   gcry_err_code_t err = 0;
   static const char tokenchars[] =
@@ -1008,13 +1112,21 @@ vsexp_sscan (gcry_sexp_t *retsexp, size_t *erroff,
   const char *disphint = NULL;
   const char *percent = NULL;
   int hexcount = 0;
+  int b64count = 0;
   int quoted_esc = 0;
-  int datalen = 0;
+  size_t datalen = 0;
   size_t dummy_erroff;
   struct make_space_ctx c;
   int arg_counter = 0;
   int level = 0;
 
+  if (!retsexp)
+    return GPG_ERR_INV_ARG;
+  *retsexp = NULL;
+
+  if (!buffer)
+    return GPG_ERR_INV_ARG;
+
   if (!erroff)
     erroff = &dummy_erroff;
 
@@ -1056,10 +1168,10 @@ vsexp_sscan (gcry_sexp_t *retsexp, size_t *erroff,
      the provided one.  However, we add space for one extra datalen so
      that the code which does the ST_CLOSE can use MAKE_SPACE */
   c.allocated = length + sizeof(DATALEN);
-  if (buffer && length && gcry_is_secure (buffer))
-    c.sexp = gcry_malloc_secure (sizeof *c.sexp + c.allocated - 1);
+  if (length && _gcry_is_secure (buffer))
+    c.sexp = xtrymalloc_secure (sizeof *c.sexp + c.allocated - 1);
   else
-    c.sexp = gcry_malloc (sizeof *c.sexp + c.allocated - 1);
+    c.sexp = xtrymalloc (sizeof *c.sexp + c.allocated - 1);
   if (!c.sexp)
     {
       err = gpg_err_code_from_errno (errno);
@@ -1192,10 +1304,19 @@ vsexp_sscan (gcry_sexp_t *retsexp, size_t *erroff,
              STORE_LEN (c.pos, datalen);
              for (hexfmt++; hexfmt < p; hexfmt++)
                {
+                  int tmpc;
+
                  if (whitespacep (hexfmt))
                    continue;
-                 *c.pos++ = hextobyte ((const unsigned char*)hexfmt);
-                 hexfmt++;
+                 tmpc = hextonibble (*(const unsigned char*)hexfmt);
+                  for (hexfmt++; hexfmt < p && whitespacep (hexfmt); hexfmt++)
+                   ;
+                  if (hexfmt < p)
+                    {
+                      tmpc *= 16;
+                      tmpc += hextonibble (*(const unsigned char*)hexfmt);
+                    }
+                  *c.pos++ = tmpc;
                }
              hexfmt = NULL;
            }
@@ -1207,9 +1328,61 @@ vsexp_sscan (gcry_sexp_t *retsexp, size_t *erroff,
            }
        }
       else if (base64)
-       {
-         if (*p == '|')
-           base64 = NULL;
+        {
+          if (digitp (p) || alphap (p) || *p == '+' || *p == '/' || *p == '=')
+            b64count++;
+          else if (*p == '|')
+            {
+              gpgrt_b64state_t b64state;
+              char *b64buf;
+              int i;
+
+              base64++;         /* Skip beginning '|' */
+              b64buf = xtrymalloc (b64count);
+              if (!b64buf)
+                {
+                  err = gpg_err_code_from_syserror ();
+                  goto leave;
+                }
+              memcpy (b64buf, base64, b64count);
+
+              b64state = gpgrt_b64dec_start (NULL);
+              if (!b64state)
+                {
+                  err = gpg_err_code_from_syserror ();
+                  xfree (b64buf);
+                  goto leave;
+                }
+              err = gpgrt_b64dec_proc (b64state, b64buf, b64count,
+                                       &datalen);
+              if (err && gpg_err_code (err) != GPG_ERR_EOF)
+                {
+                  xfree (b64state);
+                  xfree (b64buf);
+                  goto leave;
+                }
+              err = gpgrt_b64dec_finish (b64state);
+              if (err)
+                {
+                  xfree (b64buf);
+                  goto leave;
+                }
+
+              MAKE_SPACE (datalen);
+              *c.pos++ = ST_DATA;
+              STORE_LEN (c.pos, datalen);
+              for (i = 0; i < datalen; i++)
+                *c.pos++ = b64buf[i];
+
+              xfree (b64buf);
+              base64 = NULL;
+            }
+          else
+            {
+              *erroff = p - buffer;
+              err = GPG_ERR_SEXP_BAD_CHARACTER;
+              goto leave;
+            }
        }
       else if (digptr)
        {
@@ -1247,11 +1420,12 @@ vsexp_sscan (gcry_sexp_t *retsexp, size_t *erroff,
              hexfmt = p;
              hexcount = 0;
            }
-         else if (*p == '|')
-           {
-             digptr = NULL; /* We ignore the optional length.  */
-             base64 = p;
-           }
+          else if (*p == '|')
+            {
+              digptr = NULL; /* We ignore the optional length.  */
+              base64 = p;
+              b64count = 0;
+            }
          else
            {
              *erroff = p - buffer;
@@ -1270,25 +1444,25 @@ vsexp_sscan (gcry_sexp_t *retsexp, size_t *erroff,
 
              ARG_NEXT (m, gcry_mpi_t);
 
-              if (gcry_mpi_get_flag (m, GCRYMPI_FLAG_OPAQUE))
+              if (mpi_get_flag (m, GCRYMPI_FLAG_OPAQUE))
                 {
                   void *mp;
                   unsigned int nbits;
 
-                  mp = gcry_mpi_get_opaque (m, &nbits);
+                  mp = mpi_get_opaque (m, &nbits);
                   nm = (nbits+7)/8;
                   if (mp && nm)
                     {
                       MAKE_SPACE (nm);
-                      if (!gcry_is_secure (c.sexp->d)
-                          && gcry_mpi_get_flag (m, GCRYMPI_FLAG_SECURE))
+                      if (!_gcry_is_secure (c.sexp->d)
+                          && mpi_get_flag (m, GCRYMPI_FLAG_SECURE))
                         {
                           /* We have to switch to secure allocation.  */
                           gcry_sexp_t newsexp;
                           byte *newhead;
 
-                          newsexp = gcry_malloc_secure (sizeof *newsexp
-                                                        + c.allocated - 1);
+                          newsexp = xtrymalloc_secure (sizeof *newsexp
+                                                       + c.allocated - 1);
                           if (!newsexp)
                             {
                               err = gpg_err_code_from_errno (errno);
@@ -1297,7 +1471,7 @@ vsexp_sscan (gcry_sexp_t *retsexp, size_t *erroff,
                           newhead = newsexp->d;
                           memcpy (newhead, c.sexp->d, (c.pos - c.sexp->d));
                           c.pos = newhead + (c.pos - c.sexp->d);
-                          gcry_free (c.sexp);
+                          xfree (c.sexp);
                           c.sexp = newsexp;
                         }
 
@@ -1309,19 +1483,26 @@ vsexp_sscan (gcry_sexp_t *retsexp, size_t *erroff,
                 }
               else
                 {
-                  if (gcry_mpi_print (mpifmt, NULL, 0, &nm, m))
-                    BUG ();
+                  if (mpifmt == GCRYMPI_FMT_USG && mpi_cmp_ui (m, 0) < 0)
+                    {
+                      err = GPG_ERR_INV_ARG;
+                      goto leave;
+                    }
+
+                  err = _gcry_mpi_print (mpifmt, NULL, 0, &nm, m);
+                  if (err)
+                    goto leave;
 
                   MAKE_SPACE (nm);
-                  if (!gcry_is_secure (c.sexp->d)
-                      && gcry_mpi_get_flag ( m, GCRYMPI_FLAG_SECURE))
+                  if (!_gcry_is_secure (c.sexp->d)
+                      && mpi_get_flag ( m, GCRYMPI_FLAG_SECURE))
                     {
                       /* We have to switch to secure allocation.  */
                       gcry_sexp_t newsexp;
                       byte *newhead;
 
-                      newsexp = gcry_malloc_secure (sizeof *newsexp
-                                                    + c.allocated - 1);
+                      newsexp = xtrymalloc_secure (sizeof *newsexp
+                                                   + c.allocated - 1);
                       if (!newsexp)
                         {
                           err = gpg_err_code_from_errno (errno);
@@ -1330,14 +1511,15 @@ vsexp_sscan (gcry_sexp_t *retsexp, size_t *erroff,
                       newhead = newsexp->d;
                       memcpy (newhead, c.sexp->d, (c.pos - c.sexp->d));
                       c.pos = newhead + (c.pos - c.sexp->d);
-                      gcry_free (c.sexp);
+                      xfree (c.sexp);
                       c.sexp = newsexp;
                     }
 
                   *c.pos++ = ST_DATA;
                   STORE_LEN (c.pos, nm);
-                  if (gcry_mpi_print (mpifmt, c.pos, nm, &nm, m))
-                    BUG ();
+                  err = _gcry_mpi_print (mpifmt, c.pos, nm, &nm, m);
+                  if (err)
+                    goto leave;
                   c.pos += nm;
                 }
            }
@@ -1365,17 +1547,24 @@ vsexp_sscan (gcry_sexp_t *retsexp, size_t *erroff,
              ARG_NEXT (alen, int);
              ARG_NEXT (astr, const char *);
 
+              if (alen < 0)
+                {
+                  *erroff = p - buffer;
+                 err = GPG_ERR_INV_ARG;
+                  goto leave;
+                }
+
              MAKE_SPACE (alen);
              if (alen
-                  && !gcry_is_secure (c.sexp->d)
-                 && gcry_is_secure (astr))
+                  && !_gcry_is_secure (c.sexp->d)
+                 && _gcry_is_secure (astr))
               {
                  /* We have to switch to secure allocation.  */
                  gcry_sexp_t newsexp;
                  byte *newhead;
 
-                 newsexp = gcry_malloc_secure (sizeof *newsexp
-                                                + c.allocated - 1);
+                 newsexp = xtrymalloc_secure (sizeof *newsexp
+                                               + c.allocated - 1);
                   if (!newsexp)
                     {
                       err = gpg_err_code_from_errno (errno);
@@ -1384,7 +1573,7 @@ vsexp_sscan (gcry_sexp_t *retsexp, size_t *erroff,
                  newhead = newsexp->d;
                  memcpy (newhead, c.sexp->d, (c.pos - c.sexp->d));
                  c.pos = newhead + (c.pos - c.sexp->d);
-                 gcry_free (c.sexp);
+                 xfree (c.sexp);
                  c.sexp = newsexp;
                }
 
@@ -1401,7 +1590,7 @@ vsexp_sscan (gcry_sexp_t *retsexp, size_t *erroff,
              char buf[35];
 
              ARG_NEXT (aint, int);
-             sprintf (buf, "%d", aint);
+             snprintf (buf, sizeof buf, "%d", aint);
              alen = strlen (buf);
              MAKE_SPACE (alen);
              *c.pos++ = ST_DATA;
@@ -1417,7 +1606,7 @@ vsexp_sscan (gcry_sexp_t *retsexp, size_t *erroff,
              char buf[35];
 
              ARG_NEXT (aint, unsigned int);
-             sprintf (buf, "%u", aint);
+             snprintf (buf, sizeof buf, "%u", aint);
              alen = strlen (buf);
              MAKE_SPACE (alen);
              *c.pos++ = ST_DATA;
@@ -1472,6 +1661,13 @@ vsexp_sscan (gcry_sexp_t *retsexp, size_t *erroff,
              err = GPG_ERR_SEXP_UNMATCHED_DH;
               goto leave;
            }
+
+         if (level == 0)
+           {
+             *erroff = p - buffer;
+             err = GPG_ERR_SEXP_UNMATCHED_PAREN;
+             goto leave;
+           }
          MAKE_SPACE (0);
          *c.pos++ = ST_CLOSE;
          level--;
@@ -1487,7 +1683,10 @@ vsexp_sscan (gcry_sexp_t *retsexp, size_t *erroff,
          hexcount = 0;
        }
       else if (*p == '|')
-       base64 = p;
+        {
+          base64 = p;
+          b64count = 0;
+        }
       else if (*p == '[')
        {
          if (disphint)
@@ -1565,78 +1764,76 @@ vsexp_sscan (gcry_sexp_t *retsexp, size_t *erroff,
       if (c.sexp)
         {
           /* Extra paranoid wipe on error. */
-          if (gcry_is_secure (c.sexp))
+          if (_gcry_is_secure (c.sexp))
             wipememory (c.sexp, sizeof (struct gcry_sexp) + c.allocated - 1);
-          gcry_free (c.sexp);
+          xfree (c.sexp);
         }
-      /* This might be expected by existing code...  */
-      *retsexp = NULL;
     }
   else
     *retsexp = normalize (c.sexp);
 
-  return gcry_error (err);
+  return err;
 #undef MAKE_SPACE
 #undef STORE_LEN
 }
 
 
-static gcry_error_t
-sexp_sscan (gcry_sexp_t *retsexp, size_t *erroff,
-           const char *buffer, size_t length, int argflag,
-           void **arg_list, ...)
+static gpg_err_code_t
+do_sexp_sscan (gcry_sexp_t *retsexp, size_t *erroff,
+               const char *buffer, size_t length, int argflag,
+               void **arg_list, ...)
 {
-  gcry_error_t rc;
+  gcry_err_code_t rc;
   va_list arg_ptr;
 
   va_start (arg_ptr, arg_list);
-  rc = vsexp_sscan (retsexp, erroff, buffer, length, argflag,
-                   arg_list, arg_ptr);
+  rc = do_vsexp_sscan (retsexp, erroff, buffer, length, argflag,
+                       arg_list, arg_ptr);
   va_end (arg_ptr);
 
   return rc;
 }
 
 
-gcry_error_t
-gcry_sexp_build (gcry_sexp_t *retsexp, size_t *erroff, const char *format, ...)
+gpg_err_code_t
+_gcry_sexp_build (gcry_sexp_t *retsexp, size_t *erroff, const char *format, 
...)
 {
-  gcry_error_t rc;
+  gcry_err_code_t rc;
   va_list arg_ptr;
 
   va_start (arg_ptr, format);
-  rc = vsexp_sscan (retsexp, erroff, format, strlen(format), 1,
-                   NULL, arg_ptr);
+  rc = do_vsexp_sscan (retsexp, erroff, format, strlen(format), 1,
+                       NULL, arg_ptr);
   va_end (arg_ptr);
 
   return rc;
 }
 
 
-gcry_error_t
+gcry_err_code_t
 _gcry_sexp_vbuild (gcry_sexp_t *retsexp, size_t *erroff,
                    const char *format, va_list arg_ptr)
 {
-  return vsexp_sscan (retsexp, erroff, format, strlen(format), 1,
-                     NULL, arg_ptr);
+  return do_vsexp_sscan (retsexp, erroff, format, strlen(format), 1,
+                         NULL, arg_ptr);
 }
 
 
 /* Like gcry_sexp_build, but uses an array instead of variable
    function arguments.  */
-gcry_error_t
-gcry_sexp_build_array (gcry_sexp_t *retsexp, size_t *erroff,
-                      const char *format, void **arg_list)
+gcry_err_code_t
+_gcry_sexp_build_array (gcry_sexp_t *retsexp, size_t *erroff,
+                        const char *format, void **arg_list)
 {
-  return sexp_sscan (retsexp, erroff, format, strlen(format), 1, arg_list);
+  return do_sexp_sscan (retsexp, erroff, format, strlen(format), 1, arg_list);
 }
 
 
-gcry_error_t
-gcry_sexp_sscan (gcry_sexp_t *retsexp, size_t *erroff,
-                const char *buffer, size_t length)
+gcry_err_code_t
+_gcry_sexp_sscan (gcry_sexp_t *retsexp, size_t *erroff,
+                  const char *buffer, size_t length)
 {
-  return sexp_sscan (retsexp, erroff, buffer, length, 0, NULL);
+  return do_sexp_sscan (retsexp, erroff, buffer, length, 0, NULL);
 }
 
 
@@ -1654,8 +1851,14 @@ suitable_encoding (const unsigned char *buffer, size_t 
length)
   if (!length)
     return 1;
 
+  if (*buffer & 0x80)
+    return 0; /* If the MSB is set we assume that buffer represents a
+                 negative number.  */
+
   for (s=buffer; length; s++, length--)
     {
+      if (!*s)
+        return 0; /*binary*/
       if ( (*s < 0x20 || (*s >= 0x7f && *s <= 0xa0))
            && !strchr ("\b\t\v\n\f\r\"\'\\", *s))
         return 0; /*binary*/
@@ -1679,7 +1882,7 @@ convert_to_hex (const unsigned char *src, size_t len, 
char *dest)
     {
       *dest++ = '#';
       for (i=0; i < len; i++, dest += 2 )
-        sprintf (dest, "%02X", src[i]);
+        snprintf (dest, 3, "%02X", src[i]);
       *dest++ = '#';
     }
   return len*2+2;
@@ -1708,7 +1911,7 @@ convert_to_string (const unsigned char *s, size_t len, 
char *dest)
             default:
               if ( (*s < 0x20 || (*s >= 0x7f && *s <= 0xa0)))
                 {
-                  sprintf (p, "\\x%02x", *s);
+                  snprintf (p, 5, "\\x%02x", *s);
                   p += 4;
                 }
               else
@@ -1763,8 +1966,8 @@ convert_to_token (const unsigned char *src, size_t len, 
char *dest)
  * the required length is returned.
  */
 size_t
-gcry_sexp_sprint (const gcry_sexp_t list, int mode,
-                  void *buffer, size_t maxlength )
+_gcry_sexp_sprint (const gcry_sexp_t list, int mode,
+                   void *buffer, size_t maxlength )
 {
   static unsigned char empty[3] = { ST_OPEN, ST_CLOSE, ST_STOP };
   const unsigned char *s;
@@ -1868,7 +2071,7 @@ gcry_sexp_sprint (const gcry_sexp_t list, int mode,
             }
           else
             {
-              sprintf (numbuf, "%u:", (unsigned int)n );
+              snprintf (numbuf, sizeof numbuf, "%u:", (unsigned int)n );
               len += strlen (numbuf) + n;
               if ( buffer )
                 {
@@ -1914,14 +2117,14 @@ gcry_sexp_sprint (const gcry_sexp_t list, int mode,
    data passed from outside. errorcode and erroff may both be passed as
    NULL.  */
 size_t
-gcry_sexp_canon_len (const unsigned char *buffer, size_t length,
-                     size_t *erroff, gcry_error_t *errcode)
+_gcry_sexp_canon_len (const unsigned char *buffer, size_t length,
+                      size_t *erroff, gcry_err_code_t *errcode)
 {
   const unsigned char *p;
   const unsigned char *disphint = NULL;
   unsigned int datalen = 0;
   size_t dummy_erroff;
-  gcry_error_t dummy_errcode;
+  gcry_err_code_t dummy_errcode;
   size_t count = 0;
   int level = 0;
 
@@ -1930,13 +2133,13 @@ gcry_sexp_canon_len (const unsigned char *buffer, 
size_t length,
   if (!errcode)
     errcode = &dummy_errcode;
 
-  *errcode = gcry_error (GPG_ERR_NO_ERROR);
+  *errcode = GPG_ERR_NO_ERROR;
   *erroff = 0;
   if (!buffer)
     return 0;
   if (*buffer != '(')
     {
-      *errcode = gcry_error (GPG_ERR_SEXP_NOT_CANONICAL);
+      *errcode = GPG_ERR_SEXP_NOT_CANONICAL;
       return 0;
     }
 
@@ -1945,7 +2148,7 @@ gcry_sexp_canon_len (const unsigned char *buffer, size_t 
length,
       if (length && count >= length)
         {
           *erroff = count;
-          *errcode = gcry_error (GPG_ERR_SEXP_STRING_TOO_LONG);
+          *errcode = GPG_ERR_SEXP_STRING_TOO_LONG;
           return 0;
         }
 
@@ -1956,7 +2159,7 @@ gcry_sexp_canon_len (const unsigned char *buffer, size_t 
length,
               if (length && (count+datalen) >= length)
                 {
                   *erroff = count;
-                  *errcode = gcry_error (GPG_ERR_SEXP_STRING_TOO_LONG);
+                  *errcode = GPG_ERR_SEXP_STRING_TOO_LONG;
                   return 0;
                 }
               count += datalen;
@@ -1968,7 +2171,7 @@ gcry_sexp_canon_len (const unsigned char *buffer, size_t 
length,
           else
             {
               *erroff = count;
-              *errcode = gcry_error (GPG_ERR_SEXP_INV_LEN_SPEC);
+              *errcode = GPG_ERR_SEXP_INV_LEN_SPEC;
               return 0;
            }
        }
@@ -1977,7 +2180,7 @@ gcry_sexp_canon_len (const unsigned char *buffer, size_t 
length,
           if (disphint)
             {
               *erroff = count;
-              *errcode = gcry_error (GPG_ERR_SEXP_UNMATCHED_DH);
+              *errcode = GPG_ERR_SEXP_UNMATCHED_DH;
               return 0;
            }
           level++;
@@ -1987,13 +2190,13 @@ gcry_sexp_canon_len (const unsigned char *buffer, 
size_t length,
           if (!level)
             {
               *erroff = count;
-              *errcode = gcry_error (GPG_ERR_SEXP_UNMATCHED_PAREN);
+              *errcode = GPG_ERR_SEXP_UNMATCHED_PAREN;
               return 0;
            }
           if (disphint)
             {
               *erroff = count;
-              *errcode = gcry_error (GPG_ERR_SEXP_UNMATCHED_DH);
+              *errcode = GPG_ERR_SEXP_UNMATCHED_DH;
               return 0;
            }
           if (!--level)
@@ -2004,7 +2207,7 @@ gcry_sexp_canon_len (const unsigned char *buffer, size_t 
length,
           if (disphint)
             {
               *erroff = count;
-              *errcode = gcry_error (GPG_ERR_SEXP_NESTED_DH);
+              *errcode = GPG_ERR_SEXP_NESTED_DH;
               return 0;
             }
           disphint = p;
@@ -2014,7 +2217,7 @@ gcry_sexp_canon_len (const unsigned char *buffer, size_t 
length,
           if ( !disphint )
             {
               *erroff = count;
-              *errcode = gcry_error (GPG_ERR_SEXP_UNMATCHED_DH);
+              *errcode = GPG_ERR_SEXP_UNMATCHED_DH;
               return 0;
            }
           disphint = NULL;
@@ -2024,7 +2227,7 @@ gcry_sexp_canon_len (const unsigned char *buffer, size_t 
length,
           if (*p == '0')
             {
               *erroff = count;
-              *errcode = gcry_error (GPG_ERR_SEXP_ZERO_PREFIX);
+              *errcode = GPG_ERR_SEXP_ZERO_PREFIX;
               return 0;
            }
           datalen = atoi_1 (p);
@@ -2032,14 +2235,465 @@ gcry_sexp_canon_len (const unsigned char *buffer, 
size_t length,
       else if (*p == '&' || *p == '\\')
         {
           *erroff = count;
-          *errcode = gcry_error (GPG_ERR_SEXP_UNEXPECTED_PUNC);
+          *errcode = GPG_ERR_SEXP_UNEXPECTED_PUNC;
           return 0;
        }
       else
         {
           *erroff = count;
-          *errcode = gcry_error (GPG_ERR_SEXP_BAD_CHARACTER);
+          *errcode = GPG_ERR_SEXP_BAD_CHARACTER;
           return 0;
        }
     }
 }
+
+
+/* Extract MPIs from an s-expression using a list of parameters.  The
+ * names of these parameters are given by the string LIST.  Some
+ * special characters may be given to control the conversion:
+ *
+ *   +   :: Switch to unsigned integer format (default).
+ *   -   :: Switch to standard signed format.
+ *   /   :: Switch to opaque format
+ *   &   :: Switch to buffer descriptor mode - see below.
+ *   %s  :: Switch to allocated string arguments.
+ *   %#s :: Switch to allocated string arguments for a list of string flags.
+ *   %u  :: Switch to unsigned integer arguments.
+ *   %lu :: Switch to unsigned long integer arguments.
+ *   %zu :: Switch to size_t arguments.
+ *   %d  :: Switch to signed integer arguments.
+ *   %ld :: Switch to signed long integer arguments.
+ *   ?   :: The previous parameter is optional.
+ *
+ * In general parameter names are single letters.  To use a string for
+ * a parameter name, enclose the name in single quotes.
+ *
+ * Unless in gcry_buffer_t mode for each parameter name a pointer to
+ * an MPI variable is expected that must be set to NULL prior to
+ * invoking this function, and finally a NULL is expected.  Example:
+ *
+ *   _gcry_sexp_extract_param (key, NULL, "n/x+ed",
+ *                             &mpi_n, &mpi_x, &mpi_e, NULL)
+ *
+ * This stores the parameter "N" from KEY as an unsigned MPI into
+ * MPI_N, the parameter "X" as an opaque MPI into MPI_X, and the
+ * parameter "E" again as an unsigned MPI into MPI_E.
+ *
+ * If in buffer descriptor mode a pointer to gcry_buffer_t descriptor
+ * is expected instead of a pointer to an MPI.  The caller may use two
+ * different operation modes: If the DATA field of the provided buffer
+ * descriptor is NULL, the function allocates a new buffer and stores
+ * it at DATA; the other fields are set accordingly with OFF being 0.
+ * If DATA is not NULL, the function assumes that DATA, SIZE, and OFF
+ * describe a buffer where to but the data; on return the LEN field
+ * receives the number of bytes copied to that buffer; if the buffer
+ * is too small, the function immediately returns with an error code
+ * (and LEN set to 0).
+ *
+ * For a flag list ("%#s") which has other lists as elements these
+ * sub-lists are skipped and a indicated by "()" in the output.
+ *
+ * PATH is an optional string used to locate a token.  The exclamation
+ * mark separated tokens are used to via gcry_sexp_find_token to find
+ * a start point inside SEXP.
+ *
+ * The function returns 0 on success.  On error an error code is
+ * returned, all passed MPIs that might have been allocated up to this
+ * point are deallocated and set to NULL, and all passed buffers are
+ * either truncated if the caller supplied the buffer, or deallocated
+ * if the function allocated the buffer.
+ */
+gpg_err_code_t
+_gcry_sexp_vextract_param (gcry_sexp_t sexp, const char *path,
+                           const char *list, va_list arg_ptr)
+{
+  gpg_err_code_t rc;
+  const char *s, *s2;
+  void **array[20];
+  char arrayisdesc[20];
+  int idx, i;
+  gcry_sexp_t l1 = NULL;
+  int mode = '+'; /* Default to GCRYMPI_FMT_USG.  */
+  int submode = 0;
+  gcry_sexp_t freethis = NULL;
+  char *tmpstr = NULL;
+
+  /* Values in ARRAYISDESC describing what the ARRAY holds.
+   *  0  - MPI
+   *  1  - gcry_buffer_t provided by caller.
+   *  2  - gcry_buffer_t allocated by us.
+   * 's' - String allocated by us.
+   * 'x' - Ignore
+   */
+  memset (arrayisdesc, 0, sizeof arrayisdesc);
+
+  /* First copy all the args into an array.  This is required so that
+     we are able to release already allocated MPIs if later an error
+     was found.  */
+  for (s=list, idx=0; *s && idx < DIM (array); s++)
+    {
+      if (*s == '&' || *s == '+' || *s == '-' || *s == '/' || *s == '?')
+        ;
+      else if (*s == '%')
+        {
+          s++;
+          if (*s == 'l' && (s[1] == 'u' || s[1] == 'd'))
+            s++;
+          else if (*s == 'z' && s[1] == 'u')
+            s++;
+          else if (*s == '#' && s[1] == 's')
+            s++;
+          continue;
+        }
+      else if (whitespacep (s))
+        ;
+      else
+        {
+          if (*s == '\'')
+            {
+              s++;
+              s2 = strchr (s, '\'');
+              if (!s2 || s2 == s)
+                {
+                  /* Closing quote not found or empty string.  */
+                  return GPG_ERR_SYNTAX;
+                }
+              s = s2;
+            }
+          array[idx] = va_arg (arg_ptr, void *);
+          if (!array[idx])
+            return GPG_ERR_MISSING_VALUE; /* NULL pointer given.  */
+          idx++;
+        }
+    }
+  if (*s)
+    return GPG_ERR_LIMIT_REACHED;  /* Too many list elements.  */
+  if (va_arg (arg_ptr, gcry_mpi_t *))
+    return GPG_ERR_INV_ARG;  /* Not enough list elemends.  */
+
+  /* Drill down.  */
+  while (path && *path)
+    {
+      size_t n;
+
+      s = strchr (path, '!');
+      if (s == path)
+        {
+          rc = GPG_ERR_NOT_FOUND;
+          goto cleanup;
+        }
+      n = s? s - path : 0;
+      l1 = _gcry_sexp_find_token (sexp, path, n);
+      if (!l1)
+        {
+          rc = GPG_ERR_NOT_FOUND;
+          goto cleanup;
+        }
+      sexp = l1; l1 = NULL;
+      sexp_release (freethis);
+      freethis = sexp;
+      if (n)
+        path += n + 1;
+      else
+        path = NULL;
+    }
+
+
+  /* Now extract all parameters.  */
+  for (s=list, idx=0; *s; s++)
+    {
+      if (*s == '&' || *s == '+' || *s == '-' || *s == '/')
+        mode = *s;
+      else if (*s == '%')
+        {
+          s++;
+          if (!*s)
+            continue;  /* Ignore at end of format.  */
+          if (*s == 's' || *s == 'd' || *s == 'u')
+            {
+              mode = *s;
+              submode = 0;
+            }
+          else if (*s == 'l' && (s[1] == 'u' || s[1] == 'd'))
+            {
+              mode = s[1];
+              submode = 'l';
+              s++;
+            }
+          else if (*s == 'z' && s[1] == 'u')
+            {
+              mode = s[1];
+              submode = 'z';
+              s++;
+            }
+          else if (*s == '#' && s[1] == 's')
+            {
+              mode = s[1];
+              submode = '#';
+              s++;
+            }
+          continue;
+        }
+      else if (whitespacep (s))
+        ;
+      else if (*s == '?')
+        ; /* Only used via lookahead.  */
+      else
+        {
+          if (*s == '\'')
+            {
+              /* Find closing quote, find token, set S to closing quote.  */
+              s++;
+              s2 = strchr (s, '\'');
+              if (!s2 || s2 == s)
+                {
+                  /* Closing quote not found or empty string.  */
+                  rc = GPG_ERR_SYNTAX;
+                  goto cleanup;
+                }
+              l1 = _gcry_sexp_find_token (sexp, s, s2 - s);
+              s = s2;
+            }
+          else
+            l1 = _gcry_sexp_find_token (sexp, s, 1);
+
+          if (!l1 && s[1] == '?')
+            {
+              /* Optional element not found.  */
+              if (mode == '&')
+                {
+                  gcry_buffer_t *spec = (gcry_buffer_t*)array[idx];
+                  if (!spec->data)
+                    {
+                      spec->size = 0;
+                      spec->off = 0;
+                    }
+                  spec->len = 0;
+                }
+              else if (mode == 's')
+                {
+                  *array[idx] = NULL;
+                  arrayisdesc[idx] = 's';
+                }
+              else if (mode == 'd')
+                {
+                  if (submode == 'l')
+                    *(long *)array[idx] = 0;
+                  else
+                    *(int *)array[idx] = 0;
+                  arrayisdesc[idx] = 'x';
+                }
+              else if (mode == 'u')
+                {
+                  if (submode == 'l')
+                    *(unsigned long *)array[idx] = 0;
+                  else if (submode == 'z')
+                    *(size_t *)array[idx] = 0;
+                  else
+                    *(unsigned int *)array[idx] = 0;
+                  arrayisdesc[idx] = 'x';
+                }
+              else
+                *array[idx] = NULL;
+            }
+          else if (!l1)
+            {
+              rc = GPG_ERR_NO_OBJ;  /* List element not found.  */
+              goto cleanup;
+            }
+           else
+            {
+              if (mode == '&')
+                {
+                  gcry_buffer_t *spec = (gcry_buffer_t*)array[idx];
+
+                  if (spec->data)
+                    {
+                      const char *pbuf;
+                      size_t nbuf;
+
+                      pbuf = _gcry_sexp_nth_data (l1, 1, &nbuf);
+                      if (!pbuf || !nbuf)
+                        {
+                          rc = GPG_ERR_INV_OBJ;
+                          goto cleanup;
+                        }
+                      if (spec->off + nbuf > spec->size)
+                        {
+                          rc = GPG_ERR_BUFFER_TOO_SHORT;
+                          goto cleanup;
+                        }
+                      memcpy ((char*)spec->data + spec->off, pbuf, nbuf);
+                      spec->len = nbuf;
+                      arrayisdesc[idx] = 1;
+                    }
+                  else
+                    {
+                      spec->data = _gcry_sexp_nth_buffer (l1, 1, &spec->size);
+                      if (!spec->data)
+                        {
+                          rc = GPG_ERR_INV_OBJ; /* Or out of core.  */
+                          goto cleanup;
+                        }
+                      spec->len = spec->size;
+                      spec->off = 0;
+                      arrayisdesc[idx] = 2;
+                    }
+                }
+              else if (mode == 's')
+                {
+                  if (submode == '#')
+                    {
+                      size_t needed = 0;
+                      size_t n;
+                      int l1len;
+                      char *p;
+
+                      l1len = l1? sexp_length (l1) : 0;
+                      for (i = 1; i < l1len; i++)
+                        {
+                          s2 = sexp_nth_data (l1, i, &n);
+                          if (!s2)
+                            n = 2; /* Not a data element; we use "()". */
+                          needed += n + 1;
+                        }
+                      if (!needed)
+                        {
+                          *array[idx] = p = xtrymalloc (1);
+                          if (p)
+                            *p = 0;
+                        }
+                      else if ((*array[idx] = p = xtrymalloc (needed)))
+                        {
+                          for (i = 1; i < l1len; i++)
+                            {
+                              s2 = sexp_nth_data (l1, i, &n);
+                              if (!s2)
+                                memcpy (p, "()", (n=2));
+                              else
+                                memcpy (p, s2, n);
+                              p[n] = ' ';
+                              p += n + 1;
+                            }
+                          if (p != *array[idx])
+                            p[-1] = 0;
+                        }
+                    }
+                  else
+                    *array[idx] = _gcry_sexp_nth_string (l1, 1);
+                  if (!*array[idx])
+                    {
+                      rc = gpg_err_code_from_syserror ();
+                      goto cleanup;
+                    }
+                  arrayisdesc[idx] = 's';
+                }
+              else if (mode == 'd')
+                {
+                  long along;
+
+                  xfree (tmpstr);
+                  tmpstr = _gcry_sexp_nth_string (l1, 1);
+                  if (!tmpstr)
+                    {
+                      rc = gpg_err_code_from_syserror ();
+                      goto cleanup;
+                    }
+                  along = strtol (tmpstr, NULL, 10);
+                  if (submode == 'l')
+                    *(long *)array[idx] = along;
+                  else
+                    *(int *)array[idx] = along;
+                  arrayisdesc[idx] = 'x';
+                }
+              else if (mode == 'u')
+                {
+                  long aulong;
+
+                  xfree (tmpstr);
+                  tmpstr = _gcry_sexp_nth_string (l1, 1);
+                  if (!tmpstr)
+                    {
+                      rc = gpg_err_code_from_syserror ();
+                      goto cleanup;
+                    }
+                  aulong = strtoul (tmpstr, NULL, 10);
+                  if (submode == 'l')
+                    *(unsigned long *)array[idx] = aulong;
+                  else if (submode == 'z')
+                    *(size_t *)array[idx] = aulong;
+                  else
+                    *(unsigned int *)array[idx] = aulong;
+                  arrayisdesc[idx] = 'x';
+                }
+              else
+                {
+                  if (mode == '/')
+                    *array[idx] = _gcry_sexp_nth_mpi (l1,1,GCRYMPI_FMT_OPAQUE);
+                  else if (mode == '-')
+                    *array[idx] = _gcry_sexp_nth_mpi (l1,1,GCRYMPI_FMT_STD);
+                  else
+                    *array[idx] = _gcry_sexp_nth_mpi (l1,1,GCRYMPI_FMT_USG);
+                  if (!*array[idx])
+                    {
+                      rc = GPG_ERR_INV_OBJ;  /* Conversion failed.  */
+                      goto cleanup;
+                    }
+                }
+              sexp_release (l1); l1 = NULL;
+            }
+          idx++;
+        }
+    }
+
+  xfree (tmpstr);
+  sexp_release (freethis);
+  return 0;
+
+ cleanup:
+  xfree (tmpstr);
+  sexp_release (freethis);
+  sexp_release (l1);
+  while (idx--)
+    {
+      if (!arrayisdesc[idx])
+        {
+          _gcry_mpi_release (*array[idx]);
+          *array[idx] = NULL;
+        }
+      else if (arrayisdesc[idx] == 1)
+        {
+          /* Caller provided buffer.  */
+          gcry_buffer_t *spec = (gcry_buffer_t*)array[idx];
+          spec->len = 0;
+        }
+      else if (arrayisdesc[idx] == 2)
+        {
+          /* We might have allocated a buffer.  */
+          gcry_buffer_t *spec = (gcry_buffer_t*)array[idx];
+          xfree (spec->data);
+          spec->data = NULL;
+          spec->size = spec->off = spec->len = 0;
+        }
+      else if (arrayisdesc[idx] == 's')
+        {
+          /* We might have allocated a buffer.  */
+          xfree (*array[idx]);
+          *array[idx] = NULL;
+        }
+     }
+  return rc;
+}
+
+gpg_err_code_t
+_gcry_sexp_extract_param (gcry_sexp_t sexp, const char *path,
+                          const char *list, ...)
+{
+  gcry_err_code_t rc;
+  va_list arg_ptr;
+
+  va_start (arg_ptr, list);
+  rc = _gcry_sexp_vextract_param (sexp, path, list, arg_ptr);
+  va_end (arg_ptr);
+  return rc;
+}
diff --git a/grub-core/lib/libgcrypt/src/stdmem.c 
b/grub-core/lib/libgcrypt/src/stdmem.c
index 189da3720..04ce64fba 100644
--- a/grub-core/lib/libgcrypt/src/stdmem.c
+++ b/grub-core/lib/libgcrypt/src/stdmem.c
@@ -117,10 +117,11 @@ _gcry_private_malloc (size_t n)
 
 /*
  * Allocate memory of size N from the secure memory pool.  Return NULL
- * if we are out of memory.
+ * if we are out of memory.  XHINT tells the allocator that the caller
+ * used an xmalloc style call.
  */
 void *
-_gcry_private_malloc_secure (size_t n)
+_gcry_private_malloc_secure (size_t n, int xhint)
 {
   if (!n)
     {
@@ -133,7 +134,7 @@ _gcry_private_malloc_secure (size_t n)
     {
       char *p;
 
-      if ( !(p = _gcry_secmem_malloc (n +EXTRA_ALIGN+ 5)) )
+      if (!(p = _gcry_secmem_malloc (n + EXTRA_ALIGN + 5, xhint)))
         return NULL;
       ((byte*)p)[EXTRA_ALIGN+0] = n;
       ((byte*)p)[EXTRA_ALIGN+1] = n >> 8 ;
@@ -144,17 +145,18 @@ _gcry_private_malloc_secure (size_t n)
     }
   else
     {
-      return _gcry_secmem_malloc( n );
+      return _gcry_secmem_malloc (n, xhint);
     }
 }
 
 
 /*
- * Realloc and clear the old space
- * Return NULL if there is not enough memory.
+ * Realloc and clear the old space.  XHINT tells the allocator that
+ * the caller used an xmalloc style call.  Returns NULL if there is
+ * not enough memory.
  */
 void *
-_gcry_private_realloc ( void *a, size_t n )
+_gcry_private_realloc (void *a, size_t n, int xhint)
 {
   if (use_m_guard)
     {
@@ -172,7 +174,7 @@ _gcry_private_realloc ( void *a, size_t n )
       if( len >= n ) /* We don't shrink for now. */
         return a;
       if (p[-1] == MAGIC_SEC_BYTE)
-        b = _gcry_private_malloc_secure(n);
+        b = _gcry_private_malloc_secure (n, xhint);
       else
         b = _gcry_private_malloc(n);
       if (!b)
@@ -184,7 +186,7 @@ _gcry_private_realloc ( void *a, size_t n )
     }
   else if ( _gcry_private_is_secure(a) )
     {
-      return _gcry_secmem_realloc( a, n );
+      return _gcry_secmem_realloc (a, n, xhint);
     }
   else
     {
@@ -222,21 +224,23 @@ void
 _gcry_private_free (void *a)
 {
   unsigned char *p = a;
+  unsigned char *freep;
 
   if (!p)
     return;
-  if (use_m_guard )
+  if (use_m_guard)
     {
-      _gcry_private_check_heap(p);
-      if ( _gcry_private_is_secure(a) )
-        _gcry_secmem_free(p-EXTRA_ALIGN-4);
-      else
-        {
-          free(p-EXTRA_ALIGN-4);
-       }
+      _gcry_private_check_heap (p);
+      freep = p - EXTRA_ALIGN - 4;
     }
-  else if ( _gcry_private_is_secure(a) )
-    _gcry_secmem_free(p);
   else
-    free(p);
+    {
+      freep = p;
+    }
+
+  if (!_gcry_private_is_secure (freep) ||
+      !_gcry_secmem_free (freep))
+    {
+      free (freep);
+    }
 }
diff --git a/grub-core/lib/libgcrypt/src/stdmem.h 
b/grub-core/lib/libgcrypt/src/stdmem.h
index b476e7e50..c52aab540 100644
--- a/grub-core/lib/libgcrypt/src/stdmem.h
+++ b/grub-core/lib/libgcrypt/src/stdmem.h
@@ -24,8 +24,8 @@
 void _gcry_private_enable_m_guard(void);
 
 void *_gcry_private_malloc (size_t n) _GCRY_GCC_ATTR_MALLOC;
-void *_gcry_private_malloc_secure (size_t n) _GCRY_GCC_ATTR_MALLOC;
-void *_gcry_private_realloc (void *a, size_t n);
+void *_gcry_private_malloc_secure (size_t n, int xhint) _GCRY_GCC_ATTR_MALLOC;
+void *_gcry_private_realloc (void *a, size_t n, int xhint);
 void _gcry_private_check_heap (const void *a);
 void _gcry_private_free (void *a);
 
diff --git a/grub-core/lib/libgcrypt/src/types.h 
b/grub-core/lib/libgcrypt/src/types.h
index ee0a62bb9..b4f28bc4f 100644
--- a/grub-core/lib/libgcrypt/src/types.h
+++ b/grub-core/lib/libgcrypt/src/types.h
@@ -21,108 +21,116 @@
 #ifndef GCRYPT_TYPES_H
 #define GCRYPT_TYPES_H
 
+#ifndef _GCRYPT_CONFIG_H_INCLUDED
+# error config.h must be included before types.h
+#endif
 
 /* The AC_CHECK_SIZEOF() in configure fails for some machines.
  * we provide some fallback values here */
 #if !SIZEOF_UNSIGNED_SHORT
-#undef SIZEOF_UNSIGNED_SHORT
-#define SIZEOF_UNSIGNED_SHORT 2
+# undef SIZEOF_UNSIGNED_SHORT
+# define SIZEOF_UNSIGNED_SHORT 2
 #endif
 #if !SIZEOF_UNSIGNED_INT
-#undef SIZEOF_UNSIGNED_INT
-#define SIZEOF_UNSIGNED_INT 4
+# undef SIZEOF_UNSIGNED_INT
+# define SIZEOF_UNSIGNED_INT 4
 #endif
 #if !SIZEOF_UNSIGNED_LONG
-#undef SIZEOF_UNSIGNED_LONG
-#define SIZEOF_UNSIGNED_LONG 4
+# undef SIZEOF_UNSIGNED_LONG
+# define SIZEOF_UNSIGNED_LONG 4
 #endif
 
 
 #include <sys/types.h>
 
-
-#ifndef HAVE_BYTE_TYPEDEF
-#undef byte        /* maybe there is a macro with this name */
-/* Windows typedefs byte in the rpc headers.  Avoid warning about
-   double definition.  */
-#if !(defined(_WIN32) && defined(cbNDRContext))
-  typedef unsigned char byte;
-#endif
-#define HAVE_BYTE_TYPEDEF
+/* Provide uintptr_t */
+#ifdef HAVE_STDINT_H
+# include <stdint.h> /* uintptr_t */
+#elif defined(HAVE_INTTYPES_H)
+# include <inttypes.h>
+#else
+/* In this case, uintptr_t is provided by config.h. */
 #endif
 
-#ifndef HAVE_USHORT_TYPEDEF
-#undef ushort     /* maybe there is a macro with this name */
-  typedef unsigned short ushort;
-#define HAVE_USHORT_TYPEDEF
-#endif
 
-#ifndef HAVE_ULONG_TYPEDEF
-#undef ulong       /* maybe there is a macro with this name */
-  typedef unsigned long ulong;
-#define HAVE_ULONG_TYPEDEF
-#endif
 
-#ifndef HAVE_U16_TYPEDEF
-#undef u16         /* maybe there is a macro with this name */
-#if SIZEOF_UNSIGNED_INT == 2
-    typedef unsigned int   u16;
-#elif SIZEOF_UNSIGNED_SHORT == 2
-    typedef unsigned short u16;
-#else
-#error no typedef for u16
+#ifndef HAVE_TYPE_BYTE
+# undef byte   /* In case there is a macro with that name.  */
+# if !(defined(_WIN32) && defined(cbNDRContext))
+   /* Windows typedefs byte in the rpc headers.  Avoid warning about
+      double definition.  */
+   typedef unsigned char byte;
+# endif
+# define HAVE_TYPE_BYTE
 #endif
-#define HAVE_U16_TYPEDEF
+
+#ifndef HAVE_TYPE_USHORT
+# undef ushort  /* In case there is a macro with that name.  */
+  typedef unsigned short ushort;
+# define HAVE_TYPE_USHORT
 #endif
 
-#ifndef HAVE_U32_TYPEDEF
-#undef u32         /* maybe there is a macro with this name */
-#if SIZEOF_UNSIGNED_INT == 4
-    typedef unsigned int u32;
-#elif SIZEOF_UNSIGNED_LONG == 4
-    typedef unsigned long u32;
-#else
-#error no typedef for u32
+#ifndef HAVE_TYPE_U16
+# undef u16    /* In case there is a macro with that name.  */
+# if SIZEOF_UNSIGNED_INT == 2
+   typedef unsigned int   u16;
+# elif SIZEOF_UNSIGNED_SHORT == 2
+   typedef unsigned short u16;
+# else
+#  error no typedef for u16
+# endif
+# define HAVE_TYPE_U16
 #endif
-#define HAVE_U32_TYPEDEF
+
+#ifndef HAVE_TYPE_U32
+# undef u32    /* In case there is a macro with that name.  */
+# if SIZEOF_UNSIGNED_INT == 4
+   typedef unsigned int  u32;
+# elif SIZEOF_UNSIGNED_LONG == 4
+   typedef unsigned long u32;
+# else
+#  error no typedef for u32
+# endif
+# define HAVE_TYPE_U32
 #endif
 
-/****************
+/*
  * Warning: Some systems segfault when this u64 typedef and
  * the dummy code in cipher/md.c is not available.  Examples are
  * Solaris and IRIX.
  */
-#ifndef HAVE_U64_TYPEDEF
-#undef u64         /* maybe there is a macro with this name */
-#if SIZEOF_UNSIGNED_INT == 8
-    typedef unsigned int u64;
-#define U64_C(c) (c ## U)
-#define HAVE_U64_TYPEDEF
-#elif SIZEOF_UNSIGNED_LONG == 8
-    typedef unsigned long u64;
-#define U64_C(c) (c ## UL)
-#define HAVE_U64_TYPEDEF
-#elif SIZEOF_UNSIGNED_LONG_LONG == 8
-    typedef unsigned long long u64;
-#define U64_C(c) (c ## ULL)
-#define HAVE_U64_TYPEDEF
-#elif SIZEOF_UINT64_T == 8
-    typedef uint64_t u64;
-#define U64_C(c) (UINT64_C(c))
-#define HAVE_U64_TYPEDEF
-#endif
+#ifndef HAVE_TYPE_U64
+# undef u64    /* In case there is a macro with that name.  */
+# if SIZEOF_UINT64_T == 8
+   typedef uint64_t u64;
+#  define U64_C(c) (UINT64_C(c))
+#  define HAVE_TYPE_U64
+# elif SIZEOF_UNSIGNED_INT == 8
+   typedef unsigned int u64;
+#  define U64_C(c) (c ## U)
+#  define HAVE_TYPE_U64
+# elif SIZEOF_UNSIGNED_LONG == 8
+   typedef unsigned long u64;
+#  define U64_C(c) (c ## UL)
+#  define HAVE_TYPE_U64
+# elif SIZEOF_UNSIGNED_LONG_LONG == 8
+   typedef unsigned long long u64;
+#  define U64_C(c) (c ## ULL)
+#  define HAVE_TYPE_U64
+# else
+#  error No way to declare a 64 bit integer type
+# endif
 #endif
 
-typedef union {
-    int a;
-    short b;
-    char c[1];
-    long d;
-#ifdef HAVE_U64_TYPEDEF
-    u64 e;
-#endif
-    float f;
-    double g;
+typedef union
+{
+  int a;
+  short b;
+  char c[1];
+  long d;
+  u64 e;
+  float f;
+  double g;
 } PROPERLY_ALIGNED_TYPE;
 
 #endif /*GCRYPT_TYPES_H*/
diff --git a/grub-core/lib/libgcrypt/src/versioninfo.rc.in 
b/grub-core/lib/libgcrypt/src/versioninfo.rc.in
index 3199521c9..929f9ccc5 100644
--- a/grub-core/lib/libgcrypt/src/versioninfo.rc.in
+++ b/grub-core/lib/libgcrypt/src/versioninfo.rc.in
@@ -39,7 +39,7 @@ BEGIN
             VALUE "FileDescription", "Libgcrypt - The GNU Crypto Library\0"
             VALUE "FileVersion", 
"@LIBGCRYPT_LT_CURRENT@.@LIBGCRYPT_LT_AGE@.@LIBGCRYPT_LT_REVISION@.@BUILD_REVISION@\0"
             VALUE "InternalName", "libgcrypt\0"
-            VALUE "LegalCopyright", "Copyright � 2012 Free Software 
Foundation, Inc.\0"
+            VALUE "LegalCopyright", "Copyright � 2023 g10 Code GmbH\0"
             VALUE "LegalTrademarks", "\0"
             VALUE "OriginalFilename", "libgcrypt.dll\0"
             VALUE "PrivateBuild", "\0"
diff --git a/grub-core/lib/libgcrypt/src/visibility.c 
b/grub-core/lib/libgcrypt/src/visibility.c
index 2fccb017b..5c64618bb 100644
--- a/grub-core/lib/libgcrypt/src/visibility.c
+++ b/grub-core/lib/libgcrypt/src/visibility.c
@@ -1,5 +1,6 @@
 /* visibility.c - Wrapper for all public functions.
  * Copyright (C) 2007, 2008, 2011  Free Software Foundation, Inc.
+ * Copyright (C) 2013  g10 Code GmbH
  *
  * This file is part of Libgcrypt.
  *
@@ -23,8 +24,9 @@
 #define _GCRY_INCLUDED_BY_VISIBILITY_C
 #include "g10lib.h"
 #include "cipher-proto.h"
-
-
+#include "context.h"
+#include "mpi.h"
+#include "ec-context.h"
 
 const char *
 gcry_strerror (gcry_error_t err)
@@ -56,7 +58,7 @@ gcry_err_make_from_errno (gcry_err_source_t source, int err)
   return _gcry_err_make_from_errno (source, err);
 }
 
-gcry_err_code_t
+gcry_error_t
 gcry_error_from_errno (int err)
 {
   return _gcry_error_from_errno (err);
@@ -75,7 +77,7 @@ gcry_control (enum gcry_ctl_cmds cmd, ...)
   va_list arg_ptr;
 
   va_start (arg_ptr, cmd);
-  err = _gcry_vcontrol (cmd, arg_ptr);
+  err = gpg_error (_gcry_vcontrol (cmd, arg_ptr));
   va_end(arg_ptr);
   return err;
 }
@@ -85,7 +87,7 @@ gcry_sexp_new (gcry_sexp_t *retsexp,
                const void *buffer, size_t length,
                int autodetect)
 {
-  return _gcry_sexp_new (retsexp, buffer, length, autodetect);
+  return gpg_error (_gcry_sexp_new (retsexp, buffer, length, autodetect));
 }
 
 gcry_error_t
@@ -93,35 +95,35 @@ gcry_sexp_create (gcry_sexp_t *retsexp,
                   void *buffer, size_t length,
                   int autodetect, void (*freefnc) (void *))
 {
-  return _gcry_sexp_create (retsexp, buffer, length,
-                            autodetect, freefnc);
+  return gpg_error (_gcry_sexp_create (retsexp, buffer, length,
+                                       autodetect, freefnc));
 }
 
 gcry_error_t
 gcry_sexp_sscan (gcry_sexp_t *retsexp, size_t *erroff,
                  const char *buffer, size_t length)
 {
-  return _gcry_sexp_sscan (retsexp, erroff, buffer, length);
+  return gpg_error (_gcry_sexp_sscan (retsexp, erroff, buffer, length));
 }
 
 gcry_error_t
 gcry_sexp_build (gcry_sexp_t *retsexp, size_t *erroff,
                  const char *format, ...)
 {
-  gcry_error_t err;
+  gcry_err_code_t rc;
   va_list arg_ptr;
 
   va_start (arg_ptr, format);
-  err = _gcry_sexp_vbuild (retsexp, erroff, format, arg_ptr);
+  rc = _gcry_sexp_vbuild (retsexp, erroff, format, arg_ptr);
   va_end (arg_ptr);
-  return err;
+  return gpg_error (rc);
 }
 
 gcry_error_t
 gcry_sexp_build_array (gcry_sexp_t *retsexp, size_t *erroff,
                        const char *format, void **arg_list)
 {
-  return _gcry_sexp_build_array (retsexp, erroff, format, arg_list);
+  return gpg_error (_gcry_sexp_build_array (retsexp, erroff, format, 
arg_list));
 }
 
 void
@@ -134,7 +136,13 @@ size_t
 gcry_sexp_canon_len (const unsigned char *buffer, size_t length,
                      size_t *erroff, gcry_error_t *errcode)
 {
-  return _gcry_sexp_canon_len (buffer, length, erroff, errcode);
+  size_t n;
+  gpg_err_code_t rc;
+
+  n = _gcry_sexp_canon_len (buffer, length, erroff, &rc);
+  if (errcode)
+    *errcode = gpg_error (rc);
+  return n;
 }
 
 size_t
@@ -225,6 +233,12 @@ gcry_sexp_nth_data (const gcry_sexp_t list, int number, 
size_t *datalen)
   return _gcry_sexp_nth_data (list, number, datalen);
 }
 
+void *
+gcry_sexp_nth_buffer (const gcry_sexp_t list, int number, size_t *rlength)
+{
+  return _gcry_sexp_nth_buffer (list, number, rlength);
+}
+
 char *
 gcry_sexp_nth_string (gcry_sexp_t list, int number)
 {
@@ -237,6 +251,21 @@ gcry_sexp_nth_mpi (gcry_sexp_t list, int number, int 
mpifmt)
   return _gcry_sexp_nth_mpi (list, number, mpifmt);
 }
 
+gpg_error_t
+gcry_sexp_extract_param (gcry_sexp_t sexp, const char *path,
+                         const char *list, ...)
+{
+  gcry_err_code_t rc;
+  va_list arg_ptr;
+
+  va_start (arg_ptr, list);
+  rc = _gcry_sexp_vextract_param (sexp, path, list, arg_ptr);
+  va_end (arg_ptr);
+  return gpg_error (rc);
+}
+
+
+
 gcry_mpi_t
 gcry_mpi_new (unsigned int nbits)
 {
@@ -261,6 +290,12 @@ gcry_mpi_copy (const gcry_mpi_t a)
   return _gcry_mpi_copy (a);
 }
 
+void
+gcry_mpi_snatch (gcry_mpi_t w, const gcry_mpi_t u)
+{
+  _gcry_mpi_snatch (w, u);
+}
+
 gcry_mpi_t
 gcry_mpi_set (gcry_mpi_t w, const gcry_mpi_t u)
 {
@@ -273,12 +308,36 @@ gcry_mpi_set_ui (gcry_mpi_t w, unsigned long u)
   return _gcry_mpi_set_ui (w, u);
 }
 
+gcry_error_t
+gcry_mpi_get_ui (unsigned int *w, gcry_mpi_t u)
+{
+  return gpg_error (_gcry_mpi_get_ui (w, u));
+}
+
 void
 gcry_mpi_swap (gcry_mpi_t a, gcry_mpi_t b)
 {
   _gcry_mpi_swap (a, b);
 }
 
+int
+gcry_mpi_is_neg (gcry_mpi_t a)
+{
+  return _gcry_mpi_is_neg (a);
+}
+
+void
+gcry_mpi_neg (gcry_mpi_t w, gcry_mpi_t u)
+{
+  _gcry_mpi_neg (w, u);
+}
+
+void
+gcry_mpi_abs (gcry_mpi_t w)
+{
+  _gcry_mpi_abs (w);
+}
+
 int
 gcry_mpi_cmp (const gcry_mpi_t u, const gcry_mpi_t v)
 {
@@ -296,7 +355,7 @@ gcry_mpi_scan (gcry_mpi_t *ret_mpi, enum gcry_mpi_format 
format,
                const void *buffer, size_t buflen,
                size_t *nscanned)
 {
-  return _gcry_mpi_scan (ret_mpi, format, buffer, buflen, nscanned);
+  return gpg_error (_gcry_mpi_scan (ret_mpi, format, buffer, buflen, 
nscanned));
 }
 
 gcry_error_t
@@ -305,7 +364,7 @@ gcry_mpi_print (enum gcry_mpi_format format,
                 size_t *nwritten,
                 const gcry_mpi_t a)
 {
-  return _gcry_mpi_print (format, buffer, buflen, nwritten, a);
+  return gpg_error (_gcry_mpi_print (format, buffer, buflen, nwritten, a));
 }
 
 gcry_error_t
@@ -313,13 +372,13 @@ gcry_mpi_aprint (enum gcry_mpi_format format,
                  unsigned char **buffer, size_t *nwritten,
                  const gcry_mpi_t a)
 {
-  return _gcry_mpi_aprint (format, buffer, nwritten, a);
+  return gpg_error (_gcry_mpi_aprint (format, buffer, nwritten, a));
 }
 
 void
 gcry_mpi_dump (const gcry_mpi_t a)
 {
-  _gcry_mpi_dump (a);
+  _gcry_log_printmpi (NULL, a);
 }
 
 void
@@ -414,6 +473,161 @@ gcry_mpi_invm (gcry_mpi_t x, gcry_mpi_t a, gcry_mpi_t m)
   return _gcry_mpi_invm (x, a, m);
 }
 
+gcry_mpi_point_t
+gcry_mpi_point_new (unsigned int nbits)
+{
+  return _gcry_mpi_point_new (nbits);
+}
+
+void
+gcry_mpi_point_release (gcry_mpi_point_t point)
+{
+  _gcry_mpi_point_release (point);
+}
+
+gcry_mpi_point_t
+gcry_mpi_point_copy (gcry_mpi_point_t point)
+{
+  return _gcry_mpi_point_copy (point);
+}
+
+void
+gcry_mpi_point_get (gcry_mpi_t x, gcry_mpi_t y, gcry_mpi_t z,
+                    gcry_mpi_point_t point)
+{
+  _gcry_mpi_point_get (x, y, z, point);
+}
+
+void
+gcry_mpi_point_snatch_get (gcry_mpi_t x, gcry_mpi_t y, gcry_mpi_t z,
+                           gcry_mpi_point_t point)
+{
+  _gcry_mpi_point_snatch_get (x, y, z, point);
+}
+
+gcry_mpi_point_t
+gcry_mpi_point_set (gcry_mpi_point_t point,
+                    gcry_mpi_t x, gcry_mpi_t y, gcry_mpi_t z)
+{
+  return _gcry_mpi_point_set (point, x, y, z);
+}
+
+gcry_mpi_point_t
+gcry_mpi_point_snatch_set (gcry_mpi_point_t point,
+                           gcry_mpi_t x, gcry_mpi_t y, gcry_mpi_t z)
+{
+  return _gcry_mpi_point_snatch_set (point, x, y, z);
+}
+
+gpg_error_t
+gcry_mpi_ec_new (gcry_ctx_t *r_ctx,
+                 gcry_sexp_t keyparam, const char *curvename)
+{
+  return gpg_error (_gcry_mpi_ec_new (r_ctx, keyparam, curvename));
+}
+
+gcry_mpi_t
+gcry_mpi_ec_get_mpi (const char *name, gcry_ctx_t ctx, int copy)
+{
+  return _gcry_mpi_ec_get_mpi (name, ctx, copy);
+}
+
+gcry_mpi_point_t
+gcry_mpi_ec_get_point (const char *name, gcry_ctx_t ctx, int copy)
+{
+  return _gcry_mpi_ec_get_point (name, ctx, copy);
+}
+
+gpg_error_t
+gcry_mpi_ec_set_mpi (const char *name, gcry_mpi_t newvalue, gcry_ctx_t ctx)
+{
+  return gpg_error (_gcry_mpi_ec_set_mpi (name, newvalue, ctx));
+}
+
+gpg_error_t
+gcry_mpi_ec_set_point (const char *name, gcry_mpi_point_t newvalue,
+                        gcry_ctx_t ctx)
+{
+  return gpg_error (_gcry_mpi_ec_set_point (name, newvalue, ctx));
+}
+
+gpg_error_t
+gcry_mpi_ec_decode_point (gcry_mpi_point_t result, gcry_mpi_t value,
+                          gcry_ctx_t ctx)
+{
+  return gpg_error (_gcry_mpi_ec_decode_point
+                    (result, value,
+                     ctx? _gcry_ctx_get_pointer (ctx, CONTEXT_TYPE_EC) : 
NULL));
+}
+
+int
+gcry_mpi_ec_get_affine (gcry_mpi_t x, gcry_mpi_t y, gcry_mpi_point_t point,
+                        gcry_ctx_t ctx)
+{
+  return _gcry_mpi_ec_get_affine (x, y, point,
+                                  _gcry_ctx_get_pointer (ctx, 
CONTEXT_TYPE_EC));
+}
+
+void
+gcry_mpi_ec_dup (gcry_mpi_point_t w, gcry_mpi_point_t u, gcry_ctx_t ctx)
+{
+  mpi_ec_t ec = _gcry_ctx_get_pointer (ctx, CONTEXT_TYPE_EC);
+
+  if (ec->model == MPI_EC_EDWARDS || ec->model == MPI_EC_MONTGOMERY)
+    {
+      mpi_point_resize (w, ec);
+      mpi_point_resize (u, ec);
+    }
+
+  _gcry_mpi_ec_dup_point (w, u, ec);
+}
+
+void
+gcry_mpi_ec_add (gcry_mpi_point_t w,
+                 gcry_mpi_point_t u, gcry_mpi_point_t v, gcry_ctx_t ctx)
+{
+  mpi_ec_t ec = _gcry_ctx_get_pointer (ctx, CONTEXT_TYPE_EC);
+
+  if (ec->model == MPI_EC_EDWARDS || ec->model == MPI_EC_MONTGOMERY)
+    {
+      mpi_point_resize (w, ec);
+      mpi_point_resize (u, ec);
+      mpi_point_resize (v, ec);
+    }
+
+  _gcry_mpi_ec_add_points (w, u, v, ec);
+}
+
+void
+gcry_mpi_ec_sub (gcry_mpi_point_t w,
+                 gcry_mpi_point_t u, gcry_mpi_point_t v, gcry_ctx_t ctx)
+{
+  mpi_ec_t ec = _gcry_ctx_get_pointer (ctx, CONTEXT_TYPE_EC);
+
+  if (ec->model == MPI_EC_EDWARDS || ec->model == MPI_EC_MONTGOMERY)
+    {
+      mpi_point_resize (w, ec);
+      mpi_point_resize (u, ec);
+      mpi_point_resize (v, ec);
+    }
+
+  _gcry_mpi_ec_sub_points (w, u, v, ec);
+}
+
+void
+gcry_mpi_ec_mul (gcry_mpi_point_t w, gcry_mpi_t n, gcry_mpi_point_t u,
+                 gcry_ctx_t ctx)
+{
+  _gcry_mpi_ec_mul_point (w, n, u,
+                          _gcry_ctx_get_pointer (ctx, CONTEXT_TYPE_EC));
+}
+
+int
+gcry_mpi_ec_curve_point (gcry_mpi_point_t point, gcry_ctx_t ctx)
+{
+  return _gcry_mpi_ec_curve_point
+    (point, _gcry_ctx_get_pointer (ctx, CONTEXT_TYPE_EC));
+}
 
 unsigned int
 gcry_mpi_get_nbits (gcry_mpi_t a)
@@ -469,6 +683,12 @@ gcry_mpi_set_opaque (gcry_mpi_t a, void *p, unsigned int 
nbits)
   return _gcry_mpi_set_opaque (a, p, nbits);
 }
 
+gcry_mpi_t
+gcry_mpi_set_opaque_copy (gcry_mpi_t a, const void *p, unsigned int nbits)
+{
+  return _gcry_mpi_set_opaque_copy (a, p, nbits);
+}
+
 void *
 gcry_mpi_get_opaque (gcry_mpi_t a, unsigned int *nbits)
 {
@@ -493,6 +713,20 @@ gcry_mpi_get_flag (gcry_mpi_t a, enum gcry_mpi_flag flag)
   return _gcry_mpi_get_flag (a, flag);
 }
 
+gcry_mpi_t
+_gcry_mpi_get_const (int no)
+{
+  switch (no)
+    {
+    case 1: return _gcry_mpi_const (MPI_C_ONE);
+    case 2: return _gcry_mpi_const (MPI_C_TWO);
+    case 3: return _gcry_mpi_const (MPI_C_THREE);
+    case 4: return _gcry_mpi_const (MPI_C_FOUR);
+    case 8: return _gcry_mpi_const (MPI_C_EIGHT);
+    default: log_bug("unsupported GCRYMPI_CONST_ macro used\n");
+    }
+}
+
 gcry_error_t
 gcry_cipher_open (gcry_cipher_hd_t *handle,
                   int algo, int mode, unsigned int flags)
@@ -503,7 +737,7 @@ gcry_cipher_open (gcry_cipher_hd_t *handle,
       return gpg_error (fips_not_operational ());
     }
 
-  return _gcry_cipher_open (handle, algo, mode, flags);
+  return gpg_error (_gcry_cipher_open (handle, algo, mode, flags));
 }
 
 void
@@ -518,7 +752,7 @@ gcry_cipher_setkey (gcry_cipher_hd_t hd, const void *key, 
size_t keylen)
   if (!fips_is_operational ())
     return gpg_error (fips_not_operational ());
 
-  return _gcry_cipher_setkey (hd, key, keylen);
+  return gcry_error (_gcry_cipher_setkey (hd, key, keylen));
 }
 
 gcry_error_t
@@ -527,7 +761,7 @@ gcry_cipher_setiv (gcry_cipher_hd_t hd, const void *iv, 
size_t ivlen)
   if (!fips_is_operational ())
     return gpg_error (fips_not_operational ());
 
-  return _gcry_cipher_setiv (hd, iv, ivlen);
+  return gcry_error (_gcry_cipher_setiv (hd, iv, ivlen));
 }
 
 gpg_error_t
@@ -536,7 +770,34 @@ gcry_cipher_setctr (gcry_cipher_hd_t hd, const void *ctr, 
size_t ctrlen)
   if (!fips_is_operational ())
     return gpg_error (fips_not_operational ());
 
-  return _gcry_cipher_setctr (hd, ctr, ctrlen);
+  return gcry_error (_gcry_cipher_setctr (hd, ctr, ctrlen));
+}
+
+gcry_error_t
+gcry_cipher_authenticate (gcry_cipher_hd_t hd, const void *abuf, size_t 
abuflen)
+{
+  if (!fips_is_operational ())
+    return gpg_error (fips_not_operational ());
+
+  return gpg_error (_gcry_cipher_authenticate (hd, abuf, abuflen));
+}
+
+gcry_error_t
+gcry_cipher_gettag (gcry_cipher_hd_t hd, void *outtag, size_t taglen)
+{
+  if (!fips_is_operational ())
+    return gpg_error (fips_not_operational ());
+
+  return gpg_error (_gcry_cipher_gettag (hd, outtag, taglen));
+}
+
+gcry_error_t
+gcry_cipher_checktag (gcry_cipher_hd_t hd, const void *intag, size_t taglen)
+{
+  if (!fips_is_operational ())
+    return gpg_error (fips_not_operational ());
+
+  return gpg_error (_gcry_cipher_checktag (hd, intag, taglen));
 }
 
 
@@ -546,13 +807,13 @@ gcry_cipher_ctl (gcry_cipher_hd_t h, int cmd, void 
*buffer, size_t buflen)
   if (!fips_is_operational ())
     return gpg_error (fips_not_operational ());
 
-  return _gcry_cipher_ctl (h, cmd, buffer, buflen);
+  return gpg_error (_gcry_cipher_ctl (h, cmd, buffer, buflen));
 }
 
 gcry_error_t
 gcry_cipher_info (gcry_cipher_hd_t h, int what, void *buffer, size_t *nbytes)
 {
-  return _gcry_cipher_info (h, what, buffer, nbytes);
+  return gpg_error (_gcry_cipher_info (h, what, buffer, nbytes));
 }
 
 gcry_error_t
@@ -561,7 +822,7 @@ gcry_cipher_algo_info (int algo, int what, void *buffer, 
size_t *nbytes)
   if (!fips_is_operational ())
     return gpg_error (fips_not_operational ());
 
-  return _gcry_cipher_algo_info (algo, what, buffer, nbytes);
+  return gpg_error (_gcry_cipher_algo_info (algo, what, buffer, nbytes));
 }
 
 const char *
@@ -595,7 +856,7 @@ gcry_cipher_encrypt (gcry_cipher_hd_t h,
       return gpg_error (fips_not_operational ());
     }
 
-  return _gcry_cipher_encrypt (h, out, outsize, in, inlen);
+  return gpg_error (_gcry_cipher_encrypt (h, out, outsize, in, inlen));
 }
 
 gcry_error_t
@@ -606,7 +867,7 @@ gcry_cipher_decrypt (gcry_cipher_hd_t h,
   if (!fips_is_operational ())
     return gpg_error (fips_not_operational ());
 
-  return _gcry_cipher_decrypt (h, out, outsize, in, inlen);
+  return gpg_error (_gcry_cipher_decrypt (h, out, outsize, in, inlen));
 }
 
 size_t
@@ -622,9 +883,118 @@ gcry_cipher_get_algo_blklen (int algo)
 }
 
 gcry_error_t
-gcry_cipher_list (int *list, int *list_length)
+gcry_mac_algo_info (int algo, int what, void *buffer, size_t *nbytes)
+{
+  if (!fips_is_operational ())
+    return gpg_error (fips_not_operational ());
+
+  return gpg_error (_gcry_mac_algo_info (algo, what, buffer, nbytes));
+}
+
+const char *
+gcry_mac_algo_name (int algorithm)
+{
+  return _gcry_mac_algo_name (algorithm);
+}
+
+int
+gcry_mac_map_name (const char *string)
+{
+  return _gcry_mac_map_name (string);
+}
+
+int
+gcry_mac_get_algo (gcry_mac_hd_t hd)
+{
+  return _gcry_mac_get_algo (hd);
+}
+
+unsigned int
+gcry_mac_get_algo_maclen (int algo)
+{
+  return _gcry_mac_get_algo_maclen (algo);
+}
+
+unsigned int
+gcry_mac_get_algo_keylen (int algo)
+{
+  return _gcry_mac_get_algo_keylen (algo);
+}
+
+gcry_error_t
+gcry_mac_open (gcry_mac_hd_t *handle, int algo, unsigned int flags,
+               gcry_ctx_t ctx)
+{
+  if (!fips_is_operational ())
+    {
+      *handle = NULL;
+      return gpg_error (fips_not_operational ());
+    }
+
+  return gpg_error (_gcry_mac_open (handle, algo, flags, ctx));
+}
+
+void
+gcry_mac_close (gcry_mac_hd_t hd)
+{
+  _gcry_mac_close (hd);
+}
+
+gcry_error_t
+gcry_mac_setkey (gcry_mac_hd_t hd, const void *key, size_t keylen)
+{
+  if (!fips_is_operational ())
+    return gpg_error (fips_not_operational ());
+
+  if (fips_mode () && keylen < 14)
+    return GPG_ERR_INV_VALUE;
+
+  return gpg_error (_gcry_mac_setkey (hd, key, keylen));
+}
+
+gcry_error_t
+gcry_mac_setiv (gcry_mac_hd_t hd, const void *iv, size_t ivlen)
+{
+  if (!fips_is_operational ())
+    return gpg_error (fips_not_operational ());
+
+  return gpg_error (_gcry_mac_setiv (hd, iv, ivlen));
+}
+
+gcry_error_t
+gcry_mac_write (gcry_mac_hd_t hd, const void *buf, size_t buflen)
+{
+  if (!fips_is_operational ())
+    return gpg_error (fips_not_operational ());
+
+  return gpg_error (_gcry_mac_write (hd, buf, buflen));
+}
+
+gcry_error_t
+gcry_mac_read (gcry_mac_hd_t hd, void *outbuf, size_t *outlen)
+{
+  if (!fips_is_operational ())
+    return gpg_error (fips_not_operational ());
+
+  return gpg_error (_gcry_mac_read (hd, outbuf, outlen));
+}
+
+gcry_error_t
+gcry_mac_verify (gcry_mac_hd_t hd, const void *buf, size_t buflen)
+{
+  if (!fips_is_operational ())
+    return gpg_error (fips_not_operational ());
+
+  return gpg_error (_gcry_mac_verify (hd, buf, buflen));
+}
+
+gcry_error_t
+gcry_mac_ctl (gcry_mac_hd_t h, int cmd, void *buffer, size_t buflen)
 {
-  return _gcry_cipher_list (list, list_length);
+  if (!fips_is_operational ())
+    return gpg_error (fips_not_operational ());
+
+  return gpg_error (_gcry_mac_ctl (h, cmd, buffer, buflen));
 }
 
 gcry_error_t
@@ -635,7 +1005,7 @@ gcry_pk_encrypt (gcry_sexp_t *result, gcry_sexp_t data, 
gcry_sexp_t pkey)
       *result = NULL;
       return gpg_error (fips_not_operational ());
     }
-  return _gcry_pk_encrypt (result, data, pkey);
+  return gpg_error (_gcry_pk_encrypt (result, data, pkey));
 }
 
 gcry_error_t
@@ -646,7 +1016,7 @@ gcry_pk_decrypt (gcry_sexp_t *result, gcry_sexp_t data, 
gcry_sexp_t skey)
       *result = NULL;
       return gpg_error (fips_not_operational ());
     }
-  return _gcry_pk_decrypt (result, data, skey);
+  return gpg_error (_gcry_pk_decrypt (result, data, skey));
 }
 
 gcry_error_t
@@ -657,7 +1027,19 @@ gcry_pk_sign (gcry_sexp_t *result, gcry_sexp_t data, 
gcry_sexp_t skey)
       *result = NULL;
       return gpg_error (fips_not_operational ());
     }
-  return _gcry_pk_sign (result, data, skey);
+  return gpg_error (_gcry_pk_sign (result, data, skey));
+}
+
+gcry_error_t
+gcry_pk_hash_sign (gcry_sexp_t *result, const char *data_tmpl, gcry_sexp_t 
skey,
+                   gcry_md_hd_t hd, gcry_ctx_t ctx)
+{
+  if (!fips_is_operational ())
+    {
+      *result = NULL;
+      return gpg_error (fips_not_operational ());
+    }
+  return gpg_error (_gcry_pk_sign_md (result, data_tmpl, hd, skey, ctx));
 }
 
 gcry_error_t
@@ -665,7 +1047,22 @@ gcry_pk_verify (gcry_sexp_t sigval, gcry_sexp_t data, 
gcry_sexp_t pkey)
 {
   if (!fips_is_operational ())
     return gpg_error (fips_not_operational ());
-  return _gcry_pk_verify (sigval, data, pkey);
+  return gpg_error (_gcry_pk_verify (sigval, data, pkey));
+}
+
+gcry_error_t
+gcry_pk_hash_verify (gcry_sexp_t sigval, const char *data_tmpl, gcry_sexp_t 
pkey,
+                     gcry_md_hd_t hd, gcry_ctx_t ctx)
+{
+  if (!fips_is_operational ())
+    return gpg_error (fips_not_operational ());
+  return gpg_error (_gcry_pk_verify_md (sigval, data_tmpl, hd, pkey, ctx));
+}
+
+gcry_error_t
+gcry_pk_random_override_new (gcry_ctx_t *r_ctx, const unsigned char *p, size_t 
len)
+{
+  return gpg_error (_gcry_pk_random_override_new (r_ctx, p, len));
 }
 
 gcry_error_t
@@ -673,7 +1070,7 @@ gcry_pk_testkey (gcry_sexp_t key)
 {
   if (!fips_is_operational ())
     return gpg_error (fips_not_operational ());
-  return _gcry_pk_testkey (key);
+  return gpg_error (_gcry_pk_testkey (key));
 }
 
 gcry_error_t
@@ -684,13 +1081,13 @@ gcry_pk_genkey (gcry_sexp_t *r_key, gcry_sexp_t s_parms)
       *r_key = NULL;
       return gpg_error (fips_not_operational ());
     }
-  return _gcry_pk_genkey (r_key, s_parms);
+  return gpg_error (_gcry_pk_genkey (r_key, s_parms));
 }
 
 gcry_error_t
 gcry_pk_ctl (int cmd, void *buffer, size_t buflen)
 {
-  return _gcry_pk_ctl (cmd, buffer, buflen);
+  return gpg_error (_gcry_pk_ctl (cmd, buffer, buflen));
 }
 
 gcry_error_t
@@ -699,7 +1096,7 @@ gcry_pk_algo_info (int algo, int what, void *buffer, 
size_t *nbytes)
   if (!fips_is_operational ())
     return gpg_error (fips_not_operational ());
 
-  return _gcry_pk_algo_info (algo, what, buffer, nbytes);
+  return gpg_error (_gcry_pk_algo_info (algo, what, buffer, nbytes));
 }
 
 const char *
@@ -760,9 +1157,27 @@ gcry_pk_get_param (int algo, const char *name)
 }
 
 gcry_error_t
-gcry_pk_list (int *list, int *list_length)
+gcry_pubkey_get_sexp (gcry_sexp_t *r_sexp, int mode, gcry_ctx_t ctx)
+{
+  if (!fips_is_operational ())
+    {
+      *r_sexp = NULL;
+      return gpg_error (fips_not_operational ());
+    }
+  return gpg_error (_gcry_pubkey_get_sexp (r_sexp, mode, ctx));
+}
+
+unsigned int
+gcry_ecc_get_algo_keylen (int curveid)
+{
+  return _gcry_ecc_get_algo_keylen (curveid);
+}
+
+gpg_error_t
+gcry_ecc_mul_point (int curveid, unsigned char *result,
+                    const unsigned char *scalar, const unsigned char *point)
 {
-  return _gcry_pk_list (list, list_length);
+  return _gcry_ecc_mul_point (curveid, result, scalar, point);
 }
 
 gcry_error_t
@@ -774,7 +1189,7 @@ gcry_md_open (gcry_md_hd_t *h, int algo, unsigned int 
flags)
       return gpg_error (fips_not_operational ());
     }
 
-  return _gcry_md_open (h, algo, flags);
+  return gpg_error (_gcry_md_open (h, algo, flags));
 }
 
 void
@@ -788,7 +1203,7 @@ gcry_md_enable (gcry_md_hd_t hd, int algo)
 {
   if (!fips_is_operational ())
     return gpg_error (fips_not_operational ());
-  return _gcry_md_enable (hd, algo);
+  return gpg_error (_gcry_md_enable (hd, algo));
 }
 
 gcry_error_t
@@ -799,7 +1214,7 @@ gcry_md_copy (gcry_md_hd_t *bhd, gcry_md_hd_t ahd)
       *bhd = NULL;
       return gpg_error (fips_not_operational ());
     }
-  return _gcry_md_copy (bhd, ahd);
+  return gpg_error (_gcry_md_copy (bhd, ahd));
 }
 
 void
@@ -813,7 +1228,7 @@ gcry_md_ctl (gcry_md_hd_t hd, int cmd, void *buffer, 
size_t buflen)
 {
   if (!fips_is_operational ())
     return gpg_error (fips_not_operational ());
-  return _gcry_md_ctl (hd, cmd, buffer, buflen);
+  return gpg_error (_gcry_md_ctl (hd, cmd, buffer, buflen));
 }
 
 void
@@ -833,6 +1248,12 @@ gcry_md_read (gcry_md_hd_t hd, int algo)
   return _gcry_md_read (hd, algo);
 }
 
+gcry_error_t
+gcry_md_extract (gcry_md_hd_t hd, int algo, void *buffer, size_t length)
+{
+  return gpg_error (_gcry_md_extract(hd, algo, buffer, length));
+}
+
 void
 gcry_md_hash_buffer (int algo, void *digest,
                      const void *buffer, size_t length)
@@ -845,6 +1266,18 @@ gcry_md_hash_buffer (int algo, void *digest,
   _gcry_md_hash_buffer (algo, digest, buffer, length);
 }
 
+gpg_error_t
+gcry_md_hash_buffers (int algo, unsigned int flags, void *digest,
+                      const gcry_buffer_t *iov, int iovcnt)
+{
+  if (!fips_is_operational ())
+    {
+      (void)fips_not_operational ();
+      fips_signal_error ("called in non-operational state");
+    }
+  return gpg_error (_gcry_md_hash_buffers (algo, flags, digest, iov, iovcnt));
+}
+
 int
 gcry_md_get_algo (gcry_md_hd_t hd)
 {
@@ -887,13 +1320,13 @@ gcry_md_info (gcry_md_hd_t h, int what, void *buffer, 
size_t *nbytes)
   if (!fips_is_operational ())
     return gpg_error (fips_not_operational ());
 
-  return _gcry_md_info (h, what, buffer, nbytes);
+  return gpg_error (_gcry_md_info (h, what, buffer, nbytes));
 }
 
 gcry_error_t
 gcry_md_algo_info (int algo, int what, void *buffer, size_t *nbytes)
 {
-  return _gcry_md_algo_info (algo, what, buffer, nbytes);
+  return gpg_error (_gcry_md_algo_info (algo, what, buffer, nbytes));
 }
 
 const char *
@@ -913,312 +1346,68 @@ gcry_md_setkey (gcry_md_hd_t hd, const void *key, 
size_t keylen)
 {
   if (!fips_is_operational ())
     return gpg_error (fips_not_operational ());
-  return _gcry_md_setkey (hd, key, keylen);
-}
-
-void
-gcry_md_debug (gcry_md_hd_t hd, const char *suffix)
-{
-  _gcry_md_debug (hd, suffix);
-}
-
-gcry_error_t
-gcry_md_list (int *list, int *list_length)
-{
-  return _gcry_md_list (list, list_length);
-}
-
-gcry_error_t
-gcry_ac_data_new (gcry_ac_data_t *data)
-{
-  return _gcry_ac_data_new (data);
-}
-
-void
-gcry_ac_data_destroy (gcry_ac_data_t data)
-{
-  _gcry_ac_data_destroy (data);
-}
-
-gcry_error_t
-gcry_ac_data_copy (gcry_ac_data_t *data_cp, gcry_ac_data_t data)
-{
-  return _gcry_ac_data_copy (data_cp, data);
-}
-
-unsigned int
-gcry_ac_data_length (gcry_ac_data_t data)
-{
-  return _gcry_ac_data_length (data);
-}
-
-void
-gcry_ac_data_clear (gcry_ac_data_t data)
-{
-  _gcry_ac_data_clear (data);
-}
-
-gcry_error_t
-gcry_ac_data_set (gcry_ac_data_t data, unsigned int flags,
-                  const char *name, gcry_mpi_t mpi)
-{
-  return _gcry_ac_data_set (data, flags, name, mpi);
-}
-
-gcry_error_t
-gcry_ac_data_get_name (gcry_ac_data_t data, unsigned int flags,
-                       const char *name, gcry_mpi_t *mpi)
-{
-  return _gcry_ac_data_get_name (data, flags, name, mpi);
-}
-
-gcry_error_t
-gcry_ac_data_get_index (gcry_ac_data_t data, unsigned int flags,
-                        unsigned int idx, const char **name, gcry_mpi_t *mpi)
-{
-  return _gcry_ac_data_get_index (data, flags, idx, name, mpi);
-}
 
-gcry_error_t
-gcry_ac_data_to_sexp (gcry_ac_data_t data, gcry_sexp_t *sexp,
-                      const char **identifiers)
-{
-  return _gcry_ac_data_to_sexp (data, sexp, identifiers);
-}
+  if (fips_mode () && keylen < 14)
+    return GPG_ERR_INV_VALUE;
 
-gcry_error_t
-gcry_ac_data_from_sexp (gcry_ac_data_t *data, gcry_sexp_t sexp,
-                        const char **identifiers)
-{
-  return _gcry_ac_data_from_sexp (data, sexp, identifiers);
+  return gpg_error (_gcry_md_setkey (hd, key, keylen));
 }
 
 void
-gcry_ac_io_init (gcry_ac_io_t *ac_io, gcry_ac_io_mode_t mode,
-                 gcry_ac_io_type_t type, ...)
-{
-  va_list arg_ptr;
-
-  va_start (arg_ptr, type);
-  _gcry_ac_io_init_va (ac_io, mode, type, arg_ptr);
-  va_end (arg_ptr);
-}
-
-void
-gcry_ac_io_init_va (gcry_ac_io_t *ac_io, gcry_ac_io_mode_t mode,
-                    gcry_ac_io_type_t type, va_list ap)
-{
-  _gcry_ac_io_init_va (ac_io, mode, type, ap);
-}
-
-gcry_error_t
-gcry_ac_open (gcry_ac_handle_t *handle,
-              gcry_ac_id_t algorithm, unsigned int flags)
-{
-  return _gcry_ac_open (handle, algorithm, flags);
-}
-
-void
-gcry_ac_close (gcry_ac_handle_t handle)
-{
-  _gcry_ac_close (handle);
-}
-
-gcry_error_t
-gcry_ac_key_init (gcry_ac_key_t *key, gcry_ac_handle_t handle,
-                  gcry_ac_key_type_t type, gcry_ac_data_t data)
-{
-  return _gcry_ac_key_init (key, handle, type, data);
-}
-
-gcry_error_t
-gcry_ac_key_pair_generate (gcry_ac_handle_t handle,
-                           unsigned int nbits, void *spec,
-                           gcry_ac_key_pair_t *key_pair,
-                           gcry_mpi_t **miscdata)
-{
-  return _gcry_ac_key_pair_generate ( handle, nbits, spec, key_pair, miscdata);
-}
-
-gcry_ac_key_t
-gcry_ac_key_pair_extract (gcry_ac_key_pair_t keypair, gcry_ac_key_type_t which)
+gcry_md_debug (gcry_md_hd_t hd, const char *suffix)
 {
-  return _gcry_ac_key_pair_extract (keypair, which);
+  _gcry_md_debug (hd, suffix);
 }
 
-gcry_ac_data_t
-gcry_ac_key_data_get (gcry_ac_key_t key)
+gpg_error_t
+gcry_kdf_derive (const void *passphrase, size_t passphraselen,
+                 int algo, int hashalgo,
+                 const void *salt, size_t saltlen,
+                 unsigned long iterations,
+                 size_t keysize, void *keybuffer)
 {
-  return _gcry_ac_key_data_get (key);
+  if (!fips_is_operational ())
+    return gpg_error (fips_not_operational ());
+  return gpg_error (_gcry_kdf_derive (passphrase, passphraselen, algo, 
hashalgo,
+                                      salt, saltlen, iterations,
+                                      keysize, keybuffer));
 }
 
-gcry_error_t
-gcry_ac_key_test (gcry_ac_handle_t handle, gcry_ac_key_t key)
+gpg_error_t
+gcry_kdf_open (gcry_kdf_hd_t *hd, int algo, int subalgo,
+               const unsigned long *param, unsigned int paramlen,
+               const void *passphrase, size_t passphraselen,
+               const void *salt, size_t saltlen,
+               const void *key, size_t keylen,
+               const void *ad, size_t adlen)
 {
-  return _gcry_ac_key_test (handle, key);
+  if (!fips_is_operational ())
+    return gpg_error (fips_not_operational ());
+  return gpg_error (_gcry_kdf_open (hd, algo, subalgo, param, paramlen,
+                                    passphrase, passphraselen, salt, saltlen,
+                                    key, keylen, ad, adlen));
 }
 
 gcry_error_t
-gcry_ac_key_get_nbits (gcry_ac_handle_t handle,
-                       gcry_ac_key_t key, unsigned int *nbits)
+gcry_kdf_compute (gcry_kdf_hd_t h, const struct gcry_kdf_thread_ops *ops)
 {
-  return _gcry_ac_key_get_nbits (handle, key, nbits);
+  if (!fips_is_operational ())
+    return gpg_error (fips_not_operational ());
+  return gpg_error (_gcry_kdf_compute (h, ops));
 }
 
 gcry_error_t
-gcry_ac_key_get_grip (gcry_ac_handle_t handle, gcry_ac_key_t key,
-                      unsigned char *key_grip)
+gcry_kdf_final (gcry_kdf_hd_t h, size_t resultlen, void *result)
 {
-  return _gcry_ac_key_get_grip (handle, key, key_grip);
-}
-
-void
-gcry_ac_key_destroy (gcry_ac_key_t key)
-{
-  _gcry_ac_key_destroy (key);
+  if (!fips_is_operational ())
+    return gpg_error (fips_not_operational ());
+  return gpg_error (_gcry_kdf_final (h, resultlen, result));
 }
 
 void
-gcry_ac_key_pair_destroy (gcry_ac_key_pair_t key_pair)
+gcry_kdf_close (gcry_kdf_hd_t h)
 {
-  _gcry_ac_key_pair_destroy (key_pair);
-}
-
-gcry_error_t
-gcry_ac_data_encode (gcry_ac_em_t method, unsigned int flags, void *options,
-                     gcry_ac_io_t *io_read, gcry_ac_io_t *io_write)
-{
-  return _gcry_ac_data_encode (method, flags, options, io_read, io_write);
-}
-
-gcry_error_t
-gcry_ac_data_decode (gcry_ac_em_t method, unsigned int flags, void *options,
-                     gcry_ac_io_t *io_read, gcry_ac_io_t *io_write)
-{
-  return _gcry_ac_data_decode (method, flags, options, io_read,  io_write);
-}
-
-gcry_error_t
-gcry_ac_data_encrypt (gcry_ac_handle_t handle,
-                      unsigned int flags,
-                      gcry_ac_key_t key,
-                      gcry_mpi_t data_plain,
-                      gcry_ac_data_t *data_encrypted)
-{
-  return _gcry_ac_data_encrypt (handle, flags, key,
-                                data_plain, data_encrypted);
-}
-
-gcry_error_t
-gcry_ac_data_decrypt (gcry_ac_handle_t handle,
-                      unsigned int flags,
-                      gcry_ac_key_t key,
-                      gcry_mpi_t *data_plain,
-                      gcry_ac_data_t data_encrypted)
-{
-  return _gcry_ac_data_decrypt (handle, flags, key,
-                                data_plain, data_encrypted);
-}
-
-gcry_error_t
-gcry_ac_data_sign (gcry_ac_handle_t handle,
-                   gcry_ac_key_t key,
-                   gcry_mpi_t data,
-                   gcry_ac_data_t *data_signature)
-{
-  return _gcry_ac_data_sign (handle, key, data, data_signature);
-}
-
-gcry_error_t
-gcry_ac_data_verify (gcry_ac_handle_t handle,
-                     gcry_ac_key_t key,
-                     gcry_mpi_t data,
-                     gcry_ac_data_t data_signature)
-{
-  return _gcry_ac_data_verify (handle, key, data, data_signature);
-}
-
-gcry_error_t
-gcry_ac_data_encrypt_scheme (gcry_ac_handle_t handle,
-                             gcry_ac_scheme_t scheme,
-                             unsigned int flags, void *opts,
-                             gcry_ac_key_t key,
-                             gcry_ac_io_t *io_message,
-                             gcry_ac_io_t *io_cipher)
-{
-  return _gcry_ac_data_encrypt_scheme (handle, scheme, flags, opts, key,
-                                       io_message, io_cipher);
-}
-
-gcry_error_t
-gcry_ac_data_decrypt_scheme (gcry_ac_handle_t handle,
-                             gcry_ac_scheme_t scheme,
-                             unsigned int flags, void *opts,
-                             gcry_ac_key_t key,
-                             gcry_ac_io_t *io_cipher,
-                             gcry_ac_io_t *io_message)
-{
-  return _gcry_ac_data_decrypt_scheme (handle, scheme, flags, opts, key,
-                                       io_cipher, io_message);
-}
-
-gcry_error_t
-gcry_ac_data_sign_scheme (gcry_ac_handle_t handle,
-                          gcry_ac_scheme_t scheme,
-                          unsigned int flags, void *opts,
-                          gcry_ac_key_t key,
-                          gcry_ac_io_t *io_message,
-                          gcry_ac_io_t *io_signature)
-{
-  return _gcry_ac_data_sign_scheme (handle, scheme, flags, opts, key,
-                                    io_message, io_signature);
-}
-
-gcry_error_t
-gcry_ac_data_verify_scheme (gcry_ac_handle_t handle,
-                            gcry_ac_scheme_t scheme,
-                            unsigned int flags, void *opts,
-                            gcry_ac_key_t key,
-                            gcry_ac_io_t *io_message,
-                            gcry_ac_io_t *io_signature)
-{
-  return _gcry_ac_data_verify_scheme (handle, scheme, flags, opts, key,
-                                      io_message, io_signature);
-}
-
-gcry_error_t
-gcry_ac_id_to_name (gcry_ac_id_t algorithm, const char **name)
-{
-  /* This function is deprecated.  We implement it in terms of the
-     suggested replacement.  */
-  const char *tmp = _gcry_pk_algo_name (algorithm);
-  if (!*tmp)
-    return gcry_error (GPG_ERR_PUBKEY_ALGO);
-  *name = tmp;
-  return 0;
-}
-
-gcry_error_t
-gcry_ac_name_to_id (const char *name, gcry_ac_id_t *algorithm)
-{
-  /* This function is deprecated.  We implement it in terms of the
-     suggested replacement.  */
-  int algo = _gcry_pk_map_name (name);
-  if (!algo)
-    return gcry_error (GPG_ERR_PUBKEY_ALGO);
-  *algorithm = algo;
-  return 0;
-}
-
-gpg_error_t
-gcry_kdf_derive (const void *passphrase, size_t passphraselen,
-                 int algo, int hashalgo,
-                 const void *salt, size_t saltlen,
-                 unsigned long iterations,
-                 size_t keysize, void *keybuffer)
-{
-  return _gcry_kdf_derive (passphrase, passphraselen, algo, hashalgo,
-                           salt, saltlen, iterations, keysize, keybuffer);
+  _gcry_kdf_close (h);
 }
 
 void
@@ -1238,7 +1427,7 @@ gcry_random_add_bytes (const void *buffer, size_t length, 
int quality)
 {
   if (!fips_is_operational ())
     return gpg_error (fips_not_operational ());
-  return _gcry_random_add_bytes (buffer, length, quality);
+  return gpg_error (_gcry_random_add_bytes (buffer, length, quality));
 }
 
 void *
@@ -1271,6 +1460,13 @@ void
 gcry_mpi_randomize (gcry_mpi_t w,
                     unsigned int nbits, enum gcry_random_level level)
 {
+  if (!fips_is_operational ())
+    {
+      (void)fips_not_operational ();
+      fips_signal_fatal_error ("called in non-operational state");
+      fips_noreturn ();
+    }
+
   _gcry_mpi_randomize (w, nbits, level);
 }
 
@@ -1296,8 +1492,11 @@ gcry_prime_generate (gcry_mpi_t *prime,
                      gcry_random_level_t random_level,
                      unsigned int flags)
 {
-  return _gcry_prime_generate (prime, prime_bits, factor_bits, factors,
-                               cb_func, cb_arg, random_level, flags);
+  if (!fips_is_operational ())
+    return gpg_error (fips_not_operational ());
+  return gpg_error (_gcry_prime_generate (prime, prime_bits, factor_bits,
+                                          factors, cb_func, cb_arg,
+                                          random_level, flags));
 }
 
 gcry_error_t
@@ -1305,7 +1504,9 @@ gcry_prime_group_generator (gcry_mpi_t *r_g,
                             gcry_mpi_t prime, gcry_mpi_t *factors,
                             gcry_mpi_t start_g)
 {
-  return _gcry_prime_group_generator (r_g, prime, factors, start_g);
+  if (!fips_is_operational ())
+    return gpg_error (fips_not_operational ());
+  return gpg_error (_gcry_prime_group_generator (r_g, prime, factors, 
start_g));
 }
 
 void
@@ -1317,7 +1518,55 @@ gcry_prime_release_factors (gcry_mpi_t *factors)
 gcry_error_t
 gcry_prime_check (gcry_mpi_t x, unsigned int flags)
 {
-  return _gcry_prime_check (x, flags);
+  return gpg_error (_gcry_prime_check (x, flags));
+}
+
+void
+gcry_ctx_release (gcry_ctx_t ctx)
+{
+  _gcry_ctx_release (ctx);
+}
+
+void
+gcry_log_debug (const char *fmt, ...)
+{
+  va_list arg_ptr ;
+
+  va_start( arg_ptr, fmt ) ;
+  _gcry_logv (GCRY_LOG_DEBUG, fmt, arg_ptr);
+  va_end (arg_ptr);
+}
+
+void
+gcry_log_debughex (const char *text, const void *buffer, size_t length)
+{
+  _gcry_log_printhex (text, buffer, length);
+}
+
+void
+gcry_log_debugmpi (const char *text, gcry_mpi_t mpi)
+{
+  _gcry_log_printmpi (text, mpi);
+}
+
+void
+gcry_log_debugpnt (const char *text, mpi_point_t point, gcry_ctx_t ctx)
+{
+  mpi_ec_t ec = ctx? _gcry_ctx_get_pointer (ctx, CONTEXT_TYPE_EC) : NULL;
+
+  _gcry_mpi_point_log (text, point, ec);
+}
+
+void
+gcry_log_debugsxp (const char *text, gcry_sexp_t sexp)
+{
+  _gcry_log_printsxp (text, sexp);
+}
+
+char *
+gcry_get_config (int mode, const char *what)
+{
+  return _gcry_get_config (mode, what);
 }
 
 void
@@ -1444,43 +1693,3 @@ gcry_is_secure (const void *a)
 {
   return _gcry_is_secure (a);
 }
-
-
-gcry_error_t
-gcry_cipher_register (gcry_cipher_spec_t *cipher, int *algorithm_id,
-                      gcry_module_t *module)
-{
-  return _gcry_cipher_register (cipher, NULL, algorithm_id, module);
-}
-
-void
-gcry_cipher_unregister (gcry_module_t module)
-{
-  _gcry_cipher_unregister (module);
-}
-
-gcry_error_t
-gcry_pk_register (gcry_pk_spec_t *pubkey, unsigned int *algorithm_id,
-                  gcry_module_t *module)
-{
-  return _gcry_pk_register (pubkey, NULL, algorithm_id, module);
-}
-
-void
-gcry_pk_unregister (gcry_module_t module)
-{
-  _gcry_pk_unregister (module);
-}
-
-gcry_error_t
-gcry_md_register (gcry_md_spec_t *digest, unsigned int *algorithm_id,
-                  gcry_module_t *module)
-{
-  return _gcry_md_register (digest, NULL, algorithm_id, module);
-}
-
-void
-gcry_md_unregister (gcry_module_t module)
-{
-  _gcry_md_unregister (module);
-}
diff --git a/grub-core/lib/libgcrypt/src/visibility.h 
b/grub-core/lib/libgcrypt/src/visibility.h
index 3c1e8aa89..14bf62487 100644
--- a/grub-core/lib/libgcrypt/src/visibility.h
+++ b/grub-core/lib/libgcrypt/src/visibility.h
@@ -22,216 +22,6 @@
 
 /* Redefine all public symbols with an underscore unless we already
    use the underscore prefixed version internally.  */
-#define gcry_check_version          _gcry_check_version
-#define gcry_control                _gcry_control
-
-#define gcry_set_allocation_handler _gcry_set_allocation_handler
-#define gcry_set_fatalerror_handler _gcry_set_fatalerror_handler
-#define gcry_set_gettext_handler    _gcry_set_gettext_handler
-#define gcry_set_log_handler        _gcry_set_log_handler
-#define gcry_set_outofcore_handler  _gcry_set_outofcore_handler
-#define gcry_set_progress_handler   _gcry_set_progress_handler
-#define gcry_err_code_from_errno    _gcry_err_code_from_errno
-#define gcry_err_code_to_errno      _gcry_err_code_to_errno
-#define gcry_err_make_from_errno    _gcry_err_make_from_errno
-#define gcry_error_from_errno       _gcry_error_from_errno
-#define gcry_strerror               _gcry_strerror
-#define gcry_strsource              _gcry_strsource
-
-#define gcry_free                   _gcry_free
-#define gcry_malloc                 _gcry_malloc
-#define gcry_malloc_secure          _gcry_malloc_secure
-#define gcry_calloc                 _gcry_calloc
-#define gcry_calloc_secure          _gcry_calloc_secure
-#define gcry_realloc                _gcry_realloc
-#define gcry_strdup                 _gcry_strdup
-#define gcry_is_secure              _gcry_is_secure
-#define gcry_xcalloc                _gcry_xcalloc
-#define gcry_xcalloc_secure         _gcry_xcalloc_secure
-#define gcry_xmalloc                _gcry_xmalloc
-#define gcry_xmalloc_secure         _gcry_xmalloc_secure
-#define gcry_xrealloc               _gcry_xrealloc
-#define gcry_xstrdup                _gcry_xstrdup
-
-#define gcry_md_algo_info           _gcry_md_algo_info
-#define gcry_md_algo_name           _gcry_md_algo_name
-#define gcry_md_close               _gcry_md_close
-#define gcry_md_copy                _gcry_md_copy
-#define gcry_md_ctl                 _gcry_md_ctl
-#define gcry_md_enable              _gcry_md_enable
-#define gcry_md_get                 _gcry_md_get
-#define gcry_md_get_algo            _gcry_md_get_algo
-#define gcry_md_get_algo_dlen       _gcry_md_get_algo_dlen
-#define gcry_md_hash_buffer         _gcry_md_hash_buffer
-#define gcry_md_info                _gcry_md_info
-#define gcry_md_is_enabled          _gcry_md_is_enabled
-#define gcry_md_is_secure           _gcry_md_is_secure
-#define gcry_md_list                _gcry_md_list
-#define gcry_md_map_name            _gcry_md_map_name
-#define gcry_md_open                _gcry_md_open
-#define gcry_md_read                _gcry_md_read
-/* gcry_md_register and _gcry_md_register differ.  */
-#define gcry_md_unregister          _gcry_md_unregister
-#define gcry_md_reset               _gcry_md_reset
-#define gcry_md_setkey              _gcry_md_setkey
-#define gcry_md_write               _gcry_md_write
-#define gcry_md_debug               _gcry_md_debug
-
-#define gcry_cipher_algo_info       _gcry_cipher_algo_info
-#define gcry_cipher_algo_name       _gcry_cipher_algo_name
-#define gcry_cipher_close           _gcry_cipher_close
-#define gcry_cipher_setkey          _gcry_cipher_setkey
-#define gcry_cipher_setiv           _gcry_cipher_setiv
-#define gcry_cipher_setctr          _gcry_cipher_setctr
-#define gcry_cipher_ctl             _gcry_cipher_ctl
-#define gcry_cipher_decrypt         _gcry_cipher_decrypt
-#define gcry_cipher_encrypt         _gcry_cipher_encrypt
-#define gcry_cipher_get_algo_blklen _gcry_cipher_get_algo_blklen
-#define gcry_cipher_get_algo_keylen _gcry_cipher_get_algo_keylen
-#define gcry_cipher_info            _gcry_cipher_info
-#define gcry_cipher_list            _gcry_cipher_list
-#define gcry_cipher_map_name        _gcry_cipher_map_name
-#define gcry_cipher_mode_from_oid   _gcry_cipher_mode_from_oid
-#define gcry_cipher_open            _gcry_cipher_open
-/* gcry_cipher_register and  _gcry_cipher_register differ.  */
-#define gcry_cipher_unregister      _gcry_cipher_unregister
-
-#define gcry_pk_algo_info           _gcry_pk_algo_info
-#define gcry_pk_algo_name           _gcry_pk_algo_name
-#define gcry_pk_ctl                 _gcry_pk_ctl
-#define gcry_pk_decrypt             _gcry_pk_decrypt
-#define gcry_pk_encrypt             _gcry_pk_encrypt
-#define gcry_pk_genkey              _gcry_pk_genkey
-#define gcry_pk_get_keygrip         _gcry_pk_get_keygrip
-#define gcry_pk_get_curve           _gcry_pk_get_curve
-#define gcry_pk_get_param           _gcry_pk_get_param
-#define gcry_pk_get_nbits           _gcry_pk_get_nbits
-#define gcry_pk_list                _gcry_pk_list
-#define gcry_pk_map_name            _gcry_pk_map_name
-/* gcry_pk_register and _gcry_pk_register differ.  */
-#define gcry_pk_unregister          _gcry_pk_unregister
-#define gcry_pk_sign                _gcry_pk_sign
-#define gcry_pk_testkey             _gcry_pk_testkey
-#define gcry_pk_verify              _gcry_pk_verify
-
-#define gcry_ac_data_new            _gcry_ac_data_new
-#define gcry_ac_data_destroy        _gcry_ac_data_destroy
-#define gcry_ac_data_copy           _gcry_ac_data_copy
-#define gcry_ac_data_length         _gcry_ac_data_length
-#define gcry_ac_data_clear          _gcry_ac_data_clear
-#define gcry_ac_data_set            _gcry_ac_data_set
-#define gcry_ac_data_get_name       _gcry_ac_data_get_name
-#define gcry_ac_data_get_index      _gcry_ac_data_get_index
-#define gcry_ac_open                _gcry_ac_open
-#define gcry_ac_close               _gcry_ac_close
-#define gcry_ac_key_init            _gcry_ac_key_init
-#define gcry_ac_key_pair_generate   _gcry_ac_key_pair_generate
-#define gcry_ac_key_pair_extract    _gcry_ac_key_pair_extract
-#define gcry_ac_key_data_get        _gcry_ac_key_data_get
-#define gcry_ac_key_test            _gcry_ac_key_test
-#define gcry_ac_key_get_nbits       _gcry_ac_key_get_nbits
-#define gcry_ac_key_get_grip        _gcry_ac_key_get_grip
-#define gcry_ac_key_destroy         _gcry_ac_key_destroy
-#define gcry_ac_key_pair_destroy    _gcry_ac_key_pair_destroy
-#define gcry_ac_data_encrypt        _gcry_ac_data_encrypt
-#define gcry_ac_data_decrypt        _gcry_ac_data_decrypt
-#define gcry_ac_data_sign           _gcry_ac_data_sign
-#define gcry_ac_data_verify         _gcry_ac_data_verify
-#define gcry_ac_id_to_name          _gcry_ac_id_to_name
-#define gcry_ac_name_to_id          _gcry_ac_name_to_id
-#define gcry_ac_data_encode         _gcry_ac_data_encode
-#define gcry_ac_data_decode         _gcry_ac_data_decode
-#define gcry_ac_mpi_to_os           _gcry_ac_mpi_to_os
-#define gcry_ac_mpi_to_os_alloc     _gcry_ac_mpi_to_os_alloc
-#define gcry_ac_os_to_mpi           _gcry_ac_os_to_mpi
-#define gcry_ac_data_encrypt_scheme _gcry_ac_data_encrypt_scheme
-#define gcry_ac_data_decrypt_scheme _gcry_ac_data_decrypt_scheme
-#define gcry_ac_data_sign_scheme    _gcry_ac_data_sign_scheme
-#define gcry_ac_data_verify_scheme  _gcry_ac_data_verify_scheme
-#define gcry_ac_data_to_sexp        _gcry_ac_data_to_sexp
-#define gcry_ac_data_from_sexp      _gcry_ac_data_from_sexp
-#define gcry_ac_io_init             _gcry_ac_io_init
-#define gcry_ac_io_init_va          _gcry_ac_io_init_va
-
-#define gcry_kdf_derive             _gcry_kdf_derive
-
-#define gcry_prime_check            _gcry_prime_check
-#define gcry_prime_generate         _gcry_prime_generate
-#define gcry_prime_group_generator  _gcry_prime_group_generator
-#define gcry_prime_release_factors  _gcry_prime_release_factors
-
-#define gcry_random_add_bytes       _gcry_random_add_bytes
-#define gcry_random_bytes           _gcry_random_bytes
-#define gcry_random_bytes_secure    _gcry_random_bytes_secure
-#define gcry_randomize              _gcry_randomize
-#define gcry_create_nonce           _gcry_create_nonce
-
-#define gcry_sexp_alist             _gcry_sexp_alist
-#define gcry_sexp_append            _gcry_sexp_append
-#define gcry_sexp_build             _gcry_sexp_build
-#define gcry_sexp_build_array       _gcry_sexp_build_array
-#define gcry_sexp_cadr              _gcry_sexp_cadr
-#define gcry_sexp_canon_len         _gcry_sexp_canon_len
-#define gcry_sexp_car               _gcry_sexp_car
-#define gcry_sexp_cdr               _gcry_sexp_cdr
-#define gcry_sexp_cons              _gcry_sexp_cons
-#define gcry_sexp_create            _gcry_sexp_create
-#define gcry_sexp_dump              _gcry_sexp_dump
-#define gcry_sexp_find_token        _gcry_sexp_find_token
-#define gcry_sexp_length            _gcry_sexp_length
-#define gcry_sexp_new               _gcry_sexp_new
-#define gcry_sexp_nth               _gcry_sexp_nth
-#define gcry_sexp_nth_data          _gcry_sexp_nth_data
-#define gcry_sexp_nth_mpi           _gcry_sexp_nth_mpi
-#define gcry_sexp_prepend           _gcry_sexp_prepend
-#define gcry_sexp_release           _gcry_sexp_release
-#define gcry_sexp_sprint            _gcry_sexp_sprint
-#define gcry_sexp_sscan             _gcry_sexp_sscan
-#define gcry_sexp_vlist             _gcry_sexp_vlist
-#define gcry_sexp_nth_string        _gcry_sexp_nth_string
-
-#define gcry_mpi_add                _gcry_mpi_add
-#define gcry_mpi_add_ui             _gcry_mpi_add_ui
-#define gcry_mpi_addm               _gcry_mpi_addm
-#define gcry_mpi_aprint             _gcry_mpi_aprint
-#define gcry_mpi_clear_bit          _gcry_mpi_clear_bit
-#define gcry_mpi_clear_flag         _gcry_mpi_clear_flag
-#define gcry_mpi_clear_highbit      _gcry_mpi_clear_highbit
-#define gcry_mpi_cmp                _gcry_mpi_cmp
-#define gcry_mpi_cmp_ui             _gcry_mpi_cmp_ui
-#define gcry_mpi_copy               _gcry_mpi_copy
-#define gcry_mpi_div                _gcry_mpi_div
-#define gcry_mpi_dump               _gcry_mpi_dump
-#define gcry_mpi_gcd                _gcry_mpi_gcd
-#define gcry_mpi_get_flag           _gcry_mpi_get_flag
-#define gcry_mpi_get_nbits          _gcry_mpi_get_nbits
-#define gcry_mpi_get_opaque         _gcry_mpi_get_opaque
-#define gcry_mpi_invm               _gcry_mpi_invm
-#define gcry_mpi_mod                _gcry_mpi_mod
-#define gcry_mpi_mul                _gcry_mpi_mul
-#define gcry_mpi_mul_2exp           _gcry_mpi_mul_2exp
-#define gcry_mpi_mul_ui             _gcry_mpi_mul_ui
-#define gcry_mpi_mulm               _gcry_mpi_mulm
-#define gcry_mpi_new                _gcry_mpi_new
-#define gcry_mpi_powm               _gcry_mpi_powm
-#define gcry_mpi_print              _gcry_mpi_print
-#define gcry_mpi_randomize          _gcry_mpi_randomize
-#define gcry_mpi_release            _gcry_mpi_release
-#define gcry_mpi_rshift             _gcry_mpi_rshift
-#define gcry_mpi_lshift             _gcry_mpi_lshift
-#define gcry_mpi_scan               _gcry_mpi_scan
-#define gcry_mpi_set                _gcry_mpi_set
-#define gcry_mpi_set_bit            _gcry_mpi_set_bit
-#define gcry_mpi_set_flag           _gcry_mpi_set_flag
-#define gcry_mpi_set_highbit        _gcry_mpi_set_highbit
-#define gcry_mpi_set_opaque         _gcry_mpi_set_opaque
-#define gcry_mpi_set_ui             _gcry_mpi_set_ui
-#define gcry_mpi_snew               _gcry_mpi_snew
-#define gcry_mpi_sub                _gcry_mpi_sub
-#define gcry_mpi_sub_ui             _gcry_mpi_sub_ui
-#define gcry_mpi_subm               _gcry_mpi_subm
-#define gcry_mpi_swap               _gcry_mpi_swap
-#define gcry_mpi_test_bit           _gcry_mpi_test_bit
 
 
 /* Include the main header here so that public symbols are mapped to
@@ -240,112 +30,15 @@
   /* We need to redeclare the deprecated functions without the
      deprecated attribute.  */
 # define GCRYPT_NO_DEPRECATED
-# include "gcrypt.h"
-/* The algorithm IDs. */
-  gcry_error_t gcry_ac_data_new (gcry_ac_data_t *data);
-  void gcry_ac_data_destroy (gcry_ac_data_t data);
-  gcry_error_t gcry_ac_data_copy (gcry_ac_data_t *data_cp,
-                                  gcry_ac_data_t data);
-  unsigned int gcry_ac_data_length (gcry_ac_data_t data);
-  void gcry_ac_data_clear (gcry_ac_data_t data);
-  gcry_error_t gcry_ac_data_set (gcry_ac_data_t data, unsigned int flags,
-                                 const char *name, gcry_mpi_t mpi);
-  gcry_error_t gcry_ac_data_get_name (gcry_ac_data_t data, unsigned int flags,
-                                      const char *name, gcry_mpi_t *mpi);
-  gcry_error_t gcry_ac_data_get_index (gcry_ac_data_t data, unsigned int flags,
-                                       unsigned int idx,
-                                       const char **name, gcry_mpi_t *mpi);
-  gcry_error_t gcry_ac_data_to_sexp (gcry_ac_data_t data, gcry_sexp_t *sexp,
-                                   const char **identifiers);
-  gcry_error_t gcry_ac_data_from_sexp (gcry_ac_data_t *data, gcry_sexp_t sexp,
-                                     const char **identifiers);
-  void gcry_ac_io_init (gcry_ac_io_t *ac_io, gcry_ac_io_mode_t mode,
-                      gcry_ac_io_type_t type, ...);
-  void gcry_ac_io_init_va (gcry_ac_io_t *ac_io, gcry_ac_io_mode_t mode,
-                         gcry_ac_io_type_t type, va_list ap);
-  gcry_error_t gcry_ac_open (gcry_ac_handle_t *handle,
-                             gcry_ac_id_t algorithm, unsigned int flags);
-  void gcry_ac_close (gcry_ac_handle_t handle);
-  gcry_error_t gcry_ac_key_init (gcry_ac_key_t *key, gcry_ac_handle_t handle,
-                                 gcry_ac_key_type_t type, gcry_ac_data_t data);
-  gcry_error_t gcry_ac_key_pair_generate (gcry_ac_handle_t handle,
-                                          unsigned int nbits, void *spec,
-                                          gcry_ac_key_pair_t *key_pair,
-                                          gcry_mpi_t **misc_data);
-  gcry_ac_key_t gcry_ac_key_pair_extract (gcry_ac_key_pair_t key_pair,
-                                          gcry_ac_key_type_t which);
-  gcry_ac_data_t gcry_ac_key_data_get (gcry_ac_key_t key);
-  gcry_error_t gcry_ac_key_test (gcry_ac_handle_t handle, gcry_ac_key_t key);
-  gcry_error_t gcry_ac_key_get_nbits (gcry_ac_handle_t handle,
-                                      gcry_ac_key_t key, unsigned int *nbits);
-  gcry_error_t gcry_ac_key_get_grip (gcry_ac_handle_t handle, gcry_ac_key_t 
key,
-                                     unsigned char *key_grip);
-  void gcry_ac_key_destroy (gcry_ac_key_t key);
-  void gcry_ac_key_pair_destroy (gcry_ac_key_pair_t key_pair);
-  gcry_error_t gcry_ac_data_encode (gcry_ac_em_t method,
-                                  unsigned int flags, void *options,
-                                  gcry_ac_io_t *io_read,
-                                  gcry_ac_io_t *io_write);
-  gcry_error_t gcry_ac_data_decode (gcry_ac_em_t method,
-                                  unsigned int flags, void *options,
-                                  gcry_ac_io_t *io_read,
-                                  gcry_ac_io_t *io_write);
-  gcry_error_t gcry_ac_data_encrypt (gcry_ac_handle_t handle,
-                                     unsigned int flags,
-                                     gcry_ac_key_t key,
-                                     gcry_mpi_t data_plain,
-                                     gcry_ac_data_t *data_encrypted);
-  gcry_error_t gcry_ac_data_decrypt (gcry_ac_handle_t handle,
-                                     unsigned int flags,
-                                     gcry_ac_key_t key,
-                                     gcry_mpi_t *data_plain,
-                                     gcry_ac_data_t data_encrypted);
-  gcry_error_t gcry_ac_data_sign (gcry_ac_handle_t handle,
-                                  gcry_ac_key_t key,
-                                  gcry_mpi_t data,
-                                  gcry_ac_data_t *data_signature);
-  gcry_error_t gcry_ac_data_verify (gcry_ac_handle_t handle,
-                                    gcry_ac_key_t key,
-                                    gcry_mpi_t data,
-                                    gcry_ac_data_t data_signature);
-  gcry_error_t gcry_ac_data_encrypt_scheme (gcry_ac_handle_t handle,
-                                          gcry_ac_scheme_t scheme,
-                                          unsigned int flags, void *opts,
-                                          gcry_ac_key_t key,
-                                          gcry_ac_io_t *io_message,
-                                          gcry_ac_io_t *io_cipher);
-  gcry_error_t gcry_ac_data_decrypt_scheme (gcry_ac_handle_t handle,
-                                          gcry_ac_scheme_t scheme,
-                                          unsigned int flags, void *opts,
-                                          gcry_ac_key_t key,
-                                          gcry_ac_io_t *io_cipher,
-                                          gcry_ac_io_t *io_message);
-  gcry_error_t gcry_ac_data_sign_scheme (gcry_ac_handle_t handle,
-                                       gcry_ac_scheme_t scheme,
-                                       unsigned int flags, void *opts,
-                                       gcry_ac_key_t key,
-                                       gcry_ac_io_t *io_message,
-                                       gcry_ac_io_t *io_signature);
-  gcry_error_t gcry_ac_data_verify_scheme (gcry_ac_handle_t handle,
-                                         gcry_ac_scheme_t scheme,
-                                         unsigned int flags, void *opts,
-                                         gcry_ac_key_t key,
-                                         gcry_ac_io_t *io_message,
-                                         gcry_ac_io_t *io_signature);
-  gcry_error_t gcry_ac_id_to_name (gcry_ac_id_t algorithm, const char **name);
-  gcry_error_t gcry_ac_name_to_id (const char *name, gcry_ac_id_t *algorithm);
+# include "gcrypt-int.h"
+  /* None in this version.  */
 #else
-# include "gcrypt.h"
+# include "gcrypt-int.h"
 #endif
 
 /* Prototypes of functions exported but not ready for use.  */
 gcry_err_code_t gcry_md_get (gcry_md_hd_t hd, int algo,
                              unsigned char *buffer, int buflen);
-void gcry_ac_mpi_to_os (gcry_mpi_t mpi, unsigned char *os, size_t os_n);
-gcry_error_t gcry_ac_mpi_to_os_alloc (gcry_mpi_t mpi, unsigned char **os,
-                                       size_t *os_n);
-void gcry_ac_os_to_mpi (gcry_mpi_t mpi, unsigned char *os, size_t os_n);
-
 
 
 /* Our use of the ELF visibility feature works by passing
@@ -357,451 +50,486 @@ void gcry_ac_os_to_mpi (gcry_mpi_t mpi, unsigned char 
*os, size_t os_n);
 
 #ifdef _GCRY_INCLUDED_BY_VISIBILITY_C
 
-/* A macro to flag a function as visible.  Note that we take the
-   definition from the mapped name.  */
+/* A macro to flag a function as visible.  */
 #ifdef GCRY_USE_VISIBILITY
-# define MARK_VISIBLE(name) \
-    extern __typeof__ (_##name) name __attribute__ ((visibility("default")));
 # define MARK_VISIBLEX(name) \
     extern __typeof__ (name) name __attribute__ ((visibility("default")));
 #else
-# define MARK_VISIBLE(name) /* */
 # define MARK_VISIBLEX(name) /* */
 #endif
 
 
-/* First undef all redefined symbols so that we set the attribute on
-   the correct version name.  */
-#undef gcry_check_version
-#undef gcry_control
-
-#undef gcry_set_allocation_handler
-#undef gcry_set_fatalerror_handler
-#undef gcry_set_gettext_handler
-#undef gcry_set_log_handler
-#undef gcry_set_outofcore_handler
-#undef gcry_set_progress_handler
-#undef gcry_err_code_from_errno
-#undef gcry_err_code_to_errno
-#undef gcry_err_make_from_errno
-#undef gcry_error_from_errno
-#undef gcry_strerror
-#undef gcry_strsource
-
-#undef gcry_free
-#undef gcry_malloc
-#undef gcry_malloc_secure
-#undef gcry_calloc
-#undef gcry_calloc_secure
-#undef gcry_realloc
-#undef gcry_strdup
-#undef gcry_is_secure
-#undef gcry_xcalloc
-#undef gcry_xcalloc_secure
-#undef gcry_xmalloc
-#undef gcry_xmalloc_secure
-#undef gcry_xrealloc
-#undef gcry_xstrdup
-
-#undef gcry_md_algo_info
-#undef gcry_md_algo_name
-#undef gcry_md_close
-#undef gcry_md_copy
-#undef gcry_md_ctl
-#undef gcry_md_enable
-#undef gcry_md_get
-#undef gcry_md_get_algo
-#undef gcry_md_get_algo_dlen
-#undef gcry_md_hash_buffer
-#undef gcry_md_info
-#undef gcry_md_is_enabled
-#undef gcry_md_is_secure
-#undef gcry_md_list
-#undef gcry_md_map_name
-#undef gcry_md_open
-#undef gcry_md_read
-/* gcry_md_register is not anymore a macro.  */
-#undef gcry_md_unregister
-#undef gcry_md_reset
-#undef gcry_md_setkey
-#undef gcry_md_write
-#undef gcry_md_debug
-
-#undef gcry_cipher_algo_info
-#undef gcry_cipher_algo_name
-#undef gcry_cipher_close
-#undef gcry_cipher_setkey
-#undef gcry_cipher_setiv
-#undef gcry_cipher_setctr
-#undef gcry_cipher_ctl
-#undef gcry_cipher_decrypt
-#undef gcry_cipher_encrypt
-#undef gcry_cipher_get_algo_blklen
-#undef gcry_cipher_get_algo_keylen
-#undef gcry_cipher_info
-#undef gcry_cipher_list
-#undef gcry_cipher_map_name
-#undef gcry_cipher_mode_from_oid
-#undef gcry_cipher_open
-/* gcry_cipher_register is not anymore a macro.  */
-#undef gcry_cipher_unregister
-
-#undef gcry_pk_algo_info
-#undef gcry_pk_algo_name
-#undef gcry_pk_ctl
-#undef gcry_pk_decrypt
-#undef gcry_pk_encrypt
-#undef gcry_pk_genkey
-#undef gcry_pk_get_keygrip
-#undef gcry_pk_get_curve
-#undef gcry_pk_get_param
-#undef gcry_pk_get_nbits
-#undef gcry_pk_list
-#undef gcry_pk_map_name
-/* gcry_pk_register is not anymore a macro.  */
-#undef gcry_pk_unregister
-#undef gcry_pk_sign
-#undef gcry_pk_testkey
-#undef gcry_pk_verify
-
-#undef gcry_ac_data_new
-#undef gcry_ac_data_destroy
-#undef gcry_ac_data_copy
-#undef gcry_ac_data_length
-#undef gcry_ac_data_clear
-#undef gcry_ac_data_set
-#undef gcry_ac_data_get_name
-#undef gcry_ac_data_get_index
-#undef gcry_ac_open
-#undef gcry_ac_close
-#undef gcry_ac_key_init
-#undef gcry_ac_key_pair_generate
-#undef gcry_ac_key_pair_extract
-#undef gcry_ac_key_data_get
-#undef gcry_ac_key_test
-#undef gcry_ac_key_get_nbits
-#undef gcry_ac_key_get_grip
-#undef gcry_ac_key_destroy
-#undef gcry_ac_key_pair_destroy
-#undef gcry_ac_data_encrypt
-#undef gcry_ac_data_decrypt
-#undef gcry_ac_data_sign
-#undef gcry_ac_data_verify
-#undef gcry_ac_id_to_name
-#undef gcry_ac_name_to_id
-#undef gcry_ac_data_encode
-#undef gcry_ac_data_decode
-#undef gcry_ac_mpi_to_os
-#undef gcry_ac_mpi_to_os_alloc
-#undef gcry_ac_os_to_mpi
-#undef gcry_ac_data_encrypt_scheme
-#undef gcry_ac_data_decrypt_scheme
-#undef gcry_ac_data_sign_scheme
-#undef gcry_ac_data_verify_scheme
-#undef gcry_ac_data_to_sexp
-#undef gcry_ac_data_from_sexp
-#undef gcry_ac_io_init
-#undef gcry_ac_io_init_va
-
-#undef gcry_kdf_derive
-
-#undef gcry_prime_check
-#undef gcry_prime_generate
-#undef gcry_prime_group_generator
-#undef gcry_prime_release_factors
-
-#undef gcry_random_add_bytes
-#undef gcry_random_bytes
-#undef gcry_random_bytes_secure
-#undef gcry_randomize
-#undef gcry_create_nonce
-
-#undef gcry_sexp_alist
-#undef gcry_sexp_append
-#undef gcry_sexp_build
-#undef gcry_sexp_build_array
-#undef gcry_sexp_cadr
-#undef gcry_sexp_canon_len
-#undef gcry_sexp_car
-#undef gcry_sexp_cdr
-#undef gcry_sexp_cons
-#undef gcry_sexp_create
-#undef gcry_sexp_dump
-#undef gcry_sexp_find_token
-#undef gcry_sexp_length
-#undef gcry_sexp_new
-#undef gcry_sexp_nth
-#undef gcry_sexp_nth_data
-#undef gcry_sexp_nth_mpi
-#undef gcry_sexp_prepend
-#undef gcry_sexp_release
-#undef gcry_sexp_sprint
-#undef gcry_sexp_sscan
-#undef gcry_sexp_vlist
-#undef gcry_sexp_nth_string
-
-#undef gcry_mpi_add
-#undef gcry_mpi_add_ui
-#undef gcry_mpi_addm
-#undef gcry_mpi_aprint
-#undef gcry_mpi_clear_bit
-#undef gcry_mpi_clear_flag
-#undef gcry_mpi_clear_highbit
-#undef gcry_mpi_cmp
-#undef gcry_mpi_cmp_ui
-#undef gcry_mpi_copy
-#undef gcry_mpi_div
-#undef gcry_mpi_dump
-#undef gcry_mpi_gcd
-#undef gcry_mpi_get_flag
-#undef gcry_mpi_get_nbits
-#undef gcry_mpi_get_opaque
-#undef gcry_mpi_invm
-#undef gcry_mpi_mod
-#undef gcry_mpi_mul
-#undef gcry_mpi_mul_2exp
-#undef gcry_mpi_mul_ui
-#undef gcry_mpi_mulm
-#undef gcry_mpi_new
-#undef gcry_mpi_powm
-#undef gcry_mpi_print
-#undef gcry_mpi_randomize
-#undef gcry_mpi_release
-#undef gcry_mpi_rshift
-#undef gcry_mpi_lshift
-#undef gcry_mpi_scan
-#undef gcry_mpi_set
-#undef gcry_mpi_set_bit
-#undef gcry_mpi_set_flag
-#undef gcry_mpi_set_highbit
-#undef gcry_mpi_set_opaque
-#undef gcry_mpi_set_ui
-#undef gcry_mpi_snew
-#undef gcry_mpi_sub
-#undef gcry_mpi_sub_ui
-#undef gcry_mpi_subm
-#undef gcry_mpi_swap
-#undef gcry_mpi_test_bit
-
-
 /* Now mark all symbols.  */
 
-MARK_VISIBLE (gcry_check_version)
-MARK_VISIBLE (gcry_control)
-
-MARK_VISIBLE (gcry_set_allocation_handler)
-MARK_VISIBLE (gcry_set_fatalerror_handler)
-MARK_VISIBLE (gcry_set_gettext_handler)
-MARK_VISIBLE (gcry_set_log_handler)
-MARK_VISIBLE (gcry_set_outofcore_handler)
-MARK_VISIBLE (gcry_set_progress_handler)
-MARK_VISIBLE (gcry_err_code_from_errno)
-MARK_VISIBLE (gcry_err_code_to_errno)
-MARK_VISIBLE (gcry_err_make_from_errno)
-MARK_VISIBLE (gcry_error_from_errno)
-MARK_VISIBLE (gcry_strerror)
-MARK_VISIBLE (gcry_strsource)
-
-MARK_VISIBLE (gcry_free)
-MARK_VISIBLE (gcry_malloc)
-MARK_VISIBLE (gcry_malloc_secure)
-MARK_VISIBLE (gcry_calloc)
-MARK_VISIBLE (gcry_calloc_secure)
-MARK_VISIBLE (gcry_realloc)
-MARK_VISIBLE (gcry_strdup)
-MARK_VISIBLE (gcry_is_secure)
-MARK_VISIBLE (gcry_xcalloc)
-MARK_VISIBLE (gcry_xcalloc_secure)
-MARK_VISIBLE (gcry_xmalloc)
-MARK_VISIBLE (gcry_xmalloc_secure)
-MARK_VISIBLE (gcry_xrealloc)
-MARK_VISIBLE (gcry_xstrdup)
-
-MARK_VISIBLE (gcry_md_algo_info)
-MARK_VISIBLE (gcry_md_algo_name)
-MARK_VISIBLE (gcry_md_close)
-MARK_VISIBLE (gcry_md_copy)
-MARK_VISIBLE (gcry_md_ctl)
-MARK_VISIBLE (gcry_md_enable)
-MARK_VISIBLE (gcry_md_get)
-MARK_VISIBLE (gcry_md_get_algo)
-MARK_VISIBLE (gcry_md_get_algo_dlen)
-MARK_VISIBLE (gcry_md_hash_buffer)
-MARK_VISIBLE (gcry_md_info)
-MARK_VISIBLE (gcry_md_is_enabled)
-MARK_VISIBLE (gcry_md_is_secure)
-MARK_VISIBLE (gcry_md_list)
-MARK_VISIBLE (gcry_md_map_name)
-MARK_VISIBLE (gcry_md_open)
-MARK_VISIBLE (gcry_md_read)
-MARK_VISIBLEX(gcry_md_register)
-MARK_VISIBLE (gcry_md_reset)
-MARK_VISIBLE (gcry_md_setkey)
-MARK_VISIBLE (gcry_md_unregister)
-MARK_VISIBLE (gcry_md_write)
-MARK_VISIBLE (gcry_md_debug)
-
-MARK_VISIBLE (gcry_cipher_algo_info)
-MARK_VISIBLE (gcry_cipher_algo_name)
-MARK_VISIBLE (gcry_cipher_close)
-MARK_VISIBLE (gcry_cipher_setkey)
-MARK_VISIBLE (gcry_cipher_setiv)
-MARK_VISIBLE (gcry_cipher_setctr)
-MARK_VISIBLE (gcry_cipher_ctl)
-MARK_VISIBLE (gcry_cipher_decrypt)
-MARK_VISIBLE (gcry_cipher_encrypt)
-MARK_VISIBLE (gcry_cipher_get_algo_blklen)
-MARK_VISIBLE (gcry_cipher_get_algo_keylen)
-MARK_VISIBLE (gcry_cipher_info)
-MARK_VISIBLE (gcry_cipher_list)
-MARK_VISIBLE (gcry_cipher_map_name)
-MARK_VISIBLE (gcry_cipher_mode_from_oid)
-MARK_VISIBLE (gcry_cipher_open)
-MARK_VISIBLEX(gcry_cipher_register)
-MARK_VISIBLE (gcry_cipher_unregister)
-
-MARK_VISIBLE (gcry_pk_algo_info)
-MARK_VISIBLE (gcry_pk_algo_name)
-MARK_VISIBLE (gcry_pk_ctl)
-MARK_VISIBLE (gcry_pk_decrypt)
-MARK_VISIBLE (gcry_pk_encrypt)
-MARK_VISIBLE (gcry_pk_genkey)
-MARK_VISIBLE (gcry_pk_get_keygrip)
-MARK_VISIBLE (gcry_pk_get_curve)
-MARK_VISIBLE (gcry_pk_get_param)
-MARK_VISIBLE (gcry_pk_get_nbits)
-MARK_VISIBLE (gcry_pk_list)
-MARK_VISIBLE (gcry_pk_map_name)
-MARK_VISIBLEX(gcry_pk_register)
-MARK_VISIBLE (gcry_pk_sign)
-MARK_VISIBLE (gcry_pk_testkey)
-MARK_VISIBLE (gcry_pk_unregister)
-MARK_VISIBLE (gcry_pk_verify)
-
-MARK_VISIBLE (gcry_ac_data_new)
-MARK_VISIBLE (gcry_ac_data_destroy)
-MARK_VISIBLE (gcry_ac_data_copy)
-MARK_VISIBLE (gcry_ac_data_length)
-MARK_VISIBLE (gcry_ac_data_clear)
-MARK_VISIBLE (gcry_ac_data_set)
-MARK_VISIBLE (gcry_ac_data_get_name)
-MARK_VISIBLE (gcry_ac_data_get_index)
-MARK_VISIBLE (gcry_ac_open)
-MARK_VISIBLE (gcry_ac_close)
-MARK_VISIBLE (gcry_ac_key_init)
-MARK_VISIBLE (gcry_ac_key_pair_generate)
-MARK_VISIBLE (gcry_ac_key_pair_extract)
-MARK_VISIBLE (gcry_ac_key_data_get)
-MARK_VISIBLE (gcry_ac_key_test)
-MARK_VISIBLE (gcry_ac_key_get_nbits)
-MARK_VISIBLE (gcry_ac_key_get_grip)
-MARK_VISIBLE (gcry_ac_key_destroy)
-MARK_VISIBLE (gcry_ac_key_pair_destroy)
-MARK_VISIBLE (gcry_ac_data_encrypt)
-MARK_VISIBLE (gcry_ac_data_decrypt)
-MARK_VISIBLE (gcry_ac_data_sign)
-MARK_VISIBLE (gcry_ac_data_verify)
-MARK_VISIBLE (gcry_ac_id_to_name)
-MARK_VISIBLE (gcry_ac_name_to_id)
-/* MARK_VISIBLE (gcry_ac_list) Not defined although it is in
-        libgcrypt.vers. */
-MARK_VISIBLE (gcry_ac_data_encode)
-MARK_VISIBLE (gcry_ac_data_decode)
-MARK_VISIBLE (gcry_ac_mpi_to_os)
-MARK_VISIBLE (gcry_ac_mpi_to_os_alloc)
-MARK_VISIBLE (gcry_ac_os_to_mpi)
-MARK_VISIBLE (gcry_ac_data_encrypt_scheme)
-MARK_VISIBLE (gcry_ac_data_decrypt_scheme)
-MARK_VISIBLE (gcry_ac_data_sign_scheme)
-MARK_VISIBLE (gcry_ac_data_verify_scheme)
-MARK_VISIBLE (gcry_ac_data_to_sexp)
-MARK_VISIBLE (gcry_ac_data_from_sexp)
-MARK_VISIBLE (gcry_ac_io_init)
-MARK_VISIBLE (gcry_ac_io_init_va)
-
-MARK_VISIBLE (gcry_kdf_derive)
-
-MARK_VISIBLE (gcry_prime_check)
-MARK_VISIBLE (gcry_prime_generate)
-MARK_VISIBLE (gcry_prime_group_generator)
-MARK_VISIBLE (gcry_prime_release_factors)
-
-MARK_VISIBLE (gcry_random_add_bytes)
-MARK_VISIBLE (gcry_random_bytes)
-MARK_VISIBLE (gcry_random_bytes_secure)
-MARK_VISIBLE (gcry_randomize)
-MARK_VISIBLE (gcry_create_nonce)
-
-MARK_VISIBLE (gcry_sexp_alist)
-MARK_VISIBLE (gcry_sexp_append)
-MARK_VISIBLE (gcry_sexp_build)
-MARK_VISIBLE (gcry_sexp_build_array)
-MARK_VISIBLE (gcry_sexp_cadr)
-MARK_VISIBLE (gcry_sexp_canon_len)
-MARK_VISIBLE (gcry_sexp_car)
-MARK_VISIBLE (gcry_sexp_cdr)
-MARK_VISIBLE (gcry_sexp_cons)
-MARK_VISIBLE (gcry_sexp_create)
-MARK_VISIBLE (gcry_sexp_dump)
-MARK_VISIBLE (gcry_sexp_find_token)
-MARK_VISIBLE (gcry_sexp_length)
-MARK_VISIBLE (gcry_sexp_new)
-MARK_VISIBLE (gcry_sexp_nth)
-MARK_VISIBLE (gcry_sexp_nth_data)
-MARK_VISIBLE (gcry_sexp_nth_mpi)
-MARK_VISIBLE (gcry_sexp_prepend)
-MARK_VISIBLE (gcry_sexp_release)
-MARK_VISIBLE (gcry_sexp_sprint)
-MARK_VISIBLE (gcry_sexp_sscan)
-MARK_VISIBLE (gcry_sexp_vlist)
-MARK_VISIBLE (gcry_sexp_nth_string)
-
-MARK_VISIBLE (gcry_mpi_add)
-MARK_VISIBLE (gcry_mpi_add_ui)
-MARK_VISIBLE (gcry_mpi_addm)
-MARK_VISIBLE (gcry_mpi_aprint)
-MARK_VISIBLE (gcry_mpi_clear_bit)
-MARK_VISIBLE (gcry_mpi_clear_flag)
-MARK_VISIBLE (gcry_mpi_clear_highbit)
-MARK_VISIBLE (gcry_mpi_cmp)
-MARK_VISIBLE (gcry_mpi_cmp_ui)
-MARK_VISIBLE (gcry_mpi_copy)
-MARK_VISIBLE (gcry_mpi_div)
-MARK_VISIBLE (gcry_mpi_dump)
-MARK_VISIBLE (gcry_mpi_gcd)
-MARK_VISIBLE (gcry_mpi_get_flag)
-MARK_VISIBLE (gcry_mpi_get_nbits)
-MARK_VISIBLE (gcry_mpi_get_opaque)
-MARK_VISIBLE (gcry_mpi_invm)
-MARK_VISIBLE (gcry_mpi_mod)
-MARK_VISIBLE (gcry_mpi_mul)
-MARK_VISIBLE (gcry_mpi_mul_2exp)
-MARK_VISIBLE (gcry_mpi_mul_ui)
-MARK_VISIBLE (gcry_mpi_mulm)
-MARK_VISIBLE (gcry_mpi_new)
-MARK_VISIBLE (gcry_mpi_powm)
-MARK_VISIBLE (gcry_mpi_print)
-MARK_VISIBLE (gcry_mpi_randomize)
-MARK_VISIBLE (gcry_mpi_release)
-MARK_VISIBLE (gcry_mpi_rshift)
-MARK_VISIBLE (gcry_mpi_lshift)
-MARK_VISIBLE (gcry_mpi_scan)
-MARK_VISIBLE (gcry_mpi_set)
-MARK_VISIBLE (gcry_mpi_set_bit)
-MARK_VISIBLE (gcry_mpi_set_flag)
-MARK_VISIBLE (gcry_mpi_set_highbit)
-MARK_VISIBLE (gcry_mpi_set_opaque)
-MARK_VISIBLE (gcry_mpi_set_ui)
-MARK_VISIBLE (gcry_mpi_snew)
-MARK_VISIBLE (gcry_mpi_sub)
-MARK_VISIBLE (gcry_mpi_sub_ui)
-MARK_VISIBLE (gcry_mpi_subm)
-MARK_VISIBLE (gcry_mpi_swap)
-MARK_VISIBLE (gcry_mpi_test_bit)
-
-
-
-#undef MARK_VISIBLE
-#endif /*_GCRY_INCLUDED_BY_VISIBILITY_C*/
+MARK_VISIBLEX (gcry_check_version)
+MARK_VISIBLEX (gcry_control)
+
+MARK_VISIBLEX (gcry_set_allocation_handler)
+MARK_VISIBLEX (gcry_set_fatalerror_handler)
+MARK_VISIBLEX (gcry_set_gettext_handler)
+MARK_VISIBLEX (gcry_set_log_handler)
+MARK_VISIBLEX (gcry_set_outofcore_handler)
+MARK_VISIBLEX (gcry_set_progress_handler)
+
+MARK_VISIBLEX (gcry_err_code_from_errno)
+MARK_VISIBLEX (gcry_err_code_to_errno)
+MARK_VISIBLEX (gcry_err_make_from_errno)
+MARK_VISIBLEX (gcry_error_from_errno)
+MARK_VISIBLEX (gcry_strerror)
+MARK_VISIBLEX (gcry_strsource)
+
+MARK_VISIBLEX (gcry_malloc)
+MARK_VISIBLEX (gcry_malloc_secure)
+MARK_VISIBLEX (gcry_calloc)
+MARK_VISIBLEX (gcry_calloc_secure)
+MARK_VISIBLEX (gcry_realloc)
+MARK_VISIBLEX (gcry_strdup)
+MARK_VISIBLEX (gcry_is_secure)
+MARK_VISIBLEX (gcry_xcalloc)
+MARK_VISIBLEX (gcry_xcalloc_secure)
+MARK_VISIBLEX (gcry_xmalloc)
+MARK_VISIBLEX (gcry_xmalloc_secure)
+MARK_VISIBLEX (gcry_xrealloc)
+MARK_VISIBLEX (gcry_xstrdup)
+MARK_VISIBLEX (gcry_free)
+
+MARK_VISIBLEX (gcry_md_algo_info)
+MARK_VISIBLEX (gcry_md_algo_name)
+MARK_VISIBLEX (gcry_md_close)
+MARK_VISIBLEX (gcry_md_copy)
+MARK_VISIBLEX (gcry_md_ctl)
+MARK_VISIBLEX (gcry_md_enable)
+MARK_VISIBLEX (gcry_md_get)
+MARK_VISIBLEX (gcry_md_get_algo)
+MARK_VISIBLEX (gcry_md_get_algo_dlen)
+MARK_VISIBLEX (gcry_md_hash_buffer)
+MARK_VISIBLEX (gcry_md_hash_buffers)
+MARK_VISIBLEX (gcry_md_info)
+MARK_VISIBLEX (gcry_md_is_enabled)
+MARK_VISIBLEX (gcry_md_is_secure)
+MARK_VISIBLEX (gcry_md_map_name)
+MARK_VISIBLEX (gcry_md_open)
+MARK_VISIBLEX (gcry_md_read)
+MARK_VISIBLEX (gcry_md_extract)
+MARK_VISIBLEX (gcry_md_reset)
+MARK_VISIBLEX (gcry_md_setkey)
+MARK_VISIBLEX (gcry_md_write)
+MARK_VISIBLEX (gcry_md_debug)
+
+MARK_VISIBLEX (gcry_cipher_algo_info)
+MARK_VISIBLEX (gcry_cipher_algo_name)
+MARK_VISIBLEX (gcry_cipher_close)
+MARK_VISIBLEX (gcry_cipher_setkey)
+MARK_VISIBLEX (gcry_cipher_setiv)
+MARK_VISIBLEX (gcry_cipher_setctr)
+MARK_VISIBLEX (gcry_cipher_authenticate)
+MARK_VISIBLEX (gcry_cipher_checktag)
+MARK_VISIBLEX (gcry_cipher_gettag)
+MARK_VISIBLEX (gcry_cipher_ctl)
+MARK_VISIBLEX (gcry_cipher_decrypt)
+MARK_VISIBLEX (gcry_cipher_encrypt)
+MARK_VISIBLEX (gcry_cipher_get_algo_blklen)
+MARK_VISIBLEX (gcry_cipher_get_algo_keylen)
+MARK_VISIBLEX (gcry_cipher_info)
+MARK_VISIBLEX (gcry_cipher_map_name)
+MARK_VISIBLEX (gcry_cipher_mode_from_oid)
+MARK_VISIBLEX (gcry_cipher_open)
+
+MARK_VISIBLEX (gcry_mac_algo_info)
+MARK_VISIBLEX (gcry_mac_algo_name)
+MARK_VISIBLEX (gcry_mac_map_name)
+MARK_VISIBLEX (gcry_mac_get_algo)
+MARK_VISIBLEX (gcry_mac_get_algo_maclen)
+MARK_VISIBLEX (gcry_mac_get_algo_keylen)
+MARK_VISIBLEX (gcry_mac_open)
+MARK_VISIBLEX (gcry_mac_close)
+MARK_VISIBLEX (gcry_mac_setkey)
+MARK_VISIBLEX (gcry_mac_setiv)
+MARK_VISIBLEX (gcry_mac_write)
+MARK_VISIBLEX (gcry_mac_read)
+MARK_VISIBLEX (gcry_mac_verify)
+MARK_VISIBLEX (gcry_mac_ctl)
+
+MARK_VISIBLEX (gcry_pk_algo_info)
+MARK_VISIBLEX (gcry_pk_algo_name)
+MARK_VISIBLEX (gcry_pk_ctl)
+MARK_VISIBLEX (gcry_pk_decrypt)
+MARK_VISIBLEX (gcry_pk_encrypt)
+MARK_VISIBLEX (gcry_pk_genkey)
+MARK_VISIBLEX (gcry_pk_get_keygrip)
+MARK_VISIBLEX (gcry_pk_get_curve)
+MARK_VISIBLEX (gcry_pk_get_param)
+MARK_VISIBLEX (gcry_pk_get_nbits)
+MARK_VISIBLEX (gcry_pk_map_name)
+MARK_VISIBLEX (gcry_pk_sign)
+MARK_VISIBLEX (gcry_pk_testkey)
+MARK_VISIBLEX (gcry_pk_verify)
+MARK_VISIBLEX (gcry_pubkey_get_sexp)
+MARK_VISIBLEX (gcry_ecc_get_algo_keylen)
+MARK_VISIBLEX (gcry_ecc_mul_point)
+MARK_VISIBLEX (gcry_pk_hash_sign)
+MARK_VISIBLEX (gcry_pk_hash_verify)
+MARK_VISIBLEX (gcry_pk_random_override_new)
+
+MARK_VISIBLEX (gcry_kdf_derive)
+MARK_VISIBLEX (gcry_kdf_open)
+MARK_VISIBLEX (gcry_kdf_compute)
+MARK_VISIBLEX (gcry_kdf_final)
+MARK_VISIBLEX (gcry_kdf_close)
+
+MARK_VISIBLEX (gcry_prime_check)
+MARK_VISIBLEX (gcry_prime_generate)
+MARK_VISIBLEX (gcry_prime_group_generator)
+MARK_VISIBLEX (gcry_prime_release_factors)
+
+MARK_VISIBLEX (gcry_random_add_bytes)
+MARK_VISIBLEX (gcry_random_bytes)
+MARK_VISIBLEX (gcry_random_bytes_secure)
+MARK_VISIBLEX (gcry_randomize)
+MARK_VISIBLEX (gcry_create_nonce)
+
+MARK_VISIBLEX (gcry_sexp_alist)
+MARK_VISIBLEX (gcry_sexp_append)
+MARK_VISIBLEX (gcry_sexp_build)
+MARK_VISIBLEX (gcry_sexp_build_array)
+MARK_VISIBLEX (gcry_sexp_cadr)
+MARK_VISIBLEX (gcry_sexp_canon_len)
+MARK_VISIBLEX (gcry_sexp_car)
+MARK_VISIBLEX (gcry_sexp_cdr)
+MARK_VISIBLEX (gcry_sexp_cons)
+MARK_VISIBLEX (gcry_sexp_create)
+MARK_VISIBLEX (gcry_sexp_dump)
+MARK_VISIBLEX (gcry_sexp_find_token)
+MARK_VISIBLEX (gcry_sexp_length)
+MARK_VISIBLEX (gcry_sexp_new)
+MARK_VISIBLEX (gcry_sexp_nth)
+MARK_VISIBLEX (gcry_sexp_nth_buffer)
+MARK_VISIBLEX (gcry_sexp_nth_data)
+MARK_VISIBLEX (gcry_sexp_nth_mpi)
+MARK_VISIBLEX (gcry_sexp_nth_string)
+MARK_VISIBLEX (gcry_sexp_prepend)
+MARK_VISIBLEX (gcry_sexp_release)
+MARK_VISIBLEX (gcry_sexp_sprint)
+MARK_VISIBLEX (gcry_sexp_sscan)
+MARK_VISIBLEX (gcry_sexp_vlist)
+MARK_VISIBLEX (gcry_sexp_extract_param)
+
+MARK_VISIBLEX (gcry_mpi_abs)
+MARK_VISIBLEX (gcry_mpi_add)
+MARK_VISIBLEX (gcry_mpi_add_ui)
+MARK_VISIBLEX (gcry_mpi_addm)
+MARK_VISIBLEX (gcry_mpi_aprint)
+MARK_VISIBLEX (gcry_mpi_clear_bit)
+MARK_VISIBLEX (gcry_mpi_clear_flag)
+MARK_VISIBLEX (gcry_mpi_clear_highbit)
+MARK_VISIBLEX (gcry_mpi_cmp)
+MARK_VISIBLEX (gcry_mpi_cmp_ui)
+MARK_VISIBLEX (gcry_mpi_copy)
+MARK_VISIBLEX (gcry_mpi_div)
+MARK_VISIBLEX (gcry_mpi_dump)
+MARK_VISIBLEX (gcry_mpi_ec_add)
+MARK_VISIBLEX (gcry_mpi_ec_sub)
+MARK_VISIBLEX (gcry_mpi_ec_curve_point)
+MARK_VISIBLEX (gcry_mpi_ec_dup)
+MARK_VISIBLEX (gcry_mpi_ec_decode_point)
+MARK_VISIBLEX (gcry_mpi_ec_get_affine)
+MARK_VISIBLEX (gcry_mpi_ec_mul)
+MARK_VISIBLEX (gcry_mpi_ec_new)
+MARK_VISIBLEX (gcry_mpi_ec_get_mpi)
+MARK_VISIBLEX (gcry_mpi_ec_get_point)
+MARK_VISIBLEX (gcry_mpi_ec_set_mpi)
+MARK_VISIBLEX (gcry_mpi_ec_set_point)
+MARK_VISIBLEX (gcry_mpi_gcd)
+MARK_VISIBLEX (gcry_mpi_get_flag)
+MARK_VISIBLEX (gcry_mpi_get_nbits)
+MARK_VISIBLEX (gcry_mpi_get_opaque)
+MARK_VISIBLEX (gcry_mpi_is_neg)
+MARK_VISIBLEX (gcry_mpi_invm)
+MARK_VISIBLEX (gcry_mpi_mod)
+MARK_VISIBLEX (gcry_mpi_mul)
+MARK_VISIBLEX (gcry_mpi_mul_2exp)
+MARK_VISIBLEX (gcry_mpi_mul_ui)
+MARK_VISIBLEX (gcry_mpi_mulm)
+MARK_VISIBLEX (gcry_mpi_neg)
+MARK_VISIBLEX (gcry_mpi_new)
+MARK_VISIBLEX (gcry_mpi_point_get)
+MARK_VISIBLEX (gcry_mpi_point_new)
+MARK_VISIBLEX (gcry_mpi_point_release)
+MARK_VISIBLEX (gcry_mpi_point_copy)
+MARK_VISIBLEX (gcry_mpi_point_set)
+MARK_VISIBLEX (gcry_mpi_point_snatch_get)
+MARK_VISIBLEX (gcry_mpi_point_snatch_set)
+MARK_VISIBLEX (gcry_mpi_powm)
+MARK_VISIBLEX (gcry_mpi_print)
+MARK_VISIBLEX (gcry_mpi_randomize)
+MARK_VISIBLEX (gcry_mpi_release)
+MARK_VISIBLEX (gcry_mpi_rshift)
+MARK_VISIBLEX (gcry_mpi_lshift)
+MARK_VISIBLEX (gcry_mpi_scan)
+MARK_VISIBLEX (gcry_mpi_snatch)
+MARK_VISIBLEX (gcry_mpi_set)
+MARK_VISIBLEX (gcry_mpi_set_bit)
+MARK_VISIBLEX (gcry_mpi_set_flag)
+MARK_VISIBLEX (gcry_mpi_set_highbit)
+MARK_VISIBLEX (gcry_mpi_set_opaque)
+MARK_VISIBLEX (gcry_mpi_set_opaque_copy)
+MARK_VISIBLEX (gcry_mpi_set_ui)
+MARK_VISIBLEX (gcry_mpi_get_ui)
+MARK_VISIBLEX (gcry_mpi_snew)
+MARK_VISIBLEX (gcry_mpi_sub)
+MARK_VISIBLEX (gcry_mpi_sub_ui)
+MARK_VISIBLEX (gcry_mpi_subm)
+MARK_VISIBLEX (gcry_mpi_swap)
+MARK_VISIBLEX (gcry_mpi_test_bit)
+
+MARK_VISIBLEX (gcry_ctx_release)
+
+MARK_VISIBLEX (gcry_log_debug)
+MARK_VISIBLEX (gcry_log_debughex)
+MARK_VISIBLEX (gcry_log_debugmpi)
+MARK_VISIBLEX (gcry_log_debugpnt)
+MARK_VISIBLEX (gcry_log_debugsxp)
+
+MARK_VISIBLEX (gcry_get_config)
+
+/* Functions used to implement macros.  */
+MARK_VISIBLEX (_gcry_mpi_get_const)
+
+
+#undef MARK_VISIBLEX
+
+#else /*!_GCRY_INCLUDED_BY_VISIBILITY_C*/
+
+/* To avoid accidental use of the public functions inside Libgcrypt,
+   we redefine them to catch such errors.  The usual difference
+   between a public and an internal version is that the internal
+   version use gpg_err_code_t and the public version gpg_error_t.  */
+
+#define gcry_check_version          _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_control                _gcry_USE_THE_UNDERSCORED_FUNCTION
+
+#define gcry_set_allocation_handler _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_set_fatalerror_handler _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_set_gettext_handler    _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_set_log_handler        _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_set_outofcore_handler  _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_set_progress_handler   _gcry_USE_THE_UNDERSCORED_FUNCTION
+
+#define gcry_err_code_from_errno    _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_err_code_to_errno      _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_err_make_from_errno    _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_error_from_errno       _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_strerror               _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_strsource              _gcry_USE_THE_UNDERSCORED_FUNCTION
+
+#define gcry_malloc                 _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_malloc_secure          _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_calloc                 _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_calloc_secure          _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_realloc                _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_strdup                 _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_xcalloc                _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_xcalloc_secure         _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_xmalloc                _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_xmalloc_secure         _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_xrealloc               _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_xstrdup                _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_free                   _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_is_secure              _gcry_USE_THE_UNDERSCORED_FUNCTION
+
+#define gcry_cipher_open            _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_cipher_close           _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_cipher_setkey          _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_cipher_setiv           _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_cipher_setctr          _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_cipher_algo_info       _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_cipher_algo_name       _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_cipher_authenticate    _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_cipher_checktag        _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_cipher_gettag          _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_cipher_ctl             _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_cipher_decrypt         _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_cipher_encrypt         _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_cipher_get_algo_blklen _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_cipher_get_algo_keylen _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_cipher_info            _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_cipher_map_name        _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_cipher_mode_from_oid   _gcry_USE_THE_UNDERSCORED_FUNCTION
+
+#define gcry_pk_algo_info           _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_pk_algo_name           _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_pk_ctl                 _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_pk_decrypt             _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_pk_encrypt             _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_pk_genkey              _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_pk_get_keygrip         _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_pk_get_curve           _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_pk_get_param           _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_pk_get_nbits           _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_pk_map_name            _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_pk_sign                _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_pk_testkey             _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_pk_verify              _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_pubkey_get_sexp        _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_ecc_get_algo_keylen    _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_ecc_mul_point          _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_pk_hash_sign           _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_pk_hash_verify         _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_pk_random_override_new _gcry_USE_THE_UNDERSCORED_FUNCTION
+
+#define gcry_md_algo_info           _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_md_algo_name           _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_md_close               _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_md_copy                _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_md_ctl                 _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_md_enable              _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_md_get                 _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_md_get_algo            _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_md_get_algo_dlen       _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_md_hash_buffer         _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_md_hash_buffers        _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_md_info                _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_md_is_enabled          _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_md_is_secure           _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_md_map_name            _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_md_open                _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_md_read                _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_md_extract             _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_md_reset               _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_md_setkey              _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_md_write               _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_md_debug               _gcry_USE_THE_UNDERSCORED_FUNCTION
+
+#define gcry_mac_algo_info          _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_mac_algo_name          _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_mac_map_name           _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_mac_get_algo           _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_mac_get_algo_maclen    _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_mac_get_algo_keylen    _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_mac_open               _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_mac_close              _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_mac_setkey             _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_mac_setiv              _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_mac_write              _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_mac_read               _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_mac_verify             _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_mac_ctl                _gcry_USE_THE_UNDERSCORED_FUNCTION
+
+#define gcry_kdf_derive             _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_kdf_open               _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_kdf_compute            _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_kdf_final              _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_kdf_close              _gcry_USE_THE_UNDERSCORED_FUNCTION
+
+#define gcry_prime_check            _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_prime_generate         _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_prime_group_generator  _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_prime_release_factors  _gcry_USE_THE_UNDERSCORED_FUNCTION
+
+#define gcry_random_add_bytes       _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_random_bytes           _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_random_bytes_secure    _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_randomize              _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_create_nonce           _gcry_USE_THE_UNDERSCORED_FUNCTION
+
+#define gcry_ctx_release            _gcry_USE_THE_UNDERSCORED_FUNCTION
+
+#define gcry_sexp_alist             _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_sexp_append            _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_sexp_build             _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_sexp_build_array       _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_sexp_cadr              _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_sexp_canon_len         _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_sexp_car               _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_sexp_cdr               _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_sexp_cons              _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_sexp_create            _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_sexp_dump              _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_sexp_find_token        _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_sexp_length            _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_sexp_new               _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_sexp_nth               _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_sexp_nth_buffer        _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_sexp_nth_data          _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_sexp_nth_mpi           _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_sexp_nth_string        _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_sexp_prepend           _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_sexp_release           _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_sexp_sprint            _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_sexp_sscan             _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_sexp_vlist             _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_sexp_extract_param     _gcry_USE_THE_UNDERSCORED_FUNCTION
+
+#define gcry_mpi_add                _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_mpi_add_ui             _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_mpi_addm               _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_mpi_aprint             _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_mpi_clear_bit          _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_mpi_clear_flag         _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_mpi_clear_highbit      _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_mpi_cmp                _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_mpi_cmp_ui             _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_mpi_copy               _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_mpi_div                _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_mpi_dump               _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_mpi_gcd                _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_mpi_get_flag           _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_mpi_get_nbits          _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_mpi_get_opaque         _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_mpi_invm               _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_mpi_mod                _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_mpi_mul                _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_mpi_mul_2exp           _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_mpi_mul_ui             _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_mpi_mulm               _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_mpi_new                _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_mpi_point_get          _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_mpi_point_new          _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_mpi_point_release      _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_mpi_point_copy         _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_mpi_point_set          _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_mpi_point_snatch_get   _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_mpi_point_snatch_set   _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_mpi_powm               _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_mpi_print              _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_mpi_randomize          _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_mpi_release            _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_mpi_rshift             _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_mpi_lshift             _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_mpi_scan               _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_mpi_set                _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_mpi_set_bit            _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_mpi_set_flag           _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_mpi_set_highbit        _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_mpi_set_opaque         _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_mpi_set_ui             _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_mpi_get_ui             _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_mpi_snatch             _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_mpi_snew               _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_mpi_sub                _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_mpi_sub_ui             _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_mpi_subm               _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_mpi_swap               _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_mpi_test_bit           _gcry_USE_THE_UNDERSCORED_FUNCTION
+
+#define gcry_mpi_abs                _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_mpi_ec_add             _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_mpi_ec_sub             _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_mpi_ec_curve_point     _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_mpi_ec_dup             _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_mpi_ec_decode_point    _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_mpi_ec_get_affine      _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_mpi_ec_get_mpi         _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_mpi_ec_get_point       _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_mpi_ec_mul             _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_mpi_ec_new             _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_mpi_ec_set_mpi         _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_mpi_ec_set_point       _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_mpi_is_neg             _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_mpi_neg                _gcry_USE_THE_UNDERSCORED_FUNCTION
+#define gcry_mpi_set_opaque_copy    _gcry_USE_THE_UNDERSCORED_FUNCTION
+
+
+#endif /*!_GCRY_INCLUDED_BY_VISIBILITY_C*/
 
 #endif /*GCRY_VISIBILITY_H*/
-- 
2.39.2




reply via email to

[Prev in Thread] Current Thread [Next in Thread]