qemu-s390x
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH v7 1/2] target/s390x: support SHA-512 extensions


From: Thomas Huth
Subject: Re: [PATCH v7 1/2] target/s390x: support SHA-512 extensions
Date: Fri, 26 Aug 2022 12:21:36 +0200
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:91.0) Gecko/20100101 Thunderbird/91.12.0


Finally, I'm also having some spare minutes to have a look on this ... First, thank you for your work here, it's very appreciated! Some more comments inline below (mostly cosmetics since I'm not very much into this crypto stuff)...

On 09/08/2022 17.03, Jason A. Donenfeld wrote:
In order to fully support MSA_EXT_5, we have to support the SHA-512
special instructions. So implement those.

The implementation began as something TweetNacl-like, and then was
adjusted to be useful here. It's not very beautiful, but it is quite
short and compact, which is what we're going for.
>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
  target/s390x/gen-features.c      |   3 +
  target/s390x/tcg/crypto_helper.c | 157 +++++++++++++++++++++++++++++++
  2 files changed, 160 insertions(+)

If you've got some spare time, it would be great to have a test for the new functions in the tests/tcg/s390x/ folder, too (but otherwise we can still add them later).

diff --git a/target/s390x/gen-features.c b/target/s390x/gen-features.c
index ad140184b9..85ab69d04e 100644
--- a/target/s390x/gen-features.c
+++ b/target/s390x/gen-features.c
@@ -749,6 +749,9 @@ static uint16_t qemu_V7_0[] = {
   */
  static uint16_t qemu_MAX[] = {
      S390_FEAT_VECTOR_ENH2,
+    S390_FEAT_MSA_EXT_5,
+    S390_FEAT_KIMD_SHA_512,
+    S390_FEAT_KLMD_SHA_512,
  };

I think we likely have to fence the bits off for older machine type versions, like it has been done in commit 4f9b6c7ddb2 for example. However, the patch for the new 7.2 machine type is not merged yet (but I've queued it on https://gitlab.com/thuth/qemu/-/commits/s390x-next/ ), so you either have to pick that manually into your branch, or we fix it up later (which would be ok for me, too).

  /****** END FEATURE DEFS ******/
diff --git a/target/s390x/tcg/crypto_helper.c b/target/s390x/tcg/crypto_helper.c
index 138d9e7ad9..4d45de8faa 100644
--- a/target/s390x/tcg/crypto_helper.c
+++ b/target/s390x/tcg/crypto_helper.c
@@ -1,10 +1,12 @@
  /*
   *  s390x crypto helpers
   *
+ *  Copyright (C) 2022 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights 
Reserved.

Please drop the "All rights reserved" ... it does not have any legal meaning anymore, and also sounds weird in the Open Source context. See:

 https://en.wikipedia.org/wiki/All_rights_reserved#Obsolescence

   *  Copyright (c) 2017 Red Hat Inc
   *
   *  Authors:
   *   David Hildenbrand <david@redhat.com>
+ *   Jason A. Donenfeld <Jason@zx2c4.com>
   *
   * This work is licensed under the terms of the GNU GPL, version 2 or later.
   * See the COPYING file in the top-level directory.
@@ -18,6 +20,153 @@
  #include "exec/exec-all.h"
  #include "exec/cpu_ldst.h"
+static uint64_t R(uint64_t x, int c) { return (x >> c) | (x << (64 - c)); }
+static uint64_t Ch(uint64_t x, uint64_t y, uint64_t z) { return (x & y) ^ (~x 
& z); }
+static uint64_t Maj(uint64_t x, uint64_t y, uint64_t z) { return (x & y) ^ (x & z) 
^ (y & z); }
+static uint64_t Sigma0(uint64_t x) { return R(x, 28) ^ R(x, 34) ^ R(x, 39); }
+static uint64_t Sigma1(uint64_t x) { return R(x, 14) ^ R(x, 18) ^ R(x, 41); }
+static uint64_t sigma0(uint64_t x) { return R(x, 1) ^ R(x, 8) ^ (x >> 7); }
+static uint64_t sigma1(uint64_t x) { return R(x, 19) ^ R(x, 61) ^ (x >> 6); }
+
+static const uint64_t K[80] = {
+    0x428a2f98d728ae22ULL, 0x7137449123ef65cdULL, 0xb5c0fbcfec4d3b2fULL,
+    0xe9b5dba58189dbbcULL, 0x3956c25bf348b538ULL, 0x59f111f1b605d019ULL,
+    0x923f82a4af194f9bULL, 0xab1c5ed5da6d8118ULL, 0xd807aa98a3030242ULL,
+    0x12835b0145706fbeULL, 0x243185be4ee4b28cULL, 0x550c7dc3d5ffb4e2ULL,
+    0x72be5d74f27b896fULL, 0x80deb1fe3b1696b1ULL, 0x9bdc06a725c71235ULL,
+    0xc19bf174cf692694ULL, 0xe49b69c19ef14ad2ULL, 0xefbe4786384f25e3ULL,
+    0x0fc19dc68b8cd5b5ULL, 0x240ca1cc77ac9c65ULL, 0x2de92c6f592b0275ULL,
+    0x4a7484aa6ea6e483ULL, 0x5cb0a9dcbd41fbd4ULL, 0x76f988da831153b5ULL,
+    0x983e5152ee66dfabULL, 0xa831c66d2db43210ULL, 0xb00327c898fb213fULL,
+    0xbf597fc7beef0ee4ULL, 0xc6e00bf33da88fc2ULL, 0xd5a79147930aa725ULL,
+    0x06ca6351e003826fULL, 0x142929670a0e6e70ULL, 0x27b70a8546d22ffcULL,
+    0x2e1b21385c26c926ULL, 0x4d2c6dfc5ac42aedULL, 0x53380d139d95b3dfULL,
+    0x650a73548baf63deULL, 0x766a0abb3c77b2a8ULL, 0x81c2c92e47edaee6ULL,
+    0x92722c851482353bULL, 0xa2bfe8a14cf10364ULL, 0xa81a664bbc423001ULL,
+    0xc24b8b70d0f89791ULL, 0xc76c51a30654be30ULL, 0xd192e819d6ef5218ULL,
+    0xd69906245565a910ULL, 0xf40e35855771202aULL, 0x106aa07032bbd1b8ULL,
+    0x19a4c116b8d2d0c8ULL, 0x1e376c085141ab53ULL, 0x2748774cdf8eeb99ULL,
+    0x34b0bcb5e19b48a8ULL, 0x391c0cb3c5c95a63ULL, 0x4ed8aa4ae3418acbULL,
+    0x5b9cca4f7763e373ULL, 0x682e6ff3d6b2b8a3ULL, 0x748f82ee5defb2fcULL,
+    0x78a5636f43172f60ULL, 0x84c87814a1f0ab72ULL, 0x8cc702081a6439ecULL,
+    0x90befffa23631e28ULL, 0xa4506cebde82bde9ULL, 0xbef9a3f7b2c67915ULL,
+    0xc67178f2e372532bULL, 0xca273eceea26619cULL, 0xd186b8c721c0c207ULL,
+    0xeada7dd6cde0eb1eULL, 0xf57d4f7fee6ed178ULL, 0x06f067aa72176fbaULL,
+    0x0a637dc5a2c898a6ULL, 0x113f9804bef90daeULL, 0x1b710b35131c471bULL,
+    0x28db77f523047d84ULL, 0x32caab7b40c72493ULL, 0x3c9ebe0a15c9bebcULL,
+    0x431d67c49c100d4cULL, 0x4cc5d4becb3e42b6ULL, 0x597f299cfc657e2aULL,
+    0x5fcb6fab3ad6faecULL, 0x6c44198c4a475817ULL
+};
+
+static int kimd_sha512(CPUS390XState *env, uintptr_t ra, uint64_t 
parameter_block,
+                       uint64_t *message_reg, uint64_t *len_reg, uint8_t 
*stack_buffer)
+{
+    enum { MAX_BLOCKS_PER_RUN = 64 }; /* This is arbitrary, just to keep 
interactivity. */
+    uint64_t z[8], b[8], a[8], w[16], t;
+    uint64_t message = message_reg ? *message_reg : 0, len = *len_reg, 
processed = 0;

The line is very long, could you please declare message and len on separate lines?

+    int i, j, message_reg_len = 64, blocks = 0, cc = 0;
+
+    if (!(env->psw.mask & PSW_MASK_64)) {
+        len = (uint32_t)len;
+        message_reg_len = (env->psw.mask & PSW_MASK_32) ? 32 : 24;
+    }
+
+    for (i = 0; i < 8; ++i) {
+        z[i] = a[i] = cpu_ldq_be_data_ra(env, wrap_address(env, 
parameter_block + 8 * i), ra);

Quite a long line again, maybe split it like this:

      abi_ptr addr = wrap_address(env, parameter_block + 8 * i);
      z[i] = a[i] = cpu_ldq_be_data_ra(env, addr, ra);

+    }
+
+    while (len >= 128) {
+        if (++blocks > MAX_BLOCKS_PER_RUN) {
+            cc = 3;
+            break;
+        }
+
+        for (i = 0; i < 16; ++i) {
+            if (message) {
+                w[i] = cpu_ldq_be_data_ra(env, wrap_address(env, message + 8 * 
i), ra);

Long line again, please split.

+            } else {
+                w[i] = be64_to_cpu(((uint64_t *)stack_buffer)[i]);
+            }
+        }
+
+        for (i = 0; i < 80; ++i) {
+            for (j = 0; j < 8; ++j) {
+                b[j] = a[j];
+            }
+            t = a[7] + Sigma1(a[4]) + Ch(a[4], a[5], a[6]) + K[i] + w[i % 16];
+            b[7] = t + Sigma0(a[0]) + Maj(a[0], a[1], a[2]);
+            b[3] += t;
+            for (j = 0; j < 8; ++j) {
+                a[(j + 1) % 8] = b[j];
+            }
+            if (i % 16 == 15) {
+                for (j = 0; j < 16; ++j) {
+                    w[j] += w[(j + 9) % 16] + sigma0(w[(j + 1) % 16]) + 
sigma1(w[(j + 14) % 16]);
+                }
+            }
+        }
+
+        for (i = 0; i < 8; ++i) {
+            a[i] += z[i];
+            z[i] = a[i];
+        }
+
+        if (message) {
+            message += 128;
+        } else {
+            stack_buffer += 128;
+        }
+        len -= 128;
+        processed += 128;
+    }
+
+    for (i = 0; i < 8; ++i) {
+        cpu_stq_be_data_ra(env, wrap_address(env, parameter_block + 8 * i), 
z[i], ra);
+    }
+
+    if (message_reg) {
+        *message_reg = deposit64(*message_reg, 0, message_reg_len, message);
+    }
+    *len_reg -= processed;
+    return cc;
+}
+
+static int klmd_sha512(CPUS390XState *env, uintptr_t ra, uint64_t 
parameter_block,
+                        uint64_t *message_reg, uint64_t *len_reg)
+{
+    uint8_t x[256];
+    uint64_t i, message, len;
+    int j, message_reg_len = 64, cc;
+
+    cc = kimd_sha512(env, ra, parameter_block, message_reg, len_reg, NULL);
+    if (cc) {
+        return cc;
+    }
+
+    message = *message_reg;
+    len = *len_reg;
+    if (!(env->psw.mask & PSW_MASK_64)) {
+        len = (uint32_t)len;
+        message_reg_len = (env->psw.mask & PSW_MASK_32) ? 32 : 24;
+    }
+
+    for (i = 0; i < len; ++i) {
+        x[i] = cpu_ldub_data_ra(env, wrap_address(env, message + i), ra);
+    }
+    memset(x + i, 0, sizeof(x) - i);
+    x[i] = 128;
+    i = i < 112 ? 128 : 256;
+    for (j = 0; j < 16; ++j) {
+        x[i - 16 + j] = cpu_ldub_data_ra(env, wrap_address(env, 
parameter_block + 64 + j), ra);
+    }
+    if (kimd_sha512(env, ra, parameter_block, NULL, &i, x)) {
+        g_assert_not_reached(); /* It must handle at least 2 blocks. */
+    }
+    *message_reg = deposit64(*message_reg, 0, message_reg_len, message + len);
+    *len_reg -= len;
+    return 0;
+}
+
  uint32_t HELPER(msa)(CPUS390XState *env, uint32_t r1, uint32_t r2, uint32_t 
r3,
                       uint32_t type)
  {
@@ -52,6 +201,14 @@ uint32_t HELPER(msa)(CPUS390XState *env, uint32_t r1, 
uint32_t r2, uint32_t r3,
              cpu_stb_data_ra(env, param_addr, subfunc[i], ra);

So for KIMD and KLMD, I think we now have to set the bit that corresponds to SHA-512 in the query status information, too? Otherwise the guest might not use the function if it thinks that it is not available?

          }
          break;
+    case 3: /* CPACF_*_SHA_512 */
+        switch (type) {
+        case S390_FEAT_TYPE_KIMD:
+            return kimd_sha512(env, ra, env->regs[1], &env->regs[r2], 
&env->regs[r2 + 1], NULL);
+        case S390_FEAT_TYPE_KLMD:
+            return klmd_sha512(env, ra, env->regs[1], &env->regs[r2], 
&env->regs[r2 + 1]);
+        }
+        break;
      default:
          /* we don't implement any other subfunction yet */
          g_assert_not_reached();

 Thomas




reply via email to

[Prev in Thread] Current Thread [Next in Thread]