Re: [PATCH v4 10/17] target/riscv: Add Zvkned ISA extension support

qemu-devel

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH v4 10/17] target/riscv: Add Zvkned ISA extension support

From:	Max Chou
Subject:	Re: [PATCH v4 10/17] target/riscv: Add Zvkned ISA extension support
Date:	Mon, 26 Jun 2023 16:02:48 +0800
User-agent:	Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:102.0) Gecko/20100101 Thunderbird/102.12.0

On 2023/6/23 3:33 PM, Richard Henderson wrote:

On 6/22/23 18:16, Max Chou wrote:
--- a/target/riscv/vcrypto_helper.c
+++ b/target/riscv/vcrypto_helper.c
@@ -22,6 +22,7 @@
  #include "qemu/bitops.h"
  #include "qemu/bswap.h"
  #include "cpu.h"
+#include "crypto/aes.h"
  #include "exec/memop.h"
  #include "exec/exec-all.h"
  #include "exec/helper-proto.h"
@@ -195,3 +196,310 @@ RVVCALL(OPIVX2, vwsll_vx_w, WOP_UUU_W, H8, H4,DO_SLL)
  GEN_VEXT_VX(vwsll_vx_b, 2)
  GEN_VEXT_VX(vwsll_vx_h, 4)
  GEN_VEXT_VX(vwsll_vx_w, 8)
+
+/*
+ * Replace every byte of the 4x4 round state, in place, with the entry of
+ * the AES_sbox lookup table that the byte indexes.
+ */
+static inline void aes_sub_bytes(uint8_t round_state[4][4])
+{
+    for (int i = 0; i < 4; i++) {
+        for (int k = 0; k < 4; k++) {
+            round_state[i][k] = AES_sbox[round_state[i][k]];
+        }
+    }
+}
+
+/*
+ * Rotate the bytes that share a second index across the first index of
+ * the round state, in place.  For second index `row` (1..3) the byte that
+ * ends up at first index `col` comes from first index (col + row) % 4.
+ * Bytes with second index 0 are left untouched.
+ */
+static inline void aes_shift_bytes(uint8_t round_state[4][4])
+{
+    for (int row = 1; row < 4; row++) {
+        uint8_t saved[4];
+
+        /* Snapshot the row first so the rotation reads unmodified bytes. */
+        for (int col = 0; col < 4; col++) {
+            saved[col] = round_state[col][row];
+        }
+        for (int col = 0; col < 4; col++) {
+            round_state[col][row] = saved[(col + row) % 4];
+        }
+    }
+}
+
+/*
+ * XOR a 16-byte round key into the 4x4 round state, in place.
+ *
+ * Key byte j is combined with round_state[j / 4][j % 4].  The key is only
+ * read, so it is taken through a pointer to const.
+ */
+static inline void xor_round_key(uint8_t round_state[4][4],
+                                 const uint8_t *round_key)
+{
+    for (int j = 0; j < 16; j++) {
+        round_state[j / 4][j % 4] ^= round_key[j];
+    }
+}
+
+/*
+ * Replace every byte of the 4x4 round state, in place, with the entry of
+ * the AES_isbox lookup table that the byte indexes.
+ */
+static inline void aes_inv_sub_bytes(uint8_t round_state[4][4])
+{
+    for (int i = 0; i < 4; i++) {
+        for (int k = 0; k < 4; k++) {
+            round_state[i][k] = AES_isbox[round_state[i][k]];
+        }
+    }
+}
+
+/*
+ * Undo aes_shift_bytes(): rotate the bytes that share a second index
+ * across the first index of the round state, in place.  For second index
+ * `row` (1..3) the byte that ends up at first index `col` comes from
+ * first index (col - row) mod 4.  Bytes with second index 0 are left
+ * untouched.
+ */
+static inline void aes_inv_shift_bytes(uint8_t round_state[4][4])
+{
+    for (int row = 1; row < 4; row++) {
+        uint8_t saved[4];
+
+        /* Snapshot the row first so the rotation reads unmodified bytes. */
+        for (int col = 0; col < 4; col++) {
+            saved[col] = round_state[col][row];
+        }
+        for (int col = 0; col < 4; col++) {
+            round_state[col][row] = saved[(col - row + 4) % 4];
+        }
+    }
+}
+
+/*
+ * Double a byte for the AES field arithmetic used by the mix-columns
+ * helpers: shift left by one bit (keeping the low 8 bits) and XOR in
+ * 0x1b when the top bit of the input was set.
+ */
+static inline uint8_t xtime(uint8_t x)
+{
+    uint8_t doubled = (uint8_t)(x << 1);
+
+    if (x & 0x80) {
+        doubled ^= 0x1b;
+    }
+    return doubled;
+}
+
+/*
+ * Multiply x by y using repeated xtime() doubling.
+ *
+ * Only the five low bits of y take part: for each set bit n (0..4) the
+ * value of x doubled n times is XORed into the result.  Higher bits of y
+ * are ignored.
+ */
+static inline uint8_t multiply(uint8_t x, uint8_t y)
+{
+    uint8_t product = 0;
+    uint8_t power = x;
+
+    for (int bit = 0; bit < 5; bit++) {
+        if ((y >> bit) & 1) {
+            product ^= power;
+        }
+        power = xtime(power);
+    }
+    return product;
+}
+
+/*
+ * Inverse mix-columns step, applied in place to each group of four bytes
+ * round_state[col][0..3].
+ *
+ * Every output byte is the XOR of the four input bytes, each multiplied
+ * (via multiply()) by one of 0x0e, 0x0b, 0x0d, 0x09; the coefficient
+ * sequence is rotated by one position for each successive output byte.
+ */
+static inline void aes_inv_mix_cols(uint8_t round_state[4][4])
+{
+    static const uint8_t coeff[4] = { 0x0e, 0x0b, 0x0d, 0x09 };
+
+    for (int col = 0; col < 4; col++) {
+        uint8_t in[4];
+
+        /* Work from a copy: every output byte needs all four inputs. */
+        for (int k = 0; k < 4; k++) {
+            in[k] = round_state[col][k];
+        }
+        for (int row = 0; row < 4; row++) {
+            uint8_t acc = 0;
+
+            for (int k = 0; k < 4; k++) {
+                acc ^= multiply(in[k], coeff[(k - row + 4) % 4]);
+            }
+            round_state[col][row] = acc;
+        }
+    }
+}
+
+/*
+ * Mix-columns step, applied in place to each group of four bytes
+ * round_state[j][0..3].
+ *
+ * `b` is the XOR of all four bytes of the group and `a` keeps the original
+ * first byte, which is still needed for the last update after
+ * round_state[j][0] has been overwritten.  Each byte is then XORed with
+ * xtime(byte ^ next byte) ^ b, where "next" wraps around to the first.
+ */
+static inline void aes_mix_cols(uint8_t round_state[4][4])
+{
+    uint8_t a, b;
+    for (int j = 0; j < 4; ++j) {
+        a = round_state[j][0];
+        b = round_state[j][0] ^ round_state[j][1] ^ round_state[j][2] ^
+            round_state[j][3];
+        round_state[j][0] ^= xtime(round_state[j][0] ^ round_state[j][1]) ^ b;
+        round_state[j][1] ^= xtime(round_state[j][1] ^ round_state[j][2]) ^ b;
+        round_state[j][2] ^= xtime(round_state[j][2] ^ round_state[j][3]) ^ b;
+        round_state[j][3] ^= xtime(round_state[j][3] ^ a) ^ b;
+    }
+}
+
+/*
+ * Generate a helper HELPER(NAME)(vd, vs2, env, desc) that processes the
+ * destination in groups of two 64-bit words (16 bytes).
+ *
+ * For each group index i from env->vstart / 4 up to env->vl / 4:
+ *   - round_key[2] is loaded from the matching group of vs2
+ *     (words i * 2 and i * 2 + 1), passed through cpu_to_le64();
+ *   - the 16 bytes of the vd group are copied into round_state[4][4],
+ *     byte j going to round_state[j / 4][j % 4];
+ *   - the statement(s) given as __VA_ARGS__ run with `round_state` and
+ *     `round_key` in scope;
+ *   - round_state is copied back into the vd group.
+ * The vd words are converted with cpu_to_le64s() before and le64_to_cpus()
+ * after the byte-wise access.  Finally env->vstart is cleared and
+ * vext_set_elems_1s() is called for the range [vl * 4, total_elems * 4).
+ */
+#define GEN_ZVKNED_HELPER_VV(NAME, ...)                                   \
+    void HELPER(NAME)(void *vd_vptr, void *vs2_vptr, CPURISCVState *env,  \
+                      uint32_t desc)                                      \
+    {                                                                     \
+        uint64_t *vd = vd_vptr;                                           \
+        uint64_t *vs2 = vs2_vptr;                                         \
+        uint32_t vl = env->vl;                                            \
+        uint32_t total_elems = vext_get_total_elems(env, desc, 4);        \
+        uint32_t vta = vext_vta(desc);                                    \
+                                                                          \
+        for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) {        \
+            uint64_t round_key[2] = {                                     \
+                cpu_to_le64(vs2[i * 2 + 0]),                              \
+                cpu_to_le64(vs2[i * 2 + 1]),                              \
+            };                                                            \
+            uint8_t round_state[4][4];                                    \
+            cpu_to_le64s(vd + i * 2 + 0);                                 \
+            cpu_to_le64s(vd + i * 2 + 1);                                 \
+            for (int j = 0; j < 16; j++) {                                \
+                round_state[j / 4][j % 4] = ((uint8_t *)(vd + i * 2))[j]; \
+            }                                                             \
+            __VA_ARGS__;                                                  \
+            for (int j = 0; j < 16; j++) {                                \
+                ((uint8_t *)(vd + i * 2))[j] = round_state[j / 4][j % 4]; \
+            }                                                             \
+            le64_to_cpus(vd + i * 2 + 0);                                 \
+            le64_to_cpus(vd + i * 2 + 1);                                 \
+        }                                                                 \
+        env->vstart = 0;                                                  \
+        /* set tail elements to 1s */                                     \
+        vext_set_elems_1s(vd, vta, vl * 4, total_elems * 4);              \
+    }
+
+/*
+ * Generate a helper HELPER(NAME)(vd, vs2, env, desc) that processes the
+ * destination in groups of two 64-bit words (16 bytes), using one round
+ * key for every group.
+ *
+ * For each group index i from env->vstart / 4 up to env->vl / 4:
+ *   - round_key[2] is loaded from vs2[0] and vs2[1] (the same two words
+ *     for every i), passed through cpu_to_le64();
+ *   - the 16 bytes of the vd group are copied into round_state[4][4],
+ *     byte j going to round_state[j / 4][j % 4];
+ *   - the statement(s) given as __VA_ARGS__ run with `round_state` and
+ *     `round_key` in scope;
+ *   - round_state is copied back into the vd group.
+ * The vd words are converted with cpu_to_le64s() before and le64_to_cpus()
+ * after the byte-wise access.  Finally env->vstart is cleared and
+ * vext_set_elems_1s() is called for the range [vl * 4, total_elems * 4).
+ */
+#define GEN_ZVKNED_HELPER_VS(NAME, ...)                                   \
+    void HELPER(NAME)(void *vd_vptr, void *vs2_vptr, CPURISCVState *env,  \
+                      uint32_t desc)                                      \
+    {                                                                     \
+        uint64_t *vd = vd_vptr;                                           \
+        uint64_t *vs2 = vs2_vptr;                                         \
+        uint32_t vl = env->vl;                                            \
+        uint32_t total_elems = vext_get_total_elems(env, desc, 4);        \
+        uint32_t vta = vext_vta(desc);                                    \
+                                                                          \
+        for (uint32_t i = env->vstart / 4; i < env->vl / 4; i++) {        \
+            uint64_t round_key[2] = {                                     \
+                cpu_to_le64(vs2[0]),                                      \
+                cpu_to_le64(vs2[1]),                                      \
+            };                                                            \
+            uint8_t round_state[4][4];                                    \
+            cpu_to_le64s(vd + i * 2 + 0);                                 \
+            cpu_to_le64s(vd + i * 2 + 1);                                 \
+            for (int j = 0; j < 16; j++) {                                \
+                round_state[j / 4][j % 4] = ((uint8_t *)(vd + i * 2))[j]; \
+            }                                                             \
+            __VA_ARGS__;                                                  \
+            for (int j = 0; j < 16; j++) {                                \
+                ((uint8_t *)(vd + i * 2))[j] = round_state[j / 4][j % 4]; \
+            }                                                             \
+            le64_to_cpus(vd + i * 2 + 0);                                 \
+            le64_to_cpus(vd + i * 2 + 1);                                 \
+        }                                                                 \
+        env->vstart = 0;                                                  \
+        /* set tail elements to 1s */                                     \
+        vext_set_elems_1s(vd, vta, vl * 4, total_elems * 4);              \
+    }
See
https://lore.kernel.org/qemu-devel/20230620110758.787479-1-richard.henderson@linaro.org/
which should greatly simplify all of this.


r~

Thanks a lot.

I'll update the patch set for this.

With regards,
Max

[Prev in Thread]

Current Thread

[Next in Thread]

[PATCH v4 05/17] target/riscv: Move vector translation checks, (continued)
- [PATCH v4 05/17] target/riscv: Move vector translation checks, Max Chou, 2023/06/22
- [PATCH v4 07/17] target/riscv: Refactor some of the generic vector functionality, Max Chou, 2023/06/22
- [PATCH v4 08/17] tcg: Fix temporary variable in tcg_gen_gvec_andcs, Max Chou, 2023/06/22
  - Re: [PATCH v4 08/17] tcg: Fix temporary variable in tcg_gen_gvec_andcs, Daniel Henrique Barboza, 2023/06/22
    - Re: [PATCH v4 08/17] tcg: Fix temporary variable in tcg_gen_gvec_andcs, Richard Henderson, 2023/06/23
- [PATCH v4 09/17] target/riscv: Add Zvbb ISA extension support, Max Chou, 2023/06/22
  - Re: [PATCH v4 09/17] target/riscv: Add Zvbb ISA extension support, Daniel Henrique Barboza, 2023/06/22
- [PATCH v4 10/17] target/riscv: Add Zvkned ISA extension support, Max Chou, 2023/06/22
  - Re: [PATCH v4 10/17] target/riscv: Add Zvkned ISA extension support, Daniel Henrique Barboza, 2023/06/22
  - Re: [PATCH v4 10/17] target/riscv: Add Zvkned ISA extension support, Richard Henderson, 2023/06/23
    - Re: [PATCH v4 10/17] target/riscv: Add Zvkned ISA extension support, Max Chou <=
- [PATCH v4 11/17] target/riscv: Add Zvknh ISA extension support, Max Chou, 2023/06/22
  - Re: [PATCH v4 11/17] target/riscv: Add Zvknh ISA extension support, Daniel Henrique Barboza, 2023/06/22
- [PATCH v4 12/17] target/riscv: Add Zvksh ISA extension support, Max Chou, 2023/06/22
  - Re: [PATCH v4 12/17] target/riscv: Add Zvksh ISA extension support, Daniel Henrique Barboza, 2023/06/22
- [PATCH v4 13/17] target/riscv: Add Zvkg ISA extension support, Max Chou, 2023/06/22
  - Re: [PATCH v4 13/17] target/riscv: Add Zvkg ISA extension support, Daniel Henrique Barboza, 2023/06/22
- [PATCH v4 14/17] crypto: Create sm4_subword, Max Chou, 2023/06/22
- [PATCH v4 15/17] crypto: Add SM4 constant parameter CK, Max Chou, 2023/06/22
- [PATCH v4 16/17] target/riscv: Add Zvksed ISA extension support, Max Chou, 2023/06/22
- [PATCH v4 17/17] target/riscv: Expose Zvk* and Zvb[b, c] cpu properties, Max Chou, 2023/06/22

Prev by Date: Re: [PATCH 08/26] scripts/oss-fuzz: add a suppression for keymap
Next by Date: Re: [PATCH v2 8/8] disas/riscv: Add support for XThead* instructions
Previous by thread: Re: [PATCH v4 10/17] target/riscv: Add Zvkned ISA extension support
Next by thread: [PATCH v4 11/17] target/riscv: Add Zvknh ISA extension support
Index(es):
- Date
- Thread