qemu-arm
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH v2] target/arm/arch_dump: Add SVE notes


From: Andrew Jones
Subject: Re: [PATCH v2] target/arm/arch_dump: Add SVE notes
Date: Wed, 16 Oct 2019 11:13:14 +0200
User-agent: NeoMutt/20180716

On Thu, Oct 10, 2019 at 01:33:02PM -0400, Richard Henderson wrote:
> On 10/10/19 2:16 AM, Andrew Jones wrote:
> >> It might be best to avoid the ifdef altogether:
> >>
> >>     for (i = 0; i < 32; ++i) {
> >>         uint64_t *d = (uint64_t *)&buf[sve_zreg_offset(vq, i)];
> >>         for (j = 0; j < vq * 2; ++j) {
> >>             d[j] = cpu_to_le64(env->vfp.zregs[i].d[j]);
> >>         }
> >>     }
> >>
> >> The compiler may well transform the inner loop to memcpy for little-endian
> >> host, but even if it doesn't core dumping is hardly performance sensitive.
> > 
> > True. I even had something like the above at first, but then
> > overcomplicated it with the #ifdef-ing.
> 
> Ah, I wonder if you changed things around with the ifdefs due to the pregs.
> There's no trivial solution for those.  It'd be nice to share the bswapping
> subroutine that you add in the SVE KVM patch set, and size the temporary array
> using ARM_MAX_VQ.

How about something like this squashed in?

Thanks,
drew

diff --git a/target/arm/arch_dump.c b/target/arm/arch_dump.c
index b02e398430b9..7c7fd23f4d94 100644
--- a/target/arm/arch_dump.c
+++ b/target/arm/arch_dump.c
@@ -182,6 +182,7 @@ static int aarch64_write_elf64_sve(WriteCoreDumpFunction f,
     struct aarch64_note *note;
     ARMCPU *cpu = env_archcpu(env);
     uint32_t vq = sve_current_vq(env);
+    uint64_t tmp[ARM_MAX_VQ * 2], *r;
     uint32_t fpr;
     uint8_t *buf;
     int ret, i;
@@ -198,31 +199,14 @@ static int aarch64_write_elf64_sve(WriteCoreDumpFunction 
f,
     note->sve.flags = cpu_to_dump16(s, 1);
 
     for (i = 0; i < 32; ++i) {
-#ifdef HOST_WORDS_BIGENDIAN
-        uint64_t d[vq * 2];
-        int j;
-
-        for (j = 0; j < vq * 2; ++j) {
-            d[j] = bswap64(env->vfp.zregs[i].d[j]);
-        }
-#else
-        uint64_t *d = &env->vfp.zregs[i].d[0];
-#endif
-        memcpy(&buf[sve_zreg_offset(vq, i)], &d[0], vq * 16);
+        r = sve_bswap64(tmp, &env->vfp.zregs[i].d[0], vq * 2);
+        memcpy(&buf[sve_zreg_offset(vq, i)], r, vq * 16);
     }
 
     for (i = 0; i < 17; ++i) {
-#ifdef HOST_WORDS_BIGENDIAN
-        uint64_t d[DIV_ROUND_UP(vq * 2, 8)];
-        int j;
-
-        for (j = 0; j < DIV_ROUND_UP(vq * 2, 8); ++j) {
-            d[j] = bswap64(env->vfp.pregs[i].p[j]);
-        }
-#else
-        uint64_t *d = &env->vfp.pregs[i].p[0];
-#endif
-        memcpy(&buf[sve_preg_offset(vq, i)], &d[0], vq * 16 / 8);
+        r = sve_bswap64(tmp, r = &env->vfp.pregs[i].p[0],
+                        DIV_ROUND_UP(vq * 2, 8));
+        memcpy(&buf[sve_preg_offset(vq, i)], r, vq * 16 / 8);
     }
 
     fpr = cpu_to_dump32(s, vfp_get_fpsr(env));
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 5b9c3e4cd73d..b3092e5213e6 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -975,6 +975,31 @@ void aarch64_sve_narrow_vq(CPUARMState *env, unsigned vq);
 void aarch64_sve_change_el(CPUARMState *env, int old_el,
                            int new_el, bool el0_a64);
 void aarch64_add_sve_properties(Object *obj);
+
+/*
+ * SVE registers are encoded in KVM's memory in an endianness-invariant format.
+ * The byte at offset i from the start of the in-memory representation contains
+ * the bits [(7 + 8 * i) : (8 * i)] of the register value. As this means the
+ * lowest offsets are stored in the lowest memory addresses, then that nearly
+ * matches QEMU's representation, which is to use an array of host-endian
+ * uint64_t's, where the lower offsets are at the lower indices. To complete
+ * the translation we just need to byte swap the uint64_t's on big-endian 
hosts.
+ */
+static inline uint64_t *sve_bswap64(uint64_t *dst, uint64_t *src, int nr)
+{
+#ifdef HOST_WORDS_BIGENDIAN
+    int i;
+
+    for (i = 0; i < nr; ++i) {
+        dst[i] = bswap64(src[i]);
+    }
+
+    return dst;
+#else
+    return src;
+#endif
+}
+
 #else
 static inline void aarch64_sve_narrow_vq(CPUARMState *env, unsigned vq) { }
 static inline void aarch64_sve_change_el(CPUARMState *env, int o,
diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c
index 876184b8fe4d..e2da756e65ed 100644
--- a/target/arm/kvm64.c
+++ b/target/arm/kvm64.c
@@ -876,30 +876,6 @@ static int kvm_arch_put_fpsimd(CPUState *cs)
     return 0;
 }
 
-/*
- * SVE registers are encoded in KVM's memory in an endianness-invariant format.
- * The byte at offset i from the start of the in-memory representation contains
- * the bits [(7 + 8 * i) : (8 * i)] of the register value. As this means the
- * lowest offsets are stored in the lowest memory addresses, then that nearly
- * matches QEMU's representation, which is to use an array of host-endian
- * uint64_t's, where the lower offsets are at the lower indices. To complete
- * the translation we just need to byte swap the uint64_t's on big-endian 
hosts.
- */
-static uint64_t *sve_bswap64(uint64_t *dst, uint64_t *src, int nr)
-{
-#ifdef HOST_WORDS_BIGENDIAN
-    int i;
-
-    for (i = 0; i < nr; ++i) {
-        dst[i] = bswap64(src[i]);
-    }
-
-    return dst;
-#else
-    return src;
-#endif
-}
-
 /*
  * KVM SVE registers come in slices where ZREGs have a slice size of 2048 bits
  * and PREGS and the FFR have a slice size of 256 bits. However we simply hard



reply via email to

[Prev in Thread] Current Thread [Next in Thread]