[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [qemu-s390x] [PATCH v2] s390x/tcg: Don't model FP registers as globals
From: |
David Hildenbrand |
Subject: |
Re: [qemu-s390x] [PATCH v2] s390x/tcg: Don't model FP registers as globals |
Date: |
Mon, 4 Feb 2019 16:47:12 +0100 |
User-agent: |
Mozilla/5.0 (X11; Linux x86_64; rv:60.0) Gecko/20100101 Thunderbird/60.4.0 |
On 04.02.19 16:44, David Hildenbrand wrote:
> As floating point registers overlay some vector registers and we want
> to make use of the general tcg_gvec infrastructure that assumes vectors
> are not stored in globals but in memory, don't model flaoting point
s/flaoting/floating/
> registers as globals anymore. This is then similar to how arm handles |
> it.
>
> Reading/writing a floating point register means reading/writing memory now.
>
> Break up ugly in2_x2() handling that modifies both, in1 and in2 into
> in1_x2l and in2_x2h. This makes things more readable. Also, in1_x1() is
s/in1_x2l/in2_x2l/
s/in1_x2h/in2_x2h/
Sorry Conny, I assume when you pick this up, you can fix this up :)
> ugly as it touches out/out2, get rid of that and use prep_x1() instead.
>
> As we are no longer able to use the original global variables for
> out/out2, we have to use new temporary variables and write from them to
> the target registers using wout_ helpers.
>
> E.g. an instruction that reads and writes x1 will use
> - prep_x1 to get the values into out/out2
> - wout_x1 to write the values from out/out2
> This special handling is needed for x1 as it is often used along with
> other inputs, so in1/in2 is already used.
>
> Reviewed-by: Richard Henderson <address@hidden>
> Signed-off-by: David Hildenbrand <address@hidden>
> ---
>
> v1 -> v2:
> - renamed x2h -> x2l and x2l -> x2h to match the actual meaning
> ("high" on s390x is where the smaller bit numbers are ;) )
> - did another quick sniff test
>
> target/s390x/insn-data.def | 150 ++++++++++++++++++-------------------
> target/s390x/translate.c | 135 ++++++++++++++++++++-------------
> 2 files changed, 160 insertions(+), 125 deletions(-)
>
> diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def
> index 54e39df831..dab805fd90 100644
> --- a/target/s390x/insn-data.def
> +++ b/target/s390x/insn-data.def
> @@ -33,10 +33,10 @@
> C(0xe308, AG, RXY_a, Z, r1, m2_64, r1, 0, add, adds64)
> C(0xe318, AGF, RXY_a, Z, r1, m2_32s, r1, 0, add, adds64)
> F(0xb30a, AEBR, RRE, Z, e1, e2, new, e1, aeb, f32, IF_BFP)
> - F(0xb31a, ADBR, RRE, Z, f1_o, f2_o, f1, 0, adb, f64, IF_BFP)
> - F(0xb34a, AXBR, RRE, Z, 0, x2_o, x1, 0, axb, f128, IF_BFP)
> + F(0xb31a, ADBR, RRE, Z, f1, f2, new, f1, adb, f64, IF_BFP)
> + F(0xb34a, AXBR, RRE, Z, x2h, x2l, x1, x1, axb, f128, IF_BFP)
> F(0xed0a, AEB, RXE, Z, e1, m2_32u, new, e1, aeb, f32, IF_BFP)
> - F(0xed1a, ADB, RXE, Z, f1_o, m2_64, f1, 0, adb, f64, IF_BFP)
> + F(0xed1a, ADB, RXE, Z, f1, m2_64, new, f1, adb, f64, IF_BFP)
> /* ADD HIGH */
> C(0xb9c8, AHHHR, RRF_a, HW, r2_sr32, r3_sr32, new, r1_32h, add,
> adds32)
> C(0xb9d8, AHHLR, RRF_a, HW, r2_sr32, r3, new, r1_32h, add, adds32)
> @@ -154,7 +154,7 @@
> C(0xb241, CKSM, RRE, Z, r1_o, ra2, new, r1_32, cksm, 0)
>
> /* COPY SIGN */
> - F(0xb372, CPSDR, RRF_b, FPSSH, f3_o, f2_o, f1, 0, cps, 0, IF_AFP1 |
> IF_AFP2 | IF_AFP3)
> + F(0xb372, CPSDR, RRF_b, FPSSH, f3, f2, new, f1, cps, 0, IF_AFP1 |
> IF_AFP2 | IF_AFP3)
>
> /* COMPARE */
> C(0x1900, CR, RR_a, Z, r1_o, r2_o, 0, 0, 0, cmps32)
> @@ -165,16 +165,16 @@
> C(0xe320, CG, RXY_a, Z, r1_o, m2_64, 0, 0, 0, cmps64)
> C(0xe330, CGF, RXY_a, Z, r1_o, m2_32s, 0, 0, 0, cmps64)
> F(0xb309, CEBR, RRE, Z, e1, e2, 0, 0, ceb, 0, IF_BFP)
> - F(0xb319, CDBR, RRE, Z, f1_o, f2_o, 0, 0, cdb, 0, IF_BFP)
> - F(0xb349, CXBR, RRE, Z, x1_o, x2_o, 0, 0, cxb, 0, IF_BFP)
> + F(0xb319, CDBR, RRE, Z, f1, f2, 0, 0, cdb, 0, IF_BFP)
> + F(0xb349, CXBR, RRE, Z, x2h, x2l, x1, 0, cxb, 0, IF_BFP)
> F(0xed09, CEB, RXE, Z, e1, m2_32u, 0, 0, ceb, 0, IF_BFP)
> - F(0xed19, CDB, RXE, Z, f1_o, m2_64, 0, 0, cdb, 0, IF_BFP)
> + F(0xed19, CDB, RXE, Z, f1, m2_64, 0, 0, cdb, 0, IF_BFP)
> /* COMPARE AND SIGNAL */
> F(0xb308, KEBR, RRE, Z, e1, e2, 0, 0, keb, 0, IF_BFP)
> - F(0xb318, KDBR, RRE, Z, f1_o, f2_o, 0, 0, kdb, 0, IF_BFP)
> - F(0xb348, KXBR, RRE, Z, x1_o, x2_o, 0, 0, kxb, 0, IF_BFP)
> + F(0xb318, KDBR, RRE, Z, f1, f2, 0, 0, kdb, 0, IF_BFP)
> + F(0xb348, KXBR, RRE, Z, x2h, x2l, x1, 0, kxb, 0, IF_BFP)
> F(0xed08, KEB, RXE, Z, e1, m2_32u, 0, 0, keb, 0, IF_BFP)
> - F(0xed18, KDB, RXE, Z, f1_o, m2_64, 0, 0, kdb, 0, IF_BFP)
> + F(0xed18, KDB, RXE, Z, f1, m2_64, 0, 0, kdb, 0, IF_BFP)
> /* COMPARE IMMEDIATE */
> C(0xc20d, CFI, RIL_a, EI, r1, i2, 0, 0, 0, cmps32)
> C(0xc20c, CGFI, RIL_a, EI, r1, i2, 0, 0, 0, cmps64)
> @@ -292,32 +292,32 @@
> C(0xe326, CVDY, RXY_a, LD, r1_o, a2, 0, 0, cvd, 0)
> /* CONVERT TO FIXED */
> F(0xb398, CFEBR, RRF_e, Z, 0, e2, new, r1_32, cfeb, 0, IF_BFP)
> - F(0xb399, CFDBR, RRF_e, Z, 0, f2_o, new, r1_32, cfdb, 0, IF_BFP)
> - F(0xb39a, CFXBR, RRF_e, Z, 0, x2_o, new, r1_32, cfxb, 0, IF_BFP)
> + F(0xb399, CFDBR, RRF_e, Z, 0, f2, new, r1_32, cfdb, 0, IF_BFP)
> + F(0xb39a, CFXBR, RRF_e, Z, x2h, x2l, new, r1_32, cfxb, 0, IF_BFP)
> F(0xb3a8, CGEBR, RRF_e, Z, 0, e2, r1, 0, cgeb, 0, IF_BFP)
> - F(0xb3a9, CGDBR, RRF_e, Z, 0, f2_o, r1, 0, cgdb, 0, IF_BFP)
> - F(0xb3aa, CGXBR, RRF_e, Z, 0, x2_o, r1, 0, cgxb, 0, IF_BFP)
> + F(0xb3a9, CGDBR, RRF_e, Z, 0, f2, r1, 0, cgdb, 0, IF_BFP)
> + F(0xb3aa, CGXBR, RRF_e, Z, x2h, x2l, r1, 0, cgxb, 0, IF_BFP)
> /* CONVERT FROM FIXED */
> F(0xb394, CEFBR, RRF_e, Z, 0, r2_32s, new, e1, cegb, 0, IF_BFP)
> - F(0xb395, CDFBR, RRF_e, Z, 0, r2_32s, f1, 0, cdgb, 0, IF_BFP)
> - F(0xb396, CXFBR, RRF_e, Z, 0, r2_32s, x1, 0, cxgb, 0, IF_BFP)
> + F(0xb395, CDFBR, RRF_e, Z, 0, r2_32s, new, f1, cdgb, 0, IF_BFP)
> + F(0xb396, CXFBR, RRF_e, Z, 0, r2_32s, new_P, x1, cxgb, 0, IF_BFP)
> F(0xb3a4, CEGBR, RRF_e, Z, 0, r2_o, new, e1, cegb, 0, IF_BFP)
> - F(0xb3a5, CDGBR, RRF_e, Z, 0, r2_o, f1, 0, cdgb, 0, IF_BFP)
> - F(0xb3a6, CXGBR, RRF_e, Z, 0, r2_o, x1, 0, cxgb, 0, IF_BFP)
> + F(0xb3a5, CDGBR, RRF_e, Z, 0, r2_o, new, f1, cdgb, 0, IF_BFP)
> + F(0xb3a6, CXGBR, RRF_e, Z, 0, r2_o, new_P, x1, cxgb, 0, IF_BFP)
> /* CONVERT TO LOGICAL */
> F(0xb39c, CLFEBR, RRF_e, FPE, 0, e2, new, r1_32, clfeb, 0, IF_BFP)
> - F(0xb39d, CLFDBR, RRF_e, FPE, 0, f2_o, new, r1_32, clfdb, 0, IF_BFP)
> - F(0xb39e, CLFXBR, RRF_e, FPE, 0, x2_o, new, r1_32, clfxb, 0, IF_BFP)
> + F(0xb39d, CLFDBR, RRF_e, FPE, 0, f2, new, r1_32, clfdb, 0, IF_BFP)
> + F(0xb39e, CLFXBR, RRF_e, FPE, x2h, x2l, new, r1_32, clfxb, 0, IF_BFP)
> F(0xb3ac, CLGEBR, RRF_e, FPE, 0, e2, r1, 0, clgeb, 0, IF_BFP)
> - F(0xb3ad, CLGDBR, RRF_e, FPE, 0, f2_o, r1, 0, clgdb, 0, IF_BFP)
> - F(0xb3ae, CLGXBR, RRF_e, FPE, 0, x2_o, r1, 0, clgxb, 0, IF_BFP)
> + F(0xb3ad, CLGDBR, RRF_e, FPE, 0, f2, r1, 0, clgdb, 0, IF_BFP)
> + F(0xb3ae, CLGXBR, RRF_e, FPE, x2h, x2l, r1, 0, clgxb, 0, IF_BFP)
> /* CONVERT FROM LOGICAL */
> F(0xb390, CELFBR, RRF_e, FPE, 0, r2_32u, new, e1, celgb, 0, IF_BFP)
> - F(0xb391, CDLFBR, RRF_e, FPE, 0, r2_32u, f1, 0, cdlgb, 0, IF_BFP)
> - F(0xb392, CXLFBR, RRF_e, FPE, 0, r2_32u, x1, 0, cxlgb, 0, IF_BFP)
> + F(0xb391, CDLFBR, RRF_e, FPE, 0, r2_32u, new, f1, cdlgb, 0, IF_BFP)
> + F(0xb392, CXLFBR, RRF_e, FPE, 0, r2_32u, new_P, x1, cxlgb, 0, IF_BFP)
> F(0xb3a0, CELGBR, RRF_e, FPE, 0, r2_o, new, e1, celgb, 0, IF_BFP)
> - F(0xb3a1, CDLGBR, RRF_e, FPE, 0, r2_o, f1, 0, cdlgb, 0, IF_BFP)
> - F(0xb3a2, CXLGBR, RRF_e, FPE, 0, r2_o, x1, 0, cxlgb, 0, IF_BFP)
> + F(0xb3a1, CDLGBR, RRF_e, FPE, 0, r2_o, new, f1, cdlgb, 0, IF_BFP)
> + F(0xb3a2, CXLGBR, RRF_e, FPE, 0, r2_o, new_P, x1, cxlgb, 0, IF_BFP)
>
> /* CONVERT UTF-8 TO UTF-16 */
> D(0xb2a7, CU12, RRF_c, Z, 0, 0, 0, 0, cuXX, 0, 12)
> @@ -336,10 +336,10 @@
> C(0x1d00, DR, RR_a, Z, r1_D32, r2_32s, new_P, r1_P32, divs32, 0)
> C(0x5d00, D, RX_a, Z, r1_D32, m2_32s, new_P, r1_P32, divs32, 0)
> F(0xb30d, DEBR, RRE, Z, e1, e2, new, e1, deb, 0, IF_BFP)
> - F(0xb31d, DDBR, RRE, Z, f1_o, f2_o, f1, 0, ddb, 0, IF_BFP)
> - F(0xb34d, DXBR, RRE, Z, 0, x2_o, x1, 0, dxb, 0, IF_BFP)
> + F(0xb31d, DDBR, RRE, Z, f1, f2, new, f1, ddb, 0, IF_BFP)
> + F(0xb34d, DXBR, RRE, Z, x2h, x2l, x1, x1, dxb, 0, IF_BFP)
> F(0xed0d, DEB, RXE, Z, e1, m2_32u, new, e1, deb, 0, IF_BFP)
> - F(0xed1d, DDB, RXE, Z, f1_o, m2_64, f1, 0, ddb, 0, IF_BFP)
> + F(0xed1d, DDB, RXE, Z, f1, m2_64, new, f1, ddb, 0, IF_BFP)
> /* DIVIDE LOGICAL */
> C(0xb997, DLR, RRE, Z, r1_D32, r2_32u, new_P, r1_P32, divu32, 0)
> C(0xe397, DL, RXY_a, Z, r1_D32, m2_32u, new_P, r1_P32, divu32, 0)
> @@ -410,13 +410,13 @@
> C(0xb914, LGFR, RRE, Z, 0, r2_32s, 0, r1, mov2, 0)
> C(0xe304, LG, RXY_a, Z, 0, a2, r1, 0, ld64, 0)
> C(0xe314, LGF, RXY_a, Z, 0, a2, r1, 0, ld32s, 0)
> - F(0x2800, LDR, RR_a, Z, 0, f2_o, 0, f1, mov2, 0, IF_AFP1 |
> IF_AFP2)
> + F(0x2800, LDR, RR_a, Z, 0, f2, 0, f1, mov2, 0, IF_AFP1 | IF_AFP2)
> F(0x6800, LD, RX_a, Z, 0, m2_64, 0, f1, mov2, 0, IF_AFP1)
> F(0xed65, LDY, RXY_a, LD, 0, m2_64, 0, f1, mov2, 0, IF_AFP1)
> F(0x3800, LER, RR_a, Z, 0, e2, 0, cond_e1e2, mov2, 0, IF_AFP1 |
> IF_AFP2)
> F(0x7800, LE, RX_a, Z, 0, m2_32u, 0, e1, mov2, 0, IF_AFP1)
> F(0xed64, LEY, RXY_a, LD, 0, m2_32u, 0, e1, mov2, 0, IF_AFP1)
> - F(0xb365, LXR, RRE, Z, 0, x2_o, 0, x1, movx, 0, IF_AFP1)
> + F(0xb365, LXR, RRE, Z, x2h, x2l, 0, x1, movx, 0, IF_AFP1)
> /* LOAD IMMEDIATE */
> C(0xc001, LGFI, RIL_a, EI, 0, i2, 0, r1, mov2, 0)
> /* LOAD RELATIVE LONG */
> @@ -454,8 +454,8 @@
> C(0xe302, LTG, RXY_a, EI, 0, a2, r1, 0, ld64, s64)
> C(0xe332, LTGF, RXY_a, GIE, 0, a2, r1, 0, ld32s, s64)
> F(0xb302, LTEBR, RRE, Z, 0, e2, 0, cond_e1e2, mov2, f32, IF_BFP)
> - F(0xb312, LTDBR, RRE, Z, 0, f2_o, 0, f1, mov2, f64, IF_BFP)
> - F(0xb342, LTXBR, RRE, Z, 0, x2_o, 0, x1, movx, f128, IF_BFP)
> + F(0xb312, LTDBR, RRE, Z, 0, f2, 0, f1, mov2, f64, IF_BFP)
> + F(0xb342, LTXBR, RRE, Z, x2h, x2l, 0, x1, movx, f128, IF_BFP)
> /* LOAD AND TRAP */
> C(0xe39f, LAT, RXY_a, LAT, 0, m2_32u, r1, 0, lat, 0)
> C(0xe385, LGAT, RXY_a, LAT, 0, a2, r1, 0, lgat, 0)
> @@ -476,9 +476,9 @@
> C(0xb903, LCGR, RRE, Z, 0, r2, r1, 0, neg, neg64)
> C(0xb913, LCGFR, RRE, Z, 0, r2_32s, r1, 0, neg, neg64)
> F(0xb303, LCEBR, RRE, Z, 0, e2, new, e1, negf32, f32, IF_BFP)
> - F(0xb313, LCDBR, RRE, Z, 0, f2_o, f1, 0, negf64, f64, IF_BFP)
> - F(0xb343, LCXBR, RRE, Z, 0, x2_o, x1, 0, negf128, f128, IF_BFP)
> - F(0xb373, LCDFR, RRE, FPSSH, 0, f2_o, f1, 0, negf64, 0, IF_AFP1 |
> IF_AFP2)
> + F(0xb313, LCDBR, RRE, Z, 0, f2, new, f1, negf64, f64, IF_BFP)
> + F(0xb343, LCXBR, RRE, Z, x2h, x2l, new_P, x1, negf128, f128,
> IF_BFP)
> + F(0xb373, LCDFR, RRE, FPSSH, 0, f2, new, f1, negf64, 0, IF_AFP1 |
> IF_AFP2)
> /* LOAD HALFWORD */
> C(0xb927, LHR, RRE, EI, 0, r2_16s, 0, r1_32, mov2, 0)
> C(0xb907, LGHR, RRE, EI, 0, r2_16s, 0, r1, mov2, 0)
> @@ -537,15 +537,15 @@
> /* LOAD FPR FROM GR */
> F(0xb3c1, LDGR, RRE, FPRGR, 0, r2_o, 0, f1, mov2, 0, IF_AFP1)
> /* LOAD GR FROM FPR */
> - F(0xb3cd, LGDR, RRE, FPRGR, 0, f2_o, 0, r1, mov2, 0, IF_AFP2)
> + F(0xb3cd, LGDR, RRE, FPRGR, 0, f2, 0, r1, mov2, 0, IF_AFP2)
> /* LOAD NEGATIVE */
> C(0x1100, LNR, RR_a, Z, 0, r2_32s, new, r1_32, nabs, nabs32)
> C(0xb901, LNGR, RRE, Z, 0, r2, r1, 0, nabs, nabs64)
> C(0xb911, LNGFR, RRE, Z, 0, r2_32s, r1, 0, nabs, nabs64)
> F(0xb301, LNEBR, RRE, Z, 0, e2, new, e1, nabsf32, f32, IF_BFP)
> - F(0xb311, LNDBR, RRE, Z, 0, f2_o, f1, 0, nabsf64, f64, IF_BFP)
> - F(0xb341, LNXBR, RRE, Z, 0, x2_o, x1, 0, nabsf128, f128, IF_BFP)
> - F(0xb371, LNDFR, RRE, FPSSH, 0, f2_o, f1, 0, nabsf64, 0, IF_AFP1 |
> IF_AFP2)
> + F(0xb311, LNDBR, RRE, Z, 0, f2, new, f1, nabsf64, f64, IF_BFP)
> + F(0xb341, LNXBR, RRE, Z, x2h, x2l, new_P, x1, nabsf128, f128,
> IF_BFP)
> + F(0xb371, LNDFR, RRE, FPSSH, 0, f2, new, f1, nabsf64, 0, IF_AFP1 |
> IF_AFP2)
> /* LOAD ON CONDITION */
> C(0xb9f2, LOCR, RRF_c, LOC, r1, r2, new, r1_32, loc, 0)
> C(0xb9e2, LOCGR, RRF_c, LOC, r1, r2, r1, 0, loc, 0)
> @@ -568,9 +568,9 @@
> C(0xb900, LPGR, RRE, Z, 0, r2, r1, 0, abs, abs64)
> C(0xb910, LPGFR, RRE, Z, 0, r2_32s, r1, 0, abs, abs64)
> F(0xb300, LPEBR, RRE, Z, 0, e2, new, e1, absf32, f32, IF_BFP)
> - F(0xb310, LPDBR, RRE, Z, 0, f2_o, f1, 0, absf64, f64, IF_BFP)
> - F(0xb340, LPXBR, RRE, Z, 0, x2_o, x1, 0, absf128, f128, IF_BFP)
> - F(0xb370, LPDFR, RRE, FPSSH, 0, f2_o, f1, 0, absf64, 0, IF_AFP1 |
> IF_AFP2)
> + F(0xb310, LPDBR, RRE, Z, 0, f2, new, f1, absf64, f64, IF_BFP)
> + F(0xb340, LPXBR, RRE, Z, x2h, x2l, new_P, x1, absf128, f128,
> IF_BFP)
> + F(0xb370, LPDFR, RRE, FPSSH, 0, f2, new, f1, absf64, 0, IF_AFP1 |
> IF_AFP2)
> /* LOAD REVERSED */
> C(0xb91f, LRVR, RRE, Z, 0, r2_32u, new, r1_32, rev32, 0)
> C(0xb90f, LRVGR, RRE, Z, 0, r2_o, r1, 0, rev64, 0)
> @@ -588,20 +588,20 @@
> F(0xb2bd, LFAS, S, IEEEE_SIM, 0, m2_32u, 0, 0, sfas, 0, IF_DFP)
> /* LOAD FP INTEGER */
> F(0xb357, FIEBR, RRF_e, Z, 0, e2, new, e1, fieb, 0, IF_BFP)
> - F(0xb35f, FIDBR, RRF_e, Z, 0, f2_o, f1, 0, fidb, 0, IF_BFP)
> - F(0xb347, FIXBR, RRF_e, Z, 0, x2_o, x1, 0, fixb, 0, IF_BFP)
> + F(0xb35f, FIDBR, RRF_e, Z, 0, f2, new, f1, fidb, 0, IF_BFP)
> + F(0xb347, FIXBR, RRF_e, Z, x2h, x2l, new_P, x1, fixb, 0, IF_BFP)
>
> /* LOAD LENGTHENED */
> - F(0xb304, LDEBR, RRE, Z, 0, e2, f1, 0, ldeb, 0, IF_BFP)
> - F(0xb305, LXDBR, RRE, Z, 0, f2_o, x1, 0, lxdb, 0, IF_BFP)
> - F(0xb306, LXEBR, RRE, Z, 0, e2, x1, 0, lxeb, 0, IF_BFP)
> - F(0xed04, LDEB, RXE, Z, 0, m2_32u, f1, 0, ldeb, 0, IF_BFP)
> - F(0xed05, LXDB, RXE, Z, 0, m2_64, x1, 0, lxdb, 0, IF_BFP)
> - F(0xed06, LXEB, RXE, Z, 0, m2_32u, x1, 0, lxeb, 0, IF_BFP)
> + F(0xb304, LDEBR, RRE, Z, 0, e2, new, f1, ldeb, 0, IF_BFP)
> + F(0xb305, LXDBR, RRE, Z, 0, f2, new_P, x1, lxdb, 0, IF_BFP)
> + F(0xb306, LXEBR, RRE, Z, 0, e2, new_P, x1, lxeb, 0, IF_BFP)
> + F(0xed04, LDEB, RXE, Z, 0, m2_32u, new, f1, ldeb, 0, IF_BFP)
> + F(0xed05, LXDB, RXE, Z, 0, m2_64, new_P, x1, lxdb, 0, IF_BFP)
> + F(0xed06, LXEB, RXE, Z, 0, m2_32u, new_P, x1, lxeb, 0, IF_BFP)
> /* LOAD ROUNDED */
> - F(0xb344, LEDBR, RRE, Z, 0, f2_o, new, e1, ledb, 0, IF_BFP)
> - F(0xb345, LDXBR, RRE, Z, 0, x2_o, f1, 0, ldxb, 0, IF_BFP)
> - F(0xb346, LEXBR, RRE, Z, 0, x2_o, new, e1, lexb, 0, IF_BFP)
> + F(0xb344, LEDBR, RRE, Z, 0, f2, new, e1, ledb, 0, IF_BFP)
> + F(0xb345, LDXBR, RRE, Z, x2h, x2l, new, f1, ldxb, 0, IF_BFP)
> + F(0xb346, LEXBR, RRE, Z, x2h, x2l, new, e1, lexb, 0, IF_BFP)
>
> /* LOAD MULTIPLE */
> C(0x9800, LM, RS_a, Z, 0, a2, 0, 0, lm32, 0)
> @@ -648,14 +648,14 @@
> C(0x5c00, M, RX_a, Z, r1p1_32s, m2_32s, new, r1_D32, mul, 0)
> C(0xe35c, MFY, RXY_a, GIE, r1p1_32s, m2_32s, new, r1_D32, mul, 0)
> F(0xb317, MEEBR, RRE, Z, e1, e2, new, e1, meeb, 0, IF_BFP)
> - F(0xb31c, MDBR, RRE, Z, f1_o, f2_o, f1, 0, mdb, 0, IF_BFP)
> - F(0xb34c, MXBR, RRE, Z, 0, x2_o, x1, 0, mxb, 0, IF_BFP)
> - F(0xb30c, MDEBR, RRE, Z, f1_o, e2, f1, 0, mdeb, 0, IF_BFP)
> - F(0xb307, MXDBR, RRE, Z, 0, f2_o, x1, 0, mxdb, 0, IF_BFP)
> + F(0xb31c, MDBR, RRE, Z, f1, f2, new, f1, mdb, 0, IF_BFP)
> + F(0xb34c, MXBR, RRE, Z, x2h, x2l, x1, x1, mxb, 0, IF_BFP)
> + F(0xb30c, MDEBR, RRE, Z, f1, e2, new, f1, mdeb, 0, IF_BFP)
> + F(0xb307, MXDBR, RRE, Z, 0, f2, x1, x1, mxdb, 0, IF_BFP)
> F(0xed17, MEEB, RXE, Z, e1, m2_32u, new, e1, meeb, 0, IF_BFP)
> - F(0xed1c, MDB, RXE, Z, f1_o, m2_64, f1, 0, mdb, 0, IF_BFP)
> - F(0xed0c, MDEB, RXE, Z, f1_o, m2_32u, f1, 0, mdeb, 0, IF_BFP)
> - F(0xed07, MXDB, RXE, Z, 0, m2_64, x1, 0, mxdb, 0, IF_BFP)
> + F(0xed1c, MDB, RXE, Z, f1, m2_64, new, f1, mdb, 0, IF_BFP)
> + F(0xed0c, MDEB, RXE, Z, f1, m2_32u, new, f1, mdeb, 0, IF_BFP)
> + F(0xed07, MXDB, RXE, Z, 0, m2_64, x1, x1, mxdb, 0, IF_BFP)
> /* MULTIPLY HALFWORD */
> C(0x4c00, MH, RX_a, Z, r1_o, m2_16s, new, r1_32, mul, 0)
> C(0xe37c, MHY, RXY_a, GIE, r1_o, m2_16s, new, r1_32, mul, 0)
> @@ -681,14 +681,14 @@
>
> /* MULTIPLY AND ADD */
> F(0xb30e, MAEBR, RRD, Z, e1, e2, new, e1, maeb, 0, IF_BFP)
> - F(0xb31e, MADBR, RRD, Z, f1_o, f2_o, f1, 0, madb, 0, IF_BFP)
> + F(0xb31e, MADBR, RRD, Z, f1, f2, new, f1, madb, 0, IF_BFP)
> F(0xed0e, MAEB, RXF, Z, e1, m2_32u, new, e1, maeb, 0, IF_BFP)
> - F(0xed1e, MADB, RXF, Z, f1_o, m2_64, f1, 0, madb, 0, IF_BFP)
> + F(0xed1e, MADB, RXF, Z, f1, m2_64, new, f1, madb, 0, IF_BFP)
> /* MULTIPLY AND SUBTRACT */
> F(0xb30f, MSEBR, RRD, Z, e1, e2, new, e1, mseb, 0, IF_BFP)
> - F(0xb31f, MSDBR, RRD, Z, f1_o, f2_o, f1, 0, msdb, 0, IF_BFP)
> + F(0xb31f, MSDBR, RRD, Z, f1, f2, new, f1, msdb, 0, IF_BFP)
> F(0xed0f, MSEB, RXF, Z, e1, m2_32u, new, e1, mseb, 0, IF_BFP)
> - F(0xed1f, MSDB, RXF, Z, f1_o, m2_64, f1, 0, msdb, 0, IF_BFP)
> + F(0xed1f, MSDB, RXF, Z, f1, m2_64, new, f1, msdb, 0, IF_BFP)
>
> /* OR */
> C(0x1600, OR, RR_a, Z, r1, r2, new, r1_32, or, nz32)
> @@ -793,17 +793,17 @@
>
> /* SQUARE ROOT */
> F(0xb314, SQEBR, RRE, Z, 0, e2, new, e1, sqeb, 0, IF_BFP)
> - F(0xb315, SQDBR, RRE, Z, 0, f2_o, f1, 0, sqdb, 0, IF_BFP)
> - F(0xb316, SQXBR, RRE, Z, 0, x2_o, x1, 0, sqxb, 0, IF_BFP)
> + F(0xb315, SQDBR, RRE, Z, 0, f2, new, f1, sqdb, 0, IF_BFP)
> + F(0xb316, SQXBR, RRE, Z, x2h, x2l, new, x1, sqxb, 0, IF_BFP)
> F(0xed14, SQEB, RXE, Z, 0, m2_32u, new, e1, sqeb, 0, IF_BFP)
> - F(0xed15, SQDB, RXE, Z, 0, m2_64, f1, 0, sqdb, 0, IF_BFP)
> + F(0xed15, SQDB, RXE, Z, 0, m2_64, new, f1, sqdb, 0, IF_BFP)
>
> /* STORE */
> C(0x5000, ST, RX_a, Z, r1_o, a2, 0, 0, st32, 0)
> C(0xe350, STY, RXY_a, LD, r1_o, a2, 0, 0, st32, 0)
> C(0xe324, STG, RXY_a, Z, r1_o, a2, 0, 0, st64, 0)
> - F(0x6000, STD, RX_a, Z, f1_o, a2, 0, 0, st64, 0, IF_AFP1)
> - F(0xed67, STDY, RXY_a, LD, f1_o, a2, 0, 0, st64, 0, IF_AFP1)
> + F(0x6000, STD, RX_a, Z, f1, a2, 0, 0, st64, 0, IF_AFP1)
> + F(0xed67, STDY, RXY_a, LD, f1, a2, 0, 0, st64, 0, IF_AFP1)
> F(0x7000, STE, RX_a, Z, e1, a2, 0, 0, st32, 0, IF_AFP1)
> F(0xed66, STEY, RXY_a, LD, e1, a2, 0, 0, st32, 0, IF_AFP1)
> /* STORE RELATIVE LONG */
> @@ -865,10 +865,10 @@
> C(0xe309, SG, RXY_a, Z, r1, m2_64, r1, 0, sub, subs64)
> C(0xe319, SGF, RXY_a, Z, r1, m2_32s, r1, 0, sub, subs64)
> F(0xb30b, SEBR, RRE, Z, e1, e2, new, e1, seb, f32, IF_BFP)
> - F(0xb31b, SDBR, RRE, Z, f1_o, f2_o, f1, 0, sdb, f64, IF_BFP)
> - F(0xb34b, SXBR, RRE, Z, 0, x2_o, x1, 0, sxb, f128, IF_BFP)
> + F(0xb31b, SDBR, RRE, Z, f1, f2, new, f1, sdb, f64, IF_BFP)
> + F(0xb34b, SXBR, RRE, Z, x2h, x2l, x1, x1, sxb, f128, IF_BFP)
> F(0xed0b, SEB, RXE, Z, e1, m2_32u, new, e1, seb, f32, IF_BFP)
> - F(0xed1b, SDB, RXE, Z, f1_o, m2_64, f1, 0, sdb, f64, IF_BFP)
> + F(0xed1b, SDB, RXE, Z, f1, m2_64, new, f1, sdb, f64, IF_BFP)
> /* SUBTRACT HALFWORD */
> C(0x4b00, SH, RX_a, Z, r1, m2_16s, new, r1_32, sub, subs32)
> C(0xe37b, SHY, RXY_a, LD, r1, m2_16s, new, r1_32, sub, subs32)
> @@ -908,8 +908,8 @@
>
> /* TEST DATA CLASS */
> F(0xed10, TCEB, RXE, Z, e1, a2, 0, 0, tceb, 0, IF_BFP)
> - F(0xed11, TCDB, RXE, Z, f1_o, a2, 0, 0, tcdb, 0, IF_BFP)
> - F(0xed12, TCXB, RXE, Z, x1_o, a2, 0, 0, tcxb, 0, IF_BFP)
> + F(0xed11, TCDB, RXE, Z, f1, a2, 0, 0, tcdb, 0, IF_BFP)
> + F(0xed12, TCXB, RXE, Z, 0, a2, x1, 0, tcxb, 0, IF_BFP)
>
> /* TEST DECIMAL */
> C(0xebc0, TP, RSL, E2, la1, 0, 0, 0, tp, 0)
> diff --git a/target/s390x/translate.c b/target/s390x/translate.c
> index 6249c70d02..639084af07 100644
> --- a/target/s390x/translate.c
> +++ b/target/s390x/translate.c
> @@ -111,9 +111,8 @@ static TCGv_i64 cc_src;
> static TCGv_i64 cc_dst;
> static TCGv_i64 cc_vr;
>
> -static char cpu_reg_names[32][4];
> +static char cpu_reg_names[16][4];
> static TCGv_i64 regs[16];
> -static TCGv_i64 fregs[16];
>
> void s390x_translate_init(void)
> {
> @@ -144,13 +143,53 @@ void s390x_translate_init(void)
> offsetof(CPUS390XState, regs[i]),
> cpu_reg_names[i]);
> }
> +}
>
> - for (i = 0; i < 16; i++) {
> - snprintf(cpu_reg_names[i + 16], sizeof(cpu_reg_names[0]), "f%d", i);
> - fregs[i] = tcg_global_mem_new(cpu_env,
> - offsetof(CPUS390XState, vregs[i][0].d),
> - cpu_reg_names[i + 16]);
> - }
> +static inline int vec_reg_offset(uint8_t reg, uint8_t enr, TCGMemOp size)
> +{
> + const uint8_t es = 1 << size;
> + int offs = enr * es;
> +
> + g_assert(reg < 32);
> + /*
> + * vregs[n][0] is the lowest 8 byte and vregs[n][1] the highest 8 byte
> + * of the 16 byte vector, on both, little and big endian systems.
> + *
> + * Big Endian (target/possible host)
> + * B: [ 0][ 1][ 2][ 3][ 4][ 5][ 6][ 7] - [ 8][
> 9][10][11][12][13][14][15]
> + * HW: [ 0][ 1][ 2][ 3] - [ 4][ 5][ 6][
> 7]
> + * W: [ 0][ 1] - [ 2][
> 3]
> + * DW: [ 0] - [
> 1]
> + *
> + * Little Endian (possible host)
> + * B: [ 7][ 6][ 5][ 4][ 3][ 2][ 1][ 0] - [15][14][13][12][11][10][ 9][
> 8]
> + * HW: [ 3][ 2][ 1][ 0] - [ 7][ 6][ 5][
> 4]
> + * W: [ 1][ 0] - [ 3][
> 2]
> + * DW: [ 0] - [
> 1]
> + *
> + * For 16 byte elements, the two 8 byte halves will not form a host
> + * int128 if the host is little endian, since they're in the wrong order.
> + * Some operations (e.g. xor) do not care. For operations like addition,
> + * the two 8 byte elements have to be loaded separately. Let's force all
> + * 16 byte operations to handle it in a special way.
> + */
> + g_assert(size <= MO_64);
> +#ifndef HOST_WORDS_BIGENDIAN
> + offs ^= (8 - es);
> +#endif
> + return offs + offsetof(CPUS390XState, vregs[reg][0].d);
> +}
> +
> +static inline int freg64_offset(uint8_t reg)
> +{
> + g_assert(reg < 16);
> + return vec_reg_offset(reg, 0, MO_64);
> +}
> +
> +static inline int freg32_offset(uint8_t reg)
> +{
> + g_assert(reg < 16);
> + return vec_reg_offset(reg, 0, MO_32);
> }
>
> static TCGv_i64 load_reg(int reg)
> @@ -160,10 +199,19 @@ static TCGv_i64 load_reg(int reg)
> return r;
> }
>
> +static TCGv_i64 load_freg(int reg)
> +{
> + TCGv_i64 r = tcg_temp_new_i64();
> +
> + tcg_gen_ld_i64(r, cpu_env, freg64_offset(reg));
> + return r;
> +}
> +
> static TCGv_i64 load_freg32_i64(int reg)
> {
> TCGv_i64 r = tcg_temp_new_i64();
> - tcg_gen_shri_i64(r, fregs[reg], 32);
> +
> + tcg_gen_ld32u_i64(r, cpu_env, freg32_offset(reg));
> return r;
> }
>
> @@ -174,7 +222,7 @@ static void store_reg(int reg, TCGv_i64 v)
>
> static void store_freg(int reg, TCGv_i64 v)
> {
> - tcg_gen_mov_i64(fregs[reg], v);
> + tcg_gen_st_i64(v, cpu_env, freg64_offset(reg));
> }
>
> static void store_reg32_i64(int reg, TCGv_i64 v)
> @@ -190,7 +238,7 @@ static void store_reg32h_i64(int reg, TCGv_i64 v)
>
> static void store_freg32_i64(int reg, TCGv_i64 v)
> {
> - tcg_gen_deposit_i64(fregs[reg], fregs[reg], v, 32, 32);
> + tcg_gen_st32_i64(v, cpu_env, freg32_offset(reg));
> }
>
> static void return_low128(TCGv_i64 dest)
> @@ -3325,8 +3373,9 @@ static DisasJumpType op_maeb(DisasContext *s, DisasOps
> *o)
>
> static DisasJumpType op_madb(DisasContext *s, DisasOps *o)
> {
> - int r3 = get_field(s->fields, r3);
> - gen_helper_madb(o->out, cpu_env, o->in1, o->in2, fregs[r3]);
> + TCGv_i64 r3 = load_freg(get_field(s->fields, r3));
> + gen_helper_madb(o->out, cpu_env, o->in1, o->in2, r3);
> + tcg_temp_free_i64(r3);
> return DISAS_NEXT;
> }
>
> @@ -3340,8 +3389,9 @@ static DisasJumpType op_mseb(DisasContext *s, DisasOps
> *o)
>
> static DisasJumpType op_msdb(DisasContext *s, DisasOps *o)
> {
> - int r3 = get_field(s->fields, r3);
> - gen_helper_msdb(o->out, cpu_env, o->in1, o->in2, fregs[r3]);
> + TCGv_i64 r3 = load_freg(get_field(s->fields, r3));
> + gen_helper_msdb(o->out, cpu_env, o->in1, o->in2, r3);
> + tcg_temp_free_i64(r3);
> return DISAS_NEXT;
> }
>
> @@ -5085,19 +5135,11 @@ static void prep_r1_P(DisasContext *s, DisasFields
> *f, DisasOps *o)
> }
> #define SPEC_prep_r1_P SPEC_r1_even
>
> -static void prep_f1(DisasContext *s, DisasFields *f, DisasOps *o)
> -{
> - o->out = fregs[get_field(f, r1)];
> - o->g_out = true;
> -}
> -#define SPEC_prep_f1 0
> -
> +/* Whenever we need x1 in addition to other inputs, we'll load it to
> out/out2 */
> static void prep_x1(DisasContext *s, DisasFields *f, DisasOps *o)
> {
> - int r1 = get_field(f, r1);
> - o->out = fregs[r1];
> - o->out2 = fregs[r1 + 2];
> - o->g_out = o->g_out2 = true;
> + o->out = load_freg(get_field(f, r1));
> + o->out2 = load_freg(get_field(f, r1) + 2);
> }
> #define SPEC_prep_x1 SPEC_r1_f128
>
> @@ -5393,28 +5435,24 @@ static void in1_e1(DisasContext *s, DisasFields *f,
> DisasOps *o)
> }
> #define SPEC_in1_e1 0
>
> -static void in1_f1_o(DisasContext *s, DisasFields *f, DisasOps *o)
> +static void in1_f1(DisasContext *s, DisasFields *f, DisasOps *o)
> {
> - o->in1 = fregs[get_field(f, r1)];
> - o->g_in1 = true;
> + o->in1 = load_freg(get_field(f, r1));
> }
> -#define SPEC_in1_f1_o 0
> +#define SPEC_in1_f1 0
>
> -static void in1_x1_o(DisasContext *s, DisasFields *f, DisasOps *o)
> +/* Load the high double word of an extended (128-bit) format FP number */
> +static void in1_x2h(DisasContext *s, DisasFields *f, DisasOps *o)
> {
> - int r1 = get_field(f, r1);
> - o->out = fregs[r1];
> - o->out2 = fregs[r1 + 2];
> - o->g_out = o->g_out2 = true;
> + o->in1 = load_freg(get_field(f, r2));
> }
> -#define SPEC_in1_x1_o SPEC_r1_f128
> +#define SPEC_in1_x2h SPEC_r2_f128
>
> -static void in1_f3_o(DisasContext *s, DisasFields *f, DisasOps *o)
> +static void in1_f3(DisasContext *s, DisasFields *f, DisasOps *o)
> {
> - o->in1 = fregs[get_field(f, r3)];
> - o->g_in1 = true;
> + o->in1 = load_freg(get_field(f, r3));
> }
> -#define SPEC_in1_f3_o 0
> +#define SPEC_in1_f3 0
>
> static void in1_la1(DisasContext *s, DisasFields *f, DisasOps *o)
> {
> @@ -5599,21 +5637,18 @@ static void in2_e2(DisasContext *s, DisasFields *f,
> DisasOps *o)
> }
> #define SPEC_in2_e2 0
>
> -static void in2_f2_o(DisasContext *s, DisasFields *f, DisasOps *o)
> +static void in2_f2(DisasContext *s, DisasFields *f, DisasOps *o)
> {
> - o->in2 = fregs[get_field(f, r2)];
> - o->g_in2 = true;
> + o->in2 = load_freg(get_field(f, r2));
> }
> -#define SPEC_in2_f2_o 0
> +#define SPEC_in2_f2 0
>
> -static void in2_x2_o(DisasContext *s, DisasFields *f, DisasOps *o)
> +/* Load the low double word of an extended (128-bit) format FP number */
> +static void in2_x2l(DisasContext *s, DisasFields *f, DisasOps *o)
> {
> - int r2 = get_field(f, r2);
> - o->in1 = fregs[r2];
> - o->in2 = fregs[r2 + 2];
> - o->g_in1 = o->g_in2 = true;
> + o->in2 = load_freg(get_field(f, r2) + 2);
> }
> -#define SPEC_in2_x2_o SPEC_r2_f128
> +#define SPEC_in2_x2l SPEC_r2_f128
>
> static void in2_ra2(DisasContext *s, DisasFields *f, DisasOps *o)
> {
>
--
Thanks,
David / dhildenb