tinycc-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Tinycc-devel] Re : Re: Re : Some questions regarding of TCC's optim


From: Domingo Alvarez Duarte
Subject: Re: [Tinycc-devel] Re : Re: Re : Some questions regarding of TCC's optimizations.
Date: Wed, 6 Apr 2022 20:34:07 +0200
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:91.0) Gecko/20100101 Thunderbird/91.7.0

Hello Brian !

I just applied your changes to my fork of tcc (made fully reentrant, here: https://github.com/mingodad/tinycc) and tested it with the script shown below, building sqlite3 with tcc and with gcc -O0.

The resulting sqlite3 binaries were tested by creating a database from 150MB of SQL, and the resulting databases were the same (sqlite3-tcc/sqlite3-gcc). The final test shown here was then done with an in-memory database instead of one on disk.

Result:

====

bin            old        new        diff     %reduction (old/new)
---            ---        ---        ----     ----------
sqlite3-tcc    1449540    1412676    36864    2.60
sqlite3-gcc    1408080    1408080    0        0

====

Script to run the test:

====

# Directory holding the SQLite sources (sqlite3.c and shell.c) that doIt compiles.
sqlh="${HOME}/dev/dadbiz++/third-party/dad/sqlite3-orig"
# Print every "cpu MHz" line found in a cpuinfo-style file.
# Arguments: $1 - file to read (optional; defaults to /proc/cpuinfo)
# Outputs:   the matching lines, unmodified, on stdout
# Returns:   grep's exit status (non-zero when nothing matches or the file
#            cannot be read)
showCPUSpeed() {
    # grep reads the file directly; no need to pipe it through cat.
    grep -- '^cpu MHz' "${1:-/proc/cpuinfo}"
}

# Run one benchmark pass: build sqlite3 with ./tcc and with gcc -O0, list each
# resulting binary, and time each binary while it reads the same SQL dump on
# stdin. No database file is passed to sqlite3, so the load does not go to the
# on-disk database named in dbtcc/db0. showCPUSpeed is called before and after
# every step.
# Globals:   sqlh  (read)    - directory containing sqlite3.c and shell.c
#            dbsql (written) - SQL dump fed to both binaries
#            dbtcc, db0 (written) - database file names, referenced only by
#                                   the commented-out on-disk runs
# Arguments: none
# Outputs:   progress messages, `ls -l` listings and /usr/bin/time reports
doIt() {
    showCPUSpeed
    echo compiling sqlite3 with tcc
    # Paths are quoted so a $HOME containing spaces or glob characters
    # cannot split or expand the source file arguments.
    /usr/bin/time ./tcc -o sqlite3-tcc "$sqlh/sqlite3.c" "$sqlh/shell.c" -lm -lpthread -ldl
    ls -l sqlite3-tcc
    # Alternative SQL dumps, kept for reference:
    #dbsql=$HOME/dev/AMPL/dad/sql/y-mod-sql/y103-hard-dat.db.sql
    #dbsql=$HOME/dev/SquiLu/db-api/ourbiz.db.sql
    dbsql=$HOME/dev/SquiLu/db-api/companies_uk_RG.db.sql
    dbtcc=sdb-tcc.db
    #rm "$dbtcc"
    showCPUSpeed
    # On-disk variant of the run below:
    #/usr/bin/time ./sqlite3-tcc "$dbtcc"  < "$dbsql"
    echo running sqlite3-tcc
    /usr/bin/time ./sqlite3-tcc  < "$dbsql"

    showCPUSpeed
    echo compiling sqlite3 with gcc
    /usr/bin/time gcc -O0 -o sqlite3-gcc "$sqlh/sqlite3.c" "$sqlh/shell.c" -lm -lpthread -ldl
    ls -l sqlite3-gcc
    db0=sdb-gcc.db
    showCPUSpeed
    #rm "$db0"
    # On-disk variant of the run below:
    #/usr/bin/time ./sqlite3-gcc "$db0"  < "$dbsql"
    echo running sqlite3-gcc
    /usr/bin/time ./sqlite3-gcc  < "$dbsql"
    showCPUSpeed
}
# Run the full benchmark twice, back to back.
# NOTE(review): presumably the second pass is there to show the timings are
# repeatable — confirm with the author.
for _ in 1 2; do
    doIt
done

====

Output before changes:

====

cpu MHz        : 1043.890
cpu MHz        : 955.076
cpu MHz        : 1368.369
cpu MHz        : 1202.189
cpu MHz        : 997.435
cpu MHz        : 1118.064
cpu MHz        : 1330.976
cpu MHz        : 972.875
compiling sqlite3 with tcc
0.10user 0.01system 0:00.11elapsed 100%CPU (0avgtext+0avgdata 15840maxresident)k
0inputs+2832outputs (0major+3916minor)pagefaults 0swaps
-rwxrwxr-x 1 mingo mingo 1449540 abr  6 20:08 sqlite3-tcc
cpu MHz        : 2776.711
cpu MHz        : 2657.025
cpu MHz        : 3145.106
cpu MHz        : 3028.188
cpu MHz        : 3135.120
cpu MHz        : 2681.011
cpu MHz        : 2891.677
cpu MHz        : 2840.370
running sqlite3-tcc
12.10user 0.12system 0:12.23elapsed 99%CPU (0avgtext+0avgdata 180488maxresident)k
0inputs+2976outputs (0major+44895minor)pagefaults 0swaps
cpu MHz        : 2712.282
cpu MHz        : 2700.523
cpu MHz        : 2700.011
cpu MHz        : 2706.670
cpu MHz        : 2699.597
cpu MHz        : 2699.745
cpu MHz        : 2699.178
cpu MHz        : 2699.843
compiling sqlite3 with gcc
4.00user 0.15system 0:04.16elapsed 100%CPU (0avgtext+0avgdata 234052maxresident)k
0inputs+2752outputs (0major+87170minor)pagefaults 0swaps
-rwxrwxr-x 1 mingo mingo 1408080 abr  6 20:09 sqlite3-gcc
cpu MHz        : 2978.379
cpu MHz        : 2820.748
cpu MHz        : 2880.415
cpu MHz        : 2804.540
cpu MHz        : 2853.414
cpu MHz        : 2834.585
cpu MHz        : 2811.112
cpu MHz        : 2841.551
running sqlite3-gcc
10.86user 0.06system 0:10.93elapsed 100%CPU (0avgtext+0avgdata 179924maxresident)k
0inputs+2976outputs (0major+44880minor)pagefaults 0swaps
cpu MHz        : 2700.364
cpu MHz        : 2700.918
cpu MHz        : 2699.985
cpu MHz        : 2700.143
cpu MHz        : 2699.869
cpu MHz        : 2700.273
cpu MHz        : 2700.024
cpu MHz        : 2699.606
cpu MHz        : 2700.364
cpu MHz        : 2700.918
cpu MHz        : 2699.985
cpu MHz        : 2700.143
cpu MHz        : 2699.869
cpu MHz        : 2700.273
cpu MHz        : 2700.024
cpu MHz        : 2699.606
compiling sqlite3 with tcc
0.10user 0.00system 0:00.11elapsed 100%CPU (0avgtext+0avgdata 15952maxresident)k
0inputs+2832outputs (0major+3917minor)pagefaults 0swaps
-rwxrwxr-x 1 mingo mingo 1449540 abr  6 20:09 sqlite3-tcc
cpu MHz        : 3143.321
cpu MHz        : 2982.310
cpu MHz        : 3187.922
cpu MHz        : 2955.090
cpu MHz        : 2770.887
cpu MHz        : 2942.430
cpu MHz        : 3012.071
cpu MHz        : 3283.026
running sqlite3-tcc
12.04user 0.13system 0:12.18elapsed 99%CPU (0avgtext+0avgdata 180600maxresident)k
0inputs+2976outputs (0major+44895minor)pagefaults 0swaps
cpu MHz        : 2700.171
cpu MHz        : 2700.743
cpu MHz        : 2700.004
cpu MHz        : 2700.256
cpu MHz        : 2700.178
cpu MHz        : 2700.150
cpu MHz        : 2699.892
cpu MHz        : 2697.367
compiling sqlite3 with gcc
4.02user 0.13system 0:04.16elapsed 99%CPU (0avgtext+0avgdata 234080maxresident)k
0inputs+2752outputs (0major+87125minor)pagefaults 0swaps
-rwxrwxr-x 1 mingo mingo 1408080 abr  6 20:09 sqlite3-gcc
cpu MHz        : 2799.667
cpu MHz        : 2915.447
cpu MHz        : 2942.032
cpu MHz        : 2726.340
cpu MHz        : 2654.825
cpu MHz        : 2850.573
cpu MHz        : 2919.901
cpu MHz        : 2873.281
running sqlite3-gcc
10.74user 0.10system 0:10.85elapsed 99%CPU (0avgtext+0avgdata 179868maxresident)k
0inputs+2976outputs (0major+44884minor)pagefaults 0swaps
cpu MHz        : 2700.370
cpu MHz        : 2699.943
cpu MHz        : 2700.177
cpu MHz        : 2700.000
cpu MHz        : 2700.023
cpu MHz        : 2699.778
cpu MHz        : 2699.937
cpu MHz        : 2700.448
====

Output after changes:

====

cpu MHz        : 1924.742
cpu MHz        : 993.702
cpu MHz        : 1634.113
cpu MHz        : 1038.054
cpu MHz        : 1496.319
cpu MHz        : 1681.467
cpu MHz        : 2188.883
cpu MHz        : 945.493
compiling sqlite3 with tcc
0.09user 0.02system 0:00.11elapsed 100%CPU (0avgtext+0avgdata 15800maxresident)k
0inputs+2760outputs (0major+3918minor)pagefaults 0swaps
-rwxrwxr-x 1 mingo mingo 1412676 abr  6 20:11 sqlite3-tcc
cpu MHz        : 3189.895
cpu MHz        : 2801.464
cpu MHz        : 3225.923
cpu MHz        : 3189.898
cpu MHz        : 2968.607
cpu MHz        : 2662.467
cpu MHz        : 2962.322
cpu MHz        : 2780.141
running sqlite3-tcc
11.97user 0.13system 0:12.10elapsed 99%CPU (0avgtext+0avgdata 180468maxresident)k
0inputs+2976outputs (0major+44892minor)pagefaults 0swaps
cpu MHz        : 2699.884
cpu MHz        : 2700.476
cpu MHz        : 2699.983
cpu MHz        : 2703.341
cpu MHz        : 2700.095
cpu MHz        : 2700.744
cpu MHz        : 2699.986
cpu MHz        : 2700.232
compiling sqlite3 with gcc
4.03user 0.10system 0:04.13elapsed 100%CPU (0avgtext+0avgdata 234000maxresident)k
0inputs+2752outputs (0major+87143minor)pagefaults 0swaps
-rwxrwxr-x 1 mingo mingo 1408080 abr  6 20:11 sqlite3-gcc
cpu MHz        : 3063.076
cpu MHz        : 2904.942
cpu MHz        : 2930.517
cpu MHz        : 2914.045
cpu MHz        : 2957.797
cpu MHz        : 2916.358
cpu MHz        : 2907.853
cpu MHz        : 2933.959
running sqlite3-gcc
10.78user 0.10system 0:10.90elapsed 99%CPU (0avgtext+0avgdata 179872maxresident)k
0inputs+2976outputs (0major+44881minor)pagefaults 0swaps
cpu MHz        : 2701.588
cpu MHz        : 2700.007
cpu MHz        : 2701.630
cpu MHz        : 2699.928
cpu MHz        : 2700.337
cpu MHz        : 2699.619
cpu MHz        : 2699.824
cpu MHz        : 2700.400
cpu MHz        : 2701.588
cpu MHz        : 2700.007
cpu MHz        : 2701.630
cpu MHz        : 2699.928
cpu MHz        : 2700.337
cpu MHz        : 2699.619
cpu MHz        : 2699.824
cpu MHz        : 2700.400
compiling sqlite3 with tcc
0.10user 0.00system 0:00.11elapsed 99%CPU (0avgtext+0avgdata 15752maxresident)k
0inputs+2760outputs (0major+3916minor)pagefaults 0swaps
-rwxrwxr-x 1 mingo mingo 1412676 abr  6 20:11 sqlite3-tcc
cpu MHz        : 3207.638
cpu MHz        : 3100.938
cpu MHz        : 2931.666
cpu MHz        : 3233.458
cpu MHz        : 3143.723
cpu MHz        : 3139.950
cpu MHz        : 3027.417
cpu MHz        : 3045.425
running sqlite3-tcc
11.84user 0.12system 0:11.96elapsed 99%CPU (0avgtext+0avgdata 180432maxresident)k
0inputs+2976outputs (0major+44893minor)pagefaults 0swaps
cpu MHz        : 2700.060
cpu MHz        : 2700.690
cpu MHz        : 2700.411
cpu MHz        : 2701.015
cpu MHz        : 2700.212
cpu MHz        : 2699.936
cpu MHz        : 2700.098
cpu MHz        : 2700.684
compiling sqlite3 with gcc
4.00user 0.15system 0:04.16elapsed 100%CPU (0avgtext+0avgdata 234012maxresident)k
0inputs+2752outputs (0major+87185minor)pagefaults 0swaps
-rwxrwxr-x 1 mingo mingo 1408080 abr  6 20:12 sqlite3-gcc
cpu MHz        : 2785.807
cpu MHz        : 2720.240
cpu MHz        : 2799.280
cpu MHz        : 2784.467
cpu MHz        : 2724.321
cpu MHz        : 2907.124
cpu MHz        : 2757.591
cpu MHz        : 2718.077
running sqlite3-gcc
10.82user 0.12system 0:10.94elapsed 99%CPU (0avgtext+0avgdata 179916maxresident)k
0inputs+2976outputs (0major+44885minor)pagefaults 0swaps
cpu MHz        : 2699.992
cpu MHz        : 2700.192
cpu MHz        : 2699.976
cpu MHz        : 2701.052
cpu MHz        : 2699.528
cpu MHz        : 2700.650
cpu MHz        : 2700.131
cpu MHz        : 2700.006
====

Cheers !

On 6/4/22 18:06, Brian Callahan wrote:
Seeing as I've had all positive feedback on this, here's a more complete
diff that I think is suitable for committing.

It does the following:
1. Converts movl $0, %e{ax,cx,dx,sp,si,di} to xorl
%e{ax,cx,dx,sp,si,di}, %e{ax,cx,dx,sp,si,di}

2. Converts movq $0, %r{ax,cx,dx,sp,si,di} to xorl
%e{ax,cx,dx,sp,si,di}, %e{ax,cx,dx,sp,si,di}

There are two places where these idioms can be emitted, so it handles
both cases.

Here are some before and after .text size numbers:

bin             old     new     diff    %reduction
---             ---     ---     ----    ----------
tcc             328786  321358  7428    2.26
libtcc.a        307288  300252  7036    2.29
bcheck.o        23254   22801   453     1.95
bt-exe.o        4732    4550    182     3.85
bt-log.o        648     639     9       1.39
libtcc1.a       12678   12119   559     4.41

There is no change in compilation speed as far as I can measure.

There is an additional third location where a mov $0, %eax can be
emitted. It's in the form:
mov $0, %eax
jmp eb 05
mov $1, %eax
-or-
mov $1, %eax
jmp eb 05
mov $0, %eax

I could not find where this was happening, and the one place that looks
like it would be the place seems not to be. I don't think it impedes the
review and committing of this diff. And it'll give me something to do on
a rainy day if no one else beats me to it :)

At this point, I'd like any feedback on the diff below and/or
encouragement to commit it to mob.

Thanks.

~Brian

diff --git a/x86_64-gen.c b/x86_64-gen.c
index 81ec5d9..5085a0a 100644
--- a/x86_64-gen.c
+++ b/x86_64-gen.c
@@ -483,11 +483,21 @@ void load(int r, SValue *sv)
                  }
  #endif
              } else if (is64_type(ft)) {
-                orex(1,r,0, 0xb8 + REG_VALUE(r)); /* mov $xx, r */
-                gen_le64(sv->c.i);
+                if (sv->c.i == 0 && r < 8) {
+                    o(0x31); /* xor r, r */
+                    o(0xc0 + REG_VALUE(r) * 9);
+                } else {
+                    orex(1,r,0, 0xb8 + REG_VALUE(r)); /* mov $xx, r */
+                    gen_le64(sv->c.i);
+                }
              } else {
-                orex(0,r,0, 0xb8 + REG_VALUE(r)); /* mov $xx, r */
-                gen_le32(fc);
+                if (fc == 0 && r < 8) {
+                    o(0x31); /* xor r, r */
+                    o(0xc0 + REG_VALUE(r) * 9);
+                } else {
+                    orex(0,r,0, 0xb8 + REG_VALUE(r)); /* mov $xx, r */
+                    gen_le32(fc);
+                }
              }
          } else if (v == VT_LOCAL) {
              orex(1,0,r,0x8d); /* lea xxx(%ebp), r */
@@ -1422,8 +1432,12 @@ void gfunc_call(int nb_args)
          }
      }

-    if (vtop->type.ref->f.func_type != FUNC_NEW) /* implies FUNC_OLD or
FUNC_ELLIPSIS */
-        oad(0xb8, nb_sse_args < 8 ? nb_sse_args : 8); /* mov
nb_sse_args, %eax */
+    if (vtop->type.ref->f.func_type != FUNC_NEW) { /* implies FUNC_OLD
or FUNC_ELLIPSIS */
+        if (nb_sse_args == 0)
+            o(0xc031); /* xor eax, eax */
+        else
+            oad(0xb8, nb_sse_args < 8 ? nb_sse_args : 8); /* mov
nb_sse_args, %eax */
+    }
      gcall_or_jmp(0);
      if (args_size)
          gadd_sp(args_size);

_______________________________________________
Tinycc-devel mailing list
Tinycc-devel@nongnu.org
https://lists.nongnu.org/mailman/listinfo/tinycc-devel



reply via email to

[Prev in Thread] Current Thread [Next in Thread]