[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Commit-gnuradio] r7710 - gnuradio/branches/developers/ngoergen/spe_fir_fff
From: |
ngoergen |
Subject: |
[Commit-gnuradio] r7710 - gnuradio/branches/developers/ngoergen/spe_fir_fff |
Date: |
Fri, 15 Feb 2008 16:39:26 -0700 (MST) |
Author: ngoergen
Date: 2008-02-15 16:39:25 -0700 (Fri, 15 Feb 2008)
New Revision: 7710
Modified:
gnuradio/branches/developers/ngoergen/spe_fir_fff/Makefile
gnuradio/branches/developers/ngoergen/spe_fir_fff/fir_fff_spe.cpp
gnuradio/branches/developers/ngoergen/spe_fir_fff/multi_fir_fff_ppe.c
gnuradio/branches/developers/ngoergen/spe_fir_fff/spe_fir_fff_params.h
Log:
fir_fff_spe: Outer loop implemented (currently unrolled 4 times), includes
alignment considerations, no scheduling considerations yet, first 8 outputs
verified. Work still needed on 4+ iteration outputs.
Modified: gnuradio/branches/developers/ngoergen/spe_fir_fff/Makefile
===================================================================
--- gnuradio/branches/developers/ngoergen/spe_fir_fff/Makefile 2008-02-15
21:26:05 UTC (rev 7709)
+++ gnuradio/branches/developers/ngoergen/spe_fir_fff/Makefile 2008-02-15
23:39:25 UTC (rev 7710)
@@ -10,6 +10,11 @@
all: multi_fir_fff_ppe fir_fff_spe.elf
+asm: fir_fff_spe.s
+
+fir_fff_spe.s: fir_fff_spe.cpp
+ $(SPU_CC) $(SPU_CFLAGS) $(SPU_CINCS) $(SPU_CLIBS) -S $^
+
multi_fir_fff_ppe: multi_fir_fff_ppe.c
$(CC) $(CFLAGS) $(CINCS) $(CLIBS) $^ -o $@
@@ -17,4 +22,4 @@
$(SPU_CC) $(SPU_CFLAGS) $(SPU_CINCS) $(SPU_CLIBS) $^ -o $@
clean:
- rm -f multi_fir_fff_ppe fir_fff_spe.elf
+ rm -f multi_fir_fff_ppe fir_fff_spe.elf fir_fff_spe.s
Modified: gnuradio/branches/developers/ngoergen/spe_fir_fff/fir_fff_spe.cpp
===================================================================
--- gnuradio/branches/developers/ngoergen/spe_fir_fff/fir_fff_spe.cpp
2008-02-15 21:26:05 UTC (rev 7709)
+++ gnuradio/branches/developers/ngoergen/spe_fir_fff/fir_fff_spe.cpp
2008-02-15 23:39:25 UTC (rev 7710)
@@ -8,11 +8,23 @@
float in1_spe[MAX_BUFSIZE] __attribute__((aligned(16)));
float in2_spe[MAX_BUFSIZE] __attribute__((aligned(16)));
float out_spe[MAX_BUFSIZE] __attribute__((aligned(16)));
-//float out_spe[MAX_BUFSIZE] __attribute__((aligned(16)));
+static const __vector unsigned int shiftmasks[4] =
+ { { 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f },
+ { 0x04050607, 0x08090a0b, 0x0c0d0e0f, 0x10111213 },
+ { 0x08090a0b, 0x0c0d0e0f, 0x10111213, 0x14151617 },
+ { 0x0c0d0e0f, 0x10111213, 0x14151617, 0x18191a1b }
+ };
+
spe_fir_fff_params_t spe_fir_fff_params __attribute__((aligned(16)));
+void test() {
+ unsigned int b =spe_fir_fff_params.ntaps;
+ __vector unsigned int bob = shiftmasks[0];
+ unsigned int bob2 = spe_fir_fff_params.offset;
+}
+
int main(unsigned long long spe, unsigned long long argp, unsigned long long
envp)
{
int tag = 1;
@@ -20,11 +32,6 @@
__vector float *vin2 = (__vector float *) in2_spe;
__vector float *vout = (__vector float *) out_spe;
- //asm(".test1\n");
- //unsigned int b = spe_fir_fff_params.ntaps;
- //unsigned int c = spe_fir_fff_params.size;
- //asm(".test2\n");
-
{
gr_spe_dma_lock_in<spe_fir_fff_params_t> argp_lock(
argp, &spe_fir_fff_params, sizeof(spe_fir_fff_params_t), tag);
@@ -35,38 +42,69 @@
gr_spe_dma_lock_out<__vector float> out_lock(
spe_fir_fff_params.ea_out, vout, spe_fir_fff_params.size *
sizeof(float), tag);
- asm(
+
+ asm(
+ " lqr $9,spe_fir_fff_params+16 \n"
+ " rotqbyi $31,$9,8 \n" //
number of nsamples
+ " lqr $33,spe_fir_fff_params+32 \n"
+ //" rotqbyi $33,$9,0 \n" //
initial offset into samples
+ " lqd $32,32($sp) \n"
// the index into out
+ ); // the current output vector
+
+
+
/////////////////////////////////////////////////////////////////////////////////////////
+
+ asm( // this loop computes single dot-product for [ X _ _ _ ]
+ ".start4: \n"
+ " xor $30,$30,$30 \n" // the
current output vector
+
" xor $5,$5,$5 \n" // even
" lqd $2,64($sp) \n" // odd
+ " ori $40,$2,0 \n"
" xor $6,$6,$6 \n"
" lqd $3,48($sp) \n"
" xor $7,$7,$7 \n"
" lqr $9,spe_fir_fff_params+16 \n"
" xor $8,$8,$8 \n"
- " rotqbyi $9,$9,12 \n"
- " rotqbyi $20,$9,12 \n"
+ " rotqbyi $9,$9,12 \n" // ntaps
+ " a $2, $33, $2 \n"
+
+ " andi $37,$33,0x0c \n"
// find index into masks [0-3]
+ " shlqbii $38,$37,2 \n"
// mult by 16
+ " lqd $39,shiftmasks($38) \n"
// load the right shift mask
+
".big_mama_loop: \n"
" lqd $10,0($2) \n"
- " ai $2,$2,64 \n"
" lqd $14,0($3) \n"
- " fma $5, $10, $14, $5 \n" // even
" lqd $11,16($2) \n"
- " ai $3,$3,64 \n"
" lqd $15,16($3) \n"
- " fma $6, $11, $15, $6 \n" // even
" lqd $12,32($2) \n"
- " ai $9,$9,-4 \n"
- " lqd $16,32($3) \n"
- " fma $7, $12, $16, $7 \n" // even
+ " lqd $16,32($3) \n"
" lqd $13,48($2) \n"
" lqd $17,48($3) \n"
+ " lqd $18,64($2) \n"
+
+ " shufb $10,$10,$11,$39 \n" // do
the truffle shuffle
+ " shufb $11,$11,$12,$39 \n" // do
the truffle shuffle
+ " shufb $12,$12,$13,$39 \n" // do
the truffle shuffle
+ " shufb $13,$13,$18,$39 \n" // do
the truffle shuffle
+
+ " fma $5, $10, $14, $5 \n" // even
+ " fma $6, $11, $15, $6 \n" // even
+ " fma $7, $12, $16, $7 \n" // even
" fma $8, $13, $17, $8 \n" // even
+
+ " ai $2,$2,64 \n"
+ " ai $3,$3,64 \n"
+ " ai $9,$9,-4 \n"
" brnz $9,.big_mama_loop \n"
" fa $18,$5,$6 \n"
" fa $19,$7,$8 \n"
" fa $5,$18,$19 \n"
+ );
+ asm( // accumulate word elements in r5 into first element in
r5
" ori $6,$5,0 \n"
" shlqbyi $6,$6,4 \n"
" fa $5,$6,$5 \n"
@@ -74,9 +112,242 @@
" fa $5,$6,$5 \n"
" shlqbyi $6,$6,4 \n"
" fa $5,$6,$5 \n"
- " lqd $9,32($sp) \n"
- " stqd $5,0($9) \n"
+ " fsmbi $10,0xC000 \n"
+ " and $11,$10,$5 \n"
+ " or $30,$11,$30 \n"
+
+ " ai $31,$31,-1 \n"
+ " brz $31,.finish4 \n"
+ " ai $33,$33,4 \n"
+ );
+
+/////////////////////////////////////////////////////////////////////////////////////////
+
+
+ asm( // this loop computes single dot-product for [ _ X _ _ ]
+
+ " xor $5,$5,$5 \n" // even
+ " lqd $2,64($sp) \n" // odd
+ " ori $40,$2,0 \n"
+ " xor $6,$6,$6 \n"
+ " lqd $3,48($sp) \n"
+ " xor $7,$7,$7 \n"
+ " lqr $9,spe_fir_fff_params+16
\n"
+ " xor $8,$8,$8 \n"
+ " rotqbyi $9,$9,12 \n" // ntaps
+ " a $2, $33, $2 \n"
+
+ " andi $37,$33,0x0c \n"
// find index into masks [0-3]
+ " shlqbii $38,$37,2 \n"
// mult by 16
+ " lqd $39,shiftmasks($38) \n"
// load the right shift mask
+
+ ".big_mama_loop2: \n"
+ " lqd $10,0($2) \n"
+ " lqd $14,0($3) \n"
+ " lqd $11,16($2) \n"
+ " lqd $15,16($3) \n"
+ " lqd $12,32($2) \n"
+ " lqd $16,32($3) \n"
+ " lqd $13,48($2) \n"
+ " lqd $17,48($3) \n"
+ " lqd $18,64($2) \n"
+
+ " shufb $10,$10,$11,$39 \n"
// do the truffle shuffle
+ " shufb $11,$11,$12,$39 \n"
// do the truffle shuffle
+ " shufb $12,$12,$13,$39 \n"
// do the truffle shuffle
+ " shufb $13,$13,$18,$39 \n"
// do the truffle shuffle
+
+ " fma $5, $10, $14, $5 \n" // even
+ " fma $6, $11, $15, $6 \n" // even
+ " fma $7, $12, $16, $7 \n" // even
+ " fma $8, $13, $17, $8 \n" // even
+
+ " ai $2,$2,64 \n"
+ " ai $3,$3,64 \n"
+ " ai $9,$9,-4 \n"
+ " brnz $9,.big_mama_loop2 \n"
+ " fa $18,$5,$6 \n"
+ " fa $19,$7,$8 \n"
+ " fa $5,$18,$19 \n"
);
+
+ asm( // accumulate word elements in r5 into first element in
r5
+ " ori $6,$5,0 \n"
+ " shlqbyi $6,$6,4 \n"
+ " fa $5,$6,$5 \n"
+ " shlqbyi $6,$6,4 \n"
+ " fa $5,$6,$5 \n"
+ " shlqbyi $6,$6,4 \n"
+ " fa $5,$6,$5 \n"
+ " fsmbi $10,0xC000 \n"
+ " and $11,$10,$5 \n"
+ " rotqbyi $12, $11, 12 \n"
+ " or $30,$12,$30 \n"
+
+ " ai $31,$31,-1 \n"
+ " brz $31,.finish4 \n"
+ " ai $33,$33,4 \n"
+ );
+
+/////////////////////////////////////////////////////////////////////////////////////////
+
+
+ asm( // this loop computes single dot-product for [ _ _ X _ ]
+
+ " xor $5,$5,$5 \n" // even
+ " lqd $2,64($sp) \n" // odd
+ " ori $40,$2,0 \n"
+ " xor $6,$6,$6 \n"
+ " lqd $3,48($sp) \n"
+ " xor $7,$7,$7 \n"
+ " lqr $9,spe_fir_fff_params+16
\n"
+ " xor $8,$8,$8 \n"
+ " rotqbyi $9,$9,12 \n" // ntaps
+ " a $2, $33, $2 \n"
+
+ " andi $37,$33,0x0c \n"
// find index into masks [0-3]
+ " shlqbii $38,$37,2 \n"
// mult by 16
+ " lqd $39,shiftmasks($38) \n"
// load the right shift mask
+
+ ".big_mama_loop3: \n"
+ " lqd $10,0($2) \n"
+ " lqd $14,0($3) \n"
+ " lqd $11,16($2) \n"
+ " lqd $15,16($3) \n"
+ " lqd $12,32($2) \n"
+ " lqd $16,32($3) \n"
+ " lqd $13,48($2) \n"
+ " lqd $17,48($3) \n"
+ " lqd $18,64($2) \n"
+
+ " shufb $10,$10,$11,$39 \n"
// do the truffle shuffle
+ " shufb $11,$11,$12,$39 \n"
// do the truffle shuffle
+ " shufb $12,$12,$13,$39 \n"
// do the truffle shuffle
+ " shufb $13,$13,$18,$39 \n"
// do the truffle shuffle
+
+ " fma $5, $10, $14, $5 \n" // even
+ " fma $6, $11, $15, $6 \n" // even
+ " fma $7, $12, $16, $7 \n" // even
+ " fma $8, $13, $17, $8 \n" // even
+
+ " ai $2,$2,64 \n"
+ " ai $3,$3,64 \n"
+ " ai $9,$9,-4 \n"
+ " brnz $9,.big_mama_loop3 \n"
+ " fa $18,$5,$6 \n"
+ " fa $19,$7,$8 \n"
+ " fa $5,$18,$19 \n"
+ );
+
+ asm( // accumulate word elements in r5 into first element in
r5
+ " ori $6,$5,0 \n"
+ " shlqbyi $6,$6,4 \n"
+ " fa $5,$6,$5 \n"
+ " shlqbyi $6,$6,4 \n"
+ " fa $5,$6,$5 \n"
+ " shlqbyi $6,$6,4 \n"
+ " fa $5,$6,$5 \n"
+ " fsmbi $10,0xC000 \n"
+ " and $11,$10,$5 \n"
+ " rotqbyi $12, $11, 8 \n"
+ " or $30,$12,$30 \n"
+
+ " ai $31,$31,-1 \n"
+ " brz $31,.finish4 \n"
+ " ai $33,$33,4 \n"
+ );
+
+/////////////////////////////////////////////////////////////////////////////////////////
+
+
+ asm( // this loop computes single dot-product for [ _ _ _ X ]
+
+ " xor $5,$5,$5 \n" // even
+ " lqd $2,64($sp) \n" // odd
+ " ori $40,$2,0 \n"
+ " xor $6,$6,$6 \n"
+ " lqd $3,48($sp) \n"
+ " xor $7,$7,$7 \n"
+ " lqr $9,spe_fir_fff_params+16
\n"
+ " xor $8,$8,$8 \n"
+ " rotqbyi $9,$9,12 \n" // ntaps
+ " a $2, $33, $2 \n"
+
+ " andi $37,$33,0x0c \n"
// find index into masks [0-3]
+ " shlqbii $38,$37,2 \n"
// mult by 16
+ " lqd $39,shiftmasks($38) \n"
// load the right shift mask
+
+ ".big_mama_loop4: \n"
+ " lqd $10,0($2) \n"
+ " lqd $14,0($3) \n"
+ " lqd $11,16($2) \n"
+ " lqd $15,16($3) \n"
+ " lqd $12,32($2) \n"
+ " lqd $16,32($3) \n"
+ " lqd $13,48($2) \n"
+ " lqd $17,48($3) \n"
+ " lqd $18,64($2) \n"
+
+ " shufb $10,$10,$11,$39 \n"
// do the truffle shuffle
+ " shufb $11,$11,$12,$39 \n"
// do the truffle shuffle
+ " shufb $12,$12,$13,$39 \n"
// do the truffle shuffle
+ " shufb $13,$13,$18,$39 \n"
// do the truffle shuffle
+
+ " fma $5, $10, $14, $5 \n" // even
+ " fma $6, $11, $15, $6 \n" // even
+ " fma $7, $12, $16, $7 \n" // even
+ " fma $8, $13, $17, $8 \n" // even
+
+ " ai $2,$2,64 \n"
+ " ai $3,$3,64 \n"
+ " ai $9,$9,-4 \n"
+ " brnz $9,.big_mama_loop4 \n"
+ " fa $18,$5,$6 \n"
+ " fa $19,$7,$8 \n"
+ " fa $5,$18,$19 \n"
+ );
+
+ asm( // accumulate word elements in r5 into first element in
r5
+ " ori $6,$5,0 \n"
+ " shlqbyi $6,$6,4 \n"
+ " fa $5,$6,$5 \n"
+ " shlqbyi $6,$6,4 \n"
+ " fa $5,$6,$5 \n"
+ " shlqbyi $6,$6,4 \n"
+ " fa $5,$6,$5 \n"
+ " fsmbi $10,0xC000 \n"
+ " and $11,$10,$5 \n"
+ " rotqbyi $12, $11, 4 \n"
+ " or $30,$12,$30 \n"
+
+ " ai $31,$31,-1 \n"
+ " ai $33,$33,4 \n"
+ );
+
+/////////////////////////////////////////////////////////////////////////////////////////
+
+
+ asm( // Stores r30 (the assembled output vector) to the output buffer
+ ".finish4: \n"
+
+ //" lqd $2,64($sp) \n" // odd
+ //" ai $2,$2,0 \n"
+ //" lqd $30,0($2) \n"
+ //" lqd $34,16($2) \n"
+ //" lqd $35,32($2) \n"
+ //" andi $37,$33,3 \n"
// find index into masks [0-4]
+ //" shlqbii $38,$37,4 \n"
// mult by 16
+
+ //" lqd $39,shiftmasks($38) \n"
// load the right shift mask
+ //" shufb $30,$30,$34,$39 \n" // do
the truffle shuffle
+ //" shufb $34,$34,$35,$39 \n"
+
+
+ " stqd $30,0($32) \n"
+ //" stqd $34,16($32) \n"
+ " ai $32,$32,16 \n"
// increment output pointer by 1 new vector.
+ " brnz $31,.start4 \n" //
start another output vector if needed
+ );
}
return 0;
Modified: gnuradio/branches/developers/ngoergen/spe_fir_fff/multi_fir_fff_ppe.c
===================================================================
--- gnuradio/branches/developers/ngoergen/spe_fir_fff/multi_fir_fff_ppe.c
2008-02-15 21:26:05 UTC (rev 7709)
+++ gnuradio/branches/developers/ngoergen/spe_fir_fff/multi_fir_fff_ppe.c
2008-02-15 23:39:25 UTC (rev 7710)
@@ -25,14 +25,13 @@
0,0,0,0,0,0,0,0, \
0,0,0,0,1,2,3,4
-
-#define MYMATRIX 1, 1, 1, 1, 1, 1, 1, 1, \
+#define MYMATRIX 1, 2, 3, 4, 5, 6, 7, 8, \
+ 9, 10, 1, 1, 1, 1, 1, 1, \
1, 1, 1, 1, 1, 1, 1, 1, \
1, 1, 1, 1, 1, 1, 1, 1, \
1, 1, 1, 1, 1, 1, 1, 1, \
1, 1, 1, 1, 1, 1, 1, 1, \
1, 1, 1, 1, 1, 1, 1, 1, \
- 1, 1, 1, 1, 1, 1, 1, 1, \
1, 1, 1, 1, 1, 1, 1, 1
//#define MYMATRIX2 1.23, 1.23, 1.23, 1.23, 1.23, 1.23, 1.23,
1.23, \
@@ -44,13 +43,7 @@
1.23, 1.23, 1.23, 1.23, 1.23, 1.23, 1.23,
1.23, \
1.23, 1.23, 1.23, 1.23, 1.23, 1.23, 1.23,
1.23
-#define MYMATRIX10 MYMATRIX, MYMATRIX, MYMATRIX, MYMATRIX, MYMATRIX, \
- MYMATRIX, MYMATRIX, MYMATRIX, MYMATRIX,
MYMATRIX
-
-#define MYMATRIX100 MYMATRIX10, MYMATRIX10, MYMATRIX10, MYMATRIX10,
MYMATRIX10, \
- MYMATRIX10, MYMATRIX10, MYMATRIX10,
MYMATRIX10, MYMATRIX10
-
-#define MYMATRIX2 -2, -2, -2, -2, -2, -2, -2, -2, \
+//#define MYMATRIX2 -2, -2, -2, -2, -2, -2, -2, -2, \
-2, -2, -2, -2, -2, -2, -2, -2, \
-2, -2, -2, -2, -2, -2, -2, -2, \
-2, -2, -2, -2, -2, -2, -2, -2, \
@@ -59,7 +52,23 @@
-2, -2, -2, -2, -2, -2, -2, -2, \
-2, -2, -2, -2, -2, -2, -2, -2
+#define MYMATRIX2 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1
+
+#define MYMATRIX10 MYMATRIX, MYMATRIX, MYMATRIX, MYMATRIX, MYMATRIX, \
+ MYMATRIX, MYMATRIX, MYMATRIX, MYMATRIX,
MYMATRIX
+
+#define MYMATRIX100 MYMATRIX10, MYMATRIX10, MYMATRIX10, MYMATRIX10,
MYMATRIX10, \
+ MYMATRIX10, MYMATRIX10, MYMATRIX10,
MYMATRIX10, MYMATRIX10
+
+
#define MYMATRIX210 MYMATRIX2, MYMATRIX2, MYMATRIX2, MYMATRIX2, MYMATRIX2, \
MYMATRIX2, MYMATRIX2, MYMATRIX2, MYMATRIX2, MYMATRIX2
@@ -145,7 +154,9 @@
spe_fir_fff_params[i].ea_in1 = (unsigned long) &in1[i*size ];
spe_fir_fff_params[i].ea_in2 = (unsigned long) &in2[i*size ];
spe_fir_fff_params[i].ea_out = (unsigned long) &out[i*size];
- spe_fir_fff_params[i].size = size;
+ spe_fir_fff_params[i].nsamples = 16;
+ spe_fir_fff_params[i].offset = 0;
+ spe_fir_fff_params[i].size = size;
arg[i].spe = spe[i];
arg[i].spe_fir_fff_params = &spe_fir_fff_params[i];
@@ -175,7 +186,7 @@
exit(1);
}
- for (i = 0; i < 4; ++i) {
+ for (i = 0; i < 32; ++i) {
printf("out[%02d]=%f\n", i, out[i]);
}
printf("size %0d result=%f\n", size, out[0]);
Modified: gnuradio/branches/developers/ngoergen/spe_fir_fff/spe_fir_fff_params.h
===================================================================
--- gnuradio/branches/developers/ngoergen/spe_fir_fff/spe_fir_fff_params.h
2008-02-15 21:26:05 UTC (rev 7709)
+++ gnuradio/branches/developers/ngoergen/spe_fir_fff/spe_fir_fff_params.h
2008-02-15 23:39:25 UTC (rev 7710)
@@ -5,10 +5,11 @@
unsigned long long ea_in1;
unsigned long long ea_in2;
unsigned long long ea_out;
- unsigned int size;
+ unsigned int nsamples;
unsigned int ntaps;
unsigned int offset;
- int pad[7];
+ unsigned int size;
+ int pad[6];
} spe_fir_fff_params_t;
#endif /*SPE_FIR_FFF_PARAMS_H_*/
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [Commit-gnuradio] r7710 - gnuradio/branches/developers/ngoergen/spe_fir_fff,
ngoergen <=