[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Commit-gnuradio] r7783 - gnuradio/branches/developers/ngoergen/spe_fir_fff
From: |
ngoergen |
Subject: |
[Commit-gnuradio] r7783 - gnuradio/branches/developers/ngoergen/spe_fir_fff |
Date: |
Fri, 22 Feb 2008 13:55:45 -0700 (MST) |
Author: ngoergen
Date: 2008-02-22 13:55:45 -0700 (Fri, 22 Feb 2008)
New Revision: 7783
Added:
gnuradio/branches/developers/ngoergen/spe_fir_fff/spe_fir_fff_as.S
gnuradio/branches/developers/ngoergen/spe_fir_fff/spe_fir_fff_as.h
Modified:
gnuradio/branches/developers/ngoergen/spe_fir_fff/Makefile
gnuradio/branches/developers/ngoergen/spe_fir_fff/fir_fff_spe.cpp
gnuradio/branches/developers/ngoergen/spe_fir_fff/multi_fir_fff_ppe.c
Log:
spe_fir_fff: Collapsed the outer loop, moved hand-coded assembly to a GAS file,
initial attempt at scheduling optimization, updated (somewhat) for the Linux ABI
standard (more work needed here), tested against the FIR QA vectors. Note: some of
these fail due to the limits of the single-precision float implementation.
Still need to verify scheduling and clean up ABI considerations. Double-precision
and complex versions are soon to come.
Modified: gnuradio/branches/developers/ngoergen/spe_fir_fff/Makefile
===================================================================
--- gnuradio/branches/developers/ngoergen/spe_fir_fff/Makefile 2008-02-22
20:27:23 UTC (rev 7782)
+++ gnuradio/branches/developers/ngoergen/spe_fir_fff/Makefile 2008-02-22
20:55:45 UTC (rev 7783)
@@ -8,6 +8,9 @@
SPU_CLIBS =
SPU_CFLAGS = -Wall
+SPU_AS = spu-as
+SPU_CPP = spu-g++
+
all: multi_fir_fff_ppe fir_fff_spe.elf
asm: fir_fff_spe.s
@@ -18,8 +21,14 @@
multi_fir_fff_ppe: multi_fir_fff_ppe.c
$(CC) $(CFLAGS) $(CINCS) $(CLIBS) $^ -o $@
-fir_fff_spe.elf: fir_fff_spe.cpp
- $(SPU_CC) $(SPU_CFLAGS) $(SPU_CINCS) $(SPU_CLIBS) $^ -o $@
+fir_fff_spe.o: fir_fff_spe.cpp
+ $(SPU_CC) -c $(SPU_CFLAGS) $(SPU_CINCS) $(SPU_CLIBS) $^ -o $@
+spe_fir_fff_as.o: spe_fir_fff_as.S
+ $(SPU_AS) -o $@ $^
+
+fir_fff_spe.elf: fir_fff_spe.o spe_fir_fff_as.o
+ $(SPU_CPP) $(SPU_CFLAGS) $(SPU_CINCS) $(SPU_CLIBS) $^ -o $@
+
clean:
- rm -f multi_fir_fff_ppe fir_fff_spe.elf fir_fff_spe.s
+ rm -f multi_fir_fff_ppe fir_fff_spe.elf fir_fff_spe.s *.o
Modified: gnuradio/branches/developers/ngoergen/spe_fir_fff/fir_fff_spe.cpp
===================================================================
--- gnuradio/branches/developers/ngoergen/spe_fir_fff/fir_fff_spe.cpp
2008-02-22 20:27:23 UTC (rev 7782)
+++ gnuradio/branches/developers/ngoergen/spe_fir_fff/fir_fff_spe.cpp
2008-02-22 20:55:45 UTC (rev 7783)
@@ -2,6 +2,7 @@
#include <spu_intrinsics.h>
#include "gr_spe_dma_lock.h"
#include "spe_fir_fff_params.h"
+#include "spe_fir_fff_as.h"
#define MAX_BUFSIZE (128*100)
@@ -9,22 +10,8 @@
float in2_spe[MAX_BUFSIZE] __attribute__((aligned(16)));
float out_spe[MAX_BUFSIZE] __attribute__((aligned(16)));
-static const __vector unsigned int shiftmasks[4] =
- { { 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f },
- { 0x04050607, 0x08090a0b, 0x0c0d0e0f, 0x10111213 },
- { 0x08090a0b, 0x0c0d0e0f, 0x10111213, 0x14151617 },
- { 0x0c0d0e0f, 0x10111213, 0x14151617, 0x18191a1b }
- };
-
spe_fir_fff_params_t spe_fir_fff_params __attribute__((aligned(16)));
-void test() {
- unsigned int b =spe_fir_fff_params.ntaps;
- __vector unsigned int bob = shiftmasks[0];
- unsigned int bob2 = spe_fir_fff_params.offset;
-}
-
-
int main(unsigned long long spe, unsigned long long argp, unsigned long long
envp)
{
int tag = 1;
@@ -42,312 +29,15 @@
gr_spe_dma_lock_out<__vector float> out_lock(
spe_fir_fff_params.ea_out, vout, spe_fir_fff_params.size *
sizeof(float), tag);
+ spe_fir_fff(
+ vin1,
+ vin2,
+ vout,
+ 0,
+ spe_fir_fff_params.offset,
+ spe_fir_fff_params.nsamples,
+ spe_fir_fff_params.ntaps);
- asm(
- " lqr $9,spe_fir_fff_params+16 \n"
- " rotqbyi $31,$9,8 \n" //
number of nsamples
- " lqr $33,spe_fir_fff_params+32 \n"
- //" rotqbyi $33,$9,0 \n" //
initial offset into samples
- " lqd $32,32($sp) \n"
// the index into out
-
- ); // the current output vector
-
-
-
/////////////////////////////////////////////////////////////////////////////////////////
-
- asm( // this loop computes single dot-product for [ X _ _ _ ]
- ".start4: \n"
- " xor $30,$30,$30 \n" // the
current output vector
-
- " xor $5,$5,$5 \n" // even
- " lqd $2,64($sp) \n" // odd
- " ori $40,$2,0 \n"
- " xor $6,$6,$6 \n"
- " lqd $3,48($sp) \n"
- " xor $7,$7,$7 \n"
- " lqr $9,spe_fir_fff_params+16 \n"
- " xor $8,$8,$8 \n"
- " rotqbyi $9,$9,12 \n" // ntaps
- " a $2, $33, $2 \n"
-
- " andi $37,$33,0x0c \n"
// find index into masks [0-4]
- " shlqbii $38,$37,2 \n"
// mult by 16
- " lqd $39,shiftmasks($38) \n"
// load the right shift mask
-
- ".big_mama_loop: \n"
- " lqd $10,0($2) \n"
- " lqd $14,0($3) \n"
- " lqd $11,16($2) \n"
- " lqd $15,16($3) \n"
- " lqd $12,32($2) \n"
- " lqd $16,32($3) \n"
- " lqd $13,48($2) \n"
- " lqd $17,48($3) \n"
- " lqd $18,64($2) \n"
-
- " shufb $10,$10,$11,$39 \n" // do
the truffle shuffle
- " shufb $11,$11,$12,$39 \n" // do
the truffle shuffle
- " shufb $12,$12,$13,$39 \n" // do
the truffle shuffle
- " shufb $13,$13,$18,$39 \n" // do
the truffle shuffle
-
- " fma $5, $10, $14, $5 \n" // even
- " fma $6, $11, $15, $6 \n" // even
- " fma $7, $12, $16, $7 \n" // even
- " fma $8, $13, $17, $8 \n" // even
-
- " ai $2,$2,64 \n"
- " ai $3,$3,64 \n"
- " ai $9,$9,-4 \n"
- " brnz $9,.big_mama_loop \n"
- " fa $18,$5,$6 \n"
- " fa $19,$7,$8 \n"
- " fa $5,$18,$19 \n"
- );
-
- asm( // accumulate word elements in r5 into first element in
r5
- " ori $6,$5,0 \n"
- " shlqbyi $6,$6,4 \n"
- " fa $5,$6,$5 \n"
- " shlqbyi $6,$6,4 \n"
- " fa $5,$6,$5 \n"
- " shlqbyi $6,$6,4 \n"
- " fa $5,$6,$5 \n"
- " fsmbi $10,0xC000 \n"
- " and $11,$10,$5 \n"
- " or $30,$11,$30 \n"
-
- " ai $31,$31,-1 \n"
- " brz $31,.finish4 \n"
- " ai $33,$33,4 \n"
- );
-
-/////////////////////////////////////////////////////////////////////////////////////////
-
-
- asm( // this loop computes single dot-product for [ _ X _ _ ]
-
- " xor $5,$5,$5 \n" // even
- " lqd $2,64($sp) \n" // odd
- " ori $40,$2,0 \n"
- " xor $6,$6,$6 \n"
- " lqd $3,48($sp) \n"
- " xor $7,$7,$7 \n"
- " lqr $9,spe_fir_fff_params+16
\n"
- " xor $8,$8,$8 \n"
- " rotqbyi $9,$9,12 \n" // ntaps
- " a $2, $33, $2 \n"
-
- " andi $37,$33,0x0c \n"
// find index into masks [0-4]
- " shlqbii $38,$37,2 \n"
// mult by 16
- " lqd $39,shiftmasks($38) \n"
// load the right shift mask
-
- ".big_mama_loop2: \n"
- " lqd $10,0($2) \n"
- " lqd $14,0($3) \n"
- " lqd $11,16($2) \n"
- " lqd $15,16($3) \n"
- " lqd $12,32($2) \n"
- " lqd $16,32($3) \n"
- " lqd $13,48($2) \n"
- " lqd $17,48($3) \n"
- " lqd $18,64($2) \n"
-
- " shufb $10,$10,$11,$39 \n"
// do the truffle shuffle
- " shufb $11,$11,$12,$39 \n"
// do the truffle shuffle
- " shufb $12,$12,$13,$39 \n"
// do the truffle shuffle
- " shufb $13,$13,$18,$39 \n"
// do the truffle shuffle
-
- " fma $5, $10, $14, $5 \n" // even
- " fma $6, $11, $15, $6 \n" // even
- " fma $7, $12, $16, $7 \n" // even
- " fma $8, $13, $17, $8 \n" // even
-
- " ai $2,$2,64 \n"
- " ai $3,$3,64 \n"
- " ai $9,$9,-4 \n"
- " brnz $9,.big_mama_loop2 \n"
- " fa $18,$5,$6 \n"
- " fa $19,$7,$8 \n"
- " fa $5,$18,$19 \n"
- );
-
- asm( // accumulate word elements in r5 into first element in
r5
- " ori $6,$5,0 \n"
- " shlqbyi $6,$6,4 \n"
- " fa $5,$6,$5 \n"
- " shlqbyi $6,$6,4 \n"
- " fa $5,$6,$5 \n"
- " shlqbyi $6,$6,4 \n"
- " fa $5,$6,$5 \n"
- " fsmbi $10,0xC000 \n"
- " and $11,$10,$5 \n"
- " rotqbyi $12, $11, 12 \n"
- " or $30,$12,$30 \n"
-
- " ai $31,$31,-1 \n"
- " brz $31,.finish4 \n"
- " ai $33,$33,4 \n"
- );
-
-/////////////////////////////////////////////////////////////////////////////////////////
-
-
- asm( // this loop computes single dot-product for [ _ _ X _ ]
-
- " xor $5,$5,$5 \n" // even
- " lqd $2,64($sp) \n" // odd
- " ori $40,$2,0 \n"
- " xor $6,$6,$6 \n"
- " lqd $3,48($sp) \n"
- " xor $7,$7,$7 \n"
- " lqr $9,spe_fir_fff_params+16
\n"
- " xor $8,$8,$8 \n"
- " rotqbyi $9,$9,12 \n" // ntaps
- " a $2, $33, $2 \n"
-
- " andi $37,$33,0x0c \n"
// find index into masks [0-4]
- " shlqbii $38,$37,2 \n"
// mult by 16
- " lqd $39,shiftmasks($38) \n"
// load the right shift mask
-
- ".big_mama_loop3: \n"
- " lqd $10,0($2) \n"
- " lqd $14,0($3) \n"
- " lqd $11,16($2) \n"
- " lqd $15,16($3) \n"
- " lqd $12,32($2) \n"
- " lqd $16,32($3) \n"
- " lqd $13,48($2) \n"
- " lqd $17,48($3) \n"
- " lqd $18,64($2) \n"
-
- " shufb $10,$10,$11,$39 \n"
// do the truffle shuffle
- " shufb $11,$11,$12,$39 \n"
// do the truffle shuffle
- " shufb $12,$12,$13,$39 \n"
// do the truffle shuffle
- " shufb $13,$13,$18,$39 \n"
// do the truffle shuffle
-
- " fma $5, $10, $14, $5 \n" // even
- " fma $6, $11, $15, $6 \n" // even
- " fma $7, $12, $16, $7 \n" // even
- " fma $8, $13, $17, $8 \n" // even
-
- " ai $2,$2,64 \n"
- " ai $3,$3,64 \n"
- " ai $9,$9,-4 \n"
- " brnz $9,.big_mama_loop3 \n"
- " fa $18,$5,$6 \n"
- " fa $19,$7,$8 \n"
- " fa $5,$18,$19 \n"
- );
-
- asm( // accumulate word elements in r5 into first element in
r5
- " ori $6,$5,0 \n"
- " shlqbyi $6,$6,4 \n"
- " fa $5,$6,$5 \n"
- " shlqbyi $6,$6,4 \n"
- " fa $5,$6,$5 \n"
- " shlqbyi $6,$6,4 \n"
- " fa $5,$6,$5 \n"
- " fsmbi $10,0xC000 \n"
- " and $11,$10,$5 \n"
- " rotqbyi $12, $11, 8 \n"
- " or $30,$12,$30 \n"
-
- " ai $31,$31,-1 \n"
- " brz $31,.finish4 \n"
- " ai $33,$33,4 \n"
- );
-
-/////////////////////////////////////////////////////////////////////////////////////////
-
-
- asm( // this loop computes single dot-product for [ _ _ _ X ]
-
- " xor $5,$5,$5 \n" // even
- " lqd $2,64($sp) \n" // odd
- " ori $40,$2,0 \n"
- " xor $6,$6,$6 \n"
- " lqd $3,48($sp) \n"
- " xor $7,$7,$7 \n"
- " lqr $9,spe_fir_fff_params+16
\n"
- " xor $8,$8,$8 \n"
- " rotqbyi $9,$9,12 \n" // ntaps
- " a $2, $33, $2 \n"
-
- " andi $37,$33,0x0c \n"
// find index into masks [0-4]
- " shlqbii $38,$37,2 \n"
// mult by 16
- " lqd $39,shiftmasks($38) \n"
// load the right shift mask
-
- ".big_mama_loop4: \n"
- " lqd $10,0($2) \n"
- " lqd $14,0($3) \n"
- " lqd $11,16($2) \n"
- " lqd $15,16($3) \n"
- " lqd $12,32($2) \n"
- " lqd $16,32($3) \n"
- " lqd $13,48($2) \n"
- " lqd $17,48($3) \n"
- " lqd $18,64($2) \n"
-
- " shufb $10,$10,$11,$39 \n"
// do the truffle shuffle
- " shufb $11,$11,$12,$39 \n"
// do the truffle shuffle
- " shufb $12,$12,$13,$39 \n"
// do the truffle shuffle
- " shufb $13,$13,$18,$39 \n"
// do the truffle shuffle
-
- " fma $5, $10, $14, $5 \n" // even
- " fma $6, $11, $15, $6 \n" // even
- " fma $7, $12, $16, $7 \n" // even
- " fma $8, $13, $17, $8 \n" // even
-
- " ai $2,$2,64 \n"
- " ai $3,$3,64 \n"
- " ai $9,$9,-4 \n"
- " brnz $9,.big_mama_loop4 \n"
- " fa $18,$5,$6 \n"
- " fa $19,$7,$8 \n"
- " fa $5,$18,$19 \n"
- );
-
- asm( // accumulate word elements in r5 into first element in
r5
- " ori $6,$5,0 \n"
- " shlqbyi $6,$6,4 \n"
- " fa $5,$6,$5 \n"
- " shlqbyi $6,$6,4 \n"
- " fa $5,$6,$5 \n"
- " shlqbyi $6,$6,4 \n"
- " fa $5,$6,$5 \n"
- " fsmbi $10,0xC000 \n"
- " and $11,$10,$5 \n"
- " rotqbyi $12, $11, 4 \n"
- " or $30,$12,$30 \n"
-
- " ai $31,$31,-1 \n"
- " ai $33,$33,4 \n"
- );
-
-/////////////////////////////////////////////////////////////////////////////////////////
-
-
- asm( // Stores r5 in output
- ".finish4: \n"
-
- //" lqd $2,64($sp) \n" // odd
- //" ai $2,$2,0 \n"
- //" lqd $30,0($2) \n"
- //" lqd $34,16($2) \n"
- //" lqd $35,32($2) \n"
- //" andi $37,$33,3 \n"
// find index into masks [0-4]
- //" shlqbii $38,$37,4 \n"
// mult by 16
-
- //" lqd $39,shiftmasks($38) \n"
// load the right shift mask
- //" shufb $30,$30,$34,$39 \n" // do
the truffle shuffle
- //" shufb $34,$34,$35,$39 \n"
-
-
- " stqd $30,0($32) \n"
- //" stqd $34,16($32) \n"
- " ai $32,$32,16 \n"
// increment output pointer by 1 new vector.
- " brnz $31,.start4 \n" //
start another output vector if needed
- );
}
return 0;
Modified: gnuradio/branches/developers/ngoergen/spe_fir_fff/multi_fir_fff_ppe.c
===================================================================
--- gnuradio/branches/developers/ngoergen/spe_fir_fff/multi_fir_fff_ppe.c
2008-02-22 20:27:23 UTC (rev 7782)
+++ gnuradio/branches/developers/ngoergen/spe_fir_fff/multi_fir_fff_ppe.c
2008-02-22 20:55:45 UTC (rev 7783)
@@ -5,7 +5,7 @@
#include <spe_fir_fff_params.h>
#define NUM_SPE 1
-#define SIZE (64*10 )
+#define SIZE (64*1 )
//#define MYMATRIX 1, 2, 3, 4, 5, 6, 7, 8, \
9, 10, 11, 12, 13, 14, 15, 16, \
@@ -26,7 +26,7 @@
0,0,0,0,1,2,3,4
#define MYMATRIX 1, 2, 3, 4, 5, 6, 7, 8, \
- 9, 10, 1, 1, 1, 1, 1, 1, \
+ 9, 10, 1, -1, 1, 1, 1, 1, \
1, 1, 1, 1, 1, 1, 1, 1, \
1, 1, 1, 1, 1, 1, 1, 1, \
1, 1, 1, 1, 1, 1, 1, 1, \
@@ -74,17 +74,20 @@
#define MYMATRIX2100 MYMATRIX210, MYMATRIX210, MYMATRIX210, MYMATRIX210,
MYMATRIX210, \
MYMATRIX210, MYMATRIX210, MYMATRIX210,
MYMATRIX210, MYMATRIX210
+#define TESTMATRIX 234, -4, 23, -56, 45, 98, -23, -7, 0, 0, 0, 0, 0,
0, 0, 0
+//#define TESTMATRIX 1, 2, 3, 4, 5, 6, 7, 8, \
+ 9, 10, 234, 234, 234, 234, 234, 234
+#define TESTTAPS 5, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
-float dumb = 9;
-float in1[SIZE] = { MYMATRIX10 };
-float dumb2 = 9;
+float in1[16] = {
+ // MYMATRIX10
+ TESTMATRIX
+ };
+float in2[16] __attribute__((aligned(16))) = {
+ // MYMATRIX210
+ TESTTAPS
+} ;
-float dumb3 = 9;
-float in2[SIZE] __attribute__((aligned(16))) = { MYMATRIX210
- };
-
-float dumb4 = 9;
-
float out[SIZE] __attribute__((aligned(16)));
spe_fir_fff_params_t spe_fir_fff_params[NUM_SPE] __attribute__((aligned(16)));
@@ -130,7 +133,6 @@
}
for (i = 0; i < NUM_SPE; ++i) {
- spe_fir_fff_params[i].ntaps = 4;
spe[i] = spe_context_create(0, NULL);
if (!spe[i]) {
perror("spe_context_create");
@@ -154,7 +156,8 @@
spe_fir_fff_params[i].ea_in1 = (unsigned long) &in1[i*size ];
spe_fir_fff_params[i].ea_in2 = (unsigned long) &in2[i*size ];
spe_fir_fff_params[i].ea_out = (unsigned long) &out[i*size];
- spe_fir_fff_params[i].nsamples = 16;
+ spe_fir_fff_params[i].ntaps = 4;
+ spe_fir_fff_params[i].nsamples = 7;
spe_fir_fff_params[i].offset = 0;
spe_fir_fff_params[i].size = size;
Added: gnuradio/branches/developers/ngoergen/spe_fir_fff/spe_fir_fff_as.S
===================================================================
--- gnuradio/branches/developers/ngoergen/spe_fir_fff/spe_fir_fff_as.S
(rev 0)
+++ gnuradio/branches/developers/ngoergen/spe_fir_fff/spe_fir_fff_as.S
2008-02-22 20:55:45 UTC (rev 7783)
@@ -0,0 +1,164 @@
+ .file "fir_fff_spe.S"
+.text
+ .align 3
+ .global spe_fir_fff
+ .type spe_fir_fff, @function
+spe_fir_fff:
+ ori $32,$5,0 # 0-2
+ ori $33,$7,0 # 0-2
+ ori $31,$8,0 # 0-2
+ .start4:
+ xor $30,$30,$30 # 0-2 initilize the
current output vector
+ il $34, 16 # 0-2 shift mask for
output insertion
+
+ .start1:
+ andi $37,$33,0x0c # 0-2 find index into
masks [0-4]
+ xor $5,$5,$5 # 0-2
+ xor $6,$6,$6 # 0-2
+ shlqbii $38,$37,2 # 1-4 mult by 4
+ xor $7,$7,$7 # 0-2
+ ori $36,$9,0 # 0-2
+ lqd $39,shiftmasks($38) # 1-6 load the
right shift mask
+ xor $8,$8,$8 # 0-2
+ a $35, $33, $3 # 0-2
+ ori $38,$4,0 # 0-2
+
+ hbra .inner_loop_branch, .inner_loop
# inner-loop hint
+ .inner_loop:
+ lqd $10,0($35) # 1-6
+ lqd $11,16($35) # 1-6
+ lqd $12,32($35) # 1-6
+ lqd $13,48($35) # 1-6
+ lqd $18,64($35) # 1-6
+ lqd $14,0($38) # 1-6
+ lqd $15,16($38) # 1-6
+ ai $36,$36,-4 # 0-2
+ lqd $16,32($38) # 1-6
+ ai $35,$35,64 # 0-2
+ lqd $17,48($38) # 1-6
+ ai $38,$38,64 # 0-2
+
+ shufb $10,$10,$11,$39 # 1-4
+ fma $5, $10, $14, $5 # 0-6
+ shufb $11,$11,$12,$39 # 1-4
+ fma $6, $11, $15, $6 # 0-6
+ shufb $12,$12,$13,$39 # 1-4
+ fma $7, $12, $16, $7 # 0-6
+ shufb $13,$13,$18,$39 # 1-4
+ fma $8, $13, $17, $8 # 0-6
+
+ .inner_loop_branch:
+ brnz $36,.inner_loop
+ fa $18,$5,$6 # 0-6
+ hbra .outter_loop_branch, .start1 # 1-
+ fa $19,$7,$8 # 0-6
+ hbra .finish_branch, .finish_branch_targ
# 1-
+ fa $5,$18,$19 # 0-6
+
+# accumulate word elements in r5 into first element in r5
+ ori $6,$5,0 # 0-2
+ shlqbyi $6,$6,4 # 1-4
+ fa $5,$6,$5 # 0-2
+ shlqbyi $6,$6,4 # 1-4
+ fa $5,$6,$5 # 0-2
+ shlqbyi $6,$6,4 # 1-4
+ fa $5,$6,$5 # 0-2
+ fsmbi $10,0xC000 # 1-4
+ and $11,$10,$5 # 0-2
+ rotqby $12, $11, $34 # 1-4
+ or $30,$12,$30 # 0-2
+
+ ai $31,$31,-1 # 0-2
+ .finish_branch:
+ brz $31,.finish4
+ .finish_branch_targ:
+ ai $33,$33,4 # 0-2
+ ai $34,$34,-4 # 0-2
+
+ .outter_loop_branch:
+ brnz $34, .start1
+ hbra .outter_outter_loop_branch, .start4
+# Stores r5 in output
+ .finish4:
+ stqd $30,0($32)
+ ai $32,$32,16
# increment output pointer by 1 new vector.
+
+ .outter_outter_loop_branch:
+ brnz $31,.start4 # start
another output vector if needed
+
+ bi $lr
+ .size spe_fir_fff, .-spe_fir_fff
+.text
+ .global shiftmasks
+ .align 4
+ .type shiftmasks, @object
+ .size shiftmasks, 64
+shiftmasks:
+ .long 0x00010203
+ .long 0x04050607
+ .long 0x08090a0b
+ .long 0x0c0d0e0f
+
+ .long 0x04050607
+ .long 0x08090a0b
+ .long 0x0c0d0e0f
+ .long 0x10111213
+
+ .long 0x08090a0b
+ .long 0x0c0d0e0f
+ .long 0x10111213
+ .long 0x14151617
+
+ .long 0x0c0d0e0f
+ .long 0x10111213
+ .long 0x14151617
+ .long 0x18191a1b
+
+# .long 66051
+# .long 67438087
+# .long 134810123
+# .long 202182159
+# .long 67438087
+# .long 134810123
+# .long 202182159
+# .long 269554195
+# .long 134810123
+# .long 202182159
+# .long 269554195
+# .long 336926231
+# .long 202182159
+# .long 269554195
+# .long 336926231
+# .long 404298267
+
+# old code to build stack for above routine
+# stqd $sp,-128($sp)
+# ai $sp,$sp,-128
+# lqd $3,32($sp)
+# hbrp # 1
+# cwd $9,0($sp)
+# shufb $3,$4,$3,$9
+# stqd $3,32($sp)
+# lqd $3,48($sp)
+# cwd $4,0($sp)
+# shufb $3,$4,$3,$4
+# stqd $3,48($sp)
+# lqd $3,64($sp)
+# cwd $4,0($sp)
+# shufb $3,$5,$3,$4
+# stqd $3,64($sp)
+# lqd $3,80($sp)
+# hbrp # 2
+# cwd $4,0($sp)
+# shufb $3,$6,$3,$4
+# stqd $3,80($sp)
+# lqd $3,96($sp)
+# cwd $4,0($sp)
+# shufb $3,$7,$3,$4
+# stqd $3,96($sp)
+# lqd $3,112($sp)
+# cwd $4,0($sp)
+# shufb $3,$8,$3,$4
+# stqd $3,112($sp)
+
+ .ident "Hand coded Cell SPU assembly"
Added: gnuradio/branches/developers/ngoergen/spe_fir_fff/spe_fir_fff_as.h
===================================================================
--- gnuradio/branches/developers/ngoergen/spe_fir_fff/spe_fir_fff_as.h
(rev 0)
+++ gnuradio/branches/developers/ngoergen/spe_fir_fff/spe_fir_fff_as.h
2008-02-22 20:55:45 UTC (rev 7783)
@@ -0,0 +1,22 @@
+#ifndef SPE_FIR_FFF_AS_H_
+#define SPE_FIR_FFF_AS_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern void spe_fir_fff ( /* FIR kernel; defined in spe_fir_fff_as.S. NOTE(review): notes below assume args arrive in $3..$9 in declaration order -- confirm against the SPU ABI */
+ const __vector float *input, /* sample buffer; every output sample starts reading at (input + offset) */
+ const __vector float *taps, /* coefficient buffer; read as whole quadwords from its start for every output sample */
+ __vector float *output, /* one quadword is stored, then the pointer advances 16 bytes, per group of up to 4 output samples */
+ const __vector float *delayline, /* not read by the assembly (its register is overwritten); the caller in fir_fff_spe.cpp passes 0 */
+ const unsigned int offset, /* starting byte offset added to input; advanced by 4 after each output sample */
+ const unsigned int nsamples, /* number of output samples; decremented before it is tested for zero, so presumably must be > 0 -- TODO confirm */
+ const unsigned int ntaps /* inner-loop counter: stepped by -4 per pass and tested for zero, so presumably must be a positive multiple of 4 -- TODO confirm units */
+);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif //SPE_FIR_FFF_AS_H_
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [Commit-gnuradio] r7783 - gnuradio/branches/developers/ngoergen/spe_fir_fff,
ngoergen <=