[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
ABI=longlong on x86 linux
From: |
jason |
Subject: |
ABI=longlong on x86 linux |
Date: |
Thu, 24 Jan 2002 14:33:39 +0000 |
In testing the new factorial code on x86 duron linux (slack8.0) with gmp ABI
set to longlong , there appears to be an error in the mpz_cmp code
Of course with gmp ABI as default there is no problem
gmp-4.0 configured with
./configure --host=none-pc-linux-gnu ABI=longlong
make
gcc is 2.95.3
program below compiled with
gcc -Wall -O3 facnew.c .libs/libgmp.a -o facnew
facnew.c is
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <math.h>
#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"
// x=product of all odd numbers z such that low < z <= high if none in
interval ret 1
void mpz_odd_product(mpz_t x,unsigned long low,unsigned long high,mpz_t*);
void new_mpz_fac_ui(mpz_t x,unsigned long n);
/* a guess , if defined then will use old factorial code otherwise can
remove the old code */
//#define FAC_THRESHOLD 400
/* define this to use smallish table */
//#define USESMALLTABLE
/*
code below for limb sizes of 32,64 bits only
for other limb sizes must calculate tables
and update code wherever there is a
BITS_PER_MP_LIMB
*/
#if BITS_PER_MP_LIMB != 32 && BITS_PER_MP_LIMB != 64
#error must fill tables etc for bits_per_mp_limb
#endif
void
new_mpz_fac_ui (mpz_ptr x, unsigned long n)
{
unsigned long z,stt;
int i,j;
mpz_t t,t1;
mpz_t st[8 * sizeof(unsigned long) + 1];
#ifdef USESMALLTABLE
#if BITS_PER_MP_LIMB == 32
static const mp_limb_t
table[17]={1,1,1,3,3,15,45,315,315,2835,14175,155925,467775,\
6081075,42567525,638512875,638512875};
#endif
#if BITS_PER_MP_LIMB == 64
static const mp_limb_t
table[26]={1,1,1,3,3,15,45,315,315,2835,14175,155925,467775,\
6081075,42567525,638512875,638512875,10854718875,\
97692469875,1856156927625,9280784638125,\
194896477400625,2143861251406875,49308808782358125,\
147926426347074375,3698160658676859375};
#endif
#endif /* USESMALLTABLE */
#ifdef USESMALLTABLE
#if BITS_PER_MP_LIMB == 32
if (n < 17)
#endif
#if BITS_PER_MP_LIMB == 64
if ( n < 26)
#endif
{
/* as always have 1 limb allocated , cant use mpz_set_ui as table is
mp_limb not ui */
x -> _mp_d[0] = table[n];
x -> _mp_size = 1;
popc_limb (i, (mp_limb_t)n);
mpz_mul_2exp (x, x, n - i);
return;
}
#else
if(n<=1){mpz_set_ui(x,1);return;}
if(n==2){mpz_set_ui(x,2);return;}
#endif /* USESMALLTABLE */
#ifdef FAC_THRESHOLD
if (n <= FAC_THRESHOLD) /* CAN BE REMOVED */
{
old_mpz_fac_ui (x, n);
return;
}
#endif /* FAC_THRESHOLD */
/* NOTE : MUST have n>=3 here */
mpz_init (t);
mpz_init_set_ui (t1,1);
/*
This can be used if we are allowed varible length arrays
a = high - low + (low >> 1) - (high >> 1);
ASSERT(sizeof (a) <= sizeof (mp_limb_t));
count_leading_zeros (stt, (mp_limb_t)a);
stt = BITS_PER_MP_LIMB - stt;
stt++;
mpz_t st[stt];
worse case need floor(logbase2((n>>1)+1))+1 stack entrys
*/
count_leading_zeros (stt, (mp_limb_t)((n>>1) + 1) );
stt = BITS_PER_MP_LIMB - stt + 1;
/* for safety use this stt = 8 * sizeof(unsigned long) + 1; */
/* Must allocate at least 2 limb for the stack entrys */
/* estimating space for the stack entrys is v. tricky */
for (i = 0; i < stt; i++)
mpz_init2 (st[i], BITS_PER_MP_LIMB * 2);
count_leading_zeros (z, (mp_limb_t)(n / 3) );
/* find z st 2^z>n/3 range for z is 1 <= z <= 8 * sizeof(unsigned
long)-1 */
z = BITS_PER_MP_LIMB - z;
/*
n! = 2^e * PRODUCT_{i=0}^{i=z-1} C_2( n/2^{i+1} , n/2^i )^{i+1}
where 2^e || n! 3.2^z>n C_2(a,b)=PRODUCT of odd z such that a<z<=b
*/
for (j = 8 * sizeof(unsigned long) / 2; j != 0 ; j >>= 1)
{
mpz_set_ui (x, 1);
for (i = 8 * sizeof(unsigned long) - j; i >= j; i -= 2 * j)
if (z >= i)
{
mpz_odd_product (t, n >> i, n >> (i - 1), st);
if (i != j)
mpz_pow_ui (t, t, i / j);
mpz_mul (x, x, t);
}
if (z >= j && j != 1)
{
mpz_mul (t1, t1, x);
mpz_mul (t1, t1, t1);
}
}
for (i = 0; i < stt; i++)
mpz_clear (st[i]);
mpz_clear (t);
mpz_mul (x, x, t1);
mpz_clear (t1);
popc_limb (i, (mp_limb_t)n);
mpz_mul_2exp (x, x, n - i);
return;
}
/* FACMUL2 largest odd x such that x(x+2)<=2^BITS_PER_MP_LIMB-1
FACMUL3 largest odd x such that x(x+2)(x+4)<=2^BITS_PER_MP_LIMB-1
etc see code below
*/
#if BITS_PER_MP_LIMB == 32
#define FACMUL2 65535U
#define FACMUL3 1623
#define FACMUL4 253
#define FACMUL5 79
#define FACMUL6 35
#define FACMUL7 17
#define FACMUL8 9
#endif
#if BITS_PER_MP_LIMB == 64
#define FACMUL2 4294967295U
#define FACMUL3 2642243U
#define FACMUL4 65533U
#define FACMUL5 7127
#define FACMUL6 1619
#define FACMUL7 559
#define FACMUL8 249
#define FACMUL9 129
#define FACMUL10 75
#define FACMUL11 45
#endif
/* the product p=l*(l+2)*(l+4)*(l+6)*(l+8)*(l+10) uses 5 muls, 5 adds
could use z=8*(l+10)*l+61 , w=(8*l+36)*z-3301 ,
p=(w*(w+8*z+6602)+6613425)/4096 for op count of 3 muls, 3 shifts , 7
adds
see Knuth 4.6.4 adaptation of coeffs
*/
/*
x=product of all numbers z such that gcd(z,2)=1 and low < z <= high if none
in interval ret 1
*/
void
mpz_odd_product (mpz_t x, unsigned long low, unsigned long high,mpz_t* st)
{
unsigned long a = 1, b, stn = 0, m;
int c;
mp_limb_t l = low + 1 , p, pp = 0;
/*
to multiply n numbers together we need floor(lgbase2(n))+1 stack
entries
there are high-low-floor(high/2)+floor(low/2) odd numbers >low and
<=high
floor(lgbase2(n))=BITS_PER_MP_LIMB-count_leading_zeros(n)
note this is slightly over the top as we dont multiply by one and the
small
factors are multiplied together before putting on the stack and
we multiply in pairs before we put on stack
*/
/*
This can be used if we are allowed varible length arrays
a = high - low + (low >> 1) - (high >> 1);
ASSERT(sizeof (a) <= sizeof (mp_limb_t));
count_leading_zeros (stt, (mp_limb_t)a);
stt = BITS_PER_MP_LIMB - stt;
stt++;
mpz_t st[stt];
*/
if (l % 2 == 0) l++;
#if BITS_PER_MP_LIMB >= 64
if (high > 16)
{
m = high - 16;
if (m > FACMUL9)
m = FACMUL9;
while(l <= m)
{
ASSERT(stn < 8 * sizeof(unsigned long) + 1);
/* Hope the compiler can rearrage this as independant multiplies
like a
tree
l*(l+2) (l+4)*(l+6) (l+8)*(l+10) (l+12)*(l+14) (l+16) etc
or do some coeff adaptation , may be worth doing manually ?
*/
p = l * (l + 2) * (l + 4) * (l + 6) * (l + 8) * (l + 10) * (l +
12) * (l + 14) * (l + 16);
l += 18;
if (pp == 0)
{
pp = p;
continue;
}
umul_ppmm (st[stn]->_mp_d[1], st[stn]->_mp_d[0], p, pp);
st[stn]->_mp_size = 2;
pp = 0;
stn++;
b = a++;
while( (b & 1) == 0)
{
mpz_mul (st[stn - 2], st[stn - 1], st[stn - 2]);
stn--;
b >>= 1;
}
}
}
if (high > 14)
{
m = high - 14;
if (m > FACMUL8)
m = FACMUL8;
while(l <= m)
{
ASSERT(stn < 8 * sizeof(unsigned long) + 1);
p = l * (l + 2) * (l + 4) * (l + 6) * (l + 8) * (l + 10) * (l + 12)
* (l + 14);
l += 16;
if (pp == 0)
{
pp = p;
continue;
}
umul_ppmm (st[stn]->_mp_d[1], st[stn]->_mp_d[0], p, pp);
st[stn]->_mp_size = 2;
pp = 0;
stn++;
b = a++;
while( (b & 1) == 0)
{
mpz_mul (st[stn - 2], st[stn - 1], st[stn - 2]);
stn--;
b >>= 1;
}
}
}
if (high > 12)
{
m = high - 12;
if (m > FACMUL7)
m = FACMUL7;
while(l <= m)
{
ASSERT(stn < 8 * sizeof(unsigned long) + 1);
p = l * (l + 2) * (l + 4) * (l + 6) * (l + 8) * (l + 10) * (l + 12);
l += 14;
if (pp == 0)
{
pp = p;
continue;
}
umul_ppmm (st[stn]->_mp_d[1], st[stn]->_mp_d[0], p, pp);
st[stn]->_mp_size = 2;
pp = 0;
stn++;
b = a++;
while( (b & 1) == 0)
{
mpz_mul (st[stn - 2], st[stn - 1], st[stn - 2]);
stn--;
b >>= 1;
}
}
}
#endif /* BITS_PER_MP_LIMB >= 64 */
if (high > 10)
{
m = high - 10;
if (m > FACMUL6)
m = FACMUL6;
while(l <= m)
{
ASSERT(stn < 8 * sizeof(unsigned long) + 1);
p = l * (l + 2) * (l + 4) * (l + 6) * (l + 8) * (l + 10);
l += 12;
if (pp == 0)
{
pp = p;
continue;
}
umul_ppmm (st[stn]->_mp_d[1], st[stn]->_mp_d[0], p, pp);
st[stn]->_mp_size = 2;
pp = 0;
stn++;
b = a++;
while( (b & 1) == 0)
{
mpz_mul (st[stn - 2], st[stn - 1], st[stn - 2]);
stn--;
b >>= 1;
}
}
}
if (high > 8)
{
m = high - 8;
if (m > FACMUL5)
m = FACMUL5;
while(l <= m)
{
ASSERT(stn < 8 * sizeof(unsigned long) + 1);
p = l * (l + 2) * (l + 4) * (l + 6) * (l + 8);
l += 10;
if (pp == 0)
{
pp = p;
continue;
}
umul_ppmm (st[stn]->_mp_d[1], st[stn]->_mp_d[0], p, pp);
st[stn]->_mp_size = 2;
pp = 0;
stn++;
b = a++;
while( (b & 1) == 0)
{
mpz_mul (st[stn - 2], st[stn - 1], st[stn - 2]);
stn--;
b >>= 1;
}
}
}
if (high > 6)
{
m = high - 6;
if (m > FACMUL4)
m = FACMUL4;
while(l <= m)
{
ASSERT(stn < 8 * sizeof(unsigned long) + 1);
p = l * (l + 2) * (l + 4) * (l + 6);
l += 8;
if (pp == 0)
{
pp = p;
continue;
}
umul_ppmm (st[stn]->_mp_d[1], st[stn]->_mp_d[0], p, pp);
st[stn]->_mp_size = 2;
pp = 0;
stn++;
b = a++;
while( (b & 1) == 0)
{
mpz_mul (st[stn - 2], st[stn - 1], st[stn - 2]);
stn--;
b >>= 1;
}
}
}
if (high > 4)
{
m = high - 4;
if (m > FACMUL3)
m = FACMUL3;
while(l <= m)
{
ASSERT(stn < 8 * sizeof(unsigned long) + 1);
p = l * (l + 2) * (l + 4);
l += 6;
if (pp == 0)
{
pp = p;
continue;
}
umul_ppmm (st[stn]->_mp_d[1], st[stn]->_mp_d[0], p, pp);
st[stn]->_mp_size = 2;
pp = 0;
stn++;
b = a++;
while( (b & 1) == 0)
{
mpz_mul (st[stn - 2], st[stn - 1], st[stn - 2]);
stn--;
b >>= 1;
}
}
}
if (high > 2)
{
m = high - 2;
if (m > FACMUL2)
m = FACMUL2;
while(l <= m)
{
ASSERT(stn < 8 * sizeof(unsigned long) + 1);
p = l * (l + 2);
l += 4;
if (pp == 0)
{
pp = p;
continue;
}
umul_ppmm (st[stn]->_mp_d[1], st[stn]->_mp_d[0], p, pp);
st[stn]->_mp_size = 2;
pp = 0;
stn++;
b = a++;
while( (b & 1) == 0)
{
mpz_mul (st[stn - 2], st[stn - 1], st[stn - 2]);
stn--;
b >>= 1;
}
}
}
while(l <= high)
{
ASSERT(stn < 8 * sizeof(unsigned long) + 1);
p = l;
l += 2;
if (pp == 0)
{
pp = p;
continue;
}
umul_ppmm (st[stn]->_mp_d[1], st[stn]->_mp_d[0], p, pp);
st[stn]->_mp_size = 2;
pp = 0;
stn++;
b = a++;
while( (b & 1) == 0)
{
mpz_mul (st[stn - 2], st[stn - 1], st[stn - 2]);
stn--;
b >>= 1;
}
}
if (stn == 0)
{
if (pp == 0)
mpz_set_ui (x, 1);
else
{
x->_mp_d[0] = pp;
x->_mp_size = 1;
}
}
else
{
if (pp == 0)
mpz_set (x, st[stn - 1]);
else
{
x->_mp_d[0] = pp;
x->_mp_size = 1;
mpz_mul (x, x, st[stn - 1]);
}
for (c = stn - 2; c >= 0; c--)
mpz_mul (x, x, st[c]);
}
return;
}
#if 0
/* PROGRAM TO GENERATE THE ABOVE CONSTANTS*/
#include <stdio.h>
#include <stdlib.h>
#include "gmp.h"
int main(void)
{mpz_t x,y,z,t;int a,b;
mpz_init(x);mpz_init(y);mpz_init(z);mpz_init(t);mpz_set_ui(y,1);
mpz_mul_2exp(y,y,__GMP_BITS_PER_MP_LIMB);mpz_sub_ui(y,y,1);
printf("/* Constants for __GMP_BITS_PER_MP_LIMB==%d
*/\n",__GMP_BITS_PER_MP_LIMB);
for(a=2;a<60;a++)
{mpz_root(x,y,a);if(mpz_even_p(x))mpz_add_ui(x,x,1);
do{mpz_add_ui(x,x,2);mpz_set_ui(z,1);mpz_set(t,x);
for(b=0;b<a;b++){mpz_mul(z,z,t);mpz_add_ui(t,t,2);}
}while(mpz_cmp(z,y)<=0);
do{mpz_sub_ui(x,x,2);mpz_set_ui(z,1);mpz_set(t,x);
for(b=0;b<a;b++){mpz_mul(z,z,t);mpz_add_ui(t,t,2);}
}while(mpz_cmp(z,y)>0);
printf("#define FACMUL%d ",a);mpz_out_str(stdout,10,x);printf("\n");}
return 0;}
#endif /* 0 */
int main(void)
{unsigned long i,a,c;mpz_t x,y;clock_t t;
mpz_init(x);mpz_init(y);
c=1;
for(i=5064;i<5065;i++)
{t=clock();for(a=0;a<c;a++)mpz_fac_ui(x,i);
printf("mpz_fac_ui is ");mpz_out_str(stdout,10,x);printf("\n");
t=clock();for(a=0;a<c;a++)new_mpz_fac_ui(y,i);
printf("new_fac_ui is ");mpz_out_str(stdout,10,y);printf("\n");
if(mpz_cmp(x,y)!=0){printf("\aerror %ld\n",i);}
}mpz_clear(x);mpz_clear(y);return 0;}
the ouput from mpz_fac_ui(x,5064) and new_mpz_fac_ui(y,5064) is the same but
the mpz_cmp(x,y) says they are different
Jason Moxham
address@hidden
- ABI=longlong on x86 linux,
jason <=