bug-gmp
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

ABI=longlong on x86 linux


From: jason
Subject: ABI=longlong on x86 linux
Date: Thu, 24 Jan 2002 14:33:39 +0000

In testing the new factorial code on x86 duron linux (slack8.0) with gmp ABI 
set to longlong , there appears to be an error in the mpz_cmp code 

Of course with gmp ABI as default there is no problem




gmp-4.0 configured with

./configure --host=none-pc-linux-gnu ABI=longlong
make

gcc is 2.95.3

program below compiled with

gcc -Wall -O3 facnew.c .libs/libgmp.a -o facnew

facnew.c is

#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <math.h>
#include "gmp.h"

#include "gmp-impl.h"
#include "longlong.h"



// x=product of all odd numbers z such that low < z <= high if none in 
interval ret 1
void    mpz_odd_product(mpz_t x,unsigned long low,unsigned long high,mpz_t*);
void    new_mpz_fac_ui(mpz_t x,unsigned long n);





/* a guess  , if defined then will use old factorial code otherwise can
remove the old code */
//#define FAC_THRESHOLD 400

/* define this to use smallish table */
//#define USESMALLTABLE


/*
code below for limb sizes of 32,64 bits only
for other limb sizes must calculate tables
and update code wherever there is a
BITS_PER_MP_LIMB
*/


#if BITS_PER_MP_LIMB != 32 && BITS_PER_MP_LIMB != 64
#error must fill tables etc for bits_per_mp_limb
#endif

void
new_mpz_fac_ui (mpz_ptr x, unsigned long n)
{

  unsigned long z,stt;
  int           i,j;
  mpz_t         t,t1;
  mpz_t         st[8 * sizeof(unsigned long) + 1];
  #ifdef USESMALLTABLE
  #if BITS_PER_MP_LIMB == 32
  static const mp_limb_t 
table[17]={1,1,1,3,3,15,45,315,315,2835,14175,155925,467775,\
                                    6081075,42567525,638512875,638512875};
  #endif
  #if BITS_PER_MP_LIMB == 64
  static const mp_limb_t 
table[26]={1,1,1,3,3,15,45,315,315,2835,14175,155925,467775,\
                                    
6081075,42567525,638512875,638512875,10854718875,\
                                    97692469875,1856156927625,9280784638125,\
                                    
194896477400625,2143861251406875,49308808782358125,\
                                    147926426347074375,3698160658676859375};
  #endif
  #endif /* USESMALLTABLE */
  
  #ifdef USESMALLTABLE
  #if BITS_PER_MP_LIMB == 32
  if (n < 17)
  #endif
  #if BITS_PER_MP_LIMB == 64
  if ( n < 26)
  #endif
    {
      /* as always have 1 limb allocated , cant use mpz_set_ui as table is 
mp_limb not ui */
      x -> _mp_d[0] = table[n];
      x -> _mp_size = 1;
      popc_limb (i, (mp_limb_t)n);
      mpz_mul_2exp (x, x, n - i);
      return;
    }
  #else 
  if(n<=1){mpz_set_ui(x,1);return;}
  if(n==2){mpz_set_ui(x,2);return;}
  #endif /* USESMALLTABLE */
  
  #ifdef FAC_THRESHOLD
  if (n <= FAC_THRESHOLD) /* CAN BE REMOVED */
    {
      old_mpz_fac_ui (x, n);
      return;
    }
  #endif /* FAC_THRESHOLD */
  
  /*  NOTE : MUST have n>=3 here */
  mpz_init (t);
  mpz_init_set_ui (t1,1);

        /*
          This can be used if we are allowed varible length arrays
          a = high - low + (low >> 1) - (high >> 1);
          ASSERT(sizeof (a) <= sizeof (mp_limb_t));
          count_leading_zeros (stt, (mp_limb_t)a);
          stt = BITS_PER_MP_LIMB - stt;
          stt++;
          mpz_t st[stt];
          worse case need floor(logbase2((n>>1)+1))+1 stack entrys
        */
  count_leading_zeros (stt, (mp_limb_t)((n>>1) + 1) );
  stt = BITS_PER_MP_LIMB - stt + 1;
  /* for safety use this        stt = 8 * sizeof(unsigned long) + 1; */
  /* Must allocate at  least 2 limb for the stack entrys */
  /* estimating space for the stack entrys is v. tricky */
  for (i = 0; i < stt; i++)
    mpz_init2 (st[i], BITS_PER_MP_LIMB * 2);
  count_leading_zeros (z, (mp_limb_t)(n / 3) );
  /* find z st 2^z>n/3   range for z is    1 <= z <= 8 * sizeof(unsigned 
long)-1 */
  z = BITS_PER_MP_LIMB - z;

  /*
  n! = 2^e * PRODUCT_{i=0}^{i=z-1} C_2( n/2^{i+1} , n/2^i )^{i+1}
  where 2^e || n!   3.2^z>n   C_2(a,b)=PRODUCT of odd z such that a<z<=b
  */
  
  for (j = 8 * sizeof(unsigned long) / 2; j != 0 ; j >>= 1)
    {
      mpz_set_ui (x, 1);
      for (i = 8 * sizeof(unsigned long) - j; i >= j; i -= 2 * j)
        if (z >= i)
          {
            mpz_odd_product (t, n >> i, n >> (i - 1), st);
            if (i != j)
              mpz_pow_ui (t, t, i / j);
            mpz_mul (x, x, t); 
          }
      if (z >= j && j != 1)
        {
          mpz_mul (t1, t1, x);
          mpz_mul (t1, t1, t1);
        }    
    }

  for (i = 0; i < stt; i++)
    mpz_clear (st[i]);
  mpz_clear (t);
  mpz_mul (x, x, t1);  
  mpz_clear (t1);
  popc_limb (i, (mp_limb_t)n);
  mpz_mul_2exp (x, x, n - i);
  return;
}


/*      FACMUL2 largest odd x such that x(x+2)<=2^BITS_PER_MP_LIMB-1
        FACMUL3 largest odd x such that x(x+2)(x+4)<=2^BITS_PER_MP_LIMB-1
        etc see code below
*/
#if BITS_PER_MP_LIMB == 32
#define FACMUL2         65535U
#define FACMUL3         1623
#define FACMUL4         253
#define FACMUL5         79
#define FACMUL6         35
#define FACMUL7         17
#define FACMUL8         9
#endif
#if BITS_PER_MP_LIMB == 64
#define FACMUL2         4294967295U
#define FACMUL3         2642243U
#define FACMUL4         65533U
#define FACMUL5         7127
#define FACMUL6         1619
#define FACMUL7         559
#define FACMUL8         249
#define FACMUL9         129
#define FACMUL10        75
#define FACMUL11        45
#endif
/*      the product p=l*(l+2)*(l+4)*(l+6)*(l+8)*(l+10) uses 5 muls, 5 adds
        could use z=8*(l+10)*l+61 , w=(8*l+36)*z-3301 , 
        p=(w*(w+8*z+6602)+6613425)/4096 for op count of 3 muls, 3 shifts , 7 
adds
        see Knuth 4.6.4 adaptation of coeffs
*/

/*
x=product of all numbers z such that gcd(z,2)=1 and low < z <= high if none 
in interval ret 1
*/
void
mpz_odd_product (mpz_t x, unsigned long low, unsigned long high,mpz_t* st)
{

  unsigned long a = 1, b, stn = 0, m;
  int           c;
  mp_limb_t     l = low + 1 , p, pp = 0;
  
        /*
         to multiply n numbers together we need floor(lgbase2(n))+1 stack 
entries
         there are high-low-floor(high/2)+floor(low/2) odd numbers >low and 
<=high
         floor(lgbase2(n))=BITS_PER_MP_LIMB-count_leading_zeros(n)
         note this is slightly over the top as we dont multiply by one and the 
small
         factors are multiplied together before putting on the stack and
         we multiply in pairs before we put on stack
        */
        /*
          This can be used if we are allowed varible length arrays
          a = high - low + (low >> 1) - (high >> 1);
          ASSERT(sizeof (a) <= sizeof (mp_limb_t));
          count_leading_zeros (stt, (mp_limb_t)a);
          stt = BITS_PER_MP_LIMB - stt;
          stt++;
          mpz_t st[stt];
        */
  if (l % 2 == 0) l++;
  #if BITS_PER_MP_LIMB >= 64
  if (high > 16)
    {
      m = high - 16;
      if (m > FACMUL9)
        m = FACMUL9;
      while(l <= m)
        {
           ASSERT(stn < 8 * sizeof(unsigned long) + 1);
           /* Hope the compiler can rearrage this as independant multiplies 
like a 
tree 
              l*(l+2) (l+4)*(l+6)   (l+8)*(l+10) (l+12)*(l+14) (l+16) etc 
              or do some coeff adaptation , may be worth doing manually ?
           */
           p = l * (l + 2) * (l + 4) * (l + 6) * (l + 8) * (l + 10) * (l + 
12) * (l + 14) * (l + 16);
           l += 18;
           if (pp == 0)
             {
               pp = p;
               continue;
             }
           umul_ppmm (st[stn]->_mp_d[1], st[stn]->_mp_d[0], p, pp);
           st[stn]->_mp_size = 2;
           pp = 0;
           stn++;
           b = a++;
           while( (b & 1) == 0)
             {
               mpz_mul (st[stn - 2], st[stn - 1], st[stn - 2]);
               stn--;
               b >>= 1;
             }
        }
    }
  if (high > 14)
    {
      m = high - 14;
      if (m > FACMUL8)
        m = FACMUL8;
      while(l <= m)
        {
          ASSERT(stn < 8 * sizeof(unsigned long) + 1);
          p = l * (l + 2) * (l + 4) * (l + 6) * (l + 8) * (l + 10) * (l + 12) 
* (l + 14);
          l += 16;
          if (pp == 0)
            {
              pp = p;
              continue;
            }
          umul_ppmm (st[stn]->_mp_d[1], st[stn]->_mp_d[0], p, pp);
          st[stn]->_mp_size = 2;
          pp = 0;
          stn++;
          b = a++;
          while( (b & 1) == 0)
            {
              mpz_mul (st[stn - 2], st[stn - 1], st[stn - 2]);
              stn--;
              b >>= 1;
            }
        }
    }
  if (high > 12)
    {
      m = high - 12;
      if (m > FACMUL7)
        m = FACMUL7;
      while(l <= m)
        {
          ASSERT(stn < 8 * sizeof(unsigned long) + 1);
          p = l * (l + 2) * (l + 4) * (l + 6) * (l + 8) * (l + 10) * (l + 12);
          l += 14;
          if (pp == 0)
            {
              pp = p;
              continue;
            }
          umul_ppmm (st[stn]->_mp_d[1], st[stn]->_mp_d[0], p, pp);
          st[stn]->_mp_size = 2;
          pp = 0;
          stn++;
          b = a++;
          while( (b & 1) == 0)
            {
              mpz_mul (st[stn - 2], st[stn - 1], st[stn - 2]);
              stn--;
              b >>= 1;
            }
        }
    }
  #endif /* BITS_PER_MP_LIMB >= 64 */
  if (high > 10)
    {
      m = high - 10;
      if (m > FACMUL6)
        m = FACMUL6;
      while(l <= m)
        {
          ASSERT(stn < 8 * sizeof(unsigned long) + 1);
          p = l * (l + 2) * (l + 4) * (l + 6) * (l + 8) * (l + 10);
          l += 12;
          if (pp == 0)
            {
              pp = p;
              continue;
            }
          umul_ppmm (st[stn]->_mp_d[1], st[stn]->_mp_d[0], p, pp);
          st[stn]->_mp_size = 2;
          pp = 0;
          stn++;
          b = a++;
          while( (b & 1) == 0)
            {
              mpz_mul (st[stn - 2], st[stn - 1], st[stn - 2]);
              stn--;
              b >>= 1;
            }
        }
    }
  if (high > 8)
    {
      m = high - 8;
      if (m > FACMUL5)
        m = FACMUL5;
      while(l <= m)
        {
          ASSERT(stn < 8 * sizeof(unsigned long) + 1);
          p = l * (l + 2) * (l + 4) * (l + 6) * (l + 8);
          l += 10;
          if (pp == 0)
            {
              pp = p;
              continue;
            }
          umul_ppmm (st[stn]->_mp_d[1], st[stn]->_mp_d[0], p, pp);
          st[stn]->_mp_size = 2;
          pp = 0;
          stn++;
          b = a++;
          while( (b & 1) == 0)
            {
              mpz_mul (st[stn - 2], st[stn - 1], st[stn - 2]);
              stn--;
              b >>= 1;
            }
        }
    }
  if (high > 6)
    {
      m = high - 6;
      if (m > FACMUL4)
        m = FACMUL4;
      while(l <= m)
        {
          ASSERT(stn < 8 * sizeof(unsigned long) + 1);
          p = l * (l + 2) * (l + 4) * (l + 6);
          l += 8;
          if (pp == 0)
            {
              pp = p;
              continue;
            }
          umul_ppmm (st[stn]->_mp_d[1], st[stn]->_mp_d[0], p, pp);
          st[stn]->_mp_size = 2;
          pp = 0;
          stn++;
          b = a++;
          while( (b & 1) == 0)
            {
              mpz_mul (st[stn - 2], st[stn - 1], st[stn - 2]);
              stn--;
              b >>= 1;
            }
        }
    }
  if (high > 4)
    {
      m = high - 4;
      if (m > FACMUL3)
        m = FACMUL3;
      while(l <= m)
        {
          ASSERT(stn < 8 * sizeof(unsigned long) + 1);
          p = l * (l + 2) * (l + 4);
          l += 6;
          if (pp == 0)
            {
              pp = p;
              continue;
            }
          umul_ppmm (st[stn]->_mp_d[1], st[stn]->_mp_d[0], p, pp);
          st[stn]->_mp_size = 2;
          pp = 0;
          stn++;
          b = a++;
          while( (b & 1) == 0)
            {
              mpz_mul (st[stn - 2], st[stn - 1], st[stn - 2]);
              stn--;
              b >>= 1;
            }
        }
    }
  if (high > 2)
    {
      m = high - 2;
      if (m > FACMUL2)
        m = FACMUL2;
      while(l <= m)
        {
          ASSERT(stn < 8 * sizeof(unsigned long) + 1);
          p = l * (l + 2);
          l += 4;
          if (pp == 0)
            {
              pp = p;
              continue;
            }
          umul_ppmm (st[stn]->_mp_d[1], st[stn]->_mp_d[0], p, pp);
          st[stn]->_mp_size = 2;
          pp = 0;
          stn++;
          b = a++;
          while( (b & 1) == 0)
            {
              mpz_mul (st[stn - 2], st[stn - 1], st[stn - 2]);
              stn--;
              b >>= 1;
            }
        }
    }
  while(l <= high)
    {
      ASSERT(stn < 8 * sizeof(unsigned long) + 1);
      p = l;
      l += 2;
      if (pp == 0)
            {
              pp = p;
              continue;
            }
          umul_ppmm (st[stn]->_mp_d[1], st[stn]->_mp_d[0], p, pp);
          st[stn]->_mp_size = 2;
          pp = 0;
      stn++;
      b = a++;
      while( (b & 1) == 0)
        {
          mpz_mul (st[stn - 2], st[stn - 1], st[stn - 2]);
          stn--;
          b >>= 1;
        }
    }
  if (stn == 0)
    {
      if (pp == 0)
        mpz_set_ui (x, 1);
      else
        {
          x->_mp_d[0] = pp;
          x->_mp_size = 1;
        }
    }
  else
    {
      if (pp == 0)
        mpz_set (x, st[stn - 1]);
      else
        {
          x->_mp_d[0] = pp;
          x->_mp_size = 1;
          mpz_mul (x, x, st[stn - 1]);
        }
      for (c = stn - 2; c >= 0; c--)
        mpz_mul (x, x, st[c]);
    }
  return;
}



#if 0
/* PROGRAM TO GENERATE THE ABOVE CONSTANTS*/

#include <stdio.h>
#include <stdlib.h>
#include "gmp.h"
int     main(void)
{mpz_t x,y,z,t;int a,b;

mpz_init(x);mpz_init(y);mpz_init(z);mpz_init(t);mpz_set_ui(y,1);
mpz_mul_2exp(y,y,__GMP_BITS_PER_MP_LIMB);mpz_sub_ui(y,y,1);
printf("/* Constants for __GMP_BITS_PER_MP_LIMB==%d 
*/\n",__GMP_BITS_PER_MP_LIMB);
for(a=2;a<60;a++)
   {mpz_root(x,y,a);if(mpz_even_p(x))mpz_add_ui(x,x,1);
    do{mpz_add_ui(x,x,2);mpz_set_ui(z,1);mpz_set(t,x);
       for(b=0;b<a;b++){mpz_mul(z,z,t);mpz_add_ui(t,t,2);}       
      }while(mpz_cmp(z,y)<=0);
    do{mpz_sub_ui(x,x,2);mpz_set_ui(z,1);mpz_set(t,x);
       for(b=0;b<a;b++){mpz_mul(z,z,t);mpz_add_ui(t,t,2);}       
      }while(mpz_cmp(z,y)>0);
    printf("#define FACMUL%d ",a);mpz_out_str(stdout,10,x);printf("\n");}
return 0;}
#endif /* 0 */


int     main(void)
{unsigned long i,a,c;mpz_t x,y;clock_t t;

mpz_init(x);mpz_init(y);
c=1;
for(i=5064;i<5065;i++)
   {t=clock();for(a=0;a<c;a++)mpz_fac_ui(x,i);
   printf("mpz_fac_ui is ");mpz_out_str(stdout,10,x);printf("\n");
   t=clock();for(a=0;a<c;a++)new_mpz_fac_ui(y,i);
   printf("new_fac_ui is ");mpz_out_str(stdout,10,y);printf("\n");
   if(mpz_cmp(x,y)!=0){printf("\aerror %ld\n",i);}
   }mpz_clear(x);mpz_clear(y);return 0;}


the ouput from mpz_fac_ui(x,5064) and new_mpz_fac_ui(y,5064) is the same but 
the mpz_cmp(x,y) says they are different


Jason Moxham
address@hidden



reply via email to

[Prev in Thread] Current Thread [Next in Thread]