octave-maintainers
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Patch for sparse indexed assignment


From: David Bateman
Subject: Patch for sparse indexed assignment
Date: Wed, 12 Apr 2006 22:37:00 +0200
User-agent: Mozilla Thunderbird 1.0.6-7.5.20060mdk (X11/20050322)

Here is an optimization of sparse indexed assignments like s(p,:)=1 or
s(p,1)=sprandn( length(p),1) which makes them significantly faster...

D.

        * Sparse.cc (assign (Sparse<LT>&, const Sparse<RT>&)): Optimize
        assignment.
 

-- 
David Bateman                                address@hidden
Motorola Labs - Paris                        +33 1 69 35 48 04 (Ph) 
Parc Les Algorithmes, Commune de St Aubin    +33 6 72 01 06 33 (Mob) 
91193 Gif-Sur-Yvette FRANCE                  +33 1 69 35 77 01 (Fax) 

The information contained in this communication has been classified as: 

[x] General Business Information 
[ ] Motorola Internal Use Only 
[ ] Motorola Confidential Proprietary

*** liboctave/Sparse.cc.orig    2006-04-12 14:19:49.949293513 +0200
--- liboctave/Sparse.cc 2006-04-12 21:56:16.706420952 +0200
***************
*** 2492,2524 ****
                            {
                              octave_idx_type iii = 0;
                              octave_idx_type ii = idx_i.elem (iii);
!                             for (octave_idx_type i = 0; i < new_nr; i++)
                                {
!                                 OCTAVE_QUIT;
! 
!                                 if (iii < n && ii == i)
                                    {
                                      if (scalar != RT ())
                                        {
                                          stmp.data(kk) = scalar;
!                                         stmp.ridx(kk++) = i;
                                        }
                                      if (++iii < n)
                                        ii = idx_i.elem(iii);
                                    }
!                                 else if (j < lhs.cols()) 
                                    {
!                                     for (octave_idx_type k = lhs.cidx(j); 
!                                          k < lhs.cidx(j+1); k++)
!                                       {
!                                         if (lhs.ridx(k) == i)
!                                           {
!                                             stmp.data(kk) = lhs.data(k);
!                                             stmp.ridx(kk++) = i;
!                                           }
!                                         if (lhs.ridx(k) >= i)
!                                           break;
!                                       }
                                    }
                                }
                              if (++jji < m)
--- 2492,2523 ----
                            {
                              octave_idx_type iii = 0;
                              octave_idx_type ii = idx_i.elem (iii);
!                             octave_idx_type ppp = 0;
!                             octave_idx_type ppi = lhs.cidx(j+1) - 
!                               lhs.cidx(j); 
!                             octave_idx_type pp = (ppp < ppi ? 
!                                                   lhs.ridx(lhs.cidx(j)+ppp) :
!                                                   new_nr);
!                             while (ppp < ppi || iii < n)
                                {
!                                 if (iii < n && ii <= pp)
                                    {
                                      if (scalar != RT ())
                                        {
                                          stmp.data(kk) = scalar;
!                                         stmp.ridx(kk++) = ii;
                                        }
+                                     if (ii == pp)
+                                       pp = (++ppp < ppi ? 
lhs.ridx(lhs.cidx(j)+ppp) : new_nr);                                        
                                      if (++iii < n)
                                        ii = idx_i.elem(iii);
                                    }
!                                 else
                                    {
!                                     stmp.data(kk) = 
!                                       lhs.data(lhs.cidx(j)+ppp);
!                                     stmp.ridx(kk++) = pp;
!                                     pp = (++ppp < ppi ? 
lhs.ridx(lhs.cidx(j)+ppp) : new_nr);
                                    }
                                }
                              if (++jji < m)
***************
*** 2630,2665 ****
                        for (octave_idx_type i = 0; i < m; i++)
                          rhs_idx_j[i] = i;
  
!                     // Count the number of non-zero terms
!                     octave_idx_type new_nzmx = lhs.nnz ();
!                     for (octave_idx_type j = 0; j < m; j++)
!                       {
!                         octave_idx_type jj = idx_j.elem (j);
!                         for (octave_idx_type i = 0; i < n; i++)
!                           {
!                             OCTAVE_QUIT;
! 
!                             if (jj < lhs_nc)
!                               {
!                                 octave_idx_type ii = idx_i.elem (i);
!                             
!                                 if (ii < lhs_nr)
!                                   {
!                                     for (octave_idx_type k = lhs.cidx(jj); 
!                                          k < lhs.cidx(jj+1); k++)
!                                       {
!                                         if (lhs.ridx(k) == ii)
!                                           new_nzmx--;
!                                         if (lhs.ridx(k) >= ii)
!                                           break;
!                                       }
!                                   }
!                               }
!                             
!                             if (rhs.elem(rhs_idx_i[i],rhs_idx_j[j]) != RT ())
!                               new_nzmx++;
!                           }
!                       }
  
                      Sparse<LT> stmp (new_nr, new_nc, new_nzmx);
  
--- 2629,2636 ----
                        for (octave_idx_type i = 0; i < m; i++)
                          rhs_idx_j[i] = i;
  
!                     // Maximum number of non-zero elements
!                     octave_idx_type new_nzmx = lhs.nnz() + rhs.nnz();
  
                      Sparse<LT> stmp (new_nr, new_nc, new_nzmx);
  
***************
*** 2673,2707 ****
                            {
                              octave_idx_type iii = 0;
                              octave_idx_type ii = idx_i.elem (iii);
!                             for (octave_idx_type i = 0; i < new_nr; i++)
                                {
!                                 OCTAVE_QUIT;
! 
!                                 if (iii < n && ii == i)
                                    {
                                      RT rtmp = rhs.elem (rhs_idx_i[iii], 
                                                          rhs_idx_j[jji]);
                                      if (rtmp != RT ())
                                        {
                                          stmp.data(kk) = rtmp;
!                                         stmp.ridx(kk++) = i;
                                        }
                                      if (++iii < n)
                                        ii = idx_i.elem(iii);
                                    }
!                                 else if (j < lhs.cols()) 
                                    {
!                                     for (octave_idx_type k = lhs.cidx(j); 
!                                          k < lhs.cidx(j+1); k++)
!                                       {
!                                         if (lhs.ridx(k) == i)
!                                           {
!                                             stmp.data(kk) = lhs.data(k);
!                                             stmp.ridx(kk++) = i;
!                                           }
!                                         if (lhs.ridx(k) >= i)
!                                           break;
!                                       }
                                    }
                                }
                              if (++jji < m)
--- 2644,2677 ----
                            {
                              octave_idx_type iii = 0;
                              octave_idx_type ii = idx_i.elem (iii);
!                             octave_idx_type ppp = 0;
!                             octave_idx_type ppi = lhs.cidx(j+1) -
!                               lhs.cidx(j);
!                             octave_idx_type pp = (ppp < ppi ? 
!                                                   lhs.ridx(lhs.cidx(j)+ppp) :
!                                                   new_nr);
!                             while (ppp < ppi || iii < n)
                                {
!                                 if (iii < n && ii <= pp)
                                    {
                                      RT rtmp = rhs.elem (rhs_idx_i[iii], 
                                                          rhs_idx_j[jji]);
                                      if (rtmp != RT ())
                                        {
                                          stmp.data(kk) = rtmp;
!                                         stmp.ridx(kk++) = ii;
                                        }
+                                     if (ii == pp)
+                                       pp = (++ppp < ppi ? 
lhs.ridx(lhs.cidx(j)+ppp) : new_nr);                                        
                                      if (++iii < n)
                                        ii = idx_i.elem(iii);
                                    }
!                                 else
                                    {
!                                     stmp.data(kk) = 
!                                       lhs.data(lhs.cidx(j)+ppp);
!                                     stmp.ridx(kk++) = pp;
!                                     pp = (++ppp < ppi ? 
lhs.ridx(lhs.cidx(j)+ppp) : new_nr);
                                    }
                                }
                              if (++jji < m)
***************
*** 2719,2724 ****
--- 2689,2695 ----
                          stmp.cidx(j+1) = kk;
                        }
  
+                     stmp.maybe_compress();
                      lhs = stmp;
                    }
                }

reply via email to

[Prev in Thread] Current Thread [Next in Thread]