octave-maintainers
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: Speedup of random indesing of sparse matrices (comparison to scilab)


From: David Bateman
Subject: Re: Speedup of random indesing of sparse matrices (comparison to scilab)
Date: Tue, 09 Oct 2007 18:47:56 +0200
User-agent: Thunderbird 1.5.0.7 (X11/20060921)

Here is another minor speedup in the same patch. Essentially in the
Sparse<T>::assign function we access the lhs.data, lhs.ridx, lhs.cidx
and lhs.elem methods in numerous places on the RHS of expressions.
However, as lhs is not const this means that these methods will call the
make_unique function, which results is a significant overhead. Making a
const copy of lhs and using that everywhere on the RHS of expressions
can avoid the calls to make_unique.

To test the speed-up this might give, I ran the code

rand ("state", 1); n = 1e6; m = 1e3; a = sparse(n,n); i = ceil(n *
rand(1,m)); j = ceil(n * rand(1,m)); t0 = cputime(); a(i,j) = 1;
cputime() - t0

on Octave 2.9.14 and on Octave with the attached version of my patch the
speed in Octave 2.9.14 was 0.11198 seconds and with the patch was
0.071989 seconds, or about a 36% speedup.

For good measure I compared

rand ('state', 1); n = 1e5; m = 1e3; a = spalloc(n,n,m*m); i = ceil(n *
rand(1,m)); j = ceil(n * rand(1,m)); t0 = cputime(); a(i,j) = rand(m,m);
cputime() - t0

against Octave 2.9.14, Octave 2.9.14+ with the attached patch and
MatlabR2007a and got the results

Octave 2.9.14:     0.17897
Octave 2.9.14+:    0.17016
MatlabR2007a:    Did not complete after 10minutes

I then tried

rand ('state', 1); n = 5e4; m = 2e2; a = spalloc(n,n,m*m); i = ceil(n *
rand(1,m)); j = ceil(n * rand(1,m)); t0 = cputime(); a(i,j) = rand(m,m);
cputime() - t0

and got

Octave 2.9.14+:     0.018997
MatlabR2007a:    4.4200

Seems matlab also has some issues with random indexes :-), even though I
gave matlab the advantage with the spalloc function allowing matlab to
preallocate the space for the sparse matrix.

D.

-- 
David Bateman                                address@hidden
Motorola Labs - Paris                        +33 1 69 35 48 04 (Ph) 
Parc Les Algorithmes, Commune de St Aubin    +33 6 72 01 06 33 (Mob) 
91193 Gif-Sur-Yvette FRANCE                  +33 1 69 35 77 01 (Fax) 

The information contained in this communication has been classified as: 

[x] General Business Information 
[ ] Motorola Internal Use Only 
[ ] Motorola Confidential Proprietary

*** ./liboctave/Sparse.cc.orig24        2007-10-09 17:36:40.144592064 +0200
--- ./liboctave/Sparse.cc       2007-10-09 18:10:02.915207956 +0200
***************
*** 1811,1816 ****
--- 1811,1823 ----
    return retval;
  }
  
+ struct 
+ idx_node 
+ {
+   octave_idx_type i;
+   struct idx_node *next;
+ };                
+ 
  template <class T>
  Sparse<T>
  Sparse<T>::index (idx_vector& idx_i, idx_vector& idx_j, int resize_ok) const
***************
*** 1903,1908 ****
--- 1910,1917 ----
                      octave_idx_type jj = idx_j.elem (j);
                      for (octave_idx_type i = cidx(jj); i < cidx(jj+1); i++)
                        {
+                         OCTAVE_QUIT;
+ 
                          octave_idx_type ii = itmp [ridx(i)];
                          if (ii >= 0)
                            {
***************
*** 1919,1974 ****
                }
              else
                {
                  // First count the number of non-zero elements
                  octave_idx_type new_nzmx = 0;
                  for (octave_idx_type j = 0; j < m; j++)
                    {
                      octave_idx_type jj = idx_j.elem (j);
-                     for (octave_idx_type i = 0; i < n; i++)
-                       {
-                         OCTAVE_QUIT;
  
!                         octave_idx_type ii = idx_i.elem (i);
!                         if (ii < nr && jj < nc)
                            {
!                             for (octave_idx_type k = cidx(jj); k < 
cidx(jj+1); k++)
                                {
!                                 if (ridx(k) == ii)
!                                   new_nzmx++;
!                                 if (ridx(k) >= ii)
!                                   break;
                                }
                            }
                        }
                    }
  
                  retval = Sparse<T> (n, m, new_nzmx);
  
                  octave_idx_type kk = 0;
                  retval.xcidx(0) = 0;
                  for (octave_idx_type j = 0; j < m; j++)
                    {
                      octave_idx_type jj = idx_j.elem (j);
!                     for (octave_idx_type i = 0; i < n; i++)
                        {
!                         OCTAVE_QUIT;
! 
!                         octave_idx_type ii = idx_i.elem (i);
!                         if (ii < nr && jj < nc)
                            {
!                             for (octave_idx_type k = cidx(jj); k < 
cidx(jj+1); k++)
                                {
!                                 if (ridx(k) == ii)
                                    {
!                                     retval.xdata(kk) = data(k);
!                                     retval.xridx(kk++) = i;
                                    }
-                                 if (ridx(k) >= ii)
-                                   break;
                                }
                            }
                        }
-                     retval.xcidx(j+1) = kk;
                    }
                }
            }
--- 1928,2036 ----
                }
              else
                {
+                 OCTAVE_LOCAL_BUFFER (struct idx_node, nodes, n); 
+                 OCTAVE_LOCAL_BUFFER (octave_idx_type, start_nodes, nr); 
+ 
+                 for (octave_idx_type i = 0; i < nr; i++)
+                   start_nodes[i] = -1;
+ 
+                 for (octave_idx_type i = 0; i < n; i++)
+                   {
+                     octave_idx_type ii = idx_i.elem (i);
+                     nodes[i].i = i;
+                     nodes[i].next = 0;
+ 
+                     octave_idx_type node = start_nodes[ii];
+                     if (node == -1)
+                       start_nodes[ii] = i;
+                     else
+                       {
+                         struct idx_node inode = nodes[node];
+                         while (inode.next)
+                           inode = *inode.next;
+                         inode.next = nodes + i;
+                       }
+                   }
+ 
                  // First count the number of non-zero elements
                  octave_idx_type new_nzmx = 0;
                  for (octave_idx_type j = 0; j < m; j++)
                    {
                      octave_idx_type jj = idx_j.elem (j);
  
!                     if (jj < nc)
!                       {
!                         for (octave_idx_type i = cidx(jj); 
!                              i < cidx(jj+1); i++)
                            {
!                             OCTAVE_QUIT;
! 
!                             octave_idx_type ii = start_nodes [ridx(i)];
! 
!                             if (ii >= 0)
                                {
!                                 struct idx_node inode = nodes[ii];
!                             
!                                 while (true)
!                                   {
!                                     if (inode.i >= 0 && 
!                                         idx_i.elem (inode.i) < nc)
!                                       new_nzmx ++;
!                                     if (inode.next == 0)
!                                       break;
!                                     else
!                                       inode = *inode.next;
!                                   }
                                }
                            }
                        }
                    }
  
+                 std::vector<T> X (n);
                  retval = Sparse<T> (n, m, new_nzmx);
+                 octave_idx_type *ri = retval.xridx ();
  
                  octave_idx_type kk = 0;
                  retval.xcidx(0) = 0;
                  for (octave_idx_type j = 0; j < m; j++)
                    {
                      octave_idx_type jj = idx_j.elem (j);
!                     if (jj < nc)
                        {
!                         for (octave_idx_type i = cidx(jj); 
!                              i < cidx(jj+1); i++)
                            {
!                             OCTAVE_QUIT;
! 
!                             octave_idx_type ii = start_nodes [ridx(i)];
! 
!                             if (ii >= 0)
                                {
!                                 struct idx_node inode = nodes[ii];
!                             
!                                 while (true)
                                    {
!                                     if (inode.i >= 0 && 
!                                         idx_i.elem (inode.i) < nc)
!                                       {
!                                         X [inode.i] = data (i);
!                                         retval.xridx (kk++) = inode.i;
!                                       }
! 
!                                     if (inode.next == 0)
!                                       break;
!                                     else
!                                       inode = *inode.next;
                                    }
                                }
                            }
+                         sort.sort (ri + retval.xcidx (j), 
+                                    kk - retval.xcidx (j));
+                         for (octave_idx_type p = retval.xcidx (j); 
+                              p < kk; p++)
+                           retval.xdata (p) = X [retval.xridx (p)]; 
+                         retval.xcidx(j+1) = kk;
                        }
                    }
                }
            }
***************
*** 2396,2401 ****
--- 2458,2467 ----
    if (n_idx > 0)
      idx_i = tmp[0];
  
+   // Take a constant copy of lhs. This means that ridx and family won't 
+   // call make_unique.
+   const Sparse<LT> c_lhs (lhs);
+ 
    if (n_idx == 2)
      {
        octave_idx_type n = idx_i.freeze (lhs_nr, "row", true);
***************
*** 2453,2464 ****
                              
                                  if (ii < lhs_nr)
                                    {
!                                     for (octave_idx_type k = lhs.cidx(jj); 
!                                          k < lhs.cidx(jj+1); k++)
                                        {
!                                         if (lhs.ridx(k) == ii)
                                            new_nzmx--;
!                                         if (lhs.ridx(k) >= ii)
                                            break;
                                        }
                                    }
--- 2519,2530 ----
                              
                                  if (ii < lhs_nr)
                                    {
!                                     for (octave_idx_type k = c_lhs.cidx(jj); 
!                                          k < c_lhs.cidx(jj+1); k++)
                                        {
!                                         if (c_lhs.ridx(k) == ii)
                                            new_nzmx--;
!                                         if (c_lhs.ridx(k) >= ii)
                                            break;
                                        }
                                    }
***************
*** 2483,2493 ****
                              octave_idx_type ii = idx_i.elem (iii);
                              octave_idx_type ppp = 0;
                              octave_idx_type ppi = (j >= lhs_nc ? 0 : 
!                                                    lhs.cidx(j+1) - 
!                                                    lhs.cidx(j));
                              octave_idx_type pp = (ppp < ppi ? 
!                                                   lhs.ridx(lhs.cidx(j)+ppp) :
!                                                   new_nr);
                              while (ppp < ppi || iii < n)
                                {
                                  if (iii < n && ii <= pp)
--- 2549,2559 ----
                              octave_idx_type ii = idx_i.elem (iii);
                              octave_idx_type ppp = 0;
                              octave_idx_type ppi = (j >= lhs_nc ? 0 : 
!                                                    c_lhs.cidx(j+1) - 
!                                                    c_lhs.cidx(j));
                              octave_idx_type pp = (ppp < ppi ? 
!                                       c_lhs.ridx(c_lhs.cidx(j)+ppp) :
!                                       new_nr);
                              while (ppp < ppi || iii < n)
                                {
                                  if (iii < n && ii <= pp)
***************
*** 2498,2525 ****
                                          stmp.ridx(kk++) = ii;
                                        }
                                      if (ii == pp)
!                                       pp = (++ppp < ppi ? 
lhs.ridx(lhs.cidx(j)+ppp) : new_nr);                                        
                                      if (++iii < n)
                                        ii = idx_i.elem(iii);
                                    }
                                  else
                                    {
                                      stmp.data(kk) = 
!                                       lhs.data(lhs.cidx(j)+ppp);
                                      stmp.ridx(kk++) = pp;
!                                     pp = (++ppp < ppi ? 
lhs.ridx(lhs.cidx(j)+ppp) : new_nr);
                                    }
                                }
                              if (++jji < m)
                                jj = idx_j.elem(jji);
                            }
!                         else if (j < lhs.cols()) 
                            {
!                             for (octave_idx_type i = lhs.cidx(j); 
!                                  i < lhs.cidx(j+1); i++)
                                {
!                                 stmp.data(kk) = lhs.data(i);
!                                 stmp.ridx(kk++) = lhs.ridx(i);
                                }
                            }
                          stmp.cidx(j+1) = kk;
--- 2564,2591 ----
                                          stmp.ridx(kk++) = ii;
                                        }
                                      if (ii == pp)
!                                       pp = (++ppp < ppi ? 
c_lhs.ridx(c_lhs.cidx(j)+ppp) : new_nr);                                    
                                      if (++iii < n)
                                        ii = idx_i.elem(iii);
                                    }
                                  else
                                    {
                                      stmp.data(kk) = 
!                                       c_lhs.data(c_lhs.cidx(j)+ppp);
                                      stmp.ridx(kk++) = pp;
!                                     pp = (++ppp < ppi ? 
c_lhs.ridx(c_lhs.cidx(j)+ppp) : new_nr);
                                    }
                                }
                              if (++jji < m)
                                jj = idx_j.elem(jji);
                            }
!                         else if (j < lhs_nc) 
                            {
!                             for (octave_idx_type i = c_lhs.cidx(j); 
!                                  i < c_lhs.cidx(j+1); i++)
                                {
!                                 stmp.data(kk) = c_lhs.data(i);
!                                 stmp.ridx(kk++) = c_lhs.ridx(i);
                                }
                            }
                          stmp.cidx(j+1) = kk;
***************
*** 2636,2646 ****
                              octave_idx_type ii = idx_i.elem (iii);
                              octave_idx_type ppp = 0;
                              octave_idx_type ppi = (j >= lhs_nc ? 0 : 
!                                                    lhs.cidx(j+1) - 
!                                                    lhs.cidx(j));
                              octave_idx_type pp = (ppp < ppi ? 
!                                                   lhs.ridx(lhs.cidx(j)+ppp) :
!                                                   new_nr);
                              while (ppp < ppi || iii < n)
                                {
                                  if (iii < n && ii <= pp)
--- 2702,2712 ----
                              octave_idx_type ii = idx_i.elem (iii);
                              octave_idx_type ppp = 0;
                              octave_idx_type ppi = (j >= lhs_nc ? 0 : 
!                                                    c_lhs.cidx(j+1) - 
!                                                    c_lhs.cidx(j));
                              octave_idx_type pp = (ppp < ppi ? 
!                                               c_lhs.ridx(c_lhs.cidx(j)+ppp) :
!                                               new_nr);
                              while (ppp < ppi || iii < n)
                                {
                                  if (iii < n && ii <= pp)
***************
*** 2653,2680 ****
                                          stmp.ridx(kk++) = ii;
                                        }
                                      if (ii == pp)
!                                       pp = (++ppp < ppi ? 
lhs.ridx(lhs.cidx(j)+ppp) : new_nr);                                        
                                      if (++iii < n)
                                        ii = idx_i.elem(iii);
                                    }
                                  else
                                    {
                                      stmp.data(kk) = 
!                                       lhs.data(lhs.cidx(j)+ppp);
                                      stmp.ridx(kk++) = pp;
!                                     pp = (++ppp < ppi ? 
lhs.ridx(lhs.cidx(j)+ppp) : new_nr);
                                    }
                                }
                              if (++jji < m)
                                jj = idx_j.elem(jji);
                            }
!                         else if (j < lhs.cols()) 
                            {
!                             for (octave_idx_type i = lhs.cidx(j); 
!                                  i < lhs.cidx(j+1); i++)
                                {
!                                 stmp.data(kk) = lhs.data(i);
!                                 stmp.ridx(kk++) = lhs.ridx(i);
                                }
                            }
                          stmp.cidx(j+1) = kk;
--- 2719,2746 ----
                                          stmp.ridx(kk++) = ii;
                                        }
                                      if (ii == pp)
!                                       pp = (++ppp < ppi ? 
c_lhs.ridx(c_lhs.cidx(j)+ppp) : new_nr);                                    
                                      if (++iii < n)
                                        ii = idx_i.elem(iii);
                                    }
                                  else
                                    {
                                      stmp.data(kk) = 
!                                       c_lhs.data(c_lhs.cidx(j)+ppp);
                                      stmp.ridx(kk++) = pp;
!                                     pp = (++ppp < ppi ? 
c_lhs.ridx(c_lhs.cidx(j)+ppp) : new_nr);
                                    }
                                }
                              if (++jji < m)
                                jj = idx_j.elem(jji);
                            }
!                         else if (j < lhs_nc) 
                            {
!                             for (octave_idx_type i = c_lhs.cidx(j); 
!                                  i < c_lhs.cidx(j+1); i++)
                                {
!                                 stmp.data(kk) = c_lhs.data(i);
!                                 stmp.ridx(kk++) = c_lhs.ridx(i);
                                }
                            }
                          stmp.cidx(j+1) = kk;
***************
*** 2795,2804 ****
  
          if (idx_i)
            {
-             // Take a constant copy of lhs. This means that elem won't 
-             // create missing elements.
-             const Sparse<LT> c_lhs (lhs);
- 
              if (rhs_nr == 0 && rhs_nc == 0)
                lhs.maybe_delete_elements (idx_i);
              else if (len == 0)
--- 2861,2866 ----
2007-10-09  David Bateman  <address@hidden>

        * Sparse.cc (Sparse<T> Sparse<T>::index (idx_vector&, idx_vector&,
        int)): Remove a for loop in the random indexing case at the
        expense of maintaining a set of linked lists of indices that point 
        to the same column in the original matrix.
        (int assign (Sparse<LT>&, Sparse<RT>)): Take a const copy of lhs
        and use it on the RHS of expressions to avoid unnecessary calls to
        make_unique.

reply via email to

[Prev in Thread] Current Thread [Next in Thread]