[Changeset] Re: Faster Array transpose

octave-maintainers

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Changeset] Re: Faster Array transpose

From:	David Bateman
Subject:	[Changeset] Re: Faster Array transpose
Date:	Mon, 05 May 2008 00:36:02 +0200
User-agent:	Thunderbird 2.0.0.12 (X11/20080306)

David Bateman wrote:
> I think the code below is a good compromise for this function. Sorry I
> can't easily create a changeset as there are other uncommitted changes in
> Array.cc in my repository at the moment
> 
> D.

It wasn't correct. I've written this up in my attempt to clean up the
Array class for the integration of the single precision type. The
changeset is attached, but I suspect it might not apply without the rest
of the single precision changes (which I consider pretty much complete
in themselves). With these changes the transpose and hermitian methods go
into the Array class, with only one specialization for the DiagArray2
class. With this change "make check" passes correctly. To measure the
speed difference I used the following benchmark:

N = [128, 129, 1024, 1025, 2048, 2049, 4096, 4097];
nruns = 10;

t = zeros (1, length (N));
for i = 1: length (N)
  A = 1i*randn (N(i), N(i));

  for j = 1: nruns
    t0 = cputime ();
    B = A';
    t(i) = t(i) + cputime() - t0;
  endfor
  t (i) = t (i) ./ nruns;

  printf("N = %4d, time = %g sec\n", N(i), t(i));
  fflush (stdout);
endfor

Previously, without the patch, I got

N =  128, time = 0.0016666 sec
N =  129, time = 0.0003333 sec
N = 1024, time = 0.0439971 sec
N = 1025, time = 0.0336645 sec
N = 2048, time = 0.276649 sec
N = 2049, time = 0.172656 sec
N = 4096, time = 1.19492 sec
N = 4097, time = 0.700954 sec

and with it

N =  128, time = 0 sec
N =  129, time = 0 sec
N = 1024, time = 0.0319979 sec
N = 1025, time = 0.0243318 sec
N = 2048, time = 0.170655 sec
N = 2049, time = 0.117326 sec
N = 4096, time = 0.745285 sec
N = 4097, time = 0.558964 sec

and similar improvements for the hermitian operator.

D.

# HG changeset patch
# User David Bateman <address@hidden>
# Date 1209938758 -7200
# Node ID 5ca24a40998f84f8d693aa8baeb3b85b1b06933c
# Parent  2442bbe5a6932984c8e9ec7378954f4e218e621d
Cache optimized hermitian/transpose methods

diff --git a/liboctave/Array.cc b/liboctave/Array.cc
--- a/liboctave/Array.cc
+++ b/liboctave/Array.cc
@@ -1203,7 +1203,48 @@ Array<T>::transpose (void) const
   octave_idx_type nr = dim1 ();
   octave_idx_type nc = dim2 ();
 
-  if (nr > 1 && nc > 1)
+  if (nr >= 8 && nc >= 8)
+    {
+      Array<T> result (dim_vector (nc, nr));
+
+      // Blocked transpose to attempt to avoid cache misses.
+
+      // Don't use OCTAVE_LOCAL_BUFFER here as it doesn't work with bool
+      // on some compilers.
+      T buf[64];
+
+      octave_idx_type ii = 0, jj;
+      for (jj = 0; jj < (nc - 8 + 1); jj += 8)
+       {
+         for (ii = 0; ii < (nr - 8 + 1); ii += 8)
+           {
+             // Copy to buffer
+             for (octave_idx_type j = jj, k = 0, idxj = jj * nr; 
+                  j < jj + 8; j++, idxj += nr)
+               for (octave_idx_type i = ii; i < ii + 8; i++)
+                 buf[k++] = xelem (i + idxj);
+
+             // Copy from buffer
+             for (octave_idx_type i = ii, idxi = ii * nc; i < ii + 8; 
+                  i++, idxi += nc)
+               for (octave_idx_type j = jj, k = i - ii; j < jj + 8; 
+                    j++, k+=8)
+                 result.xelem (j + idxi) = buf[k];
+           }
+
+         if (ii < nr)
+           for (octave_idx_type j = jj; j < jj + 8; j++)
+             for (octave_idx_type i = ii; i < nr; i++)
+               result.xelem (j, i) = xelem (i, j);
+       } 
+
+      for (octave_idx_type j = jj; j < nc; j++)
+       for (octave_idx_type i = 0; i < nr; i++)
+         result.xelem (j, i) = xelem (i, j);
+
+      return result;
+    }
+  else if (nr > 1 && nc > 1)
     {
       Array<T> result (dim_vector (nc, nr));
 
@@ -1217,6 +1258,68 @@ Array<T>::transpose (void) const
     {
       // Fast transpose for vectors and empty matrices
       return Array<T> (*this, dim_vector (nc, nr));
+    }
+}
+
+template <class T>
+Array<T>
+Array<T>::hermitian (T (*fcn) (const T&)) const
+{
+  assert (ndims () == 2);
+
+  octave_idx_type nr = dim1 ();
+  octave_idx_type nc = dim2 ();
+
+  if (nr >= 8 && nc >= 8)
+    {
+      Array<T> result (dim_vector (nc, nr));
+
+      // Blocked transpose to attempt to avoid cache misses.
+
+      // Don't use OCTAVE_LOCAL_BUFFER here as it doesn't work with bool
+      // on some compilers.
+      T buf[64];
+
+      octave_idx_type ii = 0, jj;
+      for (jj = 0; jj < (nc - 8 + 1); jj += 8)
+       {
+         for (ii = 0; ii < (nr - 8 + 1); ii += 8)
+           {
+             // Copy to buffer
+             for (octave_idx_type j = jj, k = 0, idxj = jj * nr; 
+                  j < jj + 8; j++, idxj += nr)
+               for (octave_idx_type i = ii; i < ii + 8; i++)
+                 buf[k++] = xelem (i + idxj);
+
+             // Copy from buffer
+             for (octave_idx_type i = ii, idxi = ii * nc; i < ii + 8; 
+                  i++, idxi += nc)
+               for (octave_idx_type j = jj, k = i - ii; j < jj + 8; 
+                    j++, k+=8)
+                 result.xelem (j + idxi) = fcn (buf[k]);
+           }
+
+         if (ii < nr)
+           for (octave_idx_type j = jj; j < jj + 8; j++)
+             for (octave_idx_type i = ii; i < nr; i++)
+               result.xelem (j, i) = fcn (xelem (i, j));
+       } 
+
+      for (octave_idx_type j = jj; j < nc; j++)
+       for (octave_idx_type i = 0; i < nr; i++)
+         result.xelem (j, i) = fcn (xelem (i, j));
+
+      return result;
+    }
+  else
+    {
+      Array<T> result (dim_vector (nc, nr));
+
+      for (octave_idx_type j = 0; j < nc; j++)
+       for (octave_idx_type i = 0; i < nr; i++)
+         result.xelem (j, i) = fcn (xelem (i, j));
+
+      return result;
     }
 }
 
diff --git a/liboctave/Array.h b/liboctave/Array.h
--- a/liboctave/Array.h
+++ b/liboctave/Array.h
@@ -461,6 +461,7 @@ public:
   bool is_empty (void) const { return numel () == 0; }
 
   Array<T> transpose (void) const;
+  Array<T> hermitian (T (*fcn) (const T&) = 0) const;
 
   const T *data (void) const { return rep->data; }
 
diff --git a/liboctave/Array2.h b/liboctave/Array2.h
--- a/liboctave/Array2.h
+++ b/liboctave/Array2.h
@@ -109,6 +109,12 @@ public:
       return Array2<T> (tmp, tmp.rows (), tmp.columns ());
     }
 
+  Array2<T> hermitian (T (*fcn) (const T&) = 0) const
+    {
+      Array<T> tmp = Array<T>::hermitian (fcn);
+      return Array2<T> (tmp, tmp.rows (), tmp.columns ());
+    }
+
   Array2<T> index (idx_vector& i, int resize_ok = 0,
                   const T& rfv = resize_fill_value (T ())) const
     {
diff --git a/liboctave/ArrayN.h b/liboctave/ArrayN.h
--- a/liboctave/ArrayN.h
+++ b/liboctave/ArrayN.h
@@ -102,6 +102,7 @@ public:
   ArrayN<T> squeeze (void) const { return Array<T>::squeeze (); }
 
   ArrayN<T> transpose (void) const { return Array<T>::transpose (); }
+  ArrayN<T> hermitian (T (*fcn) (const T&) = 0) const { return 
Array<T>::hermitian (fcn); }
 
   ArrayN<T>& insert (const ArrayN<T>& a, const dim_vector& dv)
     {
diff --git a/liboctave/CColVector.cc b/liboctave/CColVector.cc
--- a/liboctave/CColVector.cc
+++ b/liboctave/CColVector.cc
@@ -221,17 +221,16 @@ ComplexColumnVector::stack (const Comple
   return retval;
 }
 
-ComplexRowVector
+ComplexRowVector 
 ComplexColumnVector::hermitian (void) const
-{
-  octave_idx_type len = length ();
-  return ComplexRowVector (mx_inline_conj_dup (data (), len), len);
+{ 
+  return MArray<Complex>::hermitian (std::conj);
 }
 
 ComplexRowVector
 ComplexColumnVector::transpose (void) const
 {
-  return ComplexRowVector (*this);
+  return MArray<Complex>::transpose ();
 }
 
 ComplexColumnVector
diff --git a/liboctave/CColVector.h b/liboctave/CColVector.h
--- a/liboctave/CColVector.h
+++ b/liboctave/CColVector.h
@@ -72,7 +72,7 @@ public:
   ComplexColumnVector stack (const ColumnVector& a) const;
   ComplexColumnVector stack (const ComplexColumnVector& a) const;
 
-  ComplexRowVector hermitian (void) const;  // complex conjugate transpose.
+  ComplexRowVector hermitian (void) const;
   ComplexRowVector transpose (void) const;
 
   friend ComplexColumnVector conj (const ComplexColumnVector& a);
diff --git a/liboctave/CDiagMatrix.cc b/liboctave/CDiagMatrix.cc
--- a/liboctave/CDiagMatrix.cc
+++ b/liboctave/CDiagMatrix.cc
@@ -230,20 +230,6 @@ ComplexDiagMatrix::fill (const ComplexRo
     elem (i+beg, i+beg) = a.elem (i);
 
   return *this;
-}
-
-ComplexDiagMatrix
-ComplexDiagMatrix::hermitian (void) const
-{
-  return ComplexDiagMatrix (mx_inline_conj_dup (data (), length ()),
-                           cols (), rows ());
-}
-
-ComplexDiagMatrix
-ComplexDiagMatrix::transpose (void) const
-{
-  return ComplexDiagMatrix (mx_inline_dup (data (), length ()),
-                           cols (), rows ());
 }
 
 ComplexDiagMatrix
diff --git a/liboctave/CDiagMatrix.h b/liboctave/CDiagMatrix.h
--- a/liboctave/CDiagMatrix.h
+++ b/liboctave/CDiagMatrix.h
@@ -87,8 +87,8 @@ public:
   ComplexDiagMatrix& fill (const RowVector& a, octave_idx_type beg);
   ComplexDiagMatrix& fill (const ComplexRowVector& a, octave_idx_type beg);
 
-  ComplexDiagMatrix hermitian (void) const;  // complex conjugate transpose
-  ComplexDiagMatrix transpose (void) const;
+  ComplexDiagMatrix hermitian (void) const { return 
MDiagArray2<Complex>::hermitian (std::conj); }
+  ComplexDiagMatrix transpose (void) const { return 
MDiagArray2<Complex>::transpose(); }
 
   friend ComplexDiagMatrix conj (const ComplexDiagMatrix& a);
 
diff --git a/liboctave/CMatrix.cc b/liboctave/CMatrix.cc
--- a/liboctave/CMatrix.cc
+++ b/liboctave/CMatrix.cc
@@ -882,22 +882,6 @@ ComplexMatrix::stack (const ComplexDiagM
   retval.insert (*this, 0, 0);
   retval.insert (a, nr_insert, 0);
   return retval;
-}
-
-ComplexMatrix
-ComplexMatrix::hermitian (void) const
-{
-  octave_idx_type nr = rows ();
-  octave_idx_type nc = cols ();
-  ComplexMatrix result;
-  if (length () > 0)
-    {
-      result.resize (nc, nr);
-      for (octave_idx_type j = 0; j < nc; j++)
-       for (octave_idx_type i = 0; i < nr; i++)
-         result.elem (j, i) = conj (elem (i, j));
-    }
-  return result;
 }
 
 ComplexMatrix
diff --git a/liboctave/CMatrix.h b/liboctave/CMatrix.h
--- a/liboctave/CMatrix.h
+++ b/liboctave/CMatrix.h
@@ -126,7 +126,8 @@ public:
   ComplexMatrix stack (const ComplexColumnVector& a) const;
   ComplexMatrix stack (const ComplexDiagMatrix& a) const;
 
-  ComplexMatrix hermitian (void) const;  // complex conjugate transpose
+  ComplexMatrix hermitian (void) const
+    { return MArray2<Complex>::hermitian (std::conj); }
   ComplexMatrix transpose (void) const
     { return MArray2<Complex>::transpose (); }
 
diff --git a/liboctave/CRowVector.cc b/liboctave/CRowVector.cc
--- a/liboctave/CRowVector.cc
+++ b/liboctave/CRowVector.cc
@@ -227,14 +227,13 @@ ComplexColumnVector
 ComplexColumnVector
 ComplexRowVector::hermitian (void) const
 {
-  octave_idx_type len = length ();
-  return ComplexColumnVector (mx_inline_conj_dup (data (), len), len);
+  return MArray<Complex>::hermitian (std::conj);
 }
 
 ComplexColumnVector
 ComplexRowVector::transpose (void) const
 {
-  return ComplexColumnVector (*this);
+  return MArray<Complex>::transpose ();
 }
 
 ComplexRowVector
diff --git a/liboctave/CRowVector.h b/liboctave/CRowVector.h
--- a/liboctave/CRowVector.h
+++ b/liboctave/CRowVector.h
@@ -70,7 +70,7 @@ public:
   ComplexRowVector append (const RowVector& a) const;
   ComplexRowVector append (const ComplexRowVector& a) const;
 
-  ComplexColumnVector hermitian (void) const;  // complex conjugate transpose.
+  ComplexColumnVector hermitian (void) const;
   ComplexColumnVector transpose (void) const;
 
   friend ComplexRowVector conj (const ComplexRowVector& a);
diff --git a/liboctave/DiagArray2.cc b/liboctave/DiagArray2.cc
--- a/liboctave/DiagArray2.cc
+++ b/liboctave/DiagArray2.cc
@@ -33,6 +33,27 @@ along with Octave; see the file COPYING.
 #include "DiagArray2.h"
 
 #include "lo-error.h"
+
+template <class T>
+DiagArray2<T>
+DiagArray2<T>::transpose (void) const
+{
+  DiagArray2<T> retval (*this);
+  retval.dimensions = dim_vector (this->dim2 (), this->dim1 ());
+  return retval;
+}
+
+template <class T>
+DiagArray2<T>
+DiagArray2<T>::hermitian (T (* fcn) (const T&)) const
+{
+  DiagArray2<T> retval (this->dim2 (), this->dim1 ());
+  const T *p = this->data ();
+  T *q = retval.fortran_vec ();
+  for (octave_idx_type i = 0; i < this->length (); i++)
+    q [i] = fcn (p [i]);
+  return retval;
+}
 
 // A two-dimensional array with diagonal elements only.
 
diff --git a/liboctave/DiagArray2.h b/liboctave/DiagArray2.h
--- a/liboctave/DiagArray2.h
+++ b/liboctave/DiagArray2.h
@@ -180,6 +180,9 @@ public:
   void resize (octave_idx_type n, octave_idx_type m, const T& val);
 
   void maybe_delete_elements (idx_vector& i, idx_vector& j);
+
+  DiagArray2<T> transpose (void) const;
+  DiagArray2<T> hermitian (T (*fcn) (const T&) = 0) const;
 };
 
 #endif
diff --git a/liboctave/MArray.h b/liboctave/MArray.h
--- a/liboctave/MArray.h
+++ b/liboctave/MArray.h
@@ -63,6 +63,9 @@ public:
       return *this;
     }
 
+  MArray<T> transpose (void) const { return Array<T>::transpose (); }
+  MArray<T> hermitian (T (*fcn) (const T&) = 0) const { return 
Array<T>::hermitian (fcn); }
+
   octave_idx_type nnz (void) const
     {
       octave_idx_type retval = 0;
diff --git a/liboctave/MArray2.h b/liboctave/MArray2.h
--- a/liboctave/MArray2.h
+++ b/liboctave/MArray2.h
@@ -80,6 +80,7 @@ public:
   }
 
   MArray2<T> transpose (void) const { return Array2<T>::transpose (); }
+  MArray2<T> hermitian (T (*fcn) (const T&) = 0) const { return 
Array2<T>::hermitian (fcn); }
 
   MArray2<T> diag (octave_idx_type k) const
   {
diff --git a/liboctave/MDiagArray2.h b/liboctave/MDiagArray2.h
--- a/liboctave/MDiagArray2.h
+++ b/liboctave/MDiagArray2.h
@@ -81,6 +81,9 @@ public:
       return retval;
     }
 
+  MDiagArray2<T> transpose (void) const { return DiagArray2<T>::transpose (); }
+  MDiagArray2<T> hermitian (T (*fcn) (const T&) = 0) const { return 
DiagArray2<T>::hermitian (fcn); }
+
   static MDiagArray2<T> nil_array;
 
   // Currently, the OPS functions don't need to be friends, but that
diff --git a/liboctave/dColVector.cc b/liboctave/dColVector.cc
--- a/liboctave/dColVector.cc
+++ b/liboctave/dColVector.cc
@@ -142,7 +142,7 @@ RowVector
 RowVector
 ColumnVector::transpose (void) const
 {
-  return RowVector (*this);
+  return MArray<double>::transpose();
 }
 
 ColumnVector
diff --git a/liboctave/dDiagMatrix.cc b/liboctave/dDiagMatrix.cc
--- a/liboctave/dDiagMatrix.cc
+++ b/liboctave/dDiagMatrix.cc
@@ -136,12 +136,6 @@ DiagMatrix::fill (const RowVector& a, oc
     elem (i+beg, i+beg) = a.elem (i);
 
   return *this;
-}
-
-DiagMatrix
-DiagMatrix::transpose (void) const
-{
-  return DiagMatrix (mx_inline_dup (data (), length ()), cols (), rows ());
 }
 
 DiagMatrix
diff --git a/liboctave/dDiagMatrix.h b/liboctave/dDiagMatrix.h
--- a/liboctave/dDiagMatrix.h
+++ b/liboctave/dDiagMatrix.h
@@ -70,7 +70,7 @@ public:
   DiagMatrix& fill (const ColumnVector& a, octave_idx_type beg);
   DiagMatrix& fill (const RowVector& a, octave_idx_type beg);
 
-  DiagMatrix transpose (void) const;
+  DiagMatrix transpose (void) const { return MDiagArray2<double>::transpose(); 
}
 
   friend OCTAVE_API DiagMatrix real (const ComplexDiagMatrix& a);
   friend OCTAVE_API DiagMatrix imag (const ComplexDiagMatrix& a);
diff --git a/liboctave/dRowVector.cc b/liboctave/dRowVector.cc
--- a/liboctave/dRowVector.cc
+++ b/liboctave/dRowVector.cc
@@ -144,7 +144,7 @@ ColumnVector
 ColumnVector
 RowVector::transpose (void) const
 {
-  return ColumnVector (*this);
+  return MArray<double>::transpose();
 }
 
 RowVector
diff --git a/liboctave/fCColVector.cc b/liboctave/fCColVector.cc
--- a/liboctave/fCColVector.cc
+++ b/liboctave/fCColVector.cc
@@ -221,17 +221,16 @@ FloatComplexColumnVector::stack (const F
   return retval;
 }
 
-FloatComplexRowVector
+FloatComplexRowVector 
 FloatComplexColumnVector::hermitian (void) const
 {
-  octave_idx_type len = length ();
-  return FloatComplexRowVector (mx_inline_conj_dup (data (), len), len);
-}
-
-FloatComplexRowVector
+  return MArray<FloatComplex>::hermitian (std::conj);
+}
+
+FloatComplexRowVector 
 FloatComplexColumnVector::transpose (void) const
 {
-  return FloatComplexRowVector (*this);
+  return MArray<FloatComplex>::transpose ();
 }
 
 FloatComplexColumnVector
diff --git a/liboctave/fCColVector.h b/liboctave/fCColVector.h
--- a/liboctave/fCColVector.h
+++ b/liboctave/fCColVector.h
@@ -72,7 +72,7 @@ public:
   FloatComplexColumnVector stack (const FloatColumnVector& a) const;
   FloatComplexColumnVector stack (const FloatComplexColumnVector& a) const;
 
-  FloatComplexRowVector hermitian (void) const;  // complex conjugate 
transpose.
+  FloatComplexRowVector hermitian (void) const;
   FloatComplexRowVector transpose (void) const;
 
   friend FloatComplexColumnVector conj (const FloatComplexColumnVector& a);
diff --git a/liboctave/fCDiagMatrix.cc b/liboctave/fCDiagMatrix.cc
--- a/liboctave/fCDiagMatrix.cc
+++ b/liboctave/fCDiagMatrix.cc
@@ -230,20 +230,6 @@ FloatComplexDiagMatrix::fill (const Floa
     elem (i+beg, i+beg) = a.elem (i);
 
   return *this;
-}
-
-FloatComplexDiagMatrix
-FloatComplexDiagMatrix::hermitian (void) const
-{
-  return FloatComplexDiagMatrix (mx_inline_conj_dup (data (), length ()),
-                           cols (), rows ());
-}
-
-FloatComplexDiagMatrix
-FloatComplexDiagMatrix::transpose (void) const
-{
-  return FloatComplexDiagMatrix (mx_inline_dup (data (), length ()),
-                           cols (), rows ());
 }
 
 FloatComplexDiagMatrix
diff --git a/liboctave/fCDiagMatrix.h b/liboctave/fCDiagMatrix.h
--- a/liboctave/fCDiagMatrix.h
+++ b/liboctave/fCDiagMatrix.h
@@ -87,8 +87,8 @@ public:
   FloatComplexDiagMatrix& fill (const FloatRowVector& a, octave_idx_type beg);
   FloatComplexDiagMatrix& fill (const FloatComplexRowVector& a, 
octave_idx_type beg);
 
-  FloatComplexDiagMatrix hermitian (void) const;  // complex conjugate 
transpose
-  FloatComplexDiagMatrix transpose (void) const;
+  FloatComplexDiagMatrix hermitian (void) const { return 
MDiagArray2<FloatComplex>::hermitian (std::conj); }
+  FloatComplexDiagMatrix transpose (void) const { return 
MDiagArray2<FloatComplex>::transpose(); }
 
   friend FloatComplexDiagMatrix conj (const FloatComplexDiagMatrix& a);
 
diff --git a/liboctave/fCMatrix.cc b/liboctave/fCMatrix.cc
--- a/liboctave/fCMatrix.cc
+++ b/liboctave/fCMatrix.cc
@@ -876,22 +876,6 @@ FloatComplexMatrix::stack (const FloatCo
   retval.insert (*this, 0, 0);
   retval.insert (a, nr_insert, 0);
   return retval;
-}
-
-FloatComplexMatrix
-FloatComplexMatrix::hermitian (void) const
-{
-  octave_idx_type nr = rows ();
-  octave_idx_type nc = cols ();
-  FloatComplexMatrix result;
-  if (length () > 0)
-    {
-      result.resize (nc, nr);
-      for (octave_idx_type j = 0; j < nc; j++)
-       for (octave_idx_type i = 0; i < nr; i++)
-         result.elem (j, i) = conj (elem (i, j));
-    }
-  return result;
 }
 
 FloatComplexMatrix
diff --git a/liboctave/fCMatrix.h b/liboctave/fCMatrix.h
--- a/liboctave/fCMatrix.h
+++ b/liboctave/fCMatrix.h
@@ -126,7 +126,8 @@ public:
   FloatComplexMatrix stack (const FloatComplexColumnVector& a) const;
   FloatComplexMatrix stack (const FloatComplexDiagMatrix& a) const;
 
-  FloatComplexMatrix hermitian (void) const;  // complex conjugate transpose
+  FloatComplexMatrix hermitian (void) const
+    { return MArray2<FloatComplex>::hermitian (std::conj); }
   FloatComplexMatrix transpose (void) const
     { return MArray2<FloatComplex>::transpose (); }
 
diff --git a/liboctave/fCRowVector.cc b/liboctave/fCRowVector.cc
--- a/liboctave/fCRowVector.cc
+++ b/liboctave/fCRowVector.cc
@@ -224,17 +224,16 @@ FloatComplexRowVector::append (const Flo
   return retval;
 }
 
-FloatComplexColumnVector
+FloatComplexColumnVector 
 FloatComplexRowVector::hermitian (void) const
 {
-  octave_idx_type len = length ();
-  return FloatComplexColumnVector (mx_inline_conj_dup (data (), len), len);
-}
-
-FloatComplexColumnVector
+  return MArray<FloatComplex>::hermitian (std::conj);
+}
+
+FloatComplexColumnVector 
 FloatComplexRowVector::transpose (void) const
 {
-  return FloatComplexColumnVector (*this);
+  return MArray<FloatComplex>::transpose ();
 }
 
 FloatComplexRowVector
diff --git a/liboctave/fCRowVector.h b/liboctave/fCRowVector.h
--- a/liboctave/fCRowVector.h
+++ b/liboctave/fCRowVector.h
@@ -70,7 +70,7 @@ public:
   FloatComplexRowVector append (const FloatRowVector& a) const;
   FloatComplexRowVector append (const FloatComplexRowVector& a) const;
 
-  FloatComplexColumnVector hermitian (void) const;  // complex conjugate 
transpose.
+  FloatComplexColumnVector hermitian (void) const;
   FloatComplexColumnVector transpose (void) const;
 
   friend FloatComplexRowVector conj (const FloatComplexRowVector& a);
diff --git a/liboctave/fColVector.cc b/liboctave/fColVector.cc
--- a/liboctave/fColVector.cc
+++ b/liboctave/fColVector.cc
@@ -142,7 +142,7 @@ FloatRowVector
 FloatRowVector
 FloatColumnVector::transpose (void) const
 {
-  return FloatRowVector (*this);
+  return MArray<float>::transpose();
 }
 
 FloatColumnVector
diff --git a/liboctave/fDiagMatrix.cc b/liboctave/fDiagMatrix.cc
--- a/liboctave/fDiagMatrix.cc
+++ b/liboctave/fDiagMatrix.cc
@@ -136,12 +136,6 @@ FloatDiagMatrix::fill (const FloatRowVec
     elem (i+beg, i+beg) = a.elem (i);
 
   return *this;
-}
-
-FloatDiagMatrix
-FloatDiagMatrix::transpose (void) const
-{
-  return FloatDiagMatrix (mx_inline_dup (data (), length ()), cols (), rows 
());
 }
 
 FloatDiagMatrix
diff --git a/liboctave/fDiagMatrix.h b/liboctave/fDiagMatrix.h
--- a/liboctave/fDiagMatrix.h
+++ b/liboctave/fDiagMatrix.h
@@ -70,7 +70,7 @@ public:
   FloatDiagMatrix& fill (const FloatColumnVector& a, octave_idx_type beg);
   FloatDiagMatrix& fill (const FloatRowVector& a, octave_idx_type beg);
 
-  FloatDiagMatrix transpose (void) const;
+  FloatDiagMatrix transpose (void) const { return 
MDiagArray2<float>::transpose(); }
 
   friend OCTAVE_API FloatDiagMatrix real (const FloatComplexDiagMatrix& a);
   friend OCTAVE_API FloatDiagMatrix imag (const FloatComplexDiagMatrix& a);
diff --git a/liboctave/fRowVector.cc b/liboctave/fRowVector.cc
--- a/liboctave/fRowVector.cc
+++ b/liboctave/fRowVector.cc
@@ -144,7 +144,7 @@ FloatColumnVector
 FloatColumnVector
 FloatRowVector::transpose (void) const
 {
-  return FloatColumnVector (*this);
+  return MArray<float>::transpose();
 }
 
 FloatRowVector

[Prev in Thread]

Current Thread

[Next in Thread]

[Changeset] Re: Faster Array transpose, David Bateman <=
- [Changeset] Re: Faster Array transpose, John W. Eaton, 2008/05/06
  - Re: [Changeset] Re: Faster Array transpose, dbateman, 2008/05/06
    - Re: [Changeset] Re: Faster Array transpose, David Bateman, 2008/05/06
    - Re: [Changeset] Re: Faster Array transpose, John W. Eaton, 2008/05/20
    - Re: [Changeset] Re: Faster Array transpose, Jaroslav Hajek, 2008/05/21
    - Re: [Changeset] Re: Faster Array transpose, John W. Eaton, 2008/05/21
    - Re: [Changeset] Re: Faster Array transpose, Jaroslav Hajek, 2008/05/21
    - Re: [Changeset] Re: Faster Array transpose, dbateman, 2008/05/21
    - Re: [Changeset] Re: Faster Array transpose, John W. Eaton, 2008/05/21
    - Re: [Changeset] Re: Faster Array transpose, David Bateman, 2008/05/21

Prev by Date: Function handles for nonexisting functions
Next by Date: Function handles for nonexisting functions
Previous by thread: Function handles for nonexisting functions
Next by thread: [Changeset] Re: Faster Array transpose
Index(es):
- Date
- Thread