[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Changeset] Re: Faster Array transpose
From: |
David Bateman |
Subject: |
[Changeset] Re: Faster Array transpose |
Date: |
Mon, 05 May 2008 00:36:02 +0200 |
User-agent: |
Thunderbird 2.0.0.12 (X11/20080306) |
David Bateman wrote:
> I think the code below is a good compromise for this function. Sorry, I
> can't easily create a changeset as there are other uncommitted changes in
> Array.cc in my repository at the moment.
>
> D.
It wasn't correct. I've written this up in my attempt to clean up the
Array class for the integration of the single precision type. The
changeset is attached, but I suspect it might not apply without the rest
of the single precision changes (which I consider pretty much complete
in themselves). With these changes the transpose and hermitian methods go
into the Array class, with only one specialization for the DiagArray2
class. With this change "make check" passes correctly, and the speed
difference can be seen with the following benchmark:
# Benchmark the complex-conjugate transpose operator (') on square
# complex matrices and print the mean CPU time per call for each size.
N = [128, 129, 1024, 1025, 2048, 2049, 4096, 4097];
nruns = 10;
t = zeros (1, numel (N));
for idx = 1:numel (N)
  # Purely imaginary random square matrix of the current size.
  M = 1i * randn (N(idx), N(idx));
  for run = 1:nruns
    start = cputime ();
    Mh = M';
    # Accumulate the CPU time spent in this single transpose.
    t(idx) = t(idx) + cputime () - start;
  endfor
  # Convert the accumulated time into a per-run average.
  t(idx) = t(idx) / nruns;
  printf ("N = %4d, time = %g sec\n", N(idx), t(idx));
  fflush (stdout);
endfor
Previously, without the patch, I got
N = 128, time = 0.0016666 sec
N = 129, time = 0.0003333 sec
N = 1024, time = 0.0439971 sec
N = 1025, time = 0.0336645 sec
N = 2048, time = 0.276649 sec
N = 2049, time = 0.172656 sec
N = 4096, time = 1.19492 sec
N = 4097, time = 0.700954 sec
and with it
N = 128, time = 0 sec
N = 129, time = 0 sec
N = 1024, time = 0.0319979 sec
N = 1025, time = 0.0243318 sec
N = 2048, time = 0.170655 sec
N = 2049, time = 0.117326 sec
N = 4096, time = 0.745285 sec
N = 4097, time = 0.558964 sec
and similar improvements for the hermitian operator.
D.
# HG changeset patch
# User David Bateman <address@hidden>
# Date 1209938758 -7200
# Node ID 5ca24a40998f84f8d693aa8baeb3b85b1b06933c
# Parent 2442bbe5a6932984c8e9ec7378954f4e218e621d
Cache optimized hermitian/transpose methods
diff --git a/liboctave/Array.cc b/liboctave/Array.cc
--- a/liboctave/Array.cc
+++ b/liboctave/Array.cc
@@ -1203,7 +1203,48 @@ Array<T>::transpose (void) const
octave_idx_type nr = dim1 ();
octave_idx_type nc = dim2 ();
- if (nr > 1 && nc > 1)
+ if (nr >= 8 && nc >= 8)
+ {
+ Array<T> result (dim_vector (nc, nr));
+
+ // Blocked transpose to attempt to avoid cache misses.
+
+ // Don't use OCTAVE_LOCAL_BUFFER here as it doesn't work with bool
+ // on some compilers.
+ T buf[64];
+
+ octave_idx_type ii = 0, jj;
+ for (jj = 0; jj < (nc - 8 + 1); jj += 8)
+ {
+ for (ii = 0; ii < (nr - 8 + 1); ii += 8)
+ {
+ // Copy to buffer
+ for (octave_idx_type j = jj, k = 0, idxj = jj * nr;
+ j < jj + 8; j++, idxj += nr)
+ for (octave_idx_type i = ii; i < ii + 8; i++)
+ buf[k++] = xelem (i + idxj);
+
+ // Copy from buffer
+ for (octave_idx_type i = ii, idxi = ii * nc; i < ii + 8;
+ i++, idxi += nc)
+ for (octave_idx_type j = jj, k = i - ii; j < jj + 8;
+ j++, k+=8)
+ result.xelem (j + idxi) = buf[k];
+ }
+
+ if (ii < nr)
+ for (octave_idx_type j = jj; j < jj + 8; j++)
+ for (octave_idx_type i = ii; i < nr; i++)
+ result.xelem (j, i) = xelem (i, j);
+ }
+
+ for (octave_idx_type j = jj; j < nc; j++)
+ for (octave_idx_type i = 0; i < nr; i++)
+ result.xelem (j, i) = xelem (i, j);
+
+ return result;
+ }
+ else if (nr > 1 && nc > 1)
{
Array<T> result (dim_vector (nc, nr));
@@ -1217,6 +1258,68 @@ Array<T>::transpose (void) const
{
// Fast transpose for vectors and empty matrices
return Array<T> (*this, dim_vector (nc, nr));
+ }
+}
+
+template <class T>
+Array<T>
+Array<T>::hermitian (T (*fcn) (const T&)) const
+{
+ assert (ndims () == 2);
+
+ octave_idx_type nr = dim1 ();
+ octave_idx_type nc = dim2 ();
+
+ if (nr >= 8 && nc >= 8)
+ {
+ Array<T> result (dim_vector (nc, nr));
+
+ // Blocked transpose to attempt to avoid cache misses.
+
+ // Don't use OCTAVE_LOCAL_BUFFER here as it doesn't work with bool
+ // on some compilers.
+ T buf[64];
+
+ octave_idx_type ii = 0, jj;
+ for (jj = 0; jj < (nc - 8 + 1); jj += 8)
+ {
+ for (ii = 0; ii < (nr - 8 + 1); ii += 8)
+ {
+ // Copy to buffer
+ for (octave_idx_type j = jj, k = 0, idxj = jj * nr;
+ j < jj + 8; j++, idxj += nr)
+ for (octave_idx_type i = ii; i < ii + 8; i++)
+ buf[k++] = xelem (i + idxj);
+
+ // Copy from buffer
+ for (octave_idx_type i = ii, idxi = ii * nc; i < ii + 8;
+ i++, idxi += nc)
+ for (octave_idx_type j = jj, k = i - ii; j < jj + 8;
+ j++, k+=8)
+ result.xelem (j + idxi) = fcn (buf[k]);
+ }
+
+ if (ii < nr)
+ for (octave_idx_type j = jj; j < jj + 8; j++)
+ for (octave_idx_type i = ii; i < nr; i++)
+ result.xelem (j, i) = fcn (xelem (i, j));
+ }
+
+ for (octave_idx_type j = jj; j < nc; j++)
+ for (octave_idx_type i = 0; i < nr; i++)
+ result.xelem (j, i) = fcn (xelem (i, j));
+
+ return result;
+ }
+ else
+ {
+ Array<T> result (dim_vector (nc, nr));
+
+ for (octave_idx_type j = 0; j < nc; j++)
+ for (octave_idx_type i = 0; i < nr; i++)
+ result.xelem (j, i) = fcn (xelem (i, j));
+
+ return result;
}
}
diff --git a/liboctave/Array.h b/liboctave/Array.h
--- a/liboctave/Array.h
+++ b/liboctave/Array.h
@@ -461,6 +461,7 @@ public:
bool is_empty (void) const { return numel () == 0; }
Array<T> transpose (void) const;
+ Array<T> hermitian (T (*fcn) (const T&) = 0) const;
const T *data (void) const { return rep->data; }
diff --git a/liboctave/Array2.h b/liboctave/Array2.h
--- a/liboctave/Array2.h
+++ b/liboctave/Array2.h
@@ -109,6 +109,12 @@ public:
return Array2<T> (tmp, tmp.rows (), tmp.columns ());
}
+ Array2<T> hermitian (T (*fcn) (const T&) = 0) const
+ {
+ Array<T> tmp = Array<T>::hermitian (fcn);
+ return Array2<T> (tmp, tmp.rows (), tmp.columns ());
+ }
+
Array2<T> index (idx_vector& i, int resize_ok = 0,
const T& rfv = resize_fill_value (T ())) const
{
diff --git a/liboctave/ArrayN.h b/liboctave/ArrayN.h
--- a/liboctave/ArrayN.h
+++ b/liboctave/ArrayN.h
@@ -102,6 +102,7 @@ public:
ArrayN<T> squeeze (void) const { return Array<T>::squeeze (); }
ArrayN<T> transpose (void) const { return Array<T>::transpose (); }
+ ArrayN<T> hermitian (T (*fcn) (const T&) = 0) const { return
Array<T>::hermitian (fcn); }
ArrayN<T>& insert (const ArrayN<T>& a, const dim_vector& dv)
{
diff --git a/liboctave/CColVector.cc b/liboctave/CColVector.cc
--- a/liboctave/CColVector.cc
+++ b/liboctave/CColVector.cc
@@ -221,17 +221,16 @@ ComplexColumnVector::stack (const Comple
return retval;
}
-ComplexRowVector
+ComplexRowVector
ComplexColumnVector::hermitian (void) const
-{
- octave_idx_type len = length ();
- return ComplexRowVector (mx_inline_conj_dup (data (), len), len);
+{
+ return MArray<Complex>::hermitian (std::conj);
}
ComplexRowVector
ComplexColumnVector::transpose (void) const
{
- return ComplexRowVector (*this);
+ return MArray<Complex>::transpose ();
}
ComplexColumnVector
diff --git a/liboctave/CColVector.h b/liboctave/CColVector.h
--- a/liboctave/CColVector.h
+++ b/liboctave/CColVector.h
@@ -72,7 +72,7 @@ public:
ComplexColumnVector stack (const ColumnVector& a) const;
ComplexColumnVector stack (const ComplexColumnVector& a) const;
- ComplexRowVector hermitian (void) const; // complex conjugate transpose.
+ ComplexRowVector hermitian (void) const;
ComplexRowVector transpose (void) const;
friend ComplexColumnVector conj (const ComplexColumnVector& a);
diff --git a/liboctave/CDiagMatrix.cc b/liboctave/CDiagMatrix.cc
--- a/liboctave/CDiagMatrix.cc
+++ b/liboctave/CDiagMatrix.cc
@@ -230,20 +230,6 @@ ComplexDiagMatrix::fill (const ComplexRo
elem (i+beg, i+beg) = a.elem (i);
return *this;
-}
-
-ComplexDiagMatrix
-ComplexDiagMatrix::hermitian (void) const
-{
- return ComplexDiagMatrix (mx_inline_conj_dup (data (), length ()),
- cols (), rows ());
-}
-
-ComplexDiagMatrix
-ComplexDiagMatrix::transpose (void) const
-{
- return ComplexDiagMatrix (mx_inline_dup (data (), length ()),
- cols (), rows ());
}
ComplexDiagMatrix
diff --git a/liboctave/CDiagMatrix.h b/liboctave/CDiagMatrix.h
--- a/liboctave/CDiagMatrix.h
+++ b/liboctave/CDiagMatrix.h
@@ -87,8 +87,8 @@ public:
ComplexDiagMatrix& fill (const RowVector& a, octave_idx_type beg);
ComplexDiagMatrix& fill (const ComplexRowVector& a, octave_idx_type beg);
- ComplexDiagMatrix hermitian (void) const; // complex conjugate transpose
- ComplexDiagMatrix transpose (void) const;
+ ComplexDiagMatrix hermitian (void) const { return
MDiagArray2<Complex>::hermitian (std::conj); }
+ ComplexDiagMatrix transpose (void) const { return
MDiagArray2<Complex>::transpose(); }
friend ComplexDiagMatrix conj (const ComplexDiagMatrix& a);
diff --git a/liboctave/CMatrix.cc b/liboctave/CMatrix.cc
--- a/liboctave/CMatrix.cc
+++ b/liboctave/CMatrix.cc
@@ -882,22 +882,6 @@ ComplexMatrix::stack (const ComplexDiagM
retval.insert (*this, 0, 0);
retval.insert (a, nr_insert, 0);
return retval;
-}
-
-ComplexMatrix
-ComplexMatrix::hermitian (void) const
-{
- octave_idx_type nr = rows ();
- octave_idx_type nc = cols ();
- ComplexMatrix result;
- if (length () > 0)
- {
- result.resize (nc, nr);
- for (octave_idx_type j = 0; j < nc; j++)
- for (octave_idx_type i = 0; i < nr; i++)
- result.elem (j, i) = conj (elem (i, j));
- }
- return result;
}
ComplexMatrix
diff --git a/liboctave/CMatrix.h b/liboctave/CMatrix.h
--- a/liboctave/CMatrix.h
+++ b/liboctave/CMatrix.h
@@ -126,7 +126,8 @@ public:
ComplexMatrix stack (const ComplexColumnVector& a) const;
ComplexMatrix stack (const ComplexDiagMatrix& a) const;
- ComplexMatrix hermitian (void) const; // complex conjugate transpose
+ ComplexMatrix hermitian (void) const
+ { return MArray2<Complex>::hermitian (std::conj); }
ComplexMatrix transpose (void) const
{ return MArray2<Complex>::transpose (); }
diff --git a/liboctave/CRowVector.cc b/liboctave/CRowVector.cc
--- a/liboctave/CRowVector.cc
+++ b/liboctave/CRowVector.cc
@@ -227,14 +227,13 @@ ComplexColumnVector
ComplexColumnVector
ComplexRowVector::hermitian (void) const
{
- octave_idx_type len = length ();
- return ComplexColumnVector (mx_inline_conj_dup (data (), len), len);
+ return MArray<Complex>::hermitian (std::conj);
}
ComplexColumnVector
ComplexRowVector::transpose (void) const
{
- return ComplexColumnVector (*this);
+ return MArray<Complex>::transpose ();
}
ComplexRowVector
diff --git a/liboctave/CRowVector.h b/liboctave/CRowVector.h
--- a/liboctave/CRowVector.h
+++ b/liboctave/CRowVector.h
@@ -70,7 +70,7 @@ public:
ComplexRowVector append (const RowVector& a) const;
ComplexRowVector append (const ComplexRowVector& a) const;
- ComplexColumnVector hermitian (void) const; // complex conjugate transpose.
+ ComplexColumnVector hermitian (void) const;
ComplexColumnVector transpose (void) const;
friend ComplexRowVector conj (const ComplexRowVector& a);
diff --git a/liboctave/DiagArray2.cc b/liboctave/DiagArray2.cc
--- a/liboctave/DiagArray2.cc
+++ b/liboctave/DiagArray2.cc
@@ -33,6 +33,27 @@ along with Octave; see the file COPYING.
#include "DiagArray2.h"
#include "lo-error.h"
+
+template <class T>
+DiagArray2<T>
+DiagArray2<T>::transpose (void) const
+{
+ DiagArray2<T> retval (*this);
+ retval.dimensions = dim_vector (this->dim2 (), this->dim1 ());
+ return retval;
+}
+
+template <class T>
+DiagArray2<T>
+DiagArray2<T>::hermitian (T (* fcn) (const T&)) const
+{
+ DiagArray2<T> retval (this->dim2 (), this->dim1 ());
+ const T *p = this->data ();
+ T *q = retval.fortran_vec ();
+ for (octave_idx_type i = 0; i < this->length (); i++)
+ q [i] = fcn (p [i]);
+ return retval;
+}
// A two-dimensional array with diagonal elements only.
diff --git a/liboctave/DiagArray2.h b/liboctave/DiagArray2.h
--- a/liboctave/DiagArray2.h
+++ b/liboctave/DiagArray2.h
@@ -180,6 +180,9 @@ public:
void resize (octave_idx_type n, octave_idx_type m, const T& val);
void maybe_delete_elements (idx_vector& i, idx_vector& j);
+
+ DiagArray2<T> transpose (void) const;
+ DiagArray2<T> hermitian (T (*fcn) (const T&) = 0) const;
};
#endif
diff --git a/liboctave/MArray.h b/liboctave/MArray.h
--- a/liboctave/MArray.h
+++ b/liboctave/MArray.h
@@ -63,6 +63,9 @@ public:
return *this;
}
+ MArray<T> transpose (void) const { return Array<T>::transpose (); }
+ MArray<T> hermitian (T (*fcn) (const T&) = 0) const { return
Array<T>::hermitian (fcn); }
+
octave_idx_type nnz (void) const
{
octave_idx_type retval = 0;
diff --git a/liboctave/MArray2.h b/liboctave/MArray2.h
--- a/liboctave/MArray2.h
+++ b/liboctave/MArray2.h
@@ -80,6 +80,7 @@ public:
}
MArray2<T> transpose (void) const { return Array2<T>::transpose (); }
+ MArray2<T> hermitian (T (*fcn) (const T&) = 0) const { return
Array2<T>::hermitian (fcn); }
MArray2<T> diag (octave_idx_type k) const
{
diff --git a/liboctave/MDiagArray2.h b/liboctave/MDiagArray2.h
--- a/liboctave/MDiagArray2.h
+++ b/liboctave/MDiagArray2.h
@@ -81,6 +81,9 @@ public:
return retval;
}
+ MDiagArray2<T> transpose (void) const { return DiagArray2<T>::transpose (); }
+ MDiagArray2<T> hermitian (T (*fcn) (const T&) = 0) const { return
DiagArray2<T>::hermitian (fcn); }
+
static MDiagArray2<T> nil_array;
// Currently, the OPS functions don't need to be friends, but that
diff --git a/liboctave/dColVector.cc b/liboctave/dColVector.cc
--- a/liboctave/dColVector.cc
+++ b/liboctave/dColVector.cc
@@ -142,7 +142,7 @@ RowVector
RowVector
ColumnVector::transpose (void) const
{
- return RowVector (*this);
+ return MArray<double>::transpose();
}
ColumnVector
diff --git a/liboctave/dDiagMatrix.cc b/liboctave/dDiagMatrix.cc
--- a/liboctave/dDiagMatrix.cc
+++ b/liboctave/dDiagMatrix.cc
@@ -136,12 +136,6 @@ DiagMatrix::fill (const RowVector& a, oc
elem (i+beg, i+beg) = a.elem (i);
return *this;
-}
-
-DiagMatrix
-DiagMatrix::transpose (void) const
-{
- return DiagMatrix (mx_inline_dup (data (), length ()), cols (), rows ());
}
DiagMatrix
diff --git a/liboctave/dDiagMatrix.h b/liboctave/dDiagMatrix.h
--- a/liboctave/dDiagMatrix.h
+++ b/liboctave/dDiagMatrix.h
@@ -70,7 +70,7 @@ public:
DiagMatrix& fill (const ColumnVector& a, octave_idx_type beg);
DiagMatrix& fill (const RowVector& a, octave_idx_type beg);
- DiagMatrix transpose (void) const;
+ DiagMatrix transpose (void) const { return MDiagArray2<double>::transpose();
}
friend OCTAVE_API DiagMatrix real (const ComplexDiagMatrix& a);
friend OCTAVE_API DiagMatrix imag (const ComplexDiagMatrix& a);
diff --git a/liboctave/dRowVector.cc b/liboctave/dRowVector.cc
--- a/liboctave/dRowVector.cc
+++ b/liboctave/dRowVector.cc
@@ -144,7 +144,7 @@ ColumnVector
ColumnVector
RowVector::transpose (void) const
{
- return ColumnVector (*this);
+ return MArray<double>::transpose();
}
RowVector
diff --git a/liboctave/fCColVector.cc b/liboctave/fCColVector.cc
--- a/liboctave/fCColVector.cc
+++ b/liboctave/fCColVector.cc
@@ -221,17 +221,16 @@ FloatComplexColumnVector::stack (const F
return retval;
}
-FloatComplexRowVector
+FloatComplexRowVector
FloatComplexColumnVector::hermitian (void) const
{
- octave_idx_type len = length ();
- return FloatComplexRowVector (mx_inline_conj_dup (data (), len), len);
-}
-
-FloatComplexRowVector
+ return MArray<FloatComplex>::hermitian (std::conj);
+}
+
+FloatComplexRowVector
FloatComplexColumnVector::transpose (void) const
{
- return FloatComplexRowVector (*this);
+ return MArray<FloatComplex>::transpose ();
}
FloatComplexColumnVector
diff --git a/liboctave/fCColVector.h b/liboctave/fCColVector.h
--- a/liboctave/fCColVector.h
+++ b/liboctave/fCColVector.h
@@ -72,7 +72,7 @@ public:
FloatComplexColumnVector stack (const FloatColumnVector& a) const;
FloatComplexColumnVector stack (const FloatComplexColumnVector& a) const;
- FloatComplexRowVector hermitian (void) const; // complex conjugate
transpose.
+ FloatComplexRowVector hermitian (void) const;
FloatComplexRowVector transpose (void) const;
friend FloatComplexColumnVector conj (const FloatComplexColumnVector& a);
diff --git a/liboctave/fCDiagMatrix.cc b/liboctave/fCDiagMatrix.cc
--- a/liboctave/fCDiagMatrix.cc
+++ b/liboctave/fCDiagMatrix.cc
@@ -230,20 +230,6 @@ FloatComplexDiagMatrix::fill (const Floa
elem (i+beg, i+beg) = a.elem (i);
return *this;
-}
-
-FloatComplexDiagMatrix
-FloatComplexDiagMatrix::hermitian (void) const
-{
- return FloatComplexDiagMatrix (mx_inline_conj_dup (data (), length ()),
- cols (), rows ());
-}
-
-FloatComplexDiagMatrix
-FloatComplexDiagMatrix::transpose (void) const
-{
- return FloatComplexDiagMatrix (mx_inline_dup (data (), length ()),
- cols (), rows ());
}
FloatComplexDiagMatrix
diff --git a/liboctave/fCDiagMatrix.h b/liboctave/fCDiagMatrix.h
--- a/liboctave/fCDiagMatrix.h
+++ b/liboctave/fCDiagMatrix.h
@@ -87,8 +87,8 @@ public:
FloatComplexDiagMatrix& fill (const FloatRowVector& a, octave_idx_type beg);
FloatComplexDiagMatrix& fill (const FloatComplexRowVector& a,
octave_idx_type beg);
- FloatComplexDiagMatrix hermitian (void) const; // complex conjugate
transpose
- FloatComplexDiagMatrix transpose (void) const;
+ FloatComplexDiagMatrix hermitian (void) const { return
MDiagArray2<FloatComplex>::hermitian (std::conj); }
+ FloatComplexDiagMatrix transpose (void) const { return
MDiagArray2<FloatComplex>::transpose(); }
friend FloatComplexDiagMatrix conj (const FloatComplexDiagMatrix& a);
diff --git a/liboctave/fCMatrix.cc b/liboctave/fCMatrix.cc
--- a/liboctave/fCMatrix.cc
+++ b/liboctave/fCMatrix.cc
@@ -876,22 +876,6 @@ FloatComplexMatrix::stack (const FloatCo
retval.insert (*this, 0, 0);
retval.insert (a, nr_insert, 0);
return retval;
-}
-
-FloatComplexMatrix
-FloatComplexMatrix::hermitian (void) const
-{
- octave_idx_type nr = rows ();
- octave_idx_type nc = cols ();
- FloatComplexMatrix result;
- if (length () > 0)
- {
- result.resize (nc, nr);
- for (octave_idx_type j = 0; j < nc; j++)
- for (octave_idx_type i = 0; i < nr; i++)
- result.elem (j, i) = conj (elem (i, j));
- }
- return result;
}
FloatComplexMatrix
diff --git a/liboctave/fCMatrix.h b/liboctave/fCMatrix.h
--- a/liboctave/fCMatrix.h
+++ b/liboctave/fCMatrix.h
@@ -126,7 +126,8 @@ public:
FloatComplexMatrix stack (const FloatComplexColumnVector& a) const;
FloatComplexMatrix stack (const FloatComplexDiagMatrix& a) const;
- FloatComplexMatrix hermitian (void) const; // complex conjugate transpose
+ FloatComplexMatrix hermitian (void) const
+ { return MArray2<FloatComplex>::hermitian (std::conj); }
FloatComplexMatrix transpose (void) const
{ return MArray2<FloatComplex>::transpose (); }
diff --git a/liboctave/fCRowVector.cc b/liboctave/fCRowVector.cc
--- a/liboctave/fCRowVector.cc
+++ b/liboctave/fCRowVector.cc
@@ -224,17 +224,16 @@ FloatComplexRowVector::append (const Flo
return retval;
}
-FloatComplexColumnVector
+FloatComplexColumnVector
FloatComplexRowVector::hermitian (void) const
{
- octave_idx_type len = length ();
- return FloatComplexColumnVector (mx_inline_conj_dup (data (), len), len);
-}
-
-FloatComplexColumnVector
+ return MArray<FloatComplex>::hermitian (std::conj);
+}
+
+FloatComplexColumnVector
FloatComplexRowVector::transpose (void) const
{
- return FloatComplexColumnVector (*this);
+ return MArray<FloatComplex>::transpose ();
}
FloatComplexRowVector
diff --git a/liboctave/fCRowVector.h b/liboctave/fCRowVector.h
--- a/liboctave/fCRowVector.h
+++ b/liboctave/fCRowVector.h
@@ -70,7 +70,7 @@ public:
FloatComplexRowVector append (const FloatRowVector& a) const;
FloatComplexRowVector append (const FloatComplexRowVector& a) const;
- FloatComplexColumnVector hermitian (void) const; // complex conjugate
transpose.
+ FloatComplexColumnVector hermitian (void) const;
FloatComplexColumnVector transpose (void) const;
friend FloatComplexRowVector conj (const FloatComplexRowVector& a);
diff --git a/liboctave/fColVector.cc b/liboctave/fColVector.cc
--- a/liboctave/fColVector.cc
+++ b/liboctave/fColVector.cc
@@ -142,7 +142,7 @@ FloatRowVector
FloatRowVector
FloatColumnVector::transpose (void) const
{
- return FloatRowVector (*this);
+ return MArray<float>::transpose();
}
FloatColumnVector
diff --git a/liboctave/fDiagMatrix.cc b/liboctave/fDiagMatrix.cc
--- a/liboctave/fDiagMatrix.cc
+++ b/liboctave/fDiagMatrix.cc
@@ -136,12 +136,6 @@ FloatDiagMatrix::fill (const FloatRowVec
elem (i+beg, i+beg) = a.elem (i);
return *this;
-}
-
-FloatDiagMatrix
-FloatDiagMatrix::transpose (void) const
-{
- return FloatDiagMatrix (mx_inline_dup (data (), length ()), cols (), rows
());
}
FloatDiagMatrix
diff --git a/liboctave/fDiagMatrix.h b/liboctave/fDiagMatrix.h
--- a/liboctave/fDiagMatrix.h
+++ b/liboctave/fDiagMatrix.h
@@ -70,7 +70,7 @@ public:
FloatDiagMatrix& fill (const FloatColumnVector& a, octave_idx_type beg);
FloatDiagMatrix& fill (const FloatRowVector& a, octave_idx_type beg);
- FloatDiagMatrix transpose (void) const;
+ FloatDiagMatrix transpose (void) const { return
MDiagArray2<float>::transpose(); }
friend OCTAVE_API FloatDiagMatrix real (const FloatComplexDiagMatrix& a);
friend OCTAVE_API FloatDiagMatrix imag (const FloatComplexDiagMatrix& a);
diff --git a/liboctave/fRowVector.cc b/liboctave/fRowVector.cc
--- a/liboctave/fRowVector.cc
+++ b/liboctave/fRowVector.cc
@@ -144,7 +144,7 @@ FloatColumnVector
FloatColumnVector
FloatRowVector::transpose (void) const
{
- return FloatColumnVector (*this);
+ return MArray<float>::transpose();
}
FloatRowVector
- [Changeset] Re: Faster Array transpose,
David Bateman <=
- [Changeset] Re: Faster Array transpose, John W. Eaton, 2008/05/06
- Re: [Changeset] Re: Faster Array transpose, dbateman, 2008/05/06
- Re: [Changeset] Re: Faster Array transpose, David Bateman, 2008/05/06
- Re: [Changeset] Re: Faster Array transpose, John W. Eaton, 2008/05/20
- Re: [Changeset] Re: Faster Array transpose, Jaroslav Hajek, 2008/05/21
- Re: [Changeset] Re: Faster Array transpose, John W. Eaton, 2008/05/21
- Re: [Changeset] Re: Faster Array transpose, Jaroslav Hajek, 2008/05/21
- Re: [Changeset] Re: Faster Array transpose, dbateman, 2008/05/21
- Re: [Changeset] Re: Faster Array transpose, John W. Eaton, 2008/05/21
- Re: [Changeset] Re: Faster Array transpose, David Bateman, 2008/05/21