getfem-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Getfem-commits] r5376 - in /trunk/getfem: contrib/opt_assembly/opt_asse


From: Yves . Renard
Subject: [Getfem-commits] r5376 - in /trunk/getfem: contrib/opt_assembly/opt_assembly.cc src/getfem_generic_assembly.cc src/gmm/gmm_vector.h
Date: Mon, 03 Oct 2016 13:40:46 -0000

Author: renard
Date: Mon Oct  3 15:40:45 2016
New Revision: 5376

URL: http://svn.gna.org/viewcvs/getfem?rev=5376&view=rev
Log:
small optimizations

Modified:
    trunk/getfem/contrib/opt_assembly/opt_assembly.cc
    trunk/getfem/src/getfem_generic_assembly.cc
    trunk/getfem/src/gmm/gmm_vector.h

Modified: trunk/getfem/contrib/opt_assembly/opt_assembly.cc
URL: 
http://svn.gna.org/viewcvs/getfem/trunk/getfem/contrib/opt_assembly/opt_assembly.cc?rev=5376&r1=5375&r2=5376&view=diff
==============================================================================
--- trunk/getfem/contrib/opt_assembly/opt_assembly.cc   (original)
+++ trunk/getfem/contrib/opt_assembly/opt_assembly.cc   Mon Oct  3 15:40:45 2016
@@ -128,15 +128,17 @@
 
 #define MAT_TEST_1(title, ndof1, ndof2, expr, mim_, I1_, I2_, old_asm)  \
   cout << "\n" << title << endl;                                        \
+  getfem::model_real_sparse_matrix K(ndof1, ndof2), K2(ndof1, ndof2);   \
+  getfem::model_real_sparse_matrix K3(I1_.last()+1, I2_.last()+1);     \
   workspace.clear_expressions();                                        \
-  workspace.add_expression(expr, mim_);                                 \
-  ch.init(); ch.tic(); workspace.assembly(2); ch.toc();                 \
+  workspace.set_assembled_matrix(K3);                                  \
+  workspace.add_expression(expr, mim_);                                        
\
+  ch.init(); ch.tic(); workspace.assembly(2); ch.toc();                        
\
   cout << "Elapsed time for new assembly " << ch.elapsed() << endl;     \
-  getfem::model_real_sparse_matrix K(ndof1, ndof2), K2(ndof1, ndof2);   \
   ch.init(); ch.tic(); old_asm; ch.toc();                              \
   gmm::copy(K, K2);                                                     \
   cout << "Elapsed time for old assembly " << ch.elapsed() << endl;     \
-  gmm::add(gmm::scaled(gmm::sub_matrix(workspace.assembled_matrix(),    \
+  gmm::add(gmm::scaled(gmm::sub_matrix(K3,                             \
                                        I1_, I2_), scalar_type(-1)), K); \
   scalar_type norm_error = gmm::mat_norminf(K);                         \
   cout << "Error : " << norm_error << endl;
@@ -410,22 +412,59 @@
   
 }
 
-// 1 + Evaluation of the matrix storage computational time
-// 2 + Evaluation of the assembly time
-// 3 + Evaluation of the global instruction time
-// 4 + Resizing opération computational cost
-// 6 - fem_interpolation_context cost
-
 
 int main(int /* argc */, char * /* argv */[]) {
 
   GMM_SET_EXCEPTION_DEBUG; // Exceptions make a memory fault, to debug.
   FE_ENABLE_EXCEPT;        // Enable floating point exception for Nan.
   
-  // Mesured times for new assembly, old one, alterantive new assembly,
+  // Mesured times for new assembly, old one,
   // storage estimate part for the new assembly, global assembly part,
   // ga_exec cost (instructions not executed), J computation, resizing
   // instruction cost.
+  //                        new  | old  | sto  | asse | exec |  J   |resize|
+  test_new_assembly(2, 400, 1); // ndofu = 321602 ndofp = 160801 ndofchi = 1201
+  // Mass                 : 0.87 | 0.86 | 0.19 | 0.32 | 0.26 | 0.09 | 0.16 |
+  // Laplacian            : 0.47 | 0.85 | 0.10 | 0.16 | 0.19 | 0.08 | 0.03 |
+  // Homogeneous elas     : 0.77 | 1.91 | 0.23 | 0.30 | 0.18 | 0.07 | 0.08 |
+  // Non-homogeneous elast: 0.94 | 2.32 | 0.26 | 0.32 | 0.18 | 0.08 | 0.08 |
+  test_new_assembly(3, 36, 1);  // ndofu = 151959 ndofp = 50653 ndofchi = 6553
+  // Mass                 : 1.46 | 1.68 | 0.34 | 0.54 | 0.31 | 0.15 | 0.12 |
+  // Laplacian            : 0.90 | 1.51 | 0.10 | 0.17 | 0.24 | 0.14 | 0.06 |
+  // Homogeneous elas     : 1.94 | 4.77 | 0.88 | 0.95 | 0.24 | 0.14 | 0.11 |
+  // Non-homogeneous elast: 2.06 | 6.81 | 0.74 | 0.86 | 0.24 | 0.14 | 0.11 |
+  test_new_assembly(2, 200, 2); // ndofu = 321602 ndofp = 160801 ndofchi = 1201
+  // Mass                 : 0.49 | 0.45 | 0.14 | 0.22 | 0.07 | 0.03 | 0.01 |
+  // Laplacian            : 0.24 | 0.38 | 0.06 | 0.10 | 0.06 | 0.03 | 0.03 |
+  // Homogeneous elas     : 0.62 | 1.28 | 0.22 | 0.25 | 0.05 | 0.02 | 0.11 |
+  // Non-homogeneous elast: 0.72 | 2.43 | 0.23 | 0.28 | 0.05 | 0.02 | 0.11 |
+  test_new_assembly(3, 18, 2);  // ndofu = 151959 ndofp = 50653 ndofchi = 6553
+  // Mass                 : 2.00 | 0.90 | 0.27 | 0.63 | 0.05 | 0.02 | 0.27 |
+  // Laplacian            : 0.30 | 0.58 | 0.16 | 0.17 | 0.04 | 0.02 | 0.02 |
+  // Homogeneous elas     : 2.50 | 3.48 | 1.53 | 1.72 | 0.04 | 0.02 | 0.35 |
+  // Non-homogeneous elast: 2.55 | 9.32 | 1.48 | 1.68 | 0.04 | 0.02 | 0.37 |
+  test_new_assembly(3, 9, 4);   // ndofu = 151959 ndofp = 50653 ndofchi = 6553
+  // Mass                 : 6.64 | 1.33 | 0.65 | 1.77 | 0.01 | .005 | 0.21 |
+  // Laplacian            : 0.78 | 0.79 | 0.32 | 0.43 | 0.01 | .005 | 0.07 |
+  // Homogeneous elas     : 10.2 | 5.52 | 0.90 | 1.69 | 0.01 | .005 | 2.50 |
+  // Non-homogeneous elast: 10.1 | 48.0 | 0.95 | 1.48 | 0.01 | .005 | 2.45 |
+
+  // Conclusions :
+  // Desactivation of debug test has no sensible effect.
+  // Compile time of assembly strings is negligible (< 0.0004)
+  // J computation takes half the computational time of the exec part
+  // The optimized instruction call is negligible
+
+
+  // Possible optimizations (focusing on a case)
+  // - Optimization of J computation (especially in 2D and standard cases)
+  // - Unroll loop in used instructions
+  // - Assembly optimization
+  // - storage optimization (matrices ...)
+  // - Fem interpolation context optimization.
+
+  // Original table :
+#if 0
   //                        new  | old  | sto  | asse | exec |  J   |resize|
   test_new_assembly(2, 400, 1);
   // Mass                 : 0.94 | 0.93 | 0.19 | 0.32 | 0.26 | 0.09 | 0.16 |
@@ -452,20 +491,7 @@
   // Laplacian            : 0.90 | 0.89 | 0.32 | 0.43 | 0.01 | .005 | 0.07 |
   // Homogeneous elas     : 11.1 | 6.65 | 0.90 | 1.69 | 0.01 | .005 | 2.50 |
   // Non-homogeneous elast: 11.0 | 49.1 | 0.95 | 1.48 | 0.01 | .005 | 2.45 |
-
-  // Conclusions :
-  // Desactivation of debug test has no sensible effect.
-  // Compile time is negligible (< 0.0004)
-  // J computation takes half the computational time of the exec part
-  // The optimized instruction call is negligible
-
-
-  // Possible optimizations (focusing on a case)
-  // - Optimization of J computation (especially in 2D and standard cases)
-  // - Unroll lop in used instructions
-  // - Assembly optimization
-  // - storage optimization (matrices ...)
-  // - Fem interpolation context optimization.
+#endif
 
   return 0; 
 }

Modified: trunk/getfem/src/getfem_generic_assembly.cc
URL: 
http://svn.gna.org/viewcvs/getfem/trunk/getfem/src/getfem_generic_assembly.cc?rev=5376&r1=5375&r2=5376&view=diff
==============================================================================
--- trunk/getfem/src/getfem_generic_assembly.cc (original)
+++ trunk/getfem/src/getfem_generic_assembly.cc Mon Oct  3 15:40:45 2016
@@ -5210,6 +5210,7 @@
           for (const size_type &dof1 : dofs1) {
             if (gmm::abs(*it) > threshold)
               K(dof1, dof2) += *it;
+             // K.col(dof2).wa(dof1, *it);
             ++it;
           }
       }

Modified: trunk/getfem/src/gmm/gmm_vector.h
URL: 
http://svn.gna.org/viewcvs/getfem/trunk/getfem/src/gmm/gmm_vector.h?rev=5376&r1=5375&r2=5376&view=diff
==============================================================================
--- trunk/getfem/src/gmm/gmm_vector.h   (original)
+++ trunk/getfem/src/gmm/gmm_vector.h   Mon Oct  3 15:40:45 2016
@@ -65,9 +65,9 @@
     inline bool operator !=(std::complex<T> v) const
     { return ((*pm).r(l) != v); }
     inline ref_elt_vector &operator +=(T v)
-    { (*pm).w(l,(*pm).r(l) + v); return *this; }
+    { (*pm).wa(l, v); return *this; }
     inline ref_elt_vector &operator -=(T v)
-    { (*pm).w(l,(*pm).r(l) - v); return *this; }
+    { (*pm).wa(l, -v); return *this; }
     inline ref_elt_vector &operator /=(T v)
     { (*pm).w(l,(*pm).r(l) / v); return *this; }
     inline ref_elt_vector &operator *=(T v)
@@ -572,6 +572,9 @@
       else *(write_access(c)) = e;
     }
 
+    inline void wa(size_type c, const T &e)
+    { if (e != T(0)) { if (read_access(c)) *(write_access(c)) += e; } }
+
     inline T r(size_type c) const
     { const T *p = read_access(c); if (p) return *p; else return T(0); }
 
@@ -743,6 +746,15 @@
       else base_type::operator [](c) = e;
     }
 
+    inline void wa(size_type c, const T &e) {
+      GMM_ASSERT2(c < nbl, "out of range");
+      if (e != T(0)) {
+       iterator it = this->lower_bound(c);
+       if (it != this->end()) it->second += e;
+       else base_type::operator [](c) = e;
+      }
+    }
+
     inline T r(size_type c) const {
       GMM_ASSERT2(c < nbl, "out of range");
       const_iterator it = this->lower_bound(c);
@@ -866,7 +878,7 @@
 
   template<typename T> struct elt_rsvector_ {
     size_type c; T e;
-    /* e is initialized by default to avoid some false warnings of valgrind..
+    /* e is initialized by default to avoid some false warnings of valgrind.
        (from http://valgrind.org/docs/manual/mc-manual.html:
       
        When memory is read into the CPU's floating point registers, the
@@ -968,6 +980,7 @@
     { return ref_elt_vector<T, rsvector<T> >(this, c); }
 
     void w(size_type c, const T &e);
+    void wa(size_type c, const T &e);
     T r(size_type c) const;
     void swap_indices(size_type i, size_type j);
 
@@ -1020,7 +1033,7 @@
       iterator it = std::lower_bound(this->begin(), this->end(), ev);
       if (it != this->end() && it->c == j) {
        for (iterator ite = this->end() - 1; it != ite; ++it) *it = *(it+1);
-       base_type_::resize(nb_stored()-1);
+       base_resize(nb_stored()-1);
       }
     }
   }
@@ -1039,7 +1052,7 @@
     else {
       elt_rsvector_<T> ev(c, e);
       if (nb_stored() == 0) {
-       base_type_::resize(1,ev);
+       base_type_::reserve(16); base_type_::resize(1,ev);
       }
       else {
        iterator it = std::lower_bound(this->begin(), this->end(), ev);
@@ -1052,8 +1065,35 @@
          base_type_::resize(nb_stored()+1, ev);
          if (ind != this->nb_stored() - 1) {
            it = this->begin() + ind;
-           for (iterator ite = this->end() - 1; ite != it; --ite)
-             *ite = *(ite-1);
+           iterator ite = this->end(); --ite; iterator itee = ite; --itee; 
+           for (; ite != it; --ite, --itee) *ite = *itee;
+           *it = ev;
+         }
+       }
+      }
+    }
+  }
+
+  template <typename T> void rsvector<T>::wa(size_type c, const T &e) {
+    GMM_ASSERT2(c < nbl, "out of range");
+    if (e != T(0)) {
+      elt_rsvector_<T> ev(c, e);
+      if (nb_stored() == 0) {
+       base_type_::reserve(16); base_type_::resize(1, ev);
+      }
+      else {
+       iterator it = std::lower_bound(this->begin(), this->end(), ev);
+       if (it != this->end() && it->c == c) it->e += e;
+       else {
+         size_type ind = it - this->begin();
+          if (this->nb_stored() - ind > 1100)
+            GMM_WARNING2("Inefficient addition of element in rsvector with "
+                         << this->nb_stored() - ind << " non-zero entries");
+         base_type_::resize(nb_stored()+1, ev);
+         if (ind != this->nb_stored() - 1) {
+           it = this->begin() + ind;
+           iterator ite = this->end(); --ite; iterator itee = ite; --itee; 
+           for (; ite != it; --ite, --itee) *ite = *itee;
            *it = ev;
          }
        }




reply via email to

[Prev in Thread] Current Thread [Next in Thread]