getfem-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Getfem-commits] r5457 - in /trunk/getfem: contrib/opt_assembly/ src/ sr


From: Yves . Renard
Subject: [Getfem-commits] r5457 - in /trunk/getfem: contrib/opt_assembly/ src/ src/getfem/
Date: Wed, 02 Nov 2016 11:59:11 -0000

Author: renard
Date: Wed Nov  2 12:59:10 2016
New Revision: 5457

URL: http://svn.gna.org/viewcvs/getfem?rev=5457&view=rev
Log:
optimization of the gradient computation

Modified:
    trunk/getfem/contrib/opt_assembly/opt_assembly.cc
    trunk/getfem/src/bgeot_geometric_trans.cc
    trunk/getfem/src/getfem/bgeot_geometric_trans.h
    trunk/getfem/src/getfem/bgeot_tensor.h
    trunk/getfem/src/getfem_fem.cc

Modified: trunk/getfem/contrib/opt_assembly/opt_assembly.cc
URL: 
http://svn.gna.org/viewcvs/getfem/trunk/getfem/contrib/opt_assembly/opt_assembly.cc?rev=5457&r1=5456&r2=5457&view=diff
==============================================================================
--- trunk/getfem/contrib/opt_assembly/opt_assembly.cc   (original)
+++ trunk/getfem/contrib/opt_assembly/opt_assembly.cc   Wed Nov  2 12:59:10 2016
@@ -364,9 +364,10 @@
               Iu, Iu,
               getfem::asm_stiffness_matrix_for_homogeneous_linear_elasticity
               (K, mim2, mf_u, lambda, mu));
-    if (all)
+    if (all) {
       MAT_TEST_2(ndofu, ndofu, "lambda*Div_Test_u*Div_Test2_u "
                 "+ mu*(Grad_Test_u'+Grad_Test_u):Grad_Test2_u", mim2, Iu, Iu);
+    }
     
     // MAT_TEST_2(ndofu, ndofu,
     //           "lambda*((address@hidden))"
@@ -425,7 +426,7 @@
     
     MAT_TEST_1("Test for linear non homogeneous elasticity stiffness matrix",
               ndofu, ndofu, "(Div_Test_u*(lambda2*Id(meshdim)) "
-              "+ mu2*Sym(Grad_Test_u)):Grad_Test2_u",
+              "+ (2*mu2)*Sym(Grad_Test_u)):Grad_Test2_u",
               mim2, Iu, Iu,
               getfem::asm_stiffness_matrix_for_linear_elasticity
               (K, mim2, mf_u, mf_p, lambda2, mu2));
@@ -455,47 +456,47 @@
   if (all || only_one == 1) // ndofu = 321602 ndofp = 160801 ndofchi = 1201
     test_new_assembly(2, 400, 1);
   // Vector source term   : 0.20 | 0.66 |
-  // Nonlinear residual   : 0.31 |      |
+  // Nonlinear residual   : 0.27 |      |
   // Mass (scalar)        : 0.18 | 0.56 | 0.04 | 0.06 | 0.06 | 0.06 |
   // Mass (vector)        : 0.30 | 0.80 | 0.09 | 0.15 | 0.06 | 0.09 |
-  // Laplacian            : 0.16 | 0.80 | 0.04 | 0.05 | 0.06 | 0.05 |
+  // Laplacian            : 0.15 | 0.80 | 0.04 | 0.05 | 0.06 | 0.04 |
   // Homogeneous elas     : 0.30 | 1.88 | 0.08 | 0.13 | 0.06 | 0.09 |
-  // Non-homogeneous elast: 0.35 | 2.26 | 0.09 | 0.16 | 0.06 | 0.13 |
+  // Non-homogeneous elast: 0.34 | 2.26 | 0.09 | 0.16 | 0.06 | 0.12 |
   if (all || only_one == 2) // ndofu = 151959 ndofp =  50653 ndofchi = 6553
     test_new_assembly(3, 36, 1);
   // Vector source term   : 0.26 | 0.79 |
-  // Nonlinear residual   : 0.54 |      |
+  // Nonlinear residual   : 0.49 |      |
   // Mass (scalar)        : 0.21 | 0.58 | 0.05 | 0.08 | 0.08 | 0.06 |
   // Mass (vector)        : 0.68 | 1.37 | 0.17 | 0.27 | 0.08 | 0.33 |
   // Laplacian            : 0.17 | 1.15 | 0.03 | 0.06 | 0.08 | 0.03 |
-  // Homogeneous elas     : 0.79 | 4.25 | 0.26 | 0.33 | 0.08 | 0.38 |
+  // Homogeneous elas     : 0.78 | 4.25 | 0.26 | 0.33 | 0.08 | 0.37 |
   // Non-homogeneous elast: 0.83 | 6.29 | 0.26 | 0.33 | 0.08 | 0.42 |
   if (all || only_one == 3) // ndofu = 321602 ndofp = 160801 ndofchi = 1201
     test_new_assembly(2, 200, 2);
   // Vector source term   : 0.09 | 0.23 |
-  // Nonlinear residual   : 0.15 |      |
-  // Mass (scalar)        : 0.09 | 0.25 | 0.02 | 0.03 | 0.03 | 0.03 |
+  // Nonlinear residual   : 0.12 |      |
+  // Mass (scalar)        : 0.08 | 0.25 | 0.02 | 0.03 | 0.03 | 0.02 |
   // Mass (vector)        : 0.26 | 0.44 | 0.05 | 0.09 | 0.03 | 0.14 |
-  // Laplacian            : 0.09 | 0.37 | 0.02 | 0.03 | 0.03 | 0.03 |
-  // Homogeneous elas     : 0.26 | 1.28 | 0.06 | 0.09 | 0.03 | 0.14 |
-  // Non-homogeneous elast: 0.28 | 2.38 | 0.06 | 0.10 | 0.03 | 0.16 |
+  // Laplacian            : 0.08 | 0.37 | 0.02 | 0.03 | 0.03 | 0.02 |
+  // Homogeneous elas     : 0.24 | 1.28 | 0.06 | 0.09 | 0.03 | 0.12 |
+  // Non-homogeneous elast: 0.26 | 2.38 | 0.06 | 0.10 | 0.03 | 0.13 |
   if (all || only_one == 4) // ndofu = 151959 ndofp =  50653 ndofchi = 6553
     test_new_assembly(3, 18, 2);
   // Vector source term   : 0.13 | 0.23 |
-  // Nonlinear residual   : 0.37 |      |
+  // Nonlinear residual   : 0.31 |      |
   // Mass (scalar)        : 0.11 | 0.25 | 0.05 | 0.05 | 0.03 | 0.03 |
   // Mass (vector)        : 1.13 | 0.89 | 0.21 | 0.35 | 0.03 | 0.75 |
-  // Laplacian            : 0.10 | 0.53 | 0.03 | 0.04 | 0.03 | 0.03 |
-  // Homogeneous elas     : 1.66 | 3.35 | 0.59 | 0.73 | 0.03 | 0.90 |
-  // Non-homogeneous elast: 1.70 | 9.08 | 0.59 | 0.73 | 0.03 | 0.94 |
+  // Laplacian            : 0.08 | 0.53 | 0.03 | 0.03 | 0.03 | 0.02 |
+  // Homogeneous elas     : 1.65 | 3.35 | 0.59 | 0.73 | 0.03 | 0.89 |
+  // Non-homogeneous elast: 1.68 | 9.08 | 0.59 | 0.73 | 0.03 | 0.92 |
   if (all || only_one == 5) // ndofu = 151959 ndofp =  50653 ndofchi = 6553
     test_new_assembly(3, 9, 4);
   // Vector source term   : 0.11 | 0.19 |
-  // Nonlinear residual   : 0.36 |      |
+  // Nonlinear residual   : 0.29 |      |
   // Mass (scalar)        : 0.51 | 0.34 | 0.09 | 0.16 | 0.01 | 0.33 |
   // Mass (vector)        : 4.37 | 1.31 | 0.41 | 1.27 | 0.01 | 3.10 |
-  // Laplacian            : 0.40 | 0.76 | 0.09 | 0.14 | 0.01 | 0.25 |
-  // Homogeneous elas     : 8.93 | 5.23 | 0.82 | 1.41 | 0.01 | 7.49 |
+  // Laplacian            : 0.36 | 0.76 | 0.09 | 0.14 | 0.01 | 0.21 |
+  // Homogeneous elas     : 8.85 | 5.23 | 0.82 | 1.41 | 0.01 | 7.41 |
   // Non-homogeneous elast: 8.94 | 47.4 | 0.82 | 1.41 | 0.01 | 7.50 |
 
   // Conclusions :

Modified: trunk/getfem/src/bgeot_geometric_trans.cc
URL: 
http://svn.gna.org/viewcvs/getfem/trunk/getfem/src/bgeot_geometric_trans.cc?rev=5457&r1=5456&r2=5457&view=diff
==============================================================================
--- trunk/getfem/src/bgeot_geometric_trans.cc   (original)
+++ trunk/getfem/src/bgeot_geometric_trans.cc   Wed Nov  2 12:59:10 2016
@@ -28,26 +28,86 @@
 
 namespace bgeot {
 
-std::vector<scalar_type>& __aux1(){
-  DEFINE_STATIC_THREAD_LOCAL(std::vector<scalar_type>, v);
-  return v;
-}
-
-std::vector<scalar_type>& __aux2(){
-  DEFINE_STATIC_THREAD_LOCAL(std::vector<scalar_type>, v);
-  return v;
-}
-
-std::vector<scalar_type>& __aux3(){
-  DEFINE_STATIC_THREAD_LOCAL(std::vector<scalar_type>, v);
-  return v;
-}
-
-std::vector<int>& __ipvt_aux(){
-  DEFINE_STATIC_THREAD_LOCAL(std::vector<int>, vi);
-  return vi;
-}
+  std::vector<scalar_type>& __aux1(){
+    DEFINE_STATIC_THREAD_LOCAL(std::vector<scalar_type>, v);
+    return v;
+  }
   
+  std::vector<scalar_type>& __aux2(){
+    DEFINE_STATIC_THREAD_LOCAL(std::vector<scalar_type>, v);
+    return v;
+  }
+  
+  std::vector<scalar_type>& __aux3(){
+    DEFINE_STATIC_THREAD_LOCAL(std::vector<scalar_type>, v);
+    return v;
+  }
+  
+  std::vector<int>& __ipvt_aux(){
+    DEFINE_STATIC_THREAD_LOCAL(std::vector<int>, vi);
+    return vi;
+  }
+  
+  // Optimized matrix mult for small matrices. To be verified.
+  // Multiply the matrix A of size MxN by B of size NxP in C of size MxP
+  void mat_mult(const scalar_type *A, const scalar_type *B, scalar_type *C,
+               size_type M, size_type N, size_type P) {
+    auto itC = C; auto itB = B;
+    for (size_type j = 0; j < P; ++j, itB += N)
+      for (size_type i = 0; i < M; ++i, ++itC) {
+       auto itA = A+i, itB1 = itB;
+       *itC = (*itA) * (*itB1);
+       for (size_type k = 1; k < N; ++k)
+         { itA += M; ++itB1; *itC += (*itA) * (*itB1); }
+      }
+  }
+
+  // Optimized matrix mult for small matrices.
+  // Multiply the matrix A of size MxN by the transpose of B of size PxN
+  // in C of size MxP
+  void mat_tmult(const scalar_type *A, const scalar_type *B, scalar_type *C,
+                size_type M, size_type N, size_type P) {
+    auto itC = C;
+    switch (N) {
+    case 0 : std::fill(C, C+M*P, scalar_type(0)); break;
+    case 1 : 
+      for (size_type j = 0; j < P; ++j)
+       for (size_type i = 0; i < M; ++i, ++itC) {
+         auto itA = A+i, itB = B+j;
+         *itC = (*itA) * (*itB);
+       }
+      break;
+    case 2 :
+      for (size_type j = 0; j < P; ++j)
+       for (size_type i = 0; i < M; ++i, ++itC) {
+         auto itA = A+i, itB = B+j;
+         *itC = (*itA) * (*itB);
+         itA += M; itB += P; *itC += (*itA) * (*itB);
+       }
+      break;
+    case 3 :
+      for (size_type j = 0; j < P; ++j)
+       for (size_type i = 0; i < M; ++i, ++itC) {
+         auto itA = A+i, itB = B+j;
+         *itC = (*itA) * (*itB);
+         itA += M; itB += P; *itC += (*itA) * (*itB);
+         itA += M; itB += P; *itC += (*itA) * (*itB);
+       }
+      break;
+    default :
+      for (size_type j = 0; j < P; ++j)
+       for (size_type i = 0; i < M; ++i, ++itC) {
+         auto itA = A+i, itB = B+j;
+         *itC = (*itA) * (*itB);
+         for (size_type k = 1; k < N; ++k)
+           { itA += M; itB += P; *itC += (*itA) * (*itB); }
+       }
+    }
+  }
+
+
+
+
   // Optimized lu_factor for small square matrices
   size_type lu_factor(scalar_type *A, std::vector<int> &ipvt,
                      size_type N) {

Modified: trunk/getfem/src/getfem/bgeot_geometric_trans.h
URL: 
http://svn.gna.org/viewcvs/getfem/trunk/getfem/src/getfem/bgeot_geometric_trans.h?rev=5457&r1=5456&r2=5457&view=diff
==============================================================================
--- trunk/getfem/src/getfem/bgeot_geometric_trans.h     (original)
+++ trunk/getfem/src/getfem/bgeot_geometric_trans.h     Wed Nov  2 12:59:10 2016
@@ -515,6 +515,15 @@
   { return lu_det(&(*(A.begin())), A.nrows()); }
   inline scalar_type lu_inverse(base_matrix &A, bool doassert = true)
   { return lu_inverse(&(*(A.begin())), A.nrows(), doassert); }
+  // Optimized matrix mult for small matrices.
+  // Multiply the matrix A of size MxN by B of size NxP in C of size MxP
+  void mat_mult(const scalar_type *A, const scalar_type *B, scalar_type *C,
+               size_type M, size_type N, size_type P);
+  // Optimized matrix mult for small matrices.
+  // Multiply the matrix A of size MxN by the transpose of B of size PxN
+  // in C of size MxP
+  void mat_tmult(const scalar_type *A, const scalar_type *B, scalar_type *C,
+                size_type M, size_type N, size_type P);
 
 }  /* end of namespace bgeot.                                             */
 

Modified: trunk/getfem/src/getfem/bgeot_tensor.h
URL: 
http://svn.gna.org/viewcvs/getfem/trunk/getfem/src/getfem/bgeot_tensor.h?rev=5457&r1=5456&r2=5457&view=diff
==============================================================================
--- trunk/getfem/src/getfem/bgeot_tensor.h      (original)
+++ trunk/getfem/src/getfem/bgeot_tensor.h      Wed Nov  2 12:59:10 2016
@@ -200,34 +200,34 @@
     inline size_type order(void) const { return sizes_.size(); }
 
     void init(const multi_index &c) {
-      multi_index::const_iterator it = c.begin();
+      auto it = c.begin();
       size_type d = 1;
       sizes_ = c; coeff.resize(c.size());
-      multi_index::iterator p = coeff.begin(), pe = coeff.end();
+      auto p = coeff.begin(), pe = coeff.end();
       for ( ; p != pe; ++p, ++it) { *p = d; d *= *it; }
       this->resize(d);
     }
 
-    void init() { sizes_.resize(0);  coeff.resize(0); this->resize(1); }
-
-    void init(size_type i) {
+    inline void init() { sizes_.resize(0);  coeff.resize(0); this->resize(1); }
+
+    inline void init(size_type i) {
       sizes_.resize(1); sizes_[0] = i; coeff.resize(1); coeff[0] = 1;
       this->resize(i);
     }
 
-    void init(size_type i, size_type j) {
+    inline void init(size_type i, size_type j) {
       sizes_.resize(2); sizes_[0] = i; sizes_[1] = j;
       coeff.resize(2); coeff[0] = 1; coeff[1] = i;
       this->resize(i*j);
     }
 
-    void init(size_type i, size_type j, size_type k) {
+    inline void init(size_type i, size_type j, size_type k) {
       sizes_.resize(3); sizes_[0] = i; sizes_[1] = j; sizes_[2] = k; 
       coeff.resize(3); coeff[0] = 1; coeff[1] = i; coeff[2] = i*j;
       this->resize(i*j*k);
     }
 
-    void init(size_type i, size_type j, size_type k, size_type l) {
+    inline void init(size_type i, size_type j, size_type k, size_type l) {
       sizes_.resize(4);
       sizes_[0] = i; sizes_[1] = j; sizes_[2] = k; sizes_[3] = k; 
       coeff.resize(4);
@@ -235,29 +235,29 @@
       this->resize(i*j*k*l);
     }
 
-    void adjust_sizes(const multi_index &mi) {
-      if (!mi.size() || (mi.size() != sizes().size())
-          || !(std::equal(mi.begin(), mi.end(), sizes().begin())))
-        init(mi);
-    }
-
-    void adjust_sizes(void) { if (sizes_.size() || this->size() != 1) init(); }
-
-    void adjust_sizes(size_type i)
-    { if (sizes_.size() != 1 || sizes_[0] != i) init(i); }
-
-    void adjust_sizes(size_type i, size_type j)
-    { if (sizes_.size() != 2 || sizes_[0] != i || sizes_[1] != j) init(i, j); }
-
-    void adjust_sizes(size_type i, size_type j, size_type k) {
-      if (sizes_.size() != 3 || sizes_[0] != i || sizes_[1] != j
-          || sizes_[2] != k)
-        init(i, j, k);
-    }
-    void adjust_sizes(size_type i, size_type j, size_type k, size_type l) {
-      if (sizes_.size() != 3 || sizes_[0] != i || sizes_[1] != j
-          || sizes_[2] != k || sizes_[3] != l)
-        init(i, j, k, l);
+    inline void adjust_sizes(const multi_index &mi) { init(mi); }
+    inline void adjust_sizes(void) { init(); }
+    inline void adjust_sizes(size_type i) { init(i); }
+    inline void adjust_sizes(size_type i, size_type j) { init(i, j); }
+    inline void adjust_sizes(size_type i, size_type j, size_type k)
+    { init(i, j, k); }
+    inline void adjust_sizes(size_type i, size_type j, size_type k, size_type 
l)
+    { init(i, j, k, l); }
+    
+    inline size_type adjust_sizes_changing_last(const tensor &t, size_type P) {
+      const multi_index &mi = t.sizes_; size_type d = mi.size();
+      sizes_.resize(d); coeff.resize(d);
+      if (d) {
+       std::copy(mi.begin(), mi.end(), sizes_.begin());
+       std::copy(t.coeff.begin(), t.coeff.end(), coeff.begin());
+       size_type e = coeff.back();
+       sizes_.back() = P;
+       this->resize(e*P);
+       return e;
+      } else {
+       this->resize(1);
+       return 1;
+      }
     }
 
     /** reduction of tensor t with respect to index ni with matrix m:

Modified: trunk/getfem/src/getfem_fem.cc
URL: 
http://svn.gna.org/viewcvs/getfem/trunk/getfem/src/getfem_fem.cc?rev=5457&r1=5456&r2=5457&view=diff
==============================================================================
--- trunk/getfem/src/getfem_fem.cc      (original)
+++ trunk/getfem/src/getfem_fem.cc      Wed Nov  2 12:59:10 2016
@@ -77,6 +77,14 @@
   // In that case, the storage available in ctx.pfp()->c, ctx.pfp()->pc
   // and ctx.pfp()->hpc is not used.
 
+
+  // Specific multiplication for fem_interpolation_context use.
+  static inline void spec_mat_tmult_(const base_tensor &g, const base_matrix 
&B,
+                                    base_tensor &t) {
+    size_type P = B.nrows(), N = B.ncols();
+    size_type M = t.adjust_sizes_changing_last(g, P);
+    bgeot::mat_tmult(&(*(g.begin())), &(*(B.begin())), &(*(t.begin())),M,N,P);
+  }
   
   void fem_interpolation_context::pfp_base_value(base_tensor& t,
                                                  const pfem_precomp &pfp__) {
@@ -146,13 +154,14 @@
     }
   }
 
-  void fem_interpolation_context::pfp_grad_base_value(base_tensor& t,
-                                                      const pfem_precomp 
&pfp__) {
+  void fem_interpolation_context::pfp_grad_base_value
+  (base_tensor& t, const pfem_precomp &pfp__) {
     const pfem &pf__ = pfp__->get_pfem();
     GMM_ASSERT1(ii_ != size_type(-1), "Internal error");
 
     if (pf__->is_standard()) {
-      t.mat_transp_reduction(pfp__->grad(ii()), B(), 2);
+      // t.mat_transp_reduction(pfp__->grad(ii()), B(), 2);
+      spec_mat_tmult_(pfp__->grad(ii()), B(), t);
     } else {
       if (pf__->is_on_real_element())
         pf__->real_grad_base_value(*this, t);
@@ -161,18 +170,22 @@
         case virtual_fem::VECTORIAL_PRIMAL_TYPE:
           {
             base_tensor u;
-            u.mat_transp_reduction(pfp__->grad(ii()), B(), 2);
+            // u.mat_transp_reduction(pfp__->grad(ii()), B(), 2);
+           spec_mat_tmult_(pfp__->grad(ii()), B(), u);
             t.mat_transp_reduction(u, K(), 1);
           }
           break;
         case virtual_fem::VECTORIAL_DUAL_TYPE:
           {
             base_tensor u;
-            u.mat_transp_reduction(pfp__->grad(ii()), B(), 2);
+            // u.mat_transp_reduction(pfp__->grad(ii()), B(), 2);
+           spec_mat_tmult_(pfp__->grad(ii()), B(), u);
             t.mat_transp_reduction(u, B(), 1);
           }
           break;
-        default: t.mat_transp_reduction(pfp__->grad(ii()), B(), 2);
+        default:
+         // t.mat_transp_reduction(pfp__->grad(ii()), B(), 2);
+         spec_mat_tmult_(pfp__->grad(ii()), B(), t);
         }
         if (!(pf__->is_equivalent())) {
           set_pfp(pfp__);
@@ -186,7 +199,8 @@
   void fem_interpolation_context::grad_base_value(base_tensor& t,
                                                   bool withM) const {
     if (pfp_ && ii_ != size_type(-1) && pf_->is_standard()) {
-      t.mat_transp_reduction(pfp_->grad(ii()), B(), 2);
+      // t.mat_transp_reduction(pfp_->grad(ii()), B(), 2);
+      spec_mat_tmult_(pfp_->grad(ii()), B(), t);
     } else {
       if (pf()->is_on_real_element())
         pf()->real_grad_base_value(*this, t);
@@ -196,25 +210,30 @@
           case virtual_fem::VECTORIAL_PRIMAL_TYPE:
             {
               base_tensor u;
-              u.mat_transp_reduction(pfp_->grad(ii()), B(), 2);
+              // u.mat_transp_reduction(pfp_->grad(ii()), B(), 2);
+             spec_mat_tmult_(pfp_->grad(ii()), B(), u);
               t.mat_transp_reduction(u, K(), 1);
             }
             break;
           case virtual_fem::VECTORIAL_DUAL_TYPE:
             {
               base_tensor u;
-              u.mat_transp_reduction(pfp_->grad(ii()), B(), 2);
+              // u.mat_transp_reduction(pfp_->grad(ii()), B(), 2);
+             spec_mat_tmult_(pfp_->grad(ii()), B(), u);
               t.mat_transp_reduction(u, B(), 1);
             }
             break;
-          default: t.mat_transp_reduction(pfp_->grad(ii()), B(), 2);
+          default:
+           // t.mat_transp_reduction(pfp_->grad(ii()), B(), 2);
+           spec_mat_tmult_(pfp_->grad(ii()), B(), t);
           }
           
         } else {
           base_tensor u;
           pf()->grad_base_value(xref(), u);
           if (u.size()) { /* only if the FEM can provide grad_base_value */
-            t.mat_transp_reduction(u, B(), 2);
+           // t.mat_transp_reduction(u, B(), 2);
+            spec_mat_tmult_(u, B(), t);
             switch(pf()->vectorial_type()) {
             case virtual_fem::VECTORIAL_PRIMAL_TYPE:
               u = t; t.mat_transp_reduction(u, K(), 1); break;




reply via email to

[Prev in Thread] Current Thread [Next in Thread]