11#include " BarrierEnergy.h"
2+ #include " distance.h"
23#include < muda/muda.h>
34#include < muda/container.h>
45#include < stdio.h>
@@ -12,6 +13,7 @@ struct BarrierEnergy<T, dim>::Impl
1213{
1314 DeviceBuffer<T> device_x;
1415 DeviceBuffer<T> device_contact_area, device_n, device_n_ceil, device_o;
16+ DeviceBuffer<int > device_bp, device_be;
1517 int N;
1618 DeviceBuffer<T> device_grad;
1719 DeviceTripletMatrix<T, 1 > device_hess;
@@ -33,7 +35,7 @@ BarrierEnergy<T, dim>::BarrierEnergy(const BarrierEnergy<T, dim> &rhs)
3335 : pimpl_{std::make_unique<Impl>(*rhs.pimpl_ )} {}
3436
3537template <typename T, int dim>
36- BarrierEnergy<T, dim>::BarrierEnergy(const std::vector<T> &x, const std::vector<T> n, const std::vector<T> o , const std::vector<T> &contact_area) : pimpl_{std::make_unique<Impl>()}
38+ BarrierEnergy<T, dim>::BarrierEnergy(const std::vector<T> &x, const std::vector<T> & n, const std::vector<T> &o, const std::vector< int > &be, const std::vector< int > &bp , const std::vector<T> &contact_area) : pimpl_{std::make_unique<Impl>()}
3739{
3840 pimpl_->N = x.size () / dim;
3941 pimpl_->device_x .copy_from (x);
@@ -44,7 +46,9 @@ BarrierEnergy<T, dim>::BarrierEnergy(const std::vector<T> &x, const std::vector<
4446 pimpl_->device_n_ceil .copy_from (n_ceil);
4547 pimpl_->device_n .copy_from (n);
4648 pimpl_->device_o .copy_from (o);
47- pimpl_->device_hess .resize_triplets (pimpl_->N * dim * dim + (pimpl_->N - 1 ) * dim * dim * 4 );
49+ pimpl_->device_bp .copy_from (bp);
50+ pimpl_->device_be .copy_from (be);
51+ pimpl_->device_hess .resize_triplets (pimpl_->N * dim * dim + (pimpl_->N - 1 ) * dim * dim * 4 + bp.size () * be.size () / 2 * 36 );
4852 pimpl_->device_hess .reshape (x.size (), x.size ());
4953 pimpl_->device_grad .resize (pimpl_->N * dim);
5054}
@@ -63,9 +67,13 @@ T BarrierEnergy<T, dim>::val()
6367 auto &device_n = pimpl_->device_n ;
6468 auto &device_n_ceil = pimpl_->device_n_ceil ;
6569 auto &device_o = pimpl_->device_o ;
66- int N = device_x.size () / dim;
70+ auto &device_bp = pimpl_->device_bp ;
71+ auto &device_be = pimpl_->device_be ;
72+ int N = device_x.size () / dim, Nbp = device_bp.size (), Nbe = device_be.size () / 2 ;
73+ int Npe = Nbp * Nbe;
6774 DeviceBuffer<T> device_val1 (N);
6875 DeviceBuffer<T> device_val2 (N);
76+ DeviceBuffer<T> device_val3 (Npe);
6977 ParallelFor (256 ).apply (N, [device_val1 = device_val1.viewer (), device_x = device_x.cviewer (), device_contact_area = device_contact_area.cviewer (), device_n = device_n.cviewer (), device_o = device_o.cviewer ()] __device__ (int i) mutable
7078 { T d = 0 ;
7179 for (int j=0 ;j<dim;j++){
@@ -86,7 +94,26 @@ T BarrierEnergy<T, dim>::val()
8694 device_val2 (i)= kappa * device_contact_area (i) * dhat/2 *(s-1 )*log (s);
8795 } })
8896 .wait ();
89- return devicesum (device_val1) + devicesum (device_val2);
97+ ParallelFor (256 ).apply (Npe, [device_val3 = device_val3.viewer (), device_x = device_x.cviewer (), device_contact_area = device_contact_area.cviewer (), device_bp = device_bp.cviewer (), device_be = device_be.cviewer (), Nbp, Nbe] __device__ (int i) mutable
98+ {
99+ int xI = device_bp (i / Nbe);
100+ int eI0 = device_be (2 *(i % Nbe)), eI1 = device_be (2 *(i % Nbe) + 1 );
101+ if (xI!=eI0 &&xI!=eI1){
102+ T dhatsqr= dhat*dhat;
103+ Eigen::Vector<T, 2 > p,e0 ,e1 ;
104+ p<<device_x (xI*dim),device_x (xI*dim+1 );
105+ e0 <<device_x (eI0*dim),device_x (eI0*dim+1 );
106+ e1 <<device_x (eI1*dim),device_x (eI1*dim+1 );
107+ T d_sqr=PointEdgeDistanceVal (p,e0 ,e1 );
108+ if (d_sqr<dhatsqr){
109+ T s = d_sqr / dhatsqr;
110+ device_val3 (i)= kappa * device_contact_area (xI) * dhat/8 *(s-1 )*log (s);
111+ }
112+ } })
113+ .wait ();
114+ return devicesum (device_val1) +
115+ devicesum (device_val2) +
116+ devicesum (device_val3);
90117} // Calculate the energy
91118
92119template <typename T, int dim>
@@ -95,10 +122,14 @@ const DeviceBuffer<T> &BarrierEnergy<T, dim>::grad()
95122 auto &device_x = pimpl_->device_x ;
96123 auto &device_contact_area = pimpl_->device_contact_area ;
97124 int N = device_x.size () / dim;
125+ int Nbp = pimpl_->device_bp .size (), Nbe = pimpl_->device_be .size () / 2 ;
126+ int Npe = Nbp * Nbe;
98127 auto &device_n = pimpl_->device_n ;
99128 auto &device_n_ceil = pimpl_->device_n_ceil ;
100129 auto &device_o = pimpl_->device_o ;
101130 auto &device_grad = pimpl_->device_grad ;
131+ auto &device_bp = pimpl_->device_bp ;
132+ auto &device_be = pimpl_->device_be ;
102133 device_grad.fill (0 );
103134 ParallelFor (256 ).apply (N, [device_x = device_x.cviewer (), device_contact_area = device_contact_area.cviewer (), device_grad = device_grad.viewer (), device_n = device_n.cviewer (), device_o = device_o.cviewer ()] __device__ (int i) mutable
104135
@@ -134,6 +165,29 @@ const DeviceBuffer<T> &BarrierEnergy<T, dim>::grad()
134165 }
135166 } })
136167 .wait ();
168+ ParallelFor (256 ).apply (Npe, [device_x = device_x.cviewer (), device_contact_area = device_contact_area.cviewer (), device_grad = device_grad.viewer (), device_bp = device_bp.cviewer (), device_be = device_be.cviewer (), Nbp, Nbe] __device__ (int i) mutable
169+ {
170+ int xI = device_bp (i / Nbe);
171+ int eI0 = device_be (2 *(i % Nbe)), eI1 = device_be (2 *(i % Nbe) + 1 );
172+ if (xI!=eI0 &&xI!=eI1){
173+ T dhatsqr= dhat*dhat;
174+ Eigen::Vector<T, 2 > p,e0 ,e1 ;
175+ p<<device_x (xI*dim),device_x (xI*dim+1 );
176+ e0 <<device_x (eI0*dim),device_x (eI0*dim+1 );
177+ e1 <<device_x (eI1*dim),device_x (eI1*dim+1 );
178+ T d_sqr=PointEdgeDistanceVal (p,e0 ,e1 );
179+ if (d_sqr<dhatsqr){
180+ T s = d_sqr / dhatsqr;
181+ Eigen::Vector<T, 6 > grad = 0.5 * device_contact_area (xI) * dhat * (kappa / 8 * (log (s) / dhatsqr + (s - 1 ) / d_sqr)) * PointEdgeDistanceGrad (p,e0 ,e1 );
182+ atomicAdd (&device_grad (xI*dim),grad (0 ));
183+ atomicAdd (&device_grad (xI*dim+1 ),grad (1 ));
184+ atomicAdd (&device_grad (eI0*dim),grad (2 ));
185+ atomicAdd (&device_grad (eI0*dim+1 ),grad (3 ));
186+ atomicAdd (&device_grad (eI1*dim),grad (4 ));
187+ atomicAdd (&device_grad (eI1*dim+1 ),grad (5 ));
188+ }
189+ } })
190+ .wait ();
137191 return device_grad;
138192}
139193
@@ -149,6 +203,10 @@ const DeviceTripletMatrix<T, 1> &BarrierEnergy<T, dim>::hess()
149203 auto device_hess_row_idx = device_hess.row_indices ();
150204 auto device_hess_col_idx = device_hess.col_indices ();
151205 auto device_hess_val = device_hess.values ();
206+ auto device_bp = pimpl_->device_bp ;
207+ auto device_be = pimpl_->device_be ;
208+ int Nbp = device_bp.size (), Nbe = device_be.size () / 2 ;
209+ int Npe = Nbp * Nbe;
152210 int N = device_x.size () / dim;
153211 ParallelFor (256 ).apply (N, [device_x = device_x.cviewer (), device_contact_area = device_contact_area.cviewer (), device_hess_val = device_hess_val.viewer (), device_hess_row_idx = device_hess_row_idx.viewer (), device_hess_col_idx = device_hess_col_idx.viewer (), N, device_n = device_n.cviewer (), device_o = device_o.cviewer ()] __device__ (int i) mutable
154212 {
@@ -199,6 +257,54 @@ const DeviceTripletMatrix<T, 1> &BarrierEnergy<T, dim>::hess()
199257 }
200258 } })
201259 .wait ();
260+ ParallelFor (256 ).apply (Npe, [device_x = device_x.cviewer (), device_contact_area = device_contact_area.cviewer (), device_hess_val = device_hess_val.viewer (), device_hess_row_idx = device_hess_row_idx.viewer (), device_hess_col_idx = device_hess_col_idx.viewer (), N, device_bp = device_bp.cviewer (), device_be = device_be.cviewer (), Nbp, Nbe] __device__ (int i) mutable
261+ {
262+ int xI = device_bp (i / Nbe);
263+ int eI0 = device_be (2 *(i % Nbe)), eI1 = device_be (2 *(i % Nbe) + 1 );
264+ int index[3 ] = {xI, eI0, eI1};
265+ for (int nI = 0 ; nI < 3 ; nI++)
266+ for (int nJ = 0 ; nJ < 3 ; nJ++)
267+ for (int j = 0 ; j < dim; j++)
268+ {
269+ for (int k = 0 ; k < dim; k++)
270+ {
271+ int idx = N * dim * dim + (N - 1 ) * dim * dim * 4 + i * dim * dim*9 + nI * dim * dim*3 + nJ * dim * dim + j * dim + k;
272+ device_hess_row_idx (idx) = index[nI] * dim + j;
273+ device_hess_col_idx (idx) = index[nJ] * dim + k;
274+ device_hess_val (idx) = 0 ;
275+ }
276+ }
277+ if (xI != eI0 && xI != eI1){
278+ T dhat_sqr = dhat * dhat;
279+ Eigen::Vector<T, 2 > p, e0 , e1 ;
280+ p << device_x (xI * dim), device_x (xI * dim + 1 );
281+ e0 << device_x (eI0 * dim), device_x (eI0 * dim + 1 );
282+ e1 << device_x (eI1 * dim), device_x (eI1 * dim + 1 );
283+ T d_sqr = PointEdgeDistanceVal (p, e0 , e1 );
284+ if (d_sqr < dhat_sqr)
285+ {
286+ Eigen::Vector<T, 6 > grad=PointEdgeDistanceGrad (p,e0 ,e1 );
287+ T s = d_sqr / dhat_sqr;
288+ Eigen::Matrix<T, 6 , 6 > localhess,PSD;
289+ localhess=kappa / (8 * d_sqr * d_sqr * dhat_sqr) * (d_sqr + dhat_sqr) * grad * grad.transpose ()
290+ + (kappa / 8 * (log (s) / dhat_sqr + (s - 1 ) / d_sqr)) * PointEdgeDistanceHess (p,e0 ,e1 );
291+ make_PSD (localhess,PSD);
292+ localhess=0.5 * device_contact_area (xI) * dhat * PSD;
293+ for (int nI = 0 ; nI < 3 ; nI++)
294+ for (int nJ = 0 ; nJ < 3 ; nJ++)
295+ for (int j = 0 ; j < dim; j++)
296+ {
297+ for (int k = 0 ; k < dim; k++)
298+ {
299+ int idx = N * dim * dim + (N - 1 ) * dim * dim * 4 + i * dim * dim*9 + nI * dim * dim*3 + nJ * dim * dim + j * dim + k;
300+ device_hess_val (idx) = localhess (nI*dim+j,nJ*dim+k);
301+ }
302+ }
303+ }
304+ } })
305+
306+ .wait ();
307+
202308 return device_hess;
203309
204310} // Calculate the Hessian of the energy
@@ -211,8 +317,12 @@ T BarrierEnergy<T, dim>::init_step_size(const DeviceBuffer<T> &p)
211317 auto &device_n_ceil = pimpl_->device_n_ceil ;
212318 auto &device_o = pimpl_->device_o ;
213319 int N = device_x.size () / dim;
320+ int Nbp = pimpl_->device_bp .size (), Nbe = pimpl_->device_be .size () / 2 ;
321+ int Npe = Nbp * Nbe;
214322 DeviceBuffer<T> device_alpha (N);
323+ DeviceBuffer<T> device_alpha1 (Npe);
215324 device_alpha.fill (1 );
325+ device_alpha1.fill (1 );
216326 ParallelFor (256 )
217327 .apply (N, [device_x = device_x.cviewer (), p = p.cviewer (), device_alpha = device_alpha.viewer (), device_n = device_n.cviewer (), device_o = device_o.cviewer ()] __device__ (int i) mutable
218328
@@ -254,7 +364,25 @@ T BarrierEnergy<T, dim>::init_step_size(const DeviceBuffer<T> &p)
254364 // printf("alpha: %f\n", device_alpha(i));
255365 } })
256366 .wait ();
257- return min_vector (device_alpha);
367+ T current_alpha = min_vector (device_alpha);
368+ ParallelFor (256 )
369+ .apply (Npe, [current_alpha, device_x = device_x.cviewer (), P = p.cviewer (), device_alpha1 = device_alpha1.viewer (), device_bp = pimpl_->device_bp .cviewer (), device_be = pimpl_->device_be .cviewer (), Nbp, Nbe] __device__ (int i) mutable
370+ {
371+ int xI = device_bp (i / Nbe);
372+ int eI0 = device_be (2 *(i % Nbe)), eI1 = device_be (2 *(i % Nbe) + 1 );
373+ Eigen::Matrix<T, 2 , 1 > p, e0 , e1 ,dp,de0,de1;
374+ p<<device_x (xI*dim),device_x (xI*dim+1 );
375+ e0 <<device_x (eI0*dim),device_x (eI0*dim+1 );
376+ e1 <<device_x (eI1*dim),device_x (eI1*dim+1 );
377+ dp<<P (xI*dim),P (xI*dim+1 );
378+ de0<<P (eI0*dim),P (eI0*dim+1 );
379+ de1<<P (eI1*dim),P (eI1*dim+1 );
380+ if (bbox_overlap (p,e0 ,e1 ,dp,de0,de1,current_alpha)){
381+ T toc=narrow_phase_CCD (p,e0 ,e1 ,dp,de0,de1,current_alpha);
382+ device_alpha1 (i)=min (device_alpha1 (i),toc);
383+ } })
384+ .wait ();
385+ return min (min_vector (device_alpha1), current_alpha);
258386}
259387template class BarrierEnergy <float , 2 >;
260388template class BarrierEnergy <float , 3 >;
0 commit comments