@@ -14,6 +14,8 @@ struct BarrierEnergy<T, dim>::Impl
1414 DeviceBuffer<T> device_x;
1515 DeviceBuffer<T> device_contact_area, device_n, device_n_ceil, device_o;
1616 DeviceBuffer<int > device_bp, device_be;
17+ std::vector<int > bp;
18+ std::vector<int > be;
1719 int N;
1820 DeviceBuffer<T> device_grad;
1921 DeviceTripletMatrix<T, 1 > device_hess;
@@ -48,6 +50,8 @@ BarrierEnergy<T, dim>::BarrierEnergy(const std::vector<T> &x, const std::vector<
4850 pimpl_->device_o .copy_from (o);
4951 pimpl_->device_bp .copy_from (bp);
5052 pimpl_->device_be .copy_from (be);
53+ pimpl_->bp = bp;
54+ pimpl_->be = be;
5155 pimpl_->device_hess .resize_triplets (pimpl_->N * dim * dim + (pimpl_->N - 1 ) * dim * dim * 4 + bp.size () * be.size () / 2 * 36 );
5256 pimpl_->device_hess .reshape (x.size (), x.size ());
5357 pimpl_->device_grad .resize (pimpl_->N * dim);
@@ -107,13 +111,14 @@ T BarrierEnergy<T, dim>::val()
107111 T d_sqr=PointEdgeDistanceVal (p,e0 ,e1 );
108112 if (d_sqr<dhatsqr){
109113 T s = d_sqr / dhatsqr;
110- device_val3 (i)= kappa * device_contact_area (xI) * dhat/8 *(s-1 )*log (s);
114+ device_val3 (i)= 0.5 * kappa * device_contact_area (xI) * dhat/8 *(s-1 )*log (s);
111115 }
112116 } })
113117 .wait ();
114- return devicesum (device_val1) +
115- devicesum (device_val2) +
116- devicesum (device_val3);
118+ T val1 = devicesum (device_val1) +
119+ devicesum (device_val2);
120+ T val2 = devicesum (device_val3);
121+ return val1 + val2;
117122} // Calculate the energy
118123
119124template <typename T, int dim>
@@ -160,7 +165,7 @@ const DeviceBuffer<T> &BarrierEnergy<T, dim>::grad()
160165 for (int j = 0 ; j < dim; j++)
161166 {
162167 T grad =device_contact_area (i) * dhat * (kappa / 2 * (log (s) / dhat + (s - 1 ) / d)) * device_n_ceil (j);
163- atomicAdd (& device_grad (i * dim + j), grad) ;
168+ device_grad (i * dim + j) += grad;
164169 atomicAdd (&device_grad ((N-1 ) * dim + j), -grad);
165170 }
166171 } })
@@ -188,6 +193,7 @@ const DeviceBuffer<T> &BarrierEnergy<T, dim>::grad()
188193 }
189194 } })
190195 .wait ();
196+
191197 return device_grad;
192198}
193199
@@ -340,7 +346,6 @@ T BarrierEnergy<T, dim>::init_step_size(const DeviceBuffer<T> &p)
340346 alpha += device_n (j) * (device_x (i * dim + j) - device_o (j));
341347 }
342348 device_alpha (i) = min (device_alpha (i), 0.9 * alpha / -p_n);
343- // printf("alpha: %f\n", device_alpha(i));
344349 } })
345350 .wait ();
346351
@@ -351,7 +356,7 @@ T BarrierEnergy<T, dim>::init_step_size(const DeviceBuffer<T> &p)
351356 T p_n = 0 ;
352357 for (int j = 0 ; j < dim; j++)
353358 {
354- p_n += p (i * dim + j) * device_n_ceil (j);
359+ p_n += ( p (i * dim + j)- p ((N- 1 )*dim+j) ) * device_n_ceil (j);
355360 }
356361 if (p_n < 0 )
357362 {
@@ -361,33 +366,59 @@ T BarrierEnergy<T, dim>::init_step_size(const DeviceBuffer<T> &p)
361366 alpha += device_n_ceil (j) * (device_x (i * dim + j) - device_x ((N-1 ) * dim + j));
362367 }
363368 device_alpha (i) = min (device_alpha (i), 0.9 * alpha / -p_n);
364- // printf("alpha: %f\n", device_alpha(i));
365369 } })
366370 .wait ();
367371 T current_alpha = min_vector (device_alpha);
368- ParallelFor (256 )
369- .apply (Npe, [current_alpha, device_x = device_x.cviewer (), P = p.cviewer (), device_alpha1 = device_alpha1.viewer (), device_bp = pimpl_->device_bp .cviewer (), device_be = pimpl_->device_be .cviewer (), Nbp, Nbe] __device__ (int i) mutable
370- {
371- int xI = device_bp (i / Nbe);
372- int eI0 = device_be (2 *(i % Nbe)), eI1 = device_be (2 *(i % Nbe) + 1 );
373- if (xI != eI0 && xI != eI1){
374- Eigen::Matrix<T, 2 , 1 > p, e0 , e1 , dp, de0, de1;
375- p<<device_x (xI*dim),device_x (xI*dim+1 );
376- e0 <<device_x (eI0*dim),device_x (eI0*dim+1 );
377- e1 <<device_x (eI1*dim),device_x (eI1*dim+1 );
378- dp<<P (xI*dim),P (xI*dim+1 );
379- de0<<P (eI0*dim),P (eI0*dim+1 );
380- de1<<P (eI1*dim),P (eI1*dim+1 );
381- if (bbox_overlap (p, e0 , e1 , dp, de0, de1, current_alpha))
382- {
383- T toc = narrow_phase_CCD (p, e0 , e1 , dp, de0, de1, current_alpha);
384- printf (" toc: %f\n " , toc);
385- device_alpha1 (i) = min (device_alpha1 (i), toc);
386- }
387- } })
388- .wait ();
372+ std::vector<T> x (device_x.size ());
373+ device_x.copy_to (x);
374+ std::vector<T> cpu_p (p.size ());
375+ p.copy_to (cpu_p);
376+ for (int i = 0 ; i < Nbp; i++)
377+ {
378+ for (int j = 0 ; j < Nbe; j++)
379+ {
380+ int xI = pimpl_->bp [i];
381+ int eI0 = pimpl_->be [2 * j], eI1 = pimpl_->be [2 * j + 1 ];
382+ if (xI != eI0 && xI != eI1)
383+ {
384+ Eigen::Matrix<T, 2 , 1 > p, e0 , e1 , dp, de0, de1;
385+ p << x[xI * dim], x[xI * dim + 1 ];
386+ e0 << x[eI0 * dim], x[eI0 * dim + 1 ];
387+ e1 << x[eI1 * dim], x[eI1 * dim + 1 ];
388+ dp << cpu_p[xI * dim], cpu_p[xI * dim + 1 ];
389+ de0 << cpu_p[eI0 * dim], cpu_p[eI0 * dim + 1 ];
390+ de1 << cpu_p[eI1 * dim], cpu_p[eI1 * dim + 1 ];
391+ if (bbox_overlap (p, e0 , e1 , dp, de0, de1, current_alpha))
392+ {
393+ T toc = narrow_phase_CCD (p, e0 , e1 , dp, de0, de1, current_alpha);
394+ if (toc < current_alpha)
395+ current_alpha = toc;
396+ }
397+ }
398+ }
399+ }
400+ // ParallelFor(256)
401+ // .apply(Npe, [current_alpha, device_x = device_x.cviewer(), P = p.cviewer(), device_alpha1 = device_alpha1.viewer(), device_bp = pimpl_->device_bp.cviewer(), device_be = pimpl_->device_be.cviewer(), Nbp, Nbe] __device__(int i) mutable
402+ // {
403+ // int xI = device_bp(i / Nbe);
404+ // int eI0 = device_be(2*(i % Nbe)), eI1 = device_be(2*(i % Nbe) + 1);
405+ // if (xI != eI0 && xI != eI1){
406+ // Eigen::Matrix<T, 2, 1> p, e0, e1, dp, de0, de1;
407+ // p<<device_x(xI*dim),device_x(xI*dim+1);
408+ // e0<<device_x(eI0*dim),device_x(eI0*dim+1);
409+ // e1<<device_x(eI1*dim),device_x(eI1*dim+1);
410+ // dp<<P(xI*dim),P(xI*dim+1);
411+ // de0<<P(eI0*dim),P(eI0*dim+1);
412+ // de1<<P(eI1*dim),P(eI1*dim+1);
413+ // if (bbox_overlap(p, e0, e1, dp, de0, de1, current_alpha))
414+ // {
415+ // T toc = narrow_phase_CCD(p, e0, e1, dp, de0, de1, current_alpha);
416+ // if (toc < device_alpha1(i))
417+ // device_alpha1(i) = toc;
418+ // }
419+ // } })
420+ // .wait();
389421 T a = min (min_vector (device_alpha1), current_alpha);
390- printf (" alpha: %f\n " , a);
391422 return a;
392423}
393424template class BarrierEnergy <float , 2 >;
0 commit comments