@@ -11,7 +11,7 @@ template <typename T, int dim>
1111struct BarrierEnergy <T, dim>::Impl
1212{
1313 DeviceBuffer<T> device_x;
14- DeviceBuffer<T> device_contact_area, device_n, device_o;
14+ DeviceBuffer<T> device_contact_area, device_n,device_n_ceil, device_o;
1515 int N;
1616 DeviceBuffer<T> device_grad;
1717 DeviceTripletMatrix<T, 1 > device_hess;
@@ -38,9 +38,12 @@ BarrierEnergy<T, dim>::BarrierEnergy(const std::vector<T> &x, const std::vector<
3838 pimpl_->N = x.size () / dim;
3939 pimpl_->device_x .copy_from (x);
4040 pimpl_->device_contact_area .copy_from (contact_area);
41+ std::vector<T> n_ceil (dim);
42+ n_ceil[1 ]=-1 ;
43+ pimpl_->device_n_ceil .copy_from (n_ceil);
4144 pimpl_->device_n .copy_from (n);
4245 pimpl_->device_o .copy_from (o);
43- pimpl_->device_hess .resize_triplets (pimpl_->N * dim * dim);
46+ pimpl_->device_hess .resize_triplets (( pimpl_->N * 2 - 1 ) * dim * dim);
4447 pimpl_->device_hess .reshape (x.size (), x.size ());
4548 pimpl_->device_grad .resize (pimpl_->N * dim);
4649}
@@ -57,20 +60,32 @@ T BarrierEnergy<T, dim>::val()
5760 auto &device_x = pimpl_->device_x ;
5861 auto &device_contact_area = pimpl_->device_contact_area ;
5962 auto &device_n = pimpl_->device_n ;
63+ auto &device_n_ceil = pimpl_->device_n_ceil ;
6064 auto &device_o = pimpl_->device_o ;
6165 int N = device_x.size () / dim;
62- DeviceBuffer<T> device_val (N);
63- ParallelFor (256 ).apply (N, [device_val = device_val.viewer (), device_x = device_x.cviewer (), device_contact_area = device_contact_area.cviewer (), device_n = device_n.cviewer (), device_o = device_o.cviewer ()] __device__ (int i) mutable
66+ DeviceBuffer<T> device_val1 (N);
67+ DeviceBuffer<T> device_val2 (N);
68+ ParallelFor (256 ).apply (N, [device_val1 = device_val1.viewer (), device_x = device_x.cviewer (), device_contact_area = device_contact_area.cviewer (), device_n = device_n.cviewer (), device_o = device_o.cviewer ()] __device__ (int i) mutable
6469 { T d = 0 ;
6570 for (int j=0 ;j<dim;j++){
6671 d += device_n (j)*(device_x (i*dim+j)-device_o (j));
6772 }
6873 if (d<dhat){
6974 T s = d / dhat;
70- device_val (i)= kappa * device_contact_area (i) * dhat/2 *(s-1 )*log (s);
75+ device_val1 (i)= kappa * device_contact_area (i) * dhat/2 *(s-1 )*log (s);
7176 } })
7277 .wait ();
73- return devicesum (device_val);
78+ ParallelFor (256 ).apply (N-1 , [device_val2 = device_val2.viewer (), device_x = device_x.cviewer (), device_contact_area = device_contact_area.cviewer (), device_n_ceil = device_n_ceil.cviewer (), device_o = device_o.cviewer (),N] __device__ (int i) mutable
79+ { T d = 0 ;
80+ for (int j=0 ;j<dim;j++){
81+ d += device_n_ceil (j)*(device_x (i*dim+j)-device_x ((N-1 )*dim+j));
82+ }
83+ if (d<dhat){
84+ T s = d / dhat;
85+ device_val2 (i)= kappa * device_contact_area (i) * dhat/2 *(s-1 )*log (s);
86+ } })
87+ .wait ();
88+ return devicesum (device_val1)+devicesum (device_val2);
7489} // Calculate the energy
7590
7691template <typename T, int dim>
@@ -80,6 +95,7 @@ const DeviceBuffer<T> &BarrierEnergy<T, dim>::grad()
8095 auto &device_contact_area = pimpl_->device_contact_area ;
8196 int N = device_x.size () / dim;
8297 auto &device_n = pimpl_->device_n ;
98+ auto &device_n_ceil = pimpl_->device_n_ceil ;
8399 auto &device_o = pimpl_->device_o ;
84100 auto &device_grad = pimpl_->device_grad ;
85101 device_grad.fill (0 );
@@ -99,6 +115,23 @@ const DeviceBuffer<T> &BarrierEnergy<T, dim>::grad()
99115 }
100116 } })
101117 .wait ();
118+ ParallelFor (256 ).apply (N-1 , [device_x = device_x.cviewer (), device_contact_area = device_contact_area.cviewer (), device_grad = device_grad.viewer (), device_n_ceil = device_n_ceil.cviewer (), device_o = device_o.cviewer (),N] __device__ (int i) mutable
119+
120+ {
121+ T d = 0 ;
122+ for (int j=0 ;j<dim;j++){
123+ d += device_n_ceil (j)*(device_x (i*dim+j)-device_x ((N-1 )*dim+j));
124+ }
125+ if (d < dhat)
126+ {
127+ T s = d / dhat;
128+ for (int j = 0 ; j < dim; j++)
129+ {
130+ T grad =device_contact_area (i) * dhat * (kappa / 2 * (log (s) / dhat + (s - 1 ) / d)) * device_n_ceil (j);
131+ device_grad (i * dim + j) += grad;
132+ device_grad ((N-1 ) * dim + j) -= grad;
133+ }
134+ } }).wait ();
102135 return device_grad;
103136}
104137
@@ -108,6 +141,7 @@ const DeviceTripletMatrix<T, 1> &BarrierEnergy<T, dim>::hess()
108141 auto &device_x = pimpl_->device_x ;
109142 auto &device_contact_area = pimpl_->device_contact_area ;
110143 auto &device_n = pimpl_->device_n ;
144+ auto &device_n_ceil = pimpl_->device_n_ceil ;
111145 auto &device_o = pimpl_->device_o ;
112146 auto &device_hess = pimpl_->device_hess ;
113147 auto device_hess_row_idx = device_hess.row_indices ();
@@ -144,6 +178,35 @@ const DeviceTripletMatrix<T, 1> &BarrierEnergy<T, dim>::hess()
144178 }
145179 } })
146180 .wait ();
181+ ParallelFor (256 ).apply (N-1 , [device_x = device_x.cviewer (), device_contact_area = device_contact_area.cviewer (), device_hess_val = device_hess_val.viewer (), device_hess_row_idx = device_hess_row_idx.viewer (), device_hess_col_idx = device_hess_col_idx.viewer (), N, device_n_ceil = device_n_ceil.cviewer (), device_o = device_o.cviewer ()] __device__ (int i) mutable
182+ {
183+ T d = 0 ;
184+ for (int j = 0 ; j < dim; j++)
185+ {
186+ d += device_n_ceil (j) * (device_x (i * dim + j) - device_x ((N-1 ) * dim + j));
187+ }
188+ if (d < dhat)
189+ for (int j = 0 ; j < dim; j++)
190+ {
191+ for (int k = 0 ; k < dim; k++)
192+ {
193+ int idx =N*dim*dim+ i * dim * dim + j * dim + k;
194+ device_hess_row_idx (idx) = (N-1 ) * dim + j;
195+ device_hess_col_idx (idx) = (N-1 ) * dim + k;
196+ device_hess_val (idx) = device_contact_area (i) * dhat * kappa / (2 * d * d * dhat) * (d + dhat) * device_n_ceil (j) * device_n_ceil (k);
197+ }
198+ }
199+ else
200+ for (int j = 0 ; j < dim; j++)
201+ {
202+ for (int k = 0 ; k < dim; k++)
203+ {
204+ int idx = N*dim*dim+i * dim * dim + j * dim + k;
205+ device_hess_row_idx (idx) = (N-1 ) * dim + j;
206+ device_hess_col_idx (idx) = (N-1 ) * dim + k;
207+ device_hess_val (idx) = 0 ;
208+ }
209+ } }).wait ();
147210 return device_hess;
148211
149212} // Calculate the Hessian of the energy
@@ -153,6 +216,7 @@ T BarrierEnergy<T, dim>::init_step_size(const DeviceBuffer<T> &p)
153216{
154217 auto &device_x = pimpl_->device_x ;
155218 auto &device_n = pimpl_->device_n ;
219+ auto &device_n_ceil = pimpl_->device_n_ceil ;
156220 auto &device_o = pimpl_->device_o ;
157221 int N = device_x.size () / dim;
158222 DeviceBuffer<T> device_alpha (N);
@@ -176,6 +240,26 @@ T BarrierEnergy<T, dim>::init_step_size(const DeviceBuffer<T> &p)
176240 device_alpha (i) = min (device_alpha (i), 0.9 * alpha / -p_n);
177241 } })
178242 .wait ();
243+
244+ ParallelFor (256 )
245+ .apply (N-1 , [device_x = device_x.cviewer (), p = p.cviewer (), device_alpha = device_alpha.viewer (), device_n_ceil = device_n_ceil.cviewer (), device_o = device_o.cviewer (),N] __device__ (int i) mutable
246+
247+ {
248+ T p_n = 0 ;
249+ for (int j = 0 ; j < dim; j++)
250+ {
251+ p_n += p (i * dim + j) * device_n_ceil (j);
252+ }
253+ if (p_n < 0 )
254+ {
255+ T alpha = 0 ;
256+ for (int j = 0 ; j < dim; j++)
257+ {
258+ alpha += device_n_ceil (j) * (device_x (i * dim + j) - device_x ((N-1 ) * dim + j));
259+ }
260+ device_alpha (i) = min (device_alpha (i), 0.9 * alpha / -p_n);
261+ } })
262+ .wait ();
179263 return min_vector (device_alpha);
180264}
181265template class BarrierEnergy <float , 2 >;
0 commit comments