@@ -153,22 +153,22 @@ T FrictionEnergy<T, dim>::val()
153153 device_val1 (i) = device_mu_lambda (i) * val;
154154 } })
155155 .wait ();
156- // ParallelFor(256).apply(pimpl_->npe, [device_val2 = device_val2.viewer(), device_v = device_v.cviewer(), device_mu_lambda_self = pimpl_->device_mu_lambda_self.cviewer(), device_n_self = pimpl_->device_n_self.cviewer(), device_r_self = pimpl_->device_r_self.cviewer(), device_bp = device_bp.cviewer(), device_be = device_be.cviewer(), Nbp, Nbe, hhat, this] __device__(int i) mutable
157- // {
158- // if (device_mu_lambda_self(i) > 0)
159- // {
160- // int xI = device_bp(i / Nbe);
161- // int eI0 = device_be(2 * (i % Nbe)), eI1 = device_be(2 * (i % Nbe) + 1);
162- // Eigen::Vector<T, 2> vp, ve0, ve1;
163- // vp << device_v(xI * dim), device_v(xI * dim + 1);
164- // ve0 << device_v(eI0 * dim), device_v(eI0 * dim + 1);
165- // ve1 << device_v(eI1 * dim), device_v(eI1 * dim + 1);
166- // Eigen::Matrix<T, 2, 2> mT = Eigen::Matrix<T, 2, 2>::Identity() - device_n_self(i) * device_n_self(i).transpose();
167- // Eigen::Matrix<T, 2, 1> rel_v = vp - ((1 - device_r_self(i)) * ve0 + device_r_self(i) * ve1);
168- // Eigen::Matrix<T, 2, 1> vbar = mT.transpose() * rel_v;
169- // device_val2(i) = device_mu_lambda_self(i) * f0(vbar.norm(), epsv, hhat);
170- // } })
171- // .wait();
156+ ParallelFor (256 ).apply (pimpl_->npe , [device_val2 = device_val2.viewer (), device_v = device_v.cviewer (), device_mu_lambda_self = pimpl_->device_mu_lambda_self .cviewer (), device_n_self = pimpl_->device_n_self .cviewer (), device_r_self = pimpl_->device_r_self .cviewer (), device_bp = device_bp.cviewer (), device_be = device_be.cviewer (), Nbp, Nbe, hhat, this ] __device__ (int i) mutable
157+ {
158+ if (device_mu_lambda_self (i) > 0 )
159+ {
160+ int xI = device_bp (i / Nbe);
161+ int eI0 = device_be (2 * (i % Nbe)), eI1 = device_be (2 * (i % Nbe) + 1 );
162+ Eigen::Vector<T, 2 > vp, ve0, ve1;
163+ vp << device_v (xI * dim), device_v (xI * dim + 1 );
164+ ve0 << device_v (eI0 * dim), device_v (eI0 * dim + 1 );
165+ ve1 << device_v (eI1 * dim), device_v (eI1 * dim + 1 );
166+ Eigen::Matrix<T, 2 , 2 > T_mat = Eigen::Matrix<T, 2 , 2 >::Identity () - device_n_self (i) * device_n_self (i).transpose ();
167+ Eigen::Matrix<T, 2 , 1 > rel_v = vp - ((1 - device_r_self (i)) * ve0 + device_r_self (i) * ve1);
168+ Eigen::Matrix<T, 2 , 1 > vbar = T_mat * rel_v;
169+ device_val2 (i) = device_mu_lambda_self (i) *f0 (vbar.norm (), epsv, hhat);
170+ } })
171+ .wait ();
172172 return devicesum (device_val1) + devicesum (device_val2);
173173}
174174
@@ -207,29 +207,29 @@ const DeviceBuffer<T> &FrictionEnergy<T, dim>::grad()
207207 }
208208 } })
209209 .wait ();
210- // ParallelFor(256).apply(pimpl_->npe, [device_v = device_v.cviewer(), device_mu_lambda_self = pimpl_->device_mu_lambda_self.cviewer(), device_n_self = pimpl_->device_n_self.cviewer(), device_r_self = pimpl_->device_r_self.cviewer(), device_bp = device_bp.cviewer(), device_be = device_be.cviewer(), device_grad = device_grad.viewer(), Nbp, Nbe, hhat, this] __device__(int i) mutable
211- // {
212- // if (device_mu_lambda_self(i) > 0)
213- // {
214- // int xI = device_bp(i / Nbe);
215- // int eI0 = device_be(2 * (i % Nbe)), eI1 = device_be(2 * (i % Nbe) + 1);
216- // Eigen::Vector<T, 2> vp, ve0, ve1;
217- // vp << device_v(xI * dim), device_v(xI * dim + 1);
218- // ve0 << device_v(eI0 * dim), device_v(eI0 * dim + 1);
219- // ve1 << device_v(eI1 * dim), device_v(eI1 * dim + 1);
220- // Eigen::Matrix<T, 2, 2> mT = Eigen::Matrix<T, 2, 2>::Identity() - device_n_self(i) * device_n_self(i).transpose();
221- // Eigen::Matrix<T, 2, 1> rel_v = vp - ((1 - device_r_self(i)) * ve0 + device_r_self(i) * ve1);
222- // Eigen::Matrix<T, 2, 1> vbar = mT.transpose() * rel_v;
223- // T grad_factor = f1_div_vbarnorm(vbar.norm(), epsv);
224- // Eigen::Matrix<T, 2, 1> grad = grad_factor * mT * vbar;
225- // for (int j = 0; j < dim; ++j)
226- // {
227- // atomic_add(&device_grad(xI * dim + j), device_mu_lambda_self(i) * grad(j));
228- // atomic_add(&device_grad(eI0 * dim + j), device_mu_lambda_self(i) * grad(j) * -(1 - device_r_self(i)));
229- // atomic_add(&device_grad(eI1 * dim + j), device_mu_lambda_self(i) * grad(j) * device_r_self(i));
230- // }
231- // } })
232- .wait ();
210+ ParallelFor (256 ).apply (pimpl_->npe , [device_v = device_v.cviewer (), device_mu_lambda_self = pimpl_->device_mu_lambda_self .cviewer (), device_n_self = pimpl_->device_n_self .cviewer (), device_r_self = pimpl_->device_r_self .cviewer (), device_bp = device_bp.cviewer (), device_be = device_be.cviewer (), device_grad = device_grad.viewer (), Nbp, Nbe, hhat, this ] __device__ (int i) mutable
211+ {
212+ if (device_mu_lambda_self (i) > 0 )
213+ {
214+ int xI = device_bp (i / Nbe);
215+ int eI0 = device_be (2 * (i % Nbe)), eI1 = device_be (2 * (i % Nbe) + 1 );
216+ Eigen::Vector<T, 2 > vp, ve0, ve1;
217+ vp << device_v (xI * dim), device_v (xI * dim + 1 );
218+ ve0 << device_v (eI0 * dim), device_v (eI0 * dim + 1 );
219+ ve1 << device_v (eI1 * dim), device_v (eI1 * dim + 1 );
220+ Eigen::Matrix<T, 2 , 2 > Tmat = Eigen::Matrix<T, 2 , 2 >::Identity () - device_n_self (i) * device_n_self (i).transpose ();
221+ Eigen::Matrix<T, 2 , 1 > rel_v = vp - ((1 - device_r_self (i)) * ve0 + device_r_self (i) * ve1);
222+ Eigen::Matrix<T, 2 , 1 > vbar = Tmat * rel_v;
223+ T grad_factor = f1_div_vbarnorm (vbar.norm (), epsv);
224+ Eigen::Matrix<T, 2 , 1 > grad = grad_factor * Tmat * vbar;
225+ for (int j = 0 ; j < dim; ++j)
226+ {
227+ atomic_add (&device_grad (xI * dim + j), device_mu_lambda_self (i) * grad (j));
228+ atomic_add (&device_grad (eI0 * dim + j), device_mu_lambda_self (i) * grad (j) * -(1 - device_r_self (i)));
229+ atomic_add (&device_grad (eI1 * dim + j), device_mu_lambda_self (i) * grad (j) * (- device_r_self (i) ));
230+ }
231+ } })
232+ .wait ();
233233
234234 return device_grad;
235235}
@@ -248,6 +248,7 @@ const DeviceTripletMatrix<T, 1> &FrictionEnergy<T, dim>::hess()
248248 auto device_hess_col_idx = device_hess.col_indices ();
249249 auto device_hess_val = device_hess.values ();
250250 int N = device_v.size () / dim;
251+ device_hess_val.fill (0 );
251252 ParallelFor (256 ).apply (N, [device_v = device_v.cviewer (), device_mu_lambda = device_mu_lambda.cviewer (), device_hess_val = device_hess_val.viewer (), device_hess_row_idx = device_hess_row_idx.viewer (), device_hess_col_idx = device_hess_col_idx.viewer (), hhat, n, N, this ] __device__ (int i) mutable
252253 {
253254 Eigen::Matrix<T, dim, dim> T_mat = Eigen::Matrix<T, dim, dim>::Identity () - n * n.transpose ();
@@ -258,7 +259,6 @@ const DeviceTripletMatrix<T, 1> &FrictionEnergy<T, dim>::hess()
258259 int idx = i * dim * dim + j * dim + k;
259260 device_hess_row_idx (idx) = i * dim + j;
260261 device_hess_col_idx (idx) = i * dim + k;
261- device_hess_val (idx) = 0 ;
262262 }
263263 }
264264 if (device_mu_lambda (i) > 0 )
@@ -288,65 +288,65 @@ const DeviceTripletMatrix<T, 1> &FrictionEnergy<T, dim>::hess()
288288 }
289289 } })
290290 .wait ();
291- // ParallelFor(256).apply(pimpl_->npe, [N, device_v = device_v.cviewer(), device_mu_lambda_self = pimpl_->device_mu_lambda_self.cviewer(), device_n_self = pimpl_->device_n_self.cviewer(), device_r_self = pimpl_->device_r_self.cviewer(), device_hess_val = device_hess_val.viewer(), device_hess_row_idx = device_hess_row_idx.viewer(), device_hess_col_idx = device_hess_col_idx.viewer(), device_bp = device_bp.cviewer(), device_be = device_be.cviewer(), Nbp, Nbe, hhat, this] __device__(int i) mutable
292- // {
293- // int xI = device_bp(i / Nbe);
294- // int eI0 = device_be(2 * (i % Nbe)), eI1 = device_be(2 * (i % Nbe) + 1);
295- // int index[3] = { xI, eI0, eI1 };
296- // for (int nI = 0; nI < 3; ++nI)
297- // {
298- // for (int nJ = 0; nJ < 3; ++nJ)
299- // {
300- // for (int c = 0; c < 2; ++c)
301- // {
302- // for (int r = 0; r < 2; ++r)
303- // {
304- // int idx = index[nI] * 2 + r;
305- // int jdx = index[nJ] * 2 + c;
306- // int kdx = N * dim * dim+nI*12+nJ*4+c*2+ r;
307- // device_hess_row_idx(kdx) = idx;
308- // device_hess_col_idx(kdx) = jdx;
309- // }
310- // }
311- // }
312- // }
313- // if (device_mu_lambda_self(i) > 0)
314- // {
291+ ParallelFor (256 ).apply (pimpl_->npe , [N, device_v = device_v.cviewer (), device_mu_lambda_self = pimpl_->device_mu_lambda_self .cviewer (), device_n_self = pimpl_->device_n_self .cviewer (), device_r_self = pimpl_->device_r_self .cviewer (), device_hess_val = device_hess_val.viewer (), device_hess_row_idx = device_hess_row_idx.viewer (), device_hess_col_idx = device_hess_col_idx.viewer (), device_bp = device_bp.cviewer (), device_be = device_be.cviewer (), Nbp, Nbe, hhat, this ] __device__ (int i) mutable
292+ {
293+ int xI = device_bp (i / Nbe);
294+ int eI0 = device_be (2 * (i % Nbe)), eI1 = device_be (2 * (i % Nbe) + 1 );
295+ int index[3 ] = {xI, eI0, eI1};
296+ for (int nI = 0 ; nI < 3 ; ++nI)
297+ {
298+ for (int nJ = 0 ; nJ < 3 ; ++nJ)
299+ {
300+ for (int c = 0 ; c < 2 ; ++c)
301+ {
302+ for (int r = 0 ; r < 2 ; ++r)
303+ {
304+ int idx = index[nI] * 2 + r;
305+ int jdx = index[nJ] * 2 + c;
306+ int kdx = N * dim * dim +i* 36 + nI * 12 + nJ * 4 + c * 2 + r;
307+ device_hess_row_idx (kdx) = idx;
308+ device_hess_col_idx (kdx) = jdx;
309+ }
310+ }
311+ }
312+ }
313+ if (device_mu_lambda_self (i) > 0 )
314+ {
315315
316- // Eigen::Vector<T, 2> vp, ve0, ve1;
317- // vp << device_v(xI * dim), device_v(xI * dim + 1);
318- // ve0 << device_v(eI0 * dim), device_v(eI0 * dim + 1);
319- // ve1 << device_v(eI1 * dim), device_v(eI1 * dim + 1);
320- // Eigen::Matrix<T, 2, 2> mT = Eigen::Matrix<T, 2, 2>::Identity() - device_n_self(i) * device_n_self(i).transpose();
321- // Eigen::Matrix<T, 2, 1> rel_v = vp - ((1 - device_r_self(i)) * ve0 + device_r_self(i) * ve1);
322- // Eigen::Matrix<T, 2, 1> vbar = mT.transpose() * rel_v;
323- // T vbarnorm = vbar.norm();
324- // Eigen::Matrix<T, 2, 2> inner_term = Eigen::Matrix<T, 2, 2>::Identity() * f1_div_vbarnorm(vbarnorm, epsv);
325- // if (vbarnorm != 0)
326- // {
327- // inner_term += f_hess_term(vbarnorm, epsv) / vbarnorm * vbar * vbar.transpose();
328- // }
329- // Eigen::Matrix<T, 2, 2> local_hess;
330- // make_PSD(inner_term, local_hess);
331- // local_hess = device_mu_lambda_self(i) * mT * local_hess * mT .transpose() / hhat;
316+ Eigen::Vector<T, 2 > vp, ve0, ve1;
317+ vp << device_v (xI * dim), device_v (xI * dim + 1 );
318+ ve0 << device_v (eI0 * dim), device_v (eI0 * dim + 1 );
319+ ve1 << device_v (eI1 * dim), device_v (eI1 * dim + 1 );
320+ Eigen::Matrix<T, 2 , 2 > Tmat = Eigen::Matrix<T, 2 , 2 >::Identity () - device_n_self (i) * device_n_self (i).transpose ();
321+ Eigen::Matrix<T, 2 , 1 > rel_v = vp - ((1 - device_r_self (i)) * ve0 + device_r_self (i) * ve1);
322+ Eigen::Matrix<T, 2 , 1 > vbar = Tmat * rel_v;
323+ T vbarnorm = vbar.norm ();
324+ Eigen::Matrix<T, 2 , 2 > inner_term = Eigen::Matrix<T, 2 , 2 >::Identity () * f1_div_vbarnorm (vbarnorm, epsv);
325+ if (vbarnorm != 0 )
326+ {
327+ inner_term += f_hess_term (vbarnorm, epsv) / vbarnorm * vbar * vbar.transpose ();
328+ }
329+ Eigen::Matrix<T, 2 , 2 > local_hess;
330+ make_PSD (inner_term, local_hess);
331+ local_hess = device_mu_lambda_self (i) * Tmat * local_hess * Tmat .transpose () / hhat;
332332
333- // T d_rel_v_dv[3] = {1, -(1 - device_r_self(i)), -device_r_self(i)};
334- // for (int nI = 0; nI < 3; ++nI)
335- // {
336- // for (int nJ = 0; nJ < 3; ++nJ)
337- // {
338- // for (int c = 0; c < 2; ++c)
339- // {
340- // for (int r = 0; r < 2; ++r)
341- // {
342- // int kdx =N * dim * dim + nI * 12 + nJ * 4 + c * 2 + r;
343- // device_hess_val(kdx) = d_rel_v_dv[nI] * d_rel_v_dv[nJ] * local_hess(r, c);
344- // }
345- // }
346- // }
347- // }
348- // } })
349- // .wait();
333+ T d_rel_v_dv[3 ] = {1 , -(1 - device_r_self (i)), -device_r_self (i)};
334+ for (int nI = 0 ; nI < 3 ; ++nI)
335+ {
336+ for (int nJ = 0 ; nJ < 3 ; ++nJ)
337+ {
338+ for (int c = 0 ; c < 2 ; ++c)
339+ {
340+ for (int r = 0 ; r < 2 ; ++r)
341+ {
342+ int kdx =N * dim * dim +i* 36 + nI * 12 + nJ * 4 + c * 2 + r;
343+ device_hess_val (kdx) = d_rel_v_dv[nI] * d_rel_v_dv[nJ] * local_hess (r, c);
344+ }
345+ }
346+ }
347+ }
348+ } })
349+ .wait ();
350350 return device_hess;
351351}
352352template class FrictionEnergy <float , 2 >;
0 commit comments