init grad: preallocate gradient/Hessian buffers and const-qualify energy accessors

Roushelfy · Roushelfy · commit 93c0378aef46 · 2024-05-15T11:57:18.000+08:00
diff --git a/simulators/1_mass_spring/include/InertialEnergy.h b/simulators/1_mass_spring/include/InertialEnergy.h
@@ -21,13 +21,13 @@ class InertialEnergy
     InertialEnergy &operator=(InertialEnergy &&rhs);
     InertialEnergy &operator=(const InertialEnergy &rhs);
 
-    void update_x(DeviceBuffer<T> &x);
+    void update_x(const DeviceBuffer<T> &x);
     void generate_hess();
-    void update_x_tilde(DeviceBuffer<T> &x_tilde);
+    void update_x_tilde(const DeviceBuffer<T> &x_tilde);
     void update_m(T m);
-    T val();                           // Calculate the value of the energy
-    DeviceBuffer<T> &grad();           // Calculate the gradient of the energy
-    DeviceTripletMatrix<T, 1> &hess(); // Calculate the Hessian matrix of the energy
+    T val();                                 // Calculate the value of the energy
+    const DeviceBuffer<T> &grad();           // Calculate the gradient of the energy
+    const DeviceTripletMatrix<T, 1> &hess(); // Calculate the Hessian matrix of the energy
 
 private:
     // The implementation details of the VecAdder class are placed in the implementation class declared here.
diff --git a/simulators/1_mass_spring/include/MassSpringEnergy.h b/simulators/1_mass_spring/include/MassSpringEnergy.h
@@ -15,13 +15,13 @@ class MassSpringEnergy
     MassSpringEnergy(const MassSpringEnergy &rhs);
     MassSpringEnergy &operator=(MassSpringEnergy &&rhs);
 
-    void update_x(DeviceBuffer<T> &x);
+    void update_x(const DeviceBuffer<T> &x);
     void update_e(const std::vector<int> &e);
     void update_l2(const std::vector<T> &l2);
     void update_k(const std::vector<T> &k);
-    T val();                           // Calculate the value of the energy
-    DeviceBuffer<T> &grad();           // Calculate the gradient of the energy
-    DeviceTripletMatrix<T, 1> &hess(); // Calculate the Hessian matrix of the energy
+    T val();                                 // Calculate the value of the energy
+    const DeviceBuffer<T> &grad();           // Calculate the gradient of the energy
+    const DeviceTripletMatrix<T, 1> &hess(); // Calculate the Hessian matrix of the energy
 
 private:
     // The implementation details of the VecAdder class are placed in the implementation class declared here.
diff --git a/simulators/1_mass_spring/include/device_uti.h b/simulators/1_mass_spring/include/device_uti.h
@@ -4,7 +4,7 @@
 #include <Eigen/Dense>
 // utility functions
 template <typename T>
-T devicesum(muda::DeviceBuffer<T> &buffer);
+T devicesum(const muda::DeviceBuffer<T> &buffer);
 
 template <typename T, int Size>
 void __device__ make_PSD(const Eigen::Matrix<T, Size, Size> &hess, Eigen::Matrix<T, Size, Size> &PSD);
diff --git a/simulators/1_mass_spring/include/uti.h b/simulators/1_mass_spring/include/uti.h
@@ -20,4 +20,7 @@ template <typename T>
 T max_vector(const DeviceBuffer<T> &a);
 
 template <typename T>
-void search_dir(const DeviceBuffer<T> &grad, const DeviceTripletMatrix<T, 1> &hess, DeviceBuffer<T> &dir);
+void search_dir(const DeviceBuffer<T> &grad, const DeviceTripletMatrix<T, 1> &hess, DeviceBuffer<T> &dir);
+
+template <typename T>
+void display_vec(const DeviceBuffer<T> &vec);
diff --git a/simulators/1_mass_spring/src/InertialEnergy.cu b/simulators/1_mass_spring/src/InertialEnergy.cu
@@ -39,6 +39,11 @@ InertialEnergy<T, dim>::InertialEnergy(int N, T m) : pimpl_{std::make_unique<Imp
 template <typename T, int dim>
 InertialEnergy<T, dim>::Impl::Impl(int N_, T m_) : N(N_), m(m_)
 {
+	device_x.resize(N * dim);
+	device_x_tilde.resize(N * dim);
+	device_hess.resize_triplets(N * dim);
+	device_hess.reshape(N * dim, N * dim);
+	device_grad.resize(N * dim);
 }
 template <typename T, int dim>
 void InertialEnergy<T, dim>::generate_hess()
@@ -54,18 +59,20 @@ void InertialEnergy<T, dim>::generate_hess()
 				   device_hess_row_indices(i) = i;
 				   device_hess_col_indices(i) = i;
 				   device_hess_values(i) = m;
+				   // std::cout << device_hess_values(i) << ' ' << device_hess_row_indices(i) << ' ' << device_hess_col_indices(i) << std::endl;
+				   // printf("%f %d %d\n", device_hess_values(i), device_hess_row_indices(i), device_hess_col_indices(i));
 			   })
 		.wait();
 }
 
 template <typename T, int dim>
-void InertialEnergy<T, dim>::update_x(DeviceBuffer<T> &x)
+void InertialEnergy<T, dim>::update_x(const DeviceBuffer<T> &x)
 {
 	pimpl_->device_x.view().copy_from(x);
 }
 
 template <typename T, int dim>
-void InertialEnergy<T, dim>::update_x_tilde(DeviceBuffer<T> &x_tilde)
+void InertialEnergy<T, dim>::update_x_tilde(const DeviceBuffer<T> &x_tilde)
 {
 	pimpl_->device_x_tilde.view().copy_from(x_tilde);
 }
@@ -95,11 +102,11 @@ T InertialEnergy<T, dim>::val()
 }
 
 template <typename T, int dim>
-DeviceBuffer<T> &InertialEnergy<T, dim>::grad()
+const DeviceBuffer<T> &InertialEnergy<T, dim>::grad()
 {
 	auto &device_x = pimpl_->device_x;
 	auto &device_x_tilde = pimpl_->device_x_tilde;
-	auto &m = pimpl_->m;
+	auto m = pimpl_->m;
 	auto N = pimpl_->N * dim;
 	auto &device_grad = pimpl_->device_grad;
 	ParallelFor(256)
@@ -109,11 +116,12 @@ DeviceBuffer<T> &InertialEnergy<T, dim>::grad()
 				   device_grad(i) = m * (device_x(i) - device_x_tilde(i));
 			   })
 		.wait();
+	// display_vec(device_grad);
 	return device_grad;
 } // Calculate the gradient of the energy
 
 template <typename T, int dim>
-DeviceTripletMatrix<T, 1> &InertialEnergy<T, dim>::hess()
+const DeviceTripletMatrix<T, 1> &InertialEnergy<T, dim>::hess()
 {
 	return pimpl_->device_hess;
 } // Calculate the Hessian matrix of the energy
diff --git a/simulators/1_mass_spring/src/MassSpringEnergy.cu b/simulators/1_mass_spring/src/MassSpringEnergy.cu
@@ -40,11 +40,14 @@ MassSpringEnergy<T, dim>::MassSpringEnergy(const std::vector<T> &x, const std::v
 	pimpl_->device_e.copy_from(e);
 	pimpl_->device_l2.copy_from(l2);
 	pimpl_->device_k.copy_from(k);
+	pimpl_->device_hess.resize_triplets(pimpl_->device_e.size() / 2 * dim * dim * 4);
+	pimpl_->device_hess.reshape(x.size(), x.size());
+	pimpl_->device_grad.resize(pimpl_->N * dim);
 	int size = e.size() / 2;
 }
 
 template <typename T, int dim>
-void MassSpringEnergy<T, dim>::update_x(DeviceBuffer<T> &x)
+void MassSpringEnergy<T, dim>::update_x(const DeviceBuffer<T> &x)
 {
 	pimpl_->device_x.view().copy_from(x);
 }
@@ -92,14 +95,15 @@ T MassSpringEnergy<T, dim>::val()
 } // Calculate the energy
 
 template <typename T, int dim>
-DeviceBuffer<T> &MassSpringEnergy<T, dim>::grad()
+const DeviceBuffer<T> &MassSpringEnergy<T, dim>::grad()
 {
 	auto &device_x = pimpl_->device_x;
 	auto &device_e = pimpl_->device_e;
 	auto &device_l2 = pimpl_->device_l2;
 	auto &device_k = pimpl_->device_k;
 	auto N = pimpl_->device_e.size() / 2;
-	DeviceBuffer<T> device_grad(pimpl_->N * dim);
+	auto &device_grad = pimpl_->device_grad;
+	device_grad.fill(0);
 	ParallelFor(256).apply(N, [device_x = device_x.cviewer(), device_e = device_e.cviewer(), device_l2 = device_l2.cviewer(), device_k = device_k.cviewer(), device_grad = device_grad.viewer()] __device__(int i) mutable
 						   {
 							int idx1= device_e(2 * i); // First node index
@@ -109,26 +113,27 @@ DeviceBuffer<T> &MassSpringEnergy<T, dim>::grad()
 							for (int d = 0; d < dim;d++){
 								diffi[d] = device_x(dim * idx1 + d) - device_x(dim * idx2 + d);
 								diff += diffi[d] * diffi[d];
-						   }
+							}
 						   T factor = 2 * device_k(i) * (diff / device_l2(i) -1);
 						   for(int d=0;d<dim;d++){
 							   atomicAdd(&device_grad(dim * idx1 + d), factor * diffi[d]);
 							   atomicAdd(&device_grad(dim * idx2 + d), -factor * diffi[d]);
-							   
+							  
 						   } })
 		.wait();
+	// display_vec(device_grad);
 	return device_grad;
 }
 
 template <typename T, int dim>
-DeviceTripletMatrix<T, 1> &MassSpringEnergy<T, dim>::hess()
+const DeviceTripletMatrix<T, 1> &MassSpringEnergy<T, dim>::hess()
 {
 	auto &device_x = pimpl_->device_x;
 	auto &device_e = pimpl_->device_e;
 	auto &device_l2 = pimpl_->device_l2;
 	auto &device_k = pimpl_->device_k;
 	auto N = device_e.size() / 2;
-	auto device_hess = pimpl_->device_hess;
+	auto &device_hess = pimpl_->device_hess;
 	auto device_hess_row_idx = device_hess.row_indices();
 	auto device_hess_col_idx = device_hess.col_indices();
 	auto device_hess_val = device_hess.values();
@@ -149,8 +154,7 @@ DeviceTripletMatrix<T, 1> &MassSpringEnergy<T, dim>::hess()
 		Eigen::Matrix<T, dim * 2, dim * 2> H_block, H_local;
 		H_block << H_diff, -H_diff,
 			-H_diff, H_diff;
-
-		//make_PSD(H_block, H_local);
+		make_PSD(H_block, H_local);
 		// add to global matrix
 		for (int ni = 0; ni < 2; ni++)
 			for (int nj = 0; nj < 2; nj++)
@@ -160,7 +164,7 @@ DeviceTripletMatrix<T, 1> &MassSpringEnergy<T, dim>::hess()
 					for (int d2 = 0; d2 < dim; d2++){
 						device_hess_row_idx(indStart + d1 * dim + d2)= idx[ni]*dim + d1;
 						device_hess_col_idx(indStart + d1 * dim + d2)= idx[nj] * dim + d2;
-						device_hess_val(indStart + d1 * dim + d2)= H_local(ni * dim + d1, nj * dim + d2);
+						device_hess_val(indStart + d1 * dim + d2) = H_local(ni * dim + d1, nj * dim + d2);
 					}
 			} })
 		.wait();
diff --git a/simulators/1_mass_spring/src/device_uti.cu b/simulators/1_mass_spring/src/device_uti.cu
@@ -2,7 +2,7 @@
 using namespace muda;
 
 template <typename T>
-T devicesum(DeviceBuffer<T> &buffer)
+T devicesum(const DeviceBuffer<T> &buffer)
 {
     T sum = 0.0f;                  // Result of the reduction
     T *d_out;                      // Device memory to store the result of the reduction
@@ -18,8 +18,8 @@ T devicesum(DeviceBuffer<T> &buffer)
     cudaFree(d_out);
     return sum;
 }
-template float devicesum<float>(DeviceBuffer<float> &);
-template double devicesum<double>(DeviceBuffer<double> &);
+template float devicesum<float>(const DeviceBuffer<float> &);
+template double devicesum<double>(const DeviceBuffer<double> &);
 
 template <typename T, int Size>
 void __device__ make_PSD(const Eigen::Matrix<T, Size, Size> &hess, Eigen::Matrix<T, Size, Size> &PSD)
diff --git a/simulators/1_mass_spring/src/main.cpp b/simulators/1_mass_spring/src/main.cpp
@@ -2,7 +2,7 @@
 
 int main()
 {
-	float rho = 1000, k = 1e5, initial_stretch = 1.4, n_seg = 20, h = 0.004, side_len = 2, tol = 0.01;
+	float rho = 1000, k = 1e5, initial_stretch = 1.4, n_seg = 10, h = 0.004, side_len = 1, tol = 0.01;
 	// printf("Running mass-spring simulator with parameters: rho = %f, k = %f, initial_stretch = %f, n_seg = %d, h = %f, side_len = %f, tol = %f\n", rho, k, initial_stretch, n_seg, h, side_len, tol);
 	MassSpringSimulator<float, 2> simulator(rho, side_len, initial_stretch, k, h, tol, n_seg);
 	simulator.run();
diff --git a/simulators/1_mass_spring/src/simulator.cu b/simulators/1_mass_spring/src/simulator.cu
@@ -18,9 +18,9 @@ struct MassSpringSimulator<T, dim>::Impl
     InertialEnergy<T, dim> inertialenergy;
     MassSpringEnergy<T, dim> massspringenergy;
     Impl(T rho, T side_len, T initial_stretch, T K, T h_, T tol_, int n_seg);
-    void update_x(DeviceBuffer<T> &new_x);
-    void update_x_tilde(DeviceBuffer<T> &new_x_tilde);
-    void update_v(DeviceBuffer<T> &new_v);
+    void update_x(const DeviceBuffer<T> &new_x);
+    void update_x_tilde(const DeviceBuffer<T> &new_x_tilde);
+    void update_v(const DeviceBuffer<T> &new_v);
     T IP_val();
     void step_forward();
     void draw();
@@ -110,23 +110,21 @@ void MassSpringSimulator<T, dim>::Impl::step_forward()
     T E_last = IP_val();
     DeviceBuffer<T> p = search_direction();
     T residual = max_vector(p) / h;
-    // printf("Initial residual %f\n", residual);
+    std::cout << "Initial residual " << residual << "\n";
     while (residual > tol)
     {
-        // std::cout << "Iteration " << iter << ":\n";
-        // std::cout << "residual = " << residual << "\n";
-
         // Line search
         T alpha = 1;
         DeviceBuffer<T> x0 = x;
-        update_x(add_vector<T>(x, p, 1.0, alpha));
+        update_x(add_vector<T>(x0, p, 1.0, alpha));
         while (IP_val() > E_last)
         {
             alpha /= 2;
             update_x(add_vector<T>(x0, p, 1.0, alpha));
         }
         // std::cout << "step size = " << alpha << "\n";
         E_last = IP_val();
+        // std::cout << "Iteration " << iter << " residual " << residual << " E_last " << E_last << "\n";
         p = search_direction();
         residual = max_vector(p) / h;
         iter += 1;
@@ -144,20 +142,20 @@ T MassSpringSimulator<T, dim>::Impl::screen_projection_y(T point)
     return resolution - (offset + scale * point);
 }
 template <typename T, int dim>
-void MassSpringSimulator<T, dim>::Impl::update_x(DeviceBuffer<T> &new_x)
+void MassSpringSimulator<T, dim>::Impl::update_x(const DeviceBuffer<T> &new_x)
 {
     inertialenergy.update_x(new_x);
     massspringenergy.update_x(new_x);
     new_x.copy_to(x);
 }
 template <typename T, int dim>
-void MassSpringSimulator<T, dim>::Impl::update_x_tilde(DeviceBuffer<T> &new_x_tilde)
+void MassSpringSimulator<T, dim>::Impl::update_x_tilde(const DeviceBuffer<T> &new_x_tilde)
 {
     inertialenergy.update_x_tilde(new_x_tilde);
     new_x_tilde.copy_to(x_tilde);
 }
 template <typename T, int dim>
-void MassSpringSimulator<T, dim>::Impl::update_v(DeviceBuffer<T> &new_v)
+void MassSpringSimulator<T, dim>::Impl::update_v(const DeviceBuffer<T> &new_v)
 {
     new_v.copy_to(v);
 }
@@ -197,7 +195,6 @@ T MassSpringSimulator<T, dim>::Impl::IP_val()
 template <typename T, int dim>
 DeviceBuffer<T> MassSpringSimulator<T, dim>::Impl::IP_grad()
 {
-
     return add_vector<T>(inertialenergy.grad(), massspringenergy.grad(), 1.0, h * h);
 }
 
@@ -206,14 +203,14 @@ DeviceTripletMatrix<T, 1> MassSpringSimulator<T, dim>::Impl::IP_hess()
 {
     DeviceTripletMatrix<T, 1> inertial_hess = inertialenergy.hess();
     DeviceTripletMatrix<T, 1> massspring_hess = massspringenergy.hess();
-    DeviceTripletMatrix<T, 1> hess;
-    hess = add_triplet<T>(inertial_hess, massspring_hess, 1.0, h * h);
-    return inertial_hess;
+    DeviceTripletMatrix<T, 1> hess = add_triplet<T>(inertial_hess, massspring_hess, 1.0, h * h);
+    return hess;
 }
 template <typename T, int dim>
 DeviceBuffer<T> MassSpringSimulator<T, dim>::Impl::search_direction()
 {
     DeviceBuffer<T> dir;
+    dir.resize(x.size());
     search_dir(IP_grad(), IP_hess(), dir);
     return dir;
 }
diff --git a/simulators/1_mass_spring/src/uti.cu b/simulators/1_mass_spring/src/uti.cu
@@ -44,6 +44,7 @@ DeviceTripletMatrix<T, 1> add_triplet(const DeviceTripletMatrix<T, 1> &a, const
     int Nb = b.triplet_count();
     DeviceTripletMatrix<T, 1> c;
     c.resize_triplets(Na + Nb);
+    c.reshape(a.rows(), a.cols());
     ParallelFor(256)
         .apply(Na,
                [c_device_values = c.values().viewer(), c_device_row_indices = c.row_indices().viewer(), c_device_col_indices = c.col_indices().viewer(),
@@ -102,8 +103,9 @@ void search_dir(const DeviceBuffer<T> &grad, const DeviceTripletMatrix<T, 1> &he
     static LinearSystemContext ctx;
     auto neg_grad = mult_vector<T>(grad, -1);
     int N = grad.size();
-    Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, 1>> e_grad(neg_grad.data(), neg_grad.size());
-    DeviceDenseVector<T> grad_device(e_grad);
+    DeviceDenseVector<T> grad_device;
+    grad_device.resize(N);
+    grad_device.buffer_view().copy_from(neg_grad);
     DeviceCOOMatrix<T> A_coo;
     ctx.convert(hess, A_coo);
     DeviceCSRMatrix<T> A_csr;
@@ -115,4 +117,19 @@ void search_dir(const DeviceBuffer<T> &grad, const DeviceTripletMatrix<T, 1> &he
     dir.view().copy_from(dir_device.buffer_view());
 }
 template void search_dir<float>(const DeviceBuffer<float> &grad, const DeviceTripletMatrix<float, 1> &hess, DeviceBuffer<float> &dir);
-template void search_dir<double>(const DeviceBuffer<double> &grad, const DeviceTripletMatrix<double, 1> &hess, DeviceBuffer<double> &dir);
+template void search_dir<double>(const DeviceBuffer<double> &grad, const DeviceTripletMatrix<double, 1> &hess, DeviceBuffer<double> &dir);
+
+template <typename T>
+void display_vec(const DeviceBuffer<T> &vec)
+{
+    int N = vec.size();
+    ParallelFor(256)
+        .apply(N,
+               [vec = vec.cviewer()] __device__(int i) mutable
+               {
+                   printf("%d %f\n", i, vec(i));
+               })
+        .wait();
+}
+template void display_vec<float>(const DeviceBuffer<float> &vec);
+template void display_vec<double>(const DeviceBuffer<double> &vec);

Original file line number	Diff line number	Diff line change
`@@ -2,7 +2,7 @@`
`2`	`2`
`3`	`3`	`int main()`
`4`	`4`	`{`
`5`		`- float rho = 1000, k = 1e5, initial_stretch = 1.4, n_seg = 20, h = 0.004, side_len = 2, tol = 0.01;`
	`5`	`+ float rho = 1000, k = 1e5, initial_stretch = 1.4, n_seg = 10, h = 0.004, side_len = 1, tol = 0.01;`
`6`	`6`	`// printf("Running mass-spring simulator with parameters: rho = %f, k = %f, initial_stretch = %f, n_seg = %d, h = %f, side_len = %f, tol = %f\n", rho, k, initial_stretch, n_seg, h, side_len, tol);`
`7`	`7`	`MassSpringSimulator<float, 2> simulator(rho, side_len, initial_stretch, k, h, tol, n_seg);`
`8`	`8`	`simulator.run();`