Skip to content

Commit 93c0378

Browse files
committed
init grad
1 parent c178072 commit 93c0378

10 files changed

Lines changed: 77 additions & 48 deletions

File tree

simulators/1_mass_spring/include/InertialEnergy.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,13 +21,13 @@ class InertialEnergy
2121
InertialEnergy &operator=(InertialEnergy &&rhs);
2222
InertialEnergy &operator=(const InertialEnergy &rhs);
2323

24-
void update_x(DeviceBuffer<T> &x);
24+
void update_x(const DeviceBuffer<T> &x);
2525
void generate_hess();
26-
void update_x_tilde(DeviceBuffer<T> &x_tilde);
26+
void update_x_tilde(const DeviceBuffer<T> &x_tilde);
2727
void update_m(T m);
28-
T val(); // Calculate the value of the energy
29-
DeviceBuffer<T> &grad(); // Calculate the gradient of the energy
30-
DeviceTripletMatrix<T, 1> &hess(); // Calculate the Hessian matrix of the energy
28+
T val(); // Calculate the value of the energy
29+
const DeviceBuffer<T> &grad(); // Calculate the gradient of the energy
30+
const DeviceTripletMatrix<T, 1> &hess(); // Calculate the Hessian matrix of the energy
3131

3232
private:
3333
// The implementation details of the VecAdder class are placed in the implementation class declared here.

simulators/1_mass_spring/include/MassSpringEnergy.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,13 @@ class MassSpringEnergy
1515
MassSpringEnergy(const MassSpringEnergy &rhs);
1616
MassSpringEnergy &operator=(MassSpringEnergy &&rhs);
1717

18-
void update_x(DeviceBuffer<T> &x);
18+
void update_x(const DeviceBuffer<T> &x);
1919
void update_e(const std::vector<int> &e);
2020
void update_l2(const std::vector<T> &l2);
2121
void update_k(const std::vector<T> &k);
22-
T val(); // Calculate the value of the energy
23-
DeviceBuffer<T> &grad(); // Calculate the gradient of the energy
24-
DeviceTripletMatrix<T, 1> &hess(); // Calculate the Hessian matrix of the energy
22+
T val(); // Calculate the value of the energy
23+
const DeviceBuffer<T> &grad(); // Calculate the gradient of the energy
24+
const DeviceTripletMatrix<T, 1> &hess(); // Calculate the Hessian matrix of the energy
2525

2626
private:
2727
// The implementation details of the VecAdder class are placed in the implementation class declared here.

simulators/1_mass_spring/include/device_uti.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
#include <Eigen/Dense>
55
// utility functions
66
template <typename T>
7-
T devicesum(muda::DeviceBuffer<T> &buffer);
7+
T devicesum(const muda::DeviceBuffer<T> &buffer);
88

99
template <typename T, int Size>
1010
void __device__ make_PSD(const Eigen::Matrix<T, Size, Size> &hess, Eigen::Matrix<T, Size, Size> &PSD);

simulators/1_mass_spring/include/uti.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,4 +20,7 @@ template <typename T>
2020
T max_vector(const DeviceBuffer<T> &a);
2121

2222
template <typename T>
23-
void search_dir(const DeviceBuffer<T> &grad, const DeviceTripletMatrix<T, 1> &hess, DeviceBuffer<T> &dir);
23+
void search_dir(const DeviceBuffer<T> &grad, const DeviceTripletMatrix<T, 1> &hess, DeviceBuffer<T> &dir);
24+
25+
template <typename T>
26+
void display_vec(const DeviceBuffer<T> &vec);

simulators/1_mass_spring/src/InertialEnergy.cu

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,11 @@ InertialEnergy<T, dim>::InertialEnergy(int N, T m) : pimpl_{std::make_unique<Imp
3939
template <typename T, int dim>
4040
InertialEnergy<T, dim>::Impl::Impl(int N_, T m_) : N(N_), m(m_)
4141
{
42+
device_x.resize(N * dim);
43+
device_x_tilde.resize(N * dim);
44+
device_hess.resize_triplets(N * dim);
45+
device_hess.reshape(N * dim, N * dim);
46+
device_grad.resize(N * dim);
4247
}
4348
template <typename T, int dim>
4449
void InertialEnergy<T, dim>::generate_hess()
@@ -54,18 +59,20 @@ void InertialEnergy<T, dim>::generate_hess()
5459
device_hess_row_indices(i) = i;
5560
device_hess_col_indices(i) = i;
5661
device_hess_values(i) = m;
62+
// std::cout << device_hess_values(i) << ' ' << device_hess_row_indices(i) << ' ' << device_hess_col_indices(i) << std::endl;
63+
// printf("%f %d %d\n", device_hess_values(i), device_hess_row_indices(i), device_hess_col_indices(i));
5764
})
5865
.wait();
5966
}
6067

6168
template <typename T, int dim>
62-
void InertialEnergy<T, dim>::update_x(DeviceBuffer<T> &x)
69+
void InertialEnergy<T, dim>::update_x(const DeviceBuffer<T> &x)
6370
{
6471
pimpl_->device_x.view().copy_from(x);
6572
}
6673

6774
template <typename T, int dim>
68-
void InertialEnergy<T, dim>::update_x_tilde(DeviceBuffer<T> &x_tilde)
75+
void InertialEnergy<T, dim>::update_x_tilde(const DeviceBuffer<T> &x_tilde)
6976
{
7077
pimpl_->device_x_tilde.view().copy_from(x_tilde);
7178
}
@@ -95,11 +102,11 @@ T InertialEnergy<T, dim>::val()
95102
}
96103

97104
template <typename T, int dim>
98-
DeviceBuffer<T> &InertialEnergy<T, dim>::grad()
105+
const DeviceBuffer<T> &InertialEnergy<T, dim>::grad()
99106
{
100107
auto &device_x = pimpl_->device_x;
101108
auto &device_x_tilde = pimpl_->device_x_tilde;
102-
auto &m = pimpl_->m;
109+
auto m = pimpl_->m;
103110
auto N = pimpl_->N * dim;
104111
auto &device_grad = pimpl_->device_grad;
105112
ParallelFor(256)
@@ -109,11 +116,12 @@ DeviceBuffer<T> &InertialEnergy<T, dim>::grad()
109116
device_grad(i) = m * (device_x(i) - device_x_tilde(i));
110117
})
111118
.wait();
119+
// display_vec(device_grad);
112120
return device_grad;
113121
} // Calculate the gradient of the energy
114122

115123
template <typename T, int dim>
116-
DeviceTripletMatrix<T, 1> &InertialEnergy<T, dim>::hess()
124+
const DeviceTripletMatrix<T, 1> &InertialEnergy<T, dim>::hess()
117125
{
118126
return pimpl_->device_hess;
119127
} // Calculate the Hessian matrix of the energy

simulators/1_mass_spring/src/MassSpringEnergy.cu

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -40,11 +40,14 @@ MassSpringEnergy<T, dim>::MassSpringEnergy(const std::vector<T> &x, const std::v
4040
pimpl_->device_e.copy_from(e);
4141
pimpl_->device_l2.copy_from(l2);
4242
pimpl_->device_k.copy_from(k);
43+
pimpl_->device_hess.resize_triplets(pimpl_->device_e.size() / 2 * dim * dim * 4);
44+
pimpl_->device_hess.reshape(x.size(), x.size());
45+
pimpl_->device_grad.resize(pimpl_->N * dim);
4346
int size = e.size() / 2;
4447
}
4548

4649
template <typename T, int dim>
47-
void MassSpringEnergy<T, dim>::update_x(DeviceBuffer<T> &x)
50+
void MassSpringEnergy<T, dim>::update_x(const DeviceBuffer<T> &x)
4851
{
4952
pimpl_->device_x.view().copy_from(x);
5053
}
@@ -92,14 +95,15 @@ T MassSpringEnergy<T, dim>::val()
9295
} // Calculate the energy
9396

9497
template <typename T, int dim>
95-
DeviceBuffer<T> &MassSpringEnergy<T, dim>::grad()
98+
const DeviceBuffer<T> &MassSpringEnergy<T, dim>::grad()
9699
{
97100
auto &device_x = pimpl_->device_x;
98101
auto &device_e = pimpl_->device_e;
99102
auto &device_l2 = pimpl_->device_l2;
100103
auto &device_k = pimpl_->device_k;
101104
auto N = pimpl_->device_e.size() / 2;
102-
DeviceBuffer<T> device_grad(pimpl_->N * dim);
105+
auto &device_grad = pimpl_->device_grad;
106+
device_grad.fill(0);
103107
ParallelFor(256).apply(N, [device_x = device_x.cviewer(), device_e = device_e.cviewer(), device_l2 = device_l2.cviewer(), device_k = device_k.cviewer(), device_grad = device_grad.viewer()] __device__(int i) mutable
104108
{
105109
int idx1= device_e(2 * i); // First node index
@@ -109,26 +113,27 @@ DeviceBuffer<T> &MassSpringEnergy<T, dim>::grad()
109113
for (int d = 0; d < dim;d++){
110114
diffi[d] = device_x(dim * idx1 + d) - device_x(dim * idx2 + d);
111115
diff += diffi[d] * diffi[d];
112-
}
116+
}
113117
T factor = 2 * device_k(i) * (diff / device_l2(i) -1);
114118
for(int d=0;d<dim;d++){
115119
atomicAdd(&device_grad(dim * idx1 + d), factor * diffi[d]);
116120
atomicAdd(&device_grad(dim * idx2 + d), -factor * diffi[d]);
117-
121+
118122
} })
119123
.wait();
124+
// display_vec(device_grad);
120125
return device_grad;
121126
}
122127

123128
template <typename T, int dim>
124-
DeviceTripletMatrix<T, 1> &MassSpringEnergy<T, dim>::hess()
129+
const DeviceTripletMatrix<T, 1> &MassSpringEnergy<T, dim>::hess()
125130
{
126131
auto &device_x = pimpl_->device_x;
127132
auto &device_e = pimpl_->device_e;
128133
auto &device_l2 = pimpl_->device_l2;
129134
auto &device_k = pimpl_->device_k;
130135
auto N = device_e.size() / 2;
131-
auto device_hess = pimpl_->device_hess;
136+
auto &device_hess = pimpl_->device_hess;
132137
auto device_hess_row_idx = device_hess.row_indices();
133138
auto device_hess_col_idx = device_hess.col_indices();
134139
auto device_hess_val = device_hess.values();
@@ -149,8 +154,7 @@ DeviceTripletMatrix<T, 1> &MassSpringEnergy<T, dim>::hess()
149154
Eigen::Matrix<T, dim * 2, dim * 2> H_block, H_local;
150155
H_block << H_diff, -H_diff,
151156
-H_diff, H_diff;
152-
153-
//make_PSD(H_block, H_local);
157+
make_PSD(H_block, H_local);
154158
// add to global matrix
155159
for (int ni = 0; ni < 2; ni++)
156160
for (int nj = 0; nj < 2; nj++)
@@ -160,7 +164,7 @@ DeviceTripletMatrix<T, 1> &MassSpringEnergy<T, dim>::hess()
160164
for (int d2 = 0; d2 < dim; d2++){
161165
device_hess_row_idx(indStart + d1 * dim + d2)= idx[ni]*dim + d1;
162166
device_hess_col_idx(indStart + d1 * dim + d2)= idx[nj] * dim + d2;
163-
device_hess_val(indStart + d1 * dim + d2)= H_local(ni * dim + d1, nj * dim + d2);
167+
device_hess_val(indStart + d1 * dim + d2) = H_local(ni * dim + d1, nj * dim + d2);
164168
}
165169
} })
166170
.wait();

simulators/1_mass_spring/src/device_uti.cu

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
using namespace muda;
33

44
template <typename T>
5-
T devicesum(DeviceBuffer<T> &buffer)
5+
T devicesum(const DeviceBuffer<T> &buffer)
66
{
77
T sum = 0.0f; // Result of the reduction
88
T *d_out; // Device memory to store the result of the reduction
@@ -18,8 +18,8 @@ T devicesum(DeviceBuffer<T> &buffer)
1818
cudaFree(d_out);
1919
return sum;
2020
}
21-
template float devicesum<float>(DeviceBuffer<float> &);
22-
template double devicesum<double>(DeviceBuffer<double> &);
21+
template float devicesum<float>(const DeviceBuffer<float> &);
22+
template double devicesum<double>(const DeviceBuffer<double> &);
2323

2424
template <typename T, int Size>
2525
void __device__ make_PSD(const Eigen::Matrix<T, Size, Size> &hess, Eigen::Matrix<T, Size, Size> &PSD)

simulators/1_mass_spring/src/main.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
int main()
44
{
5-
float rho = 1000, k = 1e5, initial_stretch = 1.4, n_seg = 20, h = 0.004, side_len = 2, tol = 0.01;
5+
float rho = 1000, k = 1e5, initial_stretch = 1.4, n_seg = 10, h = 0.004, side_len = 1, tol = 0.01;
66
// printf("Running mass-spring simulator with parameters: rho = %f, k = %f, initial_stretch = %f, n_seg = %d, h = %f, side_len = %f, tol = %f\n", rho, k, initial_stretch, n_seg, h, side_len, tol);
77
MassSpringSimulator<float, 2> simulator(rho, side_len, initial_stretch, k, h, tol, n_seg);
88
simulator.run();

simulators/1_mass_spring/src/simulator.cu

Lines changed: 12 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,9 @@ struct MassSpringSimulator<T, dim>::Impl
1818
InertialEnergy<T, dim> inertialenergy;
1919
MassSpringEnergy<T, dim> massspringenergy;
2020
Impl(T rho, T side_len, T initial_stretch, T K, T h_, T tol_, int n_seg);
21-
void update_x(DeviceBuffer<T> &new_x);
22-
void update_x_tilde(DeviceBuffer<T> &new_x_tilde);
23-
void update_v(DeviceBuffer<T> &new_v);
21+
void update_x(const DeviceBuffer<T> &new_x);
22+
void update_x_tilde(const DeviceBuffer<T> &new_x_tilde);
23+
void update_v(const DeviceBuffer<T> &new_v);
2424
T IP_val();
2525
void step_forward();
2626
void draw();
@@ -110,23 +110,21 @@ void MassSpringSimulator<T, dim>::Impl::step_forward()
110110
T E_last = IP_val();
111111
DeviceBuffer<T> p = search_direction();
112112
T residual = max_vector(p) / h;
113-
// printf("Initial residual %f\n", residual);
113+
std::cout << "Initial residual " << residual << "\n";
114114
while (residual > tol)
115115
{
116-
// std::cout << "Iteration " << iter << ":\n";
117-
// std::cout << "residual = " << residual << "\n";
118-
119116
// Line search
120117
T alpha = 1;
121118
DeviceBuffer<T> x0 = x;
122-
update_x(add_vector<T>(x, p, 1.0, alpha));
119+
update_x(add_vector<T>(x0, p, 1.0, alpha));
123120
while (IP_val() > E_last)
124121
{
125122
alpha /= 2;
126123
update_x(add_vector<T>(x0, p, 1.0, alpha));
127124
}
128125
// std::cout << "step size = " << alpha << "\n";
129126
E_last = IP_val();
127+
// std::cout << "Iteration " << iter << " residual " << residual << "E_last" << E_last << "\n";
130128
p = search_direction();
131129
residual = max_vector(p) / h;
132130
iter += 1;
@@ -144,20 +142,20 @@ T MassSpringSimulator<T, dim>::Impl::screen_projection_y(T point)
144142
return resolution - (offset + scale * point);
145143
}
146144
template <typename T, int dim>
147-
void MassSpringSimulator<T, dim>::Impl::update_x(DeviceBuffer<T> &new_x)
145+
void MassSpringSimulator<T, dim>::Impl::update_x(const DeviceBuffer<T> &new_x)
148146
{
149147
inertialenergy.update_x(new_x);
150148
massspringenergy.update_x(new_x);
151149
new_x.copy_to(x);
152150
}
153151
template <typename T, int dim>
154-
void MassSpringSimulator<T, dim>::Impl::update_x_tilde(DeviceBuffer<T> &new_x_tilde)
152+
void MassSpringSimulator<T, dim>::Impl::update_x_tilde(const DeviceBuffer<T> &new_x_tilde)
155153
{
156154
inertialenergy.update_x_tilde(new_x_tilde);
157155
new_x_tilde.copy_to(x_tilde);
158156
}
159157
template <typename T, int dim>
160-
void MassSpringSimulator<T, dim>::Impl::update_v(DeviceBuffer<T> &new_v)
158+
void MassSpringSimulator<T, dim>::Impl::update_v(const DeviceBuffer<T> &new_v)
161159
{
162160
new_v.copy_to(v);
163161
}
@@ -197,7 +195,6 @@ T MassSpringSimulator<T, dim>::Impl::IP_val()
197195
template <typename T, int dim>
198196
DeviceBuffer<T> MassSpringSimulator<T, dim>::Impl::IP_grad()
199197
{
200-
201198
return add_vector<T>(inertialenergy.grad(), massspringenergy.grad(), 1.0, h * h);
202199
}
203200

@@ -206,14 +203,14 @@ DeviceTripletMatrix<T, 1> MassSpringSimulator<T, dim>::Impl::IP_hess()
206203
{
207204
DeviceTripletMatrix<T, 1> inertial_hess = inertialenergy.hess();
208205
DeviceTripletMatrix<T, 1> massspring_hess = massspringenergy.hess();
209-
DeviceTripletMatrix<T, 1> hess;
210-
hess = add_triplet<T>(inertial_hess, massspring_hess, 1.0, h * h);
211-
return inertial_hess;
206+
DeviceTripletMatrix<T, 1> hess = add_triplet<T>(inertial_hess, massspring_hess, 1.0, h * h);
207+
return hess;
212208
}
213209
template <typename T, int dim>
214210
DeviceBuffer<T> MassSpringSimulator<T, dim>::Impl::search_direction()
215211
{
216212
DeviceBuffer<T> dir;
213+
dir.resize(x.size());
217214
search_dir(IP_grad(), IP_hess(), dir);
218215
return dir;
219216
}

simulators/1_mass_spring/src/uti.cu

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ DeviceTripletMatrix<T, 1> add_triplet(const DeviceTripletMatrix<T, 1> &a, const
4444
int Nb = b.triplet_count();
4545
DeviceTripletMatrix<T, 1> c;
4646
c.resize_triplets(Na + Nb);
47+
c.reshape(a.rows(), a.cols());
4748
ParallelFor(256)
4849
.apply(Na,
4950
[c_device_values = c.values().viewer(), c_device_row_indices = c.row_indices().viewer(), c_device_col_indices = c.col_indices().viewer(),
@@ -102,8 +103,9 @@ void search_dir(const DeviceBuffer<T> &grad, const DeviceTripletMatrix<T, 1> &he
102103
static LinearSystemContext ctx;
103104
auto neg_grad = mult_vector<T>(grad, -1);
104105
int N = grad.size();
105-
Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, 1>> e_grad(neg_grad.data(), neg_grad.size());
106-
DeviceDenseVector<T> grad_device(e_grad);
106+
DeviceDenseVector<T> grad_device;
107+
grad_device.resize(N);
108+
grad_device.buffer_view().copy_from(neg_grad);
107109
DeviceCOOMatrix<T> A_coo;
108110
ctx.convert(hess, A_coo);
109111
DeviceCSRMatrix<T> A_csr;
@@ -115,4 +117,19 @@ void search_dir(const DeviceBuffer<T> &grad, const DeviceTripletMatrix<T, 1> &he
115117
dir.view().copy_from(dir_device.buffer_view());
116118
}
117119
template void search_dir<float>(const DeviceBuffer<float> &grad, const DeviceTripletMatrix<float, 1> &hess, DeviceBuffer<float> &dir);
118-
template void search_dir<double>(const DeviceBuffer<double> &grad, const DeviceTripletMatrix<double, 1> &hess, DeviceBuffer<double> &dir);
120+
template void search_dir<double>(const DeviceBuffer<double> &grad, const DeviceTripletMatrix<double, 1> &hess, DeviceBuffer<double> &dir);
121+
122+
template <typename T>
123+
void display_vec(const DeviceBuffer<T> &vec)
124+
{
125+
int N = vec.size();
126+
ParallelFor(256)
127+
.apply(N,
128+
[vec = vec.cviewer()] __device__(int i) mutable
129+
{
130+
printf("%d %f\n", i, vec(i));
131+
})
132+
.wait();
133+
}
134+
template void display_vec<float>(const DeviceBuffer<float> &vec);
135+
template void display_vec<double>(const DeviceBuffer<double> &vec);

0 commit comments

Comments
 (0)