Skip to content

Commit c178072

Browse files
committed
improved
1 parent 5e866cc commit c178072

10 files changed

Lines changed: 221 additions & 148 deletions

File tree

CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,3 +30,6 @@ include_directories(${EIGEN3_INCLUDE_DIR})
3030
include_directories(/usr/include)
3131

3232
add_subdirectory(simulators)
33+
add_compile_options(/W4)
34+
add_compile_options(/wd4100)
35+
add_compile_options(/wd4244)

simulators/1_mass_spring/include/InertialEnergy.h

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,11 @@
33
#include <memory>
44
#include <Eigen/Dense>
55
#include "square_mesh.h"
6-
#include "uti.h"
6+
#include <muda/muda.h>
7+
#include <muda/container.h>
8+
#include <muda/ext/linear_system.h>
9+
10+
using namespace muda;
711

812
template <typename T, int dim>
913
class InertialEnergy
@@ -17,16 +21,17 @@ class InertialEnergy
1721
InertialEnergy &operator=(InertialEnergy &&rhs);
1822
InertialEnergy &operator=(const InertialEnergy &rhs);
1923

20-
void update_x(const std::vector<T> &x);
21-
void update_x_tilde(const std::vector<T> &x_tilde);
24+
void update_x(DeviceBuffer<T> &x);
25+
void generate_hess();
26+
void update_x_tilde(DeviceBuffer<T> &x_tilde);
2227
void update_m(T m);
23-
T val(); // Calculate the value of the energy
24-
std::vector<T> &grad(); // Calculate the gradient of the energy
25-
SparseMatrix<T> hess(); // Calculate the Hessian matrix of the energy
28+
T val(); // Calculate the value of the energy
29+
DeviceBuffer<T> &grad(); // Calculate the gradient of the energy
30+
DeviceTripletMatrix<T, 1> &hess(); // Calculate the Hessian matrix of the energy
2631

2732
private:
2833
// The implementation details of the VecAdder class are placed in the implementation class declared here.
2934
struct Impl;
3035
// The private pointer to the implementation class Impl
3136
std::unique_ptr<Impl> pimpl_;
32-
};
37+
};

simulators/1_mass_spring/include/MassSpringEnergy.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,13 @@ class MassSpringEnergy
1515
MassSpringEnergy(const MassSpringEnergy &rhs);
1616
MassSpringEnergy &operator=(MassSpringEnergy &&rhs);
1717

18-
void update_x(const std::vector<T> &x);
18+
void update_x(DeviceBuffer<T> &x);
1919
void update_e(const std::vector<int> &e);
2020
void update_l2(const std::vector<T> &l2);
2121
void update_k(const std::vector<T> &k);
22-
T val(); // Calculate the value of the energy
23-
std::vector<T> &grad(); // Calculate the gradient of the energy
24-
SparseMatrix<T> hess(); // Calculate the Hessian matrix of the energy
22+
T val(); // Calculate the value of the energy
23+
DeviceBuffer<T> &grad(); // Calculate the gradient of the energy
24+
DeviceTripletMatrix<T, 1> &hess(); // Calculate the Hessian matrix of the energy
2525

2626
private:
2727
// The implementation details of the VecAdder class are placed in the implementation class declared here.
Lines changed: 7 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,39 +1,23 @@
11
#pragma once
2-
#include <SFML/Graphics.hpp>
2+
33
#include <vector>
44
#include <cmath>
5-
#include "InertialEnergy.h"
6-
#include "MassSpringEnergy.h"
75
#include "square_mesh.h"
8-
#include "uti.h"
96
#include <iostream>
107
template <typename T, int dim>
118
class MassSpringSimulator
129
{
1310
public:
1411
MassSpringSimulator();
1512
~MassSpringSimulator();
13+
MassSpringSimulator(MassSpringSimulator &&rhs);
14+
MassSpringSimulator &operator=(MassSpringSimulator &&rhs);
1615
MassSpringSimulator(T rho, T side_len, T initial_stretch, T K, T h, T tol, int n_seg);
1716
void run();
18-
void draw();
19-
void step_forward();
20-
void update_x(std::vector<T> new_x);
21-
void update_x_tilde(std::vector<T> new_x_tilde);
22-
void update_v(std::vector<T> new_v);
23-
T IP_val();
24-
std::vector<T> IP_grad();
25-
SparseMatrix<T> IP_hess();
26-
std::vector<T> search_direction();
27-
T screen_projection_x(T point);
28-
T screen_projection_y(T point);
2917

3018
private:
31-
int n_seg;
32-
T h, rho, side_len, initial_stretch, m, tol;
33-
int resolution = 900, scale = 200, offset = resolution / 2, radius = 5;
34-
std::vector<T> x, x_tilde, v, k, l2;
35-
std::vector<int> e;
36-
sf::RenderWindow window;
37-
InertialEnergy<T, dim> inertialenergy;
38-
MassSpringEnergy<T, dim> massspringenergy;
19+
// The implementation details of the VecAdder class are placed in the implementation class declared here.
20+
struct Impl;
21+
// The private pointer to the implementation class Impl
22+
std::unique_ptr<Impl> pimpl_;
3923
};
Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,23 @@
11
#pragma once
22
#include <Eigen/Dense>
33
#include "SparseMatrix.h"
4+
#include <muda/cub/device/device_reduce.h>
5+
#include <muda/container.h>
6+
#include <muda/muda.h>
7+
#include <muda/ext/linear_system.h>
8+
using namespace muda;
9+
10+
template <typename T>
11+
DeviceBuffer<T> add_vector(const DeviceBuffer<T> &a, const DeviceBuffer<T> &b, const T &factor1 = 1, const T &factor2 = 1);
12+
413
template <typename T>
5-
std::vector<T> add_vector(const std::vector<T> &a, const std::vector<T> &b, const T &factor1 = 1, const T &factor2 = 1);
14+
DeviceBuffer<T> mult_vector(const DeviceBuffer<T> &a, const T &b);
615

716
template <typename T>
8-
std::vector<T> mult_vector(const std::vector<T> &a, const T &b);
17+
DeviceTripletMatrix<T, 1> add_triplet(const DeviceTripletMatrix<T, 1> &a, const DeviceTripletMatrix<T, 1> &b, const T &factor1 = 1, const T &factor2 = 1);
918

1019
template <typename T>
11-
T max_vector(const std::vector<T> &a);
20+
T max_vector(const DeviceBuffer<T> &a);
1221

1322
template <typename T>
14-
void search_dir(const std::vector<T> &grad, const SparseMatrix<T> &hess, std::vector<T> &dir);
23+
void search_dir(const DeviceBuffer<T> &grad, const DeviceTripletMatrix<T, 1> &hess, DeviceBuffer<T> &dir);

simulators/1_mass_spring/src/InertialEnergy.cu

Lines changed: 35 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
#include "InertialEnergy.h"
2+
#include "uti.h"
23
#include <muda/muda.h>
34
#include <muda/container.h>
45
#include "device_uti.h"
@@ -8,10 +9,10 @@ template <typename T, int dim>
89
struct InertialEnergy<T, dim>::Impl
910
{
1011
DeviceBuffer<T> device_x, device_x_tilde, device_grad;
12+
DeviceTripletMatrix<T, 1> device_hess;
1113
int N;
1214
T m, val;
13-
std::vector<T> host_grad;
14-
SparseMatrix<T> host_hess;
15+
Impl(int N, T m);
1516
};
1617
template <typename T, int dim>
1718
InertialEnergy<T, dim>::InertialEnergy() = default;
@@ -30,26 +31,43 @@ InertialEnergy<T, dim>::InertialEnergy(const InertialEnergy<T, dim> &rhs)
3031
: pimpl_{std::make_unique<Impl>(*rhs.pimpl_)} {}
3132

3233
template <typename T, int dim>
33-
InertialEnergy<T, dim>::InertialEnergy(int N, T m) : pimpl_{std::make_unique<Impl>()}
34+
InertialEnergy<T, dim>::InertialEnergy(int N, T m) : pimpl_{std::make_unique<Impl>(N, m)}
3435
{
35-
pimpl_->N = N;
36-
pimpl_->m = m;
37-
pimpl_->device_grad = std::vector<T>(pimpl_->N * dim);
38-
pimpl_->host_grad = std::vector<T>(pimpl_->N * dim);
39-
pimpl_->host_hess = SparseMatrix<T>(pimpl_->N * dim);
40-
pimpl_->host_hess.set_diagonal(m);
36+
generate_hess();
37+
}
38+
39+
template <typename T, int dim>
40+
InertialEnergy<T, dim>::Impl::Impl(int N_, T m_) : N(N_), m(m_)
41+
{
42+
}
43+
template <typename T, int dim>
44+
void InertialEnergy<T, dim>::generate_hess()
45+
{
46+
auto &device_hess = pimpl_->device_hess;
47+
auto m = pimpl_->m;
48+
auto N = pimpl_->N;
49+
ParallelFor(256)
50+
.apply(N * dim,
51+
[device_hess_row_indices = device_hess.row_indices().viewer(), device_hess_col_indices = device_hess.col_indices().viewer(),
52+
device_hess_values = device_hess.values().viewer(), m] __device__(int i) mutable
53+
{
54+
device_hess_row_indices(i) = i;
55+
device_hess_col_indices(i) = i;
56+
device_hess_values(i) = m;
57+
})
58+
.wait();
4159
}
4260

4361
template <typename T, int dim>
44-
void InertialEnergy<T, dim>::update_x(const std::vector<T> &x)
62+
void InertialEnergy<T, dim>::update_x(DeviceBuffer<T> &x)
4563
{
46-
pimpl_->device_x.copy_from(x);
64+
pimpl_->device_x.view().copy_from(x);
4765
}
4866

4967
template <typename T, int dim>
50-
void InertialEnergy<T, dim>::update_x_tilde(const std::vector<T> &x_tilde)
68+
void InertialEnergy<T, dim>::update_x_tilde(DeviceBuffer<T> &x_tilde)
5169
{
52-
pimpl_->device_x_tilde.copy_from(x_tilde);
70+
pimpl_->device_x_tilde.view().copy_from(x_tilde);
5371
}
5472

5573
template <typename T, int dim>
@@ -77,29 +95,27 @@ T InertialEnergy<T, dim>::val()
7795
}
7896

7997
template <typename T, int dim>
80-
std::vector<T> &InertialEnergy<T, dim>::grad()
98+
DeviceBuffer<T> &InertialEnergy<T, dim>::grad()
8199
{
82100
auto &device_x = pimpl_->device_x;
83101
auto &device_x_tilde = pimpl_->device_x_tilde;
84102
auto &m = pimpl_->m;
85103
auto N = pimpl_->N * dim;
86104
auto &device_grad = pimpl_->device_grad;
87-
auto &host_grad = pimpl_->host_grad;
88105
ParallelFor(256)
89106
.apply(N,
90107
[device_x = device_x.cviewer(), device_x_tilde = device_x_tilde.cviewer(), m, N, device_grad = device_grad.viewer()] __device__(int i) mutable
91108
{
92109
device_grad(i) = m * (device_x(i) - device_x_tilde(i));
93110
})
94111
.wait();
95-
device_grad.copy_to(host_grad);
96-
return host_grad;
112+
return device_grad;
97113
} // Calculate the gradient of the energy
98114

99115
template <typename T, int dim>
100-
SparseMatrix<T> InertialEnergy<T, dim>::hess()
116+
DeviceTripletMatrix<T, 1> &InertialEnergy<T, dim>::hess()
101117
{
102-
return pimpl_->host_hess;
118+
return pimpl_->device_hess;
103119
} // Calculate the Hessian matrix of the energy
104120

105121
template class InertialEnergy<float, 2>;

simulators/1_mass_spring/src/MassSpringEnergy.cu

Lines changed: 14 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,8 @@ struct MassSpringEnergy<T, dim>::Impl
1313
DeviceBuffer<T> device_l2, device_k;
1414
DeviceBuffer<int> device_e;
1515
int N;
16-
std::vector<T> host_grad;
17-
SparseMatrix<T> host_hess;
16+
DeviceBuffer<T> device_grad;
17+
DeviceTripletMatrix<T, 1> device_hess;
1818
};
1919
template <typename T, int dim>
2020
MassSpringEnergy<T, dim>::MassSpringEnergy() = default;
@@ -40,15 +40,13 @@ MassSpringEnergy<T, dim>::MassSpringEnergy(const std::vector<T> &x, const std::v
4040
pimpl_->device_e.copy_from(e);
4141
pimpl_->device_l2.copy_from(l2);
4242
pimpl_->device_k.copy_from(k);
43-
pimpl_->host_grad = std::vector<T>(pimpl_->N * dim);
4443
int size = e.size() / 2;
45-
pimpl_->host_hess = SparseMatrix<T>(size * dim * dim);
4644
}
4745

4846
template <typename T, int dim>
49-
void MassSpringEnergy<T, dim>::update_x(const std::vector<T> &x)
47+
void MassSpringEnergy<T, dim>::update_x(DeviceBuffer<T> &x)
5048
{
51-
pimpl_->device_x.copy_from(x);
49+
pimpl_->device_x.view().copy_from(x);
5250
}
5351

5452
template <typename T, int dim>
@@ -94,15 +92,14 @@ T MassSpringEnergy<T, dim>::val()
9492
} // Calculate the energy
9593

9694
template <typename T, int dim>
97-
std::vector<T> &MassSpringEnergy<T, dim>::grad()
95+
DeviceBuffer<T> &MassSpringEnergy<T, dim>::grad()
9896
{
9997
auto &device_x = pimpl_->device_x;
10098
auto &device_e = pimpl_->device_e;
10199
auto &device_l2 = pimpl_->device_l2;
102100
auto &device_k = pimpl_->device_k;
103101
auto N = pimpl_->device_e.size() / 2;
104102
DeviceBuffer<T> device_grad(pimpl_->N * dim);
105-
auto &host_grad = pimpl_->host_grad;
106103
ParallelFor(256).apply(N, [device_x = device_x.cviewer(), device_e = device_e.cviewer(), device_l2 = device_l2.cviewer(), device_k = device_k.cviewer(), device_grad = device_grad.viewer()] __device__(int i) mutable
107104
{
108105
int idx1= device_e(2 * i); // First node index
@@ -120,23 +117,22 @@ std::vector<T> &MassSpringEnergy<T, dim>::grad()
120117

121118
} })
122119
.wait();
123-
device_grad.copy_to(host_grad);
124-
return host_grad;
120+
return device_grad;
125121
}
126122

127123
template <typename T, int dim>
128-
SparseMatrix<T> MassSpringEnergy<T, dim>::hess()
124+
DeviceTripletMatrix<T, 1> &MassSpringEnergy<T, dim>::hess()
129125
{
130126
auto &device_x = pimpl_->device_x;
131127
auto &device_e = pimpl_->device_e;
132128
auto &device_l2 = pimpl_->device_l2;
133129
auto &device_k = pimpl_->device_k;
134130
auto N = device_e.size() / 2;
135-
auto &host_hess = pimpl_->host_hess;
136-
DeviceBuffer<T> device_hess(N * dim * dim * 4);
137-
DeviceBuffer<int> device_hess_row_idx(N * dim * dim * 4);
138-
DeviceBuffer<int> device_hess_col_idx(N * dim * dim * 4);
139-
ParallelFor(256).apply(N, [device_x = device_x.cviewer(), device_e = device_e.cviewer(), device_l2 = device_l2.cviewer(), device_k = device_k.cviewer(), device_hess = device_hess.viewer(), device_hess_row_idx = device_hess_row_idx.viewer(), device_hess_col_idx = device_hess_col_idx.viewer(), N] __device__(int i) mutable
131+
auto device_hess = pimpl_->device_hess;
132+
auto device_hess_row_idx = device_hess.row_indices();
133+
auto device_hess_col_idx = device_hess.col_indices();
134+
auto device_hess_val = device_hess.values();
135+
ParallelFor(256).apply(N, [device_x = device_x.cviewer(), device_e = device_e.cviewer(), device_l2 = device_l2.cviewer(), device_k = device_k.cviewer(), device_hess_val = device_hess_val.viewer(), device_hess_row_idx = device_hess_row_idx.viewer(), device_hess_col_idx = device_hess_col_idx.viewer(), N] __device__(int i) mutable
140136
{
141137
int idx[2] = {device_e(2 * i), device_e(2 * i + 1)}; // First node index
142138
T diff = 0;
@@ -164,14 +160,11 @@ SparseMatrix<T> MassSpringEnergy<T, dim>::hess()
164160
for (int d2 = 0; d2 < dim; d2++){
165161
device_hess_row_idx(indStart + d1 * dim + d2)= idx[ni]*dim + d1;
166162
device_hess_col_idx(indStart + d1 * dim + d2)= idx[nj] * dim + d2;
167-
device_hess(indStart + d1 * dim + d2)= H_local(ni * dim + d1, nj * dim + d2);
163+
device_hess_val(indStart + d1 * dim + d2)= H_local(ni * dim + d1, nj * dim + d2);
168164
}
169165
} })
170166
.wait();
171-
device_hess.copy_to(host_hess.set_val_buffer());
172-
device_hess_row_idx.copy_to(host_hess.set_row_buffer());
173-
device_hess_col_idx.copy_to(host_hess.set_col_buffer());
174-
return host_hess;
167+
return device_hess;
175168

176169
} // Calculate the Hessian of the energy
177170

simulators/1_mass_spring/src/main.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
#include "simulator.cpp"
1+
#include "simulator.h"
22

33
int main()
44
{

0 commit comments

Comments
 (0)