|
#include "../include/ops_mpi.hpp"

#include <mpi.h>

#include <algorithm>
#include <cstdint>
#include <cstring>
#include <numeric>
#include <vector>

#include "../../common/include/common.hpp"
| 11 | + |
| 12 | +namespace kutergin_a_allreduce { |
| 13 | + |
| 14 | +namespace { |
| 15 | + |
| 16 | +void ApplyOp(void *a, const void *b, int count, MPI_Datatype datatype, MPI_Op op) { |
| 17 | + if (op == MPI_SUM && datatype == MPI_INT) { |
| 18 | + for (int i = 0; i < count; ++i) { |
| 19 | + reinterpret_cast<int *>(a)[i] += reinterpret_cast<const int *>(b)[i]; |
| 20 | + } |
| 21 | + } |
| 22 | +} |
| 23 | + |
| 24 | +} // namespace |
| 25 | + |
// Constructs the task from the caller-supplied input.
// NOTE(review): SetTypeOfTask / GetInput / GetOutput come from the
// framework base class (declared in ops_mpi.hpp, not visible here) —
// presumably registering the task kind and storing I/O; confirm there.
AllreduceMPI::AllreduceMPI(const InType &in) {
  SetTypeOfTask(GetStaticTypeOfTask());
  // Copy the input and zero-initialize the scalar output slot.
  GetInput() = in;
  GetOutput() = 0;
}
| 31 | + |
| 32 | +int Allreduce(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm) { |
| 33 | + int rank = 0; |
| 34 | + int size = 0; |
| 35 | + MPI_Comm_rank(comm, &rank); |
| 36 | + MPI_Comm_size(comm, &size); |
| 37 | + |
| 38 | + int type_size = 0; |
| 39 | + MPI_Type_size(datatype, &type_size); |
| 40 | + |
| 41 | + std::memcpy(recvbuf, sendbuf, static_cast<size_t>(count) * type_size); |
| 42 | + |
| 43 | + for (int mask = 1; mask < size; mask <<= 1) { |
| 44 | + if ((rank & mask) != 0) { |
| 45 | + MPI_Send(recvbuf, count, datatype, rank - mask, 0, comm); |
| 46 | + break; |
| 47 | + } |
| 48 | + |
| 49 | + if (rank + mask < size) { |
| 50 | + std::vector<uint8_t> tmp(static_cast<size_t>(count) * type_size); |
| 51 | + MPI_Recv(tmp.data(), count, datatype, rank + mask, 0, comm, MPI_STATUS_IGNORE); |
| 52 | + ApplyOp(recvbuf, tmp.data(), count, datatype, op); |
| 53 | + } |
| 54 | + } |
| 55 | + |
| 56 | + for (int mask = 1; mask < size; mask <<= 1) { |
| 57 | + if (rank < mask && rank + mask < size) { |
| 58 | + MPI_Send(recvbuf, count, datatype, rank + mask, 0, comm); |
| 59 | + } else if (rank >= mask && rank < 2 * mask) { |
| 60 | + MPI_Recv(recvbuf, count, datatype, rank - mask, 0, comm, MPI_STATUS_IGNORE); |
| 61 | + } |
| 62 | + } |
| 63 | + |
| 64 | + return MPI_SUCCESS; |
| 65 | +} |
| 66 | + |
// Validation stage: any input (including an empty element list) is a
// valid sum request, so there is nothing to check.
bool AllreduceMPI::ValidationImpl() {
  return true;
}
| 70 | + |
// Pre-processing stage: the input is consumed directly by RunImpl, so no
// preparation is required.
bool AllreduceMPI::PreProcessingImpl() {
  return true;
}
| 74 | + |
| 75 | +bool AllreduceMPI::RunImpl() { |
| 76 | + int rank = 0; |
| 77 | + MPI_Comm_rank(MPI_COMM_WORLD, &rank); |
| 78 | + |
| 79 | + const auto &input_struct = GetInput(); |
| 80 | + |
| 81 | + int local_sum = 0; |
| 82 | + if (!input_struct.elements.empty()) { |
| 83 | + local_sum = std::accumulate(input_struct.elements.begin(), input_struct.elements.end(), 0); |
| 84 | + } |
| 85 | + |
| 86 | + int global_sum = 0; |
| 87 | + |
| 88 | + Allreduce(&local_sum, &global_sum, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD); |
| 89 | + |
| 90 | + GetOutput() = global_sum; |
| 91 | + |
| 92 | + return true; |
| 93 | +} |
| 94 | + |
// Post-processing stage: RunImpl already stored the final result in the
// output slot, so nothing remains to do.
bool AllreduceMPI::PostProcessingImpl() {
  return true;
}
| 98 | + |
| 99 | +} // namespace kutergin_a_allreduce |
0 commit comments