Skip to content

Commit b6776aa

Browse files
authored
[SYCLomatic] Enable the migration of API cudaLaunchHostFunc (#2667)
Signed-off-by: intwanghao <hao3.wang@intel.com>
1 parent f1c2029 commit b6776aa

5 files changed

Lines changed: 106 additions & 39 deletions

File tree

clang/lib/DPCT/RuleInfra/MapNames.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -338,6 +338,9 @@ void MapNames::setExplicitNamespaceMap(
338338
{"cudaStream_t",
339339
std::make_shared<TypeNameRule>(getDpctNamespace() + "queue_ptr",
340340
HelperFeatureEnum::device_ext)},
341+
{"cudaHostFn_t",
342+
std::make_shared<TypeNameRule>(getDpctNamespace() + "host_func",
343+
HelperFeatureEnum::device_ext)},
341344
{"CUstream",
342345
std::make_shared<TypeNameRule>(getDpctNamespace() + "queue_ptr",
343346
HelperFeatureEnum::device_ext)},

clang/lib/DPCT/RulesLang/RulesLang.cpp

Lines changed: 72 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -346,7 +346,7 @@ void TypeInDeclRule::registerMatcher(MatchFinder &MF) {
346346
"cublasLtMatmulHeuristicResult_t", "CUjit_target",
347347
"cublasLtMatrixTransformDesc_t", "cudaGraphicsMapFlags",
348348
"cudaGraphicsRegisterFlags", "cudaExternalMemoryHandleType",
349-
"CUstreamCallback"))))))
349+
"CUstreamCallback", "cudaHostFn_t"))))))
350350
.bind("cudaTypeDef"),
351351
this);
352352

@@ -4622,7 +4622,8 @@ void KernelCallRule::registerMatcher(ast_matchers::MatchFinder &MF) {
46224622
this);
46234623

46244624
auto launchAPIName = [&]() {
4625-
return hasAnyName("cudaLaunchKernel", "cudaLaunchCooperativeKernel");
4625+
return hasAnyName("cudaLaunchKernel", "cudaLaunchCooperativeKernel",
4626+
"cudaLaunchHostFunc");
46264627
};
46274628
MF.addMatcher(
46284629
callExpr(allOf(callee(functionDecl(launchAPIName())), parentStmt()))
@@ -4837,56 +4838,89 @@ void KernelCallRule::runRule(
48374838
LaunchKernelCall = getNodeAsType<CallExpr>(Result, "launchUsed");
48384839
IsAssigned = true;
48394840
}
4840-
if (!LaunchKernelCall)
4841+
auto FD = LaunchKernelCall->getDirectCallee();
4842+
if (!LaunchKernelCall || !FD)
48414843
return;
4842-
const Expr *CalleeDRE = LaunchKernelCall->getArg(0);
4843-
bool IsFuncTypeErased = true;
4844-
auto QT = CalleeDRE->getType();
4845-
4846-
if (QT->isPointerType()) {
4847-
QT = QT->getPointeeType();
4848-
}
4849-
if (QT->isFunctionType()) {
4850-
IsFuncTypeErased = false;
4851-
}
4852-
4853-
if (!getAddressedRef(CalleeDRE)) {
4854-
if (IsFuncTypeErased) {
4855-
DpctGlobalInfo::setCVersionCUDALaunchUsed();
4844+
std::string FuncName = FD->getNameAsString();
4845+
std::cout << FuncName << std::endl;
4846+
if (FuncName == "cudaLaunchHostFunc") {
4847+
if (DpctGlobalInfo::getUsmLevel() != UsmLevel::UL_Restricted) {
4848+
report(LaunchKernelCall->getBeginLoc(), Diagnostics::API_NOT_MIGRATED,
4849+
false, "cudaLaunchHostFunc");
4850+
return;
48564851
}
48574852
std::string ReplStr;
48584853
llvm::raw_string_ostream OS(ReplStr);
4854+
std::string IndentStr = getIndent(LaunchKernelCall->getBeginLoc(),
4855+
DpctGlobalInfo::getSourceManager())
4856+
.str();
48594857
if (IsAssigned) {
48604858
OS << MapNames::getCheckErrorMacroName() << "(";
48614859
}
4862-
OS << MapNames::getDpctNamespace() << "kernel_launcher::launch(";
4863-
size_t ArgsNum = LaunchKernelCall->getNumArgs();
4864-
for (size_t i = 0; i < ArgsNum; i++) {
4865-
if (auto Arg = LaunchKernelCall->getArg(i)) {
4866-
if (i == 0) {
4867-
if (auto E = getAddressedRef(CalleeDRE, false, nullptr)) {
4868-
OS << ExprAnalysis::ref(E);
4860+
OS << ExprAnalysis::ref(LaunchKernelCall->getArg(0))
4861+
<< "->submit([&](sycl::handler &cgh) {" << getNL() << IndentStr
4862+
<< " cgh.host_task([=](){" << getNL() << IndentStr << " "
4863+
<< ExprAnalysis::ref(LaunchKernelCall->getArg(1)) << "("
4864+
<< ExprAnalysis::ref(LaunchKernelCall->getArg(2)) << ");" << getNL()
4865+
<< IndentStr << " });" << getNL() << IndentStr << "})";
4866+
if (IsAssigned) {
4867+
OS << ")";
4868+
}
4869+
auto Repl = new ReplaceStmt(LaunchKernelCall, OS.str());
4870+
Repl->setBlockLevelFormatFlag();
4871+
emplaceTransformation(Repl);
4872+
return;
4873+
} else {
4874+
const Expr *CalleeDRE = LaunchKernelCall->getArg(0);
4875+
bool IsFuncTypeErased = true;
4876+
auto QT = CalleeDRE->getType();
4877+
4878+
if (QT->isPointerType()) {
4879+
QT = QT->getPointeeType();
4880+
}
4881+
if (QT->isFunctionType()) {
4882+
IsFuncTypeErased = false;
4883+
}
4884+
4885+
if (!getAddressedRef(CalleeDRE)) {
4886+
if (IsFuncTypeErased) {
4887+
DpctGlobalInfo::setCVersionCUDALaunchUsed();
4888+
}
4889+
std::string ReplStr;
4890+
llvm::raw_string_ostream OS(ReplStr);
4891+
if (IsAssigned) {
4892+
OS << MapNames::getCheckErrorMacroName() << "(";
4893+
}
4894+
OS << MapNames::getDpctNamespace() << "kernel_launcher::launch(";
4895+
size_t ArgsNum = LaunchKernelCall->getNumArgs();
4896+
for (size_t i = 0; i < ArgsNum; i++) {
4897+
if (auto Arg = LaunchKernelCall->getArg(i)) {
4898+
if (i == 0) {
4899+
if (auto E = getAddressedRef(CalleeDRE, false, nullptr)) {
4900+
OS << ExprAnalysis::ref(E);
4901+
} else {
4902+
OS << ExprAnalysis::ref(Arg);
4903+
}
48694904
} else {
4870-
OS << ExprAnalysis::ref(Arg);
4905+
OS << ", " << ExprAnalysis::ref(Arg);
48714906
}
4872-
} else {
4873-
OS << ", " << ExprAnalysis::ref(Arg);
48744907
}
48754908
}
4876-
}
4877-
OS << ")";
4878-
if (IsAssigned) {
48794909
OS << ")";
4910+
if (IsAssigned) {
4911+
OS << ")";
4912+
}
4913+
emplaceTransformation(new ReplaceStmt(LaunchKernelCall, OS.str()));
4914+
return;
48804915
}
4881-
emplaceTransformation(new ReplaceStmt(LaunchKernelCall, OS.str()));
4882-
return;
4883-
}
48844916

4885-
if (!IsAssigned)
4886-
findAndRemoveTrailingSemicolon(LaunchKernelCall, Result);
4887-
if (DpctGlobalInfo::getInstance().buildLaunchKernelInfo(LaunchKernelCall,
4888-
IsAssigned)) {
4889-
emplaceTransformation(new ReplaceStmt(LaunchKernelCall, true, false, ""));
4917+
if (!IsAssigned)
4918+
findAndRemoveTrailingSemicolon(LaunchKernelCall, Result);
4919+
if (DpctGlobalInfo::getInstance().buildLaunchKernelInfo(LaunchKernelCall,
4920+
IsAssigned)) {
4921+
emplaceTransformation(
4922+
new ReplaceStmt(LaunchKernelCall, true, false, ""));
4923+
}
48904924
}
48914925
}
48924926
}

clang/lib/DPCT/SrcAPI/APINames.inc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,7 @@ ENTRY(cudaGridDependencySynchronize, cudaGridDependencySynchronize, false, NO_FL
148148
ENTRY(cudaLaunchCooperativeKernel, cudaLaunchCooperativeKernel, true, NO_FLAG, P4, "Partial: DPCT1007")
149149
ENTRY(cudaLaunchCooperativeKernelMultiDevice, cudaLaunchCooperativeKernelMultiDevice, false, NO_FLAG, P4, "comment")
150150
ENTRY(cudaLaunchDevice, cudaLaunchDevice, false, NO_FLAG, P4, "comment")
151-
ENTRY(cudaLaunchHostFunc, cudaLaunchHostFunc, false, NO_FLAG, P4, "comment")
151+
ENTRY(cudaLaunchHostFunc, cudaLaunchHostFunc, true, NO_FLAG, P4, "comment")
152152
ENTRY(cudaLaunchKernel, cudaLaunchKernel, true, NO_FLAG, P0, "Partial: DPCT1007, success only when directly using of kernel function name")
153153
ENTRY(cudaLaunchKernelExC, cudaLaunchKernelExC, false, NO_FLAG, P4, "comment")
154154
ENTRY(cudaSetDoubleForDevice, cudaSetDoubleForDevice, false, NO_FLAG, P0, "comment")

clang/runtime/dpct-rt/include/dpct/kernel.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,8 @@ namespace dpct {
3535
typedef void (*kernel_functor)(sycl::queue &, const sycl::nd_range<3> &,
3636
unsigned int, void **, void **);
3737

38+
typedef void (*host_func)(void *);
39+
3840
struct kernel_function_info {
3941
int max_work_group_size = 0;
4042
int shared_size_bytes = 0;

clang/test/dpct/launch-kernel-usm.cu

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,11 @@
1+
// UNSUPPORTED: cuda-8.0, cuda-9.0, cuda-9.1, cuda-9.2
2+
// UNSUPPORTED: v8.0, v9.0, v9.1, v9.2
13
// RUN: dpct --format-range=none -out-root %T/launch-kernel-usm %s --cuda-include-path="%cuda-path/include" -- -x cuda --cuda-host-only -std=c++14
24
// RUN: FileCheck %s --match-full-lines --input-file %T/launch-kernel-usm/launch-kernel-usm.dp.cpp
35

6+
#include<cuda_runtime.h>
7+
#include<iostream>
8+
49
// CHECK: void template_device(T *d, T *s) {
510
template<class T>
611
__device__ void template_device(T *d) {
@@ -22,6 +27,11 @@ __global__ void kernel(int *d, cudaTextureObject_t tex) {
2227
tex1D(d + gtid, tex, gtid);
2328
}
2429

30+
void hostCallback(void *userData) {
31+
const char *msg = static_cast<const char*>(userData);
32+
std::cout << "Host callback executed. Message: " << msg << std::endl;
33+
}
34+
2535
int main() {
2636
int *d_data;
2737
cudaMalloc(&d_data, sizeof(int));
@@ -87,6 +97,24 @@ int main() {
8797
// CHECK: dpct::kernel_launcher::launch(kernel_array[10], dpct::dim3(16), dpct::dim3(16), args, 0, 0);
8898
cudaLaunchKernel(kernel_array[10], dim3(16), dim3(16), args, 0, 0);
8999

100+
cudaError_t err;
101+
const char *message = "Kernel execution finished.";
102+
cudaStream_t stream;
103+
// CHECK: err = DPCT_CHECK_ERROR(stream->submit([&](sycl::handler &cgh) {
104+
// CHECK: cgh.host_task([=](){
105+
// CHECK: hostCallback((void*)message);
106+
// CHECK: });
107+
// CHECK: }));
108+
err = cudaLaunchHostFunc(stream, hostCallback, (void*)message);
109+
110+
// CHECK: dpct::host_func fn = hostCallback;
111+
cudaHostFn_t fn = hostCallback;
112+
// CHECK: stream->submit([&](sycl::handler &cgh) {
113+
// CHECK: cgh.host_task([=](){
114+
// CHECK: fn((void*)message);
115+
// CHECK: });
116+
cudaLaunchHostFunc(stream, fn, (void*)message);
117+
90118
cudaStreamDestroy(stream);
91119
cudaDestroyTextureObject(tex);
92120
cudaFree(d_data21);

0 commit comments

Comments
 (0)