Skip to content

Commit a653161

Browse files
authored
[SYCLomatic][PTX] Enable to migrate "cp.async.commit_group", "cp.async.wait_group" and "cp.async.wait_all" (#2588)
Signed-off-by: chenwei.sun <chenwei.sun@intel.com>
1 parent 9b620bf commit a653161

4 files changed

Lines changed: 73 additions & 12 deletions

File tree

clang/lib/DPCT/Diagnostics/Diagnostics.inc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -298,8 +298,8 @@ DEF_WARNING(JOINT_MATRIX_SHAPE, 1135, HIGH_LEVEL, "Please check if joint_matrix
298298
DEF_COMMENT(JOINT_MATRIX_SHAPE, 1135, HIGH_LEVEL, "Please check if joint_matrix implementations support the combination of data type and matrix shape type in the target hardware.")
299299
DEF_WARNING(UNSUPPORTED_EXTMEM_WIN_HANDLE, 1136, HIGH_LEVEL, "SYCL Bindless Images extension only supports importing external resource memory using NT handle on Windows. If assert(%0.get_win32_handle()) fails, you may need to adjust the code to use (%0.get_win32_handle()).")
300300
DEF_COMMENT(UNSUPPORTED_EXTMEM_WIN_HANDLE, 1136, HIGH_LEVEL, "SYCL Bindless Images extension only supports importing external resource memory using NT handle on Windows. If assert({0}.get_win32_handle()) fails, you may need to adjust the code to use ({0}.get_win32_handle()).")
301-
DEF_WARNING(ASYNC_COPY_DEVICE_WARN, 1137, LOW_LEVEL, "ASM instruction \"cp.async\" is asynchronous copy, current it is migrated to synchronous copy operation. You may need to adjust the code to tune the performance.")
302-
DEF_COMMENT(ASYNC_COPY_DEVICE_WARN, 1137, LOW_LEVEL, "ASM instruction \"cp.async\" is asynchronous copy, current it is migrated to synchronous copy operation. You may need to adjust the code to tune the performance.")
301+
DEF_WARNING(ASYNC_COPY_DEVICE_WARN, 1137, LOW_LEVEL, "ASM instruction \"%0\" is asynchronous copy, current it is migrated to synchronous copy operation. You may need to adjust the code to tune the performance.")
302+
DEF_COMMENT(ASYNC_COPY_DEVICE_WARN, 1137, LOW_LEVEL, "ASM instruction \"{0}\" is asynchronous copy, current it is migrated to synchronous copy operation. You may need to adjust the code to tune the performance.")
303303

304304
// clang-format on
305305

clang/lib/DPCT/RulesAsm/AsmMigration.cpp

Lines changed: 38 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ using namespace clang::dpct;
3737
namespace {
3838

3939
inline bool SYCLGenError() { return true; }
40-
inline bool SYCLGenSuccess() {return false; }
40+
inline bool SYCLGenSuccess() { return false; }
4141

4242
/// This is used to handle all the AST nodes (except specific instructions, Eg.
4343
/// mov/setp), and generate functionally equivalent SYCL code.
@@ -2177,8 +2177,8 @@ class SYCLGen : public SYCLGenBase {
21772177
if (emitStmt(Inst->getOutputOperand()))
21782178
return SYCLGenError();
21792179
OS() << " = ";
2180-
OS() << MapNames::getDpctNamespace() << "bfe_safe<" << TypeStr << ">(" << Op[0]
2181-
<< ", " << Op[1] << ", " << Op[2] << ')';
2180+
OS() << MapNames::getDpctNamespace() << "bfe_safe<" << TypeStr << ">("
2181+
<< Op[0] << ", " << Op[1] << ", " << Op[2] << ')';
21822182
endstmt();
21832183
insertHeader(HeaderType::HT_DPCT_Math);
21842184
return SYCLGenSuccess();
@@ -2380,8 +2380,8 @@ class SYCLGen : public SYCLGenBase {
23802380
if (DpctGlobalInfo::useIntelDeviceMath() && !RD.empty()) {
23812381
insertHeader(HeaderType::HT_SYCL_Math);
23822382
OS() << MapNames::getClNamespace() << "ext::intel::math::"
2383-
<< (T->getKind() == InlineAsmBuiltinType::f32 ? 'f' : 'd')
2384-
<< "rcp_" << RD << '(' << Op[0] << ')';
2383+
<< (T->getKind() == InlineAsmBuiltinType::f32 ? 'f' : 'd') << "rcp_"
2384+
<< RD << '(' << Op[0] << ')';
23852385
} else {
23862386
OS() << "1 / " << Op[0];
23872387
}
@@ -2773,9 +2773,7 @@ class SYCLGen : public SYCLGenBase {
27732773
return SYCLGenSuccess();
27742774
}
27752775

2776-
bool handle_cp(const InlineAsmInstruction *Inst) override {
2777-
if (Inst->getNumInputOperands() != 3 || Inst->getNumTypes() != 1)
2778-
return SYCLGenError();
2776+
bool HandleCopyOperation(const InlineAsmInstruction *Inst) {
27792777

27802778
llvm::SaveAndRestore<const InlineAsmInstruction *> Store(CurrInst);
27812779
CurrInst = Inst;
@@ -2809,9 +2807,40 @@ class SYCLGen : public SYCLGenBase {
28092807
OS() << CommonBody("3");
28102808
endstmt();
28112809

2812-
report(Diagnostics::ASYNC_COPY_DEVICE_WARN, true);
2810+
auto OpStr =
2811+
llvm::Twine(Inst->getOpcodeID()->getName()).concat(".async").str();
2812+
report(Diagnostics::ASYNC_COPY_DEVICE_WARN, true, OpStr);
2813+
return SYCLGenSuccess();
2814+
}
2815+
2816+
bool HandleCopyWait(const InlineAsmInstruction *Inst) {
2817+
auto CommonStr = llvm::Twine("")
2818+
.concat("\"")
2819+
.concat(GAS->getAsmString()->getString())
2820+
.concat("\"")
2821+
.str();
2822+
2823+
report(
2824+
Diagnostics::FUNC_CALL_REMOVED, true, CommonStr,
2825+
"current \"cp.async\" is migrated to synchronous copy operation. You "
2826+
"may need to adjust the code to tune the performance.");
28132827
return SYCLGenSuccess();
28142828
}
2829+
2830+
bool handle_cp(const InlineAsmInstruction *Inst) override {
2831+
if (Inst->getNumInputOperands() == 3 && Inst->getNumTypes() == 1 &&
2832+
Inst->hasAttr(InstAttr::async))
2833+
return HandleCopyOperation(Inst);
2834+
2835+
if (Inst->getNumInputOperands() == 1 && Inst->hasAttr(InstAttr::async) &&
2836+
(Inst->hasAttr(InstAttr::commit_group) ||
2837+
Inst->hasAttr(InstAttr::wait_group) ||
2838+
Inst->hasAttr(InstAttr::wait_all))) {
2839+
return HandleCopyWait(Inst);
2840+
}
2841+
2842+
return SYCLGenError();
2843+
}
28152844
};
28162845

28172846
/// Clean the special character in identifier.

clang/lib/DPCT/RulesAsm/Parser/AsmTokenKinds.def

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -400,6 +400,9 @@ MODIFIER(wide, ".wide")
400400
MODIFIER(sync, ".sync")
401401
MODIFIER(async, ".async")
402402
MODIFIER(cg, ".cg")
403+
MODIFIER(commit_group, ".commit_group")
404+
MODIFIER(wait_group, ".wait_group")
405+
MODIFIER(wait_all, ".wait_all")
403406
MODIFIER(warp, ".warp")
404407
MODIFIER(up, ".up")
405408
MODIFIER(down, ".down")

clang/test/dpct/asm/cp.cu

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66

77
// clang-format off
88
#include <cstdint>
9-
#include <cstdint>
109
#include <cuda_runtime.h>
1110

1211
// CHECK: inline void cp_async4(void *smem_ptr, const void *glob_ptr) {
@@ -67,4 +66,34 @@ __device__ inline void cp_async4_pred(void *smem_ptr, const void *glob_ptr,
6766
"r"(smem), "l"(glob_ptr), "n"(BYTES));
6867
}
6968

69+
// CHECK:inline void cp_async_commit_group() {
70+
// CHECK-NEXT: /*
71+
// CHECK-NEXT: DPCT1026:{{[0-9]+}}: The call to "cp.async.commit_group;" was removed because current "cp.async" is migrated to synchronous copy operation. You may need to adjust the code to tune the performance.
72+
// CHECK-NEXT: */
73+
// CHECK-EMPTY:
74+
// CHECK-NEXT:}
75+
__device__ inline void cp_async_commit_group() {
76+
asm volatile("cp.async.commit_group;" ::);
77+
}
78+
79+
// CHECK:inline void cp_async_wait_group() {
80+
// CHECK-NEXT: /*
81+
// CHECK-NEXT: DPCT1026:{{[0-9]+}}: The call to "cp.async.wait_group 0;" was removed because current "cp.async" is migrated to synchronous copy operation. You may need to adjust the code to tune the performance.
82+
// CHECK-NEXT: */
83+
// CHECK-EMPTY:
84+
// CHECK-NEXT:}
85+
__device__ inline void cp_async_wait_group() {
86+
asm volatile("cp.async.wait_group 0;");
87+
}
88+
89+
// CHECK:inline void cp_async_wait_all() {
90+
// CHECK-NEXT: /*
91+
// CHECK-NEXT: DPCT1026:{{[0-9]+}}: The call to "cp.async.wait_all;" was removed because current "cp.async" is migrated to synchronous copy operation. You may need to adjust the code to tune the performance.
92+
// CHECK-NEXT: */
93+
// CHECK-EMPTY:
94+
// CHECK-NEXT:}
95+
__device__ inline void cp_async_wait_all() {
96+
asm volatile("cp.async.wait_all;");
97+
}
98+
7099
// clang-format on

0 commit comments

Comments
 (0)