|
#include <cstdint>
#include <cuda_runtime.h>
10 | 10 |
|
// Migration check: the PTX relaxed global add-reduction is expected to become a
// work-group sycl::reduce_over_group with sycl::plus<> (see CHECK lines).
// CHECK: void atomicAddKernel(int* lock, int val) {
// CHECK-NEXT: *lock = sycl::reduce_over_group(sycl::ext::oneapi::this_work_item::get_nd_item<3>().get_group(), val,sycl::plus<>());
// CHECK-NEXT:}
__global__ void atomicAddKernel(int* lock, int val) {
  asm volatile("red.relaxed.gpu.global.add.s32 [%0], %1;\n"
               ::"l"(lock),"r"(val):"memory");
}
18 | 18 |
|
// Migration check: the PTX relaxed global bitwise-OR reduction is expected to
// become a work-group sycl::reduce_over_group with sycl::bit_or<>.
// CHECK: void atomicOrKernel(uint32_t* lock, uint32_t val) {
// CHECK-NEXT: *lock = sycl::reduce_over_group(sycl::ext::oneapi::this_work_item::get_nd_item<3>().get_group(), val,sycl::bit_or<>());
// CHECK-NEXT:}
__global__ void atomicOrKernel(uint32_t* lock, uint32_t val) {
  asm volatile("red.relaxed.gpu.global.or.b32 [%0], %1;\n"
               ::"l"(lock),"r"(val):"memory");
}
27 | 26 |
|
// Migration check: the PTX relaxed global bitwise-XOR reduction is expected to
// become a work-group sycl::reduce_over_group with sycl::bit_xor<>.
// CHECK: void atomicXorKernel(uint32_t* lock, uint32_t val) {
// CHECK-NEXT: *lock = sycl::reduce_over_group(sycl::ext::oneapi::this_work_item::get_nd_item<3>().get_group(), val,sycl::bit_xor<>());
// CHECK-NEXT:}
__global__ void atomicXorKernel(uint32_t* lock, uint32_t val) {
  asm volatile("red.relaxed.gpu.global.xor.b32 [%0], %1;\n"
               ::"l"(lock),"r"(val):"memory");
}
36 | 34 |
|
// Migration check: the PTX relaxed global bitwise-AND reduction is expected to
// become a work-group sycl::reduce_over_group with sycl::bit_and<>.
// CHECK: void atomicAndKernel(uint32_t* lock, uint32_t val) {
// CHECK-NEXT: *lock = sycl::reduce_over_group(sycl::ext::oneapi::this_work_item::get_nd_item<3>().get_group(), val,sycl::bit_and<>());
// CHECK-NEXT: }
__global__ void atomicAndKernel(uint32_t* lock, uint32_t val) {
  asm volatile("red.relaxed.gpu.global.and.b32 [%0], %1;\n"
               ::"l"(lock),"r"(val):"memory");
}
45 | 42 |
|
// Migration check: the PTX relaxed global unsigned-max reduction is expected to
// become a work-group sycl::reduce_over_group with sycl::maximum<>.
// CHECK: void atomicMaxKernel(uint32_t* lock, uint32_t val) {
// CHECK-NEXT: *lock = sycl::reduce_over_group(sycl::ext::oneapi::this_work_item::get_nd_item<3>().get_group(), val,sycl::maximum<>());
// CHECK-NEXT: }
__global__ void atomicMaxKernel(uint32_t* lock, uint32_t val) {
  asm volatile("red.relaxed.gpu.global.max.u32 [%0], %1;\n"
               ::"l"(lock),"r"(val):"memory");
}
54 | 50 |
|
// Migration check: the PTX relaxed global unsigned-min reduction is expected to
// become a work-group sycl::reduce_over_group with sycl::minimum<>.
// CHECK: void atomicMinKernel(uint32_t* lock, uint32_t val) {
// CHECK-NEXT: *lock = sycl::reduce_over_group(sycl::ext::oneapi::this_work_item::get_nd_item<3>().get_group(), val,sycl::minimum<>());
// CHECK-NEXT: }
__global__ void atomicMinKernel(uint32_t* lock, uint32_t val) {
  asm volatile("red.relaxed.gpu.global.min.u32 [%0], %1;\n"
               ::"l"(lock),"r"(val):"memory");
}
63 | 58 |
|
// Same as atomicAddKernel but with .release memory-order semantics in the PTX;
// expected to map to the same sycl::reduce_over_group / sycl::plus<> form.
// CHECK: void atomicAddKernelRelease(int* lock, int val) {
// CHECK-NEXT: *lock = sycl::reduce_over_group(sycl::ext::oneapi::this_work_item::get_nd_item<3>().get_group(), val,sycl::plus<>());
// CHECK-NEXT:}
__global__ void atomicAddKernelRelease(int* lock, int val) {
  asm volatile("red.release.gpu.global.add.s32 [%0], %1;\n"
               ::"l"(lock),"r"(val):"memory");
}
72 | 66 |
|
// Same as atomicOrKernel but with .release memory-order semantics in the PTX;
// expected to map to the same sycl::reduce_over_group / sycl::bit_or<> form.
// CHECK: void atomicOrKernelRelease(uint32_t* lock, uint32_t val) {
// CHECK-NEXT: *lock = sycl::reduce_over_group(sycl::ext::oneapi::this_work_item::get_nd_item<3>().get_group(), val,sycl::bit_or<>());
// CHECK-NEXT:}
__global__ void atomicOrKernelRelease(uint32_t* lock, uint32_t val) {
  asm volatile("red.release.gpu.global.or.b32 [%0], %1;\n"
               ::"l"(lock),"r"(val):"memory");
}
81 | 74 |
|
// Same as atomicXorKernel but with .release memory-order semantics in the PTX;
// expected to map to the same sycl::reduce_over_group / sycl::bit_xor<> form.
// CHECK: void atomicXorKernelRelease(uint32_t* lock, uint32_t val) {
// CHECK-NEXT: *lock = sycl::reduce_over_group(sycl::ext::oneapi::this_work_item::get_nd_item<3>().get_group(), val,sycl::bit_xor<>());
// CHECK-NEXT:}
__global__ void atomicXorKernelRelease(uint32_t* lock, uint32_t val) {
  asm volatile("red.release.gpu.global.xor.b32 [%0], %1;\n"
               ::"l"(lock),"r"(val):"memory");
}
90 | 82 |
|
// Same as atomicAndKernel but with .release memory-order semantics in the PTX;
// expected to map to the same sycl::reduce_over_group / sycl::bit_and<> form.
// CHECK: void atomicAndKernelRelease(uint32_t* lock, uint32_t val) {
// CHECK-NEXT: *lock = sycl::reduce_over_group(sycl::ext::oneapi::this_work_item::get_nd_item<3>().get_group(), val,sycl::bit_and<>());
// CHECK-NEXT: }
__global__ void atomicAndKernelRelease(uint32_t* lock, uint32_t val) {
  asm volatile("red.release.gpu.global.and.b32 [%0], %1;\n"
               ::"l"(lock),"r"(val):"memory");
}
99 | 90 |
|
// Same as atomicMaxKernel but with .release memory-order semantics in the PTX;
// expected to map to the same sycl::reduce_over_group / sycl::maximum<> form.
// CHECK: void atomicMaxKernelRelease(uint32_t* lock, uint32_t val) {
// CHECK-NEXT: *lock = sycl::reduce_over_group(sycl::ext::oneapi::this_work_item::get_nd_item<3>().get_group(), val,sycl::maximum<>());
// CHECK-NEXT: }
__global__ void atomicMaxKernelRelease(uint32_t* lock, uint32_t val) {
  asm volatile("red.release.gpu.global.max.u32 [%0], %1;\n"
               ::"l"(lock),"r"(val):"memory");
}
108 | 98 |
|
109 | | -// CHECK: void atomicMinKernelRelease(uint32_t* lock, uint32_t val, |
110 | | -// CHECK-NEXT: const sycl::nd_item<3> &item_ct1) { |
111 | | -// CHECK-NEXT: *lock = sycl::reduce_over_group(item_ct1.get_group(), val,sycl::minimum<>()); |
| 99 | +// CHECK: void atomicMinKernelRelease(uint32_t* lock, uint32_t val) { |
| 100 | +// CHECK-NEXT: *lock = sycl::reduce_over_group(sycl::ext::oneapi::this_work_item::get_nd_item<3>().get_group(), val,sycl::minimum<>()); |
112 | 101 | // CHECK-NEXT: } |
113 | 102 | __global__ void atomicMinKernelRelease(uint32_t* lock, uint32_t val) { |
114 | 103 | asm volatile("red.release.gpu.global.min.u32 [%0], %1;\n" |
|