55#include < cuda_runtime.h>
66#include < iostream>
77
// Element-wise integer addition kernel.
//
// Each thread derives one flat index from the x components of its block and
// thread coordinates and, when that index is below N, stores A[i] + B[i]
// into C[i]. Threads whose index is >= N do nothing, so the launch may
// cover more threads than there are elements.
//
// Parameters:
//   A, B - input arrays; only elements [0, N) are read.
//   C    - output array; only elements [0, N) are written.
//   N    - number of elements to process.
//
// NOTE(review): the `static inline` qualifiers look deliberate -- the
// expectation comments that follow this kernel look for a `static inline`
// wrapper -- so they are preserved as-is. Confirm against the test's RUN line.
__global__ static inline void vectorAdd(const int *A, int *B, int *C, int N) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  // Guard the tail: the grid is not required to divide N evenly.
  if (i < N) {
    C[i] = A[i] + B[i];
  }
}
1414
// CHECK: static inline void vectorAdd_wrapper(const int * A ,int * B ,int * C ,int N) {
// CHECK: sycl::queue queue = *dpct::kernel_launcher::_que;
// CHECK: unsigned int localMemSize = dpct::kernel_launcher::_local_mem_size;
// CHECK: sycl::nd_range<3> nr = dpct::kernel_launcher::_nr;
@@ -24,15 +24,15 @@ __global__ void vectorAdd(const int *A, int *B, int *C, int N) {
// CHECK: }
2525
// Element-wise addition kernel, generic over the element type T.
//
// Each thread derives one flat index from the x components of its block and
// thread coordinates and, when that index is below N, stores A[i] + B[i]
// into C[i]. Threads whose index is >= N do nothing. T must support
// operator+ and assignment of the sum to a T in device code.
//
// Parameters:
//   A, B - input arrays; only elements [0, N) are read.
//   C    - output array; only elements [0, N) are written.
//   N    - number of elements to process.
//
// NOTE(review): the `static inline` qualifiers look deliberate -- the
// expectation comments that follow this kernel look for a `static inline`
// templated wrapper -- so they are preserved as-is. Confirm against the
// test's RUN line.
template <typename T>
__global__ static inline void vectorTemplateAdd(const T *A, T *B, T *C, int N) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  // Guard the tail: the grid is not required to divide N evenly.
  if (i < N) {
    C[i] = A[i] + B[i];
  }
}
3333
// CHECK: template<typename T>
// CHECK: static inline void vectorTemplateAdd_wrapper(const T * A ,T * B ,T * C ,int N) {
// CHECK: sycl::queue queue = *dpct::kernel_launcher::_que;
// CHECK: unsigned int localMemSize = dpct::kernel_launcher::_local_mem_size;
// CHECK: sycl::nd_range<3> nr = dpct::kernel_launcher::_nr;
0 commit comments