// RUN: cp -r %S/pytorch_inc %T/pytorch/ATen/
// RUN: cd %T/pytorch/ATen
// RUN: mkdir dpct_out
- // RUN: dpct --out-root dpct_out %T/pytorch/ATen/src/ATen.cu --extra-arg="-I%T/pytorch/ATen/pytorch_inc" --cuda-include-path="%cuda-path/include" --rule-file=%S/../../../tools/dpct/extensions/pytorch_api_rules/pytorch_api.yaml --analysis-scope-path %T/pytorch/ATen/pytorch_inc --analysis-scope-path %T/pytorch/ATen/src --in-root %T/pytorch/ATen/src
+ // RUN: dpct --format-range=none --out-root dpct_out %T/pytorch/ATen/src/ATen.cu --extra-arg="-I%T/pytorch/ATen/pytorch_inc" --cuda-include-path="%cuda-path/include" --rule-file=%S/../../../tools/dpct/extensions/pytorch_api_rules/pytorch_api.yaml --analysis-scope-path %T/pytorch/ATen/pytorch_inc --analysis-scope-path %T/pytorch/ATen/src --in-root %T/pytorch/ATen/src
// RUN: FileCheck --input-file %T/pytorch/ATen/dpct_out/ATen.dp.cpp --match-full-lines %T/pytorch/ATen/src/ATen.cu

// CHECK: #include <c10/xpu/XPUStream.h>
// CHECK-NEXT: #include <c10/util/Half.h>
#include <ATen/cuda/CUDATensorMethods.cuh>

+ // CHECK: // BEGIN_1
+ // CHECK-EMPTY:
+ // CHECK-EMPTY:
+ // CHECK-NEXT: // END_1
+ // BEGIN_1
+ #include <ATen/cuda/Exceptions.h>
+ #include <THC/THCAtomics.cuh>
+ // END_1
+
+ // CHECK: #include <c10/xpu/XPUMacros.h>
+ #include <c10/cuda/CUDAMacros.h>
+
#define AT_CUDA_CHECK(stmt) (stmt)

// CHECK: #define BE_AT_CHECK
@@ -31,20 +43,19 @@ void test_CUDAStream_as_arg() {
  dim3 blockSize(8, 8, 1);
  void *args[] = {nullptr};

- // CHECK: ([&]() {
- // CHECK-NEXT: ((sycl::queue *)(c10::xpu::getCurrentXPUStream()))
- // CHECK-NEXT: ->parallel_for( sycl::nd_range<3>(gridSize * blockSize, blockSize),
- // CHECK-NEXT: [=](sycl::nd_item<3> item_ct1) {
- // CHECK-NEXT: kernel();
- // CHECK-NEXT: });
+ // CHECK: ([&](){
+ // CHECK-NEXT: ((sycl::queue*)(c10::xpu::getCurrentXPUStream()))->parallel_for(
+ // CHECK-NEXT: sycl::nd_range<3>(gridSize * blockSize, blockSize),
+ // CHECK-NEXT: [=](sycl::nd_item<3> item_ct1) {
+ // CHECK-NEXT: kernel();
+ // CHECK-NEXT: });
 // CHECK-NEXT: return 0;
 // CHECK-NEXT: }());
 AT_CUDA_CHECK(cudaLaunchKernel((const void *)kernel, gridSize, blockSize, args, 0, at::cuda::getCurrentCUDAStream()));
}

int main() {
- // CHECK: dpct::queue_ptr st =
- // CHECK-NEXT: &static_cast<sycl::queue &>(c10::xpu::getCurrentXPUStream());
+ // CHECK: dpct::queue_ptr st = &static_cast<sycl::queue&>(c10::xpu::getCurrentXPUStream());
 cudaStream_t st = 0;

 // stream APIs
@@ -55,14 +66,18 @@ int main() {
 // CHECK: auto deviceStream = c10::xpu::getCurrentXPUStream(devInd);
 auto deviceStream = at::cuda::getCurrentCUDAStream(devInd);

- // CHECK: dpct::queue_ptr curr_cuda_st =
- // CHECK-NEXT: &static_cast<sycl::queue &>(c10::xpu::getCurrentXPUStream().queue());
+ // CHECK: dpct::queue_ptr curr_cuda_st = &static_cast<sycl::queue &>(c10::xpu::getCurrentXPUStream().queue());
 cudaStream_t curr_cuda_st = at::cuda::getCurrentCUDAStream().stream();
- // CHECK: dpct::queue_ptr dev_cuda_st = &static_cast<sycl::queue &>(
- // CHECK-NEXT: c10::xpu::getCurrentXPUStream(devInd).queue());
+ // CHECK: dpct::queue_ptr dev_cuda_st = &static_cast<sycl::queue &>(c10::xpu::getCurrentXPUStream(devInd).queue());
 cudaStream_t dev_cuda_st = at::cuda::getCurrentCUDAStream(devInd).stream();

 test_CUDAStream_as_arg();

 return 0;
}
+
+ // CHECK: void foo2(c10::DeviceGuard device_guard, float *f) try {
+ // CHECK-NEXT: (DPCT_CHECK_ERROR(f = (float *)sycl::malloc_device(4, static_cast<sycl::queue&>(c10::xpu::getCurrentXPUStream()))));
+ void foo2(at::cuda::CUDAGuard device_guard, float *f) {
+ C10_CUDA_CHECK(cudaMalloc(&f, 4));
+ }