SYCL
SYCL is a cross-platform abstraction layer that enables code for heterogeneous processors to be written in C++, with the host and kernel code for an application contained in the same source file. On NERSC systems, only the intel/2024.1.0 module supports SYCL.
Vector Addition Example
sycl-vecadd-buffer.cpp
#include <sycl/sycl.hpp>
#include <cmath>
#include <iostream>
int main() {
const int n = 100000;
sycl::buffer<double> b_a{n}, b_b{n}, b_c{n};
{
sycl::host_accessor a{b_a, sycl::write_only};
sycl::host_accessor b{b_b, sycl::write_only};
for (size_t i = 0; i < n; i++) {
a[i] = sin(i) * sin(i);
b[i] = cos(i) * cos(i);
}
}
sycl::queue q{sycl::gpu_selector_v};
q.submit([&](sycl::handler &h) {
sycl::accessor a{b_a, h, sycl::read_only};
sycl::accessor b{b_b, h, sycl::read_only};
sycl::accessor c{b_c, h, sycl::write_only};
h.parallel_for(n, [=](sycl::id<1> i) { c[i] = a[i] + b[i]; });
});
{
double sum = 0.0;
sycl::host_accessor c{b_c, sycl::read_only};
for (size_t i = 0; i < n; i++)
sum += c[i];
std::cout << "sum = " << sum / n << std::endl;
if (!(fabs(sum - static_cast<double>(n)) <= 1.0e-8))
return 1;
}
return 0;
}
Makefile
# Builds the SYCL vector-addition example with icpx, using the
# nvptx64-nvidia-cuda SYCL target and the sm_80 CUDA GPU architecture.
CXX = icpx
CXXFLAGS = -std=c++17 -fsycl -fsycl-targets=nvptx64-nvidia-cuda -Xsycl-target-backend '--cuda-gpu-arch=sm_80'

# Neither of these targets names a file, so declare both phony.
.PHONY: all test

all: sycl-vecadd-buffer.x

sycl-vecadd-buffer.x: sycl-vecadd-buffer.cpp
	$(CXX) $(CXXFLAGS) -o $@ $^

# Depend on the executable so that `make test` builds it first when needed.
test: sycl-vecadd-buffer.x
	./sycl-vecadd-buffer.x
$ module load intel/2024.1.0
$ make
icpx -std=c++17 -fsycl -fsycl-targets=nvptx64-nvidia-cuda -Xsycl-target-backend '--cuda-gpu-arch=sm_80' -o sycl-vecadd-buffer.x sycl-vecadd-buffer.cpp
$ ./sycl-vecadd-buffer.x
sum = 1
oneMKL Example
sycl-gemm-usm.cpp
#include <sycl/sycl.hpp>
#include <oneapi/mkl/blas.hpp>
#include <cmath>
#include <iostream>
#include <vector>
// Multiplies two row-major matrices, A (m x n) and B (n x p), on a GPU with
// the oneMKL USM GEMM routine and compares the result element by element
// against a serial host computation.
//
// Returns 0 when every element agrees to within 1.0e-8, and 1 when an element
// differs or a device allocation fails. Device memory is released on every
// return path.
int main() {
  // size_t matches the type used to index the host vectors below.
  constexpr size_t m = 600;
  constexpr size_t n = 1200;
  constexpr size_t p = 2400;

  // Host inputs: every element of A is 1.0 and every element of B is 2.0.
  std::vector<double> matA(m * n, 1.0);
  std::vector<double> matB(n * p, 2.0);

  // Serial reference result (value-initialised to 0.0 before accumulation).
  std::vector<double> matC_serial(m * p);
  for (size_t i = 0; i < m; i++) {
    for (size_t j = 0; j < p; j++) {
      for (size_t d = 0; d < n; d++) {
        matC_serial[i * p + j] += matA[i * n + d] * matB[d * p + j];
      }
    }
  }

  sycl::queue q{sycl::gpu_selector_v};

  double *dev_a = sycl::malloc_device<double>(m * n, q);
  double *dev_b = sycl::malloc_device<double>(n * p, q);
  double *dev_c = sycl::malloc_device<double>(m * p, q);

  // Frees whichever device buffers were actually allocated.
  const auto release_device_memory = [&]() {
    if (dev_a != nullptr)
      sycl::free(dev_a, q);
    if (dev_b != nullptr)
      sycl::free(dev_b, q);
    if (dev_c != nullptr)
      sycl::free(dev_c, q);
  };

  if (dev_a == nullptr || dev_b == nullptr || dev_c == nullptr) {
    std::cerr << "device memory allocation failed\n";
    release_device_memory();
    return 1;
  }

  // Copy the inputs to the device and wait for both copies to finish.
  q.memcpy(dev_a, matA.data(), sizeof(double) * m * n);
  q.memcpy(dev_b, matB.data(), sizeof(double) * n * p);
  q.wait();

  // The host data is row-major but a column-major GEMM is called. A row-major
  // matrix read as column-major is its transpose, so the call computes
  // C^T = B^T * A^T: B is passed first with dimensions (p, m, n), which leaves
  // row-major C = A * B in dev_c.
  oneapi::mkl::transpose transA = oneapi::mkl::transpose::nontrans;
  oneapi::mkl::transpose transB = oneapi::mkl::transpose::nontrans;
  oneapi::mkl::blas::column_major::gemm(q, transA, transB, p, m, n, 1.0, dev_b, p, dev_a, n, 0.0, dev_c, p);
  q.wait();

  // Copy the device result back to the host.
  std::vector<double> matC_parallel(m * p);
  q.memcpy(matC_parallel.data(), dev_c, sizeof(double) * m * p);
  q.wait();

  // Compare against the serial result. The test is written as !(x <= tol) so
  // that a NaN difference counts as a mismatch.
  bool matches = true;
  for (size_t idx = 0; idx < m * p && matches; idx++) {
    if (!(std::fabs(matC_parallel[idx] - matC_serial[idx]) <= 1.0e-8))
      matches = false;
  }

  release_device_memory();
  return matches ? 0 : 1;
}
Makefile
# Builds the SYCL + oneMKL GEMM example with icpx, using the
# nvptx64-nvidia-cuda SYCL target and the sm_80 CUDA GPU architecture.
CXX = icpx
CXXFLAGS = -std=c++17 -fsycl -fsycl-targets=nvptx64-nvidia-cuda -Xsycl-target-backend '--cuda-gpu-arch=sm_80'

# oneMKL installation prefix, shared by the include and library paths.
ONEMKL_ROOT = /global/common/software/nersc9/onemkl/experimental-2024-06-20
INCFLAGS = -I$(ONEMKL_ROOT)/include
LDFLAGS = -L$(ONEMKL_ROOT)/lib -lonemkl

# Neither of these targets names a file, so declare both phony.
.PHONY: all test

all: sycl-gemm-usm.x

sycl-gemm-usm.x: sycl-gemm-usm.cpp
	$(CXX) $(CXXFLAGS) $(INCFLAGS) $(LDFLAGS) -o $@ $^

# Depend on the executable so that `make test` builds it first when needed.
test: sycl-gemm-usm.x
	./sycl-gemm-usm.x
$ module load intel/2024.1.0
$ export LD_LIBRARY_PATH=/global/common/software/nersc9/onemkl/experimental-2024-06-20/lib:$LD_LIBRARY_PATH
$ make
icpx -std=c++17 -fsycl -fsycl-targets=nvptx64-nvidia-cuda -Xsycl-target-backend '--cuda-gpu-arch=sm_80' -I/global/common/software/nersc9/onemkl/experimental-2024-06-20/include -L/global/common/software/nersc9/onemkl/experimental-2024-06-20/lib -lonemkl -o sycl-gemm-usm.x sycl-gemm-usm.cpp
$ ./sycl-gemm-usm.x
References
- NERSC, ALCF, Codeplay partnership
- DPC++ tutorial
- DPC++ Examples from Intel
- Free ebook on SYCL programming
- SYCL 2020 Specification
- SYCL.tech portal
- Codeplay material
- oneMKL
Support
- #sycl channel in NERSC Users Slack (login required)
- NERSC Help Desk