We have a code base that uses OpenMP for offloading. The application compiles fine with stock LLVM 13 targeting NVIDIA devices. When using oneAPI (2021.4) and targeting spir64, we see a compilation error in a single translation unit (see attachment).
We provide a simple reproducer that exhibits the same code structure. The error comes from a combination of distribute and parallel for collapse pragmas with some definitions of array indices that are NOT in the innermost for-loop.
Stock LLVM 13 with CUDA seems more forgiving. It would be helpful if icpx emitted a clearer diagnostic here that points the user to the problem.
I put a version of the function that exhibits the error and a fixed version in the same file. I use #ifdef REPRODUCE to toggle between them.
To compile reproducing the error:
icpx -DREPRODUCE -fiopenmp -fopenmp-targets=spir64 -g -std=c++17 -O1 -I. -o simple.o -c simple.cpp
To compile the fix:
icpx -fiopenmp -fopenmp-targets=spir64 -g -std=c++17 -O1 -I. -o simple.o -c simple.cpp
cd examples/exahype2/euler
icpx -I. -I../../..//src -fiopenmp -fopenmp-targets=spir64 -g -std=c++17 -O0 -DDimensions=2 -DPeanoDebug=0 -c -o tasks/EulerEnclaveTask.o tasks/EulerEnclaveTask.cpp
The source of the problem in tasks/EulerEnclaveTask.cpp is the call to the templated function Fusanov_2D (L457-468), which is defined in ../../../src/exahype2/fv/Rusanov.h. The error is caused by the use of the static methods SOLVER::flux and SOLVER::maxEigenvalue.
We can compile the file without problem using LLVM13 for nvidia target:
clang++ -I. -I../../..//src -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda -g -std=c++17 -O0 -DDimensions=2 -DPeanoDebug=0 -c -o tasks/EulerEnclaveTask.o tasks/EulerEnclaveTask.cpp
// This version triggers the compilation error.
// It appears to be caused by the interplay of distribute and collapse:
// the culprit is the definition of sourceIndex and destinationIndex
// OUTSIDE the innermost for loop.
#ifdef REPRODUCE
// Reproducer variant that is reported to fail to compile with icpx (oneAPI
// 2021.4) when offloading to spir64. Only built when REPRODUCE is defined.
//
// Every innermost iteration copies Qin[0] into Qout[1]; the values are
// irrelevant, only the loop/pragma structure matters.
//
// Qin  - read-only input array; element 0 is read.
// Qout - output array; element 1 is written.
//
// NOTE: do not "tidy up" this function. The placement of the two index
// declarations between the k-loop and the l-loop is the trigger.
void compute_bad(
const double * __restrict__ Qin,
double * __restrict__ Qout
)
{
const int M = 5;
const int N = 100;
// Outer loop: offloaded to the device and distributed across teams.
#pragma omp target teams distribute
for (int i=0;i<N;i++)
{
// collapse(2) asks for the k-loop and the l-loop to be fused into a single
// iteration space.
#pragma omp parallel for collapse(2)
for(int k=0;k<M;k++)
{
// These declarations are intervening code between the two collapsed loops,
// so the nest is not perfectly nested. OpenMP versions up to 5.0 require
// perfectly nested loops for collapse; this looks like the construct icpx
// trips over (TODO confirm against the compiler's OpenMP level).
int sourceIndex = 0;
int destinationIndex = 1;
for(int l=0;l<M;l++)
{
Qout[destinationIndex] = Qin[sourceIndex];
}
}
}
}
#else
//
// This version works: the definitions of sourceIndex and destinationIndex are pulled inside the innermost loop.
//
//
// Working counterpart of the reproducer: same loop nest and same pragmas,
// but the index variables are declared inside the innermost loop body so
// that nothing sits between the two loops covered by collapse(2).
//
// Every innermost iteration copies Qin[0] into Qout[1].
//
// Qin  - read-only input array; element 0 is read.
// Qout - output array; element 1 is written.
void compute_fixed(
  const double * __restrict__ Qin,
  double * __restrict__ Qout
)
{
  const int innerExtent = 5;
  const int outerExtent = 100;

  // Outer loop: offloaded to the device and distributed across teams.
  #pragma omp target teams distribute
  for (int outer = 0; outer < outerExtent; ++outer) {
    // The two inner loops are fused by collapse(2); keep them directly nested.
    #pragma omp parallel for collapse(2)
    for (int row = 0; row < innerExtent; ++row) {
      for (int col = 0; col < innerExtent; ++col) {
        // Index definitions deliberately live in the innermost body.
        int readPos = 0;
        int writePos = 1;
        Qout[writePos] = Qin[readPos];
      }
    }
  }
}
#endif
icpx -I. -I../../..//src -fiopenmp -fopenmp-targets=spir64 -g -std=c++17 -O2 -funroll-loops -DDimensions=2 -DPeanoDebug=0 -c -o tasks/EulerEnclaveTask.o tasks/EulerEnclaveTask.cpp
PLEASE submit a bug report to https://software.intel.com/en-us/support/priority-support and include the crash backtrace, preprocessed source, and associated run script.
Stack dump:
0. Program arguments: /opt/intel/oneapi/compiler/2021.4.0/linux/bin/clang++ -cc1 -triple spir64 -aux-triple x86_64-unknown-linux-gnu -disable-lifetime-markers -disable-intel-proprietary-opts -Wspir-compat -emit-llvm-bc -emit-llvm-uselists -disable-free -disable-llvm-verifier -discard-value-names -main-file-name EulerEnclaveTask.cpp -mrelocation-model pic -pic-level 2 -fhalf-no-semantic-interposition -fveclib=SVML -mframe-pointer=all -menable-no-infs -menable-no-nans -menable-unsafe-fp-math -fno-signed-zeros -mreassociate -freciprocal-math -fdenormal-fp-math=preserve-sign,preserve-sign -ffp-contract=fast -fno-rounding-math -ffast-math -ffinite-math-only -fno-verbose-asm -no-integrated-as -mconstructor-aliases -mllvm -treat-scalable-fixed-error-as-warning -debug-info-kind=limited -dwarf-version=4 -debugger-tuning=gdb -fcoverage-compilation-dir=/home/gcgt96/Exa/Peano/examples/exahype2/euler -resource-dir /opt/intel/oneapi/compiler/2021.4.0/linux/lib/clang/13.0.0 -internal-isystem /opt/intel/oneapi/compiler/2021.4.0/linux/bin/../include/sycl -internal-isystem /opt/intel/oneapi/compiler/2021.4.0/linux/bin/../include -I . 
-I ../../..//src -D Dimensions=2 -D PeanoDebug=0 -I/opt/intel/oneapi/vpl/2021.6.0/include -I/opt/intel/oneapi/tbb/2021.4.0/env/../include -I/opt/intel/oneapi/mpi/2021.4.0//include -I/opt/intel/oneapi/mkl/2021.4.0/include -I/opt/intel/oneapi/ipp/2021.4.0/include -I/opt/intel/oneapi/ippcp/2021.4.0/include -I/opt/intel/oneapi/ipp/2021.4.0/include -I/opt/intel/oneapi/dpl/2021.5.0/linux/include -I/opt/intel/oneapi/dpcpp-ct/2021.4.0/include -I/opt/intel/oneapi/dnnl/2021.4.0/cpu_dpcpp_gpu_dpcpp/lib -I/opt/intel/oneapi/dev-utilities/2021.4.0/include -I/opt/intel/oneapi/dal/2021.4.0/include -I/opt/intel/oneapi/compiler/2021.4.0/linux/include -I/opt/intel/oneapi/ccl/2021.4.0/include/cpu_gpu_dpcpp -I/opt/intel/oneapi/vpl/2021.6.0/include -I/opt/intel/oneapi/tbb/2021.4.0/env/../include -I/opt/intel/oneapi/mpi/2021.4.0//include -I/opt/intel/oneapi/mkl/2021.4.0/include -I/opt/intel/oneapi/ipp/2021.4.0/include -I/opt/intel/oneapi/ippcp/2021.4.0/include -I/opt/intel/oneapi/ipp/2021.4.0/include -I/opt/intel/oneapi/dpl/2021.5.0/linux/include -I/opt/intel/oneapi/dpcpp-ct/2021.4.0/include -I/opt/intel/oneapi/dnnl/2021.4.0/cpu_dpcpp_gpu_dpcpp/lib -I/opt/intel/oneapi/dev-utilities/2021.4.0/include -I/opt/intel/oneapi/dal/2021.4.0/include -I/opt/intel/oneapi/compiler/2021.4.0/linux/include -I/opt/intel/oneapi/ccl/2021.4.0/include/cpu_gpu_dpcpp -I/opt/intel/oneapi/vpl/2021.6.0/include -I/opt/intel/oneapi/tbb/2021.4.0/env/../include -I/opt/intel/oneapi/mpi/2021.4.0//include -I/opt/intel/oneapi/mkl/2021.4.0/include -I/opt/intel/oneapi/ipp/2021.4.0/include -I/opt/intel/oneapi/ippcp/2021.4.0/include -I/opt/intel/oneapi/ipp/2021.4.0/include -I/opt/intel/oneapi/dpl/2021.5.0/linux/include -I/opt/intel/oneapi/dpcpp-ct/2021.4.0/include -I/opt/intel/oneapi/dnnl/2021.4.0/cpu_dpcpp_gpu_dpcpp/lib -I/opt/intel/oneapi/dev-utilities/2021.4.0/include -I/opt/intel/oneapi/dal/2021.4.0/include -I/opt/intel/oneapi/compiler/2021.4.0/linux/include -I/opt/intel/oneapi/ccl/2021.4.0/include/cpu_gpu_dpcpp 
-I/opt/intel/oneapi/vpl/2021.6.0/include -I/opt/intel/oneapi/tbb/2021.4.0/env/../include -I/opt/intel/oneapi/mpi/2021.4.0//include -I/opt/intel/oneapi/mkl/2021.4.0/include -I/opt/intel/oneapi/ipp/2021.4.0/include -I/opt/intel/oneapi/ippcp/2021.4.0/include -I/opt/intel/oneapi/ipp/2021.4.0/include -I/opt/intel/oneapi/dpl/2021.5.0/linux/include -I/opt/intel/oneapi/dpcpp-ct/2021.4.0/include -I/opt/intel/oneapi/dnnl/2021.4.0/cpu_dpcpp_gpu_dpcpp/lib -I/opt/intel/oneapi/dev-utilities/2021.4.0/include -I/opt/intel/oneapi/dal/2021.4.0/include -I/opt/intel/oneapi/compiler/2021.4.0/linux/include -I/opt/intel/oneapi/ccl/2021.4.0/include/cpu_gpu_dpcpp -I/opt/intel/oneapi/vpl/2021.6.0/include -I/opt/intel/oneapi/tbb/2021.4.0/env/../include -I/opt/intel/oneapi/mpi/2021.4.0//include -I/opt/intel/oneapi/mkl/2021.4.0/include -I/opt/intel/oneapi/ipp/2021.4.0/include -I/opt/intel/oneapi/ippcp/2021.4.0/include -I/opt/intel/oneapi/ipp/2021.4.0/include -I/opt/intel/oneapi/dpl/2021.5.0/linux/include -I/opt/intel/oneapi/dpcpp-ct/2021.4.0/include -I/opt/intel/oneapi/dnnl/2021.4.0/cpu_dpcpp_gpu_dpcpp/lib -I/opt/intel/oneapi/dev-utilities/2021.4.0/include -I/opt/intel/oneapi/dal/2021.4.0/include -I/opt/intel/oneapi/compiler/2021.4.0/linux/include -I/opt/intel/oneapi/ccl/2021.4.0/include/cpu_gpu_dpcpp -I/opt/intel/oneapi/vpl/2021.6.0/include -I/opt/intel/oneapi/tbb/2021.4.0/env/../include -I/opt/intel/oneapi/mpi/2021.4.0//include -I/opt/intel/oneapi/mkl/2021.4.0/include -I/opt/intel/oneapi/ipp/2021.4.0/include -I/opt/intel/oneapi/ippcp/2021.4.0/include -I/opt/intel/oneapi/ipp/2021.4.0/include -I/opt/intel/oneapi/dpl/2021.5.0/linux/include -I/opt/intel/oneapi/dpcpp-ct/2021.4.0/include -I/opt/intel/oneapi/dnnl/2021.4.0/cpu_dpcpp_gpu_dpcpp/lib -I/opt/intel/oneapi/dev-utilities/2021.4.0/include -I/opt/intel/oneapi/dal/2021.4.0/include -I/opt/intel/oneapi/compiler/2021.4.0/linux/include -I/opt/intel/oneapi/ccl/2021.4.0/include/cpu_gpu_dpcpp -I/opt/intel/oneapi/vpl/2021.6.0/include 
-I/opt/intel/oneapi/tbb/2021.4.0/env/../include -I/opt/intel/oneapi/mpi/2021.4.0//include -I/opt/intel/oneapi/mkl/2021.4.0/include -I/opt/intel/oneapi/ipp/2021.4.0/include -I/opt/intel/oneapi/ippcp/2021.4.0/include -I/opt/intel/oneapi/ipp/2021.4.0/include -I/opt/intel/oneapi/dpl/2021.5.0/linux/include -I/opt/intel/oneapi/dpcpp-ct/2021.4.0/include -I/opt/intel/oneapi/dnnl/2021.4.0/cpu_dpcpp_gpu_dpcpp/lib -I/opt/intel/oneapi/dev-utilities/2021.4.0/include -I/opt/intel/oneapi/dal/2021.4.0/include -I/opt/intel/oneapi/compiler/2021.4.0/linux/include -I/opt/intel/oneapi/ccl/2021.4.0/include/cpu_gpu_dpcpp -cxx-isystem /opt/intel/oneapi/clck/2021.3.0/include -cxx-isystem /opt/intel/oneapi/clck/2021.3.0/include -cxx-isystem /opt/intel/oneapi/clck/2021.3.0/include -cxx-isystem /opt/intel/oneapi/clck/2021.3.0/include -cxx-isystem /opt/intel/oneapi/clck/2021.3.0/include -cxx-isystem /opt/intel/oneapi/clck/2021.3.0/include -cxx-isystem /opt/intel/oneapi/clck/2021.3.0/include -internal-isystem /opt/intel/oneapi/compiler/2021.4.0/linux/bin/../compiler/include -internal-isystem /opt/rh/devtoolset-9/root/usr/lib/gcc/x86_64-redhat-linux/9/../../../../include/c++/9 -internal-isystem /opt/rh/devtoolset-9/root/usr/lib/gcc/x86_64-redhat-linux/9/../../../../include/c++/9/x86_64-redhat-linux -internal-isystem /opt/rh/devtoolset-9/root/usr/lib/gcc/x86_64-redhat-linux/9/../../../../include/c++/9/backward -internal-isystem /opt/intel/oneapi/compiler/2021.4.0/linux/lib/clang/13.0.0/include -internal-isystem /usr/local/include -internal-isystem /opt/rh/devtoolset-9/root/usr/lib/gcc/x86_64-redhat-linux/9/../../../../x86_64-redhat-linux/include -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -std=c++17 -fdeprecated-macro -fno-dwarf-directory-asm -fdebug-compilation-dir=/home/gcgt96/Exa/Peano/examples/exahype2/euler -ferror-limit 19 -fheinous-gnu-extensions -fopenmp-late-outline -fintel-openmp-region -fopenmp-threadprivate-legacy -fopenmp -funroll-loops 
-fsycl-instrument-device-code -fgnuc-version=4.2.1 -fcolor-diagnostics -fopenmp-is-device -fopenmp-host-ir-file-path /tmp/EulerEnclaveTask-a2e88b.bc -mllvm -paropt=63 -fopenmp-targets=spir64 -D__GCC_HAVE_DWARF2_CFI_ASM=1 -fintel-compatibility -mllvm -disable-hir-generate-mkl-call -mllvm -intel-libirc-allowed -mllvm -loopopt=0 -o /tmp/EulerEnclaveTask-4323d7.bc -x c++ tasks/EulerEnclaveTask.cpp
1. <eof> parser at end of file
2. Per-module optimization passes
3. Running pass 'VPO Paropt Pass' on module 'tasks/EulerEnclaveTask.cpp'.
#0 0x00005603d4643cda llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) (/opt/intel/oneapi/compiler/2021.4.0/linux/bin/clang+++0x3b4dcda)
#1 0x00005603d464419e SignalHandler(int) Signals.cpp:0:0
#2 0x00007fd569ce91b0 __restore_rt (/lib64/libpthread.so.0+0x141b0)
#3 0x00005603d4c5b3f8 llvm::vpo::VPOParoptTransform::assignParallelDimensions() const (/opt/intel/oneapi/compiler/2021.4.0/linux/bin/clang+++0x41653f8)
#4 0x00005603d4c59d7e llvm::vpo::VPOParoptTransform::paroptTransforms() (/opt/intel/oneapi/compiler/2021.4.0/linux/bin/clang+++0x4163d7e)
#5 0x00005603d4c23968 llvm::vpo::VPOParoptModuleTransform::doParoptTransforms(std::__1::function<llvm::vpo::WRegionInfo& (llvm::Function&, bool*)>) (/opt/intel/oneapi/compiler/2021.4.0/linux/bin/clang+++0x412d968)
#6 0x00005603d4c19749 llvm::VPOParoptPass::runImpl(llvm::Module&, std::__1::function<llvm::vpo::WRegionInfo& (llvm::Function&, bool*)>, unsigned int) (/opt/intel/oneapi/compiler/2021.4.0/linux/bin/clang+++0x4123749)
#7 0x00005603d4c194d1 llvm::vpo::VPOParopt::runOnModule(llvm::Module&) (/opt/intel/oneapi/compiler/2021.4.0/linux/bin/clang+++0x41234d1)
#8 0x00005603d3f5f5ad llvm::legacy::PassManagerImpl::run(llvm::Module&) (/opt/intel/oneapi/compiler/2021.4.0/linux/bin/clang+++0x34695ad)
#9 0x00005603d51af988 clang::EmitBackendOutput(clang::DiagnosticsEngine&, clang::HeaderSearchOptions const&, clang::CodeGenOptions const&, clang::TargetOptions const&, clang::LangOptions const&, llvm::StringRef, llvm::Module*, clang::BackendAction, std::__1::unique_ptr<llvm::raw_pwrite_stream, std::__1::default_delete<llvm::raw_pwrite_stream> >) (/opt/intel/oneapi/compiler/2021.4.0/linux/bin/clang+++0x46b9988)
#10 0x00005603d572148c clang::BackendConsumer::HandleTranslationUnit(clang::ASTContext&) CodeGenAction.cpp:0:0
#11 0x00005603d6696863 clang::ParseAST(clang::Sema&, bool, bool) (/opt/intel/oneapi/compiler/2021.4.0/linux/bin/clang+++0x5ba0863)
#12 0x00005603d571d247 clang::CodeGenAction::ExecuteAction() (/opt/intel/oneapi/compiler/2021.4.0/linux/bin/clang+++0x4c27247)
#13 0x00005603d56570ae clang::FrontendAction::Execute() (/opt/intel/oneapi/compiler/2021.4.0/linux/bin/clang+++0x4b610ae)
#14 0x00005603d55a3eea clang::CompilerInstance::ExecuteAction(clang::FrontendAction&) (/opt/intel/oneapi/compiler/2021.4.0/linux/bin/clang+++0x4aadeea)
#15 0x00005603d5717c26 clang::ExecuteCompilerInvocation(clang::CompilerInstance*) (/opt/intel/oneapi/compiler/2021.4.0/linux/bin/clang+++0x4c21c26)
#16 0x00005603d34011cb cc1_main(llvm::ArrayRef<char const*>, char const*, void*) (/opt/intel/oneapi/compiler/2021.4.0/linux/bin/clang+++0x290b1cb)
#17 0x00005603d33fe8a0 ExecuteCC1Tool(llvm::SmallVectorImpl<char const*>&) driver.cpp:0:0
#18 0x00005603d33fdf72 main (/opt/intel/oneapi/compiler/2021.4.0/linux/bin/clang+++0x2907f72)
#19 0x00007fd569b321e2 __libc_start_main (/lib64/libc.so.6+0x281e2)
#20 0x00005603d33fb269 _start (/opt/intel/oneapi/compiler/2021.4.0/linux/bin/clang+++0x2905269)
clang++: error: unable to execute command: Segmentation fault (core dumped)
clang++: error: clang frontend command failed due to signal (use -v to see invocation)