diff --git a/CMakeLists.txt b/CMakeLists.txt index 91e072bf8..8f15ec370 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -62,6 +62,13 @@ if (ENABLE_FFT) message (STATUS "Found Heffte_DIR: ${Heffte_DIR}") endif () +option (ENABLE_NUFFT "Enable NUFFT transform" OFF) +if (ENABLE_NUFFT) + add_definitions (-DENABLE_NUFFT) + find_package(CUFINUFFT REQUIRED) + message (STATUS "Found CUFINUFFT_DIR: ${CUFINUFFT_DIR}") +endif () + option (ENABLE_SOLVERS "Enable IPPL solvers" OFF) add_subdirectory (src) diff --git a/CMakeModules/FindCUFINUFFT.cmake b/CMakeModules/FindCUFINUFFT.cmake new file mode 100644 index 000000000..202a044a3 --- /dev/null +++ b/CMakeModules/FindCUFINUFFT.cmake @@ -0,0 +1,32 @@ +# +# Find CUFINUFFT includes and library +# +# CUFINUFFT_INCLUDE_DIR - where to find cufinufft.h +# CUFINUFFT_LIBRARY - libcufinufft.so path +# CUFINUFFT_FOUND - do not attempt to use if "no" or undefined. + +FIND_PATH(CUFINUFFT_INCLUDE_DIR cufinufft.h + HINTS $ENV{CUFINUFFT_INCLUDE_PATH} $ENV{CUFINUFFT_INCLUDE_DIR} $ENV{CUFINUFFT_PREFIX}/include $ENV{CUFINUFFT_DIR}/include ${PROJECT_SOURCE_DIR}/include + PATHS ENV CPP_INCLUDE_PATH +) +#Static library has some issues and gives a cuda error at the end of compilation +FIND_LIBRARY(CUFINUFFT_LIBRARY_DIR libcufinufft.so + HINTS $ENV{CUFINUFFT_LIBRARY_PATH} $ENV{CUFINUFFT_LIBRARY_DIR} $ENV{CUFINUFFT_PREFIX}/lib $ENV{CUFINUFFT_DIR}/lib $ENV{CUFINUFFT}/lib ${PROJECT_SOURCE_DIR}/lib + PATHS ENV LIBRARY_PATH +) + +IF(CUFINUFFT_INCLUDE_DIR AND CUFINUFFT_LIBRARY_DIR) + SET( CUFINUFFT_FOUND "YES" ) + SET( CUFINUFFT_DIR $ENV{CUFINUFFT_DIR} ) +ENDIF() + +IF (CUFINUFFT_FOUND) + IF (NOT CUFINUFFT_FIND_QUIETLY) + MESSAGE(STATUS "Found cufinufft library dir: ${CUFINUFFT_LIBRARY_DIR}") + MESSAGE(STATUS "Found cufinufft include dir: ${CUFINUFFT_INCLUDE_DIR}") + ENDIF (NOT CUFINUFFT_FIND_QUIETLY) +ELSE (CUFINUFFT_FOUND) + IF (CUFINUFFT_FIND_REQUIRED) + MESSAGE(FATAL_ERROR "Could not find CUFINUFFT!") + ENDIF (CUFINUFFT_FIND_REQUIRED) +ENDIF (CUFINUFFT_FOUND) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index ad4b1f186..bd6a2205b 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -94,7 +94,14 @@ include_directories ( add_library ( ippl ${IPPL_SRCS} ${IPPL_SRCS_FORT} ) -target_link_libraries(ippl PUBLIC Kokkos::kokkos ${HEFFTE_LIBRARY}) + +if (ENABLE_NUFFT) + target_include_directories(ippl PUBLIC ${CUFINUFFT_INCLUDE_DIR}) + target_link_libraries(ippl PUBLIC Kokkos::kokkos ${HEFFTE_LIBRARY} ${CUFINUFFT_LIBRARY_DIR}) +else() + target_link_libraries(ippl PUBLIC Kokkos::kokkos ${HEFFTE_LIBRARY}) +endif() + install (TARGETS ippl DESTINATION lib) install (FILES ${IPPL_BASEDIR_HDRS} DESTINATION include) diff --git a/src/FFT/FFT.h b/src/FFT/FFT.h index 8a13e3b45..6544b54f2 100644 --- a/src/FFT/FFT.h +++ b/src/FFT/FFT.h @@ -30,12 +30,15 @@ #include #include +#include #include #include +#include #include #include "FieldLayout/FieldLayout.h" #include "Field/Field.h" +#include "Particle/ParticleAttrib.h" #include "Utility/ParameterList.h" #include "Utility/IpplException.h" @@ -64,6 +67,12 @@ namespace ippl { Tag classes for Cosine transforms */ class CosTransform {}; +#ifdef KOKKOS_ENABLE_CUDA + /** + Tag classes for Non-uniform type of Fourier transforms + */ + class NUFFTransform {}; +#endif enum FFTComm { a2av = 0, @@ -110,6 +119,37 @@ namespace ippl { using backendCos = heffte::backend::stock_cos; }; #endif +#endif + +#ifdef KOKKOS_ENABLE_CUDA + template + struct CufinufftType {}; + + template <> + struct CufinufftType { + std::function makeplan = cufinufftf_makeplan; + std::function setpts = cufinufftf_setpts; + std::function execute = cufinufftf_execute; + std::function destroy = cufinufftf_destroy; + + using complexType = cuFloatComplex; + using plan_t = cufinufftf_plan; + }; + + template <> + struct CufinufftType { + std::function makeplan = cufinufft_makeplan; + std::function setpts = cufinufft_setpts; + std::function execute = cufinufft_execute; + std::function destroy = cufinufft_destroy; + + using complexType = cuDoubleComplex; + using plan_t = cufinufft_plan; + }; #endif } @@ -296,7 +336,56 @@ namespace ippl { }; +#ifdef KOKKOS_ENABLE_CUDA + /** + Non-uniform FFT class + */ + template + class FFT { + + public: + + typedef FieldLayout Layout_t; + typedef Kokkos::complex KokkosComplex_t; + typedef Field ComplexField_t; + + using complexType = typename detail::CufinufftType::complexType; + using plan_t = typename detail::CufinufftType::plan_t; + + /** Create a new FFT object with the layout for the input Field, type + * (1 or 2) for the NUFFT and parameters for cuFINUFFT. + */ + FFT(const Layout_t& layout, int type, const ParameterList& params); + + // Destructor + ~FFT(); + + /** Do the NUFFT. + */ + template + void transform(const ParticleAttrib< Vector, Properties... >& R, + ParticleAttrib& Q, ComplexField_t& f); + + + private: + + /** + setup performs the initialization necessary. + */ + void setup(std::array& nmodes, + const ParameterList& params); + + detail::CufinufftType nufft_m; + plan_t plan_m; + int ier_m; + T tol_m; + int type_m; + + }; + + } +#endif #include "FFT/FFT.hpp" #endif // IPPL_FFT_FFT_H diff --git a/src/FFT/FFT.hpp b/src/FFT/FFT.hpp index 853651858..985e50ab4 100644 --- a/src/FFT/FFT.hpp +++ b/src/FFT/FFT.hpp @@ -57,8 +57,8 @@ namespace ippl { * 1D FFTs we just have to make the length in other * dimensions to be 1. */ - std::array low; - std::array high; + std::array low; + std::array high; const NDIndex& lDom = layout.getLocalNDIndex(); @@ -88,45 +88,45 @@ namespace ippl { const ParameterList& params) { - heffte::box3d inbox = {low, high}; - heffte::box3d outbox = {low, high}; + heffte::box3d inbox = {low, high}; + heffte::box3d outbox = {low, high}; - heffte::plan_options heffteOptions = - heffte::default_options(); + heffte::plan_options heffteOptions = + heffte::default_options(); - if(!params.get("use_heffte_defaults")) { - heffteOptions.use_pencils = params.get("use_pencils"); - heffteOptions.use_reorder = params.get("use_reorder"); + if(!params.get("use_heffte_defaults")) { + heffteOptions.use_pencils = params.get("use_pencils"); + heffteOptions.use_reorder = params.get("use_reorder"); #ifdef Heffte_ENABLE_GPU - heffteOptions.use_gpu_aware = params.get("use_gpu_aware"); + heffteOptions.use_gpu_aware = params.get("use_gpu_aware"); #endif - switch (params.get("comm")) { - - case a2a: - heffteOptions.algorithm = heffte::reshape_algorithm::alltoall; - break; - case a2av: - heffteOptions.algorithm = heffte::reshape_algorithm::alltoallv; - break; - case p2p: - heffteOptions.algorithm = heffte::reshape_algorithm::p2p; - break; - case p2p_pl: - heffteOptions.algorithm = heffte::reshape_algorithm::p2p_plined; - break; - default: - throw IpplException("FFT::setup", - "Unrecognized heffte communication type"); - } - } - - heffte_m = std::make_shared> - (inbox, outbox, Ippl::getComm(), heffteOptions); - - //heffte::gpu::device_set(Ippl::Comm->rank() % heffte::gpu::device_count()); - if(workspace_m.size() < heffte_m->size_workspace()) - workspace_m = workspace_t(heffte_m->size_workspace()); + switch (params.get("comm")) { + + case a2a: + heffteOptions.algorithm = heffte::reshape_algorithm::alltoall; + break; + case a2av: + heffteOptions.algorithm = heffte::reshape_algorithm::alltoallv; + break; + case p2p: + heffteOptions.algorithm = heffte::reshape_algorithm::p2p; + break; + case p2p_pl: + heffteOptions.algorithm = heffte::reshape_algorithm::p2p_plined; + break; + default: + throw IpplException("FFT::setup", + "Unrecognized heffte communication type"); + } + } + + heffte_m = std::make_shared> + (inbox, outbox, Ippl::getComm(), heffteOptions); + + //heffte::gpu::device_set(Ippl::Comm->rank() % heffte::gpu::device_count()); + if(workspace_m.size() < heffte_m->size_workspace()) + workspace_m = workspace_t(heffte_m->size_workspace()); } @@ -138,74 +138,74 @@ namespace ippl { int direction, typename FFT::ComplexField_t& f) { - auto fview = f.getView(); - const int nghost = f.getNghost(); - - /** - *This copy to a temporary Kokkos view is needed because of following - *reasons: - *1) heffte wants the input and output fields without ghost layers - *2) heffte accepts data in layout left (by default) eventhough this - *can be changed during heffte box creation - */ - Kokkos::View - tempField("tempField", fview.extent(0) - 2*nghost, - fview.extent(1) - 2*nghost, - fview.extent(2) - 2*nghost); - - using mdrange_type = Kokkos::MDRangePolicy>; - - Kokkos::parallel_for("copy from Kokkos FFT", - mdrange_type({nghost, nghost, nghost}, - {fview.extent(0) - nghost, - fview.extent(1) - nghost, - fview.extent(2) - nghost - }), - KOKKOS_LAMBDA(const size_t i, - const size_t j, - const size_t k) - { - tempField(i-nghost, j-nghost, k-nghost).real( - fview(i, j, k).real()); - tempField(i-nghost, j-nghost, k-nghost).imag( - fview(i, j, k).imag()); - }); - - - - - if ( direction == 1 ) - { - heffte_m->forward(tempField.data(), tempField.data(), workspace_m.data(), - heffte::scale::full); - } - else if ( direction == -1 ) - { - heffte_m->backward(tempField.data(), tempField.data(), workspace_m.data(), - heffte::scale::none); - } - else - { - throw std::logic_error( - "Only 1:forward and -1:backward are allowed as directions"); - } - - - Kokkos::parallel_for("copy to Kokkos FFT", - mdrange_type({nghost, nghost, nghost}, - {fview.extent(0) - nghost, - fview.extent(1) - nghost, - fview.extent(2) - nghost - }), - KOKKOS_LAMBDA(const size_t i, - const size_t j, - const size_t k) - { - fview(i, j, k).real() = - tempField(i-nghost, j-nghost, k-nghost).real(); - fview(i, j, k).imag() = - tempField(i-nghost, j-nghost, k-nghost).imag(); - }); + auto fview = f.getView(); + const int nghost = f.getNghost(); + + /** + *This copy to a temporary Kokkos view is needed because of following + *reasons: + *1) heffte wants the input and output fields without ghost layers + *2) heffte accepts data in layout left (by default) eventhough this + *can be changed during heffte box creation + */ + Kokkos::View + tempField("tempField", fview.extent(0) - 2*nghost, + fview.extent(1) - 2*nghost, + fview.extent(2) - 2*nghost); + + using mdrange_type = Kokkos::MDRangePolicy>; + + Kokkos::parallel_for("copy from Kokkos FFT", + mdrange_type({nghost, nghost, nghost}, + {fview.extent(0) - nghost, + fview.extent(1) - nghost, + fview.extent(2) - nghost + }), + KOKKOS_LAMBDA(const size_t i, + const size_t j, + const size_t k) + { + tempField(i-nghost, j-nghost, k-nghost).real( + fview(i, j, k).real()); + tempField(i-nghost, j-nghost, k-nghost).imag( + fview(i, j, k).imag()); + }); + + + + + if ( direction == 1 ) + { + heffte_m->forward(tempField.data(), tempField.data(), workspace_m.data(), + heffte::scale::full); + } + else if ( direction == -1 ) + { + heffte_m->backward(tempField.data(), tempField.data(), workspace_m.data(), + heffte::scale::none); + } + else + { + throw std::logic_error( + "Only 1:forward and -1:backward are allowed as directions"); + } + + + Kokkos::parallel_for("copy to Kokkos FFT", + mdrange_type({nghost, nghost, nghost}, + {fview.extent(0) - nghost, + fview.extent(1) - nghost, + fview.extent(2) - nghost + }), + KOKKOS_LAMBDA(const size_t i, + const size_t j, + const size_t k) + { + fview(i, j, k).real() = + tempField(i-nghost, j-nghost, k-nghost).real(); + fview(i, j, k).imag() = + tempField(i-nghost, j-nghost, k-nghost).imag(); + }); } @@ -275,46 +275,46 @@ namespace ippl { const ParameterList& params) { - heffte::box3d inbox = {lowInput, highInput}; - heffte::box3d outbox = {lowOutput, highOutput}; + heffte::box3d inbox = {lowInput, highInput}; + heffte::box3d outbox = {lowOutput, highOutput}; - heffte::plan_options heffteOptions = - heffte::default_options(); + heffte::plan_options heffteOptions = + heffte::default_options(); - if(!params.get("use_heffte_defaults")) { - heffteOptions.use_pencils = params.get("use_pencils"); - heffteOptions.use_reorder = params.get("use_reorder"); + if(!params.get("use_heffte_defaults")) { + heffteOptions.use_pencils = params.get("use_pencils"); + heffteOptions.use_reorder = params.get("use_reorder"); #ifdef Heffte_ENABLE_GPU - heffteOptions.use_gpu_aware = params.get("use_gpu_aware"); + heffteOptions.use_gpu_aware = params.get("use_gpu_aware"); #endif - switch (params.get("comm")) { - - case a2a: - heffteOptions.algorithm = heffte::reshape_algorithm::alltoall; - break; - case a2av: - heffteOptions.algorithm = heffte::reshape_algorithm::alltoallv; - break; - case p2p: - heffteOptions.algorithm = heffte::reshape_algorithm::p2p; - break; - case p2p_pl: - heffteOptions.algorithm = heffte::reshape_algorithm::p2p_plined; - break; - default: - throw IpplException("FFT::setup", - "Unrecognized heffte communication type"); - } - } - - heffte_m = std::make_shared> - (inbox, outbox, params.get("r2c_direction"), Ippl::getComm(), - heffteOptions); + switch (params.get("comm")) { + + case a2a: + heffteOptions.algorithm = heffte::reshape_algorithm::alltoall; + break; + case a2av: + heffteOptions.algorithm = heffte::reshape_algorithm::alltoallv; + break; + case p2p: + heffteOptions.algorithm = heffte::reshape_algorithm::p2p; + break; + case p2p_pl: + heffteOptions.algorithm = heffte::reshape_algorithm::p2p_plined; + break; + default: + throw IpplException("FFT::setup", + "Unrecognized heffte communication type"); + } + } + + heffte_m = std::make_shared> + (inbox, outbox, params.get("r2c_direction"), Ippl::getComm(), + heffteOptions); - //heffte::gpu::device_set(Ippl::Comm->rank() % heffte::gpu::device_count()); - if(workspace_m.size() < heffte_m->size_workspace()) - workspace_m = workspace_t(heffte_m->size_workspace()); + //heffte::gpu::device_set(Ippl::Comm->rank() % heffte::gpu::device_count()); + if(workspace_m.size() < heffte_m->size_workspace()) + workspace_m = workspace_t(heffte_m->size_workspace()); } @@ -325,104 +325,104 @@ namespace ippl { typename FFT::RealField_t& f, typename FFT::ComplexField_t& g) { - auto fview = f.getView(); - auto gview = g.getView(); - const int nghostf = f.getNghost(); - const int nghostg = g.getNghost(); - - /** - *This copy to a temporary Kokkos view is needed because of following - *reasons: - *1) heffte wants the input and output fields without ghost layers - *2) heffte accepts data in layout left (by default) eventhough this - *can be changed during heffte box creation - */ - Kokkos::View - tempFieldf("tempFieldf", fview.extent(0) - 2*nghostf, - fview.extent(1) - 2*nghostf, - fview.extent(2) - 2*nghostf); - - Kokkos::View - tempFieldg("tempFieldg", gview.extent(0) - 2*nghostg, - gview.extent(1) - 2*nghostg, - gview.extent(2) - 2*nghostg); - - using mdrange_type = Kokkos::MDRangePolicy>; - - Kokkos::parallel_for("copy from Kokkos f field in FFT", - mdrange_type({nghostf, nghostf, nghostf}, - {fview.extent(0) - nghostf, - fview.extent(1) - nghostf, - fview.extent(2) - nghostf - }), - KOKKOS_LAMBDA(const size_t i, - const size_t j, - const size_t k) - { - tempFieldf(i-nghostf, j-nghostf, k-nghostf) = fview(i, j, k); - }); - Kokkos::parallel_for("copy from Kokkos g field in FFT", - mdrange_type({nghostg, nghostg, nghostg}, - {gview.extent(0) - nghostg, - gview.extent(1) - nghostg, - gview.extent(2) - nghostg - }), - KOKKOS_LAMBDA(const size_t i, - const size_t j, - const size_t k) - { - tempFieldg(i-nghostg, j-nghostg, k-nghostg).real( - gview(i, j, k).real()); - tempFieldg(i-nghostg, j-nghostg, k-nghostg).imag( - gview(i, j, k).imag()); - }); + auto fview = f.getView(); + auto gview = g.getView(); + const int nghostf = f.getNghost(); + const int nghostg = g.getNghost(); + + /** + *This copy to a temporary Kokkos view is needed because of following + *reasons: + *1) heffte wants the input and output fields without ghost layers + *2) heffte accepts data in layout left (by default) eventhough this + *can be changed during heffte box creation + */ + Kokkos::View + tempFieldf("tempFieldf", fview.extent(0) - 2*nghostf, + fview.extent(1) - 2*nghostf, + fview.extent(2) - 2*nghostf); + + Kokkos::View + tempFieldg("tempFieldg", gview.extent(0) - 2*nghostg, + gview.extent(1) - 2*nghostg, + gview.extent(2) - 2*nghostg); + + using mdrange_type = Kokkos::MDRangePolicy>; + + Kokkos::parallel_for("copy from Kokkos f field in FFT", + mdrange_type({nghostf, nghostf, nghostf}, + {fview.extent(0) - nghostf, + fview.extent(1) - nghostf, + fview.extent(2) - nghostf + }), + KOKKOS_LAMBDA(const size_t i, + const size_t j, + const size_t k) + { + tempFieldf(i-nghostf, j-nghostf, k-nghostf) = fview(i, j, k); + }); + Kokkos::parallel_for("copy from Kokkos g field in FFT", + mdrange_type({nghostg, nghostg, nghostg}, + {gview.extent(0) - nghostg, + gview.extent(1) - nghostg, + gview.extent(2) - nghostg + }), + KOKKOS_LAMBDA(const size_t i, + const size_t j, + const size_t k) + { + tempFieldg(i-nghostg, j-nghostg, k-nghostg).real( + gview(i, j, k).real()); + tempFieldg(i-nghostg, j-nghostg, k-nghostg).imag( + gview(i, j, k).imag()); + }); - if ( direction == 1 ) - { - heffte_m->forward( tempFieldf.data(), tempFieldg.data(), workspace_m.data(), - heffte::scale::full ); - } - else if ( direction == -1 ) - { - heffte_m->backward( tempFieldg.data(), tempFieldf.data(), workspace_m.data(), - heffte::scale::none ); - } - else - { - throw std::logic_error( - "Only 1:forward and -1:backward are allowed as directions"); - } - - - Kokkos::parallel_for("copy to Kokkos f field FFT", - mdrange_type({nghostf, nghostf, nghostf}, - {fview.extent(0) - nghostf, - fview.extent(1) - nghostf, - fview.extent(2) - nghostf - }), - KOKKOS_LAMBDA(const size_t i, - const size_t j, - const size_t k) - { - fview(i, j, k) = tempFieldf(i-nghostf, j-nghostf, k-nghostf); - }); - - Kokkos::parallel_for("copy to Kokkos g field FFT", - mdrange_type({nghostg, nghostg, nghostg}, - {gview.extent(0) - nghostg, - gview.extent(1) - nghostg, - gview.extent(2) - nghostg - }), - KOKKOS_LAMBDA(const size_t i, - const size_t j, - const size_t k) - { - gview(i, j, k).real() = - tempFieldg(i-nghostg, j-nghostg, k-nghostg).real(); - gview(i, j, k).imag() = - tempFieldg(i-nghostg, j-nghostg, k-nghostg).imag(); - }); + if ( direction == 1 ) + { + heffte_m->forward( tempFieldf.data(), tempFieldg.data(), workspace_m.data(), + heffte::scale::full ); + } + else if ( direction == -1 ) + { + heffte_m->backward( tempFieldg.data(), tempFieldf.data(), workspace_m.data(), + heffte::scale::none ); + } + else + { + throw std::logic_error( + "Only 1:forward and -1:backward are allowed as directions"); + } + + + Kokkos::parallel_for("copy to Kokkos f field FFT", + mdrange_type({nghostf, nghostf, nghostf}, + {fview.extent(0) - nghostf, + fview.extent(1) - nghostf, + fview.extent(2) - nghostf + }), + KOKKOS_LAMBDA(const size_t i, + const size_t j, + const size_t k) + { + fview(i, j, k) = tempFieldf(i-nghostf, j-nghostf, k-nghostf); + }); + + Kokkos::parallel_for("copy to Kokkos g field FFT", + mdrange_type({nghostg, nghostg, nghostg}, + {gview.extent(0) - nghostg, + gview.extent(1) - nghostg, + gview.extent(2) - nghostg + }), + KOKKOS_LAMBDA(const size_t i, + const size_t j, + const size_t k) + { + gview(i, j, k).real() = + tempFieldg(i-nghostg, j-nghostg, k-nghostg).real(); + gview(i, j, k).imag() = + tempFieldg(i-nghostg, j-nghostg, k-nghostg).imag(); + }); } @@ -446,8 +446,8 @@ namespace ippl { * 1D FFTs we just have to make the length in other * dimensions to be 1. */ - std::array low; - std::array high; + std::array low; + std::array high; const NDIndex& lDom = layout.getLocalNDIndex(); @@ -477,44 +477,44 @@ namespace ippl { const ParameterList& params) { - heffte::box3d inbox = {low, high}; - heffte::box3d outbox = {low, high}; + heffte::box3d inbox = {low, high}; + heffte::box3d outbox = {low, high}; - heffte::plan_options heffteOptions = - heffte::default_options(); + heffte::plan_options heffteOptions = + heffte::default_options(); - if(!params.get("use_heffte_defaults")) { - heffteOptions.use_pencils = params.get("use_pencils"); - heffteOptions.use_reorder = params.get("use_reorder"); + if(!params.get("use_heffte_defaults")) { + heffteOptions.use_pencils = params.get("use_pencils"); + heffteOptions.use_reorder = params.get("use_reorder"); #ifdef Heffte_ENABLE_GPU - heffteOptions.use_gpu_aware = params.get("use_gpu_aware"); + heffteOptions.use_gpu_aware = params.get("use_gpu_aware"); #endif - switch (params.get("comm")) { - - case a2a: - heffteOptions.algorithm = heffte::reshape_algorithm::alltoall; - break; - case a2av: - heffteOptions.algorithm = heffte::reshape_algorithm::alltoallv; - break; - case p2p: - heffteOptions.algorithm = heffte::reshape_algorithm::p2p; - break; - case p2p_pl: - heffteOptions.algorithm = heffte::reshape_algorithm::p2p_plined; - break; - default: - throw IpplException("FFT::setup", - "Unrecognized heffte communication type"); - } - } - - heffte_m = std::make_shared> - (inbox, outbox, Ippl::getComm(), heffteOptions); - - //heffte::gpu::device_set(Ippl::Comm->rank() % heffte::gpu::device_count()); - if(workspace_m.size() < heffte_m->size_workspace()) - workspace_m = workspace_t(heffte_m->size_workspace()); + switch (params.get("comm")) { + + case a2a: + heffteOptions.algorithm = heffte::reshape_algorithm::alltoall; + break; + case a2av: + heffteOptions.algorithm = heffte::reshape_algorithm::alltoallv; + break; + case p2p: + heffteOptions.algorithm = heffte::reshape_algorithm::p2p; + break; + case p2p_pl: + heffteOptions.algorithm = heffte::reshape_algorithm::p2p_plined; + break; + default: + throw IpplException("FFT::setup", + "Unrecognized heffte communication type"); + } + } + + heffte_m = std::make_shared> + (inbox, outbox, Ippl::getComm(), heffteOptions); + + //heffte::gpu::device_set(Ippl::Comm->rank() % heffte::gpu::device_count()); + if(workspace_m.size() < heffte_m->size_workspace()) + workspace_m = workspace_t(heffte_m->size_workspace()); } @@ -524,66 +524,66 @@ namespace ippl { int direction, typename FFT::Field_t& f) { - auto fview = f.getView(); - const int nghost = f.getNghost(); - - /** - *This copy to a temporary Kokkos view is needed because of following - *reasons: - *1) heffte wants the input and output fields without ghost layers - *2) heffte accepts data in layout left (by default) eventhough this - *can be changed during heffte box creation - */ - Kokkos::View - tempField("tempField", fview.extent(0) - 2*nghost, - fview.extent(1) - 2*nghost, - fview.extent(2) - 2*nghost); - - using mdrange_type = Kokkos::MDRangePolicy>; - - Kokkos::parallel_for("copy from Kokkos FFT", - mdrange_type({nghost, nghost, nghost}, - {fview.extent(0) - nghost, - fview.extent(1) - nghost, - fview.extent(2) - nghost - }), - KOKKOS_LAMBDA(const size_t i, - const size_t j, - const size_t k) - { - tempField(i-nghost, j-nghost, k-nghost) = - fview(i, j, k); - }); - - if ( direction == 1 ) - { - heffte_m->forward(tempField.data(), tempField.data(), workspace_m.data(), - heffte::scale::full); - } - else if ( direction == -1 ) - { - heffte_m->backward(tempField.data(), tempField.data(), workspace_m.data(), - heffte::scale::none); - } - else - { - throw std::logic_error( - "Only 1:forward and -1:backward are allowed as directions"); - } - - Kokkos::parallel_for("copy to Kokkos FFT", - mdrange_type({nghost, nghost, nghost}, - {fview.extent(0) - nghost, - fview.extent(1) - nghost, - fview.extent(2) - nghost - }), - KOKKOS_LAMBDA(const size_t i, - const size_t j, - const size_t k) - { - fview(i, j, k) = - tempField(i-nghost, j-nghost, k-nghost); - }); + auto fview = f.getView(); + const int nghost = f.getNghost(); + + /** + *This copy to a temporary Kokkos view is needed because of following + *reasons: + *1) heffte wants the input and output fields without ghost layers + *2) heffte accepts data in layout left (by default) eventhough this + *can be changed during heffte box creation + */ + Kokkos::View + tempField("tempField", fview.extent(0) - 2*nghost, + fview.extent(1) - 2*nghost, + fview.extent(2) - 2*nghost); + + using mdrange_type = Kokkos::MDRangePolicy>; + + Kokkos::parallel_for("copy from Kokkos FFT", + mdrange_type({nghost, nghost, nghost}, + {fview.extent(0) - nghost, + fview.extent(1) - nghost, + fview.extent(2) - nghost + }), + KOKKOS_LAMBDA(const size_t i, + const size_t j, + const size_t k) + { + tempField(i-nghost, j-nghost, k-nghost) = + fview(i, j, k); + }); + + if ( direction == 1 ) + { + heffte_m->forward(tempField.data(), tempField.data(), workspace_m.data(), + heffte::scale::full); + } + else if ( direction == -1 ) + { + heffte_m->backward(tempField.data(), tempField.data(), workspace_m.data(), + heffte::scale::none); + } + else + { + throw std::logic_error( + "Only 1:forward and -1:backward are allowed as directions"); + } + + Kokkos::parallel_for("copy to Kokkos FFT", + mdrange_type({nghost, nghost, nghost}, + {fview.extent(0) - nghost, + fview.extent(1) - nghost, + fview.extent(2) - nghost + }), + KOKKOS_LAMBDA(const size_t i, + const size_t j, + const size_t k) + { + fview(i, j, k) = + tempField(i-nghost, j-nghost, k-nghost); + }); } @@ -607,8 +607,8 @@ namespace ippl { * 1D FFTs we just have to make the length in other * dimensions to be 1. */ - std::array low; - std::array high; + std::array low; + std::array high; const NDIndex& lDom = layout.getLocalNDIndex(); @@ -638,44 +638,44 @@ namespace ippl { const ParameterList& params) { - heffte::box3d inbox = {low, high}; - heffte::box3d outbox = {low, high}; + heffte::box3d inbox = {low, high}; + heffte::box3d outbox = {low, high}; - heffte::plan_options heffteOptions = - heffte::default_options(); + heffte::plan_options heffteOptions = + heffte::default_options(); - if(!params.get("use_heffte_defaults")) { - heffteOptions.use_pencils = params.get("use_pencils"); - heffteOptions.use_reorder = params.get("use_reorder"); + if(!params.get("use_heffte_defaults")) { + heffteOptions.use_pencils = params.get("use_pencils"); + heffteOptions.use_reorder = params.get("use_reorder"); #ifdef Heffte_ENABLE_GPU - heffteOptions.use_gpu_aware = params.get("use_gpu_aware"); + heffteOptions.use_gpu_aware = params.get("use_gpu_aware"); #endif - switch (params.get("comm")) { - - case a2a: - heffteOptions.algorithm = heffte::reshape_algorithm::alltoall; - break; - case a2av: - heffteOptions.algorithm = heffte::reshape_algorithm::alltoallv; - break; - case p2p: - heffteOptions.algorithm = heffte::reshape_algorithm::p2p; - break; - case p2p_pl: - heffteOptions.algorithm = heffte::reshape_algorithm::p2p_plined; - break; - default: - throw IpplException("FFT::setup", - "Unrecognized heffte communication type"); - } - } - - heffte_m = std::make_shared> - (inbox, outbox, Ippl::getComm(), heffteOptions); - - //heffte::gpu::device_set(Ippl::Comm->rank() % heffte::gpu::device_count()); - if(workspace_m.size() < heffte_m->size_workspace()) - workspace_m = workspace_t(heffte_m->size_workspace()); + switch (params.get("comm")) { + + case a2a: + heffteOptions.algorithm = heffte::reshape_algorithm::alltoall; + break; + case a2av: + heffteOptions.algorithm = heffte::reshape_algorithm::alltoallv; + break; + case p2p: + heffteOptions.algorithm = heffte::reshape_algorithm::p2p; + break; + case p2p_pl: + heffteOptions.algorithm = heffte::reshape_algorithm::p2p_plined; + break; + default: + throw IpplException("FFT::setup", + "Unrecognized heffte communication type"); + } + } + + heffte_m = std::make_shared> + (inbox, outbox, Ippl::getComm(), heffteOptions); + + //heffte::gpu::device_set(Ippl::Comm->rank() % heffte::gpu::device_count()); + if(workspace_m.size() < heffte_m->size_workspace()) + workspace_m = workspace_t(heffte_m->size_workspace()); } @@ -686,68 +686,258 @@ namespace ippl { int direction, typename FFT::Field_t& f) { - auto fview = f.getView(); - const int nghost = f.getNghost(); - - /** - *This copy to a temporary Kokkos view is needed because of following - *reasons: - *1) heffte wants the input and output fields without ghost layers - *2) heffte accepts data in layout left (by default) eventhough this - *can be changed during heffte box creation - */ - Kokkos::View - tempField("tempField", fview.extent(0) - 2*nghost, - fview.extent(1) - 2*nghost, - fview.extent(2) - 2*nghost); - - using mdrange_type = Kokkos::MDRangePolicy>; - - Kokkos::parallel_for("copy from Kokkos FFT", - mdrange_type({nghost, nghost, nghost}, - {fview.extent(0) - nghost, - fview.extent(1) - nghost, - fview.extent(2) - nghost - }), - KOKKOS_LAMBDA(const size_t i, - const size_t j, - const size_t k) - { - tempField(i-nghost, j-nghost, k-nghost) = - fview(i, j, k); - }); - - if ( direction == 1 ) - { - heffte_m->forward(tempField.data(), tempField.data(), workspace_m.data(), - heffte::scale::full); - } - else if ( direction == -1 ) - { - heffte_m->backward(tempField.data(), tempField.data(), workspace_m.data(), - heffte::scale::none); - } - else - { - throw std::logic_error( - "Only 1:forward and -1:backward are allowed as directions"); - } - - Kokkos::parallel_for("copy to Kokkos FFT", - mdrange_type({nghost, nghost, nghost}, - {fview.extent(0) - nghost, - fview.extent(1) - nghost, - fview.extent(2) - nghost - }), - KOKKOS_LAMBDA(const size_t i, - const size_t j, - const size_t k) - { - fview(i, j, k) = - tempField(i-nghost, j-nghost, k-nghost); - }); + auto fview = f.getView(); + const int nghost = f.getNghost(); + + /** + *This copy to a temporary Kokkos view is needed because of following + *reasons: + *1) heffte wants the input and output fields without ghost layers + *2) heffte accepts data in layout left (by default) eventhough this + *can be changed during heffte box creation + */ + Kokkos::View + tempField("tempField", fview.extent(0) - 2*nghost, + fview.extent(1) - 2*nghost, + fview.extent(2) - 2*nghost); + + using mdrange_type = Kokkos::MDRangePolicy>; + + Kokkos::parallel_for("copy from Kokkos FFT", + mdrange_type({nghost, nghost, nghost}, + {fview.extent(0) - nghost, + fview.extent(1) - nghost, + fview.extent(2) - nghost + }), + KOKKOS_LAMBDA(const size_t i, + const size_t j, + const size_t k) + { + tempField(i-nghost, j-nghost, k-nghost) = + fview(i, j, k); + }); + + if ( direction == 1 ) + { + heffte_m->forward(tempField.data(), tempField.data(), workspace_m.data(), + heffte::scale::full); + } + else if ( direction == -1 ) + { + heffte_m->backward(tempField.data(), tempField.data(), workspace_m.data(), + heffte::scale::none); + } + else + { + throw std::logic_error( + "Only 1:forward and -1:backward are allowed as directions"); + } + + Kokkos::parallel_for("copy to Kokkos FFT", + mdrange_type({nghost, nghost, nghost}, + {fview.extent(0) - nghost, + fview.extent(1) - nghost, + fview.extent(2) - nghost + }), + KOKKOS_LAMBDA(const size_t i, + const size_t j, + const size_t k) + { + fview(i, j, k) = + tempField(i-nghost, j-nghost, k-nghost); + }); } + + +#ifdef KOKKOS_ENABLE_CUDA + //========================================================================= + // FFT NUFFTransform Constructors + //========================================================================= + + /** + Create a new FFT object of type NUFFTransform, with a + given layout and cuFINUFFT parameters. + */ + + template + FFT::FFT(const Layout_t& layout, + int type, + const ParameterList& params) + { + /** + * cuFINUFFT requires to pass a 3D array even for 2D and + * 1D FFTs we just have to fill in other + * dimensions to be 1. Note this is different from Heffte + * where we fill 0. + */ + + std::array nmodes; + + const NDIndex& lDom = layout.getLocalNDIndex(); + + nmodes.fill(1); + + for(size_t d = 0; d < Dim; ++d) { + nmodes[d] = lDom[d].length();; + } + + type_m = type; + setup(nmodes, params); + } + + + /** + setup performs the initialization necessary. + */ + template + void + FFT::setup(std::array& nmodes, + const ParameterList& params) + { + + cufinufft_opts opts; + ier_m = cufinufft_default_opts(type_m, Dim, &opts); + tol_m = 1e-6; + + if(!params.get("use_cufinufft_defaults")) { + tol_m = params.get("tolerance"); + opts.gpu_method = params.get("gpu_method"); + opts.gpu_sort = params.get("gpu_sort"); + opts.gpu_kerevalmeth = params.get("gpu_kerevalmeth"); + } + + int maxbatchsize = 0; //default option. ignored for ntransf = 1 which + // is our case + + int iflag; + + if(type_m == 1) { + iflag = -1; + } + else if(type_m == 2) { + iflag = 1; + } + else { + throw std::logic_error("Only type 1 and type 2 NUFFT are allowed now"); + } + + //dim in cufinufft is int + int dim = static_cast(Dim); + ier_m = nufft_m.makeplan(type_m, dim, nmodes.data(), iflag, 1, tol_m, + maxbatchsize, &plan_m, &opts); + + } + + + + template + template + void + FFT::transform(const ParticleAttrib< Vector, Properties... >& R, + ParticleAttrib& Q, + typename FFT::ComplexField_t& f) + { + auto fview = f.getView(); + auto Rview = R.getView(); + auto Qview = Q.getView(); + const int nghost = f.getNghost(); + + auto localNp = R.getParticleCount(); + + /** + * cuFINUFFT's layout is left, hence we allocate the temporary + * Kokkos views with the same layout + */ + Kokkos::View + tempField("tempField", fview.extent(0) - 2*nghost, + fview.extent(1) - 2*nghost, + fview.extent(2) - 2*nghost); + + + //Initialize the pointers to NULL and fill only relevant dimensions + //CUFINUFFT requires the input like this. + Kokkos::View tempR[3] = {}; + + + for(size_t d = 0; d < Dim; ++d) { + Kokkos::realloc(tempR[d], localNp); + } + + + Kokkos::View tempQ("tempQ", localNp); + + using mdrange_type = Kokkos::MDRangePolicy>; + + Kokkos::parallel_for("copy from field data NUFFT", + mdrange_type({nghost, nghost, nghost}, + {fview.extent(0) - nghost, + fview.extent(1) - nghost, + fview.extent(2) - nghost + }), + KOKKOS_LAMBDA(const size_t i, + const size_t j, + const size_t k) + { + tempField(i-nghost, j-nghost, k-nghost).x = + fview(i, j, k).real(); + tempField(i-nghost, j-nghost, k-nghost).y = + fview(i, j, k).imag(); + }); + + + Kokkos::parallel_for("copy from particle data NUFFT", + localNp, + KOKKOS_LAMBDA(const size_t i) + { + for(size_t d = 0; d < Dim; ++d) { + tempR[d](i) = Rview(i)[d]; + } + tempQ(i).x = Qview(i).real(); + tempQ(i).y = Qview(i).imag(); + }); + + ier_m = nufft_m.setpts(localNp, tempR[0].data(), tempR[1].data(), tempR[2].data(), 0, + NULL, NULL, NULL, plan_m); + + ier_m = nufft_m.execute(tempQ.data(), tempField.data(), plan_m); + + + if(type_m == 1) { + Kokkos::parallel_for("copy to field data NUFFT", + mdrange_type({nghost, nghost, nghost}, + {fview.extent(0) - nghost, + fview.extent(1) - nghost, + fview.extent(2) - nghost + }), + KOKKOS_LAMBDA(const size_t i, + const size_t j, + const size_t k) + { + fview(i, j, k).real() = + tempField(i-nghost, j-nghost, k-nghost).x; + fview(i, j, k).imag() = + tempField(i-nghost, j-nghost, k-nghost).y; + }); + } + else if(type_m == 2) { + Kokkos::parallel_for("copy to particle data NUFFT", + localNp, + KOKKOS_LAMBDA(const size_t i) + { + Qview(i).real() = tempQ(i).x; + Qview(i).imag() = tempQ(i).y; + }); + } + } + + template + FFT::~FFT() { + + ier_m = nufft_m.destroy(plan_m); + + } +#endif } // vi: set et ts=4 sw=4 sts=4: diff --git a/test/FFT/CMakeLists.txt b/test/FFT/CMakeLists.txt index 5d0332166..4d3e5fe90 100644 --- a/test/FFT/CMakeLists.txt +++ b/test/FFT/CMakeLists.txt @@ -39,6 +39,18 @@ target_link_libraries ( ${IPPL_LIBS} ${MPI_CXX_LIBRARIES} ) +add_executable (TestNUFFT1 TestNUFFT1.cpp) +target_link_libraries ( + TestNUFFT1 + ${IPPL_LIBS} + ${MPI_CXX_LIBRARIES} +) +add_executable (TestNUFFT2 TestNUFFT2.cpp) +target_link_libraries ( + TestNUFFT2 + ${IPPL_LIBS} + ${MPI_CXX_LIBRARIES} +) # vi: set et ts=4 sw=4 sts=4: # Local Variables: diff --git a/test/FFT/TestNUFFT1.cpp b/test/FFT/TestNUFFT1.cpp new file mode 100644 index 000000000..06ac71234 --- /dev/null +++ b/test/FFT/TestNUFFT1.cpp @@ -0,0 +1,201 @@ +#include "Ippl.h" +#include "Utility/ParameterList.h" + +#include +#include +#include +#include +#include + +template +struct Bunch : public ippl::ParticleBase +{ + + Bunch(PLayout& playout) + : ippl::ParticleBase(playout) + { + this->addAttribute(Q); + } + + ~Bunch(){ } + + typedef ippl::ParticleAttrib> charge_container_type; + charge_container_type Q; + +}; + +template +struct generate_random { + + using view_type = typename ippl::detail::ViewType::view_type; + using value_type = typename T::value_type; + using view_type_complex = typename ippl::detail::ViewType, 1>::view_type; + // Output View for the random numbers + view_type x; + + view_type_complex Q; + + // The GeneratorPool + GeneratorPool rand_pool; + + T minU, maxU; + + // Initialize all members + generate_random(view_type x_,view_type_complex Q_, GeneratorPool rand_pool_, + T& minU_, T& maxU_) + : x(x_), Q(Q_), rand_pool(rand_pool_), + minU(minU_), maxU(maxU_) {} + + KOKKOS_INLINE_FUNCTION + void operator()(const size_t i) const { + // Get a random number state from the pool for the active thread + typename GeneratorPool::generator_type rand_gen = rand_pool.get_state(); + + for (unsigned d = 0; d < Dim; ++d) { + x(i)[d] = rand_gen.drand(minU[d], maxU[d]); + } + Q(i).real() = rand_gen.drand(0.0, 1.0); + Q(i).imag() = rand_gen.drand(0.0, 1.0); + + // Give the state back, which will allow another thread to acquire it + rand_pool.free_state(rand_gen); + } +}; + + +int main(int argc, char *argv[]) { + + Ippl ippl(argc,argv); + + constexpr unsigned int dim = 3; + const double pi = std::acos(-1.0); + + typedef ippl::ParticleSpatialLayout playout_type; + typedef Bunch bunch_type; + + + std::array pt = {256, 256, 256}; + ippl::Index I(pt[0]); + ippl::Index J(pt[1]); + ippl::Index K(pt[2]); + ippl::NDIndex owned(I, J, K); + + ippl::e_dim_tag decomp[dim]; // Specifies SERIAL, PARALLEL dims + for (unsigned int d=0; d layout(owned, decomp); + + std::array dx = { + 2.0 * pi / double(pt[0]), + 2.0 * pi / double(pt[1]), + 2.0 * pi / double(pt[2]), + }; + + typedef ippl::Vector Vector_t; + + Vector_t hx = {dx[0], dx[1], dx[2]}; + Vector_t origin = {-pi, -pi, -pi}; + ippl::UniformCartesian mesh(owned, hx, origin); + + playout_type pl(layout, mesh); + + bunch_type bunch(pl); + bunch.setParticleBC(ippl::BC::PERIODIC); + + using size_type = ippl::detail::size_type; + + + size_type Np = std::pow(256,3) * 8; + + typedef ippl::Field, dim> field_type; + + field_type field(mesh, layout); + + ippl::ParameterList fftParams; + + fftParams.add("gpu_method", 1); + fftParams.add("gpu_sort", 1); + fftParams.add("gpu_kerevalmeth", 1); + fftParams.add("tolerance", 1e-6); + + fftParams.add("use_cufinufft_defaults", false); + + typedef ippl::FFT FFT_type; + + std::unique_ptr fft; + + int type = 1; + + fft = std::make_unique(layout, type, fftParams); + + Vector_t minU = {-pi, -pi, -pi}; + Vector_t maxU = {pi, pi, pi}; + + + size_type nloc = Np/Ippl::Comm->size(); + + bunch.create(nloc); + Kokkos::Random_XorShift64_Pool<> rand_pool64((size_type)(42)); + Kokkos::parallel_for(nloc, + generate_random, dim>( + bunch.R.getView(), bunch.Q.getView(), rand_pool64, minU, maxU)); + + + fft->transform(bunch.R, bunch.Q, field); + + auto field_result = Kokkos::create_mirror_view_and_copy( + Kokkos::HostSpace(), field.getView()); + + Kokkos::complex max_error_abs(0.0, 0.0); + Kokkos::complex max_error_rel(0.0, 0.0); + + //Pick some mode to check. We choose it same as cuFINUFFT testcase cufinufft3d1_test.cu + ippl::Vector kVec; + kVec[0] = (int)(0.37 * pt[0]); + kVec[1] = (int)(0.26 * pt[1]); + kVec[2] = (int)(0.13 * pt[2]); + + const int nghost = field.getNghost(); + + int iInd = (pt[0]/2 + kVec[0] + nghost); + int jInd = (pt[1]/2 + kVec[1] + nghost); + int kInd = (pt[2]/2 + kVec[2] + nghost); + + + Kokkos::complex reducedValue(0.0, 0.0); + + auto Rview = bunch.R.getView(); + auto Qview = bunch.Q.getView(); + + Kokkos::complex imag = {0.0, 1.0}; + + Kokkos::parallel_reduce("NUDFT type1", nloc, + KOKKOS_LAMBDA(const size_t idx, Kokkos::complex& valL) { + + double arg = 0.0; + for(size_t d = 0; d < dim; ++d) { + arg += kVec[d]*Rview(idx)[d]; + } + + valL += (Kokkos::Experimental::cos(arg) + - imag * Kokkos::Experimental::sin(arg)) * Qview(idx); + }, Kokkos::Sum>(reducedValue)); + + double abs_error_real = std::fabs(reducedValue.real() - field_result(iInd, jInd, kInd).real()); + double rel_error_real = std::fabs(reducedValue.real() - field_result(iInd, jInd, kInd).real()) /std::fabs(reducedValue.real()); + double abs_error_imag = std::fabs(reducedValue.imag() - field_result(iInd, jInd, kInd).imag()); + double rel_error_imag = std::fabs(reducedValue.imag() - field_result(iInd, jInd, kInd).imag()) /std::fabs(reducedValue.imag()); + + std::cout << "Abs Error in real part: " << std::setprecision(16) + << abs_error_real << " Rel. error in real part: " << std::setprecision(16) << rel_error_real << std::endl; + std::cout << "Abs Error in imag part: " << std::setprecision(16) + << abs_error_imag << " Rel. error in imag part: " << std::setprecision(16) << rel_error_imag << std::endl; + + + //Kokkos::complex max_error(0.0, 0.0); + //MPI_Reduce(&max_error_local, &max_error, 1, + // MPI_C_DOUBLE_COMPLEX, MPI_MAX, 0, Ippl::getComm()); + + return 0; +} diff --git a/test/FFT/TestNUFFT2.cpp b/test/FFT/TestNUFFT2.cpp new file mode 100644 index 000000000..147c2ba74 --- /dev/null +++ b/test/FFT/TestNUFFT2.cpp @@ -0,0 +1,229 @@ +#include "Ippl.h" +#include "Utility/ParameterList.h" + +#include +#include +#include +#include +#include + +template +struct Bunch : public ippl::ParticleBase +{ + + Bunch(PLayout& playout) + : ippl::ParticleBase(playout) + { + this->addAttribute(Q); + } + + ~Bunch(){ } + + typedef ippl::ParticleAttrib> charge_container_type; + charge_container_type Q; + +}; + +template +struct generate_random_particles { + + using view_type = typename ippl::detail::ViewType::view_type; + using value_type = typename T::value_type; + // Output View for the random numbers + view_type x; + + // The GeneratorPool + GeneratorPool rand_pool; + + T minU, maxU; + + // Initialize all members + generate_random_particles(view_type x_, GeneratorPool rand_pool_, + T& minU_, T& maxU_) + : x(x_), rand_pool(rand_pool_), + minU(minU_), maxU(maxU_) {} + + KOKKOS_INLINE_FUNCTION + void operator()(const size_t i) const { + // Get a random number state from the pool for the active thread + typename GeneratorPool::generator_type rand_gen = rand_pool.get_state(); + + for (unsigned d = 0; d < Dim; ++d) { + x(i)[d] = rand_gen.drand(minU[d], maxU[d]); + } + + // Give the state back, which will allow another thread to acquire it + rand_pool.free_state(rand_gen); + } +}; + +template +struct generate_random_field { + + using view_type = typename ippl::detail::ViewType::view_type; + view_type f; + + // The GeneratorPool + GeneratorPool rand_pool; + + // Initialize all members + generate_random_field(view_type f_, GeneratorPool rand_pool_) + : f(f_), rand_pool(rand_pool_) {} + + KOKKOS_INLINE_FUNCTION + void operator()(const size_t i, const size_t j, const size_t k) const { + // Get a random number state from the pool for the active thread + typename GeneratorPool::generator_type rand_gen = rand_pool.get_state(); + + f(i, j, k).real() = rand_gen.drand(0.0, 1.0); + f(i, j, k).imag() = rand_gen.drand(0.0, 1.0); + + // Give the state back, which will allow another thread to acquire it + rand_pool.free_state(rand_gen); + } +}; + +int main(int argc, char *argv[]) { + + Ippl ippl(argc,argv); + + constexpr unsigned int dim = 3; + const double pi = std::acos(-1.0); + + typedef ippl::ParticleSpatialLayout playout_type; + typedef Bunch bunch_type; + + + ippl::Vector pt = {32, 32, 32}; + ippl::Index I(pt[0]); + ippl::Index J(pt[1]); + ippl::Index K(pt[2]); + ippl::NDIndex owned(I, J, K); + + ippl::e_dim_tag decomp[dim]; // Specifies SERIAL, PARALLEL dims + for (unsigned int d=0; d layout(owned, decomp); + + std::array dx = { + 2.0 * pi / double(pt[0]), + 2.0 * pi / double(pt[1]), + 2.0 * pi / double(pt[2]), + }; + + typedef ippl::Vector Vector_t; + //typedef ippl::Vector, 3> CxVector_t; + + Vector_t hx = {dx[0], dx[1], dx[2]}; + Vector_t origin = {-pi, -pi, -pi}; + ippl::UniformCartesian mesh(owned, hx, origin); + + playout_type pl(layout, mesh); + + bunch_type bunch(pl); + bunch.setParticleBC(ippl::BC::PERIODIC); + + using size_type = ippl::detail::size_type; + + + size_type Np = std::pow(32,3) * 10; + + typedef ippl::Field, dim> field_type; + + field_type field(mesh, layout); + + ippl::ParameterList fftParams; + + fftParams.add("gpu_method", 1); + fftParams.add("gpu_sort", 1); + fftParams.add("gpu_kerevalmeth", 1); + fftParams.add("tolerance", 1e-12); + + fftParams.add("use_cufinufft_defaults", false); + + typedef ippl::FFT FFT_type; + + std::unique_ptr fft; + + int type = 2; + + fft = std::make_unique(layout, type, fftParams); + + Vector_t minU = {-pi, -pi, -pi}; + Vector_t maxU = {pi, pi, pi}; + + + size_type nloc = Np/Ippl::Comm->size(); + + const int nghost = field.getNghost(); + using mdrange_type = Kokkos::MDRangePolicy>; + auto fview = field.getView(); + bunch.create(nloc); + Kokkos::Random_XorShift64_Pool<> rand_pool64((size_type)(42)); + Kokkos::parallel_for(nloc, + generate_random_particles, dim>( + bunch.R.getView(), rand_pool64, minU, maxU)); + + Kokkos::parallel_for(mdrange_type({nghost, nghost, nghost}, + {fview.extent(0) - nghost, + fview.extent(1) - nghost, + fview.extent(2) - nghost}), + generate_random_field, Kokkos::Random_XorShift64_Pool<>, dim>( + field.getView(), rand_pool64)); + + fft->transform(bunch.R, bunch.Q, field); + + auto Q_result = Kokkos::create_mirror_view_and_copy( + Kokkos::HostSpace(), bunch.Q.getView()); + + Kokkos::complex max_error_abs(0.0, 0.0); + Kokkos::complex max_error_rel(0.0, 0.0); + + //Pick some target point to check. We choose it same as cuFINUFFT testcase cufinufft3d2_test.cu + + int idx = nloc/2; + + Kokkos::complex reducedValue(0.0, 0.0); + + auto Rview = bunch.R.getView(); + + Kokkos::complex imag = {0.0, 1.0}; + + Kokkos::parallel_reduce("NUDFT type2", + mdrange_type({0, 0, 0}, + {fview.extent(0) - 2 * nghost, + fview.extent(1) - 2 * nghost, + fview.extent(2) - 2 * nghost}), + KOKKOS_LAMBDA(const int i, + const int j, + const int k, + Kokkos::complex& valL) + { + ippl::Vector iVec = {i, j, k}; + double arg = 0.0; + for(size_t d = 0; d < dim; ++d) { + arg += (iVec[d] - (pt[d]/2)) * Rview(idx)[d]; + } + + valL += (Kokkos::Experimental::cos(arg) + + imag * Kokkos::Experimental::sin(arg)) * fview(i + nghost, j + nghost, k + nghost); + }, Kokkos::Sum>(reducedValue)); + + double abs_error_real = std::fabs(reducedValue.real() - Q_result(idx).real()); + double rel_error_real = std::fabs(reducedValue.real() - Q_result(idx).real()) /std::fabs(reducedValue.real()); + double abs_error_imag = std::fabs(reducedValue.imag() - Q_result(idx).imag()); + double rel_error_imag = std::fabs(reducedValue.imag() - Q_result(idx).imag()) /std::fabs(reducedValue.imag()); + + std::cout << "Abs Error in real part: " << std::setprecision(16) + << abs_error_real << " Rel. error in real part: " << std::setprecision(16) << rel_error_real << std::endl; + std::cout << "Abs Error in imag part: " << std::setprecision(16) + << abs_error_imag << " Rel. error in imag part: " << std::setprecision(16) << rel_error_imag << std::endl; + + + //Kokkos::complex max_error(0.0, 0.0); + //MPI_Reduce(&max_error_local, &max_error, 1, + // MPI_C_DOUBLE_COMPLEX, MPI_MAX, 0, Ippl::getComm()); + + return 0; +}