@@ -12,28 +12,18 @@ I worked for the CSE team supporting HECToR, the UK's national supercomputing fa
12
12
13
13
Without knowing much scientific details of his work, it was possible to parallelise his code using the 2DECOMP&FFT library in a matter of days. Some essential parts of the serial and the the parallel code are compared side-by-side below to show how easy it is to use the library.
14
14
15
- <table
16
- style="text-align: left; width: 90%; margin-left: auto; margin-right: auto;"
17
- border="1" cellpadding="1" cellspacing="1">
15
+ <table border =" 1 " cellpadding =" 1 " cellspacing =" 1 " >
18
16
<tbody>
19
17
<tr>
20
- <th
21
- style="vertical-align: top; background-color: rgb(153, 255, 153); text-align: left; width: 50%;">Serial
22
- Code
23
- </th>
24
- <th
25
- style="vertical-align: top; background-color: rgb(204, 204, 255); width: 50%;">Parallel
26
- Code
27
- </th>
18
+ <th>Serial Code</th>
19
+ <th>Parallel Code</th>
28
20
</tr>
29
21
<tr>
30
- <td
31
- style="vertical-align: top; background-color: rgb(153, 255, 153);">
32
- <pre class="pre_special">program vort<br><br> implicit none<br><br> ! use FFTW for the Fourier transforms<br> include "fftw3.f"<br><br><br> integer, parameter :: mytype = 8<br> <br> ! global size<br> integer, parameter :: n1=8, n2=8, n3=8 <br><br><br><br><br><br><br> real(mytype), allocatable, dimension(:,:,:) :: &<br> ox,oy,oz, uu,vv,ww<br> complex(mytype), allocatable, dimension(:,:,:) :: &<br> Ax,Ay,Az,U,V,W<br><br> integer*8 :: plan<br><br><br><br> ! allocate space for real-space vorticity field<br> allocate(ox(0:n1-1,0:n2-1,0:n3-1))<br> allocate(oy(0:n1-1,0:n2-1,0:n3-1))<br> allocate(oz(0:n1-1,0:n2-1,0:n3-1))<br><br><br><br><br><br><br> <br> ! initialise vorticity in real-space <br> do k=0,n3-1<br> do j=0,n2-1<br> do i=0,n1-1<br><br> ! init ox, oy, oz<br> call vortexOmega( &<br> ox(i,j,k),oy(i,j,k),oz(i,j,k), &<br> xgs+dx*(i+0.5-1), ygs+dy*(j+0.5-1), &<br> zgs+dz*(k+0.5-1))<br> end do<br> end do<br> end do<br><br><br><br><br><br><br> ! allocate space for wave-space variables<br> allocate(Ax(0:n1/2,0:n2-1,0:n3-1))<br> allocate(Ay(0:n1/2,0:n2-1,0:n3-1))<br> allocate(Az(0:n1/2,0:n2-1,0:n3-1))<br> allocate(U(0:n1/2,0:n2-1,0:n3-1))<br> allocate(V(0:n1/2,0:n2-1,0:n3-1))<br> allocate(W(0:n1/2,0:n2-1,0:n3-1))<br><br><br><br><br><br><br><br><br><br><br><br><br><br> ! Using FFTW, <br> ! forward FFT - ox,oy,oz -> Ax,Ay,Az<br> call dfftw_plan_dft_r2c_3d( &<br> plan,n1,n2,n3,ox,Ax,FFTW_ESTIMATE)<br> call dfftw_execute_dft_r2c(plan,ox,Ax)<br> call dfftw_plan_dft_r2c_3d( &<br> plan,n1,n2,n3,oy,Ay,FFTW_ESTIMATE)<br> call dfftw_execute_dft_r2c(plan,oy,Ay)<br> call dfftw_plan_dft_r2c_3d( &<br> plan,n1,n2,n3,oz,Az,FFTW_ESTIMATE)<br> call dfftw_execute_dft_r2c(plan,oz,Az)<br><br> deallocate(ox,oy,oz)<br><br> ! Fourier domain stuff<br> do k=0,n3-1<br> do j=0,n2-1<br> do i=0,n1/2<br><br> ! wave-space calculation omitted<br> U(i,j,k) = ......<br> V(i,j,k) = ......<br> W(i,j,k) = ......<br><br> end do<br> end do<br> end do<br><br> deallocate(Ax,Ay,Az)<br> <br> ! allocate real-space velocity variables<br> allocate(uu(0:n1-1,0:n2-1,0:n3-1))<br> allocate(vv(0:n1-1,0:n2-1,0:n3-1))<br> allocate(ww(0:n1-1,0:n2-1,0:n3-1))<br><br><br><br><br><br><br><br> ! Using FFTW, <br> ! inverse FFT U,V,W -> uu,vv,ww<br> call dfftw_plan_dft_c2r_3d( &<br> plan,n1,n2,n3,U,uu,FFTW_ESTIMATE)<br> call dfftw_execute_dft_c2r(plan,U,uu)<br> call dfftw_plan_dft_c2r_3d( &<br> plan,n1,n2,n3,V,vv,FFTW_ESTIMATE)<br> call dfftw_execute_dft_c2r(plan,V,vv)<br> call dfftw_plan_dft_c2r_3d( &<br> plan,n1,n2,n3,W,ww,FFTW_ESTIMATE)<br> call dfftw_execute_dft_c2r(plan,W,WW)<br><br> ! rescale<br> uu = uu*scale<br> vv = vv*scale<br> ww = ww*scale<br><br> deallocate(U,V,W)<br><br> ! write out using Fortran direct access <br> inquire(iolength=iol) uu(1,1,1)<br> open(10, FILE='data_serial', &<br> FORM='unformatted', &<br> ACCESS='DIRECT', RECL=iol)<br> n=1<br> do k=0,n3-1<br> do j=0,n2-1<br> do i=0,n1-1<br> write(10,rec=n) uu(i,j,k)<br> n=n+1<br> end do<br> end do<br> end do<br> do k=0,n3-1<br> do j=0,n2-1<br> do i=0,n1-1<br> write(10,rec=n) vv(i,j,k)<br> n=n+1<br> end do<br> end do<br> end do<br> do k=0,n3-1<br> do j=0,n2-1<br> do i=0,n1-1<br> write(10,rec=n) ww(i,j,k)<br> n=n+1<br> end do<br> end do<br> end do<br> close(10)<br><br> deallocate(uu,vv,ww)<br><br><br><br><br><br><br>end program vort<br></pre>
22
+ <td>
23
+ <pre>program vort<br><br> implicit none<br><br> ! use FFTW for the Fourier transforms<br> include "fftw3.f"<br><br><br> integer, parameter :: mytype = 8<br> <br> ! global size<br> integer, parameter :: n1=8, n2=8, n3=8 <br><br><br><br><br><br><br> real(mytype), allocatable, dimension(:,:,:) :: &<br> ox,oy,oz, uu,vv,ww<br> complex(mytype), allocatable, dimension(:,:,:) :: &<br> Ax,Ay,Az,U,V,W<br><br> integer*8 :: plan<br><br><br><br> ! allocate space for real-space vorticity field<br> allocate(ox(0:n1-1,0:n2-1,0:n3-1))<br> allocate(oy(0:n1-1,0:n2-1,0:n3-1))<br> allocate(oz(0:n1-1,0:n2-1,0:n3-1))<br><br><br><br><br><br><br> <br> ! initialise vorticity in real-space <br> do k=0,n3-1<br> do j=0,n2-1<br> do i=0,n1-1<br><br> ! init ox, oy, oz<br> call vortexOmega( &<br> ox(i,j,k),oy(i,j,k),oz(i,j,k), &<br> xgs+dx*(i+0.5-1), ygs+dy*(j+0.5-1), &<br> zgs+dz*(k+0.5-1))<br> end do<br> end do<br> end do<br><br><br><br><br><br><br> ! allocate space for wave-space variables<br> allocate(Ax(0:n1/2,0:n2-1,0:n3-1))<br> allocate(Ay(0:n1/2,0:n2-1,0:n3-1))<br> allocate(Az(0:n1/2,0:n2-1,0:n3-1))<br> allocate(U(0:n1/2,0:n2-1,0:n3-1))<br> allocate(V(0:n1/2,0:n2-1,0:n3-1))<br> allocate(W(0:n1/2,0:n2-1,0:n3-1))<br><br><br><br><br><br><br><br><br><br><br><br><br><br> ! Using FFTW, <br> ! forward FFT - ox,oy,oz -> Ax,Ay,Az<br> call dfftw_plan_dft_r2c_3d( &<br> plan,n1,n2,n3,ox,Ax,FFTW_ESTIMATE)<br> call dfftw_execute_dft_r2c(plan,ox,Ax)<br> call dfftw_plan_dft_r2c_3d( &<br> plan,n1,n2,n3,oy,Ay,FFTW_ESTIMATE)<br> call dfftw_execute_dft_r2c(plan,oy,Ay)<br> call dfftw_plan_dft_r2c_3d( &<br> plan,n1,n2,n3,oz,Az,FFTW_ESTIMATE)<br> call dfftw_execute_dft_r2c(plan,oz,Az)<br><br> deallocate(ox,oy,oz)<br><br> ! Fourier domain stuff<br> do k=0,n3-1<br> do j=0,n2-1<br> do i=0,n1/2<br><br> ! wave-space calculation omitted<br> U(i,j,k) = ......<br> V(i,j,k) = ......<br> W(i,j,k) = ......<br><br> end do<br> end do<br> end do<br><br> deallocate(Ax,Ay,Az)<br> <br> ! allocate real-space velocity variables<br> allocate(uu(0:n1-1,0:n2-1,0:n3-1))<br> allocate(vv(0:n1-1,0:n2-1,0:n3-1))<br> allocate(ww(0:n1-1,0:n2-1,0:n3-1))<br><br><br><br><br><br><br><br> ! Using FFTW, <br> ! inverse FFT U,V,W -> uu,vv,ww<br> call dfftw_plan_dft_c2r_3d( &<br> plan,n1,n2,n3,U,uu,FFTW_ESTIMATE)<br> call dfftw_execute_dft_c2r(plan,U,uu)<br> call dfftw_plan_dft_c2r_3d( &<br> plan,n1,n2,n3,V,vv,FFTW_ESTIMATE)<br> call dfftw_execute_dft_c2r(plan,V,vv)<br> call dfftw_plan_dft_c2r_3d( &<br> plan,n1,n2,n3,W,ww,FFTW_ESTIMATE)<br> call dfftw_execute_dft_c2r(plan,W,WW)<br><br> ! rescale<br> uu = uu*scale<br> vv = vv*scale<br> ww = ww*scale<br><br> deallocate(U,V,W)<br><br> ! write out using Fortran direct access <br> inquire(iolength=iol) uu(1,1,1)<br> open(10, FILE='data_serial', &<br> FORM='unformatted', &<br> ACCESS='DIRECT', RECL=iol)<br> n=1<br> do k=0,n3-1<br> do j=0,n2-1<br> do i=0,n1-1<br> write(10,rec=n) uu(i,j,k)<br> n=n+1<br> end do<br> end do<br> end do<br> do k=0,n3-1<br> do j=0,n2-1<br> do i=0,n1-1<br> write(10,rec=n) vv(i,j,k)<br> n=n+1<br> end do<br> end do<br> end do<br> do k=0,n3-1<br> do j=0,n2-1<br> do i=0,n1-1<br> write(10,rec=n) ww(i,j,k)<br> n=n+1<br> end do<br> end do<br> end do<br> close(10)<br><br> deallocate(uu,vv,ww)<br><br><br><br><br><br><br>end program vort<br></pre>
33
24
</td>
34
- <td
35
- style="vertical-align: top; background-color: rgb(204, 204, 255);">
36
- <pre class="pre_special">program vort<br><br> ! use the 2DECOMP&FFT instead of FFTW<br> use decomp_2d<br> use decomp_2d_fft<br> use decomp_2d_io<br> use MPI<br><br> implicit none<br><br> ! global size<br> integer, parameter :: n1=8, n2=8, n3=8 <br> ! 2D processor grid<br> integer, parameter :: p_row=2, p_col=2 <br><br> integer (kind=MPI_OFFSET_KIND) :: filesize, disp<br> integer, dimension(3) :: fft_start, fft_end, fft_size<br><br> real(mytype), allocatable, dimension(:,:,:) :: &<br> ox,oy,oz, uu,vv,ww<br> complex(mytype), allocatable, dimension(:,:,:) :: &<br> Ax,Ay,Az,U,V,W<br><br> ! initialisation of 2DECOMP&FFT<br> call MPI_INIT(ierror)<br> call decomp_2d_init(n1,n2,n3,p_row,p_col)<br><br> ! allocate space for real-space vorticity field<br> allocate(ox(xstart(1)-1:xend(1)-1, &<br> xstart(2)-1:xend(2)-1, &<br> xstart(3)-1:xend(3)-1))<br> allocate(oy(xstart(1)-1:xend(1)-1, &<br> xstart(2)-1:xend(2)-1, &<br> xstart(3)-1:xend(3)-1))<br> allocate(oz(xstart(1)-1:xend(1)-1, &<br> xstart(2)-1:xend(2)-1, &<br> xstart(3)-1:xend(3)-1))<br><br> ! initialise vorticity in real-space<br> do k=xstart(3)-1,xend(3)-1<br> do j=xstart(2)-1,xend(2)-1<br> do i=xstart(1)-1,xend(1)-1<br> ! subroutine identical in serial <br> ! and parallel version<br> call vortexOmega( &<br> ox(i,j,k),oy(i,j,k),oz(i,j,k), &<br> xgs+dx*(i+0.5-1), ygs+dy*(j+0.5-1), &<br> zgs+dz*(k+0.5-1))<br> end do<br> end do<br> end do<br><br> ! initialise FFT library<br> call decomp_2d_fft_init<br> call decomp_2d_fft_get_size( &<br> fft_start,fft_end,fft_size)<br> <br> ! allocate space for wave-space variables<br> allocate(Ax(fft_start(1)-1:fft_end(1)-1, &<br> fft_start(2)-1:fft_end(2)-1, &<br> fft_start(3)-1:fft_end(3)-1) )<br> allocate(Ay(fft_start(1)-1:fft_end(1)-1, &<br> fft_start(2)-1:fft_end(2)-1, &<br> fft_start(3)-1:fft_end(3)-1) )<br> allocate(Az(fft_start(1)-1:fft_end(1)-1, &<br> fft_start(2)-1:fft_end(2)-1, &<br> fft_start(3)-1:fft_end(3)-1) )<br> allocate(U(fft_start(1)-1:fft_end(1)-1, &<br> fft_start(2)-1:fft_end(2)-1, &<br> fft_start(3)-1:fft_end(3)-1) )<br> allocate(V(fft_start(1)-1:fft_end(1)-1, &<br> fft_start(2)-1:fft_end(2)-1, &<br> fft_start(3)-1:fft_end(3)-1) )<br> allocate(W(fft_start(1)-1:fft_end(1)-1, &<br> fft_start(2)-1:fft_end(2)-1, &<br> fft_start(3)-1:fft_end(3)-1) )<br><br> ! Using 2DECOMP&FFT, <br> ! forward FFT - ox,oy,oz -> Ax,Ay,Az<br> call decomp_2d_fft_3d(ox, Ax)<br><br><br> call decomp_2d_fft_3d(oy, Ay)<br><br><br> call decomp_2d_fft_3d(oz, Az)<br><br><br><br> deallocate(ox,oy,oz)<br><br> ! Fourier domain stuff<br> do k=fft_start(3)-1,fft_end(3)-1<br> do j=fft_start(2)-1,fft_end(2)-1<br> do i=fft_start(1)-1,fft_end(1)-1<br><br> ! wave-space calculation omitted<br> U(i,j,k) = ......<br> V(i,j,k) = ......<br> W(i,j,k) = ......<br><br> end do<br> end do<br> end do<br><br> deallocate(Ax,Ay,Az)<br> <br> ! allocate real-space velocity variables<br> allocate(uu(xstart(1)-1:xend(1)-1, &<br> xstart(2)-1:xend(2)-1, &<br> xstart(3)-1:xend(3)-1))<br> allocate(vv(xstart(1)-1:xend(1)-1, &<br> xstart(2)-1:xend(2)-1, &<br> xstart(3)-1:xend(3)-1))<br> allocate(ww(xstart(1)-1:xend(1)-1, &<br> xstart(2)-1:xend(2)-1, &<br> xstart(3)-1:xend(3)-1))<br><br> ! Using 2DECOMP&FFT, <br> !inverse FFT U,V,W -> uu,vv,ww<br> call decomp_2d_fft_3d(U,uu)<br><br><br> call decomp_2d_fft_3d(V,vv)<br><br><br> call decomp_2d_fft_3d(W,ww)<br><br><br><br> ! rescale<br> uu = uu*scale<br> vv = vv*scale<br> ww = ww*scale<br><br> deallocate(U,V,W)<br><br> ! write out using 2DECOMP&FFT MPI-IO routines<br><br> call MPI_FILE_OPEN(MPI_COMM_WORLD, &<br> 'data_parallel', &<br> MPI_MODE_CREATE+MPI_MODE_WRONLY, &<br> MPI_INFO_NULL, &<br> fh, ierror)<br> <br> ! guarantee overwriting<br> filesize = 0_MPI_OFFSET_KIND<br> call MPI_FILE_SET_SIZE(fh,filesize,ierror) <br> disp = 0_MPI_OFFSET_KIND<br> call decomp_2d_write_var(fh,disp,n1,n2,n3,1,uu)<br> call decomp_2d_write_var(fh,disp,n1,n2,n3,1,vv)<br> call decomp_2d_write_var(fh,disp,n1,n2,n3,1,ww)<br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br> call MPI_FILE_CLOSE(fh,ierror)<br><br> deallocate(uu,vv,ww)<br><br> ! clean up <br> call decomp_2d_fft_finalize<br> call decomp_2d_finalize<br> call MPI_FINALIZE(ierror)<br><br>end program vort<br></pre>
25
+ <td>
26
+ <pre>program vort<br><br> ! use the 2DECOMP&FFT instead of FFTW<br> use decomp_2d<br> use decomp_2d_fft<br> use decomp_2d_io<br> use MPI<br><br> implicit none<br><br> ! global size<br> integer, parameter :: n1=8, n2=8, n3=8 <br> ! 2D processor grid<br> integer, parameter :: p_row=2, p_col=2 <br><br> integer (kind=MPI_OFFSET_KIND) :: filesize, disp<br> integer, dimension(3) :: fft_start, fft_end, fft_size<br><br> real(mytype), allocatable, dimension(:,:,:) :: &<br> ox,oy,oz, uu,vv,ww<br> complex(mytype), allocatable, dimension(:,:,:) :: &<br> Ax,Ay,Az,U,V,W<br><br> ! initialisation of 2DECOMP&FFT<br> call MPI_INIT(ierror)<br> call decomp_2d_init(n1,n2,n3,p_row,p_col)<br><br> ! allocate space for real-space vorticity field<br> allocate(ox(xstart(1)-1:xend(1)-1, &<br> xstart(2)-1:xend(2)-1, &<br> xstart(3)-1:xend(3)-1))<br> allocate(oy(xstart(1)-1:xend(1)-1, &<br> xstart(2)-1:xend(2)-1, &<br> xstart(3)-1:xend(3)-1))<br> allocate(oz(xstart(1)-1:xend(1)-1, &<br> xstart(2)-1:xend(2)-1, &<br> xstart(3)-1:xend(3)-1))<br><br> ! initialise vorticity in real-space<br> do k=xstart(3)-1,xend(3)-1<br> do j=xstart(2)-1,xend(2)-1<br> do i=xstart(1)-1,xend(1)-1<br> ! subroutine identical in serial <br> ! and parallel version<br> call vortexOmega( &<br> ox(i,j,k),oy(i,j,k),oz(i,j,k), &<br> xgs+dx*(i+0.5-1), ygs+dy*(j+0.5-1), &<br> zgs+dz*(k+0.5-1))<br> end do<br> end do<br> end do<br><br> ! initialise FFT library<br> call decomp_2d_fft_init<br> call decomp_2d_fft_get_size( &<br> fft_start,fft_end,fft_size)<br> <br> ! allocate space for wave-space variables<br> allocate(Ax(fft_start(1)-1:fft_end(1)-1, &<br> fft_start(2)-1:fft_end(2)-1, &<br> fft_start(3)-1:fft_end(3)-1) )<br> allocate(Ay(fft_start(1)-1:fft_end(1)-1, &<br> fft_start(2)-1:fft_end(2)-1, &<br> fft_start(3)-1:fft_end(3)-1) )<br> allocate(Az(fft_start(1)-1:fft_end(1)-1, &<br> fft_start(2)-1:fft_end(2)-1, &<br> fft_start(3)-1:fft_end(3)-1) )<br> allocate(U(fft_start(1)-1:fft_end(1)-1, &<br> fft_start(2)-1:fft_end(2)-1, &<br> fft_start(3)-1:fft_end(3)-1) )<br> allocate(V(fft_start(1)-1:fft_end(1)-1, &<br> fft_start(2)-1:fft_end(2)-1, &<br> fft_start(3)-1:fft_end(3)-1) )<br> allocate(W(fft_start(1)-1:fft_end(1)-1, &<br> fft_start(2)-1:fft_end(2)-1, &<br> fft_start(3)-1:fft_end(3)-1) )<br><br> ! Using 2DECOMP&FFT, <br> ! forward FFT - ox,oy,oz -> Ax,Ay,Az<br> call decomp_2d_fft_3d(ox, Ax)<br><br><br> call decomp_2d_fft_3d(oy, Ay)<br><br><br> call decomp_2d_fft_3d(oz, Az)<br><br><br><br> deallocate(ox,oy,oz)<br><br> ! Fourier domain stuff<br> do k=fft_start(3)-1,fft_end(3)-1<br> do j=fft_start(2)-1,fft_end(2)-1<br> do i=fft_start(1)-1,fft_end(1)-1<br><br> ! wave-space calculation omitted<br> U(i,j,k) = ......<br> V(i,j,k) = ......<br> W(i,j,k) = ......<br><br> end do<br> end do<br> end do<br><br> deallocate(Ax,Ay,Az)<br> <br> ! allocate real-space velocity variables<br> allocate(uu(xstart(1)-1:xend(1)-1, &<br> xstart(2)-1:xend(2)-1, &<br> xstart(3)-1:xend(3)-1))<br> allocate(vv(xstart(1)-1:xend(1)-1, &<br> xstart(2)-1:xend(2)-1, &<br> xstart(3)-1:xend(3)-1))<br> allocate(ww(xstart(1)-1:xend(1)-1, &<br> xstart(2)-1:xend(2)-1, &<br> xstart(3)-1:xend(3)-1))<br><br> ! Using 2DECOMP&FFT, <br> ! inverse FFT U,V,W -> uu,vv,ww<br> call decomp_2d_fft_3d(U,uu)<br><br><br> call decomp_2d_fft_3d(V,vv)<br><br><br> call decomp_2d_fft_3d(W,ww)<br><br><br><br> ! rescale<br> uu = uu*scale<br> vv = vv*scale<br> ww = ww*scale<br><br> deallocate(U,V,W)<br><br> ! write out using 2DECOMP&FFT MPI-IO routines<br><br> call MPI_FILE_OPEN(MPI_COMM_WORLD, &<br> 'data_parallel', &<br> MPI_MODE_CREATE+MPI_MODE_WRONLY, &<br> MPI_INFO_NULL, &<br> fh, ierror)<br> <br> ! guarantee overwriting<br> filesize = 0_MPI_OFFSET_KIND<br> call MPI_FILE_SET_SIZE(fh,filesize,ierror) <br> disp = 0_MPI_OFFSET_KIND<br> call decomp_2d_write_var(fh,disp,n1,n2,n3,1,uu)<br> call decomp_2d_write_var(fh,disp,n1,n2,n3,1,vv)<br> call decomp_2d_write_var(fh,disp,n1,n2,n3,1,ww)<br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br> call MPI_FILE_CLOSE(fh,ierror)<br><br> deallocate(uu,vv,ww)<br><br> ! clean up <br> call decomp_2d_fft_finalize<br> call decomp_2d_finalize<br> call MPI_FINALIZE(ierror)<br><br>end program vort<br></pre>
37
27
</td>
38
28
</tr>
39
29
</tbody>
0 commit comments