Performance of Fortran versus MPI file writing/reading
I am confused about the write/read performance (speed) of plain Fortran unformatted I/O versus MPI-IO, for both small and big files.
I wrote the following simple dummy program to test this (just writing dummy values to files):
PROGRAM test
  !
  IMPLICIT NONE
  !
#if defined (__MPI)
  !
  ! Include file for MPI
  !
#if defined (__MPI_MODULE)
  USE mpi
#else
  INCLUDE 'mpif.h'
#endif
#else
  ! dummy world and null communicator
  INTEGER, PARAMETER :: MPI_COMM_WORLD = 0
  INTEGER, PARAMETER :: MPI_COMM_NULL = -1
  INTEGER, PARAMETER :: MPI_COMM_SELF = -2
#endif
  INTEGER (kind=MPI_OFFSET_KIND) :: lsize, pos, pos2
  INTEGER, PARAMETER :: DP = 8
  REAL(kind=DP), ALLOCATABLE, DIMENSION(:) :: trans_prob, array_cpu
  INTEGER :: ierr, i, error, my_pool_id, world_comm
  INTEGER (kind=DP) :: fil
  REAL :: start, finish
  INTEGER :: iunepmat, npool, arr_size, loop, pos3, j
  REAL(dp) :: dummy
  INTEGER*8 :: unf_recl
  INTEGER :: ios, direct_io_factor, recl
  iunepmat = 10000
  arr_size = 102400
  loop = 500
  ! Initialize MPI
  CALL MPI_INIT(ierr)
  CALL MPI_COMM_DUP(MPI_COMM_WORLD, world_comm, ierr)
  CALL MPI_COMM_RANK(world_comm, my_pool_id, error)
  ALLOCATE(trans_prob(arr_size))
  trans_prob(:) = 1.5d0
  ! Write using Fortran: each rank appends to its own unit (10+my_pool_id)
  CALL MPI_BARRIER(world_comm, error)
  !
  CALL cpu_time(start)
  !
  DO i=1, loop
    ! Sequential unformatted I/O also stores a record-length marker (4 bytes) around each record.
    OPEN(unit=10+my_pool_id, form='unformatted', position='append', action='write')
    WRITE(10+my_pool_id) trans_prob(:)
    CLOSE(unit=10+my_pool_id)
  ENDDO
  CALL MPI_COMM_SIZE(world_comm, npool, error)
  ! Master collects the per-rank files and writes them into a single merged file
  IF (my_pool_id == 0) THEN
    INQUIRE(IOLENGTH=direct_io_factor) dummy
    unf_recl = direct_io_factor * int(arr_size * loop, kind=kind(unf_recl))
    ALLOCATE(array_cpu(arr_size * loop))
    array_cpu(:) = 0.0d0
    OPEN(unit=100, file='merged.dat', form='unformatted', status='new', position='append', action='write')
    DO i=0, npool - 1
      ! Re-open each rank's file with direct access and read it back as one record
      OPEN(unit=10+i, form='unformatted', status='old', access='direct', recl=unf_recl)
      READ(unit=10+i, rec=1) array_cpu(:)
      CLOSE(unit=10+i)
      WRITE(unit=100) array_cpu(:)
    ENDDO
    CLOSE(unit=100)
    DEALLOCATE(array_cpu)
  ENDIF
  CALL cpu_time(finish)
  ! Print time
  CALL MPI_BARRIER(world_comm, error)
  IF (my_pool_id == 0) print*, ' Fortran time', finish-start
  ! Write using MPI-IO: all ranks write into one shared file, interleaved by rank
  CALL MPI_BARRIER(world_comm, error)
  !
  CALL cpu_time(start)
  !
  lsize = INT(arr_size, kind=MPI_OFFSET_KIND)
  pos = 0
  pos2 = 0
  CALL MPI_FILE_OPEN(world_comm, 'MPI.dat', MPI_MODE_WRONLY + MPI_MODE_CREATE, MPI_INFO_NULL, iunepmat, ierr)
  DO i=1, loop
    ! Seek to this rank's slot for this iteration, then write arr_size doubles
    pos = pos2 + INT(arr_size * (my_pool_id), kind=MPI_OFFSET_KIND) * 8_MPI_OFFSET_KIND
    CALL MPI_FILE_SEEK(iunepmat, pos, MPI_SEEK_SET, ierr)
    CALL MPI_FILE_WRITE(iunepmat, trans_prob, lsize, MPI_DOUBLE_PRECISION, MPI_STATUS_IGNORE, ierr)
    pos2 = pos2 + INT(arr_size * (npool - 1), kind=MPI_OFFSET_KIND) * 8_MPI_OFFSET_KIND
  ENDDO
  !
  CALL MPI_FILE_CLOSE(iunepmat, ierr)
  CALL cpu_time(finish)
  CALL MPI_BARRIER(world_comm, error)
  IF (my_pool_id == 0) print*, ' MPI time', finish-start
  DEALLOCATE(trans_prob)
  CALL MPI_FINALIZE(ierr)
END PROGRAM
The code is compiled with:
mpif90 -O3 -x f95-cpp-input -D__FFTW -D__MPI -D__SCALAPACK test_mpi2.f90 -o a.x
and then run in parallel with 4 cores:
mpirun -np 4 ./a.x
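For reference, each run writes loop * arr_size double-precision values per rank, so the total payload across the 4 ranks is 4 * loop * arr_size * 8 bytes: about 313 MB when loop * arr_size = 10,240,000 and about 1.6 GB when loop * arr_size = 51,200,000, which matches the file sizes reported below.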
I get the following results:
Loop size   Array size    File size   Fortran time (s)   MPI time (s)
        1   10,240,000    313 MB      0.237030014        0.164155006
       10    1,024,000    313 MB      0.242821991        0.172048002
      100      102,400    313 MB      0.235879987        9.78289992E-02
       50    1,024,000    1.6 GB      1.60272002         3.40623116
      500      102,400    1.6 GB      1.44547606         3.38340592
As you can see, the MPI-IO performance degrades significantly for the larger files. Is it possible to improve the MPI performance for large files?
Is this behavior expected?
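For what it's worth, one variation I have not benchmarked is replacing the seek-and-write pattern with explicit-offset collective writes (MPI_FILE_WRITE_AT_ALL), which, as far as I understand, gives the MPI-IO layer a chance to aggregate the interleaved blocks (collective buffering). A minimal sketch of how that inner loop could look, reusing the variables from the program above (the file name MPI_coll.dat is just for illustration):

  ! Sketch only (untested): collective, explicit-offset version of the MPI write loop
  CALL MPI_FILE_OPEN(world_comm, 'MPI_coll.dat', MPI_MODE_WRONLY + MPI_MODE_CREATE, &
                     MPI_INFO_NULL, iunepmat, ierr)
  pos2 = 0
  DO i=1, loop
    ! Same offsets as above: each rank writes its arr_size block at an interleaved position
    pos = pos2 + INT(arr_size * (my_pool_id), kind=MPI_OFFSET_KIND) * 8_MPI_OFFSET_KIND
    ! Collective call: every rank of world_comm must participate in each iteration
    CALL MPI_FILE_WRITE_AT_ALL(iunepmat, pos, trans_prob, arr_size, &
                               MPI_DOUBLE_PRECISION, MPI_STATUS_IGNORE, ierr)
    pos2 = pos2 + INT(arr_size * (npool - 1), kind=MPI_OFFSET_KIND) * 8_MPI_OFFSET_KIND
  ENDDO
  CALL MPI_FILE_CLOSE(iunepmat, ierr)

I do not know whether this (or passing file-system hints through an MPI_Info object) would actually help in my case, so any insight is welcome.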