Performance of Fortran versus MPI file writing/reading

I am confused about the write/read performance (speed) of plain Fortran I/O versus MPI-IO for small and large files.

I wrote the following simple dummy program to test this (just writing dummy values to files):

PROGRAM test
! 
IMPLICIT NONE
! 
#if defined (__MPI)
  !
  ! Include file for MPI
  !
#if defined (__MPI_MODULE)
  USE mpi
#else
  INCLUDE 'mpif.h'
#endif
#else
  ! dummy world and null communicator
  INTEGER, PARAMETER :: MPI_COMM_WORLD =  0
  INTEGER, PARAMETER :: MPI_COMM_NULL  = -1
  INTEGER, PARAMETER :: MPI_COMM_SELF  = -2
#endif

INTEGER(kind=MPI_OFFSET_KIND) :: lsize, pos, pos2

INTEGER, PARAMETER :: DP = 8
REAL(kind=DP), ALLOCATABLE, DIMENSION(:) :: trans_prob, array_cpu
INTEGER :: ierr, i, error, my_pool_id, world_comm
INTEGER(kind=DP) :: fil
REAL :: start, finish
INTEGER :: iunepmat, npool, arr_size, loop, pos3, j
REAL(kind=DP) :: dummy
INTEGER*8 :: unf_recl
INTEGER :: ios, direct_io_factor, recl

iunepmat = 10000
arr_size = 102400
loop     = 500

! Initialize MPI
CALL MPI_INIT(ierr)
call MPI_COMM_DUP(MPI_COMM_WORLD, world_comm, ierr)
call MPI_COMM_RANK(world_comm,my_pool_id,error)


ALLOCATE(trans_prob(arr_size))
trans_prob(:) = 1.5d0

!Write using Fortran
CALL MPI_BARRIER(world_comm,error)
!
CALL cpu_time(start)
!
DO i=1, loop
  ! Each sequential unformatted WRITE also stores 4-byte record-length markers around the record.
  OPEN(unit=10+my_pool_id, form='unformatted', position='append', action='write')
  WRITE(10+my_pool_id) trans_prob(:)
  CLOSE(unit=10+my_pool_id)
ENDDO

CALL MPI_COMM_SIZE(world_comm, npool, error)

! The master rank collects each rank's file and writes a single merged file
IF (my_pool_id==0) THEN
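  ! Record length for the direct-access reads: IOLENGTH of one REAL(DP) value times the number of values each rank wrote.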
  INQUIRE (IOLENGTH=direct_io_factor) dummy
  unf_recl = direct_io_factor * int(arr_size * loop, kind=kind(unf_recl)) 
  ALLOCATE (array_cpu( arr_size * loop ))
  array_cpu(:) = 0.0d0
  OPEN(unit=100,file='merged.dat',form='unformatted', status='new', position='append', action='write')
  DO i=0, npool - 1
    OPEN(unit=10+i,form='unformatted', status ='old', access='direct', recl = unf_recl )
    READ(unit=10+i, rec=1) array_cpu(:)
    CLOSE(unit=10+i)
    WRITE(unit=100) array_cpu(:)
  ENDDO
  CLOSE(unit=100)
  DEALLOCATE (array_cpu)
ENDIF

call cpu_time(finish)

!Print time
CALL MPI_BARRIER(world_comm,error)
IF (my_pool_id==0) print*, ' Fortran time', finish-start

!Write using MPI
CALL MPI_BARRIER(world_comm,error)
!
CALL cpu_time(start)
! 
lsize = INT(arr_size, kind = MPI_OFFSET_KIND)
pos  = 0
pos2 = 0
CALL MPI_FILE_OPEN(world_comm, 'MPI.dat', MPI_MODE_WRONLY + MPI_MODE_CREATE, MPI_INFO_NULL, iunepmat, ierr)
DO i=1, loop
  ! Each rank seeks to a rank-dependent byte offset (8 bytes per double) and writes its block independently.
  pos = pos2 + INT(arr_size * my_pool_id, kind = MPI_OFFSET_KIND) * 8_MPI_OFFSET_KIND
  CALL MPI_FILE_SEEK(iunepmat, pos, MPI_SEEK_SET, ierr)
  CALL MPI_FILE_WRITE(iunepmat, trans_prob, lsize, MPI_DOUBLE_PRECISION, MPI_STATUS_IGNORE, ierr)
  pos2 = pos2 + INT(arr_size * (npool - 1), kind = MPI_OFFSET_KIND) * 8_MPI_OFFSET_KIND
ENDDO
!
CALL MPI_FILE_CLOSE(iunepmat,ierr)
CALL cpu_time(finish)
 
CALL MPI_BARRIER(world_comm,error)
IF (my_pool_id==0) print*, ' MPI time', finish-start

DEALLOCATE (trans_prob)

! Shut down MPI cleanly.
CALL MPI_FINALIZE(ierr)

END PROGRAM test
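
For completeness, cpu_time reports CPU time rather than elapsed time; a wall-clock version of the timing with MPI_WTIME would look roughly like this (just a sketch, the program and variable names are illustrative):

! Minimal sketch: wall-clock timing with MPI_WTIME instead of cpu_time.
! Program and variable names are illustrative only.
PROGRAM time_sketch
IMPLICIT NONE
INCLUDE 'mpif.h'
INTEGER :: ierr, rank
DOUBLE PRECISION :: t0, t1

CALL MPI_INIT(ierr)
CALL MPI_COMM_RANK(MPI_COMM_WORLD, rank, ierr)

CALL MPI_BARRIER(MPI_COMM_WORLD, ierr)
t0 = MPI_WTIME()
! ... the write phase being measured would go here ...
CALL MPI_BARRIER(MPI_COMM_WORLD, ierr)
t1 = MPI_WTIME()

IF (rank == 0) PRINT *, ' elapsed wall time (s):', t1 - t0

CALL MPI_FINALIZE(ierr)
END PROGRAM time_sketch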

The code is compiled with:

mpif90  -O3  -x f95-cpp-input -D__FFTW -D__MPI -D__SCALAPACK  test_mpi2.f90 -o a.x

and then run in parallel on 4 cores:

mpirun -np 4 ./a.x

I get the following results:

Loop size   Array size   File size   Fortran time (s)   MPI time (s)
1           10,240,000   313 MB      0.237030014        0.164155006
10           1,024,000   313 MB      0.242821991        0.172048002
100            102,400   313 MB      0.235879987        9.78289992E-02
50           1,024,000   1.6 GB      1.60272002         3.40623116
500            102,400   1.6 GB      1.44547606         3.38340592


As you can see, MPI-IO performance degrades significantly for the larger files. Is it possible to improve MPI-IO performance for large files?

Is this behavior expected?
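
One direction I could try is replacing the independent seek + write calls with a collective write at an explicit offset. A minimal self-contained sketch of that pattern (the file name, block size, and program name are placeholders, and I have not benchmarked it) would be roughly:

! Minimal sketch: collective write at explicit offsets with MPI_FILE_WRITE_AT_ALL,
! instead of independent MPI_FILE_SEEK + MPI_FILE_WRITE.
! 'collective.dat', the block size, and the program name are placeholders.
PROGRAM collective_sketch
IMPLICIT NONE
INCLUDE 'mpif.h'
INTEGER, PARAMETER :: DP = 8
INTEGER, PARAMETER :: arr_size = 102400
REAL(kind=DP) :: buf(arr_size)
INTEGER :: ierr, rank, fh
INTEGER(kind=MPI_OFFSET_KIND) :: offset

CALL MPI_INIT(ierr)
CALL MPI_COMM_RANK(MPI_COMM_WORLD, rank, ierr)
buf(:) = 1.5d0

CALL MPI_FILE_OPEN(MPI_COMM_WORLD, 'collective.dat', &
                   MPI_MODE_WRONLY + MPI_MODE_CREATE, MPI_INFO_NULL, fh, ierr)

! Every rank writes one contiguous block of doubles at a rank-dependent byte
! offset, and all ranks participate in the same collective call.
offset = INT(rank, kind=MPI_OFFSET_KIND) * INT(arr_size, kind=MPI_OFFSET_KIND) * 8_MPI_OFFSET_KIND
CALL MPI_FILE_WRITE_AT_ALL(fh, offset, buf, arr_size, MPI_DOUBLE_PRECISION, &
                           MPI_STATUS_IGNORE, ierr)

CALL MPI_FILE_CLOSE(fh, ierr)
CALL MPI_FINALIZE(ierr)
END PROGRAM collective_sketch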



Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow
