Pi 计算给出了不正确的 OpenMP 结果

-1 fortran openmp gfortran

我刚接触 OpenMP,想知道这段代码有什么问题。它以串行方式工作。我正在使用 Ubuntu Linux 和 gfortran。

 !
    program test_rand
    
    use omp_lib
    implicit none

    integer, parameter :: num_threads =36
    integer*8,parameter :: nc = 1000000000
    integer*8 ncirc,ncircs(0:35)
    integer i,thread_num,istart,iend,ppt
    real*8 x,y,dist,pi
    integer,parameter :: seed = 864

        call srand(seed)
        do i=1,4
            ncircs(i)=0
            end do
            ncirc=0

            ppt=(nc+num_threads-1)/num_threads

            istart=1
            iend=nc
            thread_num=1
    !$ call omp_set_num_threads(num_threads)
    !$omp parallel default(none) private(istart,iend,thread_num,i, &
    !$omp dist,x,y,ncircs) shared(ppt,ncirc)
        !$ thread_num = omp_get_thread_num()
        !$ istart=thread_num*ppt+1
        !$ iend = min(nc,thread_num*ppt+ppt)
            print*,thread_num

            do i=istart,iend
            x=rand()
            y=rand()
            dist=sqrt((x-0.5)**2+(y-0.5)**2)
            if (dist.le.0.5) ncircs(thread_num)=ncircs(thread_num)+1
            end do

        !$omp critical
            !$ print*, "thread_num=",thread_num, "istart=",istart," iend=",iend,ncircs(thread_num+1)
            ncirc=ncirc+ncircs(thread_num)
        !$omp end critical
    !$omp end parallel
            print*,ncircs
            pi=4*dble(ncirc)/dble(nc)
            print *,pi,ncirc
    end program test_rand
Run Code Online (Sandbox Code Playgroud)

输出:

lakshmi@lakshmiVM:~/Documents/fortran$ gfortran -o pi pi.f95
lakshmi@lakshmiVM:~/Documents/fortran$ time ./pi
           1
                    2            785398441                    0                    0                    0             16777216                    0                    0                    0                    0                    0                    0                    0                    0                    0                    0                    0                    0                    0                    0                    0            268435456                    0                    0                    0                    0                    0                    0                    0                    0                    0                    0                    0                    0                    0                    0
   3.1415937640000000                 785398441

real    0m25.211s
user    0m25.152s
sys 0m0.000s

lakshmi@lakshmiVM:~/Documents/fortran$ gfortran -o pi_omp pi.f95 -fopenmp
lakshmi@lakshmiVM:~/Documents/fortran$ time ./pi_omp
           3
          21
          25
          30
          35
          18
           4
          17
          11
          34
          12
          28
           7
          33
          29
          24
          31
          27
           8
          23
          10
          19
           0
           6
          26
          32
          16
          14
           1
          13
           5
          20
          15
           2
           9
          22
 thread_num=           6 istart=   166666669  iend=   194444446                    0
 thread_num=          11 istart=   305555559  iend=   333333336                    0
 thread_num=           5 istart=   138888891  iend=   166666668                    0
 thread_num=          27 istart=   750000007  iend=   777777784                    0
 thread_num=          19 istart=   527777783  iend=   555555560                    0
 thread_num=          25 istart=   694444451  iend=   722222228                    0
 thread_num=           2 istart=    55555557  iend=    83333334                    0
 thread_num=           1 istart=    27777779  iend=    55555556                    0
 thread_num=          21 istart=   583333339  iend=   611111116                    0
 thread_num=          18 istart=   500000005  iend=   527777782                    0
 thread_num=          20 istart=   555555561  iend=   583333338                    0
 thread_num=          24 istart=   666666673  iend=   694444450                    0
 thread_num=          29 istart=   805555563  iend=   833333340                    0
 thread_num=          28 istart=   777777785  iend=   805555562                    0
 thread_num=          26 istart=   722222229  iend=   750000006                    0
 thread_num=          12 istart=   333333337  iend=   361111114                    0
 thread_num=          22 istart=   611111117  iend=   638888894                    0
 thread_num=          23 istart=   638888895  iend=   666666672                    0
 thread_num=          10 istart=   277777781  iend=   305555558                    0
 thread_num=           3 istart=    83333335  iend=   111111112                    0
 thread_num=          34 istart=   944444453  iend=   972222230                    0
 thread_num=          17 istart=   472222227  iend=   500000004                    0
 thread_num=          15 istart=   416666671  iend=   444444448                    0
 thread_num=          14 istart=   388888893  iend=   416666670                    0
 thread_num=           8 istart=   222222225  iend=   250000002                    0
 thread_num=           7 istart=   194444447  iend=   222222224                    0
 thread_num=          30 istart=   833333341  iend=   861111118                    0
 thread_num=          32 istart=   888888897  iend=   916666674                    0
 thread_num=           9 istart=   250000003  iend=   277777780                    0
 thread_num=           4 istart=   111111113  iend=   138888890                    0
 thread_num=          16 istart=   444444449  iend=   472222226                    0
 thread_num=           0 istart=           1  iend=    27777778       93851473435152
 thread_num=          13 istart=   361111115  iend=   388888892                    0
 thread_num=          35 istart=   972222231  iend=  1000000000  4600331172021844405
 thread_num=          33 istart=   916666675  iend=   944444452                    0
 thread_num=          31 istart=   861111119  iend=   888888896                    0
      139938624438272                    0                    0                    0                    0      139942555560952                   10      140720670311236           3933000496                    0      139942559675136            466005475      139942562579304      140720670311528      139942559716466      140720670311360      140720670311376      139942562705897                    3      139942555561536                    1                    0                    1      139942562578432            361825296      139942555561536      139942562578432      139942562579304                    0      140720670311656      139942559737213      140720308486145           4294967295      139942562705897      139942557392112      139942562581024
   375409.03530958400            93852258827396

real    6m57.907s
user    8m35.083s
sys 219m5.670s
lakshmi@lakshmiVM:~/Documents/fortran$
Run Code Online (Sandbox Code Playgroud)

Ian*_*ush 8

恐怕你的程序有很多错误,无论是在错误还是风格方面。我们先看看如何使用编译器来解决一些问题,然后讨论这个编译器帮不了你的问题,然后再谈谈风格。最后,我将展示我将如何解决这个问题。

所以首先我会建议在开发时打开所有编译器警告和错误检测标志。这些有很多。让我们看看是什么-Wall -Wextra告诉您有关您的代码的信息:

ijb@ijb-Latitude-5410:~/work/stack$ gfortran-11 --version
GNU Fortran (GCC) 11.1.0
Copyright © 2021 Free Software Foundation, Inc.
This is free software; see the source for copying conditions.  There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

ijb@ijb-Latitude-5410:~/work/stack$ gfortran-11 -Wall -Wextra -fopenmp -O -g pi_orig.f90
pi_orig.f90:20:7:

   20 |   ppt=(nc+num_threads-1)/num_threads
      |       1
Warning: Integer division truncated to constant ‘27777778’ at (1) [-Winteger-division]
pi_orig.f90:30:12:

   30 |   !$ iend = min(nc,thread_num*ppt+ppt)
      |            1
Warning: Possible change of value in conversion from INTEGER(8) to INTEGER(4) at (1) [-Wconversion]
Run Code Online (Sandbox Code Playgroud)

我并不担心第一个警告,但第二个警告告诉我这iend可能不是一个足够长的整数来存储您需要的迭代数 - 并且暗示两者都不是i,还有许多其他变量。因此,如果您nc在某个点之后增加,您将不会获得更高的准确性。

但是,您的程序不是用标准 Fortran 编写的。从长远来看,遵循标准将使您的生活更轻松。因此,让我们添加-std=2008使用编译器来检测您没有使用标准 Fortran 的地方:

ijb@ijb-Latitude-5410:~/work/stack$ gfortran-11 -Wall -Wextra -std=f2008 -fopenmp -O -g pi_orig.f90
pi_orig.f90:8:12:

    8 |   integer*8,parameter :: nc = 1000000000
      |            1
Error: GNU Extension: Nonstandard type declaration INTEGER*8 at (1)
pi_orig.f90:9:12:

    9 |   integer*8 ncirc,ncircs(0:35)
      |            1
Error: GNU Extension: Nonstandard type declaration INTEGER*8 at (1)
pi_orig.f90:11:9:

   11 |   real*8 x,y,dist,pi
      |         1
Error: GNU Extension: Nonstandard type declaration REAL*8 at (1)
pi_orig.f90:37:23:

   37 |      if (dist.le.0.5) ncircs(thread_num)=ncircs(thread_num)+1
      |                       1
Error: Syntax error in IF-clause after (1)
pi_orig.f90:45:15:

   45 |   print*,ncircs
      |               1
Error: Function ‘ncircs’ requires an argument list at (1)
pi_orig.f90:27:12:

   27 |   !$omp dist,x,y,ncircs) shared(ppt,ncirc)
      |            1
Error: Symbol ‘dist’ at (1) has no IMPLICIT type
pi_orig.f90:20:9:

   20 |   ppt=(nc+num_threads-1)/num_threads
      |         1
Error: Symbol ‘nc’ at (1) has no IMPLICIT type
pi_orig.f90:18:7:

   18 |   ncirc=0
      |       1
Error: Symbol ‘ncirc’ at (1) has no IMPLICIT type
pi_orig.f90:46:4:

   46 |   pi=4*dble(ncirc)/dble(nc)
      |    1
Error: Symbol ‘pi’ at (1) has no IMPLICIT type; did you mean ‘i’?
pi_orig.f90:27:14:

   27 |   !$omp dist,x,y,ncircs) shared(ppt,ncirc)
      |              1
Error: Symbol ‘x’ at (1) has no IMPLICIT type
pi_orig.f90:27:16:

   27 |   !$omp dist,x,y,ncircs) shared(ppt,ncirc)
      |                1
Error: Symbol ‘y’ at (1) has no IMPLICIT type
pi_orig.f90:14:12:

   14 |   call srand(seed)
      |            1
Warning: The intrinsic ‘srand’ at (1) is not included in the selected standard but a GNU Fortran extension and ‘srand’ will be treated as if declared EXTERNAL.  Use an appropriate ‘-std=’* option or define ‘-fall-intrinsics’ to allow this intrinsic. [-Wintrinsics-std]
pi_orig.f90:14:12: Warning: The intrinsic ‘srand’ at (1) is not included in the selected standard but a GNU Fortran extension and ‘srand’ will be treated as if declared EXTERNAL.  Use an appropriate ‘-std=’* option or define ‘-fall-intrinsics’ to allow this intrinsic. [-Wintrinsics-std]
pi_orig.f90:14:18:

   14 |   call srand(seed)
      |                  1
Warning: The intrinsic ‘srand’ at (1) is not included in the selected standard but a GNU Fortran extension and ‘srand’ will be treated as if declared EXTERNAL.  Use an appropriate ‘-std=’* option or define ‘-fall-intrinsics’ to allow this intrinsic. [-Wintrinsics-std]
pi_orig.f90:16:5:

   16 |      ncircs(i)=0
      |     1
Error: Function ‘ncircs’ at (1) has no IMPLICIT type
pi_orig.f90:34:11:

   34 |      x=rand()
      |           1
Warning: The intrinsic ‘rand’ at (1) is not included in the selected standard but a GNU Fortran extension and ‘rand’ will be treated as if declared EXTERNAL.  Use an appropriate ‘-std=’* option or define ‘-fall-intrinsics’ to allow this intrinsic. [-Wintrinsics-std]
pi_orig.f90:34:11: Warning: The intrinsic ‘rand’ at (1) is not included in the selected standard but a GNU Fortran extension and ‘rand’ will be treated as if declared EXTERNAL.  Use an appropriate ‘-std=’* option or define ‘-fall-intrinsics’ to allow this intrinsic. [-Wintrinsics-std]
pi_orig.f90:34:7:

   34 |      x=rand()
      |       1
Warning: The intrinsic ‘rand’ at (1) is not included in the selected standard but a GNU Fortran extension and ‘rand’ will be treated as if declared EXTERNAL.  Use an appropriate ‘-std=’* option or define ‘-fall-intrinsics’ to allow this intrinsic. [-Wintrinsics-std]
pi_orig.f90:34:7:

   34 |      x=rand()
      |       1
Error: Function ‘rand’ at (1) has no IMPLICIT type
pi_orig.f90:35:7:

   35 |      y=rand()
      |       1
Error: Function ‘rand’ at (1) has no IMPLICIT type
pi_orig.f90:41:70:

   41 |   !$ print*, "thread_num=",thread_num, "istart=",istart," iend=",iend,ncircs(thread_num+1)
      |                                                                      1
Error: Function ‘ncircs’ at (1) has no IMPLICIT type
pi_orig.f90:42:20:

   42 |   ncirc=ncirc+ncircs(thread_num)
      |                    1
Error: Function ‘ncircs’ at (1) has no IMPLICIT type
pi_orig.f90:27:17:

   27 |   !$omp dist,x,y,ncircs) shared(ppt,ncirc)
      |                 1
Error: Object ‘ncircs’ is not a variable at (1)
ijb@ijb-Latitude-5410:~/work/stack$ 
Run Code Online (Sandbox Code Playgroud)

哦亲爱的!实际上只有3个错误:

  1. Real*8 不是,也从来不是标准 Fortran 的一部分。gfortran 支持它,但其他编译器可能不支持。不要使用它!
  2. Integer*8 不是,也从来不是标准 Fortran 的一部分。gfortran 支持它,但其他编译器可能不支持。不要使用它!
  3. srand并且rand不是标准的 Fortran 内在函数。gfortran 支持它,但其他编译器可能不支持。不要使用它!

数字 3 可以通过使用标准内在函数Random_number(但见下文)来解决。数字 1 和 2 使用种类求解 - 请参阅Fortran 90 kind 参数。并且因为 1 和 2 影响变量声明,所有使用这些错误行声明的变量的行现在都被标记为错误本身,因此出现了级联错误。

修复这些可以解决一些问题,但恐怕还有更多错误。特别是在数组的边界之外访问数组。您可以-fcheck=all在调试时使用我强烈推荐的自动检测这些,但请注意它会大大减慢您的程序速度,因此在运行生产运行时忽略它。如果我修复了上面提到的错误,然后在运行程序时使用 -fcheck=all ,我会得到:

ijb@ijb-Latitude-5410:~/work/stack$ gfortran-11 -Wall -Wextra -std=f2008 -fcheck=all -fopenmp -O -g pi2.f90 
pi2.f90:22:7:

   22 |   ppt=(nc+num_threads-1)/num_threads
      |       1
Warning: Integer division truncated to constant ‘2778’ at (1) [-Winteger-division]
ijb@ijb-Latitude-5410:~/work/stack$ ./a.out
                    1
                   28
                   14
                   26
                   32
                    9
                    5
                    8
                   10
                   35
                   21
                   29
                    0
                   22
                   17
                    4
                   19
 thread_num=                   14 istart=                38893  iend=                41670                    0
 thread_num=                   21 istart=                58339  iend=                61116                    0
 thread_num=                   28 istart=                77785  iend=                80562                    0
                   20
                   12
                    7
                   24
                   23
                   15
                   34
                   25
 thread_num=                   26 istart=                72229  iend=                75006                    0
 thread_num=                   23 istart=                63895  iend=                66672                    0
 thread_num=                   10 istart=                27781  iend=                30558                    0
 thread_num=                   15 istart=                41671  iend=                44448                    0
 thread_num=                    1 istart=                 2779  iend=                 5556                    0
 thread_num=                   25 istart=                69451  iend=                72228                    0
 thread_num=                    9 istart=                25003  iend=                27780                    0
                   16
                   33
                   30
 thread_num=                    0 istart=                    1  iend=                 2778                    0
                   27
 thread_num=                    7 istart=                19447  iend=                22224                    0
                   31
                    2
                    6
 thread_num=                   17 istart=                47227  iend=                50004                    0
                    3
                   13
                   11
 thread_num=                   27 istart=                75007  iend=                77784                    0
 thread_num=                   30 istart=                83341  iend=                86118                    0
 thread_num=                   32 istart=                88897  iend=                91674                    0
 thread_num=                    4 istart=                11113  iend=                13890                    0
 thread_num=                   19 istart=                52783  iend=                55560                    0
 thread_num=                    2 istart=                 5557  iend=                 8334                    0
                   18
 thread_num=                    6 istart=                16669  iend=                19446                    0
 thread_num=                    5 istart=                13891  iend=                16668                    0
 thread_num=                    3 istart=                 8335  iend=                11112                    0
 thread_num=                   24 istart=                66673  iend=                69450                    0
 thread_num=                    8 istart=                22225  iend=                25002                    0
 thread_num=                   34 istart=                94453  iend=                97230                    0
 thread_num=                   11 istart=                30559  iend=                33336                    0
 thread_num=                   29 istart=                80563  iend=                83340                    0
 thread_num=                   22 istart=                61117  iend=                63894                    0
 thread_num=                   16 istart=                44449  iend=                47226                    0
 thread_num=                   20 istart=                55561  iend=                58338                    0
 thread_num=                   12 istart=                33337  iend=                36114                    0
At line 45 of file pi2.f90
Fortran runtime error: Index '36' of dimension 1 of array 'ncircs' above upper bound of 35

Error termination. Backtrace:
#0  0x7f580fce1d01 in ???
#1  0x7f580fce2849 in ???
#2  0x7f580fce2ec6 in ???
#3  0x4015ab in MAIN__._omp_fn.0
    at /home/ijb/work/stack/pi2.f90:45
#4  0x7f580fb4a77d in ???
#5  0x7f580fab1608 in start_thread
    at /build/glibc-YbNSs7/glibc-2.31/nptl/pthread_create.c:477
#6  0x7f580f9d6292 in ???
#7  0xffffffffffffffff in ???
Run Code Online (Sandbox Code Playgroud)

所以你可以用编译器来发现很多问题。然而,还有更多它无法捕捉。最重要的是,您将数组范围ncircs设为私有,但不在并行区域内对其进行初始化 - 当您进入并行区域时,每个线程将生成自己的私有变量的新版本,默认情况下,并行区域外的任何初始化都将被遗忘。所以你正在使用一个未初始化的变量。即使没有,在代码中也要注意,您只初始化ncircs了 .

然而,除了上面提到的错误之外,这并不是您编写 openmp 代码的真正方式。OpenMP 提供了在循环中自动共享工作的方法,您真的不应该像在代码中所做的那样手动共享迭代。此外,它提供了精确执行此类计算的缩减,关键应该只在需要的地方使用,而不是用于这里显示的缩减擅长的专业案例。也不需要ncircs数组——每个线程只需要一个值,那么为什么要声明一个数组呢?根据经验,如果您声明的内存随着线程数量的增加而增加,那么您可能做错事。最后,我也不会将线程数刻录到代码中,我会使用环境变量来设置线程数,以便无需重新编译即可更改它。

所以这是我将如何解决您的问题,并显示它可以正确编译和运行:

ijb@ijb-Latitude-5410:~/work/stack$ cat pi_ijb.f90
Program pi_calc

  Use, Intrinsic :: iso_fortran_env, Only : wp => real64, li => int64

  Use omp_lib
  
  Implicit None

  Real( wp ), Parameter :: pi_exact = 4.0_wp * Atan( 1.0_wp )

  Integer( li ), Parameter :: n_sample = 1000000000_li
  Integer      , Parameter :: seed = 864

  Real( wp ), Dimension( 1:2 ) :: rand
  
  Real( wp ) :: dist
  Real( wp ) :: pi_approx
  
  Integer( li ) :: ncirc
  Integer( li ) :: i_sample
  Integer( li ) :: start, finish, rate
  
  Integer :: thread_num
  Integer :: n_seed
  Integer :: i

  ! Assume using the maximum number of threads made available
  Write( *, * ) 'Running on ', omp_get_max_threads(), ' threads'

  ncirc = 0

  Call system_clock( start, rate )
  ! Create threads
  !$omp parallel default( none ) private( thread_num, n_seed, i, rand, dist ) reduction( +:ncirc )

  ! Set up the random number generator. Assume it is thread safe
  thread_num = omp_get_thread_num()
  Call Random_seed( size = n_seed )
  Call Random_seed( put = [ ( i * seed * ( thread_num + 1 ), i = 1, n_seed ) ] )

  ! Workshare the loop
  !$omp do
  Sampling_loop: Do i_sample = 1, n_sample
     Call Random_number( rand )
     rand = rand - 0.5_wp
     dist = rand( 1 ) * rand( 1 ) + rand( 2 ) * rand( 2 )
     If( dist <= 0.5_wp * 0.5_wp ) Then
        ncirc = ncirc + 1
     End If
  End Do Sampling_loop
  !$omp end do

  ! Close down the parallel region
  !$omp end parallel
  Call system_clock( finish, rate )

  pi_approx = 4.0_wp * Real( ncirc, wp ) / Real( n_sample, wp )
  Write( *, * ) 'Using ', n_sample, ' sampling points: '
  Write( *, * ) 'Approx pi ', pi_approx
  Write( *, * ) 'Exact  pi ', pi_exact
  Write( *, * ) 'Error     ', Abs( pi_approx - pi_exact )
  Write( *, * ) 'This took ', Real( finish - start ) / Real( rate ), ' seconds'

End Program pi_calc
ijb@ijb-Latitude-5410:~/work/stack$ gfortran-11 -Wall -Wextra -fcheck=all -O -g -fopenmp pi_ijb.f90 
ijb@ijb-Latitude-5410:~/work/stack$ export OMP_NUM_THREADS=4
ijb@ijb-Latitude-5410:~/work/stack$ ./a.out
 Running on            4  threads
 Using            1000000000  sampling points: 
 Approx pi    3.1415474360000002     
 Exact  pi    3.1415926535897931     
 Error        4.5217589792923008E-005
 This took    5.21012735      seconds
ijb@ijb-Latitude-5410:~/work/stack$ gfortran-11 -Wall -Wextra -O -g -fopenmp pi_ijb.f90 
ijb@ijb-Latitude-5410:~/work/stack$ export OMP_NUM_THREADS=1
ijb@ijb-Latitude-5410:~/work/stack$ ./a.out
 Running on            1  threads
 Using            1000000000  sampling points: 
 Approx pi    3.1415804920000001     
 Exact  pi    3.1415926535897931     
 Error        1.2161589793002747E-005
 This took    18.5533371      seconds
ijb@ijb-Latitude-5410:~/work/stack$ export OMP_NUM_THREADS=2
ijb@ijb-Latitude-5410:~/work/stack$ ./a.out
 Running on            2  threads
 Using            1000000000  sampling points: 
 Approx pi    3.1416029320000001     
 Exact  pi    3.1415926535897931     
 Error        1.0278410206954192E-005
 This took    8.87815666      seconds
ijb@ijb-Latitude-5410:~/work/stack$ export OMP_NUM_THREADS=4
ijb@ijb-Latitude-5410:~/work/stack$ ./a.out
 Running on            4  threads
 Using            1000000000  sampling points: 
 Approx pi    3.1415474360000002     
 Exact  pi    3.1415926535897931     
 Error        4.5217589792923008E-005
 This took    4.49851656      seconds
Run Code Online (Sandbox Code Playgroud)

最后请注意,我假设标准的随机数生成器是线程安全的,简单来说。然而,我的理解是这不是保证,如果你想认真地做这种事情,你应该考虑提供你自己的,