我刚接触 OpenMP,想知道这段代码有什么问题。它以串行方式工作。我正在使用 Ubuntu Linux 和 gfortran。
!
program test_rand
use omp_lib
implicit none
integer, parameter :: num_threads =36
integer*8,parameter :: nc = 1000000000
integer*8 ncirc,ncircs(0:35)
integer i,thread_num,istart,iend,ppt
real*8 x,y,dist,pi
integer,parameter :: seed = 864
call srand(seed)
do i=1,4
ncircs(i)=0
end do
ncirc=0
ppt=(nc+num_threads-1)/num_threads
istart=1
iend=nc
thread_num=1
!$ call omp_set_num_threads(num_threads)
!$omp parallel default(none) private(istart,iend,thread_num,i, &
!$omp dist,x,y,ncircs) shared(ppt,ncirc)
!$ thread_num = omp_get_thread_num()
!$ istart=thread_num*ppt+1
!$ iend = min(nc,thread_num*ppt+ppt)
print*,thread_num
do i=istart,iend
x=rand()
y=rand()
dist=sqrt((x-0.5)**2+(y-0.5)**2)
if (dist.le.0.5) ncircs(thread_num)=ncircs(thread_num)+1
end do
!$omp critical
!$ print*, "thread_num=",thread_num, "istart=",istart," iend=",iend,ncircs(thread_num+1)
ncirc=ncirc+ncircs(thread_num)
!$omp end critical
!$omp end parallel
print*,ncircs
pi=4*dble(ncirc)/dble(nc)
print *,pi,ncirc
end program test_rand
Run Code Online (Sandbox Code Playgroud)
输出:
lakshmi@lakshmiVM:~/Documents/fortran$ gfortran -o pi pi.f95
lakshmi@lakshmiVM:~/Documents/fortran$ time ./pi
1
2 785398441 0 0 0 16777216 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 268435456 0 0 0 0 0 0 0 0 0 0 0 0 0 0
3.1415937640000000 785398441
real 0m25.211s
user 0m25.152s
sys 0m0.000s
lakshmi@lakshmiVM:~/Documents/fortran$ gfortran -o pi_omp pi.f95 -fopenmp
lakshmi@lakshmiVM:~/Documents/fortran$ time ./pi_omp
3
21
25
30
35
18
4
17
11
34
12
28
7
33
29
24
31
27
8
23
10
19
0
6
26
32
16
14
1
13
5
20
15
2
9
22
thread_num= 6 istart= 166666669 iend= 194444446 0
thread_num= 11 istart= 305555559 iend= 333333336 0
thread_num= 5 istart= 138888891 iend= 166666668 0
thread_num= 27 istart= 750000007 iend= 777777784 0
thread_num= 19 istart= 527777783 iend= 555555560 0
thread_num= 25 istart= 694444451 iend= 722222228 0
thread_num= 2 istart= 55555557 iend= 83333334 0
thread_num= 1 istart= 27777779 iend= 55555556 0
thread_num= 21 istart= 583333339 iend= 611111116 0
thread_num= 18 istart= 500000005 iend= 527777782 0
thread_num= 20 istart= 555555561 iend= 583333338 0
thread_num= 24 istart= 666666673 iend= 694444450 0
thread_num= 29 istart= 805555563 iend= 833333340 0
thread_num= 28 istart= 777777785 iend= 805555562 0
thread_num= 26 istart= 722222229 iend= 750000006 0
thread_num= 12 istart= 333333337 iend= 361111114 0
thread_num= 22 istart= 611111117 iend= 638888894 0
thread_num= 23 istart= 638888895 iend= 666666672 0
thread_num= 10 istart= 277777781 iend= 305555558 0
thread_num= 3 istart= 83333335 iend= 111111112 0
thread_num= 34 istart= 944444453 iend= 972222230 0
thread_num= 17 istart= 472222227 iend= 500000004 0
thread_num= 15 istart= 416666671 iend= 444444448 0
thread_num= 14 istart= 388888893 iend= 416666670 0
thread_num= 8 istart= 222222225 iend= 250000002 0
thread_num= 7 istart= 194444447 iend= 222222224 0
thread_num= 30 istart= 833333341 iend= 861111118 0
thread_num= 32 istart= 888888897 iend= 916666674 0
thread_num= 9 istart= 250000003 iend= 277777780 0
thread_num= 4 istart= 111111113 iend= 138888890 0
thread_num= 16 istart= 444444449 iend= 472222226 0
thread_num= 0 istart= 1 iend= 27777778 93851473435152
thread_num= 13 istart= 361111115 iend= 388888892 0
thread_num= 35 istart= 972222231 iend= 1000000000 4600331172021844405
thread_num= 33 istart= 916666675 iend= 944444452 0
thread_num= 31 istart= 861111119 iend= 888888896 0
139938624438272 0 0 0 0 139942555560952 10 140720670311236 3933000496 0 139942559675136 466005475 139942562579304 140720670311528 139942559716466 140720670311360 140720670311376 139942562705897 3 139942555561536 1 0 1 139942562578432 361825296 139942555561536 139942562578432 139942562579304 0 140720670311656 139942559737213 140720308486145 4294967295 139942562705897 139942557392112 139942562581024
375409.03530958400 93852258827396
real 6m57.907s
user 8m35.083s
sys 219m5.670s
lakshmi@lakshmiVM:~/Documents/fortran$
Run Code Online (Sandbox Code Playgroud)
恐怕你的程序有很多错误,无论是在错误还是风格方面。我们先看看如何使用编译器来解决一些问题,然后讨论这个编译器帮不了你的问题,然后再谈谈风格。最后,我将展示我将如何解决这个问题。
所以首先我会建议在开发时打开所有编译器警告和错误检测标志。这些有很多。让我们看看是什么-Wall -Wextra告诉您有关您的代码的信息:
ijb@ijb-Latitude-5410:~/work/stack$ gfortran-11 --version
GNU Fortran (GCC) 11.1.0
Copyright © 2021 Free Software Foundation, Inc.
This is free software; see the source for copying conditions. There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
ijb@ijb-Latitude-5410:~/work/stack$ gfortran-11 -Wall -Wextra -fopenmp -O -g pi_orig.f90
pi_orig.f90:20:7:
20 | ppt=(nc+num_threads-1)/num_threads
| 1
Warning: Integer division truncated to constant ‘27777778’ at (1) [-Winteger-division]
pi_orig.f90:30:12:
30 | !$ iend = min(nc,thread_num*ppt+ppt)
| 1
Warning: Possible change of value in conversion from INTEGER(8) to INTEGER(4) at (1) [-Wconversion]
Run Code Online (Sandbox Code Playgroud)
我并不担心第一个警告,但第二个警告告诉我这iend可能不是一个足够长的整数来存储您需要的迭代数 - 并且暗示两者都不是i,还有许多其他变量。因此,如果您nc在某个点之后增加,您将不会获得更高的准确性。
但是,您的程序不是用标准 Fortran 编写的。从长远来看,遵循标准将使您的生活更轻松。因此,让我们添加-std=2008使用编译器来检测您没有使用标准 Fortran 的地方:
ijb@ijb-Latitude-5410:~/work/stack$ gfortran-11 -Wall -Wextra -std=f2008 -fopenmp -O -g pi_orig.f90
pi_orig.f90:8:12:
8 | integer*8,parameter :: nc = 1000000000
| 1
Error: GNU Extension: Nonstandard type declaration INTEGER*8 at (1)
pi_orig.f90:9:12:
9 | integer*8 ncirc,ncircs(0:35)
| 1
Error: GNU Extension: Nonstandard type declaration INTEGER*8 at (1)
pi_orig.f90:11:9:
11 | real*8 x,y,dist,pi
| 1
Error: GNU Extension: Nonstandard type declaration REAL*8 at (1)
pi_orig.f90:37:23:
37 | if (dist.le.0.5) ncircs(thread_num)=ncircs(thread_num)+1
| 1
Error: Syntax error in IF-clause after (1)
pi_orig.f90:45:15:
45 | print*,ncircs
| 1
Error: Function ‘ncircs’ requires an argument list at (1)
pi_orig.f90:27:12:
27 | !$omp dist,x,y,ncircs) shared(ppt,ncirc)
| 1
Error: Symbol ‘dist’ at (1) has no IMPLICIT type
pi_orig.f90:20:9:
20 | ppt=(nc+num_threads-1)/num_threads
| 1
Error: Symbol ‘nc’ at (1) has no IMPLICIT type
pi_orig.f90:18:7:
18 | ncirc=0
| 1
Error: Symbol ‘ncirc’ at (1) has no IMPLICIT type
pi_orig.f90:46:4:
46 | pi=4*dble(ncirc)/dble(nc)
| 1
Error: Symbol ‘pi’ at (1) has no IMPLICIT type; did you mean ‘i’?
pi_orig.f90:27:14:
27 | !$omp dist,x,y,ncircs) shared(ppt,ncirc)
| 1
Error: Symbol ‘x’ at (1) has no IMPLICIT type
pi_orig.f90:27:16:
27 | !$omp dist,x,y,ncircs) shared(ppt,ncirc)
| 1
Error: Symbol ‘y’ at (1) has no IMPLICIT type
pi_orig.f90:14:12:
14 | call srand(seed)
| 1
Warning: The intrinsic ‘srand’ at (1) is not included in the selected standard but a GNU Fortran extension and ‘srand’ will be treated as if declared EXTERNAL. Use an appropriate ‘-std=’* option or define ‘-fall-intrinsics’ to allow this intrinsic. [-Wintrinsics-std]
pi_orig.f90:14:12: Warning: The intrinsic ‘srand’ at (1) is not included in the selected standard but a GNU Fortran extension and ‘srand’ will be treated as if declared EXTERNAL. Use an appropriate ‘-std=’* option or define ‘-fall-intrinsics’ to allow this intrinsic. [-Wintrinsics-std]
pi_orig.f90:14:18:
14 | call srand(seed)
| 1
Warning: The intrinsic ‘srand’ at (1) is not included in the selected standard but a GNU Fortran extension and ‘srand’ will be treated as if declared EXTERNAL. Use an appropriate ‘-std=’* option or define ‘-fall-intrinsics’ to allow this intrinsic. [-Wintrinsics-std]
pi_orig.f90:16:5:
16 | ncircs(i)=0
| 1
Error: Function ‘ncircs’ at (1) has no IMPLICIT type
pi_orig.f90:34:11:
34 | x=rand()
| 1
Warning: The intrinsic ‘rand’ at (1) is not included in the selected standard but a GNU Fortran extension and ‘rand’ will be treated as if declared EXTERNAL. Use an appropriate ‘-std=’* option or define ‘-fall-intrinsics’ to allow this intrinsic. [-Wintrinsics-std]
pi_orig.f90:34:11: Warning: The intrinsic ‘rand’ at (1) is not included in the selected standard but a GNU Fortran extension and ‘rand’ will be treated as if declared EXTERNAL. Use an appropriate ‘-std=’* option or define ‘-fall-intrinsics’ to allow this intrinsic. [-Wintrinsics-std]
pi_orig.f90:34:7:
34 | x=rand()
| 1
Warning: The intrinsic ‘rand’ at (1) is not included in the selected standard but a GNU Fortran extension and ‘rand’ will be treated as if declared EXTERNAL. Use an appropriate ‘-std=’* option or define ‘-fall-intrinsics’ to allow this intrinsic. [-Wintrinsics-std]
pi_orig.f90:34:7:
34 | x=rand()
| 1
Error: Function ‘rand’ at (1) has no IMPLICIT type
pi_orig.f90:35:7:
35 | y=rand()
| 1
Error: Function ‘rand’ at (1) has no IMPLICIT type
pi_orig.f90:41:70:
41 | !$ print*, "thread_num=",thread_num, "istart=",istart," iend=",iend,ncircs(thread_num+1)
| 1
Error: Function ‘ncircs’ at (1) has no IMPLICIT type
pi_orig.f90:42:20:
42 | ncirc=ncirc+ncircs(thread_num)
| 1
Error: Function ‘ncircs’ at (1) has no IMPLICIT type
pi_orig.f90:27:17:
27 | !$omp dist,x,y,ncircs) shared(ppt,ncirc)
| 1
Error: Object ‘ncircs’ is not a variable at (1)
ijb@ijb-Latitude-5410:~/work/stack$
Run Code Online (Sandbox Code Playgroud)
哦亲爱的!实际上只有3个错误:
srand并且rand不是标准的 Fortran 内在函数。gfortran 支持它,但其他编译器可能不支持。不要使用它!数字 3 可以通过使用标准内在函数Random_number(但见下文)来解决。数字 1 和 2 使用种类求解 - 请参阅Fortran 90 kind 参数。并且因为 1 和 2 影响变量声明,所有使用这些错误行声明的变量的行现在都被标记为错误本身,因此出现了级联错误。
修复这些可以解决一些问题,但恐怕还有更多错误。特别是在数组的边界之外访问数组。您可以-fcheck=all在调试时使用我强烈推荐的自动检测这些,但请注意它会大大减慢您的程序速度,因此在运行生产运行时忽略它。如果我修复了上面提到的错误,然后在运行程序时使用 -fcheck=all ,我会得到:
ijb@ijb-Latitude-5410:~/work/stack$ gfortran-11 -Wall -Wextra -std=f2008 -fcheck=all -fopenmp -O -g pi2.f90
pi2.f90:22:7:
22 | ppt=(nc+num_threads-1)/num_threads
| 1
Warning: Integer division truncated to constant ‘2778’ at (1) [-Winteger-division]
ijb@ijb-Latitude-5410:~/work/stack$ ./a.out
1
28
14
26
32
9
5
8
10
35
21
29
0
22
17
4
19
thread_num= 14 istart= 38893 iend= 41670 0
thread_num= 21 istart= 58339 iend= 61116 0
thread_num= 28 istart= 77785 iend= 80562 0
20
12
7
24
23
15
34
25
thread_num= 26 istart= 72229 iend= 75006 0
thread_num= 23 istart= 63895 iend= 66672 0
thread_num= 10 istart= 27781 iend= 30558 0
thread_num= 15 istart= 41671 iend= 44448 0
thread_num= 1 istart= 2779 iend= 5556 0
thread_num= 25 istart= 69451 iend= 72228 0
thread_num= 9 istart= 25003 iend= 27780 0
16
33
30
thread_num= 0 istart= 1 iend= 2778 0
27
thread_num= 7 istart= 19447 iend= 22224 0
31
2
6
thread_num= 17 istart= 47227 iend= 50004 0
3
13
11
thread_num= 27 istart= 75007 iend= 77784 0
thread_num= 30 istart= 83341 iend= 86118 0
thread_num= 32 istart= 88897 iend= 91674 0
thread_num= 4 istart= 11113 iend= 13890 0
thread_num= 19 istart= 52783 iend= 55560 0
thread_num= 2 istart= 5557 iend= 8334 0
18
thread_num= 6 istart= 16669 iend= 19446 0
thread_num= 5 istart= 13891 iend= 16668 0
thread_num= 3 istart= 8335 iend= 11112 0
thread_num= 24 istart= 66673 iend= 69450 0
thread_num= 8 istart= 22225 iend= 25002 0
thread_num= 34 istart= 94453 iend= 97230 0
thread_num= 11 istart= 30559 iend= 33336 0
thread_num= 29 istart= 80563 iend= 83340 0
thread_num= 22 istart= 61117 iend= 63894 0
thread_num= 16 istart= 44449 iend= 47226 0
thread_num= 20 istart= 55561 iend= 58338 0
thread_num= 12 istart= 33337 iend= 36114 0
At line 45 of file pi2.f90
Fortran runtime error: Index '36' of dimension 1 of array 'ncircs' above upper bound of 35
Error termination. Backtrace:
#0 0x7f580fce1d01 in ???
#1 0x7f580fce2849 in ???
#2 0x7f580fce2ec6 in ???
#3 0x4015ab in MAIN__._omp_fn.0
at /home/ijb/work/stack/pi2.f90:45
#4 0x7f580fb4a77d in ???
#5 0x7f580fab1608 in start_thread
at /build/glibc-YbNSs7/glibc-2.31/nptl/pthread_create.c:477
#6 0x7f580f9d6292 in ???
#7 0xffffffffffffffff in ???
Run Code Online (Sandbox Code Playgroud)
所以你可以用编译器来发现很多问题。然而,还有更多它无法捕捉。最重要的是,您将数组范围ncircs设为私有,但不在并行区域内对其进行初始化 - 当您进入并行区域时,每个线程将生成自己的私有变量的新版本,默认情况下,并行区域外的任何初始化都将被遗忘。所以你正在使用一个未初始化的变量。即使没有,在代码中也要注意,您只初始化ncircs了 .
然而,除了上面提到的错误之外,这并不是您编写 openmp 代码的真正方式。OpenMP 提供了在循环中自动共享工作的方法,您真的不应该像在代码中所做的那样手动共享迭代。此外,它提供了精确执行此类计算的缩减,关键应该只在需要的地方使用,而不是用于这里显示的缩减擅长的专业案例。也不需要ncircs数组——每个线程只需要一个值,那么为什么要声明一个数组呢?根据经验,如果您声明的内存随着线程数量的增加而增加,那么您可能做错事。最后,我也不会将线程数刻录到代码中,我会使用环境变量来设置线程数,以便无需重新编译即可更改它。
所以这是我将如何解决您的问题,并显示它可以正确编译和运行:
ijb@ijb-Latitude-5410:~/work/stack$ cat pi_ijb.f90
Program pi_calc
Use, Intrinsic :: iso_fortran_env, Only : wp => real64, li => int64
Use omp_lib
Implicit None
Real( wp ), Parameter :: pi_exact = 4.0_wp * Atan( 1.0_wp )
Integer( li ), Parameter :: n_sample = 1000000000_li
Integer , Parameter :: seed = 864
Real( wp ), Dimension( 1:2 ) :: rand
Real( wp ) :: dist
Real( wp ) :: pi_approx
Integer( li ) :: ncirc
Integer( li ) :: i_sample
Integer( li ) :: start, finish, rate
Integer :: thread_num
Integer :: n_seed
Integer :: i
! Assume using the maximum number of threads made available
Write( *, * ) 'Running on ', omp_get_max_threads(), ' threads'
ncirc = 0
Call system_clock( start, rate )
! Create threads
!$omp parallel default( none ) private( thread_num, n_seed, i, rand, dist ) reduction( +:ncirc )
! Set up the random number generator. Assume it is thread safe
thread_num = omp_get_thread_num()
Call Random_seed( size = n_seed )
Call Random_seed( put = [ ( i * seed * ( thread_num + 1 ), i = 1, n_seed ) ] )
! Workshare the loop
!$omp do
Sampling_loop: Do i_sample = 1, n_sample
Call Random_number( rand )
rand = rand - 0.5_wp
dist = rand( 1 ) * rand( 1 ) + rand( 2 ) * rand( 2 )
If( dist <= 0.5_wp * 0.5_wp ) Then
ncirc = ncirc + 1
End If
End Do Sampling_loop
!$omp end do
! Close down the parallel region
!$omp end parallel
Call system_clock( finish, rate )
pi_approx = 4.0_wp * Real( ncirc, wp ) / Real( n_sample, wp )
Write( *, * ) 'Using ', n_sample, ' sampling points: '
Write( *, * ) 'Approx pi ', pi_approx
Write( *, * ) 'Exact pi ', pi_exact
Write( *, * ) 'Error ', Abs( pi_approx - pi_exact )
Write( *, * ) 'This took ', Real( finish - start ) / Real( rate ), ' seconds'
End Program pi_calc
ijb@ijb-Latitude-5410:~/work/stack$ gfortran-11 -Wall -Wextra -fcheck=all -O -g -fopenmp pi_ijb.f90
ijb@ijb-Latitude-5410:~/work/stack$ export OMP_NUM_THREADS=4
ijb@ijb-Latitude-5410:~/work/stack$ ./a.out
Running on 4 threads
Using 1000000000 sampling points:
Approx pi 3.1415474360000002
Exact pi 3.1415926535897931
Error 4.5217589792923008E-005
This took 5.21012735 seconds
ijb@ijb-Latitude-5410:~/work/stack$ gfortran-11 -Wall -Wextra -O -g -fopenmp pi_ijb.f90
ijb@ijb-Latitude-5410:~/work/stack$ export OMP_NUM_THREADS=1
ijb@ijb-Latitude-5410:~/work/stack$ ./a.out
Running on 1 threads
Using 1000000000 sampling points:
Approx pi 3.1415804920000001
Exact pi 3.1415926535897931
Error 1.2161589793002747E-005
This took 18.5533371 seconds
ijb@ijb-Latitude-5410:~/work/stack$ export OMP_NUM_THREADS=2
ijb@ijb-Latitude-5410:~/work/stack$ ./a.out
Running on 2 threads
Using 1000000000 sampling points:
Approx pi 3.1416029320000001
Exact pi 3.1415926535897931
Error 1.0278410206954192E-005
This took 8.87815666 seconds
ijb@ijb-Latitude-5410:~/work/stack$ export OMP_NUM_THREADS=4
ijb@ijb-Latitude-5410:~/work/stack$ ./a.out
Running on 4 threads
Using 1000000000 sampling points:
Approx pi 3.1415474360000002
Exact pi 3.1415926535897931
Error 4.5217589792923008E-005
This took 4.49851656 seconds
Run Code Online (Sandbox Code Playgroud)
最后请注意,我假设标准的随机数生成器是线程安全的,简单来说。然而,我的理解是这不是保证,如果你想认真地做这种事情,你应该考虑提供你自己的,