文件 1..64 每个是 160 MB 并存储在 RAM 磁盘中。
生成方式:
# Build the test data: `seq 120` feeds 120 arguments to GNU parallel, each job
# prints a range of numbers with `seq`, `fmt -30` reflows that output to a
# width of 30, `-k` keeps the job output in input order, and `head -c 10G`
# cuts the combined stream off at 10G bytes, stored in a file named "10G".
seq 120 | parallel -k 'seq {}0000000 {}9999999 | fmt -30' | head -c 10G > 10G
# Split the file "10G" into chunks; each chunk is written by `cat` to a file
# named after the job sequence number {#} (the files 1, 2, 3, ... that the
# benchmark scripts read).
# NOTE(review): `--block -1` presumably means "one chunk per job slot", which
# would give 64 files on the 64-core machine — confirm in the GNU parallel manual.
parallel --pipepart --block -1 -a 10G 'cat > {#}'
Run Code Online (Sandbox Code Playgroud)
nocat:
#!/bin/bash
# nocat: benchmark script. Sorts the 64 input files named 1..64 and merges
# them through a binary tree of `sort -m` processes, then hashes the final
# merged stream with md5sum.
#
# Leaves:  `<((rm N; sort ) < N)` — the redirection opens file N as stdin of
#          the subshell, `rm N` then removes the file's name, and `sort`
#          sorts the still-open stdin. A run therefore DELETES its input
#          files; they must be regenerated before the next run.
# Nodes:   every `sort -m` merges exactly two process substitutions. In this
#          variant each `sort -m` writes straight into its parent's process
#          substitution, with no extra process in between.
#
# No comments can appear inside the command below: every line ends in a
# backslash continuation, so a comment would terminate the command early.

# Use the C locale for every sort in the tree.
export LC_ALL=C
sort -m \
<(sort -m \
<(sort -m \
<(sort -m \
<(sort -m \
<(sort -m \
<((rm 1; sort ) < 1) \
<((rm 2; sort ) < 2) ) \
<(sort -m \
<((rm 3; sort ) < 3) \
<((rm 4; sort ) < 4) ) ) \
<(sort -m \
<(sort -m \
<((rm 5; sort ) < 5) \
<((rm 6; sort ) < 6) ) \
<(sort -m \
<((rm 7; sort ) < 7) \
<((rm 8; sort ) < 8) ) ) ) \
<(sort -m \
<(sort -m \
<(sort -m \
<((rm 9; sort ) < 9) \
<((rm 10; sort ) < 10) ) \
<(sort -m \
<((rm 11; sort ) < 11) \
<((rm 12; sort ) < 12) ) ) \
<(sort -m \
<(sort -m \
<((rm 13; sort ) < 13) \
<((rm 14; sort ) < 14) ) \
<(sort -m \
<((rm 15; sort ) < 15) \
<((rm 16; sort ) < 16) ) ) ) ) \
<(sort -m \
<(sort -m \
<(sort -m \
<(sort -m \
<((rm 17; sort ) < 17) \
<((rm 18; sort ) < 18) ) \
<(sort -m \
<((rm 19; sort ) < 19) \
<((rm 20; sort ) < 20) ) ) \
<(sort -m \
<(sort -m \
<((rm 21; sort ) < 21) \
<((rm 22; sort ) < 22) ) \
<(sort -m \
<((rm 23; sort ) < 23) \
<((rm 24; sort ) < 24) ) ) ) \
<(sort -m \
<(sort -m \
<(sort -m \
<((rm 25; sort ) < 25) \
<((rm 26; sort ) < 26) ) \
<(sort -m \
<((rm 27; sort ) < 27) \
<((rm 28; sort ) < 28) ) ) \
<(sort -m \
<(sort -m \
<((rm 29; sort ) < 29) \
<((rm 30; sort ) < 30) ) \
<(sort -m \
<((rm 31; sort ) < 31) \
<((rm 32; sort ) < 32) ) ) ) ) ) \
<(sort -m \
<(sort -m \
<(sort -m \
<(sort -m \
<(sort -m \
<((rm 33; sort ) < 33) \
<((rm 34; sort ) < 34) ) \
<(sort -m \
<((rm 35; sort ) < 35) \
<((rm 36; sort ) < 36) ) ) \
<(sort -m \
<(sort -m \
<((rm 37; sort ) < 37) \
<((rm 38; sort ) < 38) ) \
<(sort -m \
<((rm 39; sort ) < 39) \
<((rm 40; sort ) < 40) ) ) ) \
<(sort -m \
<(sort -m \
<(sort -m \
<((rm 41; sort ) < 41) \
<((rm 42; sort ) < 42) ) \
<(sort -m \
<((rm 43; sort ) < 43) \
<((rm 44; sort ) < 44) ) ) \
<(sort -m \
<(sort -m \
<((rm 45; sort ) < 45) \
<((rm 46; sort ) < 46) ) \
<(sort -m \
<((rm 47; sort ) < 47) \
<((rm 48; sort ) < 48) ) ) ) ) \
<(sort -m \
<(sort -m \
<(sort -m \
<(sort -m \
<((rm 49; sort ) < 49) \
<((rm 50; sort ) < 50) ) \
<(sort -m \
<((rm 51; sort ) < 51) \
<((rm 52; sort ) < 52) ) ) \
<(sort -m \
<(sort -m \
<((rm 53; sort ) < 53) \
<((rm 54; sort ) < 54) ) \
<(sort -m \
<((rm 55; sort ) < 55) \
<((rm 56; sort ) < 56) ) ) ) \
<(sort -m \
<(sort -m \
<(sort -m \
<((rm 57; sort ) < 57) \
<((rm 58; sort ) < 58) ) \
<(sort -m \
<((rm 59; sort ) < 59) \
<((rm 60; sort ) < 60) ) ) \
<(sort -m \
<(sort -m \
<((rm 61; sort ) < 61) \
<((rm 62; sort ) < 62) ) \
<(sort -m \
<((rm 63; sort ) < 63) \
<((rm 64; sort ) < 64) ) ) ) ) ) |
md5sum
Run Code Online (Sandbox Code Playgroud)
withcat:
#!/bin/bash
# withcat: benchmark script. Sorts the 64 input files named 1..64 and merges
# them through a binary tree of `sort -m` processes, then hashes the final
# merged stream with md5sum.
#
# Leaves:  `<((rm N; sort ) < N)` — the redirection opens file N as stdin of
#          the subshell, `rm N` then removes the file's name, and `sort`
#          sorts the still-open stdin. A run therefore DELETES its input
#          files; they must be regenerated before the next run.
# Nodes:   every `sort -m` merges exactly two process substitutions, and in
#          this variant the output of every `sort -m` (including the
#          top-level one) is piped through an extra `cat` before reaching
#          its consumer. The leaf sorts have no `cat`.
#
# No comments can appear inside the command below: every line ends in a
# backslash continuation, so a comment would terminate the command early.

# Use the C locale for every sort in the tree.
export LC_ALL=C
sort -m \
<(sort -m \
<(sort -m \
<(sort -m \
<(sort -m \
<(sort -m \
<((rm 1; sort ) < 1) \
<((rm 2; sort ) < 2) | cat) \
<(sort -m \
<((rm 3; sort ) < 3) \
<((rm 4; sort ) < 4) | cat) | cat) \
<(sort -m \
<(sort -m \
<((rm 5; sort ) < 5) \
<((rm 6; sort ) < 6) | cat) \
<(sort -m \
<((rm 7; sort ) < 7) \
<((rm 8; sort ) < 8) | cat) | cat) | cat) \
<(sort -m \
<(sort -m \
<(sort -m \
<((rm 9; sort ) < 9) \
<((rm 10; sort ) < 10) | cat) \
<(sort -m \
<((rm 11; sort ) < 11) \
<((rm 12; sort ) < 12) | cat) | cat) \
<(sort -m \
<(sort -m \
<((rm 13; sort ) < 13) \
<((rm 14; sort ) < 14) | cat) \
<(sort -m \
<((rm 15; sort ) < 15) \
<((rm 16; sort ) < 16) | cat) | cat) | cat) | cat) \
<(sort -m \
<(sort -m \
<(sort -m \
<(sort -m \
<((rm 17; sort ) < 17) \
<((rm 18; sort ) < 18) | cat) \
<(sort -m \
<((rm 19; sort ) < 19) \
<((rm 20; sort ) < 20) | cat) | cat) \
<(sort -m \
<(sort -m \
<((rm 21; sort ) < 21) \
<((rm 22; sort ) < 22) | cat) \
<(sort -m \
<((rm 23; sort ) < 23) \
<((rm 24; sort ) < 24) | cat) | cat) | cat) \
<(sort -m \
<(sort -m \
<(sort -m \
<((rm 25; sort ) < 25) \
<((rm 26; sort ) < 26) | cat) \
<(sort -m \
<((rm 27; sort ) < 27) \
<((rm 28; sort ) < 28) | cat) | cat) \
<(sort -m \
<(sort -m \
<((rm 29; sort ) < 29) \
<((rm 30; sort ) < 30) | cat) \
<(sort -m \
<((rm 31; sort ) < 31) \
<((rm 32; sort ) < 32) | cat) | cat) | cat) | cat) | cat) \
<(sort -m \
<(sort -m \
<(sort -m \
<(sort -m \
<(sort -m \
<((rm 33; sort ) < 33) \
<((rm 34; sort ) < 34) | cat) \
<(sort -m \
<((rm 35; sort ) < 35) \
<((rm 36; sort ) < 36) | cat) | cat) \
<(sort -m \
<(sort -m \
<((rm 37; sort ) < 37) \
<((rm 38; sort ) < 38) | cat) \
<(sort -m \
<((rm 39; sort ) < 39) \
<((rm 40; sort ) < 40) | cat) | cat) | cat) \
<(sort -m \
<(sort -m \
<(sort -m \
<((rm 41; sort ) < 41) \
<((rm 42; sort ) < 42) | cat) \
<(sort -m \
<((rm 43; sort ) < 43) \
<((rm 44; sort ) < 44) | cat) | cat) \
<(sort -m \
<(sort -m \
<((rm 45; sort ) < 45) \
<((rm 46; sort ) < 46) | cat) \
<(sort -m \
<((rm 47; sort ) < 47) \
<((rm 48; sort ) < 48) | cat) | cat) | cat) | cat) \
<(sort -m \
<(sort -m \
<(sort -m \
<(sort -m \
<((rm 49; sort ) < 49) \
<((rm 50; sort ) < 50) | cat) \
<(sort -m \
<((rm 51; sort ) < 51) \
<((rm 52; sort ) < 52) | cat) | cat) \
<(sort -m \
<(sort -m \
<((rm 53; sort ) < 53) \
<((rm 54; sort ) < 54) | cat) \
<(sort -m \
<((rm 55; sort ) < 55) \
<((rm 56; sort ) < 56) | cat) | cat) | cat) \
<(sort -m \
<(sort -m \
<(sort -m \
<((rm 57; sort ) < 57) \
<((rm 58; sort ) < 58) | cat) \
<(sort -m \
<((rm 59; sort ) < 59) \
<((rm 60; sort ) < 60) | cat) | cat) \
<(sort -m \
<(sort -m \
<((rm 61; sort ) < 61) \
<((rm 62; sort ) < 62) | cat) \
<(sort -m \
<((rm 63; sort ) < 63) \
<((rm 64; sort ) < 64) | cat) | cat) | cat) | cat) | cat) | cat |
md5sum
Run Code Online (Sandbox Code Playgroud)
唯一的区别是:在 withcat 中,每一个 sort -m 的输出都通过管道输送到 cat。
主张“无用的 cat 用法”(Useless use of cat)的人会让你相信 withcat 会比 nocat 慢。然而,事实恰恰相反:
$ time bash nocat
c933d81faea7b8dec8eb64ca0b044d74 -
real 3m40.854s
user 2m48.687s
sys 0m49.135s
$ time bash withcat
c933d81faea7b8dec8eb64ca0b044d74 -
real 2m21.812s
user 2m16.651s
sys 1m36.135s
Run Code Online (Sandbox Code Playgroud)
该测试在一台 64 核机器上运行,该机器没有运行任何其他任务。一切都在 RAM 中(所以这不是磁盘速度慢造成的)。每个测试运行 3 次,上面显示的是最佳时间。三次运行的结果都在最佳时间的 5 秒以内(所以这不是偶然)。
为什么通过管道将输出传输到 cat 反而更快?
编辑
cat 是否将输入分组为更大的块?和/或 sort 是否为每一行都刷新输出?
为了测试这个,我试过:
$ strace -ff sort -m <(sort 1) <(sort 2) 2>fromsort | cat >/dev/null
$ strace -ff sort -m <(sort 1 | cat ) <(sort 2 | cat) 2>fromcat | cat >/dev/null
Run Code Online (Sandbox Code Playgroud)
如果 cat 将其分组为更大的块,我们预期 read 会返回更大的块。但事实并非如此:
$ grep -E 'read|write' fromsort |field 1,5|sort | uniq -c
1 openat(AT_FDCWD, 3
8 pread64(3, =
1 read(3, 3771
40989 read(3, 4096
2 read(3, 832
1 read(3, unknown
1 read(4, 0
1 read(4, 2241
40959 read(4, 4096
1 write(1, 1916
81949 write(1, 4096
$ grep -E 'read|write' fromcat |field 1,5|sort | uniq -c
1 openat(AT_FDCWD, 3
8 pread64(3, =
1 read(3, 3771
40989 read(3, 4096
2 read(3, 832
1 read(3, unknown
1 read(4, 2241
40959 read(4, 4096
1 read(4, unknown
1 write(1, 1916
81949 write(1, 4096
Run Code Online (Sandbox Code Playgroud)
在这两种情况下,read 和 write 都是 4K。
(顺便说一句,如果 sort 是从文件而不是从管道读取,它确实会以大得多的块读取,但这里的情况并非如此)。
编辑 2
上面的目的是证明额外的 cat 并不总是无用的,并找出造成这种现象的原因。
目标不是对数据进行排序。
但是,如果您的目标是对数据进行排序,为什么不直接使用 sort 内置的 --parallel 呢?
默认情况下,sort 在 64 核机器上似乎使用 --parallel 8。top 显示它最多使用 800% 的 CPU。您可以用 --parallel 64 强制它使用 64 个内核:
$ time sort {1..64} | md5sum
real 9m4.005s
user 29m56.454s
sys 5m49.560s
$ time sort --parallel 64 {1..64} | md5sum
real 6m50.332s
user 35m55.040s
sys 11m37.609s
Run Code Online (Sandbox Code Playgroud)
所以 GNU sort 的 --parallel 比上面的方法慢得多。上面的方法现在已作为 parsort 提供:http://git.savannah.gnu.org/cgit/parallel.git/tree/src/parsort