搜索匹配项,然后将该匹配项后的值替换为前一行中的值

a a*_*a a 5 bash awk sed

我有多个文件,其中包含以下模式的数千行:

O   HOH     1      11.700 -11.906   1.533
H   HOH     0      12.561 -12.318   1.596
H   HOH     0      11.445 -12.031   0.627
O   HOH     2      17.897   8.999  -0.104
H   HOH     0      18.444   8.521   0.516
H   HOH     0      17.127   8.457  -0.223
O   HOH     3      -0.688  10.310 -15.189
H   HOH     0      -0.760   9.366 -15.055
H   HOH     0      -0.494  10.665 -14.324
O   HOH     4     -10.418   3.733   4.069
H   HOH     0      -9.928   3.726   4.897
H   HOH     0     -11.222   4.218   4.268
O   HOH     5       2.655  -8.910  13.338
H   HOH     0       2.377  -9.739  13.737
H   HOH     0       1.847  -8.531  12.992
O   HOH     6      -0.191  -1.010 -19.469
H   HOH     0       0.455  -1.460 -18.924
H   HOH     0      -0.571  -0.343 -18.895
O   HOH     7      14.338 -15.985  -1.467
H   HOH     0      15.129 -15.647  -1.886
H   HOH     0      13.854 -16.408  -2.175
O   HOH     8     -15.510  -7.073   1.857
H   HOH     0     -15.526  -6.116   1.874
H   HOH     0     -16.341  -7.321   1.464
O   HOH     9       9.155   6.561  -7.729
H   HOH     0       9.419   7.335  -7.245
H   HOH     0       9.943   6.018  -7.768
.
.
.
.
O   HOH  999999     9.155   6.561  -7.729
H   HOH     0       9.419   7.335  -7.245
H   HOH     0       9.943   6.018  -7.768
Run Code Online (Sandbox Code Playgroud)

我需要的是把每组连续出现的两行 'H HOH 0' 替换为 'H HOH xxx',其中 xxx 是它们上方那一行 'O' 记录中第 3 列的值。

我尝试过的是,我使用以下命令将大文件分成3行文件

split -l 3 foo
Run Code Online (Sandbox Code Playgroud)

然后使用awk将所需的值复制到另一个文件

awk 'NR==1 print $3' > foo--
Run Code Online (Sandbox Code Playgroud)

然后尝试通过读取文件foo--并使用sed替换模式来替换foo中的“ 0”

sed -e '/  0  /{r foo--' -e 'd}' foo 
Run Code Online (Sandbox Code Playgroud)

但这不起作用

很确定有更好的方法可以做到这一点

最终结果应如下所示:

O   HOH     1      11.700 -11.906   1.533
H   HOH     1      12.561 -12.318   1.596
H   HOH     1      11.445 -12.031   0.627
O   HOH     2      17.897   8.999  -0.104
H   HOH     2      18.444   8.521   0.516
H   HOH     2      17.127   8.457  -0.223
O   HOH     3      -0.688  10.310 -15.189
H   HOH     3      -0.760   9.366 -15.055
H   HOH     3      -0.494  10.665 -14.324
O   HOH     4     -10.418   3.733   4.069
H   HOH     4      -9.928   3.726   4.897
H   HOH     4     -11.222   4.218   4.268
O   HOH     5       2.655  -8.910  13.338
H   HOH     5       2.377  -9.739  13.737
H   HOH     5       1.847  -8.531  12.992
O   HOH     6      -0.191  -1.010 -19.469
H   HOH     6       0.455  -1.460 -18.924
H   HOH     6      -0.571  -0.343 -18.895
.
.
.
O   HOH     99999      -0.191  -1.010 -19.469
H   HOH     99999       0.455  -1.460 -18.924
H   HOH     99999      -0.571  -0.343 -18.895
Run Code Online (Sandbox Code Playgroud)

Ed *_*ton 1

如果您不关心字段之间的间距,则只需获取所需的值:

$ awk '$1=="H"{$3=p} {p=$3} 1' file
O   HOH     1      11.700 -11.906   1.533
H HOH 1 12.561 -12.318 1.596
H HOH 1 11.445 -12.031 0.627
O   HOH     2      17.897   8.999  -0.104
H HOH 2 18.444 8.521 0.516
H HOH 2 17.127 8.457 -0.223
O   HOH     3      -0.688  10.310 -15.189
H HOH 3 -0.760 9.366 -15.055
H HOH 3 -0.494 10.665 -14.324
O   HOH     4     -10.418   3.733   4.069
H HOH 4 -9.928 3.726 4.897
H HOH 4 -11.222 4.218 4.268
O   HOH     5       2.655  -8.910  13.338
H HOH 5 2.377 -9.739 13.737
H HOH 5 1.847 -8.531 12.992
O   HOH     6      -0.191  -1.010 -19.469
H HOH 6 0.455 -1.460 -18.924
H HOH 6 -0.571 -0.343 -18.895
O   HOH     7      14.338 -15.985  -1.467
H HOH 7 15.129 -15.647 -1.886
H HOH 7 13.854 -16.408 -2.175
O   HOH     8     -15.510  -7.073   1.857
H HOH 8 -15.526 -6.116 1.874
H HOH 8 -16.341 -7.321 1.464
O   HOH     9       9.155   6.561  -7.729
H HOH 9 9.419 7.335 -7.245
H HOH 9 9.943 6.018 -7.768
O   HOH  999999     9.155   6.561  -7.729
H HOH 999999 9.419 7.335 -7.245
H HOH 999999 9.943 6.018 -7.768
Run Code Online (Sandbox Code Playgroud)

或以空格分隔,最后 4 个字段右对齐:

$ awk '$1=="H"{$3=p} {p=$3} 1' file | column -t -R3,4,5,6
O  HOH       1   11.700  -11.906    1.533
H  HOH       1   12.561  -12.318    1.596
H  HOH       1   11.445  -12.031    0.627
O  HOH       2   17.897    8.999   -0.104
H  HOH       2   18.444    8.521    0.516
H  HOH       2   17.127    8.457   -0.223
O  HOH       3   -0.688   10.310  -15.189
H  HOH       3   -0.760    9.366  -15.055
H  HOH       3   -0.494   10.665  -14.324
O  HOH       4  -10.418    3.733    4.069
H  HOH       4   -9.928    3.726    4.897
H  HOH       4  -11.222    4.218    4.268
O  HOH       5    2.655   -8.910   13.338
H  HOH       5    2.377   -9.739   13.737
H  HOH       5    1.847   -8.531   12.992
O  HOH       6   -0.191   -1.010  -19.469
H  HOH       6    0.455   -1.460  -18.924
H  HOH       6   -0.571   -0.343  -18.895
O  HOH       7   14.338  -15.985   -1.467
H  HOH       7   15.129  -15.647   -1.886
H  HOH       7   13.854  -16.408   -2.175
O  HOH       8  -15.510   -7.073    1.857
H  HOH       8  -15.526   -6.116    1.874
H  HOH       8  -16.341   -7.321    1.464
O  HOH       9    9.155    6.561   -7.729
H  HOH       9    9.419    7.335   -7.245
H  HOH       9    9.943    6.018   -7.768
O  HOH  999999    9.155    6.561   -7.729
H  HOH  999999    9.419    7.335   -7.245
H  HOH  999999    9.943    6.018   -7.768
Run Code Online (Sandbox Code Playgroud)

或 TSV 格式:

$ awk -v OFS='\t' '{$3=($1=="H" ? p : $3); p=$3} 1' file
O       HOH     1       11.700  -11.906 1.533
H       HOH     1       12.561  -12.318 1.596
H       HOH     1       11.445  -12.031 0.627
O       HOH     2       17.897  8.999   -0.104
H       HOH     2       18.444  8.521   0.516
H       HOH     2       17.127  8.457   -0.223
O       HOH     3       -0.688  10.310  -15.189
H       HOH     3       -0.760  9.366   -15.055
H       HOH     3       -0.494  10.665  -14.324
O       HOH     4       -10.418 3.733   4.069
H       HOH     4       -9.928  3.726   4.897
H       HOH     4       -11.222 4.218   4.268
O       HOH     5       2.655   -8.910  13.338
H       HOH     5       2.377   -9.739  13.737
H       HOH     5       1.847   -8.531  12.992
O       HOH     6       -0.191  -1.010  -19.469
H       HOH     6       0.455   -1.460  -18.924
H       HOH     6       -0.571  -0.343  -18.895
O       HOH     7       14.338  -15.985 -1.467
H       HOH     7       15.129  -15.647 -1.886
H       HOH     7       13.854  -16.408 -2.175
O       HOH     8       -15.510 -7.073  1.857
H       HOH     8       -15.526 -6.116  1.874
H       HOH     8       -16.341 -7.321  1.464
O       HOH     9       9.155   6.561   -7.729
H       HOH     9       9.419   7.335   -7.245
H       HOH     9       9.943   6.018   -7.768
O       HOH     999999  9.155   6.561   -7.729
H       HOH     999999  9.419   7.335   -7.245
H       HOH     999999  9.943   6.018   -7.768
Run Code Online (Sandbox Code Playgroud)

但是,如果您需要保留原始间距,可以利用 GNU awk 中 split() 的第 4 个参数(它会把各字段之间的分隔符保存到一个数组中),这样就能保留输入中原有的间距:

$ cat tst.awk
# For every record whose first field is "H", copy field 3 from the previous
# output record while keeping the column layout of the input.
#
# Requires GNU awk: the 4th argument of split() stores the separator strings
# that surrounded each field, which lets the record be reassembled with its
# original spacing instead of being squeezed to single blanks.
#
# Records whose first field is not "H" are printed unchanged. Because `prev`
# holds the record as printed, a run of several "H" records all inherit the
# value of the record that preceded the run.
{
    if ($1 == "H") {
        np = split(prev, pflds, FS, pseps)
        n  = split($0, flds, FS, seps)

        # Only rewrite when both records really have a 3rd field; otherwise
        # (e.g. an "H" record on the very first line) pass the record through
        # untouched rather than rebuilding it from empty arrays.
        if (np >= 3 && n >= 3) {
            # Take the value and the gap in front of it from the previous
            # record, so field 3 starts in the same column.
            seps[2] = pseps[2]
            flds[3] = pflds[3]

            if (n >= 4) {
                # Size the gap after field 3 so that field 4 ends in the same
                # column as field 4 of the previous record.
                width = length(pseps[3]) + length(pflds[4]) - length(flds[4])
                # A width of 0 would glue fields 3 and 4 together; always keep
                # at least one blank between them.
                if (width < 1) {
                    width = 1
                }
                seps[3] = sprintf("%*s", width, "")
            }

            # Reassemble in a plain string (seps[0] is any leading whitespace)
            # and assign $0 once instead of re-splitting it per iteration.
            line = seps[0]
            for (i = 1; i <= n; i++) {
                line = line flds[i] seps[i]
            }
            $0 = line
        }
    }
    print
    prev = $0
}
Run Code Online (Sandbox Code Playgroud)

$ awk -f tst.awk file
O   HOH     1      11.700 -11.906   1.533
H   HOH     1      12.561 -12.318   1.596
H   HOH     1      11.445 -12.031   0.627
O   HOH     2      17.897   8.999  -0.104
H   HOH     2      18.444   8.521   0.516
H   HOH     2      17.127   8.457  -0.223
O   HOH     3      -0.688  10.310 -15.189
H   HOH     3      -0.760   9.366 -15.055
H   HOH     3      -0.494  10.665 -14.324
O   HOH     4     -10.418   3.733   4.069
H   HOH     4      -9.928   3.726   4.897
H   HOH     4     -11.222   4.218   4.268
O   HOH     5       2.655  -8.910  13.338
H   HOH     5       2.377  -9.739  13.737
H   HOH     5       1.847  -8.531  12.992
O   HOH     6      -0.191  -1.010 -19.469
H   HOH     6       0.455  -1.460 -18.924
H   HOH     6      -0.571  -0.343 -18.895
O   HOH     7      14.338 -15.985  -1.467
H   HOH     7      15.129 -15.647  -1.886
H   HOH     7      13.854 -16.408  -2.175
O   HOH     8     -15.510  -7.073   1.857
H   HOH     8     -15.526  -6.116   1.874
H   HOH     8     -16.341  -7.321   1.464
O   HOH     9       9.155   6.561  -7.729
H   HOH     9       9.419   7.335  -7.245
H   HOH     9       9.943   6.018  -7.768
O   HOH  999999     9.155   6.561  -7.729
H   HOH  999999     9.419   7.335  -7.245
H   HOH  999999     9.943   6.018  -7.768
Run Code Online (Sandbox Code Playgroud)