Mat*_*att 1 conditional r dataframe
我有一个名为 df 的数据框,看起来像这样:
id face value
1 r 15
1 r 11
1 t 16
1 t 17
2 r 13
2 r 25
2 t 12
2 t 18
3 r 30
3 r 20
3 t 19
3 t 10
Run Code Online (Sandbox Code Playgroud)
我需要在同时满足两个条件时对各行求平均值。条件是:如果 id 和 face 都相同,就对这些行的 value 求平均值。
例如,如果 id=1 且 face=r,则对 15 和 11 求平均值,并将计算结果 13 放在新列中。我必须对整个数据框(2000 行,500 个不同的 id)执行此操作。
PS:对于每一种 face,我都需要一个单独的列。也就是说,如果 id=1 且 face=r,就把 value 的平均值放在名为 newr 的新列中;如果 id=2 且 face=r,同样把 value 的平均值放在 newr 列中。而如果 id=1 且 face=t,则把 value 的平均值放在名为 newt 的新列中。输出将是这样的:
id face newr newt
1 r 13
1 t 16.5
2 r 19
2 t 15
Run Code Online (Sandbox Code Playgroud)
这是我的 str(df1)
Classes ‘data.table’ and 'data.frame': 340 obs. of 26 variables:
$ id : int 5 5 5 5 5 5 5 5 7 7 ...
$ nirid : chr "bx5xtx1" "ax5xrx2" "bx5xrx2" "bx5xtx2" ...
$ group : Factor w/ 3 levels "a","b","r": 2 1 2 2 2 1 1 1 1 1 ...
$ section : Factor w/ 3 levels "","r","t": 3 2 2 3 2 3 2 3 2 3 ...
$ face : Factor w/ 3 levels "","1","2": 2 3 3 3 2 2 2 3 2 3 ...
$ sample : chr "B3C-3D" "B3C-3D" "B3C-3D" "B3C-3D" ...
$ treatment : chr "control" "control" "control" "control" ...
$ width : num 1 1 1 1 1 ...
$ thick : num 1.02 1.02 1.02 1.02 1.02 ...
$ length : num 16 16 16 16 16 ...
$ testweight : num 126 126 126 126 126 ...
$ maxload : num 418 418 418 418 418 418 418 418 436 436 ...
$ loadppl : num 251 251 251 251 251 251 251 251 258 258 ...
$ ppldistance: num 0.139 0.139 0.139 0.139 0.139 ...
$ scmor : num 0.399 0.399 0.399 0.399 0.399 ...
$ scmoe : num 5.53e-05 5.53e-05 5.53e-05 5.53e-05 5.53e-05 ...
$ failure : int 2 2 2 2 2 2 2 2 2 2 ...
$ mcweight : num 107 107 107 107 107 ...
$ odweight : num 94.1 94.1 94.1 94.1 94.1 94.1 94.1 94.1 90.3 90.3 ...
$ mc : num 13.3 13.3 13.3 13.3 13.3 ...
$ sgsc : num 0.415 0.415 0.415 0.415 0.415 ...
$ scmorpsi : num 58 58 58 58 58 ...
$ scmoepsi : num 8.03 8.03 8.03 8.03 8.03 ...
$ rows : chr "9" "10" "11" "12" ...
$ value :Class 'AsIs' num [1:238000] 0.0147 -0.0169 -0.0152 0.0135 -0.0107 ...
$ sg42 :Class 'AsIs' num [1:235280] 1.86e-04 9.39e-05 8.94e-05 1.83e-04 8.86e-05 ...
- attr(*, ".internal.selfref")=<externalptr>
Run Code Online (Sandbox Code Playgroud)
更新
这是用 dput(droplevels(head(data, 20))) 得到的实际数据集的一小部分:
structure(list(id = c(5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 7L,
7L, 7L, 7L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L), nirid = c("bx5xtx1",
"ax5xrx2", "bx5xrx2", "bx5xtx2", "bx5xrx1", "ax5xtx1", "ax5xrx1",
"ax5xtx2", "ax7xrx1", "ax7xtx2", "ax7xrx2", "ax7xtx1", "ax8xrx2",
"ax8xtx1", "ax8xrx1", "ax8xtx2", "ax9xtx2", "bx9xtx2", "ax9xrx2",
"ax9xtx1"), group = c("b", "a", "b", "b", "b", "a", "a", "a",
"a", "a", "a", "a", "a", "a", "a", "a", "a", "b", "a", "a"),
section = c("t", "r", "r", "t", "r", "t", "r", "t", "r",
"t", "r", "t", "r", "t", "r", "t", "t", "t", "r", "t"), face = c(1L,
2L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 1L, 2L,
2L, 2L, 2L, 1L), sample = c("B3C-3D", "B3C-3D", "B3C-3D",
"B3C-3D", "B3C-3D", "B3C-3D", "B3C-3D", "B3C-3D", "B3C-1E",
"B3C-1E", "B3C-1E", "B3C-1E", "B1C-2D", "B1C-2D", "B1C-2D",
"B1C-2D", "A3C-2C", "A3C-2C", "A3C-2C", "A3C-2C"), treatment = c("control",
"control", "control", "control", "control", "control", "control",
"control", "control", "control", "control", "control", "control",
"control", "control", "control", "control", "control", "control",
"control"), width = c("1.003", "1.003", "1.003", "1.003",
"1.003", "1.003", "1.003", "1.003", "1.021", "1.021", "1.021",
"1.021", "1.02", "1.02", "1.02", "1.02", "1.033", "1.033",
"1.033", "1.033"), thick = c("1.02", "1.02", "1.02", "1.02",
"1.02", "1.02", "1.02", "1.02", "1.043", "1.043", "1.043",
"1.043", "1.025", "1.025", "1.025", "1.025", "1.029", "1.029",
"1.029", "1.029"), length = c("16", "16", "16", "16", "16",
"16", "16", "16", "15.98", "15.98", "15.98", "15.98", "16.016",
"16.016", "16.016", "16.016", "16.005", "16.005", "16.005",
"16.005"), testweight = c("126", "126", "126", "126", "126",
"126", "126", "126", "121.4", "121.4", "121.4", "121.4",
"144.1", "144.1", "144.1", "144.1", "119.6", "119.6", "119.6",
"119.6"), maxload = c(418L, 418L, 418L, 418L, 418L, 418L,
418L, 418L, 436L, 436L, 436L, 436L, 631L, 631L, 631L, 631L,
486L, 486L, 486L, 486L), loadppl = c("251", "251", "251",
"251", "251", "251", "251", "251", "258", "258", "258", "258",
"296", "296", "296", "296", "255", "255", "255", "255"),
ppldistance = c("0.1388", "0.1388", "0.1388", "0.1388", "0.1388",
"0.1388", "0.1388", "0.1388", "0.155", "0.155", "0.155",
"0.155", "0.1412", "0.1412", "0.1412", "0.1412", "0.1488",
"0.1488", "0.1488", "0.1488"), scmor = c("0.399330740757585",
"0.399330740757585", "0.399330740757585", "0.399330740757585",
"0.399330740757585", "0.399330740757585", "0.399330740757585",
"0.399330740757585", "0.391336060622532", "0.391336060622532",
"0.391336060622532", "0.391336060622532", "0.587001478757759",
"0.587001478757759", "0.587001478757759", "0.587001478757759",
"0.442958394865818", "0.442958394865818", "0.442958394865818",
"0.442958394865818"), scmoe = c("5.5328050375923e-05", "5.5328050375923e-05",
"5.5328050375923e-05", "5.5328050375923e-05", "5.5328050375923e-05",
"5.5328050375923e-05", "5.5328050375923e-05", "5.5328050375923e-05",
"4.6792031310635e-05", "4.6792031310635e-05", "4.6792031310635e-05",
"4.6792031310635e-05", "6.2150955161815e-05", "6.2150955161815e-05",
"6.2150955161815e-05", "6.2150955161815e-05", "4.9585347590597e-05",
"4.9585347590597e-05", "4.9585347590597e-05", "4.9585347590597e-05"
), failure = c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L), mcweight = c("106.6",
"106.6", "106.6", "106.6", "106.6", "106.6", "106.6", "106.6",
"102.1", "102.1", "102.1", "102.1", "121.9", "121.9", "121.9",
"121.9", "100.7", "100.7", "100.7", "100.7"), odweight = c("94.1",
"94.1", "94.1", "94.1", "94.1", "94.1", "94.1", "94.1", "90.3",
"90.3", "90.3", "90.3", "107.1", "107.1", "107.1", "107.1",
"88.3", "88.3", "88.3", "88.3"), mc = c("13.2837407013815",
"13.2837407013815", "13.2837407013815", "13.2837407013815",
"13.2837407013815", "13.2837407013815", "13.2837407013815",
"13.2837407013815", "13.0675526024363", "13.0675526024363",
"13.0675526024363", "13.0675526024363", "13.8188608776844",
"13.8188608776844", "13.8188608776844", "13.8188608776844",
"14.0430351075878", "14.0430351075878", "14.0430351075878",
"14.0430351075878"), sgsc = c("0.414649099500969", "0.414649099500969",
"0.414649099500969", "0.414649099500969", "0.414649099500969",
"0.414649099500969", "0.414649099500969", "0.414649099500969",
"0.385028360121945", "0.385028360121945", "0.385028360121945",
"0.385028360121945", "0.461392466167132", "0.461392466167132",
"0.461392466167132", "0.461392466167132", "0.376174963976185",
"0.376174963976185", "0.376174963976185", "0.376174963976185"
), scmorpsi = c("57.9580175265", "57.9580175265", "57.9580175265",
"57.9580175265", "57.9580175265", "57.9580175265", "57.9580175265",
"57.9580175265", "56.79768659253", "56.79768659253", "56.79768659253",
"56.79768659253", "85.1961507631", "85.1961507631", "85.1961507631",
"85.1961507631", "64.2900427962", "64.2900427962", "64.2900427962",
"64.2900427962"), scmoepsi = c("8.0301959907", "8.0301959907",
"8.0301959907", "8.0301959907", "8.0301959907", "8.0301959907",
"8.0301959907", "8.0301959907", "6.7912962715", "6.7912962715",
"6.7912962715", "6.7912962715", "9.0204579335", "9.0204579335",
"9.0204579335", "9.0204579335", "7.1967122773", "7.1967122773",
"7.1967122773", "7.1967122773"), rows = 9:28, value = c("0.014680833",
"-0.0169", "-0.015241563", "0.013507307", "-0.010687351",
"0.000479", "-0.0311", "-7.18e-05", "-0.037", "-0.00349",
"-0.0395", "-0.000859", "-0.018", "0.000127", "-0.0234",
"0.00215", "-0.0165", "-0.0162", "-0.0286", "-0.0214"), sg42 = c("0.000185853584415584",
"9.39393939393943e-05", "8.93772943722944e-05", "0.000183087277056277",
"8.86156017316018e-05", "0.000180270562770563", "9.02597402597403e-05",
"0.0001831779004329", "8.26839826839824e-05", "0.000167605411255411",
"8.44155844155841e-05", "0.000175891774891775", "9.1774891774892e-05",
"0.000180465367965368", "9.02597402597405e-05", "0.000178874458874459",
"0.000160822510822511", "0.000154978354978355", "8.26839826839826e-05",
"0.000159090909090909")), .Names = c("id", "nirid", "group",
"section", "face", "sample", "treatment", "width", "thick", "length",
"testweight", "maxload", "loadppl", "ppldistance", "scmor", "scmoe",
"failure", "mcweight", "odweight", "mc", "sgsc", "scmorpsi",
"scmoepsi", "rows", "value", "sg42"), row.names = c(NA, 20L), class = "data.frame")
Run Code Online (Sandbox Code Playgroud)
预期结果应包含以下列:newr、newt、newrsg42 和 newtsg42。
非常感谢你 :)
这是一个使用 aggregate() 和 reshape() 的解决方案:
# Sample data: two `value` observations for every (id, face) pair.
df <- data.frame(
  id = c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L),
  face = c("r", "r", "t", "t", "r", "r", "t", "t", "r", "r", "t", "t"),
  value = c(15L, 11L, 16L, 17L, 13L, 25L, 12L, 18L, 30L, 20L, 19L, 10L),
  stringsAsFactors = FALSE  # spelled out: `F` is an ordinary, reassignable name
)

# Step 1: mean of `value` within each (face, id) combination.
group_means <- aggregate(value ~ face + id, data = df, FUN = mean)

# Step 2: duplicate `face` into a `time` column. `face` has to stay available
# as an id variable, so a separate copy drives the long-to-wide spread.
group_means <- transform(group_means, time = face)

# Step 3: spread `value` into one column per face (`value.r`, `value.t`).
# Because `face` is part of `idvar`, every (id, face) pair keeps its own row
# and the column belonging to the other face is NA.
wide_means <- reshape(
  group_means,
  direction = "wide",       # full argument name/value, no partial matching
  timevar = "time",
  idvar = c("id", "face")
)
wide_means
## face id value.r value.t
## 1 r 1 13 NA
## 2 t 1 NA 16.5
## 3 r 2 19 NA
## 4 t 2 NA 15.0
## 5 r 3 25 NA
## 6 t 3 NA 14.5
Run Code Online (Sandbox Code Playgroud)