这是我的数据框的示例列,RR是标题:
RR
Cvv
Cvv
Caa
Run Code Online (Sandbox Code Playgroud)
我需要的是"反转"数据:以子串vv和aa作为数据框的列标题,并把RR作为单元格的内容.得到的矩阵将是:
vv | aa
CRR |
CRR |
| CRR
Run Code Online (Sandbox Code Playgroud)
所以我们在两个矩阵中都得到了相同的关系.在第一行和第二行,vv与RR耦合.在第三行,aa与RR耦合.
这可以通过R实现吗?有任何想法吗 ?
提前谢谢大家!
我在上面的例子中过度简化了我的数据.所以这是我的实际数据集的示例:
> dput(head(A1F[4:15],n=20))
# dput() output for the first 20 rows of columns 4:15 of A1F: a data.frame
# with twelve factor columns (RR, AA, BB, VV, RJ, JR, BV, VB, AJ, JA, VR, RV).
# Most labels are a C/G prefix followed by a two-letter suffix; some labels
# are blank or carry stray characters (e.g. "Cbb ", "Caj+", "?").
structure(list(RR = structure(c(15L, 15L, 15L, 27L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("",
" ", "Caa", "Caj", "Cbb", "Cbb ", "Cbv", "Cja", "Cjr", "Crj",
"Crr", "Crv", "Cvb", "Cvr", "Cvv", "Gaa", "Gaj", "Gbb", "Gbv",
"Gja", "Gjr", "Grj", "Grr", "Grv", "Gvb", "Gvr", "Gvv"), class = "factor"),
AA = structure(c(13L, 13L, 13L, 1L, 1L, 1L, 1L, 15L, 27L,
27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 1L), .Label = c("",
"Caa", "Caj", "Car", "Cbb", "Cbv", "Cja", "Cjr", "Cjr ",
"Crj", "Crr", "Crv", "Cvb", "Cvr", "Cvv", "Gaa", "Gaj", "Gbb",
"Gbv", "Gja", "Gjr", "Grj", "Grr", "Grv", "Gvb", "Gvr", "Gvv"
), class = "factor"), BB = structure(c(9L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L
), .Label = c("", "?", "Caa", "Caj", "Cbv", "Cja", "Cjr",
"Crj", "Crr", "Crv", "Cvb", "Cvr", "Cvv", "Gaa", "Gaj", "Gbv",
"Gja", "Gjr", "Grj", "Grr", "Grv", "Gvb", "Gvr", "Gvv"), class = "factor"),
VV = structure(c(8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 1L, 1L, 16L, 16L, 16L, 16L, 16L, 16L, 16L), .Label = c("",
" ", "Caa", "Caj", "Caj+", "Cbb", "Cbv", "Cja", "Cjr", "Crv",
"Cvb", "Cvr", "Cvv", "Gaa", "Gbb", "Gja", "Gjr", "Grv", "Gvb",
"Gvr"), class = "factor"), RJ = structure(c(8L, 3L, 3L, 1L,
1L, 12L, 12L, 12L, 12L, 12L, 1L, 12L, 12L, 12L, 12L, 12L,
12L, 12L, 12L, 12L), .Label = c("", "Caa", "Caj", "Cbv",
"Ccrj", "Cja", "Cjr", "Crj", "Crj ", "Crr", "Crv", "Cvr",
"Cvv", "Gaa", "Gaj", "Gbv", "Gja", "Gjr", "Grj", "Grr", "Grv",
"Gvr", "Gvv"), class = "factor"), JR = structure(c(7L, 7L,
18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L,
18L, 18L, 18L, 18L, 18L, 18L), .Label = c("", "Caa", "Caj",
"Cbv", "Cja", "Cjr", "Crj", "Crr", "Crv", "Cvb", "Cvr", "Cvv",
"Gaa", "Gaj", "Gbv", "Gja", "Gjr", "Grj", "Grr", "Grv", "Grv ",
"Gvb", "Gvb ", "Gvr", "Gvv"), class = "factor"), BV = structure(c(4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L), .Label = c("", "Caa", "Caj", "Cbb", "Cbv",
"Cja", "Cjr", "Crj", "Crr", "Crv", "Cvb", "Cvr", "Cvv", "Gaa",
"Gaj", "Gbb", "Gbv", "Gja", "Gjr", "Grj", "Grv", "Gvb", "Gvr",
"Gvv", "R"), class = "factor"), VB = structure(c(1L, 1L,
7L, 7L, 18L, 18L, 1L, 1L, 10L, 10L, 21L, 21L, 21L, 1L, 21L,
21L, 21L, 21L, 21L, 1L), .Label = c("", "Caa", "Caj", "Cbb",
"Cbv", "Cja", "Cjr", "Crj", "Crr", "Crv", "Cvb", "Cvv", "Gaa",
"Gaj", "Gbb", "Gbv", "Gja", "Gjr", "Grj", "Grr", "Grv", "Gvb",
"Gvr", "Gvv"), class = "factor"), AJ = structure(c(2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 1L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 10L,
1L, 10L, 10L), .Label = c("", "Caa", "Caj", "Cbb", "Cbv",
"Cja", "Cjr", "Crj", "Crv", "Cvb", "Cvr", "Cvv", "Gaa", "Gaj",
"Gbb", "Gbv", "Gja", "Gjr", "Grj", "Grj ", "Grr", "Grv",
"Gvb", "Gvr", "Gvv"), class = "factor"), JA = structure(c(10L,
10L, 10L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 6L, 6L, 6L, 6L), .Label = c("", "Caa", "Caj", "Cbv",
"Cja", "Cjr", "Crr", "Crv", "Cvb", "Cvr", "Cvv", "Gaa", "Gaj",
"Gbv", "Gja", "Gjr", "Grr", "Grv", "Gvb", "Gvv"), class = "factor"),
VR = structure(c(1L, 5L, 5L, 5L, 16L, 16L, 16L, 16L, 16L,
16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L), .Label = c("",
"Caa", "Caj", "Caj ", "Cbv", "Cja", "Cjr", "Crj", "Crr",
"Crv", "Cvb", "Cvr", "Cvv", "Gaa", "Gaj", "Gbv", "Gja", "Gjr",
"Grj", "Grr", "Grv", "Gvb", "Gvr", "Gvv"), class = "factor"),
RV = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L,
15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 1L, 1L), .Label = c("",
"Caa", "Caj", "Cbb", "Cbv", "Cja", "Cjr", "Crj", "Crr", "Crv",
"Cvr", "Cvv", "Cvv ", "Gaa", "Gaj", "Gbb", "Gbv", "Gja",
"Gjr", "Grj", "Grr", "Grv", "Gvr", "Gvv"), class = "factor")), .Names = c("RR",
"AA", "BB", "VV", "RJ", "JR", "BV", "VB", "AJ", "JA", "VR", "RV"
), row.names = c(NA, 20L), class = "data.frame")
Run Code Online (Sandbox Code Playgroud)
如上所述,期望的矩阵应保持原有的对应关系和行顺序.GSee提供了一个我可以应用的答案,但它只适用于我矩阵中的一列,因为`[[`只能选取单个元素,而用`[`选取多个元素又不起作用.我不确定我是否正朝着正确的方向前进......
根据实际数据集(如上所示),这是所需的输出(前三行)的样子:
# Desired result for the first three rows of the sample data: a data.frame
# whose columns are the lower-case two-letter suffixes and whose cells hold
# the prefix letter followed by the upper-case name of the source column
# (e.g. RR = "Cvv" appears as vv = "CRR"). Columns with no value in these
# rows are all NA; "" marks an empty cell in a column that has a value in
# another row.
# NOTE(review): the JR and VB values in the sample (e.g. JR = "Crj" in row 1,
# VB = "Cjr" in row 3) have no counterpart here -- confirm whether this
# hand-written example is complete.
structure(list(vv = structure(c(1L, 1L, 1L), .Label = "CRR", class = "factor"),
rv = c(NA, NA, NA), ja = structure(c(1L, 1L, 1L), .Label = "CVV", class = "factor"),
aa = structure(c(1L, 1L, 1L), .Label = "CAJ", class = "factor"),
bv = structure(c(1L, 2L, 2L), .Label = c("", "CVR"), class = "factor"),
aj = structure(c(1L, 2L, 2L), .Label = c("", "CRJ"), class = "factor"),
vb = structure(c(1L, 1L, 1L), .Label = "CAA", class = "factor"),
rj = structure(c(2L, 1L, 1L), .Label = c("", "CRJ"), class = "factor"),
rr = structure(c(1L, 1L, 1L), .Label = "CBB", class = "factor"),
vr = structure(c(1L, 1L, 1L), .Label = "CJA", class = "factor"),
bb = structure(c(1L, 1L, 1L), .Label = "CBV", class = "factor"),
jr = c(NA, NA, NA)), .Names = c("vv", "rv", "ja", "aa", "bv",
"aj", "vb", "rj", "rr", "vr", "bb", "jr"), class = "data.frame", row.names = c(NA,
-3L))
Run Code Online (Sandbox Code Playgroud)
我希望这更有意义.
这有点硬编码,但想法就在那里.
require(stringr)
require(plyr)
# Re-express a column of codes so that each code's two-letter suffix becomes
# a column, and each cell holds the code's one-letter prefix followed by the
# name of the source column (e.g. RR = "Cvv" gives "CRR" in column "vv").
# Only base R is used, so nothing here depends on stringr or plyr.

# stringsAsFactors is set explicitly so the script does not depend on the
# data.frame() default, which changed from TRUE to FALSE in R 4.0.0.
vect <- data.frame(RR = c("Cvv", "Cvv", "Caa"), stringsAsFactors = FALSE)

source_col <- "RR"
codes <- as.character(vect[[source_col]])
prefix <- substr(codes, 1L, 1L)
suffix <- substr(codes, 2L, 3L)

# Blank or missing codes have no suffix; their rows stay NA in the output.
has_code <- !is.na(codes) & nzchar(suffix)

# One column per distinct suffix, in sorted order.
suffixes <- sort(unique(suffix[has_code]))

theMat <- matrix(
  NA_character_,
  nrow = length(codes),
  ncol = length(suffixes),
  dimnames = list(NULL, suffixes)
)

# Fill by (row, column) index pairs. Assigning into the existing matrix keeps
# its dimensions and column names; overwriting theMat with the result of a
# vectorised string replacement would collapse it to a plain vector.
cells <- cbind(which(has_code), match(suffix[has_code], suffixes))
theMat[cells] <- paste0(prefix[has_code], source_col)
Run Code Online (Sandbox Code Playgroud)