我有一个数据框,其第一列(weights)包含一个列表(数据帧?):
> head(data$weights)
> data <- structure(list(A373R11 = structure(list(Signature.1A = 0, Signature.1B = 0,
Signature.2 = 0, Signature.3 = 0.151631702143023, Signature.4 = 0.149799882118262,
Signature.5 = 0, Signature.6 = 0, Signature.7 = 0.0634912587993959,
Signature.8 = 0, Signature.9 = 0.173189155080817, Signature.10 = 0,
Signature.11 = 0, Signature.12 = 0, Signature.13 = 0, Signature.14 = 0,
Signature.15 = 0, Signature.16 = 0, Signature.17 = 0, Signature.18 = 0,
Signature.19 = 0, Signature.20 = 0, Signature.21 = 0.0905517653558877,
Signature.R1 = 0, Signature.R2 = 0, Signature.R3 = 0, Signature.U1 = 0.155590748898003,
Signature.U2 = 0.145955461287919), .Names = c("Signature.1A",
"Signature.1B", "Signature.2", "Signature.3", "Signature.4",
"Signature.5", "Signature.6", "Signature.7", "Signature.8", "Signature.9",
"Signature.10", "Signature.11", "Signature.12", "Signature.13",
"Signature.14", "Signature.15", "Signature.16", "Signature.17",
"Signature.18", "Signature.19", "Signature.20", "Signature.21",
"Signature.R1", "Signature.R2", "Signature.R3", "Signature.U1",
"Signature.U2"), row.names = "A373R11", class = "data.frame"),
A373R13 = structure(list(Signature.1A = 0, Signature.1B = 0,
Signature.2 = 0, Signature.3 = 0.221014874027829, Signature.4 = 0,
Signature.5 = 0, Signature.6 = 0, Signature.7 = 0, Signature.8 = 0.279252211893692,
Signature.9 = 0, Signature.10 = 0, Signature.11 = 0,
Signature.12 = 0, Signature.13 = 0, Signature.14 = 0,
Signature.15 = 0, Signature.16 = 0, Signature.17 = 0,
Signature.18 = 0, Signature.19 = 0.115216422668955, Signature.20 = 0,
Signature.21 = 0, Signature.R1 = 0, Signature.R2 = 0,
Signature.R3 = 0.0636987713225648, Signature.U1 = 0.108875099907467,
Signature.U2 = 0), .Names = c("Signature.1A", "Signature.1B",
"Signature.2", "Signature.3", "Signature.4", "Signature.5",
"Signature.6", "Signature.7", "Signature.8", "Signature.9",
"Signature.10", "Signature.11", "Signature.12", "Signature.13",
"Signature.14", "Signature.15", "Signature.16", "Signature.17",
"Signature.18", "Signature.19", "Signature.20", "Signature.21",
"Signature.R1", "Signature.R2", "Signature.R3", "Signature.U1",
"Signature.U2"), row.names = "A373R13", class = "data.frame"),
A373R3 = structure(list(Signature.1A = 0, Signature.1B = 0,
Signature.2 = 0, Signature.3 = 0.0795605471131758, Signature.4 = 0.0973130562439999,
Signature.5 = 0, Signature.6 = 0, Signature.7 = 0, Signature.8 = 0.249674548796242,
Signature.9 = 0.0725013504411567, Signature.10 = 0, Signature.11 = 0.064665155855146,
Signature.12 = 0, Signature.13 = 0, Signature.14 = 0,
Signature.15 = 0, Signature.16 = 0, Signature.17 = 0,
Signature.18 = 0, Signature.19 = 0, Signature.20 = 0,
Signature.21 = 0, Signature.R1 = 0, Signature.R2 = 0,
Signature.R3 = 0.0703546703126821, Signature.U1 = 0.21753544296676,
Signature.U2 = 0.0739201832004727), .Names = c("Signature.1A",
"Signature.1B", "Signature.2", "Signature.3", "Signature.4",
"Signature.5", "Signature.6", "Signature.7", "Signature.8",
"Signature.9", "Signature.10", "Signature.11", "Signature.12",
"Signature.13", "Signature.14", "Signature.15", "Signature.16",
"Signature.17", "Signature.18", "Signature.19", "Signature.20",
"Signature.21", "Signature.R1", "Signature.R2", "Signature.R3",
"Signature.U1", "Signature.U2"), row.names = "A373R3", class = "data.frame"),
A373R5 = structure(list(Signature.1A = 0, Signature.1B = 0,
Signature.2 = 0, Signature.3 = 0.113996509522102, Signature.4 = 0.114874220936966,
Signature.5 = 0.142056872670519, Signature.6 = 0, Signature.7 = 0,
Signature.8 = 0.208376707959741, Signature.9 = 0.0744527503782136,
Signature.10 = 0, Signature.11 = 0, Signature.12 = 0,
Signature.13 = 0, Signature.14 = 0, Signature.15 = 0.0771902641012979,
Signature.16 = 0, Signature.17 = 0, Signature.18 = 0,
Signature.19 = 0, Signature.20 = 0, Signature.21 = 0,
Signature.R1 = 0, Signature.R2 = 0, Signature.R3 = 0,
Signature.U1 = 0.0673567355607731, Signature.U2 = 0), .Names = c("Signature.1A",
"Signature.1B", "Signature.2", "Signature.3", "Signature.4",
"Signature.5", "Signature.6", "Signature.7", "Signature.8",
"Signature.9", "Signature.10", "Signature.11", "Signature.12",
"Signature.13", "Signature.14", "Signature.15", "Signature.16",
"Signature.17", "Signature.18", "Signature.19", "Signature.20",
"Signature.21", "Signature.R1", "Signature.R2", "Signature.R3",
"Signature.U1", "Signature.U2"), row.names = "A373R5", class = "data.frame"),
A373R9 = structure(list(Signature.1A = 0, Signature.1B = 0,
Signature.2 = 0, Signature.3 = 0.116847300193985, Signature.4 = 0,
Signature.5 = 0.21624751052703, Signature.6 = 0, Signature.7 = 0,
Signature.8 = 0.252498230882402, Signature.9 = 0, Signature.10 = 0,
Signature.11 = 0.119495912880994, Signature.12 = 0, Signature.13 = 0,
Signature.14 = 0, Signature.15 = 0, Signature.16 = 0,
Signature.17 = 0, Signature.18 = 0, Signature.19 = 0,
Signature.20 = 0, Signature.21 = 0, Signature.R1 = 0,
Signature.R2 = 0, Signature.R3 = 0.0725549911220892,
Signature.U1 = 0, Signature.U2 = 0), .Names = c("Signature.1A",
"Signature.1B", "Signature.2", "Signature.3", "Signature.4",
"Signature.5", "Signature.6", "Signature.7", "Signature.8",
"Signature.9", "Signature.10", "Signature.11", "Signature.12",
"Signature.13", "Signature.14", "Signature.15", "Signature.16",
"Signature.17", "Signature.18", "Signature.19", "Signature.20",
"Signature.21", "Signature.R1", "Signature.R2", "Signature.R3",
"Signature.U1", "Signature.U2"), row.names = "A373R9", class = "data.frame"),
A512R19 = structure(list(Signature.1A = 0.109490572493859,
Signature.1B = 0, Signature.2 = 0, Signature.3 = 0, Signature.4 = 0.22010156823306,
Signature.5 = 0, Signature.6 = 0, Signature.7 = 0, Signature.8 = 0,
Signature.9 = 0, Signature.10 = 0, Signature.11 = 0,
Signature.12 = 0, Signature.13 = 0, Signature.14 = 0,
Signature.15 = 0, Signature.16 = 0, Signature.17 = 0,
Signature.18 = 0, Signature.19 = 0, Signature.20 = 0,
Signature.21 = 0, Signature.R1 = 0, Signature.R2 = 0,
Signature.R3 = 0.150943894106973, Signature.U1 = 0.248556502648564,
Signature.U2 = 0.119306892617062), .Names = c("Signature.1A",
"Signature.1B", "Signature.2", "Signature.3", "Signature.4",
"Signature.5", "Signature.6", "Signature.7", "Signature.8",
"Signature.9", "Signature.10", "Signature.11", "Signature.12",
"Signature.13", "Signature.14", "Signature.15", "Signature.16",
"Signature.17", "Signature.18", "Signature.19", "Signature.20",
"Signature.21", "Signature.R1", "Signature.R2", "Signature.R3",
"Signature.U1", "Signature.U2"), row.names = "A512R19", class = "data.frame")), .Names = c("A373R11",
"A373R13", "A373R3", "A373R5", "A373R9", "A512R19"))
Run Code Online (Sandbox Code Playgroud)
这里,每行包含一个样本,每列包含特定签名的分数:
> data[1]
$A373R11
Signature.1A Signature.1B Signature.2 Signature.3 Signature.4 Signature.5 Signature.6 Signature.7 Signature.8 Signature.9 Signature.10 Signature.11
A373R11 0 0 0 0.1516317 0.1497999 0 0 0.06349126 0 0.1731892 0 0
Signature.12 Signature.13 Signature.14 Signature.15 Signature.16 Signature.17 Signature.18 Signature.19 Signature.20 Signature.21 Signature.R1 Signature.R2
A373R11 0 0 0 0 0 0 0 0 0 0.09055177 0 0
Signature.R3 Signature.U1 Signature.U2
A373R11 0 0.1555907 0.1459555
Run Code Online (Sandbox Code Playgroud)
我想将其转换为具有以下结构的数据帧:
sample signature score
A373R11 Signature.1A 0
A373R11 Signature.1B 0
[...]
A373R13 Signature.1A 0
A373R13 Signature.1B 0
[...]
Run Code Online (Sandbox Code Playgroud)
谁能指出我正确的方向?
两种方法:
1)使用data.table-package
使用:
library(data.table)
# rbindlist() stacks the per-sample data.frames into one table and stores each
# list element's name in a new "sample" column (idcol). melt() then turns every
# remaining column into a (signature, score) pair, one row per sample/signature.
# The id columns are passed as id.vars, spelled out in full rather than relying
# on partial argument matching of `id`.
melt(rbindlist(data, idcol = "sample"),
     id.vars = "sample", variable.name = "signature", value.name = "score")
Run Code Online (Sandbox Code Playgroud)
得到:
      sample    signature      score
  1: A373R11 Signature.1A 0.00000000
  2: A373R13 Signature.1A 0.00000000
  3:  A373R3 Signature.1A 0.00000000
  4:  A373R5 Signature.1A 0.00000000
  5:  A373R9 Signature.1A 0.00000000
 ---
158: A373R13 Signature.U2 0.00000000
159:  A373R3 Signature.U2 0.07392018
160:  A373R5 Signature.U2 0.00000000
161:  A373R9 Signature.U2 0.00000000
162: A512R19 Signature.U2 0.11930689
Run Code Online (Sandbox Code Playgroud)
2)基础R
使用:
# Stack the one-row data.frames into a single wide data.frame; its row names
# are reused below as the sample identifiers.
dat2 <- do.call(rbind, dat)
# Reshape wide -> long: all columns are treated as one set of varying
# measurements, the column names become the "signature" values and the cell
# values go into "score". seq_len() is used instead of 1:ncol() so that an
# input without columns does not produce the index vector c(1, 0).
reshape(dat2, idvar = "sample", ids = row.names(dat2),
        varying = list(seq_len(ncol(dat2))), times = colnames(dat2),
        timevar = "signature", v.names = "score",
        new.row.names = NULL, direction = "long")
Run Code Online (Sandbox Code Playgroud)
得到:
                        signature      score  sample
A373R11.Signature.1A Signature.1A 0.00000000 A373R11
A373R13.Signature.1A Signature.1A 0.00000000 A373R13
A373R3.Signature.1A  Signature.1A 0.00000000  A373R3
A373R5.Signature.1A  Signature.1A 0.00000000  A373R5
A373R9.Signature.1A  Signature.1A 0.00000000  A373R9
.....
A373R13.Signature.U2 Signature.U2 0.00000000 A373R13
A373R3.Signature.U2  Signature.U2 0.07392018  A373R3
A373R5.Signature.U2  Signature.U2 0.00000000  A373R5
A373R9.Signature.U2  Signature.U2 0.00000000  A373R9
A512R19.Signature.U2 Signature.U2 0.11930689 A512R19
Run Code Online (Sandbox Code Playgroud)
注意:
最好不要给数据对象起一个与现有函数相同的名称(参见 ?data)。因此,上面基础R代码中用 dat 来指代问题中的列表 data。
一个tidyverse解决方案:我们先用 bind_rows 将所有data.frame合并在一起,然后用 gather 将结果重塑为所需的长格式:
library(dplyr)
library(tidyr)
# Step 1: stack the list of data.frames, keeping each list name in "sample".
# Step 2: collapse every column except "sample" into signature/score pairs.
data %>%
  bind_rows(.id = "sample") %>%
  gather(key = "signature", value = "score", -sample)
Run Code Online (Sandbox Code Playgroud)
得到:
    sample    signature      score
1  A373R11 Signature.1A 0.00000000
2  A373R13 Signature.1A 0.00000000
3   A373R3 Signature.1A 0.00000000
4   A373R5 Signature.1A 0.00000000
5   A373R9 Signature.1A 0.00000000
6  A512R19 Signature.1A 0.10949057
7  A373R11 Signature.1B 0.00000000
8  A373R13 Signature.1B 0.00000000
9   A373R3 Signature.1B 0.00000000
10  A373R5 Signature.1B 0.00000000
....
Run Code Online (Sandbox Code Playgroud)
也可以不使用管道,写成一行代码:
# Stack the list into one data.frame (list names -> "sample"), then gather the signature columns.
gather(bind_rows(data, .id = "sample"), key = "signature", value = "score", -sample)
Run Code Online (Sandbox Code Playgroud)
这是一个使用 rapply 的基础R替代方案。请注意,我已将您的数据对象重命名为 dat。
# Flatten every cell of every data.frame into one named numeric vector.
# The outer (sample) names are dropped first so that the element names are the
# bare signature names rather than "sample.signature".
flat_scores <- rapply(unname(dat), identity)
# simpler alternative:
# flat_scores <- unlist(unname(dat))

# Repeat each sample name once per column of its data.frame.
sample_ids <- rep(names(dat), lengths(dat))

# Assemble the long-format data.frame with rows numbered 1..n.
mydf <- data.frame(
  sample = sample_ids,
  signature = names(flat_scores),
  score = flat_scores,
  stringsAsFactors = FALSE,
  row.names = seq_along(flat_scores)
)
Run Code Online (Sandbox Code Playgroud)
返回的结果如下:
head(mydf)
sample signature score
1 A373R11 Signature.1A 0.0000000
2 A373R11 Signature.1B 0.0000000
3 A373R11 Signature.2 0.0000000
4 A373R11 Signature.3 0.1516317
5 A373R11 Signature.4 0.1497999
6 A373R11 Signature.5 0.0000000
Run Code Online (Sandbox Code Playgroud)
默认情况下,rapply 会对嵌套列表中每个最底层(叶子)元素应用一个函数,并返回一个向量。我使用 identity 原样返回这些元素。由于每个元素都带有一个名称,因此 rapply 返回的是一个命名向量。
我使用 unname 去掉每个外层列表项的名称,这使得下一步构建 data.frame 更加容易。否则,命名向量的名称将类似于 "A373R11.Signature.5",需要额外的处理才能得到所需的结果。