我有一个命名向量列表。我正在尝试总结他们的价值观。但向量中的某些名称具有相反的等效名称。例如,如果我有一些如下所示的数据:
myList <- list(`1` = c('x1:x2' = 2, 'x2:x1' = 1, 'x3:x4' = 1),
`2` = c('x1:x2' = 3, 'x6:x1' = 2, 'x1:x1' = 1, 'x4:x3' = 1),
`3` = c('x3:x4' = 2, 'x1:x2' = 1, 'x4:x3' = 4),
`4` = c('x5:x2' = 1, 'x2:x5' = 1)
)
> myList
$`1`
x1:x2 x2:x1 x3:x4
2 1 1
$`2`
x1:x2 x6:x1 x1:x1 x4:x3
3 2 1 1
$`3`
x3:x4 x1:x2 x4:x3
2 1 4
$`4`
x5:x2 x2:x5
1 1
Run Code Online (Sandbox Code Playgroud)
在这里,我们可以看到myList[[1]]我们有x1:x2 = 2和x2:x1 = 1。由于它们彼此相反,因此它们是等效的,因此本质上,x1:x2 = 3。
我试图对每个列表元素上每个命名元素(包括相反)的值求和。
我想要的输出看起来像这样:
var count listNo
1 x1:x2 3 1
2 x3:x4 1 1
3 x1:x2 3 2
4 x6:x1 2 2
5 x1:x1 1 2
6 x4:x3 1 2
7 x3:x4 6 3
8 x1:x2 1 3
9 x5:x2 2 4
Run Code Online (Sandbox Code Playgroud)
All*_*ron 11
这很棘手。我有兴趣看到更优雅的解决方案
`row.names<-`(do.call(rbind, Map(function(vec, name) {
x <- names(vec)
l <- sapply(strsplit(x, ":"), function(y) {
paste0("x", sort(as.numeric(sub("\\D", "", y))), collapse = ":")
})
df <- setNames(as.data.frame(table(rep(l, vec))), c("var", "count"))
df$listNo <- name
df
}, vec = myList, name = names(myList))), NULL)
#> var count listNo
#> 1 x1:x2 3 1
#> 2 x3:x4 1 1
#> 3 x1:x1 1 2
#> 4 x1:x2 3 2
#> 5 x1:x6 2 2
#> 6 x3:x4 1 2
#> 7 x1:x2 1 3
#> 8 x3:x4 6 3
#> 9 x2:x5 2 4
Run Code Online (Sandbox Code Playgroud)
由reprex 包于 2022 年 3 月 6 日创建(v2.0.1)
ben*_*n23 10
这是一个与以下内容混合的基本 R 方法dplyr::bind_rows():
tmp <- lapply(1:length(myList), function(i) {\n tapply(setNames(myList[[i]], \n sapply(strsplit(names(myList[[i]]), ":"), \n function(x) paste0(sort(x), collapse = ":"))), \n sapply(strsplit(names(myList[[i]]), ":"), \n function(x) paste0(sort(x), collapse = ":")), sum)\n})\n\nbind_rows(tmp, .id = "listNo") |> \n pivot_longer(!listNo, names_to = "var", values_to = "count", values_drop_na = T)\n\n# A tibble: 9 x 3\n listNo var count\n <chr> <chr> <dbl>\n1 1 x1:x2 3\n2 1 x3:x4 1\n3 2 x1:x2 3\n4 2 x3:x4 1\n5 2 x1:x1 1\n6 2 x1:x6 2\n7 3 x1:x2 1\n8 3 x3:x4 6\n9 4 x2:x5 2\nRun Code Online (Sandbox Code Playgroud)\n出于好奇,我运行microbenchmark了现有的答案,似乎 @ThomasIsCoding 的解决方案在时间上击败了 @AllanCameron ,成为最佳解决方案:
microbenchmark::microbenchmark(\n Allan = {\n `row.names<-`(do.call(rbind, Map(function(vec, name) { \n x <- names(vec)\n l <- sapply(strsplit(x, ":"), function(y) {\n paste0("x", sort(as.numeric(sub("\\\\D", "", y))), collapse = ":")\n })\n df <- setNames(as.data.frame(table(rep(l, vec))), c("var", "count"))\n df$listNo <- name\n df\n }, vec = myList, name = names(myList))), NULL)\n },\n benson23 = {\n tmp <- lapply(1:length(myList), function(i) {\n tapply(setNames(myList[[i]], \n sapply(strsplit(names(myList[[i]]), ":"), \n function(x) paste0(sort(x), collapse = ":"))), \n sapply(strsplit(names(myList[[i]]), ":"), \n function(x) paste0(sort(x), collapse = ":")), sum)\n })\n \n bind_rows(tmp, .id = "listNo") |> \n pivot_longer(!listNo, names_to = "var", values_to = "count", values_drop_na = T)\n \n },\n tmfmnk = {\n map_dfr(myList, enframe, .id = "listNo") %>%\n mutate(var = map_chr(str_split(name, ":"), ~ str_c(sort(.), collapse = ":"))) %>%\n group_by(listNo, var) %>%\n summarise(count = sum(value))\n },\n zephryl = {\n tibble(count = myList, listNo = names(myList)) %>%\n unnest_longer(count, indices_to = "var") %>% \n mutate(\n var = str_extract_all(var, "\\\\d+"),\n var = map_chr(var, ~ str_glue("x{sort(.x)[[1]]}:x{sort(.x)[[2]]}"))\n ) %>% \n group_by(listNo, var) %>%\n summarize(count = sum(count), .groups = "drop")\n },\n PaulS = {\n map_dfr(myList, identity, .id = "listNo") %>%\n pivot_longer(cols = -listNo, values_drop_na = T) %>% \n rowwise %>%\n mutate(name = str_split(name, ":", simplify = T) %>% sort %>% \n str_c(collapse = ":")) %>% \n group_by(name, listNo) %>% \n summarise(count = sum(value), .groups = "drop") \n },\n TIC1 = {\n aggregate(\n count ~ .,\n transform(\n cbind(\n setNames(do.call(rbind, Map(stack, myList)), c("count", "var")),\n listNo = rep(seq_along(myList), lengths(myList))\n ),\n var = sapply(\n strsplit(as.character(var), ":"),\n function(x) paste0(sort(x), collapse = ":")\n )\n ),\n sum\n )\n },\n TIC2 = {\n aggregate(\n count ~ .,\n cbind(\n var = unlist(sapply(\n myList,\n function(x) {\n sapply(\n strsplit(names(x), ":"),\n function(v) paste0(sort(v), collapse = ":")\n )\n }\n )),\n setNames(stack(myList), c("count", "listNo"))\n ),\n sum\n )\n },\n Ma\xc3\xabl = {\n myList %>% \n imap(~ .x %>% \n enframe() %>% \n separate(name, into = c("c1", "c2")) %>% \n graph.data.frame(., directed = F) %>% \n get.data.frame() %>% \n group_by(from, to) %>% \n summarise(count = sum(value)) %>% \n unite(c("from","to"), col = "var", sep = ":") %>% \n mutate(listNo = .y)) %>%\n bind_rows()\n })\n\nUnit: milliseconds\n expr min lq mean median uq max neval cld\n Allan 2.1327 2.25920 2.475978 2.33445 2.45270 12.3697 100 a \n benson23 3.5083 3.80855 4.150929 4.03700 4.27685 13.3313 100 a \n tmfmnk 5.4928 5.88520 6.324940 6.24190 6.66975 8.1777 100 ab \n zephryl 10.1629 10.89110 14.813878 11.58475 12.14085 221.0931 100 c \n PaulS 7.7565 8.44360 11.402325 9.10860 9.47480 124.1965 100 bc \n TIC1 3.5233 3.88805 8.240207 4.06640 4.26765 202.9082 100 a c \n TIC2 1.8722 2.03240 2.247993 2.13230 2.24045 10.7320 100 a \n Ma\xc3\xabl 35.3066 39.52920 44.456091 40.96870 42.39480 170.8322 100 d\nRun Code Online (Sandbox Code Playgroud)\n
另一种tidyverse选择可能是:
map_dfr(myList, enframe, .id = "listNo") %>%
mutate(var = map_chr(str_split(name, ":"), ~ str_c(sort(.), collapse = ":"))) %>%
group_by(listNo, var) %>%
summarise(count = sum(value))
listNo var count
<chr> <chr> <dbl>
1 1 x1:x2 3
2 1 x3:x4 1
3 2 x1:x1 1
4 2 x1:x2 3
5 2 x1:x6 2
6 2 x3:x4 1
7 3 x1:x2 1
8 3 x3:x4 6
9 4 x2:x5 2
Run Code Online (Sandbox Code Playgroud)
另一种可能的解决方案,tidyverse基于:
library(tidyverse)\n\nmap_dfr(myList, identity, .id = "listNo") %>%\n pivot_longer(cols = -listNo, values_drop_na = T) %>% \n rowwise %>%\n mutate(name = str_split(name, ":", simplify = T) %>% sort %>% \n str_c(collapse = ":")) %>% \n group_by(name, listNo) %>% \n summarise(count = sum(value), .groups = "drop") \n\n#> # A tibble: 9 \xc3\x97 3\n#> name listNo count\n#> <chr> <chr> <dbl>\n#> 1 x1:x1 2 1\n#> 2 x1:x2 1 3\n#> 3 x1:x2 2 3\n#> 4 x1:x2 3 1\n#> 5 x1:x6 2 2\n#> 6 x2:x5 4 2\n#> 7 x3:x4 1 1\n#> 8 x3:x4 2 1\n#> 9 x3:x4 3 6\nRun Code Online (Sandbox Code Playgroud)\n
一个 {tidyverse} 解决方案:
library(tidyverse)
tibble(count = myList, listNo = names(myList)) %>%
unnest_longer(count, indices_to = "var") %>%
mutate(
var = str_extract_all(var, "\\d+"),
var = map_chr(var, ~ str_glue("x{sort(.x)[[1]]}:x{sort(.x)[[2]]}"))
) %>%
group_by(listNo, var) %>%
summarize(count = sum(count), .groups = "drop")
# # A tibble: 9 x 3
# listNo var count
# <chr> <chr> <dbl>
# 1 1 x1:x2 3
# 2 1 x3:x4 1
# 3 2 x1:x1 1
# 4 2 x1:x2 3
# 5 2 x1:x6 2
# 6 2 x3:x4 1
# 7 3 x1:x2 1
# 8 3 x3:x4 6
# 9 4 x2:x5 2
Run Code Online (Sandbox Code Playgroud)
| 归档时间: |
|
| 查看次数: |
1222 次 |
| 最近记录: |