对命名向量值求和,其中名称在 R 中颠倒

Ele*_*ino 23 r

我有一个命名向量列表。我正在尝试总结他们的价值观。但向量中的某些名称具有相反的等效名称。例如,如果我有一些如下所示的数据:

myList <- list(`1` = c('x1:x2' = 2, 'x2:x1' = 1, 'x3:x4' = 1),
               `2` = c('x1:x2' = 3, 'x6:x1' = 2, 'x1:x1' = 1, 'x4:x3' = 1),
               `3` = c('x3:x4' = 2, 'x1:x2' = 1, 'x4:x3' = 4),
               `4` = c('x5:x2' = 1, 'x2:x5' = 1)
               )
> myList
$`1`
x1:x2 x2:x1 x3:x4
    2     1     1

$`2`
x1:x2 x6:x1 x1:x1 x4:x3
    3     2     1     1

$`3`
x3:x4 x1:x2 x4:x3
    2     1     4

$`4`
x5:x2 x2:x5
    1     1
Run Code Online (Sandbox Code Playgroud)

在这里,我们可以看到myList[[1]]我们有x1:x2 = 2x2:x1 = 1。由于它们彼此相反,因此它们是等效的,因此本质上,x1:x2 = 3

我试图对每个列表元素上每个命名元素(包括相反)的值求和。

我想要的输出看起来像这样:

    var count listNo
1 x1:x2     3      1
2 x3:x4     1      1
3 x1:x2     3      2
4 x6:x1     2      2
5 x1:x1     1      2
6 x4:x3     1      2
7 x3:x4     6      3
8 x1:x2     1      3
9 x5:x2     2      4
Run Code Online (Sandbox Code Playgroud)

All*_*ron 11

这很棘手。我有兴趣看到更优雅的解决方案

`row.names<-`(do.call(rbind, Map(function(vec, name) {  
    x <- names(vec)
    l <- sapply(strsplit(x, ":"), function(y) {
      paste0("x", sort(as.numeric(sub("\\D", "", y))), collapse = ":")
      })
    df <- setNames(as.data.frame(table(rep(l, vec))), c("var", "count"))
    df$listNo <- name
    df
  }, vec = myList, name = names(myList))), NULL)

#>     var count listNo
#> 1 x1:x2     3      1
#> 2 x3:x4     1      1
#> 3 x1:x1     1      2
#> 4 x1:x2     3      2
#> 5 x1:x6     2      2
#> 6 x3:x4     1      2
#> 7 x1:x2     1      3
#> 8 x3:x4     6      3
#> 9 x2:x5     2      4
Run Code Online (Sandbox Code Playgroud)

由reprex 包于 2022 年 3 月 6 日创建(v2.0.1)


ben*_*n23 10

解决方案

\n

这是一个与以下内容混合的基本 R 方法dplyr::bind_rows()

\n
tmp <- lapply(1:length(myList), function(i) {\n  tapply(setNames(myList[[i]], \n                  sapply(strsplit(names(myList[[i]]), ":"), \n                         function(x) paste0(sort(x), collapse = ":"))), \n         sapply(strsplit(names(myList[[i]]), ":"), \n                function(x) paste0(sort(x), collapse = ":")), sum)\n})\n\nbind_rows(tmp, .id = "listNo") |> \n  pivot_longer(!listNo, names_to = "var", values_to = "count", values_drop_na = T)\n\n# A tibble: 9 x 3\n  listNo var   count\n  <chr>  <chr> <dbl>\n1 1      x1:x2     3\n2 1      x3:x4     1\n3 2      x1:x2     3\n4 2      x3:x4     1\n5 2      x1:x1     1\n6 2      x1:x6     2\n7 3      x1:x2     1\n8 3      x3:x4     6\n9 4      x2:x5     2\n
Run Code Online (Sandbox Code Playgroud)\n

微基准测试

\n

出于好奇,我运行microbenchmark了现有的答案,似乎 @ThomasIsCoding 的解决方案在时间上击败了 @AllanCameron ,成为最佳解决方案:

\n
microbenchmark::microbenchmark(\n  Allan = {\n    `row.names<-`(do.call(rbind, Map(function(vec, name) {  \n      x <- names(vec)\n      l <- sapply(strsplit(x, ":"), function(y) {\n        paste0("x", sort(as.numeric(sub("\\\\D", "", y))), collapse = ":")\n      })\n      df <- setNames(as.data.frame(table(rep(l, vec))), c("var", "count"))\n      df$listNo <- name\n      df\n    }, vec = myList, name = names(myList))), NULL)\n    },\n  benson23 = {\n    tmp <- lapply(1:length(myList), function(i) {\n      tapply(setNames(myList[[i]], \n                      sapply(strsplit(names(myList[[i]]), ":"), \n                             function(x) paste0(sort(x), collapse = ":"))), \n             sapply(strsplit(names(myList[[i]]), ":"), \n                    function(x) paste0(sort(x), collapse = ":")), sum)\n    })\n    \n    bind_rows(tmp, .id = "listNo") |> \n      pivot_longer(!listNo, names_to = "var", values_to = "count", values_drop_na = T)\n    \n  },\n  tmfmnk = {\n    map_dfr(myList, enframe, .id = "listNo") %>%\n      mutate(var = map_chr(str_split(name, ":"), ~ str_c(sort(.), collapse = ":"))) %>%\n      group_by(listNo, var) %>%\n      summarise(count = sum(value))\n  },\n  zephryl = {\n    tibble(count = myList, listNo = names(myList)) %>%\n      unnest_longer(count, indices_to = "var") %>% \n      mutate(\n        var = str_extract_all(var, "\\\\d+"),\n        var = map_chr(var, ~ str_glue("x{sort(.x)[[1]]}:x{sort(.x)[[2]]}"))\n      ) %>% \n      group_by(listNo, var) %>%\n      summarize(count = sum(count), .groups = "drop")\n  },\n  PaulS = {\n    map_dfr(myList, identity, .id = "listNo") %>%\n      pivot_longer(cols = -listNo, values_drop_na = T) %>% \n      rowwise %>%\n      mutate(name = str_split(name, ":", simplify = T) %>% sort %>% \n               str_c(collapse = ":")) %>% \n      group_by(name, listNo) %>% \n      summarise(count = sum(value), .groups = "drop") \n  },\n  TIC1 = {\n    aggregate(\n      count ~ .,\n      transform(\n        cbind(\n          setNames(do.call(rbind, Map(stack, myList)), c("count", "var")),\n          listNo = rep(seq_along(myList), lengths(myList))\n        ),\n        var = sapply(\n          strsplit(as.character(var), ":"),\n          function(x) paste0(sort(x), collapse = ":")\n        )\n      ),\n      sum\n    )\n  },\n  TIC2 = {\n    aggregate(\n      count ~ .,\n      cbind(\n        var = unlist(sapply(\n          myList,\n          function(x) {\n            sapply(\n              strsplit(names(x), ":"),\n              function(v) paste0(sort(v), collapse = ":")\n            )\n          }\n        )),\n        setNames(stack(myList), c("count", "listNo"))\n      ),\n      sum\n    )\n  },\n  Ma\xc3\xabl = {\n    myList %>% \n      imap(~ .x %>% \n             enframe() %>% \n             separate(name, into = c("c1", "c2")) %>% \n             graph.data.frame(., directed = F) %>% \n             get.data.frame() %>% \n             group_by(from, to) %>% \n             summarise(count = sum(value)) %>% \n             unite(c("from","to"), col = "var", sep = ":") %>% \n             mutate(listNo = .y)) %>%\n      bind_rows()\n  })\n\nUnit: milliseconds\n     expr     min       lq      mean   median       uq      max neval  cld\n    Allan  2.1327  2.25920  2.475978  2.33445  2.45270  12.3697   100 a   \n benson23  3.5083  3.80855  4.150929  4.03700  4.27685  13.3313   100 a   \n   tmfmnk  5.4928  5.88520  6.324940  6.24190  6.66975   8.1777   100 ab  \n  zephryl 10.1629 10.89110 14.813878 11.58475 12.14085 221.0931   100   c \n    PaulS  7.7565  8.44360 11.402325  9.10860  9.47480 124.1965   100  bc \n     TIC1  3.5233  3.88805  8.240207  4.06640  4.26765 202.9082   100 a c \n     TIC2  1.8722  2.03240  2.247993  2.13230  2.24045  10.7320   100 a   \n     Ma\xc3\xabl 35.3066 39.52920 44.456091 40.96870 42.39480 170.8322   100    d\n
Run Code Online (Sandbox Code Playgroud)\n


tmf*_*mnk 9

另一种tidyverse选择可能是:

map_dfr(myList, enframe, .id = "listNo") %>%
    mutate(var = map_chr(str_split(name, ":"), ~ str_c(sort(.), collapse = ":"))) %>%
    group_by(listNo, var) %>%
    summarise(count = sum(value))

  listNo var   count
  <chr>  <chr> <dbl>
1 1      x1:x2     3
2 1      x3:x4     1
3 2      x1:x1     1
4 2      x1:x2     3
5 2      x1:x6     2
6 2      x3:x4     1
7 3      x1:x2     1
8 3      x3:x4     6
9 4      x2:x5     2
Run Code Online (Sandbox Code Playgroud)


Pau*_*ulS 8

另一种可能的解决方案,tidyverse基于:

\n
library(tidyverse)\n\nmap_dfr(myList, identity, .id = "listNo") %>%\n  pivot_longer(cols = -listNo, values_drop_na = T) %>% \n  rowwise %>%\n  mutate(name = str_split(name, ":", simplify = T) %>% sort %>% \n         str_c(collapse = ":")) %>% \n  group_by(name, listNo) %>% \n  summarise(count = sum(value), .groups = "drop") \n\n#> # A tibble: 9 \xc3\x97 3\n#>   name  listNo count\n#>   <chr> <chr>  <dbl>\n#> 1 x1:x1 2          1\n#> 2 x1:x2 1          3\n#> 3 x1:x2 2          3\n#> 4 x1:x2 3          1\n#> 5 x1:x6 2          2\n#> 6 x2:x5 4          2\n#> 7 x3:x4 1          1\n#> 8 x3:x4 2          1\n#> 9 x3:x4 3          6\n
Run Code Online (Sandbox Code Playgroud)\n


zep*_*ryl 8

一个 {tidyverse} 解决方案:

library(tidyverse)
               
tibble(count = myList, listNo = names(myList)) %>%
  unnest_longer(count, indices_to = "var") %>% 
  mutate(
    var = str_extract_all(var, "\\d+"),
    var = map_chr(var, ~ str_glue("x{sort(.x)[[1]]}:x{sort(.x)[[2]]}"))
  ) %>% 
  group_by(listNo, var) %>%
  summarize(count = sum(count), .groups = "drop")

# # A tibble: 9 x 3
#   listNo var   count
#   <chr>  <chr> <dbl>
# 1 1      x1:x2     3
# 2 1      x3:x4     1
# 3 2      x1:x1     1
# 4 2      x1:x2     3
# 5 2      x1:x6     2
# 6 2      x3:x4     1
# 7 3      x1:x2     1
# 8 3      x3:x4     6
# 9 4      x2:x5     2
Run Code Online (Sandbox Code Playgroud)