我正在尝试对多个变量进行 t 检验。假设我想按 `am` 分组,然后检验 `mpg` 在 `vs` 的两个取值(0 和 1)之间是否存在统计学上的显著差异。
下面是一个使用 `summarize_each` 的旧答案,但我想改用 dplyr 包中的 `across`。
library(tidyverse)
library(broom)
# Legacy approach: within each `am` group, run a two-sample t-test on `mpg`
# comparing the rows with vs == 0 against the rows with vs == 1, and keep the
# p-value plus the lower/upper bounds of the confidence interval.
# `summarise_each()` and `funs()` are both deprecated (see the warnings printed
# below); inside `funs()`, `.` stands for the column being summarised.
# The three expressions are unnamed, so the output columns get auto-generated
# names (`vars_$`, `vars_[..2`, `vars_[..3`).
mtcars %>%
group_by(am) %>%
summarise_each(funs(
t.test(.[vs == 0], .[vs == 1])$p.value,
t.test(.[vs == 0], .[vs == 1])$conf.int[1],
t.test(.[vs == 0], .[vs == 1])$conf.int[2]
),
vars = mpg)
#> Warning: `summarise_each_()` was deprecated in dplyr 0.7.0.
#> Please use `across()` instead.
#> Warning: `funs()` was deprecated in dplyr 0.8.0.
#> Please use a list of either functions or lambdas:
#>
#> # Simple named list:
#> list(mean = mean, median = median)
#>
#> # Auto named with `tibble::lst()`:
#> tibble::lst(mean, median)
#>
#> # Using lambdas
#> list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
#> # A tibble: 2 x 4
#> am `vars_$` `vars_[..2` `vars_[..3`
#> <dbl> <dbl> <dbl> <dbl>
#> 1 0 0.000395 -8.33 -3.05
#> 2 1 0.00459 -14.0 -3.27
## clean names via broom
# Reference result for the am == 0 group: two-sample t-test of mpg between
# vs == 0 and vs == 1, returned as a one-row tibble with clean column names.
t.test(
  mtcars %>% filter(am == 0, vs == 0) %>% pull(mpg),
  mtcars %>% filter(am == 0, vs == 1) %>% pull(mpg)
) %>%
  broom::tidy()
#> # A tibble: 1 x 10
#> estimate estimate1 estimate2 statistic p.value parameter conf.low conf.high
#> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 -5.69 15.0 20.7 -4.63 0.000395 14.0 -8.33 -3.05
#> # ... with 2 more variables: method <chr>, alternative <chr>
# Reference result for the am == 1 group: two-sample t-test of mpg between
# vs == 0 and vs == 1, returned as a one-row tibble with clean column names.
t.test(
  mtcars %>% filter(am == 1, vs == 0) %>% pull(mpg),
  mtcars %>% filter(am == 1, vs == 1) %>% pull(mpg)
) %>%
  broom::tidy()
#> # A tibble: 1 x 10
#> estimate estimate1 estimate2 statistic p.value parameter conf.low conf.high
#> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 -8.62 19.8 28.4 -3.55 0.00459 11.0 -14.0 -3.27
#> # ... with 2 more variables: method <chr>, alternative <chr>
## how to pass functions into .fns??
# Attempt to port the summarise_each() call to across(): for `mpg` within each
# `am` group, extract the t-test p-value and confidence-interval bounds.
# This call fails with the error shown below.
# NOTE(review): the elements of `.fns` are plain calls, not functions or
# `~` lambdas, so they are evaluated immediately instead of being applied to
# the column. The error ("Input has size 11 but subscript `i` has size 19")
# suggests `.` then refers to the whole 11-column data frame rather than to
# `mpg` — presumably the magrittr placeholder; confirm.
mtcars %>%
group_by(am) %>%
summarise(across(
.cols = mpg,
.fns = list(
t.test(.[vs == 0], .[vs == 1])$p.value,
t.test(.[vs == 0], .[vs == 1])$conf.int[1],
t.test(.[vs == 0], .[vs == 1])$conf.int[2]
)
))
#> Error: Problem with `summarise()` input `..1`.
#> i `..1 = across(...)`.
#> x Must subset columns with a valid subscript vector.
#> i Logical subscripts must match the size of the indexed input.
#> x Input has size 11 but subscript `i` has size 19.
#> i The error occurred in group 1: am = 0.
Run Code Online (Sandbox Code Playgroud)
由reprex 包(v2.0.1)创建于 2021-09-23
如果使用 broom 包中的 `tidy()`,可以这样写:
library(dplyr)
library(broom)
library(tidyr)
# Per `am` group: t-test mpg between vs == 0 and vs == 1, tidy the result,
# keep only the p-value and confidence bounds, and store that one-row tibble
# in a list-column named `mpg`; unnest() then spreads it into regular columns.
mtcars %>%
  group_by(am) %>%
  summarise(across(
    .cols = mpg,
    .fns = ~ list(
      select(
        tidy(t.test(.x[vs == 0], .x[vs == 1])),
        p.value, conf.low, conf.high
      )
    )
  )) %>%
  unnest(mpg)
Run Code Online (Sandbox Code Playgroud)
-输出
# A tibble: 2 x 4
am p.value conf.low conf.high
<dbl> <dbl> <dbl> <dbl>
1 0 0.000395 -8.33 -3.05
2 1 0.00459 -14.0 -3.27
Run Code Online (Sandbox Code Playgroud)
在 OP 的代码中,`list` 里的每个元素都需要写成 lambda 函数(以 `~` 开头):
# Per `am` group: apply three named lambdas to `mpg`, each running a t-test
# between vs == 0 and vs == 1 and extracting one statistic. The list names
# become the suffixes of the output columns (mpg_p.value, mpg_conf.low,
# mpg_conf.high).
mtcars %>%
  group_by(am) %>%
  summarise(across(
    mpg,
    list(
      p.value = ~ t.test(.x[vs == 0], .x[vs == 1])$p.value,
      conf.low = ~ t.test(.x[vs == 0], .x[vs == 1])$conf.int[1],
      conf.high = ~ t.test(.x[vs == 0], .x[vs == 1])$conf.int[2]
    )
  ))
Run Code Online (Sandbox Code Playgroud)
-输出
# A tibble: 2 x 4
am mpg_p.value mpg_conf.low mpg_conf.high
<dbl> <dbl> <dbl> <dbl>
1 0 0.000395 -8.33 -3.05
2 1 0.00459 -14.0 -3.27
Run Code Online (Sandbox Code Playgroud)
| 归档时间: |
|
| 查看次数: |
378 次 |
| 最近记录: |