library(tidyverse)
# Load the measurement table that the aggregation snippets further down read
# (they reference its implementation, message.size, ts.in and ts.out columns).
stats <- read_csv(file = "stats.csv")
## Warning: Installed Rcpp (0.12.12) different from Rcpp used to build dplyr (0.12.11).
## Please reinstall dplyr to avoid random crashes or undefined behavior.
Run Code Online (Sandbox Code Playgroud)
我很确定在更新 Rcpp 之前，我就已经遇到了相同的行为。
sessionInfo()
## R version 3.3.2 (2016-10-31)
## Platform: x86_64-apple-darwin13.4.0 (64-bit)
## Running under: OS X El Capitan 10.11.6
##
## locale:
## [1] en_AU.UTF-8/en_AU.UTF-8/en_AU.UTF-8/C/en_AU.UTF-8/en_AU.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] dplyr_0.7.1 purrr_0.2.2.2 readr_1.1.1 tidyr_0.6.3
## [5] tibble_1.3.3 ggplot2_2.2.1 tidyverse_1.1.1
##
## loaded via a namespace (and not attached):
## [1] Rcpp_0.12.12 cellranger_1.1.0 plyr_1.8.4 bindr_0.1
## [5] forcats_0.2.0 tools_3.3.2 digest_0.6.12 lubridate_1.6.0
## [9] jsonlite_1.5 evaluate_0.10.1 nlme_3.1-131 gtable_0.2.0
## [13] lattice_0.20-35 pkgconfig_2.0.1 rlang_0.1.1 psych_1.7.5
## [17] yaml_2.1.14 parallel_3.3.2 haven_1.1.0 bindrcpp_0.2
## [21] xml2_1.1.1 httr_1.2.1 stringr_1.2.0 knitr_1.16
## [25] hms_0.3 rprojroot_1.2 grid_3.3.2 glue_1.1.1
## [29] R6_2.2.2 readxl_1.0.0 foreign_0.8-69 rmarkdown_1.6
## [33] modelr_0.1.0 reshape2_1.4.2 magrittr_1.5 backports_1.1.0
## [37] scales_0.4.1 htmltools_0.3.6 rvest_0.3.2 assertthat_0.2.0
## [41] mnormt_1.5-5 colorspace_1.3-2 stringi_1.1.5 lazyeval_0.2.0
## [45] munsell_0.4.3 broom_0.4.2
Run Code Online (Sandbox Code Playgroud)
# Approach 1: filter() + invoke_map() -- aggregate each group separately.

#' Summarise the processing window and throughput of one benchmark group.
#'
#' Reads the file-level `stats` table, keeps the rows whose `message.size`
#' equals `size` and whose `implementation` equals `impl`, and collapses them
#' into a single summary row.
#'
#' @param impl Value compared against the `implementation` column.
#' @param size Value compared against the `message.size` column; it is also
#'   the per-message size used in the rate formula.
#' @return A one-row tibble with `begin` (earliest `ts.in`), `end` (latest
#'   `ts.out`), `process.time` (a difftime, always in seconds) and
#'   `message.rate` (`size * 10000` divided by the elapsed seconds and by
#'   1024^2).
test <- function(impl, size) {
  stats %>%
    filter(message.size == size & implementation == impl) %>%
    select(ts.in, ts.out) %>%
    summarise(
      begin = min(ts.in),
      end = max(ts.out),
      # `end - begin` lets R choose the difftime unit automatically (secs,
      # mins, hours, ...), so as.double() returned minutes for spans over
      # 60 s and the rate was inflated by a factor of 60. Pin the unit.
      # NOTE(review): sample output printed after this snippet predates the
      # fix; its message.rate values for the longer runs are per minute.
      process.time = difftime(end, begin, units = "secs"),
      # 10000 is presumably the number of messages per run -- TODO confirm.
      message.rate = size * 10000 /
        as.double(process.time, units = "secs") / 1024 / 1024
    )
}
# Call `test` once for every implementation / message-size combination and
# row-bind the one-row summaries into a single tibble.
invoke_map_df(
  .f = test,
  .x = crossing(
    impl = c("Camel", "Spark"),
    size = c(1024, 1024 * 5, 1024 * 10)
  ) %>%
    transpose()
)
## # A tibble: 6 x 4
## begin end process.time message.rate
## <dttm> <dttm> <time> <dbl>
## 1 2017-07-17 04:27:52 2017-07-17 04:28:13 21 secs 0.4650298
## 2 2017-07-17 04:30:25 2017-07-17 04:32:02 97 secs 30.2029639
## 3 2017-07-17 04:32:58 2017-07-17 04:36:17 199 secs 29.4440955
## 4 2017-07-17 04:18:31 2017-07-17 04:18:54 23 secs 0.4245924
## 5 2017-07-17 04:19:47 2017-07-17 04:21:29 102 secs 28.7224265
## 6 2017-07-17 04:23:10 2017-07-17 04:26:28 198 secs 29.5928030
Run Code Online (Sandbox Code Playgroud)
# Approach 2: group_by() + summarise().
#
# Computes, per (implementation, message.size) group, the total payload size,
# the first `ts.in`, the last `ts.out`, the elapsed time and the throughput
# (total size / elapsed seconds / 1024^2), then drops the grouping columns.
stats %>%
  group_by(implementation, message.size) %>%
  summarise(
    total.size = sum(message.size),
    begin = min(ts.in),
    end = max(ts.out),
    # `end - begin` picks the difftime unit per group (secs for short spans,
    # mins for spans over a minute). The grouped result then carries one unit
    # label for mixed-unit values, and as.numeric() yields minutes for the
    # longer groups. Fixing the unit keeps both columns consistent.
    # NOTE(review): the sample output printed after this snippet predates the
    # fix (e.g. "1.616667 secs" there is really 1.616667 minutes).
    duration = difftime(end, begin, units = "secs"),
    message.rate = total.size /
      as.numeric(duration, units = "secs") / 1024 / 1024
  ) %>%
  ungroup() %>%
  select(begin, end, duration, message.rate)
## # A tibble: 6 x 4
## begin end duration message.rate
## <dttm> <dttm> <time> <dbl>
## 1 2017-07-17 04:27:52 2017-07-17 04:28:13 21.000000 secs 0.4650298
## 2 2017-07-17 04:30:25 2017-07-17 04:32:02 1.616667 secs 30.2029639
## 3 2017-07-17 04:32:58 2017-07-17 04:36:17 3.316667 secs 29.4440955
## 4 2017-07-17 04:18:31 2017-07-17 04:18:54 23.000000 secs 0.4245924
## 5 2017-07-17 04:19:47 2017-07-17 04:21:29 1.700000 secs 28.7224265
## 6 2017-07-17 04:23:10 2017-07-17 04:26:28 3.300000 secs 29.5928030
Run Code Online (Sandbox Code Playgroud)
由于某种原因，process.time（即此处的 duration 列）计算不正确，但依赖于它的 message.rate 却是正确的！我在这里做错了什么吗？
# Approach 3: group_by() + do().
#
# Builds one summary tibble per (implementation, message.size) group -- total
# payload size, first `ts.in`, last `ts.out`, elapsed time and throughput
# (total size / elapsed seconds / 1024^2) -- and keeps only the timing columns.
stats %>%
  group_by(implementation, message.size) %>%
  do(tibble(
    total.size = sum(.$message.size),
    begin = min(.$ts.in),
    end = max(.$ts.out),
    # `end - begin` chooses the difftime unit automatically, so as.numeric()
    # returned minutes for groups that ran longer than 60 s and the rate was
    # 60x too large for them. Pin the unit to seconds.
    # NOTE(review): the sample output printed after this snippet predates the
    # fix; its message.rate values for the longer runs are per minute.
    duration = difftime(end, begin, units = "secs"),
    message.rate = total.size /
      as.numeric(duration, units = "secs") / 1024 / 1024
  )) %>%
  ungroup() %>%
  select(begin, end, duration, message.rate)
## # A tibble: 6 x 4
## begin end duration message.rate
## <dttm> <dttm> <time> <dbl>
## 1 2017-07-17 04:27:52 2017-07-17 04:28:13 21 secs 0.4650298
## 2 2017-07-17 04:30:25 2017-07-17 04:32:02 97 secs 30.2029639
## 3 2017-07-17 04:32:58 2017-07-17 04:36:17 199 secs 29.4440955
## 4 2017-07-17 04:18:31 2017-07-17 04:18:54 23 secs 0.4245924
## 5 2017-07-17 04:19:47 2017-07-17 04:21:29 102 secs 28.7224265
## 6 2017-07-17 04:23:10 2017-07-17 04:26:28 198 secs 29.5928030
Run Code Online (Sandbox Code Playgroud)
其行为与 filter 和 invoke_map 组合的结果一致。
我本来想发表评论，但显然这需要 50 点声望。我将 R 更新到 3.3.3，将 RStudio 更新到 1.0.143（在 OS X Yosemite 上），然后在今天早些时候更新了所有软件包。之后开始出现相同的错误，不过是在加载 dplyr 时一开始就出现：
警告消息:安装的 Rcpp (0.12.12) 与用于构建 dplyr (0.12.11) 的 Rcpp 不同。请重新安装 dplyr 以避免随机崩溃或未定义的行为。
重新安装 dplyr 两次并没有使错误消失 - 所以我想我会等待更新的 dplyr (现在是 0.7.1)并祈祷同时我只会遇到随机崩溃,而不是未定义的行为。