Ank*_*ary 5 r data-analysis accumulate dplyr rolling-computation
我想在 R 中使用 dplyr 和 lag 函数复现以下公式。代码在每组的第二行之前都能正常工作,但从第三行开始一直返回 0。
forecast = lag(Value, 1) * (1 - lag(Attrition) / 52)
状况:
从第 3 行开始我得到的都是 0。下面是我的可复现代码。
# Number the rows within each Patch and compute a one-step forecast from the
# previous row: forecast = lag(Value) * (1 - lag(Attrition) / 52).
# NOTE: lag() only reads the observed Value column, never an earlier forecast,
# so once the previous Value is 0 the forecast is 0 as well (row 3 onwards in
# the output below).
data <- data %>%
  group_by(Patch) %>%
  mutate(
    id = row_number(),
    forecast = lag(Value, 1) * (1 - lag(Attrition, 1) / 52)
  )
# tbl_df() is deprecated since dplyr 1.0.0; as_tibble() is its replacement.
as_tibble(data)
# A tibble: 12 x 6
Patch Week Value Attrition id forecast
<chr> <date> <dbl> <dbl> <int> <dbl>
1 11P11 2021-06-14 2 0.075 1 NA
2 11P11 2021-06-21 0 0.075 2 2.00
3 11P11 2021-06-28 0 0.075 3 0
4 11P12 2021-06-14 3 0.075 1 NA
5 11P12 2021-06-21 0 0.075 2 3.00
6 11P12 2021-06-28 0 0.075 3 0
7 11P12 2021-07-05 0 0.075 4 0
8 11P13 2021-06-14 1 0.075 1 NA
9 11P13 2021-06-21 0 0.075 2 0.999
10 11P13 2021-06-28 0 0.075 3 0
11 11P13 2021-07-05 0 0.075 4 0
12 11P13 2021-07-12 0 0.075 5 0
> dput(data)
structure(list(Patch = c("11P11", "11P11", "11P11", "11P12",
"11P12", "11P12", "11P12", "11P13", "11P13", "11P13", "11P13",
"11P13"), Week = structure(c(18792, 18799, 18806, 18792, 18799,
18806, 18813, 18792, 18799, 18806, 18813, 18820), class = "Date"),
Value = c(2, 0, 0, 3, 0, 0, 0, 1, 0, 0, 0, 0), Attrition = c(0.075,
0.075, 0.075, 0.075, 0.075, 0.075, 0.075, 0.075, 0.075, 0.075,
0.075, 0.075), id = c(1L, 2L, 3L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 5L), forecast = c(NA, 1.99711538461538, 0, NA, 2.99567307692308,
0, 0, NA, 0.998557692307692, 0, 0, 0)), row.names = c(NA,
-12L), groups = structure(list(Patch = c("11P11", "11P12", "11P13"
), .rows = structure(list(1:3, 4:7, 8:12), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, -3L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
Run Code Online (Sandbox Code Playgroud)
棘手之处在于 forecast 变量需要逐行递推构建(每一行都依赖上一行的预测值),这就是它在标准的 mutate() 调用中不起作用的原因。
下面是我的做法,依赖 purrr 的 map() 和 reduce() 来逐行整合数据:
library(tidyverse)

# Build the forecast recursively within each Patch. Every row's forecast is
#   base * (1 - previous Attrition / 52)
# where `base` is the previous row's Value when it is non-zero and the
# previous row's forecast otherwise. For example, with Value = 2 and
# Attrition = 0.075 the first forecast is 2 * (1 - 0.075 / 52), about 1.997.
data %>%
  # Reset forecast; NA_real_ keeps the column numeric from the start.
  mutate(forecast = NA_real_) %>%
  split(~ Patch) %>%
  map(~ .x %>%
    # Turn the patch into a list of one-row tibbles ...
    pmap(~ tibble(...)) %>%
    # ... and fold them back together, carrying the forecast forward.
    reduce(\(.x, .y) {
      prev <- slice_tail(.x)
      base_value <- ifelse(prev$Value != 0, prev$Value, prev$forecast)
      # Parentheses matter: the attrition term scales the base value, it is
      # not subtracted from it.
      bind_rows(
        .x,
        mutate(.y, forecast = base_value * (1 - prev$Attrition / 52))
      )
    })) %>%
  reduce(bind_rows)
Run Code Online (Sandbox Code Playgroud)
返回:
# A tibble: 12 x 6
Patch Week Value Attrition id forecast
<chr> <date> <dbl> <dbl> <int> <dbl>
1 11P11 2021-06-14 2 0.075 1 NA
2 11P11 2021-06-21 0 0.075 2 1.98
3 11P11 2021-06-28 0 0.075 3 1.97
4 11P12 2021-06-14 3 0.075 1 NA
5 11P12 2021-06-21 0 0.075 2 2.98
6 11P12 2021-06-28 0 0.075 3 2.97
7 11P12 2021-07-05 0 0.075 4 2.95
8 11P13 2021-06-14 1 0.075 1 NA
9 11P13 2021-06-21 0 0.075 2 0.985
10 11P13 2021-06-28 0 0.075 3 0.97
11 11P13 2021-07-05 0 0.075 4 0.955
12 11P13 2021-07-12 0 0.075 5 0.94
Run Code Online (Sandbox Code Playgroud)
使用的数据:
# Sample data as produced by dput() on the tibble grouped by Patch.
# It already carries a `forecast` column holding the non-recursive lag()
# attempt (NA on the first row of each patch, 0 from the third row onwards).
data <- structure(list(Patch = c("11P11", "11P11", "11P11", "11P12", "11P12", "11P12", "11P12", "11P13", "11P13", "11P13", "11P13", "11P13"), Week = structure(c(18792, 18799, 18806, 18792, 18799, 18806, 18813, 18792, 18799, 18806, 18813, 18820), class = "Date"), Value = c(2, 0, 0, 3, 0, 0, 0, 1, 0, 0, 0, 0), Attrition = c(0.075, 0.075, 0.075, 0.075, 0.075, 0.075, 0.075, 0.075, 0.075, 0.075, 0.075, 0.075), id = c(1L, 2L, 3L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 5L), forecast = c(NA, 1.99711538461538, 0, NA, 2.99567307692308, 0, 0, NA, 0.998557692307692, 0, 0, 0)), row.names = c(NA, -12L), groups = structure(list(Patch = c("11P11", "11P12", "11P13"), .rows = structure(list(1:3, 4:7, 8:12), ptype = integer(0), class = c("vctrs_list_of", "vctrs_vctr", "list"))), row.names = c(NA, -3L), class = c("tbl_df", "tbl", "data.frame"), .drop = TRUE), class = c("grouped_df", "tbl_df", "tbl", "data.frame"))
Run Code Online (Sandbox Code Playgroud)