在同一个变异函数 dplyr 中使用滞后结果

Question

在同一个变异函数 dplyr 中使用滞后结果

Ank*_*ary 5 r data-analysis accumulate dplyr rolling-computation

我想使用dplyr+ 滞后函数复制以下公式 R。代码工作到每组的第二行，然后继续给我 0

预测 = 滞后（价值，1）*（1-滞后（损耗）/52）

状况：

预测的第一个值应该是空的，因为我们已经有了值。
第二行根据 Attrition 和 Value 列的先前值进行计算。
第三行之前的值应分别从预测（不是值列）和损耗列中选取。

我从第 3 行开始得到 0。下面是我的复制代码。

data <- data %>% group_by(Patch) %>% mutate(id = row_number())
data <- data %>% group_by(Patch) %>% mutate(forecast = lag(Value,1)*(1-lag(Attrition,1)/52))

tbl_df(data)
# A tibble: 12 x 6
   Patch Week       Value Attrition    id forecast
   <chr> <date>     <dbl>     <dbl> <int>    <dbl>
 1 11P11 2021-06-14     2     0.075     1   NA    
 2 11P11 2021-06-21     0     0.075     2    2.00 
 3 11P11 2021-06-28     0     0.075     3    0    
 4 11P12 2021-06-14     3     0.075     1   NA    
 5 11P12 2021-06-21     0     0.075     2    3.00 
 6 11P12 2021-06-28     0     0.075     3    0    
 7 11P12 2021-07-05     0     0.075     4    0    
 8 11P13 2021-06-14     1     0.075     1   NA    
 9 11P13 2021-06-21     0     0.075     2    0.999
10 11P13 2021-06-28     0     0.075     3    0    
11 11P13 2021-07-05     0     0.075     4    0    
12 11P13 2021-07-12     0     0.075     5    0   


> dput(data)
structure(list(Patch = c("11P11", "11P11", "11P11", "11P12", 
"11P12", "11P12", "11P12", "11P13", "11P13", "11P13", "11P13", 
"11P13"), Week = structure(c(18792, 18799, 18806, 18792, 18799, 
18806, 18813, 18792, 18799, 18806, 18813, 18820), class = "Date"), 
    Value = c(2, 0, 0, 3, 0, 0, 0, 1, 0, 0, 0, 0), Attrition = c(0.075, 
    0.075, 0.075, 0.075, 0.075, 0.075, 0.075, 0.075, 0.075, 0.075, 
    0.075, 0.075), id = c(1L, 2L, 3L, 1L, 2L, 3L, 4L, 1L, 2L, 
    3L, 4L, 5L), forecast = c(NA, 1.99711538461538, 0, NA, 2.99567307692308, 
    0, 0, NA, 0.998557692307692, 0, 0, 0)), row.names = c(NA, 
-12L), groups = structure(list(Patch = c("11P11", "11P12", "11P13"
), .rows = structure(list(1:3, 4:7, 8:12), ptype = integer(0), class = c("vctrs_list_of", 
"vctrs_vctr", "list"))), row.names = c(NA, -3L), class = c("tbl_df", 
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"))

Run Code Online (Sandbox Code Playgroud)

Answer 1

kti*_*tiu 1

棘手的是您需要连续构建forecast变量，这就是它在标准调用中不起作用的原因mutate()。

purrr这是我依赖于map()和进行数据整合的方法reduce()：

library(tidyverse)

data %>%
  mutate(forecast = NA) %>%
  split(~ Patch) %>%
  map(~ .x %>%
          pmap(~ tibble(...)) %>%
          reduce(\(.x, .y) {
            prev <- slice_tail(.x)
            base_value <- ifelse(prev$Value != 0, prev$Value, prev$forecast)
            bind_rows(.x,
                      mutate(.y,
                             forecast = base_value * 1 - prev$Attrition / 5))
          })) %>%
  reduce(bind_rows)

Run Code Online (Sandbox Code Playgroud)

返回：

# A tibble: 12 x 6
   Patch Week       Value Attrition    id forecast
   <chr> <date>     <dbl>     <dbl> <int>    <dbl>
 1 11P11 2021-06-14     2     0.075     1   NA
 2 11P11 2021-06-21     0     0.075     2    1.98
 3 11P11 2021-06-28     0     0.075     3    1.97
 4 11P12 2021-06-14     3     0.075     1   NA
 5 11P12 2021-06-21     0     0.075     2    2.98
 6 11P12 2021-06-28     0     0.075     3    2.97
 7 11P12 2021-07-05     0     0.075     4    2.95
 8 11P13 2021-06-14     1     0.075     1   NA
 9 11P13 2021-06-21     0     0.075     2    0.985
10 11P13 2021-06-28     0     0.075     3    0.97
11 11P13 2021-07-05     0     0.075     4    0.955
12 11P13 2021-07-12     0     0.075     5    0.94

Run Code Online (Sandbox Code Playgroud)

使用的数据：

data <- structure(list(Patch = c("11P11", "11P11", "11P11", "11P12", "11P12", "11P12", "11P12", "11P13", "11P13", "11P13", "11P13", "11P13"), Week = structure(c(18792, 18799, 18806, 18792, 18799, 18806, 18813, 18792, 18799, 18806, 18813, 18820), class = "Date"), Value = c(2, 0, 0, 3, 0, 0, 0, 1, 0, 0, 0, 0), Attrition = c(0.075, 0.075, 0.075, 0.075, 0.075, 0.075, 0.075, 0.075, 0.075, 0.075, 0.075, 0.075), id = c(1L, 2L, 3L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 5L), forecast = c(NA, 1.99711538461538, 0, NA, 2.99567307692308, 0, 0, NA, 0.998557692307692, 0, 0, 0)), row.names = c(NA, -12L), groups = structure(list(Patch = c("11P11", "11P12", "11P13"), .rows = structure(list(1:3, 4:7, 8:12), ptype = integer(0), class = c("vctrs_list_of", "vctrs_vctr", "list"))), row.names = c(NA, -3L), class = c("tbl_df", "tbl", "data.frame"), .drop = TRUE), class = c("grouped_df", "tbl_df", "tbl", "data.frame"))

Run Code Online (Sandbox Code Playgroud)

归档时间：	4 年，11 月前
查看次数：	107 次
最近记录：	4 年，10 月前