Furrr / purrr 进度条与计算进度根本不同步

Fre*_*son 5 r pmap purrr furrr

我想创建一个函数,它接受一个函数并对 a 中的每一行应用一次,tibble参数存储在 a 的相应命名列中tibble 我意识到这听起来有点奇怪,但我希望面向用户的函数/功能简单。

在大多数情况下,处理会花费很多时间,所以我真的更喜欢有进度条功能,这就是我发现很大麻烦的地方:

此代码有效(然后没有进度条):

library(tibble)
library(dplyr)
library(purrr)
library(furrr)
library(tidyr)
library(wrassp)
library(progressr)

xf <- function(x,trim,na.rm,ds="ded"){
  return(x*trim*na.rm)
}

xf2 <- function(x,trim,na.rm,ds="ded"){
  return(list("a"=x,"b"=trim))
}

xf3 <- function(x,trim,na.rm,ds="ded"){
  return(data.frame("a"=x,"b"=trim))
}

mymap <- function(f,...){
  plan(multisession)
  exDF <- tribble(
    ~x, ~trim, ~na.rm, ~notarg, ~listOfFiles, ~toFile,
    0.5, 0, TRUE, 11.2, "~/Desktop/a1.wav", FALSE, 
    0.4, 0.5, TRUE, 12, "~/Desktop/a1.wav", FALSE
  )

  dotArgs <- list(...)
  dotArgsRT <- as_tibble_row(dotArgs)

  dotArgsNames <- names(dotArgs)
  
  allArgsNames <- formalArgs(f)
  
  exDF %>% 
    select(-any_of(!!dotArgsNames)) %>%
    bind_cols(dotArgsRT) %>% 
    select(any_of(allArgsNames)) %>%
    rowwise() %>% 
    mutate(temp = list(future_pmap(.,.f=f,.progress=FALSE))) %>% 
    tibble::rownames_to_column(var = "sl_rowIdx") %>% 
    mutate(out = list(map(temp,as_tibble))) %>%
    select(-temp) %>% 
    unnest(out) %>%
    unnest(out)
}

mymap(xf,c=20,a=20,ds=1) 
mymap(xf2,c=20,a=20,ds=1)
mymap(xf3,c=20,a=20,ds=1)
Run Code Online (Sandbox Code Playgroud)

这段代码是有效的(抱歉扩展示例,但我想强制显示进度条):

library(tibble)
library(dplyr)
library(purrr)
library(furrr)
library(tidyr)
library(wrassp)
library(progressr)

xf <- function(x,trim,na.rm,ds="ded"){
  return(x*trim*na.rm)
}

mymap <- function(f,...){
  plan(multicore)
  exDF <- tribble(
    ~x, ~trim, ~na.rm, ~notarg, ~listOfFiles, ~toFile,
    0.5, 0, TRUE, 11.2, "~/Desktop/a1.wav", FALSE, 
    0.4, 0.5, TRUE, 12, "~/Desktop/a1.wav", FALSE
  )
  
  exDF <- exDF %>% 
    bind_rows(exDF) %>% 
    bind_rows(exDF) %>% 
    bind_rows(exDF) %>% 
    bind_rows(exDF) 
  exDF <- exDF   %>% 
    bind_rows(exDF) %>% 
    bind_rows(exDF) %>% 
    bind_rows(exDF) %>% 
    bind_rows(exDF) %>% 
    bind_rows(exDF) %>% 
    bind_rows(exDF) %>% 
    bind_rows(exDF) %>% 
    bind_rows(exDF) %>% 
    bind_rows(exDF) %>% 
    bind_rows(exDF) %>% 
    bind_rows(exDF)
  
  dotArgs <- list(...)
  dotArgsRT <- as_tibble_row(dotArgs)
  
  dotArgsNames <- names(dotArgs)
  
  allArgsNames <- formalArgs(f)

  p <- progressr::progressor(steps = nrow(exDF))

  pWrap <- function(fun=f,...){
    
    iDA <- list(...)
    p(message="processing")
    #Sys.sleep(0.1)
    do.call(fun,iDA)
    
  }

 out <- exDF %>% 
    select(-any_of(!!dotArgsNames)) %>%
    bind_cols(dotArgsRT) %>% 
    select(any_of(allArgsNames)) %>%
    rowwise() %>% 
    mutate(temp = list(future_pmap(.,.f=pWrap))) %>% 
    tibble::rownames_to_column(var = "sl_rowIdx") %>% 
    mutate(out = list(map(temp,as_tibble))) %>%
    select(-temp) %>% 
    unnest(out) %>%
    unnest(out)

    return(out)


}

mymap(xf,c=20,a=20,ds=1)
Run Code Online (Sandbox Code Playgroud)

但是,如果我这样调用该函数,则不会显示进度条,但前提是我这样调用它:

with_progress(mymap(xf,c=20,a=20,ds=1))
Run Code Online (Sandbox Code Playgroud)

并且,进度条很快出现,然后消失,然后函数处理数据一段时间,然后返回数据。

因此,进度条并不能真正向用户提供功能的整体进展信息。

我想这与在预期返回值时评估 dplyr 调用有关?

但是,我如何强制进度条与该过程同步呢?

我尝试使用 justpmap而不是来future_pmap解决尚未解决的值的潜在问题,但这似乎不是问题。

我感谢我能得到的所有帮助。

Nir*_*ham 2

library(tidyverse)
library(furrr)
library(progressr)

xf <- function(x,trim,na.rm,ds="ded"){
  Sys.sleep(sample.int(20,1)/10)
  return(x*trim*na.rm)
}

mymap <- function(f,...){
  handlers(global = TRUE)
  
  plan(multisession)
  exDF <- tribble(
    ~x, ~trim, ~na.rm, ~notarg, ~listOfFiles, ~toFile,
    0.5, 0, TRUE, 11.2, "~/Desktop/a1.wav", FALSE, 
    0.4, 0.5, TRUE, 12, "~/Desktop/a1.wav", FALSE,
    0.5, 0, TRUE, 11.2, "~/Desktop/a1.wav", FALSE,
    0.4, 0.5, TRUE, 12, "~/Desktop/a1.wav", FALSE,
    0.5, 0, TRUE, 11.2, "~/Desktop/a1.wav", FALSE,
    0.4, 0.5, TRUE, 12, "~/Desktop/a1.wav", FALSE,
    0.5, 0, TRUE, 11.2, "~/Desktop/a1.wav", FALSE,
    0.4, 0.5, TRUE, 12, "~/Desktop/a1.wav", FALSE,
    0.5, 0, TRUE, 11.2, "~/Desktop/a1.wav", FALSE,
    0.4, 0.5, TRUE, 12, "~/Desktop/a1.wav", FALSE,
    0.5, 0, TRUE, 11.2, "~/Desktop/a1.wav", FALSE,
    0.4, 0.5, TRUE, 12, "~/Desktop/a1.wav", FALSE,
    0.5, 0, TRUE, 11.2, "~/Desktop/a1.wav", FALSE,
    0.4, 0.5, TRUE, 12, "~/Desktop/a1.wav", FALSE,
    0.5, 0, TRUE, 11.2, "~/Desktop/a1.wav", FALSE,
    0.4, 0.5, TRUE, 12, "~/Desktop/a1.wav", FALSE
  )
  
  dotArgs <- list(...)
  dotArgsRT <- as_tibble_row(dotArgs)
  
  dotArgsNames <- names(dotArgs)
  
  allArgsNames <- formalArgs(f)
  
   
  setupdf <-   exDF %>% 
    select(-any_of(!!dotArgsNames)) %>%
    bind_cols(dotArgsRT) %>% 
    select(any_of(c(allArgsNames,"id"))) %>%
    rowwise() 
  
  num_to_do <- nrow(setupdf)
  cat("\nWill be doing ",num_to_do,"\n")
  p <- progressor(num_to_do)

  
  f2 <- function(...){
    result <- f(...)
    p()
    result
  }
    part1 <- setupdf%>% 
    mutate(temp = list(pmap(cur_data(),.f=f2))) 
  
    print("calculationa are complete ")
    
    part1 %>%
    tibble::rownames_to_column(var = "sl_rowIdx") %>% 
    mutate(out = list(map(temp,as_tibble))) %>%
    select(-temp) %>% 
    unnest(out) %>%
    unnest(out)
}



mymap(xf,c=20,a=20,ds=1) 
Run Code Online (Sandbox Code Playgroud)