使用 doParallel 在 R 中并行化 keras 模型

Fra*_*ani 6 foreach r doparallel keras tensorflow

我正在尝试使用 keras for R 集成多个神经网络。为此,我想通过使用“foreach”循环来并行化不同网络的训练。

# Build `n_nets` bagged sub-samples of the training data and one compiled
# (still untrained) network per sub-sample.
#
# Reads from the enclosing environment: `x_train`, `y_train` and
# `custom_activation` (all defined outside this snippet).
# Produces: `models`, `x_bagged`, `y_bagged` -- lists of length `n_nets`.
n_nets <- 2
bag_frac <- 0.7
len <- nrow(x_train)

# Preallocate instead of growing the lists inside the loop.
models <- vector("list", n_nets)
x_bagged <- vector("list", n_nets)
y_bagged <- vector("list", n_nets)

# seq_len() yields an empty sequence when n_nets is 0, whereas 1:n_nets
# would iterate over c(1, 0).
for (i in seq_len(n_nets)) {
  # Draw a sub-sample (without replacement) of bag_frac of the rows.
  sam <- sample(len, floor(bag_frac * len), replace = FALSE)

  # drop = FALSE keeps a matrix even when x_train has a single column, so
  # the data still matches input_shape = ncol(x_train) below.
  x_bagged[[i]] <- x_train[sam, , drop = FALSE]
  # NOTE(review): assumes y_train is a plain vector -- confirm.
  y_bagged[[i]] <- y_train[sam]

  models[[i]] <- keras_model_sequential()

  # The piped result is not reassigned: as in the original snippet, this
  # relies on keras adding the layers to the sequential model in place.
  models[[i]] %>%
    layer_dense(
      units = 100,
      input_shape = ncol(x_train),
      activation = "relu",
      kernel_initializer = "glorot_normal"
    ) %>%
    layer_batch_normalization() %>%
    layer_dense(
      units = 100,
      activation = custom_activation,
      kernel_initializer = "glorot_normal"
    ) %>%
    layer_dense(
      units = 1,
      activation = "linear",
      kernel_initializer = "glorot_normal"
    )

  # NOTE(review): `lr` is kept exactly as in the original; newer keras
  # releases may prefer `learning_rate` -- confirm against the installed
  # version.
  models[[i]] %>% compile(
    loss = "MSE",
    optimizer = optimizer_sgd(lr = 0.01)
  )
}


# Attempt to train the networks in `models` in parallel, one foreach task
# per network.
#
# NOTE: this is the snippet that reproduces the problem described in the
# surrounding text. It works with %do%, but with %dopar% task 1 fails with
# "'what' must be a function or character string". According to the answer
# further down, Keras model objects cannot be sent to the workers unless
# they are serialized first.
library(foreach)
library(doParallel)
# Create a cluster of 2 workers and register it as the foreach backend.
cl<-makeCluster(2)
registerDoParallel(cl)
# Number of training epochs passed to fit() below.
nep <- 10

# One task per network; .packages asks each worker to load keras.
 foreach(i = 1:n_nets,.packages=c("keras")) %dopar% { 
         # Fit the i-th model on its own bagged sub-sample.
         models[[i]] %>% keras::fit(
  x_bagged[[i]], y_bagged[[i]], 
  epochs = nep,
  validation_split = 0.1,
  batch_size =256,
  verbose=1
)
} 
# Shut the worker processes down.
stopCluster(cl)
Run Code Online (Sandbox Code Playgroud)

使用 %do% 代替 %dopar% 运行这段代码没有任何问题;但是,当我尝试在多个内核上同时训练(拟合)这些网络时,出现以下错误:

Error in { : task 1 failed - "'what' must be a function or character string"
({ 中的错误:任务 1 失败 - “'what' 必须是函数或字符串”)
回溯(Traceback):

  1. foreach(i = 1:n_nets, .packages = c("keras")) %dopar% {
         models[[i]] %>% keras::fit(x_bagged[[i]], y_bagged[[i]],
             epochs = nep, validation_split = 0.1, batch_size = 256,
             verbose = 1)
     }
  2. e$fun(obj, substitute(ex), parent.frame(), e$data)

有谁知道该如何解决这个错误?或者,有没有其他方法可以在 R 中并行化模型训练?

提前感谢!

小智 4

虽然这个问题很老了,但我遇到了同样的问题,所以在这里发布解决方案。问题在于:Keras 模型对象如果不先序列化,就无法传输给工作进程(worker)。一个快速的解决方法是:在把模型发送给工作进程之前先对其进行序列化,然后在各个工作进程本地对它们进行反序列化:

# Train the networks in `models` in parallel by shipping them to the
# workers in serialized form.
#
# Reads from the enclosing environment: `models`, `x_bagged`, `y_bagged`
# (the lists built earlier, one element per network).
# Produces: `models_par` (serialized untrained models) and `models_fitted`
# (trained models, unserialized on the master).
library(foreach)
library(doParallel)

cl <- makeCluster(2)
registerDoParallel(cl)
nep <- 10

# Serialize models before sending them to the workers.
# The source list is `models`; `models_par` does not exist before this line.
models_par <- lapply(models, keras::serialize_model)

# Send each serialized model together with its own bagged sample. Several
# iteration arguments are stepped through in lockstep, so no index variable
# is needed inside the loop body.
fitted_raw <- tryCatch(
  foreach(
    model = models_par,
    x = x_bagged,
    y = y_bagged,
    .packages = c("keras")
  ) %dopar% {
    # Unserialize locally on the worker.
    model_local <- keras::unserialize_model(model)

    keras::fit(
      model_local,
      x, y,
      epochs = nep,
      validation_split = 0.1,
      batch_size = 256,
      verbose = 1
    )

    # Serialize before sending back to master.
    keras::serialize_model(model_local)
  },
  # Always release the workers, even when a task fails.
  finally = stopCluster(cl)
)

# Rebuild usable model objects on the master from the returned raw vectors.
models_fitted <- lapply(fitted_raw, keras::unserialize_model)
Run Code Online (Sandbox Code Playgroud)