R - 三元函数的矢量化实现

use*_*195 5 r vectorization

我有三个矢量X,Y并且Z长度相等n.我需要创建n x n x n一个函数数组f(X[i],Y[j],Z[k]).直接的方法是顺序循环遍历3个向量中每个向量的每个元素.但是,计算阵列所需的时间呈指数增长n.有没有办法使用矢量化操作来实现它?

编辑:正如评论中所提到的,我添加了一个简单的例子来说明需要什么.

set.seed(1)
X = rnorm(10)
Y = seq(11,20)
Z = seq(21,30)

F = array(0, dim=c( length(X),length(Y),length(Z) ) )
for (i in 1:length(X))
  for (j in 1:length(Y))
    for (k in 1:length(Z))
      F[i,j,k] = X[i] * (Y[j] + Z[k])
Run Code Online (Sandbox Code Playgroud)

谢谢.

Jul*_*rre 6

您可以使用嵌套outer:

set.seed(1)
X = rnorm(10)
Y = seq(11,20)
Z = seq(21,30)

F = array(0, dim = c( length(X),length(Y),length(Z) ) )
for (i in 1:length(X))
  for (j in 1:length(Y))
    for (k in 1:length(Z))
      F[i,j,k] = X[i] * (Y[j] + Z[k])

F2 <- outer(X, outer(Y, Z, "+"), "*")

> identical(F, F2)
[1] TRUE
Run Code Online (Sandbox Code Playgroud)

包含expand.gridNick K提出的解决方案的微基准测试:

X = rnorm(100)
Y = seq(1:100)
Z = seq(101:200)

forLoop <- function(X, Y, Z) {
  F = array(0, dim = c( length(X),length(Y),length(Z) ) )
  for (i in 1:length(X))
    for (j in 1:length(Y))
      for (k in 1:length(Z))
        F[i,j,k] = X[i] * (Y[j] + Z[k])
  return(F)
}

nestedOuter <- function(X, Y, Z) {
  outer(X, outer(Y, Z, "+"), "*")
}

expandGrid <- function(X, Y, Z) {
  df <- expand.grid(X = X, Y = Y, Z = Z)
  G <- df$X * (df$Y + df$Z)
  dim(G) <- c(length(X), length(Y), length(Z))
  return(G)
}

library(microbenchmark)
mbm <- microbenchmark(
  forLoop = F1 <- forLoop(X, Y, Z), 
  nestedOuter = F2 <- nestedOuter(X, Y, Z), 
  expandGrid = F3 <- expandGrid(X, Y, Z), 
  times = 50L)

> mbm
Unit: milliseconds
expr         min         lq        mean      median          uq        max neval
forLoop 3261.872552 3339.37383 3458.812265 3388.721159 3524.651971 4074.40422    50
nestedOuter    3.293461    3.36810    9.874336    3.541637    5.126789   54.24087    50
expandGrid   53.907789   57.15647   85.612048   88.286431  103.516819  235.45443    50
Run Code Online (Sandbox Code Playgroud)


Dav*_*urg 5

这是一个额外的选项,一个可能的Rcpp实现(如果你喜欢你的循环).虽然我可能无法超越@Juliens解决方案(也许有人可以),但他们或多或少都有相同的时机

library(Rcpp)
cppFunction('NumericVector RCPP(NumericVector X,  NumericVector Y, NumericVector Z){

             int nrow = X.size(), ncol = 3, indx = 0;
             double temp(1) ;
             NumericVector out(pow(nrow, ncol)) ;
             IntegerVector dim(ncol) ;

             for (int l = 0; l < ncol; l++){
               dim[l] = nrow;
             }             

            for (int j = 0; j < nrow; j++) {
               for (int k = 0; k < nrow; k++) {
                     temp = Y[j] + Z[k] ;
                   for (int i = 0; i < nrow; i++) {
                         out[indx] = X[i] * temp ;
                         indx += 1 ;
                   }
               }
            }

            out.attr("dim") = dim;
            return out;
}')
Run Code Online (Sandbox Code Playgroud)

证实

identical(RCPP(X, Y, Z), F)
## [1] TRUE
Run Code Online (Sandbox Code Playgroud)

快速基准

set.seed(123)
X = rnorm(100)
Y = 1:100
Z = 101:200

nestedOuter <- function(X, Y, Z) outer(X, outer(Y, Z, "+"), "*")

library(microbenchmark)
microbenchmark( 
  nestedOuter = nestedOuter(X, Y, Z),  
  RCPP = RCPP(X, Y, Z),
  unit = "relative",
  times = 1e4)

# Unit: relative
#        expr      min       lq     mean   median       uq       max neval
# nestedOuter 1.000000 1.000000 1.000000 1.000000 1.000000 1.0000000 10000
#        RCPP 1.164254 1.141713 1.081235 1.100596 1.080133 0.7092394 10000
Run Code Online (Sandbox Code Playgroud)