这是一个 R 方法,我想把它转换成 C++ 以提高速度:
# `[[` method for ncdfFlowSet objects (excerpt: only the `j` subsetting step is shown).
# NOTE(review): `fr` and `localChNames` are used below but are not defined in this
# excerpt - presumably they are set up in elided code; verify against the full method.
setMethod("[[", signature=signature(x="ncdfFlowSet"),
definition=function(x, i, j, use.exprs = TRUE, ...)
{
# Subset by j only when the caller actually supplied it.
if(!missing(j)){
if(is.character(j)){
# Translate names into positions within localChNames; match() yields NA
# for any name it cannot find, which is reported as an error below.
j <- match(j, localChNames)
if(any(is.na(j)))
stop("subscript out of bounds")
}
# Keep only the selected rows of the parameters slot and the matching channel names.
fr@parameters <- fr@parameters[j, , drop = FALSE]
localChNames <- localChNames[j]
}
# Other stuff (elided in the original excerpt).
})
Run Code Online (Sandbox Code Playgroud)
Kevin 在向量子集(vector subsetting)方面的出色工作,让这里按 `j` 取子集变得容易多了:
/**
 * Fetch one frame from `x` and, when `j_obj` is not NULL, subset the frame's
 * parameter table by `j_obj`.
 *
 * The frame named `sampleName` is looked up in the `frames` environment slot
 * of `x` and cloned, so the stored object is not modified. The `data` slot of
 * the clone's `parameters` slot is then replaced by the selected rows of its
 * `name`, `desc`, `range`, `minRange` and `maxRange` columns.
 *
 * @param x          S4 object exposing the `frames` and `colnames` slots.
 * @param sampleName Name of the frame inside the `frames` environment.
 * @param j_obj      Column selector. Accepted forms:
 *                   - NULL: no subsetting,
 *                   - character: channel names looked up in `colnames`,
 *                   - logical: mask over `colnames`,
 *                   - integer / numeric: 1-based positions.
 * @param useExpr    Not used in the portion of the function shown in this excerpt.
 *
 * Calls Rcpp::stop() when a character entry of `j_obj` is not found in
 * `colnames`, or when `j_obj` has an unsupported type.
 *
 * NOTE: the excerpt this function comes from stops after the parameter
 * subsetting step; its return statement and closing brace are not shown.
 */
// [[Rcpp::export]]
Rcpp::S4 readFrame(Rcpp::S4 x
                 , std::string sampleName
                 , Rcpp::RObject j_obj
                 , bool useExpr
                 )
{
    Rcpp::Environment frEnv = x.slot("frames");
    Rcpp::S4 frObj = frEnv.get(sampleName);
    // Work on a copy so the frame stored in the environment is left untouched.
    Rcpp::S4 fr = Rcpp::clone(frObj);

    // Local channel names.
    Rcpp::StringVector colnames = x.slot("colnames");
    Rcpp::StringVector ch_selected;

    /*
     * Subset by j if applicable. Every non-NULL branch must fill BOTH
     * `ch_selected` and the 0-based `j_indx`, because `j_indx` drives the
     * parameter-table subsetting further down.
     */
    const int j_type = j_obj.sexp_type();
    Rcpp::IntegerVector j_indx;

    if (j_type == STRSXP) // character vector: match names against colnames
    {
        ch_selected = Rcpp::StringVector(j_obj.get__());
        const unsigned nCol = ch_selected.size();
        j_indx = Rcpp::IntegerVector(nCol);
        for (unsigned i = 0; i < nCol; i++)
        {
            const Rcpp::internal::string_proxy<STRSXP> &thisCh = ch_selected(i);
            Rcpp::StringVector::iterator match_id = std::find(colnames.begin(), colnames.end(), thisCh);
            if (match_id == colnames.end())
            {
                std::string strCh = Rcpp::as<std::string>(thisCh);
                Rcpp::stop("j subscript out of bounds: " + strCh);
            }
            else
            {
                j_indx(i) = match_id - colnames.begin();
            }
        }
    }
    else if (j_type == NILSXP) // j is set to NULL in R when not supplied
    {
        ch_selected = colnames;
    }
    else if (j_type == LGLSXP)
    {
        Rcpp::LogicalVector j_val(j_obj.get__());
        ch_selected = colnames[j_val];
        // Turn the mask into 0-based positions. Previously j_indx stayed
        // empty here, so the parameter table below was subset to zero rows.
        // The guard on `k` keeps the write in bounds even if the subsetting
        // above selected fewer elements than the mask has TRUE entries.
        const R_xlen_t nSelected = ch_selected.size();
        j_indx = Rcpp::IntegerVector(nSelected);
        R_xlen_t k = 0;
        for (R_xlen_t i = 0; i < j_val.size() && k < nSelected; ++i)
        {
            if (j_val[i] == TRUE)
                j_indx[k++] = static_cast<int>(i);
        }
    }
    else if (j_type == INTSXP || j_type == REALSXP)
    {
        // Plain numeric indices from R (e.g. c(1, 3)) arrive as doubles.
        // Constructing an IntegerVector from the SEXP coerces them to integer
        // (presumably with as.integer-style truncation - confirm if
        // fractional indices can occur).
        Rcpp::IntegerVector j_val(j_obj.get__());
        j_indx = j_val - 1; // convert to 0-based index
        ch_selected = colnames[j_indx];
    }
    else
        Rcpp::stop("unsupported j expression!");

    /*
     * Subset the parameter annotation table (a data frame).
     */
    if (j_type != NILSXP)
    {
        Rcpp::S4 pheno = fr.slot("parameters");
        Rcpp::DataFrame pData = pheno.slot("data");
        Rcpp::CharacterVector pd_name = pData["name"];
        Rcpp::CharacterVector pd_desc = pData["desc"];
        Rcpp::NumericVector pd_range = pData["range"];
        Rcpp::NumericVector pd_minRange = pData["minRange"];
        Rcpp::NumericVector pd_maxRange = pData["maxRange"];
        Rcpp::DataFrame plist = Rcpp::DataFrame::create(Rcpp::Named("name") = pd_name[j_indx]
                                                       ,Rcpp::Named("desc") = pd_desc[j_indx]
                                                       ,Rcpp::Named("range") = pd_range[j_indx]
                                                       ,Rcpp::Named("minRange") = pd_minRange[j_indx]
                                                       ,Rcpp::Named("maxRange") = pd_maxRange[j_indx]
                                                       );
        pheno.slot("data") = plist;
    }
    // (The original excerpt ends here; the rest of the function is not shown.)
Run Code Online (Sandbox Code Playgroud)
然而,R 中的 `j` 索引通常允许不同类型的输入(`character`、`logical` 或 `numeric`)。我想知道是否存在类似的多态(polymorphic)机制(也许通过抽象的向量指针/引用),从而避免之后对 `data.frame` 做 `[` 取子集时,仅仅因为 `Rcpp::**Vector` 类型不同而产生的冗余代码。
我们通常主张将逻辑分为调度步骤和模板化函数步骤.因此,您应该能够使用以下内容解决问题:
#include <Rcpp.h>
using namespace Rcpp;
/**
 * Typed implementation step of the dispatch/implementation split.
 *
 * This is a skeleton: the body is meant to be filled in with the real logic,
 * written once against the statically typed selector `j`.
 *
 * @tparam T         Concrete vector type of the selector.
 * @param x          S4 object to read from.
 * @param sampleName Name of the sample to read.
 * @param j          Typed column selector.
 * @param useExpr    Flag passed through from the caller; unused in this skeleton.
 * @return R_NilValue as a placeholder until the body is implemented.
 */
template <typename T>
SEXP readFrame(Rcpp::S4 x, std::string sampleName, T const& j, bool useExpr) {
    // use the typed 'j' expression

    // Placeholder result: flowing off the end of a function that returns
    // SEXP is undefined behaviour, so the skeleton returns R's NULL.
    return R_NilValue;
}
/**
 * Runtime dispatch step: inspects the R type of `j` once and forwards to the
 * readFrame<T> instantiation for the matching Rcpp vector type.
 *
 * Exported to R under the name `subset`.
 *
 * @param x          S4 object to read from.
 * @param sampleName Name of the sample to read.
 * @param j          Column selector as a raw SEXP; integer, numeric, character
 *                   and logical vectors are supported.
 * @param useExpr    Flag forwarded unchanged to readFrame<T>.
 * @return Whatever the selected readFrame<T> instantiation returns.
 *
 * Calls stop() with "Unsupported SEXP type" for any other type of `j`.
 */
// [[Rcpp::export(subset)]]
SEXP readFrame_dispatch(Rcpp::S4 x, std::string sampleName, SEXP j, bool useExpr)
{
    switch (TYPEOF(j)) {
    case INTSXP: return readFrame<IntegerVector>(x, sampleName, j, useExpr);
    case REALSXP: return readFrame<NumericVector>(x, sampleName, j, useExpr);
    case STRSXP: return readFrame<CharacterVector>(x, sampleName, j, useExpr);
    case LGLSXP: return readFrame<LogicalVector>(x, sampleName, j, useExpr);
    default: stop("Unsupported SEXP type");
    }
    // Not reached in practice (every case returns or raises an error);
    // kept so that all control paths visibly return a value.
    return R_NilValue;
}
Run Code Online (Sandbox Code Playgroud)
Rcpp 的设计目标之一,是出于速度考虑尽可能避免运行时多态——几乎所有多态都是静态(编译期)完成的,运行时查找在理想情况下只应发生一次(除非偶尔为了某些例程而不得不回调 R)。
调度代码有点丑陋和机械,但允许这种"风格"的编程.如果'dispatch'与'implementation'分开,则代码变得更具可读性,因为您可以在一个位置隐藏调度丑陋.
不过,我不知道是否有某种宏魔法可以减少这种形式的调度代码中的重复……
归档时间: |
|
查看次数: |
269 次 |
最近记录: |