mlr3 中集成(ensemble)模型的平均分数

sno*_*eep 6 r mlr3

我使用非常有用的 mlr3 book 中的示例,想简单地返回堆叠(stacking)模型各输出的平均分数。有人可以解释一下如何用 mlr3 做到这一点吗?我尝试过使用 LearnerClassifAvg$new(id = "classif.avg") 和 po("classifavg"),但不确定自己是否正确地应用了它们,谢谢。

例子:

# Stacked ensemble on the iris task whose final step averages the class
# probabilities produced by the level-1 learners.
library("magrittr")
library("mlr3")
library("mlr3learners") # for classif.glmnet
library("mlr3pipelines") # for PipeOp*, gunion(), %>>%, LearnerClassifAvg

task      = mlr_tasks$get("iris")
train.idx = sample(seq_len(task$nrow), 120)
test.idx  = setdiff(seq_len(task$nrow), train.idx)

# Both base learners emit class probabilities rather than hard labels.
rprt = lrn("classif.rpart", predict_type = "prob")
glmn = lrn("classif.glmnet", predict_type = "prob")

# Create Learner CV Operators
lrn_0 = PipeOpLearnerCV$new(rprt, id = "rpart_cv_1")
lrn_0$param_set$values$maxdepth = 5L
lrn_1 = PipeOpPCA$new(id = "pca1") %>>% PipeOpLearnerCV$new(rprt, id = "rpart_cv_2")
lrn_1$param_set$values$rpart_cv_2.maxdepth = 1L
lrn_2 = PipeOpPCA$new(id = "pca2") %>>% PipeOpLearnerCV$new(glmn)

# Union them with a PipeOpNOP to keep original features
level_0 = gunion(list(lrn_0, lrn_1, lrn_2, PipeOpNOP$new(id = "NOP1")))

# Cbind the level-0 output, copy it twice and train 2 level-1 learners on it.
# The level-0 columns are deliberately NOT forwarded past this stage: the
# averaging learner below is meant to combine predictions only. Dropping the
# passthrough alone does not fix the error quoted at the end of this script;
# the predict type of the averaging learner is what matters.
level_1 = level_0 %>>%
  PipeOpFeatureUnion$new(4) %>>%
  PipeOpCopy$new(2) %>>%
  gunion(list(
    PipeOpLearnerCV$new(rprt, id = "rpart_cv_l1"),
    PipeOpLearnerCV$new(glmn, id = "glmnt_cv_l1")
  ))


level_1$plot(html = FALSE)


# The incoming columns are probabilities (numeric), so the averaging learner
# must predict "prob" as well; its default predict type is "response".
avg = LearnerClassifAvg$new(id = "classif.avg")
avg$predict_type = "prob"

level_2 = level_1 %>>%
  PipeOpFeatureUnion$new(2, id = "u2") %>>%
  avg

level_2$plot(html = FALSE)


# Named graph_lrn so the mlr3 sugar function lrn() is not shadowed.
graph_lrn = GraphLearner$new(level_2)


graph_lrn$
  train(task, train.idx)$
  predict(task, test.idx)$
  score()

## With classif.avg left at its default predict type this script returned:
## Error: Trying to predict response, but incoming data has no factors

Run Code Online (Sandbox Code Playgroud)

ava*_*ava 5

即使不把原始特征(经由 PipeOpNOP)传递给 classif.avg,我们仍然会遇到相同的错误:

Error: Trying to predict response, but incoming data has no factors
Run Code Online (Sandbox Code Playgroud)

\n
# Simplified stacking pipeline: the level-1 stage does not forward the raw
# features, so the final union only receives the two level-1 learners' output.
# The averaging learner is still created without changing its predict type.
library("magrittr")
library("mlr3learners") # for classif.glmnet
library("mlr3verse") #for LearnerClassifAvg
library("mlr3pipelines") # for pipelines

# Task plus a 120-row training sample; the remaining rows are held out.
task      = mlr_tasks$get("iris")
train.idx = sample(seq_len(task$nrow), 120)
test.idx  = setdiff(seq_len(task$nrow), train.idx)

# Base learners, both predicting probabilities.
rprt = lrn("classif.rpart", predict_type = "prob")
glmn = lrn("classif.glmnet", predict_type = "prob")

# Create Learner CV Operators
lrn_0 = PipeOpLearnerCV$new(rprt, id = "rpart_cv_1")
lrn_0$param_set$values$maxdepth = 5L

lrn_1 = PipeOpPCA$new(id = "pca1") %>>%
  PipeOpLearnerCV$new(rprt, id = "rpart_cv_2")
lrn_1$param_set$values$rpart_cv_2.maxdepth = 1L

lrn_2 = PipeOpPCA$new(id = "pca2") %>>%
  PipeOpLearnerCV$new(glmn)

# Union them with a PipeOpNOP to keep original features
level_0_branches = list(lrn_0, lrn_1, lrn_2, PipeOpNOP$new(id = "NOP1"))
level_0 = gunion(level_0_branches)

# Cbind the level-0 output, copy it twice and train 2 level-1 learners
level_1_branches = list(
  PipeOpLearnerCV$new(rprt, id = "rpart_cv_l1"),
  PipeOpLearnerCV$new(glmn, id = "glmnt_cv_l1")
  # PipeOpNOP$new(id = "NOP_l1") #leave out features here
)
level_1 = level_0 %>>%
  PipeOpFeatureUnion$new(4) %>>%
  PipeOpCopy$new(2) %>>%
  gunion(level_1_branches)

# Join the two level-1 outputs and hand them to the averaging learner.
level_2 = level_1 %>>%
  PipeOpFeatureUnion$new(2, id = "u2") %>>%
  LearnerClassifAvg$new(id = "classif.avg")

level_2$plot(html = FALSE)
Run Code Online (Sandbox Code Playgroud)\n

由reprex 包(v1.0.0)创建于 2021-03-27

\n

可以通过设置学习器的正确预测类型来缓解此错误:

\n
# Create the averaging learner and switch it to probability predictions.
lrn_avg <- LearnerClassifAvg$new(id = "classif.avg")
lrn_avg$predict_type <- "prob"
Run Code Online (Sandbox Code Playgroud)\n

错误消息的来源可在此处查看:https://github.com/cran/mlr3pipelines/blob/master/R/LearnerAvg.R

\n
if (all(fcts) != (self$predict_type == "response")) {\n        stopf("Trying to predict %s, but incoming data has %sfactors", self$predict_type, if (all(fcts)) "only " else "no "\n
Run Code Online (Sandbox Code Playgroud)\n

使用上面更简单的集成(ensemble)示例的解决方案

\n
# Wrap the graph as a learner, fit it on the training rows, predict the
# held-out rows and score that prediction. The "#>" lines are the recorded
# reprex output of this run.
lrn = GraphLearner$new(level_2)

lrn$train(task, train.idx)$predict(task, test.idx)$score()
#> INFO  [20:42:55.490] [mlr3]  Applying learner 'classif.rpart' on task 'iris' (iter 2/3)
#> INFO  [20:42:55.557] [mlr3]  Applying learner 'classif.rpart' on task 'iris' (iter 1/3)
#> INFO  [20:42:55.591] [mlr3]  Applying learner 'classif.rpart' on task 'iris' (iter 3/3)
#> INFO  [20:42:55.810] [mlr3]  Applying learner 'classif.rpart' on task 'iris' (iter 3/3)
#> INFO  [20:42:55.849] [mlr3]  Applying learner 'classif.rpart' on task 'iris' (iter 2/3)
#> INFO  [20:42:55.901] [mlr3]  Applying learner 'classif.rpart' on task 'iris' (iter 1/3)
#> INFO  [20:42:56.188] [mlr3]  Applying learner 'classif.glmnet' on task 'iris' (iter 3/3)
#> INFO  [20:42:56.299] [mlr3]  Applying learner 'classif.glmnet' on task 'iris' (iter 1/3)
#> INFO  [20:42:56.374] [mlr3]  Applying learner 'classif.glmnet' on task 'iris' (iter 2/3)
#> INFO  [20:42:56.634] [mlr3]  Applying learner 'classif.rpart' on task 'iris' (iter 1/3)
#> INFO  [20:42:56.699] [mlr3]  Applying learner 'classif.rpart' on task 'iris' (iter 2/3)
#> INFO  [20:42:56.765] [mlr3]  Applying learner 'classif.rpart' on task 'iris' (iter 3/3)
#> INFO  [20:42:57.065] [mlr3]  Applying learner 'classif.glmnet' on task 'iris' (iter 2/3)
#> INFO  [20:42:57.177] [mlr3]  Applying learner 'classif.glmnet' on task 'iris' (iter 1/3)
#> INFO  [20:42:57.308] [mlr3]  Applying learner 'classif.glmnet' on task 'iris' (iter 3/3)
#> Error: Trying to predict response, but incoming data has no factors
Run Code Online (Sandbox Code Playgroud)\n

\n
# Create the averaging learner and switch it to probability predictions.
lrn_avg <- LearnerClassifAvg$new(id = "classif.avg")
lrn_avg$predict_type <- "prob"
Run Code Online (Sandbox Code Playgroud)\n

由reprex 包(v1.0.0)于 2021-03-28 创建

\n