我正在使用 R 中的 mlr3 包进行建模和预测。我正在处理一个包含测试集和训练集的大数据集。测试集和训练集由指示符列指示(在代码中:test_or_train)。
library(readr)
library(mlr3)
library(mlr3learners)
library(mlr3pipelines)
library(reprex)
library(caret)
# Data
urlfile = 'https://raw.githubusercontent.com/shudras/office_data/master/office_data.csv'
data = read_csv(url(urlfile))[-1]
## Create artificial partition to test and train sets
art_part = createDataPartition(data$imdb_rating, list=FALSE)
train = data[art_part,]
test = data[-art_part,]
## Add test-train indicators
train$test_or_train = 'train'
test$test_or_train = 'test'
## Data set that I want to work / am working with
data = rbind(test, train)
# Create …Run Code Online (Sandbox Code Playgroud)