# 1) load the obesity
# dataset shipped with
# {SLmetrics}
utils::data(list = "obesity", package = "SLmetrics")
Using {xgboost} and {SLmetrics} in regression tasks
In this section a gradient boosting machine (GBM) is trained on the obesity dataset and evaluated using {SLmetrics}. The GBM trained here is an extreme gradient boosting machine from {xgboost} [1].
Data preparation
# 1.1) define the features
# and outcomes; the outcome
# is the regression target
# stored in the obesity list
outcome <- obesity$target$regression
features <- obesity$features
# 2) split data into training
# and test sets
# 2.1) set the seed
# for reproducibility
set.seed(1903)
# 2.2) extract the training
# indices; size = 0.95 keeps
# 95% of the rows for training
# and leaves 5% for testing.
# seq_len() is used instead of
# 1:nrow() so that an empty
# feature table yields an empty
# index rather than c(1, 0)
index <- sample(seq_len(nrow(features)), size = 0.95 * nrow(features))
# 1.1) extract training
# data and construct
# as xgb.DMatrix; the labels
# are the outcome values on
# the same sampled rows
train <- features[index, ]
dtrain <- xgboost::xgb.DMatrix(
  data = data.matrix(train),
  label = outcome[index]
)
# 1.2) extract test
# data, i.e. all rows
# not sampled into index
test <- features[-index, ]
# 1.2.1) extract the actual
# outcome values of the test
# rows; these are passed to
# the {SLmetrics} functions
# when evaluating the model
actual <- outcome[-index]
# 1.2.2) construct as xgb.DMatrix
# for the watchlist and the
# predict method
dtest <- xgboost::xgb.DMatrix(
  data = data.matrix(test),
  label = data.matrix(actual)
)
Training the GBM
Evaluation function
# 1) define the custom
# evaluation metric that
# is passed to xgb.train()
# as feval
#
# preds:  predicted values
# dtrain: object from which the
#         observed labels are read
#         with xgboost::getinfo()
#
# returns a list with the metric
# name ("RRMSE") and its value
eval_rrse <- function(
    preds,
    dtrain) {
  # 1) extract the observed
  # and predicted values
  actual <- xgboost::getinfo(dtrain, "label")
  predicted <- preds

  value <- rrse(
    actual = actual,
    predicted = predicted
  )

  # 2) construct output
  # list
  list(
    metric = "RRMSE",
    value = value
  )
}
Training the GBM
We train the model using the xgb.train()-function,
# 1) model training; the
# custom metric eval_rrse is
# evaluated on every entry of
# the watchlist, and
# maximize = FALSE declares
# that it is not to be maximized
model <- xgboost::xgb.train(
  data = dtrain,
  nrounds = 10L,
  verbose = 0,
  feval = eval_rrse,
  watchlist = list(
    train = dtrain,
    test = dtest
  ),
  maximize = FALSE
)
Performance Evaluation
We extract the predicted values using the predict()-function,
# 1) out of sample
# prediction on the
# held-out test data
predicted <- predict(
  model,
  newdata = dtest
)
We summarize the performance using the relative root mean squared error, the root mean squared error and the concordance correlation coefficient:
# 1) collect the three
# performance measures
# as columns of a
# one-row data.frame
do.call(
  data.frame,
  list(
    RRMSE = rrse(actual, predicted),
    RMSE = rmse(actual, predicted),
    CCC = ccc(actual, predicted)
  )
)
#> RRMSE RMSE CCC
#> 1 0.4115731 10.76499 0.9062945