Create a tunable xgboost workflow for regression and classification

Usage

workflow_boost_tree(rec, engine = "xgboost", counts = TRUE, ...)

Arguments

rec

Preprocessing recipe used to build the workflow.

engine

Model engine: "xgboost" or "lightgbm" ("xgboost" by default).

counts

Optional logical; if TRUE, mtry is tuned as a count of predictors, and if FALSE, as a proportion.

...

Optional engine-specific arguments.
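
For illustration, a minimal sketch of possible calls, assuming rec is an existing recipes recipe; the "lightgbm" engine is assumed to be supplied through the bonsai package, and extra arguments are assumed to be forwarded to the engine via ... (neither is confirmed by this page):

# illustrative only; rec is assumed to be a recipes::recipe()
wf_default <- workflow_boost_tree(rec)                     # xgboost; mtry tuned as a count
wf_prop <- workflow_boost_tree(rec, counts = FALSE)        # mtry tuned as a proportion
wf_lgbm <- workflow_boost_tree(rec, engine = "lightgbm")   # assumes bonsai is installed
wf_thread <- workflow_boost_tree(rec, nthread = 2)         # engine argument, assumed forwarded via ...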

Examples

library(tidymodels)
library(xgboost)
library(modeldata)
library(future)
data(cells)
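# coerce character columns to factors, take 500 rows for speed, and split with stratification on class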
split <- cells |>
  mutate(across(where(is.character), as.factor)) |>
  sample_n(500) |>
  initial_split(strata = class)
train <- training(split)
folds <- vfold_cv(train, v = 2, strata = class)
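# integer-encode nominal predictors and build the tunable boosted-tree workflow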
wf <- train |>
  recipe(case ~ .) |>
  step_integer(all_nominal_predictors()) |>
  workflow_boost_tree()
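# route tuning through the future framework; plan(sequential) keeps this example single-process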
doFuture::registerDoFuture()
plan(sequential)
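# tune a random grid of 2 candidates; save_workflow = TRUE lets fit_best() refit later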
res <- wf |>
  tune::tune_grid(
    folds,
    grid = 2,
    metrics = metric_set(roc_auc),
    control = tune::control_grid(save_workflow = TRUE, verbose = FALSE)
  )
#> i Creating pre-processing data to finalize unknown parameter: mtry
res |> collect_metrics()
#> # A tibble: 2 × 13
#>    mtry trees min_n tree_depth learn_rate loss_reduction sample_size .metric
#>   <int> <int> <int>      <int>      <dbl>          <dbl>       <dbl> <chr>  
#> 1     1  2000    40          1      0.316   0.0000000001         1   roc_auc
#> 2    57     1     2         15      0.001  31.6                  0.1 roc_auc
#> # ℹ 5 more variables: .estimator <chr>, mean <dbl>, n <int>, std_err <dbl>,
#> #   .config <chr>
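# evaluate the best configuration on the held-out test set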
res |> last_fit_metrics(split, "roc_auc")
#> # A tibble: 3 × 4
#>   .metric     .estimator .estimate .config             
#>   <chr>       <chr>          <dbl> <chr>               
#> 1 accuracy    binary         0.563 Preprocessor1_Model1
#> 2 roc_auc     binary         0.563 Preprocessor1_Model1
#> 3 brier_class binary         0.265 Preprocessor1_Model1
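# refit the numerically best workflow on the full training set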
best <- res |> fit_best()
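# predict on the test set and compute ROC AUC from the Test-class probabilities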
best |>
  augment(testing(split)) |>
  roc_auc(case, .pred_Test) |>
  pull(.estimate)
#> [1] 0.5721939