Create a tunable xgboost workflow for regression and classification

Usage

workflow_boost_tree(rec, engine = "xgboost", counts = TRUE, ...)

Arguments

rec

Preprocessing recipe used to build the workflow.

engine

Model engine: "xgboost" or "lightgbm" ("xgboost" by default).

counts

Optional logical; if TRUE, mtry is tuned as a count of predictors, and if FALSE, as a proportion.

...

Optional engine-specific arguments.
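
For illustration, a minimal sketch of possible calls, assuming rec is an existing recipes recipe; the "lightgbm" engine is assumed to be supplied through the bonsai package, and extra arguments are assumed to be forwarded to the engine via ... (neither is confirmed by this page):

# illustrative only; rec is assumed to be a recipes::recipe()
wf_default <- workflow_boost_tree(rec)                     # xgboost; mtry tuned as a count
wf_prop <- workflow_boost_tree(rec, counts = FALSE)        # mtry tuned as a proportion
wf_lgbm <- workflow_boost_tree(rec, engine = "lightgbm")   # assumes bonsai is installed
wf_thread <- workflow_boost_tree(rec, nthread = 2)         # engine argument, assumed forwarded via ...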

Examples

library(tidymodels)
library(xgboost)
library(modeldata)
library(future)
data(cells)
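# coerce character columns to factors, take 500 rows for speed, and split with stratification on class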
split <- cells |>
  mutate(across(where(is.character), as.factor)) |>
  sample_n(500) |>
  initial_split(strata = class)
train <- training(split)
folds <- vfold_cv(train, v = 2, strata = class)
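# integer-encode nominal predictors and build the tunable boosted-tree workflow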
wf <- train |>
  recipe(case ~ .) |>
  step_integer(all_nominal_predictors()) |>
  workflow_boost_tree()
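# route tuning through the future framework; plan(sequential) keeps this example single-process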
doFuture::registerDoFuture()
plan(sequential)
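# tune a random grid of 2 candidates; save_workflow = TRUE lets fit_best() refit later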
res <- wf |>
  tune::tune_grid(
    folds,
    grid = 2,
    metrics = metric_set(roc_auc),
    control = tune::control_grid(save_workflow = TRUE, verbose = FALSE)
  )
#> i Creating pre-processing data to finalize unknown parameter: mtry
res |> collect_metrics()
#> # A tibble: 2 × 13
#>    mtry trees min_n tree_depth learn_rate loss_reduction sample_size .metric
#>   <int> <int> <int>      <int>      <dbl>          <dbl>       <dbl> <chr>  
#> 1     1  2000    40          1      0.316   0.0000000001         1   roc_auc
#> 2    57     1     2         15      0.001  31.6                  0.1 roc_auc
#> # ℹ 5 more variables: .estimator <chr>, mean <dbl>, n <int>, std_err <dbl>,
#> #   .config <chr>
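# evaluate the best configuration on the held-out test set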
res |> last_fit_metrics(split, "roc_auc")
#> # A tibble: 3 × 4
#>   .metric     .estimator .estimate .config             
#>   <chr>       <chr>          <dbl> <chr>               
#> 1 accuracy    binary         0.563 Preprocessor1_Model1
#> 2 roc_auc     binary         0.563 Preprocessor1_Model1
#> 3 brier_class binary         0.265 Preprocessor1_Model1
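# refit the numerically best workflow on the full training set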
best <- res |> fit_best()
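# predict on the test set and compute ROC AUC from the Test-class probabilities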
best |>
  augment(testing(split)) |>
  roc_auc(case, .pred_Test) |>
  pull(.estimate)
#> [1] 0.5721939