Tune, and evaluate, species distribution models
Usage
tune_sdm(
prep,
out_dir = FALSE,
return_val = "path",
algo = c("all", "maxnet", "envelope", "rf"),
max_corr = list(maxnet = 0.7, envelope = 0.9, rf = 0.99),
fc = "auto_feature",
limit_p = FALSE,
rm = seq(1, 6, 0.5),
trees = c(999),
mtry = TRUE,
limit_spat_mtry = 4,
nodesize = c(1, 2),
keep_model = FALSE,
best_run = FALSE,
metrics_df = envSDM::sdm_metrics,
use_metrics = c("auc_po", "CBI_rescale", "IMAE"),
do_gc = FALSE,
force_new = FALSE,
...
)
Arguments
- prep
Character or named list. If character, the path to an existing
prep.rds. Otherwise, the result of a call to prep_sdm() with return_val = "object".
- out_dir
FALSE or character. If FALSE the result of tune_sdm will be saved to a temporary folder. If character, a file 'tune.rds' will be created at the path defined by out_dir.
- return_val
Character: "object" or "path". Both return a named list. In the case of "path" the named list is simply list(tune = out_dir). Will be set to "object" if
out_dir is FALSE.
- algo
Character. Name of algorithm to use.
- max_corr
Named list. Names of list elements must match algorithms being used. For each pair of predictor variables correlated at or above
max_corr, one will be dropped using caret::findCorrelation().
- fc
Character. Used to generate levels of the
classes argument to maxnet::maxnet() that are tuned.
- limit_p
TRUE, FALSE or number of predictor variables above which to limit the use of p in the classes argument used in maxnet::maxnet(). Useful with many predictor variables when it becomes unwieldy to generate interactions for all predictors.
- rm
Numeric. Used to generate levels of the
regmult argument to maxnet::maxnet() that are tuned.
- trees
Used to generate the levels of the
ntree argument to randomForest::randomForest() that are tuned. TRUE (tune with default trees), FALSE (don't tune trees) or numeric (the trees values to tune with).
- mtry
Used to generate the levels of the
mtry argument to randomForest::randomForest() that are tuned. TRUE (tune with sensible guesses for mtry), FALSE (only use default randomForest::randomForest() mtry) or numeric (the mtry values to tune with).
- limit_spat_mtry
Numeric. If
mtry is TRUE and if using spatial cross validation, the values of mtry to tune will be limited to less than or equal to limit_spat_mtry.
- nodesize
Used to generate the levels of the
nodesize argument to randomForest::randomForest() that are tuned. TRUE (tune with default nodesize), FALSE (only use default randomForest::randomForest() nodesize) or numeric (the nodesize values to tune with).
- keep_model
Logical. If
TRUE the model results will be appended as a list column in the returned tibble (as column m).
- best_run
Logical. If
TRUE this alters the behaviour of tune_sdm() by, well, not tuning. :). Sets all folds to the same value so no cross-validation.
- metrics_df
Dataframe. Defines which metrics to use when deciding on 'good' SDMs.
- use_metrics
Character. Vector of values in metrics_df$metric to use when finding the 'best' model.
- do_gc
Logical. Run
base::rm(list = ls()) and base::gc() at end of function? Useful when running SDMs for many, many taxa, especially if done in parallel.
- force_new
Logical. If outputs already exist, should they be remade?
- ...
Passed to
evaluate_sdm(). e.g. thresholds for use in predicts::pa_evaluate() (as tr argument, although if used, the values of the thresholds element of the pa_ModelEvaluation object returned by predicts::pa_evaluate() will be limited to the values in tr).
Value
If return_val is "object" a named list. If return_val is "path"
a path to the saved file. If out_dir is a valid path, the 'full
result' (irrespective of return_val) is also saved to
fs::path(out_dir, "tune.rds"). The 'full result' is a named list with
elements:
Examples
out_dir <- file.path(system.file(package = "envSDM"), "examples")
data <- fs::path(system.file(package = "envSDM"), "examples") |>
fs::dir_ls(regexp = "prep\\.rds$"
, recurse = TRUE
) |>
tibble::enframe(name = NULL, value = "prep") |>
dplyr::mutate(taxa = gsub("\\.rds", "", basename(dirname(prep)))
, out_dir = fs::path(out_dir, taxa)
)
purrr::map(data$out_dir
, \(x) tune_sdm(prep = fs::path(x, "prep.rds")
, out_dir = x
, fc = "lq"
, rm = c(2, 3)
, trees = 500
, mtry = c(1:3)
, nodesize = c(1, 2, 3)
, limit_p = 3
, use_metrics = c("auc_po", "CBI_rescale", "IMAE")
#, force_new = TRUE
)
)
#> [[1]]
#> /home/nwilloug/temp/RtmpO4BWL8/temp_libpathfdac86abd0f83/envSDM/examples/chg__0.3__1/tune.rds
#>
#> [[2]]
#> /home/nwilloug/temp/RtmpO4BWL8/temp_libpathfdac86abd0f83/envSDM/examples/chg__0.3__5/tune.rds
#>
#> [[3]]
#> /home/nwilloug/temp/RtmpO4BWL8/temp_libpathfdac86abd0f83/envSDM/examples/chg__0__1/tune.rds
#>
#> [[4]]
#> /home/nwilloug/temp/RtmpO4BWL8/temp_libpathfdac86abd0f83/envSDM/examples/chg__0__5/tune.rds
#>
# which tune args were best for each taxa using 'combo'?
data %>%
dplyr::mutate(tune = fs::path(out_dir, "tune.rds")
, tune = purrr::map(tune, rio::import, trust = TRUE)
, tune_mean = purrr::map(tune, "tune_mean")
) %>%
tidyr::unnest(cols = c(tune_mean)) %>%
dplyr::filter(best) %>% # used 'combo' to determine 'best' as default in tune_sdm
dplyr::select(taxa, algo, tune_args, combo, auc_po, IMAE, CBI, max_spec_sens)
#> # A tibble: 4 × 8
#> taxa algo tune_args combo auc_po IMAE CBI max_spec_sens
#> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 chg__0.3__1 rf tr: 500. mt: 2. ns: 2 0.657 0.875 0.820 0.832 0.551
#> 2 chg__0.3__5 rf tr: 500. mt: 1. ns: 3 0.532 0.785 0.805 0.684 0.281
#> 3 chg__0__1 rf tr: 500. mt: 1. ns: 1 0.519 0.782 0.811 0.638 0.275
#> 4 chg__0__5 rf tr: 500. mt: 1. ns: 2 0.510 0.792 0.814 0.584 0.263
# or best tune args choosing on just auc_po?
data %>%
dplyr::mutate(tune = fs::path(out_dir, "tune.rds")
, tune = purrr::map(tune, rio::import, trust = TRUE)
, all = purrr::map(tune, "tune_mean")
) %>%
tidyr::unnest(cols = c(all)) %>%
dplyr::group_by(taxa) %>%
dplyr::filter(auc_po == max(auc_po)) %>%
dplyr::ungroup() %>%
dplyr::select(taxa, algo, tune_args, auc_po, IMAE, CBI, max_spec_sens)
#> # A tibble: 4 × 7
#> taxa algo tune_args auc_po IMAE CBI max_spec_sens
#> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
#> 1 chg__0.3__1 rf tr: 500. mt: 1. ns: 1 0.877 0.817 0.788 0.606
#> 2 chg__0.3__5 rf tr: 500. mt: 2. ns: 2 0.786 0.809 0.640 0.263
#> 3 chg__0__1 rf tr: 500. mt: 2. ns: 3 0.788 0.811 0.581 0.272
#> 4 chg__0__5 rf tr: 500. mt: 1. ns: 2 0.792 0.814 0.584 0.263