Skip to contents

Tune, and evaluate, species distribution models

Usage

tune_sdm(
  prep,
  out_dir = FALSE,
  return_val = "path",
  algo = c("all", "maxnet", "bioclim", "envelope", "rf"),
  fc = "auto_feature",
  limit_p = FALSE,
  rm = seq(1, 6, 0.5),
  trees = c(500, 1000, 2000),
  mtry = TRUE,
  limit_spat_mtry = 4,
  nodesize = c(1, 2),
  keep_model = FALSE,
  best_run = FALSE,
  metrics_df = envSDM::sdm_metrics,
  use_metrics = c("auc_po", "CBI_rescale", "IMAE"),
  do_gc = TRUE,
  force_new = FALSE,
  ...
)

Arguments

prep

Character or named list. If character, the path to an existing prep.rds. Otherwise, the result of a call to prep_sdm with return_val = "object"

out_dir

FALSE or character. If FALSE the result of tune_sdm will be saved to a temporary folder. If character, a file 'tune.rds' will be created at the path defined by out_dir.

return_val

Character: "object" or "path". Both return a named list. In the case of "path" the named list is simply list(tune = out_dir). Will be set to "object" if out_dir is FALSE.

algo

Character. Name of algorithm to use.

fc

Character. Used to generate levels of classes argument to maxnet::maxnet() that are tuned.

limit_p

TRUE, FALSE or number of predictor variables above which to limit the use of p in the classes argument used in maxnet::maxnet(). Useful with many predictor variables when it becomes unwieldy to generate interactions for all predictors.

rm

Numeric. Used to generate levels of regmult argument to maxnet::maxnet() that are tuned.

trees

Used to generate the levels of ntree argument to randomForest::randomForest() that are tuned. TRUE (tune with default trees), FALSE (don't tune trees) or numeric (the trees values to tune with).

mtry

Used to generate the levels of mtry argument to randomForest::randomForest() that are tuned. TRUE (tune with sensible guesses for mtry), FALSE (only use default randomForest::randomForest() mtry) or numeric (the mtry values to tune with).

limit_spat_mtry

Numeric. If mtry is TRUE and if using spatial cross validation, the values of mtry to tune will be limited to less than or equal to limit_spat_mtry.

nodesize

Used to generate the levels of nodesize argument to randomForest::randomForest() that are tuned. TRUE (tune with default nodesize), FALSE (only use default randomForest::randomForest() nodesize) or numeric (the nodesize values to tune with).

keep_model

Logical. If TRUE the model results will be appended as a list column in the returned tibble (as column m)

best_run

Logical. If TRUE this alters the behaviour of tune_sdm() by, well, not tuning :). All blocks are set to the same value, so no cross-validation is performed.

metrics_df

Dataframe. Defines which metrics to use when deciding on 'good' SDMs.

use_metrics

Character. Vector of values in metrics_df$metric to use when finding the 'best' model.

do_gc

Logical. Run base::rm(list = ls()) and base::gc() at end of function? Useful when running SDMs for many, many taxa, especially if done in parallel.

force_new

Logical. If outputs already exist, should they be remade?

...

Passed to evaluate_sdm(), e.g. thresholds for use in predicts::pa_evaluate() (as the tr argument). Note that if tr is used, the values of the thresholds element of the pa_ModelEvaluation object returned by predicts::pa_evaluate() will be limited to the values in tr.

Value

If return_val is "object" a named list. If return_val is "path" a named list list(tune = out_dir). If out_dir is a valid path, the 'full result' (irrespective of return_val) is also saved to fs::path(out_dir, "tune.rds"). The 'full result' is a named list with elements:

Examples


  out_dir <- file.path(system.file(package = "envSDM"), "examples")

  data <- fs::path(system.file(package = "envSDM"), "examples") |>
    fs::dir_ls(regexp = "prep\\.rds$"
               , recurse = TRUE
               ) |>
    tibble::enframe(name = NULL, value = "prep") |>
    dplyr::mutate(taxa = gsub("\\.rds", "", basename(dirname(prep)))
                  , out_dir = fs::path(out_dir, taxa)
                  )

  purrr::map(data$out_dir
              , \(x) tune_sdm(prep = fs::path(x, "prep.rds")
                              , out_dir = x
                              , fc = "lq"
                              , rm = c(2, 3)
                              , trees = 500
                              , mtry = c(1:3)
                              , nodesize = 2
                              #, force_new = TRUE
                              )
              )
#> Warning: Missing `trust` will be set to FALSE by default for RDS in 2.0.0.
#> Warning: Missing `trust` will be set to FALSE by default for RDS in 2.0.0.
#> Warning: Missing `trust` will be set to FALSE by default for RDS in 2.0.0.
#> Warning: Missing `trust` will be set to FALSE by default for RDS in 2.0.0.
#> [[1]]
#> [[1]]$tune_file
#> H:/temp/nige/RtmpAZgTQZ/temp_libpath362426b1106f/envSDM/examples/acaule/tune.rds
#> 
#> 
#> [[2]]
#> [[2]]$tune_file
#> H:/temp/nige/RtmpAZgTQZ/temp_libpath362426b1106f/envSDM/examples/bradypus/tune.rds
#> 
#> 

  # which tune args were best for each taxa using 'combo'?
  data %>%
    dplyr::mutate(tune = fs::path(out_dir, "tune.rds")
                  , tune = purrr::map(tune, rio::import)
                  , tune_mean = purrr::map(tune, "tune_mean")
                  ) %>%
    tidyr::unnest(cols = c(tune_mean)) %>%
    dplyr::filter(best) %>% # used 'combo' to determine 'best' as default in tune_sdm
    dplyr::select(taxa, algo, tune_args, combo, auc_po, IMAE, CBI, max_spec_sens)
#> Warning: There were 2 warnings in `dplyr::mutate()`.
#> The first warning was:
#>  In argument: `tune = purrr::map(tune, rio::import)`.
#> Caused by warning:
#> ! Missing `trust` will be set to FALSE by default for RDS in 2.0.0.
#>  Run dplyr::last_dplyr_warnings() to see the 1 remaining warning.
#> # A tibble: 2 × 8
#>   taxa     algo   tune_args             combo auc_po  IMAE   CBI max_spec_sens
#>   <chr>    <chr>  <chr>                 <dbl>  <dbl> <dbl> <dbl>         <dbl>
#> 1 acaule   maxnet fc: lq. rm: 2         0.823  0.968 0.875 0.942         0.312
#> 2 bradypus rf     tr: 500. mt: 1. ns: 2 0.468  0.714 0.702 0.867         0.326

  # or best tune args choosing on just auc_po?
  data %>%
    dplyr::mutate(tune = fs::path(out_dir, "tune.rds")
                  , tune = purrr::map(tune, rio::import)
                  , all = purrr::map(tune, "tune_mean")
                  ) %>%
    tidyr::unnest(cols = c(all)) %>%
    dplyr::group_by(taxa) %>%
    dplyr::filter(auc_po == max(auc_po)) %>%
    dplyr::ungroup() %>%
    dplyr::select(taxa, algo, tune_args, auc_po, IMAE, CBI, max_spec_sens)
#> Warning: There were 2 warnings in `dplyr::mutate()`.
#> The first warning was:
#>  In argument: `tune = purrr::map(tune, rio::import)`.
#> Caused by warning:
#> ! Missing `trust` will be set to FALSE by default for RDS in 2.0.0.
#>  Run dplyr::last_dplyr_warnings() to see the 1 remaining warning.
#> # A tibble: 2 × 7
#>   taxa     algo   tune_args             auc_po  IMAE   CBI max_spec_sens
#>   <chr>    <chr>  <chr>                  <dbl> <dbl> <dbl>         <dbl>
#> 1 acaule   rf     tr: 500. mt: 2. ns: 2  0.969 0.902 0.769         0.233
#> 2 bradypus maxnet fc: lq. rm: 3          0.723 0.631 0.830         0.448