---
title: "Main functions: binary data"
output: rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{Main functions: binary data}
  %\VignetteEngine{knitr::knitr}
  %\VignetteEncoding{UTF-8}
---

## Complete code example

This vignette presents, in one complete example, all the main functions (those starting with `BIOMOD_[...]`) of `biomod2`. The data set used is [DataSpecies](../reference/DataSpecies.html), which contains presence/absence data.
Similar examples are presented for count data on the [Main functions (abund) webpage](examples_1_mainFunctions_AB.html).

### Load dataset and variables

```R
library(biomod2)
library(terra)

# Species occurrences (the table holds 6 species)
data('DataSpecies')
head(DataSpecies)

# Name of the species studied in this example
myRespName <- 'GuloGulo'

# Presence/absence values for that species, coerced to numeric
myResp <- as.numeric(DataSpecies[, myRespName])

# Matching XY coordinates
myRespXY <- DataSpecies[, c('X_WGS84', 'Y_WGS84')]

# Environmental variables extracted from BIOCLIM
# (bio_3, bio_4, bio_7, bio_11 & bio_12)
data('bioclim_current')
myExpl <- rast(bioclim_current)
```

### Prepare data & parameters

#### Format data (observations & explanatory variables)

```R
# Format data with true absences: bundle the species name, its
# presence/absence values, their coordinates and the explanatory variables
myBiomodData <- BIOMOD_FormatingData(resp.name = myRespName,
                                     resp.var = myResp,
                                     resp.xy = myRespXY,
                                     expl.var = myExpl)

# Print, then plot, the formatted object
myBiomodData
plot(myBiomodData)
```

#### Pseudo-absences extraction

A single set or multiple sets of pseudo-absences can be selected with the [`BIOMOD_FormatingData`](../reference/BIOMOD_FormatingData.html) function, which calls the [`bm_PseudoAbsences`](../reference/bm_PseudoAbsences.html) function to do so. More examples are presented on the [Auxiliary functions webpage](examples_2_auxiliaryFunctions.html).

```R
# # Transform true absences into potential pseudo-absences
# myResp.PA <- ifelse(myResp == 1, 1, NA)
#
# # Format data with pseudo-absences : random method
# myBiomodData.r <- BIOMOD_FormatingData(resp.var = myResp.PA,
#                                        expl.var = myExpl,
#                                        resp.xy = myRespXY,
#                                        resp.name = myRespName,
#                                        PA.nb.rep = 4,
#                                        PA.nb.absences = 1000,
#                                        PA.strategy = 'random')
#
# myBiomodData.r
# plot(myBiomodData.r)
```

```R
# # Select multiple sets of pseudo-absences
#
# # Transform true absences into potential pseudo-absences
# myResp.PA <- ifelse(myResp == 1, 1, NA)
#
# # Format Data with pseudo-absences : random method
# myBiomodData.multi <- BIOMOD_FormatingData(resp.var = myResp.PA,
#                                            expl.var = myExpl,
#                                            resp.xy = myRespXY,
#                                            resp.name = myRespName,
#                                            PA.nb.rep = 4,
#                                            PA.nb.absences = c(1000, 500, 500, 200),
#                                            PA.strategy = 'random')
# myBiomodData.multi
# summary(myBiomodData.multi)
# plot(myBiomodData.multi)
```

#### Cross-validation datasets

Several cross-validation methods are available and can be selected with the [`BIOMOD_Modeling`](../reference/BIOMOD_Modeling.html) function, which calls the [`bm_CrossValidation`](../reference/bm_CrossValidation.html) function to do so. More examples are presented on the [Auxiliary functions webpage](examples_2_auxiliaryFunctions.html).

```R
# # k-fold selection
# cv.k <- bm_CrossValidation(bm.format = myBiomodData,
#                            strategy = 'kfold',
#                            nb.rep = 2,
#                            k = 3)
#
# # stratified selection (geographic)
# cv.s <- bm_CrossValidation(bm.format = myBiomodData,
#                            strategy = 'strat',
#                            k = 2,
#                            balance = 'presences',
#                            strat = 'x')
# head(cv.k)
# head(cv.s)
```

#### Retrieve modeling options

Modeling options are automatically retrieved from selected models within the [`BIOMOD_Modeling`](../reference/BIOMOD_Modeling.html) function, which calls the [`bm_ModelingOptions`](../reference/bm_ModelingOptions.html) function to do so. Model parameters can also be automatically tuned to a specific dataset by calling the [`bm_Tuning`](../reference/bm_Tuning.html) function, although this can take quite a long time. More examples are presented on the [Auxiliary functions webpage](examples_2_auxiliaryFunctions.html).

```R
# # bigboss parameters
# opt.b <- bm_ModelingOptions(data.type = 'binary',
#                             models = c('SRE', 'XGBOOST'),
#                             strategy = 'bigboss')
#
# # tuned parameters with formatted data
# opt.t <- bm_ModelingOptions(data.type = 'binary',
#                             models = c('SRE', 'XGBOOST'),
#                             strategy = 'tuned',
#                             bm.format = myBiomodData)
#
# opt.b
# opt.t
```

### Run modeling

#### Single models

```R
# Run the single models
myBiomodModelOut <- BIOMOD_Modeling(
  bm.format = myBiomodData,
  modeling.id = 'AllModels',
  CV.strategy = 'random',
  CV.nb.rep = 2,
  CV.perc = 0.8,
  OPT.strategy = 'bigboss',
  metric.eval = c('TSS', 'AUCroc', 'BOYCE'),
  var.import = 3
)
# Further arguments left disabled in this example:
#   seed.val = 123
#   nb.cpu = 8
myBiomodModelOut

# Retrieve evaluation scores & variables importance
get_evaluations(myBiomodModelOut)
get_variables_importance(myBiomodModelOut)

# Plot evaluation scores & variables importance
bm_PlotEvalMean(bm.out = myBiomodModelOut, dataset = 'calibration')
bm_PlotEvalMean(bm.out = myBiomodModelOut, dataset = 'validation')
bm_PlotEvalBoxplot(
  bm.out = myBiomodModelOut,
  dataset = 'calibration',
  group.by = c('algo', 'algo')
)
bm_PlotEvalBoxplot(
  bm.out = myBiomodModelOut,
  dataset = 'calibration',
  group.by = c('algo', 'run')
)
bm_PlotVarImpBoxplot(
  bm.out = myBiomodModelOut,
  group.by = c('expl.var', 'algo', 'algo')
)
bm_PlotVarImpBoxplot(
  bm.out = myBiomodModelOut,
  group.by = c('expl.var', 'algo', 'run')
)
bm_PlotVarImpBoxplot(
  bm.out = myBiomodModelOut,
  group.by = c('algo', 'expl.var', 'run')
)

# Plot response curves for a subset of the built models
bm_PlotResponseCurves(
  bm.out = myBiomodModelOut,
  models.chosen = get_built_models(myBiomodModelOut)[c(1, 4, 8, 10, 13)],
  fixed.var = 'median'
)
bm_PlotResponseCurves(
  bm.out = myBiomodModelOut,
  models.chosen = get_built_models(myBiomodModelOut)[c(1, 4, 8, 10, 13)],
  fixed.var = 'min'
)
bm_PlotResponseCurves(
  bm.out = myBiomodModelOut,
  models.chosen = get_built_models(myBiomodModelOut)[4],
  fixed.var = 'median',
  do.bivariate = TRUE
)

# Explore models' outliers & residuals
bm_ModelAnalysis(
  bm.mod = myBiomodModelOut,
  models.chosen = get_built_models(myBiomodModelOut)[c(1, 4, 8, 10, 13)]
)
```

#### Ensemble models

```R
# Build the ensemble models from the single models
myBiomodEM <- BIOMOD_EnsembleModeling(
  bm.mod = myBiomodModelOut,
  models.chosen = 'all',
  em.by = 'all',
  em.algo = c('EMmedian', 'EMmean', 'EMwmean', 'EMca', 'EMci', 'EMcv'),
  metric.select = c('TSS'),
  metric.select.thresh = c(0.7),
  metric.eval = c('TSS', 'AUCroc', 'BOYCE'),
  var.import = 3,
  EMci.alpha = 0.05,
  EMwmean.decay = 'proportional'
)
myBiomodEM

# Retrieve evaluation scores & variables importance
get_evaluations(myBiomodEM)
get_variables_importance(myBiomodEM)

# Plot evaluation scores & variables importance
bm_PlotEvalMean(
  bm.out = myBiomodEM,
  dataset = 'calibration',
  group.by = 'full.name'
)
bm_PlotEvalBoxplot(
  bm.out = myBiomodEM,
  dataset = 'calibration',
  group.by = c('full.name', 'full.name')
)
bm_PlotVarImpBoxplot(
  bm.out = myBiomodEM,
  group.by = c('expl.var', 'full.name', 'full.name')
)
bm_PlotVarImpBoxplot(
  bm.out = myBiomodEM,
  group.by = c('expl.var', 'algo', 'merged.by.run')
)
bm_PlotVarImpBoxplot(
  bm.out = myBiomodEM,
  group.by = c('algo', 'expl.var', 'merged.by.run')
)

# Plot response curves for a subset of the built ensemble models
bm_PlotResponseCurves(
  bm.out = myBiomodEM,
  models.chosen = get_built_models(myBiomodEM)[c(1, 6, 7)],
  fixed.var = 'median'
)
bm_PlotResponseCurves(
  bm.out = myBiomodEM,
  models.chosen = get_built_models(myBiomodEM)[c(1, 6, 7)],
  fixed.var = 'min'
)
bm_PlotResponseCurves(
  bm.out = myBiomodEM,
  models.chosen = get_built_models(myBiomodEM)[7],
  fixed.var = 'median',
  do.bivariate = TRUE
)
```

### Project models

#### Single models

```R
# Project the single models onto `myExpl`, under the name 'Current'
myBiomodProj <- BIOMOD_Projection(
  bm.mod = myBiomodModelOut,
  proj.name = 'Current',
  new.env = myExpl,
  models.chosen = 'all',
  metric.binary = 'all',
  metric.filter = 'all',
  build.clamping.mask = TRUE
)
myBiomodProj
plot(myBiomodProj)
```

#### Ensemble models

```R
# Ensemble projection, reusing the single-model projections
myBiomodEMProj <- BIOMOD_EnsembleForecasting(
  bm.em = myBiomodEM,
  bm.proj = myBiomodProj,
  models.chosen = 'all',
  metric.binary = 'all',
  metric.filter = 'all'
)

# Ensemble projection, building the single-model projections
# (this assignment overwrites the object created just above)
myBiomodEMProj <- BIOMOD_EnsembleForecasting(
  bm.em = myBiomodEM,
  proj.name = 'CurrentEM',
  new.env = myExpl,
  models.chosen = 'all',
  metric.binary = 'all',
  metric.filter = 'all'
)
myBiomodEMProj
plot(myBiomodEMProj)
```

### Compare range sizes

```R
# Load environmental variables extracted from BIOCLIM
# (bio_3, bio_4, bio_7, bio_11 & bio_12)
data('bioclim_future')
myExplFuture <- rast(bioclim_future)

# Project the single models onto future conditions
myBiomodProjFuture <- BIOMOD_Projection(bm.mod = myBiomodModelOut,
                                        proj.name = 'Future',
                                        new.env = myExplFuture,
                                        models.chosen = 'all',
                                        metric.binary = 'TSS',
                                        build.clamping.mask = TRUE)

# Compute differences between the current and future projections
myBiomodRangeSize <- BIOMOD_RangeSize(proj.current = myBiomodProj,
                                      proj.future = myBiomodProjFuture,
                                      metric.binary = 'TSS')

# Inspect the per-model summary slot and plot the per-pixel difference slot
myBiomodRangeSize@Compt.By.Models
plot(myBiomodRangeSize@Diff.By.Pixel)

# Represent main results
bm_PlotRangeSize(bm.range = myBiomodRangeSize,
                 do.count = TRUE,
                 do.perc = TRUE,
                 do.maps = TRUE,
                 do.mean = TRUE)
```