## ----set_options, include = FALSE---------------------------------------------
knitr::opts_chunk$set(
  eval = FALSE, # Chunks of codes will not be evaluated by default
  collapse = TRUE,
  comment = "#>",
  fig.width = 7, fig.height = 5,   # Set device size at rendering time (when plots are generated)
  out.width = "100%" # Ensure that figures are fitting the vignette width
)

## ----setup, eval = TRUE, include = FALSE--------------------------------------
library(deepSTRAPP)

is_dev_version <- function (pkg = "deepSTRAPP")
{
  # # Check if ran on CRAN
  # not_cran <- identical(Sys.getenv("NOT_CRAN"), "true") # || interactive()

  # Version number check
  version <- tryCatch(as.character(utils::packageVersion(pkg)), error = function(e) "")
  dev_version <- grepl("\\.9000", version)

  # not_cran || dev_version
  
  return(dev_version)
}


## ----adjust_dpi_CRAN, include = FALSE, eval = !is_dev_version()---------------
knitr::opts_chunk$set(
  dpi = 72   # Lower DPI to save space
)

## ----adjust_dpi_dev, include = FALSE, eval = is_dev_version()-----------------
# knitr::opts_chunk$set(
#   dpi = 72   # Default DPI for the dev version
# )

## ----model_diversity_dynamics-------------------------------------------------
# ## Goal: Map evolution of diversification rates and regime shifts on the time-calibrated phylogeny
# 
# # Run a BAMM (Bayesian Analysis of Macroevolutionary Mixtures)
# 
# # Step 1/ Set BAMM - Record BAMM settings and generate all input files needed for BAMM.
# # Step 2/ Run BAMM - Run BAMM and move output files in dedicated directory.
# # Step 3/ Evaluate BAMM - Produce evaluation plots and ESS data.
# # Step 4/ Import BAMM outputs - Load `BAMM_object` in R and subset posterior samples.
# # Step 5/ Clean BAMM files - Remove files generated during the BAMM run.
# 
# # All these actions are performed by a single function: deepSTRAPP::prepare_diversification_data()
# ?deepSTRAPP::prepare_diversification_data()
# 
# # This function perform a single BAMM run to infer diversification rates and regime shifts.
# # Due to the stochastic nature of the exploration of the parameter space with MCMC process,
# # best practice recommend to ran multiple runs and check for convergence of the MCMC traces,
# # ensuring that the region of high probability has been reached by your MCMC runs.
# 
# 
# ## Parametrize BAMM
# 
# # BAMM accepts numerous arguments that control the modeling process.
# # The main arguments are listed in the function deepSTRAPP::prepare_diversification_data().
# # Additional arguments can be provided as a named list under 'additional_BAMM_settings'.
# # To see all available settings, load and print the BAMM template file provided within deepSTRAPP.
# data(BAMM_template_diversification)
# print(BAMM_template_diversification)
# 
# 
# ## Load time-calibrated phylogeny
# library(phytools)
# data(whale.tree)
# 
# # Source: Steeman, M. E., M. B. Hebsgaard, R. E. Fordyce, S. Y. W. Ho, D. L. Rabosky,
# # R. Nielsen, C. Rahbek, H. Glenner, M. V. Sorensen, and E. Willerslev (2009)
# # Radiation of extant cetaceans driven by restructuring of the oceans. Systematic Biology, 58, 573-585.
# 
# 
# ## Run BAMM workflow with deepSTRAPP
# whale_BAMM_object <- prepare_diversification_data(
#    BAMM_install_directory_path = "./software/bamm-2.5.0/", # To provide path to BAMM directory
#    phylo = whale.tree,
#    prefix_for_files = "whale",
#    seed = 1234, # Set seed for reproducibility
#    numberOfGenerations = 10^5, # Set low for the example (Default = 10^7)
#    # Set the overall proportion of terminals in the phylogeny compared to
#    # the estimated overall richness in the clade
#    globalSamplingFraction = 1.0,
#    # The path to the `.txt` file used to provide clade-specific sampling fractions.
#    # Here, we use a global sampling fraction.
#    sampleProbsFilename = NULL,
#    # Set the expected number of regime shifts. It acts as an hyperparameter controlling
#    # the exponential prior distribution used to modulate reversible jumps across
#    # model configurations in the rjMCMC run.
#    # When set to 'NULL', an empirical rule is used to define this value: 1 regime shift expected
#    # for every 100 tips in the phylogeny, with a minimum of 1.
#    expectedNumberOfShifts = NULL,
#    # Set the frequency in which to write the event data to the output file =
#    # the sampling frequency of posterior samples.
#    # When set to `NULL`, the frequency is set such as 2000 posterior samples
#    # are recorded (before removing the burn-in).
#    eventDataWriteFreq = NULL,
#    # Proportion of posterior samples removed from the BAMM output to ensure that the remaining samples
#    # where drawn once the equilibrium distribution was reached.
#    burn_in = 0.25,
#    # Number of posterior samples to extract, after removing the burn-in,
#    # in the final `BAMM_object` to use for downstream analyses.
#    nb_posterior_samples = 1000,
#    # List of additional arguments as found in `BAMM_template_diversification`.
#    additional_BAMM_settings = list(),
#    # Output directory used to store input/output files generated
#    BAMM_output_directory_path = "./BAMM_outputs/",
#    keep_BAMM_outputs = TRUE, # To keep the BAMM_output directory after the run
#    # Controls the definition of 'core-shifts' used to distinguish across configurations
#    # when identifying the most frequent regime shift configuration (MAP) across samples.
#    MAP_odd_ratio_threshold = 5,
#    # To include (or not) the evaluation step and produce MCMC trace, ESS,
#    # and prior/posterior comparisons for expected number of shifts.
#    skip_evaluations = FALSE,
#    plot_evaluations = TRUE, # To plot the three outputs of the evaluation step
#    # To save in a table (ESS), and PDFs (MCMC trace, and prior/posterior comparisons
#    # for expected number of shifts) the evaluation outputs.
#    save_evaluations = TRUE)
# 
# ## Inspect evaluations
# 
# # This function produces two evaluation plots and one table to check the quality of the BAMM run.
# 
# # 1/ Plot the MCMC trace = evolution of logLik across MCMC generations.
#    # Output file = 'MCMC_trace_logLik.pdf'.
# 
# # 2/ Compute the Effective Sample Size (ESS) across posterior samples (after removing burn-in)
#    # using coda::effectiveSize().
#    # This is a way to evaluate if your MCMC runs has enough generations to produce robust estimates.
#    # Ideally, ESS should be higher than 200. Output file = 'ESS_df.csv'.
# 
# # 3/ Plot the comparison of prior and posterior distributions of the number of regime shifts
#    # with BAMMtools::plotPrior. Output file = 'PP_nb_shifts_plot.pdf'.
#    # A good value for expectedNumberOfShifts is one with high similarities between the distributions
#    # hinting that the information in the data coincides
#    # with your expectations for the number of regime shifts.
#    # The best practice consists in trying several values to control if it affects or not the final output.
# 
# # An example of these plots and table is shown below:
# 

## ----model_diversity_dynamics_eval, eval = TRUE, echo = FALSE-----------------

## Include the three evaluation files within the package, so I can load the ESS table, and include display of the two evaluation plots here

# 1/ MCMC trace
knitr::include_graphics("figures/whale_MCMC_trace_logLik.png", dpi = 50)

# 2/ ESS data.frame
whale_ESS_df <- read.table(file = "tables/whale_ESS_df.csv", sep = ",", header = TRUE)
cat("Effective Sample Size (ESS) across posterior samples\n")
whale_ESS <- unlist(whale_ESS_df[1, , drop = TRUE])
print(whale_ESS)

# 3/ PP nb shifts vs. Prior
knitr::include_graphics("figures/whale_PP_nb_shifts_plot.png", dpi = 50)


## ----explore_diversity_dynamics-----------------------------------------------
# ### Explore diversity dynamics
# 
# # Load directly the result
# data(whale_BAMM_object, package = "deepSTRAPP")
# 
# # Explore output
# str(whale_BAMM_object, 1)
# 
# # Record the regime shift events and macroevolutionary regimes parameters across posterior samples
# str(whale_BAMM_object$eventData, 1)
# # Mean speciation rates at tips aggregated across all posterior samples
# head(whale_BAMM_object$meanTipLambda)
# # Mean extinction rates at tips aggregated across all posterior samples
# head(whale_BAMM_object$meanTipMu)
# 
# 
# ### Explore posterior probabilities of regime shifts
# 
# # Each posterior sample has its own diversification history with
# # time-varying diversification rates and regime shift locations.
# # To visualize the expected location of regime shifts,
# # we compute the Marginal posterior Shift Probability (MSP) of each branch,
# # as the probability to observe a regime shift on each individual branch across all posterior samples.
# # For more details, see `[BAMMtools::marginalShiftProbsTree()]`.
# # This branch-specific information is stored as phylogenetic tree with branch scaled to their MSP score.
# whale_BAMM_object$MSP_tree # Tree with branch scaled to MSP scores.
# 
# # Plot the MSP tree
# plot(whale_BAMM_object$MSP_tree, cex = 0.25)
# title(main = "Marginal posterior Shift Probabilities per branches")
# # Please note that a series of long branch does not indicate that a regime shift
# # is likely to occur on each branch, but rather that the location of the regime shift
# # is uncertain and shared across closely related branches, which is illustrated by the next plots.
# 
# # This information can be used to adjust the size of the symbols showing the location
# # of regime shifts on the tree using the `adjust_size_to_prob = TRUE` argument
# # in `deepSTRAPP::plot_BAMM_rates()`.
# 
# 
# ### Define regime shift location
# 
# # deepSTRAPP allows to plot the location of regime shifts over mean diversification rates mapped on a phylogeny
# ?deepSTRAPP::plot_BAMM_rates()
# 
# # Three options are available based on the regime shifts recorded across all BAMM posterior samples
# 
#  # `index` = Posterior sample index.
#    # Used to select a unique posterior BAMM sample and map regime shifts as observed in this sample.
# 
#  # `MAP` = Maximum A Posteriori probability.
#    # Shows location of regime shifts according to the samples exhibiting
#    # the Maximum A Posteriori probability (MAP) configuration =  the most frequent configuration
#    # for the location of regime shifts across all BAMM posterior samples.
#    # This only accounts for which branches the regimes occur on, not the location of shifts
#    # along the branches.
#    # For more details, see `[BAMMtools::credibleShiftSet()]`.
#    # Only regime shifts with an odd-ratio of marginal posterior shift probability / prior shift probability
#    # higher than the threshold provided in the `MAP_odd_ratio_threshold` argument (default = 5)
#    # are considered as core-shifts and used to identify the MAP configuration.
#    # The location of the regimes along each branch is then averaged across all the identified MAP samples.
# 
# # Provide the indices of all MAP samples
# whale_BAMM_object$MAP_indices
# # BAMM object summarizing diversification dynamics for MAP samples only. Used to plot the MAP mean rates.
# whale_BAMM_object$MAP_BAMM_object
# 
#  # `MSC` = Maximum Shift Credibility.
#    # Shows location of regime shifts according to the samples exhibiting
#    # the Maximum Shift Credibility (MCS) configuration = the configuration with the highest Posterior
#    # probability to occur as computed from the product of the marginal branch-specific shift probabilities.
#    # This only accounts for which branches the regimes occur on, not the location of shifts
#    # along the branches.
#    # For more details, see `[BAMMtools::maximumShiftCredibility()]`.
#    # This option can be useful if multiple configurations are present in relatively close frequencies,
#    # making it ambiguous to identify the MAP configuration.
#    # The location of the regimes along each branch is then averaged across all the identified MSC samples.
# 
# # Provide the indices of all MSC samples
# whale_BAMM_object$MSC_indices
# # BAMM object summarizing diversification dynamics for MSC samples only. Used to plot the MSC mean rates.
# whale_BAMM_object$MSC_BAMM_object
# 
# 
# ### Plot rates and regimes
# 
# ## Plot two samples
# 
# # Plot configuration in BAMM posterior sample n°100
# plot_BAMM_rates(whale_BAMM_object,
#                 configuration_type = "index",
#                 sample_index = 100,
#                 cex = 0.2, # Adjust tip label size
#                 labels = TRUE, legend = TRUE)
# title(main = "Shift location: Sample n\u00B0100")
# 
# # Plot configuration in BAMM posterior sample n°700
# plot_BAMM_rates(whale_BAMM_object,
#                 configuration_type = "index",
#                 sample_index = 700,
#                 cex = 0.2, # Adjust tip label size
#                 labels = TRUE, legend = TRUE)
# title(main = "Shift location: Sample n\u00B0700")
# 
# ## Plot the MAP configuration
# plot_BAMM_rates(whale_BAMM_object,
#                 configuration_type = "MAP",
#                 cex = 0.2, # Adjust tip label size
#                 labels = TRUE, legend = TRUE)
# title(main = "Mean rates: Overall - Mean shift locations: MAP")
# 
# ## Plot the MSC configuration
# plot_BAMM_rates(whale_BAMM_object,
#                 configuration_type = "MSC",
#                 regimes_pch = 23, # Use a diamond symbol
#                 regimes_fill = "purple", # Change fill color of symbols
#                 cex = 0.25, # Adjust tip label size
#                 labels = TRUE, legend = TRUE)
# title(main = "Mean rates: Overall - Mean shift locations: MSC")
# 
# 
# # Similarly to the two posterior samples used as examples,
# # the MAP and MSC configurations do not agree on the location of regime shifts in this case.
# # However, they highlight the existence of a unique regime shift located on either of the two branches.
# 
# ## Note on diversification models for time-calibrated phylogenies
# 
# # This function relies on BAMM to provide a reliable solution to map diversification rates
# # and regime shifts on a time-calibrated phylogeny and obtain the `BAMM_object` object needed
# # to run the deepSTRAPP workflow ([run_deepSTRAPP_for_focal_time], [run_deepSTRAPP_over_time]).
# # However, it is one option among others for modeling diversification on phylogenies.
# # You may wish to explore alternatives models such as LSBDS model in RevBayes (Höhna et al., 2016),
# # the MTBD model (Barido-Sottani et al., 2020), or the ClaDS2 model (Maliet et al., 2019) for your own data.
# # However, you will need Bayesian models that infer regime shifts
# # to be able to perform STRAPP tests (Rabosky & Huang, 2016).
# # Additionally, you need to format the model output such as in `BAMM_object`,
# # so it can be used in a deepSTRAPP workflow.
# 

## ----explore_diversity_dynamics_eval, eval = TRUE, echo = FALSE---------------
# Load directly the result
data(whale_BAMM_object, package = "deepSTRAPP")

# Plot the MSP tree
plot(whale_BAMM_object$MSP_tree, cex = 0.25)
title(main = "Marginal posterior Shift Probabilities per branches")

## Plot two samples

# Plot configuration in BAMM posterior sample n°100
plot_BAMM_rates(whale_BAMM_object, 
                configuration_type = "index",
                sample_index = 100,  
                cex = 0.2, # Adjust tip label size
                labels = TRUE, legend = TRUE)
title(main = "Shift location: Sample n\u00B0100")

# Plot configuration in BAMM posterior sample n°700
plot_BAMM_rates(whale_BAMM_object, 
                configuration_type = "index",
                sample_index = 700,  
                cex = 0.2, # Adjust tip label size
                labels = TRUE, legend = TRUE)
title(main = "Shift location: Sample n\u00B0700")

## Plot the MAP configuration
plot_BAMM_rates(whale_BAMM_object, 
                configuration_type = "MAP",
                cex = 0.2, # Adjust tip label size
                labels = TRUE, legend = TRUE)
title(main = "Mean shift locations: MAP")

## Plot the MSC configuration
plot_BAMM_rates(whale_BAMM_object, 
                configuration_type = "MSC",
                regimes_pch = 23, # Use a diamond symbol
                regimes_fill = "purple", # Change fill color of symbols
                cex = 0.2, # Adjust tip label size
                labels = TRUE, legend = TRUE)
title(main = "Mean shift locations: MSC")