## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  warning = FALSE,
  fig.width = 7,
  fig.height = 4,
  fig.align = "center"
)

## -----------------------------------------------------------------------------
# Load RuHere
library(RuHere)

# Loading the example data
data("occurrences", package = "RuHere")

## -----------------------------------------------------------------------------
# Remove invalid coordinates and store the result as a list to separate
# valid/invalid data
occ_split <- remove_invalid_coordinates(
  occ = occurrences,
  long = "decimalLongitude",
  lat = "decimalLatitude",
  return_invalid = TRUE
)

# Records with invalid coordinates
occ_split$invalid[, c("species", "decimalLongitude", "decimalLatitude")]

# Update the main 'occ' data frame to contain only the valid records
occ <- occ_split$valid

## -----------------------------------------------------------------------------
# Flag columns used throughout this script are logical: FALSE marks a flagged
# record and TRUE a clean one, hence the `sum(!...)` idiom to count flags.
occ <- flag_fossil(occ) # Scan for fossil-related terms
# Number of records flagged as fossil
sum(!occ$fossil_flag) # No records flagged as fossil

## -----------------------------------------------------------------------------
occ <- flag_cultivated(occ) # Flag cultivated records
# Number of records flagged as cultivated
sum(!occ$cultivated_flag)

## -----------------------------------------------------------------------------
# Flag all iNaturalist records (including Research Grade)
occ_inat <- flag_inaturalist(occ, research_grade = TRUE) # Flag even when research-grade
sum(!occ_inat$inaturalist_flag) # Number of flagged records

# Flag only iNaturalist records without Research Grade
occ <- flag_inaturalist(occ, research_grade = FALSE) # Flags only non-peer-verified iNaturalist records
sum(!occ$inaturalist_flag) # All iNaturalist records are classified as Research Grade

## -----------------------------------------------------------------------------
occ <- flag_year(occ, lower_limit = 1980, upper_limit = NULL) # We could specify an upper limit as well
sum(!occ$year_flag) # Number of flagged records

## -----------------------------------------------------------------------------
# Duplicated records: re-append the first 100 rows to the first 1000 rows
new_occ <- rbind(occurrences[1:1000, ], occurrences[1:100, ])

## -----------------------------------------------------------------------------
# Flag records and keep the most recent and preferably from GBIF:
# Create vector to prioritize gbif records
preferable_datasource <- c("gbif", "specieslink", "idigbio")
occ_dup1 <- flag_duplicates(
  occ = new_occ,
  continuous_variable = "year",
  categorical_variable = "data_source",
  priority_categories = preferable_datasource
)
sum(!occ_dup1$duplicated_flag) # Number of flagged records

## -----------------------------------------------------------------------------
# Flag duplicates based on coordinates and year
occ_dup2 <- flag_duplicates(occ = new_occ, additional_groups = "year")

## -----------------------------------------------------------------------------
# Import raster
data("worldclim", package = "RuHere")
wc <- terra::unwrap(worldclim) # Unpack raster

# Flag duplicates based on raster cells and keep the most recent
occ_dup3 <- flag_duplicates(
  occ = new_occ,
  continuous_variable = "year",
  by_cell = TRUE,
  raster_variable = wc
)

## -----------------------------------------------------------------------------
# Install the package if necessary
# if (!requireNamespace("CoordinateCleaner", quietly = TRUE)) {
#   install.packages("CoordinateCleaner")
# }

# Loading the package
library(CoordinateCleaner)

# Run spatial check using some tests
occ <- clean_coordinates(
  x = occ,
  tests = c("capitals", "centroids", "equal", "institutions", "zeros")
)

## -----------------------------------------------------------------------------
# NOTE(review): columns 19:25 are selected by position; presumably these are
# the flag columns added above -- confirm if the column layout changes.
head(occ[, 19:25])

## ----eval=FALSE---------------------------------------------------------------
# # Interactive map with map_here()
# map_here(occ, species = "Araucaria angustifolia", label = "record_id",
#          cex = 4)

## ----show mapview, eval=T, echo=F, fig.align='center', out.width='80%'--------
knitr::include_graphics("vignettes_img/IMG02.jpeg")

## ----out.width = "80%"--------------------------------------------------------
# Static map with ggplot
ggmap_here(
  occ,
  species = "Araucaria angustifolia",
  show_no_flagged = FALSE # Do not show unflagged records
)

## ----out.width = "80%"--------------------------------------------------------
ggmap_here(occ, species = "Araucaria angustifolia", facet_wrap = TRUE)

## -----------------------------------------------------------------------------
occ_consensus <- flag_consensus(
  occ,
  flags = c("cultivated", "year"),
  consensus_rule = "any_true",
  flag_name = "old_cultivated"
)

# Records flagged because they are cultivated and collected before 1980
occ_consensus_flagged <- occ_consensus[!occ_consensus$old_cultivated, ]
occ_consensus_flagged[, c("species", "cultivated_flag", "year_flag",
                          "old_cultivated")]

## ----out.width = "80%"--------------------------------------------------------
ggmap_here(
  occ_consensus,
  flags = c("year", "cultivated"),             # Specific flags to show
  additional_flags = "old_cultivated",         # Column name of the custom flag
  names_additional_flags = "Old & cultivated", # Label used in the legend
  col_additional_flags = "red",                # Color for the custom flag
  show_no_flagged = FALSE                      # Do not show unflagged records
)

## -----------------------------------------------------------------------------
# Create directory to save removed records
path_to_save <- file.path(tempdir(), "removed_records")
# showWarnings = FALSE: re-running this chunk in the same session would
# otherwise warn that the directory already exists
dir.create(path_to_save, showWarnings = FALSE)

# Identify records to force keeping and removing
to_keep <- c("gbif_17175", "gbif_6108")
to_remove <- c("gbif_5516", "specieslink_1091")

# Remove flagged records with manual control
# and save removed records to a folder
occ_cleaned <- remove_flagged(
  occ = occ,
  flags = "all",
  column_id = "record_id",
  force_keep = to_keep,
  force_remove = to_remove,
  save_flagged = TRUE,
  output_dir = path_to_save
)

# Total number of records
nrow(occ)
# Number of valid records
nrow(occ_cleaned)
# Number of records removed
nrow(occ) - nrow(occ_cleaned)

## ----eval=FALSE---------------------------------------------------------------
# fs::dir_tree(path_to_save)
# #> Temp/removed_records
# #> ├── Biodiversity Institution.gz
# #> ├── Capital centroid.gz
# #> ├── Country-Province centroid.gz
# #> ├── Cultivated.gz
# #> ├── Equal lat-long.gz
# #> └── Zero lat-long.gz

## -----------------------------------------------------------------------------
flag_summary <- summarize_flags(occ)

## -----------------------------------------------------------------------------
# Data.frame summarizing the number of records per flag
flag_summary$df_summary

## ----out.width = "80%"--------------------------------------------------------
# Bar plot
flag_summary$plot_summary

## ----out.width = "80%"--------------------------------------------------------
# Summarize removed records using saved data
flag_summary_dir <- summarize_flags(
  flagged_dir = path_to_save,
  show_unflagged = FALSE, # Do not show unflagged records
  fill = "firebrick"      # Change color of bars
)
flag_summary_dir$plot_summary

## ----eval=FALSE---------------------------------------------------------------
# ggplot2::ggsave(filename = file.path(path_to_save, "Summary.png"),
#                 plot = flag_summary_dir$plot_summary, width = 8, height = 5,
#                 dpi = 600)

## ----out.width = "80%"--------------------------------------------------------
# Create a grid of species richness
r_richness <- richness_here(occ, summary = "species", res = 2)

# Create a grid of record density (total number of occurrences)
r_records <- richness_here(occ, summary = "records", res = 2)

# Plotting the results
ggrid_here(r_richness)
ggrid_here(r_records)

## ----out.width = "80%"--------------------------------------------------------
# Converting flag columns to numeric for plotting
# We invert the logic so that errors (FALSE) become 1 and clean data (TRUE)
# become 0
occ$cultivated_flag_num <- as.numeric(!occ$cultivated_flag)

# Create the grid
r_flagged <- richness_here(
  occ,
  summary = "records",
  field = "cultivated_flag_num",
  field_name = "Cultivated records",
  fun = sum,
  res = 2
)

# Plot with ggrid_here
ggrid_here(
  r_flagged,
  low_color = "white",
  mid_color = "orange",
  high_color = "firebrick"
)