## ----include = FALSE----------------------------------------------------------
# Global knitr output settings shared by every chunk of the vignette.
knitr::opts_chunk$set(
  collapse = TRUE,
  tidy = FALSE,
  comment = "#>",
  warning = FALSE,
  message = FALSE,
  fig.width = 9,
  fig.height = 7
)

set.seed(888L)

# Directory holding the pre-computed .rds objects loaded by the hidden chunks.
local_data <- "~/Sync/birddog/data-biogas/rawfiles-direct"
has_local_data <- dir.exists(local_data)

# Later chunks are evaluated only when the local data directory exists.
# When it is absent (e.g. CRAN), no chunk runs; the pre-built HTML ships
# instead.
knitr::opts_chunk$set(eval = has_local_data)

# Read one pre-computed object, by file name, from `local_data`.
read_local_rds <- function(file_name) {
  readRDS(file.path(local_data, file_name))
}

## ----eval = FALSE-------------------------------------------------------------
# # stable version (CRAN)
# install.packages("birddog")
#
# # development version (GitHub)
# # install.packages("remotes")
# remotes::install_github("roneyfraga/birddog")

## -----------------------------------------------------------------------------
library(birddog)

## ----eval = FALSE-------------------------------------------------------------
# library(openalexR)
#
# # Fetch works from OpenAlex API
# url_api <- "https://api.openalex.org/works?page=1&filter=primary_location.source.id:s121026525"
#
# M <- openalexR::oa_request(query_url = url_api) |>
#   openalexR::oa2df(entity = "works") |>
#   birddog::read_openalex(format = "api")
#
# # Or from a CSV export
# M <- birddog::read_openalex("path/to/openalex-export.csv", format = "csv")

## ----eval = FALSE-------------------------------------------------------------
# # BibTeX
# M <- birddog::read_wos("path/to/savedrecs.bib", format = "bib")
#
# # RIS
# M <- birddog::read_wos("path/to/savedrecs.ris", format = "ris")
#
# # Plain text
# M <- birddog::read_wos("path/to/savedrecs.txt", format = "txt-plain-text")
#
# # Tab-delimited
# M <- birddog::read_wos("path/to/savedrecs.txt", format = "txt-tab-delimited")

## ----eval = FALSE-------------------------------------------------------------
# # Download from OpenAlex (~15 min)
# query_oa <- "( biogas )"
#
# papers <- openalexR::oa_fetch(
#   entity = "works",
#   title_and_abstract.search = query_oa,
#   verbose = TRUE
# )
#
# M <- birddog::read_openalex(papers, format = "api")

## ----eval = FALSE-------------------------------------------------------------
# # Pre-computed dataset
# url_m <- "https://roneyfraga.com/volume/keep_it/biogas-data/M.rds"
# M <- readRDS(url(url_m))

## ----include = FALSE----------------------------------------------------------
M <- read_local_rds("M.rds")

## -----------------------------------------------------------------------------
dplyr::glimpse(M)

## ----eval = FALSE-------------------------------------------------------------
# net <- birddog::sniff_network(M, type = "direct citation")

## ----include = FALSE----------------------------------------------------------
net <- read_local_rds("net.rds")

## -----------------------------------------------------------------------------
# Node table of the network, ordered by descending TC.
net |>
  tidygraph::activate(nodes) |>
  dplyr::select(name, AU, PY, TI, TC) |>
  dplyr::arrange(dplyr::desc(TC))

## ----eval = FALSE-------------------------------------------------------------
# comps <- birddog::sniff_components(net)

## ----include = FALSE----------------------------------------------------------
comps <- read_local_rds("comps.rds")

## -----------------------------------------------------------------------------
# First five rows of the components table.
comps$components |>
  dplyr::slice_head(n = 5) |>
  gt::gt()

## ----eval = FALSE-------------------------------------------------------------
# groups <- birddog::sniff_groups(
#   comps,
#   algorithm = "fast_greedy",
#   min_group_size = 30,
#   seed = 888L
# )

## ----include = FALSE----------------------------------------------------------
groups <- read_local_rds("groups.rds")

## -----------------------------------------------------------------------------
groups$aggregate |>
  gt::gt()

## ----eval = FALSE-------------------------------------------------------------
# # ~2 min
# groups_attributes <- birddog::sniff_groups_attributes(
#   groups,
#   growth_rate_period = 2010:2024,
#   show_results = FALSE
# )
#

## ----include = FALSE----------------------------------------------------------
groups_attributes <- read_local_rds("groups_attributes.rds")

## -----------------------------------------------------------------------------
groups_attributes$attributes_table

## -----------------------------------------------------------------------------
groups_keywords <- birddog::sniff_groups_keywords(groups)

# Show the keyword rows for three selected groups only.
groups_keywords |>
  dplyr::filter(group %in% c("c1g1", "c1g2", "c1g3")) |>
  gt::gt()

## ----eval = FALSE-------------------------------------------------------------
# # ~30 min
# groups_terms <- birddog::sniff_groups_terms(groups, algorithm = "phrase")
#

## ----include = FALSE----------------------------------------------------------
groups_terms <- read_local_rds("groups_terms.rds")

## -----------------------------------------------------------------------------
groups_terms$terms_table |>
  dplyr::slice_head(n = 3) |>
  gt::gt()

## ----eval = FALSE-------------------------------------------------------------
# # ~20 min
# groups_hubs <- birddog::sniff_groups_hubs(groups)
#

## ----include = FALSE----------------------------------------------------------
groups_hubs <- read_local_rds("groups_hubs.rds")

## -----------------------------------------------------------------------------
# Drop rows whose zone is "noHub", round Zi and Pi to two decimals, and show
# the first 15 rows after sorting by zone and Zi (both descending).
# NOTE(review): the glue template is just "{x}", so the cell text is passed
# through unchanged -- confirm whether extra markup was intended here.
groups_hubs |>
  dplyr::filter(zone != "noHub") |>
  dplyr::mutate(
    Zi = round(Zi, 2),
    Pi = round(Pi, 2)
  ) |>
  dplyr::arrange(dplyr::desc(zone), dplyr::desc(Zi)) |>
  dplyr::slice_head(n = 15) |>
  gt::gt() |>
  gt::text_transform(
    locations = gt::cells_body(columns = name),
    fn = \(x) glue::glue("{x}")
  )

## ----eval = FALSE-------------------------------------------------------------
# # ~1.5 min
# groups_cct <- birddog::sniff_citations_cycle_time(
#   groups,
#   scope = "groups",
#   start_year = 2000,
#   end_year = 2024
# )
#
# groups_cct$plots[["c1g3"]]

## ----include = FALSE----------------------------------------------------------
groups_cct <- read_local_rds("groups_cct.rds")

## ----eval = FALSE-------------------------------------------------------------
# groups_entropy <- birddog::sniff_entropy(
#   groups,
#   scope = "groups",
#   start_year = 2000,
#   end_year = 2024
# )
#
# groups_entropy$plots[["c1g3"]]

## ----eval = FALSE-------------------------------------------------------------
# # ~2 min
# groups_cumulative <- birddog::sniff_groups_cumulative(groups)
#

## ----include = FALSE----------------------------------------------------------
groups_cumulative <- read_local_rds("groups_cumulative.rds")

## ----eval = FALSE-------------------------------------------------------------
# suppressMessages({
#   groups_cumulative_trajectories <- birddog::sniff_groups_trajectories(groups_cumulative)
# })
#
# birddog::plot_group_trajectories_2d(
#   groups_cumulative_trajectories,
#   group = "c1g3",
#   label_vertical_position = -2
# )
#
# birddog::plot_group_trajectories_3d(
#   groups_cumulative_trajectories,
#   group = "c1g3"
# )
#

## ----eval = FALSE, warning = FALSE--------------------------------------------
# traj_data <- birddog::detect_main_trajectories(
#   groups_cumulative_trajectories,
#   group = "c1g3"
# )
#
# traj_filtered <- birddog::filter_trajectories(
#   traj_data$trajectories,
#   top_n = 3
# )
#
# birddog::plot_group_trajectories_lines_2d(
#   traj_data = traj_data,
#   traj_filtered = traj_filtered,
#   title = "c1g3"
# )
#
# birddog::plot_group_trajectories_lines_3d(
#   traj_data = traj_data,
#   traj_filtered = traj_filtered,
#   group_id = "c1g3"
# )

## ----eval = FALSE-------------------------------------------------------------
# # ~11 min
# groups_cumulative_citations <- birddog::sniff_groups_cumulative_citations(
#   groups,
#   min_citations = 2
# )
#

## ----eval = FALSE-------------------------------------------------------------
#
# groups_key_route <- birddog::sniff_key_route(groups, scope = "groups")
#
# groups_key_route[["c1g3"]]$plot
#
# groups_key_route[["c1g3"]]$data |>
#   dplyr::select(-name) |>
#   gt::gt()

## ----include = FALSE----------------------------------------------------------
key_route_c1g3_data <- read_local_rds("key_route_c1g3_data.rds")

## -----------------------------------------------------------------------------
# Keep three columns (renaming `name` to `document` and `TI` to `title`) and
# render them as a gt table.
# NOTE(review): as above, the "{x}" template leaves the cell text unchanged --
# confirm whether extra markup was intended.
key_route_c1g3_data |>
  dplyr::select(document = name, name2, title = TI) |>
  gt::gt() |>
  gt::text_transform(
    locations = gt::cells_body(columns = document),
    fn = \(x) glue::glue("{x}")
  )

## ----eval = FALSE-------------------------------------------------------------
# # Prepare STM data (~30 min)
# groups_stm_prepare <- birddog::sniff_groups_stm_prepare(
#   groups,
#   group_to_stm = "c1g3"
# )

## ----include = FALSE----------------------------------------------------------
groups_stm_prepare <- read_local_rds("groups_stm_prepare.rds")

## ----eval = FALSE-------------------------------------------------------------
# groups_stm_prepare$plots[["metrics_by_k"]]
# groups_stm_prepare$plots[["exclusivity_vs_coherence"]]

## ----eval = FALSE-------------------------------------------------------------
# # Run STM (~35 sec)
# groups_stm_run <- birddog::sniff_groups_stm_run(
#   groups_stm_prepare,
#   k_topics = 17,
#   n_top_documents = 20
# )

## ----eval = FALSE-------------------------------------------------------------
# groups_stm_run$topic_proportion |>
#   dplyr::mutate(topic_proportion = round(topic_proportion, 3)) |>
#   gt::gt()
#
# groups_stm_run$top_documents |>
#   dplyr::group_by(topic) |>
#   dplyr::arrange(dplyr::desc(gamma)) |>
#   dplyr::slice_head(n = 3) |>
#   dplyr::select(-DI) |>
#   gt::gt() |>
#   gt::text_transform(
#     locations = gt::cells_body(columns = document),
#     fn = function(x) {
#       glue::glue("{x}")
#     }
#   )

## -----------------------------------------------------------------------------
sessionInfo()