## ----include = FALSE----------------------------------------------------------
# Global knitr chunk defaults for this vignette: collapsed output prefixed
# with "#>", warnings and messages hidden, untidied source, and figures with
# fig.width 9 and fig.height 7.
knitr::opts_chunk$set(
  collapse = TRUE,
  tidy = FALSE,
  comment = "#>",
  warning = FALSE,
  message = FALSE,
  fig.width = 9,
  fig.height = 7
)

# Fix the RNG state so repeated builds start from the same seed.
set.seed(888L)

# Directory holding the pre-computed .rds files that later chunks load.
# The default is the original hard-coded location; set the
# BIRDDOG_VIGNETTE_DATA environment variable to use a copy stored elsewhere.
default_local_data <- "~/Sync/birddog/data-biogas/rawfiles-direct"
local_data <- Sys.getenv("BIRDDOG_VIGNETTE_DATA", unset = default_local_data)
if (!nzchar(local_data)) {
  # A variable that is set but empty is treated the same as an unset one.
  local_data <- default_local_data
}
has_local_data <- dir.exists(local_data)

# Evaluate chunks only when the data directory exists.
knitr::opts_chunk$set(eval = has_local_data)
# When local data is absent (e.g. CRAN), no chunk runs; the pre-built HTML ships instead.
## ----eval = FALSE-------------------------------------------------------------
# # stable version (CRAN)
# install.packages("birddog")
#
# # development version (GitHub)
# # install.packages("remotes")
# remotes::install_github("roneyfraga/birddog")
## -----------------------------------------------------------------------------
library(birddog)
## ----eval = FALSE-------------------------------------------------------------
# library(openalexR)
#
# # Fetch works from OpenAlex API
# url_api <- "https://api.openalex.org/works?page=1&filter=primary_location.source.id:s121026525"
#
# openalexR::oa_request(query_url = url_api) |>
# openalexR::oa2df(entity = "works") |>
# birddog::read_openalex(format = "api") ->
# M
#
# # Or from a CSV export
# M <- birddog::read_openalex("path/to/openalex-export.csv", format = "csv")
## ----eval = FALSE-------------------------------------------------------------
# # BibTeX
# M <- birddog::read_wos("path/to/savedrecs.bib", format = "bib")
#
# # RIS
# M <- birddog::read_wos("path/to/savedrecs.ris", format = "ris")
#
# # Plain text
# M <- birddog::read_wos("path/to/savedrecs.txt", format = "txt-plain-text")
#
# # Tab-delimited
# M <- birddog::read_wos("path/to/savedrecs.txt", format = "txt-tab-delimited")
## ----eval = FALSE-------------------------------------------------------------
# # Download from OpenAlex (~15 min)
# query_oa <- "( biogas )"
#
# openalexR::oa_fetch(
# entity = "works",
# title_and_abstract.search = query_oa,
# verbose = TRUE
# ) ->
# papers
#
# M <- birddog::read_openalex(papers, format = "api")
## ----eval = FALSE-------------------------------------------------------------
# # Pre-computed dataset
# url_m <- "https://roneyfraga.com/volume/keep_it/biogas-data/M.rds"
# M <- readRDS(gzcon(url(url_m)))  # gzcon() lets readRDS() decompress the stream
## ----include = FALSE----------------------------------------------------------
# Load the pre-computed dataset from the local data directory.
M <- file.path(local_data, "M.rds") |>
  readRDS()
## -----------------------------------------------------------------------------
# Print a column-by-column overview of the dataset.
M |>
  dplyr::glimpse()
## ----eval = FALSE-------------------------------------------------------------
# net <- birddog::sniff_network(M, type = "direct citation")
## ----include = FALSE----------------------------------------------------------
# Load the stored network object instead of rebuilding it.
net <- file.path(local_data, "net.rds") |>
  readRDS()
## -----------------------------------------------------------------------------
# Show selected node columns, largest TC first.
net |>
  tidygraph::activate(nodes) |>
  dplyr::select(name, AU, PY, TI, TC) |>
  dplyr::arrange(dplyr::desc(TC))
## ----eval = FALSE-------------------------------------------------------------
# comps <- birddog::sniff_components(net)
## ----include = FALSE----------------------------------------------------------
# Load the stored components result from the local data directory.
comps <- file.path(local_data, "comps.rds") |>
  readRDS()
## -----------------------------------------------------------------------------
# Render the first five rows of the components table.
comps$components |>
  dplyr::slice_head(n = 5) |>
  gt::gt()
## ----eval = FALSE-------------------------------------------------------------
# groups <- birddog::sniff_groups(
# comps,
# algorithm = "fast_greedy",
# min_group_size = 30,
# seed = 888L
# )
## ----include = FALSE----------------------------------------------------------
# Load the stored grouping result from the local data directory.
groups <- file.path(local_data, "groups.rds") |>
  readRDS()
## -----------------------------------------------------------------------------
# Render the aggregate table as a gt table.
groups$aggregate |>
  gt::gt()
## ----eval = FALSE-------------------------------------------------------------
# # ~2 min
# groups_attributes <- birddog::sniff_groups_attributes(
# groups,
# growth_rate_period = 2010:2024,
# show_results = FALSE
# )
#
## ----include = FALSE----------------------------------------------------------
# Load the stored group attributes instead of recomputing them.
groups_attributes <- file.path(local_data, "groups_attributes.rds") |>
  readRDS()
## -----------------------------------------------------------------------------
# Auto-print the attributes table.
groups_attributes$attributes_table
## -----------------------------------------------------------------------------
# Run birddog::sniff_groups_keywords() on the groups, then render a table
# restricted to groups c1g1, c1g2 and c1g3.
groups_keywords <- groups |>
  birddog::sniff_groups_keywords()
groups_keywords |>
  dplyr::filter(group %in% c("c1g1", "c1g2", "c1g3")) |>
  gt::gt()
## ----eval = FALSE-------------------------------------------------------------
# # ~30 min
# groups_terms <- birddog::sniff_groups_terms(groups, algorithm = "phrase")
#
## ----include = FALSE----------------------------------------------------------
# Load the stored group terms instead of recomputing them.
groups_terms <- file.path(local_data, "groups_terms.rds") |>
  readRDS()
## -----------------------------------------------------------------------------
# Render the first three rows of the terms table.
groups_terms$terms_table |>
  dplyr::slice_head(n = 3) |>
  gt::gt()
## ----eval = FALSE-------------------------------------------------------------
# # ~20 min
# groups_hubs <- birddog::sniff_groups_hubs(groups)
#
## ----include = FALSE----------------------------------------------------------
# Load the stored hubs result instead of recomputing it.
groups_hubs <- file.path(local_data, "groups_hubs.rds") |>
  readRDS()
## -----------------------------------------------------------------------------
# Keep rows whose zone is not "noHub", round Zi and Pi to two decimals, order
# by zone and then Zi (both descending), and render the first 15 rows.
groups_hubs |>
  dplyr::filter(zone != "noHub") |>
  dplyr::mutate(
    Zi = round(Zi, 2),
    Pi = round(Pi, 2)
  ) |>
  dplyr::arrange(dplyr::desc(zone), dplyr::desc(Zi)) |>
  dplyr::slice_head(n = 15) |>
  gt::gt() |>
  gt::text_transform(
    locations = gt::cells_body(columns = name),
    # NOTE(review): the template only echoes the cell text; confirm whether
    # extra markup (e.g. a link around the value) was intended here.
    fn = \(x) glue::glue("{x}")
  )
## ----eval = FALSE-------------------------------------------------------------
# # ~1.5 min
# groups_cct <- birddog::sniff_citations_cycle_time(
# groups,
# scope = "groups",
# start_year = 2000,
# end_year = 2024
# )
#
# groups_cct$plots[["c1g3"]]
## ----include = FALSE----------------------------------------------------------
# Load the stored citations-cycle-time result from the local data directory.
groups_cct <- file.path(local_data, "groups_cct.rds") |>
  readRDS()
## ----eval = FALSE-------------------------------------------------------------
# groups_entropy <- birddog::sniff_entropy(
# groups,
# scope = "groups",
# start_year = 2000,
# end_year = 2024
# )
#
# groups_entropy$plots[["c1g3"]]
## ----eval = FALSE-------------------------------------------------------------
# # ~2 min
# groups_cumulative <- birddog::sniff_groups_cumulative(groups)
#
## ----include = FALSE----------------------------------------------------------
# Load the stored cumulative-groups result from the local data directory.
groups_cumulative <- file.path(local_data, "groups_cumulative.rds") |>
  readRDS()
## ----eval = FALSE-------------------------------------------------------------
# suppressMessages({
# groups_cumulative_trajectories <- birddog::sniff_groups_trajectories(groups_cumulative)
# })
#
# birddog::plot_group_trajectories_2d(
# groups_cumulative_trajectories,
# group = "c1g3",
# label_vertical_position = -2
# )
#
# birddog::plot_group_trajectories_3d(
# groups_cumulative_trajectories,
# group = "c1g3"
# )
#
## ----eval = FALSE, warning = FALSE--------------------------------------------
# traj_data <- birddog::detect_main_trajectories(
# groups_cumulative_trajectories,
# group = "c1g3"
# )
#
# traj_filtered <- birddog::filter_trajectories(
# traj_data$trajectories,
# top_n = 3
# )
#
# birddog::plot_group_trajectories_lines_2d(
# traj_data = traj_data,
# traj_filtered = traj_filtered,
# title = "c1g3"
# )
#
# birddog::plot_group_trajectories_lines_3d(
# traj_data = traj_data,
# traj_filtered = traj_filtered,
# group_id = "c1g3"
# )
## ----eval = FALSE-------------------------------------------------------------
# # ~11 min
# groups_cumulative_citations <- birddog::sniff_groups_cumulative_citations(
# groups,
# min_citations = 2
# )
#
## ----eval = FALSE-------------------------------------------------------------
#
# groups_key_route <- birddog::sniff_key_route(groups, scope = "groups")
#
# groups_key_route[["c1g3"]]$plot
#
# groups_key_route[["c1g3"]]$data |>
# dplyr::select(-name) |>
# gt::gt()
## ----include = FALSE----------------------------------------------------------
# Load the stored key-route data for group c1g3.
key_route_c1g3_data <- file.path(local_data, "key_route_c1g3_data.rds") |>
  readRDS()
## -----------------------------------------------------------------------------
# Keep name (renamed to document), name2 and TI (renamed to title), then
# render the result as a gt table.
key_route_c1g3_data |>
  dplyr::select(
    document = name,
    name2,
    title = TI
  ) |>
  gt::gt() |>
  gt::text_transform(
    locations = gt::cells_body(columns = document),
    # NOTE(review): the template only echoes the cell text; confirm whether
    # extra markup (e.g. a link around the value) was intended here.
    fn = \(x) glue::glue("{x}")
  )
## ----eval = FALSE-------------------------------------------------------------
# # Prepare STM data (~30 min)
# groups_stm_prepare <- birddog::sniff_groups_stm_prepare(
# groups,
# group_to_stm = "c1g3"
# )
## ----include = FALSE----------------------------------------------------------
# Load the stored STM preparation result from the local data directory.
groups_stm_prepare <- file.path(local_data, "groups_stm_prepare.rds") |>
  readRDS()
## ----eval = FALSE-------------------------------------------------------------
# groups_stm_prepare$plots[['metrics_by_k']]
# groups_stm_prepare$plots[['exclusivity_vs_coherence']]
## ----eval = FALSE-------------------------------------------------------------
# # Run STM (~35 sec)
# groups_stm_run <- birddog::sniff_groups_stm_run(
# groups_stm_prepare,
# k_topics = 17,
# n_top_documents = 20
# )
## ----eval = FALSE-------------------------------------------------------------
# groups_stm_run$topic_proportion |>
# dplyr::mutate(topic_proportion = round(topic_proportion, 3)) |>
# gt::gt()
#
# groups_stm_run$top_documents |>
# dplyr::group_by(topic) |>
# dplyr::arrange(dplyr::desc(gamma)) |>
# dplyr::slice_head(n = 3) |>
# dplyr::select(-DI) |>
# gt::gt() |>
# gt::text_transform(
# locations = gt::cells_body(columns = document),
# fn = function(x) {
# glue::glue('{x}')
# }
# )
## -----------------------------------------------------------------------------
# Record the R version and loaded packages for reproducibility.
utils::sessionInfo()