# Example workflow script for the SportMiner package. The layout (chunk headers
# of the form `## ----label----`) appears to be code extracted from a knitr
# document. The setup chunk below sets `eval = FALSE` as the chunk default, and
# every later chunk is present only as commented-out code, so sourcing this
# file runs nothing except the knitr option call.

## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  eval = FALSE
)

## ----install, eval=FALSE------------------------------------------------------
# install.packages("SportMiner")

## ----api-key------------------------------------------------------------------
# library(SportMiner)
#
# # Option 1: Set directly
# sm_set_api_key("your_api_key_here")
#
# # Option 2: Set via environment variable (recommended)
# # Add to your .Renviron file:
# # SCOPUS_API_KEY=your_api_key_here
# # Then restart R and run:
# sm_set_api_key()

## ----search-------------------------------------------------------------------
# # Define the search query
# query <- paste0(
#   'TITLE-ABS-KEY(',
#   '("talent identification" OR "sport science" OR "athlete") ',
#   'AND ',
#   '("principal component analysis" OR "PCA" OR "cluster analysis") ',
#   ') AND DOCTYPE(ar) AND PUBYEAR > 2010'
# )
#
# # Retrieve papers
# papers <- sm_search_scopus(
#   query = query,
#   max_count = 100,
#   verbose = TRUE
# )
#
# # View the data structure
# head(papers[, c("title", "year", "author_keywords")])

## ----preprocess---------------------------------------------------------------
# # Preprocess abstracts
# processed_data <- sm_preprocess_text(
#   data = papers,
#   text_col = "abstract",
#   min_word_length = 3
# )
#
# # View the processed data
# head(processed_data)

## ----dtm----------------------------------------------------------------------
# # Create DTM
# dtm <- sm_create_dtm(
#   word_counts = processed_data,
#   min_term_freq = 3,
#   max_term_freq = 0.5
# )
#
# # Check dimensions
# print(paste("Documents:", dtm$nrow, "| Terms:", dtm$ncol))

## ----optimal-k----------------------------------------------------------------
# # Test different values of k
# k_selection <- sm_select_optimal_k(
#   dtm = dtm,
#   k_range = seq(4, 16, by = 2),
#   method = "gibbs",
#   plot = TRUE
# )
#
# # View results
# print(k_selection$results)
# print(paste("Optimal k:", k_selection$optimal_k))

## ----train-lda----------------------------------------------------------------
# # Train the model
# lda_model <- sm_train_lda(
#   dtm = dtm,
#   k = k_selection$optimal_k,
#   method = "gibbs",
#   iter = 500
# )

## ----plot-terms---------------------------------------------------------------
# # Plot top terms
# sm_plot_topic_terms(
#   model = lda_model,
#   n_terms = 10
# )

## ----plot-frequency-----------------------------------------------------------
# # Plot document distribution
# sm_plot_topic_frequency(
#   model = lda_model,
#   dtm = dtm
# )

## ----plot-trends--------------------------------------------------------------
# # Add doc_id to papers for joining
# papers$doc_id <- paste0("doc_", seq_len(nrow(papers)))
#
# # Plot trends
# sm_plot_topic_trends(
#   model = lda_model,
#   dtm = dtm,
#   metadata = papers,
#   doc_id_col = "doc_id"
# )

## ----keyword-network----------------------------------------------------------
# # Create network
# network_plot <- sm_keyword_network(
#   data = papers,
#   keyword_col = "author_keywords",
#   min_cooccurrence = 2,
#   top_n = 30
# )
#
# print(network_plot)

## ----compare-models-----------------------------------------------------------
# # Run comparison
# comparison <- sm_compare_models(
#   dtm = dtm,
#   k = 10,
#   seed = 1729,
#   verbose = TRUE
# )
#
# # View metrics
# print(comparison$metrics)
#
# # Get recommendation
# print(paste("Recommended model:", comparison$recommendation))
#
# # Use the recommended model
# best_model <- comparison$models[[tolower(comparison$recommendation)]]

## ----custom-theme-------------------------------------------------------------
# library(ggplot2)
#
# # Create a plot with custom theme settings
# p <- sm_plot_topic_frequency(lda_model, dtm)
#
# # Add customizations
# # NOTE(review): the search query above uses `PUBYEAR > 2010` (2011 onward),
# # while the subtitle below says "2010-2025" -- confirm which is intended.
# p +
#   labs(
#     title = "Distribution of Research Topics in Sport Science",
#     subtitle = "Based on 100 papers from Scopus (2010-2025)"
#   ) +
#   theme_sportminer(base_size = 14, grid = FALSE)