# R code extracted from an R Markdown document (knitr-style `## ----` chunk
# headers). Chunks that are not evaluated are kept as `# `-prefixed comments so
# they document usage without running; only the setup chunk and the
# `eval=TRUE` chunk below contain live code.

## ----include = FALSE----------------------------------------------------------
# Chunk options only matter while knitting; guard the call so the script can
# still be sourced on a machine where knitr is not installed.
if (requireNamespace("knitr", quietly = TRUE)) {
  knitr::opts_chunk$set(
    collapse = TRUE,
    comment = "#>",
    eval = FALSE
  )
}

## ----setup--------------------------------------------------------------------
# library(putior)

## ----eval=FALSE---------------------------------------------------------------
# # Run the complete example
# source(system.file("examples", "reprex.R", package = "putior"))

## -----------------------------------------------------------------------------
# # Scan all R and Python files in a directory
# workflow <- put("./src/")
#
# # View the extracted workflow
# print(workflow)

## ----echo=FALSE, eval=TRUE----------------------------------------------------
# Create example output for documentation
# (a hand-built data frame, one row per annotated workflow node; it does not
# call put(), so it runs without the package being attached)
example_output <- data.frame(
  file_name = c("data_processing.R", "data_processing.R", "analysis.py"),
  file_type = c("r", "r", "py"),
  input = c(NA, "raw_data.csv", "clean_data.csv"),
  label = c("Load Customer Data", "Clean and Validate", "Sales Analysis"),
  id = c("load_data", "clean_data", "analyze_sales"),
  node_type = c("input", "process", "process"),
  output = c("raw_data.csv", "clean_data.csv", "sales_report.json"),
  stringsAsFactors = FALSE
)

print(example_output)

## -----------------------------------------------------------------------------
# # Process a single file
# workflow <- put("./scripts/analysis.R")

## -----------------------------------------------------------------------------
# # Search subdirectories recursively
# workflow <- put("./project/", recursive = TRUE)

## -----------------------------------------------------------------------------
# # Only R files
# workflow <- put("./src/", pattern = "\\.R$")
#
# # R and SQL files only
# workflow <- put("./src/", pattern = "\\.(R|sql)$")
#
# # All supported file types (default)
# workflow <- put("./src/", pattern = "\\.(R|r|py|sql|sh|jl)$")

## -----------------------------------------------------------------------------
# # Include line numbers for debugging
# workflow <- put("./src/", include_line_numbers = TRUE)

## -----------------------------------------------------------------------------
# # Enable validation (default) - provides helpful warnings
# workflow <- put("./src/", validate = TRUE)
#
# # Disable validation warnings
# workflow <- put("./src/", validate = FALSE)

## -----------------------------------------------------------------------------
# # Annotations without explicit IDs get auto-generated UUIDs
# #put label:"Load Data", node_type:"input", output:"data.csv"
# #put label:"Process Data", node_type:"process", input:"data.csv", output:"clean.csv"
#
# # Extract workflow - IDs will be auto-generated
# workflow <- put("./")
# print(workflow$id)  # Will show UUIDs like "a1b2c3d4-e5f6-7890-abcd-ef1234567890"

## -----------------------------------------------------------------------------
# # In process_data.R:
# #put label:"Process Step", node_type:"process", input:"raw.csv"
# # No output specified - will default to "process_data.R"
#
# # In analyze_data.R:
# #put label:"Analyze", node_type:"process", input:"process_data.R", output:"results.csv"
# # This creates a connection from process_data.R to analyze_data.R

## -----------------------------------------------------------------------------
# # In main.R (sources other scripts):
# #put label:"Main Analysis", input:"load_data.R,process_data.R", output:"report.pdf"
# source("load_data.R")  # Reading load_data.R into main.R
# source("process_data.R")  # Reading process_data.R into main.R
#
# # In load_data.R (sourced by main.R):
# #put label:"Data Loader", node_type:"input"
# # output defaults to "load_data.R"
#
# # In process_data.R (sourced by main.R, depends on load_data.R):
# #put label:"Data Processor", input:"load_data.R"
# # output defaults to "process_data.R"

## -----------------------------------------------------------------------------
# # Extract workflow from all files
# complete_workflow <- put("./sales_project/", recursive = TRUE)
# print(complete_workflow)

## -----------------------------------------------------------------------------
# # Test annotation syntax
# is_valid_put_annotation('#put name:"test", label:"Test Node"')  # Should return TRUE
# is_valid_put_annotation("#put invalid syntax")  # Should return FALSE
#
# # Check what files are found
# list.files("./src/", pattern = "\\.(R|py)$")