---
title: "nhscancerwaits Workflow"
output: rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{nhscancerwaits Workflow}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

```{r, include = FALSE}
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  warning = FALSE,
  message = FALSE
)
```

# Overview

`nhscancerwaits` provides tools for importing, cleaning, analysing,
benchmarking, modelling and visualising NHS Cancer Waiting Times data.

The analytical workflow implemented in this package was developed from research
investigating NHS Cancer Waiting Times performance, provider variation and
cancer pathway outcomes. The package supports reproducible analysis of
provider-level and pathway-level performance using modern statistical
modelling, benchmarking and visualisation techniques.

This vignette uses simulated data with the same structure as real NHS Cancer
Waiting Times datasets. Simulated data are used so that the examples can run on
any system without requiring external NHS files. The same workflow can be
applied directly to real NHS England Cancer Waiting Times Excel or CSV
datasets.

The package supports:

* data import from CSV and Excel files;
* cleaning and harmonisation of NHS Cancer Waiting Times data;
* KPI summaries;
* provider filtering;
* mixed-effects modelling;
* intraclass correlation coefficient estimation;
* adjusted provider benchmarking;
* adjusted cancer-pathway prediction;
* provider clustering;
* sensitivity analysis;
* visualisation and Excel export.

```{r setup}
library(nhscancerwaits)
```

# Create Example Data

The following simulated dataset mimics the structure commonly encountered in
NHS Cancer Waiting Times analyses. Variables include provider identifiers,
cancer pathways, reporting periods, activity volumes and performance
percentages.
```{r}
# Fix the RNG seed so the simulated values are reproducible.
set.seed(123)

# One row for every provider x cancer pathway x month combination.
example_data <- expand.grid(
  provider_code = paste0("P", 1:12),
  cancer_type = c("Breast", "Lung", "Skin", "Lower GI"),
  month_index = 1:12,
  KEEP.OUT.ATTRS = FALSE
)
n_rows <- nrow(example_data)

example_data$provider_name <- paste("Provider", example_data$provider_code)
example_data$standard <- "62-day"

# Twelve consecutive month starts, looked up by each row's month index.
month_starts <- seq.Date(
  from = as.Date("2026-01-01"),
  by = "month",
  length.out = 12
)
example_data$reporting_date <- month_starts[example_data$month_index]

# Draw order matters for reproducibility: activity first, then performance.
example_data$total_treated <- sample(30:120, n_rows, replace = TRUE)
example_data$performance_percent <- round(
  runif(n_rows, min = 60, max = 92),
  1
)

head(example_data)
```

# KPI Summary

```{r}
# `standard` holds a single value ("62-day") in this simulated dataset.
kpi_summary <- summarise_kpis(
  example_data,
  group_var = "standard",
  performance_var = "performance_percent"
)

kpi_summary
```

# Provider Filtering

```{r}
# The thresholds below are passed straight to filter_providers();
# see ?filter_providers for how each one is applied.
filtered_data <- filter_providers(
  example_data,
  provider_var = "provider_code",
  activity_var = "total_treated",
  performance_var = "performance_percent",
  min_mean_activity = 20,
  min_observations = 5,
  max_cv = 0.5
)

# Number of rows retained after filtering.
nrow(filtered_data)
```

# Provider Summary

```{r}
provider_summary <- summarise_providers(
  filtered_data,
  provider_var = "provider_code",
  performance_var = "performance_percent",
  activity_var = "total_treated"
)

head(provider_summary)
```

# Pathway Summary

```{r}
pathway_summary <- summarise_pathways(
  filtered_data,
  pathway_var = "cancer_type",
  performance_var = "performance_percent"
)

pathway_summary
```

# Mixed-Effects Model

```{r}
# The fitted model object is reused by every modelling step that follows.
model <- fit_cwt_mixed_model(
  filtered_data,
  performance_var = "performance_percent",
  month_var = "month_index",
  pathway_var = "cancer_type",
  provider_var = "provider_code"
)

model
```

# Intraclass Correlation Coefficient

```{r}
icc_results <- calculate_icc(model)

icc_results
```

# Fixed-Effect Estimates

```{r}
model_effects <- extract_model_effects(model)

model_effects
```

# Adjusted Provider Effects

```{r}
provider_effects <- extract_provider_effects(
  model,
  provider_name = "provider_code"
)

head(provider_effects)
```

# Adjusted Pathway Predictions

```{r}
pathway_predictions <- predict_pathway_performance(
  model,
  filtered_data,
  pathway_var = "cancer_type",
  month_var = "month_index",
  provider_var = "provider_code"
)

pathway_predictions
```

# Provider Clustering

```{r}
# Request three clusters.
provider_clusters <- cluster_providers(
  filtered_data,
  provider_var = "provider_code",
  performance_var = "performance_percent",
  activity_var = "total_treated",
  k = 3
)

head(provider_clusters)
```

# Sensitivity Analysis

```{r}
sensitivity_results <- run_sensitivity_analysis(
  filtered_data,
  provider_var = "provider_code",
  activity_var = "total_treated",
  performance_var = "performance_percent",
  month_var = "month_index",
  pathway_var = "cancer_type"
)

sensitivity_results
```

# Diagnostic Utilities

```{r}
# Note: the month column used here is the Date-valued `reporting_date`,
# not the integer `month_index` used for modelling.
wide_table <- pivot_provider_months(
  filtered_data,
  provider_var = "provider_code",
  month_var = "reporting_date",
  performance_var = "performance_percent"
)

head(wide_table)
```

```{r}
silhouette_score <- calculate_silhouette_score(provider_clusters)

silhouette_score
```

# Plots

```{r, fig.width=7, fig.height=5}
plot_national_trends(
  filtered_data,
  month_var = "reporting_date",
  performance_var = "performance_percent",
  group_var = "standard"
)
```

```{r, fig.width=7, fig.height=5}
plot_provider_effects(
  provider_effects,
  provider_var = "provider_code",
  effect_var = "adjusted_effect"
)
```

```{r, fig.width=7, fig.height=5}
plot_pathway_predictions(
  pathway_predictions,
  pathway_var = "cancer_type",
  prediction_var = "predicted_performance"
)
```

```{r, fig.width=7, fig.height=5}
plot_provider_clusters(provider_clusters)
```

# Export Results

The package can export tables to Excel. This chunk is not evaluated during
package checking because CRAN policies discourage writing files during vignette
execution.
```{r, eval = FALSE}
# Named list of the result tables created earlier in this vignette
# (names presumably label the exported tables; see ?export_excel_tables).
results_tables <- list(
  kpi_summary = kpi_summary,
  provider_summary = provider_summary,
  pathway_summary = pathway_summary,
  icc_results = icc_results,
  model_effects = model_effects,
  provider_effects = provider_effects,
  pathway_predictions = pathway_predictions,
  provider_clusters = provider_clusters,
  sensitivity_results = sensitivity_results
)

export_excel_tables(
  tables = results_tables,
  path = "nhscancerwaits_results.xlsx"
)
```

# Full Applied Workflow

For real NHS Cancer Waiting Times data, a typical workflow is:

```{r, eval = FALSE}
library(nhscancerwaits)

# Import the workbook, then clean it before any analysis.
raw_data <- load_cwt_excel("your_nhs_cancer_waiting_times_file.xlsx")
cwt_data <- clean_cwt_data(raw_data)

# Descriptive summaries and provider filtering.
kpis <- summarise_kpis(cwt_data)
filtered <- filter_providers(cwt_data)

# Modelling steps, all based on the filtered data.
model <- fit_cwt_mixed_model(filtered)
icc <- calculate_icc(model)
provider_effects <- extract_provider_effects(model)
pathway_predictions <- predict_pathway_performance(model, filtered)

# Clustering and sensitivity analysis.
provider_clusters <- cluster_providers(filtered)
sensitivity <- run_sensitivity_analysis(filtered)
```

# Summary

`nhscancerwaits` provides a complete workflow for NHS Cancer Waiting Times
analysis, including data import, cleaning, summary statistics, provider
filtering, mixed-effects modelling, ICC estimation, adjusted provider
benchmarking, pathway prediction, clustering, sensitivity analysis,
visualisation and export.

Although this vignette uses simulated data, the functions were designed to
support analysis of real NHS Cancer Waiting Times datasets and can be applied
directly to appropriately formatted NHS England data sources.