nhscancerwaits Workflow

Overview

nhscancerwaits provides tools for importing, cleaning, analysing, benchmarking, modelling and visualising NHS Cancer Waiting Times data.

The analytical workflow implemented in this package was developed from research investigating NHS Cancer Waiting Times performance, provider variation and cancer pathway outcomes. The package supports reproducible analysis of provider-level and pathway-level performance using modern statistical modelling, benchmarking and visualisation techniques.

This vignette uses simulated data with the same structure expected from NHS Cancer Waiting Times datasets. Simulated data are used so that the examples can run on any system without requiring external NHS files. The same workflow can be applied directly to real NHS England Cancer Waiting Times Excel or CSV datasets.

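The package supports:

- data import and cleaning
- KPI, provider and pathway summary statistics
- provider filtering
- mixed-effects modelling and ICC estimation
- adjusted provider benchmarking and pathway prediction
- provider clustering and sensitivity analysis
- visualisation and export of results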

library(nhscancerwaits)

Create Example Data

The following simulated dataset mimics the structure commonly encountered in NHS Cancer Waiting Times analyses. Variables include provider identifiers, cancer pathways, reporting periods, activity volumes and performance percentages.

# Fix the RNG state so the simulated values below are reproducible.
set.seed(123)

# One row per provider x cancer pathway x month: 12 x 4 x 12 = 576 rows.
example_data <- expand.grid(
  provider_code = sprintf("P%d", seq_len(12)),
  cancer_type = c("Breast", "Lung", "Skin", "Lower GI"),
  month_index = seq_len(12),
  KEEP.OUT.ATTRS = FALSE
)

# Human-readable provider label and a single waiting-time standard.
example_data[["provider_name"]] <- paste(
  "Provider",
  example_data[["provider_code"]]
)
example_data[["standard"]] <- "62-day"

# Map each month index onto the first day of the matching month in 2026.
month_starts <- seq(as.Date("2026-01-01"), by = "month", length.out = 12)
example_data[["reporting_date"]] <- month_starts[example_data[["month_index"]]]

# Random draws: activity first, then performance. The order of these two
# calls determines the simulated values, so it must not be swapped.
n_obs <- nrow(example_data)
example_data[["total_treated"]] <- sample(30:120, size = n_obs, replace = TRUE)
example_data[["performance_percent"]] <- round(
  runif(n_obs, min = 60, max = 92),
  digits = 1
)

head(example_data)
#>   provider_code cancer_type month_index provider_name standard reporting_date
#> 1            P1      Breast           1   Provider P1   62-day     2026-01-01
#> 2            P2      Breast           1   Provider P2   62-day     2026-01-01
#> 3            P3      Breast           1   Provider P3   62-day     2026-01-01
#> 4            P4      Breast           1   Provider P4   62-day     2026-01-01
#> 5            P5      Breast           1   Provider P5   62-day     2026-01-01
#> 6            P6      Breast           1   Provider P6   62-day     2026-01-01
#>   total_treated performance_percent
#> 1            60                73.0
#> 2           108                63.3
#> 3            80                69.0
#> 4            43                71.6
#> 5            96                68.3
#> 6            71                75.1

KPI Summary

# Summarise performance_percent for each value of `standard`. The example
# data contain a single standard ("62-day"), so the result has one row.
kpi_summary <- summarise_kpis(
  example_data,
  group_var = "standard",
  performance_var = "performance_percent"
)

kpi_summary
#> # A tibble: 1 × 7
#>   standard observations mean_percent median_percent minimum_percent
#>   <chr>           <int>        <dbl>          <dbl>           <dbl>
#> 1 62-day            576         75.5           75.7              60
#> # ℹ 2 more variables: maximum_percent <dbl>, sd_percent <dbl>

Provider Filtering

# Filter providers using the three thresholds passed below.
# NOTE(review): the argument names suggest a minimum mean activity, a minimum
# number of observations and a maximum coefficient of variation per provider;
# confirm the exact exclusion rules against the filter_providers() help page.
filtered_data <- filter_providers(
  example_data,
  provider_var = "provider_code",
  activity_var = "total_treated",
  performance_var = "performance_percent",
  min_mean_activity = 20,
  min_observations = 5,
  max_cv = 0.5
)

# All 576 simulated rows are retained in this example (see output below).
nrow(filtered_data)
#> [1] 576

Provider Summary

# Provider-level summary of performance and activity on the filtered data.
# The result has one row per provider_code; only the first six are printed.
provider_summary <- summarise_providers(
  filtered_data,
  provider_var = "provider_code",
  performance_var = "performance_percent",
  activity_var = "total_treated"
)

head(provider_summary)
#> # A tibble: 6 × 9
#>   provider_code observations mean_performance median_performance min_performance
#>   <fct>                <int>            <dbl>              <dbl>           <dbl>
#> 1 P2                      48             77.6               79.8            60.1
#> 2 P10                     48             76.9               77.4            60.4
#> 3 P11                     48             76.2               77.7            61.3
#> 4 P6                      48             75.9               75.2            60.4
#> 5 P5                      48             75.9               74.2            60.2
#> 6 P3                      48             75.8               76.8            60  
#> # ℹ 4 more variables: max_performance <dbl>, sd_performance <dbl>,
#> #   mean_activity <dbl>, total_activity <int>

Pathway Summary

# Pathway-level summary of performance on the filtered data, with one row
# for each of the four cancer_type values.
pathway_summary <- summarise_pathways(
  filtered_data,
  pathway_var = "cancer_type",
  performance_var = "performance_percent"
)

pathway_summary
#> # A tibble: 4 × 7
#>   cancer_type observations mean_performance median_performance min_performance
#>   <fct>              <int>            <dbl>              <dbl>           <dbl>
#> 1 Lung                 144             76.5               76.9            60.1
#> 2 Lower GI             144             76.1               77.0            60.2
#> 3 Breast               144             74.7               74.2            60.1
#> 4 Skin                 144             74.6               75.4            60  
#> # ℹ 2 more variables: max_performance <dbl>, sd_performance <dbl>

Mixed-Effects Model

# Fit the mixed-effects model. The printed formula below shows month_index
# and cancer_type as fixed effects with a random intercept for provider_code.
# In this simulated example the provider-level standard deviation is
# estimated as 0 and one lme4 warning is reported (see output below).
model <- fit_cwt_mixed_model(
  filtered_data,
  performance_var = "performance_percent",
  month_var = "month_index",
  pathway_var = "cancer_type",
  provider_var = "provider_code"
)

model
#> Linear mixed model fit by maximum likelihood  ['lmerMod']
#> Formula: performance_percent ~ month_index + cancer_type + (1 | provider_code)
#>    Data: data
#>       AIC       BIC    logLik -2*log(L)  df.resid 
#>  4172.512  4203.004 -2079.256  4158.512       569 
#> Random effects:
#>  Groups        Name        Std.Dev.
#>  provider_code (Intercept) 0.000   
#>  Residual                  8.943   
#> Number of obs: 576, groups:  provider_code, 12
#> Fixed Effects:
#>         (Intercept)          month_index      cancer_typeLung  
#>            75.22882             -0.08189              1.78125  
#>     cancer_typeSkin  cancer_typeLower GI  
#>            -0.08958              1.37500  
#> optimizer (nloptwrap) convergence code: 0 (OK) ; 0 optimizer warnings; 1 lme4 warnings

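Intraclass Correlation Coefficient

In this simulated example the performance values are generated independently of provider, so the model estimates no between-provider variance: the ICC is 0 and the adjusted provider effects shown later are all zero. Real Cancer Waiting Times data may show non-zero between-provider variation.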

# Compute the ICC from the fitted model. The output reports the provider,
# residual and total variance alongside the ICC itself.
icc_results <- calculate_icc(model)

icc_results
#>   provider_variance residual_variance total_variance icc
#> 1                 0          79.97877       79.97877   0

Fixed-Effect Estimates

# Tabulate the fixed-effect terms of the fitted model. The output has one row
# per term with its estimate, standard error, statistic and confidence limits.
model_effects <- extract_model_effects(model)

model_effects
#> # A tibble: 5 × 7
#>   effect term                estimate std.error statistic conf.low conf.high
#>   <chr>  <chr>                  <dbl>     <dbl>     <dbl>    <dbl>     <dbl>
#> 1 fixed  (Intercept)          75.2        1.02    73.5      73.2      77.2  
#> 2 fixed  month_index          -0.0819     0.108   -0.759    -0.293     0.130
#> 3 fixed  cancer_typeLung       1.78       1.05     1.69     -0.284     3.85 
#> 4 fixed  cancer_typeSkin      -0.0896     1.05    -0.0850   -2.16      1.98 
#> 5 fixed  cancer_typeLower GI   1.37       1.05     1.30     -0.691     3.44

Adjusted Provider Effects

# Extract one adjusted effect per provider from the fitted model.
# NOTE(review): this call names the provider column via `provider_name`,
# whereas the other calls in this vignette use `provider_var`; confirm the
# argument name against the extract_provider_effects() signature.
provider_effects <- extract_provider_effects(
  model,
  provider_name = "provider_code"
)

head(provider_effects)
#>   provider_code adjusted_effect
#> 1            P1               0
#> 2            P2               0
#> 3            P3               0
#> 4            P4               0
#> 5            P5               0
#> 6            P6               0

Adjusted Pathway Predictions

# Predict performance for each cancer pathway from the fitted model.
# In the output below every prediction is reported at month_index = 6.5
# (the mean of 1:12) and provider P1.
# NOTE(review): presumably month and provider are held at reference values
# so that only the pathway varies; confirm against the function's help page.
pathway_predictions <- predict_pathway_performance(
  model,
  filtered_data,
  pathway_var = "cancer_type",
  month_var = "month_index",
  provider_var = "provider_code"
)

pathway_predictions
#>   cancer_type month_index provider_code predicted_performance
#> 1        Lung         6.5            P1              76.47778
#> 2    Lower GI         6.5            P1              76.07153
#> 3      Breast         6.5            P1              74.69653
#> 4        Skin         6.5            P1              74.60694

Provider Clustering

# Assign providers to k = 3 clusters. The result has one row per provider
# with its summary measures plus `cluster` and `cluster_label` columns.
provider_clusters <- cluster_providers(
  filtered_data,
  provider_var = "provider_code",
  performance_var = "performance_percent",
  activity_var = "total_treated",
  k = 3
)

head(provider_clusters)
#> # A tibble: 6 × 8
#>   provider_code mean_performance median_performance sd_performance mean_activity
#>   <fct>                    <dbl>              <dbl>          <dbl>         <dbl>
#> 1 P1                        75.5               76.9           8.23          84.1
#> 2 P2                        77.6               79.8          10.1           73.1
#> 3 P3                        75.8               76.8           8.56          82.1
#> 4 P4                        74.2               74.6           8.97          73.1
#> 5 P5                        75.9               74.2           9.56          74.2
#> 6 P6                        75.9               75.2           9.04          71.4
#> # ℹ 3 more variables: total_activity <int>, cluster <fct>, cluster_label <chr>

Sensitivity Analysis

# Run the sensitivity analysis. The output has one row per cohort (main,
# no outlier removal, stricter) with its variance components, ICC and month
# effect. In this simulated example all three cohorts keep the same 576 rows
# and 12 providers, so their results are identical.
sensitivity_results <- run_sensitivity_analysis(
  filtered_data,
  provider_var = "provider_code",
  activity_var = "total_treated",
  performance_var = "performance_percent",
  month_var = "month_index",
  pathway_var = "cancer_type"
)

sensitivity_results
#>               cohort rows providers provider_variance residual_variance
#> 1        Main cohort  576        12                 0          79.97877
#> 2 No outlier removal  576        12                 0          79.97877
#> 3    Stricter cohort  576        12                 0          79.97877
#>   total_variance icc month_effect month_p_value status
#> 1       79.97877   0  -0.08189103            NA     ok
#> 2       79.97877   0  -0.08189103            NA     ok
#> 3       79.97877   0  -0.08189103            NA     ok

Diagnostic Utilities

# Reshape to one row per provider and one column per reporting month.
# The example data hold four rows (one per cancer pathway) for every
# provider-month, so each cell in the output below is a list-column entry of
# length 4 rather than a single number.
# NOTE(review): to get one value per cell, presumably restrict the input to a
# single pathway or aggregate it first; confirm the intended usage.
wide_table <- pivot_provider_months(
  filtered_data,
  provider_var = "provider_code",
  month_var = "reporting_date",
  performance_var = "performance_percent"
)

head(wide_table)
#> # A tibble: 6 × 13
#>   provider `2026-01-01` `2026-02-01` `2026-03-01` `2026-04-01` `2026-05-01`
#>   <fct>    <list>       <list>       <list>       <list>       <list>      
#> 1 P1       <dbl [4]>    <dbl [4]>    <dbl [4]>    <dbl [4]>    <dbl [4]>   
#> 2 P2       <dbl [4]>    <dbl [4]>    <dbl [4]>    <dbl [4]>    <dbl [4]>   
#> 3 P3       <dbl [4]>    <dbl [4]>    <dbl [4]>    <dbl [4]>    <dbl [4]>   
#> 4 P4       <dbl [4]>    <dbl [4]>    <dbl [4]>    <dbl [4]>    <dbl [4]>   
#> 5 P5       <dbl [4]>    <dbl [4]>    <dbl [4]>    <dbl [4]>    <dbl [4]>   
#> 6 P6       <dbl [4]>    <dbl [4]>    <dbl [4]>    <dbl [4]>    <dbl [4]>   
#> # ℹ 7 more variables: `2026-06-01` <list>, `2026-07-01` <list>,
#> #   `2026-08-01` <list>, `2026-09-01` <list>, `2026-10-01` <list>,
#> #   `2026-11-01` <list>, `2026-12-01` <list>
# Silhouette score for the provider cluster assignment held in
# provider_clusters; the result is a single number.
silhouette_score <- calculate_silhouette_score(
  provider_clusters
)

silhouette_score
#> [1] 0.3891414

Plots

# Performance over reporting_date, grouped by waiting-time standard.
plot_national_trends(
  filtered_data,
  month_var = "reporting_date",
  performance_var = "performance_percent",
  group_var = "standard"
)

# Adjusted effect for each provider, taken from provider_effects.
plot_provider_effects(
  provider_effects,
  provider_var = "provider_code",
  effect_var = "adjusted_effect"
)

# Predicted performance for each cancer pathway.
plot_pathway_predictions(
  pathway_predictions,
  pathway_var = "cancer_type",
  prediction_var = "predicted_performance"
)

# Provider cluster assignment from cluster_providers().
plot_provider_clusters(
  provider_clusters
)

Export Results

The package can export result tables to an Excel workbook. This chunk is not evaluated during package checking because CRAN policy discourages writing files to the user's file system during vignette execution.

# Export the result tables created in this vignette to an Excel workbook.
# The workbook is written to the R session's temporary directory so that
# running the example never creates files in the working directory. Set
# `output_path` to any other location to keep the file.
output_path <- file.path(tempdir(), "nhscancerwaits_results.xlsx")

export_excel_tables(
  tables = list(
    kpi_summary = kpi_summary,
    provider_summary = provider_summary,
    pathway_summary = pathway_summary,
    icc_results = icc_results,
    model_effects = model_effects,
    provider_effects = provider_effects,
    pathway_predictions = pathway_predictions,
    provider_clusters = provider_clusters,
    sensitivity_results = sensitivity_results
  ),
  path = output_path
)

Full Applied Workflow

For real NHS Cancer Waiting Times data, a typical workflow is:

library(nhscancerwaits)

# Import: read the NHS Cancer Waiting Times workbook (replace the
# placeholder file name with the path to your own file).
data <- load_cwt_excel(
  "your_nhs_cancer_waiting_times_file.xlsx"
)

# Cleaning.
data <- clean_cwt_data(data)

# Summary statistics.
kpis <- summarise_kpis(data)

# Provider filtering.
filtered <- filter_providers(data)

# Mixed-effects modelling and ICC estimation.
model <- fit_cwt_mixed_model(filtered)

icc <- calculate_icc(model)

# Adjusted provider benchmarking.
provider_effects <- extract_provider_effects(model)

# Pathway prediction.
pathway_predictions <- predict_pathway_performance(
  model,
  filtered
)

# Clustering.
provider_clusters <- cluster_providers(filtered)

# Sensitivity analysis.
sensitivity <- run_sensitivity_analysis(filtered)

Summary

nhscancerwaits provides a complete workflow for NHS Cancer Waiting Times analysis, including data import, cleaning, summary statistics, provider filtering, mixed-effects modelling, ICC estimation, adjusted provider benchmarking, pathway prediction, clustering, sensitivity analysis, visualisation and export.

Although this vignette uses simulated data, the functions were designed to support analysis of real NHS Cancer Waiting Times datasets and can be applied directly to appropriately formatted NHS England data sources.