--- title: "Practical Applications of qDEA" author: "Joe Atwood" date: "`r Sys.Date()`" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Practical Applications of qDEA} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r setup, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.width = 7, fig.height = 5 ) library(qDEA) ``` ## Introduction This vignette demonstrates practical applications of qDEA through real-world examples and workflows. We'll cover: - Hospital efficiency analysis - Retail store performance - Handling outliers and noisy data - Benchmarking and target setting - Reporting results ## Case Study 1: Hospital Efficiency Analysis ### The Problem A hospital administrator wants to evaluate the efficiency of 12 hospitals using: - **Inputs**: Number of doctors and nurses - **Outputs**: Outpatient and inpatient treatments The administrator suspects that 1-2 hospitals may have data quality issues or operate under exceptional circumstances. ### Analysis ```{r hospital-setup} # Load hospital data data(CST22) # Examine the data print(CST22) # Prepare inputs and outputs X <- as.matrix(CST22[, c("DOCTORS", "NURSES")]) Y <- as.matrix(CST22[, c("OUT_PATIENTS", "IN_PATIENTS")]) # Summary statistics cat("Input summary:\n") summary(X) cat("\nOutput summary:\n") summary(Y) ``` ### Step 1: Standard DEA Analysis ```{r hospital-dea} # Run standard DEA (no outliers allowed) dea_result <- qDEA(X = X, Y = Y, orient = "in", RTS = "VRS", qout = 0, getproject = TRUE) # Create results table results_dea <- data.frame( Hospital = CST22$HOSPITAL, Efficiency = round(dea_result$effvals, 3), Rank = rank(-dea_result$effvals, ties.method = "min") ) print(results_dea) cat("\nEfficient hospitals:", sum(dea_result$effvals >= 0.99), "out of", nrow(X)) ``` ### Step 2: Robust qDEA Analysis ```{r hospital-qdea} # Run qDEA allowing 10% outliers (≈1 hospital) qdea_result <- qDEA(X = X, Y = Y, orient = "in", RTS = "VRS", qout = 0.10, getproject = TRUE) # Compare DEA and qDEA results results_comparison <- data.frame( Hospital = CST22$HOSPITAL, DEA_Eff = round(dea_result$effvals, 3), qDEA_Eff = round(qdea_result$effvalsq, 3), Change = round(qdea_result$effvalsq - dea_result$effvals, 3), DEA_Rank = rank(-dea_result$effvals, ties.method = "min"), qDEA_Rank = rank(-qdea_result$effvalsq, ties.method = "min") ) print(results_comparison) ``` ### Step 3: Target Setting ```{r hospital-targets} # Calculate targets for inefficient hospitals targets <- data.frame( Hospital = CST22$HOSPITAL, Current_Doctors = X[,1], Target_Doctors = round(qdea_result$PROJ_DATA$X0HATq[,1], 1), Doctor_Reduction = round(X[,1] - qdea_result$PROJ_DATA$X0HATq[,1], 1), Current_Nurses = X[,2], Target_Nurses = round(qdea_result$PROJ_DATA$X0HATq[,2], 1), Nurse_Reduction = round(X[,2] - qdea_result$PROJ_DATA$X0HATq[,2], 1), Efficiency = round(qdea_result$effvalsq, 3) ) # Show only inefficient hospitals inefficient <- targets[targets$Efficiency < 0.99, ] print(inefficient) # Calculate total potential savings cat("\nTotal potential reductions:\n") cat("Doctors:", sum(targets$Doctor_Reduction), "\n") cat("Nurses:", sum(targets$Nurse_Reduction), "\n") ``` ### Step 4: Peer Benchmarks ```{r hospital-peers} # Identify peer hospitals for benchmarking peers <- qdea_result$PEER_DATA$PEERSq # Show peers for an inefficient hospital (e.g., Hospital D) cat("Benchmark hospitals for Hospital D:\n") hospital_d_peers <- peers[peers$dmu0 == "D", ] print(hospital_d_peers[order(-hospital_d_peers$z), ]) ``` ### Management Report ```{r hospital-report} # Create executive summary cat("=" , rep("=", 50), "\n", sep="") cat("HOSPITAL EFFICIENCY ANALYSIS - EXECUTIVE SUMMARY\n") cat("=" , rep("=", 50), "\n", sep="") cat("\nDATA: 12 hospitals\n") cat("INPUTS: Doctors, Nurses\n") cat("OUTPUTS: Outpatients, Inpatients\n") cat("METHOD: qDEA with VRS, 10% outlier allowance\n") cat("\n--- EFFICIENCY RESULTS ---\n") cat("Mean efficiency:", round(mean(qdea_result$effvalsq), 3), "\n") cat("Median efficiency:", round(median(qdea_result$effvalsq), 3), "\n") cat("Efficient hospitals:", sum(qdea_result$effvalsq >= 0.99), "\n") cat("Inefficient hospitals:", sum(qdea_result$effvalsq < 0.99), "\n") cat("\n--- IMPROVEMENT POTENTIAL ---\n") cat("If all hospitals achieve target efficiency:\n") cat(" Doctor reduction:", sum(targets$Doctor_Reduction), "(", round(100*sum(targets$Doctor_Reduction)/sum(X[,1]), 1), "%)\n") cat(" Nurse reduction:", sum(targets$Nurse_Reduction), "(", round(100*sum(targets$Nurse_Reduction)/sum(X[,2]), 1), "%)\n") cat("\n--- TOP PERFORMERS ---\n") top3 <- head(results_comparison[order(-results_comparison$qDEA_Eff), ], 3) print(top3[, c("Hospital", "qDEA_Eff")]) cat("\n--- NEEDS IMPROVEMENT ---\n") bottom3 <- head(results_comparison[order(results_comparison$qDEA_Eff), ], 3) print(bottom3[, c("Hospital", "qDEA_Eff")]) ``` ## Case Study 2: Retail Store Performance ### The Problem A retail chain wants to evaluate store performance with potential outliers due to: - Special events or temporary factors - Data entry errors - Unique local market conditions ```{r retail-setup} # Load retail data data(CST21) print(CST21) # Prepare data X <- as.matrix(CST21[, c("EMPLOYEES", "FLOOR_AREA")]) Y <- as.matrix(CST21$SALES) ``` ### Sensitivity Analysis: How Many Outliers? ```{r retail-sensitivity} # Test different outlier proportions qout_values <- c(0, 0.05, 0.10, 0.15, 0.20) sensitivity_results <- data.frame( Store = CST21$STORE ) for (q in qout_values) { result <- qDEA(X = X, Y = Y, orient = "out", RTS = "CRS", qout = q) col_name <- paste0("qout_", sprintf("%.2f", q)) sensitivity_results[[col_name]] <- round(result$effvalsq, 3) } print(sensitivity_results) # Calculate how efficiency changes with qout sensitivity_results$Range <- apply( sensitivity_results[, -1], 1, function(x) max(x) - min(x) ) cat("\nStores most sensitive to outlier allowance:\n") print(sensitivity_results[order(-sensitivity_results$Range), c("Store", "Range")]) ``` ### Recommended Approach ```{r retail-recommendation} # Use moderate outlier allowance result_retail <- qDEA(X = X, Y = Y, orient = "out", RTS = "VRS", qout = 0.10, getproject = TRUE) # Performance report performance <- data.frame( Store = CST21$STORE, Employees = X[,1], Floor_Area = X[,2], Actual_Sales = Y[,1], Target_Sales = round(result_retail$PROJ_DATA$Y0HATq[,1], 0), Sales_Gap = round(result_retail$PROJ_DATA$Y0HATq[,1] - Y[,1], 0), Efficiency = round(result_retail$effvalsq, 3) ) print(performance) # Classify stores performance$Category <- ifelse( performance$Efficiency >= 0.95, "Excellent", ifelse(performance$Efficiency >= 0.85, "Good", ifelse(performance$Efficiency >= 0.75, "Needs Improvement", "Critical")) ) cat("\nStore Classification:\n") table(performance$Category) ``` ## Case Study 3: Dealing with Outliers ### Identifying Outliers ```{r outlier-detection} data(CST11) X <- as.matrix(CST11$EMPLOYEES) Y <- as.matrix(CST11$SALES_EJOR) # Run with very restrictive outlier allowance strict <- qDEA(X = X, Y = Y, orient = "out", RTS = "CRS", qout = 0.01) # Run with moderate outlier allowance moderate <- qDEA(X = X, Y = Y, orient = "out", RTS = "CRS", qout = 0.15) # Stores with big efficiency changes are likely outliers outlier_check <- data.frame( Store = CST11$STORE, Strict = round(strict$effvalsq, 3), Moderate = round(moderate$effvalsq, 3), Change = round(moderate$effvalsq - strict$effvalsq, 3) ) print(outlier_check) # Flag potential outliers (large efficiency changes) outlier_check$Potential_Outlier <- outlier_check$Change > 0.10 cat("\nPotential outliers identified:\n") print(outlier_check[outlier_check$Potential_Outlier, ]) ``` ### Impact of Outlier Removal ```{r outlier-impact} # Compare DEA vs qDEA to see impact of outlier allowance impact <- data.frame( Store = CST11$STORE, DEA = round(strict$effvals, 3), qDEA = round(moderate$effvalsq, 3), Difference = round(moderate$effvalsq - strict$effvals, 3) ) print(impact) cat("\nMean efficiency:\n") cat("DEA (no outliers):", round(mean(strict$effvals), 3), "\n") cat("qDEA (15% outliers):", round(mean(moderate$effvalsq), 3), "\n") ``` ## Workflow: Complete Analysis Template Here's a complete workflow you can adapt: ```{r workflow-template, eval=FALSE} # ========================================== # COMPLETE qDEA ANALYSIS WORKFLOW # ========================================== # 1. Load and examine data data(CST22) # Replace with your data X <- as.matrix(CST22[, c("DOCTORS", "NURSES")]) Y <- as.matrix(CST22[, c("OUT_PATIENTS", "IN_PATIENTS")]) # Check data quality summary(X) summary(Y) # Look for: missing values, extreme values, data entry errors # 2. Run standard DEA baseline baseline <- qDEA(X = X, Y = Y, orient = "in", # Choose: in, out, inout RTS = "VRS", # Choose: CRS, VRS, DRS, IRS qout = 0) # 3. Run robust qDEA robust <- qDEA(X = X, Y = Y, orient = "in", RTS = "VRS", qout = 0.10, # Adjust based on expected outliers nqiter = 3, # Iterative refinement getproject = TRUE) # Get targets # 4. Compare results comparison <- data.frame( Unit = rownames(X), DEA = round(baseline$effvals, 3), qDEA = round(robust$effvalsq, 3), Change = round(robust$effvalsq - baseline$effvals, 3) ) # 5. Identify outliers potential_outliers <- comparison$Unit[abs(comparison$Change) > 0.10] # 6. Calculate targets targets <- data.frame( Unit = rownames(X), Efficiency = round(robust$effvalsq, 3), Current_Input1 = X[,1], Target_Input1 = round(robust$PROJ_DATA$X0HATq[,1], 2), Current_Input2 = X[,2], Target_Input2 = round(robust$PROJ_DATA$X0HATq[,2], 2) ) # 7. Generate report # Export to CSV write.csv(comparison, "efficiency_comparison.csv", row.names = FALSE) write.csv(targets, "efficiency_targets.csv", row.names = FALSE) # 8. Optional: Bootstrap for confidence intervals boot_result <- qDEA(X = X, Y = Y, orient = "in", RTS = "VRS", qout = 0.10, nboot = 1000, seedval = 12345) boot_ci <- data.frame( Unit = rownames(X), Efficiency = round(boot_result$effvalsq, 3), BC_Efficiency = round(boot_result$BOOT_DATA$effvalsq.bc, 3), Bias = round(boot_result$effvalsq - boot_result$BOOT_DATA$effvalsq.bc, 3) ) ``` ## Best Practices Summary ### 1. Data Preparation ✓ Check for missing values ✓ Verify all values are positive ✓ Look for extreme outliers or data entry errors ✓ Ensure comparable units (scale if necessary) ✓ Document data sources and definitions ### 2. Model Selection ✓ Choose orientation based on managerial control ✓ Use VRS unless scale efficiency is of interest ✓ Start with qout = 0.10 (10% outliers) ✓ Test sensitivity to qout selection ### 3. Interpretation ✓ Efficiency scores are relative, not absolute ✓ Compare units within same analysis only ✓ Consider context (outliers may be legitimate) ✓ Verify targets are achievable ✓ Use peers for benchmarking ### 4. Reporting ✓ Document methodology clearly ✓ Report both DEA and qDEA results ✓ Explain outlier allowance rationale ✓ Provide actionable recommendations ✓ Include sensitivity analysis ### 5. Common Mistakes to Avoid ✗ Comparing efficiency across different analyses ✗ Using CRS when scale varies significantly ✗ Setting qout too high (> 0.25) ✗ Ignoring data quality issues ✗ Over-interpreting small efficiency differences ## Exporting Results ### To CSV ```{r export-csv, eval=FALSE} # Prepare comprehensive results results_export <- data.frame( Unit = CST22$HOSPITAL, Input1 = X[,1], Input2 = X[,2], Output1 = Y[,1], Output2 = Y[,2], DEA_Efficiency = round(baseline$effvals, 4), qDEA_Efficiency = round(robust$effvalsq, 4), Target_Input1 = round(robust$PROJ_DATA$X0HATq[,1], 2), Target_Input2 = round(robust$PROJ_DATA$X0HATq[,2], 2) ) # Export write.csv(results_export, "qDEA_results.csv", row.names = FALSE) ``` ### To Excel (requires openxlsx package) ```{r export-excel, eval=FALSE} library(openxlsx) # Create workbook wb <- createWorkbook() # Add worksheets addWorksheet(wb, "Efficiency Scores") addWorksheet(wb, "Targets") addWorksheet(wb, "Peers") # Write data writeData(wb, "Efficiency Scores", comparison) writeData(wb, "Targets", targets) writeData(wb, "Peers", robust$PEER_DATA$PEERSq) # Save saveWorkbook(wb, "qDEA_analysis.xlsx", overwrite = TRUE) ``` ## Visualization Examples ### Efficiency Distribution ```{r viz-distribution, fig.width=7, fig.height=5} data(CST22) X <- as.matrix(CST22[, c("DOCTORS", "NURSES")]) Y <- as.matrix(CST22[, c("OUT_PATIENTS", "IN_PATIENTS")]) result <- qDEA(X = X, Y = Y, orient = "in", RTS = "VRS", qout = 0.10) # Histogram hist(result$effvalsq, breaks = 10, col = "lightblue", border = "white", main = "Distribution of Efficiency Scores", xlab = "Efficiency", ylab = "Frequency") abline(v = mean(result$effvalsq), col = "red", lwd = 2, lty = 2) legend("topleft", legend = paste("Mean =", round(mean(result$effvalsq), 3)), col = "red", lty = 2, lwd = 2) ``` ### Efficiency Comparison ```{r viz-comparison, fig.width=7, fig.height=6} # Compare DEA and qDEA dea <- qDEA(X = X, Y = Y, orient = "in", RTS = "VRS", qout = 0) qdea <- qDEA(X = X, Y = Y, orient = "in", RTS = "VRS", qout = 0.10) # Scatter plot plot(dea$effvals, qdea$effvalsq, xlim = c(0.4, 1.2), ylim = c(0.4, 1.2), xlab = "DEA Efficiency", ylab = "qDEA Efficiency", main = "DEA vs qDEA Efficiency Scores", pch = 19, col = "blue") abline(0, 1, col = "red", lty = 2) # 45-degree line text(dea$effvals, qdea$effvalsq, labels = CST22$HOSPITAL, pos = 3, cex = 0.8) grid() ``` ## Conclusion This vignette has demonstrated practical applications of qDEA including: - Complete hospital and retail efficiency analyses - Outlier detection and handling - Target setting and benchmarking - Sensitivity analysis - Result interpretation and reporting For more details on the underlying methodology, see the main package vignette. ## Further Reading - Main vignette: `vignette("introduction-to-qDEA")` - Package documentation: `help(package = "qDEA")` - Function help: `?qDEA` Contact: jatwood@montana.edu