---
title: "Practical Applications of qDEA"
author: "Joe Atwood"
date: "`r Sys.Date()`"
output: rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{Practical Applications of qDEA}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

```{r setup, include = FALSE}
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.width = 7,
  fig.height = 5
)
library(qDEA)
```

## Introduction

This vignette demonstrates practical applications of qDEA through real-world examples and workflows. We'll cover:

- Hospital efficiency analysis
- Retail store performance
- Handling outliers and noisy data
- Benchmarking and target setting
- Reporting results

## Case Study 1: Hospital Efficiency Analysis

### The Problem

A hospital administrator wants to evaluate the efficiency of 12 hospitals using:

- **Inputs**: Number of doctors and nurses
- **Outputs**: Outpatient and inpatient treatments

The administrator suspects that 1-2 hospitals may have data quality issues or operate under exceptional circumstances.

### Analysis

```{r hospital-setup}
# Load hospital data
data(CST22)

# Examine the data
print(CST22)

# Prepare inputs and outputs
X <- as.matrix(CST22[, c("DOCTORS", "NURSES")])
Y <- as.matrix(CST22[, c("OUT_PATIENTS", "IN_PATIENTS")])

# Summary statistics
cat("Input summary:\n")
summary(X)
cat("\nOutput summary:\n")
summary(Y)
```

### Step 1: Standard DEA Analysis

```{r hospital-dea}
# Run standard DEA (no outliers allowed)
dea_result <- qDEA(X = X, Y = Y, 
                   orient = "in", 
                   RTS = "VRS", 
                   qout = 0,
                   getproject = TRUE)

# Create results table
results_dea <- data.frame(
  Hospital = CST22$HOSPITAL,
  Efficiency = round(dea_result$effvals, 3),
  Rank = rank(-dea_result$effvals, ties.method = "min")
)

print(results_dea)

cat("\nEfficient hospitals:", 
    sum(dea_result$effvals >= 0.99), "out of", nrow(X))
```

### Step 2: Robust qDEA Analysis

```{r hospital-qdea}
# Run qDEA allowing 10% outliers (≈1 hospital)
qdea_result <- qDEA(X = X, Y = Y,
                    orient = "in",
                    RTS = "VRS",
                    qout = 0.10,
                    getproject = TRUE)

# Compare DEA and qDEA results
results_comparison <- data.frame(
  Hospital = CST22$HOSPITAL,
  DEA_Eff = round(dea_result$effvals, 3),
  qDEA_Eff = round(qdea_result$effvalsq, 3),
  Change = round(qdea_result$effvalsq - dea_result$effvals, 3),
  DEA_Rank = rank(-dea_result$effvals, ties.method = "min"),
  qDEA_Rank = rank(-qdea_result$effvalsq, ties.method = "min")
)

print(results_comparison)
```

### Step 3: Target Setting

```{r hospital-targets}
# Calculate targets for inefficient hospitals
targets <- data.frame(
  Hospital = CST22$HOSPITAL,
  Current_Doctors = X[,1],
  Target_Doctors = round(qdea_result$PROJ_DATA$X0HATq[,1], 1),
  Doctor_Reduction = round(X[,1] - qdea_result$PROJ_DATA$X0HATq[,1], 1),
  Current_Nurses = X[,2],
  Target_Nurses = round(qdea_result$PROJ_DATA$X0HATq[,2], 1),
  Nurse_Reduction = round(X[,2] - qdea_result$PROJ_DATA$X0HATq[,2], 1),
  Efficiency = round(qdea_result$effvalsq, 3)
)

# Show only inefficient hospitals
inefficient <- targets[targets$Efficiency < 0.99, ]
print(inefficient)

# Calculate total potential savings
cat("\nTotal potential reductions:\n")
cat("Doctors:", sum(targets$Doctor_Reduction), "\n")
cat("Nurses:", sum(targets$Nurse_Reduction), "\n")
```

### Step 4: Peer Benchmarks

```{r hospital-peers}
# Identify peer hospitals for benchmarking
peers <- qdea_result$PEER_DATA$PEERSq

# Show peers for an inefficient hospital (e.g., Hospital D)
cat("Benchmark hospitals for Hospital D:\n")
hospital_d_peers <- peers[peers$dmu0 == "D", ]
print(hospital_d_peers[order(-hospital_d_peers$z), ])
```

### Management Report

```{r hospital-report}
# Create executive summary
cat("=" , rep("=", 50), "\n", sep="")
cat("HOSPITAL EFFICIENCY ANALYSIS - EXECUTIVE SUMMARY\n")
cat("=" , rep("=", 50), "\n", sep="")

cat("\nDATA: 12 hospitals\n")
cat("INPUTS: Doctors, Nurses\n")
cat("OUTPUTS: Outpatients, Inpatients\n")
cat("METHOD: qDEA with VRS, 10% outlier allowance\n")

cat("\n--- EFFICIENCY RESULTS ---\n")
cat("Mean efficiency:", round(mean(qdea_result$effvalsq), 3), "\n")
cat("Median efficiency:", round(median(qdea_result$effvalsq), 3), "\n")
cat("Efficient hospitals:", sum(qdea_result$effvalsq >= 0.99), "\n")
cat("Inefficient hospitals:", sum(qdea_result$effvalsq < 0.99), "\n")

cat("\n--- IMPROVEMENT POTENTIAL ---\n")
cat("If all hospitals achieve target efficiency:\n")
cat("  Doctor reduction:", sum(targets$Doctor_Reduction), 
    "(", round(100*sum(targets$Doctor_Reduction)/sum(X[,1]), 1), "%)\n")
cat("  Nurse reduction:", sum(targets$Nurse_Reduction),
    "(", round(100*sum(targets$Nurse_Reduction)/sum(X[,2]), 1), "%)\n")

cat("\n--- TOP PERFORMERS ---\n")
top3 <- head(results_comparison[order(-results_comparison$qDEA_Eff), ], 3)
print(top3[, c("Hospital", "qDEA_Eff")])

cat("\n--- NEEDS IMPROVEMENT ---\n")
bottom3 <- head(results_comparison[order(results_comparison$qDEA_Eff), ], 3)
print(bottom3[, c("Hospital", "qDEA_Eff")])
```

## Case Study 2: Retail Store Performance

### The Problem

A retail chain wants to evaluate store performance with potential outliers due to:
- Special events or temporary factors
- Data entry errors
- Unique local market conditions

```{r retail-setup}
# Load retail data
data(CST21)
print(CST21)

# Prepare data
X <- as.matrix(CST21[, c("EMPLOYEES", "FLOOR_AREA")])
Y <- as.matrix(CST21$SALES)
```

### Sensitivity Analysis: How Many Outliers?

```{r retail-sensitivity}
# Test different outlier proportions
qout_values <- c(0, 0.05, 0.10, 0.15, 0.20)

sensitivity_results <- data.frame(
  Store = CST21$STORE
)

for (q in qout_values) {
  result <- qDEA(X = X, Y = Y, orient = "out", RTS = "CRS", qout = q)
  col_name <- paste0("qout_", sprintf("%.2f", q))
  sensitivity_results[[col_name]] <- round(result$effvalsq, 3)
}

print(sensitivity_results)

# Calculate how efficiency changes with qout
sensitivity_results$Range <- apply(
  sensitivity_results[, -1], 1, 
  function(x) max(x) - min(x)
)

cat("\nStores most sensitive to outlier allowance:\n")
print(sensitivity_results[order(-sensitivity_results$Range), 
                          c("Store", "Range")])
```

### Recommended Approach

```{r retail-recommendation}
# Use moderate outlier allowance
result_retail <- qDEA(X = X, Y = Y,
                      orient = "out",
                      RTS = "VRS",
                      qout = 0.10,
                      getproject = TRUE)

# Performance report
performance <- data.frame(
  Store = CST21$STORE,
  Employees = X[,1],
  Floor_Area = X[,2],
  Actual_Sales = Y[,1],
  Target_Sales = round(result_retail$PROJ_DATA$Y0HATq[,1], 0),
  Sales_Gap = round(result_retail$PROJ_DATA$Y0HATq[,1] - Y[,1], 0),
  Efficiency = round(result_retail$effvalsq, 3)
)

print(performance)

# Classify stores
performance$Category <- ifelse(
  performance$Efficiency >= 0.95, "Excellent",
  ifelse(performance$Efficiency >= 0.85, "Good",
         ifelse(performance$Efficiency >= 0.75, "Needs Improvement",
                "Critical"))
)

cat("\nStore Classification:\n")
table(performance$Category)
```

## Case Study 3: Dealing with Outliers

### Identifying Outliers

```{r outlier-detection}
data(CST11)
X <- as.matrix(CST11$EMPLOYEES)
Y <- as.matrix(CST11$SALES_EJOR)

# Run with very restrictive outlier allowance
strict <- qDEA(X = X, Y = Y, orient = "out", RTS = "CRS", qout = 0.01)

# Run with moderate outlier allowance  
moderate <- qDEA(X = X, Y = Y, orient = "out", RTS = "CRS", qout = 0.15)

# Stores with big efficiency changes are likely outliers
outlier_check <- data.frame(
  Store = CST11$STORE,
  Strict = round(strict$effvalsq, 3),
  Moderate = round(moderate$effvalsq, 3),
  Change = round(moderate$effvalsq - strict$effvalsq, 3)
)

print(outlier_check)

# Flag potential outliers (large efficiency changes)
outlier_check$Potential_Outlier <- outlier_check$Change > 0.10

cat("\nPotential outliers identified:\n")
print(outlier_check[outlier_check$Potential_Outlier, ])
```

### Impact of Outlier Removal

```{r outlier-impact}
# Compare DEA vs qDEA to see impact of outlier allowance
impact <- data.frame(
  Store = CST11$STORE,
  DEA = round(strict$effvals, 3),
  qDEA = round(moderate$effvalsq, 3),
  Difference = round(moderate$effvalsq - strict$effvals, 3)
)

print(impact)

cat("\nMean efficiency:\n")
cat("DEA (no outliers):", round(mean(strict$effvals), 3), "\n")
cat("qDEA (15% outliers):", round(mean(moderate$effvalsq), 3), "\n")
```

## Workflow: Complete Analysis Template

Here's a complete workflow you can adapt:

```{r workflow-template, eval=FALSE}
# ==========================================
# COMPLETE qDEA ANALYSIS WORKFLOW
# ==========================================

# 1. Load and examine data
data(CST22)  # Replace with your data
X <- as.matrix(CST22[, c("DOCTORS", "NURSES")])
Y <- as.matrix(CST22[, c("OUT_PATIENTS", "IN_PATIENTS")])

# Check data quality
summary(X)
summary(Y)
# Look for: missing values, extreme values, data entry errors

# 2. Run standard DEA baseline
baseline <- qDEA(X = X, Y = Y, 
                 orient = "in",    # Choose: in, out, inout
                 RTS = "VRS",      # Choose: CRS, VRS, DRS, IRS
                 qout = 0)

# 3. Run robust qDEA
robust <- qDEA(X = X, Y = Y,
               orient = "in",
               RTS = "VRS",
               qout = 0.10,       # Adjust based on expected outliers
               nqiter = 3,        # Iterative refinement
               getproject = TRUE) # Get targets

# 4. Compare results
comparison <- data.frame(
  Unit = rownames(X),
  DEA = round(baseline$effvals, 3),
  qDEA = round(robust$effvalsq, 3),
  Change = round(robust$effvalsq - baseline$effvals, 3)
)

# 5. Identify outliers
potential_outliers <- comparison$Unit[abs(comparison$Change) > 0.10]

# 6. Calculate targets
targets <- data.frame(
  Unit = rownames(X),
  Efficiency = round(robust$effvalsq, 3),
  Current_Input1 = X[,1],
  Target_Input1 = round(robust$PROJ_DATA$X0HATq[,1], 2),
  Current_Input2 = X[,2],
  Target_Input2 = round(robust$PROJ_DATA$X0HATq[,2], 2)
)

# 7. Generate report
# Export to CSV
write.csv(comparison, "efficiency_comparison.csv", row.names = FALSE)
write.csv(targets, "efficiency_targets.csv", row.names = FALSE)

# 8. Optional: Bootstrap for confidence intervals
boot_result <- qDEA(X = X, Y = Y,
                    orient = "in",
                    RTS = "VRS",
                    qout = 0.10,
                    nboot = 1000,
                    seedval = 12345)

boot_ci <- data.frame(
  Unit = rownames(X),
  Efficiency = round(boot_result$effvalsq, 3),
  BC_Efficiency = round(boot_result$BOOT_DATA$effvalsq.bc, 3),
  Bias = round(boot_result$effvalsq - boot_result$BOOT_DATA$effvalsq.bc, 3)
)
```

## Best Practices Summary

### 1. Data Preparation

✓ Check for missing values  
✓ Verify all values are positive  
✓ Look for extreme outliers or data entry errors  
✓ Ensure comparable units (scale if necessary)  
✓ Document data sources and definitions

### 2. Model Selection

✓ Choose orientation based on managerial control  
✓ Use VRS unless scale efficiency is of interest  
✓ Start with qout = 0.10 (10% outliers)  
✓ Test sensitivity to qout selection

### 3. Interpretation

✓ Efficiency scores are relative, not absolute  
✓ Compare units within same analysis only  
✓ Consider context (outliers may be legitimate)  
✓ Verify targets are achievable  
✓ Use peers for benchmarking

### 4. Reporting

✓ Document methodology clearly  
✓ Report both DEA and qDEA results  
✓ Explain outlier allowance rationale  
✓ Provide actionable recommendations  
✓ Include sensitivity analysis

### 5. Common Mistakes to Avoid

✗ Comparing efficiency across different analyses  
✗ Using CRS when scale varies significantly  
✗ Setting qout too high (> 0.25)  
✗ Ignoring data quality issues  
✗ Over-interpreting small efficiency differences

## Exporting Results

### To CSV

```{r export-csv, eval=FALSE}
# Prepare comprehensive results
results_export <- data.frame(
  Unit = CST22$HOSPITAL,
  Input1 = X[,1],
  Input2 = X[,2],
  Output1 = Y[,1],
  Output2 = Y[,2],
  DEA_Efficiency = round(baseline$effvals, 4),
  qDEA_Efficiency = round(robust$effvalsq, 4),
  Target_Input1 = round(robust$PROJ_DATA$X0HATq[,1], 2),
  Target_Input2 = round(robust$PROJ_DATA$X0HATq[,2], 2)
)

# Export
write.csv(results_export, "qDEA_results.csv", row.names = FALSE)
```

### To Excel (requires openxlsx package)

```{r export-excel, eval=FALSE}
library(openxlsx)

# Create workbook
wb <- createWorkbook()

# Add worksheets
addWorksheet(wb, "Efficiency Scores")
addWorksheet(wb, "Targets")
addWorksheet(wb, "Peers")

# Write data
writeData(wb, "Efficiency Scores", comparison)
writeData(wb, "Targets", targets)
writeData(wb, "Peers", robust$PEER_DATA$PEERSq)

# Save
saveWorkbook(wb, "qDEA_analysis.xlsx", overwrite = TRUE)
```

## Visualization Examples

### Efficiency Distribution

```{r viz-distribution, fig.width=7, fig.height=5}
data(CST22)
X <- as.matrix(CST22[, c("DOCTORS", "NURSES")])
Y <- as.matrix(CST22[, c("OUT_PATIENTS", "IN_PATIENTS")])

result <- qDEA(X = X, Y = Y, orient = "in", RTS = "VRS", qout = 0.10)

# Histogram
hist(result$effvalsq, 
     breaks = 10,
     col = "lightblue",
     border = "white",
     main = "Distribution of Efficiency Scores",
     xlab = "Efficiency",
     ylab = "Frequency")
abline(v = mean(result$effvalsq), col = "red", lwd = 2, lty = 2)
legend("topleft", 
       legend = paste("Mean =", round(mean(result$effvalsq), 3)),
       col = "red", lty = 2, lwd = 2)
```

### Efficiency Comparison

```{r viz-comparison, fig.width=7, fig.height=6}
# Compare DEA and qDEA
dea <- qDEA(X = X, Y = Y, orient = "in", RTS = "VRS", qout = 0)
qdea <- qDEA(X = X, Y = Y, orient = "in", RTS = "VRS", qout = 0.10)

# Scatter plot
plot(dea$effvals, qdea$effvalsq,
     xlim = c(0.4, 1.2), ylim = c(0.4, 1.2),
     xlab = "DEA Efficiency",
     ylab = "qDEA Efficiency",
     main = "DEA vs qDEA Efficiency Scores",
     pch = 19, col = "blue")
abline(0, 1, col = "red", lty = 2)  # 45-degree line
text(dea$effvals, qdea$effvalsq, 
     labels = CST22$HOSPITAL, 
     pos = 3, cex = 0.8)
grid()
```

## Conclusion

This vignette has demonstrated practical applications of qDEA including:

- Complete hospital and retail efficiency analyses
- Outlier detection and handling
- Target setting and benchmarking
- Sensitivity analysis
- Result interpretation and reporting

For more details on the underlying methodology, see the main package vignette.

## Further Reading

- Main vignette: `vignette("introduction-to-qDEA")`
- Package documentation: `help(package = "qDEA")`
- Function help: `?qDEA`

Contact: jatwood@montana.edu