## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")

## -----------------------------------------------------------------------------
library(personnelSelectionUtility)

## -----------------------------------------------------------------------------
schmidt_pat <- data.frame(
  selection_ratio = c(.05, .10, .20, .30, .40, .50, .80)
)
schmidt_pat$one_year   <- vapply(
  schmidt_pat$selection_ratio,
  function(sr) bcg_utility(
    validity        = .76,
    selection_ratio = sr,
    sdy             = 10413,
    n_selected      = 618,
    tenure          = 1,
    cost            = 0
  )$net_utility,
  numeric(1)
)
schmidt_pat$multi_year <- vapply(
  schmidt_pat$selection_ratio,
  function(sr) bcg_utility(
    validity        = .76,
    selection_ratio = sr,
    sdy             = 10413,
    n_selected      = 618,
    tenure          = 9.69,
    cost            = 0
  )$net_utility,
  numeric(1)
)
schmidt_pat

## -----------------------------------------------------------------------------
# All three cases share the same expected standardised score among offered
# candidates:
z_offered <- selected_mean_z(.20)

# Case 1: uniform random rejection. The expected z among accepted candidates
# equals z_offered; only the realised headcount is scaled by the acceptance rate.
offer_rejection_adjustment(
  expected_z_offered = z_offered,
  mode               = "uniform",
  acceptance_rate    = .70,
  n_offered          = 100
)

# Case 2: correlated rejection. Top candidates are more likely to decline,
# captured by a negative quality-acceptance correlation.
offer_rejection_adjustment(
  expected_z_offered     = z_offered,
  mode                   = "correlated",
  acceptance_rate        = .70,
  rho_quality_acceptance = -0.20,
  n_offered              = 100
)

# Case 3: selective rejection. Explicit logit link with a strongly negative
# slope, representing the case Murphy emphasises in which the very top candidates
# almost always decline.
offer_rejection_adjustment(
  expected_z_offered = z_offered,
  mode               = "selective",
  acceptance_rate    = .70,
  logit_intercept    = qlogis(.70),
  logit_slope        = -1.0,
  n_offered          = 100
)

## -----------------------------------------------------------------------------
set.seed(2024)

# Simulate a moderately skewed criterion: lognormal with a few extreme outliers
n <- 200
y_normal_part   <- rlnorm(n, meanlog = 11.0, sdlog = 0.30)
y_outliers_idx  <- sample.int(n, 4)
y_normal_part[y_outliers_idx] <- y_normal_part[y_outliers_idx] * 3.5
y               <- y_normal_part

x <- .50 * scale(log(y))[, 1] + rnorm(n, 0, sqrt(1 - .25))

sdy_with_outliers   <- sd(y)
sdy_without_outliers <- sd(y[-y_outliers_idx])

c(with_outliers    = sdy_with_outliers,
  without_outliers = sdy_without_outliers,
  ratio            = sdy_with_outliers / sdy_without_outliers)

## -----------------------------------------------------------------------------
utility_regression_diagnostics(x = x, y = y)

## -----------------------------------------------------------------------------
focal_validity    <- .35
baseline_validity <- .20
selection_ratio   <- .20
sdy               <- 50000
n_year_one        <- 100
tenure_years      <- 5
fixed_cost        <- 75000

## -----------------------------------------------------------------------------
naive <- bcg_utility(
  validity          = focal_validity,
  selection_ratio   = selection_ratio,
  sdy               = sdy,
  n_selected        = n_year_one,
  tenure            = tenure_years,
  cost              = fixed_cost,
  baseline_validity = 0
)
naive$net_utility

## -----------------------------------------------------------------------------
with_baseline <- bcg_utility(
  validity          = focal_validity,
  selection_ratio   = selection_ratio,
  sdy               = sdy,
  n_selected        = n_year_one,
  tenure            = tenure_years,
  cost              = fixed_cost,
  baseline_validity = baseline_validity
)
with_baseline$net_utility

## -----------------------------------------------------------------------------
S11 <- matrix(c(1, .30, .30, 1), 2, 2)
S12 <- matrix(c(.30, .10, .15, .25), 2, 2, byrow = TRUE)
S22 <- matrix(c(1, .40, .40, 1), 2, 2)
b   <- c(.7, .3)

rcv <- restricted_canonical_validity(S11, S12, S22, criterion_weights = b)
rcv$validity

with_multidim <- bcg_utility(
  validity          = rcv$validity,
  baseline_validity = baseline_validity,
  selection_ratio   = selection_ratio,
  sdy               = sdy,
  n_selected        = n_year_one,
  tenure            = tenure_years,
  cost              = fixed_cost
)
with_multidim$net_utility

## -----------------------------------------------------------------------------
hires    <- c(n_year_one, 15, 15, 15, 15)
losses   <- c(0, 15, 15, 15, 15)
active_n <- employee_flow(hires, losses)
active_n

## -----------------------------------------------------------------------------
with_flows <- boudreau_utility(
  validity          = rcv$validity,
  baseline_validity = baseline_validity,
  selection_ratio   = selection_ratio,
  sdy               = sdy,
  n_by_period       = active_n,
  variable_value    = 0,
  tax_rate          = .25,
  discount_rate     = .08,
  cost_by_period    = c(fixed_cost, 5000, 5000, 5000, 5000)
)
with_flows$net_present_value

## -----------------------------------------------------------------------------
probation_z   <- -1
survivor_gain <- probation_adjustment(probation_z)
discount_rate <- .08
periods       <- seq_along(active_n)
later_periods <- periods[-1]

benefit_t  <- survivor_gain * sdy * active_n[later_periods] * (1 - .25)
discounted <- benefit_t / (1 + discount_rate)^later_periods
extra_npv  <- sum(discounted)

with_probation_npv <- with_flows$net_present_value + extra_npv
with_probation_npv

## -----------------------------------------------------------------------------
cascade <- data.frame(
  step = c("1. Naive BCG (random baseline)",
           "2. + operating baseline",
           "3. + multidim. criterion (RCV)",
           "4. + flows + tax + discount",
           "5. + probation (full comprehensive)"),
  net_utility = c(naive$net_utility,
                  with_baseline$net_utility,
                  with_multidim$net_utility,
                  with_flows$net_present_value,
                  with_probation_npv)
)
cascade$pct_of_naive <- round(100 * cascade$net_utility / naive$net_utility, 1)
cascade

## -----------------------------------------------------------------------------
S11 <- matrix(c(1, .30, .30, 1), 2, 2)
S12 <- matrix(c(.30, .10, .15, .25), 2, 2, byrow = TRUE)
S22 <- matrix(c(1, .40, .40, 1), 2, 2)

s <- sturman_comprehensive(
  validity                       = .35,
  baseline_validity              = .20,
  selection_ratio                = .20,
  sdy                            = 50000,
  n_year_one                     = 100,
  tenure                         = 5,
  fixed_cost                     = 75000,
  hires_per_period               = c(100, 15, 15, 15, 15),
  losses_per_period              = c(0, 15, 15, 15, 15),
  tax_rate                       = .25,
  discount_rate                  = .08,
  predictor_cor                  = S11,
  predictor_criterion_cor        = S12,
  criterion_cor                  = S22,
  criterion_weights              = c(.7, .3),
  probation_cutoff_z             = -1,
  acceptance_rate                = 0.70,
  quality_acceptance_correlation = -0.20
)

s

## -----------------------------------------------------------------------------
Rxx <- matrix(c(
  1.00, .30, .05, .10,
  .30, 1.00, .20, .25,
  .05, .20, 1.00, .40,
  .10, .25, .40, 1.00
), 4, 4, byrow = TRUE)

validities <- c(.51, .38, .23, .32)
predictor_labels <- c("GMA", "Interview", "Conscientiousness", "Integrity")

## -----------------------------------------------------------------------------
selection_ratios <- c(.10, .20, .40)

results <- lapply(selection_ratios, function(sr) {
  marginal_sr <- (sr)^(1 / 4)
  compare_selection_systems(
    predictor_cor                = Rxx,
    validities                   = validities,
    compensatory_weights         = validities,
    compensatory_selection_ratio = sr,
    hurdle_selection_ratios      = rep(marginal_sr, 4),
    n_sim                        = 50000,
    seed                         = 42
  )
})
names(results) <- paste0("SR=", selection_ratios)

## -----------------------------------------------------------------------------
ock_oswald <- data.frame(
  SR              = selection_ratios,
  compensatory_z  = vapply(results,
                           function(o) o$compensatory$expected_criterion_z,
                           numeric(1)),
  hurdle_z        = vapply(results,
                           function(o) o$multiple_hurdle$expected_criterion_z,
                           numeric(1)),
  hurdle_joint_sr = vapply(results,
                           function(o) o$multiple_hurdle$joint_selection_ratio,
                           numeric(1))
)
ock_oswald$z_difference <- ock_oswald$compensatory_z - ock_oswald$hurdle_z
ock_oswald

## -----------------------------------------------------------------------------
n_apps <- 1000

stage_design <- compare_selection_systems_staged(
  predictor_cor                  = Rxx,
  validities                     = validities,
  compensatory_weights           = validities,
  compensatory_selection_ratio   = .20,
  stage_predictors               = list(1, c(2, 3), 4),
  stage_selection_ratios         = c(.50, .60, .70),
  n_sim                          = 50000,
  seed                           = 42,
  n_applicants                   = n_apps,
  compensatory_cost_per_applicant = 800,
  hurdle_cost_per_stage          = c(100, 400, 600),
  sdy                            = 50000
)
stage_design$net_utility_difference