## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ## ----parity-helpers, include = FALSE------------------------------------------ # Prefer local package sources when rendering inside the repository so # parity checks target the code under development. find_local_pkg_dir <- function() { roots <- c(".", "..", "../..") for (root in roots) { pkg_desc <- file.path(root, "pkg", "DESCRIPTION") root_desc <- file.path(root, "DESCRIPTION") if (file.exists(pkg_desc)) { return(file.path(root, "pkg")) } if (file.exists(root_desc)) { desc <- tryCatch(read.dcf(root_desc), error = function(e) NULL) if (!is.null(desc) && "Package" %in% colnames(desc) && identical(desc[1, "Package"], "Rfuzzydid")) { return(root) } } } NULL } local_pkg_dir <- find_local_pkg_dir() if (!is.null(local_pkg_dir) && requireNamespace("pkgload", quietly = TRUE)) { pkgload::load_all(local_pkg_dir, helpers = FALSE, export_all = FALSE, quiet = TRUE) } else { library(Rfuzzydid) } stata_partial <- c(TC_inf = 0.01220596000000, TC_sup = 3.08177130000000) stata_eqtest <- data.frame( contrast = c("W_DID_W_TC", "W_DID_W_CIC", "W_TC_W_CIC"), estimate = c(-0.0112300674614629, -0.145514320262384, -0.134284252800921), stringsAsFactors = FALSE ) stata_lqte <- data.frame( quantile = seq(0.05, 0.95, by = 0.05), estimate = c( 1.67274034023285, 1.79422724246979, 1.81602716445923, 1.94611406326294, 1.95544064044952, 1.94727826118469, 1.97404694557190, 2.09413313865662, 2.13104295730591, 2.03220224380493, 2.10763168334961, -0.360978126525879, -0.325689792633057, -0.228986263275146, -0.172835350036621, -0.146114349365234, -0.098971366882324, -0.079638004302979, -0.034532070159912 ) ) native_cluster <- c(n_reps = 20L, n_misreps = 3L, share_failures = 0.15) ## ----sim-data----------------------------------------------------------------- set.seed(50321) n_cell <- 120 # observations per cell # Helper function to create each (G, T) cell make_cell <- function(g, t, prob_d) { d <- rbinom(n_cell, size = 1, prob = prob_d) # Outcome: base + group effect + time effect + treatment effect + noise y <- 1 + 0.5 * g + 0.4 * t + 2.0 * d + sin(seq_len(n_cell) / 7) data.frame(y = y, g = g, t = t, d = d) } # Build the four cells df <- rbind( make_cell(g = 0, t = 0, prob_d = 0.20), # Control group, baseline make_cell(g = 0, t = 1, prob_d = 0.40), # Control group, follow-up make_cell(g = 1, t = 0, prob_d = 0.30), # Treatment group, baseline make_cell(g = 1, t = 1, prob_d = 0.80) # Treatment group, follow-up ) # Verify cell sizes and treatment rates aggregate(cbind(n = d) ~ g + t, data = df, FUN = function(x) c(n = length(x), mean = mean(x))) # Save fixture used by most examples for direct Stata comparison if (requireNamespace("haven", quietly = TRUE)) { haven::write_dta(df, "stata-parity-sim-fixture.dta") } ## ----basic-est---------------------------------------------------------------- fit <- fuzzydid( data = df, formula = y ~ d, group = "g", time = "t", did = TRUE, tc = TRUE, cic = TRUE, breps = 50 ) summary(fit) stata_basic <- c( W_DID = 2.00000000000000, W_TC = 1.97320415000280, W_CIC = 2.22209836585117 ) r_basic <- setNames(fit$late$estimate, fit$late$estimator)[names(stata_basic)] basic_cmp <- data.frame( estimator = names(stata_basic), R = unname(r_basic), Stata = unname(stata_basic), abs_diff = abs(unname(r_basic) - unname(stata_basic)) ) knitr::kable(basic_cmp, digits = 8) ## ----tidy-output, eval = requireNamespace("modelsummary", quietly = TRUE)----- library(modelsummary) modelsummary::modelsummary(fit) ## ----parity-check------------------------------------------------------------- n_cell <- 120L make_parity_cell <- function(g, t, ones) { data.frame( g = rep.int(g, n_cell), t = rep.int(t, n_cell), d = c(rep.int(1L, ones), rep.int(0L, n_cell - ones)) ) } out <- rbind( make_parity_cell(0L, 0L, 24L), make_parity_cell(0L, 1L, 48L), make_parity_cell(1L, 0L, 36L), make_parity_cell(1L, 1L, 96L) ) out$id <- seq_len(nrow(out)) out$y <- 1 + 0.5 * out$g + 0.4 * out$t + 2 * out$d + sin(out$id / 7) parity_fixture <- out[, c("y", "g", "t", "d")] fit_parity <- fuzzydid( data = parity_fixture, formula = y ~ d, group = "g", time = "t", did = TRUE, tc = TRUE, cic = TRUE, breps = 5, seed = 1 ) stata_core <- c( W_DID = 2.17430877504668, W_TC = 2.18553887285118, W_CIC = 2.31982319056988 ) r_core <- setNames(fit_parity$late$estimate, fit_parity$late$estimator)[c("W_DID", "W_TC", "W_CIC")] core_cmp <- data.frame( estimator = names(stata_core), R = unname(r_core), Stata = unname(stata_core), abs_diff = abs(unname(r_core) - unname(stata_core)) ) knitr::kable(core_cmp, digits = 8) # Save exact parity fixture for Stata users if (requireNamespace("haven", quietly = TRUE)) { haven::write_dta(parity_fixture, "stata-parity-core-fixture.dta") } ## ----seed-parity-------------------------------------------------------------- # These produce identical SE/CI because the same bootstrap seed is supplied fit1 <- fuzzydid(df, y ~ d, group = "g", time = "t", did = TRUE, breps = 50, seed = 1) fit2 <- fuzzydid(df, y ~ d, group = "g", time = "t", did = TRUE, breps = 50, seed = 1) all.equal(fit1$late$std.error, fit2$late$std.error) stata_seed_se <- 0.66310988000000 seed_cmp <- data.frame( estimator = "W_DID", R_seed_1_run_1 = fit1$late$std.error[fit1$late$estimator == "W_DID"], R_seed_1_run_2 = fit2$late$std.error[fit2$late$estimator == "W_DID"], Stata = stata_seed_se, stringsAsFactors = FALSE ) seed_cmp$abs_diff_run_1_vs_Stata <- abs(seed_cmp$R_seed_1_run_1 - seed_cmp$Stata) seed_cmp$abs_diff_run_2_vs_Stata <- abs(seed_cmp$R_seed_1_run_2 - seed_cmp$Stata) knitr::kable(seed_cmp, digits = 8) ## ----restrictions, error = TRUE----------------------------------------------- try({ df_restrict <- transform(df, x = rnorm(nrow(df))) # CIC requires no covariates try({ fuzzydid(df_restrict, y ~ d + x, group = "g", time = "t", cic = TRUE) }) # LQTE requires binary G, T, and D, plus no covariates try({ fuzzydid(df_restrict, y ~ d + x, group = "g", time = "t", lqte = TRUE) }) }) ## ----partial------------------------------------------------------------------ fit_partial <- fuzzydid( data = df, formula = y ~ d, group = "g", time = "t", tc = TRUE, partial = TRUE, breps = 50 ) knitr::kable(fit_partial$late) r_partial <- setNames(fit_partial$late$estimate, fit_partial$late$estimator)[names(stata_partial)] partial_cmp <- data.frame( estimator = names(stata_partial), R = unname(r_partial), Stata = unname(stata_partial), abs_diff = abs(unname(r_partial) - unname(stata_partial)) ) knitr::kable(partial_cmp, digits = 8) ## ----eqtest------------------------------------------------------------------- fit_eq <- fuzzydid( data = parity_fixture, formula = y ~ d, group = "g", time = "t", did = TRUE, tc = TRUE, cic = TRUE, eqtest = TRUE, breps = 5, seed = 1 ) eq_cmp <- data.frame( contrast = stata_eqtest$contrast, R = fit_eq$eqtest$estimate, Stata = stata_eqtest$estimate, abs_diff = abs(fit_eq$eqtest$estimate - stata_eqtest$estimate) ) knitr::kable(eq_cmp, digits = 8) ## ----covariates--------------------------------------------------------------- # Add covariates set.seed(123) df$x1 <- rnorm(nrow(df)) df$x2 <- factor(ifelse(runif(nrow(df)) > 0.5, "a", "b")) fit_modelx <- fuzzydid( data = df, formula = y ~ d + x1 + x2, group = "g", time = "t", did = TRUE, tc = TRUE, modelx = c("ols", "ols"), nose = TRUE ) knitr::kable(fit_modelx$late) stata_modelx <- c( W_DID = 2.03222780000000, W_TC = 1.93062450000000 ) r_modelx <- setNames(fit_modelx$late$estimate, fit_modelx$late$estimator)[names(stata_modelx)] modelx_cmp <- data.frame( estimator = names(stata_modelx), R = unname(r_modelx), Stata = unname(stata_modelx), abs_diff = abs(unname(r_modelx) - unname(stata_modelx)) ) knitr::kable(modelx_cmp, digits = 8) if (requireNamespace("haven", quietly = TRUE)) { haven::write_dta(df[, c("y", "g", "t", "d", "x1", "x2")], "stata-parity-covariates-fixture.dta") } ## ----sieves------------------------------------------------------------------- fit_sieves <- fuzzydid( data = df, formula = y ~ d + x1, group = "g", time = "t", did = TRUE, tc = TRUE, sieves = TRUE, nose = TRUE ) knitr::kable(fit_sieves$late) stata_sieves <- c( W_DID = 2.00081280000000, W_TC = 1.93678270000000 ) r_sieves <- setNames(fit_sieves$late$estimate, fit_sieves$late$estimator)[names(stata_sieves)] sieves_cmp <- data.frame( estimator = names(stata_sieves), R = unname(r_sieves), Stata = unname(stata_sieves), abs_diff = abs(unname(r_sieves) - unname(stata_sieves)) ) knitr::kable(sieves_cmp, digits = 8) ## ----lqte--------------------------------------------------------------------- fit_lqte <- fuzzydid( data = parity_fixture, formula = y ~ d, group = "g", time = "t", lqte = TRUE, nose = TRUE, seed = 1 ) lqte_cmp <- data.frame( quantile = stata_lqte$quantile, R = fit_lqte$lqte$estimate, Stata = stata_lqte$estimate, abs_diff = abs(fit_lqte$lqte$estimate - stata_lqte$estimate) ) knitr::kable(head(lqte_cmp, 10), digits = 8) ## ----multiperiod-------------------------------------------------------------- set.seed(7) n_mp <- 70 make_mp_cell <- function(gb, gf, t, prob, shift = 0) { d <- rbinom(n_mp, 1, prob) y <- 1 + 0.35 * t + 1.6 * d + shift + rnorm(n_mp, sd = 0.25) data.frame(y = y, d = d, gb = gb, gf = gf, t = t) } df_mp <- rbind( make_mp_cell(gb = 0, gf = 0, t = 0, prob = 0.20), make_mp_cell(gb = 0, gf = 0, t = 1, prob = 0.30), make_mp_cell(gb = 0, gf = 0, t = 2, prob = 0.35), make_mp_cell(gb = 0, gf = 1, t = 0, prob = 0.30, shift = 0.2), make_mp_cell(gb = 1, gf = 1, t = 1, prob = 0.60, shift = 0.8), make_mp_cell(gb = 1, gf = 0, t = 2, prob = 0.75, shift = 1.1) ) fit_mp <- fuzzydid( data = df_mp, formula = y ~ d, group = "gb", group_forward = "gf", time = "t", did = TRUE, tc = TRUE, nose = TRUE ) knitr::kable(fit_mp$late) if (requireNamespace("haven", quietly = TRUE)) { haven::write_dta(df_mp, "stata-parity-multiperiod-fixture.dta") } ## ----bootstrap-diag----------------------------------------------------------- df_diag <- parity_fixture df_diag$cl <- rep(seq_len(24L), each = 20L) fit_diag <- fuzzydid( data = df_diag, formula = y ~ d, group = "g", time = "t", did = TRUE, tc = TRUE, cic = TRUE, cluster = "cl", breps = 20, seed = 1 ) diag_cmp <- data.frame( metric = c("N.reps", "N.misreps", "Share.failures"), R = c(fit_diag$n_reps, fit_diag$n_misreps, fit_diag$share_failures), Reference = c( native_cluster[["n_reps"]], native_cluster[["n_misreps"]], native_cluster[["share_failures"]] ), stringsAsFactors = FALSE ) share_from_counts <- if (diag_cmp$R[1] > 0) diag_cmp$R[2] / diag_cmp$R[1] else NA_real_ # These diagnostics are deterministic in R when the same bootstrap seed is used. diag_cmp$Delta <- diag_cmp$R - diag_cmp$Reference knitr::kable(diag_cmp, digits = 8) if (requireNamespace("haven", quietly = TRUE)) { haven::write_dta(df_diag, "stata-parity-cluster-fixture.dta") }