The `cdid` package extends the popular `did` library by Brantly Callaway to
improve efficiency and handle unbalanced panel data in staggered treatment
designs. It implements the methodology introduced in:
Bellego, Benatia, and Dortet-Bernadet (2024), “The Chained Difference-in-Differences”, Journal of Econometrics. https://doi.org/10.1016/j.jeconom.2024.105783
Planned future developments: a generalized attrition model (MAR and sequential MAR), a doubly-robust estimator (so far only IPW is implemented), and improved computational efficiency. Hopefully, `cdid` will eventually be integrated directly into the `did` library.
The cdid
package can be installed from CRAN using:
# Install the released version from CRAN
install.packages("cdid")
Alternatively, for the development version:
# Install the development version from the `main` branch on GitHub
# (requires the `remotes` package)
remotes::install_github(
  "joelcuerrier/cdid",
  ref = "main",
  build_vignettes = TRUE,
  force = TRUE
)
# Load both packages: `did` is used as the benchmark, `cdid` is the package shown here
library(did) # for comparison
library(cdid)
# Fix the random seed so the example is reproducible
set.seed(123)
# Generate a balanced dataset with unit fixed effects.
# The true values of the coefficients are based on time-to-treatment: the
# treatment effect is zero before the treatment, 1.75 one period after, 1.5 two
# periods after, 1.25 three periods after, 1 four periods after, 0.75 five
# periods after, 0.5 six periods after, etc.
data0 <- fonction_simu_attrition(
  N = 500, T = 8,
  theta2_alpha_Gg = 0.5, lambda1_alpha_St = 0,
  sigma_alpha = 2, sigma_epsilon = 0.1, tprob = 0.5
)
# Ensure all observations are included for a balanced panel:
# overwrite the sampling indicator S with 1 for every row
data0$S <- 1
# Run the original `did` library estimation (benchmark for comparison)
did.results <- did::att_gt(
  yname = "Y",
  tname = "date",
  idname = "id",
  gname = "date_G",
  xformla = ~X,
  data = data0,
  weightsname = NULL,
  allow_unbalanced_panel = FALSE,
  panel = TRUE,
  control_group = "notyettreated",
  alp = 0.05,
  bstrap = TRUE,
  cband = TRUE,
  biters = 1000,
  clustervars = NULL,
  est_method = "ipw",
  base_period = "varying",
  print_details = FALSE,
  pl = FALSE,
  cores = 1
)
# Run `cdid` with 2-step weighting matrix
result_2step <- att_gt_cdid(
  yname = "Y",
  tname = "date",
  idname = "id",
  gname = "date_G",
  xformla = ~X,
  data = data0,
  control_group = "notyettreated",
  alp = 0.05,
  bstrap = TRUE,
  biters = 1000,
  clustervars = NULL,
  cband = TRUE,
  est_method = "2-step",
  base_period = "varying",
  print_details = FALSE,
  pl = FALSE,
  cores = 1
)
# Run `cdid` with identity weighting matrix
result_id <- att_gt_cdid(
  yname = "Y",
  tname = "date",
  idname = "id",
  gname = "date_G",
  xformla = ~X,
  data = data0,
  control_group = "notyettreated",
  alp = 0.05,
  bstrap = TRUE,
  biters = 1000,
  clustervars = NULL,
  cband = TRUE,
  est_method = "Identity",
  base_period = "varying",
  print_details = FALSE,
  pl = FALSE,
  cores = 1
)
# Print results
# (in order: `did` benchmark, `cdid` 2-step weighting, `cdid` identity weighting)
print(did.results)
print(result_2step)
print(result_id)
After computing the group-time ATT estimates, aggregate results can be obtained using functions from the `did` library:
# Aggregate the group-time ATT estimates with did::aggte(), using
# type = "dynamic", once for each of the three sets of results
agg.es.did <- aggte(MP = did.results, type = "dynamic")
agg.es.2step <- aggte(MP = result_2step, type = "dynamic")
agg.es.id <- aggte(MP = result_id, type = "dynamic")

# Print aggregate results
print(agg.es.did)
print(agg.es.2step)
print(agg.es.id)
The `cdid` library excels with unbalanced panels. Here’s an example:
# Generate a dataset with missing observations based on sampling indicator S
data0 <- fonction_simu_attrition(
  N = 500, T = 8,
  theta2_alpha_Gg = 0.5, lambda1_alpha_St = 0,
  sigma_alpha = 2, sigma_epsilon = 0.1, tprob = 0.5
)
# Keep only non-missing observations (rows with S == 1)
data0 <- data0[data0$S == 1, ]
# Run estimations as before, but specify panel = FALSE for did::att_gt()
did.results <- did::att_gt(
  yname = "Y",
  tname = "date",
  idname = "id",
  gname = "date_G",
  xformla = ~X,
  data = data0,
  weightsname = NULL,
  allow_unbalanced_panel = FALSE,
  panel = FALSE,
  control_group = "notyettreated",
  alp = 0.05,
  bstrap = TRUE,
  cband = TRUE,
  biters = 1000,
  clustervars = NULL,
  est_method = "ipw",
  base_period = "varying",
  print_details = FALSE,
  pl = FALSE,
  cores = 1
)
# For cdid, there is no difference: the call is the same as in the balanced case
result_2step <- att_gt_cdid(
  yname = "Y",
  tname = "date",
  idname = "id",
  gname = "date_G",
  xformla = ~X,
  data = data0,
  control_group = "notyettreated",
  alp = 0.05,
  bstrap = TRUE,
  biters = 1000,
  clustervars = NULL,
  cband = TRUE,
  est_method = "2-step",
  base_period = "varying",
  print_details = FALSE,
  pl = FALSE,
  cores = 1
)
# Run `cdid` with identity weighting matrix on the unbalanced data
result_id <- att_gt_cdid(
  yname = "Y",
  tname = "date",
  idname = "id",
  gname = "date_G",
  xformla = ~X,
  data = data0,
  control_group = "notyettreated",
  alp = 0.05,
  bstrap = TRUE,
  biters = 1000,
  clustervars = NULL,
  cband = TRUE,
  est_method = "Identity",
  base_period = "varying",
  print_details = FALSE,
  pl = FALSE,
  cores = 1
)
Complete documentation and detailed examples are available through the package’s help pages:
# Open the package help page, then list the installed vignettes for cdid
?cdid
browseVignettes("cdid")
and a dedicated webpage: https://www.davidbenatia.com/projects/cdid-library/.
Bellego, C., Benatia, D., and Dortet-Bernadet, V. (2024). The Chained Difference-in-Differences. Journal of Econometrics. https://doi.org/10.1016/j.jeconom.2024.105783
Callaway, B., & Sant’Anna, P. H. C. (2021). Difference-in-Differences with Multiple Time Periods. Journal of Econometrics. https://doi.org/10.1016/j.jeconom.2020.12.001
This package is licensed under the GPL-2 license.