## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----setup--------------------------------------------------------------------
library(undidR)

## ----schematic, echo=FALSE, fig.align='center', out.width='90%', fig.cap="Schematic of the UNDID framework."----
knitr::include_graphics("figures/undidR_schematic.png")

## -----------------------------------------------------------------------------
# Common adoption: start by building the initializing CSV, which lists the
# silos together with their treatment times. Control silos (here, 73 and 46)
# should be labelled with "control".
init <- create_init_csv(
  silo_names = c("73", "46", "71", "58"),
  start_times = "1989",
  end_times = "2000",
  treatment_times = c("control", "control", "1991", "1991")
)
init

# Once the initializing CSV exists, `create_diff_df()` can be called. It
# creates the empty differences data frame, which is later filled out at each
# individual silo for its respective portion.
# The initializing CSV is looked up as `init.csv` inside `tempdir()`.
init_filepath <- normalizePath(
  file.path(tempdir(), "init.csv"),
  winslash = "/",
  mustWork = FALSE
)
empty_diff_df <- create_diff_df(
  init_filepath,
  date_format = "yyyy",
  freq = "yearly"
)
empty_diff_df

## -----------------------------------------------------------------------------
# Staggered adoption: the initializing CSV is created in exactly the same way.
# When `create_diff_df()` is run, it automatically detects whether the initial
# setup describes a common adoption or a staggered adoption scenario.
init <- create_init_csv(
  silo_names = c(
    "73", "46", "54", "23", "86", "32",
    "71", "58", "64", "59", "85", "57"
  ),
  start_times = "1989",
  end_times = "2000",
  treatment_times = c(
    rep("control", 6),
    "1991", "1993", "1996", "1997", "1997", "1998"
  )
)
init

# Creating the empty differences data frame and its associated CSV file works
# the same way for staggered adoption as it does for common adoption.
init_filepath <- normalizePath(
  file.path(tempdir(), "init.csv"),
  winslash = "/",
  mustWork = FALSE
)
empty_diff_df <- create_diff_df(
  init_filepath,
  date_format = "yyyy",
  freq = "yearly",
  covariates = c("asian", "black", "male")
)
head(empty_diff_df, 4)

## -----------------------------------------------------------------------------
# Before calling `undid_stage_two()`, make sure the `time_column` of the
# `silo_df` holds only character values, i.e. date strings. The year column
# is therefore converted with `as.character()` first.
silo_data <- silo71
silo_data$year <- as.character(silo_data$year)

# Empty differences CSV shipped with the package for the common adoption case.
empty_diff_filepath <- system.file(
  "extdata/common",
  "empty_diff_df.csv",
  package = "undidR"
)
stage2 <- undid_stage_two(
  empty_diff_filepath,
  silo_name = "71",
  silo_df = silo_data,
  time_column = "year",
  outcome_column = "coll",
  silo_date_format = "yyyy"
)
head(stage2$diff_df, 4)
head(stage2$trends_data, 4)

## -----------------------------------------------------------------------------
# Calling `undid_stage_two()` for staggered adoption is no different from
# calling it for common adoption; only the empty differences CSV changes.
silo_data <- silo71
silo_data$year <- as.character(silo_data$year)

# Empty differences CSV shipped with the package for the staggered case.
empty_diff_filepath <- system.file(
  "extdata/staggered",
  "empty_diff_df.csv",
  package = "undidR"
)
stage2 <- undid_stage_two(
  empty_diff_filepath,
  silo_name = "71",
  silo_df = silo_data,
  time_column = "year",
  outcome_column = "coll",
  silo_date_format = "yyyy"
)
head(stage2$diff_df, 4)
head(stage2$trends_data, 4)

## ----fig.width=6, fig.height=4, out.width = "70%", dpi=300, fig.align="center"----
# Given a `dir_path`, `undid_stage_three()` searches that folder for all CSV
# files that begin with "filled_diff_df_" and stitches them together in order
# to compute the group level ATTs, the aggregate ATT, and the associated
# standard errors and p-values.
dir_path <- system.file("extdata/common", package = "undidR")
results <- undid_stage_three(dir_path, covariates = FALSE)
results

# A parallel trends plot of the silos is available by calling
# `plot_parallel_trends()`. This function searches a folder for all CSV files
# that start with "trends_data" and combines them. The combined data is
# returned by the function.
trends <- plot_parallel_trends(
  dir_path,
  # Setting to `FALSE` puts every silo in the legend
  simplify_legend = FALSE,
  # Line width
  lwd = 3,
  # (min, max) of the y-axis
  ylim = c(0, 1),
  # Explicitly declare the dates shown on the x-axis
  xdates = as.Date(c(
    "1989-01-01", "1991-01-01", "1993-01-01",
    "1995-01-01", "1997-01-01", "1999-01-01"
  )),
  # x-axis date format
  date_format = "%Y",
  # Values on the y-axis
  ylabels = c(0, 0.25, 0.5, 0.75, 1),
  # Where the legend should go
  legend_location = "topleft",
  # Defaults to `TRUE`; can be set to `FALSE` to omit the legend
  legend_on = TRUE,
  # y-axis title
  ylab = "coll",
  # Colours of the treatment silos
  treatment_colour = c("red", "coral"),
  # Colours of the control silos; colours can be entered as hexcodes
  control_colour = c("#2fa7cf", "skyblue")
)
head(trends, 4)

## ----fig.width=6, fig.height=4, out.width = "70%", dpi=300, fig.align="center"----
# When calling `undid_stage_three()` for staggered adoption it is important
# to specify the aggregation method, `agg`.
dir_path <- system.file("extdata/staggered", package = "undidR")

# The `nperm` parameter specifies how many permutations to consider for
# randomization inference.
results <- undid_stage_three(
  dir_path,
  agg = "silo",
  covariates = TRUE,
  nperm = 501
)
results

# With many silos, a parallel trends plot is easier to read when
# `combine = TRUE` is passed to `plot_parallel_trends()`: all treatment silos
# are combined into a single line, and all control silos into a single line.
trends <- plot_parallel_trends(
  dir_path,
  # Plots one line for control and one line for treated
  combine = TRUE,
  # Reduces the legend to simply "Control" and "Treated"
  simplify_legend = TRUE,
  # Line width
  lwd = 3,
  # (min, max) of the y-axis
  ylim = c(0.2, 0.6),
  # Explicitly declare the dates shown on the x-axis
  xdates = as.Date(c(
    "1989-01-01", "1991-01-01", "1993-01-01",
    "1995-01-01", "1997-01-01", "1999-01-01"
  )),
  # x-axis date format
  date_format = "%Y",
  # Values on the y-axis
  ylabels = c(0.2, 0.4, 0.6),
  # Where the legend should go
  legend_location = "topright",
  # Line width for the treatment indicator lines
  treatment_indicator_lwd = 2,
  # Transparency for the treatment indicator lines
  treatment_indicator_alpha = 0.2,
  # Colour for the treatment indicator lines
  treatment_indicator_col = "red"
)
head(trends, 4)