This vignette outlines the 2023 manuscript that describes invivoPKfit’s use in a case study with CvT data from over 200 Chemicals. The main inquiry is to assess whether we can estimate parameters for one- and two-compartment models such that the majority of predicted values are “within a factor of two” – a common metric used to evaluate physiologically-based pharmacokinetic (PBPK) models.
set.seed(2023)
library(tidyverse, quietly = TRUE)
#> ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
#> ✔ dplyr 1.1.4 ✔ readr 2.1.5
#> ✔ forcats 1.0.0 ✔ stringr 1.5.1
#> ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
#> ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
#> ✔ purrr 1.0.2
#> ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
#> ✖ dplyr::filter() masks stats::filter()
#> ✖ dplyr::lag() masks stats::lag()
#> ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(cowplot)
#>
#> Attaching package: 'cowplot'
#>
#> The following object is masked from 'package:lubridate':
#>
#> stamp
library(RColorBrewer)
devtools::load_all()
#> ℹ Loading invivoPKfit
today <- format(Sys.Date(), "%d%B%Y")
To speed up analysis in sections 4 and beyond, we can load an
.rds
file that includes the fitted models used in much of
the publication.
pk
objectSee pulling_iv_oral_cvtdb.Rmd
.
The following chunk shows the column/field mapping from the
cvt
object and the pk
object needed for
invivoPKfit analysis. The cvt
object is data from the CvTdb
database that includes chemicals with oral or intravenous routes of
administration and takes measurements from blood or plasma. The data has
been processed and cleaned to facilitate its use with
invivoPKfit.
### Minimal PK Object ###----
# Minimum pk object, add options later
# Note that these mappings are now a default mappings, just being verbose here.
# If there are any standard deviations that are zero or NA, but N_Subjects > 1,
# Set the n_subjects to 6 or the current value if lower than 6.
# (In Showa the mode is 6, but this condition is present in CVT_Legacy as well)
minimal_pk <- pk(data = cvt %>%
mutate(n_subjects_normalized = if_else(
n_subjects_normalized > 1 & is.na(invivPK_conc_sd),
min(n_subjects_normalized, 6), n_subjects_normalized)),
mapping = ggplot2::aes(
Chemical = analyte_dtxsid,
Chemical_Name = analyte_name_original,
DTXSID = analyte_dtxsid,
CASRN = analyte_casrn,
Species = species,
Reference = fk_extraction_document_id,
Media = conc_medium_normalized,
Route = administration_route_normalized,
Dose = invivPK_dose_level,
Dose.Units = "mg/kg",
Subject_ID = fk_subject_id,
Series_ID = fk_series_id,
Study_ID = fk_study_id,
ConcTime_ID = conc_time_id,
N_Subjects = n_subjects_normalized,
Weight = weight_kg,
Weight.Units = "kg",
Time = time_hr,
Time.Units = "hours",
Value = invivPK_conc,
Value.Units = "mg/L",
Value_SD = invivPK_conc_sd,
LOQ = invivPK_loq
))
Here we setup the PK object with the various options for fitting and data transformations. As shown in this vignette, it is possible to setup multiple distinct options for fitting and data transformation at once prior to processing and fitting the data.
# Types of Choices:
## Dose Normalized
## Log10 transform
## Scale time
# List of options
fitopts <- expand.grid(error_model = c("pooled",
"hierarchical"),
dose_norm = c(TRUE, FALSE),
log10_trans = c(TRUE, FALSE),
time_scale = c(
TRUE,
FALSE),
stringsAsFactors = FALSE)
# Make function similar to that in test_vignette authored by Caroline Ring
fit_data <- function(this_error_model,
this_dose_norm,
this_log10_trans,
this_time_scale,
n_cores = 12,
file_dir = Sys.getenv("OD_DIR")){
retval <- -9
dose_indic <- as.numeric(this_dose_norm)
log10_indic <- as.numeric(this_log10_trans)
time_indic <- as.numeric(this_time_scale)
errmodel_indic <- substr(this_error_model, 1, 1)
file_str <- paste0(today,
"mypk_fit_",
dose_indic,
log10_indic,
time_indic,
errmodel_indic,
".Rds")
cat("Fitting: ", file_str, "\n")
tryCatch(
expr = {
this_pk <- minimal_pk +
facet_data(vars(Chemical, Species)) +
settings_preprocess(keep_data_original = FALSE,
suppress.messages = TRUE) +
scale_conc(dose_norm = this_dose_norm, log10_trans = this_log10_trans) +
settings_optimx(method = c(
"L-BFGS-B",
"bobyqa"
))
if(this_error_model %in% "pooled"){
this_pk <- this_pk +
stat_error_model(error_group = vars(Chemical, Species))
} else if(this_error_model %in% c("hierarchical", "joint")){
this_pk <- this_pk +
stat_error_model(error_group = vars(Chemical, Species, Reference))
} else{
stop("this_error_model must be either 'pooled' or 'hierarchical'/'joint'")
}
if(this_time_scale %in% TRUE){
this_pk <- this_pk +
scale_time(new_units = "auto")
}
#do the fit
this_time <- system.time(this_fit <- do_fit(this_pk, n_cores = n_cores))
#save the result
saveRDS(this_fit,
file = paste0(file_dir,
file_str))
cli::cli_alert_success(text = "Fitting success!")
print(this_time)
rm(this_fit)
retval <- this_time[[3]]
},
error = function(e){
cli::cli_alert_danger(text = paste("Analysis",
file_str,
"failed with error",
e))
retval <- -1
}
)
return(retval)
}
system.time(tmp_out <- mapply(fit_data,
this_error_model = fitopts$error_model,
this_dose_norm = fitopts$dose_norm,
this_log10_trans = fitopts$log10_trans,
this_time_scale = fitopts$time_scale,
n_cores = 13))
mean(tmp_out)/60 # Average runtime (in minutes) per fitting option
median(tmp_out)/60
sum(tmp_out) # Looks like there's 10 extra seconds of overhead for non-fitting methods
Here I evaluate the various fitting options by:
# Evaluation
# Winning Model Tally
winmodel_comparison <- function(this_error_model,
this_dose_norm,
this_log10_trans,
this_time_scale) {
dose_indic <- as.numeric(this_dose_norm)
log10_indic <- as.numeric(this_log10_trans)
time_indic <- as.numeric(this_time_scale)
errmodel_indic <- substr(this_error_model, 1, 1)
file_str <- paste0(Sys.getenv("OD_DIR"),
today,
"mypk_fit_",
dose_indic,
log10_indic,
time_indic,
errmodel_indic,
".Rds")
pk_name <- paste0(dose_indic, log10_indic, time_indic, errmodel_indic)
current_rds <- readRDS(file = file_str)
pf <- current_rds$fit %>%
distinct(Chemical, Species, model, method, convcode) %>%
group_by(method, .drop = FALSE) %>%
count(convcode) %>%
pivot_wider(names_from = convcode,
names_prefix = "convcode.",
values_from = n)
# Wide winning model
suppressMessages({
winning_model <- get_winning_model(obj = current_rds)
wide_winning_model <- winning_model %>%
group_by(method, model) %>%
count() %>%
pivot_wider(names_from = model, values_from = n) %>%
left_join(pf)
})
return(wide_winning_model)
}
# RMSLE for Cmax and AUC (with some tallys for extreme values)
rmsles_Cmax_AUC <- function(
this_error_model,
this_dose_norm,
this_log10_trans,
this_time_scale) {
dose_indic <- as.numeric(this_dose_norm)
log10_indic <- as.numeric(this_log10_trans)
time_indic <- as.numeric(this_time_scale)
errmodel_indic <- substr(this_error_model, 1, 1)
file_str <- paste0(Sys.getenv("OD_DIR"),
today,
"mypk_fit_",
dose_indic,
log10_indic,
time_indic,
errmodel_indic,
".Rds")
pk_name <- paste0(dose_indic, log10_indic, time_indic, errmodel_indic)
message(paste0("Evaluation for: ", pk_name))
current_rds <- readRDS(file = file_str)
# Cmax and AUC_inf RMSEs, eval_tkstats already filters winning models
RMSLE_eval <- suppressMessages(eval_tkstats(obj = current_rds,
dose_norm = FALSE,
finite_only = TRUE) %>%
mutate(Options = pk_name) %>%
dplyr::select(
Options,
Chemical,
Species,
Route,
Media,
Reference,
method,
model,
starts_with("Cmax"),
starts_with("AUC_")
))
message(paste0("How many finite, non-zero comparisons had ",
"AUC_infinity.tkstats over/under 20X of AUC_infinity.nca?"))
print(
RMSLE_eval %>%
filter(
AUC_infinity.nca > 0,
AUC_infinity.tkstats > 0) %>%
group_by(Options, method) %>%
summarize(
`AUC_folderror>20X` = sum(
log10(AUC_infinity.tkstats/AUC_infinity.nca) > log10(20)
)
)
)
message("How many comparisons had AUC_infinity equal to zero?")
print(
RMSLE_eval %>%
filter(
AUC_infinity.nca > 0,
AUC_infinity.tkstats > 0) %>%
group_by(Options, method) %>%
summarize(NCA_zeroAUC = sum(AUC_infinity.nca == 0),
tkstats_zeroAUC =sum(AUC_infinity.tkstats == 0))
)
message("Removing those observations...")
RMSLE_eval <- RMSLE_eval %>%
filter(
AUC_infinity.nca > 0,
AUC_infinity.tkstats > 0,
log10(AUC_infinity.tkstats/AUC_infinity.nca) <= log10(20)
) %>%
rowwise() %>%
mutate(
SLE_Cmax = (log10(Cmax.tkstats) - log10(Cmax.nca)) ^ 2,
SLE_AUC_inf = (log10(AUC_infinity.tkstats) - log10(AUC_infinity.nca)) ^ 2
) %>%
filter(!is.na(SLE_Cmax), !is.na(SLE_AUC_inf)) %>%
group_by(Options, method) %>%
summarize(
RMSLE_Cmax = sqrt(mean(SLE_Cmax, na.rm = FALSE)) %>% signif(digits = 4),
RMSLE_AUC = sqrt(mean(SLE_AUC_inf, na.rm = FALSE)) %>% signif(digits = 4)
)
print(RMSLE_eval)
return(RMSLE_eval)
}
# Goodness-of-fit table with R-squared, RMSLE and AIC
get_gof <- function(this_error_model,
this_dose_norm,
this_log10_trans,
this_time_scale) {
dose_indic <- as.numeric(this_dose_norm)
log10_indic <- as.numeric(this_log10_trans)
time_indic <- as.numeric(this_time_scale)
errmodel_indic <- substr(this_error_model, 1, 1)
file_str <- paste0(Sys.getenv("OD_DIR"),
today,
"mypk_fit_",
dose_indic,
log10_indic,
time_indic,
errmodel_indic,
".Rds")
pk_name <- paste0(dose_indic, log10_indic, time_indic, errmodel_indic)
current_rds <- readRDS(file = file_str)
winning_model <- suppressMessages(get_winning_model(obj = current_rds))
suppressMessages({
this_rsq <- rsq(current_rds,
use_scale_conc = FALSE,
rsq_group = ggplot2::vars(Chemical, Species)) %>%
semi_join(winning_model)
this_AIC <- AIC(current_rds) %>%
semi_join(winning_model)
this_rmsle <- rmse.pk(current_rds,
rmse_group = vars(Chemical, Species),
use_scale_conc = list(dose_norm = FALSE,
log10_trans = TRUE)) %>%
semi_join(winning_model)
})
# Since they are all joined by winmodel
# all three must have same NROW
message(
paste0("For ", pk_name, "... outputs below must have same number of rows")
)
message(paste0("rsq: ", NROW(this_rsq)))
message(paste0("aic: ", NROW(this_AIC)))
message(paste0("rmsle: ", NROW(this_rmsle)))
gof_df <- this_rsq %>%
inner_join(this_AIC, by = join_by(Chemical, Species, method, model)) %>%
inner_join(this_rmsle, by = join_by(Chemical, Species, method, model)) %>%
mutate(Options = pk_name, .before = Chemical)
return(gof_df)
}
# All predictions
get_all_preds <- function(this_error_model,
this_dose_norm,
this_log10_trans,
this_time_scale) {
dose_indic <- as.numeric(this_dose_norm)
log10_indic <- as.numeric(this_log10_trans)
time_indic <- as.numeric(this_time_scale)
errmodel_indic <- substr(this_error_model, 1, 1)
file_str <- paste0(Sys.getenv("OD_DIR"),
today,
"mypk_fit_",
dose_indic,
log10_indic,
time_indic,
errmodel_indic,
".Rds")
pk_name <- paste0(dose_indic, log10_indic, time_indic, errmodel_indic)
current_rds <- readRDS(file = file_str)
# Wide winning model
suppressMessages({
winning_model <- get_winning_model(obj = current_rds) %>%
select(-c(near_flat, preds_below_loq))
this_preds <- predict(current_rds,
use_scale_conc = FALSE) %>%
semi_join(winning_model) %>%
mutate(Options = pk_name, .before = Chemical)
})
return(this_preds)
}
# Factor of two model error
tf_tests <- function(this_error_model,
this_dose_norm,
this_log10_trans,
this_time_scale) {
dose_indic <- as.numeric(this_dose_norm)
log10_indic <- as.numeric(this_log10_trans)
time_indic <- as.numeric(this_time_scale)
errmodel_indic <- substr(this_error_model, 1, 1)
file_str <- paste0(Sys.getenv("OD_DIR"),
today,
"mypk_fit_",
dose_indic,
log10_indic,
time_indic,
errmodel_indic,
".Rds")
pk_name <- paste0(dose_indic, log10_indic, time_indic, errmodel_indic)
current_rds <- readRDS(file = file_str)
print(paste0("Now analyzing: ", pk_name))
out <- twofold_test(current_rds, suppress_messages = TRUE)
out <- out$model_error_summary %>%
mutate(Options = pk_name)
}
system.time(wm_comp <- fitopts %>%
dplyr::rowwise() %>%
dplyr::mutate(winmodel_comp = list(
winmodel_comparison(this_error_model = error_model,
this_dose_norm = dose_norm,
this_log10_trans = log10_trans,
this_time_scale = time_scale))) %>%
tidyr::unnest(winmodel_comp))
system.time(cmax_auc_rmsle_tbl <- fitopts %>%
dplyr::rowwise() %>%
dplyr::mutate(nca_comp = list(
rmsles_Cmax_AUC(this_error_model = error_model,
this_dose_norm = dose_norm,
this_log10_trans = log10_trans,
this_time_scale = time_scale))) %>%
tidyr::unnest(nca_comp))
system.time(gof_tbl <- fitopts %>%
dplyr::rowwise() %>%
dplyr::mutate(gof_win = list(
get_gof(this_error_model = error_model,
this_dose_norm = dose_norm,
this_log10_trans = log10_trans,
this_time_scale = time_scale))) %>%
tidyr::unnest(gof_win))
system.time(all_tf_tests <- fitopts %>%
dplyr::rowwise() %>%
dplyr::mutate(TF = list(
tf_tests(error_model,
this_dose_norm = dose_norm,
this_log10_trans = log10_trans,
this_time_scale = time_scale))) %>%
tidyr::unnest(TF))
system.time(all_preds <- fitopts %>%
dplyr::rowwise() %>%
dplyr::mutate(these_preds = list(
get_all_preds(this_error_model = error_model,
this_dose_norm = dose_norm,
this_log10_trans = log10_trans,
this_time_scale = time_scale))) %>%
tidyr::unnest(these_preds))
gc()
# Writing to file
writexl::write_xlsx(x = list(
Fit_Counts = wm_comp,
AUC_and_Cmax_RMSE_summary = cmax_auc_rmsle_tbl,
Prediction_Evaluations = gof_tbl,
Twofold_Tests = all_tf_tests),
path = paste0(Sys.getenv("FIG_DIR"),
"Manu_Files/",
today,
"_Supp_Table1",
".xlsx"))
gof_tbl <- readxl::read_xlsx(path = paste0(Sys.getenv("FIG_DIR"),
"Manu_Files/",
today,
"_Supp_Table1.xlsx"),
sheet = "Prediction_Evaluations") %>%
mutate(RMSLE = as.numeric(RMSLE))
wm_comp <- readxl::read_xlsx(path = paste0(Sys.getenv("FIG_DIR"),
"Manu_Files/",
today,
"_Supp_Table1.xlsx"),
sheet = "Fit_Counts")
cmax_auc_rmsle_tbl <- readxl::read_xlsx(path = paste0(Sys.getenv("FIG_DIR"),
"Manu_Files/",
today,
"_Supp_Table1.xlsx"),
sheet = "AUC_and_Cmax_RMSE_summary")
gof_tbl %>%
group_by(Options, method) %>%
count(RMSLE < 1) %>%
filter(`RMSLE < 1`) %>%
arrange(desc(n)) %>%
print(n = 8)
# bobyqa-optimized options 010h, 110h, 110p
wm_comp %>% filter(method %in% "bobyqa",
!time_scale, log10_trans)
# Off these, there is one more data group that is model_flat
# I will remove this so it doesn't affect downstream analyses
gof_tbl %>% filter(Options == "110p", model == "model_flat",
method == "bobyqa") %>% distinct(Chemical, Species)
gof_tbl %>% filter(Options == "110h", model == "model_flat",
method == "bobyqa") %>% distinct(Chemical, Species)
gof_tbl %>% filter(Options == "010h", model == "model_flat",
method == "bobyqa") %>% distinct(Chemical, Species)
gof_tbl %>% filter(Options == "010p", model == "model_flat",
method == "bobyqa") %>% distinct(Chemical, Species)
gof_tbl %>% filter(Options == "010h",
method == "bobyqa",
Chemical %in% "DTXSID1021116") %>%
distinct(Chemical, Species)
# Filter out DTXSID1021116
gof_tbl %>%
anti_join(data.frame(Chemical = "DTXSID1021116", Species = "rat")) %>%
filter(!(model %in% "model_flat")) %>%
group_by(Options, method) %>%
count(RMSLE < 0.7) %>%
filter(`RMSLE < 0.7`) %>%
arrange(desc(n)) %>%
print(n = 8)
# 110p seems to win out here.
gof_tbl %>%
filter(method %in% "bobyqa",
Options %in% c("110p", "010h", "110h")
) %>%
anti_join(data.frame(Chemical = "DTXSID1021116", Species = "rat")) %>%
group_by(Options) %>%
summarize(medianR2 = median(Rsq, na.rm = TRUE))
gof_tbl %>%
filter(method %in% "bobyqa",
Options %in% c("110p", "010h", "110h")) %>%
anti_join(data.frame(Chemical = "DTXSID1021116", Species = "rat")) %>%
group_by(Options) %>%
summarize(medianR2 = mean(Rsq, na.rm = TRUE))
# Here we see that the highest median Rsq is for 110p
# I use median because the distribution is skewed towards 1.
gof_tbl %>%
filter(method %in% "bobyqa",
Options %in% c("110p", "010h")) %>%
anti_join(data.frame(Chemical = "DTXSID1021116", Species = "rat")) %>%
distinct(Chemical, Species, Options, Rsq) %>%
pivot_wider(names_from = Options, values_from = Rsq) %>%
ggplot(aes(x = `110p`, y = `010h`)) + geom_point()
#110h is most even between models
cmax_auc_rmsle_tbl %>%
filter(Options %in% c("010h", "110p", "110h"),
method %in% "bobyqa")
# Here it is clear that 110p results in the
all_tf_tests %>%
filter(Options %in% c("110p", "110h", "010h"),
model %in% "All non-flat",
method %in% "bobyqa") %>%
select(Options, model, starts_with("percent_"))
# Relatively similar here
cmaxAUCpl <- cmax_auc_rmsle_tbl %>%
filter(RMSLE_AUC < 0.8, RMSLE_Cmax < 0.5) %>%
ggplot(aes(x = RMSLE_AUC, y = RMSLE_Cmax,
shape = method, fill = Options)) +
scale_shape_manual(values = c(21,22))+
scale_color_brewer(palette = "Paired")+
geom_point(color = "black", size = 2, stroke = 0.5) +
guides(fill = guide_legend(override.aes = list(shape = 21))) +
# coord_equal(xlim = c(0.3, 0.45), ylim = c(0.3, 0.4), expand = TRUE) +
labs(x = "AUC RMSLE", y = "Cmax RMSLE") +
theme_bw() +
theme(panel.border = element_rect(fill = NA,
color = "black",
linewidth = 2))
cmaxAUCpl
ggsave(paste0(Sys.getenv("FIG_DIR"),
today,
"CmaxAUC_RMSLE_plotFinalists.png"),
bg = "white",
height = 3.25,
width = 3.5,
units = "in",
device = "png",
dpi = 300)
# Plot Rsq
violin_rmsle <- gof_tbl %>%
filter(!(model %in% "model_flat"),
Options %in% c("010h", "010p",
"111h", "111p",
"110h", "110p"),
RMSLE <= 10) %>%
ggplot() +
geom_violin(
aes(x = "",
y = RMSLE,
color = interaction(error_model, method)),
draw_quantiles = c(0.25,0.5,0.75)) +
facet_grid(cols = vars(Options)) +
scale_color_brewer(palette = "Paired") +
labs(title = "RMSLE",
color = "Error Model.Optimizer",
subtitle = "(values 10 or under)",
x = "") +
coord_cartesian(ylim = c(0, 10)) +
theme_bw() +
theme(legend.position = "none",
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5),
axis.title.x = element_blank(),
axis.ticks.x = element_blank(),
axis.text.x = element_blank()
)
violin_rsq <- gof_tbl %>%
filter(!(model %in% "model_flat"),
Options %in% c("010h", "010p",
"111h", "111p",
"110h", "110p")) %>%
ggplot() +
geom_violin(aes(x = "",
y = Rsq,
color = interaction(error_model, method)),
draw_quantiles = c(0.25,0.5,0.75)) +
facet_grid(cols = vars(Options)) +
scale_color_brewer(palette = "Paired") +
labs(title = "R-squared",
color = "Error Model.Optimizer",
x = "") +
coord_cartesian(ylim = c(0, 1)) +
theme_bw() +
guides(color = guide_legend(nrow = 2, byrow = TRUE)) +
theme(legend.position = "bottom",
plot.title = element_text(hjust = 0.5),
axis.title.x = element_blank(),
axis.ticks.x = element_blank(),
axis.text.x = element_blank()
)
cowplot::plot_grid(plotlist = list(violin_rmsle, violin_rsq),
nrow = 2, align = "v")
ggsave(
filename = paste0(
Sys.getenv("FIG_DIR"),
today,
"_RMSLE_Rsquared_ViolinPlots_selectOptions.png"
),
height = 5,
width = 6,
units = "in"
)
gof_tbl %>%
filter(!(model %in% "model_flat"),
Options %in% c("010h", "010p",
"111h", "111p",
"110h", "110p")) %>%
ggplot() +
geom_point(aes(x = Rsq, y = RMSLE,
color = interaction(method, model)),
shape = 1) +
facet_wrap(~Options) +
scale_color_brewer(palette = "Paired") +
theme_bw() +
coord_cartesian(
xlim = c(0, 1),
expand = TRUE
) +
theme(legend.position = "bottom",
plot.title = element_text(hjust = 0.5))
ggsave(
filename = paste0(
Sys.getenv("FIG_DIR"),
today,
"_RMSLE_Rsquared_ScatterPlot.png"
),
height = 7,
width = 10,
units = "in"
)
rmsle_hist <- gof_tbl %>% filter(model != "model_flat",
Options %in% c("100p", "010h", "110p", "110h"),
method == "bobyqa") %>%
ggplot(aes(x = RMSLE)) +
geom_histogram(fill = "grey5",
binwidth = 0.1) +
scale_x_log10(label = scales::label_math()) +
geom_vline(xintercept = 1, color = "red3", linetype = "dotted") +
facet_grid(rows = vars(Options),
cols = vars(method)) +
theme_bw()
rmsle_hist
ggsave(
filename = paste0(
Sys.getenv("FIG_DIR"),
today,
"_RMSLE_histogram_topChoices.png"
),
height = 4,
width = 4,
units = "in"
)
cowplot::plot_grid(
plotlist = list(rmsle_hist,
cmaxAUCpl +
guides(fill = guide_legend(ncol = 2,
override.aes = list(shape = 21)),
shape = guide_legend(nrow = 2)) +
theme(legend.position = "bottom",
legend.title.position = "top")
),
rel_widths = c(1, 1.75),
labels = "AUTO")
ggsave(
filename = paste0(
Sys.getenv("FIG_DIR"),
today,
"_RMSLE_histogram_CmaxAUC.png"
),
height = 4.5,
width = 6,
units = "in"
)
all_preds_prep <- all_preds %>%
filter(Conc_est >= LOQ,
Options %in% c("110p", "110h", "010h"),
method == "bobyqa") %>%
mutate(ID = as.factor(paste(Chemical, Species,
method)),
.keep = "unused") %>%
select(ID, Options, dose_norm, log10_trans, time_scale,
Route, Media, Dose, Conc, Conc_est, Time, Reference)
split_preds <- split(all_preds_prep, all_preds_prep$ID)
sp_plots <- lapply(names(split_preds), function(i) {
ggplot(data = split_preds[[i]],
mapping = aes(x = Conc, y = Conc_est,
color = Options, shape = Reference)) +
geom_point(size = 2) +
geom_abline(slope = 1) +
facet_grid(dose_norm~Route+Media+Dose,
scales = "free_y",
labeller = labeller(dose_norm = label_both)) +
scale_y_log10() + scale_x_log10() +
scale_color_brewer(palette = "Accent") +
theme_bw() +
labs(title = i) +
theme(legend.position = "bottom",
plot.title = element_text(hjust = 0.5, face = "bold"))
}
)
# Very BIG FILE
pdf(file = paste0(
Sys.getenv("FIG_DIR"),
today,
"log10_GOFplots_select.pdf"),
onefile = TRUE, height = 8, width = 8)
for (x in seq_along(sp_plots)) {
print(sp_plots[[x]])
}
dev.off()
Now I need to go back and re-analyze the data only for 100, 101, and 000 Pooled and Joint, L-BFGS-B only. I need to filter the cvt to only include Chem + Species combinations with multiple references. Then I will join them with the excel sheet of already collected data.
# Reference Multiples
multiRef_ChemSpec <- all_my_data %>%
ungroup() %>%
distinct(Chemical, Species, Reference) %>%
group_by(Chemical, Species) %>%
count() %>%
arrange(desc(n)) %>%
filter(n > 1) %>%
dplyr::select(!n)
multiRef_ChemSpec %>% nrow() # Surprisingly only 15 Chem + Species have multiple references
multiRef_ChemSpec
Some chemicals have one observation for a timepoint but not for
others, which then each timepoint gets a random error value so that
PK::nca()
can run. Here I evaluate 100 random seeds and the
differences in values of NCA estimated parameters.
# Which Chemical Species Route Media Dose groups have the randomness?
my_pk$data %>%
group_by(Chemical, Species, Reference, Route, Media, Dose, Time) %>%
summarize(Count = n()) %>%
group_by(Chemical, Species, Reference, Route, Media, Dose) %>%
filter(any(Count == 1) && any(Count > 1)) %>%
distinct(Chemical, Species, Reference, Route, Media, Dose) -> problem_set
# Evaluate 100 times
n_evals <- 100
evalNCA_df <- NULL
my_pk_new <- my_pk +
settings_preprocess(suppress.message = TRUE)
for (i in 1:100) {
seed_num <- 100 + i
set.seed(seed_num)
message(paste("Current seed:", seed_num))
my_pk_new <- do_preprocess(my_pk_new)
my_pk_new <- do_data_info(my_pk_new)
this_nca <- my_pk_new$data_info$nca %>%
select(-param_sd_z) %>%
mutate(seed = as.factor(seed_num))
evalNCA_df <- bind_rows(evalNCA_df, this_nca)
}
names(evalNCA_df)
evalNCA_df %>%
group_by(Chemical, Species, Route, Media, Dose, param_name) %>%
summarize(Average = mean(param_value[which(!is.infinite(param_value))], na.rm = TRUE),
Median = median(param_value[which(!is.infinite(param_value))], na.rm = TRUE),
StDev = sd(param_value[which(!is.infinite(param_value))], na.rm = TRUE),
Min = min(param_value[which(!is.infinite(param_value))], na.rm = TRUE),
Max = max(param_value[which(!is.infinite(param_value))], na.rm = TRUE),
Unique_Values = length(unique(signif(param_value, 4))),
Inf_Count = sum(is.infinite(param_value))) %>%
filter(
# Route %in% "iv",
param_name %in% "Cmax"
) %>%
View()
evalNCA_df %>%
filter(param_name %in% c("tmax"),
Route %in% "iv",
is.finite(param_value),
Species %in% c("rat", "human", "hamster")) %>%
ggplot(aes(x = param_value, y = Chemical)) +
geom_point() +
facet_grid(rows = vars(Route)) +
theme_bw() +
theme(axis.text.y = element_text(size = 5),
axis.ticks.y = element_blank(),
panel.grid.major = element_line(color = "grey70",
linewidth = 0.4,
linetype = "solid"),
panel.grid.major.y = element_blank(),
panel.grid.minor = element_blank())
set.seed(2023)
log10 transformation and a hierarchical model were the options that produced the least amount of error in predictions and derived TK stats. L-BFGS-B ran significantly faster, but bobyqa produced predictions with more lower RMSLE.
my_pk <- minimal_pk +
facet_data(vars(Chemical, Species)) +
settings_preprocess(keep_data_original = TRUE,
suppress.messages = FALSE) +
settings_optimx(method = c("bobyqa")) +
scale_conc(dose_norm = TRUE, log10_trans = TRUE) +
scale_time(new_units = "auto") +
stat_error_model(error_group = vars(Chemical, Species))
my_pk <- do_preprocess(my_pk)
cvt_DTXSIDs <- unique(my_pk$data$Chemical)
length(cvt_DTXSIDs)
my_pk <- do_prefit(my_pk)
system.time(my_pk <- do_fit(my_pk,
n_cores = 14))
# saveRDS(my_pk, "data-raw/all_cvt_pk.rds")
Next I will generate the figures for the paper. I will begin by
collating the data that I need. This data are generally accessible using
functions/methods for evaluating fitted pk
objects.
# Need the following data for any of the subsequent plots
# after fitting pk object for all cvt objects or reading the fitted pk object in `setup`
winmodel <- get_winning_model(my_pk)
my_preds <- semi_join(predict(my_pk, use_scale_conc = FALSE), winmodel)
my_residuals <- residuals(my_pk, use_scale_conc = FALSE) %>%
semi_join(winmodel)
my_tkstats <- eval_tkstats(my_pk) %>% semi_join(winmodel)
my_nca <- get_nca(my_pk)
all_my_data <- get_data(my_pk)
formatted_coefs <- my_pk$fit %>%
filter(str_detect(param_name, "sigma_", negate = TRUE)) %>%
select(Chemical, Species, model, model, param_name, estimate,
param_name, estimate, convcode) %>%
mutate(param_name = paste(param_name, "tkstats", sep = ".")) |>
pivot_wider(names_from = param_name,
values_from = estimate)
my_tf <- twofold_test(my_pk)
NROW(all_my_data) > NROW(my_preds) # THis must be true... or else try distinct(my_preds)
# Writing file to xlsx
writexl::write_xlsx(x = list(predictions = my_preds,
tkstats = my_tkstats),
path = paste0(Sys.getenv("FIG_DIR"),
"Manu_Files/",
today,
"_Supp_Table2",
".xlsx"))
fe_df <- fold_error(my_pk) %>% semi_join(winmodel)
# May need to change the use_scale_conc defaults
r2_df <- rsq(my_pk, use_scale_conc = FALSE) %>%
semi_join(winmodel)
myAIC <- AIC(my_pk) %>%
semi_join(winmodel)
myRMSLE <- rmse(my_pk,
use_scale_conc = list(dose_norm = FALSE, log10_trans = TRUE)) %>%
semi_join(winmodel)
myRMSE <- rmse(my_pk,
rmse_group = ggplot2::vars(Chemical, Species, Route, Media, Dose)) %>%
semi_join(winmodel)
chem_name_translate <- all_my_data %>%
dplyr::select(Chemical, Chemical_Name) %>%
dplyr::filter(!(Chemical_Name %in% c("2,4-d",
"methylene chloride",
"benzo[a]pyrene"))) %>%
dplyr::distinct()
The purpose of Figure 3 is to illustrate the variability among
replicate time points per Chemical/Species/Reference/Route/Media/Dose
group. This characterization is important as it can serve as a
pre-fitting data check and if the data are too variable it is likely
invivoPKfit
will struggle to find an appropriate fit. One
thing to consider is that there may be various factors that contribute
to data variability including but not limited to:
- Experimental design
- Technical precision
- Biological variation between subjects in different studies
Additionally, this also contributes to supplemental figure 2.
my_tf$indiv_data_test %>% select(Route, starts_with("percent_"))
pl3A <- my_tf$individual_data %>%
# Plotting
ggplot(aes(x = foldConc)) +
geom_histogram(binwidth = 0.2,
fill = "grey5",
color = "grey5",
linewidth = 0.4) +
coord_cartesian(xlim = c(0, 4),
ylim = c(0, 4000)) +
scale_y_continuous(expand = c(0, 0),
breaks = c(0, 1000, 2000, 3000),
labels = c("0", "1", "2", "3")) +
expand_limits(y = 0) +
geom_vline(xintercept = c(0.5, 2), color = "red3", linetype = "dashed") +
theme_bw() +
labs(x = "Mean-normalized\nconcentration",
y = "Observations\n(thousands)") +
theme(text = element_text(size = 10),
panel.border = element_rect(color = "black", linewidth = 1, fill = NA),
panel.background = element_blank(),
panel.grid.major.y = element_line(color = "grey90", linewidth = 0.4),
axis.title = element_text(face = "bold"),
axis.line = element_blank(),
plot.background = element_blank(),
axis.ticks.y = element_blank())
pl3A
ggsave(paste0(Sys.getenv("FIG_DIR"),
today,
"NormalizedConcentrations",".png"),
plot = pl3A,
bg = "white",
height = 2.2,
width = 2.5,
units = "in",
device = "png",
dpi = 300)
# Not included in manuscript
my_tf$data_descriptors %>%
select(Chemical, Species, Reference, Route, Media, Time, data_descr) %>%
distinct() %>%
ggplot(aes(fill = data_descr, x = "")) +
geom_bar(position = "stack",
color = "black") +
labs(x = "", y = "Occurence by Chemical + Species group") +
scale_y_continuous(expand = c(0, NA)) +
theme_minimal()
This alternate version of Figure 3A aims to simplify the assessment of replication of datasets in invivoPKfit. In contrast to the original 4A, this will also result in the inclusion of datasets with recorded standard deviation values (i.e. non-indivudual animal data/summarized data).
The way this will work is by calculating the average and standard deviations of individual animal data (eliminating individual animal data that does not have replicates) then testing the following:
\[ \frac{\mu + 2\sigma}{\mu} \leq 2 \]
Which essentially tests whether, at least 95% of data is within a factor of 2. The result is a bar chart that shows how many timepoints with replicate values passed this test. It minimizes the double (or more) counting of timepoints with multiple replicates from individual animal data.
# With the mapped column names
my_tf$summarized_data_test
pl3A_alt <- my_tf$summarized_data_test %>%
filter(Route == "All") %>%
select(Route, within, outside) %>%
pivot_longer(cols = c(within, outside),
names_to = "status",
values_to = "value") %>%
ggplot(aes(x = status, fill = status, y = value)) +
geom_col(position = position_stack(),
color = "black", linewidth = 1) +
coord_cartesian(ylim = c(0, 2000)) +
scale_y_continuous(expand = c(0, NA),
breaks = c(0, 1000, 2000),
labels = c("0", "1", "2")) +
scale_fill_manual(values = c("black", "white")) +
theme_classic() +
labs(x = "95% of observations\nwithin factor of 2",
y = "Observations\n(thousands)") +
theme(text = element_text(size = 10),
panel.border = element_rect(color = "black", linewidth = 1, fill = NA),
panel.background = element_blank(),
panel.grid.major.y = element_line(color = "grey90", linewidth = 0.4),
axis.title = element_text(face = "bold"),
axis.line = element_blank(),
plot.background = element_blank(),
axis.ticks.y = element_blank(),
legend.position = "none")
pl3A_alt # Not included in final manuscript
Here the goal is to evaluate whether particular timepoints throughout CvT data are particularly susceptible to high variability. To do this, we create a relative time scale anchored by \(t_{max}\) and \(t_{last}\).
pl3B <- my_tf$individual_data %>%
inner_join(my_tkstats %>%
dplyr::select(Chemical, Species,
Route, Media,
tmax = tmax.nca) %>%
filter(!is.na(tmax))) %>%
group_by(Chemical,
Species,
Reference,
Route,
Media,
Dose) %>%
mutate(
normTime = ifelse(
Route == "iv",
1 + Time/max(Time),
ifelse(
Time > tmax,
((Time - tmax)/max(Time)) + 1,
0.5 + Time/(2*tmax)
)
)) %>%
ggplot(aes(
x = normTime,
y = foldConc
)) +
geom_point(alpha = 0.1, size = 0.7) +
geom_vline(xintercept = 1, color = "blue",
linewidth = 0.8, linetype = "dashed") +
geom_vline(xintercept = 2, color = "green4",
linewidth = 0.8, linetype = "dashed") +
geom_hline(yintercept = c(0.5, 2), color = "red3",
linewidth = 0.8, linetype = "dashed") +
facet_grid(cols = vars(Route),
scales = "free_x") +
labs(x = "ADME-Normalized Time",
y = "Mean-Normalized\nConcentration") +
theme(text = element_text(size = 10),
panel.border = element_rect(color = "black", fill = NA, size = 1),
panel.background = element_blank(),
panel.grid.major = element_line(color = "grey90", linewidth = 0.5),
strip.background = element_rect(fill = "white"),
strip.text = element_text(face = "bold"),
axis.ticks = element_blank(),
axis.line = element_blank(),
axis.text.x = element_blank(),
axis.title = element_text(face = "bold"),
panel.spacing = unit(0.125, units = "in"),
plot.background = element_blank(),
legend.position = "none")
pl3B
ggsave(paste0(Sys.getenv("FIG_DIR"),
today,
"ADME_NormTime-NormConcs_onlyDetects.png"),
height = 3,
bg = "white",
width = 5,
units = "in",
dpi = 300)
# Make the Combination Plot----
plot_grid(pl3A, pl3B, labels = c("A", "B"),
align = "h", axis = "bt", rel_widths = c(1, 1.7))
figure3 <- plot_grid(pl3A, pl3B, labels = c("A", "B"),
align = "h", axis = "bt", rel_widths = c(1, 1.7))
ggsave(paste0(Sys.getenv("FIG_DIR"),
today,
"Figure3_onlyDetects.png"),
plot = figure3,
bg = "white",
height = 3,
width = 6.5,
units = "in",
dpi = 300)
data_range <- all_my_data %>%
filter(Detect, !exclude) %>%
group_by(Chemical, Species, Reference, Media, Route, Dose, Dose.Units) %>%
summarize(maxT = max(Time)/24,
concRange = log10(max(Conc)/min(Conc))) %>%
ungroup()
sf_2A <- data_range %>%
ggplot(aes(x = maxT)) +
geom_histogram(fill = "grey5",
bins = 40,
color = "grey5") +
scale_x_log10(labels = scales::number,
breaks = c(0.1, 1, 7, 30, 365)) +
# scale_y_continuous(expand = c(0, 0),
# limits = c(0, 60),
# breaks = c(0, 10, 20, 30, 40, 50)) +
labs(x = "Day of final Timepoint",
y = "Count") +
theme(aspect.ratio = 1,
text = element_text(size = 10),
panel.border = element_rect(color = "black", fill = NA, size = 1),
panel.background = element_blank(),
panel.grid.major = element_line(color = "grey90", linewidth = 0.5),
axis.ticks = element_blank(),
axis.line = element_blank())
sf_2A
ggsave(paste0(Sys.getenv("FIG_DIR"),
today,
"SFig2A_TimePoints.png"),
plot = sf_2C,
height = 4,
width = 4,
units = "in",
dpi = 300)
sf_2B <- data_range %>%
ggplot(aes(x = concRange)) +
geom_histogram(fill = "grey5",
bins = 40,
color = "grey5") +
scale_y_continuous(expand = c(0, 0),
limits = c(0, 50),
breaks = c(0, 25, 50)) +
labs(x = "log10(Concentration Range)",
y = "Count") +
theme(aspect.ratio = 1,
panel.border = element_rect(color = "black", fill = NA, size = 1),
panel.background = element_blank(),
panel.grid.major = element_line(color = "grey90", linewidth = 0.5),
strip.background = element_rect(fill = "white"),
strip.text = element_text(face = "bold"),
axis.ticks = element_blank(),
axis.line = element_blank(),
axis.text = element_text(size = 10),
axis.title = element_text(size = 11))
sf_2B
ggsave(paste0(Sys.getenv("FIG_DIR"),
today,
"SFig2B_ConcRange.png"),
plot = sf_2B,
height = 4,
width = 4,
units = "in",
dpi = 300)
sf_2AB <- plot_grid(sf_2A, sf_2B,
labels = c("A", "B"),
nrow = 1)
sf_2AB
ggsave(paste0(Sys.getenv("FIG_DIR"),
today,
"SFig2_ConcTimeRanges.png"),
height = 3.2,
width = 6,
bg = "white",
units = "in",
dpi = 300)
my_tf$model_error_summary %>%
select(model, method, starts_with("percent_"))
my_tf$model_error_all %>%
filter(model %in% c("model_1comp", "model_2comp")) %>%
ggplot(aes(x = Fold_Error)) +
geom_histogram(
fill = "grey5") +
scale_x_log10(limits = c(1E-3, 1E3), labels = scales::label_math(format = log10)) +
expand_limits(y = 0) +
geom_vline(xintercept = c(0.5, 2), color = "red3", linetype = "dashed") +
facet_grid(cols = vars(model),
rows = vars(Route)) +
labs(x = "Predicted/Observed",
y = "Number of Observations") +
theme_classic() +
theme(aspect.ratio = 1,
panel.border = element_rect(color = "black", fill = NA),
panel.grid.major = element_line(color = "grey95", linewidth = 0.4))
ggsave(paste0(Sys.getenv("FIG_DIR"),
today,
"PredictedObserved_compartmentModels.png"),
height = 5, width = 5)
Here we examine the performance of “best fit” models with relation to data variability. Please note that in supplemental Figure 3, the data with summarized observations (mean and sd) were included. Here, because we are trying to specifically compare data variability with model performance, we look at data with replicated timepoint observations, which are largely data with individual animal data.
my_tf$indiv_data_test_fold_errors %>%
select(model, starts_with("percent_")) %>%
glimpse()
pl_4A <- my_tf$indiv_data_fold_errors %>%
# For Plotting
ggplot(aes(
y = Fold_Error,
x = foldConc
)) +
geom_bin2d(bins = 40, color = NA) +
geom_hline(yintercept = c(0.5, 2), linetype = "dashed") +
geom_vline(xintercept = c(0.5, 2), linetype = "dashed") +
facet_grid(cols = vars(model)) +
scale_x_log10(labels = scales::label_math(format = log10),
limits = c(0.0001, 1000)) +
scale_y_log10(labels = scales::label_math(format = log10),
limits = c(0.0001, 10000)) +
scale_fill_viridis_c(option = "cividis",
limits = c(1, 100),
oob = scales::oob_squish) +
labs(y = "Model Error",
x = "Data Variability",
fill = "Count") +
theme(aspect.ratio = 1,
panel.border = element_rect(color = "black", fill = NA, size = 1.5),
panel.background = element_blank(),
panel.grid.major = element_line(color = "grey90", linewidth = 0.5),
strip.background = element_rect(fill = "white"),
strip.text = element_text(face = "bold"),
axis.ticks = element_blank(),
axis.line = element_blank())
pl_4A
ggsave(paste0(Sys.getenv("FIG_DIR"),
today,
"ModelPerformance_vDataVariability.png"),
height = 2,
width = 4.2,
device = "png",
dpi = 300,
units = "in")
my_table <- my_tf$indiv_data_test_fold_errors %>%
ungroup() %>%
select(starts_with("percent_")) %>% t() %>%
round(digits = 2) %>%
as.data.frame() %>%
rownames_to_column(var = "percentages") %>%
mutate(percentages = case_when(
percentages == "percent_both_within" ~ "Both within a factor of 2",
percentages == "percent_both_outside" ~ "Both outside a factor of 2",
percentages == "percent_model_outside" ~ "Model too variable",
percentages == "percent_data_outside" ~ "Data too variable"
))
names(my_table) <- c(" ","1-compartment (%)", "2-compartment (%)", "Overall (%)")
library(flextable)
library(officer)
flextable(my_table)%>%
border_inner() %>%
fontsize(part = "all", size = 11) %>%
bold(part = "all", j = 4) %>% autofit()
table_plot <- gen_grob(flextable(my_table) %>%
border_inner() %>%
fontsize(part = "all", size = 10) %>%
bold(part = "all", j = 4) %>%
autofit())
dual_plot <- plot_grid(pl_4A, table_plot, ncol = 1, align = "v",
rel_heights = c(1, 0.5))
dual_plot
ggsave(paste0(Sys.getenv("FIG_DIR"),
today,
"ModelPerformance_vDataVariability_wTable.png"),
plot = dual_plot,
height = 4.5,
width = 6,
device = "png",
bg = "white",
dpi = 300,
units = "in")
Here we analyze and validate model performance per data group using R-squared, RMSE, and fraction of predictions that are within 2-fold for each Chemical, Species, Route, Media, and Dose groupings.
combined_gof_df <- my_tf$model_error_all %>%
group_by(Chemical, Species, model, method) %>%
summarize(within_2fold = sum(between(Fold_Error, 0.5, 2))/n()) %>%
left_join(r2_df) %>%
left_join(myAIC) %>%
left_join(myRMSLE) %>%
semi_join(winmodel)
mypl <- ggplot(data = combined_gof_df,
aes(
x = within_2fold,
y = Rsq,
color = model
)) +
geom_point() +
geom_abline(slope = 1, color = "red4", linetype = "longdash") +
scale_color_manual(values = c("#0398FC", "#D68E09", "black")) +
coord_cartesian(xlim = c(0,1), ylim = c(0,1)) +
labs(x = "Fraction of predictions within 2-fold",
y = "R-squared value") +
theme(aspect.ratio = 1,
panel.border = element_rect(color = "black", fill = NA, size = 1),
panel.background = element_blank(),
panel.grid.major = element_line(color = "grey90", linewidth = 0.5),
axis.ticks = element_blank(),
axis.line = element_blank(),
legend.position = "none",
legend.title = element_blank(),
legend.key = element_blank())
panelA_plot <- ggExtra::ggMarginal(mypl, groupFill = TRUE,
type = "histogram",
xparams = list(binwidth = 0.1),
yparams = list(binwidth = 0.1))
panelA_plot
ggsave(filename = paste0(Sys.getenv("FIG_DIR"),
today,
"R2_within2fold_comparison.png"),
plot = ggExtra::ggMarginal(mypl, groupFill = TRUE,
type = "histogram",
xparams = list(binwidth = 0.05),
yparams = list(binwidth = 0.05)),
height = 5,
width = 5,
units = "in")
panelB_plot <- ggplot(data = myRMSE,
aes(
x = log10(RMSE),
color = model,
fill = model
)) +
geom_histogram(bins = 50) +
labs(y = "Count") +
scale_color_manual(values = c("#0398FC", "#D68E09", "grey10")) +
scale_fill_manual(values = c("#0398FC", "#D68E09", "grey10")) +
facet_grid(rows = vars(model),
cols = vars(Route)) +
theme(text = element_text(size = 10),
aspect.ratio = 0.6,
panel.border = element_rect(color = "black", fill = NA, size = 1),
panel.background = element_blank(),
panel.grid.major = element_line(color = "grey90", linewidth = 0.5),
axis.ticks = element_blank(),
axis.line = element_blank(),
strip.background = element_blank(),
panel.spacing.y = unit(0.125, units = "in"),
legend.position = "bottom",
legend.title = element_blank())
panelB_plot
ggsave(filename = paste0(Sys.getenv("FIG_DIR"),
today,
"RMSEs_winningmodel_RouteFacet.png"),
height = 4,
width = 6,
units = "in")
legend <- get_legend(panelB_plot)
plot_grid(plot_grid(panelA_plot,
panelB_plot + theme(legend.position = "none"), labels = c("A", "B"),
axis = "tb", align = "h", rel_heights = c(1,1)), legend,
ncol = 1,
rel_heights = c(1, 0.1))
ggsave(filename = paste0(Sys.getenv("FIG_DIR"),
today,
"CombinedPlot_Rsquared_RMSE.png"),
height = 4,
width = 6.5,
bg = "white",
units = "in")
pl <- plot(my_pk, best_fit = TRUE,
use_scale_conc = TRUE,
drop_nonDetect = FALSE)
cvt %>% filter(curation_set_tag == "CVT_Showa") %>%
pull(analyte_dtxsid) -> Showa_chems
combined_gof_df %>% filter(!(Chemical %in% Showa_chems)) %>% View()
ex_fits <- pl %>%
filter(Chemical %in% c("DTXSID2020139",
"DTXSID4023917",
"DTXSID3061635",
"DTXSID1034187"),
Species %in% "rat") %>%
pull(final_plot)
cowplot::plot_grid(plotlist = list(ex_fits[[2]] +
scale_color_manual(values = c("#a6bddb",
"#74a9cf",
"#41bbc4",
"#2b8cbe",
"#045a8d")) +
theme(legend.position = "none") +
xlim(0,30),
ex_fits[[3]] +
scale_color_manual(values = c("black",
"grey40")) +
theme(legend.position = "none"),
ex_fits[[1]] +
scale_color_manual(values = c("magenta3")) +
theme(legend.position = "none"),
ex_fits[[4]] +
scale_color_manual(values = c("#006837")) +
theme(legend.position = "none")),
scale = 0.85)
ggsave(filename = paste0(Sys.getenv("FIG_DIR"),
today,
"exampleFits_Rsq_within2fold.png"),
height = 12,
width = 12)
my_pk$fit %>%
select(Chemical, Species, model, method, param_name, estimate) %>%
filter(str_detect(param_name, "sigma_")) %>%
semi_join(winmodel) %>%
left_join(my_pk$prefit$stat_error_model$sigma_DF) %>%
inner_join(get_data_summary(my_pk) %>%
distinct(Chemical, Species, n_obs)) -> my_coefs
my_coefs %>%
mutate(model =
case_when(
model == "model_1comp" ~ "1-compartment",
model == "model_2comp" ~ "2-compartment",
model == "model_flat" ~ "Null"
)) %>%
ggplot() +
geom_histogram(aes(x = estimate/n_obs),
bins = 15,
color = NA, fill = "grey5") +
# geom_histogram(aes(x = start/n_obs), fill = NA, color = "grey20", bins = 15) +
scale_x_log10(labels = scales::label_math(format = log10)) +
facet_grid(cols = vars(model)) +
labs(x = "Size-normalized Sigma Value",
y = "Number of Observations") +
theme(panel.border = element_rect(color = "black", fill = NA, size = 1.5),
panel.background = element_blank(),
panel.grid.major = element_line(color = "grey90", linewidth = 0.5),
strip.background = element_rect(fill = "white"),
strip.text = element_text(face = "bold"),
plot.title = element_text(hjust = 0.5),
axis.ticks = element_blank(),
axis.line = element_blank())
ggsave(paste0(Sys.getenv("FIG_DIR"),
today,
"NormalizedSigma_Distributions.png"),
bg = "white",
height = 3, width = 5)
This meta-analysis compares the TK stats from model fits with other published studies.
cvt_DTXSIDs <- unique(my_pk$data$Chemical)
# First things first, let's load the data from Lombardo et al.
lombardo <- readxl::read_xlsx(
paste0(Sys.getenv("FIG_DIR"),
"Lombardo2018-Supplemental_82966_revised_corrected.xlsx"),
skip = 8)
l_batch_search <- readxl::read_xlsx(
paste0(Sys.getenv("FIG_DIR"),
"LombardoBatchSearch_Name.xlsx"),
sheet = "Main Data"
)
l_batch_search <- l_batch_search[c("INPUT", "CASRN","DTXSID")] %>%
distinct() %>%
right_join(lombardo[c("Name", "CAS #")], by = c("CASRN" = "CAS #", "INPUT" = "Name"))
l_noNamesCASRN <- readxl::read_xlsx(
paste0(Sys.getenv("FIG_DIR"),
"LombardoBatchSearch_CASRNnoName.xlsx"),
sheet = "Main Data"
) %>%
dplyr::select(CASRN = INPUT, DTXSID)
l_curatedDTX <- read_tsv(paste0(Sys.getenv("FIG_DIR"),
"CuratedIDs_lombardo.txt")) %>%
rename(INPUT = Name)
l_batch_search <- l_batch_search %>%
left_join(l_noNamesCASRN, by = "CASRN") %>%
mutate(DTXSID = coalesce(DTXSID.x, DTXSID.y)) %>%
dplyr::select(-DTXSID.x, -DTXSID.y)
# Now all but two of the IDs are unidentified
lombardo <- lombardo %>%
right_join(l_batch_search, by = c("Name" = "INPUT"))
####
lombard_abbr <- lombardo %>% dplyr::select(DTXSID,
hVss = `human VDss (L/kg)`,
hCltot = `human CL (mL/min/kg)`,
halflife = `terminal t1/2 (h)` ) %>%
filter(DTXSID %in% cvt_DTXSIDs) %>%
left_join(chem_name_translate, by = c("DTXSID" = "Chemical"))
# 15 chemicals in common with values for both
lombardo_comparison <- my_tkstats %>%
dplyr::select(DTXSID = Chemical, Species,
model, Vss.tkstats, CLtot.tkstats) %>%
dplyr::filter(model != "model_flat") %>%
distinct() %>%
inner_join(lombard_abbr, by = "DTXSID") %>%
# filter(!is.na(Vss.tkstats)) %>%
arrange(halflife)
comparable_ids <- lombardo_comparison %>% pull(DTXSID)
lombard_abbr <- lombard_abbr %>%
mutate(Species = "human", model = "Lombardo et al.") %>%
rename(Vss = hVss, CLtot = hCltot) %>%
mutate(CLtot = CLtot*60/1000) %>% # Lombardo reports this as mL/min/kg, need L/h/kg
filter(DTXSID %in% comparable_ids) %>%
mutate(Chemical_Name = reorder(Chemical_Name, halflife))
lombardo_comparison <- my_tkstats %>%
dplyr::select(DTXSID = Chemical, Species,
model, Vss.tkstats, CLtot.tkstats, halflife.tkstats) %>%
filter(DTXSID %in% comparable_ids) %>%
left_join(chem_name_translate, by = join_by(DTXSID == Chemical)) %>%
distinct() %>%
rename(Vss = Vss.tkstats, CLtot = CLtot.tkstats, halflife = halflife.tkstats) %>%
bind_rows(lombard_abbr) %>%
mutate(model = ifelse(str_detect(model, "^model"), "invivoPKfit", model),
Chemical_Name = factor(Chemical_Name,
levels = levels(lombard_abbr$Chemical_Name)))
lombardo_comparison %>%
pivot_longer(cols = c("Vss", "CLtot", "halflife")) %>%
mutate(name = factor(name, levels = c("halflife", "Vss", "CLtot"))) %>%
ggplot(mapping = aes(
x = value,
y = Chemical_Name
)) +
geom_point(aes(color = model,
shape = Species),
position = position_dodge(0.7),
size = 5/.pt, stroke = 2.5/.pt) +
facet_grid(cols = vars(name),
scales = "free_x") +
scale_x_log10( labels = scales::label_math(format = log10)) +
scale_shape_manual(values = c(21, 22, 24),
breaks = c("human", "dog", "rat")) +
scale_color_manual(values = c("black", "#1064c9")) +
guides(color = guide_legend(override.aes = list(shape = 21),
nrow = 2,
title.position = "top",
title.hjust = 0.5),
shape = guide_legend(nrow = 1,
title.position = "top",
title.hjust = 0.5)) +
theme(panel.border = element_rect(color = "black", fill = NA, linewidth = 1.5),
panel.background = element_blank(),
panel.grid.major = element_line(color = "grey90", linewidth = 0.5),
strip.background = element_rect(fill = "white"),
strip.text = element_text(face = "bold", size = 12),
text = element_text(size = 10),
axis.ticks = element_blank(),
axis.line = element_blank(),
axis.text = element_text(face = "bold", size = 6),
axis.title.y = element_blank(),
axis.title.x = element_blank(),
legend.position = "bottom",
legend.key = element_blank(),
legend.title = element_text(face = "bold"),
legend.text = element_text())
ggsave(filename = paste0(Sys.getenv("FIG_DIR"),
today,
"lombardoComparison_named.png"),
height = 7,
width = 6.5,
device = "png", dpi = 300)
# Histograms
lombardo_hist <- data.frame(Chemical = c(lombardo$DTXSID,
my_tkstats$Chemical),
Vss = c(lombardo$`human VDss (L/kg)`,
my_tkstats$Vss.tkstats),
halflife = c(lombardo$`terminal t1/2 (h)`,
my_tkstats$halflife.tkstats),
Source = c(rep_len("lombardo",
length.out = nrow(lombardo)),
rep_len("invivoPKfit",
length.out = nrow(my_tkstats))))
sfig7A <- lombardo_hist %>%
ggplot(aes(x = log2(Vss), color = Source,
group = Source)) +
geom_freqpoly(aes(y = after_stat(ndensity)),
linewidth = 2/.pt,
bins = 20) +
labs(y = "Scaled Frequency",
x = "Vss") +
scale_x_continuous(labels = scales::label_math(expr = 2^.x)) +
scale_color_manual(values = c("black", "#1064c9")) +
guides(color = guide_legend(nrow = 2, linewidth = 1)) +
theme(panel.border = element_rect(color = "black", fill = NA, linewidth = 1.5),
panel.background = element_blank(),
panel.grid.major = element_line(color = "grey90", linewidth = 0.5),
strip.background = element_rect(fill = "white"),
strip.text = element_text(face = "bold", size = 12),
text = element_text(size = 10),
axis.ticks = element_blank(),
axis.line = element_blank(),
axis.text = element_text(face = "bold"),
legend.position = "bottom",
legend.key = element_blank(),
legend.title = element_text(face = "bold"),
legend.text = element_text())
sfig7A
other_meta <- read_csv(
file = paste0(Sys.getenv("FIG_DIR"), "SupTableInVivoDatandPreds.csv")) %>%
dplyr::select(DTXSID, fbioh, fbior) %>%
filter(DTXSID %in% my_tkstats$Chemical)
fgutabs <- my_pk$fit %>% filter(param_name %in% "Fgutabs", convcode == 0) %>%
distinct(Chemical, Species, model, method, estimate) %>%
semi_join(winmodel) %>%
filter(model != "model_flat") %>%
rename(Fgutabs.tkstats = "estimate")
sfig7B <- other_meta %>% left_join(fgutabs, by = c("DTXSID" = "Chemical")) %>%
filter(Species %in% c("rat")) %>%
filter(!is.na(Fgutabs.tkstats),
!is.na(fbior)) %>%
distinct(DTXSID, Species,
Fgutabs.tkstats, fbior) %>%
pivot_longer(cols = c(Fgutabs.tkstats, fbior),
names_to = "Source",
values_to = "Fgutabs") %>%
mutate(Source = ifelse(str_detect(Source, "tkstats"), "invivoPKfit",
"Honda et al.")) %>%
left_join(chem_name_translate, by = join_by(DTXSID == Chemical)) %>%
mutate(Chemical_Name) %>%
ggplot(mapping = aes(
x = Fgutabs,
y = reorder(Chemical_Name, Fgutabs)
)) +
geom_point(aes(color = Source),
shape = 24,
position = position_dodge(0.7),
size = 4/.pt, stroke = 1.5/.pt) +
scale_color_manual(values = c("black", "green4"),
breaks = c("invivoPKfit", "Honda et al.")) +
labs(x = "Oral Bioavailability") +
guides(color = guide_legend(nrow = 2)) +
theme(panel.border = element_rect(color = "black", fill = NA, linewidth = 1.5),
panel.background = element_blank(),
panel.grid.major = element_line(color = "grey90", linewidth = 0.5),
strip.background = element_rect(fill = "white"),
strip.text = element_text(face = "bold", size = 12),
text = element_text(size = 10),
axis.ticks = element_blank(),
axis.line = element_blank(),
axis.text = element_text(face = "bold"),
axis.title.y = element_blank(),
legend.position = "bottom",
legend.key = element_blank(),
legend.title = element_text(face = "bold"),
legend.text = element_text())
plot_grid(sfig7A, sfig7B,
align = "v",
ncol = 1,
axis = "lr",
rel_heights = c(1,1.25),
labels = c("A", "B"))
ggsave(filename = paste0(Sys.getenv("FIG_DIR"),
today,
"metaAnalysis_Comparisons_SuppFigure7_named.png"),
height = 5.5,
width = 4,
device = "png", dpi = 300)
Here I do a bit of benchmarking for the parallel-ized version of do_fit
# Randomize the order of chemical species
chem_spec <- cvt %>%
dplyr::distinct(analyte_dtxsid, species) %>%
dplyr::slice_sample(prop = 1)
test_group_size <- seq(25, 105, by = 10)
fit_options <- expand.grid(error_model = c("pooled",
"hierarchical"),
dose_norm = FALSE,
log10_trans = TRUE,
time_scale = c(
TRUE,
FALSE),
stringsAsFactors = FALSE)
fit_options <- tidyr::expand_grid(fit_options, test_group_size)
fit_options <- fit_options %>% rowwise() %>%
mutate(this_data = list({
tmp <- chem_spec %>% slice_head(n = test_group_size) %>%
left_join(cvt, by = join_by(analyte_dtxsid, species)) %>%
pk() +
scale_time(new_units = ifelse(!time_scale,
"identity",
"auto")) +
scale_conc(dose_norm = dose_norm, log10_trans = log10_trans) +
settings_preprocess(suppress.messages = TRUE)
do_prefit(tmp)
}
),
data_size = chem_spec %>% slice_head(n = test_group_size) %>%
left_join(cvt, by = join_by(analyte_dtxsid, species)) %>%
NROW())
# Organization of the benchmarking
# For each fit_option, return the four values of system.time() that we want
df_out <- fit_options %>%
rowwise() %>%
mutate(tim_1core = system.time(do_fit(this_data))[["elapsed"]],
tim_4core = system.time(do_fit(this_data, n_cores = 4))[["elapsed"]],
tim_7core = system.time(do_fit(this_data, n_cores = 7))[["elapsed"]],
tim_12core = system.time(do_fit(this_data, n_cores = 12))[["elapsed"]]
)
df_out <- df_out %>% select(-this_data)
gc()
full_long <- df_out %>%
pivot_longer(cols = c(tim_1core:tim_12core),
names_to = "N_cores",
values_to = "Time_s") %>%
group_by(N_cores, test_group_size, data_size) %>%
summarize(mean_time = mean(Time_s, na.rm = TRUE)/60,
max_time = max(Time_s, na.rm = TRUE)/60,
min_time = min(Time_s, na.rm = TRUE)/60
) %>%
mutate(N_cores = as.numeric(str_extract(N_cores, "[:digit:]+")))
df_out %>% clipr::write_clip()
ggplot(full_long,
aes(x = test_group_size,
y = mean_time,
color = as.factor(N_cores))) +
geom_point() +
geom_linerange(aes(ymin = min_time,
ymax = max_time)) +
geom_line(aes(group = N_cores)) +
labs(x = "Number of Data Groups",
y = "Runtime (minutes)",
title = "Number of Processing Cores",
subtitle = "(with min/max runtime range)") +
facet_grid(cols = vars(N_cores)) +
scale_color_manual(values = c("black", "#40c679", "#31a354", "#006837")) +
coord_fixed(ratio = 8) +
theme(panel.border = element_rect(color = "black", fill = NA, size = 1.5),
panel.background = element_blank(),
panel.grid.major = element_line(color = "grey90", linewidth = 0.5),
legend.position = "none",
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5),
axis.ticks = element_blank(),
axis.line = element_blank(),
strip.background = element_blank())
ggsave(paste0(Sys.getenv("FIG_DIR"),
today,
"Parallelization_Benchmarking.png"),
width = 5,
height = 2.5,
units = "in")
pl <- plot(my_pk, use_scale_conc = TRUE, n_interp = 10)
names(pl) # Chemical Species observations observation_plot predicted predicted_plot final_plot
s6_plA <- pl %>% filter(Chemical %in% "DTXSID3031860") %>%
pull(observation_plot) %>% .[[1]]
s6_plB_mouse <- pl %>% filter(Chemical %in% "DTXSID4020533",
Species %in% "mouse") %>% pull(final_plot) %>% .[[1]]
s6_plB_rat <- pl %>% filter(Chemical %in% "DTXSID4020533",
Species %in% "rat") %>% pull(final_plot) %>% .[[1]]
s6_plC <- pl %>% filter(Chemical %in% "DTXSID00216205") %>%
pull(final_plot) %>% .[[1]]
s6_plD <- pl %>% filter(Chemical %in% "DTXSID8025337") %>%
pull(final_plot) %>% .[[1]]
# Set colors palettes for each
# panel A : Oranges
panelA_cols <- brewer.pal(8, "Oranges")[3:8]
# panel B : Greens
panelB_cols <- brewer.pal(9, "Greens")[2:10]
# panel C : Blue (only 2 values)
# panel D : Purple (6 values)
panelD_cols <- brewer.pal(9, "Purples")[4:9]
s6_plA <- s6_plA +
labs(title = "perfluorodecanoic acid") +
scale_color_manual(values = panelA_cols, aesthetics = c("color", "fill")) +
theme(text = element_text(size = 20),
legend.position = "none",
strip.background = element_rect(fill = "white"))
s6_plB_mouse <- s6_plB_mouse +
labs(title = "1,4-dioxane\nMouse") +
scale_color_manual(values = panelB_cols[c(5,8)],
aesthetics = c("color", "fill")) +
theme(text = element_text(size = 20),
legend.position = "none",
strip.background = element_rect(fill = "white"))
s6_plB_rat <- s6_plB_rat +
labs(title = "1,4-dioxane\nRat") +
scale_color_manual(values = panelB_cols[c(1, 2, 3, 4, 5, 6,7, 8)],
aesthetics = c("color", "fill")) +
theme(text = element_text(size = 20),
legend.position = "none",
strip.background = element_rect(fill = "white"))
s6_plC <- s6_plC +
labs(title = "psoralen") +
scale_color_manual(values = c("#6BAED6", "#08519C", "darkblue"),
aesthetics = c("color", "fill")) +
theme(text = element_text(size = 20),
legend.position = "none",
strip.background = element_rect(fill = "white"))
s6_plD <- s6_plD +
labs(title = "formamide") +
scale_color_manual(values = panelD_cols,
aesthetics = c("color", "fill")) +
theme(text = element_text(size = 20),
legend.position = "none",
strip.background = element_rect(fill = "white"))
# Put it all together
s6_plB <- plot_grid(s6_plB_mouse, s6_plB_rat,
align = "h", axis = "bt",
rel_widths = c(1.2,2))
s6_plCD <- plot_grid(s6_plC, s6_plD, labels = c("C", "D"), label_size = 24)
plot_grid(
s6_plA,
s6_plB,
s6_plCD,
align = "v",
ncol = 1,
rel_heights = c(1.25, 1.2, 1.25),
labels = c("A", "B", ""),
label_size = 24
)
# Am making this 2X the size I need it to be
# so the points are a reasonable size when I scale it down
ggsave(paste0(Sys.getenv("FIG_DIR"),
"ExampleFitsPlots_06.12.2024.png"),
width = 13,
height = 14,
units = "in")
Here is the code used to generate the CvT data and invivoPKfit fits for each chemical, species data group.
### Plotting for all (2.5MB file)
pl <- plot(my_pk, n_interp = 12, use_scale_conc = TRUE)
pdf(file = paste0(Sys.getenv("FIG_DIR"),
"Transformed_Joint_AllFits_Final_2024_05_16.pdf"),
height = 6, width = 10)
for (i in 1:nrow(pl)) {
print(pl$final_plot[[i]])
}
dev.off()
# From my new SQL pull
pl <- plot(my_pk, n_interp = 12, use_scale_conc = FALSE, best_fit = TRUE)
pdf(file = paste0(Sys.getenv("FIG_DIR"),
"Joint_bobyqa_AllFits_Final_20240514.pdf"),
height = 10, width = 10)
for (i in 1:nrow(pl)) {
print(pl$final_plot[[i]])
}
dev.off()
It is also possible to plot only one plot.
my_cvt <- cvt
my_wm <- get_winning_model(my_pk)
my_tkstats <- get_tkstats(my_pk) %>%
rename2_cvt() %>%
select(-c(invivpk_dose_level, dose_units, conc_units, time_units,
data_sigma_group, fk_extraction_document_id)) %>%
distinct() %>%
pivot_longer(cols = cltot:vss_fgutabs,
names_to = "param_name",
values_to = "estimate") %>%
filter(!is.na(estimate)) %>%
mutate(param_units = case_when(
param_name %in% c("cltot", "cltot_fgutabs") ~ "L/hours",
param_name %in% c("css", "cmax") ~ "mg/L",
param_name %in% c("halflife", "tmax") ~ "hours",
param_name %in% c("vss", "vss_fgutabs") ~ "L",
param_name %in% "auc_infinity" ~ "mg/L * hours",
.default = NA_character_
)) %>%
distinct()%>%
mutate(dose_normalization = conc_scale_use(TRUE, my_pk)$dose_norm,
log10_transformation = conc_scale_use(TRUE,my_pk)$log10_trans,
time_scaling = "identity",
invivoPKfit_version = "2.0.0",
update_date = today) %>%
filter(model %in% c("model_1comp", "model_2comp"))
my_params <- my_pk$fit %>%
rename2_cvt() %>% distinct(
analyte_dtxsid, species, model, method, param_name, estimate, param_units
) %>%
mutate(dose_normalization = conc_scale_use(TRUE, my_pk)$dose_norm,
log10_transformation = conc_scale_use(TRUE,my_pk)$log10_trans,
time_scaling = "identity",
invivoPKfit_version = "2.0.0",
update_date = today) %>%
filter(model %in% c("model_1comp", "model_2comp"))
my_cvt_unique <- my_cvt %>%
dplyr::mutate(
conc_processing_flags =
case_when(
invivPK_conc == conc ~ glue::glue("Used 'conc' value."),
invivPK_conc == conc_original ~ glue::glue("Used 'conc_original' value."),
invivPK_conc == conc_original * conc_unit_norm_factor ~ glue::glue(
"Mulitplied 'conc_original' by {conc_unit_norm_factor}."
),
invivPK_conc != conc_original * conc_unit_norm_factor ~ glue::glue(
"Multiplied 'conc_original' by {signif(invivPK_conc/conc_original, 2)}."
),
is.na(conc) ~ glue::glue(
"Multiplied 'conc_original' by {signif(invivPK_conc/conc_original, 2)}"
),
.default = ""
)
) %>%
dplyr::mutate(
loq_processing_flags =
if_else(
invivPK_loq == loq,
glue::glue("Used 'loq' value."),
glue::glue("Multiplied 'loq' value by {signif(invivPK_loq/loq,2)}."),
missing = glue::glue("'loq' was imputed as 90% of lowest concentration.")
)
) %>%
dplyr::mutate(
dose_processing_flags =
if_else(
invivPK_dose_level == dose_level_normalized,
glue::glue("Used 'dose_level_normalized'."),
glue::glue("Multiplied 'dose_level_normalized' by",
" {signif(invivPK_dose_level/dose_level_normalized, 3)}.")
)
) %>%
dplyr::select(fk_series_id,
analyte_dtxsid,
species,
administration_route_normalized,
conc_medium_normalized,
conc_processing_flags,
dose_processing_flags,
loq_processing_flags,
) %>%
group_by(analyte_dtxsid, species,
administration_route_normalized,
conc_medium_normalized) %>%
summarize(all_series_ids = paste(unique(fk_series_id), collapse = ", "),
conc_processing_flags = paste(unique(conc_processing_flags), collapse = " "),
dose_processing_flags = paste(unique(dose_processing_flags), collapse = " "),
loq_processing_flags = paste(unique(loq_processing_flags), collapse = " ")) %>%
dplyr::distinct()
tkstats_cvtdb <- inner_join(my_cvt_unique, my_tkstats,
by = join_by(analyte_dtxsid, species,
administration_route_normalized,
conc_medium_normalized)) %>%
distinct()
params_cvtdb <- inner_join(my_cvt_unique, my_params,
by = join_by(analyte_dtxsid, species),
relationship = "many-to-many") %>%
distinct()
# Writing to file
bind_rows(params_cvtdb,
tkstats_cvtdb) %>%
distinct() -> tkparams_cvtdb
tkparams_cvtdb_winmodels <- tkparams_cvtdb %>% semi_join(my_wm)
save(tkparams_cvtdb, tkstats_cvtdb, params_cvtdb, tkparams_cvtdb_winmodels,
file = "data-raw/tkparams_CvTdb.Rdata")
sessionInfo()
#> R version 4.4.1 (2024-06-14 ucrt)
#> Platform: x86_64-w64-mingw32/x64
#> Running under: Windows 11 x64 (build 22631)
#>
#> Matrix products: default
#>
#>
#> locale:
#> [1] LC_COLLATE=C
#> [2] LC_CTYPE=English_United States.utf8
#> [3] LC_MONETARY=English_United States.utf8
#> [4] LC_NUMERIC=C
#> [5] LC_TIME=English_United States.utf8
#>
#> time zone: America/New_York
#> tzcode source: internal
#>
#> attached base packages:
#> [1] stats graphics grDevices utils datasets methods base
#>
#> other attached packages:
#> [1] invivoPKfit_2.0.0 testthat_3.2.1.1 RColorBrewer_1.1-3 cowplot_1.1.3
#> [5] lubridate_1.9.3 forcats_1.0.0 stringr_1.5.1 dplyr_1.1.4
#> [9] purrr_1.0.2 readr_2.1.5 tidyr_1.3.1 tibble_3.2.1
#> [13] ggplot2_3.5.1 tidyverse_2.0.0
#>
#> loaded via a namespace (and not attached):
#> [1] tidyselect_1.2.1 fastmap_1.2.0 pracma_2.4.4
#> [4] promises_1.3.0 digest_0.6.37 timechange_0.3.0
#> [7] mime_0.12 lifecycle_1.0.4 ellipsis_0.3.2
#> [10] survival_3.7-0 magrittr_2.0.3 compiler_4.4.1
#> [13] rlang_1.1.4 sass_0.4.9 tools_4.4.1
#> [16] utf8_1.2.4 yaml_2.3.10 data.table_1.16.2
#> [19] knitr_1.48 htmlwidgets_1.6.4 pkgbuild_1.4.4
#> [22] pkgload_1.4.0 miniUI_0.1.1.1 expm_1.0-0
#> [25] withr_3.0.1 numDeriv_2016.8-1.1 desc_1.4.3
#> [28] grid_4.4.1 fansi_1.0.6 urlchecker_1.0.1
#> [31] profvis_0.4.0 xtable_1.8-4 colorspace_2.1-1
#> [34] scales_1.3.0 httk_2.4.0 MASS_7.3-61
#> [37] cli_3.6.3 mvtnorm_1.3-1 survey_4.4-2
#> [40] rmarkdown_2.28 generics_0.1.3 remotes_2.5.0
#> [43] rstudioapi_0.17.0 tzdb_0.4.0 sessioninfo_1.2.2
#> [46] DBI_1.2.3 msm_1.8.1 cachem_1.1.0
#> [49] splines_4.4.1 mitools_2.4 vctrs_0.6.5
#> [52] devtools_2.4.5 Matrix_1.7-0 jsonlite_1.8.9
#> [55] hms_1.1.3 optimx_2023-10.21 jquerylib_0.1.4
#> [58] glue_1.8.0 nloptr_2.1.1 multidplyr_0.1.3
#> [61] PK_1.3-6 stringi_1.8.4 gtable_0.3.5
#> [64] later_1.3.2 munsell_0.5.1 pillar_1.9.0
#> [67] brio_1.1.5 htmltools_0.5.8.1 deSolve_1.40
#> [70] R6_2.5.1 Rdpack_2.6.1 rprojroot_2.0.4
#> [73] evaluate_1.0.1 shiny_1.9.1 lattice_0.22-6
#> [76] rbibutils_2.3 memoise_2.0.1 httpuv_1.6.15
#> [79] bslib_0.8.0 Rcpp_1.0.13 xfun_0.48
#> [82] fs_1.6.4 usethis_3.0.0 pkgconfig_2.0.3