## ----mlt-setup, echo = FALSE, results = "hide", message = FALSE, warning = FALSE---- set.seed(290875) pkgs <- sapply(c("mgcv", "mlt", "survival", "eha", "prodlim", "truncreg", "lattice", "gridExtra", "MASS", "nnet", "HSAUR3", "sandwich", "flexsurv", "grid", "latticeExtra", "colorspace", "multcomp", "eha"), require, char = TRUE) dvc <- obs <- NULL if (!file.exists("analysis/DVC.rda")) { op <- options(timeout = 120) dwnfl <- try(download.file("https://zenodo.org/record/17179/files/DVC.tgz", "DVC.tgz")) options(op) if (!inherits(dwnfl, "try-error")) { untar("DVC.tgz", file = "analysis/DVC.rda") load("analysis/DVC.rda") } } else { load("analysis/DVC.rda") } if (!is.null(obs)) dvc <- c(margin.table(obs[,,"wild"], 2)) logLik.phreg <- function(object) { ret <- object$loglik[2] attr(ret, "df") <- length(coef(object)) class(ret) <- "logLik" ret } vcov.phreg <- function(object) object$var trellis.par.set(list(plot.symbol = list(col=1,pch=20, cex=0.7), box.rectangle = list(col=1), box.umbrella = list(lty=1, col=1), strip.background = list(col = "white"))) ltheme <- canonical.theme(color = FALSE) ## in-built B&W theme ltheme$strip.background$col <- "transparent" ## change strip bg lattice.options(default.theme = ltheme) knitr::opts_chunk$set(echo = TRUE, results = 'markup', error = FALSE, warning = FALSE, message = FALSE, tidy = FALSE, cache = FALSE, size = "small", fig.width = 6, fig.height = 4, fig.align = "center", out.width = NULL, ###'.6\\linewidth', out.height = NULL, fig.scap = NA) knitr::render_sweave() # use Sweave environments knitr::set_header(highlight = '') # do not \usepackage{Sweave} ## R settings options(prompt = "R> ", continue = "+ ", useFancyQuotes = FALSE) # JSS style options(width = 75) RC <- function(...) { ret <- do.call("cbind", list(...)) mlt <- which(colnames(ret) == "mlt") for (i in (1:ncol(ret))[-mlt]) { nm <- colnames(ret)[i] ret <- cbind(ret, (ret[,i] - ret[,mlt]) / ret[,mlt]) colnames(ret)[ncol(ret)] <- paste("(", nm, " - mlt)/mlt", sep = "") } print(ret, digits = 5) return(invisible(ret)) } ## ----mlt-citation, echo = FALSE------------------------------------------ year <- substr(packageDescription("mlt.docreg")$Date, 1, 4) version <- packageDescription("mlt.docreg")$Version ## ----fail, results = "asis", echo = FALSE-------------------------------- if (any(!pkgs)) { cat(paste("Package(s)", paste(names(pkgs)[!pkgs], collapse = ", "), "not available, stop processing.", "\\end{document}\n")) knitr::knit_exit() } if (is.null(dvc)) { cat("Downloading data from zenodo failed, stop processing.", "\\end{document}\n") knitr::knit_exit() } ## ----mlt-geyser-var, echo = TRUE----------------------------------------- library("mlt") var_d <- numeric_var("duration", support = c(1.0, 5.0), add = c(-1, 1), bounds = c(0, Inf)) ## ----mlt-geyser-basis, echo = TRUE--------------------------------------- B_d <- Bernstein_basis(var = var_d, order = 8, ui = "increasing") ## ----mlt-geyser-ctm, echo = TRUE----------------------------------------- ctm_d <- ctm(response = B_d, todistr = "Normal") ## ----mlt-geyser-grid, echo = TRUE---------------------------------------- str(nd_d <- mkgrid(ctm_d, 200)) ## ----mlt-geyser-data, echo = TRUE---------------------------------------- data("geyser", package = "TH.data") head(geyser) ## ----mlt-geyser-fit, echo = TRUE----------------------------------------- mlt_d <- mlt(ctm_d, data = geyser) logLik(mlt_d) ## ----mlt-geyser-density, echo = TRUE------------------------------------- nd_d$d <- predict(mlt_d, newdata = nd_d, type = "density") ## ----mlt-geyser-plot, echo = FALSE--------------------------------------- plot(d ~ duration, data = nd_d, type = "l", ylab = "Density", xlab = "Duration time") ## ----mlt-CHFLS-1--------------------------------------------------------- data("CHFLS", package = "HSAUR3") polr_CHFLS_1 <- polr(R_happy ~ 1, data = CHFLS) ## ----mlt-CHFLS-1-basefun------------------------------------------------- nl <- nlevels(CHFLS$R_happy) b_happy <- as.basis(~ R_happy, data = CHFLS, remove_intercept = TRUE, contrasts.arg = list(R_happy = function(n) contr.treatment(n, base = nl)), ui = diff(diag(nl - 1)), ci = rep(0, nl - 2)) ## ----mlt-CHFLS-1-basefun-basis------------------------------------------- b_happy <- as.basis(CHFLS$R_happy) ## ----mlt-CHFLS-1-ctm----------------------------------------------------- ctm_CHFLS_1 <- ctm(response = b_happy, todist = "Logistic") ## ----mlt-CHFLS-1-mlt----------------------------------------------------- mlt_CHFLS_1 <- mlt(model = ctm_CHFLS_1, data = CHFLS) ## ----mlt-CHFLS-1-cmpr---------------------------------------------------- logLik(polr_CHFLS_1) logLik(mlt_CHFLS_1) RC(polr = polr_CHFLS_1$zeta, mlt = coef(mlt_CHFLS_1)) ## ----mlt-CHFLS-1-pred---------------------------------------------------- RC(polr = predict(polr_CHFLS_1, newdata = data.frame(1), type = "prob"), mlt = c(predict(mlt_CHFLS_1, newdata = data.frame(1), type = "density", q = mkgrid(b_happy)[[1]])), ML = xtabs(~ R_happy, data = CHFLS) / nrow(CHFLS)) ## ----mlt-geyser-w-------------------------------------------------------- var_w <- numeric_var("waiting", support = c(40.0, 100), add = c(-5, 15), bounds = c(0, Inf)) c(sapply(nd_w <- mkgrid(var_w, 100), range)) ## ----mlt-geyser-bernstein------------------------------------------------ B_w <- Bernstein_basis(var_w, order = 8, ui = "increasing") ## ----mlt-geyser-w-ctm, echo = TRUE--------------------------------------- ctm_w <- ctm(response = B_w, todistr = "Normal") ## ----mlt-geyser-w-fit, echo = TRUE--------------------------------------- mlt_w <- mlt(ctm_w, data = geyser) ## ----mlt-geyser-w-distribution, echo = TRUE------------------------------ nd_w$d <- predict(mlt_w, newdata = nd_w, type = "distribution") ## ----mlt-geyser-w-plot, echo = FALSE------------------------------------- layout(matrix(1:2, ncol = 2)) plot(ecdf(geyser$waiting), col = "grey", xlab = "Waiting times", ylab = "Distribution", main = "", cex = .75) lines(nd_w$waiting, nd_w$d) B_w_40 <- Bernstein_basis(order = 40, var = var_w, ui = "incre") ctm_w_40 <- ctm(B_w_40, todistr = "Normal") mlt_w_40 <- mlt(ctm_w_40, data = geyser) nd_w$d2 <- predict(mlt_w_40, q = nd_w$waiting, type = "distribution") lines(nd_w$waiting, nd_w$d2, lty = 2) legend("bottomright", lty = 1:2, legend = c("M = 8", "M = 40"), bty = "n") plot(nd_w$waiting, predict(mlt_w, q = nd_w$waiting, type = "density"), type = "l", ylim = c(0, .04), xlab = "Waiting times", ylab = "Density") lines(nd_w$waiting, predict(mlt_w_40, q = nd_w$waiting, type = "density"), lty = 2) rug(geyser$waiting, col = rgb(.1, .1, .1, .1)) ## ----mlt-dvc------------------------------------------------------------- var_dvc <- numeric_var("dvc", support = min(dvc):max(dvc)) B_dvc <- Bernstein_basis(var_dvc, order = 6, ui = "increasing") dvc_mlt <- mlt(ctm(B_dvc), data = data.frame(dvc = dvc)) ## ----mlt-dvc-predict----------------------------------------------------- q <- support(var_dvc)[[1]] p <- predict(dvc_mlt, newdata = data.frame(1), q = q, type = "distribution") ## ----mlt-dvc-plot, echo = FALSE------------------------------------------ plot(ecdf(dvc), col = "grey", xlab = "Number of Roe Deer-Vehicle Collisions", ylab = "Distribution", main = "", cex = .75) lines(q, p, col = "blue") lines(q, ppois(q, lambda = mean(dvc)), col = "darkred") legend(400, .3, pch = c(20, NA, NA), lty = c(NA, 1, 1), legend = c("ECDF", "Transformation Model", "Poisson"), bty = "n", cex = .8, col = c("grey", "blue", "darkred")) ## ----mlt-CHFLS-2, cache = TRUE------------------------------------------- polr_CHFLS_2 <- polr(R_happy ~ R_age + R_income, data = CHFLS) ## ----mlt-CHFLS-2-base---------------------------------------------------- b_R <- as.basis(~ R_age + R_income, data = CHFLS, remove_intercept = TRUE, negative = TRUE) ## ----mlt-CHFLS-2-ctm----------------------------------------------------- ctm_CHFLS_2 <- ctm(response = b_happy, shifting = b_R, todistr = "Logistic") mlt_CHFLS_2 <- mlt(ctm_CHFLS_2, data = CHFLS, scale = TRUE) ## ----mlt-CHFLS-2-cmpr---------------------------------------------------- logLik(polr_CHFLS_2) logLik(mlt_CHFLS_2) RC(polr = c(polr_CHFLS_2$zeta, coef(polr_CHFLS_2)), mlt = coef(mlt_CHFLS_2)) ## ----mlt-GBSG2-1, echo = TRUE-------------------------------------------- data("GBSG2", package = "TH.data") GBSG2y <- numeric_var("y", support = c(100.0, max(GBSG2$time)), bounds = c(0, Inf)) GBSG2$y <- with(GBSG2, Surv(time, cens)) ## ----mlt-GBSG2-1-Cox----------------------------------------------------- B_GBSG2y <- Bernstein_basis(var = GBSG2y, order = 10, ui = "increasing") fm_GBSG2 <- Surv(time, cens) ~ horTh + age + menostat + tsize + tgrade + pnodes + progrec + estrec ctm_GBSG2_1 <- ctm(B_GBSG2y, shifting = fm_GBSG2[-2L], data = GBSG2, todistr = "MinExtrVal") ## ----mlt-GBSG2-1-xbasis, eval = FALSE------------------------------------ # as.basis(fm_GBSG2[-2L], data = GBSG2, remove_intercept = TRUE) ## ----mlt-GBSG2-1-mlt----------------------------------------------------- mlt_GBSG2_1 <- mlt(ctm_GBSG2_1, data = GBSG2, scale = TRUE) ## ----mlt-GBSG2-1-coxph--------------------------------------------------- coxph_GBSG2_1 <- coxph(fm_GBSG2, data = GBSG2, ties = "breslow") cf <- coef(coxph_GBSG2_1) RC(coxph = cf, mlt = coef(mlt_GBSG2_1)[names(cf)]) ## ----mlt-GBSG2-coxph_mlt, echo = FALSE, results = "hide", cache = TRUE---- ndtmp <- as.data.frame(mkgrid(GBSG2y, 100)) ord <- c(1:30, 35, 40, 45, 50) p <- vector(mode = "list", length = length(ord)) CF <- c() ll <- numeric(length(ord)) tm <- numeric(length(ord)) for (i in 1:length(ord)) { print(i) B_GBSG2ytmp <- Bernstein_basis(var = GBSG2y, order = ord[i], ui = "increasing") ctmi <- ctm(B_GBSG2ytmp, shifting = fm_GBSG2[-2L], data = GBSG2, todistr = "MinExtrVal") tm[i] <- system.time(mlti <- mlt(ctmi, data = GBSG2, scale = TRUE))[1] ll[i] <- logLik(mlti) p[[i]] <- predict(mlti, newdata = ndtmp, type = "trafo", terms = "bresponse") cf <- coef(mlti) cf <- cf[-grep("Bs", names(cf))] CF <- rbind(CF, cf) } colnames(CF) <- names(cf) of <- model.matrix(coxph_GBSG2_1) %*% coef(coxph_GBSG2_1) coxphtmp <- coxph(Surv(time, cens) ~ 1 + offset(of), data = GBSG2) b <- survfit(coxphtmp) layout(matrix(1:2, nc = 2)) col <- rgb(.1, .1, .1, .1) ylim <- range(unlist(p)[is.finite(unlist(p))]) plot(ndtmp$y, p[[1]], type = "l", ylim = ylim, col = col, xlab = "Survival Time (days)", ylab = "Log-cumulative Hazard") out <- sapply(p, function(y) lines(ndtmp$y, y, col = col)) lines(b$time, log(b$cumhaz), col = "red") ylim <- range(CF) plot(ord, CF[,1], ylim = ylim, col = col[1], xlab = "Order M", ylab = "Coefficients", type = "n") for (i in 1:ncol(CF)) { points(ord, CF[,i], cex = .5, pch = 19) abline(h = coef(coxph_GBSG2_1)[i], col = "darkgrey") } # text(20, coef(coxph_GBSG2_1) + .1, names(coef(coxph_GBSG2_1))) ## ----mlt-GBSG2-1-fss, cache = TRUE--------------------------------------- kn <- log(support(GBSG2y)$y) fss_GBSG2_1 <- flexsurvspline(fm_GBSG2, data = GBSG2, scale = "hazard", k = 9, bknots = kn) logLik(fss_GBSG2_1) logLik(mlt_GBSG2_1) cf <- coef(coxph_GBSG2_1) RC(coxph = cf, mlt = coef(mlt_GBSG2_1)[names(cf)], fss = coef(fss_GBSG2_1)[names(cf)]) ## ----mlt-GBSG2-1-fss-plot, echo = FALSE---------------------------------- p1 <- summary(fss_GBSG2_1, newdata = GBSG2[1,], ci = FALSE) p2 <- predict(mlt_GBSG2_1, newdata = GBSG2[1, all.vars(fm_GBSG2[-2L])], q = p1[[1]]$time, type = "survivor") plot(p1[[1]]$time, p1[[1]]$est, type = "l", lty = 1, xlab = "Survival Time (days)", ylab = "Probability", ylim = c(0, 1)) lines(p1[[1]]$time, p2[,1], lty = 2) p3 <- survfit(coxph_GBSG2_1, newdata = GBSG2[1,]) lines(p3$time, p3$surv, lty = 3) legend("topright", lty = 1:3, legend = c("flexsurvspline", "mlt", "coxph"), bty = "n") ## ----mlt-GBSG2-2--------------------------------------------------------- ly <- log_basis(GBSG2y, ui = "increasing") ctm_GBSG2_2 <- ctm(ly, shifting = fm_GBSG2[-2L], data = GBSG2, negative = TRUE, todistr = "MinExtrVal") mlt_GBSG2_2 <- mlt(ctm_GBSG2_2, data = GBSG2, fixed = c("log(y)" = 1), scale = TRUE) ## ----mlt-GBSG2-2-exp----------------------------------------------------- survreg_GBSG2_2 <- survreg(fm_GBSG2, data = GBSG2, dist = "exponential") phreg_GBSG2_2 <- phreg(fm_GBSG2, data = GBSG2, dist = "weibull", shape = 1) logLik(survreg_GBSG2_2) logLik(phreg_GBSG2_2) logLik(mlt_GBSG2_2) RC(survreg = coef(survreg_GBSG2_2)[names(cf)], phreg = -coef(phreg_GBSG2_2)[names(cf)], mlt = coef(mlt_GBSG2_2)[names(cf)]) ## ----mlt-GBSG2-3--------------------------------------------------------- mlt_GBSG2_3 <- mlt(ctm_GBSG2_2, data = GBSG2, scale = TRUE) survreg_GBSG2_3 <- survreg(fm_GBSG2, data = GBSG2, dist = "weibull") phreg_GBSG2_3 <- phreg(fm_GBSG2, data = GBSG2, dist = "weibull") logLik(survreg_GBSG2_3) logLik(phreg_GBSG2_3) logLik(mlt_GBSG2_3) RC(survreg = coef(survreg_GBSG2_3)[names(cf)] / survreg_GBSG2_3$scale, phreg = - coef(phreg_GBSG2_3)[names(cf)], mlt = coef(mlt_GBSG2_3)[names(cf)]) ## ----mlt-GBSG2-3a-------------------------------------------------------- log_GBSG2y <- numeric_var("y", support = c(100.0, max(GBSG2$time)), bounds = c(0.1, Inf)) lBy <- Bernstein_basis(log_GBSG2y, order = 10, ui = "increasing", log_first = TRUE) ctm_GBSG2_3a <- ctm(lBy, shifting = fm_GBSG2[-2L], data = GBSG2, negative = FALSE, todistr = "MinExtrVal") mlt_GBSG2_3a <- mlt(ctm_GBSG2_3a, data = GBSG2, scale = TRUE) logLik(mlt_GBSG2_3a) RC(coxph = cf, mlt = coef(mlt_GBSG2_3a)[names(cf)]) ## ----mlt-BostonHousing-lm------------------------------------------------ data("BostonHousing2", package = "mlbench") lm_BH <- lm(cmedv ~ crim + zn + indus + chas + nox + rm + age + dis + rad + tax + ptratio + b + lstat, data = BostonHousing2) ## ----mlt-BostonHousing-mlt----------------------------------------------- BostonHousing2$medvc <- with(BostonHousing2, Surv(cmedv, cmedv < 50)) var_m <- numeric_var("medvc", support = c(10.0, 40.0), bounds = c(0, Inf)) fm_BH <- medvc ~ crim + zn + indus + chas + nox + rm + age + dis + rad + tax + ptratio + b + lstat ## ----mlt-BostonHousng-mlt-linear----------------------------------------- B_m <- polynomial_basis(var_m, coef = c(TRUE, TRUE), ui = matrix(c(0, 1), nrow = 1), ci = 0) ctm_BH <- ctm(B_m, shift = fm_BH[-2L], data = BostonHousing2, todistr = "Normal") lm_BH_2 <- mlt(ctm_BH, data = BostonHousing2, scale = TRUE) logLik(lm_BH_2) ## ----mlt-BostonHousing-ctm----------------------------------------------- B_m <- Bernstein_basis(var_m, order = 6, ui = "increasing") ctm_BH <- ctm(B_m, shift = fm_BH[-2L], data = BostonHousing2, todistr = "Normal") mlt_BH <- mlt(ctm_BH, data = BostonHousing2, scale = TRUE) logLik(mlt_BH) ## ----mlt-BostonHousing-plot, echo = FALSE, results = "hide", dev = "png"---- q <- 3:52 m <- predict(lm_BH, data = BostonHousing2) s <- summary(lm_BH)$sigma d <- sapply(m, function(m) pnorm(q, mean = m, sd = s)) nd <- expand.grid(q = q, lp = m) nd$d <- c(d) pfun <- function(x, y, z, subscripts, at, ...) { panel.contourplot(x, y, z, subscripts, at = 1:9/10, ...) panel.xyplot(x = m, y = BostonHousing2$cmedv, pch = 20, col = rgb(.1, .1, .1, .1), ...) } p1 <- contourplot(d ~ lp + q, data = nd, panel = pfun, xlab = "Linear predictor", ylab = "Observed", main = "Normal Linear Model") d <- predict(mlt_BH, newdata = BostonHousing2, q = q, type = "distribution") lp <- c(predict(mlt_BH, newdata = BostonHousing2, q = 1, terms = "bshift")) nd <- expand.grid(q = q, lp = -lp) nd$d <- c(d) pfun <- function(x, y, z, subscripts, at, ...) { panel.contourplot(x, y, z, subscripts, at = 1:9/10, ...) panel.xyplot(x = -lp, y = BostonHousing2$cmedv, pch = 20, col = rgb(.1, .1, .1, .1), ...) } p2 <- contourplot(d ~ lp + q, data = nd, panel = pfun, xlab = "Linear predictor", ylab = "Observed", main = "Linear Transformation Model") grid.arrange(p1, p2, nrow = 1) ## ----mlt-PSID1976-------------------------------------------------------- data("PSID1976", package = "AER") PSID1976$nwincome <- with(PSID1976, (fincome - hours * wage)/1000) PSID1976$hours <- as.double(PSID1976$hours) PSID1976_0 <- subset(PSID1976, participation == "yes") fm_PSID1976 <- hours ~ nwincome + education + experience + I(experience^2) + age + youngkids + oldkids ## ----mlt-PSID1976-truncreg----------------------------------------------- tr_PSID1976 <- truncreg(fm_PSID1976, data = PSID1976_0) ## ----mlt-PSID1976-mlt---------------------------------------------------- PSID1976_0$hours <- R(PSID1976_0$hours, tleft = 0) b_hours <- as.basis(~ hours, data = PSID1976, ui = matrix(c(0, 1), nr = 1), ci = 0) ctm_PSID1976_1 <- ctm(b_hours, shift = fm_PSID1976[-2L], data = PSID1976_0, todistr = "Normal") mlt_PSID1976_1 <- mlt(ctm_PSID1976_1, data = PSID1976_0, scale = TRUE) ## ----mlt-PSID1976-cmpr--------------------------------------------------- logLik(tr_PSID1976) logLik(mlt_PSID1976_1) cf <- coef(mlt_PSID1976_1) RC(truncreg = coef(tr_PSID1976), mlt = c(-cf[-grep("hours", names(cf))], 1) / cf["hours"]) ## ----mlt-PSID1976-mlt-ctm------------------------------------------------ var_h <- numeric_var("hours", support = range(PSID1976_0$hours$exact), bounds = c(0, Inf)) B_hours <- Bernstein_basis(var_h, order = 6, ui = "increasing") ctm_PSID1976_2 <- ctm(B_hours, shift = fm_PSID1976[-2L], data = PSID1976_0, todistr = "Normal") mlt_PSID1976_2 <- mlt(ctm_PSID1976_2, data = PSID1976_0, scale = TRUE) logLik(mlt_PSID1976_2) ## ----mlt-CHFLS-3, cache = TRUE------------------------------------------- b_health <- as.basis(~ R_health - 1, data = CHFLS) ctm_CHFLS_3 <- ctm(b_happy, interacting = b_health, todist = "Logistic") mlt_CHFLS_3 <- mlt(ctm_CHFLS_3, data = CHFLS, scale = TRUE) logLik(mlt_CHFLS_3) predict(mlt_CHFLS_3, newdata = mkgrid(mlt_CHFLS_3), type = "distribution") ## ----mlt-CHFLS-4, cache = TRUE------------------------------------------- ctm_CHFLS_4 <- ctm(b_happy, interacting = b_health, shifting = b_R, todist = "Logistic") mlt_CHFLS_4 <- mlt(ctm_CHFLS_4, data = CHFLS, scale = TRUE) coef(mlt_CHFLS_4)[c("R_age", "R_income")] ## ----mlt-GBSG2-4--------------------------------------------------------- b_horTh <- as.basis(GBSG2$horTh) ctm_GBSG2_4 <- ctm(B_GBSG2y, interacting = b_horTh, todistr = "MinExtrVal") mlt_GBSG2_4 <- mlt(ctm_GBSG2_4, data = GBSG2) ## ----mlt-GBSG2-strata-plot, echo = FALSE, results = "hide"--------------- nd <- expand.grid(s <- mkgrid(mlt_GBSG2_4, 100)) nd$mlt_S <- c(predict(mlt_GBSG2_4, newdata = s, type = "survivor")) nd$KM_S <- unlist(predict(prodlim(Surv(time, cens) ~ horTh, data = GBSG2), newdata = data.frame(horTh = s$horTh), times = s$y)) plot(nd$y, nd$mlt_S, ylim = c(0, 1), xlab = "Survival time (days)", ylab = "Probability", type = "n", las = 1) with(subset(nd, horTh == "no"), lines(y, mlt_S, col = "grey", lty = 2)) with(subset(nd, horTh == "yes"), lines(y, mlt_S, lty = 2)) with(subset(nd, horTh == "no"), lines(y, KM_S, type = "s", col = "grey")) with(subset(nd, horTh == "yes"), lines(y, KM_S, type = "s")) legend(250, 0.4, lty = c(1, 1, 2, 2), col = c("black", "grey", "black", "grey"), legend = c("hormonal therapy, KM", "no hormonal therapy, KM", "hormonal therapy, MLT", "no hormonal therapy, MLT"), bty = "n", cex = .75) ## ----mlt-GBSG2-5--------------------------------------------------------- ctm_GBSG2_5 <- ctm(B_GBSG2y, interacting = b_horTh, shifting = ~ age, data = GBSG2, todistr = "MinExtrVal") mlt_GBSG2_5 <- mlt(ctm_GBSG2_5, data = GBSG2, scale = TRUE) ## ----mlt-GBSG2-5-coxph--------------------------------------------------- coxph_GBSG2_5 <- coxph(Surv(time, cens) ~ age + strata(horTh), data = GBSG2) cf <- coef(coxph_GBSG2_5) RC(coxph = cf, mlt = coef(mlt_GBSG2_5)[names(cf)]) ## ----mlt-CHFLS-5, cache = TRUE------------------------------------------- contrasts(CHFLS$R_health) <- "contr.treatment" b_health <- as.basis(~ R_health, data = CHFLS) ctm_CHFLS_5 <- ctm(b_happy, interacting = b_health, todist = "Logistic") mlt_CHFLS_5 <- mlt(ctm_CHFLS_5, data = CHFLS, scale = TRUE) predict(mlt_CHFLS_5, newdata = mkgrid(mlt_CHFLS_5), type = "distribution") logLik(mlt_CHFLS_5) ## ----mlt-CHFLS-6, cache = TRUE------------------------------------------- b_R <- as.basis(~ R_age + R_income, data = CHFLS, remove_intercept = TRUE, scale = TRUE) ctm_CHFLS_6 <- ctm(b_happy, interacting = b_R, todist = "Logistic") mlt_CHFLS_6 <- mlt(ctm_CHFLS_6, data = CHFLS, scale = TRUE) logLik(mlt_CHFLS_6) ## ----mlt-CHFLS-7, cache = TRUE------------------------------------------- ctm_CHFLS_7 <- ctm(b_happy, interacting = c(h = b_health, R = b_R), todist = "Logistic") mlt_CHFLS_7 <- mlt(ctm_CHFLS_7, data = CHFLS, scale = TRUE) logLik(mlt_CHFLS_7) ## ----mlt-iris-1---------------------------------------------------------- fm_iris <- Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width multinom_iris <- nnet::multinom(fm_iris, data = iris, trace = FALSE) logLik(multinom_iris) iris$oSpecies <- ordered(iris$Species) b_Species <- as.basis(iris$oSpecies) b_x <- as.basis(fm_iris[-2L], data = iris, scale = TRUE) ctm_iris <- ctm(b_Species, interacting = b_x, todistr = "Logistic") mlt_iris <- mlt(ctm_iris, data = iris, scale = TRUE) logLik(mlt_iris) p1 <- predict(mlt_iris, newdata = iris, q = sort(unique(iris$oSpecies)), type = "density") p2 <- predict(multinom_iris, newdata = iris, type = "prob") max(abs(t(p1) - p2)) ## ----mlt-GBSG2-6--------------------------------------------------------- ctm_GBSG2_6 <- ctm(B_GBSG2y, shifting = ~ horTh, data = GBSG2, todistr = "MinExtrVal") mlt_GBSG2_6 <- mlt(ctm_GBSG2_6, data = GBSG2) logLik(mlt_GBSG2_6) ## ----mlt-GBSG2-7--------------------------------------------------------- b_horTh <- as.basis(~ horTh, data = GBSG2) ctm_GBSG2_7 <- ctm(B_GBSG2y, interacting = b_horTh, todistr = "MinExtrVal") nd <- data.frame(y = GBSG2$time[1:2], horTh = unique(GBSG2$horTh)) attr(model.matrix(ctm_GBSG2_7, data = nd), "constraint") mlt_GBSG2_7 <- mlt(ctm_GBSG2_7, data = GBSG2) logLik(mlt_GBSG2_7) ## ----mlt-GBSG2-deviation-plot, echo = FALSE, results = "hide"------------ s <- mkgrid(mlt_GBSG2_7, 15) s$y <- s$y[s$y > 100 & s$y < 2400] nd <- expand.grid(s) K <- model.matrix(ctm_GBSG2_7, data = nd) Kyes <- K[nd$horTh == "yes",] Kyes[,grep("Intercept", colnames(K))] <- 0 gh <- glht(parm(coef(mlt_GBSG2_7), vcov(mlt_GBSG2_7)), Kyes) ci <- confint(gh) coxy <- s$y K <- matrix(0, nrow = 1, ncol = length(coef(mlt_GBSG2_6))) K[,length(coef(mlt_GBSG2_6))] <- 1 ci2 <- confint(glht(mlt_GBSG2_6, K)) plot(coxy, ci$confint[, "Estimate"], ylim = range(ci$confint), type = "n", xlab = "Survival time (days)", ylab = "Log-hazard ratio", las = 1) polygon(c(coxy, rev(coxy)), c(ci$confint[,"lwr"], rev(ci$confint[, "upr"])), border = NA, col = rgb(.1, .1, .1, .1)) lines(coxy, ci$confint[, "Estimate"], lty = 1, lwd = 1) lines(coxy, rep(ci2$confint[,"Estimate"], length(coxy)), lty = 2, lwd = 1) lines(coxy, rep(0, length(coxy)), lty = 3) polygon(c(coxy[c(1, length(coxy))], rev(coxy[c(1, length(coxy))])), rep(ci2$confint[,c("lwr", "upr")], c(2, 2)), border = NA, col = rgb(.1, .1, .1, .1)) legend("bottomright", lty = 1:2, lwd = 1, legend = c("time-varying log-hazard ratio", "time-constant log-hazard ratio"), bty = "n", cex = .75) ## ----mlt-GBSG2-8, echo = TRUE, cache = TRUE------------------------------ var_a <- numeric_var("age", support = range(GBSG2$age)) B_age <- Bernstein_basis(var_a, order = 3) b_horTh <- as.basis(GBSG2$horTh) ctm_GBSG2_8 <- ctm(B_GBSG2y, interacting = b(horTh = b_horTh, age = B_age), todistr = "MinExtrVal") mlt_GBSG2_8 <- mlt(ctm_GBSG2_8, data = GBSG2) logLik(mlt_GBSG2_8) ## ----mlt-GBSG2-8-plot, echo = FALSE-------------------------------------- nlev <- c(no = "without hormonal therapy", yes = "with hormonal therapy") levels(nd$horTh) <- nlev[match(levels(nd$horTh), names(nlev))] s <- mkgrid(mlt_GBSG2_8, 100) nd <- expand.grid(s) nd$s <- c(predict(mlt_GBSG2_8, newdata = s, type = "survivor")) contourplot(s ~ age + y | horTh, data = nd, at = 1:9 / 10, ylab = "Survival time (days)", xlab = "Age (years)", scales = list(x = list(alternating = c(1, 1)))) ## ----mlt-head, echo = TRUE, cache = TRUE--------------------------------- data("db", package = "gamlss.data") db$lage <- with(db, age^(1/3)) var_head <- numeric_var("head", support = quantile(db$head, c(.1, .9)), bounds = range(db$head)) B_head <- Bernstein_basis(var_head, order = 3, ui = "increasing") var_lage <- numeric_var("lage", support = quantile(db$lage, c(.1, .9)), bounds = range(db$lage)) B_age <- Bernstein_basis(var_lage, order = 3, ui = "none") ctm_head <- ctm(B_head, interacting = B_age) mlt_head <- mlt(ctm_head, data = db, scale = TRUE) ## ----mlt-head-plot, echo = FALSE, dev = "png"---------------------------- pr <- expand.grid(s <- mkgrid(ctm_head, 100)) pr$p <- c(predict(mlt_head, newdata = s, type = "distribution")) pr$lage <- pr$lage^3 pr$cut <- factor(pr$lage > 2.5) levels(pr$cut) <- c("Age < 2.5 yrs", "Age > 2.5 yrs") pfun <- function(x, y, z, subscripts, at, ...) { panel.contourplot(x, y, z, subscripts, at = c(0.4, 2, 10, 25, 50, 75, 90, 98, 99.6)/ 100, ...) panel.xyplot(x = db$age, y = db$head, pch = 20, col = rgb(.1, .1, .1, .1), ...) } print(contourplot(p ~ lage + head | cut, data = pr, panel = pfun, region = FALSE, xlab = "Age (years)", ylab = "Head circumference (cm)", scales = list(x = list(relation = "free")))) ## ----mlt-BHi-start, echo = FALSE----------------------------------------- start <- c(`Bs1(medvc):(Intercept)` = -11.58786739767557, `Bs2(medvc):(Intercept)` = -6.7547950426875296, `Bs3(medvc):(Intercept)` = -4.1366028640187587, `Bs4(medvc):(Intercept)` = 1.70242739486858, `Bs5(medvc):(Intercept)` = 1.7024282405116158, `Bs6(medvc):(Intercept)` = 3.2595263987587746, `Bs7(medvc):(Intercept)` = 3.9025772292254435, `Bs1(medvc):crim` = 3.0699495656298534, `Bs2(medvc):crim` = 3.5955167824166181, `Bs3(medvc):crim` = 3.5955168278664127, `Bs4(medvc):crim` = 3.5955168750613429, `Bs5(medvc):crim` = 3.5955168904234664, `Bs6(medvc):crim` = 3.5955169392855586, `Bs7(medvc):crim` = 3.5955169730725052, `Bs1(medvc):zn` = -2.1151709334952202, `Bs2(medvc):zn` = -1.330004690968422, `Bs3(medvc):zn` = -1.3300045948503083, `Bs4(medvc):zn` = -1.3300033108698186, `Bs5(medvc):zn` = -1.3300033078086677, `Bs6(medvc):zn` = -0.96191066610824716, `Bs7(medvc):zn` = -0.96191065861104608, `Bs1(medvc):indus` = -0.35960908556525345, `Bs2(medvc):indus` = -0.35960905985490077, `Bs3(medvc):indus` = -0.35960793525271667, `Bs4(medvc):indus` = -0.35934309840163225, `Bs5(medvc):indus` = -0.35934309357921079, `Bs6(medvc):indus` = -0.35934403265085169, `Bs7(medvc):indus` = -0.35934405322662893, `Bs1(medvc):chas1` = -0.49786094816438858, `Bs2(medvc):chas1` = -0.49770555550324658, `Bs3(medvc):chas1` = -0.49085193409644223, `Bs4(medvc):chas1` = -0.28057150654979646, `Bs5(medvc):chas1` = -0.28057150745910958, `Bs6(medvc):chas1` = -0.28057159418145561, `Bs7(medvc):chas1` = -0.28057159957855826, `Bs1(medvc):nox` = 2.5376938304919792, `Bs2(medvc):nox` = 2.5376933697075534, `Bs3(medvc):nox` = 2.5375485200496413, `Bs4(medvc):nox` = 2.5375484910378336, `Bs5(medvc):nox` = 2.5375484949797813, `Bs6(medvc):nox` = 2.5375484970845217, `Bs7(medvc):nox` = 2.5375484892126163, `Bs1(medvc):rm` = -0.14558898695685421, `Bs2(medvc):rm` = -2.2333101594194145, `Bs3(medvc):rm` = -2.233310165514423, `Bs4(medvc):rm` = -6.913467986991809, `Bs5(medvc):rm` = -6.9134680513321216, `Bs6(medvc):rm` = -6.9134681648477878, `Bs7(medvc):rm` = -6.9134681532727758, `Bs1(medvc):age` = 1.7037598964290337, `Bs2(medvc):age` = 1.7037612199992875, `Bs3(medvc):age` = 1.5898930022989017, `Bs4(medvc):age` = 0.43102069612812693, `Bs5(medvc):age` = 0.43102067780471154, `Bs6(medvc):age` = -0.037041330032449186, `Bs7(medvc):age` = -0.037041406627502181, `Bs1(medvc):dis` = 2.9873219530242476, `Bs2(medvc):dis` = 4.4952372044989133, `Bs3(medvc):dis` = 4.4952373035891808, `Bs4(medvc):dis` = 4.4952386569329841, `Bs5(medvc):dis` = 4.4952391676337369, `Bs6(medvc):dis` = 4.4952392418127287, `Bs7(medvc):dis` = 4.7809968102137574, `Bs1(medvc):rad` = -0.97566895623645966, `Bs2(medvc):rad` = -0.97566909946874247, `Bs3(medvc):rad` = -3.4798480962672937, `Bs4(medvc):rad` = -3.4798481822134661, `Bs5(medvc):rad` = -3.4798481789260292, `Bs6(medvc):rad` = -4.3174402380431083, `Bs7(medvc):rad` = -4.3178484277713345, `Bs1(medvc):tax` = 3.9727833395481338, `Bs2(medvc):tax` = 2.3660629545899958, `Bs3(medvc):tax` = 2.3660628938371251, `Bs4(medvc):tax` = 2.3660629241250111, `Bs5(medvc):tax` = 2.3660629251840981, `Bs6(medvc):tax` = 2.1146216707381735, `Bs7(medvc):tax` = 1.4719791743622892, `Bs1(medvc):ptratio` = 2.2039034046410761, `Bs2(medvc):ptratio` = 2.2039025612746528, `Bs3(medvc):ptratio` = 2.203902513614862, `Bs4(medvc):ptratio` = 2.2039024990773788, `Bs5(medvc):ptratio` = 2.2039017916126986, `Bs6(medvc):ptratio` = 2.2039017060852997, `Bs7(medvc):ptratio` = 2.2039016974876349, `Bs1(medvc):b` = -0.50419574760654773, `Bs2(medvc):b` = -1.6428248797945801, `Bs3(medvc):b` = -0.71378127312071882, `Bs4(medvc):b` = -0.71378122029094859, `Bs5(medvc):b` = -0.71378126944149745, `Bs6(medvc):b` = -0.71378288163529846, `Bs7(medvc):b` = -0.71378290708848713, `Bs1(medvc):lstat` = 5.3505499532175946, `Bs2(medvc):lstat` = 5.7360315640224862, `Bs3(medvc):lstat` = 5.7360316296539118, `Bs4(medvc):lstat` = 5.7360355991353975, `Bs5(medvc):lstat` = 6.6638242380198784, `Bs6(medvc):lstat` = 7.0023018769629921, `Bs7(medvc):lstat` = 7.0023018815336835 ) ## ----mlt-BostonHousing-dr-sumconstr, cache = TRUE------------------------ b_BH_s <- as.basis(fm_BH[-2L], data = BostonHousing2, scale = TRUE) ctm_BHi <- ctm(B_m, interacting = b_BH_s, sumconstr = TRUE) mlt_BHi <- mlt(ctm_BHi, data = BostonHousing2, dofit = FALSE) coef(mlt_BHi) <- start logLik(mlt_BHi) ## ----mlt-BostonHousing-dr, cache = TRUE---------------------------------- ctm_BHi2 <- ctm(B_m, interacting = b_BH_s, sumconstr = FALSE) mlt_BHi2 <- mlt(ctm_BHi2, data = BostonHousing2) logLik(mlt_BHi2) ## ----mlt-Boston-Housing-dr-plot, echo = FALSE, fig.height = 3, dev = "png"---- q <- mkgrid(var_m, 100)[[1]] tr <- predict(mlt_BH, newdata = BostonHousing2[, all.vars(fm_BH[-2L])], q = q, type = "density") tri <- predict(mlt_BHi, newdata = BostonHousing2[, all.vars(fm_BH[-2L])], q = q, type = "density") tri2 <- predict(mlt_BHi2, newdata = BostonHousing2[, all.vars(fm_BH[-2L])], q = q, type = "density") layout(matrix(1:3, ncol = 3)) Q <- matrix(q, nrow = length(q), ncol = ncol(tr)) ylim <- range(c(tr, tri)) matplot(Q, tr, ylim = ylim, xlab = "Median Value", ylab = "Density", type = "l", col = rgb(.1, .1, .1, .1), lty = 1, main = "mlt_BH") matplot(Q, tri, ylim = ylim, xlab = "Median Value", ylab = "Density", type = "l", col = rgb(.1, .1, .1, .1), lty = 1, main = "mlt_BHi") matplot(Q, tri2, ylim = ylim, xlab = "Median Value", ylab = "Density", type = "l", col = rgb(.1, .1, .1, .1), lty = 1, main = "mlt_BHi2") ## ----mlt-treepipit, echo = TRUE, cache = TRUE---------------------------- data("treepipit", package = "coin") treepipit$ocounts <- ordered(treepipit$counts) B_cs <- Bernstein_basis(var = numeric_var("coverstorey", support = 1:110), order = 4) B_c <- as.basis(treepipit$ocounts) ctm_treepipit <- ctm(B_c, interacting = B_cs) mlt_treepipit <- mlt(ctm_treepipit, data = treepipit, scale = TRUE, optim = mltoptim()["spg"]) ## ----mlt-mgcv, echo = FALSE, results = "hide"---------------------------- library("mgcv") ### masks nnet::multinom ## ----mlt-treepipit-gam--------------------------------------------------- gam_treepipit <- gam(counts ~ s(coverstorey), data = treepipit, family = "poisson") ## ----mlt-treepipit-plot, echo = FALSE, fig.height = 3-------------------- s <- mkgrid(ctm_treepipit, 100) s$ocounts <- s$ocounts[1:5] nd <- expand.grid(s) nd$p <- c(predict(mlt_treepipit, newdata = s, type = "distribution")) ### produce a table tpt <- xtabs(~ counts + coverstorey, data = treepipit) ### construct a data frame with frequencies treepipit2 <- sapply(as.data.frame(tpt, stringsAsFactors = FALSE), as.integer) s <- mkgrid(ctm_treepipit, 10) s$ocounts <- s$ocounts[1] K <- model.matrix(ctm_treepipit, data = expand.grid(s)) #g <- glht(parm(coef(mod), vcov(mod)), linfct = K) #confint(g) nd$lambda <- predict(gam_treepipit, newdata = nd, type = "response") layout(matrix(1:3, nr = 1)) par("mai" = par("mai") * c(1, .95, 1, .85)) xlim <- range(treepipit[, "coverstorey"]) * c(0.98, 1.05) xlab <- "Cover storey" ylab <- "Number of tree pipits (TP)" ### scatterplot again; plots are proportional to number of plots plot(counts ~ coverstorey, data = treepipit2, cex = sqrt(Freq), ylim = c(-.5, 5), xlab = xlab, ylab = ylab, col = "darkgrey", xlim = xlim, las = 1, main = "Observations") plot(c(0, 110), c(0, 1), type = "n", xlab = xlab, ylab = "Conditional probability", xlim = xlim, las = 1, main = "MLT") with(subset(nd, ocounts == "0"), lines(coverstorey, p, lty = 1)) with(subset(nd, ocounts == "1"), lines(coverstorey, p, lty = 2)) with(subset(nd, ocounts == "2"), lines(coverstorey, p, lty = 3)) with(subset(nd, ocounts == "3"), lines(coverstorey, p, lty = 4)) with(subset(nd, ocounts == "4"), lines(coverstorey, p, lty = 5)) abline(h = 1, lty = 6) legend("bottomright", lty = 1:6, legend = c(expression(TP == 0), expression(TP <= 1), expression(TP <= 2), expression(TP <= 3), expression(TP <= 4), expression(TP <= 5)), bty = "n") plot(c(0, 110), c(0, 1), type = "n", xlab = xlab, ylab = "Conditional probability", xlim = xlim, las = 1, main = "GAM") with(subset(nd, ocounts == "0"), lines(coverstorey, ppois(0, lambda), lty = 1)) with(subset(nd, ocounts == "1"), lines(coverstorey, ppois(1, lambda), lty = 2)) with(subset(nd, ocounts == "2"), lines(coverstorey, ppois(2, lambda), lty = 3)) with(subset(nd, ocounts == "3"), lines(coverstorey, ppois(3, lambda), lty = 4)) with(subset(nd, ocounts == "4"), lines(coverstorey, ppois(4, lambda), lty = 5)) abline(h = 1, lty = 6) ## ----mlt-CHFLS-2-cmpr-estfun--------------------------------------------- sc_polr <- estfun(polr_CHFLS_2) sc_mlt <- -estfun(mlt_CHFLS_2)[,c(4, 5, 1:3)] summary((sc_polr - sc_mlt) / pmax(sqrt(.Machine$double.eps), sc_mlt)) ## ----mlt-CHFLS-2-cmpr-2-------------------------------------------------- RC(polr = sqrt(diag(vcov(polr_CHFLS_2))), mlt = sqrt(diag(vcov(mlt_CHFLS_2)))[c(4, 5, 1:3)]) ## ----mlt-CHFLS-2-cmpr-3-------------------------------------------------- cftest(polr_CHFLS_2) cftest(mlt_CHFLS_2, parm = names(coef(polr_CHFLS_2))) ## ----mlt-GBSG2-1-coxph-cmpr---------------------------------------------- cf <- coef(coxph_GBSG2_1) RC(coxph = sqrt(diag(vcov(coxph_GBSG2_1))), mlt = sqrt(diag(vcov(mlt_GBSG2_1)))[names(cf)], fss = sqrt(diag(vcov(fss_GBSG2_1)))[names(cf)]) ## ----mlt-GBSG2-1-coxph-cmpr-cftest--------------------------------------- cftest(coxph_GBSG2_1) cftest(mlt_GBSG2_1, parm = names(cf)) cftest(fss_GBSG2_1, parm = names(cf)) ## ----mlt-geyser-w-band--------------------------------------------------- cb_w <- confband(mlt_w, newdata = data.frame(1), K = 20, cheat = 100) ## ----mlt-geyser-w-cbplot, echo = FALSE----------------------------------- layout(matrix(1:2, ncol = 2)) #i <- (cb_w[, "q"] > 45 & cb_w[, "q"] < 110) #cb_w[-i, "lwr"] <- NA #cb_w[-i, "upr"] <- NA plot(cb_w[, "q"], cb_w[, "Estimate"], xlab = "Waiting times", ylab = "Transformation", main = "", type = "l") q <- cb_w[, "q"] lwr <- cb_w[, "lwr"] upr <- cb_w[, "upr"] polygon(c(q, rev(q)), c(lwr, rev(upr)), border = NA, col = "lightblue") ### rgb(.1, .1, .1, .1)) lines(cb_w[, "q"], cb_w[, "Estimate"]) rug(geyser$waiting, col = rgb(.1, .1, .1, .1)) plot(cb_w[, "q"], pnorm(cb_w[, "Estimate"]), xlab = "Waiting times", ylab = "Distribution", main = "", cex = .5, type = "n") polygon(c(q, rev(q)), c(pnorm(lwr), rev(pnorm(upr))), border = NA, col = "lightblue") ### rgb(.1, .1, .1, .1)) lines(cb_w[, "q"], pnorm(cb_w[, "Estimate"])) # lines(cb_w[, "q"], pnorm(cb_w[, "lwr"])) # lines(cb_w[, "q"], pnorm(cb_w[, "upr"])) rug(geyser$waiting, col = rgb(.1, .1, .1, .1)) ## ----mlt-geyser-w-simulate, results = "hide", cache = TRUE--------------- new_w <- simulate(mlt_w, nsim = 100) llr <- numeric(length(new_w)) pdist <- vector(mode = "list", length = length(new_w)) pdens <- vector(mode = "list", length = length(new_w)) ngeyser <- geyser q <- mkgrid(var_w, 100)[[1]] for (i in 1:length(new_w)) { ngeyser$waiting <- new_w[[i]] mlt_i <- mlt(ctm_w, data = ngeyser, scale = TRUE, theta = coef(mlt_w)) llr[[i]] <- logLik(mlt_i) - logLik(mlt_i, parm = coef(mlt_w)) pdist[[i]] <- predict(mlt_i, newdata = data.frame(1), type = "distribution", q = q) pdens[[i]] <- predict(mlt_i, newdata = data.frame(1), type = "density", q = q) } ## ----mlt-geyser-w-simulate-plot, echo = FALSE---------------------------- i <- which(llr < quantile(llr, prob = .95)) tpdist <- pdist[i] tpdens <- pdens[i] layout(matrix(1:2, ncol = 2)) plot(q, tpdist[[1]], type = "n", xlab = "Waiting times", ylab = "Distribution") polygon(c(q, rev(q)), c(pnorm(lwr), rev(pnorm(upr))), border = NA, col = "lightblue") tmp <- sapply(tpdist, function(x) lines(q, x, col = rgb(.1, .1, .1, .1))) plot(q, tpdens[[1]], type = "n", ylim = range(unlist(pdens)), xlab = "Waiting times", ylab = "Density") tmp <- sapply(tpdens, function(x) lines(q, x, col = rgb(.1, .1, .1, .1))) ## ----mlt-variables-factor------------------------------------------------ f_eye <- factor_var("eye", desc = "eye color", levels = c("blue", "brown", "green", "grey", "mixed")) ## ----mlt-variables-factor-methods---------------------------------------- variable.names(f_eye) desc(f_eye) variables::unit(f_eye) support(f_eye) bounds(f_eye) is.bounded(f_eye) ## ----mlt-variables-factor-mkgrid----------------------------------------- mkgrid(f_eye) ## ----mlt-variables-ordered----------------------------------------------- o_temp <- ordered_var("temp", desc = "temperature", levels = c("cold", "lukewarm", "warm", "hot")) ## ----mlt-variables-ordered-methods--------------------------------------- variable.names(o_temp) desc(o_temp) variables::unit(o_temp) support(o_temp) bounds(o_temp) is.bounded(o_temp) mkgrid(o_temp) ## ----mlt-variables-fd---------------------------------------------------- v_age <- numeric_var("age", desc = "age of patient", unit = "years", support = 25:75) ## ----mlt-variables-fd-methods-------------------------------------------- variable.names(v_age) desc(v_age) variables::unit(v_age) support(v_age) bounds(v_age) is.bounded(v_age) ## ----mlt-variables-fd-mkgrid--------------------------------------------- mkgrid(v_age) ## ----mlt-variables-c----------------------------------------------------- v_temp <- numeric_var("ztemp", desc = "Zurich daytime temperature", unit = "Celsius", support = c(-10.0, 35.0), add = c(-5, 5), bounds = c(-273.15, Inf)) ## ----mlt-variables-c-methods--------------------------------------------- variable.names(v_temp) desc(v_temp) variables::unit(v_temp) support(v_temp) bounds(v_temp) is.bounded(v_temp) ## ----mlt-variables-c-mkgrid---------------------------------------------- mkgrid(v_temp, n = 20) ## ----mlt-variables-vars-------------------------------------------------- vars <- c(f_eye, o_temp, v_age, v_temp) ## ----mlt-variables-vars-methods------------------------------------------ variable.names(vars) desc(vars) variables::unit(vars) support(vars) bounds(vars) is.bounded(vars) mkgrid(vars, n = 20) ## ----mlt-variables-vars-expand------------------------------------------- nd <- expand.grid(mkgrid(vars)) ## ----mlt-variables-check------------------------------------------------- check(vars, data = nd) ## ----mlt-basefun-polynom------------------------------------------------- xvar <- numeric_var("x", support = c(0.1, pi), bounds= c(0, Inf)) x <- as.data.frame(mkgrid(xvar, n = 20)) class(pb <- polynomial_basis(xvar, coef = c(TRUE, TRUE, FALSE, TRUE))) ## ----mlt-basefun-polynom-fun--------------------------------------------- head(pb(x)) ## ----mlt-basefun-polynom-mm---------------------------------------------- head(model.matrix(pb, data = x)) ## ----mlt-basefun-polynom-pred-------------------------------------------- predict(pb, newdata = x, coef = c(1, 2, 0, 1.75)) ## ----mlt-basefun-polynom-pred-deriv-------------------------------------- predict(pb, newdata = x, coef = c(1, 2, 0, 1.75), deriv = c(x = 1L)) ## ----mlt-basefun-log----------------------------------------------------- class(lb <- log_basis(xvar, ui = "increasing")) head(X <- model.matrix(lb, data = x)) ## ----mlt-basefun-log-constr---------------------------------------------- attr(X, "constraint") ## ----mlt-basefun-log-pred------------------------------------------------ predict(lb, newdata = x, coef = c(1, 2)) predict(lb, newdata = x, coef = c(1, 2), deriv = c(x = 1L)) ## ----mlt-basefun-Bernstein----------------------------------------------- class(bb <- Bernstein_basis(xvar, order = 3, ui = "increasing")) head(X <- model.matrix(bb, data = x)) ## ----mlt-basefun-Bernstein-constr---------------------------------------- cf <- c(1, 2, 2.5, 2.6) (cnstr <- attr(X, "constraint")) all(cnstr$ui %*% cf > cnstr$ci) ## ----mlt-basefun-Bernstein-predict--------------------------------------- predict(bb, newdata = x, coef = cf) predict(bb, newdata = x, coef = cf, deriv = c(x = 1)) ## ----mlt-basefun-as.basis------------------------------------------------ iv <- as.vars(iris) fb <- as.basis(~ Species + Sepal.Length + Sepal.Width, data = iv, remove_intercept = TRUE, negative = TRUE, contrasts.args = list(Species = "contr.sum")) class(fb) head(model.matrix(fb, data = iris)) ## ----mlt-basefun-c------------------------------------------------------- class(blb <- c(bern = bb, log = log_basis(xvar, ui = "increasing", remove_intercept = TRUE))) head(X <- model.matrix(blb, data = x)) attr(X, "constraint") ## ----mlt-basefun-b------------------------------------------------------- fb <- as.basis(~ g, data = factor_var("g", levels = LETTERS[1:2])) class(bfb <- b(bern = bb, f = fb)) nd <- expand.grid(mkgrid(bfb, n = 10)) head(X <- model.matrix(bfb, data = nd)) attr(X, "constraint") ## ----mlt-basefun-b-sumconstr--------------------------------------------- bfb <- b(bern = bb, f = fb, sumconstr = TRUE) head(X <- model.matrix(bfb, data = nd)) attr(X, "constraint") ## ----mlt-R-ordered------------------------------------------------------- head(R(sort(unique(CHFLS$R_happy)))) ## ----mlt-R-integer------------------------------------------------------- R(1:5, bounds = c(1, 5)) ## ----mlt-R-numeric------------------------------------------------------- x <- rnorm(10) xt <- round(x[x > -1 & x <= 2], 3) xl <- xt - sample(c(Inf, (0:(length(xt) - 2)) / length(xt)), replace = FALSE) xr <- xt + sample(c(Inf, (0:(length(xt) - 2)) / length(xt)), replace = FALSE) R(c(1.2, rep(NA, length(xt))), cleft = c(NA, xl), cright = c(NA, xr), tleft = -1, tright = 2) ## ----mlt-R-Surv---------------------------------------------------------- head(geyser$duration) head(R(geyser$duration)) ## ----mlt-mlt-chi-p------------------------------------------------------- pY <- function(x) pchisq(x, df = 20) dY <- function(x) dchisq(x, df = 20) qY <- function(p) qchisq(p, df = 20) ## ----mlt-mlt-chi-B------------------------------------------------------- yvar <- numeric_var("y", support = qY(c(.001, 1 - .001)), bounds = c(0, Inf)) By <- Bernstein_basis(yvar, order = ord <- 15, ui = "increasing") ## ----mlt-mlt-chi-mlt----------------------------------------------------- mod <- ctm(By) ## ----mlt-mlt-chi-trafo--------------------------------------------------- h <- function(x) qnorm(pY(x)) x <- seq(from = support(yvar)[["y"]][1], to = support(yvar)[["y"]][2], length.out = ord + 1) ## ----mlt-mlt-chi-coef---------------------------------------------------- mlt::coef(mod) <- h(x) ## ----mlt-mlt-chi-sim----------------------------------------------------- d <- as.data.frame(mkgrid(yvar, n = 500)) d$grid <- d$y d$y <- simulate(mod, newdata = d) ## ----mlt-mlt-chi-fit----------------------------------------------------- fmod <- mlt(mod, data = d, scale = TRUE) ## ----mlt-mlt-chi-model--------------------------------------------------- coef(mod) coef(fmod) logLik(fmod) logLik(fmod, parm = coef(mod)) ## ----mlt-mlt-chi-plot---------------------------------------------------- ## compute true density d$dtrue <- dY(d$grid) d$dest <- predict(fmod, q = sort(d$grid), type = "density") plot(mod, newdata = d, type = "density", col = "black", xlab = "y", ylab = "Density", ylim = c(0, max(d$dest))) lines(d$grid, d$dtrue, lty = 2) lines(sort(d$grid), d$dest[order(d$grid)], lty = 3) legend("topright", lty = 1:3, bty = "n", legend = c("True", "Approximated", "Estimated")) ## ----mlt-mlt-coef, echo = FALSE, results = "hide"------------------------ ### print coefs for regression tests objs <- ls() mltobj <- objs[grep("^mlt_", objs)] sapply(mltobj, function(m) eval(parse(text = paste("coef(", m, ")")))) ## ----mlt-sessionInfo, echo = FALSE, results = "hide"--------------------- sessionInfo()