## ----setup, include=FALSE-----------------------------------------------------

knitr::opts_chunk$set(
    collapse = TRUE,
    comment = "#>"
)
old_opts <- options(digits = 2)

## ----install-likelihood-model, eval = FALSE-----------------------------------
# if (!require(devtools)) {
#     install.packages("devtools")
# }
# devtools::install_github("queelius/likelihood.model")

## ----load-packages, message=FALSE, warning=FALSE, echo = FALSE----------------
library(dplyr)
library(likelihood.model)
library(generics)

## ----data-gen-exp-series------------------------------------------------------
n <- 150
rates <- c(1.1, 1.2, 1.3)
set.seed(1235)

df <- data.frame(
    t1 = rexp(n, rates[1]),
    t2 = rexp(n, rates[2]),
    t3 = rexp(n, rates[3])
)
df$t <- apply(df, 1, min)

# map each observation to the corresponding index (component) that was minimum
df$k <- apply(df, 1, which.min)

for (i in 1:n) {
    for (p in 1:3) {
        if (p == df$k[i]) next
        df[i, p] <- NA
    }
}
head(df)

## ----observed-likelihood, cache=TRUE------------------------------------------
score.observed <- function(df, rates, ...) {
  rep(-sum(df$t), length(rates)) + (df |> dplyr::count(k))$n / rates
}

loglik.observed <- function(df, rates, ...) {
  sum(log(rates[df$k])) - sum(rates) * sum(df$t)
}

model.observed <- likelihood_contr_model$new(
  obs_type = function(df) { "observed" }
)

## ----mle-optim-observed, cache=TRUE-------------------------------------------
optim(rates, fn = loglik(model.observed), df=df, control=list(fnscale=-1))

## ----mle-observed-likelihood-model, cache=TRUE--------------------------------
rates.hat <- fit(model.observed)(df, par = rates)
summary(rates.hat)

## ----hess-loglike-------------------------------------------------------------
hess_loglik.observed <- function(df, rates, ...) {
  p <- length(rates)
  H <- matrix(0, p, p)
  counts <- df |> dplyr::count(k)
  for (j in 1:p) {
    H[j, j] <- -counts$n[j] / rates[j]^2
  }
  H
}
# Observed information from MLE (negative Hessian)
cat("Observed information matrix (from MLE):\n")
print(-rates.hat$hessian)

# Computed directly via hess_loglik
cat("\nHessian at MLE:\n")
print(hess_loglik(model.observed)(df, coef(rates.hat)))

## ----bootstrap, cache=TRUE----------------------------------------------------
model.samp <- sampler(model.observed, df = df, par = rates)
boot_result <- model.samp(n = 500)

cat("Bootstrap MLE:\n")
print(coef(boot_result))

cat("\nBootstrap standard errors:\n")
print(se(boot_result))

cat("\nBootstrap bias:\n")
print(bias(boot_result))

cat("\nBootstrap covariance:\n")
print(vcov(boot_result))

## -----------------------------------------------------------------------------
# if df$t > .5, then the system was right-censored
df.censored <- df |> mutate(
    censored = t > .5,
    t = ifelse(censored, .5, t)
)

df.censored[df.censored[, "censored"], "k"] <- NA
df.censored[df.censored[, "censored"], paste0("t", 1:3)] <- rep(NA, 3)
head(df.censored)

## -----------------------------------------------------------------------------
loglik.censored <- function(df, rates) {
    -sum(rates) * sum(df$t)
}
score.censored <- function(df, rates) {
    rep(-sum(df$t), length(rates))
}
hess_loglik_censored <- function(df, rates) {
    p <- length(rates)
    matrix(0, p, p)
}

model.censored <- likelihood_contr_model$new(
    obs_type = function(df) {
        ifelse(df$censored, "censored", "observed")
    }
)

mle.censored <- fit(model.censored)(df.censored, par = rates)
summary(mle.censored)

## ----case2-inference----------------------------------------------------------
cat("Uncensored estimates:\n")
cat("  Estimates:", coef(rates.hat), "\n")
cat("  Std errors:", se(rates.hat), "\n")

cat("\nCensored estimates:\n")
cat("  Estimates:", coef(mle.censored), "\n")
cat("  Std errors:", se(mle.censored), "\n")

cat("\n95% Confidence Intervals (uncensored):\n")
print(confint(rates.hat))

cat("\n95% Confidence Intervals (censored):\n")
print(confint(mle.censored))

cat("\nTrue parameters:", rates, "\n")

## ----case2-fisherian----------------------------------------------------------
# Support at MLE (should be 0)
s_at_mle <- support(mle.censored, coef(mle.censored), df.censored,
                    model.censored)
cat("Support at MLE:", s_at_mle, "\n")

# Evidence for MLE vs true parameters
ev <- evidence(model.censored, df.censored, coef(mle.censored), rates)
cat("Evidence (MLE vs true):", ev, "\n")

# Likelihood interval for rate 1 (first component)
li <- likelihood_interval(mle.censored, df.censored, model.censored,
                          k = 8, param = 1)
cat("\n1/8 likelihood interval for lambda_1:\n")
print(li)

## ----single-param-------------------------------------------------------------
# Define contribution types for a single-rate exponential
loglik.exp_exact <- function(df, par, ...) {
  lambda <- par[1]
  sum(log(lambda) - lambda * df$t)
}

loglik.exp_right <- function(df, par, ...) {
  lambda <- par[1]
  -lambda * sum(df$t)
}

score.exp_exact <- function(df, par, ...) {
  lambda <- par[1]
  nrow(df) / lambda - sum(df$t)
}

score.exp_right <- function(df, par, ...) {
  lambda <- par[1]
  -sum(df$t)
}

# Simulate right-censored exponential data
set.seed(42)
lambda_true <- 2.0
n_single <- 200
censor_time_single <- 0.4

raw_lifetimes <- rexp(n_single, lambda_true)
df_single <- data.frame(
  t = pmin(raw_lifetimes, censor_time_single),
  type = ifelse(raw_lifetimes > censor_time_single, "exp_right", "exp_exact")
)

cat("Exact observations:", sum(df_single$type == "exp_exact"), "\n")
cat("Right-censored:", sum(df_single$type == "exp_right"), "\n")

model_single <- likelihood_contr_model$new(
  obs_type = function(df) df$type,
  assumptions = c("exponential lifetimes", "non-informative right censoring")
)

# Fit and verify
mle_single <- fit(model_single)(df_single, par = c(lambda = 1))
cat("\nMLE:", coef(mle_single), "(true:", lambda_true, ")\n")

# Closed-form MLE for comparison: d / T
d <- sum(df_single$type == "exp_exact")
total_T <- sum(df_single$t)
cat("Closed-form MLE:", d / total_T, "\n")
cat("95% CI:", confint(mle_single)[1, ], "\n")

## ----analytical-example, eval=FALSE-------------------------------------------
# # Slow (default): numerical differentiation of loglik.my_type
# loglik.my_type <- function(df, par, ...) { ... }
# 
# # Fast: provide analytical derivatives
# score.my_type <- function(df, par, ...) { ... }
# hess_loglik.my_type <- function(df, par, ...) { ... }

## ----cache-tip, eval=FALSE----------------------------------------------------
# # Good: cached S3 wrapper
# ll <- loglik(model)
# result <- optim(par, fn = ll, df = df, control = list(fnscale = -1))
# 
# # Also good: using fit() which does this internally
# result <- fit(model)(df, par = par)
# 
# # Slower: calling R6 method directly in a loop (no caching)
# # model$loglik(df, par) -- re-splits df every call

## ----cleanup, include=FALSE---------------------------------------------------
options(old_opts)