## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE, comment = "#>", eval = FALSE, cache = FALSE,
  warning = FALSE, fig.width = 8, fig.height = 5
)
library(fastcpd)

## ----logistic-regression-setup------------------------------------------------
# #' Cost function for logistic regression, i.e., the binomial family in GLM.
# #'
# #' @param data Data to be used to calculate the cost values. The last column
# #'   is the response variable.
# #' @param family Family of the distribution.
# #' @keywords internal
# #'
# #' @noRd
# #' @return Cost value for the corresponding segment of data.
# cost_glm_binomial <- function(data, family = "binomial") {
#   data <- as.matrix(data)
#   p <- dim(data)[2] - 1
#   out <- fastglm::fastglm(
#     as.matrix(data[, 1:p]), data[, p + 1],
#     family = family
#   )
#   return(out$deviance / 2)
# }
#
# #' Implementation of vanilla PELT for logistic regression type data.
# #'
# #' @param data Data to be used for change point detection.
# #' @param beta Penalty coefficient for the number of change points.
# #' @param cost Cost function to be used to calculate cost values.
# #' @keywords internal
# #'
# #' @noRd
# #' @return A list consisting of two elements: the change point locations and
# #'   the negative log likelihood summed over the fitted segments.
# pelt_vanilla_binomial <- function(data, beta, cost = cost_glm_binomial) {
#   n <- dim(data)[1]
#   p <- dim(data)[2] - 1
#   Fobj <- c(-beta, 0)
#   cp_set <- list(NULL, 0)
#   set <- c(0, 1)
#   for (t in 2:n)
#   {
#     m <- length(set)
#     cval <- rep(NA, m)
#     for (i in 1:m)
#     {
#       k <- set[i] + 1
#       cval[i] <- 0
#       if (t - k >= p - 1) cval[i] <- suppressWarnings(cost(data[k:t, ]))
#     }
#     obj <- cval + Fobj[set + 1] + beta
#     min_val <- min(obj)
#     ind <- which(obj == min_val)[1]
#     cp_set_add <- c(cp_set[[set[ind] + 1]], set[ind])
#     cp_set <- append(cp_set, list(cp_set_add))
#     ind2 <- (cval + Fobj[set + 1]) <= min_val
#     set <- c(set[ind2], t)
#     Fobj <- c(Fobj, min_val)
#   }
#   cp <- cp_set[[n + 1]]
#   nLL <- 0
#   cp_loc <- unique(c(0, cp, n))
#   for (i in 1:(length(cp_loc) - 1))
#   {
#     seg <- (cp_loc[i] + 1):cp_loc[i + 1]
#     data_seg <- data[seg, ]
#     out <- fastglm::fastglm(
#       as.matrix(data_seg[, 1:p]), data_seg[, p + 1],
#       family = "binomial"
#     )
#     nLL <- out$deviance / 2 + nLL
#   }
#
#   output <- list(cp, nLL)
#   names(output) <- c("cp", "nLL")
#   return(output)
# }
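#
# # pelt_vanilla_binomial() follows the standard PELT recursion
# # F(t) = min_{s in set} {F(s) + C(s + 1, t) + beta}, pruning every s with
# # F(s) + C(s + 1, t) > F(t). A minimal sketch of the same recursion with a
# # Gaussian mean-change cost (an illustration only; the function name and
# # data are made up and not part of the vignette):
# pelt_mean_toy <- function(z, beta) {
#   n <- length(z)
#   cost_mean <- function(seg) sum((seg - mean(seg))^2) / 2
#   Fobj <- c(-beta, 0)
#   cp_set <- list(NULL, 0)
#   set <- c(0, 1)
#   for (t in 2:n) {
#     cval <- sapply(set, function(s) cost_mean(z[(s + 1):t]))
#     obj <- cval + Fobj[set + 1] + beta
#     min_val <- min(obj)
#     ind <- which.min(obj)
#     cp_set <- append(cp_set, list(c(cp_set[[set[ind] + 1]], set[ind])))
#     set <- c(set[(cval + Fobj[set + 1]) <= min_val], t)
#     Fobj <- c(Fobj, min_val)
#   }
#   cp_set[[n + 1]]
# }
# set.seed(1)
# pelt_mean_toy(c(rnorm(50, 0), rnorm(50, 3)), beta = log(100))
# # should report a change point near 50 (a leading 0 is the boundary
# # sentinel, as in the functions above)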
#
# #' Function to update the coefficients using gradient descent.
# #'
# #' @param data_new New data point used to update the coefficient.
# #' @param coef Previous coefficient to be updated.
# #' @param cum_coef Summation of all the past coefficients to be used in
# #'   averaging.
# #' @param cmatrix Hessian matrix in gradient descent.
# #' @param epsilon Small adjustment to avoid singularity when inverting the
# #'   Hessian matrix.
# #' @keywords internal
# #'
# #' @noRd
# #' @return A list of values containing the new coefficients, summation of
# #'   coefficients so far and the updated Hessian matrix.
# cost_logistic_update <- function(
#     data_new, coef, cum_coef, cmatrix, epsilon = 1e-10) {
#   p <- length(data_new) - 1
#   X_new <- data_new[1:p]
#   Y_new <- data_new[p + 1]
#   eta <- X_new %*% coef
#   mu <- 1 / (1 + exp(-eta))
#   cmatrix <- cmatrix + (X_new %o% X_new) * as.numeric((1 - mu) * mu)
#   lik_dev <- as.numeric(-(Y_new - mu)) * X_new
#   coef <- coef - solve(cmatrix + epsilon * diag(1, p), lik_dev)
#   cum_coef <- cum_coef + coef
#   return(list(coef, cum_coef, cmatrix))
# }
#
# #' Calculate negative log likelihood given data segment and guess of
# #' coefficient.
# #'
# #' @param data Data to be used to calculate the negative log likelihood.
# #' @param b Guess of the coefficient.
# #' @keywords internal
# #'
# #' @noRd
# #' @return Negative log likelihood.
# neg_log_lik_binomial <- function(data, b) {
#   p <- dim(data)[2] - 1
#   X <- data[, 1:p, drop = FALSE]
#   Y <- data[, p + 1, drop = FALSE]
#   u <- as.numeric(X %*% b)
#   L <- -Y * u + log(1 + exp(u))
#   return(sum(L))
# }
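#
# # For a 0/1 response the saturated Bernoulli log likelihood is zero, so
# # the deviance / 2 returned by cost_glm_binomial() should equal
# # neg_log_lik_binomial() evaluated at the fitted coefficients. A quick
# # consistency check (a sketch; the simulated data are made up):
# set.seed(2)
# X_chk <- matrix(rnorm(200), 100, 2)
# y_chk <- rbinom(100, 1, 1 / (1 + exp(-X_chk %*% c(1, -1))))
# fit_chk <- fastglm::fastglm(X_chk, y_chk, family = "binomial")
# fit_chk$deviance / 2
# neg_log_lik_binomial(cbind(X_chk, y_chk), coef(fit_chk))
# # the two values should agree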
#
# #' Find change points using dynamic programming with pruning and SeGD.
# #'
# #' @param data Data used to find change points.
# #' @param beta Penalty coefficient for the number of change points.
# #' @param B Initial guess of the number of change points, used to
# #'   pre-segment the data.
# #' @param trim Proportion of the data to ignore the change points at the
# #'   beginning, ending and between change points.
# #' @keywords internal
# #'
# #' @noRd
# #' @return A list containing potential change point locations and the
# #'   negative log likelihood based on the change point guesses.
# segd_binomial <- function(data, beta, B = 10, trim = 0.025) {
#   n <- dim(data)[1]
#   p <- dim(data)[2] - 1
#   Fobj <- c(-beta, 0)
#   cp_set <- list(NULL, 0)
#   set <- c(0, 1)
#
#   # choose the initial values based on pre-segmentation
#
#   index <- rep(1:B, rep(n / B, B))
#   coef.int <- matrix(NA, B, p)
#   for (i in 1:B)
#   {
#     out <- fastglm::fastglm(
#       as.matrix(data[index == i, 1:p]),
#       data[index == i, p + 1],
#       family = "binomial"
#     )
#     coef.int[i, ] <- coef(out)
#   }
#   X1 <- data[1, 1:p]
#   cum_coef <- coef <- matrix(coef.int[1, ], p, 1)
#   e_eta <- exp(coef %*% X1)
#   const <- e_eta / (1 + e_eta)^2
#   cmatrix <- array((X1 %o% X1) * as.numeric(const), c(p, p, 1))
#
#   for (t in 2:n)
#   {
#     m <- length(set)
#     cval <- rep(NA, m)
#
#     for (i in 1:(m - 1))
#     {
#       coef_c <- coef[, i]
#       cum_coef_c <- cum_coef[, i]
#       cmatrix_c <- cmatrix[, , i]
#       out <- cost_logistic_update(data[t, ], coef_c, cum_coef_c, cmatrix_c)
#       coef[, i] <- out[[1]]
#       cum_coef[, i] <- out[[2]]
#       cmatrix[, , i] <- out[[3]]
#       k <- set[i] + 1
#       cval[i] <- 0
#       if (t - k >= p - 1) {
#         cval[i] <-
#           neg_log_lik_binomial(data[k:t, ], cum_coef[, i] / (t - k + 1))
#       }
#     }
#
#     # the choice of initial values requires further investigation
#
#     cval[m] <- 0
#     Xt <- data[t, 1:p]
#     cum_coef_add <- coef_add <- coef.int[index[t], ]
#     e_eta_t <- exp(coef_add %*% Xt)
#     const <- e_eta_t / (1 + e_eta_t)^2
#     cmatrix_add <- (Xt %o% Xt) * as.numeric(const)
#
#     coef <- cbind(coef, coef_add)
#     cum_coef <- cbind(cum_coef, cum_coef_add)
#     cmatrix <- abind::abind(cmatrix, cmatrix_add, along = 3)
#
#     # Adding a momentum term (TBD)
#
#     obj <- cval + Fobj[set + 1] + beta
#     min_val <- min(obj)
#     ind <- which(obj == min_val)[1]
#     cp_set_add <- c(cp_set[[set[ind] + 1]], set[ind])
#     cp_set <- append(cp_set, list(cp_set_add))
#     ind2 <- (cval + Fobj[set + 1]) <= min_val
#     set <- c(set[ind2], t)
#     coef <- coef[, ind2, drop = FALSE]
#     cum_coef <- cum_coef[, ind2, drop = FALSE]
#     cmatrix <- cmatrix[, , ind2, drop = FALSE]
#     Fobj <- c(Fobj, min_val)
#   }
#
#   # Remove change points close to the boundaries
#
#   cp <- cp_set[[n + 1]]
#   if (length(cp) > 0) {
#     ind3 <- seq_along(cp)[(cp < trim * n) | (cp > (1 - trim) * n)]
#     if (length(ind3) > 0) cp <- cp[-ind3]
#   }
#
#   nLL <- 0
#   cp_loc <- unique(c(0, cp, n))
#   for (i in 1:(length(cp_loc) - 1))
#   {
#     seg <- (cp_loc[i] + 1):cp_loc[i + 1]
#     data_seg <- data[seg, ]
#     out <- fastglm::fastglm(
#       as.matrix(data_seg[, 1:p]), data_seg[, p + 1],
#       family = "binomial"
#     )
#     nLL <- out$deviance / 2 + nLL
#   }
#
#   output <- list(cp, nLL)
#   names(output) <- c("cp", "nLL")
#   return(output)
# }
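#
# # The idea behind SeGD is visible outside the dynamic program: streaming
# # cost_logistic_update() over a segment and averaging the iterates gives a
# # coefficient estimate close to the batch MLE without refitting the model
# # at every step. A sketch on one homogeneous segment (simulated data, with
# # a zero initial value instead of the pre-segmentation estimate):
# set.seed(3)
# X_seg <- cbind(1, rnorm(200))
# y_seg <- rbinom(200, 1, 1 / (1 + exp(-X_seg %*% c(-0.5, 1))))
# coef_s <- cum_s <- matrix(0, 2, 1)
# cmat_s <- diag(1e-10, 2)
# for (t in 1:200) {
#   upd <- cost_logistic_update(c(X_seg[t, ], y_seg[t]), coef_s, cum_s, cmat_s)
#   coef_s <- upd[[1]]
#   cum_s <- upd[[2]]
#   cmat_s <- upd[[3]]
# }
# cum_s / 200  # averaged SeGD estimate
# coef(fastglm::fastglm(X_seg, y_seg, family = "binomial"))  # batch MLE
# # the two should be close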

## ----poisson-regression-setup-------------------------------------------------
# #' Cost function for Poisson regression.
# #'
# #' @param data Data to be used to calculate the cost values. The last column
# #'   is the response variable.
# #' @param family Family of the distribution.
# #' @keywords internal
# #'
# #' @noRd
# #' @return Cost value for the corresponding segment of data.
# cost_glm_poisson <- function(data, family = "poisson") {
#   data <- as.matrix(data)
#   p <- dim(data)[2] - 1
#   out <- fastglm::fastglm(
#     as.matrix(data[, 1:p]), data[, p + 1],
#     family = family
#   )
#   return(out$deviance / 2)
# }
#
# #' Implementation of vanilla PELT for Poisson regression type data.
# #'
# #' @param data Data to be used for change point detection.
# #' @param beta Penalty coefficient for the number of change points.
# #' @param cost Cost function to be used to calculate cost values.
# #' @keywords internal
# #'
# #' @noRd
# #' @return A list containing the change point locations.
# pelt_vanilla_poisson <- function(data, beta, cost = cost_glm_poisson) {
#   n <- dim(data)[1]
#   p <- dim(data)[2] - 1
#   Fobj <- c(-beta, 0)
#   cp_set <- list(NULL, 0)
#   set <- c(0, 1)
#   for (t in 2:n)
#   {
#     m <- length(set)
#     cval <- rep(NA, m)
#     for (i in 1:m)
#     {
#       k <- set[i] + 1
#       if (t - k >= p - 1) {
#         cval[i] <- suppressWarnings(cost(data[k:t, ]))
#       } else {
#         cval[i] <- 0
#       }
#     }
#     obj <- cval + Fobj[set + 1] + beta
#     min_val <- min(obj)
#     ind <- which(obj == min_val)[1]
#     cp_set_add <- c(cp_set[[set[ind] + 1]], set[ind])
#     cp_set <- append(cp_set, list(cp_set_add))
#     ind2 <- (cval + Fobj[set + 1]) <= min_val
#     set <- c(set[ind2], t)
#     Fobj <- c(Fobj, min_val)
#     # if (t %% 100 == 0) print(t)
#   }
#   cp <- cp_set[[n + 1]]
#   output <- list(cp)
#   names(output) <- c("cp")
#
#   return(output)
# }
#
# #' Function to update the coefficients using gradient descent.
# #'
# #' @param data_new New data point used to update the coefficient.
# #' @param coef Previous coefficient to be updated.
# #' @param cum_coef Summation of all the past coefficients to be used in
# #'   averaging.
# #' @param cmatrix Hessian matrix in gradient descent.
# #' @param epsilon Small adjustment to avoid singularity when inverting the
# #'   Hessian matrix.
# #' @param G Upper bound on the fitted Poisson mean used in the Hessian
# #'   update.
# #' @param L Winsorization lower bound for the coefficients.
# #' @param H Winsorization upper bound for the coefficients.
# #' @keywords internal
# #'
# #' @noRd
# #' @return A list of values containing the new coefficients, summation of
# #'   coefficients so far and the updated Hessian matrix.
# cost_poisson_update <- function(
#     data_new, coef, cum_coef, cmatrix, epsilon = 0.001, G = 10^10,
#     L = -20, H = 20) {
#   p <- length(data_new) - 1
#   X_new <- data_new[1:p]
#   Y_new <- data_new[p + 1]
#   eta <- X_new %*% coef
#   mu <- exp(eta)
#   cmatrix <- cmatrix + (X_new %o% X_new) * min(as.numeric(mu), G)
#   lik_dev <- as.numeric(-(Y_new - mu)) * X_new
#   coef <- coef - solve(cmatrix + epsilon * diag(1, p), lik_dev)
#   coef <- pmin(pmax(coef, L), H)
#   cum_coef <- cum_coef + coef
#   return(list(coef, cum_coef, cmatrix))
# }
#
# #' Calculate negative log likelihood given data segment and guess of
# #' coefficient.
# #'
# #' @param data Data to be used to calculate the negative log likelihood.
# #' @param b Guess of the coefficient.
# #' @keywords internal
# #'
# #' @noRd
# #' @return Negative log likelihood.
# neg_log_lik_poisson <- function(data, b) {
#   p <- dim(data)[2] - 1
#   X <- data[, 1:p, drop = FALSE]
#   Y <- data[, p + 1, drop = FALSE]
#   u <- as.numeric(X %*% b)
#   L <- -Y * u + exp(u) + lfactorial(Y)
#   return(sum(L))
# }
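#
# # The update direction in cost_poisson_update() uses the per-observation
# # gradient -(y - exp(x'b)) * x of the Poisson negative log likelihood. A
# # quick finite-difference check against neg_log_lik_poisson() (a sketch
# # with made-up numbers):
# set.seed(4)
# x1 <- rnorm(3)
# y1 <- rpois(1, 1)
# b0 <- rep(0.1, 3)
# analytic <- as.numeric(-(y1 - exp(sum(x1 * b0)))) * x1
# numeric <- sapply(1:3, function(j) {
#   h <- 1e-6
#   b1 <- b0
#   b1[j] <- b1[j] + h
#   (neg_log_lik_poisson(matrix(c(x1, y1), 1), b1) -
#      neg_log_lik_poisson(matrix(c(x1, y1), 1), b0)) / h
# })
# rbind(analytic, numeric)  # the two rows should agree to ~1e-5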
#
# #' Find change points using dynamic programming with pruning and SeGD.
# #'
# #' @param data Data used to find change points.
# #' @param beta Penalty coefficient for the number of change points.
# #' @param B Initial guess of the number of change points, used to
# #'   pre-segment the data.
# #' @param trim Proportion of the data to ignore the change points at the
# #'   beginning, ending and between change points.
# #' @param epsilon Small adjustment to avoid singularity when inverting the
# #'   Hessian matrix.
# #' @param G Upper bound on the fitted Poisson mean used in the Hessian
# #'   update.
# #' @param L Winsorization lower bound for the coefficients.
# #' @param H Winsorization upper bound for the coefficients.
# #' @keywords internal
# #'
# #' @noRd
# #' @return A list containing the potential change point locations.
# segd_poisson <- function(
#     data, beta, B = 10, trim = 0.03, epsilon = 0.001, G = 10^10,
#     L = -20, H = 20) {
#   n <- dim(data)[1]
#   p <- dim(data)[2] - 1
#   Fobj <- c(-beta, 0)
#   cp_set <- list(NULL, 0)
#   set <- c(0, 1)
#
#   # choose the initial values based on pre-segmentation
#
#   index <- rep(1:B, rep(n / B, B))
#   coef.int <- matrix(NA, B, p)
#   for (i in 1:B)
#   {
#     out <- fastglm::fastglm(
#       x = as.matrix(data[index == i, 1:p]),
#       y = data[index == i, p + 1],
#       family = "poisson"
#     )
#     coef.int[i, ] <- coef(out)
#   }
#   X1 <- data[1, 1:p]
#   cum_coef <- coef <- pmin(pmax(matrix(coef.int[1, ], p, 1), L), H)
#   e_eta <- exp(coef %*% X1)
#   const <- e_eta
#   cmatrix <- array((X1 %o% X1) * as.numeric(const), c(p, p, 1))
#
#   for (t in 2:n)
#   {
#     m <- length(set)
#     cval <- rep(NA, m)
#     for (i in 1:(m - 1))
#     {
#       coef_c <- coef[, i]
#       cum_coef_c <- cum_coef[, i]
#       cmatrix_c <- cmatrix[, , i]
#       out <- cost_poisson_update(
#         data[t, ], coef_c, cum_coef_c, cmatrix_c,
#         epsilon = epsilon, G = G, L = L, H = H
#       )
#       coef[, i] <- out[[1]]
#       cum_coef[, i] <- out[[2]]
#       cmatrix[, , i] <- out[[3]]
#       k <- set[i] + 1
#       cum_coef_win <- pmin(pmax(cum_coef[, i] / (t - k + 1), L), H)
#       if (t - k >= p - 1) {
#         cval[i] <- neg_log_lik_poisson(data[k:t, ], cum_coef_win)
#       } else {
#         cval[i] <- 0
#       }
#     }
#
#     # the choice of initial values requires further investigation
#
#     cval[m] <- 0
#     Xt <- data[t, 1:p]
#     cum_coef_add <- coef_add <- pmin(pmax(coef.int[index[t], ], L), H)
#     e_eta_t <- exp(coef_add %*% Xt)
#     const <- e_eta_t
#     cmatrix_add <- (Xt %o% Xt) * as.numeric(const)
#     coef <- cbind(coef, coef_add)
#     cum_coef <- cbind(cum_coef, cum_coef_add)
#     cmatrix <- abind::abind(cmatrix, cmatrix_add, along = 3)
#
#     # Adding a momentum term (TBD)
#
#     obj <- cval + Fobj[set + 1] + beta
#     min_val <- min(obj)
#     ind <- which(obj == min_val)[1]
#     cp_set_add <- c(cp_set[[set[ind] + 1]], set[ind])
#     cp_set <- append(cp_set, list(cp_set_add))
#     ind2 <- (cval + Fobj[set + 1]) <= min_val
#     set <- c(set[ind2], t)
#     coef <- coef[, ind2, drop = FALSE]
#     cum_coef <- cum_coef[, ind2, drop = FALSE]
#     cmatrix <- cmatrix[, , ind2, drop = FALSE]
#     Fobj <- c(Fobj, min_val)
#   }
#
#   # Remove change points close to the boundaries
#
#   cp <- cp_set[[n + 1]]
#   if (length(cp) > 0) {
#     ind3 <- seq_along(cp)[(cp < trim * n) | (cp > (1 - trim) * n)]
#     if (length(ind3) > 0) cp <- cp[-ind3]
#   }
#
#   # Merge change points that are too close to each other
#
#   cp <- sort(unique(c(0, cp)))
#   index <- which(diff(cp) < trim * n)
#   if (length(index) > 0) cp <- floor((cp[-(index + 1)] + cp[-index]) / 2)
#   cp <- cp[cp > 0]
#
#   # nLL <- 0
#   # cp_loc <- unique(c(0, cp, n))
#   # for (i in 1:(length(cp_loc) - 1))
#   # {
#   #   seg <- (cp_loc[i] + 1):cp_loc[i + 1]
#   #   data_seg <- data[seg, ]
#   #   out <- fastglm(as.matrix(data_seg[, 1:p]), data_seg[, p + 1], family = "poisson")
#   #   nLL <- out$deviance / 2 + nLL
#   # }
#   # output <- list(cp, nLL)
#   # names(output) <- c("cp", "nLL")
#
#   output <- list(cp)
#   names(output) <- c("cp")
#
#   return(output)
# }
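#
# # The merging step at the end replaces change points that are closer than
# # trim * n with their midpoint. A toy illustration with made-up values
# # (n = 1000 and trim = 0.03, so the threshold is 30 observations):
# cp <- c(0, 380, 395, 751)
# index <- which(diff(cp) < 30)
# if (length(index) > 0) cp <- floor((cp[-(index + 1)] + cp[-index]) / 2)
# cp[cp > 0]
# #> [1] 387 751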
#
# #' Generate data from Poisson regression models with change points.
# #'
# #' @param n Number of observations.
# #' @param d Dimension of the covariates.
# #' @param true.coef True regression coefficients.
# #' @param true.cp.loc True change-point locations.
# #' @param Sigma Covariance matrix of the covariates.
# #' @keywords internal
# #'
# #' @noRd
# #' @return A list containing the generated data and the true cluster
# #'   assignments.
# data_gen_poisson <- function(n, d, true.coef, true.cp.loc, Sigma) {
#   loc <- unique(c(0, true.cp.loc, n))
#   if (dim(true.coef)[2] != length(loc) - 1) {
#     stop("true.coef and true.cp.loc do not match")
#   }
#   x <- mvtnorm::rmvnorm(n, mean = rep(0, d), sigma = Sigma)
#   y <- NULL
#   for (i in 1:(length(loc) - 1))
#   {
#     mu <- exp(
#       x[(loc[i] + 1):loc[i + 1], , drop = FALSE] %*%
#         true.coef[, i, drop = FALSE]
#     )
#     group <- rpois(length(mu), mu)
#     y <- c(y, group)
#   }
#   data <- cbind(x, y)
#   true_cluster <- rep(1:(length(loc) - 1), diff(loc))
#   result <- list(data, true_cluster)
#   return(result)
# }
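#
# # A quick sanity check of the generator (a sketch; the arguments are made
# # up for illustration):
# out_chk <- data_gen_poisson(
#   n = 300, d = 3, true.coef = cbind(rep(0.5, 3), rep(-0.5, 3)),
#   true.cp.loc = 150, Sigma = diag(3)
# )
# dim(out_chk[[1]])   # 300 x 4: three covariates plus the response
# table(out_chk[[2]]) # two clusters of 150 observations each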

## ----penalized-linear-regression-setup----------------------------------------
# #' Cost function for penalized linear regression.
# #'
# #' @param data Data to be used to calculate the cost values. The last column
# #'   is the response variable.
# #' @param lambda Penalty coefficient.
# #' @param family Family of the distribution.
# #' @keywords internal
# #'
# #' @noRd
# #' @return Cost value for the corresponding segment of data.
# cost_lasso <- function(data, lambda, family = "gaussian") {
#   data <- as.matrix(data)
#   n <- dim(data)[1]
#   p <- dim(data)[2] - 1
#   out <- glmnet::glmnet(
#     as.matrix(data[, 1:p]), data[, p + 1],
#     family = family, lambda = lambda
#   )
#   return(deviance(out) / 2)
# }
#
# #' Implementation of vanilla PELT for penalized linear regression type data.
# #'
# #' @param data Data to be used for change point detection.
# #' @param beta Penalty coefficient for the number of change points.
# #' @param B Number of blocks used to pre-segment the data.
# #' @param cost Cost function to be used to calculate cost values.
# #' @param family Family of the distribution.
# #' @keywords internal
# #'
# #' @noRd
# #' @return A list containing the change point locations.
# pelt_vanilla_lasso <- function(
#     data, beta, B = 10, cost = cost_lasso, family = "gaussian") {
#   n <- dim(data)[1]
#   p <- dim(data)[2] - 1
#   index <- rep(1:B, rep(n / B, B))
#   err_sd <- act_num <- rep(NA, B)
#   for (i in 1:B)
#   {
#     cvfit <- glmnet::cv.glmnet(
#       as.matrix(data[index == i, 1:p]), data[index == i, p + 1],
#       family = family
#     )
#     coef <- coef(cvfit, s = "lambda.1se")[-1]
#     resi <- data[index == i, p + 1] -
#       as.matrix(data[index == i, 1:p]) %*% as.numeric(coef)
#     err_sd[i] <- sqrt(mean(resi^2))
#     act_num[i] <- sum(abs(coef) > 0)
#   }
#   err_sd_mean <- mean(err_sd)  # only works if the error sd is unchanged
#   act_num_mean <- mean(act_num)
#   beta <- (act_num_mean + 1) * beta  # seems to work but there might be
#                                      # better choices
#
#   Fobj <- c(-beta, 0)
#   cp_set <- list(NULL, 0)
#   set <- c(0, 1)
#   for (t in 2:n)
#   {
#     m <- length(set)
#     cval <- rep(NA, m)
#     for (i in 1:m)
#     {
#       k <- set[i] + 1
#       if (t - k >= 1) {
#         cval[i] <- suppressWarnings(cost(
#           data[k:t, ], lambda = err_sd_mean * sqrt(2 * log(p) / (t - k + 1))
#         ))
#       } else {
#         cval[i] <- 0
#       }
#     }
#     obj <- cval + Fobj[set + 1] + beta
#     min_val <- min(obj)
#     ind <- which(obj == min_val)[1]
#     cp_set_add <- c(cp_set[[set[ind] + 1]], set[ind])
#     cp_set <- append(cp_set, list(cp_set_add))
#     ind2 <- (cval + Fobj[set + 1]) <= min_val
#     set <- c(set[ind2], t)
#     Fobj <- c(Fobj, min_val)
#     if (t %% 100 == 0) print(t)
#   }
#   cp <- cp_set[[n + 1]]
#
#   # nLL <- 0
#   # cp_loc <- unique(c(0, cp, n))
#   # for (i in 1:(length(cp_loc) - 1))
#   # {
#   #   seg <- (cp_loc[i] + 1):cp_loc[i + 1]
#   #   data_seg <- data[seg, ]
#   #   out <- glmnet(as.matrix(data_seg[, 1:p]), data_seg[, p + 1], lambda = lambda, family = family)
#   #   nLL <- deviance(out) / 2 + nLL
#   # }
#   # output <- list(cp, nLL)
#
#   output <- list(cp)
#   names(output) <- c("cp")
#   return(output)
# }
#
# #' Soft-thresholding operator used in the sequential lasso update.
# #'
# #' @param a Coefficient to be updated.
# #' @param lambda Penalty coefficient.
# #' @keywords internal
# #'
# #' @noRd
# #' @return Updated coefficient.
# soft_threshold <- function(a, lambda) {
#   sign(a) * pmax(abs(a) - lambda, 0)
# }
#
# #' Function to update the coefficients using gradient descent.
# #'
# #' @param data_new New data point used to update the coefficient.
# #' @param coef Previous coefficient to be updated.
# #' @param cum_coef Summation of all the past coefficients to be used in
# #'   averaging.
# #' @param cmatrix Hessian matrix in gradient descent.
# #' @param lambda Penalty coefficient.
# #' @keywords internal
# #'
# #' @noRd
# #' @return A list of values containing the new coefficients, summation of
# #'   coefficients so far and the updated Hessian matrix.
# cost_lasso_update <- function(data_new, coef, cum_coef, cmatrix, lambda) {
#   p <- length(data_new) - 1
#   X_new <- data_new[1:p]
#   Y_new <- data_new[p + 1]
#   mu <- X_new %*% coef
#   cmatrix <- cmatrix + X_new %o% X_new
#   # B <- as.vector(cmatrix_inv %*% X_new)
#   # cmatrix_inv <- cmatrix_inv - B %o% B / (1 + sum(X_new * B))
#   lik_dev <- as.numeric(-(Y_new - mu)) * X_new
#   coef <- coef - solve(cmatrix, lik_dev)
#   # The choice of norm affects the speed: the spectral norm is more
#   # accurate but slower than the Frobenius norm.
#   nc <- norm(cmatrix, type = "F")
#   coef <- soft_threshold(coef, lambda / nc)
#   cum_coef <- cum_coef + coef
#   return(list(coef, cum_coef, cmatrix))
# }
#
# #' Calculate negative log likelihood given data segment and guess of
# #' coefficient.
# #'
# #' @param data Data to be used to calculate the negative log likelihood.
# #' @param b Guess of the coefficient.
# #' @param lambda Penalty coefficient.
# #' @keywords internal
# #'
# #' @noRd
# #' @return Negative log likelihood.
# neg_log_lik_lasso <- function(data, b, lambda) {
#   p <- dim(data)[2] - 1
#   X <- data[, 1:p, drop = FALSE]
#   Y <- data[, p + 1, drop = FALSE]
#   resi <- Y - X %*% b
#   L <- sum(resi^2) / 2 + lambda * sum(abs(b))
#   return(L)
# }
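#
# # soft_threshold() shrinks every coordinate toward zero and zeroes out the
# # small ones, which is what produces sparse estimates in
# # cost_lasso_update() above:
# soft_threshold(c(-3, -0.5, 0, 0.5, 3), lambda = 1)
# #> [1] -2  0  0  0  2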
#
# #' Find change points using dynamic programming with pruning and SeGD.
# #'
# #' @param data Data used to find change points.
# #' @param beta Penalty coefficient for the number of change points.
# #' @param B Initial guess of the number of change points, used to
# #'   pre-segment the data.
# #' @param trim Proportion of the data to ignore the change points at the
# #'   beginning, ending and between change points.
# #' @param epsilon Small adjustment to avoid singularity when inverting the
# #'   Hessian matrix.
# #' @param family Family of the distribution.
# #' @keywords internal
# #'
# #' @noRd
# #' @return A list containing the potential change point locations.
# segd_lasso <- function(
#     data, beta, B = 10, trim = 0.025, epsilon = 1e-5, family = "gaussian") {
#   n <- dim(data)[1]
#   p <- dim(data)[2] - 1
#   Fobj <- c(-beta, 0)
#   cp_set <- list(NULL, 0)
#   set <- c(0, 1)
#
#   # choose the initial values based on pre-segmentation
#
#   index <- rep(1:B, rep(n / B, B))
#   coef.int <- matrix(NA, B, p)
#   err_sd <- act_num <- rep(NA, B)
#   for (i in 1:B)
#   {
#     cvfit <- glmnet::cv.glmnet(
#       as.matrix(data[index == i, 1:p]), data[index == i, p + 1],
#       family = family
#     )
#     coef.int[i, ] <- coef(cvfit, s = "lambda.1se")[-1]
#     resi <- data[index == i, p + 1] -
#       as.matrix(data[index == i, 1:p]) %*% as.numeric(coef.int[i, ])
#     err_sd[i] <- sqrt(mean(resi^2))
#     act_num[i] <- sum(abs(coef.int[i, ]) > 0)
#   }
#   err_sd_mean <- mean(err_sd)  # only works if the error sd is unchanged
#   act_num_mean <- mean(act_num)
#   beta <- (act_num_mean + 1) * beta  # seems to work but there might be
#                                      # better choices
#
#   X1 <- data[1, 1:p]
#   cum_coef <- coef <- matrix(coef.int[1, ], p, 1)
#   eta <- coef %*% X1
#   # c_int <- diag(1 / epsilon, p) -
#   #   X1 %o% X1 / epsilon^2 / (1 + sum(X1^2) / epsilon)
#   # cmatrix_inv <- array(c_int, c(p, p, 1))
#   cmatrix <- array(X1 %o% X1 + epsilon * diag(1, p), c(p, p, 1))
#
#   for (t in 2:n)
#   {
#     m <- length(set)
#     cval <- rep(NA, m)
#
#     for (i in 1:(m - 1))
#     {
#       coef_c <- coef[, i]
#       cum_coef_c <- cum_coef[, i]
#       # cmatrix_inv_c <- cmatrix_inv[, , i]
#       cmatrix_c <- cmatrix[, , i]
#       k <- set[i] + 1
#       out <- cost_lasso_update(
#         data[t, ], coef_c, cum_coef_c, cmatrix_c,
#         lambda = err_sd_mean * sqrt(2 * log(p) / (t - k + 1))
#       )
#       coef[, i] <- out[[1]]
#       cum_coef[, i] <- out[[2]]
#       # cmatrix_inv[, , i] <- out[[3]]
#       cmatrix[, , i] <- out[[3]]
#       if (t - k >= 2) {
#         cval[i] <- neg_log_lik_lasso(
#           data[k:t, ], cum_coef[, i] / (t - k + 1),
#           lambda = err_sd_mean * sqrt(2 * log(p) / (t - k + 1))
#         )
#       } else {
#         cval[i] <- 0
#       }
#     }
#
#     # the choice of initial values requires further investigation
#
#     cval[m] <- 0
#     Xt <- data[t, 1:p]
#     cum_coef_add <- coef_add <- coef.int[index[t], ]
#     # cmatrix_inv_add <- diag(1 / epsilon, p) -
#     #   Xt %o% Xt / epsilon^2 / (1 + sum(Xt^2) / epsilon)
#
#     coef <- cbind(coef, coef_add)
#     cum_coef <- cbind(cum_coef, cum_coef_add)
#     # cmatrix_inv <- abind::abind(cmatrix_inv, cmatrix_inv_add, along = 3)
#     cmatrix <- abind::abind(cmatrix, Xt %o% Xt + epsilon * diag(1, p), along = 3)
#
#     obj <- cval + Fobj[set + 1] + beta
#     min_val <- min(obj)
#     ind <- which(obj == min_val)[1]
#     cp_set_add <- c(cp_set[[set[ind] + 1]], set[ind])
#     cp_set <- append(cp_set, list(cp_set_add))
#     ind2 <- (cval + Fobj[set + 1]) <= min_val
#     set <- c(set[ind2], t)
#     coef <- coef[, ind2, drop = FALSE]
#     cum_coef <- cum_coef[, ind2, drop = FALSE]
#     cmatrix <- cmatrix[, , ind2, drop = FALSE]
#     # cmatrix_inv <- cmatrix_inv[, , ind2, drop = FALSE]
#     Fobj <- c(Fobj, min_val)
#   }
#
#   # Remove change points close to the boundaries and merge change points
#   # that are too close to each other
#
#   cp <- cp_set[[n + 1]]
#   if (length(cp) > 0) {
#     ind3 <- seq_along(cp)[(cp < trim * n) | (cp > (1 - trim) * n)]
#     if (length(ind3) > 0) cp <- cp[-ind3]
#   }
#
#   cp <- sort(unique(c(0, cp)))
#   index <- which(diff(cp) < trim * n)
#   if (length(index) > 0) cp <- floor((cp[-(index + 1)] + cp[-index]) / 2)
#   cp <- cp[cp > 0]
#
#   # nLL <- 0
#   # cp_loc <- unique(c(0, cp, n))
#   # for (i in 1:(length(cp_loc) - 1))
#   # {
#   #   seg <- (cp_loc[i] + 1):cp_loc[i + 1]
#   #   data_seg <- data[seg, ]
#   #   out <- fastglm(as.matrix(data_seg[, 1:p]), data_seg[, p + 1], family = "binomial")
#   #   nLL <- out$deviance / 2 + nLL
#   # }
#
#   output <- list(cp)
#   names(output) <- c("cp")
#   return(output)
# }
#
# #' Generate data from penalized linear regression models with change points.
# #'
# #' @param n Number of observations.
# #' @param d Dimension of the covariates.
# #' @param true.coef True regression coefficients.
# #' @param true.cp.loc True change-point locations.
# #' @param Sigma Covariance matrix of the covariates.
# #' @param evar Error variance.
# #' @keywords internal
# #'
# #' @noRd
# #' @return A list containing the generated data and the true cluster
# #'   assignments.
# data_gen_lasso <- function(n, d, true.coef, true.cp.loc, Sigma, evar) {
#   loc <- unique(c(0, true.cp.loc, n))
#   if (dim(true.coef)[2] != length(loc) - 1) {
#     stop("true.coef and true.cp.loc do not match")
#   }
#   x <- mvtnorm::rmvnorm(n, mean = rep(0, d), sigma = Sigma)
#   y <- NULL
#   for (i in 1:(length(loc) - 1))
#   {
#     Xb <- x[(loc[i] + 1):loc[i + 1], , drop = FALSE] %*%
#       true.coef[, i, drop = FALSE]
#     add <- Xb + rnorm(length(Xb), sd = sqrt(evar))
#     y <- c(y, add)
#   }
#   data <- cbind(x, y)
#   true_cluster <- rep(1:(length(loc) - 1), diff(loc))
#   result <- list(data, true_cluster)
#   return(result)
# }

## ----logistic-regression------------------------------------------------------
# set.seed(1)
# p <- 5
# x <- matrix(rnorm(300 * p, 0, 1), ncol = p)
#
# # Randomly generate coefficients with different means.
# theta <- rbind(rnorm(p, 0, 1), rnorm(p, 2, 1))
#
# # Randomly generate response variables based on the segmented data and
# # corresponding coefficients.
# y <- c(
#   rbinom(125, 1, 1 / (1 + exp(-x[1:125, ] %*% theta[1, ]))),
#   rbinom(300 - 125, 1, 1 / (1 + exp(-x[(125 + 1):300, ] %*% theta[2, ])))
# )
#
# segd_binomial(cbind(x, y), (p + 1) * log(300) / 2, B = 5)$cp
# #> [1] 125
#
# fastcpd.binomial(
#   cbind(y, x),
#   segment_count = 5,
#   beta = "BIC",
#   cost_adjustment = "BIC",
#   r.progress = FALSE
# )@cp_set
# #> [1] 125
#
# pelt_vanilla_binomial(cbind(x, y), (p + 1) * log(300) / 2)$cp
# #> [1]   0 125
#
# fastcpd.binomial(
#   cbind(y, x),
#   segment_count = 5,
#   vanilla_percentage = 1,
#   beta = "BIC",
#   cost_adjustment = "BIC",
#   r.progress = FALSE
# )@cp_set
# #> [1] 125
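#
# # Given the detected change point at 125, per-segment coefficients can be
# # recovered by refitting each segment (a sketch using the data simulated
# # above; the estimates should be close to theta[1, ] and theta[2, ]):
# for (seg in list(1:125, 126:300)) {
#   fit <- fastglm::fastglm(x[seg, ], y[seg], family = "binomial")
#   print(round(coef(fit), 2))
# }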

## ----poisson-regression-------------------------------------------------------
# set.seed(1)
# n <- 1500
# d <- 5
# rho <- 0.9
# Sigma <- array(0, c(d, d))
# for (i in 1:d) {
#   Sigma[i, ] <- rho^(abs(i - (1:d)))
# }
# delta <- c(5, 7, 9, 11, 13)
# a.sq <- 1
# delta.new <-
#   delta * sqrt(a.sq) / sqrt(as.numeric(t(delta) %*% Sigma %*% delta))
# true.cp.loc <- c(375, 750, 1125)
#
# # regression coefficients
# true.coef <- matrix(0, nrow = d, ncol = length(true.cp.loc) + 1)
# true.coef[, 1] <- c(1, 1.2, -1, 0.5, -2)
# true.coef[, 2] <- true.coef[, 1] + delta.new
# true.coef[, 3] <- true.coef[, 1]
# true.coef[, 4] <- true.coef[, 3] - delta.new
#
# out <- data_gen_poisson(n, d, true.coef, true.cp.loc, Sigma)
# data <- out[[1]]
# g_tr <- out[[2]]
# beta <- log(n) * (d + 1) / 2
#
# segd_poisson(
#   data, beta, trim = 0.03, B = 10, epsilon = 0.001, G = 10^10, L = -20, H = 20
# )$cp
# #> [1]  380  751 1136 1251
#
# fastcpd.poisson(
#   cbind(data[, d + 1], data[, 1:d]),
#   beta = beta,
#   cost_adjustment = "BIC",
#   epsilon = 0.001,
#   segment_count = 10,
#   r.progress = FALSE
# )@cp_set
# #> [1]  380  751 1136 1251
#
# pelt_vanilla_poisson(data, beta)$cp
# #> [1]    0  374  752 1133
#
# fastcpd.poisson(
#   cbind(data[, d + 1], data[, 1:d]),
#   segment_count = 10,
#   vanilla_percentage = 1,
#   beta = beta,
#   cost_adjustment = "BIC",
#   r.progress = FALSE
# )@cp_set
# #> [1]  374  752 1133

## ----penalized-linear-regression----------------------------------------------
# set.seed(1)
# n <- 1000
# s <- 3
# d <- 50
# evar <- 0.5
# Sigma <- diag(1, d)
# true.cp.loc <- c(100, 300, 500, 800, 900)
# seg <- length(true.cp.loc) + 1
# true.coef <- matrix(rnorm(seg * s), s, seg)
# true.coef <- rbind(true.coef, matrix(0, d - s, seg))
# out <- data_gen_lasso(n, d, true.coef, true.cp.loc, Sigma, evar)
# data <- out[[1]]
# beta <- log(n) / 2  # beta plays a different role here: it is rescaled
#                     # inside the functions by the mean active-set size
#
# segd_lasso(data, beta, B = 10, trim = 0.025)$cp
# #> [1] 100 300 520 800 901
#
# fastcpd.lasso(
#   cbind(data[, d + 1], data[, 1:d]),
#   epsilon = 1e-5,
#   beta = beta,
#   cost_adjustment = "BIC",
#   pruning_coef = 0,
#   r.progress = FALSE
# )@cp_set
# #> [1] 100 300 520 800 901
#
# pelt_vanilla_lasso(data, beta, cost = cost_lasso)$cp
# #> [1] 100
# #> [1] 200
# #> [1] 300
# #> [1] 400
# #> [1] 500
# #> [1] 600
# #> [1] 700
# #> [1] 800
# #> [1] 900
# #> [1] 1000
# #> [1]   0 103 299 510 800 900
#
# fastcpd.lasso(
#   cbind(data[, d + 1], data[, 1:d]),
#   vanilla_percentage = 1,
#   epsilon = 1e-5,
#   beta = beta,
#   cost_adjustment = "BIC",
#   pruning_coef = 0,
#   r.progress = FALSE
# )@cp_set
# #> [1] 103 299 510 800 900
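#
# # The main practical difference between the two implementations is speed:
# # pelt_vanilla_lasso() refits glmnet on every surviving candidate segment
# # at every time point, while segd_lasso() performs a single sequential
# # update per observation. A rough timing sketch (results are
# # machine-dependent; both calls reuse `data` and `beta` from above):
# system.time(segd_lasso(data, beta, B = 10, trim = 0.025))
# system.time(pelt_vanilla_lasso(data, beta, cost = cost_lasso))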