Comparison with other R packages

Data setup

Univariate mean change

# Univariate mean change
set.seed(1)
p <- 1
mean_data_1 <- rbind(
  mvtnorm::rmvnorm(300, mean = rep(0, p), sigma = diag(100, p)),
  mvtnorm::rmvnorm(400, mean = rep(50, p), sigma = diag(100, p)),
  mvtnorm::rmvnorm(300, mean = rep(2, p), sigma = diag(100, p))
)

plot.ts(mean_data_1)
plot of chunk data-setup-univariate-mean-change
plot of chunk data-setup-univariate-mean-change

Univariate mean and/or variance change

# Univariate mean and/or variance change
set.seed(1)
p <- 1
mv_data_1 <- rbind(
  mvtnorm::rmvnorm(300, mean = rep(0, p), sigma = diag(1, p)),
  mvtnorm::rmvnorm(400, mean = rep(10, p), sigma = diag(1, p)),
  mvtnorm::rmvnorm(300, mean = rep(0, p), sigma = diag(100, p)),
  mvtnorm::rmvnorm(300, mean = rep(0, p), sigma = diag(1, p)),
  mvtnorm::rmvnorm(400, mean = rep(10, p), sigma = diag(1, p)),
  mvtnorm::rmvnorm(300, mean = rep(10, p), sigma = diag(100, p))
)

plot.ts(mv_data_1)
plot of chunk data-setup-univariate-mean-and-or-variance-change
plot of chunk data-setup-univariate-mean-and-or-variance-change

Multivariate mean change

# Multivariate mean change
set.seed(1)
p <- 3
mean_data_3 <- rbind(
  mvtnorm::rmvnorm(300, mean = rep(0, p), sigma = diag(100, p)),
  mvtnorm::rmvnorm(400, mean = rep(50, p), sigma = diag(100, p)),
  mvtnorm::rmvnorm(300, mean = rep(2, p), sigma = diag(100, p))
)

plot.ts(mean_data_3)
plot of chunk data-setup-multivariate-mean-change
plot of chunk data-setup-multivariate-mean-change

Multivariate mean and/or variance change

# Multivariate mean and/or variance change
set.seed(1)
p <- 4
mv_data_3 <- rbind(
  mvtnorm::rmvnorm(300, mean = rep(0, p), sigma = diag(1, p)),
  mvtnorm::rmvnorm(400, mean = rep(10, p), sigma = diag(1, p)),
  mvtnorm::rmvnorm(300, mean = rep(0, p), sigma = diag(100, p)),
  mvtnorm::rmvnorm(300, mean = rep(0, p), sigma = diag(1, p)),
  mvtnorm::rmvnorm(400, mean = rep(10, p), sigma = diag(1, p)),
  mvtnorm::rmvnorm(300, mean = rep(10, p), sigma = diag(100, p))
)

plot.ts(mv_data_3)
plot of chunk data-setup-multivariate-mean-and-or-variance-change
plot of chunk data-setup-multivariate-mean-and-or-variance-change

Linear regression

# Linear regression
set.seed(1)
n <- 300
p <- 4
x <- mvtnorm::rmvnorm(n, rep(0, p), diag(p))
theta_0 <- rbind(c(1, 3.2, -1, 0), c(-1, -0.5, 2.5, -2), c(0.8, 0, 1, 2))
y <- c(
  x[1:100, ] %*% theta_0[1, ] + rnorm(100, 0, 3),
  x[101:200, ] %*% theta_0[2, ] + rnorm(100, 0, 3),
  x[201:n, ] %*% theta_0[3, ] + rnorm(100, 0, 3)
)
lm_data <- data.frame(y = y, x = x)

plot.ts(lm_data)
plot of chunk data-setup-linear-regression
plot of chunk data-setup-linear-regression

Logistic regression

# Logistic regression
set.seed(1)
n <- 500
p <- 4
x <- mvtnorm::rmvnorm(n, rep(0, p), diag(p))
theta <- rbind(rnorm(p, 0, 1), rnorm(p, 2, 1))
y <- c(
  rbinom(300, 1, 1 / (1 + exp(-x[1:300, ] %*% theta[1, ]))),
  rbinom(200, 1, 1 / (1 + exp(-x[301:n, ] %*% theta[2, ])))
)
binomial_data <- data.frame(y = y, x = x)

plot.ts(binomial_data)
plot of chunk data-setup-logistic-regression
plot of chunk data-setup-logistic-regression

Poisson regression

# Poisson regression
set.seed(1)
n <- 1100
p <- 3
x <- mvtnorm::rmvnorm(n, rep(0, p), diag(p))
delta <- rnorm(p)
theta_0 <- c(1, 0.3, -1)
y <- c(
  rpois(500, exp(x[1:500, ] %*% theta_0)),
  rpois(300, exp(x[501:800, ] %*% (theta_0 + delta))),
  rpois(200, exp(x[801:1000, ] %*% theta_0)),
  rpois(100, exp(x[1001:1100, ] %*% (theta_0 - delta)))
)
poisson_data <- data.frame(y = y, x = x)

plot.ts(log(poisson_data$y))
plot of chunk data-setup-poisson-regression
plot of chunk data-setup-poisson-regression
plot.ts(poisson_data[, -1])
plot of chunk data-setup-poisson-regression
plot of chunk data-setup-poisson-regression

Lasso

# Lasso
set.seed(1)
n <- 480
p_true <- 6
p <- 50
x <- mvtnorm::rmvnorm(n, rep(0, p), diag(p))
theta_0 <- rbind(
  runif(p_true, -5, -2),
  runif(p_true, -3, 3),
  runif(p_true, 2, 5),
  runif(p_true, -5, 5)
)
theta_0 <- cbind(theta_0, matrix(0, ncol = p - p_true, nrow = 4))
y <- c(
  x[1:80, ] %*% theta_0[1, ] + rnorm(80, 0, 1),
  x[81:200, ] %*% theta_0[2, ] + rnorm(120, 0, 1),
  x[201:320, ] %*% theta_0[3, ] + rnorm(120, 0, 1),
  x[321:n, ] %*% theta_0[4, ] + rnorm(160, 0, 1)
)
lasso_data <- data.frame(y = y, x = x)

plot.ts(lasso_data[, seq_len(p_true + 1)])
plot of chunk data-setup-lasso
plot of chunk data-setup-lasso

AR(3)

# AR(3)
set.seed(1)
n <- 1000
x <- rep(0, n + 3)
for (i in 1:600) {
  x[i + 3] <- 0.6 * x[i + 2] - 0.2 * x[i + 1] + 0.1 * x[i] + rnorm(1, 0, 3)
}
for (i in 601:1000) {
  x[i + 3] <- 0.3 * x[i + 2] + 0.4 * x[i + 1] + 0.2 * x[i] + rnorm(1, 0, 3)
}
ar_data <- x[-seq_len(3)]

plot.ts(ar_data)
plot of chunk data-setup-ar3
plot of chunk data-setup-ar3

GARCH(1, 1)

# GARCH(1, 1)
set.seed(1)
n <- 400
sigma_2 <- rep(1, n + 1)
x <- rep(0, n + 1)
for (i in seq_len(200)) {
  sigma_2[i + 1] <- 20 + 0.5 * x[i]^2 + 0.1 * sigma_2[i]
  x[i + 1] <- rnorm(1, 0, sqrt(sigma_2[i + 1]))
}
for (i in 201:400) {
  sigma_2[i + 1] <- 1 + 0.1 * x[i]^2 + 0.5 * sigma_2[i]
  x[i + 1] <- rnorm(1, 0, sqrt(sigma_2[i + 1]))
}
garch_data <- x[-1]

plot.ts(garch_data)
plot of chunk data-setup-garch11
plot of chunk data-setup-garch11

VAR(2)

# VAR(2)
set.seed(1)
n <- 800
p <- 2
theta_1 <- matrix(c(-0.3, 0.6, -0.5, 0.4, 0.2, 0.2, 0.2, -0.2), nrow = p)
theta_2 <- matrix(c(0.3, -0.4, 0.1, -0.5, -0.5, -0.2, -0.5, 0.2), nrow = p)
x <- matrix(0, n + 2, p)
for (i in 1:500) {
  x[i + 2, ] <- theta_1 %*% c(x[i + 1, ], x[i, ]) + rnorm(p, 0, 1)
}
for (i in 501:n) {
  x[i + 2, ] <- theta_2 %*% c(x[i + 1, ], x[i, ]) + rnorm(p, 0, 1)
}
var_data <- x[-seq_len(2), ]

plot.ts(var_data)
plot of chunk data-setup-var2
plot of chunk data-setup-var2

Univariate mean change

The true change points are 300 and 700. Some methods are plotted due to the un-retrievable change points.

results[["mean_data_1"]][["fastcpd"]] <-
  fastcpd::fastcpd.mean(mean_data_1, r.progress = FALSE)@cp_set
results[["mean_data_1"]][["fastcpd"]]
#> [1] 300 700
results[["mean_data_1"]][["CptNonPar"]] <-
  CptNonPar::np.mojo(mean_data_1, G = floor(length(mean_data_1) / 6))$cpts
results[["mean_data_1"]][["CptNonPar"]]
#> [1] 300 700
results[["mean_data_1"]][["strucchange"]] <-
  strucchange::breakpoints(y ~ 1, data = data.frame(y = mean_data_1))$breakpoints
results[["mean_data_1"]][["strucchange"]]
#> [1] 300 700
results[["mean_data_1"]][["ecp"]] <- ecp::e.divisive(mean_data_1)$estimates
results[["mean_data_1"]][["ecp"]]
#> [1]    1  301  701 1001
results[["mean_data_1"]][["changepoint"]] <-
  changepoint::cpts(changepoint::cpt.mean(c(mean_data_1)/mad(mean_data_1), method = "PELT"))
results[["mean_data_1"]][["changepoint"]]
#> [1] 300 700
results[["mean_data_1"]][["breakfast"]] <-
  breakfast::breakfast(mean_data_1)$cptmodel.list[[6]]$cpts
results[["mean_data_1"]][["breakfast"]]
#> [1] 300 700
results[["mean_data_1"]][["wbs"]] <-
  wbs::wbs(mean_data_1)$cpt$cpt.ic$mbic.penalty
results[["mean_data_1"]][["wbs"]]
#> [1] 300 700
results[["mean_data_1"]][["mosum"]] <-
  mosum::mosum(c(mean_data_1), G = 40)$cpts.info$cpts
results[["mean_data_1"]][["mosum"]]
#> [1] 300 700
results[["mean_data_1"]][["fpop"]] <-
  fpop::Fpop(mean_data_1, nrow(mean_data_1))$t.est
results[["mean_data_1"]][["fpop"]]
#> [1]  300  700 1000
results[["mean_data_1"]][["gfpop"]] <-
  gfpop::gfpop(
    data = mean_data_1,
    mygraph = gfpop::graph(
      penalty = 2 * log(nrow(mean_data_1)) * gfpop::sdDiff(mean_data_1) ^ 2,
      type = "updown"
    ),
    type = "mean"
  )$changepoints
results[["mean_data_1"]][["gfpop"]]
#> [1]  300  700 1000
results[["mean_data_1"]][["jointseg"]] <-
  jointseg::jointSeg(mean_data_1, K = 2)$bestBkp
results[["mean_data_1"]][["jointseg"]]
#> [1] 300 700
results[["mean_data_1"]][["stepR"]] <-
  stepR::stepFit(mean_data_1, alpha = 0.5)$rightEnd
results[["mean_data_1"]][["stepR"]]
#> [1]  300  700 1000
results[["mean_data_1"]][["cpm"]] <-
  cpm::processStream(mean_data_1, cpmType = "Student")$changePoints
results[["mean_data_1"]][["cpm"]]
#> [1] 299 699
results[["mean_data_1"]][["segmented"]] <-
  segmented::stepmented(
    as.numeric(mean_data_1), npsi = 2
  )$psi[, "Est."]
results[["mean_data_1"]][["segmented"]]
#> psi1.index psi2.index 
#>   300.0813   700.1513
results[["mean_data_1"]][["mcp"]] <- mcp::mcp(
  list(y ~ 1, ~ 1, ~ 1),
  data = data.frame(y = mean_data_1, x = seq_len(nrow(mean_data_1))),
  par_x = "x"
)
#> Compiling model graph
#>    Resolving undeclared variables
#>    Allocating nodes
#> Graph information:
#>    Observed stochastic nodes: 1000
#>    Unobserved stochastic nodes: 6
#>    Total graph size: 17028
#> 
#> Initializing model
#> Finished sampling in 19.3 seconds
if (requireNamespace("mcp", quietly = TRUE)) {
  plot(results[["mean_data_1"]][["mcp"]])
}