Skip to contents
for (package in c("ggplot2", "mvtnorm")) {
  if (!requireNamespace(package, quietly = TRUE)) utils::install.packages(
    package, repos = "https://cloud.r-project.org", quiet = TRUE
  )
}
set.seed(1)
n <- 1500
p_true <- 6
p <- 50
x <- mvtnorm::rmvnorm(480, rep(0, p), diag(p))
theta_0 <- rbind(
  runif(p_true, -5, -2),
  runif(p_true, -3, 3),
  runif(p_true, 2, 5),
  runif(p_true, -5, 5)
)
theta_0 <- cbind(theta_0, matrix(0, ncol = p - p_true, nrow = 4))
y <- c(
  x[1:80, ] %*% theta_0[1, ] + rnorm(80, 0, 1),
  x[81:200, ] %*% theta_0[2, ] + rnorm(120, 0, 1),
  x[201:320, ] %*% theta_0[3, ] + rnorm(120, 0, 1),
  x[321:480, ] %*% theta_0[4, ] + rnorm(160, 0, 1)
)
result <- fastcpd(
  formula = y ~ . - 1,
  data = data.frame(y = y, x = x),
  family = "lasso",
  r.progress = FALSE
)
summary(result)
#> 
#> Call:
#> fastcpd(formula = y ~ . - 1, data = data.frame(y = y, x = x), 
#>     family = "lasso", r.progress = FALSE)
#> 
#> Change points:
#> 79 200 325 
#> 
#> Cost values:
#> 304.9901 332.8808 468.1191 470.8976 
#> 
#> Parameters:
#> 50 x 4 sparse Matrix of class "dgCMatrix"
#>       segment 1  segment 2 segment 3 segment 4
#>  [1,] -1.794342 -1.7863922  3.828320 -2.384733
#>  [2,] -1.669612 -1.5520762  3.289372  2.148898
#>  [3,] -3.315550  0.6151654  4.081897  2.832707
#>  [4,] -4.056087  .          1.797223 -2.517172
#>  [5,] -3.952455  .          3.862429  2.286681
#>  [6,] -3.183277 -1.7592907  2.379079  3.277910
#>  [7,]  .         .          .         .       
#>  [8,]  .         .          .         .       
#>  [9,]  .         .          .         .       
#> [10,]  .         .          .         .       
#> [11,]  .         .          .         .       
#> [12,]  .         .          .         .       
#> [13,]  .         .          .         .       
#> [14,]  .         .          .         .       
#> [15,]  .         .          .         .       
#> [16,]  .         .          .         .       
#> [17,]  .         .          .         .       
#> [18,]  .         .          .         .       
#> [19,]  .         .          .         .       
#> [20,]  .         .          .         .       
#> [21,]  .         .          .         .       
#> [22,]  .         .          .         .       
#> [23,]  .         .          .         .       
#> [24,]  .         .          .         .       
#> [25,]  .         .          .         .       
#> [26,]  .         .          .         .       
#> [27,]  .         .          .         .       
#> [28,]  .         .          .         .       
#> [29,]  .         .          .         .       
#> [30,]  .         .          .         .       
#> [31,]  .         .          .         .       
#> [32,]  .         .          .         .       
#> [33,]  .         .          .         .       
#> [34,]  .         .          .         .       
#> [35,]  .         .          .         .       
#> [36,]  .         .          .         .       
#> [37,]  .         .          .         .       
#> [38,]  .         .          .         .       
#> [39,]  .         .          .         .       
#> [40,]  .         .          .         .       
#> [41,]  .         .          .         .       
#> [42,]  .         .          .         .       
#> [43,]  .         .          .         .       
#> [44,]  .         .          .         .       
#> [45,]  .         .          .         .       
#> [46,]  .         .          .         .       
#> [47,]  .         .          .         .       
#> [48,]  .         .          .         .       
#> [49,]  .         .          .         .       
#> [50,]  .         .          .         .

Multiple epochs

result_multiple_epochs <- fastcpd(
  formula = y ~ . - 1,
  data = data.frame(y = y, x = x),
  family = "lasso",
  multiple_epochs = function(segment_length) if (segment_length < 20) 1 else 0,
  r.progress = FALSE
)
summary(result_multiple_epochs)
#> 
#> Call:
#> fastcpd(formula = y ~ . - 1, data = data.frame(y = y, x = x), 
#>     multiple_epochs = function(segment_length) if (segment_length < 
#>         20) 1 else 0, family = "lasso", r.progress = FALSE)
#> 
#> Change points:
#> 79 200 325 
#> 
#> Cost values:
#> 297.184 325.943 456.7906 456.9829 
#> 
#> Parameters:
#> 50 x 4 sparse Matrix of class "dgCMatrix"
#>       segment 1  segment 2 segment 3 segment 4
#>  [1,] -1.815974 -1.8077660  3.847431 -2.399833
#>  [2,] -1.692328 -1.5757570  3.310427  2.170296
#>  [3,] -3.340743  0.6288282  4.100075  2.853726
#>  [4,] -4.075669  .          1.821134 -2.534484
#>  [5,] -3.972095  .          3.885295  2.308333
#>  [6,] -3.203743 -1.7778324  2.404614  3.302757
#>  [7,]  .         .          .         .       
#>  [8,]  .         .          .         .       
#>  [9,]  .         .          .         .       
#> [10,]  .         .          .         .       
#> [11,]  .         .          .         .       
#> [12,]  .         .          .         .       
#> [13,]  .         .          .         .       
#> [14,]  .         .          .         .       
#> [15,]  .         .          .         .       
#> [16,]  .         .          .         .       
#> [17,]  .         .          .         .       
#> [18,]  .         .          .         .       
#> [19,]  .         .          .         .       
#> [20,]  .         .          .         .       
#> [21,]  .         .          .         .       
#> [22,]  .         .          .         .       
#> [23,]  .         .          .         .       
#> [24,]  .         .          .         .       
#> [25,]  .         .          .         .       
#> [26,]  .         .          .         .       
#> [27,]  .         .          .         .       
#> [28,]  .         .          .         .       
#> [29,]  .         .          .         .       
#> [30,]  .         .          .         .       
#> [31,]  .         .          .         .       
#> [32,]  .         .          .         .       
#> [33,]  .         .          .         .       
#> [34,]  .         .          .         .       
#> [35,]  .         .          .         .       
#> [36,]  .         .          .         .       
#> [37,]  .         .          .         .       
#> [38,]  .         .          .         .       
#> [39,]  .         .          .         .       
#> [40,]  .         .          .         .       
#> [41,]  .         .          .         .       
#> [42,]  .         .          .         .       
#> [43,]  .         .          .         .       
#> [44,]  .         .          .         .       
#> [45,]  .         .          .         .       
#> [46,]  .         .          .         .       
#> [47,]  .         .          .         .       
#> [48,]  .         .          .         .       
#> [49,]  .         .          .         .       
#> [50,]  .         .          .         .

Vanilla percentage

result_vanilla_percentage <- fastcpd(
  formula = y ~ . - 1,
  data = data.frame(y = y, x = x),
  family = "lasso",
  vanilla_percentage = 0.2,
  r.progress = FALSE
)
summary(result_vanilla_percentage)
#> 
#> Call:
#> fastcpd(formula = y ~ . - 1, data = data.frame(y = y, x = x), 
#>     family = "lasso", vanilla_percentage = 0.2, r.progress = FALSE)
#> 
#> Change points:
#> 79 200 325 
#> 
#> Cost values:
#> 304.9901 332.8808 468.1191 470.8976 
#> 
#> Parameters:
#> 50 x 4 sparse Matrix of class "dgCMatrix"
#>       segment 1  segment 2 segment 3 segment 4
#>  [1,] -1.794342 -1.7863922  3.828320 -2.384733
#>  [2,] -1.669612 -1.5520762  3.289372  2.148898
#>  [3,] -3.315550  0.6151654  4.081897  2.832707
#>  [4,] -4.056087  .          1.797223 -2.517172
#>  [5,] -3.952455  .          3.862429  2.286681
#>  [6,] -3.183277 -1.7592907  2.379079  3.277910
#>  [7,]  .         .          .         .       
#>  [8,]  .         .          .         .       
#>  [9,]  .         .          .         .       
#> [10,]  .         .          .         .       
#> [11,]  .         .          .         .       
#> [12,]  .         .          .         .       
#> [13,]  .         .          .         .       
#> [14,]  .         .          .         .       
#> [15,]  .         .          .         .       
#> [16,]  .         .          .         .       
#> [17,]  .         .          .         .       
#> [18,]  .         .          .         .       
#> [19,]  .         .          .         .       
#> [20,]  .         .          .         .       
#> [21,]  .         .          .         .       
#> [22,]  .         .          .         .       
#> [23,]  .         .          .         .       
#> [24,]  .         .          .         .       
#> [25,]  .         .          .         .       
#> [26,]  .         .          .         .       
#> [27,]  .         .          .         .       
#> [28,]  .         .          .         .       
#> [29,]  .         .          .         .       
#> [30,]  .         .          .         .       
#> [31,]  .         .          .         .       
#> [32,]  .         .          .         .       
#> [33,]  .         .          .         .       
#> [34,]  .         .          .         .       
#> [35,]  .         .          .         .       
#> [36,]  .         .          .         .       
#> [37,]  .         .          .         .       
#> [38,]  .         .          .         .       
#> [39,]  .         .          .         .       
#> [40,]  .         .          .         .       
#> [41,]  .         .          .         .       
#> [42,]  .         .          .         .       
#> [43,]  .         .          .         .       
#> [44,]  .         .          .         .       
#> [45,]  .         .          .         .       
#> [46,]  .         .          .         .       
#> [47,]  .         .          .         .       
#> [48,]  .         .          .         .       
#> [49,]  .         .          .         .       
#> [50,]  .         .          .         .

linear regression with multi-dimensional responses

set.seed(1)
n <- 300
p <- 3
y_count <- 2
x <- mvtnorm::rmvnorm(n, rep(0, p), diag(p))
theta_0 <- array(NA, dim = c(3, y_count, 3))
theta_0[, , 1] <- cbind(c(1, 1.2, -1), c(-1, 0, 0.5))
theta_0[, , 2] <- cbind(c(-1, 0, 0.5), c(0.5, -0.3, 0.2))
theta_0[, , 3] <- cbind(c(0.5, -0.3, 0.2), c(1, 1.2, -1))
y <- rbind(
  x[1:100, ] %*% theta_0[, , 1],
  x[101:200, ] %*% theta_0[, , 2],
  x[201:n, ] %*% theta_0[, , 3]
) + matrix(rnorm(n * y_count), ncol = y_count)
multi_response_linear_loss <- function(data) {
  x <- data[, (ncol(data) - p + 1):ncol(data)]
  y <- data[, 1:(ncol(data) - p)]

  if (nrow(data) <= p) {
    x_t_x <- diag(p)
  } else {
    x_t_x <- crossprod(x)
  }

  norm(y - x %*% solve(x_t_x, t(x)) %*% y, type = "F")^2 / 2
}
result <- fastcpd(
  formula = y ~ x - 1,
  data = data.frame(y = y, x = x),
  beta = (2 * p + 1) * log(n) / 2,
  cost = multi_response_linear_loss,
  cp_only = TRUE,
  r.progress = FALSE
)

testthat::expect_equal(result@cp_set, c(102, 195))