Exploration during development
Source:vignettes/exploration-during-development.Rmd
exploration-during-development.Rmd
Exploration of the QMLE method for ARMA models
qmle <- function(data, theta, p = 1, q = 1) {
if (nrow(data) < max(p, q) + 1) {
return(0)
}
variance_term <- rep(0, nrow(data))
for (i in (max(p, q) + 1):nrow(data)) {
variance_term[i] <-
data[i] -
theta[1:p] %*% data[(i - 1):(i - p)] -
theta[(p + 1):(p + q)] %*% variance_term[(i - 1):(i - q)]
}
(log(2 * pi) + log(theta[p + q + 1])) * (nrow(data) - q) / 2 +
sum(variance_term^2) / (2 * theta[p + q + 1])
}
qmle_gradient <- function(data, theta, p = 1, q = 1) {
if (nrow(data) < max(p, q) + 1) {
return(rep(1, length(theta)))
}
variance_term <- rep(0, nrow(data))
for (i in (max(p, q) + 1):nrow(data)) {
variance_term[i] <-
data[i] -
theta[1:p] %*% data[(i - 1):(i - p)] -
theta[(p + 1):(p + q)] %*% variance_term[(i - 1):(i - q)]
}
phi_coefficient <- matrix(0, nrow(data), p)
psi_coefficient <- matrix(0, nrow(data), q)
for (i in (max(p, q) + 1):nrow(data)) {
phi_coefficient[i, ] <-
-data[(i - 1):(i - p)] -
theta[(p + 1):(p + q)] %*% phi_coefficient[(i - 1):(i - q), ]
}
for (i in (q + 1):nrow(data)) {
psi_coefficient[i, ] <-
-variance_term[(i - 1):(i - q)] -
theta[(p + 1):(p + q)] %*% psi_coefficient[(i - 1):(i - q), ]
}
c(
phi_coefficient[nrow(data), ] * variance_term[nrow(data)] /
theta[p + q + 1],
psi_coefficient[nrow(data), ] * variance_term[nrow(data)] /
theta[p + q + 1],
1 / 2 / theta[p + q + 1] -
variance_term[nrow(data)]^2 / (2 * theta[p + q + 1]^2)
)
}
qmle_gradient_sum <- function(data, theta, p = 1, q = 1) {
if (nrow(data) < max(p, q) + 1) {
return(rep(1, length(theta)))
}
variance_term <- rep(0, nrow(data))
for (i in (max(p, q) + 1):nrow(data)) {
variance_term[i] <-
data[i] -
theta[1:p] %*% data[(i - 1):(i - p)] -
theta[(p + 1):(p + q)] %*% variance_term[(i - 1):(i - q)]
}
phi_coefficient <- matrix(0, nrow(data), p)
psi_coefficient <- matrix(0, nrow(data), q)
for (i in (max(p, q) + 1):nrow(data)) {
phi_coefficient[i, ] <-
-data[(i - 1):(i - p)] -
theta[(p + 1):(p + q)] %*% phi_coefficient[(i - 1):(i - q), ]
}
for (i in (q + 1):nrow(data)) {
psi_coefficient[i, ] <-
-variance_term[(i - 1):(i - q)] -
theta[(p + 1):(p + q)] %*% psi_coefficient[(i - 1):(i - q), ]
}
c(
crossprod(phi_coefficient, variance_term) / theta[p + q + 1],
crossprod(psi_coefficient, variance_term) / theta[p + q + 1],
(nrow(data) - q) / 2 / theta[p + q + 1] -
crossprod(variance_term) / 2 / theta[p + q + 1]^2
)
}
qmle_hessian <- function(data, theta, p = 1, q = 1) {
if (nrow(data) < max(p, q) + 1) {
return(diag(length(theta)))
}
variance_term <- rep(0, nrow(data))
for (i in (max(p, q) + 1):nrow(data)) {
variance_term[i] <-
data[i] -
theta[1:p] %*% data[(i - 1):(i - p)] -
theta[(p + 1):(p + q)] %*% variance_term[(i - 1):(i - q)]
}
phi_coefficient <- matrix(0, nrow(data), p)
psi_coefficient <- matrix(0, nrow(data), q)
for (i in (max(p, q) + 1):nrow(data)) {
phi_coefficient[i, ] <-
-data[(i - 1):(i - p)] -
theta[(p + 1):(p + q)] %*% phi_coefficient[(i - 1):(i - q), ]
}
for (i in (q + 1):nrow(data)) {
psi_coefficient[i, ] <-
-variance_term[(i - 1):(i - q)] -
theta[(p + 1):(p + q)] %*% psi_coefficient[(i - 1):(i - q), ]
}
phi_psi_coefficient <- array(0, c(q, p, nrow(data)))
psi_psi_coefficient <- array(0, c(q, q, nrow(data)))
for (i in (q + 1):nrow(data)) {
phi_psi_coefficient[, , i] <-
-phi_coefficient[(i - 1):(i - q), ] -
rowSums(
sweep(
phi_psi_coefficient[, , (i - 1):(i - q), drop = FALSE],
3,
theta[(p + 1):(p + q)],
`*`
),
dims = 2
)
psi_psi_coefficient[, , i] <-
-psi_coefficient[(i - 1):(i - q), ] -
t(psi_coefficient[(i - 1):(i - q), ]) -
rowSums(
sweep(
psi_psi_coefficient[, , (i - 1):(i - q), drop = FALSE],
3,
theta[(p + 1):(p + q)],
`*`
),
dims = 2
)
}
hessian <- matrix(0, nrow = p + q + 1, ncol = p + q + 1)
hessian[1:p, 1:p] <-
crossprod(phi_coefficient[nrow(data), , drop = FALSE]) /
theta[p + q + 1]
hessian[1:p, (p + 1):(p + q)] <- (
t(phi_psi_coefficient[, , nrow(data)]) * variance_term[nrow(data)] +
crossprod(
phi_coefficient[nrow(data), , drop = FALSE],
psi_coefficient[nrow(data), , drop = FALSE]
)
) / theta[p + q + 1]
hessian[(p + 1):(p + q), 1:p] <- t(hessian[1:p, (p + 1):(p + q)])
hessian[1:p, p + q + 1] <-
-t(phi_coefficient[nrow(data), ]) *
variance_term[nrow(data)] / theta[p + q + 1]^2
hessian[p + q + 1, 1:p] <- t(hessian[1:p, p + q + 1])
hessian[(p + 1):(p + q), (p + 1):(p + q)] <- (
crossprod(psi_coefficient[nrow(data), , drop = FALSE]) +
psi_psi_coefficient[, , nrow(data)] * variance_term[nrow(data)]
) / theta[p + q + 1]
hessian[(p + 1):(p + q), p + q + 1] <-
-t(psi_coefficient[nrow(data), ]) *
variance_term[nrow(data)] / theta[p + q + 1]^2
hessian[p + q + 1, (p + 1):(p + q)] <-
t(hessian[(p + 1):(p + q), p + q + 1])
hessian[p + q + 1, p + q + 1] <-
variance_term[nrow(data)]^2 / theta[p + q + 1]^3 -
1 / 2 / theta[p + q + 1]^2
hessian
}
qmle_hessian_sum <- function(data, theta, p = 1, q = 1) {
if (nrow(data) < max(p, q) + 1) {
return(diag(length(theta)))
}
variance_term <- rep(0, nrow(data))
for (i in (max(p, q) + 1):nrow(data)) {
variance_term[i] <-
data[i] -
theta[1:p] %*% data[(i - 1):(i - p)] -
theta[(p + 1):(p + q)] %*% variance_term[(i - 1):(i - q)]
}
phi_coefficient <- matrix(0, nrow(data), p)
psi_coefficient <- matrix(0, nrow(data), q)
for (i in (max(p, q) + 1):nrow(data)) {
phi_coefficient[i, ] <-
-data[(i - 1):(i - p)] -
theta[(p + 1):(p + q)] %*% phi_coefficient[(i - 1):(i - q), ]
}
for (i in (q + 1):nrow(data)) {
psi_coefficient[i, ] <-
-variance_term[(i - 1):(i - q)] -
theta[(p + 1):(p + q)] %*% psi_coefficient[(i - 1):(i - q), ]
}
phi_psi_coefficient <- array(0, c(q, p, nrow(data)))
psi_psi_coefficient <- array(0, c(q, q, nrow(data)))
for (i in (q + 1):nrow(data)) {
phi_psi_coefficient[, , i] <-
-phi_coefficient[(i - 1):(i - q), ] -
rowSums(
sweep(
phi_psi_coefficient[, , (i - 1):(i - q), drop = FALSE],
3,
theta[(p + 1):(p + q)],
`*`
),
dims = 2
)
psi_psi_coefficient[, , i] <-
-psi_coefficient[(i - 1):(i - q), ] -
t(psi_coefficient[(i - 1):(i - q), ]) -
rowSums(
sweep(
psi_psi_coefficient[, , (i - 1):(i - q), drop = FALSE],
3,
theta[(p + 1):(p + q)],
`*`
),
dims = 2
)
}
hessian <- matrix(0, nrow = p + q + 1, ncol = p + q + 1)
hessian[1:p, 1:p] <-
crossprod(phi_coefficient) / theta[p + q + 1]
hessian[(p + 1):(p + q), 1:p] <- (
rowSums(
sweep(
phi_psi_coefficient,
3,
variance_term,
`*`
),
dims = 2
) +
crossprod(
psi_coefficient, phi_coefficient
)
) / theta[p + q + 1]
hessian[1:p, (p + 1):(p + q)] <- t(hessian[(p + 1):(p + q), 1:p])
hessian[1:p, p + q + 1] <-
-crossprod(phi_coefficient, variance_term) / theta[p + q + 1]^2
hessian[p + q + 1, 1:p] <- t(hessian[1:p, p + q + 1])
hessian[(p + 1):(p + q), (p + 1):(p + q)] <- (
crossprod(psi_coefficient) + rowSums(
sweep(
psi_psi_coefficient,
3,
variance_term,
`*`
),
dims = 2
)
) / theta[p + q + 1]
hessian[(p + 1):(p + q), p + q + 1] <-
-crossprod(psi_coefficient, variance_term) / theta[p + q + 1]^2
hessian[p + q + 1, (p + 1):(p + q)] <-
t(hessian[(p + 1):(p + q), p + q + 1])
hessian[p + q + 1, p + q + 1] <-
crossprod(variance_term) / theta[p + q + 1]^3 -
(nrow(data) - q) / 2 / theta[p + q + 1]^2
hessian
}
# fastcpd arma 1 1
set.seed(1)
n <- 600
w <- rnorm(n + 1, 0, 1)
x <- rep(0, n + 1)
for (i in 1:300) {
x[i + 1] <- 0.1 * x[i] + w[i + 1] + 0.1 * w[i]
}
for (i in 301:n) {
x[i + 1] <- 0.3 * x[i] + w[i + 1] + 0.4 * w[i]
}
result <- fastcpd(
formula = ~ . - 1,
data = data.frame(x = x[1 + seq_len(n)]),
trim = 0,
p = 1 + 1 + 1,
beta = (1 + 1 + 1 + 1) * log(n) / 2,
cost = qmle,
cost_gradient = qmle_gradient,
cost_hessian = qmle_hessian,
cp_only = TRUE,
lower = c(rep(-1, 1 + 1), 1e-10),
upper = c(rep(1, 1 + 1), Inf),
line_search = c(1, 0.1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6, 1e-7, 1e-8, 1e-9)
)
# fastcpd arma 3 2
set.seed(1)
n <- 600
w <- rnorm(n + 2, 0, 1)
x <- rep(0, n + 3)
for (i in 1:300) {
x[i + 3] <- 0.1 * x[i + 2] - 0.2 * x[i + 1] + 0.6 * x[i] +
w[i + 2] + 0.1 * w[i + 1] + 0.5 * w[i]
}
for (i in 301:n) {
x[i + 3] <- 0.3 * x[i + 2] + 0.4 * x[i + 1] + 0.2 * x[i] +
w[i + 2] + 0.4 * w[i + 1] + 0.1 * w[i]
}
result <- fastcpd(
formula = ~ . - 1,
data = data.frame(x = x[3 + seq_len(n)]),
trim = 0,
p = 3 + 2 + 1,
beta = (3 + 2 + 1 + 1) * log(n) / 2,
cost = function(data, theta) {
qmle(data, theta, 3, 2)
},
cost_gradient = function(data, theta) {
qmle_gradient(data, theta, 3, 2)
},
cost_hessian = function(data, theta) {
qmle_hessian(data, theta, 3, 2)
},
cp_only = TRUE,
lower = c(rep(-1, 3 + 2), 1e-10),
upper = c(rep(1, 3 + 2), Inf),
line_search = c(1, 0.1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6, 1e-7, 1e-8, 1e-9)
)
testthat::expect_equal(result@cp_set, c(4, 22, 290))
# hessian check
theta_estimate <- rep(0.1, 3 + 2 + 1)
testthat::expect_equal(
numDeriv::hessian(
qmle, theta_estimate, data = matrix(x[3 + seq_len(n)]), p = 3, q = 2
),
qmle_hessian_sum(matrix(x[3 + seq_len(n)]), theta_estimate, 3, 2)
)
optim(
rep(0.1, 3 + 2 + 1),
fn = function(data, theta) {
qmle(data, theta, 3, 2)
},
data = data,
method = "L-BFGS-B",
lower = c(rep(-1, 3 + 2), 1e-10),
upper = c(rep(1, 3 + 2), Inf),
gr = function(data, theta) {
qmle_gradient_sum(data, theta, 3, 2)
}
)
# convergence check
x <- arima.sim(list(ar = c(0.1, -0.2, 0.6), ma = c(0.1, 0.5)), n = n + 3)
theta_estimate <- rep(0.1, 3 + 2 + 1)
data <- matrix(x[3 + seq_len(n)])
qmle_path <- NULL
prev_qmle <- 1
curr_qmle <- Inf
epochs_num <- 0
while (abs(curr_qmle - prev_qmle) > 1e-5) {
prev_qmle <- curr_qmle
hessian <-
Matrix::nearPD(qmle_hessian_sum(data, theta_estimate, 3, 2))$mat
step <- solve(
hessian, qmle_gradient_sum(data, theta_estimate, 3, 2)
)
# line search
lr_choices <- c(1, 0.1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6, 1e-7, 1e-8, 1e-9)
lr <- lr_choices[which.min(
sapply(lr_choices, function(lr) {
qmle(data, pmin(
pmax(theta_estimate - lr * step, c(rep(-1, 3 + 2), 1e-10)),
c(rep(1, 3 + 2), Inf)
), 3, 2)
})
)]
theta_estimate <- pmin(
pmax(theta_estimate - lr * step, c(rep(-1, 3 + 2), 1e-10)),
c(rep(1, 3 + 2), Inf)
)
curr_qmle <- qmle(data, theta_estimate, 3, 2)
cat(epochs_num, curr_qmle, theta_estimate, "\n")
qmle_path <- c(qmle_path, curr_qmle)
epochs_num <- epochs_num + 1
}
Appendix: all code snippets
knitr::opts_chunk$set(collapse = TRUE, comment = "#>", eval = FALSE)
library(fastcpd)
qmle <- function(data, theta, p = 1, q = 1) {
if (nrow(data) < max(p, q) + 1) {
return(0)
}
variance_term <- rep(0, nrow(data))
for (i in (max(p, q) + 1):nrow(data)) {
variance_term[i] <-
data[i] -
theta[1:p] %*% data[(i - 1):(i - p)] -
theta[(p + 1):(p + q)] %*% variance_term[(i - 1):(i - q)]
}
(log(2 * pi) + log(theta[p + q + 1])) * (nrow(data) - q) / 2 +
sum(variance_term^2) / (2 * theta[p + q + 1])
}
qmle_gradient <- function(data, theta, p = 1, q = 1) {
if (nrow(data) < max(p, q) + 1) {
return(rep(1, length(theta)))
}
variance_term <- rep(0, nrow(data))
for (i in (max(p, q) + 1):nrow(data)) {
variance_term[i] <-
data[i] -
theta[1:p] %*% data[(i - 1):(i - p)] -
theta[(p + 1):(p + q)] %*% variance_term[(i - 1):(i - q)]
}
phi_coefficient <- matrix(0, nrow(data), p)
psi_coefficient <- matrix(0, nrow(data), q)
for (i in (max(p, q) + 1):nrow(data)) {
phi_coefficient[i, ] <-
-data[(i - 1):(i - p)] -
theta[(p + 1):(p + q)] %*% phi_coefficient[(i - 1):(i - q), ]
}
for (i in (q + 1):nrow(data)) {
psi_coefficient[i, ] <-
-variance_term[(i - 1):(i - q)] -
theta[(p + 1):(p + q)] %*% psi_coefficient[(i - 1):(i - q), ]
}
c(
phi_coefficient[nrow(data), ] * variance_term[nrow(data)] /
theta[p + q + 1],
psi_coefficient[nrow(data), ] * variance_term[nrow(data)] /
theta[p + q + 1],
1 / 2 / theta[p + q + 1] -
variance_term[nrow(data)]^2 / (2 * theta[p + q + 1]^2)
)
}
qmle_gradient_sum <- function(data, theta, p = 1, q = 1) {
if (nrow(data) < max(p, q) + 1) {
return(rep(1, length(theta)))
}
variance_term <- rep(0, nrow(data))
for (i in (max(p, q) + 1):nrow(data)) {
variance_term[i] <-
data[i] -
theta[1:p] %*% data[(i - 1):(i - p)] -
theta[(p + 1):(p + q)] %*% variance_term[(i - 1):(i - q)]
}
phi_coefficient <- matrix(0, nrow(data), p)
psi_coefficient <- matrix(0, nrow(data), q)
for (i in (max(p, q) + 1):nrow(data)) {
phi_coefficient[i, ] <-
-data[(i - 1):(i - p)] -
theta[(p + 1):(p + q)] %*% phi_coefficient[(i - 1):(i - q), ]
}
for (i in (q + 1):nrow(data)) {
psi_coefficient[i, ] <-
-variance_term[(i - 1):(i - q)] -
theta[(p + 1):(p + q)] %*% psi_coefficient[(i - 1):(i - q), ]
}
c(
crossprod(phi_coefficient, variance_term) / theta[p + q + 1],
crossprod(psi_coefficient, variance_term) / theta[p + q + 1],
(nrow(data) - q) / 2 / theta[p + q + 1] -
crossprod(variance_term) / 2 / theta[p + q + 1]^2
)
}
qmle_hessian <- function(data, theta, p = 1, q = 1) {
if (nrow(data) < max(p, q) + 1) {
return(diag(length(theta)))
}
variance_term <- rep(0, nrow(data))
for (i in (max(p, q) + 1):nrow(data)) {
variance_term[i] <-
data[i] -
theta[1:p] %*% data[(i - 1):(i - p)] -
theta[(p + 1):(p + q)] %*% variance_term[(i - 1):(i - q)]
}
phi_coefficient <- matrix(0, nrow(data), p)
psi_coefficient <- matrix(0, nrow(data), q)
for (i in (max(p, q) + 1):nrow(data)) {
phi_coefficient[i, ] <-
-data[(i - 1):(i - p)] -
theta[(p + 1):(p + q)] %*% phi_coefficient[(i - 1):(i - q), ]
}
for (i in (q + 1):nrow(data)) {
psi_coefficient[i, ] <-
-variance_term[(i - 1):(i - q)] -
theta[(p + 1):(p + q)] %*% psi_coefficient[(i - 1):(i - q), ]
}
phi_psi_coefficient <- array(0, c(q, p, nrow(data)))
psi_psi_coefficient <- array(0, c(q, q, nrow(data)))
for (i in (q + 1):nrow(data)) {
phi_psi_coefficient[, , i] <-
-phi_coefficient[(i - 1):(i - q), ] -
rowSums(
sweep(
phi_psi_coefficient[, , (i - 1):(i - q), drop = FALSE],
3,
theta[(p + 1):(p + q)],
`*`
),
dims = 2
)
psi_psi_coefficient[, , i] <-
-psi_coefficient[(i - 1):(i - q), ] -
t(psi_coefficient[(i - 1):(i - q), ]) -
rowSums(
sweep(
psi_psi_coefficient[, , (i - 1):(i - q), drop = FALSE],
3,
theta[(p + 1):(p + q)],
`*`
),
dims = 2
)
}
hessian <- matrix(0, nrow = p + q + 1, ncol = p + q + 1)
hessian[1:p, 1:p] <-
crossprod(phi_coefficient[nrow(data), , drop = FALSE]) /
theta[p + q + 1]
hessian[1:p, (p + 1):(p + q)] <- (
t(phi_psi_coefficient[, , nrow(data)]) * variance_term[nrow(data)] +
crossprod(
phi_coefficient[nrow(data), , drop = FALSE],
psi_coefficient[nrow(data), , drop = FALSE]
)
) / theta[p + q + 1]
hessian[(p + 1):(p + q), 1:p] <- t(hessian[1:p, (p + 1):(p + q)])
hessian[1:p, p + q + 1] <-
-t(phi_coefficient[nrow(data), ]) *
variance_term[nrow(data)] / theta[p + q + 1]^2
hessian[p + q + 1, 1:p] <- t(hessian[1:p, p + q + 1])
hessian[(p + 1):(p + q), (p + 1):(p + q)] <- (
crossprod(psi_coefficient[nrow(data), , drop = FALSE]) +
psi_psi_coefficient[, , nrow(data)] * variance_term[nrow(data)]
) / theta[p + q + 1]
hessian[(p + 1):(p + q), p + q + 1] <-
-t(psi_coefficient[nrow(data), ]) *
variance_term[nrow(data)] / theta[p + q + 1]^2
hessian[p + q + 1, (p + 1):(p + q)] <-
t(hessian[(p + 1):(p + q), p + q + 1])
hessian[p + q + 1, p + q + 1] <-
variance_term[nrow(data)]^2 / theta[p + q + 1]^3 -
1 / 2 / theta[p + q + 1]^2
hessian
}
qmle_hessian_sum <- function(data, theta, p = 1, q = 1) {
if (nrow(data) < max(p, q) + 1) {
return(diag(length(theta)))
}
variance_term <- rep(0, nrow(data))
for (i in (max(p, q) + 1):nrow(data)) {
variance_term[i] <-
data[i] -
theta[1:p] %*% data[(i - 1):(i - p)] -
theta[(p + 1):(p + q)] %*% variance_term[(i - 1):(i - q)]
}
phi_coefficient <- matrix(0, nrow(data), p)
psi_coefficient <- matrix(0, nrow(data), q)
for (i in (max(p, q) + 1):nrow(data)) {
phi_coefficient[i, ] <-
-data[(i - 1):(i - p)] -
theta[(p + 1):(p + q)] %*% phi_coefficient[(i - 1):(i - q), ]
}
for (i in (q + 1):nrow(data)) {
psi_coefficient[i, ] <-
-variance_term[(i - 1):(i - q)] -
theta[(p + 1):(p + q)] %*% psi_coefficient[(i - 1):(i - q), ]
}
phi_psi_coefficient <- array(0, c(q, p, nrow(data)))
psi_psi_coefficient <- array(0, c(q, q, nrow(data)))
for (i in (q + 1):nrow(data)) {
phi_psi_coefficient[, , i] <-
-phi_coefficient[(i - 1):(i - q), ] -
rowSums(
sweep(
phi_psi_coefficient[, , (i - 1):(i - q), drop = FALSE],
3,
theta[(p + 1):(p + q)],
`*`
),
dims = 2
)
psi_psi_coefficient[, , i] <-
-psi_coefficient[(i - 1):(i - q), ] -
t(psi_coefficient[(i - 1):(i - q), ]) -
rowSums(
sweep(
psi_psi_coefficient[, , (i - 1):(i - q), drop = FALSE],
3,
theta[(p + 1):(p + q)],
`*`
),
dims = 2
)
}
hessian <- matrix(0, nrow = p + q + 1, ncol = p + q + 1)
hessian[1:p, 1:p] <-
crossprod(phi_coefficient) / theta[p + q + 1]
hessian[(p + 1):(p + q), 1:p] <- (
rowSums(
sweep(
phi_psi_coefficient,
3,
variance_term,
`*`
),
dims = 2
) +
crossprod(
psi_coefficient, phi_coefficient
)
) / theta[p + q + 1]
hessian[1:p, (p + 1):(p + q)] <- t(hessian[(p + 1):(p + q), 1:p])
hessian[1:p, p + q + 1] <-
-crossprod(phi_coefficient, variance_term) / theta[p + q + 1]^2
hessian[p + q + 1, 1:p] <- t(hessian[1:p, p + q + 1])
hessian[(p + 1):(p + q), (p + 1):(p + q)] <- (
crossprod(psi_coefficient) + rowSums(
sweep(
psi_psi_coefficient,
3,
variance_term,
`*`
),
dims = 2
)
) / theta[p + q + 1]
hessian[(p + 1):(p + q), p + q + 1] <-
-crossprod(psi_coefficient, variance_term) / theta[p + q + 1]^2
hessian[p + q + 1, (p + 1):(p + q)] <-
t(hessian[(p + 1):(p + q), p + q + 1])
hessian[p + q + 1, p + q + 1] <-
crossprod(variance_term) / theta[p + q + 1]^3 -
(nrow(data) - q) / 2 / theta[p + q + 1]^2
hessian
}
# fastcpd arma 1 1
set.seed(1)
n <- 600
w <- rnorm(n + 1, 0, 1)
x <- rep(0, n + 1)
for (i in 1:300) {
x[i + 1] <- 0.1 * x[i] + w[i + 1] + 0.1 * w[i]
}
for (i in 301:n) {
x[i + 1] <- 0.3 * x[i] + w[i + 1] + 0.4 * w[i]
}
result <- fastcpd(
formula = ~ . - 1,
data = data.frame(x = x[1 + seq_len(n)]),
trim = 0,
p = 1 + 1 + 1,
beta = (1 + 1 + 1 + 1) * log(n) / 2,
cost = qmle,
cost_gradient = qmle_gradient,
cost_hessian = qmle_hessian,
cp_only = TRUE,
lower = c(rep(-1, 1 + 1), 1e-10),
upper = c(rep(1, 1 + 1), Inf),
line_search = c(1, 0.1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6, 1e-7, 1e-8, 1e-9)
)
# fastcpd arma 3 2
set.seed(1)
n <- 600
w <- rnorm(n + 2, 0, 1)
x <- rep(0, n + 3)
for (i in 1:300) {
x[i + 3] <- 0.1 * x[i + 2] - 0.2 * x[i + 1] + 0.6 * x[i] +
w[i + 2] + 0.1 * w[i + 1] + 0.5 * w[i]
}
for (i in 301:n) {
x[i + 3] <- 0.3 * x[i + 2] + 0.4 * x[i + 1] + 0.2 * x[i] +
w[i + 2] + 0.4 * w[i + 1] + 0.1 * w[i]
}
result <- fastcpd(
formula = ~ . - 1,
data = data.frame(x = x[3 + seq_len(n)]),
trim = 0,
p = 3 + 2 + 1,
beta = (3 + 2 + 1 + 1) * log(n) / 2,
cost = function(data, theta) {
qmle(data, theta, 3, 2)
},
cost_gradient = function(data, theta) {
qmle_gradient(data, theta, 3, 2)
},
cost_hessian = function(data, theta) {
qmle_hessian(data, theta, 3, 2)
},
cp_only = TRUE,
lower = c(rep(-1, 3 + 2), 1e-10),
upper = c(rep(1, 3 + 2), Inf),
line_search = c(1, 0.1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6, 1e-7, 1e-8, 1e-9)
)
testthat::expect_equal(result@cp_set, c(4, 22, 290))
# hessian check
theta_estimate <- rep(0.1, 3 + 2 + 1)
testthat::expect_equal(
numDeriv::hessian(
qmle, theta_estimate, data = matrix(x[3 + seq_len(n)]), p = 3, q = 2
),
qmle_hessian_sum(matrix(x[3 + seq_len(n)]), theta_estimate, 3, 2)
)
optim(
rep(0.1, 3 + 2 + 1),
fn = function(data, theta) {
qmle(data, theta, 3, 2)
},
data = data,
method = "L-BFGS-B",
lower = c(rep(-1, 3 + 2), 1e-10),
upper = c(rep(1, 3 + 2), Inf),
gr = function(data, theta) {
qmle_gradient_sum(data, theta, 3, 2)
}
)
# convergence check
x <- arima.sim(list(ar = c(0.1, -0.2, 0.6), ma = c(0.1, 0.5)), n = n + 3)
theta_estimate <- rep(0.1, 3 + 2 + 1)
data <- matrix(x[3 + seq_len(n)])
qmle_path <- NULL
prev_qmle <- 1
curr_qmle <- Inf
epochs_num <- 0
while (abs(curr_qmle - prev_qmle) > 1e-5) {
prev_qmle <- curr_qmle
hessian <-
Matrix::nearPD(qmle_hessian_sum(data, theta_estimate, 3, 2))$mat
step <- solve(
hessian, qmle_gradient_sum(data, theta_estimate, 3, 2)
)
# line search
lr_choices <- c(1, 0.1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6, 1e-7, 1e-8, 1e-9)
lr <- lr_choices[which.min(
sapply(lr_choices, function(lr) {
qmle(data, pmin(
pmax(theta_estimate - lr * step, c(rep(-1, 3 + 2), 1e-10)),
c(rep(1, 3 + 2), Inf)
), 3, 2)
})
)]
theta_estimate <- pmin(
pmax(theta_estimate - lr * step, c(rep(-1, 3 + 2), 1e-10)),
c(rep(1, 3 + 2), Inf)
)
curr_qmle <- qmle(data, theta_estimate, 3, 2)
cat(epochs_num, curr_qmle, theta_estimate, "\n")
qmle_path <- c(qmle_path, curr_qmle)
epochs_num <- epochs_num + 1
}